def main(argv):

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(
        version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-a", "--annotation-file", "--annotations",
                      dest="annotation_files", type="string", action="append",
                      help="filename with annotations [default=%default].")

    parser.add_option("-s", "--segment-file", "--segments",
                      dest="segment_files", type="string", action="append",
                      help="filename with segments. Also accepts a glob in "
                      "parentheses [default=%default].")

    parser.add_option("-w", "--workspace-file", "--workspace",
                      dest="workspace_files", type="string", action="append",
                      help="filename with workspace segments. Also accepts a glob "
                      "in parentheses [default=%default].")

    parser.add_option("-i", "--isochore-file", "--isochores",
                      dest="isochore_files", type="string", action="append",
                      help="filename with isochore segments. Also accepts a glob "
                      "in parentheses [default=%default].")

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices=("track", "annotation", "fold", "pvalue", "qvalue"),
                      help="order results in output by fold, track, etc. "
                      "[default=%default].")

    parser.add_option("-q", "--qvalue-method", dest="qvalue_method", type="choice",
                      choices=("storey", "BH", "bonferroni", "holm", "hommel",
                               "hochberg", "BY", "none"),
                      help="method to perform multiple testing correction by "
                      "controlling the fdr [default=%default].")

    parser.add_option("--qvalue-lambda", dest="qvalue_lambda", type="float",
                      help="fdr computation: lambda [default=%default].")

    parser.add_option("--qvalue-pi0-method", dest="qvalue_pi0_method",
                      type="choice", choices=("smoother", "bootstrap"),
                      help="fdr computation: method for estimating pi0 "
                      "[default=%default].")

    parser.add_option("--descriptions", dest="input_filename_descriptions",
                      type="string",
                      help="filename mapping annotation terms to descriptions. "
                      "If given, the output table will contain additional columns "
                      "[default=%default]")

    parser.add_option("--ignore-segment-tracks", dest="ignore_segment_tracks",
                      action="store_true",
                      help="ignore segment tracks - all segments belong to one "
                      "track [default=%default]")

    parser.add_option("--enable-split-tracks", dest="enable_split_tracks",
                      action="store_true",
                      help="permit the same track to be in multiple files "
                      "[default=%default]")

    parser.add_option("--output-bed", dest="output_bed", type="choice",
                      action="append",
                      choices=("all", "annotations", "segments", "workspaces",
                               "isochores", "overlap"),
                      help="output bed files [default=%default].")

    parser.add_option("--output-stats", dest="output_stats", type="choice",
                      action="append",
                      choices=("all", "annotations", "segments", "workspaces",
                               "isochores", "overlap"),
                      help="output overlap summary stats [default=%default].")

    parser.add_option("--restrict-workspace", dest="restrict_workspace",
                      action="store_true",
                      help="restrict workspace to those segments that contain "
                      "both track and annotations [default=%default]")

    parser.add_option("--counter", dest="counters", type="choice",
                      action="append", choices=("binom", "hyperg"),
                      help="counter to use [default=%default].")

    parser.add_option("--output-tables-pattern", dest="output_tables_pattern",
                      type="string",
                      help="output pattern for result tables. Used if multiple "
                      "counters are specified [default=%default].")

    parser.set_defaults(
        annotation_files=[],
        segment_files=[],
        workspace_files=[],
        sample_files=[],
        counters=[],
        output_stats=[],
        output_bed=[],
        output_tables_pattern="%s.tsv.gz",
        output_order="fold",
        input_filename_counts=None,
        input_filename_results=None,
        pvalue_method="empirical",
        output_plots_pattern=None,
        output_samples_pattern=None,
        qvalue_method="storey",
        qvalue_lambda=None,
        qvalue_pi0_method="smoother",
        ignore_segment_tracks=False,
        input_filename_descriptions=None,
        conditional="unconditional",
        conditional_extension=None,
        conditional_expansion=None,
        restrict_workspace=False,
        enable_split_tracks=False,
        shift_expansion=2.0,
        shift_extension=0,
        overlap_mode="midpoint",
        truncate_workspace_to_annotations=False,
        truncate_segments_to_workspace=False)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    tstart = time.time()

    if len(options.counters) == 0:
        options.counters.append("binom")

    ############################################
    segments, annotations, workspaces, isochores = IO.buildSegments(options)
    E.info("intervals loaded in %i seconds" % (time.time() - tstart))

    # filter segments by workspace
    workspace = IO.applyIsochores(
        segments, annotations, workspaces, options, isochores)

    ############################################
    description_header, descriptions, description_width = IO.readDescriptions(
        options)

    ############################################
    ############################################
    # compute per contig

    # compute bases covered by workspace
    workspace2basecoverage, isochores = {}, []
    for contig, ww in workspace.iteritems():
        workspace2basecoverage[contig] = ww.sum()
        isochores.append(contig)

    # compute bases covered by annotations in workspace, per isochore
    annotation2basecoverage = collections.defaultdict(dict)
    for annotation, aa in annotations.iteritems():
        for isochore, a in aa.iteritems():
            # need to truncate to workspace?
            annotation2basecoverage[annotation][isochore] = a.sum()

    results_per_contig = collections.defaultdict(list)

    E.info("computing counts per isochore")

    # results per isochore
    def emptyResult(segment, annotation, isochore,
                    counter,
                    nsegments_in_workspace,
                    basecoverage_annotation,
                    basecoverage_workspace):
        return GREAT_RESULT._make((
            segment, annotation, isochore,
            counter,
            0,  # observed
            0,  # expected
            nsegments_in_workspace,
            0,  # nannotations_in_workspace
            0,  # nsegments_overlapping_annotation
            0,  # nannotations_overlapping_segments
            0,  # basecoverage_intersection
            0,  # basecoverage_segments
            basecoverage_annotation,
            basecoverage_workspace,
            0.0,
            1.0,
            1.0,
            1.0))

    for isochore in isochores:
        basecoverage_workspace = workspace2basecoverage[isochore]

        # iterate over all segment tracks on this isochore
        for segment, segmentdict in segments.iteritems():
            try:
                ss = segmentdict[isochore]
                # select segments overlapping workspace
                segments_in_workspace = GatSegmentList.SegmentList(clone=ss)
                segments_in_workspace.intersect(workspace[isochore])
                # number of segments in workspace
                nsegments_in_workspace = len(segments_in_workspace)
                basecoverage_segments = segments_in_workspace.sum()
            except KeyError:
                # no segments on this isochore
                ss = None
                nsegments_in_workspace = 0
                basecoverage_segments = 0

            for annotation, annotationdict in annotations.iteritems():

                # if annotation != "GO:0030957": continue

                try:
                    aa = annotationdict[isochore]
                except KeyError:
                    aa = None

                # p_A: proportion of bases covered by annotation
                try:
                    basecoverage_annotation = annotation2basecoverage[
                        annotation][isochore]
                except KeyError:
                    basecoverage_annotation = 0

                if ss is None or aa is None:
                    for counter in options.counters:
                        results_per_contig[(counter, segment, annotation)].append(
                            emptyResult(segment, annotation, isochore,
                                        counter,
                                        nsegments_in_workspace,
                                        basecoverage_annotation,
                                        basecoverage_workspace))
                    continue

                # select segments overlapping annotation
                segments_overlapping_annotation = GatSegmentList.SegmentList(
                    clone=ss)
                segments_overlapping_annotation.intersect(
                    annotations[annotation][isochore])

                # number of segments overlapping the annotation
                nsegments_overlapping_annotation = ss.intersectionWithSegments(
                    annotations[annotation][isochore],
                    mode=options.overlap_mode)

                # number of nucleotides at the intersection of segments,
                # annotation and workspace
                basecoverage_intersection = segments_overlapping_annotation.sum()

                annotations_overlapping_segments = GatSegmentList.SegmentList(
                    clone=aa)
                annotations_overlapping_segments.intersect(ss)
                nannotations_overlapping_segments = len(
                    annotations_overlapping_segments)

                nannotations_in_workspace = len(aa)
                if nannotations_in_workspace == 0:
                    for counter in options.counters:
                        results_per_contig[(counter, segment, annotation)].append(
                            emptyResult(segment, annotation, isochore,
                                        counter,
                                        nsegments_in_workspace,
                                        basecoverage_annotation,
                                        basecoverage_workspace))
                    continue

                fraction_coverage_annotation = basecoverage_annotation / \
                    float(basecoverage_workspace)
                fraction_hit_annotation = float(
                    nannotations_overlapping_segments) / nannotations_in_workspace

                for counter in options.counters:

                    if counter.startswith("binom"):
                        # GREAT binomial probability over "regions":
                        # n = number of segments in the workspace
                        #     (nsegments_in_workspace)
                        # p = fraction of the workspace covered by the annotation
                        #     (fraction_coverage_annotation)
                        # k = number of segments overlapping the annotation
                        #     (nsegments_overlapping_annotation)
                        # sf = survival function = 1 - cdf:
                        # probability of observing >= k successes in a sample
                        # of n where the probability of success is p.
                        pvalue = scipy.stats.binom.sf(
                            nsegments_overlapping_annotation - 1,
                            nsegments_in_workspace,
                            fraction_coverage_annotation)

                        expected = fraction_coverage_annotation * \
                            nsegments_in_workspace
                        observed = nsegments_overlapping_annotation

                    elif counter.startswith("hyperg"):
                        # hypergeometric probability over nucleotides
                        # (sampling without replacement):
                        # x = observed number of nucleotides in the overlap of
                        #     segments, annotations and workspace
                        # M = number of nucleotides in the workspace
                        # n = number of nucleotides in annotations (and workspace)
                        # N = number of nucleotides in segments (and workspace)
                        # P-value of obtaining > x overlapping nucleotides.
                        rv = scipy.stats.hypergeom(basecoverage_workspace,
                                                   basecoverage_annotation,
                                                   basecoverage_segments)

                        pvalue = rv.sf(basecoverage_intersection)
                        expected = rv.mean()
                        observed = basecoverage_intersection

                    if expected != 0:
                        fold = float(observed) / expected
                    else:
                        fold = 1.0

                    r = GREAT_RESULT._make((
                        segment, annotation, isochore,
                        counter,
                        observed,
                        expected,
                        nsegments_in_workspace,
                        nannotations_in_workspace,
                        nsegments_overlapping_annotation,
                        nannotations_overlapping_segments,
                        basecoverage_intersection,
                        basecoverage_segments,
                        basecoverage_annotation,
                        basecoverage_workspace,
                        fraction_coverage_annotation,
                        fold,
                        pvalue,
                        1.0))

                    # print "\t".join(map(str, r))
                    results_per_contig[(counter, segment, annotation)].append(r)

    E.info("merging counts per isochore")

    # compute sums over isochores
    results = []

    for niteration, pair in enumerate(results_per_contig.iteritems()):

        counter, segment, annotation = pair[0]
        data = pair[1]

        nsegments_in_workspace = sum([x.nsegments_in_workspace for x in data])
        nsegments_overlapping_annotation = sum([x.observed for x in data])
        nannotations_in_workspace = sum(
            [x.nannotations_in_workspace for x in data])
        nannotations_overlapping_segments = sum(
            [x.nannotations_overlapping_segments for x in data])

        basecoverage_intersection = sum(
            [x.basecoverage_intersection for x in data])
        basecoverage_segments = sum([x.basecoverage_segments for x in data])
        basecoverage_annotation = sum(
            [x.basecoverage_annotation for x in data])
        basecoverage_workspace = sum([x.basecoverage_workspace for x in data])

        fraction_coverage_annotation = basecoverage_annotation / \
            float(basecoverage_workspace)

        if counter.startswith("binom"):
            pvalue = scipy.stats.binom.sf(nsegments_overlapping_annotation - 1,
                                          nsegments_in_workspace,
                                          fraction_coverage_annotation)
            expected = fraction_coverage_annotation * nsegments_in_workspace
            observed = nsegments_overlapping_annotation
        elif counter.startswith("hyperg"):
            rv = scipy.stats.hypergeom(basecoverage_workspace,
                                       basecoverage_annotation,
                                       basecoverage_segments)

            pvalue = rv.sf(basecoverage_intersection)
            expected = rv.mean()
            observed = basecoverage_intersection

        if expected != 0:
            fold = float(observed) / expected
        else:
            fold = 1.0

        r = GREAT_RESULT._make((
            segment, annotation, "all",
            counter,
            observed,
            expected,
            nsegments_in_workspace,
            nannotations_in_workspace,
            nsegments_overlapping_annotation,
            nannotations_overlapping_segments,
            basecoverage_intersection,
            basecoverage_segments,
            basecoverage_annotation,
            basecoverage_workspace,
            fraction_coverage_annotation,
            fold,
            pvalue,
            1.0))

        results.append(r)

    IO.outputResults(results,
                     options,
                     GREAT_RESULT._fields,
                     description_header,
                     description_width,
                     descriptions)

    E.Stop()
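
# Illustrative sketch (not part of the original script): the two counters above
# each reduce to a single scipy call. The numbers below are invented purely to
# show the call signatures; only scipy is assumed.
def _demo_great_counters():
    import scipy.stats

    # binom counter: P(X >= k) for k segments hitting the annotation out of
    # n segments in the workspace, with hit probability p = fraction of the
    # workspace covered by the annotation.
    n, k, p = 500, 37, 0.05
    pvalue_binom = scipy.stats.binom.sf(k - 1, n, p)
    expected_binom = p * n

    # hyperg counter: hypergeometric over nucleotides, i.e. drawing
    # segment_bases positions from a workspace of workspace_bases, of which
    # annotation_bases are annotated; intersection_bases is the observed overlap.
    workspace_bases, annotation_bases, segment_bases = 3000000, 150000, 60000
    intersection_bases = 4200
    rv = scipy.stats.hypergeom(workspace_bases, annotation_bases, segment_bases)
    pvalue_hyperg = rv.sf(intersection_bases)  # P(X > observed overlap)
    expected_hyperg = rv.mean()

    return (pvalue_binom, expected_binom), (pvalue_hyperg, expected_hyperg)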
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(
        version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices=("track", "annotation", "fold", "pvalue",
                               "qvalue", "observed"),
                      help="order results in output by fold, track, etc. "
                      "[default=%default].")

    parser.add_option("-p", "--pvalue-method", dest="pvalue_method",
                      type="choice", choices=("empirical", "norm", ),
                      help="type of pvalue reported [default=%default].")

    parser.add_option("-q", "--qvalue-method", dest="qvalue_method", type="choice",
                      choices=("storey", "BH", "bonferroni", "holm", "hommel",
                               "hochberg", "BY", "none"),
                      help="method to perform multiple testing correction by "
                      "controlling the fdr [default=%default].")

    parser.add_option("--qvalue-lambda", dest="qvalue_lambda", type="float",
                      help="fdr computation: lambda [default=%default].")

    parser.add_option("--qvalue-pi0-method", dest="qvalue_pi0_method",
                      type="choice", choices=("smoother", "bootstrap"),
                      help="fdr computation: method for estimating pi0 "
                      "[default=%default].")

    parser.add_option("--descriptions", dest="input_filename_descriptions",
                      type="string",
                      help="filename mapping annotation terms to descriptions. "
                      "If given, the output table will contain additional columns "
                      "[default=%default]")

    parser.add_option("--pseudo-count", dest="pseudo_count", type="float",
                      help="pseudo count. The pseudo count is added to both the "
                      "observed and expected overlap. Using a pseudo-count avoids "
                      "gat reporting fold changes of 0 [default=%default].")

    parser.add_option("--output-plots-pattern", dest="output_plots_pattern",
                      type="string",
                      help="output pattern for plots [default=%default]")

    parser.set_defaults(
        pvalue_method="empirical",
        qvalue_method="BH",
        qvalue_lambda=None,
        qvalue_pi0_method="smoother",
        # pseudo count for fold change computation to avoid 0 fc
        pseudo_count=1.0,
        output_order="observed",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    input_filenames_counts = args

    ##################################################
    E.info("received %i filenames with counts" % len(input_filenames_counts))

    ##################################################
    description_header, descriptions, description_width = IO.readDescriptions(
        options)

    all_annotator_results = []

    for input_filename_counts in input_filenames_counts:

        E.info("processing %s" % input_filename_counts)

        annotator_results = gat.fromCounts(input_filename_counts)

        ##################################################
        if options.pvalue_method != "empirical":
            E.info("updating pvalues to %s" % options.pvalue_method)
            GatEngine.updatePValues(annotator_results, options.pvalue_method)

        ##################################################
        # compute global fdr
        ##################################################
        E.info("computing FDR statistics")
        GatEngine.updateQValues(annotator_results,
                                method=options.qvalue_method,
                                vlambda=options.qvalue_lambda,
                                pi0_method=options.qvalue_pi0_method)

        all_annotator_results.append(annotator_results)

    pseudo_count = options.pseudo_count

    results = []

    if len(all_annotator_results) == 1:
        E.info("performing pairwise comparison within a single file")

        # collect all annotations
        annotations, segments = list(), set()
        for x in all_annotator_results[0]:
            segments.add(x.track)
            annotations.append(x)

        if len(segments) != 1:
            raise NotImplementedError("multiple segments of interest")

        for data1, data2 in itertools.combinations(annotations, 2):

            # note that fold changes can be very large if there are 0 samples;
            # this is fine for getting the distributional params (mean, stddev)
            fold_changes1 = data1.observed / (data1.samples + pseudo_count)
            fold_changes2 = data2.observed / (data2.samples + pseudo_count)

            # add a separate fc pseudo-count to avoid 0 values
            fold_changes1 += 0.0001
            fold_changes2 += 0.0001

            # The test is whether the relative fold change rfc differs from 1.
            # note: rfc = fc1 / fc2 = (obs1 / exp1) / (obs2 / exp2)
            #                       = (obs1 / obs2) * (exp2 / exp1)
            # Thus, testing rfc == 1 is equivalent to testing
            # obs1 / obs2 == exp1 / exp2.
            #
            # Convert to log space for easier plotting.
            # Shift by the observed fold difference to get an idea of the
            # magnitude of the underlying fold change.
            delta_fold = data2.fold - data1.fold
            sampled_delta_fold = numpy.log(
                fold_changes1 / fold_changes2) + delta_fold
            observed_delta_fold = 0.0 + delta_fold

            result = GatEngine.AnnotatorResult(data1.annotation,
                                               data2.annotation,
                                               "na",
                                               observed_delta_fold,
                                               sampled_delta_fold,
                                               reference=None,
                                               pseudo_count=0)

            results.append(result)

    else:
        E.info("performing pairwise comparison between multiple files")

        ##################################################
        # perform pairwise comparison
        for index1, index2 in itertools.combinations(
                range(len(input_filenames_counts)), 2):
            E.info("comparing %i and %i" % (index1, index2))
            a, b = all_annotator_results[index1], all_annotator_results[index2]

            # index results in a and b
            aa = collections.defaultdict(dict)
            for x in a:
                aa[x.track][x.annotation] = x

            bb = collections.defaultdict(dict)
            for x in b:
                bb[x.track][x.annotation] = x

            tracks_a = set(aa.keys())
            tracks_b = set(bb.keys())
            shared_tracks = tracks_a.intersection(tracks_b)
            if len(shared_tracks) == 0:
                E.warn("no shared tracks between {} and {}".format(
                    index1, index2))

            for track in sorted(shared_tracks):
                E.debug("computing results for track {}".format(track))

                # get shared annotations
                annotations1 = aa[track].keys()
                annotations2 = bb[track].keys()
                shared_annotations = list(
                    set(annotations1).intersection(set(annotations2)))
                E.info("%i shared annotations" % len(shared_annotations))

                for annotation in shared_annotations:

                    # if not annotation.startswith("Ram:"): continue

                    data1 = aa[track][annotation]
                    data2 = bb[track][annotation]

                    # note that fold changes can be very large if there are
                    # 0 samples; this is fine for getting the distributional
                    # params (mean, stddev)
                    fold_changes1 = data1.observed / \
                        (data1.samples + pseudo_count)
                    fold_changes2 = data2.observed / \
                        (data2.samples + pseudo_count)

                    # add a separate fc pseudo-count to avoid 0 values
                    fold_changes1 += 0.0001
                    fold_changes2 += 0.0001

                    # The test is whether the relative fold change rfc differs
                    # from 1.
                    # note: rfc = fc1 / fc2 = (obs1 / exp1) / (obs2 / exp2)
                    #                       = (obs1 / obs2) * (exp2 / exp1)
                    # Thus, testing rfc == 1 is equivalent to testing
                    # obs1 / obs2 == exp1 / exp2.
                    #
                    # Convert to log space for easier plotting.
                    # Shift by the observed fold difference to get an idea of
                    # the magnitude of the underlying fold change.
                    delta_fold = data2.fold - data1.fold
                    sampled_delta_fold = numpy.log(
                        fold_changes1 / fold_changes2) + delta_fold
                    observed_delta_fold = 0.0 + delta_fold

                    result = GatEngine.AnnotatorResult(track, annotation,
                                                       "na",
                                                       observed_delta_fold,
                                                       sampled_delta_fold,
                                                       reference=None,
                                                       pseudo_count=0)

                    results.append(result)

    if len(results) == 0:
        E.critical("no results found")
        E.Stop()
        return

    IO.outputResults(results,
                     options,
                     GatEngine.AnnotatorResult.headers,
                     description_header,
                     description_width,
                     descriptions,
                     format_observed="%6.4f")

    IO.plotResults(results, options)

    # write footer and output benchmark information.
    E.Stop()
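
# Illustrative sketch (not part of the original script): how a single pairwise
# comparison above turns observed and sampled overlaps into a log relative fold
# change. The arrays, pseudo-counts and fold values below are invented for the
# example; only numpy is assumed, and data.samples is assumed to be an array of
# sampled overlap counts as used in the loop above.
def _demo_delta_fold():
    import numpy

    pseudo_count = 1.0

    # observed overlap and sampled overlaps for two annotations (made up)
    observed1, samples1 = 120.0, numpy.array([80.0, 95.0, 110.0, 90.0])
    observed2, samples2 = 60.0, numpy.array([70.0, 65.0, 75.0, 80.0])

    # per-sample fold changes, stabilised by a pseudo-count
    fold_changes1 = observed1 / (samples1 + pseudo_count) + 0.0001
    fold_changes2 = observed2 / (samples2 + pseudo_count) + 0.0001

    # reported fold changes per annotation (made-up values)
    fold1, fold2 = 1.3, 0.9
    delta_fold = fold2 - fold1

    # log relative fold change, shifted by the observed fold difference so
    # the sampled distribution is centred near the underlying change
    sampled_delta_fold = numpy.log(fold_changes1 / fold_changes2) + delta_fold
    observed_delta_fold = 0.0 + delta_fold

    return observed_delta_fold, sampled_delta_fold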
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(
        version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-l", "--sample-file", dest="sample_files",
                      type="string", action="append",
                      help="filename with sample files. Start processing from "
                      "samples [default=%default].")

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices=("track", "annotation", "fold", "pvalue", "qvalue"),
                      help="order results in output by fold, track, etc. "
                      "[default=%default].")

    parser.add_option("-p", "--pvalue-method", dest="pvalue_method",
                      type="choice", choices=("empirical", "norm", ),
                      help="type of pvalue reported [default=%default].")

    parser.add_option("--results-file", dest="input_filename_results",
                      type="string",
                      help="start processing from results - no segments "
                      "required [default=%default].")

    parser.add_option("--output-plots-pattern", dest="output_plots_pattern",
                      type="string",
                      help="output pattern for plots [default=%default]")

    parser.add_option("--output-samples-pattern", dest="output_samples_pattern",
                      type="string",
                      help="output pattern for samples. Samples are stored in "
                      "bed format, one for each segment [default=%default]")

    parser.add_option("--plots", dest="plots", type="choice",
                      choices=("all", "bars-per-track", "bars", ),
                      help="plots to be created [default=%default].")

    parser.set_defaults(
        sample_files=[],
        num_samples=1000,
        output_stats=[],
        output_filename_counts=None,
        output_order="fold",
        input_filename_results=None,
        pvalue_method="empirical",
        output_plots_pattern=None,
        plots=[],
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    annotator_results = IO.readAnnotatorResults(options.input_filename_results)

    if "bars-per-track" in options.plots:
        plotBarplots(annotator_results, options)
    if "bars" in options.plots:
        plotBarplot(annotator_results, options)

    # write footer and output benchmark information.
    E.Stop()
def main(argv):

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(
        version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-a", "--gene-file", "--annotations",
                      dest="annotation_files", type="string", action="append",
                      help="filename with annotations - here, location of genes "
                      "[default=%default].")

    parser.add_option("-s", "--segment-file", "--segments",
                      dest="segment_files", type="string", action="append",
                      help="filename with segments. Also accepts a glob in "
                      "parentheses [default=%default].")

    parser.add_option("-w", "--workspace-file", "--workspace",
                      dest="workspace_files", type="string", action="append",
                      help="filename with workspace segments. Also accepts a glob "
                      "in parentheses [default=%default].")

    parser.add_option("-g", "--number-of-genes", dest="number_of_genes",
                      type="int",
                      help="total number of genes [default=%default]")

    parser.add_option("-m", "--annotation-file", dest="annotation_file",
                      type="string",
                      help="filename mapping genes to annotations "
                      "[default=%default]")

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices=("track", "annotation", "fold", "pvalue", "qvalue"),
                      help="order results in output by fold, track, etc. "
                      "[default=%default].")

    parser.add_option("-q", "--qvalue-method", dest="qvalue_method", type="choice",
                      choices=("storey", "BH", "bonferroni", "holm", "hommel",
                               "hochberg", "BY", "none"),
                      help="method to perform multiple testing correction by "
                      "controlling the fdr [default=%default].")

    parser.add_option("--qvalue-lambda", dest="qvalue_lambda", type="float",
                      help="fdr computation: lambda [default=%default].")

    parser.add_option("--qvalue-pi0-method", dest="qvalue_pi0_method",
                      type="choice", choices=("smoother", "bootstrap"),
                      help="fdr computation: method for estimating pi0 "
                      "[default=%default].")

    parser.add_option("--descriptions", dest="input_filename_descriptions",
                      type="string",
                      help="filename mapping annotation terms to descriptions. "
                      "If given, the output table will contain additional columns "
                      "[default=%default]")

    parser.add_option("--ignore-segment-tracks", dest="ignore_segment_tracks",
                      action="store_true",
                      help="ignore segment tracks - all segments belong to one "
                      "track [default=%default]")

    parser.add_option("--enable-split-tracks", dest="enable_split_tracks",
                      action="store_true",
                      help="permit the same track to be in multiple files "
                      "[default=%default]")

    parser.add_option("--output-bed", dest="output_bed", type="choice",
                      action="append",
                      choices=("all", "annotations", "segments", "workspaces",
                               "isochores", "overlap"),
                      help="output bed files [default=%default].")

    parser.add_option("--output-stats", dest="output_stats", type="choice",
                      action="append",
                      choices=("all", "annotations", "segments", "workspaces",
                               "isochores", "overlap"),
                      help="output overlap summary stats [default=%default].")

    parser.set_defaults(
        annotation_files=[],
        segment_files=[],
        workspace_files=[],
        sample_files=[],
        annotation_file=None,
        num_samples=1000,
        nbuckets=100000,
        bucket_size=1,
        counter="nucleotide-overlap",
        output_stats=[],
        output_bed=[],
        output_filename_counts=None,
        output_order="fold",
        cache=None,
        input_filename_counts=None,
        input_filename_results=None,
        pvalue_method="empirical",
        output_plots_pattern=None,
        output_samples_pattern=None,
        qvalue_method="storey",
        qvalue_lambda=None,
        qvalue_pi0_method="smoother",
        sampler="annotator",
        ignore_segment_tracks=False,
        input_filename_descriptions=None,
        conditional="unconditional",
        conditional_extension=None,
        conditional_expansion=None,
        restrict_workspace=False,
        enable_split_tracks=False,
        shift_expansion=2.0,
        shift_extension=0,
        overlap_mode="midpoint",
        number_of_genes=None,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    tstart = time.time()

    # load segments
    options.segment_files = IO.expandGlobs(options.segment_files)
    options.annotation_files = IO.expandGlobs(options.annotation_files)
    options.workspace_files = IO.expandGlobs(options.workspace_files)

    # read one or more segment files
    segments = IO.readSegmentList("segments", options.segment_files, options)
    if options.ignore_segment_tracks:
        segments.merge(delete=True)
        E.info("merged all segments into one track with %i segments" %
               len(segments))

    if len(segments) > 1000:
        raise ValueError(
            "too many (%i) segment files - use track definitions or "
            "--ignore-segment-tracks" % len(segments))

    # load workspace
    workspaces = IO.readSegmentList("workspaces", options.workspace_files,
                                    options, options.enable_split_tracks)

    # intersect workspaces to build a single workspace
    E.info("collapsing workspaces")
    workspaces.collapse()

    # use merged workspace only, discard others
    workspaces.restrict("collapsed")
    workspace = workspaces["collapsed"]

    E.info("intervals loaded in %i seconds" % (time.time() - tstart))

    ############################################
    # load table mapping a gene id to annotations
    gene2annotations = IOTools.readMultiMap(
        IOTools.openFile(options.annotation_file), has_header=True)
    annotations = set([y for x in gene2annotations.values() for y in x])
    E.info("loaded %i annotations for %i genes" %
           (len(annotations), len(gene2annotations)))

    ############################################
    # load bed file with gene coordinates
    assert len(options.annotation_files) == 1
    indexed_genes = collections.defaultdict(Intersecter)
    total_genes = 0
    # number of genes per contig
    contig2ngenes = collections.defaultdict(int)
    # number of genes carrying a particular annotation
    annotation2ngenes = collections.defaultdict(int)

    for line in IOTools.openFile(options.annotation_files[0]):
        if line.startswith("#"):
            continue
        contig, start, end, gene_id = line[:-1].split("\t")[:4]

        indexed_genes[contig].add_interval(
            Interval(int(start), int(end), gene_id))
        contig2ngenes[contig] += 1
        total_genes += 1
        try:
            for annotation in gene2annotations[gene_id]:
                annotation2ngenes[annotation] += 1
        except KeyError:
            pass

    E.info("indexed locations for %i contigs" % len(indexed_genes))

    ############################################
    description_header, descriptions, description_width = IO.readDescriptions(
        options)

    ############################################
    ############################################
    # compute results
    E.info("computing counts")

    results = []
    # iterate over segments
    for segment, segmentdict in segments.iteritems():

        # genes hit by segments, per annotation
        genes_hit_by_segments_with_annotations = collections.defaultdict(int)

        # genes hit by segments
        genes_hit_by_segments = 0

        for contig, ss in segmentdict.iteritems():
            for start, end in ss:
                overlapping_genes = list(
                    indexed_genes[contig].find(start, end))
                genes_hit_by_segments += len(overlapping_genes)
                for x in overlapping_genes:
                    gene_id = x.value
                    try:
                        for annotation in gene2annotations[gene_id]:
                            genes_hit_by_segments_with_annotations[
                                annotation] += 1
                    except KeyError:
                        pass

        # N = number of genes in genome
        N = total_genes
        # n = number of genes selected by segments
        n = genes_hit_by_segments

        for annotation in annotations:
            # K = number of genes carrying annotation
            K = annotation2ngenes[annotation]
            # k = number of genes selected by segments and with annotation
            k = genes_hit_by_segments_with_annotations[annotation]

            if n == 0 or N == 0 or K == 0:
                expected = 0
                fold = 1.0
                pvalue = 1.0
            else:
                expected = float(n * K) / N
                fold = k / expected
                pvalue = scipy.stats.hypergeom.sf(k - 1, N, K, n)

            r = GENESET_RESULT._make((
                segment, annotation,
                N,
                K,
                n,
                k,
                expected,
                fold,
                pvalue,
                1.0))

            results.append(r)

    IO.outputResults(results,
                     options,
                     GENESET_RESULT._fields,
                     description_header,
                     description_width,
                     descriptions)

    E.Stop()
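
# Illustrative sketch (not part of the original script): the per-annotation test
# above is a standard hypergeometric gene-set enrichment. The counts below are
# invented; only scipy is assumed.
def _demo_geneset_enrichment():
    import scipy.stats

    N = 20000   # total number of genes in the genome
    K = 300     # genes carrying the annotation
    n = 500     # genes hit by the segments
    k = 25      # genes hit by the segments that carry the annotation

    expected = float(n * K) / N   # expected hits under independence
    fold = k / expected
    # P(X >= k) when drawing n genes without replacement from N, of which K
    # carry the annotation
    pvalue = scipy.stats.hypergeom.sf(k - 1, N, K, n)

    return expected, fold, pvalue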
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if not argv:
        argv = sys.argv

    parser = gat.buildParser(usage=globals()["__doc__"])

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    ##################################################
    description_header, descriptions, description_width = IO.readDescriptions(
        options)

    ##################################################
    size_pos, size_segment = SegmentList.getSegmentSize()
    E.debug("sizes: pos=%i segment=%i, max_coord=%i" %
            (size_pos, size_segment, 2 ** (8 * size_pos)))

    ##################################################
    # set default counter
    if not options.counters:
        options.counters.append("nucleotide-overlap")

    ##################################################
    if options.output_tables_pattern is not None:
        if "%s" not in options.output_tables_pattern:
            raise ValueError(
                "output_tables_pattern should contain at least one '%s'")

    if options.output_samples_pattern is not None:
        if "%s" not in options.output_samples_pattern:
            raise ValueError(
                "output_samples_pattern should contain at least one '%s'")

    if options.output_counts_pattern is not None:
        if "%s" not in options.output_counts_pattern:
            raise ValueError(
                "output_counts_pattern should contain at least one '%s'")

    if options.random_seed is not None:
        # initialize python random number generator
        random.seed(options.random_seed)
        # initialize numpy random number generator
        numpy.random.seed(options.random_seed)

    ##################################################
    # read fold changes that results should be compared with
    if options.null != "default":
        if not os.path.exists(options.null):
            raise OSError("file %s not found" % options.null)
        E.info("reading reference results from %s" % options.null)
        options.reference = IO.readAnnotatorResults(options.null)
    else:
        options.reference = None

    if options.input_filename_counts:
        # use pre-computed counts
        annotator_results = Engine.fromCounts(options.input_filename_counts)
    elif options.input_filename_results:
        # use previous results (re-computes fdr)
        E.info("reading gat results from %s" % options.input_filename_results)
        annotator_results = IO.readAnnotatorResults(
            options.input_filename_results)
    else:
        # do full gat analysis
        annotator_results = fromSegments(options, args)

    ##################################################
    if options.pvalue_method != "empirical":
        E.info("updating pvalues to %s" % options.pvalue_method)
        Engine.updatePValues(annotator_results, options.pvalue_method)

    ##################################################
    # output
    IO.outputResults(annotator_results,
                     options,
                     Engine.AnnotatorResultExtended.headers,
                     description_header,
                     description_width,
                     descriptions)

    IO.plotResults(annotator_results, options)

    # write footer and output benchmark information.
    E.Stop()