Python IOToolsの例、gat.IOTools Pythonの例

コード例 #1

0

ファイルを表示

ファイル: __init__.py プロジェクト: zongchangli/gat

def fromCounts(filename):
    '''build annotator results from a tab-separated table
    with counts.'''

    annotator_results = []

    with IOTools.openFile(filename, "r") as infile:

        E.info("loading data")

        header = infile.readline()
        if not header == "track\tannotation\tobserved\tcounts\n":
            raise ValueError("%s not a counts file: got %s" % (infile, header))

        for line in infile:
            track, annotation, observed, counts = line[:-1].split("\t")
            samples = numpy.array(
                list(map(float, counts.split(","))), dtype=numpy.float)
            observed = float(observed)
            annotator_results.append(Engine.AnnotatorResult(
                track=track,
                annotation=annotation,
                counter="na",
                observed=observed,
                samples=samples))

    return annotator_results

コード例 #2

0

ファイルを表示

ファイル: __init__.py プロジェクト: zongchangli/gat

def readFromBedOld(filenames, name="track"):
    '''read Segment Lists from one or more bed files.

    Segment lists are grouped by *contig* and *track*.

    If no track is given, the *name* attribute is taken.
    '''

    segment_lists = collections.defaultdict(
        lambda: collections.defaultdict(Engine.SegmentList))

    if name == "track":
        f = lambda x: x.mTrack["name"]
    elif name == "name":
        f = lambda x: x.mFields[0]
    else:
        raise ValueError("unknown name: '%s'" % name)

    for filename in filenames:
        infile = IOTools.openFile(filename, "r")
        for bed in Bed.iterator(infile):
            try:
                name = f(bed)
            except TypeError:
                name = "default"
            segment_lists[name][bed.contig].add(bed.start, bed.end)

    return segment_lists

コード例 #3

0

ファイルを表示

def readDescriptions(options):
    '''read descriptions from tab separated file.'''

    description_header, descriptions, description_width = [], {}, 0
    if options.input_filename_descriptions:
        E.info("reading descriptions from %s" %
               options.input_filename_descriptions)

        with IOTools.openFile(options.input_filename_descriptions) as inf:
            first = True
            for line in inf:
                if line.startswith("#"):
                    continue
                data = line[:-1].split("\t")

                if description_width:
                    assert len(data) - 1 == description_width, \
                        "inconsistent number of descriptions in %s" %\
                        options.input_filename_descriptions
                else:
                    description_width = len(data) - 1

                if first:
                    description_header = data[1:]
                    first = False
                else:
                    descriptions[data[0]] = data[1:]
        assert len(description_header) == description_width, \
            "number of descriptions (%i) inconsistent with header (%s) in %s" % \
            (description_width, len(description_header),
             options.input_filename_descriptions)

    return description_header, descriptions, description_width

コード例 #4

0

ファイルを表示

ファイル: IO.py プロジェクト: abudulemusa/gat

def readDescriptions(options):
    '''read descriptions from tab separated file.'''

    description_header, descriptions, description_width = [], {}, 0
    if options.input_filename_descriptions:
        E.info("reading descriptions from %s" %
               options.input_filename_descriptions)

        with IOTools.openFile(options.input_filename_descriptions) as inf:
            first = True
            for line in inf:
                if line.startswith("#"):
                    continue
                data = line[:-1].split("\t")

                if description_width:
                    assert len(
                        data) - 1 == description_width, "inconsistent number of descriptions in %s" % options.input_filename_descriptions
                else:
                    description_width = len(data) - 1

                if first:
                    description_header = data[1:]
                    first = False
                else:
                    descriptions[data[0]] = data[1:]
        assert len(description_header) == description_width, "number of descriptions (%i) inconsistent with header (%s) in %s" % \
            (description_width, len(description_header),
             options.input_filename_descriptions)

    return description_header, descriptions, description_width

コード例 #5

0

ファイルを表示

def readAnnotatorResults(filename):
    '''load annotator results from a tab-separated results table.'''

    annotator_results = []

    with IOTools.openFile(filename, "r") as infile:
        for line in infile:
            if line.startswith("#"):
                continue
            if line.startswith("track"):
                continue
            r = gat.DummyAnnotatorResult._fromLine(line)
            annotator_results.append(r)

    return annotator_results

コード例 #6

0

ファイルを表示

ファイル: IO.py プロジェクト: abudulemusa/gat

def readAnnotatorResults(filename):
    '''load annotator results from a tab-separated results table.'''

    annotator_results = []

    with IOTools.openFile(filename, "r") as infile:
        for line in infile:
            if line.startswith("#"):
                continue
            if line.startswith("track"):
                continue
            r = gat.DummyAnnotatorResult._fromLine(line)
            annotator_results.append(r)

    return annotator_results

コード例 #7

0

ファイルを表示

ファイル: __init__.py プロジェクト: zongchangli/gat

def run(segments,
        annotations,
        workspace,
        sampler,
        counters,
        workspace_generator,
        **kwargs):
    '''run an enrichment analysis.

    segments: an IntervalCollection
    workspace: an IntervalCollection
    annotations: an IntervalCollection

    kwargs recognized are:

    cache
       filename of cache

    num_samples
       number of samples to compute

    output_counts_pattern
       output counts to filename

    output_samples_pattern
       if given, output samles to these files, one per segment

    sample_files
       if given, read samples from these files.

    fdr
       method to compute qvalues

    outfiles
       dictionary of optional additional output files.

    pseudo_count
       pseudo_count to add to observed and expected values

    reference
       data with reference observed and expected values.
    '''

    # get arguments
    num_samples = kwargs.get("num_samples", 10000)
    cache = kwargs.get("cache", None)
    output_counts_pattern = kwargs.get("output_counts_pattern", None)
    sample_files = kwargs.get("sample_files", [])
    pseudo_count = kwargs.get("pseudo_count", 1.0)
    reference = kwargs.get("reference", None)
    output_samples_pattern = kwargs.get("output_samples_pattern", None)
    outfiles = kwargs.get("outfiles", {})
    num_threads = kwargs.get("num_threads", 0)

    ##################################################
    ##################################################
    ##################################################
    # computing summary metrics for segments
    if "segment_metrics" in outfiles:
        E.info("computing summary metrics for segments")
        outfile = outfiles["segment_metrics"]
        outfile.write("track\tsection\tmetric\t%s\n" %
                      "\t".join(Stats.Summary().getHeaders()))
        for track in segments.tracks:
            IO.outputMetrics(outfile,
                             segments[track],
                             workspace,
                             track,
                             'segments',
                             )
        E.info("wrote summary metrics for segments to %s" % str(outfile))

    ##################################################
    ##################################################
    ##################################################
    # collect observed counts from segments
    E.info("collecting observed counts")
    observed_counts = []
    for counter in counters:
        observed_counts.append(Engine.computeCounts(
            counter=counter,
            aggregator=sum,
            segments=segments,
            annotations=annotations,
            workspace=workspace,
            workspace_generator=workspace_generator))

    ##################################################
    ##################################################
    ##################################################
    # sample and collect counts
    ##################################################
    E.info("starting sampling")

    if cache:
        E.info("samples are cached in %s" % cache)
        samples = Engine.SamplesCached(filename=cache)
    elif sample_files:
        if not output_samples_pattern:
            raise ValueError(
                "require output_samples_pattern if loading samples from files")
        # build regex
        regex = re.compile(re.sub("%s", "(\S+)", output_samples_pattern))
        E.info("loading samples from %i files" % len(sample_files))
        samples = Engine.SamplesFile(
            filenames=sample_files,
            regex=regex)
    else:
        samples = Engine.Samples()

    sampled_counts = {}

    counts = E.Counter()

    ntracks = len(segments.tracks)

    for ntrack, track in enumerate(segments.tracks):

        segs = segments[track]

        E.info("sampling: %s: %i/%i" % (track, ntrack + 1, ntracks))

        if output_samples_pattern and not sample_files:
            filename = re.sub("%s", track, output_samples_pattern)
            E.debug("saving samples to %s" % filename)
            dirname = os.path.dirname(filename)
            if dirname and not os.path.exists(dirname):
                os.makedirs(dirname)
            if filename.endswith(".gz"):
                samples_outfile = gzip.open(filename, "w")
            else:
                samples_outfile = open(filename, "w")
        else:
            samples_outfile = None

        if workspace_generator.is_conditional:
            outer_sampler = ConditionalSampler(num_samples,
                                               samples,
                                               samples_outfile,
                                               sampler,
                                               workspace_generator,
                                               counters,
                                               outfiles,
                                               num_threads=num_threads)
        else:
            outer_sampler = UnconditionalSampler(num_samples,
                                                 samples,
                                                 samples_outfile,
                                                 sampler,
                                                 workspace_generator,
                                                 counters,
                                                 outfiles,
                                                 num_threads=num_threads)

        counts_per_track = outer_sampler.sample(
            track, counts, counters, segs, annotations, workspace, outfiles)

        # skip empty tracks
        if counts_per_track is None:
            continue

        if samples_outfile:
            samples_outfile.close()

        sampled_counts[track] = counts_per_track

        # old code, refactor into loop to save samples
        if 0:
            E.info("sampling stats: %s" % str(counts))
            if track not in samples:
                E.warn("no samples for track %s" % track)
                continue

            # clean up samples
            del samples[track]

    E.info("sampling finished")

    # build annotator results
    E.info("computing PValue statistics")

    annotator_results = list()
    counter_id = 0
    for counter, observed_count in zip(counters, observed_counts):
        for track, r in observed_count.items():
            for annotation, observed in r.items():
                temp_segs, temp_annos, temp_workspace = workspace_generator(
                    segments[track],
                    annotations[annotation],
                    workspace)

                # ignore empty results
                if temp_workspace.sum() == 0:
                    continue

                # if reference is given, p-value will indicate difference
                # The test that track and annotation are present is done
                # elsewhere
                if reference:
                    ref = reference[track][annotation]
                else:
                    ref = None

                annotator_results.append(Engine.AnnotatorResultExtended(
                    track=track,
                    annotation=annotation,
                    counter=counter.name,
                    observed=observed,
                    samples=sampled_counts[track][counter_id][annotation],
                    track_segments=temp_segs,
                    annotation_segments=temp_annos,
                    workspace=temp_workspace,
                    reference=ref,
                    pseudo_count=pseudo_count))
        counter_id += 1

    # dump (large) table with counts
    if output_counts_pattern:
        for counter in counters:
            name = counter.name
            filename = re.sub("%s", name, output_counts_pattern)

            E.info("writing counts to %s" % filename)
            output = [x for x in annotator_results if x.counter == name]
            outfile = IOTools.openFile(filename, "w")
            outfile.write("track\tannotation\tobserved\tcounts\n")

            for o in output:
                outfile.write("%s\t%s\t%i\t%s\n" %
                              (o.track, o.annotation,
                               o.observed,
                               ",".join(["%i" % x for x in o.samples])))

    return annotator_results

コード例 #8

0

ファイルを表示

ファイル: __init__.py プロジェクト: zongchangli/gat

def computeSample(args):
    '''compute a single sample.
    '''

    workdata, samples_outfile, metrics_outfile, lock = args

    (track,
     sample_id,
     sampler,
     segs,
     annotations,
     contig_annotations,
     workspace,
     contig_workspace,
     counters) = workdata

    # E.debug("track=%s, sample=%s - started" % (track, str(sample_id)))

    counts = E.Counter()

    sample_id = str(sample_id)

    outf_samples = samples_outfile

    if samples_outfile:
        if lock:
            lock.acquire()
            outf_samples = IOTools.openFile(samples_outfile, "a")

        samples_outfile.write("track name=%s\n" % sample_id)

        if lock:
            outf_samples.close()
            lock.release()

    sample = Engine.IntervalDictionary()

    for isochore in list(segs.keys()):

        counts.pairs += 1

        # skip empty isochores
        if workspace[isochore].isEmpty or segs[isochore].isEmpty:
            counts.skipped += 1
            continue

        counts.sampled += 1
        r = sampler.sample(segs[isochore], workspace[isochore])

        # TODO : activate
        # self.outputSampleStats( sample_id, isochore, r )

        sample.add(isochore, r)

        # save sample
        if samples_outfile:
            if lock:
                lock.acquire()
                outf_samples = IOTools.openFile(samples_outfile, "a")

            for start, end in r:
                outf_samples.write("%s\t%i\t%i\n" % (isochore, start, end))

            if lock:
                outf_samples.close()
                lock.release()

    # re-combine isochores
    # adjacent intervals are merged.
    sample.fromIsochores()

    if metrics_outfile:
        if lock:
            lock.acquire()
            outf = IOTools.openFile(metrics_outfile, "a")
        else:
            outf = metrics_outfile

        IO.outputMetrics(outf, sample, workspace, track, sample_id)

        if lock:
            outf.close()
            lock.release()

    counts_per_track = [collections.defaultdict(float) for x in counters]
    # compute counts for each counter
    for counter_id, counter in enumerate(counters):
        # TODO: choose aggregator
        for annotation in annotations.tracks:
            counts_per_track[counter_id][annotation] = sum([
                counter(sample[contig],
                        contig_annotations[annotation][contig],
                        contig_workspace[contig])
                for contig in list(sample.keys())])

    # E.debug("track=%s, sample=%s - completed" % (track,str(sample_id )))

    return counts_per_track

コード例 #9

0

ファイルを表示

def expandGlobs(infiles):
    return IOTools.flatten([glob.glob(x) for x in infiles])

コード例 #10

0

ファイルを表示

def outputResults(results,
                  options,
                  header,
                  description_header,
                  description_width,
                  descriptions,
                  format_observed="%i"):
    '''compute FDR and output results.'''

    pvalues = [x.pvalue for x in results]

    ##################################################
    ##################################################
    ##################################################
    # compute global fdr
    ##################################################
    E.info("computing FDR statistics")
    qvalues = Engine.getQValues(pvalues,
                                method=options.qvalue_method,
                                vlambda=options.qvalue_lambda,
                                pi0_method=options.qvalue_pi0_method)

    try:
        results = [
            x._replace(qvalue=qvalue) for x, qvalue in zip(results, qvalues)
        ]
        is_tuple = True
    except AttributeError:
        # not a namedtuple
        for x, qvalue in zip(results, qvalues):
            x.qvalue = qvalue
            x.format_observed = format_observed

        is_tuple = False

    counters = set([x.counter for x in results])

    for counter in counters:

        if len(counters) == 1:
            outfile = options.stdout
            output = results
        else:
            outfilename = re.sub("%s", counter, options.output_tables_pattern)
            E.info("output for counter %s goes to outfile %s" %
                   (counter, outfilename))
            outfile = IOTools.openFile(outfilename, "w")
            output = [x for x in results if x.counter == counter]

        outfile.write("\t".join(list(header) + list(description_header)) +
                      "\n")

        if options.output_order == "track":
            output.sort(key=lambda x: (x.track, x.annotation))
        elif options.output_order == "observed":
            output.sort(key=lambda x: x.observed)
        elif options.output_order == "annotation":
            output.sort(key=lambda x: (x.annotation, x.track))
        elif options.output_order == "fold":
            output.sort(key=lambda x: x.fold)
        elif options.output_order == "pvalue":
            output.sort(key=lambda x: x.pvalue)
        elif options.output_order == "qvalue":
            output.sort(key=lambda x: x.qvalue)
        else:
            raise ValueError("unknown sort order %s" % options.output_order)

        for result in output:
            if is_tuple:
                outfile.write("\t".join(map(str, result)))
            else:
                outfile.write(str(result))

            if descriptions:
                try:
                    outfile.write("\t" +
                                  "\t".join(descriptions[result.annotation]))
                except KeyError:
                    outfile.write("\t" + "\t".join([""] * description_width))
            outfile.write("\n")

        if outfile != options.stdout:
            outfile.close()

コード例 #11

0

ファイルを表示

ファイル: IO.py プロジェクト: abudulemusa/gat

def expandGlobs(infiles):
    return IOTools.flatten([glob.glob(x) for x in infiles])

コード例 #12

0

ファイルを表示

ファイル: IO.py プロジェクト: abudulemusa/gat

def outputResults(results,
                  options,
                  header,
                  description_header,
                  description_width,
                  descriptions,
                  format_observed="%i"):
    '''compute FDR and output results.'''

    pvalues = [x.pvalue for x in results]

    ##################################################
    ##################################################
    ##################################################
    # compute global fdr
    ##################################################
    E.info("computing FDR statistics")
    qvalues = GatEngine.getQValues(pvalues,
                                   method=options.qvalue_method,
                                   vlambda=options.qvalue_lambda,
                                   pi0_method=options.qvalue_pi0_method)

    try:
        results = [x._replace(qvalue=qvalue)
                   for x, qvalue in zip(results, qvalues)]
        is_tuple = True
    except AttributeError:
        # not a namedtuple
        for x, qvalue in zip(results, qvalues):
            x.qvalue = qvalue
            x.format_observed = format_observed

        is_tuple = False

    counters = set([x.counter for x in results])

    for counter in counters:

        if len(counters) == 1:
            outfile = options.stdout
            output = results
        else:
            outfilename = re.sub("%s", counter, options.output_tables_pattern)
            E.info("output for counter %s goes to outfile %s" %
                   (counter, outfilename))
            outfile = IOTools.openFile(outfilename, "w")
            output = [x for x in results if x.counter == counter]

        outfile.write(
            "\t".join(list(header) + list(description_header)) + "\n")

        if options.output_order == "track":
            output.sort(key=lambda x: (x.track, x.annotation))
        elif options.output_order == "observed":
            output.sort(key=lambda x: x.observed)
        elif options.output_order == "annotation":
            output.sort(key=lambda x: (x.annotation, x.track))
        elif options.output_order == "fold":
            output.sort(key=lambda x: x.fold)
        elif options.output_order == "pvalue":
            output.sort(key=lambda x: x.pvalue)
        elif options.output_order == "qvalue":
            output.sort(key=lambda x: x.qvalue)
        else:
            raise ValueError("unknown sort order %s" % options.output_order)

        for result in output:
            if is_tuple:
                outfile.write("\t".join(map(str, result)))
            else:
                outfile.write(str(result))

            if descriptions:
                try:
                    outfile.write(
                        "\t" + "\t".join(descriptions[result.annotation]))
                except KeyError:
                    outfile.write("\t" + "\t".join([""] * description_width))
            outfile.write("\n")

        if outfile != options.stdout:
            outfile.close()

コード例 #13

0

ファイルを表示

ファイル: gat-geneset.py プロジェクト: zongchangli/gat

def main(argv):

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = optparse.OptionParser(version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
                                   usage=globals()["__doc__"])

    parser.add_option("-a", "--gene-file", "--annotations", dest="annotation_files", type="string", action="append",
                      help="filename with annotations - here, location of genes [default=%default].")

    parser.add_option("-s", "--segment-file", "--segments", dest="segment_files", type="string", action="append",
                      help="filename with segments. Also accepts a glob in parentheses [default=%default].")

    parser.add_option("-w", "--workspace-file", "--workspace", dest="workspace_files", type="string", action="append",
                      help="filename with workspace segments. Also accepts a glob in parentheses [default=%default].")

    parser.add_option("-g", "--number-of-genes", dest="number_of_genes", type="int",
                      help="total number of genes [default=%default]")

    parser.add_option("-m", "--annotation-file", dest="annotation_file", type="string",
                      help="filename mapping genes to annotations [default=%default]")

    parser.add_option("-o", "--order", dest="output_order", type="choice",
                      choices=(
                          "track", "annotation", "fold", "pvalue", "qvalue"),
                      help="order results in output by fold, track, etc. [default=%default].")

    parser.add_option("-q", "--qvalue-method", dest="qvalue_method", type="choice",
                      choices=(
                          "storey", "BH", "bonferroni", "holm", "hommel", "hochberg", "BY", "none"),
                      help="method to perform multiple testing correction by controlling the fdr [default=%default].")

    parser.add_option("--qvalue-lambda", dest="qvalue_lambda", type="float",
                      help="fdr computation: lambda [default=%default].")

    parser.add_option("--qvalue-pi0-method", dest="qvalue_pi0_method", type="choice",
                      choices=("smoother", "bootstrap"),
                      help="fdr computation: method for estimating pi0 [default=%default].")
    parser.add_option("--descriptions", dest="input_filename_descriptions", type="string",
                      help="filename mapping annotation terms to descriptions. "
                      " if given, the output table will contain additional columns "
                      " [default=%default]")

    parser.add_option("--ignore-segment-tracks", dest="ignore_segment_tracks", action="store_true",
                      help="ignore segment tracks - all segments belong to one track [default=%default]")

    parser.add_option("--enable-split-tracks", dest="enable_split_tracks", action="store_true",
                      help="permit the same track to be in multiple files [default=%default]")

    parser.add_option("--output-bed", dest="output_bed", type="choice", action="append",
                      choices=("all",
                               "annotations", "segments",
                               "workspaces", "isochores",
                               "overlap"),
                      help="output bed files [default=%default].")

    parser.add_option("--output-stats", dest="output_stats", type="choice", action="append",
                      choices=("all",
                               "annotations", "segments",
                               "workspaces", "isochores",
                               "overlap"),
                      help="output overlap summary stats [default=%default].")

    parser.set_defaults(
        annotation_files=[],
        segment_files=[],
        workspace_files=[],
        sample_files=[],
        annotation_file=None,
        num_samples=1000,
        nbuckets=100000,
        bucket_size=1,
        counter="nucleotide-overlap",
        output_stats=[],
        output_bed=[],
        output_filename_counts=None,
        output_order="fold",
        cache=None,
        input_filename_counts=None,
        input_filename_results=None,
        pvalue_method="empirical",
        output_plots_pattern=None,
        output_samples_pattern=None,
        qvalue_method="storey",
        qvalue_lambda=None,
        qvalue_pi0_method="smoother",
        sampler="annotator",
        ignore_segment_tracks=False,
        input_filename_descriptions=None,
        conditional="unconditional",
        conditional_extension=None,
        conditional_expansion=None,
        restrict_workspace=False,
        enable_split_tracks=False,
        shift_expansion=2.0,
        shift_extension=0,
        overlap_mode="midpoint",
        number_of_genes=None,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv, add_output_options=True)

    tstart = time.time()

    # load segments
    options.segment_files = IO.expandGlobs(options.segment_files)
    options.annotation_files = IO.expandGlobs(options.annotation_files)
    options.workspace_files = IO.expandGlobs(options.workspace_files)

    # read one or more segment files
    segments = IO.readSegmentList("segments", options.segment_files, options)
    if options.ignore_segment_tracks:
        segments.merge(delete=True)
        E.info("merged all segments into one track with %i segments" %
               len(segments))

    if len(segments) > 1000:
        raise ValueError(
            "too many (%i) segment files - use track definitions or --ignore-segment-tracks" % len(segments))

    # load workspace
    workspaces = IO.readSegmentList(
        "workspaces", options.workspace_files, options, options.enable_split_tracks)

    # intersect workspaces to build a single workspace
    E.info("collapsing workspaces")
    workspaces.collapse()

    # use merged workspace only, discard others
    workspaces.restrict("collapsed")
    workspace = workspaces["collapsed"]

    E.info("intervals loaded in %i seconds" % (time.time() - tstart))

    ############################################
    # load table mapping a gene id to annotations
    gene2annotations = IOTools.readMultiMap(IOTools.openFile(options.annotation_file),
                                            has_header=True)
    annotations = set([y for x in gene2annotations.values() for y in x])
    E.info("loaded %i annotations for %i genes" %
           (len(gene2annotations), len(annotations)))

    ############################################
    # load bed file with gene coordinates
    assert len(options.annotation_files) == 1
    indexed_genes = collections.defaultdict(Intersecter)
    total_genes = 0
    # number of genes per contig
    contig2ngenes = collections.defaultdict(int)
    # compute number of genes with a particular annotation
    # per contig
    annotation2ngenes = collections.defaultdict(int)
    for line in IOTools.openFile(options.annotation_files[0]):
        if line.startswith("#"):
            continue
        contig, start, end, gene_id = line[:-1].split("\t")[:4]
        indexed_genes[contig].add_interval(
            Interval(int(start), int(end), gene_id))
        contig2ngenes[contig] += 1
        total_genes += 1
        try:
            for annotation in gene2annotations[gene_id]:
                annotation2ngenes[annotation] += 1
        except KeyError:
            pass
    E.info("indexed locations for %i contigs" % len(indexed_genes))

    ############################################
    description_header, descriptions, description_width = IO.readDescriptions(
        options)

    ############################################
    ############################################
    # compute results
    E.info("computing counts")

    results = []
    # iterate over segments
    for segment, segmentdict in segments.iteritems():

        # genes hit by segments per annotation
        genes_hit_by_segments_with_annotations = collections.defaultdict(int)

        # genes hit by segments
        genes_hit_by_segments = 0

        for contig, ss in segmentdict.iteritems():
            for start, end in ss:
                overlapping_genes = list(
                    indexed_genes[contig].find(start, end))
                genes_hit_by_segments += len(overlapping_genes)
                for x in overlapping_genes:
                    gene_id = x.value
                    try:
                        for annotation in gene2annotations[gene_id]:
                            genes_hit_by_segments_with_annotations[
                                annotation] += 1
                    except KeyError:
                        pass

        # N = number of genes in genome
        N = total_genes
        # n   = number of genes selected by segments
        n = genes_hit_by_segments

        for annotation in annotations:
            # K = number of genes carrying annotation
            K = annotation2ngenes[annotation]
            # k = number of genes selected by segments and with annotation
            k = genes_hit_by_segments_with_annotations[annotation]

            if n == 0 or N == 0 or K == 0:
                expected = 0
                fold = 1.0
                pvalue = 1.0
            else:
                expected = float(n * K) / N
                fold = k / expected
                pvalue = scipy.stats.hypergeom.sf(k - 1, N, K, n)

            r = GENESET_RESULT._make((
                segment, annotation,
                N,
                K,
                n,
                k,
                expected,
                fold,
                pvalue,
                1.0))

            results.append(r)

    IO.outputResults(results,
                     options,
                     GENESET_RESULT._fields,
                     description_header,
                     description_width,
                     descriptions)

    E.Stop()