コード例 #1
0
ファイル: check_run.py プロジェクト: zongchangli/gat
    def setUp(self):
        """Prepare the fixture objects used by the tests of this case.

        Parses an empty argument list to obtain an options object, points it
        at the segment, annotation and workspace files named by the
        ``filename_*`` attributes of this instance, loads the intervals and
        stores the sampler, counters, workspace generator and reference
        results on ``self``.
        """
        # An empty argument list is parsed; the positional arguments
        # returned alongside the options are not needed here.
        options, _ignored_args = gat.buildParser().parse_args([])

        # Input files are supplied by attributes of the test instance.
        options.segment_files = self.filename_segments
        options.annotation_files = self.filename_annotations
        options.workspace_files = self.filename_workspace

        # Load the intervals, then combine them with the workspaces and
        # isochores into the workspace kept on the instance.
        loaded = gat.IO.buildSegments(options)
        self.segments, self.annotations, workspaces, isochores = loaded
        self.workspace = gat.IO.applyIsochores(
            self.segments,
            self.annotations,
            workspaces,
            options,
            isochores,
        )

        # Analysis components exercised by the tests.
        self.sampler = Engine.SamplerAnnotator(bucket_size=1, nbuckets=100000)
        self.counters = [Engine.CounterNucleotideOverlap()]
        self.workspace_generator = Engine.UnconditionalWorkspace()

        # Previously computed results the tests compare against.
        reference_path = 'data/output_single.tsv'
        self.reference_data = gat.IO.readAnnotatorResults(reference_path)
コード例 #2
0
def fromSegments(options, args):
    '''Run an analysis starting from segment files.

    This is the most common use case.

    The function loads segments, annotations, workspaces and isochores
    through ``IO.buildSegments``, opens the requested additional output
    files, filters the data by workspace through ``IO.applyIsochores``,
    builds the sampler, counters and workspace generator selected in
    *options* and passes everything on to ``gat.run``.

    Arguments
    ---------
    options : object
        Parsed command line options. The attributes read here include
        ``output_stats``, ``sampler``, ``counters``, ``conditional``,
        ``conditional_extension``, ``conditional_expansion``,
        ``reference`` and the various values forwarded to ``gat.run``.
    args : list
        Positional command line arguments. Not used by this function.

    Returns
    -------
    The value returned by ``gat.run``.

    Raises
    ------
    ValueError
        If ``options.sampler``, an entry of ``options.counters`` or
        ``options.conditional`` is not recognised, if a centered
        conditional workspace is requested without
        ``options.conditional_expansion``, or if ``options.reference``
        lacks a track or an annotation present in the loaded data.
    '''

    tstart = time.time()

    # build segments
    segments, annotations, workspaces, isochores = IO.buildSegments(options)

    E.info("intervals loaded in %i seconds" % (time.time() - tstart))

    # open various additional output files
    outfiles = {}
    for section in (
            "sample",
            "segment_metrics",
            "sample_metrics",
    ):
        # A section is written if it is requested by name, if "all" is
        # requested, or if any entry of output_stats matches the section
        # name as a regular expression.
        if section in options.output_stats or \
                "all" in options.output_stats or \
                any(re.search(pattern, section)
                    for pattern in options.output_stats):
            outfiles[section] = E.openOutputFile(section)

    if 'sample_metrics' in outfiles:
        outfiles['sample_metrics'].write(
            "track\tsection\tmetric\t%s\n" %
            "\t".join(Stats.Summary().getHeaders()))

    # filter segments by workspace
    workspace = IO.applyIsochores(
        segments,
        annotations,
        workspaces,
        options,
        isochores,
        truncate_segments_to_workspace=(
            options.truncate_segments_to_workspace),
        truncate_workspace_to_annotations=(
            options.truncate_workspace_to_annotations),
        restrict_workspace=options.restrict_workspace)

    # initialize sampler
    if options.sampler == "annotator":
        sampler = Engine.SamplerAnnotator(bucket_size=options.bucket_size,
                                          nbuckets=options.nbuckets)
    elif options.sampler == "shift":
        sampler = Engine.SamplerShift(radius=options.shift_expansion,
                                      extension=options.shift_extension)
    elif options.sampler == "segments":
        sampler = Engine.SamplerSegments()
    elif options.sampler == "local-permutation":
        sampler = Engine.SamplerLocalPermutation()
    elif options.sampler == "global-permutation":
        sampler = Engine.SamplerGlobalPermutation()
    elif options.sampler == "brute-force":
        sampler = Engine.SamplerBruteForce()
    elif options.sampler == "uniform":
        sampler = Engine.SamplerUniform()
    else:
        # Fail here with a clear message instead of hitting an unbound
        # local variable when the sampler is passed to gat.run below.
        raise ValueError("unknown sampler '%s'" % options.sampler)

    # initialize counters
    counters = []
    for counter in options.counters:
        if counter == "nucleotide-overlap":
            counters.append(Engine.CounterNucleotideOverlap())
        elif counter == "nucleotide-density":
            counters.append(Engine.CounterNucleotideDensity())
        elif counter == "segment-overlap":
            counters.append(Engine.CounterSegmentOverlap())
        elif counter == "annotation-overlap":
            counters.append(Engine.CounterAnnotationOverlap())
        elif counter == "segment-midoverlap":
            counters.append(Engine.CounterSegmentMidpointOverlap())
        elif counter == "annotation-midoverlap":
            counters.append(Engine.CounterAnnotationMidpointOverlap())
        else:
            raise ValueError("unknown counter '%s'" % counter)

    # initialize workspace generator
    # NOTE(review): for both centered variants only conditional_expansion
    # is tested although the message mentions either option - confirm
    # whether a lone --conditional-extension is meant to be accepted.
    if options.conditional == "unconditional":
        workspace_generator = Engine.UnconditionalWorkspace()
    elif options.conditional == "cooccurance":
        workspace_generator = Engine.ConditionalWorkspaceCooccurance()
    elif options.conditional == "annotation-centered":
        if options.conditional_expansion is None:
            raise ValueError(
                "please specify either --conditional-expansion or "
                "--conditional-extension")
        workspace_generator = Engine.ConditionalWorkspaceAnnotationCentered(
            options.conditional_extension, options.conditional_expansion)
    elif options.conditional == "segment-centered":
        if options.conditional_expansion is None:
            raise ValueError(
                "please specify either --conditional-expansion or "
                "--conditional-extension")
        workspace_generator = Engine.ConditionalWorkspaceSegmentCentered(
            options.conditional_extension, options.conditional_expansion)
    else:
        raise ValueError("unknown conditional workspace '%s'" %
                         options.conditional)

    # check if reference is complete: every segment track needs an entry
    # and each entry needs every annotation track.
    if options.reference:
        for track in segments.tracks:
            if track not in options.reference:
                raise ValueError("missing track '%s' in reference" % track)
            r = options.reference[track]
            for annotation in annotations.tracks:
                if annotation not in r:
                    raise ValueError(
                        "missing annotation '%s' in annotations for "
                        "track='%s'" % (annotation, track))

    # compute
    annotator_results = gat.run(
        segments,
        annotations,
        workspace,
        sampler,
        counters,
        workspace_generator=workspace_generator,
        num_samples=options.num_samples,
        cache=options.cache,
        outfiles=outfiles,
        output_counts_pattern=options.output_counts_pattern,
        output_samples_pattern=options.output_samples_pattern,
        sample_files=options.sample_files,
        conditional=options.conditional,
        conditional_extension=options.conditional_extension,
        reference=options.reference,
        pseudo_count=options.pseudo_count,
        num_threads=options.num_threads)

    return annotator_results