def setUp(self):
    '''prepare the fixtures used by the tests of this case.

    An empty argument list is parsed with the gat option parser and the
    resulting options are pointed at the files named by
    ``self.filename_segments``, ``self.filename_annotations`` and
    ``self.filename_workspace``. Segments, annotations and the workspace
    are then loaded, and a sampler, a counter list, a workspace
    generator and the reference results read from
    'data/output_single.tsv' are stored on the instance.
    '''
    opts, _positional = gat.buildParser().parse_args([])

    # direct the options at the input files of this test case
    opts.segment_files = self.filename_segments
    opts.annotation_files = self.filename_annotations
    opts.workspace_files = self.filename_workspace

    loaded = gat.IO.buildSegments(opts)
    self.segments, self.annotations, raw_workspaces, isochore_data = loaded

    self.workspace = gat.IO.applyIsochores(
        self.segments,
        self.annotations,
        raw_workspaces,
        opts,
        isochore_data)

    self.sampler = Engine.SamplerAnnotator(bucket_size=1, nbuckets=100000)
    self.counters = [Engine.CounterNucleotideOverlap()]
    self.workspace_generator = Engine.UnconditionalWorkspace()

    self.reference_data = gat.IO.readAnnotatorResults(
        'data/output_single.tsv')
def _openStatsOutfiles(options):
    '''open the optional per-section output files.

    A section ("sample", "segment_metrics" or "sample_metrics") is
    opened if it is listed in ``options.output_stats``, if "all" is
    listed, or if any entry of ``options.output_stats`` matches the
    section name when used as a regular expression.

    Returns a dictionary mapping section name to the opened file. If
    "sample_metrics" was opened, its header line has been written.
    '''
    requested = options.output_stats
    outfiles = {}
    for section in ("sample", "segment_metrics", "sample_metrics"):
        # The patterns are matched against the section *name*. Matching
        # against the literal string "section" would ignore the section
        # that is actually being tested.
        if (section in requested or
                "all" in requested or
                any(re.search(pattern, section) for pattern in requested)):
            outfiles[section] = E.openOutputFile(section)

    if 'sample_metrics' in outfiles:
        outfiles['sample_metrics'].write(
            "track\tsection\tmetric\t%s\n" %
            "\t".join(Stats.Summary().getHeaders()))

    return outfiles


def _buildSampler(options):
    '''return the sampler selected by ``options.sampler``.

    Raises ValueError for an unknown sampler name.
    '''
    name = options.sampler
    if name == "annotator":
        return Engine.SamplerAnnotator(bucket_size=options.bucket_size,
                                       nbuckets=options.nbuckets)
    if name == "shift":
        return Engine.SamplerShift(radius=options.shift_expansion,
                                   extension=options.shift_extension)
    if name == "segments":
        return Engine.SamplerSegments()
    if name == "local-permutation":
        return Engine.SamplerLocalPermutation()
    if name == "global-permutation":
        return Engine.SamplerGlobalPermutation()
    if name == "brute-force":
        return Engine.SamplerBruteForce()
    if name == "uniform":
        return Engine.SamplerUniform()
    # Fail here with a clear message instead of leaving the sampler
    # undefined and failing later with an unrelated error.
    raise ValueError("unknown sampler '%s'" % name)


def _buildCounters(options):
    '''return one counter instance per entry in ``options.counters``.

    Raises ValueError for an unknown counter name.
    '''
    counters = []
    for counter in options.counters:
        if counter == "nucleotide-overlap":
            counters.append(Engine.CounterNucleotideOverlap())
        elif counter == "nucleotide-density":
            counters.append(Engine.CounterNucleotideDensity())
        elif counter == "segment-overlap":
            counters.append(Engine.CounterSegmentOverlap())
        elif counter == "annotation-overlap":
            counters.append(Engine.CounterAnnotationOverlap())
        elif counter == "segment-midoverlap":
            counters.append(Engine.CounterSegmentMidpointOverlap())
        elif counter == "annotation-midoverlap":
            counters.append(Engine.CounterAnnotationMidpointOverlap())
        else:
            raise ValueError("unknown counter '%s'" % counter)
    return counters


def _buildWorkspaceGenerator(options):
    '''return the workspace generator selected by ``options.conditional``.

    Raises ValueError for an unknown name, or if a centered workspace
    is requested while ``options.conditional_expansion`` is None.
    '''
    name = options.conditional
    if name == "unconditional":
        return Engine.UnconditionalWorkspace()
    if name == "cooccurance":
        return Engine.ConditionalWorkspaceCooccurance()
    if name in ("annotation-centered", "segment-centered"):
        # NOTE(review): only conditional_expansion is checked although
        # the message mentions both options - confirm whether giving
        # only --conditional-extension should be accepted.
        if options.conditional_expansion is None:
            raise ValueError(
                "please specify either --conditional-expansion or "
                "--conditional-extension")
        if name == "annotation-centered":
            return Engine.ConditionalWorkspaceAnnotationCentered(
                options.conditional_extension,
                options.conditional_expansion)
        return Engine.ConditionalWorkspaceSegmentCentered(
            options.conditional_extension,
            options.conditional_expansion)
    raise ValueError("unknown conditional workspace '%s'" % name)


def _checkReference(reference, segments, annotations):
    '''check that ``reference`` has an entry for every track/annotation pair.

    Raises ValueError naming the first missing track or annotation.
    '''
    for track in segments.tracks:
        if track not in reference:
            raise ValueError("missing track '%s' in reference" % track)
        per_track = reference[track]
        for annotation in annotations.tracks:
            if annotation not in per_track:
                raise ValueError(
                    "missing annotation '%s' in annotations for "
                    "track='%s'" % (annotation, track))


def fromSegments(options, args):
    '''run analysis from segment files.

    This is the most common use case.

    Arguments
    ---------
    options :
        parsed command line options. The attributes read here include
        output_stats, sampler, counters, conditional, reference,
        num_samples and the other values forwarded to ``gat.run``.
    args :
        positional command line arguments; not used by this function.

    Returns
    -------
    The value returned by ``gat.run``.

    Raises
    ------
    ValueError
        if the sampler, a counter or the conditional workspace is
        unknown, if a centered conditional workspace is requested
        without ``conditional_expansion``, or if ``options.reference``
        lacks a track or annotation.
    '''
    tstart = time.time()

    # build segments
    segments, annotations, workspaces, isochores = IO.buildSegments(options)

    E.info("intervals loaded in %i seconds" % (time.time() - tstart))

    # open various additional output files
    outfiles = _openStatsOutfiles(options)

    # filter segments by workspace
    workspace = IO.applyIsochores(
        segments,
        annotations,
        workspaces,
        options,
        isochores,
        truncate_segments_to_workspace=options.truncate_segments_to_workspace,
        truncate_workspace_to_annotations=options.
        truncate_workspace_to_annotations,
        restrict_workspace=options.restrict_workspace)

    # initialize sampler, counters and workspace generator
    sampler = _buildSampler(options)
    counters = _buildCounters(options)
    workspace_generator = _buildWorkspaceGenerator(options)

    # check if reference is complete
    if options.reference:
        _checkReference(options.reference, segments, annotations)

    # compute
    annotator_results = gat.run(
        segments,
        annotations,
        workspace,
        sampler,
        counters,
        workspace_generator=workspace_generator,
        num_samples=options.num_samples,
        cache=options.cache,
        outfiles=outfiles,
        output_counts_pattern=options.output_counts_pattern,
        output_samples_pattern=options.output_samples_pattern,
        sample_files=options.sample_files,
        conditional=options.conditional,
        conditional_extension=options.conditional_extension,
        reference=options.reference,
        pseudo_count=options.pseudo_count,
        num_threads=options.num_threads)

    return annotator_results