def processChunk(contig, regions):
    """Record the stretches of a contig not covered by ``regions``.

    The regions are merged with ``Intervals.combineIntervals`` and every
    interval returned by ``Intervals.complementIntervals`` for the span
    ``0 .. contigs[contig]`` is appended to the module-level ``locations``
    list as an "intergenic" entry on the "+" strand.

    contig  -- key into the module-level ``contigs`` mapping (presumably
               contig name -> contig length; verify against caller).
               ``None`` means there is nothing to process.
    regions -- list of (start, end) intervals on the contig.

    Returns None; the result is the side effect on ``locations``.
    """
    # Identity test: None is a singleton, and ``==`` could be hijacked by
    # a custom __eq__.
    if contig is None:
        return

    start = 0
    end = contigs[contig]

    regions = Intervals.combineIntervals(regions)
    for xstart, xend in Intervals.complementIntervals(regions, start, end):
        locations.append(
            ("intergenic", "intergenic", contig, "+", xstart, xend, "."))
def FilterEliminateOverlappingTranscripts(
        exons, filter_exons, eliminated_predictions, contig_sizes, options):
    """eliminate predictions that overlap or span a positive set
    of transcripts.

    exons                  -- exons of the predictions to be tested.
    filter_exons           -- exons of the positive (filter) set.
    eliminated_predictions -- mapping that is updated in place: the id of
                              every eliminated prediction is set to 0.
    contig_sizes           -- passed on to ``getRangesFromExons`` when the
                              filter ranges are built.
    options                -- supplies ``filter_remove_spanning_both_strands``
                              and the ``stdout`` stream to which one line is
                              written per overlapping exon.

    Returns a list of ``(prediction_id, "f")`` tuples, one entry per
    overlapping exon; a prediction with several overlapping exons is
    therefore listed several times.
    """
    eliminated = []

    # convert list of filter exons into a list of ranges.
    filter_ranges = getRangesFromExons(
        filter_exons,
        both_strands=options.filter_remove_spanning_both_strands,
        contig_sizes=contig_sizes)

    # Only existing keys are re-assigned, so the dictionary does not change
    # size while it is being iterated.
    for k, r in filter_ranges.items():
        # Build a real list: on Python 3 map() returns a lazy one-shot
        # iterator, not the list that Python 2 handed to the helper.
        filter_ranges[k] = Intervals.combineIntervals([x[:2] for x in r])

    exon_ranges = getRangesFromExons(exons, both_strands=False)

    # and now go through exons and delete transcripts whose
    # exons overlap one of the forbidden ranges
    for k, ee in exon_ranges.items():

        if k not in filter_ranges:
            continue

        ff = filter_ranges[k]
        ee.sort()

        # f is the index into the filter ranges; it only ever moves
        # forward because the exons are visited in sorted order.
        f = 0
        for efrom, eto, prediction_id in ee:

            # increment filter, such that its extent
            # is larger than current range to test.
            while f < len(ff) and ff[f][1] < efrom:
                f += 1

            # all filter ranges lie before this exon (and hence before
            # all following exons): nothing left to test for this key.
            if f == len(ff):
                break

            if eto >= ff[f][0]:
                options.stdout.write(
                    "%s\t%s\n" % (
                        prediction_id,
                        "eliminated: filtered by %s:%i:%i" % (
                            k, ff[f][0], ff[f][1])))
                eliminated_predictions[prediction_id] = 0
                eliminated.append((prediction_id, "f"))

    return eliminated
def FilterEliminateOverlappingTranscripts(exons,
                                          filter_exons,
                                          eliminated_predictions,
                                          contig_sizes,
                                          options):
    """eliminate predictions that overlap or span a positive set
    of transcripts.

    The exons of the positive set (``filter_exons``) are converted into
    merged ranges per key of ``getRangesFromExons``. Each exon in ``exons``
    that touches one of these ranges causes its prediction to be reported
    on ``options.stdout``, flagged in ``eliminated_predictions`` (value set
    to 0) and appended to the returned list.

    ``contig_sizes`` and ``options.filter_remove_spanning_both_strands``
    are handed to ``getRangesFromExons`` for the filter set only.

    Returns a list of ``(prediction_id, "f")`` tuples. There is one tuple
    per overlapping exon, so ids may be repeated.
    """
    eliminated = []

    # convert list of filter exons into a list of ranges.
    filter_ranges = getRangesFromExons(
        filter_exons,
        both_strands=options.filter_remove_spanning_both_strands,
        contig_sizes=contig_sizes)

    for k, r in filter_ranges.items():
        # A list comprehension instead of map(lambda ...): map() is a lazy
        # iterator on Python 3, whereas the helper received a list on
        # Python 2. Values of existing keys are replaced, which is safe
        # during iteration.
        filter_ranges[k] = Intervals.combineIntervals(
            [x[:2] for x in r])

    exon_ranges = getRangesFromExons(exons, both_strands=False)

    # and now go through exons and delete transcripts whose
    # exons overlap one of the forbidden ranges
    for k, ee in exon_ranges.items():

        if k not in filter_ranges:
            continue

        ff = filter_ranges[k]
        ee.sort()

        # set exon index e and filter index f
        # (both are indices in sorted lists)
        e, f = 0, 0

        while e < len(ee):

            # named prediction_id so that the builtin id() is not shadowed
            efrom, eto, prediction_id = ee[e]

            # increment filter, such that its extent
            # is larger than current range ee[e] to test.
            while f < len(ff) and ff[f][1] < efrom:
                f += 1

            # ran out of filter ranges: no later exon can overlap either
            if f == len(ff):
                break

            overlaps = eto >= ff[f][0]
            if overlaps:
                reason = "eliminated: filtered by %s:%i:%i" % (
                    k, ff[f][0], ff[f][1])
                options.stdout.write(
                    "%s\t%s\n" % (prediction_id, reason))
                eliminated_predictions[prediction_id] = 0
                eliminated.append((prediction_id, "f"))

            e += 1

    return eliminated
def processChunk(contig, regions):
    """Append the uncovered parts of ``contig`` to ``locations``.

    ``regions`` (a list of (start, end) intervals) is merged with
    ``Intervals.combineIntervals``. Each interval that
    ``Intervals.complementIntervals`` reports for the span from 0 to
    ``contigs[contig]`` is stored in the module-level ``locations`` list as
    the tuple ``("intergenic", "intergenic", contig, "+", start, end, ".")``.

    Nothing happens when ``contig`` is None. The function always returns
    None.
    """
    # ``is None`` rather than ``== None``: identity is the correct test for
    # the None singleton.
    if contig is None:
        return

    # NOTE(review): contigs[contig] is presumably the contig length -
    # confirm against the code that fills ``contigs``.
    start, end = 0, contigs[contig]

    merged = Intervals.combineIntervals(regions)
    for xstart, xend in Intervals.complementIntervals(merged, start, end):
        locations.append(
            ("intergenic", "intergenic", contig, "+", xstart, xend, "."))
def transform_third_codon(start, end, intervals_with_gff):
    """transform: keep only third codon positions within the window.

    start, end         -- window boundaries.
    intervals_with_gff -- iterable of (interval_start, interval_end, gff)
                          triples; ``gff.frame`` and ``gff.strand`` are read
                          from each entry.

    Every selected nucleotide is emitted as a one-base interval
    ``(c, c + 1)``; the collected intervals are merged with
    ``Intervals.combineIntervals`` before being returned.

    Raises ValueError if an entry has no frame (``gff.frame == "."``).
    """
    window_length = end - start
    positions = []

    for seg_start, seg_end, gff in intervals_with_gff:

        if gff.frame == ".":
            raise ValueError("need a frame for third codon positions.")

        # phase = nucleotides from segment start to the next codon
        phase = int(gff.frame)

        # Work in 0-based coordinates relative to the window. On the
        # negative strand the window is mirrored so that counting starts
        # at its end.
        if Genomics.IsNegativeStrand(gff.strand):
            on_reverse = True
            origin = end
            rel_start, rel_end = end - seg_end, end - seg_start
        else:
            on_reverse = False
            origin = start
            rel_start, rel_end = seg_start - start, seg_end - start

        # segment begins before the window: clip it and carry the phase
        # over to the first position inside the window
        if rel_start < 0:
            phase = (phase + rel_start) % 3
            rel_start = 0

        # step back to the start of the codon containing rel_start ...
        if phase != 0:
            rel_start -= 3 - phase
        # ... and forward to the third position of that codon
        rel_start += 2

        rel_end = min(rel_end, window_length)

        for offset in range(rel_start, rel_end, 3):
            # map the window-relative offset back to original coordinates
            if on_reverse:
                pos = origin - offset - 1
            else:
                pos = origin + offset
            positions.append((pos, pos + 1))

    return Intervals.combineIntervals(positions)
def transform_complement(start, end, intervals_with_gff):
    """transform: complement of the intervals within (start, end).

    Only the first two fields (start, end) of each entry in
    ``intervals_with_gff`` are used. They are merged with
    ``Intervals.combineIntervals`` and the result of
    ``Intervals.complementIntervals`` for the window is returned.
    """
    coordinates = []
    for entry in intervals_with_gff:
        coordinates.append((entry[0], entry[1]))

    merged = Intervals.combineIntervals(coordinates)
    return Intervals.complementIntervals(merged, start, end)
def transform_overlap(start, end, intervals_with_gff):
    """transform: overlap of the intervals with the window (start, end).

    Only the first two fields (start, end) of each entry in
    ``intervals_with_gff`` are used. They are merged with
    ``Intervals.combineIntervals`` and then passed through
    ``Intervals.pruneIntervals`` together with the window boundaries.
    """
    coordinates = []
    for entry in intervals_with_gff:
        coordinates.append((entry[0], entry[1]))

    merged = Intervals.combineIntervals(coordinates)
    return Intervals.pruneIntervals(merged, start, end)
def transform_complement(start, end, intervals_with_gff):
    """transform: complement of the intervals within (start, end).

    start, end         -- window boundaries handed to
                          ``Intervals.complementIntervals``.
    intervals_with_gff -- iterable of entries whose first two fields are
                          the interval start and end; further fields are
                          ignored.

    Returns whatever ``Intervals.complementIntervals`` yields for the
    merged intervals and the window.
    """
    # A list comprehension instead of map(lambda ...): on Python 3 map()
    # returns a lazy one-shot iterator, while the helper was written for
    # the list it received on Python 2.
    y = Intervals.combineIntervals(
        [(x[0], x[1]) for x in intervals_with_gff])
    return Intervals.complementIntervals(y, start, end)
def transform_overlap(start, end, intervals_with_gff):
    """transform: overlap of the intervals with the window (start, end).

    start, end         -- window boundaries handed to
                          ``Intervals.pruneIntervals``.
    intervals_with_gff -- iterable of entries whose first two fields are
                          the interval start and end; further fields are
                          ignored.

    Returns whatever ``Intervals.pruneIntervals`` yields for the merged
    intervals and the window.
    """
    # A list comprehension instead of map(lambda ...): on Python 3 map()
    # returns a lazy one-shot iterator, while the helper was written for
    # the list it received on Python 2.
    y = Intervals.combineIntervals(
        [(x[0], x[1]) for x in intervals_with_gff])
    return Intervals.pruneIntervals(y, start, end)
def transform_complement(start, end, intervals_with_gff):
    """transform: complement of the intervals within (start, end).

    The (start, end) pair of every entry in ``intervals_with_gff`` is
    extracted, the pairs are merged with ``Intervals.combineIntervals`` and
    the merged set is handed to ``Intervals.complementIntervals`` together
    with the window boundaries.
    """
    pairs = [(record[0], record[1]) for record in intervals_with_gff]
    combined = Intervals.combineIntervals(pairs)
    return Intervals.complementIntervals(combined, start, end)
def transform_overlap(start, end, intervals_with_gff):
    """transform: overlap of the intervals with the window (start, end).

    The (start, end) pair of every entry in ``intervals_with_gff`` is
    extracted, the pairs are merged with ``Intervals.combineIntervals`` and
    the merged set is handed to ``Intervals.pruneIntervals`` together with
    the window boundaries.
    """
    pairs = [(record[0], record[1]) for record in intervals_with_gff]
    combined = Intervals.combineIntervals(pairs)
    return Intervals.pruneIntervals(combined, start, end)
def processChunk(gene_id, contig, strand, frame, regions):
    """Store the merged regions of one gene in ``locations``.

    ``regions`` is merged with ``Intervals.combineIntervals``; for every
    resulting interval the tuple
    ``(gene_id, gene_id, contig, strand, start, end)`` is appended to the
    module-level ``locations`` list.

    ``frame`` is accepted but not used. A ``gene_id`` of None means there
    is nothing to store. Returns None.
    """
    if gene_id is not None:
        merged = Intervals.combineIntervals(regions)
        for seg_start, seg_end in merged:
            entry = (gene_id, gene_id, contig, strand, seg_start, seg_end)
            locations.append(entry)
def processChunk(gene_id, contig, strand, frame, regions):
    """Append the merged regions of a gene to ``locations``.

    gene_id -- identifier of the gene; None means there is no chunk to
               process and the call is a no-op.
    contig  -- contig the gene is located on.
    strand  -- strand of the gene.
    frame   -- accepted but not used by this function.
    regions -- list of (start, end) intervals belonging to the gene.

    The regions are merged with ``Intervals.combineIntervals`` and one
    ``(gene_id, gene_id, contig, strand, start, end)`` tuple per merged
    interval is appended to the module-level ``locations`` list.
    Returns None.
    """
    # Identity test instead of ``== None``: None is a singleton and
    # ``==`` may be overridden by the compared object.
    if gene_id is None:
        return

    for start, end in Intervals.combineIntervals(regions):
        locations.append((gene_id, gene_id, contig, strand, start, end))