def processChunk(contig, regions):
    """Record the uncovered stretches of one contig in ``locations``.

    ``regions`` is merged with ``Intervals.combineIntervals`` and the
    complement of the merged set between 0 and ``contigs[contig]`` (as
    returned by ``Intervals.complementIntervals``) is appended to the
    module-level ``locations`` list, one tuple per gap::

        ("intergenic", "intergenic", contig, "+", start, end, ".")

    Args:
        contig: key into the module-level ``contigs`` mapping. ``None``
            turns the call into a no-op.
        regions: intervals located on ``contig``; presumably
            ``(start, end)`` pairs -- confirm against ``Intervals``.

    Returns:
        None. Results are accumulated in ``locations``.
    """
    # Identity test on purpose: ``== None`` would dispatch to a custom
    # ``__eq__`` if contig ever were an object defining one.
    if contig is None:
        return

    start = 0
    # NOTE(review): contigs[contig] is presumably the contig length -- confirm.
    end = contigs[contig]

    merged = Intervals.combineIntervals(regions)
    for xstart, xend in Intervals.complementIntervals(merged, start, end):
        locations.append(
            ("intergenic", "intergenic", contig, "+", xstart, xend, "."))
def processChunk(gene_id, contig, strand, frame, regions):
    """Record the gaps between the regions of one gene in ``locations``.

    The gene span runs from the smallest start to the largest end of
    ``regions``. Every interval that ``Intervals.complementIntervals``
    reports inside that span is appended to the module-level
    ``locations`` list as::

        (gene_id, gene_id, contig, strand, start, end, frame)

    Args:
        gene_id: identifier stored twice in each emitted tuple. ``None``
            turns the call into a no-op.
        contig: stored unchanged in each emitted tuple.
        strand: stored unchanged in each emitted tuple.
        frame: stored unchanged in each emitted tuple.
        regions: non-empty sequence of ``(start, end)`` pairs.

    Returns:
        None. Results are accumulated in ``locations``.

    Raises:
        ValueError: if ``regions`` is empty (from ``min``/``max``).
    """
    if gene_id is None:
        return

    span_start = min(region[0] for region in regions)
    # The span ends at the largest *end* coordinate; the previous code took
    # the maximum over the start coordinates (index 0) here.
    span_end = max(region[1] for region in regions)

    gaps = Intervals.complementIntervals(regions, span_start, span_end)
    # Dedicated loop names so the span bounds above are not overwritten.
    for gap_start, gap_end in gaps:
        locations.append(
            (gene_id, gene_id, contig, strand, gap_start, gap_end, frame))
def processChunk(contig, regions):
    """Append the intergenic intervals of ``contig`` to ``locations``.

    The given regions are first collapsed with
    ``Intervals.combineIntervals``. Whatever
    ``Intervals.complementIntervals`` returns for the collapsed set
    between 0 and ``contigs[contig]`` is then stored in the module-level
    ``locations`` list as
    ``("intergenic", "intergenic", contig, "+", start, end, ".")``.

    Args:
        contig: key into the module-level ``contigs`` mapping; nothing
            happens when it is ``None``.
        regions: intervals on ``contig``; presumably ``(start, end)``
            pairs -- verify against the ``Intervals`` helpers.

    Returns:
        None. Output is collected in ``locations``.
    """
    # ``is None`` compares identity and cannot be hijacked by ``__eq__``.
    if contig is None:
        return

    first = 0
    # NOTE(review): looks like the contig length is stored here -- confirm.
    last = contigs[contig]

    combined = Intervals.combineIntervals(regions)
    for xstart, xend in Intervals.complementIntervals(combined, first, last):
        locations.append(
            ("intergenic", "intergenic", contig, "+", xstart, xend, "."))
def processChunk(gene_id, contig, strand, frame, regions):
    """Append the intervals lying between a gene's regions to ``locations``.

    The complement of ``regions`` is taken with
    ``Intervals.complementIntervals`` inside the gene span (smallest
    start to largest end). One tuple
    ``(gene_id, gene_id, contig, strand, start, end, frame)`` per
    resulting interval is added to the module-level ``locations`` list.

    Args:
        gene_id: identifier written twice into each tuple; the call is
            a no-op when it is ``None``.
        contig: copied into each tuple.
        strand: copied into each tuple.
        frame: copied into each tuple.
        regions: non-empty sequence of ``(start, end)`` pairs.

    Returns:
        None. Output is collected in ``locations``.

    Raises:
        ValueError: if ``regions`` is empty (from ``min``/``max``).
    """
    if gene_id is None:
        return

    lower = min(start for start, _ in ((r[0], r[1]) for r in regions))
    # Upper bound of the span is the maximum end coordinate (index 1);
    # the earlier implementation mistakenly read index 0 for both bounds.
    upper = max(r[1] for r in regions)

    # Separate loop variables keep ``lower``/``upper`` intact.
    for gap_start, gap_end in Intervals.complementIntervals(
            regions, lower, upper):
        locations.append(
            (gene_id, gene_id, contig, strand, gap_start, gap_end, frame))
def transform_complement(start, end, intervals_with_gff):
    """Return the complement of the given intervals between start and end.

    Only the first two items of every entry in ``intervals_with_gff``
    are used. These pairs are merged with
    ``Intervals.combineIntervals`` and the merged list is handed to
    ``Intervals.complementIntervals`` together with ``start`` and
    ``end``; its result is returned unchanged.
    """
    coordinates = []
    for entry in intervals_with_gff:
        # Drop whatever payload follows the two coordinates.
        coordinates.append((entry[0], entry[1]))

    merged = Intervals.combineIntervals(coordinates)
    return Intervals.complementIntervals(merged, start, end)
def maskAlignment(mali,
                  map_component2masks,
                  map_component2extracts,
                  map_sample2reference,
                  options):
    """Mask regions of a multiple alignment and optionally drop gap columns.

    The alignment is modified through its own ``maskColumns`` and
    ``removePattern`` methods; nothing is returned.

    Steps, in order:

    1. If ``map_component2masks`` has an entry for the alignment, every
       listed ``(start, end)`` range is masked.
    2. If ``map_component2extracts`` has an entry, everything *outside*
       the listed ranges (their complement between 0 and
       ``mali.getWidth()``) is masked.
    3. If ``options.remove_all_gaps`` is set, columns in which every
       sequence carries a gap/mask character are removed.
    4. If ``options.remove_any_gaps`` is set, columns in which at least
       one sequence carries a gap/mask character are removed.

    Gap/mask characters are ``"Xx-"`` when ``mali.getAlphabet()`` is
    ``"aa"`` and ``"XxNn-"`` otherwise.

    Args:
        mali: alignment object providing ``getName``, ``getWidth``,
            ``maskColumns``, ``getAlphabet``, ``getNumColumns``,
            ``getNumSequences``, ``removePattern`` and ``writeToFile``.
        map_component2masks: mapping of identifier to a list of
            ``(start, end)`` ranges to mask; may be empty/None.
        map_component2extracts: mapping of identifier to a list of
            ``(start, end)`` ranges to keep; may be empty/None.
        map_sample2reference: optional mapping of alignment name to the
            identifier whose coordinates should be looked up instead.
        options: object with ``loglevel``, ``stdout``, ``stdlog``,
            ``remove_all_gaps`` and ``remove_any_gaps``. The two
            ``remove_*`` values act as on/off switches and are also
            passed on as ``delete_frame``.

    Returns:
        None.
    """
    # ``component`` instead of ``id`` so the builtin is not shadowed.
    component = mali.getName()

    def dump_alignment(header):
        # Debug dump. Header and alignment both go to options.stdout so
        # they stay together when that stream is redirected.
        if options.loglevel >= 5:
            options.stdout.write(header)
            mali.writeToFile(options.stdout)

    def lookup_regions(table):
        # Resolve the identifier to look up, then fetch its regions.
        if map_sample2reference and component in map_sample2reference:
            key = map_sample2reference[component]
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# using mapped coordinates from %s for %s\n" %
                    (key, component))
        else:
            key = component

        if key in table:
            return table[key]
        return []

    def mask_regions(regions, message):
        # Mask each range, clipped to the alignment width.
        for start, end in regions:
            if options.loglevel >= 1:
                options.stdlog.write(message % (component, start, end))
            mali.maskColumns(range(start, min(end, mali.getWidth())))

    def remove_gap_columns(delete_frame, minimum_matches, message):
        # Remove matching columns and report the width change.
        width_before = mali.getNumColumns()
        mali.removePattern(match_function=lambda x: x in gap_chars,
                           minimum_matches=minimum_matches,
                           search_frame=1,
                           delete_frame=delete_frame)
        width_after = mali.getNumColumns()
        if options.loglevel >= 1:
            options.stdlog.write(
                message % (component,
                           width_before - width_after,
                           width_before,
                           width_after))

    dump_alignment("# multiple alignment %s before masking:\n" % component)

    ##########################################################################
    # mask alignment
    if map_component2masks:
        masks = lookup_regions(map_component2masks)
        if masks:
            mask_regions(
                masks,
                "# component: %s: masking region due to mask %i-%i\n")

    ##########################################################################
    # extract regions from an alignment by masking everything else
    if map_component2extracts:
        extracts = lookup_regions(map_component2extracts)
        if extracts:
            other_masks = Intervals.complementIntervals(
                extracts, 0, mali.getWidth())
            mask_regions(
                other_masks,
                "# component: %s: masking region due to extract %i-%i\n")

    dump_alignment("# multiple alignment after masking:\n")

    if mali.getAlphabet() == "aa":
        gap_chars = "Xx-"
    else:
        gap_chars = "XxNn-"

    if options.remove_all_gaps:
        remove_gap_columns(
            options.remove_all_gaps,
            mali.getNumSequences(),
            "# component: %s: removed %i fully gapped/masked columns, old size=%i, new size=%i\n")

    if options.remove_any_gaps:
        remove_gap_columns(
            options.remove_any_gaps,
            1,
            "# component: %s: removed %i columns containing at least one gap/mask, old size=%i, new size=%i\n")

    dump_alignment("# multiple alignment after cleaning:\n")
def transform_complement(start, end, intervals_with_gff):
    """Return the complement of the given intervals between start and end.

    The first two items of each entry in ``intervals_with_gff`` are
    taken as an interval; any further items are ignored. The intervals
    are merged with ``Intervals.combineIntervals`` and the result of
    ``Intervals.complementIntervals(merged, start, end)`` is returned.

    Args:
        start: lower bound passed to ``complementIntervals``.
        end: upper bound passed to ``complementIntervals``.
        intervals_with_gff: iterable of indexable entries whose items 0
            and 1 are the interval coordinates.

    Returns:
        Whatever ``Intervals.complementIntervals`` returns.
    """
    # Build a real list: on Python 3 ``map`` returns a one-shot iterator,
    # which cannot be sorted, measured or traversed twice by the callee.
    pairs = [(x[0], x[1]) for x in intervals_with_gff]
    merged = Intervals.combineIntervals(pairs)
    return Intervals.complementIntervals(merged, start, end)
def transform_complement(start, end, intervals_with_gff):
    """Complement the supplied intervals within the start/end bounds.

    Each entry of ``intervals_with_gff`` contributes its items 0 and 1
    as one interval. The intervals are merged via
    ``Intervals.combineIntervals`` and the value returned by
    ``Intervals.complementIntervals`` for the merged list, ``start``
    and ``end`` is passed straight back to the caller.
    """
    return Intervals.complementIntervals(
        Intervals.combineIntervals(
            [(record[0], record[1]) for record in intervals_with_gff]),
        start,
        end)
def maskAlignment(mali,
                  map_component2masks,
                  map_component2extracts,
                  map_sample2reference,
                  options):
    """Apply masks/extracts to a multiple alignment and clean gap columns.

    All changes are made by calling ``maskColumns`` and
    ``removePattern`` on ``mali``; the function itself returns nothing.

    Processing order:

    1. Ranges found in ``map_component2masks`` are masked.
    2. For ranges found in ``map_component2extracts``, their complement
       between 0 and ``mali.getWidth()`` is masked, i.e. everything
       but the extracts.
    3. With ``options.remove_all_gaps`` set, columns where all
       sequences show a gap/mask character are removed.
    4. With ``options.remove_any_gaps`` set, columns where one or more
       sequences show a gap/mask character are removed.

    The gap/mask alphabet is ``"Xx-"`` if ``mali.getAlphabet()``
    equals ``"aa"``, else ``"XxNn-"``.

    Args:
        mali: alignment object; must offer ``getName``, ``getWidth``,
            ``maskColumns``, ``getAlphabet``, ``getNumColumns``,
            ``getNumSequences``, ``removePattern`` and ``writeToFile``.
        map_component2masks: identifier -> list of ``(start, end)``
            ranges to mask; may be empty/None.
        map_component2extracts: identifier -> list of ``(start, end)``
            ranges to retain; may be empty/None.
        map_sample2reference: optional alignment name -> identifier
            used for the lookups in the two maps above.
        options: provides ``loglevel``, ``stdout``, ``stdlog``,
            ``remove_all_gaps`` and ``remove_any_gaps``; the latter two
            double as the ``delete_frame`` argument of
            ``removePattern``.

    Returns:
        None.
    """
    # Not called ``id``: that would hide the builtin of the same name.
    name = mali.getName()

    def debug_dump(header):
        # Write header and alignment to the same stream (options.stdout)
        # so a redirected debug dump is not split across two outputs.
        if options.loglevel >= 5:
            options.stdout.write(header)
            mali.writeToFile(options.stdout)

    def get_regions(map1):
        # Use the reference identifier when a mapping exists for ``name``.
        if map_sample2reference and name in map_sample2reference:
            xid = map_sample2reference[name]
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# using mapped coordinates from %s for %s\n" %
                    (xid, name))
        else:
            xid = name

        if xid in map1:
            return map1[xid]
        return []

    def apply_masks(regions, template):
        # Ranges are clipped to the current alignment width.
        for start, end in regions:
            if options.loglevel >= 1:
                options.stdlog.write(template % (name, start, end))
            mali.maskColumns(range(start, min(end, mali.getWidth())))

    def prune_columns(delete_frame, minimum_matches, template):
        # Delete gap/mask columns and log how many were dropped.
        width_before = mali.getNumColumns()
        mali.removePattern(
            match_function=lambda x: x in gap_chars,
            minimum_matches=minimum_matches,
            search_frame=1,
            delete_frame=delete_frame)
        width_after = mali.getNumColumns()
        if options.loglevel >= 1:
            options.stdlog.write(
                template % (name,
                            width_before - width_after,
                            width_before,
                            width_after))

    debug_dump("# multiple alignment %s before masking:\n" % name)

    ##########################################################################
    # mask alignment
    if map_component2masks:
        masks = get_regions(map_component2masks)
        if masks:
            apply_masks(
                masks,
                "# component: %s: masking region due to mask %i-%i\n")

    ##########################################################################
    # extract regions from an alignment by masking everything else
    if map_component2extracts:
        keep = get_regions(map_component2extracts)
        if keep:
            other_masks = Intervals.complementIntervals(
                keep, 0, mali.getWidth())
            apply_masks(
                other_masks,
                "# component: %s: masking region due to extract %i-%i\n")

    debug_dump("# multiple alignment after masking:\n")

    if mali.getAlphabet() == "aa":
        gap_chars = "Xx-"
    else:
        gap_chars = "XxNn-"

    if options.remove_all_gaps:
        prune_columns(
            options.remove_all_gaps,
            mali.getNumSequences(),
            "# component: %s: removed %i fully gapped/masked columns, old size=%i, new size=%i\n")

    if options.remove_any_gaps:
        prune_columns(
            options.remove_any_gaps,
            1,
            "# component: %s: removed %i columns containing at least one gap/mask, old size=%i, new size=%i\n")

    debug_dump("# multiple alignment after cleaning:\n")