def parse_alignments(file_adr):
    """Read a MUMmer COORD file and collect alignments between distinct contigs.

    Side effect: the module-level ``all_alignments`` is rebound to every
    parsed alignment whose query and reference tags differ.  The thresholds
    ``cov_threshold`` and ``ctg_threshold`` are read from module globals.

    @param file_adr  file containing the alignments between two assemblies
                     (MUMmer COORD file); handed to
                     ``mummerParser.parse_mummerFile``
    @return a 3-tuple ``(alignments, contigs, contigs_included_in_other)``:
            - ``alignments``: the non-self alignments accepted by
              ``aln_pass_thresholds`` and ``is_extremal`` for which
              ``reverse_is_in_alignments`` is false against the alignments
              already kept;
            - ``contigs``: dict mapping each contig tag to the length
              (``LENR`` / ``LENQ``) recorded in the first alignment that
              mentions it;
            - ``contigs_included_in_other``: de-duplicated list (arbitrary
              order) of tags flagged as covered by another contig.
    """
    # Only all_alignments is rebound here; the thresholds are merely read,
    # so they need no ``global`` declaration.
    global all_alignments
    all_alignments = [
        x for x in mummerParser.parse_mummerFile(file_adr)
        if x['TAGQ'] != x['TAGR']
    ]

    alignments = []
    contigs = {}
    included = set()

    for aln in all_alignments:
        # Build the tag -> length mapping; the first length seen wins.
        for side in ("R", "Q"):
            contigs.setdefault(aln["TAG" + side], aln["LEN" + side])

        # Self alignments were filtered out above, so the two contigs differ.
        # Is one of them completely included (AKA "covered") by the other?
        # The query side is only examined when the reference is not flagged.
        if (aln["COVR"] > cov_threshold
                or (aln["LENR"] - aln["LEN1"]) < ctg_threshold):
            included.add(aln["TAGR"])
        elif (aln["COVQ"] > cov_threshold
                or (aln["LENQ"] - aln["LEN2"]) < ctg_threshold):
            included.add(aln["TAGQ"])

        # Evaluation order is kept: thresholds, then reverse lookup, then
        # the extremal test.
        if (aln_pass_thresholds(aln)
                and not reverse_is_in_alignments(aln, alignments)
                and is_extremal(aln)):
            alignments.append(aln)

    contigs_included_in_other = list(included)
    logger.debug("list of contigs %s\n included in others:%s ",
                 str(contigs), contigs_included_in_other)
    return alignments, contigs, contigs_included_in_other
def main(argv=None):
    """Write the alignments of a MUMmer coords file as tab-separated text.

    The first output line holds the column names (the keys of the first
    alignment, sorted); each following line holds the values of one
    alignment, ordered by that alignment's sorted keys.

    @param argv  optional list of command-line arguments; when None,
                 argparse falls back to ``sys.argv[1:]``
    """
    parser = argparse.ArgumentParser(description="Pipe out the alignments in CSV format")
    parser.add_argument('-e', dest="extremal_only", action="store_true",
                        help="Only keep extremal alignments")
    parser.add_argument('-k', dest="florence_selection", action="store_true",
                        help="Remove alignments not satisfying Florence criteria")
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o', nargs='?', type=argparse.FileType('w'),
                        default=sys.stdout, dest="outfile")
    # Honour the argv parameter instead of always reading sys.argv.
    args = parser.parse_args(argv)

    alignments = mummerParser.parse_mummerFile(args.infile)
    if not alignments:
        # No alignment means no column names to derive: emit nothing rather
        # than fail with IndexError on alignments[0].
        return

    out = args.outfile
    by_key = itemgetter(0)

    # Get col names from the first alignment.
    header = sorted(alignments[0].items(), key=by_key)
    out.write("\t".join(str(key) for key, _ in header) + "\n")

    for a in alignments:
        if args.extremal_only and not is_extremal(a):
            continue
        if args.florence_selection and not is_kept(a):
            continue
        row = sorted(a.items(), key=by_key)
        out.write("\t".join(str(value) for _, value in row) + "\n")
def main(argv=None):
    """Pipe out the alignments whose two contigs belong to a requested set.

    An alignment is kept when both ``TAGQ`` and ``TAGR`` are in the CONTIGS
    list, or when the list contains ``"*"`` (keep everything, optionally
    minus self alignments with ``-s``).  Kept alignments are printed with
    ``mummerParser.print_alignment``; with ``-v`` every rejected alignment
    is reported on the output file as ``Skipped <alignment>``.

    @param argv  optional list of command-line arguments; when None,
                 argparse falls back to ``sys.argv[1:]``
    """
    parser = argparse.ArgumentParser(description="Pipe out the alignments involving or excluding provided contigs ID")
    parser.add_argument('-e', dest="inverse_match", action="store_true",
                        help="Inverse the criterion, filter out any conting in CONTIGID list (not implemented yet)")
    parser.add_argument('-r', dest="use_re", action="store_true",
                        help="Consider CONTIGID as python regexp (not implemented yet)")
    parser.add_argument('-s', dest="remove_self", action="store_true",
                        help="remove self alignments")
    parser.add_argument('-p', dest="pretty", action="store_true",
                        help="Pretty print")
    parser.add_argument('-v', dest="verbose", action="store_true",
                        help="Verbose output")
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o', nargs='?', type=argparse.FileType('w'),
                        default=sys.stdout, dest="outfile")
    parser.add_argument('CONTIGS', action='append', nargs="+",
                        help='List of CONTIGS to keep. Use "*" to keep them all')
    # Honour the argv parameter instead of always reading sys.argv.
    args = parser.parse_args(argv)

    # action='append' combined with nargs='+' wraps the ids in an outer list.
    contig_ids = set(args.CONTIGS[0])
    keep_all = "*" in contig_ids
    out = args.outfile

    kept_alignments = []
    for a in mummerParser.parse_mummerFile(args.infile):
        is_self = a['TAGQ'] == a['TAGR']
        # NOTE(review): -s is only honoured in wildcard mode, exactly as
        # before; confirm whether it should also apply to explicit lists.
        if keep_all and not (args.remove_self and is_self):
            kept_alignments.append(a)
        # elif (not a second ``if``): an alignment is appended at most once
        # and a kept alignment is never reported as skipped.
        elif a['TAGQ'] in contig_ids and a['TAGR'] in contig_ids:
            kept_alignments.append(a)
        elif args.verbose:
            out.write("Skipped %s\n" % (a,))

    for a in kept_alignments:
        mummerParser.print_alignment(a, out, args.pretty)
def main(argv=None):
    """Pipe out the alignments whose two contigs belong to a requested set.

    An alignment is kept when both ``TAGQ`` and ``TAGR`` are in the CONTIGS
    list, or when the list contains ``"*"`` (keep everything, optionally
    minus self alignments with ``-s``).  Kept alignments are printed with
    ``mummerParser.print_alignment``; with ``-v`` every rejected alignment
    is reported on the output file as ``Skipped <alignment>``.

    @param argv  optional list of command-line arguments; when None,
                 argparse falls back to ``sys.argv[1:]``
    """
    parser = argparse.ArgumentParser(
        description=
        "Pipe out the alignments involving or excluding provided contigs ID")
    parser.add_argument(
        '-e',
        dest="inverse_match",
        action="store_true",
        help=
        "Inverse the criterion, filter out any conting in CONTIGID list (not implemented yet)"
    )
    parser.add_argument(
        '-r',
        dest="use_re",
        action="store_true",
        help="Consider CONTIGID as python regexp (not implemented yet)")
    parser.add_argument('-s',
                        dest="remove_self",
                        action="store_true",
                        help="remove self alignments")
    parser.add_argument('-p',
                        dest="pretty",
                        action="store_true",
                        help="Pretty print")
    parser.add_argument('-v',
                        dest="verbose",
                        action="store_true",
                        help="Verbose output")
    parser.add_argument('infile',
                        nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o',
                        nargs='?',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        dest="outfile")
    parser.add_argument(
        'CONTIGS',
        action='append',
        nargs="+",
        help='List of CONTIGS to keep. Use "*" to keep them all')
    # Honour the argv parameter instead of always reading sys.argv.
    args = parser.parse_args(argv)

    # action='append' combined with nargs='+' wraps the ids in an outer list.
    contig_ids = set(args.CONTIGS[0])
    keep_all = "*" in contig_ids
    out = args.outfile

    kept_alignments = []
    for a in mummerParser.parse_mummerFile(args.infile):
        is_self = a['TAGQ'] == a['TAGR']
        # NOTE(review): -s is only honoured in wildcard mode, exactly as
        # before; confirm whether it should also apply to explicit lists.
        if keep_all and not (args.remove_self and is_self):
            kept_alignments.append(a)
        # elif (not a second ``if``): an alignment is appended at most once
        # and a kept alignment is never reported as skipped.
        elif a['TAGQ'] in contig_ids and a['TAGR'] in contig_ids:
            kept_alignments.append(a)
        elif args.verbose:
            out.write("Skipped %s\n" % (a,))

    for a in kept_alignments:
        mummerParser.print_alignment(a, out, args.pretty)
def main(argv=None):
    """Write the alignments of a MUMmer coords file as tab-separated text.

    The first output line holds the column names (the keys of the first
    alignment, sorted); each following line holds the values of one
    alignment, ordered by that alignment's sorted keys.

    @param argv  optional list of command-line arguments; when None,
                 argparse falls back to ``sys.argv[1:]``
    """
    parser = argparse.ArgumentParser(
        description="Pipe out the alignments in CSV format")
    parser.add_argument('-e',
                        dest="extremal_only",
                        action="store_true",
                        help="Only keep extremal alignments")
    parser.add_argument(
        '-k',
        dest="florence_selection",
        action="store_true",
        help="Remove alignments not satisfying Florence criteria")
    parser.add_argument('infile',
                        nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o',
                        nargs='?',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        dest="outfile")
    # Honour the argv parameter instead of always reading sys.argv.
    args = parser.parse_args(argv)

    alignments = mummerParser.parse_mummerFile(args.infile)
    if not alignments:
        # No alignment means no column names to derive: emit nothing rather
        # than fail with IndexError on alignments[0].
        return

    out = args.outfile
    by_key = itemgetter(0)

    # Get col names from the first alignment.
    header = sorted(alignments[0].items(), key=by_key)
    out.write("\t".join(str(key) for key, _ in header) + "\n")

    for a in alignments:
        if args.extremal_only and not is_extremal(a):
            continue
        if args.florence_selection and not is_kept(a):
            continue
        row = sorted(a.items(), key=by_key)
        out.write("\t".join(str(value) for _, value in row) + "\n")
def main(argv=None):
    """Display the non-self alignments of a nucmer coords file in ASCII.

    Self alignments (``TAGQ == TAGR``) are dropped.  When more alignments
    remain than there are entries in ``align_keys``, only the
    ``len(align_keys) - 3`` alignments with the largest ``LEN2`` are kept
    and then ordered by ``S1``.  The result is rendered by
    ``print_aligned_contigs``.  The process exits with status 0 when no
    alignment is left after filtering.

    @param argv  optional list of command-line arguments; when None,
                 argparse falls back to ``sys.argv[1:]``
    """
    parser = argparse.ArgumentParser(description="Display nucmer alignments in ASCII")
    parser.add_argument('-w', dest="max_width", action="store", type=int,
                        default=110, help="Width of text output")
    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin, help="Nucmer coords file")
    args = parser.parse_args(argv)

    # Filter out self alignments
    alignments = [
        x for x in mummerParser.parse_mummerFile(args.infile)
        if x['TAGQ'] != x['TAGR']
    ]

    # sys.stdout.write keeps the output identical under Python 2 and 3.
    sys.stdout.write("Found %d\n" % len(alignments))
    if not alignments:
        sys.exit(0)

    # NOTE(review): the indentation of this function was lost in the source
    # under review; the S1 sort and the "Filter down to" message are assumed
    # to belong to the truncation branch - confirm against the original file.
    if len(alignments) > len(align_keys):
        # Keep the longest alignments, then restore positional order.
        alignments.sort(key=lambda x: x['LEN2'], reverse=True)
        alignments = alignments[:len(align_keys) - 3]
        alignments.sort(key=lambda x: x['S1'])
        sys.stdout.write("Filter down to %d\n" % len(alignments))

    print_aligned_contigs(alignments, max_width=args.max_width)