Python parse_mummerFile Examples, mummerParser.parse_mummerFile Python Examples

Example #1

0

Show file

File: Mix.py Project: bdartigues/MIX

def parse_alignments(file_adr):
	"""Read a MUMmer COORD file and collect the alignments between distinct contigs.

	Side effect: the module-level ``all_alignments`` is rebound to every
	parsed alignment whose query and reference tags differ.

	@param file_adr  file containing the alignments between two assemblies
	                 (MUMmer COORD file), passed as-is to
	                 ``mummerParser.parse_mummerFile``
	@return a 3-tuple ``(alignments, contigs, contigs_included_in_other)``:
	        - alignments: the non-self alignments that pass
	          ``aln_pass_thresholds``, whose reverse is not already selected
	          (``reverse_is_in_alignments``) and that satisfy ``is_extremal``
	        - contigs: dict mapping each contig tag seen in a non-self
	          alignment to the LENR/LENQ value of the first alignment
	          mentioning it
	        - contigs_included_in_other: de-duplicated list of tags flagged
	          as included in ("covered" by) another contig
	"""
	global all_alignments, aln_threshold, ctg_threshold, cov_threshold

	# Self-alignments are dropped here once and for all, so the loop below
	# never sees an alignment with TAGQ == TAGR.
	all_alignments = [x for x in mummerParser.parse_mummerFile(file_adr) if x['TAGQ']!=x['TAGR']]
	alignments = []
	contigs = {}
	included = set()
	for aln in all_alignments:
		# Build the tag -> length mapping; the first length seen for a tag wins.
		for side in ("R", "Q"):
			tag = aln["TAG"+side]
			if tag not in contigs:
				contigs[tag] = aln["LEN"+side]

		# Do we have a contig completely included (AKA "covered") by another?
		# The reference side is tested first; the query side is only
		# considered when the reference is not flagged.
		# NOTE(review): LEN1/LEN2 are presumably the aligned lengths on the
		# reference/query side -- confirm against mummerParser.
		if (aln["COVR"] > cov_threshold) or ((aln["LENR"] - aln["LEN1"]) < ctg_threshold):
			included.add(aln["TAGR"])
		elif (aln["COVQ"] > cov_threshold) or ((aln["LENQ"] - aln["LEN2"]) < ctg_threshold):
			included.add(aln["TAGQ"])

		if aln_pass_thresholds(aln) and (not reverse_is_in_alignments(aln, alignments)) and is_extremal(aln):
			alignments.append(aln)

	contigs_included_in_other = list(included)
	logger.debug("list of contigs %s\n included in others:%s ",str(contigs),contigs_included_in_other)

	return alignments, contigs, contigs_included_in_other

Example #2

0

Show file

File: nucmer_to_csv.py Project: BioinformaticsArchive/MIX

def main(argv=None):
	"""Pipe the alignments of a nucmer/MUMmer file out as delimited text.

	The first output line holds the column names (the keys of the first
	alignment, sorted); every following line holds the values of one
	alignment, ordered by its own sorted keys. Fields are separated by
	tabs. Nothing is written when the input holds no alignment.

	@param argv  currently unused: argparse always reads the command line
	             from ``sys.argv``
	"""
	parser=argparse.ArgumentParser(description="Pipe out the alignments in CSV format")
	parser.add_argument('-e',dest="extremal_only",action="store_true",help="Only keep extremal alignments")
	parser.add_argument('-k',dest="florence_selection",action="store_true",help="Remove alignments not satisfying Florence criteria")

	parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
	parser.add_argument('-o', nargs='?', type=argparse.FileType('w'), default=sys.stdout,dest="outfile")
	args=parser.parse_args()

	alignments=mummerParser.parse_mummerFile(args.infile)
	if not alignments:
		# No alignment means no column names either: emit nothing instead of
		# failing on alignments[0].
		return

	out=args.outfile
	# Column names come from the first alignment. sorted() is used instead
	# of items().sort() so the code runs on both Python 2 and Python 3.
	header=sorted(alignments[0].items(), key=itemgetter(0))
	out.write("\t".join([str(name) for name, _ in header]) + "\n")

	for a in alignments:
		if args.extremal_only and not is_extremal(a):
			continue
		if args.florence_selection and not is_kept(a):
			continue

		row=sorted(a.items(), key=itemgetter(0))
		out.write("\t".join([str(value) for _, value in row]) + "\n")

Example #3

0

Show file

File: nucmer_coords_filter.py Project: BioinformaticsArchive/MIX

def main(argv=None):
	"""Pipe out the alignments whose two contigs are both in the CONTIGS list.

	An alignment is kept when either
	  - "*" is among CONTIGS and, if -s is given, it is not a self alignment, or
	  - both its query tag and its reference tag are listed in CONTIGS.
	Each kept alignment is printed exactly once; with -v every alignment
	that is NOT kept is reported on the output file as "Skipped".

	@param argv  currently unused: argparse always reads the command line
	             from ``sys.argv``
	"""
	parser=argparse.ArgumentParser(description="Pipe out the alignments involving or excluding provided contigs ID")
	parser.add_argument('-e',dest="inverse_match",action="store_true",help="Inverse the criterion, filter out any conting in CONTIGID list (not implemented yet)")
	parser.add_argument('-r',dest="use_re",action="store_true",help="Consider CONTIGID as python regexp (not implemented yet)")
	parser.add_argument('-s',dest="remove_self",action="store_true",help="remove self alignments")

	parser.add_argument('-p',dest="pretty",action="store_true",help="Pretty print")
	parser.add_argument('-v',dest="verbose",action="store_true",help="Verbose output")
	parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
	parser.add_argument('-o', nargs='?', type=argparse.FileType('w'), default=sys.stdout,dest="outfile")
	parser.add_argument('CONTIGS',action='append',nargs="+",help='List of CONTIGS to keep. Use "*" to keep them all')
	args=parser.parse_args()

	# action='append' combined with nargs='+' yields a list of lists; the
	# first entry is the list of IDs given on the command line.
	contig_ids=set(args.CONTIGS[0])
	keep_all="*" in contig_ids
	alignments=mummerParser.parse_mummerFile(args.infile)
	kept_alignments=[]
	for a in alignments:
		is_self=a['TAGQ']==a['TAGR']
		by_wildcard=keep_all and not (args.remove_self and is_self)
		by_listing=a['TAGQ'] in contig_ids and a['TAGR'] in contig_ids
		# A single decision per alignment: the former pair of independent
		# `if`s could append the same alignment twice and report kept
		# alignments as skipped.
		if by_wildcard or by_listing:
			kept_alignments.append(a)
		elif args.verbose:
			args.outfile.write("Skipped %s\n" % (a,))
	for a in kept_alignments:
		mummerParser.print_alignment(a,args.outfile,args.pretty)

Example #4

0

Show file

def main(argv=None):
    """Pipe out the alignments whose two contigs are both in the CONTIGS list.

    An alignment is kept when either
      - "*" is among CONTIGS and, if -s is given, it is not a self alignment, or
      - both its query tag and its reference tag are listed in CONTIGS.
    Each kept alignment is printed exactly once; with -v every alignment
    that is NOT kept is reported on the output file as "Skipped".

    @param argv  currently unused: argparse always reads the command line
                 from ``sys.argv``
    """
    parser = argparse.ArgumentParser(
        description=
        "Pipe out the alignments involving or excluding provided contigs ID")
    parser.add_argument(
        '-e',
        dest="inverse_match",
        action="store_true",
        help=
        "Inverse the criterion, filter out any conting in CONTIGID list (not implemented yet)"
    )
    parser.add_argument(
        '-r',
        dest="use_re",
        action="store_true",
        help="Consider CONTIGID as python regexp (not implemented yet)")
    parser.add_argument('-s',
                        dest="remove_self",
                        action="store_true",
                        help="remove self alignments")

    parser.add_argument('-p',
                        dest="pretty",
                        action="store_true",
                        help="Pretty print")
    parser.add_argument('-v',
                        dest="verbose",
                        action="store_true",
                        help="Verbose output")
    parser.add_argument('infile',
                        nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o',
                        nargs='?',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        dest="outfile")
    parser.add_argument(
        'CONTIGS',
        action='append',
        nargs="+",
        help='List of CONTIGS to keep. Use "*" to keep them all')
    args = parser.parse_args()

    # action='append' combined with nargs='+' yields a list of lists; the
    # first entry is the list of IDs given on the command line.
    contig_ids = set(args.CONTIGS[0])
    keep_all = "*" in contig_ids
    alignments = mummerParser.parse_mummerFile(args.infile)
    kept_alignments = []
    for a in alignments:
        is_self = a['TAGQ'] == a['TAGR']
        by_wildcard = keep_all and not (args.remove_self and is_self)
        by_listing = a['TAGQ'] in contig_ids and a['TAGR'] in contig_ids
        # A single decision per alignment: the former pair of independent
        # `if`s could append the same alignment twice and report kept
        # alignments as skipped.
        if by_wildcard or by_listing:
            kept_alignments.append(a)
        elif args.verbose:
            args.outfile.write("Skipped %s\n" % (a,))
    for a in kept_alignments:
        mummerParser.print_alignment(a, args.outfile, args.pretty)

Example #5

0

Show file

def main(argv=None):
    """Pipe the alignments of a nucmer/MUMmer file out as delimited text.

    The first output line holds the column names (the keys of the first
    alignment, sorted); every following line holds the values of one
    alignment, ordered by its own sorted keys. Fields are separated by
    tabs. Nothing is written when the input holds no alignment.

    @param argv  currently unused: argparse always reads the command line
                 from ``sys.argv``
    """
    parser = argparse.ArgumentParser(
        description="Pipe out the alignments in CSV format")
    parser.add_argument('-e',
                        dest="extremal_only",
                        action="store_true",
                        help="Only keep extremal alignments")
    parser.add_argument(
        '-k',
        dest="florence_selection",
        action="store_true",
        help="Remove alignments not satisfying Florence criteria")

    parser.add_argument('infile',
                        nargs='?',
                        type=argparse.FileType('r'),
                        default=sys.stdin)
    parser.add_argument('-o',
                        nargs='?',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        dest="outfile")
    args = parser.parse_args()

    alignments = mummerParser.parse_mummerFile(args.infile)
    if not alignments:
        # No alignment means no column names either: emit nothing instead of
        # failing on alignments[0].
        return

    out = args.outfile
    # Column names come from the first alignment. sorted() is used instead
    # of items().sort() so the code runs on both Python 2 and Python 3.
    header = sorted(alignments[0].items(), key=itemgetter(0))
    out.write("\t".join([str(name) for name, _ in header]) + "\n")

    for a in alignments:
        if args.extremal_only and not is_extremal(a):
            continue
        if args.florence_selection and not is_kept(a):
            continue

        row = sorted(a.items(), key=itemgetter(0))
        out.write("\t".join([str(value) for _, value in row]) + "\n")

Example #6

0

Show file

File: display_coords.py Project: BioinformaticsArchive/MIX

def main():
	"""Display the non-self alignments of a nucmer coords file in ASCII.

	Reads the coords file given on the command line (standard input by
	default), reports how many non-self alignments were found and exits
	with status 0 when there are none. When there are more alignments than
	entries in ``align_keys``, only those with the largest LEN2 values are
	kept and they are re-ordered by S1 before being handed to
	``print_aligned_contigs``.
	"""
	parser=argparse.ArgumentParser(description="Display nucmer alignments in ASCII")
	parser.add_argument('-w',dest="max_width",action="store",type=int,default=110,help="Width of text output")
	parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin,help="Nucmer coords file")

	args=parser.parse_args()
	alignments=mummerParser.parse_mummerFile(args.infile)
	# Filter out self alignments
	alignments=[x for x in alignments if x['TAGQ']!=x['TAGR']]
	# sys.stdout.write gives the same text as the former print statements
	# and is valid on both Python 2 and Python 3.
	sys.stdout.write("Found %s\n" % len(alignments))
	if not alignments:
		sys.exit(0)

	limit=len(align_keys)
	if len(alignments)>limit:
		# Keep the alignments with the largest LEN2, then restore S1 order.
		# NOTE(review): filtering triggers above len(align_keys) but keeps
		# len(align_keys)-3 entries -- confirm the offset of 3 is intended.
		alignments.sort(key=lambda x:x['LEN2'],reverse=True)
		alignments=alignments[:limit-3]
		alignments.sort(key=lambda x:x['S1'])
		sys.stdout.write("Filter down to %s\n" % len(alignments))
	print_aligned_contigs(alignments,max_width=args.max_width)