def main():
    """Filter hmmgs results and write the surviving hits with their sequences.

    Command-line arguments select length, bits-saved, bits-per-position and
    search-direction bounds.  Every option except the three positional paths
    and ``--stem`` is forwarded to ``hmmgs_utils.filter`` as a keyword
    argument named after its ``dest``.

    Outputs (all in the current directory):
      * ``prefilter.eps`` / ``postfilter.eps`` -- bits-ratio plots before and
        after filtering.
      * ``<stem>_hmmgs.txt`` -- header plus the filtered hmmgs lines.
      * ``<stem>_nucl.fasta`` / ``<stem>_prot.fasta`` -- the nucleotide and
        protein sequence of each filtered contig.

    Raises:
        KeyError: if a filtered line's ``contig_id`` is missing from either
            sequence file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--min-length", dest="min_prot_length", default=0,
                        help="Minimum protein length", type=int)
    parser.add_argument("--max-length", dest="max_prot_length", default=10000,
                        help="Maximum protein length", type=int)
    parser.add_argument("--min-bits", dest="min_bits_saved", default=-10000,
                        help="Minimum bits saved", type=float)
    parser.add_argument("--max-bits", dest="max_bits_saved", default=10000,
                        help="Maximum bits saved", type=float)
    parser.add_argument("--min-bits-ratio", dest="min_bits_ratio", default=0,
                        help="Minimum bits saved per position", type=float)
    parser.add_argument("--max-bits-ratio", dest="max_bits_ratio",
                        default=10000,
                        help="Maximum bits saved per position", type=float)
    parser.add_argument("--stem", dest="stem", default="filtered",
                        help="Output file stem")
    # No ``type=float`` here: the choices are strings, and float("left")
    # would make argparse reject every explicit --direction value.
    parser.add_argument("--direction", dest="direction",
                        help="Search direction",
                        default=set(["left", "right"]),
                        choices=["left", "right"])
    parser.add_argument("hmmgs_file")
    parser.add_argument("hmmgs_nucl_seqs")
    parser.add_argument("hmmgs_prot_seqs")

    # Reference note kept from the original source (presumably the keyword
    # defaults of hmmgs_utils.filter -- verify against that module):
    # min_prot_length=0, max_prot_length=10000, min_bits_saved=-1000,
    # max_bits_saved=10000, min_bits_ratio=0, max_bits_ratio=10000,
    # direction=set(["left", "right"]), state_after=0, state_before=10000

    args = parser.parse_args()

    # SeqIO.to_dict consumes the whole parser, so the handles can be closed
    # as soon as each dictionary is built.
    with open(args.hmmgs_nucl_seqs) as nucl_handle:
        nucl_seqs = SeqIO.to_dict(SeqIO.parse(nucl_handle, "fasta"))
    with open(args.hmmgs_prot_seqs) as prot_handle:
        prot_seqs = SeqIO.to_dict(SeqIO.parse(prot_handle, "fasta"))

    hmmgs_lines = list(hmmgs_utils.read_hmmgs_file(args.hmmgs_file))
    hmmgs_utils.plot_bits_ratio(hmmgs_lines, "prefilter.eps")

    # Everything left after removing the I/O-only options is a filter keyword.
    filter_params = copy.copy(vars(args))
    for io_only in ("hmmgs_file", "stem", "hmmgs_nucl_seqs",
                    "hmmgs_prot_seqs"):
        del filter_params[io_only]

    # The default is a set of directions; a value given on the command line
    # is a single string, so wrap it to hand the filter the same shape.
    # NOTE(review): assumes hmmgs_utils.filter expects a set -- confirm.
    if not isinstance(filter_params["direction"], set):
        filter_params["direction"] = set([filter_params["direction"]])

    filtered_lines = list(hmmgs_utils.filter(hmmgs_lines, **filter_params))
    hmmgs_utils.plot_bits_ratio(filtered_lines, "postfilter.eps")

    hmmgs_header = hmmgs_utils.read_hmmgs_header(args.hmmgs_file)

    with open("{0}_hmmgs.txt".format(args.stem), "w") as filtered_hmmgs:
        with open("{0}_nucl.fasta".format(args.stem), "w") as filtered_nucl:
            with open("{0}_prot.fasta".format(args.stem), "w") as filtered_prot:
                hmmgs_utils.write_hmmgs_header(filtered_hmmgs, hmmgs_header)
                for line in filtered_lines:
                    contig_id = line["contig_id"]
                    hmmgs_utils.write_hmmgs_line(filtered_hmmgs, hmmgs_header,
                                                 line)
                    filtered_nucl.write(">{0}\n{1}\n".format(
                        contig_id, nucl_seqs[contig_id].seq))
                    filtered_prot.write(">{0}\n{1}\n".format(
                        contig_id, prot_seqs[contig_id].seq))
#!/usr/bin/env python
# attach_starting_mer.py: index the lines of an hmmgs file by their first
# field, then iterate over the records of a FASTA file.
import sys
import hmmgs_utils
from Bio import SeqIO

# Exactly two arguments are required: the hmmgs file and the FASTA file.
if len(sys.argv) != 3:
    sys.stderr.write(
        "USAGE: attach_starting_mer.py <hmmgs_file> <fasta_file>\n")
    sys.exit(1)

# Map each hmmgs line's first field to the full line.
hmmgs_results = {}
for line in hmmgs_utils.read_hmmgs_file(sys.argv[1]):
    hmmgs_results[line[0]] = line

with open(sys.argv[2]) as fasta_handle:
    for seq in SeqIO.parse(fasta_handle, "fasta"):
        # NOTE(review): placeholder body -- records are parsed but nothing
        # is done with them or with hmmgs_results yet.
        pass
#!/usr/bin/env python
# scatter_length_to_bits.py: scatter-plot protein length (x) against bits
# (y) for every line of an hmmgs file and save the chart to a file.
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import hmmgs_utils
import sys

# Exactly two arguments are required: the hmmgs file and the chart path.
if len(sys.argv) != 3:
    sys.stderr.write(
        "USAGE: scatter_length_to_bits.py <hmmgs_file> <chart_file>\n")
    sys.exit(1)

lengths = []
bits = []
for line in hmmgs_utils.read_hmmgs_file(sys.argv[1]):
    prot_length = int(line.prot_length)
    bits_saved = float(line.bits)

    lengths.append(prot_length)
    # Append the parsed value; the original appended the ``bits`` list to
    # itself, so the plotted y-values were never the bits read from the file.
    bits.append(bits_saved)

plt.scatter(lengths, bits)
plt.savefig(sys.argv[2])