def _strip_fasta(src, dst, strip):
    """
    Copy the fasta file src to dst, removing strip bases from each end of
    every record

    :param src: path of the fasta file to read
    :param dst: path of the fasta file to write (overwritten if present)
    :param strip: number of bases to remove from the start and from the end

    :type src: string
    :type dst: string
    :type strip: int (>= 0)
    """
    # NOTE(review): the "rU" mode is kept from the original code. Python 3.11+
    # rejects the 'U' flag, so it must become "r" if this runs on Python 3.
    with open(src, "rU") as fin, open(dst, 'w') as fout:
        for rec in SeqIO.parse(fin, "fasta"):
            # Use an explicit end index: seq[strip:-strip] would be empty for
            # strip == 0 because -0 == 0. The clamp keeps the slice empty
            # (rather than wrapping around) when the record is shorter than
            # strip.
            end = max(len(rec.seq) - strip, 0)
            rec.seq = rec.seq[strip:end]
            SeqIO.write(rec, fout, "fasta")


def strip_bases(args):
    """
    Strip the 1st and last 'N' bases from mapping consensuses

    Uses:
        * args.cons
        * args.seqs_of_interest
        * args.strip

    To avoid the effects of lead in and lead out coverage resulting in
    uncalled bases

    The stripped consensuses are written to a 'stripped' sub-directory of
    args.cons and the stripped seqs_of_interest is written next to the
    original with '_trimmed' inserted before the file extension

    :param args: the argparse args containing args.strip value

    :type args: argparse args

    :raises ValueError: if args.strip is not an integer or is negative
    :raises OSError: if the stripped directory does not exist and cannot be
                     created

    :rtype: the updated args to reflect the args.cons &
            args.seqs_of_interest location (args.strip is also converted
            to an int)
    """
    # Validate before touching the filesystem or mutating args any further
    args.strip = int(args.strip)
    if args.strip < 0:
        raise ValueError("args.strip must be >= 0, got %d" % args.strip)
    # Get in the fasta files in the consensus directory
    fasta_in = util.get_fasta_files(args.cons)
    # Build a stripped directory
    new_cons_dir = os.path.join(args.cons, 'stripped')
    try:
        os.mkdir(new_cons_dir)
    except OSError:
        # Only an already existing directory is tolerated. Any other failure
        # (permissions, missing parent, ...) is re-raised as is
        if not os.path.isdir(new_cons_dir):
            raise
        sys.stderr.write("A stripped directory exists. Overwriting\n")
    # Update the args.cons to the stripped directory
    args.cons = new_cons_dir
    # Strip the start and end
    for fa in fasta_in:
        out = os.path.join(args.cons, os.path.basename(fa))
        _strip_fasta(fa, out, args.strip)
    # Trim the db as well. splitext only looks at the final path component,
    # so dots in directory names (e.g. './db/file') are left alone
    root, ext = os.path.splitext(args.seqs_of_interest)
    stripped_db = root + '_trimmed' + ext
    _strip_fasta(args.seqs_of_interest, stripped_db, args.strip)
    # Update the args.seqs_of_interest
    args.seqs_of_interest = stripped_db
    return args
def do_run(args, data_path, match_score, vfs_list):
    """
    Perform a SeqFindR run

    For each fasta file returned by util.get_fasta_files(data_path):
        * blast.make_BLAST_database is called on it (the value it returns
          is used as the strain id)
        * args.seqs_of_interest is passed to blast.run_BLAST against
          <cwd>/DBs/<file basename>
        * the hits accepted by blast.parse_BLAST (given float(args.tol))
          are turned into a row by build_matrix_row

    Uses:
        * args.index_file
        * args.seqs_of_interest
        * args.tol

    :param args: the argparse args (also handed on to blast.run_BLAST)
    :param data_path: location handed to util.get_fasta_files
    :param match_score: handed on unchanged to build_matrix_row
    :param vfs_list: handed on unchanged to build_matrix_row

    :type args: argparse args

    :rtype: a tuple (matrix, y_label). matrix is a list with one row per
            input file, each row having the strain id inserted at index 0.
            y_label is the list of strain ids in the same order
    """
    matrix, y_label = [], []
    in_files = util.get_fasta_files(data_path)
    # Reorder if requested. Identity test: None is a singleton, and '!='
    # would dispatch to a user-defined comparison on the value
    if args.index_file is not None:
        in_files = util.order_inputs(args.index_file, in_files)
    for subject in in_files:
        strain_id = blast.make_BLAST_database(subject)
        y_label.append(strain_id)
        database = os.path.basename(subject)
        # NOTE(review): presumably make_BLAST_database writes its output to
        # DBs/<basename> under the current working directory -- confirm
        db_path = os.path.join(os.getcwd(), "DBs/"+database)
        blast_xml = blast.run_BLAST(args.seqs_of_interest, db_path, args)
        accepted_hits = blast.parse_BLAST(blast_xml, float(args.tol))
        row = build_matrix_row(vfs_list, accepted_hits, match_score)
        # The strain id leads each row
        row.insert(0, strain_id)
        matrix.append(row)
    return matrix, y_label