def divide_values(file, ref_scores):
    """Divide every value in a BSR matrix row by that row's reference score.

    The first line of *file* is treated as a header and copied (tab-joined)
    to ``BSR_matrix_values.txt`` in the current working directory.  For every
    following line the first whitespace-separated field is the row name and
    the remaining fields are numeric scores.  Each score is divided by
    ``ref_scores.get(row_name)``; the normalized row is written with two
    decimals (without the row name) and also collected for the return value.

    When the reference score is missing, not numeric, or zero, every value in
    that row is replaced by ``0.0`` and the row name is reported once through
    ``logging.logPrint`` after the whole file has been processed.

    Args:
        file: path of the whitespace-delimited matrix to normalize.
        ref_scores: mapping (anything with ``.get``) of row name to its
            reference score; values may be numbers or numeric strings.

    Returns:
        A list with one list of floats per data row, in file order.

    Raises:
        TypeError: if a score field in a data row cannot be parsed as a
            float (exception type kept for backward compatibility).
    """
    errors = []
    outdata = []
    # Both handles are context-managed so neither leaks when a row fails to
    # parse and the TypeError below propagates.
    with open(file) as infile, open("BSR_matrix_values.txt", "w") as outfile:
        header = infile.readline().split()
        outfile.write("\t".join(header) + "\n")
        for line in infile:
            all_fields = line.split()
            try:
                fields = [float(item) for item in all_fields[1:]]
            except ValueError:
                raise TypeError("abnormal number of fields observed")
            values = []
            if fields:
                row_name = all_fields[0]
                try:
                    # The reference is identical for the whole row, so it is
                    # looked up and converted once rather than per column.
                    reference = float(ref_scores.get(row_name))
                    values = [value / reference for value in fields]
                except (TypeError, ValueError, ZeroDivisionError):
                    # Name mismatch (None), non-numeric or zero reference:
                    # zero the row and remember the offending name.
                    errors.append(row_name)
                    values = [0.0] * len(fields)
            outfile.write("\t".join(["%.2f" % elem for elem in values]) + "\n")
            outdata.append(values)
    if errors:
        # Report each name once, ordered by its last occurrence in the file.
        seen = set()
        unique = []
        for name in reversed(errors):
            if name not in seen:
                seen.add(name)
                unique.append(name)
        unique.reverse()
        logging.logPrint("The following genes had no hits in datasets or are too short, values changed to 0, check names and output:%s" % "\n".join(unique))
    return outdata
def translate_genes(genes,outfile,min_len):
    """Translate nucleotide FASTA records into peptides with BioPython.

    Each record in *genes* is trimmed at its 3' end to a whole number of
    codons, translated with genetic code table 11 up to the first stop codon,
    and written to *outfile* in FASTA format if the peptide has at least
    *min_len* residues.  Identifiers of records whose peptide is shorter are
    collected and reported once through ``logging.logPrint``.

    Args:
        genes: path to the nucleotide FASTA file.
        outfile: path of the peptide FASTA file to create (overwritten).
        min_len: minimum peptide length to keep; anything ``int()`` accepts.

    Returns:
        The first retained peptide as a string, or ``None`` when no peptide
        met the length cutoff.

    Raises:
        TypeError: if *min_len* is not an integer value, or if a record could
            not be translated (exception type kept for backward
            compatibility).
    """
    # Validate the cutoff once, up front, so a bad value is not misreported
    # as "odd characters" in the first sequence.
    try:
        min_pep_len = int(min_len)
    except (TypeError, ValueError):
        raise TypeError("minimum peptide length must be an integer, got %r" % (min_len,))
    output = []
    too_short = []
    with open(outfile, "w") as output_handle, open(genes) as infile:
        for record in SeqIO.parse(infile, "fasta"):
            # Drop the 1-2 trailing bases that do not form a complete codon.
            usable = len(record.seq) - (len(record.seq) % 3)
            try:
                pep_seq = record.seq[:usable].translate(to_stop=True, table=11)
            except Exception:
                raise TypeError("odd characters observed in sequence %s" % record.id)
            if len(pep_seq) >= min_pep_len:
                output_handle.write(">" + record.id + "\n")
                output_handle.write(str(pep_seq) + "\n")
                output.append(pep_seq)
            else:
                too_short.append(record.id)
    # Log before returning: previously the early return made this warning
    # unreachable whenever at least one peptide had been written.
    if too_short:
        logging.logPrint("The following sequences were too short and will not be processed: %s" % "\n".join(too_short))
    if output:
        return str(output[0])
    return None
def _require_executable(executable, citation, missing_message):
    """Print *citation* if *executable* is found by ``which``; otherwise print *missing_message* and exit."""
    if subprocess.call(['which', executable]) == 0:
        print(citation)
    else:
        print(missing_message)
        sys.exit()


def _copy_genomes_to_workdir(dir_path, fastadir):
    """Copy every ``*.fasta`` file in *dir_path* to ``<fastadir>/<name>.new`` and return the genome names."""
    names = []
    for infile in glob.glob(os.path.join(dir_path, '*.fasta')):
        name = get_seq_name(infile)
        names.append(name)
        # The former os.symlink() attempt targeted the (existing) parent of
        # dir_path, so it always raised and fell back to this copy.
        copyfile("%s" % infile, "%s/%s.new" % (fastadir, name))
    return names


def main(directory,id,filter,processors,genes,cluster_method,blast,length,
         max_plog,min_hlog,f_plog,keep,filter_peps,filter_scaffolds,prefix,min_pep_length,
         intergenics,min_len,dup_toggle):
    """Run the complete pipeline and write the BSR matrix to the start directory.

    Steps, in order: validate the option combination, check that the required
    external programs are on PATH, create a working directory, obtain the
    gene set (de novo prediction + clustering, or a user-supplied file),
    align the genes against themselves for reference scores, align them
    against every genome, build the matrix, normalize it with
    ``divide_values`` and finally rename the outputs with the run prefix.

    Toggle arguments are the strings ``"T"`` / ``"F"``.  ``id`` and ``filter``
    shadow builtins but are kept because callers may pass them by keyword.

    Args:
        directory: directory searched for ``*.fasta`` and ``*.gbk`` genomes.
        id: identity threshold forwarded to usearch / vsearch / cd-hit.
        filter: forwarded unchanged to the BLAST helper functions.
        processors: worker/thread count forwarded to helpers and tools.
        genes: path to a ``.pep`` or ``.fasta`` gene file, or a value
            containing ``"null"`` to request de novo clustering.
        cluster_method: contains ``"usearch"``, ``"vsearch"`` or
            ``"cd-hit"``; contains ``"null"`` when *genes* is supplied.
        blast: ``"blastn"``, ``"tblastn"``, ``"blastp"``, ``"blat"`` or
            ``"diamond"``.
        length, max_plog, min_hlog: forwarded to ``find_dups_dev``.
        f_plog: if it contains ``"T"``, paralogs are filtered from the matrix.
        keep: ``"T"`` keeps the working directory; anything else removes it.
        filter_peps: ``"T"`` runs ``filter_seqs`` on the peptide set.
        filter_scaffolds: ``"T"`` runs ``filter_scaffolds_fun`` on the genes.
        prefix: output prefix; a value containing ``"NULL"`` means "use a
            timestamp".
        min_pep_length: minimum peptide length passed to ``translate_genes``.
        intergenics: ``"T"`` includes intergenic regions (nucleotide
            aligners only).
        min_len: value passed to cd-hit's ``-s`` option.
        dup_toggle: ``"T"`` enables the duplicate search.

    Exits the interpreter through ``sys.exit()`` on invalid option
    combinations, missing programs, missing inputs or a pre-existing working
    directory.
    """
    start_dir = os.getcwd()
    ap = os.path.abspath("%s" % start_dir)
    dir_path = os.path.abspath("%s" % directory)
    protein_search = blast in ("blastp", "diamond")

    # ---- option sanity checks -------------------------------------------
    if "null" not in genes and "null" not in cluster_method:
        logging.logPrint("Choose either genes or de novo clustering method, not both")
        sys.exit()
    # Intergenic regions cannot be used with a protein alignment method.
    if intergenics == "T" and blast in ("tblastn", "blastp", "diamond"):
        logging.logPrint("Incompatible choices: if incorporating intergenics, choose a nucleotide alignment method")
        sys.exit()

    # ---- aligner availability -------------------------------------------
    logging.logPrint("Testing paths of dependencies")
    if blast in ("blastn", "tblastn", "blastp"):
        _require_executable(
            'blastn',
            "citation: Altschul SF, Madden TL, Schaffer AA, Zhang J, Zhang Z, Miller W, and Lipman DJ. 1997. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res 25:3389-3402",
            "blast isn't in your path, but needs to be!")
    elif blast == "blat":
        _require_executable(
            'blat',
            "citation: W.James Kent. 2002. BLAT - The BLAST-Like Alignment Tool. Genome Research 12:656-664",
            "You have requested blat, but it is not in your PATH")
    elif blast == "diamond":
        _require_executable(
            'diamond',
            "citation: Buchfink B, Xie C, Huson DH. 2015. Fast and sensitive protein alignment using DIAMOND. Nature methods, 12, 59-60.",
            "You have requested DIAMOND, but it is not in your PATH (as diamond)")

    # ---- working directory ----------------------------------------------
    if "NULL" in prefix:
        import datetime
        timestamp = datetime.datetime.now()
        tmp_rename = str(timestamp.year), str(timestamp.month), str(timestamp.day), str(timestamp.hour), str(timestamp.minute), str(timestamp.second)
        run_label = "".join(tmp_rename)
    else:
        run_label = prefix
    fastadir = "%s/%s" % (ap, run_label)
    if os.path.exists(fastadir):
        print("old temp directory exists (%s/%s). Delete and run again" % (ap, run_label))
        sys.exit()
    os.makedirs(fastadir)

    # ---- stage input genomes --------------------------------------------
    samples = _copy_genomes_to_workdir(dir_path, fastadir)
    genbank_files = glob.glob(os.path.join(dir_path, '*.gbk'))
    if not samples and not genbank_files:
        print("no usable genome files found, exiting...")
        sys.exit()

    if "null" in genes:
        # ================= de novo gene prediction + clustering ===========
        if "null" in cluster_method:
            print("Clustering method needed if genes aren't provided...exiting")
            sys.exit()
        _require_executable(
            'prodigal',
            "citation: Hyatt D, Chen GL, Locascio PF, Land ML, Larimer FW, and Hauser LJ. 2010. Prodigal: prokaryotic gene recognition and translation initiation site identification. BMC Bioinformatics 11:119",
            "prodigal is not in your path, but needs to be!")
        if "usearch" in cluster_method:
            _require_executable(
                'usearch',
                "citation: Edgar RC. 2010. Search and clustering orders of magnitude faster than BLAST. Bioinformatics 26:2460-2461",
                "usearch is not in your path, but needs to be!")
        elif "cd-hit" in cluster_method:
            _require_executable(
                'cd-hit' if protein_search else 'cd-hit-est',
                "citation: Li, W., Godzik, A. 2006. Cd-hit: a fast program for clustering and comparing large sets of protein or nuceltodie sequences. Bioinformatics 22(13):1658-1659",
                "cd-hit is not in your path, but needs to be!")
        elif "vsearch" in cluster_method:
            if protein_search:
                print("vsearch not compatible with proteins, exiting...")
                sys.exit()
            _require_executable(
                'vsearch',
                "citation: Rognes, T., Flouri, T., Nichols, B., Qunice, C., Mahe, Frederic. 2016. VSEARCH: a versatile open source tool for metagenomics. PeerJ Preprints. DOI: https://doi.org/10.7287/peerj.preprints.2409v1",
                "vsearch is not in your path, but needs to be!")

        # Only predict genes if there are FASTA files.
        if samples:
            logging.logPrint("predicting genes with Prodigal")
            predict_genes(fastadir, processors, intergenics)
            logging.logPrint("Prodigal done")

        # GenBank input is converted to locus-tag FASTA files.
        if genbank_files:
            logging.logPrint("Converting genbank files")
            os.chdir("%s" % fastadir)
            genbank_hits = process_genbank_files(dir_path)
        else:
            genbank_hits = []

        # NOTE(review): the shell globs below rely on the current directory
        # being fastadir; presumably predict_genes() changes into it when no
        # GenBank files are present -- confirm against that helper.
        if not genbank_files:
            if intergenics == "F":
                os.system("cat *genes.seqs > all_gene_seqs.out")
            elif intergenics == "T":
                os.system("cat *genes.seqs *intergenics.seqs > all_gene_seqs.out")
            if filter_scaffolds == "T":
                filter_scaffolds_fun("all_gene_seqs.out")
                os.system("mv tmp.out all_gene_seqs.out")
        else:
            # First combine the Prodigal predictions, when there are any.
            if samples:
                if intergenics == "F":
                    os.system("cat *genes.seqs > all_gene_seqs.out.tmp")
                elif intergenics == "T":
                    os.system("cat *genes.seqs *intergenics.seqs > all_gene_seqs.out.tmp")
                # Scaffold filtering is only attempted when the Prodigal
                # output it operates on has actually been produced.
                if filter_scaffolds == "T":
                    filter_scaffolds_fun("all_gene_seqs.out.tmp")
                    os.system("mv tmp.out all_gene_seqs.out.tmp")
            # Combine the locus tags with the Prodigal predictions (if any).
            if samples:
                os.system("cat *locus_tags.fasta all_gene_seqs.out.tmp > all_gene_seqs.out")
            else:
                os.system("cat *locus_tags.fasta > all_gene_seqs.out")
            if protein_search:
                # Locus tags must be converted into peptides here.
                translate_genes("all_gene_seqs.out", "all_genes.pep", 30)
                for infile in glob.glob(os.path.join(fastadir, "*locus_tags.fasta")):
                    base = os.path.basename(infile)
                    name = base.replace(".locus_tags.fasta", "")
                    translate_genes(base, "%s.fasta.new_genes.pep" % name, 30)
            else:
                for hit in genbank_hits:
                    reduced_hit = hit.replace(".gbk", "")
                    # Ensures genes are aligned back against the genome.
                    SeqIO.convert("%s/%s" % (dir_path, hit), "genbank", "%s.fasta.new" % reduced_hit, "fasta")

        # ---- clustering ---------------------------------------------------
        if "NULL" in cluster_method:
            print("Clustering chosen, but no method selected...exiting")
            sys.exit()
        elif "usearch" in cluster_method:
            os.system("mkdir split_files")
            if protein_search:
                if not genbank_hits:
                    os.system("cat *new_genes.pep > split_files/all_sorted.txt")
                else:
                    os.system("cp all_genes.pep split_files/all_sorted.txt")
            else:
                os.system("cp all_gene_seqs.out split_files/all_sorted.txt")
            os.chdir("split_files/")
            logging.logPrint("Splitting FASTA file for use with USEARCH")
            split_files("all_sorted.txt")
            logging.logPrint("clustering with USEARCH at an ID of %s" % id)
            run_usearch_dev(id, processors)
            os.system("cat *.usearch.out > all_sorted.txt")
            os.system("mv all_sorted.txt %s" % fastadir)
            os.chdir("%s" % fastadir)
            # Output needs to be either FASTA or PEP.
            data_type = find_data_type("all_sorted.txt")
            uclust_cluster(id, data_type)
            logging.logPrint("USEARCH clustering finished")
        elif "vsearch" in cluster_method:
            logging.logPrint("clustering with VSEARCH at an ID of %s, using %s processors" % (id,processors))
            run_vsearch(id, processors, "all_gene_seqs.out")
            os.system("mv vsearch.out consensus.fasta")
            logging.logPrint("VSEARCH clustering finished")
        elif "cd-hit" in cluster_method:
            logging.logPrint("clustering with cd-hit at an ID of %s, length percentage of %s, using %s processors" % (id,min_len,processors))
            if protein_search:
                os.system("cat *new_genes.pep > all_gene_seqs.pep")
                subprocess.check_call("cd-hit -i all_gene_seqs.pep -o consensus.pep -M 0 -T %s -c %s -s %s > cdhit.cluster 2>&1" % (processors,id,min_len), shell=True)
            else:
                subprocess.check_call("cd-hit-est -i all_gene_seqs.out -o consensus.fasta -M 0 -T %s -c %s -s %s > cdhit.cluster 2>&1" % (processors,id,min_len), shell=True)

        # ---- duplicate header check on the clustered set ------------------
        if os.path.exists("consensus.fasta"):
            dup_ids = test_duplicate_header_ids("consensus.fasta")
        elif os.path.exists("consensus.pep"):
            dup_ids = test_duplicate_header_ids("consensus.pep")
        else:
            print("clustering didn't work. Check input and try again")
            sys.exit()
        # The helper reports the strings "True"/"False"; "False" is the case
        # handled here as "duplicates present".
        if dup_ids == "False":
            print("duplicate headers identified, renaming..")
            try:
                rename_fasta_header("consensus.fasta", "tmp.txt")
                os.system("mv tmp.txt consensus.fasta")
            except Exception:
                # Falls back to the peptide consensus when the nucleotide
                # one cannot be processed.
                rename_fasta_header("consensus.pep", "tmp.txt")
                os.system("mv tmp.txt consensus.pep")

        # ---- self alignment (reference scores) ----------------------------
        if "tblastn" == blast:
            subprocess.check_call("makeblastdb -in consensus.fasta -dbtype nucl > /dev/null 2>&1", shell=True)
            translate_genes("consensus.fasta", "tmp.pep", min_pep_length)
            if filter_peps == "T":
                filter_seqs("tmp.pep", "consensus.pep")
                os.system("rm tmp.pep")
            else:
                os.system("mv tmp.pep consensus.pep")
            clusters = get_cluster_ids("consensus.pep")
            blast_against_self_tblastn("tblastn", "consensus.fasta", "consensus.pep", "tmp_blast.out", processors, filter)
        elif "blastn" == blast:
            subprocess.check_call("makeblastdb -in consensus.fasta -dbtype nucl > /dev/null 2>&1", shell=True)
            blast_against_self_blastn("blastn", "consensus.fasta", "consensus.fasta", "tmp_blast.out", filter, processors)
            clusters = get_cluster_ids("consensus.fasta")
        elif "blat" == blast:
            blat_against_self("consensus.fasta", "consensus.fasta", "tmp_blast.out", processors)
            clusters = get_cluster_ids("consensus.fasta")
        elif "blastp" == blast:
            subprocess.check_call("makeblastdb -in consensus.pep -dbtype prot > /dev/null 2>&1", shell=True)
            blast_against_self_tblastn("blastp", "consensus.pep", "consensus.pep", "tmp_blast.out", processors, filter)
            clusters = get_cluster_ids("consensus.pep")
        elif "diamond" == blast:
            if filter_peps == "T":
                filter_seqs("consensus.pep", "tmp.pep")
                os.system("mv tmp.pep consensus.pep")
            subprocess.check_call("diamond makedb --in consensus.pep -d consensus > /dev/null 2>&1", shell=True)
            # Honour the requested processor count (was hard-coded to 4).
            subprocess.check_call("diamond blastp -p %s -d consensus -f 6 -q consensus.pep -o tmp_blast.out > /dev/null 2>&1" % processors, shell=True)
            clusters = get_cluster_ids("consensus.pep")
        subprocess.check_call("sort -u -k 1,1 tmp_blast.out > self_blast.out", shell=True)
        ref_scores = parse_self_blast("self_blast.out")
        os.system("cp tmp_blast.out ref.scores")
        subprocess.check_call("rm tmp_blast.out self_blast.out", shell=True)

        # ---- alignment against every genome -------------------------------
        if blast in ("tblastn", "blastn", "blastp"):
            logging.logPrint("starting BLAST")
        elif blast == "diamond":
            logging.logPrint("starting Diamond")
        else:
            logging.logPrint("starting BLAT")
        if "tblastn" == blast:
            blast_against_each_genome_tblastn_dev(processors, "consensus.pep", filter)
        elif "blastn" == blast:
            blast_against_each_genome_blastn_dev(processors, filter, "consensus.fasta")
        elif "blat" == blast:
            blat_against_each_genome_dev("consensus.fasta", processors)
        elif "blastp" == blast:
            blastp_against_each_annotation("consensus.pep", processors, filter)
        elif "diamond" == blast:
            diamond_against_each_annotation("consensus.pep", processors)
    else:
        # ================= user-supplied genes (-g) ========================
        logging.logPrint("Using pre-compiled set of predicted genes")
        files = glob.glob(os.path.join(dir_path, "*.fasta"))
        genbank_files = glob.glob(os.path.join(dir_path, "*.gbk"))
        if genbank_files:
            # Convert inside fastadir so genes are aligned back against the
            # genome, then return to the start directory.
            os.chdir(fastadir)
            for hit in genbank_files:
                base = os.path.basename(hit)
                reduced_hit = base.replace(".gbk", "")
                SeqIO.convert("%s/%s" % (dir_path, base), "genbank", "%s.fasta.new" % reduced_hit, "fasta")
            os.chdir(start_dir)
        if not files and not genbank_files:
            print("no usable reference genomes found!")
            sys.exit()
        gene_path = os.path.abspath("%s" % genes)
        # find_data_type reports "aa" or "nt" (other values are rejected below).
        data_type = find_data_type(gene_path)
        dup_ids = test_duplicate_header_ids(gene_path)
        if dup_ids == "False":
            print("duplicate headers identified, exiting..")
            sys.exit()
        clusters = get_cluster_ids(gene_path)
        os.chdir("%s" % fastadir)
        if gene_path.endswith(".pep"):
            if data_type != "aa":
                print("File is supposed to contain proteins, but doesn't look correct..exiting")
                sys.exit()
            os.system("cp %s %s/genes.pep" % (gene_path,fastadir))
            if blast == "tblastn" or blast == "blastp":
                logging.logPrint("using %s on peptides" % blast)
                try:
                    subprocess.check_call("makeblastdb -in genes.pep -dbtype prot > /dev/null 2>&1", shell=True)
                except (subprocess.CalledProcessError, OSError):
                    logging.logPrint("problem encountered formatting BLAST database")
                    sys.exit()
                blast_against_self_tblastn("blastp", "genes.pep", "genes.pep", "tmp_blast.out", processors, filter)
            elif blast == "diamond":
                logging.logPrint("using %s on peptides" % blast)
                try:
                    subprocess.check_call("diamond makedb --in %s -d self > /dev/null 2>&1" % gene_path, shell=True)
                except (subprocess.CalledProcessError, OSError):
                    logging.logPrint("problem encountered formatting DIAMOND database")
                    # Stop here, like the BLAST branch: the search below
                    # cannot succeed without the database.
                    sys.exit()
                # Honour the requested processor count (was hard-coded to 4).
                subprocess.check_call("diamond blastp -p %s -d self -f 6 -q %s -o tmp_blast.out > /dev/null 2>&1" % (processors, gene_path), shell=True)
            elif blast == "blat" or blast == "blastn":
                print("Nucleotide aligner not compatible with protein sequences...exiting")
                sys.exit()
            # Aligning back against each genome.
            if blast == "tblastn":
                logging.logPrint("starting TBLASTN")
                blast_against_each_genome_tblastn_dev(processors, gene_path, filter)
            elif blast == "blastp":
                # Gene prediction is needed for each genome first.
                _copy_genomes_to_workdir(dir_path, fastadir)
                logging.logPrint("Predicting genes with Prodigal")
                predict_genes(fastadir, processors, intergenics)
                logging.logPrint("BlastP starting")
                blastp_against_each_annotation("genes.pep", processors, filter)
            elif blast == "diamond":
                _copy_genomes_to_workdir(dir_path, fastadir)
                logging.logPrint("Predicting genes with Prodigal")
                predict_genes(fastadir, processors, intergenics)
                logging.logPrint("Diamond starting")
                diamond_against_each_annotation(gene_path, processors)
        elif gene_path.endswith(".fasta"):
            if data_type != "nt":
                print("File is supposed to contain nucleotides, but doesn't look correct..exiting ")
                sys.exit()
            os.system("cp %s %s" % (gene_path,fastadir))
            if protein_search:
                print("protein alignment not compatible with nucleotide input..exiting")
                sys.exit()
            if "tblastn" == blast:
                logging.logPrint("using tblastn")
                translate_genes(gene_path, "genes.pep", min_pep_length)
                try:
                    subprocess.check_call("makeblastdb -in %s -dbtype nucl > /dev/null 2>&1" % gene_path, shell=True)
                except (subprocess.CalledProcessError, OSError):
                    logging.logPrint("problem encountered with BLAST database")
                    sys.exit()
                blast_against_self_tblastn("tblastn", gene_path, "genes.pep", "tmp_blast.out", processors, filter)
                logging.logPrint("starting BLAST")
                blast_against_each_genome_tblastn_dev(processors, "genes.pep", filter)
                os.system("cp genes.pep %s" % start_dir)
            elif "blastn" == blast:
                logging.logPrint("using blastn")
                try:
                    subprocess.check_call("makeblastdb -in %s -dbtype nucl > /dev/null 2>&1" % gene_path, shell=True)
                except (subprocess.CalledProcessError, OSError):
                    logging.logPrint("Database not formatted correctly...exiting")
                    sys.exit()
                try:
                    blast_against_self_blastn("blastn", gene_path, gene_path, "tmp_blast.out", filter, processors)
                except Exception:
                    print("problem with blastn, exiting")
                    sys.exit()
                logging.logPrint("starting BLAST")
                try:
                    blast_against_each_genome_blastn_dev(processors, filter, gene_path)
                except Exception:
                    print("problem with blastn, exiting")
                    sys.exit()
            elif "blat" == blast:
                logging.logPrint("using blat")
                blat_against_self(gene_path, gene_path, "tmp_blast.out", processors)
                logging.logPrint("starting BLAT")
                blat_against_each_genome_dev(gene_path, processors)
        else:
            print("input file format not supported")
            sys.exit()
        subprocess.check_call("sort -u -k 1,1 tmp_blast.out > self_blast.out", shell=True)
        os.system("cp self_blast.out ref.scores")
        ref_scores = parse_self_blast("self_blast.out")
        subprocess.check_call("rm tmp_blast.out self_blast.out", shell=True)

    # ---- post-alignment processing ---------------------------------------
    if blast == "blat":
        logging.logPrint("BLAT complete")
    elif blast == "diamond":
        logging.logPrint("Diamond complete")
    else:
        logging.logPrint("BLAST done")
    if dup_toggle == "T":
        logging.logPrint("Finding duplicates")
        find_dups_dev(ref_scores, length, max_plog, min_hlog, clusters, processors)
        logging.logPrint("Finding duplicates complete")
    else:
        logging.logPrint("Duplicate searching turned off")
    parse_blast_report_dev("false", 4)
    curr_dir = os.getcwd()
    table_files = glob.glob(os.path.join(curr_dir, "*.filtered.unique"))
    files_and_temp_names = [(str(idx), os.path.join(curr_dir, f)) for idx, f in enumerate(table_files)]
    nr_sorted = sorted(clusters)
    # First table row: a blank corner cell followed by the sorted cluster ids.
    table_list = [[" "] + nr_sorted]
    logging.logPrint("starting matrix building")
    new_names, new_table = new_loop_dev(files_and_temp_names, processors, clusters)
    new_table_list = table_list + new_table
    # ref.list supplies the row-name column that is pasted onto the matrix.
    with open("ref.list", "a") as ref_list:
        ref_list.write("\n")
        for cluster in nr_sorted:
            ref_list.write("%s\n" % cluster)
    with open("names.txt", "w") as names_out:
        for name_group in new_names:
            for name in name_group:
                names_out.write("".join(name) + "\n")
    create_bsr_matrix_dev(new_table_list)
    divide_values("bsr_matrix", ref_scores)
    subprocess.check_call("paste ref.list BSR_matrix_values.txt > %s/bsr_matrix_values.txt" % start_dir, shell=True)
    # Best effort: not every file exists for every aligner (e.g. no
    # consensus.pep for nucleotide runs), so a failing cp is ignored.
    with open(os.devnull, 'w') as devnull:
        try:
            if dup_toggle == "T":
                subprocess.check_call("cp dup_matrix.txt names.txt consensus.pep duplicate_ids.txt consensus.fasta %s" % ap, shell=True, stderr=devnull)
            else:
                subprocess.check_call("cp names.txt consensus.pep consensus.fasta %s" % ap, shell=True, stderr=devnull)
        except (subprocess.CalledProcessError, OSError):
            pass
    if "T" in f_plog:
        logging.logPrint("filtering duplicates")
        num_filtered = filter_paralogs("%s/bsr_matrix_values.txt" % start_dir, "duplicate_ids.txt")
        logging.logPrint("%s duplicates filtered" % str(num_filtered))
        os.system("cp bsr_matrix_values_filtered.txt %s/%s_paralogs_filtered_bsr_matrix_values.txt" % (start_dir, run_label))

    # ---- rename outputs in the start directory ---------------------------
    os.chdir("%s" % ap)
    logging.logPrint("matrix built")
    if dup_toggle == "T":
        os.system("mv dup_matrix.txt %s_dup_matrix.txt" % run_label)
        os.system("mv duplicate_ids.txt %s_duplicate_ids.txt" % run_label)
    os.system("mv names.txt %s_names.txt" % run_label)
    os.system("mv bsr_matrix_values.txt %s_bsr_matrix.txt" % run_label)
    if os.path.isfile("consensus.fasta"):
        os.system("mv consensus.fasta %s_consensus.fasta" % run_label)
    if os.path.isfile("consensus.pep"):
        os.system("mv consensus.pep %s_consensus.pep" % run_label)

    # ---- record the run parameters ---------------------------------------
    parameters = [
        ("-d", directory),
        ("-i", id),
        ("-f", filter),
        ("-p", processors),
        ("-g", genes),
        ("-c", cluster_method),
        ("-b", blast),
        ("-l", length),
        ("-m", max_plog),
        ("-n", min_hlog),
        ("-t", f_plog),
        ("-k", keep),
        ("-s", filter_peps),
        ("-e", filter_scaffolds),
        ("-x", prefix),
        ("-y", intergenics),
        ("-ml", min_len),
    ]
    with open("%s_run_parameters.txt" % run_label, "w") as outfile:
        for flag, value in parameters:
            # Each option line ends with a shell line-continuation backslash.
            outfile.write("%s %s \\\n" % (flag, value))
        outfile.write("-z %s\n" % dup_toggle)
        outfile.write("temp data stored here if kept: %s" % fastadir)
    logging.logPrint("all Done")
    if "T" != keep:
        os.system("rm -rf %s" % fastadir)
    os.chdir("%s" % ap)