def initialize_seed_database(which_bowtie2, fasta_f, overwrite=False, verbose=False):
    """Build the bowtie2 index for one seed FASTA file unless it already exists.

    The index base name is ``remove_db_postfix(fasta_f) + ".index"``. The build
    is skipped when all six ``*.bt2l`` index files are already present and
    ``overwrite`` is false. Progress is reported on stdout as
    ``bowtie2-build <basename> ... finished`` or ``... skipped``.

    :param which_bowtie2: forwarded unchanged to ``build_bowtie2_db``.
    :param fasta_f: path of the seed FASTA file.
    :param overwrite: rebuild even if every index file already exists; also
        forwarded to ``build_bowtie2_db``.
    :param verbose: forwarded to ``build_bowtie2_db`` as both ``silent`` and
        ``verbose_log``.
    """
    # bowtie index
    index_suffixes = (".1.bt2l", ".2.bt2l", ".3.bt2l", ".4.bt2l", ".rev.1.bt2l", ".rev.2.bt2l")
    new_seed_file = fasta_f + ".modified"
    # A truthy result means the ".modified" file is to be used as build input
    # (presumably a copy with adjusted sequence names -- confirm in
    # check_fasta_seq_names) and must be deleted afterwards.
    changed = check_fasta_seq_names(fasta_f, new_seed_file)
    seed_file = new_seed_file if changed else fasta_f
    try:
        output_base = remove_db_postfix(fasta_f) + ".index"
        sys.stdout.write("bowtie2-build " + os.path.basename(fasta_f) + " ... ")
        sys.stdout.flush()
        if overwrite or not all(os.path.exists(output_base + postfix) for postfix in index_suffixes):
            # NOTE(review): silent receives the value of verbose; this looks
            # inverted -- confirm against build_bowtie2_db before changing.
            build_bowtie2_db(seed_file=seed_file, seed_index_base=output_base, which_bowtie2=which_bowtie2,
                             overwrite=overwrite, random_seed=12345, silent=verbose, verbose_log=verbose)
            sys.stdout.write("finished\n")
        else:
            sys.stdout.write("skipped\n")
    finally:
        # Remove the temporary seed file even if the build raised, so a failed
        # run does not leave a stale ".modified" file next to the database.
        if changed:
            os.remove(seed_file)
def initialize_seed_database(which_bowtie2, overwrite=False):
    """Build bowtie2 indexes for the known seed FASTA files found in ``SEQ_DIR``.

    Only files named ``<db>.fasta`` with ``<db>`` in the fixed list below are
    processed. For each one the index base name is
    ``remove_db_postfix(path) + ".index"``; the build is skipped when all six
    ``*.bt2l`` index files are already present and ``overwrite`` is false.
    Progress is reported on stdout as ``bowtie2-build <index base> ... finished``
    or ``... skipped``.

    :param which_bowtie2: forwarded unchanged to ``build_bowtie2_db``.
    :param overwrite: rebuild even if every index file already exists; also
        forwarded to ``build_bowtie2_db``.
    """
    seed_db_names = ("embplant_pt", "other_pt", "embplant_mt", "embplant_nr", "animal_mt", "fungus_mt")
    index_suffixes = (".1.bt2l", ".2.bt2l", ".3.bt2l", ".4.bt2l", ".rev.1.bt2l", ".rev.2.bt2l")
    for file_name in os.listdir(SEQ_DIR):
        # Skip anything that is not one of the recognised "<db>.fasta" files.
        if not (file_name.endswith(".fasta") and file_name[:-6] in seed_db_names):
            continue
        fasta_f = os.path.join(SEQ_DIR, file_name)
        new_seed_file = fasta_f + ".modified"
        # A truthy result means the ".modified" file is to be used as build
        # input (presumably a copy with adjusted sequence names -- confirm in
        # check_fasta_seq_names) and must be deleted afterwards.
        changed = check_fasta_seq_names(fasta_f, new_seed_file)
        seed_file = new_seed_file if changed else fasta_f
        try:
            output_base = remove_db_postfix(fasta_f) + ".index"
            sys.stdout.write("bowtie2-build " + output_base + " ... ")
            sys.stdout.flush()
            if overwrite or not all(os.path.exists(output_base + postfix) for postfix in index_suffixes):
                build_bowtie2_db(seed_file=seed_file, seed_index_base=output_base, which_bowtie2=which_bowtie2,
                                 overwrite=overwrite, random_seed=12345, silent=True)
                sys.stdout.write("finished\n")
            else:
                sys.stdout.write("skipped\n")
        finally:
            # Remove the temporary seed file even if the build raised, so a
            # failed run does not leave a stale ".modified" file in SEQ_DIR.
            if changed:
                os.remove(seed_file)