def test_filter_length():
    """Smoke-test length-based filtering of new sequences on precooked BLAST data.

    Loads a pickled tiny data object and accession map, runs the scrape
    pipeline against fixed BLAST output, and records sequence counts before
    and after duplicate removal.
    """
    workdir = "tests/output/test_selectbylength"
    absworkdir = os.path.abspath(workdir)
    conf = ConfigObj("tests/data/test.config", interactive=False)
    # Filter knobs for this scenario; several are declared for completeness
    # even though this precooked run does not exercise all of them.
    threshold = 2
    selectby = "length"
    downtorank = "species"
    add_unpubl_seq = None
    blacklist = None
    id_to_spn_addseq_json = None
    ingroup_mrca = None
    shared_blast_folder = None

    # Precooked fixtures: a small data object plus its accession->NCBI map.
    with open("tests/data/precooked/tiny_dataobj.p", "rb") as handle:
        data_obj = pickle.load(handle)
    data_obj.workdir = absworkdir
    ids = IdDicts(conf, workdir=data_obj.workdir)
    with open("tests/data/precooked/tiny_acc_map.p", "rb") as handle:
        ids.acc_ncbi_dict = pickle.load(handle)

    # Combine the data, the ids, and the configuration into one scrape object.
    scrape = PhyscraperScrape(data_obj, ids)
    scrape.blacklist = blacklist
    sys.stdout.write("BLASTing input sequences\n")
    if shared_blast_folder:
        scrape.blast_subdir = shared_blast_folder
    else:
        shared_blast_folder = None

    # Read pre-generated BLAST results instead of running BLAST live.
    scrape.read_blast_wrapper(
        blast_dir="tests/data/precooked/fixed/tte_blast_files")
    scrape.remove_identical_seqs()
    scrape.dump()

    sys.stdout.write("Filter the sequences\n")
    length_unfiltered = len(scrape.new_seqs_otu_id)
    length_filtered = len(scrape.new_seqs)
def test_add_local():
    """Verify that locally supplied (unpublished) sequences enter the OTU dict.

    Relies on module-level setup for ``absworkdir``, ``conf``, ``workdir``,
    ``otu_jsonfi_local``, ``id_to_spn_addseq``, ``blacklist`` and
    ``add_local_seq``.
    """
    # Precooked fixtures shared with the other tests in this module.
    with open("tests/data/precooked/tiny_dataobj.p", "rb") as handle:
        data_obj = pickle.load(handle)
    data_obj.workdir = absworkdir
    ids = IdDicts(conf, workdir=data_obj.workdir)
    with open("tests/data/precooked/tiny_acc_map.p", "rb") as handle:
        ids.acc_ncbi_dict = pickle.load(handle)

    if not os.path.exists(workdir):
        os.makedirs(workdir)

    # Reuse a cached OTU json mapping when present, otherwise build and cache it.
    if os.path.exists(otu_jsonfi_local):
        with open(otu_jsonfi_local) as handle:
            otu_json_local = json.load(handle)
    else:
        otu_json_local = OtuJsonDict(id_to_spn_addseq, ids)
        with open(otu_jsonfi_local, "w") as handle:
            json.dump(otu_json_local, handle)

    # Combine the data, the ids, and the configuration into one scrape object.
    scrape = PhyscraperScrape(data_obj, ids)
    scrape.blacklist = blacklist
    if add_local_seq is not None:
        scrape.unpublished = True
    if scrape.unpublished is True:
        # Feed the unpublished/local sequences into the scrape's BLAST database.
        scrape.data.unpubl_otu_json = otu_json_local
        scrape.write_unpubl_blastdb(add_local_seq)

    scrape.run_blast_wrapper()
    scrape.read_blast_wrapper()
    scrape.remove_identical_seqs()

    # At least one OTU record must be tagged as coming from unpublished data.
    found_unpublished = any(
        record.get('^ncbi:title') == "unpublished"
        for record in scrape.data.otu_dict.values()
    )
    assert found_unpublished
def test_blacklist():
    """Check that blacklisted accessions are excluded from the final tree.

    Runs the scrape pipeline twice on the same precooked BLAST fixtures —
    once without a blacklist and once with one — and asserts that no
    blacklisted accession survives into the filtered run's taxon set.
    """
    workdir = "tests/output/test_blacklist"
    configfi = "tests/data/test.config"

    def _run_scrape(absdir, acc_blacklist=None):
        # Build a scrape object over the precooked fixtures, stage the fixed
        # BLAST files into its working dir, and run the downstream pipeline.
        run_dir = os.path.join(absdir, "current_blast_run/")
        if not os.path.exists(run_dir):
            os.makedirs(run_dir)
        conf = ConfigObj(configfi, interactive=False)
        with open("tests/data/precooked/tiny_dataobj.p", "rb") as handle:
            data_obj = pickle.load(handle)
        data_obj.workdir = absdir
        ids = IdDicts(conf, workdir=data_obj.workdir)
        with open("tests/data/precooked/tiny_acc_map.p", "rb") as handle:
            ids.acc_ncbi_dict = pickle.load(handle)
        scrape = PhyscraperScrape(data_obj, ids)
        if acc_blacklist is not None:
            scrape.blacklist = acc_blacklist
        # Pretend BLAST already ran; we copy in fixed result files instead.
        scrape._blasted = 1
        src = "tests/data/precooked/fixed/tte_blast_files"
        for file_name in os.listdir(src):
            full_file_name = os.path.join(src, file_name)
            if os.path.isfile(full_file_name):
                shutil.copy(full_file_name, run_dir)
        scrape.read_blast_wrapper()
        scrape.remove_identical_seqs()
        new_test_generate_streamed_aln(scrape)
        return scrape

    # First pass: no blacklist at all.
    noblack_scrape = _run_scrape(
        os.path.abspath(os.path.join(workdir, "noblacklist")))

    # Second pass: same data, but with one accession blacklisted.
    blacklist = ['JX895340.1']
    filtered_scrape = _run_scrape(os.path.abspath(workdir), blacklist)

    print("RUN TESTS!")
    accessions_filtered = []
    for tax in filtered_scrape.data.tre.taxon_namespace:
        accessions_filtered.append(
            filtered_scrape.data.otu_dict[tax.label].get("^ncbi:accession"))
    print(accessions_filtered)

    accessions_noblack = []
    for tax in noblack_scrape.data.tre.taxon_namespace:
        accessions_noblack.append(
            noblack_scrape.data.otu_dict[tax.label].get("^ncbi:accession"))
    print(accessions_noblack)

    # No blacklisted accession may appear in the filtered run's tree.
    for item in blacklist:
        assert item not in accessions_filtered