def test():
    """Check that combining single-gene runs keeps every distinct taxon name.

    Loads every gene listed in the module-level ``genelist`` into a ``Concat``
    object, combines them, and compares the number of distinct taxon names
    found in the single runs' ``otu_dict`` entries with the number of entries
    in ``concat.sp_acc_comb``.
    """
    concat = Concat(workdir_comb, email)
    for item in genelist:
        concat.load_single_genes(genelist[item]['workdir'], genelist[item]["pickle"], item)
    concat.combine()

    # Only the number of distinct names is compared, so collect into a set.
    taxon_names = set()
    for genename in concat.single_runs:
        otu_dict = concat.single_runs[genename].data.otu_dict
        for otu in otu_dict:
            data = otu_dict[otu]
            if '^ot:ottTaxonName' in data:
                taxon_names.add(concat.get_taxon_info('^ot:ottTaxonName', data))
            elif '^user:TaxonName' in data:
                taxon_names.add(concat.get_taxon_info('^user:TaxonName', data))
            # Entries carrying neither key are skipped on purpose: the name
            # variable used to be left unbound (NameError on the first entry)
            # or silently reused from the previous entry in that case.

    assert len(taxon_names) == len(concat.sp_acc_comb)
def test():
    """Check ``Concat.sp_to_keep`` against the counts in ``sp_counter``.

    The expected result is every species that has a count of zero for at
    least one gene in ``concat.sp_counter``; the keys returned by
    ``concat.sp_to_keep()`` must be exactly that set.

    Raises:
        AssertionError: if the two sets differ. The failure is no longer
            swallowed by a bare ``except``, so the test can actually fail.
    """
    # get to test status
    concat = Concat(workdir_comb, email)
    for item in genelist:
        concat.load_single_genes(genelist[item]['workdir'], genelist[item]["pickle"], item)
    concat.combine()
    concat.sp_seq_counter()
    sp_to_keep = concat.sp_to_keep()

    # Species with a zero count for at least one gene.
    expected = {
        sp
        for sp in concat.sp_counter
        if any(concat.sp_counter[sp][gene] == 0 for gene in concat.sp_counter[sp])
    }

    assert set(sp_to_keep.keys()) == expected, \
        "sp_to_keep() keys differ from the species with a zero gene count"
    print("tests passed")
def concat(genelistdict, workdir_comb, email, num_threads=None, percentage=0.37, user_concat_fn=None, backbone=False):
    """Concatenate different physcraper runs into a single alignment and tree.

    Work is resumed from pickled checkpoints found in ``workdir_comb``:
    if ``concat_checkpoint.p`` exists, the concatenation steps are skipped
    entirely; otherwise, if ``load_single_data.p`` exists, loading and
    combining the single-gene runs is skipped.

    :param genelistdict: dict with gene names as keys; every value is a dict
        whose ``"workdir"`` and ``"pickle"`` entries are passed on to
        ``Concat.load_single_genes``
    :param workdir_comb: directory used for the checkpoints; also passed to
        ``Concat``
    :param email: passed to ``Concat``
    :param num_threads: passed to ``place_new_seqs`` and to the tree
        estimation step
    :param percentage: passed to ``get_short_seq_from_concat``
    :param user_concat_fn: stored as ``conc.concatfile``; only applied when a
        fresh ``Concat`` object is created, not when one is loaded from a
        checkpoint
    :param backbone: stored as ``conc.backbone``; ``False`` runs
        ``calculate_bootstrap``, anything else runs ``est_full_tree``
    :return: the ``Concat`` object that was built or loaded
    """
    license_print()

    concat_checkpoint = "{}/concat_checkpoint.p".format(workdir_comb)
    single_data_dump = "{}/load_single_data.p".format(workdir_comb)

    # NOTE: pickle can execute arbitrary code while loading; the checkpoint
    # files must come from a trusted working directory.
    if not os.path.exists(concat_checkpoint):
        if not os.path.exists(single_data_dump):
            conc = Concat(workdir_comb, email)
            conc.concatfile = user_concat_fn
            for item in genelistdict:
                conc.load_single_genes(genelistdict[item]["workdir"], genelistdict[item]["pickle"], item)
            conc.combine()
        else:
            sys.stdout.write("load single data dump file\n")
            # Context manager closes the handle, which the bare open() leaked.
            with open(single_data_dump, "rb") as dump_file:
                conc = pickle.load(dump_file)
        conc.sp_seq_counter()
        conc.get_largest_tre()
        conc.make_sp_gene_dict()
        conc.make_alns_dict()
        conc.concatenate_alns()
        conc.get_short_seq_from_concat(percentage)
        conc.remove_short_seq()
        # presumably writes the checkpoint that the next call resumes from -
        # confirm in Concat.dump
        conc.dump()
    else:
        sys.stdout.write("load concat_checkpoint dump file\n")
        with open(concat_checkpoint, "rb") as dump_file:
            conc = pickle.load(dump_file)

    conc.backbone = backbone
    conc.make_concat_table()
    conc.write_partition()
    conc.write_otu_info()
    conc.place_new_seqs(num_threads)

    if backbone is False:
        conc.calculate_bootstrap(num_threads)
        conc.write_labelled('RAxML_bestTree.autoMRE_fa')
    else:
        conc.est_full_tree(num_threads)
        conc.write_labelled('RAxML_bestTree.backbone_concat')
    return conc
def test():
    """Check that ``sp_counter`` accounts for every loaded sequence.

    The sum of all per-species, per-gene counts in ``concat.sp_counter`` must
    equal the summed sizes of the ``aln.taxon_namespace`` of every single run.
    """
    concat = Concat(workdir_comb, email)
    for gene_name, gene_info in genelist.items():
        concat.load_single_genes(gene_info["workdir"], gene_info["pickle"], gene_name)
    concat.combine()
    concat.sp_seq_counter()

    # tests if nothing gets lost from loading single runs to make sp_counter
    counted = sum(
        concat.sp_counter[sp][gene]
        for sp in concat.sp_counter
        for gene in concat.sp_counter[sp]
    )
    loaded = sum(
        len(concat.single_runs[run].aln.taxon_namespace)
        for run in concat.single_runs
    )
    assert counted == loaded
def test_run_raxml_concat():
    """Run the concatenation steps on the precooked its/ets data.

    Loads both genes from the same precooked directory, checks that
    ``load_single_data.p`` is written and that ``ets`` is picked as the
    starting gene, then runs the tree step with ``conc.backbone`` set to
    ``True`` and finally places new sequences with it reset to ``False``.
    """
    # needs time so that file of order1 is written before
    time.sleep(10)

    precooked_dir = "tests/data/precooked/concat_pre"
    checkpoint_suffix = "final_ATT_checkpoint.p"
    email = "*****@*****.**"
    percentage = 0.4
    num_threads = 2
    workdir_comb = "./tests/output/concat_test"
    genelistdict = {
        locus: {"workdir": precooked_dir,
                "pickle": "{}_{}".format(locus, checkpoint_suffix)}
        for locus in ("its", "ets")
    }

    conc = Concat(workdir_comb, email)
    for locus, locus_info in genelistdict.items():
        conc.load_single_genes(locus_info["workdir"], locus_info["pickle"], locus)
    conc.combine()
    assert os.path.exists("{}/load_single_data.p".format(workdir_comb))

    conc.sp_seq_counter()
    conc.get_largest_tre()
    assert conc.tre_start_gene == "ets"

    conc.make_sp_gene_dict()
    conc.make_alns_dict()
    conc.concatenate_alns()
    conc.get_short_seq_from_concat(percentage)
    conc.remove_short_seq()

    conc.backbone = True
    conc.make_concat_table()
    conc.write_partition()
    conc.write_otu_info()
    if conc.backbone is not False:
        conc.est_full_tree(num_threads)
        conc.write_labelled('RAxML_bestTree.backbone_concat')
    else:
        conc.calculate_bootstrap(num_threads)
        conc.write_labelled('RAxML_bestTree.autoMRE_fa')

    conc.backbone = False
    conc.place_new_seqs(num_threads)
def test_concat_combine():
    """Combine the precooked its/ets runs and check the resulting sizes.

    Asserts that ``load_single_data.p`` is written, that ``ets`` is picked as
    the starting gene, that every alignment in ``conc.aln_all`` has length 20
    and that the concatenated alignment has length 5 after short sequences
    were removed.
    """
    precooked_dir = "tests/data/precooked/concat_pre"
    checkpoint_suffix = "final_ATT_checkpoint.p"
    email = "*****@*****.**"
    percentage = 0.4
    workdir_comb = "./tests/output/concat_test"
    genelistdict = {
        locus: {"workdir": precooked_dir,
                "pickle": "{}_{}".format(locus, checkpoint_suffix)}
        for locus in ("its", "ets")
    }

    conc = Concat(workdir_comb, email)
    for locus, locus_info in genelistdict.items():
        conc.load_single_genes(locus_info["workdir"], locus_info["pickle"], locus)
    conc.combine()
    assert os.path.exists("{}/load_single_data.p".format(workdir_comb))

    conc.sp_seq_counter()
    conc.get_largest_tre()
    assert conc.tre_start_gene == "ets"

    conc.make_sp_gene_dict()
    conc.make_alns_dict()
    for gene in conc.aln_all:
        gene_aln = conc.aln_all[gene]
        conc.li(gene_aln.taxon_namespace)
        assert len(gene_aln) == 20

    conc.concatenate_alns()
    conc.get_short_seq_from_concat(percentage)
    conc.remove_short_seq()
    conc.dump()
    assert len(conc.concatenated_aln) == 5
def test():
    """Exercise ``Concat.select_rnd_seq`` on the tiny its/ets runs.

    Builds a ``Concat`` object from the two runs, then calls
    ``select_rnd_seq`` at most once (every loop is left through ``break``).
    The function currently makes no assertion; the only check is commented
    out at the end.
    """
    workdir_its = "runs/tiny_comb_its"
    workdir_ets = "runs/tiny_comb_ets"
    email = "*****@*****.**"
    pickle_fn = "scrape_checkpoint.p"
    workdir_comb = "tests/output/impl_concat"
    genelist = {
        "its": {
            "workdir": workdir_its,
            "pickle": pickle_fn
        },
        "ets": {
            "workdir": workdir_ets,
            "pickle": pickle_fn
        }
    }
    # get to test status
    sys.stdout.write("\ntests Concat func select_rnd_seq\n")
    concat = Concat(workdir_comb, email)
    for item in genelist.keys():
        concat.load_single_genes(genelist[item]['workdir'], genelist[item]["pickle"], item)
    concat.combine()
    concat.sp_seq_counter()
    sp_to_keep = concat.sp_to_keep()
    concat.get_largest_tre()
    # print("test: select rnd seq")
    # NOTE(review): `count` is only referenced by the commented-out print below.
    count = 2
    concat.tmp_dict = deepcopy(concat.sp_acc_comb)
    # print("while")
    # part of make_sp_gene_dict
    # NOTE(review): this is the size of the whole comb_seq container, while
    # len_after below is the size of a single comb_seq[gene] entry - confirm
    # which one the disabled assertion was meant to compare.
    len_before = len(concat.comb_seq)
    # NOTE(review): the nesting of the four breaks was reconstructed from a
    # whitespace-collapsed source - confirm against version control.
    while len(concat.tmp_dict.keys()) >= 1:
        del_gi = {}
        for spn in concat.tmp_dict.keys():
            # print(spn)
            sp_to_keep_list = sp_to_keep.keys()
            # debug(sp_to_keep_list)
            if spn.replace(" ", "_") in sp_to_keep_list:
                # NOTE(review): tmp_gene is modified but never read afterwards.
                tmp_gene = deepcopy(concat.genes_present)
                for gene in concat.tmp_dict[spn]:
                    tmp_gene.remove(gene)
                    # print("select_rnd_seq")
                    # print(spn, gene,del_gi, count)
                    del_gi = concat.select_rnd_seq(spn, gene, del_gi)
                    # print("now it should break")
                    break
                break
            break
        break
    # NOTE(review): `gene` is unbound here if the inner loop never ran (first
    # species not in sp_to_keep, or tmp_dict empty), which raises a NameError.
    len_after = len(concat.comb_seq[gene])
    # assert len_before + 1 == len_after