def concat(genelistdict, workdir_comb, email, num_threads=None, percentage=0.37, user_concat_fn=None, backbone=False):
    """Concatenate several single-gene physcraper runs into one alignment and tree.

    The function resumes from pickled state found in ``workdir_comb``:

    * ``concat_checkpoint.p`` exists: the pickled object is loaded and all
      loading/concatenation steps are skipped.
    * otherwise, ``load_single_data.p`` exists: the pickled object is loaded
      and only the concatenation steps are run.
    * otherwise a new ``Concat`` object is built from ``genelistdict``.

    Note that ``email``, ``genelistdict`` and ``user_concat_fn`` are only used
    when a new ``Concat`` object is built; they are ignored when resuming from
    either pickle file.

    :param genelistdict: dict with gene names as keys; each value is a dict
        providing the entries ``"workdir"`` and ``"pickle"`` that are handed
        to ``Concat.load_single_genes`` together with the gene name
    :param workdir_comb: directory searched for the checkpoint files and
        passed to ``Concat``
    :param email: passed to ``Concat``
    :param num_threads: passed on to the sequence placement and tree
        estimation steps
    :param percentage: passed to ``Concat.get_short_seq_from_concat``
    :param user_concat_fn: stored as ``conc.concatfile`` on a newly built
        ``Concat`` object
    :param backbone: stored as ``conc.backbone``; if it is exactly ``False``
        the bootstrap is calculated and ``RAxML_bestTree.autoMRE_fa`` is
        labelled, otherwise the full tree is estimated and
        ``RAxML_bestTree.backbone_concat`` is labelled
    :return: the ``Concat`` object (newly built or unpickled)
    """
    license_print()

    concat_checkpoint = "{}/concat_checkpoint.p".format(workdir_comb)
    single_data_dump = "{}/load_single_data.p".format(workdir_comb)

    # NOTE(review): pickle.load executes arbitrary code from the file; the
    # checkpoint files in workdir_comb must come from a trusted run.
    if os.path.exists(concat_checkpoint):
        sys.stdout.write("load concat_checkpoint dump file\n")
        # Context manager so the file handle is closed right after loading.
        with open(concat_checkpoint, "rb") as handle:
            conc = pickle.load(handle)
    else:
        if os.path.exists(single_data_dump):
            sys.stdout.write("load single data dump file\n")
            with open(single_data_dump, "rb") as handle:
                conc = pickle.load(handle)
        else:
            conc = Concat(workdir_comb, email)
            conc.concatfile = user_concat_fn
            for gene, info in genelistdict.items():
                conc.load_single_genes(info["workdir"], info["pickle"], gene)
            conc.combine()

        # Concatenation steps; run whenever no concat checkpoint was found.
        conc.sp_seq_counter()
        conc.get_largest_tre()
        conc.make_sp_gene_dict()
        conc.make_alns_dict()
        conc.concatenate_alns()
        conc.get_short_seq_from_concat(percentage)
        conc.remove_short_seq()
        conc.dump()

    conc.backbone = backbone
    conc.make_concat_table()
    conc.write_partition()
    conc.write_otu_info()
    conc.place_new_seqs(num_threads)

    # Identity check kept on purpose: only the literal False selects the
    # bootstrap branch, any other value (None, 0, True, ...) selects backbone.
    if backbone is False:
        conc.calculate_bootstrap(num_threads)
        conc.write_labelled('RAxML_bestTree.autoMRE_fa')
    else:
        conc.est_full_tree(num_threads)
        conc.write_labelled('RAxML_bestTree.backbone_concat')
    return conc
def test_run_raxml_concat():
    """Run the concatenation pipeline on precooked data with a backbone tree.

    Loads the precooked "its" and "ets" runs into a ``Concat`` object, checks
    that ``load_single_data.p`` exists after combining and that "ets" is
    chosen as the start gene, then runs the backbone tree estimation followed
    by the placement of new sequences.
    """
    # Wait before starting; the original note says this gives an earlier
    # step ("order1") time to finish writing its file.
    time.sleep(10)

    email = "*****@*****.**"
    percentage = 0.4
    num_threads = 2
    workdir_comb = "./tests/output/concat_test"

    # Both genes are read from the same precooked directory.
    precooked_dir = "tests/data/precooked/concat_pre"
    pickle_fn = "final_ATT_checkpoint.p"
    genelistdict = {}
    for gene in ("its", "ets"):
        genelistdict[gene] = {
            "workdir": precooked_dir,
            "pickle": "{}_{}".format(gene, pickle_fn),
        }

    conc = Concat(workdir_comb, email)
    for gene, info in genelistdict.items():
        conc.load_single_genes(info["workdir"], info["pickle"], gene)
    conc.combine()
    assert os.path.exists("{}/load_single_data.p".format(workdir_comb))

    conc.sp_seq_counter()
    conc.get_largest_tre()
    assert conc.tre_start_gene == "ets"

    conc.make_sp_gene_dict()
    conc.make_alns_dict()
    conc.concatenate_alns()
    conc.get_short_seq_from_concat(percentage)
    conc.remove_short_seq()

    conc.backbone = True
    conc.make_concat_table()
    conc.write_partition()
    conc.write_otu_info()

    # Same branch selection as the production code path, written inverted.
    if conc.backbone is not False:
        conc.est_full_tree(num_threads)
        conc.write_labelled('RAxML_bestTree.backbone_concat')
    else:
        conc.calculate_bootstrap(num_threads)
        conc.write_labelled('RAxML_bestTree.autoMRE_fa')

    conc.backbone = False
    conc.place_new_seqs(num_threads)