Ejemplo n.º 1
0
def concat(genelistdict,
           workdir_comb,
           email,
           num_threads=None,
           percentage=0.37,
           user_concat_fn=None,
           backbone=False):
    """Concatenate different physcraper runs into a single alignment and tree.

    The function resumes from pickled state found in ``workdir_comb``:

    * if ``concat_checkpoint.p`` exists, the pickled object is loaded and the
      concatenation steps are skipped;
    * otherwise, if ``load_single_data.p`` exists, the pickled object is
      loaded and the concatenation steps are run on it;
    * otherwise a new ``Concat`` object is built from ``genelistdict``.

    :param genelistdict: dict with gene names as keys; each value is a dict
        with the keys ``"workdir"`` and ``"pickle"``, which are passed on to
        ``Concat.load_single_genes`` together with the gene name
    :param workdir_comb: directory holding the checkpoint files; also passed
        to the ``Concat`` constructor
    :param email: passed to the ``Concat`` constructor
    :param num_threads: passed to ``place_new_seqs`` and to either
        ``calculate_bootstrap`` or ``est_full_tree``
    :param percentage: passed to ``get_short_seq_from_concat``
    :param user_concat_fn: stored as ``conc.concatfile``; only applied when a
        new ``Concat`` object is built, not when one is loaded from a pickle
    :param backbone: stored as ``conc.backbone``; if ``False`` the bootstrap
        is calculated, otherwise ``est_full_tree`` is run
    :return: the ``Concat`` object
    """
    license_print()

    checkpoint_fn = "{}/concat_checkpoint.p".format(workdir_comb)
    single_data_fn = "{}/load_single_data.p".format(workdir_comb)

    # NOTE(review): pickle files are loaded as-is; only point workdir_comb at
    # directories whose content is trusted.
    if not os.path.exists(checkpoint_fn):
        if not os.path.exists(single_data_fn):
            conc = Concat(workdir_comb, email)
            conc.concatfile = user_concat_fn
            for gene, info in genelistdict.items():
                conc.load_single_genes(info["workdir"], info["pickle"], gene)
            conc.combine()
        else:
            sys.stdout.write("load single data dump file\n")
            # Context manager so the file handle is closed after loading.
            with open(single_data_fn, "rb") as handle:
                conc = pickle.load(handle)
        conc.sp_seq_counter()
        conc.get_largest_tre()
        conc.make_sp_gene_dict()
        conc.make_alns_dict()
        conc.concatenate_alns()
        conc.get_short_seq_from_concat(percentage)
        conc.remove_short_seq()
        # Presumably writes the concat_checkpoint.p file checked above;
        # verify against Concat.dump.
        conc.dump()
    else:
        sys.stdout.write("load concat_checkpoint dump file\n")
        with open(checkpoint_fn, "rb") as handle:
            conc = pickle.load(handle)

    # Set after loading so the caller's choice overrides any pickled value.
    conc.backbone = backbone
    conc.make_concat_table()
    conc.write_partition()
    conc.write_otu_info()
    conc.place_new_seqs(num_threads)

    if backbone is False:
        conc.calculate_bootstrap(num_threads)
        conc.write_labelled('RAxML_bestTree.autoMRE_fa')
    else:
        conc.est_full_tree(num_threads)
        conc.write_labelled('RAxML_bestTree.backbone_concat')
    return conc
Ejemplo n.º 2
0
def test_run_raxml_concat():
    """Run the concatenation steps on precooked data, then the RAxML steps.

    Builds a ``Concat`` object from the precooked "its" and "ets" pickles,
    checks that ``load_single_data.p`` exists after ``combine()`` and that
    ``tre_start_gene`` is ``"ets"`` after ``get_largest_tre()``, then runs
    the tree step with ``backbone = True`` followed by ``place_new_seqs``
    with ``backbone = False``.
    """
    time.sleep(10)  # needs time so that file of order1 is written before
    workdir_its = "tests/data/precooked/concat_pre"
    workdir_ets = "tests/data/precooked/concat_pre"
    email = "*****@*****.**"
    pickle_fn = "final_ATT_checkpoint.p"
    percentage = 0.4
    num_threads = 2
    workdir_comb = "./tests/output/concat_test"
    genelistdict = {
        "its": {"workdir": workdir_its, "pickle": "its_{}".format(pickle_fn)},
        "ets": {"workdir": workdir_ets, "pickle": "ets_{}".format(pickle_fn)},
    }
    conc = Concat(workdir_comb, email)
    for gene, info in genelistdict.items():
        conc.load_single_genes(info["workdir"], info["pickle"], gene)
    conc.combine()
    assert os.path.exists("{}/load_single_data.p".format(workdir_comb))
    conc.sp_seq_counter()
    conc.get_largest_tre()
    assert conc.tre_start_gene == "ets"
    conc.make_sp_gene_dict()
    conc.make_alns_dict()
    conc.concatenate_alns()
    conc.get_short_seq_from_concat(percentage)
    conc.remove_short_seq()
    conc.backbone = True
    conc.make_concat_table()
    conc.write_partition()
    conc.write_otu_info()
    # backbone was set to True just above, so the else branch is the one
    # expected to run here.
    if conc.backbone is False:
        conc.calculate_bootstrap(num_threads)
        conc.write_labelled('RAxML_bestTree.autoMRE_fa')
    else:
        conc.est_full_tree(num_threads)
        conc.write_labelled('RAxML_bestTree.backbone_concat')
    # Switched back to False before placing the new sequences.
    conc.backbone = False
    conc.place_new_seqs(num_threads)