Example #1
0
def test():
    """Check that the number of unique species names collected from all single
    runs equals the number of species entries in ``concat.sp_acc_comb``.

    Relies on module-level fixtures: ``Concat``, ``workdir_comb``, ``email``
    and ``genelist`` (gene name -> {"workdir", "pickle"}).
    """
    concat = Concat(workdir_comb, email)
    for item in genelist.keys():
        concat.load_single_genes(genelist[item]['workdir'], genelist[item]["pickle"], item)

    concat.combine()
    spnl = []
    for genename in concat.single_runs:
        for otu in concat.single_runs[genename].data.otu_dict.keys():
            data = concat.single_runs[genename].data.otu_dict[otu]
            if '^ot:ottTaxonName' in data:
                spn = concat.get_taxon_info('^ot:ottTaxonName', data)
            elif '^user:TaxonName' in data:
                spn = concat.get_taxon_info('^user:TaxonName', data)
            else:
                # BUG FIX: `spn` used to be appended unconditionally, so an OTU
                # lacking both taxon-name keys re-appended the previous OTU's
                # name (or raised NameError on the very first OTU). Skip it.
                continue
            spnl.append(spn)

    len_single = len(set(spnl))
    len_concat_id_dict = len(concat.sp_acc_comb)

    assert len_single == len_concat_id_dict
Example #2
0
def test():
    """Check that ``sp_to_keep()`` returns exactly the species that are missing
    a sequence for at least one gene (a count of 0 in ``sp_counter``).

    Relies on module-level fixtures: ``Concat``, ``workdir_comb``, ``email``
    and ``genelist``.
    """
    # get to test status
    concat = Concat(workdir_comb, email)
    for item in genelist.keys():
        concat.load_single_genes(genelist[item]['workdir'],
                                 genelist[item]["pickle"], item)

    concat.combine()
    concat.sp_seq_counter()
    sp_to_keep = concat.sp_to_keep()

    # Independently collect every species that lacks data for >= 1 gene.
    sp_keep = []
    for sp in concat.sp_counter:
        for gene in concat.sp_counter[sp]:
            if concat.sp_counter[sp][gene] == 0:
                sp_keep.append(sp)

    print(len(sp_to_keep.keys()))

    try:
        assert set(sp_to_keep.keys()) == set(sp_keep)
        print("tests passed")
    # BUG FIX: was a bare `except:` that silently swallowed every error
    # (KeyboardInterrupt included) and reported any failure as "test fails".
    except AssertionError:
        print("test fails")
Example #3
0
def concat(genelistdict,
           workdir_comb,
           email,
           num_threads=None,
           percentage=0.37,
           user_concat_fn=None,
           backbone=False):
    """Concatenate different physcraper runs into a single alignment and tree.

    Resumes from on-disk checkpoints when present, otherwise loads each
    single-gene run, combines them, builds the concatenated alignment, and
    finally estimates/places the tree with RAxML.

    Args:
        genelistdict: dict keyed by gene name; each value is a dict with
            "workdir" and "pickle" entries pointing at a finished run.
        workdir_comb: working directory for the combined analysis.
        email: user email, passed through to ``Concat``.
        num_threads: thread count handed to the RAxML wrappers (None = default).
        percentage: minimum fraction of the concatenated alignment a sequence
            must cover to be kept (see ``get_short_seq_from_concat``).
        user_concat_fn: optional user-supplied concatenation file.
        backbone: if True, estimate a full tree on a backbone instead of
            placing new sequences with bootstrap support.

    Returns:
        The populated ``Concat`` object.
    """
    license_print()

    if not os.path.exists(path="{}/concat_checkpoint.p".format(workdir_comb)):
        if not os.path.exists(
                path="{}/load_single_data.p".format(workdir_comb)):
            # save_copy_code(workdir_comb)
            conc = Concat(workdir_comb, email)
            conc.concatfile = user_concat_fn
            for item in genelistdict.keys():
                conc.load_single_genes(genelistdict[item]["workdir"],
                                       genelistdict[item]["pickle"], item)
            conc.combine()
        else:
            sys.stdout.write("load single data dump file\n")
            # BUG FIX: use a context manager so the checkpoint file handle is
            # closed deterministically (the original `pickle.load(open(...))`
            # leaked it). NOTE(review): unpickling is only safe for trusted,
            # locally produced checkpoint files.
            with open("{}/load_single_data.p".format(workdir_comb), "rb") as fi:
                conc = pickle.load(fi)
            # conc.dump()
        conc.sp_seq_counter()
        conc.get_largest_tre()
        conc.make_sp_gene_dict()
        conc.make_alns_dict()
        conc.concatenate_alns()
        conc.get_short_seq_from_concat(percentage)
        conc.remove_short_seq()
        conc.dump()
    else:
        sys.stdout.write("load concat_checkpoint dump file\n")
        # BUG FIX: same file-handle leak as above.
        with open("{}/concat_checkpoint.p".format(workdir_comb), "rb") as fi:
            conc = pickle.load(fi)
    conc.backbone = backbone
    conc.make_concat_table()
    conc.write_partition()
    conc.write_otu_info()
    conc.place_new_seqs(num_threads)

    if backbone is False:
        conc.calculate_bootstrap(num_threads)
        conc.write_labelled('RAxML_bestTree.autoMRE_fa')
    else:
        conc.est_full_tree(num_threads)
        conc.write_labelled('RAxML_bestTree.backbone_concat')
    return conc
def test():
    """Check that nothing is lost between loading the single runs and building
    ``sp_counter``: the summed per-species/per-gene counts must equal the total
    number of taxa across all single-run alignments.
    """
    concat = Concat(workdir_comb, email)
    for gene_name in genelist.keys():
        concat.load_single_genes(genelist[gene_name]["workdir"],
                                 genelist[gene_name]["pickle"], gene_name)

    concat.combine()
    concat.sp_seq_counter()

    # Total sequence count as recorded in the species counter.
    counted_seqs = sum(
        concat.sp_counter[species][locus]
        for species in concat.sp_counter
        for locus in concat.sp_counter[species]
    )

    # Total taxa actually present across all loaded single-gene alignments.
    loaded_taxa = sum(
        len(concat.single_runs[run].aln.taxon_namespace)
        for run in concat.single_runs
    )

    assert counted_seqs == loaded_taxa
def test_run_raxml_concat():
    """End-to-end run of the concat workflow on precooked data, exercising both
    the backbone (``est_full_tree``) and the placement (``place_new_seqs``)
    code paths.
    """
    time.sleep(10)  # needs time so that file of order1 is written before
    workdir_its = "tests/data/precooked/concat_pre"
    workdir_ets = "tests/data/precooked/concat_pre"
    email = "*****@*****.**"  # BUG FIX: was assigned twice; duplicate removed
    pickle_fn = "final_ATT_checkpoint.p"
    percentage = 0.4
    num_threads = 2
    workdir_comb = "./tests/output/concat_test"
    genelistdict = {"its": {"workdir": workdir_its, "pickle": "its_{}".format(pickle_fn)},
                    "ets": {"workdir": workdir_ets, "pickle": "ets_{}".format(pickle_fn)}}
    conc = Concat(workdir_comb, email)
    for item in genelistdict.keys():
        conc.load_single_genes(genelistdict[item]["workdir"], genelistdict[item]["pickle"], item)
    conc.combine()
    assert os.path.exists("{}/load_single_data.p".format(workdir_comb))
    conc.sp_seq_counter()
    conc.get_largest_tre()
    assert conc.tre_start_gene == "ets"
    conc.make_sp_gene_dict()
    conc.make_alns_dict()
    conc.concatenate_alns()
    conc.get_short_seq_from_concat(percentage)
    conc.remove_short_seq()
    # First pass: backbone mode, so the else-branch (est_full_tree) runs.
    conc.backbone = True
    conc.make_concat_table()
    conc.write_partition()
    conc.write_otu_info()
    if conc.backbone is False:
        conc.calculate_bootstrap(num_threads)
        conc.write_labelled('RAxML_bestTree.autoMRE_fa')
    else:
        conc.est_full_tree(num_threads)
        conc.write_labelled('RAxML_bestTree.backbone_concat')
    # Second pass: non-backbone placement of new sequences.
    conc.backbone = False
    conc.place_new_seqs(num_threads)
def test_concat_combine():
    """Run the concat pipeline up to alignment concatenation on precooked data
    and check the expected alignment sizes before and after removing short
    sequences.
    """
    workdir_its = "tests/data/precooked/concat_pre"
    workdir_ets = "tests/data/precooked/concat_pre"
    email = "*****@*****.**"  # BUG FIX: was assigned twice; duplicate removed
    pickle_fn = "final_ATT_checkpoint.p"
    percentage = 0.4
    workdir_comb = "./tests/output/concat_test"
    genelistdict = {"its": {"workdir": workdir_its, "pickle": "its_{}".format(pickle_fn)},
                    "ets": {"workdir": workdir_ets, "pickle": "ets_{}".format(pickle_fn)}}
    conc = Concat(workdir_comb, email)
    for item in genelistdict.keys():
        conc.load_single_genes(genelistdict[item]["workdir"], genelistdict[item]["pickle"], item)
    conc.combine()
    assert os.path.exists("{}/load_single_data.p".format(workdir_comb))

    conc.sp_seq_counter()
    conc.get_largest_tre()
    assert conc.tre_start_gene == "ets"
    conc.make_sp_gene_dict()
    conc.make_alns_dict()

    for gene in conc.aln_all:
        # NOTE(review): `conc.li` looks like a debug/print helper — possibly a
        # typo; confirm the method exists on Concat.
        conc.li((conc.aln_all[gene].taxon_namespace))
        assert len(conc.aln_all[gene]) == 20

    conc.concatenate_alns()
    conc.get_short_seq_from_concat(percentage)
    conc.remove_short_seq()

    conc.dump()

    # Only 5 sequences survive the short-sequence filter at percentage=0.4.
    assert len(conc.concatenated_aln) == 5
Example #7
0
def test():
    """Smoke-test for ``Concat.select_rnd_seq``.

    Sets up a combined analysis, then performs a single ``select_rnd_seq``
    call and checks that one sequence entry was added. The nested
    unconditional ``break`` statements make every loop below run exactly one
    pass, so only the first species/gene pair is exercised.
    """
    workdir_its = "runs/tiny_comb_its"
    workdir_ets = "runs/tiny_comb_ets"
    email = "*****@*****.**"

    pickle_fn = "scrape_checkpoint.p"

    workdir_comb = "tests/output/impl_concat"
    genelist = {
        "its": {
            "workdir": workdir_its,
            "pickle": pickle_fn
        },
        "ets": {
            "workdir": workdir_ets,
            "pickle": pickle_fn
        }
    }

    # get to test status

    sys.stdout.write("\ntests Concat func select_rnd_seq\n")

    concat = Concat(workdir_comb, email)
    for item in genelist.keys():
        concat.load_single_genes(genelist[item]['workdir'],
                                 genelist[item]["pickle"], item)

    concat.combine()
    concat.sp_seq_counter()
    sp_to_keep = concat.sp_to_keep()
    concat.get_largest_tre()

    # print("test: select rnd seq")
    count = 2
    concat.tmp_dict = deepcopy(concat.sp_acc_comb)
    # print("while")
    # part of make_sp_gene_dict

    # NOTE(review): this measures len() of the whole comb_seq dict, while
    # len_after below measures len() of a single gene's entry — confirm this
    # asymmetry is the intended invariant.
    len_before = len(concat.comb_seq)

    # Mirrors the loop structure of make_sp_gene_dict, but the unconditional
    # `break`s ensure exactly one species (and one gene) is processed.
    while len(concat.tmp_dict.keys()) >= 1:
        del_gi = {}
        for spn in concat.tmp_dict.keys():
            # print(spn)
            sp_to_keep_list = sp_to_keep.keys()
            # debug(sp_to_keep_list)
            if spn.replace(" ", "_") in sp_to_keep_list:
                tmp_gene = deepcopy(concat.genes_present)
                for gene in concat.tmp_dict[spn]:

                    tmp_gene.remove(gene)
                    # print("select_rnd_seq")
                    # print(spn, gene,del_gi, count)
                    del_gi = concat.select_rnd_seq(spn, gene, del_gi)
                    # print("now it should break")
                    break
                break
            break
        break

    # `gene` leaks out of the inner for-loop above; this assumes the first
    # species entered the sp_to_keep branch, otherwise `gene` is unbound.
    len_after = len(concat.comb_seq[gene])

    #

    assert len_before + 1 == len_after