def test_species_translation():
    spn = "Mephitis mephitis"
    info = get_ott_taxon_info(spn)
    if info:
        ottid, ottname, ncbi_id = info
    a = ottid == 231602

    tree_of_life.mrca(ott_ids=[ottid], wrap_response=False)

    ott_ids = [770315, 158484]
    ott_mrca = get_mrca_ott(ott_ids)
    b = ott_mrca == 312031

    workdir = "tests/output/tmp"
    configfi = "tests/data/test.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)
    otu_json = OtuJsonDict(id_to_spn, ids)
    # expected_json is assumed to be defined at module level (see test_compare_json below)
    c = otu_json == expected_json
    assert a * b * c == 1
def test_owndata():
    """Tests if your own input files will generate a data object of class AlignTreeTax."""
    seqaln = "tests/data/tiny_test_example/test.fas"
    mattype = "fasta"
    trfn = "tests/data/tiny_test_example/test.tre"
    schema_trf = "newick"
    workdir = "tests/output/owndata"
    configfi = "tests/data/localblast.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = ConfigObj(configfi)
    ids = IdDicts(conf, workdir=workdir)
    if os.path.exists(otu_jsonfi):
        print("load json")
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))

    data_obj = generate_ATT_from_files(seqaln=seqaln,
                                       mattype=mattype,
                                       workdir=workdir,
                                       config_obj=conf,
                                       treefile=trfn,
                                       schema_trf=schema_trf,
                                       otu_json=otu_jsonfi,
                                       ingroup_mrca=None)
    assert isinstance(data_obj, AlignTreeTax)
def make_otujsondict(id_to_spn, workdir, ids, local=False):
    """Generate a dictionary equivalent to the OToL one.

    :param id_to_spn: csv file where tip names correspond to species names
    :param workdir: the working directory
    :param ids: physcraper IdDicts object
    :param local: needed for a local database, to change the file name of the otu json dict
    :return: None; writes the otu dict to a json file in workdir
    """
    workdir = os.path.abspath(workdir)
    otu_jsonfi = "{}/otu_dict.json".format(workdir)
    if local is not False:
        otu_jsonfi = "{}/otu_dict_localseq.json".format(workdir)
    if os.path.exists(otu_jsonfi):
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        with open(otu_jsonfi, "w") as outfile:
            json.dump(otu_json, outfile)
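# Usage sketch (illustrative, not from the original file): how make_otujsondict()
# might be called for the tiny test data set. The config path, workdir and the
# "from physcraper import ..." line are assumptions based on the surrounding tests.
from physcraper import ConfigObj, IdDicts

example_workdir = "tests/output/tmp"
example_configfi = "tests/data/test.config"
example_id_to_spn = "tests/data/tiny_test_example/test_nicespl.csv"

example_conf = ConfigObj(example_configfi, interactive=False)
example_ids = IdDicts(example_conf, workdir=example_workdir)

# Writes <workdir>/otu_dict.json, or <workdir>/otu_dict_localseq.json when local=True.
make_otujsondict(example_id_to_spn, example_workdir, example_ids)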
def test():
    seqaln = "tests/data/tiny_test_example/test.fas"
    mattype = "fasta"
    trfn = "tests/data/tiny_test_example/test.tre"
    schema_trf = "newick"
    workdir = "tests/output/test_own_local"
    configfi = "tests/data/test.config"
    # configfi = "tests/data/aws.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = ConfigObj(configfi)
    ids = IdDicts(conf, workdir=workdir)
    if os.path.exists(otu_jsonfi):
        print("load json")
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))

    wrappers.own_data_run(seqaln,
                          mattype,
                          trfn,
                          schema_trf,
                          workdir,
                          otu_jsonfi,
                          configfi)
def test_add_local():
    conf = ConfigObj(configfi, interactive=False)
    data_obj = pickle.load(open("tests/data/precooked/tiny_dataobj.p", "rb"))
    data_obj.workdir = absworkdir
    ids = IdDicts(conf, workdir=data_obj.workdir)
    ids.acc_ncbi_dict = pickle.load(open("tests/data/precooked/tiny_acc_map.p", "rb"))

    if not os.path.exists(workdir):
        os.makedirs(workdir)
    if os.path.exists(otu_jsonfi_local):
        otu_json_local = json.load(open(otu_jsonfi_local))
    else:
        otu_json_local = OtuJsonDict(id_to_spn_addseq, ids)
        json.dump(otu_json_local, open(otu_jsonfi_local, "w"))

    sys.stdout.write("\ntest addLocal\n")

    # Prune sequences below a certain length threshold
    data_obj.prune_short()
    data_obj.write_files()
    data_obj.write_labelled(label='^ot:ottTaxonName', add_gb_id=True)
    data_obj.write_otus("otu_info", schema='table')
    data_obj.dump()

    sys.stdout.write("setting up id dictionaries\n")
    sys.stdout.flush()
    ids = IdDicts(conf, workdir=workdir)

    # Now combine the data, the ids, and the configuration into a single physcraper scrape object
    filteredScrape = FilterBlast(data_obj, ids)
    filteredScrape.blacklist = blacklist

    if add_local_seq is not None:
        filteredScrape.unpublished = True
    if filteredScrape.unpublished is True:  # use unpublished data
        filteredScrape.data.unpubl_otu_json = otu_json_local
        filteredScrape.write_unpubl_blastdb(add_local_seq)

    filteredScrape.run_blast_wrapper()
    filteredScrape.read_blast_wrapper()
    filteredScrape.remove_identical_seqs()

    test = False
    for key in filteredScrape.data.otu_dict.keys():
        if '^ncbi:title' in filteredScrape.data.otu_dict[key].keys():
            if filteredScrape.data.otu_dict[key]['^ncbi:title'] == "unpublished":
                test = True
                break
    assert test
def test():
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)
    if os.path.exists(otu_jsonfi):
        print("load json")
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))

    # that's the main function
    wrappers.own_data_run(seqaln,
                          mattype,
                          trfn,
                          schema_trf,
                          workdir,
                          otu_jsonfi,
                          configfi)
def test_trim():
    seqaln = "tests/data/tiny_test_example/test_extralongseq.fas"
    mattype = "fasta"
    treefile = "tests/data/tiny_test_example/test.tre"
    schema_trf = "newick"
    workdir = "tests/output/test_trim"
    configfi = "tests/data/test.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)
    if os.path.exists(otu_jsonfi):
        print("load json")
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))

    data_obj = generate_ATT_from_files(seqaln=seqaln,
                                       mattype=mattype,
                                       workdir=workdir,
                                       config_obj=conf,
                                       treefile=treefile,
                                       schema_trf=schema_trf,
                                       otu_json=otu_jsonfi,
                                       ingroup_mrca=None)

    for tax, seq in data_obj.aln.items():
        len_start = len(seq)
    data_obj.trim()
    for tax, seq in data_obj.aln.items():
        len_end = len(seq)
    assert len_start != len_end
def test():
    # define here your files
    seqaln = "tests/data/tiny_test_example/test.fas"
    mattype = "fasta"
    trfn = "tests/data/tiny_test_example/test.tre"
    schema_trf = "newick"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    workdir = "tests/output/tiny_filter_own2"
    configfi = "tests/data/remote.config"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    # change to your filtering criteria
    threshold = 2
    selectby = "blast"
    downtorank = "species"
    ingroup_mrca = 723076

    # setup the run
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = ConfigObj(configfi)
    ids = IdDicts(conf, workdir=workdir)
    if os.path.exists(otu_jsonfi):
        print("load json")
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))

    # select a wrapper function, depending on what you want to do, see short tutorial:
    wrappers.filter_data_run(seqaln,
                             mattype,
                             trfn,
                             schema_trf,
                             workdir,
                             threshold,
                             otu_jsonfi,
                             configfi,
                             selectby=selectby,
                             downtorank=downtorank,
                             ingroup_mrca=ingroup_mrca)
def test():
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)
    otu_json = OtuJsonDict(id_to_spn, ids)
    json.dump(otu_json, open(otu_jsonfi, "w"))

    data_obj = generate_ATT_from_files(seqaln=seqaln,
                                       mattype=mattype,
                                       workdir=workdir,
                                       config_obj=conf,
                                       treefile=treefile,
                                       schema_trf=schema_trf,
                                       otu_json=otu_jsonfi,
                                       ingroup_mrca=None)

    for tax, seq in data_obj.aln.items():
        len_start = len(seq)
    data_obj.trim()
    for tax, seq in data_obj.aln.items():
        len_end = len(seq)
    assert len_start == len_end

    for tax, seq in data_obj.aln.items():
        len_start = len(seq)
    data_obj.config.trim_perc = 0.5
    data_obj.trim()
    for tax, seq in data_obj.aln.items():
        len_end = len(seq)
    assert len_start > len_end
workdir = "tiny_comb_ets" configfi = "tests/data/localblast.config" otu_jsonfi = "{}/otu_dict.json".format(workdir) threshold = 2 selectby = "blast" downtorank = None if not os.path.exists("{}".format(workdir)): os.makedirs("{}".format(workdir)) conf = ConfigObj(configfi) ids = IdDicts(conf, workdir=workdir) if os.path.exists(otu_jsonfi): print("load json") otu_json = json.load(open(otu_jsonfi)) else: otu_json = OtuJsonDict(id_to_spn, ids) json.dump(otu_json, open(otu_jsonfi, "w")) wrappers.filter_data_run(seqaln, mattype, trfn, schema_trf, workdir, threshold, otu_jsonfi, configfi, selectby=selectby, downtorank=downtorank)
def test_compare_json():
    expected_json = {
        'otuSdoronicum': {
            '^ncbi:taxon': u'462523',
            '^ot:ottTaxonName': u'Senecio doronicum',
            '^ncbi:TaxonName': 'Senecio doronicum',
            '^physcraper:TaxonName': 'Senecio doronicum',
            '^physcraper:status': 'original',
            '^ot:ottId': 318436,
            '^user:TaxonName': 'Senecio_doronicum',
            '^ot:originalLabel': 'S_doronicum',
            '^physcraper:last_blasted': None
        },
        'otuSlagascanus': {
            '^ncbi:taxon': u'1268580',
            '^ot:ottTaxonName': u'Senecio lagascanus',
            '^ncbi:TaxonName': 'Senecio lagascanus',
            '^physcraper:TaxonName': 'Senecio lagascanus',
            '^physcraper:status': 'original',
            '^ot:ottId': 640718,
            '^user:TaxonName': 'Senecio_lagascanus',
            '^ot:originalLabel': 'S_lagascanus',
            '^physcraper:last_blasted': None
        },
        'otu2029doronicum': {
            '^ncbi:taxon': u'462523',
            '^ot:ottTaxonName': u'Senecio doronicum',
            '^ncbi:TaxonName': 'Senecio doronicum',
            '^physcraper:TaxonName': 'Senecio doronicum',
            '^physcraper:status': 'original',
            '^ot:ottId': 318436,
            '^user:TaxonName': 'Senecio_doronicum',
            '^ot:originalLabel': '2029_doronicum',
            '^physcraper:last_blasted': None
        },
        'otuSlopezii': {
            '^ncbi:taxon': u'1268581',
            '^ot:ottTaxonName': u'Senecio lopezii',
            '^ncbi:TaxonName': 'Senecio lopezii',
            '^physcraper:TaxonName': 'Senecio lopezii',
            '^physcraper:status': 'original',
            '^ot:ottId': 688688,
            '^user:TaxonName': 'Senecio_lopezii',
            '^ot:originalLabel': 'S_lopezii',
            '^physcraper:last_blasted': None
        },
        'otuSscopolii': {
            '^ncbi:taxon': u'1268589',
            '^ot:ottTaxonName': u'Senecio scopolii',
            '^ncbi:TaxonName': 'Senecio scopolii',
            '^physcraper:TaxonName': 'Senecio scopolii',
            '^physcraper:status': 'original',
            '^ot:ottId': 688671,
            '^user:TaxonName': 'Senecio_scopolii',
            '^ot:originalLabel': 'S_scopolii',
            '^physcraper:last_blasted': None
        }
    }

    workdir = "tests/output/tmp"
    configfi = "tests/data/test.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)

    otu_json = OtuJsonDict(id_to_spn, ids)
    print(otu_json)
    assert otu_json == expected_json
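# Minimal sketch (not part of the original tests): spot-check that a generated
# otu_dict.json carries the per-otu keys illustrated by expected_json above.
# The file path and the helper name check_otu_json are hypothetical.
import json

REQUIRED_OTU_KEYS = {
    '^ncbi:taxon', '^ot:ottTaxonName', '^ncbi:TaxonName', '^physcraper:TaxonName',
    '^physcraper:status', '^ot:ottId', '^user:TaxonName', '^ot:originalLabel',
    '^physcraper:last_blasted',
}


def check_otu_json(path="tests/output/tmp/otu_dict.json"):
    with open(path) as infile:
        otu_dict = json.load(infile)
    for otu, entry in otu_dict.items():
        missing = REQUIRED_OTU_KEYS - set(entry)
        assert not missing, "otu {} is missing keys: {}".format(otu, sorted(missing))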
blacklist = None  # optionally a list of GenBank GI numbers
ingroup_mrca = 557768

# setup the run
if not os.path.exists(workdir):
    os.makedirs(workdir)
conf = ConfigObj(configfi)
ids = IdDicts(conf, workdir=workdir)
if os.path.exists(otu_jsonfi):
    print("load json")
    otu_json = json.load(open(otu_jsonfi))
else:
    otu_json = OtuJsonDict(id_to_spn, ids)
    json.dump(otu_json, open(otu_jsonfi, "w"))

if add_unpubl_seq is not None:
    if os.path.exists(otu_jsonfi_local):
        print("load json local")
        otu_json_local = json.load(open(otu_jsonfi_local))
        print(otu_json_local)
    else:
        otu_json_local = OtuJsonDict(id_to_spn_addseq, ids)
        json.dump(otu_json_local, open(otu_jsonfi_local, "w"))
        print(otu_json_local)

# select a wrapper function, depending on what you want to do, see short tutorial:
wrappers.filter_data_run(
    seqaln,
def test():
    # tiny its
    seqaln = "tests/data/tiny_comb_its/tiny_comb_its.fasta"
    mattype = "fasta"
    trfn = "tests/data/tiny_comb_its/tiny_comb_its.tre"
    schema_trf = "newick"
    id_to_spn = r"tests/data/tiny_comb_its/nicespl.csv"
    workdir = "tests/data/tiny_comb_its"
    configfi = "tests/data/test.config"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)
    threshold = 2
    selectby = "blast"

    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = ConfigObj(configfi)
    ids = IdDicts(conf, workdir=workdir)
    if os.path.exists(otu_jsonfi):
        print("load json")
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))

    wrappers.filter_data_run(seqaln,
                             mattype,
                             trfn,
                             schema_trf,
                             workdir,
                             threshold,
                             otu_jsonfi,
                             configfi,
                             selectby=selectby)

    # tiny ets
    seqaln = "tests/data/tiny_comb_ets/tiny_comb_ets.fasta"
    mattype = "fasta"
    trfn = "tests/data/tiny_comb_ets/tiny_comb_ets.tre"
    schema_trf = "newick"
    id_to_spn = r"tests/data/tiny_comb_ets/nicespl.csv"
    workdir = "tests/data/tiny_comb_ets"
    configfi = "tests/data/test.config"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)
    threshold = 2
    selectby = "blast"

    if not os.path.exists(workdir):
        os.makedirs(workdir)
    conf = ConfigObj(configfi)
    ids = IdDicts(conf, workdir=workdir)
    if os.path.exists(otu_jsonfi):
        print("load json")
        otu_json = json.load(open(otu_jsonfi))
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        json.dump(otu_json, open(otu_jsonfi, "w"))

    wrappers.filter_data_run(seqaln,
                             mattype,
                             trfn,
                             schema_trf,
                             workdir,
                             threshold,
                             otu_jsonfi,
                             configfi,
                             selectby=selectby)