Example #1
0
def test_species_translation():
    """Check OTT name/id lookups and the OTU dictionary built from the example csv.

    Looks up "Mephitis mephitis" through ``get_ott_taxon_info``, requests the
    MRCA of a fixed pair of OTT ids through ``get_mrca_ott`` and compares the
    ``OtuJsonDict`` built from the tiny example csv against ``expected_json``.
    """
    spn = "Mephitis mephitis"
    info = get_ott_taxon_info(spn)
    # Fail with a readable message instead of a NameError on ``ottid`` below.
    assert info, "no OTT taxon info returned for {}".format(spn)
    ottid, _ottname, _ncbi_id = info
    assert ottid == 231602

    # The response is not inspected; the call only has to complete without raising.
    tree_of_life.mrca(ott_ids=[ottid], wrap_response=False)

    ott_ids = [770315, 158484]
    ott_mrca = get_mrca_ott(ott_ids)
    assert ott_mrca == 312031

    workdir = "tests/output/tmp"
    configfi = "tests/data/test.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"

    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)

    otu_json = OtuJsonDict(id_to_spn, ids)

    # ``expected_json`` is not defined in this function; it has to come from
    # the enclosing module scope.
    assert otu_json == expected_json
Example #2
0
def test_owndata():
    """Test that user-supplied input files generate an AlignTreeTax data object.

    The OTU dictionary is cached as ``otu_dict.json`` in the working
    directory and reused on later runs.
    """
    seqaln = "tests/data/tiny_test_example/test.fas"
    mattype = "fasta"
    trfn = "tests/data/tiny_test_example/test.tre"
    schema_trf = "newick"
    workdir = "tests/output/owndata"
    configfi = "tests/data/localblast.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    if not os.path.exists(workdir):
        os.makedirs(workdir)

    conf = ConfigObj(configfi)
    ids = IdDicts(conf, workdir=workdir)

    if os.path.exists(otu_jsonfi):
        print("load json")
        # Parsing the cache makes a corrupt file fail here rather than downstream.
        with open(otu_jsonfi) as infile:
            json.load(infile)
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        # Close the handle so the json is fully written before it is read
        # back through its path below.
        with open(otu_jsonfi, "w") as outfile:
            json.dump(otu_json, outfile)

    data_obj = generate_ATT_from_files(seqaln=seqaln,
                                       mattype=mattype,
                                       workdir=workdir,
                                       config_obj=conf,
                                       treefile=trfn,
                                       schema_trf=schema_trf,
                                       otu_json=otu_jsonfi,
                                       ingroup_mrca=None)

    assert isinstance(data_obj, AlignTreeTax)
Example #3
0
def make_otujsondict(id_to_spn, workdir, ids, local=False):
    """
    Generate a dictionary equivalent to the OToL one.

    The dictionary is cached as json inside ``workdir``. An existing cache
    file is loaded instead of being regenerated.

    :param id_to_spn: csv delimited file, where tipnames correspond to species names
    :param workdir: the working directory
    :param ids: physcraper Id object
    :param local: is needed for local database to change file name of otujson dict
    :return: the otu dict (also written to the json file when newly generated)
    """
    workdir = os.path.abspath(workdir)
    filename = "otu_dict.json" if local is False else "otu_dict_localseq.json"
    otu_jsonfi = "{}/{}".format(workdir, filename)

    if os.path.exists(otu_jsonfi):
        with open(otu_jsonfi) as infile:
            return json.load(infile)

    otu_json = OtuJsonDict(id_to_spn, ids)
    # Text mode: json.dump writes str, which a binary-mode handle rejects on
    # Python 3.
    with open(otu_jsonfi, "w") as outfile:
        json.dump(otu_json, outfile)
    return otu_json
Example #4
0
def test():
    """Run ``wrappers.own_data_run`` on the tiny example with the test config.

    The OTU dictionary is only generated when no ``otu_dict.json`` exists in
    the working directory; the wrapper receives the path to that file.
    """
    seqaln = "tests/data/tiny_test_example/test.fas"
    mattype = "fasta"
    trfn = "tests/data/tiny_test_example/test.tre"
    schema_trf = "newick"
    workdir = "tests/output/test_own_local"
    configfi = "tests/data/test.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    if not os.path.exists(workdir):
        os.makedirs(workdir)

    conf = ConfigObj(configfi)
    ids = IdDicts(conf, workdir=workdir)

    if os.path.exists(otu_jsonfi):
        print("load json")
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        # Close the handle so the json is fully on disk before the wrapper
        # opens the file by path.
        with open(otu_jsonfi, "w") as outfile:
            json.dump(otu_json, outfile)

    wrappers.own_data_run(seqaln, mattype, trfn, schema_trf, workdir,
                          otu_jsonfi, configfi)
Example #5
0
def test_add_local():
    """Test that adding local sequences yields an OTU titled "unpublished".

    Relies on settings that are not defined in this function and must exist
    at module level: ``configfi``, ``workdir``, ``absworkdir``,
    ``otu_jsonfi_local``, ``id_to_spn_addseq``, ``blacklist`` and
    ``add_local_seq``.
    """
    conf = ConfigObj(configfi, interactive=False)
    # NOTE: unpickling is only acceptable because these fixtures ship with the
    # test suite; never unpickle untrusted files.
    with open("tests/data/precooked/tiny_dataobj.p", "rb") as infile:
        data_obj = pickle.load(infile)
    data_obj.workdir = absworkdir
    ids = IdDicts(conf, workdir=data_obj.workdir)
    with open("tests/data/precooked/tiny_acc_map.p", "rb") as infile:
        ids.acc_ncbi_dict = pickle.load(infile)

    if not os.path.exists(workdir):
        os.makedirs(workdir)

    if os.path.exists(otu_jsonfi_local):
        with open(otu_jsonfi_local) as infile:
            otu_json_local = json.load(infile)
    else:
        otu_json_local = OtuJsonDict(id_to_spn_addseq, ids)
        with open(otu_jsonfi_local, "w") as outfile:
            json.dump(otu_json_local, outfile)

    sys.stdout.write("\ntest addLocal\n")

    # Prune sequences below a certain length threshold
    data_obj.prune_short()
    data_obj.write_files()
    data_obj.write_labelled(label='^ot:ottTaxonName', add_gb_id=True)
    data_obj.write_otus("otu_info", schema='table')
    data_obj.dump()

    sys.stdout.write("setting up id dictionaries\n")
    sys.stdout.flush()

    # NOTE(review): this replaces the IdDicts object that received the
    # precooked acc_ncbi_dict above -- confirm that this is intended.
    ids = IdDicts(conf, workdir=workdir)

    # Now combine the data, the ids, and the configuration into a single physcraper scrape object
    filteredScrape = FilterBlast(data_obj, ids)
    filteredScrape.blacklist = blacklist

    if add_local_seq is not None:
        filteredScrape.unpublished = True
    if filteredScrape.unpublished is True:  # use unpublished data
        filteredScrape.data.unpubl_otu_json = otu_json_local
        filteredScrape.write_unpubl_blastdb(add_local_seq)

        filteredScrape.run_blast_wrapper()
        filteredScrape.read_blast_wrapper()
        filteredScrape.remove_identical_seqs()

    # At least one OTU must carry the "unpublished" title.
    assert any(
        otu.get('^ncbi:title') == "unpublished"
        for otu in filteredScrape.data.otu_dict.values()
    )
Example #6
0
def test():
    """Run ``wrappers.own_data_run`` with module-level input settings.

    ``workdir``, ``configfi``, ``otu_jsonfi``, ``id_to_spn``, ``seqaln``,
    ``mattype``, ``trfn`` and ``schema_trf`` are not defined in this function
    and must exist at module level.
    """
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)

    if os.path.exists(otu_jsonfi):
        print("load json")
        # Parsing the cache makes a corrupt file fail here rather than downstream.
        with open(otu_jsonfi) as infile:
            json.load(infile)
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        # Close the handle so the json is fully on disk before the wrapper
        # opens the file by path.
        with open(otu_jsonfi, "w") as outfile:
            json.dump(otu_json, outfile)

    # that's the main function
    wrappers.own_data_run(seqaln, mattype, trfn, schema_trf, workdir,
                          otu_jsonfi, configfi)
Example #7
0
def test_trim():
    """Test that ``trim()`` changes the sequence length of the extra-long example.

    The length is taken from the last sequence yielded by
    ``data_obj.aln.items()`` before and after trimming.
    """
    seqaln = "tests/data/tiny_test_example/test_extralongseq.fas"
    mattype = "fasta"
    treefile = "tests/data/tiny_test_example/test.tre"
    schema_trf = "newick"
    workdir = "tests/output/test_trim"
    configfi = "tests/data/test.config"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    if not os.path.exists(workdir):
        os.makedirs(workdir)

    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)

    if os.path.exists(otu_jsonfi):
        print("load json")
        # Parsing the cache makes a corrupt file fail here rather than downstream.
        with open(otu_jsonfi) as infile:
            json.load(infile)
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        # Close the handle so the json is fully written before it is read
        # back through its path below.
        with open(otu_jsonfi, "w") as outfile:
            json.dump(otu_json, outfile)

    data_obj = generate_ATT_from_files(seqaln=seqaln,
                                       mattype=mattype,
                                       workdir=workdir,
                                       config_obj=conf,
                                       treefile=treefile,
                                       schema_trf=schema_trf,
                                       otu_json=otu_jsonfi,
                                       ingroup_mrca=None)

    # The loops run to completion, so the last sequence's length is what is
    # compared (the former bare ``next`` expression was a no-op).
    for _tax, seq in data_obj.aln.items():
        len_start = len(seq)
    data_obj.trim()
    for _tax, seq in data_obj.aln.items():
        len_end = len(seq)

    assert len_start != len_end
Example #8
0
def test():
    """Run ``wrappers.filter_data_run`` on the tiny example with the remote config.

    Filtering uses ``threshold=2``, ``selectby="blast"`` and
    ``downtorank="species"`` with a fixed ingroup MRCA OTT id.
    """
    # define here your files
    seqaln = "tests/data/tiny_test_example/test.fas"
    mattype = "fasta"
    trfn = "tests/data/tiny_test_example/test.tre"
    schema_trf = "newick"
    id_to_spn = r"tests/data/tiny_test_example/test_nicespl.csv"
    workdir = "tests/output/tiny_filter_own2"
    configfi = "tests/data/remote.config"
    otu_jsonfi = "{}/otu_dict.json".format(workdir)

    # change to your filtering criteria
    threshold = 2
    selectby = "blast"
    downtorank = "species"
    ingroup_mrca = 723076

    # setup the run
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    conf = ConfigObj(configfi)
    ids = IdDicts(conf, workdir=workdir)

    if os.path.exists(otu_jsonfi):
        print("load json")
        # Parsing the cache makes a corrupt file fail here rather than downstream.
        with open(otu_jsonfi) as infile:
            json.load(infile)
    else:
        otu_json = OtuJsonDict(id_to_spn, ids)
        # Close the handle so the json is fully on disk before the wrapper
        # opens the file by path.
        with open(otu_jsonfi, "w") as outfile:
            json.dump(otu_json, outfile)

    # select a wrapper function, depending on what you want to do, see short tutorial:
    wrappers.filter_data_run(seqaln,
                             mattype,
                             trfn,
                             schema_trf,
                             workdir,
                             threshold,
                             otu_jsonfi,
                             configfi,
                             selectby=selectby,
                             downtorank=downtorank,
                             ingroup_mrca=ingroup_mrca)
Example #9
0
def test():
    """Test ``trim()`` with the configured settings and with ``trim_perc = 0.5``.

    The first trim is expected to leave the sequence length unchanged; after
    setting ``trim_perc`` to 0.5 the sequence is expected to get shorter.
    Lengths are taken from the last sequence yielded by
    ``data_obj.aln.items()``.

    ``workdir``, ``configfi``, ``id_to_spn``, ``otu_jsonfi``, ``seqaln``,
    ``mattype``, ``treefile`` and ``schema_trf`` are not defined in this
    function and must exist at module level.
    """
    if not os.path.exists(workdir):
        os.makedirs(workdir)

    conf = ConfigObj(configfi, interactive=False)
    ids = IdDicts(conf, workdir=workdir)

    otu_json = OtuJsonDict(id_to_spn, ids)
    # Close the handle so the json is fully written before it is read back
    # through its path below.
    with open(otu_jsonfi, "w") as outfile:
        json.dump(otu_json, outfile)

    data_obj = generate_ATT_from_files(seqaln=seqaln,
                                       mattype=mattype,
                                       workdir=workdir,
                                       config_obj=conf,
                                       treefile=treefile,
                                       schema_trf=schema_trf,
                                       otu_json=otu_jsonfi,
                                       ingroup_mrca=None)

    for _tax, seq in data_obj.aln.items():
        len_start = len(seq)

    data_obj.trim()

    for _tax, seq in data_obj.aln.items():
        len_end = len(seq)

    assert len_start == len_end

    # len_end already holds the current length, so it is the new baseline.
    len_start = len_end

    data_obj.config.trim_perc = 0.5
    data_obj.trim()

    for _tax, seq in data_obj.aln.items():
        len_end = len(seq)

    assert len_start > len_end
Example #10
0
# Script: run the blast-filtered wrapper for the tiny_comb_ets working directory.
# seqaln, mattype, trfn, schema_trf and id_to_spn are not assigned in this
# section and must already be defined when it runs.
workdir = "tiny_comb_ets"
configfi = "tests/data/localblast.config"
otu_jsonfi = "{}/otu_dict.json".format(workdir)
threshold = 2
selectby = "blast"
downtorank = None

if not os.path.exists(workdir):
    os.makedirs(workdir)

conf = ConfigObj(configfi)
ids = IdDicts(conf, workdir=workdir)

if os.path.exists(otu_jsonfi):
    print("load json")
    with open(otu_jsonfi) as infile:
        otu_json = json.load(infile)
else:
    otu_json = OtuJsonDict(id_to_spn, ids)
    # Close the handle so the json is fully on disk before the wrapper opens
    # the file by path.
    with open(otu_jsonfi, "w") as outfile:
        json.dump(otu_json, outfile)

wrappers.filter_data_run(seqaln,
                         mattype,
                         trfn,
                         schema_trf,
                         workdir,
                         threshold,
                         otu_jsonfi,
                         configfi,
                         selectby=selectby,
                         downtorank=downtorank)
def test_compare_json():
    """Compare the OTU dictionary built from the tiny example csv with the known-good one."""
    conf = ConfigObj("tests/data/test.config", interactive=False)
    ids = IdDicts(conf, workdir="tests/output/tmp")
    otu_json = OtuJsonDict(
        r"tests/data/tiny_test_example/test_nicespl.csv", ids)

    # One row per expected OTU:
    # (otu key, NCBI taxon id, taxon name, user taxon name, OTT id, original label)
    rows = (
        ('otuSdoronicum', u'462523', u'Senecio doronicum',
         'Senecio_doronicum', 318436, 'S_doronicum'),
        ('otuSlagascanus', u'1268580', u'Senecio lagascanus',
         'Senecio_lagascanus', 640718, 'S_lagascanus'),
        ('otu2029doronicum', u'462523', u'Senecio doronicum',
         'Senecio_doronicum', 318436, '2029_doronicum'),
        ('otuSlopezii', u'1268581', u'Senecio lopezii',
         'Senecio_lopezii', 688688, 'S_lopezii'),
        ('otuSscopolii', u'1268589', u'Senecio scopolii',
         'Senecio_scopolii', 688671, 'S_scopolii'),
    )

    expected_json = {}
    for otu_key, ncbi_taxon, taxon_name, user_name, ott_id, label in rows:
        # All three taxon-name fields hold the same name in the expected data.
        expected_json[otu_key] = {
            '^ncbi:taxon': ncbi_taxon,
            '^ot:ottTaxonName': taxon_name,
            '^ncbi:TaxonName': taxon_name,
            '^physcraper:TaxonName': taxon_name,
            '^physcraper:status': 'original',
            '^ot:ottId': ott_id,
            '^user:TaxonName': user_name,
            '^ot:originalLabel': label,
            '^physcraper:last_blasted': None,
        }

    print(otu_json)
    assert otu_json == expected_json
Example #12
0
# Script section: prepare the OTU dictionaries for a filtered run.
# workdir, configfi, otu_jsonfi, id_to_spn, add_unpubl_seq, otu_jsonfi_local
# and id_to_spn_addseq are not assigned in this section and must already be
# defined when it runs.
blacklist = None  # is a list with gi numbers
ingroup_mrca = 557768

# setup the run
if not os.path.exists(workdir):
    os.makedirs(workdir)

conf = ConfigObj(configfi)
ids = IdDicts(conf, workdir=workdir)

if os.path.exists(otu_jsonfi):
    print("load json")
    with open(otu_jsonfi) as infile:
        otu_json = json.load(infile)
else:
    otu_json = OtuJsonDict(id_to_spn, ids)
    # Close the handle so the json is fully on disk before it is read by path.
    with open(otu_jsonfi, "w") as outfile:
        json.dump(otu_json, outfile)

if add_unpubl_seq is not None:
    if os.path.exists(otu_jsonfi_local):
        print("load json local")
        with open(otu_jsonfi_local) as infile:
            otu_json_local = json.load(infile)
    else:
        otu_json_local = OtuJsonDict(id_to_spn_addseq, ids)
        with open(otu_jsonfi_local, "w") as outfile:
            json.dump(otu_json_local, outfile)
    print(otu_json_local)

# select a wrapper function, depending on what you want to do, see short tutorial:
wrappers.filter_data_run(
    seqaln,
Example #13
0
def test():
    """Run ``wrappers.filter_data_run`` on the tiny ITS and then the tiny ETS dataset.

    Both runs use the same config, ``threshold=2`` and ``selectby="blast"``;
    each dataset directory under ``tests/data`` doubles as its working
    directory.
    """
    mattype = "fasta"
    schema_trf = "newick"
    configfi = "tests/data/test.config"
    threshold = 2
    selectby = "blast"

    # tiny its first, then tiny ets
    for marker in ("its", "ets"):
        workdir = "tests/data/tiny_comb_{}".format(marker)
        seqaln = "{0}/tiny_comb_{1}.fasta".format(workdir, marker)
        trfn = "{0}/tiny_comb_{1}.tre".format(workdir, marker)
        id_to_spn = "{}/nicespl.csv".format(workdir)
        otu_jsonfi = "{}/otu_dict.json".format(workdir)

        if not os.path.exists(workdir):
            os.makedirs(workdir)

        conf = ConfigObj(configfi)
        ids = IdDicts(conf, workdir=workdir)

        if os.path.exists(otu_jsonfi):
            print("load json")
            # Parsing the cache makes a corrupt file fail here rather than
            # inside the wrapper.
            with open(otu_jsonfi) as infile:
                json.load(infile)
        else:
            otu_json = OtuJsonDict(id_to_spn, ids)
            # Close the handle so the json is fully on disk before the
            # wrapper opens the file by path.
            with open(otu_jsonfi, "w") as outfile:
                json.dump(otu_json, outfile)

        wrappers.filter_data_run(seqaln,
                                 mattype,
                                 trfn,
                                 schema_trf,
                                 workdir,
                                 threshold,
                                 otu_jsonfi,
                                 configfi,
                                 selectby=selectby)