예제 #1
0
 def testNewickExport(self):
     """Check the newick export of 'tree3' under three PhyloSchema setups.

     1. Taxon-name tip labels with ``bracket_ingroup=True``: both ingroup
        marker comments are present.
     2. Taxon-name tip labels without bracketing: no ingroup markers.
     3. ``ot:originallabel`` tip labels: no markers and no
        '*tip #' placeholder labels at all.
     """
     nexson = pathmap.nexson_obj('10/pg_10.json')
     ingroup_markers = ('[pre-ingroup-marker', '[post-ingroup-marker')
     unmapped_present = ('*tip #1 not mapped', '*tip #2 not mapped')

     # 1. Bracketed ingroup, taxon-name labels.
     bracketed_schema = PhyloSchema('newick',
                                    tip_label='ot:ottTaxonName',
                                    bracket_ingroup=True)
     exported = extract_tree(nexson, 'tree3', bracketed_schema)
     for marker in ingroup_markers:
         self.assertTrue(marker in exported)
     self.assertTrue(exported.startswith('('))
     for placeholder in unmapped_present:
         self.assertTrue(placeholder in exported)
     self.assertTrue('*tip #3 not mapped' not in exported)

     # 2. Same labels, no ingroup bracketing.
     plain_schema = PhyloSchema('newick', tip_label='ot:ottTaxonName')
     exported = extract_tree(nexson, 'tree3', plain_schema)
     for marker in ingroup_markers:
         self.assertTrue(marker not in exported)
     for placeholder in unmapped_present:
         self.assertTrue(placeholder in exported)
     self.assertTrue('*tip #3 not mapped' not in exported)
     self.assertTrue(exported.startswith('('))

     # 3. Original-label tips: no placeholder labels are expected.
     original_schema = PhyloSchema('newick', tip_label='ot:originallabel')
     exported = extract_tree(nexson, 'tree3', original_schema)
     for marker in ingroup_markers:
         self.assertTrue(marker not in exported)
     self.assertTrue('*tip #' not in exported)
예제 #2
0
 def testNewickExport(self):
     """Ingroup marker comments appear in the newick only with bracket_ingroup=True."""
     nexson = pathmap.nexson_obj('10/pg_10.json')
     label_key = 'ot:ottTaxonName'

     # With bracketing: both markers present, output is a parenthesised tree.
     with_brackets = extract_tree(
         nexson, 'tree3',
         PhyloSchema('newick', tip_label=label_key, bracket_ingroup=True))
     self.assertTrue('[pre-ingroup-marker' in with_brackets)
     self.assertTrue('[post-ingroup-marker' in with_brackets)
     self.assertTrue(with_brackets.startswith('('))

     # Without bracketing: neither marker may show up.
     without_brackets = extract_tree(
         nexson, 'tree3', PhyloSchema('newick', tip_label=label_key))
     for marker in ('[pre-ingroup-marker', '[post-ingroup-marker'):
         self.assertTrue(marker not in without_brackets)
     self.assertTrue(without_brackets.startswith('('))
예제 #3
0
def generate_ATT_from_phylesystem(aln,
                                  workdir,
                                  study_id,
                                  tree_id,
                                  phylesystem_loc='api'):
    """Build an AlignTreeTax object from an alignment and an OpenTree study tree.

    Gathers together tree, alignment, and study info, and forces alignment
    taxon names to otu ids. Spaces are coerced to underscores in both the
    alignment labels and the newick string so the two sides can be matched.

    :param aln: alignment; must be a Dendropy DnaCharacterMatrix
    :param workdir: working directory, passed through to AlignTreeTax
    :param study_id: OpenTree study id, passed to get_nexson
    :param tree_id: id of the tree within the study
    :param phylesystem_loc: passed to get_nexson (default 'api')
    :return: AlignTreeTax object built from the newick string, the otu
        dictionary and the alignment
    """
    #TODO CHECK ARGS
    assert(isinstance(aln, datamodel.charmatrixmodel.DnaCharacterMatrix))
    for tax in aln.taxon_namespace:
        tax.label = tax.label.replace(" ", "_") #Forcing all spaces to underscore UGH
    nexson = get_nexson(study_id, phylesystem_loc)
    newick = extract_tree(nexson,
                          tree_id,
                          PhyloSchema('newick',
                                      output_nexml2json='1.2.1',
                                      content="tree",
                                      tip_label="ot:originalLabel"))
    newick = newick.replace(" ", "_") #UGH Very heavy handed, need to make sure happens on alignment side as well.
    tre = Tree.get(data=newick,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)
    otus = get_subtree_otus(nexson, tree_id=tree_id)
    otu_dict = {}
    orig_lab_to_otu = {}
    for otu_id in otus:
        otu = extract_otu_nexson(nexson, otu_id)[otu_id]
        otu['^physcraper:status'] = "original"
        otu['^physcraper:last_blasted'] = "1900/01/01"
        otu_dict[otu_id] = otu
        # Same space -> underscore coercion as applied to the alignment labels.
        orig = otu.get(u'^ot:originalLabel').replace(" ", "_")
        orig_lab_to_otu[orig] = otu_id
    for tax in aln.taxon_namespace:
        try:
            tax.label = orig_lab_to_otu[tax.label].encode('ascii')
        except KeyError:
            # NOTE(review): the message says the taxon is being removed, but
            # nothing is removed in this function - presumably pruning
            # happens downstream; confirm.
            sys.stderr.write("{} doesn't have an otu id. It is being removed from the alignement. This may indicate a mismatch between tree and alignement\n".format(tax.label))
    #need to prune tree to seqs and seqs to tree...
    ott_ids = get_subtree_otus(nexson,
                               tree_id=tree_id,
                               subtree_id="ingroup",
                               return_format="ottid")
    if not ott_ids:
        # No ingroup recorded for this tree: fall back to the mrca of every
        # otu in the tree instead of handing an empty value to get_mrca_ott.
        ott_ids = [otu.get(u'^ot:ottId') for otu in otu_dict.values()]
    ott_mrca = get_mrca_ott(ott_ids)
    otu_newick = tre.as_string(schema="newick")
    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir) #newick should be bare, but alignment should be DNACharacterMatrix
예제 #4
0
)
import dendropy

# Inputs for this example run: configuration file name, Open Tree study and
# tree identifiers, and the name of the working directory.
configfi = "aws.config"
study_id = "ot_350"
tree_id = "Tr53297"
workdir = "scrape_ot_350"

# Read in the configuration information
conf = physcraper.ConfigObj(configfi)

# Get an existing tree from the Open Tree of Life, and convert it to newick
# format with tips labelled by 'ot:originalLabel'.
nexson = physcraper.opentree_helpers.get_nexson(study_id, 'api')
newick = extract_tree(
    nexson, tree_id,
    PhyloSchema('newick',
                output_nexml2json='1.2.1',
                content="tree",
                tip_label="ot:originalLabel"))

# Parse the newick string into a dendropy Tree, preserving underscores in labels.
tre = dendropy.Tree.get(data=newick,
                        schema="newick",
                        preserve_underscores=True)

# Pull down an alignment from treebase.
dataset = physcraper.opentree_helpers.get_dataset_from_treebase(
    study_id, phylesystem_loc='api')

# Starts as None; presumably set by the matrix-selection loop that follows.
aln = None
# The order of the data matrices is arbitrary, so we choose one whose length matches the number of taxa in the tree
for mat in dataset.char_matrices:
    if len(mat) == len(tre.taxon_namespace):
예제 #5
0
# Run name is taken from the command-line argument at index 5.
runname = sys.argv[5]

# Fixed values
E_VALUE_THRESH = 0.04  # presumably the e-value cutoff for BLAST hits - TODO confirm
ott_ncbi = "../ott_ncbi"  #TODO config file
Entrez.email = "*****@*****.**"

# Fetch the study from phylesystem; the first element of the returned value is
# used as the nexson object below.
phy = Phylesystem()
n = phy.return_study(study_id)[0]
# NOTE(review): the return value of this call is discarded; study_id, tree_id
# and api_wrapper are defined outside this excerpt.
api_wrapper.study.get(study_id, tree=tree_id)

# This is a weird way to get the ingroup node, but I need the OTT ids anyhow.
# `m` is the newick string of the "ingroup" subtree with tips labelled by
# 'ot:ottId'.
m = extract_tree(n,
                 tree_id,
                 PhyloSchema('newick',
                             output_nexml2json='1.2.1',
                             content="tree",
                             tip_label="ot:ottId"),
                 subtree_id="ingroup")
import re  # script-local import; used for delimiter-aware tip-label matching

# Collect the OTT ids of all OTUs whose id occurs as a tip label in the
# ingroup newick string `m`. OTUs that carry an OTT id which is not found in
# `m` are printed.
otu_dict = gen_otu_dict(n)
ottids = []
for oid, o in otu_dict.items():
    try:
        ottid = o[u'^ot:ottId']
    except KeyError:
        # OTU is not mapped to OTT: nothing to look for in the tree.
        continue
    # A tip label is followed by ':', ')' or ','. The lookbehind rejects
    # matches where the id is only the tail of a longer number (e.g. id 23
    # must not match the tip "123:"), which a plain substring test accepts.
    tip_pattern = r"(?<![0-9]){}[:),]".format(re.escape("{}".format(ottid)))
    if re.search(tip_pattern, m):
        ottids.append(ottid)
    else:
        print(o)
예제 #6
0
 def testTreeExport(self):
     """Exporting 'tree3' with the 'nexus' schema yields text that starts with '#'."""
     nexson = pathmap.nexson_obj('10/pg_10.json')
     nexus_schema = PhyloSchema('nexus', tip_label='ot:ottTaxonName')
     exported = extract_tree(nexson, 'tree3', nexus_schema)
     self.assertTrue(exported.startswith('#'))
예제 #7
0
def generate_ATT_from_phylesystem(aln,
                                  workdir,
                                  config_obj,
                                  study_id,
                                  tree_id,
                                  phylesystem_loc='api',
                                  ingroup_mrca=None):
    """Gather tree, alignment, and study info; force names to otu_ids.

    Study and tree ID's can be obtained by using python ./scripts/find_trees.py LINEAGE_NAME

    Spaces vs underscores kept being an issue, so all spaces are coerced to
    underscores when data are read in (alignment labels, newick string and
    original labels alike).

    :param aln: dendropy :class:`DnaCharacterMatrix <dendropy.datamodel.charmatrixmodel.DnaCharacterMatrix>` alignment object
    :param workdir: path to working directory (made absolute before use)
    :param config_obj: config class containing the settings
    :param study_id: OToL study id of the corresponding phylogeny which shall be updated
    :param tree_id: OToL corresponding tree ID as some studies have several phylogenies
    :param phylesystem_loc: access the github version of the OpenTree data store, or a local clone
    :param ingroup_mrca: optional. Either a single OToL identifier of the mrca
        of the clade that shall be updated (anything ``int()`` accepts), or a
        list/tuple/set of OTT ids whose mrca is looked up. When omitted, the
        mrca of the tree's ingroup is used, or of the whole tree if no
        ingroup is defined.
    :return: object of class ATT
    """
    assert isinstance(aln, datamodel.charmatrixmodel.DnaCharacterMatrix), \
        "your alignment `{}` is not of type DnaCharacterMatrix".format(aln)
    for tax in aln.taxon_namespace:
        # Forcing all spaces to underscore
        tax.label = tax.label.replace(" ", "_")
    nexson = get_nexson(study_id, phylesystem_loc)
    newick = extract_tree(
        nexson, tree_id,
        PhyloSchema('newick',
                    output_nexml2json='1.2.1',
                    content="tree",
                    tip_label="ot:originalLabel"))
    # UGH Very heavy handed, need to make sure happens on alignment side as well.
    newick = newick.replace(" ", "_")
    tre = Tree.get(data=newick,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)
    # this gets the taxa that are in the subtree with all of their info - ott_id, original name,
    otus = get_subtree_otus(nexson, tree_id=tree_id)
    otu_dict = {}
    orig_lab_to_otu = {}
    for otu_id in otus:
        otu = extract_otu_nexson(nexson, otu_id)[otu_id]
        otu["^physcraper:status"] = "original"
        otu["^physcraper:last_blasted"] = None
        otu_dict[otu_id] = otu
        orig = otu.get(u"^ot:originalLabel").replace(" ", "_")
        orig_lab_to_otu[orig] = otu_id
    for tax in aln.taxon_namespace:
        if tax.label in otu_dict:
            # Label already is an otu id; nothing to translate.
            sys.stdout.write("{} aligned\n".format(tax.label))
        else:
            try:
                tax.label = orig_lab_to_otu[tax.label].encode("ascii")
            except KeyError:
                sys.stderr.write(
                    "{} doesn't have an otu id. It is being removed from the alignment. "
                    "This may indicate a mismatch between tree and alignment\n"
                    .format(tax.label))
    # need to prune tree to seqs and seqs to tree...
    otu_newick = tre.as_string(schema="newick")
    if ingroup_mrca:
        if isinstance(ingroup_mrca, (list, tuple, set, frozenset)):
            # A collection of OTT ids: look up their mrca.
            ott_mrca = get_mrca_ott(set(ingroup_mrca))
        else:
            ott_mrca = int(ingroup_mrca)
    else:
        # Only needed when the caller did not supply an mrca.
        ott_ids = get_subtree_otus(nexson,
                                   tree_id=tree_id,
                                   subtree_id="ingroup",
                                   return_format="ottid")
        if not ott_ids:
            # if no ingroup is specified, ott_ids will be none:
            # just get the mrca for the whole tree
            ott_ids = [otu.get(u"^ot:ottId") for otu in otu_dict.values()]
        ott_mrca = get_mrca_ott(ott_ids)
    workdir = os.path.abspath(workdir)
    return physcraper.aligntreetax.AlignTreeTax(otu_newick,
                                                otu_dict,
                                                aln,
                                                ingroup_mrca=ott_mrca,
                                                workdir=workdir,
                                                config_obj=config_obj)
예제 #8
0
 def testTreeExport(self):
     """A 'nexus' export of 'tree3' must begin with the '#' character."""
     study = pathmap.nexson_obj('10/pg_10.json')
     nexus_text = extract_tree(
         study,
         'tree3',
         PhyloSchema('nexus', tip_label='ot:ottTaxonName'),
     )
     starts_with_hash = nexus_text.startswith('#')
     self.assertTrue(starts_with_hash)