Exemple #1
0
 def testCachedValidation(self):
     try:
         # noinspection PyPackageRequirements
         import dogpile.cache
     except:
         pass
     else:
         r = pathmap.get_test_repos()
         p = Phylesystem(r)
         nexson, sha = p.return_study('xy_10')
         r = p.add_validation_annotation(nexson, sha)
         cache_hits = p._cache_hits
         r1 = p.add_validation_annotation(nexson, sha)
         self.assertEqual(1 + cache_hits, p._cache_hits)
         self.assertEqual(r, r1)
         write_as_json(nexson, sys.stdout)
Exemple #2
0
 def phylesystem_obj(self):
     """Return the phylesystem wrapper, creating it on first access.

     When ``self._phylesystem_obj`` is still ``None`` it is filled with a
     ``Phylesystem`` if ``self._src_code`` equals ``_GET_LOCAL``, otherwise
     with a ``PhylesystemProxy`` built from ``self.phylesystem_config``.
     Later calls return the stored object unchanged.
     """
     if self._phylesystem_obj is not None:
         return self._phylesystem_obj
     use_local = self._src_code == _GET_LOCAL
     self._phylesystem_obj = (
         Phylesystem() if use_local
         else PhylesystemProxy(self.phylesystem_config)
     )
     return self._phylesystem_obj
Exemple #3
0
 def testCachedValidation(self):
     try:
         import dogpile.cache
     except:
         pass
     else:
         r = pathmap.get_test_repos()
         p = Phylesystem(r)
         nexson, sha = p.return_study('xy_10')
         r = p.add_validation_annotation(nexson, sha)
         cache_hits = p._cache_hits
         r1 = p.add_validation_annotation(nexson, sha)
         self.assertEqual(1 + cache_hits, p._cache_hits)
         self.assertEqual(r, r1)
         import sys; from peyotl import write_as_json; 
         write_as_json(nexson, sys.stdout)
Exemple #4
0
 def testCachedValidation(self):
     try:
         import dogpile.cache
     except:
         pass
     else:
         r = pathmap.get_test_repos()
         p = Phylesystem(r)
         nexson, sha = p.return_study('xy_10')
         r = p.add_validation_annotation(nexson, sha)
         cache_hits = p._cache_hits
         r1 = p.add_validation_annotation(nexson, sha)
         self.assertEqual(1 + cache_hits, p._cache_hits)
         self.assertEqual(r, r1)
         import sys
         from peyotl import write_as_json
         write_as_json(nexson, sys.stdout)
Exemple #5
0
 def phylesystem_obj(self):
     """Return the phylesystem wrapper, creating it on first access.

     When ``self._phylesystem_obj`` is still ``None`` it is filled with a
     ``Phylesystem`` built from ``self._locals_repo_dict`` if
     ``self._src_code`` equals ``_GET_LOCAL``, otherwise with a
     ``PhylesystemProxy`` built from ``self.phylesystem_config``.  Later
     calls return the stored object unchanged.
     """
     if self._phylesystem_obj is not None:
         return self._phylesystem_obj
     wants_local = self._src_code == _GET_LOCAL
     if wants_local:
         wrapper = Phylesystem(repos_dict=self._locals_repo_dict)
     else:
         wrapper = PhylesystemProxy(self.phylesystem_config)
     self._phylesystem_obj = wrapper
     return self._phylesystem_obj
#!/usr/bin/env python
"""Lists the absolute filepath for every study in the
phylesystem directories that the peyotl library can
find (see README for discussion of configuration).
"""
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
# Walk every (study id, file path) pair the phylesystem wrapper yields and
# print only the path, one per line.
phy = Phylesystem()
for _study_id, study_path in phy.iter_study_filepaths():
    print(study_path)
#!/usr/bin/env python
"""Examines the tags (ot:tag) study. Prints out a list 
of each unique tag used in the studies """
from peyotl.manip import iter_trees
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.nexson_syntax import get_nexml_el
from collections import defaultdict
import codecs
import sys

def _count_tags(counter, tags):
    """Add one to `counter` for a single tag, or for each tag in a list.

    A falsy `tags` value (missing key, empty string, empty list) is ignored.
    """
    if not tags:
        return
    if isinstance(tags, list):
        for tag in tags:
            counter[tag] += 1
    else:
        counter[tags] += 1


phy = Phylesystem()
study_dict = defaultdict(int)  # tag -> number of studies carrying it
tree_dict = defaultdict(int)  # tag -> number of trees carrying it
out = codecs.getwriter("utf-8")(sys.stdout)
for study_id, n in phy.iter_study_objs():
    # Study-level tags are read from the nexml element.
    nexml = get_nexml_el(n)
    _count_tags(study_dict, nexml.get("^ot:tag"))
    for trees_group_id, tree_id, tree in iter_trees(n):
        # Tree-level tags go into their own counter; they were previously
        # added to study_dict, which left tree_dict permanently empty, and a
        # scalar (non-list) tree tag was not counted at all.
        _count_tags(tree_dict, tree.get("^ot:tag"))
mattype="fasta"
print study_id
'''

# Positional command-line arguments; all five are read unconditionally, so a
# missing one raises IndexError here.
study_id = sys.argv[1]
tree_id = sys.argv[2]
seqaln = sys.argv[3]
mattype = sys.argv[4]
runname = sys.argv[5]

#Fixed values
E_VALUE_THRESH = 0.04
ott_ncbi = "../ott_ncbi"  #TODO config file
Entrez.email = "*****@*****.**"

# Fetch the study from the phylesystem wrapper; only the first element of
# what return_study() gives back is kept.
phy = Phylesystem()
n = phy.return_study(study_id)[0]
# NOTE(review): the result of this call is discarded -- confirm whether it is
# still needed or is a leftover.
api_wrapper.study.get(study_id, tree=tree_id)

##This is a weird way to get the ingroup node, but I need the OTT ids anyhow.
# Extract the "ingroup" subtree of the requested tree as newick, with tips
# labelled by the ot:ottId property.
m = extract_tree(n,
                 tree_id,
                 PhyloSchema('newick',
                             output_nexml2json='1.2.1',
                             content="tree",
                             tip_label="ot:ottId"),
                 subtree_id="ingroup")
otu_dict = gen_otu_dict(n)
# Filled by the loop that follows.
ottids = []
for oid, o in otu_dict.items():
    try:
Exemple #9
0
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.nexson_syntax import get_nexml_el
from peyotl.manip import iter_otus
from collections import defaultdict
import argparse
import codecs
import sys
import os

# Command line: one positional argument naming the file to create.
description = __doc__
prog = os.path.basename(sys.argv[0])
parser = argparse.ArgumentParser(prog=prog, description=description)
parser.add_argument('output')
args = parser.parse_args(sys.argv[1:])

# Refuse to overwrite an existing file.
if os.path.exists(args.output):
    sys.exit('{} already exists! Exiting...\n'.format(args.output))

phy = Phylesystem()
# One tab-separated line per mapped OTU: study id, otu id, original label,
# OTT taxon name.
row_template = u'{s}\t{o}\t{r}\t{m}\n'
with codecs.open(args.output, 'w', encoding='utf-8') as out:
    num_unmapped = 0
    for study_id, n in phy.iter_study_objs():
        for og, otu_id, otu in iter_otus(n):
            if '^ot:ottTaxonName' not in otu:
                # OTUs without an OTT taxon name are only counted.
                num_unmapped += 1
                continue
            out.write(row_template.format(s=study_id,
                                          o=otu_id,
                                          r=otu['^ot:originalLabel'],
                                          m=otu['^ot:ottTaxonName']))
sys.stderr.write('{n:d} unmapped otus\n'.format(n=num_unmapped))
Exemple #10
0
from peyotl.phylesystem.git_workflows import acquire_lock_raise, \
                                             commit_and_try_merge2master, \
                                             delete_study, \
                                             GitWorkflowError, \
                                             merge_from_master
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.utility.input_output import read_as_json
import unittest
import codecs
import json
import copy
from peyotl.test.support import pathmap
from peyotl.utility import get_logger
# Module-level logger named after this module.
_LOG = get_logger(__name__)

# Phylesystem wrapper shared by the tests in this module; it is built once at
# import time from the repos that pathmap.get_test_repos() returns.
phylesystem = Phylesystem(pathmap.get_test_repos())

# NOTE(review): presumably a commit SHA in the mini test repo -- it is not
# referenced in the visible part of this module; confirm.
_MINI_PHYL_SHA1 = '2d59ab892ddb3d09d4b18c91470b8c1c4cca86dc'
# Study id handed to phylesystem.create_git_action() in testDelStudy.
_SID = 'xy_10'
# NOTE(review): looks like author/credential info for git operations -- it is
# not referenced in the visible part of this module; confirm.
_AUTH = {
    'name': 'test_name',
    'email': '*****@*****.**',
    'login': '******',
}


class TestPhylesystemDel(unittest.TestCase):
    def testDelStudy(self):
        ga = phylesystem.create_git_action(_SID)
        ga.acquire_lock()
        try:
Exemple #11
0
#!/usr/bin/env python
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.nexson_syntax import extract_tree_nexson
import sys
# Build the phylesystem wrapper up front; on any failure report the cause on
# stderr and exit with a hint about configuration.
try:
    phylsys = Phylesystem()
except Exception as e:
    # Format the exception itself rather than `e.message`: that attribute
    # does not exist on Python 3 exceptions, so reading it would raise
    # AttributeError and hide the real error.
    sys.stderr.write('count_trees.py: Exception: {}\n'.format(e))
    sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem '
             'instance. Double check your configuration (see '
             'http://opentreeoflife.github.io/peyotl/configuration/ for info).')
try:
    print_freq = 500
    num_trees = 0
    num_studies = 0
    max_trees_per_study = 0
    biggest_study = None
    studies_without_trees = []
    sys.stderr.write('count_trees.py: beginning loop over studies...\n')
    for study_id, nexson in phylsys.iter_study_objs():
        num_studies += 1
        try:
            nt = len(extract_tree_nexson(nexson, tree_id=None))
        except:
            sys.stderr.write(
                'Problem extracting trees from study {}'.format(study_id))
            raise
        if nt == 0:
            studies_without_trees.append(study_id)
        else:
            num_trees += nt
Exemple #12
0
#!/usr/bin/env python
from __future__ import print_function
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.nexson_syntax import extract_tree_nexson
import sys
# Build the phylesystem wrapper up front; on any failure report the cause on
# stderr and exit with a hint about configuration.
try:
    phylsys = Phylesystem()
except Exception as e:
    # Format the exception itself rather than `e.message`: that attribute
    # does not exist on Python 3 exceptions, so reading it would raise
    # AttributeError and hide the real error.
    sys.stderr.write('count_trees.py: Exception: {}\n'.format(e))
    sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem '
             'instance. Double check your configuration (see '
             'http://opentreeoflife.github.io/peyotl/configuration/ for info).')
try:
    print_freq = 500
    num_trees = 0
    num_studies = 0
    max_trees_per_study = 0
    biggest_study = None
    studies_without_trees = []
    sys.stderr.write('count_trees.py: beginning loop over studies...\n')
    for study_id, nexson in phylsys.iter_study_objs():
        num_studies += 1
        try:
            nt = len(extract_tree_nexson(nexson, tree_id=None))
        except:
            sys.stderr.write('Problem extracting trees from study {}'.format(study_id))
            raise
        if nt == 0:
            studies_without_trees.append(study_id)
        else:
            num_trees += nt
def write_tree_list(outpath):
    """Score every tree in the phylesystem and write the ranking as CSV.

    Each tree yielded by ``extract_tree_nexson`` for each study becomes one
    ``Row`` carrying study-level and tree-level indicators plus the counts
    returned by ``examine_tree``.  A score is computed per row, the rows are
    sorted (highest score first, ties broken by the key documented at the
    sort call) and written to `outpath` with a header line.  Study, tree
    and preferred-tree totals are printed to stdout at the end.

    :param outpath: path of the CSV file to write (opened as UTF-8, mode 'w').
    """
    conflict_analyses = read_conflict_analyses()
    trees_in_synthesis = read_synthesis_list()
    taxa_in_synthesis = read_synthesis_taxa()
    phylesystem = Phylesystem()
    study_count = 0
    tree_count = 0
    preferred_count = 0
    table = []
    for study_id, nexson in phylesystem.iter_study_objs():
        study_count += 1
        nexml_el = nexson[u'nexml']
        # Study-level flag: 0 when the study is marked not intended for
        # synthesis, 2 otherwise.  Equality (not identity or truthiness) is
        # kept on purpose so exactly the values that matched before match now.
        not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis')
        n_intended = 0 if not_intended == True else 2  # noqa: E712
        candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis')
        if candidates is None:
            candidates = []
        tid_tree_otug = extract_tree_nexson(nexson, tree_id=None)
        for (tree_id, tree, otu_group) in tid_tree_otug:
            tree_count += 1
            row = Row()

            long_id = '%s@%s' % (study_id, tree_id)
            row.id = long_id

            row.n_intended = n_intended  # per study

            if not candidates:  # No selection(s) made
                if len(tid_tree_otug) == 1:
                    n_preferred = 2    # Only one tree; use it
                else:
                    n_preferred = 1    # More than one tree; decision required
            elif tree_id in candidates:
                preferred_count += 1
                n_preferred = 2    # This is a preferred tree; use it
            else:
                n_preferred = 0    # Not preferred, another is; do not use
            row.n_preferred = n_preferred

            # whether a non-empty 'curated type' is recorded
            ctype = tree.get('^ot:curatedType')
            row.n_ctype = 1 if (ctype is not None and ctype != '') else 0

            # whether a curator has confirmed the root
            root = tree.get('^ot:specifiedRoot')
            row.root_confirmed = 1 if (root is not None and root != '') else 0

            row.n_synth = 1 if long_id in trees_in_synthesis else 0

            ingroup_node_id = tree.get('^ot:inGroupClade')
            row.n_ingroup = 1 if ingroup_node_id is not None else 0

            (row.tip_count, row.ott_count, row.new_count) = \
                examine_tree(tree, otu_group, ingroup_node_id, taxa_in_synthesis)

            row.conflict_count = 0
            row.resolve_count = 0
            analysis = conflict_analyses.get(long_id)
            if analysis is not None:
                row.conflict_count = int(analysis[1])
                row.resolve_count = int(analysis[2])

            # Weighted sum: conflicts are penalised, the ingroup/preferred/
            # intended indicators are rewarded with increasing weight.
            row.score = ((row.new_count + row.resolve_count) -
                         (row.conflict_count * 20) +
                         (row.n_ingroup * 10) +
                         (row.n_preferred * 50) +
                         (row.n_intended * 100))

            table.append(row)
            if tree_count % 500 == 0:
                # Progress line; a single pre-formatted argument prints the
                # same text under both Python 2 and Python 3.
                print('%s %s %s' % (tree_count, long_id, ctype))
    table.sort(key=lambda row: (-row.score,
                                row.n_intended == 0,   # whether intended for synthesis
                                -row.n_preferred,   # whether preferred
                                -row.n_ingroup,   # whether ingroup is designated
                                row.conflict_count,    # number of synth tree conflicts
                                -row.new_count,   # number of OTUs mapped to OTT
                                -row.n_ctype,   # whether there's a 'curated type'
                                -row.tip_count,   # total number of tips (for comparison)
                                ))
    with codecs.open(outpath, 'w', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['tree', 'intended', 'preferred', 'has ingroup',
                         'has method', 'root confirmed', 'in synth', '#tips',
                         '#mapped', '#new', '#resolved', '#conflicts',
                         'score'])
        for row in table:
            writer.writerow([row.id, row.n_intended, row.n_preferred,
                             row.n_ingroup, row.n_ctype,
                             row.root_confirmed, row.n_synth,
                             row.tip_count, row.ott_count,
                             row.new_count,
                             row.resolve_count,
                             row.conflict_count,
                             row.score])
    print('studies: %s' % study_count)
    print('trees: %s' % tree_count)
    print('preferred: %s' % preferred_count)
def write_tree_list(outpath):
    """Score every tree in the phylesystem and write the ranking as CSV.

    Each tree yielded by ``extract_tree_nexson`` for each study becomes one
    ``Row`` carrying study-level and tree-level indicators plus the counts
    returned by ``examine_tree``.  A score is computed per row, the rows are
    sorted (highest score first, ties broken by the key documented at the
    sort call) and written to `outpath` with a header line.  Study, tree
    and preferred-tree totals are printed to stdout at the end.

    :param outpath: path of the CSV file to write (opened as UTF-8, mode 'w').
    """
    conflict_analyses = read_conflict_analyses()
    trees_in_synthesis = read_synthesis_list()
    taxa_in_synthesis = read_synthesis_taxa()
    phylesystem = Phylesystem()
    study_count = 0
    tree_count = 0
    preferred_count = 0
    table = []
    for study_id, nexson in phylesystem.iter_study_objs():
        study_count += 1
        nexml_el = nexson[u'nexml']
        # Study-level flag: 0 when the study is marked not intended for
        # synthesis, 2 otherwise.  Equality (not identity or truthiness) is
        # kept on purpose so exactly the values that matched before match now.
        not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis')
        n_intended = 0 if not_intended == True else 2  # noqa: E712
        candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis')
        if candidates is None:
            candidates = []
        tid_tree_otug = extract_tree_nexson(nexson, tree_id=None)
        for (tree_id, tree, otu_group) in tid_tree_otug:
            tree_count += 1
            row = Row()

            long_id = '%s@%s' % (study_id, tree_id)
            row.id = long_id

            row.n_intended = n_intended  # per study

            if not candidates:  # No selection(s) made
                if len(tid_tree_otug) == 1:
                    n_preferred = 2  # Only one tree; use it
                else:
                    n_preferred = 1  # More than one tree; decision required
            elif tree_id in candidates:
                preferred_count += 1
                n_preferred = 2  # This is a preferred tree; use it
            else:
                n_preferred = 0  # Not preferred, another is; do not use
            row.n_preferred = n_preferred

            # whether a non-empty 'curated type' is recorded
            ctype = tree.get('^ot:curatedType')
            row.n_ctype = 1 if (ctype is not None and ctype != '') else 0

            # whether a curator has confirmed the root
            root = tree.get('^ot:specifiedRoot')
            row.root_confirmed = 1 if (root is not None and root != '') else 0

            row.n_synth = 1 if long_id in trees_in_synthesis else 0

            ingroup_node_id = tree.get('^ot:inGroupClade')
            row.n_ingroup = 1 if ingroup_node_id is not None else 0

            (row.tip_count, row.ott_count, row.new_count) = \
                examine_tree(tree, otu_group, ingroup_node_id, taxa_in_synthesis)

            row.conflict_count = 0
            row.resolve_count = 0
            analysis = conflict_analyses.get(long_id)
            if analysis is not None:
                row.conflict_count = int(analysis[1])
                row.resolve_count = int(analysis[2])

            # Weighted sum: conflicts are penalised, the ingroup/preferred/
            # intended indicators are rewarded with increasing weight.
            row.score = ((row.new_count + row.resolve_count) -
                         (row.conflict_count * 20) + (row.n_ingroup * 10) +
                         (row.n_preferred * 50) + (row.n_intended * 100))

            table.append(row)
            if tree_count % 500 == 0:
                # Progress line; a single pre-formatted argument prints the
                # same text under both Python 2 and Python 3.
                print('%s %s %s' % (tree_count, long_id, ctype))
    table.sort(key=lambda row: (
        -row.score,
        row.n_intended == 0,  # whether intended for synthesis
        -row.n_preferred,  # whether preferred
        -row.n_ingroup,  # whether ingroup is designated
        row.conflict_count,  # number of synth tree conflicts
        -row.new_count,  # number of OTUs mapped to OTT
        -row.n_ctype,  # whether there's a 'curated type'
        -row.tip_count,  # total number of tips (for comparison)
    ))
    with codecs.open(outpath, 'w', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([
            'tree', 'intended', 'preferred', 'has ingroup', 'has method',
            'root confirmed', 'in synth', '#tips', '#mapped', '#new',
            '#resolved', '#conflicts', 'score'
        ])
        for row in table:
            writer.writerow([
                row.id, row.n_intended, row.n_preferred, row.n_ingroup,
                row.n_ctype, row.root_confirmed, row.n_synth, row.tip_count,
                row.ott_count, row.new_count, row.resolve_count,
                row.conflict_count, row.score
            ])
    print('studies: %s' % study_count)
    print('trees: %s' % tree_count)
    print('preferred: %s' % preferred_count)