Exemple #1
0
 def phylesystem_obj(self):
     """Return the phylesystem accessor, building it on first use.

     The instance is cached in ``self._phylesystem_obj``.  When
     ``self._src_code`` equals ``_GET_LOCAL`` a ``Phylesystem`` is created;
     otherwise a ``PhylesystemProxy`` is created from
     ``self.phylesystem_config``.
     """
     if self._phylesystem_obj is not None:
         # Already built on an earlier call.
         return self._phylesystem_obj
     use_local = self._src_code == _GET_LOCAL
     if use_local:
         self._phylesystem_obj = Phylesystem()
     else:
         self._phylesystem_obj = PhylesystemProxy(self.phylesystem_config)
     return self._phylesystem_obj
Exemple #2
0
 def phylesystem_obj(self):
     """Return the phylesystem accessor, building it on first use.

     The instance is cached in ``self._phylesystem_obj``.  When
     ``self._src_code`` equals ``_GET_LOCAL`` a ``Phylesystem`` is created
     with ``repos_dict=self._locals_repo_dict``; otherwise a
     ``PhylesystemProxy`` is created from ``self.phylesystem_config``.
     """
     if self._phylesystem_obj is not None:
         # Already built on an earlier call.
         return self._phylesystem_obj
     use_local = self._src_code == _GET_LOCAL
     if use_local:
         self._phylesystem_obj = Phylesystem(repos_dict=self._locals_repo_dict)
     else:
         self._phylesystem_obj = PhylesystemProxy(self.phylesystem_config)
     return self._phylesystem_obj
Exemple #3
0
 def testCachedValidation(self):
     try:
         # noinspection PyPackageRequirements
         import dogpile.cache
     except:
         pass
     else:
         r = pathmap.get_test_repos()
         p = Phylesystem(r)
         nexson, sha = p.return_study('xy_10')
         r = p.add_validation_annotation(nexson, sha)
         cache_hits = p._cache_hits
         r1 = p.add_validation_annotation(nexson, sha)
         self.assertEqual(1 + cache_hits, p._cache_hits)
         self.assertEqual(r, r1)
         write_as_json(nexson, sys.stdout)
Exemple #4
0
 def testCachedValidation(self):
     try:
         import dogpile.cache
     except:
         pass
     else:
         r = pathmap.get_test_repos()
         p = Phylesystem(r)
         nexson, sha = p.return_study('xy_10')
         r = p.add_validation_annotation(nexson, sha)
         cache_hits = p._cache_hits
         r1 = p.add_validation_annotation(nexson, sha)
         self.assertEqual(1 + cache_hits, p._cache_hits)
         self.assertEqual(r, r1)
         import sys
         from peyotl import write_as_json
         write_as_json(nexson, sys.stdout)
mattype="fasta"
print study_id
'''

# Positional command-line arguments.  All five are read unconditionally, so
# a missing argument raises IndexError.
study_id = sys.argv[1]
tree_id = sys.argv[2]
seqaln = sys.argv[3]  # presumably the path to the sequence alignment -- confirm
mattype = sys.argv[4]  # presumably the alignment format, e.g. "fasta" -- confirm
runname = sys.argv[5]

#Fixed values
E_VALUE_THRESH = 0.04
ott_ncbi = "../ott_ncbi"  #TODO config file
Entrez.email = "*****@*****.**"

# Load the study from the phylesystem; only the first element of the value
# returned by return_study() is kept.
phy = Phylesystem()
n = phy.return_study(study_id)[0]
# NOTE(review): the return value of this call is discarded -- confirm whether
# it is needed for a side effect or is left over from debugging.
api_wrapper.study.get(study_id, tree=tree_id)

##This is a weird way to get the ingroup node, but I need the OTT ids anyhow.
# Extracts the "ingroup" subtree of the requested tree using a newick schema
# with tips labelled by "ot:ottId".
m = extract_tree(n,
                 tree_id,
                 PhyloSchema('newick',
                             output_nexml2json='1.2.1',
                             content="tree",
                             tip_label="ot:ottId"),
                 subtree_id="ingroup")
otu_dict = gen_otu_dict(n)
# Filled by the loop over otu_dict that follows.
ottids = []
for oid, o in otu_dict.items():
    try:
Exemple #6
0
from peyotl.phylesystem.git_workflows import acquire_lock_raise, \
                                             commit_and_try_merge2master, \
                                             delete_study, \
                                             GitWorkflowError, \
                                             merge_from_master
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.utility.input_output import read_as_json
import unittest
import codecs
import json
import copy
from peyotl.test.support import pathmap
from peyotl.utility import get_logger
# Module-level logger named after this module.
_LOG = get_logger(__name__)

# Phylesystem built once at import time from the test repositories returned
# by pathmap.get_test_repos(); used by the test case below.
phylesystem = Phylesystem(pathmap.get_test_repos())

# NOTE(review): presumably the expected commit SHA of the mini test
# repository -- not referenced in the visible code, confirm usage.
_MINI_PHYL_SHA1 = '2d59ab892ddb3d09d4b18c91470b8c1c4cca86dc'
# Study id passed to create_git_action() in the test below.
_SID = 'xy_10'
# NOTE(review): presumably the author info passed to git workflow calls --
# not referenced in the visible code, confirm usage.
_AUTH = {
    'name': 'test_name',
    'email': '*****@*****.**',
    'login': '******',
}


class TestPhylesystemDel(unittest.TestCase):
    def testDelStudy(self):
        ga = phylesystem.create_git_action(_SID)
        ga.acquire_lock()
        try:
Exemple #7
0
#!/usr/bin/env python
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.nexson_syntax import extract_tree_nexson
import sys
# Build the Phylesystem wrapper up front; on any failure report the cause on
# stderr and exit with a hint about the configuration.
try:
    phylsys = Phylesystem()
except Exception as e:
    # Format the exception itself rather than ``e.message``: that attribute
    # was deprecated by PEP 352 and does not exist on Python 3, where reading
    # it would raise AttributeError inside this handler.
    sys.stderr.write('count_trees.py: Exception: {}\n'.format(e))
    sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem '
             'instance. Double check your configuration (see '
             'http://opentreeoflife.github.io/peyotl/configuration/ for info).')
try:
    print_freq = 500
    num_trees = 0
    num_studies = 0
    max_trees_per_study = 0
    biggest_study = None
    studies_without_trees = []
    sys.stderr.write('count_trees.py: beginning loop over studies...\n')
    for study_id, nexson in phylsys.iter_study_objs():
        num_studies += 1
        try:
            nt = len(extract_tree_nexson(nexson, tree_id=None))
        except:
            sys.stderr.write(
                'Problem extracting trees from study {}'.format(study_id))
            raise
        if nt == 0:
            studies_without_trees.append(study_id)
        else:
            num_trees += nt
def write_tree_list(outpath):
    """Score every tree in the phylesystem and write the ranking as CSV.

    For each study yielded by ``Phylesystem().iter_study_objs()`` and each
    tree returned by ``extract_tree_nexson`` a ``Row`` is filled with
    per-study flags, per-tree flags, the counts returned by
    ``examine_tree`` and the conflict/resolve counts looked up in
    ``read_conflict_analyses()``.  Rows are sorted (best score first) and
    written to *outpath* with a header line.  Progress and final totals are
    printed to stdout.

    Args:
        outpath: Path of the CSV file to create (opened with UTF-8 encoding).
    """
    conflict_analyses = read_conflict_analyses()
    trees_in_synthesis = read_synthesis_list()
    taxa_in_synthesis = read_synthesis_taxa()
    phylesystem = Phylesystem()
    progress_interval = 500  # print a progress line every N trees
    study_count = 0
    tree_count = 0
    preferred_count = 0
    table = []
    for study_id, nexson in phylesystem.iter_study_objs():
        study_count += 1
        nexml_el = nexson[u'nexml']

        # Per-study "intended for synthesis" flag: 0 when the study is
        # explicitly marked as not intended, 2 otherwise.
        # NOTE(review): the original code first assigned 1 and then always
        # overwrote it, so only 0 and 2 are ever produced.  A middle value
        # for "not specified" may have been intended -- confirm before
        # changing, since it affects the score.
        not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis')
        if not_intended == True:  # noqa: E712 -- equality kept on purpose
            n_intended = 0
        else:
            n_intended = 2

        candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis')
        if candidates is None:
            candidates = []
        tid_tree_otug = extract_tree_nexson(nexson, tree_id=None)

        # Both checks are the same for every tree of the study.
        no_selection_made = not candidates
        only_one_tree = len(tid_tree_otug) == 1

        for (tree_id, tree, otu_group) in tid_tree_otug:
            tree_count += 1
            row = Row()

            long_id = '%s@%s' % (study_id, tree_id)
            row.id = long_id

            row.n_intended = n_intended  # per study

            if no_selection_made:
                # Only one tree: use it (2).  Several: decision required (1).
                n_preferred = 2 if only_one_tree else 1
            elif tree_id in candidates:
                preferred_count += 1
                n_preferred = 2  # This is a preferred tree; use it
            else:
                n_preferred = 0  # Not preferred, another is; do not use
            row.n_preferred = n_preferred

            # whether a non-empty 'curated type' is recorded
            ctype = tree.get('^ot:curatedType')
            row.n_ctype = 1 if (ctype is not None and ctype != '') else 0

            # whether a curator has confirmed the root
            root = tree.get('^ot:specifiedRoot')
            row.root_confirmed = 1 if (root is not None and root != '') else 0

            row.n_synth = 1 if long_id in trees_in_synthesis else 0

            ingroup_node_id = tree.get('^ot:inGroupClade')
            row.n_ingroup = 1 if ingroup_node_id is not None else 0

            (row.tip_count, row.ott_count, row.new_count) = \
                examine_tree(tree, otu_group, ingroup_node_id, taxa_in_synthesis)

            row.conflict_count = 0
            row.resolve_count = 0
            analysis = conflict_analyses.get(long_id)
            if analysis is not None:
                row.conflict_count = int(analysis[1])
                row.resolve_count = int(analysis[2])

            # Weighted score: conflicts are penalised; a designated ingroup,
            # preferred status and intended status are rewarded.
            row.score = ((row.new_count + row.resolve_count) -
                         (row.conflict_count * 20) + (row.n_ingroup * 10) +
                         (row.n_preferred * 50) + (row.n_intended * 100))

            table.append(row)
            if tree_count % progress_interval == 0:
                # Single pre-formatted argument: prints the same text under
                # both the Python 2 print statement and Python 3 print().
                print('%s %s %s' % (tree_count, long_id, ctype))

    table.sort(key=lambda r: (
        -r.score,
        r.n_intended == 0,  # whether intended for synthesis
        -r.n_preferred,  # whether preferred
        -r.n_ingroup,  # whether ingroup is designated
        r.conflict_count,  # number of synth tree conflicts
        -r.new_count,  # number of OTUs mapped to OTT
        -r.n_ctype,  # whether there's a 'curated type'
        -r.tip_count,  # total number of tips (for comparison)
    ))

    with codecs.open(outpath, 'w', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([
            'tree', 'intended', 'preferred', 'has ingroup', 'has method',
            'root confirmed', 'in synth', '#tips', '#mapped', '#new',
            '#resolved', '#conflicts', 'score'
        ])
        for row in table:
            writer.writerow([
                row.id, row.n_intended, row.n_preferred, row.n_ingroup,
                row.n_ctype, row.root_confirmed, row.n_synth, row.tip_count,
                row.ott_count, row.new_count, row.resolve_count,
                row.conflict_count, row.score
            ])

    print('studies: %s' % study_count)
    print('trees: %s' % tree_count)
    print('preferred: %s' % preferred_count)