Code Example #1
 def synthesize(self,
                reinitialize=True):
     synth_db = self.synthesis_db
     synth_ott_id = self.synth_ott_id
     log_filepath = self.log_filepath
     if reinitialize:
         load_db = self.load_db
         if os.path.abspath(load_db) != os.path.abspath(synth_db):
             if not os.path.exists(load_db):
                 f = 'loading a graph with reinitialize requires that the trees have been loaded into a loading db'
                 raise RuntimeError(f)
             self._remove_filepath(synth_db)
             _LOG.debug('copying "{s}" to "{d}"'.format(s=load_db, d=synth_db))
             shutil.copytree(load_db, synth_db)
     loaded_trees_json = self.loaded_trees_json
     if not os.path.exists(loaded_trees_json):
         f = '"{}" does not exist, so I can not tell what studies have been loaded'
         raise RuntimeError(f.format(loaded_trees_json))
     loaded = read_as_json(loaded_trees_json)
     return treemachine_synthesize(self.java_invoc,
                                   self.treemachine_jar,
                                   synth_db,
                                   synth_ott_id,
                                   loaded,
                                   log_filepath)
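Note: every example on this page uses peyotl's read_as_json (and several also use write_as_json) from peyotl.utility.input_output. As a rough sketch only, assuming these helpers are thin UTF-8 wrappers around the standard json module (the real peyotl implementations may take additional arguments), an equivalent pair could look like this:

# Sketch of read_as_json / write_as_json equivalents.
# Assumption: the peyotl helpers simply combine codecs.open with json.load / json.dump.
import codecs
import json

def read_as_json(filepath, encoding='utf-8'):
    # Parse the JSON document stored at `filepath` and return the resulting object.
    with codecs.open(filepath, 'r', encoding=encoding) as fo:
        return json.load(fo)

def write_as_json(blob, filepath, encoding='utf-8'):
    # Serialize `blob` back to `filepath` as UTF-8 JSON.
    with codecs.open(filepath, 'w', encoding=encoding) as fo:
        json.dump(blob, fo, indent=2, ensure_ascii=False)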
Code Example #2
 def testNextStudyIds(self):
     p = _Phylesystem(repos_dict=self.r)
     mf = p._growing_shard._id_minting_file
     nsi = p._mint_new_study_id()
     self.assertEqual(
         int(nsi.split('_')[-1]) + 1,
         read_as_json(mf)['next_study_id'])
     self.assertTrue(nsi.startswith('zz_'))
Code Example #3
 def _read_master_branch_resource(self, fn, is_json=False):
     '''This will force the current branch to master! '''
     with self._master_branch_repo_lock:
         ga = self._create_git_action_for_global_resource()
         with ga.lock():
             ga.checkout_master()
             if os.path.exists(fn):
                 if is_json:
                     return read_as_json(fn)
                 return codecs.open(fn, 'rU', encoding='utf-8').read()
             return None
Code Example #4
File: __init__.py  Project: mtholder/peyotl
def collection_to_included_trees(collection):
    """Takes a collection object (or a filepath to collection object), returns
    each element of the `decisions` list that has the decision set to included.
    """
    if is_str_type(collection):
        collection = read_as_json(collection)
    inc = []
    for d in collection.get('decisions', []):
        if d['decision'] == 'INCLUDED':
            inc.append(d)
    return inc
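To make the filtering in collection_to_included_trees concrete, it can be called directly on an in-memory dict (the miniature collection below is hypothetical, not a real Open Tree collection), since the function only reads from disk when it is handed a string path:

# Hypothetical miniature collection used only to illustrate the INCLUDED filter.
tiny_collection = {
    'decisions': [
        {'studyId': 'pg_99', 'treeId': 'tree1', 'decision': 'INCLUDED'},
        {'studyId': 'pg_99', 'treeId': 'tree2', 'decision': 'EXCLUDED'},
    ]
}
included = collection_to_included_trees(tiny_collection)
# Only decisions whose 'decision' field equals 'INCLUDED' are returned.
assert [d['treeId'] for d in included] == ['tree1']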
Code Example #5
File: phylesystem_shard.py  Project: rvosa/peyotl
 def _read_master_branch_resource(self, fn, is_json=False):
     '''This will force the current branch to master! '''
     with self._master_branch_repo_lock:
         ga = self._create_git_action_for_global_resource()
         with ga.lock():
             ga.checkout_master()
             if os.path.exists(fn):
                 if is_json:
                     return read_as_json(fn)
                 return codecs.open(fn, 'rU', encoding='utf-8').read()
             return None
Code Example #6
File: __init__.py  Project: mtholder/peyotl
def collection_to_included_trees(collection):
    """Takes a collection object (or a filepath to collection object), returns
    each element of the `decisions` list that has the decision set to included.
    """
    if is_str_type(collection):
        collection = read_as_json(collection)
    inc = []
    for d in collection.get('decisions', []):
        if d['decision'] == 'INCLUDED':
            inc.append(d)
    return inc
Code Example #7
File: git_shard.py  Project: mtholder/peyotl
 def _read_master_branch_resource(self, fn, is_json=False):
     """This will force the current branch to master! """
     with self._master_branch_repo_lock:
         ga = self._create_git_action_for_global_resource()
         with ga.lock():
             ga.checkout_master()
             if os.path.exists(fn):
                 if is_json:
                     return read_as_json(fn)
                 with codecs.open(fn, "rU", encoding="utf-8") as f:
                     ret = f.read()
                 return ret
             return None
Code Example #8
File: sync.py  Project: pombredanne/peyotl
 def _read_cached_or_refetch(self, paths, phylografter):
     lock_policy = self.lock_policy
     nexson_path = paths['nexson']
     lockfile = nexson_path + '.lock'
     owns_lock = lock_policy.wait_for_lock(lockfile)[1]
     nexson = None
     try:
         if os.path.exists(nexson_path):
             nexson = read_as_json(nexson_path)
     finally:
         if owns_lock:
             lock_policy.remove_lock()
     if nexson is None:
         return self.download_nexson_from_phylografter(paths, phylografter)
     return nexson
Code Example #9
File: sync.py  Project: rvosa/peyotl
 def _read_cached_or_refetch(self, paths, phylografter):
     lock_policy = self.lock_policy
     nexson_path = paths['nexson']
     lockfile = nexson_path + '.lock'
     owns_lock = lock_policy.wait_for_lock(lockfile)[1]
     nexson = None
     try:
         if os.path.exists(nexson_path):
             nexson = read_as_json(nexson_path)
     finally:
         if owns_lock:
             lock_policy.remove_lock()
     if nexson is None:
         return self.download_nexson_from_phylografter(paths, phylografter)
     return nexson
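Code examples #8 and #9 above show the same cached-or-refetch idiom: take a sibling ".lock" file, read the cached NexSON if it exists, release the lock, and only fall back to downloading when nothing was cached. The stand-alone sketch below illustrates that idiom with a plain O_CREAT|O_EXCL lockfile in place of peyotl's lock_policy object (whose API is not shown in these snippets), so the names here are illustrative only:

# Generic cached-or-refetch sketch; the lockfile handling is a simple stand-in,
# not peyotl's lock_policy implementation.
import codecs
import json
import os

def read_cached_or_refetch(nexson_path, refetch):
    lockfile = nexson_path + '.lock'
    owns_lock = False
    nexson = None
    try:
        # Atomic create; failure means another process currently holds the lock.
        fd = os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        os.close(fd)
        owns_lock = True
    except OSError:
        pass
    try:
        if os.path.exists(nexson_path):
            with codecs.open(nexson_path, 'r', encoding='utf-8') as fo:
                nexson = json.load(fo)
    finally:
        if owns_lock:
            os.remove(lockfile)
    if nexson is None:
        return refetch(nexson_path)  # caller-supplied download function
    return nexson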
Code Example #10
def treemachine_load_one_tree(java_invoc,
                              treemachine_jar_path,
                              db_path,
                              study_filepath,
                              tree_id,
                              log_filepath,
                              testing=False):
    _bail_if_file_not_found('study file', study_filepath)
    _bail_if_file_not_found('load db', db_path)
    nexson = read_as_json(study_filepath)
    sha = get_git_sha(nexson)
    java_invoc = _treemachine_start(java_invoc, treemachine_jar_path)
    java_invoc.extend(['pgloadind', db_path, study_filepath, tree_id, sha])
    verb = 'loading'
    if testing:
        java_invoc.append('f')
        verb = 'testing'
    _LOG.debug(
        '{v} tree {t} from NexSON from "{p}" and logging to "{l}"'.format(
            v=verb, t=tree_id, p=study_filepath, l=log_filepath))
    with codecs.open(log_filepath, 'a', encoding='utf-8') as logf:
        _run(java_invoc, stdout=logf)
    return sha
Code Example #11
File: __init__.py  Project: rvosa/peyotl
def treemachine_load_one_tree(java_invoc,
                              treemachine_jar_path,
                              db_path,
                              study_filepath,
                              tree_id,
                              log_filepath,
                              testing=False):
    _bail_if_file_not_found('study file', study_filepath)
    _bail_if_file_not_found('load db', db_path)
    nexson = read_as_json(study_filepath)
    sha = get_git_sha(nexson)
    java_invoc = _treemachine_start(java_invoc, treemachine_jar_path)
    java_invoc.extend(['pgloadind', db_path, study_filepath, tree_id, sha])
    verb = 'loading'
    if testing:
        java_invoc.append('f')
        verb = 'testing'
    _LOG.debug('{v} tree {t} from NexSON from "{p}" and logging to "{l}"'.format(v=verb,
                                                                                 t=tree_id,
                                                                                 p=study_filepath,
                                                                                 l=log_filepath))
    with codecs.open(log_filepath, 'a', encoding='utf-8') as logf:
        _run(java_invoc, stdout=logf)
    return sha
Code Example #12
File: cull-mappings.py  Project: mtholder/peyotl
#!/usr/bin/env python
from peyotl.utility.input_output import read_as_json, write_as_json
from peyotl import get_logger

import sys
import re

_LOG = get_logger('evaluate-auto-mapping')
if len(sys.argv) != 4:
    sys.exit('expecting an input file path for the JSON mapping file and '
             '2 output files for the plausible and implausible unmapped')
inf = sys.argv[1]
poutf = sys.argv[2]
ioutf = sys.argv[3]
_LOG.debug('Reading test cases from "{}"'.format(inf))
test_case_dict = read_as_json(inf)

possible = {}
impossible = {}

np = 0
ni = 0
for study_id, otu_list in test_case_dict.items():
    p = []
    i = []
    for el in otu_list:
        matches = el[1]
        orig = el[0].lower()
        is_plausible = False
        for m in matches:
            if m.lower() in orig:
Code Example #13
File: evaluate-auto-mapping.py  Project: rvosa/peyotl
#!/usr/bin/env python
from peyotl.utility.input_output import read_as_json, write_as_json
from peyotl import get_logger

import sys
import re
_LOG = get_logger('evaluate-auto-mapping')
if len(sys.argv) != 3:
    sys.exit(
        'expecting an input file path for the JSON mapping file and output file for the unmapped'
    )
inf = sys.argv[1]
outf = sys.argv[2]
_LOG.debug('Reading test cases from "{}"'.format(inf))
m = read_as_json(inf)


def no_op(orig):
    return [orig]


cf_pat = re.compile(r'([A-Z]{3,})[^a-z]cf[^a-z]([A-Z]{3,})[^a-z]?([A-Z]*)',
                    re.I)
aff_pat = re.compile(r'([A-Z]{3,})[^a-z]aff[^a-z]([A-Z]{3,})[^a-z]?([A-Z]*)',
                     re.I)
word_then_punc = re.compile(r'([A-Z]{3,})[.]', re.I)
word_then_cruft = re.compile(r'([A-Z]{3,})[^A-Z]{2,}', re.I)
no_casing_ssp = re.compile(r'([A-Z]{3,})[-_. ]([-a-z]{3,})[-_. ]([a-z]{3,})',
                           re.I)
no_casing_sp = re.compile(r'([A-Z]{3,})[-_. ]([-a-z]{3,})', re.I)
unnamed_sp_term_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]sp$')
Code Example #14
for pair in studytreelist:
    study, tree = pair.split('_')
    if len(study) == 1:
        study = '0' + study
    study2tree.setdefault('pg_' + study, []).append('tree' + tree)


pa = PhylesystemAPI(get_from='local')
raw_phylsys = pa.phylesystem_obj
nexson_version = raw_phylsys.repo_nexml2json
for study_id, tree_list in study2tree.items():
    if verbose:
        sys.stderr.write('treelist={t} for study {s}.\n'.format(t=str(tree_list), s=study_id))
    try:
        fp = raw_phylsys.get_filepath_for_study(study_id)
        blob = read_as_json(fp)

        nex = get_nexml_el(blob)
        prev = nex.setdefault('^ot:candidateTreeForSynthesis', [])
        for tree_id in tree_list:
            if tree_id not in prev:
                prev.append(tree_id)
            i_t_o_list = extract_tree_nexson(blob, tree_id, nexson_version)
            if not i_t_o_list:
                sys.stderr.write('tree {t} of study {s} not found !!!\n'.format(t=tree_id, s=study_id))
            for tid, tree, otus_group in i_t_o_list:
                tree['^ot:unrootedTree'] = False
                tree['^ot:specifiedRoot'] = tree['^ot:rootNodeId']
        if not dry_run:
            write_as_json(blob, fp)
        
Code Example #15
#! /usr/bin/env python
from peyotl.phylesystem.git_actions import PhylesystemGitAction
import unittest
from peyotl.utility.input_output import read_as_json
from peyotl.test.support import pathmap
from peyotl.phylesystem.helper import get_repos

try:
    r = get_repos()
    HAS_LOCAL_PHYLESYSTEM_REPOS = True
except:
    HAS_LOCAL_PHYLESYSTEM_REPOS = False

n = read_as_json(pathmap.json_source_path("1003.json"))


class TestCreate(unittest.TestCase):
    @unittest.skipIf(
        not HAS_LOCAL_PHYLESYSTEM_REPOS,
        "only available if you are have a [phylesystem] section with" ' "parent" variable in your peyotl config',
    )
    def testWriteStudy(self):
        self.reponame = list(get_repos().keys())[0]
        self.repodir = get_repos()[self.reponame]
        PhylesystemGitAction(self.repodir)


if __name__ == "__main__":
    unittest.main()
Code Example #16
File: test_shared_api_tests.py  Project: rvosa/peyotl
    _LOG.debug('skipping shared tests due to lack of "{}" dir'.format(
        shared_tests_par))
else:
    update_shared_tests = True
    if update_shared_tests:
        _LOG.debug(
            'updating shared-api-tests dir "{}"'.format(shared_tests_par))
        git_pull = subprocess.Popen(['git', 'pull', 'origin', 'master'],
                                    cwd=shared_tests_par)
        try:
            git_pull.wait()
        except:
            pass  # we want the pass to test when we are offline...
    for fn in test_files:
        local_fp = os.path.join(shared_tests_par, fn)
        tblob = read_as_json(local_fp)
        keys = list(tblob.keys())
        keys.sort()
        for k in keys:
            curr_test = tblob[k]

            def nf(self, n=k, blob=curr_test):
                global STOP
                if STOP or n == 'test_subtree_demo':
                    return
                oi_name = blob['test_function']
                expected = blob['tests']
                s = oi_name.split('_')[0]
                peyotl_meth = '_'.join(oi_name.split('_')[1:])
                trans = OI_FUNC_TO_PEYOTL.get(s, s)
                wrapper = getattr(self.ot, trans)
Code Example #17
#!/usr/bin/env python
from peyotl.utility.input_output import read_as_json, write_as_json
from peyotl import get_logger

import sys
import re
_LOG = get_logger('evaluate-auto-mapping')
if len(sys.argv) != 3:
    sys.exit('expecting an input file path for the JSON mapping file and output file for the unmapped')
inf = sys.argv[1]
outf = sys.argv[2]
_LOG.debug('Reading test cases from "{}"'.format(inf))
m = read_as_json(inf)


def no_op(orig):
    return [orig]

cf_pat = re.compile(r'([A-Z]{3,})[^a-z]cf[^a-z]([A-Z]{3,})[^a-z]?([A-Z]*)', re.I)
aff_pat = re.compile(r'([A-Z]{3,})[^a-z]aff[^a-z]([A-Z]{3,})[^a-z]?([A-Z]*)', re.I)
word_then_punc = re.compile(r'([A-Z]{3,})[.]', re.I)
word_then_cruft = re.compile(r'([A-Z]{3,})[^A-Z]{2,}', re.I)
no_casing_ssp = re.compile(r'([A-Z]{3,})[-_. ]([-a-z]{3,})[-_. ]([a-z]{3,})', re.I)
no_casing_sp = re.compile(r'([A-Z]{3,})[-_. ]([-a-z]{3,})', re.I)
unnamed_sp_term_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]sp$') 
unnamed_sp_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]sp[^a-z]') 
var_name_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]([-a-z]{3,})[-_. ]+var[-_. ]+([a-z]{3,})')
ssp_name_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]([-a-z]{3,})[-_. ]([-a-z]{3,})')
sp_name_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]([-a-z]{3,})')
ex_pat = re.compile(r'(.+)[^a-z]ex[^a-z].+')
def cascade_with_ssp_sp_handling(orig):
Code Example #18
File: test_shared_api_tests.py  Project: rvosa/peyotl
_TYPE_MAP = {'dict': dict}
if not os.path.exists(shared_tests_par):
    _LOG.debug('skipping shared tests due to lack of "{}" dir'.format(shared_tests_par))
else:
    update_shared_tests = True
    if update_shared_tests:
        _LOG.debug('updating shared-api-tests dir "{}"'.format(shared_tests_par))
        git_pull = subprocess.Popen(['git', 'pull', 'origin', 'master'],
                                    cwd=shared_tests_par)
        try:
            git_pull.wait()
        except:
            pass # we want the pass to test when we are offline...
    for fn in test_files:
        local_fp = os.path.join(shared_tests_par, fn)
        tblob = read_as_json(local_fp)
        keys = list(tblob.keys())
        keys.sort()
        for k in keys:
            curr_test = tblob[k]
            def nf(self, n=k, blob=curr_test):
                global STOP
                if STOP or n == 'test_subtree_demo':
                    return
                oi_name = blob['test_function']
                expected = blob['tests']
                s = oi_name.split('_')[0]
                peyotl_meth = '_'.join(oi_name.split('_')[1:])
                trans = OI_FUNC_TO_PEYOTL.get(s, s)
                wrapper = getattr(self.ot, trans)
                bound_m = getattr(wrapper, peyotl_meth)
Code Example #19
    def load_graph(self,
                   tree_list,
                   reinitialize=False,
                   testing=False,
                   report=True,
                   map_compat=True):
        tb = self.load_db
        nc = self.nexson_cache
        log_filepath = self.log_filepath
        tree_log = self.tree_log
        loaded_trees_json = self.loaded_trees_json
        for id_obj in tree_list:
            study_id = id_obj['study_id']
            path = os.path.join(nc, study_id)
            if not os.path.exists(path):
                f = 'Study file not found at "{p}". All studies must be fetched before they can be loaded.'
                raise RuntimeError(f.format(p=path))
        if os.path.exists(loaded_trees_json):
            loaded = read_as_json(loaded_trees_json)
        else:
            loaded = []
        if reinitialize:
            tax_db = self.taxonomy_db
            if os.path.abspath(tax_db) != os.path.abspath(tb):
                if not os.path.exists(tax_db):
                    f = 'loading a graph with reinitialize requires that the taxonomy has been loaded into a taxonomy db'
                    raise RuntimeError(f)
                self._remove_filepath(tb)
                _LOG.debug('copying "{s}" to "{d}"'.format(s=tax_db, d=tb))
                shutil.copytree(tax_db, tb)
                if os.path.exists(loaded_trees_json):
                    os.remove(loaded_trees_json)
                loaded = []

        for id_obj in tree_list:
            study_id = id_obj['study_id']
            tree_id = id_obj['tree_id']
            path = os.path.join(nc, study_id)
            sha = treemachine_load_one_tree(self.java_invoc,
                                            self.treemachine_jar,
                                            tb,
                                            path,
                                            tree_id,
                                            log_filepath,
                                            testing=testing)
            loaded.append({'study_id':study_id, 'tree_id': tree_id, 'sha': sha})
            write_as_json(loaded, loaded_trees_json)
            if report:
                tree_str = self._report_source_tree(tb, study_id, tree_id, sha)
                with codecs.open(tree_log, 'a', encoding='utf-8') as tree_fo:
                    tree_fo.write(tree_str)
                    tree_fo.write('\n')
                print(tree_str)
            if map_compat:
                map_content = treemachine_map_compat_one_tree(self.java_invoc,
                                                              self.treemachine_jar,
                                                              tb,
                                                              study_id,
                                                              tree_id,
                                                              sha)
                with codecs.open(log_filepath, 'a', encoding='utf-8') as log_fo:
                    log_fo.write(map_content)
                print(map_content)
Code Example #20
#!/usr/bin/env python
from peyotl.api import APIWrapper
from peyotl.utility.input_output import read_as_json, write_as_json
from peyotl.nexson_syntax import get_nexml_el

a = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'})
pa = a.phylesystem_api
p = pa.phylesystem_obj
for sid, fp in p.iter_study_filepaths():
    blob = read_as_json(fp)
    nex = get_nexml_el(blob)
    x = nex.get('^ot:studyId')
    if x != sid:
        nex['^ot:studyId'] = sid
        write_as_json(blob, fp)
        print(x, sid)
Code Example #21
 def testNextStudyIds(self):
     p = _Phylesystem(repos_dict=self.r)
     mf = p._growing_shard._id_minting_file
     nsi = p._mint_new_study_id()
     self.assertEqual(int(nsi.split('_')[-1]) + 1, read_as_json(mf)['next_study_id'])
     self.assertTrue(nsi.startswith('zz_'))
Code Example #22
#! /usr/bin/env python
from peyotl.phylesystem.git_actions import GitAction
import unittest
from peyotl.utility.input_output import read_as_json
from peyotl.test.support import pathmap
from peyotl.phylesystem.helper import get_repos
try:
    r = get_repos()
    HAS_LOCAL_PHYLESYSTEM_REPOS = True
except:
    HAS_LOCAL_PHYLESYSTEM_REPOS = False

n = read_as_json(pathmap.json_source_path('1003.json'))

class TestCreate(unittest.TestCase):
    @unittest.skipIf(not HAS_LOCAL_PHYLESYSTEM_REPOS,
                     'only available if you have a [phylesystem] section with' \
                     ' "parent" variable in your peyotl config')
    def testWriteStudy(self):
        self.reponame = list(get_repos().keys())[0]
        self.repodir = get_repos()[self.reponame]
        GitAction(self.repodir)

if __name__ == "__main__":
    unittest.main()
Code Example #23
File: __init__.py  Project: rvosa/peyotl
 def _load_pickle_fp_raw(fp):
     return read_as_json(fp)
Code Example #24
#!/usr/bin/env python
from peyotl.utility.input_output import read_as_json
from peyotl.nexson_syntax import extract_supporting_file_messages
import codecs
import json
import sys
only_with_url = '-u' in sys.argv
out = codecs.getwriter('utf-8')(sys.stdout)
for fn in sys.argv[1:]:
    if fn == '-u':
        continue
    obj = read_as_json(fn)
    m_list = extract_supporting_file_messages(obj)
    if m_list:
        if only_with_url:
            for m in m_list:
                files = m.get('data', {}).get('files', {}).get('file', [])
                for f in files:
                    if '@url' in f:
                        msg = u'''  Internal-id = {i}
  Broken URL = http://tree.opentreeoflife.org{u}
  Filename = "{f}"
  Publication = {p}
  Curator link = http://tree.opentreeoflife.org/curator/study/view/{s}

'''.format(i=m.get('@id', '-'),
           u=f['@url'].replace('uploadid=', 'uploadId='),
           f=f.get('@filename', ''),
           p=obj['nexml']['^ot:studyPublicationReference'],
           s=obj['nexml']['^ot:studyId'])
                        out.write(msg)
Code Example #25
File: __init__.py  Project: pombredanne/peyotl
 def _load_pickle_fp_raw(fp):
     return read_as_json(fp)
Code Example #26
 err_stream = sys.stderr
 args = parser.parse_args()
 try:
     inp_filepath = args.input[0]
 except:
     sys.exit('Expecting a filepath to a NexSON file as the only argument.\n')
 outfn = args.output
 if outfn is not None:
     try:
         out = codecs.open(outfn, mode='w', encoding='utf-8')
     except:
         sys.exit('validate_ot_nexson: Could not open output filepath "{fn}"\n'.format(fn=outfn))
 else:
     out = codecs.getwriter('utf-8')(sys.stdout)
 try:
     nexson = read_as_json(inp_filepath)
 except ValueError as vx:
     _LOG.error('Not valid JSON.')
     if args.verbose:
         raise vx
     else:
         sys.exit(1)
 except Exception as nx:
     _LOG.error(nx.value)
     sys.exit(1)
 convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)
 trees = extract_tree_nexson(nexson, tree_id=args.tree_id)
 if len(trees) == 0:
     trees = extract_tree_nexson(nexson, tree_id=None)
     if trees:
         v = '", "'.join([i[0] for i in trees])