def synthesize(self, reinitialize=True):
    """Run treemachine's synthesis step against the synthesis db.

    If `reinitialize` is True and the load db is a different location from
    the synthesis db, the synthesis db is first replaced with a fresh copy
    of the load db.

    :param reinitialize: when True, re-seed the synthesis db from the load db.
    :return: result of `treemachine_synthesize` (its log goes to `self.log_filepath`).
    :raises RuntimeError: if the load db or the loaded-trees JSON record is missing.
    """
    synth_db = self.synthesis_db
    synth_ott_id = self.synth_ott_id
    log_filepath = self.log_filepath
    if reinitialize:
        load_db = self.load_db
        # Only copy when the load and synthesis dbs are distinct locations.
        if os.path.abspath(load_db) != os.path.abspath(synth_db):
            if not os.path.exists(load_db):
                # NOTE: fixed typo in the original message ("requies")
                f = 'loading a graph with reinitialize requires that the trees have been loaded into a loading db'
                raise RuntimeError(f)
            self._remove_filepath(synth_db)
            _LOG.debug('copying "{s}" to "{d}"'.format(s=load_db, d=synth_db))
            shutil.copytree(load_db, synth_db)
    loaded_trees_json = self.loaded_trees_json
    if not os.path.exists(loaded_trees_json):
        f = '"{}" does not exist, so I can not tell what studies have been loaded'
        raise RuntimeError(f.format(loaded_trees_json))
    loaded = read_as_json(loaded_trees_json)
    return treemachine_synthesize(self.java_invoc,
                                  self.treemachine_jar,
                                  synth_db,
                                  synth_ott_id,
                                  loaded,
                                  log_filepath)
def testNextStudyIds(self):
    """Minting a study id must advance the shard's next_study_id counter."""
    phylesystem = _Phylesystem(repos_dict=self.r)
    minting_file = phylesystem._growing_shard._id_minting_file
    new_id = phylesystem._mint_new_study_id()
    recorded_next = read_as_json(minting_file)['next_study_id']
    self.assertEqual(int(new_id.split('_')[-1]) + 1, recorded_next)
    self.assertTrue(new_id.startswith('zz_'))
def _read_master_branch_resource(self, fn, is_json=False): '''This will force the current branch to master! ''' with self._master_branch_repo_lock: ga = self._create_git_action_for_global_resource() with ga.lock(): ga.checkout_master() if os.path.exists(fn): if is_json: return read_as_json(fn) return codecs.open(fn, 'rU', encoding='utf-8').read() return None
def collection_to_included_trees(collection):
    """Takes a collection object (or a filepath to collection object), returns
    each element of the `decisions` list that has the decision set
    to included.
    """
    if is_str_type(collection):
        collection = read_as_json(collection)
    decisions = collection.get('decisions', [])
    return [d for d in decisions if d['decision'] == 'INCLUDED']
def _read_master_branch_resource(self, fn, is_json=False): """This will force the current branch to master! """ with self._master_branch_repo_lock: ga = self._create_git_action_for_global_resource() with ga.lock(): ga.checkout_master() if os.path.exists(fn): if is_json: return read_as_json(fn) with codecs.open(fn, "rU", encoding="utf-8") as f: ret = f.read() return ret return None
def _read_cached_or_refetch(self, paths, phylografter): lock_policy = self.lock_policy nexson_path = paths['nexson'] lockfile = nexson_path + '.lock' owns_lock = lock_policy.wait_for_lock(lockfile)[1] nexson = None try: if os.path.exists(nexson_path): nexson = read_as_json(nexson_path) finally: if owns_lock: lock_policy.remove_lock() if nexson is None: return self.download_nexson_from_phylografter(paths, phylografter) return nexson
def treemachine_load_one_tree(java_invoc, treemachine_jar_path, db_path, study_filepath, tree_id, log_filepath, testing=False):
    """Load one tree into the treemachine load db via `pgloadind`.

    Returns the git SHA recorded in the study's NexSON. Output from the
    java invocation is appended to `log_filepath`.
    """
    _bail_if_file_not_found('study file', study_filepath)
    _bail_if_file_not_found('load db', db_path)
    study_blob = read_as_json(study_filepath)
    sha = get_git_sha(study_blob)
    invoc = _treemachine_start(java_invoc, treemachine_jar_path)
    invoc.extend(['pgloadind', db_path, study_filepath, tree_id, sha])
    if testing:
        invoc.append('f')
        verb = 'testing'
    else:
        verb = 'loading'
    _LOG.debug(
        '{v} tree {t} from NexSON from "{p}" and logging to "{l}"'.format(
            v=verb, t=tree_id, p=study_filepath, l=log_filepath))
    with codecs.open(log_filepath, 'a', encoding='utf-8') as logf:
        _run(invoc, stdout=logf)
    return sha
def treemachine_load_one_tree(java_invoc, treemachine_jar_path, db_path, study_filepath, tree_id, log_filepath, testing=False):
    # Load a single tree into the treemachine load db using the `pgloadind`
    # subcommand; returns the git SHA recorded in the study's NexSON.
    # Java output is appended to `log_filepath`.
    _bail_if_file_not_found('study file', study_filepath)
    _bail_if_file_not_found('load db', db_path)
    nexson = read_as_json(study_filepath)
    sha = get_git_sha(nexson)
    java_invoc = _treemachine_start(java_invoc, treemachine_jar_path)
    java_invoc.extend(['pgloadind', db_path, study_filepath, tree_id, sha])
    verb = 'loading'
    if testing:
        # 'f' appears to put treemachine in a test-only mode -- TODO confirm
        # against the treemachine CLI docs.
        java_invoc.append('f')
        verb = 'testing'
    _LOG.debug('{v} tree {t} from NexSON from "{p}" and logging to "{l}"'.format(v=verb, t=tree_id, p=study_filepath, l=log_filepath))
    with codecs.open(log_filepath, 'a', encoding='utf-8') as logf:
        _run(java_invoc, stdout=logf)
    return sha
#!/usr/bin/env python from peyotl.utility.input_output import read_as_json, write_as_json from peyotl import get_logger import sys import re _LOG = get_logger('evaluate-auto-mapping') if len(sys.argv) != 4: sys.exit('expecting an input file path for the JSON mapping file and ' '2 output file for the plausible and implausible unmapped') inf = sys.argv[1] poutf = sys.argv[2] ioutf = sys.argv[3] _LOG.debug('Reading test cases from "{}"'.format(inf)) test_case_dict = read_as_json(inf) possible = {} impossible = {} np = 0 ni = 0 for study_id, otu_list in test_case_dict.items(): p = [] i = [] for el in otu_list: matches = el[1] orig = el[0].lower() is_plausible = False for m in matches: if m.lower() in orig:
#!/usr/bin/env python from peyotl.utility.input_output import read_as_json, write_as_json from peyotl import get_logger import sys import re _LOG = get_logger('evaluate-auto-mapping') if len(sys.argv) != 3: sys.exit( 'expecting an input file path for the JSON mapping file and output file for the unmapped' ) inf = sys.argv[1] outf = sys.argv[2] _LOG.debug('Reading test cases from "{}"'.format(inf)) m = read_as_json(inf) def no_op(orig): return [orig] cf_pat = re.compile(r'([A-Z]{3,})[^a-z]cf[^a-z]([A-Z]{3,})[^a-z]?([A-Z]*)', re.I) aff_pat = re.compile(r'([A-Z]{3,})[^a-z]aff[^a-z]([A-Z]{3,})[^a-z]?([A-Z]*)', re.I) word_then_punc = re.compile(r'([A-Z]{3,})[.]', re.I) word_then_cruft = re.compile(r'([A-Z]{3,})[^A-Z]{2,}', re.I) no_casing_ssp = re.compile(r'([A-Z]{3,})[-_. ]([-a-z]{3,})[-_. ]([a-z]{3,})', re.I) no_casing_sp = re.compile(r'([A-Z]{3,})[-_. ]([-a-z]{3,})', re.I) unnamed_sp_term_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]sp$')
for pair in studytreelist: study, tree = pair.split('_') if len(study) == 1: study = '0' + study study2tree.setdefault('pg_' + study, []).append('tree' + tree) pa = PhylesystemAPI(get_from='local') raw_phylsys = pa.phylesystem_obj nexson_version = raw_phylsys.repo_nexml2json for study_id, tree_list in study2tree.items(): if verbose: sys.stderr.write('treelist={t} for study {s}.\n'.format(t=str(tree_list), s=study_id)) try: fp = raw_phylsys.get_filepath_for_study(study_id) blob = read_as_json(fp) nex = get_nexml_el(blob) prev = nex.setdefault('^ot:candidateTreeForSynthesis', []) for tree_id in tree_list: if tree_id not in prev: prev.append(tree_id) i_t_o_list = extract_tree_nexson(blob, tree_id, nexson_version) if not i_t_o_list: sys.stderr.write('tree {t} of study {s} not found !!!\n'.format(t=tree_id, s=study_id)) for tid, tree, otus_group in i_t_o_list: tree['^ot:unrootedTree'] = False tree['^ot:specifiedRoot'] = tree['^ot:rootNodeId'] if not dry_run: write_as_json(blob, fp)
#! /usr/bin/env python
# Smoke test: verify a PhylesystemGitAction can be constructed against a
# locally configured phylesystem repo.
from peyotl.phylesystem.git_actions import PhylesystemGitAction
import unittest
from peyotl.utility.input_output import read_as_json
from peyotl.test.support import pathmap
from peyotl.phylesystem.helper import get_repos

try:
    r = get_repos()
    HAS_LOCAL_PHYLESYSTEM_REPOS = True
# Narrowed from a bare `except:` which would also swallow SystemExit and
# KeyboardInterrupt; any ordinary failure still means "no local repos".
except Exception:
    HAS_LOCAL_PHYLESYSTEM_REPOS = False
n = read_as_json(pathmap.json_source_path("1003.json"))


class TestCreate(unittest.TestCase):
    @unittest.skipIf(
        not HAS_LOCAL_PHYLESYSTEM_REPOS,
        "only available if you are have a [phylesystem] section with"
        ' "parent" variable in your peyotl config',
    )
    def testWriteStudy(self):
        """Constructing a PhylesystemGitAction for the first configured repo must not raise."""
        self.reponame = list(get_repos().keys())[0]
        self.repodir = get_repos()[self.reponame]
        PhylesystemGitAction(self.repodir)


if __name__ == "__main__":
    unittest.main()
_LOG.debug('skipping shared tests due to lack of "{}" dir'.format( shared_tests_par)) else: update_shared_tests = True if update_shared_tests: _LOG.debug( 'updating shared-api-tests dir "{}"'.format(shared_tests_par)) git_pull = subprocess.Popen(['git', 'pull', 'origin', 'master'], cwd=shared_tests_par) try: git_pull.wait() except: pass # we want the pass to test when we are offline... for fn in test_files: local_fp = os.path.join(shared_tests_par, fn) tblob = read_as_json(local_fp) keys = list(tblob.keys()) keys.sort() for k in keys: curr_test = tblob[k] def nf(self, n=k, blob=curr_test): global STOP if STOP or n == 'test_subtree_demo': return oi_name = blob['test_function'] expected = blob['tests'] s = oi_name.split('_')[0] peyotl_meth = '_'.join(oi_name.split('_')[1:]) trans = OI_FUNC_TO_PEYOTL.get(s, s) wrapper = getattr(self.ot, trans)
#!/usr/bin/env python from peyotl.utility.input_output import read_as_json, write_as_json from peyotl import get_logger import sys import re _LOG = get_logger('evaluate-auto-mapping') if len(sys.argv) != 3: sys.exit('expecting an input file path for the JSON mapping file and output file for the unmapped') inf = sys.argv[1] outf = sys.argv[2] _LOG.debug('Reading test cases from "{}"'.format(inf)) m = read_as_json(inf) def no_op(orig): return [orig] cf_pat = re.compile(r'([A-Z]{3,})[^a-z]cf[^a-z]([A-Z]{3,})[^a-z]?([A-Z]*)', re.I) aff_pat = re.compile(r'([A-Z]{3,})[^a-z]aff[^a-z]([A-Z]{3,})[^a-z]?([A-Z]*)', re.I) word_then_punc = re.compile(r'([A-Z]{3,})[.]', re.I) word_then_cruft = re.compile(r'([A-Z]{3,})[^A-Z]{2,}', re.I) no_casing_ssp = re.compile(r'([A-Z]{3,})[-_. ]([-a-z]{3,})[-_. ]([a-z]{3,})', re.I) no_casing_sp = re.compile(r'([A-Z]{3,})[-_. ]([-a-z]{3,})', re.I) unnamed_sp_term_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]sp$') unnamed_sp_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]sp[^a-z]') var_name_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]([-a-z]{3,})[-_. ]+var[-_. ]+([a-z]{3,})') ssp_name_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]([-a-z]{3,})[-_. ]([-a-z]{3,})') sp_name_pat = re.compile(r'([A-Z][a-z]{2,})[-_. ]([-a-z]{3,})') ex_pat = re.compile(r'(.+)[^a-z]ex[^a-z].+') def cascade_with_ssp_sp_handling(orig):
_TYPE_MAP = {'dict': dict} if not os.path.exists(shared_tests_par): _LOG.debug('skipping shared tests due to lack of "{}" dir'.format(shared_tests_par)) else: update_shared_tests = True if update_shared_tests: _LOG.debug('updating shared-api-tests dir "{}"'.format(shared_tests_par)) git_pull = subprocess.Popen(['git', 'pull', 'origin', 'master'], cwd=shared_tests_par) try: git_pull.wait() except: pass # we want the pass to test when we are offline... for fn in test_files: local_fp = os.path.join(shared_tests_par, fn) tblob = read_as_json(local_fp) keys = list(tblob.keys()) keys.sort() for k in keys: curr_test = tblob[k] def nf(self, n=k, blob=curr_test): global STOP if STOP or n == 'test_subtree_demo': return oi_name = blob['test_function'] expected = blob['tests'] s = oi_name.split('_')[0] peyotl_meth = '_'.join(oi_name.split('_')[1:]) trans = OI_FUNC_TO_PEYOTL.get(s, s) wrapper = getattr(self.ot, trans) bound_m = getattr(wrapper, peyotl_meth)
def load_graph(self, tree_list, reinitialize=False, testing=False, report=True, map_compat=True):
    """Load every tree in `tree_list` into the treemachine load db.

    :param tree_list: iterable of dicts with 'study_id' and 'tree_id' keys.
        Each study's NexSON must already be present in the nexson cache.
    :param reinitialize: re-seed the load db from the taxonomy db and clear
        the loaded-trees record first.
    :param testing: pass treemachine's test flag through to the load call.
    :param report: append a source-tree report line to the tree log.
    :param map_compat: run the map-compat step after each load and append its
        output to the main log.
    :raises RuntimeError: if a study file is missing from the cache, or
        reinitialization is requested without a populated taxonomy db.
    """
    tb = self.load_db
    nc = self.nexson_cache
    log_filepath = self.log_filepath
    tree_log = self.tree_log
    loaded_trees_json = self.loaded_trees_json
    # Validate up front that every study's NexSON has been fetched.
    for id_obj in tree_list:
        study_id = id_obj['study_id']
        path = os.path.join(nc, study_id)
        if not os.path.exists(path):
            f = 'Study file not found at "{p}". All studies must be fetched before they can be loaded.'
            raise RuntimeError(f.format(p=path))
    if os.path.exists(loaded_trees_json):
        loaded = read_as_json(loaded_trees_json)
    else:
        loaded = []
    if reinitialize:
        tax_db = self.taxonomy_db
        # Only copy when the taxonomy and load dbs are distinct locations.
        if os.path.abspath(tax_db) != os.path.abspath(tb):
            if not os.path.exists(tax_db):
                # NOTE: fixed typo in the original message ("requies")
                f = 'loading a graph with reinitialize requires that the taxonomy has been loaded into a taxonomy db'
                raise RuntimeError(f)
            self._remove_filepath(tb)
            _LOG.debug('copying "{s}" to "{d}"'.format(s=tax_db, d=tb))
            shutil.copytree(tax_db, tb)
        # A fresh db means the previous loaded-trees record is stale.
        if os.path.exists(loaded_trees_json):
            os.remove(loaded_trees_json)
        loaded = []
    for id_obj in tree_list:
        study_id = id_obj['study_id']
        tree_id = id_obj['tree_id']
        path = os.path.join(nc, study_id)
        sha = treemachine_load_one_tree(self.java_invoc,
                                        self.treemachine_jar,
                                        tb,
                                        path,
                                        tree_id,
                                        log_filepath,
                                        testing=testing)
        # Persist the record after each tree so a crash loses at most one entry.
        loaded.append({'study_id': study_id, 'tree_id': tree_id, 'sha': sha})
        write_as_json(loaded, loaded_trees_json)
        if report:
            tree_str = self._report_source_tree(tb, study_id, tree_id, sha)
            with codecs.open(tree_log, 'a', encoding='utf-8') as tree_fo:
                tree_fo.write(tree_str)
                tree_fo.write('\n')
            print(tree_str)
        if map_compat:
            map_content = treemachine_map_compat_one_tree(self.java_invoc,
                                                          self.treemachine_jar,
                                                          tb,
                                                          study_id,
                                                          tree_id,
                                                          sha)
            with codecs.open(log_filepath, 'a', encoding='utf-8') as log_fo:
                log_fo.write(map_content)
            print(map_content)
#!/usr/bin/env python from peyotl.api import APIWrapper from peyotl.utility.input_output import read_as_json, write_as_json from peyotl.nexson_syntax import get_nexml_el a = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'}) pa = a.phylesystem_api p = pa.phylesystem_obj for sid, fp in p.iter_study_filepaths(): blob = read_as_json(fp) nex = get_nexml_el(blob) x = nex.get('^ot:studyId') if x != sid: nex['^ot:studyId'] = sid write_as_json(blob, fp) print(x, sid)
def testNextStudyIds(self):
    # Minting a new study id should advance the growing shard's
    # next_study_id record and produce ids in the 'zz_' prefix space.
    p = _Phylesystem(repos_dict=self.r)
    mf = p._growing_shard._id_minting_file
    nsi = p._mint_new_study_id()
    # The persisted counter must be one past the id we just received.
    self.assertEqual(int(nsi.split('_')[-1]) + 1, read_as_json(mf)['next_study_id'])
    self.assertTrue(nsi.startswith('zz_'))
#! /usr/bin/env python
# Smoke test: verify a GitAction can be constructed against a locally
# configured phylesystem repo.
from peyotl.phylesystem.git_actions import GitAction
import unittest
from peyotl.utility.input_output import read_as_json
from peyotl.test.support import pathmap
from peyotl.phylesystem.helper import get_repos

try:
    r = get_repos()
    HAS_LOCAL_PHYLESYSTEM_REPOS = True
# Narrowed from a bare `except:` which would also swallow SystemExit and
# KeyboardInterrupt; any ordinary failure still means "no local repos".
except Exception:
    HAS_LOCAL_PHYLESYSTEM_REPOS = False
n = read_as_json(pathmap.json_source_path('1003.json'))


class TestCreate(unittest.TestCase):
    @unittest.skipIf(not HAS_LOCAL_PHYLESYSTEM_REPOS,
                     'only available if you are have a [phylesystem] section with'
                     ' "parent" variable in your peyotl config')
    def testWriteStudy(self):
        """Constructing a GitAction for the first configured repo must not raise."""
        self.reponame = list(get_repos().keys())[0]
        self.repodir = get_repos()[self.reponame]
        GitAction(self.repodir)


if __name__ == "__main__":
    unittest.main()
def _load_pickle_fp_raw(fp):
    """Deserialize and return the content of the file at `fp`.

    NOTE(review): despite the name, the on-disk format read here is JSON,
    not pickle -- the name is kept for backward compatibility with callers.
    """
    contents = read_as_json(fp)
    return contents
#!/usr/bin/env python from peyotl.utility.input_output import read_as_json from peyotl.nexson_syntax import extract_supporting_file_messages import codecs import json import sys only_with_url = '-u' in sys.argv out = codecs.getwriter('utf-8')(sys.stdout) for fn in sys.argv[1:]: if fn == '-u': continue obj = read_as_json(fn) m_list = extract_supporting_file_messages(obj) if m_list: if only_with_url: for m in m_list: files = m.get('data', {}).get('files', {}).get('file', []) for f in files: if '@url' in f: msg = u''' Internal-id = {i} Broken URL = http://tree.opentreeoflife.org{u} Filename = "{f}" Publication = {p} Curator link = http://tree.opentreeoflife.org/curator/study/view/{s} '''.format(i=m.get('@id', '-'), u=f['@url'].replace('uploadid=', 'uploadId='), f=f.get('@filename', ''), p=obj['nexml']['^ot:studyPublicationReference'], s=obj['nexml']['^ot:studyId']) out.write(msg)
err_stream = sys.stderr args = parser.parse_args() try: inp_filepath = args.input[0] except: sys.exit('Expecting a filepath to a NexSON file as the only argument.\n') outfn = args.output if outfn is not None: try: out = codecs.open(outfn, mode='w', encoding='utf-8') except: sys.exit('validate_ot_nexson: Could not open output filepath "{fn}"\n'.format(fn=outfn)) else: out = codecs.getwriter('utf-8')(sys.stdout) try: nexson = read_as_json(inp_filepath) except ValueError as vx: _LOG.error('Not valid JSON.') if args.verbose: raise vx else: sys.exit(1) except Exception as nx: _LOG.error(nx.value) sys.exit(1) convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH) trees = extract_tree_nexson(nexson, tree_id=args.tree_id) if len(trees) == 0: trees = extract_tree_nexson(nexson, tree_id=None) if trees: v = '", "'.join([i[0] for i in trees])