def fetch_nexsons(self, tree_list, download=False):
    """Fetch the study for every entry of `tree_list` and write it into the NexSON cache.

    Args:
        tree_list: iterable of mappings; only the 'study_id' key is read here.
        download: when true, `pull()` is called on the underlying phylesystem
            object before any study is requested.

    Each study is requested with a content spec built for nexson_version
    '0.0.0' and handed to `write_as_json` with the destination
    `<self.nexson_cache>/<study_id>`.
    """
    cache_dir = self.nexson_cache
    # NOTE(review): the attribute is spelled `phyleystem_api` in this file --
    # confirm that it is not meant to be `phylesystem_api`.
    api = self.phyleystem_api
    if download:
        api.phylesystem_obj.pull()
    schema = create_content_spec(nexson_version='0.0.0')
    for entry in tree_list:
        study_id = entry['study_id']
        study_blob = api.get_study(study_id, schema=schema)
        destination = os.path.join(cache_dir, study_id)
        write_as_json(study_blob, destination)
def _write_master_branch_resource(self, content, fn, commit_msg, is_json=False):
    """Write `content` to `fn` on the master branch and commit the file.

    This will force the current branch to master!

    Args:
        content: payload to write.
        fn: path of the file to write and commit.
        commit_msg: message passed to the commit.
        is_json: when true the payload is written with `write_as_json`,
            otherwise with `write_to_filepath`.

    The commit author is `self._infrastructure_commit_author`. All work is
    done while holding `self._master_branch_repo_lock` and the git action's
    own lock.
    """
    # TODO: we might want this to push, but currently it is only called in
    #   contexts in which we are about to push any way (study creation)
    with self._master_branch_repo_lock:
        git_action = self._create_git_action_for_global_resource()
        with git_action.lock():
            git_action.checkout_master()
            writer = write_as_json if is_json else write_to_filepath
            writer(content, fn)
            git_action._add_and_commit(fn, self._infrastructure_commit_author, commit_msg)
def _write_master_branch_resource(self, content, fn, commit_msg, is_json=False):
    """Write `content` to `fn` on the master branch and commit the file.

    This will force the current branch to master!

    Args:
        content: payload to write.
        fn: path of the file to write and commit.
        commit_msg: message passed to the commit.
        is_json: when true the payload is written with `write_as_json`,
            otherwise with `write_to_filepath`.

    The commit author is `self._infrastructure_commit_author`. All work is
    done while holding `self._master_branch_repo_lock` and the git action's
    own lock.
    """
    # TODO: we might want this to push, but currently it is only called in
    #   contexts in which we are about to push any way (study creation)
    with self._master_branch_repo_lock:
        git_action = self._create_git_action_for_global_resource()
        with git_action.lock():
            git_action.checkout_master()
            writer = write_as_json if is_json else write_to_filepath
            writer(content, fn)
            git_action._add_and_commit(fn, self._infrastructure_commit_author, commit_msg)
def _write_pickle(directory, fn, obj):
    """Store `obj` in the file `<directory>/<fn>.pickle`.

    Args:
        directory: directory that receives the file.
        fn: base file name; the '.pickle' suffix is appended here.
        obj: object handed to `write_as_json`.

    NOTE(review): despite the function name and the '.pickle' suffix, the
    payload is passed to `write_as_json` through a handle opened in binary
    mode ('wb') -- confirm that this is intended and that the helper accepts
    a binary stream.
    """
    pickle_path = os.path.join(directory, fn + '.pickle')
    _LOG.debug('Creating "{p}"'.format(p=pickle_path))
    with open(pickle_path, 'wb') as out_stream:
        write_as_json(obj, out_stream)
# Split every study's OTU entries into "plausible" and "implausible" mappings.
# An entry is plausible when at least one of its candidate matches (el[1]),
# lower-cased, occurs as a substring of the lower-cased original label (el[0]).
# `possible` is expected to have been created before this point.
impossible = {}
np = 0  # running count of plausible entries
ni = 0  # running count of implausible entries
for study_id, otu_list in test_case_dict.items():
    plausible = []
    implausible = []
    for el in otu_list:
        matches = el[1]
        orig = el[0].lower()
        if any(candidate.lower() in orig for candidate in matches):
            plausible.append(el)
        else:
            implausible.append(el)
    # Only studies that actually contributed entries get a key.
    if plausible:
        possible[study_id] = plausible
        np += len(plausible)
    if implausible:
        impossible[study_id] = implausible
        ni += len(implausible)
write_as_json(possible, poutf)
write_as_json(impossible, ioutf)
_LOG.debug('%d promising mappings written to %s.' % (np, poutf))
_LOG.debug('%d implausible mapping written to %s' % (ni, ioutf))
def evaluate_strategy(func, name, test_case_dict):
    """Run the matching strategy `func` over every study and log its hit rate.

    Args:
        func: strategy callable, forwarded to `evaluate_strategy_for_study`.
        name: label used for the strategy in the log message.
        test_case_dict: mapping of study key -> test cases for that study.

    Returns:
        A `(num_matched, num, unmatched_by_study)` tuple: the summed matched
        and total counts reported by `evaluate_strategy_for_study`, and a dict
        holding the third element of its result for every study where that
        element was non-empty.
    """
    num, num_matched = 0, 0
    unmatched_by_study = {}
    for study_key, cases in test_case_dict.items():
        matched, total, unmatched = evaluate_strategy_for_study(func, cases)
        if unmatched:
            unmatched_by_study[study_key] = unmatched
        num += total
        num_matched += matched
    # With no test cases at all the ratio is undefined; report 0% instead of
    # failing with ZeroDivisionError.
    proportion = float(num_matched) / float(num) if num else 0.0
    _LOG.debug(
        'Strategy "{s}" matched {m} out of {n} which is {p:5.2f}%'.format(
            s=name, m=num_matched, n=num, p=100 * proportion))
    return num_matched, num, unmatched_by_study


# Every strategy is evaluated for its log line; only the result of the last
# (most complete) strategy is kept and written out.
num_matched, num, d = evaluate_strategy(no_op, "no_op", m)
num_matched, num, d = evaluate_strategy(case_sensitive, "case sensitive", m)
num_matched, num, d = evaluate_strategy(case_sensitive_cascade,
                                        "case sensitive cascade",
                                        m)
num_matched, num, d = evaluate_strategy(case_sensitive_cascade_with_ssp,
                                        "case sensitive cascade with ssp",
                                        m)
num_matched, num, d = evaluate_strategy(
    case_sensitive_cascade_with_ssp_sp_handling,
    "case sensitive cascade with ssp + 'sp.' handling",
    m)
num_matched, num, d = evaluate_strategy(
    cascade_with_ssp_sp_handling,
    "full cascade with ssp + 'sp.' handling",
    m)
write_as_json(d, outf)
def load_graph(self, tree_list, reinitialize=False, testing=False, report=True, map_compat=True):
    """Load every tree of `tree_list` into the load db, recording progress as it goes.

    Args:
        tree_list: sequence of mappings with 'study_id' and 'tree_id' keys.
        reinitialize: when true and the taxonomy db is a different path from
            the load db, the load db is removed and replaced by a copy of the
            taxonomy db, and the loaded-trees record is reset.
        testing: forwarded to `treemachine_load_one_tree`.
        report: when true, the source-tree report for each loaded tree is
            appended to `self.tree_log` and printed.
        map_compat: when true, the output of `treemachine_map_compat_one_tree`
            is appended to `self.log_filepath` and printed.

    Raises:
        RuntimeError: if a study file is missing from the NexSON cache, or if
            `reinitialize` is requested and the taxonomy db does not exist.
    """
    load_db = self.load_db
    cache_dir = self.nexson_cache
    log_filepath = self.log_filepath
    tree_log = self.tree_log
    loaded_trees_json = self.loaded_trees_json

    # Check every study file up front so nothing is loaded if one is missing.
    for entry in tree_list:
        study_path = os.path.join(cache_dir, entry['study_id'])
        if not os.path.exists(study_path):
            template = 'Study file not found at "{p}". All studies must be fetched before they can be loaded.'
            raise RuntimeError(template.format(p=study_path))

    loaded = read_as_json(loaded_trees_json) if os.path.exists(loaded_trees_json) else []

    if reinitialize:
        tax_db = self.taxonomy_db
        if os.path.abspath(tax_db) != os.path.abspath(load_db):
            if not os.path.exists(tax_db):
                raise RuntimeError('loading a graph with reinitialize requies that the taxonomy '
                                   'has been loaded into a taxonomy db')
            self._remove_filepath(load_db)
            _LOG.debug('copying "{s}" to "{d}"'.format(s=tax_db, d=load_db))
            shutil.copytree(tax_db, load_db)
            # NOTE(review): the record of loaded trees is reset only when the
            # db was actually re-copied -- confirm this nesting against the
            # canonical source.
            if os.path.exists(loaded_trees_json):
                os.remove(loaded_trees_json)
            loaded = []

    for entry in tree_list:
        study_id = entry['study_id']
        tree_id = entry['tree_id']
        study_path = os.path.join(cache_dir, study_id)
        sha = treemachine_load_one_tree(self.java_invoc,
                                        self.treemachine_jar,
                                        load_db,
                                        study_path,
                                        tree_id,
                                        log_filepath,
                                        testing=testing)
        loaded.append({'study_id': study_id, 'tree_id': tree_id, 'sha': sha})
        # Rewrite the record after every tree so it stays current if a later
        # load fails.
        write_as_json(loaded, loaded_trees_json)
        if report:
            tree_str = self._report_source_tree(load_db, study_id, tree_id, sha)
            with codecs.open(tree_log, 'a', encoding='utf-8') as tree_fo:
                tree_fo.write(tree_str)
                tree_fo.write('\n')
            print(tree_str)
        if map_compat:
            map_content = treemachine_map_compat_one_tree(self.java_invoc,
                                                          self.treemachine_jar,
                                                          load_db,
                                                          study_id,
                                                          tree_id,
                                                          sha)
            with codecs.open(log_filepath, 'a', encoding='utf-8') as log_fo:
                log_fo.write(map_content)
            print(map_content)
def convert(self, src, serialize=None, output_dest=None, src_schema=None):
    """Convert the NexSON object `src` into the format and content this schema describes.

    Args:
        src: the source NexSON object. NOTE(review): for content 'meta' the
            result of `strip_to_meta_only` is ignored, so `src` appears to be
            modified in place -- confirm against that helper.
        serialize: for NexSON output, a true value requests serialized JSON
            and a false value returns the converted object. For other formats
            output is always serialized, so an explicit false value is
            rejected.
        output_dest: optional destination. For NexSON output it is handed to
            `write_as_json`; for other formats it may be a file-like object
            or a path. A path is opened here as UTF-8 text and is closed
            again before returning.
        src_schema: schema describing `src`; when None, `src` is treated as
            NexSON with no declared version.

    Returns:
        NexSON target: None when the result was written to `output_dest` or
        when there is nothing to return, the serialized string when
        `serialize` is true without a destination, otherwise the converted
        object.
        NeXML target: None when written to `output_dest`, otherwise the
        result of `convert_to_nexml`.
        NEXUS/Newick target: the extracted tree string (also written to
        `output_dest`, followed by a newline, when one is given).

    Raises:
        NotImplementedError: if `can_convert_from()` is false or the source
            is not NexSON.
        ValueError: if a non-NexSON target is requested with an explicit
            false `serialize`.
        AssertionError: if `self.format_code` is not a known format.
    """
    if src_schema is None:
        src_format = PhyloSchema.NEXSON
        current_format = None
    else:
        src_format = src_schema.format_code
        current_format = src_schema.version
    if not self.can_convert_from():
        m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
        raise NotImplementedError(m)
    if src_format != PhyloSchema.NEXSON:
        raise NotImplementedError('Only conversion from NexSON is currently supported')

    if self.format_code == PhyloSchema.NEXSON:
        d = src
        if self.content == 'study':
            d = convert_nexson_format(src,
                                      out_nexson_format=self.version,
                                      current_format=current_format,
                                      remove_old_structs=True,
                                      pristine_if_invalid=False,
                                      sort_arbitrary=False)
        elif self.content in ('tree', 'subtree'):
            if self.content == 'tree' and self.cull_nonmatching:
                d = cull_nonmatching_trees(d, self.content_id, current_format)
                d = convert_nexson_format(d,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            else:
                # Build a mapping from the first to the second element of
                # each tuple returned by extract_tree_nexson.
                i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                d = {}
                for ito_tup in i_t_o_list:
                    d[ito_tup[0]] = ito_tup[1]
        elif self.content == 'meta':
            strip_to_meta_only(d, current_format)
        elif self.content == 'otus':
            d = extract_otus_nexson(d, self.content_id, current_format)
        elif self.content == 'otu':
            d = extract_otu_nexson(d, self.content_id, current_format)
        elif self.content == 'otumap':
            if self.content_id is None:
                r = extract_otu_nexson(d, None, current_format)
            else:
                # The id may name an otus group; if not, fall back to
                # treating it as the id of a single otu.
                p = extract_otus_nexson(d, self.content_id, current_format)
                if p is None:
                    r = extract_otu_nexson(d, self.content_id, current_format)
                else:
                    r = {}
                    for v in p.values():
                        r.update(v.get('otuById', {}))
            if not r:
                return None
            d = _otu_dict_to_otumap(r)
        elif self.content == 'treelist':
            i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
            d = [i[0] for i in i_t_o_list]
        if d is None:
            return None
        if not serialize:
            return d
        if output_dest:
            write_as_json(d, output_dest)
            return None
        f, wrapper = get_utf_8_string_io_writer()
        write_as_json(d, wrapper)
        flush_utf_8_writer(wrapper)
        return f.getvalue()

    # Non-NexSON types go here...
    if (serialize is not None) and (not serialize):
        raise ValueError('Conversion without serialization is only supported for the NexSON format')
    # Remember whether the stream was opened here, so that only a handle
    # this method created gets closed (caller-supplied streams stay open).
    opened_here = False
    if output_dest and is_str_type(output_dest):
        output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
        opened_here = True
    try:
        if self.format_code == PhyloSchema.NEXML:
            if output_dest:
                write_obj_as_nexml(src, output_dest, addindent=' ', newl='\n',
                                   otu_label=self.otu_label_prop)
                return None
            return convert_to_nexml(src, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
        if self.format_code in [PhyloSchema.NEXUS, PhyloSchema.NEWICK]:
            if self.content in ('tree', 'subtree'):
                if isinstance(self.content_id, (list, tuple)):
                    ci, subtree_id = self.content_id
                else:
                    ci, subtree_id = self.content_id, None
            else:
                ci, subtree_id = None, None
            response = extract_tree(src, ci, self, subtree_id=subtree_id)
            # these formats are always serialized...
            if output_dest:
                output_dest.write(response)
                output_dest.write('\n')
            return response
        # Explicit raise rather than `assert False`, which is stripped
        # under `python -O`.
        raise AssertionError('convert reached an unsupported format_code')
    finally:
        if opened_here:
            output_dest.close()
#!/usr/bin/env python
# Walks every study file of the local phylesystem and makes the study's
# '^ot:studyId' property agree with the id the file is listed under.
from peyotl.api import APIWrapper
from peyotl.utility.input_output import read_as_json, write_as_json
from peyotl.nexson_syntax import get_nexml_el

STUDY_ID_KEY = '^ot:studyId'

wrapper = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'})
phylesystem = wrapper.phylesystem_api.phylesystem_obj
for listed_id, study_path in phylesystem.iter_study_filepaths():
    study_blob = read_as_json(study_path)
    nexml_el = get_nexml_el(study_blob)
    stored_id = nexml_el.get(STUDY_ID_KEY)
    if stored_id == listed_id:
        continue
    # The stored id disagrees with the listing: correct it and rewrite the file.
    nexml_el[STUDY_ID_KEY] = listed_id
    write_as_json(study_blob, study_path)
    # NOTE(review): reporting only the corrected studies is assumed; the
    # collapsed source does not show whether this print sat inside the `if`.
    print(stored_id, listed_id)
def convert(self, src, serialize=None, output_dest=None, src_schema=None):
    """Convert the NexSON object `src` into the format and content this schema describes.

    Args:
        src: the source NexSON object. NOTE(review): for content 'meta' the
            result of `strip_to_meta_only` is ignored, so `src` appears to be
            modified in place -- confirm against that helper.
        serialize: for NexSON output, a true value requests serialized JSON
            and a false value returns the converted object. For other formats
            output is always serialized, so an explicit false value is
            rejected.
        output_dest: optional destination. For NexSON output it is handed to
            `write_as_json`; for other formats it may be a file-like object
            or a path. A path is opened here as UTF-8 text and is closed
            again before returning.
        src_schema: schema describing `src`; when None, `src` is treated as
            NexSON with no declared version.

    Returns:
        NexSON target: None when the result was written to `output_dest` or
        when there is nothing to return, the serialized string when
        `serialize` is true without a destination, otherwise the converted
        object.
        NeXML target: None when written to `output_dest`, otherwise the
        result of `convert_to_nexml`.
        NEXUS/Newick target: the extracted tree string (also written to
        `output_dest`, followed by a newline, when one is given).

    Raises:
        NotImplementedError: if `can_convert_from()` is false or the source
            is not NexSON.
        ValueError: if a non-NexSON target is requested with an explicit
            false `serialize`.
        AssertionError: if `self.format_code` is not a known format.
    """
    if src_schema is None:
        src_format = PhyloSchema.NEXSON
        current_format = None
    else:
        src_format = src_schema.format_code
        current_format = src_schema.version
    if not self.can_convert_from():
        m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
        raise NotImplementedError(m)
    if src_format != PhyloSchema.NEXSON:
        raise NotImplementedError('Only conversion from NexSON is currently supported')

    if self.format_code == PhyloSchema.NEXSON:
        d = src
        if self.content == 'study':
            d = convert_nexson_format(src,
                                      out_nexson_format=self.version,
                                      current_format=current_format,
                                      remove_old_structs=True,
                                      pristine_if_invalid=False,
                                      sort_arbitrary=False)
        elif self.content in ('tree', 'subtree'):
            if self.content == 'tree' and self.cull_nonmatching:
                d = cull_nonmatching_trees(d, self.content_id, current_format)
                d = convert_nexson_format(d,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            else:
                # Build a mapping from the first to the second element of
                # each tuple returned by extract_tree_nexson.
                i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                d = {}
                for ito_tup in i_t_o_list:
                    d[ito_tup[0]] = ito_tup[1]
        elif self.content == 'meta':
            strip_to_meta_only(d, current_format)
        elif self.content == 'otus':
            d = extract_otus_nexson(d, self.content_id, current_format)
        elif self.content == 'otu':
            d = extract_otu_nexson(d, self.content_id, current_format)
        elif self.content == 'otumap':
            if self.content_id is None:
                r = extract_otu_nexson(d, None, current_format)
            else:
                # The id may name an otus group; if not, fall back to
                # treating it as the id of a single otu.
                p = extract_otus_nexson(d, self.content_id, current_format)
                if p is None:
                    r = extract_otu_nexson(d, self.content_id, current_format)
                else:
                    r = {}
                    for v in p.values():
                        r.update(v.get('otuById', {}))
            if not r:
                return None
            d = _otu_dict_to_otumap(r)
        elif self.content == 'treelist':
            i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
            d = [i[0] for i in i_t_o_list]
        if d is None:
            return None
        if not serialize:
            return d
        if output_dest:
            write_as_json(d, output_dest)
            return None
        f, wrapper = get_utf_8_string_io_writer()
        write_as_json(d, wrapper)
        flush_utf_8_writer(wrapper)
        return f.getvalue()

    # Non-NexSON types go here...
    if (serialize is not None) and (not serialize):
        raise ValueError('Conversion without serialization is only supported for the NexSON format')
    # Remember whether the stream was opened here, so that only a handle
    # this method created gets closed (caller-supplied streams stay open).
    opened_here = False
    if output_dest and is_str_type(output_dest):
        output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
        opened_here = True
    try:
        if self.format_code == PhyloSchema.NEXML:
            if output_dest:
                write_obj_as_nexml(src, output_dest, addindent=' ', newl='\n',
                                   otu_label=self.otu_label_prop)
                return None
            return convert_to_nexml(src, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
        if self.format_code in [PhyloSchema.NEXUS, PhyloSchema.NEWICK]:
            if self.content in ('tree', 'subtree'):
                if isinstance(self.content_id, (list, tuple)):
                    ci, subtree_id = self.content_id
                else:
                    ci, subtree_id = self.content_id, None
            else:
                ci, subtree_id = None, None
            response = extract_tree(src, ci, self, subtree_id=subtree_id)
            # these formats are always serialized...
            if output_dest:
                output_dest.write(response)
                output_dest.write('\n')
            return response
        # Explicit raise rather than `assert False`, which is stripped
        # under `python -O`.
        raise AssertionError('convert reached an unsupported format_code')
    finally:
        if opened_here:
            output_dest.close()
def _write_pickle(directory, fn, obj):
    """Store `obj` in the file `<directory>/<fn>.pickle`.

    Args:
        directory: directory that receives the file.
        fn: base file name; the '.pickle' suffix is appended here.
        obj: object handed to `write_as_json`.

    NOTE(review): despite the function name and the '.pickle' suffix, the
    payload is passed to `write_as_json` through a handle opened in binary
    mode ('wb') -- confirm that this is intended and that the helper accepts
    a binary stream.
    """
    pickle_path = os.path.join(directory, fn + '.pickle')
    _LOG.debug('Creating "{p}"'.format(p=pickle_path))
    with open(pickle_path, 'wb') as out_stream:
        write_as_json(obj, out_stream)
else: unmatched.append(el) return num_matched, num, unmatched def evaluate_strategy(func, name, test_case_dict): num, num_matched = 0, 0 d = {} for k, v in test_case_dict.items(): m, n, u = evaluate_strategy_for_study(func, v) if u: d[k] = u num += n num_matched += m p = float(num_matched)/float(num) _LOG.debug('Strategy "{s}" matched {m} out of {n} which is {p:5.2f}%'.format(s=name, m=num_matched, n=num, p=100*p)) return num_matched, num, d num_matched, num, d = evaluate_strategy(no_op, "no_op", m) num_matched, num, d = evaluate_strategy(case_sensitive, "case sensitive", m) num_matched, num, d = evaluate_strategy(case_sensitive_cascade, "case sensitive cascade", m) num_matched, num, d = evaluate_strategy(case_sensitive_cascade_with_ssp, "case sensitive cascade with ssp", m) num_matched, num, d = evaluate_strategy(case_sensitive_cascade_with_ssp_sp_handling, "case sensitive cascade with ssp + 'sp.' handling", m) cascade_with_ssp_sp_handling num_matched, num, d = evaluate_strategy(cascade_with_ssp_sp_handling, "full cascade with ssp + 'sp.' handling", m) write_as_json(d, outf)
if len(study) == 1: study = '0' + study study2tree.setdefault('pg_' + study, []).append('tree' + tree) pa = PhylesystemAPI(get_from='local') raw_phylsys = pa.phylesystem_obj nexson_version = raw_phylsys.repo_nexml2json for study_id, tree_list in study2tree.items(): if verbose: sys.stderr.write('treelist={t} for study {s}.\n'.format(t=str(tree_list), s=study_id)) try: fp = raw_phylsys.get_filepath_for_study(study_id) blob = read_as_json(fp) nex = get_nexml_el(blob) prev = nex.setdefault('^ot:candidateTreeForSynthesis', []) for tree_id in tree_list: if tree_id not in prev: prev.append(tree_id) i_t_o_list = extract_tree_nexson(blob, tree_id, nexson_version) if not i_t_o_list: sys.stderr.write('tree {t} of study {s} not found !!!\n'.format(t=tree_id, s=study_id)) for tid, tree, otus_group in i_t_o_list: tree['^ot:unrootedTree'] = False tree['^ot:specifiedRoot'] = tree['^ot:rootNodeId'] if not dry_run: write_as_json(blob, fp) except KeyError: sys.stderr.write('study {} not found !!!\n'.format(study_id))