Esempio n. 1
0
 def fetch_nexsons(self, tree_list, download=False):
     """Write the NexSON of every study named in `tree_list` into the cache.

     `tree_list` is iterated and each element is indexed with 'study_id'.
     When `download` is true, `pull()` is called on the API's
     `phylesystem_obj` before any study is requested. Each study is
     requested with a content spec built for nexson_version '0.0.0' and is
     written with `write_as_json` to `<self.nexson_cache>/<study_id>`.

     NOTE(review): the attribute read here is spelled `phyleystem_api`;
     confirm that this matches the attribute defined on the class.
     """
     cache_dir = self.nexson_cache
     api = self.phyleystem_api
     if download:
         api.phylesystem_obj.pull()
     content_spec = create_content_spec(nexson_version='0.0.0')
     for study_id in (entry['study_id'] for entry in tree_list):
         study_blob = api.get_study(study_id, schema=content_spec)
         write_as_json(study_blob, os.path.join(cache_dir, study_id))
Esempio n. 2
0
 def _write_master_branch_resource(self, content, fn, commit_msg, is_json=False):
     """Write `content` to `fn` on the master branch and commit it.

     This will force the current branch to master!

     `content` is written with `write_as_json` when `is_json` is true and
     with `write_to_filepath` otherwise. The file is then added and
     committed with `commit_msg`, using the infrastructure commit author.
     All of this happens while holding the master-branch repo lock and the
     git action's own lock.
     """
     # TODO: we might want this to push, but currently it is only called in
     # contexts in which we are about to push anyway (study creation)
     with self._master_branch_repo_lock:
         git_action = self._create_git_action_for_global_resource()
         with git_action.lock():
             git_action.checkout_master()
             writer = write_as_json if is_json else write_to_filepath
             writer(content, fn)
             author = self._infrastructure_commit_author
             git_action._add_and_commit(fn, author, commit_msg)
Esempio n. 3
0
 def _write_master_branch_resource(self, content, fn, commit_msg, is_json=False):
     """Write `content` to `fn` on the master branch and commit it.

     This will force the current branch to master!

     `content` is written with `write_as_json` when `is_json` is true and
     with `write_to_filepath` otherwise. The file is then added and
     committed with `commit_msg`, using the infrastructure commit author.
     All of this happens while holding the master-branch repo lock and the
     git action's own lock.
     """
     # TODO: we might want this to push, but currently it is only called in
     # contexts in which we are about to push anyway (study creation)
     with self._master_branch_repo_lock:
         git_action = self._create_git_action_for_global_resource()
         with git_action.lock():
             git_action.checkout_master()
             writer = write_as_json if is_json else write_to_filepath
             writer(content, fn)
             author = self._infrastructure_commit_author
             git_action._add_and_commit(fn, author, commit_msg)
Esempio n. 4
0
 def _write_pickle(directory, fn, obj):
     """Write `obj` via `write_as_json` to the file `<directory>/<fn>.pickle`.

     The target path is logged at debug level before the file is opened.

     NOTE(review): despite the function name and the '.pickle' suffix, the
     payload is produced by `write_as_json`, and the handle is opened in
     binary mode -- confirm that `write_as_json` accepts such a handle.
     """
     out_path = os.path.join(directory, fn + '.pickle')
     _LOG.debug('Creating "{p}"'.format(p=out_path))
     with open(out_path, 'wb') as out_stream:
         write_as_json(obj, out_stream)
Esempio n. 5
0
impossible = {}

np = 0
ni = 0
# Split every study's entries into "plausible" ones (some proposed match is a
# case-insensitive substring of the original label) and "implausible" ones.
for study_id, otu_list in test_case_dict.items():
    p, i = [], []
    for el in otu_list:
        matches = el[1]
        orig = el[0].lower()
        for m in matches:
            if m.lower() in orig:
                # First hit is enough to call the entry plausible.
                p.append(el)
                break
        else:
            # No candidate appeared inside the original label.
            i.append(el)
    if p:
        possible[study_id] = p
        np += len(p)
    if i:
        impossible[study_id] = i
        ni += len(i)

# Persist both partitions and report how many entries landed in each.
write_as_json(possible, poutf)
write_as_json(impossible, ioutf)
_LOG.debug('%d promising mappings written to %s.' % (np, poutf))
_LOG.debug('%d implausible mapping written to %s' % (ni, ioutf))
Esempio n. 6
0
def evaluate_strategy(func, name, test_case_dict):
    """Run the matching strategy `func` over every study and log its hit rate.

    For each `(key, value)` pair of `test_case_dict`,
    `evaluate_strategy_for_study(func, value)` is called and its 3-tuple
    result is unpacked as (matched count, total count, unmatched entries).
    The counts are summed over all studies, and a non-empty unmatched
    collection is stored under the study's key.

    Parameters:
        func: the strategy, forwarded to `evaluate_strategy_for_study`.
        name: label for the strategy; only used in the log message.
        test_case_dict: mapping of study key -> test cases for that study.

    Returns:
        `(num_matched, num, d)` where `d` maps each study key that has
        unmatched entries to those entries.
    """
    num, num_matched = 0, 0
    d = {}
    for k, v in test_case_dict.items():
        m, n, u = evaluate_strategy_for_study(func, v)
        if u:
            d[k] = u
        num += n
        num_matched += m
    # With no test cases at all the ratio is undefined; report 0% instead of
    # raising ZeroDivisionError.
    p = float(num_matched) / float(num) if num else 0.0
    _LOG.debug(
        'Strategy "{s}" matched {m} out of {n} which is {p:5.2f}%'.format(
            s=name, m=num_matched, n=num, p=100 * p))
    return num_matched, num, d


# Evaluate each strategy in turn. Every call logs its own match rate; the
# results are rebound each time, so only the unmatched entries of the LAST
# strategy are written out below.
num_matched, num, d = evaluate_strategy(no_op, "no_op", m)
num_matched, num, d = evaluate_strategy(case_sensitive, "case sensitive", m)
num_matched, num, d = evaluate_strategy(case_sensitive_cascade,
                                        "case sensitive cascade", m)
num_matched, num, d = evaluate_strategy(case_sensitive_cascade_with_ssp,
                                        "case sensitive cascade with ssp", m)
num_matched, num, d = evaluate_strategy(
    case_sensitive_cascade_with_ssp_sp_handling,
    "case sensitive cascade with ssp + 'sp.' handling", m)
# (A stray bare-name expression statement that had no effect was removed here.)
num_matched, num, d = evaluate_strategy(
    cascade_with_ssp_sp_handling, "full cascade with ssp + 'sp.' handling", m)

write_as_json(d, outf)
Esempio n. 7
0
    def load_graph(self,
                   tree_list,
                   reinitialize=False,
                   testing=False,
                   report=True,
                   map_compat=True):
        """Load each tree of `tree_list` into the load db, one at a time.

        Parameters:
            tree_list: iterable whose elements are indexed with 'study_id'
                and 'tree_id'. It is iterated twice (validation, then load).
            reinitialize: when true and the taxonomy db path differs from the
                load db path, the load db is replaced by a copy of the
                taxonomy db and the record of loaded trees is cleared.
            testing: forwarded to `treemachine_load_one_tree`.
            report: when true, the string from `self._report_source_tree` is
                appended to `self.tree_log` and printed.
            map_compat: when true, the output of
                `treemachine_map_compat_one_tree` is appended to
                `self.log_filepath` and printed.

        Raises:
            RuntimeError: if a study file is missing from the nexson cache, or
                if `reinitialize` is requested but the taxonomy db path does
                not exist.
        """
        tb = self.load_db
        nc = self.nexson_cache
        log_filepath = self.log_filepath
        tree_log = self.tree_log
        loaded_trees_json = self.loaded_trees_json
        # Fail fast: every study file must already be in the cache before any
        # tree is loaded.
        for id_obj in tree_list:
            study_id = id_obj['study_id']
            path = os.path.join(nc, study_id)
            if not os.path.exists(path):
                f = 'Study file not found at "{p}". All studies must be fetched before they can be loaded.'
                raise RuntimeError(f.format(p=path))
        # Resume from the record of previously loaded trees, if there is one.
        if os.path.exists(loaded_trees_json):
            loaded = read_as_json(loaded_trees_json)
        else:
            loaded = []
        if reinitialize:
            tax_db = self.taxonomy_db
            # Nothing is reset when the taxonomy db and the load db are the
            # same path.
            if os.path.abspath(tax_db) != os.path.abspath(tb):
                if not os.path.exists(tax_db):
                    f = 'loading a graph with reinitialize requies that the taxonomy has been loaded into a taxonomy db'
                    raise RuntimeError(f)
                # Replace the load db with a fresh copy of the taxonomy db and
                # forget which trees had been loaded into the old one.
                self._remove_filepath(tb)
                _LOG.debug('copying "{s}" to "{d}"'.format(s=tax_db, d=tb))
                shutil.copytree(tax_db, tb)
                if os.path.exists(loaded_trees_json):
                    os.remove(loaded_trees_json)
                loaded = []

        for id_obj in tree_list:
            study_id = id_obj['study_id']
            tree_id = id_obj['tree_id']
            path = os.path.join(nc, study_id)
            sha = treemachine_load_one_tree(self.java_invoc,
                                            self.treemachine_jar,
                                            tb,
                                            path,
                                            tree_id,
                                            log_filepath,
                                            testing=testing)
            loaded.append({'study_id':study_id, 'tree_id': tree_id, 'sha': sha})
            # The record is rewritten after every tree, so it is up to date
            # even if a later tree fails to load.
            write_as_json(loaded, loaded_trees_json)
            if report:
                tree_str = self._report_source_tree(tb, study_id, tree_id, sha)
                with codecs.open(tree_log, 'a', encoding='utf-8') as tree_fo:
                    tree_fo.write(tree_str)
                    tree_fo.write('\n')
                print(tree_str)
            if map_compat:
                map_content = treemachine_map_compat_one_tree(self.java_invoc,
                                                              self.treemachine_jar,
                                                              tb,
                                                              study_id,
                                                              tree_id,
                                                              sha)
                with codecs.open(log_filepath, 'a', encoding='utf-8') as log_fo:
                    log_fo.write(map_content)
                print(map_content)
Esempio n. 8
0
    def convert(self, src, serialize=None, output_dest=None, src_schema=None):
        """Convert `src` to the format and content selected by this schema.

        Parameters:
            src: the source object. Only NexSON input is supported.
            serialize: for NexSON output, a truthy value serializes the
                result as JSON; otherwise the converted object is returned.
                For the other output formats the result is always serialized,
                and an explicit falsy (non-None) value raises ValueError.
            output_dest: optional destination. For NexSON output it is handed
                to `write_as_json` unchanged. For the other formats a string
                is treated as a filepath which is opened (UTF-8, 'w') and
                closed here; any other truthy value is written to and left
                open for the caller.
            src_schema: optional schema describing `src`; its `format_code`
                and `version` are read. When None, the source is taken to be
                NexSON and the current version is passed on as None.

        Returns:
            NexSON output: None if there is nothing to return or if the
            result was written to `output_dest`; the serialized string if
            `serialize` is truthy without a destination; otherwise the
            converted object.
            NeXML output: None if written to `output_dest`, else the result
            of `convert_to_nexml`.
            NEXUS/Newick output: the extracted tree string (also written to
            `output_dest`, followed by a newline, when one is given).

        Raises:
            NotImplementedError: if `self.can_convert_from()` is false or the
                source format is not NexSON.
            ValueError: if a non-NexSON output is requested with
                `serialize` explicitly false.
        """
        if src_schema is None:
            src_format = PhyloSchema.NEXSON
            current_format = None
        else:
            src_format = src_schema.format_code
            current_format = src_schema.version
        if not self.can_convert_from():
            m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
            raise NotImplementedError(m)
        if src_format != PhyloSchema.NEXSON:
            raise NotImplementedError('Only conversion from NexSON is currently supported')
        if self.format_code == PhyloSchema.NEXSON:
            d = src
            if self.content == 'study':
                d = convert_nexson_format(src,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            elif self.content in ('tree', 'subtree'):
                if self.content == 'tree' and self.cull_nonmatching:
                    d = cull_nonmatching_trees(d, self.content_id, current_format)
                    d = convert_nexson_format(d,
                                              out_nexson_format=self.version,
                                              current_format=current_format,
                                              remove_old_structs=True,
                                              pristine_if_invalid=False,
                                              sort_arbitrary=False)

                else:
                    # Build a mapping from the first to the second element of
                    # each tuple returned by extract_tree_nexson.
                    i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                    d = {}
                    for ito_tup in i_t_o_list:
                        i, t = ito_tup[0], ito_tup[1]
                        d[i] = t
            elif self.content == 'meta':
                # The return value is ignored, so `d` is still `src` here.
                # NOTE(review): presumably this strips `src` in place -- confirm.
                strip_to_meta_only(d, current_format)
            elif self.content == 'otus':
                d = extract_otus_nexson(d, self.content_id, current_format)
            elif self.content == 'otu':
                d = extract_otu_nexson(d, self.content_id, current_format)
            elif self.content == 'otumap':
                if self.content_id is None:
                    r = extract_otu_nexson(d, None, current_format)
                else:
                    # Try the id as an otus-group id first, and fall back to
                    # treating it as a single otu id.
                    p = extract_otus_nexson(d, self.content_id, current_format)
                    if p is None:
                        r = extract_otu_nexson(d, self.content_id, current_format)
                    else:
                        r = {}
                        for v in p.values():
                            r.update(v.get('otuById', {}))
                if not r:
                    return None
                d = _otu_dict_to_otumap(r)
            elif self.content == 'treelist':
                i_t_o_list = extract_tree_nexson(d,
                                                 self.content_id,
                                                 current_format)
                d = [i[0] for i in i_t_o_list]
            if d is None:
                return None
            if serialize:
                if output_dest:
                    write_as_json(d, output_dest)
                    return None
                else:
                    f, wrapper = get_utf_8_string_io_writer()
                    write_as_json(d, wrapper)
                    flush_utf_8_writer(wrapper)
                    return f.getvalue()
            else:
                return d
        # Non-NexSON types go here...
        if (serialize is not None) and (not serialize):
            raise ValueError('Conversion without serialization is only supported for the NexSON format')
        # A string destination is a filepath that we open ourselves. Remember
        # that, so the handle is closed on every exit path instead of leaking.
        opened_here = False
        if output_dest and is_str_type(output_dest):
            output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
            opened_here = True
        try:
            if self.format_code == PhyloSchema.NEXML:
                if output_dest:
                    write_obj_as_nexml(src, output_dest, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
                    return
                return convert_to_nexml(src, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
            elif self.format_code in [PhyloSchema.NEXUS, PhyloSchema.NEWICK]:
                if self.content in ('tree', 'subtree'):
                    # A list/tuple content_id carries (tree id, subtree id).
                    if isinstance(self.content_id, list) or isinstance(self.content_id, tuple):
                        ci, subtree_id = self.content_id
                    else:
                        ci, subtree_id = self.content_id, None
                else:
                    ci, subtree_id = None, None
                response = extract_tree(src, ci, self, subtree_id=subtree_id)
                # these formats are always serialized...
                if output_dest:
                    output_dest.write(response)
                    output_dest.write('\n')
                return response
            assert False
        finally:
            if opened_here:
                output_dest.close()
Esempio n. 9
0
#!/usr/bin/env python
from peyotl.api import APIWrapper
from peyotl.utility.input_output import read_as_json, write_as_json
from peyotl.nexson_syntax import get_nexml_el

# Walk every study file of the local phylesystem and make the '^ot:studyId'
# stored inside the file agree with the id the study is filed under.
api_wrapper = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'})
phylesystem = api_wrapper.phylesystem_api.phylesystem_obj
for sid, fp in phylesystem.iter_study_filepaths():
    blob = read_as_json(fp)
    nex = get_nexml_el(blob)
    x = nex.get('^ot:studyId')
    if x == sid:
        # Already consistent; leave the file untouched.
        continue
    nex['^ot:studyId'] = sid
    write_as_json(blob, fp)
    # Report the old value next to the corrected one.
    print(x, sid)
Esempio n. 10
0
    def convert(self, src, serialize=None, output_dest=None, src_schema=None):
        """Convert `src` to the format and content selected by this schema.

        Parameters:
            src: the source object. Only NexSON input is supported.
            serialize: for NexSON output, a truthy value serializes the
                result as JSON; otherwise the converted object is returned.
                For the other output formats the result is always serialized,
                and an explicit falsy (non-None) value raises ValueError.
            output_dest: optional destination. For NexSON output it is handed
                to `write_as_json` unchanged. For the other formats a string
                is treated as a filepath which is opened (UTF-8, 'w') and
                closed here; any other truthy value is written to and left
                open for the caller.
            src_schema: optional schema describing `src`; its `format_code`
                and `version` are read. When None, the source is taken to be
                NexSON and the current version is passed on as None.

        Returns:
            NexSON output: None if there is nothing to return or if the
            result was written to `output_dest`; the serialized string if
            `serialize` is truthy without a destination; otherwise the
            converted object.
            NeXML output: None if written to `output_dest`, else the result
            of `convert_to_nexml`.
            NEXUS/Newick output: the extracted tree string (also written to
            `output_dest`, followed by a newline, when one is given).

        Raises:
            NotImplementedError: if `self.can_convert_from()` is false or the
                source format is not NexSON.
            ValueError: if a non-NexSON output is requested with
                `serialize` explicitly false.
        """
        if src_schema is None:
            src_format = PhyloSchema.NEXSON
            current_format = None
        else:
            src_format = src_schema.format_code
            current_format = src_schema.version
        if not self.can_convert_from():
            m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
            raise NotImplementedError(m)
        if src_format != PhyloSchema.NEXSON:
            raise NotImplementedError('Only conversion from NexSON is currently supported')
        if self.format_code == PhyloSchema.NEXSON:
            d = src
            if self.content == 'study':
                d = convert_nexson_format(src,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            elif self.content in ('tree', 'subtree'):
                if self.content == 'tree' and self.cull_nonmatching:
                    d = cull_nonmatching_trees(d, self.content_id, current_format)
                    d = convert_nexson_format(d,
                                              out_nexson_format=self.version,
                                              current_format=current_format,
                                              remove_old_structs=True,
                                              pristine_if_invalid=False,
                                              sort_arbitrary=False)

                else:
                    # Build a mapping from the first to the second element of
                    # each tuple returned by extract_tree_nexson.
                    i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                    d = {}
                    for ito_tup in i_t_o_list:
                        i, t = ito_tup[0], ito_tup[1]
                        d[i] = t
            elif self.content == 'meta':
                # The return value is ignored, so `d` is still `src` here.
                # NOTE(review): presumably this strips `src` in place -- confirm.
                strip_to_meta_only(d, current_format)
            elif self.content == 'otus':
                d = extract_otus_nexson(d, self.content_id, current_format)
            elif self.content == 'otu':
                d = extract_otu_nexson(d, self.content_id, current_format)
            elif self.content == 'otumap':
                if self.content_id is None:
                    r = extract_otu_nexson(d, None, current_format)
                else:
                    # Try the id as an otus-group id first, and fall back to
                    # treating it as a single otu id.
                    p = extract_otus_nexson(d, self.content_id, current_format)
                    if p is None:
                        r = extract_otu_nexson(d, self.content_id, current_format)
                    else:
                        r = {}
                        for v in p.values():
                            r.update(v.get('otuById', {}))
                if not r:
                    return None
                d = _otu_dict_to_otumap(r)
            elif self.content == 'treelist':
                i_t_o_list = extract_tree_nexson(d,
                                                 self.content_id,
                                                 current_format)
                d = [i[0] for i in i_t_o_list]
            if d is None:
                return None
            if serialize:
                if output_dest:
                    write_as_json(d, output_dest)
                    return None
                else:
                    f, wrapper = get_utf_8_string_io_writer()
                    write_as_json(d, wrapper)
                    flush_utf_8_writer(wrapper)
                    return f.getvalue()
            else:
                return d
        # Non-NexSON types go here...
        if (serialize is not None) and (not serialize):
            raise ValueError('Conversion without serialization is only supported for the NexSON format')
        # A string destination is a filepath that we open ourselves. Remember
        # that, so the handle is closed on every exit path instead of leaking.
        opened_here = False
        if output_dest and is_str_type(output_dest):
            output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
            opened_here = True
        try:
            if self.format_code == PhyloSchema.NEXML:
                if output_dest:
                    write_obj_as_nexml(src, output_dest, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
                    return
                return convert_to_nexml(src, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
            elif self.format_code in [PhyloSchema.NEXUS, PhyloSchema.NEWICK]:
                if self.content in ('tree', 'subtree'):
                    # A list/tuple content_id carries (tree id, subtree id).
                    if isinstance(self.content_id, list) or isinstance(self.content_id, tuple):
                        ci, subtree_id = self.content_id
                    else:
                        ci, subtree_id = self.content_id, None
                else:
                    ci, subtree_id = None, None
                response = extract_tree(src, ci, self, subtree_id=subtree_id)
                # these formats are always serialized...
                if output_dest:
                    output_dest.write(response)
                    output_dest.write('\n')
                return response
            assert False
        finally:
            if opened_here:
                output_dest.close()
Esempio n. 11
0
 def _write_pickle(directory, fn, obj):
     """Write `obj` via `write_as_json` to the file `<directory>/<fn>.pickle`.

     The target path is logged at debug level before the file is opened.

     NOTE(review): despite the function name and the '.pickle' suffix, the
     payload is produced by `write_as_json`, and the handle is opened in
     binary mode -- confirm that `write_as_json` accepts such a handle.
     """
     out_path = os.path.join(directory, fn + '.pickle')
     _LOG.debug('Creating "{p}"'.format(p=out_path))
     with open(out_path, 'wb') as out_stream:
         write_as_json(obj, out_stream)
Esempio n. 12
0
        else:
            unmatched.append(el)
    return num_matched, num, unmatched

def evaluate_strategy(func, name, test_case_dict):
    """Run the matching strategy `func` over every study and log its hit rate.

    For each `(key, value)` pair of `test_case_dict`,
    `evaluate_strategy_for_study(func, value)` is called and its 3-tuple
    result is unpacked as (matched count, total count, unmatched entries).
    The counts are summed over all studies, and a non-empty unmatched
    collection is stored under the study's key.

    Parameters:
        func: the strategy, forwarded to `evaluate_strategy_for_study`.
        name: label for the strategy; only used in the log message.
        test_case_dict: mapping of study key -> test cases for that study.

    Returns:
        `(num_matched, num, d)` where `d` maps each study key that has
        unmatched entries to those entries.
    """
    num, num_matched = 0, 0
    d = {}
    for k, v in test_case_dict.items():
        m, n, u = evaluate_strategy_for_study(func, v)
        if u:
            d[k] = u
        num += n
        num_matched += m
    # With no test cases at all the ratio is undefined; report 0% instead of
    # raising ZeroDivisionError.
    p = float(num_matched)/float(num) if num else 0.0
    _LOG.debug('Strategy "{s}" matched {m} out of {n} which is {p:5.2f}%'.format(s=name,
                                                                           m=num_matched,
                                                                           n=num,
                                                                           p=100*p))
    return num_matched, num, d

# Evaluate each strategy in turn. Every call logs its own match rate; the
# results are rebound each time, so only the unmatched entries of the LAST
# strategy are written out below.
num_matched, num, d = evaluate_strategy(no_op, "no_op", m)
num_matched, num, d = evaluate_strategy(case_sensitive, "case sensitive", m)
num_matched, num, d = evaluate_strategy(case_sensitive_cascade, "case sensitive cascade", m)
num_matched, num, d = evaluate_strategy(case_sensitive_cascade_with_ssp, "case sensitive cascade with ssp", m)
num_matched, num, d = evaluate_strategy(case_sensitive_cascade_with_ssp_sp_handling, "case sensitive cascade with ssp + 'sp.' handling", m)
# (A stray bare-name expression statement that had no effect was removed here.)
num_matched, num, d = evaluate_strategy(cascade_with_ssp_sp_handling,
                                        "full cascade with ssp + 'sp.' handling", m)

write_as_json(d, outf)
    if len(study) == 1:
        study = '0' + study
    study2tree.setdefault('pg_' + study, []).append('tree' + tree)


# For every study in `study2tree`, record the listed trees as candidates for
# synthesis in the study's NexSON file and mark each of them as rooted.
pa = PhylesystemAPI(get_from='local')
raw_phylsys = pa.phylesystem_obj
nexson_version = raw_phylsys.repo_nexml2json
for study_id, tree_list in study2tree.items():
    if verbose:
        sys.stderr.write('treelist={t} for study {s}.\n'.format(t=str(tree_list), s=study_id))
    # NOTE(review): the `except KeyError` below reports "study not found", but
    # this `try` also covers the `tree['^ot:rootNodeId']` lookup, so a tree
    # without that key would be reported the same way -- confirm this is
    # intended.
    try:
        fp = raw_phylsys.get_filepath_for_study(study_id)
        blob = read_as_json(fp)

        nex = get_nexml_el(blob)
        # Create the candidate list if it is absent; append only new tree ids.
        prev = nex.setdefault('^ot:candidateTreeForSynthesis', [])
        for tree_id in tree_list:
            if tree_id not in prev:
                prev.append(tree_id)
            i_t_o_list = extract_tree_nexson(blob, tree_id, nexson_version)
            if not i_t_o_list:
                sys.stderr.write('tree {t} of study {s} not found !!!\n'.format(t=tree_id, s=study_id))
            # Flag each extracted tree as rooted, with its current root node
            # as the specified root.
            for tid, tree, otus_group in i_t_o_list:
                tree['^ot:unrootedTree'] = False
                tree['^ot:specifiedRoot'] = tree['^ot:rootNodeId']
        # The modified blob is written back only when this is not a dry run.
        if not dry_run:
            write_as_json(blob, fp)
        
    except KeyError:
        sys.stderr.write('study {} not found !!!\n'.format(study_id))