Ejemplo n.º 1
0
 def read_exemplified_phylo(self):
     """Summarize the contents of the 'exemplified_phylo' output directory.

     Works inside ``<self.top_output_dir>/exemplified_phylo``: reads
     'exemplified_log.json', runs ``make`` for
     'pruned_taxonomy_degree_distribution.txt' and reads 'nonempty_trees.txt'.

     Returns an Extensible with these attributes:
       * num_leaves_in_exemplified_taxonomy: int parsed from the second field
         of the only degree-distribution line whose first field is '0'.
       * taxa_exemplified: the 'taxa_exemplified' entry of the log ({} when
         that entry is falsy).
       * source_tree_to_ott_id_exemplified_list: maps each name found in a
         'trees_modified' list (minus its last '.'-separated piece) to the
         sorted list of 'taxa_exemplified' keys that listed it.
       * nonempty_tree_filenames: the result of stripped_nonempty_lines for
         'nonempty_trees.txt'.
       * nonempty_trees: propinquity_fn_to_study_tree applied to each of
         those file names.

     Raises AssertionError when the degree-distribution file is missing after
     the ``make`` call, or when it does not hold exactly one two-field line
     starting with '0'.
     """
     ex_dir = os.path.join(self.top_output_dir, 'exemplified_phylo')
     log_blob = read_as_json(os.path.join(ex_dir, 'exemplified_log.json'))
     taxa = log_blob['taxa_exemplified'] or {}
     ids_by_tree = {}
     for ott_id, ex_info in taxa.items():
         for tree_fn in ex_info['trees_modified']:
             # Drop the final '.'-separated piece of the tree name.
             pieces = tree_fn.split('.')
             tree_key = '.'.join(pieces[:-1])
             if tree_key not in ids_by_tree:
                 ids_by_tree[tree_key] = []
             ids_by_tree[tree_key].append(ott_id)
     for tree_key in ids_by_tree:
         ids_by_tree[tree_key].sort()
     degree_fp = os.path.join(ex_dir,
                              'pruned_taxonomy_degree_distribution.txt')
     # The exit status of make is not inspected; only the existence of the
     # target file is checked afterwards.
     subprocess.call(['make', degree_fp])
     assert os.path.exists(degree_fp)
     zero_rows = []
     for line in stripped_nonempty_lines(degree_fp):
         fields = line.split()
         if fields[0] == '0':
             zero_rows.append(fields)
     assert len(zero_rows) == 1
     leaf_row = zero_rows[0]
     assert len(leaf_row) == 2
     blob = Extensible()
     blob.num_leaves_in_exemplified_taxonomy = int(leaf_row[1])
     blob.taxa_exemplified = taxa
     blob.source_tree_to_ott_id_exemplified_list = ids_by_tree
     nonempty_fp = os.path.join(ex_dir, 'nonempty_trees.txt')
     blob.nonempty_tree_filenames = stripped_nonempty_lines(nonempty_fp)
     study_tree_pairs = []
     for tree_fn in blob.nonempty_tree_filenames:
         study_tree_pairs.append(propinquity_fn_to_study_tree(tree_fn))
     blob.nonempty_trees = study_tree_pairs
     return blob
Ejemplo n.º 2
0
 def read_phylo_input(self):
     """Describe the 'phylo_input' output directory.

     Returns an Extensible with:
       * directory: ``<self.top_output_dir>/phylo_input``.
       * study_tree_pair_file: path of 'study_tree_pairs.txt' in that
         directory.
       * study_id_tree_id_pairs: propinquity_fn_to_study_tree(line,
         strip_extension=False) for every line that
         stripped_nonempty_lines yields from that file.
     """
     blob = Extensible()
     phylo_dir = os.path.join(self.top_output_dir, 'phylo_input')
     blob.directory = phylo_dir
     pairs_fp = os.path.join(phylo_dir, 'study_tree_pairs.txt')
     blob.study_tree_pair_file = pairs_fp
     pairs = []
     for line in stripped_nonempty_lines(pairs_fp):
         pairs.append(propinquity_fn_to_study_tree(line, strip_extension=False))
     blob.study_id_tree_id_pairs = pairs
     return blob
Ejemplo n.º 3
0
 def read_exemplified_phylo(self):
     """Load the 'exemplified_phylo' step outputs into an Extensible blob.

     All files live in ``<self.top_output_dir>/exemplified_phylo``:
       * 'exemplified_log.json' supplies the 'taxa_exemplified' mapping
         (replaced by {} when falsy). Each value's 'trees_modified' list is
         inverted into a mapping from tree name (last '.'-separated piece
         removed) to the sorted list of 'taxa_exemplified' keys.
       * 'pruned_taxonomy_degree_distribution.txt' is (re)built through
         ``make``; the single line whose first field is '0' provides the
         leaf count as its second field.
       * 'nonempty_trees.txt' lists tree file names, each converted with
         propinquity_fn_to_study_tree.

     Returns the blob with attributes num_leaves_in_exemplified_taxonomy,
     taxa_exemplified, source_tree_to_ott_id_exemplified_list,
     nonempty_tree_filenames and nonempty_trees.

     Raises AssertionError if the degree-distribution file does not exist
     after ``make`` ran or does not contain exactly one two-field '0' line.
     """
     phylo_dir = os.path.join(self.top_output_dir, 'exemplified_phylo')
     log_fp = os.path.join(phylo_dir, 'exemplified_log.json')
     exemplified = read_as_json(log_fp)['taxa_exemplified']
     if not exemplified:
         exemplified = {}
     ott_ids_by_tree = {}
     for ott_id, details in exemplified.items():
         for modified_tree in details['trees_modified']:
             # Everything before the last '.' of the tree name is the key.
             name_parts = modified_tree.split('.')[:-1]
             ott_ids_by_tree.setdefault('.'.join(name_parts), []).append(ott_id)
     for id_list in ott_ids_by_tree.values():
         id_list.sort()
     dist_fp = os.path.join(phylo_dir, 'pruned_taxonomy_degree_distribution.txt')
     # make's return code is ignored; the assert below is the only check.
     subprocess.call(['make', dist_fp])
     assert os.path.exists(dist_fp)
     leaf_lines = []
     for dist_line in stripped_nonempty_lines(dist_fp):
         columns = dist_line.split()
         if columns[0] != '0':
             continue
         leaf_lines.append(columns)
     assert len(leaf_lines) == 1
     only_leaf_line = leaf_lines[0]
     assert len(only_leaf_line) == 2
     blob = Extensible()
     blob.num_leaves_in_exemplified_taxonomy = int(only_leaf_line[1])
     blob.taxa_exemplified = exemplified
     blob.source_tree_to_ott_id_exemplified_list = ott_ids_by_tree
     blob.nonempty_tree_filenames = stripped_nonempty_lines(
         os.path.join(phylo_dir, 'nonempty_trees.txt'))
     blob.nonempty_trees = [
         propinquity_fn_to_study_tree(tree_fn)
         for tree_fn in blob.nonempty_tree_filenames
     ]
     return blob
Ejemplo n.º 4
0
 def read_subproblems(self):
     """Summarize the 'subproblems' output directory in an Extensible blob.

     Reads, from ``<self.top_output_dir>/subproblems``:
       * 'contesting-trees.json' (treated as {} when falsy),
       * 'subproblem-ids.txt',
       * one '<prefix>-tree-names.txt' file per listed subproblem,
     and also uses
     ``self.subproblem_solutions.subproblem_num_leaves_num_internal_nodes``.

     Returns the blob with these attributes:
       * contested_taxa: maps each key of the JSON (a leading 'ott' removed)
         to a list of dicts with keys 'study_id', 'tree_id', 'tree_filename'
         and 'conflicting_nodes'; each conflicting node is a dict with
         'parent' and 'children_from_taxon' converted by node_label2obj.
       * tree_files: the result of stripped_nonempty_lines for
         'subproblem-ids.txt'.
       * sorted_by_num_phylo_inputs: [tree file, input names, value looked
         up for that tree file] triples, ordered by descending
         (number of inputs, numeric id, file name, input names).
       * input_and_subproblems_sorted: [input name, subproblem prefixes]
         pairs for every input other than 'TAXONOMY', ordered by descending
         (number of subproblems, input name, prefixes).

     Raises RuntimeError when a tree in the JSON has fewer than two node
     info elements, AssertionError when a subproblem file name does not end
     in '.tre' or does not start with 'ott'.
     """
     sub_dir = os.path.join(self.top_output_dir, 'subproblems')
     blob = Extensible()
     contesting_fp = os.path.join(sub_dir, 'contesting-trees.json')
     contesting = read_as_json(contesting_fp) or {}
     contested = {}
     for raw_id, tree_to_nodes in contesting.items():
         taxon_id = raw_id[3:] if raw_id.startswith('ott') else raw_id
         tree_records = []
         contested[taxon_id] = tree_records
         for tree_fn, node_infos in tree_to_nodes.items():
             study_id, tree_id = propinquity_fn_to_study_tree(tree_fn)
             conflicts = []
             # The record is registered before validation, and shares the
             # `conflicts` list that is filled in below.
             tree_records.append({'study_id': study_id,
                                  'tree_id': tree_id,
                                  'tree_filename': tree_fn,
                                  'conflicting_nodes': conflicts})
             if len(node_infos) < 2:
                 msg = 'read_subproblems < 2 node info elements for taxon ID = {}'
                 raise RuntimeError(msg.format(taxon_id))
             for info in node_infos:
                 taxon_children = [node_label2obj(lab)
                                   for lab in info['children_from_taxon']]
                 conflicts.append({'parent': node_label2obj(info['parent']),
                                   'children_from_taxon': taxon_children})
     blob.contested_taxa = contested
     ids_fp = os.path.join(sub_dir, 'subproblem-ids.txt')
     blob.tree_files = stripped_nonempty_lines(ids_fp)
     solutions = self.subproblem_solutions
     leaves_by_id = {}
     for rec in solutions.subproblem_num_leaves_num_internal_nodes:
         leaves_by_id[rec[0]] = rec[1]
     ranked = []
     subproblems_by_input = {}
     for tree_file in blob.tree_files:
         assert tree_file.endswith('.tre')
         prefix = tree_file[:-4]
         assert prefix.startswith('ott')
         names_fp = os.path.join(sub_dir, prefix + '-tree-names.txt')
         inputs = []
         for name in stripped_nonempty_lines(names_fp):
             # The name is recorded as read; only the grouping key has the
             # '.tre' suffix removed.
             inputs.append(name)
             stem = name[:-4] if name.endswith('.tre') else name
             if stem == 'TAXONOMY':
                 continue
             subproblems_by_input.setdefault(stem, []).append(prefix)
         ranked.append((len(inputs), int(prefix[3:]), tree_file, inputs))
     ranked.sort(reverse=True)
     blob.sorted_by_num_phylo_inputs = [
         [entry[2], entry[3], leaves_by_id[entry[2]]] for entry in ranked
     ]
     input_rows = []
     for input_name, prefixes in subproblems_by_input.items():
         input_rows.append((len(prefixes), input_name, prefixes))
     input_rows.sort(reverse=True)
     blob.input_and_subproblems_sorted = [[row[1], row[2]] for row in input_rows]
     return blob
Ejemplo n.º 5
0
 def read_phylo_input(self):
     """Build an Extensible describing the 'phylo_input' directory.

     The returned blob carries:
       * directory: 'phylo_input' joined onto self.top_output_dir.
       * study_tree_pair_file: 'study_tree_pairs.txt' inside that directory.
       * study_id_tree_id_pairs: one propinquity_fn_to_study_tree result
         (called with strip_extension=False) per line that
         stripped_nonempty_lines yields from the pair file.
     """
     blob = Extensible()
     blob.directory = os.path.join(self.top_output_dir, 'phylo_input')
     pair_fp = os.path.join(blob.directory, 'study_tree_pairs.txt')
     blob.study_tree_pair_file = pair_fp
     id_pairs = []
     for entry in stripped_nonempty_lines(blob.study_tree_pair_file):
         id_pair = propinquity_fn_to_study_tree(entry, strip_extension=False)
         id_pairs.append(id_pair)
     blob.study_id_tree_id_pairs = id_pairs
     return blob
Ejemplo n.º 6
0
 def read_subproblems(self):
     """Collect information about the 'subproblems' step into an Extensible.

     Inputs, all under ``<self.top_output_dir>/subproblems``:
       * 'contesting-trees.json' -- mapping of taxon id to a mapping of tree
         file name to a list of node info dicts (an empty/falsy document is
         treated as {}).
       * 'subproblem-ids.txt' -- subproblem tree file names.
       * '<prefix>-tree-names.txt' -- the input names of each subproblem.
     The value stored for each subproblem file name is taken from
     ``self.subproblem_solutions.subproblem_num_leaves_num_internal_nodes``
     (element 0 is used as the key, element 1 as the value).

     Attributes set on the returned blob:
       * contested_taxa
       * tree_files
       * sorted_by_num_phylo_inputs
       * input_and_subproblems_sorted

     Raises RuntimeError if a contesting tree lists fewer than two node info
     elements; AssertionError if a subproblem file name lacks the '.tre'
     suffix or the 'ott' prefix.
     """
     base_dir = os.path.join(self.top_output_dir, 'subproblems')
     blob = Extensible()
     raw_contested = read_as_json(os.path.join(base_dir, 'contesting-trees.json'))
     if not raw_contested:
         raw_contested = {}
     contested_out = {}
     for key, nodes_by_tree in raw_contested.items():
         # Keys may carry an 'ott' prefix; the externalized form drops it.
         if key.startswith('ott'):
             key = key[3:]
         per_tree = contested_out[key] = []
         for fn, infos in nodes_by_tree.items():
             study_id, tree_id = propinquity_fn_to_study_tree(fn)
             node_entries = []
             record = {}
             record['study_id'] = study_id
             record['tree_id'] = tree_id
             record['tree_filename'] = fn
             record['conflicting_nodes'] = node_entries
             per_tree.append(record)
             if len(infos) < 2:
                 raise RuntimeError('read_subproblems < 2 node info elements for taxon ID = {}'.format(key))
             for info in infos:
                 from_taxon = []
                 for label in info['children_from_taxon']:
                     from_taxon.append(node_label2obj(label))
                 entry = {}
                 entry['parent'] = node_label2obj(info['parent'])
                 entry['children_from_taxon'] = from_taxon
                 node_entries.append(entry)
     blob.contested_taxa = contested_out
     blob.tree_files = stripped_nonempty_lines(os.path.join(base_dir, 'subproblem-ids.txt'))
     num_leaves = {}
     for pair in self.subproblem_solutions.subproblem_num_leaves_num_internal_nodes:
         num_leaves[pair[0]] = pair[1]
     size_ranked = []
     users_of_input = {}
     for fn in blob.tree_files:
         assert fn.endswith('.tre')
         ott_prefix = fn[:-4]
         assert ott_prefix.startswith('ott')
         listing_fp = os.path.join(base_dir, ott_prefix + '-tree-names.txt')
         listed = []
         for raw_name in stripped_nonempty_lines(listing_fp):
             if raw_name.endswith('.tre'):
                 short_name = raw_name[:-4]
             else:
                 short_name = raw_name
             # The unshortened name is what gets reported per subproblem.
             listed.append(raw_name)
             if short_name != 'TAXONOMY':
                 if short_name not in users_of_input:
                     users_of_input[short_name] = []
                 users_of_input[short_name].append(ott_prefix)
         size_ranked.append((len(listed), int(ott_prefix[3:]), fn, listed))
     size_ranked.sort(reverse=True)
     by_size = []
     for item in size_ranked:
         by_size.append([item[2], item[3], num_leaves[item[2]]])
     blob.sorted_by_num_phylo_inputs = by_size
     usage_ranked = [(len(users), name, users) for name, users in users_of_input.items()]
     usage_ranked.sort(reverse=True)
     by_usage = []
     for item in usage_ranked:
         by_usage.append([item[1], item[2]])
     blob.input_and_subproblems_sorted = by_usage
     return blob
Ejemplo n.º 7
0
 # NOTE(review): this is a fragment; `args`, `inp_files`, `flags_str`, `root`
 # and `to_prune_for_reasons` are bound outside the visible code.
 # When an input directory was supplied, expand '~' in it and prefix every
 # input file name with it.
 in_dir = args.input_dir
 if in_dir:
     in_dir = os.path.expanduser(in_dir)
     inp_files = [os.path.join(in_dir, i) for i in inp_files]
 # Without an explicit flag string fall back to
 # OTT.TREEMACHINE_SUPPRESS_FLAGS; otherwise split the string on commas.
 if flags_str is None:
     flags = OTT.TREEMACHINE_SUPPRESS_FLAGS
 else:
     flags = flags_str.split(',')
 ott = OTT(ott_dir=args.ott_dir)
 # Presumably a combined representation of the flags to prune on -- verify
 # against OTT.convert_flag_string_set_to_union.
 to_prune_fsi_set = ott.convert_flag_string_set_to_union(flags)
 # Each input file is handled independently and gets its own log object.
 for inp in inp_files:
     _LOG.debug('{}'.format(inp))
     log_obj = {}
     inp_fn = os.path.split(inp)[-1]
     study_tree = '.'.join(inp_fn.split('.')[:-1])  # strip extension
     study_id, tree_id = propinquity_fn_to_study_tree(inp_fn)
     nexson_blob = read_as_json(inp)
     ntw = NexsonTreeWrapper(nexson_blob, tree_id, log_obj=log_obj)
     assert ntw.root_node_id
     # Output files are named after the input file (extension removed) and
     # placed in args.out_dir.
     taxonomy_treefile = os.path.join(args.out_dir, study_tree + '-taxonomy.tre')
     # An EmptyTreeError from pruning is not fatal: it is recorded in the
     # log object under 'EMPTY_TREE' and processing continues.
     try:
         ntw.prune_tree_for_supertree(ott=ott,
                                      to_prune_fsi_set=to_prune_fsi_set,
                                      root_ott_id=root,
                                      taxonomy_treefile=taxonomy_treefile,
                                      id_to_other_prune_reason=to_prune_for_reasons)
     except EmptyTreeError:
         log_obj['EMPTY_TREE'] = True
     # The log object is written as JSON whether or not pruning raised
     # EmptyTreeError.
     out_log = os.path.join(args.out_dir, study_tree + '.json')
     write_as_json(log_obj, out_log)
     newick_fp = os.path.join(args.out_dir, study_tree + '.tre')
#!/usr/bin/env python
"""Takes meta properties for a tree requested"""
from __future__ import absolute_import, division, print_function, unicode_literals
from peyotl.api import PhylesystemAPI
from peyotl.utility import propinquity_fn_to_study_tree
import codecs
import sys

# UTF-8 writer wrapped around stdout. NOTE(review): nothing in this script
# writes through `out`; the name is kept so the module-level binding stays
# available.
out = codecs.getwriter('utf-8')(sys.stdout)

# Keys of a tree object that this script does not report as metadata.
NON_META = frozenset([u'^ot:rootNodeId',
                      u'nodeById',
                      u'edgeBySourceId',
                      u'^ot:inGroupClade',
                      u'@xsi:type',
                      u'^ot:branchLengthTimeUnit',
                      u'^ot:branchLengthDescription',
                      u'^ot:tag',
                      u'^ot:branchLengthMode'])

# Each command-line argument names one tree; it is split into a study id and
# a tree id by propinquity_fn_to_study_tree (extension kept as given).
for arg in sys.argv[1:]:
    study_id, tree_id = propinquity_fn_to_study_tree(arg, strip_extension=False)
    pa = PhylesystemAPI(get_from='local')
    # Only the lookup is guarded, so a failure while printing is not
    # misreported as a missing tree. `Exception` (rather than a bare except)
    # lets KeyboardInterrupt and SystemExit propagate.
    try:
        tree = pa.get(study_id, tree_id=tree_id)[tree_id]
    except Exception:
        # The trailing newline keeps successive warnings on separate lines.
        sys.stderr.write('WARNING: did not find tree "{}" in study "{}"\n'.format(tree_id, study_id))
        continue
    print('Tree "{}" in study "{}":'.format(tree_id, study_id))
    for k, v in tree.items():
        # Skip missing/empty values and the structural (non-meta) keys.
        # Equality, not identity, is the correct test against ''.
        if (v is not None) and (v != '') and (k not in NON_META):
            print(k, v)
Ejemplo n.º 9
0
 # NOTE(review): this is a fragment; `args`, `inp_files`, `flags_str`, `root`
 # and `to_prune_for_reasons` are bound outside the visible code.
 # When an input directory was supplied, expand '~' in it and prefix every
 # input file name with it.
 in_dir = args.input_dir
 if in_dir:
     in_dir = os.path.expanduser(in_dir)
     inp_files = [os.path.join(in_dir, i) for i in inp_files]
 # Without an explicit flag string fall back to
 # OTT.TREEMACHINE_SUPPRESS_FLAGS; otherwise split the string on commas.
 if flags_str is None:
     flags = OTT.TREEMACHINE_SUPPRESS_FLAGS
 else:
     flags = flags_str.split(',')
 ott = OTT(ott_dir=args.ott_dir)
 # Presumably a combined representation of the flags to prune on -- verify
 # against OTT.convert_flag_string_set_to_union.
 to_prune_fsi_set = ott.convert_flag_string_set_to_union(flags)
 # Each input file is handled independently and gets its own log object.
 for inp in inp_files:
     _LOG.debug('{}'.format(inp))
     log_obj = {}
     inp_fn = os.path.split(inp)[-1]
     study_tree = '.'.join(inp_fn.split('.')[:-1])  # strip extension
     study_id, tree_id = propinquity_fn_to_study_tree(inp_fn)
     nexson_blob = read_as_json(inp)
     ntw = NexsonTreeWrapper(nexson_blob, tree_id, log_obj=log_obj)
     assert ntw.root_node_id
     # Output files are named after the input file (extension removed) and
     # placed in args.out_dir.
     taxonomy_treefile = os.path.join(args.out_dir,
                                      study_tree + '-taxonomy.tre')
     # An EmptyTreeError from pruning is not fatal: it is recorded in the
     # log object under 'EMPTY_TREE' and processing continues.
     try:
         ntw.prune_tree_for_supertree(
             ott=ott,
             to_prune_fsi_set=to_prune_fsi_set,
             root_ott_id=root,
             taxonomy_treefile=taxonomy_treefile,
             id_to_other_prune_reason=to_prune_for_reasons)
     except EmptyTreeError:
         log_obj['EMPTY_TREE'] = True
     # Path of the per-tree JSON log inside args.out_dir.
     out_log = os.path.join(args.out_dir, study_tree + '.json')