def read_exemplified_phylo(self):
    """Collect a summary of the 'exemplified_phylo' output directory.

    Reads 'exemplified_log.json', runs ``make`` on
    'pruned_taxonomy_degree_distribution.txt', and reads
    'nonempty_trees.txt', all located under
    ``<self.top_output_dir>/exemplified_phylo``.

    Returns an ``Extensible`` carrying:
      * num_leaves_in_exemplified_taxonomy -- int parsed from the second
        field of the single degree-distribution line whose first field
        is '0'
      * taxa_exemplified -- the 'taxa_exemplified' entry of the log
        (an empty dict when that entry is falsy)
      * source_tree_to_ott_id_exemplified_list -- dict mapping each tree
        name (text before its last '.') to a sorted list of the IDs that
        list it under 'trees_modified'
      * nonempty_tree_filenames -- stripped, non-empty lines of
        'nonempty_trees.txt'
      * nonempty_trees -- ``propinquity_fn_to_study_tree`` applied to
        each of those filenames

    Raises AssertionError if the degree-distribution file is missing
    after ``make`` runs, or does not contain exactly one two-field line
    starting with '0'.
    """
    phylo_dir = os.path.join(self.top_output_dir, 'exemplified_phylo')
    log_blob = read_as_json(os.path.join(phylo_dir, 'exemplified_log.json'))
    taxa = log_blob['taxa_exemplified'] or {}

    # Invert the log: tree name (extension dropped) -> IDs exemplified in it.
    ids_by_tree = {}
    for ott_id, ex_info in taxa.items():
        for tree_fn in ex_info['trees_modified']:
            stem = '.'.join(tree_fn.split('.')[:-1])
            ids_by_tree.setdefault(stem, []).append(ott_id)
    for id_list in ids_by_tree.values():
        id_list.sort()

    # The degree-distribution file is a make target; build it on demand.
    degree_fp = os.path.join(phylo_dir, 'pruned_taxonomy_degree_distribution.txt')
    subprocess.call(['make', degree_fp])
    assert os.path.exists(degree_fp)

    zero_rows = []
    for line in stripped_nonempty_lines(degree_fp):
        fields = line.split()
        if fields[0] == '0':
            zero_rows.append(fields)
    assert len(zero_rows) == 1
    leaf_row = zero_rows[0]
    assert len(leaf_row) == 2

    blob = Extensible()
    blob.num_leaves_in_exemplified_taxonomy = int(leaf_row[1])
    blob.taxa_exemplified = taxa
    blob.source_tree_to_ott_id_exemplified_list = ids_by_tree
    nonempty_fp = os.path.join(phylo_dir, 'nonempty_trees.txt')
    blob.nonempty_tree_filenames = stripped_nonempty_lines(nonempty_fp)
    study_tree_pairs = []
    for fn in blob.nonempty_tree_filenames:
        study_tree_pairs.append(propinquity_fn_to_study_tree(fn))
    blob.nonempty_trees = study_tree_pairs
    return blob
def read_phylo_input(self):
    """Describe the 'phylo_input' output directory.

    Returns an ``Extensible`` with:
      * directory -- ``<self.top_output_dir>/phylo_input``
      * study_tree_pair_file -- path of 'study_tree_pairs.txt' inside it
      * study_id_tree_id_pairs -- the result of
        ``propinquity_fn_to_study_tree(line, strip_extension=False)`` for
        every stripped, non-empty line of that file
    """
    blob = Extensible()
    phylo_dir = os.path.join(self.top_output_dir, 'phylo_input')
    pair_fp = os.path.join(phylo_dir, 'study_tree_pairs.txt')
    blob.directory = phylo_dir
    blob.study_tree_pair_file = pair_fp

    pairs = []
    for line in stripped_nonempty_lines(pair_fp):
        pairs.append(propinquity_fn_to_study_tree(line, strip_extension=False))
    blob.study_id_tree_id_pairs = pairs
    return blob
def read_exemplified_phylo(self):
    """Read the exemplification outputs and return them as an ``Extensible``.

    Inputs, all under ``<self.top_output_dir>/exemplified_phylo``:
      * 'exemplified_log.json' -- its 'taxa_exemplified' entry is used
      * 'pruned_taxonomy_degree_distribution.txt' -- (re)built through
        ``make`` before being read
      * 'nonempty_trees.txt'

    The returned object has the attributes
    ``num_leaves_in_exemplified_taxonomy``, ``taxa_exemplified``,
    ``source_tree_to_ott_id_exemplified_list``,
    ``nonempty_tree_filenames`` and ``nonempty_trees``.

    An AssertionError is raised when the degree-distribution file does
    not exist after the ``make`` call, when the number of its lines whose
    first field is '0' is not exactly one, or when that line does not
    have exactly two fields.
    """
    base = os.path.join(self.top_output_dir, 'exemplified_phylo')
    exemplified_log = read_as_json(os.path.join(base, 'exemplified_log.json'))
    exemplified = exemplified_log['taxa_exemplified']
    if not exemplified:
        exemplified = {}

    # Group the exemplified IDs by the tree (name minus last extension)
    # that was modified for them.
    tree_to_ids = {}
    for taxon_id, details in exemplified.items():
        for modified_tree in details['trees_modified']:
            pieces = modified_tree.split('.')
            tree_key = '.'.join(pieces[:-1])
            if tree_key not in tree_to_ids:
                tree_to_ids[tree_key] = []
            tree_to_ids[tree_key].append(taxon_id)
    for taxon_ids in tree_to_ids.values():
        taxon_ids.sort()

    dist_path = os.path.join(base, 'pruned_taxonomy_degree_distribution.txt')
    subprocess.call(['make', dist_path])
    assert os.path.exists(dist_path)

    # Keep only the rows whose first whitespace-separated field is '0'.
    split_rows = (row.split() for row in stripped_nonempty_lines(dist_path))
    degree_zero = [cols for cols in split_rows if cols[0] == '0']
    assert len(degree_zero) == 1
    only_row = degree_zero[0]
    assert len(only_row) == 2

    blob = Extensible()
    blob.num_leaves_in_exemplified_taxonomy = int(only_row[1])
    blob.taxa_exemplified = exemplified
    blob.source_tree_to_ott_id_exemplified_list = tree_to_ids
    blob.nonempty_tree_filenames = stripped_nonempty_lines(
        os.path.join(base, 'nonempty_trees.txt'))
    blob.nonempty_trees = [propinquity_fn_to_study_tree(name)
                           for name in blob.nonempty_tree_filenames]
    return blob
def read_subproblems(self):
    """Summarize the 'subproblems' output directory.

    Reads 'contesting-trees.json', 'subproblem-ids.txt' and one
    '<prefix>-tree-names.txt' file per subproblem, and uses
    ``self.subproblem_solutions.subproblem_num_leaves_num_internal_nodes``
    to look up a per-subproblem value (element 1 of each record, keyed
    by element 0).

    Returns an ``Extensible`` with:
      * contested_taxa -- dict keyed by taxon ID (a leading 'ott' is
        removed); each value is a list of dicts with the keys
        'study_id', 'tree_id', 'tree_filename' and 'conflicting_nodes'
      * tree_files -- stripped, non-empty lines of 'subproblem-ids.txt'
      * sorted_by_num_phylo_inputs -- ``[tree_file, inputs, value]``
        lists in descending order of (number of inputs, numeric ID)
      * input_and_subproblems_sorted -- ``[input_name, subproblem_prefixes]``
        lists in descending order of how many subproblems use the input

    Raises RuntimeError if any tree entry in 'contesting-trees.json'
    carries fewer than two node-info elements, and AssertionError if a
    subproblem file name does not look like 'ott....tre'.
    """
    sub_dir = os.path.join(self.top_output_dir, 'subproblems')
    blob = Extensible()
    contesting = read_as_json(os.path.join(sub_dir, 'contesting-trees.json')) or {}

    contested = {}
    for raw_id, trees_for_taxon in contesting.items():
        taxon_id = raw_id[3:] if raw_id.startswith('ott') else raw_id
        tree_records = []
        contested[taxon_id] = tree_records
        for tree_fn, node_infos in trees_for_taxon.items():
            study_id, tree_id = propinquity_fn_to_study_tree(tree_fn)
            conflicts = []
            tree_records.append({'study_id': study_id,
                                 'tree_id': tree_id,
                                 'tree_filename': tree_fn,
                                 'conflicting_nodes': conflicts})
            if len(node_infos) < 2:
                raise RuntimeError('read_subproblems < 2 node info elements for taxon ID = {}'.format(taxon_id))
            for info in node_infos:
                # Children are converted before the parent, as in the
                # original statement order.
                children = [node_label2obj(label) for label in info['children_from_taxon']]
                conflicts.append({'parent': node_label2obj(info['parent']),
                                  'children_from_taxon': children})
    blob.contested_taxa = contested

    blob.tree_files = stripped_nonempty_lines(os.path.join(sub_dir, 'subproblem-ids.txt'))
    leaf_counts = {}
    for record in self.subproblem_solutions.subproblem_num_leaves_num_internal_nodes:
        leaf_counts[record[0]] = record[1]

    ranked = []
    subproblems_by_input = {}
    for tree_file in blob.tree_files:
        assert tree_file.endswith('.tre')
        prefix = tree_file[:-4]
        assert prefix.startswith('ott')
        names_fp = os.path.join(sub_dir, prefix + '-tree-names.txt')
        inputs = []
        for name in stripped_nonempty_lines(names_fp):
            # The raw name is recorded; the extension-free name is only
            # used as the grouping key.
            inputs.append(name)
            stem = name[:-4] if name.endswith('.tre') else name
            if stem != 'TAXONOMY':
                subproblems_by_input.setdefault(stem, []).append(prefix)
        ranked.append((len(inputs), int(prefix[3:]), tree_file, inputs))
    ranked.sort(reverse=True)
    blob.sorted_by_num_phylo_inputs = [[fn, names, leaf_counts[fn]]
                                       for _, _, fn, names in ranked]

    input_ranking = sorted(((len(prefixes), name, prefixes)
                            for name, prefixes in subproblems_by_input.items()),
                           reverse=True)
    blob.input_and_subproblems_sorted = [[name, prefixes]
                                         for _, name, prefixes in input_ranking]
    return blob
def read_phylo_input(self):
    """Return an ``Extensible`` describing the phylogenetic inputs.

    The object records the 'phylo_input' directory under
    ``self.top_output_dir`` (``directory``), the path of its
    'study_tree_pairs.txt' file (``study_tree_pair_file``), and, as
    ``study_id_tree_id_pairs``, one
    ``propinquity_fn_to_study_tree(..., strip_extension=False)`` result
    per stripped, non-empty line of that file.
    """
    result = Extensible()
    result.directory = os.path.join(self.top_output_dir, 'phylo_input')
    result.study_tree_pair_file = os.path.join(result.directory,
                                               'study_tree_pairs.txt')
    pair_lines = stripped_nonempty_lines(result.study_tree_pair_file)
    parsed = []
    for entry in pair_lines:
        parsed.append(propinquity_fn_to_study_tree(entry, strip_extension=False))
    result.study_id_tree_id_pairs = parsed
    return result
def read_subproblems(self):
    """Gather information about the decomposition into subproblems.

    Files read from ``<self.top_output_dir>/subproblems``:
      * 'contesting-trees.json' -- mapping of taxon ID to a mapping of
        tree filename to a list of node-info dicts (each with 'parent'
        and 'children_from_taxon')
      * 'subproblem-ids.txt' -- one '<prefix>.tre' name per line
      * '<prefix>-tree-names.txt' -- the input names of each subproblem

    The returned ``Extensible`` exposes ``contested_taxa``,
    ``tree_files``, ``sorted_by_num_phylo_inputs`` and
    ``input_and_subproblems_sorted``. Both sorted lists are in
    descending order. The name 'TAXONOMY' is excluded from the
    per-input grouping but still counted among a subproblem's inputs.

    Raises RuntimeError when a contesting tree lists fewer than two
    node-info elements. Raises AssertionError when a subproblem name
    does not end with '.tre' or does not start with 'ott'.
    """
    root_dir = os.path.join(self.top_output_dir, 'subproblems')
    blob = Extensible()
    json_path = os.path.join(root_dir, 'contesting-trees.json')
    raw_contested = read_as_json(json_path)
    if not raw_contested:
        raw_contested = {}

    by_taxon = {}
    for key, per_tree in raw_contested.items():
        # Externally the taxon ID is reported without its 'ott' prefix.
        if key.startswith('ott'):
            key = key[3:]
        entries = by_taxon[key] = []
        for filename, infos in per_tree.items():
            study_id, tree_id = propinquity_fn_to_study_tree(filename)
            node_list = []
            entry = {}
            entry['study_id'] = study_id
            entry['tree_id'] = tree_id
            entry['tree_filename'] = filename
            entry['conflicting_nodes'] = node_list
            entries.append(entry)
            if not len(infos) >= 2:
                msg = 'read_subproblems < 2 node info elements for taxon ID = {}'
                raise RuntimeError(msg.format(key))
            for one_info in infos:
                kids = []
                for child_label in one_info['children_from_taxon']:
                    kids.append(node_label2obj(child_label))
                parent_obj = node_label2obj(one_info['parent'])
                node_list.append({'parent': parent_obj,
                                  'children_from_taxon': kids})
    blob.contested_taxa = by_taxon

    ids_path = os.path.join(root_dir, 'subproblem-ids.txt')
    blob.tree_files = stripped_nonempty_lines(ids_path)
    solutions = self.subproblem_solutions
    num_leaves_for = {row[0]: row[1]
                      for row in solutions.subproblem_num_leaves_num_internal_nodes}

    size_ranked = []
    users_of_input = {}
    for subproblem in blob.tree_files:
        assert subproblem.endswith('.tre')
        ott_prefix = subproblem[:-4]
        assert ott_prefix.startswith('ott')
        listing = os.path.join(root_dir, ott_prefix + '-tree-names.txt')
        members = []
        for member in stripped_nonempty_lines(listing):
            if member.endswith('.tre'):
                bare = member[:-4]
            else:
                bare = member
            members.append(member)
            if bare == 'TAXONOMY':
                continue
            users_of_input.setdefault(bare, []).append(ott_prefix)
        numeric_id = int(ott_prefix[3:])
        size_ranked.append((len(members), numeric_id, subproblem, members))
    size_ranked.sort(reverse=True)

    summary = []
    for item in size_ranked:
        summary.append([item[2], item[3], num_leaves_for[item[2]]])
    blob.sorted_by_num_phylo_inputs = summary

    usage = []
    for input_name, prefixes in users_of_input.items():
        usage.append((len(prefixes), input_name, prefixes))
    usage.sort(reverse=True)
    blob.input_and_subproblems_sorted = [[item[1], item[2]] for item in usage]
    return blob
in_dir = args.input_dir if in_dir: in_dir = os.path.expanduser(in_dir) inp_files = [os.path.join(in_dir, i) for i in inp_files] if flags_str is None: flags = OTT.TREEMACHINE_SUPPRESS_FLAGS else: flags = flags_str.split(',') ott = OTT(ott_dir=args.ott_dir) to_prune_fsi_set = ott.convert_flag_string_set_to_union(flags) for inp in inp_files: _LOG.debug('{}'.format(inp)) log_obj = {} inp_fn = os.path.split(inp)[-1] study_tree = '.'.join(inp_fn.split('.')[:-1]) # strip extension study_id, tree_id = propinquity_fn_to_study_tree(inp_fn) nexson_blob = read_as_json(inp) ntw = NexsonTreeWrapper(nexson_blob, tree_id, log_obj=log_obj) assert ntw.root_node_id taxonomy_treefile = os.path.join(args.out_dir, study_tree + '-taxonomy.tre') try: ntw.prune_tree_for_supertree(ott=ott, to_prune_fsi_set=to_prune_fsi_set, root_ott_id=root, taxonomy_treefile=taxonomy_treefile, id_to_other_prune_reason=to_prune_for_reasons) except EmptyTreeError: log_obj['EMPTY_TREE'] = True out_log = os.path.join(args.out_dir, study_tree + '.json') write_as_json(log_obj, out_log) newick_fp = os.path.join(args.out_dir, study_tree + '.tre')
#!/usr/bin/env python """Takes meta properties for a tree requested""" from __future__ import absolute_import, division, print_function, unicode_literals from peyotl.api import PhylesystemAPI from peyotl.utility import propinquity_fn_to_study_tree import codecs import sys out = codecs.getwriter('utf-8')(sys.stdout) NON_META = frozenset([u'^ot:rootNodeId', u'nodeById', u'edgeBySourceId', u'^ot:inGroupClade', u'@xsi:type', u'^ot:branchLengthTimeUnit', u'^ot:branchLengthDescription', u'^ot:tag', u'^ot:branchLengthMode']) for arg in sys.argv[1:]: study_id, tree_id = propinquity_fn_to_study_tree(arg, strip_extension=False) pa = PhylesystemAPI(get_from='local') try: tree = pa.get(study_id, tree_id=tree_id)[tree_id] print('Tree "{}" in study "{}":'.format(tree_id, study_id)) for k, v in tree.items(): if (v is not None) and (v is not '') and (k not in NON_META): print(k, v) except: sys.stderr.write('WARNING: did not find tree "{}" in study "{}"'.format(tree_id, study_id))
in_dir = args.input_dir if in_dir: in_dir = os.path.expanduser(in_dir) inp_files = [os.path.join(in_dir, i) for i in inp_files] if flags_str is None: flags = OTT.TREEMACHINE_SUPPRESS_FLAGS else: flags = flags_str.split(',') ott = OTT(ott_dir=args.ott_dir) to_prune_fsi_set = ott.convert_flag_string_set_to_union(flags) for inp in inp_files: _LOG.debug('{}'.format(inp)) log_obj = {} inp_fn = os.path.split(inp)[-1] study_tree = '.'.join(inp_fn.split('.')[:-1]) # strip extension study_id, tree_id = propinquity_fn_to_study_tree(inp_fn) nexson_blob = read_as_json(inp) ntw = NexsonTreeWrapper(nexson_blob, tree_id, log_obj=log_obj) assert ntw.root_node_id taxonomy_treefile = os.path.join(args.out_dir, study_tree + '-taxonomy.tre') try: ntw.prune_tree_for_supertree( ott=ott, to_prune_fsi_set=to_prune_fsi_set, root_ott_id=root, taxonomy_treefile=taxonomy_treefile, id_to_other_prune_reason=to_prune_for_reasons) except EmptyTreeError: log_obj['EMPTY_TREE'] = True out_log = os.path.join(args.out_dir, study_tree + '.json')