def read_exemplified_phylo(self):
    d = os.path.join(self.top_output_dir, 'exemplified_phylo')
    x = read_as_json(os.path.join(d, 'exemplified_log.json'))
    tx = x['taxa_exemplified']
    if not tx:
        tx = {}
    by_source_tree = {}
    for ott_id, exdict in tx.items():
        tm = exdict['trees_modified']
        for tree in tm:
            key = '.'.join(tree.split('.')[:-1])
            by_source_tree.setdefault(key, []).append(ott_id)
    for v in by_source_tree.values():
        v.sort()
    ptdd = os.path.join(d, 'pruned_taxonomy_degree_distribution.txt')
    subprocess.call(['make', ptdd])
    assert os.path.exists(ptdd)
    ddlines = [i.split() for i in stripped_nonempty_lines(ptdd)
               if i.split()[0] == '0']
    assert len(ddlines) == 1
    leaf_line = ddlines[0]  # should be the single out-degree-0 (leaf count) row
    assert len(leaf_line) == 2
    blob = Extensible()
    blob.num_leaves_in_exemplified_taxonomy = int(leaf_line[1])
    blob.taxa_exemplified = tx
    blob.source_tree_to_ott_id_exemplified_list = by_source_tree
    f = os.path.join(d, 'nonempty_trees.txt')
    blob.nonempty_tree_filenames = stripped_nonempty_lines(f)
    blob.nonempty_trees = [propinquity_fn_to_study_tree(i)
                           for i in blob.nonempty_tree_filenames]
    return blob
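# All of the reader methods in these excerpts hang their results on an
# `Extensible` instance, which is never defined here. Since attributes are
# only ever attached via plain assignment or setattr, a minimal attribute-bag
# sketch like the following would suffice (an assumption, not the actual
# definition from the codebase):
class Extensible(object):
    """Assumed minimal attribute bag; the real class is not shown in these excerpts."""
    pass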
def perform_separation(taxalotl_config, part_name, id_list, sep_fn):
    ott_res = taxalotl_config.get_terminalized_res_by_id(
        "ott", 'enforce-new-separators')
    if not ott_res.has_been_partitioned():
        partition_resources(taxalotl_config, ["ott"], PREORDER_PART_LIST)
    sep_mapping_fp = os.path.join(ott_res.partitioned_filepath, SEP_MAPPING)
    if not os.path.isfile(sep_mapping_fp):
        cache_separator_names(taxalotl_config)
    top_dir = get_part_dir_from_part_name(ott_res, part_name)
    active_sep_fn = os.path.join(top_dir, sep_fn)
    try:
        active_seps = return_sep_obj_copy_with_ott_fields(
            read_as_json(active_sep_fn))
        print(active_seps)
    except Exception:
        # the original format string took two args but had one placeholder
        raise ValueError('{} does not exist for partition {}'.format(
            active_sep_fn, part_name))
    if id_list:
        resource_ids = id_list
    else:
        resource_ids = get_taxonomies_for_dir(top_dir)
    for rid in resource_ids:
        rw = taxalotl_config.get_resource_by_id(rid)
        print(rid, rw)
        perform_dynamic_separation(ott_res,
                                   res=rw,
                                   part_key=part_name,
                                   separation_by_ott=active_seps)
def get_separator_dict(self):
    from taxalotl.commands import SEP_MAPPING, cache_separator_names
    from peyotl import read_as_json
    fn = os.path.join(self.partitioned_dir, SEP_MAPPING)
    if not os.path.exists(fn):
        cache_separator_names(self)
    return read_as_json(fn)
def read_subproblems(self):
    d = os.path.join(self.top_output_dir, 'subproblems')
    blob = Extensible()
    conf_tax_json_fp = os.path.join(d, 'contesting-trees.json')
    conf_tax_info = read_as_json(conf_tax_json_fp)
    if not conf_tax_info:
        conf_tax_info = {}
    externalized_conf_tax_info = {}
    for ott_id, tree2node_info_list in conf_tax_info.items():
        tr_ob_li = []
        if ott_id.startswith('ott'):
            ott_id = ott_id[3:]
        externalized_conf_tax_info[ott_id] = tr_ob_li
        for study_tree_fn, node_info_list in tree2node_info_list.items():
            study_id, tree_id = propinquity_fn_to_study_tree(study_tree_fn)
            cf_nl = []
            tre_obj = {'study_id': study_id,
                       'tree_id': tree_id,
                       'tree_filename': study_tree_fn,
                       'conflicting_nodes': cf_nl}
            tr_ob_li.append(tre_obj)
            if len(node_info_list) < 2:
                raise RuntimeError('read_subproblems < 2 node info elements '
                                   'for taxon ID = {}'.format(ott_id))
            for node_info in node_info_list:
                rcfn = node_info['children_from_taxon']
                cfn = [node_label2obj(i) for i in rcfn]
                el = {'parent': node_label2obj(node_info['parent']),
                      'children_from_taxon': cfn}
                cf_nl.append(el)
    blob.contested_taxa = externalized_conf_tax_info
    blob.tree_files = stripped_nonempty_lines(os.path.join(d, 'subproblem-ids.txt'))
    id2num_leaves = {}
    for el in self.subproblem_solutions.subproblem_num_leaves_num_internal_nodes:
        id2num_leaves[el[0]] = el[1]
    by_num_phylo = []
    by_input = {}
    for s in blob.tree_files:
        assert s.endswith('.tre')
        pref = s[:-4]
        assert pref.startswith('ott')
        tree_name_file = os.path.join(d, pref + '-tree-names.txt')
        phylo_inputs = []
        for i in stripped_nonempty_lines(tree_name_file):
            x = i[:-4] if i.endswith('.tre') else i
            phylo_inputs.append(i)
            if x != 'TAXONOMY':
                by_input.setdefault(x, []).append(pref)
        npi = len(phylo_inputs)
        by_num_phylo.append((npi, int(pref[3:]), s, phylo_inputs))
    by_num_phylo.sort(reverse=True)
    blob.sorted_by_num_phylo_inputs = [[i[2], i[3], id2num_leaves[i[2]]]
                                       for i in by_num_phylo]
    by_input = [(len(v), k, v) for k, v in by_input.items()]
    by_input.sort(reverse=True)
    blob.input_and_subproblems_sorted = [[i[1], i[2]] for i in by_input]
    return blob
def read_cleaned_ott(self):
    blob = Extensible()
    d = os.path.join(self.top_output_dir, 'cleaned_ott')
    o = read_as_json(os.path.join(d, 'cleaned_ott.json'))
    for k, v in o.items():
        setattr(blob, k, v)
        if k == 'flags_to_prune':
            v.sort()
    blob.root_ott_id = self.config.root_ott_id
    return blob
def read_labelled_supertree(self):
    d = os.path.join(self.top_output_dir, 'labelled_supertree')
    p = 'labelled_supertree_out_degree_distribution.txt'
    lsodd = os.path.join(d, p)
    subprocess.call(['make', lsodd])
    subprocess.call(['make', os.path.join(d, 'labelled_supertree_ottnames.tre')])
    assert os.path.exists(lsodd)
    blob = Extensible()
    blob.unprune_stats = read_as_json(os.path.join(d, 'input_output_stats.json'))
    blob.non_monophyletic_taxa = read_as_json(os.path.join(d, 'broken_taxa.json'))
    if blob.non_monophyletic_taxa['non_monophyletic_taxa'] is None:
        blob.non_monophyletic_taxa['non_monophyletic_taxa'] = {}
    blob.non_monophyletic_taxa = add_taxonomy_metadata(blob.non_monophyletic_taxa)
    return blob
def read_assessments(self):
    d = os.path.join(self.top_output_dir, 'assessments')
    blob = Extensible()
    blob.assessments = read_as_json(os.path.join(d, 'summary.json'))
    blob.categories_of_checks = list(blob.assessments.keys())
    blob.categories_of_checks.sort()
    blob.categories_of_checks_with_errors = []
    for k, v in blob.assessments.items():
        if v['result'] != 'OK':
            blob.categories_of_checks_with_errors.append(k)
    blob.categories_of_checks_with_errors.sort()
    return blob
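# For orientation, read_assessments only relies on summary.json mapping each
# check name to an object with a 'result' field ('OK' or an error marker),
# plus optional 'data'/'description' fields, as built by the assessment
# script excerpted below. A hypothetical blob consistent with that shape:
_example_assessments = {
    'num_tips': {'result': 'OK', 'data': 12345,
                 'description': 'leaf counts agree'},
    'lost_taxa': {'result': 'ERROR', 'data': []},
}
_with_errors = sorted(k for k, v in _example_assessments.items()
                      if v['result'] != 'OK')
assert _with_errors == ['lost_taxa']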
def _read_json_and_coerce_to_otttaxon(tax_dir, misc_tax_dir, fn):
    r = {}
    for td in [tax_dir, misc_tax_dir]:
        rf = os.path.join(td, fn)
        if os.path.exists(rf):
            rd = read_as_json(rf)
            for k, v in rd.items():
                # JSON keys are strings; coerce numeric OTT IDs back to ints
                try:
                    k = int(k)
                except ValueError:
                    pass
                r[k] = Taxon(d=v)
    return r
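# Illustration of the key coercion above: JSON object keys are always strings,
# so numeric OTT IDs are restored to ints while non-numeric keys pass through.
# (Toy data; no Taxon construction involved.)
_raw = {'5551856': {'name': 'Fungi'}, 'misc': {'name': 'other'}}
_coerced = {}
for _k, _v in _raw.items():
    try:
        _k = int(_k)
    except ValueError:
        pass
    _coerced[_k] = _v
assert 5551856 in _coerced and 'misc' in _coerced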
def get_auto_gen_part_mapper(res):
    fp = os.path.join(res.partitioned_filepath, GEN_MAPPING_FILENAME)
    if not os.path.isfile(fp):
        m = 'Mapping file not found at "{}"\nRun the build-partitions-maps command.'
        raise RuntimeError(m.format(fp))
    master_mapping = read_as_json(fp)
    a_list = list(res.alias_list)
    base_res = res.base_resource
    if base_res:
        a_list.extend(base_res.alias_list)
    poss_ids = [res.id] + a_list + [res.base_id]
    for k in poss_ids:
        if k in master_mapping:
            return master_mapping[k]
    m = 'No entry for ids {} found in "{}".'
    raise RuntimeError(m.format(', '.join(poss_ids), fp))
final_tree = os.path.join(top_dir, 'labelled_supertree', 'labelled_supertree.tre')
# Check that we have the same # of leaves in the cleaned_ott and the final tree
#
tax_dd_file = os.path.join(assessments_dir, 'taxonomy_degree_distribution.txt')
supertree_dd_file = os.path.join(assessments_dir, 'supertree_degree_distribution.txt')
tdd = parse_degree_dist(tax_dd_file)
sdd = parse_degree_dist(supertree_dd_file)
if tdd[0] != sdd[0]:
    err('The number of leaves differed between the taxonomy and supertree')
    nt = {'result': 'ERROR', 'data': [tdd[0][1], sdd[0][1]]}
else:
    nt = {'result': 'OK', 'data': tdd[0][1]}
nt['description'] = ('Check that the cleaned version of the taxonomy and the '
                     'supertree have the same number of leaves')
summary['num_tips'] = nt
annot_file = os.path.join(top_dir, 'annotated_supertree', 'annotations.json')
annotations = read_as_json(annot_file)
nodes_annotations = annotations['nodes']
# Check that otc-taxonomy-parser and otc-unprune-solution-and-name-unnamed-nodes
# agree on the number of taxa that were lost
#
if False:
    ltb = {'result': 'Skipped test - have not updated tests to deal with '
                     '2 layers of taxon filtering',
           'data': []}
    btb = dict(ltb)
    ub = dict(ltb)
else:
    lt_file = os.path.join(assessments_dir, 'lost_taxa.txt')
    lt_name = 'otc-taxonomy-parser lost-taxon'
    lt_pair = [lt_file, lt_name]
    lt_set = parse_otc_taxonomy_parser_lost_taxa(lt_file)
    bt_file = os.path.join(top_dir, 'labelled_supertree', 'broken_taxa.json')
    bt_name = 'otc-unprune-solution-and-name-unnamed-nodes broken_taxa.json'
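# parse_degree_dist is not shown in these excerpts. Based on how its result is
# used (row [0] is compared across files, and tdd[0][1] is read as the leaf
# count) and on the '0 <count>' row parsed in read_exemplified_phylo above, a
# plausible sketch is the following (an assumption, not the real helper):
def parse_degree_dist(fp):
    rows = []
    for line in stripped_nonempty_lines(fp):
        parts = line.split()
        if len(parts) == 2 and parts[0].isdigit():
            rows.append([int(parts[0]), int(parts[1])])
    rows.sort()
    return rows  # rows[0] is the out-degree-0 (leaf) entry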
import sys

if __name__ == '__main__':
    import argparse
    import os
    bin_dir, SCRIPT_NAME = os.path.split(__file__)
    propinquity_dir = os.path.dirname(bin_dir)
    parser = argparse.ArgumentParser(
        prog=SCRIPT_NAME,
        description='Simple tool to combine the logs from pruning via flags '
                    'and pruning via higher-level taxa that have become tips')
    parser.add_argument('flag_pruned_json', nargs=1, metavar='F', type=str)
    parser.add_argument('higher_taxon_pruned_json', metavar='H', nargs=1, type=str)
    parser.add_argument('combined_json', nargs=1, metavar='O', type=str)
    args = parser.parse_args()
    fj_fn = args.flag_pruned_json[0]
    htj_fn = args.higher_taxon_pruned_json[0]
    out_fn = args.combined_json[0]
    blob = read_as_json(fj_fn)
    higher_taxon_blob = read_as_json(htj_fn)
    if higher_taxon_blob:
        p = blob['pruned']
        httk = 'higher-taxon-tip'
        intk = 'empty-after-higher-taxon-tip-prune'
        high_tax_tip_pruned = higher_taxon_blob.get(httk, {})
        internal_high_tax_tip_pruned = higher_taxon_blob.get(intk, {})
        p[httk] = high_tax_tip_pruned
        p[intk] = internal_high_tax_tip_pruned
        n_ht_in_pruned = len(internal_high_tax_tip_pruned)
        n_ht_pruned = len(high_tax_tip_pruned)
        blob['num_non_leaf_nodes'] -= n_ht_in_pruned
        blob['num_pruned_anc_nodes'] += n_ht_in_pruned
        blob['num_tips'] -= n_ht_pruned
        blob['num_nodes'] -= (n_ht_pruned + n_ht_in_pruned)
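    # Assumed continuation (the excerpt ends before the merged blob is
    # persisted): presumably the script finishes by serializing to out_fn,
    # e.g. with peyotl's write_as_json as collection_export.py below does.
    from peyotl import write_as_json
    write_as_json(blob, out_fn)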
                    default=None,
                    type=int,
                    required=False,
                    help='Optional taxonomy root argument.')
parser.add_argument('--input-files-list',
                    default=None,
                    type=str,
                    required=False,
                    help='A list of input NexSON filenames.')
args = parser.parse_args(sys.argv[1:])
ott_dir, out_dir, root = args.ott_dir, args.out_dir, args.root
to_prune_for_reasons = {}
nonflagged_json_fn = args.ott_prune_nonflagged_json
if nonflagged_json_fn is not None:
    try:
        nonflagged_blob = read_as_json(nonflagged_json_fn)
    except Exception:
        nonflagged_blob = None
    if nonflagged_blob:
        for reason, id_list in nonflagged_blob.items():
            for ott_id in id_list:
                to_prune_for_reasons[ott_id] = reason
flags_str = args.ott_prune_flags
if not os.path.isdir(args.ott_dir):
    error('Expecting ott-dir argument to be a directory. Got "{}"'.format(args.ott_dir))
    sys.exit(1)
if args.nexson:
    inp_files = list(args.nexson)
else:
def main():
    import argparse
    description = "The main CLI for taxalotl"
    p = argparse.ArgumentParser(description=description)
    p.add_argument("--config", type=str,
                   help="the taxalotl.conf filepath (optional)")
    p.add_argument("--show-completions",
                   action="store_true",
                   default=False,
                   help="print the list of options for the next word in the command line")
    p.set_defaults(which="all")
    subp = p.add_subparsers(help="command help")
    # ANALYZE UPDATE
    analyze_update_p = subp.add_parser(
        'analyze-update',
        help="calculates a diff between the last version of a "
             "taxonomy used and the latest version downloaded.")
    analyze_update_p.add_argument('resources', nargs=2,
                                  help="IDs of the resources to be analyzed.")
    _add_level_arg(analyze_update_p)
    analyze_update_p.set_defaults(which="analyze-update")
    # PULL OTifacts
    pull_otifacts_p = subp.add_parser(
        'pull-otifacts',
        help="refresh list of taxonomic artifacts from OTifacts repo")
    pull_otifacts_p.set_defaults(which="pull-otifacts")
    # STATUS
    status_p = subp.add_parser(
        'status',
        help="report the status of a resource (or all resources)")
    status_p.add_argument('resources', nargs="*",
                          help="IDs of the resources to report status on")
    status_p.add_argument("-i", "--ids-only",
                          action='store_true',
                          default=False,
                          help="just list the IDs")
    status_p.add_argument("--by-status",
                          action='store_true',
                          default=False,
                          help="group the report by status")
    status_p.add_argument(
        "--terminal",
        action='store_true',
        default=False,
        help="Report only on the terminalized resource of each type.")
    status_p.set_defaults(which="status")
    # COMPARE-TAXONOMIES
    compare_tax_p = subp.add_parser(
        'compare-taxonomies',
        help="compare taxonomies for a separated dir")
    _add_level_arg(compare_tax_p)
    compare_tax_p.set_defaults(which="compare-taxonomies")
    # CACHE-separator-names
    cache_p = subp.add_parser(
        'cache-separator-names',
        help="Accumulate a list of separator names for tab-completion")
    cache_p.set_defaults(which="cache-separator-names")
    # DOWNLOAD
    download_p = subp.add_parser(
        'download',
        help="download an artifact to your local filesystem")
    download_p.add_argument('resources', nargs="+",
                            help="IDs of the resources to download")
    download_p.set_defaults(which="download")
    # UNPACK
    unpack_p = subp.add_parser(
        'unpack',
        help="unpack a resource (downloads if necessary)")
    unpack_p.add_argument('resources', nargs="+",
                          help="IDs of the resources to unpack")
    unpack_p.set_defaults(which="unpack")
    # NORMALIZE
    normalize_p = subp.add_parser(
        'normalize',
        help="converts to the OTT format (unpacks if necessary)")
    normalize_p.add_argument('resources', nargs="+",
                             help="IDs of the resources to normalize")
    normalize_p.set_defaults(which="normalize")
    # PARTITION
    partition_p = subp.add_parser('partition',
                                  help="Breaks the resource taxon")
    partition_p.add_argument('resources', nargs="+",
                             help="IDs of the resources to partition")
    _add_level_arg(partition_p)
    partition_p.set_defaults(which="partition")
    # INFO
    info_p = subp.add_parser('info', help="Report statistics about a resource")
    info_p.add_argument('resources', nargs="+", help="IDs of the resources")
    _add_level_arg(info_p)
    info_p.set_defaults(which="info")
    # DIAGNOSE-NEW-SEPARATORS
    diag_sep_p = subp.add_parser(
        'diagnose-new-separators',
        help="Uses the last OTT build to find taxa IDs that "
             "are common to the relevant inputs")
    _add_level_arg(diag_sep_p)
    diag_sep_p.set_defaults(which="diagnose-new-separators")
    # ENFORCE-NEW-SEPARATORS
    enf_sep_p = subp.add_parser(
        'enforce-new-separators',
        help="Uses the __sep__.json files created by "
             "diagnose-new-separators to partition by unproblematic taxa")
    enf_sep_p.add_argument('resources', nargs="*",
                           help="IDs of the resources to separate")
    _add_level_arg(enf_sep_p)
    enf_sep_p.set_defaults(which="enforce-new-separators")
    # ALIGN
    align_p = subp.add_parser(
        'align',
        help="Attempts to align a new (partitioned) resource to the latest OTT for a level")
    align_p.add_argument('resources', nargs="*",
                         help="IDs of the resources to separate")
    _add_level_arg(align_p)
    align_p.set_defaults(which="align")
    # ACCUMULATE-SEPARATED-DESCENDANTS
    accum_sep_des_p = subp.add_parser(
        'accumulate-separated-descendants',
        help="Should be run after enforce-separators and before compare-taxonomies")
    accum_sep_des_p.add_argument('resources', nargs="*",
                                 help="IDs of the resources")
    accum_sep_des_p.set_defaults(which="accumulate-separated-descendants")
    # BUILD-PARTITION-MAPS
    build_partition_maps_p = subp.add_parser(
        'build-partition-maps',
        help="Uses the last OTT build to find the ID mappings needed to "
             "partition the input taxonomies.")
    build_partition_maps_p.set_defaults(which="build-partition-maps")
    # CLEAN-PARTITION
    clean_p = subp.add_parser(
        'clean-partition',
        help="remove the results of partition+enforce-new-separators for a resource.")
    clean_p.add_argument('resources', nargs="*",
                         help="IDs of the resources to clean")
    clean_p.set_defaults(which='clean-partition')
    # CLEAN-SEPARATION
    clean_s_p = subp.add_parser(
        'clean-separation',
        help="remove the results of diagnose-new-separators for a resource.")
    _add_level_arg(clean_s_p)
    clean_s_p.set_defaults(which='clean-separation')
    # Handle --show-completions differently from the others, because
    # argparse does not help us out here... at all
    if "--show-completions" in sys.argv:
        a = sys.argv[1:]
        univ = frozenset(['--config', ])
        sel_cmd = None
        num_cmds = 0
        for c in all_cmds:
            if c in a:
                if sel_cmd is None:
                    sel_cmd = c
                num_cmds += 1
        comp_list = []
        if sel_cmd is None:
            comp_list = []
            for u in univ:
                found = False
                for arg in a:
                    if arg.startswith(u):
                        found = True
                        break
                if not found:
                    comp_list.append(u)
            comp_list.extend(all_cmds)
        else:
            if sel_cmd in res_dep_cmds \
                    or sel_cmd in ['compare-taxonomies'] \
                    or sel_cmd in ver_inp_res_dep_cmds:
                # From Ned Batchelder's answer on http://stackoverflow.com/a/14728477
                class ArgumentParserError(Exception):
                    pass

                # noinspection PyClassHasNoInit
                class ThrowingArgumentParser(argparse.ArgumentParser):
                    def error(self, message):
                        raise ArgumentParserError(message)

                fake_parser = ThrowingArgumentParser()
                fake_parser.add_argument("--config", type=str)
                fake_parser.add_argument('blah', nargs="*")
                comp_list = []
                taxalotl_config = None
                try:
                    fa = fake_parser.parse_known_args()[0]
                    config = fa.config
                    taxalotl_config = TaxalotlConfig(filepath=config)
                    if sel_cmd in res_dep_cmds:
                        comp_list = list(taxalotl_config.resources_mgr.resources.keys())
                    elif sel_cmd in ver_inp_res_dep_cmds:
                        comp_list = list(
                            taxalotl_config.resources_mgr.abstract_input_resource_types())
                except Exception as _excep:
                    _LOG.warn('Exception: {}'.format(_excep))
            if sel_cmd == 'status':
                if '-i' not in a and '--ids-only' not in a:
                    comp_list.extend(["-i", "--ids-only"])
                for x in ['--by-status', '--terminal']:
                    if x not in a:
                        comp_list.extend([x])
            elif sel_cmd == 'partition':
                # sys.stderr.write(str(a))
                if '--level' == a[-1] or (len(a) > 1 and '--level' == a[-2]):
                    comp_list = list(NONTERMINAL_PART_NAMES)
                elif '--level' not in a:
                    comp_list.extend(['--level'])
            elif sel_cmd in ('diagnose-new-separators', 'enforce-new-separators'):
                # sys.stderr.write(str(a))
                if '--level' == a[-1] or (len(a) > 1 and '--level' == a[-2]):
                    comp_list = list(TERMINAL_PART_NAMES)
                elif '--level' not in a:
                    comp_list.extend(['--level'])
            elif sel_cmd in ['compare-taxonomies']:
                rw = taxalotl_config.get_terminalized_res_by_id("ott", '')
                outfn = os.path.join(rw.partitioned_filepath, SEP_NAMES)
                if os.path.exists(outfn):
                    comp_list.extend(read_as_json(outfn))
        sys.stdout.write('{}\n'.format(' '.join(comp_list)))
    else:
        rc = main_post_parse(p.parse_args())
        sys.exit(rc)
def get_primary_partition_map(self):
    return read_as_json(os.path.join(self.normalized_filedir, GEN_MAPPING_FILENAME))
#!/usr/bin/env python
from peyotl import read_as_json
import codecs
import json
import sys

try:
    subproblem_ids_file, in_annotations_file, out_annotations_file = sys.argv[1:]
except ValueError:
    sys.exit('Expecting 3 arguments:\n subproblem_ids_file, in_annotations_file, out_annotations_file')
import os

bin_dir = os.path.abspath(os.path.dirname(sys.argv[0]))
sys.path.append(os.path.join(bin_dir))
from document_outputs import stripped_nonempty_lines

subproblems = []
for s in stripped_nonempty_lines(subproblem_ids_file):
    assert s.endswith('.tre')
    subproblems.append(s[:-4])
jsonblob = read_as_json(in_annotations_file)
nodes_dict = jsonblob['nodes']
for ott_id in subproblems:
    d = nodes_dict.setdefault(ott_id, {})
    d['was_constrained'] = True
    d['was_uncontested'] = True
with codecs.open(out_annotations_file, 'w', encoding='utf-8') as out_stream:
    json.dump(jsonblob, out_stream, indent=2, sort_keys=True,
              separators=(',', ': '))
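# Toy illustration of the decoration loop above: every subproblem ID gets
# was_constrained/was_uncontested flags, whether or not the node already has
# an annotation entry ('ott42' and 'ott99' are made-up IDs).
_nodes = {'ott42': {'supported_by': {}}}
for _ott_id in ['ott42', 'ott99']:
    _d = _nodes.setdefault(_ott_id, {})
    _d['was_constrained'] = True
    _d['was_uncontested'] = True
assert _nodes['ott99'] == {'was_constrained': True, 'was_uncontested': True}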
#!/usr/bin/env python
from peyotl import concatenate_collections, read_as_json, write_as_json

if __name__ == '__main__':
    import argparse
    import sys
    import os
    description = ('Takes a list of collections and writes a collection that '
                   'is a concatenation of their decisions')
    parser = argparse.ArgumentParser(prog='collection_export.py',
                                     description=description)
    parser.add_argument('--output',
                        type=str,
                        required=True,
                        help='output filepath for collection json')
    parser.add_argument('collection',
                        default=None,
                        type=str,
                        nargs="*",
                        help='filepath for the collections JSON')
    args = parser.parse_args(sys.argv[1:])
    inp = [read_as_json(i) for i in args.collection]
    out = concatenate_collections(inp)
    write_as_json(out, args.output)
    nt = {'result': 'ERROR', 'data': [tdd[0][1], sdd[0][1]]}
else:
    nt = {'result': 'OK', 'data': tdd[0][1]}
nt['description'] = ('Check that the cleaned version of the taxonomy and the '
                     'supertree have the same number of leaves')
summary['num_tips'] = nt
# Check that otc-taxonomy-parser and otc-unprune-solution-and-name-unnamed-nodes
# agree on the number of taxa that were lost
#
lt_file = os.path.join(assessments_dir, 'lost_taxa.txt')
lt_name = 'otc-taxonomy-parser lost-taxon'
lt_pair = [lt_file, lt_name]
lt_set = parse_otc_taxonomy_parser_lost_taxa(lt_file)
bt_file = os.path.join(top_dir, 'labelled_supertree', 'broken_taxa.json')
bt_name = 'otc-unprune-solution-and-name-unnamed-nodes broken_taxa.json'
bt_pair = [bt_file, bt_name]
bt_dict = read_as_json(bt_file)['non_monophyletic_taxa']
if not bt_dict:
    bt_dict = {}
cleaned_ott_json_pruned = read_as_json(cleaned_taxonomy_json).get('pruned', {})
# pruned because they became empty
httip_key = 'higher-taxon-tip'
int_key = 'empty-after-higher-taxon-tip-prune'
htpruned_ids = set()
for key in [httip_key, int_key]:
    pl = set(cleaned_ott_json_pruned.get(key, []))
    htpruned_ids.update(pl)
lte = {}
for ott_id in lt_set:
    ott_id_str = 'ott{}'.format(ott_id)
    if (ott_id_str not in bt_dict) and (ott_id not in htpruned_ids):