def add_or_replace_annotation(self,  # pylint: disable=R0201
                              obj,
                              annotation,
                              agent,
                              add_agent_only=False):
    """Register `agent` on `obj` if needed, then update its annotation.

    `annotation` is a dictionary which is expected to have a string as
    the value of annotation['author']['name'].

    The agent is appended to the 'agent' list of the 'ot:agents' literal
    meta element (which is created when absent) unless an entry with the
    same '@id' is already listed.  Afterwards
    `delete_same_agent_annotation` is called when `add_agent_only` is
    true, and `replace_same_agent_annotation` otherwise.

    Per the original documentation, annotations are removed from obj when
    they have the same author/name and no messages flagged to be
    preserved (values for 'preserve' that evaluate to true); that logic
    lives in the helpers -- TODO confirm against their implementation.
    """
    nexml_el = get_nexml_el(obj)
    version = detect_nexson_version(obj)
    _LOG.debug('detected version as ' + version)
    agents_meta = find_val_literal_meta_first(nexml_el, 'ot:agents', version)
    if not agents_meta:
        agents_meta = add_literal_meta(nexml_el, 'ot:agents', {'agent': []}, version)
    known_agents = agents_meta.setdefault('agent', [])
    agent_id = agent['@id']
    # Only register the agent once, keyed on its '@id'.
    if not any(entry.get('@id') == agent_id for entry in known_agents):
        known_agents.append(agent)
    if add_agent_only:
        updater = delete_same_agent_annotation
    else:
        updater = replace_same_agent_annotation
    updater(obj, annotation)
def convert(self, obj):
    """Convert the NexSON blob `obj` to the DIRECT_HONEY_BADGERFISH form.

    The nexml element is converted recursively, the primary data
    structures (otus, otu, trees, tree, node, edge) are normalised from
    dicts to lists of dicts, and '@nexml2json' is stamped with
    DIRECT_HONEY_BADGERFISH.  The object is modified in place and
    returned.

    Raises NotImplementedError when `self.pristine_if_invalid` is set.
    """
    if self.pristine_if_invalid:
        raise NotImplementedError(
            'pristine_if_invalid option is not supported yet')
    nexml_el = get_nexml_el(obj)
    assert nexml_el
    self._recursive_convert_dict(nexml_el)
    # pluralization simplifications in hbf:
    # convert dicts to lists for the primary datastructures,
    # outermost containers first.
    plural_paths = (
        ('otus',),
        ('otus', 'otu'),
        ('trees',),
        ('trees', 'tree'),
        ('trees', 'tree', 'node'),
        ('trees', 'tree', 'edge'),
    )
    for path in plural_paths:
        self._dict_to_list_of_dicts(nexml_el, *path)
    if self._add_tree_xsi_type:
        for trees_block in nexml_el.get('trees', []):
            for tree in trees_block.get('tree', []):
                tree.setdefault('@xsi:type', 'nex:FloatTree')
    nexml_el['@nexml2json'] = str(DIRECT_HONEY_BADGERFISH)
    return obj
def extract_tree_nexson(nexson, tree_id, curr_version=None):
    '''Returns a list of (id, tree, otus_group) tuples for the
    specified tree_id (all trees if tree_id is None or empty).

    `nexson` is converted to the BY_ID_HONEY_BADGERFISH form first when
    `curr_version` (detected from the blob if None) is not already a
    by-id version.  When a tree_id is given, the search stops at the
    first tree group that contains it, so the list has at most one
    element.
    '''
    if curr_version is None:
        curr_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(curr_version):
        nexson = convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)

    nexml_el = get_nexml_el(nexson)
    tree_groups = nexml_el['treesById']
    # A single flag drives both the lookup mode and the early exit.
    # Previously the lookup used truthiness while the early exit used
    # `is not None`, so a falsy non-None id (e.g. '') listed all trees
    # but returned after the first one.
    single_tree = bool(tree_id)
    found = []
    for tree_group in tree_groups.values():
        tree_by_id = tree_group['treeById']
        if single_tree:
            tree_list = [(tree_id, tree_by_id.get(tree_id))]
        else:
            tree_list = tree_by_id.items()
        for tid, tree in tree_list:
            if tree is None:
                continue
            otu_groups = nexml_el['otusById']
            # '@otus' names the otus group this tree group refers to.
            otu_group = otu_groups[tree_group['@otus']]['otuById']
            found.append((tid, tree, otu_group))
            if single_tree:
                return found
    return found
def strip_to_meta_only(blob, nexson_version):
    """Strip the otu and tree content out of `blob` in place.

    `nexson_version` is detected from the blob when None.

    For by-id versions, every otus group loses its 'otuById' entry and
    every value in each trees group's 'treeById' is replaced by None
    (the tree ids are kept as keys).  For other versions, every otus
    group loses its 'otu' entry and each trees group's 'tree' entry is
    replaced by a list of {'id': <tree '@id'>} stubs.

    Returns None; the blob is modified in place.
    """
    if nexson_version is None:
        nexson_version = detect_nexson_version(blob)
    nex = get_nexml_el(blob)
    if _is_by_id_hbf(nexson_version):
        for otus_group in nex.get('otusById', {}).values():
            otus_group.pop('otuById', None)
        for trees_group in nex.get('treesById', {}).values():
            tree_by_id = trees_group['treeById']
            # Only values are replaced, so iterating the dict is safe.
            for tree_id in tree_by_id:
                tree_by_id[tree_id] = None
        return
    otus = nex['otus']
    if not isinstance(otus, list):
        otus = [otus]
    for otus_group in otus:
        otus_group.pop('otu', None)
    trees = nex['trees']
    if not isinstance(trees, list):
        trees = [trees]
    for trees_group in trees:
        tree_list = trees_group.get('tree')
        if tree_list is None:
            # A trees group without any tree has nothing to summarise;
            # wrapping None in a list used to crash on None.get('@id').
            continue
        if not isinstance(tree_list, list):
            tree_list = [tree_list]
        # NOTE(review): the stubs are keyed by 'id' rather than '@id' --
        # confirm that consumers expect this spelling.
        trees_group['tree'] = [{'id': tree.get('@id')} for tree in tree_list]
def convert(self, obj):
    """Rewrite the NexSON blob `obj` as DIRECT_HONEY_BADGERFISH.

    After a recursive conversion of the nexml element, the otus, otu,
    trees, tree, node and edge containers are turned from dicts into
    lists of dicts, and the '@nexml2json' marker is set to
    DIRECT_HONEY_BADGERFISH.  The object is modified in place and
    returned.

    Raises NotImplementedError when `self.pristine_if_invalid` is set.
    """
    if self.pristine_if_invalid:
        raise NotImplementedError('pristine_if_invalid option is not supported yet')
    nexml_root = get_nexml_el(obj)
    assert nexml_root
    self._recursive_convert_dict(nexml_root)
    # pluralization simplifications in hbf:
    # convert dicts to lists for the primary datastructures...
    for key_path in [['otus'],
                     ['otus', 'otu'],
                     ['trees'],
                     ['trees', 'tree'],
                     ['trees', 'tree', 'node'],
                     ['trees', 'tree', 'edge']]:
        self._dict_to_list_of_dicts(nexml_root, *key_path)
    if self._add_tree_xsi_type:
        # Lazily walk every tree of every trees block.
        all_trees = (tree
                     for block in nexml_root.get('trees', [])
                     for tree in block.get('tree', []))
        for tree in all_trees:
            tree.setdefault('@xsi:type', 'nex:FloatTree')
    nexml_root['@nexml2json'] = str(DIRECT_HONEY_BADGERFISH)
    return obj
def convert(self, obj):
    '''Takes a dict corresponding to the honeybadgerfish JSON blob of the
    1.2.* type and converts it to DIRECT_HONEY_BADGERFISH version. The
    object is modified in place and returned.

    The 'otus' and 'trees' lists are built from 'otusById'/'treesById'
    and their element-order lists.  When `self.remove_old_structs` is
    set, the by-id structures and order lists are deleted afterwards.

    Raises NotImplementedError when `self.pristine_if_invalid` is set,
    and KeyError when one of the by-id structures is missing.
    '''
    if self.pristine_if_invalid:
        raise NotImplementedError(
            'pristine_if_invalid option is not supported yet')
    nex = get_nexml_el(obj)
    assert nex
    # Create the new objects as locals.  Nothing is written to nex until
    # both conversions have succeeded, so an exception here does not
    # leave a half-converted top level behind.  (Previously nex['otus']
    # was assigned before the trees were converted.)
    # NOTE(review): the helpers may still touch the by-id dicts they are
    # handed -- confirm if strict immutability on error is required.
    otus = self.convert_otus(nex['otusById'], nex['^ot:otusElementOrder'])
    trees = self.convert_trees(nex['treesById'], nex['^ot:treesElementOrder'])
    # add the locals to the object
    nex['otus'] = otus
    nex['trees'] = trees
    nex['@nexml2json'] = str(DIRECT_HONEY_BADGERFISH)
    # Make the struct leaner
    if self.remove_old_structs:
        for stale_key in ('otusById',
                          '^ot:otusElementOrder',
                          'treesById',
                          '^ot:treesElementOrder'):
            del nex[stale_key]
    return obj
def cull_nonmatching_trees(nexson, tree_id, curr_version=None):
    '''Modifies `nexson` and returns it in version 1.2.1
    with any tree that does not match the ID removed.

    Tree groups that do not contain `tree_id` are dropped entirely; in
    the group that does, every other tree is removed.  The matching
    element-order lists are kept in sync.

    Note that this does not search through the NexSON for
    every node, edge, tree that was deleted. So the resulting
    NexSON may have broken references !
    '''
    if curr_version is None:
        curr_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(curr_version):
        nexson = convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)

    nexml_el = get_nexml_el(nexson)
    groups_by_id = nexml_el['treesById']
    # Group ids are collected first: the dict cannot shrink while it is
    # being iterated.
    unmatched_group_ids = []
    for group_id, group in groups_by_id.items():
        tree_by_id = group['treeById']
        if tree_id not in tree_by_id:
            unmatched_group_ids.append(group_id)
            continue
        doomed_tree_ids = [tid for tid in tree_by_id.keys() if tid != tree_id]
        for tid in doomed_tree_ids:
            group['^ot:treeElementOrder'].remove(tid)
            del tree_by_id[tid]
    for group_id in unmatched_group_ids:
        nexml_el['^ot:treesElementOrder'].remove(group_id)
        del groups_by_id[group_id]
    return nexson
def convert(self, obj):
    '''Takes a dict corresponding to the honeybadgerfish JSON blob of the
    1.2.* type and converts it to DIRECT_HONEY_BADGERFISH version. The
    object is modified in place and returned.

    'otus' and 'trees' are derived from the by-id structures and their
    element-order lists; those sources are deleted afterwards when
    `self.remove_old_structs` is set.

    Raises NotImplementedError when `self.pristine_if_invalid` is set,
    and KeyError when one of the by-id structures is missing.
    '''
    if self.pristine_if_invalid:
        raise NotImplementedError('pristine_if_invalid option is not supported yet')
    nex = get_nexml_el(obj)
    assert nex
    # Create the new objects as locals. This section does not assign
    # into nex, so that if there is an exception the top-level object
    # is unchanged on the error exit.  (The 'otus' assignment used to
    # happen before the trees were converted.)
    # NOTE(review): whether the convert_* helpers modify the structures
    # passed to them is not visible here -- confirm.
    otusById = nex['otusById']
    otusElementOrder = nex['^ot:otusElementOrder']
    otus = self.convert_otus(otusById, otusElementOrder)
    treesById = nex['treesById']
    treesElementOrder = nex['^ot:treesElementOrder']
    trees = self.convert_trees(treesById, treesElementOrder)
    # add the locals to the object
    nex['otus'] = otus
    nex['trees'] = trees
    nex['@nexml2json'] = str(DIRECT_HONEY_BADGERFISH)
    # Make the struct leaner
    if self.remove_old_structs:
        del nex['otusById']
        del nex['^ot:otusElementOrder']
        del nex['treesById']
        del nex['^ot:treesElementOrder']
    return obj
def convert(self, doc_root):
    """Build the output dict for `doc_root` and normalise its nexml element.

    The element produced by `self._gen_hbf_el` is stamped with
    `self.output_format` and wrapped as {key: val}.  If no usable nexml
    element can be found in that dict it is returned as is.  Otherwise
    'characters' is discarded, and every tree without a node flagged
    '@root' gets the flag on the one node that is not the target of any
    edge.

    Raises AssertionError when such a tree does not have exactly one
    candidate root node.
    """
    key, val = self._gen_hbf_el(doc_root)
    val['@nexml2json'] = self.output_format
    o = {key: val}
    try:
        n = get_nexml_el(o)
        assert n
    except Exception:
        # Best effort: without a nexml element there is nothing to tidy.
        # Narrowed from a bare `except:` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return o
    # ot: discard characters...
    if 'characters' in n:
        del n['characters']
    # ot: expect root=true for exactly one node in a tree.
    for trees in _get_index_list_of_values(n, 'trees'):
        for tree in _get_index_list_of_values(trees, 'tree'):
            node_list = _get_index_list_of_values(tree, 'node')
            # Equality (not identity) is kept on purpose so the same
            # values as before count as a root flag.
            if any(node.get('@root') == True for node in node_list):  # noqa: E712
                continue
            node_id_map = {node['@id']: node for node in node_list}
            edge_list = _get_index_list_of_values(tree, 'edge')
            target_set = {edge['@target'] for edge in edge_list}
            # The root is the only node no edge points to.
            root_id_set = set(node_id_map) - target_set
            assert len(root_id_set) == 1
            for ri in root_id_set:
                node_id_map[ri]['@root'] = True
    return o
def convert(self, doc_root):
    """Return {key: val} for `doc_root`, with the nexml element tidied up.

    `self._gen_hbf_el(doc_root)` supplies the key and element; the
    element's '@nexml2json' is set to `self.output_format`.  When the
    resulting dict has no usable nexml element it is returned untouched.
    Otherwise the 'characters' entry is removed and, for each tree in
    which no node carries '@root', the single node that no edge targets
    is flagged as the root.

    Raises AssertionError when a tree lacking a root flag does not have
    exactly one node that is free of incoming edges.
    """
    key, val = self._gen_hbf_el(doc_root)
    val['@nexml2json'] = self.output_format
    o = {key: val}
    try:
        n = get_nexml_el(o)
        assert n
    except Exception:
        # Deliberate best effort; `except Exception` (rather than a bare
        # `except:`) lets KeyboardInterrupt/SystemExit propagate.
        return o
    # ot: discard characters...
    if 'characters' in n:
        del n['characters']
    # ot: expect root=true for exactly one node in a tree.
    for trees in _get_index_list_of_values(n, 'trees'):
        for tree in _get_index_list_of_values(trees, 'tree'):
            node_list = _get_index_list_of_values(tree, 'node')
            for node in node_list:
                # `==` rather than `is` keeps the original matching rules.
                if node.get('@root') == True:  # noqa: E712
                    break
            else:
                # No node is flagged: infer the root as the node that is
                # never an edge target.
                node_id_map = {node['@id']: node for node in node_list}
                edge_list = _get_index_list_of_values(tree, 'edge')
                target_set = {i['@target'] for i in edge_list}
                root_id_set = set(node_id_map.keys()) - target_set
                assert len(root_id_set) == 1
                for ri in root_id_set:
                    node_id_map[ri]['@root'] = True
    return o
def add_or_replace_annotation(self,  # pylint: disable=R0201
                              obj,
                              annotation,
                              agent,
                              add_agent_only=False):
    '''Makes sure `agent` is listed in obj's 'ot:agents' metadata and then
    updates the annotation for that agent.

    Takes an `annotation` dictionary which is expected to have a string
    as the value of annotation['author']['name'].

    An agent whose '@id' is already present in the 'agent' list is not
    added again.  With `add_agent_only` true the call is forwarded to
    `delete_same_agent_annotation`, otherwise to
    `replace_same_agent_annotation`.

    The original documentation states that annotations with the same
    author/name and without messages flagged 'preserve' are removed
    from obj; that is implemented by the helpers (TODO confirm).
    '''
    nex_el = get_nexml_el(obj)
    nexson_version = detect_nexson_version(obj)
    _LOG.debug('detected version as ' + nexson_version)
    agents_container = find_val_literal_meta_first(nex_el, 'ot:agents', nexson_version)
    if not agents_container:
        agents_container = add_literal_meta(nex_el, 'ot:agents', {'agent': []}, nexson_version)
    agent_entries = agents_container.setdefault('agent', [])
    wanted_id = agent['@id']
    for existing in agent_entries:
        if existing.get('@id') == wanted_id:
            break
    else:
        # Loop ended without a match: this agent is new to the study.
        agent_entries.append(agent)
    if add_agent_only:
        delete_same_agent_annotation(obj, annotation)
        return
    replace_same_agent_annotation(obj, annotation)
def delete_annotation(obj, agent_id=None, annot_id=None, nexson_version=None):
    """Delete annotations from `obj` selected by `agent_id` and/or `annot_id`.

    The annotation list is looked up on the nexml element of `obj` for
    `nexson_version` (detected from `obj` when None) and handed to
    `delete_annotation_from_annot_list` together with both ids.
    """
    version = detect_nexson_version(obj) if nexson_version is None else nexson_version
    annotations = get_annotation_list(get_nexml_el(obj), version)
    delete_annotation_from_annot_list(annotations,
                                      agent_id=agent_id,
                                      annot_id=annot_id)
def iter_otu(nexson, nexson_version=None):
    """Generate (otu_id, otu) pairs for every otu in every otus group.

    `nexson_version` is detected from `nexson` when None.  Only by-id
    versions are supported; for any other version NotImplementedError
    is raised when the generator is first advanced.
    """
    version = nexson_version
    if version is None:
        version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(version):
        raise NotImplementedError('iter_otu is only supported for nexson 1.2 at this point')
    otus_groups = get_nexml_el(nexson).get('otusById', {})
    for otus_group in otus_groups.values():
        otu_by_id = otus_group.get('otuById', {})
        for otu_id, otu in otu_by_id.items():
            yield otu_id, otu
def __init__(self, obj, logger):
    """Validate the NexSON blob `obj`, reporting findings via the event hooks.

    Top-level keys other than 'nexml'/'nex:nexml' are reported through
    `self._warn_event`.  If the nexml element cannot be obtained or is
    not a dict, an error event is emitted and construction stops early
    (attributes set after that point do not exist then).  Otherwise the
    nexson version is detected and the nexml element is validated with
    a `_ValidationContext`.

    The `_otu_group_by_id` and `_otu_by_otug` attributes are used during
    validation only and are removed again before returning.
    """
    self._raw = obj
    self._nexml = None
    self._pyid_to_nexson_add = {}
    self._logger = logger
    self._repeated_id = False
    self._otuid2ottid_byogid = {}
    self._ottid2otuid_list_byogid = {}
    self._dupottid_by_ogid_tree_id = {}
    uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
    if uk:
        self._warn_event(_NEXEL.TOP_LEVEL,
                         obj=obj,
                         err_type=gen_UnrecognizedKeyWarning,
                         anc=_EMPTY_TUPLE,
                         obj_nex_id=None,
                         key_list=uk)
    self._nexml = None
    try:
        self._nexml = get_nexml_el(obj)
        assert isinstance(self._nexml, dict)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not reported as a missing 'nexml' key.
        self._error_event(_NEXEL.TOP_LEVEL,
                          obj=obj,
                          err_type=gen_MissingMandatoryKeyWarning,
                          anc=_EMPTY_TUPLE,
                          obj_nex_id=None,
                          key_list=['nexml', ])
        return  # EARLY EXIT!!
    self._nexson_id_to_obj = {}
    self._nexson_version = detect_nexson_version(obj)
    # attr used in validation only should be cleaned up
    # in the finally clause
    self._otu_group_by_id = {}
    self._otu_by_otug = {}
    try:
        # a little duck-punching
        # The context is created outside the inner try: if its
        # construction fails there is no `vc` to clean up, and touching
        # it in a finally clause would raise UnboundLocalError and hide
        # the real error.
        vc = _ValidationContext(self, logger)
        try:
            add_schema_attributes(vc, self._nexson_version)
            assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
            self._validate_nexml_obj(self._nexml, vc, obj)
        finally:
            vc.adaptor = None  # delete circular ref to help gc
            del vc
    finally:
        del self._otu_group_by_id
        del self._otu_by_otug
def __init__(self, obj, logger):
    """Run validation of the NexSON blob `obj` at construction time.

    Unrecognised top-level keys (anything but 'nexml'/'nex:nexml') are
    reported with `self._warn_event`.  When the nexml element is missing
    or not a dict, `self._error_event` is called and the constructor
    returns early, before the version-dependent attributes are set.
    Otherwise the nexson version is detected and
    `self._validate_nexml_obj` is run with a fresh `_ValidationContext`.

    `_otu_group_by_id` and `_otu_by_otug` exist only while validation
    runs; they are deleted in the outer finally clause.
    """
    self._raw = obj
    self._nexml = None
    self._pyid_to_nexson_add = {}
    self._logger = logger
    self._repeated_id = False
    self._otuid2ottid_byogid = {}
    self._ottid2otuid_list_byogid = {}
    self._dupottid_by_ogid_tree_id = {}
    known_top_level = ('nexml', 'nex:nexml')
    uk = sorted(k for k in obj.keys() if k not in known_top_level)
    if uk:
        self._warn_event(_NEXEL.TOP_LEVEL,
                         obj=obj,
                         err_type=gen_UnrecognizedKeyWarning,
                         anc=_EMPTY_TUPLE,
                         obj_nex_id=None,
                         key_list=uk)
    self._nexml = None
    try:
        self._nexml = get_nexml_el(obj)
        assert isinstance(self._nexml, dict)
    except Exception:
        # `except Exception` instead of a bare except: interpreter-exit
        # signals must not be turned into a validation error.
        self._error_event(_NEXEL.TOP_LEVEL,
                          obj=obj,
                          err_type=gen_MissingMandatoryKeyWarning,
                          anc=_EMPTY_TUPLE,
                          obj_nex_id=None,
                          key_list=['nexml', ])
        return  # EARLY EXIT!!
    self._nexson_id_to_obj = {}
    self._nexson_version = detect_nexson_version(obj)
    # attr used in validation only should be cleaned up
    # in the finally clause
    self._otu_group_by_id = {}
    self._otu_by_otug = {}
    try:
        # a little duck-punching
        vc = _ValidationContext(self, logger)
        # Only enter the vc clean-up scope once vc exists; otherwise a
        # failing constructor would surface as UnboundLocalError from
        # the finally clause instead of the original exception.
        try:
            add_schema_attributes(vc, self._nexson_version)
            assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
            self._validate_nexml_obj(self._nexml, vc, obj)
        finally:
            vc.adaptor = None  # delete circular ref to help gc
            del vc
    finally:
        del self._otu_group_by_id
        del self._otu_by_otug
def sort_arbitrarily_ordered_nexson(blob):
    '''Primarily used for testing (getting nice diffs). Calls
    sort_meta_elements and then sorts otu, node and edge list by id.

    By-id versions are returned untouched; otherwise `blob` is sorted
    in place.  `blob` is returned in both cases.
    '''
    # otu, node and edge elements have no necessary order in v0.0 or v1.0
    version = detect_nexson_version(blob)
    nexml_el = get_nexml_el(blob)
    if not _is_by_id_hbf(version):
        sort_meta_elements(blob)
        for otus_block in _get_index_list_of_values(nexml_el, 'otus'):
            _inplace_sort_by_id(otus_block.get('otu', []))
        for trees_block in _get_index_list_of_values(nexml_el, 'trees'):
            for tree in _get_index_list_of_values(trees_block, 'tree'):
                for list_key in ('node', 'edge'):
                    _inplace_sort_by_id(tree.get(list_key, []))
    return blob
def convert(self, obj):
    """Convert the NexSON blob `obj` to the BADGER_FISH_NEXSON_VERSION form.

    The nexml element is converted recursively, '@nexml2json' is set to
    BADGER_FISH_NEXSON_VERSION, and single-element lists are collapsed
    to dicts for otus, trees, node and edge.  The otu and tree lists are
    left as lists (phylografter emulation).  The object is modified in
    place and returned.

    Raises NotImplementedError when `self.pristine_if_invalid` is set.
    """
    if self.pristine_if_invalid:
        raise NotImplementedError('pristine_if_invalid option is not supported yet')
    nexml_el = get_nexml_el(obj)
    assert nexml_el
    self._recursive_convert_dict(nexml_el)
    nexml_el['@nexml2json'] = str(BADGER_FISH_NEXSON_VERSION)
    collapse_paths = [('otus',), ('trees',)]
    # otu and tree are always arrays in phylografter
    emulate_phylografter_pluralization = True
    if not emulate_phylografter_pluralization:
        collapse_paths.append(('otus', 'otu'))
        collapse_paths.append(('trees', 'tree'))
    collapse_paths.append(('trees', 'tree', 'node'))
    collapse_paths.append(('trees', 'tree', 'edge'))
    for path in collapse_paths:
        self._single_el_list_to_dicts(nexml_el, *path)
    return obj
def convert(self, obj):
    '''Rewrites the NexSON blob `obj` in the BADGER_FISH_NEXSON_VERSION
    syntax. The object is modified in place and returned.

    After the recursive conversion of the nexml element, lists holding
    a single element are turned into dicts for the otus, trees, node
    and edge levels; otu and tree stay lists to emulate phylografter.

    Raises NotImplementedError when `self.pristine_if_invalid` is set.
    '''
    if self.pristine_if_invalid:
        raise NotImplementedError('pristine_if_invalid option is not supported yet')
    nexml_root = get_nexml_el(obj)
    assert nexml_root
    self._recursive_convert_dict(nexml_root)
    nexml_root['@nexml2json'] = str(BADGER_FISH_NEXSON_VERSION)
    collapse = self._single_el_list_to_dicts
    for top_level_key in ('otus', 'trees'):
        collapse(nexml_root, top_level_key)
    #
    # otu and tree are always arrays in phylografter
    emulate_phylografter_pluralization = True
    if not emulate_phylografter_pluralization:
        collapse(nexml_root, 'otus', 'otu')
        collapse(nexml_root, 'trees', 'tree')
    for leaf_key in ('node', 'edge'):
        collapse(nexml_root, 'trees', 'tree', leaf_key)
    return obj
def convert(self, obj):
    '''Takes a dict corresponding to the honeybadgerfish JSON blob of the
    1.0.* type and converts it to BY_ID_HONEY_BADGERFISH version. The
    object is modified in place and returned.

    Raises NotImplementedError when `self.pristine_if_invalid` is set,
    and NexsonError when a trees-group id or a tree id is repeated.
    '''
    if self.pristine_if_invalid:
        raise NotImplementedError(
            'pristine_if_invalid option is not supported yet')
    nex = get_nexml_el(obj)
    assert nex
    # Create the new objects as locals. This section should not
    #   mutate obj, so that if there is an exception the object
    #   is unchanged on the error exit
    # NOTE(review): tree_group['^ot:treeElementOrder'] is assigned inside
    #   the loop below, and convert_tree appears to work in place (see the
    #   `tree_alias is tree` assert), so obj may already be partially
    #   modified when an exception is raised -- confirm.
    otus = _index_list_of_values(nex, 'otus')
    o_t = self.convert_otus(otus)
    otusById, otusElementOrder = o_t
    trees = _get_index_list_of_values(nex, 'trees')
    treesById = dict((i['@id'], i) for i in trees)
    treesElementOrder = [i['@id'] for i in trees]
    # A shorter dict than list means at least one trees-group id occurs
    # twice; the loop finds the first repeat to name it in the error.
    if len(treesById) != len(treesElementOrder):
        trees_id_set = set()
        for tgid in treesElementOrder:
            if tgid in trees_id_set:
                raise NexsonError(
                    'Repeated trees element id "{}"'.format(tgid))
            trees_id_set.add(tgid)
    # tree ids must be unique across all tree groups, not just within one
    tree_id_set = set()
    treeContainingObjByTreesId = {}
    for tree_group in trees:
        treeById = {}
        treeElementOrder = []
        tree_array = _get_index_list_of_values(tree_group, 'tree')
        for tree in tree_array:
            t_t = self.convert_tree(tree)
            tid, tree_alias = t_t
            if tid in tree_id_set:
                raise NexsonError(
                    'Repeated tree element id "{}"'.format(tid))
            tree_id_set.add(tid)
            # convert_tree is expected to hand back the same object
            assert tree_alias is tree
            treeById[tid] = tree
            treeElementOrder.append(tid)
        treeContainingObjByTreesId[tree_group['@id']] = treeById
        tree_group['^ot:treeElementOrder'] = treeElementOrder
    # If all that succeeds, add the new object to the dict, creating a fat structure
    nex['otusById'] = otusById
    nex['^ot:otusElementOrder'] = otusElementOrder
    nex['treesById'] = treesById
    nex['^ot:treesElementOrder'] = treesElementOrder
    for k, v in treeContainingObjByTreesId.items():
        treesById[k]['treeById'] = v
    nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
    # Make the struct leaner
    if self.remove_old_structs:
        del nex['otus']
        del nex['trees']
        for k, v in treesById.items():
            if 'tree' in v:
                del v['tree']
            # the id is now the key in treesById
            del v['@id']
    return obj
def __init__(self, obj, logger, **kwargs):
    """Validate the NexSON blob `obj`, reporting findings via the event hooks.

    Keyword arguments:
        max_num_trees_per_study -- optional upper bound on the number of
            trees; when exceeded an error event is emitted.

    Top-level keys other than 'nexml'/'nex:nexml' are reported through
    `self._warn_event`.  If the nexml element cannot be obtained or is
    not a dict, an error event is emitted and construction stops early
    (attributes set after that point do not exist then).  Otherwise the
    nexson version is detected, the nexml element is validated with a
    `_ValidationContext`, and the tree-count limit is checked.

    The `_otu_group_by_id` and `_otu_by_otug` attributes are used during
    validation only and are removed again before returning.
    """
    self._raw = obj
    self._nexml = None
    self._pyid_to_nexson_add = {}
    self._logger = logger
    self._repeated_id = False
    self._otuid2ottid_byogid = {}
    self._ottid2otuid_list_byogid = {}
    self._dupottid_by_ogid_tree_id = {}
    self._max_num_trees_per_study = kwargs.get('max_num_trees_per_study')
    uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
    if uk:
        self._warn_event(_NEXEL.TOP_LEVEL,
                         obj=obj,
                         err_type=gen_UnrecognizedKeyWarning,
                         anc=_EMPTY_TUPLE,
                         obj_nex_id=None,
                         key_list=uk)
    self._nexml = None
    try:
        self._nexml = get_nexml_el(obj)
        assert isinstance(self._nexml, dict)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not reported as a missing 'nexml' key.
        self._error_event(_NEXEL.TOP_LEVEL,
                          obj=obj,
                          err_type=gen_MissingMandatoryKeyWarning,
                          anc=_EMPTY_TUPLE,
                          obj_nex_id=None,
                          key_list=['nexml', ])
        return  # EARLY EXIT!!
    self._nexson_id_to_obj = {}
    self._nexson_version = detect_nexson_version(obj)
    # attr used in validation only should be cleaned up
    # in the finally clause
    self._otu_group_by_id = {}
    self._otu_by_otug = {}
    try:
        # a little duck-punching
        vc = _ValidationContext(self, logger)
        try:
            add_schema_attributes(vc, self._nexson_version)
            assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
            self._validate_nexml_obj(self._nexml, vc, obj)
            if self._max_num_trees_per_study is not None:
                nt = count_num_trees(self._raw)
                if nt > self._max_num_trees_per_study:
                    m = '{f:d} trees found, but a limit of {m:d} trees per nexson is being enforced'
                    m = m.format(f=nt, m=self._max_num_trees_per_study)
                    self._error_event(_NEXEL.TOP_LEVEL,
                                      obj=obj,
                                      err_type=gen_MaxSizeExceededWarning,
                                      anc=_EMPTY_TUPLE,
                                      obj_nex_id=None,
                                      message=m)
                    # both finally clauses still run on this return
                    return  # EARLY EXIT!!
        finally:
            vc.adaptor = None  # delete circular ref to help gc
            del vc
    finally:
        del self._otu_group_by_id
        del self._otu_by_otug
def __init__(self, obj, logger, **kwargs):
    """Run validation of the NexSON blob `obj` at construction time.

    Keyword arguments:
        max_num_trees_per_study -- optional limit on the number of trees
            in the blob; exceeding it is reported as an error event.

    Unrecognised top-level keys (anything but 'nexml'/'nex:nexml') are
    reported with `self._warn_event`.  When the nexml element is missing
    or not a dict, `self._error_event` is called and the constructor
    returns early, before the version-dependent attributes are set.
    Otherwise the nexson version is detected,
    `self._validate_nexml_obj` is run with a fresh `_ValidationContext`
    and the tree-count limit is enforced.

    `_otu_group_by_id` and `_otu_by_otug` exist only while validation
    runs; they are deleted in the outer finally clause.
    """
    self._raw = obj
    self._nexml = None
    self._pyid_to_nexson_add = {}
    self._logger = logger
    self._repeated_id = False
    self._otuid2ottid_byogid = {}
    self._ottid2otuid_list_byogid = {}
    self._dupottid_by_ogid_tree_id = {}
    self._max_num_trees_per_study = kwargs.get('max_num_trees_per_study')
    known_top_level = ('nexml', 'nex:nexml')
    uk = sorted(k for k in obj.keys() if k not in known_top_level)
    if uk:
        self._warn_event(_NEXEL.TOP_LEVEL,
                         obj=obj,
                         err_type=gen_UnrecognizedKeyWarning,
                         anc=_EMPTY_TUPLE,
                         obj_nex_id=None,
                         key_list=uk)
    self._nexml = None
    try:
        self._nexml = get_nexml_el(obj)
        assert isinstance(self._nexml, dict)
    except Exception:
        # `except Exception` instead of a bare except: interpreter-exit
        # signals must not be turned into a validation error.
        self._error_event(_NEXEL.TOP_LEVEL,
                          obj=obj,
                          err_type=gen_MissingMandatoryKeyWarning,
                          anc=_EMPTY_TUPLE,
                          obj_nex_id=None,
                          key_list=['nexml', ])
        return  # EARLY EXIT!!
    self._nexson_id_to_obj = {}
    self._nexson_version = detect_nexson_version(obj)
    # attr used in validation only should be cleaned up
    # in the finally clause
    self._otu_group_by_id = {}
    self._otu_by_otug = {}
    try:
        # a little duck-punching
        # vc is created before its own try/finally: with a single
        # try/finally a failing _ValidationContext(...) left `vc`
        # unbound and the clean-up raised UnboundLocalError, masking the
        # real error and skipping the attribute clean-up.
        vc = _ValidationContext(self, logger)
        try:
            add_schema_attributes(vc, self._nexson_version)
            assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
            self._validate_nexml_obj(self._nexml, vc, obj)
            if self._max_num_trees_per_study is not None:
                nt = count_num_trees(self._raw)
                if nt > self._max_num_trees_per_study:
                    m = '{f:d} trees found, but a limit of {m:d} trees per nexson is being enforced'
                    m = m.format(f=nt, m=self._max_num_trees_per_study)
                    self._error_event(_NEXEL.TOP_LEVEL,
                                      obj=obj,
                                      err_type=gen_MaxSizeExceededWarning,
                                      anc=_EMPTY_TUPLE,
                                      obj_nex_id=None,
                                      message=m)
                    return  # EARLY EXIT!!
        finally:
            vc.adaptor = None  # delete circular ref to help gc
            del vc
    finally:
        del self._otu_group_by_id
        del self._otu_by_otug
def convert(self, obj):
    '''Takes a dict corresponding to the honeybadgerfish JSON blob of the
    1.0.* type and converts it to BY_ID_HONEY_BADGERFISH version. The
    object is modified in place and returned.

    Trees for which `self.convert_tree` returns None are left out of the
    by-id structures.

    Raises NotImplementedError when `self.pristine_if_invalid` is set,
    and NexsonError when a trees-group id or a tree id is repeated.
    '''
    if self.pristine_if_invalid:
        raise NotImplementedError('pristine_if_invalid option is not supported yet')
    nex = get_nexml_el(obj)
    assert nex
    # Create the new objects as locals. This section should not
    #   mutate obj, so that if there is an exception the object
    #   is unchanged on the error exit
    # NOTE(review): tree_group['^ot:treeElementOrder'] is assigned inside
    #   the loop below, and convert_tree appears to work in place (see the
    #   `tree_alias is tree` assert), so obj may already be partially
    #   modified when an exception is raised -- confirm.
    otus = _index_list_of_values(nex, 'otus')
    o_t = self.convert_otus(otus)
    otusById, otusElementOrder = o_t
    trees = _get_index_list_of_values(nex, 'trees')
    treesById = dict((i['@id'], i) for i in trees)
    treesElementOrder = [i['@id'] for i in trees]
    # A shorter dict than list means at least one trees-group id occurs
    # twice; the loop finds the first repeat to name it in the error.
    if len(treesById) != len(treesElementOrder):
        trees_id_set = set()
        for tgid in treesElementOrder:
            if tgid in trees_id_set:
                raise NexsonError('Repeated trees element id "{}"'.format(tgid))
            trees_id_set.add(tgid)
    # tree ids must be unique across all tree groups, not just within one
    tree_id_set = set()
    treeContainingObjByTreesId = {}
    for tree_group in trees:
        treeById = {}
        treeElementOrder = []
        tree_array = _get_index_list_of_values(tree_group, 'tree')
        for tree in tree_array:
            t_t = self.convert_tree(tree)
            # a None result means this tree is skipped
            if t_t is None:
                continue
            tid, tree_alias = t_t
            if tid in tree_id_set:
                raise NexsonError('Repeated tree element id "{}"'.format(tid))
            tree_id_set.add(tid)
            # convert_tree is expected to hand back the same object
            assert tree_alias is tree
            treeById[tid] = tree
            treeElementOrder.append(tid)
        treeContainingObjByTreesId[tree_group['@id']] = treeById
        tree_group['^ot:treeElementOrder'] = treeElementOrder
    # If all that succeeds, add the new object to the dict, creating a fat structure
    nex['otusById'] = otusById
    nex['^ot:otusElementOrder'] = otusElementOrder
    nex['treesById'] = treesById
    nex['^ot:treesElementOrder'] = treesElementOrder
    for k, v in treeContainingObjByTreesId.items():
        treesById[k]['treeById'] = v
    nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
    # Make the struct leaner
    if self.remove_old_structs:
        del nex['otus']
        del nex['trees']
        for k, v in treesById.items():
            if 'tree' in v:
                del v['tree']
            # the id is now the key in treesById
            del v['@id']
    return obj
def nexml_el_of_by_id(nexson, curr_version=None):
    """Return the nexml element of `nexson` in by-id form.

    `curr_version` is detected from `nexson` when None.  If it is not a
    by-id version, the blob is first passed through
    `convert_nexson_format` with BY_ID_HONEY_BADGERFISH as the target.
    """
    version = curr_version
    if version is None:
        version = detect_nexson_version(nexson)
    if _is_by_id_hbf(version):
        by_id_blob = nexson
    else:
        by_id_blob = convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)
    return get_nexml_el(by_id_blob)