def convert(self, doc_root):
    """Convert `doc_root` to a dict and apply the Open Tree ("ot") clean-ups.

    The (key, value) pair produced by ``self._gen_hbf_el(doc_root)`` is
    wrapped in a single-entry dict after ``self.output_format`` has been
    stored under the value's '@nexml2json' key.  If ``get_nexml_el`` yields
    a truthy element for that dict, then:

      * any 'characters' entry is deleted from that element, and
      * in every tree where no node has ``'@root' == True``, the single
        node that is not the '@target' of any edge gets ``'@root' = True``.

    Returns the wrapper dict.  It is returned without the clean-ups when
    ``get_nexml_el`` raises or returns a falsy value.

    Raises AssertionError if a tree with no flagged root does not have
    exactly one node that is never an edge target.
    """
    key, val = self._gen_hbf_el(doc_root)
    val['@nexml2json'] = self.output_format
    o = {key: val}
    # Best-effort lookup: any ordinary failure means "nothing to clean up".
    # Only Exception is caught so KeyboardInterrupt/SystemExit propagate.
    try:
        n = get_nexml_el(o)
    except Exception:
        return o
    # Explicit check (rather than an assert inside the try) so that the
    # early return still happens when Python runs with -O.
    if not n:
        return o
    # ot: discard characters...
    if 'characters' in n:
        del n['characters']
    # ot: expect root=true for exactly one node in a tree.
    for trees in _get_index_list_of_values(n, 'trees'):
        for tree in _get_index_list_of_values(trees, 'tree'):
            node_list = _get_index_list_of_values(tree, 'node')
            # Equality (not identity) is kept deliberately to preserve
            # the original matching rule.
            if any(node.get('@root') == True for node in node_list):  # noqa: E712
                continue
            node_id_map = dict((node['@id'], node) for node in node_list)
            edge_list = _get_index_list_of_values(tree, 'edge')
            target_set = set(edge['@target'] for edge in edge_list)
            # A root is a node that no edge points to.
            root_id_set = set(node_id_map) - target_set
            assert len(root_id_set) == 1, \
                'expected exactly 1 root candidate, found {}'.format(len(root_id_set))
            for ri in root_id_set:
                node_id_map[ri]['@root'] = True
    return o
def convert(self, doc_root):
    """Build the output dict for `doc_root` and apply Open Tree clean-ups.

    ``self._gen_hbf_el(doc_root)`` supplies a (key, value) pair; the value
    is stamped with ``self.output_format`` under '@nexml2json' and the pair
    becomes the sole entry of the returned dict.  When ``get_nexml_el``
    returns a truthy element for that dict:

      * its 'characters' entry (if any) is removed;
      * each tree lacking a node with ``'@root' == True`` has '@root' set
        to True on the one node that never appears as an edge '@target'.

    Returns the dict.  If ``get_nexml_el`` raises or returns a falsy value
    the dict is returned as built, with no clean-ups applied.

    Raises AssertionError when a root has to be inferred but the number of
    candidate nodes is not exactly one.
    """
    key, val = self._gen_hbf_el(doc_root)
    val['@nexml2json'] = self.output_format
    o = {key: val}
    try:
        n = get_nexml_el(o)
    except Exception:
        # Deliberate best-effort: hand back the unmodified conversion.
        # (A bare except would also swallow KeyboardInterrupt/SystemExit.)
        return o
    if not n:
        # Checked explicitly instead of via assert so -O cannot skip it.
        return o

    # ot: discard characters...
    if 'characters' in n:
        del n['characters']

    # ot: expect root=true for exactly one node in a tree.
    for trees in _get_index_list_of_values(n, 'trees'):
        for tree in _get_index_list_of_values(trees, 'tree'):
            node_list = _get_index_list_of_values(tree, 'node')
            root_node_flagged = any(
                node.get('@root') == True  # noqa: E712 - equality kept on purpose
                for node in node_list
            )
            if root_node_flagged:
                continue
            node_id_map = dict((node['@id'], node) for node in node_list)
            edge_list = _get_index_list_of_values(tree, 'edge')
            target_set = set(edge['@target'] for edge in edge_list)
            # Nodes that are never the target of an edge are root candidates.
            root_id_set = set(node_id_map) - target_set
            assert len(root_id_set) == 1, \
                'expected exactly 1 root candidate, found {}'.format(len(root_id_set))
            for ri in root_id_set:
                node_id_map[ri]['@root'] = True
    return o
def convert_tree(self, tree):
    """Add by-id lookups for the nodes and edges of `tree`, in place.

    Adds three keys to `tree`:
      'nodeById'        node '@id' -> node
      'edgeBySourceId'  edge '@source' -> {edge '@id' -> edge}
      '^ot:rootNodeId'  '@id' of the node whose '@root' is True or 'true'

    Each edge loses its '@id' key.  When ``self.remove_old_structs`` is
    true the tree's '@id', 'node' and 'edge' keys are deleted and every
    node loses its '@id' and (if present) '^ot:isLeaf' keys.

    Returns (tree_id, tree).

    Raises AssertionError unless exactly one node is flagged as root.
    NOTE(review): with remove_old_structs set, a tree that has no 'edge'
    key fails on the ``del`` below (presumably KeyError for a plain dict).
    """
    node_list = _index_list_of_values(tree, 'node')
    nodes_by_id = {}
    root = None
    for nd in node_list:
        nodes_by_id[nd['@id']] = nd
        # @TEMP accepting true or "true"
        if nd.get('@root') in [True, 'true']:
            assert root is None
            root = nd
    assert root is not None

    edges_by_source = {}
    for edge in _get_index_list_of_values(tree, 'edge'):
        source_id = edge['@source']
        # The id becomes the key of the inner dict, so drop it from the edge.
        edge_id = edge.pop('@id')
        edges_by_source.setdefault(source_id, {})[edge_id] = edge

    # If all that succeeds, add the new objects to the dict, creating a fat structure
    tree['nodeById'] = nodes_by_id
    tree['edgeBySourceId'] = edges_by_source
    tree['^ot:rootNodeId'] = root['@id']

    # Make the struct leaner
    tid = tree['@id']
    if self.remove_old_structs:
        for stale_key in ('@id', 'node', 'edge'):
            del tree[stale_key]
        for nd in node_list:
            nd.pop('^ot:isLeaf', None)
            del nd['@id']
    return tid, tree
def sort_arbitrarily_ordered_nexson(blob):
    '''Primarily used for testing (getting nice diffs).

    Calls sort_meta_elements and then sorts the otu, node and edge lists
    by id.  A blob whose detected version is a by-id form is returned
    untouched.  Returns `blob` in every case.
    '''
    # otu, node and edge elements have no necessary order in v0.0 or v1.0
    version = detect_nexson_version(blob)
    nexml_el = get_nexml_el(blob)
    if _is_by_id_hbf(version):
        return blob
    sort_meta_elements(blob)
    for otus_group in _get_index_list_of_values(nexml_el, 'otus'):
        _inplace_sort_by_id(otus_group.get('otu', []))
    for trees_group in _get_index_list_of_values(nexml_el, 'trees'):
        for tree in _get_index_list_of_values(trees_group, 'tree'):
            for list_key in ('node', 'edge'):
                _inplace_sort_by_id(tree.get(list_key, []))
    return blob
def convert_tree(self, tree):
    """Return (tree_id, tree) or None (if the tree has no edges).

    Adds by-id lookups to `tree` in place:
      'nodeById'        node '@id' -> node
      'edgeBySourceId'  edge '@source' -> {edge '@id' -> edge}
      '^ot:rootNodeId'  '@id' of the node whose '@root' is True or 'true'

    Each edge loses its '@id' key.  When ``self.remove_old_structs`` is
    true the tree's '@id', 'node' and 'edge' keys are deleted and every
    node loses its '@id' and (if present) '^ot:isLeaf' keys.  None is
    returned only on that path, when the tree has no 'edge' key; by then
    the tree has already been partially modified.

    Raises AssertionError unless exactly one node is flagged as root.
    """
    nodeById = {}
    root_node = None
    node_list = _index_list_of_values(tree, 'node')
    for node in node_list:
        nodeById[node['@id']] = node
        r = node.get('@root')
        # _LOG.debug(' node {} @root={}'.format(node['@id'], r))
        if r in [True, 'true']:  # @TEMP accepting true or "true"
            assert root_node is None
            root_node = node
    assert root_node is not None
    edgeBySourceId = {}
    edge_list = _get_index_list_of_values(tree, 'edge')
    for edge in edge_list:
        sourceId = edge['@source']
        # The id becomes the key of the inner dict, so drop it from the edge.
        eid = edge.pop('@id')
        edgeBySourceId.setdefault(sourceId, {})[eid] = edge
    # If all that succeeds, add the new object to the dict, creating a fat structure
    tree['nodeById'] = nodeById
    tree['edgeBySourceId'] = edgeBySourceId
    tree['^ot:rootNodeId'] = root_node['@id']
    # Make the struct leaner
    tid = tree['@id']
    if self.remove_old_structs:
        del tree['@id']
        del tree['node']
        try:
            del tree['edge']
        except KeyError:
            # Only a missing 'edge' key is expected here; anything else
            # should propagate instead of being silently swallowed.
            #
            # Tree Tr75035 in http://treebase.org/treebase-web/search/study/summary.html?id=14763
            # is empty. In NeXML that shows up as a tree with a node but no edges.
            # See https://github.com/OpenTreeOfLife/opentree/issues/641
            # TODO: returning None seems safest, but could cull trees with just metadata.
            #   But creating a fake tree for metadata is ugly. So, I'm fine with not
            #   supporting this.
            # NOTE(review): assumes _LOG is a standard logging.Logger, for
            # which warn() is a deprecated alias of warning().
            _LOG.warning(
                'Tree with ID "{}" is being dropped because it has no edges'
                .format(tid))
            assert not edge_list
            return None
        for node in node_list:
            if '^ot:isLeaf' in node:
                del node['^ot:isLeaf']
            del node['@id']
    return tid, tree
def convert_tree(self, tree):
    '''Return (tree_id, tree) or None (if the tree has no edges).

    The tree is modified in place.  It gains:
      'nodeById'        mapping of node '@id' to the node
      'edgeBySourceId'  mapping of edge '@source' to {edge '@id': edge}
      '^ot:rootNodeId'  the '@id' of the node flagged '@root' (True or 'true')

    The '@id' key is removed from every edge.  If
    ``self.remove_old_structs`` is true, the tree's '@id', 'node' and
    'edge' keys are removed too, as are each node's '@id' and
    '^ot:isLeaf' keys.  None is returned only on that path, when there is
    no 'edge' key to remove; the tree has been partially modified by then.

    Raises AssertionError unless exactly one node is flagged as root.
    '''
    nodeById = {}
    root_node = None
    node_list = _index_list_of_values(tree, 'node')
    for node in node_list:
        nodeById[node['@id']] = node
        r = node.get('@root')
        #_LOG.debug(' node {} @root={}'.format(node['@id'], r))
        if r in [True, 'true']:  #@TEMP accepting true or "true"
            assert root_node is None
            root_node = node
    assert root_node is not None
    edgeBySourceId = {}
    edge_list = _get_index_list_of_values(tree, 'edge')
    for edge in edge_list:
        sourceId = edge['@source']
        eid = edge['@id']
        # The id is now the key of the inner dict; no need to keep it on the edge.
        del edge['@id']
        byso = edgeBySourceId.setdefault(sourceId, {})
        byso[eid] = edge
    # If all that succeeds, add the new object to the dict, creating a fat structure
    tree['nodeById'] = nodeById
    tree['edgeBySourceId'] = edgeBySourceId
    tree['^ot:rootNodeId'] = root_node['@id']
    # Make the struct leaner
    tid = tree['@id']
    if not self.remove_old_structs:
        return tid, tree
    del tree['@id']
    del tree['node']
    try:
        del tree['edge']
    except KeyError:
        # Catch only the "no edge key" case so unrelated errors (and
        # KeyboardInterrupt/SystemExit) are not hidden.
        #
        # Tree Tr75035 in http://treebase.org/treebase-web/search/study/summary.html?id=14763
        # is empty. In NeXML that shows up as a tree with a node but no edges.
        # See https://github.com/OpenTreeOfLife/opentree/issues/641
        # TODO: returning None seems safest, but could cull trees with just metadata.
        #   But creating a fake tree for metadata is ugly. So, I'm fine with not
        #   supporting this.
        # NOTE(review): assumes _LOG is a standard logging.Logger, for
        # which warn() is a deprecated alias of warning().
        _LOG.warning('Tree with ID "{}" is being dropped because it has no edges'.format(tid))
        assert not edge_list
        return None
    for node in node_list:
        if '^ot:isLeaf' in node:
            del node['^ot:isLeaf']
        del node['@id']
    return tid, tree
def _recursive_convert_dict(self, obj):
    """Fold the 'meta' entries of `obj` into `obj` itself, then recurse.

    Each meta element is transformed into a (key, value) pair: by
    ``_transform_resource_meta`` when its '@xsi:type' matches
    ``_RESOURCE_META_PAT``, otherwise by ``_transform_literal_meta`` (the
    type is then asserted to match ``_LITERAL_META_PAT``).  The pairs are
    collected first and added to `obj` afterwards; the 'meta' key itself
    is deleted when ``self.remove_old_structs`` is true.  Finally every
    dict or list value of `obj` is converted recursively.

    `obj` is modified in place; nothing is returned.
    """
    _cull_redundant_about(obj)  # rule 10...
    pending = {}
    for meta in _get_index_list_of_values(obj, 'meta'):
        xsi_type = meta['@xsi:type']
        if _RESOURCE_META_PAT.match(xsi_type):
            transformed = self._transform_resource_meta(meta)
        else:
            assert _LITERAL_META_PAT.match(xsi_type)
            transformed = self._transform_literal_meta(meta)
        new_key, new_val = transformed
        _add_value_to_dict_bf(pending, new_key, new_val)

    if ('meta' in obj) and self.remove_old_structs:
        del obj['meta']

    for new_key, new_val in pending.items():
        _add_value_to_dict_bf(obj, new_key, new_val)

    for child in obj.values():
        if isinstance(child, dict):
            self._recursive_convert_dict(child)
        elif isinstance(child, list):
            self._recursive_convert_list(child)
def convert(self, obj):
    '''Takes a dict corresponding to the honeybadgerfish JSON blob of the
    1.0.* type and converts it to BY_ID_HONEY_BADGERFISH version. The
    object is modified in place and returned.

    The nexml element of `obj` gains 'otusById', '^ot:otusElementOrder',
    'treesById' and '^ot:treesElementOrder'; each trees group gains
    'treeById' and '^ot:treeElementOrder'; '@nexml2json' is set to
    str(BY_ID_HONEY_BADGERFISH).  When ``self.remove_old_structs`` is
    true the 'otus' and 'trees' keys are deleted from the nexml element,
    and each trees group loses its 'tree' (if present) and '@id' keys.

    A tree for which ``self.convert_tree`` returns None is left out of
    'treeById' and '^ot:treeElementOrder'.

    Raises:
        NotImplementedError: if ``self.pristine_if_invalid`` is true.
        NexsonError: if a trees-group id or a tree id is repeated.
        AssertionError: if no nexml element is found, or convert_tree
            returns a different tree object than it was given.
    '''
    if self.pristine_if_invalid:
        raise NotImplementedError(
            'pristine_if_invalid option is not supported yet')
    nex = get_nexml_el(obj)
    assert nex
    # Create the new objects as locals. This section is meant to avoid
    #   mutating obj, so that if there is an exception the object
    #   is unchanged on the error exit.
    # NOTE(review): the '^ot:treeElementOrder' assignment below already
    #   writes to each trees group before the final section is reached.
    otus = _index_list_of_values(nex, 'otus')
    otusById, otusElementOrder = self.convert_otus(otus)
    trees = _get_index_list_of_values(nex, 'trees')
    treesById = dict((i['@id'], i) for i in trees)
    treesElementOrder = [i['@id'] for i in trees]
    if len(treesById) != len(treesElementOrder):
        # The dict collapsed at least one duplicate id; find it to report it.
        trees_id_set = set()
        for tgid in treesElementOrder:
            if tgid in trees_id_set:
                raise NexsonError(
                    'Repeated trees element id "{}"'.format(tgid))
            trees_id_set.add(tgid)
    tree_id_set = set()
    treeContainingObjByTreesId = {}
    for tree_group in trees:
        #_LOG.debug('converting tree group {} to by_id'.format(tree_group['@id']))
        treeById = {}
        treeElementOrder = []
        tree_array = _get_index_list_of_values(tree_group, 'tree')
        for tree in tree_array:
            #_LOG.debug('# pre-convert keys = {}'.format(tree.keys()))
            t_t = self.convert_tree(tree)
            if t_t is None:
                # convert_tree may signal "drop this tree" (e.g. a tree
                # with no edges) by returning None; unpacking it would
                # raise TypeError.
                continue
            tid, tree_alias = t_t
            if tid in tree_id_set:
                raise NexsonError(
                    'Repeated tree element id "{}"'.format(tid))
            tree_id_set.add(tid)
            #_LOG.debug('converting tree {} to by_id'.format(tid))
            #_LOG.debug('# post-convert keys = {}'.format(tree.keys()))
            assert tree_alias is tree
            treeById[tid] = tree
            treeElementOrder.append(tid)
        treeContainingObjByTreesId[tree_group['@id']] = treeById
        tree_group['^ot:treeElementOrder'] = treeElementOrder
    # If all that succeeds, add the new object to the dict, creating a fat structure
    nex['otusById'] = otusById
    nex['^ot:otusElementOrder'] = otusElementOrder
    nex['treesById'] = treesById
    nex['^ot:treesElementOrder'] = treesElementOrder
    for k, v in treeContainingObjByTreesId.items():
        treesById[k]['treeById'] = v
    nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
    # Make the struct leaner
    if self.remove_old_structs:
        del nex['otus']
        del nex['trees']
        for v in treesById.values():
            if 'tree' in v:
                del v['tree']
            del v['@id']
    return obj
def convert(self, obj):
    '''Convert a 1.0.* honeybadgerfish JSON dict to the
    BY_ID_HONEY_BADGERFISH form.  `obj` is modified in place and returned.

    The nexml element of `obj` gains 'otusById', '^ot:otusElementOrder',
    'treesById' and '^ot:treesElementOrder'; each trees group gains
    'treeById' and '^ot:treeElementOrder'; '@nexml2json' is set to
    str(BY_ID_HONEY_BADGERFISH).  Trees for which ``self.convert_tree``
    returns None are skipped.  When ``self.remove_old_structs`` is true
    the 'otus' and 'trees' keys are deleted from the nexml element, and
    each trees group loses its 'tree' (if present) and '@id' keys.

    Raises NotImplementedError if ``self.pristine_if_invalid`` is true,
    and NexsonError if a trees-group id or a tree id is repeated.
    '''
    if self.pristine_if_invalid:
        raise NotImplementedError('pristine_if_invalid option is not supported yet')
    nex = get_nexml_el(obj)
    assert nex

    # Build the new objects as locals first; they are attached to nex only
    # after all of the conversions below have succeeded.
    otus = _index_list_of_values(nex, 'otus')
    otus_by_id, otus_order = self.convert_otus(otus)

    trees = _get_index_list_of_values(nex, 'trees')
    trees_by_id = {group['@id']: group for group in trees}
    trees_order = [group['@id'] for group in trees]
    if len(trees_by_id) != len(trees_order):
        # A duplicate id was collapsed by the dict; locate it for the message.
        seen_group_ids = set()
        for group_id in trees_order:
            if group_id in seen_group_ids:
                raise NexsonError('Repeated trees element id "{}"'.format(group_id))
            seen_group_ids.add(group_id)

    seen_tree_ids = set()
    tree_maps_by_group_id = {}
    for tree_group in trees:
        tree_by_id = {}
        tree_order = []
        for tree in _get_index_list_of_values(tree_group, 'tree'):
            converted = self.convert_tree(tree)
            if converted is None:
                continue
            tid, tree_alias = converted
            if tid in seen_tree_ids:
                raise NexsonError('Repeated tree element id "{}"'.format(tid))
            seen_tree_ids.add(tid)
            assert tree_alias is tree
            tree_by_id[tid] = tree
            tree_order.append(tid)
        tree_maps_by_group_id[tree_group['@id']] = tree_by_id
        tree_group['^ot:treeElementOrder'] = tree_order

    # If all that succeeds, add the new objects to the dict, creating a fat structure
    nex['otusById'] = otus_by_id
    nex['^ot:otusElementOrder'] = otus_order
    nex['treesById'] = trees_by_id
    nex['^ot:treesElementOrder'] = trees_order
    for group_id, tree_by_id in tree_maps_by_group_id.items():
        trees_by_id[group_id]['treeById'] = tree_by_id
    nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)

    # Make the struct leaner
    if self.remove_old_structs:
        del nex['otus']
        del nex['trees']
        for group in trees_by_id.values():
            if 'tree' in group:
                del group['tree']
            del group['@id']
    return obj