コード例 #1
0
 def convert(self, doc_root):
     '''Build the {root_key: value} dict for `doc_root`, tidy it, and return it.

     The key/value pair comes from `self._gen_hbf_el(doc_root)` and the value
     is stamped with `self.output_format` under '@nexml2json'. If
     `get_nexml_el` raises or returns a falsy value, the dict is returned with
     no further changes. Otherwise the 'characters' entry (if any) is removed
     and every tree in which no node has '@root' equal to True gets
     '@root' = True set on the one node that is not the '@target' of any edge.

     An `assert` checks that such a tree has exactly one node that is never
     an edge target.
     '''
     key, val = self._gen_hbf_el(doc_root)
     val['@nexml2json'] = self.output_format
     o = {key: val}
     try:
         n = get_nexml_el(o)
     except Exception:
         # Best effort: hand back the unpolished dict. Only Exception is
         # caught so KeyboardInterrupt/SystemExit still propagate.
         return o
     if not n:
         # Explicit test (rather than an assert) so the guard survives -O.
         return o
     # ot: discard characters...
     if 'characters' in n:
         del n['characters']
     # ot: expect root=true for exactly one node in a tree.
     for trees in _get_index_list_of_values(n, 'trees'):
         for tree in _get_index_list_of_values(trees, 'tree'):
             node_list = _get_index_list_of_values(tree, 'node')
             # '== True' (not 'is True') is kept on purpose so that the same
             # values are accepted as before.
             if any(node.get('@root') == True for node in node_list):
                 continue
             node_id_map = dict(
                 (node['@id'], node) for node in node_list)
             edge_list = _get_index_list_of_values(tree, 'edge')
             target_set = set(i['@target'] for i in edge_list)
             # The root is the only node that no edge points at.
             root_id_set = set(node_id_map.keys()) - target_set
             assert len(root_id_set) == 1
             for ri in root_id_set:
                 node_id_map[ri]['@root'] = True
     return o
コード例 #2
0
ファイル: nexml2nexson.py プロジェクト: pombredanne/peyotl
 def convert(self, doc_root):
     '''Build the {root_key: value} dict for `doc_root`, tidy it, and return it.

     The key/value pair comes from `self._gen_hbf_el(doc_root)` and the value
     is stamped with `self.output_format` under '@nexml2json'. If
     `get_nexml_el` raises or returns a falsy value, the dict is returned with
     no further changes. Otherwise the 'characters' entry (if any) is removed
     and every tree in which no node has '@root' equal to True gets
     '@root' = True set on the one node that is not the '@target' of any edge.

     An `assert` checks that such a tree has exactly one node that is never
     an edge target.
     '''
     key, val = self._gen_hbf_el(doc_root)
     val['@nexml2json'] = self.output_format
     o = {key: val}
     try:
         n = get_nexml_el(o)
     except Exception:
         # Best effort: hand back the unpolished dict. Only Exception is
         # caught so KeyboardInterrupt/SystemExit still propagate.
         return o
     if not n:
         # Explicit test (rather than an assert) so the guard survives -O.
         return o
     # ot: discard characters...
     if 'characters' in n:
         del n['characters']
     # ot: expect root=true for exactly one node in a tree.
     for trees in _get_index_list_of_values(n, 'trees'):
         for tree in _get_index_list_of_values(trees, 'tree'):
             node_list = _get_index_list_of_values(tree, 'node')
             # '== True' (not 'is True') is kept on purpose so that the same
             # values are accepted as before.
             if any(node.get('@root') == True for node in node_list):
                 continue
             node_id_map = dict((node['@id'], node) for node in node_list)
             edge_list = _get_index_list_of_values(tree, 'edge')
             target_set = set(i['@target'] for i in edge_list)
             # The root is the only node that no edge points at.
             root_id_set = set(node_id_map.keys()) - target_set
             assert len(root_id_set) == 1
             for ri in root_id_set:
                 node_id_map[ri]['@root'] = True
     return o
コード例 #3
0
 def convert_tree(self, tree):
     '''Add by-id lookup tables to `tree` in place and return (tree_id, tree).

     'nodeById' maps each node '@id' to its node, 'edgeBySourceId' maps each
     edge '@source' to a dict of its edges keyed by the edge '@id' (which is
     removed from the edge itself), and '^ot:rootNodeId' holds the id of the
     node flagged as root. When `self.remove_old_structs` is true, the tree's
     '@id', 'node' and 'edge' entries and each node's '@id' and '^ot:isLeaf'
     are removed as well.

     Asserts that exactly one node has '@root' equal to True or 'true'.
     '''
     nodes_by_id = {}
     root = None
     nodes = _index_list_of_values(tree, 'node')
     for nd in nodes:
         nodes_by_id[nd['@id']] = nd
         # @TEMP accepting true or "true"
         if nd.get('@root') in (True, 'true'):
             assert root is None
             root = nd
     assert root is not None
     edges_by_source = {}
     for edge in _get_index_list_of_values(tree, 'edge'):
         source_id = edge['@source']
         edge_id = edge.pop('@id')
         edges_by_source.setdefault(source_id, {})[edge_id] = edge
     # Everything above succeeded, so attach the new tables (fat structure).
     tree['nodeById'] = nodes_by_id
     tree['edgeBySourceId'] = edges_by_source
     tree['^ot:rootNodeId'] = root['@id']
     tree_id = tree['@id']
     if not self.remove_old_structs:
         return tree_id, tree
     # Slim the structure down by dropping what the tables now replace.
     for stale_key in ('@id', 'node', 'edge'):
         del tree[stale_key]
     for nd in nodes:
         nd.pop('^ot:isLeaf', None)
         del nd['@id']
     return tree_id, tree
コード例 #4
0
 def convert_tree(self, tree):
     '''Index the nodes and edges of `tree` by id, in place.

     Returns the pair (tree_id, tree). The tree gains 'nodeById' (node '@id'
     to node), 'edgeBySourceId' (edge '@source' to a dict of edges keyed by
     edge '@id', with '@id' removed from each edge) and '^ot:rootNodeId'.
     If `self.remove_old_structs` is true, the tree's '@id', 'node' and 'edge'
     entries are deleted, as are each node's '@id' and '^ot:isLeaf'.

     Asserts that exactly one node has '@root' equal to True or 'true'.
     '''
     node_lookup = {}
     root_found = None
     all_nodes = _index_list_of_values(tree, 'node')
     for current in all_nodes:
         node_lookup[current['@id']] = current
         root_flag = current.get('@root')
         # @TEMP accepting true or "true"
         if root_flag in (True, 'true'):
             assert root_found is None
             root_found = current
     assert root_found is not None
     edge_lookup = {}
     for link in _get_index_list_of_values(tree, 'edge'):
         src = link['@source']
         link_id = link.pop('@id')
         siblings = edge_lookup.setdefault(src, {})
         siblings[link_id] = link
     # Only attach the new tables once all of the above has succeeded.
     tree['nodeById'] = node_lookup
     tree['edgeBySourceId'] = edge_lookup
     tree['^ot:rootNodeId'] = root_found['@id']
     result = (tree['@id'], tree)
     if self.remove_old_structs:
         # Make the struct leaner.
         del tree['@id']
         del tree['node']
         del tree['edge']
         for current in all_nodes:
             current.pop('^ot:isLeaf', None)
             del current['@id']
     return result
コード例 #5
0
ファイル: __init__.py プロジェクト: pombredanne/peyotl
def sort_arbitrarily_ordered_nexson(blob):
    '''Sort the otu, node and edge lists of `blob` by id and return `blob`.

    Primarily used for testing (getting nice diffs). A blob whose detected
    version is a by-id form is returned untouched; otherwise
    sort_meta_elements is called on it before the lists are sorted in place.
    '''
    # otu, node and edge elements have no necessary order in v0.0 or v1.0
    version = detect_nexson_version(blob)
    nexml = get_nexml_el(blob)
    if not _is_by_id_hbf(version):
        sort_meta_elements(blob)
        for otus_group in _get_index_list_of_values(nexml, 'otus'):
            _inplace_sort_by_id(otus_group.get('otu', []))
        for trees_group in _get_index_list_of_values(nexml, 'trees'):
            for tree in _get_index_list_of_values(trees_group, 'tree'):
                for list_key in ('node', 'edge'):
                    _inplace_sort_by_id(tree.get(list_key, []))
    return blob
コード例 #6
0
def sort_arbitrarily_ordered_nexson(blob):
    '''Return `blob` after sorting its otu, node and edge lists by id.

    Primarily used for testing (getting nice diffs). Nothing is changed when
    the detected version is a by-id form; otherwise sort_meta_elements is
    called first and the lists are then sorted in place.
    '''
    # otu, node and edge elements have no necessary order in v0.0 or v1.0
    detected = detect_nexson_version(blob)
    nexml_el = get_nexml_el(blob)
    if _is_by_id_hbf(detected):
        return blob
    sort_meta_elements(blob)
    otus_groups = _get_index_list_of_values(nexml_el, 'otus')
    for group in otus_groups:
        otu_list = group.get('otu', [])
        _inplace_sort_by_id(otu_list)
    trees_groups = _get_index_list_of_values(nexml_el, 'trees')
    for group in trees_groups:
        for one_tree in _get_index_list_of_values(group, 'tree'):
            node_list = one_tree.get('node', [])
            _inplace_sort_by_id(node_list)
            edge_list = one_tree.get('edge', [])
            _inplace_sort_by_id(edge_list)
    return blob
コード例 #7
0
 def convert_tree(self, tree):
     """Return (tree_id, tree) or None (if the tree has no edges).

     `tree` is modified in place: 'nodeById', 'edgeBySourceId' and
     '^ot:rootNodeId' are added, and '@id' is removed from every edge. When
     `self.remove_old_structs` is true the tree's '@id', 'node' and 'edge'
     entries are removed too, along with each node's '@id' and '^ot:isLeaf'.
     None is only returned on that clean-up path, when the tree has no
     'edge' entry to delete; by then the tree has already been modified.

     Asserts that exactly one node has '@root' equal to True or 'true'.
     """
     nodeById = {}
     root_node = None
     node_list = _index_list_of_values(tree, 'node')
     for node in node_list:
         nodeById[node['@id']] = node
         r = node.get('@root')
         if r in [True, 'true']:  # @TEMP accepting true or "true"
             assert root_node is None
             root_node = node
     assert root_node is not None
     edgeBySourceId = {}
     edge_list = _get_index_list_of_values(tree, 'edge')
     for edge in edge_list:
         sourceId = edge['@source']
         eid = edge['@id']
         del edge['@id']
         byso = edgeBySourceId.setdefault(sourceId, {})
         byso[eid] = edge
     # If all that succeeds, add the new object to the dict, creating a fat structure
     tree['nodeById'] = nodeById
     tree['edgeBySourceId'] = edgeBySourceId
     tree['^ot:rootNodeId'] = root_node['@id']
     # Make the struct leaner
     tid = tree['@id']
     if self.remove_old_structs:
         del tree['@id']
         del tree['node']
         try:
             del tree['edge']
         except KeyError:
             # Only a missing 'edge' key means "no edges"; any other error
             # should surface instead of being swallowed.
             # Tree Tr75035 in http://treebase.org/treebase-web/search/study/summary.html?id=14763
             #   is empty. in NeXML that shows up as a tree with a node but no edges.
             #   See https://github.com/OpenTreeOfLife/opentree/issues/641
             # TODO: returning None seems safest, but could cull trees with just metadata.
             #       but creating a fake tree for metadata is ugly. So, I'm fine with not
             #       supporting this.
             # NOTE(review): assumes _LOG is a stdlib logging.Logger, where
             #   warn() is a deprecated alias of warning() - confirm.
             _LOG.warning(
                 'Tree with ID "{}" is being dropped because it has no edges'
                 .format(tid))
             assert not edge_list
             return None
         for node in node_list:
             if '^ot:isLeaf' in node:
                 del node['^ot:isLeaf']
             del node['@id']
     return tid, tree
コード例 #8
0
ファイル: direct2optimal_nexson.py プロジェクト: rvosa/peyotl
 def convert_tree(self, tree):
     '''Return (tree_id, tree) or None (if the tree has no edges).

     `tree` is modified in place: 'nodeById', 'edgeBySourceId' and
     '^ot:rootNodeId' are added, and '@id' is removed from every edge. When
     `self.remove_old_structs` is true the tree's '@id', 'node' and 'edge'
     entries are removed too, along with each node's '@id' and '^ot:isLeaf'.
     None is only returned on that clean-up path, when the tree has no
     'edge' entry to delete; by then the tree has already been modified.

     Asserts that exactly one node has '@root' equal to True or 'true'.
     '''
     nodeById = {}
     root_node = None
     node_list = _index_list_of_values(tree, 'node')
     for node in node_list:
         nodeById[node['@id']] = node
         r = node.get('@root')
         if r in [True, 'true']:  # @TEMP accepting true or "true"
             assert root_node is None
             root_node = node
     assert root_node is not None
     edgeBySourceId = {}
     edge_list = _get_index_list_of_values(tree, 'edge')
     for edge in edge_list:
         sourceId = edge['@source']
         eid = edge['@id']
         del edge['@id']
         byso = edgeBySourceId.setdefault(sourceId, {})
         byso[eid] = edge
     # If all that succeeds, add the new object to the dict, creating a fat structure
     tree['nodeById'] = nodeById
     tree['edgeBySourceId'] = edgeBySourceId
     tree['^ot:rootNodeId'] = root_node['@id']
     # Make the struct leaner
     tid = tree['@id']
     if self.remove_old_structs:
         del tree['@id']
         del tree['node']
         try:
             del tree['edge']
         except KeyError:
             # Only a missing 'edge' key means "no edges"; any other error
             # should surface instead of being swallowed.
             # Tree Tr75035 in http://treebase.org/treebase-web/search/study/summary.html?id=14763
             #   is empty. in NeXML that shows up as a tree with a node but no edges.
             #   See https://github.com/OpenTreeOfLife/opentree/issues/641
             # TODO: returning None seems safest, but could cull trees with just metadata.
             #       but creating a fake tree for metadata is ugly. So, I'm fine with not
             #       supporting this.
             # NOTE(review): assumes _LOG is a stdlib logging.Logger, where
             #   warn() is a deprecated alias of warning() - confirm.
             _LOG.warning('Tree with ID "{}" is being dropped because it has no edges'.format(tid))
             assert not edge_list
             return None
         for node in node_list:
             if '^ot:isLeaf' in node:
                 del node['^ot:isLeaf']
             del node['@id']
     return tid, tree
コード例 #9
0
 def _recursive_convert_dict(self, obj):
     '''Fold the 'meta' entries of `obj` into `obj` itself, then recurse.

     Each meta element is transformed into a (key, value) pair by
     `_transform_resource_meta` or `_transform_literal_meta`, chosen by
     matching its '@xsi:type'. The pairs are collected first and then added
     to `obj`; 'meta' is deleted when `self.remove_old_structs` is true.
     Finally every dict or list value of `obj` is converted recursively.
     `obj` is modified in place and nothing is returned.
     '''
     _cull_redundant_about(obj)  # rule 10...
     pending = {}
     for meta_el in _get_index_list_of_values(obj, 'meta'):
         xsi_type = meta_el['@xsi:type']
         if _RESOURCE_META_PAT.match(xsi_type):
             transform = self._transform_resource_meta
         else:
             assert _LITERAL_META_PAT.match(xsi_type)
             transform = self._transform_literal_meta
         new_key, new_val = transform(meta_el)
         _add_value_to_dict_bf(pending, new_key, new_val)
     if ('meta' in obj) and self.remove_old_structs:
         obj.pop('meta')
     for new_key, new_val in pending.items():
         _add_value_to_dict_bf(obj, new_key, new_val)
     for child in obj.values():
         if isinstance(child, dict):
             self._recursive_convert_dict(child)
         elif isinstance(child, list):
             self._recursive_convert_list(child)
コード例 #10
0
 def _recursive_convert_dict(self, obj):
     '''Convert the 'meta' list of `obj` into direct entries, recursively.

     Every meta element is turned into a (key, value) pair: resource metas
     (by '@xsi:type') go through `_transform_resource_meta`, all others are
     asserted to be literal metas and go through `_transform_literal_meta`.
     The pairs are gathered, 'meta' is removed if `self.remove_old_structs`
     is true, and the pairs are then added to `obj`. Dict and list values of
     `obj` are processed recursively. Works in place; returns nothing.
     '''
     _cull_redundant_about(obj)  # rule 10...
     metas = _get_index_list_of_values(obj, 'meta')
     injected = {}
     for entry in metas:
         type_name = entry['@xsi:type']
         is_resource = _RESOURCE_META_PAT.match(type_name)
         if not is_resource:
             assert _LITERAL_META_PAT.match(type_name)
             pair = self._transform_literal_meta(entry)
         else:
             pair = self._transform_resource_meta(entry)
         meta_key, meta_val = pair
         _add_value_to_dict_bf(injected, meta_key, meta_val)
     if 'meta' in obj:
         if self.remove_old_structs:
             del obj['meta']
     for meta_key, meta_val in injected.items():
         _add_value_to_dict_bf(obj, meta_key, meta_val)
     for value in obj.values():
         if isinstance(value, dict):
             self._recursive_convert_dict(value)
         elif isinstance(value, list):
             self._recursive_convert_list(value)
コード例 #11
0
    def convert(self, obj):
        '''Takes a dict corresponding to the honeybadgerfish JSON blob of the 1.0.* type and
        converts it to BY_ID_HONEY_BADGERFISH version. The object is modified in place
        and returned.

        Trees for which `self.convert_tree` returns None are left out of the
        'treeById' table and of '^ot:treeElementOrder'.

        Raises NotImplementedError if `self.pristine_if_invalid` is set, and
        NexsonError if a trees-group id or a tree id occurs more than once.
        '''
        if self.pristine_if_invalid:
            raise NotImplementedError(
                'pristine_if_invalid option is not supported yet')

        nex = get_nexml_el(obj)
        assert nex
        # Create the new objects as locals. This section should not
        #   mutate obj, so that if there is an exception the object
        #   is unchanged on the error exit
        otus = _index_list_of_values(nex, 'otus')
        o_t = self.convert_otus(otus)
        otusById, otusElementOrder = o_t
        trees = _get_index_list_of_values(nex, 'trees')
        treesById = dict((i['@id'], i) for i in trees)
        treesElementOrder = [i['@id'] for i in trees]
        if len(treesById) != len(treesElementOrder):
            # The dict collapsed at least one duplicate id; find it to report it.
            trees_id_set = set()
            for tgid in treesElementOrder:
                if tgid in trees_id_set:
                    raise NexsonError(
                        'Repeated trees element id "{}"'.format(tgid))
                trees_id_set.add(tgid)
        tree_id_set = set()
        treeContainingObjByTreesId = {}
        for tree_group in trees:
            treeById = {}
            treeElementOrder = []
            tree_array = _get_index_list_of_values(tree_group, 'tree')
            for tree in tree_array:
                t_t = self.convert_tree(tree)
                if t_t is None:
                    # convert_tree may return None to signal a tree that
                    # should be dropped; skip it instead of failing to unpack.
                    continue
                tid, tree_alias = t_t
                if tid in tree_id_set:
                    raise NexsonError(
                        'Repeated tree element id "{}"'.format(tid))
                tree_id_set.add(tid)

                assert tree_alias is tree
                treeById[tid] = tree
                treeElementOrder.append(tid)
            treeContainingObjByTreesId[tree_group['@id']] = treeById
            tree_group['^ot:treeElementOrder'] = treeElementOrder

        # If all that succeeds, add the new object to the dict, creating a fat structure
        nex['otusById'] = otusById
        nex['^ot:otusElementOrder'] = otusElementOrder
        nex['treesById'] = treesById
        nex['^ot:treesElementOrder'] = treesElementOrder
        for k, v in treeContainingObjByTreesId.items():
            treesById[k]['treeById'] = v
        nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
        # Make the struct leaner
        if self.remove_old_structs:
            del nex['otus']
            del nex['trees']
            for v in treesById.values():
                if 'tree' in v:
                    del v['tree']
                del v['@id']
        return obj
コード例 #12
0
ファイル: direct2optimal_nexson.py プロジェクト: rvosa/peyotl
    def convert(self, obj):
        '''Convert a honeybadgerfish JSON blob of the 1.0.* type to the
        BY_ID_HONEY_BADGERFISH version. `obj` is modified in place and returned.

        Trees for which `self.convert_tree` returns None are skipped. Raises
        NotImplementedError if `self.pristine_if_invalid` is set, and
        NexsonError when a trees-group id or a tree id is repeated.
        '''
        if self.pristine_if_invalid:
            raise NotImplementedError('pristine_if_invalid option is not supported yet')

        nex = get_nexml_el(obj)
        assert nex
        # Build the new tables as locals first so that the top-level element
        #   is only updated once everything below has succeeded.
        otus_by_id, otus_order = self.convert_otus(_index_list_of_values(nex, 'otus'))
        groups = _get_index_list_of_values(nex, 'trees')
        groups_by_id = {grp['@id']: grp for grp in groups}
        groups_order = [grp['@id'] for grp in groups]
        if len(groups_by_id) != len(groups_order):
            # A duplicate id was collapsed by the dict; locate it for the error.
            seen_group_ids = set()
            for group_id in groups_order:
                if group_id in seen_group_ids:
                    raise NexsonError('Repeated trees element id "{}"'.format(group_id))
                seen_group_ids.add(group_id)
        seen_tree_ids = set()
        tree_tables = {}
        for grp in groups:
            table = {}
            order = []
            for tree in _get_index_list_of_values(grp, 'tree'):
                converted = self.convert_tree(tree)
                if converted is None:
                    continue
                tree_id, same_tree = converted
                if tree_id in seen_tree_ids:
                    raise NexsonError('Repeated tree element id "{}"'.format(tree_id))
                seen_tree_ids.add(tree_id)
                assert same_tree is tree
                table[tree_id] = tree
                order.append(tree_id)
            tree_tables[grp['@id']] = table
            grp['^ot:treeElementOrder'] = order

        # Everything succeeded: attach the new tables (fat structure).
        nex['otusById'] = otus_by_id
        nex['^ot:otusElementOrder'] = otus_order
        nex['treesById'] = groups_by_id
        nex['^ot:treesElementOrder'] = groups_order
        for group_id, table in tree_tables.items():
            groups_by_id[group_id]['treeById'] = table
        nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
        if not self.remove_old_structs:
            return obj
        # Slim down: drop the list forms that the tables now replace.
        del nex['otus']
        del nex['trees']
        for grp in groups_by_id.values():
            grp.pop('tree', None)
            del grp['@id']
        return obj