Ejemplo n.º 1
0
 def convert_otus(self, otus_list):
     """Index the `otus` groups, and the `otu` elements inside each, by '@id'.

     Every element of `otus_list` is a dict that is read through its '@id'
     key and its 'otu' entry. Each group gains an 'otuById' dict mapping
     otu id -> otu dict. When `self.remove_old_structs` is true, the '@id'
     key of every group and of every otu is deleted, as is each group's
     'otu' entry.

     The dicts reachable from `otus_list` are modified in place.

     Returns the pair (dict of group id -> group dict, list of group ids in
     their original order).
     """
     groups_by_id = {group['@id']: group for group in otus_list}
     group_order = [group['@id'] for group in otus_list]
     # Phase 1: build every per-group index before attaching anything, so
     #   that a failure while indexing happens before the groups are changed.
     otu_index_by_group = {}
     for group_id, group in groups_by_id.items():
         otu_index_by_group[group_id] = {
             otu['@id']: otu for otu in _index_list_of_values(group, 'otu')
         }
     # Phase 2: attach the new indexes, creating a "fat" structure.
     for group_id, otu_index in otu_index_by_group.items():
         groups_by_id[group_id]['otuById'] = otu_index
     if not self.remove_old_structs:
         return groups_by_id, group_order
     # Phase 3: make the structure leaner by dropping what the by-id layout
     #   has made redundant.
     for group in groups_by_id.values():
         del group['@id']
     for group_id, otu_index in otu_index_by_group.items():
         del groups_by_id[group_id]['otu']
         for otu in otu_index.values():
             del otu['@id']
             # NOTE: moving '@label' to '^ot:manualLabel' (when the label
             #   differs from '^ot:ottTaxonName' / '^ot:originalLabel') used
             #   to be sketched here, but that logic is disabled.
     return groups_by_id, group_order
Ejemplo n.º 2
0
 def convert_tree(self, tree):
     """Re-index one tree's nodes and edges by id, in place.

     Adds to `tree`:
       'nodeById'        node '@id' -> node dict
       'edgeBySourceId'  edge '@source' -> {edge id -> edge dict}
       '^ot:rootNodeId'  '@id' of the node flagged with '@root'
     Every edge loses its '@id' key (the id becomes its key inside
     'edgeBySourceId'). If `self.remove_old_structs` is true, the tree's
     '@id', 'node' and 'edge' entries are deleted, and so are every node's
     '@id' and (when present) '^ot:isLeaf'.

     Asserts that exactly one node is flagged as the root.
     Returns the pair (tree id, tree).

     NOTE(review): `del tree['edge']` raises KeyError when the tree has no
     'edge' key - confirm whether edgeless trees can reach this method.
     """
     nodes = _index_list_of_values(tree, 'node')
     node_index = {}
     root = None
     for nd in nodes:
         node_index[nd['@id']] = nd
         # @TEMP: the root flag is accepted as True or as the string "true"
         if nd.get('@root') in [True, 'true']:
             assert root is None
             root = nd
     assert root is not None
     edges_by_source = {}
     for edge in _get_index_list_of_values(tree, 'edge'):
         source_id = edge['@source']
         # the edge id moves from the edge dict to the key of the inner dict
         edge_id = edge.pop('@id')
         edges_by_source.setdefault(source_id, {})[edge_id] = edge
     # Everything above succeeded: attach the new indexes (fat structure).
     tree['nodeById'] = node_index
     tree['edgeBySourceId'] = edges_by_source
     tree['^ot:rootNodeId'] = root['@id']
     tree_id = tree['@id']
     if not self.remove_old_structs:
         return tree_id, tree
     # Make the struct leaner.
     for stale_key in ('@id', 'node', 'edge'):
         del tree[stale_key]
     for nd in nodes:
         nd.pop('^ot:isLeaf', None)
         del nd['@id']
     return tree_id, tree
Ejemplo n.º 3
0
 def convert_tree(self, tree):
     """Re-index one tree's nodes and edges by id, in place.

     Adds to `tree`:
       'nodeById'        node '@id' -> node dict
       'edgeBySourceId'  edge '@source' -> {edge id -> edge dict}
       '^ot:rootNodeId'  '@id' of the node flagged with '@root'
     Every edge loses its '@id' key (the id becomes its key inside
     'edgeBySourceId'). If `self.remove_old_structs` is true, the tree's
     '@id', 'node' and 'edge' entries are deleted, and so are every node's
     '@id' and (when present) '^ot:isLeaf'.

     Asserts that exactly one node is flagged as the root.
     Returns the pair (tree id, tree).

     NOTE(review): `del tree['edge']` raises KeyError when the tree has no
     'edge' key - confirm whether edgeless trees can reach this method.
     """
     nodes = _index_list_of_values(tree, 'node')
     node_index = {}
     root = None
     for nd in nodes:
         node_index[nd['@id']] = nd
         # @TEMP: the root flag is accepted as True or as the string "true"
         if nd.get('@root') in [True, 'true']:
             assert root is None
             root = nd
     assert root is not None
     edges_by_source = {}
     for edge in _get_index_list_of_values(tree, 'edge'):
         source_id = edge['@source']
         # the edge id moves from the edge dict to the key of the inner dict
         edge_id = edge.pop('@id')
         edges_by_source.setdefault(source_id, {})[edge_id] = edge
     # Everything above succeeded: attach the new indexes (fat structure).
     tree['nodeById'] = node_index
     tree['edgeBySourceId'] = edges_by_source
     tree['^ot:rootNodeId'] = root['@id']
     tree_id = tree['@id']
     if not self.remove_old_structs:
         return tree_id, tree
     # Make the struct leaner.
     for stale_key in ('@id', 'node', 'edge'):
         del tree[stale_key]
     for nd in nodes:
         nd.pop('^ot:isLeaf', None)
         del nd['@id']
     return tree_id, tree
Ejemplo n.º 4
0
 def convert_otus(self, otus_list):
     """Index the `otus` groups, and the `otu` elements inside each, by '@id'.

     Every element of `otus_list` is a dict that is read through its '@id'
     key and its 'otu' entry. Each group gains an 'otuById' dict mapping
     otu id -> otu dict. When `self.remove_old_structs` is true, the '@id'
     key of every group and of every otu is deleted, as is each group's
     'otu' entry.

     The dicts reachable from `otus_list` are modified in place.

     Returns the pair (dict of group id -> group dict, list of group ids in
     their original order).
     """
     groups_by_id = {group['@id']: group for group in otus_list}
     group_order = [group['@id'] for group in otus_list]
     # Phase 1: build every per-group index before attaching anything, so
     #   that a failure while indexing happens before the groups are changed.
     otu_index_by_group = {}
     for group_id, group in groups_by_id.items():
         otu_index_by_group[group_id] = {
             otu['@id']: otu for otu in _index_list_of_values(group, 'otu')
         }
     # Phase 2: attach the new indexes, creating a "fat" structure.
     for group_id, otu_index in otu_index_by_group.items():
         groups_by_id[group_id]['otuById'] = otu_index
     if not self.remove_old_structs:
         return groups_by_id, group_order
     # Phase 3: make the structure leaner by dropping what the by-id layout
     #   has made redundant.
     for group in groups_by_id.values():
         del group['@id']
     for group_id, otu_index in otu_index_by_group.items():
         del groups_by_id[group_id]['otu']
         for otu in otu_index.values():
             del otu['@id']
             # NOTE: moving '@label' to '^ot:manualLabel' (when the label
             #   differs from '^ot:ottTaxonName' / '^ot:originalLabel') used
             #   to be sketched here, but that logic is disabled.
     return groups_by_id, group_order
Ejemplo n.º 5
0
 def _add_dict_of_subtree_to_xml_doc(self, doc, parent, children_dict, key_order=None):
     """Hand each entry of `children_dict` to `_add_subtree_list_to_xml_doc`.

     Keys named in `key_order` are processed first, in that order; every
     remaining key follows in sorted order. `key_order`, when given, is an
     iterable of (key, nested_key_order) pairs in which nested_key_order is
     None or a tuple; it is forwarded as the last argument of
     `_add_subtree_list_to_xml_doc`. Pairs whose key is absent from
     `children_dict` are skipped.
     """
     handled = set()
     for key, nested_order in (key_order or ()):
         assert nested_order is None or isinstance(nested_order, tuple)
         if key not in children_dict:
             continue
         child_list = _index_list_of_values(children_dict, key)
         handled.add(key)
         self._add_subtree_list_to_xml_doc(doc, parent, child_list, key, nested_order)
     for key in sorted(children_dict.keys()):
         # NOTE(review): the lookup runs even for keys that were already
         #   written; it could move below the check if the helper is free of
         #   side effects - confirm.
         child_list = _index_list_of_values(children_dict, key)
         if key in handled:
             continue
         self._add_subtree_list_to_xml_doc(doc, parent, child_list, key, None)
Ejemplo n.º 6
0
 def _add_dict_of_subtree_to_xml_doc(self, doc, parent, children_dict, key_order=None):
     """Hand each entry of `children_dict` to `_add_subtree_list_to_xml_doc`.

     Keys named in `key_order` are processed first, in that order; every
     remaining key follows in sorted order. `key_order`, when given, is an
     iterable of (key, nested_key_order) pairs in which nested_key_order is
     None or a tuple; it is forwarded as the last argument of
     `_add_subtree_list_to_xml_doc`. Pairs whose key is absent from
     `children_dict` are skipped.
     """
     handled = set()
     for key, nested_order in (key_order or ()):
         assert nested_order is None or isinstance(nested_order, tuple)
         if key not in children_dict:
             continue
         child_list = _index_list_of_values(children_dict, key)
         handled.add(key)
         self._add_subtree_list_to_xml_doc(doc, parent, child_list, key, nested_order)
     for key in sorted(children_dict.keys()):
         # NOTE(review): the lookup runs even for keys that were already
         #   written; it could move below the check if the helper is free of
         #   side effects - confirm.
         child_list = _index_list_of_values(children_dict, key)
         if key in handled:
             continue
         self._add_subtree_list_to_xml_doc(doc, parent, child_list, key, None)
Ejemplo n.º 7
0
 def convert_tree(self, tree):
     """Return (tree_id, tree) or None (if the tree has no edges).

     The tree dict is re-indexed in place:
       'nodeById'        node '@id' -> node dict
       'edgeBySourceId'  edge '@source' -> {edge id -> edge dict}
       '^ot:rootNodeId'  '@id' of the node flagged with '@root'
     Every edge loses its '@id' key. If `self.remove_old_structs` is true,
     the tree's '@id', 'node' and 'edge' entries are deleted, as are every
     node's '@id' and (when present) '^ot:isLeaf'.

     None is returned only when `self.remove_old_structs` is true and the
     tree has no 'edge' key; by then the tree has already been partially
     modified, so callers should discard it.

     Asserts that exactly one node is flagged as the root.
     """
     nodeById = {}
     root_node = None
     node_list = _index_list_of_values(tree, 'node')
     for node in node_list:
         nodeById[node['@id']] = node
         r = node.get('@root')
         if r in [True, 'true']:  # @TEMP accepting true or "true"
             assert root_node is None
             root_node = node
     assert root_node is not None
     edgeBySourceId = {}
     edge_list = _get_index_list_of_values(tree, 'edge')
     for edge in edge_list:
         sourceId = edge['@source']
         eid = edge['@id']
         # the id becomes the key in the per-source dict, so drop it here
         del edge['@id']
         byso = edgeBySourceId.setdefault(sourceId, {})
         byso[eid] = edge
     # If all that succeeds, add the new object to the dict, creating a fat structure
     tree['nodeById'] = nodeById
     tree['edgeBySourceId'] = edgeBySourceId
     tree['^ot:rootNodeId'] = root_node['@id']
     # Make the struct leaner
     tid = tree['@id']
     if self.remove_old_structs:
         del tree['@id']
         del tree['node']
         try:
             del tree['edge']
         except KeyError:
             # Only a missing 'edge' key is expected here; anything else
             #   (including KeyboardInterrupt) must propagate.
             # Tree Tr75035 in http://treebase.org/treebase-web/search/study/summary.html?id=14763
             #   is empty. in NeXML that shows up as a tree with a node but no edges.
             #   See https://github.com/OpenTreeOfLife/opentree/issues/641
             # TODO: returning None seems safest, but could cull trees with just metadata.
             #       but creating a fake tree for metadata is ugly. So, I'm fine with not
             #       supporting this.
             # `warning` is the supported spelling; `warn` is a deprecated alias.
             _LOG.warning(
                 'Tree with ID "{}" is being dropped because it has no edges'
                 .format(tid))
             assert not edge_list
             return None
         for node in node_list:
             if '^ot:isLeaf' in node:
                 del node['^ot:isLeaf']
             del node['@id']
     return tid, tree
Ejemplo n.º 8
0
 def _add_meta_dict_to_xml(self, doc, parent, meta_dict):
     '''
     Pass every value held in `meta_dict` to `_add_meta_value_to_xml_doc`.

     Keys are visited in sorted order and the value(s) stored under each key
     are forwarded one at a time, together with `doc` and `parent`. A falsy
     `meta_dict` (None or empty) is a no-op.

     Values in the meta element dict are converted to a BadgerFish-style
         encoding (see _convert_hbf_meta_val_for_xml), so regardless of
         input_format they are treated as if they were BadgerFish.
     '''
     if meta_dict:
         for key in sorted(meta_dict.keys()):
             for el in _index_list_of_values(meta_dict, key):
                 self._add_meta_value_to_xml_doc(doc, parent, el)
Ejemplo n.º 9
0
 def _add_meta_dict_to_xml(self, doc, parent, meta_dict):
     '''
     Pass every value held in `meta_dict` to `_add_meta_value_to_xml_doc`.

     Keys are visited in sorted order and the value(s) stored under each key
     are forwarded one at a time, together with `doc` and `parent`. A falsy
     `meta_dict` (None or empty) is a no-op.

     Values in the meta element dict are converted to a BadgerFish-style
         encoding (see _convert_hbf_meta_val_for_xml), so regardless of
         input_format they are treated as if they were BadgerFish.
     '''
     if meta_dict:
         for key in sorted(meta_dict.keys()):
             for el in _index_list_of_values(meta_dict, key):
                 self._add_meta_value_to_xml_doc(doc, parent, el)
Ejemplo n.º 10
0
 def convert_tree(self, tree):
     '''Return (tree_id, tree) or None (if the tree has no edges).

     The tree dict is re-indexed in place:
       'nodeById'        node '@id' -> node dict
       'edgeBySourceId'  edge '@source' -> {edge id -> edge dict}
       '^ot:rootNodeId'  '@id' of the node flagged with '@root'
     Every edge loses its '@id' key. If `self.remove_old_structs` is true,
     the tree's '@id', 'node' and 'edge' entries are deleted, as are every
     node's '@id' and (when present) '^ot:isLeaf'.

     None is returned only when `self.remove_old_structs` is true and the
     tree has no 'edge' key; by then the tree has already been partially
     modified, so callers should discard it.

     Asserts that exactly one node is flagged as the root.
     '''
     nodeById = {}
     root_node = None
     node_list = _index_list_of_values(tree, 'node')
     for node in node_list:
         nodeById[node['@id']] = node
         r = node.get('@root')
         if r in [True, 'true']:  # @TEMP accepting true or "true"
             assert root_node is None
             root_node = node
     assert root_node is not None
     edgeBySourceId = {}
     edge_list = _get_index_list_of_values(tree, 'edge')
     for edge in edge_list:
         sourceId = edge['@source']
         eid = edge['@id']
         # the id becomes the key in the per-source dict, so drop it here
         del edge['@id']
         byso = edgeBySourceId.setdefault(sourceId, {})
         byso[eid] = edge
     # If all that succeeds, add the new object to the dict, creating a fat structure
     tree['nodeById'] = nodeById
     tree['edgeBySourceId'] = edgeBySourceId
     tree['^ot:rootNodeId'] = root_node['@id']
     # Make the struct leaner
     tid = tree['@id']
     if self.remove_old_structs:
         del tree['@id']
         del tree['node']
         try:
             del tree['edge']
         except KeyError:
             # Only a missing 'edge' key is expected here; anything else
             #   (including KeyboardInterrupt) must propagate.
             # Tree Tr75035 in http://treebase.org/treebase-web/search/study/summary.html?id=14763
             #   is empty. in NeXML that shows up as a tree with a node but no edges.
             #   See https://github.com/OpenTreeOfLife/opentree/issues/641
             # TODO: returning None seems safest, but could cull trees with just metadata.
             #       but creating a fake tree for metadata is ugly. So, I'm fine with not
             #       supporting this.
             # `warning` is the supported spelling; `warn` is a deprecated alias.
             _LOG.warning('Tree with ID "{}" is being dropped because it has no edges'.format(tid))
             assert not edge_list
             return None
         for node in node_list:
             if '^ot:isLeaf' in node:
                 del node['^ot:isLeaf']
             del node['@id']
     return tid, tree
Ejemplo n.º 11
0
    def _hbf_handle_child_elements(self, obj, ntl):
        '''
        Fold the child XML nodes in `ntl` into the dict `obj`; return `obj`.

        Indirect recursion through _gen_hbf_el.

        Unless `self._badgerfish_style_conversion` is set, `meta` children are
        run through `_transform_meta_key_value` and, when the resulting key is
        not None, stored directly on `obj`. All other children are grouped by
        node name in first-seen order, converted with `_gen_hbf_el`, and
        stored as a list under the tag that `_gen_hbf_el` reports.
        '''
        grouped = {}        # node name -> XML element(s) carrying that name
        tag_order = []      # node names in order of first appearance
        seen_tags = set()
        for child in ntl:
            tag = child.nodeName
            if tag == 'meta' and (not self._badgerfish_style_conversion):
                meta_key, meta_val = self._transform_meta_key_value(child)
                if meta_key is not None:
                    _add_value_to_dict_bf(obj, meta_key, meta_val)
                continue
            if tag not in seen_tags:
                tag_order.append(tag)
                seen_tags.add(tag)
            # a repeated tag ends up mapped to a list of XML elements
            _add_value_to_dict_bf(grouped, tag, child)

        # Convert the grouped XML elements to dicts by recursion and add
        #   them to obj.
        for tag in tag_order:
            converted = []
            out_tag = None
            for xml_child in _index_list_of_values(grouped, tag):
                out_tag, as_dict = self._gen_hbf_el(xml_child)
                converted.append(as_dict)
            # This assertion trips if the hacky stripping of namespaces
            #   results in a name clash among the tags of the children.
            assert out_tag not in obj
            obj[out_tag] = converted

        # Last rule of HoneyBadgerFish: drop redundant "about" attributes
        #   that are used in XML but not in JSON.
        _cull_redundant_about(obj)
        return obj
Ejemplo n.º 12
0
    def _hbf_handle_child_elements(self, obj, ntl):
        """
        Fold the child XML nodes in `ntl` into the dict `obj`; return `obj`.

        Indirect recursion through _gen_hbf_el.

        Unless `self._badgerfish_style_conversion` is set, `meta` children are
        run through `_transform_meta_key_value` and, when the resulting key is
        not None, stored directly on `obj`. All other children are grouped by
        node name in first-seen order, converted with `_gen_hbf_el`, and
        stored as a list under the tag that `_gen_hbf_el` reports.
        """
        grouped = {}        # node name -> XML element(s) carrying that name
        tag_order = []      # node names in order of first appearance
        seen_tags = set()
        for child in ntl:
            tag = child.nodeName
            if tag == 'meta' and (not self._badgerfish_style_conversion):
                meta_key, meta_val = self._transform_meta_key_value(child)
                if meta_key is not None:
                    _add_value_to_dict_bf(obj, meta_key, meta_val)
                continue
            if tag not in seen_tags:
                tag_order.append(tag)
                seen_tags.add(tag)
            # a repeated tag ends up mapped to a list of XML elements
            _add_value_to_dict_bf(grouped, tag, child)

        # Convert the grouped XML elements to dicts by recursion and add
        #   them to obj.
        for tag in tag_order:
            converted = []
            out_tag = None
            for xml_child in _index_list_of_values(grouped, tag):
                out_tag, as_dict = self._gen_hbf_el(xml_child)
                converted.append(as_dict)
            # This assertion trips if the hacky stripping of namespaces
            #   results in a name clash among the tags of the children.
            assert out_tag not in obj
            obj[out_tag] = converted

        # Last rule of HoneyBadgerFish: drop redundant "about" attributes
        #   that are used in XML but not in JSON.
        _cull_redundant_about(obj)
        return obj
Ejemplo n.º 13
0
    def convert(self, obj):
        '''Takes a dict corresponding to the honeybadgerfish JSON blob of the 1.0.* type and
        converts it to BY_ID_HONEY_BADGERFISH version. The object is modified in place
        and returned.

        The nexml element gains 'otusById', '^ot:otusElementOrder', 'treesById'
        and '^ot:treesElementOrder'; each trees group gains 'treeById' and
        '^ot:treeElementOrder'. When `self.remove_old_structs` is true the
        'otus' and 'trees' lists, each group's 'tree' list and each group's
        '@id' are deleted.

        Trees for which `convert_tree` returns None are left out of
        'treeById' and '^ot:treeElementOrder'.

        Raises NotImplementedError if `self.pristine_if_invalid` is set, and
        NexsonError if a trees-group id or a tree id is repeated.
        '''
        if self.pristine_if_invalid:
            raise NotImplementedError(
                'pristine_if_invalid option is not supported yet')

        nex = get_nexml_el(obj)
        assert nex
        # Create the new top-level objects as locals before attaching them.
        # NOTE: tree_group is already updated inside the loop below, so obj
        #   may be partially modified if a later step raises.
        otus = _index_list_of_values(nex, 'otus')
        otusById, otusElementOrder = self.convert_otus(otus)
        trees = _get_index_list_of_values(nex, 'trees')
        treesById = dict((i['@id'], i) for i in trees)
        treesElementOrder = [i['@id'] for i in trees]
        if len(treesById) != len(treesElementOrder):
            # at least one group id is duplicated; report the first repeat
            trees_id_set = set()
            for tgid in treesElementOrder:
                if tgid in trees_id_set:
                    raise NexsonError(
                        'Repeated trees element id "{}"'.format(tgid))
                trees_id_set.add(tgid)
        tree_id_set = set()
        treeContainingObjByTreesId = {}
        for tree_group in trees:
            treeById = {}
            treeElementOrder = []
            tree_array = _get_index_list_of_values(tree_group, 'tree')
            for tree in tree_array:
                t_t = self.convert_tree(tree)
                if t_t is None:
                    # convert_tree signalled that the tree must be dropped;
                    #   unpacking None would raise TypeError
                    continue
                tid, tree_alias = t_t
                if tid in tree_id_set:
                    raise NexsonError(
                        'Repeated tree element id "{}"'.format(tid))
                tree_id_set.add(tid)
                # convert_tree works in place, so it must hand back the same dict
                assert tree_alias is tree
                treeById[tid] = tree
                treeElementOrder.append(tid)
            treeContainingObjByTreesId[tree_group['@id']] = treeById
            tree_group['^ot:treeElementOrder'] = treeElementOrder

        # If all that succeeds, add the new object to the dict, creating a fat structure
        nex['otusById'] = otusById
        nex['^ot:otusElementOrder'] = otusElementOrder
        nex['treesById'] = treesById
        nex['^ot:treesElementOrder'] = treesElementOrder
        for k, v in treeContainingObjByTreesId.items():
            treesById[k]['treeById'] = v
        nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
        # Make the struct leaner
        if self.remove_old_structs:
            del nex['otus']
            del nex['trees']
            for group in treesById.values():
                group.pop('tree', None)
                del group['@id']
        return obj
Ejemplo n.º 14
0
    def convert(self, obj):
        '''Convert a honeybadgerfish (1.0.* type) JSON dict to the
        BY_ID_HONEY_BADGERFISH version. The object is modified in place and
        returned.

        The nexml element gains 'otusById', '^ot:otusElementOrder', 'treesById'
        and '^ot:treesElementOrder'; each trees group gains 'treeById' and
        '^ot:treeElementOrder'. Trees for which `convert_tree` returns None
        are left out. When `self.remove_old_structs` is true the 'otus' and
        'trees' lists, each group's 'tree' list and each group's '@id' are
        deleted.

        Raises NotImplementedError if `self.pristine_if_invalid` is set, and
        NexsonError if a trees-group id or a tree id is repeated.
        '''
        if self.pristine_if_invalid:
            raise NotImplementedError('pristine_if_invalid option is not supported yet')

        nexml = get_nexml_el(obj)
        assert nexml
        # The new top-level indexes are built as locals first and attached to
        #   the nexml element only after every tree group has been processed.
        otus_by_id, otus_order = self.convert_otus(_index_list_of_values(nexml, 'otus'))
        tree_groups = _get_index_list_of_values(nexml, 'trees')
        groups_by_id = {grp['@id']: grp for grp in tree_groups}
        group_order = [grp['@id'] for grp in tree_groups]
        if len(groups_by_id) != len(group_order):
            # some group id occurs more than once: report the first repeat
            seen_group_ids = set()
            for group_id in group_order:
                if group_id in seen_group_ids:
                    raise NexsonError('Repeated trees element id "{}"'.format(group_id))
                seen_group_ids.add(group_id)
        seen_tree_ids = set()
        trees_by_group = {}
        for grp in tree_groups:
            by_id = {}
            order = []
            for tree in _get_index_list_of_values(grp, 'tree'):
                converted = self.convert_tree(tree)
                if converted is None:
                    # convert_tree asked for this tree to be dropped
                    continue
                tree_id, same_tree = converted
                if tree_id in seen_tree_ids:
                    raise NexsonError('Repeated tree element id "{}"'.format(tree_id))
                seen_tree_ids.add(tree_id)
                # the conversion is in place, so the same dict must come back
                assert same_tree is tree
                by_id[tree_id] = tree
                order.append(tree_id)
            trees_by_group[grp['@id']] = by_id
            grp['^ot:treeElementOrder'] = order

        # Everything converted: attach the new indexes (fat structure).
        nexml['otusById'] = otus_by_id
        nexml['^ot:otusElementOrder'] = otus_order
        nexml['treesById'] = groups_by_id
        nexml['^ot:treesElementOrder'] = group_order
        for group_id, by_id in trees_by_group.items():
            groups_by_id[group_id]['treeById'] = by_id
        nexml['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
        if not self.remove_old_structs:
            return obj
        # Make the struct leaner.
        del nexml['otus']
        del nexml['trees']
        for grp in groups_by_id.values():
            grp.pop('tree', None)
            del grp['@id']
        return obj