def add_or_replace_annotation(self,  # pylint: disable=R0201
                              obj,
                              annotation,
                              agent,
                              add_agent_only=False):
     """Register `agent` in the 'ot:agents' metadata of `obj` and update
     the annotations attributed to it.

     `annotation` is a dictionary which is expected to have a string as
     the value of annotation['author']['name'].
     As documented for this function, annotations are removed from obj that:
         1. have the same author/name, and
         2. have no messages that are flagged as messages to be preserved
             (values for 'preserve' that evaluate to true)

     `agent` must carry an '@id' key; it is appended to the agent list only
     when no existing entry has that '@id'. When `add_agent_only` is true
     the work on the annotation is delegated to delete_same_agent_annotation,
     otherwise to replace_same_agent_annotation.
     """
     nexml = get_nexml_el(obj)
     version = detect_nexson_version(obj)
     _LOG.debug('detected version as ' + version)
     agents_meta = find_val_literal_meta_first(nexml, 'ot:agents', version)
     if not agents_meta:
         # no (or an empty) ot:agents meta element yet: create one
         agents_meta = add_literal_meta(nexml, 'ot:agents', {'agent': []}, version)
     known_agents = agents_meta.setdefault('agent', [])
     agent_id = agent['@id']
     already_listed = any(entry.get('@id') == agent_id for entry in known_agents)
     if not already_listed:
         known_agents.append(agent)
     if add_agent_only:
         delete_same_agent_annotation(obj, annotation)
     else:
         replace_same_agent_annotation(obj, annotation)
Example #2
0
    def convert(self, obj):
        """Convert the NexSON blob `obj` to the DIRECT_HONEY_BADGERFISH form.

        The object is modified in place and returned. Raises
        NotImplementedError when the `pristine_if_invalid` option is set.
        """
        if self.pristine_if_invalid:
            raise NotImplementedError(
                'pristine_if_invalid option is not supported yet')

        nexml = get_nexml_el(obj)
        assert nexml
        self._recursive_convert_dict(nexml)
        # pluralization simplifications in hbf:
        # convert dicts to lists for the primary datastructures, walking
        # from the outermost container down to nodes and edges.
        plural_paths = (
            ('otus',),
            ('otus', 'otu'),
            ('trees',),
            ('trees', 'tree'),
            ('trees', 'tree', 'node'),
            ('trees', 'tree', 'edge'),
        )
        for key_path in plural_paths:
            self._dict_to_list_of_dicts(nexml, *key_path)
        if self._add_tree_xsi_type:
            # give every tree a default xsi:type unless it already has one
            for trees_block in nexml.get('trees', []):
                for tree in trees_block.get('tree', []):
                    tree.setdefault('@xsi:type', 'nex:FloatTree')
        nexml['@nexml2json'] = str(DIRECT_HONEY_BADGERFISH)
        return obj
Example #3
0
def extract_tree_nexson(nexson, tree_id, curr_version=None):
    '''Returns a list of (id, tree, otus_group) tuples for the
    specified tree_id (all trees if tree_id is None).

    `nexson` is converted with convert_nexson_format to the
    BY_ID_HONEY_BADGERFISH form when `curr_version` (detected from the
    blob if None) is not already a by-id version.

    When a specific `tree_id` is requested the returned list holds at most
    one tuple: the search stops at the first tree group containing it.
    The list is empty if no tree matches.
    '''
    if curr_version is None:
        curr_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(curr_version):
        nexson = convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)

    nexml_el = get_nexml_el(nexson)
    tree_groups = nexml_el['treesById']
    # Decide once, with a single test, whether every tree is wanted. The
    # previous mix of a truthiness test and an `is not None` test made a
    # falsy non-None id (e.g. '') return just the first tree encountered.
    want_all = tree_id is None
    tree_obj_otus_group_list = []
    for tree_group in tree_groups.values():
        tree_by_id = tree_group['treeById']
        if want_all:
            tree_list = tree_by_id.items()
        else:
            tree_list = [(tree_id, tree_by_id.get(tree_id))]
        for tid, tree in tree_list:
            if tree is None:
                continue
            # the tree group names its otus group through '@otus'
            otu_groups = nexml_el['otusById']
            ogi = tree_group['@otus']
            otu_group = otu_groups[ogi]['otuById']
            tree_obj_otus_group_list.append((tid, tree, otu_group))
            if not want_all:
                return tree_obj_otus_group_list
    return tree_obj_otus_group_list
Example #4
0
def strip_to_meta_only(blob, nexson_version):
    '''Strips the otu and tree content out of `blob` in place.

    `nexson_version` is detected from the blob when None.

    For by-id versions every 'otuById' mapping is deleted and every entry
    of each 'treeById' mapping is set to None (the tree ids are kept).
    For the other versions every 'otu' entry is deleted and each trees
    group's 'tree' value is replaced by a list of {'id': <tree @id>} dicts.
    A trees group that has no 'tree' entry is left untouched.

    Returns None.
    '''
    if nexson_version is None:
        nexson_version = detect_nexson_version(blob)
    nex = get_nexml_el(blob)
    if _is_by_id_hbf(nexson_version):
        for otus_group in nex.get('otusById', {}).values():
            if 'otuById' in otus_group:
                del otus_group['otuById']
        for trees_group in nex.get('treesById', {}).values():
            tree_group = trees_group['treeById']
            # iterate over a snapshot of the keys while overwriting values
            for k in list(tree_group):
                tree_group[k] = None
    else:
        otus = nex['otus']
        if not isinstance(otus, list):
            otus = [otus]
        for otus_group in otus:
            if 'otu' in otus_group:
                del otus_group['otu']
        trees = nex['trees']
        if not isinstance(trees, list):
            trees = [trees]
        for trees_group in trees:
            tree_list = trees_group.get('tree')
            if tree_list is None:
                # No trees in this group. Wrapping None in a list (as was
                # done before) crashed on None.get('@id').
                continue
            if not isinstance(tree_list, list):
                tree_list = [tree_list]
            trees_group['tree'] = [{'id': i.get('@id')} for i in tree_list]
Example #5
0
def extract_tree_nexson(nexson, tree_id, curr_version=None):
    '''Returns a list of (id, tree, otus_group) tuples for the
    specified tree_id (all trees if tree_id is None).

    `nexson` is converted with convert_nexson_format to the
    BY_ID_HONEY_BADGERFISH form when `curr_version` (detected from the
    blob if None) is not already a by-id version.

    When a specific `tree_id` is requested the returned list holds at most
    one tuple: the search stops at the first tree group containing it.
    The list is empty if no tree matches.
    '''
    if curr_version is None:
        curr_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(curr_version):
        nexson = convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)

    nexml_el = get_nexml_el(nexson)
    tree_groups = nexml_el['treesById']
    # A single `is None` test drives both the lookup and the early exit;
    # mixing it with a truthiness test made a falsy non-None id (e.g. '')
    # return only the first tree encountered.
    want_all = tree_id is None
    tree_obj_otus_group_list = []
    for tree_group in tree_groups.values():
        tree_by_id = tree_group['treeById']
        if want_all:
            tree_list = tree_by_id.items()
        else:
            tree_list = [(tree_id, tree_by_id.get(tree_id))]
        for tid, tree in tree_list:
            if tree is None:
                continue
            # the tree group names its otus group through '@otus'
            otu_groups = nexml_el['otusById']
            ogi = tree_group['@otus']
            otu_group = otu_groups[ogi]['otuById']
            tree_obj_otus_group_list.append((tid, tree, otu_group))
            if not want_all:
                return tree_obj_otus_group_list
    return tree_obj_otus_group_list
    def convert(self, obj):
        """Convert the NexSON blob `obj` to the DIRECT_HONEY_BADGERFISH form.

        The object is modified in place and returned. Raises
        NotImplementedError when the `pristine_if_invalid` option is set.
        """
        if self.pristine_if_invalid:
            raise NotImplementedError('pristine_if_invalid option is not supported yet')

        nexml = get_nexml_el(obj)
        assert nexml
        self._recursive_convert_dict(nexml)
        # pluralization simplifications in hbf:
        # convert dicts to lists for the primary datastructures, walking
        # from the outermost container down to nodes and edges.
        plural_paths = (
            ('otus',),
            ('otus', 'otu'),
            ('trees',),
            ('trees', 'tree'),
            ('trees', 'tree', 'node'),
            ('trees', 'tree', 'edge'),
        )
        for key_path in plural_paths:
            self._dict_to_list_of_dicts(nexml, *key_path)
        if self._add_tree_xsi_type:
            # give every tree a default xsi:type unless it already has one
            for trees_block in nexml.get('trees', []):
                for tree in trees_block.get('tree', []):
                    tree.setdefault('@xsi:type', 'nex:FloatTree')
        nexml['@nexml2json'] = str(DIRECT_HONEY_BADGERFISH)
        return obj
Example #7
0
    def convert(self, obj):
        '''Takes a dict corresponding to the honeybadgerfish JSON blob of the 1.2.* type and
        converts it to DIRECT_HONEY_BADGERFISH version. The object is modified in place
        and returned.

        Raises NotImplementedError when the `pristine_if_invalid` option is
        set, and KeyError when one of 'otusById', '^ot:otusElementOrder',
        'treesById' or '^ot:treesElementOrder' is missing from the nexml
        element.
        '''
        if self.pristine_if_invalid:
            raise NotImplementedError(
                'pristine_if_invalid option is not supported yet')

        nex = get_nexml_el(obj)
        assert nex
        # Create the new objects as locals. No key of nex is written until
        #   both conversions have succeeded, so an exception raised here
        #   leaves the top-level structure without a half-applied 'otus'.
        #   (The helpers may still touch the by-id objects they are given.)
        otusById = nex['otusById']
        otusElementOrder = nex['^ot:otusElementOrder']
        otus = self.convert_otus(otusById, otusElementOrder)
        treesById = nex['treesById']
        treesElementOrder = nex['^ot:treesElementOrder']
        trees = self.convert_trees(treesById, treesElementOrder)
        # add the locals to the object
        nex['otus'] = otus
        nex['trees'] = trees
        nex['@nexml2json'] = str(DIRECT_HONEY_BADGERFISH)
        # Make the struct leaner
        if self.remove_old_structs:
            del nex['otusById']
            del nex['^ot:otusElementOrder']
            del nex['treesById']
            del nex['^ot:treesElementOrder']
        return obj
Example #8
0
def cull_nonmatching_trees(nexson, tree_id, curr_version=None):
    '''Modifies `nexson` and returns it in the BY_ID_HONEY_BADGERFISH form
    (version 1.2.1 according to the original documentation) with any tree
    that does not match the ID removed.

    Tree groups that do not contain `tree_id` are removed entirely; in the
    group that does, every other tree is dropped from both 'treeById' and
    '^ot:treeElementOrder'.

    Note that this does not search through the NexSON for
    every node, edge, tree that was deleted. So the resulting
    NexSON may have broken references !
    '''
    if curr_version is None:
        curr_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(curr_version):
        nexson = convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)

    nexml_el = get_nexml_el(nexson)
    groups_by_id = nexml_el['treesById']
    doomed_group_ids = []
    for group_id, group in groups_by_id.items():
        trees = group['treeById']
        if tree_id not in trees:
            # defer the removal: the outer dict is being iterated
            doomed_group_ids.append(group_id)
            continue
        unwanted = [other for other in trees if other != tree_id]
        for other_id in unwanted:
            group['^ot:treeElementOrder'].remove(other_id)
            del trees[other_id]
    for group_id in doomed_group_ids:
        nexml_el['^ot:treesElementOrder'].remove(group_id)
        del groups_by_id[group_id]
    return nexson
Example #9
0
def strip_to_meta_only(blob, nexson_version):
    '''Strips the otu and tree content out of `blob` in place.

    `nexson_version` is detected from the blob when None.

    For by-id versions every 'otuById' mapping is deleted and every entry
    of each 'treeById' mapping is set to None (the tree ids are kept).
    For the other versions every 'otu' entry is deleted and each trees
    group's 'tree' value is replaced by a list of {'id': <tree @id>} dicts.
    A trees group that has no 'tree' entry is left untouched.

    Returns None.
    '''
    if nexson_version is None:
        nexson_version = detect_nexson_version(blob)
    nex = get_nexml_el(blob)
    if _is_by_id_hbf(nexson_version):
        for otus_group in nex.get('otusById', {}).values():
            if 'otuById' in otus_group:
                del otus_group['otuById']
        for trees_group in nex.get('treesById', {}).values():
            tree_group = trees_group['treeById']
            # iterate over a snapshot of the keys while overwriting values
            for k in list(tree_group):
                tree_group[k] = None
    else:
        otus = nex['otus']
        if not isinstance(otus, list):
            otus = [otus]
        for otus_group in otus:
            if 'otu' in otus_group:
                del otus_group['otu']
        trees = nex['trees']
        if not isinstance(trees, list):
            trees = [trees]
        for trees_group in trees:
            tree_list = trees_group.get('tree')
            if tree_list is None:
                # No trees in this group. Wrapping None in a list (as was
                # done before) crashed on None.get('@id').
                continue
            if not isinstance(tree_list, list):
                tree_list = [tree_list]
            trees_group['tree'] = [{'id': i.get('@id')} for i in tree_list]
    def convert(self, obj):
        '''Takes a dict corresponding to the honeybadgerfish JSON blob of the 1.2.* type and
        converts it to DIRECT_HONEY_BADGERFISH version. The object is modified in place
        and returned.

        Raises NotImplementedError when the `pristine_if_invalid` option is
        set, and KeyError when one of 'otusById', '^ot:otusElementOrder',
        'treesById' or '^ot:treesElementOrder' is missing from the nexml
        element.
        '''
        if self.pristine_if_invalid:
            raise NotImplementedError('pristine_if_invalid option is not supported yet')

        nex = get_nexml_el(obj)
        assert nex
        # Create the new objects as locals. No key of nex is written until
        #   both conversions have succeeded, so an exception raised here
        #   leaves the top-level structure without a half-applied 'otus'.
        #   (The helpers may still touch the by-id objects they are given.)
        otusById = nex['otusById']
        otusElementOrder = nex['^ot:otusElementOrder']
        otus = self.convert_otus(otusById, otusElementOrder)
        treesById = nex['treesById']
        treesElementOrder = nex['^ot:treesElementOrder']
        trees = self.convert_trees(treesById, treesElementOrder)
        # add the locals to the object
        nex['otus'] = otus
        nex['trees'] = trees
        nex['@nexml2json'] = str(DIRECT_HONEY_BADGERFISH)
        # Make the struct leaner
        if self.remove_old_structs:
            del nex['otusById']
            del nex['^ot:otusElementOrder']
            del nex['treesById']
            del nex['^ot:treesElementOrder']
        return obj
Example #11
0
def cull_nonmatching_trees(nexson, tree_id, curr_version=None):
    '''Modifies `nexson` and returns it in the BY_ID_HONEY_BADGERFISH form
    (version 1.2.1 according to the original documentation) with any tree
    that does not match the ID removed.

    Tree groups that do not contain `tree_id` are removed entirely; in the
    group that does, every other tree is dropped from both 'treeById' and
    '^ot:treeElementOrder'.

    Note that this does not search through the NexSON for
    every node, edge, tree that was deleted. So the resulting
    NexSON may have broken references !
    '''
    if curr_version is None:
        curr_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(curr_version):
        nexson = convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)

    nexml_el = get_nexml_el(nexson)
    groups_by_id = nexml_el['treesById']
    doomed_group_ids = []
    for group_id, group in groups_by_id.items():
        trees = group['treeById']
        if tree_id not in trees:
            # defer the removal: the outer dict is being iterated
            doomed_group_ids.append(group_id)
            continue
        unwanted = [other for other in trees if other != tree_id]
        for other_id in unwanted:
            group['^ot:treeElementOrder'].remove(other_id)
            del trees[other_id]
    for group_id in doomed_group_ids:
        nexml_el['^ot:treesElementOrder'].remove(group_id)
        del groups_by_id[group_id]
    return nexson
Example #12
0
 def convert(self, doc_root):
     """Build the honeybadgerfish dict for `doc_root` and return it.

     The element produced by self._gen_hbf_el is tagged with
     self.output_format under '@nexml2json' and wrapped in a single-key
     dict. If no non-empty nexml element can be obtained from that dict
     it is returned as is. Otherwise any 'characters' entry is discarded
     and every tree that has no node flagged '@root' gets the flag set on
     the one node that is not the '@target' of any edge.

     Raises AssertionError if such a tree does not have exactly one
     candidate root node.
     """
     key, val = self._gen_hbf_el(doc_root)
     val['@nexml2json'] = self.output_format
     o = {key: val}
     # Best effort: a failure to locate the nexml element means there is
     # nothing to post-process. Only ordinary exceptions are absorbed, so
     # KeyboardInterrupt/SystemExit still propagate, and the emptiness test
     # is explicit so that it is not dropped when running with -O.
     try:
         n = get_nexml_el(o)
     except Exception:  # pylint: disable=W0703
         return o
     if not n:
         return o
     # ot: discard characters...
     if 'characters' in n:
         del n['characters']
     # ot: expect root=true for exactly one node in a tree.
     for trees in _get_index_list_of_values(n, 'trees'):
         for tree in _get_index_list_of_values(trees, 'tree'):
             node_list = _get_index_list_of_values(tree, 'node')
             # `== True` is kept on purpose: it preserves which values
             # count as a set flag.
             root_node_flagged = any(
                 node.get('@root') == True  # pylint: disable=C0121
                 for node in node_list)
             if root_node_flagged:
                 continue
             node_id_map = {node['@id']: node for node in node_list}
             edge_list = _get_index_list_of_values(tree, 'edge')
             target_set = {edge['@target'] for edge in edge_list}
             # the root is the only node no edge points to
             root_id_set = set(node_id_map.keys()) - target_set
             assert len(root_id_set) == 1
             for ri in root_id_set:
                 node_id_map[ri]['@root'] = True
     return o
Example #13
0
 def convert(self, doc_root):
     """Build the honeybadgerfish dict for `doc_root` and return it.

     The element produced by self._gen_hbf_el is tagged with
     self.output_format under '@nexml2json' and wrapped in a single-key
     dict. If no non-empty nexml element can be obtained from that dict
     it is returned as is. Otherwise any 'characters' entry is discarded
     and every tree that has no node flagged '@root' gets the flag set on
     the one node that is not the '@target' of any edge.

     Raises AssertionError if such a tree does not have exactly one
     candidate root node.
     """
     key, val = self._gen_hbf_el(doc_root)
     val['@nexml2json'] = self.output_format
     o = {key: val}
     # Best effort: a failure to locate the nexml element means there is
     # nothing to post-process. Only ordinary exceptions are absorbed, so
     # KeyboardInterrupt/SystemExit still propagate, and the emptiness test
     # is explicit so that it is not dropped when running with -O.
     try:
         n = get_nexml_el(o)
     except Exception:  # pylint: disable=W0703
         return o
     if not n:
         return o
     # ot: discard characters...
     if 'characters' in n:
         del n['characters']
     # ot: expect root=true for exactly one node in a tree.
     for trees in _get_index_list_of_values(n, 'trees'):
         for tree in _get_index_list_of_values(trees, 'tree'):
             node_list = _get_index_list_of_values(tree, 'node')
             # `== True` is kept on purpose: it preserves which values
             # count as a set flag.
             root_node_flagged = any(
                 node.get('@root') == True  # pylint: disable=C0121
                 for node in node_list)
             if root_node_flagged:
                 continue
             node_id_map = {node['@id']: node for node in node_list}
             edge_list = _get_index_list_of_values(tree, 'edge')
             target_set = {edge['@target'] for edge in edge_list}
             # the root is the only node no edge points to
             root_id_set = set(node_id_map.keys()) - target_set
             assert len(root_id_set) == 1
             for ri in root_id_set:
                 node_id_map[ri]['@root'] = True
     return o
Example #14
0
 def add_or_replace_annotation(
         self,  #pylint: disable=R0201
         obj,
         annotation,
         agent,
         add_agent_only=False):
     '''Register `agent` in the 'ot:agents' metadata of `obj` and update
     the annotations attributed to it.

     `annotation` is a dictionary which is expected to have a string as
     the value of annotation['author']['name'].
     As documented for this function, annotations are removed from obj that:
         1. have the same author/name, and
         2. have no messages that are flagged as messages to be preserved
             (values for 'preserve' that evaluate to true)

     `agent` must carry an '@id' key; it is appended to the agent list only
     when no existing entry has that '@id'. When `add_agent_only` is true
     the work on the annotation is delegated to delete_same_agent_annotation,
     otherwise to replace_same_agent_annotation.
     '''
     nexml = get_nexml_el(obj)
     version = detect_nexson_version(obj)
     _LOG.debug('detected version as ' + version)
     agents_meta = find_val_literal_meta_first(nexml, 'ot:agents', version)
     if not agents_meta:
         # no (or an empty) ot:agents meta element yet: create one
         agents_meta = add_literal_meta(nexml, 'ot:agents', {'agent': []},
                                        version)
     known_agents = agents_meta.setdefault('agent', [])
     agent_id = agent['@id']
     already_listed = any(entry.get('@id') == agent_id for entry in known_agents)
     if not already_listed:
         known_agents.append(agent)
     if add_agent_only:
         delete_same_agent_annotation(obj, annotation)
     else:
         replace_same_agent_annotation(obj, annotation)
Example #15
0
def delete_annotation(obj, agent_id=None, annot_id=None, nexson_version=None):
    """Delete annotations from the annotation list of the NexSON `obj`.

    The list is obtained with get_annotation_list for the nexml element of
    `obj` (the version is detected when `nexson_version` is None), and the
    selection by `agent_id` / `annot_id` is delegated to
    delete_annotation_from_annot_list.
    """
    version = nexson_version
    if version is None:
        version = detect_nexson_version(obj)
    annotations = get_annotation_list(get_nexml_el(obj), version)
    delete_annotation_from_annot_list(annotations,
                                      agent_id=agent_id,
                                      annot_id=annot_id)
Example #16
0
def iter_otu(nexson, nexson_version=None):
    """Generate an (otu_id, otu) pair for every otu of every otus group.

    Only by-id NexSON versions are supported; for any other version
    NotImplementedError is raised when the first item is requested.
    `nexson_version` is detected from the blob when None.
    """
    if nexson_version is None:
        nexson_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(nexson_version):
        raise NotImplementedError('iter_otu is only supported for nexson 1.2 at this point')
    otus_groups = get_nexml_el(nexson).get('otusById', {})
    for group in otus_groups.values():
        otu_by_id = group.get('otuById', {})
        for pair in otu_by_id.items():
            yield pair
Example #17
0
def iter_otu(nexson, nexson_version=None):
    """Generate an (otu_id, otu) pair for every otu of every otus group.

    Only by-id NexSON versions are supported; for any other version
    NotImplementedError is raised when the first item is requested.
    `nexson_version` is detected from the blob when None.
    """
    if nexson_version is None:
        nexson_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(nexson_version):
        raise NotImplementedError('iter_otu is only supported for nexson 1.2 at this point')
    otus_groups = get_nexml_el(nexson).get('otusById', {})
    for group in otus_groups.values():
        otu_by_id = group.get('otuById', {})
        for pair in otu_by_id.items():
            yield pair
Example #18
0
def delete_annotation(obj,
                      agent_id=None,
                      annot_id=None,
                      nexson_version=None):
    """Delete annotations from the annotation list of the NexSON `obj`.

    The list is obtained with get_annotation_list for the nexml element of
    `obj` (the version is detected when `nexson_version` is None), and the
    selection by `agent_id` / `annot_id` is delegated to
    delete_annotation_from_annot_list.
    """
    version = nexson_version
    if version is None:
        version = detect_nexson_version(obj)
    annotations = get_annotation_list(get_nexml_el(obj), version)
    delete_annotation_from_annot_list(annotations, agent_id=agent_id, annot_id=annot_id)
Example #19
0
    def __init__(self, obj, logger):
        """Validate the NexSON blob `obj`, reporting through the
        self._warn_event / self._error_event hooks.

        Top-level keys other than 'nexml' / 'nex:nexml' are reported as
        unrecognized. If no dict-valued nexml element can be obtained the
        missing mandatory key is reported and initialization stops early.
        Otherwise the version is detected and the nexml element is
        validated with a temporary _ValidationContext.

        Raises AssertionError when the detected version does not start
        with '0.0', '1.0' or '1.2'.
        """
        self._raw = obj
        self._nexml = None
        self._pyid_to_nexson_add = {}
        self._logger = logger
        self._repeated_id = False
        self._otuid2ottid_byogid = {}
        self._ottid2otuid_list_byogid = {}
        self._dupottid_by_ogid_tree_id = {}
        uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
        if uk:
            self._warn_event(_NEXEL.TOP_LEVEL,
                             obj=obj,
                             err_type=gen_UnrecognizedKeyWarning,
                             anc=_EMPTY_TUPLE,
                             obj_nex_id=None,
                             key_list=uk)
        self._nexml = None
        # Only ordinary exceptions mean "no nexml element"; the type check
        # is explicit so that it still runs under -O.
        try:
            self._nexml = get_nexml_el(obj)
        except Exception:  # pylint: disable=W0703
            has_nexml = False
        else:
            has_nexml = isinstance(self._nexml, dict)
        if not has_nexml:
            self._error_event(_NEXEL.TOP_LEVEL,
                              obj=obj,
                              err_type=gen_MissingMandatoryKeyWarning,
                              anc=_EMPTY_TUPLE,
                              obj_nex_id=None,
                              key_list=[
                                  'nexml',
                              ])
            return  ## EARLY EXIT!!
        self._nexson_id_to_obj = {}
        self._nexson_version = detect_nexson_version(obj)

        #attr used in validation only should be cleaned up
        # in the finally clause
        self._otu_group_by_id = {}
        self._otu_by_otug = {}

        try:
            # a little duck-punching
            vc = _ValidationContext(self, logger)
            # The inner try starts only once vc exists, so a failure while
            # creating it can no longer hit an unbound `vc` in the cleanup
            # (which hid the real error and skipped the deletions below).
            try:
                add_schema_attributes(vc, self._nexson_version)
                assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
                self._validate_nexml_obj(self._nexml, vc, obj)
            finally:
                vc.adaptor = None  # delete circular ref to help gc
                del vc
        finally:
            del self._otu_group_by_id
            del self._otu_by_otug
Example #20
0
    def __init__(self, obj, logger):
        """Validate the NexSON blob `obj`, reporting through the
        self._warn_event / self._error_event hooks.

        Top-level keys other than 'nexml' / 'nex:nexml' are reported as
        unrecognized. If no dict-valued nexml element can be obtained the
        missing mandatory key is reported and initialization stops early.
        Otherwise the version is detected and the nexml element is
        validated with a temporary _ValidationContext.

        Raises AssertionError when the detected version does not start
        with '0.0', '1.0' or '1.2'.
        """
        self._raw = obj
        self._nexml = None
        self._pyid_to_nexson_add = {}
        self._logger = logger
        self._repeated_id = False
        self._otuid2ottid_byogid = {}
        self._ottid2otuid_list_byogid = {}
        self._dupottid_by_ogid_tree_id = {}
        uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
        if uk:
            self._warn_event(_NEXEL.TOP_LEVEL,
                             obj=obj,
                             err_type=gen_UnrecognizedKeyWarning,
                             anc=_EMPTY_TUPLE,
                             obj_nex_id=None,
                             key_list=uk)
        self._nexml = None
        # Only ordinary exceptions mean "no nexml element"; the type check
        # is explicit so that it still runs under -O.
        try:
            self._nexml = get_nexml_el(obj)
        except Exception:  # pylint: disable=W0703
            has_nexml = False
        else:
            has_nexml = isinstance(self._nexml, dict)
        if not has_nexml:
            self._error_event(_NEXEL.TOP_LEVEL,
                              obj=obj,
                              err_type=gen_MissingMandatoryKeyWarning,
                              anc=_EMPTY_TUPLE,
                              obj_nex_id=None,
                              key_list=['nexml',])
            return ## EARLY EXIT!!
        self._nexson_id_to_obj = {}
        self._nexson_version = detect_nexson_version(obj)

        #attr used in validation only should be cleaned up
        # in the finally clause
        self._otu_group_by_id = {}
        self._otu_by_otug = {}

        try:
            # a little duck-punching
            vc = _ValidationContext(self, logger)
            # The inner try starts only once vc exists, so a failure while
            # creating it can no longer hit an unbound `vc` in the cleanup
            # (which hid the real error and skipped the deletions below).
            try:
                add_schema_attributes(vc, self._nexson_version)
                assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
                self._validate_nexml_obj(self._nexml, vc, obj)
            finally:
                vc.adaptor = None # delete circular ref to help gc
                del vc
        finally:
            del self._otu_group_by_id
            del self._otu_by_otug
Example #21
0
def sort_arbitrarily_ordered_nexson(blob):
    '''Primarily used for testing (getting nice diffs). Calls
    sort_meta_elements and then sorts the otu, node and edge lists by id.

    Blobs in a by-id version are returned untouched. `blob` is modified
    in place and returned.
    '''
    # otu, node and edge elements have no necessary order in v0.0 or v1.0
    version = detect_nexson_version(blob)
    nexml = get_nexml_el(blob)
    if _is_by_id_hbf(version):
        return blob
    sort_meta_elements(blob)
    for otus_block in _get_index_list_of_values(nexml, 'otus'):
        _inplace_sort_by_id(otus_block.get('otu', []))
    for trees_block in _get_index_list_of_values(nexml, 'trees'):
        for tree in _get_index_list_of_values(trees_block, 'tree'):
            for element_key in ('node', 'edge'):
                _inplace_sort_by_id(tree.get(element_key, []))
    return blob
Example #22
0
def sort_arbitrarily_ordered_nexson(blob):
    '''Primarily used for testing (getting nice diffs). Calls
    sort_meta_elements and then sorts the otu, node and edge lists by id.

    Blobs in a by-id version are returned untouched. `blob` is modified
    in place and returned.
    '''
    # otu, node and edge elements have no necessary order in v0.0 or v1.0
    version = detect_nexson_version(blob)
    nexml = get_nexml_el(blob)
    if _is_by_id_hbf(version):
        return blob
    sort_meta_elements(blob)
    for otus_block in _get_index_list_of_values(nexml, 'otus'):
        _inplace_sort_by_id(otus_block.get('otu', []))
    for trees_block in _get_index_list_of_values(nexml, 'trees'):
        for tree in _get_index_list_of_values(trees_block, 'tree'):
            for element_key in ('node', 'edge'):
                _inplace_sort_by_id(tree.get(element_key, []))
    return blob
    def convert(self, obj):
        """Convert the NexSON blob `obj` to the form tagged with
        BADGER_FISH_NEXSON_VERSION.

        The object is modified in place and returned. Raises
        NotImplementedError when the `pristine_if_invalid` option is set.
        """
        if self.pristine_if_invalid:
            raise NotImplementedError('pristine_if_invalid option is not supported yet')

        nexml = get_nexml_el(obj)
        assert nexml
        self._recursive_convert_dict(nexml)
        nexml['@nexml2json'] = str(BADGER_FISH_NEXSON_VERSION)
        for top_level_key in ('otus', 'trees'):
            self._single_el_list_to_dicts(nexml, top_level_key)
        #
        # otu and tree are always arrays in phylografter, so while this
        # flag is True the nested levels are left as lists.
        emulate_phylografter_pluralization = True
        if not emulate_phylografter_pluralization:
            nested_paths = (
                ('otus', 'otu'),
                ('trees', 'tree'),
                ('trees', 'tree', 'node'),
                ('trees', 'tree', 'edge'),
            )
            for key_path in nested_paths:
                self._single_el_list_to_dicts(nexml, *key_path)
        return obj
Example #24
0
    def convert(self, obj):
        '''Convert the NexSON blob `obj` to the form tagged with
        BADGER_FISH_NEXSON_VERSION.

        The object is modified in place and returned. Raises
        NotImplementedError when the `pristine_if_invalid` option is set.
        '''
        if self.pristine_if_invalid:
            raise NotImplementedError('pristine_if_invalid option is not supported yet')

        nexml = get_nexml_el(obj)
        assert nexml
        self._recursive_convert_dict(nexml)
        nexml['@nexml2json'] = str(BADGER_FISH_NEXSON_VERSION)
        for top_level_key in ('otus', 'trees'):
            self._single_el_list_to_dicts(nexml, top_level_key)
        #
        # otu and tree are always arrays in phylografter, so while this
        # flag is True the nested levels are left as lists.
        emulate_phylografter_pluralization = True
        if not emulate_phylografter_pluralization:
            nested_paths = (
                ('otus', 'otu'),
                ('trees', 'tree'),
                ('trees', 'tree', 'node'),
                ('trees', 'tree', 'edge'),
            )
            for key_path in nested_paths:
                self._single_el_list_to_dicts(nexml, *key_path)
        return obj
    def convert(self, obj):
        '''Takes a dict corresponding to the honeybadgerfish JSON blob of the 1.0.* type and
        converts it to BY_ID_HONEY_BADGERFISH version. The object is modified in place
        and returned.

        Raises NotImplementedError when the `pristine_if_invalid` option is
        set, and NexsonError when a trees-group id or a tree id occurs more
        than once.
        '''
        if self.pristine_if_invalid:
            raise NotImplementedError(
                'pristine_if_invalid option is not supported yet')

        nex = get_nexml_el(obj)
        assert nex
        # Create the new objects as locals. This section should not
        #   mutate obj, so that if there is an exception the object
        #   is unchanged on the error exit
        # NOTE(review): the loop below does write '^ot:treeElementOrder' onto
        #   each tree group, and convert_tree is asserted to hand back the
        #   same tree object it was given, so nested objects may already be
        #   modified before the final assignments -- confirm this is intended.
        otus = _index_list_of_values(nex, 'otus')
        o_t = self.convert_otus(otus)
        otusById, otusElementOrder = o_t
        trees = _get_index_list_of_values(nex, 'trees')
        treesById = dict((i['@id'], i) for i in trees)
        treesElementOrder = [i['@id'] for i in trees]
        # A size mismatch means some trees-group id was repeated (the dict
        # collapsed it); walk the ordered ids to name the offender.
        if len(treesById) != len(treesElementOrder):
            trees_id_set = set()
            for tgid in treesElementOrder:
                if tgid in trees_id_set:
                    raise NexsonError(
                        'Repeated trees element id "{}"'.format(tgid))
                trees_id_set.add(tgid)
        # tree ids must be unique across all tree groups, not just within one
        tree_id_set = set()
        # maps each trees-group id to its {tree id: tree} dict
        treeContainingObjByTreesId = {}
        for tree_group in trees:
            #_LOG.debug('converting tree group {} to by_id'.format(tree_group['@id']))
            treeById = {}
            treeElementOrder = []
            tree_array = _get_index_list_of_values(tree_group, 'tree')
            for tree in tree_array:
                #_LOG.debug('# pre-convert keys = {}'.format(tree.keys()))
                t_t = self.convert_tree(tree)
                tid, tree_alias = t_t
                if tid in tree_id_set:
                    raise NexsonError(
                        'Repeated tree element id "{}"'.format(tid))
                tree_id_set.add(tid)

                #_LOG.debug('converting tree {} to by_id'.format(tid))
                #_LOG.debug('# post-convert keys = {}'.format(tree.keys()))
                assert tree_alias is tree
                treeById[tid] = tree
                treeElementOrder.append(tid)
            treeContainingObjByTreesId[tree_group['@id']] = treeById
            tree_group['^ot:treeElementOrder'] = treeElementOrder

        # If all that succeeds, add the new object to the dict, creating a fat structure
        nex['otusById'] = otusById
        nex['^ot:otusElementOrder'] = otusElementOrder
        nex['treesById'] = treesById
        nex['^ot:treesElementOrder'] = treesElementOrder
        for k, v in treeContainingObjByTreesId.items():
            treesById[k]['treeById'] = v
        nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
        # Make the struct leaner
        if self.remove_old_structs:
            del nex['otus']
            del nex['trees']
            # the group's id is now its key in treesById, and its trees live
            # under 'treeById', so the list form and '@id' are dropped
            for k, v in treesById.items():
                if 'tree' in v:
                    del v['tree']
                del v['@id']
        return obj
Example #26
0
    def __init__(self, obj, logger, **kwargs):
        """Validate the NexSON blob `obj`, reporting through the
        self._warn_event / self._error_event hooks.

        Top-level keys other than 'nexml' / 'nex:nexml' are reported as
        unrecognized. If no dict-valued nexml element can be obtained the
        missing mandatory key is reported and initialization stops early.
        Otherwise the version is detected and the nexml element is
        validated with a temporary _ValidationContext.

        The optional keyword argument `max_num_trees_per_study` sets a
        limit on the number of trees (as counted by count_num_trees); when
        it is exceeded an error event is reported.

        Raises AssertionError when the detected version does not start
        with '0.0', '1.0' or '1.2'.
        """
        self._raw = obj
        self._nexml = None
        self._pyid_to_nexson_add = {}
        self._logger = logger
        self._repeated_id = False
        self._otuid2ottid_byogid = {}
        self._ottid2otuid_list_byogid = {}
        self._dupottid_by_ogid_tree_id = {}
        self._max_num_trees_per_study = kwargs.get('max_num_trees_per_study')
        uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
        if uk:
            self._warn_event(_NEXEL.TOP_LEVEL,
                             obj=obj,
                             err_type=gen_UnrecognizedKeyWarning,
                             anc=_EMPTY_TUPLE,
                             obj_nex_id=None,
                             key_list=uk)
        self._nexml = None
        # Only ordinary exceptions mean "no nexml element"; the type check
        # is explicit so that it still runs under -O.
        try:
            self._nexml = get_nexml_el(obj)
        except Exception:  # pylint: disable=W0703
            has_nexml = False
        else:
            has_nexml = isinstance(self._nexml, dict)
        if not has_nexml:
            self._error_event(_NEXEL.TOP_LEVEL,
                              obj=obj,
                              err_type=gen_MissingMandatoryKeyWarning,
                              anc=_EMPTY_TUPLE,
                              obj_nex_id=None,
                              key_list=['nexml', ])
            return  # EARLY EXIT!!
        self._nexson_id_to_obj = {}
        self._nexson_version = detect_nexson_version(obj)

        # attr used in validation only should be cleaned up
        # in the finally clause
        self._otu_group_by_id = {}
        self._otu_by_otug = {}

        try:
            # a little duck-punching
            vc = _ValidationContext(self, logger)
            # inner try: vc is guaranteed to exist when its cleanup runs
            try:
                add_schema_attributes(vc, self._nexson_version)
                assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
                self._validate_nexml_obj(self._nexml, vc, obj)
                if self._max_num_trees_per_study is not None:
                    nt = count_num_trees(self._raw)
                    if nt > self._max_num_trees_per_study:
                        m = '{f:d} trees found, but a limit of {m:d} trees per nexson is being enforced'
                        m = m.format(f=nt, m=self._max_num_trees_per_study)
                        self._error_event(_NEXEL.TOP_LEVEL,
                                          obj=obj,
                                          err_type=gen_MaxSizeExceededWarning,
                                          anc=_EMPTY_TUPLE,
                                          obj_nex_id=None,
                                          message=m)
                        return  # EARLY EXIT!!
            finally:
                vc.adaptor = None  # delete circular ref to help gc
                del vc
        finally:
            del self._otu_group_by_id
            del self._otu_by_otug
Example #27
0
    def __init__(self, obj, logger, **kwargs):
        """Validate the NexSON blob `obj`, reporting problems as warning/error events.

        `obj` is the top-level mapping; every key other than 'nexml' or 'nex:nexml'
        is reported (sorted) in a single gen_UnrecognizedKeyWarning warning event.
        `logger` is stored on the instance and handed to the validation context.
        The only keyword argument read here is `max_num_trees_per_study`; when it is
        not None and `count_num_trees` reports more trees than that, a
        gen_MaxSizeExceededWarning error event is reported.

        If the nexml element cannot be fetched or is not a dict, a
        gen_MissingMandatoryKeyWarning error event is reported and the constructor
        returns before the version-dependent attributes are set.
        """
        self._raw = obj
        self._nexml = None
        self._pyid_to_nexson_add = {}
        self._logger = logger
        self._repeated_id = False
        self._otuid2ottid_byogid = {}
        self._ottid2otuid_list_byogid = {}
        self._dupottid_by_ogid_tree_id = {}
        self._max_num_trees_per_study = kwargs.get('max_num_trees_per_study')
        uk = sorted(k for k in obj if k not in ('nexml', 'nex:nexml'))
        if uk:
            self._warn_event(_NEXEL.TOP_LEVEL,
                             obj=obj,
                             err_type=gen_UnrecognizedKeyWarning,
                             anc=_EMPTY_TUPLE,
                             obj_nex_id=None,
                             key_list=uk)
        try:
            self._nexml = get_nexml_el(obj)
        except Exception:  # lookup failed; reported just below as a missing key
            self._nexml = None
        # Explicit check rather than `assert`, which is stripped under `python -O`.
        if not isinstance(self._nexml, dict):
            self._error_event(_NEXEL.TOP_LEVEL,
                              obj=obj,
                              err_type=gen_MissingMandatoryKeyWarning,
                              anc=_EMPTY_TUPLE,
                              obj_nex_id=None,
                              key_list=['nexml', ])
            return  # EARLY EXIT!!
        self._nexson_id_to_obj = {}
        self._nexson_version = detect_nexson_version(obj)

        # attr used in validation only should be cleaned up
        # in the finally clause
        self._otu_group_by_id = {}
        self._otu_by_otug = {}

        try:
            # a little duck-punching
            vc = _ValidationContext(self, logger)
            # The inner try starts only once `vc` is bound, so its cleanup can
            # never hit an unbound name and mask an error raised while
            # constructing the context.
            try:
                add_schema_attributes(vc, self._nexson_version)
                assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
                self._validate_nexml_obj(self._nexml, vc, obj)
                if self._max_num_trees_per_study is not None:
                    nt = count_num_trees(self._raw)
                    if nt > self._max_num_trees_per_study:
                        m = '{f:d} trees found, but a limit of {m:d} trees per nexson is being enforced'
                        m = m.format(f=nt, m=self._max_num_trees_per_study)
                        self._error_event(_NEXEL.TOP_LEVEL,
                                          obj=obj,
                                          err_type=gen_MaxSizeExceededWarning,
                                          anc=_EMPTY_TUPLE,
                                          obj_nex_id=None,
                                          message=m)
                        return  # EARLY EXIT!!
            finally:
                vc.adaptor = None  # delete circular ref to help gc
                del vc
        finally:
            # runs on every exit path, including both early returns above
            del self._otu_group_by_id
            del self._otu_by_otug
Example #28
0
    def convert(self, obj):
        """Takes a dict corresponding to the honeybadgerfish JSON blob of the 1.0.* type and
        converts it to BY_ID_HONEY_BADGERFISH version. The object is modified in place
        and returned.

        Raises NotImplementedError when `self.pristine_if_invalid` is set, and
        NexsonError when a trees-group id or a tree id is repeated.
        When `self.remove_old_structs` is true, the list-based 'otus'/'trees'/'tree'
        entries and each trees group's '@id' are removed after the by-id
        structures have been added.
        """
        if self.pristine_if_invalid:
            raise NotImplementedError('pristine_if_invalid option is not supported yet')

        nex = get_nexml_el(obj)
        assert nex
        # Build the new by-id containers as locals so that `nex` gains no new
        #   top-level keys until every id check has passed. Note that each trees
        #   group still receives '^ot:treeElementOrder' inside the loop below, and
        #   convert_tree is handed the tree objects themselves.
        otus = _index_list_of_values(nex, 'otus')
        otusById, otusElementOrder = self.convert_otus(otus)
        trees = _get_index_list_of_values(nex, 'trees')
        treesById = {tree_group['@id']: tree_group for tree_group in trees}
        treesElementOrder = [tree_group['@id'] for tree_group in trees]
        if len(treesById) != len(treesElementOrder):
            # Some id collapsed in the dict; walk the ordered ids to name the first repeat.
            trees_id_set = set()
            for tgid in treesElementOrder:
                if tgid in trees_id_set:
                    raise NexsonError('Repeated trees element id "{}"'.format(tgid))
                trees_id_set.add(tgid)
        tree_id_set = set()  # tree ids must be unique across all trees groups
        treeContainingObjByTreesId = {}
        for tree_group in trees:
            treeById = {}
            treeElementOrder = []
            tree_array = _get_index_list_of_values(tree_group, 'tree')
            for tree in tree_array:
                t_t = self.convert_tree(tree)
                if t_t is None:
                    # convert_tree returned nothing for this tree; it is left out
                    continue
                tid, tree_alias = t_t
                if tid in tree_id_set:
                    raise NexsonError('Repeated tree element id "{}"'.format(tid))
                tree_id_set.add(tid)
                assert tree_alias is tree
                treeById[tid] = tree
                treeElementOrder.append(tid)
            treeContainingObjByTreesId[tree_group['@id']] = treeById
            tree_group['^ot:treeElementOrder'] = treeElementOrder

        # If all that succeeds, add the new object to the dict, creating a fat structure
        nex['otusById'] = otusById
        nex['^ot:otusElementOrder'] = otusElementOrder
        nex['treesById'] = treesById
        nex['^ot:treesElementOrder'] = treesElementOrder
        for trees_id, tree_by_id in treeContainingObjByTreesId.items():
            treesById[trees_id]['treeById'] = tree_by_id
        nex['@nexml2json'] = str(BY_ID_HONEY_BADGERFISH)
        # Make the struct leaner
        if self.remove_old_structs:
            del nex['otus']
            # 'trees' is fetched above with the same helper that is used for the
            #   optional 'tree' key, so presumably it may be absent; pop() keeps
            #   a blob without trees from failing here, after `nex` was already
            #   given its by-id keys.
            nex.pop('trees', None)
            for tree_group in treesById.values():
                tree_group.pop('tree', None)
                del tree_group['@id']
        return obj
Example #29
0
def nexml_el_of_by_id(nexson, curr_version=None):
    """Return the nexml element of `nexson` in BY_ID_HONEY_BADGERFISH form.

    `curr_version` is the already-known format version of `nexson`; when it is
    None the version is detected from the blob. A blob that `_is_by_id_hbf`
    does not accept is passed through `convert_nexson_format` first.
    """
    version = detect_nexson_version(nexson) if curr_version is None else curr_version
    if _is_by_id_hbf(version):
        return get_nexml_el(nexson)
    converted = convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)
    return get_nexml_el(converted)
Example #30
0
def nexml_el_of_by_id(nexson, curr_version=None):
    """Fetch the nexml element from a by-id (BY_ID_HONEY_BADGERFISH) view of `nexson`.

    The version is taken from `curr_version`, or detected from `nexson` when
    that argument is None. The blob is converted with `convert_nexson_format`
    only if `_is_by_id_hbf` rejects that version.
    """
    version = curr_version
    if version is None:
        version = detect_nexson_version(nexson)
    needs_conversion = not _is_by_id_hbf(version)
    by_id_blob = (convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)
                  if needs_conversion
                  else nexson)
    return get_nexml_el(by_id_blob)