Ejemplo n.º 1
0
 def diagnose_repo_nexml2json(self):
     """Return the NexSON version detected in the first indexed study.

     While holding ``self._index_lock``, takes the first value of
     ``self.study_index`` and uses its element at position 2 as a file
     path. That file is parsed as UTF-8 encoded JSON and the parsed
     object is passed to ``detect_nexson_version``.

     Raises:
         IndexError: if ``self.study_index`` is empty.
     """
     with self._index_lock:
         # dict views cannot be subscripted on Python 3, so the first value
         # is taken through an iterator instead of ``values()[0]``.
         first_entry = next(iter(self.study_index.values()), None)
         if first_entry is None:
             raise IndexError('study_index is empty')
         fp = first_entry[2]
     _LOG.debug('diagnose_repo_nexml2json with fp={}'.format(fp))
     with codecs.open(fp, mode='r', encoding='utf-8') as fo:
         fj = json.load(fo)
     return detect_nexson_version(fj)
Ejemplo n.º 2
0
def iter_otus(nexson, nexson_version=None):
    """Generate every otu found in every otus group of `nexson`.

    Each yielded item is a 3-tuple:
        (otus group ID, otu ID, the otu obj)

    When the detected (or supplied) version is not the by-id form, `nexson`
    is converted in place to BY_ID_HONEY_BADGERFISH before iterating.
    """
    version = nexson_version
    if version is None:
        version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(version):
        # TODO shouldn't modify...
        convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)
    nexml_el = get_nexml_el(nexson)
    groups = nexml_el['otusById']
    ordered_group_ids = nexml_el.get('^ot:otusElementOrder', [])
    if len(ordered_group_ids) < len(groups):
        # The recorded order does not cover every group: use sorted IDs.
        ordered_group_ids = sorted(groups.keys())
    for group_id in ordered_group_ids:
        otus = groups[group_id]['otuById']
        # Iterate over a snapshot of the keys, as the original does.
        for otu_id in list(otus.keys()):
            yield group_id, otu_id, otus[otu_id]
Ejemplo n.º 3
0
 def diagnose_repo_nexml2json(self):
     """Return the NexSON version detected in the first indexed study.

     While holding ``self._index_lock``, takes the first value of
     ``self.study_index`` and uses its element at position 2 as a file
     path. That file is parsed as UTF-8 encoded JSON and the parsed
     object is passed to ``detect_nexson_version``.

     Raises:
         IndexError: if ``self.study_index`` is empty.
     """
     with self._index_lock:
         # dict views cannot be subscripted on Python 3, so the first value
         # is taken through an iterator instead of ``values()[0]``.
         first_entry = next(iter(self.study_index.values()), None)
         if first_entry is None:
             raise IndexError('study_index is empty')
         fp = first_entry[2]
     _LOG.debug('diagnose_repo_nexml2json with fp={}'.format(fp))
     with codecs.open(fp, mode='r', encoding='utf-8') as fo:
         fj = json.load(fo)
     return detect_nexson_version(fj)
Ejemplo n.º 4
0
def iter_trees(nexson, nexson_version=None):
    """Generate every tree found in every trees element of `nexson`.

    Each yielded item is a 3-tuple:
        (trees element ID, tree ID, the tree obj)

    The by-id form is walked through 'treesById' / 'treeById'; any other
    version is walked through the 'trees' / 'tree' lists.
    """
    if nexson_version is None:
        nexson_version = detect_nexson_version(nexson)
    nexml_el = get_nexml_el(nexson)
    if not _is_by_id_hbf(nexson_version):
        # List-based layout: IDs are stored on the elements themselves.
        for group in nexml_el.get('trees', []):
            group_id = group['@id']
            for tree in group.get('tree', []):
                yield group_id, tree['@id'], tree
        return
    groups = nexml_el['treesById']
    group_ids = nexml_el.get('^ot:treesElementOrder', [])
    if len(group_ids) < len(groups):
        # The recorded order does not cover every group: use sorted IDs.
        group_ids = sorted(groups.keys())
    for group_id in group_ids:
        group = groups[group_id]
        trees = group['treeById']
        tree_ids = group.get('^ot:treeElementOrder', [])
        if len(tree_ids) < len(trees):
            tree_ids = sorted(trees.keys())
        for tree_id in tree_ids:
            yield group_id, tree_id, trees[tree_id]
Ejemplo n.º 5
0
 def add_or_replace_annotation(self,  #pylint: disable=R0201
                               obj,
                               annotation,
                               agent,
                               add_agent_only=False):
     """Register `agent` on `obj` and update the annotations of that agent.

     `annotation` is a dictionary which is expected to have a string as
     the value of annotation['author']['name'].

     `agent` is appended to the 'agent' list of the 'ot:agents' literal
     meta of the nexml element (the meta is created when it is missing),
     unless an agent with the same '@id' is already listed. Afterwards
     `delete_same_agent_annotation` is called when `add_agent_only` is
     true, and `replace_same_agent_annotation` otherwise.

     As originally documented, annotations are removed from obj when they:
         1. have the same author/name, and
         2. have no messages that are flagged as messages to be preserved
             (values for 'preserve' that evaluate to true)
     """
     nexml_el = get_nexml_el(obj)
     version = detect_nexson_version(obj)
     _LOG.debug('detected version as ' + version)
     agents_meta = find_val_literal_meta_first(nexml_el, 'ot:agents', version)
     if not agents_meta:
         agents_meta = add_literal_meta(nexml_el, 'ot:agents', {'agent': []}, version)
     known_agents = agents_meta.setdefault('agent', [])
     agent_id = agent['@id']
     already_listed = any(a.get('@id') == agent_id for a in known_agents)
     if not already_listed:
         known_agents.append(agent)
     if not add_agent_only:
         replace_same_agent_annotation(obj, annotation)
     else:
         delete_same_agent_annotation(obj, annotation)
Ejemplo n.º 6
0
 def add_or_replace_annotation(
         self,  # pylint: disable=R0201
         obj,
         annotation,
         agent,
         add_agent_only=False):
     """Register `agent` on `obj` and update the annotations of that agent.

     `annotation` is a dictionary which is expected to have a string as
     the value of annotation['author']['name'].

     `agent` is appended to the 'agent' list of the 'ot:agents' literal
     meta of the nexml element (the meta is created when it is missing),
     unless an agent with the same '@id' is already listed. Afterwards
     `delete_same_agent_annotation` is called when `add_agent_only` is
     true, and `replace_same_agent_annotation` otherwise.

     As originally documented, annotations are removed from obj when they:
         1. have the same author/name, and
         2. have no messages that are flagged as messages to be preserved
             (values for 'preserve' that evaluate to true)
     """
     nexml_el = get_nexml_el(obj)
     version = detect_nexson_version(obj)
     _LOG.debug('detected version as ' + version)
     agents_meta = find_val_literal_meta_first(nexml_el, 'ot:agents', version)
     if not agents_meta:
         agents_meta = add_literal_meta(nexml_el, 'ot:agents', {'agent': []}, version)
     known_agents = agents_meta.setdefault('agent', [])
     agent_id = agent['@id']
     already_listed = any(a.get('@id') == agent_id for a in known_agents)
     if not already_listed:
         known_agents.append(agent)
     if not add_agent_only:
         replace_same_agent_annotation(obj, annotation)
     else:
         delete_same_agent_annotation(obj, annotation)
Ejemplo n.º 7
0
def iter_trees(nexson, nexson_version=None):
    """Generate every tree found in every trees element of `nexson`.

    Each yielded item is a 3-tuple:
        (trees element ID, tree ID, the tree obj)

    The by-id form is walked through 'treesById' / 'treeById'; any other
    version is walked through the 'trees' / 'tree' lists.
    """
    if nexson_version is None:
        nexson_version = detect_nexson_version(nexson)
    nexml_el = get_nexml_el(nexson)
    if not _is_by_id_hbf(nexson_version):
        # List-based layout: IDs are stored on the elements themselves.
        for group in nexml_el.get('trees', []):
            group_id = group['@id']
            for tree in group.get('tree', []):
                yield group_id, tree['@id'], tree
        return
    groups = nexml_el['treesById']
    group_ids = nexml_el.get('^ot:treesElementOrder', [])
    if len(group_ids) < len(groups):
        # The recorded order does not cover every group: use sorted IDs.
        group_ids = sorted(groups.keys())
    for group_id in group_ids:
        group = groups[group_id]
        trees = group['treeById']
        tree_ids = group.get('^ot:treeElementOrder', [])
        if len(tree_ids) < len(trees):
            tree_ids = sorted(trees.keys())
        for tree_id in tree_ids:
            yield group_id, tree_id, trees[tree_id]
Ejemplo n.º 8
0
def diagnose_repo_nexml2json(shard):
    """Optimistically detect the NexSON version used in a shard.

    Only the first study found in ``shard.study_index`` is examined: the
    element at position 2 of its index entry is opened as a UTF-8 JSON
    file and the parsed content is given to ``detect_nexson_version``.
    """
    with shard._index_lock:
        first_entry = next(iter(shard.study_index.values()))
        study_path = first_entry[2]
    with codecs.open(study_path, mode='r', encoding='utf-8') as study_file:
        blob = json.load(study_file)
        # The import is kept local to the function, as in the original.
        from peyotl.nexson_syntax import detect_nexson_version
        return detect_nexson_version(blob)
Ejemplo n.º 9
0
def diagnose_repo_nexml2json(shard):
    """Optimistically detect the NexSON version used in a shard.

    Only the first study found in ``shard.study_index`` is examined: the
    element at position 2 of its index entry is opened as a UTF-8 JSON
    file and the parsed content is given to ``detect_nexson_version``.
    """
    with shard._index_lock:
        first_entry = next(iter(shard.study_index.values()))
        study_path = first_entry[2]
    with codecs.open(study_path, mode='r', encoding='utf-8') as study_file:
        blob = json.load(study_file)
        # The import is kept local to the function, as in the original.
        from peyotl.nexson_syntax import detect_nexson_version
        return detect_nexson_version(blob)
Ejemplo n.º 10
0
def delete_annotation(obj,
                      agent_id=None,
                      annot_id=None,
                      nexson_version=None):
    """Delete annotations from the annotation list of `obj`.

    The NexSON version is detected from `obj` when `nexson_version` is
    None. The annotation list of the nexml element is looked up and passed,
    with `agent_id` and `annot_id`, to `delete_annotation_from_annot_list`.
    """
    version = nexson_version
    if version is None:
        version = detect_nexson_version(obj)
    annotations = get_annotation_list(get_nexml_el(obj), version)
    delete_annotation_from_annot_list(annotations, agent_id=agent_id, annot_id=annot_id)
Ejemplo n.º 11
0
def delete_annotation(obj, agent_id=None, annot_id=None, nexson_version=None):
    """Delete annotations from the annotation list of `obj`.

    The NexSON version is detected from `obj` when `nexson_version` is
    None. The annotation list of the nexml element is looked up and passed,
    with `agent_id` and `annot_id`, to `delete_annotation_from_annot_list`.
    """
    version = nexson_version
    if version is None:
        version = detect_nexson_version(obj)
    annotations = get_annotation_list(get_nexml_el(obj), version)
    delete_annotation_from_annot_list(annotations,
                                      agent_id=agent_id,
                                      annot_id=annot_id)
Ejemplo n.º 12
0
    def __init__(self, obj, logger):
        """Validate the NexSON blob `obj` while constructing the adaptor.

        Args:
            obj: the NexSON dict. Top-level keys other than 'nexml' and
                'nex:nexml' are reported through a warning event.
            logger: stored as ``self._logger`` and handed to the
                validation context.

        Construction stops early, after emitting an error event, when the
        nexml element cannot be obtained or is not a dict.
        """
        self._raw = obj
        self._nexml = None
        self._pyid_to_nexson_add = {}
        self._logger = logger
        self._repeated_id = False
        self._otuid2ottid_byogid = {}
        self._ottid2otuid_list_byogid = {}
        self._dupottid_by_ogid_tree_id = {}
        # Any top-level key besides the nexml element is unrecognized.
        uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
        if uk:
            self._warn_event(_NEXEL.TOP_LEVEL,
                             obj=obj,
                             err_type=gen_UnrecognizedKeyWarning,
                             anc=_EMPTY_TUPLE,
                             obj_nex_id=None,
                             key_list=uk)
        try:
            self._nexml = get_nexml_el(obj)
            # An explicit check (rather than an assert) so that the
            # validation still happens when Python runs with -O.
            if not isinstance(self._nexml, dict):
                raise TypeError('nexml element is not a dict')
        except Exception:
            self._error_event(_NEXEL.TOP_LEVEL,
                              obj=obj,
                              err_type=gen_MissingMandatoryKeyWarning,
                              anc=_EMPTY_TUPLE,
                              obj_nex_id=None,
                              key_list=['nexml', ])
            return  # EARLY EXIT!!
        self._nexson_id_to_obj = {}
        self._nexson_version = detect_nexson_version(obj)

        # attr used in validation only should be cleaned up
        # in the finally clause
        self._otu_group_by_id = {}
        self._otu_by_otug = {}

        try:
            # a little duck-punching
            vc = _ValidationContext(self, logger)
            # `vc` is only touched by the inner finally, so a failure while
            # creating it cannot be masked by an UnboundLocalError.
            try:
                add_schema_attributes(vc, self._nexson_version)
                assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
                self._validate_nexml_obj(self._nexml, vc, obj)
            finally:
                vc.adaptor = None  # delete circular ref to help gc
                del vc
        finally:
            del self._otu_group_by_id
            del self._otu_by_otug
Ejemplo n.º 13
0
    def __init__(self, obj, logger):
        """Validate the NexSON blob `obj` while constructing the adaptor.

        Args:
            obj: the NexSON dict. Top-level keys other than 'nexml' and
                'nex:nexml' are reported through a warning event.
            logger: stored as ``self._logger`` and handed to the
                validation context.

        Construction stops early, after emitting an error event, when the
        nexml element cannot be obtained or is not a dict.
        """
        self._raw = obj
        self._nexml = None
        self._pyid_to_nexson_add = {}
        self._logger = logger
        self._repeated_id = False
        self._otuid2ottid_byogid = {}
        self._ottid2otuid_list_byogid = {}
        self._dupottid_by_ogid_tree_id = {}
        # Any top-level key besides the nexml element is unrecognized.
        uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
        if uk:
            self._warn_event(_NEXEL.TOP_LEVEL,
                             obj=obj,
                             err_type=gen_UnrecognizedKeyWarning,
                             anc=_EMPTY_TUPLE,
                             obj_nex_id=None,
                             key_list=uk)
        try:
            self._nexml = get_nexml_el(obj)
            # An explicit check (rather than an assert) so that the
            # validation still happens when Python runs with -O.
            if not isinstance(self._nexml, dict):
                raise TypeError('nexml element is not a dict')
        except Exception:
            self._error_event(_NEXEL.TOP_LEVEL,
                              obj=obj,
                              err_type=gen_MissingMandatoryKeyWarning,
                              anc=_EMPTY_TUPLE,
                              obj_nex_id=None,
                              key_list=['nexml', ])
            return  # EARLY EXIT!!
        self._nexson_id_to_obj = {}
        self._nexson_version = detect_nexson_version(obj)

        # attr used in validation only should be cleaned up
        # in the finally clause
        self._otu_group_by_id = {}
        self._otu_by_otug = {}

        try:
            # a little duck-punching
            vc = _ValidationContext(self, logger)
            # `vc` is only touched by the inner finally, so a failure while
            # creating it cannot be masked by an UnboundLocalError.
            try:
                add_schema_attributes(vc, self._nexson_version)
                assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
                self._validate_nexml_obj(self._nexml, vc, obj)
            finally:
                vc.adaptor = None  # delete circular ref to help gc
                del vc
        finally:
            del self._otu_group_by_id
            del self._otu_by_otug
Ejemplo n.º 14
0
 def testTreesCulledNonmatcingConvViaPSV0(self):
     """Verify that the culling does not break the conversion to other forms of NexSON"""
     blob = pathmap.nexson_obj('9/v1.2.json')
     # The fixture starts out with two trees.
     self.assertEqual(len(extract_tree_nexson(blob, tree_id=None)), 2)
     schema = PhyloSchema('nexson',
                          content='tree',
                          content_id='tree2',
                          version='0.0.0',
                          cull_nonmatching='true')
     serialized = schema.serialize(blob)
     # After serializing, only tree2 is expected to remain in the source object.
     remaining = extract_tree_nexson(blob, tree_id=None)
     self.assertEqual(len(remaining), 1)
     self.assertEqual(remaining[0][0], 'tree2')
     self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
     # The serialized form must parse, be version 0.0.0 and hold only tree2.
     reparsed = json.loads(serialized)
     self.assertEqual(detect_nexson_version(reparsed), '0.0.0')
     reparsed_trees = extract_tree_nexson(reparsed, tree_id=None)
     self.assertEqual(len(reparsed_trees), 1)
     self.assertEqual(reparsed_trees[0][0], 'tree2')
Ejemplo n.º 15
0
 def testTreesCulledNonmatcingConvViaPSV0(self):
     '''Verify that the culling does not break the conversion to other forms of NexSON'''
     blob = pathmap.nexson_obj('9/v1.2.json')
     # The fixture starts out with two trees.
     self.assertEqual(len(extract_tree_nexson(blob, tree_id=None)), 2)
     schema = PhyloSchema('nexson',
                          content='tree',
                          content_id='tree2',
                          version='0.0.0',
                          cull_nonmatching='true')
     serialized = schema.serialize(blob)
     # After serializing, only tree2 is expected to remain in the source object.
     remaining = extract_tree_nexson(blob, tree_id=None)
     self.assertEqual(len(remaining), 1)
     self.assertEqual(remaining[0][0], 'tree2')
     self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
     # The serialized form must parse, be version 0.0.0 and hold only tree2.
     reparsed = json.loads(serialized)
     self.assertEqual(detect_nexson_version(reparsed), '0.0.0')
     reparsed_trees = extract_tree_nexson(reparsed, tree_id=None)
     self.assertEqual(len(reparsed_trees), 1)
     self.assertEqual(reparsed_trees[0][0], 'tree2')
Ejemplo n.º 16
0
def create_validation_adaptor(obj, logger, **kwargs):
    """Return the validation adaptor matching the NexSON version of `obj`.

    Args:
        obj: the NexSON object to validate.
        logger: passed through to the adaptor.
        **kwargs: passed through to the adaptor.

    Returns:
        A ByIdHBFValidationAdaptor, BadgerFishValidationAdaptor or
        DirectHBFValidationAdaptor, depending on the detected version.
        A BadgerFishValidationAdaptor is also returned when the version
        cannot be detected.

    Raises:
        NotImplementedError: if the detected version matches none of the
            known syntaxes.
    """
    try:
        nexson_version = detect_nexson_version(obj)
    except Exception:
        # Best-effort fallback. `Exception` (not a bare except) so that
        # KeyboardInterrupt and SystemExit are not swallowed here.
        return BadgerFishValidationAdaptor(obj, logger, **kwargs)
    if _is_by_id_hbf(nexson_version):
        return ByIdHBFValidationAdaptor(obj, logger, **kwargs)
    if _is_badgerfish_version(nexson_version):
        return BadgerFishValidationAdaptor(obj, logger, **kwargs)
    if _is_direct_hbf(nexson_version):
        return DirectHBFValidationAdaptor(obj, logger, **kwargs)
    raise NotImplementedError('nexml2json version {v}'.format(v=nexson_version))
Ejemplo n.º 17
0
 def __init__(self, filepath='', nexson=None):
     """Wrap a NexSON object, read from `filepath` unless `nexson` is given.

     The wrapped object is converted in place to BY_ID_HONEY_BADGERFISH
     when its detected version differs from it.

     Raises:
         ValueError: if neither `filepath` nor `nexson` is provided.
     """
     self.filepath = filepath
     if nexson is not None:
         self._nexson = nexson
     elif filepath:
         self._nexson = read_as_json(self.filepath)
     else:
         raise ValueError('Either a filepath or nexson argument must be provided')
     if detect_nexson_version(self._nexson) != BY_ID_HONEY_BADGERFISH:
         _LOG.debug('NexsonProxy converting to hbf1.2')
         convert_nexson_format(self._nexson, BY_ID_HONEY_BADGERFISH)
     self._nexml_el = get_nexml_el(self._nexson)
     # The caches and the `_wr` slot start out empty.
     self._otu_cache = {}
     self._tree_cache = {}
     self._wr = None
Ejemplo n.º 18
0
def gen_otu_dict(nex_obj, nexson_version=None):
    '''Takes a NexSON object and returns a dict of
    otu_id -> otu_obj

    For the by-id form, the 'otuById' mapping of a single otus group is
    returned as-is (not copied); with several groups their mappings are
    merged into a new dict, and with no group an empty dict is returned.
    For other versions the dict is built from the '@id' of every element
    in the 'otu' lists of the 'otus' list of `nex_obj`.
    '''
    if nexson_version is None:
        nexson_version = detect_nexson_version(nex_obj)
    if _is_by_id_hbf(nexson_version):
        otus = nex_obj['nexml']['otusById']
        if len(otus) == 1:
            # dicts have no `value()` method and dict views cannot be
            # indexed on Python 3, so fetch the only group via an iterator.
            return next(iter(otus.values()))['otuById']
        merged = {}
        for group in otus.values():
            merged.update(group['otuById'])
        return merged
    # NOTE(review): this branch reads 'otus' from nex_obj itself while the
    # by-id branch goes through nex_obj['nexml'] - confirm what callers pass.
    o_dict = {}
    for ob in nex_obj.get('otus', []):
        for o in ob.get('otu', []):
            o_dict[o['@id']] = o
    return o_dict
Ejemplo n.º 19
0
def gen_otu_dict(nex_obj, nexson_version=None):
    """Takes a NexSON object and returns a dict of
    otu_id -> otu_obj

    For the by-id form, the "otuById" mapping of a single otus group is
    returned as-is (not copied); with several groups their mappings are
    merged into a new dict, and with no group an empty dict is returned.
    For other versions the dict is built from the "@id" of every element
    in the "otu" lists of the "otus" list of `nex_obj`.
    """
    if nexson_version is None:
        nexson_version = detect_nexson_version(nex_obj)
    if _is_by_id_hbf(nexson_version):
        otus = nex_obj["nexml"]["otusById"]
        if len(otus) == 1:
            # dict views cannot be indexed on Python 3, so fetch the only
            # group via an iterator instead of ``values()[0]``.
            return next(iter(otus.values()))["otuById"]
        merged = {}
        for group in otus.values():
            merged.update(group["otuById"])
        return merged
    # NOTE(review): this branch reads "otus" from nex_obj itself while the
    # by-id branch goes through nex_obj["nexml"] - confirm what callers pass.
    o_dict = {}
    for ob in nex_obj.get("otus", []):
        for o in ob.get("otu", []):
            o_dict[o["@id"]] = o
    return o_dict
Ejemplo n.º 20
0
def gen_otu_dict(nex_obj, nexson_version=None):
    '''Takes a NexSON object and returns a dict of
    otu_id -> otu_obj

    For the by-id form, the 'otuById' mapping of a single otus group is
    returned as-is (not copied); with several groups their mappings are
    merged into a new dict, and with no group an empty dict is returned.
    For other versions the dict is built from the '@id' of every element
    in the 'otu' lists of the 'otus' list of `nex_obj`.
    '''
    if nexson_version is None:
        nexson_version = detect_nexson_version(nex_obj)
    if _is_by_id_hbf(nexson_version):
        otus = nex_obj['nexml']['otusById']
        if len(otus) == 1:
            # dicts have no `value()` method and dict views cannot be
            # indexed on Python 3, so fetch the only group via an iterator.
            return next(iter(otus.values()))['otuById']
        merged = {}
        for group in otus.values():
            merged.update(group['otuById'])
        return merged
    # NOTE(review): this branch reads 'otus' from nex_obj itself while the
    # by-id branch goes through nex_obj['nexml'] - confirm what callers pass.
    o_dict = {}
    for ob in nex_obj.get('otus', []):
        for o in ob.get('otu', []):
            o_dict[o['@id']] = o
    return o_dict
Ejemplo n.º 21
0
def count_num_trees(nexson, nexson_version=None):
    '''Returns the number of trees summed across all tree
    groups.

    The NexSON version is detected from `nexson` when `nexson_version` is
    None. In the by-id form the sizes of the 'treeById' mappings are
    added up. Otherwise 'trees' (a list of groups, or a single group dict)
    is walked: a 'tree' value that is a list counts its length, any other
    value counts as one tree, and a group without a 'tree' key counts as
    zero.
    '''
    if nexson_version is None:
        nexson_version = detect_nexson_version(nexson)
    nex = get_nexml_el(nexson)
    if _is_by_id_hbf(nexson_version):
        return sum(len(tree_group.get('treeById', {}))
                   for tree_group in nex.get('treesById', {}).values())
    trees_group = nex.get('trees', [])
    if isinstance(trees_group, dict):
        trees_group = [trees_group]
    num_trees = 0
    for tree_group in trees_group:
        t = tree_group.get('tree')
        if t is None:
            # No 'tree' entry at all: nothing to count for this group.
            continue
        num_trees += len(t) if isinstance(t, list) else 1
    return num_trees
Ejemplo n.º 22
0
def count_num_trees(nexson, nexson_version=None):
    '''Returns the number of trees summed across all tree
    groups.

    The NexSON version is detected from `nexson` when `nexson_version` is
    None. In the by-id form the sizes of the 'treeById' mappings are
    added up. Otherwise 'trees' (a list of groups, or a single group dict)
    is walked: a 'tree' value that is a list counts its length, any other
    value counts as one tree, and a group without a 'tree' key counts as
    zero.
    '''
    if nexson_version is None:
        nexson_version = detect_nexson_version(nexson)
    nex = get_nexml_el(nexson)
    if _is_by_id_hbf(nexson_version):
        return sum(len(tree_group.get('treeById', {}))
                   for tree_group in nex.get('treesById', {}).values())
    trees_group = nex.get('trees', [])
    if isinstance(trees_group, dict):
        trees_group = [trees_group]
    num_trees = 0
    for tree_group in trees_group:
        t = tree_group.get('tree')
        if t is None:
            # No 'tree' entry at all: nothing to count for this group.
            continue
        num_trees += len(t) if isinstance(t, list) else 1
    return num_trees
Ejemplo n.º 23
0
def iter_otus(nexson, nexson_version=None):
    '''generator over all otus in all otus group elements.
    yields a tuple of 3 items:
        otus group ID,
        otu ID,
        the otu obj

    When the detected (or supplied) version is not the by-id form, `nexson`
    is converted in place to BY_ID_HONEY_BADGERFISH before iterating.
    '''
    if nexson_version is None:
        nexson_version = detect_nexson_version(nexson)
    if not _is_by_id_hbf(nexson_version):
        # The parameter is `nexson`; the previously used `nexson_blob` is
        # not defined in this function.
        # TODO shouldn't modify...
        convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH)
    # Look the nexml element up only after the conversion, so that it
    # reflects the by-id layout read below.
    nex = get_nexml_el(nexson)
    otus_group_by_id = nex['otusById']
    group_order = nex.get('^ot:otusElementOrder', [])
    if len(group_order) < len(otus_group_by_id):
        # The recorded order does not cover every group: use sorted IDs.
        group_order = sorted(otus_group_by_id.keys())
    for otus_group_id in group_order:
        otu_by_id = otus_group_by_id[otus_group_id]['otuById']
        # Iterate over a snapshot of the keys.
        for otu_id in list(otu_by_id.keys()):
            yield otus_group_id, otu_id, otu_by_id[otu_id]
Ejemplo n.º 24
0
    def __init__(self, obj, logger, **kwargs):
        """Validate the NexSON blob `obj` while constructing the adaptor.

        Args:
            obj: the NexSON dict. Top-level keys other than 'nexml' and
                'nex:nexml' are reported through a warning event.
            logger: stored as ``self._logger`` and handed to the
                validation context.
            **kwargs: only 'max_num_trees_per_study' is read. When it is
                not None and the study holds more trees than that, an
                error event is emitted.

        Construction stops early, after emitting an error event, when the
        nexml element cannot be obtained or is not a dict.
        """
        self._raw = obj
        self._nexml = None
        self._pyid_to_nexson_add = {}
        self._logger = logger
        self._repeated_id = False
        self._otuid2ottid_byogid = {}
        self._ottid2otuid_list_byogid = {}
        self._dupottid_by_ogid_tree_id = {}
        self._max_num_trees_per_study = kwargs.get('max_num_trees_per_study')
        # Any top-level key besides the nexml element is unrecognized.
        uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
        if uk:
            self._warn_event(_NEXEL.TOP_LEVEL,
                             obj=obj,
                             err_type=gen_UnrecognizedKeyWarning,
                             anc=_EMPTY_TUPLE,
                             obj_nex_id=None,
                             key_list=uk)
        try:
            self._nexml = get_nexml_el(obj)
            # An explicit check (rather than an assert) so that the
            # validation still happens when Python runs with -O.
            if not isinstance(self._nexml, dict):
                raise TypeError('nexml element is not a dict')
        except Exception:
            self._error_event(_NEXEL.TOP_LEVEL,
                              obj=obj,
                              err_type=gen_MissingMandatoryKeyWarning,
                              anc=_EMPTY_TUPLE,
                              obj_nex_id=None,
                              key_list=['nexml', ])
            return  # EARLY EXIT!!
        self._nexson_id_to_obj = {}
        self._nexson_version = detect_nexson_version(obj)

        # attr used in validation only should be cleaned up
        # in the finally clause
        self._otu_group_by_id = {}
        self._otu_by_otug = {}

        try:
            # a little duck-punching
            vc = _ValidationContext(self, logger)
            try:
                add_schema_attributes(vc, self._nexson_version)
                assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
                self._validate_nexml_obj(self._nexml, vc, obj)
                if self._max_num_trees_per_study is not None:
                    nt = count_num_trees(self._raw)
                    if nt > self._max_num_trees_per_study:
                        m = '{f:d} trees found, but a limit of {m:d} trees per nexson is being enforced'
                        m = m.format(f=nt, m=self._max_num_trees_per_study)
                        self._error_event(_NEXEL.TOP_LEVEL,
                                          obj=obj,
                                          err_type=gen_MaxSizeExceededWarning,
                                          anc=_EMPTY_TUPLE,
                                          obj_nex_id=None,
                                          message=m)
                        return  # EARLY EXIT!!
            finally:
                vc.adaptor = None  # delete circular ref to help gc
                del vc
        finally:
            del self._otu_group_by_id
            del self._otu_by_otug
Ejemplo n.º 25
0
    def __init__(self, obj, logger, **kwargs):
        """Validate the NexSON blob `obj` while constructing the adaptor.

        Args:
            obj: the NexSON dict. Top-level keys other than 'nexml' and
                'nex:nexml' are reported through a warning event.
            logger: stored as ``self._logger`` and handed to the
                validation context.
            **kwargs: only 'max_num_trees_per_study' is read. When it is
                not None and the study holds more trees than that, an
                error event is emitted.

        Construction stops early, after emitting an error event, when the
        nexml element cannot be obtained or is not a dict.
        """
        self._raw = obj
        self._nexml = None
        self._pyid_to_nexson_add = {}
        self._logger = logger
        self._repeated_id = False
        self._otuid2ottid_byogid = {}
        self._ottid2otuid_list_byogid = {}
        self._dupottid_by_ogid_tree_id = {}
        self._max_num_trees_per_study = kwargs.get('max_num_trees_per_study')
        # Any top-level key besides the nexml element is unrecognized.
        uk = sorted(k for k in obj.keys() if k not in ('nexml', 'nex:nexml'))
        if uk:
            self._warn_event(_NEXEL.TOP_LEVEL,
                             obj=obj,
                             err_type=gen_UnrecognizedKeyWarning,
                             anc=_EMPTY_TUPLE,
                             obj_nex_id=None,
                             key_list=uk)
        try:
            self._nexml = get_nexml_el(obj)
            # An explicit check (rather than an assert) so that the
            # validation still happens when Python runs with -O.
            if not isinstance(self._nexml, dict):
                raise TypeError('nexml element is not a dict')
        except Exception:
            self._error_event(_NEXEL.TOP_LEVEL,
                              obj=obj,
                              err_type=gen_MissingMandatoryKeyWarning,
                              anc=_EMPTY_TUPLE,
                              obj_nex_id=None,
                              key_list=['nexml', ])
            return  # EARLY EXIT!!
        self._nexson_id_to_obj = {}
        self._nexson_version = detect_nexson_version(obj)

        # attr used in validation only should be cleaned up
        # in the finally clause
        self._otu_group_by_id = {}
        self._otu_by_otug = {}

        try:
            # a little duck-punching
            vc = _ValidationContext(self, logger)
            # `vc` is only touched by the inner finally, so a failure while
            # creating it cannot be masked by an UnboundLocalError.
            try:
                add_schema_attributes(vc, self._nexson_version)
                assert self._nexson_version[:3] in ('0.0', '1.0', '1.2')
                self._validate_nexml_obj(self._nexml, vc, obj)
                if self._max_num_trees_per_study is not None:
                    nt = count_num_trees(self._raw)
                    if nt > self._max_num_trees_per_study:
                        m = '{f:d} trees found, but a limit of {m:d} trees per nexson is being enforced'
                        m = m.format(f=nt, m=self._max_num_trees_per_study)
                        self._error_event(_NEXEL.TOP_LEVEL,
                                          obj=obj,
                                          err_type=gen_MaxSizeExceededWarning,
                                          anc=_EMPTY_TUPLE,
                                          obj_nex_id=None,
                                          message=m)
                        return  # EARLY EXIT!!
            finally:
                vc.adaptor = None  # delete circular ref to help gc
                del vc
        finally:
            del self._otu_group_by_id
            del self._otu_by_otug
Ejemplo n.º 26
0
 def testDetectVersion(self):
     # The version detected for this input is expected to be '1.3.1'.
     blob = pathmap.nexson_obj('invalid/bad_version.json.input')
     self.assertEqual(detect_nexson_version(blob), '1.3.1')
Ejemplo n.º 27
0
def merge_otus_and_trees(nexson_blob):
    '''Takes a nexson object:
        1. merges trees elements 2 - # trees into the first trees element.,
        2. merges otus elements 2 - # otus into the first otus element.
        3. if there is no ot:originalLabel field for any otu,
            it sets that field based on @label and deletes @label
        4. merges an otu elements using the rule:
              A. treat (ottId, originalLabel) as a key
              B. If otu objects in subsequent trees match originalLabel and
                have a matching or absent ot:ottId, then they are merged into
                the same OTUs (however see C)
              C. No two leaves of a tree may share an otu (though otu should
                be shared across different trees). It is important that
                each leaf node be mapped to a distinct OTU. Otherwise there
                will be no way of separating them during OTU mapping. we
                do this indirectly by assuring to no two otu objects in the
                same otus object get merged with each other (or to a common
                object)

        5. correct object references to deleted entities.

    This function is used to patch up NexSONs created by multiple imports, hence the
    substitution of '@label' for 'ot:originalLabel'. Ids are arbitrary for imports from
    non-nexml tools, so matching is done based on names. This should mimic the behavior
    of the analysis tools that produced the trees (for most/all such tools unique names
    constitute unique OTUs).

    The blob is converted in place to BY_ID_HONEY_BADGERFISH for the merge
    and converted back to its original version before it is returned.
    '''
    id_to_replace_id = {}
    orig_version = detect_nexson_version(nexson_blob)
    convert_nexson_format(nexson_blob, BY_ID_HONEY_BADGERFISH)
    nexson = get_nexml_el(nexson_blob)
    otus_group_order = nexson.get('^ot:otusElementOrder', [])
    # Stays None when there is no otus group, so that the trees section
    # below does not reference an unbound name.
    retained_ogi = None
    # (ott, orig) -> list of otu elements
    retained_mapped2otu = {}
    # orig -> list of otu elements
    retained_orig2otu = {}
    # For the first (entirely retained) group of otus:
    #   1. assure that originalLabel is filled in
    #   2. register the otu in retained_mapped2otu and retained_orig2otu
    # otu elements that have no label, originalLabel or ottId will not
    #   be registered, so they'll never be matched.
    if len(otus_group_order) > 0:
        otus_group_by_id = nexson['otusById']
        retained_ogi = otus_group_order[0]
        retained_og = otus_group_by_id[retained_ogi]
        retained_og_otu = retained_og.setdefault('otuById', {})
        label_to_original_label_otu_by_id(retained_og_otu)
        for oid, otu in retained_og_otu.items():
            ottid = otu.get('^ot:ottId')
            orig = otu.get('^ot:originalLabel')
            key = (ottid, orig)
            if key != (None, None):
                t = (oid, otu)
                retained_mapped2otu.setdefault(key, []).append(t)
                if orig is not None:
                    retained_orig2otu.setdefault(orig, []).append(t)
        # For each of the other otus elements, we:
        #   1. assure that originalLabel is filled in
        #   2. decide (for each otu) whether it will
        #       be added to retained_og or merged with
        #       an otu already in retained_og. In the
        #       case of the latter, we add to the
        #       id_to_replace_id dict (old oid as key, retained oid as value)
        for ogi in otus_group_order[1:]:
            og = otus_group_by_id[ogi]
            del otus_group_by_id[ogi]
            otu_by_id = og.get('otuById', {})
            label_to_original_label_otu_by_id(otu_by_id)
            # Retained otu IDs already matched by an otu of this group:
            # two otus of the same group must not merge into one.
            used_matches = set()
            id_to_replace_id[ogi] = retained_ogi
            for oid, otu in otu_by_id.items():
                ottid = otu.get('^ot:ottId')
                orig = otu.get('^ot:originalLabel')
                key = (ottid, orig)
                if key == (None, None):
                    # Nothing to match on: keep the otu as-is. It belongs in
                    # the 'otuById' mapping, not on the otus group object.
                    retained_og_otu[oid] = otu
                    continue
                # First try the full (ottId, originalLabel) key, then fall
                # back to the originalLabel alone.
                match_otu = next((m for m in retained_mapped2otu.get(key, [])
                                  if m[0] not in used_matches), None)
                if match_otu is None:
                    match_otu = next((m for m in retained_orig2otu.get(orig, [])
                                      if m[0] not in used_matches), None)
                if match_otu is not None:
                    id_to_replace_id[oid] = match_otu[0]
                    used_matches.add(match_otu[0])
                    _merge_otu_do_not_fix_references(otu, match_otu[1])
                else:
                    assert oid not in retained_og_otu
                    retained_og_otu[oid] = otu
                    t = (oid, otu)
                    retained_mapped2otu.setdefault(key, []).append(t)
                    if orig is not None:
                        retained_orig2otu.setdefault(orig, []).append(t)
        nexson['^ot:otusElementOrder'] = [retained_ogi]
    # Move all of the tree elements to the first trees group.
    trees_group_order = nexson.get('^ot:treesElementOrder', [])
    if len(trees_group_order) > 0:
        trees_group_by_id = nexson['treesById']
        retained_tgi = trees_group_order[0]
        retained_tg = trees_group_by_id[retained_tgi]
        if retained_ogi is not None:
            retained_tg['@otus'] = retained_ogi
        # setdefault (not get) so that trees moved into a group that had no
        # 'treeById' entry are actually stored on that group.
        retained_tg_tree_obj = retained_tg.setdefault('treeById', {})
        for tgi in trees_group_order[1:]:
            tg = trees_group_by_id[tgi]
            del trees_group_by_id[tgi]
            id_to_replace_id[tgi] = retained_tgi
            retained_tg['^ot:treeElementOrder'].extend(tg['^ot:treeElementOrder'])
            for tid, tree_obj in tg.get('treeById', {}).items():
                retained_tg_tree_obj[tid] = tree_obj
        # Point nodes that referenced a merged-away otu at its replacement.
        for tree_obj in retained_tg_tree_obj.values():
            for node in tree_obj.get('nodeById', {}).values():
                o = node.get('@otu')
                if o is not None:
                    r = id_to_replace_id.get(o)
                    if r is not None:
                        node['@otu'] = r
        nexson['^ot:treesElementOrder'] = [retained_tgi]

    replace_entity_references_in_meta_and_annotations(nexson, id_to_replace_id)
    convert_nexson_format(nexson_blob, orig_version)
    return nexson_blob
Ejemplo n.º 28
0
def _register_retained_otu(oid, otu, key, mapped2otu, orig2otu):
    '''Index a retained otu so that later otus groups can be matched to it.

    `key` is the (ottId, originalLabel) pair of `otu`. The (oid, otu) pair is
    appended to `mapped2otu[key]` and, when the originalLabel is not None,
    to `orig2otu[originalLabel]` as well.
    '''
    entry = (oid, otu)
    mapped2otu.setdefault(key, []).append(entry)
    orig = key[1]
    if orig is not None:
        orig2otu.setdefault(orig, []).append(entry)


def _first_unused_match(candidates, used_matches):
    '''Return the first (oid, otu) pair in `candidates` whose oid is not in
    `used_matches`, or None when every candidate has been used.
    '''
    for candidate in candidates:
        if candidate[0] not in used_matches:
            return candidate
    return None


def merge_otus_and_trees(nexson_blob):
    '''Collapse a nexson object to a single otus group and a single trees group.

    Takes a nexson object and:
        1. merges trees elements 2 - # trees into the first trees element,
        2. merges otus elements 2 - # otus into the first otus element,
        3. if there is no ot:originalLabel field for any otu,
            it sets that field based on @label and deletes @label
            (delegated to label_to_original_label_otu_by_id),
        4. merges otu elements using the rule:
              A. treat (ottId, originalLabel) as a key
              B. If otu objects in subsequent otus groups match originalLabel
                and have a matching or absent ot:ottId, then they are merged
                into the same otu (however see C)
              C. No two leaves of a tree may share an otu (though an otu
                should be shared across different trees). It is important
                that each leaf node be mapped to a distinct otu; otherwise
                there will be no way of separating them during OTU mapping.
                We do this indirectly by assuring that no two otu objects in
                the same otus object get merged with each other (or into a
                common object),
        5. corrects object references to deleted entities.

    This function is used to patch up NexSONs created by multiple imports,
    hence the substitution of '@label' for 'ot:originalLabel'. Ids are
    arbitrary for imports from non-nexml tools, so matching is done based on
    names. This should mimic the behavior of the analysis tools that produced
    the trees (for most/all such tools unique names constitute unique OTUs).

    `nexson_blob` is modified in place: it is converted to the by-id
    HoneyBadgerFish form for the merge and converted back to the version it
    had on entry before being returned.
    '''
    id_to_replace_id = {}
    orig_version = detect_nexson_version(nexson_blob)
    convert_nexson_format(nexson_blob, BY_ID_HONEY_BADGERFISH)
    nexson = get_nexml_el(nexson_blob)
    otus_group_order = nexson.get('^ot:otusElementOrder', [])
    # (ott, orig) -> list of (oid, otu) pairs
    retained_mapped2otu = {}
    # orig -> list of (oid, otu) pairs
    retained_orig2otu = {}
    # Stays None when the document has no otus group, so that the trees
    #   section below does not refer to an unbound name.
    retained_ogi = None
    # For the first (entirely retained) group of otus:
    #   1. assure that originalLabel is filled in
    #   2. register the otu in retained_mapped2otu and retained_orig2otu
    # otu elements that have no label, originalLabel or ottId will not
    #   be registered, so they'll never be matched.
    if otus_group_order:
        otus_group_by_id = nexson['otusById']
        retained_ogi = otus_group_order[0]
        retained_og = otus_group_by_id[retained_ogi]
        retained_og_otu = retained_og.setdefault('otuById', {})
        label_to_original_label_otu_by_id(retained_og_otu)
        for oid, otu in retained_og_otu.items():
            key = (otu.get('^ot:ottId'), otu.get('^ot:originalLabel'))
            if key != (None, None):
                _register_retained_otu(oid, otu, key,
                                       retained_mapped2otu, retained_orig2otu)
        # For each of the other otus elements, we:
        #   1. assure that originalLabel is filled in
        #   2. decide (for each otu) whether it will be added to the
        #       retained group or merged with an otu already in it. In the
        #       case of the latter, we record old oid -> retained oid in
        #       id_to_replace_id.
        for ogi in otus_group_order[1:]:
            og = otus_group_by_id.pop(ogi)
            otu_by_id = og.get('otuById', {})
            label_to_original_label_otu_by_id(otu_by_id)
            # oids in the retained group that must not receive another otu
            #   from this group (rule 4C).
            used_matches = set()
            id_to_replace_id[ogi] = retained_ogi
            for oid, otu in otu_by_id.items():
                key = (otu.get('^ot:ottId'), otu.get('^ot:originalLabel'))
                if key == (None, None):
                    # Unmatchable otu: keep it, in the retained group's
                    #   otuById table (not on the group object itself).
                    retained_og_otu[oid] = otu
                    continue
                # Prefer an exact (ottId, originalLabel) match, then fall
                #   back to a match on originalLabel alone.
                match_otu = _first_unused_match(
                    retained_mapped2otu.get(key, []), used_matches)
                if match_otu is None:
                    match_otu = _first_unused_match(
                        retained_orig2otu.get(key[1], []), used_matches)
                if match_otu is not None:
                    id_to_replace_id[oid] = match_otu[0]
                    used_matches.add(match_otu[0])
                    _merge_otu_do_not_fix_references(otu, match_otu[1])
                else:
                    assert oid not in retained_og_otu
                    retained_og_otu[oid] = otu
                    # This otu came from the current group, so no later otu
                    #   of the same group may be merged into it (rule 4C).
                    used_matches.add(oid)
                    _register_retained_otu(oid, otu, key,
                                           retained_mapped2otu,
                                           retained_orig2otu)
        nexson['^ot:otusElementOrder'] = [retained_ogi]
    # Move all of the tree elements to the first trees group.
    trees_group_order = nexson.get('^ot:treesElementOrder', [])
    if trees_group_order:
        trees_group_by_id = nexson['treesById']
        retained_tgi = trees_group_order[0]
        retained_tg = trees_group_by_id[retained_tgi]
        if retained_ogi is not None:
            retained_tg['@otus'] = retained_ogi
        # setdefault (rather than get) so that trees moved in from other
        #   groups are stored on the retained group even if it had no
        #   treeById entry of its own.
        retained_tg_tree_obj = retained_tg.setdefault('treeById', {})
        for tgi in trees_group_order[1:]:
            tg = trees_group_by_id.pop(tgi)
            id_to_replace_id[tgi] = retained_tgi
            retained_tg['^ot:treeElementOrder'].extend(
                tg['^ot:treeElementOrder'])
            for tid, tree_obj in tg.get('treeById', {}).items():
                retained_tg_tree_obj[tid] = tree_obj
        # Re-point every node that referred to a merged-away otu.
        for tree_obj in retained_tg_tree_obj.values():
            for node in tree_obj.get('nodeById', {}).values():
                o = node.get('@otu')
                if o is not None:
                    r = id_to_replace_id.get(o)
                    if r is not None:
                        node['@otu'] = r
        nexson['^ot:treesElementOrder'] = [retained_tgi]

    replace_entity_references_in_meta_and_annotations(nexson, id_to_replace_id)
    convert_nexson_format(nexson_blob, orig_version)
    return nexson_blob
Ejemplo n.º 29
0
 def testDetectVersion(self):
     '''The 'invalid/bad_version.json.input' fixture is detected as
     nexson version '1.3.1'.
     '''
     blob = pathmap.nexson_obj('invalid/bad_version.json.input')
     self.assertEqual(detect_nexson_version(blob), '1.3.1')