def testCanConvert(self):
     '''Merging the v1.2 input fixture must reproduce the v1.2 expected fixture.'''
     input_path = 'merge/merge-input.v1.2.json'
     expected_path = 'merge/merge-expected.v1.2.json'
     blob = pathmap.nexson_obj(input_path)
     reference = sort_arbitrarily_ordered_nexson(pathmap.nexson_obj(expected_path))
     blob = sort_arbitrarily_ordered_nexson(blob)
     # Guard against a vacuous pass: the fixtures have to differ before the merge.
     self.assertNotEqual(blob, reference)
     # The return value is ignored; the comparison below is made on `blob`
     # itself, so this test relies on the merge modifying `blob` in place.
     merge_otus_and_trees(blob)
     equal_blob_check(self, '', blob, reference)
 def testCanConvert(self):
     '''Merging the v1.2 input fixture must reproduce the v1.2 expected fixture.'''
     input_path = 'merge/merge-input.v1.2.json'
     expected_path = 'merge/merge-expected.v1.2.json'
     blob = pathmap.nexson_obj(input_path)
     reference = sort_arbitrarily_ordered_nexson(pathmap.nexson_obj(expected_path))
     blob = sort_arbitrarily_ordered_nexson(blob)
     # Guard against a vacuous pass: the fixtures have to differ before the merge.
     self.assertNotEqual(blob, reference)
     # The return value is ignored; the comparison below is made on `blob`
     # itself, so this test relies on the merge modifying `blob` in place.
     merge_otus_and_trees(blob)
     equal_blob_check(self, '', blob, reference)
def merge_otus():
    '''Controller action: merge the otus and trees groups of a NexSON blob.

    Takes a "nexson" arg that should be a NexSON blob (read from the 'nexson'
    request variable or, more likely, from the request body).

    Returns a dict rendered through the 'generic.json' view:
        on success: {'data': <result of merge_otus_and_trees>, 'error': 0}
        on failure: {'error': 1, 'description': <str() of the exception>}
    Only exceptions raised by the merge itself are reported this way; a
    failure while extracting the NexSON from the request propagates.

    The merge is described by its authors as follows:

        1. merges trees elements 2 - # trees into the first trees element.
        2. merges otus elements 2 - # otus into the first otus element.
        3. if there is no ot:originalLabel field for any otu,
            it sets that field based on @label and deletes @label
        4. merges otu elements using the rule:
              A. treat (ottId, originalLabel) as a key
              B. If otu objects in subsequent trees match originalLabel and
                have a matching or absent ot:ottId, then they are merged into
                the same OTUs (however see C)
              C. No two leaves of a tree may share an otu (though otu should
                be shared across different trees). It is important that
                each leaf node be mapped to a distinct OTU. Otherwise there
                will be no way of separating them during OTU mapping. We
                do this indirectly by ensuring that no two otu objects in the
                same otus object get merged with each other (or to a common
                object)

        5. corrects object references to deleted entities.

    This function is used to patch up NexSONs created by multiple imports, hence the
    substitution of '@label' for 'ot:originalLabel'. Ids are arbitrary for imports from
    non-nexml tools, so matching is done based on names. This should mimic the behavior
    of the analysis tools that produced the trees (for most/all such tools unique names
    constitute unique OTUs).

    '''
    response.view = 'generic.json'
    # read NexSON from 'nexson' arg or (more likely) the request body
    nexson = extract_nexson_from_http_call(request, **request.vars)  # web2py equivalent to **kwargs

    try:
        merged = merge_otus_and_trees(nexson)
    except Exception as x:  # "as" form: valid on Python 2.6+ and Python 3
        # Top-level service boundary: report the failure in the JSON payload
        # rather than letting it surface as a server error.
        return {'error': 1,
                'description': str(x)}
    return {'data': merged,
            'error': 0}
Example #4
0
def merge_otus():
    '''Controller action: merge the otus and trees groups of a NexSON blob.

    Takes a "nexson" arg that should be a NexSON blob (read from the 'nexson'
    request variable or, more likely, from the request body).

    Returns a dict rendered through the 'generic.json' view:
        on success: {'data': <result of merge_otus_and_trees>, 'error': 0}
        on failure: {'error': 1, 'description': <str() of the exception>}
    Only exceptions raised by the merge itself are reported this way; a
    failure while extracting the NexSON from the request propagates.

    The merge is described by its authors as follows:

        1. merges trees elements 2 - # trees into the first trees element.
        2. merges otus elements 2 - # otus into the first otus element.
        3. if there is no ot:originalLabel field for any otu,
            it sets that field based on @label and deletes @label
        4. merges otu elements using the rule:
              A. treat (ottId, originalLabel) as a key
              B. If otu objects in subsequent trees match originalLabel and
                have a matching or absent ot:ottId, then they are merged into
                the same OTUs (however see C)
              C. No two leaves of a tree may share an otu (though otu should
                be shared across different trees). It is important that
                each leaf node be mapped to a distinct OTU. Otherwise there
                will be no way of separating them during OTU mapping. We
                do this indirectly by ensuring that no two otu objects in the
                same otus object get merged with each other (or to a common
                object)

        5. corrects object references to deleted entities.

    This function is used to patch up NexSONs created by multiple imports, hence the
    substitution of '@label' for 'ot:originalLabel'. Ids are arbitrary for imports from
    non-nexml tools, so matching is done based on names. This should mimic the behavior
    of the analysis tools that produced the trees (for most/all such tools unique names
    constitute unique OTUs).

    '''
    response.view = 'generic.json'
    # read NexSON from 'nexson' arg or (more likely) the request body
    nexson = extract_nexson_from_http_call(request, **request.vars)  # web2py equivalent to **kwargs

    try:
        merged = merge_otus_and_trees(nexson)
    except Exception as x:  # "as" form: valid on Python 2.6+ and Python 3
        # Top-level service boundary: report the failure in the JSON payload
        # rather than letting it surface as a server error.
        return {'error': 1,
                'description': str(x)}
    return {'data': merged,
            'error': 0}
Example #5
0
# Base of the URL stored in ^ot:dataDeposit; the study's @id is appended to it.
_TREEBASE_STUDY_URL_PREFIX = 'http://purl.org/phylo/treebase/phylows/study/TB2:'
# TreeBase-specific tags that are deleted from every object that is visited.
_TREEBASE_DROPPED_TAGS = ('^rdfs:isDefinedBy', '@xml:base')
_TREEBASE_SKOS_ALT_LABEL = '^skos:altLabel'
_TREEBASE_SKOS_CLOSE_MATCH = '^skos:closeMatch'
# (URL prefix, ^ot:taxonLink key) pairs. A closeMatch @href that starts with
# one of these prefixes is stored under the key with the prefix stripped off.
_TREEBASE_STRIPPABLE_PREFIXES = (
    ('http://www.ubio.org/authority/metadata.php?lsid=urn:lsid:ubio.org:namebank:', '@ubio'),
    ('http://purl.uniprot.org/taxonomy/', '@uniprot'),
)
# (otu key, ^ot:taxonLink key) pairs for values that are moved as-is.
_TREEBASE_MOVEABLE_TO_TAXON_LINK = (
    ('^tb:identifier.taxon', '@tb:identifier.taxon'),
    ('^tb:identifier.taxonVariant', '@tb:identifier.taxonVariant'),
)


def _treebase_scrub(obj):
    '''Delete the TreeBase-only tags from `obj`, then simplify its meta items.'''
    for tag in _TREEBASE_DROPPED_TAGS:
        obj.pop(tag, None)
    _simplify_all_meta_by_id_del(obj)


def _treebase_add_study_metadata(nexml):
    '''Derive the study-level ^ot: fields from TreeBase's own metadata.'''
    # compose dataDeposit
    nexml['^ot:dataDeposit'] = {'@href': _TREEBASE_STUDY_URL_PREFIX + nexml['@id']}
    # compose the publication fields
    citation = nexml.get('^dcterms:bibliographicCitation')
    if citation:
        nexml['^ot:studyPublicationReference'] = citation
    doi = nexml.get('^prism:doi')
    if doi:
        nexml['^ot:studyPublication'] = {'@href': doi}
    year = nexml.get('^prism:publicationDate')
    if year:
        try:
            nexml['^ot:studyYear'] = int(year)
        except (TypeError, ValueError):
            # best effort: a value that is not a plain integer is not copied
            pass


def _treebase_split_href(href):
    '''Return (taxon link key, identifier, prefix) for a recognized href, else None.'''
    if not href:
        return None
    for prefix, link_key in _TREEBASE_STRIPPABLE_PREFIXES:
        try:
            recognized = href.startswith(prefix)
        except (AttributeError, TypeError):
            # href is not a text string, so it cannot be a recognized URL
            return None
        if recognized:
            return link_key, href[len(prefix):], prefix
    return None


def _treebase_move_close_matches(otu, taxon_link, prefix_map):
    '''Move recognized ^skos:closeMatch links of `otu` into `taxon_link`.

    `prefix_map` is updated with the prefix that was stripped for each key.
    Links that are not recognized stay in ^skos:closeMatch.
    '''
    scm = otu.get(_TREEBASE_SKOS_CLOSE_MATCH)
    if not scm:
        return
    if isinstance(scm, dict):
        hit = _treebase_split_href(scm.get('@href'))
        if hit is not None:
            link_key, ident, prefix = hit
            taxon_link[link_key] = ident
            prefix_map[link_key] = prefix
            del otu[_TREEBASE_SKOS_CLOSE_MATCH]
        return
    if not isinstance(scm, (list, tuple)):
        # Unexpected shape: leave the value alone rather than guess.
        return
    unmatched = []
    for el in scm:
        try:
            href = el.get('@href')
        except AttributeError:
            # Not a dict-like element: keep it, and keep processing the rest
            # so that one odd entry does not cause the later ones to be lost.
            unmatched.append(el)
            continue
        if not href:
            # NOTE(review): as in the previous implementation, an element
            # with no @href is neither moved nor kept, so it disappears if
            # the list is rewritten below -- confirm that this is intended.
            continue
        hit = _treebase_split_href(href)
        if hit is None:
            unmatched.append(el)
        else:
            link_key, ident, prefix = hit
            taxon_link[link_key] = ident
            prefix_map[link_key] = prefix
    if len(unmatched) < len(scm):
        if len(unmatched) > 1:
            otu[_TREEBASE_SKOS_CLOSE_MATCH] = unmatched
        elif len(unmatched) == 1:
            # a single leftover is stored as a bare object, not a 1-item list
            otu[_TREEBASE_SKOS_CLOSE_MATCH] = unmatched[0]
        else:
            del otu[_TREEBASE_SKOS_CLOSE_MATCH]


def _treebase_normalize_otu(otu, prefix_map):
    '''Normalize one otu object in place and return its original label.'''
    _treebase_scrub(otu)
    label = otu.pop('@label')  # raises KeyError for an otu without a label
    otu['^ot:originalLabel'] = label
    alt_label = otu.get(_TREEBASE_SKOS_ALT_LABEL)
    if alt_label is not None:
        # an existing ^ot:altLabel wins over the skos one
        if otu.get('^ot:altLabel') is None:
            otu['^ot:altLabel'] = alt_label
        del otu[_TREEBASE_SKOS_ALT_LABEL]
    taxon_link = {}
    _treebase_move_close_matches(otu, taxon_link, prefix_map)
    for src_key, link_key in _TREEBASE_MOVEABLE_TO_TAXON_LINK:
        val = otu.get(src_key)
        if val:
            taxon_link[link_key] = val
            del otu[src_key]
    if taxon_link:
        otu['^ot:taxonLink'] = taxon_link
    return label


def _treebase_normalize_tree(tree, otu2label):
    '''Normalize one tree in place: coerce edge lengths, drop redundant node labels.'''
    _treebase_scrub(tree)
    tree_type = tree.get('@xsi:type', 'nex:FloatTree')
    if tree_type.lower() == 'nex:inttree':
        e_len_coerce = int
    else:
        e_len_coerce = float
    for edge_d in tree['edgeBySourceId'].values():
        for edge in edge_d.values():
            try:
                edge['@length'] = e_len_coerce(edge['@length'])
            except (KeyError, TypeError, ValueError, OverflowError):
                # best effort: a missing or unparseable length is left as it is
                pass
    for node in tree['nodeById'].values():
        node_label = node.get('@label')
        if node_label:
            otu_id = node.get('@otu')
            # .get: a node that refers to an unknown otu simply keeps its label
            if otu_id and otu2label.get(otu_id) == node_label:
                del node['@label']


def get_ot_study_info_from_treebase_nexml(src=None,
                                          nexml_content=None,
                                          encoding=u'utf8',
                                          nexson_syntax_version=DEFAULT_NEXSON_VERSION,
                                          merge_blocks=True,
                                          sort_arbitrary=False):
    '''Normalize TreeBase-specific metadata into the locations where
    Open Tree of Life software expects it.

    See get_ot_study_info_from_nexml for the explanation of the src,
    nexml_content, encoding, and nexson_syntax_version arguments.
    The NeXML is always read as BY_ID_HONEY_BADGERFISH; the requested
    nexson_syntax_version is only applied at the end.
    If merge_blocks is True then peyotl.manip.merge_otus_and_trees is called
    on the normalized blob.
    If sort_arbitrary is True the result is passed through
    sort_arbitrarily_ordered_nexson (directly, or via convert_nexson_format
    when a conversion is requested).

    Returns the blob obtained from get_ot_study_info_from_nexml after it has
    been modified.

    Actions to "normalize" TreeBase objects to ot Nexson
        1. the meta id for any meta item that has only a value and an id
            (handled by _simplify_all_meta_by_id_del; presumably the id is
            dropped -- confirm against that helper)
        2. throw away rdfs:isDefinedBy
        3. otu @label -> otu ^ot:originalLabel
            (and otu ^skos:altLabel -> ^ot:altLabel when that is not set)
        4. ^tb:identifier.taxon, ^tb:identifier.taxonVariant and some skos:closeMatch
            fields to ^ot:taxonLink; the URL prefixes stripped from the
            closeMatch links are recorded in ^ot:taxonLinkPrefixes
        5. remove "@xml:base"
        6. coerce edge lengths to native types
        7. add ^ot:dataDeposit, ^ot:studyPublicationReference,
            ^ot:studyPublication and ^ot:studyYear to the study
        8. delete a node @label that merely repeats the label of its otu
    '''
    raw = get_ot_study_info_from_nexml(src=src,
                                       nexml_content=nexml_content,
                                       encoding=encoding,
                                       nexson_syntax_version=BY_ID_HONEY_BADGERFISH)
    nexml = raw['nexml']
    _treebase_scrub(nexml)
    _treebase_add_study_metadata(nexml)
    otu2label = {}   # otu id -> original label, used to spot redundant node labels
    prefix_map = {}  # taxon link key -> URL prefix that was stripped
    for otus in nexml['otusById'].values():
        _treebase_scrub(otus)
        for oid, otu in otus['otuById'].items():
            otu2label[oid] = _treebase_normalize_otu(otu, prefix_map)
    for trees in nexml['treesById'].values():
        _treebase_scrub(trees)
        for tree in trees['treeById'].values():
            _treebase_normalize_tree(tree, otu2label)

    if prefix_map:
        nexml['^ot:taxonLinkPrefixes'] = prefix_map
    if merge_blocks:
        from peyotl.manip import merge_otus_and_trees
        merge_otus_and_trees(raw)
    if nexson_syntax_version != BY_ID_HONEY_BADGERFISH:
        # NOTE(review): the return value is discarded, so this relies on
        # convert_nexson_format converting `raw` in place -- confirm.
        convert_nexson_format(raw,
                              nexson_syntax_version,
                              current_format=BY_ID_HONEY_BADGERFISH,
                              sort_arbitrary=sort_arbitrary)
    elif sort_arbitrary:
        sort_arbitrarily_ordered_nexson(raw)
    return raw