def testCanConvert(self):
    '''Check that merging the v1.2 input fixture yields the expected fixture.

    Both blobs are put into sorted order first so that the comparison is
    not sensitive to the ordering of arbitrarily ordered elements.
    '''
    source_blob = pathmap.nexson_obj('merge/merge-input.v1.2.json')
    target_blob = pathmap.nexson_obj('merge/merge-expected.v1.2.json')
    target_blob = sort_arbitrarily_ordered_nexson(target_blob)
    source_blob = sort_arbitrarily_ordered_nexson(source_blob)

    # Sanity check: the fixtures must differ before the merge, otherwise
    # the final comparison would pass without exercising the merge.
    self.assertNotEqual(source_blob, target_blob)

    # The return value is not used; the comparison below relies on
    # source_blob having been updated by this call.
    merge_otus_and_trees(source_blob)
    equal_blob_check(self, '', source_blob, target_blob)
def merge_otus():
    '''Web2py controller: merge the otus and trees groups of a NexSON blob.

    Takes a "nexson" arg that should be a NexSON blob (read from the
    'nexson' arg or, more likely, from the request body).

    Returns a dict that is rendered with the 'generic.json' view:
        on success: {'data': <merged NexSON>, 'error': 0}
        on failure: {'error': 1, 'description': <str of the exception>}

    The "data" property will be the NexSON with otus merged into the first
    otu group. The merge is delegated to merge_otus_and_trees, which:
        1. merges trees elements 2 - # trees into the first trees element.
        2. merges otus elements 2 - # otus into the first otus element.
        3. if there is no ot:originalLabel field for any otu, sets that
            field based on @label and deletes @label
        4. merges otu elements using the rule:
            A. treat (ottId, originalLabel) as a key
            B. If otu objects in subsequent trees match originalLabel and
                have a matching or absent ot:ottId, then they are merged
                into the same OTUs (however see C)
            C. No two leaves of a tree may share an otu (though otu should
                be shared across different trees). It is important that
                each leaf node be mapped to a distinct OTU. Otherwise there
                will be no way of separating them during OTU mapping. We do
                this indirectly by ensuring that no two otu objects in the
                same otus object get merged with each other (or to a
                common object)
        5. corrects object references to deleted entities.

    This function is used to patch up NexSONs created by multiple imports,
    hence the substitution of '@label' for 'ot:originalLabel'. Ids are
    arbitrary for imports from non-nexml tools, so matching is done based
    on names. This should mimic the behavior of the analysis tools that
    produced the trees (for most/all such tools unique names constitute
    unique OTUs).
    '''
    response.view = 'generic.json'
    # read NexSON from 'nexson' arg or (more likely) the request body
    nexson = extract_nexson_from_http_call(request, **request.vars)  # web2py equivalent to **kwargs
    # Controller boundary: any failure of the merge is reported to the client
    # in the JSON payload instead of propagating out of the controller.
    try:
        merged = merge_otus_and_trees(nexson)
    except Exception as err:
        return {'error': 1, 'description': str(err)}
    return {'data': merged, 'error': 0}
def get_ot_study_info_from_treebase_nexml(src=None,
                                          nexml_content=None,
                                          encoding=u'utf8',
                                          nexson_syntax_version=DEFAULT_NEXSON_VERSION,
                                          merge_blocks=True,
                                          sort_arbitrary=False):
    '''Normalize treebase-specific metadata into the locations where
    open tree of life software expects it.

    See get_ot_study_info_from_nexml for the explanation of the src,
    nexml_content, encoding, and nexson_syntax_version arguments.
    If merge_blocks is True then peyotl.manip.merge_otus_and_trees is
    called on the normalized blob.
    If sort_arbitrary is True the result is put in sorted order (either
    as part of the format conversion or by sort_arbitrarily_ordered_nexson).

    Returns the blob obtained from get_ot_study_info_from_nexml after it
    has been normalized (and converted to nexson_syntax_version if that
    differs from the by-id format used internally here).

    Actions to "normalize" TreeBase objects to ot Nexson
        1. simplify meta items via _simplify_all_meta_by_id_del (per the
            original notes: the meta id for any meta item that has only a
            value and an id)
        2. throw away rdfs:isDefinedBy
        3. otu @label -> otu ^ot:originalLabel
        4. ^tb:identifier.taxon, ^tb:identifier.taxonVariant and some
            skos:closeMatch fields to ^ot:taxonLink
        5. remove "@xml:base"
        6. coerce edge lengths to native types
    In addition: ^ot:dataDeposit, ^ot:studyPublicationReference,
    ^ot:studyPublication and ^ot:studyYear are composed from the study
    level metadata, ^skos:altLabel is moved to ^ot:altLabel, and a node
    @label that merely repeats the label of the node's otu is removed.
    '''
    #pylint: disable=R0915
    raw = get_ot_study_info_from_nexml(src=src,
                                       nexml_content=nexml_content,
                                       encoding=encoding,
                                       nexson_syntax_version=BY_ID_HONEY_BADGERFISH)
    nexml = raw['nexml']
    SKOS_ALT_LABEL = '^skos:altLabel'
    SKOS_CLOSE_MATCH = '^skos:closeMatch'
    # URL prefix -> key used for the stripped identifier inside ^ot:taxonLink
    strippable_pre = {
        'http://www.ubio.org/authority/metadata.php?lsid=urn:lsid:ubio.org:namebank:': '@ubio',
        'http://purl.uniprot.org/taxonomy/': '@uniprot',
    }
    # otu level key -> key used inside ^ot:taxonLink
    moveable2taxon_link = {"^tb:identifier.taxon": '@tb:identifier.taxon',
                           "^tb:identifier.taxonVariant": '@tb:identifier.taxonVariant', }
    to_del = ['^rdfs:isDefinedBy', '@xml:base']
    for tag in to_del:
        if tag in nexml:
            del nexml[tag]
    _simplify_all_meta_by_id_del(nexml)
    _otu2label = {}  # otu id -> original @label; used below to prune node labels
    prefix_map = {}  # taxonLink key -> URL prefix, for the prefixes actually seen
    # compose dataDeposit
    nexid = nexml['@id']
    tb_url = 'http://purl.org/phylo/treebase/phylows/study/TB2:' + nexid
    nexml['^ot:dataDeposit'] = {'@href': tb_url}
    # compose the publication fields
    bd = nexml.get("^dcterms:bibliographicCitation")
    if bd:
        nexml['^ot:studyPublicationReference'] = bd
    doi = nexml.get('^prism:doi')
    if doi:
        nexml['^ot:studyPublication'] = {'@href': doi}
    year = nexml.get('^prism:publicationDate')
    if year:
        try:
            nexml['^ot:studyYear'] = int(year)
        except (TypeError, ValueError):
            # best effort: a publication date that is not a plain integer
            # simply does not produce an ^ot:studyYear
            pass
    #
    for otus in nexml['otusById'].values():
        for tag in to_del:
            if tag in otus:
                del otus[tag]
        _simplify_all_meta_by_id_del(otus)
        for oid, otu in otus['otuById'].items():
            for tag in to_del:
                if tag in otu:
                    del otu[tag]
            _simplify_all_meta_by_id_del(otu)
            label = otu['@label']
            _otu2label[oid] = label
            otu['^ot:originalLabel'] = label
            del otu['@label']
            al = otu.get(SKOS_ALT_LABEL)
            if al is not None:
                # an existing ^ot:altLabel wins; the skos field is removed either way
                if otu.get('^ot:altLabel') is None:
                    otu['^ot:altLabel'] = al
                del otu[SKOS_ALT_LABEL]
            tl = {}
            scm = otu.get(SKOS_CLOSE_MATCH)
            #_LOG.debug('scm = ' + str(scm))
            if scm:
                if isinstance(scm, dict):
                    # single closeMatch: move it to the taxon link if its
                    # href starts with a known prefix
                    h = scm.get('@href')
                    if h:
                        try:
                            for p, t in strippable_pre.items():
                                if h.startswith(p):
                                    ident = h[len(p):]
                                    tl[t] = ident
                                    del otu[SKOS_CLOSE_MATCH]
                                    prefix_map[t] = p
                        except Exception:
                            # best effort: an href of an unexpected type
                            # leaves the closeMatch untouched
                            pass
                else:
                    # several closeMatch elements: move the recognized ones
                    # and keep the rest (nm == "not moved")
                    nm = []
                    try:
                        for el in scm:
                            h = el.get('@href')
                            if h:
                                found = False
                                for p, t in strippable_pre.items():
                                    if h.startswith(p):
                                        ident = h[len(p):]
                                        tl[t] = ident
                                        found = True
                                        prefix_map[t] = p
                                        break
                                if not found:
                                    nm.append(el)
                    except Exception:
                        # best effort: scanning stops at the first malformed
                        # element; elements after it are not examined.
                        # NOTE(review): those unexamined elements are then
                        # dropped by the rewrite below -- confirm that this
                        # is acceptable.
                        pass
                    if len(nm) < len(scm):
                        if len(nm) > 1:
                            otu[SKOS_CLOSE_MATCH] = nm
                        elif len(nm) == 1:
                            otu[SKOS_CLOSE_MATCH] = nm[0]
                        else:
                            del otu[SKOS_CLOSE_MATCH]
            #_LOG.debug('tl =' + str(tl))
            for k, t in moveable2taxon_link.items():
                al = otu.get(k)
                if al:
                    tl[t] = al
                    del otu[k]
            if tl:
                otu['^ot:taxonLink'] = tl
    for trees in nexml['treesById'].values():
        for tag in to_del:
            if tag in trees:
                del trees[tag]
        _simplify_all_meta_by_id_del(trees)
        for tree in trees['treeById'].values():
            for tag in to_del:
                if tag in tree:
                    del tree[tag]
            _simplify_all_meta_by_id_del(tree)
            # edge lengths become int for nex:IntTree and float otherwise
            tt = tree.get('@xsi:type', 'nex:FloatTree')
            if tt.lower() == 'nex:inttree':
                e_len_coerce = int
            else:
                e_len_coerce = float
            for edge_d in tree['edgeBySourceId'].values():
                for edge in edge_d.values():
                    try:
                        x = e_len_coerce(edge['@length'])
                        edge['@length'] = x
                    except (KeyError, TypeError, ValueError, OverflowError):
                        # an edge with no @length, or with a length that
                        # cannot be coerced, is left as it is
                        pass
            for node in tree['nodeById'].values():
                nl = node.get('@label')
                if nl:
                    no = node.get('@otu')
                    # the label is redundant if it just repeats the otu's label
                    if no and _otu2label[no] == nl:
                        del node['@label']

    if prefix_map:
        nexml['^ot:taxonLinkPrefixes'] = prefix_map
    if merge_blocks:
        # imported here rather than at module level
        # (presumably to avoid a circular import -- TODO confirm)
        from peyotl.manip import merge_otus_and_trees
        merge_otus_and_trees(raw)
    if nexson_syntax_version != BY_ID_HONEY_BADGERFISH:
        convert_nexson_format(raw,
                              nexson_syntax_version,
                              current_format=BY_ID_HONEY_BADGERFISH,
                              sort_arbitrary=sort_arbitrary)
    elif sort_arbitrary:
        sort_arbitrarily_ordered_nexson(raw)
    return raw