예제 #1
0
파일: default.py 프로젝트: thyzzs/opentree
     response.headers['Content-Type'] = 'text/plain'
     raise HTTP(501, T("Conversion to NeXML failed.\n" + err_content))
 if output == 'nexml':
     response.headers['Content-Type'] = 'text/xml'
     return open(NEXML_FILEPATH, 'rU').read()
 NEXSON_FILENAME = 'nexson' + NEXSON_VERSION + '.json'
 NEXSON_FILEPATH = os.path.join(working_dir, NEXSON_FILENAME)
 NEXSON_DONE_FILEPATH = NEXSON_FILEPATH + '.written'
 NEXSON_LOCKFILEPATH = NEXSON_FILEPATH + '.lock'
 if not os.path.exists(NEXSON_DONE_FILEPATH):
     try:
         with locket.lock_file(NEXSON_LOCKFILEPATH, timeout=0):
             if not os.path.exists(NEXSON_DONE_FILEPATH):
                 try:
                     dfj = get_ot_study_info_from_nexml(
                         NEXML_FILEPATH,
                         nexson_syntax_version=NEXSON_VERSION)
                 except:
                     raise HTTP(
                         400,
                         T("Submitted data is not a valid NeXML file, or cannot be converted."
                           ))
                 out = codecs.open(NEXSON_FILEPATH, 'w', encoding='utf-8')
                 json.dump(dfj, out, indent=0, sort_keys=True)
                 out.write('\n')
                 out.close()
                 out = open(NEXSON_DONE_FILEPATH, 'w')
                 out.write('0\n')
                 out.close()
     except locket.LockError:
         return HTTP(102, "Conversion to NexSON still running")
예제 #2
0
def get_ot_study_info_from_treebase_nexml(src=None,
                                          nexml_content=None,
                                          encoding=u'utf8',
                                          nexson_syntax_version=DEFAULT_NEXSON_VERSION,
                                          merge_blocks=True,
                                          sort_arbitrary=False):
    '''Normalize TreeBASE-specific metadata into the locations where
    Open Tree of Life software expects it.

    See get_ot_study_info_from_nexml for the explanation of the src,
    nexml_content, encoding, and nexson_syntax_version arguments.
    If merge_blocks is True then peyotl.manip.merge_otus_and_trees is
    called on the converted study before it is returned.
    If sort_arbitrary is True it is forwarded to convert_nexson_format
    when a format conversion is needed; otherwise
    sort_arbitrarily_ordered_nexson is applied to the result.

    Returns the study blob in the requested nexson_syntax_version. The
    NeXML is first parsed into the "by id" HoneyBadgerFish form, and all
    of the normalization below is done on that form.

    Actions to "normalize" TreeBase objects to ot Nexson
        1. simplify meta items via _simplify_all_meta_by_id_del
            (presumably dropping the meta id for any meta item that has
            only a value and an id -- confirm against that helper)
        2. throw away rdfs:isDefinedBy
        3. otu @label -> otu ^ot:originalLabel
        4. ^tb:identifier.taxon, ^tb:identifier.taxonVariant and some
            skos:closeMatch fields to ^ot:taxonLink
        5. remove "@xml:base"
        6. coerce edge lengths to native types

    Raises KeyError if the parsed study lacks a required key, including
    an otu without an "@label".
    '''
    #pylint: disable=R0915
    raw = get_ot_study_info_from_nexml(src=src,
                                       nexml_content=nexml_content,
                                       encoding=encoding,
                                       nexson_syntax_version=BY_ID_HONEY_BADGERFISH)
    nexml = raw['nexml']
    SKOS_ALT_LABEL = '^skos:altLabel'
    SKOS_CLOSE_MATCH = '^skos:closeMatch'
    # URL prefix -> key used for the remainder of the URL in ^ot:taxonLink
    strippable_pre = {
        'http://www.ubio.org/authority/metadata.php?lsid=urn:lsid:ubio.org:namebank:': '@ubio',
        'http://purl.uniprot.org/taxonomy/': '@uniprot',
    }
    # otu property -> key that its value is moved to in ^ot:taxonLink
    moveable2taxon_link = {"^tb:identifier.taxon": '@tb:identifier.taxon',
                           "^tb:identifier.taxonVariant": '@tb:identifier.taxonVariant', }
    to_del = ['^rdfs:isDefinedBy', '@xml:base']

    def _drop_tags_and_simplify(element):
        '''Remove the to_del keys from element, then simplify its metas.'''
        for tag in to_del:
            if tag in element:
                del element[tag]
        _simplify_all_meta_by_id_del(element)

    def _match_known_prefix(href):
        '''Return (link_key, prefix, identifier) if href starts with one of
        the strippable prefixes, or None if it does not (or is not a string).
        '''
        if not href:
            return None
        try:
            for prefix, link_key in strippable_pre.items():
                if href.startswith(prefix):
                    return link_key, prefix, href[len(prefix):]
        except (AttributeError, TypeError):
            # href is not a text string; treat it as "no known prefix"
            pass
        return None

    _drop_tags_and_simplify(nexml)
    _otu2label = {}
    prefix_map = {}
    # compose dataDeposit
    nexid = nexml['@id']
    tb_url = 'http://purl.org/phylo/treebase/phylows/study/TB2:' + nexid
    nexml['^ot:dataDeposit'] = {'@href': tb_url}
    # compose the publication reference, publication link, and year
    bd = nexml.get("^dcterms:bibliographicCitation")
    if bd:
        nexml['^ot:studyPublicationReference'] = bd
    doi = nexml.get('^prism:doi')
    if doi:
        nexml['^ot:studyPublication'] = {'@href': doi}
    year = nexml.get('^prism:publicationDate')
    if year:
        try:
            nexml['^ot:studyYear'] = int(year)
        except (TypeError, ValueError):
            # best effort: a publication date that is not a plain integer
            # year is simply not copied to ^ot:studyYear
            pass
    #
    for otus in nexml['otusById'].values():
        _drop_tags_and_simplify(otus)
        for oid, otu in otus['otuById'].items():
            _drop_tags_and_simplify(otu)
            label = otu['@label']
            _otu2label[oid] = label
            otu['^ot:originalLabel'] = label
            del otu['@label']
            al = otu.get(SKOS_ALT_LABEL)
            if al is not None:
                # an existing ^ot:altLabel takes precedence over the skos one
                if otu.get('^ot:altLabel') is None:
                    otu['^ot:altLabel'] = al
                del otu[SKOS_ALT_LABEL]
            tl = {}
            scm = otu.get(SKOS_CLOSE_MATCH)
            #_LOG.debug('scm = ' + str(scm))
            if scm:
                if isinstance(scm, dict):
                    hit = _match_known_prefix(scm.get('@href'))
                    if hit is not None:
                        link_key, prefix, ident = hit
                        tl[link_key] = ident
                        prefix_map[link_key] = prefix
                        del otu[SKOS_CLOSE_MATCH]
                elif isinstance(scm, (list, tuple)):
                    # Keep every entry that is not rewritten into a taxon
                    # link -- including entries with no @href or that are
                    # not dicts -- so that no closeMatch data is lost.
                    unmatched = []
                    for el in scm:
                        href = el.get('@href') if isinstance(el, dict) else None
                        hit = _match_known_prefix(href)
                        if hit is None:
                            unmatched.append(el)
                        else:
                            link_key, prefix, ident = hit
                            tl[link_key] = ident
                            prefix_map[link_key] = prefix
                    if len(unmatched) < len(scm):
                        if len(unmatched) > 1:
                            otu[SKOS_CLOSE_MATCH] = unmatched
                        elif len(unmatched) == 1:
                            otu[SKOS_CLOSE_MATCH] = unmatched[0]
                        else:
                            del otu[SKOS_CLOSE_MATCH]
                # any other closeMatch value (e.g. a bare string) is left
                # untouched
            #_LOG.debug('tl =' + str(tl))
            for k, t in moveable2taxon_link.items():
                al = otu.get(k)
                if al:
                    tl[t] = al
                    del otu[k]
            if tl:
                otu['^ot:taxonLink'] = tl
    for trees in nexml['treesById'].values():
        _drop_tags_and_simplify(trees)
        for tree in trees['treeById'].values():
            _drop_tags_and_simplify(tree)
            tt = tree.get('@xsi:type', 'nex:FloatTree')
            if tt.lower() == 'nex:inttree':
                e_len_coerce = int
            else:
                e_len_coerce = float
            for edge_d in tree['edgeBySourceId'].values():
                for edge in edge_d.values():
                    try:
                        edge['@length'] = e_len_coerce(edge['@length'])
                    except (KeyError, TypeError, ValueError, OverflowError):
                        # best effort: edges with no length, or a length
                        # that cannot be coerced, are left as they were
                        pass
            for node in tree['nodeById'].values():
                nl = node.get('@label')
                if nl:
                    no = node.get('@otu')
                    # a node label that merely repeats its otu's original
                    # label is redundant; .get() tolerates a node that
                    # refers to an otu id that was not seen above
                    if no and _otu2label.get(no) == nl:
                        del node['@label']

    if prefix_map:
        nexml['^ot:taxonLinkPrefixes'] = prefix_map
    if merge_blocks:
        from peyotl.manip import merge_otus_and_trees
        merge_otus_and_trees(raw)
    if nexson_syntax_version != BY_ID_HONEY_BADGERFISH:
        convert_nexson_format(raw,
                              nexson_syntax_version,
                              current_format=BY_ID_HONEY_BADGERFISH,
                              sort_arbitrary=sort_arbitrary)
    elif sort_arbitrary:
        sort_arbitrarily_ordered_nexson(raw)
    return raw
예제 #3
0
def _main():
    '''Command-line entry point for the NeXML/NexSON converter.

    Parses sys.argv, reads the input file (UTF-8), and writes the converted
    document to the --output file or to standard output.

    If neither --mode nor --export is given, the input format is detected
    from the first non-whitespace character ("<" means NeXML; "{" or "["
    means JSON) and NeXML is converted to NexSON or JSON to NeXML.

    Exits via sys.exit with a message if the options clash, a file cannot
    be opened, or the input does not look like NeXML or a JSON version of
    NeXML.
    '''
    import sys, codecs, json, os
    import argparse
    _HELP_MESSAGE = '''NeXML/NexSON converter'''
    _EPILOG = '''UTF-8 encoding is used (for input and output).

Environmental variables used:
    NEXSON_INDENTATION_SETTING indentation in NexSON (default 0)
    NEXML_INDENTATION_SETTING indentation in NeXML (default is 0).
    NEXSON_LOGGING_LEVEL logging setting: NotSet, Debug, Warn, Info, Error
    NEXSON_LOGGING_FORMAT format string for logging messages.
'''
    parser = argparse.ArgumentParser(description=_HELP_MESSAGE,
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog=_EPILOG)
    parser.add_argument("input", help="filepath to input")
    parser.add_argument("-o", "--output",
                        metavar="FILE",
                        required=False,
                        help="output filepath. Standard output is used if omitted.")
    parser.add_argument("-s", "--sort",
                        action="store_true",
                        default=False,
                        help="If specified, the arbitrarily ordered items will be sorted.")
    e_choices = ["nexml",
                 str(BADGER_FISH_NEXSON_VERSION),
                 str(DIRECT_HONEY_BADGERFISH),
                 str(BY_ID_HONEY_BADGERFISH),
                 "0.0",
                 "1.0",
                 "1.2",
                 "badgerfish"]
    e_choices.sort()
    e_help = 'output format. Valid choices are: "{c}". \
With "0.0" and "badgerfish" as aliases for "0.0.0", and \
"1.2" being an alias for the most recent version of honeybadgerfish \
(1.2.0). The verions "1.0.0" and its alias "1.0" refer to a \
version that uses the honeybadgefish syntax for meta elements, \
but maintained the direct object-mapping from NeXML of the \
badgerfish form of NexSON'.format(c='", "'.join(e_choices))
    parser.add_argument("-e", "--export",
                        metavar="FMT",
                        required=False,
                        choices=e_choices,
                        help=e_help)
    codes = 'xjb'
    parser.add_argument("-m", "--mode",
                        metavar="MODE",
                        required=False,
                        choices=[i + j for i in codes for j in codes],
                        help="A less precise way to specify a mapping. The \
                               m option is a two-letter code for {input}{output} \
                               The letters are x for NeXML, j for NexSON, \
                               and b for BadgerFish JSON version of NexML. \
                               The default behavior is to autodetect the format \
                               and convert JSON to NeXML or NeXML to NexSON.")
    args = parser.parse_args()
    inpfn = args.input
    outfn = args.output
    mode = args.mode
    export_format = args.export
    # Expand the short aliases to the full version strings.
    if export_format:
        if export_format.lower() in ["badgerfish", "0.0"]:
            export_format = str(BADGER_FISH_NEXSON_VERSION)
        elif export_format.lower() == "1.0":
            export_format = str(DIRECT_HONEY_BADGERFISH)
        elif export_format.lower() == "1.2":
            export_format = str(BY_ID_HONEY_BADGERFISH)
    if export_format is not None and mode is not None:
        # The second letter of the mode names the output family; it must
        # agree with the explicit export format:
        #   b -> BadgerFish only, x -> NeXML only,
        #   j -> any NexSON that is neither NeXML nor BadgerFish.
        badgerfish_version = str(BADGER_FISH_NEXSON_VERSION)
        export_lower = export_format.lower()
        if (mode.endswith('b') and (export_format != badgerfish_version)) \
           or (mode.endswith('x') and (export_lower != "nexml")) \
           or (mode.endswith('j') and (export_lower in ("nexml", badgerfish_version))):
            sys.exit('export format {e} clashes with mode {m}. The mode option is not neeeded if the export option is used.'.format(e=export_format, m=mode))
    try:
        inp = codecs.open(inpfn, mode='rU', encoding='utf-8')
    except Exception:
        sys.exit('nexson_nexml: Could not open file "{fn}"\n'.format(fn=inpfn))
    out = None
    try:
        if mode is None:
            # Autodetect: find the first non-whitespace character.
            first_graph_char = ''
            try:
                while True:
                    c = inp.read(1)
                    if not c:
                        break  # end of file without any graphical character
                    if not c.isspace():
                        first_graph_char = c
                        break
            except (IOError, OSError, UnicodeError):
                # unreadable or undecodable input is reported below
                first_graph_char = ''
            if first_graph_char == '<':
                mode = 'x*'
            elif first_graph_char in ('{', '['):
                mode = '*x'
            else:
                # "{{" is an escaped literal brace for str.format
                sys.exit('nexson_nexml: First character of "{fn}" was not <, {{, or [\nInput does not appear to be NeXML or NexSON\n'.format(fn=inpfn))
            if export_format is None:
                if mode.endswith('*'):
                    export_format = str(DIRECT_HONEY_BADGERFISH)
                else:
                    export_format = "nexml"
            inp.seek(0)
        elif export_format is None:
            if mode.endswith('j'):
                export_format = str(DIRECT_HONEY_BADGERFISH)
            elif mode.endswith('b'):
                export_format = str(BADGER_FISH_NEXSON_VERSION)
            else:
                assert mode.endswith('x')
                export_format = "nexml"

        if export_format == "nexml":
            indentation = int(os.environ.get('NEXML_INDENTATION_SETTING', 0))
        else:
            indentation = int(os.environ.get('NEXSON_INDENTATION_SETTING', 0))

        if outfn is not None:
            try:
                out = codecs.open(outfn, mode='w', encoding='utf-8')
            except Exception:
                sys.exit('nexson_nexml: Could not open output filepath "{fn}"\n'.format(fn=outfn))
        else:
            out = codecs.getwriter('utf-8')(sys.stdout)

        if mode.startswith('x'):
            blob = get_ot_study_info_from_nexml(inp,
                                                nexson_syntax_version=export_format)
        else:
            blob = json.load(inp)
            if mode.startswith('*'):
                # The input was only guessed to be JSON; make sure that it
                # really is a JSON rendering of NeXML.
                try:
                    n = get_nexml_el(blob)
                except Exception:
                    n = None
                if not n or (not isinstance(n, dict)):
                    sys.exit('No top level "nex:nexml" element found. Document does not appear to be a JSON version of NeXML\n')
                mode = 'j' + mode[1]
        if args.sort:
            sort_arbitrarily_ordered_nexson(blob)
        if export_format == "nexml":
            if indentation > 0:
                indent = ' '*indentation
            else:
                indent = ''
            newline = '\n'
            write_obj_as_nexml(blob,
                               out,
                               addindent=indent,
                               newl=newline)
        else:
            if not mode.startswith('x'):
                blob = convert_nexson_format(blob, export_format, sort_arbitrary=True)
            write_as_json(blob, out, indent=indentation)
    finally:
        inp.close()
        # Only close an output file that was opened here; the writer that
        # wraps sys.stdout is left open.
        if outfn is not None and out is not None:
            out.close()
예제 #4
0
     except:
         err_content = ''
     response.headers['Content-Type'] = 'text/plain'
     raise HTTP(501, T("Conversion to NeXML failed.\n" + err_content))
 if output == 'nexml':
     response.headers['Content-Type'] = 'text/xml'
     return open(NEXML_FILEPATH, 'rU').read()
 NEXSON_FILENAME = 'nexson' + NEXSON_VERSION + '.json'
 NEXSON_FILEPATH = os.path.join(working_dir, NEXSON_FILENAME)
 NEXSON_DONE_FILEPATH = NEXSON_FILEPATH + '.written'
 NEXSON_LOCKFILEPATH = NEXSON_FILEPATH+ '.lock'
 if not os.path.exists(NEXSON_DONE_FILEPATH):
     try:
         with locket.lock_file(NEXSON_LOCKFILEPATH, timeout=0):
             if not os.path.exists(NEXSON_DONE_FILEPATH):
                 dfj = get_ot_study_info_from_nexml(NEXML_FILEPATH,
                                                    nexson_syntax_version=NEXSON_VERSION)
                 out = codecs.open(NEXSON_FILEPATH, 'w', encoding='utf-8')
                 json.dump(dfj, out, indent=0, sort_keys=True)
                 out.write('\n')
                 out.close()
                 out = open(NEXSON_DONE_FILEPATH, 'w')
                 out.write('0\n')
                 out.close()
     except locket.LockError:
         return HTTP(102, "Conversion to NexSON still running")
 if output in ['nexson', 'ot:nexson']:
     response.view = 'generic.json'
     nex = json.load(codecs.open(NEXSON_FILEPATH, 'rU', encoding='utf-8'))
     num_trees = count_num_trees(nex, NEXSON_VERSION)
     r = {'data': nex}
     bundle_properties = json.load(codecs.open(RETURN_ATT_FILEPATH, 'rU', encoding='utf-8'))
예제 #5
0
def _main():
    '''Command-line entry point for the NexSON (or NeXML) to newick converter.

    Parses sys.argv, reads the input file (UTF-8), and either lists the
    tree IDs (--list) or writes newick for the requested tree(s) to the
    --output file or to standard output.

    Exits via sys.exit with a message if the tip label choice is not one
    of the supported fields or if a file cannot be opened.
    '''
    import sys, codecs, json
    import argparse
    _HELP_MESSAGE = '''NexSON (or NeXML) to newick converter'''
    _EPILOG = '''UTF-8 encoding is used (for input and output).

Environmental variables used:
    NEXSON_LOGGING_LEVEL logging setting: NotSet, Debug, Warn, Info, Error
    NEXSON_LOGGING_FORMAT format string for logging messages.
'''
    tip_label_list = PhyloSchema._otu_label_list
    for known_label in tip_label_list:
        # sanity check on the library constant: the "ot:" prefix is
        # stripped below to build the choices shown in the help text
        assert known_label.startswith('ot:')
    tip_labels_choices = [i[3:] for i in tip_label_list]
    parser = argparse.ArgumentParser(description=_HELP_MESSAGE,
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog=_EPILOG)
    parser.add_argument("input", help="filepath to input")
    parser.add_argument("-i", "--id",
                        metavar="TREE-ID",
                        required=False,
                        help="The ID tree to emit")
    parser.add_argument("-o", "--output",
                        metavar="FILE",
                        required=False,
                        help="output filepath. Standard output is used if omitted.")
    parser.add_argument("-l", "--list",
                        action="store_true",
                        default=False,
                        help="Just list the tree IDs in the nexSON.")
    parser.add_argument("-x", "--xml",
                        action="store_true",
                        default=False,
                        help="Parse input as NeXML rather than NexSON.")
    tl_help = 'The field to use to label tips. Should be one of: "{}"'
    tl_help = tl_help.format('", "'.join(tip_labels_choices))
    parser.add_argument("-t", "--tip-label",
                        metavar="STRING",
                        required=False,
                        default='originallabel',
                        help=tl_help)
    args = parser.parse_args()
    # The "ot:" prefix is optional on the command line.
    otu_label = args.tip_label.lower()
    if not otu_label.startswith('ot:'):
        otu_label = 'ot:' + otu_label
    if otu_label not in tip_label_list:
        sys.exit('Illegal tip label choice "{}"\n'.format(args.tip_label))

    inpfn = args.input
    outfn = args.output
    try:
        inp = codecs.open(inpfn, mode='rU', encoding='utf-8')
    except Exception:
        sys.exit('nexson_newick: Could not open file "{fn}"\n'.format(fn=inpfn))

    out = None
    try:
        if outfn is not None:
            try:
                out = codecs.open(outfn, mode='w', encoding='utf-8')
            except Exception:
                sys.exit('nexson_newick: Could not open output filepath "{fn}"\n'.format(fn=outfn))
        else:
            out = codecs.getwriter('utf-8')(sys.stdout)

        if args.xml:
            src_schema = PhyloSchema('nexml')
            blob = get_ot_study_info_from_nexml(inp)
        else:
            src_schema = None
            blob = json.load(inp)
        if args.list:
            schema = PhyloSchema(content='treelist', output_nexml2json='1.2.1')
            tree_ids = schema.convert(src=blob, src_schema=src_schema)
            out.write('{t}\n'.format(t='\n'.join(tree_ids)))
        else:
            schema = create_content_spec(content='tree', content_id=args.id, format='newick', otu_label=otu_label)
            try:
                schema.convert(src=blob, serialize=True, output_dest=out, src_schema=src_schema)
            except KeyError:
                # If the document has no top-level nexml element, retry with
                # the object stored under its "data" key.
                if 'nexml' not in blob and 'nex:nexml' not in blob:
                    blob = blob['data']
                    schema.convert(src=blob, serialize=True, output_dest=out, src_schema=src_schema)
                else:
                    raise
    finally:
        inp.close()
        # Only close an output file that was opened here; the writer that
        # wraps sys.stdout is left open.
        if outfn is not None and out is not None:
            out.close()