Beispiel #1
0
def _qrun(queries, mmjar):
    """Run each query through ``_run`` on its own and merge the results.

    Every ``(qid, qtxt)`` pair of *queries* is written as a single
    ``qid:qtxt`` line to its own UTF-8 temporary file.  The name of that
    file and *mmjar* are handed to ``_run`` together with a cache file
    built by ``cache_file`` from the hashes of both files.  Each result is
    passed through ``longest_concepts`` and merged into one dictionary.

    Args:
        queries: mapping of query id to query text.
        mmjar: value forwarded to ``_run`` and hashed with ``hash_file``
            (presumably the path of the MetaMap client jar -- confirm
            against the caller).

    Returns:
        A dict holding the entries of every per-query result.

    Exits:
        Reports on stderr and calls ``sys.exit(1)`` when two queries yield
        the same result key.
    """
    out = {}

    # run one query at a time for more efficient caching
    for qid, qtxt in queries.iteritems():
        # The ``with`` block guarantees the temporary file is closed (and
        # thereby removed) even if ``_run`` raises or we exit on a
        # duplicate key.
        with tempfile.NamedTemporaryFile() as tmp:
            qfn = tmp.name

            # Close the writer before ``_run`` reads the file so the
            # query line is flushed to disk.
            with codecs.open(qfn, "w", encoding="utf-8") as f:
                print >> f, "%s:%s" % (qid, qtxt)

            cf = cache_file(CACHEDIR,
                            [hash_file(qfn), hash_file(mmjar)],
                            "metamap")
            d = longest_concepts(_run(qfn, mmjar, cache_file=cf))

            for kk, vv in d.iteritems():
                if kk in out:
                    print >> sys.stderr, "error: duplicate key:", kk
                    print >> sys.stderr, out
                    sys.exit(1)
                out[kk] = vv

    return out
Beispiel #2
0
def _qrun(queries, mmjar):
    """Process the queries one at a time and combine what ``_run`` returns.

    For each ``(qid, qtxt)`` item of *queries* a temporary UTF-8 file
    containing the line ``qid:qtxt`` is created.  ``_run`` is called with
    that file's name, *mmjar*, and a cache file that ``cache_file`` derives
    from the hashes of the query file and of *mmjar*.  The output is
    filtered through ``longest_concepts`` and its entries are copied into
    the combined result.

    Args:
        queries: mapping of query id to query text.
        mmjar: value forwarded to ``_run`` and hashed with ``hash_file``
            (presumably the path of the MetaMap client jar -- confirm
            against the caller).

    Returns:
        A dict with the merged entries of all per-query results.

    Exits:
        Prints an error to stderr and calls ``sys.exit(1)`` if a result key
        is produced by more than one query.
    """
    out = {}

    # run one query at a time for more efficient caching
    for qid, qtxt in queries.iteritems():
        # Using the temporary file as a context manager ensures it is
        # closed -- and so deleted -- on every path out of the iteration,
        # including exceptions from ``_run`` and the ``sys.exit`` below.
        with tempfile.NamedTemporaryFile() as tmp:
            qfn = tmp.name

            # The writer must be closed before ``_run`` is invoked so that
            # the query line has been flushed to the file.
            with codecs.open(qfn, "w", encoding="utf-8") as f:
                print >> f, "%s:%s" % (qid, qtxt)

            d = _run(qfn,
                     mmjar,
                     cache_file=cache_file(
                         CACHEDIR,
                         [hash_file(qfn), hash_file(mmjar)], "metamap"))
            d = longest_concepts(d)

            for kk, vv in d.iteritems():
                if kk in out:
                    print >> sys.stderr, "error: duplicate key:", kk
                    print >> sys.stderr, out
                    sys.exit(1)
                out[kk] = vv

    return out
Beispiel #3
0
def _txtrun(text, mmjar, no_cache, long_concepts):

    if not text:
        print '[metamap info] empty query'
        return {'txt': {'concepts': []}}

#     tmp = tempfile.NamedTemporaryFile()
#     tfn = tmp.name
# 
#     with codecs.open(tfn, 'wb', encoding='utf-8') as f:
#         print >> f, u"txt:{0}".format(text)
    cf = cache_file(CACHEDIR, [hash_obj(text), hash_file(mmjar)], "metamap")

    # for some inesplicable reason, the Java MetaMap API client
    # throws a java.lang.StringIndexOutOfBoundsException exception
    # instead of gracefully returning nothing when a text has no concepts.
    try:
        d = _run(text, mmjar,  no_cache=no_cache, cache_file=cf)
    except ValueError:
        print '[metamap info] no concepts found'
        d = {'txt': {'concepts': []}}

    return d
Beispiel #4
0
def _txtrun(text, mmjar, no_cache, long_concepts):

    if not text:
        print '[metamap info] empty query'
        return {'txt': {'concepts': []}}

#     tmp = tempfile.NamedTemporaryFile()
#     tfn = tmp.name
#
#     with codecs.open(tfn, 'wb', encoding='utf-8') as f:
#         print >> f, u"txt:{0}".format(text)
    cf = cache_file(CACHEDIR, [hash_obj(text), hash_file(mmjar)], "metamap")

    # for some inesplicable reason, the Java MetaMap API client
    # throws a java.lang.StringIndexOutOfBoundsException exception
    # instead of gracefully returning nothing when a text has no concepts.
    try:
        d = _run(text, mmjar, no_cache=no_cache, cache_file=cf)
    except ValueError:
        print '[metamap info] no concepts found'
        d = {'txt': {'concepts': []}}

    return d