Exemple #1
0
def main(mode, infile, outfile, format):
    """
    Conversion utility for NIF files.

    This converts the identifiers used to annotate mentions in documents
    across knowledge bases. For instance, the following will convert
    a NIF file with DBpedia identifiers to a NIF file with Wikidata identifiers:

       nifconverter --mode dbr:wd -i dbpedia_nif.ttl -o wikidata_nif.ttl

    """
    # NOTE(review): the docstring above is presumably shown as the CLI help
    # text by click, so its wording is left untouched -- confirm.
    # The `format` argument is accepted but never read by this body.

    # Look up the converter registered under the requested mode; an unknown
    # mode is reported as a bad CLI parameter together with the allowed ones.
    selected = registered_converters.get(mode)
    if selected is None:
        supported = get_allowed_modes()
        raise click.BadParameter(
            'Invalid mode. Supported modes are: ' + supported)

    nif_translator = NIFTranslator(selected)

    # Parse the whole input document into a NIF collection.
    with click.open_file(infile) as source:
        raw = source.read()
        collection = NIFCollection.loads(raw)

    # The translator is called for its effect on `collection`; its return
    # value is not used.
    nif_translator.translate_collection(collection)

    # Serialise the collection to the output.
    with click.open_file(outfile, 'w') as sink:
        serialized = collection.dumps()
        sink.write(serialized)
Exemple #2
0
def main(converter, target, infile, outfile, format):
    """
    Conversion utility for NIF files.

    This converts the identifiers used to annotate mentions in documents
    across knowledge bases. For instance, the following will convert
    a NIF file with DBpedia identifiers to a NIF file with Wikidata identifiers,
    using the default converter (which uses the DBpedia SameThing service):

       nifconverter -i dbpedia_nif.ttl -o wikidata_nif.ttl

    """
    # NOTE(review): the docstring above is presumably shown as the CLI help
    # text by click, so its wording is left untouched -- confirm.
    # The `format` argument is accepted but never read by this body.

    # Resolve the converter factory by name; an unknown name is reported as a
    # bad CLI parameter together with the available converters.
    factory = registered_converters.get(converter)
    if factory is None:
        template = 'Invalid converter "{}". Supported converters are: {}'
        raise click.BadParameter(
            template.format(converter, get_available_converters()))

    # The factory is instantiated with the requested target prefix and
    # wrapped in a translator.
    instance = factory(target_prefix=target)
    nif_translator = NIFTranslator(instance)

    # Parse the whole input document into a NIF collection.
    with click.open_file(infile) as source:
        raw = source.read()
        collection = NIFCollection.loads(raw)

    # The translator is called for its effect on `collection`; its return
    # value is not used.
    nif_translator.translate_collection(collection)

    # Serialise the collection to the output.
    with click.open_file(outfile, 'w') as sink:
        serialized = collection.dumps()
        sink.write(serialized)
Exemple #3
0
def gerbil_handler():
    """Publish the text of an incoming NIF request and wait for its answer.

    The request body is parsed as NIF and the ``handler_id`` query argument
    selects an entry of the shared ``queries`` mapping. Without an
    ``annotator`` argument the entry's ``'test'`` slot is used; otherwise the
    slot of the named annotator under ``'experiment'`` is used. The slot's
    ``'context'`` and ``'query'`` fields are filled from the request, the
    handler waits (via ``_wait``) on the slot's ``'answer'``, clears it and
    returns the value obtained from ``_wait``.
    """

    def extract_string() -> Tuple[str, str]:
        # Subject and object (as strings) of the first triple whose predicate
        # contains 'isString'; None when no triple matches.
        candidates = (
            (str(triple[0]), str(triple[2]))
            for triple in nif.triples()
            if 'isString' in triple[1]
        )
        return next(candidates, None)

    nif = NIFCollection.loads(request.data.decode('utf-8'))
    hid = request.args['handler_id']

    if 'annotator' in request.args:
        # Experiment mode: work on the slot of the requested annotator.
        with lck:
            slot = queries[hid]['experiment']['annotators'][
                request.args['annotator']]
            slot['context'], slot['query'] = extract_string()

        reply = _wait(lambda: slot['answer'])

        with lck:
            slot['answer'] = None

        return reply

    # Test mode: the slot is looked up through `queries` on every access
    # rather than being bound to a local name.
    with lck:
        (queries[hid]['test']['context'],
         queries[hid]['test']['query']) = extract_string()

    reply = _wait(lambda: queries[hid]['test']['answer'])

    with lck:
        queries[hid]['test']['answer'] = None

    return reply
Exemple #4
0
def processQueryNif():
    """Annotate every context of a posted NIF document with linked entities.

    The request body is decoded as UTF-8 and parsed as Turtle NIF. For each
    context, its text is vectorised with ``v.vectorise`` and linked with
    ``p.link``; one phrase per distinct entity id is added to the context,
    pointing at ``http://www.wikidata.org/entity/<id>``.

    Returns:
        A ``Response`` carrying the serialised NIF document, with its
        ``content-type`` header set to the request's ``Content`` header
        (default ``application/x-turtle``). If anything raises while
        processing, the exception is printed and an empty string is returned.
    """
    print("inside")
    content_format = request.headers.get('Content') or 'application/x-turtle'
    nif_body = request.data.decode("utf-8")
    print(nif_body)
    try:
        nif_doc = NIFCollection.loads(nif_body, format='turtle')
        for context in nif_doc.contexts:
            vectors = v.vectorise(context.mention)
            entities = p.link(vectors)
            # Distinct entity ids: the first element of every candidate in
            # every candidate list (the mapping's keys are not needed).
            entity_ids = {
                ent[0]
                for entityarr in entities.values()
                for ent in entityarr
            }
            for entity in entity_ids:
                # NOTE(review): every phrase is given the fixed span [0, 1)
                # rather than a real mention offset -- confirm this is
                # intended.
                context.add_phrase(
                    beginIndex=0,
                    endIndex=1,
                    taIdentRef='http://www.wikidata.org/entity/' + entity)
        # Serialise once so the logged text and the response body are
        # guaranteed to be the same string.
        serialized = nif_doc.dumps()
        resp = Response(serialized)
        print(serialized)
        resp.headers['content-type'] = content_format
        return resp
    except Exception as e:
        # Best-effort endpoint: report the failure and answer with an empty
        # body instead of propagating the error.
        print(e)
        return ''
Exemple #5
0
def nif2json(lang="en"):
    """Convert the two VoxEL Turtle files of one language to JSON files.

    For each of ``./VoxEL/rVoxEL-<lang>.ttl`` and ``./VoxEL/sVoxEL-<lang>.ttl``
    the file is parsed as Turtle NIF, converted with ``nif2dict`` and dumped
    to ``./r_<lang>.json`` / ``./s_<lang>.json`` respectively.

    Args:
        lang: Language code substituted into the input and output file names.
    """
    paths = ["./VoxEL/rVoxEL-{}.ttl", "./VoxEL/sVoxEL-{}.ttl"]
    prefix = ["r", "s"]

    for path, p in zip(paths, prefix):
        # Turtle documents are UTF-8; decode explicitly so the result does
        # not depend on the platform's default encoding.
        with open(path.format(lang), encoding="utf-8") as f:
            data = NIFCollection.loads(f.read(), format='turtle')
        out = nif2dict(data)
        with open("./{}_{}.json".format(p, lang), "w", encoding="utf-8") as f:
            json.dump(out, f, indent=4)
Exemple #6
0
def nif_2_annotations(nif_collection):
    """Group the entity references of a Turtle NIF document by context id.

    The id of a phrase is the integer after the last ``/`` of its
    ``context`` attribute.

    Args:
        nif_collection: Turtle-serialised NIF text.

    Returns:
        A ``defaultdict(list)`` mapping each integer id to the list of
        ``taIdentRef`` values of its phrases, with keys inserted in
        ascending order.
    """
    grouped = defaultdict(list)

    collection = NIFCollection.loads(nif_collection, format='turtle')
    for ctx in collection.contexts:
        for phrase in ctx.phrases:
            suffix = phrase.context.rsplit('/', 1)[-1]
            reference = phrase.taIdentRef
            grouped[int(suffix)].append(reference)

    # Re-insert the groups in ascending key order.
    ordered = defaultdict(list)
    for doc_id in sorted(grouped):
        ordered[doc_id] = grouped[doc_id]
    return ordered
Exemple #7
0
def nif_api(*args, **kwargs):
    """Annotate a posted NIF document with classified mentions.

    The request body is parsed as NIF. For each context, mentions are
    created from the context text with ``classifier.create_mentions``,
    classified with ``classifier.classify_mentions``, and each mention adds
    its phrase to the context.

    Positional and keyword arguments are accepted but not used.

    Returns:
        The serialised NIF document. The response ``content-type`` header is
        set to the request's ``Content`` header, defaulting to
        ``application/x-turtle``.
    """
    # NOTE(review): the format is read from a header named 'Content', not
    # 'Content-Type' -- confirm this is what clients send.
    content_format = request.headers.get('Content') or 'application/x-turtle'

    nif_body = request.body.read()
    nif_doc = NIFCollection.loads(nif_body)
    for context in nif_doc.contexts:
        logger.debug(context.mention)
        mentions = classifier.create_mentions(context.mention)
        # The return value is ignored; the mentions are used afterwards.
        classifier.classify_mentions(mentions)
        for mention in mentions:
            mention.add_phrase_to_nif_context(context)

    response.set_header('content-type', content_format)
    return nif_doc.dumps()
Exemple #8
0
def d2kb():
    """Disambiguate the pre-marked mentions of a posted Turtle NIF document.

    Each context of the request body becomes a ``Tweet``. When the context
    carries phrases, the tweet goes through preprocessing, candidate
    selection and disambiguation, with timing printed when ``VERBOSE`` is
    ``'yes'``. The tweet is then converted with ``annotation2nif``.

    Returns:
        The ``annotation2nif`` result for the last context processed.
    """
    # Trim surrounding whitespace from the raw body before parsing.
    data = request.data.strip()
    nif_post = NIFCollection.loads(data.decode('utf-8'), format='turtle')
    verbose = VERBOSE == 'yes'

    for context in nif_post.contexts:
        tweet = Tweet()
        tweet.mentions = []
        tweet.idTweet = context.uri
        tweet.text = context.mention

        # Collected per context: spans gathered for one context must not be
        # applied to the text of the next one.
        mentions = []
        try:
            for phrase in context.phrases:
                single_mention = (phrase.mention, phrase.beginIndex,
                                  phrase.endIndex)
                mentions.append(single_mention)
        except Exception:
            # Best effort: a context whose phrases cannot be read is treated
            # as having no (further) mentions. Interpreter-exit signals are
            # no longer swallowed.
            print('no mentions')

        if mentions:
            if verbose:
                print('\n\n:::: PREPROCESSING ::::\n\n')
            start = time.time()
            tweet = preprocessing_d2kb(tweet, mentions, VERBOSE)
            end = time.time()
            if verbose:
                print('Running time: {}'.format(end - start))

            if verbose:
                print('\n\n:::: ENTITY SELECTION ::::\n\n')
            start = time.time()
            tweet.candidates = select_candidates(tweet, vocab2idx, TYPE, MAX,
                                                 BOOST, VERBOSE)
            end = time.time()
            if verbose:
                print('Running time: {}'.format(end - start))

            if verbose:
                print('\n\n:::: DISAMBIGUATION ::::\n\n')
            start = time.time()
            tweet.entities = disambiguate_mentions(tweet, THRESHOLD, model,
                                                   device, vocab2idx, WS,
                                                   EXTRA, VERBOSE)
            end = time.time()
            if verbose:
                print('Running time: {}'.format(end - start))

        collection_name = "http://optic.ufsc.br/"
        nif = annotation2nif(collection_name, tweet)

    # NOTE(review): `nif` is only bound inside the loop, so a document with
    # no contexts still fails here with UnboundLocalError, and with several
    # contexts only the last result is returned -- confirm the intended
    # response for both cases.
    return nif
Exemple #9
0
    # ******************************** #
    # Start OPTIC
    count = 0

    # Read directory with tweets to be annotated
    inputs = set()
    for nif_temp in os.listdir(INPUT_PATH):
        # Initially, we works only with RDF turtle standard
        if (fnmatch.fnmatch(nif_temp, '*.ttl')):
            inputs.add(nif_temp)

    for nif_input in inputs:
        nif_file = ''
        with open(INPUT_PATH + nif_input, 'r') as f:
            nif_file = f.read()
        nif_post = NIFCollection.loads(nif_file, format='turtle')
        for context in nif_post.contexts:
            tweet = Tweet()
            tweet.idTweet = context.uri
            tweet.text = context.mention
            tweet.mentions = []

            # A2KB Mode
            # TODO
            if MODE == 'a2kb':
                continue

            # D2KB Mode
            else:
                mentions = []
                try: