import click
from pynif import NIFCollection
# registered_converters, get_allowed_modes and NIFTranslator are provided by
# the surrounding package.

def main(mode, infile, outfile, format):
    """
    Conversion utility for NIF files.

    This converts the identifiers used to annotate mentions in documents
    across knowledge bases. For instance, the following will convert a NIF
    file with DBpedia identifiers to a NIF file with Wikidata identifiers:

        nifconverter --mode dbr:wd -i dbpedia_nif.ttl -o wikidata_nif.ttl
    """
    converter = registered_converters.get(mode)
    if converter is None:
        raise click.BadParameter('Invalid mode. Supported modes are: '
                                 + get_allowed_modes())

    translator = NIFTranslator(converter)

    with click.open_file(infile) as f:
        nif = NIFCollection.loads(f.read())

    translator.translate_collection(nif)

    with click.open_file(outfile, 'w') as out:
        out.write(nif.dumps())
def main(converter, target, infile, outfile, format):
    """
    Conversion utility for NIF files.

    This converts the identifiers used to annotate mentions in documents
    across knowledge bases. For instance, the following will convert a NIF
    file with DBpedia identifiers to a NIF file with Wikidata identifiers,
    using the default converter (which uses the DBpedia SameThing service):

        nifconverter -i dbpedia_nif.ttl -o wikidata_nif.ttl
    """
    converter_impl = registered_converters.get(converter)
    if converter_impl is None:
        raise click.BadParameter(
            'Invalid converter "{}". Supported converters are: {}'.format(
                converter, get_available_converters()))

    translator = NIFTranslator(converter_impl(target_prefix=target))

    with click.open_file(infile) as f:
        nif = NIFCollection.loads(f.read())

    translator.translate_collection(nif)

    with click.open_file(outfile, 'w') as out:
        out.write(nif.dumps())
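# A minimal sketch of how a command like `main` above might be exposed with
# click; the option declarations and defaults here are illustrative
# assumptions, not the actual nifconverter definitions.
import click

@click.command()
@click.option('-c', '--converter', default='samething',
              help='Name of the registered converter to use.')
@click.option('-t', '--target', default='http://www.wikidata.org/entity/',
              help='URI prefix of the target knowledge base.')
@click.option('-i', '--infile', default='-', help='Input NIF file (- for stdin).')
@click.option('-o', '--outfile', default='-', help='Output NIF file (- for stdout).')
@click.option('-f', '--format', default='turtle', help='RDF serialization format.')
def main(converter, target, infile, outfile, format):
    ...  # body as in the example above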
from typing import Tuple

def gerbil_handler():
    # lck, queries and _wait are module-level synchronization state shared
    # with the rest of the GERBIL adapter.
    def extract_string() -> Tuple[str, str]:
        # The subject of the nif:isString triple is the context URI and its
        # object is the document text.
        for triple in nif.triples():
            if 'isString' in triple[1]:
                return str(triple[0]), str(triple[2])

    nif = NIFCollection.loads(request.data.decode('utf-8'))
    hid = request.args['handler_id']
    if 'annotator' not in request.args:
        with lck:
            queries[hid]['test']['context'], queries[hid]['test']['query'] = \
                extract_string()
        a = _wait(lambda: queries[hid]['test']['answer'])
        with lck:
            queries[hid]['test']['answer'] = None
        return a
    else:
        with lck:
            an = queries[hid]['experiment']['annotators'][request.args['annotator']]
            an['context'], an['query'] = extract_string()
        a = _wait(lambda: an['answer'])
        with lck:
            an['answer'] = None
        return a
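# `_wait` above is a helper from the surrounding adapter; a plausible minimal
# implementation (an assumption, not the original) polls the shared state
# until another thread fills in the answer:
import time

def _wait(get_answer, interval=0.1, timeout=60):
    """Poll get_answer() until it returns a non-None value or time runs out."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        answer = get_answer()
        if answer is not None:
            return answer
        time.sleep(interval)
    raise TimeoutError('no answer produced in time')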
def processQueryNif():
    content_format = request.headers.get('Content') or 'application/x-turtle'
    nif_body = request.data.decode("utf-8")
    try:
        nif_doc = NIFCollection.loads(nif_body, format='turtle')
        for context in nif_doc.contexts:
            # Vectorise the context text and link it to candidate entities.
            vectors = v.vectorise(context.mention)
            entities = p.link(vectors)
            # Deduplicate the linked entity identifiers.
            linked = set()
            for idx, entityarr in entities.items():
                for ent in entityarr:
                    linked.add(ent[0])
            for entity in linked:
                context.add_phrase(
                    beginIndex=0,
                    endIndex=1,
                    taIdentRef='http://www.wikidata.org/entity/' + entity)
        resp = Response(nif_doc.dumps())
        resp.headers['content-type'] = content_format
        return resp
    except Exception as e:
        print(e)
        return ''
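# Example client for the endpoint above, assuming it is routed at
# /processQueryNif on a local Flask server (route and port are assumptions):
import requests

with open('document.ttl', 'rb') as f:
    nif_payload = f.read()
resp = requests.post('http://localhost:5000/processQueryNif',
                     data=nif_payload,
                     headers={'Content': 'application/x-turtle'})
print(resp.text)  # the NIF document enriched with Wikidata annotations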
import json
from pynif import NIFCollection

def nif2json(lang="en"):
    paths = ["./VoxEL/rVoxEL-{}.ttl", "./VoxEL/sVoxEL-{}.ttl"]
    prefix = ["r", "s"]
    for path, p in zip(paths, prefix):
        with open(path.format(lang)) as f:
            data = NIFCollection.loads(f.read(), format='turtle')
        out = nif2dict(data)
        with open("./{}_{}.json".format(p, lang), "w") as f:
            json.dump(out, f, indent=4)
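# `nif2dict` is a project-specific helper that is not shown here; a minimal
# sketch of what it might look like (the output schema is an assumption),
# collecting each context together with its annotated phrases:
def nif2dict(collection):
    docs = []
    for context in collection.contexts:
        docs.append({
            'uri': str(context.uri),
            'text': context.mention,
            'phrases': [{
                'begin': phrase.beginIndex,
                'end': phrase.endIndex,
                'entity': str(phrase.taIdentRef),
            } for phrase in context.phrases],
        })
    return docs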
from collections import defaultdict
from pynif import NIFCollection

def nif_2_annotations(nif_collection):
    annotations = defaultdict(list)
    temp_annotations = defaultdict(list)
    keys = []

    parsed_collection = NIFCollection.loads(nif_collection, format='turtle')
    for context in parsed_collection.contexts:
        for phrase in context.phrases:
            # The numeric annotation id is the last path segment of the
            # context URI.
            id_annotation = phrase.context.rsplit('/', 1)[-1]
            entity = phrase.taIdentRef
            keys.append(int(id_annotation))
            temp_annotations[int(id_annotation)].append(entity)

    # Return the annotations ordered by their numeric id.
    keys.sort()
    for key in keys:
        annotations[key] = temp_annotations[key]

    return annotations
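# Usage sketch: nif_2_annotations expects NIF in which each context URI's last
# path segment is a numeric annotation id, e.g. <http://example.org/annotation/7>.
# Assuming such a file exists (the path is illustrative):
with open('annotations.ttl') as f:
    annotations = nif_2_annotations(f.read())
for id_annotation, entities in annotations.items():
    print(id_annotation, entities)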
def nif_api(*args, **kwargs):
    # Bottle-style request/response objects; the format of the NIF payload is
    # negotiated via the 'Content' header.
    content_format = request.headers.get('Content') or 'application/x-turtle'
    content_type_to_format = {
        'application/x-turtle': 'turtle',
        'text/turtle': 'turtle',
    }
    nif_body = request.body.read()
    nif_doc = NIFCollection.loads(
        nif_body, format=content_type_to_format.get(content_format, 'turtle'))
    for context in nif_doc.contexts:
        logger.debug(context.mention)
        mentions = classifier.create_mentions(context.mention)
        classifier.classify_mentions(mentions)
        for mention in mentions:
            mention.add_phrase_to_nif_context(context)
    response.set_header('content-type', content_format)
    return nif_doc.dumps()
def d2kb():
    data = request.data.strip()
    nif_post = NIFCollection.loads(data.decode('utf-8'), format='turtle')

    # Collect the pre-annotated mentions from the request (D2KB setting).
    # Note that only the tweet built from the last context is carried forward,
    # so this assumes a single context per request.
    mentions = []
    for context in nif_post.contexts:
        tweet = Tweet()
        tweet.mentions = []
        tweet.idTweet = context.uri
        tweet.text = context.mention
        try:
            for phrase in context.phrases:
                single_mention = (phrase.mention, phrase.beginIndex, phrase.endIndex)
                mentions.append(single_mention)
        except Exception:
            print('no mentions')

    if len(mentions) > 0:
        if VERBOSE == 'yes':
            print('\n\n:::: PREPROCESSING ::::\n\n')
        start = time.time()
        tweet = preprocessing_d2kb(tweet, mentions, VERBOSE)
        end = time.time()
        if VERBOSE == 'yes':
            print('Running time: {}'.format(end - start))

        if VERBOSE == 'yes':
            print('\n\n:::: ENTITY SELECTION ::::\n\n')
        start = time.time()
        tweet.candidates = select_candidates(tweet, vocab2idx, TYPE, MAX,
                                             BOOST, VERBOSE)
        end = time.time()
        if VERBOSE == 'yes':
            print('Running time: {}'.format(end - start))

        if VERBOSE == 'yes':
            print('\n\n:::: DISAMBIGUATION ::::\n\n')
        start = time.time()
        tweet.entities = disambiguate_mentions(tweet, THRESHOLD, model, device,
                                               vocab2idx, WS, EXTRA, VERBOSE)
        end = time.time()
        if VERBOSE == 'yes':
            print('Running time: {}'.format(end - start))

    collection_name = "http://optic.ufsc.br/"
    nif = annotation2nif(collection_name, tweet)
    return nif
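# `annotation2nif` is a project helper not shown here; a plausible sketch (an
# assumption, not the original implementation) that serializes the annotated
# tweet back to NIF with pynif:
from pynif import NIFCollection

def annotation2nif(collection_name, tweet):
    collection = NIFCollection(uri=collection_name)
    context = collection.add_context(tweet.idTweet, tweet.text)
    # Assumed entity shape: (mention, beginIndex, endIndex, identifier).
    for mention, begin, end, identifier in (tweet.entities or []):
        context.add_phrase(beginIndex=begin, endIndex=end,
                           taIdentRef=identifier)
    return collection.dumps(format='turtle')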
# ********************************
#
# Start OPTIC

count = 0

# Read the directory containing the tweets to be annotated.
inputs = set()
for nif_temp in os.listdir(INPUT_PATH):
    # Initially, we work only with the RDF Turtle standard.
    if fnmatch.fnmatch(nif_temp, '*.ttl'):
        inputs.add(nif_temp)

for nif_input in inputs:
    nif_file = ''
    with open(INPUT_PATH + nif_input, 'r') as f:
        nif_file = f.read()

    nif_post = NIFCollection.loads(nif_file, format='turtle')
    for context in nif_post.contexts:
        tweet = Tweet()
        tweet.idTweet = context.uri
        tweet.text = context.mention
        tweet.mentions = []

        # A2KB mode
        # TODO
        if MODE == 'a2kb':
            continue
        # D2KB mode
        else:
            mentions = []
            try: