def d2kb():
    """Handle a D2KB request: link pre-annotated mentions to KB entities.

    Parses the turtle-encoded NIF document in the request body, collects
    the mentions (surface form, begin offset, end offset) already marked
    in each context, then runs the pipeline: preprocessing, candidate
    selection and disambiguation.

    Returns:
        The annotated document serialized as NIF by ``annotation2nif``.
    """
    # Request body is raw bytes; drop surrounding whitespace before parsing.
    data = request.data.strip()
    nif_post = NIFCollection.loads(data.decode('utf-8'), format='turtle')

    tweet = Tweet()  # fallback so `tweet` is bound even with zero contexts
    mentions = []
    for context in nif_post.contexts:
        tweet = Tweet()
        tweet.mentions = []
        tweet.idTweet = context.uri
        tweet.text = context.mention
        try:
            # Collect the gold mentions supplied by the caller.
            for phrase in context.phrases:
                mentions.append(
                    (phrase.mention, phrase.beginIndex, phrase.endIndex))
        except AttributeError:
            # Context carries no annotated phrases -- nothing to link.
            # (Was a bare `except:`; narrowed so real errors surface.)
            print('no mentions')

    if mentions:
        if VERBOSE == 'yes':
            print('\n\n:::: PREPROCESSING ::::\n\n')
        start = time.time()
        tweet = preprocessing_d2kb(tweet, mentions, VERBOSE)
        end = time.time()
        if VERBOSE == 'yes':
            print('Running time: {}'.format(end - start))

        if VERBOSE == 'yes':
            print('\n\n:::: ENTITY SELECTION ::::\n\n')
        start = time.time()
        tweet.candidates = select_candidates(
            tweet, vocab2idx, TYPE, MAX, BOOST, VERBOSE)
        end = time.time()
        if VERBOSE == 'yes':
            print('Running time: {}'.format(end - start))

        if VERBOSE == 'yes':
            print('\n\n:::: DISAMBIGUATION ::::\n\n')
        start = time.time()
        tweet.entities = disambiguate_mentions(
            tweet, THRESHOLD, model, device, vocab2idx, WS, EXTRA, VERBOSE)
        end = time.time()
        if VERBOSE == 'yes':
            print('Running time: {}'.format(end - start))

    # NOTE(review): only the last context survives the loop above, and
    # `mentions` accumulates across contexts; if a request can carry
    # several contexts this should move inside the loop -- confirm
    # against callers before changing.
    collection_name = "http://optic.ufsc.br/"
    nif = annotation2nif(collection_name, tweet)
    return nif
# NOTE(review): fragment of a per-context loop body -- the enclosing
# `for context in ...` starts before this view; `tweet`, `context`,
# `count` and the a2kb branch are defined/handled above it.
if MODE == 'a2kb':
    continue
# D2KB Mode
else:
    mentions = []
    try:
        # Get all the mentions present in the tweet
        for phrase in context.phrases:
            single_mention = (phrase.mention, phrase.beginIndex, phrase.endIndex)
            mentions.append(single_mention)
    # If there is no mention in tweet, return the original tweet
    # NOTE(review): bare `except:` also hides real errors (not only the
    # missing-phrases case) -- consider narrowing to AttributeError.
    except:
        continue
    # Run the full pipeline only when the context actually has mentions.
    if len(mentions) > 0:
        tweet = preprocessing_d2kb(tweet, mentions, VERBOSE)
        tweet.candidates = select_candidates(
            tweet, vocab2idx, TYPE, MAX, BOOST, VERBOSE)
        tweet.entities = disambiguate_mentions(
            tweet, THRESHOLD, model, device, vocab2idx, WS, EXTRA, VERBOSE)
# Create tweet semantically annotated, as nif, when there are mentions
# If not, just return the tweet as nif
collection_name = "http://optic.ufsc.br/"
nif = annotation2nif(collection_name, tweet)
# Persist one .ttl file per processed context, numbered by `count`.
with open('{}output/{}.ttl'.format(DATA_PATH, count), 'w') as output_file:
    output_file.write(nif)
count += 1