Code Example #1
# Inferred imports: `request` suggests this is a Flask request handler and
# NIFCollection comes from the pynif package. The remaining names (Tweet,
# preprocessing_d2kb, select_candidates, disambiguate_mentions,
# annotation2nif, model, device, vocab2idx and the upper-case settings)
# are defined elsewhere in the project.
import time

from flask import request
from pynif import NIFCollection


def d2kb():
    """Parse a NIF (Turtle) request, link the given mentions and return
    the annotated NIF document.
    """
    data = request.data.strip()
    nif_post = NIFCollection.loads(data.decode('utf-8'), format='turtle')
    for context in nif_post.contexts:
        tweet = Tweet()
        tweet.mentions = []
        tweet.idTweet = context.uri
        tweet.text = context.mention
        # Collect the mentions (surface form plus offsets) already given in
        # the request; in D2KB mode they are provided, not detected.
        mentions = []
        try:
            for phrase in context.phrases:
                single_mention = (phrase.mention, phrase.beginIndex,
                                  phrase.endIndex)
                mentions.append(single_mention)
        except Exception:
            print('no mentions')
        if len(mentions) > 0:
            if VERBOSE == 'yes':
                print('\n\n:::: PREPROCESSING ::::\n\n')
            start = time.time()
            tweet = preprocessing_d2kb(tweet, mentions, VERBOSE)
            end = time.time()
            if VERBOSE == 'yes':
                print('Running time: {}'.format(end - start))
                print('\n\n:::: ENTITY SELECTION ::::\n\n')
            start = time.time()
            tweet.candidates = select_candidates(tweet, vocab2idx, TYPE, MAX,
                                                 BOOST, VERBOSE)
            end = time.time()
            if VERBOSE == 'yes':
                print('Running time: {}'.format(end - start))
                print('\n\n:::: DISAMBIGUATION ::::\n\n')
            start = time.time()
            tweet.entities = disambiguate_mentions(tweet, THRESHOLD, model,
                                                   device, vocab2idx, WS,
                                                   EXTRA, VERBOSE)
            end = time.time()
            if VERBOSE == 'yes':
                print('Running time: {}'.format(end - start))
        # Serialize the annotated tweet as NIF; note that only the last
        # context processed in the loop is returned.
        collection_name = "http://optic.ufsc.br/"
        nif = annotation2nif(collection_name, tweet)
    return nif
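
For reference, here is a minimal client-side sketch of calling this handler, assuming d2kb() is exposed as a Flask POST route; the /d2kb path, the localhost:5000 address and the example tweet are assumptions, not taken from the snippet above. It builds the request body with pynif, the same library the handler uses to parse it, and sends it as Turtle.

import requests
from pynif import NIFCollection

# Build a NIF document with one context (the tweet text) and one
# pre-annotated phrase (the mention to be disambiguated).
collection = NIFCollection(uri="http://example.org/")
context = collection.add_context(
    uri="http://example.org/tweet1",
    mention="Messi plays for Argentina.")
context.add_phrase(beginIndex=0, endIndex=5)  # the mention "Messi"
payload = collection.dumps(format='turtle')

# POST the Turtle payload; d2kb() returns the annotated NIF document.
# The route and port are assumptions about how the handler is mounted.
response = requests.post("http://localhost:5000/d2kb",
                         data=payload.encode('utf-8'),
                         headers={'Content-Type': 'text/turtle'})
print(response.text)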
Code Example #2
            # Fragment from inside the loop over the NIF contexts of the
            # input collection; this part only implements D2KB mode.
            if MODE == 'a2kb':
                continue

            # D2KB mode: the mentions to link are given in the input.
            else:
                mentions = []
                try:
                    # Collect all the mentions (surface form plus offsets)
                    # present in the tweet.
                    for phrase in context.phrases:
                        single_mention = (phrase.mention, phrase.beginIndex,
                                          phrase.endIndex)
                        mentions.append(single_mention)
                # If the context carries no phrases, skip to the next tweet.
                except Exception:
                    continue
                if len(mentions) > 0:
                    tweet = preprocessing_d2kb(tweet, mentions, VERBOSE)
                    tweet.candidates = select_candidates(
                        tweet, vocab2idx, TYPE, MAX, BOOST, VERBOSE)
                    tweet.entities = disambiguate_mentions(
                        tweet, THRESHOLD, model, device, vocab2idx, WS, EXTRA,
                        VERBOSE)
            # Serialize the tweet as NIF, annotated when mentions were linked
            # and unchanged otherwise, and write it to the output folder.
            collection_name = "http://optic.ufsc.br/"
            nif = annotation2nif(collection_name, tweet)
            with open('{}output/{}.ttl'.format(DATA_PATH, count),
                      'w') as output_file:
                output_file.write(nif)
                count += 1
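
This batch variant writes one Turtle file per tweet into an output/ folder under DATA_PATH. A quick way to inspect a result is to load it back with pynif; the file path below is only an example, since DATA_PATH and the file counter are configured elsewhere, and the taIdentRef field is shown on the assumption that the written annotations carry the linked entity URI.

from pynif import NIFCollection

# Example path only: files are named after the running counter `count`.
with open('data/output/0.ttl') as ttl_file:
    collection = NIFCollection.loads(ttl_file.read(), format='turtle')

for context in collection.contexts:
    print(context.mention)  # the tweet text
    for phrase in context.phrases:
        # Each phrase keeps the mention offsets and, after disambiguation,
        # the URI of the linked entity (taIdentRef).
        print(phrase.mention, phrase.beginIndex, phrase.endIndex,
              phrase.taIdentRef)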