Example #1
import numpy as np


def predict(sentence, model):
    # addCharInformatioin, padding, createMatrices, createBatches and the
    # word2Idx/label2Idx/case2Idx/char2Idx/idx2Label mappings are module-level
    # names defined elsewhere in the project.
    # Wrap the sentence as a one-sentence dataset of [token, label] pairs with dummy 'O' labels.
    sen_list = [[[i, 'O\n'] for i in sentence.split()]]
    #sen_list = [[['SOCCER', 'O\n'], ['-', 'O\n'], ['JAPAN', 'O\n'], ['GET', 'O\n'], ['LUCKY', 'O\n'], ['WIN', 'O\n'], [',', 'O\n'], ['CHINA', 'O\n'], ['IN', 'O\n'], ['SURPRISE', 'O\n'], ['DEFEAT', 'O\n'], ['.', 'O\n']]]
    test = addCharInformatioin(sen_list)

    predLabels = []

    test_set = padding(
        createMatrices(test, word2Idx, label2Idx, case2Idx, char2Idx))

    test_batch, test_batch_len = createBatches(test_set)

    for i, data in enumerate(test_batch):
        tokens, casing, char, labels = data

        tokens = np.asarray([tokens])
        casing = np.asarray([casing])
        char = np.asarray([char])
        pred = model.predict([tokens, casing, char], verbose=False)[0]
        pred = pred.argmax(axis=-1)  # pick the highest-scoring class per token
        predLabels.append(pred)
    # Map the predicted label indices back to tag strings, paired with the input tokens.
    words_list = sentence.split()
    entity_labels = [(word, idx2Label[int(idx)])
                     for word, idx in zip(words_list, predLabels[-1])]
    print("predLabels", entity_labels)

    return entity_labels
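
A minimal usage sketch (assuming a trained model and the project mappings are in scope; the sentence is the one from the commented example):

entities = predict("SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT .", model)
# -> a list of (token, tag) pairs; the actual tags depend on the trained model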
Example #2
# The snippet begins mid-function; the definition and loop header below are
# reconstructed from the surrounding calls and are an assumption.
import numpy as np
from keras.utils import Progbar


def tag_dataset(dataset, model):
    correctLabels = []
    predLabels = []
    b = Progbar(len(dataset))  # progress bar over the batches
    for i, data in enumerate(dataset):
        tokens, casing, char, labels = data
        tokens = np.asarray([tokens])
        casing = np.asarray([casing])
        char = np.asarray([char])
        pred = model.predict([tokens, casing, char], verbose=False)[0]
        pred = pred.argmax(axis=-1)  # pick the highest-scoring class per token
        correctLabels.append(labels)
        predLabels.append(pred)
        b.update(i)
    b.update(i + 1)
    return predLabels, correctLabels
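
A minimal evaluation sketch on top of the returned index lists (token-level accuracy only, as an illustration; test_batch is assumed to be a batched dataset as produced by createBatches in Example #1, and the function name follows the reconstruction above):

predLabels, correctLabels = tag_dataset(test_batch, model)
total = correct = 0
for pred, gold in zip(predLabels, correctLabels):
    for p, g in zip(pred, gold):
        total += 1
        correct += int(p == g)
print("token accuracy:", correct / total)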


trainSentences = readfile("data/train.txt")
devSentences = readfile("data/valid.txt")
testSentences = readfile("data/test.txt")

trainSentences = addCharInformatioin(trainSentences)
devSentences = addCharInformatioin(devSentences)
testSentences = addCharInformatioin(testSentences)

labelSet = set()
words = {}

for dataset in [trainSentences, devSentences, testSentences]:
    for sentence in dataset:
        for token, char, label in sentence:
            labelSet.add(label)
            words[token.lower()] = True
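
Example #1 also relies on case2Idx and char2Idx, whose construction these snippets never show; a sketch of the fixed maps commonly used in this family of scripts (an assumption):

case2Idx = {'numeric': 0, 'allLower': 1, 'allUpper': 2, 'initialUpper': 3,
            'other': 4, 'mainly_numeric': 5, 'contains_digit': 6, 'PADDING_TOKEN': 7}
char2Idx = {"PADDING": 0, "UNKNOWN": 1}
for c in " 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.,-_()[]{}!?:;#'\"/\\%$`&=*+@^~|<>":
    char2Idx[c] = len(char2Idx)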

# :: Create a mapping for the labels ::
label2Idx = {}
for label in labelSet:
    label2Idx[label] = len(label2Idx)  # assign each label the next free index
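
The inverse mapping used by predict in Example #1 follows directly:

idx2Label = {v: k for k, v in label2Idx.items()}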
Example #3
import os

from extract_all_words import extract_words
from candidate_retriever import generate_training_data

epochs = 100
training_data_path = "../data/ner_training_data.txt"
all_words_path = "../data/words.txt"
word_embedding_path = "../data/glove.6B.100d.txt"

if not os.path.isfile(all_words_path):
    extract_words()

if not os.path.isfile(training_data_path):
    generate_training_data()

trainSentences = readfile(training_data_path)
trainSentences = addCharInformatioin(trainSentences)

# Load all words from the train, test and dev sets (one word per line).
words = {}
with open(all_words_path, encoding="utf-8") as f:
    for w in f:
        words[w.strip()] = True  # strip the trailing newline so keys are bare words

# :: Create a mapping for the labels ::
label2Idx = {}
label2Idx["I"] = 1
label2Idx["O"] = 0

# :: Read in word embeddings ::
word2Idx = {}
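
The snippet is cut off here; a sketch of the GloVe-loading loop that typically follows (the padding/unknown-token handling is an assumption):

import numpy as np

wordEmbeddings = []
with open(word_embedding_path, encoding="utf-8") as fEmbeddings:
    for line in fEmbeddings:
        split = line.rstrip().split(" ")
        word = split[0]
        if len(word2Idx) == 0:  # first line: reserve indices for padding and unknown tokens
            word2Idx["PADDING_TOKEN"] = len(word2Idx)
            wordEmbeddings.append(np.zeros(len(split) - 1))
            word2Idx["UNKNOWN_TOKEN"] = len(word2Idx)
            wordEmbeddings.append(np.random.uniform(-0.25, 0.25, len(split) - 1))
        if word.lower() in words:  # keep only embeddings for words seen in the data
            wordEmbeddings.append(np.array([float(num) for num in split[1:]]))
            word2Idx[word] = len(word2Idx)
wordEmbeddings = np.array(wordEmbeddings)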
Example #4
File: nn.py  Project: avinik/Al
if datasetName == "Twitter":  # branch header reconstructed (assumption): the snippet begins mid-branch
    learnSentences = trainSentences[int(len(trainSentences)/10):]
    trainSentences = trainSentences[:int(len(trainSentences)/10)]
    testSentences = readfile("twitter/TwitterTestBIO.tsv")

elif datasetName == "Medline":
    trainSentences = readfileTwitter("twitter/MedlineBIO.tsv")
    learnSentences = []
    testSentences = []

elif datasetName == "Cadec":
    trainSentences = readfileTwitter("twitter/CadecBIO.tsv")
    learnSentences = []
    testSentences = []


trainSentences = addCharInformatioin(trainSentences)
learnSentences = addCharInformatioin(learnSentences)
testSentences = addCharInformatioin(testSentences)


labelSet = set()
words = {}

for dataset in [trainSentences, learnSentences, testSentences]:
    for sentence in dataset:
        for token, char, label in sentence:
            labelSet.add(label)
            words[token.lower()] = True

# :: Create a mapping for the labels ::
label2Idx = {}
Example #5
def make_dataset(file_name):
    sentences = readfile(file_name)
    sentences = addCharInformatioin(sentences)
    return sentences
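
Usage is then a one-liner (the path mirrors the one used in Example #2):

trainSentences = make_dataset("data/train.txt")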