def predict(sentence, model):
    sen_list = [[[i, 'O\n'] for i in sentence.split()]]
    # sen_list = [[['SOCCER', 'O\n'], ['-', 'O\n'], ['JAPAN', 'O\n'], ['GET', 'O\n'],
    #              ['LUCKY', 'O\n'], ['WIN', 'O\n'], [',', 'O\n'], ['CHINA', 'O\n'],
    #              ['IN', 'O\n'], ['SURPRISE', 'O\n'], ['DEFEAT', 'O\n'], ['.', 'O\n']]]
    test = addCharInformatioin(sen_list)
    predLabels = []
    test_set = padding(createMatrices(test, word2Idx, label2Idx, case2Idx, char2Idx))
    test_batch, test_batch_len = createBatches(test_set)
    for i, data in enumerate(test_batch):
        tokens, casing, char, labels = data
        tokens = np.asarray([tokens])
        casing = np.asarray([casing])
        char = np.asarray([char])
        pred = model.predict([tokens, casing, char], verbose=False)[0]
        pred = pred.argmax(axis=-1)  # Predict the classes
        predLabels.append(pred)
    entity_labels = []
    j = 0
    words_list = sentence.split()
    for i in predLabels[-1]:
        entity_labels.append((words_list[j], idx2Label[int(i)]))
        j += 1
    print("predLabels", entity_labels)
    return entity_labels
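# A minimal usage sketch (assumptions: the trained network was saved with Keras'
# model.save, the word2Idx / label2Idx / case2Idx / char2Idx / idx2Label mappings
# built at training time are in scope, and the file name "ner_model.h5" is
# hypothetical).
if __name__ == "__main__":
    from keras.models import load_model

    ner_model = load_model("ner_model.h5")
    predict("SOCCER - JAPAN GET LUCKY WIN , CHINA IN SURPRISE DEFEAT .", ner_model)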
        casing = np.asarray([casing])
        char = np.asarray([char])
        pred = model.predict([tokens, casing, char], verbose=False)[0]
        pred = pred.argmax(axis=-1)  # Predict the classes
        correctLabels.append(labels)
        predLabels.append(pred)
        b.update(i)
    b.update(i + 1)
    return predLabels, correctLabels


trainSentences = readfile("data/train.txt")
devSentences = readfile("data/valid.txt")
testSentences = readfile("data/test.txt")

trainSentences = addCharInformatioin(trainSentences)
devSentences = addCharInformatioin(devSentences)
testSentences = addCharInformatioin(testSentences)

labelSet = set()
words = {}

for dataset in [trainSentences, devSentences, testSentences]:
    for sentence in dataset:
        for token, char, label in sentence:
            labelSet.add(label)
            words[token.lower()] = True

# :: Create a mapping for the labels ::
label2Idx = {}
for label in labelSet:
    label2Idx[label] = len(label2Idx)
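# predict() above also needs the inverse mapping from indices back to label
# strings; a one-line sketch of how it is typically built:
idx2Label = {v: k for k, v in label2Idx.items()}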
from extract_all_words import extract_words
from candidate_retriever import generate_training_data

epochs = 100
training_data_path = "../data/ner_training_data.txt"
all_words_path = "../data/words.txt"
word_embedding_path = "../data/glove.6B.100d.txt"

if not os.path.isfile(all_words_path):
    extract_words()

if not os.path.isfile(training_data_path):
    generate_training_data()

trainSentences = readfile(training_data_path)
trainSentences = addCharInformatioin(trainSentences)

## LOAD all words from train, test and dev
words = {}
with open(all_words_path, encoding="utf-8") as f:
    content = f.readlines()
for w in content:
    # iterate over the lines directly (the original looped over enumerate(content),
    # which keyed the dict with (index, line) tuples) and strip the trailing newline
    words[w.strip()] = True

# :: Create a mapping for the labels ::
label2Idx = {}
label2Idx["I"] = 1
label2Idx["O"] = 0

# :: Read in word embeddings ::
word2Idx = {}
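# Sketch of the embedding loader that typically follows (an assumption, not the
# verbatim original; assumes numpy is imported as np, as in the other scripts).
# glove.6B.100d.txt stores one token per line followed by its 100 float
# components; PADDING/UNKNOWN rows are added before the real vocabulary.
wordEmbeddings = []
with open(word_embedding_path, encoding="utf-8") as fEmbeddings:
    for line in fEmbeddings:
        split = line.strip().split(" ")
        word = split[0]
        if len(word2Idx) == 0:  # add padding + unknown vectors first
            word2Idx["PADDING_TOKEN"] = len(word2Idx)
            wordEmbeddings.append(np.zeros(len(split) - 1))
            word2Idx["UNKNOWN_TOKEN"] = len(word2Idx)
            wordEmbeddings.append(np.random.uniform(-0.25, 0.25, len(split) - 1))
        if word.lower() in words:  # keep only vectors for words we actually use
            word2Idx[word] = len(word2Idx)
            wordEmbeddings.append(np.array([float(num) for num in split[1:]]))
wordEmbeddings = np.array(wordEmbeddings)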
    # the first 10% of the sentences are kept for training; the remaining 90% form the learning pool
    learnSentences = trainSentences[int(len(trainSentences) / 10):]
    trainSentences = trainSentences[:int(len(trainSentences) / 10)]
    testSentences = readfile("twitter/TwitterTestBIO.tsv")
elif datasetName == "Medline":
    trainSentences = readfileTwitter("twitter/MedlineBIO.tsv")
    learnSentences = []
    testSentences = []
elif datasetName == "Cadec":
    trainSentences = readfileTwitter("twitter/CadecBIO.tsv")
    learnSentences = []
    testSentences = []

trainSentences = addCharInformatioin(trainSentences)
learnSentences = addCharInformatioin(learnSentences)
testSentences = addCharInformatioin(testSentences)

labelSet = set()
words = {}

for dataset in [trainSentences, learnSentences, testSentences]:
    for sentence in dataset:
        for token, char, label in sentence:
            labelSet.add(label)
            words[token.lower()] = True

# :: Create a mapping for the labels ::
label2Idx = {}
def make_dataset(file_name):
    sentences = readfile(file_name)
    sentences = addCharInformatioin(sentences)
    return sentences
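# Usage sketch: the helper collapses the readfile + addCharInformatioin pair
# used in the scripts above, e.g.
#   trainSentences = make_dataset("data/train.txt")
#   devSentences = make_dataset("data/valid.txt")
#   testSentences = make_dataset("data/test.txt")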