Code example #1
File: nn.py Project: sweetpand/NLP-Best-Practices
import numpy as np
from keras.utils import Progbar  # progress-bar utility shipped with Keras


def tag_dataset(dataset):  # enclosing definition restored; name assumed from similar projects
    """Tag each sentence with the trained model and collect predicted vs. gold labels."""
    correctLabels = []
    predLabels = []
    b = Progbar(len(dataset))
    for i, data in enumerate(dataset):
        tokens, casing, char, labels = data
        # add a batch dimension of 1 for each input
        tokens = np.asarray([tokens])
        casing = np.asarray([casing])
        char = np.asarray([char])
        # `model` is the trained Keras model defined elsewhere in the file
        pred = model.predict([tokens, casing, char], verbose=False)[0]
        pred = pred.argmax(axis=-1)  # predict the classes
        correctLabels.append(labels)
        predLabels.append(pred)
        b.update(i)
    b.update(i + 1)  # final tick so the bar reaches 100%
    return predLabels, correctLabels


trainSentences = readfile("data/train.txt")
devSentences = readfile("data/valid.txt")
testSentences = readfile("data/test.txt")

trainSentences = addCharInformatioin(trainSentences)
devSentences = addCharInformatioin(devSentences)
testSentences = addCharInformatioin(testSentences)

labelSet = set()
words = {}

for dataset in [trainSentences, devSentences, testSentences]:
    for sentence in dataset:
        for token, char, label in sentence:
            labelSet.add(label)
            words[token.lower()] = True
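Downstream, labelSet and words are normally turned into integer index mappings, as code example #2 below does by hand for its two labels. A minimal sketch of deriving both directions from labelSet; the names label2Idx and idx2Label are assumptions, not taken from this project:

# Map each label to an integer index and keep the inverse for decoding predictions
label2Idx = {label: idx for idx, label in enumerate(sorted(labelSet))}
idx2Label = {idx: label for label, idx in label2Idx.items()}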
Code example #2
import os.path
from extract_all_words import extract_words
from candidate_retriever import generate_training_data

epochs = 100
training_data_path = "../data/ner_training_data.txt"
all_words_path = "../data/words.txt"
word_embedding_path = "../data/glove.6B.100d.txt"

if not os.path.isfile(all_words_path):
    extract_words()

if not os.path.isfile(training_data_path):
    generate_training_data()

trainSentences = readfile(training_data_path)
trainSentences = addCharInformatioin(trainSentences)

# Load all words from train, test and dev
words = {}
with open(all_words_path, encoding="utf-8") as f:
    for w in f:
        words[w.strip()] = True  # iterate lines directly and strip the trailing newline

# :: Create a mapping for the labels ::
label2Idx = {}
label2Idx["I"] = 1
label2Idx["O"] = 0

# :: Read in word embeddings ::
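The excerpt stops at the embedding step. A minimal sketch of what plausibly follows, assuming the standard GloVe text format (one word per line, followed by its vector) and the word_embedding_path defined above; word2Idx, wordEmbeddings and the padding/unknown entries are assumptions, not taken from this project:

import numpy as np

word2Idx = {}
wordEmbeddings = []
with open(word_embedding_path, encoding="utf-8") as fEmbeddings:
    for line in fEmbeddings:
        split = line.strip().split(" ")  # GloVe line: word v1 v2 ... vN
        word = split[0]
        if len(word2Idx) == 0:
            # reserve index 0 for padding and index 1 for out-of-vocabulary words
            word2Idx["PADDING_TOKEN"] = len(word2Idx)
            wordEmbeddings.append(np.zeros(len(split) - 1))
            word2Idx["UNKNOWN_TOKEN"] = len(word2Idx)
            wordEmbeddings.append(np.random.uniform(-0.25, 0.25, len(split) - 1))
        if word.lower() in words:  # keep only vectors for words seen in the data
            word2Idx[word] = len(word2Idx)
            wordEmbeddings.append(np.array([float(num) for num in split[1:]]))
wordEmbeddings = np.array(wordEmbeddings)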
Code example #3
        b.update(i)
    b.update(i + 1)
    return predLabels, correctLabels


# trainSentences = readfile("data/train.txt")
# devSentences = readfile("data/valid.txt")
# testSentences = readfile("data/test.txt")
# trainSentences = readfile("data/ref_train.txt")
# devSentences = readfile("data/ref_dev.txt")
# testSentences = readfile("data/ref_test.txt")
# trainSentences = readfile("data/WkAnno_output_train_data.txt")
# devSentences = readfile("data/WkAnno_output_dev_data.txt")
# testSentences = readfile("data/WkAnno_output_test_data.txt")

trainSentences = readfile("training_data/train.txt")
devSentences = readfile("training_data/dev.txt")
testSentences = readfile("training_data/test.txt")

trainSentences = addCharInformatioin(trainSentences)
devSentences = addCharInformatioin(devSentences)
testSentences = addCharInformatioin(testSentences)

labelSet = set()
words = {}

for dataset in [trainSentences, devSentences, testSentences]:
    for sentence in dataset:
        for token, char, label in sentence:
            labelSet.add(label)
            words[token.lower()] = True
Code example #4
    def loadData(self):
        """Load data and add character information"""
        self.trainSentences = readfile("data/train.txt")
        self.devSentences = readfile("data/dev.txt")
        self.testSentences = readfile("data/test.txt")
Code example #5
File: nn.py Project: avinik/Al
import sys  # needed for the command-line arguments read below

samplingMethod = sys.argv[2]  # e.g. "entropySampling"

# Name of the model... See models.py for details
modelName = "LSTM_word_char"
datasetName = sys.argv[1]  # e.g. "Cadec"

print(datasetName + " " + samplingMethod)


# Load the dataset

if datasetName == "Twitter":
    trainSentences = readfileTwitter("twitter/TwitterTrainBIO.tsv")
    # the first tenth of the sentences becomes the initial training set;
    # the rest goes into learnSentences, the pool samplingMethod presumably draws from
    learnSentences = trainSentences[int(len(trainSentences) / 10):]
    trainSentences = trainSentences[:int(len(trainSentences) / 10)]
    testSentences = readfile("twitter/TwitterTestBIO.tsv")

elif datasetName == "Medline":
    trainSentences = readfileTwitter("twitter/MedlineBIO.tsv")
    learnSentences = []
    testSentences = []

elif datasetName == "Cadec":
    trainSentences = readfileTwitter("twitter/CadecBIO.tsv")
    learnSentences = []
    testSentences = []


trainSentences = addCharInformatioin(trainSentences)
learnSentences = addCharInformatioin(learnSentences)
testSentences = addCharInformatioin(testSentences)
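Judging from the sys.argv reads at the top of this example, the script is presumably launched with the dataset name and the sampling method as positional arguments; a hypothetical invocation:

python nn.py Twitter entropySampling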
Code example #6
File: nn.py Project: joydeb28/NLP
def make_dataset(file_name):
    sentences = readfile(file_name)
    sentences = addCharInformatioin(sentences)
    return sentences
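A usage sketch for this helper; the file path is hypothetical:

trainSentences = make_dataset("data/train.txt")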
Code example #7
def tag_dataset(dataset):  # enclosing definition restored; name assumed from similar projects
    correctLabels = []
    predLabels = []
    b = Progbar(len(dataset))
    for i, data in enumerate(dataset):
        tokens, char, labels = data
        # add a batch dimension of 1 for each input
        tokens = np.asarray([tokens])
        char = np.asarray([char])
        pred = model.predict([tokens, char], verbose=False)[0]
        pred = pred.argmax(axis=-1)  # predict the classes
        correctLabels.append(labels)
        predLabels.append(pred)
        b.update(i)
    return predLabels, correctLabels


trainSentences = readfile("train_data.txt")
testSentences = readfile("test_data.txt")

#testSentences.pop(0)
#trainSentences.pop(0)
#trainSentences[0].pop(0)
#testSentences[0].pop(0)
trainSentences = addCharInformatioin(trainSentences)

testSentences = addCharInformatioin(testSentences)

labelSet = set()
words = {}
"""for sentence in trainSentences:
    for token,char,label in sentence:
        labelSet.add(label)
Code example #8
File: main.py Project: CarloSegat/finalYearProject
    def loadData(self):
        """Load data and add character information"""
        self.trainSentences = readfile(
            "data/NER-ABSA-16_Restaurants_Train.txt")
        #self.devSentences = readfile("data/dev.txt")
        self.testSentences = readfile("data/NER-ABSA-16_Restaurants_Test.txt")