def test_CRF_with_Word_Segmentation(self):
    """Train a LinearChainCRF on the 'MSR' corpus and print its tag accuracy.

    Steps: load the feature template and the corpus, hold out 30% of the
    samples with a fixed seed, build feature functions from the training
    split, fit the CRF, then compare predicted tags with the reference tags
    position by position.

    The reported training time is wall-clock time measured from the start of
    the test, so it also covers template/corpus loading and feature building.
    Results are only printed; nothing is asserted or returned.
    """
    starttime = datetime.datetime.now()

    # template of feature functions
    template_builder = TemplateBuilder("feature.template")

    # input dataset and tags
    msr_corpus = Corpus('MSR', max_length=15, min_length=5, max_samples=10000)
    dataset, tags = msr_corpus.dataset, msr_corpus.tags
    print("dataset nums: " + str(len(dataset)))

    # split train and test dataset
    X_train, X_test, Y_train, Y_test = train_test_split(
        dataset, tags, test_size=0.3, random_state=0)

    # build feature functions according to input dataset and feature templates
    feature_builder = FeatureBuilder(X_train, Y_train, template_builder)

    # CRF model
    crf = LinearChainCRF(feature_builder, epochs=30, learning_rate=1e-2)
    crf.fit(X_train, Y_train)

    endtime = datetime.datetime.now()
    # timedelta.seconds is only the seconds *component* (days are dropped);
    # total_seconds() is the full elapsed duration.
    elapsed_seconds = int((endtime - starttime).total_seconds())
    print("training time: " + str(elapsed_seconds) + " seconds")

    # predict
    predicts = crf.predict(X_test)

    # evaluation: per-position tag accuracy over all test sequences
    total, correct = 0, 0
    for pred, tag in zip(predicts, Y_test):
        # Every predicted position counts toward the total; a position with
        # no reference tag (tag shorter than pred) counts as wrong.
        total += len(pred)
        correct += sum(1 for p, t in zip(pred, tag) if p == t)

    # Guard against an empty test split / empty predictions.
    accuracy = float(correct) / total if total else 0.0
    print("accuracy: ", accuracy)
#!/usr/bin/env python
# Command-line script: train a LinearChainCRF from a data file and hand the
# output model file name to the trainer.
import argparse

from crf import LinearChainCRF

if __name__ == '__main__':
    # Two required positional arguments: the training data and the model path.
    cli = argparse.ArgumentParser()
    cli.add_argument("datafile", help="data file for training input")
    cli.add_argument("modelfile", help="the model file name. (output)")
    options = cli.parse_args()

    # Build a default-configured model and train it on the given data.
    model = LinearChainCRF()
    model.train(options.datafile, options.modelfile)
#!/usr/bin/env python
# Command-line script: load a previously saved LinearChainCRF model and run
# its test routine on a data file.
import argparse

from crf import LinearChainCRF

if __name__ == '__main__':
    # Two required positional arguments: the test data and the model path.
    cli = argparse.ArgumentParser()
    cli.add_argument("datafile", help="data file for testing input")
    cli.add_argument("modelfile", help="the model file name.")
    options = cli.parse_args()

    # The model must be loaded before it is tested.
    model = LinearChainCRF()
    model.load(options.modelfile)
    model.test(options.datafile)
# Runs LinearChainCRF.verify_gradients() under three regularization settings:
# none, L2 only, and L1 only. The same model instance is reused; only its
# L1/L2 attributes are changed between runs.
#
# print(...) with a single string argument is used instead of the Python 2
# print statement so the script also runs on Python 3 (output is unchanged).
from crf import LinearChainCRF

print("Verifying gradients without regularization")
m = LinearChainCRF()
m.L2 = 0
m.L1 = 0
m.verify_gradients()

print("")
print("Verifying gradients with L2 regularization")
m.L2 = 0.001
m.verify_gradients()

print("")
print("Verifying gradients with L1 regularization")
m.L2 = 0  # switch L2 back off so only L1 is active for this run
m.L1 = 0.001
m.verify_gradients()
fString = "" for x in intArray: fString += xxx[x] return fString def test(myObj, testArray): myObj.use(testArray) print(myObj.useOutput) print(myObj.useConfidence) #test(myObj,[SamX.train_data[1][0]]) SamX = SampleData() myObj = LinearChainCRF(n_epochs=100) myObj.initialize(SamX.input_size, SamX.n_class) myObj.train(SamX.train_data[1:2]) ''' inputX=[ [1,2,3], [2,3,4], [3,4,5], ] inputY=[ [3,4,15], [13,20,32], [12,23,4], ] target=[0,1,0]#not 0 indexed