def test_singleClassifier():
	"""Train a default RelationClassifier on data with two relation types and check its F1 on held-out data."""
	corpusForTraining, goldCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=2)

	# Predict on a copy of the gold corpus with its relations stripped out
	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier()
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	score = kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score')
	assert round(score, 3) == 0.519
def test_simpleRelationClassifier_binary():
	"""A single-type binary relation task from generateTestData should be perfectly learnable (F1 == 1)."""
	corpusForTraining, goldCorpus = generateTestData(positiveCount=100, negativeCount=100)

	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier()
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	assert kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score') == 1.0
def test_logisticregression_threshold():
	"""A LogisticRegression classifier with a lowered decision threshold should recover every relation."""
	corpusForTraining, goldCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)

	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier(
		classifierType='LogisticRegression', threshold=0.3)
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	score = kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score')
	assert round(score, 3) == 1.0
def test_filterByEntityTypes_invalidTypes():
	"""Retyping every gold entity to a type unseen in training should make the classifier predict nothing (F1 == 0)."""
	corpusForTraining, goldCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)

	# Clone first: the prediction corpus keeps the original entity types,
	# while the gold corpus entities are renamed below.
	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	for document in goldCorpus.documents:
		for entity in document.entities:
			entity.entityType = 'a new type'

	model = kindred.RelationClassifier(features=[
		"unigramsBetweenEntities",
		"bigrams",
		"dependencyPathEdges",
		"dependencyPathEdgesNearEntities",
	])
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	score = kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score')
	assert round(score, 3) == 0.0
def _SeeDevmini():
	"""Miniature SeeDev run: train on one document and evaluate on one document (slow, underscore-disabled)."""
	corpusForTraining = kindred.bionlpst.load('2016-SeeDev-binary-train')
	goldCorpus = kindred.bionlpst.load('2016-SeeDev-binary-dev')

	# Shrink both corpora to a single document each to keep the run quick
	corpusForTraining.documents = corpusForTraining.documents[1:2]
	goldCorpus.documents = goldCorpus.documents[:1]

	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier()
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	score = kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score')
	assert round(score, 3) == 0.235
def _bionlpst_bb3(swap):
	"""Run the BB3 event task end-to-end and print all metrics; `swap` exchanges the train/dev corpora."""
	corpusForTraining = kindred.bionlpst.load('2016-BB3-event-train')
	evaluationCorpus = kindred.bionlpst.load('2016-BB3-event-dev')
	if swap:
		corpusForTraining, evaluationCorpus = evaluationCorpus, corpusForTraining

	corpusForPrediction = evaluationCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier(useBuilder=True)
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	allMetrics = kindred.evaluate(evaluationCorpus, corpusForPrediction, metric='all')
	print("bb3 scores:", allMetrics, swap)
def _bionlpst_seedev(swap):
	"""Run the full SeeDev binary task and print all metrics; `swap` exchanges the train/dev corpora."""
	corpusForTraining = kindred.bionlpst.load('2016-SeeDev-binary-train')
	evaluationCorpus = kindred.bionlpst.load('2016-SeeDev-binary-dev')
	if swap:
		corpusForTraining, evaluationCorpus = evaluationCorpus, corpusForTraining

	corpusForPrediction = evaluationCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier()
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	allMetrics = kindred.evaluate(evaluationCorpus, corpusForPrediction, metric='all')
	print("seedev scores:", allMetrics, swap)
def test_doublelabels():
	"""Duplicating every relation under a second label should still be perfectly learnable (F1 == 1)."""
	corpusForTraining, goldCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)

	def addSecondLabel(corpus):
		# Add a copy of every relation with the label "anotherLabel"
		for document in corpus.documents:
			duplicates = [kindred.Relation("anotherLabel", existing.entities, existing.argNames) for existing in document.relations]
			document.relations += duplicates

	addSecondLabel(corpusForTraining)
	addSecondLabel(goldCorpus)

	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier()
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	score = kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score')
	assert round(score, 3) == 1.0
def test_pickle():
	"""A trained classifier should survive a pickle round-trip and still predict perfectly."""
	corpusForTraining, goldCorpus = generateTestData(positiveCount=100, negativeCount=100)

	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier()
	model.train(corpusForTraining)

	# Serialize the trained model to a temporary file and load it back
	with tempfile.NamedTemporaryFile() as tempHandle:
		with open(tempHandle.name, 'wb') as outHandle:
			pickle.dump(model, outHandle)
		with open(tempHandle.name, 'rb') as inHandle:
			model = pickle.load(inHandle)

	model.predict(corpusForPrediction)
	assert kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score') == 1.0
def test_singleClassifier_triple():
	"""Three-entity relations (entityCount=3) should be generated, trained on, and predicted perfectly."""
	corpusForTraining, goldCorpus = generateTestData(entityCount=3, positiveCount=100, negativeCount=100, relTypes=1)

	# Sanity-check the generated training relations are all triples
	trainingRelations = corpusForTraining.getRelations()
	assert len(trainingRelations) == 50
	for relation in trainingRelations:
		assert len(relation.entities) == 3

	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier(entityCount=3)
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	# The predictions should also all be triples
	outputRelations = corpusForPrediction.getRelations()
	assert len(outputRelations) == 50
	for relation in outputRelations:
		assert len(relation.entities) == 3

	score = kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score')
	assert round(score, 3) == 1.0
def test_simpleRelationClassifier_triple():
	"""Single-type three-entity relations should be generated, trained on, and predicted perfectly."""
	corpusForTraining, goldCorpus = generateTestData(entityCount=3, positiveCount=100, negativeCount=100)

	# Sanity-check the generated training relations are all triples
	trainingRelations = corpusForTraining.getRelations()
	assert len(trainingRelations) == 50
	for relation in trainingRelations:
		assert len(relation.entities) == 3

	corpusForPrediction = goldCorpus.clone()
	corpusForPrediction.removeRelations()

	model = kindred.RelationClassifier(entityCount=3)
	model.train(corpusForTraining)
	model.predict(corpusForPrediction)

	# The predictions should also all be triples
	outputRelations = corpusForPrediction.getRelations()
	assert len(outputRelations) == 50
	for relation in outputRelations:
		assert len(relation.entities) == 3

	assert kindred.evaluate(goldCorpus, corpusForPrediction, metric='f1score') == 1.0
def test_evaluate(capfd):
	"""Exercise kindred.evaluate's precision/recall/f1score/'all' metrics and its unknown-metric error path."""
	annotatedText = ''.join([
		'The <disease id="T1">colorectal cancer</disease> was caused by mutations in <gene id="T2">APC</gene>. We also studied <disease id="T3">glioblastoma</disease>.',
		'<relation type="typeA" subj="T2" obj="T1" />',
		'<relation type="typeB" subj="T2" obj="T3" />',
		'<relation type="typeA" subj="T3" obj="T1" />',
		'<relation type="typeB" subj="T2" obj="T1" />',
		'<relation type="typeC" subj="T2" obj="T1" />',
	])
	goldCorpus = kindred.Corpus(annotatedText)

	predCorpus = goldCorpus.clone()
	predDoc = predCorpus.documents[0]
	idMap = predDoc.getSourceEntityIDsToEntityIDs()

	# Drop one gold relation and add two relations of an unseen type,
	# giving 4 true positives, 2 false positives and 1 false negative.
	predDoc.relations = predDoc.relations[:4]
	predDoc.addRelation(
		kindred.Relation("typeX", entityIDs=[idMap["T1"], idMap["T2"]]))
	predDoc.addRelation(
		kindred.Relation("typeX", entityIDs=[idMap["T1"], idMap["T3"]]))

	assert kindred.evaluate(goldCorpus, predCorpus, metric='precision') == 4.0 / 6.0
	assert kindred.evaluate(goldCorpus, predCorpus, metric='recall') == 4.0 / 5.0
	f1 = kindred.evaluate(goldCorpus, predCorpus, metric='f1score')
	assert round(f1, 10) == round(72.0 / 99.0, 10)

	# metric='all' returns the same three values as a tuple
	allPrecision, allRecall, allF1 = kindred.evaluate(goldCorpus, predCorpus, metric='all')
	assert allPrecision == 4.0 / 6.0
	assert allRecall == 4.0 / 5.0
	assert round(allF1, 10) == round(72.0 / 99.0, 10)

	with pytest.raises(RuntimeError) as excinfo:
		kindred.evaluate(goldCorpus, predCorpus, metric='nonsense')
	assert excinfo.value.args == ('Unknown metric: nonsense', )

	# Make sure nothing has been displayed
	out, err = capfd.readouterr()
	assert out == ""
	assert err == ""
# Kindred
# 5 classes
# Train a relation classifier, predict on the dev corpus, and store the
# predicted relations in the database.
trainCorpus = kindred.load(dataFormat='json', path='relation/db/1')
devCorpus = kindred.load(dataFormat='json', path='ner-dump')

predictionCorpus = devCorpus.clone()
# BUG FIX: strip the gold relations from the cloned corpus before predicting.
# Without this, predict() runs on a corpus that already contains the gold
# answers, so the stored/evaluated relations mix gold and predicted output.
# (Every other use of clone()-then-predict in this codebase does this.)
predictionCorpus.removeRelations()

classifier = kindred.RelationClassifier()
classifier.train(trainCorpus)
classifier.predict(predictionCorpus)

f1score = kindred.evaluate(devCorpus, predictionCorpus, metric='f1score')

print("5 CLASSES ---------------------")
# Persist each predicted relation (subject text, object text, relation type,
# source document text) via the DB helpers.
for doc in predictionCorpus.documents:
	for predicted in doc.relations:
		rel = Relationships(predicted.entities[0].text, predicted.entities[1].text, predicted.relationType, doc.text)
		insert_rels(rel)

result = get_rels()
for row in result:
	print(row)
result = get_rels_clean()
for row in result:
	print(row)
help='Directory containing stand-off testing test') args = parser.parse_args() print("threshold\tprecision\trecall") for threshold in np.arange(0, 1.01, 0.01): trainCorpus = kindred.load('standoff', args.train) testCorpus = kindred.load('standoff', args.test) predCorpus = testCorpus.clone() predCorpus.removeRelations() parser = kindred.Parser(model='en_core_sci_sm') parser.parse(trainCorpus) parser.parse(testCorpus) parser.parse(predCorpus) classifier = kindred.RelationClassifier( classifierType='LogisticRegression', threshold=threshold, acceptedEntityTypes=[('Chemical', 'Mutation')]) classifier.train(trainCorpus) classifier.predict(predCorpus) precision, recall, f1score = kindred.evaluate(testCorpus, predCorpus, metric='all', display=False) print("%f\t%f\t%f" % (threshold, precision, recall))
print("-------------5 CLASSES------------") while count < iter_num: print("------", count, "------") trainCorpus, devCorpus = Corpus.split(trainFraction=0.9) predictionCorpus = devCorpus.clone() predictionCorpus.removeRelations() classifier = kindred.RelationClassifier() classifier.train(trainCorpus) classifier.predict(predictionCorpus) svmf1score = kindred.evaluate(devCorpus, predictionCorpus, metric='f1score', display=False) print("svm:\t", svmf1score) # avg_svm = avg_svm+svmf1score classifier = kindred.RelationClassifier(classifierType='DCT') classifier.train(trainCorpus) classifier.predict(predictionCorpus) dctf1score = kindred.evaluate(devCorpus, predictionCorpus, metric='f1score', display=False) print("dct:\t", dctf1score) # avg_dct = avg_dct+dctf1score classifier = kindred.RelationClassifier(classifierType='NN')