コード例 #1
0
def test_singleClassifier():
    """Train a default classifier on data generated with relTypes=2 and pin its F1 score."""
    train_corpus, gold_corpus = generateTestData(
        positiveCount=100, negativeCount=100, relTypes=2)

    # Predictions are written into a copy of the gold corpus whose
    # relations have been removed first.
    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(train_corpus)
    model.predict(unlabelled)

    score = kindred.evaluate(gold_corpus, unlabelled, metric='f1score')
    assert round(score, 3) == 0.519
コード例 #2
0
def test_simpleRelationClassifier_binary():
    """Expect a perfect F1 score from the default classifier on generated data."""
    train_corpus, gold_corpus = generateTestData(
        positiveCount=100, negativeCount=100)

    # Work on a relation-free copy so the gold corpus stays untouched.
    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(train_corpus)
    model.predict(unlabelled)

    score = kindred.evaluate(gold_corpus, unlabelled, metric='f1score')
    assert score == 1.0
コード例 #3
0
def test_logisticregression_threshold():
    """Expect an F1 of 1.0 from a LogisticRegression classifier built with threshold=0.3."""
    train_corpus, gold_corpus = generateTestData(
        positiveCount=100, negativeCount=100, relTypes=1)

    # Predictions go into a copy of the gold corpus with its relations removed.
    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier(
        classifierType='LogisticRegression',
        threshold=0.3,
    )
    model.train(train_corpus)
    model.predict(unlabelled)

    score = kindred.evaluate(gold_corpus, unlabelled, metric='f1score')
    assert round(score, 3) == 1.0
コード例 #4
0
def test_filterByEntityTypes_invalidTypes():
    """Expect an F1 of 0.0 after every gold entity is given a new entity type."""
    train_corpus, gold_corpus = generateTestData(
        positiveCount=100, negativeCount=100, relTypes=1)

    # The clone is taken before the gold entities are relabelled below.
    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    # Overwrite the entity type of every entity in the gold corpus.
    for document in gold_corpus.documents:
        for entity in document.entities:
            entity.entityType = 'a new type'

    feature_names = [
        "unigramsBetweenEntities",
        "bigrams",
        "dependencyPathEdges",
        "dependencyPathEdgesNearEntities",
    ]
    model = kindred.RelationClassifier(features=feature_names)
    model.train(train_corpus)
    model.predict(unlabelled)

    score = kindred.evaluate(gold_corpus, unlabelled, metric='f1score')
    assert round(score, 3) == 0.0
コード例 #5
0
def _SeeDevmini():
    """Train on one SeeDev document, predict on one dev document, and pin the F1 score."""
    train_corpus = kindred.bionlpst.load('2016-SeeDev-binary-train')
    gold_corpus = kindred.bionlpst.load('2016-SeeDev-binary-dev')

    # Shrink both corpora to a single document each: the second training
    # document and the first dev document.
    train_corpus.documents = train_corpus.documents[1:2]
    gold_corpus.documents = gold_corpus.documents[:1]

    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(train_corpus)
    model.predict(unlabelled)

    score = kindred.evaluate(gold_corpus, unlabelled, metric='f1score')
    assert round(score, 3) == 0.235
コード例 #6
0
def _bionlpst_bb3(swap):
    """Train and evaluate on the '2016-BB3-event' corpora and print all scores.

    swap: when truthy, the loaded train and dev corpora exchange roles.
    """
    train_corpus = kindred.bionlpst.load('2016-BB3-event-train')
    gold_corpus = kindred.bionlpst.load('2016-BB3-event-dev')
    if swap:
        train_corpus, gold_corpus = gold_corpus, train_corpus

    # Predict into a relation-free copy of the evaluation corpus.
    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier(useBuilder=True)
    model.train(train_corpus)
    model.predict(unlabelled)

    all_scores = kindred.evaluate(gold_corpus, unlabelled, metric='all')
    print("bb3 scores:", all_scores, swap)
コード例 #7
0
def _bionlpst_seedev(swap):
    """Train and evaluate on the '2016-SeeDev-binary' corpora and print all scores.

    swap: when truthy, the loaded train and dev corpora exchange roles.
    """
    train_corpus = kindred.bionlpst.load('2016-SeeDev-binary-train')
    gold_corpus = kindred.bionlpst.load('2016-SeeDev-binary-dev')
    if swap:
        train_corpus, gold_corpus = gold_corpus, train_corpus

    # Predict into a relation-free copy of the evaluation corpus.
    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(train_corpus)
    model.predict(unlabelled)

    all_scores = kindred.evaluate(gold_corpus, unlabelled, metric='all')
    print("seedev scores:", all_scores, swap)
コード例 #8
0
def test_doublelabels():
    """Expect an F1 of 1.0 when every relation also appears under a second label."""
    train_corpus, gold_corpus = generateTestData(
        positiveCount=100, negativeCount=100, relTypes=1)

    # For each existing relation, add a copy labelled "anotherLabel" over
    # the same entities and argument names; training corpus first, then dev.
    for corpus in (train_corpus, gold_corpus):
        for document in corpus.documents:
            duplicates = [
                kindred.Relation("anotherLabel", rel.entities, rel.argNames)
                for rel in document.relations
            ]
            document.relations += duplicates

    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(train_corpus)
    model.predict(unlabelled)

    score = kindred.evaluate(gold_corpus, unlabelled, metric='f1score')
    assert round(score, 3) == 1.0
コード例 #9
0
ファイル: test_pickle.py プロジェクト: jopilipina/thesis
def test_pickle():
    """Check that a trained classifier still scores an F1 of 1.0 after a pickle round trip."""
    trainCorpus, testCorpusGold = generateTestData(positiveCount=100,
                                                   negativeCount=100)

    predictionCorpus = testCorpusGold.clone()
    predictionCorpus.removeRelations()

    classifier = kindred.RelationClassifier()
    classifier.train(trainCorpus)

    # Serialize through the already-open temporary file handle rather than
    # reopening it by name: a NamedTemporaryFile that is still open cannot be
    # opened a second time on Windows, so the by-name approach is not portable.
    with tempfile.NamedTemporaryFile() as tempF:
        pickle.dump(classifier, tempF)
        tempF.flush()
        tempF.seek(0)  # rewind so the load reads what was just written
        classifier = pickle.load(tempF)

    # From here on only the unpickled classifier is used.
    classifier.predict(predictionCorpus)

    f1score = kindred.evaluate(testCorpusGold,
                               predictionCorpus,
                               metric='f1score')
    assert f1score == 1.0
コード例 #10
0
def test_singleClassifier_triple():
    """Check three-entity relations: counts, arity, and an F1 of 1.0 (relTypes=1)."""
    train_corpus, gold_corpus = generateTestData(
        entityCount=3, positiveCount=100, negativeCount=100, relTypes=1)

    # The training data is expected to hold 50 relations of three entities each.
    gold_relations = train_corpus.getRelations()
    assert len(gold_relations) == 50
    assert all(len(rel.entities) == 3 for rel in gold_relations)

    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier(entityCount=3)
    model.train(train_corpus)
    model.predict(unlabelled)

    # The predictions must have the same count and arity.
    found_relations = unlabelled.getRelations()
    assert len(found_relations) == 50
    assert all(len(rel.entities) == 3 for rel in found_relations)

    score = kindred.evaluate(gold_corpus, unlabelled, metric='f1score')
    assert round(score, 3) == 1.0
コード例 #11
0
def test_simpleRelationClassifier_triple():
    """Check three-entity relations: counts, arity, and an exact F1 of 1.0."""
    train_corpus, gold_corpus = generateTestData(
        entityCount=3, positiveCount=100, negativeCount=100)

    # The training data is expected to hold 50 relations of three entities each.
    gold_relations = train_corpus.getRelations()
    assert len(gold_relations) == 50
    assert all(len(rel.entities) == 3 for rel in gold_relations)

    unlabelled = gold_corpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier(entityCount=3)
    model.train(train_corpus)
    model.predict(unlabelled)

    # The predictions must have the same count and arity.
    found_relations = unlabelled.getRelations()
    assert len(found_relations) == 50
    assert all(len(rel.entities) == 3 for rel in found_relations)

    score = kindred.evaluate(gold_corpus, unlabelled, metric='f1score')
    assert score == 1.0
コード例 #12
0
ファイル: test_evaluate.py プロジェクト: QAandBioNLP/kindred
def test_evaluate(capfd):
    """Check precision, recall and F1 from kindred.evaluate on a hand-built corpus.

    The gold corpus has five relations. The test corpus keeps the first four
    and adds two "typeX" relations, giving 4 matches out of 6 predictions and
    out of 5 gold relations. Also checks that an unknown metric raises
    RuntimeError and that nothing is written to stdout or stderr.
    """
    sentence = 'The <disease id="T1">colorectal cancer</disease> was caused by mutations in <gene id="T2">APC</gene>. We also studied <disease id="T3">glioblastoma</disease>.'
    relation_tags = [
        '<relation type="typeA" subj="T2" obj="T1" />',
        '<relation type="typeB" subj="T2" obj="T3" />',
        '<relation type="typeA" subj="T3" obj="T1" />',
        '<relation type="typeB" subj="T2" obj="T1" />',
        '<relation type="typeC" subj="T2" obj="T1" />',
    ]
    gold_corpus = kindred.Corpus(sentence + ''.join(relation_tags))

    edited_corpus = gold_corpus.clone()
    edited_doc = edited_corpus.documents[0]
    id_map = edited_doc.getSourceEntityIDsToEntityIDs()

    # Drop the last relation, then add two relations absent from the gold set.
    edited_doc.relations = edited_doc.relations[:4]
    for partner in ("T2", "T3"):
        extra = kindred.Relation(
            "typeX", entityIDs=[id_map["T1"], id_map[partner]])
        edited_doc.addRelation(extra)

    expected_precision = 4.0 / 6.0
    expected_recall = 4.0 / 5.0
    expected_f1 = round(72.0 / 99.0, 10)

    # Each metric requested individually.
    assert kindred.evaluate(
        gold_corpus, edited_corpus, metric='precision') == expected_precision
    assert kindred.evaluate(
        gold_corpus, edited_corpus, metric='recall') == expected_recall
    single_f1 = kindred.evaluate(gold_corpus, edited_corpus, metric='f1score')
    assert round(single_f1, 10) == expected_f1

    # All three metrics requested at once.
    all_scores = kindred.evaluate(gold_corpus, edited_corpus, metric='all')
    all_precision, all_recall, all_f1 = all_scores
    assert all_precision == expected_precision
    assert all_recall == expected_recall
    assert round(all_f1, 10) == expected_f1

    with pytest.raises(RuntimeError) as raised:
        kindred.evaluate(gold_corpus, edited_corpus, metric='nonsense')
    assert raised.value.args == ('Unknown metric: nonsense', )

    # Make sure nothing has been displayed
    captured_out, captured_err = capfd.readouterr()
    assert captured_out == ""
    assert captured_err == ""
コード例 #13
0

# Kindred: train a relation classifier on one corpus, predict relations on
# another, then pass each predicted relation to insert_rels and print what
# get_rels / get_rels_clean return.

# 5 classes

# Both corpora are loaded with dataFormat='json' from the given paths.
trainCorpus = kindred.load(dataFormat='json', path='relation/db/1')
devCorpus = kindred.load(dataFormat='json', path='ner-dump')

# NOTE(review): the clone is not stripped with removeRelations() before
# predict() — confirm the 'ner-dump' corpus carries no relations of its own.
predictionCorpus = devCorpus.clone()

classifier = kindred.RelationClassifier()
classifier.train(trainCorpus)
classifier.predict(predictionCorpus)

# NOTE(review): f1score is computed here but not used in the visible code.
f1score = kindred.evaluate(devCorpus, predictionCorpus, metric='f1score')

print("5 CLASSES ---------------------")
# Build a Relationships record from each relation's first two entity texts,
# its relation type and the document text, and hand it to insert_rels.
for i in predictionCorpus.documents:
    for j in (i.relations):
        rel = Relationships(j.entities[0].text, j.entities[1].text,
                            j.relationType, i.text)
        insert_rels(rel)

# Print every item returned by get_rels().
result = get_rels()
for i in result:
    print(i)

# Print every item returned by get_rels_clean().
result = get_rels_clean()
for i in result:
    print(i)
コード例 #14
0
ファイル: prCurve.py プロジェクト: jakelever/pgxmine
                        help='Directory containing stand-off testing test')
    args = parser.parse_args()

    print("threshold\tprecision\trecall")
    for threshold in np.arange(0, 1.01, 0.01):

        trainCorpus = kindred.load('standoff', args.train)
        testCorpus = kindred.load('standoff', args.test)

        predCorpus = testCorpus.clone()
        predCorpus.removeRelations()

        parser = kindred.Parser(model='en_core_sci_sm')
        parser.parse(trainCorpus)
        parser.parse(testCorpus)
        parser.parse(predCorpus)

        classifier = kindred.RelationClassifier(
            classifierType='LogisticRegression',
            threshold=threshold,
            acceptedEntityTypes=[('Chemical', 'Mutation')])
        classifier.train(trainCorpus)
        classifier.predict(predCorpus)

        precision, recall, f1score = kindred.evaluate(testCorpus,
                                                      predCorpus,
                                                      metric='all',
                                                      display=False)

        print("%f\t%f\t%f" % (threshold, precision, recall))
コード例 #15
0
    print("-------------5 CLASSES------------")

    while count < iter_num:
        print("------", count, "------")

        trainCorpus, devCorpus = Corpus.split(trainFraction=0.9)

        predictionCorpus = devCorpus.clone()
        predictionCorpus.removeRelations()

        classifier = kindred.RelationClassifier()
        classifier.train(trainCorpus)
        classifier.predict(predictionCorpus)
        svmf1score = kindred.evaluate(devCorpus,
                                      predictionCorpus,
                                      metric='f1score',
                                      display=False)
        print("svm:\t", svmf1score)
        # avg_svm = avg_svm+svmf1score

        classifier = kindred.RelationClassifier(classifierType='DCT')
        classifier.train(trainCorpus)
        classifier.predict(predictionCorpus)
        dctf1score = kindred.evaluate(devCorpus,
                                      predictionCorpus,
                                      metric='f1score',
                                      display=False)
        print("dct:\t", dctf1score)
        # avg_dct = avg_dct+dctf1score

        classifier = kindred.RelationClassifier(classifierType='NN')