Esempio n. 1
0
def test_predicting_thrice():
    """Check that three classifiers predicting into one corpus add no duplicates.

    Three training corpora are relabelled with distinct relation types, one
    classifier is trained per corpus, and all three then predict on the same
    test corpus (whose relations were removed beforehand).
    """
    firstTrain, secondTrain = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)
    thirdTrain, testCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)

    labelledCorpora = [
        (firstTrain, 'type1'),
        (secondTrain, 'type2'),
        (thirdTrain, 'type3'),
    ]

    # Give every training corpus its own relation type.
    for corpus, label in labelledCorpora:
        for document in corpus.documents:
            for relation in document.relations:
                relation.relationType = label

    testCorpus.removeRelations()

    # All classifiers are trained before any of them predicts.
    trained = []
    for corpus, _ in labelledCorpora:
        model = kindred.RelationClassifier()
        model.train(corpus)
        trained.append(model)

    for model in trained:
        model.predict(testCorpus)

    predicted = []
    for document in testCorpus.documents:
        predicted.extend(document.relations)
    assert len(predicted) == len(set(predicted)), "Duplicate relations found in predictions"
Esempio n. 2
0
def test_simpleRelationClassifier_triple():
    """Train and predict three-entity relations on generated data.

    Asserts that both the training relations and the predicted relations
    number 50 with three entities each, and that the F1 score is exactly 1.0.
    """
    trainCorpus, testCorpusGold = generateTestData(
        entityCount=3, positiveCount=100, negativeCount=100)

    goldTraining = trainCorpus.getRelations()
    assert len(goldTraining) == 50
    for relation in goldTraining:
        assert len(relation.entities) == 3

    # Predict into a relation-free copy so the gold corpus stays intact.
    unlabelled = testCorpusGold.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier(entityCount=3)
    model.train(trainCorpus)
    model.predict(unlabelled)

    predicted = unlabelled.getRelations()
    assert len(predicted) == 50
    for relation in predicted:
        assert len(relation.entities) == 3

    score = kindred.evaluate(testCorpusGold, unlabelled, metric='f1score')
    assert score == 1.0
Esempio n. 3
0
def test_singleClassifier_triple():
    """Three-entity relations with a single relation type on generated data.

    Asserts 50 three-entity relations in both the training data and the
    predictions, and an F1 score that rounds to 1.0 at three decimals.
    """
    trainCorpus, devCorpus = generateTestData(
        entityCount=3, positiveCount=100, negativeCount=100, relTypes=1)

    goldTraining = trainCorpus.getRelations()
    assert len(goldTraining) == 50
    for relation in goldTraining:
        assert len(relation.entities) == 3

    # Work on a relation-free copy of the dev corpus.
    unlabelled = devCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier(entityCount=3)
    model.train(trainCorpus)
    model.predict(unlabelled)

    predicted = unlabelled.getRelations()
    assert len(predicted) == 50
    for relation in predicted:
        assert len(relation.entities) == 3

    score = kindred.evaluate(devCorpus, unlabelled, metric='f1score')
    assert round(score, 3) == 1.0
Esempio n. 4
0
def _featureBuilding(useBB3):
    """Sweep the classifier threshold and print precision/recall/F1 for each value.

    The threshold runs over 0.01, 0.03, ..., 0.99. For every value a fresh
    classifier is trained on the training set and evaluated on the dev set.

    Args:
        useBB3: when truthy, use the 2016 BB3 event train/dev data with a
            two-feature set; otherwise use the 2016 SeeDev binary train/dev
            data with the five-feature set.
    """
    if useBB3:
        trainData = kindred.bionlpst.load('2016-BB3-event-train')
        testData = kindred.bionlpst.load('2016-BB3-event-dev')
        chosenFeatures = ["entityTypes", "unigramsBetweenEntities"]
    else:
        trainData = kindred.bionlpst.load('2016-SeeDev-binary-train')
        testData = kindred.bionlpst.load('2016-SeeDev-binary-dev')
        chosenFeatures = [
            "entityTypes", "unigramsBetweenEntities", "bigrams",
            "dependencyPathEdges", "dependencyPathEdgesNearEntities"
        ]

    # Split the dev data into classifier input and gold relations.
    testData_TextAndEntities = [d.getTextAndEntities() for d in testData]
    testData_Relations = [d.getRelations() for d in testData]

    for threshold in range(1, 100, 2):
        # Integer percent -> fractional threshold in (0, 1).
        fthreshold = threshold / float(100)
        classifier = kindred.RelationClassifier(useBuilder=False,
                                                features=chosenFeatures,
                                                threshold=fthreshold)
        classifier.train(trainData)

        predictedRelations = classifier.predict(testData_TextAndEntities)

        evaluator = Evaluator()
        precision, recall, f1score = evaluator.evaluate(testData_Relations,
                                                        predictedRelations,
                                                        metric='all',
                                                        display=False)
        print(fthreshold, precision, recall, f1score)
Esempio n. 5
0
def _nary():
    """Train with each n-ary method ('merge', then 'combine') and save predictions.

    Both runs train on the SeeDev binary training+development data and
    predict on the SeeDev binary test data; each run writes to its own
    output directory via kindred.saveST.
    """
    trainData = kindred.BioNLPSTData(
        '2016-SeeDev-binary-training_and_development')
    testData = kindred.BioNLPSTData('2016-SeeDev-binary-test')

    # (naryMethod, output directory) for each run, in execution order.
    runs = (
        ('merge', 'BB3-predictions-merge/'),
        ('combine', 'BB3-predictions-combined/'),
    )
    for method, outDir in runs:
        model = kindred.RelationClassifier(naryMethod=method)
        model.train(trainData)
        predictions = model.predict(testData)
        kindred.saveST(outDir, testData, predictions)
Esempio n. 6
0
def test_doublelabels():
    """Every relation carries a second label; expect F1 of 1.0 to 3 decimals.

    Each existing relation in both corpora gets a twin relation of type
    "anotherLabel" over the same entities and argument names before
    training and evaluation.
    """
    trainCorpus, devCorpus = generateTestData(positiveCount=100,
                                              negativeCount=100,
                                              relTypes=1)

    # Duplicate each relation under a second label, train corpus first.
    for corpus in (trainCorpus, devCorpus):
        for document in corpus.documents:
            twins = [
                kindred.Relation("anotherLabel", rel.entities, rel.argNames)
                for rel in document.relations
            ]
            document.relations += twins

    unlabelled = devCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(devCorpus, unlabelled, metric='f1score')
    assert round(score, 3) == 1.0
Esempio n. 7
0
def test_simpleRelationClassifier_emptyTrainCorpus():
    """Training on a corpus without entities or relations must raise RuntimeError.

    The exception's args are compared against the exact expected message.
    """
    trainCorpus, testCorpus = generateTestData(positiveCount=100, negativeCount=100)

    # Strip all annotations from the training documents.
    for document in trainCorpus.documents:
        document.entities = []
        document.relations = []

    expectedArgs = ('No candidate relations found in corpus for training. Does the corpus contain text and entity annotations with at least one sentence containing 2 entities.',)

    model = kindred.RelationClassifier()
    with pytest.raises(RuntimeError) as excinfo:
        model.train(trainCorpus)
    assert excinfo.value.args == expectedArgs
Esempio n. 8
0
def test_logisticregression_threshold():
    """LogisticRegression classifier with threshold 0.3; expect F1 of 1.0 to 3 decimals."""
    trainCorpus, goldCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)

    # Predict into a relation-free copy of the gold corpus.
    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier(classifierType='LogisticRegression', threshold=0.3)
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(goldCorpus, unlabelled, metric='f1score')
    assert round(score, 3) == 1.0
Esempio n. 9
0
def test_filterByEntityTypes_validTypes():
    """Classifier built with an explicit four-feature list; expect F1 of 1.0 to 3 decimals."""
    trainCorpus, goldCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)

    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    # Feature set passed to the classifier.
    featureNames = [
        "unigramsBetweenEntities",
        "bigrams",
        "dependencyPathEdges",
        "dependencyPathEdgesNearEntities",
    ]
    model = kindred.RelationClassifier(features=featureNames)
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(goldCorpus, unlabelled, metric='f1score')
    assert round(score, 3) == 1.0
Esempio n. 10
0
def test_simpleRelationClassifier_emptyTestCorpus():
    """Predicting on a test corpus stripped of entities must yield zero relations."""
    trainCorpus, testCorpus = generateTestData(positiveCount=100, negativeCount=100)

    # Remove every annotation from the test documents.
    for document in testCorpus.documents:
        document.entities = []
        document.relations = []

    model = kindred.RelationClassifier()
    model.train(trainCorpus)
    model.predict(testCorpus)

    remaining = testCorpus.getRelations()
    assert len(remaining) == 0
Esempio n. 11
0
def test_simpleRelationClassifier_binary():
    """Default classifier on generated data; expect an F1 score of exactly 1.0."""
    trainCorpus, testCorpusGold = generateTestData(positiveCount=100, negativeCount=100)

    # Predict into a relation-free copy of the gold corpus.
    unlabelled = testCorpusGold.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(testCorpusGold, unlabelled, metric='f1score')
    assert score == 1.0
Esempio n. 12
0
def test_singleClassifier():
    """Default classifier, single relation type; expect F1 of 1.0 to 3 decimals."""
    trainCorpus, goldCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)

    # Predict into a relation-free copy of the gold corpus.
    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(goldCorpus, unlabelled, metric='f1score')
    assert round(score, 3) == 1.0
Esempio n. 13
0
def test_bionlp_BB3_classifier():
    """Train on BB3 event train, predict on BB3 event dev; expect F1 above 0.4."""
    trainCorpus = kindred.bionlpst.load('2016-BB3-event-train')
    goldCorpus = kindred.bionlpst.load('2016-BB3-event-dev')

    # Predict into a relation-free copy of the dev corpus.
    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(goldCorpus, unlabelled, metric='f1score')
    assert score > 0.4
Esempio n. 14
0
def _bionlpst_seedev():
    """Train on SeeDev binary train, predict on SeeDev binary dev, print the F1 score."""
    trainCorpus = kindred.bionlpst.load('2016-SeeDev-binary-train')
    goldCorpus = kindred.bionlpst.load('2016-SeeDev-binary-dev')

    # Predict into a relation-free copy of the dev corpus.
    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(goldCorpus, unlabelled, metric='f1score')
    print("f1score:", score)
Esempio n. 15
0
def test_bionlp_SeeDev_classifier():
    """Train on SeeDev binary train, predict on dev; F1 must round to 0.349."""
    trainCorpus = kindred.bionlpst.load('2016-SeeDev-binary-train')
    goldCorpus = kindred.bionlpst.load('2016-SeeDev-binary-dev')

    # Predict into a relation-free copy of the dev corpus.
    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(goldCorpus, unlabelled, metric='f1score')
    assert round(score, 3) == 0.349
Esempio n. 16
0
def test_predicting_duplicates():
    """Running predict three times on one corpus must not add duplicate relations."""
    trainCorpus, testCorpus = generateTestData(positiveCount=100, negativeCount=100, relTypes=1)

    testCorpus.removeRelations()

    model = kindred.RelationClassifier()
    model.train(trainCorpus)

    # Same classifier, same corpus, three consecutive prediction passes.
    for _ in range(3):
        model.predict(testCorpus)

    predicted = []
    for document in testCorpus.documents:
        predicted.extend(document.relations)
    assert len(predicted) == len(set(predicted)), "Duplicate relations found in predictions"
Esempio n. 17
0
def _bionlpst_seedev(swap):
    """Train/evaluate on the SeeDev binary train and dev sets and print all scores.

    Args:
        swap: when truthy, train on the dev set and evaluate on the train
            set instead of the usual direction.
    """
    loadedTrain = kindred.bionlpst.load('2016-SeeDev-binary-train')
    loadedDev = kindred.bionlpst.load('2016-SeeDev-binary-dev')

    # Pick which corpus is fitted and which serves as gold.
    if swap:
        fitCorpus, goldCorpus = loadedDev, loadedTrain
    else:
        fitCorpus, goldCorpus = loadedTrain, loadedDev

    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(fitCorpus)
    model.predict(unlabelled)

    scores = kindred.evaluate(goldCorpus, unlabelled, metric='all')
    print("seedev scores:", scores, swap)
Esempio n. 18
0
def _SeeDevmini():
    """Tiny SeeDev run: one training document, one dev document; F1 must round to 0.235."""
    trainCorpus = kindred.bionlpst.load('2016-SeeDev-binary-train')
    goldCorpus = kindred.bionlpst.load('2016-SeeDev-binary-dev')

    # Keep only the second training document and the first dev document.
    trainCorpus.documents = trainCorpus.documents[1:2]
    goldCorpus.documents = goldCorpus.documents[:1]

    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier()
    model.train(trainCorpus)
    model.predict(unlabelled)

    score = kindred.evaluate(goldCorpus, unlabelled, metric='f1score')
    assert round(score, 3) == 0.235
Esempio n. 19
0
def _bionlpst_bb3(swap):
    """Train/evaluate on the BB3 event train and dev sets (useBuilder=True) and print all scores.

    Args:
        swap: when truthy, train on the dev set and evaluate on the train
            set instead of the usual direction.
    """
    loadedTrain = kindred.bionlpst.load('2016-BB3-event-train')
    loadedDev = kindred.bionlpst.load('2016-BB3-event-dev')

    # Pick which corpus is fitted and which serves as gold.
    if swap:
        fitCorpus, goldCorpus = loadedDev, loadedTrain
    else:
        fitCorpus, goldCorpus = loadedTrain, loadedDev

    unlabelled = goldCorpus.clone()
    unlabelled.removeRelations()

    model = kindred.RelationClassifier(useBuilder=True)
    model.train(fitCorpus)
    model.predict(unlabelled)

    scores = kindred.evaluate(goldCorpus, unlabelled, metric='all')
    print("bb3 scores:", scores, swap)
Esempio n. 20
0
def _featureBuilding(useBB3):
    """Greedy forward feature selection over three stages, printing each F1 score.

    In every stage each remaining feature is tried on top of the features
    already selected; the one with the highest F1 on the dev data is moved
    from the candidate list to the selected list.

    Args:
        useBB3: when truthy, use the 2016 BB3 event train/dev data with
            useBuilder=True; otherwise the 2016 SeeDev binary train/dev data
            with useBuilder=False.
    """
    if useBB3:
        trainName, testName = '2016-BB3-event-train', '2016-BB3-event-dev'
        useBuilder = True
    else:
        trainName, testName = '2016-SeeDev-binary-train', '2016-SeeDev-binary-dev'
        useBuilder = False
    trainData = kindred.bionlpst.load(trainName)
    testData = kindred.bionlpst.load(testName)

    # Split the dev data into classifier input and gold relations.
    goldInputs = [d.getTextAndEntities() for d in testData]
    goldRelations = [d.getRelations() for d in testData]

    remaining = [
        "entityTypes", "unigramsBetweenEntities", "bigrams",
        "dependencyPathEdges", "dependencyPathEdgesNearEntities"
    ]
    selected = []

    for stage in range(3):
        topScore, topFeature = -1, -1
        for candidate in remaining:
            model = kindred.RelationClassifier(useBuilder=useBuilder,
                                               features=selected + [candidate])
            model.train(trainData)

            predicted = model.predict(goldInputs)

            score = Evaluator().evaluate(goldRelations,
                                         predicted,
                                         metric='f1score',
                                         display=False)
            print(stage, candidate, score)

            # Strictly greater: on ties the earliest candidate wins.
            if score > topScore:
                topScore, topFeature = score, candidate

        remaining.remove(topFeature)
        selected.append(topFeature)
Esempio n. 21
0
def _bionlpst_seedev_testSet():
    """Train on SeeDev binary train+dev, predict on the test set, save as standoff to 'out.SeeDev'."""
    trainPart = kindred.bionlpst.load('2016-SeeDev-binary-train')
    devPart = kindred.bionlpst.load('2016-SeeDev-binary-dev')
    testData = kindred.bionlpst.load('2016-SeeDev-binary-test')

    # Training uses the train and dev data joined with `+`.
    combined = trainPart + devPart

    print("Starting training...")
    model = kindred.RelationClassifier()
    model.train(combined)

    print("Predicting training...")
    predictions = model.predict(testData)

    print("Saving...")
    kindred.save(testData, 'standoff', 'out.SeeDev',
                 predictedRelations=predictions)
Esempio n. 22
0
def test_pickle():
    """Check that a trained classifier survives a pickle round trip through a file.

    The classifier is trained, pickled to a temporary file, loaded back, and
    the reloaded instance must still reach an F1 score of exactly 1.0 on the
    generated test data.
    """
    trainCorpus, testCorpusGold = generateTestData(positiveCount=100,
                                                   negativeCount=100)

    # Predict into a relation-free copy of the gold corpus.
    predictionCorpus = testCorpusGold.clone()
    predictionCorpus.removeRelations()

    classifier = kindred.RelationClassifier()
    classifier.train(trainCorpus)

    # Write and read through the handle NamedTemporaryFile already holds open
    # (default mode 'w+b'). Reopening the file by name while it is still open
    # works on Unix but fails on Windows.
    with tempfile.NamedTemporaryFile() as tempF:
        pickle.dump(classifier, tempF)
        tempF.flush()
        tempF.seek(0)  # rewind before reading the pickle back
        classifier = pickle.load(tempF)

    classifier.predict(predictionCorpus)

    f1score = kindred.evaluate(testCorpusGold,
                               predictionCorpus,
                               metric='f1score')
    assert f1score == 1.0
Esempio n. 23
0
def _bionlpst_bb3_testSet():
    """Train on BB3 event train+dev (useBuilder=True), predict on the test set, save as standoff to 'out.BB3'."""
    trainPart = kindred.bionlpst.load('2016-BB3-event-train')
    devPart = kindred.bionlpst.load('2016-BB3-event-dev')
    testData = kindred.bionlpst.load('2016-BB3-event-test')

    # Training uses the train and dev data joined with `+`.
    combined = trainPart + devPart

    print("Starting training...")
    model = kindred.RelationClassifier(useBuilder=True)
    model.train(combined)

    print("Predicting training...")
    predictions = model.predict(testData)

    print("Saving...")
    kindred.save(testData, 'standoff', 'out.BB3',
                 predictedRelations=predictions)
Esempio n. 24
0
        ['Driver', 'Oncogene', 'Tumor_Suppressor'], [
            args.outModel_Driver, args.outModel_Oncogene,
            args.outModel_TumorSuppressor
        ]):
        print("Building %s model" % relationType)
        print("  Loading training")
        goldDir = 'gold'
        trainCorpus = kindred.load('standoff', args.inTrain)

        for doc in trainCorpus.documents:
            doc.relations = [
                r for r in doc.relations if r.relationType == relationType
            ]

        print("  Doing training")
        features = "entityTypes,unigramsBetweenEntities,bigrams,dependencyPathEdges,dependencyPathEdgesNearEntities".split(
            ',')
        threshold = thresholds[relationType]
        classifier = kindred.RelationClassifier(
            classifierType='LogisticRegression',
            threshold=threshold,
            features=features,
            acceptedEntityTypes=[('cancer', 'gene')])
        classifier.train(trainCorpus)

        print("  Saving classifer")
        with open(outModel, 'wb') as f:
            pickle.dump(classifier, f)

        print("  Output done!")
Esempio n. 25
0
    # avg_nn = 0

    count = 0
    iter_num = 200

    print("-------------5 CLASSES------------")

    while count < iter_num:
        print("------", count, "------")

        trainCorpus, devCorpus = Corpus.split(trainFraction=0.9)

        predictionCorpus = devCorpus.clone()
        predictionCorpus.removeRelations()

        classifier = kindred.RelationClassifier()
        classifier.train(trainCorpus)
        classifier.predict(predictionCorpus)
        svmf1score = kindred.evaluate(devCorpus,
                                      predictionCorpus,
                                      metric='f1score',
                                      display=False)
        print("svm:\t", svmf1score)
        # avg_svm = avg_svm+svmf1score

        classifier = kindred.RelationClassifier(classifierType='DCT')
        classifier.train(trainCorpus)
        classifier.predict(predictionCorpus)
        dctf1score = kindred.evaluate(devCorpus,
                                      predictionCorpus,
                                      metric='f1score',
Esempio n. 26
0
            # Trim back to relation type of choice
            for doc in train.documents:
                doc.relations = [
                    r for r in doc.relations if r.relationType == args.reltype
                ]
            for doc in gold.documents:
                doc.relations = [
                    r for r in doc.relations if r.relationType == args.reltype
                ]

            entityType = entityTypes[args.reltype]
            entityCount = len(entityType)
            classifier = kindred.RelationClassifier(
                classifierType='LogisticRegression',
                threshold=threshold,
                entityCount=entityCount,
                acceptedEntityTypes=[entityType])

            classifier.train(train)

            predictions = gold.clone()
            predictions.removeRelations()

            classifier.predict(predictions)

            TP, FN, FP = 0, 0, 0
            for goldDoc, testDoc in zip(gold.documents, predictions.documents):
                goldTuples = [(r.relationType, tuple(r.entityIDs))
                              for r in goldDoc.relations]
                testTuples = [(r.relationType, tuple(r.entityIDs))
Esempio n. 27
0
                        help='Directory containing stand-off testing test')
    args = parser.parse_args()

    print("threshold\tprecision\trecall")
    for threshold in np.arange(0, 1.01, 0.01):

        trainCorpus = kindred.load('standoff', args.train)
        testCorpus = kindred.load('standoff', args.test)

        predCorpus = testCorpus.clone()
        predCorpus.removeRelations()

        parser = kindred.Parser(model='en_core_sci_sm')
        parser.parse(trainCorpus)
        parser.parse(testCorpus)
        parser.parse(predCorpus)

        classifier = kindred.RelationClassifier(
            classifierType='LogisticRegression',
            threshold=threshold,
            acceptedEntityTypes=[('Chemical', 'Mutation')])
        classifier.train(trainCorpus)
        classifier.predict(predCorpus)

        precision, recall, f1score = kindred.evaluate(testCorpus,
                                                      predCorpus,
                                                      metric='all',
                                                      display=False)

        print("%f\t%f\t%f" % (threshold, precision, recall))
Esempio n. 28
0
def get_rels_clean():
    """Return all (gene, disease, relation) rows from the relationships table.

    Runs the query on the cursor `c` defined outside this function.
    """
    query = "SELECT gene, disease, relation FROM relationships"
    c.execute(query)
    rows = c.fetchall()
    return rows


# Kindred

# 5 classes

# Load the training corpus and the corpus to predict on, both in JSON format.
trainCorpus = kindred.load(dataFormat='json', path='relation/db/1')
devCorpus = kindred.load(dataFormat='json', path='ner-dump')

# Predictions go into a clone of devCorpus.
# NOTE(review): unlike the other examples, removeRelations() is not called on
# the clone before predicting — confirm 'ner-dump' carries no relations.
predictionCorpus = devCorpus.clone()

# Train a default classifier and predict into the clone.
classifier = kindred.RelationClassifier()
classifier.train(trainCorpus)
classifier.predict(predictionCorpus)

# NOTE(review): f1score is not read again in the visible code.
f1score = kindred.evaluate(devCorpus, predictionCorpus, metric='f1score')

print("5 CLASSES ---------------------")
# Store one Relationships record per predicted relation: the text of its first
# two entities, the relation type, and the text of the containing document.
for i in predictionCorpus.documents:
    for j in (i.relations):
        rel = Relationships(j.entities[0].text, j.entities[1].text,
                            j.relationType, i.text)
        insert_rels(rel)

# NOTE(review): get_rels is not defined in the visible code (only
# get_rels_clean is) — confirm it exists elsewhere in the file.
result = get_rels()
for i in result:
    print(i)
Esempio n. 29
0
    for relationType, threshold, entityTypes in relationInfo:
        print("Building %s model" % relationType)
        print("  Loading training")
        trainCorpus = kindred.load('standoff', args.inTrain)

        for doc in trainCorpus.documents:
            doc.relations = [
                r for r in doc.relations if r.relationType == relationType
            ]
            doc.relations = [
                r for r in doc.relations if len(r.entities) == len(entityTypes)
            ]

        print("  Doing training")
        threshold = 0.5
        classifier = kindred.RelationClassifier(
            classifierType='LogisticRegression',
            threshold=threshold,
            entityCount=len(entityTypes),
            acceptedEntityTypes=[entityTypes],
            model='en_core_sci_sm')
        classifier.train(trainCorpus)

        print("  Saving classifer")
        outModel = os.path.join(args.outDir, "%s.model" % relationType)
        with open(outModel, 'wb') as f:
            pickle.dump(classifier, f)

        print("  Output done!")