Esempio n. 1
0
def test_classifier(dataset, classifier):
    """Evaluate ``classifier`` on ``dataset`` and print the results.

    The test data is built with the ``dl.prepare_data_for_*`` helper that
    matches the classifier type. For Keras models the second value returned
    by ``evaluate`` is printed as a percentage. For every classifier a
    classification report and a cross-tabulation of true versus predicted
    classes are printed.

    Args:
        dataset: Passed unchanged to the matching ``dl.prepare_data_for_*``
            helper.
        classifier: A Keras ``Sequential`` model, any other Keras ``Model``,
            a scikit-learn ``RandomForestClassifier``, or any other object
            exposing ``predict`` (prepared with ``prepare_data_for_ZeroR``).

    Returns:
        None. Everything is written to standard output.
    """
    print("-----TESTING CLASSIFIER-----")

    # ``Sequential`` is a subclass of ``Model``, so it has to be tested
    # first; with the generic ``Model`` check in front, Sequential networks
    # would never reach their own branch and would be fed RNN-style data.
    is_keras_model = True
    if isinstance(classifier, keras.models.Sequential):
        x_test, y_test = dl.prepare_data_for_NN(dataset)
        test_set_size = len(x_test)
    elif isinstance(classifier, keras.models.Model):
        x_test, y_test = dl.prepare_data_for_RNN(dataset)
        # x_test is indexed by input name here, so report the shape of
        # one of the inputs instead of len().
        test_set_size = x_test["sentence1"].shape
    elif isinstance(classifier, sklearn.ensemble.RandomForestClassifier):
        # Public import path; the private ``sklearn.ensemble.forest``
        # module is deprecated/removed in recent scikit-learn releases.
        x_test, y_test = dl.prepare_data_for_RF(dataset)
        test_set_size = len(x_test)
        is_keras_model = False
    else:
        x_test, y_test = dl.prepare_data_for_ZeroR(dataset)
        test_set_size = len(x_test)
        is_keras_model = False

    print("-----TEST SET SIZE: " + str(test_set_size) + "-----")

    if is_keras_model:
        scores = classifier.evaluate(x_test, y_test)
        # Index 1 is the first metric after the loss in ``metrics_names``.
        print("\n%s: %.2f%%" % (classifier.metrics_names[1], scores[1] * 100))

    prediction = classifier.predict(x_test)

    # NOTE(review): assumes y_test is a 2-D one-hot array
    # (samples x classes) -- confirm against the dl.prepare_data_* helpers.
    num_classes = y_test.shape[1]

    # Index of the highest-scoring class per sample, then re-encoded as
    # one-hot rows so it has the same layout as y_test for the report.
    predicted_class = np.argmax(prediction, axis=-1)
    y_pred = np.identity(num_classes)[predicted_class]

    # The report labels are fixed, so this expects exactly two classes.
    target_names = ['nonrelated', 'related']
    print(classification_report(y_test, y_pred, target_names=target_names))

    # Decode the one-hot ground truth to class indices. Rows without a 1
    # raise IndexError on purpose rather than being silently mislabelled.
    true_class = [np.where(row == 1)[0][0] for row in y_test]

    # ``predicted_class`` already holds the indices encoded in ``y_pred``,
    # so there is no need to decode the one-hot rows again.
    print(
        pd.crosstab(pd.Series(true_class),
                    pd.Series(predicted_class),
                    rownames=['True'],
                    colnames=['Predicted'],
                    margins=True))
Esempio n. 2
0
def train_RF_classifier(dataset, n_estimators=200, random_state=None):
    """Train a random forest on ``dataset`` and return the fitted model.

    Args:
        dataset: Passed unchanged to ``dl.prepare_data_for_RF`` to obtain
            the training features and labels.
        n_estimators: Number of trees in the forest. Defaults to 200, the
            value that used to be hard-coded.
        random_state: Forwarded to ``RandomForestClassifier``. ``None``
            (the default) keeps the previous non-deterministic behaviour;
            pass an int for reproducible training runs.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    print("-----TRAIN CLASSIFIER-----")

    x_train, y_train = dl.prepare_data_for_RF(dataset)

    random_forest = RandomForestClassifier(n_estimators=n_estimators,
                                           random_state=random_state)
    random_forest.fit(x_train, y_train)

    print("-----TRAINING COMPLETE-----")
    return random_forest