Esempio n. 1
0
def predict_svm():
    """Train the SVM classifier and evaluate it on the testing data set.

    Loads the 'svm' configuration, fits the classifier obtained from
    ``ClassifierFactory.getSVM()`` on a random sample of ``TRAINING_DATA``
    (split 80/20 and passed to ``fit`` as train/validation parts), then
    predicts on a random sample of ``TESTING_DATA`` and prints the accuracy
    and F1 score and plots the confusion matrix for those predictions.

    Both CSV files are read from the directory returned by
    ``Config.getPath('data')`` and must provide the columns
    ``question_text`` and ``target``.
    """
    global config
    load_config('svm')

    print('loading data...')
    train_frame = pd.read_csv(Config.getPath('data') + '/' + TRAINING_DATA)

    # DataFrame.sample raises ValueError when asked for more rows than the
    # frame holds (sampling without replacement), so cap the request.
    train_sample = train_frame.sample(min(50000, len(train_frame)))

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        train_sample['question_text'],
        train_sample['target'],
        test_size=0.2,
        random_state=42)

    # Binary target: the confusion matrix below uses the labels 0 and 1.
    # NOTE(review): the previous comment named the classes "Fake=0,
    # Reliable=1" - confirm that this matches the 'target' column.
    config.set('num_target_tokens', 2)

    classifier = ClassifierFactory.getSVM()

    print('training size: ', len(Xtrain))
    print('testing size: ', len(Xtest))

    print('start fitting ...')

    classifier.fit(Xtrain, Ytrain, Xtest, Ytest)

    # Evaluation uses a separate file; keep its names distinct from the
    # training split so the two label sets cannot be confused.
    eval_frame = pd.read_csv(Config.getPath('data') + '/' + TESTING_DATA)
    eval_sample = eval_frame.sample(min(100000, len(eval_frame)))
    eval_text = eval_sample['question_text']
    eval_labels = eval_sample['target']

    pred = classifier.predict(eval_text)

    score = metrics.accuracy_score(eval_labels, pred)
    f1score = metrics.f1_score(eval_labels, pred)
    print("accuracy:   %0.3f" % score)
    print("f1 score:   %0.3f" % f1score)

    # The predictions belong to the evaluation sample, so its labels are the
    # ground truth here (not Ytest, the hold-out part of the training split,
    # which covers different rows and has a different length).
    cm = metrics.confusion_matrix(eval_labels, pred, labels=[0, 1])
    plot_confusion_matrix(cm, classes=[0, 1])
Esempio n. 2
0
def main_svm():
    """Load the default configuration and run vanilla training on the SVM.

    The configuration is loaded before the classifier is created, and the
    classifier from ``ClassifierFactory.getSVM()`` is handed straight to
    ``train_vanilla``.
    """
    load_config()
    train_vanilla(ClassifierFactory.getSVM())