Ejemplo n.º 1
0
    print("Running test for model: {}".format(args.m))

    # Our input $x$
    TEXT = torchtext.data.Field()

    # Our labels $y$
    LABEL = torchtext.data.Field(sequential=False, unk_token=None)

    # Generate train/test splits from the SST dataset, filter out neutral examples
    train, val, test = torchtext.datasets.SST.splits(
        TEXT, LABEL,
        filter_pred=lambda ex: ex.label != 'neutral')

    TEXT.build_vocab(train)
    LABEL.build_vocab(train)

    train_iter, val_iter, test_iter = torchtext.data.BucketIterator.splits((train, val, test), batch_size=10, device=-1, repeat = False)

    if args.m == "NaiveBayes":
        alpha = 1
        model = NaiveBayes(alpha, TEXT, LABEL)
        model.train(train_iter, val_iter)
        # Evaluate on training set
        train_acc = model.evaluate(train_iter)
        print('final train_acc (should be very high): ', train_acc)
        # Evaluate on testing set
        # test_code_NB(model, test_iter)
        test_acc = model.evaluate(test_iter)
        print('final test_acc: ', test_acc)
        df['text'], df['is_spam'], test_size=0.2, random_state=191)

    print('Data set:')
    print('{} total'.format(df.shape[0]))
    for t, t_name in zip(targets, target_names):
        print('{} {}'.format(len(df[df['is_spam'] == t]), t_name))

    print('\nTraining set:')
    print('{} total'.format(len(X_train)))
    for t, t_name in zip(targets, target_names):
        print('{} {}'.format(sum([y == t for y in y_train]), t_name))

    print('\nTest set:')
    print('{} total'.format(len(X_test)))
    for t, t_name in zip(targets, target_names):
        print('{} {}'.format(sum([y == t for y in y_test]), t_name))
    print('')

    # Build Classifier
    gvoc_model = NaiveBayes('General Vocabulary', X_train,
                            y_train, targets, target_names)
    gvoc_model.train()

    gvoc_model.evaluate(X_test, y_test, show_top_features=10)

    rvoc_model = NaiveBayes('Reduced Vocabulary', X_train, y_train, targets,
                            target_names, max_features=200)
    rvoc_model.train()

    rvoc_model.evaluate(X_test, y_test, show_top_features=10)
Ejemplo n.º 3
0
def evaluate_naivebayes():
    nb = NaiveBayes(sys.argv[1], evaluate = True)
    out = nb.evaluate(sys.argv[2])
    process(out, 'Naive Bayes')