Exemple #1
0
def get_data(split_index=1489):
    """Load the train and test CSVs and return train/validation/test splits.

    The training set is read with ``get_dataset``, its features are run
    through ``p.pipeline.fit_transform`` and both the raw data and the
    transformed features are then sliced by position at ``split_index``:
    everything before it forms the first (train) part, everything from it
    onwards the second (validation) part.  The test set is only passed
    through ``p.pipeline.transform``.

    The labels returned by ``split_data`` are not part of the result and are
    discarded.

    Args:
        split_index: Position at which the training data and its transformed
            features are cut.  Defaults to 1489, the value that used to be
            hard-coded here.

    Returns:
        A 6-tuple ``(train_data1, X1, train_data2, X2, test_data, X_test)``:
        raw data and transformed features for the train part, the validation
        part and the test set, in that order.
    """
    train_data = get_dataset('url-versions-2015-06-14-clean-train.csv')
    X, _ = split_data(train_data)
    # NOTE(review): the pipeline is fitted on the whole training set, i.e.
    # before the validation rows are split off below -- confirm that letting
    # the validation part influence the fitted transform is intended.
    X = p.pipeline.fit_transform(X)

    # Cut the raw data and the transformed features at the same position so
    # the two stay aligned.
    train_data1, train_data2 = train_data[:split_index], train_data[split_index:]
    X1, X2 = X[:split_index], X[split_index:]

    test_data = get_dataset('url-versions-2015-06-14-clean-test.csv')
    X_test, _ = split_data(test_data)
    # transform (not fit_transform): reuse the fit performed on the training set.
    X_test = p.pipeline.transform(X_test)

    # return train/ validation/ test
    return (train_data1, X1, train_data2, X2, test_data, X_test)
import os

from model.utils import get_dataset, split_data
from model.cross_validation import ClaimKFold

if __name__ == '__main__':
    # Load the training CSV and separate the features from the stance labels.
    dataset = get_dataset('url-versions-2015-06-14-clean-train.csv')
    features, stances = split_data(dataset)

    # ClaimKFold yields (train_index, test_index) pairs; the exported fold
    # files are numbered starting from 1.
    for fold_no, (train_rows, test_rows) in enumerate(ClaimKFold(features), start=1):
        # Same order as before: the test split of a fold is written first,
        # then its train split.
        exports = (
            ('url-versions-2015-06-14-clean-test-fold-{0:d}.csv', test_rows),
            ('url-versions-2015-06-14-clean-train-fold-{0:d}.csv', train_rows),
        )
        for file_pattern, rows in exports:
            # Work on a copy so adding the label column leaves `features` untouched.
            frame = features.iloc[rows, :].copy()
            frame['articleHeadlineStance'] = stances.iloc[rows]
            frame.to_csv(os.path.join('..', 'data', 'emergent',
                                      file_pattern.format(fold_no)))

Exemple #3
0
import sys
import os

sys.path.append(os.path.join('..', 'src'))

from model.utils import get_dataset, split_data, run_test
from model.baseline.baseline_predictors import ProbabilityPredictor, ChancePredictor, \
    MajorityPredictor, WordOverlapBaselinePredictor


if __name__ == '__main__':
    # Training features/labels and the held-out test set shared by all baselines.
    features, labels = split_data(get_dataset('url-versions-2015-06-14-clean-train.csv'))
    held_out = get_dataset('url-versions-2015-06-14-clean-test.csv')

    # (heading, predictor class) pairs, evaluated in this order.  Classes are
    # stored rather than instances so that each predictor is only constructed
    # after its heading has been printed.
    baselines = (
        ('\n>> Chance predictor <<\n', ChancePredictor),
        ('\n>> Majority predictor <<\n', MajorityPredictor),
        ('\n>> Probability predictor <<\n', ProbabilityPredictor),
        ('\n>> Word overlap predictor <<\n', WordOverlapBaselinePredictor),
    )

    for heading, predictor_cls in baselines:
        print(heading)
        print(run_test(features, labels, held_out, predictor_cls()))
Exemple #4
0
def get_snopes():
    """Load ``my_claims_csv_cleaned.csv`` and return it with its transformed features.

    The features produced by ``split_data`` are passed through
    ``p.pipeline.transform`` (no fitting happens here); the labels are not
    part of the result.

    Returns:
        A tuple ``(test_data, X_test)``: the dataset as loaded by
        ``get_dataset`` and its pipeline-transformed features.
    """
    claims = get_dataset("my_claims_csv_cleaned.csv")
    raw_features, _labels = split_data(claims)
    transformed = p.pipeline.transform(raw_features)
    return claims, transformed