Beispiel #1
0
# Demo script: load the 'credit' dataset, run the preprocessor over every
# split, and scatter-plot the first two columns of the transformed training set.
from data_manager import DataManager  # provides the DataManager container used below

from prepro import Preprocessor
input_dir = "../sample_data"
# NOTE(review): "../resuts" may be a typo for "../results"; it is not used in
# this part of the script, so it is left unchanged — confirm before renaming.
output_dir = "../resuts"

basename = 'credit'
D = DataManager(basename, input_dir)  # Load data
print("*** Original data ***")
# print() call form: valid on Python 3 and still works on Python 2, unlike
# the bare `print D` statement.
print(D)

Prepro = Preprocessor()

# Fit the preprocessor on the training split only, then apply the fitted
# transform to the validation and test splits, storing the results back in D.
D.data['X_train'] = Prepro.fit_transform(D.data['X_train'], D.data['Y_train'])
D.data['X_valid'] = Prepro.transform(D.data['X_valid'])
D.data['X_test'] = Prepro.transform(D.data['X_test'])

# Print the container again so the effect of the preprocessing can be compared
# with the "Original data" output above.
print("*** Transformed data ***")
print(D)

# Preprocessing gives you opportunities of visualization:
# Scatter-plots of the 2 first principal components
# Scatter plots of pairs of features that are most relevant
import matplotlib.pyplot as plt
X = D.data['X_train']
Y = D.data['Y_train']
# Columns 0 and 1 are labelled PC1/PC2 — assumes the preprocessor outputs
# principal components in that order (TODO confirm). Points are coloured by Y.
plt.scatter(X[:, 0], X[:, 1], c=Y)
plt.xlabel('PC1')
plt.ylabel('PC2')
# NOTE(review): no plt.show()/savefig() is visible here; whether the figure is
# displayed depends on code outside this snippet or on an interactive backend.
Beispiel #2
0
    return parser.parse_args()


# Script entry point: evaluate a previously trained model on the test data
# given by the command-line arguments.
if __name__ == '__main__':
    # Get arguments
    print('Getting arguments...')
    args = get_args()

    # make a dataset
    print('Importing dataset...')
    data = SentimentDataset(data=args.test_path)

    # Build the preprocessor, call load() on it, then transform the dataset.
    # NOTE(review): load() presumably restores encodings saved at training
    # time rather than fitting new ones — confirm against Preprocessor.

    preprocessor = Preprocessor(max_vocab=args.max_vocab)
    preprocessor.load()
    data = preprocessor.transform(dataset=data)

    # to_dataset() returns a pair; only the first element is used for
    # testing and the second is discarded.
    test_ds, _ = data.to_dataset()

    # to dataLoaders (fixed batch size, original order kept: shuffle=False)
    test_set = DataLoader(test_ds, batch_size=16, shuffle=False)

    # load saved model
    print('Loading trained model...')
    # NOTE(review): torch.load unpickles the file at args.model_path; only
    # use it on trusted checkpoints.
    model = torch.load(args.model_path)
    # Switch the model to evaluation mode before running the test pass.
    model.eval()

    # Run the test routine on the loader with val=False.
    test(test_set, model, val=False)