def fit(model_fn):
    """Train the stacked classifier, evaluate it on the test split, and save it.

    The model is created by ``stack_model_gen()`` and fitted with ``None`` as
    its first argument, so only the training labels are handed over here.
    Test predictions are requested for the ``'test/*/'`` directory pattern
    and scored with ``sk_utils.evaluate``. Training and prediction durations
    are printed.

    Args:
        model_fn: Output path without extension; the fitted model is dumped
            with joblib to ``'<model_fn>.pkl'``.
    """
    # Only the labels are used; the first returned value is not needed here.
    _, train_labels = sk_utils.prepare_data()

    model = stack_model_gen()

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    model.fit(None, train_labels)
    elapsed = time.perf_counter() - started
    print('\nClassifier stack training time : %0.3f seconds.' % elapsed)

    _, test_labels = sk_utils.prepare_data(mode='test')

    print('Begin testing stacked classifier ')
    started = time.perf_counter()
    preds = model.predict(pred_directory='test/*/', X_indices=None)
    elapsed = time.perf_counter() - started
    print('Classifier stack finished predicting in %0.3f seconds.' % elapsed)

    # NOTE(review): 'Evaluationg' is a typo in the emitted message; it is left
    # unchanged so the program's output stays byte-identical.
    print('\nEvaluationg stack model')
    sk_utils.evaluate(test_labels, preds)
    print()

    joblib.dump(model, '%s.pkl' % model_fn)
def calculate_score(model_dir='lstm/', base_dir='test/', dataset='full'):
    """Evaluate saved model predictions against the test labels.

    Searches for ``*.npy`` files under ``base_dir + model_dir``, loads the
    first match in sorted order, averages the loaded array over axis 0, takes
    the argmax over axis 1 of the result and passes it, together with the
    test labels, to ``evaluate``.

    Args:
        model_dir: Sub-directory (including trailing slash) that holds the
            ``.npy`` prediction files.
        base_dir: Directory prefix (including trailing slash) prepended to
            ``model_dir``.
        dataset: Forwarded to ``prepare_data`` to select the test data.

    Raises:
        FileNotFoundError: If no ``.npy`` file matches the search pattern.
    """
    pattern = base_dir + model_dir + "*.npy"

    # glob returns matches in arbitrary order; sorting makes the selected
    # file stable when more than one prediction file is present.
    files = sorted(glob.glob(pattern))
    if not files:
        # Fail before the data preparation step, with the pattern in the
        # message, instead of an opaque IndexError on files[0] afterwards.
        raise FileNotFoundError('No prediction files found matching %r' % pattern)

    # Only the labels are needed for scoring; the other returned values are
    # discarded.
    _, labels, _, _ = prepare_data(MAX_NB_WORDS, MAX_SEQUENCE_LENGTH,
                                   mode='test', dataset=dataset)

    model_predictions = np.load(files[0])
    print('Loaded predictions. Shape = ', model_predictions.shape)

    # presumably axis 0 indexes ensemble members / folds whose scores are
    # averaged before picking a class — TODO confirm against the writer.
    model_predictions = model_predictions.mean(axis=0)
    preds = np.argmax(model_predictions, axis=1)
    evaluate(labels, preds)
def fit_voting_classifier(dataset='full'):
    """Score the voting classifier's directory-based predictions on test data.

    Seeds NumPy's global RNG with 1000, loads the test labels for ``dataset``,
    asks ``model.predict_proba_dir`` for probabilities over the matching
    prediction directory pattern and evaluates the argmax (axis 1) of those
    probabilities against the labels.

    NOTE(review): ``model`` is not assigned inside this function, so it has to
    exist at module level when this is called — confirm.

    Args:
        dataset: ``'full'`` selects ``'test/*/'``, ``'obama'`` selects
            ``'obama/*/'``; any other value selects ``'romney/*/'``.
    """
    np.random.seed(1000)

    # Start from the fallback pattern and override it for the named datasets.
    pred_dir = 'romney/*/'
    if dataset == 'full':
        pred_dir = 'test/*/'
    elif dataset == 'obama':
        pred_dir = 'obama/*/'

    # Only the labels are used for scoring.
    _, labels = prepare_data(mode='test', dataset=dataset)

    probabilities = model.predict_proba_dir(pred_dir)
    predicted_classes = np.argmax(probabilities, axis=1)
    evaluate(labels, predicted_classes)
def write_predictions(model_fn='stack_model/stack-model', dataset='full'):
    """Evaluate a saved stacked model and store its class probabilities.

    Loads the model from ``model_fn + '.pkl'`` with joblib, calls its
    ``predict_proba`` with the prediction directory pattern for ``dataset``,
    evaluates the argmax (axis 1) against the test labels and saves the raw
    probabilities to ``'stack_model/stack_predictions-<dataset>.npy'``.

    Args:
        model_fn: Path of the pickled model without the ``.pkl`` extension.
        dataset: ``'full'`` selects ``'test/*/'``, ``'obama'`` selects
            ``'obama/*/'``; any other value selects ``'romney/*/'``.
    """
    # Start from the fallback pattern and override it for the named datasets.
    pred_dir = 'romney/*/'
    if dataset == 'full':
        pred_dir = 'test/*/'
    elif dataset == 'obama':
        pred_dir = 'obama/*/'

    # Only the labels are used for scoring.
    _, labels = sk_utils.prepare_data(mode='test', dataset=dataset)

    stacked_model = joblib.load(model_fn + ".pkl")  # type: StackedGeneralizer

    preds_proba = stacked_model.predict_proba(pred_dir)
    predicted_classes = np.argmax(preds_proba, axis=1)
    sk_utils.evaluate(labels, predicted_classes)

    output_path = "stack_model/stack_predictions-%s.npy" % dataset
    np.save(output_path, preds_proba)