def __test(self, reviews, labels, test_on=DEFAULT_LABEL):
    """Score the regressor stored under *test_on* on a labelled review set.

    Pushes *reviews* through the already-fitted count / tf-idf pipeline
    (transform only -- no refitting), predicts with ``self.regs[test_on]``,
    and returns a pair of RMS-log errors:
    ``(error of the predictions, error of an all-zeros baseline)``.
    """
    counts = self.count_vect.transform(reviews)
    tfidf = self.tfidf_transformer.transform(counts)
    predicted = self.regs[test_on].predict(tfidf)
    # All-zeros predictions give a floor to compare the model against.
    baseline = np.zeros(len(predicted))
    return rmslog_error(predicted, labels), rmslog_error(baseline, labels)
y_train, batch_size=batch_size, nb_epoch=15, validation_split=0.1, show_accuracy=True) # Load test material print "LOADING TEST DATA" reviews_test, _, funny_votes_test, _, _ = BaseBowRegressor.get_reviews_data( PARTITIONS_TESTING) reviews_tokens_test = [ language.tokenize_document(txt) for txt in enumerate(reviews_test[:NUM_ELEMENTS_TEST]) ] X_test = tokens_to_word_vectors(reviews_tokens_test, model) X_test = np.array(X_test) y_test = np.array(funny_votes_test[:NUM_ELEMENTS_TEST]).astype('float32') print "Padding test sequences" X_test = pad_sequence_word_vectors(X_test, maxlen=maxlen) print('X_test shape:', X_test.shape) score = model.evaluate(X_test, y_test, batch_size=batch_size) print('Test score:', score) predicted = model.predict(X_test, batch_size=batch_size) print mean_absolute_error(predicted, y_test) print mean_absolute_error(np.zeros(len(predicted)), y_test) print "RMSLOG error test: " + str(rmslog_error(predicted, y_test)) print "RMSLOG on zeros: " + str(rmslog_error(np.zeros(len(predicted)), y_test)) model.save_weights("GRU_128_DROPOUT_0.1_RELU_50_epochs_moredata")
        # Missing review vector: fall back to an all-zeros row.
        # (Fragment starts mid-loop; assumes the enclosing
        # ``for i in range(N, N + N_test): if ... :`` seen earlier in the
        # file -- confirm indentation against the full script.)
        X_test[i - N, :] = np.zeros(M)
    else:
        # Row for review i comes straight from the embedding model lookup.
        X_test[i - N, :] = model['REVIEW_' + str(i)]

## SGD REGRESSOR
# sgd = SGDRegressor(loss="huber", alpha=0.001, penalty="l1", n_iter=20).fit(X, y)
# predicted = sgd.predict(X_test)
# print mean_absolute_error(predicted, y_test)
# print rmslog_error(predicted, y_test)
# print rmslog_error(np.zeros(len(predicted)), y_test)

## SUPPORT VECTOR REGRESSOR
# svm = SVR(kernel='rbf').fit(X, y)
# print "Trained!"
# predicted = svm.predict(X_test)
# print mean_absolute_error(predicted, y_test)
# print rmslog_error(predicted, y_test)
# print rmslog_error(np.zeros(len(predicted)), y_test)

## GRADIENT BOOSTING REGRESSOR
# Train the gradient-boosted trees on (X, y) built earlier, then report
# MAE and RMS-log error against an all-zeros baseline predictor.
est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,
                                max_depth=3, random_state=0,
                                loss='ls').fit(X, y)
print "Trained!"
predicted = est.predict(X_test)
print mean_absolute_error(predicted, y_test)
print mean_absolute_error(np.zeros(len(predicted)), y_test)
print rmslog_error(predicted, y_test)
print rmslog_error(np.zeros(len(predicted)), y_test)
model.add(Dense(128, 1)) model.add(Activation('relu')) # try using different optimizers and different optimizer configs rms = RMSprop() model.compile(loss=rmslog_loss, optimizer=rms) print("Train...") model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15, validation_split=0.1, show_accuracy=True) # Load test material print "LOADING TEST DATA" reviews_test, _, funny_votes_test, _, _ = BaseBowRegressor.get_reviews_data(PARTITIONS_TESTING) reviews_tokens_test = [language.tokenize_document(txt) for txt in enumerate(reviews_test[:NUM_ELEMENTS_TEST])] X_test = tokens_to_word_vectors(reviews_tokens_test, model) X_test = np.array(X_test) y_test = np.array(funny_votes_test[:NUM_ELEMENTS_TEST]).astype('float32') print "Padding test sequences" X_test = pad_sequence_word_vectors(X_test, maxlen=maxlen) print('X_test shape:', X_test.shape) score = model.evaluate(X_test, y_test, batch_size=batch_size) print('Test score:', score) predicted = model.predict(X_test, batch_size=batch_size) print mean_absolute_error(predicted, y_test) print mean_absolute_error(np.zeros(len(predicted)), y_test) print "RMSLOG error test: " + str(rmslog_error(predicted, y_test)) print "RMSLOG on zeros: " + str(rmslog_error(np.zeros(len(predicted)), y_test)) model.save_weights("GRU_128_DROPOUT_0.1_RELU_50_epochs_moredata")
y_test = funny_votes[N:N+N_test] for i in range(N, N + N_test): if 'REVIEW_' + str(i) not in model: print str(i) + "not in model?" X_test[i-N,:] = np.zeros(M) else: X_test[i-N,:] = model['REVIEW_' + str(i)] ## SGD REGRESSOR # sgd = SGDRegressor(loss="huber", alpha=0.001, penalty="l1", n_iter=20).fit(X, y) # predicted = sgd.predict(X_test) # print mean_absolute_error(predicted, y_test) # print rmslog_error(predicted, y_test) # print rmslog_error(np.zeros(len(predicted)), y_test) ## SUPPORT VECTOR REGRESSOR # svm = SVR(kernel='rbf').fit(X, y) # print "Trained!" # predicted = svm.predict(X_test) # print mean_absolute_error(predicted, y_test) # print rmslog_error(predicted, y_test) # print rmslog_error(np.zeros(len(predicted)), y_test) ## GRADIENT BOOSTING REGRESSOR est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0, loss='ls').fit(X, y) print "Trained!" predicted = est.predict(X_test) print mean_absolute_error(predicted, y_test) print mean_absolute_error(np.zeros(len(predicted)), y_test) print rmslog_error(predicted, y_test) print rmslog_error(np.zeros(len(predicted)), y_test)