コード例 #1
0
    def __test(self, reviews, labels, test_on=DEFAULT_LABEL):
        """Score the fitted regressor for *test_on* on held-out reviews.

        Transforms the raw review texts through the fitted count
        vectorizer and TF-IDF transformer, predicts with the regressor
        stored under *test_on*, and returns a pair of RMS-log errors:
        (model error, all-zeros baseline error).
        """
        counts = self.count_vect.transform(reviews)
        features = self.tfidf_transformer.transform(counts)
        preds = self.regs[test_on].predict(features)
        # Baseline: predicting zero for every review.
        baseline = np.zeros(len(preds))
        return rmslog_error(preds, labels), rmslog_error(baseline, labels)
コード例 #2
0
ファイル: regression.py プロジェクト: trb116/pythonanalyzer
    def __test(self, reviews, labels, test_on=DEFAULT_LABEL):
        """Evaluate the regressor selected by *test_on* on *reviews*.

        Returns a 2-tuple: the RMS-log error of the model's predictions
        against *labels*, and the RMS-log error of an all-zeros
        prediction (a trivial baseline for comparison).
        """
        tfidf = self.tfidf_transformer.transform(
            self.count_vect.transform(reviews))
        predicted = self.regs[test_on].predict(tfidf)
        zero_baseline = np.zeros(len(predicted))
        return (rmslog_error(predicted, labels),
                rmslog_error(zero_baseline, labels))
コード例 #3
0
ファイル: lstm.py プロジェクト: trb116/pythonanalyzer
          y_train,
          batch_size=batch_size,
          nb_epoch=15,
          validation_split=0.1,
          show_accuracy=True)

# Load test material
print "LOADING TEST DATA"
reviews_test, _, funny_votes_test, _, _ = BaseBowRegressor.get_reviews_data(
    PARTITIONS_TESTING)
reviews_tokens_test = [
    language.tokenize_document(txt)
    for txt in enumerate(reviews_test[:NUM_ELEMENTS_TEST])
]
X_test = tokens_to_word_vectors(reviews_tokens_test, model)
X_test = np.array(X_test)
y_test = np.array(funny_votes_test[:NUM_ELEMENTS_TEST]).astype('float32')
print "Padding test sequences"
X_test = pad_sequence_word_vectors(X_test, maxlen=maxlen)
print('X_test shape:', X_test.shape)

score = model.evaluate(X_test, y_test, batch_size=batch_size)
print('Test score:', score)

predicted = model.predict(X_test, batch_size=batch_size)
print mean_absolute_error(predicted, y_test)
print mean_absolute_error(np.zeros(len(predicted)), y_test)
print "RMSLOG error test: " + str(rmslog_error(predicted, y_test))
print "RMSLOG on zeros: " + str(rmslog_error(np.zeros(len(predicted)), y_test))

model.save_weights("GRU_128_DROPOUT_0.1_RELU_50_epochs_moredata")
コード例 #4
0
        X_test[i - N, :] = np.zeros(M)
    else:
        X_test[i - N, :] = model['REVIEW_' + str(i)]

## SGD REGRESSOR
# sgd = SGDRegressor(loss="huber", alpha=0.001, penalty="l1", n_iter=20).fit(X, y)
# predicted = sgd.predict(X_test)
# print mean_absolute_error(predicted, y_test)
# print rmslog_error(predicted, y_test)
# print rmslog_error(np.zeros(len(predicted)), y_test)

## SUPPORT VECTOR REGRESSOR
# svm = SVR(kernel='rbf').fit(X, y)
# print "Trained!"
# predicted = svm.predict(X_test)
# print mean_absolute_error(predicted, y_test)
# print rmslog_error(predicted, y_test)
# print rmslog_error(np.zeros(len(predicted)), y_test)

## GRADIENT BOOSTING REGRESSOR
est = GradientBoostingRegressor(n_estimators=100,
                                learning_rate=0.1,
                                max_depth=3,
                                random_state=0,
                                loss='ls').fit(X, y)
print "Trained!"
predicted = est.predict(X_test)
print mean_absolute_error(predicted, y_test)
print mean_absolute_error(np.zeros(len(predicted)), y_test)
print rmslog_error(predicted, y_test)
print rmslog_error(np.zeros(len(predicted)), y_test)
コード例 #5
0
ファイル: lstm.py プロジェクト: SemanticPrincess/deep-nlp
model.add(Dense(128, 1))
model.add(Activation('relu'))

# try using different optimizers and different optimizer configs
rms = RMSprop()
model.compile(loss=rmslog_loss, optimizer=rms)

print("Train...")
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=15, validation_split=0.1, show_accuracy=True)

# Load test material
print "LOADING TEST DATA"
reviews_test, _, funny_votes_test, _, _ = BaseBowRegressor.get_reviews_data(PARTITIONS_TESTING)
reviews_tokens_test = [language.tokenize_document(txt) for txt in enumerate(reviews_test[:NUM_ELEMENTS_TEST])]
X_test = tokens_to_word_vectors(reviews_tokens_test, model)
X_test = np.array(X_test)
y_test = np.array(funny_votes_test[:NUM_ELEMENTS_TEST]).astype('float32')
print "Padding test sequences"
X_test = pad_sequence_word_vectors(X_test, maxlen=maxlen)
print('X_test shape:', X_test.shape)

score = model.evaluate(X_test, y_test, batch_size=batch_size)
print('Test score:', score)

predicted = model.predict(X_test, batch_size=batch_size)
print mean_absolute_error(predicted, y_test)
print mean_absolute_error(np.zeros(len(predicted)), y_test)
print "RMSLOG error test: " + str(rmslog_error(predicted, y_test))
print "RMSLOG on zeros: " + str(rmslog_error(np.zeros(len(predicted)), y_test))

model.save_weights("GRU_128_DROPOUT_0.1_RELU_50_epochs_moredata")
コード例 #6
0
y_test = funny_votes[N:N+N_test]
for i in range(N, N + N_test):
    if 'REVIEW_' + str(i) not in model:
        print str(i) + "not in model?"
        X_test[i-N,:] = np.zeros(M)
    else:
        X_test[i-N,:] = model['REVIEW_' + str(i)]

## SGD REGRESSOR
# sgd = SGDRegressor(loss="huber", alpha=0.001, penalty="l1", n_iter=20).fit(X, y)
# predicted = sgd.predict(X_test)
# print mean_absolute_error(predicted, y_test)
# print rmslog_error(predicted, y_test)
# print rmslog_error(np.zeros(len(predicted)), y_test)

## SUPPORT VECTOR REGRESSOR
# svm = SVR(kernel='rbf').fit(X, y)
# print "Trained!"
# predicted = svm.predict(X_test)
# print mean_absolute_error(predicted, y_test)
# print rmslog_error(predicted, y_test)
# print rmslog_error(np.zeros(len(predicted)), y_test)

## GRADIENT BOOSTING REGRESSOR
est = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=0, loss='ls').fit(X, y)
print "Trained!"
predicted = est.predict(X_test)
print mean_absolute_error(predicted, y_test)
print mean_absolute_error(np.zeros(len(predicted)), y_test)
print rmslog_error(predicted, y_test)
print rmslog_error(np.zeros(len(predicted)), y_test)