Ejemplo n.º 1
0
    # and learns the vocabulary; second, it transforms our training data
    # into feature vectors. The input to fit_transform should be a list of
    # strings.
    train_data_features = vectorizer.fit_transform(clean_train_LAPD)
    test_data_features = vectorizer.transform(clean_test_LAPD)

    # Numpy arrays are easy to work with, so convert the result to an array
    np.asarray(train_data_features)
    np.asarray(test_data_features)

    ###################
    # TRAIN THE MODEL #
    ###################
    classifier.fit(train_data_features.toarray(), train["Problematic"])

# ---------------------------------------------------------------------------
# Model evaluation
# ---------------------------------------------------------------------------
# Run the already-available classifier over the test vectors and keep all
# three outputs: predict(), predict_proba() and predict_proba_dict().
Y_pred = classifier.predict(testDataVecs)
Y_p = classifier.predict_proba(testDataVecs)
Y_n = classifier.predict_proba_dict(testDataVecs)

# Dump everything for manual inspection, followed by the reference labels.
# NOTE(review): Y_p appears twice, which looks like a copy-paste leftover;
# it is kept so the printed output stays exactly the same.
for _shown in (Y_n, Y_p, Y_p, Y_pred, test["Problematic"]):
    print(_shown)

# Disabled: accuracy report and CSV export of the first prediction row.
# print('Done.\nAccuracy: %f' % accuracy_score(test["Problematic"], Y_pred))
# res = [[Y_p[0, 0], Y_p[0, 1], Y_pred, test["Problematic"]]]
# writer.writerows(res)
# Densify the feature matrices once, up front. A bare ``np.asarray(x)``
# statement whose result is discarded converts nothing, so the conversion is
# done explicitly with ``.toarray()`` and the dense arrays are reused for
# fitting and for every prediction call below.
# NOTE(review): presumably these are SciPy sparse matrices produced by the
# vectorizer -- confirm against the code that builds them.
train_features_dense = train_data_features.toarray()
test_features_dense = test_data_features.toarray()

# Training: supervised deep belief network, configured entirely through the
# keyword arguments below (dropout is switched off with dropout_p=0).
classifier = SupervisedDBNClassification(
    hidden_layers_structure=[500, 250, 100],
    learning_rate_rbm=0.1,
    learning_rate=0.0001,
    n_epochs_rbm=50,
    n_iter_backprop=500,
    batch_size=16,
    activation_function='sigmoid',
    dropout_p=0,
)
classifier.fit(train_features_dense, train["Problematic"])

# Test: collect labels, class probabilities and the predict_proba_dict output
# from the same dense test matrix.
Y_pred = classifier.predict(test_features_dense)
Y_p = classifier.predict_proba(test_features_dense)
Y_n = classifier.predict_proba_dict(test_features_dense)

# Print the raw outputs, then the reference labels, for manual inspection.
# NOTE(review): Y_p is printed twice (likely a copy-paste leftover); kept so
# the script's output is unchanged.
print(Y_n)
print(Y_p)
print(Y_p)
print(Y_pred)
print(test["Problematic"])
print('Done.\nAccuracy: %f' % accuracy_score(test["Problematic"], Y_pred))

# Disabled: CSV export of the first prediction row.
# res = [[Y_p[0, 0], Y_p[0, 1], Y_pred, test["Problematic"]]]
# writer.writerows(res)
        if (os.stat(filename).st_size != 0):
            X_Test = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences])
            y_Test = np.transpose(np.asarray([1] * (len(list(sentences)))))

# Bring every index sequence to a common length (max_review_length) by
# truncating / padding, for both the training and the test split.
X_Train = sequence.pad_sequences(X_Train, maxlen=max_review_length)
X_Test = sequence.pad_sequences(X_Test, maxlen=max_review_length)

# Training: supervised deep belief network with dropout_p=0.25.
classifier = SupervisedDBNClassification(
    hidden_layers_structure=[500, 250, 100],
    learning_rate_rbm=0.1,
    learning_rate=0.0001,
    n_epochs_rbm=50,
    n_iter_backprop=500,
    batch_size=16,
    activation_function='sigmoid',
    dropout_p=0.25,
)
classifier.fit(X_Train, y_Train)

# Test: labels, class probabilities and the predict_proba_dict output.
Y_pred = classifier.predict(X_Test)
Y_p = classifier.predict_proba(X_Test)
Y_n = classifier.predict_proba_dict(X_Test)

# Show the raw outputs followed by the expected labels.
# NOTE(review): Y_p is emitted twice, which looks like a copy-paste leftover;
# it is kept so the printed output stays exactly the same.
for _shown in (Y_n, Y_p, Y_p, Y_pred, y_Test):
    print(_shown)
print('Done.\nAccuracy: %f' % accuracy_score(y_Test, Y_pred))

# Export a single CSV row: the two probabilities of the FIRST test sample,
# then the full prediction and label collections as one cell each.
# NOTE(review): only row 0 of Y_p is written -- confirm this is intended
# rather than one row per test sample.
res = [
    [Y_p[0, 0], Y_p[0, 1], Y_pred, y_Test],
]
writer.writerows(res)