neural_network.py
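
"""Train a one-hidden-layer neural network for IMDB sentiment classification
on pre-trained Doc2Vec document vectors (loaded from models/imdb.d2v)."""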
import numpy as np
import tensorflow as tf
from gensim.models import Doc2Vec
from util import *

# Hyperparameters.
batch_size = 128    # minibatch size for gradient descent
H = 1024            # hidden-layer width
ETA = 0.5           # learning rate
LAMBDA = 0.001      # L2 regularization strength
model_name = 'models/imdb.d2v'
num_features = 300  # Doc2Vec vector dimensionality
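
# Shuffle two arrays with the same permutation by restoring NumPy's RNG state
# between the two shuffles, so features and labels stay aligned row for row.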
def shuffle_in_unison(a, b):
    rng_state = np.random.get_state()
    np.random.shuffle(a)
    np.random.set_state(rng_state)
    np.random.shuffle(b)
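
# Percentage of sigmoid outputs that land on the correct side of the 0.5
# threshold. For example (hypothetical values):
#   accuracy(np.array([[0.9], [0.2], [0.6], [0.4]]),
#            np.array([[1.0], [0.0], [0.0], [1.0]]))  # -> 50.0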
def accuracy(predictions, labels):
    return 100.0 * np.sum((predictions >= 0.5) == labels) / predictions.shape[0]
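
# One hidden ReLU layer of H units followed by a single sigmoid output unit,
# trained with L2-regularized cross-entropy loss via plain gradient descent.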
def neural_network(train_dataset, train_labels, test_dataset, test_labels):
    print('Learning...')

    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        x = tf.placeholder(tf.float32, shape=(None, num_features))
        y_ = tf.placeholder(tf.float32, shape=(None, 1))

        # Variables.
        W1 = tf.Variable(tf.truncated_normal([num_features, H]))
        B1 = tf.Variable(tf.zeros([H]))
        W2 = tf.Variable(tf.truncated_normal([H, 1]))
        B2 = tf.Variable(tf.zeros([1]))

        # Training computation.
        a2 = tf.nn.relu(tf.matmul(x, W1) + B1)
        y = tf.matmul(a2, W2) + B2
        loss = (tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=y))
                + LAMBDA * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2)))

        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(ETA).minimize(loss)

        # Predictions.
        predictions = tf.nn.sigmoid(y)

    num_steps = 10001
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        shuffle_in_unison(train_dataset, train_labels)
        print("Initialized")
        for step in range(num_steps):
            # Cycle through the shuffled training set one minibatch at a time.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {x: batch_data, y_: batch_labels}
            _, l, p = session.run([optimizer, loss, predictions], feed_dict=feed_dict)
            if step % 500 == 0:
                print("Minibatch loss at step {}: {}".format(step, l))
                print("Minibatch accuracy: {}".format(accuracy(p, batch_labels)))
                print("Test accuracy: {}".format(
                    accuracy(predictions.eval(feed_dict={x: test_dataset}), test_labels)))
        print("Test accuracy: {}".format(
            accuracy(predictions.eval(feed_dict={x: test_dataset}), test_labels)))
def get_features(reviews, model):
    print('Creating features...')
    features = np.zeros((len(reviews), num_features), dtype="float32")
    for i, review_id in enumerate(reviews['id']):
        if (i + 1) % 1000 == 0:
            print('Review {}'.format(i + 1))
        features[i, :] = model.docvecs[review_id]
    return features
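
# get_reviews comes from util (imported with *); it presumably returns a
# DataFrame with at least 'id' and 'sentiment' columns, as used below.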
def predict():
    train = get_reviews('data/imdb/train_data.csv')
    test = get_reviews('data/imdb/test_data.csv')
    model = Doc2Vec.load(model_name)
    train_features = get_features(train, model)
    train_labels = train['sentiment'].values.reshape((len(train), 1))
    test_features = get_features(test, model)
    test_labels = test['sentiment'].values.reshape((len(test), 1))
    neural_network(train_features, train_labels, test_features, test_labels)

if __name__ == '__main__':
    predict()
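
# Running this end to end assumes the Doc2Vec model at models/imdb.d2v and the
# CSVs under data/imdb/ already exist, presumably built by companion scripts
# elsewhere in this project.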