relbox.py
import theano, numpy
from theano import tensor as T
from theano.tensor.shared_randomstreams import RandomStreams


class RelBox(object):
    def __init__(self, num_words, num_rels, vocab_embed_size, lr=0.01, tensor_activation=T.tanh, num_noise_samples=1, init_dense_vocab=None):
        numpy_rng = numpy.random.RandomState(89677)
        theano_rng = RandomStreams(12783)
        # Glorot-style initialization range for the relation tensor B
        rng_box_limit = 4 * numpy.sqrt(6. / (vocab_embed_size + vocab_embed_size + num_rels))
        rng_box_low = 0
        rng_box_high = rng_box_limit
        init_box = numpy.asarray(numpy_rng.uniform(low=rng_box_low, high=rng_box_high, size=(vocab_embed_size, vocab_embed_size, num_rels)))
        # Glorot-style initialization range for the projection matrices
        rng_proj_low = -4 * numpy.sqrt(6. / (num_words + vocab_embed_size))
        rng_proj_high = 4 * numpy.sqrt(6. / (num_words + vocab_embed_size))
        if init_dense_vocab is None:
            init_dense_vocab = numpy.asarray(numpy_rng.uniform(low=rng_proj_low, high=rng_proj_high, size=(num_words, vocab_embed_size)))
        init_rev_dense_vocab = numpy.asarray(numpy_rng.uniform(low=rng_proj_low, high=rng_proj_high, size=(vocab_embed_size, num_words)))
        self.B = theano.shared(value=init_box, name='B')  # relation tensor
        self.P = theano.shared(value=init_dense_vocab, name='P')  # word projection (embedding) matrix
        self.P_hat = theano.shared(value=init_rev_dense_vocab, name='P_hat')  # reverse projection back to the vocabulary
        self.vocab = T.eye(num_words)  # one-hot word vectors
        word_activation = T.nnet.softmax
        self.rel = T.eye(num_rels)  # one-hot relation vectors
        rel_activation = T.nnet.softmax
        self.lr = lr
        self.x_ind, self.y_ind, self.r_ind = T.iscalars('x_ind', 'y_ind', 'r_ind')
        x = self.vocab[self.x_ind]
        self.x_rep = T.dot(x, self.P)
        y = self.vocab[self.y_ind]
        self.y_rep = T.dot(y, self.P)
        r = self.rel[self.r_ind]
        # Assumption: corresponding dimensions of B are 0 -> x, 1 -> y, 2 -> r
        # TODO: Where do we apply activations? Do we have to, at all?
        # Contract B with the relation to get a matrix relating x and y
        pred_xy = tensor_activation(T.tensordot(r, self.B, axes=(0, 2)))
        # Predict y from (x, r) and x from (y, r), projected back into the vocabulary
        pred_y = T.dot(T.tensordot(self.x_rep, pred_xy, axes=(0, 0)), self.P_hat)
        self.prob_y = word_activation(pred_y)
        pred_x = T.dot(T.tensordot(self.y_rep, pred_xy, axes=(0, 1)), self.P_hat)
        self.prob_x = word_activation(pred_x)
        # Predict r from (x, y)
        pred_yr = tensor_activation(T.tensordot(self.x_rep, self.B, axes=(0, 0)))
        self.prob_r = rel_activation(T.tensordot(self.y_rep, pred_yr, axes=(0, 0)))
        # Score of the (x, y, r) triple: y \times (((x \times P) \times (r \otimes B)) \times P_hat)
        self.score = T.dot(y, T.dot(T.tensordot(self.x_rep, T.tensordot(r, self.B, axes=(0, 2)), axes=(0, 0)), self.P_hat).T)
        rand_margin_score = T.constant(0)
        noise_log_likelihood = T.constant(0)
        # The noise distribution treats the two words and the relation as independent of each
        # other, so the correct tuple and a corrupted tuple are equally probable under it.
        noise_prob = num_noise_samples / float(num_words * num_words * num_rels)
        for _ in range(num_noise_samples):
            # Sample a fresh corrupted (x, y, r) triple for each noise sample
            rand_x_ind = theano_rng.random_integers(low=0, high=num_words - 1)
            rand_y_ind = theano_rng.random_integers(low=0, high=num_words - 1)
            rand_r_ind = theano_rng.random_integers(low=0, high=num_rels - 1)
            rand_x = self.vocab[rand_x_ind]
            rand_x_rep = T.dot(rand_x, self.P)
            rand_y = self.vocab[rand_y_ind]
            rand_y_rep = T.dot(rand_y, self.P)
            rand_r = self.rel[rand_r_ind]
            rand_score = T.dot(rand_y, T.dot(T.tensordot(rand_x_rep, T.tensordot(rand_r, self.B, axes=(0, 2)), axes=(0, 0)), self.P_hat).T)
            rand_margin_score += rand_score
            noise_log_likelihood += T.log(noise_prob / (T.abs_(rand_score) + noise_prob))
        self.nce_margin_loss = T.maximum(0, 1 - self.score + rand_margin_score)
        # NCE negative log likelihood:
        # -1 * {log(score / (score + num_noise_samples * noise_prob)) + \sum_{i=1}^k log(noise_prob / (rand_score_i + noise_prob))}
        self.nce_prob_loss = -(T.log(T.abs_(self.score) / (T.abs_(self.score) + noise_prob)) + noise_log_likelihood)
        self.cost_inputs = [self.x_ind, self.y_ind, self.r_ind]
        self.params = [self.B, self.P, self.P_hat]
        # Cross-entropy reconstruction losses for each element of the triple
        self.x_loss = self.ce(x, self.prob_x)
        self.y_loss = self.ce(y, self.prob_y)
        self.r_loss = self.ce(r, self.prob_r)
        # TODO: Other (better?) losses

    def mse(self, t, p):
        return T.mean((p - t) ** 2)

    def ce(self, t, p):
        return T.mean(T.nnet.binary_crossentropy(p, t))

    def get_x_pred(self):
        # Compiled function: distribution over x given (y, r)
        return theano.function([self.y_ind, self.r_ind], self.prob_x)

    def get_y_pred(self):
        # Compiled function: distribution over y given (x, r)
        return theano.function([self.x_ind, self.r_ind], self.prob_y)

    def get_r_pred(self):
        # Compiled function: distribution over r given (x, y)
        return theano.function([self.x_ind, self.y_ind], self.prob_r)

    def get_costs(self):
        # Compiled function: the three cross-entropy losses for a triple
        return theano.function(self.cost_inputs, [self.x_loss, self.y_loss, self.r_loss])

    def update_for_x(self):
        # Compile one SGD step on the x reconstruction loss
        gparams = T.grad(self.x_loss, self.params)
        updates_for_x = []
        for param, gparam in zip(self.params, gparams):
            updates_for_x.append((param, param - self.lr * gparam))
        return theano.function(self.cost_inputs, self.x_loss, updates=updates_for_x)

    def update_for_y(self):
        # Compile one SGD step on the y reconstruction loss
        gparams = T.grad(self.y_loss, self.params)
        updates_for_y = []
        for param, gparam in zip(self.params, gparams):
            updates_for_y.append((param, param - self.lr * gparam))
        return theano.function(self.cost_inputs, self.y_loss, updates=updates_for_y)

    def update_for_r(self):
        # Compile one SGD step on the r reconstruction loss
        gparams = T.grad(self.r_loss, self.params)
        updates_for_r = []
        for param, gparam in zip(self.params, gparams):
            updates_for_r.append((param, param - self.lr * gparam))
        return theano.function(self.cost_inputs, self.r_loss, updates=updates_for_r)

    def train_with_nce_margin(self):
        # Compile one SGD step on the margin-based NCE loss
        gparams = T.grad(self.nce_margin_loss, self.params)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - self.lr * gparam))
        return theano.function(self.cost_inputs, self.nce_margin_loss, updates=updates)

    def train_with_nce_prob(self):
        # Compile one SGD step on the probabilistic NCE loss
        gparams = T.grad(self.nce_prob_loss, self.params)
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - self.lr * gparam))
        return theano.function(self.cost_inputs, self.nce_prob_loss, updates=updates)

    def get_score(self):
        # Compiled function: score of a single (x, y, r) triple
        return theano.function(self.cost_inputs, self.score)
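

# Example usage: a minimal sketch, not part of the original file. The sizes, the toy
# (x_ind, y_ind, r_ind) triples, and the training loop below are illustrative
# assumptions about how the class is meant to be driven; only train_with_nce_margin()
# and get_score() from the class above are exercised.
if __name__ == '__main__':
    box = RelBox(num_words=200, num_rels=5, vocab_embed_size=20, lr=0.01, num_noise_samples=1)
    train_fn = box.train_with_nce_margin()  # compiled SGD step on the margin-based NCE loss
    score_fn = box.get_score()  # compiled scorer for a single (x, y, r) triple
    triples = [(3, 17, 0), (42, 7, 2), (99, 100, 4)]  # made-up training triples
    for epoch in range(5):
        total_loss = 0.0
        for x_ind, y_ind, r_ind in triples:
            total_loss += train_fn(x_ind, y_ind, r_ind)
        print("epoch %d, total margin loss %f" % (epoch, total_loss))
    print("score of first triple: %f" % score_fn(*triples[0]))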