def __call__(self, p_w_s_batch, p_c_s_batch, q_w_s_batch, q_c_s_batch):
    # Word-level embeddings for the passage; F.forget recomputes the
    # lookup on backward, and taking .data detaches it from the graph.
    with chainer.no_backprop_mode():
        xpws = [Variable(F.forget(embed_w, item).data)
                for item in p_w_s_batch]
    # Character-level encodings for the passage.
    xpcs = [F.concat(
                self.model2(
                    Variable(np.zeros(
                        (n_layers * 2, len(items), 50)).astype(np.float32)),
                    [Variable(np.array(item).astype(np.int32))
                     for item in items]),
                axis=1)
            for items in p_c_s_batch]
    concat_input_p = [F.concat((xpcs[index], xpws[index]))
                      for index in xrange(self.batch_size)]
    _, u_ps = self.model3.l1(
        Variable(np.zeros((n_layers * 2, self.batch_size,
                           self.unit_size / 2)).astype(np.float32)),
        concat_input_p)

    # Same pipeline for the question.
    with chainer.no_backprop_mode():
        xqws = [Variable(F.forget(embed_w, item).data)
                for item in q_w_s_batch]
    xqcs = [F.concat(
                self.model2(
                    Variable(np.zeros(
                        (n_layers * 2, len(items), 50)).astype(np.float32)),
                    [Variable(np.array(item).astype(np.int32))
                     for item in items]),
                axis=1)
            for items in q_c_s_batch]
    concat_input_q = [F.concat((xqcs[index], xqws[index]))
                      for index in xrange(self.batch_size)]
    _, u_qs = self.model3.l1(
        Variable(np.zeros((n_layers * 2, self.batch_size,
                           self.unit_size / 2)).astype(np.float32)),
        concat_input_q)

    #return u_qs, u_ps
    hps = self.model_MGRU(u_qs, u_ps)
    #return vtp_list
    #hps = self.model_SMARNN(vtp_list)
    hta_new_b_list = self.model_OL(hps)
    return hta_new_b_list
def __call__(self, p_w_s_batch, p_c_s_batch, q_w_s_batch, q_c_s_batch):
    # GPU variant: xp is numpy or cupy depending on the device.
    with chainer.no_backprop_mode():
        xpws = [Variable(xp.array(F.forget(embed_w, item).data))
                for item in p_w_s_batch]
    #xpws = [F.embed_id(Variable(item), glove) for item in p_w_s_batch]
    xpcs = [F.concat(
                self.model2(
                    Variable(xp.zeros(
                        (n_layers * 2, len(items), 50)).astype(np.float32)),
                    [item for item in items]),
                axis=1)
            for items in p_c_s_batch]
    concat_input = [F.concat((xpcs[index], xpws[index]))
                    for index in xrange(self.batch_size)]
    _, u_ps = self.model3.l1(
        Variable(xp.zeros((n_layers * 2, self.batch_size,
                           self.unit_size / 2)).astype(np.float32)),
        concat_input)

    with chainer.no_backprop_mode():
        xqws = [Variable(xp.array(F.forget(embed_w, item).data))
                for item in q_w_s_batch]
    #xqws = [F.embed_id(Variable(item), glove) for item in q_w_s_batch]
    xqcs = [F.concat(
                self.model2(
                    Variable(xp.zeros(
                        (n_layers * 2, len(items), 50)).astype(np.float32)),
                    [item for item in items]),
                axis=1)
            for items in q_c_s_batch]
    concat_input = [F.concat((xqcs[index], xqws[index]))
                    for index in xrange(self.batch_size)]
    _, u_qs = self.model3.l1(
        Variable(xp.zeros((n_layers * 2, self.batch_size,
                           self.unit_size / 2)).astype(np.float32)),
        concat_input)

    #return self.model_GARNN(u_qs, u_ps)
    vtp_list, WqUqj = self.model_GARNN(u_qs, u_ps)
    hps = self.model_SMARNN(vtp_list)
    hta_new_b_list = self.model_OL(WqUqj, u_qs, hps)
    return hta_new_b_list
def __call__(self, p_w_s_batch, p_c_s_batch, q_w_s_batch, q_c_s_batch):
    # Variant that keeps intermediates as attributes for later inspection;
    # wrapping .data in a fresh Variable detaches the encodings.
    self.xpws = [Variable(F.forget(embed_w, item).data)
                 for item in p_w_s_batch]
    #xpws = [F.embed_id(Variable(item), glove) for item in p_w_s_batch]
    self.xpcs = [Variable(
                     F.concat(
                         self.model2(
                             Variable(np.zeros(
                                 (n_layers * 2, len(items),
                                  50)).astype(np.float32)),
                             [Variable(np.array(item).astype(np.int32))
                              for item in items]),
                         axis=1).data)
                 for items in p_c_s_batch]
    self.concat_input_p = [F.concat((self.xpcs[index], self.xpws[index]))
                           for index in xrange(self.batch_size)]
    _, self.u_ps = self.model3.l1(
        Variable(np.zeros((n_layers * 2, self.batch_size,
                           self.unit_size / 2)).astype(np.float32)),
        self.concat_input_p)

    self.xqws = [Variable(F.forget(embed_w, item).data)
                 for item in q_w_s_batch]
    #xqws = [F.embed_id(Variable(item), glove) for item in q_w_s_batch]
    self.xqcs = [Variable(
                     F.concat(
                         self.model2(
                             Variable(np.zeros(
                                 (n_layers * 2, len(items),
                                  50)).astype(np.float32)),
                             [Variable(np.array(item).astype(np.int32))
                              for item in items]),
                         axis=1).data)
                 for items in q_c_s_batch]
    self.concat_input_q = [F.concat((self.xqcs[index], self.xqws[index]))
                           for index in xrange(self.batch_size)]
    _, self.u_qs = self.model3.l1(
        Variable(np.zeros((n_layers * 2, self.batch_size,
                           self.unit_size / 2)).astype(np.float32)),
        self.concat_input_q)

    #return self.model_GARNN(u_qs, u_ps)
    self.vtp_list, self.WqUqj = self.model_GARNN(self.u_qs, self.u_ps)
    self.hps = self.model_SMARNN(self.vtp_list)
    hta_new_b_list = self.model_OL(self.WqUqj, self.u_qs, self.hps)
    return hta_new_b_list
def check_forward(self, x_data, y_data):
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)
    if self.out_len == 1:
        z = functions.forget(lambda x, y: (x + y + x,), x, y)
        testing.assert_allclose(x_data + y_data + x_data, z.data)
    elif self.out_len == 2:
        z = functions.forget(lambda x, y: (x + y + x, x * y), x, y)
        testing.assert_allclose(x_data + y_data + x_data, z[0].data)
        testing.assert_allclose(x_data * y_data, z[1].data)
def forward(self, x):
    if self.forget:
        return functions.forget(
            functools.partial(self.link, finetune=self.finetune), x)
    else:
        return self.link(x, finetune=self.finetune)
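# A minimal, self-contained sketch (not taken from any of the snippets
# here) of what F.forget does: the wrapped callable runs without storing
# its intermediate results, which are recomputed during backward. Only
# numpy and chainer are assumed.
import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.random.randn(4, 3).astype(np.float32))
y = chainer.Variable(np.random.randn(4, 3).astype(np.float32))

# The callable must return a Variable or a tuple of Variables.
z = F.forget(lambda a, b: F.relu(a) * b, x, y)
z.grad = np.ones_like(z.data)
z.backward()  # relu(a) is recomputed here instead of being kept in memory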
def __call__(self, hx, xs):
    # Character-embedding lookup, detached from the graph.
    with chainer.no_backprop_mode():
        xs = [Variable(xp.array(F.forget(embed_w_c, item).data))
              for item in xs]
    #xs = [self.embed(item) for item in xs]
    #print xs[0].shape, hx.shape
    hy, _ = self.l1(hx, xs)
    return hy
def calc_loss(self, x, t, add_kl=True, split_loss=False, calc_stats=True):
    train = configuration.config.train
    memory_efficiency = configuration.config.user_memory_efficiency
    self.y = self(x)
    if memory_efficiency > 0:
        # Recompute the loss subgraph on backward to save memory.
        self.class_loss = F.forget(F.softmax_cross_entropy, self.y, t)
    else:
        self.class_loss = F.softmax_cross_entropy(self.y, t)
    ignore = False
    if train and self.xp.isnan(self.class_loss.data):
        self.class_loss = chainer.Variable(
            self.xp.array(0.).astype('f').sum())
        ignore = True
    else:
        reporter.report({'class': self.class_loss.data}, self)
    if add_kl:
        a_regf = sum(
            VDF.calculate_kl(link.W, link.loga_threshold,
                             log_sigma2=link.log_sigma2,
                             log_alpha=None, eps=1e-8,
                             thresholds=(-8., 8.))
            for link in self.links()
            if getattr(link, 'is_variational_dropout', False))
        self.kl_loss = a_regf * self.kl_coef
        if train and self.xp.isnan(self.kl_loss.data):
            self.kl_loss = chainer.Variable(
                self.xp.array(0.).astype('f').sum())
            ignore = True
        else:
            reporter.report({'kl': self.kl_loss.data}, self)
        self.kl_coef = min(self.kl_coef + self.warm_up, 1.)
        reporter.report({'kl_coef': self.kl_coef}, self)
        self.loss = self.class_loss + self.kl_loss
    else:
        self.loss = self.class_loss
    if not ignore:
        reporter.report({'loss': self.loss.data}, self)
        self.accuracy = F.accuracy(self.y.data, t).data
        reporter.report({'accuracy': self.accuracy}, self)
    if calc_stats:
        stats = calculate_stats(self)
        reporter.report({'mean_p': stats['mean_p']}, self)
        reporter.report({'sparsity': stats['sparsity']}, self)
        reporter.report({'W/Wnz': stats['W/Wnz']}, self)
    if split_loss:
        # Note: split_loss=True assumes add_kl=True, since self.kl_loss
        # is only set on that path.
        return self.class_loss, self.kl_loss
    else:
        return self.loss
def check_backward(self, x_data, y_data, gz_data):
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)
    z = functions.forget(lambda x, y: (x + y + x,), x, y)
    z.grad = gz_data
    z.backward()
    testing.assert_allclose(x.grad, gz_data * 2)
    testing.assert_allclose(y.grad, gz_data)
def __call__(self, x):
    bn_fn = None
    conv_fn = None
    out_size = None

    def forward(x):
        nonlocal bn_fn, conv_fn, out_size
        if not chainer.config.enable_backprop:
            # forget phase: run BN and conv once, keeping only their
            # FunctionNodes (with saved statistics) and the output shape
            with chainer.force_backprop_mode():
                y = self.bn(x)
            bn_fn = y.creator
            bn_fn.unchain()
            y = F.relu(y)
            with chainer.force_backprop_mode():
                y = self.conv(y)
            conv_fn = y.creator
            conv_fn.unchain()
            out_size = y.shape
            return y

        # recompute bn using computed statistics
        expander = bn_fn.expander
        bn_out = self._recompute_bn(x.array,
                                    self.bn.gamma.array[expander],
                                    self.bn.beta.array[expander],
                                    bn_fn.mean[expander],
                                    bn_fn.inv_std[expander])
        bn_out = chainer.Variable(bn_out)
        bn_fn.inputs = x.node, self.bn.gamma.node, self.bn.beta.node
        bn_fn.outputs = weakref.ref(bn_out.node),
        bn_out.creator_node = bn_fn
        x.retain_data()
        self.bn.gamma.retain_data()
        self.bn.beta.retain_data()

        # recompute relu
        h = F.relu(bn_out)

        # set dummy data to convolution output
        xp = cuda.get_array_module(h.array)
        conv_fn.inputs = h.node, self.conv.W.node
        h.retain_data()
        self.conv.W.retain_data()
        dummy_out = chainer.Variable(
            xp.broadcast_to(xp.empty((), dtype=h.dtype), out_size))
        conv_fn.outputs = weakref.ref(dummy_out.node),
        dummy_out.creator_node = conv_fn
        bn_fn = None
        conv_fn = None
        return dummy_out

    return F.forget(forward, x)
def forward_step(self, x, reduce_memory=False):
    sum_logdet = 0
    out = x
    if reduce_memory:
        # Recompute the actnorm subgraph on backward to save memory.
        out, logdet = cf.forget(self.actnorm.forward_step, out)
    else:
        out, logdet = self.actnorm.forward_step(out)
    sum_logdet += logdet

    out, logdet = self.conv_1x1.forward_step(out)
    sum_logdet += logdet

    if reduce_memory:
        out, logdet = cf.forget(self.coupling_layer.forward_step, out)
    else:
        out, logdet = self.coupling_layer.forward_step(out)
    sum_logdet += logdet

    return out, sum_logdet
def test_variable_grad(self):
    x = numpy.random.uniform(-1, 1, (3, 2)).astype(numpy.float32)
    x = variable.Variable(x)
    w = numpy.random.uniform(-1, 1, (3, 2)).astype(numpy.float32)
    w = variable.Variable(w)
    y = functions.forget(lambda a, b: a + b, x, w)
    y.grad_var = variable.Variable(numpy.ones_like(y.data))
    y.backward()
    assert isinstance(x.grad_var, variable.Variable)
    assert isinstance(w.grad_var, variable.Variable)
def __call__(self, x, ss):
    # Inception (we don't need forget, as we don't backprop)
    p0 = ss[0]
    c = (chainer.configuration.config.max_perturbation - 4) * 3
    x_plus_p0 = x + p0[:, c:c + 3, :, :]
    ds = self.ins(x_plus_p0)
    ss = [F.concat((s, d), axis=1) for s, d in zip(ss, ds)]

    # Decode (forget)
    assert isinstance(x, chainer.Variable)
    ss = F.forget(lambda x_, p0_, *ss_: self.dec(x_, p0_, ss_),
                  x, p0, *ss)
    return ss
def __call__(self, x):
    """Stateful LSTM call."""
    memory_efficiency = configuration.config.user_memory_efficiency
    if memory_efficiency > 2:
        lstm_in = F.forget(self.upward, x)
    else:
        lstm_in = self.upward(x)
    if self.h is not None:
        # The lateral (recurrent) connection takes the previous hidden
        # state, not the input; the original passed x here, which would
        # only type-check when in_size == out_size.
        if memory_efficiency > 2:
            lstm_in += F.forget(self.lateral, self.h)
        else:
            lstm_in += self.lateral(self.h)
    if self.c is None:
        self.c = self.xp.zeros((x.shape[0], self.out_size)).astype('f')
    if memory_efficiency > 1:
        self.c, self.h = F.forget(F.lstm, self.c, lstm_in)
    else:
        self.c, self.h = F.lstm(self.c, lstm_in)
    return self.h
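# A minimal sketch (assuming only numpy and chainer) checking that routing
# F.lstm through F.forget, as the stateful LSTM above does, changes only
# the memory/recompute trade-off and not the numerical result.
import numpy as np
import chainer
import chainer.functions as F

c_prev = np.zeros((2, 3), dtype=np.float32)
gates = np.random.randn(2, 12).astype(np.float32)  # 4 * out_size columns

c1, h1 = F.lstm(chainer.Variable(c_prev), chainer.Variable(gates))
c2, h2 = F.forget(F.lstm, chainer.Variable(c_prev), chainer.Variable(gates))
np.testing.assert_allclose(h1.data, h2.data)
np.testing.assert_allclose(c1.data, c2.data)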
def __call__(self, x):
    xp = chainer.cuda.get_array_module(x, x.data)
    p0 = xp.zeros(x.shape[0:1] + (self.n_out,) + x.shape[2:],
                  dtype=xp.float32)
    p0 = chainer.Variable(p0)

    # Encode (forget)
    assert isinstance(x, chainer.Variable)
    ss = [p0] + list(F.forget(lambda x_: self.enc(x_), x))

    # RNN
    n_iters = self.n_iters()
    for i in range(n_iters):
        ss = self.rnn(x, ss)
    return ss[0]
def test_invalid_tuple_type_4th(self):
    with six.assertRaisesRegex(self, RuntimeError, '4th.*int'):
        functions.forget(lambda: (self.v,) * 3 + (1,))
def test_invalid_tuple_type_1st(self):
    with six.assertRaisesRegex(self, RuntimeError, '1st.*int'):
        functions.forget(lambda: (1,))
def test_invalid_tuple_type_3rd(self):
    with six.assertRaisesRegex(self, RuntimeError, '3rd.*int'):
        functions.forget(lambda: (self.v, self.v, 1))
def test_not_callable(self):
    with self.assertRaises(TypeError):
        functions.forget(1)
def test_invalid_type(self):
    with six.assertRaisesRegex(self, RuntimeError, 'int'):
        functions.forget(lambda: 1)
def test_invalid_tuple_type_2nd(self):
    with self.assertRaisesRegexp(RuntimeError, '2nd.*int'):
        functions.forget(lambda: (self.v, 1))
def test_invalid_tuple_type_13th(self):
    with self.assertRaisesRegexp(RuntimeError, '13th.*int'):
        functions.forget(lambda: (self.v,) * 12 + (1,))
def forward(self, x):
    return functions.forget(self.link, x)
def test_invalid_double_backprop(self):
    with self.assertRaises(RuntimeError):
        x = functions.forget(lambda v: v, self.v)
        x.grad_var = variable.Variable(numpy.ones_like(x.data))
        x.backward(enable_double_backprop=True)
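# Usage note: as the test above demonstrates, F.forget does not support
# double backpropagation; calling backward(enable_double_backprop=True)
# through a forgotten subgraph raises RuntimeError, so forget should be
# avoided wherever second-order gradients are needed.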
def f(x, y):
    return functions.forget(lambda x, y: (x * x * 3 + y * x,), x, y)
def check_forward(self, x_data, y_data):
    x = chainer.Variable(x_data)
    y = chainer.Variable(y_data)
    z = functions.forget(lambda x, y: (x + y + x,), x, y)
    testing.assert_allclose(x_data + y_data + x_data, z.data)
def f(x, y):
    if self.out_len == 1:
        return functions.forget(lambda x, y: (x + y + x), x, y)
    elif self.out_len == 2:
        return functions.forget(lambda x, y: (x + y + x, x * y), x, y)
def f(x, y):
    return functions.forget(lambda x, y: (x + y + x), x, y)
def test_invalid_tuple_type_13th(self):
    with six.assertRaisesRegex(self, RuntimeError, '13th.*int'):
        functions.forget(lambda: (self.v,) * 12 + (1,))