def forward(self, x):
    """Build symmetrised pair features from per-atom features.

    Every ordered pair of atoms is represented by the concatenation of the
    two atoms' feature vectors.  Both orderings are pushed through the same
    stack of ``self.linear_layers`` (each followed by ReLU) and the two
    results are added.

    Args:
        x: array-like with a 3-element ``shape`` unpacked as
            ``(n_batch, n_atom, n_feature)``.

    Returns:
        Variable reshaped to ``(n_batch, n_atom * n_atom, self.n_channel)``.
    """
    n_batch, n_atom, n_feature = x.shape
    n_pair = n_atom * n_atom
    grid_shape = (n_batch, n_atom, n_atom, n_feature)
    flat_shape = (n_batch, n_pair, n_feature)

    # Atom features broadcast along a new axis 1, then flattened over pairs.
    atom_repeat = functions.reshape(
        functions.broadcast_to(
            functions.reshape(x, (n_batch, 1, n_atom, n_feature)),
            grid_shape),
        flat_shape)
    # Atom features broadcast along a new axis 2, then flattened over pairs.
    atom_tile = functions.reshape(
        functions.broadcast_to(
            functions.reshape(x, (n_batch, n_atom, 1, n_feature)),
            grid_shape),
        flat_shape)

    def _embed_pairs(first, second):
        # Concatenate the two views feature-wise and run the shared layers.
        pair = functions.concat((first, second), axis=2)
        pair = functions.reshape(pair, (n_batch * n_pair, n_feature * 2))
        for layer in self.linear_layers:
            pair = functions.relu(layer(pair))
        return functions.reshape(pair, (n_batch, n_pair, self.n_channel))

    pair_x0 = _embed_pairs(atom_tile, atom_repeat)
    pair_x1 = _embed_pairs(atom_repeat, atom_tile)
    return pair_x0 + pair_x1
def __call__(self, h, dist):
    """Weight atom features by a learned function of pairwise distances.

    Args:
        h (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 represents atom_index and axis2 represents
            feature dimension.
        dist (numpy.ndarray): axis 0 represents minibatch index,
            axis 1 and 2 represent distance between atoms.

    Returns:
        The broadcast features multiplied by the distance filter and summed
        over axis 1.

    Raises:
        ValueError: if ``h.shape[2]`` differs from ``self.hidden_dim``.
    """
    n_mb, n_atom, n_ch = h.shape
    if n_ch != self.hidden_dim:
        raise ValueError('h.shape[2] {} and hidden_dim {} must be same!'
                         .format(n_ch, self.hidden_dim))

    pair_shape = (n_mb, n_atom, n_atom)
    # Evenly spaced centres: 0, r, 2r, ... with r = radius_resolution.
    centers = (self.xp.arange(self.num_rbf).astype('f')
               * self.radius_resolution)

    # Expand every distance against all centres.
    rbf = functions.broadcast_to(
        functions.reshape(dist, pair_shape + (1,)),
        pair_shape + (self.num_rbf,))
    rbf = functions.exp(- self.gamma * (rbf - centers) ** 2)

    # Two dense layers with softplus turn the expansion into a filter.
    filt = functions.reshape(rbf, (-1, self.num_rbf))
    filt = functions.softplus(self.dense1(filt))
    filt = functions.softplus(self.dense2(filt))
    filt = functions.reshape(filt, pair_shape + (self.hidden_dim,))

    h_pairs = functions.broadcast_to(
        functions.reshape(h, (n_mb, n_atom, 1, self.hidden_dim)),
        pair_shape + (self.hidden_dim,))
    return functions.sum(h_pairs * filt, axis=1)
def __call__(self, x):
    """Normalise ``x`` over its last axis, then apply gain and bias.

    The mean and (biased) variance are taken over the last axis; the result
    is scaled by ``self.g`` and shifted by ``self.b`` along axis 2.
    """
    # chainer requires explicit broadcast for avoiding latent bugs
    mean = F.broadcast_to(F.mean(x, -1, keepdims=True), x.shape)
    var = F.broadcast_to(
        F.mean((x - mean) ** 2, -1, keepdims=True), x.shape)
    normed = (x - mean) / F.sqrt(var + self.e)
    scaled = F.scale(normed, self.g, axis=2)
    return F.bias(scaled, self.b, axis=2)
def cosine_similarity(x, y, eps=1e-6):
    """Batched cosine similarity between the rows of ``x`` and ``y``.

    Args:
        x: Variable whose data has a 3-element shape ``(n1, n2, n3)``.
        y: Variable whose data has a 3-element shape; axis 1 gives ``m2``.
        eps: added to the product of squared norms before the square root.

    Returns:
        Variable of pairwise similarities, one ``(n2, m2)`` matrix per batch
        entry.
    """
    n_batch, n_x, _ = x.data.shape
    _, n_y, _ = y.data.shape
    out_shape = (n_batch, n_x, n_y)

    dots = F.batch_matmul(x, y, transb=True)
    x_sq = F.broadcast_to(
        F.reshape(F.sum(x * x, axis=2), (n_batch, n_x, 1)), out_shape)
    y_sq = F.broadcast_to(
        F.reshape(F.sum(y * y, axis=2), (n_batch, 1, n_y)), out_shape)

    # exp(log(v) / 2) is sqrt(v), i.e. the product of the two norms.
    denom = F.exp(F.log(x_sq * y_sq + eps) / 2)
    return dots / denom
def norm_by_freq(self, freq):
    """Standardise ``self.W`` using ``freq``-weighted mean and variance.

    The weighted sums are taken over axis 0; ``1e-6`` is added to the
    variance before the square root.
    """
    emb = self.W
    mean = F.broadcast_to(
        F.sum(freq * emb, axis=0, keepdims=True), emb.shape)
    weighted_sq_dev = freq * ((emb - mean) ** 2)
    var = F.broadcast_to(
        F.sum(weighted_sq_dev, axis=0, keepdims=True), emb.shape)
    return (emb - mean) / F.sqrt(1e-6 + var)
def compute_dists(self, obs):
    """Standardise ``obs`` with the running statistics and query the policy.

    The observation is shifted by the running mean, divided by the running
    std (plus ``1e-8``), optionally clipped to ``[-self.clip, self.clip]``,
    and passed to ``self.policy.compute_dists``.
    """
    stats = self.running_stat
    mean_var = Variable(stats.mean.astype(np.float32))
    std_var = Variable(stats.std.astype(np.float32))

    centered = obs - F.broadcast_to(mean_var, obs.shape)
    scaled = centered / (F.broadcast_to(std_var, centered.shape) + 1e-8)
    if self.clip is not None:
        scaled = F.clip(scaled, -self.clip, self.clip)
    return self.policy.compute_dists(scaled)
def get_normalized_vector(d, xp=None):
    """Scale ``d`` to unit L2 norm over every axis except axis 0.

    ``d`` is first divided by its per-sample maximum absolute value (plus
    ``1e-12``) and then by its per-sample L2 norm (with ``1e-6`` added under
    the square root).

    Args:
        d: array (when ``xp`` is given) or Variable (when ``xp`` is None).
        xp: array module to use; when given, ``d`` is modified in place.

    Returns:
        The normalised ``d``.
    """
    axes = tuple(range(1, len(d.shape)))

    if xp is None:
        # Variable path: chainer needs the divisors broadcast explicitly.
        peak = 1e-12 + F.max(F.absolute(d), axes, keepdims=True)
        d /= F.broadcast_to(peak, d.shape)
        norm = F.sqrt(1e-6 + F.sum(d ** 2, axes, keepdims=True))
        d /= F.broadcast_to(norm, d.shape)
        return d

    # Array path: in-place division mutates the caller's array.
    d /= (1e-12 + xp.max(xp.abs(d), axes, keepdims=True))
    d /= xp.sqrt(1e-6 + xp.sum(d ** 2, axes, keepdims=True))
    return d
def query(self, u):
    """Run one memory read and return the updated query ``o + u``.

    Temporal embeddings from ``self.TA`` / ``self.TC`` (indexed in reverse
    order) are added to the stored memories ``self.m`` / ``self.c`` before
    attention is computed.
    """
    xp = cuda.get_array_module(u)
    n_slots = self.m.shape[1]
    # Indices n_slots-1, ..., 0.
    reversed_ids = xp.arange(n_slots - 1, -1, -1, dtype=numpy.int32)

    temporal_m = F.broadcast_to(self.TA(reversed_ids), self.m.shape)
    temporal_c = F.broadcast_to(self.TC(reversed_ids), self.c.shape)

    attention = F.softmax(F.batch_matmul(self.m + temporal_m, u))
    readout = F.batch_matmul(
        F.swapaxes(self.c + temporal_c, 2, 1), attention)
    readout = F.squeeze(readout, -1)
    return readout + u
def __call__(self, x):
    """Applies the linear layer with a re-normalised weight.

    Each row of ``self.W`` is divided by its L2 norm and multiplied by the
    matching entry of ``self.g`` before the linear map is applied.

    Args:
        x (~chainer.Variable): Batch of input vectors.

    Returns:
        ~chainer.Variable: Output of the linear layer.
    """
    w_shape = self.W.data.shape
    row_norm = F.batch_l2_norm_squared(self.W) ** 0.5
    norm_full = F.broadcast_to(F.expand_dims(row_norm, 1), w_shape)
    gain_full = F.broadcast_to(F.expand_dims(self.g, 1), w_shape)
    weight = gain_full * self.W / norm_full
    return F.linear(x, weight, self.b)
def query(self, u):
    """Run one memory read and return the updated query ``o + u``.

    Temporal embeddings (looked up with reversed slot indices) are tiled
    over the batch and added to the memories before attention.
    """
    memory, content = self.m, self.c
    batch, size = memory.data.shape[:2]
    # NOTE(review): ``xp`` is taken from the enclosing module scope here.
    reversed_ids = chainer.Variable(
        xp.arange(size, dtype=numpy.int32)[::-1])

    temporal_m = self.TA(reversed_ids)
    temporal_c = self.TC(reversed_ids)
    temporal_m = F.broadcast_to(
        temporal_m, (batch,) + temporal_m.data.shape)
    temporal_c = F.broadcast_to(
        temporal_c, (batch,) + temporal_c.data.shape)

    attention = F.softmax(F.batch_matmul(memory + temporal_m, u))
    readout = F.batch_matmul(
        F.swapaxes(content + temporal_c, 2, 1), attention)
    readout = F.reshape(readout, (batch, memory.data.shape[2]))
    return readout + u
def perspective(vertices, angle=30.):
    """Apply a perspective division to a batch of vertices.

    x and y are divided by ``z * tan(angle)``; z is passed through.

    Args:
        vertices: 3-dimensional array/Variable; the last axis is indexed as
            (x, y, z).
        angle: angle in degrees, either a Python number or something that
            supports ``angle[None]`` (one value, or one per batch entry).

    Returns:
        Transformed vertices, concatenated along axis 2.
    """
    assert (vertices.ndim == 3)
    xp = chainer.cuda.get_array_module(vertices)
    if isinstance(angle, (float, int)):
        angle = chainer.Variable(xp.array(angle, 'float32'))

    # Degrees to radians (the source uses 3.1416 for pi).
    radians = angle / 180. * 3.1416
    radians = cf.broadcast_to(radians[None], (vertices.shape[0],))
    width = cf.broadcast_to(cf.tan(radians)[:, None], vertices.shape[:2])

    depth = vertices[:, :, 2]
    proj_x = vertices[:, :, 0] / depth / width
    proj_y = vertices[:, :, 1] / depth / width
    return cf.concat(
        (proj_x[:, :, None], proj_y[:, :, None], depth[:, :, None]),
        axis=2)
def __call__(self, x, y):
    """Gaussian-type kernel matrix between the rows of ``x`` and ``y``.

    Both inputs are multiplied by one shared random factor drawn with
    ``F.gaussian`` (mean 0, log-variance e), and the result is
    ``exp(-(|x_i|^2 - 2 x_i . y_j + |y_j|^2))`` for every row pair (i, j).

    Parameters
    -----------------
    x: Variable
        Feature of unlabeled samples.
    y: Variable
        Second batch of features (the original docstring also calls these
        "unlabeled samples").  ``y`` is multiplied by a factor broadcast to
        ``x.shape``, so its shape must be compatible with ``x``.

    Returns
    -----------------
    Variable
        Matrix with one row per row of ``x`` and one column per row of ``y``.
    """
    g = F.broadcast_to(
        F.gaussian(
            np.array([0], dtype=np.float32),
            np.array([np.exp(1)], dtype=np.float32)), x.shape)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g**2, axis=1)
    y_g_norm = F.sum(y_g**2, axis=1)
    x_g_y_g = F.linear(x_g, y_g)  # entry (i, j) is x_i . y_j

    # Fix: the row norms of x must vary along axis 0 and the row norms of y
    # along axis 1 so that entry (i, j) gets |x_i|^2 and |y_j|^2.  The
    # previous code broadcast them the other way round (|x_j|^2, |y_i|^2).
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
def __call__(self, x, context):
    """Return ``self.loss_func`` over every (context embedding, x) pair.

    ``x`` is broadcast to the first two dimensions of the embedded context,
    then both are flattened so each context position becomes one row.
    """
    # NOTE(review): the embedding is looked up on the module-level ``model``,
    # not on ``self`` -- confirm this is intended.
    embedded = model.embed(context)
    dims = embedded.data.shape
    n_rows, n_ctx, n_units = dims[0], dims[1], dims[2]

    target = F.broadcast_to(x, (n_rows, n_ctx))
    embedded = F.reshape(embedded, (n_rows * n_ctx, n_units))
    target = F.reshape(target, (n_rows * n_ctx,))
    return self.loss_func(embedded, target)
def ordinal_loss(y, mask):
    """Masked loss built from triangular cumulative log-sum-exp terms.

    ``y`` is shifted by its maximum over axis 1, exponentiated, and passed
    twice through 1x1 convolutions whose weights are lower- and
    upper-triangular matrices of ones (with a log in between).  The sum of
    both branches minus ``sum_y`` and ``y`` is masked, summed and divided by
    the batch size.

    Args:
        y: Variable whose data has a 3-element shape ``(b, c, n)``.
        mask: multiplied element-wise with the per-element loss.

    Returns:
        Scalar Variable.
    """
    xp = cuda.get_array_module(y.data)
    volatile = y.volatile
    b, c, n = y.data.shape

    # Subtract the per-position maximum over axis 1.
    y = y - F.broadcast_to(F.max(y, axis=1, keepdims=True), y.data.shape)
    sum_y = F.broadcast_to(
        F.expand_dims(F.sum(y, axis=1), 1), y.data.shape)

    lower = np.tri(c, dtype=np.float32)
    upper = lower.T

    def _as_kernel(matrix):
        # (c, c) matrix -> 1x1 convolution weight.
        return Variable(xp.asarray(matrix.reshape(c, c, 1, 1)),
                        volatile=volatile)

    w_lower = _as_kernel(lower)
    w_upper = _as_kernel(upper)

    exp_y = F.exp(F.expand_dims(y, -1))
    branch_lower = F.convolution_2d(exp_y, w_lower)
    branch_lower = F.convolution_2d(F.log(branch_lower), w_lower)
    branch_upper = F.convolution_2d(exp_y, w_upper)
    branch_upper = F.convolution_2d(F.log(branch_upper), w_upper)

    total = F.reshape(branch_lower + branch_upper, (b, c, n))
    return F.sum((total - sum_y - y) * mask) / b
def __call__(self, x, contexts):
    """Compute and report the loss between context embeddings and ``x``.

    Each entry of ``x`` is repeated once per context position, the embedded
    contexts are flattened to one row per position, and ``self.loss_func``
    is applied to the pair.  The loss is reported under ``'loss'``.
    """
    embedded = self.embed(contexts)
    batch_size, n_context, n_units = embedded.shape
    n_rows = batch_size * n_context

    targets = F.broadcast_to(x[:, None], (batch_size, n_context))
    targets = F.reshape(targets, (n_rows,))
    embedded = F.reshape(embedded, (n_rows, n_units))

    loss = self.loss_func(embedded, targets)
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, x, context):
    """Compute and report the loss between context embeddings and ``x``.

    ``x`` is tiled across the context axis and both tensors are flattened
    so that every context position is scored against its centre entry.
    """
    embedded = self.embed(context)
    dims = embedded.data.shape
    n_rows = dims[0] * dims[1]

    targets = F.reshape(
        F.broadcast_to(x[:, None], (dims[0], dims[1])), (n_rows,))
    embedded = F.reshape(embedded, (n_rows, dims[2]))

    loss = self.loss_func(embedded, targets)
    reporter.report({'loss': loss}, self)
    return loss
def _bias(x, y, axis=1):
    """Add ``y`` to ``x`` with ``y`` aligned to ``x`` starting at ``axis``.

    ``y`` is reshaped with leading and trailing singleton axes so that its
    own axes line up with ``x``'s axes ``axis .. axis + y.ndim``, then
    broadcast to ``x``'s shape and added.
    """
    x_shape = x.data.shape
    y_shape = y.data.shape
    y_ndim = len(y_shape)
    assert x_shape[axis:axis + y_ndim] == y_shape

    n_trailing = len(x_shape) - axis - y_ndim
    padded_shape = (1,) * axis + tuple(y_shape) + (1,) * n_trailing
    y_full = functions.broadcast_to(
        functions.reshape(y, padded_shape), x_shape)
    return x + y_full
def _calc_distmat(self, h):
    """Return the matrix of squared Euclidean distances between rows of ``h``.

    Uses ``|a|^2 - 2 a.b + |b|^2`` with the squared norms broadcast to a
    square matrix.
    """
    n = h.shape[0]
    sq_norms = F.sum(h**2, axis=1)
    # Squared norms vary along columns here and along rows in the transpose.
    col_norms = F.broadcast_to(sq_norms, (n, n))
    row_norms = F.transpose(col_norms)
    gram = F.linear(h, h)
    return (row_norms - 2*gram + col_norms)
def __call__(self, x, context):
    """Compute and report the loss between context embeddings and ``x``.

    ``x`` is repeated for every context position; embeddings and targets
    are flattened to one row per position before ``self.loss_func``.
    """
    embedded = self.embed(context)
    dims = embedded.shape
    n_batch, n_ctx = dims[0], dims[1]

    targets = F.broadcast_to(x[:, None], (n_batch, n_ctx))
    embedded = F.reshape(embedded, (n_batch * n_ctx, dims[2]))
    targets = F.reshape(targets, (n_batch * n_ctx,))

    loss = self.loss_func(embedded, targets)
    # Open question from the original author:
    # shouldn't we divide loss by batch size?
    reporter.report({'loss': loss}, self)
    return loss
def __call__(self, embeded_x, m_prev, h_prev, x):
    """Advance the LSTM one step, keeping the old state where ``x`` is ignored.

    Args:
        embeded_x: embedded input for this step.
        m_prev: previous memory cell.
        h_prev: previous hidden output.
        x: labels for this step; rows equal to ``IGNORE_LABEL`` keep their
            previous state.

    Returns:
        Tuple ``(m, h)`` of the new memory cell and hidden output.
    """
    batch_size = embeded_x.shape[0]
    m_new, h_new = F.lstm(m_prev, self.W(embeded_x) + self.U(h_prev))

    # Per-row flag, widened to the hidden size: True where the step applies.
    active = F.expand_dims(x.data != IGNORE_LABEL, -1)
    active = F.broadcast_to(active, (batch_size, self.hidden_size))

    m = F.where(active, m_new, m_prev)
    h = F.where(active, h_new, h_prev)
    return m, h
def check_backward(self, data, grad):
    """Compare the analytic gradient of ``broadcast_to`` with a numerical one."""
    x = chainer.Variable(data)
    broadcasted = functions.broadcast_to(x, self.out_shape)
    func = broadcasted.creator

    def run_forward():
        return func.forward((data,))

    broadcasted.grad = grad
    broadcasted.backward()

    gx, = gradient_check.numerical_grad(
        run_forward, (data,), (broadcasted.grad,))
    gradient_check.assert_allclose(gx, x.grad)
def grad_unbias_hook(optimizer):
    """Subtract a scalar offset from every parameter gradient.

    For each parameter of ``optimizer.target`` the offset is the sum of all
    gradient entries divided by the size of the gradient's first axis; it is
    subtracted from every entry and written back to ``p.grad``.

    Parameters whose gradient is ``None`` (for example parameters that did
    not take part in the last backward pass) are skipped instead of raising
    ``AttributeError``.

    Args:
        optimizer: object exposing ``target.params()``.
    """
    for p in optimizer.target.params():
        grad_data = p.grad
        if grad_data is None:
            # Nothing to adjust for parameters without a gradient.
            continue
        shape = grad_data.shape
        bs = shape[0]
        grad = Variable(grad_data)
        mean_grad = F.broadcast_to(F.sum(grad) / bs, shape)
        p.grad = (grad - mean_grad).data
def __call__(self, x, context):
    """Compute and report the loss using character-RNN context embeddings.

    ``x`` is repeated once per context position and flattened; the context
    ids are flattened the same way and embedded with ``self.rnn.charRNN``.
    """
    n_rows, n_ctx = context.shape[0], context.shape[1]
    n_flat = n_rows * n_ctx

    targets = F.reshape(
        F.broadcast_to(x[:, None], (n_rows, n_ctx)), (n_flat,))
    flat_context = context.reshape(n_flat)

    embedded = self.rnn.charRNN(flat_context)
    loss = self.loss_func(embedded, targets)
    reporter.report({'loss': loss}, self)
    return loss
def setUp(self):
    """Create random inputs, the op under test and its expected output.

    ``self.op`` multiplies a batch of matrices by one shared matrix that is
    broadcast over the batch; ``self.forward_answer`` is the same product
    computed with ``numpy.dot`` per batch entry.
    """
    f32 = numpy.float32
    self.x1 = numpy.random.uniform(.5, 1, (batch_size, m, k)).astype(f32)
    self.x2 = numpy.random.uniform(.5, 1, (k, n)).astype(f32)
    self.gy = numpy.random.uniform(-1, 1, (batch_size, m, n)).astype(f32)

    def batched_op(x, y):
        tiled = F.broadcast_to(F.expand_dims(y, 0), (batch_size, k, n))
        return F.batch_matmul(x, tiled)

    self.op = batched_op
    self.forward_answer = numpy.array(
        [numpy.dot(sample, self.x2) for sample in self.x1])
def look_at(vertices, eye, at=None, up=None):
    """
    "Look at" transformation of vertices.

    Vertices are translated by ``-eye`` and rotated into the frame whose
    z axis points from ``eye`` to ``at``.  ``at`` defaults to the origin and
    ``up`` to ``(0, 1, 0)``.  One-dimensional ``eye``/``at``/``up`` are
    tiled over the batch; ``eye`` may also be a list or tuple.
    """
    assert (vertices.ndim == 3)

    xp = chainer.cuda.get_array_module(vertices)
    batch_size = vertices.shape[0]

    if at is None:
        at = xp.array([0, 0, 0], 'float32')
    if up is None:
        up = xp.array([0, 1, 0], 'float32')
    if isinstance(eye, (list, tuple)):
        eye = xp.array(eye, 'float32')

    def _per_batch(vec):
        # Tile a single 3-vector so there is one copy per batch entry.
        if vec.ndim == 1:
            return cf.tile(vec[None, :], (batch_size, 1))
        return vec

    eye = _per_batch(eye)
    at = _per_batch(at)
    up = _per_batch(up)

    # create new axes
    z_axis = cf.normalize(at - eye)
    x_axis = cf.normalize(neural_renderer.cross(up, z_axis))
    y_axis = cf.normalize(neural_renderer.cross(z_axis, x_axis))

    # create rotation matrix: [bs, 3, 3]
    rotation = cf.concat(
        (x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]),
        axis=1)
    if rotation.shape[0] != vertices.shape[0]:
        rotation = cf.broadcast_to(rotation, (vertices.shape[0], 3, 3))

    # apply
    # [bs, nv, 3] -> [bs, nv, 3] -> [bs, nv, 3]
    if vertices.shape != eye.shape:
        eye = cf.broadcast_to(eye[:, None, :], vertices.shape)
    translated = vertices - eye
    return cf.matmul(translated, rotation, transb=True)
def __call__(self, x):
    """Compute and report the beta-weighted negative ELBO for ``x``.

    ``self.k`` latent samples are drawn from the encoder distribution; the
    loss is ``-(mean log p(x|z) - beta * mean KL(q || prior))``.  The loss
    and both terms are reported.
    """
    q_z = self.encoder(x)
    z = q_z.sample(self.k)
    p_x = self.decoder(z)
    p_z = self.prior()

    # One copy of x per latent sample.
    x_tiled = F.broadcast_to(x[None, :], (self.k,) + x.shape)
    reconstr = F.mean(p_x.log_prob(x_tiled))
    kl_penalty = F.mean(chainer.kl_divergence(q_z, p_z))
    loss = - (reconstr - self.beta * kl_penalty)

    for key, value in (('loss', loss),
                       ('reconstr', reconstr),
                       ('kl_penalty', kl_penalty)):
        reporter.report({key: value}, self)
    return loss
def __call__(self, y, m_prev, s_prev, h_forward, h_backword, enable,
             disable_value):
    """Decode one step with attention.

    Args:
        y: labels fed at this step; rows equal to ``IGNORE_LABEL`` keep
            their previous LSTM state.
        m_prev: previous LSTM memory cell.
        s_prev: previous hidden output.
        h_forward, h_backword, enable, disable_value: passed straight to
            ``self._attention``.

    Returns:
        Tuple ``(output, m, s)``: the projected output and the new state.
    """
    # m is memory cell of lstm, s is previous hidden output
    # calculate attention
    context = self._attention(
        h_forward, h_backword, s_prev, enable, disable_value)

    # decode once
    embeded_y = self.E(y)
    batch_size = y.shape[0]
    lstm_in = self.W(embeded_y) + self.U(s_prev) + self.C(context)
    m_new, s_new = F.lstm(m_prev, lstm_in)

    # True for rows with a real label, widened to the hidden size.
    active = F.broadcast_to(
        F.expand_dims(y.data != IGNORE_LABEL, -1),
        (batch_size, self.hidden_size))
    m = F.where(active, m_new, m_prev)
    s = F.where(active, s_new, s_prev)

    t = self.U_o(s) + self.V_o(embeded_y) + self.C_o(context)
    return self.W_o(t), m, s
def __call__(self, x):
    """Normalize input and scale it.

    The input is L2-normalised along axis 1 and multiplied by ``self.scale``,
    which is broadcast over the two trailing axes.

    Args:
        x (chainer.Variable): A variable holding 4-dimensional array.
            Its :obj:`dtype` is :obj:`numpy.float32`.

    Returns:
        chainer.Variable:
        The shape and :obj:`dtype` are same as those of input.
    """
    normalized = F.normalize(x, eps=self.eps, axis=1)
    per_channel = self.scale[:, np.newaxis, np.newaxis]
    return normalized * F.broadcast_to(per_channel, normalized.shape)
def __call__(self, x, context):
    """Compute and report the loss for the configured sub-word mode.

    ``x`` is repeated once per context position and flattened.  The context
    is embedded according to the module-level ``args.subword``:

    * ``'rnn'``  -- flattened ids are embedded with ``self.rnn.charRNN``;
    * ``'none'`` -- ids are embedded with ``self.embed`` and flattened to
      one row per position.

    Raises:
        ValueError: if ``args.subword`` is neither ``'rnn'`` nor ``'none'``
            (previously this surfaced as an unbound-local error on ``e``).
    """
    n_rows, n_ctx = context.shape[0], context.shape[1]
    x = F.broadcast_to(x[:, None], (n_rows, n_ctx))
    x = F.reshape(x, (n_rows * n_ctx,))

    if args.subword == 'rnn':
        context = context.reshape((n_rows * n_ctx))
        e = self.rnn.charRNN(context)
    elif args.subword == 'none':
        e = self.embed(context)
        e = F.reshape(e, (e.shape[0] * e.shape[1], e.shape[2]))
    else:
        raise ValueError(
            'unsupported subword mode: {!r}'.format(args.subword))

    loss = self.loss_func(e, x)
    reporter.report({'loss': loss}, self)
    return loss
def metric(self, model, images, labels):
    """Accumulate a softmax-over-distances metric for same-label samples.

    For every embedding, squared Euclidean distances to all embeddings are
    turned into negative log-softmax values, and the entries whose label
    matches the current sample's label are summed into the metric.  The
    total is reported under ``'metric'`` on ``model``.
    """
    xp = cupy.get_array_module(images)
    batchsize = len(images)
    embeddings = F.reshape(model(images), ((batchsize, -1)))
    n_features = embeddings.shape[1]

    metric = 0
    for anchor, anchor_label in zip(embeddings, labels):
        anchor_tiled = F.broadcast_to(anchor, (batchsize, n_features))
        sq_dists = F.sum((embeddings - anchor_tiled)**2, axis=1)
        neg_log_probs = -F.log_softmax(F.expand_dims(-sq_dists, axis=0))[0]
        same_label = xp.where(labels == anchor_label)
        metric += F.sum(neg_log_probs[same_label])

    chainer.report({'metric': metric}, model)
    return metric
def _context(self, p, fb_mat, fbe_mat):
    """Return the attention-weighted sum of ``fb_mat`` for decoder state ``p``.

    Args:
        p: decoder state, projected with ``self.p_e``.
        fb_mat: Variable whose data has shape ``(batch, srclen, _)``.
        fbe_mat: pre-projected encoder states, added to the projected ``p``.

    Returns:
        Variable reshaped to ``[batch, 2 * self.hidden_size]``.
    """
    batch_size, source_length, _ = fb_mat.data.shape
    n_rows = batch_size * source_length

    # {pe,e}_mat: shape = [batch * srclen, atten]
    projected = F.expand_dims(self.p_e(p), 1)
    pe_mat = F.broadcast_to(
        projected, [batch_size, source_length, self.atten_size])
    pe_mat = F.reshape(pe_mat, [n_rows, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)

    # a_mat: shape = [batch, srclen]
    scores = F.reshape(self.e_a(e_mat), [batch_size, source_length])
    a_mat = F.softmax(scores)

    # q: shape = [batch, 2 * hidden]
    q = F.batch_matmul(a_mat, fb_mat, transa=True)
    return F.reshape(q, [batch_size, 2 * self.hidden_size])
def maximum_entropy_mellowmax(values, omega=1., beta_min=-10, beta_max=10):
    """Maximum entropy mellowmax policy function.

    This function provides a categorical distribution whose expectation matches
    the one of mellowmax function while maximizing its entropy.

    See: http://arxiv.org/abs/1612.05628

    Args:
        values (Variable or ndarray):
            Input values. Mellowmax is taken along the second axis.
        omega (float):
            Parameter of mellowmax.
        beta_min (float):
            Minimum value of beta, used in Brent's algorithm.
        beta_max (float):
            Maximum value of beta, used in Brent's algorithm.
    Returns:
        outputs (Variable)
    """
    # NOTE(review): ``omega`` is not referenced anywhere in this body;
    # ``mellowmax`` is called with its own default -- confirm intended.
    xp = chainer.cuda.get_array_module(values)
    mm = mellowmax(values, axis=1)

    # Advantage: Q - mellowmax(Q)
    advantages = values - F.broadcast_to(F.expand_dims(mm, 1), values.shape)
    # Move data to CPU because we use Brent's algorithm in scipy
    advantages = chainer.cuda.to_cpu(advantages.data)

    def root_target(beta, adv):
        # Beta is computed as the root of this function
        return np.sum(np.exp(beta * adv) * adv)

    def solve_beta(adv):
        try:
            return scipy.optimize.brentq(
                root_target, a=beta_min, b=beta_max, args=(adv, ))
        except ValueError:
            # brentq rejected the bracket; fall back to beta = 0.
            return 0

    batch_beta = np.empty(mm.shape, dtype=np.float32)
    for idx in np.ndindex(mm.shape):
        # Re-insert a full slice for the action axis (axis 1).
        action_slice = idx[:1] + (slice(None), ) + idx[1:]
        batch_beta[idx] = solve_beta(advantages[action_slice])

    return F.softmax(xp.expand_dims(xp.asarray(batch_beta), 1) * values)
def __call__(self, x, contexts):
    """Compute and report the loss between context embeddings and ``x``.

    ``x`` is cast to int32, repeated once per context position and
    flattened; the embedded contexts are flattened to matching rows.
    """
    centres = x.astype(np.int32)
    embedded = self.embed(contexts)
    batch_size, n_context, n_units = embedded.shape
    n_rows = batch_size * n_context

    centres = F.reshape(
        F.broadcast_to(centres[:, None], (batch_size, n_context)),
        (n_rows, ))
    embedded = F.reshape(embedded, (n_rows, n_units))

    loss = self.loss_func(embedded, centres)
    reporter.report({'loss': loss}, self)
    return loss
def compute_r(self, x, v):
    """Encode ``x`` together with the vector ``v`` into a representation.

    Two residual convolution stages are applied; between them ``v`` is
    reshaped to ``(v.shape[0], v.shape[1], 1, 1)``, broadcast over the
    spatial axes of the first stage's output and concatenated along axis 1.
    """
    # Stage 1: residual block on the input.
    stem = cf.relu(self.conv1_1(x))
    skip = cf.relu(self.conv1_res(stem))
    out = cf.relu(self.conv1_2(stem))
    out = cf.relu(self.conv1_3(out)) + skip

    # Tile v over the spatial grid of ``out``.
    v = cf.reshape(v, (v.shape[0], v.shape[1], 1, 1))
    tiled_shape = (out.shape[0], v.shape[1]) + out.shape[2:]
    v = cf.broadcast_to(v, shape=tiled_shape)

    # Stage 2: residual block on features + v.
    merged = cf.concat((out, v), axis=1)
    skip = cf.relu(self.conv2_res(merged))
    out = cf.relu(self.conv2_1(merged))
    out = cf.relu(self.conv2_2(out)) + skip
    return self.conv2_3(out)
def compute_context(previous_state):
    """Return the attention-weighted sum of ``hxs`` for ``previous_state``.

    Relies on names from the enclosing scope: ``self``, ``batch_size``,
    ``max_length``, ``encoder_factor``, ``hxs`` and ``encoder_output_size``.
    """
    n_units = self.n_attention_units

    # Project the decoder state and repeat it for every source position.
    decoder_factor = F.reshape(
        self.s(previous_state), (batch_size, 1, n_units))
    decoder_factor = F.broadcast_to(
        decoder_factor, (batch_size, max_length, n_units))

    energies = F.tanh(encoder_factor + decoder_factor)
    energies = F.reshape(energies, (batch_size * max_length, n_units))
    scores = F.reshape(self.o(energies), (batch_size, max_length))
    attention = F.softmax(scores)

    context = F.batch_matmul(attention, hxs, transa=True)
    return F.reshape(context, (batch_size, encoder_output_size))
def metric(self, model, images, labels):
    """Accumulate a label-free, softmax-weighted distance metric.

    For every embedding, squared distances to all embeddings are converted
    to softmax weights and negative log-softmax values; their product is
    summed into the metric, which is reported under ``'metric'``.
    ``labels`` is not used.
    """
    batchsize = len(images)
    embeddings = F.reshape(model(images), ((batchsize, -1)))
    n_features = embeddings.shape[1]

    metric = 0
    for anchor in embeddings:
        anchor_tiled = F.broadcast_to(anchor, (batchsize, n_features))
        sq_dists = F.sum((embeddings - anchor_tiled)**2, axis=1)
        neg_log_probs = -F.log_softmax(F.expand_dims(-sq_dists, axis=0))[0]
        probs = F.softmax(F.expand_dims(-sq_dists, axis=0))[0]
        metric += F.sum(neg_log_probs * probs)

    chainer.report({'metric': metric}, model)
    return metric
def forward(self, xs, h, c, mask):
    """Run a hand-written masked LSTM over ``self.sequence_length`` steps.

    Args:
        xs: list of sequences, padded with ``F.pad_sequence``.
        h: initial hidden state.
        c: initial cell state.
        mask: indexed as ``mask[:, time]``; each slice is reshaped to
            ``(self.batch_size,)``.  Where it is 1 the state is updated,
            where it is 0 the previous state is kept.

    Returns:
        The hidden state after the last step.
    """
    n = self.num_hidden
    state_shape = (self.batch_size, n)
    inputs = F.pad_sequence(xs)

    for time in range(self.sequence_length):
        x = inputs[:, time]
        gate = self.l(F.concat((x, h), axis=1))

        # Gate layout along axis 1: input, output, forget, candidate.
        i = F.sigmoid(gate[:, 0:n])
        o = F.sigmoid(gate[:, n:n * 2])
        f = F.sigmoid(gate[:, n * 2:n * 3])
        candidate = F.tanh(gate[:, n * 3:n * 4])

        nc = f * c + i * candidate
        nh = o * F.tanh(nc)

        pmask = F.reshape(mask[:, time], (self.batch_size, ))
        pmask = F.broadcast_to(F.expand_dims(pmask, axis=1), state_shape)
        nmask = 1.0 - pmask

        h = nh * pmask + h * nmask
        # Fix: carry the cell state forward as well.  Previously ``c`` was
        # never reassigned, so every step reused the initial cell state.
        c = nc * pmask + c * nmask
    return h
def __call__(self, x):
    """Compute and report the objective ``KL - E[log p(x|z)]``.

    ``self.num_zsamples`` latent samples are drawn per call; the
    log-likelihood is averaged over them.  The per-instance objective is
    also used to report an unbiased sample variance (``obj_var``).

    Sets ``self.kl``, ``self.logp`` and ``self.obj``.

    Returns:
        ``self.obj``.
    """
    batchsize = x.shape[0]

    # Compute q(z|x)
    qmu, qln_var = self.encode(x)

    kl_inst = gaussian_kl_divergence_inst(qmu, qln_var)
    logp_inst = None
    self.kl = F.sum(kl_inst) / batchsize
    self.logp = 0

    # ``range`` instead of ``xrange`` so the loop also runs on Python 3.
    for j in range(self.num_zsamples):
        # z ~ q(z|x)
        z = F.gaussian(qmu, qln_var)

        # Compute p(x|z)
        pxz = self.decode(z)
        logpxz = pxz(x)
        if logp_inst is None:
            logp_inst = logpxz
        else:
            logp_inst += logpxz

        # Compute objective
        batchsize = logpxz.shape[0]
        self.logp += F.sum(logpxz) / batchsize

    # Compute standard deviation
    logp_inst /= self.num_zsamples
    obj_inst = kl_inst - logp_inst
    obj_inst_mean = F.sum(obj_inst) / batchsize
    obj_c = obj_inst - F.broadcast_to(obj_inst_mean, obj_inst.shape)
    obj_var = F.sum(obj_c * obj_c) / (batchsize - 1)

    self.logp /= self.num_zsamples
    self.obj = self.kl - self.logp

    reporter.report(
        {
            'obj': self.obj,
            'obj_var': obj_var,
            'kl': self.kl,
            'logp': self.logp
        }, self)

    return self.obj
def predict(self, x):
    """Classify documents with a BiLSTM and attention pooling.

    Each document is embedded, encoded with ``self.bi_lstm``, scored per
    step with ``self.l_attn``, and pooled as the attention-weighted sum of
    the hidden states before the final layer ``self.l3``.

    Returns:
        In training mode, the output of ``self.l3``.  Otherwise a tuple of
        that output and the attention weights of the first document.
    """
    batchsize = len(x)
    embedded = [F.dropout(self.embed(Variable(doc)), ratio=0.5)
                for doc in x]
    # Shape notes below are carried over from the original author.
    # hy: final hidden state of the BiLSTM (2, batchsize, mid_size)
    # cy: ??
    # ys: hidden-state vectors of every step (batchsize, lim, mid_size*2)
    hy, cy, ys = self.bi_lstm(hx=None, cx=None, xs=embedded)
    ys = [F.dropout(step_states, ratio=0.3) for step_states in ys]

    stacked = F.concat(ys, axis=0)  # (batchsize*lim, mid_size*2)
    attn = F.dropout(self.l_attn(stacked), ratio=0.25)  # (batchsize*lim, 1)

    # Split the flat scores back into one chunk per document.
    boundaries = np.cumsum([len(doc) for doc in embedded])[:-1]
    per_doc_attn = F.split_axis(attn, boundaries, axis=0)
    # Padding with a large negative score (-1024.0) before the softmax.
    padded_attn = F.pad_sequence(per_doc_attn, padding=-1024.0)
    attn_softmax = F.softmax(padded_attn, axis=1)  # (batchsize, lim, 1)

    ys_pad = F.pad_sequence(
        ys, length=None, padding=0.0)  # (batchsize, lim, mid_size*2)

    # Align the shapes so ys and attn_softmax can be multiplied.
    flat_hidden = F.reshape(
        ys_pad, (-1, ys_pad.shape[-1]))  # (batchsize*lim, mid_size*2)
    flat_weights = F.broadcast_to(
        F.reshape(attn_softmax, (-1, attn_softmax.shape[-1])),
        flat_hidden.shape)  # (batchsize*lim, mid_size*2)

    # hidden states * attention weights
    weighted = flat_hidden * flat_weights
    weighted = F.reshape(weighted, (batchsize, -1, weighted.shape[-1]))

    # Weighted sum of the hidden states: (batchsize, mid_size*2)
    result = F.sum(weighted, axis=1)

    if chainer.config.train:
        return self.l3(F.dropout(result, ratio=0.2))

    attn_list = F.transpose(attn_softmax[0])[0]
    return self.l3(result), attn_list.data
def evaluate_with_quantile_thresholds(taus):
    """Evaluate action values at the given quantile thresholds.

    Relies on ``self``, ``batch_size``, ``hidden_size`` and ``psi_x`` from
    the enclosing scope.

    Args:
        taus: 2-dimensional array with ``batch_size`` rows.

    Returns:
        ``QuantileDiscreteActionValue`` wrapping a
        ``(batch_size, n_taus, n_actions)`` Variable.
    """
    assert taus.ndim == 2
    assert taus.shape[0] == batch_size
    n_taus = taus.shape[1]

    phi_taus = self.phi(taus)
    assert phi_taus.ndim == 3
    assert phi_taus.shape == (batch_size, n_taus, hidden_size)

    # Repeat the state embedding for every tau and combine multiplicatively.
    psi_tiled = F.broadcast_to(
        F.expand_dims(psi_x, axis=1), phi_taus.shape)
    merged = F.reshape(psi_tiled * phi_taus, (-1, hidden_size))
    assert merged.shape == (batch_size * n_taus, hidden_size)

    out = self.f(merged)
    assert out.ndim == 2
    assert out.shape[0] == batch_size * n_taus

    n_actions = out.shape[-1]
    out = F.reshape(out, (batch_size, n_taus, n_actions))
    return QuantileDiscreteActionValue(out)
def __call__(self, a_list):
    """Return the attention-weighted sum of the vectors in ``a_list``.

    Each vector gets an unnormalised weight ``exp(we(tanh(pw(a))))``; the
    weights are normalised by their sum and stored in ``self.empha``.
    """
    self.empha = []

    # Unnormalised weights and their running total.
    raw_weights = []
    total = Variable(np.array([[0]], dtype='float32'))
    for vec in a_list:
        weight = functions.exp(self.we(functions.tanh(self.pw(vec))))
        raw_weights.append(weight)
        total = total + weight

    pooled = Variable(np.zeros((1, self.hidden_size), dtype='float32'))
    for vec, weight in zip(a_list, raw_weights):
        weight = weight / total
        self.empha.append(weight)
        pooled = pooled + vec * functions.broadcast_to(
            weight, (1, self.hidden_size))
    return pooled
def forward_onestep(self, prev_h_g, prev_h_e, prev_c_e, x, v, r):
    """Advance the convolutional LSTM by one step.

    ``v`` is given two trailing singleton axes and broadcast over the
    spatial axes of ``prev_h_e``; ``x`` goes through two ReLU convolutions.
    Everything is concatenated along axis 1 and fed to the gate
    convolutions.

    Returns:
        Tuple ``(next_h, next_c)``.
    """
    params = self.params

    tiled_shape = (prev_h_e.shape[0], v.shape[1]) + prev_h_e.shape[2:]
    v = cf.broadcast_to(cf.reshape(v, v.shape + (1, 1)), shape=tiled_shape)

    x = cf.relu(params.conv_x_1(x))
    x = cf.relu(params.conv_x_2(x))

    lstm_in = cf.concat((prev_h_e, prev_h_g, x, v, r), axis=1)
    forget_gate = cf.sigmoid(params.lstm_f(lstm_in))
    input_gate = cf.sigmoid(params.lstm_i(lstm_in))
    candidate = cf.tanh(params.lstm_tanh(lstm_in))

    next_c = forget_gate * prev_c_e + input_gate * candidate
    next_h = cf.sigmoid(params.lstm_o(lstm_in)) * cf.tanh(next_c)
    return next_h, next_c
def compute_discriminative_loss(self, fact_vectors):
    """Pairwise hinge loss over the max-pooled word scores.

    ``fact_vectors`` is max-pooled over axis 0.  For every word column the
    hinge ``max(0, 1 - (score - score_of_this_word))`` is summed over all
    entries, and the total is normalised by ``(voc_size - 1) * voc_size``.

    Args:
        fact_vectors: per the original comment, shaped
            ``(n_view_steps, batch_size, voc_size)``.

    Returns:
        Scalar Variable.
    """
    reason_pool = F.max(fact_vectors, axis=0)
    # dim of fact_vectors is (n_view_steps, batch_size, voc_size)
    batch_size, voc_size = reason_pool.data.shape

    loss = 0
    for i_word in range(voc_size):
        one_word_score = \
            F.get_item(reason_pool, [range(batch_size), i_word])
        one_word_score = F.expand_dims(one_word_score, axis=1)
        broaded_one_word_score = \
            F.broadcast_to(one_word_score, reason_pool.data.shape)
        real_value_of_hinge = 1-(reason_pool-broaded_one_word_score)
        zero_mat = Variable(self.xp.zeros_like(
            real_value_of_hinge.data, self.xp.float32), volatile='auto')
        # Fix: select from the Variable, not from its ``.data``.  Passing
        # the raw array cut the hinge out of the computational graph, so
        # the loss carried no gradient.
        hinge_loss = F.where(
            real_value_of_hinge.data < 0, zero_mat, real_value_of_hinge)
        # NOTE(review): the constant subtracted here is ``voc_size``;
        # confirm that is the intended offset.
        loss += F.sum(hinge_loss) - voc_size
    loss /= (voc_size-1)*voc_size
    return loss
def __call__(self, x):
    """Apply minibatch discrimination followed by the final layer.

    Optional dropout (forced into train mode) and optional summation over
    axes (2, 3) are applied first.  The minibatch-discrimination features
    are concatenated to the flattened activations before ``self.l``.
    """
    if self.dr:
        # Dropout is applied even outside training.
        with chainer.using_config('train', True):
            x = F.dropout(x, self.dr)
    if self.gap:
        x = F.sum(x, axis=(2, 3))

    n = x.shape[0]
    n_kernels = self.B * self.C
    # The code below is copied from
    # https://github.com/pfnet-research/chainer-gan-lib/blob/master/minibatch_discrimination/net.py
    feature = F.reshape(F.leaky_relu(x), (n, -1))
    projected = F.reshape(self.md(feature), (n, n_kernels, 1))
    tiled = F.broadcast_to(projected, (n, n_kernels, n))
    tiled_t = F.transpose(tiled, (2, 1, 0))

    # L1 distance between every pair of samples, per kernel.
    diff = F.absolute(F.reshape(tiled - tiled_t, (n, self.B, self.C, n)))
    closeness = F.sum(F.exp(-F.sum(diff, axis=2)), axis=2) - 1

    return self.l(F.concat([feature, closeness]))
def forward(self, u):
    """Add a globally pooled context term to every position of ``u``.

    Attention weights from ``self.context`` (softmax over the flattened
    positions) pool ``u`` into one vector per sample; that vector goes
    through ``down`` -> ``ln`` -> ReLU -> ``up`` and is added back to all
    positions.
    """
    B, C, H, W = u.shape
    n_pos = H * W

    flat = u.reshape((B, C, n_pos))
    weights = F.softmax(self.context(u).reshape((B, n_pos, 1)))
    pooled = F.batch_matmul(flat, weights)

    transformed = self.up(F.relu(self.ln(self.down(pooled))))
    # Broadcast with the spatial axes leading, then move them to the back.
    spread = F.broadcast_to(transformed, (H, W, B, C))
    spread = spread.transpose((2, 3, 0, 1))
    return u + spread
def lighting(faces, textures, intensity_ambient=0.5,
             intensity_directional=0.5, color_ambient=(1, 1, 1),
             color_directional=(1, 1, 1), direction=(0, 1, 0)):
    """Multiply ``textures`` by ambient plus directional light.

    Args:
        faces: array whose first two axes are (batch, face); reshaped to
            ``(bs * nf, 3, 3)`` for the normal computation.
        textures: multiplied by the light broadcast to its shape.
        intensity_ambient: weight of the ambient term (skipped when 0).
        intensity_directional: weight of the directional term (skipped
            when 0).
        color_ambient, color_directional, direction: tuple/list, or an
            array; 1-dimensional values are repeated over the batch.

    Returns:
        The lit textures.
    """
    xp = chainer.cuda.get_array_module(faces)
    bs, nf = faces.shape[:2]

    def _batched(value):
        # Sequence -> float32 array; a single 3-vector -> one per batch item.
        if isinstance(value, (tuple, list)):
            value = xp.array(value, 'float32')
        if value.ndim == 1:
            value = cf.broadcast_to(value[None, :], (bs, 3))
        return value

    # arguments
    color_ambient = _batched(color_ambient)
    color_directional = _batched(color_directional)
    direction = _batched(direction)

    # create light
    light = xp.zeros((bs, nf, 3), 'float32')

    # ambient light
    if intensity_ambient != 0:
        ambient = cf.broadcast_to(color_ambient[:, None, :], light.shape)
        light = light + intensity_ambient * ambient

    # directional light
    if intensity_directional != 0:
        faces = faces.reshape((bs * nf, 3, 3))
        edge_a = faces[:, 0] - faces[:, 1]
        edge_b = faces[:, 2] - faces[:, 1]
        normals = cf.normalize(cross(edge_a, edge_b))
        normals = normals.reshape((bs, nf, 3))

        if direction.ndim == 2:
            direction = cf.broadcast_to(direction[:, None, :], normals.shape)
        # Negative cosines (faces turned away) are clamped to zero.
        cos = cf.relu(cf.sum(normals * direction, axis=2))
        directional = cfmath.mul(
            *cf.broadcast(color_directional[:, None, :], cos[:, :, None]))
        light = (light + intensity_directional * directional)

    # apply
    light = cf.broadcast_to(light[:, :, None, None, None, :], textures.shape)
    return textures * light
def __call__(self, x):
    """Discriminator forward pass with minibatch discrimination.

    Three conv + batch-norm + leaky-ReLU stages are followed by a
    minibatch-discrimination feature that is concatenated to the flattened
    activations.

    Returns:
        ``self.lout(h)``, or the tuple ``(h, self.lout(h))`` when
        ``self.use_feature_matching`` is set.
    """
    n = x.shape[0]

    h = x
    for conv, bn in ((self.conv1, self.bn1),
                     (self.conv2, self.bn2),
                     (self.conv3, self.bn3)):
        h = F.leaky_relu(bn(conv(h)), slope=0.2)
    h = F.reshape(h, (x.shape[0], -1))

    # Pairwise L1 distances between samples, per kernel.
    projected = F.reshape(self.l_hidden(h), (n, self.B, self.C, 1))
    tiled = F.broadcast_to(projected, (n, self.B, self.C, n))
    tiled_t = F.transpose(tiled, (3, 1, 2, 0))
    closeness = F.exp(-F.sum(F.absolute(tiled - tiled_t), axis=2))
    closeness = F.sum(closeness, axis=2) - 1

    h = F.concat([h, closeness])
    if self.use_feature_matching:
        return h, self.lout(h)
    return self.lout(h)
def generate_2dmeshgrid(H, W, N, xp=np):
    """Generate 2d meshgrid.

    The un-batched grid ``(1, 3, H*W)`` holding x coordinates, y coordinates
    and ones is cached in the module-level ``meshgrid`` and rebuilt when
    ``H * W`` changes.  The batched result is returned without being written
    back to the cache, so calls with different ``N`` work.

    Returns:
        Array: Shape is (N, 3, H*W)
    """
    global meshgrid
    # NOTE(review): the cache key is only H*W -- grids with swapped H and W
    # (or built with a different ``xp``) are not told apart.
    stale = (meshgrid is None
             or meshgrid.shape[0] != 1
             or meshgrid.shape[2] != H * W)
    if stale:
        ys, xs = xp.meshgrid(xp.arange(0, H, dtype=np.float32),
                             xp.arange(0, W, dtype=np.float32),
                             indexing='ij', copy=False)
        meshgrid = xp.concatenate(
            [xs[None], ys[None], xp.ones((1, H, W), dtype=np.float32)],
            axis=0).reshape(1, 3, H * W)
    # Fix: broadcast into the return value only.  Previously the global
    # cache was overwritten with the N-batched result.
    return F.broadcast_to(meshgrid, (N, 3, H * W))
def rescale_adj(adj):
    """Normalize adjacency matrix

    It ensures that activations are on a similar scale irrespective of
    the number of neighbors

    Args:
        adj (:class:`chainer.Variable`, or :class:`numpy.ndarray` \
        or :class:`cupy.ndarray`):
            adjacency matrix

    Returns:
        :class:`chainer.Variable`: normalized adjacency matrix

    """
    xp = cuda.get_array_module(adj)
    # Neighbour counts: sum over axes 1 and 2.
    counts = functions.sum(adj, axis=(1, 2))
    has_neighbors = counts.data != 0
    ones = xp.ones(counts.shape, dtype=xp.float32)
    # Replace zero counts by one so the reciprocal stays finite.
    safe_counts = functions.where(has_neighbors, counts, ones)
    inv_counts = 1 / safe_counts
    scale = functions.broadcast_to(
        inv_counts[:, None, None, :], adj.shape)
    return adj * scale
def __call__(self, x):
    """Run the three-scale tail / body / head network and return its output.

    The tail builds three progressively pooled feature maps, the body mixes
    them coarse-to-fine (with the ``t3`` output broadcast over the coarsest
    map), and the head mixes them fine-to-coarse again.
    """
    stem = self.f0(x)

    # tail
    fine = self.t0(stem)
    mid = self.t1(F.max_pooling_2d(fine, 2))
    coarse = self.t2(F.max_pooling_2d(mid, 2))
    summary = self.t3(coarse)

    # body
    coarse = self.b2(
        F.concat([coarse, F.broadcast_to(summary, coarse.shape)]))
    mid = self.b1(F.concat([mid, upsample(coarse, 2)]))
    fine = self.b0(F.concat([fine, upsample(mid, 2)]))

    # head
    mid = self.h1(F.concat([mid, F.max_pooling_2d(fine, 2)]))
    coarse = self.h2(F.concat([coarse, F.max_pooling_2d(mid, 2)]))

    return self.fin(self.h3(coarse))
def get_elbo(self, x, k=None, with_ll=False):
    """Estimate the ELBO of ``x`` from ``k`` latent samples.

    Args:
        x: input batch.
        k: number of latent samples; falls back to ``self.k`` when falsy.
        with_ll: also return the importance-weighted log-likelihood
            estimate ``mean(logsumexp(elbo_k, axis=0) - log(k))``.

    Returns:
        ``elbo``, or ``(elbo, log_likelihood)`` when ``with_ll`` is set.
    """
    k = k or self.k

    q_z = self.encoder(x)
    z = q_z.sample(k)
    p_x = self.decoder(z, n_batch_axes=2)
    p_z = self.prior()

    # One copy of x per latent sample.
    x_tiled = F.broadcast_to(x[None, :], (k, ) + x.shape)
    reconstr = p_x.log_prob(x_tiled)
    kl_penalty = q_z.log_prob(z) - p_z.log_prob(z)

    elbo_k = reconstr - kl_penalty
    elbo = F.mean(elbo_k)
    if not with_ll:
        return elbo

    log_likelihood = F.mean(F.logsumexp(elbo_k, axis=0) - numpy.log(k))
    return elbo, log_likelihood
def __call__(self, x):
    """Classify sentences with label-embedding attention.

    Word embeddings are compared with the class embeddings ``self.c`` by
    cosine similarity, n-gram windows of the similarities are scored with
    ``self.attention``, and the resulting per-word weights pool the word
    embeddings into one sentence vector for the final layers.

    Args:
        x: integer array of shape ``(batch_size, sentence_len)``; entries
            equal to ``PAD`` are masked out of the attention.

    Returns:
        Output of ``self.l2``.
    """
    xp = self.xp
    batch_size, sentence_len = x.shape
    n_class, n_embed = self.c.shape

    words = self.embed(x)
    words_padded = self.pad_sequence(words)

    # Eq. (2)
    classes = F.broadcast_to(self.c, (batch_size, n_class, n_embed))
    similarity = F.matmul(words_padded, classes, transb=True)
    word_norms = xp.expand_dims(
        xp.linalg.norm(words_padded.data, axis=2), axis=2)
    class_norms = xp.expand_dims(
        xp.linalg.norm(classes.data, axis=2), axis=2)
    denom = xp.matmul(
        word_norms,
        xp.transpose(class_norms, (0, 2, 1))) + 1e-10  # avoid zero division
    similarity = similarity / denom

    # Eq. (3)
    # (batch_size, sentence_len, window_size, n_class)
    windows = self.make_ngram(similarity)
    windows = F.reshape(
        windows,
        (batch_size * sentence_len * n_class, self.n_window * 2 + 1))
    # (batch_size * sentence_len * n_class, 1)
    scores = F.relu(self.attention(windows))
    scores = F.reshape(scores, (batch_size, sentence_len, n_class))

    # Eq. (4)
    best = F.max(scores, axis=2)  # (batch_size, sentence_len)

    # Eq. (5)
    # make attention-scores for PAD 0
    pad_penalty = (x == PAD).astype(xp.float32) * -1024.0
    beta = F.softmax(best + pad_penalty)
    beta = F.expand_dims(beta, axis=1)

    # Eq. (6)
    # (batch_size, n_embed)
    sentence = F.reshape(F.matmul(beta, words), (batch_size, n_embed))

    # f_2
    hidden = F.dropout(F.relu(self.l1(sentence)), ratio=.8)
    return self.l2(hidden)
def update_core(self):
    """Run one optimisation step and save a progress image.

    The rendered output of ``self.func`` is broadcast to the target's shape
    and compared with ``compute_loss``; the loss and the camera parameters
    are reported; the first rendered image is written with
    ``save_progress_image``; finally all optimizers are updated.
    """
    batch = self.get_iterator('main').next()
    t_data = self.converter(batch, self.device)

    y_data = self.func(t_data.shape[0])
    B, C, H, W = t_data.shape[:4]
    y_data = F.broadcast_to(y_data, (B, C, H, W))
    loss = compute_loss(y_data, t_data)

    position = self.models['position'].camera_position
    z_axis = self.models['direction'].camera_zaxis
    reporter.report({
        'main/loss': loss,
        'camera_position/x': position[0],
        'camera_position/y': position[1],
        'camera_position/z': position[2],
        'camera_direction/x': z_axis[0],
        'camera_direction/y': z_axis[1],
        'camera_direction/z': z_axis[2]
    })

    # Pull the rendered batch to host memory for saving.
    rendered = y_data.data
    if self.device >= 0:
        rendered = rendered.get()
        cuda.get_device_from_id(self.device).synchronize()

    # First sample: move axis 0 last, clip to [0, 1], 8-bit, RGB -> BGR.
    img = np.transpose(rendered[0], (1, 2, 0))
    img = (np.clip(img, 0, 1) * 255).astype(np.uint8)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    save_progress_image(self.odir, self.count, img)

    for optimizer in self.optimizers.values():
        optimizer.target.cleargrads()
    loss.backward()
    for optimizer in self.optimizers.values():
        optimizer.update()

    self.count += 1
def attention(self, hOut, encOut, args):
    """Apply dot-product attention of the decoder state over encoder outputs.

    Args:
        hOut: Decoder hidden state, annotated as [batch, Dim].
        encOut: Encoder outputs, annotated as [batch, max(enc_sentlen), Dim].
        args: Options object; ``args.attention == 0`` disables attention and
            ``args.hiddenDim`` is the size ``hOut`` is broadcast to.

    Returns:
        ``hOut`` itself when attention is disabled; otherwise
        ``tanh(self.attnOut(concat(hOut, context)))``.
    """
    # TODO: this function is still rather messy.
    if args.attention == 0:
        # Attention disabled: hand the decoder state back untouched.
        return hOut

    # Read the dimensions from .shape rather than len(encOut[0]) /
    # len(encOut[0][0]): indexing just to learn a size is wasteful and
    # raises IndexError on an empty batch.
    n_batch, src_len, enc_dim = encOut.shape

    # Prepare the target side.
    query = self.attnIn(hOut)  # [batch, Dim]
    query = F.expand_dims(query, axis=1)  # [batch, 1, Dim]
    # [batch, max(enc_sentlen), Dim]; encOut and the broadcast query are
    # expected to have the same shape at this point.
    query = F.broadcast_to(
        query, (query.shape[0], src_len, args.hiddenDim))

    scores = F.sum(encOut * query, axis=2)  # [batch, sentlen]
    # [batch, max(enc_sentlen)]; positions that are zero because of padding
    # still come out with a (fairly small) non-zero probability.
    weights = F.softmax(scores)
    weights = F.expand_dims(weights, axis=1)  # [batch, 1, max(enc_sentlen)]
    weighted = F.batch_matmul(weights, encOut)  # [batch, 1, Dim]
    # [batch, Dim]: the encoder context vector.
    context = F.reshape(weighted, (n_batch, enc_dim))

    merged = F.concat((hOut, context))  # [batch, Dim + Dim]
    projected = self.attnOut(merged)  # [batch, Dim]
    return F.tanh(projected)  # activation
def __call__(self, inputs):
    """Encode past positions and poses, decode a prediction, return the loss.

    Args:
        inputs: Raw batch; unpacked into seven arrays by
            ``self._prepare_input``.

    Returns:
        Tuple ``(loss, pred_y, None)``. ``loss`` is the mean squared error
        between the prediction and ``pos_y``. ``pred_y`` is a CPU array: the
        prediction plus ``offset_x``, multiplied by ``self._std`` and shifted
        by ``self._mean``.
    """
    (pos_x, pos_y, offset_x,
     _ego_x, _ego_y, pose_x, _pose_y) = self._prepare_input(inputs)
    # The sizes are unused, but the unpacking requires pos_x to be 3-D.
    _batch_size, _past_len, _ = pos_x.shape

    encoded = F.concat(
        (self.pos_encoder(pos_x), self.pose_encoder(pose_x)),
        axis=1)  # annotated (B, C, 2) in the original
    decoded = self.pos_decoder(self.inter(encoded))

    raw_pred = self.last(decoded)  # annotated (B, 10, C+6+28) in the original
    # Swap axes 1 and 2, then keep only as many steps as the target has.
    n_steps = pos_y.shape[1]
    pred = F.swapaxes(raw_pred, 1, 2)[:, :n_steps, :]

    loss = F.mean_squared_error(pred, pos_y)

    offset = F.broadcast_to(F.expand_dims(offset_x, 1), pred.shape)
    shifted = pred + offset
    pred_y = cuda.to_cpu(shifted.data) * self._std + self._mean
    return loss, pred_y, None
def log_propensity_independent(self, x, action):
    """Return log-softmax scores of ``self._predict(x)`` ordered by ``action``.

    When ``self.k > 0`` and ``action`` has fewer columns than the prediction,
    the column ordering is completed with the items that
    ``inverse_select_items_per_row`` returns for ``action``. The result is
    cut down to its first ``self.k`` columns when ``self.k > 0``.

    Args:
        x: Input forwarded to ``self._predict``.
        action: 2-D variable of per-row items (presumably column indices
            into the prediction -- confirm against select_items_per_row).

    Returns:
        The log-softmax of the reordered scores, possibly truncated.
    """
    xp = cuda.get_array_module(action)
    scores = self._predict(x)
    n_columns = scores.shape[1]

    ordering = action
    if self.k > 0 and action.shape[1] < n_columns:
        # Every row holds 0..n_columns-1 in the dtype of the given actions.
        candidates = F.broadcast_to(
            xp.arange(0, n_columns, dtype=action.data.dtype), scores.shape)
        remaining = inverse_select_items_per_row(candidates, action)
        chosen = select_items_per_row(candidates, action)
        ordering = F.concat((chosen, remaining), axis=1)

    log_probs = F.log_softmax(select_items_per_row(scores, ordering))
    if self.k > 0:
        return log_probs[:, :self.k]
    return log_probs
def __call__(self, x, y=None):
    """Run the block stack, optionally concatenating a label embedding.

    Args:
        x: Input passed through ``block1`` .. ``block6``.
        y: Optional labels. When given, ``self.l_y(y)`` is tiled over the
            spatial size of the ``block3`` output and concatenated to it
            along axis 1 before ``block4``.

    Returns:
        The output of ``self.l7`` applied to the features summed over
        axes 2 and 3.
    """
    feat = x
    for block in (self.block1, self.block2, self.block3):
        feat = block(feat)

    if y is not None:
        label_emb = self.l_y(y)
        n_rows, n_cols = label_emb.shape[0], label_emb.shape[1]
        height, width = feat.shape[2], feat.shape[3]
        # (N, C) -> (N, C, 1, 1) -> (N, C, H, W) so it can be stacked onto
        # the feature map along the channel axis.
        label_map = F.reshape(label_emb, (n_rows, n_cols, 1, 1))
        label_map = F.broadcast_to(label_map, (n_rows, n_cols, height, width))
        feat = F.concat([feat, label_map], axis=1)

    for block in (self.block4, self.block5, self.block6):
        feat = block(feat)

    feat = self.activation(feat)
    pooled = F.sum(feat, axis=(2, 3))  # Global pooling
    return self.l7(pooled)
def __call__(self, v, h, label):
    """Apply one gated layer to the vertical and horizontal inputs.

    Args:
        v: Input to the vertical convolutions.
        h: Input to the horizontal convolutions.
        label: Conditioning input passed to ``self.label``; the result is
            added as a bias to both stacks.

    Returns:
        Tuple ``(v, h)`` with the gated vertical output and the horizontal
        output after ``self.horizontal_output``.
    """
    vert_t = self.vertical_conv_t(v)
    vert_s = self.vertical_conv_s(v)
    # The vertical -> horizontal links see the vertical features before the
    # label bias is added.
    from_vert_t = self.v_to_h_conv_t(vert_t)
    from_vert_s = self.v_to_h_conv_s(vert_s)

    # The label bias is added to both the vertical and horizontal paths.
    # Only the shape of vert_t is used for the broadcast, as the other
    # tensors are expected to share it.
    bias = self.label(label)
    bias = F.expand_dims(F.expand_dims(bias, -1), -1)
    bias = F.broadcast_to(bias, vert_t.shape)

    v_out = F.tanh(vert_t + bias) * F.sigmoid(vert_s + bias)

    horiz_t = self.horizontal_conv_t(h)
    horiz_s = self.horizontal_conv_s(h)
    horiz_t = horiz_t + from_vert_t + bias
    horiz_s = horiz_s + from_vert_s + bias
    h_out = self.horizontal_output(F.tanh(horiz_t) * F.sigmoid(horiz_s))

    return v_out, h_out
def get_initial_logits(self, mb_size=None):
    """Take the first decoding step from the beginning-of-sentence embedding.

    Args:
        mb_size: Minibatch size; falls back to ``self.mb_size`` when None.
            One of the two must be set.

    Returns:
        Tuple ``(new_states, logits, attn)`` as produced by
        ``self.advance_one_step``.
    """
    if mb_size is None:
        mb_size = self.mb_size
    assert mb_size is not None

    decoder = self.decoder_chain
    # A multi-target signal, when present, replaces the GRU's own initial
    # states.
    initial_states = (
        self.multi_target_signal
        if self.multi_target_signal is not None
        else decoder.gru.get_initial_states(mb_size))

    # One copy of the BOS embedding per minibatch row.
    bos_batch = F.broadcast_to(decoder.bos_embeding, (mb_size, decoder.Eo))

    new_states, logits, attn = self.advance_one_step(initial_states, bos_batch)
    return new_states, logits, attn
def compute_ctxt(previous_state, prev_word_embedding=None):
    """Compute the attention context for one decoding step.

    This is a closure: besides its arguments it reads ``self``, ``mb_size``,
    ``nb_elems``, ``mask_length``, ``precomputed_al_factor``, ``fb_concat``
    and ``concatenated_penalties`` from the enclosing scope.

    Args:
        previous_state: Previous decoder state; its first dimension is the
            minibatch size used for this step.
        prev_word_embedding: Optional embedding of the previous word; when
            given, its projection through ``self.al_lin_y`` is added to the
            state projection.

    Returns:
        Tuple ``(ci, attn)``: the context reshaped to
        ``(current_mb_size, self.Hi)`` and the softmax attention weights of
        shape ``(current_mb_size, nb_elems)``.
    """
    current_mb_size = previous_state.data.shape[0]
    if current_mb_size < mb_size:
        # This step has fewer rows than the precomputed tensors: keep only
        # the first current_mb_size rows of each and discard the rest.
        # (Presumably the batch shrinks as sequences finish -- confirm
        # against the caller.)
        al_factor, _ = F.split_axis(precomputed_al_factor, (current_mb_size, ), 0)
        used_fb_concat, _ = F.split_axis(fb_concat, (current_mb_size, ), 0)
        if mask_length > 0:
            used_concatenated_penalties = concatenated_penalties[: current_mb_size]
    else:
        al_factor = precomputed_al_factor
        used_fb_concat = fb_concat
        if mask_length > 0:
            used_concatenated_penalties = concatenated_penalties

    state_al_factor = self.al_lin_s(previous_state)

    # As suggested by Isao Goto: also condition on the previous word.
    if prev_word_embedding is not None:
        state_al_factor = state_al_factor + self.al_lin_y(
            prev_word_embedding)

    # Repeat the state term for each of the nb_elems source elements so it
    # can be added to al_factor.
    state_al_factor_bc = F.broadcast_to(
        F.reshape(state_al_factor, (current_mb_size, 1, self.Ha)),
        (current_mb_size, nb_elems, self.Ha))

    # Flatten to 2-D for the al_lin_o projection, then restore one score per
    # (row, source element).
    a_coeffs = F.reshape(
        self.al_lin_o(
            F.reshape(F.tanh(state_al_factor_bc + al_factor),
                      (current_mb_size * nb_elems, self.Ha))),
        (current_mb_size, nb_elems))

    if mask_length > 0:
        # Add the penalties to the raw scores before the softmax. An earlier
        # variant, left commented out in the original, subtracted
        # 10000 * (1 - mask) instead.
        with cuda.get_device_from_array(used_concatenated_penalties):
            a_coeffs = a_coeffs + used_concatenated_penalties

    attn = F.softmax(a_coeffs)

    # Attention-weighted combination of used_fb_concat, flattened to
    # (current_mb_size, self.Hi).
    ci = F.reshape(batch_matmul(attn, used_fb_concat, transa=True),
                   (current_mb_size, self.Hi))

    return ci, attn