def __call__(self, x):
    """Return the dueling distributional action value for a batch ``x``.

    ``x`` is passed through the convolutional layers and the main stream;
    the result is split in two halves that feed the advantage and the
    state-value streams.  Their sum is turned into a softmax over atoms.
    """
    features = x
    for layer in self.conv_layers:
        features = self.activation(layer(features))

    n_batch = x.shape[0]
    features = self.activation(self.main_stream(features))
    adv_in, val_in = F.split_axis(features, 2, axis=-1)

    # Advantage, centred by subtracting its mean over the action axis.
    advantage = F.reshape(
        self.a_stream(adv_in), (n_batch, self.n_actions, self.n_atoms))
    adv_mean = F.sum(advantage, axis=1, keepdims=True) / self.n_actions
    advantage, adv_mean = F.broadcast(advantage, adv_mean)
    advantage = advantage - adv_mean

    # State value, shared by all actions.
    value = F.reshape(self.v_stream(val_in), (n_batch, 1, self.n_atoms))
    advantage, value = F.broadcast(advantage, value)

    probs = F.softmax(advantage + value, axis=2)
    return chainerrl.action_value.DistributionalDiscreteActionValue(
        probs, self.z_values)
def __call__(self, x):
    """Run ``x`` through the FC/BN stack and record per-layer statistics.

    Parameters
    -----------------
    x: Variable
        Shape is 784 in case of MNIST

    Returns the activation of the last layer.  ``self.mid_outputs`` is
    reset on every call and receives one entry per layer.
    """
    # Reset mid outputs
    collected = []
    self.mid_outputs = collected

    out = x
    layer_pairs = zip(self.fc_layers.values(), self.bn_layers.values())
    for linear, batch_norm in layer_pairs:
        pre_act = linear(out)
        out = self.act(batch_norm(pre_act, self.test))

        n_samples = pre_act.data.shape[0]
        mean, _ = F.broadcast(F.sum(pre_act, 0) / n_samples, pre_act)
        centered = pre_act - mean
        var, _ = F.broadcast(F.sum(centered ** 2, 0) / n_samples, pre_act)
        # TODO: Add non-BN output
        # NOTE(review): the divisor is the batch variance (not its square
        # root) and carries no epsilon -- confirm this is intended.
        collected.append(centered / var)
    return out
def __call__(self, x, y):
    """Evaluate the scaled RBF kernel between the rows of ``x`` and ``y``.

    Parameters
    -----------------
    x: Variable
        Feature of unlabeled samples.
    y: Variable
        Feature of unlabeled samples.

    Returns
    -----------------
    Variable
        Matrix whose ``(i, j)`` entry is
        ``exp(-||g * x[i] - g * y[j]||^2)`` with ``g = self.gamma``.
    """
    g, x, y = F.broadcast(self.gamma, x, y)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g ** 2, axis=1)
    y_g_norm = F.sum(y_g ** 2, axis=1)
    # F.linear(a, b) computes a . b^T, so entry (i, j) is <x_g[i], y_g[j]>.
    x_g_y_g = F.linear(x_g, y_g)

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns.
    # The previous code broadcast them the other way round, which produced
    # ||x_j||^2 - 2<x_i, y_j> + ||y_i||^2: not a squared distance, and
    # negative for some pairs.  The debug prints and the 0.5 s sleep that
    # were hunting those negative values are removed with the fix.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(-x_g_norm + 2 * x_g_y_g - y_g_norm)
def __call__(self, x, y):
    """Evaluate the scaled RBF kernel between the rows of ``x`` and ``y``.

    Parameters
    -----------------
    x: Variable
        Feature of unlabeled samples.
    y: Variable
        Feature of unlabeled samples.

    Returns
    -----------------
    Variable
        Matrix whose ``(i, j)`` entry is
        ``exp(-||g * x[i] - g * y[j]||^2)`` with ``g = self.gamma``.
    """
    g, x, y = F.broadcast(self.gamma, x, y)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g ** 2, axis=1)
    y_g_norm = F.sum(y_g ** 2, axis=1)
    # F.linear(a, b) computes a . b^T, so entry (i, j) is <x_g[i], y_g[j]>.
    x_g_y_g = F.linear(x_g, y_g)

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns.
    # The previous code broadcast them the other way round, which produced
    # ||x_j||^2 - 2<x_i, y_j> + ||y_i||^2 instead of a squared distance.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(-x_g_norm + 2 * x_g_y_g - y_g_norm)
def __call__(self, x):
    """Return the dueling distributional action value for a batch ``x``.

    The convolutional features feed both the advantage and the state-value
    stream; the centred advantage plus the value is normalised with a
    softmax over the atom axis.
    """
    features = x
    for layer in self.conv_layers:
        features = self.activation(layer(features))

    n_batch = x.shape[0]
    per_action = (n_batch, self.n_actions, self.n_atoms)
    per_state = (n_batch, 1, self.n_atoms)

    # Advantage
    advantage = F.reshape(self.a_stream(features), per_action)
    adv_mean = F.reshape(
        F.sum(advantage, axis=1) / self.n_actions, per_state)
    advantage, adv_mean = F.broadcast(advantage, adv_mean)
    advantage = advantage - adv_mean

    # State value
    value = F.reshape(self.v_stream(features), per_state)
    advantage, value = F.broadcast(advantage, value)

    logits = F.reshape(
        advantage + value, (-1, self.n_actions, self.n_atoms))
    probs = F.softmax(logits, axis=2)
    return action_value.DistributionalDiscreteActionValue(
        probs, self.z_values)
def __call__(self, x):
    """Run ``x`` through the FC/BN stack and record per-layer statistics.

    Parameters
    -----------------
    x: Variable
        Shape is 784 in case of MNIST

    Returns the activation of the last layer.  ``self.mid_outputs`` is
    reset on every call and receives one entry per layer.
    """
    # Reset mid outputs
    recorded = []
    self.mid_outputs = recorded

    hidden = x
    for fc_link, bn_link in zip(self.fc_layers.values(),
                                self.bn_layers.values()):
        z = fc_link(hidden)
        hidden = self.act(bn_link(z, self.test))

        batch = z.data.shape[0]
        z_mean, _ = F.broadcast(F.sum(z, 0) / batch, z)
        deviation = z - z_mean
        z_var, _ = F.broadcast(F.sum(deviation ** 2, 0) / batch, z)
        # TODO: Add non-BN output
        # NOTE(review): the divisor is the batch variance (not its square
        # root) and carries no epsilon -- confirm this is intended.
        recorded.append(deviation / z_var)
    return hidden
def __call__(self, x):
    """Convolve ``x`` with the (optionally masked) filters and mask the output.

    The output mask is built from ``hexa.mask.square_axial`` on the first
    call and stored as the persistent value ``output_mask``.
    """
    # Apply a mask to the filters (optional)
    if self.filter_mask is None:
        kernel = self.W
    else:
        kernel, kernel_mask = F.broadcast(
            self.W, Variable(self.filter_mask))
        kernel = kernel * kernel_mask

    # Perform the 2D convolution
    out = F.convolution_2d(
        x, kernel, b=self.b, stride=self.stride, pad=self.pad,
        use_cudnn=self.use_cudnn)

    # Get a square shaped mask if it does not yet exist.
    if not hasattr(self, 'output_mask'):
        height, width = out.data.shape[-2:]
        square = hexa.mask.square_axial(height, width)
        self.add_persistent(
            'output_mask', self.xp.array(square[None, None, ...]))

    out, out_mask = F.broadcast(out, Variable(self.output_mask))
    return out * out_mask
def small_distance_matrices(r, cutoff):
    """Build per-atom distance matrices restricted to neighbours within ``cutoff``.

    Args:
        r: coordinates, n_batch x n_atoms x 3.
        cutoff: neighbours whose distance is strictly below this value are
            kept.  The previous code ignored this argument and always used
            a hard-coded ``0.6``.

    Returns:
        A tuple ``(dm, filt, index)``:

        * ``dm`` -- pairwise distances among the ``max_n`` nearest
          neighbours of every atom
          (n_batch x n_atoms x max_n x max_n),
        * ``filt`` -- boolean array of the same shape, true where both
          neighbours lie inside the cutoff,
        * ``index`` -- indices of the ``max_n`` nearest neighbours
          (n_batch x n_atoms x max_n).
    """
    # r : n_batch x n_atoms x 3
    n_batch = r.shape[0]
    n_atoms = r.shape[1]
    d = distance_matrix(r).data  # n_batch x n_atoms x n_atoms

    # Neighbours ordered by increasing distance; the gathered distances are
    # therefore already sorted and need no second sort.
    sort_index = np.argsort(d, axis=2)
    sorted_distance = np.take_along_axis(d, sort_index, axis=2)
    in_cut = sorted_distance < cutoff
    n_adaptable = np.sum(in_cut, axis=2)  # n_batch x n_atoms
    max_n = np.max(n_adaptable)

    i = sort_index[:, :, :max_n]
    broad_r = F.broadcast_to(
        r[:, None, :, :], (n_batch, n_atoms, n_atoms, 3))
    shrink_r = np.take_along_axis(broad_r, i[:, :, :, None], axis=2)
    dm = F.sqrt(
        F.sum((shrink_r[:, :, :, None, :]
               - shrink_r[:, :, None, :, :]) ** 2, axis=4))

    # n_batch x n_atoms x n_small: true for the slots that hold a real
    # in-cutoff neighbour.  These are plain index arrays, so NumPy
    # broadcasting is enough.
    valid = np.arange(max_n)[None, None, :] < n_adaptable[:, :, None]
    filt = np.logical_and(valid[:, :, :, None], valid[:, :, None, :])
    return dm, filt, i
def __call__(self, x, y):
    """Evaluate the scaled RBF kernel between the rows of ``x`` and ``y``.

    Parameters
    -----------------
    x: Variable
        Feature of unlabeled samples.
    y: Variable
        Feature of unlabeled samples.

    Returns
    -----------------
    Variable
        Matrix whose ``(i, j)`` entry is
        ``exp(-||g * x[i] - g * y[j]||^2)`` with ``g = self.gamma``.
    """
    g, x, y = F.broadcast(self.gamma, x, y)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g ** 2, axis=1)
    y_g_norm = F.sum(y_g ** 2, axis=1)
    # F.linear(a, b) computes a . b^T, so entry (i, j) is <x_g[i], y_g[j]>.
    x_g_y_g = F.linear(x_g, y_g)

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns.
    # The previous code broadcast them the other way round, which produced
    # ||x_j||^2 - 2<x_i, y_j> + ||y_i||^2 instead of a squared distance.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(-x_g_norm + 2 * x_g_y_g - y_g_norm)
def __call__(self, x, y):
    """Evaluate the scaled RBF kernel between the rows of ``x`` and ``y``.

    Parameters
    -----------------
    x: Variable
        Feature of unlabeled samples.
    y: Variable
        Feature of unlabeled samples.

    Returns
    -----------------
    Variable
        Matrix whose ``(i, j)`` entry is
        ``exp(-||g * x[i] - g * y[j]||^2)`` with ``g = self.gamma``.
    """
    g, x, y = F.broadcast(self.gamma, x, y)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g ** 2, axis=1)
    y_g_norm = F.sum(y_g ** 2, axis=1)
    # F.linear(a, b) computes a . b^T, so entry (i, j) is <x_g[i], y_g[j]>.
    x_g_y_g = F.linear(x_g, y_g)

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns.
    # The previous code broadcast them the other way round, which produced
    # ||x_j||^2 - 2<x_i, y_j> + ||y_i||^2: not a squared distance, and
    # negative for some pairs.  The debug prints and the 0.5 s sleep that
    # were hunting those negative values are removed with the fix.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(-x_g_norm + 2 * x_g_y_g - y_g_norm)
def getV(p, sentence, embed):
    """Build the context vector of the token at position ``p``.

    The embeddings of the previous and the next token are concatenated; a
    neighbour that falls outside the sentence is replaced by ``n_dim``
    zeros.  Returns ``(v, True)`` on success and ``(None, False)`` when
    ``p`` is out of range or a needed neighbour is missing from
    ``embed.wv`` (a message is printed in the latter case).

    NOTE(review): for a one-token sentence ``p == 0`` takes the
    "first token" branch and indexes ``sentence[1]``, which raises
    ``IndexError`` -- confirm callers never pass such input.
    """
    last = len(sentence) - 1
    if 0 < p < last:
        has_prev, has_next = True, True
    elif p == 0:
        has_prev, has_next = False, True
    elif p == last:
        has_prev, has_next = True, False
    else:
        return None, False

    prev_word = sentence[p - 1] if has_prev else None
    next_word = sentence[p + 1] if has_next else None

    prev_known = (not has_prev) or prev_word in embed.wv
    if not (prev_known and ((not has_next) or next_word in embed.wv)):
        print(sentence, p, " not in vocabulary.")
        return None, False

    if has_prev:
        prev_vec = embed.wv[prev_word]
    else:
        prev_vec = np.zeros(n_dim, dtype=np.float32)
    if has_next:
        next_vec = embed.wv[next_word]
    else:
        next_vec = np.zeros(n_dim, dtype=np.float32)

    joined = np.concatenate([prev_vec, next_vec])
    return F.broadcast(joined.astype(np.float32)), True
def test_invalid_shape(self):
    """Broadcasting shapes (3, 2, 5) and (1, 3, 4) must raise InvalidType."""
    x = chainer.Variable(numpy.zeros((3, 2, 5), dtype=numpy.int32))
    y = chainer.Variable(numpy.zeros((1, 3, 4), dtype=numpy.float32))

    with self.assertRaises(type_check.InvalidType):
        functions.broadcast(x, y)
def test_invalid_shape_fill(self):
    """Broadcasting shapes (3, 2, 5) and (4,) must raise InvalidType."""
    x = chainer.Variable(numpy.zeros((3, 2, 5), dtype=numpy.int32))
    y = chainer.Variable(numpy.zeros(4, dtype=numpy.float32))

    with self.assertRaises(type_check.InvalidType):
        functions.broadcast(x, y)
def moments(self, x, axis=None, keepdims=False):
    """Return ``(mean, variance)`` of ``x`` over ``axis``.

    The statistics are computed on the data shifted by its own mean
    (``var = E[(x - s)^2] - E[x - s]^2``, ``mean = E[x - s] + s``).

    The shifted mean used to be taken over ``shift`` instead of
    ``shifted``.  That made the returned mean twice the true mean and
    subtracted ``mean^2`` from the variance.

    Args:
        x: input variable.
        axis: forwarded to ``self.mean``.
        keepdims: accepted for interface compatibility; the body does not
            read it (every reduction passes ``True`` to ``self.mean``).

    Returns:
        ``(mean, var)``.  As before, ``mean`` comes out broadcast to the
        shape of ``x`` while ``var`` has the shape returned by
        ``self.mean``.
    """
    # assumes self.mean(t, axis, True) keeps the reduced axes -- TODO confirm
    shift = self.mean(x, axis, True)
    x, shift = F.broadcast(x, shift)
    shifted = x - shift
    shifted_mean = self.mean(shifted, axis, True)
    var_mean = self.mean(self.square(shifted), axis, True)
    var = var_mean - self.square(shifted_mean)
    shifted_mean, shift = F.broadcast(shifted_mean, shift)
    mean = shifted_mean + shift
    return mean, var
def __call__(self, x, z, test=False):
    """Normalise ``x`` (after ``self.lin`` unless ``self.nolin``) and combine with ``z``.

    The batch mean and the batch standard deviation are stored, broadcast
    to the shape of the normalised input, in ``self.mu`` and ``self.vr``.
    ``test`` is not read by this method.
    """
    h = x if self.nolin else self.lin(x)
    n_rows = h.data.shape[0]
    n_cols = h.data.shape[1]

    batch_mean = F.sum(h, axis=0) / n_rows
    self.mu = F.broadcast(F.reshape(batch_mean, (1, n_cols)), h)[0]

    batch_std = (F.sum((h - self.mu) * (h - self.mu), axis=0) / n_rows) ** 0.5
    self.vr = F.broadcast(F.reshape(batch_std, (1, n_cols)), h)[0]

    normalised = (h - self.mu) / (self.vr + 1e-7)
    return self.comb(normalised, z)
def __call__(self, x):
    """Apply the group convolution to ``x``.

    The filters are optionally masked, transformed with
    ``TransformGFilter`` and folded into an ordinary 2D kernel.  After the
    convolution the output is unfolded so that one bias per output channel
    can be added, then flattened again when ``self.flat_channels`` is set.
    """
    # Apply a mask to the filters (optional)
    if self.filter_mask is None:
        w = self.W
    else:
        w, w_mask = F.broadcast(self.W, Variable(self.filter_mask))
        w = w * w_mask

    # Transform the filters
    # w.shape == (out_channels, in_channels, input_stabilizer_size, ksize, ksize)
    # tw.shape == (out_channels, output_stabilizer_size, in_channels, input_stabilizer_size, ksize, ksize)
    tw = TransformGFilter(self.inds)(w)

    # Fold the transformed filters
    folded_out = self.out_channels * self.output_stabilizer_size
    folded_in = self.in_channels * self.input_stabilizer_size
    tw = F.Reshape((folded_out, folded_in, self.ksize, self.ksize))(tw)

    # If flat_channels is False, we need to flatten the input feature maps
    # to have a single 1d feature dimension.
    if not self.flat_channels:
        n_samples = x.data.shape[0]
        in_ny, in_nx = x.data.shape[-2:]
        x = F.reshape(x, (n_samples, folded_in, in_ny, in_nx))

    # Perform the 2D convolution
    y = F.convolution_2d(
        x, tw, b=None, stride=self.stride, pad=self.pad,
        use_cudnn=self.use_cudnn)

    # Unfold the output feature maps.  This happens even if flat_channels
    # is True, because the same bias is added to each G-feature map.
    n_samples, _, ny_out, nx_out = y.data.shape
    unfolded = (n_samples, self.out_channels, self.output_stabilizer_size,
                ny_out, nx_out)
    y = F.reshape(y, unfolded)

    # Add a bias to each G-feature map
    if self.usebias:
        bias = F.Reshape((1, self.out_channels, 1, 1, 1))(self.b)
        y, bias = F.broadcast(y, bias)
        y = y + bias

    # Flatten feature channels if needed
    if self.flat_channels:
        n, nc, ng, d3, d4 = y.data.shape
        y = F.reshape(y, (n, nc * ng, d3, d4))

    return y
def __call__(self, x, test=False):
    """Return the dueling action value of ``x``.

    The output of ``self.hout`` is kept in ``self.embedding``.  ``test``
    is not read by this method.
    """
    self.embedding = self.hout(x)
    hidden = F.relu(self.embedding)
    n_batch = x.shape[0]

    # Advantage, centred by its mean over the actions.
    advantage = self.a_stream(hidden)
    adv_mean = F.reshape(
        F.sum(advantage, axis=1) / self.num_actions, (n_batch, 1))
    advantage, adv_mean = F.broadcast(advantage, adv_mean)
    advantage = advantage - adv_mean

    # State value.
    value = self.v_stream(hidden)
    advantage, value = F.broadcast(advantage, value)

    return DiscreteActionValue(advantage + value)
def __call__(self, x, test=False):
    """Return the dueling distributional action value of ``x``.

    The output of ``self.hout`` is kept in ``self.embedding``.  ``test``
    is not read by this method.
    """
    self.embedding = self.hout(x)
    hidden = F.relu(self.embedding)
    n_batch = x.shape[0]

    # Advantage, centred by its mean over the action axis.
    advantage = F.reshape(
        self.a_stream(hidden), (n_batch, self.num_actions, self.n_atoms))
    adv_mean = F.sum(advantage, axis=1, keepdims=True) / self.num_actions
    advantage, adv_mean = F.broadcast(advantage, adv_mean)
    advantage = advantage - adv_mean

    # State value.
    value = F.reshape(self.v_stream(hidden), (n_batch, 1, self.n_atoms))
    advantage, value = F.broadcast(advantage, value)

    probs = F.softmax(advantage + value, axis=2)
    return DistributionalDiscreteActionValue(probs, self.z_values)
def __call__(self, x):
    """Compute the drawing loss for the input ``x``.

    The channels of ``x`` are read as canvas (0), reference (1) and
    previous pen position (2).  Sets ``self.current_pos``, ``self.draw``,
    ``self.canvas`` and ``self.loss`` and returns the loss.
    """
    # ch: canvas, ref, prev_pen
    pred = self.calc(x)  # b, 1, w, h
    pred_shape = pred.shape
    n_batch, _, _, _ = pred_shape

    # Sample the pen position over the flattened map, then restore the
    # spatial layout.
    flat = F.reshape(pred, (n_batch, -1))
    flat = E.gumbel_softmax(flat, tau=self.tau)
    pred = F.reshape(flat, (n_batch, 1) + pred_shape[2:])
    self.current_pos = pred  # pen position

    prev_pen = x[:, 2:3, :, :]
    mv_cost = F.sum(
        0.5 * self.current_pos
        * (F.convolution_2d(prev_pen, self.move_cost, pad=2) + 4))

    stroke = F.convolution_2d(pred, self.pen, pad=1)  # pen stroke
    strength, stroke = F.broadcast(self.strength, stroke[:, 0, :, :])
    self.draw = strength * stroke

    canvas = x[:, 0, :, :] + self.draw
    self.canvas = E.leaky_clip(canvas[0, :, :], 0., 1., leak=0.001)

    ref = x[:, 1, :, :]
    diff = F.sum((canvas - ref) ** 2)
    self.loss = diff + mv_cost
    return self.loss
def __call__(self, X, ht_enc):
    """Pool ``W(X) + V(ht_enc)`` with the encoder term copied along the last axis.

    When the kernel size exceeds one, the trailing ``kernel_size - 1``
    positions of ``W(X)`` are dropped first.
    """
    n_trailing = self._kernel_size - 1
    conv_out = self.W(X)
    if n_trailing > 0:
        conv_out = conv_out[..., :-n_trailing]

    # V(ht_enc) gets a new axis 2 and is broadcast against W(X), i.e. the
    # same encoder vector is added at every position of the last axis.
    enc_term = functions.expand_dims(self.V(ht_enc), axis=2)
    enc_term, conv_out = functions.broadcast(enc_term, conv_out)

    gates = functions.split_axis(conv_out + enc_term, self.num_split, axis=1)
    return self.pool(gates)
def __call__(self, x, y):
    """Evaluate an RBF kernel between the rows of ``x`` and ``y`` with a sampled scale.

    Parameters
    -----------------
    x: Variable
        Feature of unlabeled samples.
    y: Variable
        Feature of unlabeled samples.

    Returns
    -----------------
    Variable
        Matrix whose ``(i, j)`` entry is
        ``exp(-||g * x[i] - g * y[j]||^2)``, where ``g`` is one value
        drawn with ``F.gaussian`` per call.
    """
    # One random scale per call (F.gaussian is called with the arrays [0]
    # and [e]), copied to the shape of x.
    g = F.broadcast_to(
        F.gaussian(
            np.array([0], dtype=np.float32),
            np.array([np.exp(1)], dtype=np.float32)),
        x.shape)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g ** 2, axis=1)
    y_g_norm = F.sum(y_g ** 2, axis=1)
    # F.linear(a, b) computes a . b^T, so entry (i, j) is <x_g[i], y_g[j]>.
    x_g_y_g = F.linear(x_g, y_g)

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns.
    # The previous code broadcast them the other way round, which produced
    # ||x_j||^2 - 2<x_i, y_j> + ||y_i||^2 instead of a squared distance.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(-x_g_norm + 2 * x_g_y_g - y_g_norm)
def __call__(self, x, y):
    """Evaluate an RBF kernel between the rows of ``x`` and ``y`` with a sampled scale.

    Parameters
    -----------------
    x: Variable
        Feature of unlabeled samples.
    y: Variable
        Feature of unlabeled samples.

    Returns
    -----------------
    Variable
        Matrix whose ``(i, j)`` entry is
        ``exp(-||g * x[i] - g * y[j]||^2)``, where ``g`` is one value
        drawn with ``F.gaussian`` per call.
    """
    # One random scale per call (F.gaussian is called with the arrays [0]
    # and [e]), copied to the shape of x.
    g = F.broadcast_to(
        F.gaussian(
            np.array([0], dtype=np.float32),
            np.array([np.exp(1)], dtype=np.float32)),
        x.shape)
    x_g = x * g
    y_g = y * g

    x_g_norm = F.sum(x_g ** 2, axis=1)
    y_g_norm = F.sum(y_g ** 2, axis=1)
    # F.linear(a, b) computes a . b^T, so entry (i, j) is <x_g[i], y_g[j]>.
    x_g_y_g = F.linear(x_g, y_g)

    # ||x_i||^2 has to vary along the rows and ||y_j||^2 along the columns.
    # The previous code broadcast them the other way round, which produced
    # ||x_j||^2 - 2<x_i, y_j> + ||y_i||^2 instead of a squared distance.
    x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
        F.expand_dims(x_g_norm, 1), x_g_y_g, F.expand_dims(y_g_norm, 0))

    return F.exp(-x_g_norm + 2 * x_g_y_g - y_g_norm)
def broadcast_and_squeeze(*args):
    """Broadcast ``args`` against each other.

    When every argument has exactly one element beyond its first two axes,
    each is first reshaped to ``shape[:2] + (1, 1)``.
    """
    trailing_is_unit = [np.prod(val.shape[2:]) == 1 for val in args]
    if all(trailing_is_unit):
        reshaped = []
        for val in args:
            reshaped.append(F.reshape(val, shape=val.shape[:2] + (1, 1)))
        args = reshaped

    # TODO: Work in progress
    return F.broadcast(*args)
def attention_history(self, dL, cue, train=True):
    """Return the attention-weighted sum of the entries of ``dL`` given ``cue``.

    ``train`` is not read by this method.
    """
    history = F.concat(dL, axis=0)
    history, cue_b = F.broadcast(history, cue)

    scores = self.m(F.tanh(self.W_dm(history) + cue_b))
    weights = F.softmax(F.reshape(scores, (1, len(dL))))

    return F.matmul(weights, history)
def predict(self, tokens):
    """Return the raw output scores of the network for ``tokens``.

    ``tokens`` is either a sequence of ``unicode`` strings, which is passed
    through ``self.feature_extract``, or already-extracted contexts.
    Sets ``self.train`` to ``False``.
    """
    self.train = False
    if isinstance(tokens[0], unicode):
        contexts = self.feature_extract(tokens)
    else:
        contexts = tokens

    # contexts [(w, c, l), (w, c, l)]
    ws, cs, ls = zip(*contexts)

    # Pad every character matrix with -1 up to the widest one.
    widest = max(c.shape[1] for c in cs)
    padded = [
        np.pad(c, ((0, 0), (0, widest - c.shape[1])),
               mode='constant', constant_values=-1)
        for c in cs
    ]

    ws = np.asarray(ws, 'i')
    cs = np.asarray(padded, 'i')
    ls = np.asarray(ls, 'f')

    h_w = self.emb_word(ws)  # (batchsize, windowsize, word_dim)
    h_c = self.emb_char(cs)  # (batchsize, windowsize, max_char_len, char_dim)
    batchsize, windowsize, _, _ = h_c.data.shape

    # (batchsize, windowsize, char_dim): character embeddings summed over
    # axis 2 and divided by ls.
    h_c = F.sum(h_c, 2)
    h_c, lengths = F.broadcast(
        h_c, F.reshape(ls, (batchsize, windowsize, 1)))
    h_c = h_c / lengths

    h = F.reshape(F.concat([h_w, h_c], 2), (batchsize, -1))
    h = F.relu(self.linear1(h))
    h = F.dropout(h, ratio=.5, train=self.train)
    return self.linear2(h).data
def pre(self, x):
    """Compute the pre-activation for ``x``, adding the encoder term when attention is on.

    The branch taken depends on ``self.kernel_size`` (1, 2 or anything
    else) and on whether ``x`` has two or three axes.
    """
    dims = len(x.shape) - 1

    if self.kernel_size == 1:
        ret = self.W(x)
    elif self.kernel_size == 2:
        if dims == 2:
            # Shift the sequence one step along axis 1, zero-filling the
            # first position.
            zeros = Variable(
                self.xp.zeros((self.batch_size, 1, self.in_size),
                              dtype=np.float32),
                volatile='AUTO')
            previous = F.concat((zeros, x[:, :-1, :]), axis=1)
        else:
            previous = self.x
        ret = self.W(x) + self.V(previous)
    else:
        convolved = self.conv(F.swapaxes(x, 1, 2))
        ret = F.swapaxes(convolved[:, :, :x.shape[2]], 1, 2)

    if not self.attention:
        return ret

    # Last encoder position; the axis is kept for sequence input.
    enc = self.encoding[:, -1, :] if dims == 1 else self.encoding[:, -1:, :]
    return sum(F.broadcast(self.U(enc), ret))
def __call__(self, X, ht_enc, test=False):
    """Pool ``W(X) + V(ht_enc)`` with the encoder term copied along the last axis.

    ``test`` is stored in ``self._test``; when true, both terms are cut
    from the backward graph before pooling.
    """
    self._test = test
    conv_out = self.W(X)

    # V(ht_enc) gets a new axis 2 and is broadcast against W(X), i.e. the
    # same encoder vector is added at every position of the last axis.
    enc_term = functions.expand_dims(self.V(ht_enc), axis=2)
    enc_term, conv_out = functions.broadcast(enc_term, conv_out)

    if test:
        conv_out.unchain_backward()
        enc_term.unchain_backward()

    gates = functions.split_axis(conv_out + enc_term, self.num_split, axis=1)
    return self.pool(gates)
def __call__(self, x):
    """Calculate minibatch discrimination using broadcast.

    Parameters
    ---------------
    x: Variable
        input vector, shape is (N, num_units)

    Returns ``x`` (flattened to two axes) concatenated along axis 1 with
    the minibatch features.
    """
    n_batch = x.shape[0]
    xp = x.xp
    x = F.reshape(x, (n_batch, -1))

    projected = F.reshape(self.t(x), (-1, self.b, self.c))
    m = F.reshape(projected, (-1, self.b, self.c))
    m = F.expand_dims(m, 3)
    m_T = F.transpose(m, (3, 1, 2, 0))
    m, m_T = F.broadcast(m, m_T)
    l1_norm = F.sum(F.absolute(m - m_T), axis=2)

    # eraser to erase the l1 norm of each sample with itself
    eraser = F.expand_dims(xp.eye(n_batch, dtype="f"), 1)
    eraser = F.broadcast_to(eraser, (n_batch, self.b, n_batch))

    o_X = F.sum(F.exp(-(l1_norm + 1e6 * eraser)), axis=2)

    # concatenate along channels or units
    return F.concat((x, o_X), axis=1)
def forward_one_step(self, X, ht_enc):
    """Pool a single position of ``W(X)`` combined with ``V(ht_enc)``.

    The position read is index ``-kernel_size`` of the last axis of
    ``W(X)``; it is kept as a length-one axis.
    """
    n_trailing = self._kernel_size - 1
    step_out = self.W(X)[..., -n_trailing - 1, None]

    enc_term = functions.expand_dims(self.V(ht_enc), axis=2)
    enc_term, step_out = functions.broadcast(enc_term, step_out)

    gates = functions.split_axis(step_out + enc_term, self.num_split, axis=1)
    return self.pool(gates)
def forward_one_step(self, X, ht_enc, H_enc, skip_mask, test=False):
    """Advance the attention decoder over the last position of ``W(X)``.

    Appends the new context(s) to ``self.contexts``, updates ``self.ht``
    and grows ``self.H`` along axis 2.  Returns ``self.H``.  When ``test``
    is true the intermediate variables are cut from the backward graph.
    """
    self._test = test
    step_out = self.W(X)[:, :, -1, None]
    enc_term = functions.expand_dims(self.V(ht_enc), axis=2)
    enc_term, step_out = functions.broadcast(enc_term, step_out)
    if test:
        step_out.unchain_backward()
        enc_term.unchain_backward()

    # f-pooling
    Z, forget, out_gate = functions.split_axis(
        step_out + enc_term, 3, axis=1)
    Z = functions.tanh(Z)
    forget = self.zoneout(forget)
    out_gate = functions.sigmoid(out_gate)
    n_steps = Z.shape[2]

    # compute ungated hidden states
    for step in xrange(n_steps):
        z_t = Z[:, :, step]
        f_t = forget[:, :, step]
        if self.contexts is None:
            self.contexts = [(1 - f_t) * z_t]
        else:
            self.contexts.append(
                f_t * self.contexts[-1] + (1 - f_t) * z_t)

    # Additive offset and multiplicative mask used to skip PAD positions;
    # neither depends on the time step.
    if skip_mask is None:
        geta = 0
        mask = 1
    else:
        assert skip_mask.shape[1] == H_enc.shape[2]
        softmax_getas = (skip_mask == 0) * -1e6
        geta = softmax_getas[..., None]
        mask = skip_mask[..., None]

    # compute attention weights (eq.8)
    H_enc = functions.swapaxes(H_enc, 1, 2)
    for step in xrange(n_steps):
        # The contexts appended above are the last n_steps entries.
        ct = self.contexts[step - n_steps]

        alpha = functions.batch_matmul(H_enc, ct) + geta
        alpha = functions.softmax(alpha) * mask
        alpha = functions.broadcast_to(alpha, H_enc.shape)  # copy
        kt = functions.sum(alpha * H_enc, axis=1)

        ot = out_gate[:, :, step]
        self.ht = ot * self.o(functions.concat((kt, ct), axis=1))
        if test:
            self.ht.unchain_backward()

        newest = functions.expand_dims(self.ht, 2)
        if self.H is None:
            self.H = newest
        else:
            self.H = functions.concat((self.H, newest), axis=2)

    return self.H
def __call__(self, x, y):
    """Return, per sample, the Gaussian-mixture density of ``y`` given ``x``.

    Mixture weights, component means and log-variances are all produced
    from the hidden layer ``sigmoid(self.l1_(x))``.
    """
    hidden = F.sigmoid(self.l1_(x))
    weights = F.softmax(self.coef_(hidden))
    means = F.reshape(
        self.mean_(hidden), (-1, self.NUM_MIXTURE, self.OUT_DIM))
    logvar = self.logvar_(hidden)

    means, target = F.broadcast(
        means, F.reshape(y, (-1, 1, self.OUT_DIM)))

    sq_dist = F.sum((target - means) ** 2, axis=2)
    exponent = -0.5 * sq_dist * F.exp(-logvar)
    normaliser = (2 * np.pi * F.exp(logvar)) ** (0.5 * self.OUT_DIM)

    return F.sum(weights * F.exp(exponent) / normaliser, axis=1)
def __call__(self, x):
    """Return the dueling action value for a batch ``x``."""
    features = x
    for layer in self.conv_layers:
        features = self.activation(layer(features))

    # Advantage, centred by its mean over the actions.
    n_batch = x.shape[0]
    advantage = self.a_stream(features)
    adv_mean = F.reshape(
        F.sum(advantage, axis=1) / self.n_actions, (n_batch, 1))
    advantage, adv_mean = F.broadcast(advantage, adv_mean)
    advantage = advantage - adv_mean

    # State value
    value = self.v_stream(features)
    advantage, value = F.broadcast(advantage, value)

    return chainerrl.action_value.DiscreteActionValue(advantage + value)
def __call__(self, x):
    """Combine state value and advantage according to ``self.mode``.

    ``'naive'`` returns ``v + a``; ``'avg'`` and ``'max'`` additionally
    subtract the mean or the maximum of ``a`` over axis 1.  Any other mode
    raises ``ValueError``.
    """
    features = self.cnn_base(x)
    value = self.v_linear(features)
    advantage = self.a_linear(features)

    if self.mode == 'naive':
        value, advantage = F.broadcast(value, advantage)
        return value + advantage

    if self.mode == 'avg':
        baseline = F.mean(advantage, axis=1, keepdims=True)
    elif self.mode == 'max':
        baseline = F.max(advantage, axis=1, keepdims=True)
    else:
        raise ValueError(f'Unknown mode {self.mode}')

    value, advantage, baseline = F.broadcast(value, advantage, baseline)
    return value + advantage - baseline
def __call__(self, x_block, y_in_block, y_out_block):
    """Compute the training loss for one batch and report loss and perplexity.

    The loss is the summed softmax cross entropy divided by the number of
    entries in ``x_block``.  ``'loss'`` and ``'perp'`` are sent through
    ``chainer.report``.
    """
    n_samples = len(x_block)

    # embed
    ex_block = F.dropout(
        self.make_input_embedding(self.embed_x, x_block), self.dropout)
    ey_block = F.dropout(
        self.make_input_embedding(self.embed_y, y_in_block), self.dropout)
    eyy_block = F.dropout(
        self.make_input_embedding(self.embed_yy, y_in_block), self.dropout)
    eys = F.transpose(ey_block, (0, 2, 1))
    eyys = F.transpose(eyy_block, (0, 2, 1))

    # gcnn
    enc = F.expand_dims(ex_block, axis=1)
    for depth in range(self.stack):
        enc = self.gcnn[depth](enc)
    enc = F.dropout(F.squeeze(enc, axis=1), self.dropout)

    # the decoders take a list of per-sample variables
    _, _, dec_out = self.decoder(None, None, list(eys))
    _, _, dec_out2 = self.decoder2(None, None, list(eyys))
    states = F.stack(dec_out, axis=0)
    states2 = F.stack(dec_out2, axis=0)

    # mask_make: zero the decoder states where y_in_block is negative
    target_valid = y_in_block[:, :, None] >= 0
    state_mask = target_valid * self.xp.ones(
        (self.batch, 1, self.n_units), dtype=bool)
    states = F.where(state_mask, states, self.xp.full(states.shape, 0, 'f'))

    # weight_calculate
    scores = F.batch_matmul(states, enc) * self.scale_score
    usable_len = len(x_block[0]) - self.stack * (self.width - 1)
    source_valid = x_block[:, 0:usable_len][:, None, :] >= 0
    score_mask = source_valid * (y_in_block[:, :, None] >= 0)
    scores = F.where(
        score_mask, scores,
        self.xp.full(scores.shape, -self.xp.inf, 'f'))
    attn = F.softmax(scores, axis=2)
    # Replace the NaN entries of the softmax output by zero.
    attn = F.where(
        self.xp.isnan(attn.data), self.xp.zeros(attn.shape, 'f'), attn)

    attn, enc = F.broadcast(attn[:, None], enc[:, :, None])
    context = F.sum(attn * enc, axis=3)

    e = F.transpose(context, (0, 2, 1))
    e = F.squeeze(F.concat(F.split_axis(e, self.batch, axis=0), axis=1))
    states2 = F.squeeze(
        F.concat(F.split_axis(states2, self.batch, axis=0), axis=1))

    t = self.We(e) + self.Ws(states2)
    t = F.dropout(t, self.dropout)

    concat_ys_out = F.concat(y_out_block, axis=0)
    loss = F.sum(
        F.softmax_cross_entropy(t, concat_ys_out, reduce='no')) / n_samples
    chainer.report({'loss': loss.data}, self)

    n_words = concat_ys_out.shape[0]
    perp = self.xp.exp(loss.data * n_samples / n_words)
    chainer.report({'perp': perp}, self)
    return loss
def __call__(self, x, test=False):
    """
    Forward pass through the network.

    :param x: the input to the network
    :param test: true if network is in testing mode (not read here)
    :return: the value stream plus the advantage stream centred over the actions
    """
    self.h = self.activation(self.hidden(x))
    n_batch = x.shape[0]

    advantage = self.a_stream(self.h)
    adv_mean = fun.reshape(
        fun.sum(advantage, axis=1) / self.n_actions, (n_batch, 1))
    advantage, adv_mean = fun.broadcast(advantage, adv_mean)
    advantage = advantage - adv_mean

    value = self.v_stream(self.h)
    advantage, value = fun.broadcast(advantage, value)

    return advantage + value
def __call__(self, u, z):
    """Combine ``u`` and ``z`` element-wise with the learned parameters.

    Computes
    ``b0 + w0z*z + w0u*u + w0zu*z*u
    + ws * sigmoid(b1 + w1z*z + w1u*u + w1zu*z*u)``
    where every parameter is the ``W`` of the link of the same name,
    broadcast to the shape of ``u``.

    The unused locals ``batchsize`` and ``dim`` are dropped and the nine
    copy-pasted broadcast calls go through one helper.
    """
    def tiled(link):
        # Copy the parameter of ``link`` to the shape of ``u``.
        return F.broadcast(link.W, u)[0]

    b0 = tiled(self.b0)
    w0z = tiled(self.w0z)
    w0u = tiled(self.w0u)
    w0zu = tiled(self.w0zu)
    ws = tiled(self.ws)
    b1 = tiled(self.b1)
    w1z = tiled(self.w1z)
    w1u = tiled(self.w1u)
    w1zu = tiled(self.w1zu)

    linear_part = b0 + w0z * z + w0u * u + w0zu * z * u
    gated_part = ws * F.sigmoid(b1 + w1z * z + w1u * u + w1zu * z * u)
    return linear_part + gated_part
def check_forward(self, data):
    """Assert that every broadcast output has shape ``self.out_shape``."""
    inputs = [chainer.Variable(array) for array in data]
    outputs = functions.broadcast(*inputs)

    # When len(xs) == 1, function returns a Variable object
    if isinstance(outputs, chainer.Variable):
        outputs = (outputs,)

    for out in outputs:
        self.assertEqual(out.data.shape, self.out_shape)
def check_forward(self, data):
    """Assert that every broadcast output has shape ``self.out_shape``."""
    variables = [chainer.Variable(array) for array in data]
    broadcasted = functions.broadcast(*variables)

    # When len(xs) == 1, function returns a Variable object
    if isinstance(broadcasted, chainer.Variable):
        broadcasted = (broadcasted,)

    for result in broadcasted:
        self.assertEqual(result.data.shape, self.out_shape)
def __call__(self, x, enc_out=None, mask=None):
    """Multi-head attention over ``x`` (self-attention) or ``enc_out``.

    args
        x: main features in the model
            Variable in (batch, hidden_dim, length)
        enc_out: hidden features from Encoder, read only when
            ``self.self_attention`` is false
            Variable in (batch, hidden_dim, length)
        mask: padding-mask or future-mask
            xp-array in (batch, length, length)
            an element takes 'False' when pad/future, otherwise 'True'
    returns
        the attended features after the output projection

    The attention weights (after dropout) are deep-copied into
    ``self.attention_weight``.
    """
    def project(link, seq):
        # ksize-1-convolution results in parallel linear projections
        return F.squeeze(link(F.expand_dims(seq, axis=3)), axis=3)

    def split_heads(t):
        # (batch, dim, length) -> (batch*parallel, dim/parallel, length)
        return F.concat(
            F.split_axis(t, self.parallel_num, axis=1), axis=0)

    if self.self_attention:
        query, key, value = F.split_axis(project(self.W, x), 3, axis=1)
    else:
        query = project(self.W_Q, x)
        key, value = F.split_axis(
            project(self.W_KV, enc_out), 2, axis=1)

    query = split_heads(query)
    key = split_heads(key)
    value = split_heads(value)
    mask = self.xp.concatenate([mask] * self.parallel_num, axis=0)

    weights = F.batch_matmul(query, key, transa=True) * self.scale
    minus_inf = self.xp.full(weights.shape, -np.inf, dtype=np.float32)
    weights = F.where(mask, weights, minus_inf)
    weights = F.softmax(weights, axis=2)
    weights = F.dropout(weights, self.dropout_rate)
    # Replace the NaN entries of the weights by zero.
    weights = F.where(
        self.xp.isnan(weights.data),
        self.xp.full(weights.shape, 0, dtype=np.float32),
        weights)
    self.attention_weight = copy.deepcopy(weights.data)

    # attention: (batch, q-length, k-length) -> (batch, 1, q-length, k-length)
    # value: (batch, dim/parallel, k-length) -> (batch, dim/parallel, 1, k-length)
    weights, value = F.broadcast(weights[:, None], value[:, :, None])
    attended = F.sum(weights * value, axis=3)

    # Undo the head split, then apply the output projection.
    attended = F.concat(
        F.split_axis(attended, self.parallel_num, axis=0), axis=1)
    return project(self.linear, attended)
def __call__(self, x_u_0, x_u_1):
    """Compute the similarity-weighted class-distance loss of two batches.

    Parameters
    -----------------
    x_u_0: Variable
        Feature of unlabeled samples.
    x_u_1: Variable
        Feature of unlabeled samples.

    Returns
    -----------------
    Variable
        ``sum(W * F_) / (2 * batch_size)``, where ``W`` sums the layer-wise
        similarities of the two networks' mid outputs and ``F_[i, j]`` is
        the squared distance between the class distributions ``f_0[i]``
        and ``f_1[j]``.  The value is also stored in ``self.loss``.
    """
    ffnn_u_0 = self.layers["ffnn_u_0"]
    ffnn_u_1 = self.layers["ffnn_u_1"]

    f_0 = F.softmax(ffnn_u_0(x_u_0))
    f_1 = F.softmax(ffnn_u_1(x_u_1))

    mid_outputs_0 = ffnn_u_0.mid_outputs
    mid_outputs_1 = ffnn_u_1.mid_outputs

    L = len(self.dims[1:])
    # A dict view cannot be indexed on Python 3; a list works on 2 and 3.
    similarities = list(self.similarities.values())

    # Efficient computation
    ## sample similarity W^l summed over l
    W = 0
    for l in range(L):
        W += similarities[l](mid_outputs_0[l], mid_outputs_1[l])

    ## class similarity
    f_0_norm = F.sum(f_0 ** 2, axis=1)
    f_1_norm = F.sum(f_1 ** 2, axis=1)
    # F.linear(a, b) computes a . b^T, so entry (i, j) is <f_0[i], f_1[j]>.
    f_0_f_1 = F.linear(f_0, f_1)

    # ||f_0[i]||^2 has to vary along the rows and ||f_1[j]||^2 along the
    # columns.  The previous code broadcast them the other way round, so
    # F_ was not a squared distance and could go negative.  The debug
    # prints that were checking for those negative values are removed.
    f_0_norm, f_0_f_1, f_1_norm = F.broadcast(
        F.expand_dims(f_0_norm, 1), f_0_f_1, F.expand_dims(f_1_norm, 0))
    F_ = f_0_norm - 2 * f_0_f_1 + f_1_norm

    loss = F.sum(W * F_) / (self.batch_size * 2)
    self.loss = loss
    return loss
def check_backward(self, data, grads):
    """Compare the analytic gradient of each input with a numerical one."""
    inputs = [chainer.Variable(array) for array in data]
    outputs = functions.broadcast(*inputs)

    # When len(xs) == 1, function returns a Variable object
    if isinstance(outputs, chainer.Variable):
        outputs = (outputs,)

    func = outputs[0].creator

    def forward():
        return func.forward(data)

    for index, (out, grad) in enumerate(zip(outputs, grads)):
        out.grad = grad
        out.backward()
        out_grads = tuple(o.grad for o in outputs)
        numerical = gradient_check.numerical_grad(forward, data, out_grads)
        gradient_check.assert_allclose(numerical[index], inputs[index].grad)
def __call__(self, x, eta, test=False):
    """Return the noisy normalised pre-activation and the layer output.

    ``self.lin(x)`` is normalised with its batch statistics (stored in
    ``self.mu`` / ``self.vr``), Gaussian noise scaled by ``eta`` is added,
    and the result ``z`` is scaled and shifted with ``gamma`` / ``beta``
    before the optional activation.  Returns ``(z, output)``.  ``test`` is
    not read by this method.
    """
    h = self.lin(x)
    n_rows = h.data.shape[0]
    n_cols = h.data.shape[1]

    batch_mean = F.sum(h, axis=0) / n_rows
    self.mu = F.broadcast(F.reshape(batch_mean, (1, n_cols)), h)[0]

    batch_std = (F.sum((h - self.mu) * (h - self.mu), axis=0) / n_rows) ** 0.5
    self.vr = F.broadcast(F.reshape(batch_std, (1, n_cols)), h)[0]

    normalised = (h - self.mu) / (self.vr + 1e-7)
    z = normalised + xp.random.randn(x.data.shape[0], self.n_out) * eta

    gamma = F.broadcast(self.gamma.W, z)[0]
    beta = F.broadcast(self.beta.W, z)[0]
    scaled = gamma * (z + beta)

    if self.act is None:
        return z, scaled
    return z, self.act(scaled)
def lighting(
        faces, textures, intensity_ambient=0.5, intensity_directional=0.5,
        color_ambient=(1, 1, 1), color_directional=(1, 1, 1),
        direction=(0, 1, 0)):
    """Multiply ``textures`` by ambient plus directional light.

    The colours and the direction may be given as a tuple/list, a 1-D
    array (shared by the whole batch) or a per-batch 2-D array.  The
    directional term is the ReLU of the dot product between each face
    normal and ``direction``, tinted with ``color_directional``.
    """
    xp = chainer.cuda.get_array_module(faces)
    bs, nf = faces.shape[:2]

    def per_batch(value):
        # tuple/list -> float32 array; 1-D array -> one row per batch item
        if isinstance(value, (tuple, list)):
            value = xp.array(value, 'float32')
        if value.ndim == 1:
            value = cf.broadcast_to(value[None, :], (bs, 3))
        return value

    # arguments
    color_ambient = per_batch(color_ambient)
    color_directional = per_batch(color_directional)
    direction = per_batch(direction)

    # create light
    light = xp.zeros((bs, nf, 3), 'float32')

    # ambient light
    if intensity_ambient != 0:
        ambient = cf.broadcast_to(color_ambient[:, None, :], light.shape)
        light = light + intensity_ambient * ambient

    # directional light
    if intensity_directional != 0:
        # Face normal from two edges sharing the second vertex.
        faces = faces.reshape((bs * nf, 3, 3))
        edge_10 = faces[:, 0] - faces[:, 1]
        edge_12 = faces[:, 2] - faces[:, 1]
        normals = cf.normalize(neural_renderer.cross(edge_10, edge_12))
        normals = normals.reshape((bs, nf, 3))

        if direction.ndim == 2:
            direction = cf.broadcast_to(direction[:, None, :], normals.shape)
        cos = cf.relu(cf.sum(normals * direction, axis=2))
        tinted = cfmath.mul(
            *cf.broadcast(color_directional[:, None, :], cos[:, :, None]))
        light = light + intensity_directional * tinted

    # apply
    light = cf.broadcast_to(light[:, :, None, None, None, :], textures.shape)
    return textures * light
def proportions(self, doc_ids, softmax=False):
    """ Given an array of document indices, return a vector
    for each document of the topic weights.

    Args:
        doc_ids: document indices passed to ``self.weights``.
        softmax: when false (default) the unnormalized weights are
            returned as they are.  When true, the weights are scaled by
            ``self.temperature``, passed through a softmax, multiplied by
            a random 0/1 mask and renormalised so each row sums to
            (almost) one.

    Returns:
        doc_weights : chainer.Variable
            Two dimensional topic weights of each document.
    """
    w = self.weights(doc_ids)
    if not softmax:
        return w

    size = w.data.shape
    # randint(0, 2) draws from {0, 1}.  It replaces random_integers(0, 1),
    # which NumPy deprecated in favour of randint.
    mask = self.xp.random.randint(0, 2, size=size)
    y = (F.softmax(w * self.temperature)
         * Variable(mask.astype('float32')))
    norm, y = F.broadcast(F.expand_dims(F.sum(y, axis=1), 1), y)
    # The epsilon guards rows whose mask is all zeros.
    return y / (norm + 1e-7)
def __call__(self, x):
    """Append minibatch-discrimination features to ``x`` along axis 1.

    While ``self.train_weights`` equals ``False`` the weight of ``self.T``
    is restored from ``self.initial_T`` before the forward pass (once it
    has been recorded) and recorded again afterwards.
    """
    xp = chainer.cuda.get_array_module(x.data)
    n_batch = x.shape[0]

    if self.train_weights == False and self.initial_T is not None:
        self.T.W.data = self.initial_T

    M = F.reshape(self.T(x), (-1, self.num_kernels, self.ndim_kernel))
    M = F.expand_dims(M, 3)
    M_T = F.transpose(M, (3, 1, 2, 0))
    M, M_T = F.broadcast(M, M_T)
    norm = F.sum(abs(M - M_T), axis=2)

    # Large value on the "sample with itself" entries so that their
    # exponential vanishes.
    identity = xp.eye(n_batch, dtype=x.dtype).reshape((n_batch, 1, n_batch))
    eraser = F.broadcast_to(identity, norm.shape)
    o_b = F.sum(F.exp(-(norm + 1e6 * eraser)), axis=2)

    if self.train_weights == False:
        self.initial_T = self.T.W.data

    return F.concat((x, o_b), axis=1)
def gaussian_likelihood(x, mu, var):
    """Returns likelihood of ``x``, or ``N(x; mu, var)``

    Args:
        x(float, numpy.ndarray or chainer.Variable): sample data
        mu(float or chainer.Variable): mean of Gaussian
        var(float): variance of Gaussian

    Returns:
        chainer.Variable: Variable holding likelihood ``N(x; mu, var)``
            whose shape is same as that of ``x``
    """
    def as_variable(value):
        # scalar -> 0-d array -> float32 Variable; anything else that is
        # not an ndarray is passed through untouched.
        if numpy.isscalar(value):
            value = numpy.array(value)
        if isinstance(value, numpy.ndarray):
            value = chainer.Variable(value.astype(numpy.float32))
        return value

    x = as_variable(x)
    mu = as_variable(mu)
    x, mu = F.broadcast(x, mu)

    unnormalised = F.exp(-(x - mu) ** 2 / var / 2)
    return unnormalised / numpy.sqrt(2 * numpy.pi * var)
def f(*xs):
    """Broadcast ``xs`` against each other with ``functions.broadcast``."""
    broadcasted = functions.broadcast(*xs)
    return broadcasted
def f(*xs):
    """Broadcast ``xs`` and return the element-wise square of each output."""
    squares = []
    for out in functions.broadcast(*xs):
        squares.append(out * out)
    return squares
def test_no_args(self):
    """Calling ``functions.broadcast`` without arguments must raise InvalidType."""
    expected = type_check.InvalidType
    with self.assertRaises(expected):
        functions.broadcast()
def _concat(self, h, s):
    """Return attention weights over the source positions ("concat" scoring).

    Args:
        h: decoder state; it gets a new axis 1 and is broadcast against
            ``s``.
        s: source states of shape (batch, src_len, hidden).

    Returns:
        Variable of shape (batch, src_len): softmax of ``self.WA`` applied
        to ``[h; s[:, k]]`` for every source position ``k``.
    """
    batch, src_len, hidden = s.data.shape
    h_tiled, s = F.broadcast(F.expand_dims(h, 1), s)

    # Join along the feature axis so that each row of the reshaped matrix
    # is [h; s_k].  Joining along axis 1 (as before) yields
    # (batch, 2*src_len, hidden); reshaped to (batch*src_len, 2*hidden)
    # that pairs h with h and s_k with s_{k+1} instead.
    pairs = F.concat((h_tiled, s), axis=2)
    pairs = F.reshape(pairs, (batch * src_len, 2 * hidden))

    scores = F.reshape(self.WA(pairs), (batch, src_len))
    return F.softmax(scores)