def forward(x_data, y_data, model, train=True):
    """Run the multi-branch tanh network and return loss, accuracy and intermediates.

    Depends on the module-level ``l_name`` (sequence of link names) and
    ``Returnharray`` (combines the per-branch activations), both defined
    outside this block.

    Returns:
        A 5-tuple: softmax cross-entropy loss, accuracy, arg-max class ids,
        dict of per-branch pre-activations, dict of per-branch tanh outputs.
    """
    target = chainer.Variable(y_data)
    # Each of the first 500 entries of x_data gets its own Variable.
    inputs = {idx: chainer.Variable(x_data[idx]) for idx in range(500)}

    initial_V = {}
    initial_V_relu = {}
    branch_out = {}
    # One branch per name in l_name except the last two
    # (presumably the l501/l502 links applied below -- TODO confirm).
    for branch in range(len(l_name) - 2):
        pre_act = model[l_name[branch]](inputs[branch])
        initial_V[branch] = pre_act
        # Despite the "_relu" name this stores the tanh activation (no dropout).
        initial_V_relu[branch] = F.tanh(pre_act)
        branch_out[branch] = F.dropout(F.tanh(pre_act), train=train)

    merged = Returnharray(branch_out)
    hidden = F.dropout(F.tanh(model.l501(merged)), train=train)
    y = model.l502(hidden)
    y_pre = y.data.argmax(axis=1)
    return (F.softmax_cross_entropy(y, target), F.accuracy(y, target),
            y_pre, initial_V, initial_V_relu)
def translate(model, id2wd, jline):
    """Greedily decode ``jline`` with the attention model and return the words joined by spaces.

    Encoder states are collected for every source word; decoding starts from
    the EOS embedding and stops on the target EOS id or once the loop counter
    exceeds 30.
    """
    def lookup(embed, word_id):
        # Single-id lookup wrapped in a volatile Variable (inference only).
        return embed(Variable(np.array([word_id], dtype=np.int32), volatile='on'))

    def pick_word(h):
        # Attention context over the collected encoder states, then arg-max word.
        ct = Variable(attention.mk_ct(gh, h.data[0]), volatile='on')
        h2 = F.tanh(model.Wc1(ct) + model.Wc2(h))
        return np.argmax(F.softmax(model.W(h2)).data[0])

    gh = []
    for w in jline:
        h = model.H(lookup(model.embedx, model.jvocab[w]))
        gh.append(h.data[0])

    h = model.H(lookup(model.embedx, model.jvocab[EOS]))
    wid = pick_word(h)
    # Ids missing from id2wd fall back to the raw id.
    result_words = [id2wd.get(wid, wid)]

    loop = 0
    while wid != model.evocab[EOS] and loop <= 30:
        h = model.H(lookup(model.embedy, wid))
        wid = pick_word(h)
        result_words.append(id2wd.get(wid, wid))
        loop += 1
    return ' '.join(result_words)
def forward(self, x_data, y_data, train=True):
    """Apply four conv+tanh stages and two FC layers; return (loss, accuracy).

    Inputs become volatile Variables when ``train`` is False; ``y_data`` is
    flattened to one label per sample.
    """
    batchsize = len(x_data)
    volatile = not train
    x = chainer.Variable(x_data, volatile=volatile)
    t = chainer.Variable(y_data.reshape(len(y_data),), volatile=volatile)

    h = F.reshape(x, (batchsize, self.channel, -1))
    in_ch = self.channel
    # (link, channel count used when reshaping that link's output)
    stages = ((self.conv1, 10), (self.conv2, 10), (self.conv3, 100), (self.conv4, 100))
    for conv, out_ch in stages:
        # Convolutions see a 4-D tensor with a trailing singleton axis ...
        h = conv(F.reshape(h, (batchsize, in_ch, -1, 1)))
        # ... and tanh is applied on the 3-D view.
        h = F.tanh(F.reshape(h, (batchsize, out_ch, -1)))
        in_ch = out_ch

    h = F.dropout(F.tanh(self.fc5(h)), train=train)
    y = self.fc6(h)
    return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
def __call__(self, a_list, b_list, p, sentence_length, window_size):
    """Return attention-weighted sums of ``a_list`` and ``b_list`` conditioned on ``p``.

    Weights are exp-scores normalised by their sum over all positions.
    ``window_size`` is accepted but not used yet.
    """
    batch_size = p.data.shape[0]

    # Unfinished local-attention work: a predicted position is computed but
    # not used below. Ideas noted by the original author:
    #   * zero the parts of a and b that fall outside the window, ideally
    #     without iterating over each batch element;
    #   * build a (batch x) sentence-length matrix of Gaussian weights that is
    #     zero outside the window, or a 0/1 window mask;
    #   * keep separate attention weights for a and b, since forward differs
    #     from backward.
    SENTENCE_LENGTH = XP.fnonzeros((batch_size, 1), sentence_length)
    s = functions.tanh(self.ts(p))
    pos = SENTENCE_LENGTH * functions.sigmoid(self.sp(s))

    scores = []
    total = XP.fzeros((batch_size, 1))
    for a, b in zip(a_list, b_list):
        w = functions.tanh(self.aw(a) + self.bw(b) + self.pw(p))
        e = functions.exp(self.we(w))
        scores.append(e)
        total += e

    ZEROS = XP.fzeros((batch_size, self.hidden_size))
    aa = ZEROS
    bb = ZEROS
    for a, b, e in zip(a_list, b_list, scores):
        e /= total  # normalise so the weights sum to one
        aa += a * e
        bb += b * e
    return aa, bb
def forward(self, x_data, y_data, train=True):
    """Apply four conv+tanh stages and two FC layers; return the mean squared error.

    Inputs become volatile Variables when ``train`` is False.
    """
    batchsize = len(x_data)
    volatile = not train
    x = chainer.Variable(x_data, volatile=volatile)
    t = chainer.Variable(y_data, volatile=volatile)

    h = F.reshape(x, (batchsize, self.channel, -1))
    in_ch = self.channel
    # (link, channel count used when reshaping that link's output)
    stages = ((self.conv1, 10), (self.conv2, 10), (self.conv3, 100), (self.conv4, 100))
    for conv, out_ch in stages:
        # 4-D view with a trailing singleton axis for the convolution.
        h = conv(F.reshape(h, (batchsize, in_ch, -1, 1)))
        # tanh on the 3-D view.
        h = F.tanh(F.reshape(h, (batchsize, out_ch, -1)))
        in_ch = out_ch

    h = F.dropout(F.tanh(self.fc5(h)), train=train)
    y = self.fc6(h)
    return F.mean_squared_error(y, t)
def predict(self, x_data, train=False):
    """Run the three-stage conv network on ``x_data`` and return the raw output of ``fc5``.

    The input is wrapped as a volatile Variable; dropout follows ``train``.
    """
    batchsize = len(x_data)
    x = chainer.Variable(x_data, volatile=True)
    h = F.reshape(x, (batchsize, self.channel, -1))

    # The first 4-D reshape uses a hard-coded single channel (not self.channel).
    in_ch = 1
    stages = ((self.conv1, 10), (self.conv2, 100), (self.conv3, 256))
    for conv, out_ch in stages:
        h = conv(F.reshape(h, (batchsize, in_ch, -1, 1)))
        h = F.tanh(F.reshape(h, (batchsize, out_ch, -1)))
        in_ch = out_ch

    h = F.dropout(F.tanh(self.fc4(h)), train=train)
    return self.fc5(h)
def __call__(self, jline, eline):
    """Return the summed cross-entropy loss for translating ``jline`` into ``eline``.

    The encoder runs over ``jline`` and keeps a copy of each hidden state; the
    decoder is fed the reference words and predicts the next word each step
    (EOS after the last word).
    """
    def ids(word_id):
        return Variable(np.array([word_id], dtype=np.int32))

    def step_loss(h, target):
        # Attention context over the stored encoder states, then output loss.
        ct = Variable(mk_ct(gh, h.data[0]))
        h2 = F.tanh(self.Wc1(ct) + self.Wc2(h))
        return F.softmax_cross_entropy(self.W(h2), target)

    gh = []
    self.H.reset_state()
    for w in jline:
        h = self.H(self.embedx(ids(self.jvocab[w])))
        gh.append(np.copy(h.data[0]))

    # Source-side EOS kicks off decoding; its target is the first reference word.
    x_k = self.embedx(ids(self.jvocab[EOS]))
    tx = ids(self.evocab[eline[0]])
    accum_loss = step_loss(self.H(x_k), tx)

    last = len(eline) - 1
    for i, word in enumerate(eline):
        x_k = self.embedy(ids(self.evocab[word]))
        next_w = eline[i + 1] if i < last else EOS
        tx = ids(self.evocab[next_w])
        accum_loss += step_loss(self.H(x_k), tx)
    return accum_loss
def check_forward(self, x_data, use_cudnn='always'):
    """Check that tanh on ``x_data`` keeps ``self.dtype`` and matches tanh of ``self.x``."""
    with chainer.using_config('use_cudnn', use_cudnn):
        actual = functions.tanh(chainer.Variable(x_data))
    self.assertEqual(actual.data.dtype, self.dtype)

    # Reference value computed from the fixture array stored on the test case.
    expected = functions.tanh(chainer.Variable(self.x))
    testing.assert_allclose(expected.data, actual.data)
def forward_one_step(c, h, cur_word, next_word):
    """Advance the LSTM one step on ``cur_word``; return ``(c, h, loss)`` against ``next_word``.

    Uses the module-level ``model`` defined outside this block.
    """
    word = Variable(np.array([cur_word], dtype=np.int32))
    target = Variable(np.array([next_word], dtype=np.int32))

    embedded = F.tanh(model.embed(word))
    lstm_in = model.x_to_h(embedded) + model.h_to_h(h)
    c, h = F.lstm(c, lstm_in)
    # The output scores go through tanh before the softmax cross-entropy.
    scores = F.tanh(model.h_to_y(h))
    return c, h, F.softmax_cross_entropy(scores, target)
def __call__(self, xs):
    """Embed ``xs``, average over axis 1, then apply two tanh-activated linear layers."""
    embedded = self.embed(xs)
    # The embedding output must have exactly three axes; only the middle length is used.
    _, length, _ = embedded.shape
    pooled = F.sum(embedded, axis=1) / length
    hidden = F.tanh(self.linear1(pooled))
    return F.tanh(self.linear2(hidden))
def __call__(self, x, y, state, train=True, target=True):
    """Run one step of the two-layer LSTM; return ``(new_state, loss)``.

    When ``target == False`` the first-layer, hidden and output activations
    are appended to ``self.data_first``, ``self.data_hidden`` and
    ``self.data_output``. The loss is also stored on ``self.loss``.
    """
    volatile = not train
    # Training input is reshaped to (batchsize, 12); otherwise it is used as given.
    raw = x.reshape(self.batchsize, 12) if train else x
    h = Variable(raw, volatile=volatile)
    t = Variable(y.flatten(), volatile=volatile)
    record = (target == False)  # noqa: E712 -- keeps the original comparison semantics

    h0 = F.relu(self.l0(h))
    if record:
        self.data_first.append(h0.data)

    h1_in = F.dropout(F.tanh(self.l1_x(h0) + self.l1_h(state['h1'])), train=train)
    c1, h1 = F.lstm(state['c1'], h1_in)
    # In layer 2 only the input projection goes through tanh/dropout.
    h2_in = F.dropout(F.tanh(self.l2_x(h1)), train=train) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    if record:
        # The recorded "hidden" activation is the first LSTM layer's output.
        self.data_hidden.append(h1.data)

    y = self.l3(h2)
    if record:
        self.data_output.append(y.data)

    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    self.loss = F.softmax_cross_entropy(y, t)
    return state, self.loss
def __call__(self, s):
    """Return the summed next-word cross-entropy over sequence ``s`` using a hand-written LSTM.

    The target after the last element is ``self.eos_id``. Returns ``None``
    for an empty sequence.
    """
    _, k = self.embed.W.data.shape
    h = Variable(np.zeros((1, k), dtype=np.float32))
    c = Variable(np.zeros((1, k), dtype=np.float32))

    accum_loss = None
    last = len(s) - 1
    for i, w1 in enumerate(s):
        w2 = s[i + 1] if i < last else self.eos_id
        x_k = self.embed(Variable(np.array([w1], dtype=np.int32)))
        tx = Variable(np.array([w2], dtype=np.int32))

        # Each gate applies its own dropout call to the previous hidden state.
        z1 = F.tanh(self.Wz(x_k) + self.Rz(F.dropout(h)))
        i1 = F.sigmoid(self.Wi(x_k) + self.Ri(F.dropout(h)))
        f1 = F.sigmoid(self.Wf(x_k) + self.Rf(F.dropout(h)))
        c = i1 * z1 + f1 * c
        o1 = F.sigmoid(self.Wo(x_k) + self.Ro(F.dropout(h)))
        h = o1 * F.tanh(c)

        loss = F.softmax_cross_entropy(self.W(h), tx)
        accum_loss = loss if accum_loss is None else accum_loss + loss
    return accum_loss
def predict(node, neural_model_size, root=True):
    """Recursively compose a tree node into a vector and classify the root.

    Uses the module-level ``model`` defined outside this block.

    Args:
        node: mapping whose ``'node'`` entry is either a ``np.ndarray`` (leaf)
            or a pair of child nodes; the root may also carry ``'label'``.
        neural_model_size: width the leaf array is reshaped to, as ``(1, size)``.
        root: True only for the outermost call.

    Returns:
        For inner calls, the composed Variable. For the root, the predicted
        class id, or ``(predicted, label)`` when the node has a ``'label'``.
    """
    if isinstance(node['node'], np.ndarray):
        # Leaf: wrap the word vector directly.
        word = np.reshape(node['node'], (1, neural_model_size))
        v = chainer.Variable(word)
    else:
        # Internal node: compose the two children.
        left_node, right_node = node['node']
        left = predict(left_node, neural_model_size, root=False)
        right = predict(right_node, neural_model_size, root=False)
        # The original also computed tanh(model.h(concat)) into an unused
        # local; that dead computation has been dropped.
        v = F.tanh(model.l(F.concat((left, right))))

    if not root:
        return v

    # The classifier output is only needed at the root.
    y = model.w(v)
    predicted = cuda.to_cpu(y.data).argmax(1)
    try:
        label = node['label']
    except LookupError:
        # Unlabelled root: return the prediction alone.
        return predicted[0]
    return predicted[0], label
def forward_one_step_rnn(model, h, cur_word, next_word, train=True):
    """Advance the simple tanh RNN one step; return ``(new_h, loss)`` against ``next_word``.

    Inputs become volatile Variables when ``train`` is False.
    """
    volatile = not train
    word = Variable(cur_word, volatile=volatile)
    target = Variable(next_word, volatile=volatile)

    embedded = F.tanh(model.embed(word))
    h = F.tanh(model.x_to_h(embedded) + model.h_to_h(h))
    logits = model.h_to_y(h)
    return h, F.softmax_cross_entropy(logits, target)
def predict(self, x_data, train=False):
    """Feed ``x_data`` through three tanh+dropout FC layers and return the output of ``fc4``."""
    h = chainer.Variable(x_data)
    for layer in (self.fc1, self.fc2, self.fc3):
        h = F.dropout(F.tanh(layer(h)), train=train)
    return self.fc4(h)
def forward_one_step(h, cur_word, next_word, volatile=False):
    """Advance the tanh RNN one step; return ``(new_h, loss)`` against ``next_word``.

    Uses the module-level ``model`` defined outside this block.
    """
    word = Variable(cur_word, volatile=volatile)
    target = Variable(next_word, volatile=volatile)

    embedded = F.tanh(model.embed(word))
    h = F.tanh(model.x_to_h(embedded) + model.h_to_h(h))
    logits = model.h_to_y(h)
    return h, F.softmax_cross_entropy(logits, target)
def forward(x_data, y_data, model, train=True):
    """Run the two-hidden-layer tanh MLP and return loss, accuracy and intermediates.

    Returns:
        A 6-tuple: softmax cross-entropy loss, accuracy, arg-max class ids,
        the first layer's pre-activation, and both post-dropout hidden outputs.
    """
    x = chainer.Variable(x_data)
    t = chainer.Variable(y_data)

    initialV = model.l1(x)
    h1 = F.dropout(F.tanh(initialV), train=train)
    h2 = F.dropout(F.tanh(model.l2(h1)), train=train)
    y = model.l3(h2)
    y_pre = y.data.argmax(axis=1)
    return F.softmax_cross_entropy(y, t), F.accuracy(y, t), y_pre, initialV, h1, h2
def forward_one_step(h, cur_word, next_word, volatile=False):
    """Advance the tanh RNN one step; return ``(new_h, loss, arg-max of the softmax output)``.

    Uses the module-level ``model`` and the ``V`` alias defined outside this block.
    """
    word = V(cur_word, volatile=volatile)
    target = V(next_word, volatile=volatile)

    embedded = F.tanh(model.embed(word))
    h = F.tanh(model.Wx(embedded) + model.Wh(h))
    logits = model.Wy(h)
    loss = F.softmax_cross_entropy(logits, target)
    probs = F.softmax(logits)
    return h, loss, np.argmax(probs.data)
def __call__(self, c1, c2, a, b, s1, s2, s3):
    """Combine the inputs into two gate pre-activations and run ``slstm``.

    Returns the new cell state and the dropout-applied hidden state.
    """
    u1_pre = self.w_au1(a) + self.w_bu1(b) + self.w_s2u1(s2)
    u1 = XP.dropout(functions.tanh(u1_pre))
    u2_pre = self.w_s1u2(s1) + self.w_s3u2(s3)
    u2 = XP.dropout(functions.tanh(u2_pre))

    gates_1 = self.w_u1s1(u1) + self.w_s1s1(s1)
    gates_2 = self.w_u2s2(u2) + self.w_s2s2(s2)
    c, h = slstm(c1, c2, gates_1, gates_2)
    return c, XP.dropout(h)
def forward_one_step(model, h, cur_word, label, volatile=False):
    """Advance the tanh RNN one step and score the output against ``label``.

    Args:
        model: object exposing ``embed``, ``x_to_h``, ``h_to_h`` and ``h_to_y``.
        h: previous hidden state.
        cur_word: array of current word ids.
        label: array of target labels.
        volatile: volatility flag applied to both input Variables.

    Returns:
        ``(new_h, loss, accuracy)``.
    """
    # Both inputs must share the same volatile flag. Previously only the label
    # honoured it, so volatile=True mixed volatile and non-volatile Variables
    # in the loss computation.
    word = Variable(cur_word, volatile=volatile)
    t = Variable(label, volatile=volatile)

    x = F.tanh(model.embed(word))
    h = F.tanh(model.x_to_h(x) + model.h_to_h(h))
    y = model.h_to_y(h)
    loss = F.softmax_cross_entropy(y, t)
    accuracy = F.accuracy(y, t)
    return h, loss, accuracy
def predict(self, x_data, train=False):
    """Feed ``x_data`` (as a volatile Variable) through five tanh+dropout FC layers; return ``fc6`` output."""
    h = chainer.Variable(x_data, volatile=True)
    for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
        h = F.dropout(F.tanh(layer(h)), train=train)
    return self.fc6(h)
def test_forward(x_data):
    """Run the module-level ``model`` on ``x_data`` without dropout and return its raw output."""
    # Wrap the input in chainer's Variable type.
    h = Variable(x_data)
    # No dropout at test time: each hidden layer is a plain tanh of the
    # previous layer's output.
    for layer in (model.l1, model.l2):
        h = F.tanh(layer(h))
    # Return the output data.
    return model.l3(h)
def train_forward(x_data, y_data):
    """Run the module-level ``model`` with dropout and return the MSE against ``y_data``."""
    # Wrap the inputs in chainer's Variable type.
    h = Variable(x_data)
    t = Variable(y_data)
    # Dropout is applied with its default arguments after each hidden tanh;
    # h is always the previous layer's output.
    for layer in (model.l1, model.l2):
        h = F.dropout(F.tanh(layer(h)))
    y = model.l3(h)
    # Return the mean squared error (MSE).
    return F.mean_squared_error(y, t)
def __call__(self, x, train=True):
    """Score a pair of batches through the shared polarity link and two hidden layers.

    Returns:
        ``(y, summed_pre_activations)``, where the second item is the sum of
        the two polarity-link outputs before any activation.
    """
    first, second = x
    initial_V_concat_1 = self.l_polarity(first)
    initial_V_concat_2 = self.l_polarity(second)

    # Dropout after the leaky ReLU is explicitly disabled (train=False).
    act_1 = F.dropout(F.leaky_relu(initial_V_concat_1), train=False)
    act_2 = F.dropout(F.leaky_relu(initial_V_concat_2), train=False)

    # Each input has its own hidden link.
    hidden_1 = F.dropout(F.tanh(self.l_hidden1(act_1)), train=train)
    hidden_2 = F.dropout(F.tanh(self.l_hidden2(act_2)), train=train)

    y = self.l_output(hidden_1 + hidden_2)
    return y, (initial_V_concat_1 + initial_V_concat_2)
def __call__(self, x):
    """Run the multi-branch tanh network on ``x`` and return the output scores.

    Depends on the module-level ``l_name``, ``model`` and ``Returnharray``
    defined outside this block; dropout follows ``self.train``.

    Args:
        x: indexable collection with one input per branch.

    Returns:
        The output of ``model.l502``.
    """
    h = {}
    initial_V = {}
    initial_V_relu = {}
    # One branch per name in l_name except the last two.
    for nameint in range(len(l_name) - 2):
        initial_V[nameint] = model[l_name[nameint]](x[nameint])
        initial_V_relu[nameint] = F.tanh(initial_V[nameint])
        h[nameint] = F.dropout(F.tanh(initial_V[nameint]), train=self.train)

    h6 = F.dropout(F.tanh(model.l501(Returnharray(h))), train=self.train)
    y = model.l502(h6)
    # Previously returned self.l3(h2), but h2 is never defined here, so the
    # call always raised NameError; return the computed scores instead.
    # NOTE(review): links are read from the global `model`, not `self` --
    # confirm that is intended.
    return y
def __call__(self, x_batch, train=True):
    """Apply one link per input dimension, concatenate, and run the hidden and output layers.

    Link names are ``"l_<index>"`` for each index below the module-level
    ``DimentionN``.

    Returns:
        ``(y, initial_V_concat)``: the output scores and the concatenated
        per-dimension pre-activations.
    """
    per_dim = [
        self["l_" + str(index)](x_batch[index])
        for index in range(DimentionN)
    ]
    initial_V_concat = F.concat(per_dim)

    hidden_in = F.dropout(F.tanh(initial_V_concat), train=train)
    hidden_out = F.dropout(F.tanh(self.l_hidden(hidden_in)), train=train)
    y = self.l_output(hidden_out)
    return y, initial_V_concat
def main():
    """Train the RNN language model, logging log-perplexity and saving the model after every epoch."""
    args = parse_args()

    trace('making vocabulary ...')
    vocab, num_lines, num_words = make_vocab(args.corpus, args.vocab)

    trace('initializing CUDA ...')
    cuda.init()

    trace('start training ...')
    model = make_rnnlm_model(args.vocab, args.embed, args.hidden)

    for epoch in range(args.epoch):
        epoch_no = epoch + 1
        trace('epoch %d/%d: ' % (epoch_no, args.epoch))
        log_ppl = 0.0
        trained = 0

        # A fresh SGD optimizer is created for every epoch.
        opt = optimizers.SGD()
        opt.setup(model)

        for batch in generate_batch(args.corpus, args.minibatch):
            batch = [[vocab[x] for x in words] for words in batch]
            K = len(batch)
            # Number of (current, next) word pairs, taken from the first sentence.
            L = len(batch[0]) - 1

            def column(pos):
                # Word ids at position `pos` across the minibatch.
                return make_var([batch[k][pos] for k in range(K)], dtype=np.int32)

            opt.zero_grads()
            s_h = zeros((K, args.hidden))
            for step in range(L):
                s_x = column(step)
                s_t = column(step + 1)
                s_e = functions.tanh(model.w_xe(s_x))
                s_h = functions.tanh(model.w_eh(s_e) + model.w_hh(s_h))
                s_y = model.w_hy(s_h)
                loss = functions.softmax_cross_entropy(s_y, s_t)
                # Gradients are accumulated step by step; one update per batch.
                loss.backward()
                log_ppl += get_data(loss).reshape(()) * K
            opt.update()

            trained += K
            trace(' %d/%d' % (trained, num_lines))

        log_ppl /= float(num_words)
        trace(' log(PPL) = %.10f' % log_ppl)
        trace(' PPL = %.10f' % math.exp(log_ppl))

        trace(' writing model ...')
        save_rnnlm_model(args.model + '.%d' % epoch_no, args.vocab,
                         args.embed, args.hidden, vocab, model)

    trace('training finished.')
def forward(self, x_data, y_data, train=True):
    """Feed ``x_data`` through five tanh+dropout FC layers and return the MSE against ``y_data``.

    Inputs become volatile Variables when ``train`` is False.
    """
    volatile = not train
    h = chainer.Variable(x_data, volatile=volatile)
    t = chainer.Variable(y_data, volatile=volatile)
    for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
        h = F.dropout(F.tanh(layer(h)), train=train)
    return F.mean_squared_error(self.fc6(h), t)
def forward_one_step_lstm(model, state, cur_word, next_word, train=True):
    """Advance the two-layer LSTM one step; return ``(new_state, loss)`` against ``next_word``.

    ``state`` holds ``"c1"``, ``"h1"``, ``"c2"`` and ``"h2"``; a new dict with
    the same keys is returned. tanh is applied to each layer's input.
    """
    volatile = not train
    word = Variable(cur_word, volatile=volatile)
    target = Variable(next_word, volatile=volatile)

    embedded = model.embed(word)
    c1, h1 = F.lstm(state["c1"],
                    model.l1_x(F.tanh(embedded)) + model.l1_h(state["h1"]))
    c2, h2 = F.lstm(state["c2"],
                    model.l2_x(F.tanh(h1)) + model.l2_h(state["h2"]))
    logits = model.l3(F.tanh(h2))

    new_state = {"c1": c1, "h1": h1, "c2": c2, "h2": h2}
    return new_state, F.softmax_cross_entropy(logits, target)
def forward_dumb(x_data, train=True, level=1):
    """Baseline "encoder": average-pool ``level`` times, zoom back, and return the scaled MSE.

    The error is measured between tanh of the input and tanh of the
    reconstruction, scaled by ``global_normalization ** -2`` (a module-level
    value defined outside this block). When ``train`` is False the
    reconstruction is also plotted.
    """
    original = Variable(x_data)
    recon = Variable(x_data)
    # Downsample by 2 `level` times ...
    for _ in range(level):
        recon = F.average_pooling_2d(recon, 2)
    # ... then upsample back the same number of times.
    for _ in range(level):
        recon = zoom_x2(recon)

    scale = global_normalization ** (-2)
    ret = scale * F.mean_squared_error(F.tanh(original), F.tanh(recon))
    if not train:
        plot_img(recon.data, 'd{}'.format(level),
                 'Lv {} dumb encoder, msqe={}'.format(level, ret.data))
    return ret
def __call__(self, x):
    """Encoder-decoder pass with attention-gated skip connections; output squashed by tanh.

    Encoders ``e0``-``e7`` run in sequence. Each decoder ``d0``-``d6`` output
    is concatenated (axis 1) with an attention gate ``a0``-``a6`` computed
    from the matching encoder feature and the decoder's own input.
    """
    # Encoder features, shallowest first: [e0 (leaky ReLU), e1, ..., e7].
    feats = [F.leaky_relu(self.e0(x))]
    for enc in (self.e1, self.e2, self.e3, self.e4, self.e5, self.e6, self.e7):
        feats.append(enc(feats[-1]))

    decoders = (self.d0, self.d1, self.d2, self.d3, self.d4, self.d5, self.d6)
    gates = (self.a0, self.a1, self.a2, self.a3, self.a4, self.a5, self.a6)
    # Skip features are consumed deepest-first, excluding the bottleneck itself.
    skips = feats[-2::-1]

    g = feats[-1]
    for dec, gate, skip in zip(decoders, gates, skips):
        h = dec(g)
        attn = gate(x=skip, g=g)
        g = F.concat([h, attn], axis=1)

    return F.tanh(self.d7(g))
def decoder_predict(self, start_word, enc_states, max_predict_len=MAX_PREDICT_LEN, sample=False):
    """Decode word ids starting from ``start_word`` until EOS or ``max_predict_len``.

    Args:
        start_word: integer id of the start symbol.
        enc_states: encoder hidden states; its first axis sets the width of
            each attention row.
        max_predict_len: maximum number of decoding steps.
        sample: forwarded to ``self.select_word``.

    Returns:
        ``(predicted_sent, alpha_arr)``: the list of predicted word ids and an
        array with one row of attention weights per predicted word (it stays
        empty when attention is disabled).
    """
    xp = cuda.cupy if self.gpuid >= 0 else np

    # alpha_arr stores the attention weights for every predicted word; the
    # visualisation function in nmt_translate.py expects such an array.
    alpha_arr = xp.empty((0, enc_states.shape[0]), dtype=xp.float32)
    # List of predicted word ids to return.
    predicted_sent = []

    with chainer.no_backprop_mode():
        # Load the start symbol.
        pred_word = Variable(xp.asarray([start_word], dtype=np.int32))
        pred_count = 0

        # Prediction loop.
        while pred_count < max_predict_len and (int(pred_word.data) != (EOS_ID)):
            self.decode(pred_word, train=False)

            if self.attn == NO_ATTN:
                prob = F.softmax(self.out(self[self.lstm_dec[-1]].h))
            else:
                hidden_decoder = self[self.lstm_dec[-1]].h
                hidden_encoder = enc_states
                # Dot-product scores between decoder state and encoder states.
                a_t = F.matmul(hidden_decoder, hidden_encoder, transa=False, transb=True)
                score = F.softmax(a_t)
                context = F.matmul(score, hidden_encoder)
                final_vector = F.concat((context, hidden_decoder))
                h_t_p = F.tanh(final_vector)
                predict = self.attention_layer(h_t_p)
                predicted_out = self.out(predict)
                prob = F.softmax(predicted_out)
                # Concatenate with xp, the module that allocated alpha_arr:
                # np.concatenate cannot combine cupy arrays on the GPU path.
                alpha_arr = xp.concatenate((alpha_arr, score.data), axis=0)

            pred_word = self.select_word(prob, train=False, sample=sample)
            # Add the integer id of the predicted word to the output list.
            predicted_sent.append(int(pred_word.data))
            pred_count += 1

    return predicted_sent, alpha_arr
def __call__(self, x, train):
    """Map ``x`` through two FC+BN+ReLU layers and two ``dc`` layers; output squashed by tanh.

    ``train`` is stored on ``self.train``; the batch-norm links receive
    ``test = not train``.
    """
    self.train = train
    # The batch-norm links take the inverse flag.
    if self.train:
        test = False
    else:
        test = True
    h = F.relu(self.bn0(self.fc0(x), test=test))
    h = F.relu(self.bn1(self.fc1(h), test=test))
    # NOTE(review): `h.data.shape[]` has an empty subscript and is a syntax
    # error as written. The intended target shape cannot be determined from
    # this block and must be restored before the code can run.
    h = F.reshape(h, (h.data.shape[]))
    h = F.relu(self.bn2(self.dc2(h), test=test))
    l = F.tanh(self.dc3(h))
    return l
def forward(self, z):
    """Generate an output from latent ``z`` using the wrapped generator ``self.gen``.

    ``z`` is split into seven chunks along axis 1: the first feeds the initial
    linear layer, the other six feed ``GBlock`` .. ``GBlock_5``. The
    conditioning vector is ``self.linear`` applied to a column of ones.
    """
    gen = self.gen
    ones = self.xp.ones((z.shape[0], 1)).astype("float32")
    c = self.linear(ones)

    chunks = F.split_axis(z, 7, axis=1)
    h = gen.G_linear(chunks[0])
    h = h.reshape(-1, 4, 4, 16 * gen.ch).transpose(0, 3, 1, 2)
    h = self.BSA_linear(h)

    pre_attention = (gen.GBlock, gen.GBlock_1, gen.GBlock_2, gen.GBlock_3, gen.GBlock_4)
    for idx, block in enumerate(pre_attention, start=1):
        h = block(h, chunks[idx], c)
    h = gen.attention(h)
    h = gen.GBlock_5(h, chunks[6], c)

    h = F.relu(gen.ScaledCrossReplicaBN(h))
    return F.tanh(gen.conv_2d(h))
def run():
    """Optimise the model for 300 steps, then render a turntable and assemble it into a GIF.

    Frames are written as ``_tmp_NNNN.png`` next to the output file before
    ``make_gif`` is called.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-io', '--filename_obj', type=str,
                        default='./examples/data/teapot.obj')
    parser.add_argument('-ir', '--filename_ref', type=str,
                        default='./examples/data/example3_ref.png')
    parser.add_argument('-or', '--filename_output', type=str,
                        default='./examples/data/example3_result.gif')
    # NOTE(review): --gpu is parsed but never read in this function.
    parser.add_argument('-g', '--gpu', type=int, default=0)
    args = parser.parse_args()
    working_directory = os.path.dirname(args.filename_output)

    model = Model(args.filename_obj, args.filename_ref)
    model.to_gpu()

    optimizer = chainer.optimizers.Adam(alpha=0.1, beta1=0.5)
    optimizer.setup(model)

    progress = tqdm.tqdm(range(300))
    for _ in progress:
        progress.set_description('Optimizing')
        optimizer.target.cleargrads()
        loss = model()
        loss.backward()
        optimizer.update()

    # Draw the object from azimuths 0, 4, ..., 356.
    progress = tqdm.tqdm(range(0, 360, 4))
    for num, azimuth in enumerate(progress):
        progress.set_description('Drawing')
        model.renderer.eye = neural_renderer.get_points_from_angles(2.732, 0, azimuth)
        images = model.renderer.render(model.vertices, model.faces,
                                       cf.tanh(model.textures))
        # Take the first image and move its leading axis to the end.
        image = images.data.get()[0].transpose((1, 2, 0))
        frame_path = '%s/_tmp_%04d.png' % (working_directory, num)
        scipy.misc.toimage(image, cmin=0, cmax=1).save(frame_path)

    make_gif(working_directory, args.filename_output)
def autograd(X, W, b, initial_ct=None, use_tanh=False, mask_x=None):
    """Reference recurrent layer built only from differentiable chainer functions.

    Args:
        X: input of shape ``(batchsize, feature_dimension, seq_length)``,
            as an array or a ``chainer.Variable``.
        W: weights for the width-1 convolution producing the three gate maps.
        b: bias; the first ``feature_dimension`` entries go to the forget
            gate, the remainder to the reset gate.
        initial_ct: initial cell state; zeros when None.
        use_tanh: apply tanh to the cell state before mixing it into the output.
        mask_x: optional mask multiplied into ``X`` (broadcast over time).

    Returns:
        ``(H, C, last_c)``: outputs and cell states stacked along axis 2, and
        the cell state of the final timestep.
    """
    batchsize, feature_dimension, seq_length = X.shape
    if initial_ct is None:
        initial_ct = chainer.Variable(
            np.zeros((batchsize, feature_dimension), dtype=X.dtype))
    if not isinstance(X, chainer.Variable):
        X = chainer.Variable(X)
    if mask_x is not None:
        X *= mask_x[..., None]

    # A width-1 convolution computes all three gate pre-activations at once.
    U = functions.connection.convolution_2d.convolution_2d(
        X[:, :, None, :], W[..., None, None])[:, :, 0]
    Z, F, R = functions.split_axis(U, 3, axis=1)

    bf = functions.broadcast_to(b[:feature_dimension],
                                (batchsize, feature_dimension))
    br = functions.broadcast_to(b[feature_dimension:],
                                (batchsize, feature_dimension))

    # Collect per-timestep slices and concatenate once after the loop.
    # Growing H and C with concat on every step re-copied all earlier
    # timesteps each iteration.
    h_steps = []
    c_steps = []
    ct = initial_ct
    for t in range(seq_length):
        xt = X[..., t]
        zt = Z[..., t]
        ft = functions.sigmoid(F[..., t] + bf)
        rt = functions.sigmoid(R[..., t] + br)

        ct = ft * ct + (1 - ft) * zt
        c_steps.append(functions.expand_dims(ct, 2))

        g_ct = functions.tanh(ct) if use_tanh else ct
        # Highway-style mix of the cell output and the raw input.
        ht = rt * g_ct + (1 - rt) * xt
        h_steps.append(functions.expand_dims(ht, 2))

    H = functions.concat(h_steps, axis=2)
    C = functions.concat(c_steps, axis=2)
    return H, C, C[..., -1]
def seq_encode(self, xs, cr):
    """Embed ``xs`` and return its summed embedding plus convolutional features.

    ``cr == "c"`` selects ``self.embed_c``; any other value selects
    ``self.embed_r``.

    Returns:
        ``(sum_embed_xs, xs_conv1, xs_conv1_swap)``: the embeddings summed
        over axis 1, the tanh-activated ``conv1`` output, and that output
        with axes 1 and 3 swapped.
    """
    embed = self.embed_c if cr == "c" else self.embed_r
    embed_xs = embed(xs)
    if self.wordvec_unchain:
        # Stop gradients from flowing back into the word vectors.
        embed_xs.unchain_backward()

    batchsize, seq_length, dim = embed_xs.shape
    sum_embed_xs = F.sum(embed_xs, axis=1)

    # Add a singleton channel axis for the 2-D convolution.
    as_image = F.reshape(embed_xs, (batchsize, 1, seq_length, dim))
    # 1. wide convolution (original note: do the paper's authors use narrow?)
    xs_conv1 = F.tanh(self.conv1(as_image))
    # Original note: (batchsize, 50, seqlen, 1) --> (batchsize, 1, seqlen, 50)
    xs_conv1_swap = F.swapaxes(xs_conv1, 1, 3)
    return sum_embed_xs, xs_conv1, xs_conv1_swap
def __call__(self, x, h, c):
    """Run one timestep through every stacked LSTM layer.

    Args:
        x: input to the first layer.
        h, c: per-layer hidden and cell states, indexed by layer position.

    Returns:
        ``(hy, cy)``: lists of the new hidden and cell states per layer.
    """
    hy = []
    cy = []
    layer_input = x
    for idx, name in enumerate(self.x_amps.layer_names):
        gates = self.x_amps[name](layer_input) + self.h_amps[name](h[idx])
        in_gate, forget_gate, cand, out_gate = F.split_axis(
            gates, indices_or_sections=4, axis=1)

        new_c = (F.sigmoid(forget_gate) * c[idx]) + (F.sigmoid(in_gate) * F.tanh(cand))
        # NOTE(review): the cell state is squashed with sigmoid here, where a
        # standard LSTM uses tanh -- confirm this is intentional.
        new_h = F.sigmoid(out_gate) * F.sigmoid(new_c)

        cy.append(new_c)
        hy.append(new_h)
        # The next layer consumes this layer's output after dropout.
        layer_input = self.dropout(new_h)
    return hy, cy
def __call__(self, x, adj):
    """Embed the nodes, apply the RGCN convolutions with tanh, and read out.

    Args:
        x: (batchsize, num_nodes, in_channels)
        adj: (batchsize, num_edge_type, num_nodes, num_nodes)

    Returns:
        (batchsize, hidden_channels)
    """
    # The dtype of x must agree with the configured input type.
    expected_type = 'int' if x.dtype == self.xp.int32 else 'float'
    assert self.input_type == expected_type

    h = self.embed(x)
    if self.scale_adj:
        adj = rescale_adj(adj)
    for conv in self.rgcn_convs:
        h = functions.tanh(conv(h, adj))
    return self.rgcn_readout(h)
def forward(self, x_list):
    """Embed a batch of id sequences, apply three conv branches with max-pooling, and return the scores.

    All rows of ``x_list`` are taken to have the length of the first row.
    """
    batch_size = len(x_list)
    height = len(x_list[0])

    flat_ids = np.array(x_list).flatten()
    embedded = F.reshape(self.x_i(_mkivar(flat_ids)),
                         (batch_size, 1, height, self.embed_size))

    # Each branch: convolution, max-pool over the full height, flatten to 128.
    pooled = tuple(
        F.reshape(F.max_pooling_2d(conv(embedded), (height, 1)), (batch_size, 128))
        for conv in (self.i_c1, self.i_c2, self.i_c3)
    )

    h = F.dropout(F.concat(pooled, axis=1), 0.2)
    h = F.dropout(F.tanh(self.h_h(h)), 0.2)
    return self.h_z(h)
def __call__(self, x, c, test=False):
    """Generate an image from input image ``x`` conditioned on text encoding ``c``.

    Returns:
        The tanh output when ``test`` is True; otherwise
        ``(output, hc_mu, hc_var)`` so the caller can also use the two
        text-encoding statistics.
    """
    # Text encoding: sample from the predicted Gaussian, then tile it over an
    # s16 x s16 spatial grid.
    hc_mu = F.leaky_relu(self.lc_mu(c))
    hc_var = F.leaky_relu(self.lc_var(c))
    text = F.gaussian(hc_mu, hc_var)
    text = F.expand_dims(F.expand_dims(text, axis=2), axis=2)
    text = F.tile(text, (1, 1, self.s16, self.s16))

    # Image encoder.
    h = x
    for enc in (self.c1, self.c2, self.c3):
        h = enc(h, test=test)

    # Join text and image features.
    h = self.c_joint(F.concat((h, text)), test=test)

    # Four residual blocks.
    residual_blocks = (
        (self.cr1_0, self.cr1_1),
        (self.cr2_0, self.cr2_1),
        (self.cr3_0, self.cr3_1),
        (self.cr4_0, self.cr4_1),
    )
    for first, second in residual_blocks:
        branch = second(first(h, test=test), test=test)
        h = F.relu(h + branch)

    # Upsampling.
    for dec in (self.dc1, self.dc2, self.dc3, self.dc4):
        h = dec(h, test=test)
    h = F.tanh(self.c5(h))

    if test:
        return h
    return h, hc_mu, hc_var
def __call__(self, batchsize=64, z=None, y=None, **kwargs):
    """Generate a batch of outputs, sampling ``z`` and/or ``y`` when they are not given.

    ``y`` stays None when ``self.n_classes`` is not positive. Extra keyword
    arguments are forwarded to the conditional blocks.

    Raises:
        Exception: if ``z`` and ``y`` have different batch sizes.
    """
    if z is None:
        z = sample_continuous(self.dim_z, batchsize,
                              distribution=self.distribution, xp=self.xp)
    if y is None:
        if self.n_classes > 0:
            y = sample_categorical(self.n_classes, batchsize,
                                   distribution="uniform", xp=self.xp)
        else:
            y = None
    if (y is not None) and z.shape[0] != y.shape[0]:
        raise Exception('z.shape[0] != y.shape[0], z.shape[0]={}, y.shape[0]={}'.format(z.shape[0], y.shape[0]))

    h = self.l1(z)
    h = F.reshape(h, (h.shape[0], -1, self.bottom_width, self.bottom_width))
    for block in (self.block2, self.block3, self.block4, self.block5):
        h = block(h, y, **kwargs)
    h = self.activation(self.b6(h))
    return F.tanh(self.l6(h))
def forward(self, xs, ys):
    """Compute and report the attention seq2seq loss for source ``xs`` and target ``ys``.

    The summed token-level cross-entropy is divided by the batch size and
    reported under the key ``'loss'``.
    """
    eos_dst = self.xp.array([self.v_eos_dst], np.int32)
    # Decoder inputs start with EOS; expected outputs end with EOS.
    ys_in = [F.concat([eos_dst, y], axis=0) for y in ys]
    ys_out = [F.concat([y, eos_dst], axis=0) for y in ys]

    # Both xs and ys_in are lists of arrays.
    exs = sequence_embed(self.embed_x, xs)
    eys = sequence_embed(self.embed_y, ys_in)
    batch = len(xs)

    def to_decoder_state(state):
        # Regroup the encoder state to (n_layers, batch, 2 * n_units).
        per_sample = F.reshape(F.transpose(state, (1, 0, 2)),
                               (batch, self.n_layers, self.n_units * 2))
        return F.transpose(per_sample, (1, 0, 2))

    # None represents a zero vector in an encoder.
    hx, cx, xs_states = self.encoder(None, None, exs)
    hx = to_decoder_state(hx)
    cx = to_decoder_state(cx)
    _, _, os = self.decoder(hx, cx, eys)

    # Attention context per sentence, merged with the decoder outputs.
    ctxs = [self.att(xh, yh) for (xh, yh) in zip(xs_states, os)]
    att_os = [F.tanh(self.Wc(F.concat([ch, yh], axis=1)))
              for (ch, yh) in zip(ctxs, os)]

    concat_os = F.concat(att_os, axis=0)
    concat_ys_out = F.concat(ys_out, axis=0)
    token_losses = F.softmax_cross_entropy(self.Ws(concat_os), concat_ys_out,
                                           reduce='no')
    loss = F.sum(token_losses) / batch
    chainer.report({'loss': loss}, self)
    return loss
def __call__(self, bs, dim=100, y=None, test=False):
    """Generate a batch through four Linear/BatchNorm/Branch/Nonlinear stages and a final linear layer.

    Every stage's activation is appended to ``self.hiddens``, which is reset
    on each call.
    """
    self.hiddens = []
    h = self.generate_norm(bs, dim)

    stages = (
        (self.linear0, self.bn0, self.branch0),
        (self.linear1, self.bn1, self.branch1),
        (self.linear2, self.bn2, self.branch2),
        (self.linear3, self.bn3, self.branch3),
    )
    for linear, bn, branch in stages:
        h = bn(linear(h), test)
        # A fresh uniform sample drives each branch.
        noise = self.generate_unif(bs, dim)
        h = self.act(h + branch(noise, y, test))
        self.hiddens.append(h)

    # TODO: tanh? (the original kept an unreachable `return F.tanh(h)` after this return)
    return self.linear4(h)
def __call__(self, batchsize=64, z=None, y=None):
    """Generate a batch, printing debug sums of the activations after every stage.

    After each stage the sum of the current activation and the sum of
    ``block2.c2``'s bias are printed.

    Raises:
        ValueError: if ``z`` and ``y`` have different batch sizes.
    """
    if z is None:
        z = sample_continuous(self.dim_z, batchsize,
                              distribution=self.distribution, xp=self.xp)
    if y is None:
        if self.n_classes > 0:
            y = sample_categorical(self.n_classes, batchsize,
                                   distribution="uniform", xp=self.xp)
        else:
            y = None
    if (y is not None) and z.shape[0] != y.shape[0]:
        raise ValueError('z.shape[0] != y.shape[0]')

    def dump(tag, bias_tag, value):
        # Debug trace: activation sum, then the watched bias sum.
        print(tag, np.sum(value.data))
        print(bias_tag, np.sum(self.block2.c2.b.data))

    h = z
    dump("B0", "C2B0", h)
    h = self.l1(h)
    h = F.reshape(h, (h.shape[0], -1, self.bottom_width, self.bottom_width))
    dump("B1", "C2B1", h)
    h = self.block2(h, y)
    dump("B2", "C2B2", h)
    h = self.block3(h, y)
    dump("B3", "C2B3", h)
    h = self.block4(h, y)
    dump("B4", "C2B4", h)
    h = self.b5(h)
    dump("B5", "C2B5", h)
    h = self.activation(h)
    dump("B6", "C2B6", h)
    h = F.tanh(self.c5(h))
    dump("B7", "C2B7", h)
    return h
def compute_ctxt(previous_state, prev_word_embedding=None):
    """Return ``(context, attention)`` for the given decoder state.

    Reads names from the enclosing scope (``self``, ``mb_size``,
    ``precomputed_al_factor``, ``fb_concat``, ``mask_length``,
    ``concatenated_penalties``, ``nb_elems``), none of which is defined in
    this block. When the current minibatch is smaller than ``mb_size`` the
    precomputed tensors are cut down to the leading rows.
    """
    current_mb_size = previous_state.data.shape[0]
    use_mask = mask_length > 0

    if current_mb_size < mb_size:
        # Keep only the first current_mb_size rows of the precomputed data.
        al_factor, _ = F.split_axis(precomputed_al_factor, (current_mb_size, ), 0)
        used_fb_concat, _ = F.split_axis(fb_concat, (current_mb_size, ), 0)
        if use_mask:
            used_concatenated_penalties = concatenated_penalties[:current_mb_size]
    else:
        al_factor = precomputed_al_factor
        used_fb_concat = fb_concat
        if use_mask:
            used_concatenated_penalties = concatenated_penalties

    state_al_factor = self.al_lin_s(previous_state)
    # As suggested by Isao Goto: also condition on the previous word embedding.
    if prev_word_embedding is not None:
        state_al_factor = state_al_factor + self.al_lin_y(prev_word_embedding)

    # Broadcast the state term over all nb_elems source positions.
    state_al_factor_bc = F.broadcast_to(
        F.reshape(state_al_factor, (current_mb_size, 1, self.Ha)),
        (current_mb_size, nb_elems, self.Ha))

    activated = F.tanh(state_al_factor_bc + al_factor)
    flat = F.reshape(activated, (current_mb_size * nb_elems, self.Ha))
    a_coeffs = F.reshape(self.al_lin_o(flat), (current_mb_size, nb_elems))

    if use_mask:
        with cuda.get_device_from_array(used_concatenated_penalties):
            # The penalties are added to the scores before the softmax.
            a_coeffs = a_coeffs + used_concatenated_penalties

    attn = F.softmax(a_coeffs)
    ci = F.reshape(batch_matmul(attn, used_fb_concat, transa=True),
                   (current_mb_size, self.Hi))
    return ci, attn
def run():
    """Optimise the model for up to 1000 steps, saving a frame per step, then build a GIF.

    Stops early once the loss drops below 70. The reference image is
    regenerated first when ``--make_reference_image`` is non-zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-io', '--filename_obj', type=str,
                        default='./examples/data/teapot.obj')
    parser.add_argument('-ir', '--filename_ref', type=str,
                        default='./examples/data/example4_ref.png')
    parser.add_argument('-or', '--filename_output', type=str,
                        default='./examples/data/example4_result.gif')
    parser.add_argument('-mr', '--make_reference_image', type=int, default=0)
    # NOTE(review): --gpu is parsed but never read in this function.
    parser.add_argument('-g', '--gpu', type=int, default=0)
    args = parser.parse_args()
    working_directory = os.path.dirname(args.filename_output)

    if args.make_reference_image:
        make_reference_image(args.filename_ref, args.filename_obj)

    model = Model(args.filename_obj, args.filename_ref)
    model.to_gpu()

    optimizer = chainer.optimizers.Adam(alpha=0.1)
    optimizer.setup(model)

    progress = tqdm.tqdm(range(1000))
    for step in progress:
        optimizer.target.cleargrads()
        loss = model()
        loss.backward()
        optimizer.update()

        # Render the current state and save it as this step's frame.
        images = model.renderer.render(model.vertices, model.faces,
                                       cf.tanh(model.textures))
        image = images.data.get()[0]
        frame_path = '%s/_tmp_%04d.png' % (working_directory, step)
        scipy.misc.toimage(image, cmin=0, cmax=1).save(frame_path)

        progress.set_description('Optimizing (loss %.4f)' % loss.data)
        if loss.data < 70:
            break

    make_gif(working_directory, args.filename_output)
def __call__(self, fs, bs, h):
    """Compute attention over the forward and backward encoder states.

    :param fs: list of hidden vectors from the forward encoder
    :param bs: list of hidden vectors from the backward encoder
    :param h: hidden vector produced by the decoder
    :return att_f: weighted average of the forward encoder hidden vectors
    :return att_b: weighted average of the backward encoder hidden vectors
    :return att: the raw (pre-exponential) scores of every position,
        concatenated along axis 1
    """
    # Remember the minibatch size.
    batch_size = h.data.shape[0]
    # Exponentiated scores (ws) and raw scores (att), one entry per position.
    ws = []
    att = []
    # Running sum of the exponentiated scores, used as softmax denominator.
    sum_w = Variable(self.ARR.zeros((batch_size, 1), dtype='float32'))
    for f, b in zip(fs, bs):
        # Score one position from the forward state, the backward state and
        # the decoder state.
        w = self.hw(functions.tanh(self.fh(f) + self.bh(b) + self.hh(h)))
        att.append(w)
        # Numerator of the softmax.
        w = functions.exp(w)
        ws.append(w)
        sum_w = sum_w + w

    # Accumulators for the weighted averages.
    att_f = Variable(
        self.ARR.zeros((batch_size, self.hidden_size), dtype='float32'))
    att_b = Variable(
        self.ARR.zeros((batch_size, self.hidden_size), dtype='float32'))
    for f, b, w in zip(fs, bs, ws):
        # Normalise so that the weights sum to one.
        weight = w / sum_w
        # Add weight * encoder hidden vector to each output vector. The
        # backward context must be built from `b` (it previously reused `f`,
        # which made att_b a copy of att_f).
        att_f = att_f + functions.reshape(
            functions.batch_matmul(f, weight), (batch_size, self.hidden_size))
        att_b = att_b + functions.reshape(
            functions.batch_matmul(b, weight), (batch_size, self.hidden_size))
    att = functions.concat(att, axis=1)
    return att_f, att_b, att
def forward_one(x, target, label):
    """Classify the character at position ``target`` of ``x`` from its window.

    The sequence is padded with ``window // 2`` ``'<s>'`` markers on the left
    and as many ``'</s>'`` markers on the right, so a window of
    ``2 * (window // 2) + 1`` characters centred on ``target`` always exists.

    :param x: iterable of characters (every character must be in ``char2id``)
    :param target: index into the unpadded ``x`` of the character to classify
    :param label: gold label, passed through ``get_onehot``
    :return: ``(predicted_index, loss)`` where ``predicted_index`` is the
        argmax of the softmax output and ``loss`` is the softmax cross entropy
        between the output and the encoded label
    """
    # make input window vector
    distance = window // 2
    padded = ['<s>'] * distance + list(x) + ['</s>'] * distance

    # After padding, the original index `target` sits at `target + distance`,
    # so the centred window covers padded[target : target + 2 * distance + 1].
    # (The previous offsets, range(-distance + 1, distance + 2), were only
    # centred for window == 3 and could wrap to negative indices otherwise.)
    char_vecs = []
    for offset in range(2 * distance + 1):
        char_id = char2id[padded[target + offset]]
        char_vecs.append(model.embed(get_onehot(char_id)))

    concat = F.concat(tuple(char_vecs))
    hidden = model.hidden1(F.sigmoid(concat))
    output = F.tanh(model.output(hidden))
    dist = F.softmax(output)
    correct = get_onehot(label)
    return np.argmax(dist.data), F.softmax_cross_entropy(output, correct)
def _attention(self, h, s, batch_size, sequence_length):
    """Compute the attention context vector over the encoder states.

    The scores are ``v(tanh(W1(h) + W2(s)))``, softmax-normalised over the
    sequence axis and stored on ``self.a``.

    :param h: encoder states. ``W1(h)`` is reshaped to
        ``(batch_size, sequence_length, decoder_hidden_size)``, so ``h`` is
        assumed to hold ``encoder_hidden_size`` features per
        (batch, position) pair, batch-major -- TODO confirm against caller.
    :param s: decoder state; ``W2(s)`` is broadcast along the sequence axis
    :param batch_size: number of sequences in the minibatch
    :param sequence_length: number of encoder positions per sequence
    :return: context of shape ``(batch_size, encoder_hidden_size)``
    """
    decoder_hidden_size = self.decoder_hidden_size
    encoder_hidden_size = self.encoder_hidden_size
    input_shape = (batch_size, sequence_length, decoder_hidden_size)

    weighted_h = F.reshape(self.W1(h), input_shape)
    weighted_s = F.broadcast_to(F.expand_dims(self.W2(s), axis=1), input_shape)
    score = self.v(
        F.reshape(F.tanh(weighted_s + weighted_h),
                  (batch_size * sequence_length, decoder_hidden_size)))
    a = F.softmax(F.reshape(score, (batch_size, sequence_length)))
    self.a = a

    # Keep the same (batch, position, feature) layout that was used for the
    # scores and let batch_matmul do the transpose. Reshaping straight to
    # (batch, feature, position) would reinterpret memory rather than
    # transpose it, mixing features of different positions.
    states = F.reshape(h, (batch_size, sequence_length, encoder_hidden_size))
    c = F.batch_matmul(states, a, transa=True)
    return F.reshape(c, (batch_size, encoder_hidden_size))
def __call__(self, z):
    """Compute the forward pass.

    Parameters
    ----------------
    z: Variable
        random vector drawn from a uniform distribution,
        its shape is (N, 100)
    """
    out = F.relu(self.bn0(self.l0(z)))
    # data format is NCHW
    out = F.reshape(
        out, (len(z), self.ch, self.bottom_width, self.bottom_width))

    # Four deconvolution + batch-norm + ReLU stages.
    stages = (
        (self.dc1, self.bn1),
        (self.dc2, self.bn2),
        (self.dc3, self.bn3),
        (self.dc4, self.bn4),
    )
    for deconv, norm in stages:
        out = F.relu(norm(deconv(out)))

    # The last deconvolution is squashed with tanh.
    return F.tanh(self.dc5(out))
def forward(self, x):
    """Apply four layer + batch-norm + ReLU stages, then ``l5`` and tanh.

    :param x: input passed to ``self.l1``
    :return: ``tanh(l5(...))`` of the transformed input
    """
    stages = (
        (self.l1, self.bn1),
        (self.l2, self.bn2),
        (self.l3, self.bn3),
        (self.l4, self.bn4),
    )
    out = x
    for layer, norm in stages:
        out = F.relu(norm(layer(out)))
    return F.tanh(self.l5(out))
def get_scores(self, candidates, links, relations, edges, xp, mode, RC, EC):
    """Score candidate ``(head, relation, tail, label)`` tuples.

    Every distinct head/tail entity is embedded once through
    ``self.get_context``; the score of a candidate is the squared L2 norm of
    ``head - tail + relation``. When ``self.is_bound_wr`` is set, the
    relation embeddings are passed through tanh first. ``mode`` is not used
    by this method.

    :return: one score per candidate, in candidate order
    """
    # Collect the distinct entities appearing as head or tail.
    unique = set()
    for head, _rel, tail, _label in candidates:
        unique.update((head, tail))
    entities = list(unique)

    # One context vector per entity, split into per-entity pieces.
    context = self.get_context(entities, links, relations, edges, 0, xp, RC, EC)
    pieces = F.split_axis(context, len(entities), axis=0)
    edict = dict(zip(entities, pieces))

    rels = [rel for _head, rel, _tail, _label in candidates]
    diffs = F.concat(
        [edict[head] - edict[tail] for head, _rel, tail, _label in candidates],
        axis=0)

    xr = self.embedR(xp.array(rels, 'i'))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    return F.batch_l2_norm_squared(diffs + xr)
def decode(self, p, hiddens, t=None):
    """Run one decoding step.

    @param p        current decoder state
    @param hiddens  encoder states handed to ``Seq2Seq.calculate_ct``
    @param t        ground truth (used in the Train and Valid phases)
    @return ``(next_p, loss)`` in the Train and Valid phases,
            ``(next_p, y)`` otherwise (Test)
    """
    context = Variable(Seq2Seq.calculate_ct(hiddens, p.data))
    y = self.l3(F.tanh(self.w1(context) + self.w2(p)))

    # Train and Valid behave identically: both report the MSE loss.
    if self.phase is Seq2Seq.Train or self.phase is Seq2Seq.Valid:
        second = F.mean_squared_error(y, t)
    else:
        # Test: hand back the raw prediction in place of a loss.
        second = y
    return self.encode(y), second
def onestep(self, ys, hx, cx, oxs, hts):
    """Advance the decoder by one step and apply attention.

    :param ys: current input tokens, embedded with ``self.emb``
    :param hx: recurrent hidden state passed to ``self.rnn``
    :param cx: recurrent cell state passed to ``self.rnn``
    :param oxs: sequence of encoder outputs (stacked before attention)
    :param hts: previous attentional states; only read when
        ``self.feeding`` is set
    :return: ``(hy, cy, oys, hts)`` -- new recurrent states, the output of
        ``self.wo`` and the new attentional states
    """
    n_items = len(ys)
    embedded = self.emb(ys)

    if self.feeding:
        # Concatenate the previous attentional state onto each embedding.
        prev_hts = F.stack(hts)
        embedded = F.concat((F.expand_dims(embedded, axis=1), prev_hts), axis=2)
        rnn_inputs = F.separate(embedded)
    else:
        rnn_inputs = F.split_axis(embedded, n_items, 0)
    hy, cy, rnn_outs = self.rnn(hx, cx, rnn_inputs)

    dec_outs = F.stack(rnn_outs)
    enc_outs = F.stack(oxs)
    contexts = self.attn(dec_outs, enc_outs)

    combined = F.concat((dec_outs, contexts), axis=2)
    new_hts = F.tanh(F.stack(sequence_linear(self.wc, combined)))
    outputs = self.wo(F.concat(new_hts, axis=0))
    return hy, cy, outputs, new_hts
def forward(self, x, index_array):
    """Transform the rows of ``x`` and average them per segment.

    ``x`` is reshaped to ``(-1, self.dim)``, optionally passed through
    dropout, projected with ``self.x2z``, optionally activated and
    optionally added back to the input (residual). The rows are then split
    at ``index_array`` and every segment but the last is averaged; an empty
    segment yields a zero vector.

    :return: the stacked per-segment averages
    """
    rows = x.reshape(-1, self.dim)
    if self.dropout_rate != 0:
        rows = F.dropout(rows, ratio=self.dropout_rate)

    z = self.x2z(rows)
    if self.activate == 'tanh':
        z = F.tanh(z)
    elif self.activate == 'relu':
        z = F.relu(z)
    if self.is_residual:
        z = z + rows

    # The trailing piece produced by split_axis is discarded.
    segments = F.split_axis(z, index_array, axis=0)[:-1]
    pooled = [
        F.average(segment, axis=0) if len(segment) > 0
        else Variable(np.zeros(self.dim, dtype=np.float32))
        for segment in segments
    ]
    return F.stack(pooled)
def __call__(self, batchsize=64, z=None, **kwargs):
    """Generate a batch of outputs from latent vectors.

    :param batchsize: number of latent vectors to sample when ``z`` is None
    :param z: optional latent batch; sampled with ``sample_continuous``
        when omitted
    :param kwargs: accepted but not used by this method
    :return: ``tanh`` of the final layer ``self.l7``
    """
    if z is None:
        z = sample_continuous(self.dim_z, batchsize,
                              distribution=self.distribution, xp=self.xp)

    out = self.l1(z)
    # (Batchsize, auto, bottom_width, bottom_width)
    out = F.reshape(
        out, (out.shape[0], -1, self.bottom_width, self.bottom_width))

    blocks = (self.block2, self.block3, self.block4,
              self.block5, self.block6, self.block7)
    for block in blocks:
        out = block(out)

    out = self.activation(self.b7(out))
    return F.tanh(self.l7(out))
def __call__(self, hy, ys):
    """Build one sentence vector per sentence from its final state and words.

    :param hy: final hidden states; the leading axis is dropped by
        reshaping to ``hy.shape[1:]``
    :param ys: per-sentence word-level hidden states
    :return: ``tanh(linear(dropout(concat(hy, context))))``
    """
    # reshape: (1, batch_size, hidden_size) -> (batch_size, hidden_size)
    hy = F.reshape(hy, hy.shape[1:])

    # For every sentence, take the element-wise product of its last hidden
    # state with each of its word states and sum over the words:
    #   broadcast_to: hidden state -> (number of words, hidden_size)
    #   sum: (number of words, hidden_size) -> (hidden_size, )
    per_sentence = [
        F.sum(F.broadcast_to(last, words.shape) * words, axis=0)
        for last, words in zip(hy, ys)
    ]
    context_vector = F.stack(per_sentence, axis=0)

    joined = F.concat((hy, context_vector), axis=1)
    return F.tanh(self.linear(F.dropout(joined, self.dropout)))
def decode_once(self, x, state, train=True):
    """Decode a single target step with input feeding.

    :param x: current target-side input, embedded with ``self.trg_emb``
    :param state: dict holding ``'c'`` and ``'h'`` and, after the first
        step, ``'h_tilde'``; it is updated in place
    :param train: forwarded to ``self.attender``
    :return: ``(o, state)`` -- the output of ``self.ho`` and the updated
        state dict
    """
    prev_c = state['c']
    prev_h = state['h']
    prev_h_tilde = state.get('h_tilde', None)

    gates = self.eh(self.trg_emb(x)) + self.hh(prev_h)
    if prev_h_tilde is not None:
        # Feed the previous attentional state back into the LSTM input.
        gates = gates + self.ch(prev_h_tilde)
    new_c, new_h = F.lstm(prev_c, gates)

    attended = self.attender(new_h, train=train)
    new_h_tilde = F.tanh(self.w_c(F.concat([attended, new_h])))
    o = self.ho(new_h_tilde)

    state['c'] = new_c
    state['h'] = new_h
    state['h_tilde'] = new_h_tilde
    return o, state
def pi_and_v(self, x):
    """Compute the policy output, the value output and the new hidden state.

    The first channel of ``x`` feeds the shared convolutional trunk; the
    last 64 channels of ``x`` are used as the previous hidden state of a
    convolutional GRU on the policy branch.

    :return: ``(pout, vout, h_t)``
    """
    # Shared trunk on the first input channel.
    trunk = F.relu(self.conv1(x[:, 0:1, :, :]))
    for layer in (self.diconv2, self.diconv3, self.diconv4):
        trunk = layer(trunk)

    # Policy branch: two more convolutions, then a convolutional GRU update.
    gru_in = self.diconv6_pi(self.diconv5_pi(trunk))
    prev_hidden = x[:, -64:, :, :]
    update_gate = F.sigmoid(self.conv7_Wz(gru_in) + self.conv7_Uz(prev_hidden))
    reset_gate = F.sigmoid(self.conv7_Wr(gru_in) + self.conv7_Ur(prev_hidden))
    candidate = F.tanh(
        self.conv7_W(gru_in) + self.conv7_U(reset_gate * prev_hidden))
    h_t = (1 - update_gate) * prev_hidden + update_gate * candidate
    pout = self.conv8_pi(h_t)

    # Value branch.
    vout = self.conv7_V(self.diconv6_V(self.diconv5_V(trunk)))
    return pout, vout, h_t