def check_invalid_reduce(self, x, t):
    with chainer.using_config('use_cudnn', self.use_cudnn):
        with self.assertRaises(ValueError):
            functions.softmax_cross_entropy(
                x, t, self.normalize, self.cache_score,
                reduce='unknown_reduce_type',
                enable_double_backprop=self.enable_double_backprop)
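A minimal, self-contained sketch of the same check outside the test class (names and shapes are illustrative, not from the original suite): only 'mean' and 'no' are accepted for reduce, so any other string raises ValueError.

import numpy as np
import chainer.functions as F

x = np.random.randn(4, 3).astype(np.float32)   # unnormalized scores, shape (N, C)
t = np.array([0, 2, 1, 1], dtype=np.int32)     # integer class labels, shape (N,)
try:
    F.softmax_cross_entropy(x, t, reduce='unknown_reduce_type')
except ValueError:
    pass  # expected: reduce must be 'mean' or 'no'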
def train_one(gen, dis, optimizer_gen, optimizer_dis, x_batch, y_batch, gpu_device):
    batch_size = len(x_batch)
    xp = np if gpu_device is None else cuda.cupy

    # train generator
    y = Variable(xp.asarray(y_batch))
    t = Variable(xp.asarray(y_batch + 1))
    z = Variable(xp.random.uniform(-1, 1, (batch_size, LATENT_SIZE)).astype(np.float32))
    x = gen((z, y))
    y1 = dis(x)
    loss_gen = F.softmax_cross_entropy(y1, t)
    loss_dis = F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size).astype(np.int32)))

    # train discriminator
    y2 = dis(Variable(xp.asarray(x_batch)))
    loss_dis += F.softmax_cross_entropy(y2, t)

    optimizer_gen.zero_grads()
    loss_gen.backward()
    optimizer_gen.update()
    optimizer_dis.zero_grads()
    loss_dis.backward()
    optimizer_dis.update()
    return (float(loss_gen.data), float(loss_dis.data))
def train_one(gen, dis, optimizer_gen, optimizer_dis, x_batch, gpu_device):
    batch_size = len(x_batch)
    xp = np if gpu_device is None else cuda.cupy

    # train generator
    z = Variable(xp.random.uniform(-1, 1, (batch_size, LATENT_SIZE)).astype(np.float32))
    x = gen(z)
    y1 = dis(x)
    loss_gen = F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size).astype(np.int32)))
    loss_dis = F.softmax_cross_entropy(y1, Variable(xp.ones(batch_size).astype(np.int32)))

    # train discriminator
    x2 = Variable(xp.asarray(np.reshape(x_batch, (batch_size, 1, 28, 28))))
    y2 = dis(x2)
    loss_dis += F.softmax_cross_entropy(y2, Variable(xp.zeros(batch_size).astype(np.int32)))

    optimizer_gen.zero_grads()
    loss_gen.backward()
    optimizer_gen.update()
    optimizer_dis.zero_grads()
    loss_dis.backward()
    optimizer_dis.update()
    return (loss_gen.data, loss_dis.data)
def __call__(self, jline, eline):
    gh = []
    self.H.reset_state()
    for w in jline:
        wid = self.jvocab[w]
        x_k = self.embedx(Variable(np.array([wid], dtype=np.int32)))
        h = self.H(x_k)
        gh.append(np.copy(h.data[0]))

    x_k = self.embedx(Variable(np.array([self.jvocab[EOS]], dtype=np.int32)))
    tx = Variable(np.array([self.evocab[eline[0]]], dtype=np.int32))
    h = self.H(x_k)
    ct = Variable(mk_ct(gh, h.data[0]))
    h2 = F.tanh(self.Wc1(ct) + self.Wc2(h))
    accum_loss = F.softmax_cross_entropy(self.W(h2), tx)

    for i in range(len(eline)):
        wid = self.evocab[eline[i]]
        x_k = self.embedy(Variable(np.array([wid], dtype=np.int32)))
        next_w = eline[i + 1] if i < len(eline) - 1 else EOS
        next_wid = self.evocab[next_w]
        tx = Variable(np.array([next_wid], dtype=np.int32))
        h = self.H(x_k)
        ct = Variable(mk_ct(gh, h.data[0]))
        h2 = F.tanh(self.Wc1(ct) + self.Wc2(h))
        loss = F.softmax_cross_entropy(self.W(h2), tx)
        accum_loss += loss
    return accum_loss
def forward(self, x_data, y_data, train=True):
    x = Variable(x_data, volatile=not train)
    t = Variable(y_data, volatile=not train)
    h = F.relu(self.bn1_1(self.conv1_1(x)))
    h = F.relu(self.bn1_2(self.conv1_2(h)))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.dropout(h, ratio=0.25, train=train)
    h = F.relu(self.bn2_1(self.conv2_1(h)))
    h = F.relu(self.bn2_2(self.conv2_2(h)))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.dropout(h, ratio=0.25, train=train)
    h = F.relu(self.bn3_1(self.conv3_1(h)))
    h = F.relu(self.bn3_2(self.conv3_2(h)))
    h = F.relu(self.bn3_3(self.conv3_3(h)))
    h = F.relu(self.bn3_4(self.conv3_4(h)))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.dropout(h, ratio=0.25, train=train)
    h = F.dropout(F.relu(self.fc4(h)), train=train, ratio=0.5)
    h = F.dropout(F.relu(self.fc5(h)), train=train, ratio=0.5)
    h = self.fc6(h)
    if train:
        return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
    else:
        return F.softmax_cross_entropy(h, t), F.accuracy(h, t), h
def forward(self, x_data, y_data, train=True):
    x = Variable(x_data, volatile=not train)
    t = Variable(y_data, volatile=not train)
    h = self.prelu1_1(self.bn1_1(self.conv1_1(x)))
    h = self.prelu1_2(self.bn1_2(self.conv1_2(h)))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = self.prelu2_1(self.bn2_1(self.conv2_1(h)))
    h = self.prelu2_2(self.bn2_2(self.conv2_2(h)))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = self.prelu3_1(self.conv3_1(h))
    h = self.prelu3_2(self.conv3_2(h))
    h = self.prelu3_3(self.conv3_3(h))
    h = F.max_pooling_2d(h, 2, stride=1)
    h = self.prelu4_1(self.conv4_1(h))
    h = self.prelu4_2(self.conv4_2(h))
    h = self.prelu4_3(self.conv4_3(h))
    h = F.max_pooling_2d(h, 2, stride=1)
    h = self.prelu5_1(self.conv5_1(h))
    h = self.prelu5_2(self.conv5_2(h))
    h = self.prelu5_3(self.conv5_3(h))
    h = F.max_pooling_2d(h, 2, stride=1)
    h = F.dropout(self.prelu6(self.fc6(h)), train=train, ratio=0.5)
    h = F.dropout(self.prelu7(self.fc7(h)), train=train, ratio=0.5)
    h = self.fc8(h)
    if train:
        return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
    else:
        return F.softmax_cross_entropy(h, t), F.accuracy(h, t), h
def forward(self, x_data, y_data, train=True):
    x = chainer.Variable(x_data, volatile=not train)
    t = chainer.Variable(y_data, volatile=not train)
    h = F.max_pooling_2d(
        F.relu(self.norm1(self.conv1(x))), 3, stride=2, pad=1)
    h = F.max_pooling_2d(
        F.relu(self.norm2(self.conv2(h))), 3, stride=2, pad=1)
    h = self.inc3a(h)
    h = self.inc3b(h)
    h = self.inc3c(h)
    h = self.inc4a(h)

    a = F.average_pooling_2d(h, 5, stride=3)
    a = F.relu(self.norma(self.conva(a)))
    a = F.relu(self.norma2(self.lina(a)))
    a = self.outa(a)
    a = F.softmax_cross_entropy(a, t)

    h = self.inc4b(h)
    h = self.inc4c(h)
    h = self.inc4d(h)

    b = F.average_pooling_2d(h, 5, stride=3)
    b = F.relu(self.normb(self.convb(b)))
    b = F.relu(self.normb2(self.linb(b)))
    b = self.outb(b)
    b = F.softmax_cross_entropy(b, t)

    h = self.inc4e(h)
    h = self.inc5a(h)
    h = F.average_pooling_2d(self.inc5b(h), 7)
    h = self.out(h)
    return 0.3 * (a + b) + F.softmax_cross_entropy(h, t), F.accuracy(h, t)
def forward(self, x_data, y_data, train=True, models=None):
    VGG_mini = models["VGG_mini"]
    VGG_mini2 = models["VGG_mini2"]
    VGG_mini3 = models["VGG_mini3"]
    x = Variable(x_data, volatile=not train)
    t = Variable(y_data, volatile=not train)
    h = F.relu(self.conv1_1(x))
    h = F.relu(self.conv1_2(h))
    h = F.relu(self.conv1_3(h))
    h = F.relu(self.conv1_4(h))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.dropout(h, ratio=0.25, train=train)
    h = F.relu(self.conv1_5(h))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.dropout(h, ratio=0.25, train=train)
    h = self.fc(h)
    if train:
        return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
    else:
        # return F.softmax_cross_entropy(h, t), F.accuracy(h, t), h
        return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
def test_variable_assertion(self):
    wrong_inst_class_weight = chainer.Variable(
        numpy.array([0, 0], dtype='f'))
    with self.assertRaises(ValueError):
        functions.softmax_cross_entropy(
            self.x, self.t,
            class_weight=wrong_inst_class_weight,
            enable_double_backprop=self.enable_double_backprop)
def d_norm(flag, dis, img1, img2):
    yl = dis(img1, img2)
    if flag == 0:
        return F.softmax_cross_entropy(yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
    elif flag == 1:
        return F.softmax_cross_entropy(yl, Variable(xp.ones(batchsize, dtype=np.int32)))
    else:
        raise ValueError('norm flag should be either 0 or 1')
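For reference, a minimal sketch of the real/fake labelling used above, with stand-in logits (the batch size and the 2-way discriminator head are illustrative assumptions, not from the original code):

import numpy as np
import chainer.functions as F

batchsize = 8
logits = np.random.randn(batchsize, 2).astype(np.float32)  # hypothetical discriminator output
loss_real = F.softmax_cross_entropy(logits, np.zeros(batchsize, dtype=np.int32))  # label 0: real
loss_fake = F.softmax_cross_entropy(logits, np.ones(batchsize, dtype=np.int32))   # label 1: fake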
def test_NNET_train(self): struct = 'w2vec(/project/nakamura-lab01/Work/truong-dq/chainer/vidnn/exp/word2vec_truecase_200/vectors.bin):lstm(200-2):linear(2-2)' # struct = 'embed(2-200):lstm(200-2):linear(2-2)' nnet = NNET_Model.parse_structure(struct) # Testing variable test_var = chainer.Variable(np.asarray([1], dtype=np.int32)) output_before_train = nnet(test_var).data inp = chainer.Variable(np.asarray([1], dtype=np.int32)) target = chainer.Variable(np.asarray([0], dtype=np.int32)) output = nnet(inp) loss = F.softmax_cross_entropy(output, target) optimizer = optimizers.SGD(lr=0.1) optimizer.setup(nnet) optimizer.zero_grads() loss.backward() optimizer.update() inp = chainer.Variable(np.asarray([0], dtype=np.int32)) target = chainer.Variable(np.asarray([0], dtype=np.int32)) output = nnet(inp) loss = F.softmax_cross_entropy(output, target) optimizer = optimizers.SGD(lr=0.1) optimizer.setup(nnet) optimizer.zero_grads() loss.backward() optimizer.update() nnet.save('test_output') nnet_2 = NNET_Model.load('test_output') nnet.forget_history() nnet_2.forget_history() np.testing.assert_equal(nnet[0][1].W.data, nnet_2[0][1].W.data) np.testing.assert_equal(nnet[1][1].upward.W.data, nnet_2[1][1].upward.W.data) np.testing.assert_equal(nnet[1][1].lateral.W.data, nnet_2[1][1].lateral.W.data) np.testing.assert_equal(nnet[1][1].upward.b.data, nnet_2[1][1].upward.b.data) output_after_train = nnet(test_var).data output_after_load = nnet_2(test_var).data after_first_layer_nnet = nnet[0][1](test_var) after_first_layer_nnet_2 = nnet_2[0][1](test_var) np.testing.assert_equal(after_first_layer_nnet.data, after_first_layer_nnet_2.data) after_first_layer_nnet.volatile = False after_first_layer_nnet_2.volatile = False after_second_layer_nnet = nnet[1][1](after_first_layer_nnet) after_second_layer_nnet_2 = nnet_2[1][1](after_first_layer_nnet_2) np.testing.assert_equal(after_second_layer_nnet.data, after_second_layer_nnet_2.data) assert (output_before_train != output_after_train).any() assert (output_before_train != output_after_load).any() np.testing.assert_equal(output_after_train, output_after_load)
def forward(self, *inputs): batch = len(inputs) // 6 lefts = inputs[0: batch] rights = inputs[batch: batch * 2] dests = inputs[batch * 2: batch * 3] labels = inputs[batch * 3: batch * 4] sequences = inputs[batch * 4: batch * 5] leaf_labels = inputs[batch * 5: batch * 6] inds = numpy.argsort([-len(l) for l in lefts]) # Sort all arrays in descending order and transpose them lefts = F.transpose_sequence([lefts[i] for i in inds]) rights = F.transpose_sequence([rights[i] for i in inds]) dests = F.transpose_sequence([dests[i] for i in inds]) labels = F.transpose_sequence([labels[i] for i in inds]) sequences = F.transpose_sequence([sequences[i] for i in inds]) leaf_labels = F.transpose_sequence( [leaf_labels[i] for i in inds]) batch = len(inds) maxlen = len(sequences) loss = 0 count = 0 correct = 0 stack = self.xp.zeros( (batch, maxlen * 2, self.n_units), self.xp.float32) for i, (word, label) in enumerate(zip(sequences, leaf_labels)): batch = word.shape[0] es = self.leaf(word) ds = self.xp.full((batch,), i, self.xp.int32) y = self.label(es) loss += F.softmax_cross_entropy(y, label, normalize=False) * batch count += batch predict = self.xp.argmax(y.array, axis=1) correct += (predict == label.array).sum() stack = thin_stack.thin_stack_set(stack, ds, es) for left, right, dest, label in zip(lefts, rights, dests, labels): l, stack = thin_stack.thin_stack_get(stack, left) r, stack = thin_stack.thin_stack_get(stack, right) o = self.node(l, r) y = self.label(o) batch = l.shape[0] loss += F.softmax_cross_entropy(y, label, normalize=False) * batch count += batch predict = self.xp.argmax(y.array, axis=1) correct += (predict == label.array).sum() stack = thin_stack.thin_stack_set(stack, dest, o) loss /= count reporter.report({'loss': loss}, self) reporter.report({'total': count}, self) reporter.report({'correct': correct}, self) return loss
def train_dcgan_labeled(images, gen, dis):
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))

    zeros = Variable(xp.zeros(batchsize, dtype=np.int32))
    ones = Variable(xp.ones(batchsize, dtype=np.int32))

    for epoch in tqdm(range(n_epoch)):
        # discriminator labels
        # 0: from dataset
        # 1: from noise

        # train generator
        z = xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32)
        z = Variable(z)
        x = gen(z)
        yl = dis(x)
        L_gen = F.softmax_cross_entropy(yl, zeros)
        L_dis = F.softmax_cross_entropy(yl, ones)

        # train discriminator
        x = generate_data(images)
        yl = dis(x)
        L_dis += F.softmax_cross_entropy(yl, zeros)

        o_gen.zero_grads()
        L_gen.backward()
        o_gen.update()

        o_dis.zero_grads()
        L_dis.backward()
        o_dis.update()

        if epoch % image_save_interval == 0 and epoch > 0:
            z = zvis
            z[50:, :] = xp.random.uniform(-1, 1, (50, nz), dtype=np.float32)
            z = Variable(z)
            x = gen(z, test=True)
            filename = '{}/vis_{}.png'.format(out_image_dir, epoch)
            generate_and_save(filename, x.data.get())

            path = join(out_model_dir, "dcgan_model_dis_{}.h5".format(epoch))
            serializers.save_hdf5(path, dis)
            path = join(out_model_dir, "dcgan_model_gen_{}.h5".format(epoch))
            serializers.save_hdf5(path, gen)
            path = join(out_model_dir, "dcgan_state_dis_{}.h5".format(epoch))
            serializers.save_hdf5(path, o_dis)
            path = join(out_model_dir, "dcgan_state_gen_{}.h5".format(epoch))
            serializers.save_hdf5(path, o_gen)
def __call__(self, x, t): self.clear() test = not self.train h = F.max_pooling_2d( F.relu(self.norm1(self.conv1(x), test=test)), 3, stride=2, pad=1) h = F.max_pooling_2d( F.relu(self.norm2(self.conv2(h), test=test)), 3, stride=2, pad=1) h = self.inc3a(h) h = self.inc3b(h) h = self.inc3c(h) h = self.inc4a(h) a = F.average_pooling_2d(h, 5, stride=3) a = F.relu(self.norma(self.conva(a), test=test)) a = F.relu(self.norma2(self.lina(a), test=test)) a = self.outa(a) self.loss1 = F.softmax_cross_entropy(a, t) h = self.inc4b(h) h = self.inc4c(h) h = self.inc4d(h) b = F.average_pooling_2d(h, 5, stride=3) b = F.relu(self.normb(self.convb(b), test=test)) b = F.relu(self.normb2(self.linb(b), test=test)) b = self.outb(b) self.loss2 = F.softmax_cross_entropy(b, t) h = self.inc4e(h) h = self.inc5a(h) h = F.average_pooling_2d(self.inc5b(h), 7) h = self.out(h) self.loss3 = F.softmax_cross_entropy(h, t) self.loss = 0.3 * (self.loss1 + self.loss2) + self.loss3 self.accuracy = F.accuracy(h, t) shishi = F.softmax(h) # kankan = shishi.data[0].tolist() # categories = np.loadtxt("labels.txt", str, delimiter="\t") # top_k = 10 # prediction = zip(kankan,categories) # for feifei in categories: # print(feifei) # prediction.sort(cmp=lambda x,y: cmp(x[0],y[0]),reverse=True) # cuowushuchu = ('cuowuchushu.txt','w') # for rank,(score,name) in enumerate(prediction[:3],start=1): # print('#%d | %s | %4.1f%%' % (rank,name,score * 100)) # print('\n') # for rank,(score,name) in enumerate(prediction[:2],start=1): # feijigege = score * 100 # cuowushuchu.write(str(name)+' '+str(feijigege)) # cuowushuchu.close() return shishi
def check_value_check(self, x_data, t_data, use_cudnn):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    if self.valid:
        # Check if it throws nothing
        functions.softmax_cross_entropy(x, t, use_cudnn)
    else:
        with self.assertRaises(ValueError):
            functions.softmax_cross_entropy(x, t, use_cudnn)
def forward(self, x_img, x_doc, y_data, train=True): x_img = cuda.cupy.asarray(x_img) x_doc = cuda.cupy.asarray(x_doc) y_data = cuda.cupy.asarray(y_data) img, doc, t = Variable(x_img), Variable(x_doc), Variable(y_data) h = F.relu(self.conv1(img)) h = F.local_response_normalization( F.max_pooling_2d(h, 3, stride=2), n=5) h = F.relu(self.conv2_reduce(h)) h = F.relu(self.conv2(h)) h = F.max_pooling_2d( F.local_response_normalization(h, n=5), 3, stride=2) h = self.inc3a(h) h = self.inc3b(h) h = F.max_pooling_2d(h, 3, stride=2) h = self.inc4a(h) l = F.average_pooling_2d(h, 5, stride=3) l = F.relu(self.loss1_conv(l)) l = F.relu(self.loss1_fc1(l)) l = self.loss1_fc2(l) self.loss1 = F.softmax_cross_entropy(l, t) h = self.inc4b(h) h = self.inc4c(h) h = self.inc4d(h) l = F.average_pooling_2d(h, 5, stride=3) l = F.relu(self.loss2_conv(l)) l = F.relu(self.loss2_fc1(l)) l = self.loss2_fc2(l) self.loss2 = F.softmax_cross_entropy(l, t) h = self.inc4e(h) h = F.max_pooling_2d(h, 3, stride=2) h = self.inc5a(h) h = self.inc5b(h) h = F.average_pooling_2d(h, 7, stride=1) h = self.loss3_fc1(F.dropout(h, 0.4, train=train)) h2 = F.relu(self.doc_fc1(F.dropout(doc, train=train))) h2 = F.relu(self.doc_fc2(h2)) b = F.relu(self.bi1(h, h2)) h = self.loss3_fc2(b) self.loss3 = F.softmax_cross_entropy(h, t) if train: return 0.3 * (self.loss1 + self.loss2) + self.loss3 else: return F.accuracy(h, t)
def forward(self, x_data, y_data, train=True):
    x, t = Variable(x_data), Variable(y_data)
    h = F.max_pooling_2d(F.relu(self.bn1(self.conv1(x))), 3, stride=2)
    h = F.max_pooling_2d(F.relu(self.bn2(self.conv2(h))), 3, stride=2)
    h = F.max_pooling_2d(F.relu(self.conv3(h)), 3, stride=2)
    h = self.fc4(h)
    if train:
        return F.softmax_cross_entropy(h, t), F.accuracy(h, t)
    else:
        return F.softmax_cross_entropy(h, t), F.accuracy(h, t), h
def compute_loss(self, input_ids, input_mask, token_type_ids,
                 start_positions, end_positions):
    (start_logits, end_logits) = self.__call__(
        input_ids, input_mask, token_type_ids)
    start_loss = F.softmax_cross_entropy(start_logits, start_positions)
    end_loss = F.softmax_cross_entropy(end_logits, end_positions)
    total_loss = (start_loss + end_loss) / 2.0
    chainer.report({'loss': total_loss.array}, self)
    accuracy = (check_answers(start_logits, start_positions) *
                check_answers(end_logits, end_positions, start_positions)).mean()
    chainer.report({'accuracy': accuracy}, self)
    return total_loss
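A minimal sketch of the span-loss arithmetic above with random stand-in logits (shapes and names are illustrative only): the start and end cross-entropies are simply averaged.

import numpy as np
import chainer.functions as F

seq_len = 32
start_logits = np.random.randn(4, seq_len).astype(np.float32)
end_logits = np.random.randn(4, seq_len).astype(np.float32)
start_positions = np.array([3, 0, 10, 5], dtype=np.int32)
end_positions = np.array([4, 2, 12, 9], dtype=np.int32)
start_loss = F.softmax_cross_entropy(start_logits, start_positions)
end_loss = F.softmax_cross_entropy(end_logits, end_positions)
total_loss = (start_loss + end_loss) / 2.0  # scalar Variable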
def train(self, x_img, x_doc, y_data, regression, gpu=True, useImage=True, useDoc=True): xp = cuda.cupy if gpu else np x_img = xp.asarray(x_img) x_doc = xp.asarray(x_doc) y_data = xp.asarray(y_data) img, doc, t = Variable(x_img), Variable(x_doc), Variable(y_data) y = self.model.forward(img, doc, regression=regression, useImage=useImage, useDoc=useDoc) # calc loss if useImage: if regression: a = self.toLog(y["a"], xp) b = self.toLog(y["b"], xp) h = self.toLog(y["h"], xp) t = self.toLog(t, xp) self.loss1 = F.mean_squared_error(a, t) self.loss2 = F.mean_squared_error(b, t) self.loss3 = F.mean_squared_error(h, t) else: a = y["a"] b = y["b"] h = y["h"] self.loss1 = F.softmax_cross_entropy(a, t) self.loss2 = F.softmax_cross_entropy(b, t) self.loss3 = F.softmax_cross_entropy(h, t) loss = 0.3 * (self.loss1 + self.loss2) + self.loss3 else: if regression: h = self.toLog(y, xp) t = self.toLog(t, xp) self.loss1 = F.mean_squared_error(h, t) else: h = y self.loss1 = F.softmax_cross_entropy(y, t) loss = self.loss1 # random select optimizer rnd = np.random.randint(0, len(self.myOptimizers)) self.optimizer = self.myOptimizers[rnd] self.optimizer.setup(self.model) self.optimizer.zero_grads() loss.backward() self.optimizer.update() if regression: h = np.array(cuda.to_cpu(h.data)).reshape((len(h))) t = np.array(cuda.to_cpu(t.data)).reshape((len(t))) return loss.data, h, t else: return loss.data, F.accuracy(h, t).data, []
def forward(self, x_data, y_data, train=True): x = chainer.Variable(x_data, volatile=not train) t = chainer.Variable(y_data, volatile=not train) h = F.relu(self.conv1(x)) h = F.local_response_normalization( F.max_pooling_2d(h, 3, stride=2), n=5) h = F.relu(self.conv2_reduce(h)) h = F.relu(self.conv2(h)) h = F.max_pooling_2d( F.local_response_normalization(h, n=5), 3, stride=2) h = self.inc3a(h) h = self.inc3b(h) h = F.max_pooling_2d(h, 3, stride=2) h = self.inc4a(h) if train: loss1 = F.average_pooling_2d(h, 5, stride=3) loss1 = F.relu(self.loss1_conv(loss1)) loss1 = F.relu(self.loss1_fc1(loss1)) loss1 = self.loss1_fc2(loss1) loss1 = F.softmax_cross_entropy(loss1, t) h = self.inc4b(h) h = self.inc4c(h) h = self.inc4d(h) if train: loss2 = F.average_pooling_2d(h, 5, stride=3) loss2 = F.relu(self.loss2_conv(loss2)) loss2 = F.relu(self.loss2_fc1(loss2)) loss2 = self.loss2_fc2(loss2) loss2 = F.softmax_cross_entropy(loss2, t) h = self.inc4e(h) h = F.max_pooling_2d(h, 3, stride=2) h = self.inc5a(h) h = self.inc5b(h) h = F.dropout(F.average_pooling_2d(h, 7, stride=1), 0.4, train=train) h = self.loss3_fc(h) loss3 = F.softmax_cross_entropy(h, t) if train: loss = 0.3 * (loss1 + loss2) + loss3 else: loss = loss3 accuracy = F.accuracy(h, t) return loss, accuracy
def train_word_embedding_batch(self, char_ids_batch): xp = self.xp word_vec = self.encode_word_batch(char_ids_batch) batchsize = char_ids_batch.shape[0] char_ids_batch = char_ids_batch.T # reconstruction loss loss_reconstruction = 0 self.word_decoder_lstm.reset_state() prev_y = None for i in xrange(char_ids_batch.shape[0]): if prev_y is None: prev_y = Variable(xp.zeros((batchsize, self.char_embed_size), dtype=xp.float32)) dec_in = F.concat((word_vec, prev_y)) y = self.word_decoder_lstm(dec_in, test=False) target = Variable(char_ids_batch[i]) if self.gpu_enabled: target.to_gpu() loss = F.softmax_cross_entropy(y, target) prev_y = self.embed_id(target) loss_reconstruction += loss self.zero_grads_generator() loss_reconstruction.backward() self.update_generator() # adversarial loss ## 0: from encoder ## 1: from noise real_z = self.sample_z(batchsize, self.word_embed_size) fake_z = word_vec y_fake = self.discriminator(fake_z, test=False) ## train generator loss_generator = F.softmax_cross_entropy(y_fake, Variable(xp.ones((batchsize,), dtype=xp.int32))) self.zero_grads_generator() loss_generator.backward() self.update_generator() # train discriminator y_real = self.discriminator(real_z, test=False) loss_discriminator = F.softmax_cross_entropy(y_fake, Variable(xp.zeros((batchsize,), dtype=xp.int32))) loss_discriminator += F.softmax_cross_entropy(y_real, Variable(xp.ones((batchsize,), dtype=xp.int32))) self.optimizer_discriminator.zero_grads() loss_discriminator.backward() self.optimizer_discriminator.update() return float(loss_reconstruction.data), float(loss_generator.data), float(loss_discriminator.data)
def __call__(self, x, t):
    h = F.relu(self.conv1(x))
    h = F.local_response_normalization(
        F.max_pooling_2d(h, 3, stride=2), n=5)
    h = F.relu(self.conv2_reduce(h))
    h = F.relu(self.conv2(h))
    h = F.max_pooling_2d(
        F.local_response_normalization(h, n=5), 3, stride=2)

    h = self.inc3a(h)
    h = self.inc3b(h)
    h = F.max_pooling_2d(h, 3, stride=2)
    h = self.inc4a(h)

    l = F.average_pooling_2d(h, 5, stride=3)
    l = F.relu(self.loss1_conv(l))
    l = F.relu(self.loss1_fc1(l))
    l = self.loss1_fc2(l)
    loss1 = F.softmax_cross_entropy(l, t)

    h = self.inc4b(h)
    h = self.inc4c(h)
    h = self.inc4d(h)

    l = F.average_pooling_2d(h, 5, stride=3)
    l = F.relu(self.loss2_conv(l))
    l = F.relu(self.loss2_fc1(l))
    l = self.loss2_fc2(l)
    loss2 = F.softmax_cross_entropy(l, t)

    h = self.inc4e(h)
    h = F.max_pooling_2d(h, 3, stride=2)
    h = self.inc5a(h)
    h = self.inc5b(h)
    h = F.average_pooling_2d(h, 7, stride=1)
    h = self.loss3_fc(F.dropout(h, 0.4))
    loss3 = F.softmax_cross_entropy(h, t)

    loss = 0.3 * (loss1 + loss2) + loss3
    accuracy = F.accuracy(h, t)
    chainer.report({
        'loss': loss,
        'loss1': loss1,
        'loss2': loss2,
        'loss3': loss3,
        'accuracy': accuracy
    }, self)
    return loss
def check_value_check(self, x_data, t_data, use_cudnn):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    with chainer.using_config('use_cudnn', use_cudnn):
        if self.valid:
            # Check if it throws nothing
            functions.softmax_cross_entropy(
                x, t, enable_double_backprop=self.enable_double_backprop)
        else:
            with self.assertRaises(ValueError):
                functions.softmax_cross_entropy(
                    x, t, enable_double_backprop=self.enable_double_backprop)
def __call__(self, z, x):
    batchsize = z.data.shape[0]

    # generate
    x_gen = self.gen(z)
    y_gen = self.dis(x_gen)
    loss_gen = F.softmax_cross_entropy(
        y_gen, Variable(self._get_zeros(batchsize)))
    loss_dis = F.softmax_cross_entropy(
        y_gen, Variable(self._get_ones(batchsize)))

    # discriminate
    y = self.dis(x)
    loss_dis += F.softmax_cross_entropy(
        y, Variable(self._get_zeros(batchsize)))

    return loss_gen, loss_dis
def __call__(self, x, t): h = F.relu(self['conv1/7x7_s2'](x)) h = F.local_response_normalization( F.max_pooling_2d(h, 3, stride=2), n=5, alpha=(1e-4)/5, k=1) h = F.relu(self['conv2/3x3_reduce'](h)) h = F.relu(self['conv2/3x3'](h)) h = F.max_pooling_2d(F.local_response_normalization( h, n=5, alpha=(1e-4)/5, k=1), 3, stride=2) h = self.call_inception(h, 'inception_3a') h = self.call_inception(h, 'inception_3b') h = F.max_pooling_2d(h, 3, stride=2) h = self.call_inception(h, 'inception_4a') l = F.average_pooling_2d(h, 5, stride=3) l = F.relu(self['loss1/conv'](l)) l = F.dropout(F.relu(self['loss1/fc'](l)), 0.7, train=self.train) l = self['loss1/classifier'](l) loss1 = F.softmax_cross_entropy(l, t) h = self.call_inception(h, 'inception_4b') h = self.call_inception(h, 'inception_4c') h = self.call_inception(h, 'inception_4d') l = F.average_pooling_2d(h, 5, stride=3) l = F.relu(self['loss2/conv'](l)) l = F.dropout(F.relu(self['loss2/fc'](l)), 0.7, train=self.train) l = self['loss2/classifier'](l) loss2 = F.softmax_cross_entropy(l, t) h = self.call_inception(h, 'inception_4e') h = F.max_pooling_2d(h, 3, stride=2) h = self.call_inception(h, 'inception_5a') h = self.call_inception(h, 'inception_5b') h = F.average_pooling_2d(h, 7, stride=1) h = self['loss3/classifier'](F.dropout(h, 0.4, train=self.train)) loss3 = F.softmax_cross_entropy(h, t) loss = 0.3 * (loss1 + loss2) + loss3 accuracy = F.accuracy(h, t) chainer.report({ 'loss': loss, 'loss1': loss1, 'loss2': loss2, 'loss3': loss3, 'accuracy': accuracy }, self) return loss
def train_gen(gen, dis, optimizer_gen, optimizer_dis, x_batch, gpu_device):
    batch_size = len(x_batch)
    xp = np if gpu_device is None else cuda.cupy

    z = Variable(xp.random.uniform(-1, 1, (batch_size, LATENT_SIZE)).astype(np.float32))
    x = gen(z)
    y1 = dis(x)
    loss_gen = F.softmax_cross_entropy(y1, Variable(xp.zeros(batch_size).astype(np.int32)))
    loss_dis = F.softmax_cross_entropy(y1, Variable(xp.ones(batch_size).astype(np.int32)))

    optimizer_gen.zero_grads()
    loss_gen.backward()
    optimizer_gen.update()
    return loss_gen.data
def __call__(self, x, t): test = not self.train finetune = self.finetune h = self.call_conv_bn_sc(x, 'conv1/7x7_s2', test=test, finetune=finetune) h = F.max_pooling_2d(h, 3, stride=2, pad=1) h = self.call_conv_bn_sc(h, 'conv2/3x3_reduce', test=test, finetune=finetune) h = self.call_conv_bn_sc(h, 'conv2/3x3', test=test) h = F.max_pooling_2d(h, 3, stride=2, pad=1) h = self.call_inception_bn(h, 'inception_3a', test=test, finetune=finetune) h = self.call_inception_bn(h, 'inception_3b', test=test, finetune=finetune) h = self.call_inception_bn(h, 'inception_3c', test=test, finetune=finetune) a = F.average_pooling_2d(h, 5, stride=3) a = self.call_conv_bn_sc(a, 'loss1/conv', test=test, finetune=finetune) a = self.call_fc_bn_sc(a, 'loss1/fc', test=test, finetune=finetune) a = self['loss1/classifier'](a) loss1 = F.softmax_cross_entropy(a, t) h = self.call_inception_bn(h, 'inception_4a', test=test, finetune=finetune) h = self.call_inception_bn(h, 'inception_4b', test=test, finetune=finetune) h = self.call_inception_bn(h, 'inception_4c', test=test, finetune=finetune) h = self.call_inception_bn(h, 'inception_4d', test=test, finetune=finetune) h = self.call_inception_bn(h, 'inception_4e', test=test, finetune=finetune) b = F.average_pooling_2d(h, 5, stride=3) b = self.call_conv_bn_sc(b, 'loss2/conv', test=test, finetune=finetune) b = self.call_fc_bn_sc(b, 'loss2/fc', test=test, finetune=finetune) b = self['loss2/classifier'](b) loss2 = F.softmax_cross_entropy(b, t) h = self.call_inception_bn(h, 'inception_5a', test=test, finetune=finetune) h = self.call_inception_bn(h, 'inception_5b', test=test, finetune=finetune) h = F.average_pooling_2d(h, 7, stride=1) h = self['loss3/classifier'](h) loss3 = F.softmax_cross_entropy(h, t) loss = 0.3 * (loss1 + loss2) + loss3 accuracy = F.accuracy(h, t) chainer.report({ 'loss': loss, 'loss1': loss1, 'loss2': loss2, 'loss3': loss3, 'accuracy': accuracy }, self) return loss
def update_parameter_by_meta_learner(
        self, model_params, loss, x_l0, x_l1, y_l):

    # Forward meta-learner
    namedparams = model_params
    for i, elm in enumerate(namedparams.items()):  # parameter-loop
        k, p = elm
        with cuda.get_device_from_id(self.device):
            shape = p.shape
            xp = cuda.get_array_module(p.data)

            x = p.grad
            grad = xp.reshape(x, (np.prod(shape), ))

            meta_learner = self.meta_learners[i]
            g = meta_learner(Variable(grad))  # forward

            w = p - F.reshape(g, shape)
            self.model_params[k] = w

    # Train meta-learner with main objective
    y_pred = self.model(x_l0, self.model_params)
    loss_ce = F.softmax_cross_entropy(y_pred, y_l)

    self.cleargrads()  # need to clear W'grad due to loss_rec.backward
    for meta_learner in self.meta_learners:
        meta_learner.cleargrads()
    loss_ce.backward(retain_grad=True)
    for opt in self.opt_meta_learners:
        opt.update()

    loss_ce.unchain_backward()  # TODO: here is a proper place to unchain?
def forward(self, xs, ys):
    xs = [x[::-1] for x in xs]

    eos = self.xp.array([EOS], numpy.int32)
    ys_in = [F.concat([eos, y], axis=0) for y in ys]
    ys_out = [F.concat([y, eos], axis=0) for y in ys]

    # Both xs and ys_in are lists of arrays.
    exs = sequence_embed(self.embed_x, xs)
    eys = sequence_embed(self.embed_y, ys_in)

    batch = len(xs)
    # None represents a zero vector in an encoder.
    hx, cx, _ = self.encoder(None, None, exs)
    _, _, os = self.decoder(hx, cx, eys)

    # It is faster to concatenate data before calculating loss
    # because only one matrix multiplication is called.
    concat_os = F.concat(os, axis=0)
    concat_ys_out = F.concat(ys_out, axis=0)
    loss = F.sum(F.softmax_cross_entropy(
        self.W(concat_os), concat_ys_out, reduce='no')) / batch

    chainer.report({'loss': loss}, self)
    n_words = concat_ys_out.shape[0]
    perp = self.xp.exp(loss.array * batch / n_words)
    chainer.report({'perp': perp}, self)
    return loss
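A minimal sketch of the reduce='no' pattern used above, with toy shapes and illustrative names: the unreduced loss has one entry per target token, so it can be summed and normalized by an arbitrary denominator (here the batch size), and the perplexity recovered from the per-word average.

import numpy as np
import chainer.functions as F

batch = 2
logits = np.random.randn(7, 5).astype(np.float32)        # 7 decoder steps, 5-word vocabulary
targets = np.array([1, 4, 0, 2, 3, 0, 1], dtype=np.int32)
per_token = F.softmax_cross_entropy(logits, targets, reduce='no')  # shape (7,)
loss = F.sum(per_token) / batch
perp = np.exp(float(loss.array) * batch / len(targets))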
def forward(x_data, y_data, model,train=True): # Neural net architecture #x, t = chainer.Variable(x_data), chainer.Variable(y_data) t = chainer.Variable(y_data) x = {} for n in range(500): x[n] = chainer.Variable(x_data[n]) h = {} initial_V = {} initial_V_relu = {} for nameint in range(len(l_name)-2): initial_V[nameint] = model[l_name[nameint]](x[nameint]) #initial_V_relu[nameint] = F.relu(initial_V[nameint]) #initial_V_relu[nameint] = F.sigmoid(initial_V[nameint]) initial_V_relu[nameint] = F.tanh(initial_V[nameint]) #h[nameint] = F.dropout(F.relu(initial_V[nameint]), train=train) #h[nameint] = F.dropout(F.sigmoid(initial_V[nameint]), train=train) h[nameint] = F.dropout(F.tanh(initial_V[nameint]), train=train) #h[nameint] = F.relu(model[l_name[nameint]](x[nameint])) #h6 = F.dropout(F.relu(model.l501(Returnharray(h))), train=train) #h6 = F.dropout(F.sigmoid(model.l501(Returnharray(h))), train=train) h6 = F.dropout(F.tanh(model.l501(Returnharray(h))), train=train) y = model.l502(h6) y_pre = (y.data.argmax(axis = 1)) return F.softmax_cross_entropy(y, t), F.accuracy(y, t),y_pre,initial_V,initial_V_relu
def main(): # load MNIST images images, labels = dataset.load_train_images() # config config = model.config # settings max_epoch = 1000 num_trains_per_epoch = 5000 num_validation_data = 10000 batchsize = 128 # seed np.random.seed(args.seed) if args.gpu_device != -1: cuda.cupy.random.seed(args.seed) # save validation accuracy per epoch csv_results = [] # create semi-supervised split training_images, training_labels, validation_images, validation_labels = dataset.split_data( images, labels, num_validation_data, seed=args.seed) # training progress = Progress() for epoch in xrange(1, max_epoch): progress.start_epoch(epoch, max_epoch) sum_loss = 0 for t in xrange(num_trains_per_epoch): # sample from data distribution image_batch, label_batch = dataset.sample_data(training_images, training_labels, batchsize, binarize=False) distribution = model.discriminate(image_batch, apply_softmax=False) loss = F.softmax_cross_entropy(distribution, model.to_variable(label_batch)) sum_loss += float(loss.data) model.backprop(loss) if t % 10 == 0: progress.show(t, num_trains_per_epoch, {}) model.save(args.model_dir) train_accuracy = compute_accuracy(training_images, training_labels) validation_accuracy = compute_accuracy(validation_images, validation_labels) progress.show( num_trains_per_epoch, num_trains_per_epoch, { "loss": sum_loss / num_trains_per_epoch, "accuracy (validation)": validation_accuracy, "accuracy (train)": train_accuracy, }) # write accuracy to csv csv_results.append( [epoch, validation_accuracy, progress.get_total_time()]) data = pd.DataFrame(csv_results) data.columns = ["epoch", "accuracy", "min"] data.to_csv("{}/result.csv".format(args.model_dir))
def __call__(self, x, t, train=True):
    y = self.fwd(x, train)
    return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
def forward(self, x_img, x_doc, y_data, train=True, regression=False, predict=False, gpu=True): test = not train xp = cuda.cupy if gpu else np x_img = xp.asarray(x_img) y_data = xp.asarray(y_data) img, t = Variable(x_img), Variable(y_data) if regression and not predict: t = self.toLog(t) #t.data = cuda.cupy.asarray(t.data, dtype=cuda.cupy.float32).reshape((20,1)) h = F.max_pooling_2d(F.relu(self.norm1(self.conv1(img), test=test)), 3, stride=2, pad=1) h = F.max_pooling_2d(F.relu(self.norm2(self.conv2(h), test=test)), 3, stride=2, pad=1) h = self.inc3a(h) h = self.inc3b(h) h = self.inc3c(h) h = self.inc4a(h) if not predict: a = F.average_pooling_2d(h, 5, stride=3) a = F.relu(self.norma(self.conva(a), test=test)) a = F.relu(self.norma2(self.lina(a), test=test)) a = self.outa(a) if regression: #a = self.toLog(a) self.loss1 = F.mean_squared_error(a, t) else: self.loss1 = F.softmax_cross_entropy(a, t) h = self.inc4b(h) h = self.inc4c(h) h = self.inc4d(h) if not predict: b = F.average_pooling_2d(h, 5, stride=3) b = F.relu(self.normb(self.convb(b), test=test)) b = F.relu(self.normb2(self.linb(b), test=test)) b = self.outb(b) if regression: #b = self.toLog(b) self.loss2 = F.mean_squared_error(b, t) else: self.loss2 = F.softmax_cross_entropy(b, t) h = self.inc4e(h) h = self.inc5a(h) h = F.average_pooling_2d(self.inc5b(h), 7) h = self.out(h) if predict: #t.data = cuda.cupy.asarray(t.data, dtype=cuda.cupy.float32).reshape((20,1)) #myloss = F.mean_squared_error(h, t) return h if regression: h = self.toLog(h) self.loss3 = F.mean_squared_error(h, t) else: self.loss3 = F.softmax_cross_entropy(h, t) if train or regression: h = np.array(cuda.to_cpu(h.data)).reshape((len(h))) t = np.array(cuda.to_cpu(t.data)).reshape((len(t))) #print(h) #print(t) return 0.3 * (self.loss1 + self.loss2) + self.loss3, np.corrcoef( h, t) else: return F.accuracy(h, t)
def forward(self): x = chainer.Variable(self.x) t = chainer.Variable(self.t) return functions.softmax_cross_entropy(x, t, enable_double_backprop=False)
def __call__(self, x, t):
    loss = F.softmax_cross_entropy(self.predict(x), t)
    chainer.report({'loss': loss / t.shape[0]}, self)
    return loss
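For reference, a small sketch of the default reduction with toy data: softmax_cross_entropy already returns the batch-averaged loss unless reduce='no' is passed, which is worth keeping in mind when the reported value is divided by t.shape[0] as above.

import numpy as np
import chainer.functions as F

y = np.random.randn(4, 10).astype(np.float32)
t = np.array([3, 1, 0, 7], dtype=np.int32)
mean_loss = F.softmax_cross_entropy(y, t)                # scalar, averaged over the 4 samples
per_sample = F.softmax_cross_entropy(y, t, reduce='no')  # shape (4,), one loss per sample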
def main(): # load MNIST images images, labels = dataset.load_train_images() # config config = adgm.config # settings max_epoch = 1000 num_trains_per_epoch = 500 batchsize_l = 100 batchsize_u = 100 alpha = 1 # seed np.random.seed(args.seed) if args.gpu_device != -1: cuda.cupy.random.seed(args.seed) # save validation accuracy per epoch csv_results = [] # create semi-supervised split num_validation_data = 10000 num_labeled_data = 100 num_types_of_label = 10 training_images_l, training_labels_l, training_images_u, validation_images, validation_labels = dataset.create_semisupervised( images, labels, num_validation_data, num_labeled_data, num_types_of_label, seed=args.seed) print training_labels_l # init weightnorm layers if config.use_weightnorm: print "initializing weight normalization layers ..." images_l, label_onehot_l, label_id_l = dataset.sample_labeled_data( training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y) images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x) adgm.compute_lower_bound(images_l, label_onehot_l, images_u) # training progress = Progress() for epoch in xrange(1, max_epoch): progress.start_epoch(epoch, max_epoch) sum_lower_bound_l = 0 sum_lower_bound_u = 0 sum_loss_classifier = 0 for t in xrange(num_trains_per_epoch): # sample from data distribution images_l, label_onehot_l, label_ids_l = dataset.sample_labeled_data( training_images_l, training_labels_l, batchsize_l, config.ndim_x, config.ndim_y) images_u = dataset.sample_unlabeled_data(training_images_u, batchsize_u, config.ndim_x) # lower bound loss lower_bound, lb_labeled, lb_unlabeled = adgm.compute_lower_bound( images_l, label_onehot_l, images_u) loss_lower_bound = -lower_bound # classification loss a_l = adgm.encode_x_a(images_l, False) unnormalized_y_distribution = adgm.encode_ax_y_distribution( a_l, images_l, softmax=False) loss_classifier = alpha * F.softmax_cross_entropy( unnormalized_y_distribution, adgm.to_variable(label_ids_l)) # backprop adgm.backprop(loss_classifier + loss_lower_bound) sum_lower_bound_l += float(lb_labeled.data) sum_lower_bound_u += float(lb_unlabeled.data) sum_loss_classifier += float(loss_classifier.data) progress.show(t, num_trains_per_epoch, {}) adgm.save(args.model_dir) # validation images_l, _, label_ids_l = dataset.sample_labeled_data( validation_images, validation_labels, num_validation_data, config.ndim_x, config.ndim_y) images_l_segments = np.split(images_l, num_validation_data // 500) label_ids_l_segments = np.split(label_ids_l, num_validation_data // 500) sum_accuracy = 0 for images_l, label_ids_l in zip(images_l_segments, label_ids_l_segments): y_distribution = adgm.encode_x_y_distribution(images_l, softmax=True, test=True) accuracy = F.accuracy(y_distribution, adgm.to_variable(label_ids_l)) sum_accuracy += float(accuracy.data) validation_accuracy = sum_accuracy / len(images_l_segments) progress.show( num_trains_per_epoch, num_trains_per_epoch, { "lb_u": sum_lower_bound_u / num_trains_per_epoch, "lb_l": sum_lower_bound_l / num_trains_per_epoch, "loss_spv": sum_loss_classifier / num_trains_per_epoch, "accuracy": validation_accuracy, }) # write accuracy to csv csv_results.append([epoch, validation_accuracy]) data = pd.DataFrame(csv_results) data.columns = ["epoch", "accuracy"] data.to_csv("{}/result.csv".format(args.model_dir))
def __call__(self, x, t):
    y = self.predictor(x)
    loss = F.softmax_cross_entropy(y, t)
    accuracy = F.accuracy(y, t)
    report({'loss': loss, 'accuracy': accuracy}, self)
    return loss
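The predictor/loss/accuracy/report pattern above is essentially what chainer.links.Classifier provides; a minimal sketch with a hypothetical linear predictor:

import chainer.functions as F
import chainer.links as L

predictor = L.Linear(784, 10)  # hypothetical predictor
model = L.Classifier(predictor, lossfun=F.softmax_cross_entropy, accfun=F.accuracy)
# model(x, t) computes the softmax cross-entropy loss and reports
# 'loss' and 'accuracy' to the current reporter, much like the __call__ above.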
def __call__(self, x, t, train=True):
    y = self.predictor(x, train)
    self.loss = F.softmax_cross_entropy(y, t)
    self.accuracy = F.accuracy(y, t)
    return self.loss
def trainBatch(self, encSents, decSents, args): """main training""" ###encoder step encEmbed = self.getEmbeddings(encSents, args) hy, cy, ys = self.encNStepLSTM(hx=None, cx=None, xs=encEmbed) encOut = F.pad_sequence(ys) #[batch, max(sentlen), Dim] ###decoder step decEmbed = self.getEmbeddings(decSents, args) #embed のリストの状態 decEmbed = F.pad_sequence(decEmbed).transpose([1, 0, 2]) #padding して[sentLen, batch, Dim]に変更 decode_step = len(decEmbed) - 1 decoderOutList = [0] * decode_step lstmStateList = [0] * decode_step firstInput = chainer.Variable(xp.zeros(hy[0].shape, dtype=xp.float32)) #decLSTMの最初に入力として初期化 embedじゃない方 for i in range(decode_step): #decEmbedじゃないdecLSTMへの入力の準備と、decLSTMのstate準備 if i == 0: #デコーダの最初のステップ self.set_state([cy[0], hy[0]]) anoInput = firstInput else: self.set_state(lstmStateList[i - 1]) anoInput = decoderOutList[i - 1] hOut = self.decLSTM(F.concat([decEmbed[i], anoInput], 1)) #decoder LSTMの出力 lstmStateList[i] = self.get_state() decoderOutList[i] = self.attention(hOut, encOut, args) #decoder LSTMの出力をアテンションしたもの decoderの出力 decode_step * [batch, Dim] total_loss = chainer.Variable(xp.zeros((), dtype=xp.float32)) proc = 0 correct = 0 incorrect = 0 ###output層 correctLabels = F.pad_sequence(decSents, padding=-1).T.array #TODO 何か嫌だから上手く書きたい ってかこの関数全体何か汚い -1でパディングしたから1足したら0がeosトークンになるんじゃね? for i in range(decode_step): oVector = self.decOut(F.dropout(decoderOutList[i], args.dropout_rate)) correctLabel = correctLabels[i + 1] proc += (xp.count_nonzero(correctLabel + 1)) ###TODO 0を数えてたらunkトークンがなくなるし、1足したら全部1以上になるンゴ # 必ずminibatchsizeでわる closs = F.softmax_cross_entropy( oVector, correctLabel, normalize=False) #normalize=Falseの意味? paddingしてるからっぽい # これで正規化なしのloss cf. seq2seq-attn code #total_loss_val += closs.data * cMBSize #if train_mode > 0: # 学習データのみ backward する total_loss += closs # 実際の正解数を獲得したい t_correct = 0 t_incorrect = 0 # Devのときは必ず評価,学習データのときはオプションに従って評価 # if train_mode == 0 or args.doEvalAcc > 0: # 予測した単語のID配列 CuPy pred_arr = oVector.data.argmax(axis=1) # 正解と予測が同じなら0になるはず # => 正解したところは0なので,全体から引く ###xp.count_nonzero()は間違えた数? t_correct = (correctLabel.size - xp.count_nonzero(correctLabel - pred_arr)) #t_correct正解した数 # 予測不要の数から正解した数を引く # +1はbroadcast t_incorrect = xp.count_nonzero(correctLabel + 1) - t_correct #xp.count_nonzero()は予測する必要のある数 つまりt_incorrectは間違えた数 correct += t_correct incorrect += t_incorrect #### #total_loss.backward() return total_loss, (correct, incorrect, decode_step, proc)
def __call__(self, imgs, masks, labels, bboxes, scales): """Forward FCIS and calculate losses. Here are notations used. * :math:`N` is the batch size. * :math:`R` is the number of bounding boxes per image. * :math:`H` is the image height. * :math:`W` is the image width. Currently, only :math:`N=1` is supported. Args: imgs (~chainer.Variable): A variable with a batch of images. masks (~chainer.Variable): A batch of masks. Its shape is :math:`(N, R, H, W)`. labels (~chainer.Variable): A batch of labels. Its shape is :math:`(N, R)`. The background is excluded from the definition, which means that the range of the value is :math:`[0, L - 1]`. :math:`L` is the number of foreground classes. bboxes (~chainer.Variable): A batch of bounding boxes. Its shape is :math:`(N, R, 4)`. scales (float or ~chainer.Variable): Amount of scaling applied to the raw image during preprocessing. Returns: chainer.Variable: Scalar loss variable. This is the sum of losses for Region Proposal Network and the head module. """ if isinstance(masks, chainer.Variable): masks = masks.array if isinstance(labels, chainer.Variable): labels = labels.array if isinstance(bboxes, chainer.Variable): bboxes = bboxes.array if isinstance(scales, chainer.Variable): scales = scales.array scales = cuda.to_cpu(scales) batch_size, _, H, W = imgs.shape img_size = (H, W) assert img_size == masks.shape[2:] if any(len(b) == 0 for b in bboxes): return chainer.Variable(self.xp.array(0, dtype=np.float32)) rpn_features, roi_features = self.fcis.extractor(imgs) rpn_locs, rpn_scores, rois, roi_indices, anchor = self.fcis.rpn( rpn_features, img_size, scales) rpn_locs = F.concat(rpn_locs, axis=0) rpn_scores = F.concat(rpn_scores, axis=0) gt_rpn_locs = [] gt_rpn_labels = [] for bbox in bboxes: gt_rpn_loc, gt_rpn_label = self.anchor_target_creator( bbox, anchor, img_size) if cuda.get_array_module(rpn_locs.array) != np: gt_rpn_loc = cuda.to_gpu(gt_rpn_loc) gt_rpn_label = cuda.to_gpu(gt_rpn_label) gt_rpn_locs.append(gt_rpn_loc) gt_rpn_labels.append(gt_rpn_label) del gt_rpn_loc, gt_rpn_label gt_rpn_locs = self.xp.concatenate(gt_rpn_locs, axis=0) gt_rpn_labels = self.xp.concatenate(gt_rpn_labels, axis=0) batch_indices = range(batch_size) sample_rois = [] sample_roi_indices = [] gt_roi_masks = [] gt_roi_labels = [] gt_roi_locs = [] for batch_index, mask, label, bbox in \ zip(batch_indices, masks, labels, bboxes): roi = rois[roi_indices == batch_index] sample_roi, gt_roi_mask, gt_roi_label, gt_roi_loc = \ self.proposal_target_creator( roi, mask, label, bbox, self.loc_normalize_mean, self.loc_normalize_std, self.mask_size) del roi sample_roi_index = self.xp.full((len(sample_roi), ), batch_index, dtype=np.int32) sample_rois.append(sample_roi) sample_roi_indices.append(sample_roi_index) del sample_roi, sample_roi_index gt_roi_masks.append(gt_roi_mask) gt_roi_labels.append(gt_roi_label) gt_roi_locs.append(gt_roi_loc) del gt_roi_mask, gt_roi_label, gt_roi_loc sample_rois = self.xp.concatenate(sample_rois, axis=0) sample_roi_indices = self.xp.concatenate(sample_roi_indices, axis=0) gt_roi_masks = self.xp.concatenate(gt_roi_masks, axis=0) gt_roi_labels = self.xp.concatenate(gt_roi_labels, axis=0) gt_roi_locs = self.xp.concatenate(gt_roi_locs, axis=0) roi_ag_seg_scores, roi_ag_locs, roi_cls_scores, _, _ = self.fcis.head( roi_features, sample_rois, sample_roi_indices, img_size, gt_roi_labels) # RPN losses rpn_loc_loss = _fast_rcnn_loc_loss(rpn_locs, gt_rpn_locs, gt_rpn_labels, self.rpn_sigma) rpn_cls_loss = F.softmax_cross_entropy(rpn_scores, gt_rpn_labels) if 
self.n_ohem_sample is None: n_roi = roi_ag_locs.shape[0] gt_roi_fg_labels = (gt_roi_labels > 0).astype(np.int) roi_locs = roi_ag_locs[self.xp.arange(n_roi), gt_roi_fg_labels] roi_loc_loss = _fast_rcnn_loc_loss(roi_locs, gt_roi_locs, gt_roi_labels, self.roi_sigma) roi_cls_loss = F.softmax_cross_entropy(roi_cls_scores, gt_roi_labels) roi_mask_loss = F.softmax_cross_entropy( roi_ag_seg_scores, gt_roi_masks, normalize=False) \ * 10.0 / self.mask_size / self.mask_size else: # Losses for outputs of the head roi_loc_loss, roi_cls_loss, roi_mask_loss = _ohem_loss( roi_ag_locs, roi_cls_scores, roi_ag_seg_scores, gt_roi_locs, gt_roi_labels, gt_roi_masks, self.n_ohem_sample, self.roi_sigma, self.mask_size) loss = rpn_loc_loss + rpn_cls_loss \ + roi_loc_loss + roi_cls_loss + roi_mask_loss chainer.reporter.report( { 'rpn_loc_loss': rpn_loc_loss, 'rpn_cls_loss': rpn_cls_loss, 'roi_loc_loss': roi_loc_loss, 'roi_cls_loss': roi_cls_loss, 'roi_mask_loss': roi_mask_loss, 'loss': loss, }, self) return loss
def forward(self):
    x = chainer.Variable(self.x)
    t = chainer.Variable(self.t)
    return functions.softmax_cross_entropy(x, t, self.use_cudnn)
def __call__(self, x, y):
    return F.softmax_cross_entropy(self.fwd(x), y)
# Compute the exact accuracy on the test set below
test_loss = 0
test_accuracy = 0
miss_train_copy = miss_train_total.copy()
miss_test_copy = miss_test_total.copy()
correct_train_copy = correct_train_total.copy()
correct_test_copy = correct_test_total.copy()
for i in range(0, N_test, batch_size_predict):
    x = Variable(cuda.to_gpu(x_test[i:i + batch_size_predict]))  # evaluation images
    t = Variable(cuda.to_gpu(t_test[i:i + batch_size_predict]))  # evaluation labels
    y = model.predict(x)
    model.zerograds()
    loss = F.softmax_cross_entropy(y, t)

    # From here on, identify the misclassified samples
    acc, data, data2, index, pred, corre, correct_index = accuracy(y, t)
    acc.to_cpu()
    data.to_cpu()
    data2.to_cpu()
    index.to_cpu()
    pred.to_cpu()
    corre.to_cpu()
    correct_index.to_cpu()
    t.to_cpu()
    x.to_cpu()
    for j in range(len(data.data)):
        miss_test_total[data.data[j]] += 1
def forward(self, word_list, gold_op_list, unary_limit): is_training = gold_op_list is not None # check args if len(word_list) < 1: raise ValueError('Word list is empty.') if is_training: n_shift = 0 n_binary = 0 for op, _ in gold_op_list: if op == OP_SHIFT: n_shift += 1 if op == OP_BINARY: n_binary += 1 if n_shift != len(word_list) or n_binary != len(word_list) - 1: raise ValueError( 'Invalid operation number: SHIFT=%d (required: %d), BINARY=%d (required: %d)' % (n_shift, n_binary, len(word_list), len(word_list) - 1)) if gold_op_list[-1] != (OP_FINISH, None): raise ValueError('Last operation is not OP_FINISH.') # initial values QUEUE_ZEROS = XP.fzeros((1, self.n_queue)) STACK_ZEROS = XP.fzeros((1, self.n_stack)) SRSTATE_ZEROS = XP.fzeros((1, self.n_srstate)) NEG_INF = -1e20 # queue encoding q_list = [] qc = QUEUE_ZEROS q = QUEUE_ZEROS for text, wid in reversed(word_list): qc, q = self.net_encoder(qc, XP.iarray([wid]), q) q_list.insert(0, (text, q)) # estimate s_list = [] zc = SRSTATE_ZEROS z = SRSTATE_ZEROS unary_chain = 0 if is_training: loss = XP.fzeros(()) for i in itertools.count(): text, q = q_list[0] if q_list else ('', QUEUE_ZEROS) t1, sc1, s1 = s_list[-1] if s_list else (None, STACK_ZEROS, STACK_ZEROS) t2, sc2, s2 = s_list[-2] if len(s_list) >= 2 else (None, STACK_ZEROS, STACK_ZEROS) t3, sc3, s3 = s_list[-3] if len(s_list) >= 3 else (None, STACK_ZEROS, STACK_ZEROS) zc, z = self.net_sr(zc, q, s1, z) o = self.net_operation(z) if is_training: loss += functions.softmax_cross_entropy( o, XP.iarray([gold_op_list[i][0]])) o_argmax = gold_op_list[i][0] else: o_filter = [0.0 for _ in range(NUM_OP)] filtered = 0 if not q_list: o_filter[OP_SHIFT] = NEG_INF filtered += 1 if not s_list or unary_chain >= unary_limit: o_filter[OP_UNARY] = NEG_INF filtered += 1 if len(s_list) < 2: o_filter[OP_BINARY] = NEG_INF filtered += 1 if q_list or len(s_list) > 1: o_filter[OP_FINISH] = NEG_INF if filtered == NUM_OP: raise RuntimeError('No possible operation!') o += XP.farray([o_filter]) o_argmax = int(cuda.to_cpu(o.data.argmax(1))) if o_argmax == OP_SHIFT: t0 = Tree(None, [text]) sc0, s0 = (STACK_ZEROS, self.net_shift(q, s1, z)) q_list.pop(0) unary_chain = 0 label = self.net_semiterminal(s0) elif o_argmax == OP_UNARY: t0 = Tree(None, [t1]) sc0, s0 = self.net_unary(sc1, q, s1, s2, z) s_list.pop() unary_chain += 1 label = self.net_phrase(s0) elif o_argmax == OP_BINARY: t0 = Tree(None, [t2, t1]) sc0, s0 = self.net_binary(sc1, sc2, q, s1, s2, s3, z) s_list.pop() s_list.pop() unary_chain = 0 label = self.net_phrase(s0) else: # OP_FINISH break if is_training: loss += functions.softmax_cross_entropy( label, XP.iarray([gold_op_list[i][1]])) label_argmax = gold_op_list[i][1] else: label_argmax = int(cuda.to_cpu(label.data.argmax(1))) t0.set_label(label_argmax) s_list.append((t0, sc0, s0)) ''' if is_training: o_est = int(cuda.to_cpu(o.data.argmax(1))) label_est = int(cuda.to_cpu(label.data.argmax(1))) trace('%c %c gold=%d-%2d, est=%d-%2d, stack=%2d, queue=%2d' % ( '*' if o_est == gold_op_list[i][0] else ' ', '*' if label_est == gold_op_list[i][1] else ' ', gold_op_list[i][0], gold_op_list[i][1], o_est, label_est, len(s_list), len(q_list))) ''' if is_training: return loss else: # combine multiple trees if they exists, and return the result. t0, _, __ = s_list.pop() if s_list: raise RuntimeError('There exist multiple subtrees!') return t0
def train(args): vocab = Vocabulary.from_conll(args.train, args.vocab) train_dataset = [conll_to_train(x, vocab) for x in read_conll(args.train)] dev_dataset = [conll_to_train(x, vocab) for x in read_conll(args.dev)] parser = Parser(args.vocab, args.embed, args.hidden, args.depth) if args.gpu >= 0: parser.to_gpu() opt = optimizers.AdaGrad(lr=0.01) opt.setup(parser) opt.add_hook(optimizer.GradientClipping(10)) opt.add_hook(optimizer.WeightDecay(0.0001)) for epoch in range(args.epoch): random.shuffle(train_dataset) parser.zerograds() loss = XP.fzeros(()) for i, data in enumerate(train_dataset): trace('epoch %3d: train sample %6d:' % (epoch + 1, i + 1)) parent_scores, root_scores = parser.forward(data) if len(data) > 1: parent_scores = functions.split_axis(parent_scores, len(data), 0) else: parent_scores = (parent_scores, ) root = -1 for j, (p_scores, (wid, parent)) in enumerate(zip(parent_scores, data)): if parent == -1: trace(' %3d: root' % j) root = j else: parent_est = p_scores.data.argmax() trace('%c %3d -> %3d (%3d)' % ('*' if parent == parent_est else ' ', j, parent_est, parent)) loss += functions.softmax_cross_entropy( p_scores, XP.iarray([parent])) root_est = root_scores.data.argmax() trace('ROOT: %3d (%3d)' % (root_est, root)) loss += functions.softmax_cross_entropy(root_scores, XP.iarray([root])) if (i + 1) % 200 == 0: loss.backward() opt.update() parser.zerograds() loss = XP.fzeros(()) loss.backward() opt.update() trace('epoch %3d: trained. ' % (epoch + 1)) parent_num = 0 parent_match = 0 root_num = 0 root_match = 0 for i, data in enumerate(dev_dataset): trace('epoch %3d: dev sample %6d:' % (epoch + 1, i + 1), rollback=True) parent_scores, root_scores = parser.forward(data) if len(data) > 1: parent_scores = functions.split_axis(parent_scores, len(data), 0) else: parent_scores = (parent_scores, ) root = -1 for j, (p_scores, (wid, parent)) in enumerate(zip(parent_scores, data)): if parent == -1: root = j else: parent_est = p_scores.data.argmax() parent_num += 1 parent_match += 1 if parent_est == parent else 0 root_est = root_scores.data.argmax() root_num += 1 root_match += 1 if root_est == root else 0 result_str = \ 'epoch %3d: dev: parent-acc = %.4f (%5d/%5d), root-acc = %.4f (%4d/%4d)' % \ ( \ epoch + 1, \ parent_match / parent_num, parent_match, parent_num, \ root_match / root_num, root_match, root_num) trace(result_str) with open(args.model + '.log', 'a') as fp: print(result_str, file=fp) trace('epoch %3d: saving models ...' % (epoch + 1)) prefix = args.model + '.%03d' % (epoch + 1) vocab.save(prefix + '.vocab') parser.save_spec(prefix + '.parent_spec') serializers.save_hdf5(prefix + '.parent_weights', parser) trace('finished.')
def forward(self, x, t):
    return F.softmax_cross_entropy(self.out(x), t)
def main(): # load MNIST images images, labels = dataset.load_train_images() # config config = aae.config # settings max_epoch = 1000 num_trains_per_epoch = 5000 batchsize = 100 alpha = 1 # seed np.random.seed(args.seed) if args.gpu_device != -1: cuda.cupy.random.seed(args.seed) # classification # 0 -> true sample # 1 -> generated sample class_true = aae.to_variable(np.zeros(batchsize, dtype=np.int32)) class_fake = aae.to_variable(np.ones(batchsize, dtype=np.int32)) # training progress = Progress() for epoch in xrange(1, max_epoch): progress.start_epoch(epoch, max_epoch) sum_loss_reconstruction = 0 sum_loss_discriminator = 0 sum_loss_generator = 0 for t in xrange(num_trains_per_epoch): # sample from data distribution images_u = dataset.sample_unlabeled_data(images, batchsize) # reconstruction phase qy_x_u, z_u = aae.encode_x_yz(images_u, apply_softmax=True) reconstruction_u = aae.decode_yz_x(qy_x_u, z_u) loss_reconstruction = F.mean_squared_error(aae.to_variable(images_u), reconstruction_u) aae.backprop_generator(loss_reconstruction) aae.backprop_decoder(loss_reconstruction) # adversarial phase y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True) z_true_u = sampler.gaussian(batchsize, config.ndim_z, mean=0, var=1) y_true_u = sampler.onehot_categorical(batchsize, config.ndim_y) discrimination_z_true = aae.discriminate_z(z_true_u, apply_softmax=False) discrimination_y_true = aae.discriminate_y(y_true_u, apply_softmax=False) discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False) discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False) loss_discriminator_z = F.softmax_cross_entropy(discrimination_z_true, class_true) + F.softmax_cross_entropy(discrimination_z_fake, class_fake) loss_discriminator_y = F.softmax_cross_entropy(discrimination_y_true, class_true) + F.softmax_cross_entropy(discrimination_y_fake, class_fake) loss_discriminator = loss_discriminator_z + loss_discriminator_y aae.backprop_discriminator(loss_discriminator) # adversarial phase y_fake_u, z_fake_u = aae.encode_x_yz(images_u, apply_softmax=True) discrimination_z_fake = aae.discriminate_z(z_fake_u, apply_softmax=False) discrimination_y_fake = aae.discriminate_y(y_fake_u, apply_softmax=False) loss_generator_z = F.softmax_cross_entropy(discrimination_z_fake, class_true) loss_generator_y = F.softmax_cross_entropy(discrimination_y_fake, class_true) loss_generator = loss_generator_z + loss_generator_y aae.backprop_generator(loss_generator) sum_loss_reconstruction += float(loss_reconstruction.data) sum_loss_discriminator += float(loss_discriminator.data) sum_loss_generator += float(loss_generator.data) if t % 10 == 0: progress.show(t, num_trains_per_epoch, {}) aae.save(args.model_dir) progress.show(num_trains_per_epoch, num_trains_per_epoch, { "loss_r": sum_loss_reconstruction / num_trains_per_epoch, "loss_d": sum_loss_discriminator / num_trains_per_epoch, "loss_g": sum_loss_generator / num_trains_per_epoch, })
def __call__(self, x, t):
    y = self.predictor(x)
    loss = F.softmax_cross_entropy(y, t)
    return loss
use_dropout = 0.25
label_size = 3
knowledge_size = 2
input_hidden = 3
kelic_hidden = 5
enrich_hidden = 5
mlp_hidden = 3
input_layers = 1
enrich_layer = 1

model = kim(word_embed, emb_dim, label_size, knowledge_size, input_hidden,
            kelic_hidden, enrich_hidden, mlp_hidden, input_layers,
            enrich_layer, use_dropout)

optimizer = optimizers.AdaGrad()
optimizer.use_cleargrads()
optimizer.setup(model)
optimizer.add_hook(WeightDecay(0.0001))

if args.gpu >= 0:
    model.to_gpu()
    word_embed.to_gpu()

for i in range(1, args.epoch + 1):
    system_start, system_end = model(x_list, y_list, x_mask, y_mask, knowledge)
    loss = F.softmax_cross_entropy(system_start, gold)
    print(F.argmax(system_start, axis=1), "loss:", loss.data)
    model.cleargrads()
    loss.backward()
    optimizer.update()
def __call__(self, x, t):
    return F.softmax_cross_entropy(self.out(x), t)
def train(): # model model = Mynet(train=True) if GPU >= 0: chainer.cuda.get_device(GPU).use() model.to_gpu() opt = chainer.optimizers.MomentumSGD(0.01, momentum=0.9) opt.setup(model) #opt.add_hook(chainer.optimizer.WeightDecay(0.0005)) xs, ts, paths = data_load('../Dataset/train/images/', hf=True, vf=True) # training mb = 4 mbi = 0 train_ind = np.arange(len(xs)) np.random.seed(0) np.random.shuffle(train_ind) for i in range(500): if mbi + mb > len(xs): mb_ind = train_ind[mbi:] np.random.shuffle(train_ind) mb_ind = np.hstack((mb_ind, train_ind[:(mb - (len(xs) - mbi))])) mbi = mb - (len(xs) - mbi) else: mb_ind = train_ind[mbi:mbi + mb] mbi += mb x = xs[mb_ind] t = ts[mb_ind] if GPU >= 0: x = chainer.cuda.to_gpu(x) t = chainer.cuda.to_gpu(t) #else: # x = chainer.Variable(x) # t = chainer.Variable(t) y = model(x) #accu = F.accuracy(y, t[..., 0]) y = F.transpose(y, axes=(0, 2, 3, 1)) y = F.reshape(y, [-1, num_classes + 1]) t = F.reshape(t, [-1]) loss = F.softmax_cross_entropy(y, t) accu = F.accuracy(y, t) model.cleargrads() loss.backward() opt.update() loss = loss.data accu = accu.data if GPU >= 0: loss = chainer.cuda.to_cpu(loss) accu = chainer.cuda.to_cpu(accu) print("iter >>", i + 1, ',loss >>', loss.item(), ',accuracy >>', accu) chainer.serializers.save_npz('cnn.npz', model)
dis.zerograds()
styleparam_g = Variable(chainer.cuda.to_gpu(styleg[batch:batch + batchsize]))
styleparam = Variable(chainer.cuda.to_gpu(style[batch:batch + batchsize]))

style_vector = gen(styleparam_g)
dis_out1 = dis(style_vector)  # fake
dis_out2 = dis(styleparam)    # real

# if debug:
#     print('params:')
#     print(style_vector.data[2][0:10])
#     print(styleparam.data[2][0:10])
#     print(style_vector.data.shape, styleparam.data.shape)
# da = chainer.cuda.to_gpu(data[1])

loss_gen = F.softmax_cross_entropy(dis_out1, Variable(xp.zeros(batchsize, dtype=np.int32)))  # adversarial loss on the generated parameters: push them toward label 0 (real)
loss_dis = F.softmax_cross_entropy(dis_out1, Variable(xp.ones(batchsize, dtype=np.int32)))   # push the generated samples toward label 1 (fake)
loss_dis += F.softmax_cross_entropy(dis_out2, Variable(xp.zeros(batchsize, dtype=np.int32)))  # feed real samples to the discriminator: push them toward label 0 (real)
# data[1] = chainer.cuda.to_cpu(data[1])

Lsum_gen += loss_gen
Lsum_dis += loss_dis

if batch < style.shape[0] * 0.9:
    loss_gen.backward()
    Optimizer_gen.update()
    loss_dis.backward()
    Optimizer_dis.update()
# else:
#     print("val loss: gen:%s dis:%s" % (loss_gen.data, loss_dis.data))

batch += batchsize
# if(batch % 1000 == 0):
#     print('batch loss: ' + str(loss.data))
def __call__(self, hs, ys):
    '''Decoder forward

    :param Variable hs:
    :param Variable ys:
    :return:
    '''
    self.loss = None
    # prepare input and output word sequences with sos/eos IDs
    eos = self.xp.array([self.eos], 'i')
    sos = self.xp.array([self.sos], 'i')
    ys_in = [F.concat([sos, y], axis=0) for y in ys]
    ys_out = [F.concat([y, eos], axis=0) for y in ys]

    # padding for ys with -1
    # pys: utt x olen
    pad_ys_in = F.pad_sequence(ys_in, padding=self.eos)
    pad_ys_out = F.pad_sequence(ys_out, padding=-1)

    # get dim, length info
    batch = pad_ys_out.shape[0]
    olength = pad_ys_out.shape[1]
    logging.info(self.__class__.__name__ + ' input lengths: ' + str(self.xp.array([h.shape[0] for h in hs])))
    logging.info(self.__class__.__name__ + ' output lengths: ' + str(self.xp.array([y.shape[0] for y in ys_out])))

    # initialization
    c_list = [None]  # list of cell state of each layer
    z_list = [None]  # list of hidden state of each layer
    for l in six.moves.range(1, self.dlayers):
        c_list.append(None)
        z_list.append(None)
    att_w = None
    z_all = []
    self.att.reset()  # reset pre-computation of h

    # pre-computation of embedding
    eys = self.embed(pad_ys_in)  # utt x olen x zdim
    eys = F.separate(eys, axis=1)

    # loop for an output sequence
    for i in six.moves.range(olength):
        att_c, att_w = self.att(hs, z_list[0], att_w)
        if i > 0 and random.random() < self.sampling_probability:
            logging.info(' scheduled sampling ')
            z_out = self.output(z_all[-1])
            z_out = F.argmax(F.log_softmax(z_out), axis=1)
            z_out = self.embed(z_out)
            ey = F.hstack((z_out, att_c))  # utt x (zdim + hdim)
        else:
            ey = F.hstack((eys[i], att_c))  # utt x (zdim + hdim)
        c_list[0], z_list[0] = self.lstm0(c_list[0], z_list[0], ey)
        for l in six.moves.range(1, self.dlayers):
            c_list[l], z_list[l] = self['lstm%d' % l](c_list[l], z_list[l], z_list[l - 1])
        z_all.append(z_list[-1])

    z_all = F.reshape(F.stack(z_all, axis=1), (batch * olength, self.dunits))
    # compute loss
    y_all = self.output(z_all)
    self.loss = F.softmax_cross_entropy(y_all, F.flatten(pad_ys_out))
    # -1: eos, which is removed in the loss computation
    self.loss *= (np.mean([len(x) for x in ys_in]) - 1)
    acc = F.accuracy(y_all, F.flatten(pad_ys_out), ignore_label=-1)
    logging.info('att loss:' + str(self.loss.data))

    # show predicted character sequence for debug
    if self.verbose > 0 and self.char_list is not None:
        y_hat = F.reshape(y_all, (batch, olength, -1))
        y_true = pad_ys_out
        for (i, y_hat_), y_true_ in zip(enumerate(y_hat.data), y_true.data):
            if i == MAX_DECODER_OUTPUT:
                break
            idx_hat = self.xp.argmax(y_hat_[y_true_ != -1], axis=1)
            idx_true = y_true_[y_true_ != -1]
            seq_hat = [self.char_list[int(idx)] for idx in idx_hat]
            seq_true = [self.char_list[int(idx)] for idx in idx_true]
            seq_hat = "".join(seq_hat).replace('<space>', ' ')
            seq_true = "".join(seq_true).replace('<space>', ' ')
            logging.info("groundtruth[%d]: " % i + seq_true)
            logging.info("prediction [%d]: " % i + seq_hat)

    if self.labeldist is not None:
        if self.vlabeldist is None:
            self.vlabeldist = chainer.Variable(self.xp.asarray(self.labeldist))
        loss_reg = -F.sum(F.scale(F.log_softmax(y_all), self.vlabeldist, axis=1)) / len(ys_in)
        self.loss = (1. - self.lsm_weight) * self.loss + self.lsm_weight * loss_reg

    return self.loss, acc
def _check_forward(self, mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k):
    if self.variable:
        mb_locs = chainer.Variable(mb_locs)
        mb_confs = chainer.Variable(mb_confs)
        gt_mb_locs = chainer.Variable(gt_mb_locs)
        gt_mb_labels = chainer.Variable(gt_mb_labels)

    loc_loss, conf_loss = multibox_loss(
        mb_locs, mb_confs, gt_mb_locs, gt_mb_labels, k)

    self.assertIsInstance(loc_loss, chainer.Variable)
    self.assertEqual(loc_loss.shape, ())
    self.assertEqual(loc_loss.dtype, mb_locs.dtype)
    self.assertIsInstance(conf_loss, chainer.Variable)
    self.assertEqual(conf_loss.shape, ())
    self.assertEqual(conf_loss.dtype, mb_confs.dtype)

    if self.variable:
        mb_locs = mb_locs.array
        mb_confs = mb_confs.array
        gt_mb_locs = gt_mb_locs.array
        gt_mb_labels = gt_mb_labels.array
    mb_locs = cuda.to_cpu(mb_locs)
    mb_confs = cuda.to_cpu(mb_confs)
    gt_mb_locs = cuda.to_cpu(gt_mb_locs)
    gt_mb_labels = cuda.to_cpu(gt_mb_labels)
    loc_loss = cuda.to_cpu(loc_loss.array)
    conf_loss = cuda.to_cpu(conf_loss.array)

    n_positive_total = 0
    expect_loc_loss = 0
    expect_conf_loss = 0
    for i in six.moves.xrange(gt_mb_labels.shape[0]):
        n_positive = 0
        negatives = []
        for j in six.moves.xrange(gt_mb_labels.shape[1]):
            loc = F.huber_loss(
                mb_locs[np.newaxis, i, j], gt_mb_locs[np.newaxis, i, j], 1).array
            conf = F.softmax_cross_entropy(
                mb_confs[np.newaxis, i, j], gt_mb_labels[np.newaxis, i, j]).array

            if gt_mb_labels[i, j] > 0:
                n_positive += 1
                expect_loc_loss += loc
                expect_conf_loss += conf
            else:
                negatives.append(conf)

        n_positive_total += n_positive
        if n_positive > 0:
            expect_conf_loss += sum(sorted(negatives)[-n_positive * k:])

    if n_positive_total == 0:
        expect_loc_loss = 0
        expect_conf_loss = 0
    else:
        expect_loc_loss /= n_positive_total
        expect_conf_loss /= n_positive_total

    np.testing.assert_almost_equal(loc_loss, expect_loc_loss, decimal=2)
    np.testing.assert_almost_equal(conf_loss, expect_conf_loss, decimal=2)
# 5. Write a training loop
import numpy as np
from chainer.dataset import concat_examples
from chainer.cuda import to_cpu, to_gpu

max_epoch = 10
gpu_id = 0
model.to_gpu()

while train_iter.epoch < max_epoch:
    train_batch = train_iter.next()
    image_train, target_train = concat_examples(train_batch, gpu_id)

    prediction_train = model(image_train)
    loss = F.softmax_cross_entropy(prediction_train, target_train)

    model.cleargrads()
    loss.backward()
    optimizer.update()

    if train_iter.is_new_epoch:
        print('epoch:{:02d} train_loss:{:.04f} '.format(
            train_iter.epoch, float(to_cpu(loss.data))), end='')

        test_losses = []
        test_accuracies = []
        while True:
            test_batch = test_iter.next()
            image_test, target_test = concat_examples(test_batch, gpu_id)

            prediction_test = model(image_test)
            loss_test = F.softmax_cross_entropy(prediction_test, target_test)
            test_losses.append(to_cpu(loss_test.data))

            accuracy = F.accuracy(prediction_test, target_test)
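            # The original snippet breaks off at this point. The lines below are
            # an assumed completion modeled on the standard Chainer MNIST
            # tutorial loop (not taken from this document): accumulate the
            # accuracy, finish one pass over test_iter, then print the summary.
            accuracy.to_cpu()
            test_accuracies.append(accuracy.data)

            if test_iter.is_new_epoch:
                test_iter.reset()
                break

        print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(
            np.mean(test_losses), np.mean(test_accuracies)))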
def _elementwise_softmax_cross_entropy(x, t):
    assert x.shape[:-1] == t.shape
    shape = t.shape
    x = F.reshape(x, (-1, x.shape[-1]))
    t = F.flatten(t)
    return F.reshape(
        F.softmax_cross_entropy(x, t, reduce='no'), shape)
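# Usage sketch (illustrative, not from the original source): the helper above
# flattens all leading axes, computes an unreduced cross entropy with
# reduce='no', and restores the label shape, so every position keeps its own
# loss value. The toy arrays below are assumptions used only to show the shapes.
import numpy as np
import chainer.functions as F

scores = np.random.randn(2, 5, 10).astype(np.float32)       # (batch, length, n_class)
labels = np.random.randint(0, 10, (2, 5)).astype(np.int32)   # (batch, length)
loss_map = _elementwise_softmax_cross_entropy(scores, labels)
assert loss_map.shape == labels.shape                         # one loss per element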
def update_core(self):
    # TODO: support n_Classfier <- is this needed?
    # TIPS: in experiments, setting n_critic to 5 gave the best results.
    gen_optimizer = self.get_optimizer('gen')
    critic_optimizer = self.get_optimizer('critic')
    clfr_optimizer = self.get_optimizer('classfier')
    xp = self.generator.xp

    for i in range(self.n_critic):
        # grab data
        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        batch = self.converter(batch, self.device)
        real_data, real_label = batch
        real_label = Variable(real_label)
        real_data = Variable(real_data) / 255.

        # TODO: is a uniform distribution really OK for cWGAN...?
        z = Variable(
            xp.asarray(
                self.generator.make_input_z_with_label(
                    batchsize, real_label.data)))

        # Generator
        gen_data = self.generator(z)
        gen_data = gen_data.reshape(batchsize, 1, 28, 28)

        # Critic (Discriminator)
        critic_real = self.critic(real_data)
        critic_fake = self.critic(gen_data)

        # Classifier
        # classifier_real = self.classifier(real_data)
        # classifier_fake = self.classifier(gen_data)

        # Loss
        ## Critic Loss
        # print(critic_fake.shape, critic_real.shape, gen_data.shape, real_data.shape)
        # critic_loss = F.mean(critic_fake - critic_real)
        e = xp.random.uniform(0., 1., (batchsize, 1, 1, 1)).astype(np.float32)
        x_hat = e * real_data + (1 - e) * gen_data

        # recreate Variable
        loss_gan = F.average(critic_fake - critic_real)
        # x_hat.backward(retain_grad=True, enable_double_backprop=True)
        grad, = chainer.grad([self.critic(x_hat)], [x_hat],
                             enable_double_backprop=True)
        grad = F.sqrt(F.batch_l2_norm_squared(grad))
        loss_grad = self.l * F.mean_absolute_error(grad, xp.ones_like(grad.data))
        critic_loss = loss_gan + loss_grad

        self.critic.cleargrads()
        critic_loss.backward()
        critic_optimizer.update()

        chainer.report({'critic_loss': critic_loss})
        chainer.report({'loss_grad': loss_grad})
        chainer.report({'loss_gan': loss_gan})

    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    batch = self.converter(batch, self.device)
    real_data, real_label = batch
    real_label = Variable(real_label)
    real_data = Variable(real_data) / 255.

    z = Variable(
        xp.asarray(
            self.generator.make_input_z_with_label(batchsize, real_label.data)))

    # Generator
    gen_data = self.generator(z)

    # Critic (Discriminator)
    critic_fake = self.critic(gen_data)

    # Classifier
    classifier_real = self.classifier(real_data)
    classifier_fake = self.classifier(gen_data)

    ## Categorical Loss
    c_f_loss = F.softmax_cross_entropy(classifier_fake, real_label)
    c_r_loss = F.softmax_cross_entropy(classifier_real, real_label)
    c_loss = (c_r_loss + c_f_loss) / 2

    self.classifier.cleargrads()
    c_loss.backward()
    clfr_optimizer.update()

    chainer.report({'c_r_loss': c_r_loss})
    chainer.report({'c_loss': c_loss})

    # Generator Loss
    gen_loss = F.average(-critic_fake)
    self.generator.cleargrads()
    gen_loss.backward()
    gen_optimizer.update()
    chainer.report({'gen_loss': gen_loss})

    self.classifier.cleargrads()
    c_f_loss.backward()
    gen_optimizer.update()
    chainer.report({'c_f_loss': c_f_loss})
def train_dcgan_labeled(evol, dis, proj, epoch0=0):
    global epoch

    o_evol = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_evol.setup(evol)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis.setup(dis)
    o_proj = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_proj.setup(proj)
    if not args.fresh_start:
        serializers.load_hdf5("%s/dcgan_model_evol.h5" % (out_model_dir), evol)
        serializers.load_hdf5("%s/dcgan_state_evol.h5" % (out_model_dir), o_evol)
        serializers.load_hdf5("%s/dcgan_model_dis.h5" % (out_model_dir), dis)
        serializers.load_hdf5("%s/dcgan_state_dis.h5" % (out_model_dir), o_dis)
        serializers.load_hdf5("%s/dcgan_model_proj.h5" % (out_model_dir), proj)
        serializers.load_hdf5("%s/dcgan_state_proj.h5" % (out_model_dir), o_proj)
    o_evol.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_proj.add_hook(chainer.optimizer.WeightDecay(0.00001))

    vis_process = None

    for epoch in xrange(epoch0, n_epoch):
        for train_ctr in xrange(0, n_train, batchsize):
            print "epoch:", epoch, "train:", train_ctr,
            # discriminator
            # 0: from dataset
            # 1: from noise

            good_movie = True
            prediction_movie = n_movie * [None]

            try:
                current_movie = load_movie()
            except:
                continue

            for i in range(n_timeseries - 1):
                if current_movie[i] is None:
                    good_movie = False
                else:
                    prediction_movie[i] = current_movie[i]
            if not good_movie:
                continue

            for i in range(n_timeseries - 1, n_movie):
                prediction_movie[i] = evolve_image(
                    evol, proj, prediction_movie[i - n_timeseries + 1:i])

            if train_ctr % save_interval == 0:
                for answer_mode in ['predict', 'observe']:
                    for offset in [n_timeseries, 16, 32, 64, 119]:
                        if offset >= n_movie:
                            continue
                        img_prediction = prediction_movie[offset]
                        if answer_mode == 'observe':
                            img_prediction = current_movie[offset]
                        if img_prediction is None:
                            continue
                        imgfn = '%s/futuresun_%d_%04d_%s+%03d.png' % (
                            out_image_dir, epoch, train_ctr, answer_mode, offset)
                        plt.rcParams['figure.figsize'] = (12.0, 12.0)
                        plt.close('all')
                        plt.imshow(img_prediction, vmin=0, vmax=1.4)
                        plt.suptitle(imgfn)
                        plt.savefig(imgfn)
                        subprocess.call("cp %s ~/public_html/futuresun/" % (imgfn), shell=True)

                # we don't have enough disk for history
                history_dir = 'history/'  # %d-%d'%(epoch, train_ctr)
                subprocess.call("mkdir -p %s " % (history_dir), shell=True)
                subprocess.call("cp %s/*.h5 %s " % (out_model_dir, history_dir), shell=True)

                if epoch > 0 or train_ctr > 0:
                    print 'saving model...'
                    serializers.save_hdf5("%s/dcgan_model_evol.h5" % (out_model_dir), evol)
                    serializers.save_hdf5("%s/dcgan_state_evol.h5" % (out_model_dir), o_evol)
                    serializers.save_hdf5("%s/dcgan_model_dis.h5" % (out_model_dir), dis)
                    serializers.save_hdf5("%s/dcgan_state_dis.h5" % (out_model_dir), o_dis)
                    serializers.save_hdf5("%s/dcgan_model_proj.h5" % (out_model_dir), proj)
                    serializers.save_hdf5("%s/dcgan_state_proj.h5" % (out_model_dir), o_proj)
                    print '...saved.'

            movie_in = None
            movie_out = None
            movie_out_predict = None

            evol_scores = {}
            proj_scores = {}
            matsuoka_shuzo = {}
            shuzo_evoke_timestep = []
            difficulties = ['normal', 'hard']
            vis_kit = {}
            for difficulty in difficulties:
                evol_scores[difficulty] = [0.0]
                proj_scores[difficulty] = [0.0]
                matsuoka_shuzo[difficulty] = True
                vis_kit[difficulty] = None
            matsuoka_shuzo['normal'] = False  # It's no use, no use... let's give up...

            if vis_process is not None:
                vis_process.join()
                vis_process = None

            # start main training routine.
            print
            next_shuzo_scale = 10.0 * (1 + epoch)
            next_shuzo_offset = 1 + abs(int(round(np.random.normal(scale=next_shuzo_scale))))
            for train_offset in range(0, n_movie - n_timeseries):
                for difficulty in difficulties:
                    movie_clip = current_movie
                    if not matsuoka_shuzo[difficulty]:
                        # Why would you give up there...
                        continue
                    else:
                        # Don't give up!
                        pass

                    if difficulty == 'normal':
                        movie_clip_in = movie_clip
                    else:
                        movie_clip_in = prediction_movie

                    maybe_dat = create_batch(train_offset, movie_clip_in, movie_clip)
                    if not maybe_dat:
                        # print "Warning: skip offset", train_offset, "because of unavailable data."
                        continue
                    data_in, data_out, data_other = maybe_dat

                    movie_in = Variable(cuda.to_gpu(data_in))
                    movie_out = Variable(cuda.to_gpu(data_out))
                    movie_other = Variable(cuda.to_gpu(data_other))

                    movie_out_predict_before = evol(movie_in)
                    movie_out_predict = proj(movie_out_predict_before)  # no proj
                    vis_kit[difficulty] = (movie_in.data.get(),
                                           movie_out.data.get(),
                                           movie_out_predict_before.data.get(),
                                           movie_out_predict.data.get())

                    if args.norm == 'dcgan':
                        yl = dis(movie_in, movie_out_predict)
                        L_evol = F.softmax_cross_entropy(
                            yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
                        L_dis = F.softmax_cross_entropy(
                            yl, Variable(xp.ones(batchsize, dtype=np.int32)))
                        # train discriminator
                        yl_train = dis(movie_in, movie_out)
                        L_dis += F.softmax_cross_entropy(
                            yl_train, Variable(xp.zeros(batchsize, dtype=np.int32)))
                    elif args.norm == 'CA':
                        L_evol = d_norm(0, dis, movie_out, movie_out_predict_before)
                        L_proj = d_norm(0, dis, movie_out, movie_out_predict)
                        L_dis = d_norm(1, dis, movie_out, movie_out_predict_before)
                        # L_dis += d_norm(1, dis, movie_out, movie_out_predict)
                        L_dis += d_norm(0, dis, movie_out, movie_other)
                        # L_dis += d_norm(0, dis, movie_other, movie_out)
                    else:
                        L2norm = (movie_out - movie_out_predict) ** 2
                        yl = F.sum(L2norm) / L2norm.data.size
                        L_evol = yl

                    evol_scores[difficulty] += [L_evol.data.get()]  # np.average(F.softmax(yl).data.get()[:,0])
                    proj_scores[difficulty] += [L_proj.data.get()]  # np.average(F.softmax(yl).data.get()[:,0])

                    # stop learning on normal mode.
                    if difficulty == 'hard':
                        o_evol.zero_grads()
                        L_evol.backward()
                        o_evol.update()
                        o_dis.zero_grads()
                        L_dis.backward()
                        o_dis.update()
                        o_proj.zero_grads()
                        L_proj.backward()
                        o_proj.update()

                    movie_in.unchain_backward()
                    movie_out_predict.unchain_backward()
                    movie_out_predict_before.unchain_backward()
                    movie_other.unchain_backward()
                    L_evol.unchain_backward()
                    if args.norm == 'dcgan' or args.norm == 'CA':
                        L_dis.unchain_backward()

                    sys.stdout.write(
                        '%d %6s %s: %f -> %f, %f -> %f shuzo:%s\r' %
                        (train_offset, difficulty, args.norm,
                         np.average(evol_scores['normal']),
                         np.average(proj_scores['normal']),
                         np.average(evol_scores['hard']),
                         np.average(proj_scores['hard']),
                         str(shuzo_evoke_timestep[-10:])))
                    sys.stdout.flush()

                    # update the prediction as results of learning.
                    prediction_movie[train_offset + n_timeseries - 1] = evolve_image(
                        evol, proj,
                        prediction_movie[train_offset:train_offset + n_timeseries - 1])

                # prevent too much learning from noisy prediction.
                # if len(evol_scores['hard'])>=10 and np.average(evol_scores['hard'][-5:-1]) > 5 * np.average(evol_scores['normal']):
                if train_offset == next_shuzo_offset:
                    next_shuzo_offset = train_offset + 1 + abs(
                        int(round(np.random.normal(scale=next_shuzo_scale))))
                    # Never, ever give up!
                    # matsuoka_shuzo['hard'] = False
                    shuzo_evoke_timestep += [train_offset]
                    evol_scores['hard'] = [0.0]
                    proj_scores['hard'] = [0.0]
                    for t in range(train_offset, train_offset + n_timeseries):
                        if current_movie[t] is not None:
                            prediction_movie[t] = current_movie[t]
            print

            def visualize_vis_kit(vis_kit):
                print "visualizing...",
                sys.stdout.flush()
                for difficulty in difficulties:
                    if vis_kit[difficulty] is None:
                        continue
                    movie_data, movie_out_data, movie_pred_data, movie_proj_data = vis_kit[difficulty]

                    imgfn = '%s/batch-%s_%d_%04d.png' % (
                        out_image_dir, difficulty, epoch, train_ctr)

                    n_col = n_timeseries + 3
                    plt.rcParams['figure.figsize'] = (1.0 * n_col, 1.0 * batchsize)
                    plt.close('all')

                    for ib in range(batchsize):
                        for j in range(n_timeseries - 1):
                            plt.subplot(batchsize, n_col, 1 + ib * n_col + j)
                            if j < 2:
                                vmin = -1
                                vmax = 1
                            else:
                                vmin = 0
                                vmax = 1.4
                            plt.imshow(movie_data[ib, j, :, :], vmin=vmin, vmax=vmax)
                            plt.axis('off')

                        plt.subplot(batchsize, n_col, 1 + ib * n_col + n_timeseries - 1)
                        plt.imshow(movie_pred_data[ib, 0, :, :], vmin=0, vmax=1.4)
                        plt.axis('off')

                        plt.subplot(batchsize, n_col, 1 + ib * n_col + n_timeseries)
                        plt.imshow(movie_proj_data[ib, 0, :, :], vmin=0, vmax=1.4)
                        plt.axis('off')

                        plt.subplot(batchsize, n_col, 1 + ib * n_col + n_timeseries + 2)
                        plt.imshow(movie_out_data[ib, 0, :, :], vmin=0, vmax=1.4)
                        plt.axis('off')

                    plt.suptitle(imgfn)
                    plt.savefig(imgfn)
                    subprocess.call(
                        "cp %s ~/public_html/suntomorrow-batch-%s-%s.png" %
                        (imgfn, difficulty, args.gpu), shell=True)
                print "visualized.",
                sys.stdout.flush()

            vis_process = Process(target=visualize_vis_kit, args=(vis_kit,))
            vis_process.start()
model = MLP(10, 10, 2)
optimizer = optimizers.SGD()
optimizer.setup(model)

train_data_variable = Variable(train_data.astype(np.float32))
train_label_variable = Variable(train_label.astype(np.int32))

loss_log = []
for epoch in range(200):
    model.cleargrads()
    prod_label = model(train_data_variable)
    loss = F.softmax_cross_entropy(prod_label, train_label_variable)
    loss.backward()
    optimizer.update()
    loss_log.append(loss.data)
#print(loss_log)

print(test_data[0:10])
print("-----")
test_data_variable = Variable(test_data.astype(np.float32))
y = model(test_data_variable)
y = F.softmax(y)
print(y.data[0:50])
pred_label = np.argmax(y.data, 1)
print(pred_label[0:50])
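# Follow-up sketch (not in the original script): if the ground-truth labels for
# the test split are available as `test_label` (an assumed name mirroring
# `train_label` above), the predicted classes can be scored directly.
test_accuracy = np.mean(pred_label == test_label)
print("test accuracy:", test_accuracy)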
def __call__(self, x, t):
    h = self.predict(x)
    loss = F.softmax_cross_entropy(h, t)
    chainer.report({'loss': loss, 'accuracy': F.accuracy(h, t)}, self)
    return loss
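# Usage sketch (illustrative; `TrainChain`, `model`, and `train_iter` are assumed
# names, not taken from the original snippet): a __call__ like the one above lets
# the link drop straight into Chainer's Trainer, and the chainer.report call
# feeds the LogReport/PrintReport extensions under the 'main/' prefix.
import chainer
from chainer import training
from chainer.training import extensions

wrapper = TrainChain(model)                      # link whose __call__ returns the loss
optimizer = chainer.optimizers.Adam()
optimizer.setup(wrapper)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (10, 'epoch'), out='result')
trainer.extend(extensions.LogReport())
trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy']))
trainer.run()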