def predict(self, x_data, y_data=None):
    x = Variable(x_data)
    y = self.forward_raw(x, train=False)
    if y_data is None:
        return F.sigmoid(y).data.reshape(x_data.shape[0])
    else:
        return F.sigmoid_cross_entropy(y, Variable(y_data)).data
def forward_eye_states(self, x_batch_curr, y_batch_curr, volatile):
    current_sample = Variable(x_batch_curr, volatile=volatile)
    y_batch_curr = np.asarray(y_batch_curr).reshape(32, -1)
    current_output = Variable(y_batch_curr, volatile=volatile)

    h1_current = F.sigmoid(self.model_to_use.x_h1(current_sample))
    h2_current = F.sigmoid(self.model_to_use.h1_h2(h1_current))
    h3_current = F.sigmoid(self.model_to_use.h2_h3(h2_current))
    h4_current = F.sigmoid(self.model_to_use.h3_h4(h3_current))
    h4 = h4_current

    y = self.model_to_use.h4_y(h4)
    y.data = y.data.reshape(32, -1)
    loss = F.sigmoid_cross_entropy(y, current_output)

    current_output.data = np.squeeze(current_output.data)
    accuracy = F.accuracy(y, current_output)
    return accuracy, loss, y
def check_forward(self, x_data, t_data, use_cudnn='always'):
    x_val = chainer.Variable(x_data)
    t_val = chainer.Variable(t_data)
    with chainer.using_config('use_cudnn', use_cudnn):
        loss = functions.sigmoid_cross_entropy(x_val, t_val, self.normalize)
    self.assertEqual(loss.data.shape, ())
    self.assertEqual(loss.data.dtype, numpy.float32)
    loss_value = float(cuda.to_cpu(loss.data))

    # Compute expected value
    loss_expect = 0
    non_ignore_count = 0
    for i in six.moves.range(self.x.shape[0]):
        for j in six.moves.range(self.x.shape[1]):
            xd, td = self.x[i, j], self.t[i, j]
            if td == -1:
                continue
            loss_expect -= xd * (td - (xd >= 0)) \
                - math.log(1 + math.exp(-numpy.abs(xd)))
            non_ignore_count += 1
    if non_ignore_count == 0:
        loss_expect = 0
    elif self.normalize:
        loss_expect /= non_ignore_count
    else:
        loss_expect /= self.t.shape[0]
    self.assertAlmostEqual(loss_expect, loss_value, places=5)
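For reference, the per-element quantity accumulated in the loop above is the numerically stable form of the sigmoid cross entropy; with $\sigma$ the logistic function it is

$\ell(x, t) = -\bigl(x\,(t - \mathbf{1}[x \ge 0]) - \log(1 + e^{-|x|})\bigr) = \max(x, 0) - x\,t + \log(1 + e^{-|x|}) = -\bigl(t \log \sigma(x) + (1 - t)\log(1 - \sigma(x))\bigr).$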
def patch_loss_gen(self, dis):
    fake = self.random(1, n=self.config.batchsize[self.config.gan_type])
    dis_fake = dis(fake)
    if self.config.loss_type == "wgan-gp":
        loss = F.mean(dis_fake)
    elif self.config.loss_type == "nsgan":
        loss = F.sigmoid_cross_entropy(
            dis_fake, self.xp.ones(dis_fake.shape, dtype="int32"))
    else:
        raise NotImplementedError()
    return loss
def update_discriminator(gen, dis, dis_optimizer, x, z):
    xp = cuda.get_array_module(x)

    # generate
    x_fake = gen(z)

    # discriminate
    y_real = dis(x)
    y_fake = dis(x_fake)

    # target
    t_real = xp.ones_like(y_real.data, dtype=np.int32)
    t_fake = xp.zeros_like(y_fake.data, dtype=np.int32)

    loss_dis = F.sigmoid_cross_entropy(
        y_real, t_real) + F.sigmoid_cross_entropy(y_fake, t_fake)

    dis.cleargrads()
    loss_dis.backward()
    dis_optimizer.update()
    return loss_dis
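For symmetry, a minimal companion sketch of the generator side, assuming the same `gen`, `dis`, and optimizer interfaces used by `update_discriminator` above (this helper is not part of the original code):

def update_generator(gen, dis, gen_optimizer, z):
    # Hypothetical companion to update_discriminator: the generator is
    # trained so the discriminator labels its fakes as real (label 1).
    xp = cuda.get_array_module(z)
    x_fake = gen(z)
    y_fake = dis(x_fake)
    t_real = xp.ones_like(y_fake.data, dtype=np.int32)
    loss_gen = F.sigmoid_cross_entropy(y_fake, t_real)
    gen.cleargrads()
    loss_gen.backward()
    gen_optimizer.update()
    return loss_gen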
def __call__(self, f_atoms, f_bonds, super_node_x, atom_label, mask_reagents,
             mask_reactants_reagents, batch_size):
    h = self.ggnngwm(f_atoms, f_bonds, super_node_x)
    h = self.mlp(h)[:, :, 0]
    loss = functions.sigmoid_cross_entropy(h, atom_label)
    h = functions.sigmoid(h).array
    acc = atom_selection_acc(h, atom_label, mask_reagents, self.topK,
                             batch_size)
    atom_selected = atom_selection(h, mask_reactants_reagents, self.topK)
    return loss, acc, atom_selected
def evaluate(self):
    train_X, train_y = self.collect_prediction_for_train_data()
    model = LinearSVC()
    model.fit(X=train_X, y=train_y)

    iterator = self._iterators['dev']
    target = self._targets['main']

    # this is necessary for more-than-once-evaluation
    it = copy.copy(iterator)

    label_scores = []
    svm_label_scores = []
    summary = reporter.DictSummary()
    for n, batch in enumerate(it):
        observation = {}
        with reporter.report_scope(observation):
            padded_batch = self.converter(batch, device=self.device)
            x1s = padded_batch['x1s']
            x2s = padded_batch['x2s']
            wordcnt = padded_batch['wordcnt']
            wgt_wordcnt = padded_batch['wgt_wordcnt']
            x1s_len = padded_batch['x1s_len']
            x2s_len = padded_batch['x2s_len']
            y = padded_batch['y']
            y_score, sim_scores = target(x1s, x2s, wordcnt, wgt_wordcnt,
                                         x1s_len, x2s_len)

            # compute loss
            loss = F.sigmoid_cross_entropy(x=y_score, t=y).data
            reporter.report({'loss': loss}, target)

            # We evaluate WikiQA by MAP and MRR
            # for direct evaluation
            label_score = np.c_[y, y_score.data]
            label_scores.append(label_score)

            # for SVM/LR
            x = np.concatenate([x.data for x in sim_scores] +
                               [wordcnt, wgt_wordcnt, x1s_len, x2s_len],
                               axis=1)
            y_score = model.decision_function(x)
            svm_label_score = np.c_[y, y_score]
            svm_label_scores.append(svm_label_score)
        summary.add(observation)

    stats = compute_map_mrr(label_scores)
    svm_stats = compute_map_mrr(svm_label_scores)
    summary_dict = summary.compute_mean()
    summary_dict["validation/main/svm_map"] = svm_stats.map
    summary_dict["validation/main/svm_mrr"] = svm_stats.mrr
    summary_dict["validation/main/map"] = stats.map
    summary_dict["validation/main/mrr"] = stats.mrr
    return summary_dict
def check_backward(self, x_data, t_data, use_cudnn=True):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    loss = functions.sigmoid_cross_entropy(x, t, use_cudnn)
    loss.backward()
    self.assertEqual(None, t.grad)

    func = loss.creator
    f = lambda: func.forward((x.data, t.data))
    gx, = gradient_check.numerical_grad(f, (x.data,), (1,), eps=0.01)

    gradient_check.assert_allclose(gx, x.grad)
def forward(self, inp, target=None):
    out = self.calc(inp)
    if target is not None:
        loss = F.sigmoid_cross_entropy(out, target)
        acc = F.binary_accuracy(out, target)
        return loss, acc
    else:
        out = F.sigmoid(out)
        return out.reshape(-1)
def calculate_metrics(nets, batch):
    gen = nets['gen']
    dis = nets['dis']
    x, t = batch
    xp = cuda.get_array_module(x)
    batch_size = len(x)

    z = xp.asarray(
        np.random.random((batch_size, gen.latent_size)).astype(np.float32))
    x_fake = gen(z)

    y_real = dis(x)
    y_fake = dis(x_fake)

    real_label = xp.ones((batch_size, 1), dtype=np.int32)
    fake_label = xp.zeros((batch_size, 1), dtype=np.int32)

    loss_dis = F.sigmoid_cross_entropy(y_real, real_label)
    loss_dis += F.sigmoid_cross_entropy(y_fake, fake_label)
    loss_gen = F.sigmoid_cross_entropy(y_fake, real_label)

    return {'gen': loss_gen, 'dis': loss_dis}
def mse(self, x, y, undo_norm, test_data=False):
    acc = 0.0
    y = Variable(np.array(y, dtype=np.int32))
    pred = self.prediction(x)

    '''accuracy'''
    if test_data:
        for i in range(len(pred)):
            if int(y[i].data[0]) == int(pred[i]._data[0][0]):
                acc = acc + 1.0

    '''mse → sigmoid cross entropy'''
    return F.sigmoid_cross_entropy(pred, y), acc / float(len(pred))
def _elementwise_softmax_cross_entropy(x, t, two_class):
    assert x.shape[:-1] == t.shape
    shape = t.shape
    t = F.flatten(t)
    if two_class:
        x = F.flatten(x)
        return F.reshape(
            F.sigmoid_cross_entropy(x, t, reduce='no'), shape)
    else:
        x = F.reshape(x, (-1, x.shape[-1]))
        return F.reshape(
            F.softmax_cross_entropy(x, t, reduce='no'), shape)
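A minimal usage sketch of the helper above; the shapes and arrays are made up for illustration (a two-class head with a trailing singleton logit axis):

import numpy as np

x = np.random.randn(2, 3, 1).astype(np.float32)             # logits
t = np.random.randint(0, 2, size=(2, 3)).astype(np.int32)   # binary targets
loss_map = _elementwise_softmax_cross_entropy(x, t, two_class=True)
print(loss_map.shape)  # (2, 3): one loss value per position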
def __init__(self, model, loss_fn):
    super().__init__()
    with self.init_scope():
        self.model = model

    if loss_fn == 'focal':
        self.loss_fn = lambda x, t: F.sum(focal_loss(F.sigmoid(x), t))
    elif loss_fn == 'sigmoid':
        self.loss_fn = lambda x, t: F.sum(
            F.sigmoid_cross_entropy(x, t, reduce='no'))
    else:
        raise ValueError('unknown loss function. {}'.format(loss_fn))
def forward(self, data, is_training=True):
    """
    data: [([label], [features]), ([label], [features]), ...]
    """
    x_raw = [x[1] for x in data]
    batch_size = len(x_raw)

    # embed sparse feature vector to dense vector
    x_sparse = XP.iarray(x_raw)
    x_sparse = F.reshape(x_sparse, [batch_size * self.nzdim])
    embeddings = self.embed(x_sparse)

    # FM Component
    # 1st order
    first_order = F.reshape(
        F.sum(F.reshape(self.L1(x_sparse), (batch_size, self.nzdim)), 1),
        (batch_size, 1))

    # 2nd order
    embeddings = F.reshape(embeddings,
                           (batch_size, self.nzdim * self.embed_size))
    second_order = XP.fzeros((batch_size, 1))
    for i in range(self.nzdim - 1):
        for j in range(1, self.nzdim - i):
            former = embeddings[:, i * self.embed_size:(i + 1) * self.embed_size]
            later = embeddings[:, (i + j) * self.embed_size:(i + j + 1) * self.embed_size]
            second_order += F.reshape(
                F.batch_matmul(former, later, transa=True), (batch_size, 1))
    y_fm = first_order + second_order

    # Deep Component
    embeddings = F.reshape(embeddings,
                           (batch_size, self.nzdim * self.embed_size))
    h = F.dropout(F.relu(self.L2(embeddings)), ratio=0.9, train=is_training)
    h = F.dropout(F.relu(self.L3(h)), ratio=0.9, train=is_training)
    h = F.dropout(F.relu(self.L4(h)), ratio=0.9, train=is_training)
    y_deep = self.L5(h)

    y = y_fm + y_deep
    if is_training:
        t_raw = [t[0] for t in data]
        t = XP.iarray(t_raw)
        return F.sigmoid_cross_entropy(y, t)
    else:
        return F.sigmoid(y)
def __call__(self, x, x1, x2, x3, x4, x5, x6, t):
    h1 = self.FE(reshapei(x))
    h1 = F.sigmoid(self.o1(h1))
    loss1 = F.sigmoid_cross_entropy(h1, t[:, :self.n1_classes])

    h2 = self.FE(reshapei(x1))
    h2 = F.sigmoid(self.o2(h2))
    loss2 = F.sigmoid_cross_entropy(h2, t[:, :self.n2_classes])

    h3 = self.FE(reshapei(x2))
    h3 = F.sigmoid(self.o3(h3))
    loss3 = F.sigmoid_cross_entropy(h3, t[:, :self.n3_classes])

    h4 = self.FE(reshapei(x3))
    h4 = F.sigmoid(self.o4(h4))
    loss4 = F.sigmoid_cross_entropy(h4, t[:, :self.n4_classes])

    h5 = self.FE(reshapei(x4))
    h5 = F.sigmoid(self.o5(h5))
    loss5 = F.sigmoid_cross_entropy(h5, t[:, :self.n5_classes])

    h6 = self.FE(reshapei(x5))
    h6 = F.sigmoid(self.o(h6))
    loss6 = F.sigmoid_cross_entropy(h6, t)

    return h1, h2, h3, h4, h5, h6, loss1, loss2, loss3, loss4, loss5, loss6
def train(max_epoch, train_size, valid_size):
    model = RNN()
    # use 1000 samples for training and 1000 samples for testing
    x_train, x_test, y_train, y_test = dataset(train_size + valid_size,
                                               train_size)
    optimizer = optimizers.RMSprop(lr=0.03)
    optimizer.setup(model)

    early_stopping = 20
    min_valid_loss = 1e8
    min_epoch = 0

    train_loss, valid_loss = [], []
    for epoch in range(1, max_epoch):
        _y = model(x_test)
        y = _y.data
        y = np.array([1 - y, y], dtype='f').T[0]
        accuracy = F.accuracy(y, y_test.data.flatten()).data

        _train_loss = F.sigmoid_cross_entropy(model(x_train), y_train).data
        _valid_loss = F.sigmoid_cross_entropy(_y, y_test).data
        train_loss.append(_train_loss)
        valid_loss.append(_valid_loss)

        # stop training once valid_loss has not improved for 20 consecutive epochs
        if min_valid_loss >= _valid_loss:
            min_valid_loss = _valid_loss
            min_epoch = epoch
        elif epoch - min_epoch >= early_stopping:
            break

        optimizer.update(forward, x_train, y_train, model)
        print('epoch: {} acc: {} loss: {} valid_loss: {}'.format(
            epoch, accuracy, _train_loss, _valid_loss))

    loss_plot(train_loss, valid_loss)
    serializers.save_npz('model.npz', model)
def compute_exp_reg_loss(self, pred):
    """Compute explanation loss.

    Args:
        pred: Shape is (Batch, 2, H, W)
    """
    p_shape = pred.shape
    label = self.xp.ones((p_shape[0] * p_shape[2] * p_shape[3], ),
                         dtype='i')
    l = F.sigmoid_cross_entropy(F.reshape(pred, (-1, )), label,
                                reduce='no')
    return F.mean(l)
def evaluate_roc(self, trainer):
    iterator = self._iterators['main']
    eval_func = self.eval_func or self._targets['main']

    if self.eval_hook:
        self.eval_hook(self)

    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)

    y_total = np.array([]).reshape([0, len(self.label_name)])
    t_total = np.array([]).reshape([0, len(self.label_name)])
    for batch in it:
        in_arrays = self.converter(batch, self.device)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            y = eval_func(*in_arrays[:-1])
        t = in_arrays[-1]
        # y = F.sigmoid(y)
        y_data = cuda.to_cpu(y.data)
        t_data = cuda.to_cpu(t)
        y_total = np.vstack([y_total, y_data])
        t_total = np.vstack([t_total, t_data])

    updater = trainer.updater
    epoch = updater.iteration
    out_dir = trainer.out

    observation = {}
    for label_index, label in enumerate(self.label_name):
        y = y_total[:, label_index]
        t = t_total[:, label_index]
        index = numpy.where(t != -1)[0]
        y = y[index]
        t = t[index]
        out_name = os.path.join(
            out_dir, str(epoch) + 'iteration_' + label + '_roc.pdf')
        gather_data = self.comm.gather(np.vstack([t, y]))
        if self.rank == 0:
            gather_data = reduce(lambda x, y: np.hstack([x, y]), gather_data)
            gather_t = np.array(gather_data[0], dtype=np.int)
            gather_y = np.array(gather_data[1], dtype=np.float32)
            plot_roc(y_true=gather_t, y_score=F.sigmoid(gather_y).data,
                     out_name=out_name)
            roc_auc = metrics.roc_auc_score(gather_t, F.sigmoid(gather_y).data)
            with reporter.report_scope(observation):
                reporter.report({'roc_auc_' + label: roc_auc},
                                self._targets['main'])
                reporter.report(
                    {'loss': F.sigmoid_cross_entropy(gather_y, gather_t).data},
                    self._targets['main'])
                reporter.report(
                    {'accuracy': F.binary_accuracy(gather_y, gather_t).data},
                    self._targets['main'])
    return observation
def update_core(self):
    image, labels, metadata = self.converter(
        self.get_iterator('main').next())
    # img = np.transpose(image[0], (1, 2, 0))
    # print(img)
    # cv2.imshow('img', np.uint8(img))
    # cv2.imshow('lbl', np.uint8(labels[0]))
    # cv2.waitKey(0)
    image = Variable(image)
    assert image.shape[0] == 1, "Batchsize of only 1 is allowed for now"
    if self.device >= 0:
        image.to_gpu(self.device)
    xp = get_array_module(image.data)

    to_substract = np.array((-1, 0))
    noise_classes = np.unique(labels[0]).astype(np.int32)
    # print(noise_classes)
    target = xp.asarray([[0] * (self.no_of_classes)])
    gt_labels = np.setdiff1d(
        noise_classes, to_substract) - 1  # np.unique(labels[0]).astype(np.int32)[2:] - 1
    target[0][gt_labels] = 1

    gcam, cl_scores, class_id, fconf, floc = self._optimizers[
        'main'].target.stream_cl(image, gt_labels)
    mask = self._optimizers['main'].target.get_mask(gcam)
    masked_image = self._optimizers['main'].target.mask_image(image, mask)
    masked_output = self._optimizers['main'].target.stream_am(masked_image)
    masked_output = F.sigmoid(masked_output)

    cl_loss = F.sigmoid_cross_entropy(cl_scores, target, normalize=True)
    am_loss = masked_output[0][class_id][0]
    finger_loss = self.finger_loss(fconf, floc, metadata, xp)

    labels = Variable(labels)
    if self.device >= 0:
        labels.to_gpu(self.device)
    segment_loss = self._optimizers['main'].target(image, labels)

    total_loss = (self.lambd1 * cl_loss + self.lambd2 * am_loss +
                  self.lambd3 * segment_loss + 1 * finger_loss)

    report({'AM_Loss': am_loss}, self.get_optimizer('main').target)
    report({'CL_Loss': cl_loss}, self.get_optimizer('main').target)
    report({'SG_Loss': segment_loss}, self.get_optimizer('main').target)
    report({'TotalLoss': total_loss}, self.get_optimizer('main').target)

    self._optimizers['main'].target.cleargrads()
    # if not np.isnan(float(total_loss.data)):
    total_loss.backward()
    self._optimizers['main'].update()
def evaluate_roc(self, trainer):
    iterator = self._iterators['main']
    eval_func = self.eval_func or self._targets['main']

    if self.eval_hook:
        self.eval_hook(self)

    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)

    y_total = []
    t_total = []
    length = it.dataset.__len__()
    batchsize = it.batch_size
    length = length // batchsize

    from tqdm import tqdm
    pbar = tqdm(total=length)
    for batch in it:
        in_arrays = self.converter(batch, self.device)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            y = eval_func(*in_arrays[:-1])
        t = in_arrays[-1]
        y_data = cuda.to_cpu(y.data)
        t_data = cuda.to_cpu(t)
        y_total.extend(y_data)
        t_total.extend(t_data)
        pbar.update(1)

    y_total = numpy.concatenate(y_total).ravel()
    t_total = numpy.concatenate(t_total).ravel()
    index = numpy.where(t_total != -1)[0]
    y_total = y_total[index]
    t_total = t_total[index]

    d = {'label': t_total, 'score': y_total}
    from pathlib import Path
    out_path = Path('./valid_result') / str(self.epoch)
    out_path.mkdir(exist_ok=True)
    np.savez(out_path / ('validation_' + str(self.rank)), **d)

    observation = {}
    with reporter.report_scope(observation):
        roc_auc = metrics.roc_auc_score(t_total, F.sigmoid(y_total).data)
    with reporter.report_scope(observation):
        reporter.report({'roc_auc_': roc_auc}, self._targets['main'])
        reporter.report(
            {'loss': F.sigmoid_cross_entropy(y_total, t_total).data},
            self._targets['main'])
        reporter.report(
            {'accuracy': F.binary_accuracy(y_total, t_total).data},
            self._targets['main'])
    return observation
def __call__(self, x, t):
    h = F.relu(self.conv1(x))
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.dropout(F.relu(self.fc4(h)), train=self.train)
    h = self.fc5(h)
    self.pred = F.reshape(h, (x.data.shape[0], 16, 16))

    if t is not None:
        self.loss = F.sigmoid_cross_entropy(self.pred, t, normalize=False)
        return self.loss
    else:
        self.pred = F.sigmoid(self.pred)
        return self.pred
def evaluate(self):
    train_X, train_y = self.collect_prediction_for_train_data()
    model = LinearSVC()
    # model = LinearSVC(loss="hinge", tol=0.001, max_iter=6000)
    # model = SVC(C=1.0, kernel="linear")
    model.fit(X=train_X, y=train_y)

    iterator = self._iterators['dev']
    target = self._targets['main']

    # this is necessary for more-than-once-evaluation
    it = copy.copy(iterator)

    label_scores = []
    svm_label_scores = []
    summary = reporter.DictSummary()
    for n, batch in enumerate(it):
        observation = {}
        with reporter.report_scope(observation):
            x1s, x2s, wordcnt, wgt_wordcnt, x1s_len, x2s_len, y = self.converter(
                batch, device=-1)
            # returns y_score (feature vectors mapped to one dimension) and
            # sim_scores for this batch
            y_score, sim_scores = target(x1s, x2s, wordcnt, wgt_wordcnt,
                                         x1s_len, x2s_len)

            # compute the loss against the gold labels y
            loss = F.sigmoid_cross_entropy(x=y_score, t=y).data
            reporter.report({'loss': loss}, target)

            # We evaluate WikiQA by MAP and MRR
            # for direct evaluation
            label_score = np.c_[tc(y), tc(y_score.data)]
            label_scores.append(label_score)

            # for SVM/LR
            x = np.concatenate([tc(x.data) for x in sim_scores] +
                               [wordcnt, wgt_wordcnt, x1s_len, x2s_len],
                               axis=1)
            y_score = model.decision_function(x)
            svm_label_score = np.c_[y, y_score]
            svm_label_scores.append(svm_label_score)
        summary.add(observation)

    stats = compute_map_mrr(label_scores)
    svm_stats = compute_map_mrr(svm_label_scores)
    summary_dict = summary.compute_mean()
    summary_dict["validation/svm_map"] = svm_stats.map
    summary_dict["validation/svm_mrr"] = svm_stats.mrr
    summary_dict["validation/map"] = stats.map
    summary_dict["validation/mrr"] = stats.mrr
    return summary_dict
def update_core(self):
    gen_optimizer = self.get_optimizer('gen')
    dis_optimizer = self.get_optimizer('dis')
    xp = self.gen.xp

    for i in range(self.n_dis):
        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        if i == 0:
            z = self.gen.make_hidden(batchsize)
            x_fake = self.gen(z)
            y_fake = self.dis(x_fake)
            loss_gen = F.sigmoid_cross_entropy(
                y_fake, Variable(xp.ones_like(y_fake.data, dtype=xp.int8)))
            self.gen.cleargrads()
            loss_gen.backward()
            gen_optimizer.update()
            chainer.reporter.report({'gen/loss': loss_gen})
            x_fake.unchain_backward()

        x_real = Variable(self.converter(batch, self.device))
        y_real = self.dis(x_real)
        z = self.gen.make_hidden(batchsize)
        x_fake = self.gen(z)
        y_fake = self.dis(x_fake.data)

        loss_dis = F.sigmoid_cross_entropy(
            y_real, Variable(xp.ones_like(y_real.data, dtype=xp.int8)))
        loss_dis += F.sigmoid_cross_entropy(
            y_fake, Variable(xp.zeros_like(y_fake.data, dtype=xp.int8)))

        self.dis.cleargrads()
        loss_dis.backward()
        dis_optimizer.update()
        chainer.reporter.report({'dis/loss': loss_dis})
def noised_sigmoid_cross_entropy(y, t, mc_iteration,
                                 normalize=True, reduce='mean'):
    """ Sigmoid cross-entropy for aleatoric uncertainty estimates.

    Args:
        y (list of ~chainer.Variable): logits and sigma
        t (~numpy.ndarray or ~cupy.ndarray): ground-truth
        mc_iteration (int): number of Monte Carlo iterations.
        normalize (bool, optional): Defaults to True.
        reduce (str, optional): Defaults to 'mean'.

    Returns:
        [~chainer.Variable]: Loss value.
    """
    assert isinstance(y, (list, tuple))
    logits, log_std = y

    assert logits.shape[0] == log_std.shape[0]
    assert log_std.shape[1] in (logits.shape[1], 1)
    assert logits.shape[2:] == log_std.shape[2:]
    assert logits.shape == t.shape

    xp = backend.get_array_module(t)

    # std = F.sqrt(F.exp(log_var))
    std = F.exp(log_std)

    loss = 0.
    for _ in range(mc_iteration):
        noise = std * xp.random.normal(0., 1., std.shape)
        loss += sigmoid_cross_entropy(logits + noise, t,
                                      normalize=False, reduce='no')

    if not reduce == 'mean':
        return loss

    if normalize:
        count = loss.size * mc_iteration
    else:
        count = max(1, len(loss)) * mc_iteration

    return F.sum(loss) / count
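A minimal sketch of calling the function above; the shapes, data, and `mc_iteration` value below are made up for illustration:

import numpy as np
import chainer

logits = chainer.Variable(np.random.randn(4, 1, 8, 8).astype(np.float32))
log_std = chainer.Variable(np.zeros((4, 1, 8, 8), dtype=np.float32))  # std = exp(0) = 1
t = np.random.randint(0, 2, size=(4, 1, 8, 8)).astype(np.int32)
loss = noised_sigmoid_cross_entropy([logits, log_std], t, mc_iteration=10)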
def patch_loss_dis(self, real, dis, n_dis=5):
    for _ in range(n_dis):
        fake = self.random(
            n=self.config.batchsize[self.config.gan_type]).array
        if not isinstance(real, Variable):
            real = Variable(real)
        dis_real = dis(real)
        dis_fake = dis(fake)
        if self.config.loss_type == "wgan-gp":
            loss = F.mean(dis_real) - F.mean(dis_fake)
            loss += F.mean(
                F.batch_l2_norm_squared(
                    chainer.grad([loss], [real],
                                 enable_double_backprop=True)[0])) * 1000
        elif self.config.loss_type == "nsgan":
            loss = F.sigmoid_cross_entropy(dis_real, self.xp.ones(dis_real.shape, dtype="int32")) + \
                F.sigmoid_cross_entropy(dis_fake, self.xp.zeros(dis_fake.shape, dtype="int32"))
            loss += F.mean(
                F.batch_l2_norm_squared(
                    chainer.grad([loss], [real],
                                 enable_double_backprop=True)[0])) * 1000
        else:
            raise NotImplementedError(
                "loss type: {} is not supported".format(self.config.loss_type))
    return loss
def check_forward(self, x_data, t_data, use_cudnn=True):
    x_val = chainer.Variable(x_data)
    t_val = chainer.Variable(t_data)
    loss = functions.sigmoid_cross_entropy(x_val, t_val, use_cudnn)
    loss_value = float(cuda.to_cpu(loss.data))

    # Compute expected value
    loss_expect = 0
    for i in six.moves.range(self.x.shape[0]):
        for j in six.moves.range(self.x.shape[1]):
            xd, td = self.x[i, j], self.t[i, j]
            loss_expect -= xd * (td - (xd >= 0)) \
                - math.log(1 + math.exp(-numpy.abs(xd)))
    loss_expect /= self.t.shape[0]
    self.assertAlmostEqual(loss_expect, loss_value, places=5)
def __call__(self, roi_feature, labels):
    # labels shape = B, T, F(9 or 8), 12
    # roi_feature shape = B, T, F, D, where F is the number of boxes in one frame
    with chainer.cuda.get_device_from_array(roi_feature.data) as device:
        node_out = self.forward(roi_feature, labels)  # node_out B, T, F, D
        node_out = F.reshape(node_out, (-1, self.out_size))
        node_labels = self.xp.reshape(labels, (-1, self.out_size))
        pick_index, accuracy_pick_index = self.get_loss_index(node_out, node_labels)
        loss = F.sigmoid_cross_entropy(
            node_out[list(pick_index[0]), list(pick_index[1])],
            node_labels[list(pick_index[0]), list(pick_index[1])])
        accuracy = F.binary_accuracy(
            node_out[list(accuracy_pick_index[0]), list(accuracy_pick_index[1])],
            node_labels[list(accuracy_pick_index[0]), list(accuracy_pick_index[1])])
    return loss, accuracy
def forward_one_step(x, t, state, train=True):
    # if args.gpu >= 0:
    #     data = cuda.to_gpu(data)
    #     targets = cuda.to_gpu(targets)
    x = chainer.Variable(x, volatile=not train)
    t = chainer.Variable(t, volatile=not train)
    h_in = model.x_to_h(x) + model.h_to_h(state['h'])
    c, h = F.lstm(state['c'], h_in)
    y = model.h_to_y(h)
    state = {'c': c, 'h': h}

    sigmoid_y = 1 / (1 + np.exp(-y.data))
    mean_squared_error = ((t.data - sigmoid_y) ** 2).sum() / t.data.size
    return state, F.sigmoid_cross_entropy(y, t), mean_squared_error, h.data[0]
def __call__(self, xs, bboxes, labels):
    # all shapes are (B, T, F, D)
    node_out = self.forward(xs)  # node_out B, T, F, D
    node_out = F.reshape(node_out, (-1, self.out_size))
    node_labels = self.xp.reshape(labels, (-1, self.out_size))
    pick_index, accuracy_pick_index = self.get_loss_index(node_out, node_labels)
    loss = F.sigmoid_cross_entropy(node_out, node_labels)
    accuracy = F.binary_accuracy(
        node_out[list(accuracy_pick_index[0]), list(accuracy_pick_index[1])],
        node_labels[list(accuracy_pick_index[0]), list(accuracy_pick_index[1])])

    report_dict = {'loss': loss, "accuracy": accuracy}
    chainer.reporter.report(report_dict, self)
    return loss
def encode(self, input_batch, teacher_wp, train):
    """
    Input a batch of sequences and update self.c (context vector) and
    self.h (hidden vector).

    :param teacher_wp:
    :param input_batch: batch of input text embed ids, e.g. [[1, 0, 14, 5], [...], ...]
    :param train: True or False
    """
    for batch_word in input_batch:
        batch_word = chainer.Variable(xp.array(batch_word, dtype=xp.int32))
        self.c_batch, self.h_batch = self.enc(batch_word, self.c_batch,
                                              self.h_batch, train=train)
        self.h_enc.append(self.h_batch)
    predict_mat, self.h_batch = self.wpe(self.h_enc)
    if train:
        return F.sigmoid_cross_entropy(predict_mat, teacher_wp)
def forward_one_step(x, t, state, train=True):
    # if args.gpu >= 0:
    #     data = cuda.to_gpu(data)
    #     targets = cuda.to_gpu(targets)
    x = chainer.Variable(x, volatile=not train)
    t = chainer.Variable(t, volatile=not train)
    h_in = model.x_to_h(x) + model.h_to_h(state['h'])
    c, h = F.lstm(state['c'], h_in)
    y = model.h_to_y(h)
    state = {'c': c, 'h': h}

    sigmoid_y = 1 / (1 + np.exp(-y.data))
    mean_squared_error = ((t.data - sigmoid_y) ** 2).sum() / t.data.size
    return state, F.sigmoid_cross_entropy(y, t), mean_squared_error
def update_core(self):
    gen_optimizer = self.get_optimizer('gen')
    dis_optimizer = self.get_optimizer('dis')

    batch = self.get_iterator('main').next()
    x_real = Variable(self.converter(batch, self.device))
    batchsize = len(batch)

    z = self.gen.make_hidden(batchsize)
    x_fake = self.gen(z)

    real_feature, y_real = self.dis(x_real)
    fake_feature, y_fake = self.dis(x_fake)

    loss_gen = F.sigmoid_cross_entropy(
        y_fake, Variable(self.xp.ones_like(y_fake.data, dtype=self.xp.int32)))
    loss_feature = F.mean_squared_error(real_feature, fake_feature)
    self.gen.cleargrads()
    loss_gen.backward()
    loss_feature.backward()
    gen_optimizer.update()
    chainer.reporter.report({'gen/loss': loss_gen})
    chainer.reporter.report({'gen/loss_feature': loss_feature})

    x_fake.unchain_backward()
    L1 = F.sigmoid_cross_entropy(
        y_real, Variable(self.xp.ones_like(y_real.data, dtype=self.xp.int32)))
    L2 = F.sigmoid_cross_entropy(
        y_fake, Variable(self.xp.zeros_like(y_fake.data, dtype=self.xp.int32)))
    loss_dis = L1 + L2
    self.dis.cleargrads()
    loss_dis.backward()
    dis_optimizer.update()
    chainer.reporter.report({'dis/loss': loss_dis})
def __call__(self, xs, crf_pact_structures):
    loss = 0.0
    xp = chainer.cuda.cupy.get_array_module(xs.data)
    accuracy = 0.0
    for idx, X in enumerate(xs):
        crf_pact_structure = crf_pact_structures[idx]
        gt_label = self.get_gt_label_one_graph(xp, crf_pact_structure,
                                               is_bin=True)  # N x Y
        A = crf_pact_structure.A
        for i in range(self.layers_num):
            X = getattr(self, "attn_{}".format(i))(X, A)
        loss += F.sigmoid_cross_entropy(X, gt_label)
        accuracy = F.binary_accuracy(X, gt_label)

    chainer.reporter.report({"loss": loss, "accuracy": accuracy})
    return loss
def check_backward(self, x_data, t_data, use_cudnn=True):
    x = chainer.Variable(x_data)
    t = chainer.Variable(t_data)
    loss = functions.sigmoid_cross_entropy(x, t, use_cudnn)
    loss.backward()
    self.assertEqual(None, t.grad)

    # Skip too large case. That requires a long time.
    if self.shape[0] == 65536:
        return

    func = loss.creator
    f = lambda: func.forward((x.data, t.data))
    gx, = gradient_check.numerical_grad(f, (x.data,), (1,), eps=0.01)

    gradient_check.assert_allclose(gx, x.grad)
def forward(x_data, y_data, train=True):
    x = Variable(x_data, volatile=not train)
    t = Variable(y_data, volatile=not train)
    feature_input = F.relu(model.bn_feature(model.img_feature2vec(x)))
    l1 = F.relu(model.bn1(model.h1(feature_input)))
    y = model.out(l1)
    loss = F.sigmoid_cross_entropy(y, t)

    predicted = cuda.to_cpu(F.sigmoid(y).data)
    predicted[predicted > 0.5] = 1
    predicted[predicted <= 0.5] = 0
    label = cuda.to_cpu(y_data)
    index = np.multiply(predicted == label, label == 1)
    accuracy = float(np.sum(index)) / float(np.sum(label == 1))
    return loss, accuracy
def __call__(self, x, t):
    h1 = F.max_pooling_2d(F.relu(self.conv1(x)), 2)
    h2 = F.max_pooling_2d(F.relu(self.conv2(h1)), 2)
    h3 = F.relu(self.l1(h2))
    y = self.l2(h3)
    self.loss = F.sigmoid_cross_entropy(y, t)

    accuracy = []
    y_ = F.array.split_axis.split_axis(y, self.output_dim, 1)
    t_ = F.array.split_axis.split_axis(t, self.output_dim, 1)
    for y_ele, t_ele in zip(y_, t_):
        accuracy.append(
            F.accuracy(y_ele, chainer.Variable(t_ele.data.squeeze())))
    self.accuracy = accuracy
    return self.loss
def forward_one_step(x, t, state, train=True):
    # if args.gpu >= 0:
    #     data = cuda.to_gpu(data)
    #     targets = cuda.to_gpu(targets)
    x = chainer.Variable(x, volatile=not train)
    t = chainer.Variable(t, volatile=not train)
    h_in = model.x_to_h(x) + model.h_to_h(state['h'])
    c, h = F.lstm(state['c'], h_in)
    y = model.h_to_y(h)
    state = {'c': c, 'h': h}

    sigmoid_y = 1 / (1 + np.exp(-y.data))
    bin_y = np.round((np.sign(sigmoid_y - 0.5) + 1) / 2)
    square_sum_error = ((t.data - sigmoid_y) ** 2).sum()
    bin_y_error = ((t.data - bin_y) ** 2).sum()
    return state, F.sigmoid_cross_entropy(y, t), square_sum_error, bin_y_error, h.data[0]
def loss(model, xs, ts, uss=None):
    model.reset_state()
    tags = model([Variable(
        np.array([x], dtype=np.int32)
    ) for x in xs])

    zss = []
    d = Variable(np.array(0, dtype=np.float32))
    for t, (y, zs) in zip(ts, tags):
        d += cf.sigmoid_cross_entropy(
            y, Variable(np.array([[t]], dtype=np.int32))
        )
        if t:
            zss.append(zs)

    if uss:
        assert len(uss) == len(zss)
        for us, zs in zip(uss, zss):
            for u, z in zip(us, zs):
                d += cf.softmax_cross_entropy(
                    z, Variable(np.array([u], dtype=np.int32))
                )
    return d
def forward(self, x_batch_curr, y_batch_curr, volatile=False):
    current_sample = Variable(x_batch_curr, volatile=volatile)
    y_batch_curr = np.asarray(y_batch_curr).reshape(1, -1)
    current_output = Variable(y_batch_curr, volatile=volatile)

    h1_current = F.sigmoid(self.model_to_use.x_h1(current_sample))
    # h1_previous = F.sigmoid(self.model_to_use.x_h1(previous_sample))
    # h1_next = F.sigmoid(self.model_to_use.x_h1(next_sample))
    # h1_diff_previous = h1_current - h1_previous
    # h1_diff_next = h1_next - h1_current

    h2_current = F.sigmoid(self.model_to_use.h1_h2(h1_current))
    # h2_diff_n = F.sigmoid(self.model_to_use.h1_h2(h1_diff_next))
    # h2_diff_p = F.sigmoid(self.model_to_use.h1_h2(h1_diff_previous))
    # h2_diff_next = h2_diff_n - h2_current
    # h2_diff_previous = h2_current - h2_diff_p

    h3_current = F.sigmoid(self.model_to_use.h2_h3(h2_current))
    # h3_diff_p = F.sigmoid(self.model_to_use.h2_h3(h2_diff_previous))
    # h3_diff_n = F.sigmoid(self.model_to_use.h2_h3(h2_diff_next))
    # h3_diff_next = h3_diff_n - h3_current
    # h3_diff_previous = h3_current - h3_diff_p

    h4_current = F.sigmoid(self.model_to_use.h3_h4(h3_current))
    # h4_diff_previous = F.sigmoid(self.model_to_use.h3_h4(h3_diff_previous))
    # h4_diff_next = F.sigmoid(self.model_to_use.h3_h4(h3_diff_next))
    # h4_diff = h4_diff_next + h4_diff_previous
    # h4 = h4_current * h4_diff
    h4 = h4_current

    y = self.model_to_use.h4_y(h4)
    loss = F.sigmoid_cross_entropy(y, current_output)

    current_output.data = np.squeeze(current_output.data)
    y.data = y.data.reshape(-1, 1)
    accuracy = F.accuracy(y, current_output)
    return accuracy, loss, y
def __call__(self, x, labels):
    x = BatchTransform(self.model.mean)(x)
    x = self.xp.array(x)
    scores = self.model(x)

    B, n_class = scores.shape[:2]
    one_hot_labels = self.xp.zeros((B, n_class), dtype=np.int32)
    for i, label in enumerate(labels):
        one_hot_labels[i, label] = 1
    # sigmoid_cross_entropy normalizes the loss
    # by the size of batch and the number of classes.
    # It works better to remove the normalization factor
    # of the number of classes.
    loss = self.loss_scale * F.sigmoid_cross_entropy(
        scores, one_hot_labels)

    result = calc_accuracy(scores, labels)
    reporter.report({'loss': loss}, self)
    reporter.report({'accuracy': result['accuracy']}, self)
    reporter.report({'n_pred': result['n_pred']}, self)
    reporter.report({'n_pos': result['n_pos']}, self)
    return loss
def check_forward_no_reduction(self, x_data, t_data):
    x_val = chainer.Variable(x_data)
    t_val = chainer.Variable(t_data)
    loss = functions.sigmoid_cross_entropy(
        x_val, t_val, self.normalize, reduce='no')
    self.assertEqual(loss.data.shape, self.x.shape)
    self.assertEqual(loss.data.dtype, numpy.float32)
    loss_value = cuda.to_cpu(loss.data)

    # Compute expected value
    if not getattr(self, 'ignore_all', False):
        for i in six.moves.range(self.x.shape[0]):
            for j in six.moves.range(self.x.shape[1]):
                xd, td = self.x[i, j], self.t[i, j]
                if td == -1:
                    loss_expect = 0
                else:
                    loss_expect = -(
                        xd * (td - (xd >= 0)) -
                        math.log(1 + math.exp(-numpy.abs(xd))))
                self.assertAlmostEqual(
                    loss_expect, loss_value[i, j], places=5)
d_opt = optimizers.Adam(alpha=0.0002, beta1=0.5)
g_opt.setup(gen)
d_opt.setup(dis)
g_opt.add_hook(chainer.optimizer.WeightDecay(0.00001))
d_opt.add_hook(chainer.optimizer.WeightDecay(0.00001))

example_z = gen.make_z(nbatch)

for epoch in range(50000):
    print "epoch:", epoch
    xmb = image_samples(nbatch)

    x = gen(chainer.Variable(gen.make_z(nbatch)))
    y1 = dis(x)
    l_gen = F.sigmoid_cross_entropy(
        y1, chainer.Variable(np.ones((nbatch, 1), dtype=np.int32)))
    l1_dis = F.sigmoid_cross_entropy(
        y1, chainer.Variable(np.zeros((nbatch, 1), dtype=np.int32)))

    x2 = chainer.Variable(xmb)
    y2 = dis(x2)
    l2_dis = F.sigmoid_cross_entropy(
        y2, chainer.Variable(np.ones((nbatch, 1), dtype=np.int32)))

    l_dis = l1_dis + l2_dis

    print "loss gen:", l_gen.data
    print "loss dis1:", l1_dis.data
    print "loss dis2:", l2_dis.data

    gen.zerograds()
    dis.zerograds()

    margin = 0.25
def __call__(self, xb, yb, tb):
    xc = Variable(np.array(xb, dtype=np.int32))
    yc = Variable(np.array(yb, dtype=np.int32))
    tc = Variable(np.array(tb, dtype=np.int32))
    fv = self.fwd(xc, yc)
    return F.sigmoid_cross_entropy(fv, tc)
def forward(self, x_data, y_data):
    x, t = Variable(x_data), Variable(y_data)
    y = self.forward_raw(x, train=True)
    return F.sigmoid_cross_entropy(y, t)
def forward_one_step(self, x_data, state, continuous=True, nonlinear_q='tanh',
                     nonlinear_p='tanh', output_f='sigmoid', gpu=-1):
    output = np.zeros(x_data.shape).astype(np.float32)
    nonlinear = {'sigmoid': F.sigmoid, 'tanh': F.tanh,
                 'softplus': self.softplus, 'relu': F.relu}
    nonlinear_f_q = nonlinear[nonlinear_q]
    nonlinear_f_p = nonlinear[nonlinear_p]
    output_a_f = nonlinear[output_f]

    # compute q(z|x)
    for i in range(x_data.shape[0]):
        x_in_t = Variable(x_data[i].reshape((1, x_data.shape[1])))
        hidden_q_t = nonlinear_f_q(
            self.recog_in_h(x_in_t) + self.recog_h_h(state['recog_h']))
        state['recog_h'] = hidden_q_t

    q_mean = self.recog_mean(state['recog_h'])
    q_log_sigma = 0.5 * self.recog_log_sigma(state['recog_h'])
    eps = np.random.normal(0, 1, q_log_sigma.data.shape).astype(np.float32)
    if gpu >= 0:
        eps = cuda.to_gpu(eps)
    eps = Variable(eps)
    z = q_mean + F.exp(q_log_sigma) * eps

    # compute p(x|z)
    h0 = nonlinear_f_p(self.z(z))
    out = self.output(h0)
    x_0 = output_a_f(out)
    state['gen_h'] = h0
    if gpu >= 0:
        np_x_0 = cuda.to_cpu(x_0.data)
        output[0] = np_x_0
    else:
        output[0] = x_0.data

    if continuous == True:
        rec_loss = F.mean_squared_error(
            x_0, Variable(x_data[0].reshape((1, x_data.shape[1]))))
    else:
        rec_loss = F.sigmoid_cross_entropy(
            out, Variable(x_data[0].reshape((1, x_data.shape[1])).astype(np.int32)))

    x_t = x_0
    for i in range(1, x_data.shape[0]):
        h_t_1 = nonlinear_f_p(self.gen_in_h(x_t) + self.gen_h_h(state['gen_h']))
        x_t_1 = self.output(h_t_1)
        state['gen_h'] = h_t_1
        if continuous == True:
            output_t = output_a_f(x_t_1)
            rec_loss += F.mean_squared_error(
                output_t, Variable(x_data[i].reshape((1, x_data.shape[1]))))
        else:
            out = x_t_1
            rec_loss += F.sigmoid_cross_entropy(
                out, Variable(x_data[i].reshape((1, x_data.shape[1])).astype(np.int32)))
        x_t = output_t = output_a_f(x_t_1)
        if gpu >= 0:
            np_output_t = cuda.to_cpu(output_t.data)
            output[i] = np_output_t
        else:
            output[i] = output_t.data

    KLD = -0.0005 * F.sum(1 + q_log_sigma - q_mean**2 - F.exp(q_log_sigma))
    return output, rec_loss, KLD, state
def loss(self, y, t):
    sigmoid_y = 1 / (1 + self.mod.exp(-y.data))
    mean_squared_error = ((t.data - sigmoid_y) ** 2).sum() / t.data.size
    return F.sigmoid_cross_entropy(y, t), mean_squared_error
def concat_losses(p, e):
    loss_x = -F.sum(F.log(sum_axis(p))) / numpy.float32(p.data.shape[0])
    loss_e = F.sigmoid_cross_entropy(*e)
    return loss_x + loss_e
def trainer(G, D, data, len_z=100, n_epoch=10000, pre_epoch=0, batchsize=500,
            save_interval=1000, output_dir=None, G_path=None, D_path=None,
            show=True):
    opt_g = optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_d = optimizers.Adam(alpha=0.0002, beta1=0.5)
    opt_g.setup(G)
    opt_d.setup(D)
    opt_g.add_hook(chainer.optimizer.WeightDecay(0.00001))
    opt_d.add_hook(chainer.optimizer.WeightDecay(0.00001))

    if D_path != None:
        serializers.load_hdf5("%s" % (D_path), D)
    if G_path != None:
        serializers.load_hdf5("%s" % (G_path), G)

    n_epoch += pre_epoch
    loss_d_mem = np.zeros(n_epoch - pre_epoch)
    loss_g_mem = np.zeros(n_epoch - pre_epoch)

    for epoch in xrange(pre_epoch, n_epoch):
        if epoch % 10 == 0:
            print 'epoch', epoch
        perm = np.arange(len(data))
        np.random.shuffle(perm)

        for i in xrange(0, len(data), batchsize):
            z = Variable(np.random.uniform(-1, 1, (batchsize, len_z)).astype(np.float32))
            y1 = G(z)
            y2 = D(y1)

            # discriminator
            loss_d = F.sigmoid_cross_entropy(y2, Variable(np.zeros((batchsize, 1), dtype=np.int32)))
            loss_g = F.sigmoid_cross_entropy(y2, Variable(np.ones((batchsize, 1), dtype=np.int32)))

            # get images
            images = data[perm[i:i + batchsize]]
            y2 = D(Variable(images))
            loss_d += F.sigmoid_cross_entropy(y2, Variable(np.ones((images.shape[0], 1), dtype=np.int32)))

            loss_d_mem[epoch - n_epoch] += loss_d.data
            loss_g_mem[epoch - n_epoch] += loss_g.data

            opt_g.zero_grads()
            loss_g.backward()
            opt_g.update()
            opt_d.zero_grads()
            loss_d.backward()
            opt_d.update()

        # save model
        if (epoch + 1) % save_interval == 0:
            z = Variable(np.random.uniform(-1, 1, (10, len_z)).astype(np.float32))
            confirm = G(z, False)
            if output_dir != None:
                serializers.save_hdf5("%s/gan_model_dis%d.h5" % (output_dir, epoch + 1), D)
                serializers.save_hdf5("%s/gan_model_gen%d.h5" % (output_dir, epoch + 1), G)
                serializers.save_hdf5("%s/current_gen.h5" % (output_dir), G)
            if show:
                if D.imshape[0] == 3:
                    plt.imshow(np.swapaxes(np.swapaxes(confirm.data[0], 0, 2), 0, 1))
                else:
                    plt.imshow(confirm.data[0].reshape(D.imshape[1], D.imshape[2]), cmap="gray")
                plt.axis('off')
                plt.savefig('%s/image%d.jpg' % (output_dir, epoch + 1))
            print '--%d--' % (epoch + 1)
            print 'p_g :', D(confirm, False).data[0]
            print 'p_delta:', D(Variable(images), False).data[0]

    print 'done'
    return loss_g_mem, loss_d_mem
def forward(self):
    x = chainer.Variable(self.x)
    t = chainer.Variable(self.t)
    return functions.sigmoid_cross_entropy(x, t)
def __call__(self, x, t):
    y = self.forward(x)
    self.loss = F.sigmoid_cross_entropy(y, t)
    return self.loss
def __call__(self, x, t):
    y = self.predictor(x)
    self.loss = F.sigmoid_cross_entropy(y, t)
    return self.loss, y