def loss(self, x, y):
    if hasattr(self, 'train_class_weight'):
        # Weight each class channel of the elementwise loss before summing.
        class_weight = self.train_class_weight
        loss = rm.softmax_cross_entropy(x, y, reduce_sum=False) * \
            np.broadcast_to(class_weight.reshape(1, -1, 1, 1), x.shape)
        loss = rm.sum(loss)
    else:
        loss = rm.softmax_cross_entropy(x, y)
    return loss / (self.imsize[0] * self.imsize[1])

def test_multi_gpu():
    from renom.cuda import cuGetDeviceCount

    class NN2(rm.Model):

        def __init__(self):
            super(NN2, self).__init__()
            self.layer1 = rm.Dense(output_size=2)
            self.layer2 = rm.Dense(output_size=2)

        def forward(self, x):
            return self.layer2(rm.relu(self.layer1(x)))

        def weight_initiallize(self, input_size):
            self.layer1.weight_initiallize(input_size)
            self.layer2.weight_initiallize(input_size)

    nn = NN2()
    nn.set_gpu(0)
    nn.weight_initiallize((2, ))

    nn2 = NN2()
    nn2.set_gpu(cuGetDeviceCount() - 1)

    for i in range(2):
        nn2.copy_params(nn)
        x = np.random.rand(100, 2)

        with nn.train():
            ret1 = nn(x[:50])
            with use_device(nn.device_id):
                loss1 = rm.softmax_cross_entropy(ret1, np.random.rand(50, 2))

        with nn2.train():
            ret2 = nn2(x[50:])
            with use_device(nn2.device_id):
                loss2 = rm.softmax_cross_entropy(ret2, np.random.rand(50, 2))

        nn.sync()
        nn2.sync()

        grad1 = loss1.grad()
        with use_device(nn2.device_id):
            grad2 = loss2.grad()
            grad2.get(nn2.layer1.params.w)

        org_l1_w = grad1.get(nn.layer1.params.w)
        nn.join_grads(grad1, [(nn2, grad2)])

        assert np.allclose(grad1.get(nn.layer1.params.w),
                           org_l1_w + grad2.get(nn2.layer1.params.w).copy())

        grad1.update(models=[nn])

def loss(self, x, y, class_weight=None):
    if class_weight is not None and class_weight:
        mask = np.concatenate([
            np.ones((y.shape[0], 1, y.shape[2], y.shape[3])) * c
            for c in class_weight
        ], axis=1)
        loss = rm.softmax_cross_entropy(x, y, reduce_sum=False)
        loss *= mask.astype(y.dtype)
        loss = rm.sum(loss) / float(len(x))
    else:
        loss = rm.softmax_cross_entropy(x, y)
    return loss / (self.imsize[0] * self.imsize[1])

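# Illustrative, standalone sketch of the class-weighting pattern used in the two
# loss functions above; the shapes (batch 2, 3 classes, 4x4 map) and the weight
# values are made up, not taken from any original model.
import numpy as np
import renom as rm

x = np.random.rand(2, 3, 4, 4)                  # per-class scores (N, C, H, W)
y = np.zeros((2, 3, 4, 4))
y[:, 0] = 1                                     # one-hot targets, all "class 0" here
class_weight = np.array([1.0, 2.0, 2.0])        # up-weight classes 1 and 2

elementwise = rm.softmax_cross_entropy(x, y, reduce_sum=False)
weighted = elementwise * np.broadcast_to(class_weight.reshape(1, -1, 1, 1), x.shape)
loss = rm.sum(weighted) / float(len(x)) / (4 * 4)
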
def test_gpu_node_softmax_cross_entropy(a, b):
    set_cuda_active(True)

    g1 = Variable(a)
    g2 = Variable(b)

    g3 = rm.softmax_cross_entropy(g1, g2)
    g = g3.grad()
    g_g1 = g.get(g1)
    g3.to_cpu()
    g_g1.to_cpu()

    set_cuda_active(False)
    c3 = rm.softmax_cross_entropy(g1, g2)
    c = c3.grad()
    c_g1 = c.get(g1)

    close(g3, c3)
    close(c_g1, g_g1)

def test_save(tmpdir_factory):

    class NN2(rm.Model):

        def __init__(self):
            super(NN2, self).__init__()
            self.layer1 = rm.Dense(output_size=2)
            self.layer2 = rm.Dense(output_size=2)
            self.bn = rm.BatchNormalize()

        def forward(self, x):
            return self.layer2(self.bn(rm.relu(self.layer1(x))))

    class NN3(rm.Model):
        SERIALIZED = ('AAA', 'BBB')

        def __init__(self):
            super(NN3, self).__init__()
            self.layer1 = NN2()
            self.layer2 = NN2()
            self.AAA = 0

        def forward(self, x):
            return self.layer2(rm.relu(self.layer1(x)))

    nn = NN3()
    with nn.train():
        result = nn(np.random.rand(2, 2))
        l = rm.softmax_cross_entropy(result, np.random.rand(2, 2))

    grad = l.grad()
    opt = rm.Sgd()
    grad.update(opt)

    nn.layer1.layer1.params.b._auto_update = False

    d = tmpdir_factory.mktemp('h5')
    fname = os.path.join(str(d), 'aaa')
    nn.AAA = 9999
    nn.save(fname)

    nn2 = NN3()
    nn2.load(fname)

    assert np.allclose(nn.layer1.layer1.params.w, nn2.layer1.layer1.params.w)
    assert np.allclose(nn.layer1.layer1.params.b, nn2.layer1.layer1.params.b)
    assert np.allclose(nn.layer1.layer2.params.w, nn2.layer1.layer2.params.w)
    assert np.allclose(nn.layer1.layer2.params.b, nn2.layer1.layer2.params.b)
    assert np.allclose(nn.layer2.layer1.params.w, nn2.layer2.layer1.params.w)
    assert np.allclose(nn.layer2.layer1.params.b, nn2.layer2.layer1.params.b)
    assert np.allclose(nn.layer2.layer2.params.w, nn2.layer2.layer2.params.w)
    assert np.allclose(nn.layer2.layer2.params.b, nn2.layer2.layer2.params.b)

    assert nn2.layer1.layer1.params.w._auto_update
    assert not nn2.layer1.layer1.params.b._auto_update
    assert nn2.AAA == 9999

def __call__(self, x, t):
    y = self.predictor(x)
    # if t.ndim == 2:  # use squared error when the label is one-hot
    #     y = rm.softmax(y)
    #     # loss = F.mean_squared_error(y, t)
    #     loss = rm.mean_squared_error(y, t)
    #     accuracy = rm.accuracy(y, t.argmax(axis=1).astype(np.int32))
    # else:  # use softmax cross entropy when the label is an integer class index
    loss = rm.softmax_cross_entropy(y, t)
    return loss

def fit(self, x, y):
    N = len(x)
    labels = self.lb.transform(y)
    for i in range(self.epoch):
        perm = np.random.permutation(N)
        for j in range(N // self.batch):
            train_batch = x[perm[j * self.batch:(j + 1) * self.batch]]
            labels_batch = labels[perm[j * self.batch:(j + 1) * self.batch]]
            with self.network.train():
                z = self.network(train_batch)
                loss = rm.softmax_cross_entropy(z, labels_batch)
            loss.grad().update(self.optimizer)

def loss(self, x, y, neg_pos_ratio=3.0, negatives_for_hard=100.0):
    batch_size = y.shape[0]
    num_boxes = y.shape[2]

    conf_loss = rm.sum(rm.softmax_cross_entropy(
        x[:, 4:-8, :], y[:, 4:-8, :], reduce_sum=False), axis=1)
    loc_loss = rm.sum(rm.smoothed_l1(
        x[:, :4, :], y[:, :4, :], reduce_sum=False), axis=1)

    num_pos = np.sum(y[:, -8, :], axis=1)
    pos_loc_loss = rm.sum(loc_loss * (y[:, -8, :]), axis=1)
    pos_conf_loss = rm.sum(conf_loss * y[:, -8, :], axis=1)

    num_neg = np.minimum(neg_pos_ratio * num_pos, num_boxes - num_pos)
    has_min = num_neg > 0
    has_min = np.any(has_min).astype('float')
    num_neg = np.concatenate(
        [num_neg, [(1 - has_min) * negatives_for_hard]])
    num_neg_batch = np.min(num_neg[(num_neg > 0)])
    num_neg_batch = int(num_neg_batch)

    confs_start = 5  # 4 + 0 (background label) + 1
    confs_end = confs_start + self.num_class - 1

    max_confs = np.max(x[:, confs_start:confs_end, :].as_ndarray(), axis=1)
    indices = (max_confs * (1 - y[:, -8, :])).argsort()[:, ::-1][:, :num_neg_batch]

    batch_idx = np.expand_dims(range(0, batch_size), 1)
    batch_idx = np.tile(batch_idx, (1, num_neg_batch))
    full_indices = (batch_idx.reshape(-1) * int(num_boxes) + indices.reshape(-1))

    neg_conf_loss = conf_loss.reshape(-1)[full_indices]
    neg_conf_loss = neg_conf_loss.reshape((batch_size, num_neg_batch))
    neg_conf_loss = rm.sum(neg_conf_loss, axis=1)

    total_loss = neg_conf_loss + pos_conf_loss
    total_loss /= (num_pos + float(num_neg_batch))

    num_pos = np.where(np.not_equal(num_pos, 0), num_pos, np.ones_like(num_pos))
    total_loss = total_loss + (pos_loc_loss / num_pos)
    loss = rm.sum(total_loss)
    return loss

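# Standalone illustration of the "full_indices" gather used above: flattening
# (image, box) pairs into indices of a 1-D view picks each image's hardest
# negatives in one fancy-indexing step. The numbers are made up (2 images,
# 3 boxes, 2 negatives kept per image).
import numpy as np

conf_loss = np.array([[0.1, 0.7, 0.3],
                      [0.9, 0.2, 0.4]])
indices = np.array([[1, 2],                      # hardest boxes of image 0
                    [0, 2]])                     # hardest boxes of image 1
batch_idx = np.tile(np.expand_dims(range(2), 1), (1, 2))
full_indices = batch_idx.reshape(-1) * 3 + indices.reshape(-1)
print(conf_loss.reshape(-1)[full_indices].reshape(2, 2))   # [[0.7 0.3] [0.9 0.4]]
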
def test_update():
    nn = rm.Dense(2)
    nn2 = rm.Dense(2)
    with nn.train():
        ret = nn(np.random.rand(2, 2))
        loss = rm.softmax_cross_entropy(ret, np.random.rand(2, 2))

    cur = nn.params.w.copy()
    grad = loss.grad(np.array([1]))

    grad.update(models=[nn2])
    assert np.allclose(cur.as_ndarray(), nn.params.w)

    grad.update(models=[nn])
    assert np.allclose(cur.as_ndarray() - grad.get(nn.params.w),
                       nn.params.w.as_ndarray())

def loss(self, x, y): """ Loss function of ${class} algorithm. Args: x(ndarray, Node): Output of model. y(ndarray, Node): Target array. Returns: (Node): Loss between x and y. Example: >>> builder = model.build_data() # This will return function. >>> x, y = builder(image_path_list, annotation_list) >>> z = model(x) >>> loss = model.loss(z, y) """ return rm.softmax_cross_entropy(x, y)
def forward(self, src_seq, tar_seq):
    src_seq = src_seq[::-1]  # reverse
    xi = [self.src_w2i[word] for word in src_seq]  # input word to index
    xi = np.array(xi).reshape(len(xi), 1)
    xe = self.l1(xi)  # index to vector (embedding)

    # encode
    for x in xe:
        h = self.encode(x.reshape(1, -1))

    # Let the initial state of the decoder's LSTM be the final state of the encoder's LSTM.
    self.l4._z = h
    self.l4._state = self.l2._state

    yi = [self.tar_w2i[word] for word in tar_seq]  # input word to index
    yi = np.array(yi).reshape(len(yi), 1)
    ye = self.l3(yi)

    loss = 0
    # decode
    for i in range(len(ye) - 1):
        y = ye[i].reshape(1, -1)
        yy = self.decode(y)
        d = self.tar_word2onehot(tar_seq[i + 1])
        loss += rm.softmax_cross_entropy(yy.reshape(1, -1), d.reshape(1, -1))
    return loss

def loss(self, x, y, neg_pos_ratio=3.0):
    pos_samples = (y[:, :, 5] == 0)[..., None]
    N = np.sum(pos_samples)
    pos_Ns = np.sum(pos_samples, axis=1)
    neg_Ns = np.clip(neg_pos_ratio * pos_Ns, 0, y.shape[1])

    # Loc loss
    loc_loss = rm.sum(
        rm.smoothed_l1(x[..., :4], y[..., 1:5], reduce_sum=False) * pos_samples)

    # This is for hard negative mining.
    np_x = x[..., 4:].as_ndarray()
    max_np_x = np.max(np_x)
    loss_c = np.log(
        np.sum(np.exp(np_x.reshape(-1, self.num_class) - max_np_x),
               axis=1, keepdims=True) + 1e-8) + max_np_x
    loss_c -= np_x[..., 0].reshape(-1, 1)
    loss_c = loss_c.reshape(len(x), -1)
    loss_c[pos_samples.astype(np.bool)[..., 0]] = np.Inf  # Cut positive samples.

    sorted_index = np.argsort(-1 * loss_c, axis=1)  # Argsort in descending order.
    index_rank = np.argsort(sorted_index, axis=1)
    neg_samples = index_rank < neg_Ns
    samples = (neg_samples[..., None] + pos_samples).astype(np.bool)

    conf_loss = rm.sum(
        rm.softmax_cross_entropy(x[..., 4:].transpose(0, 2, 1),
                                 y[..., 5:].transpose(0, 2, 1),
                                 reduce_sum=False).transpose(0, 2, 1) * samples)

    loss = conf_loss + loc_loss
    return loss / (N / len(x))

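# Standalone illustration of the argsort-of-argsort trick used above: the second
# argsort turns per-box losses into per-box ranks, so "index_rank < neg_Ns" keeps
# the top-k hardest negatives per image. The values are made up.
import numpy as np

loss_c = np.array([[0.2, 0.9, 0.5, 0.1]])
sorted_index = np.argsort(-1 * loss_c, axis=1)   # box ids, hardest first: [[1 2 0 3]]
index_rank = np.argsort(sorted_index, axis=1)    # rank of each box: [[2 0 1 3]]
neg_samples = index_rank < 2                     # keep the 2 hardest: [[False True True False]]
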
def func(node, x):
    return sum(rm.softmax_cross_entropy(node, x, reduce_sum=False))

def func(node, x):
    return rm.softmax_cross_entropy(node, x)

N = len(X_train)
nn = MNist()
for i in range(epoch):
    start_t = time.time()
    perm = np.random.permutation(N)
    loss = 0
    for j in range(0, N // batch):
        train_batch = X_train[perm[j * batch:(j + 1) * batch]]
        responce_batch = labels_train[perm[j * batch:(j + 1) * batch]]
        with nn.train():
            result = nn(train_batch)
            l = rm.softmax_cross_entropy(result, responce_batch)
        l.to_cpu()
        grad = l.grad()
        grad.update(opt)
        loss += l
    train_loss = loss / (N // batch)
    train_loss.to_cpu()

    test_loss = rm.softmax_cross_entropy(nn(X_test), labels_test)
    test_loss.to_cpu()
    test_learning_curve.append(test_loss)
    learning_curve.append(train_loss)
    print("epoch %03d train_loss:%f test_loss:%f took time:%f" %
          (i, train_loss, test_loss, time.time() - start_t))

def _train(self, x, idx, y):
    # Forward pass inside the train context; return both the reshaped output
    # and the cross entropy computed on the labeled part of the batch.
    with self.fcnn.train():
        x = self.fcnn(x)
        z = rm.reshape(x, self.batch_output_shape)
        return z, rm.softmax_cross_entropy(z[:len(idx)], y)

def loss(self, x, y):
    # Sum the cross entropy of both model outputs against the same target.
    return rm.softmax_cross_entropy(x[0], y) + rm.softmax_cross_entropy(x[1], y)

epoch = 20
learning_curve = []
test_learning_curve = []

for i in range(epoch):
    perm = np.random.permutation(N)
    loss = 0
    for j in range(0, N // batch):
        train_batch = train_x[perm[j * batch:(j + 1) * batch]]
        responce_batch = labels_train[perm[j * batch:(j + 1) * batch]]

        # Loss function
        network.set_models(inference=False)
        with network.train():
            l = rm.softmax_cross_entropy(network(train_batch), responce_batch)

        # Back propagation
        grad = l.grad()

        # Update
        grad.update(optimizer)
        loss += l.as_ndarray()

    train_loss = loss / (N // batch)

    # Validation
    test_loss = 0
    M = len(test_x)
    network.set_models(inference=True)
    for j in range(M // batch):