def compute_error(self, x, y):
    # Classification error: fraction of examples whose argmax prediction
    # does not match the label.
    preds = selectmax(self.compute_predictions(x))
    labels = onehot(y, self.n_output)
    results = preds * labels  # 1 exactly where the prediction is correct
    return 1 - results.sum() / float(len(y))
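
# `selectmax` and `onehot` are used throughout but not defined in this
# section. A minimal NumPy sketch consistent with that usage (names and
# signatures are assumptions, not the original helpers); the PyTorch
# training functions below would need a tensor-returning `onehot`.
import numpy as np

def onehot(y, n_classes):
    # Integer labels of shape (n,) -> one-hot matrix of shape (n, n_classes).
    y = np.asarray(y).reshape(-1)
    out = np.zeros((y.shape[0], n_classes))
    out[np.arange(y.shape[0]), y] = 1.0
    return out

def selectmax(probs):
    # Put a 1 at each row's argmax and 0 elsewhere, so that
    # (selectmax(p) * onehot(y, k)).sum() counts correct predictions.
    out = np.zeros_like(probs)
    out[np.arange(probs.shape[0]), probs.argmax(axis=1)] = 1.0
    return out
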
def mixup_train(model, optimizer, data_loaders, alpha, data_length,
                num_classes=10, share_lambda=False,
                loss_f=naive_cross_entropy_loss):
    """Train for one epoch on mixup data: convex combinations of pairs of
    examples and of their one-hot targets."""
    model.train()
    loop_loss = []
    for (input1, target1), (input2, target2) in tqdm(zip(*data_loaders),
                                                     total=len(data_loaders[0])):
        target1 = onehot(target1, num_classes)
        target2 = onehot(target2, num_classes)
        if share_lambda:
            # Share a single scalar lambda across the whole minibatch.
            _lambda_x = _lambda_y = np.random.beta(alpha, alpha)
        else:
            # Draw one lambda per example from Beta(alpha, alpha), then
            # reshape it so it broadcasts over (B, C, H, W) image inputs
            # and (B, num_classes) targets.
            _lambda = torch.Tensor(np.random.beta(alpha, alpha,
                                                  size=input1.size(0)))
            _lambda_x = _lambda.view(-1, 1, 1, 1)
            _lambda_y = _lambda.view(-1, 1)
        input = _lambda_x * input1 + (1 - _lambda_x) * input2
        target = _lambda_y * target1 + (1 - _lambda_y) * target2
        input = variable(input)
        target = variable(target)
        optimizer.zero_grad()
        loss = loss_f(model(input), target)
        loss.backward()
        optimizer.step()
        loop_loss.append(loss.item() / data_length)
    print(f">>>(mixup)loss: {sum(loop_loss):.2f}")
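
# `naive_cross_entropy_loss` must accept soft (mixed) targets, which the
# classic nn.CrossEntropyLoss does not. A plausible sketch:
import torch
import torch.nn.functional as F

def naive_cross_entropy_loss(logits, soft_targets):
    # Mean cross-entropy between the soft target distribution and the
    # model's log-softmax output.
    return -(soft_targets * F.log_softmax(logits, dim=1)).sum(dim=1).mean()

# mixup_train expects two independently shuffled loaders over the same
# dataset, so each step pairs two random minibatches (names illustrative):
# loader_a = DataLoader(train_set, batch_size=128, shuffle=True)
# loader_b = DataLoader(train_set, batch_size=128, shuffle=True)
# mixup_train(model, optimizer, (loader_a, loader_b), alpha=0.2,
#             data_length=len(train_set))
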
def bprop_fast(self, x, y):
    # Vectorized backprop over the whole minibatch.
    values = self.fprop(x)
    # Softmax + cross-entropy: the gradient w.r.t. the output
    # pre-activation is (softmax output - one-hot target).
    grad_oa = values['o_s'] - onehot(y, self.n_output)
    grad_b2 = np.sum(grad_oa, axis=0)
    grad_w2 = np.dot(grad_oa.T, values['h_s'])
    # Backpropagate through W2; equivalent to grad_oa.dot(self.W['w2']).
    grad_hs = np.tensordot(self.W['w2'], grad_oa, (0, 1)).transpose()
    # ReLU derivative: pass the gradient only where the unit was active.
    grad_ha = grad_hs * (values['h_s'] > 0).astype(float)
    grad_w1 = np.dot(grad_ha.T, x)
    grad_b1 = np.sum(grad_ha, axis=0)
    return {'w1': grad_w1, 'b1': grad_b1, 'w2': grad_w2, 'b2': grad_b2}
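
# A centered finite difference against compute_loss is a cheap way to
# validate bprop_fast. This check is not part of the original code:
# `net` is assumed to be an instance of the surrounding class and `key`
# one of the weight matrices 'w1'/'w2' stored in net.W.
def grad_check(net, x, y, key='w2', i=0, j=0, eps=1e-5):
    # bprop_* sums gradients over the batch while compute_loss averages,
    # so divide the analytic gradient by the batch size before comparing.
    analytic = net.bprop_fast(x, y)[key][i, j] / len(y)
    net.W[key][i, j] += eps
    loss_plus = net.compute_loss(x, y)
    net.W[key][i, j] -= 2 * eps
    loss_minus = net.compute_loss(x, y)
    net.W[key][i, j] += eps  # restore the original weight
    numeric = (loss_plus - loss_minus) / (2 * eps)
    return abs(analytic - numeric)
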
def standard_train(model, optimizer, data_loader, data_length, loss_f):
    """Train for one epoch on the unmodified data."""
    model.train()
    loop_loss = []
    for (input, target) in tqdm(data_loader):
        # variable() wraps a tensor for autograd and moves it to the GPU
        # when CUDA is available.
        input = variable(input)
        target = variable(onehot(target, 10))
        optimizer.zero_grad()
        loss = loss_f(model(input), target)
        loss.backward()
        optimizer.step()
        loop_loss.append(loss.item() / data_length)
    print(f">>>(standard)loss: {sum(loop_loss):.2f}")
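
# Typical call site for one epoch (all names here are illustrative):
# train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
# for epoch in range(num_epochs):
#     standard_train(model, optimizer, train_loader,
#                    data_length=len(train_set),
#                    loss_f=naive_cross_entropy_loss)
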
def bprop_slow(self, x, y):
    # Reference implementation: accumulate gradients one example at a time.
    grad_w1, grad_b1, grad_w2, grad_b2 = 0, 0, 0, 0
    for example, label in zip(x, y):
        values = self.fprop(example)
        grad_oa = values['o_s'] - onehot(label, self.n_output)
        grad_b2 += grad_oa
        # Outer product of the output gradient and the hidden activations.
        grad_w2 += np.multiply(grad_oa.T, values['h_s'])
        grad_hs = grad_oa.dot(self.W['w2'])
        # ReLU derivative: pass the gradient only where the unit was active.
        grad_ha = grad_hs * (values['h_s'] > 0).astype(float)
        grad_w1 += np.multiply(grad_ha.T, example)
        grad_b1 += grad_ha
    return {'w1': grad_w1, 'b1': grad_b1, 'w2': grad_w2, 'b2': grad_b2}
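
# bprop_slow is the per-example reference for bprop_fast; checking that
# the two agree on a small batch is a cheap unit test (this helper is a
# sketch, assuming `net` is an instance of the surrounding class):
def check_bprop_consistency(net, x, y, atol=1e-8):
    fast = net.bprop_fast(x, y)
    slow = net.bprop_slow(x, y)
    return {k: np.allclose(fast[k], slow[k], atol=atol) for k in fast}
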
def compute_loss(self, x, y):
    # Mean negative log-likelihood of the true class.
    loss = np.sum(-np.log(self.compute_predictions(x)) * onehot(y, self.n_output))
    return loss / y.shape[0]
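
# Equivalent formulation, since the one-hot mask selects exactly the
# true-class probability in each row:
# p = self.compute_predictions(x)  # shape (n, n_output)
# loss = -np.mean(np.log(p[np.arange(len(y)), y]))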