def check_backward(self, x_data, t_data, class_weight, use_cudnn='always'):
    """Check the backward pass of ``SoftmaxCrossEntropy`` on the given data.

    The function under test is built from ``self.cache_score`` and
    ``class_weight`` and handed to ``gradient_check.check_backward`` while
    the ``'use_cudnn'`` configuration entry is set to ``use_cudnn``.

    Args:
        x_data: First input given to the function under test.
        t_data: Second input given to the function under test.
        class_weight: Forwarded as the ``class_weight`` argument.
        use_cudnn: Value installed for the ``'use_cudnn'`` config key for
            the duration of the check.
    """
    inputs = (x_data, t_data)
    with chainer.using_config('use_cudnn', use_cudnn):
        loss_fn = functions.SoftmaxCrossEntropy(
            cache_score=self.cache_score,
            class_weight=class_weight,
        )
        # No output gradient is supplied (None); tolerances and other
        # settings come from the test fixture's options.
        gradient_check.check_backward(
            loss_fn, inputs, None, **self.check_backward_options)
def check_backward(self, x_data, t_data, use_cudnn=True):
    """Check the backward pass of ``SoftmaxCrossEntropy`` with ``eps=0.02``.

    Args:
        x_data: First input given to the function under test.
        t_data: Second input given to the function under test.
        use_cudnn: Forwarded as the ``use_cudnn`` constructor argument.
    """
    loss_fn = functions.SoftmaxCrossEntropy(
        use_cudnn=use_cudnn, cache_score=self.cache_score)
    inputs = (x_data, t_data)
    # ``eps`` is fixed here; every other option comes from the fixture.
    gradient_check.check_backward(
        loss_fn, inputs, None,
        eps=0.02, **self.check_backward_options)
def check_backward(self, x_data, t_data, class_weight, use_cudnn=True):
    """Check the backward pass of a class-weighted ``SoftmaxCrossEntropy``.

    Args:
        x_data: First input given to the function under test.
        t_data: Second input given to the function under test.
        class_weight: Forwarded as the ``class_weight`` argument.
        use_cudnn: Forwarded as the ``use_cudnn`` constructor argument.
    """
    loss_fn = functions.SoftmaxCrossEntropy(
        use_cudnn=use_cudnn,
        cache_score=self.cache_score,
        class_weight=class_weight,
    )
    inputs = (x_data, t_data)
    # ``eps`` is fixed here; every other option comes from the fixture.
    gradient_check.check_backward(
        loss_fn, inputs, None,
        eps=0.02, **self.check_backward_options)
def check_backward(self, x_data, t_data, g_data, class_weight):
    """Check the backward pass of ``SoftmaxCrossEntropy`` with ``reduce='no'``.

    Args:
        x_data: First input given to the function under test.
        t_data: Second input given to the function under test.
        g_data: Passed to ``gradient_check.check_backward`` as the output
            gradient argument.
        class_weight: Forwarded as the ``class_weight`` argument.
    """
    constructor_kwargs = {
        'cache_score': self.cache_score,
        'class_weight': class_weight,
        'reduce': 'no',
    }
    loss_fn = functions.SoftmaxCrossEntropy(**constructor_kwargs)
    gradient_check.check_backward(
        loss_fn, (x_data, t_data), g_data, **self.check_backward_options)
def __call__(self, mini_batch_list):
    """Compute and report the loss and accuracy for one mini-batch.

    Args:
        mini_batch_list: Indexable batch; items 0 and 1 are the two inputs
            handed to ``self.predictor`` and item 2 holds the labels.

    Returns:
        The softmax cross entropy loss of the prediction against the
        labels. The same value is stored on ``self.loss``; the accuracy
        from ``self.accfun`` is stored on ``self.accuracy``.
    """
    data1 = mini_batch_list[0]
    data2 = mini_batch_list[1]
    labels = mini_batch_list[2]

    y = self.predictor(data1, data2)

    # ``SoftmaxCrossEntropy`` is the function *class*: its constructor takes
    # configuration options, so calling it with (y, labels) only builds a
    # function object and never evaluates a loss. The lowercase functional
    # form applies the loss to the data and returns the loss variable.
    self.loss = F.softmax_cross_entropy(y, labels)
    reporter.report({'main/loss': self.loss}, self)

    self.accuracy = self.accfun(y, labels)
    reporter.report({'main/accuracy': self.accuracy}, self)
    return self.loss
def check_backward(self, xp):
    """Check the backward pass of ``SoftmaxCrossEntropy`` on the fixtures.

    The fixture arrays ``self.x``, ``self.t`` and ``self.gy`` (and
    ``self.class_weight`` when it is not ``None``) are converted with
    ``xp.asarray`` before being used.

    Args:
        xp: Array namespace whose ``asarray`` performs the conversion.
    """
    inputs = (xp.asarray(self.x), xp.asarray(self.t))
    grad_output = xp.asarray(self.gy)

    # Only convert the class weights when the fixture actually defines them.
    weight = (
        None if self.class_weight is None
        else xp.asarray(self.class_weight)
    )

    loss_fn = functions.SoftmaxCrossEntropy(
        reduce=self.reduce,
        class_weight=weight,
        ignore_label=self.ignore_label,
    )
    gradient_check.check_backward(loss_fn, inputs, grad_output)
from mkldnn import switch

# Set the mkldnn switch flag for softmax cross entropy before running.
switch.enable_softmax_cross_entropy = True

# Accuracy Test
# First pass: forward and backward on CPU with mkldnn enabled, printing the
# inputs and the results.
mkldnn.set_mkldnn_enable(True)
print("With mkldnn")

# Six values 1, 3, ..., 11 arranged as a 2x3 float32 array, with one int32
# label per row.
x = np.arange(1.0, 13.0, 2.0, dtype=np.float32).reshape(2, 3)
label = np.array([0, 2], dtype=np.int32)
print("x ****************** ")
print(x)
print("label ****************** ")
print(label)

sce = F.SoftmaxCrossEntropy(use_cudnn=False, normalize=True, cache_score=True)
loss = sce.forward_cpu((x, label))
# NOTE(review): the gradient argument is the plain tuple (1, 1) rather than
# arrays -- confirm this is what backward_cpu expects.
gx = sce.backward_cpu((x, label), (1, 1))
print("loss ******************* ")
print(loss)
print("gx ******************* ")
print(gx)

# Second pass: the same inputs are rebuilt with mkldnn disabled.
mkldnn.set_mkldnn_enable(False)
print(" ")
print("Without mkldnn")
x = np.arange(1.0, 13.0, 2.0, dtype=np.float32).reshape(2, 3)
label = np.array([0, 2], dtype=np.int32)
print("x ****************** ")
def check_backward(self, x_data, t_data, use_cudnn=True):
    """Check the backward pass of ``SoftmaxCrossEntropy`` with ``eps=0.02``.

    Args:
        x_data: First input given to the function under test.
        t_data: Second input given to the function under test.
        use_cudnn: Passed positionally as the first constructor argument.
    """
    loss_fn = functions.SoftmaxCrossEntropy(use_cudnn)
    inputs = (x_data, t_data)
    # The absolute tolerance comes from the fixture's ``backward_atol``.
    gradient_check.check_backward(
        loss_fn, inputs, None, eps=0.02, atol=self.backward_atol)
if submit: teX, teM = transform_sst(teX) else: raise NotImplementedError n_train = len(trY) n_valid = len(vaY) n_batch_train = n_batch * n_gpu n_updates_total = (n_train // n_batch_train) * n_iter single_prediction = (dataset != 'rocstories') model = Model(args, vocab, n_ctx) lm_head = LMHead(model, args) clf_head = ClfHead(clf_token, args, single_prediction=single_prediction) criterion = F.SoftmaxCrossEntropy(reduce='no') model_opt = get_OpenAIAdam([model, clf_head], lr=lr, schedule=lr_schedule, warmup=lr_warmup, t_total=n_updates_total, b1=b1, b2=b2, e=e, l2=l2, vector_l2=vector_l2, max_grad_norm=max_grad_norm) # model_opt.setup(model) compute_loss_fct = LossCompute(criterion, criterion, lm_coef, model_opt) load_openai_pretrained_model(model, n_ctx=n_ctx, n_special=n_special)