def fit_generator(self, generator, nb_epochs=20):
    """
    Fit the classifier using the generator that yields batches as specified.

    :param generator: Batch generator providing `(x, y)` for each epoch.
    :type generator: `DataGenerator`
    :param nb_epochs: Number of epochs to use for training.
    :type nb_epochs: `int`
    :return: `None`
    """
    from mxnet import autograd, nd
    from art.data_generators import MXDataGenerator

    if isinstance(generator, MXDataGenerator) and \
            not (hasattr(self, 'label_smooth') or hasattr(self, 'feature_squeeze')):
        # Train directly in MXNet
        for _ in range(nb_epochs):
            for x_batch, y_batch in generator.data_loader:
                x_batch = nd.array(x_batch).as_in_context(self._ctx)
                y_batch = np.argmax(y_batch, axis=1)
                y_batch = nd.array(y_batch).as_in_context(self._ctx)

                with autograd.record(train_mode=True):
                    preds = self._model(x_batch)
                    loss = nd.softmax_cross_entropy(preds, y_batch)
                    loss.backward()

                # Update parameters
                self._optimizer.step(x_batch.shape[0])
    else:
        # Fit a generic data generator through the API
        super(MXClassifier, self).fit_generator(generator, nb_epochs=nb_epochs)
def hybrid_forward(self, F, xcos_theta, xphi_theta, target):
    self.it += 1
    batch_size = target.size
    # One-hot encode the targets; shape (B, classnum)
    oh_target = target.one_hot(xcos_theta.shape[1])
    self.lamb = max(self.LambdaMin, self.LambdaMax / (1 + 0.1 * self.it))
    # Because indexing is not differentiable in MXNet, use a one-hot mask to
    # swap the margin-adjusted logit in at each sample's target class
    output = xcos_theta - \
        oh_target * xcos_theta[range(0, batch_size), target].reshape(-1, 1) / (1 + self.lamb) + \
        oh_target * xphi_theta[range(0, batch_size), target].reshape(-1, 1) / (1 + self.lamb)
    loss = nd.softmax_cross_entropy(output, nd.cast(target, 'float32'))
    return loss
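# The one-hot arithmetic above replaces each sample's target-class logit with a
# blend of cos(theta) and the margin-adjusted phi(theta) without in-place
# indexing. A minimal NumPy sketch of the same substitution (all names and
# values here are illustrative, not from the original project):
import numpy as np

batch_size, num_classes = 2, 4
cos_theta = np.random.randn(batch_size, num_classes)
phi_theta = cos_theta - 0.5              # stand-in for the margin-adjusted logits
target = np.array([1, 3])
lamb = 5.0

one_hot = np.eye(num_classes)[target]
rows = np.arange(batch_size)
output = cos_theta \
    - one_hot * cos_theta[rows, target].reshape(-1, 1) / (1 + lamb) \
    + one_hot * phi_theta[rows, target].reshape(-1, 1) / (1 + lamb)
# Off-target logits are unchanged; each target logit moves 1/(1+lamb) of the
# way from cos_theta toward phi_theta.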
def fit(self, x, y, batch_size=128, nb_epochs=20):
    """
    Fit the classifier on the training set `(inputs, outputs)`.

    :param x: Training data.
    :type x: `np.ndarray`
    :param y: Labels, one-vs-rest encoding.
    :type y: `np.ndarray`
    :param batch_size: Size of batches.
    :type batch_size: `int`
    :param nb_epochs: Number of epochs to use for training.
    :type nb_epochs: `int`
    :return: `None`
    """
    if self._optimizer is None:
        raise ValueError('An MXNet optimizer is required for fitting the model.')

    from mxnet import autograd, nd

    train_mode = self._learning_phase if hasattr(self, '_learning_phase') else True

    # Apply preprocessing and defences
    x_ = self._apply_processing(x)
    x_, y_ = self._apply_defences_fit(x_, y)
    y_ = np.argmax(y_, axis=1)

    nb_batch = int(np.ceil(len(x_) / batch_size))
    ind = np.arange(len(x_))

    for _ in range(nb_epochs):
        # Shuffle the examples
        np.random.shuffle(ind)

        # Train for one epoch
        for m in range(nb_batch):
            x_batch = nd.array(x_[ind[m * batch_size:(m + 1) * batch_size]]).as_in_context(self._ctx)
            y_batch = nd.array(y_[ind[m * batch_size:(m + 1) * batch_size]]).as_in_context(self._ctx)

            with autograd.record(train_mode=train_mode):
                preds = self._model(x_batch)
                loss = nd.softmax_cross_entropy(preds, y_batch)
                loss.backward()

            # Update parameters
            self._optimizer.step(batch_size)
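# A hedged usage sketch for the `fit` method above. `clf` is assumed to be an
# already-constructed instance of the classifier class (construction elided);
# the dummy data and shapes below are illustrative only:
import numpy as np

x_train = np.random.rand(100, 1, 28, 28).astype(np.float32)   # 100 samples
y_train = np.eye(10)[np.random.randint(0, 10, size=100)]      # one-hot labels

clf.fit(x_train, y_train, batch_size=32, nb_epochs=5)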
def hybrid_forward(self, F, ypred, ylabel):
    assert ypred.shape == ylabel.shape, 'Check output shapes'
    anc_pred, coord_pred, anc_real, coord_real = self.split_signals(ypred, ylabel)

    # Occasionally print a sample of predictions for debugging
    if np.random.rand() > 0.9:
        print(anc_pred, anc_real)
        print(coord_pred, coord_real)

    # Squared error on the coordinate outputs
    sq_loss = nd.mean(nd.square(coord_pred - coord_real))
    # Cross-entropy on the anchor outputs; note that `softmax_cross_entropy`
    # already applies softmax internally, so passing `nd.softmax(anc_pred)`
    # here softmaxes the logits twice
    soft_loss = nd.mean(nd.softmax_cross_entropy(data=nd.softmax(anc_pred, axis=1),
                                                 label=nd.argmax(anc_real, -1)))
    ans_loss = 1. * soft_loss + 1. * sq_loss
    return ans_loss
def compute_bin_loss(output, target, mask):
    # Zero out the logits of masked entries so they contribute no gradient
    mask = mask.broadcast_like(output)
    output = output * mask.astype('float32')
    return nd.softmax_cross_entropy(output, target).mean() / (1.0 * output.size)
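# A hedged call sketch for `compute_bin_loss` (shapes assumed). The mask is
# broadcast across the class axis, so zeroed rows yield all-zero logits and a
# constant loss term that carries no gradient signal:
from mxnet import nd

output = nd.random.randn(4, 3)               # per-bin logits, shape (batch, bins)
target = nd.array([0, 2, 1, 1])              # ground-truth bin indices
mask = nd.array([[1.], [1.], [0.], [1.]])    # rows with 0 are masked out

loss = compute_bin_loss(output, target, mask)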
def test_mxnet_light_module(data_x, data_y, constants, tmpdir):
    in_feats = constants
    model_dir = str(tmpdir.mkdir("mlp"))
    configuration = Configuration(
        model_name="mlp",
        model_dir=model_dir,
        hyper_params_update={"in_feats": in_feats, "hidden_layers": [16, 32]},
        end_epoch=2,
        batch_size=32,
    )
    batch_data = etl(data_x, data_y, configuration)
    lm.train(
        net=None,
        cfg=configuration,
        loss_function=None,
        get_loss=get_loss,
        trainer=None,
        train_data=batch_data,
        test_data=batch_data,
        fit_f=fit_f,
        eval_f=eval_f,
        initial_net=True,
        get_net=get_net,
    )

    # Test hyper-parameter search with nni and use a loss function
    def cross_entropy(x, y):
        return nd.softmax_cross_entropy(x, y)

    lm.train(
        net=None,
        cfg=configuration,
        loss_function=cross_entropy,
        trainer=None,
        train_data=batch_data,
        test_data=batch_data,
        fit_f=fit_f,
        eval_f=eval_f,
        initial_net=True,
        get_net=get_net,
        enable_hyper_search=True,
        primary_key="accuracy",
        loss_as_dict=True,
    )

    # Test a lambda loss function and a batch lr scheduler
    configuration.lr_params = {"scheduler": "linear", "learning_rate": 0.01, "max_update": 20}
    # When lr_params is set, a batch or epoch lr scheduler is expected to be specified
    lm.train(
        net=None,
        cfg=configuration,
        loss_function=lambda x, y: nd.softmax_cross_entropy(x, y),
        trainer=None,
        train_data=batch_data,
        test_data=batch_data,
        fit_f=fit_f,
        eval_f=eval_f,
        initial_net=True,
        get_net=get_net,
        loss_as_dict=True,
    )
    lm.train(
        net=None,
        cfg=configuration,
        loss_function=lambda x, y: nd.softmax_cross_entropy(x, y),
        trainer=None,
        train_data=batch_data,
        test_data=batch_data,
        fit_f=fit_f,
        eval_f=eval_f,
        initial_net=True,
        get_net=get_net,
        loss_as_dict=True,
    )

    # Use a loss class, tqdm as the progress monitor, and an epoch lr scheduler
    configuration.lr_params = {"update_params": {"scheduler": "linear", "learning_rate": 0.01}}
    loss = gluon.loss.SoftmaxCELoss()
    net = get_net(**configuration.hyper_params)
    net.initialize()
    lm.train(
        net=net,
        loss_function=loss,
        cfg=configuration,
        trainer=None,
        train_data=batch_data,
        test_data=batch_data,
        fit_f=fit_f,
        eval_f=eval_f,
        initial_net=False,
        loss_as_dict=True,
        progress_monitor="tqdm",
        dump_result=True,
        params_save=True,
    )
def cross_entropy(x, y):
    return nd.softmax_cross_entropy(x, y)
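# For reference, a minimal standalone demonstration of the operator this
# wrapper forwards to: `nd.softmax_cross_entropy` takes raw logits of shape
# (batch, classes) and integer class indices of shape (batch,), applies
# softmax internally, and returns the loss summed over the batch as a
# 1-element NDArray.
from mxnet import nd

logits = nd.array([[2.0, 0.5, -1.0],
                   [0.1, 0.2, 3.0]])
labels = nd.array([0, 2])
print(nd.softmax_cross_entropy(logits, labels))   # a single summed loss value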
""" calculate the softmax and xent loss in a more efficient way :param preds: the pred is the output of the model :param truth: the true label, a one-hot vector :return: the loss """ pred_max = ad.max_op(preds) preds_shift = ad.add_byscalar_op(ad.neg_op(pred_max), x) exps = ad.exp_op(preds_shift) return minus_op(ad.log_op(ad.sum_op(exps)), ad.sum_op(ad.mul_op(preds_shift, truth))) if __name__ == "__main__": x = ad.Variable("x") # y = ad.Variable("y") # z = ad.mul_byscalar_op(ad.max_op(x), y) y = softmax(x) label = ad.Variable("label") z = softmax_ce_loss(x, label) grad_x, = ad.gradients(z, [x]) executor = ad.Executor([y, z, grad_x]) print(executor.run({x : np.array([1., 2., 5.]), label: np.array([0, 0, 1])})) from mxnet import nd, autograd a = nd.array([[1., 2., 5.]]) label = nd.array([2]) a.attach_grad() with autograd.record(): l = nd.softmax_cross_entropy(a, label) l.backward() print(l, a.grad)