# Requires at module scope: import gc; import mxnet as mx;
# from mxnet.contrib import autograd
def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
    # Zero all input gradients before accumulation.
    for i in range(len(in_grad)):
        self.assign(in_grad[i], req[i], 0)

    # Replay each stride's recorded RoI-pooling subgraph. Multiplying the
    # recorded output by the matching slice of the head gradient and
    # differentiating the product back-propagates out_grad through the
    # subgraph (compute_gradient's implicit head gradient is all ones).
    with autograd.train_section():
        for i in range(self.num_strides):
            if len(self.feat_idx[i]) > 0:  # fixed: was len(self.feat_idx[i] > 0)
                autograd.compute_gradient(
                    [mx.nd.take(out_grad[0],
                                mx.nd.array(self.feat_idx[i], out_grad[0].context))
                     * self.roi_pool[i]])

    # With deformable pooling there are three gradient slots per stride to
    # fill; plain pooling has one per stride.
    if self.with_deformable:
        for i in range(0, self.num_strides * 3):
            self.assign(in_grad[i], req[i], self.in_grad_hist_list[i])
    else:
        for i in range(0, self.num_strides):
            self.assign(in_grad[i], req[i], self.in_grad_hist_list[i])

    gc.collect()
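# Aside -- a minimal, self-contained sketch (not from the original source) of
# the mxnet.contrib.autograd pattern backward() relies on: mark_variables
# registers gradient buffers, train_section() records the forward computation,
# and compute_gradient() back-propagates into the registered buffers.
import mxnet as mx
from mxnet.contrib import autograd

x = mx.nd.ones((2, 3))
gx = mx.nd.zeros(x.shape)           # gradient buffer for x
autograd.mark_variables([x], [gx])
with autograd.train_section():      # record the forward pass
    y = x * 2
    autograd.compute_gradient([y])  # implicit all-ones head gradient
print gx.asnumpy()                  # all 2s: dy/dx for y = 2x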
import numpy as np
import mxnet.ndarray as nd  # used by the commented-out debug block below
from mxnet.contrib.autograd import compute_gradient
import minpy.nn.utils as utils

# Updater, model, args, training_data, validation_data and unpack_batch are
# defined elsewhere in the original script.
updater = Updater(model, update_rule='adam', lr=args.lr)
# updater = Updater(model, update_rule='sgd_momentum', lr=1e-1, momentum=0.9)

validation_accuracy = []
for epoch in range(args.n_epochs):
    # Training pass.
    training_data.reset()
    for iteration, batch in enumerate(training_data):
        data, labels = unpack_batch(batch)
        predictions = model.forward(data, is_train=True)
        loss = model.loss(predictions, labels, is_train=True)
        compute_gradient((loss,))
        '''
        for key, value in model.params.items():
            print key, nd.max(value).asnumpy(), nd.min(nd.abs(value)).asnumpy()
        '''
        updater(model.grad_dict)

    # Loss on the final training batch of the epoch.
    loss_value = utils.cross_entropy(loss, labels)
    print 'epoch %d loss %f' % (epoch, loss_value)
    if epoch < 1:
        print utils.count_params(model.params)

    # Validation pass.
    validation_data.reset()
    n_errors, n_samples = 0, 0
    for batch in validation_data:
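        # Hedged completion (assumption): the original snippet breaks off at
        # the loop header above; a minimal body would count top-1 errors.
        data, labels = unpack_batch(batch)
        scores = model.forward(data, is_train=False)
        predicted = np.argmax(scores.asnumpy(), axis=1)
        n_errors += int((predicted != labels.asnumpy()).sum())
        n_samples += data.shape[0]
    validation_accuracy.append(1 - float(n_errors) / n_samples)
    print 'epoch %d validation error %f' % (epoch, float(n_errors) / n_samples)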
# Training
epoch_number = 0
iteration_number = 0
terminated = False
while not terminated:
    epoch_number += 1
    real_data.reset()
    i = 0
    for real_batch in real_data:
        random_batch = random_data.getdata()

        # Discriminator pass on a real batch.
        dnet_real_output = dnetwork.forward(real_batch.data[0], is_train=True)
        dnet_real_loss = dnetwork.loss(dnet_real_output, real_batch.label[0], is_train=True)
        print dnet_real_loss.context  # debug: device the loss lives on
        compute_gradient((dnet_real_loss,))
        dnet_real_grad_dict = dnetwork.grad_dict
        # (commented out) snapshot the real-batch gradients on CPU, since
        # grad_dict is overwritten in place by the fake-batch backward pass.
        '''
        copy = lambda array: array.copyto(mx.cpu())
        cache_dict = dict(zip(dnetwork.grad_dict.keys(), map(copy, dnetwork.grad_dict.values())))
        '''

        # Discriminator pass on a generated batch, labeled 0 (fake).
        generated_data = gnet_model.forward(random_batch[0], is_train=True)
        dnet_fake_output = dnetwork.forward(generated_data, is_train=True)
        dnet_fake_loss = dnetwork.loss(dnet_fake_output, nd.zeros(generated_data.shape[0]), is_train=True)
        compute_gradient((dnet_fake_loss,))
        # (commented out) add the cached real-batch gradients back so one
        # update step uses the accumulated real + fake gradients.
        '''
        for key, value in dnetwork.grad_dict.items():
            value += cache_dict[key].copyto(value.context)
        '''
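        # Hedged continuation (assumption -- the snippet is truncated here):
        # a typical next step applies the discriminator update, then trains
        # the generator against 'real' labels. d_updater and g_updater are
        # hypothetical names, not from the original source.
        d_updater(dnetwork.grad_dict)
        generated = gnet_model.forward(random_batch[0], is_train=True)
        gnet_loss = dnetwork.loss(dnetwork.forward(generated, is_train=True),
                                  nd.ones(generated.shape[0]), is_train=True)
        compute_gradient((gnet_loss,))
        g_updater(gnet_model.grad_dict)
        iteration_number += 1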