def run(num_gpus, batch_size, lr):
    # the list of GPUs that will be used
    ctx = [mx.gpu(i) for i in range(num_gpus)]
    print('Running on {}'.format(ctx))

    # data iterators
    mnist = get_mnist()
    train_data = NDArrayIter(mnist["train_data"], mnist["train_label"], batch_size)
    valid_data = NDArrayIter(mnist["test_data"], mnist["test_label"], batch_size)
    print('Batch size is {}'.format(batch_size))

    net.collect_params().initialize(force_reinit=True, ctx=ctx)
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})
    for epoch in range(10):
        # training
        start = time()
        train_data.reset()
        for batch in train_data:
            train_batch(batch, ctx, net, trainer)
        nd.waitall()  # wait until all computations are finished to benchmark the time
        print('Epoch %d, training time = %.1f sec' % (epoch, time() - start))

        # validation
        valid_data.reset()
        correct, num = 0.0, 0.0
        for batch in valid_data:
            correct += valid_batch(batch, ctx, net)
            num += batch.data[0].shape[0]
        print('         validation accuracy = %.4f' % (correct / num))
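
# `get_mnist`, `train_batch`, and `valid_batch` are defined elsewhere in this
# project. Below is a minimal sketch of the two batch helpers, assuming the
# usual Gluon data-parallel pattern of splitting each batch across devices
# with `gluon.utils.split_and_load`; the actual implementations may differ.
from mxnet import autograd, gluon, nd

def train_batch(batch, ctx, net, trainer):
    # split data and labels evenly across the available devices
    data = gluon.utils.split_and_load(batch.data[0], ctx)
    label = gluon.utils.split_and_load(batch.label[0], ctx)
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    with autograd.record():
        losses = [loss_fn(net(X), y) for X, y in zip(data, label)]
    for loss in losses:
        loss.backward()
    # normalize the gradient step by the total batch size
    trainer.step(batch.data[0].shape[0])

def valid_batch(batch, ctx, net):
    # evaluate on the first device; return the number of correct predictions
    data = batch.data[0].as_in_context(ctx[0])
    label = batch.label[0].as_in_context(ctx[0])
    pred = nd.argmax(net(data), axis=1)
    return nd.sum(pred == label.astype('float32')).asscalar()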
'''
X_training, X_test = fetch_and_get_mnist()
real_data = NDArrayIter(X_training, np.ones(X_training.shape[0]), batch_size=batch_size)
random_data = RandIter(batch_size, Z)

gnet_updater = Updater(gnet_model, update_rule='sgd_momentum', lr=0.1, momentum=0.9)
dnet_updater = Updater(dnetwork, update_rule='sgd_momentum', lr=0.1, momentum=0.9)

# Training
epoch_number = 0
iteration_number = 0
terminated = False

while not terminated:
    epoch_number += 1
    real_data.reset()

    i = 0
    for real_batch in real_data:
        random_batch = random_data.getdata()

        # forward the discriminator on a batch of real images
        dnet_real_output = dnetwork.forward(real_batch.data[0], is_train=True)
        dnet_real_loss = dnetwork.loss(dnet_real_output, real_batch.label[0], is_train=True)
        print(dnet_real_loss.context)
        compute_gradient((dnet_real_loss,))
        dnet_real_grad_dict = dnetwork.grad_dict
        '''
        copy = lambda array: array.copyto(mx.cpu())
        cache_dict = dict(zip(dnetwork.grad_dict.keys(), map(copy, dnetwork.grad_dict.values())))
        '''
        # forward the generator on a batch of random noise
        generated_data = gnet_model.forward(random_batch[0], is_train=True)
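
# `RandIter` and `fetch_and_get_mnist` come from the surrounding project and
# are not shown here. A minimal sketch of `RandIter`, assuming it is the
# endless noise iterator commonly used in MXNet GAN examples (the noise shape
# and distribution are assumptions, not taken from the original code):
import mxnet as mx

class RandIter(mx.io.DataIter):
    def __init__(self, batch_size, ndim):
        self.batch_size = batch_size
        self.ndim = ndim
        self.provide_data = [('rand', (batch_size, ndim, 1, 1))]
        self.provide_label = []

    def iter_next(self):
        # an endless stream: there is always a next batch of noise
        return True

    def getdata(self):
        # one batch of Gaussian noise for the generator, returned as a list
        # so that callers can index it as random_batch[0]
        return [mx.random.normal(0, 1.0, shape=(self.batch_size, self.ndim, 1, 1))]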
training_data = NDArrayIter(data[0], data[1], batch_size=args.batch_size)
validation_data = NDArrayIter(data[2], data[3], batch_size=args.batch_size)
test_data = NDArrayIter(data[4], data[5], batch_size=args.batch_size)

model = MSPCNN(args.n_layers, args.n_filters, args.n_scales, args.n_units)
updater = Updater(model, update_rule='adam', lr=args.lr)
# updater = Updater(model, update_rule='sgd_momentum', lr=1e-1, momentum=0.9)

import numpy as np
from mxnet.contrib.autograd import compute_gradient
import minpy.nn.utils as utils

validation_accuracy = []
for epoch in range(args.n_epochs):
    training_data.reset()
    for iteration, batch in enumerate(training_data):
        data, labels = unpack_batch(batch)
        predictions = model.forward(data, is_train=True)
        loss = model.loss(predictions, labels, is_train=True)
        compute_gradient((loss,))
        '''
        for key, value in model.params.items():
            print(key, nd.max(value).asnumpy(), nd.min(nd.abs(value)).asnumpy())
        '''
        updater(model.grad_dict)

        loss_value = utils.cross_entropy(loss, labels)
        print('epoch %d loss %f' % (epoch, loss_value))

    if epoch < 1:
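
# `unpack_batch` is not shown above. A minimal sketch, assuming it simply
# pulls the arrays out of an MXNet DataBatch and places them on a device
# (the default context parameter here is an assumption):
import mxnet as mx

def unpack_batch(batch, ctx=mx.cpu()):
    data = batch.data[0].as_in_context(ctx)
    labels = batch.label[0].as_in_context(ctx)
    return data, labels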
parser.add_argument('--batch_size', default=128, type=int)
parser.add_argument('--kfold', default=5, type=int)
parser.add_argument('--gpu', default=0, type=int)
args = parser.parse_args()

# ctx = mx.cpu()  # gpu(7)
test_data, test_id = fetch_test_data()
data_iter = NDArrayIter(data=test_data, batch_size=args.batch_size, shuffle=False)

for fold in range(args.kfold):
    print(fold)
    ctx = mx.gpu(args.gpu)
    net = net_define_eu()
    net.collect_params().reset_ctx(ctx)
    net.load_params('net' + str(fold) + '.params', ctx)
    data_iter.reset()
    with open('result' + str(fold) + '.csv', 'w') as f:
        f.write('id,toxic,severe_toxic,obscene,threat,insult,identity_hate\n')
        for i, d in enumerate(data_iter):
            output = net(d.data[0].as_in_context(ctx)).asnumpy()
            for j in range(args.batch_size):
                # the last batch may be padded, so skip rows past the real test set
                if i * args.batch_size + j < test_id.shape[0]:
                    str_out = ','.join(
                        [str(test_id[i * args.batch_size + j])] +
                        [str(v) for v in output[j]]) + '\n'
                    f.write(str_out)
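
# The loop above writes one prediction file per fold. A hypothetical
# follow-up step (not part of the original script): average the per-fold
# probabilities into a single submission with pandas.
import pandas as pd

label_cols = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
frames = [pd.read_csv('result' + str(i) + '.csv') for i in range(args.kfold)]
submission = frames[0].copy()
# element-wise mean of the probability columns across all folds
submission[label_cols] = sum(f[label_cols] for f in frames) / len(frames)
submission.to_csv('submission.csv', index=False)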