'lr_scheduler': lr_scheduler, 'optimizer': 'SGD', 'weight_decay': 0.0001, } from mx_solver import MXSolver from mx_initializer import PReLUInitializer solver = MXSolver( batch_size=args.batch_size, devices=(0, 1, 2, 3), epochs=150, initializer=PReLUInitializer(), optimizer_settings=optimizer_settings, symbol=network, verbose=True, ) from data_utilities import load_cifar10_record data = load_cifar10_record(args.batch_size) info = solver.train(data) postfix = '-' + args.postfix if args.postfix else '' identifier = 'rnn-attention-network-on-cifar-10-%d%s' % (args.n_layers, postfix) import cPickle as pickle pickle.dump(info, open('info/%s' % identifier, 'wb')) parameters = solver.export_parameters() pickle.dump(parameters, open('parameters/%s' % identifier, 'wb'))
# Build the network-in-network symbol and train it on CIFAR-10.
network = nin(activate)

# Step-decay schedule: start at 0.1, drop 10x at iteration 100000.
lr = 0.1
lr_table = {100000: lr * 0.1}
lr_scheduler = AtIterationScheduler(lr, lr_table)

optimizer_settings = {
    'args': {'momentum': 0.9},
    'initial_lr': lr,
    'lr_scheduler': lr_scheduler,
    'optimizer': 'SGD',
    'weight_decay': 0.0001,
}

# 4-GPU SGD training for 300 epochs.
solver = MXSolver(
    batch_size=BATCH_SIZE,
    devices=(0, 1, 2, 3),
    epochs=300,
    initializer=DReLUInitializer(0, 1),
    optimizer_settings=optimizer_settings,
    symbol=network,
    verbose=True,
)

data = load_cifar10_record(BATCH_SIZE)
info = solver.train(data)

# Persist training history and learned parameters; `with` guarantees the
# pickle files are flushed and closed (the original left the open(...)
# handles dangling).
# NOTE(review): identifier says 'residual-network' but the symbol comes
# from nin(activate) — confirm the label is intentional.
identifier = 'residual-network-n-%d-activate-%s-times-%d' % (N, ACTIVATE, TIMES)
with open('info/%s' % identifier, 'wb') as info_file:
    pickle.dump(info, info_file)
parameters = solver.export_parameters()
with open('parameters/%s' % identifier, 'wb') as parameter_file:
    pickle.dump(parameters, parameter_file)
# Bind the RNN symbol for inference, and a loss-augmented variant for
# training; both bind against the same argument arrays so they share
# parameters. Gradient buffers are allocated for every non-data argument.
rnn_executor = rnn.bind(context, rnn_arg_dict)
rnn_args_grad = {
    arg: mx.nd.zeros(shape, context)
    for arg, shape in zip(rnn_args, rnn_arg_shapes)
    if 'data' not in arg
}
rnn_loss_arg_dict = rnn_arg_dict.copy()
rnn_loss_arg_dict['criterion_label'] = mx.nd.zeros(logit_shape, context)
rnn_loss_executor = rnn_loss.bind(context, rnn_loss_arg_dict, rnn_args_grad)

# Per-gradient first/second moment buffers (zeros / ones respectively).
# NOTE(review): presumably consumed by an optimizer update below this
# chunk — confirm against the rest of the file.
mean_dict = {key: mx.nd.zeros(value.shape, context) for key, value in rnn_args_grad.items()}
variance_dict = {key: mx.nd.ones(value.shape, context) for key, value in rnn_args_grad.items()}

from data_utilities import load_cifar10_record
training_data, validation_data, _ = load_cifar10_record(args.batch_size)

# Incremental running average: fold datum d into average h of i samples.
move = lambda h, d, i: (h * i + d) / (i + 1)

for epoch in range(args.n_epochs):
    training_data.reset()
    for iteration, batch in enumerate(training_data):
        # Run the resnet forward on the batch to produce feature maps.
        resnet_executor.arg_dict['data'][:] = batch.data[0]
        outputs = resnet_executor.forward()
        # All intermediate resnet outputs feed the RNN loss executor as
        # 'data0', 'data1', ...; the last resnet output is the target.
        for index, output in enumerate(outputs[:-1]):
            rnn_loss_executor.arg_dict['data%d' % index][:] = output
        rnn_loss_executor.arg_dict['criterion_label'][:] = outputs[-1]
        outputs = rnn_loss_executor.forward(is_train=True)
        rnn_loss_executor.backward()