Example #1
0
        # NOTE(review): this continues an optimizer_settings dict whose opening
        # lines are outside this chunk; `lr_scheduler` is built earlier.
        'lr_scheduler': lr_scheduler,
        'optimizer': 'SGD',
        'weight_decay': 0.0001,
    }

    # Build the solver: trains `network` for 150 epochs across devices 0-3
    # (presumably GPU ids — confirm against MXSolver's device handling).
    from mx_solver import MXSolver
    from mx_initializer import PReLUInitializer
    solver = MXSolver(
        batch_size=args.batch_size,
        devices=(0, 1, 2, 3),
        epochs=150,
        initializer=PReLUInitializer(),
        optimizer_settings=optimizer_settings,
        symbol=network,
        verbose=True,
    )

    # CIFAR-10 record iterators, batched to args.batch_size.
    from data_utilities import load_cifar10_record
    data = load_cifar10_record(args.batch_size)

    # Train; `info` holds whatever statistics solver.train returns — it is
    # pickled below for later inspection.
    info = solver.train(data)

    # Compose a unique identifier for this run's saved artifacts.
    postfix = '-' + args.postfix if args.postfix else ''
    identifier = 'rnn-attention-network-on-cifar-10-%d%s' % (args.n_layers,
                                                             postfix)

    # Persist training info and learned parameters.  Use `with` so each file
    # handle is flushed and closed deterministically; the original bare
    # open() calls leaked the handles (no guaranteed flush/close on CPython
    # alternatives or on exceptions).
    import cPickle as pickle
    with open('info/%s' % identifier, 'wb') as f:
        pickle.dump(info, f)
    parameters = solver.export_parameters()
    with open('parameters/%s' % identifier, 'wb') as f:
        pickle.dump(parameters, f)
Example #2
0
# Build the network symbol (presumably Network-in-Network, given `nin` —
# confirm against the symbol factory, which is outside this chunk).
network = nin(activate)

# Step the learning rate down 10x at iteration 100000.
lr = 0.1
lr_table = {100000 : lr * 0.1}
lr_scheduler = AtIterationScheduler(lr, lr_table)
# SGD with momentum and weight decay; consumed by MXSolver below.
optimizer_settings = {
  'args'         : {'momentum' : 0.9},
  'initial_lr'   : lr,
  'lr_scheduler' : lr_scheduler,
  'optimizer'    : 'SGD',
  'weight_decay' : 0.0001,
}

# Solver: trains `network` for 300 epochs across devices 0-3
# (presumably GPU ids — confirm against MXSolver's device handling).
solver = MXSolver(
  batch_size = BATCH_SIZE,
  devices = (0, 1, 2, 3),
  epochs = 300,
  initializer = DReLUInitializer(0, 1),
  optimizer_settings = optimizer_settings,
  symbol = network,
  verbose = True,
)

# Load CIFAR-10 record iterators and train.
data = load_cifar10_record(BATCH_SIZE)
info = solver.train(data)

# Persist run info and learned parameters under a descriptive identifier.
# Use `with` so each dump file is flushed and closed deterministically; the
# original bare open() calls leaked the handles.
identifier = 'residual-network-n-%d-activate-%s-times-%d' % (N, ACTIVATE, TIMES)
with open('info/%s' % identifier, 'wb') as f:
  pickle.dump(info, f)
parameters = solver.export_parameters()
with open('parameters/%s' % identifier, 'wb') as f:
  pickle.dump(parameters, f)
Example #3
0
  # Bind the RNN symbol on `context` with the argument arrays prepared above
  # (rnn_arg_dict is built outside this chunk).
  rnn_executor = rnn.bind(context, rnn_arg_dict)

  # Gradient buffers for every RNN argument whose name does not contain
  # 'data' (i.e. the data inputs get no gradient arrays).
  rnn_args_grad = {
    arg : mx.nd.zeros(shape, context) \
      for arg, shape in zip(rnn_args, rnn_arg_shapes) if 'data' not in arg
  }

  # The loss executor shares the same argument arrays plus a label
  # placeholder, and writes its gradients into rnn_args_grad.
  rnn_loss_arg_dict = rnn_arg_dict.copy()
  rnn_loss_arg_dict['criterion_label'] = mx.nd.zeros(logit_shape, context)
  rnn_loss_executor = rnn_loss.bind(context, rnn_loss_arg_dict, rnn_args_grad)

  # Per-gradient running statistics, one array per gradient buffer.
  # NOTE(review): zeros/ones init looks like Adam-style first/second moment
  # estimates — confirm against the update rule, which is outside this view.
  mean_dict = {key : mx.nd.zeros(value.shape, context) for key, value in rnn_args_grad.items()}
  variance_dict = {key : mx.nd.ones(value.shape, context) for key, value in rnn_args_grad.items()}

  # CIFAR-10 record iterators, batched to args.batch_size.
  from data_utilities import load_cifar10_record
  training_data, validation_data, _ = load_cifar10_record(args.batch_size)

  # Incremental running mean: fold the i-th sample d into accumulator h.
  move = lambda h, d, i: (h * i + d) / (i + 1)

  for epoch in range(args.n_epochs):
    training_data.reset()
    for iteration, batch in enumerate(training_data):
      # Forward the resnet executor on the batch; its outputs feed the
      # RNN loss executor below.
      resnet_executor.arg_dict['data'][:] = batch.data[0]
      outputs = resnet_executor.forward()

      # Copy each intermediate output into the loss executor's 'data%d'
      # inputs; the final output serves as the criterion label.
      for index, output in enumerate(outputs[:-1]):
        rnn_loss_executor.arg_dict['data%d' % index][:] = output
      rnn_loss_executor.arg_dict['criterion_label'][:] = outputs[-1]
      outputs = rnn_loss_executor.forward(is_train=True)
      rnn_loss_executor.backward()