# Script entry point (whitespace-collapsed fragment; the MXSolver(...) call at
# the end of this line is cut off, so its remaining arguments are not visible).
#
# Visible behavior:
#   * Parses --batch_size (int, default 128), --initial_lr (float, default 0.1),
#     --n_layers (int, required), --postfix (str, default '') and --sharing.
#   * Builds the network with build_network(n_layers=args.n_layers).
#   * lr_table maps 32000 -> 0.1 * initial_lr and 48000 -> 0.01 * initial_lr and
#     is handed to AtIterationScheduler (keys are presumably iteration counts --
#     confirm against lr_scheduler).
#   * optimizer_settings selects 'SGD' with momentum 0.9 and weight_decay 0.0001.
#
# NOTE(review): `type=bool` makes argparse call bool() on the raw string, so any
# non-empty value (including "False" or "0") parses as True; a store_true flag
# is the usual alternative -- confirm intended CLI before changing.
# NOTE(review): --postfix, --sharing and PReLUInitializer are not used in the
# visible part of this block; presumably consumed in the truncated remainder.
if __name__ == '__main__': from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument('--batch_size', type=int, default=128) parser.add_argument('--initial_lr', type=float, default=0.1) parser.add_argument('--n_layers', type=int, required=True) parser.add_argument('--postfix', type=str, default='') parser.add_argument('--sharing', type=bool, default=False) args = parser.parse_args() network = build_network(n_layers=args.n_layers) from lr_scheduler import AtIterationScheduler lr_table = {32000: args.initial_lr * 0.1, 48000: args.initial_lr * 0.01} lr_scheduler = AtIterationScheduler(args.initial_lr, lr_table) optimizer_settings = { 'args': { 'momentum': 0.9 }, 'initial_lr': args.initial_lr, 'lr_scheduler': lr_scheduler, 'optimizer': 'SGD', 'weight_decay': 0.0001, } from mx_solver import MXSolver from mx_initializer import PReLUInitializer solver = MXSolver( batch_size=args.batch_size,
# Training script fragment for the `nin` network (whitespace-collapsed; the
# MXSolver(...) call at the end of this line is cut off).
#
# Visible behavior:
#   * sys.argv[1] selects the activation: 'relu' -> ReLU, 'drelu' -> a lambda
#     wrapping drelu(X, {'data': (BATCH_SIZE, 3, 32, 32)}).
#   * The chosen activation is passed to nin() to build the network.
#   * Learning rate starts at 0.1; lr_table maps 100000 -> 0.01 and is handed to
#     AtIterationScheduler (key is presumably an iteration count -- confirm).
#   * optimizer_settings selects 'SGD' with momentum 0.9 and weight_decay 0.0001.
#   * MXSolver is configured with batch size 128, devices (0, 1, 2, 3),
#     300 epochs and DReLUInitializer(0, 1).
#
# NOTE(review): any argument other than 'relu' or 'drelu' leaves `activate`
# unbound, so nin(activate) would raise NameError -- confirm whether an explicit
# error message is wanted.
# NOTE(review): `sys` and `AtIterationScheduler` are not imported in the visible
# text; presumably imported earlier in the file -- verify.
from data_utilities import load_cifar10_record from mx_layers import ReLU from mx_solver import MXSolver from drelu import drelu, DReLUInitializer from nin import nin ACTIVATE = sys.argv[1] BATCH_SIZE = 128 if ACTIVATE == 'relu' : activate = ReLU elif ACTIVATE == 'drelu': activate = lambda X : drelu(X, {'data' : (BATCH_SIZE, 3, 32, 32)}) network = nin(activate) lr = 0.1 lr_table = {100000 : lr * 0.1} lr_scheduler = AtIterationScheduler(lr, lr_table) optimizer_settings = { 'args' : {'momentum' : 0.9}, 'initial_lr' : lr, 'lr_scheduler' : lr_scheduler, 'optimizer' : 'SGD', 'weight_decay' : 0.0001, } solver = MXSolver( batch_size = BATCH_SIZE, devices = (0, 1, 2, 3), epochs = 300, initializer = DReLUInitializer(0, 1), optimizer_settings = optimizer_settings, symbol = network,
# Shape handed to the network builder: the global batch is divided by the
# number of devices. Floor division keeps the leading dimension an int on
# Python 3 as well (true division would produce a float such as 32.0);
# on Python 2 the result is unchanged.
DATA_SHAPE = (BATCH_SIZE // len(devices), 3, 32, 32)

# Keyword configuration forwarded verbatim to triple_state_residual_network.
MODES = {'mode': 'hyper', 'embedding': 'feature_map', 'data_shape': DATA_SHAPE}
# Alternative configuration kept from the original script:
# MODES = {'mode' : 'hyper', 'embedding' : 'parameter'}

# The first command-line argument is the network builder's first positional
# argument (presumably a depth/size parameter -- confirm against the builder).
N = int(sys.argv[1])
network = triple_state_residual_network(N, **MODES)

data = load_cifar10_record(BATCH_SIZE)

# Learning rate starts at 0.1 and is replaced by 0.01 / 0.001 at the keys of
# lr_table (presumably iteration counts -- confirm against AtIterationScheduler).
lr = 0.1
lr_table = {32000: lr * 0.1, 48000: lr * 0.01}
optimizer_settings = {
    'args': {'momentum': 0.9},
    'initial_lr': lr,
    'lr_scheduler': AtIterationScheduler(lr, lr_table),
    'optimizer': 'SGD',
    'weight_decay': 0.0001,
}

solver = MXSolver(
    batch_size=BATCH_SIZE,
    devices=devices,
    epochs=150,
    initializer=PReLUInitializer(),
    optimizer_settings=optimizer_settings,
    symbol=network,
    verbose=True,
)

info = solver.train(data)