def run(trainset, trainloader, testloader, config):
    """Train a ``Classifier`` over several rounds, growing it after each one.

    Each round trains for ``config.n_epochs`` epochs, writes the statistics
    and weights under ``checkpoint/``, and then, unless
    ``config.method == 'none'``, grows the model through ``model.split``.
    With ``config.method == 'random'`` eight candidates are split from copies
    of the current model, trained for 17 epochs each, and the candidate with
    the highest test accuracy replaces the current model.

    Args:
        trainset: Training dataset; only forwarded to ``model.split``.
        trainloader: Iterable of ``(inputs, targets)`` training batches.
        testloader: Evaluation loader, forwarded to ``test``.
        config: Experiment settings. Reads ``seed``, ``dataset``, ``method``,
            ``dim_hidden``, ``grow_ratio``, ``granularity``, ``load_round``,
            ``verbose``, ``device`` and ``n_epochs``. ``config.resume`` is
            overwritten with ``False``.

    Returns:
        None. Results are persisted to disk and printed.
    """
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-mangled source; confirm against the upstream layout.
    check_path('./img')

    # Seed every RNG in use so a run is repeatable for a given config.seed.
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    np.random.seed(config.seed)

    exp_name = "experiment_%s_%s_initdim%d_seed%d_grow%f_gra%d_alpha3_new" % (
        config.dataset, config.method, config.dim_hidden, config.seed,
        config.grow_ratio, config.granularity)

    # Resuming is force-disabled here; the resume branches are kept so the
    # feature can be restored by deleting this override.
    config.resume = False

    if config.resume:
        stats = np.load("stats/round_%d_%s.npy" % (config.load_round, exp_name),
                        allow_pickle=True)
        stats = stats.tolist()
        model = Classifier(config, stats['cfg']).to(config.device)
        ckpt = torch.load("stats/round_%d_%s.pt" % (config.load_round, exp_name))
        model.load_state_dict(ckpt)
    else:
        model = Classifier(config).to(config.device)
        if config.verbose:
            print('[INFO] initial model trainable parameter number: %d'
                  % model.get_num_params())
        stats = {
            'train_loss': [],
            'test_accuracy': [],
            'compression_rate': [],
            'widths': {},
            'cfg': None,
        }
        # One width entry per splittable layer; 'fireflyn' tracks a pair.
        for i, layer in enumerate(model.net):
            if isinstance(layer, sp.SpModule) and layer.can_split:
                stats['widths'][i] = [0, 0] if config.method == 'fireflyn' else 0

    n_batches = len(trainloader)
    print("[INFO] Split method: ", config.method)

    if config.resume:
        ckpt = torch.load(
            "checkpoint/roundfull_%d_%s.pt" % (config.load_round, exp_name))
        model.load_state_dict(ckpt)
        print('load succ')

    load_round = config.load_round
    for round_idx in range(load_round, 10 + 1):
        for epoch in range(1, config.n_epochs + 1):
            # When resuming, the loaded round is already trained: skip it.
            if round_idx <= load_round and config.resume:
                break

            loss = 0.
            for x, y in trainloader:
                inputs = x.to(config.device)
                targets = y.to(config.device)
                loss += model.update(inputs, targets)
            loss /= n_batches

            test_acc = test(testloader, model)
            stats['train_loss'].append(loss)
            stats['test_accuracy'].append(test_acc)

            if epoch % 5 == 0:
                print(
                    "[INFO] Round %d Epoch %05d | Training loss is %10.4f | Test accuracy is %10.4f"
                    % (round_idx, epoch, loss, test_acc))

            # Decay the learning rate by 10x just before 1/2 and 3/4 of the
            # epoch budget.
            if epoch == config.n_epochs // 2 - 1:
                model.decay_lr(0.1)
            if epoch == config.n_epochs // 4 * 3 - 1:
                model.decay_lr(0.1)

            # Rolling checkpoint every 20 epochs and at the end of the round.
            if epoch % 20 == 0 or epoch == config.n_epochs:
                np.save("checkpoint/%s.npy" % exp_name, stats)
                torch.save(model.state_dict(), "checkpoint/%s.pt" % exp_name)

        # Per-round snapshot, taken before the network is grown.
        np.save("checkpoint/roundfull_%d_%s.npy" % (round_idx, exp_name), stats)
        torch.save(model.state_dict(),
                   "checkpoint/roundfull_%d_%s.pt" % (round_idx, exp_name))

        if config.method != 'none':
            # Grow the network use NASH
            if config.method == 'random':
                best_acc = 0
                best_model = None
                search_start = time.time()
                for _ in range(8):
                    candidate = copy.deepcopy(model)
                    candidate.set_lr(0.05)
                    candidate.create_optimizer()
                    candidate.split(config.method, trainset)
                    for e in range(17):
                        print(e)
                        for x, y in trainloader:
                            candidate.update(x.to(config.device),
                                             y.to(config.device))
                        adjust_learning_rate(candidate.opt, e, 17, 0.05)
                    test_acc = test(testloader, candidate)
                    # Track the running best. Previously best_acc was never
                    # updated, so the last candidate scoring above 0 won, and
                    # no candidate at all was bound when every score was 0.
                    if best_model is None or test_acc > best_acc:
                        best_acc = test_acc
                        best_model = candidate
                # The candidate is already a private deep copy of the model,
                # so it can be adopted directly.
                model = best_model
                print('Search Time', time.time() - search_start)
            else:
                model.split(config.method, trainset)

            print('Current parameter size', model.get_num_params())
            print('Current cfg', model.get_cfg())

            # Restart the schedule for the grown network with a fresh optimizer.
            model.set_lr(0.1)
            model.create_optimizer()
def run(trainset, trainloader, testloader, config):
    """Train (or only split) a ``Classifier`` over ``config.n_rounds`` rounds.

    Each round optionally trains for ``config.n_epochs`` epochs (skipped when
    ``config.onlys`` is set), checkpoints under ``checkpoint/<config.save>/``,
    and then, unless ``config.method == 'none'``, grows the model through
    ``model.split``. With ``config.method == 'random'`` eight candidates are
    split from copies of the current model, trained for 17 epochs each, and
    the candidate with the highest test accuracy replaces the current model.

    Args:
        trainset: Training dataset; only forwarded to ``model.split``.
        trainloader: Iterable of ``(inputs, targets)`` training batches.
        testloader: Evaluation loader, forwarded to ``test``.
        config: Experiment settings. Reads ``seed``, ``dataset``, ``method``,
            ``dim_hidden``, ``grow_ratio``, ``granularity``, ``log``,
            ``onlys``, ``ckpt``, ``device``, ``verbose``, ``load_round``,
            ``n_rounds``, ``n_epochs`` and ``save``. ``config.resume`` is
            overwritten with ``False``.

    Returns:
        None. Results are persisted to disk and logged.
    """
    # NOTE(review): the nesting below was reconstructed from a
    # whitespace-mangled source; confirm against the upstream layout.
    print("[INFO] into run function")
    check_path('./img')

    # Seed every RNG in use so a run is repeatable for a given config.seed.
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    np.random.seed(config.seed)

    exp_name = "exp_%s_%s_initdim%d_seed%d_grow%.3f_gra%d_alpha3_new" % (
        config.dataset, config.method, config.dim_hidden, config.seed,
        config.grow_ratio, config.granularity)
    log = config.log

    # Resuming is force-disabled here, so the first branch is never taken.
    config.resume = False

    if config.resume:
        # Resuming from a per-round checkpoint is not implemented in this
        # version of the function.
        pass
    elif config.onlys:
        # Split-only mode: start from the checkpoint named by config.ckpt.
        # NOTE: allow_pickle / torch.load unpickle data; only use trusted files.
        stats = np.load("%s.npy" % config.ckpt, allow_pickle=True)
        stats = stats.tolist()
        print(stats)
        model = Classifier(config).to(config.device)
        ckpt = torch.load('%s.pt' % config.ckpt)
        model.load_state_dict(ckpt)
        print("[INFO] model done")
    else:
        print("[INFO] load/new model on device %s" % str(config.device))
        model = Classifier(config).to(config.device)
        print("[INFO] model done")
        stats = {
            'train_loss': [],
            'test_accuracy': [],
            'compression_rate': [],
            'widths': {},
            'cfg': None,
        }
        # One width entry per splittable layer; 'fireflyn' tracks a pair.
        for i, layer in enumerate(model.net):
            if isinstance(layer, sp.SpModule) and layer.can_split:
                stats['widths'][i] = [0, 0] if config.method == 'fireflyn' else 0

    if config.verbose:
        params = model.get_num_params()
        cfg = model.get_cfg()
        log.info('[INFO] Initial model #params: %10d' % params)
        log.info('[INFO] Initial model configuration: [{}]'.format(
            ', '.join(map(str, cfg))))
    log.info("[INFO] Split method: {}".format(str(config.method)))
    log.info("=" * 80)

    n_batches = len(trainloader)
    test_acc = test(testloader, model)
    log.info("[INFO] The loaded model acc = %10.4f" % test_acc)
    log.info("[INFO] model:")
    log.info(model)
    log.info("=" * 80)

    load_round = config.load_round
    for round_idx in range(load_round, config.n_rounds):
        start_time = time.time()
        traindone = False

        if not config.onlys:
            for epoch in range(1, config.n_epochs + 1):
                # When resuming, the loaded round is already trained: skip it.
                if round_idx <= load_round and config.resume:
                    break

                loss = 0.
                for inputs, targets in trainloader:
                    inputs = inputs.to(config.device)
                    targets = targets.to(config.device)
                    loss += model.update(inputs, targets)
                loss /= n_batches

                test_acc = test(testloader, model)
                stats['train_loss'].append(loss)
                stats['test_accuracy'].append(test_acc)

                if epoch == config.n_epochs:
                    traindone = True

                # Decay the learning rate by 10x just before 1/2 and 3/4 of
                # the epoch budget.
                if epoch == config.n_epochs // 2 - 1:
                    model.decay_lr(0.1)
                if epoch == config.n_epochs // 4 * 3 - 1:
                    model.decay_lr(0.1)

                # Rolling checkpoint every 20 epochs and on the final epoch.
                if epoch % 20 == 0 or traindone:
                    print("[INFO] Round %d Epoch %03d | Training loss is %10.4f | Test accuracy is %10.4f" % (
                        round_idx, epoch, loss, test_acc))
                    np.save("checkpoint/%s/%s.npy" % (config.save, exp_name), stats)
                    torch.save(model.state_dict(),
                               "checkpoint/%s/%s.pt" % (config.save, exp_name))

            # Per-round summary and snapshot, only after a completed round.
            if traindone:
                log.info("[INFO] Round %d:" % round_idx)
                log.info("[INFO] Training takes %10.4f sec | Training loss: %10.4f | Test accuracy: %10.4f" % (
                    (time.time() - start_time), loss, test_acc))
                np.save("checkpoint/%s/round_%d_%s.npy" % (config.save, round_idx, exp_name), stats)
                torch.save(model.state_dict(),
                           "checkpoint/%s/round_%d_%s.pt" % (config.save, round_idx, exp_name))

        if config.method != 'none':
            # Grow the network use NASH
            if config.method == 'random':
                best_acc = 0
                best_model = None
                search_start = time.time()
                for _ in range(8):
                    candidate = copy.deepcopy(model)
                    candidate.set_lr(0.05)
                    candidate.create_optimizer()
                    candidate.split(config.method, trainset)
                    for e in range(17):
                        print(e)
                        for x, y in trainloader:
                            candidate.update(x.to(config.device),
                                             y.to(config.device))
                        adjust_learning_rate(candidate.opt, e, 17, 0.05)
                    test_acc = test(testloader, candidate)
                    # Track the running best. Previously best_acc was never
                    # updated, so the last candidate scoring above 0 won, and
                    # no candidate at all was bound when every score was 0.
                    if best_model is None or test_acc > best_acc:
                        best_acc = test_acc
                        best_model = candidate
                # The candidate is already a private deep copy of the model,
                # so it can be adopted directly.
                model = best_model
                # The whole search counts as the splitting time, so the shared
                # log line below has a value on this path as well.
                splittime = time.time() - search_start
                print('Search Time', splittime)
            else:
                _, splittime = model.split(config.method, trainset)

            params = model.get_num_params()
            cfg = model.get_cfg()
            log.info('[INFO] Current #params: %10d' % params)
            log.info('[INFO] Current configuration: [{}]'.format(
                ', '.join(map(str, cfg))))
            test_acc = test(testloader, model)
            log.info(
                "[INFO] Splitting takes %10.4f sec | Test accuracy after splitting: %10.4f"
                % (splittime, test_acc))

            # Restart the schedule for the grown network with a fresh optimizer.
            model.set_lr(0.1)
            model.create_optimizer()