def sample_architecture_from_the_supernet(unique_name_of_arch, hardsampling=True):
    logger = get_logger(CONFIG_SUPERNET['logging']['path_to_log_file'])

    lookup_table = LookUpTable()
    model = FBNet_Stochastic_SuperNet(lookup_table, cnt_classes=10).cuda()
    model = nn.DataParallel(model)
    load(model, CONFIG_SUPERNET['train_settings']['path_to_save_model'])

    ops_names = [op_name for op_name in lookup_table.lookup_table_operations]
    cnt_ops = len(ops_names)

    arch_operations = []
    if hardsampling:
        # Hard sampling: in each searched layer, pick the operation with the largest theta.
        for layer in model.module.stages_to_search:
            arch_operations.append(ops_names[np.argmax(layer.thetas.detach().cpu().numpy())])
    else:
        # Soft sampling: draw an operation from the softmax distribution over thetas.
        rng = np.arange(cnt_ops)
        for layer in model.module.stages_to_search:
            distribution = softmax(layer.thetas.detach().cpu().numpy())
            arch_operations.append(ops_names[np.random.choice(rng, p=distribution)])

    logger.info("Sampled Architecture: " + " - ".join(arch_operations))
    writh_new_ARCH_to_fbnet_modeldef(arch_operations,
                                     my_unique_name_for_ARCH=unique_name_of_arch)
    logger.info("CONGRATULATIONS! New architecture " + unique_name_of_arch
                + " was written into fbnet_building_blocks/fbnet_modeldef.py")
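# --- Illustrative sketch, not part of the original pipeline ---
# Minimal, self-contained demo of the two sampling modes used above: hard
# sampling takes the argmax of the architecture parameters (thetas), while
# soft sampling draws from their softmax distribution. The op names and
# theta values below are hypothetical, chosen only for illustration.
def demo_sampling_modes(hardsampling=True):
    import numpy as np
    from scipy.special import softmax  # assumes the repo's softmax behaves like scipy's

    ops_names = ['ir_k3_e3', 'ir_k3_e6', 'ir_k5_e3']  # hypothetical op names
    thetas = np.array([0.2, 1.5, 0.3])                # hypothetical learned thetas
    if hardsampling:
        return ops_names[int(np.argmax(thetas))]      # deterministic: largest theta wins
    distribution = softmax(thetas)                    # probabilities over candidate ops
    return ops_names[np.random.choice(len(ops_names), p=distribution)]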
def train_supernet():
    manual_seed = 1
    np.random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    torch.cuda.manual_seed_all(manual_seed)
    torch.backends.cudnn.benchmark = True

    create_directories_from_list([CONFIG_SUPERNET['logging']['path_to_tensorboard_logs']])

    logger = get_logger(CONFIG_SUPERNET['logging']['path_to_log_file'])
    writer = SummaryWriter(log_dir=CONFIG_SUPERNET['logging']['path_to_tensorboard_logs'])

    #### The lookup table holds all per-layer operation and latency information
    lookup_table = LookUpTable(calulate_latency=CONFIG_SUPERNET['lookup_table']['create_from_scratch'])

    #### Data loading
    train_w_loader = dataloader.create_loaders(load_random_triplets=False,
                                               batchsize=CONFIG_SUPERNET['dataloading']['batch_size'],
                                               n_triplets=3000)
    train_thetas_loader = dataloader.create_loaders(load_random_triplets=False,
                                                    batchsize=CONFIG_SUPERNET['dataloading']['batch_size'],
                                                    n_triplets=1000)
    test_loader = dataloader.create_test_loaders(load_random_triplets=False,
                                                 batchsize=CONFIG_SUPERNET['dataloading']['batch_size'],
                                                 n_triplets=50)

    #### Model
    model = FBNet_Stochastic_SuperNet(lookup_table).cuda()
    model = model.apply(weights_init)
    model = nn.DataParallel(model, device_ids=[0])

    #### Loss, optimizer, and scheduler
    criterion = SupernetLoss().cuda()

    # Split the parameters: architecture parameters ('thetas') are trained
    # with Adam, all remaining weights with SGD.
    thetas_params = [param for name, param in model.named_parameters()
                     if 'thetas' in name]
    params_except_thetas = [param for param in model.parameters()
                            if not check_tensor_in_list(param, thetas_params)]

    w_optimizer = torch.optim.SGD(params=params_except_thetas,
                                  lr=CONFIG_SUPERNET['optimizer']['w_lr'],
                                  momentum=CONFIG_SUPERNET['optimizer']['w_momentum'],
                                  weight_decay=CONFIG_SUPERNET['optimizer']['w_weight_decay'])
    theta_optimizer = torch.optim.Adam(params=thetas_params,
                                       lr=CONFIG_SUPERNET['optimizer']['thetas_lr'],
                                       weight_decay=CONFIG_SUPERNET['optimizer']['thetas_weight_decay'])

    last_epoch = -1
    w_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(w_optimizer,
                                                             T_max=CONFIG_SUPERNET['train_settings']['cnt_epochs'],
                                                             last_epoch=last_epoch)

    #### Training loop
    trainer = TrainerSupernet(criterion, w_optimizer, theta_optimizer, w_scheduler,
                              logger, writer, lookup_table)
    trainer.train_loop(train_w_loader, train_thetas_loader, test_loader, model)
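# --- Illustrative sketch, not part of the original pipeline ---
# check_tensor_in_list is imported from the repo's utilities; the parameter
# split above relies on it for membership tests, since a plain `param in list`
# does not work cleanly for torch tensors (== produces an elementwise tensor).
# A minimal identity-based version consistent with that usage might look like:
def check_tensor_in_list_sketch(tensor, tensor_list):
    # Compare by object identity: two distinct parameters may hold equal
    # values but must still be treated as different tensors.
    return any(tensor is t for t in tensor_list)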
def train_supernet():
    manual_seed = 1
    np.random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    torch.cuda.manual_seed_all(manual_seed)
    torch.backends.cudnn.benchmark = True

    create_directories_from_list([CONFIG_SUPERNET['logging']['path_to_tensorboard_logs']])

    logger = get_logger(CONFIG_SUPERNET['logging']['path_to_log_file'])
    writer = SummaryWriter(log_dir=CONFIG_SUPERNET['logging']['path_to_tensorboard_logs'])

    #### Data loading
    train_w_loader, train_thetas_loader = get_loaders(CONFIG_SUPERNET['dataloading']['w_share_in_train'],
                                                      CONFIG_SUPERNET['dataloading']['batch_size'],
                                                      CONFIG_SUPERNET['dataloading']['path_to_save_data'],
                                                      logger)
    test_loader = get_test_loader(CONFIG_SUPERNET['dataloading']['batch_size'],
                                  CONFIG_SUPERNET['dataloading']['path_to_save_data'])

    #### Train the high-level supernet
    lookup_table = LookUpTable_HIGH(calulate_latency=CONFIG_SUPERNET['lookup_table']['create_from_scratch'])

    if args.high_or_low == 'high':
        #### Model
        model = FBNet_Stochastic_SuperNet(lookup_table, cnt_classes=10).cuda()
        model = model.apply(weights_init)
        model = nn.DataParallel(model, device_ids=[0])
        model.load_state_dict(torch.load('/home/khs/data/sup_logs/cifar10/pretrained_high.pth'))

        #### Loss, optimizer, and scheduler
        criterion = SupernetLoss().cuda()

        # Reset the architecture parameters to a uniform distribution over the
        # six candidate operations before the search starts.
        for layer in model.module.stages_to_search:
            layer.thetas = nn.Parameter(torch.Tensor([1.0 / 6 for i in range(6)]).cuda())

        thetas_params = [param for name, param in model.named_parameters()
                         if 'thetas' in name]
        params_except_thetas = [param for param in model.parameters()
                                if not check_tensor_in_list(param, thetas_params)]

        w_optimizer = torch.optim.SGD(params=params_except_thetas,
                                      lr=CONFIG_SUPERNET['optimizer']['w_lr'],
                                      momentum=CONFIG_SUPERNET['optimizer']['w_momentum'],
                                      weight_decay=CONFIG_SUPERNET['optimizer']['w_weight_decay'])
        theta_optimizer = torch.optim.Adam(params=thetas_params,
                                           lr=CONFIG_SUPERNET['optimizer']['thetas_lr'],
                                           weight_decay=CONFIG_SUPERNET['optimizer']['thetas_weight_decay'])

        last_epoch = -1
        w_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(w_optimizer,
                                                                 T_max=CONFIG_SUPERNET['train_settings']['cnt_epochs'],
                                                                 last_epoch=last_epoch)

        #### Training loop
        trainer = TrainerSupernet(criterion, w_optimizer, theta_optimizer, w_scheduler,
                                  logger, writer, True)
        trainer.train_loop(train_w_loader, train_thetas_loader, test_loader, model)

        # Write the operation selected (argmax over thetas) for every searched
        # layer to result.txt, one layer per line.
        ops_names = [op_name for op_name in lookup_table.lookup_table_operations]
        with open('result.txt', 'w') as f:
            for i, layer in enumerate(model.module.stages_to_search):
                chosen_op = ops_names[np.argmax(layer.thetas.detach().cpu().numpy())]
                print(chosen_op, end=" ")
                f.write('Layer {}: {}\n'.format(i, chosen_op))
    else:
        count = 0
        previous = []
        index = []
        act_update = []
        weight_update = []
        while True:
            print('{}th iteration'.format(count))

            lookup_table = LookUpTable(calulate_latency=CONFIG_SUPERNET['lookup_table']['create_from_scratch'],
                                       count=count,
                                       act_update=act_update,
                                       weight_update=weight_update)
            for i in range(len(weight_update)):
                weight_update[i] = 0
            #if count != 0:
            #    lookup_table.index[0] = copy.deepcopy(index)

            #### Model
            model = FBNet_Stochastic_SuperNet(lookup_table, cnt_classes=10).cuda()
            model = nn.DataParallel(model, device_ids=[0])
            #if count == 0:
            #    model.load_state_dict(torch.load('/home/khs/data/sup_logs/cifar10/pretrained.pth'))
            #else:
            #    model.load_state_dict(torch.load('/home/khs/data/sup_logs/cifar10/best_model.pth'))
            model.load_state_dict(torch.load('/home/khs/data/sup_logs/cifar10/best_model.pth'))
            #model = model.apply(weights_init)

            #### Loss, optimizer, and scheduler
            criterion = SupernetLoss().cuda()

            # Reset the architecture parameters to a uniform distribution over
            # the three candidate operations.
            for layer in model.module.stages_to_search:
                layer.thetas = nn.Parameter(torch.Tensor([1.0 / 3 for i in range(3)]).cuda())

            thetas_params = [param for name, param in model.named_parameters()
                             if 'thetas' in name]
            params_except_thetas = [param for param in model.parameters()
                                    if not check_tensor_in_list(param, thetas_params)]

            w_optimizer = torch.optim.SGD(params=params_except_thetas,
                                          lr=CONFIG_SUPERNET['optimizer']['w_lr'],
                                          momentum=CONFIG_SUPERNET['optimizer']['w_momentum'],
                                          weight_decay=CONFIG_SUPERNET['optimizer']['w_weight_decay'])
            theta_optimizer = torch.optim.Adam(params=thetas_params,
                                               lr=CONFIG_SUPERNET['optimizer']['thetas_lr'],
                                               weight_decay=CONFIG_SUPERNET['optimizer']['thetas_weight_decay'])

            last_epoch = -1
            w_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(w_optimizer,
                                                                     T_max=CONFIG_SUPERNET['train_settings']['cnt_epochs'],
                                                                     last_epoch=last_epoch)

            #### Training loop
            trainer = TrainerSupernet(criterion, w_optimizer, theta_optimizer, w_scheduler,
                                      logger, writer, False)
            trainer.train_loop(train_w_loader, train_thetas_loader, test_loader, model)

            # Record the channel indices chosen in this iteration, one entry
            # per searched layer.
            del index[:]
            with open('index.txt', 'w') as f:
                for idx, layer in enumerate(model.module.stages_to_search):
                    ops = np.argmax(layer.thetas.detach().cpu().numpy())
                    tmp = lookup_table.index[ops][idx]
                    index.append(tmp)
                    f.write('%s\n' % tmp)

            # Stop once the selected indices no longer change between two
            # consecutive iterations.
            same = 1
            if count != 0:
                for i in range(len(previous)):
                    for j in range(len(previous[i])):
                        if previous[i][j] not in index[i]:
                            same = 0
                if same == 1:
                    break
            previous = copy.deepcopy(index)
            count += 1
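# --- Illustrative sketch, not part of the original pipeline ---
# The while-loop above terminates once the per-layer index lists stop changing
# between consecutive iterations. The same convergence test, factored into a
# standalone helper and exercised on toy data (the nested-list shapes are an
# assumption about what lookup_table.index holds):
def has_converged_sketch(previous, index):
    # Converged when every index kept from the previous iteration is still
    # present in the corresponding layer's current index list.
    return all(p in index[i]
               for i, prev_layer in enumerate(previous)
               for p in prev_layer)

assert has_converged_sketch([[1, 2], [3]], [[1, 2, 5], [3]])  # unchanged -> stop
assert not has_converged_sketch([[1, 2], [3]], [[2], [3]])    # 1 was dropped -> continue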