import logging
import os
import sys
import time

import numpy as np
import torch
import torch.distributed as dist
from tensorboardX import SummaryWriter

from utils.config import SPOSConfig
from model.network import get_shufflenas_oneshot
from utils.imagenet_dataloader import get_imagenet_iter_torch
from utils import utils

config = SPOSConfig()
device = torch.device("cuda")

# tensorboard
writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    start = time.time()
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Bind this process to its local GPU, then join the NCCL process group;
    # init_method='env://' reads MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE from
    # the environment set up by the launcher.
    torch.cuda.set_device(config.local_rank % len(config.gpus))
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    config.world_size = torch.distributed.get_world_size()
    # Effective global batch size across all workers.
    config.total_batch = config.world_size * config.batch_size

    np.random.seed(config.seed)
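
# --- Hedged sketch (not from the original repo) -----------------------------
# `get_imagenet_iter_torch` is repo-specific and its body is not shown here.
# Below is a minimal stand-in for what a distributed ImageNet loader of this
# kind usually looks like; the name `make_distributed_loader` and the
# `traindir` argument are hypothetical, and torchvision is assumed available.
def make_distributed_loader(traindir, batch_size, workers=4):
    import torchvision.datasets as datasets
    import torchvision.transforms as transforms

    dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]))
    # Each rank draws a disjoint shard of the dataset; call
    # sampler.set_epoch(epoch) every epoch so the shuffling differs.
    sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, sampler=sampler,
        num_workers=workers, pin_memory=True)
    return loader, sampler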
# Module-level imports needed by this method (the enclosing class and the
# repo-specific `project_utils` / `RepeatedDataLoader` are defined elsewhere).
import logging
import os

import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets as dset
from tensorboardX import SummaryWriter


def initialize_run(self):
    """TODO: this is the same initialization as the NAO one."""
    args = self.args
    utils = project_utils

    if not self.args.continue_train:
        self.sub_directory_path = 'WeightSharingNasBenchNetRandom-{}_SEED_{}'.format(
            self.args.save, self.args.seed)
        self.exp_dir = os.path.join(self.args.main_path, self.sub_directory_path)
        utils.create_exp_dir(self.exp_dir)

    if self.args.visualize:
        self.viz_dir_path = utils.create_viz_dir(self.exp_dir)

    if self.args.tensorboard:
        self.tb_dir = self.exp_dir
        tboard_dir = os.path.join(self.args.tboard_dir, self.sub_directory_path)
        self.writer = SummaryWriter(tboard_dir)

    if self.args.debug:
        torch.autograd.set_detect_anomaly(True)

    self.nasbench = self.search_space.nasbench

    # Set logger.
    self.logger = utils.get_logger(
        "train_search",
        file_handler=utils.get_file_handler(
            os.path.join(self.exp_dir, 'log.txt')),
        level=logging.INFO if not args.debug else logging.DEBUG)

    logging.info(f"setting random seed as {args.seed}")
    utils.torch_random_seed(args.seed)
    logging.info('gpu number = %d' % args.gpus)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss().cuda()
    eval_criterion = nn.CrossEntropyLoss().cuda()
    self.eval_loss = eval_criterion

    train_transform, valid_transform = utils._data_transforms_cifar10(
        args.cutout_length if args.cutout else None)
    train_data = dset.CIFAR10(root=args.data, train=True,
                              download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data, train=True,
                              download=True, transform=valid_transform)
    test_data = dset.CIFAR10(root=args.data, train=False,
                             download=True, transform=valid_transform)

    # Split the official CIFAR-10 training set into a search-train part and a
    # search-validation part according to the configured ratio.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.enas_search_config.ratio * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.enas_search_config.child_eval_batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.evaluate_batch_size,
        shuffle=False, pin_memory=True, num_workers=8)

    repeat_valid_loader = RepeatedDataLoader(valid_queue)
    return (train_queue, valid_queue, test_queue, repeat_valid_loader,
            criterion, eval_criterion)
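
# --- Hedged sketch (not from the original repo) -----------------------------
# `RepeatedDataLoader` is used above but defined elsewhere in the repo.  A
# minimal assumption of its behaviour, common in weight-sharing NAS code, is a
# wrapper that hands out batches indefinitely, restarting the underlying
# DataLoader whenever it is exhausted.
class RepeatedDataLoader:
    def __init__(self, loader):
        self.loader = loader
        self._iterator = iter(loader)

    def next_batch(self):
        # Transparently start a new epoch once the current one runs out.
        try:
            return next(self._iterator)
        except StopIteration:
            self._iterator = iter(self.loader)
            return next(self._iterator)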
parser.add_argument('--compress_rate', type=str, default=None,
                    help='compress rate of each conv')
args = parser.parse_args()

CLASSES = 10
print_freq = (256 * 50) // args.batch_size

if not os.path.isdir(args.job_dir):
    os.mkdir(args.job_dir)

utils.record_config(args)
logger = utils.get_logger(os.path.join(args.job_dir, 'logger.log'))


def load_vgg_model(model, oristate_dict, random_rule):
    logger.info('random rule: ' + random_rule)

    state_dict = model.state_dict()
    last_select_index = None  # conv filter indices selected in the previous layer

    cnt = 0
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d):
            cnt += 1
            oriweight = oristate_dict[name + '.weight']
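            # --- Hedged sketch (not from the original file) ------------------
            # The function is truncated above.  In pruning code of this style
            # (e.g. HRank), the loop typically continues by comparing the
            # pruned layer's filter count with the original and copying a
            # selected subset of filters; the continuation below is an
            # assumption of that pattern, not the original body, and assumes
            # `import random` at module level.
            curweight = state_dict[name + '.weight']
            orifilter_num = oriweight.size(0)
            currentfilter_num = curweight.size(0)

            if orifilter_num != currentfilter_num:
                if random_rule == 'random_pretrain':
                    # Keep a random subset of the original filters.
                    select_index = sorted(random.sample(range(orifilter_num),
                                                        currentfilter_num))
                else:
                    # Otherwise keep the filters with the largest L1 norm.
                    l1_norm = oriweight.abs().sum(dim=(1, 2, 3))
                    select_index = sorted(
                        l1_norm.argsort(descending=True)[:currentfilter_num].tolist())
            else:
                select_index = list(range(orifilter_num))

            # Copy surviving filters, sub-selecting input channels whenever
            # the previous conv layer was itself pruned.
            for i, idx in enumerate(select_index):
                if last_select_index is not None:
                    for j, jdx in enumerate(last_select_index):
                        state_dict[name + '.weight'][i][j] = oriweight[idx][jdx]
                else:
                    state_dict[name + '.weight'][i] = oriweight[idx]
            last_select_index = select_index

    model.load_state_dict(state_dict)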
# Module-level imports needed below (`args`, `device`, `Network`, `train`,
# and `test` are defined elsewhere in this file).
import os

import numpy as np
import tensorboardX
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn

import utils


def main():
    # Seed every RNG that influences training so runs are reproducible.
    seed = args.seed
    np.random.seed(seed)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)

    timestamp = str(utils.get_unix_timestamp())
    path = os.path.join(args.save, timestamp)
    # Create the checkpoint directory up front so the final save cannot fail.
    os.makedirs(path, exist_ok=True)
    logger = utils.get_logger(args.save, timestamp, file_type='txt')
    tb_logger = tensorboardX.SummaryWriter('../runs/{}'.format(timestamp))
    logger.info("time = %s, args = %s", str(utils.get_unix_timestamp()), args)

    train_data, test_data, input_shape = utils.get_data(
        args.data, args.observ_window, args.downsampling, args.multi_slice)
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        shuffle=True, pin_memory=True, num_workers=2)
    test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size,
        shuffle=False, pin_memory=True, num_workers=2)

    model = Network(input_shape, args.num_drones)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    for epoch in range(args.epochs):
        # get_last_lr() replaces the deprecated get_lr(), which no longer
        # returns the applied rate on recent PyTorch versions.
        lr = scheduler.get_last_lr()[0]
        logger.info('time = %s, epoch %d lr %e',
                    str(utils.get_unix_timestamp()), epoch, lr)
        print('time = {}, epoch {} lr {}'.format(
            str(utils.get_unix_timestamp()), epoch, lr))

        model.train()
        train_loss, train_acc = train(train_queue, model, criterion,
                                      optimizer, logger)
        logger.info('time = %s, train_loss %f train_acc %f',
                    str(utils.get_unix_timestamp()), train_loss, train_acc)
        print('time = {}, train_loss {} train_acc {}'.format(
            str(utils.get_unix_timestamp()), train_loss, train_acc))
        tb_logger.add_scalar("epoch_train_loss", train_loss, epoch)
        tb_logger.add_scalar("epoch_train_acc", train_acc, epoch)
        scheduler.step()

        model.eval()
        test_loss, test_acc = test(test_queue, model, criterion, logger)
        logger.info('time = %s, test_loss %f test_acc %f',
                    str(utils.get_unix_timestamp()), test_loss, test_acc)
        print('time = {}, test_loss {} test_acc {}'.format(
            str(utils.get_unix_timestamp()), test_loss, test_acc))
        tb_logger.add_scalar("epoch_test_loss", test_loss, epoch)
        tb_logger.add_scalar("epoch_test_acc", test_acc, epoch)

        utils.save(model, os.path.join(path, 'weights.pt'))
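
# --- Hedged sketch (not from the original file) -----------------------------
# `train` and `test` are referenced above but defined elsewhere.  A minimal
# assumption of their contract (run one epoch, return average loss and
# accuracy) is sketched below; inference in `test` is wrapped in no_grad.
def train(queue, model, criterion, optimizer, logger):
    total_loss, correct, total = 0.0, 0, 0
    for inputs, targets in queue:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        logits = model(inputs)
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * targets.size(0)
        correct += (logits.argmax(dim=1) == targets).sum().item()
        total += targets.size(0)
    return total_loss / total, correct / total


def test(queue, model, criterion, logger):
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, targets in queue:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs)
            loss = criterion(logits, targets)
            total_loss += loss.item() * targets.size(0)
            correct += (logits.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    return total_loss / total, correct / total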