def main():
    torch.manual_seed(12345)
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    _logger = init_logger(cfg.work_dir, 'INFO')
    _logger.info(cfg)

    print('before init_process')
    init_process(cfg.dist_config)
    print('after init_process')

    print('before build_model')
    model = build_model(cfg.model)
    print('after build_model')

    print('before train_dataloader')
    train_dataloader = get_dataloader(cfg.data.train_data,
                                      cfg.data.train_dataloader)
    print('after train_dataloader')
    # Reuse the training loader for validation.
    val_dataloader = train_dataloader
    dataloaders = {'train': train_dataloader, 'val': val_dataloader}

    try:
        train_model(
            model,
            dataloaders,
            cfg,
        )
    except KeyboardInterrupt:
        print('KeyboardInterrupt')
        dist.destroy_process_group()
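
# For context: a minimal, self-contained sketch of a get_dataloader-style
# helper with the call shape used above (dataset config and loader config in,
# DataLoader out). The config keys, the sketch name, and the stand-in
# TensorDataset are assumptions, not the project's actual implementation.
import torch
from torch.utils.data import DataLoader, TensorDataset

def get_dataloader_sketch(data_cfg, loader_cfg):
    n = data_cfg.get('num_samples', 16)
    # Stand-in dataset; a real implementation would build it from data_cfg.
    dataset = TensorDataset(torch.randn(n, 3), torch.zeros(n, dtype=torch.long))
    return DataLoader(dataset,
                      batch_size=loader_cfg.get('batch_size', 4),
                      shuffle=loader_cfg.get('shuffle', True),
                      num_workers=loader_cfg.get('num_workers', 0))
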
def __init__(self, args):
    # cuda setting
    os.environ["CUDA_VISIBLE_DEVICES"] = args['cuda']
    # dir setting
    self.model_dir = args['model_dir']
    self.best_model_dir = args['best_model_dir']
    tool.check_mkdir(self.model_dir)
    tool.check_mkdir(self.best_model_dir)
    # dataset setting
    self.dataloader = DataSet.get_dataloader(args)
    self.no_eval = args['no_eval']
    self.img_size = args['img_size']
    args['mean'] = self.dataloader.mean
    args['std'] = self.dataloader.std
    args['num_classes'] = self.dataloader.num_classes
    # basic setting
    self.opt_type = args['optimizer']
    self.lr = args['lr']
    self.lr_epoch = args['lr_epoch']
    self.epoch = args['epoch']
    self.eval_best = 0
    self.eval_best_epoch = 0
    self.save_cm = args['save_cm']  # save confusion matrix
    # model name config
    self.model_desc = '{}_{}_{}_{}'.format(
        args['dataset'], args['model'], args['action'], args['desc'])
    self.model_pkl = self.model_desc + '.ckpt'
    # logger setup
    self.pblog = logger.get_pblog()
    self.pblog.total = self.epoch
    self.tblog = SummaryWriter(join(args['tb_dir'], self.model_desc))
    # model setup
    self.action = Action.get_action(args)
    self.model = Model.get_net(args)
    if args['pre_train']:
        state_dir = join(self.model_dir, self.model_desc)
        state = torch.load(state_dir, map_location='cpu')
        self.model.load_state_dict(state['net'])
    self.model.cuda()
    # self.action.save_graph(self.model, self.img_size, self.tblog,
    #                        self.pblog)
    if torch.cuda.device_count() > 1:
        self.model = torch.nn.DataParallel(self.model)
        # ism: is using multiple GPUs
        self.ism = True
    else:
        self.ism = False
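
# tool.check_mkdir is project-local; a plausible minimal equivalent
# (an assumption, not the project's actual code) is shown below.
import os

def check_mkdir(path):
    # Create the directory, including parents, if it does not exist yet.
    os.makedirs(path, exist_ok=True)
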
def main():
    torch.manual_seed(0)
    args = parse_args()
    cfg = Config.fromfile(args.config)
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    _logger = init_logger(cfg.work_dir, 'INFO')
    _logger.info(cfg)

    print('before init_process')
    init_process(cfg.dist_config)
    rank = dist.get_rank()
    print('rank={}'.format(rank))
    print('world_size={}'.format(dist.get_world_size()))
    print('after init_process')

    print('before build_model')
    model = build_model(cfg.model)
    print('after build_model')

    print('before train_dataloader')
    # Only the base-model ranks build a dataloader; the other ranks
    # get None placeholders.
    if rank in cfg.base_model_ranks:
        train_dataloader = get_dataloader(cfg.data.train_data,
                                          cfg.data.train_dataloader)
        val_dataloader = train_dataloader
        dataloaders = {'train': train_dataloader, 'val': val_dataloader}
    else:
        dataloaders = {'train': None, 'val': None}

    if cfg.data.train_num_samples:
        cfg.data.dataloader_lens = (cfg.data.train_num_samples
                                    // len(cfg.base_model_ranks)
                                    // cfg.data.batch_size)
    else:
        # Fall back to a hard-coded total sample count when
        # cfg.data.train_num_samples is unset.
        cfg.data.dataloader_lens = (5822653
                                    // len(cfg.base_model_ranks)
                                    // cfg.data.batch_size)

    if not cfg.load_top:
        if rank in cfg.top_model_ranks:
            cfg.load_from = None

    try:
        train_nbase_mtop_model(
            model,
            dataloaders,
            cfg,
        )
    except KeyboardInterrupt:
        print('KeyboardInterrupt')
        dist.destroy_process_group()
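
# A minimal sketch of what an init_process helper like the one above might
# do. The dist_config key names and defaults are assumptions; with
# init_method='env://', rank and world size are read from the environment
# variables RANK, WORLD_SIZE, MASTER_ADDR, and MASTER_PORT.
import torch.distributed as dist

def init_process_sketch(dist_config):
    # Join the default process group so that dist.get_rank() and
    # dist.get_world_size() work afterwards.
    dist.init_process_group(
        backend=dist_config.get('backend', 'nccl'),
        init_method=dist_config.get('init_method', 'env://'))
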
def save_checkpoint(state):
    torch.save(state, path + '/checkpoints/%d.pkl' % state['epoch'])


# Model opts
m = importlib.import_module('models.' + opt.model_type)
m.get_args(parser)
opt, _ = parser.parse_known_args()
print(opt)

(dataset_train, dataloader_train,
 dataset_val, dataloader_val) = get_dataloader(opt)
transform_target = dataset_train.transform_target

model = m.Model(opt)
optimizer_G = Adam(model.gen_params, lr=opt.lr, betas=(opt.beta1, 0.999))
optimizer_D = Adam(model.dis_params, lr=opt.lr, betas=(opt.beta1, 0.999))

stats = Stats(opt)
stats.calc_stats(dataset_val.targets, True)
losses = []

# Make directories
if not os.path.exists(opt.experiments_dir):
    os.makedirs(opt.experiments_dir)
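
# Typical usage of save_checkpoint above; the exact keys stored in `state`
# are an assumption (the function itself only requires an 'epoch' entry):
#
#   state = {
#       'epoch': epoch,
#       'model': model.state_dict(),
#       'optimizer_G': optimizer_G.state_dict(),
#       'optimizer_D': optimizer_D.state_dict(),
#   }
#   save_checkpoint(state)
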
def main():
    dataset, train_loader, val_loader = get_dataloader(config.data_path)
    config.input_size = dataset.n_features
    config.n_classes = dataset.n_classes
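
# A self-contained sketch of a get_dataloader with the three-value return
# shape used above. The stand-in tensors and the n_features / n_classes
# attributes are assumptions about the project's dataset class.
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split

def get_dataloader_sketch(data_path, batch_size=32, val_ratio=0.2):
    # Stand-in tabular data; a real implementation would parse data_path.
    features = torch.randn(100, 8)
    labels = torch.randint(0, 3, (100,))
    dataset = TensorDataset(features, labels)
    dataset.n_features = features.shape[1]     # mirrors dataset.n_features
    dataset.n_classes = int(labels.max()) + 1  # mirrors dataset.n_classes
    n_val = int(len(dataset) * val_ratio)
    train_set, val_set = random_split(dataset, [len(dataset) - n_val, n_val])
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)
    return dataset, train_loader, val_loader
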
parser.add_argument(
    '-save_intermediate', action='store_true',
    help='Whether to save intermediate images during optimization')
parser.add_argument(
    '-save_latents', action='store_true',
    help='Whether to save the final latent and noise vectors to file')
kwargs = vars(parser.parse_args())

# Setup output paths
out_path = Path(kwargs["output_dir"])
out_path.mkdir(parents=True, exist_ok=True)

# Load data
dataloader = get_dataloader(kwargs['input_dir'], kwargs['batch_size'])

# Load model
model = DataParallel(EmbeddingModel(cache_dir='cache'))

# Run model on each image
for ref_im, ref_im_name in dataloader:
    if kwargs["save_intermediate"]:
        # Create output directories for each image
        for i in range(kwargs["batch_size"]):
            int_path = Path(out_path / ref_im_name[i])
            int_path.mkdir(parents=True, exist_ok=True)
        # Save current image after each step
        for j, (output, latent, noise) in enumerate(model(ref_im, **kwargs)):
            for i in range(kwargs["batch_size"]):
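
# A minimal sketch of a get_dataloader that yields (image_batch, name_batch)
# pairs as the loop above consumes. The directory-of-PNGs layout and the
# class/function names are assumptions about the input pipeline.
from pathlib import Path
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

class ImagesDataset(Dataset):
    def __init__(self, root_dir):
        self.paths = sorted(Path(root_dir).glob('*.png'))
        self.to_tensor = transforms.ToTensor()

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        path = self.paths[idx]
        image = self.to_tensor(Image.open(path).convert('RGB'))
        # Return the file stem so callers can create per-image output dirs.
        return image, path.stem

def get_dataloader_sketch(input_dir, batch_size):
    return DataLoader(ImagesDataset(input_dir), batch_size=batch_size)
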