def run(args, config, model, lifelong_agent=None):
    """Sequentially train on tasks 1..N-1, then return the mean dev loss
    over all tasks.

    NOTE(review): unlike pretrain()/adapt(), the train() call here passes
    neither a SummaryWriter nor a dev_loader — confirm this matches the
    train() signature used elsewhere in this project.
    """
    # Adapt on each task after the first (task 0 is the pretraining task).
    for task_id in range(1, len(config['dataset']['train']['noisy'])):
        train_loader = get_dataloader(args.n_jobs,
                                      config['dataset']['train']['noisy'][task_id],
                                      config['dataset']['train']['clean'][task_id],
                                      config['train']['batch_size'], True)
        train(args, config, train_loader, model, lifelong_agent)
        # Bug fix: lifelong_agent defaults to None; calling update_weights
        # unconditionally raised AttributeError for plain fine-tuning runs.
        if lifelong_agent is not None:
            lifelong_agent.update_weights(model, train_loader)

    # Evaluate the final model on every task's dev set.
    score = 0
    model.eval()
    with torch.no_grad():
        for task_id in range(len(config['dataset']['train']['noisy'])):
            dev_loader = get_dataloader(args.n_jobs,
                                        config['dataset']['dev']['noisy'][task_id],
                                        config['dataset']['dev']['clean'][task_id],
                                        config['eval']['batch_size'])
            loss_sum = 0
            sample_num = 0
            for (lengths, niy_audio, cln_audio) in dev_loader:
                lengths, niy_audio, cln_audio = lengths.to(
                    device), niy_audio.to(device), cln_audio.to(device)
                batch_size = len(niy_audio)
                loss = model(lengths, niy_audio, cln_audio).item()
                # Weight each batch loss by its size so loss_sum/sample_num
                # is the true per-sample mean.
                loss_sum += loss * batch_size
                sample_num += batch_size
            loss_sum /= sample_num
            # Bug fix: removed the stray `score -= loss_sum * 0.05`, which
            # silently deducted 5% of each task's loss and made the return
            # value not the mean dev loss it is documented to be.
            score += loss_sum
    # Average per-task dev loss.
    return score / len(config['dataset']['train']['noisy'])
def pretrain(args, config, model, lifelong_agent):
    """Train the model on task 0, then snapshot both the model and the
    lifelong-learning agent's synapse weights under <logdir>/pretrain/."""
    save_dir = f'{args.logdir}/pretrain/'
    log = SummaryWriter(save_dir)

    # Task 0 is the pretraining task; build its train and dev loaders.
    task0_train = (config['dataset']['train']['noisy'][0],
                   config['dataset']['train']['clean'][0])
    task0_dev = (config['dataset']['dev']['noisy'][0],
                 config['dataset']['dev']['clean'][0])
    train_loader = get_dataloader(args.n_jobs, task0_train[0], task0_train[1],
                                  config['train']['batch_size'], True)
    dev_loader = get_dataloader(args.n_jobs, task0_dev[0], task0_dev[1],
                                config['eval']['batch_size'])

    train(args, config, log, train_loader, dev_loader, model, lifelong_agent)
    torch.save(model, f'{save_dir}/{args.model}_model_T0.pth')

    # Record importance weights on the pretraining data before adaptation.
    lifelong_agent.update_weights(model, train_loader)
    torch.save(lifelong_agent, f'{save_dir}/{args.model}_synapses_T0.pth')
    log.close()
def __init__(self, cfg_module, writer):
    """Build the classification module from a config dict: dataloaders,
    network, loss, metric, optimizer/scheduler, and saved state.

    NOTE(review): when dataset_type != "directory" no dataloaders are
    created here, yet self.trainloader is used right below — confirm how
    the "srproj" default is handled (possibly by the base class).
    """
    super().__init__(save_name=f'{cfg_module["network"]["name"]}_cls',
                     writer=writer)
    dataset_type = cfg_module["data"].get("dataset_type", "srproj")
    if dataset_type == "directory":
        # Setup Dataloader (put custom dataset such as "cls_dataset")
        self.trainloader = get_dataloader("training", cfg_module["data"],
                                          cls_dataset_directory)
        self.valloader = get_dataloader("validation", cfg_module["data"],
                                        cls_dataset_directory)
    # Train/val splits must agree on the label space.
    assert (self.trainloader.dataset.n_classes == self.valloader.dataset.
            n_classes), "train/val dataset n_classes missmatch"
    # Propagate the discovered class count into network/metric configs.
    cfg_module["network"].update(
        n_classes=self.trainloader.dataset.n_classes)
    cfg_module["metric"].update(
        n_classes=self.trainloader.dataset.n_classes)

    # Define Module type (for external usage)
    self.module_type = "classification"

    # Setup Model
    # Don't need to model.to(device)
    self.network = cls_network(cfg_module["network"])

    # Setup Loss
    self.loss = cls_loss(cfg_module["loss"])

    # Setup Metric
    self.metric = cls_metric(cfg_module["metric"])

    # Setup Optimizer and Scheduler
    self.optimizer = get_optimizer(cfg_module["optimizer"],
                                   self.network.parameters())
    # Empty string when no scheduler is configured.
    self.scheduler_name = (cfg_module["scheduler"]["name"]
                           if cfg_module["scheduler"] else "")
    self.scheduler = get_scheduler(cfg_module["scheduler"], self.optimizer)

    # Load State
    self._load_state(cfg_module["load_state"])
def adapt(args, config, model, lifelong_agent=None):
    """Sequentially fine-tune on tasks 1..N-1, saving a model checkpoint
    (and, when present, the lifelong agent's synapses) after each task."""
    log = SummaryWriter(args.logdir)
    save_dir = f'{args.logdir}/'
    os.makedirs(save_dir, exist_ok=True)

    n_tasks = len(config['dataset']['train']['noisy'])
    for task_id in range(1, n_tasks):
        train_loader = get_dataloader(args.n_jobs,
                                      config['dataset']['train']['noisy'][task_id],
                                      config['dataset']['train']['clean'][task_id],
                                      config['train']['batch_size'], True)
        dev_loader = get_dataloader(args.n_jobs,
                                    config['dataset']['dev']['noisy'][task_id],
                                    config['dataset']['dev']['clean'][task_id],
                                    config['eval']['batch_size'])

        train(args, config, log, train_loader, dev_loader, model,
              lifelong_agent, True)
        torch.save(model, f'{save_dir}/{args.model}_model_T{task_id}.pth')

        # Only lifelong-learning runs carry an agent whose importance
        # weights need refreshing after each task.
        if lifelong_agent is not None:
            lifelong_agent.update_weights(model, train_loader)
            torch.save(lifelong_agent,
                       f'{save_dir}/{args.model}_synapses_T{task_id}.pth')
    log.close()
def test(args, config):
    """Evaluate the pretrained model and every adapted checkpoint on all
    test tasks, writing one score-matrix CSV per configured metric.

    results[m][row, col]: metric m, model adapted through task `row`
    (row 0 = pretrain-only), evaluated on test task `col`.
    """
    torch.cuda.set_device(args.gpu)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    save_dir = f'{args.logdir}/{args.mode}'
    assert os.path.exists(save_dir)

    task_num = len(config['dataset']['test']['noisy'])
    model_list = []
    pretrain_model = torch.load(
        f'{args.logdir}/pretrain/{args.model}_model_T0.pth')
    # Checkpoints T1..T{n-1} were produced by a previous adapt()/run().
    for i in range(1, task_num):
        model_list.append(
            torch.load(f'{save_dir}/{args.model}_model_T{i}.pth'))
    # Nothing to evaluate unless at least one adapted checkpoint exists.
    if len(model_list) > 0:
        # One (task_num x task_num) score matrix per evaluation metric.
        results = [
            np.zeros((task_num, task_num)) for m in config['eval']['metrics']
        ]
        for Ti in range(task_num):
            test_loader = get_dataloader(args.n_jobs,
                                         config['dataset']['test']['noisy'][Ti],
                                         config['dataset']['test']['clean'][Ti],
                                         config['eval']['batch_size'])
            # Row 0: the pretrained model on this task.
            _, score = evaluate(args, config, test_loader,
                                pretrain_model.to(device))
            for i in range(len(score)):
                results[i][0, Ti] = score[i]
            # Rows 1..: each adapted checkpoint on this task.
            for Mi in range(len(model_list)):
                _, score = evaluate(args, config, test_loader,
                                    model_list[Mi].to(device))
                for i in range(len(score)):
                    results[i][Mi + 1, Ti] = score[i]
        for i in range(len(config['eval']['metrics'])):
            metric = config['eval']['metrics'][i]
            np.savetxt(f'{args.logdir}/{args.mode}_{metric}.csv',
                       results[i],
                       delimiter=",",
                       fmt='%1.4f')
def main(argv):
    """Run an ensemble of six trained models over a test image list and
    write the weighted-average predictions to the output file."""
    TEST_IMAGE_LIST = argv[1]
    output_file_path = argv[2]
    cudnn.benchmark = True

    print('loading models & data')
    # Six checkpoints, each with a hand-tuned ensemble weight.
    resumes = [f'model_{k}.pkl' for k in range(1, 7)]
    weights = [0.2, 0.9, 0.7, 0.4, 0.4, 0.3]

    # load models & data
    models = []
    dataloders = []
    for resume in resumes:
        arch = resume.split('_')[0]
        ckpt = 'Chexpert_challenge_submit/best_models_chexpert_ft/' + resume
        model, gray, image_size = get_trained_model(arch, ckpt)
        models.append(model)
        # Each model may need its own preprocessing (grayscale, input size).
        dataloders.append(get_dataloader(TEST_IMAGE_LIST, gray, image_size))
    print('load models & data success!')

    print('predicting')
    predictions = [
        predict_single_model(m, loader)
        for m, loader in zip(models, dataloders)
    ]
    print('predict success!')

    print('ensembleing')
    ensemble_result = ensemble(predictions, weights)
    print('ensemble success!')

    print('predict_file')
    predict_file(ensemble_result, TEST_IMAGE_LIST, output_file_path)
    print('predict_file success!')
def main():
    """Impute missing values in a time-series dataset: train (or load) an
    imputation model on the chosen split, then run imputation and log
    recovery metrics."""
    # Handle parameters
    args = util.get_args()
    # Select gpu
    device = torch.device(args.device)
    args.device = device
    # Load data — each loader is a (main, validation) pair.
    train_loader, val_loader, test_loader = util.get_dataloader(args)
    train_dataloader, train_val_dataloader = train_loader
    val_dataloader, val_val_dataloader = val_loader
    test_dataloader, test_val_dataloader = test_loader
    # dataset.X is 2-D (samples x series); record sizes on args.
    args.train_size, args.nSeries = train_dataloader.dataset.X.shape
    args.val_size, args.val_nSeries = val_dataloader.dataset.X.shape
    args.test_size, args.test_nSeries = test_dataloader.dataset.X.shape
    # Create logger
    logger = util.Logger(args)
    # Display arguments
    util.print_args(args)
    # Create model
    model = models.get_model(args)
    # Create imputation engine
    engine = util.ImpEngine.from_args(model, scaler=None, args=args)

    # Select the split to impute. NOTE: this rebinds val_loader from the
    # (main, validation) tuple above to a single dataloader.
    if args.impset == 'train':
        data_loader = train_dataloader
        val_loader = train_val_dataloader
    elif args.impset == 'val':
        data_loader = val_dataloader
        val_loader = val_val_dataloader
    elif args.impset == 'test':
        data_loader = test_dataloader
        val_loader = test_val_dataloader
    else:
        raise NotImplementedError

    # Training
    if not args.test:
        iterator = trange(args.num_epoch)
        try:
            # Offer to resume from an existing best checkpoint.
            if os.path.isfile(logger.best_model_save_path):
                print('Model checkpoint exist!')
                print('Load model checkpoint? \n(y/n)')
                _in = input()
                if _in == 'y' or _in == 'yes':
                    print('Loading model...')
                    engine.model.load_state_dict(
                        torch.load(logger.best_model_save_path))
                else:
                    print('Training new model')
            # NOTE(review): the training loop also runs after a checkpoint
            # is loaded — confirm this is intended behavior.
            for epoch in iterator:
                loss = engine.train(data_loader)
                engine.scheduler.step()
                with torch.no_grad():
                    # metrics = (val_loss, rse, mae, mape, mse, rmse)
                    Xhat_val, val_metrics = engine.validation(
                        data_loader, val_loader)
                m = dict(train_loss=loss,
                         val_loss=val_metrics[0],
                         val_rse=val_metrics[1],
                         val_mae=val_metrics[2],
                         val_mape=val_metrics[3],
                         val_mse=val_metrics[4],
                         val_rmse=val_metrics[5])
                # report stats
                description = logger.summary(m, engine.model)
                # Early stopping decided by the logger.
                if logger.stop:
                    break
                description = 'Epoch: {} '.format(epoch) + description
                iterator.set_description(description)
        except KeyboardInterrupt:
            # Allow Ctrl-C to skip straight to recovery with the best model.
            pass

    # data recovery — always reload the best saved weights first.
    engine.model.load_state_dict(torch.load(logger.best_model_save_path))
    with torch.no_grad():
        # metrics = (rse, mae, mape, mse, rmse)
        imp_X, metrics, metrics_li = engine.imputation(data_loader)
    m = dict(imp_rse=metrics[0],
             imp_mae=metrics[1],
             imp_mape=metrics[2],
             imp_mse=metrics[3],
             imp_rmse=metrics[4])
    # m_li = dict(imp_rse=metrics_li[0], imp_mae=metrics_li[1], imp_mape=metrics_li[2], imp_mse=metrics_li[3],
    #             imp_rmse=metrics_li[4])
    logger.imputation_summary(m=m,
                              X=data_loader.dataset.X,
                              imp_X=imp_X,
                              W=data_loader.dataset.W,
                              save_imp=True)
def main():
    """Entry point: parse CLI arguments, build the dataset/model/optimizer,
    and train with the selected (zeroth- or first-order) optimizer, with
    optional pruning or freezing between rounds."""
    ap = argparse.ArgumentParser("SZO")
    ap.add_argument("--data",
                    choices=["mnist", "cifar10"],
                    default="mnist",
                    help="dataset")  #, "skewedmnist"
    ap.add_argument(
        "--opt",
        choices=["first", "flaxman", "dueling", "ghadimi", "agarwal"],
        help="optimizer type")
    ap.add_argument("--model", choices=["fc3", "cnn"], help="Model type")
    ap.add_argument("--depth", default=1, type=int, help="Depth of the cnn")
    ap.add_argument("--seed", default=12345, type=int, help="random seed")
    ap.add_argument("--num_epochs", default=5, type=int,
                    help="number of epochs")
    ap.add_argument("--num_rounds", default=20, type=int,
                    help="number of rounds")
    ap.add_argument("--lr", default=0.1, type=float,
                    help="initial learning rate")
    ap.add_argument("--pr", default=0.2, type=float, help="pruning rate")
    ap.add_argument("--mu", default=0.1, type=float,
                    help="exploration rate, smoothing parameter")
    ap.add_argument("--beta", default=0.0, type=float, help="momentum")
    ap.add_argument("--max_grad_norm", default=0.0, type=float,
                    help="maximum gradient norm")
    ap.add_argument("--var", default=1.0, type=float, help="noise variance")
    ap.add_argument("--eval_interval", default=10000, type=int,
                    help="evaluation interval")
    ap.add_argument("--batch_size", default=64, type=int, help="batch_size")
    ap.add_argument("--eval_batch_size", default=1000, type=int,
                    help="batch size used in evaluation")
    # NOTE(review): default=True combined with store_true means --cv can
    # never be False from the command line — confirm intent.
    ap.add_argument("--cv", default=True, action="store_true",
                    help="whether to include control variates")  # type=bool,
    ap.add_argument(
        "--init",
        choices=["reset", "random", "last"],  #, 'rewind', 'best'
        help="initialization strategy in pruning: one of {reset, random, last}"
    )  #, rewind, best
    #ap.add_argument("--rewind_step", type=int, help="which epoch to return to after pruning")
    ap.add_argument(
        "--reward",
        choices=["nce", "acc", "expected_reward", "sampled_score"],
        help=
        "reward function: one of {nce, acc, expected_reward, sampled_score}")
    ap.add_argument("--prune_or_freeze",
                    choices=["none", "prune", "freeze"],
                    help="sparsification strategy: one of {prune or freeze}")
    ap.add_argument(
        "--masking_strategy",
        choices=["none", "L1", "heldout", "random"],
        help="masking strategy: one of {none, L1, heldout, random}")
    ap.add_argument(
        "--num_samples",
        type=int,
        help="number of samples to evaluate for gradient estimation")
    ap.add_argument("--device", choices=["cpu", "gpu"], default="cpu")
    ap.add_argument(
        '--affine',
        action="store_true",
        default=False,  # type=bool,
        help="if specified, turn on affine transform in normalization layers")
    ap.add_argument('--norm',
                    choices=["batch", "layer", "none"],
                    default="batch",
                    help="type of normalization to use between NN layeres")
    args = ap.parse_args()

    # Per-seed run directory for logs.
    log_dir = f'runs-{args.seed}'
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    #if not os.path.exists('logs/'+log_dir):
    #    os.mkdir('logs/'+log_dir)

    # logging — the label encodes the main hyperparameter choices.
    label = f'{args.opt}-{args.reward}-{args.prune_or_freeze}-{args.init}-{args.masking_strategy}-{args.batch_size}'
    logging.basicConfig(
        filename=os.path.join(log_dir, f'{label}-train.log'),
        filemode='a',
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.addHandler(TqdmLoggingHandler())
    logger.info('Arguments:')
    for arg in vars(args):
        logger.info(f'\t{arg}: {getattr(args, arg)}')

    # data
    if args.data == 'mnist':
        trainset, testset, classes = mnist(data_path='data/MNIST_data/')
    elif args.data == 'cifar10':
        trainset, testset, classes = cifar10(data_path='data/CIFAR10_data/')
    trainloader, testloader, devloader = get_dataloader(
        trainset,
        testset,
        batch_size=args.batch_size,
        eval_batch_size=args.eval_batch_size,
        seed=args.seed)

    # model
    model = None
    model_kwargs = {
        'seed': args.seed,
        'class_names': classes,
        'output_dim': len(classes),
        'norm_affine': args.affine,
        'norm': args.norm
    }
    if args.model == 'cnn':
        # The CNN configuration assumes 32x32 CIFAR-10 inputs.
        assert args.data == 'cifar10'
        model_kwargs['modules'] = args.depth
        model_kwargs['input_size'] = 32
        model = ConvolutionalNN(**model_kwargs)
    elif args.model == 'fc3':
        # Flattened input dimension depends on the dataset.
        if args.data == 'mnist':
            model_kwargs['input_dim'] = 28 * 28
        elif args.data == 'cifar10':
            model_kwargs['input_dim'] = 32 * 32 * 3
        model = FullyConnectedNN(**model_kwargs)
    else:
        raise ValueError("Unknown model type")

    # gpu
    device = None
    if args.device == 'gpu' and torch.cuda.is_available():
        device = 'cuda:0'
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        device = 'cpu'
    model.to(device)
    logger.info(f"Device: {device}")
    if torch.cuda.is_available():
        logger.info(f"\tn_gpu: {torch.cuda.device_count()}")

    # optimizer — NOTE(review): truthiness guards mean 0 / 0.0 values fall
    # back to the optimizer's own defaults rather than being passed through.
    kwargs = {'prune_or_freeze': args.prune_or_freeze, 'init': args.init}
    if args.lr:
        kwargs['lr'] = args.lr
    if args.mu:
        kwargs['mu'] = args.mu
    if args.beta:
        kwargs['beta'] = args.beta
    if args.max_grad_norm:
        kwargs['max_grad_norm'] = args.max_grad_norm
    if args.var:
        kwargs['var'] = args.var
    if args.num_samples:
        kwargs['num_samples'] = args.num_samples
    #if args.init == 'rewind':
    #    print(args.rewind_step)
    opt = None
    if args.opt == 'first':
        # First-order optimization: variant depends on the reward type.
        if args.reward in ['sampled_score']:
            kwargs['cv'] = args.cv  # control variates
            opt = FirstOrderBanditOptimizer(model.parameters(), **kwargs)
        elif args.reward in ['nce', 'expected_reward']:
            opt = FirstOrderOptimizer(model.parameters(), **kwargs)
        else:
            raise ValueError
    elif args.opt == 'flaxman':
        opt = VanillaEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'dueling':
        opt = DuelingEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'ghadimi':
        opt = OneSideEvolutionOptimizer(model.parameters(), **kwargs)
    elif args.opt == 'agarwal':
        opt = TwoSideEvolutionOptimizer(model.parameters(), **kwargs)
    else:
        raise ValueError("Unknown optimizer type")
    #scheduler = lr_scheduler.ReduceLROnPlateau(opt, mode='max', patience=3, threshold=1e-2)
    scheduler = None  # constant learning rate

    # trainer — pruning only applies when both a sparsification strategy
    # and a masking strategy are selected.
    pruning_rate = 0.0 if args.prune_or_freeze == 'none' or args.masking_strategy == 'none' else args.pr
    metrics = ['acc', 'f1-score', 'precision', 'recall']
    trainer = Trainer(model,
                      opt,
                      scheduler,
                      args.num_epochs,
                      args.num_rounds,
                      label,
                      seed=args.seed,
                      init=args.init,
                      pruning_rate=pruning_rate,
                      reward=args.reward,
                      metrics=metrics,
                      log_dir=log_dir,
                      eval_interval=args.eval_interval,
                      masking_strategy=args.masking_strategy,
                      device=device)
    trainer.train(trainloader, testloader, devloader)
    #del model
    #del opt
    #del scheduler
    #del trainer
    logging.shutdown()