def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data', train=True, download=True,
                          transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch,
                                  shuffle=True, num_workers=args.workers)
    testset = dataloader(root='./data', train=False, download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch,
                                 shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        basic_model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        basic_model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        basic_model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('resnet'):
        basic_model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            block_name=args.block_name,
        )
    elif args.arch.startswith('preresnet'):
        basic_model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            block_name=args.block_name,
        )
    elif args.arch.startswith('horesnet'):
        basic_model = models.__dict__[args.arch](num_classes=num_classes,
                                                 depth=args.depth,
                                                 eta=args.eta,
                                                 block_name=args.block_name,
                                                 feature_vec=args.feature_vec)
    elif args.arch.startswith('hopreresnet'):
        basic_model = models.__dict__[args.arch](num_classes=num_classes,
                                                 depth=args.depth,
                                                 eta=args.eta,
                                                 block_name=args.block_name,
                                                 feature_vec=args.feature_vec)
    elif args.arch.startswith('nagpreresnet'):
        basic_model = models.__dict__[args.arch](num_classes=num_classes,
                                                 depth=args.depth,
                                                 eta=args.eta,
                                                 block_name=args.block_name,
                                                 feature_vec=args.feature_vec)
    elif args.arch.startswith('mompreresnet'):
        basic_model = models.__dict__[args.arch](num_classes=num_classes,
                                                 depth=args.depth,
                                                 eta=args.eta,
                                                 block_name=args.block_name,
                                                 feature_vec=args.feature_vec)
    else:
        print('Unrecognized architecture - falling back to the standard model')
        basic_model = models.__dict__[args.arch](num_classes=num_classes)

    # From https://github.com/MadryLab/cifar10_challenge/blob/master/config.json
    config = {
        'epsilon': 0.031,  # 8.0 / 255, test 1.0-8.0
        'num_steps': 10,
        'step_size': 0.007,  # 6.0 / 255, 7.0
        'random_start': True,
        'loss_func': 'xent',
    }
    model = AttackPGD(basic_model, config).cuda()
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print(' Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    if args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    # elif args.optimizer.lower() == 'adam':
    #     optimizer = optim.Adam(model.parameters(), lr=args.lr,
    #                            betas=(args.beta1, args.beta2),
    #                            weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'radam':
        optimizer = RAdam(model.parameters(), lr=args.lr,
                          betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'adamw':
        optimizer = AdamW(model.parameters(), lr=args.lr,
                          betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, warmup=args.warmup)

    # Resume
    title = 'cifar-10-' + args.arch
    # if args.resume:
    #     # Load checkpoint.
    #     print('==> Resuming from checkpoint..')
    #     assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
    #     args.checkpoint = os.path.dirname(args.resume)
    #     checkpoint = torch.load(args.resume)
    #     best_acc = checkpoint['best_acc']
    #     start_epoch = checkpoint['epoch']
    #     model.load_state_dict(checkpoint['state_dict'])
    #     optimizer.load_state_dict(checkpoint['optimizer'])
    #     logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    # else:
    logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
    logger.set_names([
        'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'
    ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        logger.file.write('\nEpoch: [%d | %d] LR: %f' %
                          (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, use_cuda, logger)
        test_loss, test_acc = test(testloader, model, criterion, epoch,
                                   use_cuda, logger)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        writer.add_scalars('train_loss', {args.model_name: train_loss}, epoch)
        writer.add_scalars('test_loss', {args.model_name: test_loss}, epoch)
        writer.add_scalars('train_acc', {args.model_name: train_acc}, epoch)
        writer.add_scalars('test_acc', {args.model_name: test_acc}, epoch)
        # writer.add_scalars('loss_tracking/train_loss', {args.model_name: train_loss}, epoch)
        # writer.add_scalars('loss_tracking/test_loss', {args.model_name: test_loss}, epoch)
        # writer.add_scalars('loss_tracking/train_acc', {args.model_name: train_acc}, epoch)
        # writer.add_scalars('loss_tracking/test_acc', {args.model_name: test_acc}, epoch)

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': basic_model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.file.write('Best acc:%f' % best_acc)
    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
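
# A minimal sketch of what the AttackPGD wrapper used above could look like.
# The class name and config keys come from the snippet; the internals below are
# an assumption based on the standard PGD formulation (Madry et al.), not the
# repository's actual implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F


class AttackPGD(nn.Module):
    def __init__(self, basic_model, config):
        super().__init__()
        self.basic_model = basic_model
        self.epsilon = config['epsilon']
        self.num_steps = config['num_steps']
        self.step_size = config['step_size']
        self.random_start = config['random_start']
        assert config['loss_func'] == 'xent', 'only cross-entropy is supported here'

    def forward(self, inputs, targets):
        x = inputs.detach()
        if self.random_start:
            x = x + torch.empty_like(x).uniform_(-self.epsilon, self.epsilon)
        for _ in range(self.num_steps):
            x.requires_grad_()
            with torch.enable_grad():
                loss = F.cross_entropy(self.basic_model(x), targets)
            grad = torch.autograd.grad(loss, x)[0]
            # ascend the loss, then project back into the epsilon ball
            x = x.detach() + self.step_size * torch.sign(grad.detach())
            x = torch.min(torch.max(x, inputs - self.epsilon), inputs + self.epsilon)
            x = torch.clamp(x, 0.0, 1.0)
        return self.basic_model(x), x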
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
torch.backends.cudnn.benchmark = True

if args.model_type == 'base':
    bert_weights = 'bert-base-uncased'
else:
    bert_weights = 'bert-large-uncased'

# output folder
output_dir = os.path.join(args.output, args.data_type)
create_dir(output_dir)

# logger
logger_arg = Logger(os.path.join(output_dir, 'args.txt'))
logger_arg.write(args.__repr__())
logger_log = Logger(os.path.join(output_dir, 'log.txt'))

# tokenizer
tokenizer = BertTokenizer.from_pretrained(bert_weights)
sep_token = tokenizer.sep_token_id

# dataset
train_dset = CNNDMDataset(data_path=os.path.join(args.data_path, 'train'),
                          data_type=args.data_type,
                          sep_token=sep_token,
                          logger=logger_log,
                          max_seq_len=args.max_seq_len,
                          debug=args.debug)
trainval_loader = DataLoader(train_dset,
                    nargs='?',
                    help='use a poisson random variable to generate time evolution')
parser.add_argument('-L', '--lamb', type=float, default=0.0265,
                    help='lambda for poisson if poisson mode is specified')
parser.add_argument('-v', '--level', type=str, default='INFO',
                    help='log level')
parser.add_argument('-s', '--summary', type=bool, const=True, nargs='?',
                    help='set if you want a summary of every turn')
parser.add_argument('-F', '--file', type=bool, const=True, nargs='?',
                    help='set if you want to log to a file')
args = parser.parse_args()

if not os.path.exists('./logs'):
    os.mkdir('./logs/')

log = Logger(name='Simulation', log=args.file)
log.setLevel(args.level)

main(args)
def __init__(self, cfg, datasets, net, epoch):
    self.cfg = cfg
    if 'train' in datasets:
        self.trainset = datasets['train']
    if 'val' in datasets:
        self.valset = datasets['val']
    if 'trainval' in datasets:
        self.trainval = datasets['trainval']
    else:
        self.trainval = False
    if 'test' in datasets:
        self.testset = datasets['test']
    self.net = net
    name = cfg.exp_name
    self.name = name
    self.checkpoints = os.path.join(cfg.checkpoint, name)
    self.device = cfg.device
    self.optimizer = optim.Adam(self.net.parameters(), lr=cfg.lr,
                                weight_decay=cfg.weight_decay)
    self.lr_sheudler = optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, mode='min', factor=cfg.lr_factor,
        threshold=0.0001, patience=cfg.patience, min_lr=cfg.min_lr)
    if not os.path.exists(self.checkpoints):
        os.mkdir(self.checkpoints)
    self.predictions = os.path.join(self.checkpoints, 'pred')
    if not os.path.exists(self.predictions):
        os.mkdir(self.predictions)
    start, total = epoch
    self.start = start
    self.total = total
    log_dir = os.path.join(self.checkpoints, 'logs')
    if not os.path.exists(log_dir):
        os.mkdir(log_dir)
    self.logger = Logger(log_dir)
    torch.cuda.empty_cache()
    self.save_every_k_epoch = cfg.save_every_k_epoch  # -1 disables saving and validation
    self.val_every_k_epoch = cfg.val_every_k_epoch
    self.upadte_grad_every_k_batch = 1

    self.best_mAP = 0
    self.best_mAP_epoch = 0
    self.movingLoss = 0
    self.bestMovingLoss = 10000
    self.bestMovingLossEpoch = 1e9
    self.early_stop_epochs = 50
    self.alpha = 0.95  # EMA factor used to update the moving loss

    self.lr_change = cfg.adjust_lr
    self.base_epochs = cfg.base_epochs
    self.nms_threshold = cfg.nms_threshold
    self.conf_threshold = cfg.dc_threshold
    self.save_pred = False

    # load from a given epoch if required
    if start:
        if (start == '-1') or (start == -1):
            self.load_last_epoch()
        else:
            self.load_epoch(start)
    else:
        self.start = 0
    self.net = self.net.to(self.device)
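
# The trainer above keeps self.alpha = 0.95 for a moving loss used with
# self.bestMovingLoss / self.bestMovingLossEpoch for early stopping. The class's
# own update method is not shown; a plausible form (an assumption) is a simple
# exponential moving average applied after each epoch:
#
#     self.movingLoss = self.alpha * self.movingLoss + (1 - self.alpha) * epoch_loss
#     if self.movingLoss < self.bestMovingLoss:
#         self.bestMovingLoss = self.movingLoss
#         self.bestMovingLossEpoch = epoch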
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    sys.stdout = Logger(
        osp.join(args.save_dir, 'log_' + 'CIFAR-10_PC_Loss_PGD_AdvTrain' + '.txt'))

    if use_gpu:
        print("Currently using GPU: {}".format(args.gpu))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU")

    # Data loading
    num_classes = 10
    print('==> Preparing dataset')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data/cifar10', train=True,
                                            download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.train_batch,
                                              pin_memory=True, shuffle=True,
                                              num_workers=args.workers)
    testset = torchvision.datasets.CIFAR10(root='./data/cifar10', train=False,
                                           download=True, transform=transform_test)
    testloader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch,
                                             pin_memory=True, shuffle=False,
                                             num_workers=args.workers)

    # Load the model
    model = resnet(num_classes=num_classes, depth=110)
    model = nn.DataParallel(model).cuda()

    criterion_xent = nn.CrossEntropyLoss()
    criterion_prox_1024 = Proximity(num_classes=num_classes, feat_dim=1024,
                                    use_gpu=use_gpu)
    criterion_prox_256 = Proximity(num_classes=num_classes, feat_dim=256,
                                   use_gpu=use_gpu)
    criterion_conprox_1024 = Con_Proximity(num_classes=num_classes, feat_dim=1024,
                                           use_gpu=use_gpu)
    criterion_conprox_256 = Con_Proximity(num_classes=num_classes, feat_dim=256,
                                          use_gpu=use_gpu)

    optimizer_model = torch.optim.SGD(model.parameters(), lr=args.lr_model,
                                      weight_decay=1e-04, momentum=0.9)
    optimizer_prox_1024 = torch.optim.SGD(criterion_prox_1024.parameters(),
                                          lr=args.lr_prox)
    optimizer_prox_256 = torch.optim.SGD(criterion_prox_256.parameters(),
                                         lr=args.lr_prox)
    optimizer_conprox_1024 = torch.optim.SGD(criterion_conprox_1024.parameters(),
                                             lr=args.lr_conprox)
    optimizer_conprox_256 = torch.optim.SGD(criterion_conprox_256.parameters(),
                                            lr=args.lr_conprox)

    # warm-start from the softmax-trained checkpoint
    filename = 'Models_Softmax/CIFAR10_Softmax.pth.tar'
    checkpoint = torch.load(filename)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer_model.load_state_dict(checkpoint['optimizer_model'])

    start_time = time.time()

    for epoch in range(args.max_epoch):
        adjust_learning_rate(optimizer_model, epoch)
        adjust_learning_rate_prox(optimizer_prox_1024, epoch)
        adjust_learning_rate_prox(optimizer_prox_256, epoch)
        adjust_learning_rate_conprox(optimizer_conprox_1024, epoch)
        adjust_learning_rate_conprox(optimizer_conprox_256, epoch)

        print("==> Epoch {}/{}".format(epoch + 1, args.max_epoch))
        train(model, criterion_xent, criterion_prox_1024, criterion_prox_256,
              criterion_conprox_1024, criterion_conprox_256, optimizer_model,
              optimizer_prox_1024, optimizer_prox_256, optimizer_conprox_1024,
              optimizer_conprox_256, trainloader, use_gpu, num_classes, epoch)

        # test every eval_freq epochs and at the final epoch
        if (args.eval_freq > 0 and (epoch + 1) % args.eval_freq == 0) or \
                (epoch + 1) == args.max_epoch:
            print("==> Test")
            acc, err = test(model, testloader, use_gpu, num_classes, epoch)
            print("Accuracy (%): {}\t Error rate (%): {}".format(acc, err))

            state_ = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer_model': optimizer_model.state_dict(),
                'optimizer_prox_1024': optimizer_prox_1024.state_dict(),
                'optimizer_prox_256': optimizer_prox_256.state_dict(),
                'optimizer_conprox_1024': optimizer_conprox_1024.state_dict(),
                'optimizer_conprox_256': optimizer_conprox_256.state_dict(),
            }
            torch.save(state_,
                       'Models_PCL_AdvTrain_PGD/CIFAR10_PCL_AdvTrain_PGD.pth.tar')

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
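
# adjust_learning_rate / adjust_learning_rate_prox / adjust_learning_rate_conprox
# are called above but defined elsewhere. A minimal sketch of the usual step-decay
# form; the milestones and decay factor below are assumptions, the real code may
# use different ones:
def adjust_learning_rate(optimizer, epoch, base_lr=0.1, schedule=(80, 120), gamma=0.1):
    """Multiply the learning rate by gamma at each epoch listed in `schedule`."""
    lr = base_lr
    for milestone in schedule:
        if epoch >= milestone:
            lr *= gamma
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr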
def main():
    ''' set default hyperparams in default_hyperparams.py '''
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument('-d', '--dataset', choices=supported.datasets, required=True)
    parser.add_argument('--algorithm', required=True, choices=supported.algorithms)
    parser.add_argument('--root_dir', required=True,
                        help='The directory where [dataset]/data can be found (or should be downloaded to, if it does not exist).')

    # Dataset
    parser.add_argument('--split_scheme',
                        help='Identifies how the train/val/test split is constructed. Choices are dataset-specific.')
    parser.add_argument('--dataset_kwargs', nargs='*', action=ParseKwargs, default={})
    parser.add_argument('--download', default=False, type=parse_bool, const=True, nargs='?',
                        help='If true, tries to download the dataset if it does not exist in root_dir.')
    parser.add_argument('--frac', type=float, default=1.0,
                        help='Convenience parameter that scales all dataset splits down to the specified fraction, for development purposes.')

    # Loaders
    parser.add_argument('--loader_kwargs', nargs='*', action=ParseKwargs, default={})
    parser.add_argument('--train_loader', choices=['standard', 'group'])
    parser.add_argument('--uniform_over_groups', type=parse_bool, const=True, nargs='?')
    parser.add_argument('--distinct_groups', type=parse_bool, const=True, nargs='?')
    parser.add_argument('--n_groups_per_batch', type=int)
    parser.add_argument('--batch_size', type=int)
    parser.add_argument('--eval_loader', choices=['standard'], default='standard')

    # Model
    parser.add_argument('--model', choices=supported.models)
    parser.add_argument('--model_kwargs', nargs='*', action=ParseKwargs, default={},
                        help='keyword arguments for model initialization passed as key1=value1 key2=value2')

    # Transforms
    parser.add_argument('--train_transform', choices=supported.transforms)
    parser.add_argument('--eval_transform', choices=supported.transforms)
    parser.add_argument('--target_resolution', nargs='+', type=int,
                        help='target resolution, for example --target_resolution 224 224 for standard resnet.')
    parser.add_argument('--resize_scale', type=float)
    parser.add_argument('--max_token_length', type=int)

    # Objective
    parser.add_argument('--loss_function', choices=supported.losses)

    # Algorithm
    parser.add_argument('--groupby_fields', nargs='+')
    parser.add_argument('--group_dro_step_size', type=float)
    parser.add_argument('--coral_penalty_weight', type=float)
    parser.add_argument('--irm_lambda', type=float)
    parser.add_argument('--irm_penalty_anneal_iters', type=int)
    parser.add_argument('--algo_log_metric')
    parser.add_argument('--hsic_beta', type=float)
    parser.add_argument('--sd_penalty_lamb', type=float)
    parser.add_argument('--grad_penalty_lamb', type=float)
    parser.add_argument('--params_regex', type=str,
                        help='Regular expression specifying which gradients to penalize.')
    parser.add_argument('--label_cond', type=parse_bool, const=True, nargs='?', default=False)
    parser.add_argument('--dann_lamb', type=float)
    parser.add_argument('--dann_dc_name', type=str)
    parser.add_argument('--freeze_pretrained', type=parse_bool, const=True, nargs='?', default=False)
    parser.add_argument('--resnet_byol_path', type=str, default=None)

    # Model selection
    parser.add_argument('--val_metric')
    parser.add_argument('--val_metric_decreasing', type=parse_bool, const=True, nargs='?')

    # Optimization
    parser.add_argument('--n_epochs', type=int)
    parser.add_argument('--optimizer', choices=supported.optimizers)
    parser.add_argument('--lr', type=float)
    parser.add_argument('--weight_decay', type=float)
    parser.add_argument('--max_grad_norm', type=float)
    parser.add_argument('--optimizer_kwargs', nargs='*', action=ParseKwargs, default={})

    # Scheduler
    parser.add_argument('--scheduler', choices=supported.schedulers)
    parser.add_argument('--scheduler_kwargs', nargs='*', action=ParseKwargs, default={})
    parser.add_argument('--scheduler_metric_split', choices=['train', 'val'], default='val')
    parser.add_argument('--scheduler_metric_name')

    # Evaluation
    parser.add_argument('--evaluate_all_splits', type=parse_bool, const=True, nargs='?', default=True)
    parser.add_argument('--eval_splits', nargs='+', default=[])
    parser.add_argument('--eval_only', type=parse_bool, const=True, nargs='?', default=False)
    parser.add_argument('--eval_epoch', default=None, type=int)
    parser.add_argument('--save_z', type=parse_bool, const=True, nargs='?', default=False)

    # Misc
    parser.add_argument('--device', type=int, default=0)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--log_dir', default='./logs')
    parser.add_argument('--log_every', default=50, type=int)
    parser.add_argument('--save_step', type=int)
    parser.add_argument('--save_best', type=parse_bool, const=True, nargs='?', default=True)
    parser.add_argument('--save_last', type=parse_bool, const=True, nargs='?', default=True)
    parser.add_argument('--num_subepochs_to_save', type=int, default=None,
                        help='If specified, determines how many times to save inside an epoch.')
    parser.add_argument('--num_epochs_to_save_in_detail', type=int, default=None,
                        help='If specified, determines in how many of the first epochs intermediate states are saved.')
    parser.add_argument('--no_group_logging', type=parse_bool, const=True, nargs='?')
    parser.add_argument('--use_wandb', type=parse_bool, const=True, nargs='?', default=False)
    parser.add_argument('--progress_bar', type=parse_bool, const=True, nargs='?', default=False)
    parser.add_argument('--resume', type=parse_bool, const=True, nargs='?', default=False)

    config = parser.parse_args()
    config = populate_defaults(config)

    # set device
    config.device = torch.device("cuda:" + str(config.device)) \
        if torch.cuda.is_available() else torch.device("cpu")

    ## Initialize logs
    if os.path.exists(config.log_dir) and config.resume:
        resume = True
        mode = 'a'
    elif os.path.exists(config.log_dir) and config.eval_only:
        resume = False
        mode = 'a'
    else:
        resume = False
        mode = 'w'
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    logger = Logger(os.path.join(config.log_dir, 'log.txt'), mode)

    # Record config
    log_config(config, logger)

    # Set random seed
    set_seed(config.seed)

    # Data
    full_dataset = supported.datasets[config.dataset](
        root_dir=config.root_dir,
        download=config.download,
        split_scheme=config.split_scheme,
        **config.dataset_kwargs)

    # To implement data augmentation (i.e., have different transforms
    # at training time vs. test time), modify these two lines:
    train_transform = initialize_transform(
        transform_name=config.train_transform,
        config=config,
        dataset=full_dataset)
    eval_transform = initialize_transform(
        transform_name=config.eval_transform,
        config=config,
        dataset=full_dataset)

    train_grouper = CombinatorialGrouper(
        dataset=full_dataset,
        groupby_fields=config.groupby_fields)

    datasets = defaultdict(dict)
    for split in full_dataset.split_dict.keys():
        if split == 'train':
            transform = train_transform
            verbose = True
        elif split == 'val':
            transform = eval_transform
            verbose = True
        else:
            transform = eval_transform
            verbose = False

        # Get subset
        datasets[split]['dataset'] = full_dataset.get_subset(
            split, frac=config.frac, transform=transform)

        if split == 'train':
            datasets[split]['loader'] = get_train_loader(
                loader=config.train_loader,
                dataset=datasets[split]['dataset'],
                batch_size=config.batch_size,
                uniform_over_groups=config.uniform_over_groups,
                grouper=train_grouper,
                distinct_groups=config.distinct_groups,
                n_groups_per_batch=config.n_groups_per_batch,
                **config.loader_kwargs)
        else:
            datasets[split]['loader'] = get_eval_loader(
                loader=config.eval_loader,
                dataset=datasets[split]['dataset'],
                grouper=train_grouper,
                batch_size=config.batch_size,
                **config.loader_kwargs)

        # Set fields
        datasets[split]['split'] = split
        datasets[split]['name'] = full_dataset.split_names[split]
        datasets[split]['verbose'] = verbose

        # Loggers
        datasets[split]['eval_logger'] = BatchLogger(
            os.path.join(config.log_dir, f'{split}_eval.csv'),
            mode=mode, use_wandb=(config.use_wandb and verbose))
        datasets[split]['algo_logger'] = BatchLogger(
            os.path.join(config.log_dir, f'{split}_algo.csv'),
            mode=mode, use_wandb=(config.use_wandb and verbose))

    if config.use_wandb:
        initialize_wandb(config)

    # Logging dataset info
    if config.no_group_logging and full_dataset.is_classification and full_dataset.y_size == 1:
        log_grouper = CombinatorialGrouper(
            dataset=full_dataset,
            groupby_fields=['y'])
    elif config.no_group_logging:
        log_grouper = None
    else:
        log_grouper = train_grouper
    log_group_data(datasets, log_grouper, logger)

    ## Initialize algorithm
    algorithm = initialize_algorithm(
        config=config,
        datasets=datasets,
        train_grouper=train_grouper)

    if not config.eval_only:
        ## Load saved results if resuming
        resume_success = False
        if resume:
            save_path = os.path.join(config.log_dir, 'last_model.pth')
            if not os.path.exists(save_path):
                epochs = [
                    int(file.split('_')[0])
                    for file in os.listdir(config.log_dir) if file.endswith('.pth')]
                if len(epochs) > 0:
                    latest_epoch = max(epochs)
                    save_path = os.path.join(config.log_dir, f'{latest_epoch}_model.pth')
            try:
                prev_epoch, best_val_metric = load(algorithm, save_path)
                epoch_offset = prev_epoch + 1
                logger.write(f'Resuming from epoch {epoch_offset} with best val metric {best_val_metric}')
                resume_success = True
            except FileNotFoundError:
                pass
        if not resume_success:
            epoch_offset = 0
            best_val_metric = None

        train(algorithm=algorithm,
              datasets=datasets,
              general_logger=logger,
              config=config,
              epoch_offset=epoch_offset,
              best_val_metric=best_val_metric)
    else:
        if config.eval_epoch is None:
            eval_model_path = os.path.join(config.log_dir, 'best_model.pth')
        else:
            eval_model_path = os.path.join(config.log_dir, f'{config.eval_epoch}_model.pth')
        best_epoch, best_val_metric = load(algorithm, eval_model_path)
        if config.eval_epoch is None:
            epoch = best_epoch
        else:
            epoch = config.eval_epoch
        evaluate(algorithm=algorithm,
                 datasets=datasets,
                 epoch=epoch,
                 general_logger=logger,
                 config=config)

    logger.close()
    for split in datasets:
        datasets[split]['eval_logger'].close()
        datasets[split]['algo_logger'].close()
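
# ParseKwargs (used with nargs='*' in the argument definitions above) is defined
# elsewhere. A minimal sketch of an argparse.Action that turns
# `key1=value1 key2=value2` into a dict; the real helper may coerce value types,
# which this sketch does not:
import argparse


class ParseKwargs(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        kwargs = {}
        for item in values:
            key, _, value = item.partition('=')
            kwargs[key] = value
        setattr(namespace, self.dest, kwargs)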
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[1., 1., 1.], std=[1., 1., 1.])

    # trans_train_dataset = torch.utils.data.DataLoader(
    #     datasets.ImageFolder(traindir, transforms.Compose([
    #         transforms.RandomSizedCrop(224),
    #         transforms.RandomHorizontalFlip(),
    #         transforms.ToTensor(),
    #         normalize,
    #     ])),
    #     batch_size=args.train_batch, shuffle=True,
    #     num_workers=args.workers, pin_memory=True)
    # trans_test_dataset = torch.utils.data.DataLoader(
    #     datasets.ImageFolder(valdir, transforms.Compose([
    #         transforms.Scale(256),
    #         transforms.CenterCrop(224),
    #         transforms.ToTensor(),
    #         normalize,
    #     ])),
    #     batch_size=args.test_batch, shuffle=False,
    #     num_workers=args.workers, pin_memory=True)

    train_dataset = CelebA_Dataset(
        root_dir=args.data,
        train=True,
        transform=transforms.Compose([  # pass a Compose to the transform argument
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    trans_train_dataset = torch.utils.data.DataLoader(train_dataset,
                                                      batch_size=args.train_batch,
                                                      shuffle=True,
                                                      num_workers=args.workers)

    test_dataset = CelebA_Dataset(
        root_dir=args.data,
        train=False,
        transform=transforms.Compose([  # pass a Compose to the transform argument
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))
    trans_test_dataset = torch.utils.data.DataLoader(test_dataset,
                                                     batch_size=args.test_batch,
                                                     shuffle=False,
                                                     num_workers=args.workers)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            baseWidth=args.base_width,
            cardinality=args.cardinality,
        )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True
    print(' Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    # define loss function (criterion) and optimizer
    criterion = nn.BCELoss(reduction='mean').cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.weight_decay)

    # Resume
    title = 'CelebA-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(trans_test_dataset, model, criterion,
                                   start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(trans_train_dataset, model, criterion,
                                      optimizer, epoch, use_cuda)
        test_loss, test_acc = test(trans_test_dataset, model, criterion, epoch,
                                   use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'acc': test_acc,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
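
# save_checkpoint is used throughout these scripts with the signature
# save_checkpoint(state, is_best, checkpoint=...). A minimal sketch following the
# common pytorch-classification convention; the file names are assumptions:
import os
import shutil
import torch


def save_checkpoint(state, is_best, checkpoint='checkpoint',
                    filename='checkpoint.pth.tar'):
    filepath = os.path.join(checkpoint, filename)
    torch.save(state, filepath)
    if is_best:
        # keep a separate copy of the best-performing weights
        shutil.copyfile(filepath, os.path.join(checkpoint, 'model_best.pth.tar'))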
def main():
    title = 'Plant-' + args.arch
    best_acc = 0
    cudnn.benchmark = True
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch
    num_classes = 100
    '''
    num_classes = dset.hierarchy.get_class_level_size(0)
    parent_num_classes = dset.hierarchy.get_class_level_size(2)
    hierarchy_matrix = dset.hierarchy.get_hierarchy_mask(2, 0)
    if use_cuda:
        hierarchy_matrix = torch.FloatTensor(hierarchy_matrix)
    else:
        hierarchy_matrix = torch.FloatTensor(hierarchy_matrix)
    '''

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    model = chooser.create_model(chooser.predefined_model(args, cuda=use_cuda),
                                 num_classes, cuda=use_cuda)

    # criterion = CrossEntropyLossTSoftmax(hierarchy_matrix=hierarchy_matrix)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.weight_decay)

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc. Top 1', 'Train Acc. Top 5',
                          'Valid Acc. Top 1', 'Valid Acc. Top 5', 'USM Alpha'])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(dataloaders['val'], model, criterion,
                                   start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, args)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc, train_acc5 = train(dataloaders['train'], model,
                                                  criterion, optimizer, epoch,
                                                  use_cuda)
        test_loss, test_acc, test_acc5 = test(dataloaders['val'], model,
                                              criterion, epoch, use_cuda)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, train_acc5,
                       test_acc, test_acc5,
                       float(model.filter.alpha.detach().data)])

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint) and args.local_rank == 0:
        mkdir_p(args.checkpoint)

    args.distributed = True
    args.gpu = args.local_rank
    torch.cuda.set_device(args.gpu)
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    args.world_size = torch.distributed.get_world_size()
    print('world_size = ', args.world_size)

    assert torch.backends.cudnn.enabled, "Amp requires cudnn backend to be enabled."

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif 'resnext' in args.arch:
        model = models.__dict__[args.arch](
            baseWidth=args.base_width,
            cardinality=args.cardinality,
        )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    flops, params = get_model_complexity_info(model, (224, 224), as_strings=False,
                                              print_per_layer_stat=False)
    print('Flops:  %.3f' % (flops / 1e9))
    print('Params: %.2fM' % (params / 1e6))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    # criterion = nn.CrossEntropyLoss().cuda()
    criterion = SoftCrossEntropyLoss(label_smoothing=args.label_smoothing).cuda()
    model = model.cuda()

    # scale the base learning rate with the global batch size
    args.lr = float(0.1 * float(args.train_batch * args.world_size) / 256.)
    state['lr'] = args.lr
    optimizer = set_optimizer(model)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
    #                       weight_decay=args.weight_decay)

    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=args.opt_level,
                                      keep_batchnorm_fp32=args.keep_batchnorm_fp32,
                                      loss_scale=args.loss_scale)

    # model = torch.nn.DataParallel(model).cuda()
    # model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank],
    #                                                   output_device=args.local_rank)
    model = DDP(model, delay_allreduce=True)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'valf')
    # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    data_aug_scale = (0.08, 1.0)

    train_dataset = datasets.ImageFolder(traindir, transforms.Compose([
        transforms.RandomResizedCrop(224, scale=data_aug_scale),
        transforms.RandomHorizontalFlip(),
        # transforms.ToTensor(),
        # normalize,
    ]))
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        # transforms.ToTensor(),
        # normalize,
    ]))

    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.train_batch, shuffle=False,
        num_workers=args.workers, pin_memory=True, sampler=train_sampler,
        collate_fn=fast_collate)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=args.test_batch, shuffle=False,
        num_workers=args.workers, pin_memory=True, sampler=val_sampler,
        collate_fn=fast_collate)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..', args.resume)
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        # the model may have more keys than the checkpoint
        t = model.state_dict()
        c = checkpoint['state_dict']
        flag = True
        for k in t:
            if k not in c:
                print('not in loading dict! fill it', k, t[k])
                c[k] = t[k]
                flag = False
        model.load_state_dict(c)
        # if flag:
        #     print('optimizer load old state')
        #     optimizer.load_state_dict(checkpoint['optimizer'])
        # else:
        print('new optimizer !')
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title,
                            resume=True)
    else:
        if args.local_rank == 0:
            logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
            logger.set_names([
                'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'
            ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(val_loader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        if args.local_rank == 0:
            print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(val_loader, model, criterion, epoch, use_cuda)

        # save model
        if args.local_rank == 0:
            # append logger file
            logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

            is_best = test_acc > best_acc
            best_acc = max(test_acc, best_acc)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'acc': test_acc,
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                checkpoint=args.checkpoint)

    if args.local_rank == 0:
        logger.close()

    print('Best acc:')
    print(best_acc)
title = 'cifar-10-' + args.arch
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
    # args.checkpoint = os.path.dirname(args.checkpoint)
    checkpoint = torch.load(args.resume)
    best_acc = checkpoint['best_acc']
    # start_epoch = checkpoint['epoch']
    start_epoch = args.start_epoch
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    if args.retrain_layer != 'none':
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title,
                        resume=False)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.'])
        params = trainableParams(model)
        print('number of trainable params:', len(list(params)))
        model = reinit_model_layer(model, args.retrain_layer, initial_dict)
        params = trainableParams(model)
        print('number of trainable params:', len(list(params)))
        optimizer = optim.SGD(params, lr=args.lr, momentum=args.momentum,
                              weight_decay=args.weight_decay)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title,
                        resume=False)  # was True
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.'])
else:
    logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
def main():
    global BEST_ACC, LR_STATE
    start_epoch = cfg.CLS.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Create ckpt folder
    if not os.path.isdir(cfg.CLS.ckpt):
        mkdir_p(cfg.CLS.ckpt)
    if args.cfg_file is not None and not cfg.CLS.evaluate:
        shutil.copyfile(args.cfg_file,
                        os.path.join(cfg.CLS.ckpt, args.cfg_file.split('/')[-1]))

    # Dataset and Loader
    if cfg.CLS.num_classes == 10:
        cifarloader = datasets.CIFAR10
    elif cfg.CLS.num_classes == 100:
        cifarloader = datasets.CIFAR100
    else:
        cifarloader = None
        print('Only the cifar10 and cifar100 datasets are supported!')
        exit()

    normalize = transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)
    _pad = (cfg.CLS.base_size - cfg.CLS.crop_size) // 2
    train_aug = [transforms.RandomCrop(cfg.CLS.crop_size, padding=_pad),
                 transforms.RandomHorizontalFlip()]
    if len(cfg.CLS.rotation) > 0:
        train_aug.append(transforms.RandomRotation(cfg.CLS.rotation))
    if len(cfg.CLS.pixel_jitter) > 0:
        train_aug.append(RandomPixelJitter(cfg.CLS.pixel_jitter))
    if cfg.CLS.grayscale > 0:
        train_aug.append(transforms.RandomGrayscale(cfg.CLS.grayscale))
    train_aug.append(transforms.ToTensor())
    train_aug.append(normalize)

    train_loader = torch.utils.data.DataLoader(
        cifarloader(root=cfg.CLS.data_root, train=True, download=True,
                    transform=transforms.Compose(train_aug)),
        batch_size=cfg.CLS.train_batch, shuffle=True, num_workers=cfg.workers)

    if cfg.CLS.validate or cfg.CLS.evaluate:
        val_loader = torch.utils.data.DataLoader(
            cifarloader(root=cfg.CLS.data_root, train=False, download=False,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                            normalize,
                        ])),
            batch_size=cfg.CLS.test_batch, shuffle=False, num_workers=cfg.workers)

    # Create model
    model = models.__dict__[cfg.CLS.arch]()
    print(model)

    # Calculate FLOPs & Params
    n_flops, n_convops, n_params = measure_model(model, cfg.CLS.crop_size,
                                                 cfg.CLS.crop_size)
    print('==> FLOPs: {:.4f}M, Conv_FLOPs: {:.4f}M, Params: {:.4f}M'.format(
        n_flops / 1e6, n_convops / 1e6, n_params / 1e6))
    del model
    model = models.__dict__[cfg.CLS.arch]()

    # Load pre-trained model
    if cfg.CLS.pretrained:
        print("==> Using pre-trained model '{}'".format(cfg.CLS.pretrained))
        pretrained_dict = torch.load(cfg.CLS.pretrained)
        try:
            pretrained_dict = pretrained_dict['state_dict']
        except KeyError:
            pass
        model_dict = model.state_dict()
        updated_dict, match_layers, mismatch_layers = weight_filler(pretrained_dict,
                                                                    model_dict)
        model_dict.update(updated_dict)
        model.load_state_dict(model_dict)
    else:
        print("==> Creating model '{}'".format(cfg.CLS.arch))

    # Define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    if cfg.CLS.pretrained:
        def param_filter(param):
            return param[1]

        # give freshly initialized (mismatched) layers a 10x learning rate
        new_params = map(param_filter,
                         filter(lambda p: p[0] in mismatch_layers,
                                model.named_parameters()))
        base_params = map(param_filter,
                          filter(lambda p: p[0] in match_layers,
                                 model.named_parameters()))
        model_params = [{'params': base_params},
                        {'params': new_params, 'lr': cfg.CLS.base_lr * 10}]
    else:
        model_params = model.parameters()

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    optimizer = optim.SGD(model_params, lr=cfg.CLS.base_lr,
                          momentum=cfg.CLS.momentum,
                          weight_decay=cfg.CLS.weight_decay)

    # Evaluate model
    if cfg.CLS.evaluate:
        print('\n==> Evaluation only')
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion,
                                               start_epoch, USE_CUDA)
        print('==> Test Loss: {:.8f} | Test_top1: {:.4f}% | Test_top5: {:.4f}%'.format(
            test_loss, test_top1, test_top5))
        return

    # Resume training
    title = 'Pytorch-CLS-' + cfg.CLS.arch
    if cfg.CLS.resume:
        # Load checkpoint.
        print("==> Resuming from checkpoint '{}'".format(cfg.CLS.resume))
        assert os.path.isfile(cfg.CLS.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(cfg.CLS.resume)
        BEST_ACC = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'), title=title,
                        resume=True)
    else:
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Acc.', 'Valid Acc.'])

    # Train and val
    for epoch in range(start_epoch, cfg.CLS.epochs):
        print('\nEpoch: [{}/{}] | LR: {:.8f}'.format(epoch + 1, cfg.CLS.epochs,
                                                     LR_STATE))

        train_loss, train_acc = mixup_train(train_loader, model, criterion,
                                            optimizer, epoch, USE_CUDA)
        if cfg.CLS.validate:
            test_loss, test_top1, test_top5 = test(val_loader, model, criterion,
                                                   epoch, USE_CUDA)
        else:
            test_loss, test_top1, test_top5 = 0.0, 0.0, 0.0

        # Append logger file
        logger.append([LR_STATE, train_loss, test_loss, train_acc, test_top1])

        # Save model
        save_checkpoint(model, optimizer, test_top1, epoch)

        # Draw curve
        try:
            draw_curve(cfg.CLS.arch, cfg.CLS.ckpt)
            print('==> Success saving log curve...')
        except Exception:
            print('==> Saving log curve error...')

    logger.close()
    try:
        savefig(os.path.join(cfg.CLS.ckpt, 'log.eps'))
        shutil.copyfile(os.path.join(cfg.CLS.ckpt, 'log.txt'),
                        os.path.join(cfg.CLS.ckpt, 'log{}.txt'.format(
                            datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except Exception:
        print('copy log error.')

    print('==> Training Done!')
    print('==> Best acc: {:.4f}%'.format(BEST_ACC))
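
# mixup_train is called above but defined elsewhere. A minimal sketch of the
# mixup step it presumably performs (Zhang et al., 2018). The helper names and
# alpha value here are assumptions, not the repository's actual code:
import numpy as np
import torch


def mixup_data(x, y, alpha=1.0):
    """Return convexly mixed inputs plus both target sets and the mixing weight."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # the loss is the same convex combination applied to the two label sets
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)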
criterionGAN = GANLoss().to(device)
criterionL1 = nn.L1Loss().to(device)

# set up the optimizers
optimizer_g = optim.Adam(net_g.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
optimizer_d = optim.Adam(net_d.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

# set up the learning-rate schedules
net_g_scheduler = get_scheduler(optimizer_g, opt)
net_d_scheduler = get_scheduler(optimizer_d, opt)

# initialize logging
logger = Logger(opt.niter + opt.niter_decay, len(data_loader))

###### Training ######
for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
    for iteration, batch in enumerate(data_loader, 1):
        real_a, real_b = batch[0].to(device), batch[1].to(device)
        fake_b = net_g(real_a)

        ######################
        # (1) update the discriminator network
        ######################
        optimizer_d.zero_grad()
        """
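
# The snippet above is cut off at the discriminator update. The standard
# pix2pix step it begins is sketched below as comments; this is an assumption
# about the file's continuation, and opt.lamb is an assumed name for the L1
# loss weight:
#
#     pred_fake = net_d(torch.cat((real_a, fake_b), 1).detach())
#     loss_d_fake = criterionGAN(pred_fake, False)
#     pred_real = net_d(torch.cat((real_a, real_b), 1))
#     loss_d_real = criterionGAN(pred_real, True)
#     loss_d = (loss_d_fake + loss_d_real) * 0.5
#     loss_d.backward()
#     optimizer_d.step()
#
#     # (2) update the generator: GAN loss on the fooled D plus weighted L1
#     optimizer_g.zero_grad()
#     pred_fake = net_d(torch.cat((real_a, fake_b), 1))
#     loss_g = criterionGAN(pred_fake, True) + criterionL1(fake_b, real_b) * opt.lamb
#     loss_g.backward()
#     optimizer_g.step()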
# graphics_helper.py
import pygame
import os

from utils import code_generator, Logger

dir_path = os.path.dirname(os.path.realpath(__file__))
logger = Logger('graphics/text_renderer.py')


class TextRenderer:
    def __init__(self):
        self.requests = []
        pygame.font.init()

    # Adds a text message to the rendering queue
    def render(self, text: str, x: int, y: int, surface, size=16, speed=0.5,
               static=False, foreground=(255, 255, 255)):
        while True:
            id = code_generator(6)
            unique = True
# norm_method = Normalize([0, 0, 0], [1, 1, 1])
# elif not opt.std_norm:
#     norm_method = Normalize(opt.mean, [1, 1, 1])
# else:
#     norm_method = Normalize(opt.mean, opt.std)

if not opt.no_train:
    training_data = Gesturedata("train.txt")
    train_dataloader = torch.utils.data.DataLoader(
        training_data,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True)
    train_logger = Logger(
        os.path.join(opt.result_path, 'train.log'),
        ['epoch', 'num_epochs', 'batch_i', 'loss', 'loss(mean)', 'acc'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'num_epochs', 'batch_i', 'loss', 'loss(mean)', 'acc'])

if not opt.no_val:
    validation_data = Gesturedata("valid.txt")
    test_dataloader = torch.utils.data.DataLoader(
        validation_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True)
    val_logger = Logger(
def main(env_name, num_episodes, gamma, lam, kl_targ, batch_size,
         net_size_factor, noise_bias, weight, use_ppoclip):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        batch_size: number of episodes per policy training batch
    """
    global alive_coef, progress_coef
    killer = GracefulKiller()
    env, obs_dim, act_dim = init_gym(env_name)
    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    # now = datetime.utcnow().strftime("%b-%d_%H:%M:%S")  # create unique directories
    now = datetime.now().strftime("%b-%d_%H:%M:%S") + "_multi"
    logger = Logger(logname=env_name, now=now)
    aigym_path = os.path.join('/tmp', env_name, now)
    # env = wrappers.Monitor(env, aigym_path, force=True)
    scaler = Scaler(obs_dim)

    if weight == "None":
        val_func = NNValueFunction(obs_dim, net_size_factor=net_size_factor,
                                   alive_coef=alive_coef,
                                   progress_coef=progress_coef)
        policy = Policy(obs_dim, act_dim, kl_targ,
                        net_size_factor=net_size_factor, noise_bias=noise_bias)
    else:
        token = weight.split(".")
        token[-3] = token[-3][:-5] + "value"
        weight_2 = ".".join(token)
        # assert False, "unreachable"
        val_func = NNValueFunctionContinue(weight_2, obs_dim,
                                           net_size_factor=net_size_factor,
                                           alive_coef=alive_coef,
                                           progress_coef=progress_coef)
        policy = PolicyContinue(weight, obs_dim, act_dim, kl_targ,
                                net_size_factor=net_size_factor,
                                noise_bias=noise_bias)

    # run a few episodes of untrained policy to initialize scaler:
    run_policy(env, policy, scaler, logger, episodes=5)
    episode = 0
    while episode < num_episodes:
        trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size)
        episode += len(trajectories)
        add_value(trajectories, val_func)  # add estimated values to episodes
        add_disc_sum_rew(trajectories, gamma)  # calculate discounted sum of Rs
        add_gae(trajectories, gamma, lam)  # calculate advantage
        # concatenate all episodes into single NumPy arrays
        observes, actions, advantages, disc_sum_rew = build_train_set(trajectories)
        # add various stats to training log:
        log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode)
        policy.update(observes, actions, advantages, logger, scaler)  # update policy
        val_func.fit(observes, disc_sum_rew, logger)  # update value function
        logger.write(display=True)  # write logger results to file and stdout
        if killer.kill_now:
            if input('Terminate training (y/[n])? ') == 'y':
                break
            if input('Change alive_coef (y/[n])? ') == 'y':
                a = input("alive_coef value: ")
                alive_coef = float(a)
                val_func.alive_coef = float(a)
            if input('Change progress_coef (y/[n])? ') == 'y':
                a = input("progress_coef value: ")
                progress_coef = float(a)
                val_func.progress_coef = float(a)
            killer.kill_now = False

    logger.close()
    # with open("test_dump", 'w') as f:
    #     pickle.dump(policy, f)
    policy.close_sess()
    val_func.close_sess()
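
# add_gae above computes Generalized Advantage Estimation. A minimal sketch of
# the standard recursion; the trajectory keys 'rewards', 'values', and
# 'advantages' are assumptions about what run_policy and add_value store:
import numpy as np


def add_gae(trajectories, gamma, lam):
    for trajectory in trajectories:
        rewards = trajectory['rewards']
        values = trajectory['values']
        # TD residuals: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t), V = 0 at end
        deltas = rewards + gamma * np.append(values[1:], 0) - values
        advantages = np.zeros_like(deltas)
        gae = 0.0
        for t in reversed(range(len(deltas))):
            gae = deltas[t] + gamma * lam * gae
            advantages[t] = gae
        trajectory['advantages'] = advantages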
def main():
    # init or load model
    print("init model with input shape", config["input_shape"])
    model = NvNet(config=config, input_shape=config["input_shape"],
                  seg_outChans=config["n_labels"])
    parameters = model.parameters()
    optimizer = optim.Adam(parameters,
                           lr=config["initial_learning_rate"],
                           weight_decay=config["L2_norm"])
    start_epoch = 1
    if config["VAE_enable"]:
        loss_function = CombinedLoss(k1=config["loss_k1_weight"],
                                     k2=config["loss_k2_weight"])
    else:
        loss_function = SoftDiceLoss()

    # data generators
    print("data generating")
    training_data = BratsDataset(phase="train", config=config)
    # train_loader = torch.utils.data.DataLoader(dataset=training_data,
    #                                            batch_size=config["batch_size"],
    #                                            shuffle=True,
    #                                            pin_memory=True)
    validation_data = BratsDataset(phase="validate", config=config)
    # validation_loader = torch.utils.data.DataLoader(dataset=validation_data,
    #                                                 batch_size=config["batch_size"],
    #                                                 shuffle=True,
    #                                                 pin_memory=True)

    train_logger = Logger(model_name=config["model_file"],
                          header=['epoch', 'loss', 'acc', 'lr'])

    if config["cuda_devices"] is not None:
        model = model.cuda()
        loss_function = loss_function.cuda()

    # if not config["overwrite"] and os.path.exists(config["model_file"]) or os.path.exists(config["saved_model_file"]):
    #     model, start_epoch, optimizer = load_old_model(model, optimizer, saved_model_path=config["saved_model_file"])

    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                               factor=config["lr_decay"],
                                               patience=config["patience"])

    print("training on label:{}".format(config["labels"]))
    for i in range(start_epoch, config["epochs"]):
        train_epoch(epoch=i,
                    data_set=training_data,
                    model=model,
                    model_name=config["model_file"],
                    criterion=loss_function,
                    optimizer=optimizer,
                    opt=config,
                    epoch_logger=train_logger)
        val_loss = val_epoch(epoch=i,
                             data_set=validation_data,
                             model=model,
                             criterion=loss_function,
                             opt=config,
                             optimizer=optimizer,
                             logger=train_logger)
        scheduler.step(val_loss)
def main2(env_name, num_episodes, gamma, lam, kl_targ, batch_size,
          net_size_factor, noise_bias, weight, use_ppoclip):
    """ Main training loop

    Args:
        env_name: OpenAI Gym environment name, e.g. 'Hopper-v1'
        num_episodes: maximum number of episodes to run
        gamma: reward discount factor (float)
        lam: lambda from Generalized Advantage Estimate
        kl_targ: D_KL target for policy update [D_KL(pi_old || pi_new)]
        batch_size: number of episodes per policy training batch
    """
    global alive_coef, progress_coef, threshold1, threshold2, change_rate
    killer = GracefulKiller()
    env, obs_dim, act_dim = init_gym(env_name)
    obs_dim += 1  # add 1 to obs dimension for time step feature (see run_episode())
    # now = datetime.utcnow().strftime("%b-%d_%H:%M:%S")  # create unique directories
    now = datetime.now().strftime("%b-%d_%H:%M:%S") + "_multi_hop_{},{},{}".format(
        change_rate, threshold1, threshold2)
    logger = Logger(logname=env_name, now=now)
    aigym_path = os.path.join('/tmp', env_name, now)
    # env = wrappers.Monitor(env, aigym_path, force=True)
    scaler = Scaler(obs_dim)

    if weight == "None":
        val_func = NNValueFunction(obs_dim, net_size_factor=net_size_factor,
                                   alive_coef=alive_coef,
                                   progress_coef=progress_coef,
                                   reward_dim=reward_dim)
        policy = Policy(obs_dim, act_dim, kl_targ,
                        net_size_factor=net_size_factor, noise_bias=noise_bias)
    else:
        token = weight.split(".")
        token[-3] = token[-3][:-5] + "value"
        weight_2 = ".".join(token)
        # assert False, "unreachable"
        val_func = NNValueFunctionContinue(weight_2, obs_dim,
                                           net_size_factor=net_size_factor,
                                           alive_coef=alive_coef,
                                           progress_coef=progress_coef)
        policy = PolicyContinue(weight, obs_dim, act_dim, kl_targ,
                                net_size_factor=net_size_factor,
                                noise_bias=noise_bias)

    # run a few episodes of untrained policy to initialize scaler:
    run_policy(env, policy, scaler, logger, episodes=5)
    episode = 0
    flag1 = False
    flag2 = False
    flag3 = False
    reward_queue = []
    queue_num = 100
    while episode < num_episodes:
        trajectories = run_policy(env, policy, scaler, logger, episodes=batch_size)
        episode += len(trajectories)
        add_value(trajectories, val_func)  # add estimated values to episodes
        add_disc_sum_rew(trajectories, gamma)  # calculate discounted sum of Rs
        add_gae(trajectories, gamma, lam)  # calculate advantage
        # concatenate all episodes into single NumPy arrays
        observes, actions, advantages, disc_sum_rew = build_train_set(trajectories)
        # add various stats to training log:
        log_batch_stats(observes, actions, advantages, disc_sum_rew, logger, episode)
        policy.update(observes, actions, advantages, logger, scaler)  # update policy
        val_func.fit(observes, disc_sum_rew, logger)  # update value function
        logger.write(display=True)  # write logger results to file and stdout
        if killer.kill_now:
            if input('Terminate training (y/[n])? ') == 'y':
                break
            killer.kill_now = False

        # split the per-trajectory reward totals into alive and progress parts
        alive_sum = 0
        progr_sum = 0
        for t in trajectories:
            tmp_rewards = t['orig_rewards']
            tmp_rewards = np.sum(tmp_rewards, axis=0)
            alive_sum += tmp_rewards[0]
            progr_sum += tmp_rewards[1]

        reward_queue.append(np.mean([t['rewards'].sum() for t in trajectories]))
        reward_queue = reward_queue[-queue_num:]
        reward_std = np.std(np.array(reward_queue))
        print("Reward std by {} episode : {}".format(queue_num, reward_std))

        if alive_sum >= 5000:
            flag3 = True
        if (flag3 and alive_sum > progr_sum * threshold1) or flag1:
            flag1 = True
            alive_coef -= change_rate
            progress_coef += change_rate
            val_func.alive_coef = float(alive_coef)
            val_func.progress_coef = float(progress_coef)
            if alive_sum < progr_sum * threshold2:
                flag1 = False
        if progr_sum > alive_sum * threshold1 or flag2:
            flag2 = True
            alive_coef += change_rate
            progress_coef -= change_rate
            val_func.alive_coef = float(alive_coef)
            val_func.progress_coef = float(progress_coef)
            if progr_sum < alive_sum * threshold2:
                flag2 = False
        print(alive_sum, progr_sum)
        logger.log_model_3({
            "alive_coef": alive_coef,
            "progress_coef": progress_coef,
            "alive_sum": alive_sum,
            "progr_sum": progr_sum
        })

    logger.close()
    policy.close_sess()
    val_func.close_sess()
    if epoch >= 100:
        lr = args.lr * 0.001
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


if __name__ == "__main__":
    args = parse_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    gpu_num = max(len(args.gpu_id.split(',')), 1)
    model_name = 'resnet18'
    log_dir = "logs/%s_%s" % (time.strftime("%b%d-%H%M", time.localtime()),
                              model_name)
    check_mkdir(log_dir)
    log = Logger(log_dir + '/train.log')
    log.print(args)

    device = torch.device('cuda')
    model = ResNet18().to(device)
    model = nn.DataParallel(model, device_ids=[i for i in range(gpu_num)])
    train_loader, test_loader = prepare_cifar(args.batch_size,
                                              args.test_batch_size)
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    best_epoch, best_robust_acc = 0, 0.
    for e in range(args.epoch):
from ast_modifications.modifier import add_method
from fpcore.ast import ASTNode, Atom, FPCore, Operation
from utils import Logger

logger = Logger(level=Logger.EXTRA)

UNARY_PREFIX = {"+", "-"}
INFIX = {"+", "-", "*", "/"}


@add_method(ASTNode)
def to_c(self, assignment, lines, main_args):
    # Make sure calling to_c leads to an error if not overridden
    class_name = type(self).__name__
    msg = "to_c not implemented for class {}".format(class_name)
    raise NotImplementedError(msg)


@add_method(Atom)
def to_c(self, assignment, lines, main_args):
    if str(self) in main_args:
        return str(self)
    if self.duplicate:
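
# add_method (imported above from ast_modifications.modifier) lets to_c be
# defined once per AST node type in this file. A minimal sketch of such a
# decorator; this is an assumption about the helper, not its actual source:
def add_method(cls):
    def decorator(func):
        # attach the function to the class under its own name
        setattr(cls, func.__name__, func)
        return func  # return it so the module-level name stays reusable
    return decorator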
def main():
    args.cuda = True

    # dataset
    train_dataset = BSDSLoader(root=args.dataset, split="train")
    test_dataset = BSDSLoader(root=args.dataset, split="test")
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              num_workers=4, drop_last=True, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             num_workers=4, drop_last=True, shuffle=False)
    with open(join(args.dataset, 'test.lst'), 'r') as f:
        test_list = f.readlines()
    test_list = [split(i.rstrip())[1] for i in test_list]
    assert len(test_list) == len(test_loader), "%d vs %d" % (len(test_list), len(test_loader))

    # default hyperparameters
    if args.use_cfg:
        if args.pretrained and not args.small:
            args.stepsize = 2
            args.lr = 0.001 if args.harmonic else 0.0002
        elif args.small:
            args.stepsize = 6
            args.lr = 0.005 if args.harmonic else 0.001
        else:
            args.stepsize = 4
            args.lr = 0.0005 if args.harmonic else 0.0002
        args.maxepoch = args.stepsize + 1

    # model
    model = HEDSmall(harmonic=args.harmonic) if args.small else HED(harmonic=args.harmonic)
    model.cuda()
    model.apply(weights_init)
    if args.pretrained and not args.small:
        if args.harmonic:
            load_harm_vgg16pretrain(model)
        else:
            load_vgg16pretrain(model)

    # tune lr
    net_parameters_id = {}
    if args.pretrained and not args.small:
        for pname, p in model.named_parameters():
            if pname in ['conv1_1.weight', 'conv1_2.weight',
                         'conv2_1.weight', 'conv2_2.weight',
                         'conv3_1.weight', 'conv3_2.weight', 'conv3_3.weight',
                         'conv4_1.weight', 'conv4_2.weight', 'conv4_3.weight',
                         'conv5_1.weight', 'conv5_2.weight', 'conv5_3.weight']:
                print(pname, 'lr:1 de:1')
                if 'conv1-5.weight' not in net_parameters_id:
                    net_parameters_id['conv1-5.weight'] = []
                net_parameters_id['conv1-5.weight'].append(p)
            elif pname in ['conv1_1.bias', 'conv1_2.bias',
                           'conv2_1.bias', 'conv2_2.bias',
                           'conv3_1.bias', 'conv3_2.bias', 'conv3_3.bias',
                           'conv4_1.bias', 'conv4_2.bias', 'conv4_3.bias',
                           'conv5_1.bias', 'conv5_2.bias', 'conv5_3.bias']:
                print(pname, 'lr:2 de:0')
                if 'conv1-5.bias' not in net_parameters_id:
                    net_parameters_id['conv1-5.bias'] = []
                net_parameters_id['conv1-5.bias'].append(p)
            elif pname in ['score_dsn1.weight', 'score_dsn2.weight', 'score_dsn3.weight',
                           'score_dsn4.weight', 'score_dsn5.weight']:
                print(pname, 'lr:0.01 de:1')
                if 'score_dsn_1-5.weight' not in net_parameters_id:
                    net_parameters_id['score_dsn_1-5.weight'] = []
                net_parameters_id['score_dsn_1-5.weight'].append(p)
            elif pname in ['score_dsn1.bias', 'score_dsn2.bias', 'score_dsn3.bias',
                           'score_dsn4.bias', 'score_dsn5.bias']:
                print(pname, 'lr:0.02 de:0')
                if 'score_dsn_1-5.bias' not in net_parameters_id:
                    net_parameters_id['score_dsn_1-5.bias'] = []
                net_parameters_id['score_dsn_1-5.bias'].append(p)
            elif pname in ['score_final.weight']:
                print(pname, 'lr:0.001 de:1')
                if 'score_final.weight' not in net_parameters_id:
                    net_parameters_id['score_final.weight'] = []
                net_parameters_id['score_final.weight'].append(p)
            elif pname in ['score_final.bias']:
                print(pname, 'lr:0.002 de:0')
                if 'score_final.bias' not in net_parameters_id:
                    net_parameters_id['score_final.bias'] = []
                net_parameters_id['score_final.bias'].append(p)
        param_groups = [
            {'params': net_parameters_id['conv1-5.weight'],       'lr': args.lr * 1,     'weight_decay': args.weight_decay},
            {'params': net_parameters_id['conv1-5.bias'],         'lr': args.lr * 2,     'weight_decay': 0.},
            {'params': net_parameters_id['score_dsn_1-5.weight'], 'lr': args.lr * 0.01,  'weight_decay': args.weight_decay},
            {'params': net_parameters_id['score_dsn_1-5.bias'],   'lr': args.lr * 0.02,  'weight_decay': 0.},
            {'params': net_parameters_id['score_final.weight'],   'lr': args.lr * 0.001, 'weight_decay': args.weight_decay},
            {'params': net_parameters_id['score_final.bias'],     'lr': args.lr * 0.002, 'weight_decay': 0.},
        ]
    else:
        net_parameters_id = {'weights': [], 'biases': []}
        for pname, p in model.named_parameters():
            if 'weight' in pname:
                net_parameters_id['weights'].append(p)
            elif 'bias' in pname:
                net_parameters_id['biases'].append(p)
        param_groups = [
            {'params': net_parameters_id['weights'], 'weight_decay': args.weight_decay},
            {'params': net_parameters_id['biases'], 'weight_decay': 0.},
        ]
    optimizer = torch.optim.Adam(param_groups, lr=args.lr, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
            optimizer.load_state_dict(checkpoint['optimizer'])
            args.start_epoch = checkpoint['epoch']
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # log
    log = Logger(join(OUT_DIR, 'log.txt'))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            test(model, test_loader, epoch=epoch, test_list=test_list,
                 save_dir=join(OUT_DIR, 'initial-testing-record'))

        tr_avg_loss, tr_detail_loss = train(
            train_loader, model, optimizer, epoch,
            save_dir=join(OUT_DIR, 'epoch-%d-training-record' % epoch))
        test(model, test_loader, epoch=epoch, test_list=test_list,
             save_dir=join(OUT_DIR, 'epoch-%d-testing-record' % epoch))
        log.flush()  # write log

        # Save checkpoint
        save_file = os.path.join(OUT_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }, filename=save_file)
        scheduler.step()  # will adjust learning rate

        # save train/val loss/accuracy, save every epoch in case of early stop
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
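# Aside: the long per-layer setup above boils down to PyTorch's standard
# per-group optimizer hyperparameters. A minimal, self-contained sketch
# (module and multipliers here are illustrative, not from the script):
import torch
import torch.nn as nn

_net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Conv2d(8, 8, 3))
_groups = [
    {'params': _net[0].parameters(), 'lr': 1e-3},          # backbone layers
    {'params': _net[1].parameters(), 'lr': 1e-3 * 0.01},   # freshly added head
]
_opt = torch.optim.Adam(_groups, lr=1e-3, weight_decay=2e-4)
# Each group keeps its own 'lr'; options a group omits (here 'weight_decay')
# fall back to the defaults passed to the optimizer constructor.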
def main():
    # N.B.: parameters defined in cv_cfg.ini override args!
    parser = argparse.ArgumentParser(
        description='Cross-validation over source domains for the Amazon dataset.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-m', '--model', default='MODAFM', type=str, metavar='',
                        help='model type (\'MDAN\' / \'MODA\' / \'MODAFM\')')
    parser.add_argument('-d', '--data_path', default='/ctm-hdd-pool01/DB/Amazon',
                        type=str, metavar='', help='data directory path')
    parser.add_argument('-t', '--target', default='books', type=str, metavar='',
                        help='target domain (\'books\' / \'dvd\' / \'electronics\' / \'kitchen\')')
    parser.add_argument('-o', '--output', default='msda_hyperparams.ini',
                        type=str, metavar='', help='output file')
    parser.add_argument('-n', '--n_iter', default=20, type=int, metavar='',
                        help='number of CV iterations')
    parser.add_argument('--n_samples', default=2000, type=int, metavar='',
                        help='number of samples from each domain')
    parser.add_argument('--n_features', default=5000, type=int, metavar='',
                        help='number of features to use')
    parser.add_argument('--mu', type=float, default=1e-2,
                        help='hyperparameter of the coefficient for the domain adversarial loss')
    parser.add_argument('--beta', type=float, default=2e-1,
                        help='hyperparameter of the non-sparsity regularization')
    parser.add_argument('--lambda', type=float, default=1e-1,
                        help='hyperparameter of the FixMatch loss')
    # dropout rates are fractions, so these must be parsed as floats
    # (the original declared them type=int, which clashes with the defaults)
    parser.add_argument('--min_dropout', type=float, default=2e-1,
                        help='minimum dropout rate')
    parser.add_argument('--max_dropout', type=float, default=8e-1,
                        help='maximum dropout rate')
    parser.add_argument('--weight_decay', default=0., type=float, metavar='',
                        help='hyperparameter of weight decay regularization')
    parser.add_argument('--lr', default=1e0, type=float, metavar='',
                        help='learning rate')
    parser.add_argument('--epochs', default=15, type=int, metavar='',
                        help='number of training epochs')
    parser.add_argument('--batch_size', default=20, type=int, metavar='',
                        help='batch size (per domain)')
    parser.add_argument('--checkpoint', default=0, type=int, metavar='',
                        help='number of epochs between saving checkpoints (0 disables checkpoints)')
    parser.add_argument('--use_cuda', default=True, type=int, metavar='',
                        help='use CUDA capable GPU')
    parser.add_argument('--use_visdom', default=False, type=int, metavar='',
                        help='use Visdom to visualize plots')
    parser.add_argument('--visdom_env', default='amazon_train', type=str, metavar='',
                        help='Visdom environment name')
    parser.add_argument('--visdom_port', default=8888, type=int, metavar='',
                        help='Visdom port')
    parser.add_argument('--verbosity', default=2, type=int, metavar='',
                        help='log verbosity level')
    parser.add_argument('--seed', default=42, type=int, metavar='',
                        help='random seed')
    args = vars(parser.parse_args())

    # override args with cv_cfg.ini
    cfg = args.copy()
    cv_parser = ConfigParser()
    cv_parser.read('cv_cfg.ini')
    cv_param_names = []
    for key, val in cv_parser.items('main'):
        cfg[key] = ast.literal_eval(val)
        cv_param_names.append(key)

    # use a fixed random seed for reproducibility purposes
    if cfg['seed'] > 0:
        random.seed(cfg['seed'])
        np.random.seed(seed=cfg['seed'])
        torch.manual_seed(cfg['seed'])
        torch.cuda.manual_seed(cfg['seed'])

    device = 'cuda' if (cfg['use_cuda'] and torch.cuda.is_available()) else 'cpu'
    log = Logger(cfg['verbosity'])
    log.print('device:', device, level=0)

    domains = ['books', 'dvd', 'electronics', 'kitchen']
    datasets = {}
    for domain in domains:
        if domain == cfg['target']:
            continue
        datasets[domain] = Amazon('./amazon.npz', domain,
                                  dimension=cfg['n_features'],
                                  transform=torch.from_numpy)
        indices = random.sample(range(len(datasets[domain])), cfg['n_samples'])
        datasets[domain] = Subset(datasets[domain], indices)
    cfg['test_transform'] = torch.from_numpy

    if cfg['model'] == 'MDAN':
        model = MDANet(input_dim=cfg['n_features'], n_classes=2,
                       n_domains=len(domains) - 2).to(device)
        cfg['model'] = model
        cfg['train_routine'] = lambda model, optimizer, train_loader, cfg: \
            mdan_train_routine(model, optimizer, train_loader, dict(), cfg)
    elif cfg['model'] == 'MODA':
        model = MODANet(input_dim=cfg['n_features'], n_classes=2).to(device)
        cfg['model'] = model
        cfg['train_routine'] = lambda model, optimizer, train_loader, cfg: \
            moda_train_routine(model, optimizer, train_loader, dict(), cfg)
    elif cfg['model'] == 'MODAFM':
        model = MODANet(input_dim=cfg['n_features'], n_classes=2).to(device)
        cfg['model'] = model
        cfg['train_routine'] = lambda model, optimizer, train_loader, cfg: \
            moda_mlp_fm_train_routine(model, optimizer, train_loader, dict(), cfg)

    best_params, _ = cross_validation(datasets, cfg, cv_param_names)
    log.print('best_params:', best_params, level=1)

    results = ConfigParser()
    results.add_section('main')
    for key, value in best_params.items():
        results.set('main', key, str(value))
    with open(cfg['output'], 'w') as f:
        results.write(f)
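# Aside: the cv_cfg.ini override above hinges on ast.literal_eval turning
# config strings back into Python values. A minimal standalone sketch of the
# pattern (file name and section are illustrative):
import ast
from configparser import ConfigParser

def load_overrides(path, section='main'):
    parser = ConfigParser()
    parser.read(path)
    # literal_eval safely parses numbers, lists, dicts, and booleans;
    # string values must be quoted in the .ini file for this to work
    return {key: ast.literal_eval(val) for key, val in parser.items(section)}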
def main():
    global best_top1, best_top5

    args.world_size = 1
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    crop_size = 224
    val_size = 256

    pipe = HybridTrainPipe(batch_size=args.train_batch, num_threads=args.workers,
                           device_id=args.local_rank, data_dir=traindir,
                           crop=crop_size, dali_cpu=args.dali_cpu)
    pipe.build()
    train_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    pipe = HybridValPipe(batch_size=args.test_batch, num_threads=args.workers,
                         device_id=args.local_rank, data_dir=valdir,
                         crop=crop_size, size=val_size)
    pipe.build()
    val_loader = DALIClassificationIterator(pipe, size=int(pipe.epoch_size("Reader") / args.world_size))

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            baseWidth=args.base_width,
            cardinality=args.cardinality,
        )
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    if args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum, weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'adam':
        # note: this branch actually instantiates the decoupled-weight-decay variant (AdamW)
        optimizer = AdamW(model.parameters(), lr=args.lr,
                          betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'radam':
        optimizer = RAdam(model.parameters(), lr=args.lr,
                          betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'lsadam':
        optimizer = LSAdamW(model.parameters(), lr=args.lr * ((1. + 4. * args.sigma) ** 0.25),
                            betas=(args.beta1, args.beta2),
                            weight_decay=args.weight_decay, sigma=args.sigma)
    elif args.optimizer.lower() == 'lsradam':
        sigma = 0.1
        optimizer = LSRAdam(model.parameters(), lr=args.lr * ((1. + 4. * args.sigma) ** 0.25),
                            betas=(args.beta1, args.beta2),
                            weight_decay=args.weight_decay, sigma=args.sigma)

    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_top1 = checkpoint['best_top1']
        best_top5 = checkpoint['best_top5']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss',
                          'Train Top1', 'Valid Top1', 'Train Top5', 'Valid Top5'])

    logger.file.write('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    if args.evaluate:
        logger.file.write('\nEvaluation only')
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion, start_epoch, use_cuda, logger)
        logger.file.write(' Test Loss: %.8f, Test Top1: %.2f, Test Top5: %.2f' % (test_loss, test_top1, test_top5))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        logger.file.write('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_top1, train_top5 = train(train_loader, model, criterion, optimizer, epoch, use_cuda, logger)
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion, epoch, use_cuda, logger)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_top1, test_top1, train_top5, test_top5])

        writer.add_scalars('train_loss', {args.model_name: train_loss}, epoch)
        writer.add_scalars('test_loss', {args.model_name: test_loss}, epoch)
        writer.add_scalars('train_top1', {args.model_name: train_top1}, epoch)
        writer.add_scalars('test_top1', {args.model_name: test_top1}, epoch)
        writer.add_scalars('train_top5', {args.model_name: train_top5}, epoch)
        writer.add_scalars('test_top5', {args.model_name: test_top5}, epoch)

        # save model
        is_best = test_top1 > best_top1
        best_top1 = max(test_top1, best_top1)
        best_top5 = max(test_top5, best_top5)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'top1': test_top1,
            'top5': test_top5,
            'best_top1': best_top1,
            'best_top5': best_top5,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

        # reset DALI iterators for the next epoch
        train_loader.reset()
        val_loader.reset()

    logger.file.write('Best top1: %f' % best_top1)
    logger.file.write('Best top5: %f' % best_top5)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best top1: %f' % best_top1)
    print('Best top5: %f' % best_top5)
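# Aside: the resume logic above is the standard PyTorch checkpoint pattern.
# A minimal self-contained version for reference (key names follow the
# script's save_checkpoint payload; the helper name is illustrative):
import torch

def resume_from(path, model, optimizer):
    ckpt = torch.load(path, map_location='cpu')
    model.load_state_dict(ckpt['state_dict'])
    optimizer.load_state_dict(ckpt['optimizer'])
    return ckpt['epoch']  # continue training from this epoch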
def predictor_worker(recv_queue, send_queue, worker_alive):
    predictor = None
    predictor_args = ()
    timing = AccumDict()
    log = Logger('./var/log/predictor_worker.log', verbose=opt.verbose)

    try:
        while worker_alive.value:
            tt = TicToc()

            try:
                method, data = recv_queue.get(timeout=GET_TIMEOUT)
            except queue.Empty:
                continue

            # get the latest non-critical request from the queue;
            # don't skip critical requests
            while not recv_queue.empty() and not method['critical']:
                log(f"skip {method}")
                method, data = recv_queue.get()

            log("working on", method)

            try:
                tt.tic()
                if method['name'] == 'predict':
                    image = cv2.imdecode(np.frombuffer(data, dtype='uint8'), -1)
                else:
                    args = msgpack.unpackb(data)
                timing.add('UNPACK', tt.toc())
            except ValueError:
                log("Invalid Message", important=True)
                continue

            tt.tic()
            if method['name'] == "hello":
                result = "OK"
            elif method['name'] == "__init__":
                if args == predictor_args:
                    log("Same config as before... reusing previous predictor")
                else:
                    del predictor
                    predictor_args = args
                    predictor = PredictorLocal(*predictor_args[0], **predictor_args[1])
                    log("Initialized predictor with:", predictor_args, important=True)
                result = True
                tt.tic()  # don't account for init
            elif method['name'] == 'predict':
                assert predictor is not None, "Predictor was not initialized"
                result = getattr(predictor, method['name'])(image)
            else:
                assert predictor is not None, "Predictor was not initialized"
                result = getattr(predictor, method['name'])(*args[0], **args[1])
            timing.add('CALL', tt.toc())

            tt.tic()
            if method['name'] == 'predict':
                assert isinstance(result, np.ndarray), f'Expected np.ndarray, got {result.__class__}'
                ret_code, data_send = cv2.imencode(".jpg", result,
                                                   [int(cv2.IMWRITE_JPEG_QUALITY), opt.jpg_quality])
            else:
                data_send = msgpack.packb(result)
            timing.add('PACK', tt.toc())

            if method['critical']:
                send_queue.put((method, data_send))
            else:
                try:
                    send_queue.put((method, data_send), block=False)
                except queue.Full:
                    log("send_queue full")

            Once(timing, log, per=1)
    except KeyboardInterrupt:
        log("predictor_worker: user interrupt", important=True)
    except Exception:
        log("predictor_worker error", important=True)
        traceback.print_exc()

    worker_alive.value = 0
    log("predictor_worker exit", important=True)
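# Aside: the "skip stale requests" drain above is a generic pattern in
# real-time pipelines: keep only the newest non-critical item so the worker
# never falls behind the producer. A minimal sketch of just that step
# (helper and predicate names are illustrative):
import queue

def latest_item(q, first_item, is_critical):
    item = first_item
    while not q.empty() and not is_critical(item):
        item = q.get()  # drop stale items, keep the newest
    return item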
    norm = nn.BatchNorm2d(out_channels)
    active = nn.ReLU()
    net = nn.Sequential(OrderedDict([
        (net_name + '_conv', conv),
        (net_name + '_norm', norm),
        (net_name + '_relu', active)
    ]))
    return net


if __name__ == '__main__':
    # lr = 0.001
    # weight_decay = 1e-4
    # epochs = 100
    # batch_size = 128
    log = Logger('.\\logs\\cnn\\CNN_.log', level='debug')
    # writer = SummaryWriter(comment='CNN')
    model_path = '.\\Model\\cnn_.pkl'
    best_model_path = '.\\Model\\cnn_best_.pkl'
    my_dataset, transform = data_loader('.\\data', size=28, equal=True, deep=True)

    # MNIST digits dataset: download it if the mnist dir is missing or empty
    if not os.path.exists('./mnist/') or not os.listdir('./mnist/'):
        DOWNLOAD_MNIST = True
    else:
        DOWNLOAD_MNIST = False

    # train_data = MNIST(root='.\\mnist\\train', train=True, transform=torchvision.transforms.ToTensor(), download=DOWNLOAD_MNIST)
    test_data = MNIST(root='.\\mnist\\train', train=False,
                      transform=torchvision.transforms.ToTensor(),
                      download=DOWNLOAD_MNIST)
transforms_ = [
    transforms.Resize(int(opt.size * 1.12), Image.BICUBIC),
    transforms.RandomCrop(opt.size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
]
dataloader = DataLoader(ImageDataset(opt.dataroot, transforms_=transforms_, unaligned=True),
                        batch_size=opt.batchSize, shuffle=True, num_workers=opt.n_cpu)

# Loss plot
logger = Logger(opt.n_epochs, len(dataloader))

###################################
###### Training ######
for epoch in range(opt.epoch, opt.n_epochs):
    for i, batch in enumerate(dataloader):
        # Set model input
        real_A = Variable(input_A.copy_(batch['A']))
        real_B = Variable(input_B.copy_(batch['B']))

        ###### Generators A2B and B2A ######
        optimizer_G.zero_grad()

        # Identity loss
        # G_A2B(B) should equal B if real B is fed
        same_B = netG_A2B(real_B)
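# Aside: in a CycleGAN-style setup, the identity term this block begins is
# typically an L1 penalty between G_A2B(real_B) and real_B. A hedged sketch
# of what usually follows (the weighting factor is a common convention, not
# taken from this file):
import torch.nn as nn

criterion_identity = nn.L1Loss()
# loss_identity_B = criterion_identity(same_B, real_B) * 5.0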
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Dataset preprocessing
    title = 'CIFAR-10'

    # Create Datasets
    transform_train_poisoned = transforms.Compose([
        TriggerAppending(trigger=args.trigger, alpha=args.alpha),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    transform_train_benign = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    transform_test_poisoned = transforms.Compose([
        TriggerAppending(trigger=args.trigger, alpha=args.alpha),
        transforms.ToTensor(),
    ])
    transform_test_benign = transforms.Compose([
        transforms.ToTensor(),
    ])

    print('==> Loading the dataset')
    dataloader = datasets.CIFAR10
    poisoned_trainset = dataloader(root='./data', train=True, download=True,
                                   transform=transform_train_poisoned)
    benign_trainset = dataloader(root='./data', train=True, download=True,
                                 transform=transform_train_benign)
    poisoned_testset = dataloader(root='./data', train=False, download=True,
                                  transform=transform_test_poisoned)
    benign_testset = dataloader(root='./data', train=False, download=True,
                                transform=transform_test_benign)

    num_training = len(poisoned_trainset)
    num_poisoned = int(num_training * args.poison_rate)

    idx = list(np.arange(num_training))
    random.shuffle(idx)
    poisoned_idx = idx[:num_poisoned]
    benign_idx = idx[num_poisoned:]

    poisoned_img = poisoned_trainset.data[poisoned_idx, :, :, :]
    # Reassign the poisoned samples' labels to the target label; the list must
    # match the number of poisoned samples (the original sized it to the full set)
    poisoned_target = [args.y_target] * len(poisoned_idx)
    poisoned_trainset.data, poisoned_trainset.targets = poisoned_img, poisoned_target

    benign_img = benign_trainset.data[benign_idx, :, :, :]
    benign_target = [benign_trainset.targets[i] for i in benign_idx]
    benign_trainset.data, benign_trainset.targets = benign_img, benign_target

    # Reassign all test labels to the target label
    poisoned_target = [args.y_target] * len(poisoned_testset.data)
    poisoned_testset.targets = poisoned_target

    poisoned_trainloader = torch.utils.data.DataLoader(
        poisoned_trainset, batch_size=int(args.train_batch * args.poison_rate),
        shuffle=True, num_workers=args.workers)
    # *0.9 to prevent the benign data from yielding fewer iterations than the poisoned data
    benign_trainloader = torch.utils.data.DataLoader(
        benign_trainset, batch_size=int(args.train_batch * (1 - args.poison_rate) * 0.9),
        shuffle=True, num_workers=args.workers)
    poisoned_testloader = torch.utils.data.DataLoader(
        poisoned_testset, batch_size=args.test_batch,
        shuffle=False, num_workers=args.workers)
    benign_testloader = torch.utils.data.DataLoader(
        benign_testset, batch_size=args.test_batch,
        shuffle=False, num_workers=args.workers)

    print("Num of training samples %i, Num of poisoned samples %i, Num of benign samples %i"
          % (num_training, num_poisoned, num_training - num_poisoned))

    # Model
    print('==> Loading the model')
    model = vgg19_bn()
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.weight_decay)

    # Resume
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Benign Valid Loss', 'Poisoned Valid Loss',
            'Train ACC.', 'Benign Valid ACC.', 'Poisoned Valid ACC.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        # evaluate on the benign test set (the original referenced an undefined `testloader`)
        test_loss, test_acc = test(benign_testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        print('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        train_loss, train_acc = train(args, model, poisoned_trainloader, benign_trainloader,
                                      criterion, optimizer, epoch, use_cuda)
        test_loss_benign, test_acc_benign = test(benign_testloader, model, criterion, epoch, use_cuda)
        test_loss_poisoned, test_acc_poisoned = test(poisoned_testloader, model, criterion, epoch, use_cuda)

        # append logger file
        logger.append([
            state['lr'], train_loss, test_loss_benign, test_loss_poisoned,
            train_acc, test_acc_benign, test_acc_poisoned
        ])

        # save model
        is_best = test_acc_benign > best_acc
        best_acc = max(test_acc_benign, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'acc': test_acc_benign,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, checkpoint=args.checkpoint)

    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)
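# Aside: a TriggerAppending-style transform usually alpha-blends a fixed
# trigger pattern into each image. A minimal sketch under that assumption
# (the real class in this script may differ; the name below is illustrative):
import numpy as np

class TriggerBlendSketch(object):
    def __init__(self, trigger, alpha):
        self.trigger = np.asarray(trigger, dtype=np.float32)
        self.alpha = np.asarray(alpha, dtype=np.float32)

    def __call__(self, img):
        img = np.asarray(img, dtype=np.float32)
        # poisoned = (1 - alpha) * clean + alpha * trigger, elementwise
        return (1.0 - self.alpha) * img + self.alpha * self.trigger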
def main():
    global BEST_ACC, LR_STATE
    start_epoch = cfg.CLS.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Create ckpt folder
    if not os.path.isdir(cfg.CLS.ckpt):
        mkdir_p(cfg.CLS.ckpt)
    if args.cfg_file is not None and not cfg.CLS.evaluate:
        shutil.copyfile(args.cfg_file, os.path.join(cfg.CLS.ckpt, args.cfg_file.split('/')[-1]))

    # Dataset and Loader
    normalize = transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)
    if cfg.CLS.train_crop_type == 'center':
        train_aug = [
            transforms.Resize(cfg.CLS.base_size),
            transforms.CenterCrop(cfg.CLS.crop_size),
            transforms.RandomHorizontalFlip(),
        ]
    elif cfg.CLS.train_crop_type == 'random_resized':
        train_aug = [transforms.RandomResizedCrop(cfg.CLS.crop_size),
                     transforms.RandomHorizontalFlip()]
    else:
        train_aug = [transforms.RandomHorizontalFlip()]
    if len(cfg.CLS.rotation) > 0:
        train_aug.append(transforms.RandomRotation(cfg.CLS.rotation))
    if len(cfg.CLS.pixel_jitter) > 0:
        train_aug.append(RandomPixelJitter(cfg.CLS.pixel_jitter))
    if cfg.CLS.grayscale > 0:
        train_aug.append(transforms.RandomGrayscale(cfg.CLS.grayscale))
    train_aug.append(transforms.ToTensor())
    train_aug.append(normalize)

    val_aug = [
        transforms.Resize(cfg.CLS.base_size),
        transforms.CenterCrop(cfg.CLS.crop_size),
        transforms.ToTensor(),
        normalize,
    ]

    if os.path.isfile(cfg.CLS.train_root):  # i.e. a data list file is provided
        train_datasets = CustomData(img_path=cfg.CLS.data_root, txt_path=cfg.CLS.train_root,
                                    data_transforms=transforms.Compose(train_aug))
        val_datasets = CustomData(img_path=cfg.CLS.data_root, txt_path=cfg.CLS.val_root,
                                  data_transforms=transforms.Compose(val_aug))
    elif os.path.isdir(cfg.CLS.data_root + cfg.CLS.train_root):
        traindir = os.path.join(cfg.CLS.data_root, cfg.CLS.train_root)
        train_datasets = datasets.ImageFolder(traindir, transforms.Compose(train_aug))
        valdir = os.path.join(cfg.CLS.data_root, cfg.CLS.val_root)
        val_datasets = datasets.ImageFolder(valdir, transforms.Compose(val_aug))

    train_loader = torch.utils.data.DataLoader(
        train_datasets, batch_size=cfg.CLS.train_batch, shuffle=False,
        sampler=RandomIdentitySampler(train_datasets, num_instances=4),
        num_workers=cfg.workers, pin_memory=True, drop_last=True)
    print(type(train_loader))
    if cfg.CLS.validate or cfg.CLS.evaluate:
        val_loader = torch.utils.data.DataLoader(
            val_datasets, batch_size=cfg.CLS.test_batch, shuffle=False,
            num_workers=cfg.workers, pin_memory=True, drop_last=True)

    # Create model
    model = models.__dict__[cfg.CLS.arch]()
    print(model)

    # Calculate FLOPs & Param
    n_flops, n_convops, n_params = measure_model(model, cfg.CLS.crop_size, cfg.CLS.crop_size)
    print('==> FLOPs: {:.4f}M, Conv_FLOPs: {:.4f}M, Params: {:.4f}M'.format(
        n_flops / 1e6, n_convops / 1e6, n_params / 1e6))
    del model
    model = models.__dict__[cfg.CLS.arch]()

    # Load pre-train model
    if cfg.CLS.pretrained:
        print("==> Using pre-trained model '{}'".format(cfg.CLS.pretrained))
        pretrained_dict = torch.load(cfg.CLS.pretrained)
        try:
            pretrained_dict = pretrained_dict['state_dict']
        except KeyError:
            pass  # the file already holds a bare state dict
        model_dict = model.state_dict()
        updated_dict, match_layers, mismatch_layers = weight_filler(pretrained_dict, model_dict)
        model_dict.update(updated_dict)
        model.load_state_dict(model_dict)
    else:
        print("==> Creating model '{}'".format(cfg.CLS.arch))

    # Define loss function (criterion) and optimizer
    # criterion = nn.CrossEntropyLoss().cuda()
    criterion = TripletLoss(margin=args.margin).cuda()
    if cfg.CLS.pretrained:
        def param_filter(param):
            return param[1]
        new_params = map(param_filter,
                         filter(lambda p: p[0] in mismatch_layers, model.named_parameters()))
        base_params = map(param_filter,
                          filter(lambda p: p[0] in match_layers, model.named_parameters()))
        model_params = [{'params': base_params},
                        {'params': new_params, 'lr': cfg.CLS.base_lr * 10}]
    else:
        model_params = model.parameters()

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    optimizer = optim.SGD(model_params, lr=cfg.CLS.base_lr,
                          momentum=cfg.CLS.momentum, weight_decay=cfg.CLS.weight_decay)

    # Evaluate model
    if cfg.CLS.evaluate:
        print('\n==> Evaluation only')
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion, start_epoch, USE_CUDA)
        print('==> Test Loss: {:.8f} | Test_top1: {:.4f}% | Test_top5: {:.4f}%'.format(
            test_loss, test_top1, test_top5))
        return

    # Resume training
    title = 'Pytorch-CLS-' + cfg.CLS.arch
    if cfg.CLS.resume:
        # Load checkpoint.
        print("==> Resuming from checkpoint '{}'".format(cfg.CLS.resume))
        assert os.path.isfile(cfg.CLS.resume), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(cfg.CLS.resume)
        BEST_ACC = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(cfg.CLS.ckpt, 'log.txt'), title=title)
        # logger.set_names(['Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'])
        logger.set_names(['Learning Rate', 'Train Loss', 'Train Acc.'])

    # Train and val
    for epoch in range(start_epoch, cfg.CLS.epochs):
        print('\nEpoch: [{}/{}] | LR: {:.8f}'.format(epoch + 1, cfg.CLS.epochs, LR_STATE))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer, epoch, USE_CUDA)
        # top1 = train_acc
        # BEST_ACC = max(top1, BEST_ACC)

        if cfg.CLS.validate:
            # test_loss, test_top1, test_top5 = test(val_loader, model, criterion, epoch, USE_CUDA)
            # `evaluator` and `dataset` are assumed to be defined at module level
            top1 = evaluator.evaluate(val_loader, dataset.val, dataset.val)
            # update the global best so the final report below is meaningful
            # (the original assigned a local `best_acc` and never used it)
            BEST_ACC = max(top1, BEST_ACC)
            print('\n * Finished epoch {:3d}  top1: {:5.1%}  best: {:5.1%}\n'.format(
                epoch, top1, BEST_ACC))
        # else:
        #     test_loss, test_top1, test_top5 = 0.0, 0.0, 0.0

        # Append logger file
        # logger.append([LR_STATE, train_loss, test_loss, train_acc, test_top1])
        logger.append([LR_STATE, train_loss, train_acc])

        # Save model
        save_checkpoint(model, optimizer, train_acc, epoch)

        # Draw curve
        try:
            draw_curve(cfg.CLS.arch, cfg.CLS.ckpt)
            print('==> Success saving log curve...')
        except Exception:
            print('==> Saving log curve error...')

    logger.close()
    try:
        savefig(os.path.join(cfg.CLS.ckpt, 'log.eps'))
        shutil.copyfile(os.path.join(cfg.CLS.ckpt, 'log.txt'),
                        os.path.join(cfg.CLS.ckpt, 'log{}.txt'.format(
                            datetime.datetime.now().strftime('%Y%m%d%H%M%S'))))
    except Exception:
        print('Copy log error.')
    print('==> Training Done!')
    # report the tracked global best (the original printed an undefined `best_top1`)
    print('==> Best acc: {:.4f}%'.format(BEST_ACC))
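# Aside: the timestamped log backup above is a small but handy pattern for
# keeping a per-run copy of a log that later runs overwrite. A standalone
# form (helper name and path scheme are illustrative):
import datetime
import shutil

def backup_log(log_path):
    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    shutil.copyfile(log_path, log_path.replace('.txt', '{}.txt'.format(stamp)))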
# Main entrance for the short-term operation of both universal ems and local ems
import threading
import time

from configuration.configuration_time_line import default_time
from configuration.configuration_time_line import default_dead_line_time
from configuration.configuration_time_line import default_look_ahead_time_step
from data_management.information_collection import Information_Collection_Thread
from data_management.information_management import information_formulation_extraction
from data_management.information_management import information_receive_send
from optimal_power_flow.short_term_forecasting import ForecastingThread
from utils import Logger

logger_uems = Logger("Short_term_dispatch_UEMS")
logger_lems = Logger("Short_term_dispatch_LEMS")


class short_term_operation():
    # Short-term operation for EMS; two modes are proposed, one for the
    # universal EMS and one for the local EMS.
    def short_term_operation_uems(*args):
        from data_management.database_management import database_operation
        from optimal_power_flow.problem_formulation import problem_formulation
        from optimal_power_flow.problem_solving import Solving_Thread
        # General procedure for short-term operation:
        # 1) Information collection
        #    1.1) Local EMS forecasting
        #    1.2) Information exchange
        universal_models = args[0]
        local_models = args[1]
        socket_upload = args[2]
        socket_download = args[3]
        crop_method,
        SpatialElasticDisplacement(),
        ToTensor(opt.norm_value), norm_method
    ])
    temporal_transform = Compose([TemporalRandomCrop(opt.sample_duration)])
    target_transform = ClassLabel()
    training_data = get_training_set(opt, spatial_transform,
                                     temporal_transform, target_transform)
    train_loader = torch.utils.data.DataLoader(training_data,
                                               batch_size=opt.batch_size,
                                               shuffle=True,
                                               num_workers=opt.n_threads,
                                               pin_memory=True)
    train_logger = Logger(
        os.path.join(opt.result_path, 'train.log'),
        ['epoch', 'loss', 'acc', 'precision', 'recall', 'lr'])
    train_batch_logger = Logger(
        os.path.join(opt.result_path, 'train_batch.log'),
        ['epoch', 'batch', 'iter', 'loss', 'acc', 'precision', 'recall', 'lr'])

    if opt.nesterov:
        dampening = 0
    else:
        dampening = opt.dampening
    optimizer = optim.SGD(parameters,
                          lr=opt.learning_rate,
                          momentum=opt.momentum,
                          dampening=dampening,
def main():
    global best_acc
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.507, 0.487, 0.441], std=[0.267, 0.256, 0.276]),
    ])

    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root='./data', train=True, download=True, transform=transform_train)
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch,
                                  shuffle=True, num_workers=args.workers)
    testset = dataloader(root='./data', train=False, download=False, transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch,
                                 shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format(args.arch))
    if args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            cardinality=args.cardinality,
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('densenet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            growthRate=args.growthRate,
            compressionRate=args.compressionRate,
            dropRate=args.drop,
        )
    elif args.arch.startswith('wrn'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            widen_factor=args.widen_factor,
            dropRate=args.drop,
        )
    elif args.arch.startswith('resnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            block_name=args.block_name,
        )
    elif args.arch.startswith('preresnet'):
        model = models.__dict__[args.arch](
            num_classes=num_classes,
            depth=args.depth,
            block_name=args.block_name,
        )
    elif args.arch.startswith('horesnet'):
        model = models.__dict__[args.arch](num_classes=num_classes, depth=args.depth,
                                           eta=args.eta, block_name=args.block_name,
                                           feature_vec=args.feature_vec)
    elif args.arch.startswith('hopreresnet'):
        model = models.__dict__[args.arch](num_classes=num_classes, depth=args.depth,
                                           eta=args.eta, block_name=args.block_name,
                                           feature_vec=args.feature_vec)
    elif args.arch.startswith('nagpreresnet'):
        model = models.__dict__[args.arch](num_classes=num_classes, depth=args.depth,
                                           eta=args.eta, block_name=args.block_name,
                                           feature_vec=args.feature_vec)
    elif args.arch.startswith('mompreresnet'):
        model = models.__dict__[args.arch](num_classes=num_classes, depth=args.depth,
                                           eta=args.eta, block_name=args.block_name,
                                           feature_vec=args.feature_vec)
    elif args.arch.startswith('momentumnet'):
        model = models.__dict__[args.arch](num_classes=num_classes, depth=args.depth,
                                           step_size=args.step_size,
                                           momentum=args.momentum_const,
                                           block_name=args.block_name,
                                           feature_vec=args.feature_vec)
    elif args.arch.startswith('v2_preresnet'):
        if args.depth == 18:
            block_name = 'basicblock'
            num_blocks = [2, 2, 2, 2]
        elif args.depth == 34:
            block_name = 'basicblock'
            num_blocks = [3, 4, 6, 3]
        elif args.depth == 50:
            block_name = 'bottleneck'
            num_blocks = [3, 4, 6, 3]
        elif args.depth == 101:
            block_name = 'bottleneck'
            num_blocks = [3, 4, 23, 3]
        elif args.depth == 152:
            block_name = 'bottleneck'
            num_blocks = [3, 8, 36, 3]
        model = models.__dict__[args.arch](block_name=block_name,
                                           num_blocks=num_blocks,
                                           num_classes=num_classes)
    else:
        print('Model is specified wrongly - Use standard model')
        model = models.__dict__[args.arch](num_classes=num_classes)

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('    Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))
    criterion = nn.CrossEntropyLoss()

    if args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum, weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'radam':
        optimizer = RAdam(model.parameters(), lr=args.lr,
                          betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'adamw':
        optimizer = AdamW(model.parameters(), lr=args.lr,
                          betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, warmup=args.warmup)
    elif args.optimizer.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr,
                               betas=(args.beta1, args.beta2), weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'srsgd':
        iter_count = 1
        optimizer = SGD_Adaptive(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay, iter_count=iter_count,
                                 restarting_iter=args.restart_schedule[0])
    elif args.optimizer.lower() == 'sradam':
        iter_count = 1
        optimizer = SRNAdam(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2),
                            iter_count=iter_count, weight_decay=args.weight_decay,
                            restarting_iter=args.restart_schedule[0])
    elif args.optimizer.lower() == 'sradamw':
        iter_count = 1
        optimizer = SRAdamW(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2),
                            iter_count=iter_count, weight_decay=args.weight_decay,
                            warmup=args.warmup, restarting_iter=args.restart_schedule[0])
    elif args.optimizer.lower() == 'srradam':
        # NOTE: need to double-check this
        iter_count = 1
        optimizer = SRRAdam(model.parameters(), lr=args.lr, betas=(args.beta1, args.beta2),
                            iter_count=iter_count, weight_decay=args.weight_decay,
                            warmup=args.warmup, restarting_iter=args.restart_schedule[0])

    schedule_index = 1

    # Resume (the original created a second logger unconditionally before this
    # block; that duplicate has been dropped so log.txt is opened exactly once)
    title = '%s-' % args.dataset + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        best_acc = checkpoint['best_acc']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if args.optimizer.lower() in ('srsgd', 'sradam', 'sradamw', 'srradam'):
            iter_count = optimizer.param_groups[0]['iter_count']
            schedule_index = checkpoint['schedule_index']
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True)
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names([
            'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Acc.', 'Valid Acc.'
        ])

    if args.evaluate:
        print('\nEvaluation only')
        test_loss, test_acc = test(testloader, model, criterion, start_epoch, use_cuda)
        print(' Test Loss: %.8f, Test Acc: %.2f' % (test_loss, test_acc))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        if args.optimizer.lower() == 'srsgd':
            if epoch in args.schedule:
                optimizer = SGD_Adaptive(
                    model.parameters(), lr=args.lr * (args.gamma ** schedule_index),
                    weight_decay=args.weight_decay, iter_count=iter_count,
                    restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1
        elif args.optimizer.lower() == 'sradam':
            if epoch in args.schedule:
                optimizer = SRNAdam(
                    model.parameters(), lr=args.lr * (args.gamma ** schedule_index),
                    betas=(args.beta1, args.beta2), iter_count=iter_count,
                    weight_decay=args.weight_decay,
                    restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1
        elif args.optimizer.lower() == 'sradamw':
            if epoch in args.schedule:
                optimizer = SRAdamW(
                    model.parameters(), lr=args.lr * (args.gamma ** schedule_index),
                    betas=(args.beta1, args.beta2), iter_count=iter_count,
                    weight_decay=args.weight_decay, warmup=0,
                    restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1
        elif args.optimizer.lower() == 'srradam':
            if epoch in args.schedule:
                optimizer = SRRAdam(
                    model.parameters(), lr=args.lr * (args.gamma ** schedule_index),
                    betas=(args.beta1, args.beta2), iter_count=iter_count,
                    weight_decay=args.weight_decay, warmup=0,
                    restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1
        else:
            adjust_learning_rate(optimizer, epoch)

        logger.file.write('\nEpoch: [%d | %d] LR: %f' % (epoch + 1, args.epochs, state['lr']))

        if args.optimizer.lower() in ('srsgd', 'sradam', 'sradamw', 'srradam'):
            train_loss, train_acc, iter_count = train(trainloader, model, criterion,
                                                      optimizer, epoch, use_cuda, logger)
        else:
            train_loss, train_acc = train(trainloader, model, criterion,
                                          optimizer, epoch, use_cuda, logger)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda, logger)

        # append logger file
        logger.append([state['lr'], train_loss, test_loss, train_acc, test_acc])

        writer.add_scalars('train_loss', {args.model_name: train_loss}, epoch)
        writer.add_scalars('test_loss', {args.model_name: test_loss}, epoch)
        writer.add_scalars('train_acc', {args.model_name: train_acc}, epoch)
        writer.add_scalars('test_acc', {args.model_name: test_acc}, epoch)

        # save model
        is_best = test_acc > best_acc
        best_acc = max(test_acc, best_acc)
        save_checkpoint({
            'epoch': epoch + 1,
            'schedule_index': schedule_index,
            'state_dict': model.state_dict(),
            'acc': test_acc,
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict(),
        }, is_best, epoch, checkpoint=args.checkpoint)

    logger.file.write('Best acc:%f' % best_acc)
    logger.close()
    logger.plot()
    savefig(os.path.join(args.checkpoint, 'log.eps'))

    print('Best acc:')
    print(best_acc)

    # append the final result to a shared file, guarded by an exclusive lock
    with open("./all_results.txt", "a") as f:
        fcntl.flock(f, fcntl.LOCK_EX)
        f.write("%s\n" % args.checkpoint)
        f.write("best_acc %f\n\n" % best_acc)
        fcntl.flock(f, fcntl.LOCK_UN)
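# Aside: the fcntl.flock block above serializes appends from concurrent
# training runs that all write to one shared results file. A reusable form of
# the same pattern (helper name is illustrative; fcntl is Unix-only):
import fcntl

def append_locked(path, text):
    with open(path, 'a') as f:
        fcntl.flock(f, fcntl.LOCK_EX)   # block until we own the file lock
        f.write(text)
        fcntl.flock(f, fcntl.LOCK_UN)   # release before closing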