def create_opt(parameters: Iterator, opt: Optimizer, lr: float = None,
               l2: float = None, lr_patience: int = None):
    if opt == Optimizer.AdaBound:
        optimizer = AdaBound(parameters, lr=lr if lr is not None else 0.001,
                             weight_decay=l2 if l2 is not None else 0.)
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer, 150, gamma=0.1)
    elif opt == Optimizer.SGD:
        optimizer = optim.SGD(parameters, lr=lr if lr is not None else 0.1,
                              weight_decay=l2 if l2 is not None else 0.)
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5,
            patience=lr_patience if lr_patience is not None else 5)
    elif opt == Optimizer.Adam:
        optimizer = optim.Adam(parameters, lr=lr if lr is not None else 0.001,
                               weight_decay=l2 if l2 is not None else 0.)
        lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.1,
            patience=lr_patience if lr_patience is not None else 3)
    else:
        raise ValueError('Unsupported optimizer: {}'.format(opt))
    return optimizer, lr_scheduler
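# A minimal usage sketch (not from the original source): `model`, the Optimizer
# enum, and the imports used by create_opt above (optim, AdaBound) are assumed
# to be in scope. It mainly illustrates that ReduceLROnPlateau is stepped with
# the monitored metric, while StepLR is stepped without one.
import torch
import torch.nn as nn

model = nn.Linear(16, 2)
optimizer, scheduler = create_opt(model.parameters(), Optimizer.SGD, lr=0.05, lr_patience=3)

for epoch in range(10):
    # ... run the training loop and compute a validation loss here ...
    val_loss = 1.0  # placeholder value
    if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(val_loss)   # the plateau scheduler needs the metric
    else:
        scheduler.step()           # StepLR just counts epochs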
def _optimizer(self, parameters):
    """Build an AdaBound optimizer from the instance's hyperparameters.

    Args:
        parameters: iterable of model parameters to optimize.

    Returns:
        A configured AdaBound optimizer.
    """
    return AdaBound(
        parameters,
        lr=self.learning_rate,
        betas=self.betas,
        final_lr=self.final_learning_rate,
        gamma=self.gamma,
        eps=self.epsilon,
        weight_decay=self.weight_decay,
        amsbound=self.amsbound,
    )
def create_optimizer(args, model_params):
    if args.optim == 'sgd':
        return optim.SGD(model_params, args.lr, momentum=args.momentum,
                         weight_decay=args.weight_decay)
    elif args.optim == 'adagrad':
        return optim.Adagrad(model_params, args.lr,
                             weight_decay=args.weight_decay)
    elif args.optim == 'adam':
        return optim.Adam(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optim == 'amsgrad':
        return optim.Adam(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, amsgrad=True)
    elif args.optim == 'adabound':
        return NesterovAdaBound(model_params, args.lr, betas=(args.beta1, args.beta2),
                                final_lr=args.final_lr, gamma=args.gamma,
                                weight_decay=args.weight_decay)
    else:
        assert args.optim == 'amsbound'
        return AdaBound(model_params, args.lr, betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay, amsbound=True)
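# Usage sketch (not part of the original snippet): the Namespace fields mirror
# the attributes create_optimizer reads above; the concrete values are
# assumptions, and `model` is assumed to be defined elsewhere.
from argparse import Namespace

args = Namespace(optim='adabound', lr=1e-3, momentum=0.9, weight_decay=5e-4,
                 beta1=0.9, beta2=0.999, final_lr=0.1, gamma=1e-3)
optimizer = create_optimizer(args, model.parameters())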
def get_optimizer(model, hp: RunConfiguration) -> torch.optim.Optimizer:
    if hp.use_random_classifier:
        return LazyOptimizer()

    if hp.optimizer_type == OptimizerType.Adam:
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=hp.learning_rate,
                                     betas=(hp.adam_beta1, hp.adam_beta2),
                                     eps=hp.adam_eps,
                                     weight_decay=hp.adam_weight_decay,
                                     amsgrad=hp.adam_amsgrad)
    elif hp.optimizer_type == OptimizerType.SGD:
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=hp.learning_rate,
                                    momentum=hp.sgd_momentum,
                                    dampening=hp.sgd_dampening,
                                    nesterov=hp.sgd_nesterov)
    elif hp.optimizer_type == OptimizerType.RMS_PROP:
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=hp.learning_rate,
                                        alpha=hp.rmsprop_alpha,
                                        eps=hp.rmsprop_eps,
                                        weight_decay=hp.rmsprop_weight_decay,
                                        centered=hp.rmsprop_centered,
                                        momentum=hp.rmsprop_momentum)
    elif hp.optimizer_type == OptimizerType.AdaBound:
        from adabound import AdaBound
        optimizer = AdaBound(model.parameters(),
                             lr=hp.learning_rate,
                             final_lr=hp.adabound_finallr)
    # elif hp.learning_rate_type == LearningSchedulerType.Adadelta:
    #     optimizer = torch.optim.Adadelta(model.parameters(),
    #                                      lr=hp.learning_rate)

    return wrap_optimizer(hp, optimizer)
def GetOptimizer(conf, parameter, **kwargs):
    # Required parameters:
    if 'optimizer' not in conf:
        raise NameError('No optimizer was specified (--optimizer)')
    name = conf['optimizer'].lower()
    if 'lr' not in conf:
        conf['lr'] = 1e-3
    lr = conf['lr']

    # Optional parameters:
    option = {}
    if 'weight_decay' in conf:
        option['weight_decay'] = conf['weight_decay']

    # Optimizer selection:
    if name == 'sgd':
        if 'momentum' in conf:
            option['momentum'] = conf['momentum']
        if 'nesterov' in conf:
            option['nesterov'] = conf['nesterov']
        optim = torch.optim.SGD(parameter, lr=lr, **option)
    elif name == 'adam':
        optim = torch.optim.Adam(parameter, lr=lr, **option)
    elif name == 'adadelta':
        optim = torch.optim.Adadelta(parameter, lr=lr, **option)
    elif name == 'adagrad':
        optim = torch.optim.Adagrad(parameter, lr=lr, **option)
    elif name == 'adamw':
        optim = torch.optim.AdamW(parameter, lr=lr, **option)
    elif name == 'adamax':
        optim = torch.optim.Adamax(parameter, lr=lr, **option)
    elif name == 'adabound' and AVAILABLE_OPTIM_ADABOUND:
        optim = AdaBound(parameter, lr=lr, **option)
    elif name == 'radam' and AVAILABLE_OPTIM_RADAM:
        optim = RAdam(parameter, lr=lr, **option)
    else:
        raise NameError(
            'No optimizer is defined for the given name (--optimizer={})'.format(name))
    return optim
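# Hypothetical call (not from the original source): conf mirrors the keys the
# factory above actually reads; `model` is assumed to be defined elsewhere.
conf = {'optimizer': 'adabound', 'lr': 1e-3, 'weight_decay': 1e-4}
optimizer = GetOptimizer(conf, model.parameters())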
def set_model(args, cfg, checkpoint):
    # model
    if checkpoint:
        model = Classifier(pretrained=False)
        model.load_state_dict(checkpoint['model'])
    else:
        model = Classifier(pretrained=True)
    if args.data_parallel:
        model = DataParallel(model)
    model = model.to(device=args.device)

    # optimizer
    if cfg['optimizer'] == 'sgd':
        # note: this branch builds averaged SGD (ASGD), despite the 'sgd' key
        optimizer = optim.ASGD(model.parameters(),
                               lr=cfg['learning_rate'],
                               weight_decay=cfg['weight_decay'])
    elif cfg['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=cfg['learning_rate'],
                               weight_decay=cfg['weight_decay'])
    elif cfg['optimizer'] == 'adabound':
        optimizer = AdaBound(model.parameters(),
                             lr=cfg['learning_rate'],
                             final_lr=0.1,
                             weight_decay=cfg['weight_decay'])
    elif cfg['optimizer'] == 'amsbound':
        optimizer = AdaBound(model.parameters(),
                             lr=cfg['learning_rate'],
                             final_lr=0.1,
                             weight_decay=cfg['weight_decay'],
                             amsbound=True)

    # checkpoint
    if checkpoint and args.load_optimizer:
        optimizer.load_state_dict(checkpoint['optimizer'])

    return model, optimizer
def main_worker(gpu, ngpus_per_node, args): filename = 'model-{}-optimizer-{}-lr-{}-epochs-{}-decay-epoch-{}-eps{}-beta1{}-beta2{}-centralize-{}-reset{}-start-epoch-{}-l2-decay{}-l1-decay{}-batch-{}-warmup-{}-fixed-decay-{}'.format( args.arch, args.optimizer, args.lr, args.epochs, args.when, args.eps, args.beta1, args.beta2, args.centralize, args.reset, args.start_epoch, args.weight_decay, args.l1_decay, args.batch_size, args.warmup, args.fixed_decay) print(filename) global best_acc1 args.gpu = gpu if args.gpu is not None: print("Use GPU: {} for training".format(args.gpu)) if args.distributed: if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) if args.multiprocessing_distributed: # For multiprocessing distributed training, rank needs to be the # global rank among all the processes args.rank = args.rank * ngpus_per_node + gpu dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) # create model if args.pretrained: print("=> using pre-trained model '{}'".format(args.arch)) model = models.__dict__[args.arch](pretrained=True) else: print("=> creating model '{}'".format(args.arch)) if args.arch == 'shufflenet_v2_x0_5': model = shufflenet_v2_x0_5(pretrained=False) elif args.arch == 'se_resnet18': model = se_resnet18() else: model = models.__dict__[args.arch]() ''' model.half() # convert to half precision for layer in model.modules(): if isinstance(layer, nn.BatchNorm2d): layer.float() ''' if args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, # DistributedDataParallel will use all available devices. if args.gpu is not None: torch.cuda.set_device(args.gpu) model.cuda(args.gpu) # When using a single GPU per process and per # DistributedDataParallel, we need to divide the batch size # ourselves based on the total number of GPUs we have args.batch_size = int(args.batch_size / ngpus_per_node) args.workers = int( (args.workers + ngpus_per_node - 1) / ngpus_per_node) model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[args.gpu]) else: model.cuda() # DistributedDataParallel will divide and allocate batch_size to all # available GPUs if device_ids are not set model = torch.nn.parallel.DistributedDataParallel(model) elif args.gpu is not None: torch.cuda.set_device(args.gpu) model = model.cuda(args.gpu) else: # DataParallel will divide and allocate batch_size to all available GPUs if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): model.features = torch.nn.DataParallel(model.features) model.cuda() else: model = torch.nn.DataParallel(model).cuda() # define loss function (criterion) and optimizer criterion = nn.CrossEntropyLoss().cuda(args.gpu) if args.optimizer == 'sgd' and (not args.centralize): optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) elif args.optimizer == 'sgd' and args.centralize: optimizer = SGD_GC(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) elif args.optimizer == 'adabound': optimizer = AdaBound(model.parameters(), args.lr, eps=args.eps, betas=(args.beta1, args.beta2)) elif args.optimizer == 'adabelief': optimizer = AdaBelief(model.parameters(), args.lr, eps=args.eps, betas=(args.beta1, args.beta2), weight_decouple=args.weight_decouple, weight_decay=args.weight_decay, fixed_decay=args.fixed_decay, rectify=False) elif args.optimizer == 'adamw': optimizer = 
AdamW(model.parameters(), args.lr, eps=args.eps, betas=(args.beta1, args.beta2), weight_decay=args.weight_decay) #elif args.optimizer == 'msvag': # optimizer = MSVAG(model.parameters(), args.lr, eps=args.eps, betas=(args.beta1, args.beta2), weight_decay = args.weight_decay) else: print('Optimizer not found') # optionally resume from a checkpoint if args.resume: if os.path.isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) if args.gpu is None: checkpoint = torch.load(args.resume) else: # Map model to be loaded to specified single gpu. loc = 'cuda:{}'.format(args.gpu) checkpoint = torch.load(args.resume, map_location=loc) if args.start_epoch is None: args.start_epoch = checkpoint['epoch'] + 1 df = pd.read_csv(filename + '.csv') train1, train5, test1, test5 = df['train1'].tolist( ), df['train5'].tolist(), df['test1'].tolist( ), df['test5'].tolist() else: # if specify start epoch, and resume from checkpoint, not resume previous accuracy curves train1, train5, test1, test5 = [], [], [], [] best_acc1 = checkpoint['best_acc1'] if args.gpu is not None: # best_acc1 may be from a checkpoint from a different GPU best_acc1 = best_acc1.to(args.gpu) model.load_state_dict(checkpoint['state_dict']) if not args.reset_resume_optim: optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) else: if args.start_epoch is None: args.start_epoch = 0 train1, train5, test1, test5 = [], [], [], [] cudnn.benchmark = True # Data loading code traindir = os.path.join(args.data, 'train') valdir = os.path.join(args.data, 'val') normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) train_dataset = datasets.ImageFolder( traindir, transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize, ])) if args.distributed: train_sampler = torch.utils.data.distributed.DistributedSampler( train_dataset) else: train_sampler = None train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), num_workers=args.workers, pin_memory=True, sampler=train_sampler) val_loader = torch.utils.data.DataLoader(datasets.ImageFolder( valdir, transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize, ])), batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True) train_loader, val_loader = DataPrefetcher(train_loader), DataPrefetcher( val_loader) if args.evaluate: validate(val_loader, model, criterion, args) return for epoch in range(args.start_epoch, args.epochs): if args.distributed: train_sampler.set_epoch(epoch) adjust_learning_rate(optimizer, epoch, args) # train for one epoch _train1, _train5 = train(train_loader, model, criterion, optimizer, epoch, args) # evaluate on validation set acc1, _test5 = validate(val_loader, model, criterion, args) train1.append(_train1.data.cpu().numpy()) train5.append(_train5.data.cpu().numpy()) test1.append(acc1.data.cpu().numpy()) test5.append(_test5.data.cpu().numpy()) results = {} results['train1'] = train1 results['train5'] = train5 results['test1'] = test1 results['test5'] = test5 df = pd.DataFrame(data=results) df.to_csv(filename + '.csv') # remember best acc@1 and save checkpoint is_best = acc1 > best_acc1 best_acc1 = max(acc1, best_acc1) if not args.multiprocessing_distributed or ( 
args.multiprocessing_distributed and args.rank % ngpus_per_node == 0): save_checkpoint( { 'epoch': epoch + 1, 'arch': args.arch, 'state_dict': model.state_dict(), 'best_acc1': best_acc1, 'optimizer': optimizer.state_dict(), }, is_best, filename=filename, epoch=epoch, decay_epoch=args.decay_epoch)
opt = Opt().parse()

########################################
#                Model                 #
########################################
torch.manual_seed(opt.manual_seed)
model = get_model(opt)

if opt.optimizer == 'Adam':
    optimizer = torch.optim.Adam(
        model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay)
elif opt.optimizer == 'AdaBound':
    optimizer = AdaBound(
        model.parameters(), lr=opt.lr, final_lr=0.1, weight_decay=opt.weight_decay)
elif opt.optimizer == 'SGD':
    optimizer = torch.optim.SGD(
        model.parameters(), lr=opt.lr, momentum=opt.momentum,
        weight_decay=opt.weight_decay)
else:
    raise NotImplementedError("Only Adam, AdaBound and SGD are supported")

best_mAP = 0

########################################
#              Transforms              #
########################################
if not opt.no_train:
def main(args):
    """Train a pretrained multi-label classifier.

    :param args: parsed command-line arguments
    :return: None
    """
    print('Training pretrained {} with images on {}'.format(args.model_name, args.data_file))
    batch_size = args.batch_size
    n_epochs = args.epoch
    log_interval = args.log_interval
    lr = args.learning_rate
    data_file = args.data_file
    optimizer_name = args.optimizer
    momentum = args.momentum
    embedding_size = args.embedding_size
    model_name = args.model_name
    print('Train Parameters: \n'
          'Batch_size: {}; \n'
          'Epochs: {}; \n'
          'log_interval: {}; \n'
          'Learning Rate: {}: \n'
          'Data File: {}: \n'
          'Embedding Size: {}\n'
          'Model: {}'.format(batch_size, n_epochs, log_interval, lr,
                             data_file, embedding_size, model_name))

    writer_train = SummaryWriter(comment='multilabel_training pretrain-{}-train_{}-{}'.format(
        model_name, embedding_size, data_file))
    writer_test = SummaryWriter(comment='multilabel_training pretrain-{}-test_{}-{}'.format(
        model_name, embedding_size, data_file))

    # Prepare the dataloader
    loc_path = os.path.join(DATA_FOLDER_PATH, data_file)
    x, y = csv_to_x_y(pd.read_csv(loc_path))
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.01,
                                                        random_state=RANDOM_STATE)
    num_class = len(y_train[0])
    train_dataset = MeshImageDataset(x_train, y_train, IMAGE_FOLDER_PATH, normalize=True)
    val_dataset = MeshImageDataset(x_test, y_test, IMAGE_FOLDER_PATH, normalize=True)
    kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                               shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                             shuffle=True, **kwargs)

    # Prepare the model
    if model_name == 'Resnet-18':
        embedding_net = Resent18EmbeddingNet(embedding_size=embedding_size, pretrained=True)
    elif model_name == 'Resnet-50':
        embedding_net = Resent50EmbeddingNet(embedding_size=embedding_size, pretrained=True)
    elif model_name == 'Dense-121':
        embedding_net = Densenet121EmbeddingNet(embedding_size=embedding_size, pretrained=True)
    model = MultiLabelClassifer(embedding_net, num_class, embedding_size=embedding_size)
    if CUDA:
        model.cuda()

    loss_fn = nn.MultiLabelSoftMarginLoss()
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    elif optimizer_name == 'AdaBound':
        optimizer = AdaBound(model.parameters(), lr=lr, betas=(0.9, 0.999),
                             final_lr=0.1, gamma=0.001, weight_decay=5e-4)
    elif optimizer_name == 'AMSBound':
        optimizer = AdaBound(model.parameters(), lr=lr, betas=(0.9, 0.999),
                             final_lr=0.1, gamma=0.001, weight_decay=5e-4, amsbound=True)
    scheduler = lr_scheduler.StepLR(optimizer, 8, gamma=0.1, last_epoch=-1)

    # train_loader, test_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda,
    # log_interval, embedding_size, writer, start_epoch=0
    _, _, test_p, test_a, test_r, test_f1, test_a2, test_m = fit_multilabel(
        train_loader, val_loader, model, loss_fn, optimizer, scheduler,
        n_epochs, CUDA, log_interval, writer_train, writer_test)

    # print("Best precision = {} at epoch {};" \
    #       "Best accuracy = {} at epoch {};" \
    #       "Best recall = {} at epoch {};" \
    #       "Best f1 score = {} at epoch {}; ".format(max(test_p), test_p.index(max(test_p)),
    #                                                 max(test_a), test_p.index(max(test_a)),
    #                                                 max(test_r), test_p.index(max(test_r)),
    #                                                 max(test_f1), test_p.index(max(test_f1))))

    folder_path = os.path.join(MODEL_PATH,
                               datetime.now().strftime("%Y-%m-%d-%H-%M-%S") +
                               'b_' + str(batch_size) +
                               '_eb_' + str(embedding_size) +
                               '_epoch_' + str(n_epochs) +
                               '_' + optimizer_name +
                               '_multilabel_pretrained_{}_'.format(model_name) +
                               data_file[:-4])
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    torch.save(model.state_dict(), os.path.join(folder_path, 'trained_model'))
    writer_train.close()
    writer_test.close()
                           betas=(opt.beta1, opt.beta2), eps=opt.eps)
    optimizerG = AdaBelief(netG.parameters(), lr=opt.lr,
                           betas=(opt.beta1, opt.beta2), eps=opt.eps)
elif opt.optimizer == 'sgd':
    optimizerD = torch.optim.SGD(netD.parameters(), lr=opt.lr)
    optimizerG = torch.optim.SGD(netG.parameters(), lr=opt.lr)
elif opt.optimizer == 'rmsprop':
    optimizerD = torch.optim.RMSprop(netD.parameters(), lr=opt.lr)
    optimizerG = torch.optim.RMSprop(netG.parameters(), lr=opt.lr)
elif opt.optimizer == 'adabound':
    optimizerD = AdaBound(netD.parameters(), lr=opt.lr,
                          betas=(opt.beta1, opt.beta2), eps=opt.eps,
                          final_lr=opt.final_lr)
    optimizerG = AdaBound(netG.parameters(), lr=opt.lr,
                          betas=(opt.beta1, opt.beta2), eps=opt.eps,
                          final_lr=opt.final_lr)
elif opt.optimizer == 'yogi':
    optimizerD = Yogi(netD.parameters(), lr=opt.lr,
                      betas=(opt.beta1, opt.beta2), eps=opt.eps)
    optimizerG = Yogi(netG.parameters(), lr=opt.lr,
                      betas=(opt.beta1, opt.beta2),
class CQTModel(BaseModel): @staticmethod def modify_commandline_options(parser, is_train=True): """Add new model-specific options and rewrite default values for existing options. Parameters: parser -- the option parser is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options. Returns: the modified parser. """ opt, _ = parser.parse_known_args() preprocess = 'mulaw,normalize,cqt' parser.set_defaults(preprocess=preprocess) parser.add_argument('--wavenet_layers', type=int, default=30, help='wavenet layers') parser.add_argument('--wavenet_blocks', type=int, default=15, help='wavenet layers') parser.add_argument('--width', type=int, default=128, help='width') return parser def __init__(self, opt): BaseModel.__init__(self, opt) # call the initialization method of BaseModel self.loss_names = ['D_A', 'D_B'] if opt.isTrain: self.output_names = [] # ['aug_A', 'aug_B', 'rec_A', 'rec_B'] else: self.output_names = ['real_A', 'real_B', 'fake_B', 'fake_A'] self.params_names = ['params_A', 'params_B'] * 2 self.model_names = ['D_A', 'D_B'] if 'stft' in self.preprocess: stride = 2 * ((opt.nfft // 8) - 1) window = opt.nfft // opt.duration_ratio elif 'cqt' in self.preprocess: stride = opt.hop_length window = opt.hop_length self.netD_A = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks, opt.width, 256, 256, opt.tensor_height, window, stride).to(self.devices[-1]) self.netD_B = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks, opt.width, 256, 256, opt.tensor_height, window, stride).to(self.devices[-1]) self.softmax = nn.LogSoftmax(dim=1) # (1, 256, audio_len) -> pick 256 if self.isTrain: self.criterionDecode = nn.CrossEntropyLoss(reduction='mean') self.optimizer_D_A = AdaBound(self.netD_A.parameters(), lr=opt.lr, final_lr=0.1) self.optimizer_D_B = AdaBound(self.netD_B.parameters(), lr=opt.lr, final_lr=0.1) self.optimizers = [self.optimizer_D_A, self.optimizer_D_B] else: self.preprocesses = [] load_suffix = str(opt.load_iter) if opt.load_iter > 0 else opt.epoch self.load_networks(load_suffix) self.netD_A.eval() self.netD_B.eval() self.infer_A = NVWaveNet(**(self.netD_A.export_weights())) self.infer_B = NVWaveNet(**(self.netD_B.export_weights())) def set_input(self, input): A, params_A = input[0] B, params_B = input[1] self.real_A = params_A['original'].to(self.devices[0]) self.real_B = params_B['original'].to(self.devices[0]) self.aug_A = A.to(self.devices[0]) self.aug_B = B.to(self.devices[0]) self.params_A = self.decollate_params(params_A) self.params_B = self.decollate_params(params_B) def get_indices(self, y): y = (y + 1.) * .5 * self.opt.mu return y.long() def inv_indices(self, y): return y.float() / self.opt.mu * 2. - 1. def train(self): self.optimizer_D_A.zero_grad() real_A = self.get_indices(self.real_A).to(self.devices[-1]) pred_D_A = self.netD_A((self.aug_A, real_A)) self.loss_D_A = self.criterionDecode(pred_D_A, real_A) self.loss_D_A.backward() self.optimizer_D_A.step() self.optimizer_D_B.zero_grad() real_B = self.get_indices(self.real_B).to(self.devices[-1]) pred_D_B = self.netD_B((self.aug_B, real_B)) self.loss_D_B = self.criterionDecode(pred_D_B, real_B) self.loss_D_B.backward() self.optimizer_D_B.step() def test(self): with torch.no_grad(): self.fake_B = self.infer_A.infer(self.netD_A.get_cond_input(self.aug_A), Impl.AUTO) self.fake_A = self.infer_B.infer(self.netD_B.get_cond_input(self.aug_B), Impl.AUTO) self.fake_B = self.inv_indices(self.fake_B) self.fake_A = self.inv_indices(self.fake_A)
def __init__(self, import_trained=(False, ''), model_pretrained=(True, True), save_model=True, resnet_depth=50, lr=1e-3, momentum=0.09, nesterov=False, threshold=0.5, epochs=50, batch_size=64, train_val_split=0.7, data_interval='1min', predict_period=1, mins_interval=30, start_date='2020-08-24', end_date='2020-08-29'): ''' import_trained = (whether if you want to import a trained pth file, if yes what is the filename) model_pretrained = (whether if you want to import a pretrained model, whether if you want to only want to train the linear layers) save_model = whether to save model when training finished resnet_depth = to decide the depth of the residual network lr = learning rate for the stochastic gradient descend optimizer momentum = momentum for the sgd nesterov = whether to use nesterov momentum for sgd threshold = investment threshold, advices to invest if the returned probability > threshold epochs = training hyperparameter: the number of times the entire dataset is exposed to the neural network batch_size = training hyperparameter: the number of items to show the dataset at once train_val_split = training hyperparameter: how to split the data data_interval = the time interval between each datapoint predict_period = the amount of time period to predict forwards days = the amount of days to use mins_interval = the amount of minutes to show in the graph start_date = the first date to get data - data for each day would start from 9am and end at 8pm end_date = the last date to get data - data for each day would start from 9am and end at 8pm ''' self.__import_trained = import_trained self.__model_pretrained = model_pretrained self.__saveModel = save_model self.__resnet_depth = resnet_depth self.__threshold = threshold self.__epochs = epochs self.__batch_size = batch_size data = dataset.stockGraphGenerator(split=train_val_split, interval=data_interval, predict_period=predict_period, mins_interval=mins_interval, start_date=start_date, end_date=end_date, stride=15) self.__train_set = torch.utils.data.DataLoader( data.train_data, batch_size=self.__batch_size, shuffle=False) self.__test_set = torch.utils.data.DataLoader( data.test_data, batch_size=self.__batch_size, shuffle=False) self.__model = self.__loadmodelInstance( ) if self.__import_trained[0] else self.__createmodelInstance() self.__criterion = nn.BCELoss() self.__optim = AdaBound(self.__model.parameters(), amsbound=True, lr=lr, final_lr=0.1) self.__trainHist = [[], [], [], []]
class OriginalModel(BaseModel): @staticmethod def modify_commandline_options(parser, is_train=True): """Add new model-specific options and rewrite default values for existing options. Parameters: parser -- the option parser is_train -- if it is training phase or test phase. You can use this flag to add training-specific or test-specific options. Returns: the modified parser. """ preprocess = 'normalize,mulaw,cqt' parser.set_defaults(preprocess=preprocess, flatten=True) parser.add_argument('--wavenet_layers', type=int, default=40, help='wavenet layers') parser.add_argument('--wavenet_blocks', type=int, default=10, help='wavenet layers') parser.add_argument('--width', type=int, default=128, help='width') parser.add_argument('--dc_lambda', type=float, default=0.01, help='dc lambda') parser.add_argument('--tanh', action='store_true', help='tanh') parser.add_argument('--sigmoid', action='store_true', help='sigmoid') return parser def __init__(self, opt): BaseModel.__init__(self, opt) # call the initialization method of BaseModel self.loss_names = ['C_A_right', 'C_B_right', 'C_A_wrong', 'C_B_wrong', 'D_A', 'D_B'] if opt.isTrain: self.output_names = [] # ['aug_A', 'aug_B', 'rec_A', 'rec_B'] else: self.output_names = ['real_A', 'real_B', 'fake_B', 'fake_A'] self.params_names = ['params_A', 'params_B'] * 2 self.model_names = ['E', 'C', 'D_A', 'D_B'] # use get generator self.netE = getGenerator(self.devices[0], opt) self.netC = getDiscriminator(opt, self.devices[0]) self.netD_A = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks, opt.width, 256, 256, opt.tensor_height, 1, 1).to(self.devices[-1]) # opt.pool_length, opt.pool_length self.netD_B = WaveNet(opt.mu+1, opt.wavenet_layers, opt.wavenet_blocks, opt.width, 256, 256, opt.tensor_height, 1, 1).to(self.devices[-1]) # opt.pool_length, opt.pool_length self.softmax = nn.LogSoftmax(dim=1) # (1, 256, audio_len) -> pick 256 if self.isTrain: self.A_target = torch.zeros(opt.batch_size).to(self.devices[0]) self.B_target = torch.ones(opt.batch_size).to(self.devices[0]) self.criterionDC = nn.MSELoss(reduction='mean') self.criterionDecode = nn.CrossEntropyLoss(reduction='mean') self.optimizer_C = AdaBound(self.netC.parameters(), lr=opt.lr, final_lr=0.1) self.optimizer_D = AdaBound(itertools.chain(self.netE.parameters(), self.netD_A.parameters(), self.netD_B.parameters()), lr=opt.lr, final_lr=0.1) self.optimizers = [self.optimizer_C, self.optimizer_D] else: self.preprocesses = [] # TODO change structure of test.py and setup() instead load_suffix = str(opt.load_iter) if opt.load_iter > 0 else opt.epoch self.load_networks(load_suffix) self.netC.eval() self.netD_A.eval() self.netD_B.eval() self.infer_A = NVWaveNet(**(self.netD_A.export_weights())) self.infer_B = NVWaveNet(**(self.netD_B.export_weights())) def set_input(self, input): A, params_A = input[0] B, params_B = input[1] self.real_A = params_A['original'].to(self.devices[0]) self.real_B = params_B['original'].to(self.devices[0]) self.aug_A = A.to(self.devices[0]) self.aug_B = B.to(self.devices[0]) self.params_A = self.decollate_params(params_A) self.params_B = self.decollate_params(params_B) def get_indices(self, y): y = (y + 1.) * .5 * self.opt.mu return y.long() def inv_indices(self, y): return y.float() / self.opt.mu * 2. - 1. 
def train(self): self.optimizer_C.zero_grad() encoded_A = self.netE(self.aug_A) # Input range: (-1, 1) Output: R^64 encoded_A = nn.functional.interpolate(encoded_A, size=self.opt.audio_length).to(self.devices[-1]) pred_C_A = self.netC(encoded_A) self.loss_C_A_right = self.opt.dc_lambda * self.criterionDC(pred_C_A, self.A_target) self.loss_C_A_right.backward() encoded_B = self.netE(self.aug_B) encoded_B = nn.functional.interpolate(encoded_B, size=self.opt.audio_length).to(self.devices[-1]) pred_C_B = self.netC(encoded_B) self.loss_C_B_right = self.opt.dc_lambda * self.criterionDC(pred_C_B, self.B_target) self.loss_C_B_right.backward() self.optimizer_C.step() self.optimizer_D.zero_grad() encoded_A = self.netE(self.aug_A) # Input range: (-1, 1) Output: R^64 encoded_A = nn.functional.interpolate(encoded_A, size=self.opt.audio_length).to(self.devices[-1]) pred_C_A = self.netC(encoded_A) self.loss_C_A_wrong = self.criterionDC(pred_C_A, self.A_target) real_A = self.get_indices(self.real_A).to(self.devices[-1]) pred_D_A = self.netD_A((encoded_A, real_A)) self.loss_D_A = self.criterionDecode(pred_D_A, real_A) loss = self.loss_D_A - self.opt.dc_lambda * self.loss_C_A_wrong loss.backward() encoded_B = self.netE(self.aug_B) encoded_B = nn.functional.interpolate(encoded_B, size=self.opt.audio_length).to(self.devices[-1]) pred_C_B = self.netC(encoded_B) self.loss_C_B_wrong = self.criterionDC(pred_C_B, self.B_target) real_B = self.get_indices(self.real_B).to(self.devices[-1]) pred_D_B = self.netD_B((encoded_B, real_B)) self.loss_D_B = self.criterionDecode(pred_D_B, real_B) loss = self.loss_D_B - self.opt.dc_lambda * self.loss_C_B_wrong loss.backward() self.optimizer_D.step() def test(self): with torch.no_grad(): encoded_A = self.netE(self.aug_A) encoded_B = self.netE(self.aug_B) self.fake_B = self.infer_A.infer(self.netD_A.get_cond_input(encoded_B), Impl.AUTO) self.fake_A = self.infer_B.infer(self.netD_B.get_cond_input(encoded_A), Impl.AUTO) self.fake_B = self.inv_indices(self.fake_B) self.fake_A = self.inv_indices(self.fake_A)
batch_size = 64
nb_epoch = 30

img_rows, img_cols = 32, 32
img_channels = 3

model = densenet.DenseNet([1, 2, 3, 2],
                          include_top=True,
                          weights=None,
                          pooling='avg',
                          input_shape=(img_rows, img_cols, img_channels),
                          classes=10)
model.compile(
    loss='categorical_crossentropy',
    optimizer=AdaBound(),  # keras.optimizers.SGD(momentum=0.9),
    metrics=['acc'])
model.summary()

(trainX, trainY), (testX, testY) = keras.datasets.cifar10.load_data()

trainX = trainX.astype('float32')
testX = testX.astype('float32')

trainX = densenet.preprocess_input(trainX)
testX = densenet.preprocess_input(testX)

Y_train = keras.utils.to_categorical(trainY, 10)
Y_test = keras.utils.to_categorical(testY, 10)

history = model.fit(trainX,
def main(args): # Set up logging and devices args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True) log = util.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) device, args.gpu_ids = util.get_available_devices() log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True))) args.batch_size *= max(1, len(args.gpu_ids)) # Set random seed log.info('Using random seed {}...'.format(args.seed)) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) char_vectors = util.torch_from_json(args.char_emb_file) # Get model log.info('Building model...') model = BiDAF(word_vectors=word_vectors, char_vectors=char_vectors, hidden_size=args.hidden_size, drop_prob=args.drop_prob) model = nn.DataParallel(model, args.gpu_ids) if args.load_path: log.info('Loading checkpoint from {}...'.format(args.load_path)) model, step = util.load_model(model, args.load_path, args.gpu_ids) else: step = 0 model = model.to(device) model.train() ema = util.EMA(model, args.ema_decay) # Get saver saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints, metric_name=args.metric_name, maximize_metric=args.maximize_metric, log=log) # Get optimizer and scheduler #optimizer = optim.Adamax(model.parameters(), args.lr, # weight_decay=args.l2_wd) #scheduler = sched.LambdaLR(optimizer, lambda s: 1.) # Constant LR optimizer = AdaBound(model.parameters()) # Get data loader log.info('Building dataset...') train_dataset = SQuAD(args.train_record_file, args.use_squad_v2) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn) dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2) dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Train log.info('Training...') steps_till_eval = args.eval_steps epoch = step // len(train_dataset) while epoch != args.num_epochs: epoch += 1 log.info('Starting epoch {}...'.format(epoch)) with torch.enable_grad(), \ tqdm(total=len(train_loader.dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids, cwf in train_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) cc_idxs = cc_idxs.to(device) qc_idxs = qc_idxs.to(device) batch_size = cw_idxs.size(0) cwf = cwf.to(device) optimizer.zero_grad() # Forward log_p1, log_p2 = model(cc_idxs, qc_idxs, cw_idxs, qw_idxs, cwf) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) loss_val = loss.item() # Backward loss.backward() nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() #scheduler.step(step // batch_size) ema(model, step // batch_size) # Log info step += batch_size progress_bar.update(batch_size) progress_bar.set_postfix(epoch=epoch, NLL=loss_val) tbx.add_scalar('train/NLL', loss_val, step) tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step) steps_till_eval -= batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint log.info('Evaluating at step {}...'.format(step)) ema.assign(model) results, pred_dict = evaluate(model, dev_loader, device, args.dev_eval_file, args.max_ans_len, args.use_squad_v2) saver.save(step, model, results[args.metric_name], device) ema.resume(model) # Log 
to console results_str = ', '.join('{}: {:05.2f}'.format(k, v) for k, v in results.items()) log.info('Dev {}'.format(results_str)) # Log to TensorBoard log.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar('dev/{}'.format(k), v, step) util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals)
def train_model(cfg: DictConfig) -> None: output_dir = Path.cwd() logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=str(output_dir / 'log.txt'), level=logging.DEBUG) # hydraでlogがコンソールにも出力されてしまうのを抑制する logger = logging.getLogger() assert isinstance(logger.handlers[0], logging.StreamHandler) logger.handlers[0].setLevel(logging.CRITICAL) if cfg.gpu >= 0: device = torch.device(f"cuda:{cfg.gpu}") # noinspection PyUnresolvedReferences torch.backends.cudnn.benchmark = True else: device = torch.device("cpu") model = load_model(model_name=cfg.model_name) model.to(device) if cfg.swa.enable: swa_model = AveragedModel(model=model, device=device) else: swa_model = None # optimizer = optim.SGD( # model.parameters(), lr=cfg.optimizer.lr, # momentum=cfg.optimizer.momentum, # weight_decay=cfg.optimizer.weight_decay, # nesterov=cfg.optimizer.nesterov # ) optimizer = AdaBound(model.parameters(), lr=cfg.optimizer.lr, final_lr=cfg.optimizer.final_lr, weight_decay=cfg.optimizer.weight_decay, amsbound=False) scaler = torch.cuda.amp.GradScaler(enabled=cfg.use_amp) if cfg.scheduler.enable: scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts( optimizer=optimizer, T_0=1, T_mult=1, eta_min=cfg.scheduler.eta_min) # scheduler = optim.lr_scheduler.CyclicLR( # optimizer, base_lr=cfg.scheduler.base_lr, # max_lr=cfg.scheduler.max_lr, # step_size_up=cfg.scheduler.step_size, # mode=cfg.scheduler.mode # ) else: scheduler = None if cfg.input_dir is not None: input_dir = Path(cfg.input_dir) model_path = input_dir / 'model.pt' print('load model from {}'.format(model_path)) model.load_state_dict(torch.load(model_path)) state_path = input_dir / 'state.pt' print('load optimizer state from {}'.format(state_path)) checkpoint = torch.load(state_path, map_location=device) epoch = checkpoint['epoch'] t = checkpoint['t'] optimizer.load_state_dict(checkpoint['optimizer']) if cfg.swa.enable and 'swa_model' in checkpoint: swa_model.load_state_dict(checkpoint['swa_model']) if cfg.scheduler.enable and 'scheduler' in checkpoint: scheduler.load_state_dict(checkpoint['scheduler']) if cfg.use_amp and 'scaler' in checkpoint: scaler.load_state_dict(checkpoint['scaler']) else: epoch = 0 t = 0 # カレントディレクトリが変更されるので、データのパスを修正 if isinstance(cfg.train_data, str): train_path_list = (hydra.utils.to_absolute_path(cfg.train_data), ) else: train_path_list = [ hydra.utils.to_absolute_path(path) for path in cfg.train_data ] logging.info('train data path: {}'.format(train_path_list)) train_data = load_train_data(path_list=train_path_list) train_dataset = train_data train_data = train_dataset[0] test_data = load_test_data( path=hydra.utils.to_absolute_path(cfg.test_data)) logging.info('train position num = {}'.format(len(train_data))) logging.info('test position num = {}'.format(len(test_data))) train_loader = DataLoader(train_data, device=device, batch_size=cfg.batch_size, shuffle=True) validation_loader = DataLoader(test_data[:cfg.test_batch_size * 10], device=device, batch_size=cfg.test_batch_size) test_loader = DataLoader(test_data, device=device, batch_size=cfg.test_batch_size) train_writer = SummaryWriter(log_dir=str(output_dir / 'train')) test_writer = SummaryWriter(log_dir=str(output_dir / 'test')) train_metrics = Metrics() eval_interval = cfg.eval_interval total_epoch = cfg.epoch + epoch for e in range(cfg.epoch): train_metrics_epoch = Metrics() model.train() desc = 'train [{:03d}/{:03d}]'.format(epoch + 1, total_epoch) train_size = len(train_loader) * 4 for x1, x2, t1, t2, z, value, mask in 
tqdm(train_loader, desc=desc): with torch.cuda.amp.autocast(enabled=cfg.use_amp): model.zero_grad() metric_value = compute_metric(model=model, x1=x1, x2=x2, t1=t1, t2=t2, z=z, value=value, mask=mask, val_lambda=cfg.val_lambda, beta=cfg.beta) scaler.scale(metric_value.loss).backward() if cfg.clip_grad_max_norm: scaler.unscale_(optimizer) torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.clip_grad_max_norm) scaler.step(optimizer) scaler.update() if cfg.swa.enable and t % cfg.swa.freq == 0: swa_model.update_parameters(model=model) t += 1 if cfg.scheduler.enable: scheduler.step(t / train_size) train_metrics.update(metric_value=metric_value) train_metrics_epoch.update(metric_value=metric_value) # print train loss if t % eval_interval == 0: model.eval() validation_metrics = Metrics() with torch.no_grad(): # noinspection PyAssignmentToLoopOrWithParameter for x1, x2, t1, t2, z, value, mask in validation_loader: m = compute_metric(model=model, x1=x1, x2=x2, t1=t1, t2=t2, z=z, value=value, mask=mask, val_lambda=cfg.val_lambda) validation_metrics.update(metric_value=m) last_lr = (scheduler.get_last_lr()[-1] if cfg.scheduler.enable else cfg.optimizer.lr) logging.info( 'epoch = {}, iteration = {}, lr = {}, {}, {}'.format( epoch + 1, t, last_lr, make_metric_log('train', train_metrics), make_metric_log('validation', validation_metrics))) write_summary(writer=train_writer, metrics=train_metrics, t=t, prefix='iteration') write_summary(writer=test_writer, metrics=validation_metrics, t=t, prefix='iteration') train_metrics = Metrics() train_writer.add_scalar('learning_rate', last_lr, global_step=t) model.train() elif t % cfg.train_log_interval == 0: last_lr = (scheduler.get_last_lr()[-1] if cfg.scheduler.enable else cfg.optimizer.lr) logging.info('epoch = {}, iteration = {}, lr = {}, {}'.format( epoch + 1, t, last_lr, make_metric_log('train', train_metrics))) write_summary(writer=train_writer, metrics=train_metrics, t=t, prefix='iteration') train_metrics = Metrics() train_writer.add_scalar('learning_rate', last_lr, global_step=t) if cfg.swa.enable: with torch.cuda.amp.autocast(enabled=cfg.use_amp): desc = 'update BN [{:03d}/{:03d}]'.format( epoch + 1, total_epoch) np.random.shuffle(train_data) # モーメントの計算にはそれなりのデータ数が必要 # 1/16に減らすより全部使ったほうが精度が高かった # データ量を10分程度で処理できる分量に制限 # メモリが連続でないとDataLoaderで正しく処理できないかもしれない train_data = np.ascontiguousarray(train_data[::4]) torch.optim.swa_utils.update_bn(loader=tqdm( hcpe_loader(data=train_data, device=device, batch_size=cfg.batch_size), desc=desc, total=len(train_data) // cfg.batch_size), model=swa_model) # print train loss for each epoch test_metrics = Metrics() if cfg.swa.enable: test_model = swa_model else: test_model = model test_model.eval() with torch.no_grad(): desc = 'test [{:03d}/{:03d}]'.format(epoch + 1, total_epoch) for x1, x2, t1, t2, z, value, mask in tqdm(test_loader, desc=desc): metric_value = compute_metric(model=test_model, x1=x1, x2=x2, t1=t1, t2=t2, z=z, value=value, mask=mask, val_lambda=cfg.val_lambda) test_metrics.update(metric_value=metric_value) logging.info('epoch = {}, iteration = {}, {}, {}'.format( epoch + 1, t, make_metric_log('train', train_metrics_epoch), make_metric_log('test', test_metrics))) write_summary(writer=train_writer, metrics=train_metrics_epoch, t=epoch + 1, prefix='epoch') write_summary(writer=test_writer, metrics=test_metrics, t=epoch + 1, prefix='epoch') epoch += 1 if e != cfg.epoch - 1: # 訓練データを入れ替える train_data = train_dataset[e + 1] train_loader.data = train_data train_writer.close() test_writer.close() print('save the 
model') torch.save(model.state_dict(), output_dir / 'model.pt') print('save the optimizer') state = {'epoch': epoch, 't': t, 'optimizer': optimizer.state_dict()} if cfg.scheduler.enable: state['scheduler'] = scheduler.state_dict() if cfg.swa.enable: state['swa_model'] = swa_model.state_dict() if cfg.use_amp: state['scaler'] = scaler.state_dict() torch.save(state, output_dir / 'state.pt')
                hidden_dim=1024, dropout=0.1, emb_share=True).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=vocab[PAD], reduction="none").to(device)
# crit = LabelSmoothing(size=vocab_size, padding_idx=vocab[PAD], smoothing=0.1).to(device)
# def criterion(x, y):
#     x = F.log_softmax(x, dim=-1)
#     n_token = (y != vocab[PAD]).data.sum().item()
#     # n_token = y.shape[0]
#     return crit(x, y) / n_token

# optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
lr = 1e-3
w_decay = 1e-6
optimizer = AdaBound(model.parameters(), lr=lr, final_lr=0.1, weight_decay=w_decay)

# In[7]:
import wandb
wandb.init(entity="george0828zhang", project="contextual-matching-policy-gradient")
wandb.config.update({
    "batch_size": batch_size,
    "learning rate": lr,
    "weight decay": w_decay
})
wandb.watch([model])
class ECGTrainer(object): def __init__(self, block_config='small', num_threads=2): torch.set_num_threads(num_threads) self.n_epochs = 60 self.batch_size = 128 self.scheduler = None self.num_threads = num_threads self.cuda = torch.cuda.is_available() if block_config == 'small': self.block_config = (3, 6, 12, 8) else: self.block_config = (6, 12, 24, 16) self.__build_model() self.__build_criterion() self.__build_optimizer() self.__build_scheduler() return def __build_model(self): self.model = DenseNet( num_classes=55, block_config=self.block_config ) if self.cuda: self.model.cuda() return def __build_criterion(self): self.criterion = ComboLoss( losses=['mlsml', 'f1', 'focal'], weights=[1, 1, 3] ) return def __build_optimizer(self): opt_params = {'lr': 1e-3, 'weight_decay': 0.0, 'params': self.model.parameters()} self.optimizer = AdaBound(amsbound=True, **opt_params) return def __build_scheduler(self): self.scheduler = optim.lr_scheduler.ReduceLROnPlateau( self.optimizer, 'max', factor=0.333, patience=5, verbose=True, min_lr=1e-5) return def run(self, trainset, validset, model_dir): print('=' * 100 + '\n' + 'TRAINING MODEL\n' + '-' * 100 + '\n') model_path = os.path.join(model_dir, 'model.pth') thresh_path = os.path.join(model_dir, 'threshold.npy') dataloader = { 'train': ECGLoader(trainset, self.batch_size, True, self.num_threads).build(), 'valid': ECGLoader(validset, 64, False, self.num_threads).build() } best_metric, best_preds = None, None for epoch in range(self.n_epochs): e_message = '[EPOCH {:0=3d}/{:0=3d}]'.format(epoch + 1, self.n_epochs) for phase in ['train', 'valid']: ep_message = e_message + '[' + phase.upper() + ']' if phase == 'train': self.model.train() else: self.model.eval() losses, preds, labels = [], [], [] batch_num = len(dataloader[phase]) for ith_batch, data in enumerate(dataloader[phase]): ecg, label = [d.cuda() for d in data] if self.cuda else data pred = self.model(ecg) loss = self.criterion(pred, label) if phase == 'train': self.optimizer.zero_grad() loss.backward() self.optimizer.step() pred = torch.sigmoid(pred) pred = pred.data.cpu().numpy() label = label.data.cpu().numpy() bin_pred = np.copy(pred) bin_pred[bin_pred > 0.5] = 1 bin_pred[bin_pred <= 0.5] = 0 f1 = f1_score(label.flatten(), bin_pred.flatten()) losses.append(loss.item()) preds.append(pred) labels.append(label) sr_message = '[STEP {:0=3d}/{:0=3d}]-[Loss: {:.6f} F1: {:.6f}]' sr_message = ep_message + sr_message print(sr_message.format(ith_batch + 1, batch_num, loss, f1), end='\r') preds = np.concatenate(preds, axis=0) labels = np.concatenate(labels, axis=0) bin_preds = np.copy(preds) bin_preds[bin_preds > 0.5] = 1 bin_preds[bin_preds <= 0.5] = 0 avg_loss = np.mean(losses) avg_f1 = f1_score(labels.flatten(), bin_preds.flatten()) er_message = '-----[Loss: {:.6f} F1: {:.6f}]' er_message = '\n\033[94m' + ep_message + er_message + '\033[0m' print(er_message.format(avg_loss, avg_f1)) if phase == 'valid': if self.scheduler is not None: self.scheduler.step(avg_f1) if best_metric is None or best_metric < avg_f1: best_metric = avg_f1 best_preds = [labels, preds] best_loss_metrics = [epoch + 1, avg_loss, avg_f1] torch.save(self.model.state_dict(), model_path) print('[Best validation metric, model: {}]'.format(model_path)) print() best_f1, best_th = best_f1_score(*best_preds) np.save(thresh_path, np.array(best_th)) print('[Searched Best F1: {:.6f}]\n'.format(best_f1)) res_message = '[VALIDATION PERFORMANCE: BEST F1]' + '\n' \ + '[EPOCH:{} LOSS:{:.6f} F1:{:.6f} BEST F1:{:.6f}]\n'.format( best_loss_metrics[0], 
best_loss_metrics[1], best_loss_metrics[2], best_f1) \ + '[BEST THRESHOLD:\n{}]\n'.format(best_th) \ + '=' * 100 + '\n' print(res_message) return
def main(args): # load data datapath = "./data" validation_size = args.valid train_imgs, train_lbls, validation_imgs, validation_lbls = KMNISTDataLoader( validation_size).load(datapath) test_imgs = LoadTestData(datapath) # dir settings settings = f'{args.model}_o{args.optimizer}_b{args.batchsize}_e{args.epochs}_f{args.factor}_p{args.patience}_m{args.mixup}_v{args.valid}' if args.swa: settings = f'{settings}_SWA' dir_name = f'./out/{settings}' nowtime = datetime.now().strftime("%y%m%d_%H%M") if args.force: dir_name = f'{dir_name}_{nowtime}' if args.ensemble > 1: settings = f'{settings}_ensemble{args.ensemble}' dir_name_base = f'{dir_name}_ensemble{args.ensemble}' models = [] results = np.zeros((test_imgs.shape[0], 10)) # define model for i in range(args.ensemble): model = eval(f'{args.model}') loss = keras.losses.categorical_crossentropy if args.optimizer == 'adam': optimizer = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999) if args.optimizer == 'adabound': optimizer = AdaBound(lr=1e-03, final_lr=0.1, gamma=1e-03, weight_decay=5e-4, amsbound=False) model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy']) # model.summary() if args.ensemble > 1: models.append(model) # data generator datagen = MyImageDataGenerator( rotation_range=15, width_shift_range=0.1, height_shift_range=0.1, shear_range=0.2, zoom_range=0.08, mix_up_alpha=args.mixup, #random_crop=(28, 28), random_erasing=True, ) # train each model for i in range(args.ensemble): # train settings batch_size = args.batchsize initial_epoch = args.initialepoch epochs = args.epochs steps_per_epoch = train_imgs.shape[0] // batch_size if epochs > initial_epoch: if args.ensemble > 1: dir_name = f'{dir_name_base}/{i}' model = models[i] # load best weight if only already trained if len(sorted(glob.glob(f'./{dir_name}/*.hdf5'))): best_weight_path = sorted( glob.glob(f'./{dir_name}/*.hdf5'))[-1] model.load_weights(best_weight_path) initial_epoch = re.search(r'weights.[0-9]{4}', best_weight_path) initial_epoch = int(initial_epoch.group().replace( 'weights.', '')) else: os.makedirs(f'./{dir_name}', exist_ok=True) # each epoch settings if validation_size > 0: reduce_lr = keras.callbacks.ReduceLROnPlateau( monitor='val_loss', factor=args.factor, patience=args.patience, verbose=1, cooldown=1, min_lr=1e-5) cp = keras.callbacks.ModelCheckpoint( filepath=f'./{dir_name}' + '/weights.{epoch:04d}-{loss:.6f}-{acc:.6f}-{val_loss:.6f}-{val_acc:.6f}.hdf5', monitor='val_loss', verbose=0, save_best_only=True, mode='auto') else: reduce_lr = keras.callbacks.ReduceLROnPlateau( monitor='loss', factor=args.factor, patience=args.patience, verbose=1, cooldown=1, min_lr=1e-5) cp = keras.callbacks.ModelCheckpoint( filepath=f'./{dir_name}' + '/weights.{epoch:04d}-{loss:.6f}-{acc:.6f}.hdf5', monitor='loss', verbose=0, save_best_only=True, mode='auto') cbs = [reduce_lr, cp] if args.swa: swa = SWA(f'{dir_name}/swa.hdf5', epochs - 40) cbs.append(swa) # start training print(f'===============train start:{dir_name}===============') history = model.fit_generator( datagen.flow(train_imgs, train_lbls, batch_size=batch_size), steps_per_epoch=steps_per_epoch, initial_epoch=initial_epoch, epochs=epochs, validation_data=(validation_imgs, validation_lbls), callbacks=cbs, verbose=1, ) # output history plot_history(history, dir_name=dir_name) # test each model for i in range(args.ensemble): if args.ensemble > 1: dir_name = f'{dir_name_base}/{i}' model = models[i] print(f'test start:{dir_name}') # load best weight if len(sorted(glob.glob(f'./{dir_name}/weights*.hdf5'))) > 1: 
for p in sorted(glob.glob(f'./{dir_name}/weights*.hdf5'))[:-1]: os.remove(p) best_weight_path = sorted(glob.glob(f'./{dir_name}/weights*.hdf5'))[-1] if args.swa: print('Load SWA weights.') best_weight_path = sorted(glob.glob(f'./{dir_name}/swa.hdf5'))[-1] model.load_weights(best_weight_path) # test with test time augmentation predicts = TTA(model, test_imgs, tta_steps=50) np.save(f'./{dir_name}/predicts_vec.npy', predicts) if args.ensemble > 1: results += predicts # get argmax index if args.ensemble > 1: predict_labels = np.argmax(results, axis=1) dir_name = dir_name_base else: predict_labels = np.argmax(predicts, axis=1) # create submit file submit = pd.DataFrame(data={"ImageId": [], "Label": []}) submit.ImageId = list(range(1, predict_labels.shape[0] + 1)) submit.Label = predict_labels submit.to_csv(f"./{dir_name}/submit{nowtime}_{settings}.csv", index=False)
def train(region): np.random.seed(0) torch.manual_seed(0) input_len = 10 encoder_units = 32 decoder_units = 64 encoder_rnn_layers = 3 encoder_dropout = 0.2 decoder_dropout = 0.2 input_size = 2 output_size = 1 predict_len = 5 batch_size = 16 epochs = 500 force_teacher = 0.8 train_dataset, test_dataset, train_max, train_min = create_dataset( input_len, predict_len, region) train_loader = DataLoader( train_dataset, batch_size=batch_size, shuffle=True, drop_last=True) test_loader = DataLoader( test_dataset, batch_size=batch_size, shuffle=False, drop_last=True) enc = Encoder(input_size, encoder_units, input_len, encoder_rnn_layers, encoder_dropout) dec = Decoder(encoder_units*2, decoder_units, input_len, input_len, decoder_dropout, output_size) optimizer = AdaBound(list(enc.parameters()) + list(dec.parameters()), 0.01, final_lr=0.1) # optimizer = optim.Adam(list(enc.parameters()) + list(dec.parameters()), 0.01) criterion = nn.MSELoss() mb = master_bar(range(epochs)) for ep in mb: train_loss = 0 enc.train() dec.train() for encoder_input, decoder_input, target in progress_bar(train_loader, parent=mb): optimizer.zero_grad() enc_vec = enc(encoder_input) h = enc_vec[:, -1, :] _, c = dec.initHidden(batch_size) x = decoder_input[:, 0] pred = [] for pi in range(predict_len): x, h, c = dec(x, h, c, enc_vec) rand = np.random.random() pred += [x] if rand < force_teacher: x = decoder_input[:, pi] pred = torch.cat(pred, dim=1) # loss = quantile_loss(pred, target) loss = criterion(pred, target) loss.backward() optimizer.step() train_loss += loss.item() test_loss = 0 enc.eval() dec.eval() for encoder_input, decoder_input, target in progress_bar(test_loader, parent=mb): with torch.no_grad(): enc_vec = enc(encoder_input) h = enc_vec[:, -1, :] _, c = dec.initHidden(batch_size) x = decoder_input[:, 0] pred = [] for pi in range(predict_len): x, h, c = dec(x, h, c, enc_vec) pred += [x] pred = torch.cat(pred, dim=1) # loss = quantile_loss(pred, target) loss = criterion(pred, target) test_loss += loss.item() print( f"Epoch {ep} Train Loss {train_loss/len(train_loader)} Test Loss {test_loss/len(test_loader)}") if not os.path.exists("models"): os.mkdir("models") torch.save(enc.state_dict(), f"models/{region}_enc.pth") torch.save(dec.state_dict(), f"models/{region}_dec.pth") test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, drop_last=False) rmse = 0 p = 0 predicted = [] true_target = [] enc.eval() dec.eval() for encoder_input, decoder_input, target in progress_bar(test_loader, parent=mb): with torch.no_grad(): enc_vec = enc(encoder_input) x = decoder_input[:, 0] h, c = dec.initHidden(1) pred = [] for pi in range(predict_len): x, h, c = dec(x, h, c, enc_vec) pred += [x] pred = torch.cat(pred, dim=1) predicted += [pred[0, p].item()] true_target += [target[0, p].item()] predicted = np.array(predicted).reshape(1, -1) predicted = predicted * (train_max - train_min) + train_min true_target = np.array(true_target).reshape(1, -1) true_target = true_target * (train_max - train_min) + train_min rmse, peasonr = calc_metric(predicted, true_target) print(f"{region} RMSE {rmse}") print(f"{region} r {peasonr[0]}") return f"{region} RMSE {rmse} r {peasonr[0]}"
metric_fc.to(device)

params = [{'params': model.parameters()}, {'params': metric_fc.parameters()}]

if Config.optimizer == 'sgd':
    optimizer = torch.optim.SGD(params, lr=opt.lr,
                                weight_decay=opt.weight_decay,
                                momentum=.9, nesterov=True)
elif Config.optimizer == 'adabound':
    optimizer = AdaBound(params=params, lr=opt.lr,
                         final_lr=opt.final_lr, amsbound=opt.amsbound)
elif Config.optimizer == 'adam':
    optimizer = torch.optim.Adam(params, lr=opt.lr,
                                 weight_decay=opt.weight_decay)
else:
    raise ValueError('Invalid Optimizer Name: {}'.format(Config.optimizer))

scheduler = StepLR(optimizer, step_size=opt.lr_step, gamma=0.1)

callback_manager = CallbackManager([
    TensorboardLogger(log_dir=Config.checkpoints_path),
    LoggingCallback(),
    WeightCheckpointCallback(save_to=Config.checkpoints_path,
                             metric_model=metric_fc)
])
if args.optimizer == 'fromage':
    optimizer = Fromage(params, lr=args.lr)
if args.optimizer == 'adamw':
    optimizer = AdamW(params, lr=args.lr, weight_decay=args.wdecay)
if args.optimizer == 'radam':
    optimizer = RAdam(params, lr=args.lr, weight_decay=args.wdecay)
if args.optimizer.lower() == 'adabelief':
    optimizer = AdaBelief(params, lr=args.lr, weight_decay=args.wdecay,
                          eps=args.eps, betas=(args.beta1, args.beta2))
if args.optimizer == 'adabound':
    optimizer = AdaBound(params, lr=args.lr, weight_decay=args.wdecay,
                         final_lr=30, gamma=1e-3)
if args.optimizer == 'amsbound':
    optimizer = AdaBound(params, lr=args.lr, weight_decay=args.wdecay,
                         final_lr=30, gamma=1e-3, amsbound=True)
elif args.optimizer == 'yogi':
    optimizer = Yogi(params, args.lr, betas=(args.beta1, args.beta2),
                     weight_decay=args.wdecay)
elif args.optimizer == 'msvag':
def train_model_v2_1(net, trainloader, validloader, epochs, lr, grad_accum_steps=1, warmup_epoch=1, patience=5, factor=0.5, opt='AdaBound', weight_decay=0.0, loss_w=[0.5, 0.25, 0.25], reference_labels=None, cb_beta=0.99, start_epoch=0, opt_state_dict=None): """ mixup, ReduceLROnPlateau, class balance """ net = net.cuda() # loss loss_w = loss_w if loss_w is not None else [0.5, 0.25, 0.25] if reference_labels is None: if len(loss_w) == 3: criterion = multiloss_wrapper_v1_mixup(loss_funcs=[ mixup.CrossEntropyLossForMixup(num_class=168), mixup.CrossEntropyLossForMixup(num_class=11), mixup.CrossEntropyLossForMixup(num_class=7) ], weights=loss_w) elif len(loss_w) == 4: criterion = multiloss_wrapper_v1_mixup(loss_funcs=[ mixup.CrossEntropyLossForMixup(num_class=168), mixup.CrossEntropyLossForMixup(num_class=11), mixup.CrossEntropyLossForMixup(num_class=7), mixup.CrossEntropyLossForMixup(num_class=1292) ], weights=loss_w) else: if len(loss_w) == 3: criterion = multiloss_wrapper_v1_mixup(loss_funcs=[ cbl.CB_CrossEntropyLoss(reference_labels[:, 0], num_class=168, beta=cb_beta, label_smooth=0.0), cbl.CB_CrossEntropyLoss(reference_labels[:, 1], num_class=11, beta=cb_beta, label_smooth=0.0), cbl.CB_CrossEntropyLoss(reference_labels[:, 2], num_class=7, beta=cb_beta, label_smooth=0.0) ], weights=loss_w) elif len(loss_w) == 4: criterion = multiloss_wrapper_v1_mixup(loss_funcs=[ cbl.CB_CrossEntropyLoss(reference_labels[:, 0], num_class=168, beta=cb_beta, label_smooth=0.0), cbl.CB_CrossEntropyLoss(reference_labels[:, 1], num_class=11, beta=cb_beta, label_smooth=0.0), cbl.CB_CrossEntropyLoss(reference_labels[:, 2], num_class=7, beta=cb_beta, label_smooth=0.0), cbl.CB_CrossEntropyLoss(reference_labels[:, 3], num_class=1292, beta=cb_beta, label_smooth=0.0) ], weights=loss_w) test_criterion = multiloss_wrapper_v1(loss_funcs=[ nn.CrossEntropyLoss(), nn.CrossEntropyLoss(), nn.CrossEntropyLoss() ], weights=loss_w) # opt if opt == 'SGD': optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9) elif opt == 'AdaBound': optimizer = AdaBound(net.parameters(), lr=lr, final_lr=0.1, weight_decay=weight_decay) # scheduler scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=patience, factor=factor, verbose=True) warmup_scheduler = WarmUpLR(optimizer, len(trainloader) * warmup_epoch) if opt_state_dict is not None: optimizer.load_state_dict(opt_state_dict) # train loglist = [] val_loss = 100 for epoch in range(start_epoch, epochs): if epoch > warmup_epoch - 1: scheduler.step(val_loss) print('epoch ', epoch) tr_log = _trainer_v1(net, trainloader, criterion, optimizer, epoch, grad_accum_steps, warmup_epoch, warmup_scheduler, use_mixup=True) vl_log = _tester_v1(net, validloader, test_criterion) loglist.append(list(tr_log) + list(vl_log)) val_loss = vl_log[0] save_checkpoint(epoch, net, optimizer, 'checkpoint') save_log(loglist, 'training_log.csv') return net
#======================================================================== # NN Setting from nn_keras import MS_NN # NN Model Setting if is_debug: N_EPOCHS = 2 else: N_EPOCHS = 10 # learning_rate = 1e-4 learning_rate = 1e-3 first_batch = 10 # 7: 128 from adabound import AdaBound adabound = AdaBound(lr=learning_rate, final_lr=0.1, gamma=1e-03, weight_decay=0., amsbound=False) model = MS_NN(input_cols=len(use_cols)) metric = "accuracy" opt = optimizers.Adam(lr=learning_rate) model.compile(loss="binary_crossentropy", optimizer=adabound, metrics=[metric]) callbacks = [ EarlyStopping(monitor='val_loss', patience=2, verbose=0), ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1,
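# A minimal, self-contained sketch of the compile pattern above, assuming a
# Keras-compatible AdaBound importable as `from adabound import AdaBound`
# with the keyword arguments used in that snippet. MS_NN is replaced by a toy
# Sequential model; note that it is this AdaBound instance, not the
# optimizers.Adam created in the snippet, that is handed to compile().
from keras.models import Sequential
from keras.layers import Dense
from adabound import AdaBound

model = Sequential([Dense(64, activation='relu', input_shape=(100,)),
                    Dense(1, activation='sigmoid')])
model.compile(loss='binary_crossentropy',
              optimizer=AdaBound(lr=1e-3, final_lr=0.1, gamma=1e-3,
                                 weight_decay=0., amsbound=False),
              metrics=['accuracy'])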
def main(): args = parse_args() update_config(cfg_hrnet, args) # create checkpoint dir if not isdir(args.checkpoint): mkdir_p(args.checkpoint) # create model #print('networks.'+ cfg_hrnet.MODEL.NAME+'.get_pose_net') model = eval('models.' + cfg_hrnet.MODEL.NAME + '.get_pose_net')( cfg_hrnet, is_train=True) model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda() # show net args.channels = 3 args.height = cfg.data_shape[0] args.width = cfg.data_shape[1] #net_vision(model, args) # define loss function (criterion) and optimizer criterion = torch.nn.MSELoss(reduction='mean').cuda() #torch.optim.Adam optimizer = AdaBound(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay) if args.resume: if isfile(args.resume): print("=> loading checkpoint '{}'".format(args.resume)) checkpoint = torch.load(args.resume) pretrained_dict = checkpoint['state_dict'] model.load_state_dict(pretrained_dict) args.start_epoch = checkpoint['epoch'] optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( args.resume, checkpoint['epoch'])) logger = Logger(join(args.checkpoint, 'log.txt'), resume=True) else: print("=> no checkpoint found at '{}'".format(args.resume)) else: logger = Logger(join(args.checkpoint, 'log.txt')) logger.set_names(['Epoch', 'LR', 'Train Loss']) cudnn.benchmark = True torch.backends.cudnn.enabled = True print(' Total params: %.2fMB' % (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4)) train_loader = torch.utils.data.DataLoader( #MscocoMulti(cfg), KPloader(cfg), batch_size=cfg.batch_size * len(args.gpus)) #, shuffle=True, #num_workers=args.workers, pin_memory=True) #for i, (img, targets, valid) in enumerate(train_loader): # print(i, img, targets, valid) for epoch in range(args.start_epoch, args.epochs): lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma) print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr)) # train for one epoch train_loss = train(train_loader, model, criterion, optimizer) print('train_loss: ', train_loss) # append logger file logger.append([epoch + 1, lr, train_loss]) save_model( { 'epoch': epoch + 1, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), }, checkpoint=args.checkpoint) logger.close()
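# Sketch of the checkpoint save/resume pattern exercised above: the model and
# optimizer state dicts are stored together so AdaBound's per-parameter
# moments and bounds survive a restart. `save_ckpt` / `resume_ckpt` are
# illustrative helper names, not part of the original script.
import os
import torch

def save_ckpt(path, epoch, model, optimizer):
    torch.save({'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()}, path)

def resume_ckpt(path, model, optimizer):
    if not os.path.isfile(path):
        print("=> no checkpoint found at '{}'".format(path))
        return 0
    ckpt = torch.load(path)
    model.load_state_dict(ckpt['state_dict'])
    optimizer.load_state_dict(ckpt['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})".format(path, ckpt['epoch']))
    return ckpt['epoch']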
# lr *= 5e-1 if epoch >= 150: lr *= 0.1 print('Learning rate: ', lr) return lr if n == 18: model = ResNet18(input_shape=input_shape, depth=depth) else: model = ResNet34(input_shape=input_shape, depth=depth) model.compile(loss='categorical_crossentropy', optimizer=AdaBound(lr=lr_schedule(0), final_lr=adabound_final_lr, gamma=adabound_gamma, weight_decay=weight_decay, amsbound=amsbound), metrics=['accuracy']) model.summary() print(model_type) # Prepare model model saving directory. save_dir = os.path.join(os.getcwd(), 'weights') model_name = 'cifar10_%s_model.h5' % model_type if not os.path.isdir(save_dir): os.makedirs(save_dir) filepath = os.path.join(save_dir, model_name) # Prepare callbacks for model saving and for learning rate adjustment. checkpoint = ModelCheckpoint(filepath=filepath,
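# A self-contained sketch of the step-decay schedule whose tail appears above
# (a 10x drop from epoch 150 on), wired to Keras' LearningRateScheduler
# callback as one way to apply it during training. The 1e-3 base rate is
# illustrative; the original value is not visible in the truncated snippet.
from keras.callbacks import LearningRateScheduler

def lr_schedule(epoch):
    lr = 1e-3  # assumed base learning rate
    if epoch >= 150:
        lr *= 0.1
    print('Learning rate: ', lr)
    return lr

lr_callback = LearningRateScheduler(lr_schedule)
# pass `lr_callback` in the callbacks list given to model.fit(...)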
def main(): parser = argparse.ArgumentParser(description='Training CC') parser.add_argument('-lr', type=float, help='learning rate (3e-4)') parser.add_argument('-net', help='network (resnet50, resnet34,...)') parser.add_argument( '-num_trainimages', type=int, default=28000, help= 'number of training images (number < 28000). For otf1: number of images per epoch' ) parser.add_argument('-dat_augment', default=1, type=int, help='data augmentation during training? (0 or 1)') parser.add_argument('-otf', default=1, type=int, help='on the fly data generation? (0 or 1') parser.add_argument('-unique', default='pairs', help='(pairs, unique)') # training bagnets requires smaller batch_size, otherwise memory issues parser.add_argument( '-batch_size', default=64, type=int, help='batchsize: default 64, for bagnets smaller b/c of memory issues') parser.add_argument( '-optimizer', default='Adam', help= 'The default optimizer is Adam. Optionally, you can choose adabound ("adabound"), which is used for BagNet training.' ) parser.add_argument( '-contrast', default='contrastrandom', help= 'The default is to train on random contrast images. You can choose "contrast0"' ) parser.add_argument('-n_epochs', default=10, type=int, help='number of epochs') parser.add_argument( '-regularization', default=0, type=int, help= 'Flag to choose (1) or not choose (0, default) regularization techniques: scaling, rotation and dropout.' ) parser.add_argument( '-load_checkpoint', default='', help= 'String to choose loading given checkpoint from best precision (1) or to opt for leaving initialization at ImageNet/random (empty string, default).' ) parser.add_argument('-load_checkpoint_epoch', default=0, type=int, help='') parser.add_argument( '-crop_margin', default=0, type=int, help='crop 16 px margin from each side (1), keep original image (0)') args = parser.parse_args() print('regularization', args.regularization) # set seed for reproducibility np.random.seed(0) torch.manual_seed(0) torch.cuda.manual_seed_all(0) torch.backends.cudnn.deterministic = True epochs = args.n_epochs print('number of epochs:', epochs) # after this many epochs the learning rate decays by a factor of 0.1 epoch_decay = epochs // 2 now = datetime.datetime.now() # add the date to the experiment name exp_name = args.net + '_lr' + str(args.lr) + '_numtrain' + str( args.num_trainimages) + '_augment' + str(args.dat_augment) + '_' + str( args.unique) + '_batchsize' + str( args.batch_size) + '_optimizer' + str( args.optimizer) + '_' + str(args.contrast) + '_reg' + str( args.regularization) + '_otf' + str( args.otf) + '_cropmargin' + str( args.crop_margin) + '_' + str(now.month) + str( now.day) + str(now.year) if args.load_checkpoint: exp_name = '_CONTINUED_FINETUNING_' + exp_name # load model print('load model') if args.net[:6] == 'bagnet': model = my_models.load_model(args.net, args.regularization) else: model = my_models.load_model(args.net) # load checkpoint if resuming fine-tuning from later epoch if args.load_checkpoint: model.load_state_dict( torch.load('cc_checkpoints/' + args.load_checkpoint + '/best_prec.pt')) # load dataset print('load dataset') valloader = cc_utils.load_dataset_cc( set_num=1, contrast=args.contrast, batch_size=args.batch_size, split='val', regularization=args. regularization, # whether to use super-augmentation crop_margin=args.crop_margin) # crop 16px margin if args.otf: # online datageneration. 
Works only for set1, contrast0, unique, no dataaugmentation or regularisation dataset = cc_utils.Dataset_OTF(epoch_len=args.num_trainimages, crop_margin=args.crop_margin) trainloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, num_workers=8) else: trainloader = cc_utils.load_dataset_cc( set_num=1, contrast=args.contrast, batch_size=args.batch_size, split='trainmany', # CAREFUL! This is the LARGE dataset regularization=args. regularization, # whether to use super-augmentation num_trainimages=args.num_trainimages, dat_augment=args.dat_augment, unique=args.unique, # number of images in the trainingset crop_margin=args.crop_margin) # loss criterion and optimizer criterion = nn.BCEWithLogitsLoss() if args.optimizer == 'Adam': optimizer = optim.Adam( filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr) # skip parameters that have requires_grad==False elif args.optimizer == 'adabound': optimizer = AdaBound(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, final_lr=0.1) # create new checkpoints- and tensorboard-directories for version in range(100): checkpointdir = 'cc_checkpoints/' + \ exp_name + '_v' + str(version) + '/' tensorboarddir = 'cc_tensorboard_logs/' + \ exp_name + '_v' + str(version) + '/' # if checkpointdir already exists, skip it if not os.path.exists(checkpointdir): break print('tensorboarddir', tensorboarddir) os.makedirs(checkpointdir) os.makedirs(tensorboarddir) # create writer writer = SummaryWriter(tensorboarddir) print('writing to this tensorboarddir', tensorboarddir) # steps (x-axis) for plotting tensorboard step = 0 best_prec = 0 first_epoch = 0 val_loss = [ ] # list to store all validation losses to detect plateau and potentially decrease the lr # if fine-tuning is continued, load old loss values to guarantee lr # adjustment works properly if args.load_checkpoint: with open('cc_checkpoints/' + args.load_checkpoint + '/epoch_loss_lr.csv', newline='') as csvfile: training_log = csv.reader(csvfile, delimiter=',', lineterminator='\n') for row_counter, row in enumerate(training_log): if row_counter == 1: # skip title row for list_idx in range(int(row[0])): val_loss.append('NaN') val_loss.append(row[1]) elif row_counter > 1: val_loss.append(float(row[1])) # first epoch is the one after the last epoch in the csv file first_epoch = int(row[0]) + 1 csvfile.close() n_epoch_plateau = 25 # number of epochs over which the presence of a plateau is evaluated counter_lr_adjust = 1 epoch_of_last_lr_adjust = 0 with open(checkpointdir + '/epoch_loss_lr.csv', 'w') as csvFile: csv_writer = csv.writer(csvFile, delimiter=',', lineterminator='\n') csv_writer.writerow(['epoch', 'prec', 'loss', 'lr']) csvFile.close() net_string = args.net[:6] for epoch in range(first_epoch, epochs): print('current epoch ', epoch) print('train model') _, step = utils.train(net_string, model, args.regularization, trainloader, optimizer, criterion, writer, epoch, checkpointdir, step) # validate after every epoch print('validate model after training') prec, loss = utils.validate(net_string, model, args.regularization, valloader, criterion, writer, epoch, step) val_loss.append(loss) # save to csv file with open(checkpointdir + '/epoch_loss_lr.csv', 'a') as csvFile: csv_writer = csv.writer(csvFile, delimiter=',', lineterminator='\n') for param_group in optimizer.param_groups: # find current lr curr_lr = param_group['lr'] csv_writer.writerow([epoch, prec, loss, curr_lr]) csvFile.close() # after more than n_epoch_plateaus, check if there is a plateau if epoch >= 
n_epoch_plateau: # only adjust lr if no adjustment has ever happened or # if the last adjustment happened more than n_epoch_plateau epochs # ago if epoch_of_last_lr_adjust == 0 or epoch - \ n_epoch_plateau >= epoch_of_last_lr_adjust: adjust_lr_counter = 0 print('len(val_loss)', len(val_loss)) for idx in range(epoch - n_epoch_plateau + 2, epoch + 1): print('idx', idx) if val_loss[idx] - val_loss[idx - 1] < 0.05: adjust_lr_counter += 1 else: break if adjust_lr_counter == n_epoch_plateau - 1: print('adjust lr!!!') utils.adjust_learning_rate_plateau(optimizer, epoch, args.lr, counter_lr_adjust) counter_lr_adjust += 1 epoch_of_last_lr_adjust = epoch # remember best prec on valset and save checkpoint if prec > best_prec: best_prec = prec torch.save(model.state_dict(), checkpointdir + '/best_prec.pt') # save checkpoint for every epoch torch.save( model.state_dict(), checkpointdir + '/epoch' + str(epoch) + '_step' + str(step) + '.pt') # close writer writer.close() print('Wohoooo, completely done!')
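# Sketch of the optimizer selection above: parameters frozen with
# requires_grad=False are filtered out before building Adam or AdaBound, and
# an unknown name raises instead of leaving `optimizer` undefined.
# `build_cc_optimizer` is an illustrative helper, not part of the script.
import torch.optim as optim
from adabound import AdaBound

def build_cc_optimizer(model, name, lr):
    trainable = [p for p in model.parameters() if p.requires_grad]
    if name == 'Adam':
        return optim.Adam(trainable, lr=lr)
    elif name == 'adabound':
        return AdaBound(trainable, lr=lr, final_lr=0.1)
    raise ValueError('unsupported optimizer: {}'.format(name))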
class TrainNetwork(object): """The main train network""" def __init__(self, args): super(TrainNetwork, self).__init__() self.args = args self.dur_time = 0 self.logger = self._init_log() if not torch.cuda.is_available(): self.logger.info('no gpu device available') sys.exit(1) self._init_hyperparam() self._init_random_and_device() self._init_model() def _init_hyperparam(self): if 'cifar100' == self.args.train_dataset: # cifar10: 6000 images per class, 10 classes, 50000 training images and 10000 test images # cifar100: 600 images per class, 100 classes, 500 training images and 100 testing images per class self.args.num_classes = 100 self.args.layers = 20 self.args.data = '/train_tiny_data/train_data/cifar100' elif 'imagenet' == self.args.train_dataset: self.args.data = '/train_data/imagenet' self.args.num_classes = 1000 self.args.weight_decay = 3e-5 self.args.report_freq = 100 self.args.init_channels = 50 self.args.drop_path_prob = 0 elif 'tiny-imagenet' == self.args.train_dataset: self.args.data = '/train_tiny_data/train_data/tiny-imagenet' self.args.num_classes = 200 elif 'food101' == self.args.train_dataset: self.args.data = '/train_tiny_data/train_data/food-101' self.args.num_classes = 101 self.args.init_channels = 48 def _init_log(self): self.args.save = '../logs/eval/' + self.args.arch + '/' + self.args.train_dataset + '/eval-{}-{}'.format(self.args.save, time.strftime('%Y%m%d-%H%M')) dutils.create_exp_dir(self.args.save, scripts_to_save=None) log_format = '%(asctime)s %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(self.args.save, 'log.txt')) fh.setFormatter(logging.Formatter(log_format)) logger = logging.getLogger('Architecture Training') logger.addHandler(fh) return logger def _init_random_and_device(self): # Set random seed and cuda device np.random.seed(self.args.seed) cudnn.benchmark = True torch.manual_seed(self.args.seed) cudnn.enabled = True torch.cuda.manual_seed(self.args.seed) max_free_gpu_id, gpus_info = dutils.get_gpus_memory_info() self.device_id = max_free_gpu_id self.gpus_info = gpus_info self.device = torch.device('cuda:{}'.format(0 if self.args.multi_gpus else self.device_id)) def _init_model(self): self.train_queue, self.valid_queue = self._load_dataset_queue() def _init_scheduler(): if 'cifar' in self.args.train_dataset: scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, float(self.args.epochs)) else: scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, self.args.decay_period, gamma=self.args.gamma) return scheduler genotype = eval('geno_types.%s' % self.args.arch) reduce_level = (0 if 'cifar10' in self.args.train_dataset else 0) model = EvalNetwork(self.args.init_channels, self.args.num_classes, 0, self.args.layers, self.args.auxiliary, genotype, reduce_level) # Try move model to multi gpus if torch.cuda.device_count() > 1 and self.args.multi_gpus: self.logger.info('use: %d gpus', torch.cuda.device_count()) model = nn.DataParallel(model) else: self.logger.info('gpu device = %d' % self.device_id) torch.cuda.set_device(self.device_id) self.model = model.to(self.device) self.logger.info('param size = %fM', dutils.calc_parameters_count(model)) criterion = nn.CrossEntropyLoss() if self.args.num_classes >= 50: criterion = CrossEntropyLabelSmooth(self.args.num_classes, self.args.label_smooth) self.criterion = criterion.to(self.device) if self.args.opt == 'adam': self.optimizer = torch.optim.Adamax( model.parameters(), 
self.args.learning_rate, weight_decay=self.args.weight_decay ) elif self.args.opt == 'adabound': self.optimizer = AdaBound(model.parameters(), self.args.learning_rate, weight_decay=self.args.weight_decay) else: self.optimizer = torch.optim.SGD( model.parameters(), self.args.learning_rate, momentum=self.args.momentum, weight_decay=self.args.weight_decay ) self.best_acc_top1 = 0 # optionally resume from a checkpoint if self.args.resume: if os.path.isfile(self.args.resume): print("=> loading checkpoint {}".format(self.args.resume)) checkpoint = torch.load(self.args.resume) self.dur_time = checkpoint['dur_time'] self.args.start_epoch = checkpoint['epoch'] self.best_acc_top1 = checkpoint['best_acc_top1'] self.args.drop_path_prob = checkpoint['drop_path_prob'] self.model.load_state_dict(checkpoint['state_dict']) self.optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format(self.args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(self.args.resume)) self.scheduler = _init_scheduler() # reload the scheduler if possible if self.args.resume and os.path.isfile(self.args.resume): checkpoint = torch.load(self.args.resume) self.scheduler.load_state_dict(checkpoint['scheduler']) def _load_dataset_queue(self): if 'cifar' in self.args.train_dataset: train_transform, valid_transform = dutils.data_transforms_cifar(self.args) if 'cifar10' == self.args.train_dataset: train_data = dset.CIFAR10(root=self.args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=self.args.data, train=False, download=True, transform=valid_transform) else: train_data = dset.CIFAR100(root=self.args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR100(root=self.args.data, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size = self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4 ) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size = self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4 ) elif 'tiny-imagenet' == self.args.train_dataset: train_transform, valid_transform = dutils.data_transforms_tiny_imagenet() train_data = dartsdset.TinyImageNet200(self.args.data, train=True, download=True, transform=train_transform) valid_data = dartsdset.TinyImageNet200(self.args.data, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4 ) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4 ) elif 'imagenet' == self.args.train_dataset: traindir = os.path.join(self.args.data, 'train') validdir = os.path.join(self.args.data, 'val') train_transform, valid_transform = dutils.data_transforms_imagenet() train_data = dset.ImageFolder( traindir,train_transform) valid_data = dset.ImageFolder( validdir,valid_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=self.args.batch_size, shuffle=False, pin_memory=True, num_workers=4) elif 'food101' == self.args.train_dataset: traindir = os.path.join(self.args.data, 'train') validdir = os.path.join(self.args.data, 'val') train_transform, valid_transform = dutils.data_transforms_food101() 
train_data = dset.ImageFolder( traindir,train_transform) valid_data = dset.ImageFolder( validdir,valid_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=True, num_workers=4) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=self.args.batch_size, shuffle=False, pin_memory=True, num_workers=4) return train_queue, valid_queue def run(self): self.logger.info('args = %s', self.args) run_start = time.time() for epoch in range(self.args.start_epoch, self.args.epochs): self.scheduler.step() self.logger.info('epoch % d / %d lr %e', epoch, self.args.epochs, self.scheduler.get_lr()[0]) if self.args.no_dropout: self.model._drop_path_prob = 0 else: self.model._drop_path_prob = self.args.drop_path_prob * epoch / self.args.epochs self.logger.info('drop_path_prob %e', self.model._drop_path_prob) train_acc, train_obj = self.train() self.logger.info('train loss %e, train acc %f', train_obj, train_acc) valid_acc_top1, valid_acc_top5, valid_obj = self.infer() self.logger.info('valid loss %e, top1 valid acc %f top5 valid acc %f', valid_obj, valid_acc_top1, valid_acc_top5) self.logger.info('best valid acc %f', self.best_acc_top1) is_best = False if valid_acc_top1 > self.best_acc_top1: self.best_acc_top1 = valid_acc_top1 is_best = True dutils.save_checkpoint({ 'epoch': epoch+1, 'dur_time': self.dur_time + time.time() - run_start, 'state_dict': self.model.state_dict(), 'drop_path_prob': self.args.drop_path_prob, 'best_acc_top1': self.best_acc_top1, 'optimizer': self.optimizer.state_dict(), 'scheduler': self.scheduler.state_dict() }, is_best, self.args.save) self.logger.info('train epoches %d, best_acc_top1 %f, dur_time %s', self.args.epochs, self.best_acc_top1, dutils.calc_time(self.dur_time + time.time() - run_start)) def train(self): objs = dutils.AverageMeter() top1 = dutils.AverageMeter() top5 = dutils.AverageMeter() self.model.train() for step, (input, target) in enumerate(self.train_queue): input = input.cuda(self.device, non_blocking=True) target = target.cuda(self.device, non_blocking=True) self.optimizer.zero_grad() logits, logits_aux = self.model(input) loss = self.criterion(logits, target) if self.args.auxiliary: loss_aux = self.criterion(logits_aux, target) loss += self.args.auxiliary_weight*loss_aux loss.backward() nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip) self.optimizer.step() prec1, prec5 = dutils.accuracy(logits, target, topk=(1,5)) n = input.size(0) objs.update(loss.item(), n) top1.update(prec1.item(), n) top5.update(prec5.item(), n) if step % args.report_freq == 0: self.logger.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) return top1.avg, objs.avg def infer(self): objs = dutils.AverageMeter() top1 = dutils.AverageMeter() top5 = dutils.AverageMeter() self.model.eval() with torch.no_grad(): for step, (input, target) in enumerate(self.valid_queue): input = input.cuda(self.device, non_blocking=True) target = target.cuda(self.device, non_blocking=True) logits, _ = self.model(input) loss = self.criterion(logits, target) prec1, prec5 = dutils.accuracy(logits, target, topk=(1,5)) n = input.size(0) objs.update(loss.item(), n) top1.update(prec1.item(), n) top5.update(prec5.item(), n) if step % args.report_freq == 0: self.logger.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) return top1.avg, top5.avg, objs.avg
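# Sketch of the optimizer/scheduler/resume wiring used by TrainNetwork: the
# scheduler state_dict is checkpointed next to the optimizer so a cosine (or
# step) schedule resumes at the right epoch. `make_training_state` and
# `ckpt_path` are illustrative names; only torch and the pip adabound package
# are assumed.
import os
import torch
from adabound import AdaBound

def make_training_state(model, opt_name, lr, weight_decay, epochs, ckpt_path=None):
    if opt_name == 'adabound':
        optimizer = AdaBound(model.parameters(), lr, weight_decay=weight_decay)
    elif opt_name == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr, momentum=0.9,
                                    weight_decay=weight_decay)
    else:
        optimizer = torch.optim.Adamax(model.parameters(), lr,
                                       weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(epochs))
    start_epoch = 0
    if ckpt_path is not None and os.path.isfile(ckpt_path):
        ckpt = torch.load(ckpt_path)
        model.load_state_dict(ckpt['state_dict'])
        optimizer.load_state_dict(ckpt['optimizer'])
        scheduler.load_state_dict(ckpt['scheduler'])
        start_epoch = ckpt['epoch']
    return optimizer, scheduler, start_epoch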