def _init_params(self):
    """Build the network, loss, optimizer and LR schedule from the config."""
    model_cfg = self.config['model']
    self.net = get_net(model_cfg, self.config['load_weights'])
    self.net.cuda()
    self.model = get_model(model_cfg)
    self.criterion = get_loss(model_cfg)
    self.optimizer = self._get_optim()
    # Halve the learning rate at each of the fixed milestone epochs.
    self.scheduler = optim.lr_scheduler.MultiStepLR(
        self.optimizer,
        milestones=[40, 55, 70, 95],
        gamma=0.5,
    )
def _init_params(self):
    """Set up generator/discriminator nets, losses, optimizers and schedulers."""
    model_cfg = self.config['model']
    self.criterionG, criterionD = get_loss(model_cfg)
    self.netG, netD = get_nets(model_cfg)
    self.netG.to(self.device)
    # The discriminator and its loss are owned by the adversarial trainer.
    self.adv_trainer = self._get_adversarial_trainer(
        model_cfg['d_name'], netD, criterionD)
    self.model = get_model(model_cfg)
    # Optimize only the generator parameters that require gradients.
    trainable = (p for p in self.netG.parameters() if p.requires_grad)
    self.optimizer_G = self._get_optim(trainable)
    self.optimizer_D = self._get_optim(self.adv_trainer.get_params())
    self.scheduler_G = self._get_scheduler(self.optimizer_G)
    self.scheduler_D = self._get_scheduler(self.optimizer_D)
def _init_params(self):
    """Set up GAN components and warm-start the generator from a checkpoint."""
    model_cfg = self.config['model']
    self.criterionG, criterionD = get_loss(model_cfg)
    self.netG, netD = get_nets(model_cfg)
    self.netG.cuda()
    self.adv_trainer = self._get_adversarial_trainer(
        model_cfg['d_name'], netD, criterionD)
    self.model = get_model(model_cfg)
    grad_params = (p for p in self.netG.parameters() if p.requires_grad)
    self.optimizer_G = self._get_optim(grad_params)
    self.optimizer_D = self._get_optim(self.adv_trainer.get_params())
    self.scheduler_G = self._get_scheduler(self.optimizer_G)
    self.scheduler_D = self._get_scheduler(self.optimizer_D)
    # Restore pretrained FPN generator weights (loaded on CPU, then moved by
    # the module's existing CUDA placement).
    state = torch.load("best_fpn.h5", map_location='cpu')
    self.netG.load_state_dict(state['model'])
def _init_params(self):
    """Set up GAN components, resuming the generator from the experiment checkpoint."""
    model_cfg = self.config['model']
    self.criterionG, criterionD = get_loss(model_cfg)
    netG, netD = get_nets(model_cfg)
    generator = netG.cuda()
    # Load the previously saved model so training continues from it.
    checkpoint = torch.load(
        'best_{}.h5'.format(self.config['experiment_desc']))
    generator.load_state_dict(checkpoint['model'])
    self.netG = generator
    self.adv_trainer = self._get_adversarial_trainer(
        model_cfg['d_name'], netD, criterionD)
    self.model = get_model(model_cfg)
    trainable = (p for p in self.netG.parameters() if p.requires_grad)
    self.optimizer_G = self._get_optim(trainable)
    self.optimizer_D = self._get_optim(self.adv_trainer.get_params())
    self.scheduler_G = self._get_scheduler(self.optimizer_G)
    self.scheduler_D = self._get_scheduler(self.optimizer_D)
def main():
    """Entry point: parse config, set up logging/data/model, then train and validate.

    Fixes over the previous revision:
    - `config` is a plain dict, so `config.resume` raised AttributeError;
      the resume path lives on `args`.
    - In distributed mode `logger` was only created on rank 0 but used on
      every rank, raising NameError elsewhere.
    - `yaml.load` without a Loader is deprecated (TypeError on PyYAML >= 6).
    """
    global args, use_gpu, writer, rank, logger, best_top1, world_size
    args = parser.parse_args()
    # safe_load never constructs arbitrary Python objects from the YAML file.
    with open(args.config) as f:
        config = yaml.safe_load(f)

    ####### visualize configs ######
    visualize_configurations(config)

    ####### set args ######
    # Flatten the two-level YAML config onto the args namespace.
    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)
    if args.verbose:
        print('Config parsing complete')

    ####### world initial ######
    if args.distributed:
        rank, world_size = dist.dist_init(args.port, 'nccl')
        # Every rank needs a logger: logger.info is called unconditionally below.
        logger = loggers.get_logger(
            os.path.join(args.logpath, '{}.distlog'.format(args.task_name)))
        if rank == 0:
            tbpath = os.path.join(args.logpath, 'tb', args.task_name)
            if not os.path.isdir(tbpath):
                os.makedirs(tbpath)
            writer = SummaryWriter(log_dir=tbpath)
            writer.add_text('config_infomation', transfer_txt(args))
            logger.info("Logger is set ")
            logger.info("Logger with distribution")
    else:
        tbpath = os.path.join(args.logpath, 'tb', args.task_name)
        if not os.path.isdir(tbpath):
            os.makedirs(tbpath)
        writer = SummaryWriter(log_dir=tbpath)
        writer.add_text('config_infomation', transfer_txt(args))
        logger = loggers.get_logger(
            os.path.join(args.logpath, '{}.log'.format(args.task_name)))
        logger.info("Logger is set ")
        logger.info("Logger without distribution")

    ######## initial random setting #######
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.benchmark = True

    ######## test data reading ########
    since = time.time()
    dataset_train_val = base_dataset.baseline_dataset(args)
    train_loader, val_loader = dataset_train_val.get_loader()
    logger.info(
        "Initializing dataset used {} basic time unit".format(time.time() - since))
    logger.info("The training classes labels length : {}".format(
        len(dataset_train_val.train_classnames)))
    since = time.time()
    inputs, classes = next(iter(train_loader))
    logger.info('batch loading time example is {}'.format(time.time() - since))

    ######### Init model ############
    if args.model_name == 'resnet50_middle':
        model = baseline_cls.resnet50_middle(
            len(dataset_train_val.train_classnames),
            droprate=args.dropoutrate,
            pretrain=args.pretrain,
            return_f=args.reture_bottleneck_feature,
            return_mid=args.return_middle_level_feature)
    else:
        model = baseline_cls.PCB(len(dataset_train_val.train_classnames))
    if args.PCB:
        # Explicit PCB flag overrides the model_name selection.
        model = baseline_cls.PCB(len(dataset_train_val.train_classnames))

    ########## lauch training ###########
    woptimizer = optimizers.get_optimizer(args, model)
    lr_schedular = optimizers.get_lr_scheduler(args, woptimizer)
    criterion = losses.get_loss(args)

    if args.resume != '' and os.path.isfile(args.resume):
        if args.distributed:
            if rank == 0:
                print('resume from [%s]' % args.resume)
            checkpoint = torch.load(
                args.resume,
                map_location='cuda:%d' % torch.cuda.current_device())
        else:
            print('resume from [%s]' % args.resume)
            checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint['network'])
        # TODO(review): optimizer / lr_scheduler state is intentionally not
        # restored here; resuming restarts both from their initial state.
        epoch_offset = checkpoint['epoch']
    else:
        epoch_offset = 0

    # NOTE(review): `device` is assumed to be a module-level global — confirm.
    model.to(device)
    if args.distributed:
        dist.sync_state(model)
    if args.fp16:
        model, woptimizer = amp.initialize(model, woptimizer, opt_level="O1")

    for epoch in range(epoch_offset, args.epochs):
        # train
        train(args, train_loader, val_loader, model, woptimizer, lr_schedular,
              epoch=epoch, criterion=criterion)
        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(args, val_loader, model,
                        epoch=epoch, cur_step=cur_step, criterion=criterion)
        # Checkpointing/TensorBoard only on rank 0 (or always when not distributed).
        if not args.distributed or rank == 0:
            if best_top1 < top1:
                best_top1 = top1
                save_network(args, model, epoch, top1, isbest=True)
            elif epoch % args.forcesave == 0:
                save_network(args, model, epoch, top1)
            writer.add_scalar('val/best_top1', best_top1, cur_step)

    if not args.distributed or rank == 0:
        logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
def main():
    """Entry point for DARTS-style architecture search: parse config, set up
    logging/data, build the search controller, then alternate weight/alpha
    optimization with validation, checkpointing and genotype plotting.

    Fixes over the previous revision:
    - `config` is a plain dict, so `config.resume` raised AttributeError;
      the resume path lives on `args`.
    - `yaml.load` without a Loader is deprecated (TypeError on PyYAML >= 6).
    """
    global args, use_gpu, writer, rank, logger, best_top1, world_size
    args = parser.parse_args()
    # safe_load never constructs arbitrary Python objects from the YAML file.
    with open(args.config) as f:
        config = yaml.safe_load(f)

    ####### visualize configs ######
    visualize_configurations(config)

    ####### set args ######
    # Flatten the two-level YAML config onto the args namespace.
    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)
    if args.verbose:
        print('Config parsing complete')

    ####### world initial ######
    if args.distributed:
        rank, world_size = dist.dist_init(args.port, 'nccl')
        # Logger is created on every rank; TensorBoard writer only on rank 0.
        logger = loggers.get_logger(
            os.path.join(args.logpath, '{}.distlog'.format(args.task_name)))
        if rank == 0:
            tbpath = os.path.join(args.logpath, 'tb', args.task_name)
            if not os.path.isdir(tbpath):
                os.makedirs(tbpath)
            writer = SummaryWriter(log_dir=tbpath)
            writer.add_text('config_infomation', transfer_txt(args))
            logger.info("Logger is set ")
            logger.info("Logger with distribution")
    else:
        tbpath = os.path.join(args.logpath, 'tb', args.task_name)
        if not os.path.isdir(tbpath):
            os.makedirs(tbpath)
        writer = SummaryWriter(log_dir=tbpath)
        writer.add_text('config_infomation', transfer_txt(args))
        logger = loggers.get_logger(
            os.path.join(args.logpath, '{}.log'.format(args.task_name)))
        logger.info("Logger is set ")
        logger.info("Logger without distribution")

    ######## initial random setting #######
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.benchmark = True

    ######## test data reading ########
    since = time.time()
    dataset_train_val = base_dataset.baseline_dataset(args)
    train_loader, val_loader = dataset_train_val.get_loader()
    logger.info(
        "Initializing dataset used {} basic time unit".format(time.time() - since))
    logger.info("The training classes labels length : {}".format(
        len(dataset_train_val.train_classnames)))
    since = time.time()
    inputs, classes = next(iter(train_loader))
    logger.info('batch loading time example is {}'.format(time.time() - since))

    ######### Init model ############
    criterion = losses.get_loss(args)
    criterion.to(device)
    # NOTE(review): both branches currently build the identical controller;
    # the model_name switch is kept as an extension point.
    if args.model_name == 'Darts_normal':
        model = SearchCNNController(args.input_channels, args.init_channels,
                                    len(dataset_train_val.train_classnames),
                                    args.Search_layers, criterion)
    else:
        model = SearchCNNController(args.input_channels, args.init_channels,
                                    len(dataset_train_val.train_classnames),
                                    args.Search_layers, criterion)
    model = model.to(device)
    if args.distributed:
        dist.sync_state(model)

    # Separate optimizers: SGD for network weights, Adam for architecture alphas.
    w_optim = torch.optim.SGD(model.weights(), args.w_lr,
                              momentum=args.w_momentum,
                              weight_decay=args.w_weight_decay)
    alpha_optim = torch.optim.Adam(model.alphas(), args.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=args.alpha_weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, args.epochs, eta_min=args.w_lr_min)
    architect = Architect(model, args.w_momentum, args.w_weight_decay, args)

    ########## lauch training ###########
    if args.resume != '' and os.path.isfile(args.resume):
        if args.distributed:
            if rank == 0:
                print('resume from [%s]' % args.resume)
            checkpoint = torch.load(
                args.resume,
                map_location='cuda:%d' % torch.cuda.current_device())
        else:
            print('resume from [%s]' % args.resume)
            checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint['network'])
        # TODO(review): optimizer / lr_scheduler state is intentionally not
        # restored here; resuming restarts both from their initial state.
        epoch_offset = checkpoint['epoch']
    else:
        epoch_offset = 0

    model.to(device)
    if args.fp16:
        model, w_optim = amp.initialize(model, w_optim, opt_level="O1")

    for epoch in range(epoch_offset, args.epochs):
        if not args.distributed or rank == 0:
            model.print_alphas(logger)
        # train: only every `real_val_freq` epochs does the architect see the
        # real validation split; otherwise it reuses the training split.
        if epoch % args.real_val_freq == 0:
            train(args, train_loader, val_loader, model, architect,
                  w_optim, alpha_optim, lr_scheduler, epoch=epoch)
        else:
            train(args, train_loader, train_loader, model, architect,
                  w_optim, alpha_optim, lr_scheduler, epoch=epoch)
        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(args, val_loader, model, epoch=epoch, cur_step=cur_step)
        if not args.distributed or rank == 0:
            if best_top1 < top1:
                best_top1 = top1
                save_network(args, model, epoch, top1, isbest=True)
            elif epoch % args.forcesave == 0:
                save_network(args, model, epoch, top1)
            writer.add_scalar('val/best_top1', best_top1, cur_step)

    # NOTE(review): placed after the epoch loop, mirroring the sibling
    # classification driver; upstream DARTS logs the genotype per epoch —
    # confirm the intended placement.
    if not args.distributed or rank == 0:
        logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))
        if args.plot_path != False:
            if not os.path.isdir(os.path.join(args.plot_path, args.task_name)):
                os.makedirs(os.path.join(args.plot_path, args.task_name))
            plot_path = os.path.join(args.plot_path, args.task_name,
                                     "EP{:02d}".format(epoch + 1))
            caption = "Epoch {}".format(epoch + 1)
            plot(genotype.normal, plot_path + "-normal", caption)
            plot(genotype.reduce, plot_path + "-reduce", caption)
            # NOTE(review): SummaryWriter.add_image expects (tag, img_tensor);
            # passing only a path looks wrong — confirm against the writer API.
            writer.add_image(plot_path + '.png')
    else:
        genotype = model.genotype()
def train(self, x_train: np.ndarray, y_train: np.ndarray):
    """
    Train a BNN using input datapoints `x_train` with corresponding labels `y_train`.

    Runs `self.num_steps` mini-batch steps over an infinitely cycling
    dataloader, recording network weight samples chosen by `self._keep_sample`.

    Parameters
    ----------
    x_train : numpy.ndarray (N, D)
        Input training datapoints.
    y_train : numpy.ndarray (N,)
        Input training labels.

    Returns
    -------
    self, with `sampled_weights` populated and `is_trained` set to True.
    """
    logging.debug("Training started.")
    logging.debug("Clearing list of sampled weights.")
    self.sampled_weights.clear()

    num_datapoints, input_dimensionality = x_train.shape
    output_dimensionality = 1
    if y_train.ndim > 1:
        _, output_dimensionality = y_train.shape

    # BUG FIX: the message previously read "with % dimensions each.", which
    # %-formatting parsed as a space-flagged "% d" conversion and garbled the
    # output. Also use lazy logging args instead of eager interpolation.
    logging.debug(
        "Processing %d training datapoints with %d dimensions each.",
        num_datapoints, input_dimensionality
    )

    x_train_ = np.asarray(x_train)
    if self.normalize_input:
        logging.debug(
            "Normalizing training datapoints to zero mean and unit variance.")
        x_train_, self.x_mean, self.x_std = zero_mean_unit_var_normalization(x_train)

    y_train_ = np.asarray(y_train)
    if self.normalize_output:
        logging.debug("Normalizing training labels to zero mean and unit variance.")
        y_train_, self.y_mean, self.y_std = zero_mean_unit_var_normalization(y_train)

    # Infinite loader: batches cycle forever; islice below bounds the steps.
    train_loader = infinite_dataloader(
        data_utils.DataLoader(
            data_utils.TensorDataset(
                torch.from_numpy(x_train_).float().type(dtype),
                torch.from_numpy(y_train_).float().type(dtype)
            ),
            batch_size=self.batch_size
        )
    )

    try:
        architecture_name = self.network_architecture.__name__
    except AttributeError:
        architecture_name = str(self.network_architecture)
    logging.debug("Using network architecture: %s", architecture_name)

    # Single-output architectures may not accept output_dimensionality.
    if output_dimensionality == 1:
        self.model = self.network_architecture(
            input_dimensionality=input_dimensionality
        )
    else:
        self.model = self.network_architecture(
            input_dimensionality=input_dimensionality,
            output_dimensionality=output_dimensionality
        )

    try:
        optimizer_name = self.optimizer.__name__
    except AttributeError:
        optimizer_name = str(self.optimizer)
    logging.debug("Using optimizer: %s", optimizer_name)

    optimizer = get_optimizer(
        optimizer_cls=self.optimizer,
        parameters=self.model.parameters(),
        num_datapoints=num_datapoints,
        **self.optimizer_kwargs
    )

    loss_function = get_loss(
        self.loss,
        parameters=self.model.parameters(),
        num_datapoints=num_datapoints,
        size_average=True
    )

    if self.use_progressbar:
        logging.info(
            "Progress bar enabled. To disable pass "
            "`logging_configuration={level: debug.WARN}`."
        )
        losses = OrderedDict(((get_name(self.loss), loss_function),))
        losses.update(
            (get_name(metric), to_bayesian_loss(metric)())
            for metric in self.metrics
        )
        batch_generator = TrainingProgressbar(
            iterable=islice(enumerate(train_loader), self.num_steps),
            losses=losses,
            total=self.num_steps,
            bar_format="{n_fmt}/{total_fmt}[{bar}] - {remaining} - {postfix}"
        )
    else:
        batch_generator = islice(enumerate(train_loader), self.num_steps)

    for epoch, (x_batch, y_batch) in batch_generator:
        optimizer.zero_grad()
        loss = loss_function(input=self.model(x_batch), target=y_batch)
        loss.backward()
        optimizer.step()

        if self.use_progressbar:
            # NOTE(review): this second forward pass exists only to feed the
            # progress bar metrics; it doubles per-step compute.
            predictions = self.model(x_batch)
            batch_generator.update(
                predictions=predictions, y_batch=y_batch, epoch=epoch
            )

        if self._keep_sample(epoch):
            logging.debug("Recording sample, epoch = %d ", epoch)
            weights = self.network_weights
            logging.debug("Sampled weights:\n%s", str(weights))
            self.sampled_weights.append(weights)

    self.is_trained = True
    return self
OneToThreeDimension(), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ]) acdc_dataset = {x: ACDCDataset(train_args["pos_samps_"+x], train_args["neg_samps_"+x], transform=composed) for x in ["train", "val", "test"]} dataloader = {x: DataLoader(acdc_dataset[x], batch_size=train_args["batch_size"], shuffle=True, num_workers=4, # sampler=sampler[x] ) for x in ["train", "val", "test"]} dataset_sizes = {x: len(acdc_dataset[x]) for x in ["train", "val", "test"]} model_ft = get_model(train_args["model"], device, pretrained=train_args["pretrained"]) criterion = get_loss(train_args["loss_name"]) optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-5) model_ft = train(model_ft, criterion, optimizer_ft, num_epochs=train_args["epoch"]) test(model_ft, dataloader["test"], dataset_sizes["test"])