def main_train_worker(args):
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    print("=> creating model '{}'".format(args.arch))
    network = MetaLearnerModelBuilder.construct_cifar_model(args.arch, args.dataset)
    model_path = '{}/train_pytorch_model/real_image_model/{}@{}@epoch_{}@lr_{}@batch_{}.pth.tar'.format(
        PY_ROOT, args.dataset, args.arch, args.epochs, args.lr, args.batch_size)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("After training, the model will be saved to {}".format(model_path))
    network.cuda()
    image_classifier_loss = nn.CrossEntropyLoss().cuda()
    optimizer = RAdam(network.parameters(), args.lr, weight_decay=args.weight_decay)
    cudnn.benchmark = True
    train_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, True)
    val_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, False)
    for epoch in range(0, args.epochs):
        # adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train(train_loader, network, image_classifier_loss, optimizer, epoch, args)
        # evaluate accuracy on the validation set
        validate(val_loader, network, image_classifier_loss, args)
        # save a checkpoint after each epoch
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': network.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, filename=model_path)
def main(args):
    mnasnet = models.mnasnet1_0(pretrained=True).to(device).eval()
    cvae = CVAE(1000, 128, args.n_class * 2, args.n_class).to(device)
    cvae.encoder.eval()
    regressor = Regressor().to(device)

    if Path(args.cvae_resume_model).exists():
        print("load cvae model:", args.cvae_resume_model)
        cvae.load_state_dict(torch.load(args.cvae_resume_model))

    if Path(args.regressor_resume_model).exists():
        print("load regressor model:", args.regressor_resume_model)
        regressor.load_state_dict(torch.load(args.regressor_resume_model))

    image_label = pandas.read_csv(
        Path(args.data_root, args.metadata_file_name.format(
            args.subset))).sample(frac=1, random_state=551)[:250]
    image_label["class"] = image_label["class"] - 1

    dataset = WBCDataset(args.n_class, image_label[:250].values, args.data_root,
                         subset=args.subset, train=True)
    data_loader = loader(dataset, args.batch_size, True)

    cvae_optimizer = RAdam(cvae.parameters(), weight_decay=1e-3)
    regressor_optimizer = RAdam(regressor.parameters(), weight_decay=1e-3)

    train(args, mnasnet, cvae, regressor,
          cvae_optimizer, regressor_optimizer, data_loader)
def main_train_worker(args):
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.dataset.startswith("CIFAR"):
        compress_mode = 2
        use_tanh = False
        resize = None
        img_size = 32
    elif args.dataset == "ImageNet":
        compress_mode = 3
        use_tanh = True
        resize = 128
        img_size = 299
    elif args.dataset in ["MNIST", "FashionMNIST"]:
        compress_mode = 1
        use_tanh = False
        resize = None
        img_size = 28
    network = Codec(img_size, IN_CHANNELS[args.dataset], compress_mode,
                    resize=resize, use_tanh=use_tanh)
    model_path = '{}/train_pytorch_model/AutoZOOM/AutoEncoder_{}@compress_{}@use_tanh_{}@epoch_{}@lr_{}@batch_{}.pth.tar'.format(
        PY_ROOT, args.dataset, compress_mode, use_tanh, args.epochs, args.lr, args.batch_size)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("Model will be saved to {}".format(model_path))
    network.cuda()
    mse_loss_fn = nn.MSELoss().cuda()
    optimizer = RAdam(network.parameters(), args.lr, weight_decay=args.weight_decay)
    cudnn.benchmark = True
    train_loader = DataLoaderMaker.get_img_label_data_loader(
        args.dataset, args.batch_size, True, (img_size, img_size))
    # val_loader = DataLoaderMaker.get_img_label_data_loader(args.dataset, args.batch_size, False)
    for epoch in range(0, args.epochs):
        # adjust_learning_rate(optimizer, epoch, args)
        # train the autoencoder for one epoch
        train(train_loader, network, mse_loss_fn, optimizer, epoch, args, use_tanh)
        # save a checkpoint after each epoch
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'encoder': network.encoder.state_dict(),
                'decoder': network.decoder.state_dict(),
                "compress_mode": compress_mode,
                "use_tanh": use_tanh,
                'optimizer': optimizer.state_dict(),
            }, filename=model_path)
def main(args):
    n_relational_embeddings = args.n_class**2
    n_tag_embeddings = args.n_class
    in_ch, out_ch = 1, 128
    model = TransNFCM(in_ch, out_ch,
                      n_relational_embeddings, n_tag_embeddings,
                      embedding_dim=128).to(device)

    if Path(args.resume_model).exists():
        print("load model:", args.resume_model)
        model.load_state_dict(torch.load(args.resume_model))

    optimizer = RAdam(model.parameters(), weight_decay=1e-3)

    train_dataset = FMNISTDataset(n_class=args.n_class, train=True)
    test_dataset = FMNISTDataset(n_class=args.n_class, train=False)
    train_loader = loader(train_dataset, args.batch_size)
    test_loader = loader(test_dataset, 1, shuffle=False)

    # train(args, model, optimizer, train_loader)
    test(args, model, test_loader,
         show_image_on_board=args.show_image_on_board,
         show_all_embedding=args.show_all_embedding)
def main(args):
    model = EteWave(args.n_class).to(device)

    if Path(args.resume_model).exists():
        print("load model:", args.resume_model)
        model.load_state_dict(torch.load(args.resume_model))

    # setup optimizer
    optimizer = RAdam(model.parameters())

    train_data_file_names = \
        [line.rstrip() for line in open(args.train_data_file_pointer_path)]
    test_data_file_names = \
        [line.rstrip() for line in open(args.test_data_file_pointer_path)]

    train_dataset = ActivDataset(train_data_file_names, args.root_dir,
                                 seq_len=args.train_seq_len,
                                 time_step=args.time_step, is_train=True)
    test_dataset = ActivDataset(test_data_file_names, args.root_dir,
                                seq_len=args.test_seq_len,
                                time_step=args.time_step,
                                is_train=False, test_in_train=True)
    train_loader = loader(train_dataset, args.batch_size)
    test_loader = loader(test_dataset, 1, shuffle=False)

    train(args, model, optimizer, train_loader)
    test(args, model, test_loader)
def main(args):
    n_relational_embeddings = args.n_class**2
    n_tag_embeddings = args.n_class
    in_ch, out_ch, emb_dim = 3, 128, 128
    model = TransNFCM(in_ch, out_ch,
                      n_relational_embeddings, n_tag_embeddings,
                      embedding_dim=emb_dim).to(device)

    optimizer = RAdam(model.parameters(), weight_decay=1e-3)

    image_label = pandas.read_csv(
        Path("gs://", args.bucket_name, args.data_root,
             args.metadata_file_name.format(args.subset)))
    image_label = image_label.sample(frac=1, random_state=551)
    image_label["class"] = image_label["class"] - 1
    image_label = image_label.values

    train_dataset = WBCDataset(args.n_class, image_label[:250], args.data_root,
                               project=args.project,
                               bucket_name=args.bucket_name,
                               subset=args.subset, train=True)
    train_loader = loader(train_dataset, args.batch_size)

    train(args, model, optimizer, train_loader)
def main(args):
    mnasnet1_0 = models.mnasnet1_0(pretrained=True).to(device).eval()
    model = CVAE(1000, 128, 128, args.n_class, 128).to(device)

    image_label = pandas.read_csv(
        Path(args.data_root, args.metadata_file_name.format(args.subset))
    ).sample(frac=1, random_state=551)[:250]
    image_label["class"] = image_label["class"] - 1

    dataset = WBCDataset(image_label.values, args.data_root, subset=args.subset)
    data_loader = loader(dataset, args.batch_size, True)

    optimizer = RAdam(model.parameters(), weight_decay=1e-3)

    train(args, mnasnet1_0, model, optimizer, data_loader)
def get_optimizer(params, train_weight, train_quant, train_bnbias,
                  train_w_theta, train_a_theta):
    # global lr_quant
    (weight, quant, bnbias, theta_w, theta_a, skip) = params
    if args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD([
            {'params': weight, 'weight_decay': args.decay,
             'lr': args.lr if train_weight else 0},
            {'params': quant, 'weight_decay': 0.,
             'lr': args.lr_quant if train_quant else 0},
            {'params': bnbias, 'weight_decay': 0.,
             'lr': args.lr_bn if train_bnbias else 0},
            {'params': theta_w, 'weight_decay': 0.,
             'lr': args.lr_w_theta if train_w_theta else 0},
            {'params': theta_a, 'weight_decay': 0.,
             'lr': args.lr_a_theta if train_a_theta else 0},
            {'params': skip, 'weight_decay': 0, 'lr': 0},
        ], momentum=args.momentum, nesterov=True)
    elif args.optimizer.lower() == 'radam':
        optimizer = RAdam([
            {'params': weight, 'weight_decay': args.decay,
             'lr': args.lr if train_weight else 0},
            {'params': quant, 'weight_decay': 0.,
             'lr': args.lr_quant if train_quant else 0},
            {'params': bnbias, 'weight_decay': 0.,
             'lr': args.lr_bn if train_bnbias else 0},
            {'params': theta_w, 'weight_decay': 0.,
             'lr': args.lr_w_theta if train_w_theta else 0},
            {'params': theta_a, 'weight_decay': 0.,
             'lr': args.lr_a_theta if train_a_theta else 0},
            {'params': skip, 'weight_decay': 0, 'lr': 0},
        ])
    else:
        raise ValueError
    return optimizer
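# --- Illustrative sketch (not from the snippet above) ---
# A minimal, self-contained example of the same per-group learning-rate pattern:
# groups whose "train_*" flag is off get lr=0, which effectively freezes them.
# Assumption: torch.optim.RAdam accepts the same param-group dicts as the RAdam
# class used in the snippet above; the toy model and values are hypothetical.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
weight_params = [p for n, p in model.named_parameters() if p.dim() > 1]
bn_bias_params = [p for n, p in model.named_parameters() if p.dim() == 1]

train_weight, train_bnbias = True, False
optimizer = torch.optim.RAdam([
    {'params': weight_params, 'weight_decay': 1e-4,
     'lr': 1e-3 if train_weight else 0},
    {'params': bn_bias_params, 'weight_decay': 0.,
     'lr': 1e-2 if train_bnbias else 0},   # lr=0 -> this group does not move
])

x = torch.randn(2, 3, 8, 8)
loss = model(x).mean()
optimizer.zero_grad()
loss.backward()
optimizer.step()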
def main(args):
    with mlflow.start_run():
        # Log our parameters into mlflow
        for key, value in vars(args).items():
            mlflow.log_param(key, value)

        n_tag_embeddings = args.n_class
        in_ch, out_ch = 1, 128
        model = TransNFCM(in_ch, out_ch, n_tag_embeddings,
                          embedding_dim=128).to(device)

        if Path(args.resume_model).exists():
            print("load model:", args.resume_model)
            model.load_state_dict(torch.load(args.resume_model))

        optimizer = RAdam(model.parameters(), weight_decay=1e-3)

        train_dataset = FMNISTDataset(n_class=args.n_class, train=True)
        test_dataset = FMNISTDataset(n_class=args.n_class, train=False)
        train_loader = loader(train_dataset, args.batch_size)
        test_loader = loader(test_dataset, 1, shuffle=False)

        if args.train:
            train(args, model, optimizer, train_loader)
        test(args, model, test_loader,
             show_image_on_board=args.show_image_on_board,
             show_all_embedding=args.show_all_embedding)

        # Upload the TensorBoard event logs as a run artifact
        print("Uploading TensorBoard events as a run artifact...")
        mlflow.log_artifacts(args.out_dir, artifact_path="events")
        print("\nLaunch TensorBoard with:\n\ntensorboard --logdir=%s" %
              Path(mlflow.get_artifact_uri(), "events"))
def get_bert_optimizer(self, opt, model):
    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    diff_part = ["bert.embeddings", "bert.encoder"]

    if opt.diff_lr:
        logger.info("layered learning rate on")
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                    and any(nd in n for nd in diff_part)
                ],
                "weight_decay": opt.weight_decay,
                "lr": opt.bert_lr
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                    and any(nd in n for nd in diff_part)
                ],
                "weight_decay": 0.0,
                "lr": opt.bert_lr
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                    and not any(nd in n for nd in diff_part)
                ],
                "weight_decay": opt.weight_decay,
                "lr": opt.layers_lr
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                    and not any(nd in n for nd in diff_part)
                ],
                "weight_decay": 0.0,
                "lr": opt.layers_lr
            },
        ]

        # choose the optimizer
        if opt.optimizer == 'AdamW':
            optimizer = AdamW(optimizer_grouped_parameters, eps=opt.adam_epsilon)
            logger.info("Choose AdamW")
        elif opt.optimizer == 'RAdam':
            optimizer = RAdam(optimizer_grouped_parameters, eps=opt.adam_epsilon)
            logger.info("Choose RAdam")
        elif opt.optimizer == 'Ranger':
            optimizer = Ranger(optimizer_grouped_parameters, eps=opt.adam_epsilon)
            logger.info("Choose Ranger")
        else:
            logger.info("Please input a correct optimizer!")
    else:
        logger.info("bert learning rate on")
        optimizer_grouped_parameters = [
            {
                'params': [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay': opt.weight_decay
            },
            {
                'params': [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay': 0.0
            },
        ]

        # choose the optimizer
        if opt.optimizer == 'AdamW':
            optimizer = AdamW(optimizer_grouped_parameters,
                              lr=opt.bert_lr,
                              eps=opt.adam_epsilon)  # weight_decay=opt.l2reg
            logger.info("Choose AdamW")
        elif opt.optimizer == 'RAdam':
            optimizer = RAdam(optimizer_grouped_parameters,
                              lr=opt.bert_lr,
                              eps=opt.adam_epsilon)
            logger.info("Choose RAdam")
        elif opt.optimizer == 'Ranger':
            optimizer = Ranger(optimizer_grouped_parameters,
                               lr=opt.bert_lr,
                               eps=opt.adam_epsilon)
            logger.info("Choose Ranger")
        else:
            logger.info("Please input a correct optimizer!")

    return optimizer
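# --- Illustrative sketch (not from the snippet above) ---
# A minimal, self-contained example of the "no weight decay on bias / LayerNorm"
# grouping used above, on a toy module instead of a BERT model.
# Assumption: torch.optim.RAdam accepts the same param-group dicts as the RAdam
# class imported in the snippet; the module and hyperparameters are hypothetical.
import torch
import torch.nn as nn

class TinyEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(16, 32)
        self.LayerNorm = nn.LayerNorm(32)   # named so 'LayerNorm.weight' matches

    def forward(self, x):
        return self.LayerNorm(self.linear(x))

model = TinyEncoder()
no_decay = ['bias', 'LayerNorm.weight']
grouped = [
    {'params': [p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = torch.optim.RAdam(grouped, lr=2e-5, eps=1e-8)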
def main():
    if config.gpu and not torch.cuda.is_available():
        raise ValueError("GPU not supported or enabled on this system.")
    use_gpu = config.gpu

    log.info("Loading train dataset")
    train_dataset = COVIDxFolder(config.train_imgs, config.train_labels,
                                 transforms.train_transforms(config.width,
                                                             config.height))
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              drop_last=True,
                              num_workers=config.n_threads,
                              pin_memory=use_gpu)
    log.info("Number of training examples {}".format(len(train_dataset)))

    log.info("Loading val dataset")
    val_dataset = COVIDxFolder(config.val_imgs, config.val_labels,
                               transforms.val_transforms(config.width,
                                                         config.height))
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            num_workers=config.n_threads,
                            pin_memory=use_gpu)
    log.info("Number of validation examples {}".format(len(val_dataset)))

    if config.weights:
        # state = torch.load(config.weights)
        state = None
        log.info("Loaded model weights from: {}".format(config.weights))
    else:
        state = None

    state_dict = state["state_dict"] if state else None
    model = architecture.COVIDEfficientnet(n_classes=config.n_classes)
    if state_dict:
        model = util.load_model_weights(model=model, state_dict=state_dict)

    if use_gpu:
        model.cuda()
        model = torch.nn.DataParallel(model)
    optim_layers = filter(lambda p: p.requires_grad, model.parameters())

    # optimizer and lr scheduler
    optimizer = RAdam(optim_layers,
                      lr=config.lr,
                      weight_decay=config.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer=optimizer,
                                  factor=config.lr_reduce_factor,
                                  patience=config.lr_reduce_patience,
                                  mode='max',
                                  min_lr=1e-7)

    # Load the last global_step from the checkpoint if existing
    global_step = 0 if state is None else state['global_step'] + 1

    class_weights = util.to_device(torch.FloatTensor(config.loss_weights),
                                   gpu=use_gpu)
    loss_fn = CrossEntropyLoss()

    # Reset the best metric score
    best_score = -1

    # Training
    for epoch in range(config.epochs):
        log.info("Started epoch {}/{}".format(epoch + 1, config.epochs))
        for data in train_loader:
            imgs, labels = data
            imgs = util.to_device(imgs, gpu=use_gpu)
            labels = util.to_device(labels, gpu=use_gpu)

            logits = model(imgs)
            loss = loss_fn(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if global_step % config.log_steps == 0 and global_step > 0:
                probs = model.module.probability(logits)
                preds = torch.argmax(probs, dim=1).detach().cpu().numpy()
                labels = labels.cpu().detach().numpy()
                acc, f1, _, _ = util.clf_metrics(preds, labels)
                lr = util.get_learning_rate(optimizer)
                log.info("Step {} | TRAINING batch: Loss {:.4f} | F1 {:.4f} | "
                         "Accuracy {:.4f} | LR {:.2e}".format(
                             global_step, loss.item(), f1, acc, lr))

            if global_step % config.eval_steps == 0 and global_step > 0:
                best_score = validate(val_loader,
                                      model,
                                      best_score=best_score,
                                      global_step=global_step,
                                      cfg=config)
                scheduler.step(best_score)

            global_step += 1
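# --- Illustrative sketch (not from the snippet above) ---
# A minimal example of the RAdam + ReduceLROnPlateau(mode='max') pairing used above:
# the scheduler is stepped on a validation *score* (e.g. F1), so mode='max' and the
# learning rate is reduced when that score stops improving.
# Assumption: torch.optim.RAdam behaves like the RAdam class imported in the script;
# the model, hyperparameters, and placeholder score are hypothetical.
import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = torch.nn.Linear(10, 3)
optimizer = torch.optim.RAdam(model.parameters(), lr=1e-4, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5,
                              patience=3, min_lr=1e-7)

for eval_round in range(10):
    val_score = 0.80            # placeholder for validate(...) returning best_score
    scheduler.step(val_score)   # LR drops after `patience` rounds without improvement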
class Optimizer(nn.Module):
    def __init__(self, model):
        super(Optimizer, self).__init__()
        self.setup_optimizer(model)

    def setup_optimizer(self, model):
        params = []
        for key, value in model.named_parameters():
            if not value.requires_grad:
                continue
            lr = cfg.SOLVER.BASE_LR
            weight_decay = cfg.SOLVER.WEIGHT_DECAY
            if "bias" in key:
                lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR
                weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
            params += [{
                "params": [value],
                "lr": lr,
                "weight_decay": weight_decay
            }]

        if cfg.SOLVER.TYPE == 'SGD':
            self.optimizer = torch.optim.SGD(params,
                                             lr=cfg.SOLVER.BASE_LR,
                                             momentum=cfg.SOLVER.SGD.MOMENTUM)
        elif cfg.SOLVER.TYPE == 'ADAM':
            self.optimizer = torch.optim.Adam(params,
                                              lr=cfg.SOLVER.BASE_LR,
                                              betas=cfg.SOLVER.ADAM.BETAS,
                                              eps=cfg.SOLVER.ADAM.EPS)
        elif cfg.SOLVER.TYPE == 'ADAMAX':
            self.optimizer = torch.optim.Adamax(params,
                                                lr=cfg.SOLVER.BASE_LR,
                                                betas=cfg.SOLVER.ADAM.BETAS,
                                                eps=cfg.SOLVER.ADAM.EPS)
        elif cfg.SOLVER.TYPE == 'ADAGRAD':
            self.optimizer = torch.optim.Adagrad(params, lr=cfg.SOLVER.BASE_LR)
        elif cfg.SOLVER.TYPE == 'RMSPROP':
            self.optimizer = torch.optim.RMSprop(params, lr=cfg.SOLVER.BASE_LR)
        elif cfg.SOLVER.TYPE == 'RADAM':
            self.optimizer = RAdam(params,
                                   lr=cfg.SOLVER.BASE_LR,
                                   betas=cfg.SOLVER.ADAM.BETAS,
                                   eps=cfg.SOLVER.ADAM.EPS)
        else:
            raise NotImplementedError

        if cfg.SOLVER.LR_POLICY.TYPE == 'Fix':
            self.scheduler = None
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Step':
            self.scheduler = torch.optim.lr_scheduler.StepLR(
                self.optimizer,
                step_size=cfg.SOLVER.LR_POLICY.STEP_SIZE,
                gamma=cfg.SOLVER.LR_POLICY.GAMMA)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Plateau':
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer,
                factor=cfg.SOLVER.LR_POLICY.PLATEAU_FACTOR,
                patience=cfg.SOLVER.LR_POLICY.PLATEAU_PATIENCE)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'Noam':
            self.scheduler = lr_scheduler.create(
                'Noam',
                self.optimizer,
                model_size=cfg.SOLVER.LR_POLICY.MODEL_SIZE,
                factor=cfg.SOLVER.LR_POLICY.FACTOR,
                warmup=cfg.SOLVER.LR_POLICY.WARMUP)
        elif cfg.SOLVER.LR_POLICY.TYPE == 'MultiStep':
            self.scheduler = lr_scheduler.create(
                'MultiStep',
                self.optimizer,
                milestones=cfg.SOLVER.LR_POLICY.STEPS,
                gamma=cfg.SOLVER.LR_POLICY.GAMMA)
        else:
            raise NotImplementedError

    def zero_grad(self):
        self.optimizer.zero_grad()

    def step(self):
        self.optimizer.step()

    def scheduler_step(self, lrs_type, val=None):
        if self.scheduler is None:
            return
        if cfg.SOLVER.LR_POLICY.TYPE != 'Plateau':
            val = None
        if lrs_type == cfg.SOLVER.LR_POLICY.SETP_TYPE:
            self.scheduler.step(val)

    def get_lr(self):
        lr = []
        for param_group in self.optimizer.param_groups:
            lr.append(param_group['lr'])
        lr = sorted(list(set(lr)))
        return lr
def base_runner():
    #######################################
    # Parse Arguments and Init W&B
    #######################################
    print("-" * 100)
    cc = get_arguments()
    if cc.get('local_rank') == 0:
        wandb.init(project=PROJECT_NAME, dir=cc.get('log_dir'))
        wandb.config.update(cc)  # adds all of the arguments as config variables
        # print(f"W & B Log Dir:{wandb.wandb_dir()}")
    print("-" * 100)

    #######################################
    # Setting Dataset
    #######################################
    if cc.get('fp16'):
        torch.cuda.set_device(cc.get('local_rank'))
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
    train_loader, test_loader, train_sampler = ds.get_dataset(
        ds.Dataset[cc.get('dataset')],
        cc.get('data_dir'),
        batch_size=cc.get('batch_size'),
        num_workers=cc.get('num_workers'),
        distributed=cc.get('fp16'),
        enable_auto_augmentation='efficient' in cc.get('network_name'))
    loss = nn.CrossEntropyLoss()

    #######################################
    # Search Working Device
    #######################################
    working_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(working_device.type)
    print("-" * 100)

    ######################################
    # Set Model and Load
    #####################################
    nc = networks.NetworkQuantizationController(
        quantization_config=get_q_config_same(
            list(range(cc.get('activation_min_bit_width'),
                       cc.get('activation_max_bit_width') + 1)),
            cc.get('bits_list'),
            cc.get('n_thresholds_shifts')),
        quantization_part=QUANTIZATION[cc.get('quantization_part')],
        ste=cc.get('gumbel_ste'))
    net = networks.get_network_function(cc.get('network_name'))(nc, pretrained=True)
    init_net(net, train_loader)
    net = update_quantization_coefficient(net)
    param_out_list, activation_scale_params, variable_scale_params = \
        model_coefficient_split(net)

    ######################################
    # Build Optimizer and Loss function
    #####################################
    optimizer = RAdam([{
        'params': activation_scale_params,
        'lr': cc.get('lr_activation'),
        'weight_decay': 0.0
    }, {
        'params': variable_scale_params,
        'lr': cc.get('lr_coefficient'),
        'weight_decay': 0.0
    }])
    optimizer_net = RAdam([{
        'params': param_out_list,
        'lr': cc.get('lr_start'),
        'weight_decay': cc.get('weight_decay')
    }])
    net = net.to('cuda')
    # if cc.get('fp16'):
    #     net, optimizers = amp.initialize(net, [optimizer, optimizer_net],
    #                                      opt_level='O1',
    #                                      keep_batchnorm_fp32=None,
    #                                      loss_scale=None)
    optimizers = [optimizer, optimizer_net]
    net = common.multiple_gpu_enable(net, apex=cc.get('fp16'))

    ##################################
    # Initial accuracy evaluation
    ##################################
    # test_base_acc = common.accuracy_evaluation(net, test_loader, working_device)
    # print("Network Weight Loading Done with Accuracy:", test_base_acc)
    print('-' * 100)

    ######################################
    # Enable Quantization
    #####################################
    nc.apply_fix_point()

    #####################################
    # Search Max thresholds
    #####################################
    print("Initial thresholds", get_thresholds_list(nc, net)[0])
    nc.set_temperature(1)
    nc.enable_statistics_update()  # enable statistics collection
    train_acc = common.accuracy_evaluation(net, train_loader, working_device)
    nc.disable_statistics_update()  # disable statistics collection
    print("Initial thresholds at the end of statistics update",
          get_thresholds_list(nc, net)[0], train_acc)

    #####################################
    # Retrain
    #####################################
    temp_func = common.get_exp_cycle_annealing(
        cc.get('cycle_size') * len(train_loader),
        cc.get('temp_step'),
        np.round(len(train_loader) / cc.get('n_gumbel')))
    gamma_target_func = common.get_step_annealing(cc.get('cycle_size'),
                                                  CR_START_W,
                                                  cc.get('target_compression'),
                                                  cc.get('n_target_steps'))
    gamma_target_func_activation = common.get_step_annealing(
        cc.get('cycle_size'), CR_START_A,
        cc.get('target_compression_activation'),
        cc.get('n_target_steps'))
    print("-" * 100)
    print("Starting Training")
    scaler = torch.cuda.amp.GradScaler()
    single_iteration_training_joint(net, cc, nc, train_loader, test_loader,
                                    optimizers, loss, temp_func,
                                    cc.get('gamma'), gamma_target_func,
                                    gamma_target_func_activation,
                                    working_device,
                                    amp_flag=cc.get('fp16'),
                                    train_sampler=train_sampler,
                                    gamma_rate=cc.get('gamma_rate'),
                                    scaler=scaler)
    final_stage_training(net, cc, nc, train_loader, test_loader,
                         [optimizers[1]], loss, working_device,
                         cc.get('fp16'), train_sampler, scaler=scaler)
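# --- Illustrative sketch (not from the snippet above) ---
# A minimal example of driving two RAdam optimizers over disjoint parameter sets in
# a single backward pass, as base_runner() does with `optimizer` (quantization
# scales) and `optimizer_net` (network weights).
# Assumption: torch.optim.RAdam provides the same interface as the RAdam class used
# above; the toy model and the "scale" parameter are hypothetical stand-ins.
import torch
import torch.nn as nn

net = nn.Linear(4, 2)
scale = nn.Parameter(torch.ones(1))          # stands in for a quantization scale

opt_scale = torch.optim.RAdam([scale], lr=1e-2, weight_decay=0.0)
opt_net = torch.optim.RAdam(net.parameters(), lr=1e-3, weight_decay=1e-4)
optimizers = [opt_scale, opt_net]

x = torch.randn(8, 4)
loss = (net(x) * scale).pow(2).mean()
for opt in optimizers:
    opt.zero_grad()
loss.backward()
for opt in optimizers:
    opt.step()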
)
valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=int(config.batch_size //
                   (config.valid_waveform_sec / config.waveform_sec)),
    shuffle=False,
    num_workers=config.num_workers,
    pin_memory=False,
)

# -------------------------------------
# Setting Optimizer
if config.optimizer == "adam":
    optimizer = optim.Adam(filter(lambda x: x.requires_grad, net.parameters()),
                           lr=config.lr)
elif config.optimizer == "radam":
    optimizer = RAdam(filter(lambda x: x.requires_grad, net.parameters()),
                      lr=config.lr)
else:
    raise NotImplementedError

# -------------------------------------
# Setting Scheduler
if config.lr_scheduler == "cosine":
    # restart every T_0 * validation_interval steps
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, T_0=20, eta_min=config.lr_min
    )
elif config.lr_scheduler == "plateau":
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", patience=5, factor=config.lr_decay
    )
elif config.lr_scheduler == "multi":