def __init__(self, shape):
    """Initialize a layer's weight, bias and optimizer state.

    Args:
        shape: [osize, hsize] — rows are output units, columns hidden units.
    """
    # Weight and bias start uniformly distributed in [-0.5, 0.5).
    self.w = np.random.random(shape) - 0.5
    self.b = np.random.random((shape[0], 1)) - 0.5
    # Per-parameter optimizer state from the project's `opt` module.
    self.wg = opt.create_optimizer()
    self.bg = opt.create_optimizer()
def get_model():
    """Build the segmentation model with its criterion, optimizers and schedulers.

    Returns:
        (model, criterion, optimizer_encoder, optimizer_decoder,
         scheduler_encoder, scheduler_decoder)
    """
    # Fetch the model class named in CFG.model from recycle_model.py.
    model_module = getattr(import_module("recycle_model"), CFG.model)
    model = model_module(num_classes=12)

    # Move the parameters onto GPU memory.
    model.cuda()

    # Let wandb track the model.
    wandb.watch(model)

    # Report the trainable-parameter count.
    print('parameters: ',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # Train data-parallel when more than one GPU is available.
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Criterion defined in loss.py.
    criterion = create_criterion(CFG.criterion)

    # Optimizers from optimizer.py — encoder and decoder(+head) get separate
    # ones (both start at a tiny lr; the schedulers ramp them up).
    optimizer_encoder = create_optimizer(
        CFG.optimizer,
        params=model.seg_model.encoder.parameters(),
        lr=1e-8)
    optimizer_decoder = create_optimizer(
        CFG.optimizer,
        params=[{"params": model.seg_model.decoder.parameters()},
                {"params": model.seg_model.segmentation_head.parameters()}],
        lr=1e-8)

    # Schedulers from scheduler.py; the encoder peaks at a 10x smaller lr.
    scheduler_encoder = create_scheduler(CFG.scheduler,
                                         optimizer=optimizer_encoder,
                                         T_0=30, T_mult=2,
                                         eta_max=CFG.learning_rate * 0.1,
                                         T_up=5, gamma=0.3)
    scheduler_decoder = create_scheduler(CFG.scheduler,
                                         optimizer=optimizer_decoder,
                                         T_0=30, T_mult=2,
                                         eta_max=CFG.learning_rate,
                                         T_up=5, gamma=0.3)

    return (model, criterion, optimizer_encoder, optimizer_decoder,
            scheduler_encoder, scheduler_decoder)
def init_optimizer(self):
    """Create fresh optimizer state for every GRU parameter group.

    Covers the reset (r), update (z) and candidate (hs) gates; each gate
    has input weights (w*), recurrent weights (u*) and a bias (b*), so the
    attributes set are wr_g/ur_g/br_g, wz_g/uz_g/bz_g, whs_g/uhs_g/bhs_g.
    """
    for gate in ("r", "z", "hs"):
        for prefix in ("w", "u", "b"):
            setattr(self, "%s%s_g" % (prefix, gate), opt.create_optimizer())
def train(model, epochs, train_dl, val_dl):
    """Train `model` with manually decayed lr and gradual unfreezing.

    The optimizer is rebuilt each epoch around a decayed base lr with
    differential per-group factors; layer groups are unfrozen at 10% and
    20% of all iterations, and the best checkpoint by validation F2 is
    saved to MODEL_DIR.
    """
    best_score = 0.0
    # Optimizer with differential learning rates per layer group.
    optimizer = create_optimizer(model, BASE_OPTIMIZER, args.init_lr_0,
                                 DIFF_LR_FACTORS)
    iterations = epochs * len(train_dl)
    idx = 0
    for epoch in range(epochs):
        # Decay the base lr for this epoch and rebuild the optimizer with it.
        lr0 = lr_scheduler(epoch, args.lr_decay_factor, args.init_lr_0,
                           args.lr_decay_epoch)
        optimizer = create_optimizer(model, BASE_OPTIMIZER, lr0,
                                     DIFF_LR_FACTORS)
        running_loss = 0
        # Training loop over mini-batches.
        for batch_idx, (data, target) in enumerate(train_dl):
            data, target = data.cuda().float(), target.cuda().float()
            output = model(data)
            loss = F.binary_cross_entropy_with_logits(output, target)
            running_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            idx += 1
            # Unfreeze deeper layer groups once 10% / 20% of training is done.
            if idx == int(0.1 * iterations):
                model.unfreeze(1)
                logger.info("Iteration %d: Unfreezing group 1" % idx)
            if idx == int(0.2 * iterations):
                model.unfreeze(0)
                logger.info("Iteration %d: Unfreezing group 0" % idx)
            if batch_idx % 100 == 0:
                logger.info("Epoch %d (Batch %d / %d)\t Train loss: %.3f" %
                            (epoch+1, batch_idx, len(train_dl), loss.item()))
        # Average train loss for the epoch.
        train_loss = running_loss / len(train_dl)
        logger.info("Epoch %d\t Train loss: %.3f" % (epoch + 1, train_loss))
        mlflow.log_metric('train_loss', train_loss, step=epoch)
        # Validation metrics at threshold 0.2.
        val_f2_score, val_loss = validate(model, val_dl, 0.2)
        logger.info("Epoch %d \t Validation loss: %.3f, F2 score: %.3f" %
                    (epoch+1, val_loss, val_f2_score))
        mlflow.log_metric('val_loss', val_loss, step=epoch)
        mlflow.log_metric('val_f2_score', val_f2_score, step=epoch)
        # Keep only the best checkpoint by validation F2.
        if val_f2_score > best_score:
            best_score = val_f2_score
            best_model_path = os.path.join(
                MODEL_DIR, 'model_resnet34_%d.pth' % (100*val_f2_score))
            logger.info("Saving model to %s" % best_model_path)
            save_model(model, best_model_path)
def build_program(main_program, startup_program, image_shape, archs, args,
                  is_test=False):
    """Assemble a static-graph classification program (paddle fluid).

    Builds data loader, fc head, softmax cross-entropy loss and top-1/5
    accuracy; attaches the optimizer only when building the train program.

    Returns:
        (data_loader, avg_cost, acc_top1, acc_top5)
    """
    with fluid.program_guard(main_program, startup_program):
        with fluid.unique_name.guard():
            data_loader, data, label = create_data_loader(image_shape)
            # Architecture output followed by a classification fc head.
            features = archs(data)
            logits = fluid.layers.fc(input=features, size=args.class_dim)
            softmax_out = fluid.layers.softmax(input=logits, use_cudnn=False)
            cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
            avg_cost = fluid.layers.mean(cost)
            acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label,
                                             k=1)
            acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label,
                                             k=5)
            # Only the training program gets an optimizer.
            if not is_test:
                optimizer = create_optimizer(args)
                optimizer.minimize(avg_cost)
    return data_loader, avg_cost, acc_top1, acc_top5
def train(model, epochs, train_dl, val_dl, fold):
    """Train `model` on one K-fold split with gradual layer unfreezing.

    The optimizer uses differential learning rates per layer group;
    ReduceLROnPlateau lowers the lr when the validation loss stalls, and
    the best checkpoint by validation F2 is saved under the fold's folder.

    Args:
        model: network exposing .unfreeze(group_idx) for gradual unfreezing.
        epochs: number of epochs to train.
        train_dl, val_dl: training / validation dataloaders.
        fold: fold identifier used in the checkpoint directory name.
    """
    best_score = 0.0
    iterations = epochs * len(train_dl)
    idx = 0
    # BUG FIX: build the optimizer around the `model` argument being trained,
    # not the module-level MODEL — otherwise the parameters that are actually
    # forward/backwarded below are never stepped (the sibling hold-out train()
    # in this file correctly passes `model`). Also dropped the unused
    # `lr0 = args.init_lr_0` local.
    optimizer = create_optimizer(model, BASE_OPTIMIZER, args.init_lr_0,
                                 DIFF_LR_FACTORS)
    # Reduce lr when the validation loss plateaus.
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',
                                                        verbose=True,
                                                        patience=args.patience)
    for epoch in range(epochs):
        total_loss = 0
        # training loop
        model.train()
        for batch_idx, (data, target) in enumerate(train_dl):
            data, target = data.cuda().float(), target.cuda().float()
            output = model(data)
            loss = F.binary_cross_entropy_with_logits(output, target)
            total_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            idx += 1
            # Unfreeze deeper layer groups at 10% / 20% of all iterations.
            if idx == int(0.1 * iterations):
                model.unfreeze(1)
                logger.info("Iteration %d: Unfreezing group 1" % idx)
            if idx == int(0.2 * iterations):
                model.unfreeze(0)
                logger.info("Iteration %d: Unfreezing group 0" % idx)
            if batch_idx % 100 == 0:
                logger.info("Epoch %d (Batch %d / %d)\t Train loss: %.3f" %
                            (epoch+1, batch_idx, len(train_dl), loss.item()))
        # Average train loss for the epoch.
        train_loss = total_loss / len(train_dl)
        logger.info("Epoch %d\t Train loss: %.3f" % (epoch + 1, train_loss))
        mlflow.log_metric('train_loss', train_loss, step=epoch)
        # Validation metrics at threshold 0.2.
        val_f2_score, val_loss = validate(model, val_dl, 0.2)
        # The lr schedule monitors the validation loss.
        lr_scheduler.step(val_loss)
        logger.info("Epoch %d \t Validation loss: %.3f, F2 score: %.3f" %
                    (epoch+1, val_loss, val_f2_score))
        mlflow.log_metric('val_loss', val_loss, step=epoch)
        mlflow.log_metric('val_f2_score', val_f2_score, step=epoch)
        # Keep only the best checkpoint by validation F2, per fold.
        if val_f2_score > best_score:
            best_score = val_f2_score
            best_model_path = os.path.join(
                MODEL_DIR, 'fold_%s' % fold,
                'model_VGG19_%d.pth' % (100*val_f2_score))
            logger.info("Saving model to %s" % best_model_path)
            save_model(model, best_model_path)
def get_model():
    """Get the model defined in recycle_model.py.

    Returns:
        model: pytorch model that would be trained
        optimizer: pytorch optimizer for gradient descent
        scheduler: pytorch lr scheduler
    """
    model_module = getattr(import_module("recycle_model"), CFG.model)
    model = model_module(num_classes=11)

    # Move the model onto GPU memory.
    model.cuda()

    # watch model in wandb
    # wandb.watch(model)

    # Report the number of trainable parameters.
    print('parameters: ',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # Train data-parallel when several GPUs are available.
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Exclude biases and LayerNorm parameters from weight decay.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    decay_params = [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)]
    no_decay_params = [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)]
    optimizer_grouped_parameters = [
        {'params': decay_params},
        {'params': no_decay_params, 'weight_decay': 0.0},
    ]

    # Optimizer from optimizer.py.
    optimizer = create_optimizer(CFG.optimizer,
                                 params=optimizer_grouped_parameters,
                                 lr=CFG.learning_rate,
                                 **CFG.optimizer_params)

    # Scheduler from scheduler.py.
    scheduler = create_scheduler(CFG.scheduler, optimizer=optimizer,
                                 **CFG.scheduler_params)
    return model, optimizer, scheduler
def build_program(main_program, startup_program, image_shape, dataset, archs,
                  args, places, is_test=False):
    """Assemble a static-graph train/eval program (paddle 2.x static API).

    Declares feed vars, builds a DataLoader over `dataset`, attaches the
    fc head + softmax cross-entropy + top-1/5 accuracy, and adds the
    optimizer only for the training program.

    Returns:
        (data_loader, avg_cost, acc_top1, acc_top5)
    """
    with static.program_guard(main_program, startup_program):
        with paddle.utils.unique_name.guard():
            data_shape = [None] + image_shape
            data = static.data(name='data', shape=data_shape, dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')
            if args.data == 'cifar10':
                # Cifar labels arrive flat; force the [N, 1] layout in-place.
                paddle.assign(paddle.reshape(label, [-1, 1]), label)
            # Loader settings shared between the train and eval programs.
            common_loader_kwargs = dict(places=places,
                                        feed_list=[data, label],
                                        batch_size=args.batch_size,
                                        return_list=False)
            if is_test:
                data_loader = paddle.io.DataLoader(dataset,
                                                   drop_last=False,
                                                   shuffle=False,
                                                   **common_loader_kwargs)
            else:
                data_loader = paddle.io.DataLoader(dataset,
                                                   drop_last=True,
                                                   shuffle=True,
                                                   use_shared_memory=True,
                                                   num_workers=4,
                                                   **common_loader_kwargs)
            output = archs(data)
            output = static.nn.fc(output, size=args.class_dim)
            softmax_out = F.softmax(output)
            cost = F.cross_entropy(softmax_out, label=label)
            avg_cost = paddle.mean(cost)
            acc_top1 = paddle.metric.accuracy(input=softmax_out, label=label,
                                              k=1)
            acc_top5 = paddle.metric.accuracy(input=softmax_out, label=label,
                                              k=5)
            # Only the training program gets an optimizer.
            if not is_test:
                optimizer = create_optimizer(args)
                optimizer.minimize(avg_cost)
    return data_loader, avg_cost, acc_top1, acc_top5
def get_model(train_iter):
    """Build the mask/gender/age multi-head model with its criteria,
    optimizers and one-cycle schedulers.

    Args:
        train_iter: training dataloader; its length fixes steps_per_epoch.

    Returns:
        (model, criterion_mask, criterion_gender, criterion_age,
         optimizer_backbone, optimizer_classifier,
         scheduler_backbone, scheduler_classifier)
    """
    # Instantiate the class named in CFG.model from mask_model.py.
    model_module = getattr(import_module("mask_model"), CFG.model)
    model = model_module()

    # Move parameters to GPU memory.
    model.cuda()

    # Report the number of trainable parameters.
    print('parameters: ',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # Use DataParallel training when more than one GPU exists.
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # One criterion per prediction head; mask/age use label smoothing.
    criterion_mask = create_criterion(CFG.criterion, classes=3,
                                      smoothing=0.05)
    criterion_gender = create_criterion('cross_entropy')
    criterion_age = create_criterion(CFG.criterion, classes=3,
                                     smoothing=0.05)

    # The backbone trains at a 10x smaller lr than the classifier heads.
    optimizer_backbone = create_optimizer(CFG.optimizer,
                                          params=model.backbone.parameters(),
                                          lr=CFG.learning_rate * 0.1,
                                          momentum=0.9,
                                          weight_decay=1e-2)
    optimizer_classifier = create_optimizer(
        CFG.optimizer,
        params=[{"params": model.mask_layer.parameters()},
                {"params": model.gender_layer.parameters()},
                {"params": model.age_layer.parameters()}],
        lr=CFG.learning_rate,
        momentum=0.9,
        weight_decay=1e-2)

    # Cosine one-cycle schedules with a 5-epoch warm-up fraction.
    scheduler_backbone = create_scheduler(CFG.scheduler,
                                          optimizer=optimizer_backbone,
                                          max_lr=CFG.learning_rate * 0.1,
                                          epochs=CFG.nepochs,
                                          steps_per_epoch=len(train_iter),
                                          pct_start=5/CFG.nepochs,
                                          anneal_strategy='cos')
    scheduler_classifier = create_scheduler(CFG.scheduler,
                                            optimizer=optimizer_classifier,
                                            max_lr=CFG.learning_rate,
                                            epochs=CFG.nepochs,
                                            steps_per_epoch=len(train_iter),
                                            pct_start=5/CFG.nepochs,
                                            anneal_strategy='cos')

    return (model, criterion_mask, criterion_gender, criterion_age,
            optimizer_backbone, optimizer_classifier,
            scheduler_backbone, scheduler_classifier)
augmentations=config["learning_config"]["augmentations"], stage="train", cache=args.cache, shuffle=True) eval_dataset = ASRSliceDataset( data_paths=config["learning_config"]["dataset_config"]["eval_paths"], speech_featurizer=speech_featurizer, text_featurizer=text_featurizer, stage="eval", cache=args.cache, shuffle=True) # Build DS2 model with ctc_trainer.strategy.scope(): satt_ds2_model = SelfAttentionDS2(input_shape=speech_featurizer.shape, arch_config=config["model_config"], num_classes=text_featurizer.num_classes) satt_ds2_model._build(speech_featurizer.shape) satt_ds2_model.summary(line_length=150) optimizer = create_optimizer( name=config["learning_config"]["optimizer_config"]["name"], d_model=config["model_config"]["att"]["head_size"], **config["learning_config"]["optimizer_config"]["config"]) # Compile ctc_trainer.compile(satt_ds2_model, optimizer, max_to_keep=args.max_ckpts) ctc_trainer.fit(train_dataset, eval_dataset, train_bs=args.tbs, eval_bs=args.ebs)
def get_model():
    """Build the segmentation model, criterion, optimizer and scheduler
    according to the names in CFG.

    Returns:
        (model, criterion, optimizer, scheduler)
    """
    # Fetch the model class named in CFG.model from recycle_model.py.
    model_module = getattr(import_module("recycle_model"), CFG.model)
    model = model_module(num_classes=12)

    # Move the parameters onto GPU memory.
    model.cuda()

    # Let wandb track the model.
    wandb.watch(model)

    # Report the trainable-parameter count.
    print('parameters: ',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # Train data-parallel when more than one GPU is available.
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Criterion defined in loss.py.
    criterion = create_criterion(CFG.criterion)

    # Parameter groups shared by every optimizer choice: the encoder trains
    # at a 10x smaller lr than decoder and segmentation head.
    param_groups = [
        {"params": model.seg_model.encoder.parameters(),
         "lr": CFG.learning_rate * 0.1},
        {"params": model.seg_model.decoder.parameters()},
        {"params": model.seg_model.segmentation_head.parameters()},
    ]

    # Optimizer from optimizer.py, with per-optimizer hyperparameters.
    if CFG.optimizer == "Adam":
        optimizer = create_optimizer(CFG.optimizer, params=param_groups,
                                     lr=CFG.learning_rate,
                                     weight_decay=1e-6)
    elif CFG.optimizer == "RAdam":
        optimizer = create_optimizer(CFG.optimizer, params=param_groups,
                                     lr=CFG.learning_rate,
                                     betas=(0.9, 0.999), eps=1e-8,
                                     weight_decay=0)
    elif CFG.optimizer == "AdamP":
        optimizer = create_optimizer(CFG.optimizer, params=param_groups,
                                     lr=CFG.learning_rate,
                                     betas=(0.9, 0.999), eps=1e-8,
                                     weight_decay=0)
    elif CFG.optimizer == "AdamW":
        optimizer = create_optimizer(CFG.optimizer, params=param_groups,
                                     lr=CFG.learning_rate,
                                     amsgrad=True)
    elif CFG.optimizer == "RMSprop":
        optimizer = create_optimizer(CFG.optimizer, params=param_groups,
                                     lr=CFG.learning_rate)

    # Scheduler from scheduler.py.
    if CFG.scheduler == "StepLR":
        scheduler = create_scheduler(CFG.scheduler, optimizer=optimizer,
                                     step_size=5, gamma=0.95)
    elif CFG.scheduler == "CosineAnnealingWarmupRestarts":
        scheduler = create_scheduler(CFG.scheduler, optimizer=optimizer,
                                     first_cycle_steps=5, cycle_mult=1.,
                                     max_lr=1e-4, min_lr=1e-7)

    return model, criterion, optimizer, scheduler
def run(args):
    """Drive the SelfAttentionDS2 ASR pipeline in one of three modes.

    "train": fit with the CTC trainer (optionally mixed precision /
    tfrecords) and optionally export; "test": decode the test set with an
    LM scorer; otherwise: export the model from the latest checkpoint.
    `args.export` is required for test/export paths.
    """
    assert args.mode in modes, f"Mode must in {modes}"
    config = UserConfig(DEFAULT_YAML, args.config, learning=True)
    speech_featurizer = SpeechFeaturizer(config["speech_config"])
    text_featurizer = TextFeaturizer(config["decoder_config"])

    if args.mode == "train":
        tf.random.set_seed(2020)
        if args.mixed_precision:
            policy = tf.keras.mixed_precision.experimental.Policy(
                "mixed_float16")
            tf.keras.mixed_precision.experimental.set_policy(policy)
            print("Enabled mixed precision training")
        ctc_trainer = CTCTrainer(speech_featurizer, text_featurizer,
                                 config["learning_config"]["running_config"],
                                 args.mixed_precision)
        # Dataset flavour: pre-built tfrecords vs. on-the-fly slices.
        if args.tfrecords:
            train_dataset = ASRTFRecordDataset(
                config["learning_config"]["dataset_config"]["train_paths"],
                config["learning_config"]["dataset_config"]["tfrecords_dir"],
                speech_featurizer, text_featurizer, "train",
                augmentations=config["learning_config"]["augmentations"],
                shuffle=True,
            )
            eval_dataset = ASRTFRecordDataset(
                config["learning_config"]["dataset_config"]["eval_paths"],
                config["learning_config"]["dataset_config"]["tfrecords_dir"],
                speech_featurizer, text_featurizer, "eval",
                shuffle=False
            )
        else:
            train_dataset = ASRSliceDataset(
                stage="train",
                speech_featurizer=speech_featurizer,
                text_featurizer=text_featurizer,
                data_paths=config["learning_config"]["dataset_config"]["train_paths"],
                augmentations=config["learning_config"]["augmentations"],
                shuffle=True,
            )
            eval_dataset = ASRSliceDataset(
                stage="eval",
                speech_featurizer=speech_featurizer,
                text_featurizer=text_featurizer,
                data_paths=config["learning_config"]["dataset_config"]["eval_paths"],
                shuffle=False
            )
        # Build DS2 model under the distribution strategy.
        f, c = speech_featurizer.compute_feature_dim()
        with ctc_trainer.strategy.scope():
            satt_ds2_model = SelfAttentionDS2(
                input_shape=[None, f, c],
                arch_config=config["model_config"],
                num_classes=text_featurizer.num_classes)
            satt_ds2_model._build([1, 50, f, c])
            optimizer = create_optimizer(
                name=config["learning_config"]["optimizer_config"]["name"],
                d_model=config["model_config"]["att"]["head_size"],
                **config["learning_config"]["optimizer_config"]["config"]
            )
        # Compile and fit.
        ctc_trainer.compile(satt_ds2_model, optimizer,
                            max_to_keep=args.max_ckpts)
        ctc_trainer.fit(train_dataset, eval_dataset, args.eval_train_ratio)
        if args.export:
            if args.from_weights:
                ctc_trainer.model.save_weights(args.export)
            else:
                ctc_trainer.model.save(args.export)

    elif args.mode == "test":
        tf.random.set_seed(0)
        assert args.export
        # Attach the language-model scorer for decoding.
        text_featurizer.add_scorer(
            Scorer(**text_featurizer.decoder_config["lm_config"],
                   vocabulary=text_featurizer.vocab_array))
        # Build DS2 model.
        f, c = speech_featurizer.compute_feature_dim()
        satt_ds2_model = SelfAttentionDS2(
            input_shape=[None, f, c],
            arch_config=config["model_config"],
            num_classes=text_featurizer.num_classes)
        satt_ds2_model._build([1, 50, f, c])
        satt_ds2_model.summary(line_length=100)
        optimizer = create_optimizer(
            name=config["learning_config"]["optimizer_config"]["name"],
            d_model=config["model_config"]["att"]["head_size"],
            **config["learning_config"]["optimizer_config"]["config"]
        )
        batch_size = config["learning_config"]["running_config"]["batch_size"]
        if args.tfrecords:
            test_dataset = ASRTFRecordDataset(
                config["learning_config"]["dataset_config"]["test_paths"],
                config["learning_config"]["dataset_config"]["tfrecords_dir"],
                speech_featurizer, text_featurizer, "test",
                augmentations=config["learning_config"]["augmentations"],
                shuffle=False
            ).create(batch_size * args.eval_train_ratio)
        else:
            test_dataset = ASRSliceDataset(
                stage="test",
                speech_featurizer=speech_featurizer,
                text_featurizer=text_featurizer,
                data_paths=config["learning_config"]["dataset_config"]["test_paths"],
                augmentations=config["learning_config"]["augmentations"],
                shuffle=False
            ).create(batch_size * args.eval_train_ratio)
        ctc_tester = BaseTester(
            config=config["learning_config"]["running_config"],
            saved_path=args.export,
            from_weights=args.from_weights
        )
        ctc_tester.compile(satt_ds2_model, speech_featurizer, text_featurizer)
        ctc_tester.run(test_dataset)

    else:
        assert args.export
        # Build DS2 model, then export it from the latest checkpoint.
        f, c = speech_featurizer.compute_feature_dim()
        satt_ds2_model = SelfAttentionDS2(
            input_shape=[None, f, c],
            arch_config=config["model_config"],
            num_classes=text_featurizer.num_classes)
        satt_ds2_model._build([1, 50, f, c])
        optimizer = create_optimizer(
            name=config["learning_config"]["optimizer_config"]["name"],
            d_model=config["model_config"]["att"]["head_size"],
            **config["learning_config"]["optimizer_config"]["config"]
        )

        def save_func(**kwargs):
            # Either full model or weights-only export, per CLI flag.
            if args.from_weights:
                kwargs["model"].save_weights(args.export)
            else:
                kwargs["model"].save(args.export)

        save_from_checkpoint(
            func=save_func,
            outdir=config["learning_config"]["running_config"]["outdir"],
            model=satt_ds2_model,
            optimizer=optimizer)
def compress(args):
    """Quantization-aware training (QAT) of an image classifier.

    Loads the dataset and network per `args`, wraps the network with
    PaddleSlim QAT, trains while checkpointing the best top-1 model, and
    finally exports the quantized inference model.
    """
    if args.data == "cifar10":
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode="train",
                                                       backend="cv2",
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode="test",
                                                     backend="cv2",
                                                     transform=transform)
        class_dim = 10
        image_shape = [3, 32, 32]
        pretrain = False
        args.total_images = 50000
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')

    # Model definition.
    if use_data_parallel:
        paddle.distributed.init_parallel_env()
    pretrain = True if args.data == "imagenet" else False
    if args.model == "mobilenet_v1":
        net = mobilenet_v1(pretrained=pretrain, num_classes=class_dim)
    elif args.model == "mobilenet_v3":
        net = MobileNetV3_large_x1_0(class_dim=class_dim)
        if pretrain:
            load_dygraph_pretrain(net, args.pretrained_model, True)
    else:
        raise ValueError("{} is not supported.".format(args.model))
    _logger.info("Origin model summary:")
    paddle.summary(net, (1, 3, 224, 224))

    # 1. Quantization configs.
    quant_config = {
        # Weight preprocess type; None means no preprocessing.
        'weight_preprocess_type': None,
        # Activation preprocess type; None means no preprocessing.
        'activation_preprocess_type': None,
        # Weight quantize type.
        'weight_quantize_type': 'channel_wise_abs_max',
        # Activation quantize type.
        'activation_quantize_type': 'moving_average_abs_max',
        # Weight quantize bit num.
        'weight_bits': 8,
        # Activation quantize bit num.
        'activation_bits': 8,
        # Data type after quantization.
        'dtype': 'int8',
        # Window size for 'range_abs_max' quantization.
        'window_size': 10000,
        # Decay coefficient of the moving average.
        'moving_rate': 0.9,
        # Layer types that will be quantized in dygraph mode.
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }
    if args.use_pact:
        quant_config['activation_preprocess_type'] = 'PACT'

    # 2. Quantize the model with QAT (quant aware training).
    quanter = QAT(config=quant_config)
    quanter.quantize(net)
    _logger.info("QAT model summary:")
    paddle.summary(net, (1, 3, 224, 224))

    opt, lr = create_optimizer(net, trainer_num, args)
    if use_data_parallel:
        net = paddle.DataParallel(net)

    train_batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        drop_last=True)
    train_loader = paddle.io.DataLoader(train_dataset,
                                        batch_sampler=train_batch_sampler,
                                        places=place,
                                        return_list=True,
                                        num_workers=4)
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        drop_last=False,
                                        return_list=True,
                                        num_workers=4)

    @paddle.no_grad()
    def test(epoch, net):
        # One evaluation pass; returns mean top-1 accuracy.
        net.eval()
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        eval_reader_cost = 0.0
        eval_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for data in valid_loader():
            eval_reader_cost += time.time() - reader_start
            image = data[0]
            label = data[1]
            if args.data == "cifar10":
                label = paddle.reshape(label, [-1, 1])
            eval_start = time.time()
            out = net(image)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            eval_run_cost += time.time() - eval_start
            batch_size = image.shape[0]
            total_samples += batch_size
            if batch_id % args.log_period == 0:
                log_period = 1 if batch_id == 0 else args.log_period
                _logger.info(
                    "Eval epoch[{}] batch[{}] - top1: {:.6f}; top5: {:.6f}; avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, avg_samples: {}, avg_ips: {:.3f} images/s"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()),
                            eval_reader_cost / log_period,
                            (eval_reader_cost + eval_run_cost) / log_period,
                            total_samples / log_period,
                            total_samples / (eval_reader_cost + eval_run_cost)))
                eval_reader_cost = 0.0
                eval_run_cost = 0.0
                total_samples = 0
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))
            batch_id += 1
            reader_start = time.time()
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def cross_entropy(input, target, ls_epsilon):
        # Cross-entropy with optional label smoothing.
        if ls_epsilon > 0:
            if target.shape[-1] != class_dim:
                target = paddle.nn.functional.one_hot(target, class_dim)
            target = paddle.nn.functional.label_smooth(target,
                                                       epsilon=ls_epsilon)
            target = paddle.reshape(target, shape=[-1, class_dim])
            input = -paddle.nn.functional.log_softmax(input, axis=-1)
            cost = paddle.sum(target * input, axis=-1)
        else:
            cost = paddle.nn.functional.cross_entropy(input=input,
                                                      label=target)
        avg_cost = paddle.mean(cost)
        return avg_cost

    def train(epoch, net):
        # One training epoch over train_loader.
        net.train()
        batch_id = 0
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for data in train_loader():
            train_reader_cost += time.time() - reader_start
            image = data[0]
            label = data[1]
            if args.data == "cifar10":
                label = paddle.reshape(label, [-1, 1])
            train_start = time.time()
            out = net(image)
            avg_cost = cross_entropy(out, label, args.ls_epsilon)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            avg_cost.backward()
            opt.step()
            opt.clear_grad()
            lr.step()
            loss_n = np.mean(avg_cost.numpy())
            acc_top1_n = np.mean(acc_top1.numpy())
            acc_top5_n = np.mean(acc_top5.numpy())
            train_run_cost += time.time() - train_start
            batch_size = image.shape[0]
            total_samples += batch_size
            if batch_id % args.log_period == 0:
                log_period = 1 if batch_id == 0 else args.log_period
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; top1: {:.6f}; top5: {:.6f}; avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, avg_samples: {}, avg_ips: {:.3f} images/s"
                    .format(epoch, batch_id, lr.get_lr(), loss_n, acc_top1_n,
                            acc_top5_n, train_reader_cost / log_period,
                            (train_reader_cost + train_run_cost) / log_period,
                            total_samples / log_period,
                            total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            batch_id += 1
            reader_start = time.time()

    # Train loop: checkpoint every epoch (rank 0) and track the best top-1.
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, net)
        acc1 = test(i, net)
        if paddle.distributed.get_rank() == 0:
            model_prefix = os.path.join(args.model_save_dir, "epoch_" + str(i))
            paddle.save(net.state_dict(), model_prefix + ".pdparams")
            paddle.save(opt.state_dict(), model_prefix + ".pdopt")
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            if paddle.distributed.get_rank() == 0:
                model_prefix = os.path.join(args.model_save_dir, "best_model")
                paddle.save(net.state_dict(), model_prefix + ".pdparams")
                paddle.save(opt.state_dict(), model_prefix + ".pdopt")

    # 3. Save the quant-aware inference model from the best checkpoint.
    if paddle.distributed.get_rank() == 0:
        # load best model
        load_dygraph_pretrain(net,
                              os.path.join(args.model_save_dir, "best_model"))
        path = os.path.join(args.model_save_dir, "inference_model",
                            'qat_model')
        quanter.save_quantized_model(
            net, path,
            input_spec=[paddle.static.InputSpec(shape=[None, 3, 224, 224],
                                                dtype='float32')])
def __init__(self, shape):
    """Initialize an embedding table and its optimizer state.

    Args:
        shape: [vsize, embedding_size] — vocabulary size by embedding dim.
    """
    # Embedding weights start uniformly distributed in [-0.5, 0.5).
    self.w = np.random.random(shape) - 0.5
    # Optimizer state for the embedding matrix.
    self.g = opt.create_optimizer()
def main(args):
    """Entry point: build data/model/criterion/optimizer (optionally per
    K-fold split, with wandb logging) and launch training via run()."""
    seed_everything(21)
    load_dotenv()
    if WANDB:
        if args.ENCODER:
            run_name = args.MODEL + "_" + args.ENCODER
        else:
            run_name = args.MODEL
    if args.KFOLD > 1:
        if args.KFOLD != 5:
            print("Only 5 KFOLD is available")
            return
        # Create the folder that will hold the per-fold .pt checkpoints.
        path_pair = args.MODEL_PATH.split(".")
        os.makedirs(path_pair[0], exist_ok=True)
        # Copy args so every fold restarts from a pristine configuration.
        args_origin = copy.deepcopy(args)
    for fold in range(args.KFOLD):
        # Configure the dataloader differently for hold-out vs. k-fold.
        if args.KFOLD > 1:
            args = copy.deepcopy(args_origin)
            path_pair = args_origin.MODEL_PATH.split(".")
            # Point MODEL_PATH at this fold's checkpoint file.
            args.MODEL_PATH = (path_pair[0] + f"/kfold_{fold+1}." +
                               path_pair[1])
            # wandb run per fold.
            if WANDB:
                wandb.init(
                    project=os.environ.get("WANDB_PROJECT_NAME"),
                    name=run_name + f"_k{fold+1}",
                    config=args,
                    reinit=True,
                )
                args = wandb.config
            dataloader = get_dataloader(args.BATCH_SIZE, fold_index=fold)
            print(f"\nfold {fold+1} start")
        else:
            # Single hold-out run.
            if WANDB:
                wandb.init(
                    project=os.environ.get("WANDB_PROJECT_NAME"),
                    name=run_name,
                    reinit=True,
                )
                wandb.config.update(args)
                args = wandb.config
            dataloader = get_dataloader(args.BATCH_SIZE)
        print("Get loader")
        model = get_model(args.MODEL, args.ENCODER).to(args.device)
        print("Load model")
        if WANDB:
            wandb.watch(model)
        # Compound losses are spelled "lossA+lossB" or "lossA-lossB";
        # element 0 of `criterion` records the combining operator.
        criterion = []
        if "+" in args.LOSS:
            criterion.append("+")
            criterion.append(create_criterion(args.LOSS.split("+")[0]))
            criterion.append(create_criterion(args.LOSS.split("+")[1]))
        elif "-" in args.LOSS:
            criterion.append("-")
            criterion.append(create_criterion(args.LOSS.split("-")[0]))
            criterion.append(create_criterion(args.LOSS.split("-")[1]))
        else:
            criterion.append("0")
            criterion.append(create_criterion(args.LOSS))
        optimizer = create_optimizer(args.OPTIMIZER, model,
                                     args.LEARNING_RATE)
        if args.SCHEDULER:
            scheduler = create_scheduler(args.SCHEDULER, optimizer)
        else:
            scheduler = None
        # optimizer = optim.Adam(params = model.parameters(), lr = args.LEARNING_RATE, weight_decay=1e-6)
        print("Run")
        run(args, model, criterion, optimizer, dataloader, fold, scheduler)