Example #1
 def __init__(self, shape):
     """
     shape: [osize, hsize]
     """
     self.w = np.random.random(shape) - 0.5
     self.b = np.random.random((shape[0], 1)) - 0.5
     self.wg = opt.create_optimizer()
     self.bg = opt.create_optimizer()
Example #2
def get_model():
    # Load the specified model defined in recycle_model.py.
    model_module = getattr(import_module("recycle_model"), CFG.model)
    model = model_module(num_classes=12)

    # Move the model parameters to GPU memory.
    model.cuda()

    # Watch the model with wandb.
    wandb.watch(model)

    # Print the number of trainable model parameters.
    print('parameters: ',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # If two or more GPUs are available, enable DataParallel training.
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Load the criterion defined in loss.py.
    criterion = create_criterion(CFG.criterion)

    # Load the optimizers defined in optimizer.py.
    optimizer_encoder = create_optimizer(
        CFG.optimizer, params=model.seg_model.encoder.parameters(), lr=1e-8)

    optimizer_decoder = create_optimizer(
        CFG.optimizer,
        params=[{
            "params": model.seg_model.decoder.parameters()
        }, {
            "params": model.seg_model.segmentation_head.parameters()
        }],
        lr=1e-8)

    # Load the schedulers defined in scheduler.py.
    scheduler_encoder = create_scheduler(CFG.scheduler,
                                         optimizer=optimizer_encoder,
                                         T_0=30,
                                         T_mult=2,
                                         eta_max=CFG.learning_rate * 0.1,
                                         T_up=5,
                                         gamma=0.3)

    scheduler_decoder = create_scheduler(CFG.scheduler,
                                         optimizer=optimizer_decoder,
                                         T_0=30,
                                         T_mult=2,
                                         eta_max=CFG.learning_rate,
                                         T_up=5,
                                         gamma=0.3)

    return model, criterion, optimizer_encoder, optimizer_decoder, scheduler_encoder, scheduler_decoder
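Most of the PyTorch examples on this page call create_optimizer / create_scheduler helpers defined in project files (optimizer.py, scheduler.py) that are not shown. A minimal sketch of such name-based factories, assuming the name strings map directly to classes in torch.optim and torch.optim.lr_scheduler (custom classes such as AdamP or CosineAnnealingWarmupRestarts would need extra registry entries):

import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler


def create_optimizer(name, params, **kwargs):
    # Resolve the optimizer class by name, e.g. "Adam", "AdamW", "RMSprop".
    optimizer_cls = getattr(optim, name)
    return optimizer_cls(params, **kwargs)


def create_scheduler(name, optimizer, **kwargs):
    # Resolve the scheduler class by name, e.g. "StepLR", "CosineAnnealingLR".
    scheduler_cls = getattr(lr_scheduler, name)
    return scheduler_cls(optimizer, **kwargs)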
Example #3
 def init_optimizer(self):
     self.wr_g = opt.create_optimizer()
     self.ur_g = opt.create_optimizer()
     self.br_g = opt.create_optimizer()
     self.wz_g = opt.create_optimizer()
     self.uz_g = opt.create_optimizer()
     self.bz_g = opt.create_optimizer()
     self.whs_g = opt.create_optimizer()
     self.uhs_g = opt.create_optimizer()
     self.bhs_g = opt.create_optimizer()
Example #4
def train(model, epochs, train_dl, val_dl):
    best_score = 0.0
    # create optimizer with differential learning rates
    optimizer = create_optimizer(model, BASE_OPTIMIZER, args.init_lr_0,
                                 DIFF_LR_FACTORS)
    iterations = epochs * len(train_dl)
    idx = 0
    for epoch in range(epochs):
        lr0 = lr_scheduler(epoch, args.lr_decay_factor, args.init_lr_0,
                           args.lr_decay_epoch)  # set base lr for this epoch
        optimizer = create_optimizer(model, BASE_OPTIMIZER, lr0,
                                     DIFF_LR_FACTORS)
        total_loss = 0
        # training loop
        for batch_idx, (data, target) in enumerate(train_dl):
            data, target = data.cuda().float(), target.cuda().float()
            output = model(data)
            loss = F.binary_cross_entropy_with_logits(output, target)
            total_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            idx += 1
            # unfreeze deeper layers sequentially
            if idx == int(0.1 * iterations):
                model.unfreeze(1)
                logger.info("Iteration %d: Unfreezing group 1" % idx)
            if idx == int(0.2 * iterations):
                model.unfreeze(0)
                logger.info("Iteration %d: Unfreezing group 0" % idx)
            if batch_idx % 100 == 0:
                logger.info("Epoch %d (Batch %d / %d)\t Train loss: %.3f" % \
                    (epoch+1, batch_idx, len(train_dl), loss.item()))
        # train loss
        train_loss = total_loss / len(train_dl)
        logger.info("Epoch %d\t Train loss: %.3f" % (epoch + 1, train_loss))
        mlflow.log_metric('train_loss', train_loss, step=epoch)
        # validation scores
        val_f2_score, val_loss = validate(model, val_dl, 0.2)
        logger.info("Epoch %d \t Validation loss: %.3f, F2 score: %.3f" % \
            (epoch+1, val_loss, val_f2_score))
        mlflow.log_metric('val_loss', val_loss, step=epoch)
        mlflow.log_metric('val_f2_score', val_f2_score, step=epoch)
        # model saving
        if val_f2_score > best_score:
            best_score = val_f2_score
            best_model_path = os.path.join(MODEL_DIR, 'model_resnet34_%d.pth' % \
                (100*val_f2_score))
            logger.info("Saving model to %s" % best_model_path)
            save_model(model, best_model_path)
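The create_optimizer(model, BASE_OPTIMIZER, lr, DIFF_LR_FACTORS) helper used in this training loop is not shown. One plausible sketch of differential learning rates, assuming a hypothetical model.groups attribute holding the layer groups (deepest first) and one LR multiplier per group in DIFF_LR_FACTORS:

def create_optimizer(model, base_optimizer, base_lr, diff_lr_factors):
    # Hypothetical: `model.groups` is an ordered list of nn.Module layer groups.
    param_groups = [
        {"params": group.parameters(), "lr": base_lr * factor}
        for group, factor in zip(model.groups, diff_lr_factors)
    ]
    # base_optimizer is an optimizer class such as torch.optim.Adam.
    return base_optimizer(param_groups, lr=base_lr)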
Example #5
def build_program(main_program,
                  startup_program,
                  image_shape,
                  archs,
                  args,
                  is_test=False):
    with fluid.program_guard(main_program, startup_program):
        with fluid.unique_name.guard():
            data_loader, data, label = create_data_loader(image_shape)
            output = archs(data)
            output = fluid.layers.fc(input=output, size=args.class_dim)

            softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
            cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
            avg_cost = fluid.layers.mean(cost)
            acc_top1 = fluid.layers.accuracy(input=softmax_out,
                                             label=label,
                                             k=1)
            acc_top5 = fluid.layers.accuracy(input=softmax_out,
                                             label=label,
                                             k=5)

            if not is_test:
                optimizer = create_optimizer(args)
                optimizer.minimize(avg_cost)
    return data_loader, avg_cost, acc_top1, acc_top5
Example #6
def train(model, epochs, train_dl, val_dl, fold):
    best_score = 0.0
    lr0 = args.init_lr_0
    iterations = epochs * len(train_dl)
    idx = 0
    # create optimizer with differential learning rates
    optimizer = create_optimizer(MODEL, BASE_OPTIMIZER, args.init_lr_0,
                                 DIFF_LR_FACTORS)
    # set up lr schedule based on val loss
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                        'min',
                                                        verbose=True,
                                                        patience=args.patience)
    for epoch in range(epochs):
        total_loss = 0
        # training loop
        model.train()
        for batch_idx, (data, target) in enumerate(train_dl):
            data, target = data.cuda().float(), target.cuda().float()
            output = model(data)
            loss = F.binary_cross_entropy_with_logits(output, target)
            total_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            idx += 1
            # unfreeze deeper layers sequentially
            if idx == int(0.1 * iterations):
                model.unfreeze(1)
                logger.info("Iteration %d: Unfreezing group 1" % idx)
            if idx == int(0.2 * iterations):
                model.unfreeze(0)
                logger.info("Iteration %d: Unfreezing group 0" % idx)
            if batch_idx % 100 == 0:
                logger.info("Epoch %d (Batch %d / %d)\t Train loss: %.3f" % \
                    (epoch+1, batch_idx, len(train_dl), loss.item()))
        # train loss
        train_loss = total_loss / len(train_dl)
        logger.info("Epoch %d\t Train loss: %.3f" % (epoch + 1, train_loss))
        mlflow.log_metric('train_loss', train_loss, step=epoch)
        # validation scores
        val_f2_score, val_loss = validate(model, val_dl, 0.2)
        # lr monitoring val_loss
        lr_scheduler.step(val_loss)
        logger.info("Epoch %d \t Validation loss: %.3f, F2 score: %.3f" % \
            (epoch+1, val_loss, val_f2_score))
        mlflow.log_metric('val_loss', val_loss, step=epoch)
        mlflow.log_metric('val_f2_score', val_f2_score, step=epoch)
        # model saving
        if val_f2_score > best_score:
            best_score = val_f2_score
            best_model_path = os.path.join(MODEL_DIR, 'fold_%s' % fold, 'model_VGG19_%d.pth' % \
                (100*val_f2_score))
            logger.info("Saving model to %s" % best_model_path)
            save_model(model, best_model_path)
Example #7
def get_model():
    '''
        get defined model from recycle_model.py
        
        Returns:
            model: pytorch model that would be trained
            optimizer: pytorch optimizer for gradient descent
            scheduler: pytorch lr scheduler
    '''
    model_module = getattr(import_module("recycle_model"), CFG.model)
    model = model_module(num_classes=11)

    # move model to cuda memory
    model.cuda()

    # watch model in wandb
    # wandb.watch(model)

    # check the number of model parameters
    print('parameters: ',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # if using multi-gpu, train model in parallel
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # apply a different weight_decay to each parameter group
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)]
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    # get optimizer from optimizer.py
    optimizer = create_optimizer(CFG.optimizer,
                                 params=optimizer_grouped_parameters,
                                 lr=CFG.learning_rate,
                                 **CFG.optimizer_params)

    # get scheduler from scheduler.py
    scheduler = create_scheduler(CFG.scheduler,
                                 optimizer=optimizer,
                                 **CFG.scheduler_params)

    return model, optimizer, scheduler
Example #8
def build_program(main_program,
                  startup_program,
                  image_shape,
                  dataset,
                  archs,
                  args,
                  places,
                  is_test=False):
    with static.program_guard(main_program, startup_program):
        with paddle.utils.unique_name.guard():
            data_shape = [None] + image_shape
            data = static.data(name='data', shape=data_shape, dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')
            if args.data == 'cifar10':
                paddle.assign(paddle.reshape(label, [-1, 1]), label)
            if is_test:
                data_loader = paddle.io.DataLoader(dataset,
                                                   places=places,
                                                   feed_list=[data, label],
                                                   drop_last=False,
                                                   batch_size=args.batch_size,
                                                   return_list=False,
                                                   shuffle=False)
            else:
                data_loader = paddle.io.DataLoader(dataset,
                                                   places=places,
                                                   feed_list=[data, label],
                                                   drop_last=True,
                                                   batch_size=args.batch_size,
                                                   return_list=False,
                                                   shuffle=True,
                                                   use_shared_memory=True,
                                                   num_workers=4)
            output = archs(data)
            output = static.nn.fc(output, size=args.class_dim)

            softmax_out = F.softmax(output)
            cost = F.cross_entropy(softmax_out, label=label)
            avg_cost = paddle.mean(cost)
            acc_top1 = paddle.metric.accuracy(input=softmax_out,
                                              label=label,
                                              k=1)
            acc_top5 = paddle.metric.accuracy(input=softmax_out,
                                              label=label,
                                              k=5)

            if not is_test:
                optimizer = create_optimizer(args)
                optimizer.minimize(avg_cost)
    return data_loader, avg_cost, acc_top1, acc_top5
Example #9
def get_model(train_iter):
    # load the model class from mask_model.py and instantiate it
    model_module = getattr(import_module("mask_model"), CFG.model)
    model = model_module()

    # move the model to GPU memory
    model.cuda()    
    
    # print the number of trainable parameters (weights) of the model
    print('parameters: ', sum(p.numel() for p in model.parameters() if p.requires_grad))
    
    # if two or more GPUs are available, use DataParallel training
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # load the criteria from loss.py and instantiate them with parameters
    criterion_mask = create_criterion(CFG.criterion, classes=3, smoothing=0.05)
    criterion_gender = create_criterion('cross_entropy')
    criterion_age = create_criterion(CFG.criterion, classes=3, smoothing=0.05)

    # load the optimizers from optimizer.py and instantiate them with parameters
    optimizer_backbone = create_optimizer(
        CFG.optimizer,
        params=model.backbone.parameters(),
        lr = CFG.learning_rate * 0.1,
        momentum=0.9,
        weight_decay=1e-2
    )
    optimizer_classifier = create_optimizer(
        CFG.optimizer,
        params=[
            {"params": model.mask_layer.parameters()},
            {"params": model.gender_layer.parameters()},
            {"params": model.age_layer.parameters()},
        ],
        lr = CFG.learning_rate,
        momentum=0.9,
        weight_decay=1e-2
    )

    # load the schedulers from scheduler.py and instantiate them with parameters
    scheduler_backbone = create_scheduler(
        CFG.scheduler,
        optimizer=optimizer_backbone,
        max_lr=CFG.learning_rate * 0.1,
        epochs=CFG.nepochs,
        steps_per_epoch=len(train_iter),
        pct_start=5/CFG.nepochs,
        anneal_strategy='cos'
    )
    scheduler_classifier = create_scheduler(
        CFG.scheduler,
        optimizer=optimizer_classifier,
        max_lr=CFG.learning_rate,
        epochs=CFG.nepochs,
        steps_per_epoch=len(train_iter),
        pct_start=5/CFG.nepochs,
        anneal_strategy='cos'
    )

    return model, criterion_mask, criterion_gender, criterion_age, optimizer_backbone, optimizer_classifier, scheduler_backbone, scheduler_classifier
Example #10
    train_dataset = ASRSliceDataset(
        data_paths=config["learning_config"]["dataset_config"]["train_paths"],
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        augmentations=config["learning_config"]["augmentations"],
        stage="train",
        cache=args.cache,
        shuffle=True)
    eval_dataset = ASRSliceDataset(
        data_paths=config["learning_config"]["dataset_config"]["eval_paths"],
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        stage="eval",
        cache=args.cache,
        shuffle=True)

# Build DS2 model
with ctc_trainer.strategy.scope():
    satt_ds2_model = SelfAttentionDS2(input_shape=speech_featurizer.shape,
                                      arch_config=config["model_config"],
                                      num_classes=text_featurizer.num_classes)
    satt_ds2_model._build(speech_featurizer.shape)
    satt_ds2_model.summary(line_length=150)
    optimizer = create_optimizer(
        name=config["learning_config"]["optimizer_config"]["name"],
        d_model=config["model_config"]["att"]["head_size"],
        **config["learning_config"]["optimizer_config"]["config"])
# Compile
ctc_trainer.compile(satt_ds2_model, optimizer, max_to_keep=args.max_ckpts)

ctc_trainer.fit(train_dataset,
                eval_dataset,
                train_bs=args.tbs,
                eval_bs=args.ebs)
Example #11
def get_model():
    # Load the specified model defined in recycle_model.py.
    model_module = getattr(import_module("recycle_model"), CFG.model)
    model = model_module(num_classes=12)

    # Move the model parameters to GPU memory.
    model.cuda()

    # Watch the model with wandb.
    wandb.watch(model)

    # Print the number of trainable model parameters.
    print('parameters: ',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    # If two or more GPUs are available, enable DataParallel training.
    n_gpu = torch.cuda.device_count()
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Load the criterion defined in loss.py.
    criterion = create_criterion(CFG.criterion)

    # Load the optimizer defined in optimizer.py.
    if CFG.optimizer == "Adam":
        optimizer = create_optimizer(
            CFG.optimizer,
            params=[
                {
                    "params": model.seg_model.encoder.parameters(),
                    "lr": CFG.learning_rate * 0.1
                },
                {
                    "params": model.seg_model.decoder.parameters()
                },
                {
                    "params": model.seg_model.segmentation_head.parameters()
                },
            ],
            lr=CFG.learning_rate,
            weight_decay=1e-6)
    elif CFG.optimizer == "RAdam":
        optimizer = create_optimizer(
            CFG.optimizer,
            params=[
                {
                    "params": model.seg_model.encoder.parameters(),
                    "lr": CFG.learning_rate * 0.1
                },
                {
                    "params": model.seg_model.decoder.parameters()
                },
                {
                    "params": model.seg_model.segmentation_head.parameters()
                },
            ],
            lr=CFG.learning_rate,
            betas=(0.9, 0.999),
            eps=1e-8,
            weight_decay=0)
    elif CFG.optimizer == "AdamP":
        optimizer = create_optimizer(
            CFG.optimizer,
            params=[
                {
                    "params": model.seg_model.encoder.parameters(),
                    "lr": CFG.learning_rate * 0.1
                },
                {
                    "params": model.seg_model.decoder.parameters()
                },
                {
                    "params": model.seg_model.segmentation_head.parameters()
                },
            ],
            lr=CFG.learning_rate,
            betas=(0.9, 0.999),
            eps=1e-8,
            weight_decay=0)
    elif CFG.optimizer == "AdamW":
        optimizer = create_optimizer(
            CFG.optimizer,
            params=[
                {
                    "params": model.seg_model.encoder.parameters(),
                    "lr": CFG.learning_rate * 0.1
                },
                {
                    "params": model.seg_model.decoder.parameters()
                },
                {
                    "params": model.seg_model.segmentation_head.parameters()
                },
            ],
            lr=CFG.learning_rate,
            amsgrad=True)
    elif CFG.optimizer == "RMSprop":
        optimizer = create_optimizer(
            CFG.optimizer,
            params=[
                {
                    "params": model.seg_model.encoder.parameters(),
                    "lr": CFG.learning_rate * 0.1
                },
                {
                    "params": model.seg_model.decoder.parameters()
                },
                {
                    "params": model.seg_model.segmentation_head.parameters()
                },
            ],
            lr=CFG.learning_rate)
    else:
        raise ValueError(f"Unsupported optimizer: {CFG.optimizer}")

    # Load the scheduler defined in scheduler.py.
    if CFG.scheduler == "StepLR":
        scheduler = create_scheduler(CFG.scheduler,
                                     optimizer=optimizer,
                                     step_size=5,
                                     gamma=0.95)
    elif CFG.scheduler == "CosineAnnealingWarmupRestarts":
        scheduler = create_scheduler(
            CFG.scheduler,
            optimizer=optimizer,
            first_cycle_steps=5,
            cycle_mult=1.,
            max_lr=1e-4,
            min_lr=1e-7,
        )
    else:
        raise ValueError(f"Unsupported scheduler: {CFG.scheduler}")

    return model, criterion, optimizer, scheduler
Example #12
    def run(args):
        assert args.mode in modes, f"Mode must be in {modes}"

        config = UserConfig(DEFAULT_YAML, args.config, learning=True)
        speech_featurizer = SpeechFeaturizer(config["speech_config"])
        text_featurizer = TextFeaturizer(config["decoder_config"])

        if args.mode == "train":
            tf.random.set_seed(2020)

            if args.mixed_precision:
                policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
                tf.keras.mixed_precision.experimental.set_policy(policy)
                print("Enabled mixed precision training")

            ctc_trainer = CTCTrainer(speech_featurizer, text_featurizer,
                                     config["learning_config"]["running_config"],
                                     args.mixed_precision)

            if args.tfrecords:
                train_dataset = ASRTFRecordDataset(
                    config["learning_config"]["dataset_config"]["train_paths"],
                    config["learning_config"]["dataset_config"]["tfrecords_dir"],
                    speech_featurizer, text_featurizer, "train",
                    augmentations=config["learning_config"]["augmentations"], shuffle=True,
                )
                eval_dataset = ASRTFRecordDataset(
                    config["learning_config"]["dataset_config"]["eval_paths"],
                    config["learning_config"]["dataset_config"]["tfrecords_dir"],
                    speech_featurizer, text_featurizer, "eval", shuffle=False
                )
            else:
                train_dataset = ASRSliceDataset(
                    stage="train", speech_featurizer=speech_featurizer,
                    text_featurizer=text_featurizer,
                    data_paths=config["learning_config"]["dataset_config"]["train_paths"],
                    augmentations=config["learning_config"]["augmentations"], shuffle=True,
                )
                eval_dataset = ASRSliceDataset(
                    stage="eval", speech_featurizer=speech_featurizer,
                    text_featurizer=text_featurizer,
                    data_paths=config["learning_config"]["dataset_config"]["eval_paths"],
                    shuffle=False
                )

            # Build DS2 model
            f, c = speech_featurizer.compute_feature_dim()
            with ctc_trainer.strategy.scope():
                satt_ds2_model = SelfAttentionDS2(input_shape=[None, f, c],
                                                  arch_config=config["model_config"],
                                                  num_classes=text_featurizer.num_classes)
                satt_ds2_model._build([1, 50, f, c])
                optimizer = create_optimizer(
                    name=config["learning_config"]["optimizer_config"]["name"],
                    d_model=config["model_config"]["att"]["head_size"],
                    **config["learning_config"]["optimizer_config"]["config"]
                )
            # Compile
            ctc_trainer.compile(satt_ds2_model, optimizer, max_to_keep=args.max_ckpts)

            ctc_trainer.fit(train_dataset, eval_dataset, args.eval_train_ratio)

            if args.export:
                if args.from_weights:
                    ctc_trainer.model.save_weights(args.export)
                else:
                    ctc_trainer.model.save(args.export)

        elif args.mode == "test":
            tf.random.set_seed(0)
            assert args.export

            text_featurizer.add_scorer(
                Scorer(**text_featurizer.decoder_config["lm_config"],
                       vocabulary=text_featurizer.vocab_array))

            # Build DS2 model
            f, c = speech_featurizer.compute_feature_dim()
            satt_ds2_model = SelfAttentionDS2(input_shape=[None, f, c],
                                              arch_config=config["model_config"],
                                              num_classes=text_featurizer.num_classes)
            satt_ds2_model._build([1, 50, f, c])
            satt_ds2_model.summary(line_length=100)
            optimizer = create_optimizer(
                name=config["learning_config"]["optimizer_config"]["name"],
                d_model=config["model_config"]["att"]["head_size"],
                **config["learning_config"]["optimizer_config"]["config"]
            )

            batch_size = config["learning_config"]["running_config"]["batch_size"]
            if args.tfrecords:
                test_dataset = ASRTFRecordDataset(
                    config["learning_config"]["dataset_config"]["test_paths"],
                    config["learning_config"]["dataset_config"]["tfrecords_dir"],
                    speech_featurizer, text_featurizer, "test",
                    augmentations=config["learning_config"]["augmentations"], shuffle=False
                ).create(batch_size * args.eval_train_ratio)
            else:
                test_dataset = ASRSliceDataset(
                    stage="test", speech_featurizer=speech_featurizer,
                    text_featurizer=text_featurizer,
                    data_paths=config["learning_config"]["dataset_config"]["test_paths"],
                    augmentations=config["learning_config"]["augmentations"], shuffle=False
                ).create(batch_size * args.eval_train_ratio)

            ctc_tester = BaseTester(
                config=config["learning_config"]["running_config"],
                saved_path=args.export, from_weights=args.from_weights
            )
            ctc_tester.compile(satt_ds2_model, speech_featurizer, text_featurizer)
            ctc_tester.run(test_dataset)

        else:
            assert args.export

            # Build DS2 model
            f, c = speech_featurizer.compute_feature_dim()
            satt_ds2_model = SelfAttentionDS2(input_shape=[None, f, c],
                                              arch_config=config["model_config"],
                                              num_classes=text_featurizer.num_classes)
            satt_ds2_model._build([1, 50, f, c])
            optimizer = create_optimizer(
                name=config["learning_config"]["optimizer_config"]["name"],
                d_model=config["model_config"]["att"]["head_size"],
                **config["learning_config"]["optimizer_config"]["config"]
            )

            def save_func(**kwargs):
                if args.from_weights:
                    kwargs["model"].save_weights(args.export)
                else:
                    kwargs["model"].save(args.export)

            save_from_checkpoint(func=save_func,
                                 outdir=config["learning_config"]["running_config"]["outdir"],
                                 model=satt_ds2_model, optimizer=optimizer)
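The create_optimizer(name=..., d_model=..., **config) helper in these ASR examples is also project code; the d_model argument suggests a transformer-style warmup schedule. A hedged sketch of what such a helper might look like (the actual implementation in the source project may differ):

import tensorflow as tf


class TransformerSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Noam-style warmup: LR rises linearly for warmup_steps, then decays as 1/sqrt(step)."""

    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        return tf.math.rsqrt(self.d_model) * tf.minimum(
            tf.math.rsqrt(step), step * self.warmup_steps ** -1.5)


def create_optimizer(name, d_model, **config):
    # Assumed behavior: resolve the Keras optimizer by name and attach the schedule.
    schedule = TransformerSchedule(d_model, config.pop("warmup_steps", 4000))
    optimizer_cls = getattr(tf.keras.optimizers, name.capitalize(), tf.keras.optimizers.Adam)
    return optimizer_cls(learning_rate=schedule, **config)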
Example #13
def compress(args):
    if args.data == "cifar10":
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode="train",
                                                       backend="cv2",
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode="test",
                                                     backend="cv2",
                                                     transform=transform)
        class_dim = 10
        image_shape = [3, 32, 32]
        pretrain = False
        args.total_images = 50000
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1

    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')
    # model definition
    if use_data_parallel:
        paddle.distributed.init_parallel_env()

    pretrain = args.data == "imagenet"
    if args.model == "mobilenet_v1":
        net = mobilenet_v1(pretrained=pretrain, num_classes=class_dim)
    elif args.model == "mobilenet_v3":
        net = MobileNetV3_large_x1_0(class_dim=class_dim)
        if pretrain:
            load_dygraph_pretrain(net, args.pretrained_model, True)
    else:
        raise ValueError("{} is not supported.".format(args.model))
    _logger.info("Origin model summary:")
    paddle.summary(net, (1, 3, 224, 224))

    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight preprocess type, default is None and no preprocessing is performed.
        'weight_preprocess_type': None,
        # activation preprocess type, default is None and no preprocessing is performed.
        'activation_preprocess_type': None,
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
        # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
        'quantizable_layer_type': ['Conv2D', 'Linear'],
    }

    if args.use_pact:
        quant_config['activation_preprocess_type'] = 'PACT'

    ############################################################################################################
    # 2. Quantize the model with QAT (quant aware training)
    ############################################################################################################

    quanter = QAT(config=quant_config)
    quanter.quantize(net)

    _logger.info("QAT model summary:")
    paddle.summary(net, (1, 3, 224, 224))

    opt, lr = create_optimizer(net, trainer_num, args)

    if use_data_parallel:
        net = paddle.DataParallel(net)

    train_batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)
    train_loader = paddle.io.DataLoader(train_dataset,
                                        batch_sampler=train_batch_sampler,
                                        places=place,
                                        return_list=True,
                                        num_workers=4)

    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=place,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        drop_last=False,
                                        return_list=True,
                                        num_workers=4)

    @paddle.no_grad()
    def test(epoch, net):
        net.eval()
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []

        eval_reader_cost = 0.0
        eval_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for data in valid_loader():
            eval_reader_cost += time.time() - reader_start
            image = data[0]
            label = data[1]
            if args.data == "cifar10":
                label = paddle.reshape(label, [-1, 1])

            eval_start = time.time()

            out = net(image)
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

            eval_run_cost += time.time() - eval_start
            batch_size = image.shape[0]
            total_samples += batch_size

            if batch_id % args.log_period == 0:
                log_period = 1 if batch_id == 0 else args.log_period
                _logger.info(
                    "Eval epoch[{}] batch[{}] - top1: {:.6f}; top5: {:.6f}; avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, avg_samples: {}, avg_ips: {:.3f} images/s"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()),
                            eval_reader_cost / log_period,
                            (eval_reader_cost + eval_run_cost) / log_period,
                            total_samples / log_period, total_samples /
                            (eval_reader_cost + eval_run_cost)))
                eval_reader_cost = 0.0
                eval_run_cost = 0.0
                total_samples = 0
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))
            batch_id += 1
            reader_start = time.time()

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def cross_entropy(input, target, ls_epsilon):
        if ls_epsilon > 0:
            if target.shape[-1] != class_dim:
                target = paddle.nn.functional.one_hot(target, class_dim)
            target = paddle.nn.functional.label_smooth(target,
                                                       epsilon=ls_epsilon)
            target = paddle.reshape(target, shape=[-1, class_dim])
            input = -paddle.nn.functional.log_softmax(input, axis=-1)
            cost = paddle.sum(target * input, axis=-1)
        else:
            cost = paddle.nn.functional.cross_entropy(input=input,
                                                      label=target)
        avg_cost = paddle.mean(cost)
        return avg_cost

    def train(epoch, net):

        net.train()
        batch_id = 0

        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for data in train_loader():
            train_reader_cost += time.time() - reader_start

            image = data[0]
            label = data[1]
            if args.data == "cifar10":
                label = paddle.reshape(label, [-1, 1])

            train_start = time.time()
            out = net(image)
            avg_cost = cross_entropy(out, label, args.ls_epsilon)

            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
            avg_cost.backward()
            opt.step()
            opt.clear_grad()
            lr.step()

            loss_n = np.mean(avg_cost.numpy())
            acc_top1_n = np.mean(acc_top1.numpy())
            acc_top5_n = np.mean(acc_top5.numpy())

            train_run_cost += time.time() - train_start
            batch_size = image.shape[0]
            total_samples += batch_size

            if batch_id % args.log_period == 0:
                log_period = 1 if batch_id == 0 else args.log_period
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; top1: {:.6f}; top5: {:.6f}; avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, avg_samples: {}, avg_ips: {:.3f} images/s"
                    .format(
                        epoch, batch_id, lr.get_lr(), loss_n, acc_top1_n,
                        acc_top5_n, train_reader_cost / log_period,
                        (train_reader_cost + train_run_cost) / log_period,
                        total_samples / log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            batch_id += 1
            reader_start = time.time()

    ############################################################################################################
    # train loop
    ############################################################################################################
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, net)
        acc1 = test(i, net)
        if paddle.distributed.get_rank() == 0:
            model_prefix = os.path.join(args.model_save_dir, "epoch_" + str(i))
            paddle.save(net.state_dict(), model_prefix + ".pdparams")
            paddle.save(opt.state_dict(), model_prefix + ".pdopt")

        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            if paddle.distributed.get_rank() == 0:
                model_prefix = os.path.join(args.model_save_dir, "best_model")
                paddle.save(net.state_dict(), model_prefix + ".pdparams")
                paddle.save(opt.state_dict(), model_prefix + ".pdopt")

    ############################################################################################################
    # 3. Save quant aware model
    ############################################################################################################
    if paddle.distributed.get_rank() == 0:
        # load best model
        load_dygraph_pretrain(net,
                              os.path.join(args.model_save_dir, "best_model"))

        path = os.path.join(args.model_save_dir, "inference_model",
                            'qat_model')
        quanter.save_quantized_model(net,
                                     path,
                                     input_spec=[
                                         paddle.static.InputSpec(
                                             shape=[None, 3, 224, 224],
                                             dtype='float32')
                                     ])
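In the example above, create_optimizer(net, trainer_num, args) returns both the optimizer and its LR scheduler, which are stepped separately (opt.step() / lr.step()). A minimal Paddle sketch under assumed args fields (lr, momentum, l2_decay, num_epochs, batch_size, total_images are assumptions, not necessarily the demo's actual names):

import paddle


def create_optimizer(net, trainer_num, args):
    # Steps per epoch, scaled by the number of trainers for multi-GPU runs.
    steps_per_epoch = int(args.total_images / (args.batch_size * trainer_num))
    lr = paddle.optimizer.lr.CosineAnnealingDecay(
        learning_rate=args.lr, T_max=steps_per_epoch * args.num_epochs)
    opt = paddle.optimizer.Momentum(
        learning_rate=lr,
        momentum=args.momentum,
        weight_decay=paddle.regularizer.L2Decay(args.l2_decay),
        parameters=net.parameters())
    return opt, lr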
Example #14
 def __init__(self, shape):
     """
     shape: [vsize, embedding_size]
     """
     self.w = np.random.random(shape) - 0.5
     self.g = opt.create_optimizer()
Example #15
def main(args):
    seed_everything(21)
    load_dotenv()

    if WANDB:
        if args.ENCODER:
            run_name = args.MODEL + "_" + args.ENCODER
        else:
            run_name = args.MODEL

    if args.KFOLD > 1:
        if args.KFOLD != 5:
            print("Only 5 KFOLD is available")
            return

        # create a folder for saving the .pt checkpoints
        path_pair = args.MODEL_PATH.split(".")
        os.makedirs(path_pair[0], exist_ok=True)
        # copy args so they can be reused for each fold
        args_origin = copy.deepcopy(args)

    for fold in range(args.KFOLD):
        # set up the dataloader differently for hold-out vs. k-fold
        if args.KFOLD > 1:
            args = copy.deepcopy(args_origin)
            path_pair = args_origin.MODEL_PATH.split(".")
            # update MODEL_PATH for this fold
            args.MODEL_PATH = (path_pair[0] + f"/kfold_{fold+1}." +
                               path_pair[1])
            # wandb
            if WANDB:
                wandb.init(
                    project=os.environ.get("WANDB_PROJECT_NAME"),
                    name=run_name + f"_k{fold+1}",
                    config=args,
                    reinit=True,
                )
                args = wandb.config
            # dataloader
            dataloader = get_dataloader(args.BATCH_SIZE, fold_index=fold)
            print(f"\nfold {fold+1} start")
        else:
            # wandb
            if WANDB:
                wandb.init(
                    project=os.environ.get("WANDB_PROJECT_NAME"),
                    name=run_name,
                    reinit=True,
                )
                wandb.config.update(args)
                args = wandb.config
            # dataloader
            dataloader = get_dataloader(args.BATCH_SIZE)
        print("Get loader")

        model = get_model(args.MODEL, args.ENCODER).to(args.device)
        print("Load model")

        if WANDB:
            wandb.watch(model)

        criterion = []
        if "+" in args.LOSS:
            criterion.append("+")
            criterion.append(create_criterion(args.LOSS.split("+")[0]))
            criterion.append(create_criterion(args.LOSS.split("+")[1]))
        elif "-" in args.LOSS:
            criterion.append("-")
            criterion.append(create_criterion(args.LOSS.split("-")[0]))
            criterion.append(create_criterion(args.LOSS.split("-")[1]))
        else:
            criterion.append("0")
            criterion.append(create_criterion(args.LOSS))
        optimizer = create_optimizer(args.OPTIMIZER, model, args.LEARNING_RATE)
        if args.SCHEDULER:
            scheduler = create_scheduler(args.SCHEDULER, optimizer)
        else:
            scheduler = None
        # optimizer = optim.Adam(params = model.parameters(), lr = args.LEARNING_RATE, weight_decay=1e-6)

        print("Run")
        run(args, model, criterion, optimizer, dataloader, fold, scheduler)
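The criterion list built above only records whether two losses should be added or subtracted; how run() combines them is not shown here. One plausible sketch (the helper name and call signature are assumptions):

def combine_criterion(criterion, outputs, targets):
    # criterion is the list from main(): [op, loss_fn] or [op, loss_fn_a, loss_fn_b]
    op = criterion[0]
    if op == "+":
        return criterion[1](outputs, targets) + criterion[2](outputs, targets)
    if op == "-":
        return criterion[1](outputs, targets) - criterion[2](outputs, targets)
    return criterion[1](outputs, targets)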