Example #1
def main():
    # set GPU ID
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    cudnn.benchmark = True

    # check save path
    save_path = args.save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # make dataloader
    train_loader, test_loader = dataset.get_loader(args)

    # set model
    if args.model == 'res':
        model = resnet.ResNet18().cuda()
    else:
        raise ValueError('unsupported model: {}'.format(args.model))

    # set criterion
    criterion = nn.CrossEntropyLoss().cuda()

    # set optimizer (default:sgd)
    optimizer = optim.SGD(model.parameters(),
                          lr=0.1,
                          momentum=0.9,
                          weight_decay=0.0001,
                          nesterov=True)
    # set scheduler
    scheduler = MultiStepLR(optimizer, milestones=[80, 120], gamma=0.1)

    # make logger
    train_logger = utils.Logger(os.path.join(save_path, 'train.log'))
    test_logger = utils.Logger(os.path.join(save_path, 'test.log'))

    # track forgetting events over training
    forgetting_history = utils.Forgetting_Events(
        data_size=len(train_loader.dataset))

    # Start Train
    for epoch in range(1, args.epochs + 1):
        # Train
        train(train_loader, model, criterion, optimizer, epoch,
              forgetting_history, train_logger)
        validate(test_loader, model, criterion, epoch, test_logger, 'test')
        # step the LR scheduler once per epoch, after the optimizer updates
        scheduler.step()
        # save the model at the final epoch
        if epoch == int(args.epochs):
            torch.save(
                model.state_dict(),
                os.path.join(save_path, '{0}_{1}.pth'.format('model', epoch)))
    # Finish Train
    torch.save(forgetting_history,
               os.path.join(save_path, 'train_forgetting.pth'))
    # Draw Plot
    plot_curves.draw_plot(save_path)
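These snippets are excerpts from a larger training script, so the module-level imports and the argument parser they rely on are not shown. Below is a minimal sketch of the setup Example #1 assumes (the flag names come from the snippet; the default values and the project-local modules dataset, resnet, utils, and plot_curves are assumptions about the surrounding repository). The train and validate functions called inside main are defined elsewhere in the same script.

# Assumed module-level setup for Example #1 (sketch only)
import argparse
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.optim.lr_scheduler import MultiStepLR

import dataset          # project-local data loading helpers
import resnet           # project-local ResNet definitions
import utils            # project-local Logger / Forgetting_Events helpers
import plot_curves      # project-local plotting helpers

parser = argparse.ArgumentParser()
parser.add_argument('--gpu_id', type=str, default='0')
parser.add_argument('--save_path', type=str, default='./checkpoints')
parser.add_argument('--model', type=str, default='res')
parser.add_argument('--epochs', type=int, default=160)
args = parser.parse_args()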
Example #2
def Modellist(model_name, num_classes=1000, use_attention=None):
    if model_name == 'vgg11':
        return vgg.VGG11(num_classes, use_attention=use_attention)
    elif model_name == 'vgg13':
        return vgg.VGG13(num_classes, use_attention=use_attention)
    elif model_name == 'vgg16':
        return vgg.VGG16(num_classes, use_attention=use_attention)
    elif model_name == 'vgg19':
        return vgg.VGG19(num_classes, use_attention=use_attention)
    elif model_name == 'resnet18':
        return resnet.ResNet18(num_classes, use_attention=use_attention)
    elif model_name == 'resnet34':
        return resnet.ResNet34(num_classes, use_attention=use_attention)
    elif model_name == 'resnet50':
        return resnet.ResNet50(num_classes, use_attention=use_attention)
    elif model_name == 'resnet101':
        return resnet.ResNet101(num_classes, use_attention=use_attention)
    elif model_name == 'resnet152':
        return resnet.ResNet152(num_classes, use_attention=use_attention)
    elif model_name == 'densenet121':
        return densenet.DenseNet121(num_classes, use_attention=use_attention)
    elif model_name == 'densenet169':
        return densenet.DenseNet169(num_classes, use_attention=use_attention)
    elif model_name == 'densenet201':
        return densenet.DenseNet201(num_classes, use_attention=use_attention)
    elif model_name == 'densenet161':
        return densenet.DenseNet161(num_classes, use_attention=use_attention)
    elif model_name == 'mobilenetv3_small':
        return mobilenetv3.mobilenetv3_small(num_classes)
    elif model_name == 'mobilenetv3_large':
        return mobilenetv3.mobilenetv3_large(num_classes)
    elif model_name == 'efficientnet_b0':
        return efficientnet.efficientnet_b0(num_classes,
                                            use_attention=use_attention)
    elif model_name == 'efficientnet_b1':
        return efficientnet.efficientnet_b1(num_classes,
                                            use_attention=use_attention)
    elif model_name == 'efficientnet_b2':
        return efficientnet.efficientnet_b2(num_classes,
                                            use_attention=use_attention)
    elif model_name == 'efficientnet_b3':
        return efficientnet.efficientnet_b3(num_classes,
                                            use_attention=use_attention)
    elif model_name == 'efficientnet_b4':
        return efficientnet.efficientnet_b4(num_classes,
                                            use_attention=use_attention)
    elif model_name == 'efficientnet_b5':
        return efficientnet.efficientnet_b5(num_classes,
                                            use_attention=use_attention)
    elif model_name == 'efficientnet_b6':
        return efficientnet.efficientnet_b6(num_classes,
                                            use_attention=use_attention)
    else:
        raise ValueError("Unknown model_name: {}".format(model_name))
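A minimal usage sketch of the factory above (use_attention defaults to None, which presumably selects the plain variant of each backbone):

# Build a classifier through the Modellist factory (sketch)
model = Modellist('resnet50', num_classes=100, use_attention=None)
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('resnet50: {} trainable parameters'.format(num_params))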
Example #3
def main(rank, world_size):
    init_process(rank, world_size)

    # make dataloader
    train_loader, test_loader = dataset.get_loader(args, rank, world_size)

    # set model
    if args.model == 'res':
        model = resnet.ResNet18()
    else:
        raise ValueError('unsupported model: {}'.format(args.model))

    model = model.cuda(rank)
    model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    model = DDP(model, device_ids=[rank])
    cudnn.benchmark = True

    # set criterion
    criterion = nn.CrossEntropyLoss().cuda(rank)

    # set optimizer (default:sgd)
    optimizer = optim.SGD(model.parameters(),
                          lr=0.1,
                          momentum=0.9,
                          weight_decay=0.0001,
                          nesterov=True)
    # set scheduler
    scheduler = MultiStepLR(optimizer, milestones=[60, 80], gamma=0.1)

    # make logger
    train_logger = utils.Logger(os.path.join(args.save_path, 'train.log'))
    test_logger = utils.Logger(os.path.join(args.save_path, 'test.log'))

    # Start Train
    for epoch in range(1, args.epochs + 1):
        # Train
        train(train_loader, model, criterion, optimizer, epoch, train_logger)
        validate(test_loader, model, criterion, epoch, test_logger, 'test')
        # step the LR scheduler on every rank (each process owns its own
        # optimizer, so stepping only on rank 0 would desynchronize the LR)
        scheduler.step()
        # save the model on rank 0 at the final epoch
        if dist.get_rank() == 0:
            if epoch == int(args.epochs):
                torch.save(
                    model.module.state_dict(),  # unwrap DDP so saved keys have no 'module.' prefix
                    os.path.join(args.save_path,
                                 '{0}_{1}.pth'.format('model', epoch)))
    # Finish Train

    # Draw Plot
    if dist.get_rank() == 0:
        plot_curves.draw_plot(args.save_path)

    dist.destroy_process_group()
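init_process is not part of the excerpt; below is a minimal sketch of what it typically does, together with the per-GPU launch (the backend, address, and port are assumptions):

import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def init_process(rank, world_size, backend='nccl'):
    # every spawned process joins the same process group (address/port assumed)
    dist.init_process_group(backend=backend,
                            init_method='tcp://127.0.0.1:23456',
                            rank=rank,
                            world_size=world_size)
    torch.cuda.set_device(rank)


if __name__ == '__main__':
    world_size = torch.cuda.device_count()
    # mp.spawn invokes main(rank, world_size) once per GPU
    mp.spawn(main, args=(world_size,), nprocs=world_size)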
Example #4
def __init__(self, model, num_workers, lr, job_name):
    self.lock = threading.Lock()
    self.logger = Logger(job_name=job_name, file_dir=f"./measurement/logs/{job_name}_ps.log").logger
    self.cm_t1_start = np.zeros(num_workers)
    self.future_model = torch.futures.Future()
    self.batch_update_size = num_workers
    self.curr_update_size = 0
    self.stop_flag = False
    if model == 'resnet20':
        self.model = resnet3.resnet20()
    elif model == 'resnet56':
        self.model = resnet3.resnet56()
    elif model == 'resnet18':
        self.model = resnet.ResNet18()
    elif model == 'resnet50':
        self.model = resnet.ResNet50()
    elif model == 'vgg13':
        self.model = vgg.VGG13()
    elif model == 'vgg16':
        self.model = vgg.VGG16()
    elif model == 'densenet121':
        self.model = densenet.DenseNet121()
    elif model == 'alexnet':
        self.model = alexnet.AlexNet()
    elif model == 'googlenet':
        self.model = googlenet.GoogLeNet()
    elif model == 'mobilenet':
        self.model = mobilenetv2.MobileNetV2()
    self.lr = lr
    for p in self.model.parameters():
        p.grad = torch.zeros_like(p)
    self.optimizer = optim.SGD(self.model.parameters(), lr=lr, momentum=0.9)
    self.info_socketm = znet.SocketMsger.tcp_connect(DORKER0_IP, INFO_PORT)
    self.info_socketm.send("PS")
    self.info_socketm.send(f"1.0\n/home/ubuntu/measurement/logs/{job_name}_info0.log\n{job_name}")
    self.ps_launched_lock = threading.Lock()
    self.ps_launched = False
    """Based on the model_version, determine model/optimizer and KD training mode
       WideResNet and DenseNet were trained on multi-GPU; need to specify a dummy
       nn.DataParallel module to correctly load the model parameters
    """
    if "distill" in params.model_version:
        student_model_load_start = time.time()
        # train a 5-layer CNN or a 18-layer ResNet with knowledge distillation
        if params.model_version == "cnn_distill":
            model = net.Net(params).cuda() if params.cuda else net.Net(params)
            optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)
            # fetch loss function and metrics definition in model files
            loss_fn_kd = net.loss_fn_kd
            metrics = net.metrics
        
        elif params.model_version == 'resnet18_distill':
            model = resnet.ResNet18().cuda() if params.cuda else resnet.ResNet18()
            optimizer = optim.SGD(model.parameters(), lr=params.learning_rate,
                                  momentum=0.9, weight_decay=5e-4)
            # fetch loss function and metrics definition in model files
            loss_fn_kd = net.loss_fn_kd
            metrics = resnet.metrics
        
        student_model_load_time = time.time() - student_model_load_start
        logging.info("student_model_load_time: {}".format(student_model_load_time))

        """ 
            Specify the pre-trained teacher models for knowledge distillation
            Important note: wrn/densenet/resnext/preresnet were pre-trained models using multi-GPU,
            therefore need to call "nn.DaraParallel" to correctly load the model weights
            Trying to run on CPU will then trigger errors (too time-consuming anyway)!
        """
Example #6
def count_parameters(model):  # name assumed; the def line is missing from the excerpt
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


if __name__ == '__main__':

    model_size = 0

    args = parser.parse_args()
    cnn_dir = 'experiments/cnn_distill'
    json_path = os.path.join(cnn_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    if args.model == "resnet18":
        model = resnet.ResNet18()
        model_checkpoint = 'experiments/base_resnet18/best.pth.tar'

    elif args.model == "wrn":
        model = wrn.wrn(depth=28,
                        num_classes=10,
                        widen_factor=10,
                        dropRate=0.3)
        model_checkpoint = 'experiments/base_wrn/best.pth.tar'

    elif args.model == "distill_resnext":
        model = resnet.ResNet18()
        model_checkpoint = 'experiments/resnet18_distill/resnext_teacher/best.pth.tar'

    elif args.model == "distill_densenet":
        model = resnet.ResNet18()
Example #7
    """Based on the model_version, determine model/optimizer and KD training mode
       WideResNet and DenseNet were trained on multi-GPU; need to specify a dummy
       nn.DataParallel module to correctly load the model parameters
    """
    if "distill" in params.model_version:

        # train a 5-layer CNN or a 18-layer ResNet with knowledge distillation
        if params.model_version == "cnn_distill":
            model = net.Net(params).cuda() if params.cuda else net.Net(params)
            optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)
            # fetch loss function and metrics definition in model files
            loss_fn_kd = net.loss_fn_kd
            metrics = net.metrics

        elif params.model_version == 'resnet18_distill':
            model = resnet.ResNet18().cuda() if params.cuda else resnet.ResNet18()
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate,
                                  momentum=0.9,
                                  weight_decay=5e-4)
            # fetch loss function and metrics definition in model files
            loss_fn_kd = net.loss_fn_kd
            metrics = resnet.metrics
        """ 
            Specify the pre-trained teacher models for knowledge distillation
            Important note: wrn/densenet/resnext/preresnet were pre-trained models using multi-GPU,
            therefore need to call "nn.DaraParallel" to correctly load the model weights
            Trying to run on CPU will then trigger errors (too time-consuming anyway)!
        """

        teacher_model = get_vgg()
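In practice, the note above means wrapping such a teacher in nn.DataParallel before loading its checkpoint, so the "module."-prefixed keys match. A minimal sketch, reusing the wrn teacher and checkpoint path shown in Example #6:

# A teacher pre-trained with nn.DataParallel must be wrapped the same way
# before its checkpoint is loaded (sketch; model/path taken from Example #6)
teacher_model = wrn.wrn(depth=28, num_classes=10, widen_factor=10, dropRate=0.3)
teacher_model = nn.DataParallel(teacher_model).cuda()
utils.load_checkpoint('experiments/base_wrn/best.pth.tar', teacher_model)
teacher_model.eval()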
Example #8
def main():
    # Load the parameters from json file
    args = parser.parse_args()
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Set the random seed for reproducible experiments
    random.seed(230)
    torch.manual_seed(230)
    np.random.seed(230)
    torch.cuda.manual_seed(230)
    warnings.filterwarnings("ignore")

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")

    # fetch dataloaders, considering full-set vs. sub-set scenarios
    if params.subset_percent < 1.0:
        train_dl = data_loader.fetch_subset_dataloader('train', params)
    else:
        train_dl = data_loader.fetch_dataloader('train', params)

    dev_dl = data_loader.fetch_dataloader('dev', params)

    logging.info("- done.")
    """
    Load student and teacher model
    """
    if "distill" in params.model_version:

        # Specify the student models
        if params.model_version == "cnn_distill":  # 5-layer plain CNN
            print("Student model: {}".format(params.model_version))
            model = net.Net(params).cuda()

        elif params.model_version == "shufflenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "mobilenet_v2_distill":
            print("Student model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()

        elif params.model_version == 'resnet18_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet18(num_classes=args.num_class).cuda()

        elif params.model_version == 'resnet50_distill':
            print("Student model: {}".format(params.model_version))
            model = resnet.ResNet50(num_classes=args.num_class).cuda()

        elif params.model_version == "alexnet_distill":
            print("Student model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()

        elif params.model_version == "vgg19_distill":
            print("Student model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()

        elif params.model_version == "googlenet_distill":
            print("Student model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()

        elif params.model_version == "resnext29_distill":
            print("Student model: {}".format(params.model_version))
            model = resnext.CifarResNeXt(cardinality=8,
                                         depth=29,
                                         num_classes=args.num_class).cuda()

        elif params.model_version == "densenet121_distill":
            print("Student model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()

        # optimizer
        if params.model_version == "cnn_distill":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate *
                                   (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate *
                                  (params.batch_size / 128),
                                  momentum=0.9,
                                  weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        # warm up the learning rate over the first args.warm epoch(s)
        warmup_scheduler = utils.WarmUpLR(optimizer, iter_per_epoch * args.warm)

        # specify loss function
        if args.self_training:
            print(
                '>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>self training>>>>>>>>>>>>>>>>>>>>>>>>>>>>>'
            )
            loss_fn_kd = loss_kd_self
        else:
            loss_fn_kd = loss_kd
        """ 
            Specify the pre-trained teacher models for knowledge distillation
            Checkpoints can be obtained by regular training or downloading our pretrained models
            For model which is pretrained in multi-GPU, use "nn.DaraParallel" to correctly load the model weights.
        """
        if params.teacher == "resnet18":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet18(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet18/0.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "alexnet":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = alexnet.alexnet(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_alexnet/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "googlenet":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = googlenet.GoogleNet(num_class=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_googlenet/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "vgg19":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = models.vgg19_bn(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_vgg19/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "resnet50":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet50(num_classes=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet50/50.pth.tar'

        elif params.teacher == "resnet101":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnet.ResNet101(num_classes=args.num_class)
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnet101/best.pth.tar'
            teacher_model = teacher_model.cuda()

        elif params.teacher == "densenet121":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = densenet.densenet121(
                num_class=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_densenet121/best.pth.tar'
            # teacher_model = nn.DataParallel(teacher_model).cuda()

        elif params.teacher == "resnext29":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = resnext.CifarResNeXt(
                cardinality=8, depth=29, num_classes=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/best.pth.tar'
            if args.pt_teacher:  # poorly-trained teacher for Defective KD experiments
                teacher_checkpoint = 'experiments/pretrained_teacher_models/base_resnext29/50.pth.tar'
                teacher_model = nn.DataParallel(teacher_model).cuda()

        elif params.teacher == "mobilenet_v2":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = mobilenet.mobilenetv2(
                class_num=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_mobilenet_v2/best.pth.tar'

        elif params.teacher == "shufflenet_v2":
            print("Teacher model: {}".format(params.teacher))
            teacher_model = shufflenet.shufflenetv2(
                class_num=args.num_class).cuda()
            teacher_checkpoint = 'experiments/pretrained_teacher_models/base_shufflenet_v2/best.pth.tar'

        utils.load_checkpoint(teacher_checkpoint, teacher_model)

        # Train the model with KD
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate_kd(model, teacher_model, train_dl, dev_dl,
                              optimizer, loss_fn_kd, warmup_scheduler, params,
                              args, args.restore_file)

    # non-KD mode: regular training to obtain a baseline model
    else:
        print("Train base model")
        if params.model_version == "cnn":
            model = net.Net(params).cuda()

        elif params.model_version == "mobilenet_v2":
            print("model: {}".format(params.model_version))
            model = mobilenet.mobilenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "shufflenet_v2":
            print("model: {}".format(params.model_version))
            model = shufflenet.shufflenetv2(class_num=args.num_class).cuda()

        elif params.model_version == "alexnet":
            print("model: {}".format(params.model_version))
            model = alexnet.alexnet(num_classes=args.num_class).cuda()

        elif params.model_version == "vgg19":
            print("model: {}".format(params.model_version))
            model = models.vgg19_bn(num_classes=args.num_class).cuda()

        elif params.model_version == "googlenet":
            print("model: {}".format(params.model_version))
            model = googlenet.GoogleNet(num_class=args.num_class).cuda()

        elif params.model_version == "densenet121":
            print("model: {}".format(params.model_version))
            model = densenet.densenet121(num_class=args.num_class).cuda()

        elif params.model_version == "resnet18":
            model = resnet.ResNet18(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet50":
            model = resnet.ResNet50(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet101":
            model = resnet.ResNet101(num_classes=args.num_class).cuda()

        elif params.model_version == "resnet152":
            model = resnet.ResNet152(num_classes=args.num_class).cuda()

        elif params.model_version == "resnext29":
            model = resnext.CifarResNeXt(cardinality=8,
                                         depth=29,
                                         num_classes=args.num_class).cuda()
            # model = nn.DataParallel(model).cuda()

        if args.regularization:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Loss of Regularization>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = loss_kd_regularization
        elif args.label_smoothing:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Label Smoothing>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = loss_label_smoothing
        else:
            print(
                ">>>>>>>>>>>>>>>>>>>>>>>>Normal Training>>>>>>>>>>>>>>>>>>>>>>>>"
            )
            loss_fn = nn.CrossEntropyLoss()
            if args.double_training:  # double training, compare to self-KD
                print(
                    ">>>>>>>>>>>>>>>>>>>>>>>>Double Training>>>>>>>>>>>>>>>>>>>>>>>>"
                )
                checkpoint = 'experiments/pretrained_teacher_models/base_' + str(
                    params.model_version) + '/best.pth.tar'
                utils.load_checkpoint(checkpoint, model)

        if params.model_version == "cnn":
            optimizer = optim.Adam(model.parameters(),
                                   lr=params.learning_rate *
                                   (params.batch_size / 128))
        else:
            optimizer = optim.SGD(model.parameters(),
                                  lr=params.learning_rate *
                                  (params.batch_size / 128),
                                  momentum=0.9,
                                  weight_decay=5e-4)

        iter_per_epoch = len(train_dl)
        warmup_scheduler = utils.WarmUpLR(optimizer,
                                          iter_per_epoch * args.warm)

        # Train the model
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        train_and_evaluate(model, train_dl, dev_dl, optimizer, loss_fn, params,
                           args.model_dir, warmup_scheduler, args,
                           args.restore_file)
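The loss functions loss_kd, loss_kd_self, and loss_kd_regularization selected above are defined elsewhere in the repository. For reference, the Hinton-style distillation loss that loss_fn_kd variants are typically built on looks roughly like this (a sketch; params.alpha and params.temperature are assumed hyperparameter names):

import torch.nn.functional as F


def loss_fn_kd(outputs, labels, teacher_outputs, params):
    # soft-target KL term, scaled by T^2, plus the usual hard-label cross-entropy
    alpha = params.alpha
    T = params.temperature
    kd_loss = F.kl_div(F.log_softmax(outputs / T, dim=1),
                       F.softmax(teacher_outputs / T, dim=1),
                       reduction='batchmean') * (alpha * T * T)
    ce_loss = F.cross_entropy(outputs, labels) * (1.0 - alpha)
    return kd_loss + ce_loss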