Beispiel #1
0
        # Set the flag on the options object first, then build the model from
        # those options and place it on the target device in one expression.
        opt.is_smart_model = True
        smartModel = SmartModel(opt).to(device)

    # Parallelize model to multiple GPUs and collect the student parameters
    # that the optimizer will update.
    gpu_count = torch.cuda.device_count()
    if gpu_count > 1:
        print("Using", gpu_count, "GPUs!")
        StudentModel = nn.DataParallel(StudentModel)
        # DataParallel exposes the wrapped network as `.module`.
        parms = list(StudentModel.module.parameters())

        if opt.isSource:
            smartModel = nn.DataParallel(smartModel)
    else:
        if gpu_count == 1:
            print("Using", gpu_count, "GPU!")
        # Also reached when no GPU is visible, so `parms` is always defined
        # before the optimizer is built (previously a NameError on CPU-only
        # hosts).
        parms = list(StudentModel.parameters())

    # Build the optimizer exactly once. Adam is used only when explicitly
    # requested; "sgd" and any other value of opt.optim fall back to SGD with
    # momentum and weight decay, which matches the previous default.
    if opt.optim == "adam":
        optimizer = torch.optim.Adam(parms, lr=opt.lr)
    else:
        optimizer = torch.optim.SGD(parms, opt.lr,
                                    momentum=opt.momentum,
                                    weight_decay=opt.weight_decay)


    if opt.dataset in ['cifar10', 'cifar100', 'stl10']:

    if args.do_train:
        # Training batches are shuffled; evaluation batches keep dataset order.
        # Each loader has its own batch size and collate function.
        train_dataloader = DataLoader(
            dataset=train_dataset,
            batch_size=args.pool_size,
            shuffle=True,
            collate_fn=train_collate_fn,
        )
        eval_dataloader = DataLoader(
            dataset=eval_dataset,
            batch_size=args.eval_pool_size,
            shuffle=False,
            collate_fn=eval_collate_fn,
        )

        # Pick the model to train and the optimizer parameter groups for the
        # requested task.
        # NOTE(review): there is no fallback branch; for any other args.task
        # value this section assigns neither `model` nor `param_groups`.
        # Confirm they are set elsewhere or add an explicit error.
        if args.task == 'distill':
            # Distillation: a teacher restored from a checkpoint, a fresh
            # student, and one Bridge clone per layer.
            t_model = TeacherModel(params=args, pretrained_embedding=torch.tensor(pretrained_embedding).float())
            # Map the checkpoint onto the CPU so a file saved from a GPU still
            # loads on a host without that device; load_state_dict copies the
            # values into the model's own parameters, and the model is moved
            # to args.device below.
            t_state_dict = torch.load(args.teacher_model_path, map_location='cpu')
            t_model.load_state_dict(t_state_dict)
            model = StudentModel(params=args, pretrained_embedding=torch.tensor(pretrained_embedding).float())

            bridges = _get_clones(Bridge(params=args), args.num_layers)
            # Only the student and the bridges are handed to the optimizer;
            # the teacher's parameters are not included.
            param_groups = [{'params': model.parameters(), 'lr': args.learning_rate},
                            {'params': bridges.parameters(), 'lr': args.learning_rate}]
            t_model = t_model.to(args.device)
            bridge = bridges.to(args.device)
            t_model.eval()
        elif args.task == 'student':
            model = StudentModel(params=args, pretrained_embedding=torch.tensor(pretrained_embedding).float())
            param_groups = [{'params': model.parameters(), 'lr': args.learning_rate}]
        elif args.task == 'teacher':
            model = TeacherModel(params=args, pretrained_embedding=torch.tensor(pretrained_embedding).float())
            param_groups = [{'params': model.parameters(), 'lr': args.learning_rate}]
        # When the classification loss is enabled, a PathClassifier is created
        # and its parameters join the optimizer as an extra group.
        if args.classify_loss:
            classifier = PathClassifier(params=args)
            classifier_group = {'params': classifier.parameters(), 'lr': args.learning_rate}
            param_groups.append(classifier_group)
            classifiers = classifier.to(args.device)
        # One SGD optimizer over every collected parameter group.
        optimizer = SGD(param_groups, lr=args.learning_rate)