Example #1
    def __init__(self,
                 backbone: nn.Module,
                 num_classes: int,
                 bottleneck_dim: Optional[int] = 1024,
                 width: Optional[int] = 1024):
        super(ImageClassifier, self).__init__()
        self.backbone = backbone
        self.grl_layer = WarmStartGradientReverseLayer(alpha=1.0,
                                                       lo=0.0,
                                                       hi=0.1,
                                                       max_iters=1000.,
                                                       auto_step=False)

        self.bottleneck = nn.Sequential(
            nn.Linear(backbone.out_features, bottleneck_dim),
            nn.BatchNorm1d(bottleneck_dim), nn.ReLU(), nn.Dropout(0.5))
        self.bottleneck[0].weight.data.normal_(0, 0.005)
        self.bottleneck[0].bias.data.fill_(0.1)

        # The classifier head used for final predictions.
        self.head = nn.Sequential(nn.Linear(bottleneck_dim, width), nn.ReLU(),
                                  nn.Dropout(0.5),
                                  nn.Linear(width, num_classes))
        # The adversarial classifier head
        self.adv_head = nn.Sequential(nn.Linear(bottleneck_dim, width),
                                      nn.ReLU(), nn.Dropout(0.5),
                                      nn.Linear(width, num_classes))
        for dep in range(2):
            self.head[dep * 3].weight.data.normal_(0, 0.01)
            self.head[dep * 3].bias.data.fill_(0.0)
            self.adv_head[dep * 3].weight.data.normal_(0, 0.01)
            self.adv_head[dep * 3].bias.data.fill_(0.0)
Example #2
class GeneralModule(nn.Module):
    def __init__(self,
                 backbone: nn.Module,
                 num_classes: int,
                 bottleneck: nn.Module,
                 head: nn.Module,
                 adv_head: nn.Module,
                 grl: Optional[WarmStartGradientReverseLayer] = None,
                 finetune: Optional[bool] = True):
        super(GeneralModule, self).__init__()
        self.backbone = backbone
        self.num_classes = num_classes
        self.bottleneck = bottleneck
        self.head = head
        self.adv_head = adv_head
        self.finetune = finetune
        self.grl_layer = WarmStartGradientReverseLayer(
            alpha=1.0, lo=0.0, hi=0.1, max_iters=1000,
            auto_step=False) if grl is None else grl

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """"""
        features = self.backbone(x)
        features = self.bottleneck(features)
        outputs = self.head(features)
        features_adv = self.grl_layer(features)
        outputs_adv = self.adv_head(features_adv)
        if self.training:
            return outputs, outputs_adv
        else:
            return outputs

    def step(self):
        """
        Gradually increase :math:`\lambda` in GRL layer.
        """
        self.grl_layer.step()

    def get_parameters(self, base_lr=1.0) -> List[Dict]:
        """
        Return a parameters list which decides optimization hyper-parameters,
        such as the relative learning rate of each layer.
        """
        params = [{
            "params": self.backbone.parameters(),
            "lr": 0.1 * base_lr if self.finetune else base_lr
        }, {
            "params": self.bottleneck.parameters(),
            "lr": base_lr
        }, {
            "params": self.head.parameters(),
            "lr": base_lr
        }, {
            "params": self.adv_head.parameters(),
            "lr": base_lr
        }]
        return params
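The snippet below is a minimal usage sketch, not part of the example above: it shows how the per-group learning rates from get_parameters() typically feed a PyTorch optimizer, and how step() is called once per iteration because the GRL layer is built with auto_step=False. The toy backbone, bottleneck, heads, dimensions and the placeholder loss are illustrative assumptions only; GeneralModule and WarmStartGradientReverseLayer are assumed to be importable as defined above.

import torch
import torch.nn as nn
from torch.optim import SGD

# Hypothetical 1-d modules; the only visible requirement is that the backbone output
# feeds the bottleneck and the bottleneck output feeds both heads.
backbone = nn.Sequential(nn.Linear(32, 64), nn.ReLU())
bottleneck = nn.Sequential(nn.Linear(64, 16), nn.ReLU())
head = nn.Linear(16, 10)
adv_head = nn.Linear(16, 10)

model = GeneralModule(backbone, num_classes=10, bottleneck=bottleneck,
                      head=head, adv_head=adv_head)

# Each parameter group already carries its own "lr": the backbone runs at
# 0.1 * base_lr when finetune=True, the newly added layers at base_lr.
optimizer = SGD(model.get_parameters(base_lr=0.01), lr=0.01,
                momentum=0.9, weight_decay=1e-3)

model.train()
x = torch.randn(8, 32)
outputs, outputs_adv = model(x)              # training mode returns both heads
loss = outputs.mean() + outputs_adv.mean()   # placeholder objective, not the real loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
model.step()                                 # auto_step=False, so advance the GRL schedule manually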
Example #3
    def __init__(self,
                 backbone: nn.Module,
                 num_classes: int,
                 bottleneck_dim: Optional[int] = 1024,
                 width: Optional[int] = 1024,
                 grl: Optional[WarmStartGradientReverseLayer] = None,
                 finetune=True):
        grl_layer = WarmStartGradientReverseLayer(
            alpha=1.0, lo=0.0, hi=0.1, max_iters=1000,
            auto_step=False) if grl is None else grl

        bottleneck = nn.Sequential(
            nn.AdaptiveAvgPool2d(output_size=(1, 1)), nn.Flatten(),
            nn.Linear(backbone.out_features, bottleneck_dim),
            nn.BatchNorm1d(bottleneck_dim), nn.ReLU(), nn.Dropout(0.5))
        bottleneck[2].weight.data.normal_(0, 0.005)
        bottleneck[2].bias.data.fill_(0.1)

        # The classifier head used for final predictions.
        head = nn.Sequential(nn.Linear(bottleneck_dim, width), nn.ReLU(),
                             nn.Dropout(0.5), nn.Linear(width, num_classes))
        # The adversarial classifier head
        adv_head = nn.Sequential(nn.Linear(bottleneck_dim, width), nn.ReLU(),
                                 nn.Dropout(0.5),
                                 nn.Linear(width, num_classes))
        for dep in range(2):
            head[dep * 3].weight.data.normal_(0, 0.01)
            head[dep * 3].bias.data.fill_(0.0)
            adv_head[dep * 3].weight.data.normal_(0, 0.01)
            adv_head[dep * 3].bias.data.fill_(0.0)
        super(ImageClassifier,
              self).__init__(backbone, num_classes, bottleneck, head, adv_head,
                             grl_layer, finetune)
Example #4
    def __init__(self,
                 domain_discriminator: nn.Module,
                 entropy_conditioning: Optional[bool] = False,
                 randomized: Optional[bool] = False,
                 num_classes: Optional[int] = -1,
                 features_dim: Optional[int] = -1,
                 randomized_dim: Optional[int] = 1024,
                 reduction: Optional[str] = 'mean'):
        super(ConditionalDomainAdversarialLoss, self).__init__()
        self.domain_discriminator = domain_discriminator
        self.grl = WarmStartGradientReverseLayer(alpha=1.,
                                                 lo=0.,
                                                 hi=1.,
                                                 max_iters=1000,
                                                 auto_step=True)
        self.entropy_conditioning = entropy_conditioning

        if randomized:
            assert num_classes > 0 and features_dim > 0 and randomized_dim > 0
            self.map = RandomizedMultiLinearMap(features_dim, num_classes,
                                                randomized_dim)
        else:
            self.map = MultiLinearMap()

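        # Weight each sample's BCE term by its entropy-based weight only when entropy
        # conditioning is enabled; otherwise fall back to plain (unweighted) BCE.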
        self.bce = lambda input, target, weight: F.binary_cross_entropy(input, target, weight,
                                                                        reduction=reduction) if self.entropy_conditioning \
            else F.binary_cross_entropy(input, target, reduction=reduction)
        self.domain_discriminator_accuracy = None
Example #5
 def __init__(self,
              backbone: nn.Module,
              num_classes: int,
              bottleneck: nn.Module,
              head: nn.Module,
              adv_head: nn.Module,
              grl: Optional[WarmStartGradientReverseLayer] = None,
              finetune: Optional[bool] = True):
     super(GeneralModule, self).__init__()
     self.backbone = backbone
     self.num_classes = num_classes
     self.bottleneck = bottleneck
     self.head = head
     self.adv_head = adv_head
     self.finetune = finetune
     self.grl_layer = WarmStartGradientReverseLayer(
         alpha=1.0, lo=0.0, hi=0.1, max_iters=1000,
         auto_step=False) if grl is None else grl
Example #6
    def __init__(self,
                 backbone: nn.Module,
                 num_factors: int,
                 bottleneck_dim: Optional[int] = 1024,
                 width: Optional[int] = 1024,
                 finetune=True):
        grl_layer = WarmStartGradientReverseLayer(alpha=1.0,
                                                  lo=0.0,
                                                  hi=0.1,
                                                  max_iters=1000,
                                                  auto_step=False)
        bottleneck = nn.Sequential(
            nn.Conv2d(backbone.out_features,
                      bottleneck_dim,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(bottleneck_dim),
            nn.ReLU(),
        )

        # The regressor head used for final predictions.
        head = nn.Sequential(
            nn.Conv2d(bottleneck_dim,
                      width,
                      kernel_size=3,
                      stride=1,
                      padding=1), nn.BatchNorm2d(width), nn.ReLU(),
            nn.Conv2d(width, width, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(width), nn.ReLU(),
            nn.AdaptiveAvgPool2d(output_size=(1, 1)), nn.Flatten(),
            nn.Linear(width, num_factors), nn.Sigmoid())
        for layer in head:
            if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
        # The adversarial regressor head
        adv_head = nn.Sequential(
            nn.Conv2d(bottleneck_dim,
                      width,
                      kernel_size=3,
                      stride=1,
                      padding=1), nn.BatchNorm2d(width), nn.ReLU(),
            nn.Conv2d(width, width, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(width), nn.ReLU(),
            nn.AdaptiveAvgPool2d(output_size=(1, 1)), nn.Flatten(),
            nn.Linear(width, num_factors), nn.Sigmoid())
        for layer in adv_head:
            if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)
        super(ImageRegressor,
              self).__init__(backbone, num_factors, bottleneck, head, adv_head,
                             grl_layer, finetune)
        self.num_factors = num_factors
Example #7
class ImageClassifier(nn.Module):
    r"""Classifier for MDD.
    Parameters:
        - **backbone** (class:`nn.Module` object): Any backbone to extract 1-d features from data
        - **num_classes** (int): Number of classes
        - **bottleneck_dim** (int, optional): Feature dimension of the bottleneck layer. Default: 1024
        - **width** (int, optional): Feature dimension of the classifier head. Default: 1024

    .. note::
        The classifier for MDD has one backbone and one bottleneck, but two classifier heads.
        The first classifier head is used for final predictions.
        The adversarial classifier head is only used when calculating MarginDisparityDiscrepancy.

    .. note::
        Remember to call `step()` after `forward()` **during the training phase**! For instance,

        >>> # x is inputs, classifier is an ImageClassifier
        >>> outputs, outputs_adv = classifier(x)
        >>> classifier.step()

    Inputs:
        - **x** (Tensor): input data

    Outputs: (outputs, outputs_adv)
        - **outputs**: logits output by the main classifier
        - **outputs_adv**: logits output by the adversarial classifier

    Shapes:
        - x: :math:`(minibatch, *)`, same shape as the input of the `backbone`.
        - outputs, outputs_adv: :math:`(minibatch, C)`, where C means the number of classes.

    """
    def __init__(self,
                 backbone: nn.Module,
                 num_classes: int,
                 bottleneck_dim: Optional[int] = 1024,
                 width: Optional[int] = 1024):
        super(ImageClassifier, self).__init__()
        self.backbone = backbone
        self.grl_layer = WarmStartGradientReverseLayer(alpha=1.0,
                                                       lo=0.0,
                                                       hi=0.1,
                                                       max_iters=1000.,
                                                       auto_step=False)

        self.bottleneck = nn.Sequential(
            nn.Linear(backbone.out_features, bottleneck_dim),
            nn.BatchNorm1d(bottleneck_dim), nn.ReLU(), nn.Dropout(0.5))
        self.bottleneck[0].weight.data.normal_(0, 0.005)
        self.bottleneck[0].bias.data.fill_(0.1)

        # The classifier head used for final predictions.
        self.head = nn.Sequential(nn.Linear(bottleneck_dim, width), nn.ReLU(),
                                  nn.Dropout(0.5),
                                  nn.Linear(width, num_classes))
        # The adversarial classifier head
        self.adv_head = nn.Sequential(nn.Linear(bottleneck_dim, width),
                                      nn.ReLU(), nn.Dropout(0.5),
                                      nn.Linear(width, num_classes))
        for dep in range(2):
            self.head[dep * 3].weight.data.normal_(0, 0.01)
            self.head[dep * 3].bias.data.fill_(0.0)
            self.adv_head[dep * 3].weight.data.normal_(0, 0.01)
            self.adv_head[dep * 3].bias.data.fill_(0.0)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        features = self.backbone(x)
        features = self.bottleneck(features)
        outputs = self.head(features)
        features_adv = self.grl_layer(features)
        outputs_adv = self.adv_head(features_adv)
        return outputs, outputs_adv

    def step(self):
        """Call step() each iteration during training.
        Will increase :math:`\lambda` in GRL layer.
        """
        self.grl_layer.step()

    def get_parameters(self) -> List[Dict]:
        """
        :return: A parameters list which decides optimization hyper-parameters,
            such as the relative learning rate of each layer
        """
        params = [{
            "params": self.backbone.parameters(),
            "lr_mult": 0.1
        }, {
            "params": self.bottleneck.parameters(),
            "lr_mult": 1.
        }, {
            "params": self.head.parameters(),
            "lr_mult": 1.
        }, {
            "params": self.adv_head.parameters(),
            "lr_mult": 1
        }]
        return params
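Unlike Example #2, get_parameters() here returns relative "lr_mult" hints rather than absolute learning rates, so something has to translate them. The following is a minimal sketch of one way to do that and to drive the warm-start GRL with step(); ToyBackbone, the dimensions and the placeholder loss are hypothetical and only satisfy the `out_features` contract described in the docstring above.

import torch
import torch.nn as nn
from torch.optim import SGD

# Hypothetical backbone: the classifier only needs a module that returns 1-d
# features and exposes `out_features`, as the docstring above states.
class ToyBackbone(nn.Module):
    def __init__(self, in_features=32, out_features=64):
        super().__init__()
        self.out_features = out_features
        self.fc = nn.Linear(in_features, out_features)

    def forward(self, x):
        return self.fc(x)

classifier = ImageClassifier(ToyBackbone(), num_classes=10,
                             bottleneck_dim=256, width=256)

# Translate the relative "lr_mult" hints into per-group learning rates.
base_lr = 0.01
param_groups = [{"params": g["params"], "lr": base_lr * g["lr_mult"]}
                for g in classifier.get_parameters()]
optimizer = SGD(param_groups, lr=base_lr, momentum=0.9)

classifier.train()
x = torch.randn(8, 32)                       # batch > 1 so BatchNorm1d works in train mode
outputs, outputs_adv = classifier(x)
loss = outputs.sum() + outputs_adv.sum()     # placeholder for the cross-entropy + MDD terms
loss.backward()
optimizer.step()
classifier.step()                            # increase lambda in the GRL layer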
Example #8
 def __init__(self, domain_discriminator: nn.Module, reduction: Optional[str] = 'mean'):
     super(DomainAdversarialLoss, self).__init__()
     self.grl = WarmStartGradientReverseLayer(alpha=1., lo=0., hi=1., max_iters=1000, auto_step=True)
     self.domain_discriminator = domain_discriminator
     self.bce = nn.BCELoss(reduction=reduction)
     self.domain_discriminator_accuracy = None
Example #9
def main(args: argparse.Namespace):
    logger = CompleteLogger(args.log, args.phase)
    print(args)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    cudnn.benchmark = True

    # Data loading code
    train_transform = utils.get_train_transform(args.train_resizing, random_horizontal_flip=not args.no_hflip,
                                                random_color_jitter=False, resize_size=args.resize_size,
                                                norm_mean=args.norm_mean, norm_std=args.norm_std)
    val_transform = utils.get_val_transform(args.val_resizing, resize_size=args.resize_size,
                                            norm_mean=args.norm_mean, norm_std=args.norm_std)
    print("train_transform: ", train_transform)
    print("val_transform: ", val_transform)

    train_source_dataset, train_target_dataset, val_dataset, test_dataset, num_classes, args.class_names = \
        utils.get_dataset(args.data, args.root, args.source, args.target, train_transform, val_transform)
    train_source_loader = DataLoader(train_source_dataset, batch_size=args.batch_size,
                                     shuffle=True, num_workers=args.workers, drop_last=True)
    train_target_loader = DataLoader(train_target_dataset, batch_size=args.batch_size,
                                     shuffle=True, num_workers=args.workers, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.workers)

    train_source_iter = ForeverDataIterator(train_source_loader)
    train_target_iter = ForeverDataIterator(train_target_loader)

    # create model
    print("=> using model '{}'".format(args.arch))
    backbone = utils.get_model(args.arch, pretrain=not args.scratch)
    pool_layer = nn.Identity() if args.no_pool else None
    source_classifier = ImageClassifier(backbone, num_classes, bottleneck_dim=args.bottleneck_dim,
                                        pool_layer=pool_layer, finetune=not args.scratch).to(device)

    if args.phase == 'train' and args.pretrain is None:
        # first pretrain the classifier with source data
        print("Pretraining the model on source domain.")
        args.pretrain = logger.get_checkpoint_path('pretrain')
        pretrain_model = ImageClassifier(backbone, num_classes, bottleneck_dim=args.bottleneck_dim,
                                         pool_layer=pool_layer, finetune=not args.scratch).to(device)
        pretrain_optimizer = SGD(pretrain_model.get_parameters(), args.pretrain_lr, momentum=args.momentum,
                                 weight_decay=args.weight_decay, nesterov=True)
        pretrain_lr_scheduler = LambdaLR(pretrain_optimizer,
                                         lambda x: args.pretrain_lr * (1. + args.lr_gamma * float(x)) ** (
                                             -args.lr_decay))
        # start pretraining
        for epoch in range(args.pretrain_epochs):
            print("lr:", pretrain_lr_scheduler.get_lr())
            # pretrain for one epoch
            utils.pretrain(train_source_iter, pretrain_model, pretrain_optimizer, pretrain_lr_scheduler, epoch, args,
                           device)
            # validate to show pretrain process
            utils.validate(val_loader, pretrain_model, args, device)

        torch.save(pretrain_model.state_dict(), args.pretrain)
        print("Pretraining process is done.")

    checkpoint = torch.load(args.pretrain, map_location='cpu')
    source_classifier.load_state_dict(checkpoint)
    target_classifier = copy.deepcopy(source_classifier)

    # freeze source classifier
    set_requires_grad(source_classifier, False)
    source_classifier.freeze_bn()

    domain_discri = DomainDiscriminator(in_feature=source_classifier.features_dim, hidden_size=1024).to(device)

    # define loss function
    grl = WarmStartGradientReverseLayer(alpha=1., lo=0., hi=2., max_iters=1000, auto_step=True)
    domain_adv = DomainAdversarialLoss(domain_discri, grl=grl).to(device)

    # define optimizer and lr scheduler
    # note that we only optimize target feature extractor
    optimizer = SGD(target_classifier.get_parameters(optimize_head=False) + domain_discri.get_parameters(), args.lr,
                    momentum=args.momentum, weight_decay=args.weight_decay, nesterov=True)
    lr_scheduler = LambdaLR(optimizer, lambda x: args.lr * (1. + args.lr_gamma * float(x)) ** (-args.lr_decay))

    # resume from the best checkpoint
    if args.phase != 'train':
        checkpoint = torch.load(logger.get_checkpoint_path('best'), map_location='cpu')
        target_classifier.load_state_dict(checkpoint)

    # analyze the model
    if args.phase == 'analysis':
        # extract features from both domains
        feature_extractor = nn.Sequential(target_classifier.backbone, target_classifier.pool_layer,
                                          target_classifier.bottleneck).to(device)
        source_feature = collect_feature(train_source_loader, feature_extractor, device)
        target_feature = collect_feature(train_target_loader, feature_extractor, device)
        # plot t-SNE
        tSNE_filename = osp.join(logger.visualize_directory, 'TSNE.pdf')
        tsne.visualize(source_feature, target_feature, tSNE_filename)
        print("Saving t-SNE to", tSNE_filename)
        # calculate A-distance, which is a measure of distribution discrepancy
        A_distance = a_distance.calculate(source_feature, target_feature, device)
        print("A-distance =", A_distance)
        return

    if args.phase == 'test':
        acc1 = utils.validate(test_loader, target_classifier, args, device)
        print(acc1)
        return

    # start training
    best_acc1 = 0.
    for epoch in range(args.epochs):
        print(lr_scheduler.get_lr())
        # train for one epoch
        train(train_source_iter, train_target_iter, source_classifier, target_classifier, domain_adv,
              optimizer, lr_scheduler, epoch, args)

        # evaluate on validation set
        acc1 = utils.validate(val_loader, target_classifier, args, device)

        # remember best acc@1 and save checkpoint
        torch.save(target_classifier.state_dict(), logger.get_checkpoint_path('latest'))
        if acc1 > best_acc1:
            shutil.copy(logger.get_checkpoint_path('latest'), logger.get_checkpoint_path('best'))
        best_acc1 = max(acc1, best_acc1)

    print("best_acc1 = {:3.1f}".format(best_acc1))

    # evaluate on test set
    target_classifier.load_state_dict(torch.load(logger.get_checkpoint_path('best')))
    acc1 = utils.validate(test_loader, target_classifier, args, device)
    print("test_acc1 = {:3.1f}".format(acc1))

    logger.close()