Example #1
def monochrome_model(gaussian_p, solarize_p):
    return nn.Sequential(
        RandomResizedCrop((image_size, image_size), interpolation="BICUBIC"),
        RandomHorizontalFlip(),
        RandomApply(GaussianBlur2d(get_kernel_size(image_size), (0.1, 2.0)),
                    p=gaussian_p),
        RandomSolarize(0, 0, p=solarize_p))
Example #2
def color_model(gaussian_p, solarize_p):
    return nn.Sequential(
        RandomResizedCrop((image_size, image_size), interpolation="BICUBIC"),
        RandomHorizontalFlip(),
        ColorJitter(0.4, 0.4, 0.2, 0.1, p=0.8),
        RandomGrayscale(p=0.2),
        RandomApply(GaussianBlur2d(get_kernel_size(image_size), (0.1, 2.0)),
                    p=gaussian_p),
        RandomSolarize(0, 0, p=solarize_p))
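
Both pipelines follow BYOL's asymmetric augmentation recipe (the monochrome variant simply drops the color transforms). A minimal usage sketch, assuming the surrounding file defines image_size, get_kernel_size, and the augmentation classes used above:

import torch

# Hypothetical usage; all augmentation names come from the snippets above.
batch = torch.rand(8, 3, image_size, image_size)  # RGB images in [0, 1]
view_1 = color_model(gaussian_p=1.0, solarize_p=0.0)(batch)
view_2 = color_model(gaussian_p=0.1, solarize_p=0.2)(batch)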
Example #3
 def __init__(self, opt, env):
     super().__init__(opt, env)
     dim_in = opt['dim_in']
     dim_out = opt['dim_out']
     scale = dim_out / dim_in
     self.operator = RandomResizedCrop(size=(dim_out, dim_out), scale=(scale, 1),
                                       ratio=(.99,1),  # An aspect ratio range is required, but .99,1 is effectively "none".
                                       resample='NEAREST')
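
Two details worth noting here: Kornia's scale argument is an area fraction of the source image, whereas dim_out / dim_in as computed above is a linear size ratio, so the crop may be smaller than a dim_out-sized region; and resample='NEAREST' avoids introducing interpolated pixel values, which suits label-like or palette data.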
Example #4
    def test_param(self, scale, ratio, resample, align_corners,
                   return_transform, same_on_batch, device, dtype):

        _scale = (scale if isinstance(scale, (list, tuple)) else nn.Parameter(
            scale.clone().to(device=device, dtype=dtype)))
        _ratio = (ratio if isinstance(ratio, (list, tuple)) else nn.Parameter(
            ratio.clone().to(device=device, dtype=dtype)))

        torch.manual_seed(0)
        input = torch.randint(255, (2, 3, 10, 10), device=device,
                              dtype=dtype) / 255.0
        aug = RandomResizedCrop(
            (8, 8),
            _scale,
            _ratio,
            resample=resample,
            return_transform=return_transform,
            same_on_batch=same_on_batch,
            align_corners=align_corners,
        )

        if return_transform:
            output, _ = aug(input)
        else:
            output = aug(input)

        if len(list(aug.parameters())) != 0:
            mse = nn.MSELoss()
            opt = torch.optim.SGD(aug.parameters(), lr=0.1)
            loss = mse(output, torch.ones_like(output) * 2)
            loss.backward()
            opt.step()

        if not isinstance(scale, (list, tuple)):
            assert isinstance(aug.scale, torch.Tensor)
            # Assert that the optimizer step actually updated the parameter
            assert (scale.to(device=device, dtype=dtype) -
                    aug.scale.data).sum() != 0
        if not isinstance(ratio, (list, tuple)):
            assert isinstance(aug.ratio, torch.Tensor)
            # Assert that the optimizer step actually updated the parameter
            assert (ratio.to(device=device, dtype=dtype) -
                    aug.ratio.data).sum() != 0
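
The point of this test is differentiability: when scale or ratio is passed as an nn.Parameter rather than a plain tuple, the augmentation exposes it through aug.parameters(), the MSE loss backpropagates through the crop, and one SGD step must change the value, which the final assertions check. A pared-down sketch of the same idea, assuming a Kornia version that accepts tensor-valued arguments as this test does:

import torch
import torch.nn as nn
from kornia.augmentation import RandomResizedCrop

scale = nn.Parameter(torch.tensor([0.7, 1.0]))
aug = RandomResizedCrop((8, 8), scale, (0.75, 1.33))
out = aug(torch.rand(2, 3, 10, 10))
out.mean().backward()  # the gradient reaches scale through the sampled crop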
Example #5
def train_transforms(image_size,
                     train_img_scale=(0.35, 1),
                     normalize: bool = True,
                     mean=torch.tensor([0.485, 0.456, 0.406]),
                     std=torch.tensor([0.229, 0.224, 0.225])):
    """Transforms for train augmentation with Kornia."""

    transforms = [
        AccimageImageToTensorNN(),
        RandomResizedCrop((image_size, image_size),
                          train_img_scale,
                          keepdim=True),
        RandomHorizontalFlip(keepdim=True)
    ]
    if normalize:
        transforms.append(Normalize(mean=mean, std=std, keepdim=True))
    return torch.nn.Sequential(*transforms)
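
keepdim=True makes the Kornia ops accept and return unbatched CHW tensors, so the pipeline can run inside a per-sample dataset transform. A hypothetical usage, assuming AccimageImageToTensorNN converts the decoded image to a float tensor:

tfm = train_transforms(image_size=224)
img = tfm(decoded_image)  # decoded_image is hypothetical; output has shape (3, 224, 224)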
Example #6
    def __init__(self, input_shape, s=1.0, apply_transforms=None):

        assert len(input_shape) == 3, "input_shape should be (H, W, C)"

        self.input_shape = input_shape
        self.H, self.W, self.C = input_shape[0], input_shape[1], input_shape[2]
        self.s = s
        self.apply_transforms = apply_transforms

        if self.apply_transforms is None:
            kernel_size = int(0.1 * self.H)
            sigma = self._get_sigma()

            self.apply_transforms = KorniaCompose([
                RandomResizedCrop(size=(self.H, self.W), scale=(0.08, 1.0)),
                RandomHorizontalFlip(p=0.5),
                ColorJitter(0.8 * self.s, 0.8 * self.s, 0.8 * self.s, 0.2 * self.s),
                RandomGrayscale(p=0.2),
                GaussianBlur2d(kernel_size=(kernel_size, kernel_size),
                               sigma=(sigma, sigma))
            ])
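
This mirrors the SimCLR recipe: the jitter strengths scale with s (0.8*s for brightness, contrast, and saturation, 0.2*s for hue) and the blur kernel spans roughly 10% of the image height. Kornia's GaussianBlur2d expects an odd kernel size, so in practice the computed kernel_size may need rounding to the nearest odd integer.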
Example #7
def random_resized_crop(self, p=1.0, scale=(0.08, 1.0)) -> TransformType:
    return RandomResizedCrop(self.random_crop_size, scale=scale, p=p)
Example #8
File: models.py Project: kevinghst/SPR
    def __init__(
            self,
            image_shape,
            output_size,
            n_atoms,
            dueling,
            jumps,
            spr,
            augmentation,
            target_augmentation,
            eval_augmentation,
            dynamics_blocks,
            norm_type,
            noisy_nets,
            aug_prob,
            classifier,
            imagesize,
            time_offset,
            local_spr,
            global_spr,
            momentum_encoder,
            shared_encoder,
            distributional,
            dqn_hidden_size,
            momentum_tau,
            renormalize,
            renormalize_type,
            q_l1_type,
            dropout,
            final_classifier,
            model_rl,
            noisy_nets_std,
            residual_tm,
            pred_hidden_ratio,
            encoder_type,
            transition_type,
            conv_proj_channel,
            proj_hidden_size,
            gru_input_size,
            gru_proj_size,
            ln_ratio,
            use_maxpool=False,
            channels=None,  # None uses default.
            kernel_sizes=None,
            strides=None,
            paddings=None,
            framestack=4,
    ):
        """Instantiates the neural network according to arguments; network defaults
        stored within this method."""
        super().__init__()
        self.noisy = noisy_nets
        self.time_offset = time_offset
        self.aug_prob = aug_prob
        self.classifier_type = classifier

        self.distributional = distributional
        n_atoms = 1 if not self.distributional else n_atoms
        self.dqn_hidden_size = dqn_hidden_size

        self.transforms = []
        self.eval_transforms = []

        self.uses_augmentation = False
        for aug in augmentation:
            if aug == "affine":
                transformation = RandomAffine(5, (.14, .14), (.9, 1.1), (-5, 5))
                eval_transformation = nn.Identity()
                self.uses_augmentation = True
            elif aug == "crop":
                transformation = RandomCrop((84, 84))
                # Crashes if aug-prob not 1: use CenterCrop((84, 84)) or Resize((84, 84)) in that case.
                eval_transformation = CenterCrop((84, 84))
                self.uses_augmentation = True
                imagesize = 84
            elif aug == "rrc":
                transformation = RandomResizedCrop((100, 100), (0.8, 1))
                eval_transformation = nn.Identity()
                self.uses_augmentation = True
            elif aug == "blur":
                transformation = GaussianBlur2d((5, 5), (1.5, 1.5))
                eval_transformation = nn.Identity()
                self.uses_augmentation = True
            elif aug == "shift":
                transformation = nn.Sequential(nn.ReplicationPad2d(4), RandomCrop((84, 84)))
                eval_transformation = nn.Identity()
            elif aug == "intensity":
                transformation = Intensity(scale=0.05)
                eval_transformation = nn.Identity()
            elif aug == "none":
                transformation = eval_transformation = nn.Identity()
            else:
                raise NotImplementedError()
            self.transforms.append(transformation)
            self.eval_transforms.append(eval_transformation)
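        # Each training-time augmentation is paired with an eval-time
        # counterpart; the stochastic ones fall back to nn.Identity() (or a
        # deterministic CenterCrop) at evaluation.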

        self.dueling = dueling
        f, c = image_shape[:2]
        in_channels = np.prod(image_shape[:2])

        if encoder_type == 'conv2d':
            self.conv = Conv2dModel(
                in_channels=in_channels,
                channels=[32, 64, 64],
                kernel_sizes=[8, 4, 3],
                strides=[4, 2, 1],
                paddings=[0, 0, 0],
                use_maxpool=False,
                dropout=dropout,
                conv_proj_channel=conv_proj_channel,
            )
        elif encoder_type == 'resnet18':
            self.conv = resnet18()
        else:
            raise NotImplementedError

        fake_input = torch.zeros(1, f*c, imagesize, imagesize)
        fake_output = self.conv(fake_input)


        self.hidden_size = fake_output.shape[1]
        self.pixels = fake_output.shape[-1]*fake_output.shape[-2]
        print("Spatial latent size is {}".format(fake_output.shape[1:]))

        if proj_hidden_size:
            self.conv_proj = nn.Sequential(
                nn.Flatten(1, -1),
                nn.Linear(self.hidden_size * self.pixels, proj_hidden_size),
                nn.LayerNorm(proj_hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout),
            )
        else:
            self.conv_proj = nn.Identity()

        self.jumps = jumps
        self.model_rl = model_rl
        self.use_spr = spr
        self.target_augmentation = target_augmentation
        self.eval_augmentation = eval_augmentation
        self.num_actions = output_size
        self.transition_type = transition_type

        if dueling:
            self.head = DQNDistributionalDuelingHeadModel(self.hidden_size,
                                                          output_size,
                                                          hidden_size=self.dqn_hidden_size,
                                                          pixels=self.pixels,
                                                          noisy=self.noisy,
                                                          n_atoms=n_atoms,
                                                          std_init=noisy_nets_std,
                                                          proj_hidden_size=proj_hidden_size)
        else:
            self.head = DQNDistributionalHeadModel(self.hidden_size,
                                                   output_size,
                                                   hidden_size=self.dqn_hidden_size,
                                                   pixels=self.pixels,
                                                   noisy=self.noisy,
                                                   n_atoms=n_atoms,
                                                   std_init=noisy_nets_std)

        if self.jumps > 0:
            repr_size = proj_hidden_size if proj_hidden_size else (self.pixels * self.hidden_size)

            if transition_type == 'gru':
                self.dynamics_model = GRUModel(
                    input_size=gru_input_size,
                    repr_size=repr_size,
                    proj_size=gru_proj_size,
                    num_layers=1,
                    num_actions=self.num_actions,
                    renormalize=renormalize,
                    renormalize_type=renormalize_type,
                    dropout=dropout,
                )
            else:
                self.dynamics_model = TransitionModel(channels=self.hidden_size,
                                                      num_actions=output_size,
                                                      pixels=self.pixels,
                                                      hidden_size=self.hidden_size,
                                                      limit=1,
                                                      blocks=dynamics_blocks,
                                                      norm_type=norm_type,
                                                      renormalize=renormalize,
                                                      residual=residual_tm)
        else:
            self.dynamics_model = nn.Identity()

        self.renormalize = renormalize
        self.renormalize_type = renormalize_type
        self.ln_ratio = ln_ratio

        if renormalize_type == 'train_ln':
            # NOTE: repr_size is only defined above when jumps > 0.
            self.renormalize_ln = nn.LayerNorm(repr_size)
        else:
            self.renormalize_ln = nn.Identity()

        if self.use_spr:
            self.local_spr = local_spr
            self.global_spr = global_spr
            self.momentum_encoder = momentum_encoder
            self.momentum_tau = momentum_tau
            self.shared_encoder = shared_encoder
            assert not (self.shared_encoder and self.momentum_encoder)

            # in case someone tries something silly like --local-spr 2
            self.num_sprs = int(bool(self.local_spr)) + \
                            int(bool(self.global_spr))

            if self.local_spr:
                self.local_final_classifier = nn.Identity()
                if self.classifier_type == "mlp":
                    self.local_classifier = nn.Sequential(nn.Linear(self.hidden_size,
                                                                    self.hidden_size),
                                                          nn.BatchNorm1d(self.hidden_size),
                                                          nn.ReLU(),
                                                          nn.Linear(self.hidden_size,
                                                                    self.hidden_size))
                elif self.classifier_type == "bilinear":
                    self.local_classifier = nn.Linear(self.hidden_size, self.hidden_size)
                elif self.classifier_type == "none":
                    self.local_classifier = nn.Identity()
                if final_classifier == "mlp":
                    self.local_final_classifier = nn.Sequential(nn.Linear(self.hidden_size, 2*self.hidden_size),
                                                                nn.BatchNorm1d(2*self.hidden_size),
                                                                nn.ReLU(),
                                                                nn.Linear(2*self.hidden_size,
                                                                    self.hidden_size))
                elif final_classifier == "linear":
                    self.local_final_classifier = nn.Linear(self.hidden_size, self.hidden_size)
                else:
                    self.local_final_classifier = nn.Identity()

                self.local_target_classifier = self.local_classifier
            else:
                self.local_classifier = self.local_target_classifier = nn.Identity()
            if self.global_spr:
                self.global_final_classifier = nn.Identity()
                if self.classifier_type == "mlp":
                    self.global_classifier = nn.Sequential(
                                                nn.Flatten(-3, -1),
                                                nn.Linear(self.pixels*self.hidden_size, 512),
                                                nn.BatchNorm1d(512),
                                                nn.ReLU(),
                                                nn.Linear(512, 256)
                                                )
                    self.global_target_classifier = self.global_classifier
                    global_spr_size = 256
                elif self.classifier_type == "q_l1":
                    self.global_classifier = QL1Head(self.head, dueling=dueling, type=q_l1_type)
                    global_spr_size = self.global_classifier.out_features
                    self.global_target_classifier = self.global_classifier
                elif self.classifier_type == "q_l2":
                    self.global_classifier = nn.Sequential(self.head, nn.Flatten(-2, -1))
                    self.global_target_classifier = self.global_classifier
                    global_spr_size = 256
                elif self.classifier_type == "bilinear":
                    self.global_classifier = nn.Sequential(nn.Flatten(-3, -1),
                                                           nn.Linear(self.hidden_size*self.pixels,
                                                                     self.hidden_size*self.pixels))
                    self.global_target_classifier = nn.Flatten(-3, -1)
                elif self.classifier_type == "none":
                    self.global_classifier = nn.Flatten(-3, -1)
                    self.global_target_classifier = nn.Flatten(-3, -1)

                    global_spr_size = self.hidden_size*self.pixels
                if final_classifier == "mlp":
                    global_final_hidden_size = int(global_spr_size * pred_hidden_ratio)
                    self.global_final_classifier = nn.Sequential(
                        nn.Linear(global_spr_size, global_final_hidden_size),
                        nn.BatchNorm1d(global_final_hidden_size),
                        nn.ReLU(),
                        nn.Linear(global_final_hidden_size, global_spr_size)
                    )
                elif final_classifier == "linear":
                    self.global_final_classifier = nn.Sequential(
                        nn.Linear(global_spr_size, global_spr_size),
                    )
                elif final_classifier == "none":
                    self.global_final_classifier = nn.Identity()
            else:
                self.global_classifier = self.global_target_classifier = nn.Identity()

            if self.momentum_encoder:
                self.target_encoder = copy.deepcopy(self.conv)
                self.target_encoder_proj = copy.deepcopy(self.conv_proj)
                self.target_renormalize_ln = copy.deepcopy(self.renormalize_ln)
                self.global_target_classifier = copy.deepcopy(self.global_target_classifier)
                self.local_target_classifier = copy.deepcopy(self.local_target_classifier)
                for param in (list(self.target_encoder.parameters())
                            + list(self.target_encoder_proj.parameters())
                            + list(self.target_renormalize_ln.parameters())
                            + list(self.global_target_classifier.parameters())
                            + list(self.local_target_classifier.parameters())):
                    param.requires_grad = False

            elif not self.shared_encoder:
                # Use a separate target encoder on the last frame only.
                self.global_target_classifier = copy.deepcopy(self.global_target_classifier)
                self.local_target_classifier = copy.deepcopy(self.local_target_classifier)
                if self.stack_actions:
                    input_size = c - 1
                else:
                    input_size = c
                self.target_encoder = Conv2dModel(in_channels=input_size,
                                                  channels=[32, 64, 64],
                                                  kernel_sizes=[8, 4, 3],
                                                  strides=[4, 2, 1],
                                                  paddings=[0, 0, 0],
                                                  use_maxpool=False,
                                                  )

            elif self.shared_encoder:
                self.target_encoder = self.conv

        print("Initialized model with {} parameters".format(count_parameters(self)))