def monochrome_model(gaussian_p, solarize_p):
    """Augmentation pipeline for monochrome inputs.

    Applies a bicubic random resized crop, a horizontal flip, then Gaussian
    blur with probability ``gaussian_p`` and solarization with probability
    ``solarize_p``.
    """
    stages = [
        RandomResizedCrop((image_size, image_size), interpolation="BICUBIC"),
        RandomHorizontalFlip(),
        RandomApply(GaussianBlur2d(get_kernel_size(image_size), (0.1, 2.0)),
                    p=gaussian_p),
        RandomSolarize(0, 0, p=solarize_p),
    ]
    return nn.Sequential(*stages)
def color_model(gaussian_p, solarize_p):
    """Augmentation pipeline for color inputs.

    Same backbone as the monochrome pipeline (crop, flip, blur, solarize)
    with additional color jitter (p=0.8) and random grayscale (p=0.2).
    """
    stages = [
        RandomResizedCrop((image_size, image_size), interpolation="BICUBIC"),
        RandomHorizontalFlip(),
        ColorJitter(0.4, 0.4, 0.2, 0.1, p=0.8),
        RandomGrayscale(p=0.2),
        RandomApply(GaussianBlur2d(get_kernel_size(image_size), (0.1, 2.0)),
                    p=gaussian_p),
        RandomSolarize(0, 0, p=solarize_p),
    ]
    return nn.Sequential(*stages)
def __init__(self, opt, env):
    """Build a nearest-neighbour downscaling operator.

    Output side length is ``opt['dim_out']``; the crop scale lower bound is
    the output/input ratio so the crop never upsamples beyond the source.
    """
    super().__init__(opt, env)
    out_side = opt['dim_out']
    min_scale = opt['dim_out'] / opt['dim_in']
    self.operator = RandomResizedCrop(
        size=(out_side, out_side),
        scale=(min_scale, 1),
        # An aspect ratio range is required, but .99,1 is effectively "none".
        ratio=(.99, 1),
        resample='NEAREST',
    )
def test_param(self, scale, ratio, resample, align_corners, return_transform,
               same_on_batch, device, dtype):
    """Check that tensor-valued ``scale``/``ratio`` become trainable
    parameters of RandomResizedCrop and receive gradient updates."""

    def as_param(value):
        # Plain list/tuple ranges stay fixed; tensors become nn.Parameters.
        if isinstance(value, (list, tuple)):
            return value
        return nn.Parameter(value.clone().to(device=device, dtype=dtype))

    _scale = as_param(scale)
    _ratio = as_param(ratio)

    torch.manual_seed(0)
    img = torch.randint(255, (2, 3, 10, 10), device=device, dtype=dtype) / 255.0

    aug = RandomResizedCrop(
        (8, 8),
        _scale,
        _ratio,
        resample=resample,
        return_transform=return_transform,
        same_on_batch=same_on_batch,
        align_corners=align_corners,
    )
    result = aug(img)
    output = result[0] if return_transform else result

    if list(aug.parameters()):
        # One SGD step against an arbitrary target to force a gradient update.
        optimizer = torch.optim.SGD(aug.parameters(), lr=0.1)
        loss = nn.MSELoss()(output, torch.ones_like(output) * 2)
        loss.backward()
        optimizer.step()

    if not isinstance(scale, (list, tuple)):
        assert isinstance(aug.scale, torch.Tensor)
        # Assert if param not updated
        assert (scale.to(device=device, dtype=dtype) - aug.scale.data).sum() != 0
    if not isinstance(ratio, (list, tuple)):
        assert isinstance(aug.ratio, torch.Tensor)
        # Assert if param not updated
        assert (ratio.to(device=device, dtype=dtype) - aug.ratio.data).sum() != 0
def train_transforms(image_size, train_img_scale=(0.35, 1), normalize: bool = True,
                     mean=torch.tensor([0.485, 0.456, 0.406]),
                     std=torch.tensor([0.229, 0.224, 0.225])):
    """Transforms for train augmentation with Kornia.

    Args:
        image_size: Side length of the square output crop.
        train_img_scale: (min, max) area fraction for the random resized crop.
        normalize: Whether to append per-channel normalization.
        mean: Per-channel normalization means (ImageNet defaults).
        std: Per-channel normalization standard deviations (ImageNet defaults).

    Returns:
        A ``torch.nn.Sequential`` augmentation pipeline.
    """
    transforms = [
        AccimageImageToTensorNN(),
        RandomResizedCrop((image_size, image_size), train_img_scale, keepdim=True),
        RandomHorizontalFlip(keepdim=True),
    ]
    if normalize:
        # Fix: previously passed mean=std, which normalized with the std
        # vector as the mean and silently ignored the `mean` argument.
        transforms.append(Normalize(mean=mean, std=std, keepdim=True))
    return torch.nn.Sequential(*transforms)
def __init__(self, input_shape, s=1.0, apply_transforms=None):
    """Store image geometry and build the default SimCLR-style augmentation
    stack (crop, flip, jitter, grayscale, blur) when none is supplied.

    Args:
        input_shape: (H, W, C) image shape.
        s: Color-jitter strength multiplier.
        apply_transforms: Optional pre-built transform pipeline; if ``None``
            a default ``KorniaCompose`` pipeline is created.
    """
    assert len(input_shape) == 3, "input_shape should be (H, W, C)"
    self.input_shape = input_shape
    self.H, self.W, self.C = input_shape[0], input_shape[1], input_shape[2]
    self.s = s
    self.apply_transforms = apply_transforms
    if self.apply_transforms is None:
        # Blur kernel spans 10% of the image height; sigma comes from a helper.
        kernel_size = int(0.1 * self.H)
        sigma = self._get_sigma()
        jitter = 0.8 * self.s
        self.apply_transforms = KorniaCompose([
            RandomResizedCrop(size=(self.H, self.W), scale=(0.08, 1.0)),
            RandomHorizontalFlip(p=0.5),
            ColorJitter(jitter, jitter, jitter, 0.2 * self.s),
            RandomGrayscale(p=0.2),
            GaussianBlur2d(kernel_size=(kernel_size, kernel_size),
                           sigma=(sigma, sigma)),
        ])
def random_resized_crop(self, p=1.0, scale=(0.08, 1.0)) -> TransformType:
    """Return a RandomResizedCrop at this object's configured crop size."""
    crop_size = self.random_crop_size
    return RandomResizedCrop(crop_size, scale=scale, p=p)
def __init__(
        self,
        image_shape,
        output_size,
        n_atoms,
        dueling,
        jumps,
        spr,
        augmentation,
        target_augmentation,
        eval_augmentation,
        dynamics_blocks,
        norm_type,
        noisy_nets,
        aug_prob,
        classifier,
        imagesize,
        time_offset,
        local_spr,
        global_spr,
        momentum_encoder,
        shared_encoder,
        distributional,
        dqn_hidden_size,
        momentum_tau,
        renormalize,
        renormalize_type,
        q_l1_type,
        dropout,
        final_classifier,
        model_rl,
        noisy_nets_std,
        residual_tm,
        pred_hidden_ratio,
        encoder_type,
        transition_type,
        conv_proj_channel,
        proj_hidden_size,
        gru_input_size,
        gru_proj_size,
        ln_ratio,
        use_maxpool=False,
        channels=None,  # None uses default.
        kernel_sizes=None,
        strides=None,
        paddings=None,
        framestack=4,
):
    """Instantiates the neural network according to arguments; network defaults stored within this method.

    Builds, in order: per-name augmentation transforms, the convolutional
    encoder (plus optional projection), the Q-value head, the latent
    dynamics model, and — when ``spr`` is truthy — the local/global SPR
    classifier heads and the target encoder (momentum, separate, or shared).

    NOTE(review): `use_maxpool`, `channels`, `kernel_sizes`, `strides`,
    `paddings`, and `framestack` are accepted but never read in this
    constructor (the encoder uses hard-coded values) — presumably kept for
    config compatibility; confirm before removing.
    """
    super().__init__()
    self.noisy = noisy_nets
    self.time_offset = time_offset
    self.aug_prob = aug_prob
    self.classifier_type = classifier
    self.distributional = distributional
    # A non-distributional head is a single atom per action.
    n_atoms = 1 if not self.distributional else n_atoms
    self.dqn_hidden_size = dqn_hidden_size

    # Train-time and eval-time augmentations, one pair per requested name.
    self.transforms = []
    self.eval_transforms = []
    self.uses_augmentation = False
    for aug in augmentation:
        if aug == "affine":
            transformation = RandomAffine(5, (.14, .14), (.9, 1.1), (-5, 5))
            eval_transformation = nn.Identity()
            self.uses_augmentation = True
        elif aug == "crop":
            transformation = RandomCrop((84, 84))
            # Crashes if aug-prob not 1: use CenterCrop((84, 84)) or Resize((84, 84)) in that case.
            eval_transformation = CenterCrop((84, 84))
            self.uses_augmentation = True
            imagesize = 84  # cropping fixes the spatial size regardless of input.
        elif aug == "rrc":
            transformation = RandomResizedCrop((100, 100), (0.8, 1))
            eval_transformation = nn.Identity()
            self.uses_augmentation = True
        elif aug == "blur":
            transformation = GaussianBlur2d((5, 5), (1.5, 1.5))
            eval_transformation = nn.Identity()
            self.uses_augmentation = True
        elif aug == "shift":
            # Pad-then-crop gives a random shift of up to 4 pixels.
            transformation = nn.Sequential(nn.ReplicationPad2d(4), RandomCrop((84, 84)))
            eval_transformation = nn.Identity()
        elif aug == "intensity":
            transformation = Intensity(scale=0.05)
            eval_transformation = nn.Identity()
        elif aug == "none":
            transformation = eval_transformation = nn.Identity()
        else:
            raise NotImplementedError()
        self.transforms.append(transformation)
        self.eval_transforms.append(eval_transformation)

    self.dueling = dueling
    # assumes image_shape leads with (framestack, channels, ...) — TODO confirm.
    f, c = image_shape[:2]
    in_channels = np.prod(image_shape[:2])

    # Encoder: small DQN-style CNN or a resnet18 backbone.
    if encoder_type == 'conv2d':
        self.conv = Conv2dModel(
            in_channels=in_channels,
            channels=[32, 64, 64],
            kernel_sizes=[8, 4, 3],
            strides=[4, 2, 1],
            paddings=[0, 0, 0],
            use_maxpool=False,
            dropout=dropout,
            conv_proj_channel=conv_proj_channel,
        )
    elif encoder_type == 'resnet18':
        self.conv = resnet18()
    else:
        raise NotImplementedError

    # Dummy forward pass to discover the encoder's output channel count and
    # spatial extent without hard-coding them.
    fake_input = torch.zeros(1, f * c, imagesize, imagesize)
    fake_output = self.conv(fake_input)
    self.hidden_size = fake_output.shape[1]
    self.pixels = fake_output.shape[-1] * fake_output.shape[-2]
    print("Spatial latent size is {}".format(fake_output.shape[1:]))

    # Optional MLP projection applied after the encoder.
    if proj_hidden_size:
        self.conv_proj = nn.Sequential(
            nn.Flatten(1, -1),
            nn.Linear(self.hidden_size * self.pixels, proj_hidden_size),
            nn.LayerNorm(proj_hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
        )
    else:
        self.conv_proj = nn.Identity()

    self.jumps = jumps
    self.model_rl = model_rl
    self.use_spr = spr
    self.target_augmentation = target_augmentation
    self.eval_augmentation = eval_augmentation
    self.num_actions = output_size
    self.transition_type = transition_type

    # Q-value head (dueling or plain), distributional over n_atoms.
    if dueling:
        self.head = DQNDistributionalDuelingHeadModel(self.hidden_size,
                                                      output_size,
                                                      hidden_size=self.dqn_hidden_size,
                                                      pixels=self.pixels,
                                                      noisy=self.noisy,
                                                      n_atoms=n_atoms,
                                                      std_init=noisy_nets_std,
                                                      proj_hidden_size=proj_hidden_size)
    else:
        self.head = DQNDistributionalHeadModel(self.hidden_size,
                                               output_size,
                                               hidden_size=self.dqn_hidden_size,
                                               pixels=self.pixels,
                                               noisy=self.noisy,
                                               n_atoms=n_atoms,
                                               std_init=noisy_nets_std)

    # Latent dynamics model used for multi-step (jumps > 0) prediction.
    if self.jumps > 0:
        repr_size = proj_hidden_size if proj_hidden_size else (self.pixels * self.hidden_size)
        if transition_type == 'gru':
            self.dynamics_model = GRUModel(
                input_size=gru_input_size,
                repr_size=repr_size,
                proj_size=gru_proj_size,
                num_layers=1,
                num_actions=self.num_actions,
                renormalize=renormalize,
                renormalize_type=renormalize_type,
                dropout=dropout
            )
        else:
            self.dynamics_model = TransitionModel(channels=self.hidden_size,
                                                  num_actions=output_size,
                                                  pixels=self.pixels,
                                                  hidden_size=self.hidden_size,
                                                  limit=1,
                                                  blocks=dynamics_blocks,
                                                  norm_type=norm_type,
                                                  renormalize=renormalize,
                                                  residual=residual_tm)
    else:
        self.dynamics_model = nn.Identity()

    self.renormalize = renormalize
    self.renormalize_type = renormalize_type
    self.ln_ratio = ln_ratio
    if renormalize_type == 'train_ln':
        # NOTE(review): `repr_size` is only bound when jumps > 0 above, so
        # jumps == 0 combined with renormalize_type == 'train_ln' would raise
        # NameError here — confirm that combination is disallowed upstream.
        self.renormalize_ln = nn.LayerNorm(repr_size)
    else:
        self.renormalize_ln = nn.Identity()

    # SPR (self-predictive representation) heads.
    if self.use_spr:
        self.local_spr = local_spr
        self.global_spr = global_spr
        self.momentum_encoder = momentum_encoder
        self.momentum_tau = momentum_tau
        self.shared_encoder = shared_encoder
        # Momentum and shared target encoders are mutually exclusive.
        assert not (self.shared_encoder and self.momentum_encoder)
        # in case someone tries something silly like --local-spr 2
        self.num_sprs = int(bool(self.local_spr)) + \
            int(bool(self.global_spr))

        # Per-location (local) SPR projection/prediction heads.
        if self.local_spr:
            self.local_final_classifier = nn.Identity()
            if self.classifier_type == "mlp":
                self.local_classifier = nn.Sequential(nn.Linear(self.hidden_size,
                                                                self.hidden_size),
                                                      nn.BatchNorm1d(self.hidden_size),
                                                      nn.ReLU(),
                                                      nn.Linear(self.hidden_size,
                                                                self.hidden_size))
            elif self.classifier_type == "bilinear":
                self.local_classifier = nn.Linear(self.hidden_size, self.hidden_size)
            elif self.classifier_type == "none":
                self.local_classifier = nn.Identity()
            if final_classifier == "mlp":
                self.local_final_classifier = nn.Sequential(nn.Linear(self.hidden_size,
                                                                      2 * self.hidden_size),
                                                            nn.BatchNorm1d(2 * self.hidden_size),
                                                            nn.ReLU(),
                                                            nn.Linear(2 * self.hidden_size,
                                                                      self.hidden_size))
            elif final_classifier == "linear":
                self.local_final_classifier = nn.Linear(self.hidden_size, self.hidden_size)
            else:
                self.local_final_classifier = nn.Identity()
            # Target classifier shares weights with the online one here; it is
            # deep-copied below when a momentum/separate target encoder is used.
            self.local_target_classifier = self.local_classifier
        else:
            self.local_classifier = self.local_target_classifier = nn.Identity()

        # Whole-image (global) SPR projection/prediction heads.
        if self.global_spr:
            self.global_final_classifier = nn.Identity()
            if self.classifier_type == "mlp":
                self.global_classifier = nn.Sequential(
                    nn.Flatten(-3, -1),
                    nn.Linear(self.pixels * self.hidden_size, 512),
                    nn.BatchNorm1d(512),
                    nn.ReLU(),
                    nn.Linear(512, 256)
                )
                self.global_target_classifier = self.global_classifier
                global_spr_size = 256
            elif self.classifier_type == "q_l1":
                # Projects through the first layer of the Q head.
                self.global_classifier = QL1Head(self.head, dueling=dueling, type=q_l1_type)
                global_spr_size = self.global_classifier.out_features
                self.global_target_classifier = self.global_classifier
            elif self.classifier_type == "q_l2":
                self.global_classifier = nn.Sequential(self.head, nn.Flatten(-2, -1))
                self.global_target_classifier = self.global_classifier
                global_spr_size = 256
            elif self.classifier_type == "bilinear":
                self.global_classifier = nn.Sequential(nn.Flatten(-3, -1),
                                                       nn.Linear(self.hidden_size * self.pixels,
                                                                 self.hidden_size * self.pixels))
                self.global_target_classifier = nn.Flatten(-3, -1)
            elif self.classifier_type == "none":
                self.global_classifier = nn.Flatten(-3, -1)
                self.global_target_classifier = nn.Flatten(-3, -1)
                global_spr_size = self.hidden_size * self.pixels
            if final_classifier == "mlp":
                global_final_hidden_size = int(global_spr_size * pred_hidden_ratio)
                self.global_final_classifier = nn.Sequential(
                    nn.Linear(global_spr_size, global_final_hidden_size),
                    nn.BatchNorm1d(global_final_hidden_size),
                    nn.ReLU(),
                    nn.Linear(global_final_hidden_size, global_spr_size)
                )
            elif final_classifier == "linear":
                self.global_final_classifier = nn.Sequential(
                    nn.Linear(global_spr_size, global_spr_size),
                )
            elif final_classifier == "none":
                self.global_final_classifier = nn.Identity()
        else:
            self.global_classifier = self.global_target_classifier = nn.Identity()

        # Target-encoder wiring: momentum copy, separate encoder, or shared.
        if self.momentum_encoder:
            self.target_encoder = copy.deepcopy(self.conv)
            self.target_encoder_proj = copy.deepcopy(self.conv_proj)
            self.target_renormalize_ln = copy.deepcopy(self.renormalize_ln)
            self.global_target_classifier = copy.deepcopy(self.global_target_classifier)
            self.local_target_classifier = copy.deepcopy(self.local_target_classifier)
            # Momentum targets are updated by EMA, never by gradient.
            for param in (list(self.target_encoder.parameters())
                          + list(self.target_encoder_proj.parameters())
                          + list(self.target_renormalize_ln.parameters())
                          + list(self.global_target_classifier.parameters())
                          + list(self.local_target_classifier.parameters())):
                param.requires_grad = False
        elif not self.shared_encoder:
            # Use a separate target encoder on the last frame only.
            self.global_target_classifier = copy.deepcopy(self.global_target_classifier)
            self.local_target_classifier = copy.deepcopy(self.local_target_classifier)
            # NOTE(review): self.stack_actions is never assigned in this
            # constructor — this branch would raise AttributeError unless a
            # subclass or earlier code sets it; confirm.
            if self.stack_actions:
                input_size = c - 1
            else:
                input_size = c
            self.target_encoder = Conv2dModel(in_channels=input_size,
                                              channels=[32, 64, 64],
                                              kernel_sizes=[8, 4, 3],
                                              strides=[4, 2, 1],
                                              paddings=[0, 0, 0],
                                              use_maxpool=False,
                                              )
        elif self.shared_encoder:
            self.target_encoder = self.conv

    print("Initialized model with {} parameters".format(count_parameters(self)))