def __init__(self, d, k=10, bn=True, vq_coef=1, commit_coef=0.5,
             num_channels_in=3, num_channels_out=3, **kwargs):
    super(VQ_CVAE, self).__init__()
    self.mse_hand_adv = 0
    self.mse_obj_adv = 0
    self.recon_loss = 0
    self.mse_hand_obj_adv = 0
    self.adv_loss = 0
    self.encoder = nn.Sequential(
        nn.Conv2d(num_channels_in, d, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(d),
        nn.ReLU(inplace=True),
        nn.Conv2d(d, d, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(d),
        nn.ReLU(inplace=True),
        ResBlock(d, d, bn),
        nn.BatchNorm2d(d),
        ResBlock(d, d, bn),
        nn.BatchNorm2d(d),
    )
    self.decoder = nn.Sequential(
        ResBlock(d, d),
        nn.BatchNorm2d(d),
        ResBlock(d, d),
        nn.ConvTranspose2d(d, d, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(d),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(d, num_channels_out, kernel_size=4, stride=2, padding=1),
    )
    self.d = d
    self.emb = NearestEmbed(k, d)
    self.vq_coef = vq_coef
    self.commit_coef = commit_coef
    self.mse = 0
    self.vq_loss = torch.zeros(1)
    self.commit_loss = 0
    # Initialise conv/linear weights from a small normal distribution and
    # clamp them in place to (-0.04, 0.04).
    for l in self.modules():
        if isinstance(l, nn.Linear) or isinstance(l, nn.Conv2d):
            l.weight.detach().normal_(0, 0.02)
            l.weight.detach().fmod_(0.04)
            nn.init.constant_(l.bias, 0)
    self.encoder[-1].weight.detach().fill_(1 / 40)
    self.emb.weight.detach().normal_(0, 0.02)
    self.emb.weight.detach().fmod_(0.04)
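# Shape sketch (illustrative only; it assumes ResBlock/NearestEmbed from this repo and a
# 128x128 RGB input — the forward/quantisation step through self.emb is defined elsewhere):
#
#     model = VQ_CVAE(d=256, k=512)
#     x = torch.randn(4, 3, 128, 128)
#     z_e = model.encoder(x)      # two stride-2 convs: (4, 256, 32, 32)
#     x_hat = model.decoder(z_e)  # two stride-2 transposed convs: (4, 3, 128, 128)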
def __init__(self, modules, depth=1, dim_embeddings=128):
    super().__init__()
    self.depth = depth
    self.dim_embeddings = dim_embeddings
    self.inplane = modules[0].inplane
    self.outplane = modules[-1].outplane
    self.controller = nn.Sequential(
        nn.Conv2d(self.inplane, depth * dim_embeddings, kernel_size=1),
        nn.AdaptiveAvgPool2d(1)
    )
    self.components = nn.ModuleList(modules)
    self.neareat_emb = NearestEmbed(len(modules), dim_embeddings)
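# Usage sketch (hypothetical: `ComponentRouter` stands in for the enclosing class, and the
# component modules are assumed to expose .inplane/.outplane attributes; routing between
# components happens in the forward pass, which is not shown here):
#
#     blocks = [ResBlock(64, 64), ResBlock(64, 64)]
#     router = ComponentRouter(blocks, depth=1, dim_embeddings=128)
#     x = torch.randn(2, 64, 32, 32)
#     ctrl = router.controller(x)   # 1x1 conv + global avg pool -> (2, 1 * 128, 1, 1)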
def __init__(self, hidden=200, k=10, vq_coef=0.2, comit_coef=0.4, **kwargs):
    super(VQ_VAE, self).__init__()
    self.emb_size = k
    self.fc1 = nn.Linear(784, 400)
    self.fc2 = nn.Linear(400, hidden)
    self.fc3 = nn.Linear(hidden, 400)
    self.fc4 = nn.Linear(400, 784)
    self.emb = NearestEmbed(k, self.emb_size)
    self.relu = nn.ReLU()
    self.sigmoid = nn.Sigmoid()
    self.vq_coef = vq_coef
    self.comit_coef = comit_coef
    self.hidden = hidden
    self.ce_loss = 0
    self.vq_loss = 0
    self.commit_loss = 0
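# Encoding-side sketch (assumes MNIST-style flattened 784-dim inputs, as fc1/fc4 imply;
# the reshape into emb_size-dim vectors for NearestEmbed belongs to the forward pass,
# which is not shown here):
#
#     model = VQ_VAE(hidden=200, k=10)
#     x = torch.randn(16, 784)
#     z_e = model.fc2(model.relu(model.fc1(x)))   # (16, 200) continuous latent
#     # quantisation would view z_e as (16, 10, 20) and snap each 10-dim column
#     # to the nearest of the k=10 embedding vectors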
def __init__(self, length=784, hidden=200, k=10, vq_coef=0.2, comit_coef=0.4, **kwargs):
    super(VQVAE, self).__init__()
    assert (hidden % 10 == 0), "Hidden must be divisible by 10"
    self.length = length
    self.emb_size = k
    self.encoder = nn.Sequential(
        nn.Linear(length, 500),
        nn.ReLU(),
        nn.Linear(500, 300),
        nn.ReLU(),
        nn.Linear(300, hidden),
    )
    self.decoder = nn.Sequential(
        nn.Linear(hidden, 200),
        nn.ReLU(),
        nn.Linear(200, 300),
        nn.ReLU(),
        nn.Linear(300, 500),
        nn.ReLU(),
        nn.Linear(500, length),  # reconstruct to the input dimensionality
    )
    self.emb = NearestEmbed(k, self.emb_size)
    self.relu = nn.ReLU()
    self.sigmoid = nn.Sigmoid()
    self.vq_coef = vq_coef
    self.comit_coef = comit_coef
    self.hidden = hidden
    self.ce_loss = 0
    self.vq_loss = 0
    self.commit_loss = 0
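# End-to-end shape sketch (illustrative; quantisation through self.emb is handled in the
# forward pass, which is not shown here):
#
#     model = VQVAE(length=784, hidden=200, k=10)
#     x = torch.randn(32, 784)
#     z_e = model.encoder(x)       # (32, 200)
#     x_hat = model.decoder(z_e)   # (32, 784), typically squashed by self.sigmoid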
def __init__(self, d, k=10, kl=None, bn=True, vq_coef=1, commit_coef=0.5,
             in_chns=3, colour_space='rgb', out_chns=None, task=None,
             cos_distance=False, use_decor_loss=0, backbone=None, **kwargs):
    super(Backbone_VQ_VAE, self).__init__()
    self.backbone_encoder = pretrained_features.ResNetIntermediate(**backbone)
    if out_chns is None:
        out_chns = in_chns
    self.out_chns = out_chns
    if task == 'segmentation':
        out_chns = d
    self.use_decor_loss = use_decor_loss
    if self.use_decor_loss != 0:
        self.decor_loss = torch.zeros(1)
    self.d = d
    self.k = k
    if kl is None:
        kl = d
    self.kl = kl
    self.emb = NearestEmbed(k, kl)
    self.colour_space = colour_space
    self.task = task
    self.encoder = nn.Sequential(
        self.backbone_encoder,
        ResBlock(self.backbone_encoder.get_num_kernels(), kl, bn=True),
        nn.BatchNorm2d(kl),
    )
    # One stride-2 transposed conv per factor of two of the backbone's downsampling,
    # leaving the final x2 to the decoder's last layer.
    conv_transposes = []
    num_conv_transpose = int(self.backbone_encoder.spatial_ratio / 2)
    for i in range(int(np.log2(num_conv_transpose))):
        conv_transposes.append(
            nn.ConvTranspose2d(d, d, kernel_size=4, stride=2, padding=1))
        conv_transposes.append(nn.BatchNorm2d(d))
        conv_transposes.append(nn.ReLU(inplace=True))
    self.decoder = nn.Sequential(
        ResBlock(kl, d),
        nn.BatchNorm2d(d),
        ResBlock(d, d),
        *conv_transposes,
        nn.ConvTranspose2d(d, out_chns, kernel_size=4, stride=2, padding=1))
    if self.task == 'segmentation':
        self.fc = nn.Sequential(nn.BatchNorm2d(d), nn.ReLU(),
                                nn.Conv2d(d, self.out_chns, 1))
    self.vq_coef = vq_coef
    self.commit_coef = commit_coef
    self.mse = 0
    self.vq_loss = torch.zeros(1)
    self.commit_loss = 0
    # Skip the pretrained backbone; initialise the remaining conv/linear weights from a
    # small normal distribution and clamp them in place to (-0.04, 0.04).
    for l in self.modules():
        if (isinstance(l, pretrained_features.ResNetIntermediate)
                or l in self.backbone_encoder.modules()):
            continue
        if isinstance(l, nn.Linear) or isinstance(l, nn.Conv2d):
            l.weight.detach().normal_(0, 0.02)
            l.weight.detach().fmod_(0.04)
            nn.init.constant_(l.bias, 0)
    self.encoder[-1].weight.detach().fill_(1 / 40)
    self.emb.weight.detach().normal_(0, 0.02)
    self.emb.weight.detach().fmod_(0.04)
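# Instantiation sketch (illustrative; the keys of the `backbone` dict are assumptions about
# pretrained_features.ResNetIntermediate, not a documented interface):
#
#     model = Backbone_VQ_VAE(d=128, k=8, backbone={'architecture': 'resnet18'})
#     # e.g. spatial_ratio = 8  ->  log2(8 / 2) = 2 inner transposed convs plus the final
#     # decoder layer, giving a total x8 upsampling that matches the backbone's downsampling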
def __init__(self, d, k=10, kl=None, bn=True, vq_coef=1, commit_coef=0.5,
             num_channels=3, gabor_layer=False, **kwargs):
    super(VQ_CVAE, self).__init__()
    self.d = d
    self.k = k
    if kl is None:
        kl = d
    self.kl = kl
    self.emb = NearestEmbed(k, kl)
    if gabor_layer:
        first_layer = GaborLayer(num_channels, d, kernel_size=5, stride=2,
                                 padding=1, kernels=1)
    else:
        first_layer = nn.Conv2d(num_channels, d, kernel_size=4, stride=2, padding=1)
    self.encoder = nn.Sequential(
        first_layer,
        nn.BatchNorm2d(d),
        nn.ReLU(inplace=True),
        nn.Conv2d(d, d, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(d),
        nn.ReLU(inplace=True),
        ResBlock(d, d, bn=True),
        nn.BatchNorm2d(d),
        ResBlock(d, kl, bn=True),
        nn.BatchNorm2d(kl),
    )
    self.decoder = nn.Sequential(
        ResBlock(kl, d),
        nn.BatchNorm2d(d),
        ResBlock(d, d),
        nn.ConvTranspose2d(d, d, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(d),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(d, num_channels, kernel_size=4, stride=2, padding=1),
    )
    self.classification_branch = self._make_classification_layer(
        Bottleneck, d, d, 3, stride=2)
    num_participants = 12
    self.fc = nn.Linear(d * 1 * Bottleneck.expansion, num_participants)
    self.vq_coef = vq_coef
    self.commit_coef = commit_coef
    self.mse = 0
    self.classification = 0
    self.class_coef = 1
    self.vq_loss = torch.zeros(1)
    self.commit_loss = 0
    # Initialise conv/linear weights from a small normal distribution, clamped in place
    # to (-0.04, 0.04); skip bias initialisation where no bias exists.
    for l in self.modules():
        if isinstance(l, nn.Linear) or isinstance(l, nn.Conv2d):
            l.weight.detach().normal_(0, 0.02)
            l.weight.detach().fmod_(0.04)
            if l.bias is not None:
                nn.init.constant_(l.bias, 0)
    self.encoder[-1].weight.detach().fill_(1 / 40)
    self.emb.weight.detach().normal_(0, 0.02)
    self.emb.weight.detach().fmod_(0.04)
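# Usage sketch for the default convolutional first layer (GaborLayer and the
# classification-branch helpers come from this repo; their exact behaviour is assumed,
# not shown here):
#
#     model = VQ_CVAE(d=128, k=8, num_channels=3, gabor_layer=False)
#     x = torch.randn(4, 3, 64, 64)
#     z_e = model.encoder(x)       # (4, 128, 16, 16) since kl defaults to d
#     x_hat = model.decoder(z_e)   # (4, 3, 64, 64)
#     # the classification branch reduces features to d * Bottleneck.expansion and
#     # self.fc maps them to the 12 participant classes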
def __init__(self, mem_slots, head_size, input_size, num_tokens, device,
             k=1024, num_heads=1, num_blocks=1, forget_bias=1., input_bias=0.,
             gate_style='unit', attention_mlp_layers=2, key_size=None,
             use_adaptive_softmax=False, cutoffs=None):
    super(RelationalMemory, self).__init__()

    ########## added for reconstruction ##########
    self.near_emb = NearestEmbed(k, num_tokens)
    self.to_small_emb_dim = nn.Linear(num_tokens, input_size)

    ########## generic parameters for RMC ##########
    self.mem_slots = mem_slots
    self.head_size = head_size
    self.num_heads = num_heads
    self.mem_size = self.head_size * self.num_heads
    self.device = device

    # a fixed parameter needed for the PyTorch port of RMC:
    # +1 accounts for the concatenated input per time step, since self-attention runs
    # over the concatenation of memory and input (so if mem_slots = 1, this value is 2)
    self.mem_slots_plus_input = self.mem_slots + 1

    if num_blocks < 1:
        raise ValueError(
            'num_blocks must be >= 1. Got: {}.'.format(num_blocks))
    self.num_blocks = num_blocks

    if gate_style not in ['unit', 'memory', None]:
        raise ValueError(
            'gate_style must be one of [\'unit\', \'memory\', None]. Got: '
            '{}.'.format(gate_style))
    self.gate_style = gate_style

    if attention_mlp_layers < 1:
        raise ValueError(
            'attention_mlp_layers must be >= 1. Got: {}.'.format(
                attention_mlp_layers))
    self.attention_mlp_layers = attention_mlp_layers

    self.key_size = key_size if key_size else self.head_size

    ########## parameters for multi-head attention ##########
    # value_size is the same as head_size
    self.value_size = self.head_size
    # total size for query, key and value
    self.qkv_size = 2 * self.key_size + self.value_size
    self.total_qkv_size = self.qkv_size * self.num_heads  # denoted as F

    # each head has a qkv-sized linear projector;
    # using one big parameter is more efficient than per-head projectors such as
    # self.qkv_projector = [nn.Parameter(torch.randn((self.qkv_size, self.qkv_size))) for _ in range(self.num_heads)]
    self.qkv_projector = nn.Linear(self.mem_size, self.total_qkv_size)
    self.qkv_layernorm = nn.LayerNorm(
        [self.mem_slots_plus_input, self.total_qkv_size])

    # used by the attend_over_memory function
    self.attention_mlp = nn.ModuleList(
        [nn.Linear(self.mem_size, self.mem_size)] * self.attention_mlp_layers)
    self.attended_memory_layernorm = nn.LayerNorm(
        [self.mem_slots_plus_input, self.mem_size])
    self.attended_memory_layernorm2 = nn.LayerNorm(
        [self.mem_slots_plus_input, self.mem_size])

    ########## parameters for the initial embedded input projection ##########
    self.input_size = input_size
    self.input_projector = nn.Linear(self.input_size, self.mem_size)

    ########## parameters for gating ##########
    self.num_gates = 2 * self.calculate_gate_size()
    self.input_gate_projector = nn.Linear(self.mem_size, self.num_gates)
    self.memory_gate_projector = nn.Linear(self.mem_size, self.num_gates)
    # trainable scalar gate bias tensors
    self.forget_bias = nn.Parameter(
        torch.tensor(forget_bias, dtype=torch.float32))
    self.input_bias = nn.Parameter(
        torch.tensor(input_bias, dtype=torch.float32))

    ########## parameters for token-to-embed & output-to-token logits for the softmax ##########
    self.dropout = nn.Dropout()
    self.num_tokens = num_tokens
    self.token_to_input_encoder = nn.Embedding(self.num_tokens, self.input_size)

    # two linear layers are needed to tie weights with the embedding layer:
    # first map the RMC "output" to input_size, which is the embedding dimension
    self.output_to_embed_decoder = nn.Linear(
        self.mem_slots * self.mem_size, self.input_size)
    self.use_adaptive_softmax = use_adaptive_softmax
    if not self.use_adaptive_softmax:
        # then this layer's weight can be tied to the embedding layer
        self.embed_to_logit_decoder = nn.Linear(self.input_size, self.num_tokens)
        # tie the embedding weights of encoder & decoder
        self.embed_to_logit_decoder.weight = self.token_to_input_encoder.weight
        ########## loss function ##########
        self.criterion = nn.CrossEntropyLoss()
    else:
        # use adaptive softmax on the input_size-dim features instead of the tied embedding weights above
        self.criterion_adaptive = nn.AdaptiveLogSoftmaxWithLoss(
            self.input_size, self.num_tokens, cutoffs=cutoffs)
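# Instantiation sketch (numbers are illustrative; the recurrent step and the reconstruction
# path through near_emb are defined in the forward/step methods, which are not shown here):
#
#     rmc = RelationalMemory(mem_slots=4, head_size=64, input_size=128,
#                            num_tokens=10000, device='cpu', num_heads=2)
#     # mem_size = 64 * 2 = 128, so the memory matrix per step is (batch, 4, 128);
#     # self-attention runs over mem_slots + 1 = 5 rows (memory plus the projected input)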