Code Example #1
    def __init__(self,
                 d,
                 k=10,
                 bn=True,
                 vq_coef=1,
                 commit_coef=0.5,
                 num_channels_in=3,
                 num_channels_out=3,
                 **kwargs):
        super(VQ_CVAE, self).__init__()

        # loss-term placeholders (reconstruction and adversarial)
        self.mse_hand_adv = 0
        self.mse_obj_adv = 0
        self.recon_loss = 0
        self.mse_hand_obj_adv = 0
        self.adv_loss = 0

        self.encoder = nn.Sequential(
            nn.Conv2d(num_channels_in, d, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(d),
            nn.ReLU(inplace=True),
            nn.Conv2d(d, d, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(d),
            nn.ReLU(inplace=True),
            ResBlock(d, d, bn),
            nn.BatchNorm2d(d),
            ResBlock(d, d, bn),
            nn.BatchNorm2d(d),
        )
        self.decoder = nn.Sequential(
            ResBlock(d, d),
            nn.BatchNorm2d(d),
            ResBlock(d, d),
            nn.ConvTranspose2d(d, d, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(d),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(d,
                               num_channels_out,
                               kernel_size=4,
                               stride=2,
                               padding=1),
        )
        self.d = d
        self.emb = NearestEmbed(k, d)
        self.vq_coef = vq_coef
        self.commit_coef = commit_coef
        self.mse = 0
        self.vq_loss = torch.zeros(1)
        self.commit_loss = 0

        for l in self.modules():
            if isinstance(l, nn.Linear) or isinstance(l, nn.Conv2d):
                l.weight.detach().normal_(0, 0.02)
                # keep initial weight magnitudes below 0.04 (in-place fmod)
                l.weight.detach().fmod_(0.04)
                nn.init.constant_(l.bias, 0)

        self.encoder[-1].weight.detach().fill_(1 / 40)

        self.emb.weight.detach().normal_(0, 0.02)
        self.emb.weight.detach().fmod_(0.04)
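
For orientation, here is a minimal sketch of how a constructor like the one above is typically wired into a forward pass and loss. It assumes the common VQ-VAE convention that `self.emb(x, weight_sg=True)` returns the quantised tensor together with the codebook indices; the function name and the exact loss wiring below are illustrative assumptions, not taken from the project itself.

import torch.nn.functional as F

# Hedged sketch of a VQ-CVAE training step around NearestEmbed.
# Assumes emb(x, weight_sg=...) returns (quantized, indices); adjust if your copy differs.
def vq_cvae_step(model, x):
    z_e = model.encoder(x)                       # continuous latents, shape (B, d, H, W)
    z_q, _ = model.emb(z_e, weight_sg=True)      # quantise; codebook weights treated as constants
    emb, _ = model.emb(z_e.detach())             # second pass: gradients flow to the codebook only
    recon = model.decoder(z_q)
    recon_loss = F.mse_loss(recon, x)
    vq_loss = F.mse_loss(emb, z_e.detach())      # pull codebook vectors toward encoder outputs
    commit_loss = F.mse_loss(z_e, emb.detach())  # keep the encoder committed to the codebook
    return recon, recon_loss + model.vq_coef * vq_loss + model.commit_coef * commit_loss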
Code Example #2
File: models.py  Project: mehdidc/vqmod
    def __init__(self, modules, depth=1, dim_embeddings=128):
        super().__init__()
        self.depth = depth
        self.dim_embeddings = dim_embeddings
        self.inplane = modules[0].inplane
        self.outplane = modules[-1].outplane
        self.controller = nn.Sequential(
            nn.Conv2d(self.inplane, depth * dim_embeddings, kernel_size=1),
            nn.AdaptiveAvgPool2d(1)
        )
        self.components = nn.ModuleList(modules)
        self.neareat_emb = NearestEmbed(len(modules), dim_embeddings)
Code Example #3
    def __init__(self, hidden=200, k=10, vq_coef=0.2, comit_coef=0.4, **kwargs):
        super(VQ_VAE, self).__init__()

        self.emb_size = k
        self.fc1 = nn.Linear(784, 400)
        self.fc2 = nn.Linear(400, hidden)
        self.fc3 = nn.Linear(hidden, 400)
        self.fc4 = nn.Linear(400, 784)

        self.emb = NearestEmbed(k, self.emb_size)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.vq_coef = vq_coef
        self.comit_coef = comit_coef
        self.hidden = hidden
        self.ce_loss = 0
        self.vq_loss = 0
        self.commit_loss = 0
Code Example #4
    def __init__(self,
                 length=784,
                 hidden=200,
                 k=10,
                 vq_coef=0.2,
                 comit_coef=0.4,
                 **kwargs):
        super(VQVAE, self).__init__()

        assert (hidden % 10 == 0), "Hidden must be divisible by 10"

        self.length = length

        self.emb_size = k

        self.encoder = nn.Sequential(
            nn.Linear(length, 500),
            nn.ReLU(),
            nn.Linear(500, 300),
            nn.ReLU(),
            nn.Linear(300, hidden),
        )

        self.decoder = nn.Sequential(
            nn.Linear(hidden, 200),
            nn.ReLU(),
            nn.Linear(200, 300),
            nn.ReLU(),
            nn.Linear(300, 500),
            nn.ReLU(),
            nn.Linear(500, length),  # reconstruct back to the input length
        )

        self.emb = NearestEmbed(k, self.emb_size)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.vq_coef = vq_coef
        self.comit_coef = comit_coef
        self.hidden = hidden
        self.ce_loss = 0
        self.vq_loss = 0
        self.commit_loss = 0
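
Examples #3 and #4 feed a flat hidden vector into NearestEmbed, which presumably quantises along dim 1, so the hidden code is usually reshaped into `hidden // emb_size` vectors of length `emb_size` first (hence, presumably, the divisibility assert above). The sketch below shows that reshaping step under the same assumptions about the `emb` call as before; the helper name is illustrative, not from the source.

import torch

# Hedged sketch: quantising the flat hidden code of the fully connected VQ-VAE variants.
# Assumes NearestEmbed quantises dim 1 of a (batch, emb_size, n_vectors) tensor and that
# emb(x, weight_sg=...) returns (quantized, indices); names are illustrative.
def quantize_hidden(model, x_flat):
    h = model.encoder(x_flat)                                         # (B, hidden)
    z_e = h.view(-1, model.emb_size, model.hidden // model.emb_size)
    z_q, _ = model.emb(z_e, weight_sg=True)                           # nearest codebook vectors
    emb, _ = model.emb(z_e.detach())                                  # codebook-update branch
    recon = torch.sigmoid(model.decoder(z_q.view(-1, model.hidden)))  # sigmoid matches the constructor
    return recon, z_e, emb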
Code Example #5
    def __init__(self,
                 d,
                 k=10,
                 kl=None,
                 bn=True,
                 vq_coef=1,
                 commit_coef=0.5,
                 in_chns=3,
                 colour_space='rgb',
                 out_chns=None,
                 task=None,
                 cos_distance=False,
                 use_decor_loss=0,
                 backbone=None,
                 **kwargs):
        super(Backbone_VQ_VAE, self).__init__()

        self.backbone_encoder = pretrained_features.ResNetIntermediate(
            **backbone)

        if out_chns is None:
            out_chns = in_chns
        self.out_chns = out_chns
        if task == 'segmentation':
            out_chns = d
        self.use_decor_loss = use_decor_loss
        if self.use_decor_loss != 0:
            self.decor_loss = torch.zeros(1)

        self.d = d
        self.k = k
        if kl is None:
            kl = d
        self.kl = kl
        self.emb = NearestEmbed(k, kl)

        self.colour_space = colour_space
        self.task = task

        self.encoder = nn.Sequential(
            self.backbone_encoder,
            ResBlock(self.backbone_encoder.get_num_kernels(), kl, bn=True),
            nn.BatchNorm2d(kl),
        )
        conv_transposes = []
        num_conv_transpose = int(self.backbone_encoder.spatial_ratio / 2)
        for i in range(int(np.log2(num_conv_transpose))):
            conv_transposes.append(
                nn.ConvTranspose2d(d, d, kernel_size=4, stride=2, padding=1))
            conv_transposes.append(nn.BatchNorm2d(d))
            conv_transposes.append(nn.ReLU(inplace=True))
        self.decoder = nn.Sequential(
            ResBlock(kl, d), nn.BatchNorm2d(d), ResBlock(d, d),
            *conv_transposes,
            nn.ConvTranspose2d(d, out_chns, kernel_size=4, stride=2,
                               padding=1))
        if self.task == 'segmentation':
            self.fc = nn.Sequential(nn.BatchNorm2d(d), nn.ReLU(),
                                    nn.Conv2d(d, self.out_chns, 1))
        self.vq_coef = vq_coef
        self.commit_coef = commit_coef
        self.mse = 0
        self.vq_loss = torch.zeros(1)
        self.commit_loss = 0

        for l in self.modules():
            if (isinstance(l, pretrained_features.ResNetIntermediate)
                    or l in self.backbone_encoder.modules()):
                continue
            if isinstance(l, nn.Linear) or isinstance(l, nn.Conv2d):
                l.weight.detach().normal_(0, 0.02)
                l.weight.detach().fmod_(0.04)
                nn.init.constant_(l.bias, 0)

        self.encoder[-1].weight.detach().fill_(1 / 40)

        self.emb.weight.detach().normal_(0, 0.02)
        self.emb.weight.detach().fmod_(0.04)
Code Example #6
    def __init__(self,
                 d,
                 k=10,
                 kl=None,
                 bn=True,
                 vq_coef=1,
                 commit_coef=0.5,
                 num_channels=3,
                 gabor_layer=False,
                 **kwargs):
        super(VQ_CVAE, self).__init__()

        self.d = d
        self.k = k
        if kl is None:
            kl = d
        self.kl = kl
        self.emb = NearestEmbed(k, kl)

        if gabor_layer:
            first_layer = GaborLayer(num_channels,
                                     d,
                                     kernel_size=5,
                                     stride=2,
                                     padding=1,
                                     kernels=1)
        else:
            first_layer = nn.Conv2d(num_channels,
                                    d,
                                    kernel_size=4,
                                    stride=2,
                                    padding=1)
        self.encoder = nn.Sequential(
            first_layer,
            nn.BatchNorm2d(d),
            nn.ReLU(inplace=True),
            nn.Conv2d(d, d, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(d),
            nn.ReLU(inplace=True),
            ResBlock(d, d, bn=True),
            nn.BatchNorm2d(d),
            ResBlock(d, kl, bn=True),
            nn.BatchNorm2d(kl),
        )
        self.decoder = nn.Sequential(
            ResBlock(kl, d),
            nn.BatchNorm2d(d),
            ResBlock(d, d),
            nn.ConvTranspose2d(d, d, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(d),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(d,
                               num_channels,
                               kernel_size=4,
                               stride=2,
                               padding=1),
        )
        self.classification_branch = self._make_classification_layer(
            Bottleneck, d, d, 3, stride=2)
        num_participants = 12
        self.fc = nn.Linear(d * 1 * Bottleneck.expansion, num_participants)

        self.vq_coef = vq_coef
        self.commit_coef = commit_coef
        self.mse = 0
        self.classification = 0
        self.class_coef = 1
        self.vq_loss = torch.zeros(1)
        self.commit_loss = 0

        for l in self.modules():
            if isinstance(l, nn.Linear) or isinstance(l, nn.Conv2d):
                l.weight.detach().normal_(0, 0.02)
                l.weight.detach().fmod_(0.04)
                if l.bias is not None:
                    nn.init.constant_(l.bias, 0)

        self.encoder[-1].weight.detach().fill_(1 / 40)

        self.emb.weight.detach().normal_(0, 0.02)
        self.emb.weight.detach().fmod_(0.04)
Code Example #7
    def __init__(self,
                 mem_slots,
                 head_size,
                 input_size,
                 num_tokens,
                 device,
                 k=1024,
                 num_heads=1,
                 num_blocks=1,
                 forget_bias=1.,
                 input_bias=0.,
                 gate_style='unit',
                 attention_mlp_layers=2,
                 key_size=None,
                 use_adaptive_softmax=False,
                 cutoffs=None):
        super(RelationalMemory, self).__init__()

        ########## add for reconstruct #################
        self.near_emb = NearestEmbed(k, num_tokens)
        self.to_small_emb_dim = nn.Linear(num_tokens, input_size)

        ########## generic parameters for RMC ##########
        self.mem_slots = mem_slots
        self.head_size = head_size
        self.num_heads = num_heads
        self.mem_size = self.head_size * self.num_heads
        self.device = device

        # an extra fixed parameter needed for the pytorch port of RMC:
        # +1 accounts for the input concatenated to the memory at each time step
        # (self-attention runs over the concatenated memory & input), so mem_slots = 1 gives 2
        self.mem_slots_plus_input = self.mem_slots + 1

        if num_blocks < 1:
            raise ValueError(
                'num_blocks must be >=1. Got: {}.'.format(num_blocks))
        self.num_blocks = num_blocks

        if gate_style not in ['unit', 'memory', None]:
            raise ValueError(
                'gate_style must be one of [\'unit\', \'memory\', None]. got: '
                '{}.'.format(gate_style))
        self.gate_style = gate_style

        if attention_mlp_layers < 1:
            raise ValueError(
                'attention_mlp_layers must be >= 1. Got: {}.'.format(
                    attention_mlp_layers))
        self.attention_mlp_layers = attention_mlp_layers

        self.key_size = key_size if key_size else self.head_size

        ########## parameters for multihead attention ##########
        # value_size is same as head_size
        self.value_size = self.head_size
        # total size for query-key-value
        self.qkv_size = 2 * self.key_size + self.value_size
        self.total_qkv_size = self.qkv_size * self.num_heads  # denoted as F

        # each head has a qkv-sized linear projector;
        # one big projection is more efficient than a separate parameter per head, e.g.
        # self.qkv_projector = [nn.Parameter(torch.randn((self.qkv_size, self.qkv_size))) for _ in range(self.num_heads)]
        self.qkv_projector = nn.Linear(self.mem_size, self.total_qkv_size)
        self.qkv_layernorm = nn.LayerNorm(
            [self.mem_slots_plus_input, self.total_qkv_size])

        # MLP used by the attend_over_memory function
        # (note: list multiplication reuses one Linear instance, so all MLP layers share weights)
        self.attention_mlp = nn.ModuleList(
            [nn.Linear(self.mem_size, self.mem_size)] *
            self.attention_mlp_layers)
        self.attended_memory_layernorm = nn.LayerNorm(
            [self.mem_slots_plus_input, self.mem_size])
        self.attended_memory_layernorm2 = nn.LayerNorm(
            [self.mem_slots_plus_input, self.mem_size])

        ########## parameters for initial embedded input projection ##########
        self.input_size = input_size
        self.input_projector = nn.Linear(self.input_size, self.mem_size)

        ########## parameters for gating ##########
        self.num_gates = 2 * self.calculate_gate_size()
        self.input_gate_projector = nn.Linear(self.mem_size, self.num_gates)
        self.memory_gate_projector = nn.Linear(self.mem_size, self.num_gates)
        # trainable scalar gate bias tensors
        self.forget_bias = nn.Parameter(
            torch.tensor(forget_bias, dtype=torch.float32))
        self.input_bias = nn.Parameter(
            torch.tensor(input_bias, dtype=torch.float32))

        ########## parameters for token-to-embed & output-to-token logit for softmax ##########
        self.dropout = nn.Dropout()
        self.num_tokens = num_tokens
        self.token_to_input_encoder = nn.Embedding(self.num_tokens,
                                                   self.input_size)

        # two linear layers are needed so the decoder weights can be tied to the embedding layer:
        # first map the RMC "output" back to input_size, which is the embedding dim
        self.output_to_embed_decoder = nn.Linear(
            self.mem_slots * self.mem_size, self.input_size)
        self.use_adaptive_softmax = use_adaptive_softmax
        if not self.use_adaptive_softmax:
            # then, this layer's weight can be tied to the embedding layer
            self.embed_to_logit_decoder = nn.Linear(self.input_size,
                                                    self.num_tokens)

            # tie embedding weights of encoder & decoder
            self.embed_to_logit_decoder.weight = self.token_to_input_encoder.weight

            ########## loss function ##########
            self.criterion = nn.CrossEntropyLoss()
        else:
            # use adaptive softmax from the self.input_size logits, instead of the tied embed weights above
            self.criterion_adaptive = nn.AdaptiveLogSoftmaxWithLoss(
                self.input_size, self.num_tokens, cutoffs=cutoffs)