Ejemplo n.º 1
0
    def forward(self, z):
        """
        Quantize the projected input against the codebook self.emb.

        B, Q, K, N: n_batch, n_quant_dims, n_quant_vecs, n_timesteps
        ze: (B, Q, N)
        emb: (K, Q)

        z is passed through self.linear to obtain ze.  For every
        (batch, timestep) position the codebook row with the smallest squared
        L2 distance to ze is selected, and the first output of
        self.rg(zq, ze) is returned (presumably a replace-gradient op that
        forwards zq -- confirm against self.rg).

        Side effects: refreshes the diagnostic attributes ze, l2norm_min,
        circ_inds, write_pos, uniq, ze_norm and emb_norm, and accumulates
        the selected indices into self.ind_hist via util.int_hist.
        """
        # Number of rows kept in the circular history of selected indices.
        n_hist_rows = 100

        ze = self.linear(z)
        self.ze = ze
        sg_emb = self.sg(self.emb)

        # With the shapes above, (B, 1, Q, N) broadcasts against (K, Q, 1);
        # summing over the Q axis leaves one squared distance per
        # (batch, codebook vector, timestep).
        l2norm_sq = ((ze.unsqueeze(1) - sg_emb.unsqueeze(2))**2).sum(
            dim=2)  # B, K, N
        self.l2norm_min, l2norm_min_ind = l2norm_sq.min(dim=1)  # B, N
        zq = util.gather_md(sg_emb, 0, l2norm_min_ind).permute(1, 0, 2)
        zq_rg, __ = self.rg(zq, self.ze)

        # Diagnostics
        ni = l2norm_min_ind.nelement()
        if self.circ_inds is None:
            # Lazily allocate the history; its width is fixed by the first
            # call, and unused slots hold -1.
            self.write_pos = 0
            self.circ_inds = ze.new_full((n_hist_rows, ni), -1,
                                         dtype=torch.long)

        self.circ_inds[self.write_pos, 0:ni] = l2norm_min_ind.flatten(0)
        self.circ_inds[self.write_pos, ni:] = -1
        # Advance the write cursor, wrapping around at the end of the buffer.
        self.write_pos = (self.write_pos + 1) % n_hist_rows

        util.int_hist(l2norm_min_ind, accu=self.ind_hist)
        self.uniq = l2norm_min_ind.unique(sorted=False)
        self.ze_norm = (self.ze**2).sum(dim=1).sqrt()
        self.emb_norm = (self.emb**2).sum(dim=1).sqrt()

        return zq_rg
Ejemplo n.º 2
0
 def one_hot(self, wav_compand):
     '''
     Index self.quant_onehot with the integer-cast values of wav_compand.

     B, Q, T: n_batch, n_quant, n_timesteps
     wav_compand: (B, T)
     returns: (B, Q, T)
     '''
     quant_inds = wav_compand.long()
     gathered = util.gather_md(self.quant_onehot, 0, quant_inds)
     # Swap the last two axes to reach the documented (B, Q, T) layout.
     return gathered.transpose(1, 2)
Ejemplo n.º 3
0
 def one_hot(self, wav_compand):
     '''
     Index self.quant_onehot with the integer-cast values of wav_compand.

     B, Q, T: n_batch, n_quant, n_timesteps
     wav_compand: (B, T)
     returns: (B, Q, T)
     '''
     quant_inds = wav_compand.long()
     # Swap the first two axes of the gathered tensor to reach the
     # documented (B, Q, T) layout.
     return util.gather_md(self.quant_onehot, 0,
                           quant_inds).permute(1, 0, 2)
Ejemplo n.º 4
0
 def forward(self, lc, speaker_inds):
     '''
     Append a per-speaker embedding to the local conditioning channels.

     I, G, S, T: n_in_chan, n_embed_chan, n_speakers, n_timesteps
     lc : (B, I, T)
     speaker_inds: (B), dtype torch.long
     returns: (B, I+G, T)

     The embedding is repeated along dim 2 to lc.shape[2] and concatenated
     with lc on dim 1, so lc must carry channels on dim 1 and time on dim 2.
     '''
     assert speaker_inds.dtype == torch.long
     # one_hot: (B, S)
     one_hot = util.gather_md(self.eye, 0, speaker_inds).permute(1, 0)
     gc = self.speaker_embedding(one_hot)  # gc: (B, G)
     # Broadcast the embedding over every timestep of lc.
     gc_rep = gc.unsqueeze(2).expand(-1, -1, lc.shape[2])  # (B, G, T)
     return torch.cat((lc, gc_rep), dim=1)
Ejemplo n.º 5
0
 def avg_prob_target(self):
     '''Mean of the entries of self.probs selected by self.target on dim 1'''
     return torch.mean(util.gather_md(self.probs, 1, self.target))
Ejemplo n.º 6
0
    def forward(self, z):
        """
        Quantize the projected input against the codebook self.emb and, in
        training mode, update diagnostics and EMA codebook statistics.

        B, Q, K, N: n_batch, n_quant_dims, n_quant_vecs, n_timesteps
        ze: (B, Q, N)
        emb: (K, Q)

        z is passed through self.linear to obtain ze.  For every
        (batch, timestep) position the codebook row minimizing
        scaled_l2_norm is selected, and the first output of self.rg(zq, ze)
        is returned in both training and eval mode.

        Side effects (always): sets self.ze and self.min_dist.
        Side effects (training only): accumulates into self.ind_hist, sets
        uniq, ze_norm, emb_norm and min_ind, overwrites z_sum / n_sum with
        this batch's per-code sums and counts, updates ema_numer / ema_denom,
        and prints three debug lines.
        """
        ze = self.linear(z)
        self.ze = ze
        sg_emb = self.sg(self.emb)

        # scaled_l2_norm is defined outside this block; its result is reduced
        # over dim 1, which is expected to be the codebook axis (K).
        snorm = scaled_l2_norm(ze.unsqueeze(1),
                               sg_emb.unsqueeze(2).unsqueeze(0))
        self.min_dist, min_ind = snorm.min(dim=1)  # B, N
        zq = util.gather_md(sg_emb, 0, min_ind).permute(1, 0, 2)

        if self.training:
            # Diagnostics
            util.int_hist(min_ind, accu=self.ind_hist)
            self.uniq = min_ind.unique(sorted=False)
            self.ze_norm = (self.ze**2).sum(dim=1).sqrt()
            self.emb_norm = (self.emb**2).sum(dim=1).sqrt()
            self.min_ind = min_ind

            # EMA statistics
            # min_ind: B, W
            # ze: B, D, W
            # z_sum: K, D
            # n_sum: K
            # scatter_add has the limitation that the size of the indexing
            # vector cannot exceed that of the destination (even in the target
            # indexing dimension, which doesn't make much sense)
            # In this case, K is the indexing dimension, so the scratch
            # destination gets max(B * W, K) rows and is cut back to K rows
            # afterwards.
            # NOTE(review): self.ze is not detached here; unless these
            # accumulators are handled under no_grad elsewhere, they may
            # retain autograd history across steps -- confirm.
            flat_ind = min_ind.flatten(0, 1)
            idim = max(flat_ind.shape[0], self.k)

            # Per-code sum of the encoder vectors assigned to it.
            z_sum_tmp = self.z_sum.new_zeros([idim, self.d])
            z_sum_tmp.scatter_add_(0,
                                   flat_ind.unsqueeze(1).repeat(1, self.d),
                                   self.ze.permute(0, 2, 1).flatten(0, 1))
            self.z_sum[...] = z_sum_tmp[0:self.k, :]

            # Per-code count of assigned encoder vectors.
            n_sum_tmp = self.n_sum.new_zeros(idim)
            n_sum_tmp.scatter_add_(0, flat_ind, n_sum_tmp.new_ones(idim))
            self.n_sum[...] = n_sum_tmp[0:self.k]

            self.ema_numer = (self.ema_gamma * self.ema_numer +
                              self.ema_gamma_comp * self.z_sum)
            self.ema_denom = (self.ema_gamma * self.ema_denom +
                              self.ema_gamma_comp * self.n_sum)

            # Intended codebook update (not applied here):
            # cb_update = self.ema_numer / self.ema_denom.unsqueeze(1).repeat(
            #         1, self.d)

            # NOTE(review): debug output on every training step; kept so
            # that visible behavior is unchanged.
            print('ze_norm:', self.ze_norm)
            print('emb_norm:', self.emb_norm)
            print('min_ind:', self.min_ind)

        # construct the straight-through estimator ('ReplaceGrad').  This
        # must run outside the training branch: previously zq_rg was only
        # bound when self.training was true, so eval-mode calls raised
        # UnboundLocalError at the return statement.
        zq_rg, __ = self.rg(zq, self.ze)

        return zq_rg