def forward(self, z):
    """
    Quantize the encoder output against the codebook by nearest neighbor.

    B, Q, K, N: n_batch, n_quant_dims, n_quant_vecs, n_timesteps
    ze: (B, Q, N)
    emb: (K, Q)

    z is passed through self.linear to give ze. For every (batch, timestep)
    the codebook index with the smallest squared L2 distance is selected,
    the corresponding vectors are gathered with util.gather_md, and the
    result is passed together with ze through self.rg.

    Side effects: sets self.ze, self.l2norm_min, self.uniq, self.ze_norm
    and self.emb_norm; accumulates the chosen indices into self.ind_hist;
    writes the chosen indices into the circular buffer self.circ_inds
    (allocated on the first call, when self.circ_inds is None) and
    advances self.write_pos.

    Returns the first element of the pair returned by self.rg(zq, ze).
    """
    # Number of rows in the circular buffer of recent index assignments
    n_slots = 100

    ze = self.linear(z)
    self.ze = ze
    sg_emb = self.sg(self.emb)

    # With the shapes above this broadcasts (B, 1, Q, N) against (K, Q, 1)
    # and reduces over the Q axis.
    l2norm_sq = ((ze.unsqueeze(1) - sg_emb.unsqueeze(2))**2).sum(
        dim=2)  # B, K, N
    self.l2norm_min, l2norm_min_ind = l2norm_sq.min(dim=1)  # B, N
    zq = util.gather_md(sg_emb, 0, l2norm_min_ind).permute(1, 0, 2)
    zq_rg, __ = self.rg(zq, self.ze)

    # Diagnostics
    ni = l2norm_min_ind.nelement()
    if self.circ_inds is None:
        self.write_pos = 0
        self.circ_inds = ze.new_full((n_slots, ni), -1, dtype=torch.long)

    self.circ_inds[self.write_pos, 0:ni] = l2norm_min_ind.flatten(0)
    # Any columns past this call's ni indices are marked as unused
    self.circ_inds[self.write_pos, ni:] = -1
    self.write_pos = (self.write_pos + 1) % n_slots

    util.int_hist(l2norm_min_ind, accu=self.ind_hist)
    self.uniq = l2norm_min_ind.unique(sorted=False)
    self.ze_norm = (self.ze**2).sum(dim=1).sqrt()
    self.emb_norm = (self.emb**2).sum(dim=1).sqrt()

    return zq_rg
def one_hot(self, wav_compand):
    '''
    Look up rows of self.quant_onehot for each companded sample.

    wav_compand: (B, T)
    B, Q, T: n_batch, n_quant, n_timesteps
    returns: (B, Q, T)
    NOTE(review): the returned axis order relies on the layout produced by
    util.gather_md followed by transpose(1, 2) -- confirm against gather_md.
    '''
    sample_inds = wav_compand.long()
    gathered = util.gather_md(self.quant_onehot, 0, sample_inds)
    return gathered.transpose(1, 2)
def one_hot(self, wav_compand):
    '''
    Look up rows of self.quant_onehot for each companded sample.

    wav_compand: (B, T)
    B, Q, T: n_batch, n_quant, n_timesteps
    returns: (B, Q, T)
    NOTE(review): the returned axis order relies on the layout produced by
    util.gather_md followed by permute(1, 0, 2) -- confirm against gather_md.
    '''
    return util.gather_md(
        self.quant_onehot, 0, wav_compand.long()
    ).permute(1, 0, 2)
def forward(self, lc, speaker_inds):
    '''
    Append a per-speaker embedding to the conditioning tensor lc.

    I, G, S: n_in_chan, n_embed_chan, n_speakers
    lc : (B, I, T) -- channels on dim 1: the concatenation below is along
         dim 1 and the embedding is repeated lc.shape[2] times
    speaker_inds: (B), must have dtype torch.long
    returns: (B, I+G, T)
    (assumes self.speaker_embedding maps (B, S) to (B, G) -- TODO confirm)
    '''
    assert speaker_inds.dtype == torch.long

    # Select one row of self.eye per speaker, then put batch first: (B, S)
    speaker_rows = util.gather_md(self.eye, 0, speaker_inds)
    speaker_one_hot = speaker_rows.permute(1, 0)

    embedded = self.speaker_embedding(speaker_one_hot)  # (B, G)

    # Repeat the embedding along a new trailing axis to match lc
    tiled = embedded.unsqueeze(2).expand(-1, -1, lc.shape[2])
    return torch.cat((lc, tiled), dim=1)
def avg_prob_target(self):
    '''
    Average probability given to target.

    Gathers self.probs along dim 1 at self.target (via util.gather_md)
    and returns the mean over all gathered values.
    '''
    return torch.mean(util.gather_md(self.probs, 1, self.target))
def forward(self, z):
    """
    Quantize the encoder output against the codebook and, in training
    mode, refresh the EMA codebook statistics.

    B, Q, K, N: n_batch, n_quant_dims, n_quant_vecs, n_timesteps
    ze: (B, Q, N)
    emb: (K, Q)

    z is passed through self.linear to give ze. The codebook index that
    minimizes scaled_l2_norm is chosen for every (batch, timestep), the
    corresponding vectors are gathered with util.gather_md, and the result
    is passed together with ze through self.rg.

    Side effects (always): sets self.ze and self.min_dist.
    Side effects (only when self.training): accumulates into
    self.ind_hist; sets self.uniq, self.ze_norm, self.emb_norm and
    self.min_ind; overwrites self.z_sum and self.n_sum with this batch's
    per-code sums and counts; updates self.ema_numer and self.ema_denom;
    prints debug norms and indices.

    Returns the first element of the pair returned by self.rg(zq, ze).
    """
    ze = self.linear(z)
    self.ze = ze
    sg_emb = self.sg(self.emb)

    snorm = scaled_l2_norm(ze.unsqueeze(1),
                           sg_emb.unsqueeze(2).unsqueeze(0))
    self.min_dist, min_ind = snorm.min(dim=1)  # B, N
    zq = util.gather_md(sg_emb, 0, min_ind).permute(1, 0, 2)

    # NOTE(review): everything down to the debug prints is treated as
    # training-only, since the prints read attributes set in this branch
    # -- confirm.
    if self.training:
        # Diagnostics
        util.int_hist(min_ind, accu=self.ind_hist)
        self.uniq = min_ind.unique(sorted=False)
        self.ze_norm = (self.ze**2).sum(dim=1).sqrt()
        self.emb_norm = (self.emb**2).sum(dim=1).sqrt()
        self.min_ind = min_ind

        # EMA statistics
        # min_ind: B, W
        # ze: B, D, W
        # z_sum: K, D
        # n_sum: K
        # The temporaries are sized max(B * W, K) along the indexing
        # dimension because scatter_add_ was found to reject an index
        # longer than the destination, even in the indexing dimension.
        # Only the first K rows are kept afterwards.
        flat_ind = min_ind.flatten(0, 1)
        idim = max(flat_ind.shape[0], self.k)

        # Per-code sum of the encoder vectors assigned to it
        z_sum_tmp = self.z_sum.new_zeros([idim, self.d])
        z_sum_tmp.scatter_add_(
            0,
            flat_ind.unsqueeze(1).repeat(1, self.d),
            self.ze.permute(0, 2, 1).flatten(0, 1))
        self.z_sum[...] = z_sum_tmp[0:self.k, :]

        # Per-code count of assigned encoder vectors
        self.n_sum.zero_()
        n_sum_tmp = self.n_sum.new_zeros(idim)
        n_sum_tmp.scatter_add_(0, flat_ind, n_sum_tmp.new_ones(idim))
        self.n_sum[...] = n_sum_tmp[0:self.k]

        self.ema_numer = (self.ema_gamma * self.ema_numer
                          + self.ema_gamma_comp * self.z_sum)
        self.ema_denom = (self.ema_gamma * self.ema_denom
                          + self.ema_gamma_comp * self.n_sum)

        # TODO: the codebook update derived from these statistics,
        # ema_numer / ema_denom.unsqueeze(1), is not applied here.

        # Debug output, emitted on every training-mode call
        print('ze_norm:', self.ze_norm)
        print('emb_norm:', self.emb_norm)
        print('min_ind:', self.min_ind)

    zq_rg, __ = self.rg(zq, self.ze)
    return zq_rg