# Assumed imports: the np_* names below alias the corresponding numpy functions.
from numpy import (ndim as np_ndim, asarray as np_asarray,
                   arange as np_arange, ceil as np_ceil, zeros as np_zeros,
                   prod as np_prod)


def parse_matrix_part(matrix, szSub, ovSub):
    assert matrix.ndim == 3
    assert np_ndim(szSub) == 1
    assert len(szSub) == 3
    assert np_ndim(ovSub) == 1
    assert len(ovSub) == 3

    matrix_shape = np_asarray(matrix.shape, dtype=int)
    len_each_section, _, _ = szSub
    shift_length, _, _ = ovSub
    len_each_section_range = np_arange(len_each_section)

    # Number of sub-blocks that fit along each axis, given the window sizes
    # szSub and the shift lengths ovSub.
    matrix_shape = np_ceil((matrix_shape - szSub + 1) / ovSub).astype(int)
    num_rows_overlap, num_elements, num_beams = matrix_shape

    # One column per sub-block; each column holds one flattened window.
    result_matrix = np_zeros((np_prod(szSub), np_prod(matrix_shape)))
    cnt = 0
    for i in range(num_beams):
        for j in range(num_elements):
            for k in range(num_rows_overlap):
                index_1 = len_each_section_range + k * shift_length
                index_2 = j
                index_3 = i
                tmp = matrix[index_1, index_2, index_3]
                result_matrix[:, cnt] = tmp
                cnt += 1
    return result_matrix
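# Usage sketch for parse_matrix_part (the array shapes and window parameters
# here are illustrative, not from the source): extract 8-row windows with a
# hop of 2 along the first axis of a (rows, elements, beams) volume.
import numpy as np

rf_data = np.random.rand(20, 4, 3)      # 20 rows, 4 elements, 3 beams
sz_sub = np.array([8, 1, 1])            # window length along axis 0
ov_sub = np.array([2, 1, 1])            # hop length along axis 0
patches = parse_matrix_part(rf_data, sz_sub, ov_sub)
print(patches.shape)                    # (8, 84): 7 windows * 4 elements * 3 beams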
def prod(*inputs: Tensor, dim: Optional[int] = None, keepdim=False) -> Tensor:
    '''
    Product of tensor(s)

    Parameters:
    -----------
    - inputs : varargs, tensors to be multiplied; if a single tensor is
      passed, its elements will be multiplied
    - dim : int (optional), dimension to reduce over
    - keepdim : bool, whether to keep `dim`

    Returns:
    --------
    - result : Tensor
    '''
    if len(inputs) == 1:
        return _InnerProd(inputs[0], dim=dim, keepdim=keepdim)()
    else:
        return np_prod(inputs, axis=dim, keepdims=keepdim)
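# Minimal numpy sketch of the two dispatch paths in prod() above; Tensor,
# _InnerProd, and np_prod are framework internals, so plain numpy stands in.
# Note that numpy converts a tuple of arrays into one array with a new
# leading axis, so with dim=None the multi-input path collapses everything
# to a single scalar product.
import numpy as np

x = np.arange(1.0, 7.0).reshape(2, 3)
print(np.prod(x, axis=1))        # single-input path: reduce one tensor over `dim`
print(np.prod((x, x), axis=0))   # multi-input path with axis=0: the tuple is
                                 # stacked first, so this is the elementwise product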
def mean(*inputs: Tensor, dim: Optional[int] = None, keepdim=False) -> Tensor:
    '''
    Mean of tensor(s)

    Parameters:
    -----------
    - inputs : varargs, tensors to compute the mean of; if a single tensor is
      passed, the mean of its elements will be computed
    - dim : int (optional), dimension to reduce over
    - keepdim : bool, whether to keep `dim`

    Returns:
    --------
    - result : Tensor
    '''
    if len(inputs) == 1:
        n = np_prod(inputs[0].shape) if dim is None else inputs[0].shape[dim]
        return _InnerSum(inputs[0], dim=dim, keepdim=keepdim)() / n
    else:
        return np_sum(inputs, axis=dim, keepdims=keepdim) / len(inputs)
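# Same idea for mean(), again with plain numpy standing in for the framework's
# Tensor machinery: with a single input the divisor n is the element count
# taken from the shape, so the result matches np.mean; with several inputs the
# sum over the stacked tuple is divided by the number of inputs.
import numpy as np

x = np.arange(1.0, 7.0).reshape(2, 3)
print(np.sum(x, axis=0) / x.shape[0])    # single-input path with dim=0
print(np.sum((x, x + 2), axis=0) / 2)    # multi-input path: elementwise mean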
def __init__(self, filter_sz, n_lc_in, n_lc_out, lc_upsample_filt_sizes,
             lc_upsample_strides, n_res, n_dil, n_skp, n_post, n_quant,
             n_blocks, n_block_layers, jitter_prob, n_speakers,
             n_global_embed, bias=True, parent_rf=None):
    super(WaveNet, self).__init__()
    self.n_blocks = n_blocks
    self.n_block_layers = n_block_layers
    self.n_quant = n_quant
    self.quant_onehot = None
    self.bias = bias
    self.jitter = Jitter(jitter_prob)
    post_jitter_filt_sz = 3
    lc_input_stepsize = np_prod(lc_upsample_strides)

    lc_conv_name = 'LC_Conv(filter_size={})'.format(post_jitter_filt_sz)
    self.lc_conv = Conv1dWrap(n_lc_in, n_lc_out,
                              kernel_size=post_jitter_filt_sz, stride=1,
                              bias=self.bias)
    cur_rf = rfield.Rfield(filter_info=post_jitter_filt_sz, stride=1,
                           parent=parent_rf, name=lc_conv_name)
    self.beg_rf = cur_rf

    # This RF is the first processing of the local conditioning after the
    # Jitter.  It is the starting point for the commitment loss aggregation.
    self.pre_upsample_rf = cur_rf

    self.lc_upsample = nn.Sequential()
    # WaveNet is a stand-alone model, so parent_rf is None.
    # The Autoencoder model in model.py will link parent_rfs together.
    for i, (filt_sz, stride) in enumerate(zip(lc_upsample_filt_sizes,
                                              lc_upsample_strides)):
        name = 'Upsampling_{}(filter_sz={}, stride={})'.format(i, filt_sz, stride)
        mod = Upsampling(n_lc_out, filt_sz, stride, cur_rf, name=name)
        self.lc_upsample.add_module(str(i), mod)
        cur_rf = mod.rf

    # This rf describes the bounds of the input wav corresponding to the
    # local conditioning vectors.
    self.last_upsample_rf = cur_rf
    self.cond = Conditioning(n_speakers, n_global_embed)
    self.base_layer = Conv1dWrap(n_quant, n_res, kernel_size=1, stride=1,
                                 dilation=1, bias=self.bias)

    self.conv_layers = nn.ModuleList()
    n_cond = n_lc_out + n_global_embed
    for b in range(self.n_blocks):
        for bl in range(self.n_block_layers):
            dil = 2**bl
            name = 'GRCC_{},{}(dil={})'.format(b, bl, dil)
            grc = GatedResidualCondConv(n_cond, n_res, n_dil, n_skp, 1, dil,
                                        filter_sz, bias, cur_rf, name)
            self.conv_layers.append(grc)
            cur_rf = grc.rf

    self.last_grcc_rf = cur_rf

    # Each module in the stack needs to know the dimensions of the input and
    # output of the overall stack, in order to trim residual connections.
    beg_grcc_rf = self.conv_layers[0].rf
    end_grcc_rf = self.conv_layers[-1].rf
    for mod in self.conv_layers.children():
        mod.init_bound_rfs(beg_grcc_rf, end_grcc_rf)

    self.relu = nn.ReLU()
    self.post1 = Conv1dWrap(n_skp, n_post, 1, bias=bias)
    self.post2 = Conv1dWrap(n_post, n_quant, 1, bias=bias)
    self.logsoftmax = nn.LogSoftmax(1)  # (B, Q, N)
    self.rf = cur_rf
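# Back-of-the-envelope sketch (not from the source) of how the dilation
# schedule dil = 2**bl grows the receptive field of the GRCC stack above:
# with stride 1, each layer widens the receptive field by (filter_sz - 1) * dil
# timesteps, and each block restarts the doubling.  Parameter values are
# illustrative.
filter_sz, n_blocks, n_block_layers = 2, 2, 10

receptive_field = 1
for b in range(n_blocks):
    for bl in range(n_block_layers):
        receptive_field += (filter_sz - 1) * 2**bl
print(receptive_field)   # 2047 timesteps for 2 blocks of 10 doubling layers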
def size(self):
    """Returns the number of elements in the array."""
    return np_prod(self.shape)
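# Minimal stand-in exercising the size() idiom above: np.prod over a shape
# tuple counts elements.  The DenseArray class is hypothetical, defined only
# to host the method; note that np.prod of an empty shape () returns 1.0.
import numpy as np

class DenseArray:
    def __init__(self, shape):
        self.shape = shape

    def size(self):
        """Returns the number of elements in the array."""
        return np.prod(self.shape)

print(DenseArray((3, 4, 5)).size())   # 60
print(DenseArray(()).size())          # 1.0 for a 0-d shape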
def __init__(self, hps, parent_vc=None):
    super(WaveNet, self).__init__()
    self.n_blocks = hps.n_blocks
    self.n_block_layers = hps.n_block_layers
    self.n_skp = hps.n_skp
    self.n_res = hps.n_res
    self.n_quant = hps.n_quant
    self.bias = hps.bias
    post_jitter_filt_sz = 3
    lc_input_stepsize = np_prod(hps.lc_upsample_strides)

    lc_conv_name = f'LC_Conv(filter_size={post_jitter_filt_sz})'
    self.lc_conv = Conv1dWrap(lc_conv_name, parent_vc,
                              in_channels=hps.n_lc_in,
                              out_channels=hps.n_lc_out,
                              kernel_size=post_jitter_filt_sz, stride=1,
                              bias=hps.bias)

    self.vc = dict()
    self.vc['beg'] = self.lc_conv.vc
    cur_vc = self.vc['beg']

    # This VC is the first processing of the local conditioning after the
    # Jitter.  It is the starting point for the commitment loss aggregation.
    self.lc_upsample = nn.Sequential()

    # WaveNet is a stand-alone model, so parent_vc is None.
    # The Autoencoder model in model.py will link parent_vcs together.
    iterator = enumerate(zip(hps.lc_upsample_filt_sizes,
                             hps.lc_upsample_strides))
    for i, (filt_sz, stride) in iterator:
        name = f'Upsampling_{i}(filter_sz={filt_sz}, stride={stride})'
        mod = Upsampling(hps.n_lc_out, filt_sz, stride, cur_vc, name=name)
        self.lc_upsample.add_module(str(i), mod)
        cur_vc = mod.vc

    # This vc describes the bounds of the input wav corresponding to the
    # local conditioning vectors.
    self.vc['last_upsample'] = cur_vc
    self.cond = Conditioning(hps.n_speakers, hps.n_global_embed)

    self.base_layer = Conv1dWrap('Base Layer', cur_vc,
                                 in_channels=hps.n_quant,
                                 out_channels=hps.n_res, kernel_size=1,
                                 stride=1, dilation=1, bias=self.bias)
    self.base_layer.vc.do_trim_input = True
    cur_vc = self.base_layer.vc

    self.conv_layers = nn.ModuleList()
    n_cond = hps.n_lc_out + hps.n_global_embed
    for b in range(self.n_blocks):
        for bl in range(self.n_block_layers):
            dil = 2**bl
            name = f'GRCC_{b},{bl}(dil={dil})'
            final_layer = (b + 1 == self.n_blocks and
                           bl + 1 == self.n_block_layers)
            grc = GatedResidualCondConv(self.vc, hps, n_cond=n_cond,
                                        stride=1, dil=dil,
                                        final_layer=final_layer,
                                        parent_vc=cur_vc, name=name)
            self.conv_layers.append(grc)
            cur_vc = grc.vc

    # Each module in the stack needs to know the dimensions of the input and
    # output of the overall stack, in order to trim residual connections.
    self.vc['beg_grcc'] = self.conv_layers[0].vc
    self.vc['end_grcc'] = self.conv_layers[-1].vc

    self.relu = nn.ReLU()
    self.post1 = Conv1dWrap('Post1', cur_vc, in_channels=hps.n_skp,
                            out_channels=hps.n_post, kernel_size=1, stride=1,
                            bias=hps.bias)
    self.post2 = Conv1dWrap('Post2', self.post1.vc, in_channels=hps.n_post,
                            out_channels=hps.n_quant, kernel_size=1, stride=1,
                            bias=hps.bias)
    self.logsoftmax = nn.LogSoftmax(1)  # (B, Q, N)
    self.vc['main'] = self.post2.vc
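# Sketch of what lc_input_stepsize captures above: the product of the
# upsampling strides gives the total upsampling factor, i.e. the number of
# wav timesteps spanned per local conditioning vector.  The stride values
# here are illustrative, not taken from the source's hyperparameters.
import numpy as np

lc_upsample_strides = [5, 4, 4, 4]
lc_input_stepsize = np.prod(lc_upsample_strides)
print(lc_input_stepsize)   # 320 audio timesteps per conditioning frame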