def get_summarized_data(self):
    dim = self.dim()
    if dim == 0:
        return self
    if dim == 1:
        if self.size(0) > 2 * PRINT_OPTS.edgeitems:
            return flow.cat(
                (
                    slice_wrapper(self, [0, PRINT_OPTS.edgeitems, 1]),
                    slice_wrapper(
                        self, [self.size(0) - PRINT_OPTS.edgeitems, self.size(0), 1]
                    ),
                )
            )
        else:
            return self
    if self.size(0) > 2 * PRINT_OPTS.edgeitems:
        start = [
            slice_wrapper(self, [i, i + 1, 1]) for i in range(0, PRINT_OPTS.edgeitems)
        ]
        end = [
            slice_wrapper(self, [i, i + 1, 1])
            for i in range(self.shape[0] - PRINT_OPTS.edgeitems, self.shape[0])
        ]
        return flow.stack([get_summarized_data(x) for x in (start + end)])
    else:
        return flow.stack(
            [
                get_summarized_data(slice_wrapper(self, [i, i + 1, 1]))
                for i in range(len(self))
            ]
        )
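
# --- Hedged usage sketch (not part of the function above) ---
# A minimal, self-contained illustration of the 1-D summarization path: keep the
# first and last `edgeitems` entries and drop the middle. `edgeitems = 3` is an
# assumed value standing in for PRINT_OPTS.edgeitems.
import oneflow as flow

edgeitems = 3
t = flow.arange(10)
summary = flow.cat((t[:edgeitems], t[t.size(0) - edgeitems:]))
# summary now holds [0, 1, 2, 7, 8, 9]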
def forward(self, x):
    if x.dim() >= 3:
        raise RuntimeError(
            "{} accepts 1/2D tensor as input, but got {:d}".format(
                self.__class__.__name__, x.dim()
            )
        )
    # when inference, only one utt
    if x.dim() == 1:
        x = flow.unsqueeze(x, 0)
    # n x 1 x S => n x N x T
    w = F.relu(self.encoder_1d(x))
    # n x B x T
    y = self.proj(self.ln(w))
    # n x B x T
    y = self.repeats(y)
    # n x 2N x T
    e = flow.chunk(self.mask(y), self.num_spks, 1)
    # n x N x T
    if self.non_linear_type == "softmax":
        m = self.non_linear(flow.stack(e, dim=0), dim=0)
    else:
        m = self.non_linear(flow.stack(e, dim=0))
    # spks x [n x N x T]
    s = [w * m[n] for n in range(self.num_spks)]
    # spks x n x S
    return [self.decoder_1d(x, squeeze=True) for x in s]
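
# --- Hedged shape sketch (toy sizes, not from the model above) ---
# Shows how chunking the mask output and stacking the chunks yields a
# (num_spks, n, N, T) tensor whose softmax over dim=0 makes the per-speaker
# masks sum to one at every time-feature bin. All sizes are hypothetical.
import oneflow as flow

num_spks, n, N, T = 2, 3, 8, 50
y = flow.randn(n, num_spks * N, T)             # stand-in for self.mask(y)
e = flow.chunk(y, num_spks, 1)                 # num_spks tensors of shape (n, N, T)
m = flow.softmax(flow.stack(e, dim=0), dim=0)  # masks sum to 1 across speakers
assert tuple(m.shape) == (num_spks, n, N, T)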
def test_stack_runtime_error(test_case):
    with test_case.assertRaises(Exception) as context:
        x1 = flow.ones((2, 1), dtype=flow.float32, requires_grad=True)
        x2 = flow.ones((2, 2), dtype=flow.float32, requires_grad=True)
        y = flow.stack([x1, x2])
    test_case.assertTrue(
        "stack expects each tensor to be equal size" in str(context.exception)
    )
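
# --- Hedged companion sketch (not part of the test above) ---
# The positive case the test contrasts with: tensors of identical shape stack
# along a new leading dimension.
import oneflow as flow

a = flow.ones((2, 2), dtype=flow.float32)
b = flow.zeros((2, 2), dtype=flow.float32)
stacked = flow.stack([a, b], dim=0)
assert tuple(stacked.shape) == (2, 2, 2)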
def __init__(
    self,
    dim,
    window_size,
    num_heads,
    qkv_bias=True,
    qk_scale=None,
    attn_drop=0.0,
    proj_drop=0.0,
):
    super().__init__()
    self.dim = dim
    self.window_size = window_size  # Wh, Ww
    self.num_heads = num_heads
    head_dim = dim // num_heads
    self.scale = qk_scale or head_dim ** -0.5

    # define a parameter table of relative position bias
    # Author zzk: we add trunc normal here!
    self.relative_position_bias_table = nn.Parameter(
        flow.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)
    )  # 2*Wh-1 * 2*Ww-1, nH
    self.relative_position_bias_table.trunc_normal_(std=0.02)

    # get pair-wise relative position index for each token inside the window
    coords_h = flow.arange(self.window_size[0])
    coords_w = flow.arange(self.window_size[1])
    coords = flow.stack(flow.meshgrid(*[coords_h, coords_w]))  # 2, Wh, Ww
    coords_flatten = flow.flatten(coords, 1)  # 2, Wh*Ww
    relative_coords = (
        coords_flatten[:, :, None] - coords_flatten[:, None, :]
    )  # 2, Wh*Ww, Wh*Ww
    relative_coords = relative_coords.permute(1, 2, 0)  # Wh*Ww, Wh*Ww, 2
    relative_coords[:, :, 0] += self.window_size[0] - 1  # shift to start from 0
    relative_coords[:, :, 1] += self.window_size[1] - 1
    relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
    relative_position_index = relative_coords.sum(-1)  # Wh*Ww, Wh*Ww
    self.register_buffer("relative_position_index", relative_position_index)

    self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
    self.attn_drop = nn.Dropout(attn_drop)
    self.proj = nn.Linear(dim, dim)
    self.proj_drop = nn.Dropout(proj_drop)
    self.softmax = nn.Softmax(dim=-1)
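
# --- Hedged sketch of the relative-position bookkeeping (toy 2x2 window) ---
# Not part of the module above; it only shows that flow.stack(flow.meshgrid(...))
# produces the (2, Wh, Ww) coordinate grid that __init__ flattens, and that the
# resulting index stays within the bias table of size (2*Wh-1)*(2*Ww-1).
import oneflow as flow

Wh, Ww = 2, 2
coords = flow.stack(flow.meshgrid(flow.arange(Wh), flow.arange(Ww)))  # 2, Wh, Ww
assert tuple(coords.shape) == (2, Wh, Ww)
coords_flatten = flow.flatten(coords, 1)                              # 2, Wh*Ww
relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]
relative_coords = relative_coords.permute(1, 2, 0)
relative_coords[:, :, 0] += Wh - 1
relative_coords[:, :, 1] += Ww - 1
relative_coords[:, :, 0] *= 2 * Ww - 1
relative_position_index = relative_coords.sum(-1)                     # Wh*Ww, Wh*Ww
assert relative_position_index.max().item() <= (2 * Wh - 1) * (2 * Ww - 1) - 1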
def forward(self, x, mask):
    # Conv2d
    x = flow.stack((x * mask, mask), dim=1)
    conv1 = self.conv1(x) * flow.sigmoid(self.conv1_gates(x))  # GLU

    # Downsampling
    downsample1 = self.downSample1(conv1)
    downsample2 = self.downSample2(downsample1)

    # Reshape
    reshape2dto1d = downsample2.view(
        downsample2.size(0), self.flattened_channels, 1, -1
    )
    reshape2dto1d = reshape2dto1d.squeeze(2)

    # 2D -> 1D
    conv2dto1d_layer = self.conv2dto1dLayer(reshape2dto1d)
    conv2dto1d_layer = self.conv2dto1dLayer_tfan(conv2dto1d_layer)

    # Residual Blocks
    residual_layer_1 = self.residualLayer1(conv2dto1d_layer)
    residual_layer_2 = self.residualLayer2(residual_layer_1)
    residual_layer_3 = self.residualLayer3(residual_layer_2)
    residual_layer_4 = self.residualLayer4(residual_layer_3)
    residual_layer_5 = self.residualLayer5(residual_layer_4)
    residual_layer_6 = self.residualLayer6(residual_layer_5)

    # 1D -> 2D
    conv1dto2d_layer = self.conv1dto2dLayer(residual_layer_6)
    conv1dto2d_layer = self.conv1dto2dLayer_tfan(conv1dto2d_layer)

    # Reshape
    reshape1dto2d = conv1dto2d_layer.unsqueeze(2)
    reshape1dto2d = reshape1dto2d.view(reshape1dto2d.size(0), 256, 20, -1)

    # UpSampling
    upsample_layer_1 = self.upSample1(reshape1dto2d)
    upsample_layer_2 = self.upSample2(upsample_layer_1)

    # Conv2d
    output = self.lastConvLayer(upsample_layer_2)
    output = output.squeeze(1)
    return output
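
# --- Hedged shape sketch (toy sizes, not from the generator above) ---
# Stacking the masked input with its mask along dim=1 turns two (n, H, W) maps
# into a single 2-channel (n, 2, H, W) input for the first Conv2d.
import oneflow as flow

x = flow.randn(4, 80, 64)      # hypothetical batch of 2-D features
mask = flow.ones(4, 80, 64)
stacked = flow.stack((x * mask, mask), dim=1)
assert tuple(stacked.shape) == (4, 2, 80, 64)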
def compute_loss(self, est, egs):
    # spks x n x S
    ests = est
    # spks x n x S
    refs = egs["ref"]
    num_spks = len(refs)

    def sisnr_loss(permute):
        # for one permute
        return sum(
            [self.sisnr(ests[s], refs[t]) for s, t in enumerate(permute)]
        ) / len(permute)

    # P x N
    N = egs["mix"].size(0)
    sisnr_mat = flow.stack(
        [sisnr_loss(p) for p in permutations(range(num_spks))]
    )
    max_perutt, _ = flow.max(sisnr_mat, dim=0)
    # si-snr
    return -flow.sum(max_perutt) / N
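
# --- Hedged sketch of the permutation-invariant reduction above ---
# Not the original training code: `pairwise_score` is a hypothetical stand-in
# for self.sisnr, used only to show how per-permutation scores are stacked and
# the best speaker assignment is selected per utterance (`permutations` is
# itertools.permutations, as in the method above).
from itertools import permutations

import oneflow as flow


def pairwise_score(est, ref):
    # hypothetical per-utterance similarity score; higher is better
    return -flow.mean((est - ref) ** 2, dim=-1)


num_spks, n, S = 2, 4, 16000
ests = [flow.randn(n, S) for _ in range(num_spks)]
refs = [flow.randn(n, S) for _ in range(num_spks)]

score_mat = flow.stack(
    [
        sum(pairwise_score(ests[s], refs[t]) for s, t in enumerate(p)) / num_spks
        for p in permutations(range(num_spks))
    ]
)  # P x n
best_per_utt, _ = flow.max(score_mat, dim=0)
loss = -flow.sum(best_per_utt) / n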
def __getitem__(self, index):
    """
    Args:
        index (int): Index

    Returns:
        tuple: (clip, pid, camid) where pid is identity of the clip.
    """
    img_paths, pid, camid = self.dataset[index]
    if self.temporal_transform is not None:
        img_paths = self.temporal_transform(img_paths)
    clip = self.loader(img_paths)
    if self.spatial_transform is not None:
        self.spatial_transform.randomize_parameters()
        clip = [self.spatial_transform(img) for img in clip]
    # trans T x C x H x W to C x T x H x W
    clip = flow.stack(clip, axis=0)
    clip = flow.transpose(clip, perm=[1, 0, 2, 3])
    return clip, pid, camid
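
# --- Hedged sketch (toy frames, not from the dataset class above) ---
# Stacking T per-frame tensors of shape (C, H, W) yields (T, C, H, W); the
# transpose then moves channels first to get the (C, T, H, W) clip layout.
import oneflow as flow

T, C, H, W = 8, 3, 32, 32
frames = [flow.randn(C, H, W) for _ in range(T)]
clip = flow.stack(frames, dim=0)   # T x C x H x W
clip = clip.permute(1, 0, 2, 3)    # C x T x H x W
assert tuple(clip.shape) == (C, T, H, W)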
def oneflow_stack(
    of_input_1: tp.Numpy.Placeholder(shape=input_1.shape),
    of_input_2: tp.Numpy.Placeholder(shape=input_2.shape),
    of_mul: tp.Numpy.Placeholder(shape=np_random_mul.shape),
) -> tp.Numpy:
    with flow.scope.placement(device_type, "0:0"):
        v = flow.get_variable(
            shape=input_1.shape,
            dtype=flow.float32,
            initializer=flow.zeros_initializer(),
            name="x_var",
        )
        x_var = of_input_1 + v

    flow.watch_diff(x_var, assert_prediction_grad)

    of_stack_out = flow.stack([x_var, of_input_2], axis=axis)
    out = of_stack_out * of_mul

    with flow.scope.placement(device_type, "0:0"):
        flow.optimizer.SGD(
            flow.optimizer.PiecewiseConstantScheduler([], [1e-3]), momentum=0
        ).minimize(out)

    return of_stack_out
def clip_grad_norm_(
    parameters: _tensor_or_tensors,
    max_norm: float,
    norm_type: float = 2.0,
    error_if_nonfinite: bool = False,
) -> Tensor:
    r"""Clips gradient norm of an iterable of parameters.

    The norm is computed over all gradients together, as if they were
    concatenated into a single vector.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float or int): max norm of the gradients
        norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        error_if_nonfinite (bool): if True, an error is thrown if the total norm
            of the gradients from :attr:`parameters` is ``nan``, ``inf``, or
            ``-inf``. Default: False (will switch to True in the future)

    Returns:
        Parameters after clipping gradient norm
        Total norm of the parameters (viewed as a single vector).

    For example:

    .. code-block:: python

        >>> import oneflow as flow
        >>> import numpy as np
        >>> x1 = flow.tensor(np.array([[2, 3, 4], [1.5, 2.6, 3.7]]).astype(np.float32), requires_grad=True)
        >>> m1 = flow.nn.ReLU()
        >>> out1 = m1(x1)
        >>> out1 = out1.sum()
        >>> out1.backward()
        >>> norm1 = flow.nn.utils.clip_grad_norm_(x1, 0.6, 1.0)
        >>> norm1
        tensor(6., dtype=oneflow.float32)
        >>> x1.grad
        tensor([[0.1000, 0.1000, 0.1000],
                [0.1000, 0.1000, 0.1000]], dtype=oneflow.float32)
        >>> x2 = flow.tensor(np.array([[-2, -3, -4], [2.5, 0, 3.2]]).astype(np.float32), requires_grad=True)
        >>> out2 = flow.atan(x2)
        >>> out2 = out2.sum()
        >>> out2.backward()
        >>> norm2 = flow.nn.utils.clip_grad_norm_(x2, 0.5)
        >>> norm2
        tensor(1.0394, dtype=oneflow.float32)
        >>> x2.grad
        tensor([[0.0962, 0.0481, 0.0283],
                [0.0663, 0.4810, 0.0428]], dtype=oneflow.float32)

    """
    if isinstance(parameters, (Tensor, flow._oneflow_internal.Tensor)):
        parameters = [parameters]
    parameters = [p for p in parameters if p.grad is not None]
    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if len(parameters) == 0:
        return flow.tensor(0.0)
    if parameters[0].is_global:
        assert all(
            [p.is_global for p in parameters]
        ), "All parameters must be consistent tensor."
        sbp_broadcast = [flow.sbp.broadcast for _ in parameters[0].sbp]
        param0_placement = parameters[0].placement
        if norm_type == float("inf"):
            norms = [
                p.grad.detach()
                .to_global(sbp=sbp_broadcast)
                .abs()
                .max()
                .to_global(placement=param0_placement)
                for p in parameters
            ]
            total_norm = norms[0] if len(norms) == 1 else flow.max(flow.stack(norms))
        elif norm_type == float("-inf"):
            norms = [
                p.grad.detach()
                .to_global(sbp=sbp_broadcast)
                .abs()
                .min()
                .to_global(placement=param0_placement)
                for p in parameters
            ]
            total_norm = norms[0] if len(norms) == 1 else flow.min(flow.stack(norms))
        else:
            total_norm = flow.linalg.vector_norm(
                flow.stack(
                    [
                        flow.linalg.vector_norm(
                            p.grad.detach().to_global(sbp=sbp_broadcast), norm_type
                        ).to_global(placement=param0_placement)
                        for p in parameters
                    ]
                ),
                norm_type,
            )
        if error_if_nonfinite and flow.logical_or(
            total_norm.isnan(), total_norm.isinf()
        ):
            raise RuntimeError(
                f"The total norm of order {norm_type} for gradients from "
                "`parameters` is non-finite, so it cannot be clipped. To disable "
                "this error and scale the gradients by the non-finite norm anyway, "
                "set `error_if_nonfinite=False`"
            )
        clip_coef = max_norm / (total_norm + 1e-6)
        clip_coef_clamped = clip_coef.clamp(max=1.0)
        for p in parameters:
            p.grad.detach().mul_(clip_coef_clamped.to_global(placement=p.placement))
    else:
        device = parameters[0].grad.device
        if norm_type == float("inf"):
            norms = [p.grad.detach().abs().max().to(device) for p in parameters]
            total_norm = norms[0] if len(norms) == 1 else flow.max(flow.stack(norms))
        elif norm_type == float("-inf"):
            norms = [p.grad.detach().abs().min().to(device) for p in parameters]
            total_norm = norms[0] if len(norms) == 1 else flow.min(flow.stack(norms))
        else:
            total_norm = flow.linalg.vector_norm(
                flow.stack(
                    [
                        flow.linalg.vector_norm(p.grad.detach(), norm_type).to(device)
                        for p in parameters
                    ]
                ),
                norm_type,
            )
        if error_if_nonfinite and flow.logical_or(
            total_norm.isnan(), total_norm.isinf()
        ):
            raise RuntimeError(
                f"The total norm of order {norm_type} for gradients from "
                "`parameters` is non-finite, so it cannot be clipped. To disable "
                "this error and scale the gradients by the non-finite norm anyway, "
                "set `error_if_nonfinite=False`"
            )
        clip_coef = max_norm / (total_norm + 1e-6)
        clip_coef_clamped = clip_coef.clamp(max=1.0)
        for p in parameters:
            p.grad.detach().mul_(clip_coef_clamped.to(p.grad.device))
    return total_norm