Example #1
0
 def sample_inputs_generator():
     """Yield the wrapped samples plus mask-layout variants of masked ones.

     Reads ``sample_inputs_func``, ``device``, ``dtype``, ``layout`` and ``op``
     from the enclosing scope. A sample without a ``mask`` keyword is passed
     through unchanged. A masked sample is yielded as-is only when its input
     layout equals ``layout``; after that, up to three variants follow with
     the mask converted through ``to_dense()``, ``to_sparse()`` and
     ``to_sparse_csr()``, each gated on ``layout`` and the op's support flags.
     """

     def with_mask(sample, converted_mask):
         # Shallow-copied kwargs and a cloned input; positional args are shared.
         variant_kwargs = sample.kwargs.copy()
         variant_kwargs.update(mask=converted_mask)
         return SampleInput(sample.input.clone(),
                            args=sample.args,
                            kwargs=variant_kwargs)

     for sample in sample_inputs_func(device, dtype):
         mask = sample.kwargs.get('mask')
         if mask is None:
             yield sample
             continue

         if layout == sample.input.layout:
             yield sample
         if layout != torch.strided:
             yield with_mask(sample, mask.to_dense())
         if layout != torch.sparse_coo and op.supports_sparse:
             yield with_mask(sample, mask.to_sparse())
         # The CSR variant is only produced for 2-D inputs.
         if layout != torch.sparse_csr and op.supports_sparse_csr and sample.input.ndim == 2:
             yield with_mask(sample, mask.to_sparse_csr())
def sample_inputs_index_put(op_info, device, dtype, requires_grad, **kwargs):
    """Return a list of ``SampleInput`` objects, once per ``accumulate`` setting.

    Each sample has the tensor being written to as its input, and
    ``((index,), values)`` as positional args, with ``accumulate`` passed as a
    keyword. ``op_info`` and ``**kwargs`` are accepted but not used.
    """
    S = 5
    tensor_of = partial(make_tensor,
                        dtype=dtype,
                        device=device,
                        requires_grad=requires_grad)
    index_of = partial(make_tensor,
                       dtype=torch.long,
                       device=device,
                       requires_grad=False)

    def build(self_shape, index_shape, index_high, values_shape, accumulate):
        # Tensors are created in the order self, index, values.
        target = tensor_of(self_shape)
        index = index_of(index_shape, low=0, high=index_high)
        values = tensor_of(values_shape)
        return SampleInput(target,
                           args=((index, ), values),
                           kwargs=dict(accumulate=accumulate))

    # (self shape, index shape, `high` passed to the index factory, values shape)
    common_cases = (
        ((S, S), (2, ), 4, (2, S)),  # putting vectors at indexed locations
        ((S, S, 2), (3, ), 4, (3, S, 2)),  # putting multi-dim tensors at indexed locations
        ((S, 0), (3, ), 4, (3, 0)),  # value with size `0` dim
        ((S, ), (), S, ()),  # scalar value
    )

    samples = []
    for accumulate in (False, True):
        samples.extend(build(*case, accumulate) for case in common_cases)

        # cuda and accumulate don't work well
        # Reference: https://github.com/pytorch/pytorch/issues/72053
        # NOTE(review): as written, the broadcast sample is only added when
        # `device` is exactly the string 'cuda' and accumulate is False --
        # confirm this is the intended guard.
        if not accumulate and device == 'cuda':
            # Broadcast `values`
            samples.append(build((S, S), (2, ), S, (S, ), accumulate))

    return samples
Example #3
0
    def test_unsupported_expand_weights(self, device, dtype, op):
        """Expect a ``RuntimeError`` matching "Expanded Weights" for unsupported samples.

        Iterates the samples returned by ``supported_inputs(...,
        supported_inputs=False)`` and runs the expanded-weight forward and
        backward for each one inside an ``assertRaisesRegex`` context.
        """
        all_samples = op.sample_inputs(device, dtype, requires_grad=True)
        rejected = supported_inputs(op, all_samples, supported_inputs=False)
        for sample_input in rejected:
            with self.assertRaisesRegex(RuntimeError, r"Expanded Weights"):
                # embedding flips its argument order for autograd tests
                if op.name == "nn.functional.embedding":
                    sample_input = SampleInput(sample_input.args[0],
                                               args=(sample_input.input, ),
                                               kwargs=sample_input.kwargs)
                primary = sample_input.input

                # A 0-D or 1-D input is treated as a batch of one.
                if len(primary.shape) > 1:
                    batch_size = primary.shape[0]
                else:
                    batch_size = 1

                # get per sample grads with ExpandedWeights objects
                ew_input, ew_args, ew_kwargs = make_expanded_weight(
                    sample_input, batch_size)
                out = run_op(op, ew_input, *ew_args, **ew_kwargs)

                candidates = (ew_input, *ew_args, *ew_kwargs.values())
                diff_tensors = [t for t in candidates if is_diff_tensor(t)]
                diff_tensors = [
                    t.orig_weight if isinstance(t, ExpandedWeight) else t
                    for t in diff_tensors
                ]

                # grad doesn't work with ExpandedWeight because it calls __torch_function__
                out.sum().backward()
def sample_inputs_aten_index_put(op_info, device, dtype, requires_grad,
                                 **kwargs):
    """Return ``SampleInput`` objects whose index lists mix ``None`` and a 2x2 index tensor.

    Every (shape, indices) case yields two samples: one whose ``values``
    tensor has the full shape of the indexed result, and one whose ``values``
    tensor drops the first three dimensions of that shape. ``op_info`` and
    ``**kwargs`` are accepted but not used.
    """
    make_arg = partial(make_tensor,
                       dtype=dtype,
                       device=device,
                       requires_grad=requires_grad)
    adv_idx = torch.LongTensor([[0, 1], [2, 3]])

    # (self_shape, indices)
    cases = [
        ((5, 6, 7, 8), [None, adv_idx, adv_idx, None]),
        ((5, 6, 7, 8), [None, adv_idx, None, adv_idx]),
        ((5, 6, 7, 8), [adv_idx, None, None, adv_idx]),
        ((5, 6, 7, 8), [None, None, adv_idx, adv_idx]),
        ((5, 6, 7, 8, 9), [None, None, adv_idx, None, adv_idx]),
        ((5, 6, 7, 8, 9), [None, None, adv_idx, adv_idx, None]),
        ((5, 6, 7, 8, 9, 10), [None, None, None, adv_idx, adv_idx]),
        ((5, 6, 7, 8, 9, 10), [None, None, adv_idx, adv_idx, adv_idx]),
    ]

    samples = []
    for self_shape, indices in cases:
        # For the shape probe below, `None` placeholders are replaced by full
        # slices; the sample itself still receives the original `None`s.
        probe_index = [
            slice(None) if entry is None else entry for entry in indices
        ]
        for broadcast_value in (False, True):
            target = make_arg(self_shape)
            full_shape = target[probe_index].shape
            values_shape = full_shape[3:] if broadcast_value else full_shape
            values = make_arg(values_shape)
            samples.append(SampleInput(target, args=(tuple(indices), values)))
    return samples
def sample_inputs_getitem(op_info, device, dtype, requires_grad, **kwargs):
    """Return a tuple of ``SampleInput`` objects pairing a fresh (5, 5, 5) tensor with one index.

    Each entry of the table below is the positional-args tuple handed to the
    sample. ``op_info`` and ``**kwargs`` are accepted but not used.
    """
    S = 5
    index_cases = [
        ([1, 2], ),
        (slice(0, 3), ),
        ([slice(0, 3), 1], ),
        ([[0, 2, 3], [1, 3, 3], [0, 0, 2]], ),
        ([[0, 0, 3], [1, 1, 3], [0, 0, 2]], ),
        ([slice(None), slice(None), [0, 3]], ),
        ([slice(None), [0, 3], slice(None)], ),
        ([[0, 3], slice(None), slice(None)], ),
        ([[0, 3], [1, 2], slice(None)], ),
        ([[0, 3]], ),
        ([[0, 3], slice(None)], ),
        ([[0, 3], Ellipsis], ),
        ([[0, 2, 3], [1, 3, 3], torch.LongTensor([0, 0, 2])], ),
    ]

    def fresh_input():
        # A new tensor per sample; low/high are passed explicitly as None.
        return make_tensor((S, S, S),
                           device=device,
                           dtype=dtype,
                           low=None,
                           high=None,
                           requires_grad=requires_grad)

    samples = []
    for index_args in index_cases:
        samples.append(SampleInput(fresh_input(), args=index_args))
    return tuple(samples)
Example #6
0
def _generate_sample_data(device="cpu",
                          dtype=torch.float,
                          requires_grad=True,
                          layout=torch.strided):
    """Return ``SampleInput`` objects pairing data with a random ``mask`` keyword.

    One candidate is built for each of four fixed shapes. For
    ``torch.sparse_coo`` the mask is converted and coalesced and the data is
    restricted to it with ``sparse_mask``. For ``torch.sparse_csr`` a shape is
    skipped when neither the data nor the mask is 2-D, otherwise both are
    converted the same way. Any other layout trips the assertion.
    """
    supported_layouts = {torch.strided, torch.sparse_coo, torch.sparse_csr}
    assert layout in supported_layouts, "Layout must be strided/sparse_coo/sparse_csr"

    samples = []
    for shape in ([], [2], [3, 5], [3, 2, 1, 2]):
        data = make_tensor(shape,
                           device=device,
                           dtype=dtype,
                           requires_grad=requires_grad)  # type: ignore[arg-type]
        mask = _create_random_mask(shape, device)

        if layout == torch.sparse_csr:
            if not (data.ndim == 2 or mask.ndim == 2):
                continue
            mask = mask.to_sparse_csr()
            data = data.sparse_mask(mask)
        elif layout == torch.sparse_coo:
            mask = mask.to_sparse_coo().coalesce()
            # requires_grad is re-applied explicitly after sparse_mask.
            data = data.sparse_mask(mask).requires_grad_(requires_grad)

        samples.append(SampleInput(data, kwargs={"mask": mask}))
    return samples
Example #7
0
    def test_expanded_weight_per_sample_grad(self, device, dtype, op):
        """Compare ExpandedWeight per-sample grads with a for-loop reference.

        For every supported sample, gradients gathered after a backward pass
        through the expanded-weight call are checked against
        ``for_loop_per_sample_grad``. Samples with no differentiable tensor
        are skipped.
        """
        candidates = op.sample_inputs(device, dtype, requires_grad=True)
        for sample_input in supported_inputs(op, candidates):
            # embedding flips its argument order for autograd tests
            if op.name == "nn.functional.embedding":
                sample_input = SampleInput(sample_input.args[0],
                                           args=(sample_input.input, ),
                                           kwargs=sample_input.kwargs)
            primary = sample_input.input
            args = sample_input.args
            kwargs = sample_input.kwargs
            batch_size = primary.shape[0] if len(primary.shape) > 1 else 1

            # get per sample grads with ExpandedWeights objects
            ew_input, ew_args, ew_kwargs = make_expanded_weight(
                sample_input, batch_size)
            diff_tensors = [
                t for t in (ew_input, *ew_args, *ew_kwargs.values())
                if is_diff_tensor(t)
            ]
            diff_tensors = [
                t.orig_weight if isinstance(t, ExpandedWeight) else t
                for t in diff_tensors
            ]
            if not diff_tensors:
                continue

            out = run_op(op, ew_input, *ew_args, **ew_kwargs)
            # grad doesn't work with ExpandedWeight because it calls __torch_function__
            out.sum().backward()
            expanded_weight_grad = tuple(
                t.grad_sample if hasattr(t, "grad_sample") else t.grad
                for t in diff_tensors)

            # get per sample grads with for loop
            reference_fn = partial(run_op, op)
            per_sample_grad = for_loop_per_sample_grad(batch_size, primary,
                                                       reference_fn, *args,
                                                       **kwargs)

            # check equality
            self.assertEqual(len(per_sample_grad), len(expanded_weight_grad))
            for actual, expected in zip(expanded_weight_grad, per_sample_grad):
                # A missing gradient is compared as zeros of the expected shape.
                if actual is None:
                    actual = torch.zeros_like(expected)
                self.assertEqual(actual, expected)
def sample_inputs_new_zeros_with_same_feature_meta(op_info, device, dtype,
                                                   requires_grad, **kwargs):
    """Return one ``SampleInput`` per (tangent, base, batch-dims) row below.

    The tangent tensor is the sample input, the base tensor is the only
    positional arg, and the third column is passed as the
    ``self_num_batch_dims`` keyword. ``op_info`` and ``**kwargs`` are
    accepted but not used.
    """
    make_arg = partial(make_tensor,
                       dtype=dtype,
                       device=device,
                       requires_grad=requires_grad)
    # (tangent shape, base shape, num_tangent_bdims)
    cases = (
        ([5], [2, 3], 0),
        ([2, 3], [2, 3], 0),
        ([5], [2], 0),
        ([1, 0, 2], [1, 2], 0),
        ([], [1, 2], 0),
        ([8, 7, 5], [2, 3, 11], 1),
        ([6, 7, 5], [2, 3, 4], 2),
        ([6, 4], [3], 2),
    )
    # Within each element the tangent is created before the base.
    return [
        SampleInput(make_arg(tangent_shape),
                    args=(make_arg(base_shape), ),
                    kwargs=dict(self_num_batch_dims=num_tangent_bdims))
        for tangent_shape, base_shape, num_tangent_bdims in cases
    ]
def sample_inputs_conv2d(has_bias,
                         self,
                         device,
                         dtype,
                         requires_grad,
                         extra_args=(),
                         groups=1):
    """Return a single-element list holding one ``SampleInput`` of input, weight and bias.

    The input has shape ``(2, 6 * groups, 7, 5)`` and the weight
    ``(4 * groups, 6, 3, 2)``. The bias has shape ``(4 * groups,)`` when
    ``has_bias`` is truthy and is ``None`` otherwise. ``extra_args`` is
    appended to the ``(weight, bias)`` tuple. ``self`` is accepted but not used.
    """
    in_ch, out_ch = 6, 4

    def rand(shape):
        # All tensors are created with low=-1 and high=1.
        return make_tensor(shape,
                           device=device,
                           dtype=dtype,
                           requires_grad=requires_grad,
                           low=-1,
                           high=1)

    inp = rand((2, in_ch * groups, 7, 5))
    weight = rand((out_ch * groups, in_ch, 3, 2))
    bias = rand((out_ch * groups, )) if has_bias else None
    return [SampleInput(inp, args=(weight, bias) + extra_args)]
def sample_inputs_masked_fill(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` objects carrying a random boolean mask and the fill value 10.

    The mask is ``torch.randn(...) > 0`` on ``device``. Five input/mask shape
    pairings are produced, and only the last passes ``broadcasts_input=True``.
    ``op_info`` and ``**kwargs`` are accepted but not used.
    """
    S = 3
    make_arg = partial(make_tensor,
                       device=device,
                       dtype=dtype,
                       requires_grad=requires_grad)

    # (input shape, mask shape, pass broadcasts_input=True?)
    cases = (
        ((S, S), (S, S), False),
        ((S, S), (S, ), False),
        ((), (), False),
        ((S, S), (), False),
        ((S, ), (S, S), True),
    )
    for input_shape, mask_shape, flag_broadcast in cases:
        # The input is created before the mask.
        target = make_arg(input_shape)
        mask = torch.randn(mask_shape, device=device) > 0
        if flag_broadcast:
            yield SampleInput(target, args=(mask, 10), broadcasts_input=True)
        else:
            yield SampleInput(target, args=(mask, 10))
def sample_inputs_getitem(op_info, device, dtype, requires_grad, **kwargs):
    """Return a tuple of ``SampleInput`` objects pairing an N-D tensor with one index.

    Each table row gives the rank of the input tensor and the positional-args
    tuple for the sample. A rank-``d`` input has shape
    ``(5, 6, ..., 5 + d - 1)``. ``op_info`` and ``**kwargs`` are accepted but
    not used.
    """
    # Short for "advanced index"
    adv_idx = torch.LongTensor([[0, 1], [2, 3]])
    S = 5
    full = slice(None)
    # (self_dim, indices)
    cases = [
        (3, ([1, 2], )),
        (3, (slice(0, 3), )),
        (3, ([slice(0, 3), 1], )),
        (3, ([[0, 2, 3], [1, 3, 3], [0, 0, 2]], )),
        (3, ([[0, 0, 3], [1, 1, 3], [0, 0, 2]], )),
        (3, ([full, full, [0, 3]], )),
        (3, ([full, [0, 3], full], )),
        (3, ([[0, 3], full, full], )),
        (3, ([[0, 3], [1, 2], full], )),
        (3, ([[0, 3]], )),
        (3, ([[0, 3], full], )),
        (3, ([[0, 3], Ellipsis], )),
        (3, ([[0, 2, 3], [1, 3, 3], torch.LongTensor([0, 0, 2])], )),
        (4, ([full, adv_idx, adv_idx, full], )),
        (4, ([full, adv_idx, full, adv_idx], )),
        (4, ([adv_idx, full, full, adv_idx], )),
        (4, ([full, full, adv_idx, adv_idx], )),
        (4, ([Ellipsis, adv_idx, adv_idx], )),
        (5, ([full, full, adv_idx, full, adv_idx], )),
        (5, ([full, full, adv_idx, adv_idx, full], )),
        (5, ([full, full, adv_idx, None, adv_idx, full], )),
        (6, ([full, full, full, adv_idx, adv_idx], )),
        (6, ([full, full, adv_idx, adv_idx, adv_idx], )),
        (6, ([full, full, None, adv_idx, adv_idx, adv_idx], )),
    ]

    samples = []
    for self_dim, index_args in cases:
        # Dimension sizes count up from S so every axis has a distinct length.
        shape = tuple(range(S, S + self_dim))
        tensor = make_tensor(shape,
                             device=device,
                             dtype=dtype,
                             low=None,
                             high=None,
                             requires_grad=requires_grad)
        samples.append(SampleInput(tensor, args=index_args))
    return tuple(samples)
Example #12
0
    def test_expanded_weight_per_sample_grad_mean(self, device, dtype, op):
        """Run the per-sample-grad comparison helper with ``torch.mean`` on every supported sample."""
        candidates = op.sample_inputs(device, dtype, requires_grad=True)
        for sample_input in supported_inputs(op, candidates):
            # embedding flips its argument order for autograd tests
            if op.name == "nn.functional.embedding":
                first_arg = sample_input.args[0]
                original_input = sample_input.input
                sample_input = SampleInput(first_arg,
                                           args=(original_input, ),
                                           kwargs=sample_input.kwargs)

            self._compare_ew_and_for_loop_per_sample_grads(
                op, sample_input, torch.mean)
def sample_inputs_conversion(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` objects for two shapes, with and without ``memory_format``.

    For each of the shapes ``()`` and ``(2, 3)``, one sample has empty kwargs
    and one passes ``memory_format=torch.contiguous_format``. ``op_info`` and
    ``**kwargs`` are accepted but not used.
    """
    make_arg = partial(make_tensor,
                       dtype=dtype,
                       device=device,
                       requires_grad=requires_grad)
    memory_format_options = (None, torch.contiguous_format)
    for shape in ((), (2, 3)):
        for memory_format in memory_format_options:
            # A falsy option (the `None` entry) means the keyword is left out entirely.
            if memory_format:
                extra = {'memory_format': memory_format}
            else:
                extra = {}
            yield SampleInput(make_arg(shape), kwargs=extra)
Example #14
0
 def test_expanded_weight_forward(self, device, dtype, op):
     """Check that the expanded-weight forward result equals the plain forward result.

     For every supported sample, ``run_op`` is called once with the values
     returned by ``make_expanded_weight`` and once with the sample's own
     input, args and kwargs; the two results must be equal.
     """
     candidates = op.sample_inputs(device, dtype)
     for sample_input in supported_inputs(op, candidates):
         # embedding flips its argument order for autograd tests
         if op.name == "nn.functional.embedding":
             sample_input = SampleInput(sample_input.args[0].clone(),
                                        args=(sample_input.input.clone(),),
                                        kwargs=sample_input.kwargs)
             sample_kwargs = sample_input.kwargs
             if "cuda" in device and "max_norm" in sample_kwargs and "padding_idx" in sample_kwargs:
                 self.skipTest("embedding is non-determinstic in this case, see issue #74679")

         primary = sample_input.input
         # A 0-D or 1-D input is treated as a batch of one.
         batch_size = primary.shape[0] if len(primary.shape) > 1 else 1
         ew_input, ew_args, ew_kwargs = make_expanded_weight(sample_input, batch_size)

         expanded_weight_result = run_op(op, ew_input, *ew_args, **ew_kwargs)
         normal_result = run_op(op, sample_input.input, *sample_input.args, **sample_input.kwargs)
         self.assertEqual(expanded_weight_result, normal_result)
Example #15
0
 def sample_inputs_generator():
     """Yield the wrapped samples plus mask-layout variants of masked ones.

     Reads ``sample_inputs_func``, ``device``, ``dtype``, ``layout`` and ``op``
     from the enclosing scope. A sample without a ``mask`` keyword is passed
     through unchanged. A masked sample is yielded as-is only when its input
     layout equals ``layout``; up to three variants then follow, each with a
     copied kwargs dict, a cloned input and a converted mask.
     """
     for sample_input in sample_inputs_func(device, dtype):
         mask = sample_input.kwargs.get('mask')
         if mask is None:
             yield sample_input
         else:
             if layout == sample_input.input.layout:
                 yield sample_input
             # Dense-mask variant.
             if layout != torch.strided:
                 sample_input_kwargs = sample_input.kwargs.copy()
                 sample_input_kwargs.update(mask=mask.to_dense())
                 yield SampleInput(sample_input.input.clone(),
                                   args=sample_input.args,
                                   kwargs=sample_input_kwargs)
             # Sparse-mask variant, only when the op reports `supports_sparse`.
             if layout != torch.sparse_coo and op.supports_sparse:
                 sample_input_kwargs = sample_input.kwargs.copy()
                 if mask.layout == torch.sparse_csr:
                     # TODO: remove this if-block when sparse csr supports to_sparse
                     # NOTE: `mask` is rebound to the COO tensor here, so the
                     # CSR branch further down converts this COO mask rather
                     # than the original CSR one.
                     mask = torch.sparse_coo_tensor(
                         torch._convert_indices_from_csr_to_coo(
                             mask.crow_indices(),
                             mask.col_indices()), mask.values(),
                         mask.shape)._coalesced_(True)
                     sample_input_kwargs.update(mask=mask)
                 else:
                     sample_input_kwargs.update(
                         mask=mask.to_sparse())
                 yield SampleInput(sample_input.input.clone(),
                                   args=sample_input.args,
                                   kwargs=sample_input_kwargs)
             # CSR-mask variant, only for 2-D inputs and when the op reports
             # `supports_sparse_csr`.
             if layout != torch.sparse_csr and op.supports_sparse_csr and sample_input.input.ndim == 2:
                 sample_input_kwargs = sample_input.kwargs.copy()
                 sample_input_kwargs.update(
                     mask=mask.to_sparse_csr())
                 yield SampleInput(sample_input.input.clone(),
                                   args=sample_input.args,
                                   kwargs=sample_input_kwargs)
def sample_inputs_mse_loss(op_info, device, dtype, requires_grad, **kwargs):
    """Yield ``SampleInput`` objects for every shape/reduction combination.

    The shapes are 2-D, 3-D and 4-D tensors with every dimension of size 5,
    and the reductions are "none", "mean" and "sum". The second tensor
    (positional arg) uses ``kwargs['rhs_requires_grad']`` when provided and
    falls back to ``requires_grad`` otherwise. ``op_info`` is accepted but
    not used.
    """
    S = 5
    rhs_requires_grad = kwargs.get('rhs_requires_grad', requires_grad)

    def tensor(shape, needs_grad):
        return make_tensor(shape,
                           device=device,
                           dtype=dtype,
                           requires_grad=needs_grad)

    for shape in ((S, S), (S, S, S), (S, S, S, S)):
        for reduction in ("none", "mean", "sum"):
            # The input is created before the positional-arg tensor.
            lhs = tensor(shape, requires_grad)
            rhs = tensor(shape, rhs_requires_grad)
            yield SampleInput(lhs,
                              args=(rhs, ),
                              kwargs={"reduction": reduction})
    def generator():
        """Yield ``SampleInput`` objects pairing a 2-D tensor with an integer index tensor.

        ``make_input``, ``make_long_input``, ``M`` and ``S`` come from the
        enclosing scope, which is not visible here.
        NOTE(review): the visible samples look like embedding inputs (weight
        matrix plus lookup indices, with ``padding_idx`` and
        ``scale_grad_by_freq`` keywords) -- confirm against the enclosing function.
        """
        # 0-D index tensor
        idx = make_long_input((), low=0, high=M)
        yield SampleInput(
            make_input((M, S)),
            args=(idx, ),
        )

        # 1-D index tensor
        idx = make_long_input((S, ), low=0, high=M)
        yield SampleInput(
            make_input((M, S)),
            args=(idx, ),
        )

        # 2-D index tensor
        idx = make_long_input((S, S), low=0, high=M)
        yield SampleInput(
            make_input((M, S)),
            args=(idx, ),
        )

        # Two entries are overwritten with 2 so the value passed as
        # `padding_idx` is guaranteed to occur in the index tensor.
        idx = make_long_input((2, 2), low=0, high=S)
        idx[0, 0] = 2
        idx[1, 1] = 2
        yield SampleInput(
            make_input((S, S)),
            args=(idx, ),
            kwargs={'padding_idx': 2},
        )

        # Same pattern with `padding_idx=-1`; two entries are set to 4.
        # NOTE(review): presumably S == 5 so that -1 refers to row 4 -- confirm.
        idx = make_long_input((2, 2), low=0, high=S)
        idx[0, 0] = 4
        idx[1, 1] = 4
        yield SampleInput(
            make_input((S, S)),
            args=(idx, ),
            kwargs={'padding_idx': -1},
        )

        # Scale the gradient based on the inverse frequency of a particular index.
        # Index 1 is written twice so that it is repeated in the index tensor.
        idx = make_long_input((2, 2), low=0, high=S)
        idx[0, 0] = 1
        idx[0, 1] = 1
        weights = make_input((S, S))
        yield SampleInput(
            weights,
            args=(idx, ),
            kwargs={'scale_grad_by_freq': True},
        )