Example #1
File: lltm.py Project: Daiver/jff
 def __init__(self, input_features, state_size):
     super(LLTM, self).__init__()
     self.input_features = input_features
     self.state_size = state_size
     self.weights = torch.nn.Parameter(
         torch.empty(3 * state_size, input_features + state_size))
     self.bias = torch.nn.Parameter(torch.empty(3 * state_size))
     self.reset_parameters()
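Note: the parameters above are created with torch.empty, so they hold uninitialized memory until reset_parameters() overwrites them. The method itself is not shown in this snippet; a minimal sketch, assuming a uniform scheme scaled by the state size (and `import math`):

 def reset_parameters(self):
     # hypothetical initializer: torch.empty leaves arbitrary values,
     # so every parameter must be overwritten before use
     stdv = 1.0 / math.sqrt(self.state_size)
     for weight in self.parameters():
         weight.data.uniform_(-stdv, +stdv)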
Example #2
 def __init__(self, input_size, output_size, num_emojis, dropout):
     super().__init__()
     self.V = torch.nn.Parameter(
         torch.empty(num_emojis, output_size).uniform_(-0.1, 0.1)
     )
     self.dropout = torch.nn.Dropout(p=dropout)
      if input_size != output_size:
         self.is_proj = True
         self.W = torch.nn.Parameter(
             torch.empty(input_size, output_size).uniform_(-0.1, 0.1)
         )
         self.tanh = torch.nn.Tanh()
     else:
         self.is_proj = False
Example #3
    def __init__(self, beam_size, batch_size, pad, bos, eos, n_best, mb_device,
                 global_scorer, min_length, max_length, return_attention,
                 block_ngram_repeat, exclusion_tokens, memory_lengths,
                 stepwise_penalty, ratio):
        super(BeamSearch, self).__init__(
            pad, bos, eos, batch_size, mb_device, beam_size, min_length,
            block_ngram_repeat, exclusion_tokens, return_attention,
            max_length)
        # beam parameters
        self.global_scorer = global_scorer
        self.beam_size = beam_size
        self.n_best = n_best
        self.batch_size = batch_size
        self.ratio = ratio

        # result caching
        self.hypotheses = [[] for _ in range(batch_size)]

        # beam state
        self.top_beam_finished = torch.zeros([batch_size], dtype=torch.uint8)
        self.best_scores = torch.full([batch_size], -1e10, dtype=torch.float,
                                      device=mb_device)

        self._batch_offset = torch.arange(batch_size, dtype=torch.long)
        self._beam_offset = torch.arange(
            0, batch_size * beam_size, step=beam_size, dtype=torch.long,
            device=mb_device)
        self.topk_log_probs = torch.tensor(
            [0.0] + [float("-inf")] * (beam_size - 1), device=mb_device
        ).repeat(batch_size)
        self.select_indices = None
        self._memory_lengths = memory_lengths

        # buffers for the topk scores and 'backpointer'
        self.topk_scores = torch.empty((batch_size, beam_size),
                                       dtype=torch.float, device=mb_device)
        self.topk_ids = torch.empty((batch_size, beam_size), dtype=torch.long,
                                    device=mb_device)
        self._batch_index = torch.empty([batch_size, beam_size],
                                        dtype=torch.long, device=mb_device)
        self.done = False
        # "global state" of the old beam
        self._prev_penalty = None
        self._coverage = None

        self._stepwise_cov_pen = (
                stepwise_penalty and self.global_scorer.has_cov_pen)
        self._vanilla_cov_pen = (
            not stepwise_penalty and self.global_scorer.has_cov_pen)
        self._cov_pen = self.global_scorer.has_cov_pen
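One detail worth calling out in the constructor above: topk_log_probs starts as [0, -inf, ..., -inf] for each sentence, so at the first decoding step only beam 0 can be expanded and the search does not produce beam_size identical hypotheses. A standalone check (plain PyTorch, no OpenNMT dependencies):

import torch

beam_size, batch_size = 4, 2
topk_log_probs = torch.tensor(
    [0.0] + [float("-inf")] * (beam_size - 1)
).repeat(batch_size)
# one live beam per sentence, the rest suppressed
print(topk_log_probs.view(batch_size, beam_size))
# tensor([[0., -inf, -inf, -inf],
#         [0., -inf, -inf, -inf]])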
Example #4
 def test_constrained_expected_improvement_batch(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         mean = torch.tensor(
             [[-0.5, 0.0, 5.0, 0.0], [0.0, 0.0, 5.0, 0.0], [0.5, 0.0, 5.0, 0.0]],
             device=device,
             dtype=dtype,
         ).unsqueeze(dim=-2)
         variance = torch.ones(3, 4, device=device, dtype=dtype).unsqueeze(dim=-2)
         N = torch.distributions.Normal(loc=0.0, scale=1.0)
         a = N.icdf(torch.tensor(0.75))  # get a so that P(-a <= N <= a) = 0.5
         mm = MockModel(MockPosterior(mean=mean, variance=variance))
         module = ConstrainedExpectedImprovement(
             model=mm,
             best_f=0.0,
             objective_index=0,
             constraints={1: [None, 0], 2: [5.0, None], 3: [-a, a]},
         )
         X = torch.empty(3, 1, 1, device=device, dtype=dtype)  # dummy
         ei = module(X)
         ei_expected_unconstrained = torch.tensor(
             [0.19780, 0.39894, 0.69780], device=device, dtype=dtype
         )
         ei_expected = ei_expected_unconstrained * 0.5 * 0.5 * 0.5
         self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
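The 0.5 * 0.5 * 0.5 factor in the assertion is the probability that all three constraints are satisfied under the mock posterior: each constraint output has unit variance and is positioned so that its feasibility probability is exactly 0.5. A quick standalone verification:

import torch
from torch.distributions import Normal

N = Normal(0.0, 1.0)
a = N.icdf(torch.tensor(0.75))
p1 = N.cdf(torch.tensor(0.0))      # output 1: mean 0, upper bound 0 -> 0.5
p2 = 1 - N.cdf(torch.tensor(0.0))  # output 2: mean 5, lower bound 5 -> 0.5
p3 = N.cdf(a) - N.cdf(-a)          # output 3: mean 0, bounds [-a, a] -> 0.5
print(p1 * p2 * p3)                # 0.125, the factor applied above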
Example #5
    def __call__(self, match_quality_matrix):
        """
        Args:
            match_quality_matrix (Tensor[float]): an MxN tensor, containing the
            pairwise quality between M ground-truth elements and N predicted elements.

        Returns:
            matches (Tensor[int64]): an N tensor where N[i] is a matched gt in
            [0, M - 1] or a negative value indicating that prediction i could not
            be matched.
        """
        if match_quality_matrix.numel() == 0:
            # handle empty case
            device = match_quality_matrix.device
            return torch.empty((0,), dtype=torch.int64, device=device)

        # match_quality_matrix is M (gt) x N (predicted)
        # Max over gt elements (dim 0) to find best gt candidate for each prediction
        matched_vals, matches = match_quality_matrix.max(dim=0)
        if self.allow_low_quality_matches:
            all_matches = matches.clone()

        # Assign candidate matches with low quality to negative (unassigned) values
        below_low_threshold = matched_vals < self.low_threshold
        between_thresholds = (matched_vals >= self.low_threshold) & (
            matched_vals < self.high_threshold
        )
        matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD
        matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS

        if self.allow_low_quality_matches:
            self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)

        return matches
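The core of the matcher (max over ground truths, then thresholding) can be illustrated in isolation. The values and thresholds below are hypothetical; the real class uses the named constants BELOW_LOW_THRESHOLD and BETWEEN_THRESHOLDS instead of raw negatives:

import torch

low_threshold, high_threshold = 0.4, 0.7
match_quality_matrix = torch.tensor([[0.9, 0.3, 0.5],
                                     [0.2, 0.1, 0.6]])  # 2 gt x 3 predictions
matched_vals, matches = match_quality_matrix.max(dim=0)
matches[matched_vals < low_threshold] = -1                  # unmatched
matches[(matched_vals >= low_threshold)
        & (matched_vals < high_threshold)] = -2             # ambiguous
print(matches)  # tensor([ 0, -1, -2])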
Example #6
def map_batch(func, *inputs):
    '''Apply a function on a batch of data and stack the results.

    Args:
        func: The function to map.
        inputs: Batch arguments list (Batch, *size).

    Returns:
        Similar to `torch.stack([func(*x) for x in zip(*inputs)])` but faster.
        In case `func` returns a tuple, this function will also return a tuple.
    '''
    # compute the output of the first instance in the batch
    inputs = list(even_zip(*inputs))
    res = func(*inputs[0])
    single = not isinstance(res, tuple)
    as_tuple = (lambda x: (x,)) if single else (lambda x: x)
    res = as_tuple(res)

    # if more outputs are expected, keep computing them
    if len(inputs) == 1:
        out = tuple(r.unsqueeze(0) for r in res)
    else:
        out = tuple(torch.empty(len(inputs), *r.size(),
                                device=r.device, dtype=r.dtype)
                    for r in res)
        for i, args in enumerate(inputs):
            if i > 0:
                res = as_tuple(func(*args))
            for result, output in zip(res, out):
                output[i, ...] = result
    return out[0] if single else out
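Hypothetical usage of map_batch, assuming even_zip (not shown in this snippet) behaves like zip for equal-length batches:

import torch

xs = torch.randn(4, 3)
ys = torch.randn(4, 3)
out = map_batch(lambda x, y: x + y, xs, ys)
# matches the naive per-instance loop
assert torch.equal(out, torch.stack([x + y for x, y in zip(xs, ys)]))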
Example #7
    def __init__(self):
        super(BIAS_FRAME, self).__init__()

        # self.bias_frame = Variable(torch.empty(1, 3, 480, 640).uniform_(0, 1), requires_grad=True)

        # self.bias_frame = nn.Parameter(torch.empty(1, 3, 480, 640).uniform_(-1, 1))
        self.bias_frame = nn.Parameter(torch.empty(1, 3, 1, 1).uniform_(-1, 1))
Example #8
    def test_sqrt(self):
        class MyModel(torch.nn.Module):
            def __init__(self):
                super(MyModel, self).__init__()

            def forward(self, input):
                return input.sqrt()
        input = Variable(torch.empty(BATCH_SIZE, 10, 10).uniform_(4, 9))
        self.run_model_test(MyModel(), train=False, input=input, batch_size=BATCH_SIZE)
Example #9
    def test_expected_improvement(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            mean = torch.tensor([[-0.5]], device=device, dtype=dtype)
            variance = torch.ones(1, 1, device=device, dtype=dtype)
            mm = MockModel(MockPosterior(mean=mean, variance=variance))

            module = ExpectedImprovement(model=mm, best_f=0.0)
            X = torch.empty(1, 1, device=device, dtype=dtype)  # dummy
            ei = module(X)
            ei_expected = torch.tensor(0.19780, device=device, dtype=dtype)
            self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

            module = ExpectedImprovement(model=mm, best_f=0.0, maximize=False)
            X = torch.empty(1, 1, device=device, dtype=dtype)  # dummy
            ei = module(X)
            ei_expected = torch.tensor(0.6978, device=device, dtype=dtype)
            self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
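The expected values 0.19780 and 0.6978 follow from the closed-form EI for a Gaussian posterior, EI = sigma * (z * Phi(z) + phi(z)) with z = (mu - best_f) / sigma (sign flipped for minimization). A quick check with the standard library:

import math

def expected_improvement(mu, sigma, best_f, maximize=True):
    # closed-form EI under a Gaussian posterior
    z = (mu - best_f) / sigma if maximize else (best_f - mu) / sigma
    Phi = 0.5 * (1 + math.erf(z / math.sqrt(2)))
    phi = math.exp(-z * z / 2) / math.sqrt(2 * math.pi)
    return sigma * (z * Phi + phi)

print(expected_improvement(-0.5, 1.0, 0.0))                  # ~0.19780
print(expected_improvement(-0.5, 1.0, 0.0, maximize=False))  # ~0.69780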
Example #10
 def __init__(self, nf, rf, nx):
     super(Conv1D, self).__init__()
     self.rf = rf
     self.nf = nf
     if rf == 1:  # faster 1x1 conv
         w = torch.empty(nx, nf)
         nn.init.normal_(w, std=0.02)
         self.w = Parameter(w)
         self.b = Parameter(torch.zeros(nf))
     else:  # was used to train LM
         raise NotImplementedError
Example #11
 def __init__(self, nf: int, rf: int, nx: int) -> None:
     super().__init__()
     self.rf = rf
     self.nf = nf
     if rf == 1:
         w = torch.empty(nx, nf)
         torch.nn.init.normal_(w, std=0.02)
         self.w = Parameter(w)
         self.b = Parameter(torch.zeros(nf))
     else:
         raise NotImplementedError
Example #12
 def test_NormalQMCEngineSeededOut(self):
     # test even dimension
     engine = NormalQMCEngine(d=2, seed=12345)
     out = torch.empty(2, 2)
     self.assertIsNone(engine.draw(n=2, out=out))
     samples_expected = torch.tensor(
         [[-0.63099602, -1.32950772], [0.29625805, 1.86425618]]
     )
     self.assertTrue(torch.allclose(out, samples_expected))
     # test odd dimension
     engine = NormalQMCEngine(d=3, seed=12345)
     out = torch.empty(2, 3)
     self.assertIsNone(engine.draw(n=2, out=out))
     samples_expected = torch.tensor(
         [
             [1.83169884, -1.40473647, 0.24334828],
             [0.36596099, 1.2987395, -1.47556275],
         ]
     )
     self.assertTrue(torch.allclose(out, samples_expected))
Example #13
    def test_MultivariateNormalQMCEngineSeededOut(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):

            # test even dimension
            with manual_seed(54321):
                a = torch.randn(2, 2)
                cov = a @ a.transpose(-1, -2) + torch.rand(2).diag()

            mean = torch.zeros(2, device=device, dtype=dtype)
            cov = cov.to(device=device, dtype=dtype)
            engine = MultivariateNormalQMCEngine(mean=mean, cov=cov, seed=12345)
            out = torch.empty(2, 2, device=device, dtype=dtype)
            self.assertIsNone(engine.draw(n=2, out=out))
            samples_expected = torch.tensor(
                [[-0.849047422, -0.713852942], [0.398635030, 1.350660801]],
                device=device,
                dtype=dtype,
            )
            self.assertTrue(torch.allclose(out, samples_expected))

            # test odd dimension
            with manual_seed(54321):
                a = torch.randn(3, 3)
                cov = a @ a.transpose(-1, -2) + torch.rand(3).diag()

            mean = torch.zeros(3, device=device, dtype=dtype)
            cov = cov.to(device=device, dtype=dtype)
            engine = MultivariateNormalQMCEngine(mean, cov, seed=12345)
            out = torch.empty(2, 3, device=device, dtype=dtype)
            self.assertIsNone(engine.draw(n=2, out=out))
            samples_expected = torch.tensor(
                [
                    [3.113158941, -3.262257099, -0.819938779],
                    [0.621987879, 2.352285624, -1.992680788],
                ],
                device=device,
                dtype=dtype,
            )
            self.assertTrue(torch.allclose(out, samples_expected))
Example #14
 def test_posterior_mean(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         mean = torch.tensor([[0.25]], device=device, dtype=dtype)
         mm = MockModel(MockPosterior(mean=mean))
         module = PosteriorMean(model=mm)
         X = torch.empty(1, 1, device=device, dtype=dtype)
         pm = module(X)
         self.assertTrue(torch.equal(pm, mean.view(-1)))
         # check for proper error if multi-output model
         mean2 = torch.rand(1, 2, device=device, dtype=dtype)
         mm2 = MockModel(MockPosterior(mean=mean2))
         module2 = PosteriorMean(model=mm2)
         with self.assertRaises(UnsupportedError):
             module2(X)
Example #15
def test():
    a = np.random.randn(1337)
    tvm_a = tvm.nd.array(a)
    np.testing.assert_equal(tvm.nd.from_dlpack(tvm_a.to_dlpack()).asnumpy(), a)

    try:
        import torch
        import torch.utils.dlpack

        x = torch.rand(56, 56)
        tvm_x = tvm.nd.from_dlpack(torch.utils.dlpack.to_dlpack(x))
        np.testing.assert_equal(x.numpy(), tvm_x.asnumpy())
        y = tvm.nd.from_dlpack(tvm_x.to_dlpack())
        np.testing.assert_equal(y.asnumpy(), tvm_x.asnumpy())
        np.testing.assert_equal(torch.utils.dlpack.from_dlpack(y.to_dlpack()).numpy(), tvm_x.asnumpy())

        n = tvm.convert(137)
        xx = torch.rand(137, 137)
        yy = torch.rand(137, 137)
        zz = xx.mm(yy)
        XX = tvm.placeholder((n,n), name='X')
        YY = tvm.placeholder((n,n), name='Y')

        k = tvm.reduce_axis((0, n), name='k')
        ZZ = tvm.compute((n,n), lambda i,j : tvm.sum(XX[i,k]*YY[k,j], axis=k))
        s = tvm.create_schedule(ZZ.op)
        f = tvm.build(s, [XX, YY, ZZ], target_host='llvm', name='f')

        f_pytorch = to_pytorch_func(f)
        zz2 = torch.empty(137, 137)
        f_pytorch(xx, yy, zz2)
        tvm.testing.assert_allclose(zz.numpy(), zz2.numpy(), rtol=1e-6)

    except ImportError:
        pass
Example #16
 def dummy_inputs(cls, params, init_case):
     max_seq_len = params["max_seq_len"]
     batch_size = params["batch_size"]
     fv_sizes = init_case["feat_vocab_sizes"]
     n_words = init_case["word_vocab_size"]
     voc_sizes = [n_words] + fv_sizes
     pad_idxs = [init_case["word_padding_idx"]] + \
         init_case["feat_padding_idx"]
     lengths = torch.randint(0, max_seq_len, (batch_size,))
     lengths[0] = max_seq_len
     inps = torch.empty((max_seq_len, batch_size, len(voc_sizes)),
                        dtype=torch.long)
     for f, (voc_size, pad_idx) in enumerate(zip(voc_sizes, pad_idxs)):
         for b, len_ in enumerate(lengths):
             inps[:len_, b, f] = torch.randint(0, voc_size-1, (len_,))
             inps[len_:, b, f] = pad_idx
     return inps
Example #17
def iteration(inputs):
    # targets, align half of the audio
    targets = torch.ones(int(batch_size * ((seconds * 100) / 2)))
    target_sizes = torch.empty(batch_size, dtype=torch.int).fill_(int((seconds * 100) / 2))
    input_percentages = torch.ones(batch_size).fill_(1)
    input_sizes = input_percentages.mul_(int(inputs.size(3))).int()

    out, output_sizes = model(inputs, input_sizes)
    out = out.transpose(0, 1)  # TxNxH

    loss = criterion(out, targets, output_sizes, target_sizes)
    loss = loss / inputs.size(0)  # average the loss by minibatch
    # compute gradient
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    torch.cuda.synchronize()
    del loss
    del out
Example #18
    def __init__(self, input_dim_capsule=gru_len * 2, num_capsule=Num_capsule,
                 dim_capsule=Dim_capsule, routings=Routings,
                 kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        super(Caps_Layer, self).__init__(**kwargs)

        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_size = kernel_size  # currently unused
        self.share_weights = share_weights
        if activation == 'default':
            self.activation = self.squash
        else:
            self.activation = nn.ReLU(inplace=True)

        if self.share_weights:
            self.W = nn.Parameter(
                nn.init.xavier_normal_(t.empty(1, input_dim_capsule, self.num_capsule * self.dim_capsule)))
        else:
            self.W = nn.Parameter(
                t.randn(BATCH_SIZE, input_dim_capsule, self.num_capsule * self.dim_capsule))  # 64, i.e. batch_size
Example #19
 def test_expected_improvement_batch(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         mean = torch.tensor([-0.5, 0.0, 0.5], device=device, dtype=dtype).view(
             3, 1, 1
         )
         variance = torch.ones(3, 1, 1, device=device, dtype=dtype)
         mm = MockModel(MockPosterior(mean=mean, variance=variance))
         module = ExpectedImprovement(model=mm, best_f=0.0)
         X = torch.empty(3, 1, 1, device=device, dtype=dtype)  # dummy
         ei = module(X)
         ei_expected = torch.tensor(
             [0.19780, 0.39894, 0.69780], device=device, dtype=dtype
         )
         self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
         # check for proper error if multi-output model
         mean2 = torch.rand(3, 1, 2, device=device, dtype=dtype)
         variance2 = torch.rand(3, 1, 2, device=device, dtype=dtype)
         mm2 = MockModel(MockPosterior(mean=mean2, variance=variance2))
         module2 = ExpectedImprovement(model=mm2, best_f=0.0)
         with self.assertRaises(UnsupportedError):
             module2(X)
Example #20
def loss_fn2(genFGen2, args, model):
    first_term_loss = compute_loss2(genFGen2, args, model)
    #first_term_loss2 = compute_loss2(genFGen2, args, model)
    #first_term_loss = torch.log(first_term_loss2 / (1.0 - first_term_loss2))

    #print('')
    #print(first_term_loss)

    #mu = torch.from_numpy(np.array([2.805741, -0.00889241], dtype="float32")).to(device)
    #S = torch.from_numpy(np.array([[pow(0.3442525,2), 0.0], [0.0, pow(0.35358343,2)]], dtype="float32")).to(device)

    #storeAll = torch.from_numpy(np.array(0.0, dtype="float32")).to(device)
    #toUse_storeAll = torch.distributions.MultivariateNormal(loc=mu, covariance_matrix=S)
    #for loopIndex_i in range(genFGen2.size()[0]):
    #    storeAll += torch.exp(toUse_storeAll.log_prob(genFGen2[loopIndex_i:1 + loopIndex_i, :].squeeze(0)))
    #storeAll /= genFGen2.size()[0]

    #print(storeAll)
    #print('')

    #print('')
    #print(compute_loss2(mu.unsqueeze(0), args, model))

    #print(torch.exp(toUse_storeAll.log_prob(mu)))
    #print('')

    #first_term_loss = storeAll

    xData = toy_data.inf_train_gen(args.data, batch_size=args.batch_size)
    xData = torch.from_numpy(xData).type(torch.float32).to(device)

    #var2 = []
    #for i in genFGen2:
    #    var1 = []
    #    for j in xData:
    #        new_stuff = torch.dist(i, j, 2)  # this is a tensor
    #        var1.append(new_stuff.unsqueeze(0))
    #    var1_tensor = torch.cat(var1)
    #    second_term_loss2 = torch.min(var1_tensor) / args.batch_size
    #    var2.append(second_term_loss2.unsqueeze(0))
    #var2_tensor = torch.cat(var2)
    #second_term_loss = torch.mean(var2_tensor) / args.batch_size
    #second_term_loss *= 100.0

    #print('')
    #print(second_term_loss)

    # If you know in advance the size of the final tensor, you can allocate
    # an empty tensor beforehand and fill it in the for loop.

    #x = torch.empty(size=(len(items), 768))
    #for i in range(len(items)):
    #    x[i] = calc_result

    #print(len(genFGen2))
    #print(genFGen2.shape[0])
    # len(.) and not .shape[0]

    #print(len(xData))
    #print(xData.shape[0])
    # Use len(.) and not .shape[0]
    """
    #second_term_loss = torch.empty(size=(len(genFGen2), len(xData))).to(device)
    #second_term_loss = torch.empty(size=(len(genFGen2), len(xData)), device=device, requires_grad=True)
    #second_term_loss3 = torch.empty(size=(len(genFGen2), len(xData)), device=device, requires_grad=True)
    #second_term_loss3 = torch.empty(size=(len(genFGen2), len(xData)), device=device, requires_grad=False)
    second_term_loss3 = torch.empty(size=(args.batch_size, args.batch_size), device=device, requires_grad=False)
    #for i in range(len(genFGen2)):
    for i in range(args.batch_size):
        #for j in range(len(xData)):
        for j in range(args.batch_size):
            #second_term_loss[i, j] = torch.dist(genFGen2[i,:], xData[j,:], 2)
            #second_term_loss[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1)
            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1)

            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1)
            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1)

            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1)
            #second_term_loss3[i, j] = torch.tensor(0.1, requires_grad=True)

            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1)
            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1).requires_grad_()

            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1).requires_grad_()
            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 2).requires_grad_()**2

            #second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 2).requires_grad_()**2
            second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 2).requires_grad_()

            #second_term_loss[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 2)**2
    #second_term_loss2, _ = torch.min(second_term_loss, 1)
    second_term_loss2, _ = torch.min(second_term_loss3, 1)
    #second_term_loss = 5000.0 * torch.mean(second_term_loss2) / (args.batch_size**2)
    #second_term_loss = lambda1 * torch.mean(second_term_loss2) / (args.batch_size ** 2)
    #second_term_loss = lambda1 * torch.mean(second_term_loss2)
    second_term_loss = torch.mean(second_term_loss2)

    #print(second_term_loss)
    #print('')

    print('')
    print(first_term_loss)
    print(second_term_loss)

    print('')
    """

    second_term_loss32 = torch.empty(args.batch_size,
                                     device=device,
                                     requires_grad=False)
    for i in range(args.batch_size):
        #second_term_loss22 = torch.norm(genFGen2[i, :] - xData, p='fro', dim=1).requires_grad_()
        second_term_loss22 = torch.norm(genFGen2[i, :] - xData, p=None,
                                        dim=1).requires_grad_()
        #print(second_term_loss22.shape)
        second_term_loss32[i] = torch.min(second_term_loss22)
    #print(second_term_loss32)
    #print(second_term_loss32.shape)
    #print(torch.norm(genFGen2 - xData, p=None, dim=0).shape)
    #second_term_loss22 = torch.min(second_term_loss32)
    #print(second_term_loss22)
    #print(second_term_loss22.shape)
    second_term_loss2 = torch.mean(second_term_loss32)
    #print(second_term_loss2)
    #print(second_term_loss2.shape)

    #print('')

    #print(second_term_loss)
    #print(second_term_loss2)

    print('')
    print(first_term_loss)
    print(second_term_loss2)

    #third_term_loss = torch.from_numpy(np.array(0.0, dtype='float32')).to(device)
    #for i in range(args.batch_size):
    #    for j in range(args.batch_size):
    #        if i != j:
    #            # third_term_loss += ((np.linalg.norm(genFGen3[i,:].cpu().detach().numpy()-genFGen3[j,:].cpu().detach().numpy())) / (np.linalg.norm(genFGen2[i,:].cpu().detach().numpy()-genFGen2[j,:].cpu().detach().numpy())))
    #
    #            # third_term_loss += ((torch.norm(genFGen3[i,:]-genFGen3[j,:], 2)) / (torch.norm(genFGen2[i,:]-genFGen2[j,:], 2)))
    #            # third_term_loss += ((torch.norm(genFGen3[i,:]-genFGen3[j,:])) / (torch.norm(genFGen2[i,:]-genFGen2[j,:])))
    #
    #            # third_term_loss += ((torch.norm(genFGen3[i,:] - genFGen3[j,:])) / (torch.norm(genFGen2[i,:] - genFGen2[j,:])))
    #            third_term_loss += ((torch.dist(genFGen3[i, :], genFGen3[j, :], 2)) / (torch.dist(genFGen2[i, :], genFGen2[j, :], 2)))
    #    third_term_loss /= (args.batch_size - 1)
    #third_term_loss /= args.batch_size
    ##third_term_loss *= 1000.0

    genFGen3 = torch.randn([args.batch_size, 2],
                           device=device,
                           requires_grad=True)
    #third_term_loss = torch.from_numpy(np.array(0.0, dtype='float32')).to(device)
    third_term_loss3 = torch.empty(size=(args.batch_size, args.batch_size),
                                   device=device,
                                   requires_grad=False)
    for i in range(args.batch_size):
        for j in range(args.batch_size):
            if i != j:
                # third_term_loss += ((np.linalg.norm(genFGen3[i,:].cpu().detach().numpy()-genFGen3[j,:].cpu().detach().numpy())) / (np.linalg.norm(genFGen2[i,:].cpu().detach().numpy()-genFGen2[j,:].cpu().detach().numpy())))

                # third_term_loss += ((torch.norm(genFGen3[i,:]-genFGen3[j,:], 2)) / (torch.norm(genFGen2[i,:]-genFGen2[j,:], 2)))
                # third_term_loss += ((torch.norm(genFGen3[i,:]-genFGen3[j,:])) / (torch.norm(genFGen2[i,:]-genFGen2[j,:])))

                # third_term_loss += ((torch.norm(genFGen3[i,:] - genFGen3[j,:])) / (torch.norm(genFGen2[i,:] - genFGen2[j,:])))
                #third_term_loss += ((torch.dist(genFGen3[i, :], genFGen3[j, :], 2)) / (torch.dist(genFGen2[i, :], genFGen2[j, :], 2)))

                #third_term_loss += ((torch.dist(genFGen3[i, :], genFGen3[j, :], 2)) / (torch.dist(genFGen2[i, :], genFGen2[j, :], 2)))
                #third_term_loss3[i][j] = ((torch.dist(genFGen3[i, :], genFGen3[j, :], 2).requires_grad_()) / (torch.dist(genFGen2[i, :], genFGen2[j, :], 2).requires_grad_()))

                third_term_loss3[i][j] = (
                    (torch.dist(genFGen3[i, :], genFGen3[j, :],
                                2).requires_grad_()) /
                    (torch.dist(genFGen2[i, :], genFGen2[j, :],
                                2).requires_grad_()))
    #third_term_loss /= (args.batch_size - 1)
    #third_term_loss2 = third_term_loss3 / (args.batch_size - 1)
    third_term_loss2 = torch.mean(third_term_loss3, 1)
    #third_term_loss /= args.batch_size
    #third_term_loss = third_term_loss2 / args.batch_size
    #third_term_loss = torch.mean(third_term_loss2)
    #third_term_loss = 0.01 * torch.mean(third_term_loss2)
    #third_term_loss = lambda2 * torch.mean(third_term_loss2)
    third_term_loss = torch.mean(third_term_loss2)
    #third_term_loss *= 1000.0

    print(third_term_loss)
    print('')

    #print('')
    #asdfsfa

    #return first_term_loss + second_term_loss + third_term_loss
    #return first_term_loss + second_term_loss

    #return second_term_loss
    #return first_term_loss + second_term_loss
    #return first_term_loss + second_term_loss + third_term_loss

    #return first_term_loss + second_term_loss + third_term_loss
    return first_term_loss + second_term_loss2 + third_term_loss
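The pre-allocation pattern referenced in the comments above ("If you know in advance the size of the final tensor, you can allocate an empty tensor beforehand and fill it in the for loop"), in isolation:

import torch

# when the final size is known, allocate once with torch.empty and
# fill row by row instead of growing a Python list
items = [torch.randn(768) for _ in range(10)]
x = torch.empty(len(items), 768)
for i, item in enumerate(items):
    x[i] = item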
Example #21
    def create_sentence_pair_dataset(data: list,
                                     tokenizer: Union[BertTokenizer, AlbertTokenizer],
                                     save_directory=None,
                                     max_sequence_length=128):
        max_bert_input_length = 0
        for sentence_pair in data:
            sentence_1_tokenized, sentence_2_tokenized = tokenizer.tokenize(
                sentence_pair['sentence_1']), tokenizer.tokenize(sentence_pair['sentence_2'])
            _truncate_seq_pair(sentence_1_tokenized, sentence_2_tokenized,
                               max_sequence_length - 3)  # accounting for positioning tokens

            max_bert_input_length = max(max_bert_input_length,
                                        len(sentence_1_tokenized) + len(sentence_2_tokenized) + 3)
            sentence_pair['sentence_1_tokenized'] = sentence_1_tokenized
            sentence_pair['sentence_2_tokenized'] = sentence_2_tokenized

        bert_input_ids = torch.empty((len(data), max_bert_input_length), dtype=torch.long)
        bert_token_type_ids = torch.empty((len(data), max_bert_input_length), dtype=torch.long)
        bert_attention_masks = torch.empty((len(data), max_bert_input_length), dtype=torch.long)
        scores = torch.empty((len(data), 1), dtype=torch.float)

        for idx, sentence_pair in enumerate(data):
            tokens = []
            input_type_ids = []

            tokens.append("[CLS]")
            input_type_ids.append(0)
            for token in sentence_pair['sentence_1_tokenized']:
                tokens.append(token)
                input_type_ids.append(0)
            tokens.append("[SEP]")
            input_type_ids.append(0)

            for token in sentence_pair['sentence_2_tokenized']:
                tokens.append(token)
                input_type_ids.append(1)
            tokens.append("[SEP]")
            input_type_ids.append(1)

            input_ids = tokenizer.convert_tokens_to_ids(tokens)

            attention_masks = [1] * len(input_ids)
            while len(input_ids) < max_bert_input_length:
                input_ids.append(0)
                attention_masks.append(0)
                input_type_ids.append(0)

            bert_input_ids[idx] = torch.tensor(input_ids, dtype=torch.long)
            bert_token_type_ids[idx] = torch.tensor(input_type_ids, dtype=torch.long)
            bert_attention_masks[idx] = torch.tensor(attention_masks, dtype=torch.long)
            if 'similarity' not in sentence_pair or sentence_pair['similarity'] is None:
                scores[idx] = torch.tensor(float('nan'), dtype=torch.float)
            else:
                scores[idx] = torch.tensor(sentence_pair['similarity'], dtype=torch.float)

        if save_directory:
            torch.save(bert_input_ids, os.path.join(save_directory, "bert_input_ids.pt"))
            torch.save(bert_token_type_ids, os.path.join(save_directory, "bert_token_type_ids.pt"))
            torch.save(bert_attention_masks, os.path.join(save_directory, "bert_attention_masks.pt"))
            torch.save(scores, os.path.join(save_directory, "scores.pt"))

        return (bert_input_ids, bert_token_type_ids, bert_attention_masks), scores
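A minimal illustration of the expected input format (hypothetical sentences; assumes a tokenizer compatible with the signature above, e.g. from the transformers package):

data = [
    {'sentence_1': 'A man is playing a guitar.',
     'sentence_2': 'Someone plays an instrument.',
     'similarity': 3.8},
]
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# (input_ids, type_ids, masks), scores = create_sentence_pair_dataset(data, tokenizer)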
Example #22
    def finalize_hypos(
        self,
        step: int,
        bbsz_idx,
        eos_scores,
        tokens,
        scores,
        finalized: List[List[Dict[str, Tensor]]],
        finished: List[bool],
        beam_size: int,
        attn: Optional[Tensor],
        src_lengths,
        max_len: int,
    ):
        """Finalize hypothesis, store finalized information in `finalized`, and change `finished` accordingly.
        A sentence is finalized when {beam_size} finished items have been collected for it.

        Returns number of sentences (not beam items) being finalized.
        These will be removed from the batch and not processed further.
        Args:
            bbsz_idx (Tensor):
        """
        assert bbsz_idx.numel() == eos_scores.numel()

        # clone relevant token and attention tensors.
        # tokens is (batch * beam, max_len). So the index_select
        # gets the newly EOS rows, then selects cols 1..{step + 2}
        tokens_clone = tokens.index_select(
            0, bbsz_idx)[:, 1:step + 2]  # skip the first index, which is EOS

        tokens_clone[:, step] = self.eos
        attn_clone = (attn.index_select(0, bbsz_idx)[:, :, 1:step + 2]
                      if attn is not None else None)

        # compute scores per token position
        pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1]
        pos_scores[:, step] = eos_scores
        # convert from cumulative to per-position scores
        pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

        # normalize sentence-level scores
        if self.normalize_scores:
            eos_scores /= (step + 1)**self.len_penalty

        # cum_unfin records which sentences in the batch are finished.
        # It helps match indexing between (a) the original sentences
        # in the batch and (b) the current, possibly-reduced set of
        # sentences.
        cum_unfin: List[int] = []
        prev = 0
        for f in finished:
            if f:
                prev += 1
            else:
                cum_unfin.append(prev)

        # The keys here are of the form "{sent}_{unfin_idx}", where
        # "unfin_idx" is the index in the current (possibly reduced)
        # list of sentences, and "sent" is the index in the original,
        # unreduced batch
        # set() is not supported in script export
        sents_seen: Dict[str, Optional[Tensor]] = {}

        # For every finished beam item
        for i in range(bbsz_idx.size()[0]):
            idx = bbsz_idx[i]
            score = eos_scores[i]
            # sentence index in the current (possibly reduced) batch
            unfin_idx = idx // beam_size
            # sentence index in the original (unreduced) batch
            sent = unfin_idx + cum_unfin[unfin_idx]
            # Cannot create dict for key type '(int, int)' in torchscript.
            # The workaround is to cast int to string
            seen = str(sent.item()) + "_" + str(unfin_idx.item())
            if seen not in sents_seen:
                sents_seen[seen] = None

            if self.match_source_len and step > src_lengths[unfin_idx]:
                score = torch.tensor(-math.inf).to(score)

            # An input sentence (among those in a batch) is finished when
            # beam_size hypotheses have been collected for it
            if len(finalized[sent]) < beam_size:
                if attn_clone is not None:
                    # remove padding tokens from attn scores
                    hypo_attn = attn_clone[i]
                else:
                    hypo_attn = torch.empty(0)

                finalized[sent].append({
                    "tokens": tokens_clone[i],
                    "score": score,
                    "attention": hypo_attn,  # src_len x tgt_len
                    "alignment": torch.empty(0),
                    "positional_scores": pos_scores[i],
                })

        newly_finished: List[int] = []

        for seen in sents_seen.keys():
            # check termination conditions for this sentence
            sent: int = int(float(seen.split("_")[0]))
            unfin_idx: int = int(float(seen.split("_")[1]))

            if not finished[sent] and self.is_finished(
                    step, unfin_idx, max_len, len(finalized[sent]), beam_size):
                finished[sent] = True
                newly_finished.append(unfin_idx)

        return newly_finished
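The cumulative-to-per-position score conversion used above, in isolation:

import torch

cum = torch.tensor([[-0.5, -1.2, -1.9]])  # cumulative log-probs per step
pos = cum.clone()
pos[:, 1:] = pos[:, 1:] - pos[:, :-1]     # each entry is now that step's score
print(pos)  # tensor([[-0.5000, -0.7000, -0.7000]])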
Example #23
def scatter_gather(data):
    """
    This function gathers data from multiple processes, and returns them
    in a list, as they were obtained from each process.

    This function is useful for retrieving data from multiple processes,
    when launching the code with torch.distributed.launch

    Note: this function is slow and should not be used in tight loops, i.e.,
    do not use it in the training loop.

    Arguments:
        data: the object to be gathered from multiple processes.
            It must be serializable

    Returns:
        result (list): a list with as many elements as there are processes,
            where each element i in the list corresponds to the data that was
            gathered from the process of rank i.
    """
    # strategy: the main process creates a temporary directory, and communicates
    # the location of the temporary directory to all other processes.
    # each process will then serialize the data to the folder defined by
    # the main process, and then the main process reads all of the serialized
    # files and returns them in a list
    if not torch.distributed.deprecated.is_initialized():
        return [data]
    synchronize()
    # get rank of the current process
    rank = torch.distributed.deprecated.get_rank()

    # the data to communicate should be small
    data_to_communicate = torch.empty(256, dtype=torch.uint8, device="cuda")
    if rank == 0:
        # manually creates a temporary directory, that needs to be cleaned
        # afterwards
        tmp_dir = tempfile.mkdtemp()
        _encode(data_to_communicate, tmp_dir)

    synchronize()
    # the main process (rank=0) communicates the data to all processes
    torch.distributed.deprecated.broadcast(data_to_communicate, 0)

    # get the data that was communicated
    tmp_dir = _decode(data_to_communicate)

    # each process serializes to a different file
    file_template = "file{}.pth"
    tmp_file = os.path.join(tmp_dir, file_template.format(rank))
    torch.save(data, tmp_file)

    # synchronize before loading the data
    synchronize()

    # only the master process returns the data
    if rank == 0:
        data_list = []
        world_size = torch.distributed.deprecated.get_world_size()
        for r in range(world_size):
            file_path = os.path.join(tmp_dir, file_template.format(r))
            d = torch.load(file_path)
            data_list.append(d)
            # cleanup
            os.remove(file_path)
        # cleanup
        os.rmdir(tmp_dir)
        return data_list
Example #24
def test_two_stage_forward(cfg_file):
    models_with_semantic = [
        'htc/htc_r50_fpn_1x_coco.py',
        'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    ]
    if cfg_file in models_with_semantic:
        with_semantic = True
    else:
        with_semantic = False

    model = _get_detector_cfg(cfg_file)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None

    # Save cost
    if cfg_file in [
            'seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501
    ]:
        model.roi_head.bbox_head.num_classes = 80
        model.roi_head.bbox_head.loss_cls.num_classes = 80
        model.roi_head.mask_head.num_classes = 80
        model.test_cfg.rcnn.score_thr = 0.05
        model.test_cfg.rcnn.max_per_img = 100

    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (1, 3, 128, 128)

    # Test forward train with a non-empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape,
                                num_items=[10],
                                with_semantic=with_semantic)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    loss.requires_grad_(True)
    assert float(loss.item()) > 0
    loss.backward()

    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape,
                                num_items=[0],
                                with_semantic=with_semantic)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    loss.requires_grad_(True)
    assert float(loss.item()) > 0
    loss.backward()

    # Test RoI forward train with an empty proposals
    if cfg_file in [
            'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py'  # noqa: E501
    ]:
        mm_inputs.pop('gt_semantic_seg')

    feature = detector.extract_feat(imgs[0][None, :])
    losses = detector.roi_head.forward_train(feature, img_metas,
                                             [torch.empty(
                                                 (0, 5))], **mm_inputs)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)
    cascade_models = [
        'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
        'htc/htc_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    ]
    # test empty proposal in roi_head
    with torch.no_grad():
        # test no proposal in the whole batch
        detector.simple_test(imgs[0][None, :], [img_metas[0]],
                             proposals=[torch.empty((0, 4))])

        # test no proposal of aug
        features = detector.extract_feats([imgs[0][None, :]] * 2)
        detector.roi_head.aug_test(features, [torch.empty((0, 4))] * 2,
                                   [[img_metas[0]]] * 2)

        # test rcnn_test_cfg is None
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :])
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]], [torch.empty((0, 4))], None)
            assert all([bbox.shape == torch.Size((0, 4)) for bbox in bboxes])
            assert all([
                score.shape == torch.Size(
                    (0, detector.roi_head.bbox_head.fc_cls.out_features))
                for score in scores
            ])

        # test no proposal in the some image
        x1y1 = torch.randint(1, 100, (10, 2)).float()
        # x2y2 must be greater than x1y1
        x2y2 = x1y1 + torch.randint(1, 100, (10, 2))
        detector.simple_test(
            imgs[0][None, :].repeat(2, 1, 1, 1), [img_metas[0]] * 2,
            proposals=[torch.empty((0, 4)),
                       torch.cat([x1y1, x2y2], dim=-1)])

        # test no proposal of aug
        detector.roi_head.aug_test(
            features, [torch.cat([x1y1, x2y2], dim=-1),
                       torch.empty((0, 4))], [[img_metas[0]]] * 2)

        # test rcnn_test_cfg is None
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :].repeat(
                2, 1, 1, 1))
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]] * 2,
                [torch.empty((0, 4)),
                 torch.cat([x1y1, x2y2], dim=-1)], None)
            assert bboxes[0].shape == torch.Size((0, 4))
            assert scores[0].shape == torch.Size(
                (0, detector.roi_head.bbox_head.fc_cls.out_features))
Example #25
 def _init_weights(self, m: int, n: int) -> nn.Parameter:
     return nn.Parameter(xavier_uniform_(torch.empty(m, n)))
Example #26
    def simple_test_text(self,
                         x,
                         img_metas,
                         det_bboxes,
                         det_masks,
                         rescale=False):
        # image shape of the first image in the batch (only one)
        ori_shape = img_metas[0]['ori_shape']
        scale_factor = img_metas[0]['scale_factor']
        if torch.onnx.is_in_onnx_export() and det_bboxes.shape[0] == 0:
            # If there are no detections, there is nothing for a mask head to do,
            # but during ONNX export we should still run the mask head
            # so that it appears in the graph.
            # So add one zero / dummy ROI that will be mapped
            # to an Identity op in the graph.
            det_bboxes = dummy_pad(det_bboxes, (0, 0, 0, 1))

        if det_bboxes.shape[0] == 0:
            decoded_texts = torch.empty([0, 0, 0],
                                        dtype=det_bboxes.dtype,
                                        device=det_bboxes.device)

            confidences = torch.empty([0, 0, 0],
                                      dtype=det_bboxes.dtype,
                                      device=det_bboxes.device)

            distributions = []
        else:
            # if det_bboxes is rescaled to the original image size, we need to
            # rescale it back to the testing scale to obtain RoIs.
            if rescale and not isinstance(scale_factor, float):
                scale_factor = torch.from_numpy(scale_factor).to(
                    det_bboxes.device)
            _bboxes = (det_bboxes[:, :4] *
                       scale_factor if rescale else det_bboxes)
            text_rois = bbox2roi([_bboxes])
            text_results = self._text_forward(x,
                                              text_rois,
                                              det_masks=det_masks)
            if torch.onnx.is_in_onnx_export():
                return text_results
            text_results = text_results['text_results'].permute(1, 0, 2)
            text_results = torch.nn.functional.softmax(text_results, dim=-1)
            confidences = []
            decoded_texts = []
            distributions = []
            for text in text_results:
                predicted_confidences, encoded = text.topk(1)
                predicted_confidences = predicted_confidences.cpu().numpy()
                encoded = encoded.cpu().numpy().reshape(-1)
                decoded = ''
                confidence = 1
                for l, c in zip(encoded, predicted_confidences):
                    confidence *= c
                    if l == 1:
                        break
                    decoded += self.alphabet[l]
                confidences.append(confidence)
                assert self.alphabet[0] == self.alphabet[1] == ' '
                distribution = np.transpose(
                    text.cpu().numpy())[2:, :len(decoded) + 1]
                distributions.append(distribution)
                decoded_texts.append(
                    decoded if confidence >= self.text_thr else '')

        return decoded_texts, confidences, distributions
Example #27
    def __init__(self,
                 seq_length,
                 input_dim,
                 num_hidden,
                 num_classes,
                 batch_size,
                 device='cpu'):
        super(LSTM, self).__init__()

        # Create tensors of right sizes
        # LSTM part
        self.Wfx = nn.Parameter(torch.empty(input_dim, num_hidden))
        self.Wix = nn.Parameter(torch.empty(input_dim, num_hidden))
        self.Wgx = nn.Parameter(torch.empty(input_dim, num_hidden))
        self.Wox = nn.Parameter(torch.empty(input_dim, num_hidden))
        self.Wfh = nn.Parameter(torch.empty(num_hidden, num_hidden))
        self.Wih = nn.Parameter(torch.empty(num_hidden, num_hidden))
        self.Wgh = nn.Parameter(torch.empty(num_hidden, num_hidden))
        self.Woh = nn.Parameter(torch.empty(num_hidden, num_hidden))
        self.bf = nn.Parameter(torch.empty(num_hidden))
        self.bi = nn.Parameter(torch.empty(num_hidden))
        self.bg = nn.Parameter(torch.empty(num_hidden))
        self.bo = nn.Parameter(torch.empty(num_hidden))
        self.c = torch.empty(batch_size, num_hidden,
                             device=device)  # cell state
        self.h = torch.empty(batch_size, num_hidden,
                             device=device)  # output state
        # Linear part
        self.Wph = nn.Parameter(torch.empty(num_hidden, num_classes))
        self.bp = nn.Parameter(torch.empty(num_classes))

        # Initialize weights
        mean = 0.0
        std = 1 / seq_length
        nn.init.normal_(self.Wgx, mean=mean, std=std)
        nn.init.normal_(self.Wix, mean=mean, std=std)
        nn.init.normal_(self.Wfx, mean=mean, std=std)
        nn.init.normal_(self.Wox, mean=mean, std=std)
        nn.init.normal_(self.Wgh, mean=mean, std=std)
        nn.init.normal_(self.Wih, mean=mean, std=std)
        nn.init.normal_(self.Wfh, mean=mean, std=std)
        nn.init.normal_(self.Woh, mean=mean, std=std)
        nn.init.normal_(self.Wph, mean=mean, std=std)
        nn.init.constant_(self.bi, 0.0)
        nn.init.constant_(self.bg, 0.0)
        nn.init.constant_(self.bf, 0.0)
        nn.init.constant_(self.bo, 0.0)
        nn.init.constant_(self.bp, 0.0)

        # Administrative stuff
        self.sequence_length = seq_length
        self.batch_size = batch_size
        self.input_dim = input_dim
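Given the gate-wise parameter naming above, a single LSTM step would look roughly like this (a sketch; the project's actual forward() is not shown and may differ):

    def forward_step(self, x):
        f = torch.sigmoid(x @ self.Wfx + self.h @ self.Wfh + self.bf)  # forget gate
        i = torch.sigmoid(x @ self.Wix + self.h @ self.Wih + self.bi)  # input gate
        g = torch.tanh(x @ self.Wgx + self.h @ self.Wgh + self.bg)     # candidate
        o = torch.sigmoid(x @ self.Wox + self.h @ self.Woh + self.bo)  # output gate
        self.c = f * self.c + i * g          # new cell state
        self.h = o * torch.tanh(self.c)      # new output state
        return self.h @ self.Wph + self.bp   # class logits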
Example #28
    channelsList = [1024, 512, 256, 128, 16]
    encNet = EncodeNet(channelsList).cuda().train()
    channelsList.reverse()
    decNet = DecodeNet(channelsList).cuda().train()
    print('create new model')
else:
    encNet = torch.load('../models/encNet_' + sys.argv[0] + '.pkl', map_location='cuda:'+sys.argv[1]).cuda().train()
    decNet = torch.load('../models/decNet_' + sys.argv[0] + '.pkl', map_location='cuda:'+sys.argv[1]).cuda().train()
    print('read ../models/' + sys.argv[0] + '.pkl')

print(encNet)
print(decNet)

optimizer = torch.optim.Adam([{'params':encNet.parameters()},{'params':decNet.parameters()}], lr=float(sys.argv[3]))

trainData = torch.empty([batchSize, 1, 256, 256]).float().cuda()

lastSavedI = 0
C = torch.arange(start=0, end=16).unsqueeze_(1).float().cuda()
sigma = 1
testImgSum = 24
testImgDir = '/datasets/MLG/wfang/imgCompress/kodim256/'
testDataReader = bmpLoader.datasetReader(colorFlag=False, batchSize=1, bufferBatchSizeMultiple=testImgSum,
                                         imgDir=testImgDir, imgSum=testImgSum)
testData = torch.empty([testImgSum, 1, 256, 256]).float().cuda()
for k in range(testImgSum):
    testData[k] = torch.from_numpy(testDataReader.readImg()).float().cuda()

i = 0
while True:
Example #29
def train_with_kf(train_loader, model, criterion, optimizer, epoch, log,
                  kfclass):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time

        if input.shape[0] % (args.ngpu) != 0:
            continue

        if args.use_cuda:
            target = target.cuda()
            input = input.cuda()

        data_time.update(time.time() - end)

        if args.dataset == 'cifar10':
            with torch.no_grad():
                kf_input = kfclass(torch.randn(input.shape[0], 128).cuda())
        elif args.dataset == 'imagenet':
            with torch.no_grad():
                kf_input = kfclass(
                    torch.empty(input.shape[0], 128,
                                dtype=torch.float32).normal_().cuda())
                kf_input = F.interpolate(kf_input, size=224)

        input_list = []
        num_pgpu = input.shape[0] // args.ngpu
        for igpu in range(args.ngpu):
            input_list.append(
                torch.cat([
                    input[igpu * num_pgpu:(igpu + 1) * num_pgpu],
                    kf_input[igpu * num_pgpu:(igpu + 1) * num_pgpu]
                ],
                          dim=0))
        input = torch.cat(input_list, dim=0)

        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        output = model(input_var)

        output_list = []
        for igpu in range(args.ngpu):
            output_list.append(output[igpu * num_pgpu * 2:igpu * num_pgpu * 2 +
                                      num_pgpu])
        output = torch.cat(output_list, dim=0)

        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        for module in net.modules():
            if isinstance(module, Kf_Conv2d):
                module.kfscale.data.clamp_(min=0, max=1)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('  Epoch: [{:03d}][{:03d}/{:03d}]   '
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                  'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5) + time_string())
    print(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'
        .format(top1=top1, top5=top5, error1=100 - top1.avg), log)
    return top1.avg, losses.avg
Example #30
 def sample(self, numb):
     sample = torch.empty(numb, self.ZDIMS, device=self.device).normal_(mean=0,std=1) 
     mu, inv_sigma2_x = self.decode(sample)
     return mu, inv_sigma2_x
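Note that torch.empty(...).normal_(mean=0, std=1) draws from the same distribution as torch.randn with that shape; the in-place version simply fills the freshly allocated buffer:

import torch

z1 = torch.empty(5, 8).normal_(mean=0, std=1)  # what sample() does above
z2 = torch.randn(5, 8)                         # equivalent distribution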
Example #31
def generate_gaussian_simulations(min_x,
                                  max_x,
                                  min_y,
                                  max_y,
                                  n_steps,
                                  step_size=1,
                                  std=0.1):
    """Generate a natural path where the next direction angle is sampled from a gaussian distribution
    
    Arguments:
        min_x {float} -- x minimum boundary
        max_x {float} -- x maximum boundary
        min_y {float} -- y minimum boundary
        max_y {float} -- y maximum boundary
        n_steps {int} -- the number of simulation steps
    
    Keyword Arguments:
        step_size {int} -- the distance between two successive positions (default: {1})
        std {float} -- the std from the gaussian distribution (default: {0.1})
    
    Returns:
        tensor -- n_steps x 2 tensor corresponding to the positions of the new path
    """
    assert min_x <= max_x
    assert min_y <= max_y

    # generate starting position
    start_x = min_x + (max_x - min_x) * torch.rand(1)
    start_y = min_y + (max_y - min_y) * torch.rand(1)

    # generate the angle changes
    cur_angle = 2 * np.pi * torch.rand(1) - np.pi
    deviations = 2 * np.pi * torch.empty(n_steps - 1).normal_(mean=0, std=std)

    positions = [[start_x, start_y]]
    cur_x, cur_y = start_x, start_y

    for dev in deviations:
        cur_angle = cur_angle + dev

        # put angle into (-pi, pi] range
        if cur_angle > np.pi:
            cur_angle.sub_(2 * np.pi)
        elif cur_angle <= -np.pi:
            cur_angle.add_(2 * np.pi)

        cur_x = cur_x + step_size * torch.cos(cur_angle)
        cur_y = cur_y + step_size * torch.sin(cur_angle)

        # handle positions crossing the boundaries (reflected like a mirror)
        if cur_x < min_x:
            cur_angle = torch.sign(cur_angle) * np.pi - cur_angle
            cur_x = min_x + (min_x - cur_x)
        elif cur_x > max_x:
            cur_angle = torch.sign(cur_angle) * np.pi - cur_angle
            cur_x = max_x + (max_x - cur_x)

        if cur_y < min_y:
            cur_angle = -cur_angle
            cur_y = min_y + (min_y - cur_y)
        elif cur_y > max_y:
            cur_angle = -cur_angle
            cur_y = max_y + (max_y - cur_y)

        # add the new position
        positions.append([cur_x, cur_y])

    return torch.FloatTensor(positions)
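Example usage:

# a 100-step path inside a 10 x 10 box; returns an n_steps x 2 tensor
# of (x, y) positions per the docstring
path = generate_gaussian_simulations(min_x=0, max_x=10,
                                     min_y=0, max_y=10,
                                     n_steps=100)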
Example #32
def test_sparse_rcnn_forward():
    config_path = 'sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py'
    model = _get_detector_cfg(config_path)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    detector.init_weights()
    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[5])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    # Test forward train with non-empty truth batch
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_bboxes = [item for item in gt_bboxes]
    gt_labels = mm_inputs['gt_labels']
    gt_labels = [item for item in gt_labels]
    losses = detector.forward(imgs,
                              img_metas,
                              gt_bboxes=gt_bboxes,
                              gt_labels=gt_labels,
                              return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    detector.forward_dummy(imgs)

    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_bboxes = [item for item in gt_bboxes]
    gt_labels = mm_inputs['gt_labels']
    gt_labels = [item for item in gt_labels]
    losses = detector.forward(imgs,
                              img_metas,
                              gt_bboxes=gt_bboxes,
                              gt_labels=gt_labels,
                              return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True,
                                      return_loss=False)
            batch_results.append(result)

    # test empty proposal in roi_head
    with torch.no_grad():
        # test no proposal in the whole batch
        detector.roi_head.simple_test([imgs[0][None, :]],
                                      torch.empty((1, 0, 4)),
                                      torch.empty((1, 100, 4)),
                                      [img_metas[0]],
                                      torch.ones((1, 4)))
Example #33
def create_digit_image(colors, curr_min_val=0, curr_max_val=1):

    numpy_colors = np.array(colors)

    # convert to [0,1] to draw
    if numpy_colors.min() < 0 or numpy_colors.max() > 1:
        if (curr_max_val - curr_min_val) <= 0:
            print('wrong min and max input values')
            return
        else:
            numpy_colors = np.round((numpy_colors - curr_min_val) / (curr_max_val - curr_min_val), 3)

    colors = numpy_colors.tolist()
    if len(numpy_colors.shape) < 2 or len(numpy_colors.shape) > 3:
        print('wrong input dimension; should be [_, 8, 3]')
    elif len(numpy_colors.shape) == 2:
        colors = [colors]
    # elif len(numpy_colors.shape) == 3: nothing to do
    data_tensor = torch.empty(numpy_colors.shape[0], 3, 300, 200, dtype=torch.float)

    for j in range(numpy_colors.shape[0]):

        fig_temp, myaxis = plt.subplots(figsize=(200, 300), dpi=1)
        fig_temp.subplots_adjust(0, 0, 1, 1)
        myaxis.axis('off')
        myaxis.set_xlim([0, 2 * width])
        myaxis.set_ylim([0, 3 * width])
        myaxis.set_aspect('equal', 'box')

        patches = []

        segments_centers = []
        center = [width, 1.5*width]
        bg_patch = create_background(colors[j][7])
        patches.append(bg_patch)
        myaxis.add_patch(bg_patch)

        segments_centers.append([center[0], center[1] + width + height])
        segments_centers.append([center[0], center[1]])
        segments_centers.append([center[0], center[1] - width - height])
        segments_centers.append([center[0] - width / 2 - height / 2, center[1] + width / 2 + height / 2])
        segments_centers.append([center[0] + width / 2 + height / 2, center[1] + width / 2 + height / 2])
        segments_centers.append([center[0] - width / 2 - height / 2, center[1] - width / 2 - height / 2])
        segments_centers.append([center[0] + width / 2 + height / 2, center[1] - width / 2 - height / 2])

        vertical_horizon = [0, 0, 0, 1, 1, 1, 1]
        for i in range(len(segments_centers)):
            polygon = create_segment(segments_centers[i], vertical_horizon[i], colors[j][i])
            patches.append(polygon)
            myaxis.add_patch(polygon)

        fig_temp.canvas.draw()

        data = np.frombuffer(fig_temp.canvas.tostring_rgb(), dtype=np.uint8)
        # data = np.fromstring(fig_temp.canvas.tostring_rgb(), dtype=np.uint8, sep='')
        data = data.reshape(fig_temp.canvas.get_width_height()[::-1] + (3,))

        data_tensor[j] = F.to_tensor(data)

        # plt.show()
        plt.close(fig_temp)
        # print(data_tensor.shape)

    return data_tensor
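# Hedged usage sketch (added): render a single seven-segment digit from an
# 8x3 color list (7 segment colors + 1 background color, RGB each). Assumes
# the helpers create_background/create_segment and the globals width/height
# referenced above are defined elsewhere in the original module.
digit_colors = [[0.9, 0.1, 0.1]] * 7 + [[1.0, 1.0, 1.0]]  # red segments, white background
digit_tensor = create_digit_image(digit_colors)  # expected shape: (1, 3, 300, 200)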
Example #34
def get_dJdG(G_b, F, S_b, B_b, G1):
    theta_b = 1 / 2 * torch.matmul(F.transpose(0, 1), G_b)  # theta_b: (n, batch_size)
    dJdG = torch.empty(config.BITS, G_b.shape[1], device='cuda')
    for j in range(G_b.shape[1]):
        dJdG[:, j] = get_dJdGj(G_b[:, j], theta_b[:, j], B_b[:, j], F, S_b[:, j], G1)
    return dJdG  # shape: (BITS, batch_size)
Example #35
    def __init__(self, k, c, a, c2=None, l=None, node_type='discrete'):
        """
        utils Layer
        :param k: dimension of the output's alphabet, which goes from 0 to K-1 (when discrete)
        :param c: the number of hidden states
        :param a: dimension of the edges' alphabet, which goes from 0 to A-1
        :param c2: the number of states of the neighbours
        :param l: number of previous layers to consider. You must pass the appropriate number of statistics at training
        :param node_type: 'discrete' or 'continuous', selecting the emission distribution
        """
        super().__init__()

        # For comparison w.r.t Numpy implementation
        # np.random.seed(seed=10)
        self.node_type = node_type
        self.is_layer_0 = True
        if c2 is not None or l is not None:
            assert c2 is not None and l is not None, 'You must specify both c2 and l'
            self.is_layer_0 = False

        self.eps = 1e-8  # Laplace smoothing
        self.C = c
        self.K = k
        self.orig_A = a
        self.A = a + 2  # +2 accounts for the special recurrent arc and the special bottom state

        if not self.is_layer_0:
            self.C2 = c2
            self.L = l

        # Initialisation of the model's parameters.
        # torch.manual_seed(0)

        if self.is_layer_0:
            # For debugging w.r.t Numpy version
            # pr = torch.from_numpy(np.random.uniform(size=self.C).astype(np.float32))

            pr = torch.nn.init.uniform_(
                torch.empty(self.C, dtype=torch.float64))
            self.prior = pr / pr.sum()

            # print(self.prior)

        if self.node_type == 'discrete':
            self.emission = CategoricalEmission(self.K, self.C)
        elif self.node_type == 'continuous':
            self.emission = GaussianEmission(self.K, self.C)

        # print(self.emission)

        if not self.is_layer_0:
            # For debugging w.r.t Numpy version
            # self.layerS = torch.from_numpy(np.random.uniform(size=self.L).astype(np.float32))  #

            self.layerS = torch.nn.init.uniform_(
                torch.empty(self.L, dtype=torch.float64))
            self.layerS /= self.layerS.sum()

            self.arcS = torch.zeros((self.L, self.A), dtype=torch.float64)
            self.transition = torch.empty([self.L, self.A, self.C, self.C2],
                                          dtype=torch.float64)

            for layer in range(0, self.L):
                # For debugging w.r.t Numpy version
                # elf.arcS[layer, :] = torch.from_numpy(np.random.uniform(size=self.A).astype(np.float32))

                self.arcS[layer, :] = torch.nn.init.uniform_(
                    self.arcS[layer, :])
                self.arcS[layer, :] /= self.arcS[layer, :].sum()
                for arc in range(0, self.A):
                    for j in range(0, self.C2):
                        # For debugging w.r.t Numpy version
                        # tr = torch.from_numpy(np.random.uniform(size=self.C).astype(np.float32))

                        tr = torch.nn.init.uniform_(torch.empty(self.C))
                        self.transition[layer, arc, :, j] = tr / tr.sum()

            # print(self.arcS)
            # print(self.transition)

        self.init_accumulators()
Example #36
    loss_pixel = 0
    for l in logit_pixel:
        loss_pixel += symmetric_lovasz_ignore_empty(l.squeeze(1), truth_pixel, truth_image)
    loss = symmetric_lovasz(logit.squeeze(1), truth_pixel)
    return 0.05 * loss_image + 0.1 * loss_pixel + 1 * loss


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--basenet", choices=BASENET_CHOICES, default='vgg11', help='model of basenet')
    parser.add_argument("--num-filters", type=int, default=16, help='num filters for decoder')

    args = parser.parse_args()

    net = UNet(**vars(args))
    # print(net)
    parameters = [p for p in net.parameters() if p.requires_grad]
    n_params = sum(p.numel() for p in parameters)
    print('N of parameters {} ({} tensors)'.format(n_params, len(parameters)))
    encoder_parameters = [p for name, p in net.named_parameters() if p.requires_grad and name.startswith('encoder')]
    n_encoder_params = sum(p.numel() for p in encoder_parameters)
    print('N of encoder parameters {} ({} tensors)'.format(n_encoder_params, len(encoder_parameters)))
    print('N of decoder parameters {} ({} tensors)'.format(n_params - n_encoder_params,
                                                           len(parameters) - len(encoder_parameters)))

    x = torch.empty((1, 3, 128, 128))
    y = net(x)
    print(x.size(), '-->', y.size())
Example #37
    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        bias=False,
        padding_mode="zeros",
    ):
        super().__init__()

        self.is_calculated = False

        self.conv_layer = Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation,
            groups,
            bias,
            padding_mode,
        )
        self.kernel_size = self.conv_layer.kernel_size

        # small addition to avoid division by zero
        self.delta = 1e-3

        # freq, theta, sigma are set up according to S. Meshgini,
        # A. Aghagolzadeh and H. Seyedarabi, "Face recognition using
        # Gabor filter bank, kernel principal component analysis
        # and support vector machine"
        self.freq = Parameter(
            (math.pi / 2) * math.sqrt(2)
            **(-torch.randint(0, 5,
                              (out_channels, in_channels))).type(torch.Tensor),
            requires_grad=True,
        )
        self.theta = Parameter(
            (math.pi / 8) *
            torch.randint(0, 8,
                          (out_channels, in_channels)).type(torch.Tensor),
            requires_grad=True,
        )
        self.sigma = Parameter(math.pi / self.freq, requires_grad=True)
        self.psi = Parameter(math.pi * torch.rand(out_channels, in_channels),
                             requires_grad=True)

        self.x0 = Parameter(torch.ceil(torch.Tensor([self.kernel_size[0] / 2
                                                     ]))[0],
                            requires_grad=False)
        self.y0 = Parameter(torch.ceil(torch.Tensor([self.kernel_size[1] / 2
                                                     ]))[0],
                            requires_grad=False)

        self.y, self.x = torch.meshgrid([
            torch.linspace(-self.x0 + 1, self.x0 + 0, self.kernel_size[0]),
            torch.linspace(-self.y0 + 1, self.y0 + 0, self.kernel_size[1]),
        ])
        self.y = Parameter(self.y)
        self.x = Parameter(self.x)

        self.weight = Parameter(
            torch.empty(self.conv_layer.weight.shape, requires_grad=True),
            requires_grad=True,
        )

        self.register_parameter("freq", self.freq)
        self.register_parameter("theta", self.theta)
        self.register_parameter("sigma", self.sigma)
        self.register_parameter("psi", self.psi)
        self.register_parameter("x_shape", self.x0)
        self.register_parameter("y_shape", self.y0)
        self.register_parameter("y_grid", self.y)
        self.register_parameter("x_grid", self.x)
        self.register_parameter("weight", self.weight)
Example #38
 (
     'kl_div',
     F.log_softmax(torch.randn(S, 10), 1),
     (F.softmax(torch.randn(S, 10), 1), ),
 ),
 (
     'cross_entropy',
     (3, S),
     (torch.randint(S, (3, ), dtype=torch.int64), ),
 ),
 (
     'binary_cross_entropy_with_logits',
     (3, ),
     (torch.empty(3).random_(2), ),
 ),
 (
     'smooth_l1_loss',
     (3, S),
     (non_differentiable(torch.rand(3, S)), ),
 ),
 (
     'l1_loss',
     (3, S),
     (non_differentiable(torch.rand(3, S)), ),
 ),
 (
     'mse_loss',
     (3, S),
     (non_differentiable(torch.rand(3, S)), ),
Example #39
    def test_createParamStatic(self):
        weight = Initializer.createParamStatic((5, 6))

        assert isinstance(weight, torch.Tensor)
        assert weight.size() == torch.empty((5, 6)).size()
Example #40
import CTimeData as CTD
# Specific utilities
import utilities_lib as ul
import matplotlib.pyplot as plt

plt.close("all")  # Close all previous windows

"""
PyTorch Related library !!
"""

import torch

"""
Basic tensor creation
"""
x = torch.empty(5, 3)
x = torch.rand(5, 3)
x = torch.zeros(5, 3, dtype=torch.long)

## Construct Tensor from data
x = torch.tensor([5.5, 3])


# or create a tensor based on an existing tensor.
# These methods will reuse properties of the input tensor, e.g. dtype,
# unless new values are provided by the user

x = torch.randn_like(x, dtype=torch.float) 

print(x)
Example #41
def get_dJdF(F_b, G, S_b, B_b, F1):  # F_b: (c, batch_size), S_b: (batch_size, n)
    theta_b = 1 / 2 * torch.matmul(F_b.transpose(0, 1), G)  # theta_b: (batch_size, n)
    dJdF = torch.empty(config.BITS, F_b.shape[1], device='cuda')
    for i in range(F_b.shape[1]):
        dJdF[:, i] = get_dJdFi(F_b[:, i], theta_b[i, :], B_b[:, i], G, S_b[i, :], F1)
    return dJdF  # shape: (BITS, batch_size)
Example #42
    def _run_one_epoch(self, epoch, cross_valid=False):
        start = time.time()
        total_loss = 0
        sum_loss = 0

        data_loader = self.tr_loader if not cross_valid else self.cv_loader
        print('batch length:', len(data_loader))
        # visualizing loss using visdom
        if self.visdom_epoch and not cross_valid:
            vis_opts_epoch = dict(title=self.visdom_id + " epoch " + str(epoch),
                                  ylabel='Loss', xlabel='Epoch')
            vis_window_epoch = None
            vis_iters = torch.arange(1, len(data_loader) + 1)
            vis_iters_loss = torch.Tensor(len(data_loader))

        total_correct = 0
        total_sen = 0
        labels_cat = torch.empty(0, dtype=torch.int64)
        predicted_cat = torch.empty(0, dtype=torch.int64)

        target_names = ['aloe', 'burger', 'cabbage', 'candied_fruits', 'carrots', 'chips',
                  'chocolate', 'drinks', 'fries', 'grapes', 'gummies', 'ice-cream',
                  'jelly', 'noodles', 'pickles', 'pizza', 'ribs', 'salmon',
                  'soup', 'wings']  # target class names
        for i, data in enumerate(data_loader):
            padded_input, input_lengths, labels = data
            if len(input_lengths) <= 1:
                continue
            total_sen = total_sen + padded_input.size(0)

            padded_input = padded_input.cuda()
            input_lengths = input_lengths.cuda()
            labels = labels.cuda()

            pred = self.model(padded_input, input_lengths)
            model_out = pred[0]

            loss = F.cross_entropy(model_out, labels, reduction='sum')
            sum_loss = sum_loss + loss.item()
            loss = loss / padded_input.size(0)
            pred_res = model_out.max(1)[1]
            gold = labels.contiguous().view(-1)
            n_correct_res = pred_res.eq(gold)
            n_correct_res = n_correct_res.sum().item()
            total_correct = total_correct + n_correct_res

            predicted_cat = torch.cat((predicted_cat, pred_res.cpu()), -1)
            labels_cat = torch.cat((labels_cat, labels.cpu()), -1)

            if not cross_valid:
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            total_loss += loss.item()

            # if self.model_choose == 'speaker_classify' and not cross_valid:
            if i % self.print_freq == 0:
                print('Epoch {0} | Iter {1} | Average Loss {2:.3f} | '
                      'Current Loss {3:.6f} | {4:.1f} ms/batch'.format(
                          epoch + 1, i + 1, total_loss / (i + 1),
                          loss.item(), 1000 * (time.time() - start) / (i + 1)),
                      flush=True)

            # visualizing loss using visdom
            if self.visdom_epoch and not cross_valid:
                vis_iters_loss[i] = loss.item()
                if i % self.print_freq == 0:
                    x_axis = vis_iters[:i+1]
                    y_axis = vis_iters_loss[:i+1]
                    if vis_window_epoch is None:
                        vis_window_epoch = self.vis.line(X=x_axis, Y=y_axis,
                                                         opts=vis_opts_epoch)
                    else:
                        self.vis.line(X=x_axis, Y=y_axis, win=vis_window_epoch,
                                      update='replace')


        print('n_correct:', total_correct)
        print('total_sen:', total_sen)
        print('acc:', total_correct/total_sen)
        print('mean of per-batch average losses:', total_loss / (i + 1))
        print('summed loss averaged over all samples:', sum_loss / total_sen)
        print(metrics.classification_report(labels_cat, predicted_cat, target_names=target_names, digits=4))
        return sum_loss / total_sen, total_correct/total_sen
Example #43
import torch
import torch.nn as nn
import torch.nn.init as init
import matplotlib.pyplot as plt
import Least_squares.settings

n, d = Least_squares.settings.n, Least_squares.settings.d
d_1, d_2 = 10, 1
groups = 3

data = torch.empty(groups, n, d + 1)
with open('data.in', 'r') as f:
    for i in range(groups):
        for j in range(n):
            data_point = f.readline().split()
            for k in range(d + 1):
                data[i, j, k] = float(data_point[k])

lin_model = nn.Sequential(
    nn.Linear(d, d_2, bias=False)
)
lin_nn_model = nn.Sequential(
    nn.Linear(d, d_1, bias=False),
    nn.Linear(d_1, d_2, bias=False)
)
ReLU_model = nn.Sequential(
    nn.Linear(d, d_1),
    nn.ReLU(),
    nn.Linear(d_1, d_2)
)
loss = nn.MSELoss()
Example #44
def update(config):
    # Load model

    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id

        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()
    dev_criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate
    # Figure out all feature stuff

    shuff_file = str(os.getpid()) + '.scp'
    shell_cmd = "cat {:s} | shuf > {:s}".format(config.scp, shuff_file)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    feat_type = None
    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if config.override_trans_path is not None:
        trans_path = config.override_trans_path

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, shuff_file)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, shuff_file)
    else:
        cmd = shuff_file

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load performance monitoring models
    pm_paths = config.pms.split(',')

    pm_models = []
    feat_dims = []
    for path in pm_paths:

        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'], pm_model['bn_dim'],
                                  pm_model['encoder_num_layers'], pm_model['decoder_num_layers'],
                                  pm_model['hidden_dim'])
        ae_model.load_state_dict(pm_model['model_state_dict'])
        feat_dims.append(pm_model['feature_dim'])

        if config.use_gpu:
            ae_model.cuda()

        for p in ae_model.parameters():  # Do not update performance monitoring block
            p.requires_grad = False

        pm_models.append(ae_model)

    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)

    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    else:
        num_pm_models = len(pm_paths)

    ep_loss_dev = []
    ep_fer_dev = []

    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=50000, shuffle=True)

    # Compute initial performance on dev set
    val_losses = []
    val_fer = []

    for batch_x, batch_l in data_loader:
        if config.use_gpu:
            batch_x = Variable(batch_x).cuda()
            batch_l = Variable(batch_l).cuda()
        else:
            batch_x = Variable(batch_x)
            batch_l = Variable(batch_l)

        _, batch_x = model(batch_x)
        val_loss = dev_criterion(batch_x, batch_l)
        val_losses.append(val_loss.item())

        if config.use_gpu:
            val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
        else:
            val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))

    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))

    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses),
        np.mean(val_fer))

    logging.info(print_log)

    for epoch in range(config.epochs):

        batches = []
        for idx in range(num_pm_models):
            if config.use_gpu:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
            else:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
            batches.append(batch)

        lens = []
        utt_count = 0
        update_num = 0

        val_losses = []
        val_fer = []
        tr_losses = []
        for idx in range(num_pm_models):
            tr_losses.append([])

        for utt_id, mat in kaldi_io.read_mat_ark(cmd):

            lens.append(min(mat.shape[0], config.max_seq_len))

            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))

            if config.use_gpu:
                post = out[1] - Variable(torch.FloatTensor(means[0])).cuda()
            else:
                post = out[1] - Variable(torch.FloatTensor(means[0]))

            post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
            batch = batches[0]
            batch = torch.cat([batch, post[None, :, :]], 0)
            batches[0] = batch

            for idx in range(1, num_pm_models):
                if config.use_gpu:
                    post = out[0][-idx] - Variable(torch.FloatTensor(means[idx])).cuda()
                else:
                    post = out[0][-idx] - Variable(torch.FloatTensor(means[idx]))
                post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
                batch = batches[idx]
                batch = torch.cat([batch, post[None, :, :]], 0)
                batches[idx] = batch

            utt_count += 1

            if utt_count == config.batch_size:
                update_num += 1

                ## DO THE ADAPTATION

                lens = torch.IntTensor(lens)
                _, indices = torch.sort(lens, descending=True)

                if config.use_gpu:
                    loss_all = torch.FloatTensor([1]).cuda()
                else:
                    loss_all = torch.FloatTensor([1])
                for idx in range(num_pm_models):
                    batch_x = batches[idx][indices]
                    ae_model = pm_models[idx]
                    batch_l = lens[indices]

                    if config.time_shift == 0:
                        outputs = ae_model(batch_x, batch_l)
                    else:
                        outputs = ae_model(batch_x[:, :-config.time_shift, :], batch_l - config.time_shift)

                    if config.time_shift == 0:
                        loss = stable_mse(outputs, batch_x)
                    else:
                        loss = stable_mse(outputs, batch_x[:, config.time_shift:, :])

                    loss_all *= loss

                    tl = tr_losses[idx]
                    tl.append(loss.item())
                    tr_losses[idx] = tl
                    # if idx < num_pm_models - 1:
                    #    loss.backward(retain_graph=True)
                    # else:
                    #    loss.backward()

                optimizer.zero_grad()
                loss_all.backward()
                optimizer.step()

                batches = []
                for idx in range(num_pm_models):
                    if config.use_gpu:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
                    else:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
                    batches.append(batch)
                lens = []
                utt_count = 0

        logging.info("Finished unsupervised adaptation for epoch {:d} with multi-layer RNN-AE Loss".format(epoch))

        # CHECK IF ADAPTATION IS WORKING AT ALL

        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)

            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())

            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} update ".format(epoch)
        for idx in range(num_pm_models):
            print_log = print_log + "Tr loss layer {:d} = {:.3f} | ".format(idx, np.mean(tr_losses[idx]))

        print_log = print_log + "Dev loss: {:.3f} | Dev FER: {:.2f}".format(np.mean(val_losses), np.mean(val_fer))

        logging.info(print_log)

        model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch + 1) + '.model')
        torch.save({
            'epoch': epoch + 1,
            'ep_loss_dev': ep_loss_dev,
            'ep_fer_dev': ep_fer_dev,
            'tr_losses': tr_losses,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()}, model_path)

        # Change learning rate to half
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
Example #45
#!/usr/bin/python3
# coding: utf-8
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from itertools import chain  # merge generators
##################################################################
## Initialization
print(nn.init.calculate_gain('relu'))  # 1.4142135623730951
print(nn.init.calculate_gain('leaky_relu'))  # 1.4141428569978354

w = torch.empty(3, 5); print(w)  # Returns a tensor filled with uninitialized data
print(nn.init.xavier_uniform_(w))
print(nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu')))
##################################################################
## Activation
x = torch.Tensor([1]); print(x)
print(F.elu(torch.Tensor([1, 0, -1])))  # tensor([ 1.0000,  0.0000, -0.6321]); ELU(x) = max(0,x) + min(0, alpha * (exp(x) - 1));  alpha=1.0
print(F.relu(torch.Tensor([1, 0, -1])))  # tensor([ 1.,  0.,  0.])
print(torch.sigmoid(x), F.softmax(x, dim=-1))  # tensor([0.7311]) tensor([1.]); softmax of a single element outputs all ones
print(torch.sigmoid(torch.Tensor([0, 1, 2, 3])))  # tensor([0.5000, 0.7311, 0.8808, 0.9526])
print(torch.tanh(torch.Tensor([0, 1, 2, 3])))  # tensor([0.0000, 0.7616, 0.9640, 0.9951])

print(F.softmax(torch.Tensor([1, 2, 3]), dim=0))  # tensor([ 0.0900,  0.2447,  0.6652]); dim=0 should be added
print(F.softmax(F.softmax(torch.Tensor([1, 2, 3]), dim=0), dim=0))  # tensor([0.2535, 0.2959, 0.4506]); do not apply softmax() twice!!!
print(np.log(F.softmax(torch.Tensor([1, 2, 3]), dim=0)))  # tensor([-2.4076, -1.4076, -0.4076])
print(F.log_softmax(torch.Tensor([1, 2, 3]), dim=0))  # tensor([-2.4076, -1.4076, -0.4076]); equal to log(softmax(x))
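# Added check (illustrative, not from the original script): log_softmax is
# numerically equivalent to composing log with softmax, as the two prints
# above suggest.
t = torch.Tensor([1, 2, 3])
assert torch.allclose(F.log_softmax(t, dim=0), torch.log(F.softmax(t, dim=0)))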
Example #46
    def __init__(self, tensor):
        self.floating_dtype = tensor.dtype.is_floating_point
        self.int_mode = True
        self.sci_mode = False
        self.max_width = 1

        if not self.floating_dtype:
            copy = torch.empty(tensor.size(), dtype=torch.long).copy_(tensor).view(tensor.nelement())
            for value in copy.tolist():
                value_str = '{}'.format(value)
                self.max_width = max(self.max_width, len(value_str))

        else:
            copy = torch.empty(tensor.size(), dtype=torch.float64).copy_(tensor).view(tensor.nelement())
            copy_list = copy.tolist()
            try:
                for value in copy_list:
                    if value != math.ceil(value):
                        self.int_mode = False
                        break
            # nonfinites will throw errors
            except (ValueError, OverflowError):
                self.int_mode = False

            if self.int_mode:
                for value in copy_list:
                    value_str = '{:.0f}'.format(value)
                    if math.isnan(value) or math.isinf(value):
                        self.max_width = max(self.max_width, len(value_str))
                    else:
                        # in int_mode for floats, all numbers are integers, and we append a decimal to nonfinites
                        # to indicate that the tensor is of floating type. add 1 to the len to account for this.
                        self.max_width = max(self.max_width, len(value_str) + 1)

            else:
                copy_abs = copy.abs()
                pos_inf_mask = copy_abs.eq(float('inf'))
                neg_inf_mask = copy_abs.eq(float('-inf'))
                nan_mask = copy_abs.ne(copy)
                invalid_value_mask = pos_inf_mask + neg_inf_mask + nan_mask
                if invalid_value_mask.all():
                    example_value = 0
                else:
                    example_value = copy_abs[invalid_value_mask.eq(0)][0]
                copy_abs[invalid_value_mask] = example_value

                exp_min = copy_abs.min()
                if exp_min != 0:
                    exp_min = math.floor(math.log10(exp_min)) + 1
                else:
                    exp_min = 1
                exp_max = copy_abs.max()
                if exp_max != 0:
                    exp_max = math.floor(math.log10(exp_max)) + 1
                else:
                    exp_max = 1

                # these conditions for using scientific notation are based on numpy
                if exp_max - exp_min > PRINT_OPTS.precision or exp_max > 8 or exp_min < -4:
                    self.sci_mode = True
                    for value in copy_list:
                        value_str = ('{{:.{}e}}').format(PRINT_OPTS.precision).format(value)
                        self.max_width = max(self.max_width, len(value_str))
                else:
                    for value in copy_list:
                        value_str = ('{{:.{}f}}').format(PRINT_OPTS.precision).format(value)
                        self.max_width = max(self.max_width, len(value_str))
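# Illustrative sketch (added): the thresholds above mirror numpy's rules for
# switching to scientific notation -- a dynamic range wider than the print
# precision in decades, or any exponent above 8 or below -4, enables sci_mode.
print(torch.tensor([1e9]))         # exp_max = 10 > 8 -> sci_mode: tensor([1.0000e+09])
print(torch.tensor([1.0, 100.0]))  # exponent range fits -> fixed-point: tensor([  1., 100.])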
Example #47
def double(x):
    # adding double-precision zeros promotes x to a (1, len(x)) float64 tensor
    a = torch.zeros(1, len(x), dtype=torch.double)
    result = torch.empty(1, len(x), dtype=torch.double)  # out tensor must match the promoted dtype
    torch.add(a, x, out=result)
    return result
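# Hedged usage sketch (added): `double` promotes a 1-D float32 input to a
# (1, n) float64 tensor by adding it to double-precision zeros.
y = double(torch.tensor([1.0, 2.0, 3.0]))
assert y.dtype == torch.double and y.shape == (1, 3)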
Example #48
 def __init__(self, in_channel: int = 512, q_k_v_channel: int = 64):
     super(AttentionHead, self).__init__()
     self.q_k_v_channel = q_k_v_channel
     self.weights = nn.parameter.Parameter(
         torch.empty((in_channel, q_k_v_channel * 3)))
     self.score_norm = math.sqrt(q_k_v_channel)
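# Hedged sketch (added, not part of the original class): one plausible way the
# fused Q/K/V weights above could be used in a forward pass -- project, split
# into three chunks, then apply scaled dot-product attention. The original
# forward method is not shown, so treat this purely as an illustration.
def attention_head_forward(head, x):  # x: (batch, seq_len, in_channel)
    q, k, v = (x @ head.weights).chunk(3, dim=-1)  # each: (batch, seq_len, q_k_v_channel)
    scores = q @ k.transpose(-2, -1) / head.score_norm
    return torch.softmax(scores, dim=-1) @ v       # (batch, seq_len, q_k_v_channel)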
Example #49
    def _generate(
        self,
        sample: Dict[str, Dict[str, Tensor]],
        prefix_tokens: Optional[Tensor] = None,
        constraints: Optional[Tensor] = None,
        bos_token: Optional[int] = None,
    ):
        incremental_states = torch.jit.annotate(
            List[Dict[str, Dict[str, Optional[Tensor]]]],
            [
                torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {})
                for i in range(self.model.models_size)
            ],
        )
        net_input = sample["net_input"]

        if "src_tokens" in net_input:
            src_tokens = net_input["src_tokens"]
            # length of the source text being the character length except EndOfSentence and pad
            src_lengths = ((src_tokens.ne(self.eos)
                            & src_tokens.ne(self.pad)).long().sum(dim=1))
        elif "source" in net_input:
            src_tokens = net_input["source"]
            src_lengths = (net_input["padding_mask"].size(-1) -
                           net_input["padding_mask"].sum(-1)
                           if net_input["padding_mask"] is not None else
                           torch.tensor(src_tokens.size(-1)).to(src_tokens))
        else:
            raise Exception("expected src_tokens or source in net input")

        # bsz: total number of sentences in beam
        # Note that src_tokens may have more than 2 dimensions (e.g. audio features)
        bsz, src_len = src_tokens.size()[:2]
        beam_size = self.beam_size

        if constraints is not None and not self.search.supports_constraints:
            raise NotImplementedError(
                "Target-side constraints were provided, but search method doesn't support them"
            )

        # Initialize constraints, when active
        self.search.init_constraints(constraints, beam_size)

        max_len: int = -1
        if self.match_source_len:
            max_len = src_lengths.max().item()
        else:
            max_len = min(
                int(self.max_len_a * src_len + self.max_len_b),
                # exclude the EOS marker
                self.model.max_decoder_positions() - 1,
            )
        assert (
            self.min_len <= max_len
        ), "min_len cannot be larger than max_len, please adjust these!"
        # compute the encoder output for each beam
        encoder_outs = self.model.forward_encoder(net_input)

        # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores
        new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        new_order = new_order.to(src_tokens.device).long()
        encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order)
        # ensure encoder_outs is a List.
        assert encoder_outs is not None

        # initialize buffers
        scores = (torch.zeros(bsz * beam_size,
                              max_len + 1).to(src_tokens).float()
                  )  # +1 for eos; pad is never chosen for scoring
        tokens = (torch.zeros(bsz * beam_size,
                              max_len + 2).to(src_tokens).long().fill_(
                                  self.pad))  # +2 for eos and pad
        tokens[:, 0] = self.eos if bos_token is None else bos_token
        attn: Optional[Tensor] = None

        # A list that indicates candidates that should be ignored.
        # For example, suppose we're sampling and have already finalized 2/5
        # samples. Then cands_to_ignore would mark 2 positions as being ignored,
        # so that we only finalize the remaining 3 samples.
        cands_to_ignore = (torch.zeros(bsz, beam_size).to(src_tokens).eq(-1)
                           )  # forward and backward-compatible False mask

        # list of completed sentences
        finalized = torch.jit.annotate(
            List[List[Dict[str, Tensor]]],
            [
                torch.jit.annotate(List[Dict[str, Tensor]], [])
                for i in range(bsz)
            ],
        )  # contains lists of dictionaries of information about the hypothesis being finalized at each step

        finished = [
            False for i in range(bsz)
        ]  # a boolean array indicating if the sentence at the index is finished or not
        num_remaining_sent = bsz  # number of sentences remaining

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = ((torch.arange(0, bsz) *
                         beam_size).unsqueeze(1).type_as(tokens).to(
                             src_tokens.device))
        cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(
            src_tokens.device)

        reorder_state: Optional[Tensor] = None
        batch_idxs: Optional[Tensor] = None

        original_batch_idxs: Optional[Tensor] = None
        if "id" in sample and isinstance(sample["id"], Tensor):
            original_batch_idxs = sample["id"]
        else:
            original_batch_idxs = torch.arange(0, bsz).type_as(tokens)

        for step in range(max_len + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(
                        batch_idxs.numel()).type_as(batch_idxs)
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size)
                    original_batch_idxs = original_batch_idxs[batch_idxs]
                self.model.reorder_incremental_state(incremental_states,
                                                     reorder_state)
                encoder_outs = self.model.reorder_encoder_out(
                    encoder_outs, reorder_state)

            lprobs, avg_attn_scores = self.model.forward_decoder(
                tokens[:, :step + 1],
                encoder_outs,
                incremental_states,
                self.temperature,
            )

            if self.lm_model is not None:
                lm_out = self.lm_model(tokens[:, :step + 1])
                probs = self.lm_model.get_normalized_probs(lm_out,
                                                           log_probs=True,
                                                           sample=None)
                probs = probs[:, -1, :] * self.lm_weight
                lprobs += probs

            lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)

            lprobs[:, self.pad] = -math.inf  # never select pad
            lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # handle max length constraint
            if step >= max_len:
                lprobs[:, :self.eos] = -math.inf
                lprobs[:, self.eos + 1:] = -math.inf

            # handle prefix tokens (possibly with different lengths)
            if (prefix_tokens is not None and step < prefix_tokens.size(1)
                    and step < max_len):
                lprobs, tokens, scores = self._prefix_tokens(
                    step, lprobs, scores, tokens, prefix_tokens, beam_size)
            elif step < self.min_len:
                # minimum length constraint (does not apply if using prefix_tokens)
                lprobs[:, self.eos] = -math.inf

            # Record attention scores, only support avg_attn_scores is a Tensor
            if avg_attn_scores is not None:
                if attn is None:
                    attn = torch.empty(bsz * beam_size,
                                       avg_attn_scores.size(1),
                                       max_len + 2).to(scores)
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs)
            eos_bbsz_idx = torch.empty(0).to(
                tokens
            )  # indices of hypothesis ending with eos (finished sentences)
            eos_scores = torch.empty(0).to(
                scores
            )  # scores of hypothesis ending with eos (finished sentences)

            if self.should_set_src_lengths:
                self.search.set_src_lengths(src_lengths)

            if self.repeat_ngram_blocker is not None:
                lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz,
                                                   beam_size, step)

            # Shape: (batch, cand_size)
            cand_scores, cand_indices, cand_beams = self.search.step(
                step,
                lprobs.view(bsz, -1, self.vocab_size),
                scores.view(bsz, beam_size, -1)[:, :, :step],
                tokens[:, :step + 1],
                original_batch_idxs,
            )

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            # Shape of eos_mask: (batch size, beam size)
            eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
            eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(
                eos_mask)

            # only consider eos when it's among the top beam_size indices
            # Now we know what beam item(s) to finish
            # Shape: 1d list of absolute-numbered
            eos_bbsz_idx = torch.masked_select(cand_bbsz_idx[:, :beam_size],
                                               mask=eos_mask[:, :beam_size])

            finalized_sents: List[int] = []
            if eos_bbsz_idx.numel() > 0:
                eos_scores = torch.masked_select(cand_scores[:, :beam_size],
                                                 mask=eos_mask[:, :beam_size])

                finalized_sents = self.finalize_hypos(
                    step,
                    eos_bbsz_idx,
                    eos_scores,
                    tokens,
                    scores,
                    finalized,
                    finished,
                    beam_size,
                    attn,
                    src_lengths,
                    max_len,
                )
                num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            if self.search.stop_on_max_len and step >= max_len:
                break
            assert step < max_len, f"{step} < {max_len}"

            # Remove finalized sentences (ones for which {beam_size}
            # finished hypotheses have been generated) from the batch.
            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = torch.ones(bsz,
                                        dtype=torch.bool,
                                        device=cand_indices.device)
                batch_mask[finalized_sents] = False
                # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it
                batch_idxs = torch.arange(
                    bsz, device=cand_indices.device).masked_select(batch_mask)

                # Choose the subset of the hypothesized constraints that will continue
                self.search.prune_sentences(batch_idxs)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)
                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]

                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]
                src_lengths = src_lengths[batch_idxs]
                cands_to_ignore = cands_to_ignore[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, -1)
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(
                        new_bsz * beam_size, attn.size(1), -1)
                bsz = new_bsz
            else:
                batch_idxs = None

            # Set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos

            # Rewrite the operator since the element wise or is not supported in torchscript.

            eos_mask[:, :beam_size] = ~((~cands_to_ignore) &
                                        (~eos_mask[:, :beam_size]))
            active_mask = torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[:eos_mask.size(1)],
            )

            # get the top beam_size active hypotheses, which are just
            # the hypos with the smallest values in active_mask.
            # {active_hypos} indicates which {beam_size} hypotheses
            # from the list of {2 * beam_size} candidates were
            # selected. Shapes: (batch size, beam size)
            new_cands_to_ignore, active_hypos = torch.topk(active_mask,
                                                           k=beam_size,
                                                           dim=1,
                                                           largest=False)

            # update cands_to_ignore to ignore any finalized hypos.
            cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size]
            # Make sure there is at least one active item for each sentence in the batch.
            assert (~cands_to_ignore).any(dim=1).all()

            # update cands_to_ignore to ignore any finalized hypos

            # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam
            # can be selected more than once).
            active_bbsz_idx = torch.gather(cand_bbsz_idx,
                                           dim=1,
                                           index=active_hypos)
            active_scores = torch.gather(cand_scores,
                                         dim=1,
                                         index=active_hypos)

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses

            # Set the tokens for each beam (can select the same row more than once)
            tokens[:, :step + 1] = torch.index_select(tokens[:, :step + 1],
                                                      dim=0,
                                                      index=active_bbsz_idx)
            # Select the next token for each of them
            tokens.view(bsz, beam_size,
                        -1)[:, :, step + 1] = torch.gather(cand_indices,
                                                           dim=1,
                                                           index=active_hypos)
            if step > 0:
                scores[:, :step] = torch.index_select(scores[:, :step],
                                                      dim=0,
                                                      index=active_bbsz_idx)
            scores.view(bsz, beam_size,
                        -1)[:, :, step] = torch.gather(cand_scores,
                                                       dim=1,
                                                       index=active_hypos)

            # Update constraints based on which candidates were selected for the next beam
            self.search.update_constraints(active_hypos)

            # copy attention for active hypotheses
            if attn is not None:
                attn[:, :, :step + 2] = torch.index_select(
                    attn[:, :, :step + 2], dim=0, index=active_bbsz_idx)

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            scores = torch.tensor(
                [float(elem["score"].item()) for elem in finalized[sent]])
            _, sorted_scores_indices = torch.sort(scores, descending=True)
            finalized[sent] = [
                finalized[sent][ssi] for ssi in sorted_scores_indices
            ]
            finalized[sent] = torch.jit.annotate(List[Dict[str, Tensor]],
                                                 finalized[sent])
        return finalized
Example #50
def test_mlp_sanity():
    mlp = MLP([100, 10, 2])
    with torch.no_grad():
        x = torch.empty((5, 100)).normal_()
        mlp(x)
Example #51
 def __init__(self, length):
     super().__init__()
     self.u = Parameter(torch.empty(length))
     self.reset_parameters()
Example #52
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""

import torch

#------------------Tensors--------------------

#5x3 matrix uninitialized
x = torch.empty(5, 3)
print(x)

#Randomly initialized matrix
y = torch.rand(5, 3)
print(y)

#Matrix filled with zeros and dtype long
z = torch.zeros(5, 3, dtype=torch.long)
print(z)

#Constructing the tensor from data
a = torch.tensor([5.5, 3])
print(a)

#Checking the size of tensor
print(x.size())

#-----------------Operations-------------------
Example #53
def _keypoints_to_vector_field(
        keypoints: torch.Tensor, rois: torch.Tensor,
        vector_field_size: int) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Encode keypoint locations into target vector fields for use in SoftmaxWithLoss across space.

    Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the
    closed interval [0, vector_field_size - 1] on discrete image coordinates. We use the
    continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"):
    d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate.

    Arguments:
        keypoints: tensor of keypoint locations of shape (N, K, 3).
        rois: Nx4 tensor of rois in xyxy format
        vector_field_size: integer side length of square vector_field.

    Returns:
        vector fields: A tensor of shape (N, K*2, W, H) containing a vector field map.
            W and H represent the width and height of the map.
        valid: A tensor of shape (N, K) containing whether each keypoint is in
            the roi or not.
    """

    if rois.numel() == 0:
        return rois.new().long(), rois.new().long()
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = vector_field_size / (rois[:, 2] - rois[:, 0])
    scale_y = vector_field_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()

    x[x_boundary_inds] = vector_field_size - 1
    y[y_boundary_inds] = vector_field_size - 1
    valid_loc = (x >= 0) & (y >= 0) & (x < vector_field_size) & (
        y < vector_field_size)
    vis = keypoints[..., 2] > 0
    valid = (vis).long()
    valid = valid.repeat_interleave(2)

    lin_ind = torch.empty(keypoints.shape[0], keypoints.shape[1] * 2,
                          vector_field_size, vector_field_size)

    for j, kpt_2d_list in enumerate(zip(x, y)):

        x_list = kpt_2d_list[0]
        y_list = kpt_2d_list[1]
        if len(x_list) == len(y_list):
            for i in range(len(x_list)):

                kpt = torch.tensor([[x_list[i], y_list[i]]])
                vec_field = compute_vertex(vector_field_size, kpt)

                lin_ind[j, 2 * i, :, :] = torch.tensor(vec_field)[:, :, 0]
                lin_ind[j, 2 * i + 1, :, :] = torch.tensor(vec_field)[:, :, 1]

    vector_fields = lin_ind.cuda()
    valid = torch.reshape(valid, (vector_fields.shape[0], -1))
    # print("vector field", vector_fields)

    return vector_fields, valid
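# Worked example (added) of the Heckbert-style continuous-to-discrete mapping
# used above: with a roi x-range [2.0, 10.0) and vector_field_size = 4,
# scale_x = 4 / (10 - 2) = 0.5, so a keypoint at continuous x = 7.3 lands in
# discrete cell floor((7.3 - 2.0) * 0.5) = floor(2.65) = 2.
import math
assert math.floor((7.3 - 2.0) * (4 / (10.0 - 2.0))) == 2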
Example #54
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Part 1 intro https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py
torch.empty(5, 3)
torch.empty((5, 3))
torch.rand(5, 3)
torch.zeros(5, 3, dtype=torch.long)
x = torch.tensor([5, 5, 3])
x

x = x.new_ones(5, 3, dtype=torch.double)
x = torch.rand_like(x, dtype=torch.float)
x

x.size()

y = torch.rand(5, 3)
x + y
torch.add(x, y)

result = torch.empty(5, 3)
torch.add(x, y, out=result)
result

y.add_(x)

x[:, 1]
Example #55
"""
Tensors
=======

Tensors behave almost exactly the same way in PyTorch as they do in
Torch.

Create a tensor of size (5 x 7) with uninitialized memory:

"""

import torch
a = torch.empty(5, 7, dtype=torch.float)

###############################################################
# Initialize a double tensor randomized with a normal distribution with mean=0,
# var=1:

a = torch.randn(5, 7, dtype=torch.double)
print(a)
print(a.size())

###############################################################
# .. note::
#     ``torch.Size`` is in fact a tuple, so it supports the same operations
#
# Inplace / Out-of-place
# ----------------------
#
# The first difference is that ALL operations on the tensor that operate
# in-place on it will have an ``_`` postfix. For example, ``add`` is the
# out-of-place version, and ``add_`` is the in-place version.
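
###############################################################
# A minimal sketch of that convention (standalone, not part of the original
# tutorial snippet): the trailing underscore marks the variant that mutates
# its receiver.

b = torch.full((5, 7), 3.0)
c = a.add(b)   # out-of-place: ``a`` is unchanged, the sum is returned in ``c``
a.add_(b)      # in-place: ``a`` itself now holds the sum
print(torch.equal(a, c))  # True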
Example #56
0
    def test_constrained_expected_improvement(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        for dtype in (torch.float, torch.double):
            # one constraint
            mean = torch.tensor([[-0.5, 0.0]], device=device, dtype=dtype).unsqueeze(
                dim=-2
            )
            variance = torch.ones(1, 2, device=device, dtype=dtype).unsqueeze(dim=-2)
            mm = MockModel(MockPosterior(mean=mean, variance=variance))
            module = ConstrainedExpectedImprovement(
                model=mm, best_f=0.0, objective_index=0, constraints={1: [None, 0]}
            )
            X = torch.empty(1, 1, device=device, dtype=dtype)  # dummy
            ei = module(X)
            ei_expected_unconstrained = torch.tensor(
                0.19780, device=device, dtype=dtype
            )
            ei_expected = ei_expected_unconstrained * 0.5
            self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

            # check that error raised if no constraints
            with self.assertRaises(ValueError):
                module = ConstrainedExpectedImprovement(
                    model=mm, best_f=0.0, objective_index=0, constraints={}
                )

            # check that error raised if objective is a constraint
            with self.assertRaises(ValueError):
                module = ConstrainedExpectedImprovement(
                    model=mm, best_f=0.0, objective_index=0, constraints={0: [None, 0]}
                )

            # check that error raised if constraint lower > upper
            with self.assertRaises(ValueError):
                module = ConstrainedExpectedImprovement(
                    model=mm, best_f=0.0, objective_index=0, constraints={0: [1, 0]}
                )

            # three constraints
            N = torch.distributions.Normal(loc=0.0, scale=1.0)
            a = N.icdf(torch.tensor(0.75))  # get a so that P(-a <= N <= a) = 0.5
            mean = torch.tensor(
                [[-0.5, 0.0, 5.0, 0.0]], device=device, dtype=dtype
            ).unsqueeze(dim=-2)
            variance = torch.ones(1, 4, device=device, dtype=dtype).unsqueeze(dim=-2)
            mm = MockModel(MockPosterior(mean=mean, variance=variance))
            module = ConstrainedExpectedImprovement(
                model=mm,
                best_f=0.0,
                objective_index=0,
                constraints={1: [None, 0], 2: [5.0, None], 3: [-a, a]},
            )
            X = torch.empty(1, 1, device=device, dtype=dtype)  # dummy
            ei = module(X)
            ei_expected_unconstrained = torch.tensor(
                0.19780, device=device, dtype=dtype
            )
            ei_expected = ei_expected_unconstrained * 0.5 * 0.5 * 0.5
            self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
            # test maximize=False (minimization)
            module_min = ConstrainedExpectedImprovement(
                model=mm,
                best_f=0.0,
                objective_index=0,
                constraints={1: [None, 0]},
                maximize=False,
            )
            ei_min = module_min(X)
            ei_expected_unconstrained_min = torch.tensor(
                0.6978, device=device, dtype=dtype
            )
            ei_expected_min = ei_expected_unconstrained_min * 0.5
            self.assertTrue(torch.allclose(ei_min, ei_expected_min, atol=1e-4))
            # test invalid constraints
            with self.assertRaises(ValueError):
                ConstrainedExpectedImprovement(
                    model=mm,
                    best_f=0.0,
                    objective_index=0,
                    constraints={1: [1.0, -1.0]},
                )
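
The 0.5 factors in the expected values above come directly from the feasibility
probabilities: each constraint output has a unit-variance normal posterior whose
one-sided bound sits exactly at its mean, and ``a`` is calibrated so the two-sided
bound [-a, a] also has probability 0.5, so every constraint scales the
unconstrained EI by 0.5. A minimal standalone sketch of that arithmetic:

import torch

normal = torch.distributions.Normal(0.0, 1.0)
p_upper = normal.cdf(torch.tensor(0.0))        # P(y <= 0 | N(0, 1)) = 0.5
a = normal.icdf(torch.tensor(0.75))
p_two_sided = normal.cdf(a) - normal.cdf(-a)   # P(-a <= y <= a) = 0.5
print(p_upper.item(), p_two_sided.item())
# Three independent constraints multiply: EI * 0.5 * 0.5 * 0.5, as asserted above.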
Example #57
0
    def __call__(self, data, masked_atom_indices=None):
        """
        :param data: pytorch geometric data object. Assume that the edge
        ordering is the default pytorch geometric ordering, where the two
        directions of a single edge occur in pairs.
        Eg. data.edge_index = tensor([[0, 1, 1, 2, 2, 3],
                                     [1, 0, 2, 1, 3, 2]])
        :param masked_atom_indices: If None, randomly samples num_atoms *
        mask_rate atom indices. Otherwise, a list of atom indices specifying
        the atoms to be masked (for debugging only).
        :return: None. Creates new attributes on the original data object:
        data.mask_node_idx
        data.mask_node_label
        data.mask_edge_idx
        data.mask_edge_label
        """

        if masked_atom_indices is None:
            # sample a number of distinct atoms to mask, based on the mask
            # rate; always samples at least 1 atom
            num_atoms = data.x.size()[0]
            sample_size = int(num_atoms * self.mask_rate + 1)
            masked_atom_indices = random.sample(range(num_atoms), sample_size)

        # create mask node label by copying atom feature of mask atom
        mask_node_labels_list = []
        for atom_idx in masked_atom_indices:
            mask_node_labels_list.append(data.x[atom_idx].view(1, -1))
        data.mask_node_label = torch.cat(mask_node_labels_list, dim=0)
        data.masked_atom_indices = torch.tensor(masked_atom_indices)

        # modify the original node feature of the masked node
        for atom_idx in masked_atom_indices:
            data.x[atom_idx] = torch.tensor([self.num_atom_type, 0])

        if self.mask_edge:
            # create mask edge labels by copying edge features of edges that are bonded to
            # mask atoms
            connected_edge_indices = []
            for bond_idx, (u, v) in enumerate(data.edge_index.cpu().numpy().T):
                for atom_idx in masked_atom_indices:
                    if (atom_idx in (u, v)
                            and bond_idx not in connected_edge_indices):
                        connected_edge_indices.append(bond_idx)

            if len(connected_edge_indices) > 0:
                # create mask edge labels by copying bond features of the bonds connected to
                # the mask atoms
                mask_edge_labels_list = []
                # The two directions of each edge sit in adjacent pairs, so
                # taking every 2nd index yields the unique undirected edges.
                for bond_idx in connected_edge_indices[::2]:
                    mask_edge_labels_list.append(
                        data.edge_attr[bond_idx].view(1, -1))

                data.mask_edge_label = torch.cat(mask_edge_labels_list, dim=0)
                # modify the original bond features of the bonds connected to the mask atoms
                for bond_idx in connected_edge_indices:
                    data.edge_attr[bond_idx] = torch.tensor(
                        [self.num_edge_type, 0])

                data.connected_edge_indices = torch.tensor(
                    connected_edge_indices[::2])
            else:
                data.mask_edge_label = torch.empty((0, 2)).to(torch.int64)
                data.connected_edge_indices = torch.tensor(
                    connected_edge_indices).to(torch.int64)

        return data
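
The every-2nd-edge trick above relies on the paired edge ordering spelled out in
the docstring: the two directions of each undirected bond are adjacent, so the
even positions give one representative per bond. A minimal sketch using the
docstring's own example:

import torch

edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
                           [1, 0, 2, 1, 3, 2]])
undirected = edge_index[:, ::2]
print(undirected)
# tensor([[0, 1, 2],
#         [1, 2, 3]])   one directed representative per undirected bond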
Example #58
0
def compute_multi_bald_batch(
    bayesian_model: nn.Module,
    available_loader,
    num_classes,
    k,
    b,
    target_size,
    initial_percentage,
    reduce_percentage,
    device=None,
) -> AcquisitionBatch:
    result = reduced_eval_consistent_bayesian_model(
        bayesian_model=bayesian_model,
        acquisition_function=AcquisitionFunction.bald,
        num_classes=num_classes,
        k=k,
        initial_percentage=initial_percentage,
        reduce_percentage=reduce_percentage,
        target_size=target_size,
        available_loader=available_loader,
        device=device,
    )

    subset_split = result.subset_split

    partial_multi_bald_B = result.scores_B
    # Now we can compute the conditional entropy
    conditional_entropies_B = joint_entropy_exact.batch_conditional_entropy_B(
        result.logits_B_K_C)

    # We turn the logits into probabilities.
    probs_B_K_C = result.logits_B_K_C.exp_()

    # Don't need the result anymore.
    result = None

    torch_utils.gc_cuda()
    # torch_utils.cuda_meminfo()

    with torch.no_grad():
        num_samples_per_ws = 40000 // k
        num_samples = num_samples_per_ws * k

        if device.type == "cuda":
            # KC_memory = k*num_classes*8
            sample_MK_memory = num_samples * k * 8
            MC_memory = num_samples * num_classes * 8
            copy_buffer_memory = 256 * num_samples * num_classes * 8
            slack_memory = 2 * 2**30
            multi_bald_batch_size = (torch_utils.get_cuda_available_memory() -
                                     (sample_MK_memory + copy_buffer_memory +
                                      slack_memory)) // MC_memory

            global compute_multi_bald_bag_multi_bald_batch_size
            if compute_multi_bald_bag_multi_bald_batch_size != multi_bald_batch_size:
                compute_multi_bald_bag_multi_bald_batch_size = multi_bald_batch_size
                print(
                    f"New compute_multi_bald_bag_multi_bald_batch_size = {multi_bald_batch_size}"
                )
        else:
            multi_bald_batch_size = 16

        subset_acquisition_bag = []
        global_acquisition_bag = []
        acquisition_bag_scores = []

        # We use this for early-out in the b==0 case.
        MIN_SPREAD = 0.1

        if b == 0:
            b = 100
            early_out = True
        else:
            early_out = False

        prev_joint_probs_M_K = None
        prev_samples_M_K = None
        for i in range(b):
            torch_utils.gc_cuda()

            if i > 0:
                # Compute the joint entropy
                joint_entropies_B = torch.empty((len(probs_B_K_C), ),
                                                dtype=torch.float64)

                exact_samples = num_classes**i
                if exact_samples <= num_samples:
                    prev_joint_probs_M_K = joint_entropy_exact.joint_probs_M_K(
                        probs_B_K_C[subset_acquisition_bag[-1]][None].to(
                            device),
                        prev_joint_probs_M_K=prev_joint_probs_M_K,
                    )

                    # torch_utils.cuda_meminfo()
                    batch_exact_joint_entropy(probs_B_K_C,
                                              prev_joint_probs_M_K,
                                              multi_bald_batch_size, device,
                                              joint_entropies_B)
                else:
                    if prev_joint_probs_M_K is not None:
                        prev_joint_probs_M_K = None
                        torch_utils.gc_cuda()

                    # Gather new traces for the new subset_acquisition_bag.
                    prev_samples_M_K = joint_entropy_sampling.sample_M_K(
                        probs_B_K_C[subset_acquisition_bag].to(device),
                        S=num_samples_per_ws)

                    # torch_utils.cuda_meminfo()
                    for joint_entropies_b, probs_b_K_C in with_progress_bar(
                            torch_utils.split_tensors(joint_entropies_B,
                                                      probs_B_K_C,
                                                      multi_bald_batch_size),
                            unit_scale=multi_bald_batch_size,
                    ):
                        joint_entropies_b.copy_(joint_entropy_sampling.batch(
                            probs_b_K_C.to(device), prev_samples_M_K),
                                                non_blocking=True)

                        # torch_utils.cuda_meminfo()

                    prev_samples_M_K = None
                    torch_utils.gc_cuda()

                partial_multi_bald_B = joint_entropies_B - conditional_entropies_B
                joint_entropies_B = None

            # Don't allow reselection
            partial_multi_bald_B[subset_acquisition_bag] = -math.inf

            winner_index = partial_multi_bald_B.argmax().item()

            # Actual MultiBALD is:
            actual_multi_bald_B = (
                partial_multi_bald_B[winner_index]
                - torch.sum(conditional_entropies_B[subset_acquisition_bag]))
            actual_multi_bald_B = actual_multi_bald_B.item()

            print(f"Actual MultiBALD: {actual_multi_bald_B}")

            # If we early out, we don't take the point that triggers the early out.
            # Only allow early-out after acquiring at least 1 sample.
            if early_out and i > 1:
                # actual_multi_bald_B is a Python float at this point, so
                # measure the spread on the score vector instead: how far the
                # winner sits above the median remaining score.
                current_spread = (partial_multi_bald_B[winner_index] -
                                  partial_multi_bald_B.median()).item()
                if current_spread < MIN_SPREAD:
                    print("Early out")
                    break

            acquisition_bag_scores.append(actual_multi_bald_B)

            subset_acquisition_bag.append(winner_index)
            # We need to map the index back to the actual dataset.
            global_acquisition_bag.append(
                subset_split.get_dataset_indices([winner_index]).item())

            print(f"Acquisition bag: {sorted(global_acquisition_bag)}")

    return AcquisitionBatch(global_acquisition_bag, acquisition_bag_scores,
                            None)
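
The cuda branch above sizes each joint-entropy batch from free GPU memory. A
worked instance of that arithmetic with hypothetical numbers (8 GiB free,
float64 elements at 8 bytes; the real code queries
torch_utils.get_cuda_available_memory()):

k, num_classes = 100, 10
num_samples_per_ws = 40000 // k                  # 400, as in the function above
num_samples = num_samples_per_ws * k             # 40000

free_memory = 8 * 2**30                          # assumed available CUDA memory
sample_MK_memory = num_samples * k * 8           # the M x K sample matrix
MC_memory = num_samples * num_classes * 8        # one M x C slice per pool point
copy_buffer_memory = 256 * num_samples * num_classes * 8
slack_memory = 2 * 2**30                         # head-room left untouched

batch = (free_memory
         - (sample_MK_memory + copy_buffer_memory + slack_memory)) // MC_memory
print(batch)  # 1747 pool points per joint-entropy batch under these numbers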
Example #59
0
 def test_MockModel(self):
     mp = MockPosterior()
     mm = MockModel(mp)
     X = torch.empty(0)
     self.assertEqual(mm.posterior(X), mp)
Example #60
0
 def pointwise_ops(self):
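     # Assumes `import math` and `import torch` at module level (math.log is
     # used below). Several calls pass Python scalars (e.g. torch.ceil via
     # float(...)); those overloads resolve when this module is scripted with
     # TorchScript rather than run eagerly.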
     a = torch.randn(4)
     b = torch.randn(4)
     t = torch.tensor([-1, -2, 3], dtype=torch.int8)
     r = torch.tensor([0, 1, 10, 0], dtype=torch.int8)
     s = torch.tensor([4, 0, 1, 0], dtype=torch.int8)
     f = torch.zeros(3)
     g = torch.tensor([-1, 0, 1])
     w = torch.tensor([0.3810, 1.2774, -0.2972, -0.3719, 0.4637])
     return (
         torch.abs(torch.tensor([-1, -2, 3])),
         torch.absolute(torch.tensor([-1, -2, 3])),
         torch.acos(a),
         torch.arccos(a),
         torch.acosh(a.uniform_(1.0, 2.0)),
         torch.add(a, 20),
         torch.add(a, b, out=a),
         b.add(a),
         b.add(a, out=b),
         b.add_(a),
         b.add(1),
         torch.add(a, torch.randn(4, 1), alpha=10),
         torch.addcdiv(torch.randn(1, 3),
                       torch.randn(3, 1),
                       torch.randn(1, 3),
                       value=0.1),
         torch.addcmul(torch.randn(1, 3),
                       torch.randn(3, 1),
                       torch.randn(1, 3),
                       value=0.1),
         torch.angle(a),
         torch.asin(a),
         torch.arcsin(a),
         torch.asinh(a),
         torch.arcsinh(a),
         torch.atan(a),
         torch.arctan(a),
         torch.atanh(a.uniform_(-1.0, 1.0)),
         torch.arctanh(a.uniform_(-1.0, 1.0)),
         torch.atan2(a, a),
         torch.bitwise_not(t),
         torch.bitwise_and(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.bitwise_or(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.bitwise_xor(t, torch.tensor([1, 0, 3], dtype=torch.int8)),
         torch.ceil(a),
         torch.ceil(float(torch.tensor(0.5))),
         torch.ceil(torch.tensor(0.5).item()),
         torch.clamp(a, min=-0.5, max=0.5),
         torch.clamp(a, min=0.5),
         torch.clamp(a, max=0.5),
         torch.clip(a, min=-0.5, max=0.5),
         torch.conj(a),
         torch.copysign(a, 1),
         torch.copysign(a, b),
         torch.cos(a),
         torch.cosh(a),
         torch.deg2rad(
             torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0,
                                                              -90.0]])),
         torch.div(a, b),
         a.div(b),
         a.div(1),
         a.div_(b),
         torch.divide(a, b, rounding_mode="trunc"),
         torch.divide(a, b, rounding_mode="floor"),
         torch.digamma(torch.tensor([1.0, 0.5])),
         torch.erf(torch.tensor([0.0, -1.0, 10.0])),
         torch.erfc(torch.tensor([0.0, -1.0, 10.0])),
         torch.erfinv(torch.tensor([0.0, 0.5, -1.0])),
         torch.exp(torch.tensor([0.0, math.log(2.0)])),
         torch.exp(float(torch.tensor(1))),
         torch.exp2(torch.tensor([0.0, math.log(2.0), 3.0, 4.0])),
         torch.expm1(torch.tensor([0.0, math.log(2.0)])),
         torch.fake_quantize_per_channel_affine(
             torch.randn(2, 2, 2),
             (torch.randn(2) + 1) * 0.05,
             torch.zeros(2),
             1,
             0,
             255,
         ),
         torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255),
         torch.float_power(torch.randint(10, (4, )), 2),
         torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4,
                                                             -5])),
         torch.floor(a),
         torch.floor(float(torch.tensor(1))),
         torch.floor_divide(torch.tensor([4.0, 3.0]),
                            torch.tensor([2.0, 2.0])),
         torch.floor_divide(torch.tensor([4.0, 3.0]), 1.4),
         torch.fmod(torch.tensor([-3, -2, -1, 1, 2, 3]), 2),
         torch.fmod(torch.tensor([1, 2, 3, 4, 5]), 1.5),
         torch.frac(torch.tensor([1.0, 2.5, -3.2])),
         torch.randn(4, dtype=torch.cfloat).imag,
         torch.ldexp(torch.tensor([1.0]), torch.tensor([1])),
         torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])),
         torch.lerp(torch.arange(1.0, 5.0),
                    torch.empty(4).fill_(10), 0.5),
         torch.lerp(
             torch.arange(1.0, 5.0),
             torch.empty(4).fill_(10),
             torch.full_like(torch.arange(1.0, 5.0), 0.5),
         ),
         torch.lgamma(torch.arange(0.5, 2, 0.5)),
         torch.log(torch.arange(5) + 10),
         torch.log10(torch.rand(5)),
         torch.log1p(torch.randn(5)),
         torch.log2(torch.rand(5)),
         torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
         torch.logaddexp(torch.tensor([-100.0, -200.0, -300.0]),
                         torch.tensor([-1, -2, -3])),
         torch.logaddexp(torch.tensor([1.0, 2000.0, 30000.0]),
                         torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([-100.0, -200.0, -300.0]),
                          torch.tensor([-1, -2, -3])),
         torch.logaddexp2(torch.tensor([1.0, 2000.0, 30000.0]),
                          torch.tensor([-1, -2, -3])),
         torch.logical_and(r, s),
         torch.logical_and(r.double(), s.double()),
         torch.logical_and(r.double(), s),
         torch.logical_and(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)),
         torch.logical_not(
             torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)),
         torch.logical_not(
             torch.tensor([0.0, 1.0, -10.0], dtype=torch.double),
             out=torch.empty(3, dtype=torch.int16),
         ),
         torch.logical_or(r, s),
         torch.logical_or(r.double(), s.double()),
         torch.logical_or(r.double(), s),
         torch.logical_or(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logical_xor(r, s),
         torch.logical_xor(r.double(), s.double()),
         torch.logical_xor(r.double(), s),
         torch.logical_xor(r, s, out=torch.empty(4, dtype=torch.bool)),
         torch.logit(torch.rand(5), eps=1e-6),
         torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])),
         torch.i0(torch.arange(5, dtype=torch.float32)),
         torch.igamma(a, b),
         torch.igammac(a, b),
         torch.mul(torch.randn(3), 100),
         b.mul(a),
         b.mul(5),
         b.mul(a, out=b),
         b.mul_(a),
         b.mul_(5),
         torch.multiply(torch.randn(4, 1), torch.randn(1, 4)),
         torch.mvlgamma(torch.empty(2, 3).uniform_(1.0, 2.0), 2),
         torch.tensor([float("nan"),
                       float("inf"), -float("inf"), 3.14]),
         torch.nan_to_num(w),
         torch.nan_to_num_(w),
         torch.nan_to_num(w, nan=2.0),
         torch.nan_to_num(w, nan=2.0, posinf=1.0),
         torch.neg(torch.randn(5)),
         # torch.nextafter(torch.tensor([1, 2]), torch.tensor([2, 1])) == torch.tensor([eps + 1, 2 - eps]),
         torch.polygamma(1, torch.tensor([1.0, 0.5])),
         torch.polygamma(2, torch.tensor([1.0, 0.5])),
         torch.polygamma(3, torch.tensor([1.0, 0.5])),
         torch.polygamma(4, torch.tensor([1.0, 0.5])),
         torch.pow(a, 2),
         torch.pow(2, float(torch.tensor(0.5))),
         torch.pow(torch.arange(1.0, 5.0), torch.arange(1.0, 5.0)),
         torch.rad2deg(
             torch.tensor([[3.142, -3.142], [6.283, -6.283],
                           [1.570, -1.570]])),
         torch.randn(4, dtype=torch.cfloat).real,
         torch.reciprocal(a),
         torch.remainder(torch.tensor([-3.0, -2.0]), 2),
         torch.remainder(torch.tensor([1, 2, 3, 4, 5]), 1.5),
         torch.round(a),
         torch.round(torch.tensor(0.5).item()),
         torch.rsqrt(a),
         torch.sigmoid(a),
         torch.sign(torch.tensor([0.7, -1.2, 0.0, 2.3])),
         torch.sgn(a),
         torch.signbit(torch.tensor([0.7, -1.2, 0.0, 2.3])),
         torch.sin(a),
         torch.sinc(a),
         torch.sinh(a),
         torch.sqrt(a),
         torch.square(a),
         torch.sub(torch.tensor((1, 2)), torch.tensor((0, 1)), alpha=2),
         b.sub(a),
         b.sub_(a),
         b.sub(5),
         torch.sum(5),
         torch.tan(a),
         torch.tanh(a),
         torch.true_divide(a, a),
         torch.trunc(a),
         torch.trunc_(a),
         torch.xlogy(f, g),
         torch.xlogy(f, 4),
         torch.xlogy(2, g),
     )