Example #1
def test2():
    x = torch.ones(1, 2)
    x = Variable(x)
    y = torch.ones(1, 2)

    z = x + 0.5
    print(x.data)
Example #2
    def test_elmo_lstm_cell_completes_forward_pass(self):
        input_tensor = torch.autograd.Variable(torch.rand(4, 5, 3))
        input_tensor[1, 4:, :] = 0.
        input_tensor[2, 2:, :] = 0.
        input_tensor[3, 1:, :] = 0.

        initial_hidden_state = Variable(torch.ones([1, 4, 5]))
        initial_memory_state = Variable(torch.ones([1, 4, 7]))

        lstm = LstmCellWithProjection(input_size=3,
                                      hidden_size=5,
                                      cell_size=7,
                                      memory_cell_clip_value=2,
                                      state_projection_clip_value=1)
        output_sequence, lstm_state = lstm(input_tensor, [5, 4, 2, 1],
                                           (initial_hidden_state, initial_memory_state))
        numpy.testing.assert_array_equal(output_sequence.data[1, 4:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(output_sequence.data[2, 2:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(output_sequence.data[3, 1:, :].numpy(), 0.0)

        # Test the state clipping.
        numpy.testing.assert_array_less(output_sequence.data.numpy(), 1.0)
        numpy.testing.assert_array_less(-output_sequence.data.numpy(), 1.0)

        # LSTM state should be (num_layers, batch_size, hidden_size)
        assert list(lstm_state[0].size()) == [1, 4, 5]
        # LSTM memory cell should be (num_layers, batch_size, cell_size)
        assert list(lstm_state[1].size()) == [1, 4, 7]

        # Test the cell clipping.
        numpy.testing.assert_array_less(lstm_state[0].data.numpy(), 2.0)
        numpy.testing.assert_array_less(-lstm_state[0].data.numpy(), 2.0)
Example #3
    def test_flattened_index_select(self):
        indices = numpy.array([[1, 2],
                               [3, 4]])
        targets = torch.ones([2, 6, 3]).cumsum(1) - 1
        # Make the second batch double its indices so they're different.
        targets[1, :, :] *= 2
        indices = torch.tensor(indices, dtype=torch.long)

        selected = util.flattened_index_select(targets, indices)

        assert list(selected.size()) == [2, 2, 2, 3]

        ones = numpy.ones([3])
        numpy.testing.assert_array_equal(selected[0, 0, 0, :].data.numpy(), ones)
        numpy.testing.assert_array_equal(selected[0, 0, 1, :].data.numpy(), ones * 2)
        numpy.testing.assert_array_equal(selected[0, 1, 0, :].data.numpy(), ones * 3)
        numpy.testing.assert_array_equal(selected[0, 1, 1, :].data.numpy(), ones * 4)

        numpy.testing.assert_array_equal(selected[1, 0, 0, :].data.numpy(), ones * 2)
        numpy.testing.assert_array_equal(selected[1, 0, 1, :].data.numpy(), ones * 4)
        numpy.testing.assert_array_equal(selected[1, 1, 0, :].data.numpy(), ones * 6)
        numpy.testing.assert_array_equal(selected[1, 1, 1, :].data.numpy(), ones * 8)

        # Check we only accept 2D indices.
        with pytest.raises(ConfigurationError):
            util.flattened_index_select(targets, torch.ones([3, 4, 5]))
Example #4
        def guide():
            mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.094 * torch.ones(2),
                                               requires_grad=True))
            log_sig_q = pyro.param("log_sig_q", Variable(
                                   self.analytic_log_sig_n.data - 0.11 * torch.ones(2), requires_grad=True))
            sig_q = torch.exp(log_sig_q)
            trivial_baseline = pyro.module("mu_baseline", pt_mu_baseline, tags="baseline")
            baseline_value = trivial_baseline(ng_ones(1))
            mu_latent = pyro.sample("mu_latent",
                                    dist.Normal(mu_q, sig_q, reparameterized=False),
                                    baseline=dict(baseline_value=baseline_value))

            def obs_inner(i, _i, _x):
                for k in range(n_superfluous_top + n_superfluous_bottom):
                    z_baseline = pyro.module("z_baseline_%d_%d" % (i, k),
                                             pt_superfluous_baselines[3 * k + i], tags="baseline")
                    baseline_value = z_baseline(mu_latent.detach()).unsqueeze(-1)
                    mean_i = pyro.param("mean_%d_%d" % (i, k),
                                        Variable(0.5 * torch.ones(4 - i, 1), requires_grad=True))
                    pyro.sample("z_%d_%d" % (i, k),
                                dist.Normal(mean_i, ng_ones(4 - i, 1), reparameterized=False),
                                baseline=dict(baseline_value=baseline_value))

            def obs_outer(i, x):
                pyro.map_data("map_obs_inner_%d" % i, x, lambda _i, _x:
                              obs_inner(i, _i, _x), batch_size=4 - i)

            pyro.map_data("map_obs_outer", [self.data_tensor[0:4, :], self.data_tensor[4:7, :],
                                            self.data_tensor[7:9, :]],
                          lambda i, x: obs_outer(i, x), batch_size=3)

            return mu_latent
Example #5
        def guide():
            mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.334 * torch.ones(2),
                                               requires_grad=True))
            log_sig_q = pyro.param("log_sig_q", Variable(
                                   self.analytic_log_sig_n.data - 0.29 * torch.ones(2),
                                   requires_grad=True))
            mu_q_prime = pyro.param("mu_q_prime", Variable(torch.Tensor([-0.34, 0.52]),
                                    requires_grad=True))
            kappa_q = pyro.param("kappa_q", Variable(torch.Tensor([0.74]),
                                 requires_grad=True))
            log_sig_q_prime = pyro.param("log_sig_q_prime",
                                         Variable(-0.5 * torch.log(1.2 * self.lam0.data),
                                                  requires_grad=True))
            sig_q, sig_q_prime = torch.exp(log_sig_q), torch.exp(log_sig_q_prime)
            mu_latent_dist = dist.Normal(mu_q, sig_q, reparameterized=repa2)
            mu_latent = pyro.sample("mu_latent", mu_latent_dist,
                                    baseline=dict(use_decaying_avg_baseline=use_decaying_avg_baseline))
            mu_latent_prime_dist = dist.Normal(kappa_q.expand_as(mu_latent) * mu_latent + mu_q_prime,
                                               sig_q_prime,
                                               reparameterized=repa1)
            pyro.sample("mu_latent_prime",
                        mu_latent_prime_dist,
                        baseline=dict(nn_baseline=mu_prime_baseline,
                                      nn_baseline_input=mu_latent,
                                      use_decaying_avg_baseline=use_decaying_avg_baseline))

            return mu_latent
Example #6
    def test_cpu(self):
        create_extension(
            name='test_extensions.cpulib',
            headers=[test_dir + '/ffi/src/cpu/lib.h'],
            sources=[
                test_dir + '/ffi/src/cpu/lib1.c',
                test_dir + '/ffi/src/cpu/lib2.c',
            ],
            verbose=False,
        ).build()
        from test_extensions import cpulib
        tensor = torch.ones(2, 2).float()

        cpulib.good_func(tensor, 2, 1.5)
        self.assertEqual(tensor, torch.ones(2, 2) * 2 + 1.5)

        new_tensor = cpulib.new_tensor(4)
        self.assertEqual(new_tensor, torch.ones(4, 4) * 4)

        f = cpulib.int_to_float(5)
        self.assertIs(type(f), float)

        self.assertRaises(TypeError,
                          lambda: cpulib.good_func(tensor.double(), 2, 1.5))
        self.assertRaises(torch.FatalError,
                          lambda: cpulib.bad_func(tensor, 2, 1.5))
Example #7
    def __init__(self, hidden_size, num_inputs, action_space):
        super(Policy, self).__init__()
        self.action_space = action_space
        num_outputs = action_space.shape[0]

        self.bn0 = nn.BatchNorm1d(num_inputs)
        self.bn0.weight.data.fill_(1)
        self.bn0.bias.data.fill_(0)

        self.linear1 = nn.Linear(num_inputs, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.bn1.weight.data.fill_(1)
        self.bn1.bias.data.fill_(0)

        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.bn2.weight.data.fill_(1)
        self.bn2.bias.data.fill_(0)

        self.V = nn.Linear(hidden_size, 1)
        self.V.weight.data.mul_(0.1)
        self.V.bias.data.mul_(0.1)

        self.mu = nn.Linear(hidden_size, num_outputs)
        self.mu.weight.data.mul_(0.1)
        self.mu.bias.data.mul_(0.1)

        self.L = nn.Linear(hidden_size, num_outputs ** 2)
        self.L.weight.data.mul_(0.1)
        self.L.bias.data.mul_(0.1)

        self.tril_mask = Variable(torch.tril(torch.ones(
            num_outputs, num_outputs), diagonal=-1).unsqueeze(0))
        self.diag_mask = Variable(torch.diag(torch.diag(
            torch.ones(num_outputs, num_outputs))).unsqueeze(0))
Example #8
    def test_Concat(self):
        input = torch.randn(4, 2)
        num_modules = random.randint(2, 5)
        linears = [nn.Linear(2, 5) for i in range(num_modules)]

        m = nn.Concat(0)
        for l in linears:
            m.add(l)
            l.zeroGradParameters()
            l.weight.fill_(1)
            l.bias.fill_(0)

        # Check that these don't raise errors
        m.__repr__()
        str(m)

        output = m.forward(input)
        output2 = input.sum(1, True).expand(4, 5).repeat(num_modules, 1)
        self.assertEqual(output2, output)

        gradInput = m.backward(input, torch.ones(output2.size()))
        gradInput2 = torch.ones(4, 2).fill_(num_modules * 5)
        self.assertEqual(gradInput, gradInput2)

        gradWeight = input.sum(0, keepdim=True).expand(5, 2)
        for l in linears:
            self.assertEqual(gradWeight, l.gradWeight)
Example #9
  def setUp(self):
    # Tests will use 3 filters and image width, height = 2 X 2

    # Batch size 1
    x = torch.ones((1, 3, 2, 2))
    x[0, 0, 1, 0] = 1.1
    x[0, 0, 1, 1] = 1.2
    x[0, 1, 0, 1] = 1.2
    x[0, 2, 1, 0] = 1.3
    self.x = x
    self.gradient = torch.rand(x.shape)

    # Batch size 2
    x = torch.ones((2, 3, 2, 2))
    x[0, 0, 1, 0] = 1.1
    x[0, 0, 1, 1] = 1.2
    x[0, 1, 0, 1] = 1.2
    x[0, 2, 1, 0] = 1.3

    x[1, 0, 0, 0] = 1.4
    x[1, 1, 0, 0] = 1.5
    x[1, 1, 0, 1] = 1.6
    x[1, 2, 1, 1] = 1.7
    self.x2 = x
    self.gradient2 = torch.rand(x.shape)

    # All equal
    self.dutyCycle = torch.zeros((1, 3, 1, 1))
    self.dutyCycle[:] = 1.0 / 3.0
Example #10
    def forward(self, input_features, adj):
        #x = self.conv1(input_features, adj)
        #x = self.bn1(x)
        #x = self.act(x)
        #x = self.conv2(x, adj)
        #x = self.bn2(x)

        # pool over all nodes 
        #graph_h = self.pool_graph(x)
        graph_h = input_features.view(-1, self.max_num_nodes * self.max_num_nodes)
        # vae
        h_decode, z_mu, z_lsgms = self.vae(graph_h)
        out = F.sigmoid(h_decode)
        out_tensor = out.cpu().data
        recon_adj_lower = self.recover_adj_lower(out_tensor)
        recon_adj_tensor = self.recover_full_adj_from_lower(recon_adj_lower)

        # set matching features be degree
        out_features = torch.sum(recon_adj_tensor, 1)

        adj_data = adj.cpu().data[0]
        adj_features = torch.sum(adj_data, 1)

        S = self.edge_similarity_matrix(adj_data, recon_adj_tensor, adj_features, out_features,
                self.deg_feature_similarity)

        # initialization strategies
        init_corr = 1 / self.max_num_nodes
        init_assignment = torch.ones(self.max_num_nodes, self.max_num_nodes) * init_corr
        #init_assignment = torch.FloatTensor(4, 4)
        #init.uniform(init_assignment)
        assignment = self.mpm(init_assignment, S)
        #print('Assignment: ', assignment)

        # matching
        # use negative of the assignment score since the alg finds min cost flow
        row_ind, col_ind = scipy.optimize.linear_sum_assignment(-assignment.numpy())
        print('row: ', row_ind)
        print('col: ', col_ind)
        # order row index according to col index
        #adj_permuted = self.permute_adj(adj_data, row_ind, col_ind)
        adj_permuted = adj_data
        adj_vectorized = adj_permuted[torch.triu(torch.ones(self.max_num_nodes, self.max_num_nodes)) == 1].squeeze_()
        adj_vectorized_var = Variable(adj_vectorized).cuda()

        #print(adj)
        #print('permuted: ', adj_permuted)
        #print('recon: ', recon_adj_tensor)
        adj_recon_loss = self.adj_recon_loss(adj_vectorized_var, out[0])
        print('recon: ', adj_recon_loss)
        print(adj_vectorized_var)
        print(out[0])

        loss_kl = -0.5 * torch.sum(1 + z_lsgms - z_mu.pow(2) - z_lsgms.exp())
        loss_kl /= self.max_num_nodes * self.max_num_nodes # normalize
        print('kl: ', loss_kl)

        loss = adj_recon_loss + loss_kl

        return loss
Example #11
 def test_joint_optimize(
     self,
     mock_get_best_candidates,
     mock_gen_candidates,
     mock_gen_batch_initial_conditions,
     cuda=False,
 ):
     q = 3
     num_restarts = 2
     raw_samples = 10
     options = {}
     mock_acq_function = MockAcquisitionFunction()
     tkwargs = {"device": torch.device("cuda") if cuda else torch.device("cpu")}
     for dtype in (torch.float, torch.double):
         tkwargs["dtype"] = dtype
         mock_gen_batch_initial_conditions.return_value = torch.zeros(
             num_restarts, q, 3, **tkwargs
         )
         mock_gen_candidates.return_value = torch.cat(
             [i * torch.ones(1, q, 3, **tkwargs) for i in range(num_restarts)], dim=0
         )
         mock_get_best_candidates.return_value = torch.ones(1, q, 3, **tkwargs)
         expected_candidates = mock_get_best_candidates.return_value
         bounds = torch.stack(
             [torch.zeros(3, **tkwargs), 4 * torch.ones(3, **tkwargs)]
         )
         candidates = joint_optimize(
             acq_function=mock_acq_function,
             bounds=bounds,
             q=q,
             num_restarts=num_restarts,
             raw_samples=raw_samples,
             options=options,
         )
         self.assertTrue(torch.equal(candidates, expected_candidates))
Example #12
 def model():
     latent = named.Object("latent")
     latent.list = named.List()
     loc = latent.list.add().loc.param_(torch.zeros(1))
     latent.dict = named.Dict()
     foo = latent.dict["foo"].foo.sample_(dist.Normal(loc, torch.ones(1)))
     latent.object.bar.sample_(dist.Normal(loc, torch.ones(1)), obs=foo)
Example #13
def test_hmc_conjugate_gaussian(fixture,
                                num_samples,
                                warmup_steps,
                                hmc_params,
                                expected_means,
                                expected_precs,
                                mean_tol,
                                std_tol):
    pyro.get_param_store().clear()
    hmc_kernel = HMC(fixture.model, **hmc_params)
    mcmc_run = MCMC(hmc_kernel, num_samples, warmup_steps).run(fixture.data)
    for i in range(1, fixture.chain_len + 1):
        param_name = 'loc_' + str(i)
        marginal = EmpiricalMarginal(mcmc_run, sites=param_name)
        latent_loc = marginal.mean
        latent_std = marginal.variance.sqrt()
        expected_mean = torch.ones(fixture.dim) * expected_means[i - 1]
        expected_std = 1 / torch.sqrt(torch.ones(fixture.dim) * expected_precs[i - 1])

        # Actual vs expected posterior means for the latents
        logger.info('Posterior mean (actual) - {}'.format(param_name))
        logger.info(latent_loc)
        logger.info('Posterior mean (expected) - {}'.format(param_name))
        logger.info(expected_mean)
        assert_equal(rmse(latent_loc, expected_mean).item(), 0.0, prec=mean_tol)

        # Actual vs expected posterior precisions for the latents
        logger.info('Posterior std (actual) - {}'.format(param_name))
        logger.info(latent_std)
        logger.info('Posterior std (expected) - {}'.format(param_name))
        logger.info(expected_std)
        assert_equal(rmse(latent_std, expected_std).item(), 0.0, prec=std_tol)
Example #14
def vector_grad():
    x = Variable(torch.ones(2)*3, requires_grad=True)
    y = Variable(torch.ones(2)*4, requires_grad=True)
    z = x.pow(2) + 3*y.pow(2)
    z.backward(torch.ones(2))
    print(x.grad)
    print(y.grad)
Example #15
def heads_tails(n_ent, train_data, valid_data=None, test_data=None):
    train_src, train_rel, train_dst = train_data
    if valid_data:
        valid_src, valid_rel, valid_dst = valid_data
    else:
        valid_src = valid_rel = valid_dst = []
    if test_data:
        test_src, test_rel, test_dst = test_data
    else:
        test_src = test_rel = test_dst = []
    all_src = train_src + valid_src + test_src
    all_rel = train_rel + valid_rel + test_rel
    all_dst = train_dst + valid_dst + test_dst
    heads = defaultdict(lambda: set())
    tails = defaultdict(lambda: set())
    for s, r, t in zip(all_src, all_rel, all_dst):
        tails[(s, r)].add(t)
        heads[(t, r)].add(s)
    heads_sp = {}
    tails_sp = {}
    for k in tails.keys():
        tails_sp[k] = torch.sparse.FloatTensor(torch.LongTensor([list(tails[k])]),
                                               torch.ones(len(tails[k])), torch.Size([n_ent]))
    for k in heads.keys():
        heads_sp[k] = torch.sparse.FloatTensor(torch.LongTensor([list(heads[k])]),
                                               torch.ones(len(heads[k])), torch.Size([n_ent]))
    return heads_sp, tails_sp
Example #16
    def test_regex_matches_are_initialized_correctly(self):
        class Net(torch.nn.Module):
            def __init__(self):
                super(Net, self).__init__()
                self.linear_1_with_funky_name = torch.nn.Linear(5, 10)
                self.linear_2 = torch.nn.Linear(10, 5)
                self.conv = torch.nn.Conv1d(5, 5, 5)

            def forward(self, inputs):  # pylint: disable=arguments-differ
                pass

        # pyhocon does funny things if there's a . in a key.  This test makes sure that we
        # handle these kinds of regexes correctly.
        json_params = """{"initializer": [
        ["conv", {"type": "constant", "val": 5}],
        ["funky_na.*bi", {"type": "constant", "val": 7}]
        ]}
        """
        params = Params(pyhocon.ConfigFactory.parse_string(json_params))
        initializers = InitializerApplicator.from_params(params['initializer'])
        model = Net()
        initializers(model)

        for parameter in model.conv.parameters():
            assert torch.equal(parameter.data, torch.ones(parameter.size()) * 5)

        parameter = model.linear_1_with_funky_name.bias
        assert torch.equal(parameter.data, torch.ones(parameter.size()) * 7)
Example #17
    def test_rescale_torch_tensor(self):
        rows, cols = 3, 5
        original_tensor = torch.randint(low=10, high=40, size=(rows, cols)).float()
        prev_max_tensor = torch.ones(1, 5) * 40.0
        prev_min_tensor = torch.ones(1, 5) * 10.0
        new_min_tensor = torch.ones(1, 5) * -1.0
        new_max_tensor = torch.ones(1, 5).float()

        print("Original tensor: ", original_tensor)
        rescaled_tensor = rescale_torch_tensor(
            original_tensor,
            new_min_tensor,
            new_max_tensor,
            prev_min_tensor,
            prev_max_tensor,
        )
        print("Rescaled tensor: ", rescaled_tensor)
        reconstructed_original_tensor = rescale_torch_tensor(
            rescaled_tensor,
            prev_min_tensor,
            prev_max_tensor,
            new_min_tensor,
            new_max_tensor,
        )
        print("Reconstructed Original tensor: ", reconstructed_original_tensor)

        comparison_tensor = torch.eq(original_tensor, reconstructed_original_tensor)
        self.assertTrue(torch.sum(comparison_tensor), rows * cols)
Example #18
    def test_python_ir(self):
        x = Variable(torch.Tensor([0.4]), requires_grad=True)
        y = Variable(torch.Tensor([0.7]), requires_grad=True)

        def doit(x, y):
            return torch.sigmoid(torch.tanh(x * (x + y)))

        traced, _ = torch.jit.trace(doit, (x, y))
        g = torch._C._jit_get_graph(traced)
        g2 = torch._C.Graph()
        g_to_g2 = {}
        for node in g.inputs():
            g_to_g2[node] = g2.addInput()
        for node in g.nodes():
            n_ = g2.createClone(node, lambda x: g_to_g2[x])
            g2.appendNode(n_)
            for o, no in zip(node.outputs(), n_.outputs()):
                g_to_g2[o] = no

        for node in g.outputs():
            g2.registerOutput(g_to_g2[node])

        t_node = g2.create("TensorTest").t_("a", torch.ones([2, 2]))
        assert(t_node.attributeNames() == ["a"])
        g2.appendNode(t_node)
        assert(torch.equal(torch.ones([2, 2]), t_node.t("a")))
        self.assertExpected(str(g2))
Example #19
 def test_growing_dataset(self):
     dataset = [torch.ones(4) for _ in range(4)]
     dataloader_seq = DataLoader(dataset, shuffle=False)
     dataloader_shuffle = DataLoader(dataset, shuffle=True)
     dataset.append(torch.ones(4))
     self.assertEqual(len(dataloader_seq), 5)
     self.assertEqual(len(dataloader_shuffle), 5)
Example #20
def bernoulli_normal_model():
    bern_0 = pyro.sample('bern_0', dist.Bernoulli(torch.zeros(1) * 1e-2))
    loc = torch.ones(1) if bern_0.item() else -torch.ones(1)
    normal_0 = torch.ones(1)
    pyro.sample('normal_0', dist.Normal(loc, torch.ones(1) * 1e-2),
                obs=normal_0)
    return [bern_0, normal_0]
Example #21
 def model():
     p2 = torch.tensor(torch.ones(2) / 2)
     p3 = torch.tensor(torch.ones(3) / 3)
     x2 = pyro.sample("x2", dist.OneHotCategorical(p2))
     x3 = pyro.sample("x3", dist.OneHotCategorical(p3))
     assert x2.shape == torch.Size([2]) + iarange_shape + p2.shape
     assert x3.shape == torch.Size([3, 1]) + iarange_shape + p3.shape
Example #22
def knn(Mxx, Mxy, Myy, k, sqrt):
    n0 = Mxx.size(0)
    n1 = Myy.size(0)
    label = torch.cat((torch.ones(n0),torch.zeros(n1)))
    M = torch.cat((torch.cat((Mxx,Mxy),1), torch.cat((Mxy.transpose(0,1),Myy), 1)), 0)
    if sqrt:
        M = M.abs().sqrt()
    INFINITY = float('inf')
    val, idx = (M+torch.diag(INFINITY*torch.ones(n0+n1))).topk(k, 0, False)

    count = torch.zeros(n0+n1)
    for i in range(0,k):
        count = count + label.index_select(0,idx[i])
    pred = torch.ge(count, (float(k)/2)*torch.ones(n0+n1)).float()

    s = Score_knn()
    s.tp = (pred*label).sum()
    s.fp = (pred*(1-label)).sum()
    s.fn = ((1-pred)*label).sum()
    s.tn = ((1-pred)*(1-label)).sum()
    s.precision = s.tp/(s.tp+s.fp)
    s.recall = s.tp/(s.tp+s.fn)
    s.acc_t = s.tp/(s.tp+s.fn)
    s.acc_f = s.tn/(s.tn+s.fp)
    s.acc = torch.eq(label, pred).float().mean()
    s.k = k 

    return s
Example #23
    def __init__(self, env_spec,
                 hidden_sizes=(64,64),
                 min_log_std=-3,
                 init_log_std=0,
                 seed=None):
        """
        :param env_spec: specifications of the env (see utils/gym_env.py)
        :param hidden_sizes: network hidden layer sizes (currently 2 layers only)
        :param min_log_std: log_std is clamped at this value and can't go below
        :param init_log_std: initial log standard deviation
        :param seed: random seed
        """
        self.n = env_spec.observation_dim  # number of states
        self.m = env_spec.action_dim  # number of actions
        self.min_log_std = min_log_std

        # Set seed
        # ------------------------
        if seed is not None:
            torch.manual_seed(seed)
            np.random.seed(seed)

        # Policy network
        # ------------------------
        self.model = nn.Sequential()
        self.model.add_module('fc_0', nn.Linear(self.n, hidden_sizes[0]))
        self.model.add_module('tanh_0', nn.Tanh())
        self.model.add_module('fc_1', nn.Linear(hidden_sizes[0], hidden_sizes[1]))
        self.model.add_module('tanh_1', nn.Tanh())
        self.model.add_module('fc_2', nn.Linear(hidden_sizes[1], self.m))
        # make weights small
        for param in list(self.model.parameters())[-2:]:  # only last layer
            param.data = (1.0/hidden_sizes[1]) * param.data
        self.log_std = Variable(torch.ones(self.m) * init_log_std, requires_grad=True)
        self.trainable_params = list(self.model.parameters()) + [self.log_std]

        # Old Policy network
        # ------------------------
        self.old_model = nn.Sequential()
        self.old_model.add_module('old_fc_0', nn.Linear(self.n, hidden_sizes[0]))
        self.old_model.add_module('old_tanh_0', nn.Tanh())
        self.old_model.add_module('old_fc_1', nn.Linear(hidden_sizes[0], hidden_sizes[1]))
        self.old_model.add_module('old_tanh_1', nn.Tanh())
        self.old_model.add_module('old_fc_2', nn.Linear(hidden_sizes[1], self.m))
        self.old_log_std = Variable(torch.ones(self.m) * init_log_std)
        self.old_params = list(self.old_model.parameters()) + [self.old_log_std]
        for idx, param in enumerate(self.old_params):
            param.data = self.trainable_params[idx].data.clone()

        # Easy access variables
        # -------------------------
        self.log_std_val = np.float64(self.log_std.data.numpy().ravel())
        self.param_shapes = [p.data.numpy().shape for p in self.trainable_params]
        self.param_sizes = [p.data.numpy().size for p in self.trainable_params]
        self.d = np.sum(self.param_sizes)  # total number of params

        # Placeholders
        # ------------------------
        self.obs_var = Variable(torch.randn(self.n), requires_grad=False)
Example #24
def test():
    x = torch.ones(1, 2)
    Sigma = torch.FloatTensor([[1, 0.8], [0.8, 1]])

    z = torch.ones(x.size())
    y = torch.matmul(x, Sigma)
    y = torch.matmul(y, x.t())
    print(y)
Example #25
 def get_batch_audio(self, tgt_l=3, bsize=1, sample_rate=5500,
                     window_size=0.03, t=37):
     # batch x 1 x nfft x t
     nfft = int(math.floor((sample_rate * window_size) / 2) + 1)
     test_src = Variable(torch.ones(bsize, 1, nfft, t)).float()
     test_tgt = Variable(torch.ones(tgt_l, bsize, 1)).long()
     test_length = None
     return test_src, test_tgt, test_length
Example #26
 def get_batch_audio(self, tgt_l=7, bsize=3, sample_rate=5500,
                     window_size=0.03, t=37):
     # batch x 1 x nfft x t
     nfft = int(math.floor((sample_rate * window_size) / 2) + 1)
     test_src = torch.ones(bsize, 1, nfft, t).float()
     test_tgt = torch.ones(tgt_l, bsize, 1).long()
     test_length = torch.ones(bsize).long().fill_(tgt_l)
     return test_src, test_tgt, test_length
Example #27
def test_tensor_array_monkey_patch():
    this_tests('na')
    a = 4  # tensor size (assumed; defined elsewhere in the original test module)
    t = torch.ones(a)
    t = np.array(t)
    assert np.all(t == t), "Tensors did not properly convert to numpy arrays"
    t = torch.ones(a)
    t = np.array(t, dtype=float)
    assert np.all(t == t), "Tensors did not properly convert to numpy arrays with a dtype set"
Example #28
def test_unweighted_mean_and_var(size, dtype):
    empirical_dist = Empirical()
    for i in range(5):
        empirical_dist.add(torch.ones(size, dtype=dtype) * i)
    true_mean = torch.ones(size) * 2
    true_var = torch.ones(size) * 2
    assert_equal(empirical_dist.mean, true_mean)
    assert_equal(empirical_dist.variance, true_var)
Example #29
        def guide():
            loc1 = pyro.param("loc1", torch.randn(2, requires_grad=True))
            scale1 = pyro.param("scale1", torch.ones(2, requires_grad=True))
            pyro.sample("latent1", Normal(loc1, scale1))

            loc2 = pyro.param("loc2", torch.randn(2, requires_grad=True))
            scale2 = pyro.param("scale2", torch.ones(2, requires_grad=True))
            latent2 = pyro.sample("latent2", Normal(loc2, scale2))
            return latent2
Example #30
def test_random_module(nn_module):
    pyro.clear_param_store()
    nn_module = nn_module()
    p = torch.ones(2, 2)
    prior = dist.Bernoulli(p)
    lifted_mod = pyro.random_module("module", nn_module, prior)
    nn_module = lifted_mod()
    for name, parameter in nn_module.named_parameters():
        assert torch.equal(torch.ones(2, 2), parameter.data)
Example #31
def train_model(model, optim, train_q_embed, dev_q_embed, dev_q_cand_ids,
                train_pairs, dev_pairs, hparams, log_path, seed):
    """Train model using negative sampling.

    Args:

    - model
    - optim: optimizer
    - train_q_embed: Embedding object for training queries, shape (nb
      train queries, dim)
    - dev_q_embed: Embedding object for dev queries, shape (nb dev
      queries, dim)
    - dev_q_cand_ids: list containing candidate ID of each dev query
      (None if it is not a candidate), used to compute MAP on dev set.
    - train_pairs: array of (query ID, hypernym ID) pairs
      for training
    - dev_pairs: array of (query ID, hypernym ID) pairs for
      validation
    - hparams: dict containing settings of hyperparameters
    - log_path: path of log file
    - seed: seed for RNG

    """

    # Extract hyperparameter settings
    nb_neg_samples = hparams["nb_neg_samples"]
    subsample = hparams["subsample"]
    max_epochs = hparams["max_epochs"]
    patience = hparams["patience"]
    batch_size = hparams["batch_size"]
    clip = hparams["clip"]

    if seed:
        random.seed(seed)
        np.random.seed(seed)

    # Prepare sampling of negative examples
    candidate_ids = list(range(model.get_nb_candidates()))
    cand_sampler = make_sampler(candidate_ids)

    # Prepare subsampling of positive examples
    pos_sample_prob = {}
    if subsample:
        hyp_fd = {}
        for h_id in train_pairs[:, 1]:
            if h_id not in hyp_fd:
                hyp_fd[h_id] = 0
            hyp_fd[h_id] += 1
        min_freq = min(hyp_fd.values())
        for (h_id, freq) in hyp_fd.items():
            pos_sample_prob[h_id] = sqrt(min_freq / freq)

    # Initialize training batch for query IDs, positive hypernym IDs,
    # negative hypernym IDs, positive targets, and negative targets.
    # We separate positive and negative examples to compute
    # the losses separately. Note that this is a bit inefficient, as
    # we compute the query projections twice.
    batch_q = np.zeros(batch_size, 'int64')
    batch_h_pos = np.zeros((batch_size, 1), 'int64')
    batch_h_neg = np.zeros((batch_size, nb_neg_samples), 'int64')
    t_pos_var = wrap_in_var(torch.ones((batch_size, 1)), False, model.use_cuda)
    t_neg_var = wrap_in_var(torch.zeros((batch_size, nb_neg_samples)), False,
                            model.use_cuda)

    # Prepare list of sets of gold hypernym IDs for queries in
    # training set. This is used for negative sampling.
    nb_train_queries = train_q_embed.weight.shape[0]
    train_gold_ids = [set() for _ in range(nb_train_queries)]
    nb_train_pairs = train_pairs.shape[0]
    for i in range(nb_train_pairs):
        q_id = int(train_pairs[i, 0])
        h_id = int(train_pairs[i, 1])
        train_gold_ids[q_id].add(h_id)

    # Prepare list of sets of gold hypernym IDs for queries in dev set
    # to compute score (MAP)
    nb_dev_queries = dev_q_embed.weight.shape[0]
    dev_gold_ids = [set() for _ in range(nb_dev_queries)]
    nb_dev_pairs = dev_pairs.shape[0]
    for i in range(nb_dev_pairs):
        q_id = int(dev_pairs[i, 0])
        h_id = int(dev_pairs[i, 1])
        dev_gold_ids[q_id].add(h_id)

    # Prepare input variables to compute loss on dev set
    dev_q_ids = torch.LongTensor(dev_pairs[:, 0])
    if model.use_cuda:
        dev_q_ids = dev_q_ids.cuda()
    dev_q_var = dev_q_embed(dev_q_ids)
    dev_h_var = wrap_in_var(
        torch.LongTensor(dev_pairs[:, 1]).unsqueeze(1), False, model.use_cuda)
    dev_t_var = wrap_in_var(torch.ones((nb_dev_pairs, 1)), False,
                            model.use_cuda)

    # Make Evaluator to compute MAP on dev set
    dev_eval = Evaluator(model, dev_q_embed, dev_q_cand_ids)

    print("\nEvaluating untrained model on dev set...")
    MAP = dev_eval.get_MAP(dev_gold_ids)
    print("MAP: {:.4f}".format(MAP))

    checkpoint_header = [
        "Epoch", "Updates", "PosLoss", "NegLoss", "DevLoss", "DevMAP",
        "TimeElapsed"
    ]
    with open(log_path, "w") as f:
        f.write("\t".join(checkpoint_header) + "\n")

    # Train model
    best_model = deepcopy(model)
    best_score = float("-inf")
    nb_no_gain = 0
    batch_row_id = 0
    done = False
    start_time = time.time()
    print("\nStarting training...\n")
    print("\t".join(checkpoint_header))
    for epoch in range(1, max_epochs + 1):
        model.train()
        np.random.shuffle(train_pairs)
        total_pos_loss = 0.0
        total_neg_loss = 0.0

        # Loop through training pairs
        nb_updates = 0
        for pair_ix in range(train_pairs.shape[0]):
            q_id = train_pairs[pair_ix, 0]
            h_id = train_pairs[pair_ix, 1]
            if subsample and random.random() >= pos_sample_prob[h_id]:
                continue
            batch_q[batch_row_id] = q_id
            batch_h_pos[batch_row_id] = h_id

            # Get negative examples
            neg_samples = []
            while len(neg_samples) < nb_neg_samples:
                cand_id = next(cand_sampler)
                if cand_id not in train_gold_ids[q_id]:
                    neg_samples.append(cand_id)
            batch_h_neg[batch_row_id] = neg_samples

            # Update on batch
            batch_row_id = (batch_row_id + 1) % batch_size
            if batch_row_id + 1 == batch_size:
                q_ids = wrap_in_var(torch.LongTensor(batch_q), False,
                                    model.use_cuda)
                q_var = train_q_embed(q_ids)
                h_pos_var = wrap_in_var(torch.LongTensor(batch_h_pos), False,
                                        model.use_cuda)
                h_neg_var = wrap_in_var(torch.LongTensor(batch_h_neg), False,
                                        model.use_cuda)
                optim.zero_grad()
                pos_loss = model.get_loss(q_var, h_pos_var, t_pos_var)
                neg_loss = model.get_loss(q_var, h_neg_var, t_neg_var)
                loss = pos_loss + neg_loss
                loss.backward()
                if clip > 0:
                    torch.nn.utils.clip_grad_norm(train_q_embed.parameters(),
                                                  clip)
                    torch.nn.utils.clip_grad_norm(model.parameters(), clip)
                optim.step()
                total_pos_loss += pos_loss.data[0]
                total_neg_loss += neg_loss.data[0]
                nb_updates += 1

        # Check progress
        avg_pos_loss = total_pos_loss / (nb_updates * batch_size)
        avg_neg_loss = total_neg_loss / (nb_updates * batch_size)

        # Compute loss and MAP on dev set
        model.eval()
        dev_loss = model.get_loss(dev_q_var, dev_h_var, dev_t_var)
        avg_dev_loss = dev_loss.data[0] / nb_dev_pairs
        MAP = dev_eval.get_MAP(dev_gold_ids)
        checkpoint_data = []
        checkpoint_data.append(str(epoch))
        checkpoint_data.append(str(nb_updates))
        checkpoint_data.append("{:.4f}".format(avg_pos_loss))
        checkpoint_data.append("{:.4f}".format(avg_neg_loss))
        checkpoint_data.append("{:.4f}".format(avg_dev_loss))
        checkpoint_data.append("{:.4f}".format(MAP))
        checkpoint_data.append("{:.1f}s".format(time.time() - start_time))
        print("\t".join(checkpoint_data))
        with open(log_path, "a") as f:
            f.write("\t".join(checkpoint_data) + "\n")

        # Early stopping
        if MAP > best_score:
            best_score = MAP
            best_model = deepcopy(model)
            nb_no_gain = 0
        else:
            nb_no_gain += 1
        if nb_no_gain >= patience:
            print("EARLY STOP!")
            done = True
            print("\nEvaluating best model on dev set...")
            dev_eval.set_model(best_model)
            MAP = dev_eval.get_MAP(dev_gold_ids)
            print("MAP of best model: {:.3f}".format(MAP))
        if done:
            break
    print("\nTraining finished after {} epochs".format(epoch))
    return best_model
Example #32
def harrInitMethod1(originalChnl):
    return torch.cat([torch.ones(originalChnl, 1), torch.zeros(originalChnl, 2)], 1).reshape(originalChnl, 1, 3)
Example #33
    def test_batchnorm_with_weights(self):
        """
        Test of the PyTorch 3D batchnorm Node with weights and biases on Glow.
        """
        class SimpleQuantizedBatchNorm(nn.Module):
            def __init__(
                self,
                C,
                weight,
                bias,
                running_mean,
                running_var,
                in_scale,
                in_zero_point,
                out_scale,
                out_zero_point,
            ):
                super(SimpleQuantizedBatchNorm, self).__init__()
                self.qconfig = my_qconfig
                self.batchnorm = nn.BatchNorm3d(C)
                self.batchnorm.scale = out_scale
                self.batchnorm.zero_point = out_zero_point
                self.batchnorm.weight = nn.Parameter(weight)
                self.batchnorm.bias = nn.Parameter(bias)
                self.batchnorm.running_mean = running_mean
                self.batchnorm.running_var = running_var
                self.relu = nn.ReLU()
                self.q = torch.quantization.QuantStub()
                self.q.scale = in_scale
                self.q.zero_point = in_zero_point
                self.dq = torch.quantization.DeQuantStub()

            def forward(self, x):
                qx = self.q(x)
                qy = self.batchnorm(qx)
                y = self.dq(qy)
                return y

        C = 7
        in_scale = 0.0031
        out_scale = 0.0047
        in_zero_point = -42
        out_zero_point = 23
        weight = torch.ones(C) + torch.rand(C) * 0.001
        bias = torch.rand(C) * 0.0001
        running_mean = torch.zeros(C)
        running_var = torch.ones(C)

        inputs = torch.randn((6, C, 4, 33, 42), requires_grad=False)
        model = SimpleQuantizedBatchNorm(
            C,
            weight,
            bias,
            running_mean,
            running_var,
            in_scale,
            in_zero_point,
            out_scale,
            out_zero_point,
        )
        model.eval()

        utils.compare_tracing_methods(
            model,
            inputs,
            skip_to_glow=True,
        )
Example #34
                                                   ('conv', 0),
                                                   ('identity', 1),
                                                   ('shuffle_conv', 0),
                                                   ('dep_conv', 2),
                                                   ('shuffle_conv', 4),
                                                   ('identity', 0)],
                                    normal_normal_concat=range(2, 6))

    cell = BuildCell(layer7_doublechannel,
                     c_prev_prev=-1,
                     c_prev=256,
                     c=32,
                     cell_type='normal_up',
                     dp=0)
    # The final output size for normal up may not be a multiple if normal does not exist
    s0 = torch.FloatTensor(torch.ones(1, 128, 16, 16))
    s1 = torch.FloatTensor(torch.ones(1, 256, 8, 8))
    output = cell(None, s1)
    print(output.size())

    x = torch.FloatTensor(torch.ones(1, 3, 128, 128))
    network = BuildNasUnetPrune(layer7_doublechannel,
                                input_c=3,
                                c=16,
                                num_classes=1,
                                meta_node_num=4,
                                layers=9,
                                dp=0,
                                use_sharing=True,
                                double_down_channel=True)
    print(network)
Example #35
the ``Variable`` (except for Variables created by the user - their
``grad_fn is None``).

If you want to compute the derivatives, you can call ``.backward()`` on
a ``Variable``. If ``Variable`` is a scalar (i.e. it holds a one element
data), you don’t need to specify any arguments to ``backward()``,
however if it has more elements, you need to specify a ``grad_output``
argument that is a tensor of matching shape.
"""

import torch
from torch.autograd import Variable

###############################################################
# Create a variable:
x = Variable(torch.ones(2, 2), requires_grad=True)
print(x)

###############################################################
# Do an operation of variable:
y = x + 2
print(y)

###############################################################
# ``y`` was created as a result of an operation, so it has a ``grad_fn``.
print(y.grad_fn)

###############################################################
# Do more operations on y
z = y * y * 3
out = z.mean()
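The snippet stops just before the gradient computation described in the text above. A minimal continuation (a sketch, not part of the original example): ``out`` is a scalar, so ``backward()`` needs no arguments, while a non-scalar output requires a ``grad_output`` tensor of matching shape.

###############################################################
# ``out`` holds a single element, so no arguments are needed:
out.backward()
print(x.grad)  # d(out)/dx = 4.5 for every element of x

###############################################################
# For a non-scalar output, pass a gradient tensor of matching shape:
x.grad.data.zero_()           # gradients accumulate, so reset them first
y = x * 2
y.backward(torch.ones(2, 2))  # grad_output with the same shape as y
print(x.grad)                 # 2.0 for every element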
Example #36
    def train(self):
        #torch.autograd.set_detect_anomaly(True)
        """
        Main training loop
        Helpful URL: https://github.com/balakg/posewarp-cvpr2018/blob/master/code/posewarp_gan_train.py
        """

        for epoch in range(self.num_epochs):
            num_batches = len(self.train_dataset_loader)
            # Initialize running averages
            disc_losses = AverageMeter()
            train_disc_accuracies = AverageMeter()
            tot_losses = AverageMeter()
            train_accuracies = AverageMeter()

            for batch_id, batch_data in enumerate(self.train_dataset_loader):
                self.gan.train()  # Set the model to train mode
                self.vgg_loss_network.eval()
                current_step = epoch * num_batches + batch_id

                # Get data from dataset
                src_img = batch_data['im'].cuda(non_blocking=True)
                target_img = batch_data['target_im'].cuda(non_blocking=True)
                src_iuv = batch_data['im_iuv'].cuda(non_blocking=True)
                target_iuv = batch_data['target_iuv'].cuda(non_blocking=True)
                #pdb.set_trace()

                # ============
                # Run predictive GAN on source image
                _, classification_src = self.gan(src_img, src_iuv, target_iuv, use_gt=False)
                # Run predictive GAN on target image
                _ , classification_tgt = self.gan(target_img, src_iuv, target_iuv, use_gt=True)
                # Create discriminator groundtruth
                # For src, we create zeros
                # For tgt, we create ones
                disc_gt_src = torch.zeros(classification_src.shape[0], 1, dtype=torch.float32).cuda()
                disc_gt_tgt = torch.ones(classification_src.shape[0], 1, dtype=torch.float32).cuda()
                disc_gt = torch.cat((disc_gt_src, disc_gt_tgt), dim=0).cuda(non_blocking=True)

                classification_all = torch.cat((classification_src, classification_tgt) , dim=0)
                # Train Discriminator network
                disc_loss = self._optimizeDiscriminator(classification_all, disc_gt)
                disc_losses.update(disc_loss.item(), disc_gt.shape[0])
                disc_acc = 100.0 * torch.mean( ( torch.round(F.softmax(classification_all, dim=1)) == disc_gt ).float() )

                train_disc_accuracies.update(disc_acc.item(), disc_gt.shape[0])

                print("Epoch: {}, Batch {}/{} has Discriminator loss {}, and acc {}".format(epoch, batch_id, num_batches, disc_losses.avg, train_disc_accuracies.avg))
                # Start training GAN first for several iterations
                if current_step < self.start_disc_iters:
                    print("Discriminator training only: {}/{}\n".format(current_step,self.start_disc_iters))
                    continue
               
                # ============
                # Optimize the GAN
                # Note that now we use disc_gt_tgt which are 1's
                generated_img, classification_src = self.gan(src_img, src_iuv, target_iuv, use_gt=False)
                tot_loss = self._optimizeGAN(generated_img, target_img, classification_src, disc_gt_tgt)
                tot_losses.update(tot_loss.item(), disc_gt_tgt.shape[0])

                acc = 100.0 * torch.mean( ( torch.round(F.softmax(classification_src, dim=1)) == disc_gt_tgt ).float() )

                tot_losses.update(tot_loss.item(), disc_gt_tgt.shape[0])
                train_accuracies.update(acc.item(), disc_gt_tgt.shape[0])

                # Not adjusting learning rate currently
                # if epoch % 100 == 99:
                #     self._adjust_learning_rate(epoch)
                # # Not Clipping Weights
                # self._clip_weights()

                if current_step % self.log_freq == 0:
                    print("Epoch: {}, Batch {}/{} has loss {}, and acc {}".format(epoch, batch_id, num_batches, tot_losses.avg, train_accuracies.avg))
                    # TODO: you probably want to plot something here
                    self.txwriter.add_scalar('train/discriminator_loss', disc_losses.avg, current_step)
                    self.txwriter.add_scalar('train/total_loss', tot_losses.avg, current_step)
                    self.txwriter.add_scalar('train/discriminator_acc', train_accuracies.avg, current_step)
                """
                Visualize some images
                """
                if current_step % self.display_freq == 0:
                    name1 = '{0}_{1}_{2}'.format(epoch, current_step, "image1")
                    name2 = '{0}_{1}_{2}'.format(epoch, current_step, "image2")
                    name3 = '{0}_{1}_{2}'.format(epoch, current_step, "gan_image")
                    im1 = denormalizeImage(src_img[0,:,:,:].cpu().numpy())
                    im2 = denormalizeImage(target_img[0,:,:,:].cpu().numpy())
                    im3 = denormalizeImage(generated_img[0,:,:,:].detach().cpu().numpy())
                    self.txwriter.add_image("Image1/"+name1,im1)
                    self.txwriter.add_image("Image2/"+name2,im2)
                    self.txwriter.add_image("GAN/"+name3,im3)
                """
                TODO : Test accuracies
                if current_step % self.test_freq == 0:#self._test_freq-1:
                    self._model.eval()
                    val_accuracy = self.validate()
                    print("Epoch: {} has val accuracy {}".format(epoch, val_accuracy))
                    self.txwriter.add_scalar('test/acc', val_accuracy, current_step)
                """
                """
                Save Model periodically
                """
                if (current_step % self.save_freq == 0) and current_step > 0:
                    save_name = 'model_checkpoint.pth'
                    torch.save(self.gan.state_dict(), save_name)
                    print('Saved model to {}'.format(save_name))
Example #37
def idenInitMethod2(originalChnl):
    return torch.cat([torch.zeros(originalChnl, 2), torch.ones(originalChnl, 1)], 1).reshape(originalChnl, 1, 3)
Example #38
 def pad(self, x, length, padval):
   y = torch.ones((length,)).long() * padval
   y[:min(len(x), length)] = x[:min(len(x), length)]
   return y
Example #39
def ones(shape, dtype, ctx):
    return th.ones(shape, dtype=dtype, device=ctx)
Example #40
 def forward(ctx, inputs, bound):
     b = torch.ones(inputs.size())*bound
     b = b.to(inputs.device)
     ctx.save_for_backward(inputs, b)
     return torch.max(inputs, b)
Example #41
 def obs_to_graph_dict(self, obs) -> tg.data.Data:
     """
     this function takes the observation and creates a graph including the following
     features:
     (indicator, x, y, node_demand, node_visited)
      indicator: 0: customers, 1: depot, 2: vehicle
     x, y: position of the node in grid (double, double)
     node_demand: the customer demand or current vehicle capacity depending on the type of node (the vehicle
     capacity is negative)
     """
     customer_positions = obs['customer_positions']
     customer_visited = obs['customer_visited']
     vehicle_position = obs["current_vehicle_position"]
     customer_demands = obs['customer_demands'] / obs['max_vehicle_capacity']
     vehicle_capacity = obs['current_vehicle_capacity'] / obs['max_vehicle_capacity']
     num_customers = customer_positions.shape[0]
     num_depots = 1
     num_vehicles = 1
     num_nodes = num_customers + num_depots + num_vehicles
     node_pos = np.vstack([customer_positions,
                           obs['depot_position'],
                           vehicle_position])
      # if the vehicle is currently at the depot position then the depot is treated like a visited node
     if np.array_equal(vehicle_position, obs['depot_position']):
         depot_visited = np.zeros(shape=(num_depots, 1))
     else:
         depot_visited = np.ones(shape=(num_depots, 1))
      # node_visited is True if the node can be chosen as an action (a customer that is not yet visited, or the
      # depot if the vehicle is not currently there); otherwise it is False (this is always the case for
      # vehicle nodes)
     node_visited = np.vstack([np.logical_not(customer_visited).reshape(-1, 1),
                               depot_visited,
                               np.zeros(shape=(num_vehicles, 1))])
     # indicator is : 0: customers, 1: depot, 2: vehicle
     node_ind = np.vstack([np.ones(shape=(num_customers, 1)) * 0,
                           np.ones(shape=(num_depots, 1)) * 1,
                           np.ones(shape=(num_vehicles, 1)) * 2])
     node_demand = np.vstack([customer_demands.reshape(-1, 1),
                              np.zeros(shape=(num_depots, 1)),
                              -vehicle_capacity])
     customer_nodes = np.where(node_ind == 0)[0]
     depot_nodes = np.where(node_ind == 1)[0]
     vehicle_nodes = np.where(node_ind == 2)[0]
     # features are : pos_x, pos_y, demand/capacity
     node_features = np.hstack([node_ind, node_pos, node_demand, node_visited])
     # customer edge indexes include all customers and depot
     # edge_indexes = [(i, j) for i, j in itertools.product(range(num_customers + 1), range(num_customers + 1)) if
     #                 i != j]
     customer_and_depot_nodes = np.concatenate([customer_nodes, depot_nodes])
     vehicle_edge_indexes = [(i.item(), j.item()) for i in vehicle_nodes for j in customer_and_depot_nodes]
     vehicle_edge_indexes = vehicle_edge_indexes + [(j, i) for i, j in vehicle_edge_indexes]
     edge_indexes_directed = vehicle_edge_indexes
     node_features_tensor = torch.tensor(node_features, dtype=torch.float32)
     edge_indexes_tensor = torch.tensor(edge_indexes_directed, dtype=torch.long,
                                        device=node_features_tensor.device).transpose(1, 0)
     edge_attributes_tensor = torch.ones(size=(len(edge_indexes_directed), 1), device=node_features_tensor.device,
                                         dtype=torch.float32)
     illegal_actions = np.zeros(shape=(num_nodes,))
     if not obs['action_mask'][self.env.DEPOT_INDEX]:
         # depot option is not available, and therefore this action should be masked
         illegal_actions[depot_nodes] = True
      # mask out all customers whose demand exceeds the vehicle's current capacity
     illegal_actions[customer_nodes] = np.logical_or(customer_demands > vehicle_capacity,
                                                     customer_visited)
     # mask out the vehicle nodes since they can never be chosen
     illegal_actions[vehicle_nodes] = True
     illegal_actions_tensor = torch.tensor(illegal_actions, device=node_features_tensor.device,
                                           dtype=torch.bool)
     graph_tg = tg.data.Data(x=node_features_tensor, edge_attr=edge_attributes_tensor,
                             edge_index=edge_indexes_tensor)
     graph_tg.illegal_actions = illegal_actions_tensor
     graph_tg.u = torch.tensor([[1]], device=node_features_tensor.device, dtype=torch.float32)
     self.num_customers = num_customers
     return graph_tg
Example #42
# plt.savefig("figures/dc_positive.pdf")
# show_trial(info, 9, 12) # Negative
# plt.savefig("figures/dc_negative.pdf")

#%%
_, t, *_ = masks.size()
n = runner.config.TASK.NUM_NODES
h = runner.config.MODEL.HIDDEN_SIZE
strength = 100.0
perturbation = torch.zeros(t, n, h, device=device)
perturbation_step = 2 # Right in the middle.
nodes_perturbed = np.random.randint(10) # A random set #
dropout=True
dropout_mask = None
if dropout:
    dropout_mask = torch.ones(t, n, h, device=runner.device)
    dropout_mask[perturbation_step, nodes_perturbed] = 0.0
perturbation[perturbation_step, nodes_perturbed] = torch.rand(h, device=device) * strength
metrics_perturbed, info_perturbed = runner.eval(ckpt_path, perturb=perturbation, dropout_mask=dropout_mask)
# show_trial(info_perturbed, 7, 12)
# show_trial(info_perturbed, 9, 12)
# plt.title("Target: 0, Pulse: 100.0")
# plt.savefig("figures/dc_pulse_negative.pdf") # Note there's stochasticity here

# show_trial(info_perturbed, 7, 12)
# plt.title("Target: 0, Pulse: 100.0")
# plt.savefig("figures/dc_pulse_positive.pdf")

show_trial(info_perturbed, 7, 12)
plt.title("Target: 0, Dropout")
plt.savefig("figures/dc_dropout.pdf")
Example #43
def label_real(size):
    data = torch.ones(size, 1)
    return data.to(device)
Example #44
    def _generate(
        self,
        sample: Dict[str, Dict[str, Tensor]],
        prefix_tokens: Optional[Tensor] = None,
        constraints: Optional[Tensor] = None,
        bos_token: Optional[int] = None,
    ):
        incremental_states = torch.jit.annotate(
            List[Dict[str, Dict[str, Optional[Tensor]]]],
            [
                torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {})
                for i in range(self.model.models_size)
            ],
        )
        net_input = sample["net_input"]

        if "src_tokens" in net_input:
            src_tokens = net_input["src_tokens"]
            # length of the source text is the character length excluding EndOfSentence and pad
            src_lengths = (
                (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1)
            )
        elif "source" in net_input:
            src_tokens = net_input["source"]
            src_lengths = (
                net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1)
                if net_input["padding_mask"] is not None
                else torch.tensor(src_tokens.size(-1)).to(src_tokens)
            )
        else:
            raise Exception("expected src_tokens or source in net input")

        # bsz: total number of sentences in beam
        # Note that src_tokens may have more than 2 dimensions (i.e. audio features)
        bsz, src_len = src_tokens.size()[:2]
        beam_size = self.beam_size

        if constraints is not None and not self.search.supports_constraints:
            raise NotImplementedError(
                "Target-side constraints were provided, but search method doesn't support them"
            )

        # Initialize constraints, when active
        self.search.init_constraints(constraints, beam_size)

        max_len: int = -1
        if self.match_source_len:
            max_len = src_lengths.max().item()
        else:
            max_len = min(
                int(self.max_len_a * src_len + self.max_len_b),
                # exclude the EOS marker
                self.model.max_decoder_positions() - 1,
            )
        assert (
            self.min_len <= max_len
        ), "min_len cannot be larger than max_len, please adjust these!"
        # compute the encoder output for each beam
        encoder_outs = self.model.forward_encoder(net_input)

        # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores
        new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
        new_order = new_order.to(src_tokens.device).long()
        encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order)
        # ensure encoder_outs is a List.
        assert encoder_outs is not None

        # initialize buffers
        scores = (
            torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float()
        )  # +1 for eos; pad is never chosen for scoring
        tokens = (
            torch.zeros(bsz * beam_size, max_len + 2)
            .to(src_tokens)
            .long()
            .fill_(self.pad)
        )  # +2 for eos and pad
        tokens[:, 0] = self.eos if bos_token is None else bos_token
        attn: Optional[Tensor] = None

        # A list that indicates candidates that should be ignored.
        # For example, suppose we're sampling and have already finalized 2/5
        # samples. Then cands_to_ignore would mark 2 positions as being ignored,
        # so that we only finalize the remaining 3 samples.
        cands_to_ignore = (
            torch.zeros(bsz, beam_size).to(src_tokens).eq(-1)
        )  # forward and backward-compatible False mask

        # list of completed sentences
        finalized = torch.jit.annotate(
            List[List[Dict[str, Tensor]]],
            [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)],
        )  # contains lists of dictionaries of information about the hypothesis being finalized at each step

        finished = [
            False for i in range(bsz)
        ]  # a boolean array indicating if the sentence at the index is finished or not
        num_remaining_sent = bsz  # number of sentences remaining

        # number of candidate hypos per step
        cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

        # offset arrays for converting between different indexing schemes
        bbsz_offsets = (
            (torch.arange(0, bsz) * beam_size)
            .unsqueeze(1)
            .type_as(tokens)
            .to(src_tokens.device)
        )
        cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device)

        reorder_state: Optional[Tensor] = None
        batch_idxs: Optional[Tensor] = None

        original_batch_idxs: Optional[Tensor] = None
        if "id" in sample and isinstance(sample["id"], Tensor):
            original_batch_idxs = sample["id"]
        else:
            original_batch_idxs = torch.arange(0, bsz).type_as(tokens)

        for step in range(max_len + 1):  # one extra step for EOS marker
            # reorder decoder internal states based on the prev choice of beams
            if reorder_state is not None:
                if batch_idxs is not None:
                    # update beam indices to take into account removed sentences
                    corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(
                        batch_idxs
                    )
                    reorder_state.view(-1, beam_size).add_(
                        corr.unsqueeze(-1) * beam_size
                    )
                    original_batch_idxs = original_batch_idxs[batch_idxs]
                self.model.reorder_incremental_state(incremental_states, reorder_state)
                encoder_outs = self.model.reorder_encoder_out(
                    encoder_outs, reorder_state
                )

            lprobs, avg_attn_scores = self.model.forward_decoder(
                tokens[:, : step + 1],
                encoder_outs,
                incremental_states,
                self.temperature,
            )

            if self.lm_model is not None:
                lm_out = self.lm_model(tokens[:, : step + 1])
                probs = self.lm_model.get_normalized_probs(
                    lm_out, log_probs=True, sample=None
                )
                probs = probs[:, -1, :] * self.lm_weight
                lprobs += probs

            lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)  # replace NaNs (only NaN satisfies x != x) with -inf

            lprobs[:, self.pad] = -math.inf  # never select pad
            lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

            # handle max length constraint
            if step >= max_len:
                lprobs[:, : self.eos] = -math.inf
                lprobs[:, self.eos + 1 :] = -math.inf

            # handle prefix tokens (possibly with different lengths)
            if (
                prefix_tokens is not None
                and step < prefix_tokens.size(1)
                and step < max_len
            ):
                lprobs, tokens, scores = self._prefix_tokens(
                    step, lprobs, scores, tokens, prefix_tokens, beam_size
                )
            elif step < self.min_len:
                # minimum length constraint (does not apply if using prefix_tokens)
                lprobs[:, self.eos] = -math.inf

            # Record attention scores; only supported when avg_attn_scores is a Tensor
            if avg_attn_scores is not None:
                if attn is None:
                    attn = torch.empty(
                        bsz * beam_size, avg_attn_scores.size(1), max_len + 2
                    ).to(scores)
                attn[:, :, step + 1].copy_(avg_attn_scores)

            scores = scores.type_as(lprobs)
            eos_bbsz_idx = torch.empty(0).to(
                tokens
            )  # indices of hypothesis ending with eos (finished sentences)
            eos_scores = torch.empty(0).to(
                scores
            )  # scores of hypothesis ending with eos (finished sentences)

            if self.should_set_src_lengths:
                self.search.set_src_lengths(src_lengths)

            if self.repeat_ngram_blocker is not None:
                lprobs = self.repeat_ngram_blocker(
                    tokens, lprobs, bsz, beam_size, step
                )

            # Shape: (batch, cand_size)
            cand_scores, cand_indices, cand_beams = self.search.step(
                step,
                lprobs.view(bsz, -1, self.vocab_size),
                scores.view(bsz, beam_size, -1)[:, :, :step],
                tokens[:, : step + 1],
                original_batch_idxs,
            )

            # cand_bbsz_idx contains beam indices for the top candidate
            # hypotheses, with a range of values: [0, bsz*beam_size),
            # and dimensions: [bsz, cand_size]
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)

            # finalize hypotheses that end in eos
            # Shape of eos_mask: (batch size, beam size)
            eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
            eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask)

            # only consider eos when it's among the top beam_size indices
            # Now we know what beam item(s) to finish
            # Shape: 1-D tensor of flattened (absolute) beam indices
            eos_bbsz_idx = torch.masked_select(
                cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size]
            )

            finalized_sents: List[int] = []
            if eos_bbsz_idx.numel() > 0:
                eos_scores = torch.masked_select(
                    cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size]
                )

                finalized_sents = self.finalize_hypos(
                    step,
                    eos_bbsz_idx,
                    eos_scores,
                    tokens,
                    scores,
                    finalized,
                    finished,
                    beam_size,
                    attn,
                    src_lengths,
                    max_len,
                )
                num_remaining_sent -= len(finalized_sents)

            assert num_remaining_sent >= 0
            if num_remaining_sent == 0:
                break
            if self.search.stop_on_max_len and step >= max_len:
                break
            assert step < max_len, f"{step} < {max_len}"

            # Remove finalized sentences (ones for which {beam_size}
            # finished hypotheses have been generated) from the batch.
            if len(finalized_sents) > 0:
                new_bsz = bsz - len(finalized_sents)

                # construct batch_idxs which holds indices of batches to keep for the next pass
                batch_mask = torch.ones(
                    bsz, dtype=torch.bool, device=cand_indices.device
                )
                batch_mask[finalized_sents] = False
                # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it
                batch_idxs = torch.arange(
                    bsz, device=cand_indices.device
                ).masked_select(batch_mask)

                # Choose the subset of the hypothesized constraints that will continue
                self.search.prune_sentences(batch_idxs)

                eos_mask = eos_mask[batch_idxs]
                cand_beams = cand_beams[batch_idxs]
                bbsz_offsets.resize_(new_bsz, 1)
                cand_bbsz_idx = cand_beams.add(bbsz_offsets)
                cand_scores = cand_scores[batch_idxs]
                cand_indices = cand_indices[batch_idxs]

                if prefix_tokens is not None:
                    prefix_tokens = prefix_tokens[batch_idxs]
                src_lengths = src_lengths[batch_idxs]
                cands_to_ignore = cands_to_ignore[batch_idxs]

                scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
                if attn is not None:
                    attn = attn.view(bsz, -1)[batch_idxs].view(
                        new_bsz * beam_size, attn.size(1), -1
                    )
                bsz = new_bsz
            else:
                batch_idxs = None

            # Set active_mask so that values > cand_size indicate eos hypos
            # and values < cand_size indicate candidate active hypos.
            # After, the min values per row are the top candidate active hypos

            # Rewrite the operator since element-wise `or` is not supported in TorchScript.

            eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size]))
            active_mask = torch.add(
                eos_mask.type_as(cand_offsets) * cand_size,
                cand_offsets[: eos_mask.size(1)],
            )

            # get the top beam_size active hypotheses, which are just
            # the hypos with the smallest values in active_mask.
            # {active_hypos} indicates which {beam_size} hypotheses
            # from the list of {2 * beam_size} candidates were
            # selected. Shapes: (batch size, beam size)
            new_cands_to_ignore, active_hypos = torch.topk(
                active_mask, k=beam_size, dim=1, largest=False
            )

            # update cands_to_ignore to ignore any finalized hypos.
            cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size]
            # Make sure there is at least one active item for each sentence in the batch.
            assert (~cands_to_ignore).any(dim=1).all()

            # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam
            # can be selected more than once).
            active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos)
            active_scores = torch.gather(cand_scores, dim=1, index=active_hypos)

            active_bbsz_idx = active_bbsz_idx.view(-1)
            active_scores = active_scores.view(-1)

            # copy tokens and scores for active hypotheses

            # Set the tokens for each beam (can select the same row more than once)
            tokens[:, : step + 1] = torch.index_select(
                tokens[:, : step + 1], dim=0, index=active_bbsz_idx
            )
            # Select the next token for each of them
            tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather(
                cand_indices, dim=1, index=active_hypos
            )
            if step > 0:
                scores[:, :step] = torch.index_select(
                    scores[:, :step], dim=0, index=active_bbsz_idx
                )
            scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather(
                cand_scores, dim=1, index=active_hypos
            )

            # Update constraints based on which candidates were selected for the next beam
            self.search.update_constraints(active_hypos)

            # copy attention for active hypotheses
            if attn is not None:
                attn[:, :, : step + 2] = torch.index_select(
                    attn[:, :, : step + 2], dim=0, index=active_bbsz_idx
                )

            # reorder incremental state in decoder
            reorder_state = active_bbsz_idx

        # sort by score descending
        for sent in range(len(finalized)):
            scores = torch.tensor(
                [float(elem["score"].item()) for elem in finalized[sent]]
            )
            _, sorted_scores_indices = torch.sort(scores, descending=True)
            finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices]
            finalized[sent] = torch.jit.annotate(
                List[Dict[str, Tensor]], finalized[sent]
            )
        return finalized
Example #45
0
def test(args: dict, save_flag: bool, seed_val):
    
    device = util.get_device(device_no=args.device_no)   
    model = torch.load(args.model_path, map_location=device)

    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)
    
    testfile = args.input_file
    true_label = args.label
    truncation = args.truncation
    n_samples = None
    if "n_samples" in args:
        n_samples = args.n_samples
   
    # Load the BERT tokenizer.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    max_len = 0
    reviews = []
    labels = []
    with open(testfile, "r") as fin:
        reviews = fin.readlines()
    
    reviews = [rev.lower() for rev in reviews]
    
    if n_samples is None:
        n_samples = len(reviews)

    indices = np.random.choice(np.arange(len(reviews)), size=n_samples)
    selected_reviews = [reviews[idx] for idx in indices]

    labels = [0 if true_label == "negative" else 1]*len(selected_reviews)

    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = []
    attention_masks = []

    # For every sentence...
    for rev in selected_reviews:
        # `encode_plus` will:
        #   (1) Tokenize the sentence.
        #   (2) Prepend the `[CLS]` token to the start.
        #   (3) Append the `[SEP]` token to the end.
        #   (4) Map tokens to their IDs.
        #   (5) Pad or truncate the sentence to `max_length`
        #   (6) Create attention masks for [PAD] tokens.
        input_id = tokenizer.encode(rev, add_special_tokens=True)
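        # BERT accepts at most 512 tokens, so longer reviews need a truncation strategy.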
        if len(input_id) > 512:                        
            if truncation == "tail-only":
                # tail-only truncation
                input_id = [tokenizer.cls_token_id]+input_id[-511:]      
            elif truncation == "head-and-tail":
                # head-and-tail truncation       
                input_id = [tokenizer.cls_token_id]+input_id[1:129]+input_id[-382:]+[tokenizer.sep_token_id]
            else:
                # head-only truncation
                input_id = input_id[:511]+[tokenizer.sep_token_id]
                
            input_ids.append(torch.tensor(input_id).view(1,-1))
            attention_masks.append(torch.ones([1,len(input_id)], dtype=torch.long))
        else:
            encoded_dict = tokenizer.encode_plus(
                                rev,                      # Sentence to encode.
                                add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                                max_length = 512,           # Pad & truncate all sentences.
                                pad_to_max_length = True,
                                return_attention_mask = True,   # Construct attn. masks.
                                return_tensors = 'pt',     # Return pytorch tensors.
                        )
            
            # Add the encoded sentence to the list.    
            input_ids.append(encoded_dict['input_ids'])
            
            # And its attention mask (simply differentiates padding from non-padding).
            attention_masks.append(encoded_dict['attention_mask'])

    # Convert the lists into tensors.
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(labels)

    # Set the batch size.  
    batch_size = 8  

    # Create the DataLoader.
    prediction_data = TensorDataset(input_ids, attention_masks, labels)
    prediction_sampler = SequentialSampler(prediction_data)
    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)
    print('Predicting labels for {:,} test sentences...'.format(len(input_ids)))

    # Put model in evaluation mode
    model.eval()

    # Tracking variables 
    predictions , true_labels = [], []

    # Predict 
    for batch in prediction_dataloader:
        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)
        
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        
        # Telling the model not to compute or store gradients, saving memory and 
        # speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            outputs = model(b_input_ids, token_type_ids=None, 
                            attention_mask=b_input_mask)

        logits = outputs[0]

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        
        # Store predictions and true labels
        predictions.append(logits)
        true_labels.append(label_ids)
    
    print('DONE.')
    return predictions, true_labels, selected_reviews
Example #46
0
    def sum_losses(self, batch, loss, margin, prec_at_k):
        """For Pretraining
        Function for preatrainind this CNN with the triplet loss. Takes a sample of N=PK images, P different
        persons, K images of each. K=4 is a normal parameter.
        [!] Batch all and batch hard should work fine. Take care with weighted triplet or cross entropy!!
        Args:
            batch (list): [images, labels], images are Tensor of size (N,H,W,C), H=224, W=112, labels Tensor of
            size (N)
        """

        inp = batch[0][0]
        inp = Variable(inp).cuda()

        labels = batch[1][0]
        labels = labels.cuda()

        embeddings = self.forward(inp)
        
        if loss == "cross_entropy":
            m = _get_triplet_mask(labels).nonzero()
            e0 = []
            e1 = []
            e2 = []
            for p in m:
                e0.append(embeddings[p[0]])
                e1.append(embeddings[p[1]])
                e2.append(embeddings[p[2]])
            e0 = torch.stack(e0,0)
            e1 = torch.stack(e1,0)
            e2 = torch.stack(e2,0)

            out_pos = self.compare(e0, e1, train=True)
            out_neg = self.compare(e0, e2, train=True)

            tar_pos = Variable(torch.ones(out_pos.size(0)).view(-1,1).cuda())
            tar_neg = Variable(torch.zeros(out_pos.size(0)).view(-1,1).cuda())

            loss_pos = F.binary_cross_entropy_with_logits(out_pos, tar_pos)
            loss_neg = F.binary_cross_entropy_with_logits(out_neg, tar_neg)

            total_loss = (loss_pos + loss_neg)/2

        elif loss == 'batch_all':
            # works, batch all strategy
            m = _get_triplet_mask(labels).nonzero()
            e0 = []
            e1 = []
            e2 = []
            for p in m:
                e0.append(embeddings[p[0]])
                e1.append(embeddings[p[1]])
                e2.append(embeddings[p[2]])
            e0 = torch.stack(e0,0)
            e1 = torch.stack(e1,0)
            e2 = torch.stack(e2,0)
            total_loss = F.triplet_margin_loss(e0, e1, e2, margin=margin, p=2)
        elif loss == 'batch_hard':
            # compute pairwise squared distance matrix; avoid sqrt, which is unstable when a distance is 0
            n = embeddings.size(0)
            m = embeddings.size(0)
            d = embeddings.size(1)

            x = embeddings.data.unsqueeze(1).expand(n, m, d)
            y = embeddings.data.unsqueeze(0).expand(n, m, d)

            dist = torch.pow(x - y, 2).sum(2)

            mask_anchor_positive = _get_anchor_positive_triplet_mask(labels).float()
            mask_anchor_negative = _get_anchor_negative_triplet_mask(labels).float()

            pos_dist = dist * mask_anchor_positive
            # add a large value so that invalid entries cannot be picked
            max_val = torch.max(dist)
            neg_dist = dist + max_val * (1.0 - mask_anchor_negative)

            # for each anchor compute hardest pair
            triplets = []
            for i in range(dist.size(0)):
                pos = torch.max(pos_dist[i],0)[1].item()
                neg = torch.min(neg_dist[i],0)[1].item()
                triplets.append((i, pos, neg))

            e0 = []
            e1 = []
            e2 = []
            for p in triplets:
                e0.append(embeddings[p[0]])
                e1.append(embeddings[p[1]])
                e2.append(embeddings[p[2]])
            e0 = torch.stack(e0,0)
            e1 = torch.stack(e1,0)
            e2 = torch.stack(e2,0)
            total_loss = F.triplet_margin_loss(e0, e1, e2, margin=margin, p=2)

        elif loss == 'weighted_triplet':
            # compute pairwise distance matrix
            dist = []
            # iteratively construct the columns
            for e in embeddings:
                ee = torch.cat([e.view(1,-1) for _ in range(embeddings.size(0))],0)
                dist.append(F.pairwise_distance(embeddings, ee))
            dist = torch.cat(dist, 1)

            # First, we need to get a mask for every valid positive (they should have same label)
            mask_anchor_positive = _get_anchor_positive_triplet_mask(labels)
            pos_dist = dist * Variable(mask_anchor_positive.float())

            # Now every valid negative mask
            mask_anchor_negative = _get_anchor_negative_triplet_mask(labels)
            neg_dist = dist * Variable(mask_anchor_negative.float())

            # now get the weights for each anchor, detach because it should be a constant weighting factor
            pos_weights = Variable(torch.zeros(dist.size()).cuda())
            neg_weights = Variable(torch.zeros(dist.size()).cuda())
            for i in range(dist.size(0)):
                # make by line
                mask = torch.zeros(dist.size()).byte().cuda()
                mask[i] = 1
                pos_weights[mask_anchor_positive & mask] = F.softmax(pos_dist[mask_anchor_positive & mask], 0)
                neg_weights[mask_anchor_negative & mask] = F.softmin(neg_dist[mask_anchor_negative & mask], 0)
            pos_weights = pos_weights.detach()
            neg_weights = neg_weights.detach()
            pos_weight_dist = pos_dist * pos_weights
            neg_weight_dist = neg_dist * neg_weights

            triplet_loss = torch.clamp(margin + pos_weight_dist.sum(1, keepdim=True) - neg_weight_dist.sum(1, keepdim=True), min=0)
            total_loss = triplet_loss.mean()
        else:
            raise NotImplementedError("Loss: {}".format(loss))

        losses = {}

        if prec_at_k:
            # compute pairwise squared distance matrix; avoid sqrt, which is unstable when a distance is 0
            n = embeddings.size(0)
            m = embeddings.size(0)
            d = embeddings.size(1)

            x = embeddings.data.unsqueeze(1).expand(n, m, d)
            y = embeddings.data.unsqueeze(0).expand(n, m, d)

            dist = torch.pow(x - y, 2).sum(2)
            mask_anchor_positive = _get_anchor_positive_triplet_mask(labels)
            _, indices = torch.sort(dist, dim=1)
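            # precision@k: count how many of each anchor's nearest neighbours (the anchor
            # itself sits among the k+1 smallest distances) share the anchor's label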
            num_hit = 0
            num_ges = 0
            for i in range(dist.size(0)):
                d = mask_anchor_positive[i].nonzero().view(-1,1)
                ind = indices[i][:prec_at_k+1]

                same = d==ind
                num_hit += same.sum()
                num_ges += prec_at_k
            k_loss = torch.Tensor(1)
            k_loss[0] = num_hit / num_ges
            losses['prec_at_k'] = Variable(k_loss.cuda())

        losses['total_loss'] = total_loss

        return losses
Example #47
0
def leGallInitMethod2(originalChnl):
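    # Per-channel 1x3 filter initialised to [0, 1/4, 1/4]; presumably an
    # initialisation for a learned LeGall 5/3-style lifting wavelet.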
    return torch.cat([torch.zeros(originalChnl, 1, 1), torch.ones(originalChnl, 1, 2) / 4], -1)
Example #48
0
 def _features(self, states):
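     # Augment raw states with squared states, polynomial time features, and a bias
     # column of ones -- a common featurisation for a linear value-function baseline.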
     length = states.size(0)
     ones = th.ones(length, 1).to(states.device)
     al = th.arange(length, dtype=th.float32, device=states.device).view(-1, 1) / 100.0
     return th.cat([states, states**2, al, al**2, al**3, ones], dim=1)
Example #49
0
    def build_target(self, pred, labels, batchsize, fsize, n_ch, output_id):
        # target assignment
        tgt_mask = torch.zeros(batchsize, self.n_anchors, fsize, fsize, 4 + self.n_classes).to(device=self.device)
        obj_mask = torch.ones(batchsize, self.n_anchors, fsize, fsize).to(device=self.device)
        tgt_scale = torch.zeros(batchsize, self.n_anchors, fsize, fsize, 2).to(self.device)
        target = torch.zeros(batchsize, self.n_anchors, fsize, fsize, n_ch).to(self.device)

        # labels = labels.cpu().data
        nlabel = (labels.sum(dim=2) > 0).sum(dim=1)  # number of objects

        # Convert [x1, y1, x2, y2, class] labels to box centers and sizes in feature-map units
        truth_x_all = (labels[:, :, 2] + labels[:, :, 0]) / (self.strides[output_id] * 2)
        truth_y_all = (labels[:, :, 3] + labels[:, :, 1]) / (self.strides[output_id] * 2)
        truth_w_all = (labels[:, :, 2] - labels[:, :, 0]) / self.strides[output_id]
        truth_h_all = (labels[:, :, 3] - labels[:, :, 1]) / self.strides[output_id]
        truth_i_all = truth_x_all.to(torch.int16).cpu().numpy()
        truth_j_all = truth_y_all.to(torch.int16).cpu().numpy()

        for b in range(batchsize):
            n = int(nlabel[b])
            if n == 0:
                continue
            truth_box = torch.zeros(n, 4).to(self.device)
            truth_box[:n, 2] = truth_w_all[b, :n]
            truth_box[:n, 3] = truth_h_all[b, :n]
            truth_i = truth_i_all[b, :n]
            truth_j = truth_j_all[b, :n]

            # calculate iou between truth and reference anchors
            anchor_ious_all = bboxes_iou(truth_box.cpu(), self.ref_anchors[output_id], CIoU=True)

            # temp = bbox_iou(truth_box.cpu(), self.ref_anchors[output_id])

            best_n_all = anchor_ious_all.argmax(dim=1)
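            # map the best global anchor index to its index among this scale's 3 anchors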
            best_n = best_n_all % 3
            best_n_mask = ((best_n_all == self.anch_masks[output_id][0]) |
                           (best_n_all == self.anch_masks[output_id][1]) |
                           (best_n_all == self.anch_masks[output_id][2]))

            if sum(best_n_mask) == 0:
                continue

            truth_box[:n, 0] = truth_x_all[b, :n]
            truth_box[:n, 1] = truth_y_all[b, :n]

            pred_ious = bboxes_iou(pred[b].view(-1, 4), truth_box, xyxy=False)
            pred_best_iou, _ = pred_ious.max(dim=1)
            pred_best_iou = (pred_best_iou > self.ignore_thre)
            pred_best_iou = pred_best_iou.view(pred[b].shape[:3])
            # set mask to zero (ignore) if pred matches truth
            obj_mask[b] = ~ pred_best_iou

            for ti in range(best_n.shape[0]):
                if best_n_mask[ti] == 1:
                    i, j = truth_i[ti], truth_j[ti]
                    a = best_n[ti]
                    obj_mask[b, a, j, i] = 1
                    tgt_mask[b, a, j, i, :] = 1
                    target[b, a, j, i, 0] = truth_x_all[b, ti] - truth_x_all[b, ti].to(torch.int16).to(torch.float)
                    target[b, a, j, i, 1] = truth_y_all[b, ti] - truth_y_all[b, ti].to(torch.int16).to(torch.float)
                    target[b, a, j, i, 2] = torch.log(
                        truth_w_all[b, ti] / torch.Tensor(self.masked_anchors[output_id])[best_n[ti], 0] + 1e-16)
                    target[b, a, j, i, 3] = torch.log(
                        truth_h_all[b, ti] / torch.Tensor(self.masked_anchors[output_id])[best_n[ti], 1] + 1e-16)
                    target[b, a, j, i, 4] = 1
                    target[b, a, j, i, 5 + labels[b, ti, 4].to(torch.int16).cpu().numpy()] = 1
                    tgt_scale[b, a, j, i, :] = torch.sqrt(2 - truth_w_all[b, ti] * truth_h_all[b, ti] / fsize / fsize)
        return obj_mask, tgt_mask, tgt_scale, target
Example #50
0
        loss = loss.view(bsz, opnum)
        acc = acc.view(bsz, opnum)
        loss = loss * mask
        acc = acc * mask
        acc = acc.sum(-1)
        # acc_high = (acc * high_mask).sum()
        acc = acc.sum()
        # acc_middle = acc - acc_high

        loss = loss.sum() / (mask.sum())
        return loss, acc


if __name__ == '__main__':
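    # Smoke test: build dummy inputs of the expected shapes and run one forward pass.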
    bsz = 32
    max_length = 50
    max_olen = 3
    articles = torch.zeros(bsz, max_length).long()
    articles_mask = torch.ones(articles.size())
    ops = torch.zeros(bsz, 4, max_olen).long()
    ops_mask = torch.ones(ops.size())
    question_id = torch.arange(bsz).long()
    question_pos = torch.arange(bsz).long()
    ans = torch.zeros(bsz).long()
    inp = [articles, articles_mask, ops, ops_mask, question_id, question_pos]
    tgt = ans
    model = ALbertForCloze.from_pretrained(
        '/chpc/home/stu-ysfang-a/RoBERTa-data/roberta-large',
        cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(-1))
    loss, acc = model(inp, tgt)
Example #51
0
 def __init__(self, features, eps=1e-8):
     super(LayerNorm, self).__init__()
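     # Learnable per-feature scale (gamma) and shift (beta)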
     self.gamma = nn.Parameter(torch.ones(features))
     self.beta = nn.Parameter(torch.zeros(features))
     self.eps = eps
Example #52
0
    def forward_train(self,
                      img,
                      img_meta,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None):
        x = self.extract_feat(img)

        losses = dict()

        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                          self.train_cfg.rpn)
            rpn_losses = self.rpn_head.loss(*rpn_loss_inputs,
                                            gt_bboxes_ignore=gt_bboxes_ignore)
            losses.update(rpn_losses)

            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
        else:
            proposal_list = proposals

        for i in range(self.num_stages):
            self.current_stage = i
            rcnn_train_cfg = self.train_cfg.rcnn[i]
            lw = self.train_cfg.stage_loss_weights[i]

            # assign gts and sample proposals
            sampling_results = []
            if self.with_bbox or self.with_mask:
                bbox_assigner = build_assigner(rcnn_train_cfg.assigner)
                bbox_sampler = build_sampler(rcnn_train_cfg.sampler,
                                             context=self)
                num_imgs = img.size(0)
                if gt_bboxes_ignore is None:
                    gt_bboxes_ignore = [None for _ in range(num_imgs)]

                for j in range(num_imgs):
                    assign_result = bbox_assigner.assign(
                        proposal_list[j], gt_bboxes[j], gt_bboxes_ignore[j],
                        gt_labels[j])
                    sampling_result = bbox_sampler.sample(
                        assign_result,
                        proposal_list[j],
                        gt_bboxes[j],
                        gt_labels[j],
                        feats=[lvl_feat[j][None] for lvl_feat in x])
                    sampling_results.append(sampling_result)

            # bbox head forward and loss
            bbox_roi_extractor = self.bbox_roi_extractor[i]
            bbox_head = self.bbox_head[i]

            rois = bbox2roi([res.bboxes for res in sampling_results])
            bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],
                                            rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)
            cls_score, bbox_pred = bbox_head(bbox_feats)

            bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,
                                                gt_labels, rcnn_train_cfg)
            loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)
            for name, value in loss_bbox.items():
                losses['s{}.{}'.format(
                    i, name)] = (value * lw if 'loss' in name else value)

            # mask head forward and loss
            if self.with_mask:
                if not self.share_roi_extractor:
                    mask_roi_extractor = self.mask_roi_extractor[i]
                    pos_rois = bbox2roi(
                        [res.pos_bboxes for res in sampling_results])
                    mask_feats = mask_roi_extractor(
                        x[:mask_roi_extractor.num_inputs], pos_rois)
                    if self.with_shared_head:
                        mask_feats = self.shared_head(mask_feats)
                else:
                    # reuse positive bbox feats
                    pos_inds = []
                    device = bbox_feats.device
                    for res in sampling_results:
                        pos_inds.append(
                            torch.ones(res.pos_bboxes.shape[0],
                                       device=device,
                                       dtype=torch.uint8))
                        pos_inds.append(
                            torch.zeros(res.neg_bboxes.shape[0],
                                        device=device,
                                        dtype=torch.uint8))
                    pos_inds = torch.cat(pos_inds)
                    mask_feats = bbox_feats[pos_inds]
                mask_head = self.mask_head[i]
                mask_pred = mask_head(mask_feats)
                mask_targets = mask_head.get_target(sampling_results, gt_masks,
                                                    rcnn_train_cfg)
                pos_labels = torch.cat(
                    [res.pos_gt_labels for res in sampling_results])
                loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)
                for name, value in loss_mask.items():
                    losses['s{}.{}'.format(
                        i, name)] = (value * lw if 'loss' in name else value)

            # refine bboxes
            if i < self.num_stages - 1:
                pos_is_gts = [res.pos_is_gt for res in sampling_results]
                roi_labels = bbox_targets[0]  # bbox_targets is a tuple
                with torch.no_grad():
                    proposal_list = bbox_head.refine_bboxes(
                        rois, roi_labels, bbox_pred, pos_is_gts, img_meta)

        return losses
Example #53
0
        x = model.down_conv_4(x)
        x = model.relu(x)
        x = model.down_conv_5(x)
        x = model.relu(x)
        x = model.down_conv_6(x)
        x = model.relu(x)
        x = model.down_conv_7(x)
        x = model.relu(x)
        x = model.down_pool_8(x)
        x = model.tanh(x)

        x = x.view(-1, 512)
        """

        if i == 6:
            latent = torch.ones(1,512) * -1
        if i == 5:
            latent = torch.zeros(1,512)
        if i == 4:
            latent = torch.ones(1,512)
        if i == 3:
            latent = np.random.rand(1, 512)
            latent[latent > 0.9] = 0
            latent[latent > 0] = 1
            latent = torch.from_numpy(latent)
        if i == 2:
            latent = np.random.rand(1, 512)
            latent[latent > 0.9] = 1
            latent[latent < 1] = 0
            latent = torch.from_numpy(latent)
        if i == 1:
Example #54
0
 def __init__(self, C):
     super(ParamSum, self).__init__()
     self.a = nn.Parameter(torch.ones(C))
     self.b = nn.Parameter(torch.ones(C))
Example #55
0
    def forward_train(self,
                      img,
                      img_meta,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None):
        """
        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.

            img_meta (list[dict]): list of image info dict where each dict has:
                'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                `mmdet/datasets/pipelines/formatting.py:Collect`.

            gt_bboxes (list[Tensor]): each item is the ground-truth boxes for one
                image in [tl_x, tl_y, br_x, br_y] format.

            gt_labels (list[Tensor]): class indices corresponding to each box

            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.

            gt_masks (None | Tensor) : true segmentation masks for each box
                used if the architecture supports a segmentation task.

            proposals : override rpn proposals with custom proposals. Use when
                `with_rpn` is False.

        Returns:
            dict[str, Tensor]: a dictionary of loss components
        """
        x = self.extract_feat(img)

        losses = dict()

        # RPN forward and loss
        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                          self.train_cfg.rpn)
            rpn_losses = self.rpn_head.loss(
                *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
            losses.update(rpn_losses)

            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
        else:
            proposal_list = proposals

        # assign gts and sample proposals
        if self.with_bbox or self.with_mask:
            bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
            bbox_sampler = build_sampler(
                self.train_cfg.rcnn.sampler, context=self)
            num_imgs = img.size(0)
            if gt_bboxes_ignore is None:
                gt_bboxes_ignore = [None for _ in range(num_imgs)]
            sampling_results = []
            for i in range(num_imgs):
                assign_result = bbox_assigner.assign(proposal_list[i],
                                                     gt_bboxes[i],
                                                     gt_bboxes_ignore[i],
                                                     gt_labels[i])
                sampling_result = bbox_sampler.sample(
                    assign_result,
                    proposal_list[i],
                    gt_bboxes[i],
                    gt_labels[i],
                    feats=[lvl_feat[i][None] for lvl_feat in x])
                sampling_results.append(sampling_result)

        # bbox head forward and loss
        if self.with_bbox:
            rois = bbox2roi([res.bboxes for res in sampling_results])
            # TODO: a more flexible way to decide which feature maps to use
            bbox_feats = self.bbox_roi_extractor(
                x[:self.bbox_roi_extractor.num_inputs], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)
            cls_score, bbox_pred = self.bbox_head(bbox_feats)

            bbox_targets = self.bbox_head.get_target(sampling_results,
                                                     gt_bboxes, gt_labels,
                                                     self.train_cfg.rcnn)
            loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
                                            *bbox_targets)
            losses.update(loss_bbox)

        # mask head forward and loss
        if self.with_mask:
            if not self.share_roi_extractor:
                pos_rois = bbox2roi(
                    [res.pos_bboxes for res in sampling_results])
                mask_feats = self.mask_roi_extractor(
                    x[:self.mask_roi_extractor.num_inputs], pos_rois)
                if self.with_shared_head:
                    mask_feats = self.shared_head(mask_feats)
            else:
                pos_inds = []
                device = bbox_feats.device
                for res in sampling_results:
                    pos_inds.append(
                        torch.ones(
                            res.pos_bboxes.shape[0],
                            device=device,
                            dtype=torch.bool))
                    pos_inds.append(
                        torch.zeros(
                            res.neg_bboxes.shape[0],
                            device=device,
                            dtype=torch.bool))
                pos_inds = torch.cat(pos_inds)
                mask_feats = bbox_feats[pos_inds]

            if mask_feats.shape[0] > 0:
                mask_pred = self.mask_head(mask_feats)
                mask_targets = self.mask_head.get_target(
                    sampling_results, gt_masks, self.train_cfg.rcnn)
                pos_labels = torch.cat(
                    [res.pos_gt_labels for res in sampling_results])
                loss_mask = self.mask_head.loss(mask_pred, mask_targets,
                                                pos_labels)
                losses.update(loss_mask)

        return losses
Example #56
0
 def getBuilderTensor(self):
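     # Build a (dimension+1) x dimension matrix: row 0 is all -1s, rows 1..3 the identity
     # (the hard-coded eye(3) / ones(3) assumes self.dimension == 3).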
     builder = torch.zeros(size=[self.dimension+1,self.dimension], dtype = torch.float32)
     builder[1:4, :] = torch.eye(3)
     builder[0, :] = -1 * torch.ones(3)
     return builder
Example #57
0
 def setUp(self):
     self.x = torch.ones((2), device='cuda', dtype=torch.float32)
     common_init(self)
Example #58
0
import superimport

import numpy as np
import matplotlib.pyplot as plt
import pyprobml_utils as pml


import torch
from torch import nn
import torch.nn.functional as F

np.random.seed(42)

n = 100
x = torch.ones(n, 2, requires_grad=False) 
x[:,0].uniform_(-1.,1)


def mse(y_hat, y): return ((y_hat-y)**2).mean()
#def mse(y, y_pred): return (y_pred - y).pow(2).sum()

# Synthetic linear data: y = 3*x0 + 2 + noise (x's second column is all ones, acting as a bias feature)
a = torch.as_tensor(np.array([3.0,2.0])).float()
y = x@a + torch.rand(n)

plt.scatter(x[:,0],y)


# must cast parameters to float to match type of x
#a = torch.as_tensor(np.array([-1.,1])).float()
#a = nn.Parameter(a);
Example #59
0
 def test_execution(self):
     a = torch.ones(1)
     b = 3 * torch.ones(1)
     s = 3
     # Test forward.
     self.check_function("forward", (a, b, s))
Example #60
0
def dump_model_to_tensorboard(model, writer, channels=64, time=64):
    dummy_input = torch.ones((1, channels, time, 1)).to("cuda")  # an input shape that works for the model
    writer.add_graph(model, dummy_input)