def test2():
    """Add 0.5 to a (1, 2) tensor of ones and print the result."""
    x = Variable(torch.ones(1, 2))
    z = x + 0.5
    # NOTE(review): the original call was truncated to ``print(``; printing
    # the data of the sum is the most likely intent -- confirm.
    print(z.data)
def test_elmo_lstm_cell_completes_forward_pass(self):
    """Run ``LstmCellWithProjection`` on a padded batch and check padding, shapes and clipping."""
    input_tensor = torch.autograd.Variable(torch.rand(4, 5, 3))
    # Zero the timesteps past each sequence's length (lengths 5, 4, 2, 1 below).
    input_tensor[1, 4:, :] = 0.
    input_tensor[2, 2:, :] = 0.
    input_tensor[3, 1:, :] = 0.

    initial_hidden_state = Variable(torch.ones([1, 4, 5]))
    initial_memory_state = Variable(torch.ones([1, 4, 7]))

    lstm = LstmCellWithProjection(input_size=3,
                                  hidden_size=5,
                                  cell_size=7,
                                  memory_cell_clip_value=2,
                                  state_projection_clip_value=1)
    output_sequence, lstm_state = lstm(input_tensor, [5, 4, 2, 1],
                                       (initial_hidden_state, initial_memory_state))

    # NOTE(review): the ``output_sequence.data`` operands below were missing
    # from the source text and have been restored -- confirm.
    # Padded positions must produce exactly zero output.
    numpy.testing.assert_array_equal(output_sequence.data[1, 4:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[2, 2:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[3, 1:, :].numpy(), 0.0)

    # Test the state clipping.
    numpy.testing.assert_array_less(output_sequence.data.numpy(), 1.0)
    numpy.testing.assert_array_less(-output_sequence.data.numpy(), 1.0)

    # LSTM state should be (num_layers, batch_size, hidden_size)
    assert list(lstm_state[0].size()) == [1, 4, 5]
    # LSTM memory cell should be (num_layers, batch_size, cell_size)
    assert list((lstm_state[1].size())) == [1, 4, 7]

    # Test the cell clipping.
    # NOTE(review): this inspects lstm_state[0] (the hidden state), not
    # lstm_state[1] (the memory cell) -- confirm which was intended.
    numpy.testing.assert_array_less(lstm_state[0].data.numpy(), 2.0)
    numpy.testing.assert_array_less(-lstm_state[0].data.numpy(), 2.0)
def test_flattened_index_select(self):
    """Check ``util.flattened_index_select`` output shape, values and input validation."""
    # Cumulative sum along dim 1 minus one: row j of each batch element holds j.
    targets = torch.ones([2, 6, 3]).cumsum(1) - 1
    # Make the second batch double its index so the two batches differ.
    targets[1, :, :] *= 2
    indices = torch.tensor(numpy.array([[1, 2], [3, 4]]), dtype=torch.long)

    selected = util.flattened_index_select(targets, indices)
    assert list(selected.size()) == [2, 2, 2, 3]

    ones = numpy.ones([3])
    # (batch, index row, index column) -> expected constant in the selected vector.
    expected_scale = {
        (0, 0, 0): 1,
        (0, 0, 1): 2,
        (0, 1, 0): 3,
        (0, 1, 1): 4,
        (1, 0, 0): 2,
        (1, 0, 1): 4,
        (1, 1, 0): 6,
        (1, 1, 1): 8,
    }
    for (batch, row, col), scale in expected_scale.items():
        numpy.testing.assert_array_equal(selected[batch, row, col, :].data.numpy(),
                                         ones * scale)

    # Check we only accept 2D indices.
    with pytest.raises(ConfigurationError):
        util.flattened_index_select(targets, torch.ones([3, 4, 5]))
def guide():
    """Guide with a module baseline for ``mu_latent`` and per-site baselines for the ``z`` sites.

    Reads ``self``, ``pt_mu_baseline``, ``pt_superfluous_baselines``,
    ``n_superfluous_top`` and ``n_superfluous_bottom`` from the enclosing
    scope.  Returns the sampled ``mu_latent``.
    """
    # NOTE(review): the source text read ``Variable( + 0.094 * ...)`` and
    # ``Variable( - 0.11 * ...)``; the left operands were lost. They are
    # restored here as the analytic posterior parameters -- confirm the
    # attribute names against the enclosing test class.
    mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.094 * torch.ones(2),
                                       requires_grad=True))
    log_sig_q = pyro.param("log_sig_q", Variable(self.analytic_log_sig_n.data - 0.11 * torch.ones(2),
                                                 requires_grad=True))
    sig_q = torch.exp(log_sig_q)

    trivial_baseline = pyro.module("mu_baseline", pt_mu_baseline, tags="baseline")
    baseline_value = trivial_baseline(ng_ones(1))
    mu_latent = pyro.sample("mu_latent",
                            dist.Normal(mu_q, sig_q, reparameterized=False),
                            baseline=dict(baseline_value=baseline_value))

    def obs_inner(i, _i, _x):
        # One extra non-reparameterized site per (i, k), each with its own baseline module.
        for k in range(n_superfluous_top + n_superfluous_bottom):
            z_baseline = pyro.module("z_baseline_%d_%d" % (i, k),
                                     pt_superfluous_baselines[3 * k + i], tags="baseline")
            # detach() keeps the baseline input out of the gradient of mu_latent.
            baseline_value = z_baseline(mu_latent.detach()).unsqueeze(-1)
            mean_i = pyro.param("mean_%d_%d" % (i, k),
                                Variable(0.5 * torch.ones(4 - i, 1), requires_grad=True))
            pyro.sample("z_%d_%d" % (i, k),
                        dist.Normal(mean_i, ng_ones(4 - i, 1), reparameterized=False),
                        baseline=dict(baseline_value=baseline_value))

    def obs_outer(i, x):
        pyro.map_data("map_obs_inner_%d" % i, x,
                      lambda _i, _x: obs_inner(i, _i, _x), batch_size=4 - i)

    pyro.map_data("map_obs_outer",
                  [self.data_tensor[0:4, :], self.data_tensor[4:7, :], self.data_tensor[7:9, :]],
                  lambda i, x: obs_outer(i, x), batch_size=3)

    return mu_latent
def guide():
    """Guide for the two-latent model: ``mu_latent`` and ``mu_latent_prime`` conditioned on it.

    Reads ``self``, ``repa1``, ``repa2``, ``use_decaying_avg_baseline`` and
    ``mu_prime_baseline`` from the enclosing scope.  Returns ``mu_latent``.
    """
    # NOTE(review): three operands were missing from the source text
    # (``Variable( + 0.334 * ...)``, ``Variable( - 0.29 * ...)`` and
    # ``torch.log(1.2 *, ...)``).  They are restored below as attributes of
    # the enclosing test class -- confirm the attribute names.
    mu_q = pyro.param("mu_q", Variable(self.analytic_mu_n.data + 0.334 * torch.ones(2),
                                       requires_grad=True))
    log_sig_q = pyro.param("log_sig_q", Variable(self.analytic_log_sig_n.data - 0.29 * torch.ones(2),
                                                 requires_grad=True))
    mu_q_prime = pyro.param("mu_q_prime", Variable(torch.Tensor([-0.34, 0.52]),
                                                   requires_grad=True))
    kappa_q = pyro.param("kappa_q", Variable(torch.Tensor([0.74]), requires_grad=True))
    log_sig_q_prime = pyro.param("log_sig_q_prime",
                                 Variable(-0.5 * torch.log(1.2 * self.lam0.data),
                                          requires_grad=True))
    sig_q, sig_q_prime = torch.exp(log_sig_q), torch.exp(log_sig_q_prime)

    mu_latent_dist = dist.Normal(mu_q, sig_q, reparameterized=repa2)
    mu_latent = pyro.sample("mu_latent", mu_latent_dist,
                            baseline=dict(use_decaying_avg_baseline=use_decaying_avg_baseline))

    # The second latent's mean is an affine function of the first.
    mu_latent_prime_dist = dist.Normal(kappa_q.expand_as(mu_latent) * mu_latent + mu_q_prime,
                                       sig_q_prime,
                                       reparameterized=repa1)
    pyro.sample("mu_latent_prime",
                mu_latent_prime_dist,
                baseline=dict(nn_baseline=mu_prime_baseline,
                              nn_baseline_input=mu_latent,
                              use_decaying_avg_baseline=use_decaying_avg_baseline))

    return mu_latent
def test_cpu(self):
    """Build the CPU FFI test extension and exercise the functions it exports."""
    extension = create_extension(
        name='test_extensions.cpulib',
        headers=[test_dir + '/ffi/src/cpu/lib.h'],
        sources=[
            test_dir + '/ffi/src/cpu/lib1.c',
            test_dir + '/ffi/src/cpu/lib2.c',
        ],
        verbose=False,
    )
    extension.build()
    # The module only exists once the extension has been built.
    from test_extensions import cpulib

    tensor = torch.ones(2, 2).float()
    cpulib.good_func(tensor, 2, 1.5)
    expected = torch.ones(2, 2) * 2 + 1.5
    self.assertEqual(tensor, expected)

    new_tensor = cpulib.new_tensor(4)
    self.assertEqual(new_tensor, torch.ones(4, 4) * 4)

    converted = cpulib.int_to_float(5)
    self.assertIs(type(converted), float)

    # A double tensor is rejected by the float-only entry point.
    with self.assertRaises(TypeError):
        cpulib.good_func(tensor.double(), 2, 1.5)
    with self.assertRaises(torch.FatalError):
        cpulib.bad_func(tensor, 2, 1.5)
def __init__(self, hidden_size, num_inputs, action_space):
    """Build the layers and the constant masks of the policy network.

    :param hidden_size: width of the two hidden linear layers
    :param num_inputs: size of the input feature vector
    :param action_space: object whose ``shape[0]`` gives the number of outputs
    """
    super(Policy, self).__init__()
    self.action_space = action_space
    num_outputs = action_space.shape[0]

    self.bn0 = nn.BatchNorm1d(num_inputs)

    self.linear1 = nn.Linear(num_inputs, hidden_size)
    self.bn1 = nn.BatchNorm1d(hidden_size)

    self.linear2 = nn.Linear(hidden_size, hidden_size)
    self.bn2 = nn.BatchNorm1d(hidden_size)

    self.V = nn.Linear(hidden_size, 1)
    # NOTE(review): the assignment target was missing in the source text
    # (`` = nn.Linear(hidden_size, num_outputs)``); restored as ``self.mu``
    # -- confirm against the attribute the forward pass reads.
    self.mu = nn.Linear(hidden_size, num_outputs)
    self.L = nn.Linear(hidden_size, num_outputs ** 2)

    # Strictly-lower-triangular mask (diagonal excluded), with a leading batch dim.
    self.tril_mask = Variable(torch.tril(torch.ones(
        num_outputs, num_outputs), diagonal=-1).unsqueeze(0))
    # Identity-shaped mask selecting only the diagonal, with a leading batch dim.
    self.diag_mask = Variable(torch.diag(torch.diag(
        torch.ones(num_outputs, num_outputs))).unsqueeze(0))
def test_Concat(self):
    """Check forward output, input gradient and weight gradients of ``nn.Concat(0)``."""
    input = torch.randn(4, 2)
    num_modules = random.randint(2, 5)
    linears = [nn.Linear(2, 5) for _ in range(num_modules)]

    m = nn.Concat(0)
    for layer in linears:
        m.add(layer)
        layer.zeroGradParameters()
        # Unit weights and zero bias: every output equals the row sum of the input.
        layer.weight.fill_(1)
        layer.bias.fill_(0)

    # Check that these don't raise errors
    repr(m)
    str(m)

    output = m.forward(input)
    expected_output = input.sum(1, True).expand(4, 5).repeat(num_modules, 1)
    self.assertEqual(expected_output, output)

    gradInput = m.backward(input, torch.ones(expected_output.size()))
    expected_grad_input = torch.ones(4, 2).fill_(num_modules * 5)
    self.assertEqual(gradInput, expected_grad_input)

    expected_grad_weight = input.sum(0, keepdim=True).expand(5, 2)
    for layer in linears:
        self.assertEqual(expected_grad_weight, layer.gradWeight)
def setUp(self):
    """Create the fixed inputs, random gradients and duty cycle used by the tests.

    Sets ``self.x`` / ``self.gradient`` (batch size 1), ``self.x2`` /
    ``self.gradient2`` (batch size 2) and ``self.dutyCycle``.
    """
    # Tests will use 3 filters and image width, height = 2 X 2.
    # Entries that differ from 1.0, keyed by (batch, filter, row, column).
    first_sample = {
        (0, 0, 1, 0): 1.1,
        (0, 0, 1, 1): 1.2,
        (0, 1, 0, 1): 1.2,
        (0, 2, 1, 0): 1.3,
    }
    second_sample = {
        (1, 0, 0, 0): 1.4,
        (1, 1, 0, 0): 1.5,
        (1, 1, 0, 1): 1.6,
        (1, 2, 1, 1): 1.7,
    }

    # Batch size 1
    single = torch.ones((1, 3, 2, 2))
    for index, value in first_sample.items():
        single[index] = value
    self.x = single
    self.gradient = torch.rand(single.shape)

    # Batch size 2: the first sample repeats the batch-size-1 input.
    pair = torch.ones((2, 3, 2, 2))
    for index, value in first_sample.items():
        pair[index] = value
    for index, value in second_sample.items():
        pair[index] = value
    self.x2 = pair
    self.gradient2 = torch.rand(pair.shape)

    # All equal
    duty_cycle = torch.zeros((1, 3, 1, 1))
    duty_cycle[:] = 1.0 / 3.0
    self.dutyCycle = duty_cycle
def forward(self, input_features, adj):
    """Encode/decode the graph with the VAE and return reconstruction loss plus KL loss.

    The input is flattened to rows of ``max_num_nodes ** 2`` values and fed
    to ``self.vae``.  A matching between the input adjacency and the
    reconstruction is computed and printed, but the adjacency is currently
    used un-permuted when building the reconstruction target.
    Requires CUDA: the vectorized target is moved with ``.cuda()``.
    """
    num_nodes = self.max_num_nodes
    graph_h = input_features.view(-1, num_nodes * num_nodes)

    # vae
    h_decode, z_mu, z_lsgms = self.vae(graph_h)
    out = F.sigmoid(h_decode)
    recon_adj_lower = self.recover_adj_lower(out.cpu().data)
    recon_adj_tensor = self.recover_full_adj_from_lower(recon_adj_lower)

    # set matching features be degree
    out_features = torch.sum(recon_adj_tensor, 1)
    adj_data = adj.cpu().data[0]
    adj_features = torch.sum(adj_data, 1)

    S = self.edge_similarity_matrix(adj_data, recon_adj_tensor, adj_features, out_features,
                                    self.deg_feature_similarity)

    # initialization strategies: uniform assignment
    init_corr = 1 / num_nodes
    init_assignment = torch.ones(num_nodes, num_nodes) * init_corr
    assignment = self.mpm(init_assignment, S)

    # matching
    # use negative of the assignment score since the alg finds min cost flow
    row_ind, col_ind = scipy.optimize.linear_sum_assignment(-assignment.numpy())
    print('row: ', row_ind)
    print('col: ', col_ind)

    # The permutation found above is not applied; the input adjacency is used as is.
    adj_permuted = adj_data
    upper_mask = torch.triu(torch.ones(num_nodes, num_nodes)) == 1
    adj_vectorized = adj_permuted[upper_mask].squeeze_()
    adj_vectorized_var = Variable(adj_vectorized).cuda()

    adj_recon_loss = self.adj_recon_loss(adj_vectorized_var, out[0])
    print('recon: ', adj_recon_loss)
    print(adj_vectorized_var)
    print(out[0])

    loss_kl = -0.5 * torch.sum(1 + z_lsgms - z_mu.pow(2) - z_lsgms.exp())
    loss_kl /= num_nodes * num_nodes  # normalize
    print('kl: ', loss_kl)

    return adj_recon_loss + loss_kl
def test_joint_optimize(
    self,
    mock_get_best_candidates,
    mock_gen_candidates,
    mock_gen_batch_initial_conditions,
    cuda=False,
):
    """Check that ``joint_optimize`` returns what the (mocked) best-candidate selection yields.

    Runs once for ``torch.float`` and once for ``torch.double``.
    """
    q = 3
    num_restarts = 2
    raw_samples = 10
    options = {}
    mock_acq_function = MockAcquisitionFunction()
    tkwargs = {"device": torch.device("cuda") if cuda else torch.device("cpu")}
    for dtype in (torch.float, torch.double):
        tkwargs["dtype"] = dtype
        mock_gen_batch_initial_conditions.return_value = torch.zeros(
            num_restarts, q, 3, **tkwargs
        )
        # NOTE(review): the ``torch.cat(`` call head was missing from the
        # source text, leaving ``[...], dim=0 )`` unbalanced; restored.
        mock_gen_candidates.return_value = torch.cat(
            [i * torch.ones(1, q, 3, **tkwargs) for i in range(num_restarts)], dim=0
        )
        mock_get_best_candidates.return_value = torch.ones(1, q, 3, **tkwargs)
        expected_candidates = mock_get_best_candidates.return_value
        bounds = torch.stack(
            [torch.zeros(3, **tkwargs), 4 * torch.ones(3, **tkwargs)]
        )
        candidates = joint_optimize(
            acq_function=mock_acq_function,
            bounds=bounds,
            q=q,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
            options=options,
        )
        self.assertTrue(torch.equal(candidates, expected_candidates))
def model():
    """Model built with ``named`` containers: a list-held param and dict/object-held samples."""
    latent = named.Object("latent")
    latent.list = named.List()
    loc = latent.list.add().loc.param_(torch.zeros(1))
    latent.dict = named.Dict()
    foo = latent.dict["foo"].foo.sample_(dist.Normal(loc, torch.ones(1)))
    # NOTE(review): the head of this statement was missing from the source
    # text (only ``, torch.ones(1)), obs=foo)`` survived).  It is restored as
    # an observed sample on ``latent.object.bar`` -- confirm the site name.
    latent.object.bar.sample_(dist.Normal(loc, torch.ones(1)), obs=foo)
def test_hmc_conjugate_gaussian(fixture, num_samples, warmup_steps, hmc_params,
                                expected_means, expected_precs, mean_tol, std_tol):
    """Run HMC on the fixture's model and compare posterior mean/std of each ``loc_i`` site.

    For every site the RMSE between the empirical and the expected value must
    be zero within ``mean_tol`` (means) or ``std_tol`` (standard deviations).
    """
    # Local logger so the block does not depend on a module-level name.
    import logging
    logger = logging.getLogger(__name__)

    pyro.get_param_store().clear()
    hmc_kernel = HMC(fixture.model, **hmc_params)
    # NOTE(review): the argument of ``run(`` and the ``logger.info(`` calls
    # below were missing from the source text; restored -- confirm that the
    # fixture exposes its observations as ``fixture.data``.
    mcmc_run = MCMC(hmc_kernel, num_samples, warmup_steps).run(fixture.data)
    for i in range(1, fixture.chain_len + 1):
        param_name = 'loc_' + str(i)
        marginal = EmpiricalMarginal(mcmc_run, sites=param_name)
        latent_loc = marginal.mean
        latent_std = marginal.variance.sqrt()
        expected_mean = torch.ones(fixture.dim) * expected_means[i - 1]
        # Precision -> standard deviation.
        expected_std = 1 / torch.sqrt(torch.ones(fixture.dim) * expected_precs[i - 1])

        # Actual vs expected posterior means for the latents
        logger.info('Posterior mean (actual) - {}'.format(param_name))
        logger.info(latent_loc)
        logger.info('Posterior mean (expected) - {}'.format(param_name))
        logger.info(expected_mean)
        assert_equal(rmse(latent_loc, expected_mean).item(), 0.0, prec=mean_tol)

        # Actual vs expected posterior precisions for the latents
        logger.info('Posterior std (actual) - {}'.format(param_name))
        logger.info(latent_std)
        logger.info('Posterior std (expected) - {}'.format(param_name))
        logger.info(expected_std)
        assert_equal(rmse(latent_std, expected_std).item(), 0.0, prec=std_tol)
def vector_grad():
    """Backpropagate through ``x**2 + 3*y**2`` for vector inputs and print both gradients."""
    x = Variable(3 * torch.ones(2), requires_grad=True)
    y = Variable(4 * torch.ones(2), requires_grad=True)
    z = x.pow(2) + 3 * y.pow(2)
    # z is not a scalar, so backward() needs an explicit upstream gradient.
    upstream = torch.ones(2)
    z.backward(upstream)
    for leaf in (x, y):
        print(leaf.grad)
def heads_tails(n_ent, train_data, valid_data=None, test_data=None):
    """Build sparse indicator vectors of known heads and tails for every seen pair.

    Each of ``train_data`` / ``valid_data`` / ``test_data`` is a
    ``(src, rel, dst)`` triple of sequences that are concatenated with ``+``.

    Returns ``(heads_sp, tails_sp)``:
      * ``tails_sp[(s, r)]`` -- sparse float vector of length ``n_ent`` with a
        one at every ``t`` such that ``(s, r, t)`` was seen;
      * ``heads_sp[(t, r)]`` -- same, with a one at every seen ``s``.
    """
    train_src, train_rel, train_dst = train_data
    valid_src, valid_rel, valid_dst = valid_data if valid_data else ([], [], [])
    test_src, test_rel, test_dst = test_data if test_data else ([], [], [])

    triples = zip(train_src + valid_src + test_src,
                  train_rel + valid_rel + test_rel,
                  train_dst + valid_dst + test_dst)

    heads = defaultdict(set)
    tails = defaultdict(set)
    for src, rel, dst in triples:
        tails[(src, rel)].add(dst)
        heads[(dst, rel)].add(src)

    def as_sparse(members):
        # One row of indices, one value of 1.0 per member.
        return torch.sparse.FloatTensor(torch.LongTensor([list(members)]),
                                        torch.ones(len(members)),
                                        torch.Size([n_ent]))

    tails_sp = {key: as_sparse(members) for key, members in tails.items()}
    heads_sp = {key: as_sparse(members) for key, members in heads.items()}
    return heads_sp, tails_sp
def test_regex_matches_are_initialized_correctly(self):
    """Check that regex-keyed initializers are applied to the matching parameters."""
    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.linear_1_with_funky_name = torch.nn.Linear(5, 10)
            self.linear_2 = torch.nn.Linear(10, 5)
            self.conv = torch.nn.Conv1d(5, 5, 5)

        def forward(self, inputs):  # pylint: disable=arguments-differ
            pass

    # pyhocon does funny things if there's a . in a key. This test makes sure that we
    # handle these kinds of regexes correctly.
    json_params = """{"initializer": [ ["conv", {"type": "constant", "val": 5}], ["funky_na.*bi", {"type": "constant", "val": 7}] ]} """
    params = Params(pyhocon.ConfigFactory.parse_string(json_params))
    initializers = InitializerApplicator.from_params(params['initializer'])
    model = Net()
    initializers(model)

    # NOTE(review): the first argument of both ``torch.equal`` calls was
    # missing from the source text; restored as ``parameter.data``.
    for parameter in model.conv.parameters():
        assert torch.equal(parameter.data, torch.ones(parameter.size()) * 5)

    parameter = model.linear_1_with_funky_name.bias
    assert torch.equal(parameter.data, torch.ones(parameter.size()) * 7)
def test_rescale_torch_tensor(self):
    """Rescale a tensor to [-1, 1] and back, and check the round trip restores every entry."""
    rows, cols = 3, 5
    original_tensor = torch.randint(low=10, high=40, size=(rows, cols)).float()
    prev_max_tensor = torch.ones(1, 5) * 40.0
    prev_min_tensor = torch.ones(1, 5) * 10.0
    new_min_tensor = torch.ones(1, 5) * -1.0
    new_max_tensor = torch.ones(1, 5).float()

    print("Original tensor: ", original_tensor)
    rescaled_tensor = rescale_torch_tensor(
        original_tensor,
        new_min_tensor,
        new_max_tensor,
        prev_min_tensor,
        prev_max_tensor,
    )
    print("Rescaled tensor: ", rescaled_tensor)
    reconstructed_original_tensor = rescale_torch_tensor(
        rescaled_tensor,
        prev_min_tensor,
        prev_max_tensor,
        new_min_tensor,
        new_max_tensor,
    )
    print("Reconstructed Original tensor: ", reconstructed_original_tensor)

    # The previous ``assertTrue(sum, rows * cols)`` passed ``rows * cols`` as
    # the failure *message*, so it only checked that at least one entry
    # matched.  Compare the count explicitly; a small tolerance absorbs the
    # float32 rounding of the two affine maps.
    comparison_tensor = torch.isclose(
        original_tensor, reconstructed_original_tensor, atol=1e-4
    )
    self.assertEqual(int(torch.sum(comparison_tensor)), rows * cols)
def test_python_ir(self):
    """Clone a traced graph node by node through the Python IR bindings and check the result."""
    x = Variable(torch.Tensor([0.4]), requires_grad=True)
    y = Variable(torch.Tensor([0.7]), requires_grad=True)

    def doit(x, y):
        return torch.sigmoid(torch.tanh(x * (x + y)))

    traced, _ = torch.jit.trace(doit, (x, y))
    g = torch._C._jit_get_graph(traced)
    g2 = torch._C.Graph()

    # Maps every value of the traced graph to its counterpart in the clone.
    g_to_g2 = {}
    for graph_input in g.inputs():
        g_to_g2[graph_input] = g2.addInput()
    for node in g.nodes():
        cloned = g2.createClone(node, lambda value: g_to_g2[value])
        g2.appendNode(cloned)
        for old_out, new_out in zip(node.outputs(), cloned.outputs()):
            g_to_g2[old_out] = new_out
    for graph_output in g.outputs():
        g2.registerOutput(g_to_g2[graph_output])

    # A node carrying a tensor attribute must round-trip that attribute.
    t_node = g2.create("TensorTest").t_("a", torch.ones([2, 2]))
    assert t_node.attributeNames() == ["a"]
    g2.appendNode(t_node)
    assert torch.equal(torch.ones([2, 2]), t_node.t("a"))
    self.assertExpected(str(g2))
def test_growing_dataset(self):
    """A DataLoader's length must reflect items appended to its dataset after construction."""
    dataset = [torch.ones(4) for _ in range(4)]
    dataloader_seq = DataLoader(dataset, shuffle=False)
    dataloader_shuffle = DataLoader(dataset, shuffle=True)

    # Grow the underlying list after both loaders already exist.
    dataset.append(torch.ones(4))

    for loader in (dataloader_seq, dataloader_shuffle):
        self.assertEqual(len(loader), 5)
def bernoulli_normal_model():
    """Sample a Bernoulli site, then observe a narrow Normal whose mean sign depends on it.

    Returns ``[bern_0, normal_0]``: the sampled Bernoulli value and the
    observed value (a tensor of ones).
    """
    bern_0 = pyro.sample('bern_0', dist.Bernoulli(torch.zeros(1) * 1e-2))
    if bern_0.item():
        loc = torch.ones(1)
    else:
        loc = -torch.ones(1)
    normal_0 = torch.ones(1)
    scale = torch.ones(1) * 1e-2
    pyro.sample('normal_0', dist.Normal(loc, scale), obs=normal_0)
    return [bern_0, normal_0]
def model():
    """Sample two one-hot categorical sites and assert the shapes of the samples.

    ``iarange_shape`` is read from the enclosing scope.
    """
    p2 = torch.tensor(torch.ones(2) / 2)
    p3 = torch.tensor(torch.ones(3) / 3)
    x2 = pyro.sample("x2", dist.OneHotCategorical(p2))
    x3 = pyro.sample("x3", dist.OneHotCategorical(p3))

    expected_x2_shape = torch.Size([2]) + iarange_shape + p2.shape
    expected_x3_shape = torch.Size([3, 1]) + iarange_shape + p3.shape
    assert x2.shape == expected_x2_shape
    assert x3.shape == expected_x3_shape
def knn(Mxx, Mxy, Myy, k, sqrt):
    """Leave-one-out k-nearest-neighbour classification of two pooled sample sets.

    The first ``Mxx.size(0)`` points are labelled 1 and the remaining
    ``Myy.size(0)`` points 0.  Each point is classified by a majority vote
    (``>= k / 2``) of its ``k`` nearest other points.

    :param Mxx: pairwise matrix within the first set (presumably distances --
        smaller means closer; verify against the caller)
    :param Mxy: pairwise matrix between the first and second set
    :param Myy: pairwise matrix within the second set
    :param k: number of neighbours
    :param sqrt: if true, ``abs().sqrt()`` is applied to the pooled matrix
    :return: a ``Score_knn`` with ``tp``, ``fp``, ``fn``, ``tn``,
        ``precision``, ``recall``, ``acc_t``, ``acc_f``, ``acc`` and ``k`` set

    NOTE(review): the ``torch.cat`` / ``torch.ge`` calls, the ``s.tp`` /
    ``s.tn`` targets and the right-hand sides of precision, recall, acc_t and
    acc_f were missing from the source text and have been restored -- confirm
    the formulas against the project's reference implementation.
    """
    n0 = Mxx.size(0)
    n1 = Myy.size(0)
    label = torch.cat((torch.ones(n0), torch.zeros(n1)))
    # Pooled (n0 + n1) x (n0 + n1) block matrix [[Mxx, Mxy], [Mxy^T, Myy]].
    M = torch.cat((torch.cat((Mxx, Mxy), 1),
                   torch.cat((Mxy.transpose(0, 1), Myy), 1)), 0)
    if sqrt:
        M = M.abs().sqrt()

    # Infinite diagonal so that a point is never its own neighbour.
    INFINITY = float('inf')
    val, idx = (M + torch.diag(INFINITY * torch.ones(n0 + n1))).topk(k, 0, False)

    # Number of label-1 points among the k nearest neighbours of each point.
    count = torch.zeros(n0 + n1)
    for i in range(0, k):
        count = count + label.index_select(0, idx[i])
    pred = torch.ge(count, (float(k) / 2) * torch.ones(n0 + n1)).float()

    s = Score_knn()
    s.tp = (pred * label).sum()
    s.fp = (pred * (1 - label)).sum()
    s.fn = ((1 - pred) * label).sum()
    s.tn = ((1 - pred) * (1 - label)).sum()
    # The small epsilon avoids 0 / 0 when nothing is predicted positive.
    s.precision = s.tp / (s.tp + s.fp + 1e-10)
    s.recall = s.tp / (s.tp + s.fn + 1e-10)
    s.acc_t = s.tp / (s.tp + s.fn)
    s.acc_f = s.tn / (s.tn + s.fp)
    s.acc = torch.eq(label, pred).float().mean()
    s.k = k

    return s
def __init__(self, env_spec, hidden_sizes=(64,64), min_log_std=-3, init_log_std=0, seed=None):
    """
    Build the policy network, a same-shaped "old" copy and bookkeeping attributes.

    :param env_spec: specifications of the env; must provide ``observation_dim``
        and ``action_dim`` (see the env utilities under ``utils/``)
    :param hidden_sizes: network hidden layer sizes (currently 2 layers only)
    :param min_log_std: log_std is clamped at this value and can't go below
    :param init_log_std: initial log standard deviation
    :param seed: random seed

    NOTE(review): several ``.data`` expressions were missing from the source
    text (the two ``param.data`` assignment targets, the scaled right-hand
    side, and the ``log_std_val`` / ``param_shapes`` / ``param_sizes``
    expressions).  They are restored below -- confirm.
    """
    self.n = env_spec.observation_dim  # number of states
    self.m = env_spec.action_dim  # number of actions
    self.min_log_std = min_log_std

    # Set seed
    # ------------------------
    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    # Policy network
    # ------------------------
    self.model = nn.Sequential()
    self.model.add_module('fc_0', nn.Linear(self.n, hidden_sizes[0]))
    self.model.add_module('tanh_0', nn.Tanh())
    self.model.add_module('fc_1', nn.Linear(hidden_sizes[0], hidden_sizes[1]))
    self.model.add_module('tanh_1', nn.Tanh())
    self.model.add_module('fc_2', nn.Linear(hidden_sizes[1], self.m))
    # make weights small
    for param in list(self.model.parameters())[-2:]:  # only last layer
        param.data = (1.0/hidden_sizes[1]) * param.data
    self.log_std = Variable(torch.ones(self.m) * init_log_std, requires_grad=True)
    self.trainable_params = list(self.model.parameters()) + [self.log_std]

    # Old Policy network
    # ------------------------
    self.old_model = nn.Sequential()
    self.old_model.add_module('old_fc_0', nn.Linear(self.n, hidden_sizes[0]))
    self.old_model.add_module('old_tanh_0', nn.Tanh())
    self.old_model.add_module('old_fc_1', nn.Linear(hidden_sizes[0], hidden_sizes[1]))
    self.old_model.add_module('old_tanh_1', nn.Tanh())
    self.old_model.add_module('old_fc_2', nn.Linear(hidden_sizes[1], self.m))
    self.old_log_std = Variable(torch.ones(self.m) * init_log_std)
    self.old_params = list(self.old_model.parameters()) + [self.old_log_std]
    # Start the old policy as an exact copy of the trainable one.
    for idx, param in enumerate(self.old_params):
        param.data = self.trainable_params[idx].data.clone()

    # Easy access variables
    # -------------------------
    self.log_std_val = np.float64(self.log_std.data.numpy().ravel())
    self.param_shapes = [p.data.numpy().shape for p in self.trainable_params]
    self.param_sizes = [p.data.numpy().size for p in self.trainable_params]
    self.d = np.sum(self.param_sizes)  # total number of params

    # Placeholders
    # ------------------------
    self.obs_var = Variable(torch.randn(self.n), requires_grad=False)
def test():
    """Print the quadratic form ``x @ Sigma @ x.T`` for ``x = [1, 1]``."""
    x = torch.ones(1, 2)
    Sigma = torch.FloatTensor([[1, 0.8],
                               [0.8, 1]])
    # The unused ``z = torch.ones(x.size())`` local was removed.
    y = torch.matmul(x, Sigma)
    y = torch.matmul(y, x.t())
    print(y)
def get_batch_audio(self, tgt_l=3, bsize=1, sample_rate=5500, window_size=0.03, t=37):
    """Return an all-ones audio batch ``(src, tgt, None)``.

    ``src`` is a float tensor of shape ``(bsize, 1, nfft, t)`` and ``tgt`` a
    long tensor of shape ``(tgt_l, bsize, 1)``; no lengths are returned.
    """
    # batch x 1 x nfft x t
    half_window = (sample_rate * window_size) / 2
    nfft = int(math.floor(half_window) + 1)
    src = Variable(torch.ones(bsize, 1, nfft, t)).float()
    tgt = Variable(torch.ones(tgt_l, bsize, 1)).long()
    return src, tgt, None
def get_batch_audio(self, tgt_l=7, bsize=3, sample_rate=5500, window_size=0.03, t=37):
    """Return an all-ones audio batch ``(src, tgt, lengths)``.

    ``src`` is a float tensor of shape ``(bsize, 1, nfft, t)``, ``tgt`` a long
    tensor of shape ``(tgt_l, bsize, 1)`` and ``lengths`` a long tensor of
    ``bsize`` entries, each equal to ``tgt_l``.
    """
    # batch x 1 x nfft x t
    half_window = (sample_rate * window_size) / 2
    nfft = int(math.floor(half_window) + 1)
    src = torch.ones(bsize, 1, nfft, t).float()
    tgt = torch.ones(tgt_l, bsize, 1).long()
    lengths = torch.ones(bsize).long().fill_(tgt_l)
    return src, tgt, lengths
def test_tensor_array_monkey_patch():
    """Check that ``np.array`` accepts a tensor, with and without an explicit dtype.

    ``a`` is read from the enclosing module scope.
    """
    this_tests('na')

    as_array = np.array(torch.ones(a))
    assert np.all(as_array == as_array), "Tensors did not properly convert to numpy arrays"

    as_float_array = np.array(torch.ones(a), dtype=float)
    assert np.all(as_float_array == as_float_array), "Tensors did not properly convert to numpy arrays with a dtype set"
def test_unweighted_mean_and_var(size, dtype):
    """Add the constant samples 0..4 to an ``Empirical`` and check its mean and variance."""
    empirical_dist = Empirical()
    for value in range(5):
        sample = torch.ones(size, dtype=dtype) * value
        empirical_dist.add(sample)

    # Both the expected mean and the expected variance are 2 everywhere.
    expected_mean = torch.ones(size) * 2
    expected_var = torch.ones(size) * 2
    assert_equal(empirical_dist.mean, expected_mean)
    assert_equal(empirical_dist.variance, expected_var)
def guide():
    """Guide with two independent Normal sites; returns the sample drawn at ``latent2``."""
    # First latent: random initial location, unit initial scale.
    loc1 = pyro.param("loc1", torch.randn(2, requires_grad=True))
    scale1 = pyro.param("scale1", torch.ones(2, requires_grad=True))
    pyro.sample("latent1", Normal(loc1, scale1))

    # Second latent, parameterized the same way.
    loc2 = pyro.param("loc2", torch.randn(2, requires_grad=True))
    scale2 = pyro.param("scale2", torch.ones(2, requires_grad=True))
    return pyro.sample("latent2", Normal(loc2, scale2))
def test_random_module(nn_module):
    """Lift a module with a Bernoulli(1) prior and check every sampled parameter is all ones."""
    pyro.clear_param_store()
    nn_module = nn_module()
    # Probability 1 everywhere, so every draw from the prior is exactly one.
    p = torch.ones(2, 2)
    prior = dist.Bernoulli(p)
    lifted_mod = pyro.random_module("module", nn_module, prior)
    nn_module = lifted_mod()
    for name, parameter in nn_module.named_parameters():
        # NOTE(review): the second argument was cut off in the source text;
        # restored as ``parameter.data`` -- confirm.
        assert torch.equal(torch.ones(2, 2), parameter.data)
def train_model(model, optim, train_q_embed, dev_q_embed, dev_q_cand_ids,
                train_pairs, dev_pairs, hparams, log_path, seed):
    """Train model using negative sampling.

    Args:
    - model
    - optim: optimizer
    - train_q_embed: Embedding object for training queries, shape (nb
      train queries, dim)
    - dev_q_embed: Embedding object for dev queries, shape (nb dev
      queries, dim)
    - dev_q_cand_ids: list containing candidate ID of each dev query
      (None if it is not a candidate), used to compute MAP on dev set.
    - train_pairs: array of (query ID, hypernym ID) pairs for training
      (shuffled in place every epoch)
    - dev_pairs: array of (query ID, hypernym ID) pairs for validation
    - hparams: dict containing settings of hyperparameters
    - log_path: path of log file (overwritten, one line per epoch)
    - seed: seed for RNG (ignored when falsy)

    Returns:
    - deep copy of the model taken at the epoch with the best dev MAP

    Training stops early once the dev MAP has not improved for
    ``hparams["patience"]`` consecutive epochs.
    """

    # Extract hyperparameter settings
    nb_neg_samples = hparams["nb_neg_samples"]
    subsample = hparams["subsample"]
    max_epochs = hparams["max_epochs"]
    patience = hparams["patience"]
    batch_size = hparams["batch_size"]
    clip = hparams["clip"]

    if seed:
        random.seed(seed)
        np.random.seed(seed)

    # Prepare sampling of negative examples
    candidate_ids = list(range(model.get_nb_candidates()))
    cand_sampler = make_sampler(candidate_ids)

    # Prepare subsampling of positive examples: a hypernym is kept with
    # probability sqrt(min_freq / freq), so frequent hypernyms are thinned.
    pos_sample_prob = {}
    if subsample:
        hyp_fd = {}
        for h_id in train_pairs[:, 1]:
            if h_id not in hyp_fd:
                hyp_fd[h_id] = 0
            hyp_fd[h_id] += 1
        min_freq = min(hyp_fd.values())
        for (h_id, freq) in hyp_fd.items():
            pos_sample_prob[h_id] = sqrt(min_freq / freq)

    # Initialize training batch for query IDs, positive hypernym IDs,
    # negative hypernym IDs, positive targets, and negative targets.
    # We separate positive and negative examples to compute the losses
    # separately. Note that this is a bit inefficient, as we compute
    # the query projections twice.
    batch_q = np.zeros(batch_size, 'int64')
    batch_h_pos = np.zeros((batch_size, 1), 'int64')
    batch_h_neg = np.zeros((batch_size, nb_neg_samples), 'int64')
    t_pos_var = wrap_in_var(torch.ones((batch_size, 1)), False, model.use_cuda)
    t_neg_var = wrap_in_var(torch.zeros((batch_size, nb_neg_samples)), False, model.use_cuda)

    # Prepare list of sets of gold hypernym IDs for queries in
    # training set. This is used for negative sampling.
    nb_train_queries = train_q_embed.weight.shape[0]
    train_gold_ids = [set() for _ in range(nb_train_queries)]
    nb_train_pairs = train_pairs.shape[0]
    for i in range(nb_train_pairs):
        q_id = int(train_pairs[i, 0])
        h_id = int(train_pairs[i, 1])
        train_gold_ids[q_id].add(h_id)

    # Prepare list of sets of gold hypernym IDs for queries in dev set
    # to compute score (MAP)
    nb_dev_queries = dev_q_embed.weight.shape[0]
    dev_gold_ids = [set() for _ in range(nb_dev_queries)]
    nb_dev_pairs = dev_pairs.shape[0]
    for i in range(nb_dev_pairs):
        q_id = int(dev_pairs[i, 0])
        h_id = int(dev_pairs[i, 1])
        dev_gold_ids[q_id].add(h_id)

    # Prepare input variables to compute loss on dev set
    dev_q_ids = torch.LongTensor(dev_pairs[:, 0])
    if model.use_cuda:
        dev_q_ids = dev_q_ids.cuda()
    dev_q_var = dev_q_embed(dev_q_ids)
    dev_h_var = wrap_in_var(torch.LongTensor(dev_pairs[:, 1]).unsqueeze(1), False, model.use_cuda)
    dev_t_var = wrap_in_var(torch.ones((nb_dev_pairs, 1)), False, model.use_cuda)

    # Make Evaluator to compute MAP on dev set
    dev_eval = Evaluator(model, dev_q_embed, dev_q_cand_ids)

    print("\nEvaluating untrained model on dev set...")
    MAP = dev_eval.get_MAP(dev_gold_ids)
    print("MAP: {:.4f}".format(MAP))

    checkpoint_header = ["Epoch", "Updates", "PosLoss", "NegLoss",
                         "DevLoss", "DevMAP", "TimeElapsed"]
    with open(log_path, "w") as f:
        f.write("\t".join(checkpoint_header) + "\n")

    # Train model
    best_model = deepcopy(model)
    best_score = float("-inf")
    nb_no_gain = 0
    # Next free row of the batch buffers; persists across epochs.
    batch_row_id = 0
    done = False
    start_time = time.time()
    print("\nStarting training...\n")
    print("\t".join(checkpoint_header))
    for epoch in range(1, max_epochs + 1):
        model.train()
        np.random.shuffle(train_pairs)
        total_pos_loss = 0.0
        total_neg_loss = 0.0

        # Loop through training pairs
        nb_updates = 0
        for pair_ix in range(train_pairs.shape[0]):
            q_id = train_pairs[pair_ix, 0]
            h_id = train_pairs[pair_ix, 1]
            if subsample and random.random() >= pos_sample_prob[h_id]:
                continue
            batch_q[batch_row_id] = q_id
            batch_h_pos[batch_row_id] = h_id

            # Get negative examples (rejecting gold hypernyms of this query)
            neg_samples = []
            while len(neg_samples) < nb_neg_samples:
                cand_id = next(cand_sampler)
                if cand_id not in train_gold_ids[q_id]:
                    neg_samples.append(cand_id)
            batch_h_neg[batch_row_id] = neg_samples

            # Update on batch.  The index wraps to 0 exactly when the last
            # row has just been filled; the previous check
            # (``batch_row_id + 1 == batch_size``) fired one row early, so
            # the very first update trained on an all-zero last row.
            batch_row_id = (batch_row_id + 1) % batch_size
            if batch_row_id == 0:
                q_ids = wrap_in_var(torch.LongTensor(batch_q), False, model.use_cuda)
                q_var = train_q_embed(q_ids)
                h_pos_var = wrap_in_var(torch.LongTensor(batch_h_pos), False, model.use_cuda)
                h_neg_var = wrap_in_var(torch.LongTensor(batch_h_neg), False, model.use_cuda)
                optim.zero_grad()
                pos_loss = model.get_loss(q_var, h_pos_var, t_pos_var)
                neg_loss = model.get_loss(q_var, h_neg_var, t_neg_var)
                loss = pos_loss + neg_loss
                loss.backward()
                if clip > 0:
                    torch.nn.utils.clip_grad_norm(train_q_embed.parameters(), clip)
                    torch.nn.utils.clip_grad_norm(model.parameters(), clip)
                optim.step()
                # NOTE(review): the ``pos_loss.data`` / ``neg_loss.data``
                # operands were missing from the source text; restored.
                # ``.data[0]`` assumes the legacy 1-element loss tensor --
                # use ``.item()`` if the project targets PyTorch >= 0.4.
                total_pos_loss += pos_loss.data[0]
                total_neg_loss += neg_loss.data[0]
                nb_updates += 1

        # Check progress.  Guard against an epoch in which no batch was
        # completed (totals are 0.0 then, so the averages are 0.0).
        nb_examples = max(nb_updates, 1) * batch_size
        avg_pos_loss = total_pos_loss / nb_examples
        avg_neg_loss = total_neg_loss / nb_examples

        # Compute loss and MAP on dev set
        model.eval()
        dev_loss = model.get_loss(dev_q_var, dev_h_var, dev_t_var)
        # NOTE(review): ``dev_loss.data`` restored, see the note above.
        avg_dev_loss = dev_loss.data[0] / nb_dev_pairs
        MAP = dev_eval.get_MAP(dev_gold_ids)
        checkpoint_data = []
        checkpoint_data.append(str(epoch))
        checkpoint_data.append(str(nb_updates))
        checkpoint_data.append("{:.4f}".format(avg_pos_loss))
        checkpoint_data.append("{:.4f}".format(avg_neg_loss))
        checkpoint_data.append("{:.4f}".format(avg_dev_loss))
        checkpoint_data.append("{:.4f}".format(MAP))
        checkpoint_data.append("{:.1f}s".format(time.time() - start_time))
        print("\t".join(checkpoint_data))
        with open(log_path, "a") as f:
            f.write("\t".join(checkpoint_data) + "\n")

        # Early stopping
        if MAP > best_score:
            best_score = MAP
            best_model = deepcopy(model)
            nb_no_gain = 0
        else:
            nb_no_gain += 1
        if nb_no_gain >= patience:
            print("EARLY STOP!")
            done = True
            print("\nEvaluating best model on dev set...")
            dev_eval.set_model(best_model)
            MAP = dev_eval.get_MAP(dev_gold_ids)
            print("MAP of best model: {:.3f}".format(MAP))
        if done:
            break
    print("\nTraining finished after {} epochs".format(epoch))
    return best_model
def harrInitMethod1(originalChnl):
    """Return an ``(originalChnl, 1, 3)`` tensor whose every row is ``[1, 0, 0]``.

    NOTE(review): the ``torch.cat(`` call head was missing from the source
    text (``return[...], 1).reshape(...)``); restored.
    """
    # One column of ones followed by two columns of zeros, joined along dim 1.
    rows = torch.cat([torch.ones(originalChnl, 1), torch.zeros(originalChnl, 2)], 1)
    return rows.reshape(originalChnl, 1, 3)
def test_batchnorm_with_weights(self):
    """Test of the PyTorch 3D batchnorm Node with weights and biases on Glow.

    The module wraps ``nn.BatchNorm3d`` between a quant and a dequant stub and
    is run on a 5-D input through ``utils.compare_tracing_methods``.
    """

    class SimpleQuantizedBatchNorm(nn.Module):
        def __init__(
            self,
            C,
            weight,
            bias,
            running_mean,
            running_var,
            in_scale,
            in_zero_point,
            out_scale,
            out_zero_point,
        ):
            super(SimpleQuantizedBatchNorm, self).__init__()
            self.qconfig = my_qconfig

            # Batchnorm with explicit affine parameters, statistics and
            # output quantization parameters.
            batchnorm = nn.BatchNorm3d(C)
            batchnorm.scale = out_scale
            batchnorm.zero_point = out_zero_point
            batchnorm.weight = nn.Parameter(weight)
            batchnorm.bias = nn.Parameter(bias)
            batchnorm.running_mean = running_mean
            batchnorm.running_var = running_var
            self.batchnorm = batchnorm

            self.relu = nn.ReLU()

            # Input quantization parameters live on the quant stub.
            quant = torch.quantization.QuantStub()
            quant.scale = in_scale
            quant.zero_point = in_zero_point
            self.q = quant
            self.dq = torch.quantization.DeQuantStub()

        def forward(self, x):
            return self.dq(self.batchnorm(self.q(x)))

    C = 7
    in_scale = 0.0031
    out_scale = 0.0047
    in_zero_point = -42
    out_zero_point = 23

    # Random draws are kept in their original order: weight, bias, inputs.
    weight = torch.ones(C) + torch.rand(C) * 0.001
    bias = torch.rand(C) * 0.0001
    running_mean = torch.zeros(C)
    running_var = torch.ones(C)

    inputs = torch.randn((6, C, 4, 33, 42), requires_grad=False)
    model = SimpleQuantizedBatchNorm(
        C,
        weight,
        bias,
        running_mean,
        running_var,
        in_scale,
        in_zero_point,
        out_scale,
        out_zero_point,
    )
    model.eval()

    utils.compare_tracing_methods(
        model,
        inputs,
        skip_to_glow=True,
    )
('conv', 0), ('identity', 1), ('shuffle_conv', 0), ('dep_conv', 2), ('shuffle_conv', 4), ('identity', 0)], normal_normal_concat=range(2, 6)) cell = BuildCell(layer7_doublechannel, c_prev_prev=-1, c_prev=256, c=32, cell_type='normal_up', dp=0) # The final output size for normal up may not be a multiple if normal does not exist s0 = torch.FloatTensor(torch.ones(1, 128, 16, 16)) s1 = torch.FloatTensor(torch.ones(1, 256, 8, 8)) output = cell(None, s1) print(output.size()) x = torch.FloatTensor(torch.ones(1, 3, 128, 128)) network = BuildNasUnetPrune(layer7_doublechannel, input_c=3, c=16, num_classes=1, meta_node_num=4, layers=9, dp=0, use_sharing=True, double_down_channel=True) print(network)
the ``Variable`` (except for Variables created by the user - their ``grad_fn is None``). If you want to compute the derivatives, you can call ``.backward()`` on a ``Variable``. If ``Variable`` is a scalar (i.e. it holds a one element data), you don’t need to specify any arguments to ``backward()``, however if it has more elements, you need to specify a ``grad_output`` argument that is a tensor of matching shape. """ import torch from torch.autograd import Variable ############################################################### # Create a variable: x = Variable(torch.ones(2, 2), requires_grad=True) print(x) ############################################################### # Do an operation of variable: y = x + 2 print(y) ############################################################### # ``y`` was created as a result of an operation, so it has a ``grad_fn``. print(y.grad_fn) ############################################################### # Do more operations on y z = y * y * 3 out = z.mean()
def train(self):
    """Run the adversarial training loop for ``self.num_epochs`` epochs.

    For every batch the discriminator is updated first, on the
    concatenation of the classifications of the generated (label 0) and
    ground-truth (label 1) passes. While the global step is below
    ``self.start_disc_iters`` only the discriminator is trained. After
    that the GAN is optimised against the all-ones labels, and scalars,
    images and checkpoints are emitted according to ``self.log_freq``,
    ``self.display_freq`` and ``self.save_freq``.

    Running averages are re-created at the start of every epoch.
    """
    # torch.autograd.set_detect_anomaly(True)
    for epoch in range(self.num_epochs):
        num_batches = len(self.train_dataset_loader)

        # Initialize running averages
        disc_losses = AverageMeter()
        train_disc_accuracies = AverageMeter()
        tot_losses = AverageMeter()
        train_accuracies = AverageMeter()

        for batch_id, batch_data in enumerate(self.train_dataset_loader):
            self.gan.train()  # Set the model to train mode
            self.vgg_loss_network.eval()
            current_step = epoch * num_batches + batch_id

            # Get data from dataset.
            # ``async`` is a reserved word since Python 3.7; PyTorch renamed
            # the argument to ``non_blocking``.
            src_img = batch_data['im'].cuda(non_blocking=True)
            target_img = batch_data['target_im'].cuda(non_blocking=True)
            src_iuv = batch_data['im_iuv'].cuda(non_blocking=True)
            target_iuv = batch_data['target_iuv'].cuda(non_blocking=True)
            # pdb.set_trace()

            # ============
            # Run predictive GAN on source image
            _, classification_src = self.gan(src_img, src_iuv, target_iuv, use_gt=False)
            # Run predictive GAN on target image
            _, classification_tgt = self.gan(target_img, src_iuv, target_iuv, use_gt=True)

            # Create discriminator groundtruth
            # For src, we create zeros
            # For tgt, we create ones
            num_samples = classification_src.shape[0]
            disc_gt_src = torch.zeros(num_samples, 1, dtype=torch.float32).cuda()
            disc_gt_tgt = torch.ones(num_samples, 1, dtype=torch.float32).cuda()
            disc_gt = torch.cat((disc_gt_src, disc_gt_tgt), dim=0)
            classification_all = torch.cat((classification_src, classification_tgt), dim=0)

            # Train Discriminator network
            disc_loss = self._optimizeDiscriminator(classification_all, disc_gt)
            disc_losses.update(disc_loss.item(), disc_gt.shape[0])
            # NOTE(review): if the classifier emits a single column, softmax
            # over dim=1 is identically 1 and this accuracy is not
            # informative (a sigmoid would be needed) - confirm output shape.
            disc_acc = 100.0 * torch.mean(
                (torch.round(F.softmax(classification_all, dim=1)) == disc_gt).float()
            )
            train_disc_accuracies.update(disc_acc.item(), disc_gt.shape[0])
            print("Epoch: {}, Batch {}/{} has Discriminator loss {}, and acc {}".format(
                epoch, batch_id, num_batches, disc_losses.avg, train_disc_accuracies.avg))

            # Train only the discriminator for the first several iterations
            if current_step < self.start_disc_iters:
                print("Discriminator training only: {}/{}\n".format(current_step, self.start_disc_iters))
                continue

            # ============
            # Optimize the GAN
            # Note that now we use disc_gt_tgt which are 1's
            generated_img, classification_src = self.gan(src_img, src_iuv, target_iuv, use_gt=False)
            tot_loss = self._optimizeGAN(generated_img, target_img, classification_src, disc_gt_tgt)
            # The meter is updated once per batch (the original updated it
            # twice with the same value, which only inflated the count).
            tot_losses.update(tot_loss.item(), disc_gt_tgt.shape[0])
            acc = 100.0 * torch.mean(
                (torch.round(F.softmax(classification_src, dim=1)) == disc_gt_tgt).float()
            )
            train_accuracies.update(acc.item(), disc_gt_tgt.shape[0])

            # Not adjusting learning rate currently
            # if epoch % 100 == 99:
            #     self._adjust_learning_rate(epoch)
            # # Not Clipping Weights
            # self._clip_weights()

            if current_step % self.log_freq == 0:
                print("Epoch: {}, Batch {}/{} has loss {}, and acc {}".format(
                    epoch, batch_id, num_batches, tot_losses.avg, train_accuracies.avg))
                # TODO: you probably want to plot something here
                self.txwriter.add_scalar('train/discriminator_loss', disc_losses.avg, current_step)
                self.txwriter.add_scalar('train/total_loss', tot_losses.avg, current_step)
                # NOTE(review): this tag receives train_accuracies (computed
                # on the generated images), not train_disc_accuracies -
                # confirm which one is intended.
                self.txwriter.add_scalar('train/discriminator_acc', train_accuracies.avg, current_step)

            # Visualize some images
            if current_step % self.display_freq == 0:
                name1 = '{0}_{1}_{2}'.format(epoch, current_step, "image1")
                name2 = '{0}_{1}_{2}'.format(epoch, current_step, "image2")
                name3 = '{0}_{1}_{2}'.format(epoch, current_step, "gan_image")
                im1 = denormalizeImage(src_img[0, :, :, :].cpu().numpy())
                im2 = denormalizeImage(target_img[0, :, :, :].cpu().numpy())
                im3 = denormalizeImage(generated_img[0, :, :, :].detach().cpu().numpy())
                self.txwriter.add_image("Image1/" + name1, im1)
                self.txwriter.add_image("Image2/" + name2, im2)
                self.txwriter.add_image("GAN/" + name3, im3)

            # TODO : Test accuracies
            # if current_step % self.test_freq == 0:  # self._test_freq-1:
            #     self._model.eval()
            #     val_accuracy = self.validate()
            #     print("Epoch: {} has val accuracy {}".format(epoch, val_accuracy))
            #     self.txwriter.add_scalar('test/acc', val_accuracy, current_step)

            # Save Model periodically
            if (current_step % self.save_freq == 0) and current_step > 0:
                save_name = 'model_checkpoint.pth'
                # NOTE(review): the object passed to torch.save was lost in
                # the source text; saving the GAN's state_dict is assumed -
                # confirm against the checkpoint loader.
                torch.save(self.gan.state_dict(), save_name)
                print('Saved model to {}'.format(save_name))
def idenInitMethod2(originalChnl):
    """Return a ``(originalChnl, 1, 3)`` tensor whose every row is ``[0, 0, 1]``.

    The tensor is built by concatenating two zero columns and one column of
    ones along dim 1 and then inserting a singleton middle axis.
    """
    # The concatenation call was missing in the source, leaving a bare
    # ``return[...], 1)`` that does not parse.
    taps = torch.cat(
        [torch.zeros(originalChnl, 2), torch.ones(originalChnl, 1)], 1
    )
    return taps.reshape(originalChnl, 1, 3)
def pad(self, x, length, padval):
    """Return ``x`` truncated or right-padded to exactly ``length`` entries.

    A length-``length`` tensor is created from long ones scaled by
    ``padval``; its leading entries are then overwritten with as much of
    ``x`` as fits.
    """
    keep = min(len(x), length)  # number of entries copied from x
    padded = torch.ones((length,)).long() * padval
    padded[:keep] = x[:keep]
    return padded
def ones(shape, dtype, ctx):
    """Create a tensor of ones of the given ``shape`` and ``dtype`` on device ``ctx``."""
    options = {"dtype": dtype, "device": ctx}
    return th.ones(shape, **options)
def forward(ctx, inputs, bound):
    """Clamp ``inputs`` from below at ``bound`` (element-wise maximum).

    Args:
        ctx: autograd context; ``inputs`` and the bound tensor are stored on
            it with ``save_for_backward`` for use in the backward pass.
        inputs: tensor to be lower-bounded.
        bound: lower bound, broadcast against a ones tensor of
            ``inputs``' shape.

    Returns:
        ``torch.max(inputs, b)`` where ``b`` is ``bound`` expanded to the
        shape of ``inputs``.
    """
    # Build the bound on the same device as the input so CUDA inputs work.
    b = torch.ones(inputs.size(), device=inputs.device) * bound
    # save_for_backward returns None; its result must not be bound to ``b``
    # (the original did so and then called torch.max(inputs, None)).
    ctx.save_for_backward(inputs, b)
    return torch.max(inputs, b)
def obs_to_graph_dict(self, obs) ->  # NOTE(review): the return annotation and the trailing ':' are missing in the source - restore before use
    """
    Take the observation and create a graph whose nodes carry the following
    features, in this column order:
    (indicator, x, y, node_demand, node_visited)
    indicator: 0: customers, 1: depot, 2: vehicle
    x, y: position of the node in grid (double, double)
    node_demand: the customer demand or current vehicle capacity depending on
    the type of node (the vehicle capacity is negative); both are divided by
    ``obs['max_vehicle_capacity']``
    node_visited: 1 if the node can currently be chosen as an action, else 0

    Nodes are ordered customers first, then the depot, then the vehicle.
    Edges connect the vehicle node with every customer and the depot, in
    both directions. As a side effect ``self.num_customers`` is updated.
    """
    customer_positions = obs['customer_positions']
    customer_visited = obs['customer_visited']
    vehicle_position = obs["current_vehicle_position"]
    # demands and capacity are normalised by the maximum vehicle capacity
    customer_demands = obs['customer_demands'] / obs['max_vehicle_capacity']
    vehicle_capacity = obs['current_vehicle_capacity'] / obs['max_vehicle_capacity']
    num_customers = customer_positions.shape[0]
    num_depots = 1
    num_vehicles = 1
    num_nodes = num_customers + num_depots + num_vehicles
    node_pos = np.vstack([customer_positions, obs['depot_position'], vehicle_position])
    # if the vehicle is currently at the depot position then the depot is treated like a visited node
    if np.array_equal(vehicle_position, obs['depot_position']):
        depot_visited = np.zeros(shape=(num_depots, 1))
    else:
        depot_visited = np.ones(shape=(num_depots, 1))
    # node visited is True if the node can be chosen as action (customer that is not yet visited or depot if the
    # vehicle is not currently at the depot). Otherwise the node visited value is False (this is always true for
    # vehicle node)
    node_visited = np.vstack([np.logical_not(customer_visited).reshape(-1, 1),
                              depot_visited,
                              np.zeros(shape=(num_vehicles, 1))])
    # indicator is : 0: customers, 1: depot, 2: vehicle
    node_ind = np.vstack([np.ones(shape=(num_customers, 1)) * 0,
                          np.ones(shape=(num_depots, 1)) * 1,
                          np.ones(shape=(num_vehicles, 1)) * 2])
    node_demand = np.vstack([customer_demands.reshape(-1, 1),
                             np.zeros(shape=(num_depots, 1)),
                             -vehicle_capacity])
    # row indices of each node type
    customer_nodes = np.where(node_ind == 0)[0]
    depot_nodes = np.where(node_ind == 1)[0]
    vehicle_nodes = np.where(node_ind == 2)[0]
    # features are : indicator, pos_x, pos_y, demand/capacity, visited flag
    node_features = np.hstack([node_ind, node_pos, node_demand, node_visited])
    # customer edge indexes include all customers and depot
    # edge_indexes = [(i, j) for i, j in itertools.product(range(num_customers + 1), range(num_customers + 1)) if
    # i != j]
    customer_and_depot_nodes = np.concatenate([customer_nodes, depot_nodes])
    # vehicle -> (customer | depot) edges, followed by the reversed direction
    vehicle_edge_indexes = [(i.item(), j.item()) for i in vehicle_nodes for j in customer_and_depot_nodes]
    vehicle_edge_indexes = vehicle_edge_indexes + [(j, i) for i, j in vehicle_edge_indexes]
    edge_indexes_directed = vehicle_edge_indexes
    node_features_tensor = torch.tensor(node_features, dtype=torch.float32)
    # transposed so that the edge index has one column per edge
    edge_indexes_tensor = torch.tensor(edge_indexes_directed, dtype=torch.long,
                                       device=node_features_tensor.device).transpose(1, 0)
    # every edge gets the constant attribute 1
    edge_attributes_tensor = torch.ones(size=(len(edge_indexes_directed), 1),
                                        device=node_features_tensor.device,
                                        dtype=torch.float32)
    illegal_actions = np.zeros(shape=(num_nodes,))
    if not obs['action_mask'][self.env.DEPOT_INDEX]:
        # depot option is not available, and therefore this action should be masked
        illegal_actions[depot_nodes] = True
    # mask out all customers whose demand exceeds the vehicle's current capacity, or that were already visited
    illegal_actions[customer_nodes] = np.logical_or(customer_demands > vehicle_capacity, customer_visited)
    # mask out the vehicle nodes since they can never be chosen
    illegal_actions[vehicle_nodes] = True
    illegal_actions_tensor = torch.tensor(illegal_actions,
                                          device=node_features_tensor.device,
                                          dtype=torch.bool)
    # NOTE(review): the constructor call and its first argument are missing
    # in the source. Presumably a graph data object built from
    # node_features_tensor, which is otherwise unused - restore before use.
    graph_tg =, edge_attr=edge_attributes_tensor, edge_index=edge_indexes_tensor)
    graph_tg.illegal_actions = illegal_actions_tensor
    # constant 1x1 tensor stored on the graph as ``u``
    graph_tg.u = torch.tensor([[1]], device=node_features_tensor.device, dtype=torch.float32)
    self.num_customers = num_customers
    return graph_tg
# plt.savefig("figures/dc_positive.pdf") # show_trial(info, 9, 12) # Negative # plt.savefig("figures/dc_negative.pdf") #%% _, t, *_ = masks.size() n = runner.config.TASK.NUM_NODES h = runner.config.MODEL.HIDDEN_SIZE strength = 100.0 perturbation = torch.zeros(t, n, h, device=device) perturbation_step = 2 # Right in the middle. nodes_perturbed = np.random.randint(10) # A random set # dropout=True dropout_mask = None if dropout: dropout_mask = torch.ones(t, n, h, device=runner.device) dropout_mask[perturbation_step, nodes_perturbed] = 0.0 perturbation[perturbation_step, nodes_perturbed] = torch.rand(h, device=device) * strength metrics_perturbed, info_perturbed = runner.eval(ckpt_path, perturb=perturbation, dropout_mask=dropout_mask) # show_trial(info_perturbed, 7, 12) # show_trial(info_perturbed, 9, 12) # plt.title("Target: 0, Pulse: 100.0") # plt.savefig("figures/dc_pulse_negative.pdf") # Note there's stochasticity here # show_trial(info_perturbed, 7, 12) # plt.title("Target: 0, Pulse: 100.0") # plt.savefig("figures/dc_pulse_positive.pdf") show_trial(info_perturbed, 7, 12) plt.title("Target: 0, Dropout") plt.savefig("figures/dc_dropout.pdf")
def label_real(size, device=None):
    """Return a ``(size, 1)`` tensor of ones, i.e. labels for "real" samples.

    Args:
        size: number of labels (rows) to create.
        device: optional device on which to allocate the labels. ``None``
            uses PyTorch's default device.

    Returns:
        A float tensor of shape ``(size, 1)`` filled with ones.
    """
    # The original ended in a bare ``return`` and therefore produced None;
    # the created tensor is now actually returned.
    # NOTE(review): callers training on an accelerator must pass ``device``
    # (or move the result) so the labels live next to the model outputs.
    return torch.ones(size, 1, device=device)
def _generate(
    self,
    sample: Dict[str, Dict[str, Tensor]],
    prefix_tokens: Optional[Tensor] = None,
    constraints: Optional[Tensor] = None,
    bos_token: Optional[int] = None,
):
    """Run beam search over ``sample`` and return the finished hypotheses.

    Args:
        sample: batch dictionary; ``sample["net_input"]`` must contain
            either ``"src_tokens"`` or ``"source"`` (with
            ``"padding_mask"``).
        prefix_tokens: optional tokens every hypothesis is forced to start
            with (handled by ``self._prefix_tokens``).
        constraints: optional target-side constraints; rejected with
            ``NotImplementedError`` when the search strategy does not
            support them.
        bos_token: token written to position 0 of every hypothesis;
            ``self.eos`` is used when ``None``.

    Returns:
        One list per input sentence holding that sentence's finalized
        hypothesis dictionaries, sorted by ``"score"`` in descending order.

    NOTE(review): several expressions involving ``self.search`` and a few
    tensor chains were truncated in the source text. They are restored here
    from the surrounding comments and data flow - confirm against the
    search API in use.
    """
    incremental_states = torch.jit.annotate(
        List[Dict[str, Dict[str, Optional[Tensor]]]],
        [
            torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {})
            for i in range(self.model.models_size)
        ],
    )
    net_input = sample["net_input"]

    if "src_tokens" in net_input:
        src_tokens = net_input["src_tokens"]
        # length of the source text being the character length except EndOfSentence and pad
        src_lengths = (
            (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1)
        )
    elif "source" in net_input:
        src_tokens = net_input["source"]
        src_lengths = (
            net_input["padding_mask"].size(-1) - net_input["padding_mask"].sum(-1)
            if net_input["padding_mask"] is not None
            else torch.tensor(src_tokens.size(-1)).to(src_tokens)
        )
    else:
        raise Exception("expected src_tokens or source in net input")

    # bsz: total number of sentences in beam
    # Note that src_tokens may have more than 2 dimensions (i.e. audio features)
    bsz, src_len = src_tokens.size()[:2]
    beam_size = self.beam_size

    if constraints is not None and not self.search.supports_constraints:
        raise NotImplementedError(
            "Target-side constraints were provided, but search method doesn't support them"
        )

    # Initialize constraints, when active
    self.search.init_constraints(constraints, beam_size)

    max_len: int = -1
    if self.match_source_len:
        max_len = src_lengths.max().item()
    else:
        max_len = min(
            int(self.max_len_a * src_len + self.max_len_b),
            # exclude the EOS marker
            self.model.max_decoder_positions() - 1,
        )
    assert (
        self.min_len <= max_len
    ), "min_len cannot be larger than max_len, please adjust these!"
    # compute the encoder output for each beam
    encoder_outs = self.model.forward_encoder(net_input)

    # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores
    new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
    new_order = new_order.to(src_tokens.device).long()
    encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order)
    # ensure encoder_outs is a List.
    assert encoder_outs is not None

    # initialize buffers
    scores = (
        torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float()
    )  # +1 for eos; pad is never chosen for scoring
    tokens = (
        torch.zeros(bsz * beam_size, max_len + 2)
        .to(src_tokens)
        .long()
        .fill_(self.pad)
    )  # +2 for eos and pad
    tokens[:, 0] = self.eos if bos_token is None else bos_token
    attn: Optional[Tensor] = None

    # A list that indicates candidates that should be ignored.
    # For example, suppose we're sampling and have already finalized 2/5
    # samples. Then cands_to_ignore would mark 2 positions as being ignored,
    # so that we only finalize the remaining 3 samples.
    cands_to_ignore = (
        torch.zeros(bsz, beam_size).to(src_tokens).eq(-1)
    )  # forward and backward-compatible False mask

    # list of completed sentences
    finalized = torch.jit.annotate(
        List[List[Dict[str, Tensor]]],
        [torch.jit.annotate(List[Dict[str, Tensor]], []) for i in range(bsz)],
    )  # contains lists of dictionaries of information about the hypothesis being finalized at each step

    finished = [
        False for i in range(bsz)
    ]  # a boolean array indicating if the sentence at the index is finished or not
    num_remaining_sent = bsz  # number of sentences remaining

    # number of candidate hypos per step
    cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

    # offset arrays for converting between different indexing schemes
    bbsz_offsets = (
        (torch.arange(0, bsz) * beam_size)
        .unsqueeze(1)
        .type_as(tokens)
        .to(src_tokens.device)
    )
    cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(src_tokens.device)

    reorder_state: Optional[Tensor] = None
    batch_idxs: Optional[Tensor] = None

    original_batch_idxs: Optional[Tensor] = None
    if "id" in sample and isinstance(sample["id"], Tensor):
        original_batch_idxs = sample["id"]
    else:
        original_batch_idxs = torch.arange(0, bsz).type_as(tokens)

    for step in range(max_len + 1):  # one extra step for EOS marker
        # reorder decoder internal states based on the prev choice of beams
        if reorder_state is not None:
            if batch_idxs is not None:
                # update beam indices to take into account removed sentences
                corr = batch_idxs - torch.arange(batch_idxs.numel()).type_as(
                    batch_idxs
                )
                reorder_state.view(-1, beam_size).add_(
                    corr.unsqueeze(-1) * beam_size
                )
                original_batch_idxs = original_batch_idxs[batch_idxs]
            self.model.reorder_incremental_state(incremental_states, reorder_state)
            encoder_outs = self.model.reorder_encoder_out(
                encoder_outs, reorder_state
            )

        lprobs, avg_attn_scores = self.model.forward_decoder(
            tokens[:, : step + 1],
            encoder_outs,
            incremental_states,
            self.temperature,
        )

        if self.lm_model is not None:
            lm_out = self.lm_model(tokens[:, : step + 1])
            probs = self.lm_model.get_normalized_probs(
                lm_out, log_probs=True, sample=None
            )
            probs = probs[:, -1, :] * self.lm_weight
            lprobs += probs

        # NaN != NaN, so this replaces NaN log-probs with -inf
        lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)

        lprobs[:, self.pad] = -math.inf  # never select pad
        lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

        # handle max length constraint
        if step >= max_len:
            lprobs[:, : self.eos] = -math.inf
            lprobs[:, self.eos + 1 :] = -math.inf

        # handle prefix tokens (possibly with different lengths)
        if (
            prefix_tokens is not None
            and step < prefix_tokens.size(1)
            and step < max_len
        ):
            lprobs, tokens, scores = self._prefix_tokens(
                step, lprobs, scores, tokens, prefix_tokens, beam_size
            )
        elif step < self.min_len:
            # minimum length constraint (does not apply if using prefix_tokens)
            lprobs[:, self.eos] = -math.inf

        # Record attention scores, only support avg_attn_scores is a Tensor
        if avg_attn_scores is not None:
            if attn is None:
                attn = torch.empty(
                    bsz * beam_size, avg_attn_scores.size(1), max_len + 2
                ).to(scores)
            attn[:, :, step + 1].copy_(avg_attn_scores)

        scores = scores.type_as(lprobs)
        eos_bbsz_idx = torch.empty(0).to(
            tokens
        )  # indices of hypothesis ending with eos (finished sentences)
        eos_scores = torch.empty(0).to(
            scores
        )  # scores of hypothesis ending with eos (finished sentences)

        if self.should_set_src_lengths:
            self.search.set_src_lengths(src_lengths)

        if self.repeat_ngram_blocker is not None:
            lprobs = self.repeat_ngram_blocker(
                tokens, lprobs, bsz, beam_size, step
            )

        # Shape: (batch, cand_size)
        cand_scores, cand_indices, cand_beams = self.search.step(
            step,
            lprobs.view(bsz, -1, self.vocab_size),
            scores.view(bsz, beam_size, -1)[:, :, :step],
            tokens[:, : step + 1],
            original_batch_idxs,
        )

        # cand_bbsz_idx contains beam indices for the top candidate
        # hypotheses, with a range of values: [0, bsz*beam_size),
        # and dimensions: [bsz, cand_size]
        cand_bbsz_idx = cand_beams.add(bbsz_offsets)

        # finalize hypotheses that end in eos
        # Shape of eos_mask: (batch size, beam size)
        eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
        eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(eos_mask)

        # only consider eos when it's among the top beam_size indices
        # Now we know what beam item(s) to finish
        # Shape: 1d list of absolute-numbered
        eos_bbsz_idx = torch.masked_select(
            cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size]
        )

        finalized_sents: List[int] = []
        if eos_bbsz_idx.numel() > 0:
            eos_scores = torch.masked_select(
                cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size]
            )

            finalized_sents = self.finalize_hypos(
                step,
                eos_bbsz_idx,
                eos_scores,
                tokens,
                scores,
                finalized,
                finished,
                beam_size,
                attn,
                src_lengths,
                max_len,
            )
            num_remaining_sent -= len(finalized_sents)

        assert num_remaining_sent >= 0
        if num_remaining_sent == 0:
            break
        if self.search.stop_on_max_len and step >= max_len:
            break
        assert step < max_len, f"{step} < {max_len}"

        # Remove finalized sentences (ones for which {beam_size}
        # finished hypotheses have been generated) from the batch.
        if len(finalized_sents) > 0:
            new_bsz = bsz - len(finalized_sents)

            # construct batch_idxs which holds indices of batches to keep for the next pass
            batch_mask = torch.ones(
                bsz, dtype=torch.bool, device=cand_indices.device
            )
            batch_mask[finalized_sents] = False
            # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it
            batch_idxs = torch.arange(
                bsz, device=cand_indices.device
            ).masked_select(batch_mask)

            # Choose the subset of the hypothesized constraints that will continue
            self.search.prune_sentences(batch_idxs)

            eos_mask = eos_mask[batch_idxs]
            cand_beams = cand_beams[batch_idxs]
            bbsz_offsets.resize_(new_bsz, 1)
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)
            cand_scores = cand_scores[batch_idxs]
            cand_indices = cand_indices[batch_idxs]

            if prefix_tokens is not None:
                prefix_tokens = prefix_tokens[batch_idxs]
            src_lengths = src_lengths[batch_idxs]
            cands_to_ignore = cands_to_ignore[batch_idxs]

            scores = scores.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
            tokens = tokens.view(bsz, -1)[batch_idxs].view(new_bsz * beam_size, -1)
            if attn is not None:
                attn = attn.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, attn.size(1), -1
                )
            bsz = new_bsz
        else:
            batch_idxs = None

        # Set active_mask so that values > cand_size indicate eos hypos
        # and values < cand_size indicate candidate active hypos.
        # After, the min values per row are the top candidate active hypos

        # Rewrite the operator since the element wise or is not supported in torchscript.

        eos_mask[:, :beam_size] = ~((~cands_to_ignore) & (~eos_mask[:, :beam_size]))
        active_mask = torch.add(
            eos_mask.type_as(cand_offsets) * cand_size,
            cand_offsets[: eos_mask.size(1)],
        )

        # get the top beam_size active hypotheses, which are just
        # the hypos with the smallest values in active_mask.
        # {active_hypos} indicates which {beam_size} hypotheses
        # from the list of {2 * beam_size} candidates were
        # selected. Shapes: (batch size, beam size)
        new_cands_to_ignore, active_hypos = torch.topk(
            active_mask, k=beam_size, dim=1, largest=False
        )

        # update cands_to_ignore to ignore any finalized hypos.
        cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size]
        # Make sure there is at least one active item for each sentence in the batch.
        assert (~cands_to_ignore).any(dim=1).all()

        # update cands_to_ignore to ignore any finalized hypos

        # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam
        # can be selected more than once).
        active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos)
        active_scores = torch.gather(cand_scores, dim=1, index=active_hypos)

        active_bbsz_idx = active_bbsz_idx.view(-1)
        active_scores = active_scores.view(-1)

        # copy tokens and scores for active hypotheses

        # Set the tokens for each beam (can select the same row more than once)
        tokens[:, : step + 1] = torch.index_select(
            tokens[:, : step + 1], dim=0, index=active_bbsz_idx
        )
        # Select the next token for each of them
        tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather(
            cand_indices, dim=1, index=active_hypos
        )
        if step > 0:
            scores[:, :step] = torch.index_select(
                scores[:, :step], dim=0, index=active_bbsz_idx
            )
        scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather(
            cand_scores, dim=1, index=active_hypos
        )

        # Update constraints based on which candidates were selected for the next beam
        self.search.update_constraints(active_hypos)

        # copy attention for active hypotheses
        if attn is not None:
            attn[:, :, : step + 2] = torch.index_select(
                attn[:, :, : step + 2], dim=0, index=active_bbsz_idx
            )

        # reorder incremental state in decoder
        reorder_state = active_bbsz_idx

    # sort by score descending
    for sent in range(len(finalized)):
        scores = torch.tensor(
            [float(elem["score"].item()) for elem in finalized[sent]]
        )
        _, sorted_scores_indices = torch.sort(scores, descending=True)
        finalized[sent] = [finalized[sent][ssi] for ssi in sorted_scores_indices]
        finalized[sent] = torch.jit.annotate(
            List[Dict[str, Tensor]], finalized[sent]
        )
    return finalized
def test(args: dict(), save_flag: bool, seed_val):
    """Run a saved sequence classifier over reviews sampled from a text file.

    Args:
        args: options object providing ``device_no``, ``model_path``,
            ``input_file`` (one review per line), ``label`` (``"negative"``
            maps to class 0, anything else to class 1), ``truncation``
            (``"tail-only"``, ``"head-and-tail"``, otherwise head-only) and,
            optionally, ``n_samples``.
        save_flag: currently unused.
        seed_val: seed applied to ``random``, NumPy and PyTorch.

    Returns:
        ``(predictions, true_labels, selected_reviews)`` where the first two
        are lists with one NumPy array per batch (logits and label ids) and
        the last is the list of sampled, lower-cased review strings.
    """
    device = util.get_device(device_no=args.device_no)
    # NOTE(review): torch.load unpickles arbitrary objects - only load
    # checkpoints from a trusted location.
    model = torch.load(args.model_path, map_location=device)

    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    testfile = args.input_file
    true_label = args.label
    truncation = args.truncation
    n_samples = None
    if "n_samples" in args:
        n_samples = args.n_samples

    # Load the BERT tokenizer.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

    with open(testfile, "r") as fin:
        reviews = fin.readlines()
    reviews = [rev.lower() for rev in reviews]

    if n_samples is None:
        n_samples = len(reviews)
    # NOTE(review): np.random.choice samples WITH replacement by default, so
    # reviews can repeat even when n_samples == len(reviews).
    indices = np.random.choice(np.arange(len(reviews)), size=n_samples)
    selected_reviews = [reviews[idx] for idx in indices]
    labels = [0 if true_label == "negative" else 1] * len(selected_reviews)

    # Tokenize all of the sentences and map the tokens to their word IDs.
    input_ids = []
    attention_masks = []

    # For every sentence...
    for rev in selected_reviews:
        # ``encode`` tokenizes, adds [CLS]/[SEP] and maps tokens to ids.
        input_id = tokenizer.encode(rev, add_special_tokens=True)
        if len(input_id) > 512:
            # Too long: cut down to exactly 512 ids, keeping one [CLS] at
            # the front and one [SEP] at the end.
            if truncation == "tail-only":
                # tail-only truncation (the slice already ends with [SEP])
                input_id = [tokenizer.cls_token_id] + input_id[-511:]
            elif truncation == "head-and-tail":
                # head-and-tail truncation: first 128 and last 382 content
                # tokens. The tail slice stops before the existing [SEP] so
                # that only one [SEP] terminates the sequence (the original
                # slice kept it and then appended a second one).
                input_id = (
                    [tokenizer.cls_token_id]
                    + input_id[1:129]
                    + input_id[-383:-1]
                    + [tokenizer.sep_token_id]
                )
            else:
                # head-only truncation
                input_id = input_id[:511] + [tokenizer.sep_token_id]
            input_ids.append(torch.tensor(input_id).view(1, -1))
            # no padding in a truncated sequence: attend to every position
            attention_masks.append(torch.ones([1, len(input_id)], dtype=torch.long))
        else:
            # `encode_plus` will:
            #   (1) Tokenize the sentence.
            #   (2) Prepend the `[CLS]` token to the start.
            #   (3) Append the `[SEP]` token to the end.
            #   (4) Map tokens to their IDs.
            #   (5) Pad or truncate the sentence to `max_length`
            #   (6) Create attention masks for [PAD] tokens.
            encoded_dict = tokenizer.encode_plus(
                rev,                         # Sentence to encode.
                add_special_tokens=True,     # Add '[CLS]' and '[SEP]'
                max_length=512,              # Pad & truncate all sentences.
                pad_to_max_length=True,
                return_attention_mask=True,  # Construct attn. masks.
                return_tensors='pt',         # Return pytorch tensors.
            )
            # Add the encoded sentence to the list.
            input_ids.append(encoded_dict['input_ids'])
            # And its attention mask (simply differentiates padding from non-padding).
            attention_masks.append(encoded_dict['attention_mask'])

    # Convert the lists into tensors.
    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)
    labels = torch.tensor(labels)

    # Set the batch size.
    batch_size = 8

    # Create the DataLoader.
    prediction_data = TensorDataset(input_ids, attention_masks, labels)
    prediction_sampler = SequentialSampler(prediction_data)
    prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)

    print('Predicting labels for {:,} test sentences...'.format(len(input_ids)))

    # Put model in evaluation mode
    model.eval()

    # Tracking variables
    predictions, true_labels = [], []

    # Predict
    for batch in prediction_dataloader:
        # Move the batch to the model's device
        batch = tuple(t.to(device) for t in batch)

        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch

        # Telling the model not to compute or store gradients, saving memory and
        # speeding up prediction
        with torch.no_grad():
            # Forward pass, calculate logit predictions
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask)

        logits = outputs[0]

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Store predictions and true labels
        predictions.append(logits)
        true_labels.append(label_ids)

    print('DONE.')
    return predictions, true_labels, selected_reviews
def sum_losses(self, batch, loss, margin, prec_at_k):
    """Compute the pre-training loss for one batch of person crops.

    Used for pre-training this CNN with a triplet-style loss. A batch is a
    sample of N = P * K images: P different persons with K images each
    (K = 4 is a common choice).

    [!] ``batch_all`` and ``batch_hard`` should work fine. Take care with
    ``weighted_triplet`` and ``cross_entropy``.

    Args:
        batch (list): ``[images, labels]``; only the first element of each
            is used. Per the original note, images are a tensor of size
            (N, H, W, C) with H=224, W=112 and labels a tensor of size (N).
        loss (str): one of ``"cross_entropy"``, ``"batch_all"``,
            ``"batch_hard"`` or ``"weighted_triplet"``.
        margin (float): triplet margin.
        prec_at_k (int): when truthy, additionally report precision@k for
            this k under the key ``'prec_at_k'``.

    Returns:
        dict with ``'total_loss'`` and, optionally, ``'prec_at_k'``.

    Raises:
        NotImplementedError: for an unknown ``loss`` name.
    """
    inp = batch[0][0]
    inp = Variable(inp).cuda()

    labels = batch[1][0]
    labels = labels.cuda()

    embeddings = self.forward(inp)

    if loss == "cross_entropy":
        # every valid (anchor, positive, negative) index triple
        m = _get_triplet_mask(labels).nonzero()
        e0 = []
        e1 = []
        e2 = []
        for p in m:
            e0.append(embeddings[p[0]])
            e1.append(embeddings[p[1]])
            e2.append(embeddings[p[2]])
        e0 = torch.stack(e0, 0)
        e1 = torch.stack(e1, 0)
        e2 = torch.stack(e2, 0)

        # NOTE(review): the callee name was lost in the source text;
        # ``self.compare`` is assumed - confirm against the class.
        out_pos = self.compare(e0, e1, train=True)
        out_neg = self.compare(e0, e2, train=True)

        # anchor/positive pairs are labelled 1, anchor/negative pairs 0
        tar_pos = Variable(torch.ones(out_pos.size(0)).view(-1, 1).cuda())
        tar_neg = Variable(torch.zeros(out_pos.size(0)).view(-1, 1).cuda())

        loss_pos = F.binary_cross_entropy_with_logits(out_pos, tar_pos)
        loss_neg = F.binary_cross_entropy_with_logits(out_neg, tar_neg)

        total_loss = (loss_pos + loss_neg) / 2

    elif loss == 'batch_all':
        # works, batch all strategy
        m = _get_triplet_mask(labels).nonzero()
        e0 = []
        e1 = []
        e2 = []
        for p in m:
            e0.append(embeddings[p[0]])
            e1.append(embeddings[p[1]])
            e2.append(embeddings[p[2]])
        e0 = torch.stack(e0, 0)
        e1 = torch.stack(e1, 0)
        e2 = torch.stack(e2, 0)
        total_loss = F.triplet_margin_loss(e0, e1, e2, margin=margin, p=2)

    elif loss == 'batch_hard':
        # compute pairwise squared distance matrix; the square root is
        # skipped because it is not stable when a distance is 0
        n = embeddings.size(0)
        m = embeddings.size(0)
        d = embeddings.size(1)

        x = embeddings.unsqueeze(1).expand(n, m, d)
        y = embeddings.unsqueeze(0).expand(n, m, d)

        dist = torch.pow(x - y, 2).sum(2)

        mask_anchor_positive = _get_anchor_positive_triplet_mask(labels).float()
        mask_anchor_negative = _get_anchor_negative_triplet_mask(labels).float()

        pos_dist = dist * mask_anchor_positive
        # add the maximum distance to invalid entries so they are never
        # picked as the hardest negative
        max_val = torch.max(dist)
        neg_dist = dist + max_val * (1.0 - mask_anchor_negative)

        # for each anchor compute hardest pair
        triplets = []
        for i in range(dist.size(0)):
            pos = torch.max(pos_dist[i], 0)[1].item()
            neg = torch.min(neg_dist[i], 0)[1].item()
            triplets.append((i, pos, neg))

        e0 = []
        e1 = []
        e2 = []
        for p in triplets:
            e0.append(embeddings[p[0]])
            e1.append(embeddings[p[1]])
            e2.append(embeddings[p[2]])
        e0 = torch.stack(e0, 0)
        e1 = torch.stack(e1, 0)
        e2 = torch.stack(e2, 0)
        total_loss = F.triplet_margin_loss(e0, e1, e2, margin=margin, p=2)

    elif loss == 'weighted_triplet':
        # compute pairwise distance matrix
        dist = []
        # iteratively construct the columns
        for e in embeddings:
            ee = torch.cat([e.view(1, -1) for _ in range(embeddings.size(0))], 0)
            dist.append(F.pairwise_distance(embeddings, ee))
        dist = torch.cat(dist, 1)

        # First, we need to get a mask for every valid positive (they should have same label)
        mask_anchor_positive = _get_anchor_positive_triplet_mask(labels)
        pos_dist = dist * Variable(mask_anchor_positive.float())

        # Now every valid negative mask
        mask_anchor_negative = _get_anchor_negative_triplet_mask(labels)
        neg_dist = dist * Variable(mask_anchor_negative.float())

        # now get the weights for each anchor, detach because it should be a constant weighting factor
        pos_weights = Variable(torch.zeros(dist.size()).cuda())
        neg_weights = Variable(torch.zeros(dist.size()).cuda())
        for i in range(dist.size(0)):
            # make by line
            mask = torch.zeros(dist.size()).byte().cuda()
            mask[i] = 1
            pos_weights[mask_anchor_positive & mask] = F.softmax(pos_dist[mask_anchor_positive & mask], 0)
            neg_weights[mask_anchor_negative & mask] = F.softmin(neg_dist[mask_anchor_negative & mask], 0)
        pos_weights = pos_weights.detach()
        neg_weights = neg_weights.detach()
        pos_weight_dist = pos_dist * pos_weights
        neg_weight_dist = neg_dist * neg_weights

        triplet_loss = torch.clamp(
            margin + pos_weight_dist.sum(1, keepdim=True) - neg_weight_dist.sum(1, keepdim=True),
            min=0,
        )
        total_loss = triplet_loss.mean()
    else:
        raise NotImplementedError("Loss: {}".format(loss))

    losses = {}

    if prec_at_k:
        # compute pairwise squared distance matrix (no square root, see above)
        n = embeddings.size(0)
        m = embeddings.size(0)
        d = embeddings.size(1)

        x = embeddings.unsqueeze(1).expand(n, m, d)
        y = embeddings.unsqueeze(0).expand(n, m, d)

        dist = torch.pow(x - y, 2).sum(2)
        mask_anchor_positive = _get_anchor_positive_triplet_mask(labels)
        _, indices = torch.sort(dist, dim=1)
        num_hit = 0
        num_ges = 0
        for i in range(dist.size(0)):
            # column vector of the positive indices of anchor i; compared
            # (broadcast) against its k+1 nearest neighbours
            positives = mask_anchor_positive[i].nonzero().view(-1, 1)
            ind = indices[i][:prec_at_k + 1]

            same = positives == ind
            num_hit += same.sum()
            num_ges += prec_at_k

        k_loss = torch.zeros(1)
        # explicit float conversion: dividing an integer tensor by an int
        # truncates on older PyTorch versions
        k_loss[0] = float(num_hit) / num_ges
        losses['prec_at_k'] = Variable(k_loss.cuda())

    losses['total_loss'] = total_loss

    return losses
def leGallInitMethod2(originalChnl):
    """Return a ``(originalChnl, 1, 3)`` tensor whose every row is ``[0, 1/4, 1/4]``.

    Built by concatenating one zero tap with two taps of value 0.25 along
    the last axis.
    """
    # The concatenation call was missing in the source, leaving a bare
    # ``return[...], -1)`` that does not parse.
    zero_tap = torch.zeros(originalChnl, 1, 1)
    quarter_taps = torch.ones(originalChnl, 1, 2) / 4
    return torch.cat([zero_tap, quarter_taps], -1)
def _features(self, states): length = states.size(0) ones = th.ones(length, 1).to(states.device) al = th.arange(length, dtype=th.float32, device=states.device).view(-1, 1) / 100.0 return[states, states**2, al, al**2, al**3, ones], dim=1)
def build_target(self, pred, labels, batchsize, fsize, n_ch, output_id):
    """Assign ground-truth boxes to anchors/cells of one output scale.

    Args:
        pred: predicted boxes; ``pred[b]`` is viewed as ``(-1, 4)`` and its
            first three dimensions must match ``obj_mask[b]``.
        labels: padded ground truth per image; columns 0-3 are read as
            x1, y1, x2, y2 and column 4 as the class id. Rows summing to 0
            count as padding.
        batchsize: number of images in the batch.
        fsize: side length of the (square) feature map.
        n_ch: number of channels per anchor in ``target``.
        output_id: index of the output scale, used to select the stride,
            reference anchors and anchor masks.

    Returns:
        ``(obj_mask, tgt_mask, tgt_scale, target)`` tensors on
        ``self.device``.
    """
    # target assignment
    tgt_mask = torch.zeros(batchsize, self.n_anchors, fsize, fsize, 4 + self.n_classes).to(device=self.device)
    obj_mask = torch.ones(batchsize, self.n_anchors, fsize, fsize).to(device=self.device)
    tgt_scale = torch.zeros(batchsize, self.n_anchors, fsize, fsize, 2).to(self.device)
    target = torch.zeros(batchsize, self.n_anchors, fsize, fsize, n_ch).to(self.device)

    # labels = labels.cpu().data
    nlabel = (labels.sum(dim=2) > 0).sum(dim=1)  # number of objects

    # box centres and sizes expressed in feature-map cells
    truth_x_all = (labels[:, :, 2] + labels[:, :, 0]) / (self.strides[output_id] * 2)
    truth_y_all = (labels[:, :, 3] + labels[:, :, 1]) / (self.strides[output_id] * 2)
    truth_w_all = (labels[:, :, 2] - labels[:, :, 0]) / self.strides[output_id]
    truth_h_all = (labels[:, :, 3] - labels[:, :, 1]) / self.strides[output_id]
    # integer cell indices of the box centres (these two definitions were
    # truncated in the source text)
    truth_i_all = truth_x_all.to(torch.int16).cpu().numpy()
    truth_j_all = truth_y_all.to(torch.int16).cpu().numpy()

    # loop-invariant: anchor sizes of this output scale
    masked_anchors = torch.Tensor(self.masked_anchors[output_id])

    for b in range(batchsize):
        n = int(nlabel[b])
        if n == 0:
            continue
        # only w/h are filled at first so the anchor IoU ignores position
        truth_box = torch.zeros(n, 4).to(self.device)
        truth_box[:n, 2] = truth_w_all[b, :n]
        truth_box[:n, 3] = truth_h_all[b, :n]
        truth_i = truth_i_all[b, :n]
        truth_j = truth_j_all[b, :n]

        # calculate iou between truth and reference anchors
        anchor_ious_all = bboxes_iou(truth_box.cpu(), self.ref_anchors[output_id], CIoU=True)

        # temp = bbox_iou(truth_box.cpu(), self.ref_anchors[output_id])

        best_n_all = anchor_ious_all.argmax(dim=1)
        best_n = best_n_all % 3
        # keep only truths whose best anchor belongs to this output scale
        best_n_mask = ((best_n_all == self.anch_masks[output_id][0]) |
                       (best_n_all == self.anch_masks[output_id][1]) |
                       (best_n_all == self.anch_masks[output_id][2]))

        if sum(best_n_mask) == 0:
            continue

        truth_box[:n, 0] = truth_x_all[b, :n]
        truth_box[:n, 1] = truth_y_all[b, :n]

        pred_ious = bboxes_iou(pred[b].view(-1, 4), truth_box, xyxy=False)
        pred_best_iou, _ = pred_ious.max(dim=1)
        pred_best_iou = (pred_best_iou > self.ignore_thre)
        pred_best_iou = pred_best_iou.view(pred[b].shape[:3])
        # set mask to zero (ignore) if pred matches truth
        obj_mask[b] = ~ pred_best_iou

        for ti in range(best_n.shape[0]):
            if best_n_mask[ti] == 1:
                i, j = truth_i[ti], truth_j[ti]
                a = best_n[ti]
                obj_mask[b, a, j, i] = 1
                tgt_mask[b, a, j, i, :] = 1
                # fractional offset of the centre inside its cell
                target[b, a, j, i, 0] = truth_x_all[b, ti] - truth_x_all[b, ti].to(torch.int16).to(torch.float)
                target[b, a, j, i, 1] = truth_y_all[b, ti] - truth_y_all[b, ti].to(torch.int16).to(torch.float)
                # log ratio of the box size to the anchor size
                target[b, a, j, i, 2] = torch.log(
                    truth_w_all[b, ti] / masked_anchors[best_n[ti], 0] + 1e-16)
                target[b, a, j, i, 3] = torch.log(
                    truth_h_all[b, ti] / masked_anchors[best_n[ti], 1] + 1e-16)
                target[b, a, j, i, 4] = 1
                # one-hot class entry
                target[b, a, j, i, 5 + labels[b, ti, 4].to(torch.int16).cpu().numpy()] = 1
                tgt_scale[b, a, j, i, :] = torch.sqrt(2 - truth_w_all[b, ti] * truth_h_all[b, ti] / fsize / fsize)
    return obj_mask, tgt_mask, tgt_scale, target
loss = loss.view(bsz, opnum) acc = acc.view(bsz, opnum) loss = loss * mask acc = acc * mask acc = acc.sum(-1) # acc_high = (acc * high_mask).sum() acc = acc.sum() # acc_middle = acc - acc_high loss = loss.sum() / (mask.sum()) return loss, acc if __name__ == '__main__': bsz = 32 max_length = 50 max_olen = 3 articles = torch.zeros(bsz, max_length).long() articles_mask = torch.ones(articles.size()) ops = torch.zeros(bsz, 4, max_olen).long() ops_mask = torch.ones(ops.size()) question_id = torch.arange(bsz).long() question_pos = torch.arange(bsz).long() ans = torch.zeros(bsz).long() inp = [articles, articles_mask, ops, ops_mask, question_id, question_pos] tgt = ans model = ALbertForCloze.from_pretrained( '/chpc/home/stu-ysfang-a/RoBERTa-data/roberta-large', cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(-1)) loss, acc = model(inp, tgt)
def __init__(self, features, eps=1e-8):
    """Register the two learnable vectors and store ``eps``.

    Args:
        features: Length of the ``gamma`` and ``beta`` parameter vectors.
        eps: Small constant stored on the module as ``self.eps``.
    """
    super(LayerNorm, self).__init__()
    ones_init, zeros_init = torch.ones(features), torch.zeros(features)
    # gamma starts at one and beta at zero.
    self.gamma = nn.Parameter(ones_init)
    self.beta = nn.Parameter(zeros_init)
    self.eps = eps
def forward_train(self,
                  img,
                  img_meta,
                  gt_bboxes,
                  gt_labels,
                  gt_bboxes_ignore=None,
                  gt_masks=None,
                  proposals=None):
    """Run the multi-stage training forward pass and collect all losses.

    After the optional RPN step, each of the ``self.num_stages`` stages
    assigns and samples proposals, computes the bbox-head loss, optionally
    computes the mask-head loss, and (except for the last stage) refines
    the proposals for the next stage.

    Args:
        img: Batched input images; ``img.size(0)`` is the number of images.
        img_meta: Per-image meta information passed to the heads.
        gt_bboxes: Per-image ground-truth boxes.
        gt_labels: Per-image ground-truth labels.
        gt_bboxes_ignore: Per-image boxes to ignore, or ``None``.
        gt_masks: Ground-truth masks, used only when ``self.with_mask``.
        proposals: Proposals to use when ``self.with_rpn`` is false.

    Returns:
        dict: RPN losses plus per-stage entries keyed ``'s{stage}.{name}'``.
        Entries whose name contains ``'loss'`` are scaled by that stage's
        loss weight.
    """
    x = self.extract_feat(img)

    losses = dict()

    if self.with_rpn:
        rpn_outs = self.rpn_head(x)
        rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                      self.train_cfg.rpn)
        rpn_losses = self.rpn_head.loss(
            *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
        losses.update(rpn_losses)

        proposal_cfg = self.train_cfg.get('rpn_proposal',
                                          self.test_cfg.rpn)
        proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
        proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
    else:
        proposal_list = proposals

    for i in range(self.num_stages):
        self.current_stage = i
        rcnn_train_cfg = self.train_cfg.rcnn[i]
        lw = self.train_cfg.stage_loss_weights[i]

        # assign gts and sample proposals
        sampling_results = []
        if self.with_bbox or self.with_mask:
            bbox_assigner = build_assigner(rcnn_train_cfg.assigner)
            bbox_sampler = build_sampler(
                rcnn_train_cfg.sampler, context=self)
            num_imgs = img.size(0)
            if gt_bboxes_ignore is None:
                gt_bboxes_ignore = [None for _ in range(num_imgs)]

            for j in range(num_imgs):
                assign_result = bbox_assigner.assign(
                    proposal_list[j], gt_bboxes[j], gt_bboxes_ignore[j],
                    gt_labels[j])
                sampling_result = bbox_sampler.sample(
                    assign_result,
                    proposal_list[j],
                    gt_bboxes[j],
                    gt_labels[j],
                    feats=[lvl_feat[j][None] for lvl_feat in x])
                sampling_results.append(sampling_result)

        # bbox head forward and loss
        bbox_roi_extractor = self.bbox_roi_extractor[i]
        bbox_head = self.bbox_head[i]

        rois = bbox2roi([res.bboxes for res in sampling_results])
        bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],
                                        rois)
        if self.with_shared_head:
            bbox_feats = self.shared_head(bbox_feats)
        cls_score, bbox_pred = bbox_head(bbox_feats)

        bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,
                                            gt_labels, rcnn_train_cfg)
        loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)
        for name, value in loss_bbox.items():
            losses['s{}.{}'.format(
                i, name)] = (value * lw if 'loss' in name else value)

        # mask head forward and loss
        if self.with_mask:
            if not self.share_roi_extractor:
                mask_roi_extractor = self.mask_roi_extractor[i]
                pos_rois = bbox2roi(
                    [res.pos_bboxes for res in sampling_results])
                mask_feats = mask_roi_extractor(
                    x[:mask_roi_extractor.num_inputs], pos_rois)
                if self.with_shared_head:
                    mask_feats = self.shared_head(mask_feats)
            else:
                # reuse positive bbox feats: build one boolean mask over
                # all rois (ones for positives, zeros for negatives, per
                # image, in sampling order).
                pos_inds = []
                device = bbox_feats.device
                for res in sampling_results:
                    pos_inds.append(
                        torch.ones(
                            res.pos_bboxes.shape[0],
                            device=device,
                            dtype=torch.bool))
                    pos_inds.append(
                        torch.zeros(
                            res.neg_bboxes.shape[0],
                            device=device,
                            dtype=torch.bool))
                # The list must be flattened into a single tensor before
                # it can index bbox_feats.
                pos_inds = torch.cat(pos_inds)
                mask_feats = bbox_feats[pos_inds]
            mask_head = self.mask_head[i]
            mask_pred = mask_head(mask_feats)
            mask_targets = mask_head.get_target(sampling_results, gt_masks,
                                                rcnn_train_cfg)
            pos_labels = torch.cat(
                [res.pos_gt_labels for res in sampling_results])
            loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)
            for name, value in loss_mask.items():
                losses['s{}.{}'.format(
                    i, name)] = (value * lw if 'loss' in name else value)

        # refine bboxes (not needed after the last stage)
        if i < self.num_stages - 1:
            pos_is_gts = [res.pos_is_gt for res in sampling_results]
            roi_labels = bbox_targets[0]  # bbox_targets is a tuple
            with torch.no_grad():
                proposal_list = bbox_head.refine_bboxes(
                    rois, roi_labels, bbox_pred, pos_is_gts, img_meta)

    return losses
x = model.down_conv_4(x) x = model.relu(x) x = model.down_conv_5(x) x = model.relu(x) x = model.down_conv_6(x) x = model.relu(x) x = model.down_conv_7(x) x = model.relu(x) x = model.down_pool_8(x) x = model.tanh(x) x = x.view(-1, 512) """ if i == 6: latent = torch.ones(1,512) * -1 if i == 5: latent = torch.zeros(1,512) if i == 4: latent = torch.ones(1,512) if i == 3: latent = np.random.rand(1, 512) latent[latent > 0.9] = 0 latent[latent > 0] = 1 latent = torch.from_numpy(latent) if i == 2: latent = np.random.rand(1, 512) latent[latent > 0.9] = 1 latent[latent < 1] = 0 latent = torch.from_numpy(latent) if i == 1:
def __init__(self, C):
    """Register two learnable vectors ``a`` and ``b`` of length ``C``.

    Both parameters are initialised to ones and are registered in the
    order ``a`` then ``b``.
    """
    super(ParamSum, self).__init__()
    for param_name in ("a", "b"):
        setattr(self, param_name, nn.Parameter(torch.ones(C)))
def forward_train(self,
                  img,
                  img_meta,
                  gt_bboxes,
                  gt_labels,
                  gt_bboxes_ignore=None,
                  gt_masks=None,
                  proposals=None):
    """Run the training forward pass and return the loss components.

    Args:
        img (Tensor): of shape (N, C, H, W) encoding input images.
            Typically these should be mean centered and std scaled.

        img_meta (list[dict]): list of image info dict where each dict has:
            'img_shape', 'scale_factor', 'flip', and may also contain
            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
            For details on the values of these keys see
            `mmdet/datasets/pipelines/`.

        gt_bboxes (list[Tensor]): each item are the truth boxes for each
            image in [tl_x, tl_y, br_x, br_y] format.

        gt_labels (list[Tensor]): class indices corresponding to each box

        gt_bboxes_ignore (None | list[Tensor]): specify which bounding
            boxes can be ignored when computing the loss.

        gt_masks (None | Tensor) : true segmentation masks for each box
            used if the architecture supports a segmentation task.

        proposals : override rpn proposals with custom proposals. Use when
            `with_rpn` is False.

    Returns:
        dict[str, Tensor]: a dictionary of loss components
    """
    x = self.extract_feat(img)

    losses = dict()

    # RPN forward and loss
    if self.with_rpn:
        rpn_outs = self.rpn_head(x)
        rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
                                      self.train_cfg.rpn)
        rpn_losses = self.rpn_head.loss(
            *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
        losses.update(rpn_losses)

        proposal_cfg = self.train_cfg.get('rpn_proposal',
                                          self.test_cfg.rpn)
        proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
        proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
    else:
        proposal_list = proposals

    # assign gts and sample proposals
    if self.with_bbox or self.with_mask:
        bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
        bbox_sampler = build_sampler(
            self.train_cfg.rcnn.sampler, context=self)
        num_imgs = img.size(0)
        if gt_bboxes_ignore is None:
            gt_bboxes_ignore = [None for _ in range(num_imgs)]
        sampling_results = []
        for i in range(num_imgs):
            assign_result = bbox_assigner.assign(proposal_list[i],
                                                 gt_bboxes[i],
                                                 gt_bboxes_ignore[i],
                                                 gt_labels[i])
            sampling_result = bbox_sampler.sample(
                assign_result,
                proposal_list[i],
                gt_bboxes[i],
                gt_labels[i],
                feats=[lvl_feat[i][None] for lvl_feat in x])
            sampling_results.append(sampling_result)

    # bbox head forward and loss
    if self.with_bbox:
        rois = bbox2roi([res.bboxes for res in sampling_results])
        # TODO: a more flexible way to decide which feature maps to use
        bbox_feats = self.bbox_roi_extractor(
            x[:self.bbox_roi_extractor.num_inputs], rois)
        if self.with_shared_head:
            bbox_feats = self.shared_head(bbox_feats)
        cls_score, bbox_pred = self.bbox_head(bbox_feats)

        bbox_targets = self.bbox_head.get_target(sampling_results,
                                                 gt_bboxes, gt_labels,
                                                 self.train_cfg.rcnn)
        loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
                                        *bbox_targets)
        losses.update(loss_bbox)

    # mask head forward and loss
    if self.with_mask:
        if not self.share_roi_extractor:
            pos_rois = bbox2roi(
                [res.pos_bboxes for res in sampling_results])
            mask_feats = self.mask_roi_extractor(
                x[:self.mask_roi_extractor.num_inputs], pos_rois)
            if self.with_shared_head:
                mask_feats = self.shared_head(mask_feats)
        else:
            # Reuse the bbox features of the positive rois: build one
            # boolean mask over all rois (ones for positives, zeros for
            # negatives, per image, in sampling order).
            pos_inds = []
            device = bbox_feats.device
            for res in sampling_results:
                pos_inds.append(
                    torch.ones(
                        res.pos_bboxes.shape[0],
                        device=device,
                        dtype=torch.bool))
                pos_inds.append(
                    torch.zeros(
                        res.neg_bboxes.shape[0],
                        device=device,
                        dtype=torch.bool))
            # The list must be flattened into a single tensor before it
            # can index bbox_feats.
            pos_inds = torch.cat(pos_inds)
            mask_feats = bbox_feats[pos_inds]

        # Skip the mask head entirely when there are no positive rois.
        if mask_feats.shape[0] > 0:
            mask_pred = self.mask_head(mask_feats)
            mask_targets = self.mask_head.get_target(
                sampling_results, gt_masks, self.train_cfg.rcnn)
            pos_labels = torch.cat(
                [res.pos_gt_labels for res in sampling_results])
            loss_mask = self.mask_head.loss(mask_pred, mask_targets,
                                            pos_labels)
            losses.update(loss_mask)

    return losses
def getBuilderTensor(self):
    """Return the ``(dimension + 1, dimension)`` float32 builder matrix.

    Row 0 is filled with ``-1`` and the remaining ``dimension`` rows form
    the identity matrix. The sizes follow ``self.dimension`` throughout,
    so the result is unchanged for ``dimension == 3`` and other dimensions
    no longer fail with a shape mismatch.

    Returns:
        torch.Tensor: the builder matrix described above.
    """
    dim = self.dimension
    builder = torch.zeros(size=[dim + 1, dim], dtype=torch.float32)
    builder[1:, :] = torch.eye(dim)
    builder[0, :] = -1
    return builder
def setUp(self):
    """Create the CUDA float32 fixture tensor, then run the shared init."""
    tensor_options = {'device': 'cuda', 'dtype': torch.float32}
    # Two-element vector of ones used by the test cases.
    self.x = torch.ones(2, **tensor_options)
    common_init(self)
# Script: builds a small synthetic linear-regression data set with torch and
# shows it as a scatter plot.
import superimport

import numpy as np
import matplotlib.pyplot as plt
import pyprobml_utils as pml

import torch
from torch import nn
import torch.nn.functional as F

# NOTE(review): this seeds NumPy only; the torch draws below (`uniform_`,
# `torch.rand`) use torch's own generator and are not made reproducible by it.
np.random.seed(42)

n = 100
# Design matrix: column 0 is overwritten with uniform inputs, column 1 stays
# at one.
x = torch.ones(n, 2, requires_grad=False)
x[:,0].uniform_(-1.,1)

# Mean squared difference between predictions and targets.
def mse(y_hat, y): return ((y_hat-y)**2).mean()
#def mse(y, y_pred): return (y_pred - y).pow(2).sum()

# Coefficients used to generate the targets; noise is uniform on [0, 1).
a = torch.as_tensor(np.array([3.0,2.0])).float()
y = x@a + torch.rand(n)
plt.scatter(x[:,0],y)

# must cast parameters to float to match type of x
#a = torch.as_tensor(np.array([-1.,1])).float()
#a = nn.Parameter(a);
def test_execution(self):
    """Check the ``"forward"`` function with two 1-element tensors and a scalar."""
    factor = 3
    lhs = torch.ones(1)
    rhs = factor * torch.ones(1)
    forward_args = (lhs, rhs, factor)
    # Test forward.
    self.check_function("forward", forward_args)
def dump_model_to_tensorboard(model, writer, channels=64, time=64):
    """Record the graph of ``model`` through ``writer.add_graph``.

    The graph is traced with an all-ones dummy input of shape
    ``(1, channels, time, 1)``. The input is created on the device of the
    model's first parameter, so CPU models and models on a non-default GPU
    can be traced too. A model without parameters falls back to ``"cuda"``.

    Args:
        model: Module whose graph is recorded.
        writer: Object exposing ``add_graph(model, input)``.
        channels: Size of dimension 1 of the dummy input.
        time: Size of dimension 2 of the dummy input.
    """
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda")
    # an input that works
    dummy_input = torch.ones((1, channels, time, 1), device=device)
    writer.add_graph(model, dummy_input)