def __init__(self, input_features, state_size):
    super(LLTM, self).__init__()
    self.input_features = input_features
    self.state_size = state_size
    self.weights = torch.nn.Parameter(
        torch.empty(3 * state_size, input_features + state_size))
    self.bias = torch.nn.Parameter(torch.empty(3 * state_size))
    self.reset_parameters()
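# The snippet above calls self.reset_parameters(), which is not shown here.
# A minimal sketch of what such a method typically looks like for this
# LLTM-style module (an assumption, not taken from the original source):
import math

def reset_parameters(self):
    # scale the uniform init by the hidden state size
    stdv = 1.0 / math.sqrt(self.state_size)
    for weight in self.parameters():
        weight.data.uniform_(-stdv, +stdv)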
def __init__(self, input_size, output_size, num_emojis, dropout):
    super().__init__()
    self.V = torch.nn.Parameter(
        torch.empty(num_emojis, output_size).uniform_(-0.1, 0.1)
    )
    self.dropout = torch.nn.Dropout(p=dropout)
    if input_size != output_size:
        self.is_proj = True
        self.W = torch.nn.Parameter(
            torch.empty(input_size, output_size).uniform_(-0.1, 0.1)
        )
        self.tanh = torch.nn.Tanh()
    else:
        self.is_proj = False
def __init__(self, beam_size, batch_size, pad, bos, eos, n_best, mb_device,
             global_scorer, min_length, max_length, return_attention,
             block_ngram_repeat, exclusion_tokens, memory_lengths,
             stepwise_penalty, ratio):
    super(BeamSearch, self).__init__(
        pad, bos, eos, batch_size, mb_device, beam_size, min_length,
        block_ngram_repeat, exclusion_tokens, return_attention, max_length)
    # beam parameters
    self.global_scorer = global_scorer
    self.beam_size = beam_size
    self.n_best = n_best
    self.batch_size = batch_size
    self.ratio = ratio
    # result caching
    self.hypotheses = [[] for _ in range(batch_size)]
    # beam state
    self.top_beam_finished = torch.zeros([batch_size], dtype=torch.uint8)
    self.best_scores = torch.full([batch_size], -1e10, dtype=torch.float,
                                  device=mb_device)
    self._batch_offset = torch.arange(batch_size, dtype=torch.long)
    self._beam_offset = torch.arange(
        0, batch_size * beam_size, step=beam_size, dtype=torch.long,
        device=mb_device)
    self.topk_log_probs = torch.tensor(
        [0.0] + [float("-inf")] * (beam_size - 1), device=mb_device
    ).repeat(batch_size)
    self.select_indices = None
    self._memory_lengths = memory_lengths
    # buffers for the topk scores and 'backpointer'
    self.topk_scores = torch.empty((batch_size, beam_size),
                                   dtype=torch.float, device=mb_device)
    self.topk_ids = torch.empty((batch_size, beam_size), dtype=torch.long,
                                device=mb_device)
    self._batch_index = torch.empty([batch_size, beam_size],
                                    dtype=torch.long, device=mb_device)
    self.done = False
    # "global state" of the old beam
    self._prev_penalty = None
    self._coverage = None
    self._stepwise_cov_pen = (
        stepwise_penalty and self.global_scorer.has_cov_pen)
    self._vanilla_cov_pen = (
        not stepwise_penalty and self.global_scorer.has_cov_pen)
    self._cov_pen = self.global_scorer.has_cov_pen
def test_constrained_expected_improvement_batch(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.tensor(
            [[-0.5, 0.0, 5.0, 0.0], [0.0, 0.0, 5.0, 0.0], [0.5, 0.0, 5.0, 0.0]],
            device=device,
            dtype=dtype,
        ).unsqueeze(dim=-2)
        variance = torch.ones(3, 4, device=device, dtype=dtype).unsqueeze(dim=-2)
        N = torch.distributions.Normal(loc=0.0, scale=1.0)
        a = N.icdf(torch.tensor(0.75))  # get a so that P(-a <= N <= a) = 0.5
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        module = ConstrainedExpectedImprovement(
            model=mm,
            best_f=0.0,
            objective_index=0,
            constraints={1: [None, 0], 2: [5.0, None], 3: [-a, a]},
        )
        X = torch.empty(3, 1, 1, device=device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected_unconstrained = torch.tensor(
            [0.19780, 0.39894, 0.69780], device=device, dtype=dtype
        )
        ei_expected = ei_expected_unconstrained * 0.5 * 0.5 * 0.5
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
def __call__(self, match_quality_matrix):
    """
    Args:
        match_quality_matrix (Tensor[float]): an MxN tensor, containing the
            pairwise quality between M ground-truth elements and N predicted
            elements.

    Returns:
        matches (Tensor[int64]): a tensor of length N, where matches[i] is a
            matched gt index in [0, M - 1] or a negative value indicating
            that prediction i could not be matched.
    """
    if match_quality_matrix.numel() == 0:
        # handle empty case
        device = match_quality_matrix.device
        return torch.empty((0,), dtype=torch.int64, device=device)

    # match_quality_matrix is M (gt) x N (predicted)
    # Max over gt elements (dim 0) to find the best gt candidate for each prediction
    matched_vals, matches = match_quality_matrix.max(dim=0)
    if self.allow_low_quality_matches:
        all_matches = matches.clone()

    # Assign candidate matches with low quality to negative (unassigned) values
    below_low_threshold = matched_vals < self.low_threshold
    between_thresholds = (matched_vals >= self.low_threshold) & (
        matched_vals < self.high_threshold
    )
    matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD
    matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS

    if self.allow_low_quality_matches:
        self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)

    return matches
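# A minimal usage sketch for the matcher above. The constructor signature and
# the sentinel values are assumptions inferred from the attributes the snippet
# reads (low_threshold, high_threshold, allow_low_quality_matches):
import torch

# pairwise IoU-style quality between 2 ground-truth boxes and 3 predictions
match_quality_matrix = torch.tensor([[0.9, 0.2, 0.1],
                                     [0.1, 0.6, 0.2]])
matcher = Matcher(high_threshold=0.7, low_threshold=0.3,
                  allow_low_quality_matches=False)
matches = matcher(match_quality_matrix)
# prediction 0 -> gt 0 (0.9 >= high_threshold)
# prediction 1 -> BETWEEN_THRESHOLDS (0.3 <= 0.6 < 0.7)
# prediction 2 -> BELOW_LOW_THRESHOLD (0.2 < 0.3)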
def map_batch(func, *inputs):
    '''Apply a function on a batch of data and stack the results.

    Args:
        func: The function to map.
        inputs: Batch arguments list (Batch, *size).

    Returns:
        Similar to `torch.stack([func(*x) for x in zip(*inputs)])` but faster.
        In case `func` returns a tuple, this function will also return a tuple.
    '''
    # compute the output of the first instance in the batch
    inputs = list(even_zip(*inputs))
    res = func(*inputs[0])
    single = not isinstance(res, tuple)
    as_tuple = (lambda x: (x,)) if single else (lambda x: x)
    res = as_tuple(res)
    # if more outputs are expected, keep computing them
    if len(inputs) == 1:
        out = tuple(r.unsqueeze(0) for r in res)
    else:
        out = tuple(torch.empty(len(inputs), *r.size(), device=r.device,
                                dtype=r.dtype) for r in res)
    for i, args in enumerate(inputs):
        if i > 0:
            res = as_tuple(func(*args))
        for result, output in zip(res, out):
            output[i, ...] = result
    return out[0] if single else out
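# A small usage sketch for map_batch. It assumes even_zip behaves like zip
# over the batch dimension, as the function above implies:
import torch

batch = torch.randn(4, 3)

def normalize(row):
    # returns a tuple, so map_batch returns a tuple of stacked results
    return row - row.mean(), row / (row.norm() + 1e-8)

centered, scaled = map_batch(normalize, batch)
# centered.shape == scaled.shape == (4, 3)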
def __init__(self):
    super(BIAS_FRAME, self).__init__()
    # self.bias_frame = Variable(torch.empty(1, 3, 480, 640).uniform_(0, 1), requires_grad=True)
    # self.bias_frame = nn.Parameter(torch.empty(1, 3, 480, 640).uniform_(-1, 1))
    self.bias_frame = nn.Parameter(torch.empty(1, 3, 1, 1).uniform_(-1, 1))
def test_sqrt(self):
    class MyModel(torch.nn.Module):
        def __init__(self):
            super(MyModel, self).__init__()

        def forward(self, input):
            return input.sqrt()

    input = Variable(torch.empty(BATCH_SIZE, 10, 10).uniform_(4, 9))
    self.run_model_test(MyModel(), train=False, input=input,
                        batch_size=BATCH_SIZE)
def test_expected_improvement(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([[-0.5]], device=device, dtype=dtype)
        variance = torch.ones(1, 1, device=device, dtype=dtype)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        module = ExpectedImprovement(model=mm, best_f=0.0)
        X = torch.empty(1, 1, device=device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(0.19780, device=device, dtype=dtype)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

        module = ExpectedImprovement(model=mm, best_f=0.0, maximize=False)
        X = torch.empty(1, 1, device=device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected = torch.tensor(0.6978, device=device, dtype=dtype)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
def __init__(self, nf, rf, nx):
    super(Conv1D, self).__init__()
    self.rf = rf
    self.nf = nf
    if rf == 1:  # faster 1x1 conv
        w = torch.empty(nx, nf)
        nn.init.normal_(w, std=0.02)
        self.w = Parameter(w)
        self.b = Parameter(torch.zeros(nf))
    else:  # was used to train LM
        raise NotImplementedError
def __init__(self, nf: int, rf: int, nx: int) -> None:
    super().__init__()
    self.rf = rf
    self.nf = nf
    if rf == 1:
        w = torch.empty(nx, nf)
        torch.nn.init.normal_(w, std=0.02)
        self.w = Parameter(w)
        self.b = Parameter(torch.zeros(nf))
    else:
        raise NotImplementedError
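# Neither Conv1D snippet above shows the forward pass. For this GPT-style
# layer, the 1x1 "convolution" is just an affine map over the last dimension;
# a plausible sketch mirroring the common implementation (assumed, not from
# the original source):
import torch

def forward(self, x):
    size_out = x.size()[:-1] + (self.nf,)
    # flatten all leading dims, apply x @ w + b, then restore the shape
    x = torch.addmm(self.b, x.view(-1, x.size(-1)), self.w)
    return x.view(*size_out)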
def test_NormalQMCEngineSeededOut(self):
    # test even dimension
    engine = NormalQMCEngine(d=2, seed=12345)
    out = torch.empty(2, 2)
    self.assertIsNone(engine.draw(n=2, out=out))
    samples_expected = torch.tensor(
        [[-0.63099602, -1.32950772], [0.29625805, 1.86425618]]
    )
    self.assertTrue(torch.allclose(out, samples_expected))
    # test odd dimension
    engine = NormalQMCEngine(d=3, seed=12345)
    out = torch.empty(2, 3)
    self.assertIsNone(engine.draw(n=2, out=out))
    samples_expected = torch.tensor(
        [
            [1.83169884, -1.40473647, 0.24334828],
            [0.36596099, 1.2987395, -1.47556275],
        ]
    )
    self.assertTrue(torch.allclose(out, samples_expected))
def test_MultivariateNormalQMCEngineSeededOut(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # test even dimension
        with manual_seed(54321):
            a = torch.randn(2, 2)
            cov = a @ a.transpose(-1, -2) + torch.rand(2).diag()
        mean = torch.zeros(2, device=device, dtype=dtype)
        cov = cov.to(device=device, dtype=dtype)
        engine = MultivariateNormalQMCEngine(mean=mean, cov=cov, seed=12345)
        out = torch.empty(2, 2, device=device, dtype=dtype)
        self.assertIsNone(engine.draw(n=2, out=out))
        samples_expected = torch.tensor(
            [[-0.849047422, -0.713852942], [0.398635030, 1.350660801]],
            device=device,
            dtype=dtype,
        )
        self.assertTrue(torch.allclose(out, samples_expected))
        # test odd dimension
        with manual_seed(54321):
            a = torch.randn(3, 3)
            cov = a @ a.transpose(-1, -2) + torch.rand(3).diag()
        mean = torch.zeros(3, device=device, dtype=dtype)
        cov = cov.to(device=device, dtype=dtype)
        engine = MultivariateNormalQMCEngine(mean, cov, seed=12345)
        out = torch.empty(2, 3, device=device, dtype=dtype)
        self.assertIsNone(engine.draw(n=2, out=out))
        samples_expected = torch.tensor(
            [
                [3.113158941, -3.262257099, -0.819938779],
                [0.621987879, 2.352285624, -1.992680788],
            ],
            device=device,
            dtype=dtype,
        )
        self.assertTrue(torch.allclose(out, samples_expected))
def test_posterior_mean(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([[0.25]], device=device, dtype=dtype)
        mm = MockModel(MockPosterior(mean=mean))
        module = PosteriorMean(model=mm)
        X = torch.empty(1, 1, device=device, dtype=dtype)
        pm = module(X)
        self.assertTrue(torch.equal(pm, mean.view(-1)))
        # check for proper error if multi-output model
        mean2 = torch.rand(1, 2, device=device, dtype=dtype)
        mm2 = MockModel(MockPosterior(mean=mean2))
        module2 = PosteriorMean(model=mm2)
        with self.assertRaises(UnsupportedError):
            module2(X)
def test():
    a = np.random.randn(1337)
    tvm_a = tvm.nd.array(a)
    np.testing.assert_equal(tvm.nd.from_dlpack(tvm_a.to_dlpack()).asnumpy(), a)

    try:
        import torch
        import torch.utils.dlpack

        x = torch.rand(56, 56)
        tvm_x = tvm.nd.from_dlpack(torch.utils.dlpack.to_dlpack(x))
        np.testing.assert_equal(x.numpy(), tvm_x.asnumpy())
        y = tvm.nd.from_dlpack(tvm_x.to_dlpack())
        np.testing.assert_equal(y.asnumpy(), tvm_x.asnumpy())
        np.testing.assert_equal(
            torch.utils.dlpack.from_dlpack(y.to_dlpack()).numpy(),
            tvm_x.asnumpy())

        n = tvm.convert(137)
        xx = torch.rand(137, 137)
        yy = torch.rand(137, 137)
        zz = xx.mm(yy)
        XX = tvm.placeholder((n, n), name='X')
        YY = tvm.placeholder((n, n), name='Y')
        k = tvm.reduce_axis((0, n), name='k')
        ZZ = tvm.compute((n, n),
                         lambda i, j: tvm.sum(XX[i, k] * YY[k, j], axis=k))
        s = tvm.create_schedule(ZZ.op)
        f = tvm.build(s, [XX, YY, ZZ], target_host='llvm', name='f')

        f_pytorch = to_pytorch_func(f)
        zz2 = torch.empty(137, 137)
        f_pytorch(xx, yy, zz2)
        tvm.testing.assert_allclose(zz.numpy(), zz2.numpy(), rtol=1e-6)
    except ImportError:
        pass
def dummy_inputs(cls, params, init_case):
    max_seq_len = params["max_seq_len"]
    batch_size = params["batch_size"]
    fv_sizes = init_case["feat_vocab_sizes"]
    n_words = init_case["word_vocab_size"]
    voc_sizes = [n_words] + fv_sizes
    pad_idxs = [init_case["word_padding_idx"]] + init_case["feat_padding_idx"]
    lengths = torch.randint(0, max_seq_len, (batch_size,))
    lengths[0] = max_seq_len
    inps = torch.empty((max_seq_len, batch_size, len(voc_sizes)),
                       dtype=torch.long)
    for f, (voc_size, pad_idx) in enumerate(zip(voc_sizes, pad_idxs)):
        for b, len_ in enumerate(lengths):
            inps[:len_, b, f] = torch.randint(0, voc_size - 1, (len_,))
            inps[len_:, b, f] = pad_idx
    return inps
def iteration(inputs):
    # targets, align half of the audio
    targets = torch.ones(int(batch_size * ((seconds * 100) / 2)))
    target_sizes = torch.empty(
        batch_size, dtype=torch.int).fill_(int((seconds * 100) / 2))
    input_percentages = torch.ones(batch_size).fill_(1)
    input_sizes = input_percentages.mul_(int(inputs.size(3))).int()

    out, output_sizes = model(inputs, input_sizes)
    out = out.transpose(0, 1)  # TxNxH

    loss = criterion(out, targets, output_sizes, target_sizes)
    loss = loss / inputs.size(0)  # average the loss by minibatch

    # compute gradient
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    torch.cuda.synchronize()
    del loss
    del out
def __init__(self, input_dim_capsule=gru_len * 2, num_capsule=Num_capsule,
             dim_capsule=Dim_capsule, routings=Routings, kernel_size=(9, 1),
             share_weights=True, activation='default', **kwargs):
    super(Caps_Layer, self).__init__(**kwargs)

    self.num_capsule = num_capsule
    self.dim_capsule = dim_capsule
    self.routings = routings
    self.kernel_size = kernel_size  # not used for now
    self.share_weights = share_weights
    if activation == 'default':
        self.activation = self.squash
    else:
        self.activation = nn.ReLU(inplace=True)

    if self.share_weights:
        self.W = nn.Parameter(
            nn.init.xavier_normal_(
                t.empty(1, input_dim_capsule,
                        self.num_capsule * self.dim_capsule)))
    else:
        self.W = nn.Parameter(
            t.randn(BATCH_SIZE, input_dim_capsule,
                    self.num_capsule * self.dim_capsule))  # 64 is the batch size
def test_expected_improvement_batch(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        mean = torch.tensor([-0.5, 0.0, 0.5], device=device,
                            dtype=dtype).view(3, 1, 1)
        variance = torch.ones(3, 1, 1, device=device, dtype=dtype)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        module = ExpectedImprovement(model=mm, best_f=0.0)
        X = torch.empty(3, 1, 1, device=device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected = torch.tensor([0.19780, 0.39894, 0.69780],
                                   device=device, dtype=dtype)
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))
        # check for proper error if multi-output model
        mean2 = torch.rand(3, 1, 2, device=device, dtype=dtype)
        variance2 = torch.rand(3, 1, 2, device=device, dtype=dtype)
        mm2 = MockModel(MockPosterior(mean=mean2, variance=variance2))
        module2 = ExpectedImprovement(model=mm2, best_f=0.0)
        with self.assertRaises(UnsupportedError):
            module2(X)
def loss_fn2(genFGen2, args, model):
    first_term_loss = compute_loss2(genFGen2, args, model)
    # first_term_loss2 = compute_loss2(genFGen2, args, model)
    # first_term_loss = torch.log(first_term_loss2 / (1.0 - first_term_loss2))
    # print(first_term_loss)

    # mu = torch.from_numpy(np.array([2.805741, -0.00889241], dtype="float32")).to(device)
    # S = torch.from_numpy(np.array([[pow(0.3442525, 2), 0.0], [0.0, pow(0.35358343, 2)]], dtype="float32")).to(device)
    # storeAll = torch.from_numpy(np.array(0.0, dtype="float32")).to(device)
    # toUse_storeAll = torch.distributions.MultivariateNormal(loc=mu, covariance_matrix=S)
    # for loopIndex_i in range(genFGen2.size()[0]):
    #     storeAll += torch.exp(toUse_storeAll.log_prob(
    #         genFGen2[loopIndex_i:1 + loopIndex_i, :].squeeze(0)))
    # storeAll /= genFGen2.size()[0]
    # print(storeAll)
    # print(compute_loss2(mu.unsqueeze(0), args, model))
    # print(torch.exp(toUse_storeAll.log_prob(mu)))
    # first_term_loss = storeAll

    xData = toy_data.inf_train_gen(args.data, batch_size=args.batch_size)
    xData = torch.from_numpy(xData).type(torch.float32).to(device)

    # var2 = []
    # for i in genFGen2:
    #     var1 = []
    #     for j in xData:
    #         new_stuff = torch.dist(i, j, 2)  # this is a tensor
    #         var1.append(new_stuff.unsqueeze(0))
    #     var1_tensor = torch.cat(var1)
    #     second_term_loss2 = torch.min(var1_tensor) / args.batch_size
    #     var2.append(second_term_loss2.unsqueeze(0))
    # var2_tensor = torch.cat(var2)
    # second_term_loss = torch.mean(var2_tensor) / args.batch_size
    # second_term_loss *= 100.0
    # print(second_term_loss)

    # If you know in advance the size of the final tensor, you can allocate
    # an empty tensor beforehand and fill it in the for loop:
    # x = torch.empty(size=(len(items), 768))
    # for i in range(len(items)):
    #     x[i] = calc_result

    # Use len(.) and not .shape[0]
    """
    # second_term_loss = torch.empty(size=(len(genFGen2), len(xData))).to(device)
    # second_term_loss = torch.empty(size=(len(genFGen2), len(xData)), device=device, requires_grad=True)
    # second_term_loss3 = torch.empty(size=(len(genFGen2), len(xData)), device=device, requires_grad=False)
    second_term_loss3 = torch.empty(size=(args.batch_size, args.batch_size),
                                    device=device, requires_grad=False)
    # for i in range(len(genFGen2)):
    for i in range(args.batch_size):
        # for j in range(len(xData)):
        for j in range(args.batch_size):
            # second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1)
            # second_term_loss3[i, j] = torch.tensor(0.1, requires_grad=True)
            # second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 1).requires_grad_()
            # second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 2).requires_grad_()**2
            second_term_loss3[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 2).requires_grad_()
            # second_term_loss[i, j] = torch.dist(genFGen2[i, :], xData[j, :], 2)**2
    # second_term_loss2, _ = torch.min(second_term_loss, 1)
    second_term_loss2, _ = torch.min(second_term_loss3, 1)
    # second_term_loss = 5000.0 * torch.mean(second_term_loss2) / (args.batch_size**2)
    # second_term_loss = lambda1 * torch.mean(second_term_loss2) / (args.batch_size ** 2)
    # second_term_loss = lambda1 * torch.mean(second_term_loss2)
    second_term_loss = torch.mean(second_term_loss2)

    print('')
    print(first_term_loss)
    print(second_term_loss)
    print('')
    """

    second_term_loss32 = torch.empty(args.batch_size, device=device,
                                     requires_grad=False)
    for i in range(args.batch_size):
        # second_term_loss22 = torch.norm(genFGen2[i, :] - xData, p='fro', dim=1).requires_grad_()
        second_term_loss22 = torch.norm(genFGen2[i, :] - xData, p=None,
                                        dim=1).requires_grad_()
        second_term_loss32[i] = torch.min(second_term_loss22)
    # second_term_loss22 = torch.min(second_term_loss32)
    second_term_loss2 = torch.mean(second_term_loss32)

    print('')
    print(first_term_loss)
    print(second_term_loss2)

    # third_term_loss = torch.from_numpy(np.array(0.0, dtype='float32')).to(device)
    # for i in range(args.batch_size):
    #     for j in range(args.batch_size):
    #         if i != j:
    #             third_term_loss += ((torch.dist(genFGen3[i, :], genFGen3[j, :], 2))
    #                                 / (torch.dist(genFGen2[i, :], genFGen2[j, :], 2)))
    #     third_term_loss /= (args.batch_size - 1)
    # third_term_loss /= args.batch_size
    # third_term_loss *= 1000.0

    genFGen3 = torch.randn([args.batch_size, 2], device=device,
                           requires_grad=True)
    third_term_loss3 = torch.empty(size=(args.batch_size, args.batch_size),
                                   device=device, requires_grad=False)
    for i in range(args.batch_size):
        for j in range(args.batch_size):
            if i != j:
                # third_term_loss3[i][j] = ((np.linalg.norm(genFGen3[i, :].cpu().detach().numpy() - genFGen3[j, :].cpu().detach().numpy()))
                #                           / (np.linalg.norm(genFGen2[i, :].cpu().detach().numpy() - genFGen2[j, :].cpu().detach().numpy())))
                # third_term_loss3[i][j] = ((torch.norm(genFGen3[i, :] - genFGen3[j, :], 2))
                #                           / (torch.norm(genFGen2[i, :] - genFGen2[j, :], 2)))
                third_term_loss3[i][j] = (
                    (torch.dist(genFGen3[i, :], genFGen3[j, :], 2).requires_grad_())
                    / (torch.dist(genFGen2[i, :], genFGen2[j, :], 2).requires_grad_()))
    # third_term_loss2 = third_term_loss3 / (args.batch_size - 1)
    third_term_loss2 = torch.mean(third_term_loss3, 1)
    # third_term_loss = 0.01 * torch.mean(third_term_loss2)
    # third_term_loss = lambda2 * torch.mean(third_term_loss2)
    third_term_loss = torch.mean(third_term_loss2)
    # third_term_loss *= 1000.0

    print(third_term_loss)
    print('')

    # return first_term_loss + second_term_loss
    # return second_term_loss
    # return first_term_loss + second_term_loss + third_term_loss
    return first_term_loss + second_term_loss2 + third_term_loss
def create_sentence_pair_dataset(data: list,
                                 tokenizer: Union[BertTokenizer, AlbertTokenizer],
                                 save_directory=None, max_sequence_length=128):
    max_bert_input_length = 0
    for sentence_pair in data:
        sentence_1_tokenized = tokenizer.tokenize(sentence_pair['sentence_1'])
        sentence_2_tokenized = tokenizer.tokenize(sentence_pair['sentence_2'])
        _truncate_seq_pair(sentence_1_tokenized, sentence_2_tokenized,
                           max_sequence_length - 3)  # accounting for positioning tokens
        max_bert_input_length = max(
            max_bert_input_length,
            len(sentence_1_tokenized) + len(sentence_2_tokenized) + 3)
        sentence_pair['sentence_1_tokenized'] = sentence_1_tokenized
        sentence_pair['sentence_2_tokenized'] = sentence_2_tokenized

    bert_input_ids = torch.empty((len(data), max_bert_input_length),
                                 dtype=torch.long)
    bert_token_type_ids = torch.empty((len(data), max_bert_input_length),
                                      dtype=torch.long)
    bert_attention_masks = torch.empty((len(data), max_bert_input_length),
                                       dtype=torch.long)
    scores = torch.empty((len(data), 1), dtype=torch.float)

    for idx, sentence_pair in enumerate(data):
        tokens = []
        input_type_ids = []
        tokens.append("[CLS]")
        input_type_ids.append(0)
        for token in sentence_pair['sentence_1_tokenized']:
            tokens.append(token)
            input_type_ids.append(0)
        tokens.append("[SEP]")
        input_type_ids.append(0)
        for token in sentence_pair['sentence_2_tokenized']:
            tokens.append(token)
            input_type_ids.append(1)
        tokens.append("[SEP]")
        input_type_ids.append(1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        attention_masks = [1] * len(input_ids)
        while len(input_ids) < max_bert_input_length:
            input_ids.append(0)
            attention_masks.append(0)
            input_type_ids.append(0)

        bert_input_ids[idx] = torch.tensor(input_ids, dtype=torch.long)
        bert_token_type_ids[idx] = torch.tensor(input_type_ids, dtype=torch.long)
        bert_attention_masks[idx] = torch.tensor(attention_masks, dtype=torch.long)
        if 'similarity' not in sentence_pair or sentence_pair['similarity'] is None:
            scores[idx] = torch.tensor(float('nan'), dtype=torch.float)
        else:
            scores[idx] = torch.tensor(sentence_pair['similarity'], dtype=torch.float)

    if save_directory:
        torch.save(bert_input_ids, os.path.join(save_directory, "bert_input_ids.pt"))
        torch.save(bert_token_type_ids, os.path.join(save_directory, "bert_token_type_ids.pt"))
        torch.save(bert_attention_masks, os.path.join(save_directory, "bert_attention_masks.pt"))
        torch.save(scores, os.path.join(save_directory, "scores.pt"))

    return (bert_input_ids, bert_token_type_ids, bert_attention_masks), scores
def finalize_hypos(
    self,
    step: int,
    bbsz_idx,
    eos_scores,
    tokens,
    scores,
    finalized: List[List[Dict[str, Tensor]]],
    finished: List[bool],
    beam_size: int,
    attn: Optional[Tensor],
    src_lengths,
    max_len: int,
):
    """Finalize hypothesis, store finalized information in `finalized`, and
    change `finished` accordingly. A sentence is finalized when {beam_size}
    finished items have been collected for it.

    Returns number of sentences (not beam items) being finalized. These will
    be removed from the batch and not processed further.

    Args:
        bbsz_idx (Tensor):
    """
    assert bbsz_idx.numel() == eos_scores.numel()

    # clone relevant token and attention tensors.
    # tokens is (batch * beam, max_len). So the index_select
    # gets the newly EOS rows, then selects cols 1..{step + 2}
    tokens_clone = tokens.index_select(
        0, bbsz_idx)[:, 1:step + 2]  # skip the first index, which is EOS

    tokens_clone[:, step] = self.eos
    attn_clone = (attn.index_select(0, bbsz_idx)[:, :, 1:step + 2]
                  if attn is not None else None)

    # compute scores per token position
    pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1]
    pos_scores[:, step] = eos_scores
    # convert from cumulative to per-position scores
    pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1]

    # normalize sentence-level scores
    if self.normalize_scores:
        eos_scores /= (step + 1) ** self.len_penalty

    # cum_unfin records which sentences in the batch are finished.
    # It helps match indexing between (a) the original sentences
    # in the batch and (b) the current, possibly-reduced set of
    # sentences.
    cum_unfin: List[int] = []
    prev = 0
    for f in finished:
        if f:
            prev += 1
        else:
            cum_unfin.append(prev)

    # The keys here are of the form "{sent}_{unfin_idx}", where
    # "unfin_idx" is the index in the current (possibly reduced)
    # list of sentences, and "sent" is the index in the original,
    # unreduced batch
    # set() is not supported in script export
    sents_seen: Dict[str, Optional[Tensor]] = {}

    # For every finished beam item
    for i in range(bbsz_idx.size()[0]):
        idx = bbsz_idx[i]
        score = eos_scores[i]
        # sentence index in the current (possibly reduced) batch
        unfin_idx = idx // beam_size
        # sentence index in the original (unreduced) batch
        sent = unfin_idx + cum_unfin[unfin_idx]
        # Cannot create dict for key type '(int, int)' in torchscript.
        # The workaround is to cast int to string
        seen = str(sent.item()) + "_" + str(unfin_idx.item())
        if seen not in sents_seen:
            sents_seen[seen] = None

        if self.match_source_len and step > src_lengths[unfin_idx]:
            score = torch.tensor(-math.inf).to(score)

        # An input sentence (among those in a batch) is finished when
        # beam_size hypotheses have been collected for it
        if len(finalized[sent]) < beam_size:
            if attn_clone is not None:
                # remove padding tokens from attn scores
                hypo_attn = attn_clone[i]
            else:
                hypo_attn = torch.empty(0)

            finalized[sent].append({
                "tokens": tokens_clone[i],
                "score": score,
                "attention": hypo_attn,  # src_len x tgt_len
                "alignment": torch.empty(0),
                "positional_scores": pos_scores[i],
            })

    newly_finished: List[int] = []
    for seen in sents_seen.keys():
        # check termination conditions for this sentence
        sent: int = int(float(seen.split("_")[0]))
        unfin_idx: int = int(float(seen.split("_")[1]))
        if not finished[sent] and self.is_finished(
                step, unfin_idx, max_len, len(finalized[sent]), beam_size):
            finished[sent] = True
            newly_finished.append(unfin_idx)
    return newly_finished
def scatter_gather(data):
    """
    This function gathers data from multiple processes, and returns them
    in a list, as they were obtained from each process.

    This function is useful for retrieving data from multiple processes,
    when launching the code with torch.distributed.launch

    Note: this function is slow and should not be used in tight loops, i.e.,
    do not use it in the training loop.

    Arguments:
        data: the object to be gathered from multiple processes.
            It must be serializable

    Returns:
        result (list): a list with as many elements as there are processes,
            where each element i in the list corresponds to the data that
            was gathered from the process of rank i.
    """
    # strategy: the main process creates a temporary directory, and communicates
    # the location of the temporary directory to all other processes.
    # each process will then serialize the data to the folder defined by
    # the main process, and then the main process reads all of the serialized
    # files and returns them in a list
    if not torch.distributed.deprecated.is_initialized():
        return [data]
    synchronize()
    # get rank of the current process
    rank = torch.distributed.deprecated.get_rank()

    # the data to communicate should be small
    data_to_communicate = torch.empty(256, dtype=torch.uint8, device="cuda")
    if rank == 0:
        # manually creates a temporary directory, that needs to be cleaned
        # afterwards
        tmp_dir = tempfile.mkdtemp()
        _encode(data_to_communicate, tmp_dir)

    synchronize()
    # the main process (rank=0) communicates the data to all processes
    torch.distributed.deprecated.broadcast(data_to_communicate, 0)

    # get the data that was communicated
    tmp_dir = _decode(data_to_communicate)

    # each process serializes to a different file
    file_template = "file{}.pth"
    tmp_file = os.path.join(tmp_dir, file_template.format(rank))
    torch.save(data, tmp_file)

    # synchronize before loading the data
    synchronize()

    # only the master process returns the data
    if rank == 0:
        data_list = []
        world_size = torch.distributed.deprecated.get_world_size()
        for r in range(world_size):
            file_path = os.path.join(tmp_dir, file_template.format(r))
            d = torch.load(file_path)
            data_list.append(d)
            # cleanup
            os.remove(file_path)
        # cleanup
        os.rmdir(tmp_dir)
        return data_list
def test_two_stage_forward(cfg_file):
    models_with_semantic = [
        'htc/htc_r50_fpn_1x_coco.py',
        'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    ]
    if cfg_file in models_with_semantic:
        with_semantic = True
    else:
        with_semantic = False

    model = _get_detector_cfg(cfg_file)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None

    # Save cost
    if cfg_file in [
            'seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501
    ]:
        model.roi_head.bbox_head.num_classes = 80
        model.roi_head.bbox_head.loss_cls.num_classes = 80
        model.roi_head.mask_head.num_classes = 80
        model.test_cfg.rcnn.score_thr = 0.05
        model.test_cfg.rcnn.max_per_img = 100

    from mmdet.models import build_detector
    detector = build_detector(model)

    input_shape = (1, 3, 128, 128)

    # Test forward train with a non-empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10],
                                with_semantic=with_semantic)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    loss.requires_grad_(True)
    assert float(loss.item()) > 0
    loss.backward()

    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0],
                                with_semantic=with_semantic)
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    loss.requires_grad_(True)
    assert float(loss.item()) > 0
    loss.backward()

    # Test RoI forward train with empty proposals
    if cfg_file in [
            'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py'  # noqa: E501
    ]:
        mm_inputs.pop('gt_semantic_seg')
    feature = detector.extract_feat(imgs[0][None, :])
    losses = detector.roi_head.forward_train(feature, img_metas,
                                             [torch.empty((0, 5))],
                                             **mm_inputs)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      return_loss=False)
            batch_results.append(result)

    cascade_models = [
        'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',
        'htc/htc_r50_fpn_1x_coco.py',
        'scnet/scnet_r50_fpn_20e_coco.py',
    ]
    # test empty proposal in roi_head
    with torch.no_grad():
        # test no proposal in the whole batch
        detector.simple_test(imgs[0][None, :], [img_metas[0]],
                             proposals=[torch.empty((0, 4))])

        # test no proposal of aug
        features = detector.extract_feats([imgs[0][None, :]] * 2)
        detector.roi_head.aug_test(features, [torch.empty((0, 4))] * 2,
                                   [[img_metas[0]]] * 2)

        # test rcnn_test_cfg is None
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :])
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]], [torch.empty((0, 4))], None)
            assert all([bbox.shape == torch.Size((0, 4)) for bbox in bboxes])
            assert all([
                score.shape == torch.Size(
                    (0, detector.roi_head.bbox_head.fc_cls.out_features))
                for score in scores
            ])

        # test no proposal in some of the images
        x1y1 = torch.randint(1, 100, (10, 2)).float()
        # x2y2 must be greater than x1y1
        x2y2 = x1y1 + torch.randint(1, 100, (10, 2))
        detector.simple_test(
            imgs[0][None, :].repeat(2, 1, 1, 1), [img_metas[0]] * 2,
            proposals=[torch.empty((0, 4)),
                       torch.cat([x1y1, x2y2], dim=-1)])

        # test no proposal of aug
        detector.roi_head.aug_test(
            features, [torch.cat([x1y1, x2y2], dim=-1),
                       torch.empty((0, 4))], [[img_metas[0]]] * 2)

        # test rcnn_test_cfg is None
        if cfg_file not in cascade_models:
            feature = detector.extract_feat(imgs[0][None, :].repeat(2, 1, 1, 1))
            bboxes, scores = detector.roi_head.simple_test_bboxes(
                feature, [img_metas[0]] * 2,
                [torch.empty((0, 4)),
                 torch.cat([x1y1, x2y2], dim=-1)], None)
            assert bboxes[0].shape == torch.Size((0, 4))
            assert scores[0].shape == torch.Size(
                (0, detector.roi_head.bbox_head.fc_cls.out_features))
def _init_weights(self, m: int, n: int) -> nn.Parameter:
    return nn.Parameter(xavier_uniform_(torch.empty(m, n)))
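# The same pattern written out standalone: torch.empty only allocates
# uninitialized storage, and the initializer overwrites every element in
# place, so nothing is read before being written.
import torch
from torch.nn.init import xavier_uniform_

w = torch.nn.Parameter(xavier_uniform_(torch.empty(128, 64)))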
def simple_test_text(self, x, img_metas, det_bboxes, det_masks, rescale=False):
    # image shape of the first image in the batch (only one)
    ori_shape = img_metas[0]['ori_shape']
    scale_factor = img_metas[0]['scale_factor']
    if torch.onnx.is_in_onnx_export() and det_bboxes.shape[0] == 0:
        # If there are no detections there is nothing to do for a mask head.
        # But during ONNX export we should run the mask head
        # for it to appear in the graph.
        # So add one zero / dummy ROI that will be mapped
        # to an Identity op in the graph.
        det_bboxes = dummy_pad(det_bboxes, (0, 0, 0, 1))
    if det_bboxes.shape[0] == 0:
        decoded_texts = torch.empty([0, 0, 0], dtype=det_bboxes.dtype,
                                    device=det_bboxes.device)
        confidences = torch.empty([0, 0, 0], dtype=det_bboxes.dtype,
                                  device=det_bboxes.device)
        distributions = []
    else:
        # if det_bboxes is rescaled to the original image size, we need to
        # rescale it back to the testing scale to obtain RoIs.
        if rescale and not isinstance(scale_factor, float):
            scale_factor = torch.from_numpy(scale_factor).to(det_bboxes.device)
        _bboxes = (det_bboxes[:, :4] * scale_factor if rescale else det_bboxes)
        text_rois = bbox2roi([_bboxes])
        text_results = self._text_forward(x, text_rois, det_masks=det_masks)
        if torch.onnx.is_in_onnx_export():
            return text_results
        text_results = text_results['text_results'].permute(1, 0, 2)
        text_results = torch.nn.functional.softmax(text_results, dim=-1)
        confidences = []
        decoded_texts = []
        distributions = []
        for text in text_results:
            predicted_confidences, encoded = text.topk(1)
            predicted_confidences = predicted_confidences.cpu().numpy()
            encoded = encoded.cpu().numpy().reshape(-1)
            decoded = ''
            confidence = 1
            for l, c in zip(encoded, predicted_confidences):
                confidence *= c
                if l == 1:
                    break
                decoded += self.alphabet[l]
            confidences.append(confidence)
            assert self.alphabet[0] == self.alphabet[1] == ' '
            distribution = np.transpose(text.cpu().numpy())[2:, :len(decoded) + 1]
            distributions.append(distribution)
            decoded_texts.append(decoded if confidence >= self.text_thr else '')
    return decoded_texts, confidences, distributions
def __init__(self, seq_length, input_dim, num_hidden, num_classes, batch_size,
             device='cpu'):
    super(LSTM, self).__init__()

    # Create tensors of the right sizes

    # LSTM part
    self.Wfx = nn.Parameter(torch.empty(input_dim, num_hidden))
    self.Wix = nn.Parameter(torch.empty(input_dim, num_hidden))
    self.Wgx = nn.Parameter(torch.empty(input_dim, num_hidden))
    self.Wox = nn.Parameter(torch.empty(input_dim, num_hidden))
    self.Wfh = nn.Parameter(torch.empty(num_hidden, num_hidden))
    self.Wih = nn.Parameter(torch.empty(num_hidden, num_hidden))
    self.Wgh = nn.Parameter(torch.empty(num_hidden, num_hidden))
    self.Woh = nn.Parameter(torch.empty(num_hidden, num_hidden))
    self.bf = nn.Parameter(torch.empty(num_hidden))
    self.bi = nn.Parameter(torch.empty(num_hidden))
    self.bg = nn.Parameter(torch.empty(num_hidden))
    self.bo = nn.Parameter(torch.empty(num_hidden))
    self.c = torch.empty(batch_size, num_hidden, device=device)  # cell state
    self.h = torch.empty(batch_size, num_hidden, device=device)  # output state

    # Linear part
    self.Wph = nn.Parameter(torch.empty(num_hidden, num_classes))
    self.bp = nn.Parameter(torch.empty(num_classes))

    # Initialize weights
    mean = 0.0
    std = 1 / seq_length
    nn.init.normal_(self.Wgx, mean=mean, std=std)
    nn.init.normal_(self.Wix, mean=mean, std=std)
    nn.init.normal_(self.Wfx, mean=mean, std=std)
    nn.init.normal_(self.Wox, mean=mean, std=std)
    nn.init.normal_(self.Wgh, mean=mean, std=std)
    nn.init.normal_(self.Wih, mean=mean, std=std)
    nn.init.normal_(self.Wfh, mean=mean, std=std)
    nn.init.normal_(self.Woh, mean=mean, std=std)
    nn.init.normal_(self.Wph, mean=mean, std=std)
    nn.init.constant_(self.bi, 0.0)
    nn.init.constant_(self.bg, 0.0)
    nn.init.constant_(self.bf, 0.0)
    nn.init.constant_(self.bo, 0.0)
    nn.init.constant_(self.bp, 0.0)

    # Administrative stuff
    self.sequence_length = seq_length
    self.batch_size = batch_size
    self.input_dim = input_dim
    channelsList = [1024, 512, 256, 128, 16]
    encNet = EncodeNet(channelsList).cuda().train()
    channelsList.reverse()
    decNet = DecodeNet(channelsList).cuda().train()
    print('create new model')
else:
    encNet = torch.load('../models/encNet_' + sys.argv[0] + '.pkl',
                        map_location='cuda:' + sys.argv[1]).cuda().train()
    decNet = torch.load('../models/decNet_' + sys.argv[0] + '.pkl',
                        map_location='cuda:' + sys.argv[1]).cuda().train()
    print('read ../models/' + sys.argv[0] + '.pkl')
print(encNet)
print(decNet)

optimizer = torch.optim.Adam([{'params': encNet.parameters()},
                              {'params': decNet.parameters()}],
                             lr=float(sys.argv[3]))

trainData = torch.empty([batchSize, 1, 256, 256]).float().cuda()
lastSavedI = 0

C = torch.arange(start=0, end=16).unsqueeze_(1).float().cuda()
sigma = 1

testImgSum = 24
testImgDir = '/datasets/MLG/wfang/imgCompress/kodim256/'
testDataReader = bmpLoader.datasetReader(colorFlag=False, batchSize=1,
                                         bufferBatchSizeMultiple=testImgSum,
                                         imgDir=testImgDir, imgSum=testImgSum)
testData = torch.empty([testImgSum, 1, 256, 256]).float().cuda()
for k in range(testImgSum):
    testData[k] = torch.from_numpy(testDataReader.readImg()).float().cuda()

i = 0
while (True):
def train_with_kf(train_loader, model, criterion, optimizer, epoch, log, kfclass):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        if input.shape[0] % (args.ngpu) != 0:
            continue
        if args.use_cuda:
            target = target.cuda()
            input = input.cuda()
        data_time.update(time.time() - end)

        if args.dataset == 'cifar10':
            with torch.no_grad():
                kf_input = kfclass(torch.randn(input.shape[0], 128).cuda())
        elif args.dataset == 'imagenet':
            with torch.no_grad():
                kf_input = kfclass(
                    torch.empty(input.shape[0], 128,
                                dtype=torch.float32).normal_().cuda())
                kf_input = F.interpolate(kf_input, size=224)

        input_list = []
        num_pgpu = input.shape[0] // args.ngpu
        for igpu in range(args.ngpu):
            input_list.append(
                torch.cat([
                    input[igpu * num_pgpu:(igpu + 1) * num_pgpu],
                    kf_input[igpu * num_pgpu:(igpu + 1) * num_pgpu]
                ], dim=0))
        input = torch.cat(input_list, dim=0)

        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        output = model(input_var)
        output_list = []
        for igpu in range(args.ngpu):
            output_list.append(output[igpu * num_pgpu * 2:
                                      igpu * num_pgpu * 2 + num_pgpu])
        output = torch.cat(output_list, dim=0)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        for module in net.modules():
            if isinstance(module, Kf_Conv2d):
                module.kfscale.data.clamp_(min=0, max=1)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print(' Epoch: [{:03d}][{:03d}/{:03d}] '
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                  'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                  'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f}) '.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1,
                      top5=top5) + time_string())
    print(' **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} '
          'Error@1 {error1:.3f}'.format(top1=top1, top5=top5,
                                        error1=100 - top1.avg), log)
    return top1.avg, losses.avg
def sample(self, numb):
    sample = torch.empty(numb, self.ZDIMS,
                         device=self.device).normal_(mean=0, std=1)
    mu, inv_sigma2_x = self.decode(sample)
    return mu, inv_sigma2_x
def generate_gaussian_simulations(min_x, max_x, min_y, max_y, n_steps,
                                  step_size=1, std=0.1):
    """Generate a natural path where the next direction angle is sampled
    from a gaussian distribution.

    Arguments:
        min_x {float} -- x minimum boundary
        max_x {float} -- x maximum boundary
        min_y {float} -- y minimum boundary
        max_y {float} -- y maximum boundary
        n_steps {int} -- the number of simulation steps

    Keyword Arguments:
        step_size {int} -- the distance between two successive positions (default: {1})
        std {float} -- the std of the gaussian distribution (default: {0.1})

    Returns:
        tensor -- n_steps x 2 tensor corresponding to the positions of the new path
    """
    assert min_x <= max_x
    assert min_y <= max_y

    # generate starting position
    start_x = min_x + (max_x - min_x) * torch.rand(1)
    start_y = min_y + (max_y - min_y) * torch.rand(1)

    # generate the angle changes
    cur_angle = 2 * np.pi * torch.rand(1) - np.pi
    deviations = 2 * np.pi * torch.empty(n_steps - 1).normal_(mean=0, std=std)

    positions = [[start_x, start_y]]
    cur_x, cur_y = start_x, start_y
    for dev in deviations:
        cur_angle = cur_angle + dev
        # put angle into (-pi, pi] range
        if cur_angle > np.pi:
            cur_angle.sub_(2 * np.pi)
        elif cur_angle <= -np.pi:
            cur_angle.add_(2 * np.pi)

        cur_x = cur_x + step_size * torch.cos(cur_angle)
        cur_y = cur_y + step_size * torch.sin(cur_angle)

        # handle positions crossing the boundaries (reflected like a mirror)
        if cur_x < min_x:
            cur_angle = torch.sign(cur_angle) * np.pi - cur_angle
            cur_x = min_x + (min_x - cur_x)
        elif cur_x > max_x:
            cur_angle = torch.sign(cur_angle) * np.pi - cur_angle
            cur_x = max_x + (max_x - cur_x)
        if cur_y < min_y:
            cur_angle = -cur_angle
            cur_y = min_y + (min_y - cur_y)
        elif cur_y > max_y:
            cur_angle = -cur_angle
            cur_y = max_y + (max_y - cur_y)

        # add the new position
        positions.append([cur_x, cur_y])

    return torch.FloatTensor(positions)
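# Example usage of the simulator above (the argument values are illustrative,
# not taken from the original code):
path = generate_gaussian_simulations(min_x=0.0, max_x=10.0,
                                     min_y=0.0, max_y=10.0,
                                     n_steps=100, step_size=1, std=0.1)
# per the docstring, path is an (n_steps, 2) tensor of x/y positions,
# with consecutive rows one step_size apart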
def test_sparse_rcnn_forward():
    config_path = 'sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py'
    model = _get_detector_cfg(config_path)
    model = _replace_r50_with_r18(model)
    model.backbone.init_cfg = None
    from mmdet.models import build_detector
    detector = build_detector(model)
    detector.init_weights()
    input_shape = (1, 3, 100, 100)
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[5])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')

    # Test forward train with a non-empty truth batch
    detector.train()
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_bboxes = [item for item in gt_bboxes]
    gt_labels = mm_inputs['gt_labels']
    gt_labels = [item for item in gt_labels]
    losses = detector.forward(imgs, img_metas, gt_bboxes=gt_bboxes,
                              gt_labels=gt_labels, return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0
    detector.forward_dummy(imgs)

    # Test forward train with an empty truth batch
    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])
    imgs = mm_inputs.pop('imgs')
    img_metas = mm_inputs.pop('img_metas')
    gt_bboxes = mm_inputs['gt_bboxes']
    gt_bboxes = [item for item in gt_bboxes]
    gt_labels = mm_inputs['gt_labels']
    gt_labels = [item for item in gt_labels]
    losses = detector.forward(imgs, img_metas, gt_bboxes=gt_bboxes,
                              gt_labels=gt_labels, return_loss=True)
    assert isinstance(losses, dict)
    loss, _ = detector._parse_losses(losses)
    assert float(loss.item()) > 0

    # Test forward test
    detector.eval()
    with torch.no_grad():
        img_list = [g[None, :] for g in imgs]
        batch_results = []
        for one_img, one_meta in zip(img_list, img_metas):
            result = detector.forward([one_img], [[one_meta]],
                                      rescale=True, return_loss=False)
            batch_results.append(result)

    # test empty proposal in roi_head
    with torch.no_grad():
        # test no proposal in the whole batch
        detector.roi_head.simple_test([imgs[0][None, :]],
                                      torch.empty((1, 0, 4)),
                                      torch.empty((1, 100, 4)),
                                      [img_metas[0]], torch.ones((1, 4)))
def create_digit_image(colors, curr_min_val=0, curr_max_val=1):
    numpy_colors = np.array(colors)
    # convert to [0, 1] to draw
    if numpy_colors.min() < 0 or numpy_colors.max() > 1:
        if (curr_max_val - curr_min_val) <= 0:
            print('wrong min and max input values')
            return
        else:
            # note: np.round, since the round() builtin does not accept arrays
            numpy_colors = np.round(
                (numpy_colors - curr_min_val) / (curr_max_val - curr_min_val), 3)
            colors = numpy_colors.tolist()

    if len(numpy_colors.shape) < 2 or len(numpy_colors.shape) > 3:
        print('wrong input dimension. should be [_,8,3]')
    elif len(numpy_colors.shape) == 2:
        colors = [colors]
    # elif len(numpy_colors.shape) == 3: do nothing

    data_tensor = torch.empty(numpy_colors.shape[0], 3, 300, 200,
                              dtype=torch.float)
    for j in range(numpy_colors.shape[0]):
        fig_temp, myaxis = plt.subplots(figsize=(200, 300), dpi=1)
        fig_temp.subplots_adjust(0, 0, 1, 1)
        myaxis.axis('off')
        myaxis.set_xlim([0, 2 * width])
        myaxis.set_ylim([0, 3 * width])
        myaxis.set_aspect('equal', 'box')

        patches = []
        segments_centers = []
        center = [width, 1.5 * width]
        bg_patch = create_background(colors[j][7])
        patches.append(bg_patch)
        myaxis.add_patch(bg_patch)
        segments_centers.append([center[0], center[1] + width + height])
        segments_centers.append([center[0], center[1]])
        segments_centers.append([center[0], center[1] - width - height])
        segments_centers.append([center[0] - width / 2 - height / 2,
                                 center[1] + width / 2 + height / 2])
        segments_centers.append([center[0] + width / 2 + height / 2,
                                 center[1] + width / 2 + height / 2])
        segments_centers.append([center[0] - width / 2 - height / 2,
                                 center[1] - width / 2 - height / 2])
        segments_centers.append([center[0] + width / 2 + height / 2,
                                 center[1] - width / 2 - height / 2])
        vertical_horizon = [0, 0, 0, 1, 1, 1, 1]
        for i in range(len(segments_centers)):
            polygon = create_segment(segments_centers[i], vertical_horizon[i],
                                     colors[j][i])
            patches.append(polygon)
            myaxis.add_patch(polygon)

        fig_temp.canvas.draw()
        data = np.frombuffer(fig_temp.canvas.tostring_rgb(), dtype=np.uint8)
        # data = np.fromstring(fig_temp.canvas.tostring_rgb(), dtype=np.uint8, sep='')
        data = data.reshape(fig_temp.canvas.get_width_height()[::-1] + (3,))
        data_tensor[j] = F.to_tensor(data)
        # plt.show()
        plt.close(fig_temp)

    # print(data_tensor.shape)
    return data_tensor
def get_dJdG(G_b, F, S_b, B_b, G1):
    theta_b = 1 / 2 * torch.matmul(F.transpose(0, 1), G_b)  # theta_b: (n, batch_size)
    dJdG = torch.empty(config.BITS, G_b.shape[1], device='cuda')
    for j in range(G_b.shape[1]):
        dJdG[:, j] = get_dJdGj(G_b[:, j], theta_b[:, j], B_b[:, j], F,
                               S_b[:, j], G1)
    return dJdG  # size (c)
def __init__(self, k, c, a, c2=None, l=None, node_type='discrete'):
    """
    utils Layer
    :param k: dimension of output's alphabet, which goes from 0 to K-1 (when discrete)
    :param c: the number of hidden states
    :param c2: the number of states of the neighbours
    :param l: number of previous layers to consider. You must pass the
        appropriate number of statistics at training
    :param a: dimension of edges' alphabet, which goes from 0 to A-1
    """
    super().__init__()

    # For comparison w.r.t the Numpy implementation
    # np.random.seed(seed=10)

    self.node_type = node_type
    self.is_layer_0 = True
    if c2 is not None or l is not None:
        assert c2 is not None and l is not None, 'You should specify both C2, L and A'
        self.is_layer_0 = False

    self.eps = 1e-8  # Laplace smoothing
    self.C = c
    self.K = k
    self.orig_A = a
    # may consider a special case of the recurrent arc and the special case
    # of the bottom state
    self.A = a + 2

    if not self.is_layer_0:
        self.C2 = c2
        self.L = l

    # Initialisation of the model's parameters.
    # torch.manual_seed(0)
    if self.is_layer_0:
        # For debugging w.r.t the Numpy version
        # pr = torch.from_numpy(np.random.uniform(size=self.C).astype(np.float32))
        pr = torch.nn.init.uniform_(torch.empty(self.C, dtype=torch.float64))
        self.prior = pr / pr.sum()
        # print(self.prior)

    if self.node_type == 'discrete':
        self.emission = CategoricalEmission(self.K, self.C)
    elif self.node_type == 'continuous':
        self.emission = GaussianEmission(self.K, self.C)
    # print(self.emission)

    if not self.is_layer_0:
        # For debugging w.r.t the Numpy version
        # self.layerS = torch.from_numpy(np.random.uniform(size=self.L).astype(np.float32))
        self.layerS = torch.nn.init.uniform_(
            torch.empty(self.L, dtype=torch.float64))
        self.layerS /= self.layerS.sum()

        self.arcS = torch.zeros((self.L, self.A), dtype=torch.float64)
        self.transition = torch.empty([self.L, self.A, self.C, self.C2],
                                      dtype=torch.float64)

        for layer in range(0, self.L):
            # For debugging w.r.t the Numpy version
            # self.arcS[layer, :] = torch.from_numpy(np.random.uniform(size=self.A).astype(np.float32))
            self.arcS[layer, :] = torch.nn.init.uniform_(self.arcS[layer, :])
            self.arcS[layer, :] /= self.arcS[layer, :].sum()
            for arc in range(0, self.A):
                for j in range(0, self.C2):
                    # For debugging w.r.t the Numpy version
                    # tr = torch.from_numpy(np.random.uniform(size=self.C).astype(np.float32))
                    tr = torch.nn.init.uniform_(torch.empty(self.C))
                    self.transition[layer, arc, :, j] = tr / tr.sum()

        # print(self.arcS)
        # print(self.transition)

    self.init_accumulators()
    loss_pixel = 0
    for l in logit_pixel:
        loss_pixel += symmetric_lovasz_ignore_empty(l.squeeze(1),
                                                    truth_pixel, truth_image)
    loss = symmetric_lovasz(logit.squeeze(1), truth_pixel)
    return 0.05 * loss_image + 0.1 * loss_pixel + 1 * loss


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--basenet", choices=BASENET_CHOICES, default='vgg11',
                        help='model of basenet')
    parser.add_argument("--num-filters", type=int, default=16,
                        help='num filters for decoder')
    args = parser.parse_args()

    net = UNet(**vars(args))
    # print(net)

    parameters = [p for p in net.parameters() if p.requires_grad]
    n_params = sum(p.numel() for p in parameters)
    print('N of parameters {} ({} tensors)'.format(n_params, len(parameters)))

    encoder_parameters = [p for name, p in net.named_parameters()
                          if p.requires_grad and name.startswith('encoder')]
    n_encoder_params = sum(p.numel() for p in encoder_parameters)
    print('N of encoder parameters {} ({} tensors)'.format(
        n_encoder_params, len(encoder_parameters)))
    print('N of decoder parameters {} ({} tensors)'.format(
        n_params - n_encoder_params,
        len(parameters) - len(encoder_parameters)))

    x = torch.empty((1, 3, 128, 128))
    y = net(x)
    print(x.size(), '-->', y.size())
def __init__(
    self,
    in_channels,
    out_channels,
    kernel_size,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    bias=False,
    padding_mode="zeros",
):
    super().__init__()

    self.is_calculated = False

    self.conv_layer = Conv2d(
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding,
        dilation,
        groups,
        bias,
        padding_mode,
    )

    self.kernel_size = self.conv_layer.kernel_size

    # small addition to avoid division by zero
    self.delta = 1e-3

    # freq, theta, sigma are set up according to S. Meshgini,
    # A. Aghagolzadeh and H. Seyedarabi, "Face recognition using
    # Gabor filter bank, kernel principal component analysis
    # and support vector machine"
    self.freq = Parameter(
        (math.pi / 2) * math.sqrt(2)
        ** (-torch.randint(0, 5, (out_channels, in_channels))).type(torch.Tensor),
        requires_grad=True,
    )
    self.theta = Parameter(
        (math.pi / 8)
        * torch.randint(0, 8, (out_channels, in_channels)).type(torch.Tensor),
        requires_grad=True,
    )
    self.sigma = Parameter(math.pi / self.freq, requires_grad=True)
    self.psi = Parameter(math.pi * torch.rand(out_channels, in_channels),
                         requires_grad=True)

    self.x0 = Parameter(torch.ceil(torch.Tensor([self.kernel_size[0] / 2]))[0],
                        requires_grad=False)
    self.y0 = Parameter(torch.ceil(torch.Tensor([self.kernel_size[1] / 2]))[0],
                        requires_grad=False)

    self.y, self.x = torch.meshgrid([
        torch.linspace(-self.x0 + 1, self.x0 + 0, self.kernel_size[0]),
        torch.linspace(-self.y0 + 1, self.y0 + 0, self.kernel_size[1]),
    ])
    self.y = Parameter(self.y)
    self.x = Parameter(self.x)

    self.weight = Parameter(
        torch.empty(self.conv_layer.weight.shape, requires_grad=True),
        requires_grad=True,
    )

    self.register_parameter("freq", self.freq)
    self.register_parameter("theta", self.theta)
    self.register_parameter("sigma", self.sigma)
    self.register_parameter("psi", self.psi)
    self.register_parameter("x_shape", self.x0)
    self.register_parameter("y_shape", self.y0)
    self.register_parameter("y_grid", self.y)
    self.register_parameter("x_grid", self.x)
    self.register_parameter("weight", self.weight)
        2), (torch.rand(S, 2), True, True), 'full'),
    (
        'kl_div',
        F.log_softmax(torch.randn(S, 10), 1),
        (F.softmax(torch.randn(S, 10), 1),),
    ),
    (
        'cross_entropy',
        (3, S),
        (torch.randint(S, (3,), dtype=torch.int64),),
    ),
    (
        'binary_cross_entropy_with_logits',
        (3,),
        (torch.empty(3).random_(2),),
    ),
    (
        'smooth_l1_loss',
        (3, S),
        (non_differentiable(torch.rand(3, S)),),
    ),
    (
        'l1_loss',
        (3, S),
        (non_differentiable(torch.rand(3, S)),),
    ),
    (
        'mse_loss',
        (3, S),
        (non_differentiable(torch.rand(3, S)),),
def test_createParamStatic(self):
    weight = Initializer.createParamStatic((5, 6))
    assert isinstance(weight, torch.Tensor)
    assert weight.size() == torch.empty((5, 6)).size()
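# createParamStatic itself is not shown here. A minimal sketch that would
# satisfy the assertions above (the class name is from the test; the body and
# init scheme are assumptions):
import torch

class Initializer:
    @staticmethod
    def createParamStatic(shape):
        # allocate uninitialized storage of the requested shape,
        # then fill it in place
        weight = torch.empty(shape)
        torch.nn.init.xavier_uniform_(weight)
        return weight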
import CTimeData as CTD
# Specific utilities
import utilities_lib as ul

plt.close("all")  # Close all previous windows

""" PyTorch related library !! """
import torch

""" Basic tensor creation """
x = torch.empty(5, 3)
x = torch.rand(5, 3)
x = torch.zeros(5, 3, dtype=torch.long)

# Construct a tensor from data
x = torch.tensor([5.5, 3])

# ... or create a tensor based on an existing tensor.
# These methods will reuse properties of the input tensor, e.g. dtype,
# unless new values are provided by the user.
x = torch.randn_like(x, dtype=torch.float)
print(x)
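# Note that torch.empty only allocates memory: its values are whatever was in
# that storage, so always overwrite them before reading. A quick illustration:
y = torch.empty(2, 2)  # arbitrary (uninitialized) contents
y.fill_(0.0)           # now safe to use
print(y)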
def get_dJdF(F_b, G, S_b, B_b, F1):
    # F_b: (c, batch_size), S_b: (batch_size, n)
    theta_b = 1 / 2 * torch.matmul(F_b.transpose(0, 1), G)  # theta_b: (batch_size, n)
    dJdF = torch.empty(config.BITS, F_b.shape[1], device='cuda')
    for i in range(F_b.shape[1]):
        dJdF[:, i] = get_dJdFi(F_b[:, i], theta_b[i, :], B_b[:, i], G,
                               S_b[i, :], F1)
    return dJdF  # size (c)
def _run_one_epoch(self, epoch, cross_valid=False):
    start = time.time()
    total_loss = 0
    sum_loss = 0

    data_loader = self.tr_loader if not cross_valid else self.cv_loader
    print('batch length:', len(data_loader))

    # visualizing loss using visdom
    if self.visdom_epoch and not cross_valid:
        vis_opts_epoch = dict(title=self.visdom_id + " epoch " + str(epoch),
                              ylabel='Loss', xlabel='Epoch')
        vis_window_epoch = None
        vis_iters = torch.arange(1, len(data_loader) + 1)
        vis_iters_loss = torch.Tensor(len(data_loader))

    total_correct = 0
    total_sen = 0
    labels_cat = torch.empty(0, dtype=torch.int64)
    predicted_cat = torch.empty(0, dtype=torch.int64)
    # class names
    target_names = ['aloe', 'burger', 'cabbage', 'candied_fruits', 'carrots',
                    'chips', 'chocolate', 'drinks', 'fries', 'grapes',
                    'gummies', 'ice-cream', 'jelly', 'noodles', 'pickles',
                    'pizza', 'ribs', 'salmon', 'soup', 'wings']

    for i, (data) in enumerate(data_loader):
        padded_input, input_lengths, labels = data
        if len(input_lengths) <= 1:
            continue
        total_sen = total_sen + padded_input.size(0)
        padded_input = padded_input.cuda()
        input_lengths = input_lengths.cuda()
        labels = labels.cuda()

        pred = self.model(padded_input, input_lengths)
        model_out = pred[0]
        loss = F.cross_entropy(model_out, labels, reduction='sum')
        sum_loss = sum_loss + loss.item()
        loss = loss / padded_input.size(0)

        pred_res = model_out.max(1)[1]
        gold = labels.contiguous().view(-1)
        n_correct_res = pred_res.eq(gold)
        n_correct_res = n_correct_res.sum().item()
        total_correct = total_correct + n_correct_res

        predicted_cat = torch.cat((predicted_cat, pred_res.cpu()), -1)
        labels_cat = torch.cat((labels_cat, labels.cpu()), -1)

        if not cross_valid:
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        total_loss += loss.item()

        # if self.model_choose == 'speaker_classify' and not cross_valid:
        if i % self.print_freq == 0:
            print('Epoch {0} | Iter {1} | Average Loss {2:.3f} | '
                  'Current Loss {3:.6f} | {4:.1f} ms/batch'.format(
                      epoch + 1, i + 1, total_loss / (i + 1), loss.item(),
                      1000 * (time.time() - start) / (i + 1)), flush=True)

        # visualizing loss using visdom
        if self.visdom_epoch and not cross_valid:
            vis_iters_loss[i] = loss.item()
            if i % self.print_freq == 0:
                x_axis = vis_iters[:i + 1]
                y_axis = vis_iters_loss[:i + 1]
                if vis_window_epoch is None:
                    vis_window_epoch = self.vis.line(X=x_axis, Y=y_axis,
                                                     opts=vis_opts_epoch)
                else:
                    self.vis.line(X=x_axis, Y=y_axis, win=vis_window_epoch,
                                  update='replace')

    print('n_correct:', total_correct)
    print('total_sen:', total_sen)
    print('acc:', total_correct / total_sen)
    print('sum of per-batch average losses:', total_loss / (i + 1))
    print('total loss averaged over all sentences:', sum_loss / total_sen)
    print(metrics.classification_report(labels_cat, predicted_cat,
                                        target_names=target_names, digits=4))
    return sum_loss / total_sen, total_correct / total_sen
import torch import torch.nn as nn import torch.nn.init as init import matplotlib.pyplot as plt import Least_squares.settings n, d = Least_squares.settings.n, Least_squares.settings.d d_1, d_2 = 10, 1 groups = 3 data = torch.empty(groups, n, d + 1) with open('data.in', 'r') as f: for i in range(groups): for j in range(n): data_point = f.readline().split() for k in range(d + 1): data[i, j, k] = float(data_point[k]) lin_model = nn.Sequential( nn.Linear(d, d_2, bias=False) ) lin_nn_model = nn.Sequential( nn.Linear(d, d_1, bias=False), nn.Linear(d_1, d_2, bias=False) ) ReLU_model = nn.Sequential( nn.Linear(d, d_1), nn.ReLU(), nn.Linear(d_1, d_2) ) loss = nn.MSELoss()
def update(config):
    # Load model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'], nnet['num_layers'],
                            nnet['hidden_dim'], nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (nnet['num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))

    criterion = nn.MSELoss()
    dev_criterion = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")
    lr = config.learning_rate

    # Figure out the feature pipeline
    shuff_file = str(os.getpid()) + '.scp'
    shell_cmd = "cat {:s} | shuf > {:s}".format(config.scp, shuff_file)
    r = subprocess.run(shell_cmd, shell=True, stdout=subprocess.PIPE)

    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if config.override_trans_path is not None:
        trans_path = config.override_trans_path

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, shuff_file)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, shuff_file)
    else:
        cmd = shuff_file

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(context[0], context[1])

    # Load performance monitoring models
    pm_paths = config.pms.split(',')

    pm_models = []
    feat_dims = []
    for path in pm_paths:
        pm_model = torch.load(path, map_location=lambda storage, loc: storage)
        ae_model = autoencoderRNN(pm_model['feature_dim'], pm_model['feature_dim'],
                                  pm_model['bn_dim'], pm_model['encoder_num_layers'],
                                  pm_model['decoder_num_layers'], pm_model['hidden_dim'])
        ae_model.load_state_dict(pm_model['model_state_dict'])
        feat_dims.append(pm_model['feature_dim'])

        if config.use_gpu:
            ae_model.cuda()

        # Do not update the performance monitoring block
        for p in ae_model.parameters():
            p.requires_grad = False

        pm_models.append(ae_model)

    cmvn_paths = config.cmvns.split(',')
    means = []
    for path in cmvn_paths:
        mean, _ = get_cmvn(path)
        means.append(mean)

    if len(cmvn_paths) != len(pm_paths):
        logging.error("Number of cmvn paths not equal to number of model paths, exiting training!")
        sys.exit(1)
    else:
        num_pm_models = len(pm_paths)

    ep_loss_dev = []
    ep_fer_dev = []

    load_chunk = torch.load(config.dev_egs)
    dev_data = load_chunk[:, 0:-1]
    dev_labels = load_chunk[:, -1].long()
    dataset = nnetDataset(dev_data, dev_labels)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=50000, shuffle=True)

    # Compute initial performance on dev set
    val_losses = []
    val_fer = []

    for batch_x, batch_l in data_loader:
        if config.use_gpu:
            batch_x = Variable(batch_x).cuda()
            batch_l = Variable(batch_l).cuda()
        else:
            batch_x = Variable(batch_x)
            batch_l = Variable(batch_l)

        _, batch_x = model(batch_x)
        val_loss = dev_criterion(batch_x, batch_l)
        val_losses.append(val_loss.item())

        if config.use_gpu:
            val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
        else:
            val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))

    ep_loss_dev.append(np.mean(val_losses))
    ep_fer_dev.append(np.mean(val_fer))

    print_log = "Epoch: -1 update Dev loss: {:.3f} :: Dev FER: {:.2f}".format(
        np.mean(val_losses), np.mean(val_fer))
    logging.info(print_log)

    for epoch in range(config.epochs):
        batches = []
        for idx in range(num_pm_models):
            if config.use_gpu:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
            else:
                batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
            batches.append(batch)

        lens = []
        utt_count = 0
        update_num = 0
        val_losses = []
        val_fer = []
        tr_losses = []
        for idx in range(num_pm_models):
            tr_losses.append([])

        for utt_id, mat in kaldi_io.read_mat_ark(cmd):
            lens.append(min(mat.shape[0], config.max_seq_len))

            if config.use_gpu:
                out = model(Variable(torch.FloatTensor(mat)).cuda())
            else:
                out = model(Variable(torch.FloatTensor(mat)))

            if config.use_gpu:
                post = out[1] - Variable(torch.FloatTensor(means[0])).cuda()
            else:
                post = out[1] - Variable(torch.FloatTensor(means[0]))

            post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
            batch = batches[0]
            batch = torch.cat([batch, post[None, :, :]], 0)
            batches[0] = batch

            for idx in range(1, num_pm_models):
                if config.use_gpu:
                    post = out[0][-idx] - Variable(torch.FloatTensor(means[idx])).cuda()
                else:
                    post = out[0][-idx] - Variable(torch.FloatTensor(means[idx]))
                post = F.pad(post, (0, 0, 0, config.max_seq_len - post.size(0)))
                batch = batches[idx]
                batch = torch.cat([batch, post[None, :, :]], 0)
                batches[idx] = batch

            utt_count += 1

            if utt_count == config.batch_size:
                update_num += 1

                ## DO THE ADAPTATION

                lens = torch.IntTensor(lens)
                _, indices = torch.sort(lens, descending=True)

                if config.use_gpu:
                    loss_all = torch.FloatTensor([1]).cuda()
                else:
                    loss_all = torch.FloatTensor([1])

                for idx in range(num_pm_models):
                    batch_x = batches[idx][indices]
                    ae_model = pm_models[idx]
                    batch_l = lens[indices]

                    if config.time_shift == 0:
                        outputs = ae_model(batch_x, batch_l)
                    else:
                        outputs = ae_model(batch_x[:, :-config.time_shift, :],
                                           batch_l - config.time_shift)

                    if config.time_shift == 0:
                        loss = stable_mse(outputs, batch_x)
                    else:
                        loss = stable_mse(outputs, batch_x[:, config.time_shift:, :])

                    loss_all *= loss
                    tl = tr_losses[idx]
                    tl.append(loss.item())
                    tr_losses[idx] = tl

                # if idx < num_pm_models - 1:
                #     loss.backward(retain_graph=True)
                # else:
                #     loss.backward()
                optimizer.zero_grad()
                loss_all.backward()
                optimizer.step()

                batches = []
                for idx in range(num_pm_models):
                    if config.use_gpu:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx]).cuda()
                    else:
                        batch = torch.empty(0, config.max_seq_len, feat_dims[idx])
                    batches.append(batch)

                lens = []
                utt_count = 0

        logging.info("Finished unsupervised adaptation for epoch {:d} with multi-layer RNN-AE Loss".format(epoch))

        # CHECK IF ADAPTATION IS WORKING AT ALL
        for batch_x, batch_l in data_loader:
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)

            _, batch_x = model(batch_x)
            val_loss = dev_criterion(batch_x, batch_l)
            val_losses.append(val_loss.item())

            if config.use_gpu:
                val_fer.append(compute_fer(batch_x.cpu().data.numpy(), batch_l.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(batch_x.data.numpy(), batch_l.data.numpy()))

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} update ".format(epoch)
        for idx in range(num_pm_models):
            print_log = print_log + "Tr loss layer {:d} = {:.3f} | ".format(idx, np.mean(tr_losses[idx]))
        print_log = print_log + "Dev loss: {:.3f} | Dev FER: {:.2f}".format(np.mean(val_losses), np.mean(val_fer))
        logging.info(print_log)

        model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch + 1) + '.model')
        torch.save({
            'epoch': epoch + 1,
            'ep_loss_dev': ep_loss_dev,
            'ep_fer_dev': ep_fer_dev,
            'tr_losses': tr_losses,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()}, open(model_path, 'wb'))

        # Halve the learning rate
        optimizer, lr = adjust_learning_rate(optimizer, lr, config.lr_factor)
        logging.info('Learning rate changed to {:f}'.format(lr))
#!/usr/bin/python3
# coding: utf-8
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from itertools import chain  # merges generators

##################################################################
## Initialization
print(nn.init.calculate_gain('relu'))        # 1.4142135623730951
print(nn.init.calculate_gain('leaky_relu'))  # 1.4141428569978354
w = torch.empty(3, 5); print(w)  # returns a tensor filled with uninitialized data
print(nn.init.xavier_uniform_(w))
print(nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu')))

##################################################################
## Activation
x = torch.Tensor([1]); print(x)
print(F.elu(torch.Tensor([1, 0, -1])))   # tensor([ 1.0000, 0.0000, -0.6321]); ELU(x) = max(0,x) + min(0, alpha * (exp(x) - 1)); alpha=1.0
print(F.relu(torch.Tensor([1, 0, -1])))  # tensor([ 1., 0., 0.])
print(torch.sigmoid(x), F.softmax(x, dim=-1))  # tensor([0.7311]) tensor([1.]); softmax over a single element is always 1
print(torch.sigmoid(torch.Tensor([0, 1, 2, 3])))  # tensor([0.5000, 0.7311, 0.8808, 0.9526])
print(torch.tanh(torch.Tensor([0, 1, 2, 3])))     # tensor([0.0000, 0.7616, 0.9640, 0.9951])
print(F.softmax(torch.Tensor([1, 2, 3]), dim=0))  # tensor([ 0.0900, 0.2447, 0.6652]); dim=0 should be added
print(F.softmax(F.softmax(torch.Tensor([1, 2, 3]), dim=0), dim=0))  # tensor([0.2535, 0.2959, 0.4506]); do not apply softmax() twice!
print(np.log(F.softmax(torch.Tensor([1, 2, 3]), dim=0)))  # tensor([-2.4076, -1.4076, -0.4076])
print(F.log_softmax(torch.Tensor([1, 2, 3]), dim=0))      # tensor([-2.4076, -1.4076, -0.4076]); equal to log(softmax(x))
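# In practice these initializers are usually applied to a whole model through
# Module.apply, which visits every submodule; a minimal sketch appended as an
# illustration (the two-layer net below is hypothetical, not part of the
# script above).
def init_weights(m):
    # xavier-init every Linear layer and zero its bias; leave other modules untouched
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('relu'))
        if m.bias is not None:
            nn.init.zeros_(m.bias)

net = nn.Sequential(nn.Linear(5, 8), nn.ReLU(), nn.Linear(8, 2))
net.apply(init_weights)  # .apply() recurses into submodules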
def __init__(self, tensor): self.floating_dtype = tensor.dtype.is_floating_point self.int_mode = True self.sci_mode = False self.max_width = 1 if not self.floating_dtype: copy = torch.empty(tensor.size(), dtype=torch.long).copy_(tensor).view(tensor.nelement()) for value in copy.tolist(): value_str = '{}'.format(value) self.max_width = max(self.max_width, len(value_str)) else: copy = torch.empty(tensor.size(), dtype=torch.float64).copy_(tensor).view(tensor.nelement()) copy_list = copy.tolist() try: for value in copy_list: if value != math.ceil(value): self.int_mode = False break # nonfinites will throw errors except (ValueError, OverflowError): self.int_mode = False if self.int_mode: for value in copy_list: value_str = '{:.0f}'.format(value) if math.isnan(value) or math.isinf(value): self.max_width = max(self.max_width, len(value_str)) else: # in int_mode for floats, all numbers are integers, and we append a decimal to nonfinites # to indicate that the tensor is of floating type. add 1 to the len to account for this. self.max_width = max(self.max_width, len(value_str) + 1) else: copy_abs = copy.abs() pos_inf_mask = copy_abs.eq(float('inf')) neg_inf_mask = copy_abs.eq(float('-inf')) nan_mask = copy_abs.ne(copy) invalid_value_mask = pos_inf_mask + neg_inf_mask + nan_mask if invalid_value_mask.all(): example_value = 0 else: example_value = copy_abs[invalid_value_mask.eq(0)][0] copy_abs[invalid_value_mask] = example_value exp_min = copy_abs.min() if exp_min != 0: exp_min = math.floor(math.log10(exp_min)) + 1 else: exp_min = 1 exp_max = copy_abs.max() if exp_max != 0: exp_max = math.floor(math.log10(exp_max)) + 1 else: exp_max = 1 # these conditions for using scientific notation are based on numpy if exp_max - exp_min > PRINT_OPTS.precision or exp_max > 8 or exp_min < -4: self.sci_mode = True for value in copy_list: value_str = ('{{:.{}e}}').format(PRINT_OPTS.precision).format(value) self.max_width = max(self.max_width, len(value_str)) else: for value in copy_list: value_str = ('{{:.{}f}}').format(PRINT_OPTS.precision).format(value) self.max_width = max(self.max_width, len(value_str))
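# The sci_mode heuristics above are what users steer through
# torch.set_printoptions; a short demo of how the value range flips the repr
# into scientific notation (the thresholds follow the numpy-style rules in
# the formatter above).
import torch

torch.set_printoptions(precision=4)
print(torch.tensor([1.0, 100.0]))  # narrow exponent range -> fixed point
print(torch.tensor([1e-6, 1.0]))   # exponent below -4 -> scientific notation
print(torch.tensor([1e12]))        # exponent above 8 -> scientific notation
torch.set_printoptions(profile="default")  # restore defaults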
def double(x):
    a = torch.zeros(1, len(x), dtype=torch.double)
    # out= must use the promoted result dtype (double + float -> double);
    # a default-float buffer would make torch.add raise a promotion error
    result = torch.empty(1, len(x), dtype=torch.double)
    torch.add(a, x, out=result)
    return result
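# A quick usage check for double() above, assuming a float32 input (the case
# the zero-padding trick appears written for):
import torch

x = torch.tensor([1.0, 2.0, 3.0])  # float32
y = double(x)
print(y.dtype, y.shape)            # torch.float64 torch.Size([1, 3])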
def __init__(self, in_channel: int = 512, q_k_v_channel: int = 64): super(AttentionHead, self).__init__() self.q_k_v_channel = q_k_v_channel self.weights = nn.parameter.Parameter( torch.empty((in_channel, q_k_v_channel * 3))) self.score_norm = math.sqrt(q_k_v_channel)
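# The __init__ above packs the Q, K, and V projections into a single
# (in_channel, 3 * q_k_v_channel) matrix. A hedged sketch of a matching
# forward pass; the chunking, the xavier init, and the scaled dot-product are
# assumptions about how the head is meant to be used, not the original module.
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class AttentionHeadSketch(nn.Module):
    def __init__(self, in_channel: int = 512, q_k_v_channel: int = 64):
        super().__init__()
        self.q_k_v_channel = q_k_v_channel
        self.weights = nn.Parameter(torch.empty(in_channel, q_k_v_channel * 3))
        nn.init.xavier_uniform_(self.weights)  # torch.empty alone is uninitialized
        self.score_norm = math.sqrt(q_k_v_channel)

    def forward(self, x):                          # x: (batch, seq_len, in_channel)
        q, k, v = (x @ self.weights).chunk(3, dim=-1)  # one matmul, three slices
        scores = q @ k.transpose(-2, -1) / self.score_norm
        return F.softmax(scores, dim=-1) @ v       # (batch, seq_len, q_k_v_channel)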
def _generate(
    self,
    sample: Dict[str, Dict[str, Tensor]],
    prefix_tokens: Optional[Tensor] = None,
    constraints: Optional[Tensor] = None,
    bos_token: Optional[int] = None,
):
    incremental_states = torch.jit.annotate(
        List[Dict[str, Dict[str, Optional[Tensor]]]],
        [
            torch.jit.annotate(Dict[str, Dict[str, Optional[Tensor]]], {})
            for i in range(self.model.models_size)
        ],
    )
    net_input = sample["net_input"]

    if "src_tokens" in net_input:
        src_tokens = net_input["src_tokens"]
        # length of the source text being the character length except EndOfSentence and pad
        src_lengths = ((src_tokens.ne(self.eos)
                        & src_tokens.ne(self.pad)).long().sum(dim=1))
    elif "source" in net_input:
        src_tokens = net_input["source"]
        src_lengths = (net_input["padding_mask"].size(-1) -
                       net_input["padding_mask"].sum(-1)
                       if net_input["padding_mask"] is not None else
                       torch.tensor(src_tokens.size(-1)).to(src_tokens))
    else:
        raise Exception("expected src_tokens or source in net input")

    # bsz: total number of sentences in beam
    # Note that src_tokens may have more than 2 dimensions (e.g. audio features)
    bsz, src_len = src_tokens.size()[:2]
    beam_size = self.beam_size

    if constraints is not None and not self.search.supports_constraints:
        raise NotImplementedError(
            "Target-side constraints were provided, but search method doesn't support them"
        )

    # Initialize constraints, when active
    self.search.init_constraints(constraints, beam_size)

    max_len: int = -1
    if self.match_source_len:
        max_len = src_lengths.max().item()
    else:
        max_len = min(
            int(self.max_len_a * src_len + self.max_len_b),
            # exclude the EOS marker
            self.model.max_decoder_positions() - 1,
        )
    assert (
        self.min_len <= max_len
    ), "min_len cannot be larger than max_len, please adjust these!"

    # compute the encoder output for each beam
    encoder_outs = self.model.forward_encoder(net_input)

    # placeholder of indices for bsz * beam_size to hold tokens and accumulative scores
    new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1)
    new_order = new_order.to(src_tokens.device).long()
    encoder_outs = self.model.reorder_encoder_out(encoder_outs, new_order)
    # ensure encoder_outs is a List.
    assert encoder_outs is not None

    # initialize buffers
    scores = (torch.zeros(bsz * beam_size, max_len + 1).to(src_tokens).float()
              )  # +1 for eos; pad is never chosen for scoring
    tokens = (torch.zeros(bsz * beam_size,
                          max_len + 2).to(src_tokens).long().fill_(
                              self.pad))  # +2 for eos and pad
    tokens[:, 0] = self.eos if bos_token is None else bos_token
    attn: Optional[Tensor] = None

    # A list that indicates candidates that should be ignored.
    # For example, suppose we're sampling and have already finalized 2/5
    # samples. Then cands_to_ignore would mark 2 positions as being ignored,
    # so that we only finalize the remaining 3 samples.
    cands_to_ignore = (torch.zeros(bsz, beam_size).to(src_tokens).eq(-1)
                       )  # forward- and backward-compatible False mask

    # list of completed sentences
    finalized = torch.jit.annotate(
        List[List[Dict[str, Tensor]]],
        [
            torch.jit.annotate(List[Dict[str, Tensor]], [])
            for i in range(bsz)
        ],
    )  # contains lists of dictionaries of information about the hypothesis being finalized at each step

    finished = [
        False for i in range(bsz)
    ]  # a boolean array indicating if the sentence at the index is finished or not
    num_remaining_sent = bsz  # number of sentences remaining

    # number of candidate hypos per step
    cand_size = 2 * beam_size  # 2 x beam size in case half are EOS

    # offset arrays for converting between different indexing schemes
    bbsz_offsets = ((torch.arange(0, bsz) *
                     beam_size).unsqueeze(1).type_as(tokens).to(
                         src_tokens.device))
    cand_offsets = torch.arange(0, cand_size).type_as(tokens).to(
        src_tokens.device)

    reorder_state: Optional[Tensor] = None
    batch_idxs: Optional[Tensor] = None

    original_batch_idxs: Optional[Tensor] = None
    if "id" in sample and isinstance(sample["id"], Tensor):
        original_batch_idxs = sample["id"]
    else:
        original_batch_idxs = torch.arange(0, bsz).type_as(tokens)

    for step in range(max_len + 1):  # one extra step for EOS marker
        # reorder decoder internal states based on the prev choice of beams
        if reorder_state is not None:
            if batch_idxs is not None:
                # update beam indices to take into account removed sentences
                corr = batch_idxs - torch.arange(
                    batch_idxs.numel()).type_as(batch_idxs)
                reorder_state.view(-1, beam_size).add_(
                    corr.unsqueeze(-1) * beam_size)
                original_batch_idxs = original_batch_idxs[batch_idxs]
            self.model.reorder_incremental_state(incremental_states,
                                                 reorder_state)
            encoder_outs = self.model.reorder_encoder_out(
                encoder_outs, reorder_state)

        lprobs, avg_attn_scores = self.model.forward_decoder(
            tokens[:, :step + 1],
            encoder_outs,
            incremental_states,
            self.temperature,
        )

        if self.lm_model is not None:
            lm_out = self.lm_model(tokens[:, :step + 1])
            probs = self.lm_model.get_normalized_probs(lm_out,
                                                       log_probs=True,
                                                       sample=None)
            probs = probs[:, -1, :] * self.lm_weight
            lprobs += probs

        lprobs[lprobs != lprobs] = torch.tensor(-math.inf).to(lprobs)

        lprobs[:, self.pad] = -math.inf  # never select pad
        lprobs[:, self.unk] -= self.unk_penalty  # apply unk penalty

        # handle max length constraint
        if step >= max_len:
            lprobs[:, :self.eos] = -math.inf
            lprobs[:, self.eos + 1:] = -math.inf

        # handle prefix tokens (possibly with different lengths)
        if (prefix_tokens is not None and step < prefix_tokens.size(1)
                and step < max_len):
            lprobs, tokens, scores = self._prefix_tokens(
                step, lprobs, scores, tokens, prefix_tokens, beam_size)
        elif step < self.min_len:
            # minimum length constraint (does not apply if using prefix_tokens)
            lprobs[:, self.eos] = -math.inf

        # Record attention scores (only supported when avg_attn_scores is a Tensor)
        if avg_attn_scores is not None:
            if attn is None:
                attn = torch.empty(bsz * beam_size,
                                   avg_attn_scores.size(1),
                                   max_len + 2).to(scores)
            attn[:, :, step + 1].copy_(avg_attn_scores)

        scores = scores.type_as(lprobs)
        eos_bbsz_idx = torch.empty(0).to(
            tokens
        )  # indices of hypotheses ending with eos (finished sentences)
        eos_scores = torch.empty(0).to(
            scores
        )  # scores of hypotheses ending with eos (finished sentences)

        if self.should_set_src_lengths:
            self.search.set_src_lengths(src_lengths)

        if self.repeat_ngram_blocker is not None:
            lprobs = self.repeat_ngram_blocker(tokens, lprobs, bsz,
                                               beam_size, step)

        # Shape: (batch, cand_size)
        cand_scores, cand_indices, cand_beams = self.search.step(
            step,
            lprobs.view(bsz, -1, self.vocab_size),
            scores.view(bsz, beam_size, -1)[:, :, :step],
            tokens[:, :step + 1],
            original_batch_idxs,
        )

        # cand_bbsz_idx contains beam indices for the top candidate
        # hypotheses, with a range of values: [0, bsz*beam_size),
        # and dimensions: [bsz, cand_size]
        cand_bbsz_idx = cand_beams.add(bbsz_offsets)

        # finalize hypotheses that end in eos
        # Shape of eos_mask: (batch size, beam size)
        eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf)
        eos_mask[:, :beam_size][cands_to_ignore] = torch.tensor(0).to(
            eos_mask)

        # only consider eos when it's among the top beam_size indices
        # Now we know what beam item(s) to finish
        # Shape: 1d list of absolute-numbered
        eos_bbsz_idx = torch.masked_select(cand_bbsz_idx[:, :beam_size],
                                           mask=eos_mask[:, :beam_size])

        finalized_sents: List[int] = []
        if eos_bbsz_idx.numel() > 0:
            eos_scores = torch.masked_select(cand_scores[:, :beam_size],
                                             mask=eos_mask[:, :beam_size])

            finalized_sents = self.finalize_hypos(
                step,
                eos_bbsz_idx,
                eos_scores,
                tokens,
                scores,
                finalized,
                finished,
                beam_size,
                attn,
                src_lengths,
                max_len,
            )
            num_remaining_sent -= len(finalized_sents)

        assert num_remaining_sent >= 0
        if num_remaining_sent == 0:
            break
        if self.search.stop_on_max_len and step >= max_len:
            break
        assert step < max_len, f"{step} < {max_len}"

        # Remove finalized sentences (ones for which {beam_size}
        # finished hypotheses have been generated) from the batch.
        if len(finalized_sents) > 0:
            new_bsz = bsz - len(finalized_sents)

            # construct batch_idxs which holds indices of batches to keep for the next pass
            batch_mask = torch.ones(bsz,
                                    dtype=torch.bool,
                                    device=cand_indices.device)
            batch_mask[finalized_sents] = False
            # TODO replace `nonzero(as_tuple=False)` after TorchScript supports it
            batch_idxs = torch.arange(
                bsz, device=cand_indices.device).masked_select(batch_mask)

            # Choose the subset of the hypothesized constraints that will continue
            self.search.prune_sentences(batch_idxs)

            eos_mask = eos_mask[batch_idxs]
            cand_beams = cand_beams[batch_idxs]
            bbsz_offsets.resize_(new_bsz, 1)
            cand_bbsz_idx = cand_beams.add(bbsz_offsets)
            cand_scores = cand_scores[batch_idxs]
            cand_indices = cand_indices[batch_idxs]

            if prefix_tokens is not None:
                prefix_tokens = prefix_tokens[batch_idxs]
            src_lengths = src_lengths[batch_idxs]
            cands_to_ignore = cands_to_ignore[batch_idxs]

            scores = scores.view(bsz, -1)[batch_idxs].view(
                new_bsz * beam_size, -1)
            tokens = tokens.view(bsz, -1)[batch_idxs].view(
                new_bsz * beam_size, -1)
            if attn is not None:
                attn = attn.view(bsz, -1)[batch_idxs].view(
                    new_bsz * beam_size, attn.size(1), -1)
            bsz = new_bsz
        else:
            batch_idxs = None

        # Set active_mask so that values > cand_size indicate eos hypos
        # and values < cand_size indicate candidate active hypos.
        # After, the min values per row are the top candidate active hypos.
        # Rewrite the operator since the element-wise or is not supported in torchscript.
        eos_mask[:, :beam_size] = ~((~cands_to_ignore) &
                                    (~eos_mask[:, :beam_size]))
        active_mask = torch.add(
            eos_mask.type_as(cand_offsets) * cand_size,
            cand_offsets[:eos_mask.size(1)],
        )

        # get the top beam_size active hypotheses, which are just
        # the hypos with the smallest values in active_mask.
        # {active_hypos} indicates which {beam_size} hypotheses
        # from the list of {2 * beam_size} candidates were
        # selected. Shapes: (batch size, beam size)
        new_cands_to_ignore, active_hypos = torch.topk(active_mask,
                                                       k=beam_size,
                                                       dim=1,
                                                       largest=False)

        # update cands_to_ignore to ignore any finalized hypos.
cands_to_ignore = new_cands_to_ignore.ge(cand_size)[:, :beam_size] # Make sure there is at least one active item for each sentence in the batch. assert (~cands_to_ignore).any(dim=1).all() # update cands_to_ignore to ignore any finalized hypos # {active_bbsz_idx} denotes which beam number is continued for each new hypothesis (a beam # can be selected more than once). active_bbsz_idx = torch.gather(cand_bbsz_idx, dim=1, index=active_hypos) active_scores = torch.gather(cand_scores, dim=1, index=active_hypos) active_bbsz_idx = active_bbsz_idx.view(-1) active_scores = active_scores.view(-1) # copy tokens and scores for active hypotheses # Set the tokens for each beam (can select the same row more than once) tokens[:, :step + 1] = torch.index_select(tokens[:, :step + 1], dim=0, index=active_bbsz_idx) # Select the next token for each of them tokens.view(bsz, beam_size, -1)[:, :, step + 1] = torch.gather(cand_indices, dim=1, index=active_hypos) if step > 0: scores[:, :step] = torch.index_select(scores[:, :step], dim=0, index=active_bbsz_idx) scores.view(bsz, beam_size, -1)[:, :, step] = torch.gather(cand_scores, dim=1, index=active_hypos) # Update constraints based on which candidates were selected for the next beam self.search.update_constraints(active_hypos) # copy attention for active hypotheses if attn is not None: attn[:, :, :step + 2] = torch.index_select( attn[:, :, :step + 2], dim=0, index=active_bbsz_idx) # reorder incremental state in decoder reorder_state = active_bbsz_idx # sort by score descending for sent in range(len(finalized)): scores = torch.tensor( [float(elem["score"].item()) for elem in finalized[sent]]) _, sorted_scores_indices = torch.sort(scores, descending=True) finalized[sent] = [ finalized[sent][ssi] for ssi in sorted_scores_indices ] finalized[sent] = torch.jit.annotate(List[Dict[str, Tensor]], finalized[sent]) return finalized
def test_mlp_sanity(): mlp = MLP([100, 10, 2]) with torch.no_grad(): x = torch.empty((5, 100)).normal_() mlp(x)
def __init__(self, length): super().__init__() self.u = Parameter(torch.empty(length)) self.reset_parameters()
# -*- coding: utf-8 -*- """ Spyder Editor This is a temporary script file. """ import torch #------------------Tensors-------------------- #5x3 matrix uninitialized x = torch.empty(5, 3) print(x) #Randomly initialized matrix y = torch.rand(5, 3) print(y) #Matrix filled with zeros and dtype long z = torch.zeros(5, 3, dtype=torch.long) print(z) #Constructing the tensor from data a = torch.tensor([5.5, 3]) print(a) #Checking the size of tensor print(x.size()) #-----------------Operations-------------------
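# The script breaks off at its Operations header; a minimal continuation in
# the same style, reusing x and y from above (standard PyTorch addition forms
# only, added here as an illustrative sketch).
#Addition: operator syntax
print(x + y)
#Addition: functional syntax
print(torch.add(x, y))
#Addition: writing into a preallocated output tensor
result = torch.empty(5, 3)
torch.add(x, y, out=result)
print(result)
#Addition: in-place (mutates y; in-place ops carry a trailing underscore)
y.add_(x)
print(y)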
def _keypoints_to_vector_field(
        keypoints: torch.Tensor, rois: torch.Tensor,
        vector_field_size: int) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Encode keypoint locations into target vector fields for use in
    SoftmaxWithLoss across space.

    Maps keypoints from the half-open interval [x1, x2) on continuous image
    coordinates to the closed interval [0, vector_field_size - 1] on discrete
    image coordinates. We use the continuous-discrete conversion from Heckbert
    1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5,
    where d is a discrete coordinate and c is a continuous coordinate.

    Arguments:
        keypoints: tensor of keypoint locations of shape (N, K, 3).
        rois: Nx4 tensor of rois in xyxy format.
        vector_field_size: integer side length of the square vector field.

    Returns:
        vector_fields: A tensor of shape (N, K*2, W, H) containing a vector
            field map, where W and H are the width and height of the map.
        valid: A tensor of shape (N, K) containing whether each keypoint is
            in the roi or not.
    """
    if rois.numel() == 0:
        return rois.new().long(), rois.new().long()
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = vector_field_size / (rois[:, 2] - rois[:, 0])
    scale_y = vector_field_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()

    x[x_boundary_inds] = vector_field_size - 1
    y[y_boundary_inds] = vector_field_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < vector_field_size) & (
        y < vector_field_size)
    vis = keypoints[..., 2] > 0
    # a keypoint counts as valid when it is visible and lands inside the roi
    valid = (valid_loc & vis).long()
    valid = valid.repeat_interleave(2)

    lin_ind = torch.empty(keypoints.shape[0], keypoints.shape[1] * 2,
                          vector_field_size, vector_field_size)
    for j, kpt_2d_list in enumerate(zip(x, y)):
        x_list = kpt_2d_list[0]
        y_list = kpt_2d_list[1]
        if len(x_list) == len(y_list):
            for i in range(len(x_list)):
                kpt = torch.tensor([[x_list[i], y_list[i]]])
                vec_field = compute_vertex(vector_field_size, kpt)
                lin_ind[j, 2 * i, :, :] = torch.tensor(vec_field)[:, :, 0]
                lin_ind[j, 2 * i + 1, :, :] = torch.tensor(vec_field)[:, :, 1]
    vector_fields = lin_ind.cuda()
    valid = torch.reshape(valid, (vector_fields.shape[0], -1))
    # print("vector field", vector_fields)
    return vector_fields, valid
import numpy as np import torch import torch.nn as nn import torch.nn.functional as F # Part 1 intro https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py torch.empty(5, 3) torch.empty((5, 3)) torch.rand(5, 3) torch.zeros(5, 3, dtype=torch.long) x = torch.tensor([5, 5, 3]) x x = x.new_ones(5, 3, dtype=torch.double) x = torch.rand_like(x, dtype=torch.float) x x.size() y = torch.rand(5, 3) x + y torch.add(x, y) result = torch.empty(5, 3) torch.add(x, y, out=result) result y.add_(x) x[:, 1]
""" Tensors ======= Tensors behave almost exactly the same way in PyTorch as they do in Torch. Create a tensor of size (5 x 7) with uninitialized memory: """ import torch a = torch.empty(5, 7, dtype=torch.float) ############################################################### # Initialize a double tensor randomized with a normal distribution with mean=0, # var=1: a = torch.randn(5, 7, dtype=torch.double) print(a) print(a.size()) ############################################################### # .. note:: # ``torch.Size`` is in fact a tuple, so it supports the same operations # # Inplace / Out-of-place # ---------------------- # # The first difference is that ALL operations on the tensor that operate # in-place on it will have an ``_`` postfix. For example, ``add`` is the
def test_constrained_expected_improvement(self, cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    for dtype in (torch.float, torch.double):
        # one constraint
        mean = torch.tensor([[-0.5, 0.0]], device=device, dtype=dtype).unsqueeze(
            dim=-2
        )
        variance = torch.ones(1, 2, device=device, dtype=dtype).unsqueeze(dim=-2)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        module = ConstrainedExpectedImprovement(
            model=mm, best_f=0.0, objective_index=0, constraints={1: [None, 0]}
        )
        X = torch.empty(1, 1, device=device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected_unconstrained = torch.tensor(
            0.19780, device=device, dtype=dtype
        )
        ei_expected = ei_expected_unconstrained * 0.5
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

        # check that error raised if no constraints
        with self.assertRaises(ValueError):
            module = ConstrainedExpectedImprovement(
                model=mm, best_f=0.0, objective_index=0, constraints={}
            )

        # check that error raised if objective is a constraint
        with self.assertRaises(ValueError):
            module = ConstrainedExpectedImprovement(
                model=mm, best_f=0.0, objective_index=0, constraints={0: [None, 0]}
            )

        # check that error raised if constraint lower > upper
        with self.assertRaises(ValueError):
            module = ConstrainedExpectedImprovement(
                model=mm, best_f=0.0, objective_index=0, constraints={0: [1, 0]}
            )

        # three constraints
        N = torch.distributions.Normal(loc=0.0, scale=1.0)
        a = N.icdf(torch.tensor(0.75))  # get a so that P(-a <= N <= a) = 0.5
        mean = torch.tensor(
            [[-0.5, 0.0, 5.0, 0.0]], device=device, dtype=dtype
        ).unsqueeze(dim=-2)
        variance = torch.ones(1, 4, device=device, dtype=dtype).unsqueeze(dim=-2)
        mm = MockModel(MockPosterior(mean=mean, variance=variance))
        module = ConstrainedExpectedImprovement(
            model=mm,
            best_f=0.0,
            objective_index=0,
            constraints={1: [None, 0], 2: [5.0, None], 3: [-a, a]},
        )
        X = torch.empty(1, 1, device=device, dtype=dtype)  # dummy
        ei = module(X)
        ei_expected_unconstrained = torch.tensor(
            0.19780, device=device, dtype=dtype
        )
        ei_expected = ei_expected_unconstrained * 0.5 * 0.5 * 0.5
        self.assertTrue(torch.allclose(ei, ei_expected, atol=1e-4))

        # test maximize
        module_min = ConstrainedExpectedImprovement(
            model=mm,
            best_f=0.0,
            objective_index=0,
            constraints={1: [None, 0]},
            maximize=False,
        )
        ei_min = module_min(X)
        ei_expected_unconstrained_min = torch.tensor(
            0.6978, device=device, dtype=dtype
        )
        ei_expected_min = ei_expected_unconstrained_min * 0.5
        self.assertTrue(torch.allclose(ei_min, ei_expected_min, atol=1e-4))

        # test invalid constraints
        with self.assertRaises(ValueError):
            ConstrainedExpectedImprovement(
                model=mm,
                best_f=0.0,
                objective_index=0,
                constraints={1: [1.0, -1.0]},
            )
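# A hedged hand-check of where the 0.19780 constant in these tests comes
# from: for a Gaussian posterior with mean mu = -0.5, sigma = 1 and
# best_f = 0, analytic expected improvement is sigma * (z * Phi(z) + phi(z))
# with z = (mu - best_f) / sigma; each constraint satisfied with probability
# 0.5 then scales it by 0.5.
import torch

mu, sigma, best_f = -0.5, 1.0, 0.0
z = torch.tensor((mu - best_f) / sigma)
N = torch.distributions.Normal(0.0, 1.0)
ei = sigma * (z * N.cdf(z) + N.log_prob(z).exp())  # phi(z) = exp(log_prob(z))
print(ei)             # ~0.19780, the ei_expected_unconstrained above
print(ei * 0.5 ** 3)  # three independent probability-0.5 constraints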
def __call__(self, data, masked_atom_indices=None): """ :param data: pytorch geometric data object. Assume that the edge ordering is the default pytorch geometric ordering, where the two directions of a single edge occur in pairs. Eg. data.edge_index = tensor([[0, 1, 1, 2, 2, 3], [1, 0, 2, 1, 3, 2]]) :param masked_atom_indices: If None, then randomly samples num_atoms * mask rate number of atom indices Otherwise a list of atom idx that sets the atoms to be masked (for debugging only) :return: None, Creates new attributes in original data object: data.mask_node_idx data.mask_node_label data.mask_edge_idx data.mask_edge_label """ if masked_atom_indices is None: # sample x distinct atoms to be masked, based on mask rate. But # will sample at least 1 atom num_atoms = data.x.size()[0] sample_size = int(num_atoms * self.mask_rate + 1) masked_atom_indices = random.sample(range(num_atoms), sample_size) # create mask node label by copying atom feature of mask atom mask_node_labels_list = [] for atom_idx in masked_atom_indices: mask_node_labels_list.append(data.x[atom_idx].view(1, -1)) data.mask_node_label = torch.cat(mask_node_labels_list, dim=0) data.masked_atom_indices = torch.tensor(masked_atom_indices) # modify the original node feature of the masked node for atom_idx in masked_atom_indices: data.x[atom_idx] = torch.tensor([self.num_atom_type, 0]) if self.mask_edge: # create mask edge labels by copying edge features of edges that are bonded to # mask atoms connected_edge_indices = [] for bond_idx, (u, v) in enumerate(data.edge_index.cpu().numpy().T): for atom_idx in masked_atom_indices: if atom_idx in set( (u, v)) and bond_idx not in connected_edge_indices: connected_edge_indices.append(bond_idx) if len(connected_edge_indices) > 0: # create mask edge labels by copying bond features of the bonds connected to # the mask atoms mask_edge_labels_list = [] for bond_idx in connected_edge_indices[::2]: # because the # edge ordering is such that two directions of a single # edge occur in pairs, so to get the unique undirected # edge indices, we take every 2nd edge index from list mask_edge_labels_list.append(data.edge_attr[bond_idx].view( 1, -1)) data.mask_edge_label = torch.cat(mask_edge_labels_list, dim=0) # modify the original bond features of the bonds connected to the mask atoms for bond_idx in connected_edge_indices: data.edge_attr[bond_idx] = torch.tensor( [self.num_edge_type, 0]) data.connected_edge_indices = torch.tensor( connected_edge_indices[::2]) else: data.mask_edge_label = torch.empty((0, 2)).to(torch.int64) data.connected_edge_indices = torch.tensor( connected_edge_indices).to(torch.int64) return data
def compute_multi_bald_batch( bayesian_model: nn.Module, available_loader, num_classes, k, b, target_size, initial_percentage, reduce_percentage, device=None, ) -> AcquisitionBatch: result = reduced_eval_consistent_bayesian_model( bayesian_model=bayesian_model, acquisition_function=AcquisitionFunction.bald, num_classes=num_classes, k=k, initial_percentage=initial_percentage, reduce_percentage=reduce_percentage, target_size=target_size, available_loader=available_loader, device=device, ) subset_split = result.subset_split partial_multi_bald_B = result.scores_B # Now we can compute the conditional entropy conditional_entropies_B = joint_entropy_exact.batch_conditional_entropy_B( result.logits_B_K_C) # We turn the logits into probabilities. probs_B_K_C = result.logits_B_K_C.exp_() # Don't need the result anymore. result = None torch_utils.gc_cuda() # torch_utils.cuda_meminfo() with torch.no_grad(): num_samples_per_ws = 40000 // k num_samples = num_samples_per_ws * k if device.type == "cuda": # KC_memory = k*num_classes*8 sample_MK_memory = num_samples * k * 8 MC_memory = num_samples * num_classes * 8 copy_buffer_memory = 256 * num_samples * num_classes * 8 slack_memory = 2 * 2**30 multi_bald_batch_size = (torch_utils.get_cuda_available_memory() - (sample_MK_memory + copy_buffer_memory + slack_memory)) // MC_memory global compute_multi_bald_bag_multi_bald_batch_size if compute_multi_bald_bag_multi_bald_batch_size != multi_bald_batch_size: compute_multi_bald_bag_multi_bald_batch_size = multi_bald_batch_size print( f"New compute_multi_bald_bag_multi_bald_batch_size = {multi_bald_batch_size}" ) else: multi_bald_batch_size = 16 subset_acquisition_bag = [] global_acquisition_bag = [] acquisition_bag_scores = [] # We use this for early-out in the b==0 case. MIN_SPREAD = 0.1 if b == 0: b = 100 early_out = True else: early_out = False prev_joint_probs_M_K = None prev_samples_M_K = None for i in range(b): torch_utils.gc_cuda() if i > 0: # Compute the joint entropy joint_entropies_B = torch.empty((len(probs_B_K_C), ), dtype=torch.float64) exact_samples = num_classes**i if exact_samples <= num_samples: prev_joint_probs_M_K = joint_entropy_exact.joint_probs_M_K( probs_B_K_C[subset_acquisition_bag[-1]][None].to( device), prev_joint_probs_M_K=prev_joint_probs_M_K, ) # torch_utils.cuda_meminfo() batch_exact_joint_entropy(probs_B_K_C, prev_joint_probs_M_K, multi_bald_batch_size, device, joint_entropies_B) else: if prev_joint_probs_M_K is not None: prev_joint_probs_M_K = None torch_utils.gc_cuda() # Gather new traces for the new subset_acquisition_bag. 
                prev_samples_M_K = joint_entropy_sampling.sample_M_K(
                    probs_B_K_C[subset_acquisition_bag].to(device),
                    S=num_samples_per_ws)

                # torch_utils.cuda_meminfo()

                for joint_entropies_b, probs_b_K_C in with_progress_bar(
                        torch_utils.split_tensors(joint_entropies_B,
                                                  probs_B_K_C,
                                                  multi_bald_batch_size),
                        unit_scale=multi_bald_batch_size,
                ):
                    joint_entropies_b.copy_(joint_entropy_sampling.batch(
                        probs_b_K_C.to(device), prev_samples_M_K),
                                            non_blocking=True)

                    # torch_utils.cuda_meminfo()

                prev_samples_M_K = None
                torch_utils.gc_cuda()

            partial_multi_bald_B = joint_entropies_B - conditional_entropies_B
            joint_entropies_B = None

        # Don't allow reselection
        partial_multi_bald_B[subset_acquisition_bag] = -math.inf

        winner_index = partial_multi_bald_B.argmax().item()

        # Actual MultiBALD is:
        actual_multi_bald_B = partial_multi_bald_B[winner_index] - torch.sum(
            conditional_entropies_B[subset_acquisition_bag])
        actual_multi_bald_B = actual_multi_bald_B.item()

        print(f"Actual MultiBALD: {actual_multi_bald_B}")

        # If we early out, we don't take the point that triggers the early out.
        # Only allow early-out once at least two samples have been acquired.
        if early_out and i > 1:
            # how far the winner's score sits above the median of the
            # remaining candidate scores
            current_spread = partial_multi_bald_B[
                winner_index] - partial_multi_bald_B.median()
            if current_spread < MIN_SPREAD:
                print("Early out")
                break

        acquisition_bag_scores.append(actual_multi_bald_B)
        subset_acquisition_bag.append(winner_index)
        # We need to map the index back to the actual dataset.
        global_acquisition_bag.append(
            subset_split.get_dataset_indices([winner_index]).item())

        print(f"Acquisition bag: {sorted(global_acquisition_bag)}")

    return AcquisitionBatch(global_acquisition_bag, acquisition_bag_scores,
                            None)
def test_MockModel(self): mp = MockPosterior() mm = MockModel(mp) X = torch.empty(0) self.assertEqual(mm.posterior(X), mp)
def pointwise_ops(self): a = torch.randn(4) b = torch.randn(4) t = torch.tensor([-1, -2, 3], dtype=torch.int8) r = torch.tensor([0, 1, 10, 0], dtype=torch.int8) t = torch.tensor([-1, -2, 3], dtype=torch.int8) s = torch.tensor([4, 0, 1, 0], dtype=torch.int8) f = torch.zeros(3) g = torch.tensor([-1, 0, 1]) w = torch.tensor([0.3810, 1.2774, -0.2972, -0.3719, 0.4637]) return ( torch.abs(torch.tensor([-1, -2, 3])), torch.absolute(torch.tensor([-1, -2, 3])), torch.acos(a), torch.arccos(a), torch.acosh(a.uniform_(1.0, 2.0)), torch.add(a, 20), torch.add(a, b, out=a), b.add(a), b.add(a, out=b), b.add_(a), b.add(1), torch.add(a, torch.randn(4, 1), alpha=10), torch.addcdiv(torch.randn(1, 3), torch.randn(3, 1), torch.randn(1, 3), value=0.1), torch.addcmul(torch.randn(1, 3), torch.randn(3, 1), torch.randn(1, 3), value=0.1), torch.angle(a), torch.asin(a), torch.arcsin(a), torch.asinh(a), torch.arcsinh(a), torch.atan(a), torch.arctan(a), torch.atanh(a.uniform_(-1.0, 1.0)), torch.arctanh(a.uniform_(-1.0, 1.0)), torch.atan2(a, a), torch.bitwise_not(t), torch.bitwise_and(t, torch.tensor([1, 0, 3], dtype=torch.int8)), torch.bitwise_or(t, torch.tensor([1, 0, 3], dtype=torch.int8)), torch.bitwise_xor(t, torch.tensor([1, 0, 3], dtype=torch.int8)), torch.ceil(a), torch.ceil(float(torch.tensor(0.5))), torch.ceil(torch.tensor(0.5).item()), torch.clamp(a, min=-0.5, max=0.5), torch.clamp(a, min=0.5), torch.clamp(a, max=0.5), torch.clip(a, min=-0.5, max=0.5), torch.conj(a), torch.copysign(a, 1), torch.copysign(a, b), torch.cos(a), torch.cosh(a), torch.deg2rad( torch.tensor([[180.0, -180.0], [360.0, -360.0], [90.0, -90.0]])), torch.div(a, b), a.div(b), a.div(1), a.div_(b), torch.divide(a, b, rounding_mode="trunc"), torch.divide(a, b, rounding_mode="floor"), torch.digamma(torch.tensor([1.0, 0.5])), torch.erf(torch.tensor([0.0, -1.0, 10.0])), torch.erfc(torch.tensor([0.0, -1.0, 10.0])), torch.erfinv(torch.tensor([0.0, 0.5, -1.0])), torch.exp(torch.tensor([0.0, math.log(2.0)])), torch.exp(float(torch.tensor(1))), torch.exp2(torch.tensor([0.0, math.log(2.0), 3.0, 4.0])), torch.expm1(torch.tensor([0.0, math.log(2.0)])), torch.fake_quantize_per_channel_affine( torch.randn(2, 2, 2), (torch.randn(2) + 1) * 0.05, torch.zeros(2), 1, 0, 255, ), torch.fake_quantize_per_tensor_affine(a, 0.1, 0, 0, 255), torch.float_power(torch.randint(10, (4, )), 2), torch.float_power(torch.arange(1, 5), torch.tensor([2, -3, 4, -5])), torch.floor(a), torch.floor(float(torch.tensor(1))), torch.floor_divide(torch.tensor([4.0, 3.0]), torch.tensor([2.0, 2.0])), torch.floor_divide(torch.tensor([4.0, 3.0]), 1.4), torch.fmod(torch.tensor([-3, -2, -1, 1, 2, 3]), 2), torch.fmod(torch.tensor([1, 2, 3, 4, 5]), 1.5), torch.frac(torch.tensor([1.0, 2.5, -3.2])), torch.randn(4, dtype=torch.cfloat).imag, torch.ldexp(torch.tensor([1.0]), torch.tensor([1])), torch.ldexp(torch.tensor([1.0]), torch.tensor([1, 2, 3, 4])), torch.lerp(torch.arange(1.0, 5.0), torch.empty(4).fill_(10), 0.5), torch.lerp( torch.arange(1.0, 5.0), torch.empty(4).fill_(10), torch.full_like(torch.arange(1.0, 5.0), 0.5), ), torch.lgamma(torch.arange(0.5, 2, 0.5)), torch.log(torch.arange(5) + 10), torch.log10(torch.rand(5)), torch.log1p(torch.randn(5)), torch.log2(torch.rand(5)), torch.logaddexp(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])), torch.logaddexp(torch.tensor([-100.0, -200.0, -300.0]), torch.tensor([-1, -2, -3])), torch.logaddexp(torch.tensor([1.0, 2000.0, 30000.0]), torch.tensor([-1, -2, -3])), torch.logaddexp2(torch.tensor([-1.0]), torch.tensor([-1, -2, -3])), 
torch.logaddexp2(torch.tensor([-100.0, -200.0, -300.0]), torch.tensor([-1, -2, -3])), torch.logaddexp2(torch.tensor([1.0, 2000.0, 30000.0]), torch.tensor([-1, -2, -3])), torch.logical_and(r, s), torch.logical_and(r.double(), s.double()), torch.logical_and(r.double(), s), torch.logical_and(r, s, out=torch.empty(4, dtype=torch.bool)), torch.logical_not(torch.tensor([0, 1, -10], dtype=torch.int8)), torch.logical_not( torch.tensor([0.0, 1.5, -10.0], dtype=torch.double)), torch.logical_not( torch.tensor([0.0, 1.0, -10.0], dtype=torch.double), out=torch.empty(3, dtype=torch.int16), ), torch.logical_or(r, s), torch.logical_or(r.double(), s.double()), torch.logical_or(r.double(), s), torch.logical_or(r, s, out=torch.empty(4, dtype=torch.bool)), torch.logical_xor(r, s), torch.logical_xor(r.double(), s.double()), torch.logical_xor(r.double(), s), torch.logical_xor(r, s, out=torch.empty(4, dtype=torch.bool)), torch.logit(torch.rand(5), eps=1e-6), torch.hypot(torch.tensor([4.0]), torch.tensor([3.0, 4.0, 5.0])), torch.i0(torch.arange(5, dtype=torch.float32)), torch.igamma(a, b), torch.igammac(a, b), torch.mul(torch.randn(3), 100), b.mul(a), b.mul(5), b.mul(a, out=b), b.mul_(a), b.mul_(5), torch.multiply(torch.randn(4, 1), torch.randn(1, 4)), torch.mvlgamma(torch.empty(2, 3).uniform_(1.0, 2.0), 2), torch.tensor([float("nan"), float("inf"), -float("inf"), 3.14]), torch.nan_to_num(w), torch.nan_to_num_(w), torch.nan_to_num(w, nan=2.0), torch.nan_to_num(w, nan=2.0, posinf=1.0), torch.neg(torch.randn(5)), # torch.nextafter(torch.tensor([1, 2]), torch.tensor([2, 1])) == torch.tensor([eps + 1, 2 - eps]), torch.polygamma(1, torch.tensor([1.0, 0.5])), torch.polygamma(2, torch.tensor([1.0, 0.5])), torch.polygamma(3, torch.tensor([1.0, 0.5])), torch.polygamma(4, torch.tensor([1.0, 0.5])), torch.pow(a, 2), torch.pow(2, float(torch.tensor(0.5))), torch.pow(torch.arange(1.0, 5.0), torch.arange(1.0, 5.0)), torch.rad2deg( torch.tensor([[3.142, -3.142], [6.283, -6.283], [1.570, -1.570]])), torch.randn(4, dtype=torch.cfloat).real, torch.reciprocal(a), torch.remainder(torch.tensor([-3.0, -2.0]), 2), torch.remainder(torch.tensor([1, 2, 3, 4, 5]), 1.5), torch.round(a), torch.round(torch.tensor(0.5).item()), torch.rsqrt(a), torch.sigmoid(a), torch.sign(torch.tensor([0.7, -1.2, 0.0, 2.3])), torch.sgn(a), torch.signbit(torch.tensor([0.7, -1.2, 0.0, 2.3])), torch.sin(a), torch.sinc(a), torch.sinh(a), torch.sqrt(a), torch.square(a), torch.sub(torch.tensor((1, 2)), torch.tensor((0, 1)), alpha=2), b.sub(a), b.sub_(a), b.sub(5), torch.sum(5), torch.tan(a), torch.tanh(a), torch.true_divide(a, a), torch.trunc(a), torch.trunc_(a), torch.xlogy(f, g), torch.xlogy(f, g), torch.xlogy(f, 4), torch.xlogy(2, g), )