def __call__(self, image_batch, theta_aff, theta_aff_tps, use_cuda=True):
    sampling_grid_aff = self.affTnf(image_batch=None,
                                    theta_batch=theta_aff.view(-1, 2, 3),
                                    return_sampling_grid=True,
                                    return_warped_image=False)
    sampling_grid_aff_tps = self.tpsTnf(image_batch=None,
                                        theta_batch=theta_aff_tps,
                                        return_sampling_grid=True,
                                        return_warped_image=False)

    if self.padding_crop_factor is not None:
        sampling_grid_aff_tps = sampling_grid_aff_tps * self.padding_crop_factor

    # put 1e10 value in region out of bounds of sampling_grid_aff
    in_bound_mask_aff = ((sampling_grid_aff[:, :, :, 0] > -1) * (sampling_grid_aff[:, :, :, 0] < 1) *
                         (sampling_grid_aff[:, :, :, 1] > -1) * (sampling_grid_aff[:, :, :, 1] < 1)).unsqueeze(3)
    in_bound_mask_aff = in_bound_mask_aff.expand_as(sampling_grid_aff)
    sampling_grid_aff = torch.mul(in_bound_mask_aff.float(), sampling_grid_aff)
    sampling_grid_aff = torch.add((in_bound_mask_aff.float() - 1) * (1e10), sampling_grid_aff)

    # compose transformations
    sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2, 3).transpose(1, 2),
                                               sampling_grid_aff_tps).transpose(1, 2).transpose(2, 3)

    # put 1e10 value in region out of bounds of sampling_grid_aff_tps_comp
    in_bound_mask_aff_tps = ((sampling_grid_aff_tps[:, :, :, 0] > -1) * (sampling_grid_aff_tps[:, :, :, 0] < 1) *
                             (sampling_grid_aff_tps[:, :, :, 1] > -1) * (sampling_grid_aff_tps[:, :, :, 1] < 1)).unsqueeze(3)
    in_bound_mask_aff_tps = in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp)
    sampling_grid_aff_tps_comp = torch.mul(in_bound_mask_aff_tps.float(), sampling_grid_aff_tps_comp)
    sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float() - 1) * (1e10), sampling_grid_aff_tps_comp)

    # sample transformed image
    warped_image_batch = F.grid_sample(image_batch, sampling_grid_aff_tps_comp)

    return warped_image_batch
def forward(self, x):
    x0 = self.conv.forward(x.float())
    x = self.pool_mil(x0)
    x = x.squeeze(2).squeeze(2)
    x1 = torch.add(torch.mul(x0.view(x.size(0), 1000, -1), -1), 1)
    cumprod = torch.cumprod(x1, 2)
    out = torch.max(x, torch.add(torch.mul(cumprod[:, :, -1], -1), 1))
    # out = F.softmax(out)
    return out
def forward(self, img, att_size=14):
    x0 = self.conv(img)
    x = self.pool_mil(x0)
    x = x.squeeze(2).squeeze(2)
    x = self.l1(x)
    x1 = torch.add(torch.mul(x.view(x.size(0), 1000, -1), -1), 1)
    cumprod = torch.cumprod(x1, 2)
    out = torch.max(x, torch.add(torch.mul(cumprod[:, :, -1], -1), 1))
    return out
def match(self, passage_encoders, question_encoders, wq_matrix, wp_matrix, fw=True):
    '''
    passage_encoders (pn_steps, batch, hidden_size)
    question_encoders (qn_steps, batch, hidden_size)
    wq_matrix (qn_steps, batch, hidden_size)
    wp_matrix (pn_steps, batch, hidden_size)
    '''
    if fw:
        match_lstm = self.fw_match_lstm
        start = 0
        end = passage_encoders.size(0)
        stride = 1
    else:
        match_lstm = self.bw_match_lstm
        start = passage_encoders.size(0) - 1
        end = -1
        stride = -1

    hx = Variable(torch.zeros(passage_encoders.size(1), self.hidden_size)).cuda()
    cx = Variable(torch.zeros(passage_encoders.size(1), self.hidden_size)).cuda()

    match_encoders = [0 for i in range(passage_encoders.size(0))]

    for i in range(start, end, stride):
        wphp = wp_matrix[i]
        wrhr = self.whr_net(hx)
        _sum = torch.add(wphp, wrhr)  # batch, hidden_size
        _sum = _sum.expand(wq_matrix.size(0), wq_matrix.size(1), self.hidden_size)  # qn_steps, batch, hidden_size

        g = self.tanh(torch.add(wq_matrix, _sum))  # qn_steps, batch, hidden_size
        g = torch.transpose(g, 0, 1)  # batch, qn_steps, hidden_size

        wg = self.w_net(g)  # batch, qn_steps, 1
        wg = wg.squeeze(-1)  # batch, qn_steps

        alpha = wg  # batch, qn_steps
        alpha = self.softmax(alpha).view(alpha.size(0), 1, alpha.size(1))  # batch, 1, qn_steps

        attentionv = torch.bmm(alpha, question_encoders.transpose(0, 1))  # batch, 1, hidden_size
        attentionv = attentionv.squeeze(1)  # batch, hidden_size

        inp = torch.cat([passage_encoders[i], attentionv], -1)

        hx, cx = match_lstm(inp, (hx, cx))  # batch, hidden_size

        match_encoders[i] = hx.view(1, hx.size(0), -1)

    match_encoders = torch.cat(match_encoders)

    return match_encoders
def updateOutput(self, input):
    self.output.resize_(1)
    assert input[0].dim() == 2

    if self.diff is None:
        self.diff = input[0].new()

    torch.add(input[0], -1, input[1], out=self.diff).abs_()

    self.output.resize_(input[0].size(0))
    self.output.zero_()
    self.output.add_(self.diff.pow_(self.norm).sum(1, keepdim=False))
    self.output.pow_(1. / self.norm)

    return self.output
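# A minimal sketch (not part of the original module): assuming a recent PyTorch, the same batched
# p-norm distance that updateOutput computes with a reused buffer can be written directly.
# `x1`, `x2`, and `p` are hypothetical stand-ins for input[0], input[1], and self.norm.
import torch
import torch.nn.functional as F

def pairwise_pnorm(x1, x2, p=2.0):
    # explicit form, mirroring the buffer arithmetic above
    manual = (x1 - x2).abs().pow(p).sum(1).pow(1.0 / p)
    # built-in form (adds a small eps to the difference for numerical stability)
    builtin = F.pairwise_distance(x1, x2, p=p)
    return manual, builtin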
def forward(self, context_ids, doc_ids, target_noise_ids):
    """Sparse computation of scores (unnormalized log probabilities)
    that should be passed to the negative sampling loss.

    Parameters
    ----------
    context_ids: torch.Tensor of size (batch_size, num_context_words)
        Vocabulary indices of context words.

    doc_ids: torch.Tensor of size (batch_size,)
        Document indices of paragraphs.

    target_noise_ids: torch.Tensor of size (batch_size, num_noise_words + 1)
        Vocabulary indices of target and noise words. The first element in
        each row is the ground truth index (i.e. the target), other
        elements are indices of samples from the noise distribution.

    Returns
    -------
    autograd.Variable of size (batch_size, num_noise_words + 1)
    """
    # combine a paragraph vector with word vectors of
    # input (context) words
    x = torch.add(
        self._D[doc_ids, :],
        torch.sum(self._W[context_ids, :], dim=1))

    # sparse computation of scores (unnormalized log probabilities)
    # for negative sampling
    return torch.bmm(
        x.unsqueeze(1),
        self._O[:, target_noise_ids].permute(1, 0, 2)).squeeze()
def get_loss(self, image_a_pred, image_b_pred, mask_a, mask_b):
    loss = 0

    # get the nonzero indices
    mask_a_indices_flat = torch.nonzero(mask_a)
    mask_b_indices_flat = torch.nonzero(mask_b)
    if len(mask_a_indices_flat) == 0:
        return Variable(torch.cuda.LongTensor([0]), requires_grad=True)
    if len(mask_b_indices_flat) == 0:
        return Variable(torch.cuda.LongTensor([0]), requires_grad=True)

    # take num_samples random pixel samples of the object, using the mask
    num_samples = 10000

    rand_numbers_a = (torch.rand(num_samples) * len(mask_a_indices_flat)).cuda()
    rand_indices_a = Variable(torch.floor(rand_numbers_a).type(torch.cuda.LongTensor), requires_grad=False)
    randomized_mask_a_indices_flat = torch.index_select(mask_a_indices_flat, 0, rand_indices_a).squeeze(1)

    rand_numbers_b = (torch.rand(num_samples) * len(mask_b_indices_flat)).cuda()
    rand_indices_b = Variable(torch.floor(rand_numbers_b).type(torch.cuda.LongTensor), requires_grad=False)
    randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)

    # index into the image and get descriptors
    M_margin = 0.5  # margin parameter
    random_img_a_object_descriptors = torch.index_select(image_a_pred, 1, randomized_mask_a_indices_flat)
    random_img_b_object_descriptors = torch.index_select(image_b_pred, 1, randomized_mask_b_indices_flat)
    pixel_wise_loss = (random_img_a_object_descriptors - random_img_b_object_descriptors).pow(2).sum(dim=2)
    pixel_wise_loss = torch.add(pixel_wise_loss, -2 * M_margin)
    zeros_vec = torch.zeros_like(pixel_wise_loss)
    loss += torch.max(zeros_vec, pixel_wise_loss).sum()

    return loss
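# A small equivalent sketch (illustrative only): the hinge above, max(0, d^2 - 2*M_margin) summed
# over the sampled pixel pairs, can also be written with clamp. `desc_a` and `desc_b` are
# hypothetical stand-ins for the randomly indexed descriptor tensors.
import torch

desc_a, desc_b = torch.randn(1, 100, 3), torch.randn(1, 100, 3)
M_margin = 0.5
d2 = (desc_a - desc_b).pow(2).sum(dim=2)
hinge = torch.clamp(d2 - 2 * M_margin, min=0).sum()
assert torch.allclose(hinge, torch.max(torch.zeros_like(d2), d2 - 2 * M_margin).sum())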
def unit_test(args):
    '''test different (kinds of) predicate detectors'''
    print("Torch uninitialized 5x3 matrix:")
    x_t = torch.Tensor(5, 3)
    print(x_t)

    print("Torch randomly initialized 5x3 matrix X:")
    x_t = torch.rand(5, 3)
    if args.verbose:
        print(x_t)
    print("size:", x_t.size())

    print("Torch randomly initialized 5x3 matrix Y:")
    y_t = torch.rand(5, 3)
    if args.verbose:
        print(y_t)

    print("X + Y:")
    z_t = torch.add(x_t, y_t)
    print(z_t)

    print("slice (X + Y)[:, 1]:")
    print(z_t[:, 1])

    num_wrong = 0
    print("unit_test: num_tests:", 1, " num_wrong:", num_wrong, " -- ",
          "FAIL" if num_wrong else "PASS")
def forward(self, title, pg):
    r_gate = F.sigmoid(self.wrx(title) + self.wrh(pg))
    i_gate = F.sigmoid(self.wix(title) + self.wih(pg))
    n_gate = F.tanh(self.wnx(title) + torch.mul(r_gate, self.wnh(pg)))
    result = torch.mul(i_gate, pg) + torch.mul(torch.add(-i_gate, 1), n_gate)
    return result
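# A minimal sketch (assumptions: recent PyTorch; wrx, wrh, wix, wih, wnx, wnh are Linear layers,
# as the forward above suggests). The update is a GRU-style gate: the output interpolates between
# the old state `pg` and the candidate `n_gate`, which is exactly torch.lerp.
import torch
import torch.nn as nn

hidden = 8
wrx, wrh = nn.Linear(hidden, hidden), nn.Linear(hidden, hidden)
wix, wih = nn.Linear(hidden, hidden), nn.Linear(hidden, hidden)
wnx, wnh = nn.Linear(hidden, hidden), nn.Linear(hidden, hidden)

title, pg = torch.randn(2, hidden), torch.randn(2, hidden)
r = torch.sigmoid(wrx(title) + wrh(pg))
i = torch.sigmoid(wix(title) + wih(pg))
n = torch.tanh(wnx(title) + r * wnh(pg))
result = i * pg + (1 - i) * n  # same interpolation as torch.lerp(n, pg, i)
assert torch.allclose(result, torch.lerp(n, pg, i))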
def test_train(self):
    self._metric.train()
    calls = [[torch.FloatTensor([0.0]), torch.LongTensor([0])],
             [torch.FloatTensor([0.0, 0.1, 0.2, 0.3]), torch.LongTensor([0, 1, 2, 3])]]
    for i in range(len(self._states)):
        self._metric.process(self._states[i])
    self.assertEqual(2, len(self._metric_function.call_args_list))
    for i in range(len(self._metric_function.call_args_list)):
        self.assertTrue(torch.eq(self._metric_function.call_args_list[i][0][0], calls[i][0]).all())
        self.assertTrue(torch.lt(torch.abs(torch.add(self._metric_function.call_args_list[i][0][1],
                                                     -calls[i][1])), 1e-12).all())
    self._metric_function.reset_mock()
    self._metric.process_final({})
    self._metric_function.assert_called_once()
    self.assertTrue(torch.eq(self._metric_function.call_args_list[0][0][1],
                             torch.LongTensor([0, 1, 2, 3, 4])).all())
    self.assertTrue(torch.lt(torch.abs(torch.add(self._metric_function.call_args_list[0][0][0],
                                                 -torch.FloatTensor([0.0, 0.1, 0.2, 0.3, 0.4]))), 1e-12).all())
def fade_in_layer(self, x, alpha):
    for l in self.layers:
        x = l(x)
    x_new = self.next_block(x)
    x = self.toRGB(x)
    x_new = self.new_toRGB(x_new)
    return torch.add(x.mul(1.0 - alpha), x_new.mul(alpha))
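# A small illustrative check (not from the original model): the fade-in blend above is a linear
# interpolation between the old RGB output and the new block's output, so for like-shaped tensors
# `a`, `b` and a float `alpha` it matches torch.lerp.
import torch

a, b, alpha = torch.randn(1, 3, 4, 4), torch.randn(1, 3, 4, 4), 0.3
blended = torch.add(a.mul(1.0 - alpha), b.mul(alpha))
assert torch.allclose(blended, torch.lerp(a, b, alpha))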
def forward(self, x):
    x = self.embed(x)
    x = self.dropout(x)
    # x = x.view(len(x), x.size(1), -1)
    # x = embed.view(len(x), embed.size(1), -1)
    bilstm_out, self.hidden = self.bilstm(x, self.hidden)

    bilstm_out = torch.transpose(bilstm_out, 0, 1)
    bilstm_out = torch.transpose(bilstm_out, 1, 2)
    # bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2)).squeeze(2)
    bilstm_out = F.max_pool1d(bilstm_out, bilstm_out.size(2))
    bilstm_out = bilstm_out.squeeze(2)

    hidden2lable = self.hidden2label1(F.tanh(bilstm_out))

    gate_layer = F.sigmoid(self.gate_layer(bilstm_out))

    # calculate highway layer values
    gate_hidden_layer = torch.mul(hidden2lable, gate_layer)
    # note: gating the raw BiLSTM features (below) runs, but it is not the standard
    # highway-network formula, which would carry hidden2lable instead:
    # gate_input = torch.mul((1 - gate_layer), hidden2lable)
    gate_input = torch.mul((1 - gate_layer), bilstm_out)
    highway_output = torch.add(gate_hidden_layer, gate_input)

    logit = self.logit_layer(highway_output)

    return logit
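# For reference, a minimal standard highway layer (a sketch, not the module above): the carry
# path reuses the untransformed input and the gate interpolates between input and transform,
# which is the formula the comment in forward() refers to.
import torch
import torch.nn as nn
import torch.nn.functional as F

class Highway(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.transform = nn.Linear(dim, dim)
        self.gate = nn.Linear(dim, dim)

    def forward(self, x):
        t = torch.sigmoid(self.gate(x))   # transform gate
        h = F.relu(self.transform(x))     # candidate transform
        return t * h + (1.0 - t) * x      # gated mix of transform and carry

y = Highway(16)(torch.randn(4, 16))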
def test_local_var_binary_methods(self):
    '''Unit tests for methods mentioned on issue 1385
       https://github.com/OpenMined/PySyft/issues/1385'''
    x = torch.FloatTensor([1, 2, 3, 4])
    y = torch.FloatTensor([[1, 2, 3, 4]])
    z = torch.matmul(x, y.t())
    assert torch.equal(z, torch.FloatTensor([30]))

    z = torch.add(x, y)
    assert torch.equal(z, torch.FloatTensor([[2, 4, 6, 8]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    z = torch.cross(x, y, dim=1)
    assert torch.equal(z, torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])
    z = torch.dist(x, y)
    t = torch.FloatTensor([z])
    assert torch.equal(t, torch.FloatTensor([0.]))

    x = torch.FloatTensor([1, 2, 3])
    y = torch.FloatTensor([1, 2, 3])
    z = torch.dot(x, y)
    t = torch.FloatTensor([z])
    assert torch.equal(t, torch.FloatTensor([14]))

    z = torch.eq(x, y)
    assert torch.equal(z, torch.ByteTensor([1, 1, 1]))

    z = torch.ge(x, y)
    assert torch.equal(z, torch.ByteTensor([1, 1, 1]))
def test_remote_var_binary_methods(self):
    '''Unit tests for methods mentioned on issue 1385
       https://github.com/OpenMined/PySyft/issues/1385'''
    hook = TorchHook(verbose=False)
    local = hook.local_worker
    remote = VirtualWorker(hook, 1)
    local.add_worker(remote)

    x = Var(torch.FloatTensor([1, 2, 3, 4])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3, 4]])).send(remote)
    z = torch.matmul(x, y.t())
    assert torch.equal(z.get(), Var(torch.FloatTensor([30])))

    z = torch.add(x, y)
    assert torch.equal(z.get(), Var(torch.FloatTensor([[2, 4, 6, 8]])))

    x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    z = torch.cross(x, y, dim=1)
    assert torch.equal(z.get(), Var(torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]])))

    x = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    y = Var(torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]])).send(remote)
    z = torch.dist(x, y)
    assert torch.equal(z.get(), Var(torch.FloatTensor([0.])))

    x = Var(torch.FloatTensor([1, 2, 3])).send(remote)
    y = Var(torch.FloatTensor([1, 2, 3])).send(remote)
    z = torch.dot(x, y)
    assert torch.equal(z.get(), Var(torch.FloatTensor([14])))

    z = torch.eq(x, y)
    assert torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1])))

    z = torch.ge(x, y)
    assert torch.equal(z.get(), Var(torch.ByteTensor([1, 1, 1])))
def forward(self, x):
    if not self.equalInOut:
        x = self.relu1(self.bn1(x))
    else:
        out = self.relu1(self.bn1(x))
    out = self.relu2(self.bn2(self.conv1(out if self.equalInOut else x)))
    if self.droprate > 0:
        out = F.dropout(out, p=self.droprate, training=self.training)
    out = self.conv2(out)
    return torch.add(x if self.equalInOut else self.convShortcut(x), out)
def forward(self, lvec, rvec):
    mult_dist = torch.mul(lvec, rvec)
    abs_dist = torch.abs(torch.add(lvec, -rvec))
    vec_dist = torch.cat((mult_dist, abs_dist), 1)
    out = F.sigmoid(self.wh(vec_dist))
    out = F.log_softmax(self.wp(out))
    return out
def forward(self, x):
    out1 = self.conv1(x)
    out = self.res_blocks(out1)
    out2 = self.conv2(out)
    out = torch.add(out1, out2)
    out = self.upsampling(out)
    out = self.conv3(out)
    return out
def fade_in_layer(self, x, alpha):
    x_new = self.new_fromRGB(x)
    x_new = self.next_block(x_new)
    x = self.avg_pool(x)
    x = self.fromRGB(x)
    x = torch.add(x.mul(1.0 - alpha), x_new.mul(alpha))
    for l in self.layers:
        x = l(x)
    return self.toOut(x.view(x.size(0), -1))
def test_lambda(self):
    trans = transforms.Lambda(lambda x: x.add(10))
    x = torch.randn(10)
    y = trans(x)
    assert y.equal(torch.add(x, 10))

    trans = transforms.Lambda(lambda x: x.add_(10))
    x = torch.randn(10)
    y = trans(x)
    assert y.equal(x)
def test_validate(self):
    self._metric.eval()
    for i in range(len(self._states)):
        self._metric.process(self._states[i])
    self._metric_function.assert_not_called()
    self._metric.process_final_validate({})
    self._metric_function.assert_called_once()
    self.assertTrue(torch.eq(self._metric_function.call_args_list[0][0][1],
                             torch.LongTensor([0, 1, 2, 3, 4])).all())
    self.assertTrue(torch.lt(torch.abs(torch.add(self._metric_function.call_args_list[0][0][0],
                                                 -torch.FloatTensor([0.0, 0.1, 0.2, 0.3, 0.4]))), 1e-12).all())
def get_loss_original(self, image_a_pred, image_b_pred, matches_a,
                      matches_b, non_matches_a, non_matches_b,
                      M_margin=0.5, non_match_loss_weight=1.0):
    # this is pegged to its implementation at sha 87abdb63bb5b99d9632f5c4360b5f6f1cf54245f
    """
    Computes the loss function

    DCN = Dense Correspondence Network
    num_images = number of images in this batch
    num_matches = number of matches
    num_non_matches = number of non-matches
    W = image width
    H = image height
    D = descriptor dimension

    match_loss = 1/num_matches \sum_{num_matches} ||descriptor_a - descriptor_b||_2^2
    non_match_loss = 1/num_non_matches \sum_{num_non_matches} max(0, M_margin - ||descriptor_a - descriptor_b||_2^2)

    loss = match_loss + non_match_loss

    :param image_a_pred: Output of DCN network on image A.
    :type image_a_pred: torch.Variable(torch.FloatTensor) shape [1, W * H, D]
    :param image_b_pred: same as image_a_pred
    :type image_b_pred:
    :param matches_a: torch.Variable(torch.LongTensor) has shape [num_matches,], a (u,v) pair is mapped
        to (u,v) ---> image_width * v + u, this matches the shape of one dimension of image_a_pred
    :type matches_a: torch.Variable(torch.FloatTensor)
    :param matches_b: same as matches_a
    :type matches_b:
    :param non_matches_a: torch.Variable(torch.FloatTensor) has shape [num_non_matches,], a (u,v) pair is
        mapped to (u,v) ---> image_width * v + u, this matches the shape of image_a_pred
    :type non_matches_a: torch.Variable(torch.FloatTensor)
    :param non_matches_b: same as non_matches_a
    :type non_matches_b:
    :return: loss, match_loss, non_match_loss
    :rtype: torch.Variable(torch.FloatTensor) each of shape torch.Size([1])
    """

    num_matches = matches_a.size()[0]
    num_non_matches = non_matches_a.size()[0]

    matches_a_descriptors = torch.index_select(image_a_pred, 1, matches_a)
    matches_b_descriptors = torch.index_select(image_b_pred, 1, matches_b)

    match_loss = 1.0 / num_matches * (matches_a_descriptors - matches_b_descriptors).pow(2).sum()

    # add loss via non_matches
    non_matches_a_descriptors = torch.index_select(image_a_pred, 1, non_matches_a)
    non_matches_b_descriptors = torch.index_select(image_b_pred, 1, non_matches_b)
    pixel_wise_loss = (non_matches_a_descriptors - non_matches_b_descriptors).pow(2).sum(dim=2)
    pixel_wise_loss = torch.add(torch.neg(pixel_wise_loss), M_margin)
    zeros_vec = torch.zeros_like(pixel_wise_loss)
    non_match_loss = non_match_loss_weight * 1.0 / num_non_matches * torch.max(zeros_vec, pixel_wise_loss).sum()

    loss = match_loss + non_match_loss

    return loss, match_loss, non_match_loss
def forward(self, x):
    bahs, chs, _, _ = x.size()

    # channel squeeze-and-excitation: .view returns a new tensor with the same data but a different size
    chn_se = self.avg_pool(x).view(bahs, chs)
    chn_se = self.channel_excitation(chn_se).view(bahs, chs, 1, 1)
    chn_se = torch.mul(x, chn_se)

    # spatial squeeze-and-excitation
    spa_se = self.spatial_se(x)
    spa_se = torch.mul(x, spa_se)
    return torch.add(chn_se, 1, spa_se)
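# Side note (an assumption about newer PyTorch, not part of the block above): the legacy
# three-argument call torch.add(chn_se, 1, spa_se) used above is deprecated; the keyword form
# below expresses the same spatial + channel SE fusion.
import torch

chn_se, spa_se = torch.randn(2, 4, 8, 8), torch.randn(2, 4, 8, 8)
fused = torch.add(chn_se, spa_se, alpha=1)  # equivalent to chn_se + 1 * spa_se
assert torch.equal(fused, chn_se + spa_se)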
def updateOutput(self, input, target):
    # - log(input) * target - log(1 - input) * (1 - target)
    if input.nelement() != target.nelement():
        raise RuntimeError("input and target size mismatch")

    if self.buffer is None:
        self.buffer = input.new()

    buffer = self.buffer
    weights = self.weights

    buffer.resize_as_(input)

    if weights is not None and target.dim() != 1:
        weights = self.weights.view(1, target.size(1)).expand_as(target)

    # log(input) * target
    torch.add(input, self.eps, out=buffer).log_()
    if weights is not None:
        buffer.mul_(weights)

    target_1d = target.contiguous().view(-1)
    # don't save a 1-d view of buffer: it should already be contiguous, and it's
    # used as non-1d tensor later.
    output = torch.dot(target_1d, buffer.contiguous().view(-1))

    # log(1 - input) * (1 - target)
    torch.mul(input, -1, out=buffer).add_(1 + self.eps).log_()
    if weights is not None:
        buffer.mul_(weights)

    output = output + torch.sum(buffer)
    output = output - torch.dot(target_1d, buffer.contiguous().view(-1))

    if self.sizeAverage:
        output = output / input.nelement()

    self.output = - output.item()

    return self.output
def theta_to_sampling_grid(out_h, out_w, theta_aff=None, theta_tps=None, theta_aff_tps=None,
                           use_cuda=True, tps_reg_factor=0):
    affTnf = GeometricTnf(out_h=out_h, out_w=out_w, geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(out_h=out_h, out_w=out_w, geometric_model='tps', use_cuda=use_cuda,
                          tps_reg_factor=tps_reg_factor)

    if theta_aff is not None:
        sampling_grid_aff = affTnf(image_batch=None,
                                   theta_batch=theta_aff.view(1, 2, 3),
                                   return_sampling_grid=True,
                                   return_warped_image=False)
    else:
        sampling_grid_aff = None

    if theta_tps is not None:
        sampling_grid_tps = tpsTnf(image_batch=None,
                                   theta_batch=theta_tps.view(1, -1),
                                   return_sampling_grid=True,
                                   return_warped_image=False)
    else:
        sampling_grid_tps = None

    if theta_aff is not None and theta_aff_tps is not None:
        sampling_grid_aff_tps = tpsTnf(image_batch=None,
                                       theta_batch=theta_aff_tps.view(1, -1),
                                       return_sampling_grid=True,
                                       return_warped_image=False)

        # put 1e10 value in region out of bounds of sampling_grid_aff
        sampling_grid_aff = sampling_grid_aff.clone()
        in_bound_mask_aff = Variable((sampling_grid_aff.data[:, :, :, 0] > -1) &
                                     (sampling_grid_aff.data[:, :, :, 0] < 1) &
                                     (sampling_grid_aff.data[:, :, :, 1] > -1) &
                                     (sampling_grid_aff.data[:, :, :, 1] < 1)).unsqueeze(3)
        in_bound_mask_aff = in_bound_mask_aff.expand_as(sampling_grid_aff)
        sampling_grid_aff = torch.add((in_bound_mask_aff.float() - 1) * (1e10),
                                      torch.mul(in_bound_mask_aff.float(), sampling_grid_aff))

        # put 1e10 value in region out of bounds of sampling_grid_aff_tps_comp
        sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2, 3).transpose(1, 2),
                                                   sampling_grid_aff_tps).transpose(1, 2).transpose(2, 3)
        in_bound_mask_aff_tps = Variable((sampling_grid_aff_tps.data[:, :, :, 0] > -1) &
                                         (sampling_grid_aff_tps.data[:, :, :, 0] < 1) &
                                         (sampling_grid_aff_tps.data[:, :, :, 1] > -1) &
                                         (sampling_grid_aff_tps.data[:, :, :, 1] < 1)).unsqueeze(3)
        in_bound_mask_aff_tps = in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp)
        sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float() - 1) * (1e10),
                                               torch.mul(in_bound_mask_aff_tps.float(), sampling_grid_aff_tps_comp))
    else:
        sampling_grid_aff_tps_comp = None

    return (sampling_grid_aff, sampling_grid_tps, sampling_grid_aff_tps_comp)
def test_lambda(self):
    trans = transforms.Lambda(lambda x: x.add(10))
    x = torch.randn(10)
    y = trans(x)
    assert y.equal(torch.add(x, 10))

    trans = transforms.Lambda(lambda x: x.add_(10))
    x = torch.randn(10)
    y = trans(x)
    assert y.equal(x)

    # Checking if Lambda can be printed as string
    trans.__repr__()
def forward(self, x):
    x1 = self.Conv1(x)
    x2 = self.BN1(x1)
    x3 = self.Relu(x2)
    x4 = self.Conv2(x3)
    x5 = self.BN2(x4)
    x6 = self.Relu(x5)
    x7 = self.Conv3(x6)
    x8 = self.BN3(x7)
    x9 = torch.add(x8, x1)
    x10 = self.Relu(x9)
    return x10
def _test_spadd_shape(self, shape_i, shape_v=None):
    shape = shape_i + (shape_v or [])
    x, _, _ = self._gen_sparse(len(shape_i), 10, shape)
    y = self.randn(*shape)
    r = random.random()

    res = torch.add(y, r, x)
    expected = y + r * self.safeToDense(x)

    self.assertEqual(res, expected)

    # Non contiguous dense tensor
    s = list(shape)
    s[0] = shape[-1]
    s[-1] = shape[0]
    y = self.randn(*s)
    y.transpose_(0, len(s) - 1)
    r = random.random()

    res = torch.add(y, r, x)
    expected = y + r * self.safeToDense(x)

    self.assertEqual(res, expected)
def updateGradInput(self, input, target):
    # - (target - input) / ( input (1 - input) )
    # The gradient is slightly incorrect:
    # It should have been divided by (input + self.eps) (1 - input + self.eps)
    # but it is divided by input (1 - input + self.eps) + self.eps
    # This modification requires less memory to be computed.
    if input.nelement() != target.nelement():
        raise RuntimeError("input and target size mismatch")

    if self.buffer is None:
        self.buffer = input.new()

    buffer = self.buffer
    weights = self.weights
    gradInput = self.gradInput

    if weights is not None and target.dim() != 1:
        weights = self.weights.view(1, target.size(1)).expand_as(target)

    buffer.resize_as_(input)
    # - x ( 1 + self.eps - x ) + self.eps
    torch.add(input, -1, out=buffer).add_(-self.eps).mul_(input).add_(-self.eps)

    gradInput.resize_as_(input)
    # y - x
    torch.add(target, -1, input, out=gradInput)
    # - (y - x) / ( x ( 1 + self.eps - x ) + self.eps )
    gradInput.div_(buffer)

    if weights is not None:
        gradInput.mul_(weights)

    if self.sizeAverage:
        gradInput.div_(target.nelement())

    return gradInput
def step(self, step, lprobs, scores):
    super()._init_buffers(lprobs)
    bsz, beam_size, vocab_size = lprobs.size()

    if beam_size % self.num_groups != 0:
        raise ValueError(
            'DiverseBeamSearch requires --beam to be divisible by the number of groups'
        )
    group_size = beam_size // self.num_groups

    # initialize diversity penalty
    if self.diversity_buf is None:
        self.diversity_buf = lprobs.new()
    torch.zeros(lprobs[:, 0, :].size(), out=self.diversity_buf)

    scores_G, indices_G, beams_G = [], [], []
    for g in range(self.num_groups):
        lprobs_g = lprobs[:, g::self.num_groups, :]
        scores_g = scores[:, g::self.num_groups, :] if step > 0 else None

        # apply diversity penalty
        if g > 0:
            lprobs_g = torch.add(lprobs_g, self.diversity_strength, self.diversity_buf.unsqueeze(1))
        else:
            lprobs_g = lprobs_g.contiguous()

        scores_buf, indices_buf, beams_buf = self.beam.step(step, lprobs_g, scores_g)
        beams_buf.mul_(self.num_groups).add_(g)

        scores_G.append(scores_buf.clone())
        indices_G.append(indices_buf.clone())
        beams_G.append(beams_buf.clone())

        # update diversity penalty
        self.diversity_buf.scatter_add_(
            1, indices_buf, self.diversity_buf.new_ones(indices_buf.size())
        )

    # interleave results from different groups
    self.scores_buf = torch.stack(scores_G, dim=2, out=self.scores_buf).view(bsz, -1)
    self.indices_buf = torch.stack(indices_G, dim=2, out=self.indices_buf).view(bsz, -1)
    self.beams_buf = torch.stack(beams_G, dim=2, out=self.beams_buf).view(bsz, -1)
    return self.scores_buf, self.indices_buf, self.beams_buf
def test_remote_tensor_binary_methods(self):
    hook = TorchHook(verbose=False)
    local = hook.local_worker
    remote = VirtualWorker(hook, 0)
    local.add_worker(remote)

    x = torch.FloatTensor([1, 2, 3, 4, 5]).send(remote)
    y = torch.FloatTensor([1, 2, 3, 4, 5]).send(remote)
    assert (x.add_(y).get() == torch.FloatTensor([2, 4, 6, 8, 10])).all()

    x = torch.FloatTensor([1, 2, 3, 4]).send(remote)
    y = torch.FloatTensor([[1, 2, 3, 4]]).send(remote)
    z = torch.matmul(x, y.t())
    assert torch.equal(z.get(), torch.FloatTensor([30]))

    z = torch.add(x, y)
    assert torch.equal(z.get(), torch.FloatTensor([[2, 4, 6, 8]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
    z = torch.cross(x, y, dim=1)
    assert torch.equal(z.get(), torch.FloatTensor([[0, 0, 0], [0, 0, 0], [0, 0, 0]]))

    x = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
    y = torch.FloatTensor([[1, 2, 3], [3, 4, 5], [5, 6, 7]]).send(remote)
    z = torch.dist(x, y)
    t = torch.FloatTensor([z])
    assert torch.equal(t, torch.FloatTensor([0.]))

    x = torch.FloatTensor([1, 2, 3]).send(remote)
    y = torch.FloatTensor([1, 2, 3]).send(remote)
    z = torch.dot(x, y)
    t = torch.FloatTensor([z])
    assert torch.equal(t, torch.FloatTensor([14]))

    z = torch.eq(x, y)
    assert torch.equal(z.get(), torch.ByteTensor([1, 1, 1]))

    z = torch.ge(x, y)
    assert torch.equal(z.get(), torch.ByteTensor([1, 1, 1]))
def add_mask(net, mask):
    for child in net.children():
        for param in child.conv1[0].parameters():
            param.data = torch.add(param.data, mask[0])
    for child in net.children():
        for param in child.conv2[0].parameters():
            param.data = torch.add(param.data, mask[1])
    for child in net.children():
        for param in child.conv3[0].parameters():
            param.data = torch.add(param.data, mask[2])
    for child in net.children():
        for param in child.conv4[0].parameters():
            param.data = torch.add(param.data, mask[3])
    for child in net.children():
        for param in child.conv5[0].parameters():
            param.data = torch.add(param.data, mask[4])
    for child in net.children():
        for param in child.conv6[0].parameters():
            param.data = torch.add(param.data, mask[5])
    for child in net.children():
        for param in child.conv7[0].parameters():
            param.data = torch.add(param.data, mask[6])
    for child in net.children():
        for param in child.conv8[0].parameters():
            param.data = torch.add(param.data, mask[7])
    for child in net.children():
        for param in child.conv9[0].parameters():
            param.data = torch.add(param.data, mask[8])
    for child in net.children():
        for param in child.conv10[0].parameters():
            param.data = torch.add(param.data, mask[9])
    for child in net.children():
        for param in child.conv11[0].parameters():
            param.data = torch.add(param.data, mask[10])
    for child in net.children():
        for param in child.conv12[0].parameters():
            param.data = torch.add(param.data, mask[11])
    for child in net.children():
        for param in child.conv13[0].parameters():
            param.data = torch.add(param.data, mask[12])
    for child in net.children():
        for param in child.fc1[1].parameters():
            param.data = torch.add(param.data, mask[13])
    for child in net.children():
        for param in child.fc2[1].parameters():
            param.data = torch.add(param.data, mask[14])
    for child in net.children():
        for param in child.fc3[0].parameters():
            param.data = torch.add(param.data, mask[15])
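# A more compact sketch of the same masking pass (an assumption, not the original code): the
# layer names, the [0]/[1] sub-module indices, and the mask ordering are taken from add_mask above.
import torch

def add_mask_compact(net, mask):
    layer_specs = [('conv%d' % i, 0) for i in range(1, 14)] + [('fc1', 1), ('fc2', 1), ('fc3', 0)]
    for child in net.children():
        for mask_idx, (name, sub_idx) in enumerate(layer_specs):
            for param in getattr(child, name)[sub_idx].parameters():
                param.data = torch.add(param.data, mask[mask_idx])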
def forward(self, img):
    # encoder
    e1 = self.conv1_bn(self.conv1(img))
    e10 = F.relu(e1)
    e1a = F.relu(torch.add(self.layere1a(e10), 1, e10))
    e1b = F.relu(torch.add(self.layere1b(e1a), 1, e1a))
    e1c = F.relu(torch.add(self.layere1c(e10), 1, e1b))
    e2 = self.conv2_bn(self.conv2(e1c))
    e20 = F.relu(e2)
    e2a = F.relu(torch.add(self.layere2a(e20), 1, e20))
    e2b = F.relu(torch.add(self.layere2b(e2a), 1, e2a))
    e2c = F.relu(torch.add(self.layere2c(e2b), 1, e2b))
    e3 = self.conv3_bn(self.conv3(e2c))
    e30 = F.relu(e3)
    e3a = F.relu(torch.add(self.layere3a(e30), 1, e30))
    e3b = F.relu(torch.add(self.layere3b(e3a), 1, e3a))
    e3c = F.relu(torch.add(self.layere3c(e3b), 1, e3b))
    e4 = self.conv4_bn(self.conv4(e3c))
    e40 = F.relu(e4)
    e4a = F.relu(torch.add(self.layere4a(e40), 1, e40))
    e4b = F.relu(torch.add(self.layere4b(e4a), 1, e4a))
    e4c = F.relu(torch.add(self.layere4c(e4b), 1, e4b))
    e5 = self.conv5_bn(self.conv5(e4c))
    e50 = F.relu(e5)
    e5a = F.relu(torch.add(self.layere5a(e50), 1, e50))
    e5b = F.relu(torch.add(self.layere5b(e5a), 1, e5a))
    # e5c = Identity_block_for_G(e5b, 512)
    e6 = self.conv6_bn(self.conv6(e5b))
    e60 = F.relu(e6)

    # decoder
    d10 = F.relu(torch.add(self.conv7_bn(self.conv7(e60)), 1, e5))
    d1a = F.relu(torch.add(self.layerd1a(d10), 1, d10))
    d1b = F.relu(torch.add(self.layerd1b(d1a), 1, d1a))
    # d1c = Identity_block_for_G(d1b, 512)
    d20 = F.relu(torch.add(self.conv8_bn(self.conv8(d1b)), 1, e4))
    d2a = F.relu(torch.add(self.layerd2a(d20), 1, d20))
    d2b = F.relu(torch.add(self.layerd2b(d2a), 1, d2a))
    d2c = F.relu(torch.add(self.layerd2c(d2b), 1, d2b))
    d30 = F.relu(torch.add(self.conv9_bn(self.conv9(d2c)), 1, e3))
    d3a = F.relu(torch.add(self.layerd3a(d30), 1, d30))
    d3b = F.relu(torch.add(self.layerd3b(d3a), 1, d3a))
    d3c = F.relu(torch.add(self.layerd3c(d3b), 1, d3b))
    d40 = F.relu(torch.add(self.conv10_bn(self.conv10(d3c)), 1, e2))
    d4a = F.relu(torch.add(self.layerd4a(d40), 1, d40))
    d4b = F.relu(torch.add(self.layerd4b(d4a), 1, d4a))
    d4c = F.relu(torch.add(self.layerd4c(d4b), 1, d4b))
    d50 = F.relu(torch.add(self.conv11_bn(self.conv11(d4c)), 1, e1))
    d5a = F.relu(torch.add(self.layerd5a(d50), 1, d50))
    d5b = F.relu(torch.add(self.layerd5b(d5a), 1, d5a))
    d5c = F.relu(torch.add(self.layerd5c(d5b), 1, d5b))
    d60 = self.conv12_bn(self.conv12(d5c))

    return F.tanh(d60)
def forward(self, query, key: Optional[Tensor], value: Optional[Tensor], key_padding_mask: Optional[Tensor] = None, incremental_state: Optional[Dict[str, Dict[ str, Optional[Tensor]]]] = None, need_weights: bool = True, static_kv: bool = False, attn_mask: Optional[Tensor] = None, before_softmax: bool = False, need_head_weights: bool = False, mask=None, loss_type: str = 'nmt') -> Tuple[Tensor, Optional[Tensor]]: if need_head_weights: need_weights = True tgt_len, bsz, embed_dim = query.size() assert embed_dim == self.embed_dim assert list(query.size()) == [tgt_len, bsz, embed_dim] if (not self.onnx_trace and not self.tpu # don't use PyTorch version on TPUs and incremental_state is None and not static_kv # A workaround for quantization to work. Otherwise JIT compilation # treats bias in linear module as method. and not torch.jit.is_scripting()): assert key is not None and value is not None return F.multi_head_attention_forward( query, key, value, self.embed_dim, self.num_heads, torch.empty([0]), torch.cat( (self.q_proj.bias, self.k_proj.bias, self.v_proj.bias)), self.bias_k, self.bias_v, self.add_zero_attn, self.dropout_module.p, self.out_proj.weight, self.out_proj.bias, self.training or self.dropout_module.apply_during_inference, key_padding_mask, need_weights, attn_mask, use_separate_proj_weight=True, q_proj_weight=self.q_proj.weight, k_proj_weight=self.k_proj.weight, v_proj_weight=self.v_proj.weight, ) if incremental_state is not None: saved_state = self._get_input_buffer(incremental_state) if saved_state is not None and "prev_key" in saved_state: # previous time steps are cached - no need to recompute # key and value if they are static if static_kv: assert self.encoder_decoder_attention and not self.self_attention key = value = None else: saved_state = None if self.self_attention: q = self.q_proj(query) k = self.k_proj(query) v = self.v_proj(query) elif self.encoder_decoder_attention: # encoder-decoder attention q = self.q_proj(query) if key is None: assert value is None k = v = None else: k = self.k_proj(key) v = self.v_proj(key) else: assert key is not None and value is not None q = self.q_proj(query) k = self.k_proj(key) v = self.v_proj(value) q *= self.scaling if self.bias_k is not None: assert self.bias_v is not None k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)]) v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)]) if attn_mask is not None: attn_mask = torch.cat( [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) if key_padding_mask is not None: key_padding_mask = torch.cat( [ key_padding_mask, key_padding_mask.new_zeros(key_padding_mask.size(0), 1), ], dim=1, ) q = (q.contiguous().view(tgt_len, bsz * self.num_heads, self.head_dim).transpose(0, 1)) if k is not None: k = (k.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)) if v is not None: v = (v.contiguous().view(-1, bsz * self.num_heads, self.head_dim).transpose(0, 1)) if saved_state is not None: # saved states are stored with shape (bsz, num_heads, seq_len, head_dim) if "prev_key" in saved_state: _prev_key = saved_state["prev_key"] assert _prev_key is not None prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim) if static_kv: k = prev_key else: assert k is not None k = torch.cat([prev_key, k], dim=1) if "prev_value" in saved_state: _prev_value = saved_state["prev_value"] assert _prev_value is not None prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim) if static_kv: v = prev_value else: assert v is not None v = torch.cat([prev_value, v], dim=1) 
prev_key_padding_mask: Optional[Tensor] = None if "prev_key_padding_mask" in saved_state: prev_key_padding_mask = saved_state["prev_key_padding_mask"] assert k is not None and v is not None key_padding_mask = MultiheadAttention._append_prev_key_padding_mask( key_padding_mask=key_padding_mask, prev_key_padding_mask=prev_key_padding_mask, batch_size=bsz, src_len=k.size(1), static_kv=static_kv, ) saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim) saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim) saved_state["prev_key_padding_mask"] = key_padding_mask # In this branch incremental_state is never None assert incremental_state is not None incremental_state = self._set_input_buffer(incremental_state, saved_state) assert k is not None src_len = k.size(1) # This is part of a workaround to get around fork/join parallelism # not supporting Optional types. if key_padding_mask is not None and key_padding_mask.dim() == 0: key_padding_mask = None if key_padding_mask is not None: assert key_padding_mask.size(0) == bsz assert key_padding_mask.size(1) == src_len if self.add_zero_attn: assert v is not None src_len += 1 k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1) v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1) if attn_mask is not None: attn_mask = torch.cat( [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1) if key_padding_mask is not None: key_padding_mask = torch.cat( [ key_padding_mask, torch.zeros(key_padding_mask.size(0), 1).type_as(key_padding_mask), ], dim=1, ) attn_weights = torch.bmm(q, k.transpose(1, 2)) attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz) assert list( attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len] if attn_mask is not None: attn_mask = attn_mask.unsqueeze(0) if self.onnx_trace: attn_mask = attn_mask.repeat(attn_weights.size(0), 1, 1) attn_weights += attn_mask if key_padding_mask is not None: # don't attend to padding symbols attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) if not self.tpu: attn_weights = attn_weights.masked_fill( key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), float("-inf")) else: attn_weights = attn_weights.transpose(0, 2) attn_weights = attn_weights.masked_fill( key_padding_mask, float('-inf')) attn_weights = attn_weights.transpose(0, 2) attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) if before_softmax: return attn_weights, v attn_weights_float = utils.softmax(attn_weights, dim=-1, onnx_trace=self.onnx_trace) tmp = attn_weights_float g = torch.sigmoid(self.gate(q)) if loss_type == 'nmt': attn_weights_float = g * attn_weights_float + (1 - g) * torch.mul( attn_weights_float, torch.exp(1 - mask)) elif loss_type == 'mask': # attn_weights_float = torch.mul(attn_weights, mask) attn_weights_float = torch.add( torch.mul(attn_weights_float, mask), torch.mul(torch.mean(attn_weights_float, -1, True), 1 - mask)) # tmp=attn_weights_float # if key_padding_mask is not None: # attn_weights_float = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len) # attn_weights_float = attn_weights_float.masked_fill( # key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool), # float("-inf") # ) # attn_weights_float = attn_weights_float.view(bsz * self.num_heads, tgt_len, src_len) # attn_weights_float = utils.softmax(attn_weights_float, dim=-1, onnx_trace=self.onnx_trace) attn_weights = attn_weights_float.type_as(attn_weights) attn_probs = self.dropout_module(attn_weights) assert v 
is not None attn = torch.bmm(attn_probs, v) assert list( attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim] if self.onnx_trace and attn.size(1) == 1: # when ONNX tracing a single decoder step (sequence length == 1) # the transpose is a no-op copy before view, thus unnecessary attn = attn.contiguous().view(tgt_len, bsz, embed_dim) else: attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim) attn = self.out_proj(attn) attn_weights: Optional[Tensor] = None if need_weights: attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0) if not need_head_weights: # average attention weights over heads attn_weights = attn_weights.mean(dim=0) # attn_weights = attn_weights[0] attn_weights = attn_weights_float.view(bsz, self.num_heads, tgt_len, src_len).transpose(1, 0) return attn, attn_weights, g
def bleu_score(translate_corpus: Sequence[str],
               reference_corpus: Sequence[str],
               n_gram: int = 4,
               smooth: bool = False) -> torch.Tensor:
    """Calculate BLEU score of machine translated text with one or more references.

    Args:
        translate_corpus: An iterable of machine translated corpus
        reference_corpus: An iterable of iterables of reference corpus
        n_gram: Gram value ranged from 1 to 4 (Default 4)
        smooth: Whether or not to apply smoothing – Lin et al. 2004

    Return:
        A Tensor with BLEU Score

    Example:

        >>> translate_corpus = ['the cat is on the mat'.split()]
        >>> reference_corpus = [['there is a cat on the mat'.split(), 'a cat is on the mat'.split()]]
        >>> bleu_score(translate_corpus, reference_corpus)
        tensor(0.7598)
    """

    assert len(translate_corpus) == len(reference_corpus)
    numerator = torch.zeros(n_gram)
    denominator = torch.zeros(n_gram)
    precision_scores = torch.zeros(n_gram)
    c = 0.0
    r = 0.0

    for (translation, references) in zip(translate_corpus, reference_corpus):
        c += len(translation)
        ref_len_list = [len(ref) for ref in references]
        ref_len_diff = [abs(len(translation) - x) for x in ref_len_list]
        r += ref_len_list[ref_len_diff.index(min(ref_len_diff))]
        translation_counter = _count_ngram(translation, n_gram)
        reference_counter = Counter()

        for ref in references:
            reference_counter |= _count_ngram(ref, n_gram)

        ngram_counter_clip = translation_counter & reference_counter

        for counter_clip in ngram_counter_clip:
            numerator[len(counter_clip) - 1] += ngram_counter_clip[counter_clip]

        for counter in translation_counter:
            denominator[len(counter) - 1] += translation_counter[counter]

    trans_len = torch.tensor(c)
    ref_len = torch.tensor(r)

    if min(numerator) == 0.0:
        return torch.tensor(0.0)

    if smooth:
        precision_scores = torch.add(numerator, torch.ones(n_gram)) / torch.add(denominator, torch.ones(n_gram))
    else:
        precision_scores = numerator / denominator

    log_precision_scores = torch.tensor([1.0 / n_gram] * n_gram) * torch.log(precision_scores)
    geometric_mean = torch.exp(torch.sum(log_precision_scores))
    brevity_penalty = torch.tensor(1.0) if c > r else torch.exp(1 - (ref_len / trans_len))
    bleu = brevity_penalty * geometric_mean

    return bleu
def forward(self, enc_w2v, w2v_lens, enc_kb_emb, key_emb, key_target_emb, mem_weights):
    '''
    (9,274,339, 100) - ent_embed.pkl.npy
    (569, 100) - rel_embed.pkl.npy
    (3,000,000, 300) - w2v
    '''
    embed_q = self.embed_A(enc_w2v)  # seq_len*bs*w2v_emb
    embed_q = torch.cat((embed_q, enc_kb_emb), 2)  # seq_len*bs*(w2v_emb + wiki_emb)
    packed_q = nn.utils.rnn.pack_padded_sequence(embed_q, w2v_lens, enforce_sorted=False)

    # pass through GRU
    _, q_state = self.gru(packed_q)  # bs*cell_size
    q_state = self.dropout(q_state)
    q_state = q_state.squeeze()  # from the encoder [1, hid_s, cell_s]

    # TODO: one hop is enough for our experiments
    for hop in range(self.config["hops"]):
        # --memory addressing--
        q_last = self.C(q_state)  # batch_size * (2*wikidata_embed_size)
        # q_last = q_state.mm(self.C).clamp(min=0)  # batch_size * (2*wikidata_embed_size)
        q_temp1 = q_last.unsqueeze(1)  # batch_size * 1 * (2*wikidata_embed_size)
        # q_temp1 = q_temp1 / q_temp1.norm(dim=2)[:, :, None]  # bs*1*wiki*2, L2 normalized
        # q_temp1[q_temp1 != q_temp1] = 0

        # key_emb: batch_size * size_memory * (2*wikidata_embed_size)
        # key_emb = key_emb / key_emb.norm(dim=2)[:, :, None]  # bs*sm*wiki*2
        # key_emb[key_emb != key_emb] = 0

        # prod = key_emb * q_temp1
        # dotted_1 = torch.sum(prod, 2)  # bs * ms, same as the bmm below
        dotted = torch.bmm(q_temp1, key_emb.transpose(2, 1))  # bs*1*ms
        dotted = dotted.squeeze(1)  # bs*ms

        probs = F.softmax(dotted, dim=1) * mem_weights  # bs * ms
        probs = torch.unsqueeze(probs, 1)  # bs * 1 * ms

        # --value reading--
        # key_target_emb: bs * ms * wikidata_embed_size
        # values_emb = key_target_emb.transpose(2, 1)  # bs * wikidata_embed_size * ms,
        #   needs this shape when computing values_emb * probs
        # TODO: confirm, this should be a weighted sum over value entries (i.e. dim 1),
        #   not over the embedding dimension.
        # o_k = torch.sum(values_emb * probs, 2)  # bs * wikidata_embed_size
        o_k = torch.bmm(probs, key_target_emb)  # bs * 1 * wiki_size
        o_k = o_k.squeeze(1)

        # o_k = o_k.mm(self.R_1).clamp(min=0)  # bs * cell_size
        o_k = self.R_1(o_k)  # bs * cell_size

        q_state = torch.add(q_state, o_k)

    # find candidates; the candidates are the value cells (there are no other candidates in the data)
    # temp_1 = q_state.mm(self.B).clamp(min=0)  # bs * wiki_embed
    temp_1 = self.B(q_state)  # bs * wiki_embed
    temp_1 = temp_1.unsqueeze(1)  # bs * 1 * wiki_embed

    # key_target_emb: bs * ms * wikidata_embed_size
    prob_mem = torch.sum(temp_1 * key_target_emb, 2)  # batch_size * size_memory
    # prob_mem = F.log_softmax(prob_mem, dim=1)
    # NOTE: do not pass through softmax if sigmoid is used
    # prob_mem = F.softmax(prob_mem, dim=1)
    mem_output = torch.sigmoid(prob_mem) * mem_weights

    return mem_output
def compute_loss_with_gradnorm(batch_X, batch_y_segmt, batch_y_depth,
                               batch_mask_segmt, batch_mask_depth,
                               model, task_weights=None, l01=None, l02=None,
                               criterion=None, criterion2=None,
                               optimizer=None, optimizer2=None,
                               is_train=True, epoch=1):
    model.train(is_train)
    batch_X = batch_X.to(device, non_blocking=True)
    batch_y_segmt = batch_y_segmt.to(device, non_blocking=True)
    batch_y_depth = batch_y_depth.to(device, non_blocking=True)
    batch_mask_segmt = batch_mask_segmt.to(device, non_blocking=True)
    batch_mask_depth = batch_mask_depth.to(device, non_blocking=True)

    output = model(batch_X)
    image_loss, label_loss = criterion(output, batch_y_segmt, batch_y_depth,
                                       batch_mask_segmt, batch_mask_depth,
                                       task_weights=task_weights)

    if is_train:
        alpha = 0.16
        l1 = task_weights[0] * image_loss * 0.5
        l2 = task_weights[1] * label_loss * 0.5
        if epoch == 1:
            l01 = l1.data
            l02 = l2.data

        optimizer.zero_grad()
        l1.backward(retain_graph=True)
        l2.backward(retain_graph=True)

        param = list(model.pretrained_encoder.layer4[-1].conv2.parameters())
        G1R = torch.autograd.grad(l1, param[0], retain_graph=True, create_graph=True)
        G1 = torch.norm(G1R[0], 2)
        G2R = torch.autograd.grad(l2, param[0], retain_graph=True, create_graph=True)
        G2 = torch.norm(G2R[0], 2)
        G_avg = (G1 + G2) / 2

        # Calculating relative losses
        lhat1 = torch.div(l1, l01)
        lhat2 = torch.div(l2, l02)
        lhat_avg = (lhat1 + lhat2) / 2

        # Calculating relative inverse training rates for tasks
        inv_rate1 = torch.div(lhat1, lhat_avg)
        inv_rate2 = torch.div(lhat2, lhat_avg)

        # Calculating the constant target for Eq. 2 in the GradNorm paper
        C1 = G_avg * inv_rate1 ** alpha
        C2 = G_avg * inv_rate2 ** alpha
        C1 = C1.detach()
        C2 = C2.detach()

        optimizer2.zero_grad()
        # Calculating the gradient loss according to Eq. 2 in the GradNorm paper
        Lgrad = torch.add(criterion2(G1, C1), criterion2(G2, C2))
        Lgrad.backward()

        # Updating loss weights
        optimizer2.step()
        optimizer.step()

    return (task_weights[0] * image_loss).item() + (task_weights[1] * label_loss).item(), l01, l02
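# A follow-up sketch (an assumption, not shown in the function above): GradNorm implementations
# typically renormalize the task weights after each update so that they keep summing to the
# number of tasks. `task_weights` is assumed to be a list of scalar nn.Parameter-like tensors.
import torch

def renormalize_task_weights(task_weights):
    with torch.no_grad():
        coef = len(task_weights) / sum(w.item() for w in task_weights)
        for w in task_weights:
            w.mul_(coef)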
def _generate(self, src_tokens, beam_size=None, maxlen=None): bsz, srclen = src_tokens.size() maxlen = min(maxlen, self.maxlen) if maxlen is not None else self.maxlen # the max beam size is the dictionary size - 1, since we never select pad beam_size = beam_size if beam_size is not None else self.beam_size beam_size = min(beam_size, self.vocab_size - 1) encoder_outs = [] for model in self.models: if not self.retain_dropout: model.eval() if isinstance(model.decoder, FairseqIncrementalDecoder): model.decoder.set_beam_size(beam_size) # compute the encoder output for each beam encoder_out = model.encoder( src_tokens.repeat(1, beam_size).view(-1, srclen)) encoder_outs.append(encoder_out) # initialize buffers scores = encoder_outs[0][0].data.new(bsz * beam_size).fill_(0) tokens = src_tokens.data.new(bsz * beam_size, maxlen + 2).fill_(self.pad) tokens_buf = tokens.clone() tokens[:, 0] = self.eos attn = scores.new(bsz * beam_size, src_tokens.size(1), maxlen + 2) attn_buf = attn.clone() # list of completed sentences finalized = [[] for i in range(bsz)] finished = [False for i in range(bsz)] worst_finalized = [{ 'idx': None, 'score': float('Inf') } for i in range(bsz)] num_remaining_sent = bsz # number of candidate hypos per step cand_size = 2 * beam_size # 2 x beam size in case half are EOS # offset arrays for converting between different indexing schemes bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens) cand_offsets = torch.arange(0, cand_size).type_as(tokens) # helper function for allocating buffers on the fly buffers = {} def buffer(name, type_of=tokens): # noqa if name not in buffers: buffers[name] = type_of.new() return buffers[name] def is_finished(sent): """ Check whether we've finished generation for a given sentence, by comparing the worst score among finalized hypotheses to the best possible score among unfinalized hypotheses. """ assert len(finalized[sent]) <= beam_size if len(finalized[sent]) == beam_size: if self.stop_early: return True # stop if the best unfinalized score is worse than the worst # finalized one bbsz = sent * beam_size best_unfinalized_score = scores[bbsz:bbsz + beam_size].max() if self.normalize_scores: best_unfinalized_score /= maxlen if worst_finalized[sent]['score'] >= best_unfinalized_score: return True return False def finalize_hypos(step, bbsz_idx, scores): """ Finalize the given hypotheses at this step, while keeping the total number of finalized hypotheses per sentence <= beam_size. Note: the input must be in the desired finalization order, so that hypotheses that appear earlier in the input are preferred to those that appear later. 
Args: step: current time step bbsz_idx: A vector of indices in the range [0, bsz*beam_size), indicating which hypotheses to finalize scores: A vector of the same size as bbsz_idx containing scores for each hypothesis """ assert bbsz_idx.numel() == scores.numel() norm_scores = scores / math.pow( step + 1, self.len_penalty) if self.normalize_scores else scores sents_seen = set() for idx, score in zip(bbsz_idx.cpu(), norm_scores.cpu()): sent = idx // beam_size sents_seen.add(sent) def get_hypo(): hypo = tokens[ idx, 1:step + 2].clone() # skip the first index, which is EOS hypo[step] = self.eos attention = attn[idx, :, 1:step + 2].clone() _, alignment = attention.max(dim=0) return { 'tokens': hypo, 'score': score, 'attention': attention, 'alignment': alignment, } if len(finalized[sent]) < beam_size: finalized[sent].append(get_hypo()) elif score > worst_finalized[sent]['score']: # replace worst hypo for this sentence with new/better one worst_idx = worst_finalized[sent]['idx'] finalized[sent][worst_idx] = get_hypo() # find new worst finalized hypo for this sentence idx, s = min(enumerate(finalized[sent]), key=lambda r: r[1]['score']) worst_finalized[sent] = { 'score': s['score'], 'idx': idx, } # return number of hypotheses finished this step num_finished = 0 for sent in sents_seen: # check termination conditions for this sentence if not finished[sent] and is_finished(sent): finished[sent] = True num_finished += 1 return num_finished reorder_state = None for step in range(maxlen + 1): # one extra step for EOS marker # reorder decoder internal states based on the prev choice of beams if reorder_state is not None: for model in self.models: if isinstance(model.decoder, FairseqIncrementalDecoder): model.decoder.reorder_incremental_state(reorder_state) probs, avg_attn_scores = self._decode(tokens[:, :step + 1], encoder_outs) if step == 0: # at the first step all hypotheses are equally likely, so use # only the first beam probs = probs.unfold(0, 1, beam_size).squeeze(2).contiguous() else: # make probs contain cumulative scores for each hypothesis probs.add_(scores.view(-1, 1)) probs[:, self.pad] = -math.inf # never select pad probs[:, self.unk] -= self.unk_penalty # apply unk penalty # Record attention scores attn[:, :, step + 1].copy_(avg_attn_scores) # take the best 2 x beam_size predictions. We'll choose the first # beam_size of these which don't predict eos to continue with. 
cand_scores = buffer('cand_scores', type_of=scores) cand_indices = buffer('cand_indices') cand_beams = buffer('cand_beams') probs.view(bsz, -1).topk( min(cand_size, probs.view(bsz, -1).size(1) - 1), # -1 so we never select pad out=(cand_scores, cand_indices)) torch.div(cand_indices, self.vocab_size, out=cand_beams) cand_indices.fmod_(self.vocab_size) # cand_bbsz_idx contains beam indices for the top candidate # hypotheses, with a range of values: [0, bsz*beam_size), # and dimensions: [bsz, cand_size] cand_bbsz_idx = cand_beams.add_(bbsz_offsets) # finalize hypotheses that end in eos eos_mask = cand_indices.eq(self.eos) if step >= self.minlen: eos_bbsz_idx = buffer('eos_bbsz_idx') # only consider eos when it's among the top beam_size indices cand_bbsz_idx[:, :beam_size].masked_select( eos_mask[:, :beam_size], out=eos_bbsz_idx) if eos_bbsz_idx.numel() > 0: eos_scores = buffer('eos_scores', type_of=scores) cand_scores[:, :beam_size].masked_select( eos_mask[:, :beam_size], out=eos_scores) num_remaining_sent -= finalize_hypos( step, eos_bbsz_idx, eos_scores) assert num_remaining_sent >= 0 if num_remaining_sent == 0: break # set active_mask so that values > cand_size indicate eos hypos # and values < cand_size indicate candidate active hypos. # After, the min values per row are the top candidate active hypos active_mask = buffer('active_mask') torch.add(eos_mask.type_as(cand_offsets) * cand_size, cand_offsets[:eos_mask.size(1)], out=active_mask) # get the top beam_size active hypotheses, which are just the hypos # with the smallest values in active_mask active_hypos, _ignore = buffer('active_hypos'), buffer('_ignore') active_mask.topk(beam_size, 1, largest=False, out=(_ignore, active_hypos)) active_bbsz_idx = buffer('active_bbsz_idx') cand_bbsz_idx.gather(1, active_hypos, out=active_bbsz_idx) active_scores = cand_scores.gather(1, active_hypos, out=scores.view(bsz, beam_size)) active_bbsz_idx = active_bbsz_idx.view(-1) active_scores = active_scores.view(-1) # finalize all active hypotheses once we hit maxlen # finalize_hypos will take care of adding the EOS markers if step == maxlen: num_remaining_sent -= finalize_hypos(step, active_bbsz_idx, active_scores) assert num_remaining_sent == 0 break # copy tokens for active hypotheses torch.index_select(tokens[:, :step + 1], dim=0, index=active_bbsz_idx, out=tokens_buf[:, :step + 1]) cand_indices.gather(1, active_hypos, out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1]) # copy attention for active hypotheses torch.index_select(attn[:, :, :step + 2], dim=0, index=active_bbsz_idx, out=attn_buf[:, :, :step + 2]) # swap buffers old_tokens = tokens tokens = tokens_buf tokens_buf = old_tokens old_attn = attn attn = attn_buf attn_buf = old_attn # reorder incremental state in decoder reorder_state = active_bbsz_idx # sort by score descending for sent in range(bsz): finalized[sent] = sorted(finalized[sent], key=lambda r: r['score'], reverse=True) return finalized
def _decode_target( self, encoder_input, encoder_outs, incremental_states, diversity_sibling_gamma=0.0, beam_size=None, maxlen=None, prefix_tokens=None, ): src_tokens_tensor = pytorch_translate_utils.get_source_tokens_tensor( encoder_input["src_tokens"]) beam_size = beam_size if beam_size is not None else self.beam_size bsz = src_tokens_tensor.size(0) reorder_indices = (torch.arange(bsz).view(-1, 1).repeat( 1, beam_size).view(-1).long()) for i, model in enumerate(self.models): encoder_outs[i] = model.encoder.reorder_encoder_out( encoder_out=encoder_outs[i], new_order=reorder_indices.type_as(src_tokens_tensor), ) maxlen = min(maxlen, self.maxlen) if maxlen is not None else self.maxlen # initialize buffers scores = src_tokens_tensor.new(bsz * beam_size, maxlen + 1).float().fill_(0) scores_buf = scores.clone() tokens = src_tokens_tensor.new(bsz * beam_size, maxlen + 2).fill_(self.pad) tokens_buf = tokens.clone() tokens[:, 0] = self.eos # may differ from input length if isinstance(encoder_outs[0], (list, tuple)): src_encoding_len = encoder_outs[0][0].size(0) elif isinstance(encoder_outs[0], dict): if isinstance(encoder_outs[0]["encoder_out"], tuple): # Fairseq compatibility src_encoding_len = encoder_outs[0]["encoder_out"][0].size(1) else: src_encoding_len = encoder_outs[0]["encoder_out"].size(0) attn = scores.new(bsz * beam_size, src_encoding_len, maxlen + 2) attn_buf = attn.clone() # list of completed sentences finalized = [[] for i in range(bsz)] finished = [False for i in range(bsz)] worst_finalized = [{ "idx": None, "score": -math.inf } for i in range(bsz)] num_remaining_sent = bsz # number of candidate hypos per step cand_size = 2 * beam_size # 2 x beam size in case half are EOS # offset arrays for converting between different indexing schemes bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens) cand_offsets = torch.arange(0, cand_size).type_as(tokens) # helper function for allocating buffers on the fly buffers = {} # init constraints constraints = self._build_constraints(src_tokens_tensor, beam_size) def buffer(name, type_of=tokens): # noqa if name not in buffers: buffers[name] = type_of.new() return buffers[name] def is_finished(sent, step, unfinalized_scores=None): """ Check whether we've finished generation for a given sentence, by comparing the worst score among finalized hypotheses to the best possible score among unfinalized hypotheses. """ assert len(finalized[sent]) <= beam_size if len(finalized[sent]) == beam_size: if self.stop_early or step == maxlen or unfinalized_scores is None: return True # stop if the best unfinalized score is worse than the worst # finalized one best_unfinalized_score = unfinalized_scores[sent].max() if self.normalize_scores: best_unfinalized_score /= (maxlen + 1)**self.len_penalty if worst_finalized[sent]["score"] >= best_unfinalized_score: return True return False def finalize_hypos(step, bbsz_idx, eos_scores, unfinalized_scores=None): """ Finalize the given hypotheses at this step, while keeping the total number of finalized hypotheses per sentence <= beam_size. Note: the input must be in the desired finalization order, so that hypotheses that appear earlier in the input are preferred to those that appear later. 
Args: step: current time step bbsz_idx: A vector of indices in the range [0, bsz*beam_size), indicating which hypotheses to finalize eos_scores: A vector of the same size as bbsz_idx containing scores for each hypothesis unfinalized_scores: A vector containing scores for all unfinalized hypotheses """ assert bbsz_idx.numel() == eos_scores.numel() # clone relevant token and attention tensors tokens_clone = tokens.index_select(0, bbsz_idx) tokens_clone = tokens_clone[:, 1:step + 2] # skip the first index, which is EOS tokens_clone[:, step] = self.eos attn_clone = attn.index_select(0, bbsz_idx)[:, :, 1:step + 2] # compute scores per token position pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1] pos_scores[:, step] = eos_scores # convert from cumulative to per-position scores pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] # normalize sentence-level scores if self.normalize_scores: eos_scores /= (step + 1)**self.len_penalty sents_seen = set() for i, (idx, score) in enumerate( zip(bbsz_idx.tolist(), eos_scores.tolist())): sent = idx // beam_size sents_seen.add(sent) def get_hypo(): _, alignment = attn_clone[i].max(dim=0) return { "tokens": tokens_clone[i], "score": score, "attention": attn_clone[i], # src_len x tgt_len "alignment": alignment, "positional_scores": pos_scores[i], } if len(finalized[sent]) < beam_size: finalized[sent].append(get_hypo()) elif not self.stop_early and score > worst_finalized[sent][ "score"]: # replace worst hypo for this sentence with new/better one worst_idx = worst_finalized[sent]["idx"] if worst_idx is not None: finalized[sent][worst_idx] = get_hypo() # find new worst finalized hypo for this sentence idx, s = min(enumerate(finalized[sent]), key=lambda r: r[1]["score"]) worst_finalized[sent] = {"score": s["score"], "idx": idx} # return number of hypotheses finished this step num_finished = 0 for sent in sents_seen: # check termination conditions for this sentence if not finished[sent] and is_finished(sent, step, unfinalized_scores): finished[sent] = True num_finished += 1 return num_finished reorder_state = None for step in range(maxlen + 1): # one extra step for EOS marker # reorder decoder internal states based on the prev choice of beams if reorder_state is not None: for model in self.models: if isinstance(model.decoder, FairseqIncrementalDecoder): model.decoder.reorder_incremental_state( incremental_states[model], reorder_state) # Run decoder for one step logprobs, avg_attn, possible_translation_tokens = self._decode( tokens[:, :step + 1], encoder_outs, incremental_states) logprobs[:, self.pad] = -math.inf # never select pad # apply unk reward if possible_translation_tokens is None: # No vocab reduction, so unk is represented by self.unk at # position self.unk unk_index = self.unk logprobs[:, unk_index] += self.unk_reward else: # When we use vocab reduction, the token value self.unk may not # be at the position self.unk, but somewhere else in the list # of possible_translation_tokens. It's also possible not to # show up in possible_translation_tokens at all, meaning we # can't generate an unk. 
unk_pos = torch.nonzero( possible_translation_tokens == self.unk) if unk_pos.size()[0] != 0: # only add unk_reward if unk index appears in # possible_translation_tokens unk_index = unk_pos[0][0] logprobs[:, unk_index] += self.unk_reward # external lexicon reward logprobs[:, self.lexicon_indices] += self.lexicon_reward logprobs += self.word_reward logprobs[:, self.eos] -= self.word_reward # Record attention scores attn[:, :, step + 1].copy_(avg_attn) cand_scores = buffer("cand_scores", type_of=scores) cand_indices = buffer("cand_indices") cand_beams = buffer("cand_beams") eos_bbsz_idx = buffer("eos_bbsz_idx") eos_scores = buffer("eos_scores", type_of=scores) scores = scores.type_as(logprobs) scores_buf = scores_buf.type_as(logprobs) if step < maxlen: self._apply_constraint_penalty(scores) # stub call if prefix_tokens is not None and step < prefix_tokens.size(1): logprobs_slice = logprobs.view(bsz, -1, logprobs.size(-1))[:, 0, :] cand_scores = torch.gather( logprobs_slice, dim=1, index=prefix_tokens[:, step].view(-1, 1)).expand( -1, cand_size) cand_indices = (prefix_tokens[:, step].view(-1, 1).expand( bsz, cand_size)) cand_beams.resize_as_(cand_indices).fill_(0) else: possible_tokens_size = self.vocab_size if possible_translation_tokens is not None: possible_tokens_size = possible_translation_tokens.size( 0) if diversity_sibling_gamma > 0: logprobs = self.diversity_sibling_rank( logprobs.view(bsz, -1, possible_tokens_size), diversity_sibling_gamma, ) cand_scores, cand_indices, cand_beams = self.search.step( step, logprobs.view(bsz, -1, possible_tokens_size), scores.view(bsz, beam_size, -1)[:, :, :step], ) # vocabulary reduction if possible_translation_tokens is not None: possible_translation_tokens = possible_translation_tokens.view( 1, possible_tokens_size).expand( cand_indices.size(0), possible_tokens_size) cand_indices = torch.gather( possible_translation_tokens, dim=1, index=cand_indices, out=cand_indices, ) else: # finalize all active hypotheses once we hit maxlen # pick the hypothesis with the highest log prob of EOS right now logprobs.add_(scores[:, step - 1].view(-1, 1)) torch.sort( logprobs[:, self.eos], descending=True, out=(eos_scores, eos_bbsz_idx), ) num_remaining_sent -= finalize_hypos(step, eos_bbsz_idx, eos_scores) assert num_remaining_sent == 0 break # cand_bbsz_idx contains beam indices for the top candidate # hypotheses, with a range of values: [0, bsz*beam_size), # and dimensions: [bsz, cand_size] cand_bbsz_idx = cand_beams.add_(bbsz_offsets) # finalize hypotheses that end in eos eos_mask = cand_indices.eq(self.eos) if step >= self.minlen: # only consider eos when it's among the top beam_size indices torch.masked_select( cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size], out=eos_bbsz_idx, ) if eos_bbsz_idx.numel() > 0: torch.masked_select( cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size], out=eos_scores, ) self._apply_eos_constraints(constraints, eos_bbsz_idx, eos_scores) num_remaining_sent -= finalize_hypos( step, eos_bbsz_idx, eos_scores, cand_scores) assert num_remaining_sent >= 0 if num_remaining_sent == 0: break assert step < maxlen # set active_mask so that values > cand_size indicate eos hypos # and values < cand_size indicate candidate active hypos. 
# After, the min values per row are the top candidate active hypos active_mask = buffer("active_mask") torch.add( eos_mask.type_as(cand_offsets) * cand_size, cand_offsets[:eos_mask.size(1)], out=active_mask, ) # get the top beam_size active hypotheses, which are just the hypos # with the smallest values in active_mask active_hypos, _ignore = buffer("active_hypos"), buffer("_ignore") torch.topk( active_mask, k=beam_size, dim=1, largest=False, out=(_ignore, active_hypos), ) active_bbsz_idx = buffer("active_bbsz_idx") torch.gather(cand_bbsz_idx, dim=1, index=active_hypos, out=active_bbsz_idx) active_scores = torch.gather( cand_scores, dim=1, index=active_hypos, out=scores[:, step].view(bsz, beam_size), ) active_bbsz_idx = active_bbsz_idx.view(-1) active_scores = active_scores.view(-1) # copy tokens and scores for active hypotheses torch.index_select( tokens[:, :step + 1], dim=0, index=active_bbsz_idx, out=tokens_buf[:, :step + 1], ) torch.gather( cand_indices, dim=1, index=active_hypos, out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1], ) # update constraints for next step constraints = self._reorder_constraints(constraints, active_bbsz_idx) self._update_constraints(constraints, tokens_buf[:, step + 1], step) if step > 0: torch.index_select( scores[:, :step], dim=0, index=active_bbsz_idx, out=scores_buf[:, :step], ) torch.gather( cand_scores, dim=1, index=active_hypos, out=scores_buf.view(bsz, beam_size, -1)[:, :, step], ) # copy attention for active hypotheses torch.index_select( attn[:, :, :step + 2], dim=0, index=active_bbsz_idx, out=attn_buf[:, :, :step + 2], ) # swap buffers tokens, tokens_buf = tokens_buf, tokens scores, scores_buf = scores_buf, scores attn, attn_buf = attn_buf, attn # reorder incremental state in decoder reorder_state = active_bbsz_idx # sort by score descending for sent in range(bsz): finalized[sent] = sorted(finalized[sent], key=lambda r: r["score"], reverse=True) self._finalize_constrained_results(finalized, scores.device) return finalized
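# A small, self-contained illustration (not part of _decode_target above) of the
# bbsz_offsets indexing trick the decoder relies on: per-sentence beam indices in
# [0, beam_size) are shifted by sentence * beam_size so they can index directly into
# buffers flattened to (bsz * beam_size, ...). Shapes and values below are made up.
import torch

bsz, beam_size, cand_size = 2, 3, 6
tokens = torch.arange(bsz * beam_size).view(-1, 1)               # stand-in for a (bsz*beam, T) buffer
bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1)   # [[0], [3]]
cand_beams = torch.tensor([[0, 2, 1, 0, 1, 2],
                           [1, 0, 2, 2, 0, 1]])                  # per-sentence beam choices
cand_bbsz_idx = cand_beams + bbsz_offsets                        # flat row indices in [0, bsz*beam)
print(tokens[cand_bbsz_idx.view(-1)].view(bsz, cand_size, -1).squeeze(-1))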
def forward(self, inputs): return torch.add(*inputs)
def discriminator_loss(gen_images, real_images):
    # Build label tensors matching the batch size: real_label for real images,
    # fake_label for generated ones.
    real = real_images.new_full((real_images.shape[0], 1), real_label)
    gen = gen_images.new_full((gen_images.shape[0], 1), fake_label)
    realloss = disc_loss_criterion(disc_net(real_images), real)
    # Detach generated images so no gradient flows back into the generator here.
    genloss = disc_loss_criterion(disc_net(gen_images.detach()), gen)
    # Average the real and fake losses.
    return torch.div(torch.add(realloss, genloss), 2)
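# A minimal sketch of how discriminator_loss above could be driven in a GAN training
# step. disc_net, disc_loss_criterion, real_label and fake_label are assumed to be
# module-level globals (as the function implies); the tiny network, optimizer and
# dummy batches below are purely illustrative stand-ins.
import torch
import torch.nn as nn

real_label, fake_label = 1.0, 0.0
disc_net = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 1), nn.Sigmoid())
disc_loss_criterion = nn.BCELoss()
disc_optim = torch.optim.Adam(disc_net.parameters(), lr=2e-4)

real_images = torch.rand(8, 1, 28, 28)   # dummy batch of real images
gen_images = torch.rand(8, 1, 28, 28)    # stand-in for generator output
disc_optim.zero_grad()
loss = discriminator_loss(gen_images, real_images)
loss.backward()
disc_optim.step()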
def compute_unary_term(heatmap, grid, bbox2D, cam, imgSize, **kwargs): """ Args: heatmap: array of size (n * k * h * w) -n: number of views, -k: number of joints -h: heatmap height, -w: heatmap width grid: list of k ndarrays of size (nbins * 3) -k: number of joints; 1 when the grid is shared in PSM -nbins: number of bins in the grid bbox2D: bounding box on which heatmap is computed Returns: unary_of_all_joints: a list of ndarray of size nbins """ n, k = heatmap.shape[0], heatmap.shape[1] h, w = heatmap.shape[2], heatmap.shape[3] nbins = grid[0].shape[0] current_device = torch.device('cuda:{}'.format(heatmap.get_device())) # unary_of_all_joints = [] # for j in range(k): # unary = np.zeros(nbins, dtype=np.float32) # for c in range(n): # # grid_id = 0 if len(grid) == 1 else j # xy = cameras.project_pose(grid[grid_id], cam[c]) # trans = get_affine_transform(bbox2D[c]['center'], # bbox2D[c]['scale'], 0, imgSize) # # xy = affine_transform_pts(xy, trans) * np.array([w, h]) / imgSize # # for i in range(nbins): # # xy[i] = affine_transform(xy[i], trans) * np.array([w, h]) / imgSize # # hmap = heatmap[c, j, :, :] # point_x, point_y = np.arange(hmap.shape[0]), np.arange( # hmap.shape[1]) # rgi = RegularGridInterpolator( # points=[point_x, point_y], # values=hmap.transpose(), # bounds_error=False, # fill_value=0) # score = rgi(xy) # unary = unary + np.reshape(score, newshape=unary.shape) # unary_of_all_joints.append(unary) # return unary_of_all_joints # torch version # heatmaps = torch.tensor(heatmap, dtype=torch.float32) heatmaps = heatmap grid_cords = np.zeros([n, k, nbins, 2], dtype=np.float32) for c in range(n): for j in range(k): grid_id = 0 if len(grid) == 1 else j xy = cameras.project_pose(grid[grid_id], cam[c]) trans = get_affine_transform(bbox2D[c]['center'], bbox2D[c]['scale'], 0, imgSize) xy = affine_transform_pts(xy, trans) * np.array([w, h]) / imgSize # xy of shape (4096,2) # xy is cord of certain view and certain joint if len(grid) == 1: # psm 4096bins grid_cords[c, 0, :, :] = xy / np.array( [h - 1, w - 1], dtype=np.float32) * 2.0 - 1.0 for j in range(1, k): grid_cords[c, j, :, :] = grid_cords[c, 0, :, :] break # since all joints share same grid, no need computing for each joint, just copy it else: grid_cords[c, j, :, :] = xy / np.array( [h - 1, w - 1], dtype=np.float32) * 2.0 - 1.0 grid_cords_tensor = torch.as_tensor(grid_cords).to(current_device) unary_all_views_joints = grid_sample(heatmaps, grid_cords_tensor) # unary_all_views_joints -> shape(4,16,16,4096) unary_all_views = torch.zeros(n, k, nbins).to(current_device) for j in range(k): unary_all_views[:, j, :] = unary_all_views_joints[:, j, j, :] unary_tensor = torch.zeros(k, nbins).to(current_device) for una in unary_all_views: unary_tensor = torch.add(unary_tensor, una) return unary_tensor
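# A minimal sketch (separate from compute_unary_term above) of the core trick it uses:
# normalising pixel coordinates to [-1, 1] and calling F.grid_sample to read heatmap
# values at arbitrary sub-pixel locations. All shapes here are made up; note that
# grid_sample expects the last dimension ordered (x, y), with x normalised over width
# and y over height.
import torch
import torch.nn.functional as F

n, k, h, w, nbins = 2, 4, 64, 64, 16
heatmaps = torch.rand(n, k, h, w)                                     # (views, joints, H, W)
xy = torch.rand(n, k, nbins, 2) * torch.tensor([w - 1.0, h - 1.0])    # pixel coords (x, y)
grid = xy / torch.tensor([w - 1.0, h - 1.0]) * 2.0 - 1.0              # rescale to [-1, 1]
scores = F.grid_sample(heatmaps, grid, align_corners=True)            # (n, k, k, nbins)
print(scores.shape)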
def forward(self, x, i):
    # Element-wise affine map x * w0 + w1; the index argument i is unused here.
    mm0 = torch.mul(x, self.w0)
    mm1 = torch.add(mm0, self.w1)
    return mm1
def local_add(t1, t2): return torch.add(t1, t2)
def forward(self, x):
    # Downsample, run the inner conv ops, then add the downsampled tensor back
    # as a residual (skip) connection before the final activation.
    down = self.relu1(self.bn1(self.down_conv(x)))
    out = self.do1(down)
    out = self.ops(out)
    out = self.relu2(torch.add(out, down))
    return out
def equal(x, y, prec=1e-4): return torch.all(torch.lt(torch.abs(torch.add(x, -y)), prec))
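# Hedged usage sketch for the approximate-equality helper above: it compares two
# tensors element-wise to within an absolute tolerance and returns a 0-dim bool tensor.
import torch

a = torch.tensor([1.0, 2.0, 3.0])
b = a + 1e-6
print(equal(a, b))             # tensor(True): all differences are below 1e-4
print(equal(a, b, prec=1e-7))  # tensor(False): tolerance tightened below the perturbation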
def additionalTermsLoss(self):
    # Weighted L2 penalties on the flattened weight-derivative and weight vectors,
    # scaled by hparams[1] and hparams[2] respectively.
    return torch.add(self.hparams[1] * torch.dot(self.flat_wdot, self.flat_wdot),
                     self.hparams[2] * torch.dot(self.flat_w, self.flat_w))
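# A small, self-contained sketch of the same pattern: adding a weighted squared-norm
# (L2) penalty term to a task loss. The module, coefficient and stand-in loss below
# are made up for illustration.
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
lam = 1e-3
flat_w = torch.cat([p.view(-1) for p in model.parameters()])
data_loss = torch.tensor(0.5)                                  # stand-in for a task loss
total_loss = torch.add(data_loss, lam * torch.dot(flat_w, flat_w))
print(total_loss)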
def forward(self, sentence, p_sentence, pos_tags, lengths, target_idx_in, region_marks, local_roles_voc, frames, local_roles_mask, sent_pred_lemmas_idx, dep_tags, dep_heads, targets, test=False): embeds = self.word_embeddings(sentence) embeds = embeds.view(self.batch_size, len(sentence[0]), self.word_emb_dim) pos_embeds = self.pos_embeddings(pos_tags) fixed_embeds = self.word_fixed_embeddings(p_sentence) fixed_embeds = fixed_embeds.view(self.batch_size, len(sentence[0]), self.word_emb_dim) sent_pred_lemmas_embeds = self.p_lemma_embeddings(sent_pred_lemmas_idx) region_marks = region_marks.view(self.batch_size, len(sentence[0]), 1) embeds = torch.cat((embeds, fixed_embeds, pos_embeds, sent_pred_lemmas_embeds, region_marks), 2) #embeds = torch.cat((embeds, fixed_embeds, pos_embeds, region_marks), 2) # share_layer embeds_sort, lengths_sort, unsort_idx = self.sort_batch( embeds, lengths) embeds_sort = rnn.pack_padded_sequence(embeds_sort, lengths_sort, batch_first=True) # hidden states [time_steps * batch_size * hidden_units] hidden_states, self.hidden = self.BiLSTM_share(embeds_sort, self.hidden) # it seems that hidden states is already batch first, we don't need swap the dims # hidden_states = hidden_states.permute(1, 2, 0).contiguous().view(self.batch_size, -1, ) hidden_states, lens = rnn.pad_packed_sequence(hidden_states, batch_first=True) #hidden_states = hidden_states.transpose(0, 1) hidden_states = hidden_states[unsort_idx] bf_e = torch.tensor(hidden_states.data.numpy()) concat_embeds = torch.zeros(bf_e.size()[0], bf_e.size()[1], bf_e.size()[2]) for i in range(bf_e.size()[0]): for j in range(bf_e.size()[1]): if dep_heads[i][j] > 0: concat_embeds[i, j] = bf_e[i, dep_heads[i][j] - 1] head_features = torch.tensor( F.tanh(self.hidden2tag_M(bf_e)).data.numpy()) head_features.requires_grad_(False) dep_tag_space = self.MLP( F.tanh(self.hidden2tag_M(bf_e) + self.hidden2tag_H(concat_embeds))).view( len(sentence[0]) * self.batch_size, -1) hidden_states = torch.cat((hidden_states, head_features), 2) # SRL layer embeds_sort, lengths_sort, unsort_idx = self.sort_batch( hidden_states, lengths) embeds_sort = rnn.pack_padded_sequence(embeds_sort, lengths_sort.cpu().numpy(), batch_first=True) # hidden states [time_steps * batch_size * hidden_units] hidden_states, self.hidden_2 = self.BiLSTM_SRL(embeds_sort, self.hidden_2) # it seems that hidden states is already batch first, we don't need swap the dims # hidden_states = hidden_states.permute(1, 2, 0).contiguous().view(self.batch_size, -1, ) hidden_states, lens = rnn.pad_packed_sequence(hidden_states, batch_first=True) # hidden_states = hidden_states.transpose(0, 1) hidden_states = hidden_states[unsort_idx] # B * H hidden_states_3 = hidden_states predicate_embeds = hidden_states_3[ np.arange(0, hidden_states_3.size()[0]), target_idx_in] # T * B * H added_embeds = Variable( torch.zeros(hidden_states_3.size()[1], hidden_states_3.size()[0], hidden_states_3.size()[2])) predicate_embeds = added_embeds + predicate_embeds # B * T * H predicate_embeds = predicate_embeds.transpose(0, 1) hidden_states = torch.cat((hidden_states_3, predicate_embeds), 2) # print(hidden_states) # non-linear map and rectify the roles' embeddings # roles = Variable(torch.from_numpy(np.arange(0, self.tagset_size))) # B * roles # log(local_roles_voc) # log(frames) # B * roles * h role_embeds = self.role_embeddings(local_roles_voc) frame_embeds = self.frame_embeddings(frames) role_embeds = torch.cat((role_embeds, frame_embeds), 2) mapped_roles = F.relu(self.role_map(role_embeds)) 
mapped_roles = torch.transpose(mapped_roles, 1, 2) # b, times, roles tag_space = torch.matmul(hidden_states, mapped_roles) #tag_space = hidden_states.mm(mapped_roles) # b, roles #sub = torch.div(torch.add(local_roles_mask, -1.0), _BIG_NUMBER) sub = torch.add(local_roles_mask, -1.0) * _BIG_NUMBER sub = torch.FloatTensor(sub.numpy()) # b, roles, times tag_space = torch.transpose(tag_space, 0, 1) tag_space += sub # b, T, roles tag_space = torch.transpose(tag_space, 0, 1) tag_space = tag_space.view(len(sentence[0]) * self.batch_size, -1) SRLprobs = F.softmax(tag_space, dim=1) wrong_l_nums = 0.0 all_l_nums = 0.0 dep_labels = np.argmax(dep_tag_space.data.numpy(), axis=1) for predict_l, gold_l in zip(dep_labels, dep_tags.view(-1).data.numpy()): if gold_l != 0: all_l_nums += 1 if predict_l != gold_l and gold_l != 0: wrong_l_nums += 1 #loss_function = nn.NLLLoss(ignore_index=0) targets = targets.view(-1) #tag_scores = F.log_softmax(tag_space) #loss = loss_function(tag_scores, targets) loss_function = nn.CrossEntropyLoss(ignore_index=0) SRLloss = loss_function(tag_space, targets) DEPloss = loss_function(dep_tag_space, dep_tags.view(-1)) loss = SRLloss + DEPloss return SRLloss, DEPloss, loss, SRLprobs, wrong_l_nums, all_l_nums
def forward(self, data): conv1_7x7_s2 = self.conv1_7x7_s2(data) conv1_7x7_s2_bn = self.conv1_7x7_s2_bn(conv1_7x7_s2) conv1_7x7_s2_bnxx = self.conv1_relu_7x7_s2(conv1_7x7_s2_bn) pool1_3x3_s2 = self.pool1_3x3_s2(conv1_7x7_s2_bnxx) conv2_1_1x1_reduce = self.conv2_1_1x1_reduce(pool1_3x3_s2) conv2_1_1x1_reduce_bn = self.conv2_1_1x1_reduce_bn(conv2_1_1x1_reduce) conv2_1_1x1_reduce_bnxx = self.conv2_1_1x1_reduce_relu(conv2_1_1x1_reduce_bn) conv2_1_3x3 = self.conv2_1_3x3(conv2_1_1x1_reduce_bnxx) conv2_1_3x3_bn = self.conv2_1_3x3_bn(conv2_1_3x3) conv2_1_3x3_bnxx = self.conv2_1_3x3_relu(conv2_1_3x3_bn) conv2_1_1x1_increase = self.conv2_1_1x1_increase(conv2_1_3x3_bnxx) conv2_1_1x1_increase_bn = self.conv2_1_1x1_increase_bn(conv2_1_1x1_increase) conv2_1_1x1_proj = self.conv2_1_1x1_proj(pool1_3x3_s2) conv2_1_1x1_proj_bn = self.conv2_1_1x1_proj_bn(conv2_1_1x1_proj) conv2_1 = torch.add(conv2_1_1x1_proj_bn, 1, conv2_1_1x1_increase_bn) conv2_1x = self.conv2_1_relu(conv2_1) conv2_2_1x1_reduce = self.conv2_2_1x1_reduce(conv2_1x) conv2_2_1x1_reduce_bn = self.conv2_2_1x1_reduce_bn(conv2_2_1x1_reduce) conv2_2_1x1_reduce_bnxx = self.conv2_2_1x1_reduce_relu(conv2_2_1x1_reduce_bn) conv2_2_3x3 = self.conv2_2_3x3(conv2_2_1x1_reduce_bnxx) conv2_2_3x3_bn = self.conv2_2_3x3_bn(conv2_2_3x3) conv2_2_3x3_bnxx = self.conv2_2_3x3_relu(conv2_2_3x3_bn) conv2_2_1x1_increase = self.conv2_2_1x1_increase(conv2_2_3x3_bnxx) conv2_2_1x1_increase_bn = self.conv2_2_1x1_increase_bn(conv2_2_1x1_increase) conv2_2 = torch.add(conv2_1x, 1, conv2_2_1x1_increase_bn) conv2_2x = self.conv2_2_relu(conv2_2) conv2_3_1x1_reduce = self.conv2_3_1x1_reduce(conv2_2x) conv2_3_1x1_reduce_bn = self.conv2_3_1x1_reduce_bn(conv2_3_1x1_reduce) conv2_3_1x1_reduce_bnxx = self.conv2_3_1x1_reduce_relu(conv2_3_1x1_reduce_bn) conv2_3_3x3 = self.conv2_3_3x3(conv2_3_1x1_reduce_bnxx) conv2_3_3x3_bn = self.conv2_3_3x3_bn(conv2_3_3x3) conv2_3_3x3_bnxx = self.conv2_3_3x3_relu(conv2_3_3x3_bn) conv2_3_1x1_increase = self.conv2_3_1x1_increase(conv2_3_3x3_bnxx) conv2_3_1x1_increase_bn = self.conv2_3_1x1_increase_bn(conv2_3_1x1_increase) conv2_3 = torch.add(conv2_2x, 1, conv2_3_1x1_increase_bn) conv2_3x = self.conv2_3_relu(conv2_3) conv3_1_1x1_reduce = self.conv3_1_1x1_reduce(conv2_3x) conv3_1_1x1_reduce_bn = self.conv3_1_1x1_reduce_bn(conv3_1_1x1_reduce) conv3_1_1x1_reduce_bnxx = self.conv3_1_1x1_reduce_relu(conv3_1_1x1_reduce_bn) conv3_1_3x3 = self.conv3_1_3x3(conv3_1_1x1_reduce_bnxx) conv3_1_3x3_bn = self.conv3_1_3x3_bn(conv3_1_3x3) conv3_1_3x3_bnxx = self.conv3_1_3x3_relu(conv3_1_3x3_bn) conv3_1_1x1_increase = self.conv3_1_1x1_increase(conv3_1_3x3_bnxx) conv3_1_1x1_increase_bn = self.conv3_1_1x1_increase_bn(conv3_1_1x1_increase) conv3_1_1x1_proj = self.conv3_1_1x1_proj(conv2_3x) conv3_1_1x1_proj_bn = self.conv3_1_1x1_proj_bn(conv3_1_1x1_proj) conv3_1 = torch.add(conv3_1_1x1_proj_bn, 1, conv3_1_1x1_increase_bn) conv3_1x = self.conv3_1_relu(conv3_1) conv3_2_1x1_reduce = self.conv3_2_1x1_reduce(conv3_1x) conv3_2_1x1_reduce_bn = self.conv3_2_1x1_reduce_bn(conv3_2_1x1_reduce) conv3_2_1x1_reduce_bnxx = self.conv3_2_1x1_reduce_relu(conv3_2_1x1_reduce_bn) conv3_2_3x3 = self.conv3_2_3x3(conv3_2_1x1_reduce_bnxx) conv3_2_3x3_bn = self.conv3_2_3x3_bn(conv3_2_3x3) conv3_2_3x3_bnxx = self.conv3_2_3x3_relu(conv3_2_3x3_bn) conv3_2_1x1_increase = self.conv3_2_1x1_increase(conv3_2_3x3_bnxx) conv3_2_1x1_increase_bn = self.conv3_2_1x1_increase_bn(conv3_2_1x1_increase) conv3_2 = torch.add(conv3_1x, 1, conv3_2_1x1_increase_bn) conv3_2x = self.conv3_2_relu(conv3_2) conv3_3_1x1_reduce = 
self.conv3_3_1x1_reduce(conv3_2x) conv3_3_1x1_reduce_bn = self.conv3_3_1x1_reduce_bn(conv3_3_1x1_reduce) conv3_3_1x1_reduce_bnxx = self.conv3_3_1x1_reduce_relu(conv3_3_1x1_reduce_bn) conv3_3_3x3 = self.conv3_3_3x3(conv3_3_1x1_reduce_bnxx) conv3_3_3x3_bn = self.conv3_3_3x3_bn(conv3_3_3x3) conv3_3_3x3_bnxx = self.conv3_3_3x3_relu(conv3_3_3x3_bn) conv3_3_1x1_increase = self.conv3_3_1x1_increase(conv3_3_3x3_bnxx) conv3_3_1x1_increase_bn = self.conv3_3_1x1_increase_bn(conv3_3_1x1_increase) conv3_3 = torch.add(conv3_2x, 1, conv3_3_1x1_increase_bn) conv3_3x = self.conv3_3_relu(conv3_3) conv3_4_1x1_reduce = self.conv3_4_1x1_reduce(conv3_3x) conv3_4_1x1_reduce_bn = self.conv3_4_1x1_reduce_bn(conv3_4_1x1_reduce) conv3_4_1x1_reduce_bnxx = self.conv3_4_1x1_reduce_relu(conv3_4_1x1_reduce_bn) conv3_4_3x3 = self.conv3_4_3x3(conv3_4_1x1_reduce_bnxx) conv3_4_3x3_bn = self.conv3_4_3x3_bn(conv3_4_3x3) conv3_4_3x3_bnxx = self.conv3_4_3x3_relu(conv3_4_3x3_bn) conv3_4_1x1_increase = self.conv3_4_1x1_increase(conv3_4_3x3_bnxx) conv3_4_1x1_increase_bn = self.conv3_4_1x1_increase_bn(conv3_4_1x1_increase) conv3_4 = torch.add(conv3_3x, 1, conv3_4_1x1_increase_bn) conv3_4x = self.conv3_4_relu(conv3_4) conv4_1_1x1_reduce = self.conv4_1_1x1_reduce(conv3_4x) conv4_1_1x1_reduce_bn = self.conv4_1_1x1_reduce_bn(conv4_1_1x1_reduce) conv4_1_1x1_reduce_bnxx = self.conv4_1_1x1_reduce_relu(conv4_1_1x1_reduce_bn) conv4_1_3x3 = self.conv4_1_3x3(conv4_1_1x1_reduce_bnxx) conv4_1_3x3_bn = self.conv4_1_3x3_bn(conv4_1_3x3) conv4_1_3x3_bnxx = self.conv4_1_3x3_relu(conv4_1_3x3_bn) conv4_1_1x1_increase = self.conv4_1_1x1_increase(conv4_1_3x3_bnxx) conv4_1_1x1_increase_bn = self.conv4_1_1x1_increase_bn(conv4_1_1x1_increase) conv4_1_1x1_proj = self.conv4_1_1x1_proj(conv3_4x) conv4_1_1x1_proj_bn = self.conv4_1_1x1_proj_bn(conv4_1_1x1_proj) conv4_1 = torch.add(conv4_1_1x1_proj_bn, 1, conv4_1_1x1_increase_bn) conv4_1x = self.conv4_1_relu(conv4_1) conv4_2_1x1_reduce = self.conv4_2_1x1_reduce(conv4_1x) conv4_2_1x1_reduce_bn = self.conv4_2_1x1_reduce_bn(conv4_2_1x1_reduce) conv4_2_1x1_reduce_bnxx = self.conv4_2_1x1_reduce_relu(conv4_2_1x1_reduce_bn) conv4_2_3x3 = self.conv4_2_3x3(conv4_2_1x1_reduce_bnxx) conv4_2_3x3_bn = self.conv4_2_3x3_bn(conv4_2_3x3) conv4_2_3x3_bnxx = self.conv4_2_3x3_relu(conv4_2_3x3_bn) conv4_2_1x1_increase = self.conv4_2_1x1_increase(conv4_2_3x3_bnxx) conv4_2_1x1_increase_bn = self.conv4_2_1x1_increase_bn(conv4_2_1x1_increase) conv4_2 = torch.add(conv4_1x, 1, conv4_2_1x1_increase_bn) conv4_2x = self.conv4_2_relu(conv4_2) conv4_3_1x1_reduce = self.conv4_3_1x1_reduce(conv4_2x) conv4_3_1x1_reduce_bn = self.conv4_3_1x1_reduce_bn(conv4_3_1x1_reduce) conv4_3_1x1_reduce_bnxx = self.conv4_3_1x1_reduce_relu(conv4_3_1x1_reduce_bn) conv4_3_3x3 = self.conv4_3_3x3(conv4_3_1x1_reduce_bnxx) conv4_3_3x3_bn = self.conv4_3_3x3_bn(conv4_3_3x3) conv4_3_3x3_bnxx = self.conv4_3_3x3_relu(conv4_3_3x3_bn) conv4_3_1x1_increase = self.conv4_3_1x1_increase(conv4_3_3x3_bnxx) conv4_3_1x1_increase_bn = self.conv4_3_1x1_increase_bn(conv4_3_1x1_increase) conv4_3 = torch.add(conv4_2x, 1, conv4_3_1x1_increase_bn) conv4_3x = self.conv4_3_relu(conv4_3) conv4_4_1x1_reduce = self.conv4_4_1x1_reduce(conv4_3x) conv4_4_1x1_reduce_bn = self.conv4_4_1x1_reduce_bn(conv4_4_1x1_reduce) conv4_4_1x1_reduce_bnxx = self.conv4_4_1x1_reduce_relu(conv4_4_1x1_reduce_bn) conv4_4_3x3 = self.conv4_4_3x3(conv4_4_1x1_reduce_bnxx) conv4_4_3x3_bn = self.conv4_4_3x3_bn(conv4_4_3x3) conv4_4_3x3_bnxx = self.conv4_4_3x3_relu(conv4_4_3x3_bn) conv4_4_1x1_increase = 
self.conv4_4_1x1_increase(conv4_4_3x3_bnxx) conv4_4_1x1_increase_bn = self.conv4_4_1x1_increase_bn(conv4_4_1x1_increase) conv4_4 = torch.add(conv4_3x, 1, conv4_4_1x1_increase_bn) conv4_4x = self.conv4_4_relu(conv4_4) conv4_5_1x1_reduce = self.conv4_5_1x1_reduce(conv4_4x) conv4_5_1x1_reduce_bn = self.conv4_5_1x1_reduce_bn(conv4_5_1x1_reduce) conv4_5_1x1_reduce_bnxx = self.conv4_5_1x1_reduce_relu(conv4_5_1x1_reduce_bn) conv4_5_3x3 = self.conv4_5_3x3(conv4_5_1x1_reduce_bnxx) conv4_5_3x3_bn = self.conv4_5_3x3_bn(conv4_5_3x3) conv4_5_3x3_bnxx = self.conv4_5_3x3_relu(conv4_5_3x3_bn) conv4_5_1x1_increase = self.conv4_5_1x1_increase(conv4_5_3x3_bnxx) conv4_5_1x1_increase_bn = self.conv4_5_1x1_increase_bn(conv4_5_1x1_increase) conv4_5 = torch.add(conv4_4x, 1, conv4_5_1x1_increase_bn) conv4_5x = self.conv4_5_relu(conv4_5) conv4_6_1x1_reduce = self.conv4_6_1x1_reduce(conv4_5x) conv4_6_1x1_reduce_bn = self.conv4_6_1x1_reduce_bn(conv4_6_1x1_reduce) conv4_6_1x1_reduce_bnxx = self.conv4_6_1x1_reduce_relu(conv4_6_1x1_reduce_bn) conv4_6_3x3 = self.conv4_6_3x3(conv4_6_1x1_reduce_bnxx) conv4_6_3x3_bn = self.conv4_6_3x3_bn(conv4_6_3x3) conv4_6_3x3_bnxx = self.conv4_6_3x3_relu(conv4_6_3x3_bn) conv4_6_1x1_increase = self.conv4_6_1x1_increase(conv4_6_3x3_bnxx) conv4_6_1x1_increase_bn = self.conv4_6_1x1_increase_bn(conv4_6_1x1_increase) conv4_6 = torch.add(conv4_5x, 1, conv4_6_1x1_increase_bn) conv4_6x = self.conv4_6_relu(conv4_6) conv5_1_1x1_reduce = self.conv5_1_1x1_reduce(conv4_6x) conv5_1_1x1_reduce_bn = self.conv5_1_1x1_reduce_bn(conv5_1_1x1_reduce) conv5_1_1x1_reduce_bnxx = self.conv5_1_1x1_reduce_relu(conv5_1_1x1_reduce_bn) conv5_1_3x3 = self.conv5_1_3x3(conv5_1_1x1_reduce_bnxx) conv5_1_3x3_bn = self.conv5_1_3x3_bn(conv5_1_3x3) conv5_1_3x3_bnxx = self.conv5_1_3x3_relu(conv5_1_3x3_bn) conv5_1_1x1_increase = self.conv5_1_1x1_increase(conv5_1_3x3_bnxx) conv5_1_1x1_increase_bn = self.conv5_1_1x1_increase_bn(conv5_1_1x1_increase) conv5_1_1x1_proj = self.conv5_1_1x1_proj(conv4_6x) conv5_1_1x1_proj_bn = self.conv5_1_1x1_proj_bn(conv5_1_1x1_proj) conv5_1 = torch.add(conv5_1_1x1_proj_bn, 1, conv5_1_1x1_increase_bn) conv5_1x = self.conv5_1_relu(conv5_1) conv5_2_1x1_reduce = self.conv5_2_1x1_reduce(conv5_1x) conv5_2_1x1_reduce_bn = self.conv5_2_1x1_reduce_bn(conv5_2_1x1_reduce) conv5_2_1x1_reduce_bnxx = self.conv5_2_1x1_reduce_relu(conv5_2_1x1_reduce_bn) conv5_2_3x3 = self.conv5_2_3x3(conv5_2_1x1_reduce_bnxx) conv5_2_3x3_bn = self.conv5_2_3x3_bn(conv5_2_3x3) conv5_2_3x3_bnxx = self.conv5_2_3x3_relu(conv5_2_3x3_bn) conv5_2_1x1_increase = self.conv5_2_1x1_increase(conv5_2_3x3_bnxx) conv5_2_1x1_increase_bn = self.conv5_2_1x1_increase_bn(conv5_2_1x1_increase) conv5_2 = torch.add(conv5_1x, 1, conv5_2_1x1_increase_bn) conv5_2x = self.conv5_2_relu(conv5_2) conv5_3_1x1_reduce = self.conv5_3_1x1_reduce(conv5_2x) conv5_3_1x1_reduce_bn = self.conv5_3_1x1_reduce_bn(conv5_3_1x1_reduce) conv5_3_1x1_reduce_bnxx = self.conv5_3_1x1_reduce_relu(conv5_3_1x1_reduce_bn) conv5_3_3x3 = self.conv5_3_3x3(conv5_3_1x1_reduce_bnxx) conv5_3_3x3_bn = self.conv5_3_3x3_bn(conv5_3_3x3) conv5_3_3x3_bnxx = self.conv5_3_3x3_relu(conv5_3_3x3_bn) conv5_3_1x1_increase = self.conv5_3_1x1_increase(conv5_3_3x3_bnxx) conv5_3_1x1_increase_bn = self.conv5_3_1x1_increase_bn(conv5_3_1x1_increase) conv5_3 = torch.add(conv5_2x, 1, conv5_3_1x1_increase_bn) conv5_3x = self.conv5_3_relu(conv5_3) pool5_7x7_s1 = self.pool5_7x7_s1(conv5_3x) classifier_preflatten = self.classifier(pool5_7x7_s1) classifier = classifier_preflatten.view(classifier_preflatten.size(0), -1) 
return classifier, pool5_7x7_s1
def compute(self, config, budget, working_directory, *args, **kwargs):
    model_cnt = 10  # number of models to build
    feature_encoder = []
    relation_network = []
    feature_encoder_optim = []
    relation_network_optim = []
    for i in range(model_cnt):
        feature_encoder.insert(
            i,
            embedding_function(
                num_embedding_layers=config['num_embedding_layers'],
                num_filters_1=config['num_filters_1'],
                num_filters_2=config['num_filters_2'] if 'num_filters_2' in config else None,
                num_filters_3=config['num_filters_3'] if 'num_filters_3' in config else None,
                num_filters_4=config['num_filters_4'] if 'num_filters_4' in config else None,
                dropout_rate=config['dropout_rate'],
                kernel_size=3))
        relation_network.insert(
            i,
            relation_function(
                num_relation_layers=config['num_relation_layers'],
                embedding_output_filter=embedding_output_filter * 2,
                input_length=input_length,
                num_rela_filters_1=config['num_rela_filters_1'] if 'num_rela_filters_1' in config else None,
                num_rela_filters_2=config['num_rela_filters_2'] if 'num_rela_filters_2' in config else None,
                num_rela_filters_3=config['num_rela_filters_3'] if 'num_rela_filters_3' in config else None,
                num_rela_filters_4=config['num_rela_filters_4'] if 'num_rela_filters_4' in config else None,
                rela_dropout_rate=config['rela_dropout_rate'],
                kernel_size=3,
                num_fc_units=config['num_fc_units']))
        feature_encoder[i].apply(weights_init)
        relation_network[i].apply(weights_init)
        if config['optimizer'] == 'Adam':
            feature_encoder_optim.insert(
                i, torch.optim.Adam(feature_encoder[i].parameters(), lr=config['lr']))
            relation_network_optim.insert(
                i, torch.optim.Adam(relation_network[i].parameters(), lr=config['lr']))
        else:
            feature_encoder_optim.insert(
                i, torch.optim.SGD(feature_encoder[i].parameters(),
                                   lr=config['lr'], momentum=config['sgd_momentum']))
            relation_network_optim.insert(
                i, torch.optim.SGD(relation_network[i].parameters(),
                                   lr=config['lr'], momentum=config['sgd_momentum']))
    # model for the first subject (one model trained per subject)
    for i in range(model_cnt):
        relation = []
        for episode in range(int(budget)):
            for j in range(model_cnt):
                support_feature1 = feature_encoder[i](Support[j][:, :, 0:10].float())
                support_feature2 = feature_encoder[i](Support[j][:, :, 10:20].float())
                support_feature3 = feature_encoder[i](Support[j][:, :, 20:30].float())
                feature = torch.add(support_feature1, support_feature2)
                support_feature = torch.add(feature, support_feature3)
                query_feature = feature_encoder[i](Query[i].float())
                # concatenate the feature maps
                feature_pair = torch.cat((support_feature, query_feature), dim=1)
                # feed the pair into the relation function
                relation.insert(j, relation_network[i](feature_pair))
                mse = nn.MSELoss()
                if i == j:
                    label = torch.tensor(1, dtype=torch.float32)
                else:
                    label = torch.tensor(0, dtype=torch.float32)
                loss = mse(relation[j], label)  # the matching pair should score 1
                feature_encoder[i].zero_grad()
                relation_network[i].zero_grad()
                loss.backward()
                feature_encoder_optim[i].step()
                relation_network_optim[i].step()
                if (episode + 1) == budget:
                    predict_label = torch.max(relation[j].data)
                    print("sub:", j, "max prediction:", predict_label, "loss", loss.item())
                    if (j + 1) == model_cnt:
                        print("\t---------------------------------------------------------")
def simple_addition(x, y):
    """
    TODO: Implement a simple addition function that accepts two tensors
    and returns the result.
    """
    return torch.add(x, y)
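# Hedged usage note for simple_addition above: torch.add follows standard broadcasting
# rules, so the two tensors only need broadcast-compatible shapes.
import torch

print(simple_addition(torch.tensor([1.0, 2.0, 3.0]), torch.tensor(10.0)))  # tensor([11., 12., 13.])
print(simple_addition(torch.ones(2, 3), torch.arange(3.0)))                # broadcasts over rows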
def from_importance_weights(target_policy_log_probs, behavior_policy_log_probs,
                            log_rhos, discounts, rewards, values, bootstrap_value,
                            clip_rho_threshold=1.0, clip_pg_rho_threshold=1.0,
                            behavior_relevance_threshold=1.0):
    """V-trace from log importance weights."""
    with torch.no_grad():
        kl_div = (behavior_policy_log_probs.exp() *
                  (behavior_policy_log_probs - target_policy_log_probs)).sum(-1).unsqueeze(-1)
        per_step_behavioral = (kl_div < behavior_relevance_threshold).float()
        output_list = []
        prev_val = torch.ones((1, 1)).to(kl_div.device)
        for threshold_val_t in per_step_behavioral.unbind():
            threshold_val_t = threshold_val_t.view(1, -1)
            threshold_val_t *= prev_val
            prev_val = threshold_val_t
            output_list.append(threshold_val_t)
        mask = torch.cat(output_list)
        # mask = torch.cumprod(per_step_behavioral, dim=0).squeeze(-1)
        rhos = torch.exp(log_rhos)
        if clip_rho_threshold is not None:
            clipped_rhos = torch.clamp(rhos, max=clip_rho_threshold)
        else:
            clipped_rhos = rhos
        cs = torch.clamp(rhos, max=1.0)
        # Append bootstrapped value to get [v1, ..., v_t+1]
        values_t_plus_1 = torch.cat(
            [values[1:], torch.unsqueeze(bootstrap_value, 0)], dim=0)
        deltas = clipped_rhos * (rewards + discounts * values_t_plus_1 - values)
        acc = torch.zeros_like(bootstrap_value)
        result = []
        for t in range(discounts.shape[0] - 1, -1, -1):
            acc = deltas[t] + discounts[t] * cs[t] * acc * mask[t]
            result.append(acc)
        result.reverse()
        vs_minus_v_xs = torch.stack(result)
        # Add V(x_s) to get v_s.
        vs = torch.add(vs_minus_v_xs, values)
        # Advantage for policy gradient.
        broadcasted_bootstrap_values = torch.ones_like(vs[0]) * bootstrap_value
        vs_t_plus_1 = torch.cat(
            [vs[1:], broadcasted_bootstrap_values.unsqueeze(0)], dim=0)
        if clip_pg_rho_threshold is not None:
            clipped_pg_rhos = torch.clamp(rhos, max=clip_pg_rho_threshold)
        else:
            clipped_pg_rhos = rhos
        pg_advantages = clipped_pg_rhos * (rewards + discounts * vs_t_plus_1 - values) * mask
        # Make sure no gradients backpropagated through the returned values.
        return VTraceReturns(vs=vs, pg_advantages=pg_advantages, mask=mask)
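# A hedged driver for from_importance_weights above, using made-up [T, B] tensors.
# VTraceReturns is assumed here to be a namedtuple with fields vs, pg_advantages and
# mask, matching how the function packs its outputs; all shapes and values are
# illustrative only.
import collections
import torch

VTraceReturns = collections.namedtuple("VTraceReturns", ["vs", "pg_advantages", "mask"])

T, B, A = 5, 3, 4  # time steps, batch size, action dimension
target_lp = torch.log_softmax(torch.randn(T, B, A), dim=-1)
behavior_lp = torch.log_softmax(torch.randn(T, B, A), dim=-1)
actions = torch.randint(A, (T, B))
log_rhos = (target_lp - behavior_lp).gather(-1, actions.unsqueeze(-1)).squeeze(-1)

out = from_importance_weights(
    target_policy_log_probs=target_lp,
    behavior_policy_log_probs=behavior_lp,
    log_rhos=log_rhos,
    discounts=torch.full((T, B), 0.99),
    rewards=torch.randn(T, B),
    values=torch.randn(T, B),
    bootstrap_value=torch.randn(B),
)
print(out.vs.shape, out.pg_advantages.shape)  # torch.Size([5, 3]) torch.Size([5, 3])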
def getting_started(): print(util.Section('Getting Started')) # construction print(util.SubSection('Construction')) xa1 = torch.empty(5, 3) # uninitialized xa2 = torch.rand(5, 3) # randomly initialized matrix xa3 = torch.zeros(5, 3, dtype=torch.long) # filled zeros and of dtype long xa4 = torch.tensor([5.5, 3]) # directly from data xa5 = xa3.new_ones(5, 3, dtype=torch.double) # new_* method take in sizes xa6 = torch.randn_like(xa5, dtype=torch.float) # override dtype with same size print(f'x size = {xa6.size()}') # operations xb1 = torch.rand(5, 3) yb1 = torch.rand(5, 3) # operation: add print(util.SubSection('Operations: Add')) print(f'xb1 + yb1 = {xb1 + yb1}') print(f'xb1 + yb1 = {torch.add(xb1, yb1)}') # with output argument rb1 = torch.empty(5, 3) torch.add(xb1, yb1, out=rb1) print(f'rb1 = {rb1}') # add in place yb1.add_(xb1) print(f'yb1 = {yb1}') # index print(f'xb1[:,1] = {xb1[:, 1]}') # operation: resize print(util.SubSection('Operations: Resize')) xb2 = torch.randn(4, 4) yb2 = xb2.view(16) zb2 = xb2.view(-1, 8) print(f'xb2 = {xb2}') print(f'yb2 = {yb2}') print(f'zb2 = {zb2}') print( f'xb2.size = {xb2.size()}, yb2.size = {yb2.size()}, zb2.size = {zb2.size()}' ) # if only one element, can use .item() to get the values as a python number xb3 = torch.randn(1) print(f'xb3 = {xb3}') print(f'xb3.item() = {xb3.item()}') # numpy bridge, change one will change the other print(util.SubSection('NumPy Bridge')) # torch => numpy xc1 = torch.ones(5) print(f'xc1 = {xc1}') yc1 = xc1.numpy() print(f'yc1 = {yc1}') # add, y will also changed xc1.add_(1) print(f'xc1 = {xc1}') print(f'yc1 = {yc1}') # numpy => torch xc2 = np.ones(5) yc2 = torch.from_numpy(xc2) np.add(xc2, 1, out=xc2) print(f'xc2 = {xc2}') print(f'yc2 = {yc2}') # CUDA tensors print(util.SubSection('CUDA Tensors')) xd1 = torch.rand((3, 2)) if torch.cuda.is_available(): print('use CUDA') device = torch.device('cuda') yd1 = torch.ones_like(xd1, device=device) # directly create a tensor on GPU xd2 = xd1.to(device) zd1 = xd2 + yd1 print(f'zd1 = {zd1}') print(f'to CPU, zd1 = {zd1.to("cpu", torch.double)}' ) # "to" can also change dtype together
def _generate(self, model, sample, prefix_tokens=None, bos_token=None, **kwargs): if not self.retain_dropout: model.eval() # model.forward normally channels prev_output_tokens into the decoder # separately, but SequenceGenerator directly calls model.encoder encoder_input = { k: v for k, v in sample['net_input'].items() if k != 'prev_output_tokens' } src_tokens = encoder_input['src_tokens'] if src_tokens.dim() > 2: src_lengths = encoder_input['src_lengths'] else: src_lengths = (src_tokens.ne(self.eos) & src_tokens.ne(self.pad)).long().sum(dim=1) input_size = src_tokens.size() # batch dimension goes first followed by source lengths bsz = input_size[0] src_len = input_size[1] beam_size = self.beam_size if self.match_source_len: max_len = src_lengths.max().item() else: max_len = min( int(self.max_len_a * src_len + self.max_len_b), # exclude the EOS marker model.max_decoder_positions() - 1, ) assert self.min_len <= max_len, 'min_len cannot be larger than max_len, please adjust these!' # compute the encoder output for each beam encoder_outs = model.forward_encoder(encoder_input) new_order = torch.arange(bsz).view(-1, 1).repeat(1, beam_size).view(-1) new_order = new_order.to(src_tokens.device).long() encoder_outs = model.reorder_encoder_out(encoder_outs, new_order) # initialize buffers scores = src_tokens.new(bsz * beam_size, max_len + 1).float().fill_(0) scores_buf = scores.clone() tokens = src_tokens.new(bsz * beam_size, max_len + 2).long().fill_(self.pad) tokens_buf = tokens.clone() tokens[:, 0] = self.eos if bos_token is None else bos_token attn, attn_buf = None, None # The blacklist indicates candidates that should be ignored. # For example, suppose we're sampling and have already finalized 2/5 # samples. Then the blacklist would mark 2 positions as being ignored, # so that we only finalize the remaining 3 samples. blacklist = src_tokens.new_zeros(bsz, beam_size).eq( -1) # forward and backward-compatible False mask # list of completed sentences finalized = [[] for i in range(bsz)] finished = [False for i in range(bsz)] num_remaining_sent = bsz # number of candidate hypos per step cand_size = 2 * beam_size # 2 x beam size in case half are EOS # offset arrays for converting between different indexing schemes bbsz_offsets = (torch.arange(0, bsz) * beam_size).unsqueeze(1).type_as(tokens) cand_offsets = torch.arange(0, cand_size).type_as(tokens) # helper function for allocating buffers on the fly buffers = {} def buffer(name, type_of=tokens): # noqa if name not in buffers: buffers[name] = type_of.new() return buffers[name] def is_finished(sent, step, unfin_idx): """ Check whether we've finished generation for a given sentence, by comparing the worst score among finalized hypotheses to the best possible score among unfinalized hypotheses. """ assert len(finalized[sent]) <= beam_size if len(finalized[sent]) == beam_size or step == max_len: return True return False def finalize_hypos(step, bbsz_idx, eos_scores): """ Finalize the given hypotheses at this step, while keeping the total number of finalized hypotheses per sentence <= beam_size. Note: the input must be in the desired finalization order, so that hypotheses that appear earlier in the input are preferred to those that appear later. 
Args: step: current time step bbsz_idx: A vector of indices in the range [0, bsz*beam_size), indicating which hypotheses to finalize eos_scores: A vector of the same size as bbsz_idx containing scores for each hypothesis """ assert bbsz_idx.numel() == eos_scores.numel() # clone relevant token and attention tensors tokens_clone = tokens.index_select(0, bbsz_idx) tokens_clone = tokens_clone[:, 1:step + 2] # skip the first index, which is EOS assert not tokens_clone.eq(self.eos).any() tokens_clone[:, step] = self.eos attn_clone = attn.index_select( 0, bbsz_idx)[:, :, 1:step + 2] if attn is not None else None # compute scores per token position pos_scores = scores.index_select(0, bbsz_idx)[:, :step + 1] pos_scores[:, step] = eos_scores # convert from cumulative to per-position scores pos_scores[:, 1:] = pos_scores[:, 1:] - pos_scores[:, :-1] # normalize sentence-level scores if self.normalize_scores: eos_scores /= (step + 1)**self.len_penalty cum_unfin = [] prev = 0 for f in finished: if f: prev += 1 else: cum_unfin.append(prev) sents_seen = set() for i, (idx, score) in enumerate( zip(bbsz_idx.tolist(), eos_scores.tolist())): unfin_idx = idx // beam_size sent = unfin_idx + cum_unfin[unfin_idx] sents_seen.add((sent, unfin_idx)) if self.match_source_len and step > src_lengths[unfin_idx]: score = -math.inf def get_hypo(): if attn_clone is not None: # remove padding tokens from attn scores hypo_attn = attn_clone[i] else: hypo_attn = None return { 'tokens': tokens_clone[i], 'score': score, 'attention': hypo_attn, # src_len x tgt_len 'alignment': None, 'positional_scores': pos_scores[i], } if len(finalized[sent]) < beam_size: finalized[sent].append(get_hypo()) newly_finished = [] for sent, unfin_idx in sents_seen: # check termination conditions for this sentence if not finished[sent] and is_finished(sent, step, unfin_idx): finished[sent] = True newly_finished.append(unfin_idx) return newly_finished reorder_state = None batch_idxs = None for step in range(max_len + 1): # one extra step for EOS marker # reorder decoder internal states based on the prev choice of beams if reorder_state is not None: if batch_idxs is not None: # update beam indices to take into account removed sentences corr = batch_idxs - torch.arange( batch_idxs.numel()).type_as(batch_idxs) reorder_state.view(-1, beam_size).add_( corr.unsqueeze(-1) * beam_size) model.reorder_incremental_state(reorder_state) encoder_outs = model.reorder_encoder_out( encoder_outs, reorder_state) lprobs, avg_attn_scores = model.forward_decoder( tokens[:, :step + 1], encoder_outs, temperature=self.temperature, ) lprobs[lprobs != lprobs] = -math.inf lprobs[:, self.pad] = -math.inf # never select pad lprobs[:, self.unk] -= self.unk_penalty # apply unk penalty # handle max length constraint if step >= max_len: lprobs[:, :self.eos] = -math.inf lprobs[:, self.eos + 1:] = -math.inf elif self.eos_factor is not None: # only consider EOS if its score is no less than a specified # factor of the best candidate score disallow_eos_mask = lprobs[:, self. 
eos] < self.eos_factor * lprobs.max( dim=1)[0] lprobs[disallow_eos_mask, self.eos] = -math.inf # handle prefix tokens (possibly with different lengths) if prefix_tokens is not None and step < prefix_tokens.size( 1) and step < max_len: prefix_toks = prefix_tokens[:, step].unsqueeze(-1).repeat( 1, beam_size).view(-1) prefix_lprobs = lprobs.gather(-1, prefix_toks.unsqueeze(-1)) prefix_mask = prefix_toks.ne(self.pad) lprobs[prefix_mask] = -math.inf lprobs[prefix_mask] = lprobs[prefix_mask].scatter_( -1, prefix_toks[prefix_mask].unsqueeze(-1), prefix_lprobs[prefix_mask]) # if prefix includes eos, then we should make sure tokens and # scores are the same across all beams eos_mask = prefix_toks.eq(self.eos) if eos_mask.any(): # validate that the first beam matches the prefix first_beam = tokens[eos_mask].view( -1, beam_size, tokens.size(-1))[:, 0, 1:step + 1] eos_mask_batch_dim = eos_mask.view(-1, beam_size)[:, 0] target_prefix = prefix_tokens[eos_mask_batch_dim][:, :step] assert (first_beam == target_prefix).all() def replicate_first_beam(tensor, mask): tensor = tensor.view(-1, beam_size, tensor.size(-1)) tensor[mask] = tensor[mask][:, :1, :] return tensor.view(-1, tensor.size(-1)) # copy tokens, scores and lprobs from the first beam to all beams tokens = replicate_first_beam(tokens, eos_mask_batch_dim) scores = replicate_first_beam(scores, eos_mask_batch_dim) lprobs = replicate_first_beam(lprobs, eos_mask_batch_dim) elif step < self.min_len: # minimum length constraint (does not apply if using prefix_tokens) lprobs[:, self.eos] = -math.inf if self.no_repeat_ngram_size > 0: # for each beam and batch sentence, generate a list of previous ngrams gen_ngrams = [{} for bbsz_idx in range(bsz * beam_size)] for bbsz_idx in range(bsz * beam_size): gen_tokens = tokens[bbsz_idx].tolist() for ngram in zip(*[ gen_tokens[i:] for i in range(self.no_repeat_ngram_size) ]): gen_ngrams[bbsz_idx][tuple(ngram[:-1])] = \ gen_ngrams[bbsz_idx].get(tuple(ngram[:-1]), []) + [ngram[-1]] # Record attention scores if type(avg_attn_scores) is list: avg_attn_scores = avg_attn_scores[0] if avg_attn_scores is not None: if attn is None: if src_tokens.dim() > 2: attn = scores.new( bsz * beam_size, encoder_outs[0]["encoder_out"][0].size(0), max_len + 2, ) else: attn = scores.new(bsz * beam_size, src_tokens.size(1), max_len + 2) attn_buf = attn.clone() attn[:, :, step + 1].copy_(avg_attn_scores) scores = scores.type_as(lprobs) scores_buf = scores_buf.type_as(lprobs) eos_bbsz_idx = buffer('eos_bbsz_idx') eos_scores = buffer('eos_scores', type_of=scores) self.search.set_src_lengths(src_lengths) if self.no_repeat_ngram_size > 0: def calculate_banned_tokens(bbsz_idx): # before decoding the next token, prevent decoding of ngrams that have already appeared ngram_index = tuple( tokens[bbsz_idx, step + 2 - self.no_repeat_ngram_size:step + 1].tolist()) return gen_ngrams[bbsz_idx].get(ngram_index, []) if step + 2 - self.no_repeat_ngram_size >= 0: # no banned tokens if we haven't generated no_repeat_ngram_size tokens yet banned_tokens = [ calculate_banned_tokens(bbsz_idx) for bbsz_idx in range(bsz * beam_size) ] else: banned_tokens = [[] for bbsz_idx in range(bsz * beam_size)] for bbsz_idx in range(bsz * beam_size): lprobs[bbsz_idx, banned_tokens[bbsz_idx]] = -math.inf cand_scores, cand_indices, cand_beams = self.search.step( step, lprobs.view(bsz, -1, self.vocab_size), scores.view(bsz, beam_size, -1)[:, :, :step], ) # cand_bbsz_idx contains beam indices for the top candidate # hypotheses, with a range of values: [0, bsz*beam_size), # and 
dimensions: [bsz, cand_size] cand_bbsz_idx = cand_beams.add(bbsz_offsets) # finalize hypotheses that end in eos, except for blacklisted ones # or candidates with a score of -inf eos_mask = cand_indices.eq(self.eos) & cand_scores.ne(-math.inf) eos_mask[:, :beam_size][blacklist] = 0 # only consider eos when it's among the top beam_size indices torch.masked_select( cand_bbsz_idx[:, :beam_size], mask=eos_mask[:, :beam_size], out=eos_bbsz_idx, ) finalized_sents = set() if eos_bbsz_idx.numel() > 0: torch.masked_select( cand_scores[:, :beam_size], mask=eos_mask[:, :beam_size], out=eos_scores, ) finalized_sents = finalize_hypos(step, eos_bbsz_idx, eos_scores) num_remaining_sent -= len(finalized_sents) assert num_remaining_sent >= 0 if num_remaining_sent == 0: break assert step < max_len if len(finalized_sents) > 0: new_bsz = bsz - len(finalized_sents) # construct batch_idxs which holds indices of batches to keep for the next pass batch_mask = cand_indices.new_ones(bsz) batch_mask[cand_indices.new(finalized_sents)] = 0 batch_idxs = batch_mask.nonzero().squeeze(-1) eos_mask = eos_mask[batch_idxs] cand_beams = cand_beams[batch_idxs] bbsz_offsets.resize_(new_bsz, 1) cand_bbsz_idx = cand_beams.add(bbsz_offsets) cand_scores = cand_scores[batch_idxs] cand_indices = cand_indices[batch_idxs] if prefix_tokens is not None: prefix_tokens = prefix_tokens[batch_idxs] src_lengths = src_lengths[batch_idxs] blacklist = blacklist[batch_idxs] scores = scores.view(bsz, -1)[batch_idxs].view( new_bsz * beam_size, -1) scores_buf.resize_as_(scores) tokens = tokens.view(bsz, -1)[batch_idxs].view( new_bsz * beam_size, -1) tokens_buf.resize_as_(tokens) if attn is not None: attn = attn.view(bsz, -1)[batch_idxs].view( new_bsz * beam_size, attn.size(1), -1) attn_buf.resize_as_(attn) bsz = new_bsz else: batch_idxs = None # Set active_mask so that values > cand_size indicate eos or # blacklisted hypos and values < cand_size indicate candidate # active hypos. After this, the min values per row are the top # candidate active hypos. 
active_mask = buffer('active_mask') eos_mask[:, :beam_size] |= blacklist torch.add( eos_mask.type_as(cand_offsets) * cand_size, cand_offsets[:eos_mask.size(1)], out=active_mask, ) # get the top beam_size active hypotheses, which are just the hypos # with the smallest values in active_mask active_hypos, new_blacklist = buffer('active_hypos'), buffer( 'new_blacklist') torch.topk(active_mask, k=beam_size, dim=1, largest=False, out=(new_blacklist, active_hypos)) # update blacklist to ignore any finalized hypos blacklist = new_blacklist.ge(cand_size)[:, :beam_size] assert (~blacklist).any(dim=1).all() active_bbsz_idx = buffer('active_bbsz_idx') torch.gather( cand_bbsz_idx, dim=1, index=active_hypos, out=active_bbsz_idx, ) active_scores = torch.gather( cand_scores, dim=1, index=active_hypos, out=scores[:, step].view(bsz, beam_size), ) active_bbsz_idx = active_bbsz_idx.view(-1) active_scores = active_scores.view(-1) # copy tokens and scores for active hypotheses torch.index_select( tokens[:, :step + 1], dim=0, index=active_bbsz_idx, out=tokens_buf[:, :step + 1], ) torch.gather( cand_indices, dim=1, index=active_hypos, out=tokens_buf.view(bsz, beam_size, -1)[:, :, step + 1], ) if step > 0: torch.index_select( scores[:, :step], dim=0, index=active_bbsz_idx, out=scores_buf[:, :step], ) torch.gather( cand_scores, dim=1, index=active_hypos, out=scores_buf.view(bsz, beam_size, -1)[:, :, step], ) # copy attention for active hypotheses if attn is not None: torch.index_select( attn[:, :, :step + 2], dim=0, index=active_bbsz_idx, out=attn_buf[:, :, :step + 2], ) # swap buffers tokens, tokens_buf = tokens_buf, tokens scores, scores_buf = scores_buf, scores if attn is not None: attn, attn_buf = attn_buf, attn # reorder incremental state in decoder reorder_state = active_bbsz_idx # sort by score descending for sent in range(len(finalized)): finalized[sent] = sorted(finalized[sent], key=lambda r: r['score'], reverse=True) return finalized
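# A tiny, self-contained illustration (not part of _generate above) of the
# active_mask / topk trick: candidates that ended in EOS (or are blacklisted) get
# their index shifted by cand_size, so taking the beam_size *smallest* entries keeps
# the best non-EOS candidates in their original, score-sorted order. Values are made up.
import torch

beam_size, cand_size = 2, 4
eos_mask = torch.tensor([[True, False, False, True]])      # candidates 0 and 3 ended in EOS
cand_offsets = torch.arange(0, cand_size)
active_mask = eos_mask.long() * cand_size + cand_offsets   # [[4, 1, 2, 7]]
_, active_hypos = torch.topk(active_mask, k=beam_size, dim=1, largest=False)
print(active_hypos)  # tensor([[1, 2]]) -> the surviving non-EOS candidates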
def forward(self, rgb_inputs, depth_inputs): ######## DEPTH ENCODER ######## # Stage 1 #x = self.conv11d(depth_inputs) x_1 = self.CBR1_DEPTH_ENC(depth_inputs) x, id1_d = self.pool1_d(x_1) # Stage 2 x_2 = self.CBR2_DEPTH_ENC(x) x, id2_d = self.pool2_d(x_2) # Stage 3 x_3 = self.CBR3_DEPTH_ENC(x) x, id3_d = self.pool4_d(x_3) x = self.dropout3_d(x) # Stage 4 x_4 = self.CBR4_DEPTH_ENC(x) x, id4_d = self.pool4_d(x_4) x = self.dropout4_d(x) # Stage 5 x_5 = self.CBR5_DEPTH_ENC(x) ######## RGB ENCODER ######## # Stage 1 y = self.CBR1_RGB_ENC(rgb_inputs) y = torch.add(y,x_1) y = torch.div(y,2) y, id1 = self.pool1(y) # Stage 2 y = self.CBR2_RGB_ENC(y) y = torch.add(y,x_2) y = torch.div(y,2) y, id2 = self.pool2(y) # Stage 3 y = self.CBR3_RGB_ENC(y) y = torch.add(y,x_3) y = torch.div(y,2) y, id3 = self.pool3(y) y = self.dropout3(y) # Stage 4 y = self.CBR4_RGB_ENC(y) y = torch.add(y,x_4) y = torch.div(y,2) y, id4 = self.pool4(y) y = self.dropout4(y) # Stage 5 y = self.CBR5_RGB_ENC(y) y = torch.add(y,x_5) y = torch.div(y,2) y_size = y.size() y, id5 = self.pool5(y) y = self.dropout5(y) ######## DECODER ######## # Stage 5 dec y = self.unpool5(y, id5,output_size=y_size) y = self.CBR5_RGB_DEC(y) y = self.dropout5_dec(y) # Stage 4 dec y = self.unpool4(y, id4) y = self.CBR4_RGB_DEC(y) y = self.dropout4_dec(y) # Stage 3 dec y = self.unpool3(y, id3) y = self.CBR3_RGB_DEC(y) y = self.dropout3_dec(y) # Stage 2 dec y = self.unpool2(y, id2) y = self.CBR2_RGB_DEC(y) # Stage 1 dec y = self.unpool1(y, id1) y = self.CBR1_RGB_DEC(y) return y
def forward(self, x): #print('x',x.size()) e1 = self.encoder1(x) #; print('e1',e1.size()) e2 = self.encoder2(e1) #; print('e2',e2.size()) e3 = self.encoder3(e2) #; print('e3',e3.size()) e4 = self.encoder4(e3) #; print('e4',e4.size()) e5 = self.encoder5(e4) #; print('e5',e5.size()) f = self.center(e5) #; print('center',f.size()) if self.dilation: f1 = self.center1(f) #; print('center',f1.size()) f2 = self.center2(f1) #; print('center',f2.size()) # f3=self.center3(f2); print('center',f3.size()) # f4=self.center4(f3); print('center',f4.size()) #f5=self.center5(f4) f = torch.add(f, 1, f1) f = torch.add(f, 1, f2) # f=torch.cat(( # f, # f1, # f2, # f3, # f4, # ),1) f = F.interpolate(f, scale_factor=2, mode='bilinear', align_corners=True) d5 = self.decoder5(torch.cat([f, e5], 1)) #; print('d5',d5.size()) d5 = F.interpolate(d5, scale_factor=2, mode='bilinear', align_corners=True) d4 = self.decoder4(torch.cat([d5, e4], 1)) #; print('d4',d4.size()) d4 = F.interpolate(d4, scale_factor=2, mode='bilinear', align_corners=True) d3 = self.decoder3(torch.cat([d4, e3], 1)) #; print('d3',d3.size()) d3 = F.interpolate(d3, scale_factor=2, mode='bilinear', align_corners=True) d2 = self.decoder2(torch.cat([d3, e2], 1)) #; print('d2',d2.size()) d2 = F.interpolate(d2, scale_factor=2, mode='bilinear', align_corners=True) d1 = self.decoder1(d2) #; print('d1',d1.size()) f = torch.cat( (d1, F.interpolate( d2, scale_factor=1, mode='bilinear', align_corners=False), F.interpolate( d3, scale_factor=2, mode='bilinear', align_corners=False), F.interpolate( d4, scale_factor=4, mode='bilinear', align_corners=False), F.interpolate( d5, scale_factor=8, mode='bilinear', align_corners=False)), 1) f = F.dropout2d(f, p=0.20) logit = self.logit(f) #; print('logit',logit.size()) return logit
def forward(self, x, state=None): h = x[:, :3] f = x[:, 3:] h = self.relu1_1(self.conv1_1(h)) h = self.relu1_2(self.conv1_2(h)) h = self.pool1(h) h = self.relu2_1(self.conv2_1(h)) h = self.relu2_2(self.conv2_2(h)) h = self.pool2(h) h = self.relu3_1(self.conv3_1(h)) h = self.relu3_2(self.conv3_2(h)) h = self.relu3_3(self.conv3_3(h)) #Flow forward-pass here f = self.relu_flow1_1(self.conv_flow1_1(f)) f = self.relu_flow1_2(self.conv_flow1_2(f)) f = self.pool_flow1(f) f = self.relu_flow2_1(self.conv_flow2_1(f)) f = self.relu_flow2_2(self.conv_flow2_2(f)) f = self.pool_flow2(f) f = self.relu_flow3_1(self.conv_flow3_1(f)) f = self.relu_flow3_2(self.conv_flow3_2(f)) f = self.relu_flow3_3(self.conv_flow3_3(f)) #Sum flow and RGB features (could concatenate instead) h = torch.add(h, f) h = self.pool3(h) pool3 = h # 1/8 h = self.relu4_1(self.conv4_1(h)) h = self.relu4_2(self.conv4_2(h)) h = self.relu4_3(self.conv4_3(h)) h = self.pool4(h) pool4 = h # 1/16 h = self.relu5_1(self.conv5_1(h)) h = self.relu5_2(self.conv5_2(h)) h = self.relu5_3(self.conv5_3(h)) h = self.pool5(h) h = self.relu6(self.fc6(h)) h = self.drop6(h) #h, _ = self.fc7(h, None) h = self.fc7(h) #This is the ConvLSTM Block h = self.relu7(h) h = self.drop7(h) h = self.score_fr(h) h = self.upscore2(h) upscore2 = h # 1/16 h = self.score_pool4(pool4) h = h[:, :, 5:5 + upscore2.size()[2], 5:5 + upscore2.size()[3]] score_pool4c = h # 1/16 h = upscore2 + score_pool4c # 1/16 h = self.upscore_pool4(h) upscore_pool4 = h # 1/8 h = self.score_pool3(pool3) h = h[:, :, 9:9 + upscore_pool4.size()[2], 9:9 + upscore_pool4.size()[3]] score_pool3c = h # 1/8 h = upscore_pool4 + score_pool3c # 1/8 h = self.upscore8(h) h = h[:, :, 31:31 + x.size()[2], 31:31 + x.size()[3]].contiguous() #print(h[:,1].shape) h[:, 0] = F.sigmoid(h[:, 0].clone()) return h
def forward(self, x, lengths, volatile=False, target_align=None, length_whole=None): """ sample a tree for each sentence """ lengths = length_whole max_select_cnt = int(lengths.max(dim=0)[0].item()) - 1 tree_indices = list() tree_probs = list() span_bounds = list() features = list() left_span_features = list() right_span_features = list() # closed range: [left_bounds[i], right_bounds[i]] left_bounds = utils.add_dim( torch.arange(0, max_select_cnt + 1, dtype=torch.long, device=x.device), 0, x.size(0)) right_bounds = left_bounds # use the embedding layer to generate the embeddings for the whole sentence # * is correct? # TODO use more hidden layers from bert model if self.embedding_type == 'bert': #! segment_ids segment_ids = torch.ones_like(x) #import ipdb; ipdb.set_trace() # TODO get whole word embedding here sem_embeddings, _ = self.sem_embedding(x, segment_ids) #! ADD embeddings together ind = target_align #import ipdb; ipdb.set_trace() # import ipdb; ipdb.set_trace() sem_embeddings = torch.matmul(ind.permute(0, 2, 1), sem_embeddings) sem_embeddings = sem_embeddings / sem_embeddings.max( dim=2, keepdim=True)[0] # print("bert go!") else: sem_embeddings = self.sem_embedding(x) syn_embeddings = sem_embeddings output_word_embeddings = sem_embeddings * \ sequence_mask(lengths, max_length=lengths.max()).unsqueeze(-1).float() valid_bs = lengths.size(0) for i in range(max_select_cnt): seq_length = sem_embeddings.size(1) # set invalid positions to 0 prob # [0, 0, ..., 1, 1, ...] length_mask = 1 - sequence_mask((lengths - 1 - i).clamp(min=0), max_length=seq_length - 1).float() # 0 = done undone_mask = 1 - length_mask[:, 0] syn_feats = torch.cat((l2norm( syn_embeddings[:, 1:]), l2norm(syn_embeddings[:, :-1])), dim=2) prob_logits = self.syn_score(syn_feats).squeeze(-1) prob_logits = prob_logits - 1e10 * length_mask probs = F.softmax(prob_logits, dim=1) if not volatile: sampler = Categorical(probs) indices = sampler.sample() else: indices = probs.max(1)[1] tree_indices.append(indices) tree_probs.append(index_one_hot_ellipsis(probs, 1, indices)) this_spans = torch.stack([ index_one_hot_ellipsis(left_bounds, 1, indices), index_one_hot_ellipsis(right_bounds, 1, indices + 1) ], dim=1) this_features = torch.add( index_one_hot_ellipsis(sem_embeddings, 1, indices), index_one_hot_ellipsis(sem_embeddings, 1, indices + 1)) this_left_features = index_one_hot_ellipsis( sem_embeddings, 1, indices) this_right_features = index_one_hot_ellipsis( sem_embeddings, 1, indices + 1) this_features = l2norm(this_features) this_left_features = l2norm(this_left_features) this_right_features = l2norm(this_right_features) span_bounds.append(this_spans) features.append( l2norm(this_features) * undone_mask.unsqueeze(-1).float()) left_span_features.append(this_left_features * undone_mask.unsqueeze(-1).float()) right_span_features.append(this_right_features * undone_mask.unsqueeze(-1).float()) # update word embeddings left_mask = sequence_mask(indices, max_length=seq_length).float() right_mask = 1 - sequence_mask(indices + 2, max_length=seq_length).float() center_mask = index_mask(indices, max_length=seq_length).float() update_masks = (left_mask, right_mask, center_mask) this_features_syn = torch.add( index_one_hot_ellipsis(syn_embeddings, 1, indices), index_one_hot_ellipsis(syn_embeddings, 1, indices + 1)) this_features_syn = l2norm(this_features_syn) syn_embeddings = self.update_with_mask(syn_embeddings, syn_embeddings, this_features_syn, *update_masks) sem_embeddings = self.update_with_mask(sem_embeddings, sem_embeddings, 
this_features, *update_masks) left_bounds = self.update_with_mask(left_bounds, left_bounds, this_spans[:, 0], *update_masks) right_bounds = self.update_with_mask(right_bounds, right_bounds, this_spans[:, 1], *update_masks) return features, left_span_features, right_span_features, output_word_embeddings, tree_indices, \ tree_probs, span_bounds
def forward(self, x):
    out = self.conv_block(x)
    # Tile the input along the channel dimension so it matches the 16-channel
    # conv output, then add it back as a residual connection.
    repeat_num = 16 // self.in_channels
    x16 = x.repeat([1, repeat_num, 1, 1, 1][: self.spatial_dims + 2])
    out = self.act_function(torch.add(out, x16))
    return out
def se(self, X, Y, Re=None, batch_size=25, parallel=0, sampling=100, each_species=True): dataLoader = self._get_DataLoader(X, Y, Re, batch_size=batch_size, shuffle=False) loss_func = self.__build_loss_function(train=True) se = [] y_dim = np.size(self.weights_numpy[0][0], 1) weights = self.weights[0][0] zero = torch.tensor(0.0, dtype=self.dtype).to(self.device) one = torch.tensor(1.0, dtype=self.dtype).to(self.device) re_loss = lambda value: torch.distributions.Normal(zero, one).log_prob( value) _ = sys.stdout.write("\nCalculating standard errors...\n") if each_species: for i in range(y_dim): _ = sys.stdout.write("\rSpecies: {}/{} ".format(i + 1, y_dim)) sys.stdout.flush() weights = torch.tensor(self.weights_numpy[0][0][:, i].reshape( [-1, 1]), device=self.device, dtype=self.dtype, requires_grad=True).to(self.device) if i == 0: constants = torch.tensor(self.weights_numpy[0][0][:, (i + 1):], device=self.device, dtype=self.dtype).to(self.device) w = torch.cat([weights, constants], dim=1) elif i < y_dim: w = torch.cat([ torch.tensor(self.weights_numpy[0][0][:, 0:i], device=self.device, dtype=self.dtype).to(self.device), weights, torch.tensor(self.weights_numpy[0][0][:, (i + 1):], device=self.device, dtype=self.dtype).to(self.device) ], dim=1) else: constants = torch.tensor(self.weights_numpy[0][0][:, 0:i], device=self.device, dtype=self.dtype).to(self.device) w = torch.cat([constants, weights], dim=1) for step, (x, y, re) in enumerate(dataLoader): x = x.to(self.device, non_blocking=True) y = y.to(self.device, non_blocking=True) spatial_re = self.re.gather( 0, re.to(self.device, non_blocking=True)) mu = torch.nn.functional.linear(x, w.t()) loss = loss_func(mu, y, spatial_re, x.shape[0], sampling).sum().add( re_loss(spatial_re).sum()) #loss = torch.add(torch.sum(loss), torch.sum(re_loss(spatial_re))) first_gradients = torch.autograd.grad(loss, weights, retain_graph=True, create_graph=True, allow_unused=True) second = [] for j in range(self.input_shape): second.append( torch.autograd.grad(first_gradients[0][j, 0], inputs=weights, retain_graph=True, create_graph=False, allow_unused=False)[0]) hessian = torch.cat(second, dim=1) if step < 1: hessian_out = hessian else: hessian_out += hessian se.append( torch.sqrt(torch.diag( torch.inverse(hessian_out))).data.cpu().numpy()) return se else: for step, (x, y, re) in enumerate(dataLoader): x = x.to(self.device, non_blocking=True) y = y.to(self.device, non_blocking=True) spatial_re = self.re.gather( 0, re.to(self.device, non_blocking=True)) mu = self.layers[0](x) loss = torch.add( torch.sum( loss_func(mu, y, spatial_re, x.shape[0], sampling)), torch.sum(re_loss(spatial_re))) first_gradients = torch.autograd.grad( loss, weights, retain_graph=True, create_graph=True, allow_unused=True)[0].reshape([-1]) hessian = [] for j in range(first_gradients.shape[0]): hessian.append( torch.autograd.grad( first_gradients[j], inputs=weights, retain_graph=True, create_graph=False, allow_unused=False)[0].reshape([-1]).reshape( [y_dim * self.input_shape, 1])) hessian = torch.cat(hessian, dim=1) if step < 1: hessian_out = hessian else: hessian_out += hessian return hessian_out.data.cpu().numpy()
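# A compact sketch of the statistical recipe used in se() above: standard errors of
# fitted parameters are taken as sqrt(diag(H^-1)), where H is the Hessian of the
# negative log-likelihood at the optimum, with both derivatives obtained via autograd.
# The toy model below (one-parameter Gaussian mean, known unit variance) is purely
# illustrative.
import torch

torch.manual_seed(0)
y = torch.randn(100) + 2.0                   # data with true mean 2.0
mu = y.mean().clone().requires_grad_(True)   # MLE of the mean
nll = 0.5 * ((y - mu) ** 2).sum()            # negative log-likelihood up to a constant
grad, = torch.autograd.grad(nll, mu, create_graph=True)
hess, = torch.autograd.grad(grad, mu)        # scalar Hessian: equals len(y)
se = torch.sqrt(1.0 / hess)                  # approx. 1/sqrt(100) = 0.1
print(se)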
def forward(self, x): r""" The :func:`~gpytorch.variational.VariationalStrategy.forward` method determines how to marginalize out the inducing point function values. Specifically, forward defines how to transform a variational distribution over the inducing point values, :math:`q(u)`, in to a variational distribution over the function values at specified locations x, :math:`q(f|x)`, by integrating :math:`\int p(f|x, u)q(u)du` :param torch.Tensor x: Locations x to get the variational posterior of the function values at. :rtype: ~gpytorch.distributions.MultivariateNormal :return: The distribution :math:`q(f|x)` """ variational_dist = self.variational_distribution inducing_points = self.inducing_points if inducing_points.dim() < x.dim(): inducing_points = inducing_points.expand(*x.shape[:-2], *inducing_points.shape[-2:]) if len(variational_dist.batch_shape) < x.dim() - 2: variational_dist = variational_dist.expand(x.shape[:-2]) # If our points equal the inducing points, we're done if torch.equal(x, inducing_points): # De-whiten the prior covar prior_covar = self.prior_distribution.lazy_covariance_matrix if isinstance(variational_dist.lazy_covariance_matrix, RootLazyTensor): predictive_covar = RootLazyTensor(prior_covar @ variational_dist.lazy_covariance_matrix.root.evaluate()) else: predictive_covar = MatmulLazyTensor(prior_covar @ variational_dist.covariance_matrix, prior_covar) # Cache some values for the KL divergence if self.training: self._mean_diff_inv_quad_memo, self._logdet_memo = prior_covar.inv_quad_logdet( (variational_dist.mean - self.prior_distribution.mean), logdet=True ) return MultivariateNormal(variational_dist.mean, predictive_covar) # Otherwise, we have to marginalize else: num_induc = inducing_points.size(-2) full_inputs = torch.cat([inducing_points, x], dim=-2) full_output = self.model.forward(full_inputs) full_mean, full_covar = full_output.mean, full_output.lazy_covariance_matrix # Mean terms test_mean = full_mean[..., num_induc:] induc_mean = full_mean[..., :num_induc] mean_diff = (variational_dist.mean - induc_mean).unsqueeze(-1) # Covariance terms induc_induc_covar = full_covar[..., :num_induc, :num_induc].add_jitter() induc_data_covar = full_covar[..., :num_induc, num_induc:].evaluate() data_data_covar = full_covar[..., num_induc:, num_induc:] # If we're less than a certain size, we'll compute the Cholesky decomposition of induc_induc_covar cholesky = False if settings.fast_computations.log_prob.off() or (num_induc <= settings.max_cholesky_size.value()): induc_induc_covar = CholLazyTensor(induc_induc_covar.cholesky()) cholesky = True # Cache the CG results # Do not use preconditioning for whitened VI, as it does not seem to improve performance. 
        with settings.max_preconditioner_size(0):
            with torch.no_grad():
                eager_rhs = torch.cat([induc_data_covar, mean_diff], -1)
                solve, probe_vecs, probe_vec_norms, probe_vec_solves, tmats = CachedCGLazyTensor.precompute_terms(
                    induc_induc_covar,
                    eager_rhs.detach(),
                    logdet_terms=(not cholesky),
                    include_tmats=(not settings.skip_logdet_forward.on() and not cholesky),
                )
                eager_rhss = [eager_rhs.detach()]
                solves = [solve.detach()]
                if settings.skip_logdet_forward.on() and self.training:
                    eager_rhss.append(torch.cat([probe_vecs, eager_rhs], -1))
                    solves.append(torch.cat([probe_vec_solves, solve[..., : eager_rhs.size(-1)]], -1))
                elif not self.training:
                    eager_rhss.append(eager_rhs[..., :-1])
                    solves.append(solve[..., :-1])

            induc_induc_covar = CachedCGLazyTensor(
                induc_induc_covar,
                eager_rhss=eager_rhss,
                solves=solves,
                probe_vectors=probe_vecs,
                probe_vector_norms=probe_vec_norms,
                probe_vector_solves=probe_vec_solves,
                probe_vector_tmats=tmats,
            )

        # Compute some terms that will be necessary for the predictive covariance and KL divergence
        if self.training:
            interp_data_data_var_plus_mean_diff_inv_quad, logdet = induc_induc_covar.inv_quad_logdet(
                torch.cat([induc_data_covar, mean_diff], -1), logdet=True, reduce_inv_quad=False
            )
            interp_data_data_var = interp_data_data_var_plus_mean_diff_inv_quad[..., :-1]
            mean_diff_inv_quad = interp_data_data_var_plus_mean_diff_inv_quad[..., -1]

        # Compute predictive mean
        predictive_mean = torch.add(
            test_mean,
            induc_induc_covar.inv_matmul(mean_diff, left_tensor=induc_data_covar.transpose(-1, -2)).squeeze(-1),
        )

        # Compute the predictive covariance
        is_root_lt = isinstance(variational_dist.lazy_covariance_matrix, RootLazyTensor)
        is_repeated_root_lt = isinstance(
            variational_dist.lazy_covariance_matrix, BatchRepeatLazyTensor
        ) and isinstance(variational_dist.lazy_covariance_matrix.base_lazy_tensor, RootLazyTensor)
        if is_root_lt:
            predictive_covar = RootLazyTensor(
                induc_data_covar.transpose(-1, -2) @ variational_dist.lazy_covariance_matrix.root.evaluate()
            )
        elif is_repeated_root_lt:
            predictive_covar = RootLazyTensor(
                induc_data_covar.transpose(-1, -2)
                @ variational_dist.lazy_covariance_matrix.root_decomposition().root.evaluate()
            )
        else:
            # Use the dense variational covariance here (the flattened source referenced
            # `predictive_covar` before it was assigned)
            predictive_covar = MatmulLazyTensor(
                induc_data_covar.transpose(-1, -2), variational_dist.lazy_covariance_matrix @ induc_data_covar
            )

        if self.training:
            data_covariance = DiagLazyTensor(
                (data_data_covar.diag() - interp_data_data_var).clamp(0, math.inf)
            )
        else:
            neg_induc_data_data_covar = torch.matmul(
                induc_data_covar.transpose(-1, -2).mul(-1), induc_induc_covar.inv_matmul(induc_data_covar)
            )
            data_covariance = data_data_covar + neg_induc_data_data_covar
        predictive_covar = PsdSumLazyTensor(predictive_covar, data_covariance)

        # Save the logdet, mean_diff_inv_quad, prior distribution for the ELBO
        if self.training:
            self._memoize_cache["prior_distribution_memo"] = MultivariateNormal(induc_mean, induc_induc_covar)
            self._memoize_cache["logdet_memo"] = -logdet
            self._memoize_cache["mean_diff_inv_quad_memo"] = mean_diff_inv_quad

        return MultivariateNormal(predictive_mean, predictive_covar)
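# Illustrative plain-torch sketch (not the gpytorch implementation) of the marginalization
# the docstring above describes: q(f|x) = \int p(f|x, u) q(u) du. With q(u) = N(m, S) and a
# zero prior mean, the standard unwhitened result is
#   mean  = K_xz K_zz^{-1} m
#   covar = K_xx - K_xz K_zz^{-1} K_zx + K_xz K_zz^{-1} S K_zz^{-1} K_zx
# The RBF kernel, inducing locations and variational parameters below are made up; the
# strategy above organizes the same algebra around lazy tensors, caching and whitening.
import torch

def rbf(a, b, lengthscale=1.0):
    d2 = (a.unsqueeze(-2) - b.unsqueeze(-3)).pow(2).sum(-1)
    return torch.exp(-0.5 * d2 / lengthscale ** 2)

Z = torch.linspace(-2, 2, 10).unsqueeze(-1)          # inducing locations
x = torch.linspace(-3, 3, 25).unsqueeze(-1)          # test locations
m = torch.randn(10)                                  # variational mean over u
L = torch.tril(torch.randn(10, 10)) * 0.1
S = L @ L.t() + 1e-3 * torch.eye(10)                 # variational covariance over u

Kzz = rbf(Z, Z) + 1e-4 * torch.eye(10)
Kxz = rbf(x, Z)
Kxx = rbf(x, x)

Kzz_inv_Kzx = torch.linalg.solve(Kzz, Kxz.t())       # K_zz^{-1} K_zx
pred_mean = Kxz @ torch.linalg.solve(Kzz, m)
pred_covar = Kxx - Kxz @ Kzz_inv_Kzx + Kzz_inv_Kzx.t() @ S @ Kzz_inv_Kzx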