def l2_pixel_loss(self, matches_b, non_matches_b, M_pixel=None):
    """
    Apply l2 loss in pixel space. This weights non-matches more if they are "far away" in pixel space.

    :param matches_b: A torch.LongTensor with shape torch.Shape([num_matches])
    :param non_matches_b: A torch.LongTensor with shape torch.Shape([num_non_matches])
    :return: l2 loss per sample, a torch.FloatTensor with shape torch.Shape([num_matches])
    """
    if M_pixel is None:
        M_pixel = self._config['M_pixel']

    # integer division: non-matches come in equal-sized groups per match
    num_non_matches_per_match = len(non_matches_b) // len(matches_b)

    ground_truth_pixels_for_non_matches_b = torch.t(
        matches_b.repeat(num_non_matches_per_match, 1)).contiguous().view(-1, 1)

    ground_truth_u_v_b = self.flattened_pixel_locations_to_u_v(ground_truth_pixels_for_non_matches_b)
    sampled_u_v_b = self.flattened_pixel_locations_to_u_v(non_matches_b.unsqueeze(1))

    # each element is within [0, 1]; it is 1 if the sample is at least M_pixel away
    # from the ground truth in L2 norm in pixel space
    norm_degree = 2
    squared_l2_pixel_loss = 1.0 / M_pixel * torch.clamp(
        (ground_truth_u_v_b - sampled_u_v_b).float().norm(norm_degree, 1), max=M_pixel)

    return squared_l2_pixel_loss, ground_truth_u_v_b, sampled_u_v_b
def get_triplet_loss(image_a_pred, image_b_pred, matches_a, matches_b,
                     non_matches_a, non_matches_b, alpha):
    """
    Computes the loss function

    \sum_{triplets} ||D(I_a, u_a, I_b, u_{b,match})||_2^2 - ||D(I_a, u_a, I_b, u_{b,non-match})||_2^2 + alpha
    """
    num_matches = matches_a.size()[0]
    num_non_matches = non_matches_a.size()[0]
    multiplier = num_non_matches // num_matches  # integer ratio of non-matches per match

    ## non_matches_a is already replicated up to be the right size
    ## non_matches_b is also that size
    ## matches_a is just a smaller version of non_matches_a
    ## matches_b is the only thing that needs to be replicated up in size
    matches_b_long = torch.t(matches_b.repeat(multiplier, 1)).contiguous().view(-1)

    matches_a_descriptors = torch.index_select(image_a_pred, 1, non_matches_a)
    matches_b_descriptors = torch.index_select(image_b_pred, 1, matches_b_long)
    non_matches_b_descriptors = torch.index_select(image_b_pred, 1, non_matches_b)

    triplet_losses = (matches_a_descriptors - matches_b_descriptors).pow(2) \
        - (matches_a_descriptors - non_matches_b_descriptors).pow(2) + alpha
    triplet_loss = 1.0 / num_non_matches * torch.clamp(triplet_losses, min=0).sum()

    return triplet_loss
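A minimal usage sketch, not from the original repo: all sizes and the descriptor dimension are made up, and the non-match index layout mirrors how the dataset code later in this section builds it.

import torch

D = 3                    # descriptor dimension (assumed)
num_pixels = 640 * 480   # flattened image
num_matches = 8
num_non_matches = 24     # must be an integer multiple of num_matches

image_a_pred = torch.randn(1, num_pixels, D)
image_b_pred = torch.randn(1, num_pixels, D)
matches_a = torch.randint(0, num_pixels, (num_matches,))
matches_b = torch.randint(0, num_pixels, (num_matches,))
# replicate each match index in the same interleaved order used for matches_b_long
non_matches_a = torch.t(matches_a.repeat(num_non_matches // num_matches, 1)).contiguous().view(-1)
non_matches_b = torch.randint(0, num_pixels, (num_non_matches,))

loss = get_triplet_loss(image_a_pred, image_b_pred, matches_a, matches_b,
                        non_matches_a, non_matches_b, alpha=0.5)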
def avg_pool1d(x, seq_lens):
    """
    Average-pool each sequence over its valid timesteps only.

    :param x: (B, L, D)
    :param seq_lens: (B)
    :return: (B, D)
    """
    out = []
    for index, t in enumerate(x):
        t = t[:seq_lens[index], :]
        t = torch.t(t).unsqueeze(0)  # (L, D) -> (D, L) -> (1, D, L)
        out.append(F.avg_pool1d(t, t.size(2)))  # (1, D, 1)
    out = torch.cat(out).squeeze(2)  # (B, D, 1) -> (B, D)
    return out
def avg_pool1d(self, x, seq_lens):
    # x: [N, L, O_in]
    out = []
    for index, t in enumerate(x):
        t = t[:seq_lens[index], :]
        t = torch.t(t).unsqueeze(0)
        out.append(F.avg_pool1d(t, t.size(2)))
    out = torch.cat(out).squeeze(2)
    return out
def _update_u_v(self):
    u = getattr(self.module, self.name + "_u")
    v = getattr(self.module, self.name + "_v")
    w = getattr(self.module, self.name + "_bar")

    height = w.data.shape[0]
    for _ in range(self.power_iterations):
        v.data = l2normalize(torch.mv(torch.t(w.view(height, -1).data), u.data))
        u.data = l2normalize(torch.mv(w.view(height, -1).data, v.data))

    # sigma = torch.dot(u.data, torch.mv(w.view(height, -1).data, v.data))
    sigma = u.dot(w.view(height, -1).mv(v))
    setattr(self.module, self.name, w / sigma.expand_as(w))
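_update_u_v relies on an l2normalize helper defined elsewhere in the same file; a minimal definition consistent with how the power iteration uses it would be:

def l2normalize(v, eps=1e-12):
    # normalize a vector to unit L2 norm, guarding against division by zero
    return v / (v.norm() + eps)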
def pytorch_optim(W, edges, wc, n, alpha=1, beta=1, max_iter=50, x0=None, lr=1):
    d = W.shape[1]
    X = prng.randn(n, d) if x0 is None else np.copy(x0)
    tX = autograd.Variable(torch.from_numpy(X), requires_grad=True)
    tW = autograd.Variable(torch.from_numpy(W), requires_grad=False)
    target = autograd.Variable(torch.from_numpy(wc), requires_grad=False)
    head = autograd.Variable(torch.from_numpy(edges[:, 0]), requires_grad=False)
    tail = autograd.Variable(torch.from_numpy(edges[:, 1]), requires_grad=False)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam([tX], weight_decay=0, lr=lr)
    l_res = np.zeros((max_iter + 1, 4))

    nX = X / np.sqrt((X**2).sum(1))[:, np.newaxis]
    m = nX[edges[:, 0]] * nX[edges[:, 1]]
    scores = m @ W.T
    l_res[0, 3] = np.einsum('ij,ji->i', m, W[wc, :].T).mean()
    pred = np.argmax(scores, 1)
    l_res[0, 1] = AMI(wc, pred)

    S = tX[head] * tX[tail]
    output = torch.mm(S, torch.t(tW))
    loss = loss_fn(output, target)
    l_res[0, 0] = loss_fn(output, target).data[0]

    for i in range(max_iter):
        optimizer.zero_grad()
        S = tX[head] * tX[tail]
        output = torch.mm(S, torch.t(tW))
        # avg_norm = torch.mean(torch.norm(tX, p=2, dim=1))
        # avg_edge = torch.mean(torch.diag(torch.mm(S, torch.t(tW[wc, :]))))
        loss = loss_fn(output, target)  # - alpha*avg_edge # + beta*avg_norm
        l_res[i + 1, 0] = loss_fn(output, target).data[0]
        loss.backward()
        optimizer.step()

        # X shares memory with tX (torch.from_numpy), so it reflects the update
        nX = X / np.sqrt((X**2).sum(1))[:, np.newaxis]
        m = nX[edges[:, 0]] * nX[edges[:, 1]]
        scores = m @ W.T
        l_res[i + 1, 3] = np.einsum('ij,ji->i', m, W[wc, :].T).mean()
        pred = np.argmax(scores, 1)
        l_res[i + 1, 1] = AMI(wc, pred)
        # print(np.sqrt((X**2).sum(1)).mean())

    return nX, l_res[:i + 2, :]
def _log_forward(self, input=None):
    """Forward pass of the computation graph in logarithm domain (pytorch)"""

    # IMPORTANT: Cast to pytorch format
    input = Variable(torch.from_numpy(input).float(), requires_grad=False)

    # Linear transformation
    z = torch.matmul(input, torch.t(self.weight)) + self.bias

    # Softmax implemented in log domain
    log_tilde_z = torch.nn.LogSoftmax()(z)  # NOTE that this is a pytorch class!

    return log_tilde_z
def max_pool1d(x, seq_lens):
    """
    :param x: (B, L, D)
    :param seq_lens: (B)
    :return: (B, D)
    """
    out = []
    for index, t in enumerate(x):
        # t: (L, D)
        t = t[:seq_lens[index], :]
        t = torch.t(t).unsqueeze(0)  # (L, D) -> (D, L) -> (1, D, L)
        out.append(F.max_pool1d(t, t.size(2)))  # [(1, D, 1)]
    out = torch.cat(out).squeeze(2)  # B * (1, D, 1) -> (B, D, 1) -> (B, D)
    return out
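A quick illustrative call of the pooling helper above, with made-up shapes; only the first seq_lens[i] timesteps of each row contribute to the result.

import torch
import torch.nn.functional as F

x = torch.randn(2, 5, 4)         # (B, L, D), padded along L
seq_lens = torch.tensor([5, 3])  # valid lengths per row
pooled = max_pool1d(x, seq_lens) # (2, 4): max over valid timesteps only
print(pooled.shape)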
def _log_forward(self, input):
    """Forward pass"""

    # Ensure the type matches torch type
    input = cast_float(input)

    # Input
    tilde_z = input

    # ----------
    # Solution to Exercise 6.4
    for n in range(self.num_layers - 1):
        # Get weights and bias of the layer (even and odd positions)
        weight, bias = self.parameters[n]

        # Linear transformation
        z = torch.matmul(tilde_z, torch.t(weight)) + bias

        # Non-linear transformation
        tilde_z = torch.sigmoid(z)

    # Get weights and bias of the last layer (even and odd positions)
    weight, bias = self.parameters[self.num_layers - 1]

    # Linear transformation
    z = torch.matmul(tilde_z, torch.t(weight)) + bias

    # Softmax is computed in log-domain to prevent underflow/overflow
    log_tilde_z = self.logsoftmax(z)
    # End of solution to Exercise 6.4
    # ----------

    return log_tilde_z
def cos(a, b):
    print("a:", a)
    print("b:", b)
    if 1 == len(a.shape):
        assert len(a.shape) == len(b.shape)
        a_duplicate = a.repeat(1, 1)
        b_duplicate = b.repeat(1, 1)
    else:
        vec_dim = a.shape[-1]
        assert b.shape[-1] == vec_dim
        a_size = a.shape[0]
        b_size = b.shape[0]
        # tile a along a new batch dimension: (a_size, d) -> (a_size, b_size, d)
        a_duplicate = torch.t(a).repeat(b_size, 1, 1).transpose(1, 2).transpose(0, 1)
        # print("a_duplicate:", a_duplicate)
        # print("a_duplicate.shape:", a_duplicate.shape)
        b_duplicate = b.repeat(a_size, 1, 1)
        # print("b_duplicate:", b_duplicate)
        # print("b_duplicate.shape:", b_duplicate.shape)
    cos = F.cosine_similarity(a_duplicate, b_duplicate, dim=-1)
    print("cos:", cos)
    return cos
# bunny_translation = Variable(torch.from_numpy(
#     np.array([0.0485, -0.1651, -0.0795], dtype=np.float32)), requires_grad=True)
# bunny_rotation = Variable(torch.from_numpy(
#     np.array([-0.2, 0.1, -0.1], dtype=np.float32)), requires_grad=True)
target = Variable(torch.from_numpy(image.imread('test/results/bunny_box/target.exr')))

optimizer = torch.optim.Adam([bunny_translation, bunny_rotation], lr=1e-2)
for t in range(200):
    print('iteration:', t)
    optimizer.zero_grad()
    # Forward pass: render the image
    bunny_rotation_matrix = transform.torch_rotate_matrix(bunny_rotation)
    shapes[-1].vertices = \
        (bunny_vertices - torch.mean(bunny_vertices, 0)) @ torch.t(bunny_rotation_matrix) + \
        torch.mean(bunny_vertices, 0) + bunny_translation
    args = render_pytorch.RenderFunction.serialize_scene(
        cam, materials, shapes, lights, resolution,
        num_samples=4, max_bounces=6)
    img = render(t + 1, *args)
    image.imwrite(img.data.numpy(), 'test/results/bunny_box/iter_{}.png'.format(t))

dirac = np.zeros([7, 7], dtype=np.float32)
dirac[3, 3] = 1.0
f = np.zeros([3, 3, 7, 7], dtype=np.float32)
# filter the numpy array before wrapping it in a Variable
gf = scipy.ndimage.filters.gaussian_filter(dirac, 1.0)
f[0, 0, :, :] = gf
dirac = Variable(torch.from_numpy(dirac))
translation_params = torch.tensor([0.1, -0.1, 0.1],
                                  device=pyredner.get_device(), requires_grad=True)
translation = translation_params * 100.0
euler_angles = torch.tensor([0.1, -0.1, 0.1], requires_grad=True)
# We obtain the teapot vertices we want to apply the transformation on.
shape0_vertices = shapes[0].vertices.clone()
shape1_vertices = shapes[1].vertices.clone()
# We can use pyredner.gen_rotate_matrix to generate 3x3 rotation matrices
rotation_matrix = pyredner.gen_rotate_matrix(euler_angles)
if pyredner.get_use_gpu():
    rotation_matrix = rotation_matrix.cuda()
center = torch.mean(torch.cat([shape0_vertices, shape1_vertices]), 0)
# We shift the vertices to the center, apply rotation matrix,
# then shift back to the original space.
shapes[0].vertices = \
    (shape0_vertices - center) @ torch.t(rotation_matrix) + \
    center + translation
shapes[1].vertices = \
    (shape1_vertices - center) @ torch.t(rotation_matrix) + \
    center + translation
# Since we changed the vertices, we need to regenerate the shading normals
shapes[0].normals = pyredner.compute_vertex_normal(shapes[0].vertices, shapes[0].indices)
shapes[1].normals = pyredner.compute_vertex_normal(shapes[1].vertices, shapes[1].indices)
# We need to serialize the scene again to get the new arguments.
scene_args = pyredner.RenderFunction.serialize_scene(
    scene=scene,
    num_samples=512,
    max_bounces=1)
# Render the initial guess.
img = render(1, *scene_args)
# Save the images.
def forward(self, x):
    x = self.relu(x)
    return torch.t(x)
def pdist(vectors):
    # squared pairwise distances via ||u - v||^2 = ||u||^2 - 2 u.v + ||v||^2
    distance_matrix = -2 * vectors.mm(torch.t(vectors)) \
        + vectors.pow(2).sum(dim=1).view(1, -1) \
        + vectors.pow(2).sum(dim=1).view(-1, 1)
    return distance_matrix
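A quick sanity check of the expansion used above (illustrative only), comparing against torch.cdist squared; small numerical differences are expected from the rearranged arithmetic.

import torch

vectors = torch.randn(5, 8)
dm = pdist(vectors)
ref = torch.cdist(vectors, vectors).pow(2)  # reference squared distances
print(torch.allclose(dm, ref, atol=1e-4))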
def D(p, Q, lam, alpha):
    # diag(Q^T p / softabs(lam)), where softabs(lam) = lam * coth(alpha * lam)
    return torch.diag(torch.mv(torch.t(Q), p) / (lam * coth_torch(alpha * lam)))
def cos_dist(anchor, positive):
    """Given a batch of anchor descriptors and positive descriptors, return the
    matrix of pairwise dot products (these equal cosine similarities when the
    rows are L2-normalized)."""
    return torch.bmm(anchor.unsqueeze(0), torch.t(positive).unsqueeze(0)).squeeze(0)
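An illustrative call with made-up shapes; normalizing the rows first makes the returned dot products genuine cosine similarities.

import torch
import torch.nn.functional as F

anchor = F.normalize(torch.randn(4, 128), dim=1)
positive = F.normalize(torch.randn(6, 128), dim=1)
sims = cos_dist(anchor, positive)  # (4, 6) cosine similarities, since rows are unit-norm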
def _train_or_test(model, dataloader, optimizer=None, class_specific=True,
                   use_l1_mask=True, coefs=None, log=print):
    '''
    model: the multi-gpu model
    dataloader:
    optimizer: if None, will be test evaluation
    '''
    is_train = optimizer is not None
    start = time.time()
    n_examples = 0
    n_correct = 0
    n_batches = 0
    total_cross_entropy = 0
    total_cluster_cost = 0
    # separation cost is meaningful only for class_specific
    total_separation_cost = 0
    total_avg_separation_cost = 0

    for i, (image, label) in enumerate(dataloader):
        input = image.cuda()
        target = label.cuda()

        # torch.enable_grad() has no effect outside of no_grad()
        grad_req = torch.enable_grad() if is_train else torch.no_grad()
        with grad_req:
            # nn.Module has implemented __call__() function
            # so no need to call .forward
            output, min_distances = model(input)

            # compute loss
            cross_entropy = torch.nn.functional.cross_entropy(output, target)

            if class_specific:
                max_dist = (model.module.prototype_shape[1]
                            * model.module.prototype_shape[2]
                            * model.module.prototype_shape[3])

                # prototypes_of_correct_class is a tensor of shape batch_size * num_prototypes
                # calculate cluster cost
                prototypes_of_correct_class = torch.t(
                    model.module.prototype_class_identity[:, label]).cuda()
                inverted_distances, _ = torch.max(
                    (max_dist - min_distances) * prototypes_of_correct_class, dim=1)
                cluster_cost = torch.mean(max_dist - inverted_distances)

                # calculate separation cost
                prototypes_of_wrong_class = 1 - prototypes_of_correct_class
                inverted_distances_to_nontarget_prototypes, _ = \
                    torch.max((max_dist - min_distances) * prototypes_of_wrong_class, dim=1)
                separation_cost = torch.mean(
                    max_dist - inverted_distances_to_nontarget_prototypes)

                # calculate avg separation cost
                avg_separation_cost = \
                    torch.sum(min_distances * prototypes_of_wrong_class, dim=1) \
                    / torch.sum(prototypes_of_wrong_class, dim=1)
                avg_separation_cost = torch.mean(avg_separation_cost)

                if use_l1_mask:
                    l1_mask = 1 - torch.t(model.module.prototype_class_identity).cuda()
                    l1 = (model.module.last_layer.weight * l1_mask).norm(p=1)
                else:
                    l1 = model.module.last_layer.weight.norm(p=1)
            else:
                min_distance, _ = torch.min(min_distances, dim=1)
                cluster_cost = torch.mean(min_distance)
                l1 = model.module.last_layer.weight.norm(p=1)

            # evaluation statistics
            _, predicted = torch.max(output.data, 1)
            n_examples += target.size(0)
            n_correct += (predicted == target).sum().item()

            n_batches += 1
            total_cross_entropy += cross_entropy.item()
            total_cluster_cost += cluster_cost.item()
            if class_specific:  # separation costs are only defined in this branch
                total_separation_cost += separation_cost.item()
                total_avg_separation_cost += avg_separation_cost.item()

        # compute gradient and do SGD step
        if is_train:
            if class_specific:
                if coefs is not None:
                    loss = (coefs['crs_ent'] * cross_entropy
                            + coefs['clst'] * cluster_cost
                            + coefs['sep'] * separation_cost
                            + coefs['l1'] * l1)
                else:
                    loss = cross_entropy + 0.8 * cluster_cost - 0.08 * separation_cost + 1e-4 * l1
            else:
                if coefs is not None:
                    loss = (coefs['crs_ent'] * cross_entropy
                            + coefs['clst'] * cluster_cost
                            + coefs['l1'] * l1)
                else:
                    loss = cross_entropy + 0.8 * cluster_cost + 1e-4 * l1
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        del input
        del target
        del output
        del predicted
        del min_distances

    end = time.time()

    log('\ttime: \t{0}'.format(end - start))
    log('\tcross ent: \t{0}'.format(total_cross_entropy / n_batches))
    log('\tcluster: \t{0}'.format(total_cluster_cost / n_batches))
    if class_specific:
        log('\tseparation:\t{0}'.format(total_separation_cost / n_batches))
        log('\tavg separation:\t{0}'.format(total_avg_separation_cost / n_batches))
    log('\taccu: \t\t{0}%'.format(n_correct / n_examples * 100))
    log('\tl1: \t\t{0}'.format(model.module.last_layer.weight.norm(p=1).item()))
    p = model.module.prototype_vectors.view(model.module.num_prototypes, -1).cpu()
    with torch.no_grad():
        p_avg_pair_dist = torch.mean(list_of_distances(p, p))
    log('\tp dist pair: \t{0}'.format(p_avg_pair_dist.item()))

    return n_correct / n_examples
def eval_with_viterbi(model, samples, masks, labels, gold_predicate, label_vocab, transition_matrix):
    """
    model: A pytorch module
    samples: dataset samples (n * max_len)
    masks: dataset mask (n * max_len)
    gold_predicate: 0/1 gold predicate (n * max_len)
    labels: dataset labels (n * max_len)
    label_vocab: a torchtext vocab for labels
    """
    all_preds = torch.tensor([], dtype=torch.long).cuda(cfg.use_which_gpu)
    all_labels = torch.tensor([], dtype=torch.long).cuda(cfg.use_which_gpu)
    prediction_labels = []
    predicts_list = []
    with torch.no_grad():
        # for i in tqdm(range(0, samples.shape[0], cfg.batch_size),
        #               total=(samples.shape[0] // cfg.batch_size), desc="Validation"):
        for i in tqdm(range(samples.shape[0]), total=(len(samples)), desc="Validation"):
            # tokens: 1 * length of sentence
            # label_list: 1 * length
            tokens = torch.tensor(samples[i, :][masks[i] == 1],
                                  dtype=torch.long).unsqueeze(0).cuda(cfg.use_which_gpu)
            label_list = torch.tensor(labels[i, :][masks[i] == 1],
                                      dtype=torch.long).unsqueeze(0).cuda(cfg.use_which_gpu)
            cur_masks = torch.tensor(masks[i, :][masks[i] == 1],
                                     dtype=torch.long).unsqueeze(0).cuda(cfg.use_which_gpu)
            cur_gold_predicate = torch.tensor(gold_predicate[i, :][masks[i] == 1],
                                              dtype=torch.float32).unsqueeze(0).cuda(cfg.use_which_gpu)

            # tokens: len * 1
            tokens = torch.t(tokens)

            # logit: length * 1 * labels
            logit: torch.Tensor = model(tokens, cur_masks, cur_gold_predicate)

            # viterbi decoding instead of per-step argmax
            # _, predictions = logit.max(dim=2)
            predictions, predicates_index = call_viterbi(logit, transition_matrix, label_vocab.stoi)
            prediction_labels.append(predictions)
            predicts_list.append(predicates_index)
    return prediction_labels
def eval_with_micro_F1(model, samples, masks, labels, gold_predicate, label_vocab, transition_matrix):
    """
    model: A pytorch module
    samples: dataset samples (n * max_len)
    masks: dataset mask (n * max_len)
    gold_predicate: 0/1 gold predicate (n * max_len)
    labels: dataset labels (n * max_len)
    label_vocab: a torchtext vocab for labels
    """
    all_preds = torch.tensor([], dtype=torch.long).cuda(cfg.use_which_gpu)
    all_labels = torch.tensor([], dtype=torch.long).cuda(cfg.use_which_gpu)
    with torch.no_grad():
        for i in tqdm(range(samples.shape[0]), total=(len(samples)), desc="Validation"):
            # tokens: 1 * length of sentence
            # label_list: 1 * length
            tokens = torch.tensor(samples[i, :][masks[i] == 1],
                                  dtype=torch.long).unsqueeze(0).cuda(cfg.use_which_gpu)
            label_list = torch.tensor(labels[i, :][masks[i] == 1],
                                      dtype=torch.long).unsqueeze(0).cuda(cfg.use_which_gpu)
            cur_masks = torch.tensor(masks[i, :][masks[i] == 1],
                                     dtype=torch.long).unsqueeze(0).cuda(cfg.use_which_gpu)
            cur_gold_predicate = torch.tensor(gold_predicate[i, :][masks[i] == 1],
                                              dtype=torch.float32).unsqueeze(0).cuda(cfg.use_which_gpu)

            # tokens: len * 1
            tokens = torch.t(tokens)

            # logit: length * 1 * labels
            logit: torch.Tensor = model(tokens, cur_masks, cur_gold_predicate)

            # viterbi decoding instead of per-step argmax
            # _, predictions = logit.max(dim=2)
            predictions = call_viterbi(logit, transition_matrix)
            predictions = torch.from_numpy(np.array(predictions)).cuda(cfg.use_which_gpu)

            # predictions: length
            predictions.squeeze_()
            # label_list: length
            label_list = label_list.squeeze()
            try:
                all_preds = torch.cat((all_preds, predictions))
                all_labels = torch.cat((all_labels, label_list))
            except:
                pass
    return metrics.f1_score(y_true=all_labels.cpu(), y_pred=all_preds.cpu(), average='micro')
def loss_HardNet(anchor, positive, column_row_swap=False,
                 anchor_swap=False, anchor_ave=False,
                 margin=1.0, batch_reduce='min', loss_type="triplet_margin"):
    """HardNet margin loss - calculates loss based on distance matrix based on
    positive distance and closest negative distance.
    """
    assert anchor.size() == positive.size(), "Input sizes between positive and negative must be equal."
    assert anchor.dim() == 2, "Input must be a 2D matrix."
    eps = 1e-8
    dist_matrix = distance_matrix_vector(anchor, positive) + eps
    eye = torch.autograd.Variable(torch.eye(dist_matrix.size(1))).cuda()

    # steps to filter out same patches that occur in distance matrix as negatives
    pos1 = torch.diag(dist_matrix)
    dist_without_min_on_diag = dist_matrix + eye * 10
    mask = (dist_without_min_on_diag.ge(0.008) - 1) * -1
    mask = mask.type_as(dist_without_min_on_diag) * 10
    dist_without_min_on_diag = dist_without_min_on_diag + mask
    if batch_reduce == 'min':
        min_neg = torch.min(dist_without_min_on_diag, 1)[0]
        if column_row_swap:
            min_neg2 = torch.min(dist_without_min_on_diag, 0)[0]
            min_neg = torch.min(min_neg, min_neg2)
        if False:
            dist_matrix_a = distance_matrix_vector(anchor, anchor) + eps
            dist_matrix_p = distance_matrix_vector(positive, positive) + eps
            dist_without_min_on_diag_a = dist_matrix_a + eye * 10
            dist_without_min_on_diag_p = dist_matrix_p + eye * 10
            min_neg_a = torch.min(dist_without_min_on_diag_a, 1)[0]
            min_neg_p = torch.t(torch.min(dist_without_min_on_diag_p, 0)[0])
            min_neg_3 = torch.min(min_neg_p, min_neg_a)
            min_neg = torch.min(min_neg, min_neg_3)
            print(min_neg_a)
            print(min_neg_p)
            print(min_neg_3)
            print(min_neg)
        pos = pos1
    elif batch_reduce == 'average':
        pos = pos1.repeat(anchor.size(0)).view(-1, 1).squeeze(0)
        min_neg = dist_without_min_on_diag.view(-1, 1)
        if column_row_swap:
            min_neg2 = torch.t(dist_without_min_on_diag).contiguous().view(-1, 1)
            min_neg = torch.min(min_neg, min_neg2)
        min_neg = min_neg.squeeze(0)
    elif batch_reduce == 'random':
        idxs = torch.autograd.Variable(torch.randperm(anchor.size()[0]).long()).cuda()
        min_neg = dist_without_min_on_diag.gather(1, idxs.view(-1, 1))
        if column_row_swap:
            min_neg2 = torch.t(dist_without_min_on_diag).gather(1, idxs.view(-1, 1))
            min_neg = torch.min(min_neg, min_neg2)
        min_neg = torch.t(min_neg).squeeze(0)
        pos = pos1
    else:
        print('Unknown batch reduce mode. Try min, average or random')
        sys.exit(1)

    if loss_type == "triplet_margin":
        loss = torch.clamp(margin + pos - min_neg, min=0.0)
    elif loss_type == 'softmax':
        exp_pos = torch.exp(2.0 - pos)
        exp_den = exp_pos + torch.exp(2.0 - min_neg) + eps
        loss = -torch.log(exp_pos / exp_den)
    elif loss_type == 'contrastive':
        loss = torch.clamp(margin - min_neg, min=0.0) + pos
    else:
        print('Unknown loss type. Try triplet_margin, softmax or contrastive')
        sys.exit(1)

    loss = torch.mean(loss)
    return loss
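loss_HardNet depends on a distance_matrix_vector helper defined elsewhere in HardNet; a minimal sketch consistent with how it is used here (the real helper is equivalent up to numerical details):

import torch

def distance_matrix_vector(anchor, positive):
    # pairwise Euclidean distances between rows of anchor and rows of positive
    d1 = torch.sum(anchor ** 2, dim=1).unsqueeze(1)    # (N, 1)
    d2 = torch.sum(positive ** 2, dim=1).unsqueeze(0)  # (1, N)
    eps = 1e-6
    return torch.sqrt(torch.clamp(d1 + d2 - 2.0 * anchor.mm(torch.t(positive)), min=eps))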
def ratio_matrix_vector(a, p):
    # out[i][j] = a[j] / p[i]
    eps = 1e-12
    return a.expand(p.size(0), a.size(0)) / (torch.t(p.expand(a.size(0), p.size(0))) + eps)
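A tiny worked example of the broadcast above (values are approximate because of the eps term):

import torch

a = torch.tensor([1., 2., 3.])
p = torch.tensor([2., 4.])
print(ratio_matrix_vector(a, p))
# tensor([[0.5000, 1.0000, 1.5000],
#         [0.2500, 0.5000, 0.7500]])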
def iou(box1, box2):
    from shapely.geometry import Polygon
    # boxes are (2, N) tensors of polygon vertices; torch.t yields the
    # (N, 2) sequence of (x, y) points shapely expects
    a = Polygon(torch.t(box1)).convex_hull
    b = Polygon(torch.t(box2)).convex_hull
    return a.intersection(b).area / a.union(b).area
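An illustrative call with the (2, 4) vertex layout assumed above: row 0 holds x coordinates, row 1 holds y. Depending on your shapely version you may need to pass torch.t(box).tolist() instead of the raw tensor.

import torch

box1 = torch.tensor([[0., 1., 1., 0.],
                     [0., 0., 1., 1.]])
box2 = torch.tensor([[0.5, 1.5, 1.5, 0.5],
                     [0., 0., 1., 1.]])
print(iou(box1, box2))  # 0.5 / 1.5 = 1/3 for these overlapping unit squares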
def __getitem__(self, index):
    # find shape that contains the point with given global index
    shape_ind, patch_ind = self.shape_index(index)
    shape = self.shape_cache.get(shape_ind)
    if shape.pidx is None:
        center_point_ind = patch_ind
    else:
        center_point_ind = shape.pidx[patch_ind]

    # get neighboring points (within euclidean distance patch_radius)
    patch_pts = torch.FloatTensor(self.points_per_patch * len(self.patch_radius_absolute[shape_ind]), 3).zero_()
    # patch_pts_valid = torch.ByteTensor(self.points_per_patch*len(self.patch_radius_absolute[shape_ind])).zero_()
    patch_pts_valid = []
    scale_ind_range = np.zeros([len(self.patch_radius_absolute[shape_ind]), 2], dtype='int')
    for radius_index, patch_radius in enumerate(self.patch_radius_absolute[shape_ind]):
        patch_pts, patch_pts_valid, scale_ind_range = self.select_patch_points(
            patch_radius, index, center_point_ind, shape,
            radius_index, scale_ind_range, patch_pts_valid, patch_pts)

    if self.include_normals:
        patch_normal = torch.from_numpy(shape.normals[center_point_ind, :])

    if self.include_curvatures:
        patch_curv = torch.from_numpy(shape.curv[center_point_ind, :])
        # scale curvature to match the scaled vertices (curvature*s matches position/s):
        patch_curv = patch_curv * self.patch_radius_absolute[shape_ind][0]

    if self.include_original:
        original = shape.pts[center_point_ind]

    if self.include_clean_points:
        # patch_clean_points = torch.from_numpy(shape.clean_points[center_point_ind, :])
        tmp = []
        patch_clean_points = torch.FloatTensor(self.points_per_patch, 3).zero_()
        scale_clean_ind_range = np.zeros([len(self.patch_radius_absolute[shape_ind]), 2], dtype='int')
        # clean_patch_radius = float(sum(self.patch_radius_absolute[shape_ind]))/len(self.patch_radius_absolute[shape_ind])
        clean_patch_radius = self.patch_radius_absolute[shape_ind][0]
        patch_clean_points, _, _ = self.select_patch_points(
            clean_patch_radius, index, center_point_ind, shape,
            0, scale_clean_ind_range, tmp, patch_clean_points, clean_points=True)

    if self.use_pca:
        # compute pca of points in the patch:
        # center the patch around the mean:
        pts_mean = patch_pts[patch_pts_valid, :].mean(0)
        patch_pts[patch_pts_valid, :] = patch_pts[patch_pts_valid, :] - pts_mean

        trans, _, _ = torch.svd(torch.t(patch_pts[patch_pts_valid, :]))
        patch_pts[patch_pts_valid, :] = torch.mm(patch_pts[patch_pts_valid, :], trans)

        cp_new = -pts_mean  # since the patch was originally centered, the original cp was at (0,0,0)
        cp_new = torch.matmul(cp_new, trans)

        # re-center on original center point
        patch_pts[patch_pts_valid, :] = patch_pts[patch_pts_valid, :] - cp_new

        if self.include_normals:
            patch_normal = torch.matmul(patch_normal, trans)
    else:
        trans = torch.eye(3).float()

    # get point tuples from the current patch
    if self.point_tuple > 1:
        patch_tuples = torch.FloatTensor(self.points_per_patch * len(self.patch_radius_absolute[shape_ind]),
                                         3 * self.point_tuple).zero_()
        for s, rad in enumerate(self.patch_radius_absolute[shape_ind]):
            start = scale_ind_range[s, 0]
            end = scale_ind_range[s, 1]
            point_count = end - start

            tuple_count = point_count ** self.point_tuple

            # get linear indices of the tuples
            if tuple_count > self.points_per_patch:
                patch_tuple_inds = self.rng.choice(tuple_count, self.points_per_patch, replace=False)
                tuple_count = self.points_per_patch
            else:
                patch_tuple_inds = np.arange(tuple_count)

            # linear tuple index to index for each tuple element
            patch_tuple_inds = np.unravel_index(patch_tuple_inds, (point_count,) * self.point_tuple)

            for t in range(self.point_tuple):
                patch_tuples[start:start + tuple_count, t * 3:(t + 1) * 3] = \
                    patch_pts[start + patch_tuple_inds[t], :]

        patch_pts = patch_tuples

    patch_feats = ()
    for pfeat in self.patch_features:
        if pfeat == 'normal':
            patch_feats = patch_feats + (patch_normal,)
        elif pfeat == 'max_curvature':
            patch_feats = patch_feats + (patch_curv[0:1],)
        elif pfeat == 'min_curvature':
            patch_feats = patch_feats + (patch_curv[1:2],)
        elif pfeat == 'clean_points':
            patch_feats = patch_feats + (patch_clean_points,)
        elif pfeat == "original":
            patch_feats = patch_feats + (original, patch_radius)
        else:
            raise ValueError('Unknown patch feature: %s' % (pfeat))

    return (patch_pts,) + patch_feats + (trans,)
def z(self, input, weight, bias):
    '''Pre-activation: z = W^T * input + b, i.e. sum(i*w) + b'''
    z = torch.matmul(torch.t(weight), input) + bias
    return z
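The torch.t implies the weights are stored as (n_inputs, n_units); illustrating with made-up shapes:

import torch

weight = torch.randn(3, 4)  # (n_inputs, n_units), hence the transpose
input = torch.randn(3)
bias = torch.randn(4)
z_val = torch.matmul(torch.t(weight), input) + bias  # shape (4,)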
# estimator.fit(commonZ)
# centroids = estimator.cluster_centers_
# label_pred = estimator.labels_
# acc = metrics.acc(label_true, label_pred)
# nmi = metrics.nmi(label_true, label_pred)
# ACC_all.append(acc)
# NMI_all.append(nmi)
# print(' ' * 8 + '|==> acc: %.4f, nmi: %.4f <==|' % (acc, nmi))
sio.savemat('commonZg.mat', {'Z': commonZ_step2})

# Student-t soft assignment q and sharpened target distribution p
q1 = 1.0 / (1.0 + torch.sum(
    torch.pow(torch.unsqueeze(torch.FloatTensor(commonZ_step1), 1)
              - torch.FloatTensor(centroids0), 2), 2))
q = torch.t(torch.t(q1) / torch.sum(q1, 1))
p1 = torch.pow(q, 2) / torch.sum(q, 0)
p = torch.t(torch.t(p1) / torch.sum(p1, 1))

# center = torch.FloatTensor(centroids).cuda()
# center = torch.FloatTensor(centroids_step2).cuda()
# model.clu.weights.data = center

#################################################
# Step3: VIGAN
#################################################
print('step 3')
total_steps = 0
# eee = []
# ACC_all = []
# NMI_all = []
loss_ave = []
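The q/p computation above is the Student-t soft assignment and sharpened target distribution popularized by DEC; a self-contained restatement of the same arithmetic (shapes assumed: N embeddings, K centroids):

import torch

def soft_assign(z, centroids):
    # z: (N, d), centroids: (K, d); returns (N, K) row-normalized soft labels
    q = 1.0 / (1.0 + torch.sum((z.unsqueeze(1) - centroids) ** 2, dim=2))
    return q / q.sum(dim=1, keepdim=True)

def target_distribution(q):
    # sharpen: square, normalize by cluster frequency, renormalize rows
    p = q ** 2 / q.sum(dim=0)
    return p / p.sum(dim=1, keepdim=True)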
def forward(self, theta, x, **kwargs):
    """
    Parameters
    ----------
    theta :
    x :
    **kwargs :

    Returns
    -------
    """
    # Conditioner
    try:
        h = self.activation_function(
            F.linear(theta, torch.t(self.Wx))
            + F.linear(x, torch.t(self.Ms[0] * self.Ws[0]), self.bs[0]))
    except RuntimeError:
        logger.error("Abort! Abort!")
        logger.info("MADE settings: n_inputs = %s, n_conditionals = %s",
                    self.n_inputs, self.n_conditionals)
        logger.info(
            "Shapes: theta %s, Wx %s, x %s, Ms %s, Ws %s, bs %s",
            theta.shape, self.Wx.shape, x.shape,
            self.Ms[0].shape, self.Ws[0].shape, self.bs[0].shape,
        )
        logger.info(
            "Types: theta %s, Wx %s, x %s, Ms %s, Ws %s, bs %s",
            type(theta), type(self.Wx), type(x),
            type(self.Ms[0]), type(self.Ws[0]), type(self.bs[0]),
        )
        logger.info(
            "CUDA: theta %s, Wx %s, x %s, Ms %s, Ws %s, bs %s",
            theta.is_cuda, self.Wx.is_cuda, x.is_cuda,
            self.Ms[0].is_cuda, self.Ws[0].is_cuda, self.bs[0].is_cuda,
        )
        raise

    for M, W, b in zip(self.Ms[1:], self.Ws[1:], self.bs[1:]):
        h = self.activation_function(F.linear(h, torch.t(M * W), b))

    # Gaussian parameters
    self.m = F.linear(h, torch.t(self.Mmp * self.Wm), self.bm)
    self.logp = F.linear(h, torch.t(self.Mmp * self.Wp), self.bp)

    # u(x)
    u = torch.exp(0.5 * self.logp) * (x - self.m)

    # log det du/dx
    logdet_dudx = 0.5 * torch.sum(self.logp, dim=1)

    return u, logdet_dudx
def from_torch(attention: TorchBertAttention,
               layer_norm: Optional[TorchLayerNorm] = None,
               is_trans_weight: bool = False):
    """Load an attn model from a huggingface bert attention model."""
    ln_params = {}
    if layer_norm is not None:
        ln_params = {k: v for k, v in layer_norm.named_parameters()}
    params = {k: v for k, v in attention.named_parameters()}
    with torch.no_grad():
        if is_trans_weight:
            # merge self.query.weight, self.key.weight and self.value.weight together as qkv.weight
            qkv_weight = torch.cat((params['self.query.weight'],
                                    params['self.key.weight'],
                                    params['self.value.weight']), 0)
            output_weight = params['output.dense.weight']
            k_w = params['self.key.weight']
            v_w = params['self.value.weight']
            q_w = params['self.query.weight']
        else:
            # merge self.query.weight, self.key.weight and self.value.weight together as qkv.weight
            qkv_weight = torch.clone(
                torch.t(
                    torch.cat((params['self.query.weight'],
                               params['self.key.weight'],
                               params['self.value.weight']),
                              0).contiguous()).contiguous())
            output_weight = torch.clone(torch.t(params['output.dense.weight']).contiguous())
            k_w = torch.clone(torch.t(params['self.key.weight']).contiguous())
            v_w = torch.clone(torch.t(params['self.value.weight']).contiguous())
            q_w = torch.clone(torch.t(params['self.query.weight']).contiguous())

        qkv_bias = torch.cat((params['self.query.bias'], params['self.key.bias'],
                              params['self.value.bias']), 0)

        if layer_norm is not None:
            att = MultiHeadedAttentionSmartBatch(
                convert2tt_tensor(k_w),
                convert2tt_tensor(params['self.key.bias']),
                convert2tt_tensor(v_w),
                convert2tt_tensor(params['self.value.bias']),
                convert2tt_tensor(q_w),
                convert2tt_tensor(params['self.query.bias']),
                convert2tt_tensor(output_weight),
                convert2tt_tensor(params['output.dense.bias']),
                convert2tt_tensor(qkv_weight),
                convert2tt_tensor(qkv_bias),
                convert2tt_tensor(params['output.LayerNorm.weight']),
                convert2tt_tensor(params['output.LayerNorm.bias']),
                convert2tt_tensor(ln_params['weight']),
                convert2tt_tensor(ln_params['bias']),
                attention.self.num_attention_heads)
        else:
            att = MultiHeadedAttentionSmartBatch(
                convert2tt_tensor(k_w),
                convert2tt_tensor(params['self.key.bias']),
                convert2tt_tensor(v_w),
                convert2tt_tensor(params['self.value.bias']),
                convert2tt_tensor(q_w),
                convert2tt_tensor(params['self.query.bias']),
                convert2tt_tensor(output_weight),
                convert2tt_tensor(params['output.dense.bias']),
                convert2tt_tensor(qkv_weight),
                convert2tt_tensor(qkv_bias),
                convert2tt_tensor(params['output.LayerNorm.weight']),
                convert2tt_tensor(params['output.LayerNorm.bias']),
                attention.self.num_attention_heads)
        return att
def forward(self, inputs):
    x = inputs
    y = torch.stack([100 * self.w0 * inputs[:, 0], 0.1 * self.w1 * inputs[:, 1]])
    y = torch.t(y)
    return y.contiguous()
def perturb_past(
    past,
    model,
    last,
    unpert_past=None,
    unpert_logits=None,
    accumulated_hidden=None,
    grad_norms=None,
    stepsize=0.01,
    one_hot_bows_vectors=None,
    classifier=None,
    class_label=None,
    loss_type=0,
    num_iterations=3,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    kl_scale=0.01,
    device="cuda",
):
    # Generate initial perturbed past
    grad_accumulator = [(np.zeros(p.shape).astype("float32")) for p in past]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    if decay:
        decay_mask = torch.arange(0.0, 1.0 + SMALL_CONST, 1.0 / (window_length))[1:]
    else:
        decay_mask = 1.0

    # TODO fix this comment (SUMANTH)
    # Generate a mask; the gradient perturbation is restricted to a window over the past
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        ones_key_val_shape = tuple(past[0].shape[:-2]) + tuple([window_length]) + tuple(past[0].shape[-1:])
        zeros_key_val_shape = (
            tuple(past[0].shape[:-2]) + tuple([curr_length - window_length]) + tuple(past[0].shape[-1:])
        )
        ones_mask = torch.ones(ones_key_val_shape)
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)
        window_mask = torch.cat((ones_mask, torch.zeros(zeros_key_val_shape)), dim=-2).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    # accumulate perturbations for num_iterations
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        # print("Iteration ", i + 1)
        curr_perturbation = [
            to_var(torch.from_numpy(p_), requires_grad=True, device=device) for p_ in grad_accumulator
        ]

        # Compute hidden using perturbed past
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        all_logits, _, all_hidden = model(last, past=perturbed_past)
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(hidden, dim=1).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)

        loss = 0.0
        loss_list = []
        if loss_type == PPLM_BOW or loss_type == PPLM_BOW_DISCRIM:
            for one_hot_bow in one_hot_bows_vectors:
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
                loss_list.append(bow_loss)
            # print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type == 2 or loss_type == 3:
            ce_loss = torch.nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                _, curr_unpert_past, curr_all_hidden = model(past=curr_unpert_past,
                                                             inputs_embeds=inputs_embeds)
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(curr_hidden, dim=1)

            prediction = classifier(new_accumulated_hidden / (curr_length + 1 + horizon_length))

            label = torch.tensor(prediction.shape[0] * [class_label], device=device, dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            # print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss
            loss_list.append(discrim_loss)

        kl_loss = 0.0
        if kl_scale > 0.0:
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)
            unpert_probs = unpert_probs + SMALL_CONST * (unpert_probs <= SMALL_CONST).float().to(device).detach()
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * ((corrected_probs * (corrected_probs / unpert_probs).log()).sum())
            # print(" kl_loss", kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        # print(" pplm_loss", (loss - kl_loss).data.cpu().numpy())

        # compute gradients
        loss.backward()

        # calculate gradient norms
        if grad_norms is not None and loss_type == PPLM_BOW:
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [
                (torch.norm(p_.grad * window_mask) + SMALL_CONST) for index, p_ in enumerate(curr_perturbation)
            ]

        # normalize gradients
        grad = [
            -stepsize * (p_.grad * window_mask / grad_norms[index] ** gamma).data.cpu().numpy()
            for index, p_ in enumerate(curr_perturbation)
        ]

        # accumulate gradient
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # reset gradients, just to make sure
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # removing past from the graph
        new_past = []
        for p_ in past:
            new_past.append(p_.detach())
        past = new_past

    # apply the accumulated perturbations to the past
    grad_accumulator = [to_var(torch.from_numpy(p_), requires_grad=True, device=device) for p_ in grad_accumulator]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter
def forward(self, s_vec_batched, qa_pairs_batched, cpt_paths_batched, rel_paths_batched, ana_mode=False):
    self.device = self.concept_emd.weight.device  # multiple GPUs need to specify device
    final_vecs = []

    if ana_mode:
        path_att_scores = []
        qa_pair_att_scores = []

    for index in range(len(s_vec_batched)):  # len = batch_size * num_choices
        # for each question-answer statement
        s_vec = s_vec_batched[index].to(self.device)
        cpt_paths = cpt_paths_batched[index]
        rel_paths = rel_paths_batched[index]

        if len(qa_pairs_batched[index]) == 0 or False:  # if "or True" then we can do ablation study
            raw_qas_vecs = torch.cat((torch.zeros(1, self.concept_dim).to(self.device),
                                      torch.zeros(1, self.concept_dim).to(self.device),
                                      torch.stack([s_vec]).to(self.device)),
                                     dim=1).to(self.device)
            qas_vecs = self.qas_encoder(raw_qas_vecs)
            # print("0:", qas_vecs.size())
            latent_rel_vecs = torch.cat((qas_vecs, torch.zeros(1, self.lstm_dim).to(self.device)), dim=1)
        else:
            q_seq = []
            a_seq = []
            qa_path_num = []
            tmp_cpt_paths = []
            for qa_pair in qa_pairs_batched[index]:  # for each possible qc, ac pair
                q, a = qa_pair[0], qa_pair[1]
                q_seq.append(q)
                a_seq.append(a)
                qa_cpt_paths, qa_rel_paths = self.paths_group(
                    cpt_paths, rel_paths, q, a, k=self.num_random_paths)  # self.num_random_paths
                qa_path_num.append(len(qa_cpt_paths))
                tmp_cpt_paths.extend(qa_cpt_paths)

            # assert that the order is continuous
            if self.num_random_paths is None:
                assert tmp_cpt_paths == cpt_paths

            q_seq = torch.LongTensor(q_seq).to(self.device)
            a_seq = torch.LongTensor(a_seq).to(self.device)
            q_vecs = self.concept_emd(q_seq)
            a_vecs = self.concept_emd(a_seq)
            # q_vecs = q_vecss[index]  # self.concept_emd(q_seq)
            # a_vecs = a_vecss[index]  # self.concept_emd(a_seq)
            s_vecs = torch.stack([s_vec] * len(qa_pairs_batched[index]))
            raw_qas_vecs = torch.cat((q_vecs, a_vecs, s_vecs), dim=1)
            # all the qas triple vectors associated with a statement
            qas_vecs = self.qas_encoder(raw_qas_vecs)
            # print(qas_vecs.size())
            # print(len(all_qa_cpt_paths_embeds))
            pooled_path_vecs = []

            # batched path encoding
            batched_all_qa_cpt_paths_embeds = self.concept_emd(
                torch.LongTensor(cpt_paths).to(self.device)).permute(1, 0, 2)
            batched_all_qa_rel_paths_embeds = self.relation_emd(
                torch.LongTensor(rel_paths).to(self.device)).permute(1, 0, 2)
            batched_all_qa_cpt_rel_path_embeds = torch.cat(
                (batched_all_qa_cpt_paths_embeds, batched_all_qa_rel_paths_embeds), dim=2)
            # batched_all_qa_cpt_rel_path_embeds = over_batched_all_qa_cpt_rel_path_embeds[
            #     0:None, path_splits[index][0]:path_splits[index][1], 0:None]

            # if False then ablate the LSTM
            if True:
                batched_lstm_outs, _ = self.lstm(batched_all_qa_cpt_rel_path_embeds)
            else:
                batched_lstm_outs = torch.zeros(batched_all_qa_cpt_rel_path_embeds.size()[0],
                                                batched_all_qa_cpt_rel_path_embeds.size()[1],
                                                self.lstm_dim).to(self.device)

            if self.path_attention:
                query_vecs = self.qas_pathlstm_att(qas_vecs)

            cur_start = 0
            for qa_index in range(len(qa_path_num)):  # renamed from "index" to avoid shadowing the outer loop
                if self.path_attention:
                    query_vec = query_vecs[qa_index]
                cur_end = cur_start + qa_path_num[qa_index]

                # mean_pooled_path_vec = batched_lstm_outs[-1, cur_start:cur_end, :].mean(dim=0)  # mean pooling
                # attention pooling
                blo = batched_lstm_outs[-1, cur_start:cur_end, :]
                if self.path_attention:
                    att_scores = torch.mv(blo, query_vec)  # path-level attention scores
                    norm_att_scores = F.softmax(att_scores, dim=0)
                    att_pooled_path_vec = torch.mv(torch.t(blo), norm_att_scores)
                    if ana_mode:
                        path_att_scores.append(norm_att_scores)
                else:
                    att_pooled_path_vec = blo.mean(dim=0)

                cur_start = cur_end
                pooled_path_vecs.append(att_pooled_path_vec)

            pooled_path_vecs = torch.stack(pooled_path_vecs)
            latent_rel_vecs = torch.cat((qas_vecs, pooled_path_vecs), dim=1)  # qas and KE-qas

        # final_vec = latent_rel_vecs.mean(dim=0).to(self.device)  # mean pooling
        # att pooling
        if self.path_attention:
            sent_as_query = self.sent_ltrel_att(s_vec)  # sent attend on qas
            r_att_scores = torch.mv(qas_vecs, sent_as_query)  # qa-pair-level attention scores
            norm_r_att_scores = F.softmax(r_att_scores, dim=0)
            if ana_mode:
                qa_pair_att_scores.append(norm_r_att_scores)
            final_vec = torch.mv(torch.t(latent_rel_vecs), norm_r_att_scores)
        else:
            final_vec = latent_rel_vecs.mean(dim=0).to(self.device)  # mean pooling
        final_vecs.append(torch.cat((final_vec, s_vec), dim=0))

    logits = self.hidden2output(torch.stack(final_vecs))
    if not ana_mode:
        return logits
    else:
        return logits, path_att_scores, qa_pair_att_scores
def dtaudp(p, alpha, lam, Q):
    # kinetic-energy gradient: Q diag(1 / softabs(lam)) Q^T p
    return Q.mv(torch.diag(1 / softabs_map(lam, alpha)).mv(torch.t(Q).mv(p)))
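dtaudp assumes a softabs_map helper defined elsewhere; a minimal sketch consistent with the lam * coth_torch(alpha * lam) denominator in the D function above (the standard softabs metric):

import torch

def softabs_map(lam, alpha):
    # smooth |lam| via lam * coth(alpha * lam) = lam / tanh(alpha * lam);
    # undefined at lam == 0, where real implementations add a safeguard
    return lam / torch.tanh(alpha * lam)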
def forward(self, s_vec_batched, qa_pairs_batched, cpt_paths_batched, rel_paths_batched,
            graphs, concept_mapping_dicts, ana_mode=False):
    self.device = self.concept_emd.weight.device  # multiple GPUs need to specify device
    final_vecs = []

    output_graphs = self.graph_encoder(graphs)
    output_concept_embeds = torch.cat(
        (output_graphs.ndata["h"],
         torch.zeros(1, self.graph_output_dim).to(self.device)))  # len(output_concept_embeds) as padding

    # new_concept_embed = nn.Embedding(output_concept_embeds.size()[0], output_concept_embeds.size()[1])
    # new_concept_embed.weight = nn.Parameter(output_concept_embeds)
    new_concept_embed = torch.cat((output_graphs.ndata["h"],
                                   s_vec_batched.new_zeros((1, self.graph_output_dim))))
    new_concept_embed = new_concept_embed.to(self.device)

    if ana_mode:
        path_att_scores = []
        qa_pair_att_scores = []

    for index in range(len(s_vec_batched)):  # len = batch_size * num_choices
        # for each question-answer statement
        s_vec = s_vec_batched[index].to(self.device)
        cpt_paths = cpt_paths_batched[index]
        rel_paths = rel_paths_batched[index]

        if len(qa_pairs_batched[index]) == 0 or False:  # if "or True" then we can do ablation study
            raw_qas_vecs = torch.cat(
                (torch.zeros(1, self.graph_output_dim + self.concept_dim).to(self.device),
                 torch.zeros(1, self.graph_output_dim + self.concept_dim).to(self.device),
                 torch.stack([s_vec]).to(self.device)),
                dim=1).to(self.device)
            qas_vecs = self.qas_encoder(raw_qas_vecs)
            # print("0:", qas_vecs.size())
            latent_rel_vecs = torch.cat((qas_vecs, torch.zeros(1, self.lstm_dim).to(self.device)), dim=1)
        else:
            q_seq = []
            a_seq = []
            qa_path_num = []
            tmp_cpt_paths = []
            for qa_pair in qa_pairs_batched[index]:  # for each possible qc, ac pair
                q, a = qa_pair[0], qa_pair[1]
                q_seq.append(q)
                a_seq.append(a)
                qa_cpt_paths, qa_rel_paths = self.paths_group(
                    cpt_paths, rel_paths, q, a, k=self.num_random_paths)  # self.num_random_paths
                qa_path_num.append(len(qa_cpt_paths))
                tmp_cpt_paths.extend(qa_cpt_paths)

            # assert that the order is continuous
            if self.num_random_paths is None:
                assert tmp_cpt_paths == cpt_paths

            mdict = concept_mapping_dicts[index]
            # new_q_vecs = new_concept_embed(
            #     torch.LongTensor([mdict.get(c, len(output_concept_embeds) - 1) for c in q_seq]).to(self.device))
            # new_a_vecs = new_concept_embed(
            #     torch.LongTensor([mdict.get(c, len(output_concept_embeds) - 1) for c in a_seq]).to(self.device))
            new_q_vecs = new_concept_embed[torch.LongTensor(
                [mdict.get(c, len(output_concept_embeds) - 1) for c in q_seq]
            ).to(self.device)].view(len(q_seq), -1)
            new_a_vecs = new_concept_embed[torch.LongTensor(
                [mdict.get(c, len(output_concept_embeds) - 1) for c in a_seq]
            ).to(self.device)].view(len(a_seq), -1)
            ## new_q_vecs = torch.index_select(output_concept_embeds, 0, q_seq)
            ## new_a_vecs = torch.index_select(output_concept_embeds, 0, a_seq)

            q_vecs = self.concept_emd(torch.LongTensor(q_seq).to(self.device))
            a_vecs = self.concept_emd(torch.LongTensor(a_seq).to(self.device))
            q_vecs = torch.cat((q_vecs, new_q_vecs), dim=1)
            a_vecs = torch.cat((a_vecs, new_a_vecs), dim=1)

            s_vecs = torch.stack([s_vec] * len(qa_pairs_batched[index]))
            raw_qas_vecs = torch.cat((q_vecs, a_vecs, s_vecs), dim=1)
            # all the qas triple vectors associated with a statement
            qas_vecs = self.qas_encoder(raw_qas_vecs)

            pooled_path_vecs = []

            # batched path encoding
            #### Method 1
            # cpt_max_len = len(cpt_paths[0])
            # mdicted_cpaths = []
            # for cpt_path in cpt_paths:
            #     mdicted_cpaths.extend([mdict.get(c, len(output_concept_embeds)-1) for c in cpt_path])
            # mdicted_cpaths = torch.LongTensor(mdicted_cpaths).to(self.device)
            # assert len(mdicted_cpaths) == cpt_max_len * len(cpt_paths)  # flatten
            # indexed_selection = torch.index_select(output_concept_embeds, 0, mdicted_cpaths)
            # batched_all_qa_cpt_paths_embeds = torch.stack(
            #     [torch.stack(path) for path in list(zip(*(iter(indexed_selection),) * cpt_max_len))])
            # batched_all_qa_cpt_paths_embeds = batched_all_qa_cpt_paths_embeds.permute(1, 0, 2)

            #### Method 2
            # batched_all_qa_cpt_paths_embeds = []
            # for cpt_path in cpt_paths:
            #     path_concept_vecs = [output_concept_embeds[c] for c in [mdict.get(c, -1) for c in cpt_path] if c >= 0]
            #     path_concept_vecs = [output_graphs.ndata["h"][c] for c in [mdict.get(c, -1) for c in cpt_path] if c >= 0]
            #     zero_paddings = [torch.zeros(self.graph_output_dim).to(self.device)] * (len(cpt_path)-len(path_concept_vecs))
            #     path_concept_vecs = torch.stack(path_concept_vecs+zero_paddings)
            #     batched_all_qa_cpt_paths_embeds.append(path_concept_vecs)
            # batched_all_qa_cpt_paths_embeds = torch.stack(batched_all_qa_cpt_paths_embeds).permute(1, 0, 2)

            #### Method 3
            mdicted_cpaths = []
            for cpt_path in cpt_paths:
                mdicted_cpaths.append([mdict.get(c, len(output_concept_embeds) - 1) for c in cpt_path])
            mdicted_cpaths = torch.LongTensor(mdicted_cpaths).to(self.device)

            # new_batched_all_qa_cpt_paths_embeds = new_concept_embed(mdicted_cpaths).permute(1, 0, 2)
            new_batched_all_qa_cpt_paths_embeds = new_concept_embed[mdicted_cpaths].view(
                len(cpt_paths), len(cpt_paths[0]), -1).permute(1, 0, 2)

            batched_all_qa_cpt_paths_embeds = self.concept_emd(
                torch.LongTensor(cpt_paths).to(self.device)).permute(1, 0, 2)  # old concept embed
            batched_all_qa_cpt_paths_embeds = torch.cat(
                (batched_all_qa_cpt_paths_embeds, new_batched_all_qa_cpt_paths_embeds), dim=2)

            batched_all_qa_rel_paths_embeds = self.relation_emd(
                torch.LongTensor(rel_paths).to(self.device)).permute(1, 0, 2)

            batched_all_qa_cpt_rel_path_embeds = torch.cat(
                (batched_all_qa_cpt_paths_embeds, batched_all_qa_rel_paths_embeds), dim=2)
            # batched_all_qa_cpt_rel_path_embeds = over_batched_all_qa_cpt_rel_path_embeds[
            #     0:None, path_splits[index][0]:path_splits[index][1], 0:None]

            # if False then ablate the LSTM
            if True:
                batched_lstm_outs, _ = self.lstm(batched_all_qa_cpt_rel_path_embeds)
            else:
                batched_lstm_outs = torch.zeros(batched_all_qa_cpt_rel_path_embeds.size()[0],
                                                batched_all_qa_cpt_rel_path_embeds.size()[1],
                                                self.lstm_dim).to(self.device)

            if self.path_attention:
                query_vecs = self.qas_pathlstm_att(qas_vecs)

            cur_start = 0
            for qa_index in range(len(qa_path_num)):  # renamed from "index" to avoid shadowing the outer loop
                if self.path_attention:
                    query_vec = query_vecs[qa_index]
                cur_end = cur_start + qa_path_num[qa_index]

                # mean_pooled_path_vec = batched_lstm_outs[-1, cur_start:cur_end, :].mean(dim=0)  # mean pooling
                # attention pooling
                blo = batched_lstm_outs[-1, cur_start:cur_end, :]
                if self.path_attention:
                    att_scores = torch.mv(blo, query_vec)  # path-level attention scores
                    norm_att_scores = F.softmax(att_scores, dim=0)
                    att_pooled_path_vec = torch.mv(torch.t(blo), norm_att_scores)
                    if ana_mode:
                        path_att_scores.append(norm_att_scores)
                else:
                    att_pooled_path_vec = blo.mean(dim=0)

                cur_start = cur_end
                pooled_path_vecs.append(att_pooled_path_vec)

            pooled_path_vecs = torch.stack(pooled_path_vecs)
            latent_rel_vecs = torch.cat((qas_vecs, pooled_path_vecs), dim=1)  # qas and KE-qas

        # final_vec = latent_rel_vecs.mean(dim=0).to(self.device)  # mean pooling
        # att pooling
        if self.path_attention:
            sent_as_query = self.sent_ltrel_att(s_vec)  # sent attend on qas
            r_att_scores = torch.mv(qas_vecs, sent_as_query)  # qa-pair-level attention scores
            norm_r_att_scores = F.softmax(r_att_scores, dim=0)
            if ana_mode:
                qa_pair_att_scores.append(norm_r_att_scores)
            final_vec = torch.mv(torch.t(latent_rel_vecs), norm_r_att_scores)
        else:
            final_vec = latent_rel_vecs.mean(dim=0).to(self.device)  # mean pooling
        final_vecs.append(torch.cat((final_vec, s_vec), dim=0))

    logits = self.hidden2output(torch.stack(final_vecs))
    if not ana_mode:
        return logits
    else:
        return logits, path_att_scores, qa_pair_att_scores
requires_grad=False).cuda()
pred = malconv(exe_input)
prob = sigmoid(pred).cpu().data.numpy()[0][0]
print("prob: ", prob)
if prob < 0.5:
    break
print("change " + str(j) + "th byte")
try:
    min_index = -1
    min_di = 100000
    wj = -w[j:j + 1, :]
    nj = wj / torch.norm(wj, 2)
    zj = z[j:j + 1, :]
    for i in range(1, 256):
        mi = embed(Variable(torch.from_numpy(np.array([i]))).cuda()).data
        si = torch.matmul(nj, torch.t(mi - zj))
        di = torch.norm(mi - (zj + si * nj))
        si = si.cpu().numpy()
        if si > 0 and di < min_di:
            min_di = di
            min_index = i
    if min_index != -1:
        data[j] = min_index
        changes.append(min_index)
except:
    continue

print("finish ", t)
changes = np.array(changes)
np.save("changes.npy", changes)
def precond_beta_mgpu_block(A, b, tol=1e-16):
    '''Run conjugate gradient on multiple GPUs when A = X.T . X does not fit
    on the GPU, by splitting A across GPUs. Preconditioning is performed using
    a sparse approximate LU factorization with the default options in scipy.
    '''
    _message('Computing beta (using approximate inverse preconditioning of A)...')
    padding = A.shape[0] * 5e3
    mem_avail = np.max([available_gpu_memory(i) for i in range(number_gpus())])
    total_tensor_size = mem_avail // 8 - padding
    split = int(total_tensor_size // A.shape[0] - 3)

    Minv = sp.sparse.linalg.spilu(A.numpy()).solve(np.eye(b.shape[0]))
    Minv = torch.from_numpy(Minv)

    A_split = torch.split(A, split, dim=0)
    A_ = []
    for i in range(len(A_split)):
        A_.append(A_split[i].cuda(device='cuda:' + str(i)))
    Minv_split = torch.split(Minv, split, dim=0)
    Minv_ = []
    for i in range(len(Minv_split)):
        Minv_.append(Minv_split[i].cuda(device='cuda:' + str(i)))

    b_gpu = b.cuda(device='cuda:0')
    x = torch.zeros(b_gpu.size(), dtype=torch.float64).cuda(device='cuda:0')
    r = b_gpu.clone().cuda(device='cuda:0')
    z = torch.matmul(Minv, b).cuda(device='cuda:0')
    p = b_gpu.clone().cuda(device='cuda:0')
    rr = torch.sum(torch.matmul(torch.t(r), r))
    rz = torch.sum(torch.matmul(torch.t(r), z))

    numiter = 0
    while rr > tol**2:
        numiter += 1
        if numiter % 100 == 0:
            _message('Reached iteration {}'.format(numiter))
        p_, Ap_ = [], []
        for i in range(len(A_)):
            p_.append(p.cuda(device='cuda:' + str(i)))
            Ap_.append(torch.matmul(A_[i], p_[i]).cpu())
        Ap = torch.cat(Ap_, dim=0).cuda(device='cuda:0')
        del p_, Ap_
        torch.cuda.empty_cache()
        alpha = rz / torch.sum(torch.matmul(torch.t(p), Ap))
        x += alpha * p
        rnew = alpha * Ap
        r_, znew_ = [], []
        for i in range(len(Minv_)):
            r_.append(r.cuda(device='cuda:' + str(i)))
            znew_.append(torch.matmul(Minv_[i], r_[i]).cpu())
        znew = torch.cat(znew_, dim=0).cuda(device='cuda:0')
        beta = torch.sum(znew * (rnew - r)) / rz
        p = znew + beta * p
        r = rnew
        z = znew
        rz = torch.sum(torch.matmul(torch.t(r), z))
        # refresh the stopping criterion with the new residual
        rr = torch.sum(torch.matmul(torch.t(r), r))

    _message('Converged after {} iterations'.format(numiter))
    x_cpu = x.cpu()
    del A_, A_split, b_gpu, x, r, p, Ap, alpha, beta, rr, rz
    torch.cuda.empty_cache()
    _message('Done computing beta!')
    return x_cpu
def __getitem__(self, index): """ The method through which the dataset is accessed for training. The index param is not currently used, and instead each dataset[i] is the result of a random sampling over: - random scene - random rgbd frame from that scene - random rgbd frame (different enough pose) from that scene - various randomization in the match generation and non-match generation procedure returns a large amount of variables, separated by commas. 0th return arg: the type of data sampled (this can be used as a flag for different loss functions) 0th rtype: string 1st, 2nd return args: image_a_rgb, image_b_rgb 1st, 2nd rtype: 3-dimensional torch.FloatTensor of shape (image_height, image_width, 3) 3rd, 4th return args: matches_a, matches_b 3rd, 4th rtype: 1-dimensional torch.LongTensor of shape (num_matches) 5th, 6th return args: non_matches_a, non_matches_b 5th, 6th rtype: 1-dimensional torch.LongTensor of shape (num_non_matches) Return values 3,4,5,6 are all in the "single index" format for pixels. That is (u,v) --> n = u + image_width * v """ # stores metadata about this data metadata = dict() # pick a scene scene_name = self.get_random_scene_name() metadata['scene_name'] = scene_name # image a image_a_idx = self.get_random_image_index(scene_name) image_a_rgb, image_a_depth, image_a_mask, image_a_pose = self.get_rgbd_mask_pose(scene_name, image_a_idx) metadata['image_a_idx'] = image_a_idx # image b image_b_idx = self.get_img_idx_with_different_pose(scene_name, image_a_pose, num_attempts=50) metadata['image_b_idx'] = image_b_idx if image_b_idx is None: logging.info("no frame with sufficiently different pose found, returning") # TODO: return something cleaner than no-data image_a_rgb_tensor = self.rgb_image_to_tensor(image_a_rgb) return self.return_empty_data(image_a_rgb_tensor, image_a_rgb_tensor) image_b_rgb, image_b_depth, image_b_mask, image_b_pose = self.get_rgbd_mask_pose(scene_name, image_b_idx) image_a_depth_numpy = np.asarray(image_a_depth) image_b_depth_numpy = np.asarray(image_b_depth) # find correspondences uv_a, uv_b = correspondence_finder.batch_find_pixel_correspondences(image_a_depth_numpy, image_a_pose, image_b_depth_numpy, image_b_pose, num_attempts=self.num_matching_attempts, img_a_mask=np.asarray(image_a_mask)) if uv_a is None: logging.info("no matches found, returning") image_a_rgb_tensor = self.rgb_image_to_tensor(image_a_rgb) return self.return_empty_data(image_a_rgb_tensor, image_a_rgb_tensor) if self.debug: # downsample so can plot num_matches_to_plot = 10 indexes_to_keep = (torch.rand(num_matches_to_plot)*len(uv_a[0])).floor().type(torch.LongTensor) uv_a = (torch.index_select(uv_a[0], 0, indexes_to_keep), torch.index_select(uv_a[1], 0, indexes_to_keep)) uv_b = (torch.index_select(uv_b[0], 0, indexes_to_keep), torch.index_select(uv_b[1], 0, indexes_to_keep)) # data augmentation if self._domain_randomize: image_a_rgb = correspondence_augmentation.random_domain_randomize_background(image_a_rgb, image_a_mask) image_b_rgb = correspondence_augmentation.random_domain_randomize_background(image_b_rgb, image_b_mask) if not self.debug: [image_a_rgb], uv_a = correspondence_augmentation.random_image_and_indices_mutation([image_a_rgb], uv_a) [image_b_rgb, image_b_mask], uv_b = correspondence_augmentation.random_image_and_indices_mutation([image_b_rgb, image_b_mask], uv_b) else: # also mutate depth just for plotting [image_a_rgb, image_a_depth], uv_a = correspondence_augmentation.random_image_and_indices_mutation([image_a_rgb, image_a_depth], uv_a) [image_b_rgb, 
image_b_depth, image_b_mask], uv_b = correspondence_augmentation.random_image_and_indices_mutation([image_b_rgb, image_b_depth, image_b_mask], uv_b) image_a_depth_numpy = np.asarray(image_a_depth) image_b_depth_numpy = np.asarray(image_b_depth) # find non_correspondences if index%2: metadata['non_match_type'] = 'masked' logging.debug("masking non-matches") image_b_mask = torch.from_numpy(np.asarray(image_b_mask)).type(torch.FloatTensor) else: metadata['non_match_type'] = 'non_masked' logging.debug("not masking non-matches") image_b_mask = None image_b_shape = image_b_depth_numpy.shape image_width = image_b_shape[1] image_height = image_b_shape[1] uv_b_non_matches = correspondence_finder.create_non_correspondences(uv_b, image_b_shape, num_non_matches_per_match=self.num_non_matches_per_match, img_b_mask=image_b_mask) if self.debug: # only want to bring in plotting code if in debug mode import correspondence_plotter # Just show all images uv_a_long = (torch.t(uv_a[0].repeat(self.num_non_matches_per_match, 1)).contiguous().view(-1,1), torch.t(uv_a[1].repeat(self.num_non_matches_per_match, 1)).contiguous().view(-1,1)) uv_b_non_matches_long = (uv_b_non_matches[0].view(-1,1), uv_b_non_matches[1].view(-1,1) ) # Show correspondences if uv_a is not None: fig, axes = correspondence_plotter.plot_correspondences_direct(image_a_rgb, image_a_depth_numpy, image_b_rgb, image_b_depth_numpy, uv_a, uv_b, show=False) correspondence_plotter.plot_correspondences_direct(image_a_rgb, image_a_depth_numpy, image_b_rgb, image_b_depth_numpy, uv_a_long, uv_b_non_matches_long, use_previous_plot=(fig,axes), circ_color='r') # image_a_rgb, image_b_rgb = self.both_to_tensor([image_a_rgb, image_b_rgb]) # convert PIL.Image to torch.FloatTensor image_a_rgb = self.rgb_image_to_tensor(image_a_rgb) image_b_rgb = self.rgb_image_to_tensor(image_b_rgb) uv_a_long = (torch.t(uv_a[0].repeat(self.num_non_matches_per_match, 1)).contiguous().view(-1,1), torch.t(uv_a[1].repeat(self.num_non_matches_per_match, 1)).contiguous().view(-1,1)) uv_b_non_matches_long = (uv_b_non_matches[0].view(-1,1), uv_b_non_matches[1].view(-1,1) ) # flatten correspondences and non_correspondences matches_a = uv_a[1].long()*image_width+uv_a[0].long() matches_b = uv_b[1].long()*image_width+uv_b[0].long() non_matches_a = uv_a_long[1].long()*image_width+uv_a_long[0].long() non_matches_a = non_matches_a.squeeze(1) non_matches_b = uv_b_non_matches_long[1].long()*image_width+uv_b_non_matches_long[0].long() non_matches_b = non_matches_b.squeeze(1) return "matches", image_a_rgb, image_b_rgb, matches_a, matches_b, non_matches_a, non_matches_b, metadata
def forward(self, x):
    return torch.t(x)
print("cv*b failed") #cv_res = Variable(torch.tensor([]), requires_grad=True) for ccv in cv: print("ccv*b:", ccv*b) # cv_res += ccv*b print("cv_by_list:", [ccv*b for ccv in cv]) print("\n") # Matrix(dim2, dim1) dot_product with BatchVector(batchsize, dim1) batchsize=3 dim1=2 dim2=4 v = torch.randn(batchsize, dim1) M = torch.randn(dim2, dim1) print("v:",v) print("torch.t(v):",torch.t(v)) print("M:",M) print("M*v^T:", M.matmul(torch.t(v))) print("\n") # BatchMatrix(batch_size, dim2, dim1) element wise product with BatchVector(batchsize, dim2) dim1=2 dim2=3 batch_size=4 x=torch.rand(dim2, dim1) x_batch=torch.rand(batch_size, dim2, dim1) y=torch.rand(batch_size, dim2) print("x.shape:",x.shape) print("x_batch.shape:",x_batch.shape) print("y.shape:",y.shape) #print("x:",x)
def train_model(self, args):
    num_exp = args.num_exp
    start_graph = args.start_graph
    end_graph = args.end_graph
    window_size = args.window
    dropout = args.dropout
    alpha = args.alpha
    learning_rate = args.learning_rate
    negative_sample = args.ns
    teacher_n_heads = args.teacher_n_heads
    teacher_embed_dim = args.teacher_embed_size
    student_embed_dim = args.student_emb
    student_n_heads = args.student_heads

    results = {}
    print("Start training")
    for graph in range(start_graph, end_graph + 1):
        results[graph] = {
            'teacher': {'num_params': 0, 'mae': 0., 'rmse': 0.},
            'student': {'num_params': 0, 'mae': 0., 'rmse': 0.},
        }
        teacher_mae = []
        teacher_rmse = []
        teacher_number_of_params = []
        student_mae = []
        student_rmse = []
        student_number_of_params = []

        train_adj_norm, train_adj_label, train_adj_ind, features, test_adj, test_adj_ind = \
            self.construct_dataset(graph, window_size, negative_sample)

        for i in range(num_exp):
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            print("Experiment ", i)

            num_cells = len(train_adj_norm)
            teacher_model = EGAD(num_cells, features[0].shape[0], 2 * teacher_embed_dim,
                                 teacher_embed_dim, teacher_n_heads, dropout,
                                 alpha).to(device=self.device)
            teacher_number_of_params.append(self.count_parameters(teacher_model))

            optimizer = optim.Adam(teacher_model.parameters(), lr=learning_rate)
            criterion = nn.MSELoss()  # hoisted out of the epoch loop; MSELoss is stateless
            for epoch in range(100):
                teacher_model.train()
                optimizer.zero_grad()
                output = teacher_model(features, train_adj_norm)
                # reconstruct adjacency from node embeddings via an inner-product decoder
                reconstruction = torch.sigmoid(torch.mm(output, torch.t(output)))
                predicted = reconstruction[train_adj_ind]
                target = train_adj_label
                loss_train = torch.sqrt(criterion(predicted, target))  # RMSE
                loss_train.backward()
                optimizer.step()
            print("Teacher finished")

            teacher_model.eval()
            final_output = teacher_model(features, train_adj_norm)
            train_embeddings = self.get_edge_embeddings(final_output, train_adj_ind).detach().to(device=self.device)
            test_embeddings = self.get_edge_embeddings(final_output, test_adj_ind).detach().to(device=self.device)
            mae_score, rmse_score = self.evaluate_model(teacher_embed_dim, train_embeddings,
                                                        train_adj_label, test_embeddings, test_adj)
            teacher_mae.append(mae_score)
            teacher_rmse.append(rmse_score)
            print("TEACHER FINISHED for GRAPH {} and EXP {}".format(graph, i))

            ##### STUDENT
            if args.distillation == 1:
                student_model = EGAD(num_cells, features[0].shape[0], 2 * student_embed_dim,
                                     student_embed_dim, student_n_heads, dropout,
                                     alpha).to(device=self.device)
                student_number_of_params.append(self.count_parameters(student_model))

                optimizer = optim.Adam(student_model.parameters(), lr=learning_rate)
                for epoch in range(100):
                    student_model.train()
                    optimizer.zero_grad()
                    output = student_model(features, train_adj_norm)
                    reconstruction = torch.sigmoid(torch.mm(output, torch.t(output)))
                    teacher_output = teacher_model(features, train_adj_norm)
                    teacher_reconstruction = torch.sigmoid(torch.mm(teacher_output, torch.t(teacher_output)))
                    student_reconstructed_val = reconstruction[train_adj_ind]
                    teacher_reconstruction_val = teacher_reconstruction[train_adj_ind]
                    # student loss = own reconstruction error plus the teacher's
                    student_R_loss = (criterion(student_reconstructed_val, train_adj_label)
                                      + criterion(teacher_reconstruction_val, train_adj_label))
                    loss_train = torch.sqrt(student_R_loss)
                    loss_train.backward()
                    optimizer.step()

                student_model.eval()
                final_output = student_model(features, train_adj_norm)
                train_embeddings = self.get_edge_embeddings(final_output, train_adj_ind).detach().to(device=self.device)
                test_embeddings = self.get_edge_embeddings(final_output, test_adj_ind).detach().to(device=self.device)
                mae_score, rmse_score = self.evaluate_model(student_embed_dim, train_embeddings,
                                                            train_adj_label, test_embeddings, test_adj)
                student_mae.append(mae_score)
                student_rmse.append(rmse_score)

        results[graph]['teacher']['num_params'] = np.mean(teacher_number_of_params)
        results[graph]['teacher']['mae'] = np.mean(teacher_mae)
        results[graph]['teacher']['rmse'] = np.mean(teacher_rmse)
        if args.distillation == 1:
            results[graph]['student']['num_params'] = np.mean(student_number_of_params)
            results[graph]['student']['mae'] = np.mean(student_mae)
            results[graph]['student']['rmse'] = np.mean(student_rmse)

        print("Graph {} : TEACHER N_PARAMS {} : TEACHER MAE {} : TEACHER RMSE {} : "
              "STUDENT N_PARAMS {} : STUDENT MAE {} : STUDENT RMSE {}".format(
                  graph,
                  results[graph]['teacher']['num_params'],
                  results[graph]['teacher']['mae'],
                  results[graph]['teacher']['rmse'],
                  results[graph]['student']['num_params'],
                  results[graph]['student']['mae'],
                  results[graph]['student']['rmse']))
    return results
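# For intuition: the reconstruction step in both training loops above is a plain
# inner-product decoder, sigmoid(Z @ Z^T), turning node embeddings into edge
# scores in (0, 1). A standalone toy sketch (not the EGAD model itself):
import torch

num_nodes, embed_dim = 5, 3
Z = torch.randn(num_nodes, embed_dim)

scores = torch.sigmoid(torch.mm(Z, torch.t(Z)))  # (5, 5), symmetric
print(scores.shape, torch.allclose(scores, scores.t()))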
def forward(self, x):
    return torch.mm(self.thetas, torch.t(x).float())
import numpy as np
import time
import torch

n1 = torch.rand(20000, 3).cuda()
n2 = torch.rand(21000, 3).cuda()

end = time.time()
# ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, assembled in a single addmm call
# (keyword beta/alpha form; the old positional addmm(beta, input, alpha, ...) signature is deprecated)
sum_1 = torch.t(torch.sum(n1**2, 1).repeat(n2.size()[0], 1))  # (20000, 21000)
sum_2 = torch.sum(n2**2, 1).repeat(n1.size()[0], 1)           # (20000, 21000)
knnDist, _ = torch.min(torch.addmm(sum_1 + sum_2, n1, torch.t(n2), beta=1.0, alpha=-2.0), 0)
knnDist = torch.sqrt(knnDist)
print(time.time() - end)
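# The same nearest-neighbor distances can be computed more directly with
# torch.cdist, which also materializes the full pairwise matrix; this should
# match the addmm formulation above up to floating-point error. Sketch:
import torch

n1 = torch.rand(20000, 3).cuda()
n2 = torch.rand(21000, 3).cuda()

dists = torch.cdist(n1, n2)        # (20000, 21000) pairwise L2 distances
knn_dist, _ = torch.min(dists, 0)  # for each point of n2, distance to its nearest point of n1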
def forward(self, fea_v, length, target_start, target_end):
    if self.add_char:
        word_v = fea_v[0]
        char_v = fea_v[1]
    else:
        word_v = fea_v
    batch_size = word_v.size(0)
    seq_length = word_v.size(1)

    word_emb = self.embedding(word_v)
    word_emb = self.dropout_emb(word_emb)
    if self.static:
        word_static = self.embedding_static(word_v)
        word_static = self.dropout_emb(word_static)
        word_emb = torch.cat([word_emb, word_static], 2)

    x = torch.transpose(word_emb, 0, 1)
    packed_words = pack_padded_sequence(x, length)
    lstm_out, self.hidden = self.lstm(packed_words, self.hidden)
    lstm_out, _ = pad_packed_sequence(lstm_out)
    ##### lstm_out: (seq_len, batch_size, hidden_size)
    lstm_out = self.dropout_lstm(lstm_out)
    x = lstm_out
    x = x.transpose(0, 1)

    ##### batch version
    # x: variable (seq_len, batch_size, hidden_size)
    # target_start: variable (batch_size)
    _, start = torch.max(target_start.unsqueeze(0), dim=1)
    max_start = utils.to_scalar(target_start[start])
    _, end = torch.min(target_end.unsqueeze(0), dim=1)
    min_end = utils.to_scalar(target_end[end])

    # longest "outside the target" span in the batch
    max_length = 0
    for index in range(batch_size):
        x_len = x[index].size(0)
        start = utils.to_scalar(target_start[index])
        end = utils.to_scalar(target_end[index])
        none_t = x_len - (end - start + 1)
        if none_t > max_length:
            max_length = none_t

    left_save = []
    mask_left_save = []
    right_save = []
    mask_right_save = []
    target_save = []
    none_target = []
    mask_none_target = []
    for idx in range(batch_size):
        # context outside the target span (left part + right part), padded to max_length
        mask_none_t = []
        none_t = None
        x_len_cur = x[idx].size(0)
        start_cur = utils.to_scalar(target_start[idx])
        end_cur = utils.to_scalar(target_end[idx])
        if start_cur != 0:
            left = x[idx][:start_cur]
            none_t = left
            mask_none_t.extend([1] * start_cur)
        if end_cur != (x_len_cur - 1):
            right = x[idx][(end_cur + 1):]
            if none_t is not None:
                none_t = torch.cat([none_t, right], 0)
            else:
                none_t = right
            mask_none_t.extend([1] * (x_len_cur - end_cur - 1))
        if len(mask_none_t) != max_length:
            add_t = Variable(torch.zeros((max_length - len(mask_none_t)), self.lstm_hiddens))
            if self.use_cuda:
                add_t = add_t.cuda()
            mask_none_t.extend([0] * (max_length - len(mask_none_t)))
            # print(add_t)
            none_t = torch.cat([none_t, add_t], 0)
        mask_none_target.append(mask_none_t)
        none_target.append(none_t.unsqueeze(0))

        # left context, padded to max_start
        x_len_cur = x[idx].size(0)
        start_cur = utils.to_scalar(target_start[idx])
        left_len_cur = start_cur
        left_len_max = max_start
        if start_cur != 0:
            x_cur_left = x[idx][:start_cur]
            left_len_sub = left_len_max - left_len_cur
            mask_cur_left = [1 for _ in range(left_len_cur)]
        else:
            x_cur_left = x[idx][0].unsqueeze(0)
            left_len_sub = left_len_max - 1
            # mask_cur_left = [-1e+20]
            mask_cur_left = [0]
        # x_cur_left: variable (start_cur, two_hidden_size)
        # mask_cur_left: list (start_cur)
        if start_cur < max_start:
            if left_len_sub == 0:
                print('error')
            add = Variable(torch.rand(left_len_sub, self.lstm_hiddens))
            if self.use_cuda:
                add = add.cuda()
            x_cur_left = torch.cat([x_cur_left, add], dim=0)
            # x_cur_left: variable (max_start, two_hidden_size)
            left_save.append(x_cur_left.unsqueeze(0))
            # mask_cur_left.extend([-1e+20 for _ in range(left_len_sub)])
            mask_cur_left.extend([0 for _ in range(left_len_sub)])
            # mask_cur_left: list (max_start)
            mask_left_save.append(mask_cur_left)
        else:
            left_save.append(x_cur_left.unsqueeze(0))
            mask_left_save.append(mask_cur_left)

        # right context, padded to right_len_max
        end_cur = utils.to_scalar(target_end[idx])
        right_len_cur = x_len_cur - end_cur - 1
        right_len_max = x_len_cur - min_end - 1
        if (end_cur + 1) != x_len_cur:
            x_cur_right = x[idx][(end_cur + 1):]
            right_len_sub = right_len_max - right_len_cur
            mask_cur_right = [1 for _ in range(right_len_cur)]
        else:
            x_cur_right = x[idx][end_cur].unsqueeze(0)
            right_len_sub = right_len_max - right_len_cur - 1
            # mask_cur_right = [-1e+20]
            mask_cur_right = [0]
        # x_cur_right: variable ((x_len_cur-end_cur-1), two_hidden_size)
        # mask_cur_right: list (x_len_cur-end_cur-1 == right_len)
        if end_cur > min_end:
            if right_len_sub == 0:
                print('error2')
            add = Variable(torch.rand(right_len_sub, self.lstm_hiddens))
            if self.use_cuda:
                add = add.cuda()
            x_cur_right = torch.cat([x_cur_right, add], dim=0)
            right_save.append(x_cur_right.unsqueeze(0))
            # mask_cur_right.extend([-1e+20 for _ in range(right_len_sub)])
            mask_cur_right.extend([0 for _ in range(right_len_sub)])
            mask_right_save.append(mask_cur_right)
        else:
            right_save.append(x_cur_right.unsqueeze(0))
            mask_right_save.append(mask_cur_right)

        # target_sub = end_cur - start_cur
        x_target = x[idx][start_cur:(end_cur + 1)]
        x_average_target = torch.mean(x_target, 0)
        target_save.append(x_average_target.unsqueeze(0))

    mask_left_save = Variable(torch.ByteTensor(mask_left_save))
    # mask_left_save: variable (batch_size, left_len_max)
    mask_right_save = Variable(torch.ByteTensor(mask_right_save))
    # mask_right_save: variable (batch_size, right_len_max)
    left_save = torch.cat(left_save, dim=0)
    right_save = torch.cat(right_save, dim=0)
    target_save = torch.cat(target_save, dim=0)
    # left_save: variable (batch_size, left_len_max, two_hidden_size)
    # right_save: variable (batch_size, right_len_max, two_hidden_size)
    # target_save: variable (batch_size, two_hidden_size)
    none_target = torch.cat(none_target, 0)
    mask_none_target = Variable(torch.ByteTensor(mask_none_target))
    if self.use_cuda:
        mask_right_save = mask_right_save.cuda()
        mask_left_save = mask_left_save.cuda()
        left_save = left_save.cuda()
        right_save = right_save.cuda()
        target_save = target_save.cuda()
        mask_none_target = mask_none_target.cuda()
        none_target = none_target.cuda()

    # s, s_alpha = self.attention(x, target_save, None)
    s = self.attention(none_target, target_save, mask_none_target)
    # s_l, s_l_alpha = self.attention_l(left_save, target_save, mask_left_save)
    # s_r, s_r_alpha = self.attention_r(right_save, target_save, mask_right_save)
    s_l = self.attention_l(left_save, target_save, mask_left_save)
    s_r = self.attention_r(right_save, target_save, mask_right_save)

    w1s = torch.mm(self.w1, torch.t(s))
    u1t = torch.mm(self.u1, torch.t(target_save))
    if self.use_cuda:
        w1s = w1s.cuda()
        u1t = u1t.cuda()
    if batch_size == self.batch_size:
        z = torch.exp(w1s + u1t + self.b1)
    else:
        z = torch.exp(w1s + u1t)
    z_all = z
    # z_all: variable (two_hidden_size, batch_size)
    z_all = z_all.unsqueeze(2)

    w2s = torch.mm(self.w2, torch.t(s_l))
    u2t = torch.mm(self.u2, torch.t(target_save))
    if self.use_cuda:
        w2s = w2s.cuda()
        u2t = u2t.cuda()
    if batch_size == self.batch_size:
        z_l = torch.exp(w2s + u2t + self.b2)
    else:
        z_l = torch.exp(w2s + u2t)
    # print(z_all)
    # print(z_l)
    z_all = torch.cat([z_all, z_l.unsqueeze(2)], dim=2)

    w3s = torch.mm(self.w3, torch.t(s_r))
    u3t = torch.mm(self.u3, torch.t(target_save))
    if self.use_cuda:
        w3s = w3s.cuda()
        u3t = u3t.cuda()
    if batch_size == self.batch_size:
        z_r = torch.exp(w3s + u3t + self.b3)
    else:
        z_r = torch.exp(w3s + u3t)
    z_all = torch.cat([z_all, z_r.unsqueeze(2)], dim=2)
    # z_all: variable (two_hidden_size, batch_size, 3)
    if self.use_cuda:
        z_all = F.softmax(z_all, dim=2)
    else:
        z_all = F.softmax(z_all)
    # z_all = torch.t(z_all)
    z_all = z_all.permute(2, 1, 0)
    # z = torch.unsqueeze(z_all[:batch_size], 0)
    # z_l = torch.unsqueeze(z_all[batch_size:(2*batch_size)], 0)
    # z_r = torch.unsqueeze(z_all[(2*batch_size):], 0)
    # z = z_all[:batch_size]
    # z_l = z_all[batch_size:(2*batch_size)]
    # z_r = z_all[(2*batch_size):]
    z = z_all[0]
    z_l = z_all[1]
    z_r = z_all[2]

    # gated mixture of the three attended contexts
    ss = torch.mul(z, s)
    ss = torch.add(ss, torch.mul(z_l, s_l))
    ss = torch.add(ss, torch.mul(z_r, s_r))
    logit = self.linear_2(ss)
    # print(logit)
    # alpha = [s_alpha, s_l_alpha, s_r_alpha]
    # return logit, alpha
    return logit
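# The gating at the end of the forward pass above, in miniature: three context
# vectors are mixed with per-dimension weights that softmax to 1 across the
# three sources. Standalone sketch with random stand-ins for the z logits:
import torch
import torch.nn.functional as F

batch_size, hidden = 2, 4
s, s_l, s_r = (torch.randn(batch_size, hidden) for _ in range(3))
logits = torch.randn(3, batch_size, hidden)  # stand-in for the z, z_l, z_r scores

gates = F.softmax(logits, dim=0)  # sums to 1 over the 3 sources
mixed = gates[0] * s + gates[1] * s_l + gates[2] * s_r
print(mixed.shape)  # torch.Size([2, 4])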
def forward(self, x, seq_lengths, transE_args, cuda):
    '''
    Args:
        x: x[0] is arg1, x[1] is arg2
        x[0]: (batch, max_length)
        x[1]: (batch, max_length)
    Returns:
        num_output size
    '''
    arg1 = x[0]  # [N, arg1_max_length] [128, 80]
    arg2 = x[1]  # [N, arg2_max_length] [128, 80]

    # knowledge enhancement with transE
    self.kg_relation, self.kg_relation_list = self.deal_transE(
        transE_args, seq_lengths.size(0), seq_lengths[0], cuda)

    arg1_embed = self.encoder(arg1)
    arg1_embed = self.drop_en(arg1_embed)  # [N, arg1_max_length, embed_size] [128, 80, 300]
    arg2_embed = self.encoder(arg2)
    arg2_embed = self.drop_en(arg2_embed)  # [N, arg2_max_length, embed_size] [128, 80, 300]

    out_rnn1, ht = self.rnn(arg1_embed, None)  # [128, 80, 600]
    out_rnn2, ht = self.rnn(arg2_embed, None)  # [128, 80, 600]

    last_tensor1 = out_rnn1.contiguous().view(seq_lengths.size(0) * seq_lengths[0], -1)  # [128 * 80, 600]
    last_tensor2 = out_rnn2.contiguous().view(seq_lengths.size(0) * seq_lengths[0], -1)  # [128 * 80, 600]

    last_tensor = torch.mm(last_tensor1, self.rand_matrix)      # [128 * 80, 600]
    last_tensor = torch.mm(last_tensor, torch.t(last_tensor2))  # [128 * 80, 128 * 80]
    last_tensor = torch.tanh(last_tensor)                       # [128 * 80, 128 * 80]
    last_tensor = last_tensor + self.kg_relation                # [128 * 80, 128 * 80], add knowledge
    self.last_tensor = last_tensor

    # each row sums to 1: [1, 128 * 80] -> [128 * 80, 1] -> [128 * 80, 600]
    sf1 = torch.mean(F.softmax(last_tensor, dim=1), dim=0, keepdim=True).view(-1, 1).expand(
        seq_lengths.size(0) * seq_lengths[0], self.embed_size * 2)
    # each column sums to 1: [128 * 80, 1] -> [128 * 80, 600]
    sf2 = torch.mean(F.softmax(last_tensor, dim=0), dim=1, keepdim=True).expand(
        seq_lengths.size(0) * seq_lengths[0], self.embed_size * 2)

    out1 = last_tensor1.mul(sf2).view(seq_lengths.size(0), -1, self.embed_size * 2)  # [128 * 80, 600] -> [128, 80, 600]
    out2 = last_tensor2.mul(sf1).view(seq_lengths.size(0), -1, self.embed_size * 2)  # [128 * 80, 600] -> [128, 80, 600]

    out = torch.cat((out1, out2), 1).view(seq_lengths.size(0), -1)  # [128, 160, 600] -> [128, 160 * 600]
    fc_input = self.bn2(out)                                        # [128, 160 * 600]
    out_last = F.log_softmax(self.fc(fc_input), dim=1)              # [128, 4]
    return out_last
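# The word-pair interaction above, in miniature: score every token of arg1
# against every token of arg2, then softmax along each axis to weight the
# opposite argument. Standalone sketch (random tensors, no TransE term):
import torch
import torch.nn.functional as F

len1, len2, hidden = 4, 5, 6
h1 = torch.randn(len1, hidden)
h2 = torch.randn(len2, hidden)

interaction = torch.tanh(torch.mm(h1, torch.t(h2)))  # (4, 5)
attn_over_arg2 = F.softmax(interaction, dim=1)  # each row sums to 1
attn_over_arg1 = F.softmax(interaction, dim=0)  # each column sums to 1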
def create_non_correspondences(uv_b_matches, img_b_shape, num_non_matches_per_match=100, img_b_mask=None):
    """
    Takes in pixel matches (uv_b_matches) that correspond to matches in another image,
    and generates non-matches by just sampling in image space.

    Optionally, the non-matches can be sampled from a mask for image b.

    Returns non-matches as pixel positions in image b.

    Please see 'coordinate_conventions.md' documentation for an explanation of
    pixel coordinate conventions.

    ## Note that arg uv_b_matches are the outputs of batch_find_pixel_correspondences()

    :param uv_b_matches: tuple of torch.FloatTensors, where each FloatTensor is length n, i.e.:
        (torch.FloatTensor, torch.FloatTensor)
    :param img_b_shape: tuple of (H,W) which is the shape of the image
    :param num_non_matches_per_match: int (optional)
    :param img_b_mask: torch.FloatTensor (optional, can be cuda or not)
        - masked image, we will select from the non-zero entries
        - shape is H x W
    :return: tuple of torch.FloatTensors, i.e. (torch.FloatTensor, torch.FloatTensor).
        - The first element of the tuple is all "u" pixel positions, and the second
          element of the tuple is all "v" positions
        - Each torch.FloatTensor is of shape torch.Shape([num_matches, non_matches_per_match])
        - This shape makes it so that each row of the non-matches corresponds to the
          row for the match in uv_a
    """
    image_width = img_b_shape[1]
    image_height = img_b_shape[0]

    if uv_b_matches is None:
        return None

    num_matches = len(uv_b_matches[0])

    def get_random_uv_b_non_matches():
        return pytorch_rand_select_pixel(width=image_width, height=image_height,
                                         num_samples=num_matches * num_non_matches_per_match)

    if img_b_mask is not None:
        img_b_mask_flat = img_b_mask.view(-1, 1).squeeze(1)
        mask_b_indices_flat = torch.nonzero(img_b_mask_flat)
        if len(mask_b_indices_flat) == 0:
            print("warning, empty mask b")
            uv_b_non_matches = get_random_uv_b_non_matches()
        else:
            num_samples = num_matches * num_non_matches_per_match
            rand_numbers_b = torch.rand(num_samples) * len(mask_b_indices_flat)
            rand_indices_b = torch.floor(rand_numbers_b).long()
            randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)
            uv_b_non_matches = (randomized_mask_b_indices_flat % image_width,
                                randomized_mask_b_indices_flat // image_width)  # floor division for the row index
    else:
        uv_b_non_matches = get_random_uv_b_non_matches()

    # for each in uv_a, we want non-matches
    # first just randomly sample "non_matches"
    # we will later move random samples that were too close to being matches
    uv_b_non_matches = (uv_b_non_matches[0].view(num_matches, num_non_matches_per_match),
                        uv_b_non_matches[1].view(num_matches, num_non_matches_per_match))

    # uv_b_matches can now be used to make sure no "non_matches" are too close
    # to preserve tensor size, rather than pruning, we can perturb these in pixel space
    copied_uv_b_matches_0 = torch.t(uv_b_matches[0].repeat(num_non_matches_per_match, 1))
    copied_uv_b_matches_1 = torch.t(uv_b_matches[1].repeat(num_non_matches_per_match, 1))

    diffs_0 = copied_uv_b_matches_0 - uv_b_non_matches[0].type(dtype_float)
    diffs_1 = copied_uv_b_matches_1 - uv_b_non_matches[1].type(dtype_float)

    diffs_0_flattened = torch.abs(diffs_0.view(-1, 1)).squeeze(1)
    diffs_1_flattened = torch.abs(diffs_1.view(-1, 1)).squeeze(1)

    need_to_be_perturbed = torch.zeros_like(diffs_0_flattened)
    ones = torch.ones_like(diffs_0_flattened)  # fixed: was torch.zeros_like, which made the perturbation a no-op
    num_pixels_too_close = 1.0
    threshold = torch.ones_like(diffs_0_flattened) * num_pixels_too_close

    # determine which pixels are too close to being matches
    need_to_be_perturbed = where(diffs_0_flattened < threshold, ones, need_to_be_perturbed)
    need_to_be_perturbed = where(diffs_1_flattened < threshold, ones, need_to_be_perturbed)

    minimal_perturb = num_pixels_too_close / 2
    minimal_perturb_vector = (torch.rand(len(need_to_be_perturbed)) * 2).floor() * (minimal_perturb * 2) - minimal_perturb
    std_dev = 10
    random_vector = torch.randn(len(need_to_be_perturbed)) * std_dev + minimal_perturb_vector
    perturb_vector = need_to_be_perturbed * random_vector

    uv_b_non_matches_0_flat = uv_b_non_matches[0].view(-1, 1).type(dtype_float).squeeze(1) + perturb_vector
    uv_b_non_matches_1_flat = uv_b_non_matches[1].view(-1, 1).type(dtype_float).squeeze(1) + perturb_vector

    # now just need to wrap around any that went out of bounds

    # handle wrapping in width
    lower_bound = 0.0
    upper_bound = image_width * 1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * upper_bound

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat > upper_bound_vec,
                                    uv_b_non_matches_0_flat - upper_bound_vec,
                                    uv_b_non_matches_0_flat)
    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat < lower_bound_vec,
                                    uv_b_non_matches_0_flat + upper_bound_vec,
                                    uv_b_non_matches_0_flat)

    # handle wrapping in height
    lower_bound = 0.0
    upper_bound = image_height * 1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * upper_bound

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat > upper_bound_vec,
                                    uv_b_non_matches_1_flat - upper_bound_vec,
                                    uv_b_non_matches_1_flat)
    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat < lower_bound_vec,
                                    uv_b_non_matches_1_flat + upper_bound_vec,
                                    uv_b_non_matches_1_flat)

    return (uv_b_non_matches_0_flat.view(num_matches, num_non_matches_per_match),
            uv_b_non_matches_1_flat.view(num_matches, num_non_matches_per_match))
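# The where() helper used above is defined elsewhere in this codebase; modern
# PyTorch ships the same select-by-condition primitive as torch.where, so a
# drop-in stand-in could look like this (an assumption, not the original code):
import torch

def where(cond, x_1, x_2):
    # select x_1 where cond holds, x_2 elsewhere
    return torch.where(cond, x_1, x_2)

# e.g. the wrap-around handling in the width direction above
vals = torch.tensor([-2.0, 3.0, 650.0])
upper = torch.full_like(vals, 639.0)
print(where(vals > upper, vals - upper, vals))  # tensor([-2., 3., 11.])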
def main(config, needs_save, study_name, k, n_splits):
    if config.run.visible_devices:
        os.environ['CUDA_VISIBLE_DEVICES'] = config.run.visible_devices

    seed = check_manual_seed(config.run.seed)
    print('Using seed: {}'.format(seed))

    train_data_loader, test_data_loader, data_train = get_k_hold_data_loader(
        config.dataset, k=k, n_splits=n_splits)

    data_train = torch.from_numpy(data_train).float().cuda(non_blocking=True)
    data_train = torch.t(data_train)

    model = get_model(config.model)
    model.cuda()
    model = nn.DataParallel(model)
    print('count params: ', count_parameters(model.module))

    saved_model_path, _, _ = get_saved_model_path(
        config, study_name, config.model.checkpoint_epoch, k, n_splits)
    model.load_state_dict(torch.load(saved_model_path)['model'])
    model.eval()

    if config.model.model_name == 'MLP':
        embedding = model.module.get_embedding()
    elif config.model.model_name == 'ModifiedMLP':
        embedding = model.module.get_embedding()
    elif config.model.model_name == 'DietNetworks':
        embedding = model.module.get_embedding(data_train)
    elif config.model.model_name == 'ModifiedDietNetworks':
        embedding = model.module.get_embedding(data_train)

    embedding = embedding.detach().cpu().numpy()
    emb_pca = PCA(n_components=2)
    emb_pca.fit(embedding)  # only the fit is needed; the transformed output was discarded

    # keep the PCA axes as numpy arrays; wrapping them in torch.from_numpy
    # before np.dot (as originally written) was unnecessary
    if config.run.decomp == '1D':
        print('Approximate by 1D PCA')
        axis_1 = emb_pca.components_[0]
        score_1 = np.dot(embedding, axis_1)
        approx = np.outer(score_1, axis_1)
    elif config.run.decomp == '2D':
        print('Approximate by 2D PCA')
        axis_1 = emb_pca.components_[0]
        score_1 = np.dot(embedding, axis_1)
        axis_2 = emb_pca.components_[1]
        score_2 = np.dot(embedding, axis_2)
        approx = np.outer(score_1, axis_1) + np.outer(score_2, axis_2)
        # approx = np.outer(score_2, axis_2)

    approx = torch.from_numpy(approx).float().cuda(non_blocking=True)
    criterion = nn.CrossEntropyLoss()

    def inference(engine, batch):
        x = batch['data'].float().cuda(non_blocking=True)
        y = batch['label'].long().cuda(non_blocking=True)
        assert config.run.transposed_matrix == 'overall'
        x_t = data_train

        with torch.no_grad():
            out, _ = model.module.approx(x, approx)
            l_discriminative = criterion(out, y)
            l_total = l_discriminative

        metrics = calc_metrics(out, y)
        metrics.update({
            'l_total': l_total.item(),
            'l_discriminative': l_discriminative.item(),
        })
        torch.cuda.synchronize()
        return metrics

    evaluator = Engine(inference)
    monitoring_metrics = ['l_total', 'l_discriminative', 'accuracy']
    for metric in monitoring_metrics:
        RunningAverage(
            alpha=0.98,
            output_transform=partial(lambda x, metric: x[metric], metric=metric)
        ).attach(evaluator, metric)

    pbar = ProgressBar()
    pbar.attach(evaluator, metric_names=monitoring_metrics)
    evaluator.run(test_data_loader, 1)

    columns = ['k', 'n_splits', 'epoch', 'iteration'] + list(evaluator.state.metrics.keys())
    values = [str(k), str(n_splits), str(evaluator.state.epoch), str(evaluator.state.iteration)] \
        + [str(value) for value in evaluator.state.metrics.values()]
    values = {c: v for (c, v) in zip(columns, values)}
    values.update({
        'variance_ratio_1': emb_pca.explained_variance_ratio_[0],
        'variance_ratio_2': emb_pca.explained_variance_ratio_[1],
    })
    return values
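# The rank-2 approximation built above, in isolation: project onto the top
# principal axes and rebuild the matrix as a sum of outer products. This sketch
# mirrors the snippet's convention of projecting without mean-centering:
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
embedding = rng.standard_normal((100, 16))

emb_pca = PCA(n_components=2).fit(embedding)
scores = embedding @ emb_pca.components_.T  # (100, 2)
approx = scores @ emb_pca.components_       # rank-2 reconstruction, (100, 16)
# equivalently:
# np.outer(scores[:, 0], emb_pca.components_[0]) + np.outer(scores[:, 1], emb_pca.components_[1])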
def input_word_dimension_labeling_deep(self, model, N_HIDDEN_LAYERS, word: str):
    word_idx = self.word2idx[word]
    word_emb = np.array(self.embeddings[word_idx])
    data = torch.tensor(word_emb).float().to(model.device)
    pred = torch.argmax(model.forward(data))
    label = self.labels[pred]
    print('Prediction for word:', word, '-', label)

    in_weights = model.hidden_layers[0].weight  # [500, 1000]
    # activated_weights = torch.mul(data, in_weights)  # [500, 1000]
    # activated_weights_len = len(activated_weights)
    # activated_weights = torch.t(activated_weights)   # [1000, 500]
    mul_weights = torch.mul(data, in_weights)  # [500, 1000]
    activated_weights_len = len(mul_weights)
    mul_weights = torch.t(mul_weights)  # [1000, 500]
    # distribute the bias over the list of activated weights
    mul_weights = mul_weights + model.hidden_layers[0].bias.div(activated_weights_len)
    activated_weights = torch.relu(torch.sum(mul_weights, 0))
    # zero out contributions through units the ReLU killed
    for idx, val in enumerate(activated_weights):
        if val == 0.0:
            mul_weights[:, idx] = 0.0

    for i in range(1, N_HIDDEN_LAYERS):
        next_layer = torch.t(model.hidden_layers[i].weight)
        mul_weights = torch.matmul(mul_weights, next_layer)  # ends up [1000, 4]
        activated_weights = torch.relu(torch.sum(mul_weights, 0))
        bias = model.hidden_layers[i].bias.div(activated_weights_len)  # ends up length 4
        mul_weights = mul_weights + bias
        for idx, val in enumerate(activated_weights):
            if val == 0.0:
                mul_weights[:, idx] = 0.0

    dimension_label_value_list = []
    for i in range(self.EMBED_DIM):
        dim_values = mul_weights[i]
        label_ind = dim_values.argmax()
        dimension_label_value_list.append((i, label_ind, dim_values[label_ind]))
    dimension_label_value_list = sorted(dimension_label_value_list, key=lambda x: x[2], reverse=True)

    for i in range(len(dimension_label_value_list)):
        dim = dimension_label_value_list[i][0]
        label = self.labels[dimension_label_value_list[i][1]]
        value = dimension_label_value_list[i][2]
        top_emb = sorted(enumerate(self.embeddings), key=lambda x: x[1][dim], reverse=True)[:5]
        top_emb = [self.idx2word[emb_idx] for emb_idx, emb in top_emb]
        print('dimension %d labelled as %s with score %f. Top words in this dimension:'
              % (dim, label, value.item()))
        print(top_emb)
a = torch.tensor(np.arange(24).reshape(4, 3, 2)); print(a)
print(a.chunk(2))
print(a.chunk(2, dim=1))  # splits the size-3 dim into 2 and 1
print(a.chunk(4))

## transpose
torch.manual_seed(1)
x = torch.randn(2, 3); print(x, x.shape)
# tensor([[ 0.6614,  0.2669,  0.0617], [ 0.6213, -0.4519, -0.1661]]) torch.Size([2, 3])
tmp = x.transpose(0, 1); print(tmp, tmp.shape)
# tensor([[ 0.6614,  0.6213], [ 0.2669, -0.4519], [ 0.0617, -0.1661]]) torch.Size([3, 2])

x = torch.ones(2, 3, 4); print(x.shape)  # torch.Size([2, 3, 4])
print(x.transpose(0, 1).shape)           # torch.Size([3, 2, 4])
print(x.transpose(1, 2).shape)           # torch.Size([2, 4, 3])

## t(): convenience method of transpose() for 2D tensors; swaps dimensions 0 and 1.
## The given tensor must be 2-dimensional, so go back to the 2x3 example
## (calling t() on the 2x3x4 ones tensor above would raise a RuntimeError).
torch.manual_seed(1)
x = torch.randn(2, 3)
print(x.t())       # tensor([[ 0.6614,  0.6213], [ 0.2669, -0.4519], [ 0.0617, -0.1661]])
print(torch.t(x))  # same result

## eq()
x = torch.Tensor([[1, 2], [3, 0]])
print(x.eq(0))  # tensor([[0, 0], [0, 1]], dtype=torch.uint8)

## permute: reorders dimensions; similar to transpose(), but transpose() can only swap two at a time
x = torch.randn(2, 3, 5); print(x)
tmp = x.permute(2, 0, 1); print(tmp, tmp.shape)  # torch.Size([5, 2, 3])

## repeat(*sizes): sizes (torch.Size or int...): the number of times to repeat this tensor along each dimension
x = torch.Tensor([1, 2, 3]); print(x)
print(x.repeat(2))        # number of dimensions stays the same
print(x.repeat(4, 2))
print(x.repeat(2, 2, 2))  # can also add new dimensions
x = torch.Tensor([[1, 2], [3, 4]]); print(x, x.shape)  # torch.Size([2, 2])
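# Companion note to repeat(): expand() reaches a similar broadcast shape
# without copying memory (it returns a stride-0 view), which matters when the
# repeated tensor is large. Small sketch:
import torch

x = torch.tensor([1., 2., 3.])
r = x.repeat(4, 1)               # (4, 3), physically copies the data
e = x.unsqueeze(0).expand(4, 3)  # (4, 3), a view; no copy

print(torch.equal(r, e))       # True: same values
print(r.stride(), e.stride())  # (3, 1) vs (0, 1): expand reuses memory along dim 0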