def l2_pixel_loss(self, matches_b, non_matches_b, M_pixel=None):
        """
        Apply l2 loss in pixel space.

        This weights non-matches more if they are "far away" in pixel space.

        :param matches_b: A torch.LongTensor with shape torch.Size([num_matches])
        :param non_matches_b: A torch.LongTensor with shape torch.Size([num_non_matches])
        :return l2 loss per sample: A torch.FloatTensor with shape torch.Size([num_matches])
        """

        if M_pixel is None:
            M_pixel = self._config['M_pixel']

        num_non_matches_per_match = len(non_matches_b) // len(matches_b)

        ground_truth_pixels_for_non_matches_b = torch.t(matches_b.repeat(num_non_matches_per_match,1)).contiguous().view(-1,1)

        ground_truth_u_v_b = self.flattened_pixel_locations_to_u_v(ground_truth_pixels_for_non_matches_b)
        sampled_u_v_b      = self.flattened_pixel_locations_to_u_v(non_matches_b.unsqueeze(1))

        # each element is always within [0,1]; it equals 1 when the non-match is
        # at least M_pixel away from the ground-truth match in L2 norm in pixel space
        norm_degree = 2
        squared_l2_pixel_loss = 1.0/M_pixel * torch.clamp((ground_truth_u_v_b - sampled_u_v_b).float().norm(norm_degree,1), max=M_pixel)


        return squared_l2_pixel_loss, ground_truth_u_v_b, sampled_u_v_b
    def get_triplet_loss(image_a_pred, image_b_pred, matches_a, matches_b, non_matches_a, non_matches_b, alpha):
        """
        Computes the loss function

        \sum_{triplets} max(0, ||D(I_a, u_a, I_b, u_{b,match})||_2^2 - ||D(I_a, u_a, I_b, u_{b,non-match})||_2^2 + alpha)

        """
        num_matches = matches_a.size()[0]
        num_non_matches = non_matches_a.size()[0]
        multiplier = num_non_matches // num_matches

        ## non_matches_a is already replicated up to be the right size
        ## non_matches_b is also that size
        ## matches_a is just a smaller version of non_matches_a
        ## matches_b is the only thing that needs to be replicated up in size

        matches_b_long =  torch.t(matches_b.repeat(multiplier, 1)).contiguous().view(-1)
                         
        matches_a_descriptors = torch.index_select(image_a_pred, 1, non_matches_a)
        matches_b_descriptors      = torch.index_select(image_b_pred, 1, matches_b_long)
        non_matches_b_descriptors  = torch.index_select(image_b_pred, 1, non_matches_b)

        triplet_losses = (matches_a_descriptors - matches_b_descriptors).pow(2) - (matches_a_descriptors - non_matches_b_descriptors).pow(2) + alpha
        triplet_loss = 1.0 / num_non_matches * torch.clamp(triplet_losses, min=0).sum()

        return triplet_loss
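A minimal usage sketch for get_triplet_loss above (assumptions: the function is callable standalone, the descriptor images are flattened to shape (1, H*W, D), and all index tensors hold flat pixel indices):

import torch

num_pixels, descriptor_dim = 640 * 480, 3
image_a_pred = torch.randn(1, num_pixels, descriptor_dim)
image_b_pred = torch.randn(1, num_pixels, descriptor_dim)

matches_a = torch.randint(0, num_pixels, (100,))        # flat pixel indices in image A
matches_b = torch.randint(0, num_pixels, (100,))        # their matches in image B
non_matches_a = matches_a.repeat_interleave(5)          # 5 non-matches sampled per match
non_matches_b = torch.randint(0, num_pixels, (500,))    # random non-matching pixels in image B

loss = get_triplet_loss(image_a_pred, image_b_pred,
                        matches_a, matches_b,
                        non_matches_a, non_matches_b, alpha=0.5)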
    def avg_pool1d(x, seq_lens):  # shape is same as below
        out = []
        for index, t in enumerate(x):
            t = t[:seq_lens[index], :]
            t = torch.t(t).unsqueeze(0)
            out.append(F.avg_pool1d(t, t.size(2)))

        out = torch.cat(out).squeeze(2)
        return out
    def avg_pool1d(self, x, seq_lens):
        # x:[N,L,O_in]
        out = []
        for index, t in enumerate(x):
            t = t[:seq_lens[index], :]
            t = torch.t(t).unsqueeze(0)
            out.append(F.avg_pool1d(t, t.size(2)))

        out = torch.cat(out).squeeze(2)
        return out
    def _update_u_v(self):
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        height = w.data.shape[0]
        for _ in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w.view(height,-1).data), u.data))
            u.data = l2normalize(torch.mv(w.view(height,-1).data, v.data))

        # sigma = torch.dot(u.data, torch.mv(w.view(height,-1).data, v.data))
        sigma = u.dot(w.view(height, -1).mv(v))
        setattr(self.module, self.name, w / sigma.expand_as(w))
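l2normalize is not defined in the snippet above; a typical helper used with this kind of spectral-norm power iteration looks like the following (an assumption, not necessarily the exact definition in the original module):

def l2normalize(v, eps=1e-12):
    # normalize a 1-D tensor to unit L2 norm, guarding against division by zero
    return v / (v.norm() + eps)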
Example #6
def pytorch_optim(W, edges, wc, n, alpha=1, beta=1, max_iter=50, x0=None, lr=1):
    d = W.shape[1]
    X = prng.randn(n, d) if x0 is None else np.copy(x0)
    tX = autograd.Variable(torch.from_numpy(X), requires_grad=True)
    tW = autograd.Variable(torch.from_numpy(W), requires_grad=False)
    target = autograd.Variable(torch.from_numpy(wc), requires_grad=False)
    head = autograd.Variable(torch.from_numpy(edges[:, 0]), requires_grad=False)
    tail = autograd.Variable(torch.from_numpy(edges[:, 1]), requires_grad=False)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam([tX], weight_decay=0, lr=lr)
    l_res = np.zeros((max_iter + 1, 4))
    nX = X / np.sqrt((X**2).sum(1))[:, np.newaxis]
    m = nX[edges[:, 0]] * nX[edges[:, 1]]
    scores = m @ W.T
    l_res[0, 3] = np.einsum('ij,ji->i', m, W[wc, :].T).mean()
    pred = np.argmax(scores, 1)
    l_res[0, 1] = AMI(wc, pred)
    S = tX[head] * tX[tail]
    output = torch.mm(S, torch.t(tW))
    loss = loss_fn(output, target)
    l_res[0, 0] = (loss_fn(output, target).data[0])
    for i in range(max_iter):
        optimizer.zero_grad()
        S = tX[head] * tX[tail]
        output = torch.mm(S, torch.t(tW))
        # avg_norm = torch.mean(torch.norm(tX, p=2, dim=1))
        # avg_edge = torch.mean(torch.diag(torch.mm(S, torch.t(tW[wc, :]))))
        loss = loss_fn(output, target)  # - alpha*avg_edge# + beta*avg_norm
        l_res[i + 1, 0] = (loss_fn(output, target).data[0])
        loss.backward()
        optimizer.step()
        nX = X / np.sqrt((X**2).sum(1))[:, np.newaxis]
        m = nX[edges[:, 0]] * nX[edges[:, 1]]
        scores = m @ W.T
        l_res[i + 1, 3] = np.einsum('ij,ji->i', m, W[wc, :].T).mean()
        pred = np.argmax(scores, 1)
        l_res[i + 1, 1] = AMI(wc, pred)
    # print(np.sqrt((X**2).sum(1)).mean())
    return nX, l_res[:i + 2, :]
Example #7
    def _log_forward(self, input=None):
        """Forward pass of the computation graph in logarithm domain (pytorch)"""

        # IMPORTANT: Cast to pytorch format
        input = Variable(torch.from_numpy(input).float(), requires_grad=False)

        # Linear transformation
        z =  torch.matmul(input, torch.t(self.weight)) + self.bias

        # Softmax implemented in log domain
        log_tilde_z = torch.nn.LogSoftmax()(z)

        # NOTE that this is a pytorch class!
        return log_tilde_z
    def max_pool1d(x, seq_lens):
        """
        :param x: (B, L, D)
        :param seq_lens: (B)
        :return: (B, D)
        """
        out = []
        for index, t in enumerate(x):  # t: (L, D)
            t = t[:seq_lens[index], :]
            t = torch.t(t).unsqueeze(0)  # (L, D) -> (D, L) -> (1, D, L)
            out.append(F.max_pool1d(t, t.size(2)))  # [(1, D, 1)]

        out = torch.cat(out).squeeze(2)  # B * (1, D, 1) -> (B, D, 1) -> (B, D)
        return out
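A minimal usage sketch for max_pool1d above (shapes follow the docstring; the call assumes the function is available standalone):

import torch

x = torch.randn(2, 5, 4)           # (B, L, D): batch of 2 padded sequences
seq_lens = torch.tensor([3, 5])    # true length of each sequence

pooled = max_pool1d(x, seq_lens)   # (2, 4); positions beyond seq_lens are ignored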
Example #9
    def _log_forward(self, input):
        """
        Forward pass
        """

        # Ensure the type matches torch type
        input = cast_float(input)

        # Input
        tilde_z = input

        # ----------
        # Solution to Exercise 6.4
        for n in range(self.num_layers - 1):

            # Get weights and bias of the layer (even and odd positions)
            weight, bias = self.parameters[n]

            # Linear transformation
            z = torch.matmul(tilde_z, torch.t(weight)) + bias

            # Non-linear transformation
            tilde_z = torch.sigmoid(z)

        # Get weights and bias of the layer (even and odd positions)
        weight, bias = self.parameters[self.num_layers - 1]

        # Linear transformation
        z = torch.matmul(tilde_z, torch.t(weight)) + bias

        # Softmax is computed in log-domain to prevent underflow/overflow
        log_tilde_z = self.logsoftmax(z)

        # End of solution to Exercise 6.4
        # ----------

        return log_tilde_z
Example #10
def cos(a, b):
    print("a:", a)
    print("b:", b)

    if 1 == len(a.shape):
        assert len(a.shape) == len(b.shape)
        a_duplicate = a.repeat(1,1)
        b_duplicate = b.repeat(1,1)
    else:
        vec_dim = a.shape[-1]
        assert b.shape[-1] == vec_dim

        a_size = a.shape[0]
        b_size = b.shape[0]
        a_duplicate = torch.t(a).repeat(b_size,1,1).transpose(1,2).transpose(0,1)
        #print("a_duplicate:", a_duplicate)
        #print("a_duplicate.shape:",a_duplicate.shape)
        b_duplicate = b.repeat(a_size, 1, 1)
        #print("b_duplicate:", b_duplicate)
        #print("b_duplicate.shape:", b_duplicate.shape)
    cos = F.cosine_similarity(a_duplicate, b_duplicate, dim=-1)
    print("cos:", cos)
    return cos
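A minimal usage sketch for cos above (shapes are illustrative assumptions): for a of shape (N, D) and b of shape (M, D) it returns an (N, M) matrix of pairwise cosine similarities.

import torch

a = torch.randn(3, 8)
b = torch.randn(5, 8)
sim = cos(a, b)    # shape (3, 5); sim[i, j] is the cosine similarity of a[i] and b[j]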
Example #11
#bunny_translation = Variable(torch.from_numpy(\
#    np.array([0.0485, -0.1651, -0.0795],dtype=np.float32)), requires_grad=True)
#bunny_rotation = Variable(torch.from_numpy(\
#    np.array([-0.2,0.1,-0.1],dtype=np.float32)), requires_grad=True)
target = Variable(
    torch.from_numpy(image.imread('test/results/bunny_box/target.exr')))

optimizer = torch.optim.Adam([bunny_translation, bunny_rotation], lr=1e-2)
for t in range(200):
    print('iteration:', t)
    optimizer.zero_grad()
    # Forward pass: render the image
    bunny_rotation_matrix = transform.torch_rotate_matrix(bunny_rotation)

    shapes[-1].vertices = \
        (bunny_vertices-torch.mean(bunny_vertices, 0))@torch.t(bunny_rotation_matrix) + \
        torch.mean(bunny_vertices, 0) + bunny_translation
    args=render_pytorch.RenderFunction.serialize_scene(\
        cam, materials, shapes, lights, resolution,
        num_samples = 4,
        max_bounces = 6)
    img = render(t + 1, *args)
    image.imwrite(img.data.numpy(),
                  'test/results/bunny_box/iter_{}.png'.format(t))

    dirac = np.zeros([7, 7], dtype=np.float32)
    dirac[3, 3] = 1.0
    dirac = Variable(torch.from_numpy(dirac))
    f = np.zeros([3, 3, 7, 7], dtype=np.float32)
    gf = scipy.ndimage.filters.gaussian_filter(dirac, 1.0)
    f[0, 0, :, :] = gf
Example #12
translation_params = torch.tensor([0.1, -0.1, 0.1],
    device = pyredner.get_device(), requires_grad=True)
translation = translation_params * 100.0
euler_angles = torch.tensor([0.1, -0.1, 0.1], requires_grad=True)
# We obtain the teapot vertices we want to apply the transformation on.
shape0_vertices = shapes[0].vertices.clone()
shape1_vertices = shapes[1].vertices.clone()
# We can use pyredner.gen_rotate_matrix to generate 3x3 rotation matrices
rotation_matrix = pyredner.gen_rotate_matrix(euler_angles)
if pyredner.get_use_gpu():
    rotation_matrix = rotation_matrix.cuda()
center = torch.mean(torch.cat([shape0_vertices, shape1_vertices]), 0)
# We shift the vertices to the center, apply rotation matrix,
# then shift back to the original space.
shapes[0].vertices = \
    (shape0_vertices - center) @ torch.t(rotation_matrix) + \
    center + translation
shapes[1].vertices = \
    (shape1_vertices - center) @ torch.t(rotation_matrix) + \
    center + translation
# Since we changed the vertices, we need to regenerate the shading normals
shapes[0].normals = pyredner.compute_vertex_normal(shapes[0].vertices, shapes[0].indices)
shapes[1].normals = pyredner.compute_vertex_normal(shapes[1].vertices, shapes[1].indices)
# We need to serialize the scene again to get the new arguments.
scene_args = pyredner.RenderFunction.serialize_scene(\
    scene = scene,
    num_samples = 512,
    max_bounces = 1)
# Render the initial guess.
img = render(1, *scene_args)
# Save the images.
    def forward(self, x):
        x = self.relu(x)
        return torch.t(x)
Example #14
def pdist(vectors):
    distance_matrix = -2 * vectors.mm(torch.t(vectors)) + vectors.pow(2).sum(
        dim=1).view(1, -1) + vectors.pow(2).sum(dim=1).view(-1, 1)
    return distance_matrix
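pdist above builds the squared Euclidean distance matrix through the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a.b. A small sanity-check sketch (torch.cdist is used only as a reference here):

import torch

vectors = torch.randn(8, 16)
d_fast = pdist(vectors)
d_ref = torch.cdist(vectors, vectors).pow(2)
print(torch.allclose(d_fast, d_ref, atol=1e-4))   # True up to floating-point error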
Example #15
def D(p, Q, lam, alpha):
    return (torch.diag(
        torch.mv(torch.t(Q), p) / (lam * coth_torch(alpha * lam))))
def cos_dist(anchor, positive):
    """Given batch of anchor descriptors and positive descriptors calculate distance matrix"""
    return torch.bmm(anchor.unsqueeze(0),
                     torch.t(positive).unsqueeze(0)).squeeze(0)
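cos_dist above is just the matrix of dot products anchor @ positive.t(); with L2-normalized descriptors those dot products are cosine similarities. A small illustrative sketch (shapes are assumptions):

import torch
import torch.nn.functional as F

anchor = F.normalize(torch.randn(6, 128), dim=1)
positive = F.normalize(torch.randn(6, 128), dim=1)
sim = cos_dist(anchor, positive)                              # (6, 6) similarity matrix
print(torch.allclose(sim, anchor @ positive.t(), atol=1e-6))  # True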
Example #17
def _train_or_test(model,
                   dataloader,
                   optimizer=None,
                   class_specific=True,
                   use_l1_mask=True,
                   coefs=None,
                   log=print):
    '''
    model: the multi-gpu model
    dataloader:
    optimizer: if None, will be test evaluation
    '''
    is_train = optimizer is not None
    start = time.time()
    n_examples = 0
    n_correct = 0
    n_batches = 0
    total_cross_entropy = 0
    total_cluster_cost = 0
    # separation cost is meaningful only for class_specific
    total_separation_cost = 0
    total_avg_separation_cost = 0

    for i, (image, label) in enumerate(dataloader):
        input = image.cuda()
        target = label.cuda()
        # torch.enable_grad() has no effect outside of no_grad()
        grad_req = torch.enable_grad() if is_train else torch.no_grad()
        with grad_req:
            # nn.Module has implemented __call__() function
            # so no need to call .forward
            output, min_distances = model(input)

            # compute loss
            cross_entropy = torch.nn.functional.cross_entropy(output, target)

            if class_specific:
                max_dist = (model.module.prototype_shape[1] *
                            model.module.prototype_shape[2] *
                            model.module.prototype_shape[3])

                # prototypes_of_correct_class is a tensor of shape batch_size * num_prototypes
                # calculate cluster cost
                prototypes_of_correct_class = torch.t(
                    model.module.prototype_class_identity[:, label]).cuda()
                inverted_distances, _ = torch.max(
                    (max_dist - min_distances) * prototypes_of_correct_class,
                    dim=1)
                cluster_cost = torch.mean(max_dist - inverted_distances)

                # calculate separation cost
                prototypes_of_wrong_class = 1 - prototypes_of_correct_class
                inverted_distances_to_nontarget_prototypes, _ = \
                    torch.max((max_dist - min_distances) * prototypes_of_wrong_class, dim=1)
                separation_cost = torch.mean(
                    max_dist - inverted_distances_to_nontarget_prototypes)

                # calculate avg separation cost
                avg_separation_cost = \
                    torch.sum(min_distances * prototypes_of_wrong_class, dim=1) / torch.sum(prototypes_of_wrong_class, dim=1)
                avg_separation_cost = torch.mean(avg_separation_cost)

                if use_l1_mask:
                    l1_mask = 1 - torch.t(
                        model.module.prototype_class_identity).cuda()
                    l1 = (model.module.last_layer.weight * l1_mask).norm(p=1)
                else:
                    l1 = model.module.last_layer.weight.norm(p=1)

            else:
                min_distance, _ = torch.min(min_distances, dim=1)
                cluster_cost = torch.mean(min_distance)
                l1 = model.module.last_layer.weight.norm(p=1)

            # evaluation statistics
            _, predicted = torch.max(output.data, 1)
            n_examples += target.size(0)
            n_correct += (predicted == target).sum().item()

            n_batches += 1
            total_cross_entropy += cross_entropy.item()
            total_cluster_cost += cluster_cost.item()
            total_separation_cost += separation_cost.item()
            total_avg_separation_cost += avg_separation_cost.item()
        # compute gradient and do SGD step
        if is_train:
            if class_specific:
                if coefs is not None:
                    loss = (coefs['crs_ent'] * cross_entropy +
                            coefs['clst'] * cluster_cost +
                            coefs['sep'] * separation_cost + coefs['l1'] * l1)
                else:
                    loss = cross_entropy + 0.8 * cluster_cost - 0.08 * separation_cost + 1e-4 * l1
            else:
                if coefs is not None:
                    loss = (coefs['crs_ent'] * cross_entropy +
                            coefs['clst'] * cluster_cost + coefs['l1'] * l1)
                else:
                    loss = cross_entropy + 0.8 * cluster_cost + 1e-4 * l1
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        del input
        del target
        del output
        del predicted
        del min_distances

    end = time.time()

    log('\ttime: \t{0}'.format(end - start))
    log('\tcross ent: \t{0}'.format(total_cross_entropy / n_batches))
    log('\tcluster: \t{0}'.format(total_cluster_cost / n_batches))
    if class_specific:
        log('\tseparation:\t{0}'.format(total_separation_cost / n_batches))
        log('\tavg separation:\t{0}'.format(total_avg_separation_cost /
                                            n_batches))
    log('\taccu: \t\t{0}%'.format(n_correct / n_examples * 100))
    log('\tl1: \t\t{0}'.format(
        model.module.last_layer.weight.norm(p=1).item()))
    p = model.module.prototype_vectors.view(model.module.num_prototypes,
                                            -1).cpu()
    with torch.no_grad():
        p_avg_pair_dist = torch.mean(list_of_distances(p, p))
    log('\tp dist pair: \t{0}'.format(p_avg_pair_dist.item()))

    return n_correct / n_examples
Example #18
def eval_with_viterbi(model, samples, masks, labels, gold_predicate,
                      label_vocab, transition_matrix):
    """
    model: A pytorch module
    samples: dataset samples (n * max_len)
    masks: dataset mask (n * max_len)
    gold_predicate: 0/1 gold predicate(n * max_len)
    labels: dataset labels (n * max_len)
    label_vocab: a torchtext vocab for labels
    """
    all_preds = torch.tensor([], dtype=torch.long).cuda(cfg.use_which_gpu)
    all_labels = torch.tensor([], dtype=torch.long).cuda(cfg.use_which_gpu)
    prediction_labels = []
    predicts_list = []
    with torch.no_grad():
        # for i in tqdm(range(0, samples.shape[0], cfg.batch_size), total=(samples.shape[0]//cfg.batch_size), desc="Validation"):
        for i in tqdm(range(samples.shape[0]),
                      total=(len(samples)),
                      desc="Validation"):
            # for i in range(samples.shape[0]):
            # tokens: 1 * length of sentence
            # label_list: 1 * length

            # tokens = torch.tensor(samples[i: i+cfg.batch_size,:][masks[i: i+cfg.batch_size]==1], dtype=torch.long).unsqueeze(0).cuda()
            # label_list = torch.tensor(labels[i: i+cfg.batch_size,:][masks[i: i+cfg.batch_size]==1], dtype=torch.long).unsqueeze(0).cuda()
            # cur_masks = torch.tensor(masks[i: i+cfg.batch_size,:][masks[i: i+cfg.batch_size]==1], dtype=torch.long).unsqueeze(0).cuda()
            tokens = torch.tensor(samples[i, :][masks[i] == 1],
                                  dtype=torch.long).unsqueeze(0).cuda(
                                      cfg.use_which_gpu)
            label_list = torch.tensor(labels[i, :][masks[i] == 1],
                                      dtype=torch.long).unsqueeze(0).cuda(
                                          cfg.use_which_gpu)
            cur_masks = torch.tensor(masks[i, :][masks[i] == 1],
                                     dtype=torch.long).unsqueeze(0).cuda(
                                         cfg.use_which_gpu)
            cur_gold_predicate = torch.tensor(
                gold_predicate[i, :][masks[i] == 1],
                dtype=torch.float32).unsqueeze(0).cuda(cfg.use_which_gpu)

            tokens = torch.tensor(tokens,
                                  dtype=torch.long).cuda(cfg.use_which_gpu)
            cur_masks = torch.tensor(cur_masks,
                                     dtype=torch.long).cuda(cfg.use_which_gpu)
            cur_gold_predicate = torch.tensor(cur_gold_predicate,
                                              dtype=torch.float32).cuda(
                                                  cfg.use_which_gpu)
            # tokens: len * 1
            tokens = torch.t(tokens)
            #logit: length * 1 * labels
            logit: torch.Tensor = model(tokens, cur_masks, cur_gold_predicate)

            # argmax predictions
            # predictions: length * 1
            # _, predictions = logit.max(dim=2)
            # _, predictions_drew = logit.max(dim=2)
            predictions, predicates_index = call_viterbi(
                logit, transition_matrix, label_vocab.stoi)
            prediction_labels.append(predictions)
            predicts_list.append(predicates_index)

    return prediction_labels
Example #19
def eval_with_micro_F1(model, samples, masks, labels, gold_predicate,
                       label_vocab, transition_matrix):
    """
    model: A pytorch module
    samples: dataset samples (n * max_len)
    masks: dataset mask (n * max_len)
    gold_predicate: 0/1 gold predicate(n * max_len)
    labels: dataset labels (n * max_len)
    label_vocab: a torchtext vocab for labels
    """
    all_preds = torch.tensor([], dtype=torch.long).cuda(cfg.use_which_gpu)
    all_labels = torch.tensor([], dtype=torch.long).cuda(cfg.use_which_gpu)
    prediction_labels = []
    predicts_list = []
    with torch.no_grad():
        # for i in tqdm(range(0, samples.shape[0], cfg.batch_size), total=(samples.shape[0]//cfg.batch_size), desc="Validation"):
        for i in tqdm(range(samples.shape[0]),
                      total=(len(samples)),
                      desc="Validation"):
            # for i in range(samples.shape[0]):
            # tokens: 1 * length of sentence
            # label_list: 1 * length

            # tokens = torch.tensor(samples[i: i+cfg.batch_size,:][masks[i: i+cfg.batch_size]==1], dtype=torch.long).unsqueeze(0).cuda()
            # label_list = torch.tensor(labels[i: i+cfg.batch_size,:][masks[i: i+cfg.batch_size]==1], dtype=torch.long).unsqueeze(0).cuda()
            # cur_masks = torch.tensor(masks[i: i+cfg.batch_size,:][masks[i: i+cfg.batch_size]==1], dtype=torch.long).unsqueeze(0).cuda()
            tokens = torch.tensor(samples[i, :][masks[i] == 1],
                                  dtype=torch.long).unsqueeze(0).cuda(
                                      cfg.use_which_gpu)
            label_list = torch.tensor(labels[i, :][masks[i] == 1],
                                      dtype=torch.long).unsqueeze(0).cuda(
                                          cfg.use_which_gpu)
            cur_masks = torch.tensor(masks[i, :][masks[i] == 1],
                                     dtype=torch.long).unsqueeze(0).cuda(
                                         cfg.use_which_gpu)
            cur_gold_predicate = torch.tensor(
                gold_predicate[i, :][masks[i] == 1],
                dtype=torch.float32).unsqueeze(0).cuda(cfg.use_which_gpu)

            tokens = torch.tensor(tokens,
                                  dtype=torch.long).cuda(cfg.use_which_gpu)
            cur_masks = torch.tensor(cur_masks,
                                     dtype=torch.long).cuda(cfg.use_which_gpu)
            cur_gold_predicate = torch.tensor(cur_gold_predicate,
                                              dtype=torch.float32).cuda(
                                                  cfg.use_which_gpu)
            # tokens: len * 1
            tokens = torch.t(tokens)
            #logit: length * 1 * labels
            logit: torch.Tensor = model(tokens, cur_masks, cur_gold_predicate)

            # argmax predictions
            # predictions: length * 1
            # _, predictions = logit.max(dim=2)
            # _, predictions_drew = logit.max(dim=2)
            predictions = call_viterbi(logit, transition_matrix)
            predictions = torch.from_numpy(np.array(predictions)).cuda(
                cfg.use_which_gpu)
            # print(predictions)
            # print(predictions_drew)
            # import sys
            # sys.exit()

            # predictions: length
            predictions.squeeze_()
            # label_list: length
            label_list = torch.tensor(
                label_list, dtype=torch.long).squeeze().cuda(cfg.use_which_gpu)
            try:
                all_preds = torch.cat((all_preds, predictions))
                all_labels = torch.cat((all_labels, label_list))
            except:
                pass

    return metrics.f1_score(y_true=all_labels.cpu(),
                            y_pred=all_preds.cpu(),
                            average='micro')
Example #20
def loss_HardNet(anchor,
                 positive,
                 column_row_swap=False,
                 anchor_swap=False,
                 anchor_ave=False,
                 margin=1.0,
                 batch_reduce='min',
                 loss_type="triplet_margin"):
    """HardNet margin loss - calculates loss based on distance matrix based on positive distance and closest negative distance.
    """

    assert anchor.size() == positive.size(
    ), "Input sizes between positive and negative must be equal."
    assert anchor.dim() == 2, "Input must be a 2D matrix."
    eps = 1e-8
    dist_matrix = distance_matrix_vector(anchor, positive) + eps
    eye = torch.autograd.Variable(torch.eye(dist_matrix.size(1))).cuda()

    # steps to filter out same patches that occur in distance matrix as negatives
    pos1 = torch.diag(dist_matrix)
    dist_without_min_on_diag = dist_matrix + eye * 10
    mask = (dist_without_min_on_diag.ge(0.008) - 1) * -1
    mask = mask.type_as(dist_without_min_on_diag) * 10
    dist_without_min_on_diag = dist_without_min_on_diag + mask
    if batch_reduce == 'min':
        min_neg = torch.min(dist_without_min_on_diag, 1)[0]
        if column_row_swap:
            min_neg2 = torch.min(dist_without_min_on_diag, 0)[0]
            min_neg = torch.min(min_neg, min_neg2)
        if False:
            dist_matrix_a = distance_matrix_vector(anchor, anchor) + eps
            dist_matrix_p = distance_matrix_vector(positive, positive) + eps
            dist_without_min_on_diag_a = dist_matrix_a + eye * 10
            dist_without_min_on_diag_p = dist_matrix_p + eye * 10
            min_neg_a = torch.min(dist_without_min_on_diag_a, 1)[0]
            min_neg_p = torch.t(torch.min(dist_without_min_on_diag_p, 0)[0])
            min_neg_3 = torch.min(min_neg_p, min_neg_a)
            min_neg = torch.min(min_neg, min_neg_3)
            print(min_neg_a)
            print(min_neg_p)
            print(min_neg_3)
            print(min_neg)
        min_neg = min_neg
        pos = pos1
    elif batch_reduce == 'average':
        pos = pos1.repeat(anchor.size(0)).view(-1, 1).squeeze(0)
        min_neg = dist_without_min_on_diag.view(-1, 1)
        if column_row_swap:
            min_neg2 = torch.t(dist_without_min_on_diag).contiguous().view(
                -1, 1)
            min_neg = torch.min(min_neg, min_neg2)
        min_neg = min_neg.squeeze(0)
    elif batch_reduce == 'random':
        idxs = torch.autograd.Variable(
            torch.randperm(anchor.size()[0]).long()).cuda()
        min_neg = dist_without_min_on_diag.gather(1, idxs.view(-1, 1))
        if column_row_swap:
            min_neg2 = torch.t(dist_without_min_on_diag).gather(
                1, idxs.view(-1, 1))
            min_neg = torch.min(min_neg, min_neg2)
        min_neg = torch.t(min_neg).squeeze(0)
        pos = pos1
    else:
        print('Unknown batch reduce mode. Try min, average or random')
        sys.exit(1)
    if loss_type == "triplet_margin":
        loss = torch.clamp(margin + pos - min_neg, min=0.0)
    elif loss_type == 'softmax':
        exp_pos = torch.exp(2.0 - pos)
        exp_den = exp_pos + torch.exp(2.0 - min_neg) + eps
        loss = -torch.log(exp_pos / exp_den)
    elif loss_type == 'contrastive':
        loss = torch.clamp(margin - min_neg, min=0.0) + pos
    else:
        print('Unknown loss type. Try triplet_margin, softmax or contrastive')
        sys.exit(1)
    loss = torch.mean(loss)
    return loss
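distance_matrix_vector is not shown in this snippet; a typical helper used with HardNet-style losses computes all pairwise Euclidean distances between the anchor and positive descriptor batches. The version below is a sketch for completeness, not necessarily the exact helper from the original repository:

import torch

def distance_matrix_vector(anchor, positive, eps=1e-6):
    # ||a - p||^2 = ||a||^2 + ||p||^2 - 2*a.p, evaluated for every (anchor, positive) pair
    a_sq = torch.sum(anchor * anchor, dim=1).unsqueeze(-1)      # (N, 1)
    p_sq = torch.sum(positive * positive, dim=1).unsqueeze(-1)  # (M, 1)
    return torch.sqrt(a_sq + torch.t(p_sq) - 2.0 * torch.mm(anchor, torch.t(positive)) + eps)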
Example #21
def ratio_matrix_vector(a, p):
    eps = 1e-12
    return a.expand(
        p.size(0), a.size(0)) / (torch.t(p.expand(a.size(0), p.size(0))) + eps)
Example #22
def iou(box1, box2):
    from shapely.geometry import Polygon
    a = Polygon(torch.t(box1)).convex_hull
    b = Polygon(torch.t(box2)).convex_hull
    
    return a.intersection(b).area / a.union(b).area
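A minimal usage sketch for iou above (an assumption about the layout: each box is a 2 x N tensor of polygon corner coordinates, x in row 0 and y in row 1; depending on the shapely version you may need to pass box.t().tolist() instead of a tensor):

import torch

box1 = torch.tensor([[0., 2., 2., 0.],
                     [0., 0., 2., 2.]])   # 2x2 axis-aligned square
box2 = torch.tensor([[1., 3., 3., 1.],
                     [1., 1., 3., 3.]])   # same square shifted by (1, 1)
print(iou(box1, box2))                    # intersection 1 / union 7, roughly 0.143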
Example #23
    def __getitem__(self, index):

        # find shape that contains the point with given global index
        shape_ind, patch_ind = self.shape_index(index)

        shape = self.shape_cache.get(shape_ind)
        if shape.pidx is None:
            center_point_ind = patch_ind
        else:
            center_point_ind = shape.pidx[patch_ind]

        # get neighboring points (within euclidean distance patch_radius)
        patch_pts = torch.FloatTensor(self.points_per_patch*len(self.patch_radius_absolute[shape_ind]), 3).zero_()
        # patch_pts_valid = torch.ByteTensor(self.points_per_patch*len(self.patch_radius_absolute[shape_ind])).zero_()
        patch_pts_valid = []
        scale_ind_range = np.zeros([len(self.patch_radius_absolute[shape_ind]), 2], dtype='int')
        for radius_index, patch_radius in enumerate(self.patch_radius_absolute[shape_ind]):
            patch_pts, patch_pts_valid, scale_ind_range = self.select_patch_points(patch_radius, index,
                center_point_ind, shape, radius_index, scale_ind_range, patch_pts_valid, patch_pts)
        if self.include_normals:
            patch_normal = torch.from_numpy(shape.normals[center_point_ind, :])

        if self.include_curvatures:
            patch_curv = torch.from_numpy(shape.curv[center_point_ind, :])
            # scale curvature to match the scaled vertices (curvature*s matches position/s):
            patch_curv = patch_curv * self.patch_radius_absolute[shape_ind][0]
        if self.include_original:
            original = shape.pts[center_point_ind]

        if self.include_clean_points:
            # patch_clean_points = torch.from_numpy(shape.clean_points[center_point_ind, :])
            tmp = []
            patch_clean_points = torch.FloatTensor(self.points_per_patch, 3).zero_()
            scale_clean_ind_range = np.zeros([len(self.patch_radius_absolute[shape_ind]), 2], dtype='int')
            # clean_patch_radius = float(sum(self.patch_radius_absolute[shape_ind]))/len(self.patch_radius_absolute[shape_ind])
            clean_patch_radius = self.patch_radius_absolute[shape_ind][0]
            patch_clean_points, _, _ = self.select_patch_points(clean_patch_radius, index,
                center_point_ind, shape, 0, scale_clean_ind_range, tmp, patch_clean_points, clean_points=True)
        if self.use_pca:

            # compute pca of points in the patch:
            # center the patch around the mean:
            pts_mean = patch_pts[patch_pts_valid, :].mean(0)
            patch_pts[patch_pts_valid, :] = patch_pts[patch_pts_valid, :] - pts_mean

            trans, _, _ = torch.svd(torch.t(patch_pts[patch_pts_valid, :]))
            patch_pts[patch_pts_valid, :] = torch.mm(patch_pts[patch_pts_valid, :], trans)

            cp_new = -pts_mean # since the patch was originally centered, the original cp was at (0,0,0)
            cp_new = torch.matmul(cp_new, trans)

            # re-center on original center point
            patch_pts[patch_pts_valid, :] = patch_pts[patch_pts_valid, :] - cp_new

            if self.include_normals:
                patch_normal = torch.matmul(patch_normal, trans)
        else:
            trans = torch.eye(3).float()


        # get point tuples from the current patch
        if self.point_tuple > 1:
            patch_tuples = torch.FloatTensor(self.points_per_patch*len(self.patch_radius_absolute[shape_ind]), 3*self.point_tuple).zero_()
            for s, rad in enumerate(self.patch_radius_absolute[shape_ind]):
                start = scale_ind_range[s, 0]
                end = scale_ind_range[s, 1]
                point_count = end - start

                tuple_count = point_count**self.point_tuple

                # get linear indices of the tuples
                if tuple_count > self.points_per_patch:
                    patch_tuple_inds = self.rng.choice(tuple_count, self.points_per_patch, replace=False)
                    tuple_count = self.points_per_patch
                else:
                    patch_tuple_inds = np.arange(tuple_count)

                # linear tuple index to index for each tuple element
                patch_tuple_inds = np.unravel_index(patch_tuple_inds, (point_count,)*self.point_tuple)

                for t in range(self.point_tuple):
                    patch_tuples[start:start+tuple_count, t*3:(t+1)*3] = patch_pts[start+patch_tuple_inds[t], :]


            patch_pts = patch_tuples

        patch_feats = ()
        for pfeat in self.patch_features:
            if pfeat == 'normal':
                patch_feats = patch_feats + (patch_normal,)
            elif pfeat == 'max_curvature':
                patch_feats = patch_feats + (patch_curv[0:1],)
            elif pfeat == 'min_curvature':
                patch_feats = patch_feats + (patch_curv[1:2],)
            elif pfeat == 'clean_points':
                patch_feats = patch_feats + (patch_clean_points,)
            elif pfeat == "original":
                patch_feats = patch_feats + (original,patch_radius)
            else:
                raise ValueError('Unknown patch feature: %s' % (pfeat))
        return (patch_pts,) + patch_feats + (trans,)
Example #24
	def z(self, input, weight, bias):
		'''
			sum(i*w) + b
		'''		
		z = torch.matmul(torch.t(weight), input) + bias
		return z
Example #25
#        estimator.fit(commonZ)
#        centroids =estimator.cluster_centers_
#        label_pred = estimator.labels_
#        acc = metrics.acc(label_true, label_pred)
#        nmi = metrics.nmi(label_true, label_pred)
#        ACC_all.append(acc)
#        NMI_all.append(nmi)
#        print(' '*8 + '|==>  acc: %.4f,  nmi: %.4f  <==|'
#                   % (acc, nmi))

sio.savemat('commonZg.mat', {'Z': commonZ_step2})
q1 = 1.0 / (1.0 + (torch.sum(
    torch.pow(
        torch.unsqueeze(torch.FloatTensor(commonZ_step1), 1) -
        torch.FloatTensor(centroids0), 2), 2)))
q = torch.t(torch.t(q1) / torch.sum(q1, 1))
p1 = torch.pow(q, 2) / torch.sum(q, 0)
p = torch.t(torch.t(p1) / torch.sum(p1, 1))
#center = torch.FloatTensor(centroids).cuda()
#center = torch.FloatTensor(centroids_step2).cuda()
#model.clu.weights.data = center

#################################################
# Step3:  VIGAN
#################################################
print('step 3')
total_steps = 0
#eee = []
#ACC_all=[]
#NMI_all=[]
loss_ave = []
    def forward(self, theta, x, **kwargs):
        """

        Parameters
        ----------
        theta :
            
        x :
            
        **kwargs :
            

        Returns
        -------

        """
        # Conditioner
        try:
            h = self.activation_function(
                F.linear(theta, torch.t(self.Wx)) +
                F.linear(x, torch.t(self.Ms[0] * self.Ws[0]), self.bs[0]))
        except RuntimeError:
            logger.error("Abort! Abort!")
            logger.info("MADE settings: n_inputs = %s, n_conditionals = %s",
                        self.n_inputs, self.n_conditionals)
            logger.info(
                "Shapes: theta %s, Wx %s, x %s, Ms %s, Ws %s, bs %s",
                theta.shape,
                self.Wx.shape,
                x.shape,
                self.Ms[0].shape,
                self.Ws[0].shape,
                self.bs[0].shape,
            )
            logger.info(
                "Types: theta %s, Wx %s, x %s, Ms %s, Ws %s, bs %s",
                type(theta),
                type(self.Wx),
                type(x),
                type(self.Ms[0]),
                type(self.Ws[0]),
                type(self.bs[0]),
            )
            logger.info(
                "CUDA: theta %s, Wx %s, x %s, Ms %s, Ws %s, bs %s",
                theta.is_cuda,
                self.Wx.is_cuda,
                x.is_cuda,
                self.Ms[0].is_cuda,
                self.Ws[0].is_cuda,
                self.bs[0].is_cuda,
            )
            raise

        for M, W, b in zip(self.Ms[1:], self.Ws[1:], self.bs[1:]):
            h = self.activation_function(F.linear(h, torch.t(M * W), b))

        # Gaussian parameters
        self.m = F.linear(h, torch.t(self.Mmp * self.Wm), self.bm)
        self.logp = F.linear(h, torch.t(self.Mmp * self.Wp), self.bp)

        # u(x)
        u = torch.exp(0.5 * self.logp) * (x - self.m)

        # log det du/dx
        logdet_dudx = 0.5 * torch.sum(self.logp, dim=1)

        return u, logdet_dudx
Example #27
    def from_torch(attention: TorchBertAttention,
                   layer_norm: Optional[TorchLayerNorm] = None,
                   is_trans_weight: bool = False):
        """
        load an attn model from huggingface bert attention model.
        """
        ln_params = {}
        if layer_norm is not None:
            ln_params = {k: v for k, v in layer_norm.named_parameters()}
        params = {k: v for k, v in attention.named_parameters()}
        with torch.no_grad():
            if is_trans_weight:
                # merge self.query.weight, self.key.weight and self.value.weight together as qkv.weight
                qkv_weight = torch.cat(
                    (params['self.query.weight'], params['self.key.weight'],
                     params['self.value.weight']), 0)
                output_weight = params['output.dense.weight']
                k_w = params['self.key.weight']
                v_w = params['self.value.weight']
                q_w = params['self.query.weight']
            else:
                # merge self.query.weight, self.key.weight and self.value.weight together as qkv.weight
                qkv_weight = torch.clone(
                    torch.t(
                        torch.cat((params['self.query.weight'],
                                   params['self.key.weight'],
                                   params['self.value.weight']),
                                  0).contiguous()).contiguous())
                output_weight = torch.clone(
                    torch.t(params['output.dense.weight']).contiguous())
                k_w = torch.clone(
                    torch.t(params['self.key.weight']).contiguous())
                v_w = torch.clone(
                    torch.t(params['self.value.weight']).contiguous())
                q_w = torch.clone(
                    torch.t(params['self.query.weight']).contiguous())

            qkv_bias = torch.cat(
                (params['self.query.bias'], params['self.key.bias'],
                 params['self.value.bias']), 0)

            if layer_norm is not None:
                att = MultiHeadedAttentionSmartBatch(
                    convert2tt_tensor(k_w),
                    convert2tt_tensor(params['self.key.bias']),
                    convert2tt_tensor(v_w),
                    convert2tt_tensor(params['self.value.bias']),
                    convert2tt_tensor(q_w),
                    convert2tt_tensor(params['self.query.bias']),
                    convert2tt_tensor(output_weight),
                    convert2tt_tensor(params['output.dense.bias']),
                    convert2tt_tensor(qkv_weight), convert2tt_tensor(qkv_bias),
                    convert2tt_tensor(params['output.LayerNorm.weight']),
                    convert2tt_tensor(params['output.LayerNorm.bias']),
                    convert2tt_tensor(ln_params['weight']),
                    convert2tt_tensor(ln_params['bias']),
                    attention.self.num_attention_heads)
            else:
                att = MultiHeadedAttentionSmartBatch(
                    convert2tt_tensor(k_w),
                    convert2tt_tensor(params['self.key.bias']),
                    convert2tt_tensor(v_w),
                    convert2tt_tensor(params['self.value.bias']),
                    convert2tt_tensor(q_w),
                    convert2tt_tensor(params['self.query.bias']),
                    convert2tt_tensor(output_weight),
                    convert2tt_tensor(params['output.dense.bias']),
                    convert2tt_tensor(qkv_weight), convert2tt_tensor(qkv_bias),
                    convert2tt_tensor(params['output.LayerNorm.weight']),
                    convert2tt_tensor(params['output.LayerNorm.bias']),
                    attention.self.num_attention_heads)
            return att
    def forward(self, inputs):
        x = inputs
        y = torch.stack([100 * self.w0 * inputs[:, 0], 0.1 * self.w1 * inputs[:, 1]])
        y = torch.t(y)
        return y.contiguous()
Example #29
def perturb_past(
    past,
    model,
    last,
    unpert_past=None,
    unpert_logits=None,
    accumulated_hidden=None,
    grad_norms=None,
    stepsize=0.01,
    one_hot_bows_vectors=None,
    classifier=None,
    class_label=None,
    loss_type=0,
    num_iterations=3,
    horizon_length=1,
    window_length=0,
    decay=False,
    gamma=1.5,
    kl_scale=0.01,
    device="cuda",
):
    # Generate initial perturbed past
    grad_accumulator = [(np.zeros(p.shape).astype("float32")) for p in past]

    if accumulated_hidden is None:
        accumulated_hidden = 0

    if decay:
        decay_mask = torch.arange(0.0, 1.0 + SMALL_CONST, 1.0 / (window_length))[1:]
    else:
        decay_mask = 1.0

    # Generate a mask so that the gradient perturbation is applied only within a
    # trailing window of the past
    _, _, _, curr_length, _ = past[0].shape

    if curr_length > window_length and window_length > 0:
        ones_key_val_shape = tuple(past[0].shape[:-2]) + tuple([window_length]) + tuple(past[0].shape[-1:])

        zeros_key_val_shape = (
            tuple(past[0].shape[:-2]) + tuple([curr_length - window_length]) + tuple(past[0].shape[-1:])
        )

        ones_mask = torch.ones(ones_key_val_shape)
        ones_mask = decay_mask * ones_mask.permute(0, 1, 2, 4, 3)
        ones_mask = ones_mask.permute(0, 1, 2, 4, 3)

        window_mask = torch.cat((ones_mask, torch.zeros(zeros_key_val_shape)), dim=-2).to(device)
    else:
        window_mask = torch.ones_like(past[0]).to(device)

    # accumulate perturbations for num_iterations
    loss_per_iter = []
    new_accumulated_hidden = None
    for i in range(num_iterations):
        #print("Iteration ", i + 1)
        curr_perturbation = [
            to_var(torch.from_numpy(p_), requires_grad=True, device=device) for p_ in grad_accumulator
        ]

        # Compute hidden using perturbed past
        perturbed_past = list(map(add, past, curr_perturbation))
        _, _, _, curr_length, _ = curr_perturbation[0].shape
        all_logits, _, all_hidden = model(last, past=perturbed_past)
        hidden = all_hidden[-1]
        new_accumulated_hidden = accumulated_hidden + torch.sum(hidden, dim=1).detach()
        # TODO: Check the layer-norm consistency of this with trained discriminator (Sumanth)
        logits = all_logits[:, -1, :]
        probs = F.softmax(logits, dim=-1)

        loss = 0.0
        loss_list = []
        if loss_type == PPLM_BOW or loss_type == PPLM_BOW_DISCRIM:
            for one_hot_bow in one_hot_bows_vectors:
                bow_logits = torch.mm(probs, torch.t(one_hot_bow))
                bow_loss = -torch.log(torch.sum(bow_logits))
                loss += bow_loss
                loss_list.append(bow_loss)
            #print(" pplm_bow_loss:", loss.data.cpu().numpy())

        if loss_type == 2 or loss_type == 3:
            ce_loss = torch.nn.CrossEntropyLoss()
            # TODO why we need to do this assignment and not just using unpert_past? (Sumanth)
            curr_unpert_past = unpert_past
            curr_probs = torch.unsqueeze(probs, dim=1)
            wte = model.resize_token_embeddings()
            for _ in range(horizon_length):
                inputs_embeds = torch.matmul(curr_probs, wte.weight.data)
                _, curr_unpert_past, curr_all_hidden = model(past=curr_unpert_past, inputs_embeds=inputs_embeds)
                curr_hidden = curr_all_hidden[-1]
                new_accumulated_hidden = new_accumulated_hidden + torch.sum(curr_hidden, dim=1)

            prediction = classifier(new_accumulated_hidden / (curr_length + 1 + horizon_length))

            label = torch.tensor(prediction.shape[0] * [class_label], device=device, dtype=torch.long)
            discrim_loss = ce_loss(prediction, label)
            #print(" pplm_discrim_loss:", discrim_loss.data.cpu().numpy())
            loss += discrim_loss
            loss_list.append(discrim_loss)

        kl_loss = 0.0
        if kl_scale > 0.0:
            unpert_probs = F.softmax(unpert_logits[:, -1, :], dim=-1)
            unpert_probs = unpert_probs + SMALL_CONST * (unpert_probs <= SMALL_CONST).float().to(device).detach()
            correction = SMALL_CONST * (probs <= SMALL_CONST).float().to(device).detach()
            corrected_probs = probs + correction.detach()
            kl_loss = kl_scale * ((corrected_probs * (corrected_probs / unpert_probs).log()).sum())
            #print(" kl_loss", kl_loss.data.cpu().numpy())
            loss += kl_loss

        loss_per_iter.append(loss.data.cpu().numpy())
        #print(" pplm_loss", (loss - kl_loss).data.cpu().numpy())

        # compute gradients
        loss.backward()

        # calculate gradient norms
        if grad_norms is not None and loss_type == PPLM_BOW:
            grad_norms = [
                torch.max(grad_norms[index], torch.norm(p_.grad * window_mask))
                for index, p_ in enumerate(curr_perturbation)
            ]
        else:
            grad_norms = [
                (torch.norm(p_.grad * window_mask) + SMALL_CONST) for index, p_ in enumerate(curr_perturbation)
            ]

        # normalize gradients
        grad = [
            -stepsize * (p_.grad * window_mask / grad_norms[index] ** gamma).data.cpu().numpy()
            for index, p_ in enumerate(curr_perturbation)
        ]

        # accumulate gradient
        grad_accumulator = list(map(add, grad, grad_accumulator))

        # reset gradients, just to make sure
        for p_ in curr_perturbation:
            p_.grad.data.zero_()

        # removing past from the graph
        new_past = []
        for p_ in past:
            new_past.append(p_.detach())
        past = new_past

    # apply the accumulated perturbations to the past
    grad_accumulator = [to_var(torch.from_numpy(p_), requires_grad=True, device=device) for p_ in grad_accumulator]
    pert_past = list(map(add, past, grad_accumulator))

    return pert_past, new_accumulated_hidden, grad_norms, loss_per_iter
Example #30
    def forward(self,
                s_vec_batched,
                qa_pairs_batched,
                cpt_paths_batched,
                rel_paths_batched,
                ana_mode=False):
        self.device = self.concept_emd.weight.device  # multiple GPUs need to specify device
        final_vecs = []

        if ana_mode:
            path_att_scores = []
            qa_pair_att_scores = []

        for index in range(
                len(s_vec_batched)):  # len = batch_size * num_choices
            # for each question-answer statement

            s_vec = s_vec_batched[index].to(self.device)
            cpt_paths = cpt_paths_batched[index]
            rel_paths = rel_paths_batched[index]

            if len(
                    qa_pairs_batched[index]
            ) == 0 or False:  # if "or True" then we can do an ablation study
                raw_qas_vecs = torch.cat((torch.zeros(1, self.concept_dim).to(
                    self.device), torch.zeros(1, self.concept_dim).to(
                        self.device), torch.stack([s_vec]).to(self.device)),
                                         dim=1).to(self.device)

                qas_vecs = self.qas_encoder(raw_qas_vecs)
                # print("0:", qas_vecs.size())
                latent_rel_vecs = torch.cat(
                    (qas_vecs, torch.zeros(1, self.lstm_dim).to(self.device)),
                    dim=1)
            else:
                q_seq = []
                a_seq = []

                qa_path_num = []

                tmp_cpt_paths = []
                for qa_pair in qa_pairs_batched[
                        index]:  # for each possible qc, ac pair
                    q, a = qa_pair[0], qa_pair[1]
                    q_seq.append(q)
                    a_seq.append(a)

                    qa_cpt_paths, qa_rel_paths = self.paths_group(
                        cpt_paths, rel_paths, q, a,
                        k=self.num_random_paths)  # self.num_random_paths

                    qa_path_num.append(len(qa_cpt_paths))
                    tmp_cpt_paths.extend(qa_cpt_paths)

                    # assert that the order is continuous
                if self.num_random_paths is None:
                    assert tmp_cpt_paths == cpt_paths

                q_seq = torch.LongTensor(q_seq).to(self.device)
                a_seq = torch.LongTensor(a_seq).to(self.device)
                q_vecs = self.concept_emd(q_seq)
                a_vecs = self.concept_emd(a_seq)

                # q_vecs = q_vecss[index] # self.concept_emd(q_seq)
                # a_vecs = a_vecss[index] # self.concept_emd(a_seq)

                s_vecs = torch.stack([s_vec] * len(qa_pairs_batched[index]))
                raw_qas_vecs = torch.cat((q_vecs, a_vecs, s_vecs), dim=1)
                # all the qas triple vectors associated with a statement

                qas_vecs = self.qas_encoder(raw_qas_vecs)

                # print(qas_vecs.size())
                # print(len(all_qa_cpt_paths_embeds))

                pooled_path_vecs = []

                # batched path encoding
                batched_all_qa_cpt_paths_embeds = self.concept_emd(
                    torch.LongTensor(cpt_paths).to(self.device)).permute(
                        1, 0, 2)
                batched_all_qa_rel_paths_embeds = self.relation_emd(
                    torch.LongTensor(rel_paths).to(self.device)).permute(
                        1, 0, 2)

                batched_all_qa_cpt_rel_path_embeds = torch.cat(
                    (batched_all_qa_cpt_paths_embeds,
                     batched_all_qa_rel_paths_embeds),
                    dim=2)
                #

                # batched_all_qa_cpt_rel_path_embeds = over_batched_all_qa_cpt_rel_path_embeds[0:None,path_splits[index][0]:path_splits[index][1],0:None]
                # if False then ablate the LSTM
                if True:
                    batched_lstm_outs, _ = self.lstm(
                        batched_all_qa_cpt_rel_path_embeds)
                else:
                    batched_lstm_outs = torch.zeros(
                        batched_all_qa_cpt_rel_path_embeds.size()[0],
                        batched_all_qa_cpt_rel_path_embeds.size()[1],
                        self.lstm_dim).to(self.device)

                if self.path_attention:
                    query_vecs = self.qas_pathlstm_att(qas_vecs)

                cur_start = 0
                for index in range(len(qa_path_num)):
                    if self.path_attention:
                        query_vec = query_vecs[index]
                    cur_end = cur_start + qa_path_num[index]

                    # mean_pooled_path_vec = batched_lstm_outs[-1, cur_start:cur_end, :].mean(dim=0)  # mean pooling
                    # attention pooling
                    blo = batched_lstm_outs[-1, cur_start:cur_end, :]
                    if self.path_attention:
                        att_scores = torch.mv(
                            blo, query_vec)  # path-level attention scores
                        norm_att_scores = F.softmax(att_scores, dim=0)
                        att_pooled_path_vec = torch.mv(torch.t(blo),
                                                       norm_att_scores)
                        if ana_mode:
                            path_att_scores.append(norm_att_scores)
                    else:
                        att_pooled_path_vec = blo.mean(dim=0)

                    cur_start = cur_end
                    pooled_path_vecs.append(att_pooled_path_vec)

                pooled_path_vecs = torch.stack(pooled_path_vecs)
                latent_rel_vecs = torch.cat((qas_vecs, pooled_path_vecs),
                                            dim=1)  # qas and KE-qas

            # final_vec = latent_rel_vecs.mean(dim=0).to(self.device)  # mean pooling

            # att pooling
            if self.path_attention:
                sent_as_query = self.sent_ltrel_att(
                    s_vec)  # sent attend on qas
                r_att_scores = torch.mv(
                    qas_vecs, sent_as_query)  # qa-pair-level attention scores
                norm_r_att_scores = F.softmax(r_att_scores, dim=0)
                if ana_mode:
                    qa_pair_att_scores.append(norm_r_att_scores)

                final_vec = torch.mv(torch.t(latent_rel_vecs),
                                     norm_r_att_scores)

            else:
                final_vec = latent_rel_vecs.mean(dim=0).to(
                    self.device)  # mean pooling

            final_vecs.append(torch.cat((final_vec, s_vec), dim=0))

        logits = self.hidden2output(torch.stack(final_vecs))
        if not ana_mode:
            return logits
        else:
            return logits, path_att_scores, qa_pair_att_scores
Example #31
def dtaudp(p, alpha, lam, Q):
    return (Q.mv(
        torch.diag(1 / softabs_map(lam, alpha)).mv((torch.t(Q).mv(p)))))
Example #32
    def forward(self,
                s_vec_batched,
                qa_pairs_batched,
                cpt_paths_batched,
                rel_paths_batched,
                graphs,
                concept_mapping_dicts,
                ana_mode=False):
        self.device = self.concept_emd.weight.device  # multiple GPUs need to specify device
        final_vecs = []
        output_graphs = self.graph_encoder(graphs)
        output_concept_embeds = torch.cat(
            (output_graphs.ndata["h"],
             torch.zeros(1, self.graph_output_dim).to(
                 self.device)))  # len(output_concept_embeds) as padding

        # new_concept_embed = nn.Embedding(output_concept_embeds.size()[0], output_concept_embeds.size()[1])
        # new_concept_embed.weight = nn.Parameter(output_concept_embeds)

        new_concept_embed = torch.cat((output_graphs.ndata["h"],
                                       s_vec_batched.new_zeros(
                                           (1, self.graph_output_dim))))
        new_concept_embed = new_concept_embed.to(self.device)

        if ana_mode:
            path_att_scores = []
            qa_pair_att_scores = []

        for index in range(
                len(s_vec_batched)):  # len = batch_size * num_choices
            # for each question-answer statement

            s_vec = s_vec_batched[index].to(self.device)
            cpt_paths = cpt_paths_batched[index]
            rel_paths = rel_paths_batched[index]

            if len(
                    qa_pairs_batched[index]
            ) == 0 or False:  # if "or True" then we can do an ablation study
                raw_qas_vecs = torch.cat(
                    (torch.zeros(1, self.graph_output_dim +
                                 self.concept_dim).to(self.device),
                     torch.zeros(1, self.graph_output_dim +
                                 self.concept_dim).to(self.device),
                     torch.stack([s_vec]).to(self.device)),
                    dim=1).to(self.device)

                qas_vecs = self.qas_encoder(raw_qas_vecs)
                # print("0:", qas_vecs.size())
                latent_rel_vecs = torch.cat(
                    (qas_vecs, torch.zeros(1, self.lstm_dim).to(self.device)),
                    dim=1)
            else:
                q_seq = []
                a_seq = []

                qa_path_num = []

                tmp_cpt_paths = []
                for qa_pair in qa_pairs_batched[
                        index]:  # for each possible qc, ac pair
                    q, a = qa_pair[0], qa_pair[1]
                    q_seq.append(q)
                    a_seq.append(a)

                    qa_cpt_paths, qa_rel_paths = self.paths_group(
                        cpt_paths, rel_paths, q, a,
                        k=self.num_random_paths)  # self.num_random_paths

                    qa_path_num.append(len(qa_cpt_paths))
                    tmp_cpt_paths.extend(qa_cpt_paths)

                    # assert that the order is continuous
                if self.num_random_paths is None:
                    assert tmp_cpt_paths == cpt_paths

                mdict = concept_mapping_dicts[index]
                # new_q_vecs = new_concept_embed(
                #     torch.LongTensor([mdict.get(c, len(output_concept_embeds) - 1) for c in q_seq]).to(self.device))
                # new_a_vecs = new_concept_embed(
                #     torch.LongTensor([mdict.get(c, len(output_concept_embeds) - 1) for c in a_seq]).to(self.device))

                new_q_vecs = new_concept_embed[torch.LongTensor([
                    mdict.get(c,
                              len(output_concept_embeds) - 1) for c in q_seq
                ]).to(self.device)].view(len(q_seq), -1)
                new_a_vecs = new_concept_embed[torch.LongTensor([
                    mdict.get(c,
                              len(output_concept_embeds) - 1) for c in a_seq
                ]).to(self.device)].view(len(a_seq), -1)

                ## new_q_vecs = torch.index_select(output_concept_embeds, 0, q_seq)
                ## new_a_vecs = torch.index_select(output_concept_embeds, 0, a_seq)

                q_vecs = self.concept_emd(
                    torch.LongTensor(q_seq).to(self.device))
                a_vecs = self.concept_emd(
                    torch.LongTensor(a_seq).to(self.device))

                q_vecs = torch.cat((q_vecs, new_q_vecs), dim=1)
                a_vecs = torch.cat((a_vecs, new_a_vecs), dim=1)

                s_vecs = torch.stack([s_vec] * len(qa_pairs_batched[index]))
                raw_qas_vecs = torch.cat((q_vecs, a_vecs, s_vecs), dim=1)
                # all the qas triple vectors associated with a statement

                qas_vecs = self.qas_encoder(raw_qas_vecs)

                # print(qas_vecs.size())
                # print(len(all_qa_cpt_paths_embeds))

                pooled_path_vecs = []

                # batched path encoding

                #### Method 1
                # cpt_max_len = len(cpt_paths[0])
                # mdicted_cpaths = []
                # for cpt_path in cpt_paths:
                #     mdicted_cpaths.extend([mdict.get(c, len(output_concept_embeds)-1) for c in cpt_path])
                # mdicted_cpaths = torch.LongTensor(mdicted_cpaths).to(self.device)
                # assert len(mdicted_cpaths) == cpt_max_len * len(cpt_paths)  # flatten
                # indexed_selection = torch.index_select(output_concept_embeds, 0, mdicted_cpaths)
                # batched_all_qa_cpt_paths_embeds = torch.stack([torch.stack(path) for path in list(zip(*(iter(indexed_selection),) * cpt_max_len))])
                # batched_all_qa_cpt_paths_embeds = batched_all_qa_cpt_paths_embeds.permute(1, 0, 2)

                #### Method 2
                # batched_all_qa_cpt_paths_embeds = []
                # for cpt_path in cpt_paths:
                # path_concept_vecs = [output_concept_embeds[c] for c in [mdict.get(c, -1) for c in cpt_path] if c >= 0]
                # path_concept_vecs = [output_graphs.ndata["h"][c] for c in [mdict.get(c, -1) for c in cpt_path] if c >= 0]
                # zero_paddings = [torch.zeros(self.graph_output_dim).to(self.device)] * (len(cpt_path)-len(path_concept_vecs))
                # path_concept_vecs = torch.stack(path_concept_vecs+zero_paddings)
                # batched_all_qa_cpt_paths_embeds.append(path_concept_vecs)
                # batched_all_qa_cpt_paths_embeds = torch.stack(batched_all_qa_cpt_paths_embeds).permute(1, 0, 2)

                #### Method 3
                mdicted_cpaths = []
                for cpt_path in cpt_paths:
                    mdicted_cpaths.append([
                        mdict.get(c,
                                  len(output_concept_embeds) - 1)
                        for c in cpt_path
                    ])
                mdicted_cpaths = torch.LongTensor(mdicted_cpaths).to(
                    self.device)
                # new_batched_all_qa_cpt_paths_embeds = new_concept_embed(mdicted_cpaths).permute(1, 0, 2)
                new_batched_all_qa_cpt_paths_embeds = new_concept_embed[
                    mdicted_cpaths].view(len(cpt_paths), len(cpt_paths[0]),
                                         -1).permute(1, 0, 2)

                batched_all_qa_cpt_paths_embeds = self.concept_emd(
                    torch.LongTensor(cpt_paths).to(self.device)).permute(
                        1, 0, 2)  # old concept embed

                batched_all_qa_cpt_paths_embeds = torch.cat(
                    (batched_all_qa_cpt_paths_embeds,
                     new_batched_all_qa_cpt_paths_embeds),
                    dim=2)

                batched_all_qa_rel_paths_embeds = self.relation_emd(
                    torch.LongTensor(rel_paths).to(self.device)).permute(
                        1, 0, 2)

                batched_all_qa_cpt_rel_path_embeds = torch.cat(
                    (batched_all_qa_cpt_paths_embeds,
                     batched_all_qa_rel_paths_embeds),
                    dim=2)
                #

                # batched_all_qa_cpt_rel_path_embeds = over_batched_all_qa_cpt_rel_path_embeds[0:None,path_splits[index][0]:path_splits[index][1],0:None]
                # if False then ablate the LSTM
                if True:
                    batched_lstm_outs, _ = self.lstm(
                        batched_all_qa_cpt_rel_path_embeds)
                else:
                    batched_lstm_outs = torch.zeros(
                        batched_all_qa_cpt_rel_path_embeds.size()[0],
                        batched_all_qa_cpt_rel_path_embeds.size()[1],
                        self.lstm_dim).to(self.device)

                if self.path_attention:
                    query_vecs = self.qas_pathlstm_att(qas_vecs)

                cur_start = 0
                for index in range(len(qa_path_num)):
                    if self.path_attention:
                        query_vec = query_vecs[index]
                    cur_end = cur_start + qa_path_num[index]

                    # mean_pooled_path_vec = batched_lstm_outs[-1, cur_start:cur_end, :].mean(dim=0)  # mean pooling
                    # attention pooling
                    blo = batched_lstm_outs[-1, cur_start:cur_end, :]
                    if self.path_attention:
                        att_scores = torch.mv(
                            blo, query_vec)  # path-level attention scores
                        norm_att_scores = F.softmax(att_scores, dim=0)
                        att_pooled_path_vec = torch.mv(torch.t(blo),
                                                       norm_att_scores)
                        if ana_mode:
                            path_att_scores.append(norm_att_scores)
                    else:
                        att_pooled_path_vec = blo.mean(dim=0)

                    cur_start = cur_end
                    pooled_path_vecs.append(att_pooled_path_vec)

                pooled_path_vecs = torch.stack(pooled_path_vecs)
                latent_rel_vecs = torch.cat((qas_vecs, pooled_path_vecs),
                                            dim=1)  # qas and KE-qas

            # final_vec = latent_rel_vecs.mean(dim=0).to(self.device)  # mean pooling

            # att pooling
            if self.path_attention:
                sent_as_query = self.sent_ltrel_att(
                    s_vec)  # sent attend on qas
                r_att_scores = torch.mv(
                    qas_vecs, sent_as_query)  # qa-pair-level attention scores
                norm_r_att_scores = F.softmax(r_att_scores, dim=0)
                if ana_mode:
                    qa_pair_att_scores.append(norm_r_att_scores)

                final_vec = torch.mv(torch.t(latent_rel_vecs),
                                     norm_r_att_scores)

            else:
                final_vec = latent_rel_vecs.mean(dim=0).to(
                    self.device)  # mean pooling

            final_vecs.append(torch.cat((final_vec, s_vec), dim=0))

        logits = self.hidden2output(torch.stack(final_vecs))
        if not ana_mode:
            return logits
        else:
            return logits, path_att_scores, qa_pair_att_scores
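
# The forward pass above pools the last LSTM states of all paths for a qa-pair
# with dot-product attention (scores = H @ q, weights = softmax(scores),
# pooled = H^T @ weights), falling back to a plain mean when path attention is
# off.  A minimal standalone sketch of that pooling step with made-up shapes:
import torch
import torch.nn.functional as F

num_paths, lstm_dim = 5, 8
H = torch.randn(num_paths, lstm_dim)     # last LSTM state of each path
q = torch.randn(lstm_dim)                # query derived from the qas vector

att_scores = torch.mv(H, q)                                   # (num_paths,)
norm_att_scores = F.softmax(att_scores, dim=0)
att_pooled_path_vec = torch.mv(torch.t(H), norm_att_scores)   # (lstm_dim,)
mean_pooled_path_vec = H.mean(dim=0)                          # non-attention fallback
print(att_pooled_path_vec.shape, mean_pooled_path_vec.shape)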
Example #33
                                     requires_grad=False).cuda()
                pred = malconv(exe_input)
                prob = sigmoid(pred).cpu().data.numpy()[0][0]
                print("prob: ", prob)
                if prob < 0.5:
                    break
                print("change " + str(j) + "th byte")
            try:
                min_index = -1
                min_di = 100000
                wj = -w[j:j + 1, :]
                nj = wj / torch.norm(wj, 2)
                zj = z[j:j + 1, :]
                for i in range(1, 256):
                    mi = embed(
                        Variable(torch.from_numpy(np.array([i]))).cuda()).data
                    si = torch.matmul((nj), torch.t(mi - zj))
                    di = torch.norm(mi - (zj + si * nj))
                    si = si.cpu().numpy()
                    if si > 0 and di < min_di:
                        min_di = di
                        min_index = i
                if min_index != -1:
                    data[j] = min_index
                    changes.append(min_index)
            except Exception:
                continue
        print("finish ", t)
changes = np.array(changes)
np.save("changes.npy", changes)
Example #34
def precond_beta_mgpu_block(A, b, tol=1e-16):
    '''Run conjugate gradient on multiple GPUs when A = X.T . X does not fit on
    the GPU by splitting A across GPUs. Preconditioning is performed using a
    sparse approximate LU factorization with the default options in scipy.
    '''
    _message(
        'Computing beta (using approximate inverse preconditioning of A)...')
    padding = A.shape[0] * 5e3
    mem_avail = np.max([available_gpu_memory(i) for i in range(number_gpus())])
    total_tensor_size = mem_avail // 8 - padding
    split = int(total_tensor_size // A.shape[0] - 3)
    Minv = sp.sparse.linalg.spilu(A.numpy()).solve(np.eye(b.shape[0]))
    Minv = torch.from_numpy(Minv)
    A_split = torch.split(A, split, dim=0)
    A_ = []
    for i in range(len(A_split)):
        A_.append(A_split[i].cuda(device='cuda:' + str(i)))
    Minv_split = torch.split(Minv, split, dim=0)
    Minv_ = []
    for i in range(len(Minv_split)):
        Minv_.append(Minv_split[i].cuda(device='cuda:' + str(i)))
    b_gpu = b.cuda(device='cuda:0')

    x = torch.zeros(b_gpu.size(), dtype=torch.float64).cuda(device='cuda:0')
    r = b_gpu.clone().cuda(device='cuda:0')
    z = torch.matmul(Minv, b).cuda(device='cuda:0')
    p = b_gpu.clone().cuda(device='cuda:0')
    rr = torch.sum(torch.matmul(torch.t(r), r))
    rz = torch.sum(torch.matmul(torch.t(r), z))
    numiter = 0
    while rr > tol**2:
        numiter += 1
        if numiter % 100 == 0:
            _message('Reached iteration {}'.format(numiter))

        p_, Ap_ = [], []
        for i in range(len(A_)):
            p_.append(p.cuda(device='cuda:' + str(i)))
            Ap_.append(torch.matmul(A_[i], p_[i]).cpu())
        Ap = torch.cat(Ap_, dim=0).cuda(device='cuda:0')
        del p_, Ap_
        torch.cuda.empty_cache()

        alpha = rz / torch.sum(torch.matmul(torch.t(p), Ap))
        x += alpha * p
        rnew = r - alpha * Ap  # updated residual
        rnew_, znew_ = [], []
        for i in range(len(Minv_)):
            rnew_.append(rnew.cuda(device='cuda:' + str(i)))
            znew_.append(torch.matmul(Minv_[i], rnew_[i]).cpu())
        znew = torch.cat(znew_, dim=0).cuda(device='cuda:0')
        beta = torch.sum(znew * (rnew - r)) / rz
        p = znew + beta * p
        r = rnew
        z = znew
        rz = torch.sum(torch.matmul(torch.t(r), z))
        rr = torch.sum(torch.matmul(torch.t(r), r))  # refresh the stopping criterion
    _message('Converged after {} iterations'.format(numiter))

    x_cpu = x.cpu()

    del A_, A_split, b_gpu, x, r, p, Ap, alpha, beta, rr, rz
    torch.cuda.empty_cache()
    _message('Done computing beta!')

    return x_cpu
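
# For reference, a compact single-device sketch of the same preconditioned
# conjugate-gradient update (A assumed symmetric positive definite, Minv an
# approximate inverse of A); this is illustrative, not the multi-GPU routine:
import torch

def pcg(A, b, Minv, tol=1e-10, max_iter=1000):
    x = torch.zeros_like(b)
    r = b.clone()
    z = Minv @ r
    p = z.clone()
    rz = torch.dot(r, z)
    for _ in range(max_iter):
        Ap = A @ p
        alpha = rz / torch.dot(p, Ap)
        x = x + alpha * p
        r = r - alpha * Ap                 # residual update
        if torch.dot(r, r) < tol ** 2:
            break
        z = Minv @ r                       # precondition the new residual
        rz_new = torch.dot(r, z)
        p = z + (rz_new / rz) * p
        rz = rz_new
    return x

n = 50
M = torch.randn(n, n, dtype=torch.float64)
A = M @ M.t() + n * torch.eye(n, dtype=torch.float64)   # SPD by construction
b = torch.randn(n, dtype=torch.float64)
x = pcg(A, b, Minv=torch.eye(n, dtype=torch.float64))
print(torch.norm(A @ x - b))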
    def __getitem__(self, index):
        """
        The method through which the dataset is accessed for training.

        The index param is not currently used, and instead each dataset[i] is the result of
        a random sampling over:
        - random scene
        - random rgbd frame from that scene
        - random rgbd frame (different enough pose) from that scene
        - various randomization in the match generation and non-match generation procedure

        returns a large number of variables, separated by commas.

        0th return arg: the type of data sampled (this can be used as a flag for different loss functions)
        0th rtype: string

        1st, 2nd return args: image_a_rgb, image_b_rgb
        1st, 2nd rtype: 3-dimensional torch.FloatTensor of shape (image_height, image_width, 3)

        3rd, 4th return args: matches_a, matches_b
        3rd, 4th rtype: 1-dimensional torch.LongTensor of shape (num_matches)

        5th, 6th return args: non_matches_a, non_matches_b
        5th, 6th rtype: 1-dimensional torch.LongTensor of shape (num_non_matches)

        Return values 3,4,5,6 are all in the "single index" format for pixels. That is

        (u,v) --> n = u + image_width * v

        """

        # stores metadata about this data
        metadata = dict()


        # pick a scene
        scene_name = self.get_random_scene_name()
        metadata['scene_name'] = scene_name

        # image a
        image_a_idx = self.get_random_image_index(scene_name)
        image_a_rgb, image_a_depth, image_a_mask, image_a_pose = self.get_rgbd_mask_pose(scene_name, image_a_idx)

        metadata['image_a_idx'] = image_a_idx

        # image b
        image_b_idx = self.get_img_idx_with_different_pose(scene_name, image_a_pose, num_attempts=50)
        metadata['image_b_idx'] = image_b_idx
        if image_b_idx is None:
            logging.info("no frame with sufficiently different pose found, returning")
            # TODO: return something cleaner than no-data
            image_a_rgb_tensor = self.rgb_image_to_tensor(image_a_rgb)
            return self.return_empty_data(image_a_rgb_tensor, image_a_rgb_tensor)

        image_b_rgb, image_b_depth, image_b_mask, image_b_pose = self.get_rgbd_mask_pose(scene_name, image_b_idx)

        image_a_depth_numpy = np.asarray(image_a_depth)
        image_b_depth_numpy = np.asarray(image_b_depth)

        # find correspondences
        uv_a, uv_b = correspondence_finder.batch_find_pixel_correspondences(image_a_depth_numpy, image_a_pose, 
                                                                           image_b_depth_numpy, image_b_pose, 
                                                                           num_attempts=self.num_matching_attempts, img_a_mask=np.asarray(image_a_mask))

        if uv_a is None:
            logging.info("no matches found, returning")
            image_a_rgb_tensor = self.rgb_image_to_tensor(image_a_rgb)
            return self.return_empty_data(image_a_rgb_tensor, image_a_rgb_tensor)

        if self.debug:
            # downsample so can plot
            num_matches_to_plot = 10
            indexes_to_keep = (torch.rand(num_matches_to_plot)*len(uv_a[0])).floor().type(torch.LongTensor)
            uv_a = (torch.index_select(uv_a[0], 0, indexes_to_keep), torch.index_select(uv_a[1], 0, indexes_to_keep))
            uv_b = (torch.index_select(uv_b[0], 0, indexes_to_keep), torch.index_select(uv_b[1], 0, indexes_to_keep))

        # data augmentation
        if self._domain_randomize:
            image_a_rgb = correspondence_augmentation.random_domain_randomize_background(image_a_rgb, image_a_mask)
            image_b_rgb = correspondence_augmentation.random_domain_randomize_background(image_b_rgb, image_b_mask)


        if not self.debug:
            [image_a_rgb], uv_a                 = correspondence_augmentation.random_image_and_indices_mutation([image_a_rgb], uv_a)
            [image_b_rgb, image_b_mask], uv_b   = correspondence_augmentation.random_image_and_indices_mutation([image_b_rgb, image_b_mask], uv_b)
        else: # also mutate depth just for plotting
            [image_a_rgb, image_a_depth], uv_a               = correspondence_augmentation.random_image_and_indices_mutation([image_a_rgb, image_a_depth], uv_a)
            [image_b_rgb, image_b_depth, image_b_mask], uv_b = correspondence_augmentation.random_image_and_indices_mutation([image_b_rgb, image_b_depth, image_b_mask], uv_b)
            image_a_depth_numpy = np.asarray(image_a_depth)
            image_b_depth_numpy = np.asarray(image_b_depth)

        # find non_correspondences

        if index%2:
            metadata['non_match_type'] = 'masked'
            logging.debug("masking non-matches")
            image_b_mask = torch.from_numpy(np.asarray(image_b_mask)).type(torch.FloatTensor)
        else:
            metadata['non_match_type'] = 'non_masked'
            logging.debug("not masking non-matches")
            image_b_mask = None
            
        image_b_shape = image_b_depth_numpy.shape
        image_width  = image_b_shape[1]
        image_height = image_b_shape[0]

        uv_b_non_matches = correspondence_finder.create_non_correspondences(uv_b, image_b_shape, 
            num_non_matches_per_match=self.num_non_matches_per_match, img_b_mask=image_b_mask)

        if self.debug:
            # only want to bring in plotting code if in debug mode
            import correspondence_plotter

            # Just show all images 
            uv_a_long = (torch.t(uv_a[0].repeat(self.num_non_matches_per_match, 1)).contiguous().view(-1,1), 
                     torch.t(uv_a[1].repeat(self.num_non_matches_per_match, 1)).contiguous().view(-1,1))
            uv_b_non_matches_long = (uv_b_non_matches[0].view(-1,1), uv_b_non_matches[1].view(-1,1) )
            
            # Show correspondences
            if uv_a is not None:
                fig, axes = correspondence_plotter.plot_correspondences_direct(image_a_rgb, image_a_depth_numpy, image_b_rgb, image_b_depth_numpy, uv_a, uv_b, show=False)
                correspondence_plotter.plot_correspondences_direct(image_a_rgb, image_a_depth_numpy, image_b_rgb, image_b_depth_numpy,
                                                  uv_a_long, uv_b_non_matches_long,
                                                  use_previous_plot=(fig,axes),
                                                  circ_color='r')


        # image_a_rgb, image_b_rgb = self.both_to_tensor([image_a_rgb, image_b_rgb])

        # convert PIL.Image to torch.FloatTensor
        image_a_rgb = self.rgb_image_to_tensor(image_a_rgb)
        image_b_rgb = self.rgb_image_to_tensor(image_b_rgb)

        uv_a_long = (torch.t(uv_a[0].repeat(self.num_non_matches_per_match, 1)).contiguous().view(-1,1), 
                     torch.t(uv_a[1].repeat(self.num_non_matches_per_match, 1)).contiguous().view(-1,1))
        uv_b_non_matches_long = (uv_b_non_matches[0].view(-1,1), uv_b_non_matches[1].view(-1,1) )

        # flatten correspondences and non_correspondences
        matches_a = uv_a[1].long()*image_width+uv_a[0].long()
        matches_b = uv_b[1].long()*image_width+uv_b[0].long()
        non_matches_a = uv_a_long[1].long()*image_width+uv_a_long[0].long()
        non_matches_a = non_matches_a.squeeze(1)
        non_matches_b = uv_b_non_matches_long[1].long()*image_width+uv_b_non_matches_long[0].long()
        non_matches_b = non_matches_b.squeeze(1)

        return "matches", image_a_rgb, image_b_rgb, matches_a, matches_b, non_matches_a, non_matches_b, metadata
Example #36
    def forward(self, x):
        return torch.t(x)
    print("cv*b failed")
#cv_res = Variable(torch.tensor([]), requires_grad=True)
for ccv in cv:
    print("ccv*b:", ccv*b)
#    cv_res += ccv*b
print("cv_by_list:", [ccv*b for ccv in cv])
print("\n")

# Matrix(dim2, dim1) dot_product with BatchVector(batchsize, dim1)
batchsize=3
dim1=2
dim2=4
v = torch.randn(batchsize, dim1)
M = torch.randn(dim2, dim1)
print("v:",v)
print("torch.t(v):",torch.t(v))
print("M:",M)
print("M*v^T:", M.matmul(torch.t(v)))
print("\n")

# BatchMatrix(batch_size, dim2, dim1) element wise product with BatchVector(batchsize, dim2)
dim1=2
dim2=3
batch_size=4
x=torch.rand(dim2, dim1)
x_batch=torch.rand(batch_size, dim2, dim1)
y=torch.rand(batch_size, dim2)
print("x.shape:",x.shape)
print("x_batch.shape:",x_batch.shape)
print("y.shape:",y.shape)
#print("x:",x)
Example #38
    def train_model(self, args):
        num_exp = args.num_exp
        start_graph = args.start_graph
        end_graph = args.end_graph
        window_size = args.window
        dropout = args.dropout
        alpha = args.alpha
        learning_rate = args.learning_rate
        negative_sample = args.ns

        teacher_n_heads = args.teacher_n_heads
        teacher_embed_dim = args.teacher_embed_size

        student_embed_dim = args.student_emb
        student_n_heads = args.student_heads

        results = {}
        print("Start training")
        for graph in range(start_graph, end_graph + 1):
            results[graph] = {
                'teacher': {
                    'num_params': 0,
                    'mae': 0.,
                    'rmse': 0.
                },
                'student': {
                    'num_params': 0,
                    'mae': 0.,
                    'rmse': 0.
                }
            }
            teacher_mae = []
            teacher_rmse = []
            teacher_number_of_params = []

            student_mae = []
            student_rmse = []
            student_number_of_params = []
            train_adj_norm, train_adj_label, train_adj_ind, features, test_adj, test_adj_ind = self.construct_dataset(
                graph, window_size, negative_sample)
            for i in range(num_exp):
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                print("Experiment ", i)
                num_cells = len(train_adj_norm)
                teacher_model = EGAD(num_cells, features[0].shape[0],
                                     2 * teacher_embed_dim, teacher_embed_dim,
                                     teacher_n_heads, dropout,
                                     alpha).to(device=self.device)
                model_params = self.count_parameters(teacher_model)
                teacher_number_of_params.append(model_params)
                optimizer = optim.Adam(teacher_model.parameters(),
                                       lr=learning_rate)
                for epoch in range(100):
                    teacher_model.train()
                    optimizer.zero_grad()
                    output = teacher_model(features, train_adj_norm)
                    reconstruction = torch.sigmoid(
                        torch.mm(output, torch.t(output)))

                    reconstructed_val = reconstruction[train_adj_ind]

                    predicted = reconstructed_val
                    target = train_adj_label

                    criterion = nn.MSELoss()

                    R_loss = criterion(predicted, target)
                    loss_train = torch.sqrt(R_loss)
                    loss_train.backward()

                    optimizer.step()
                print("Teacher finished")

                teacher_model.eval()

                final_output = teacher_model(features, train_adj_norm)
                train_embeddings = self.get_edge_embeddings(
                    final_output,
                    train_adj_ind).detach().to(device=self.device)
                test_embeddings = self.get_edge_embeddings(
                    final_output, test_adj_ind).detach().to(device=self.device)

                mae_score, rmse_score = self.evaluate_model(
                    teacher_embed_dim, train_embeddings, train_adj_label,
                    test_embeddings, test_adj)
                teacher_mae.append(mae_score)
                teacher_rmse.append(rmse_score)

                print("TEACHER FINISHED for GRAPH {} and EXP {}".format(
                    graph, i))

                ##### STUDENT
                if args.distillation == 1:

                    student_model = EGAD(num_cells, features[0].shape[0],
                                         2 * student_embed_dim,
                                         student_embed_dim, student_n_heads,
                                         dropout, alpha).to(device=self.device)
                    model_params = self.count_parameters(student_model)
                    student_number_of_params.append(model_params)
                    optimizer = optim.Adam(student_model.parameters(),
                                           lr=learning_rate)
                    for epoch in range(100):
                        student_model.train()
                        optimizer.zero_grad()
                        output = student_model(features, train_adj_norm)
                        reconstruction = torch.sigmoid(
                            torch.mm(output, torch.t(output)))

                        teacher_output = teacher_model(features,
                                                       train_adj_norm)
                        teacher_reconstruction = torch.sigmoid(
                            torch.mm(teacher_output, torch.t(teacher_output)))

                        student_reconstructed_val = reconstruction[
                            train_adj_ind]
                        teacher_reconstruction_val = teacher_reconstruction[
                            train_adj_ind]

                        criterion = nn.MSELoss()
                        student_R_loss = criterion(
                            student_reconstructed_val,
                            train_adj_label) + criterion(
                                teacher_reconstruction_val, train_adj_label)
                        loss_train = torch.sqrt(student_R_loss)
                        loss_train.backward()

                        optimizer.step()

                    student_model.eval()
                    final_output = student_model(features, train_adj_norm)
                    train_embeddings = self.get_edge_embeddings(
                        final_output,
                        train_adj_ind).detach().to(device=self.device)
                    test_embeddings = self.get_edge_embeddings(
                        final_output,
                        test_adj_ind).detach().to(device=self.device)

                    mae_score, rmse_score = self.evaluate_model(
                        student_embed_dim, train_embeddings, train_adj_label,
                        test_embeddings, test_adj)

                    student_mae.append(mae_score)
                    student_rmse.append(rmse_score)

            results[graph]['teacher']['num_params'] = np.mean(
                teacher_number_of_params)
            results[graph]['teacher']['mae'] = np.mean(teacher_mae)
            results[graph]['teacher']['rmse'] = np.mean(teacher_rmse)
            if args.distillation == 1:
                results[graph]['student']['num_params'] = np.mean(
                    student_number_of_params)
                results[graph]['student']['mae'] = np.mean(student_mae)
                results[graph]['student']['rmse'] = np.mean(student_rmse)
            print(
                "Graph {} : TEACHER N_PARAMS {} : TEACHER MAE {} : TEACHER RMSE {} : STUDENT N_PARAMS {} : STUDENT MAE {} : STUDENT RMSE {}"
                .format(graph, results[graph]['teacher']['num_params'],
                        results[graph]['teacher']['mae'],
                        results[graph]['teacher']['rmse'],
                        results[graph]['student']['num_params'],
                        results[graph]['student']['mae'],
                        results[graph]['student']['rmse']))
        return results
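
# Both the teacher and the student above reconstruct the weighted adjacency
# with an inner-product decoder, sigmoid(Z @ Z^T), scored with a root-MSE on
# the observed entries.  A minimal standalone sketch of that decoder and loss
# (random placeholder tensors, not the EGAD outputs):
import torch
import torch.nn as nn

num_nodes, embed_dim = 6, 4
Z = torch.randn(num_nodes, embed_dim, requires_grad=True)    # node embeddings
reconstruction = torch.sigmoid(torch.mm(Z, torch.t(Z)))      # (num_nodes, num_nodes)

train_adj_ind = (torch.tensor([0, 1, 2]), torch.tensor([3, 4, 5]))  # observed edges
train_adj_label = torch.tensor([0.9, 0.1, 0.5])                     # their weights

loss_train = torch.sqrt(nn.MSELoss()(reconstruction[train_adj_ind], train_adj_label))
loss_train.backward()
print(loss_train.item())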
Example #39
    def forward(self, x):
        return torch.mm(self.thetas, torch.t(x).float())
import numpy as np
import time
import torch

n1 = torch.rand(20000, 3).cuda()
n2 = torch.rand(21000, 3).cuda()

end = time.time()

sum_1 = torch.t(torch.sum(n1**2, 1).repeat(n2.size()[0], 1))

sum_2 = torch.sum(n2**2, 1).repeat(n1.size()[0], 1)

knnDist, _ = torch.min(torch.addmm(1.0, sum_1 + sum_2, -2.0, n1, torch.t(n2)),
                       0)
knnDist = torch.sqrt(knnDist)

print(time.time() - end)
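
# The timing script above builds the squared-distance matrix as
# ||a||^2 + ||b||^2 - 2 a b^T with the legacy addmm(beta, mat, alpha, m1, m2)
# call signature.  On recent PyTorch the same nearest-neighbour distances can
# be computed with torch.cdist; a small CPU sketch:
import torch

n1 = torch.rand(2000, 3)
n2 = torch.rand(2100, 3)

dist = torch.cdist(n1, n2)        # (2000, 2100) Euclidean distances
knnDist, _ = torch.min(dist, 0)   # nearest n1 neighbour for every point of n2
print(knnDist.shape)              # torch.Size([2100])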
Example #41
    def forward(self, fea_v, length, target_start, target_end):
        if self.add_char:
            word_v = fea_v[0]
            char_v = fea_v[1]
        else:
            word_v = fea_v
        batch_size = word_v.size(0)
        seq_length = word_v.size(1)

        word_emb = self.embedding(word_v)
        word_emb = self.dropout_emb(word_emb)
        if self.static:
            word_static = self.embedding_static(word_v)
            word_static = self.dropout_emb(word_static)
            word_emb = torch.cat([word_emb, word_static], 2)

        x = torch.transpose(word_emb, 0, 1)
        packed_words = pack_padded_sequence(x, length)
        lstm_out, self.hidden = self.lstm(packed_words, self.hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        ##### lstm_out: (seq_len, batch_size, hidden_size)
        lstm_out = self.dropout_lstm(lstm_out)
        x = lstm_out
        x = x.transpose(0, 1)
        ##### batch version
        # x: variable (seq_len, batch_size, hidden_size)
        # target_start: variable (batch_size)
        _, start = torch.max(target_start.unsqueeze(0), dim=1)
        max_start = utils.to_scalar(target_start[start])
        _, end = torch.min(target_end.unsqueeze(0), dim=1)
        min_end = utils.to_scalar(target_end[end])

        max_length = 0
        for index in range(batch_size):
            x_len = x[index].size(0)
            start = utils.to_scalar(target_start[index])
            end = utils.to_scalar(target_end[index])
            none_t = x_len - (end - start + 1)
            if none_t > max_length: max_length = none_t

        left_save = []
        mask_left_save = []
        right_save = []
        mask_right_save = []
        target_save = []
        none_target = []
        mask_none_target = []
        for idx in range(batch_size):
            mask_none_t = []
            none_t = None
            x_len_cur = x[idx].size(0)
            start_cur = utils.to_scalar(target_start[idx])
            end_cur = utils.to_scalar(target_end[idx])

            if start_cur != 0:
                left = x[idx][:start_cur]
                none_t = left
                mask_none_t.extend([1] * start_cur)
            if end_cur != (x_len_cur - 1):
                right = x[idx][(end_cur + 1):]
                if none_t is not None:
                    none_t = torch.cat([none_t, right], 0)
                else:
                    none_t = right
                mask_none_t.extend([1] * (x_len_cur - end_cur - 1))
            if len(mask_none_t) != max_length:
                add_t = Variable(
                    torch.zeros((max_length - len(mask_none_t)),
                                self.lstm_hiddens))
                if self.use_cuda: add_t = add_t.cuda()
                mask_none_t.extend([0] * (max_length - len(mask_none_t)))
                # print(add_t)
                none_t = torch.cat([none_t, add_t], 0)
            mask_none_target.append(mask_none_t)
            none_target.append(none_t.unsqueeze(0))

            x_len_cur = x[idx].size(0)
            start_cur = utils.to_scalar(target_start[idx])
            left_len_cur = start_cur
            left_len_max = max_start
            if start_cur != 0:
                x_cur_left = x[idx][:start_cur]
                left_len_sub = left_len_max - left_len_cur
                mask_cur_left = [1 for _ in range(left_len_cur)]
            else:
                x_cur_left = x[idx][0].unsqueeze(0)
                left_len_sub = left_len_max - 1
                # mask_cur_left = [-1e+20]
                mask_cur_left = [0]
            # x_cur_left: variable (start_cur, two_hidden_size)
            # mask_cur_left = [1 for _ in range(start_cur)]
            # mask_cur_left: list (start_cur)
            if start_cur < max_start:
                if left_len_sub == 0: print('error')
                add = Variable(torch.rand(left_len_sub, self.lstm_hiddens))
                if self.use_cuda: add = add.cuda()
                x_cur_left = torch.cat([x_cur_left, add], dim=0)
                # x_cur_left: variable (max_start, two_hidden_size)
                left_save.append(x_cur_left.unsqueeze(0))
                # mask_cur_left.extend([-1e+20 for _ in range(left_len_sub)])
                mask_cur_left.extend([0 for _ in range(left_len_sub)])
                # mask_cur_left: list (max_start)
                mask_left_save.append(mask_cur_left)
            else:
                left_save.append(x_cur_left.unsqueeze(0))
                mask_left_save.append(mask_cur_left)

            end_cur = utils.to_scalar(target_end[idx])
            right_len_cur = x_len_cur - end_cur - 1
            right_len_max = x_len_cur - min_end - 1
            if (end_cur + 1) != x_len_cur:
                x_cur_right = x[idx][(end_cur + 1):]
                right_len_sub = right_len_max - right_len_cur
                mask_cur_right = [1 for _ in range(right_len_cur)]
            else:
                x_cur_right = x[idx][end_cur].unsqueeze(0)
                right_len_sub = right_len_max - right_len_cur - 1
                # mask_cur_right = [-1e+20]
                mask_cur_right = [0]
            # x_cur_right: variable ((x_len_cur-end_cur-1), two_hidden_size)
            # mask_cur_right = [1 for _ in range(right_len_cur)]
            # mask_cur_right: list (x_len_cur-end_cur-1==right_len)
            if end_cur > min_end:
                if right_len_sub == 0: print('error2')
                add = Variable(torch.rand(right_len_sub, self.lstm_hiddens))
                if self.use_cuda: add = add.cuda()
                x_cur_right = torch.cat([x_cur_right, add], dim=0)
                right_save.append(x_cur_right.unsqueeze(0))
                # mask_cur_right.extend([-1e+20 for _ in range(right_len_sub)])
                mask_cur_right.extend([0 for _ in range(right_len_sub)])
                mask_right_save.append(mask_cur_right)
            else:
                right_save.append(x_cur_right.unsqueeze(0))
                mask_right_save.append(mask_cur_right)

            # target_sub = end_cur-start_cur
            x_target = x[idx][start_cur:(end_cur + 1)]
            x_average_target = torch.mean(x_target, 0)
            target_save.append(x_average_target.unsqueeze(0))
        mask_left_save = Variable(torch.ByteTensor(mask_left_save))
        # mask_left_save: variable (batch_size, left_len_max)
        mask_right_save = Variable(torch.ByteTensor(mask_right_save))
        # mask_right_save: variable (batch_size, right_len_max)
        left_save = torch.cat(left_save, dim=0)
        right_save = torch.cat(right_save, dim=0)
        target_save = torch.cat(target_save, dim=0)
        # left_save: variable (batch_size, left_len_max, two_hidden_size)
        # right_save: variable (batch_size, right_len_max, two_hidden_size)
        # target_save: variable (batch_size, two_hidden_size)
        none_target = torch.cat(none_target, 0)
        mask_none_target = Variable(torch.ByteTensor(mask_none_target))
        if self.use_cuda:
            mask_right_save = mask_right_save.cuda()
            mask_left_save = mask_left_save.cuda()
            left_save = left_save.cuda()
            right_save = right_save.cuda()
            target_save = target_save.cuda()
            mask_none_target = mask_none_target.cuda()
            none_target = none_target.cuda()

        # s, s_alpha = self.attention(x, target_save, None)
        s = self.attention(none_target, target_save, mask_none_target)
        # s_l, s_l_alpha = self.attention_l(left_save, target_save, mask_left_save)
        # s_r, s_r_alpha = self.attention_r(right_save, target_save, mask_right_save)
        s_l = self.attention_l(left_save, target_save, mask_left_save)
        s_r = self.attention_r(right_save, target_save, mask_right_save)

        w1s = torch.mm(self.w1, torch.t(s))
        u1t = torch.mm(self.u1, torch.t(target_save))
        if self.use_cuda:
            w1s = w1s.cuda()
            u1t = u1t.cuda()

        if batch_size == self.batch_size:
            z = torch.exp(w1s + u1t + self.b1)
        else:
            z = torch.exp(w1s + u1t)

        z_all = z
        # z_all: variable (two_hidden_size, batch_size)
        z_all = z_all.unsqueeze(2)

        w2s = torch.mm(self.w2, torch.t(s_l))
        u2t = torch.mm(self.u2, torch.t(target_save))
        if self.use_cuda:
            w2s = w2s.cuda()
            u2t = u2t.cuda()
        if batch_size == self.batch_size:
            z_l = torch.exp(w2s + u2t + self.b2)
        else:
            z_l = torch.exp(w2s + u2t)
        # print(z_all)
        # print(z_l)
        z_all = torch.cat([z_all, z_l.unsqueeze(2)], dim=2)

        w3s = torch.mm(self.w3, torch.t(s_r))
        u3t = torch.mm(self.u3, torch.t(target_save))
        if self.use_cuda:
            w3s = w3s.cuda()
            u3t = u3t.cuda()
        if batch_size == self.batch_size:
            z_r = torch.exp(w3s + u3t + self.b3)
        else:
            z_r = torch.exp(w3s + u3t)
        z_all = torch.cat([z_all, z_r.unsqueeze(2)], dim=2)

        # z_all: variable (two_hidden_size, batch_size, 3)
        if self.use_cuda:
            z_all = F.softmax(z_all, dim=2)
        else:
            z_all = F.softmax(z_all)
        # z_all = torch.t(z_all)
        z_all = z_all.permute(2, 1, 0)
        # z = torch.unsqueeze(z_all[:batch_size], 0)
        # z_l = torch.unsqueeze(z_all[batch_size:(2*batch_size)], 0)
        # z_r = torch.unsqueeze(z_all[(2*batch_size):], 0)
        # z = z_all[:batch_size]
        # z_l = z_all[batch_size:(2*batch_size)]
        # z_r = z_all[(2*batch_size):]
        z = z_all[0]
        z_l = z_all[1]
        z_r = z_all[2]

        ss = torch.mul(z, s)
        ss = torch.add(ss, torch.mul(z_l, s_l))
        ss = torch.add(ss, torch.mul(z_r, s_r))

        logit = self.linear_2(ss)
        # print(logit)
        # alpha = [s_alpha, s_l_alpha, s_r_alpha]
        # return logit, alpha
        return logit
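
# The tail of the forward above gates the three context summaries (s, s_l, s_r)
# by stacking their scores, softmax-ing across the three components, and taking
# the weighted sum.  A minimal standalone sketch of that gate; the score tensors
# here are random placeholders standing in for the w_i s_i^T + u_i t^T + b_i terms:
import torch
import torch.nn.functional as F

hidden, batch = 8, 4
s   = torch.randn(batch, hidden)
s_l = torch.randn(batch, hidden)
s_r = torch.randn(batch, hidden)
scores = torch.stack([torch.randn(hidden, batch) for _ in range(3)], dim=0)

gates = F.softmax(scores, dim=0)      # normalise over the three components
gates = gates.permute(0, 2, 1)        # (3, batch, hidden), aligned with s / s_l / s_r
ss = gates[0] * s + gates[1] * s_l + gates[2] * s_r
print(ss.shape)                       # torch.Size([4, 8])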
    def forward(self, x, seq_lengths, transE_args, cuda):
        '''
        Args:
            x: input[0] is arg1, input[1] is arg2
            input[0]: (batch, max_length)
            input[1]: (batch, max_length)

        Returns:
            num_output size
        '''
        arg1 = x[0]  # [N, arg1_max_length] [128, 80]
        arg2 = x[1]  # [N, arg2_max_length] [128, 80]

        # knowledge-enhance with transE
        self.kg_relation, self.kg_relation_list = self.deal_transE(
            transE_args, seq_lengths.size(0), seq_lengths[0], cuda)

        arg1_embed = self.encoder(arg1)
        arg1_embed = self.drop_en(
            arg1_embed)  # [N, arg1_max_length, embed_size] [128, 80, 300]

        arg2_embed = self.encoder(arg2)
        arg2_embed = self.drop_en(
            arg2_embed)  # [N, arg1_max_length, embed_size] [128, 80, 300]

        out_rnn1, ht = self.rnn(arg1_embed, None)  # [128, 80, 600]
        out_rnn2, ht = self.rnn(arg2_embed, None)  # [128, 80, 600]

        last_tensor1 = out_rnn1.contiguous().view(
            seq_lengths.size(0) * seq_lengths[0], -1)  # [128 * 80, 600]
        last_tensor2 = out_rnn2.contiguous().view(
            seq_lengths.size(0) * seq_lengths[0], -1)  # [128 * 80, 600]

        last_tensor = torch.mm(last_tensor1,
                               self.rand_matrix)  # [128 * 80, 600]
        last_tensor = torch.mm(last_tensor,
                               torch.t(last_tensor2))  # [128 * 80, 128 * 80]
        last_tensor = torch.tanh(last_tensor)  # [128 * 80, 128 * 80]

        last_tensor = last_tensor + self.kg_relation  # [128 * 80, 128 * 80] add knowledge
        self.last_tensor = last_tensor

        #  torch.softmax(last_tensor, dim=1) [128 * 80, 128 * 80]
        sf1 = torch.mean(F.softmax(
            last_tensor, dim=1), dim=0, keepdim=True).view(-1, 1).expand(
                seq_lengths.size(0) * seq_lengths[0], self.embed_size *
                2)  # each row sums to 1; [1, 128 * 80] -> [128 * 80, 1] -> [128 * 80, 600]

        sf2 = torch.mean(F.softmax(
            last_tensor, dim=0), dim=1, keepdim=True).expand(
                seq_lengths.size(0) * seq_lengths[0],
                self.embed_size * 2)  # each column sums to 1; [128 * 80, 1] -> [128 * 80, 600]

        out1 = last_tensor1.mul(sf2).view(
            seq_lengths.size(0), -1,
            self.embed_size * 2)  # [128 * 80, 600] -> [128, 80, 600]
        out2 = last_tensor2.mul(sf1).view(
            seq_lengths.size(0), -1,
            self.embed_size * 2)  # [128 * 80, 600] -> [128, 80, 600]

        out = torch.cat((out1, out2),
                        1).view(seq_lengths.size(0),
                                -1)  # [128, 160, 600] -> [128, 160 * 600]

        fc_input = self.bn2(out)  # [128, 160 * 600]
        out_last = F.log_softmax(self.fc(fc_input), dim=1)  # [128, 4]

        return out_last
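
# The forward above scores every pair of positions from the two arguments with
# a bilinear form tanh(H1 @ W @ H2^T) and then softmax-normalises that matrix
# along each direction to reweight the token states.  A self-contained sketch
# of that soft-alignment step (the transE knowledge term and final pooling are
# omitted; shapes are illustrative):
import torch
import torch.nn.functional as F

len1, len2, hidden = 5, 7, 6
H1 = torch.randn(len1, hidden)        # encoder states of arg1
H2 = torch.randn(len2, hidden)        # encoder states of arg2
W = torch.randn(hidden, hidden)       # random interaction matrix

A = torch.tanh(H1 @ W @ torch.t(H2))                     # (len1, len2) scores
w1 = F.softmax(A, dim=0).mean(dim=1, keepdim=True)       # weight on each arg1 token
w2 = F.softmax(A, dim=1).mean(dim=0, keepdim=True).t()   # weight on each arg2 token
out1 = H1 * w1                        # reweighted arg1 states, (len1, hidden)
out2 = H2 * w2                        # reweighted arg2 states, (len2, hidden)
print(out1.shape, out2.shape)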
def create_non_correspondences(uv_b_matches, img_b_shape, num_non_matches_per_match=100, img_b_mask=None):
    """
    Takes in pixel matches (uv_b_matches) that correspond to matches in another image, and generates non-matches by just sampling in image space.

    Optionally, the non-matches can be sampled from a mask for image b.

    Returns non-matches as pixel positions in image b.

    Please see 'coordinate_conventions.md' documentation for an explanation of pixel coordinate conventions.

    ## Note that arg uv_b_matches are the outputs of batch_find_pixel_correspondences()

    :param uv_b_matches: tuple of torch.FloatTensors, where each FloatTensor is length n, i.e.:
        (torch.FloatTensor, torch.FloatTensor)

    :param img_b_shape: tuple of (H,W) which is the shape of the image

    (optional)
    :param num_non_matches_per_match: int

    (optional)
    :param img_b_mask: torch.FloatTensor (can be cuda or not)
        - masked image, we will select from the non-zero entries
        - shape is H x W
     
    :return: tuple of torch.FloatTensors, i.e. (torch.FloatTensor, torch.FloatTensor).
        - The first element of the tuple is all "u" pixel positions, and the second element of the tuple is all "v" positions
        - Each torch.FloatTensor is of shape torch.Shape([num_matches, non_matches_per_match])
        - This shape makes it so that each row of the non-matches corresponds to the row for the match in uv_a
    """
    image_width  = img_b_shape[1]
    image_height = img_b_shape[0]

    if uv_b_matches is None:
        return None

    num_matches = len(uv_b_matches[0])

    def get_random_uv_b_non_matches():
        return pytorch_rand_select_pixel(width=image_width,height=image_height, 
            num_samples=num_matches*num_non_matches_per_match)

    if img_b_mask is not None:
        img_b_mask_flat = img_b_mask.view(-1,1).squeeze(1)
        mask_b_indices_flat = torch.nonzero(img_b_mask_flat)
        if len(mask_b_indices_flat) == 0:
            print "warning, empty mask b"
            uv_b_non_matches = get_random_uv_b_non_matches()
        else:
            num_samples = num_matches*num_non_matches_per_match
            rand_numbers_b = torch.rand(num_samples)*len(mask_b_indices_flat)
            rand_indices_b = torch.floor(rand_numbers_b).long()
            randomized_mask_b_indices_flat = torch.index_select(mask_b_indices_flat, 0, rand_indices_b).squeeze(1)
            uv_b_non_matches = (randomized_mask_b_indices_flat%image_width, randomized_mask_b_indices_flat//image_width)  # (u, v) = (n % W, n // W)
    else:
        uv_b_non_matches = get_random_uv_b_non_matches()
    
    # for each in uv_a, we want non-matches
    # first just randomly sample "non_matches"
    # we will later move random samples that were too close to being matches
    uv_b_non_matches = (uv_b_non_matches[0].view(num_matches,num_non_matches_per_match), uv_b_non_matches[1].view(num_matches,num_non_matches_per_match))

    # uv_b_matches can now be used to make sure no "non_matches" are too close
    # to preserve tensor size, rather than pruning, we can perturb these in pixel space
    copied_uv_b_matches_0 = torch.t(uv_b_matches[0].repeat(num_non_matches_per_match, 1))
    copied_uv_b_matches_1 = torch.t(uv_b_matches[1].repeat(num_non_matches_per_match, 1))

    diffs_0 = copied_uv_b_matches_0 - uv_b_non_matches[0].type(dtype_float)
    diffs_1 = copied_uv_b_matches_1 - uv_b_non_matches[1].type(dtype_float)

    diffs_0_flattened = diffs_0.view(-1,1)
    diffs_1_flattened = diffs_1.view(-1,1)

    diffs_0_flattened = torch.abs(diffs_0_flattened).squeeze(1)
    diffs_1_flattened = torch.abs(diffs_1_flattened).squeeze(1)


    need_to_be_perturbed = torch.zeros_like(diffs_0_flattened)
    ones = torch.ones_like(diffs_0_flattened)  # marker used to flag non-matches that are too close to a match
    num_pixels_too_close = 1.0
    threshold = torch.ones_like(diffs_0_flattened)*num_pixels_too_close

    # determine which pixels are too close to being matches
    need_to_be_perturbed = where(diffs_0_flattened < threshold, ones, need_to_be_perturbed)
    need_to_be_perturbed = where(diffs_1_flattened < threshold, ones, need_to_be_perturbed)

    minimal_perturb        = num_pixels_too_close/2
    minimal_perturb_vector = (torch.rand(len(need_to_be_perturbed))*2).floor()*(minimal_perturb*2)-minimal_perturb
    std_dev = 10
    random_vector = torch.randn(len(need_to_be_perturbed))*std_dev + minimal_perturb_vector
    perturb_vector = need_to_be_perturbed*random_vector

    uv_b_non_matches_0_flat = uv_b_non_matches[0].view(-1,1).type(dtype_float).squeeze(1)
    uv_b_non_matches_1_flat = uv_b_non_matches[1].view(-1,1).type(dtype_float).squeeze(1)

    uv_b_non_matches_0_flat = uv_b_non_matches_0_flat + perturb_vector
    uv_b_non_matches_1_flat = uv_b_non_matches_1_flat + perturb_vector

    # now just need to wrap around any that went out of bounds

    # handle wrapping in width
    lower_bound = 0.0
    upper_bound = image_width*1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_0_flat) * upper_bound

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat > upper_bound_vec, 
        uv_b_non_matches_0_flat - upper_bound_vec, 
        uv_b_non_matches_0_flat)

    uv_b_non_matches_0_flat = where(uv_b_non_matches_0_flat < lower_bound_vec, 
        uv_b_non_matches_0_flat + upper_bound_vec, 
        uv_b_non_matches_0_flat)

    # handle wrapping in height
    lower_bound = 0.0
    upper_bound = image_height*1.0 - 1
    lower_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * lower_bound
    upper_bound_vec = torch.ones_like(uv_b_non_matches_1_flat) * upper_bound

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat > upper_bound_vec, 
        uv_b_non_matches_1_flat - upper_bound_vec, 
        uv_b_non_matches_1_flat)

    uv_b_non_matches_1_flat = where(uv_b_non_matches_1_flat < lower_bound_vec, 
        uv_b_non_matches_1_flat + upper_bound_vec, 
        uv_b_non_matches_1_flat)

    return (uv_b_non_matches_0_flat.view(num_matches, num_non_matches_per_match),
        uv_b_non_matches_1_flat.view(num_matches, num_non_matches_per_match))
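
# A minimal runnable sketch of the sampling/reshaping convention used above:
# draw num_matches * num_non_matches_per_match random pixels, then reshape so
# that row i of the result holds the non-matches paired with match i.  The
# numbers below are made up for illustration:
import torch

image_height, image_width = 480, 640
num_matches, num_non_matches_per_match = 3, 5

rand_u = (torch.rand(num_matches * num_non_matches_per_match) * image_width).floor()
rand_v = (torch.rand(num_matches * num_non_matches_per_match) * image_height).floor()
uv_b_non_matches = (rand_u.view(num_matches, num_non_matches_per_match),
                    rand_v.view(num_matches, num_non_matches_per_match))
print(uv_b_non_matches[0].shape)   # torch.Size([3, 5]), one row per match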
Example #44
def main(config, needs_save, study_name, k, n_splits):
    if config.run.visible_devices:
        os.environ['CUDA_VISIBLE_DEVICES'] = config.run.visible_devices

    seed = check_manual_seed(config.run.seed)
    print('Using seed: {}'.format(seed))

    train_data_loader, test_data_loader, data_train = get_k_hold_data_loader(
        config.dataset,
        k=k,
        n_splits=n_splits,
    )

    data_train = torch.from_numpy(data_train).float().cuda(non_blocking=True)
    data_train = torch.t(data_train)

    model = get_model(config.model)
    model.cuda()
    model = nn.DataParallel(model)

    print('count params: ', count_parameters(model.module))

    saved_model_path, _, _ = get_saved_model_path(
        config,
        study_name,
        config.model.checkpoint_epoch,
        k,
        n_splits,
    )

    model.load_state_dict(torch.load(saved_model_path)['model'])
    model.eval()

    if config.model.model_name == 'MLP':
        embedding = model.module.get_embedding()

    elif config.model.model_name == 'ModifiedMLP':
        embedding = model.module.get_embedding()

    elif config.model.model_name == 'DietNetworks':
        embedding = model.module.get_embedding(data_train)

    elif config.model.model_name == 'ModifiedDietNetworks':
        embedding = model.module.get_embedding(data_train)

    embedding = embedding.detach().cpu().numpy()

    emb_pca = PCA(n_components=2)
    emb_pca.fit_transform(embedding)

    if config.run.decomp == '1D':
        print('Approximate by 1D PCA')
        axis_1= torch.from_numpy(emb_pca.components_[0])
        score_1 = np.dot(embedding, axis_1)
        approx = np.outer(score_1, axis_1)

    elif config.run.decomp == '2D':
        print('Approximate by 2D PCA')
        axis_1= torch.from_numpy(emb_pca.components_[0])
        score_1 = np.dot(embedding, axis_1)
        axis_2= torch.from_numpy(emb_pca.components_[1])
        score_2 = np.dot(embedding, axis_2)
        approx = np.outer(score_1, axis_1) + np.outer(score_2, axis_2)
        # approx = np.outer(score_2, axis_2)

    approx = torch.from_numpy(approx).float().cuda(non_blocking=True)

    criterion = nn.CrossEntropyLoss()

    def inference(engine, batch):

        x = batch['data'].float().cuda(non_blocking=True)
        y = batch['label'].long().cuda(non_blocking=True)

        assert config.run.transposed_matrix == 'overall'
        x_t = data_train

        with torch.no_grad():
            out, _ = model.module.approx(x, approx)
            l_discriminative = criterion(out, y)
            l_total = l_discriminative

        metrics = calc_metrics(out, y)

        metrics.update({
            'l_total': l_total.item(),
            'l_discriminative': l_discriminative.item(),
        })

        torch.cuda.synchronize()

        return metrics

    evaluator = Engine(inference)

    monitoring_metrics = ['l_total', 'l_discriminative', 'accuracy']

    for metric in monitoring_metrics:
        RunningAverage(
            alpha=0.98,
            output_transform=partial(lambda x, metric: x[metric], metric=metric)
        ).attach(evaluator, metric)

    pbar = ProgressBar()
    pbar.attach(evaluator, metric_names=monitoring_metrics)

    evaluator.run(test_data_loader, 1)

    columns = ['k', 'n_splits', 'epoch', 'iteration'] + list(evaluator.state.metrics.keys())
    values = [str(k), str(n_splits), str(evaluator.state.epoch), str(evaluator.state.iteration)] \
           + [str(value) for value in evaluator.state.metrics.values()]

    values = {c: v for (c, v) in zip(columns, values)}
    values.update({
        'variance_ratio_1': emb_pca.explained_variance_ratio_[0],
        'variance_ratio_2': emb_pca.explained_variance_ratio_[1],
    })
    return values
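
# The '1D'/'2D' branches above approximate the embedding matrix by projecting it
# onto the leading PCA axes and summing the corresponding outer products (the
# PCA mean is ignored, exactly as in the code above).  A small numpy sketch of
# the 2D case with a random placeholder embedding:
import numpy as np
from sklearn.decomposition import PCA

embedding = np.random.randn(100, 16)
emb_pca = PCA(n_components=2)
emb_pca.fit(embedding)

axis_1, axis_2 = emb_pca.components_[0], emb_pca.components_[1]
score_1 = np.dot(embedding, axis_1)
score_2 = np.dot(embedding, axis_2)
approx = np.outer(score_1, axis_1) + np.outer(score_2, axis_2)
print(approx.shape, emb_pca.explained_variance_ratio_)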
Example #45
    def input_word_dimension_labeling_deep(self, model, N_HIDDEN_LAYERS,
                                           word: str):
        word_idx = self.word2idx[word]
        word_emb = np.array(self.embeddings[word_idx])
        data = torch.tensor(word_emb).float().to(model.device)
        pred = torch.argmax(model.forward(data))
        label = self.labels[pred]
        print('Prediction for word:', word, '-', label)
        in_weights = model.hidden_layers[0].weight  # [500,1000]

        # activated_weights = torch.mul(data, in_weights)  # [500,1000]
        # activated_weights_len = len(activated_weights)
        #
        # activated_weights = torch.t(activated_weights)  # [1000,500]

        mul_weights = torch.mul(data, in_weights)  # [500,1000]
        activated_weights_len = len(mul_weights)

        mul_weights = torch.t(mul_weights)  # [1000,500]

        # sum bias to the list of activated weights.
        mul_weights = mul_weights + model.hidden_layers[0].bias.div(
            activated_weights_len)

        activated_weights = torch.relu(torch.sum(mul_weights, 0))
        for idx, val in enumerate(activated_weights):
            if val == 0.0:
                mul_weights[:, idx] = 0.0

        # sum bias to the list of activated weights.

        # mul_weights = mul_weights + model.hidden_layers[0].bias.div(activated_weights_len)

        # activated_emb_to_out_weights = torch.matmul(activated_emb_to_out_weights, out_weights)

        for i in range(1, N_HIDDEN_LAYERS):
            next_layer = torch.t(model.hidden_layers[i].weight)
            mul_weights = torch.matmul(
                mul_weights, next_layer)  # it will be [1000,4] in the end
            activated_weights = torch.relu(torch.sum(mul_weights, 0))
            bias = model.hidden_layers[i].bias.div(
                activated_weights_len)  # it will be length = 4 in the end
            mul_weights = mul_weights + bias

            for idx, val in enumerate(activated_weights):
                if val == 0.0:
                    mul_weights[:, idx] = 0.0

        dimension_label_value_list = []
        for i in range(self.EMBED_DIM):
            dim_values = mul_weights[i]
            label_ind = dim_values.argmax()
            dimension_label_value_list.append(
                (i, label_ind, dim_values[label_ind]))

        dimension_label_value_list = sorted(dimension_label_value_list,
                                            key=lambda x: x[2],
                                            reverse=True)
        for i in range(len(dimension_label_value_list)):
            dim = dimension_label_value_list[i][0]
            label = self.labels[dimension_label_value_list[i][1]]
            value = dimension_label_value_list[i][2]
            top_emb = sorted(enumerate(self.embeddings),
                             key=lambda x: x[1][dim],
                             reverse=True)[:5]
            top_emb = [(self.idx2word[emb_idx]) for emb_idx, emb in top_emb]
            print(
                'dimension %d labelled as %s with score %f. Top words in this dimension:'
                % (dim, label, value.item()))
            print(top_emb)
Example #46
a = torch.tensor(np.arange(24).reshape(4, 3, 2)); print(a)
print(a.chunk(2))
print(a.chunk(2, dim=1))  # split 3 into 2 and 1;
print(a.chunk(4))

## transpose
torch.manual_seed(1)
x = torch.randn(2, 3); print(x, x.shape)  # tensor([[ 0.6614,  0.2669,  0.0617], [ 0.6213, -0.4519, -0.1661]]) torch.Size([2, 3])
tmp = x.transpose(0, 1); print(tmp, tmp.shape)  # tensor([[ 0.6614,  0.6213], [ 0.2669, -0.4519], [ 0.0617, -0.1661]]) torch.Size([3, 2])
y = torch.ones(2, 3, 4); print(y.shape)  # torch.Size([2, 3, 4])
print(y.transpose(0, 1).shape)  # torch.Size([3, 2, 4])
print(y.transpose(1, 2).shape)  # torch.Size([2, 4, 3])

## t(): Convenience method of transpose() for 2D tensors. The given tensor must be 2-dimensional. Swaps dimensions 0 and 1 (x is still the 2x3 tensor from above)
print(x.t())  # tensor([[ 0.6614,  0.6213], [ 0.2669, -0.4519], [ 0.0617, -0.1661]])
print(torch.t(x))  # tensor([[ 0.6614,  0.6213], [ 0.2669, -0.4519], [ 0.0617, -0.1661]])

## eq()
x = torch.Tensor([[1, 2], [3, 0]])
print(x.eq(0))  # tensor([[0, 0], [0, 1]], dtype=torch.uint8)

## permute: rearranges the dimensions; similar to transpose(), but transpose() can only swap two dimensions at a time
x = torch.randn(2, 3, 5); print(x)
tmp = x.permute(2, 0, 1); print(tmp, tmp.shape)  # torch.Size([5, 2, 3])

## repeat(*sizes): sizes (torch.Size or int...): The number of times to repeat this tensor along each dimension
x = torch.Tensor([1, 2, 3]); print(x)
print(x.repeat(2))  # number of dimensions stays the same
print(x.repeat(4, 2))
print(x.repeat(2, 2, 2))  # can also add new dimensions
x = torch.Tensor([[1, 2], [3, 4]]); print(x, x.shape)  # torch.Size([2, 2])