Example #1
import torch

# `distribution_function` and `poincare_function` are modules from the
# surrounding project (hyperbolic GMM utilities) and are assumed importable.

def predict(Z_train, Z_test, Y_train, Y_test, pi, mu, sigma):
    # assign each training point to its most likely Gaussian component
    G_train = distribution_function.weighted_gmm_pdf(
        pi, Z_train, mu, sigma, poincare_function.distance)
    G_train = G_train.max(-1)[1] + 1

    # count, for each Gaussian, how many training points of each class fall into it
    predict_class = torch.zeros(len(mu), len(pi))
    for j, v in enumerate(G_train):
        predict_class[v.item() - 1][torch.LongTensor(Y_train[j]) - 1] += 1
    # greedily associate each Gaussian with a class: repeatedly take the
    # (Gaussian, class) pair with the highest remaining count, record the
    # mapping, then mask that Gaussian out (classes may be reused)
    g = torch.zeros(len(mu))
    for _ in range(len(pi)):
        clas = torch.argmax(predict_class, -1)
        gaus = predict_class[torch.arange(0, len(predict_class)),
                             clas].argmax()
        clas = clas[gaus]
        predict_class[gaus] = -1
        g[gaus] = clas

    # predict: map each test point to its best Gaussian, then to that Gaussian's class
    G_test = distribution_function.weighted_gmm_pdf(pi, Z_test, mu, sigma,
                                                    poincare_function.distance)
    G_test = G_test.max(-1)[1] + 1

    prediction = g[G_test - 1].long()
    return prediction
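
The non-obvious step in `predict` is the greedy Gaussian-to-class association. Below is a minimal, self-contained sketch of the same matching on a made-up count matrix (the `counts` values are purely illustrative), handy for checking the behaviour in isolation:

import torch

# toy count matrix (values invented): rows = Gaussian components, columns = classes
counts = torch.tensor([[5., 1., 0.],
                       [0., 7., 2.],
                       [1., 0., 4.]])

g = torch.zeros(len(counts))
work = counts.clone()
for _ in range(len(counts)):
    clas = torch.argmax(work, -1)                        # best class per Gaussian
    gaus = work[torch.arange(len(work)), clas].argmax()  # strongest Gaussian overall
    g[gaus] = clas[gaus]
    work[gaus] = -1                                      # mask that Gaussian out
print(g)  # tensor([0., 1., 2.]): Gaussian i is labelled with class g[i]

Since only the Gaussian is masked at each step, two Gaussians can end up mapped to the same class; masking the class column as well would force a one-to-one assignment.
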
Example #2
import torch

def accuracy_cross_validation(Z, Y, pi, mu, sigma, nb_set, verbose=True):
    subset_index = torch.randperm(len(Z))
    nb_value = len(Z) // nb_set
    I_CV = [
        subset_index[nb_value * i:min(nb_value * (i + 1), len(Z))]
        for i in range(nb_set)
    ]
    acc_total = 0.
    for i, test_index in enumerate(I_CV):
        # create train dataset
        train_index = torch.cat(
            [subset for ci, subset in enumerate(I_CV) if (i != ci)], 0)
        Z_train = Z[train_index]
        Y_train = [Y[ic.item()] for ic in train_index]

        # create test dataset
        Z_test = Z[test_index]
        Y_test = [Y[ic.item()] for ic in test_index]

        if verbose:
            print("Set " + str(i) + " :")
            print("\t train size -> " + str(len(Z_train)))
            print("\t test size -> " + str(len(Z_test)))
            print("Associating each Gaussian with a class")

        G_train = distribution_function.weighted_gmm_pdf(
            pi, Z_train, mu, sigma, poincare_function.distance)
        G_train = G_train.max(-1)[1] + 1

        # count, for each Gaussian, how many training points of each class fall into it
        predict_class = torch.zeros(len(mu), len(pi))
        for j, v in enumerate(G_train):
            predict_class[v.item() - 1][torch.LongTensor(Y_train[j]) - 1] += 1
        # greedily associate each Gaussian with a class (same procedure as in
        # Example #1)
        g = torch.zeros(len(mu))
        for _ in range(len(pi)):
            clas = torch.argmax(predict_class, -1)
            gaus = predict_class[torch.arange(0, len(predict_class)),
                                 clas].argmax()
            clas = clas[gaus]
            predict_class[gaus] = -1
            g[gaus] = clas

        # predict
        G_test = distribution_function.weighted_gmm_pdf(
            pi, Z_test, mu, sigma, poincare_function.distance)
        G_test = G_test.max(-1)[1] + 1

        prediction = g[G_test - 1].long()
        acc = accuracy(prediction,
                       torch.LongTensor([i[0] - 1 for i in Y_test]))
        acc_total += acc.item()
    return acc_total / len(I_CV)
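
One detail of the fold construction is worth noting: `nb_value = len(Z) // nb_set` floors, and `nb_value * (i + 1)` never exceeds `len(Z)`, so when `len(Z)` is not divisible by `nb_set` the trailing examples land in no fold. A toy reproduction (data and sizes made up):

import torch

Z = torch.randn(10, 2)  # ten fake embeddings
nb_set = 3
subset_index = torch.randperm(len(Z))
nb_value = len(Z) // nb_set
I_CV = [subset_index[nb_value * i:min(nb_value * (i + 1), len(Z))]
        for i in range(nb_set)]
print(sum(len(fold) for fold in I_CV))  # 9, not 10: one example is never used

Letting the last fold slice to the end of `subset_index` would keep every example.
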
Example #3
import torch

def accuracy_disc_product(z, y, pi, mu, sigma, verbose=False):
    n_disc = len(z)
    n_distrib = len(mu[0])
    # labels arrive 1-based; shift them to 0-based indices
    y = torch.LongTensor([y[i][0] - 1 for i in range(len(y))])

    # first, compute the weighted GMM pdf on each disc
    prob = [
        distribution_function.weighted_gmm_pdf(
            pi[i], z[i], mu[i], sigma[i],
            poincare_function.distance).unsqueeze(0) for i in range(n_disc)
    ]
    if verbose:
        print(torch.cat(prob, 0).shape)
    summed_prob = torch.cat(prob, 0).sum(0)
    if verbose:
        print("summed prob size ->", summed_prob.shape)
    _, associated_distrib = summed_prob.max(-1)
    if verbose:
        print("associated distribution size ->", associated_distrib.shape)
        print("associated distribution ->", associated_distrib)
        print("source labels ->", y)
    label = associated_distrib.numpy()
    label_source = y.numpy()
    sources_number = n_distrib
    # choose the matching routine based on the number of components
    if n_distrib <= 6:
        return accuracy_small_disc_product(label, label_source, sources_number)
    else:
        return accuracy_huge_disc_product(label, label_source, sources_number)
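
For reference, the disc-product aggregation above amounts to stacking the per-disc pdf matrices and summing over the disc axis before taking the argmax. A shape-only sketch with random stand-in scores (no project modules needed):

import torch

# stand-in pdf scores: two discs, four examples, three mixture components
prob = [torch.rand(4, 3).unsqueeze(0) for _ in range(2)]

summed_prob = torch.cat(prob, 0).sum(0)      # (4, 3): scores summed over discs
_, associated_distrib = summed_prob.max(-1)  # winning component per example
print(associated_distrib.shape)              # torch.Size([4])
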
Example #4
    def fit(self,
            dataloader,
            alpha=1.0,
            beta=1.0,
            gamma=0.0,
            pi=None,
            mu=None,
            sigma=None,
            max_iter=100,
            negative_sampling=5):

        if pi is None:
            # no GMM prior supplied: disable the prior loss term
            gamma = 0.0
        else:
            if self.cuda:
                pi = pi.cuda()
                sigma = sigma.cuda()
                mu = mu.cuda()
        progress_bar = tqdm.trange(max_iter) if self.verbose else range(max_iter)
        for i in progress_bar:
            loss_value1, loss_value2, loss_value3, loss_pdf3 = 0, 0, 0, 0
            for example, neigbhors, walks in dataloader:
                self.optimizer.zero_grad()
                # obtain negative examples sampled according to the noise distribution
                with torch.no_grad():
                    negative = self.n_dist.sample(
                        sample_shape=(walks.size(0), walks.size(1),
                                      negative_sampling))
                # move the batch tensors to the CUDA device
                if self.cuda:
                    example = example.cuda()
                    neigbhors = neigbhors.cuda()
                    walks = walks.cuda()
                    negative = negative.cuda()
                # get the needed embeddings
                r_example = example.unsqueeze(1).expand_as(neigbhors)
                embed_source = self.W(r_example)
                embed_neigbhor = self.W(neigbhors)
                embed_source_rw = self.W(walks)
                # split random-walk embeddings into (source, context) pairs
                embed_source_rw, embed_context_rw = (embed_source_rw[:, :, 0],
                                                     embed_source_rw[:, :, 1])
                embed_negative = self.W(negative)
                # computing O1 loss
                loss_o1 = losses.SGDLoss.O1(embed_source, embed_neigbhor)
                # computing O2 loss
                loss_o2 = losses.SGDLoss.O2(embed_source_rw, embed_context_rw,
                                            embed_negative)
                # computing total loss
                loss = alpha * loss_o1.mean() + beta * loss_o2.mean()
                # if we want to use the prior loss
                if gamma > 0:
                    r_example = self.W(example).squeeze()
                    pi_z = pi[example].squeeze()
                    loss_o3 = (
                        -pi_z.detach() *
                        torch.log(1e-4 +
                                  distribution_function.weighted_gmm_pdf(
                                      pi_z.detach(), r_example, mu.detach(),
                                      sigma.detach(), self.d)))
                    # print("loss o3 size ->", loss_o3)
                    loss += gamma * loss_o3.mean()
                    loss_value3 = loss_o3.sum(-1).mean().item()
                    loss_pdf3 = torch.exp(-loss_o3.mean()).item()

                loss_value1 = loss_o1.mean().item()
                loss_value2 = loss_o2.mean().item()
                loss.backward()
                self.optimizer.step()
            if self.verbose:
                progress_bar.set_postfix({
                    "O1": loss_value1,
                    "O2": loss_value2,
                    "O3": loss_value3,
                    "PDF": loss_pdf3
                })
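
The only sampling step inside `fit` is the draw of `negative_sampling` noise nodes per random-walk position. Here is a self-contained sketch of that call pattern, with `torch.distributions.Categorical` standing in for `self.n_dist` (vocabulary size, frequencies, and batch shapes are invented):

import torch

freq = torch.rand(1000)  # invented unigram frequencies over a 1000-node vocabulary
n_dist = torch.distributions.Categorical(probs=freq / freq.sum())

walks = torch.randint(0, 1000, (8, 20, 2))  # fake batch of random walks
negative_sampling = 5
with torch.no_grad():
    negative = n_dist.sample(sample_shape=(walks.size(0), walks.size(1),
                                           negative_sampling))
print(negative.shape)  # torch.Size([8, 20, 5])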