Example #1
    def fit(self, X, y=None, lr=0.001, batch_size=256, num_epochs=10, update_interval=1, tol=1e-3):
        '''X: tensor data'''
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        print("=====Training DEC=======")
        # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, self.parameters()), lr=lr, momentum=0.9)

        print("Initializing cluster centers with kmeans.")
        kmeans = KMeans(self.n_clusters, n_init=20)
        data, _ = self.forward(X)
        y_pred = kmeans.fit_predict(data.data.cpu().numpy())
        y_pred_last = y_pred
        self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
        if y is not None:
            y = y.cpu().numpy()
            print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

        self.train()
        num = X.shape[0]
        num_batch = int(math.ceil(1.0*X.shape[0]/batch_size))
        for epoch in range(num_epochs):
            if epoch%update_interval == 0:
                # update the target distribution p
                _, q = self.forward(X)
                p = self.target_distribution(q).data

                # evaluate the clustering performance
                y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
                if y is not None:
                    print("acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))

                # check stop criterion
                delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / num
                y_pred_last = y_pred
                if epoch>0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print("Reach tolerance threshold. Stopping training.")
                    break

            # train 1 epoch
            train_loss = 0.0
            for batch_idx in range(num_batch):
                xbatch = X[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)]
                pbatch = p[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)]
                
                optimizer.zero_grad()
                inputs = Variable(xbatch)
                target = Variable(pbatch)

                z, qbatch = self.forward(inputs)
                loss = self.loss_function(target, qbatch)
                train_loss += loss.data*len(inputs)
                loss.backward()
                optimizer.step()

            print("#Epoch %3d: Loss: %.4f" % (
                epoch+1, train_loss / num))
    def predict(self, X, y):
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        latent = self.encodeBatch(X)
        q = self.soft_assign(latent)

        # evaluate the clustering performance
        y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
        final_acc, final_nmi = 0.0, 0.0
        if y is not None:
            y = y.data.cpu().numpy()
            final_acc = acc(y, y_pred)
            final_nmi = normalized_mutual_info_score(y, y_pred)
            print("acc: %.5f, nmi: %.5f" % (final_acc, final_nmi))
        return final_acc, final_nmi
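The fit and predict methods above rely on two helpers that are not shown here: soft_assign (the Student's t soft assignment q) and target_distribution (the DEC auxiliary target p). A minimal sketch, assuming a learnable centroid tensor self.mu of shape (n_clusters, z_dim) and self.alpha = 1.0, could look like this:

    def soft_assign(self, z):
        # Student's t similarity between embedded points and cluster centers
        q = 1.0 / (1.0 + torch.sum((z.unsqueeze(1) - self.mu) ** 2, dim=2) / self.alpha)
        q = q ** ((self.alpha + 1.0) / 2.0)
        return q / torch.sum(q, dim=1, keepdim=True)

    def target_distribution(self, q):
        # sharpen q: square it, normalize by cluster frequency, renormalize per sample
        weight = q ** 2 / torch.sum(q, dim=0)
        return (weight.t() / torch.sum(weight, dim=1)).t()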
Example #3
    def initialize_cluster(self, loader, init="k-means++"):
        trainX = []
        trainY = []
        for batch_idx, (X, Y) in enumerate(loader):
            trainX.append(self.encodeBatch(X.float()).cpu())
            trainY.append(Y.cpu())
        trainX = torch.cat(tuple(trainX), 0).numpy()
        trainY = torch.cat(tuple(trainY), 0).numpy()
        n_components = self.n_centroids
        km = KMeans(n_clusters=n_components, init=init).fit(trainX)
        y_pred = km.predict(trainX)
        print("acc: %.5f, nmi: %.5f" % (acc(trainY, y_pred), normalized_mutual_info_score(trainY, y_pred)))
        write_log("acc: %.5f, nmi: %.5f" % (acc(trainY, y_pred), normalized_mutual_info_score(trainY, y_pred)), self.log_dir)

        u_p = km.cluster_centers_
        return u_p, y_pred
Example #4
    def evaluate(self, dataset):
        dataX, dataY = dataset[:]
        X = self.encodeBatch(dataX).cpu().numpy()
        Y = dataY.cpu().numpy()
        y_pred = self.predict(X)
        accuracy = acc(Y, y_pred)
        nmi = normalized_mutual_info_score(Y, y_pred)
        logging.info("acc: %.5f, nmi: %.5f" % (accuracy, nmi))
        return accuracy, nmi
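Every example calls an acc metric alongside normalized_mutual_info_score. This is the usual unsupervised clustering accuracy, which matches predicted cluster ids to ground-truth labels via the Hungarian algorithm; a common implementation (assuming non-negative integer labels) is sketched below:

def acc(y_true, y_pred):
    # count co-occurrences of predicted clusters and true labels
    y_true = y_true.astype(np.int64)
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    # best one-to-one mapping between clusters and labels (Hungarian algorithm)
    from scipy.optimize import linear_sum_assignment
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    return w[row_ind, col_ind].sum() * 1.0 / y_pred.size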
Example #5
    def test(self, X, y):
        kmeans = KMeans(self.n_clusters, n_init=20)
        data, _ = self.forward(X)
        y_pred = kmeans.fit_predict(data.data.cpu().numpy())
        if y is not None:
            y = y.cpu().numpy()
            print(y[0:10], y_pred[0:10])
            print("Kmeans acc: %.5f, nmi: %.5f" %
                  (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
Example #6
    def learn_gmm(self, dataset, max_iter=200, init=False):
        dataX, dataY = dataset[:]
        X = self.encodeBatch(dataX).cpu().numpy()
        Y = dataY.cpu().numpy()
        self.gmmfit(X, max_iter=max_iter, init=init)
        y_pred = self.predict(X)
        logging.info("acc: %.5f, nmi: %.5f" % (acc(Y, y_pred), normalized_mutual_info_score(Y, y_pred)))

        log_prob_norm, log_resp = self._e_step(X)
        return log_resp
Example #7
    def initialize_cluster(self, trainX, trainY, init="k-means++"):
        trainX = self.encodeBatch(trainX)
        trainX = trainX.cpu().numpy()
        trainY = trainY.cpu().numpy()
        n_components = len(np.unique(trainY))
        km = KMeans(n_clusters=n_components, init=init).fit(trainX)
        y_pred = km.predict(trainX)
        print("acc: %.5f, nmi: %.5f" % (acc(
            trainY, y_pred), normalized_mutual_info_score(trainY, y_pred)))

        u_p = km.cluster_centers_
        return u_p, y_pred
    def fit(self,
            anchor,
            positive,
            negative,
            ml_ind1,
            ml_ind2,
            cl_ind1,
            cl_ind2,
            mask,
            use_global,
            ml_p,
            cl_p,
            X,
            y=None,
            lr=0.001,
            batch_size=256,
            num_epochs=10,
            update_interval=1,
            tol=1e-3,
            use_kmeans=True,
            plotting="",
            clustering_loss_weight=1):

        # save intermediate results for plotting
        intermediate_results = collections.defaultdict(lambda: {})
        '''X: tensor data'''
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        print("=====Training IDEC=======")
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      self.parameters()),
                               lr=lr)

        if use_kmeans:
            print("Initializing cluster centers with kmeans.")
            kmeans = KMeans(self.n_clusters, n_init=20)
            data = self.encodeBatch(X)
            y_pred = kmeans.fit_predict(data.data.cpu().numpy())
            y_pred_last = y_pred
            self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
        else:
            # use kmeans to randomly initialize cluster centers
            print("Randomly initializing cluster centers.")
            kmeans = KMeans(self.n_clusters, n_init=1, max_iter=1)
            data = self.encodeBatch(X)
            y_pred = kmeans.fit_predict(data.data.cpu().numpy())
            y_pred_last = y_pred
            self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))

        if y is not None:
            y = y.cpu().numpy()
            # print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
        self.train()
        num = X.shape[0]
        num_batch = int(math.ceil(1.0 * X.shape[0] / batch_size))
        ml_num_batch = int(math.ceil(1.0 * ml_ind1.shape[0] / batch_size))
        cl_num_batch = int(math.ceil(1.0 * cl_ind1.shape[0] / batch_size))
        tri_num_batch = int(math.ceil(1.0 * anchor.shape[0] / batch_size))
        cl_num = cl_ind1.shape[0]
        ml_num = ml_ind1.shape[0]
        tri_num = anchor.shape[0]

        final_acc, final_nmi, final_epoch = 0, 0, 0
        update_ml = 1
        update_cl = 1
        update_triplet = 1
        for epoch in range(num_epochs):
            if epoch % update_interval == 0:
                # update the target distribution p
                latent = self.encodeBatch(X)
                q = self.soft_assign(latent)
                p = self.target_distribution(q).data

                # evaluate the clustering performance
                y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
                if use_global:
                    y_dict = collections.defaultdict(list)
                    ind1, ind2 = [], []
                    for i in range(y_pred.shape[0]):
                        y_dict[y_pred[i]].append(i)
                    for key in y_dict.keys():
                        if y is not None:
                            print("predicted class: ", key, " total: ",
                                  len(y_dict[key]))
                            #, " mapped index(ground truth): ", np.bincount(y[y_dict[key]]).argmax())

                if y is not None:
                    print("acc: %.5f, nmi: %.5f" % (acc(
                        y, y_pred), normalized_mutual_info_score(y, y_pred)))
                    print("satisfied constraints: %.5f" %
                          self.satisfied_constraints(ml_ind1, ml_ind2, cl_ind1,
                                                     cl_ind2, y_pred))
                    final_acc = acc(y, y_pred)
                    final_nmi = normalized_mutual_info_score(y, y_pred)
                    final_epoch = epoch

                # save model for plotting
                if plotting and (epoch in [10, 20, 30, 40] or epoch % 50 == 0
                                 or epoch == num_epochs - 1):

                    df = pd.DataFrame(latent.cpu().numpy())
                    df["y"] = y
                    df.to_pickle(
                        os.path.join(plotting, "save_model_%d.pkl" % (epoch)))

                    intermediate_results["acc"][str(epoch)] = acc(y, y_pred)
                    intermediate_results["nmi"][str(
                        epoch)] = normalized_mutual_info_score(y, y_pred)
                    with open(
                            os.path.join(plotting,
                                         "intermediate_results.json"),
                            "w") as fp:
                        json.dump(intermediate_results, fp)

                # check stop criterion
                try:
                    delta_label = np.sum(y_pred != y_pred_last).astype(
                        np.float32) / num
                    y_pred_last = y_pred
                    if epoch > 0 and delta_label < tol:
                        print('delta_label ', delta_label, '< tol ', tol)
                        print("Reach tolerance threshold. Stopping training.")

                        # save model for plotting
                        if plotting:

                            df = pd.DataFrame(latent.cpu().numpy())
                            df["y"] = y
                            df.to_pickle(
                                os.path.join(plotting,
                                             "save_model_%d.pkl" % epoch))

                            intermediate_results["acc"][str(epoch)] = acc(
                                y, y_pred)
                            intermediate_results["nmi"][str(
                                epoch)] = normalized_mutual_info_score(
                                    y, y_pred)
                            with open(
                                    os.path.join(plotting,
                                                 "intermediate_results.json"),
                                    "w") as fp:
                                json.dump(intermediate_results, fp)
                        break
                except Exception:
                    pass

            # train 1 epoch for clustering loss
            train_loss = 0.0
            recon_loss_val = 0.0
            cluster_loss_val = 0.0
            instance_constraints_loss_val = 0.0
            global_loss_val = 0.0
            for batch_idx in range(num_batch):
                xbatch = X[batch_idx * batch_size:min((batch_idx + 1) *
                                                      batch_size, num)]
                pbatch = p[batch_idx * batch_size:min((batch_idx + 1) *
                                                      batch_size, num)]
                mask_batch = mask[batch_idx * batch_size:min((batch_idx + 1) *
                                                             batch_size, num)]
                optimizer.zero_grad()
                inputs = Variable(xbatch)
                target = Variable(pbatch)
                # target cluster proportions passed to global_size_loss (ten clusters, 0.1 each)
                cons_detail = np.array(
                    [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
                global_cons = torch.from_numpy(cons_detail).float()
                if use_cuda:
                    global_cons = global_cons.cuda()

                z, qbatch, xrecon = self.forward(inputs)
                if not use_global:
                    cluster_loss = self.cluster_loss(target, qbatch)
                    recon_loss = self.recon_loss(inputs, xrecon)
                    instance_constraints_loss = self.difficulty_loss(
                        qbatch, mask_batch)
                    loss = cluster_loss + recon_loss + instance_constraints_loss
                    loss.backward()
                    optimizer.step()
                    cluster_loss_val += cluster_loss.data * len(inputs)
                    recon_loss_val += recon_loss.data * len(inputs)
                    instance_constraints_loss_val += instance_constraints_loss.data * len(
                        inputs)
                    train_loss = clustering_loss_weight * cluster_loss_val + recon_loss_val + instance_constraints_loss_val
                else:
                    cluster_loss = self.cluster_loss(target, qbatch)
                    recon_loss = self.recon_loss(inputs, xrecon)
                    global_loss = self.global_size_loss(qbatch, global_cons)
                    loss = cluster_loss + recon_loss + global_loss
                    loss.backward()
                    optimizer.step()
                    cluster_loss_val += cluster_loss.data * len(inputs)
                    recon_loss_val += recon_loss.data * len(inputs)
                    global_loss_val += global_loss.data
                    train_loss = clustering_loss_weight * cluster_loss_val + recon_loss_val

            if instance_constraints_loss_val != 0.0:
                print(
                    "#Epoch %3d: Total: %.4f Clustering Loss: %.4f Reconstruction Loss: %.4f Instance Difficulty Loss: %.4f"
                    % (epoch + 1, train_loss / num, cluster_loss_val / num,
                       recon_loss_val / num,
                       instance_constraints_loss_val / num))
            elif global_loss_val != 0.0 and use_global:
                print(
                    "#Epoch %3d: Total: %.4f Clustering Loss: %.4f Reconstruction Loss: %.4f Global Loss: %.4f"
                    % (epoch + 1, train_loss / num +
                       global_loss_val / num_batch, cluster_loss_val / num,
                       recon_loss_val / num, global_loss_val / num_batch))
            else:
                print(
                    "#Epoch %3d: Total: %.4f Clustering Loss: %.4f Reconstruction Loss: %.4f"
                    % (epoch + 1, train_loss / num, cluster_loss_val / num,
                       recon_loss_val / num))
            ml_loss = 0.0
            if epoch % update_ml == 0:
                for ml_batch_idx in range(ml_num_batch):
                    px1 = X[ml_ind1[ml_batch_idx *
                                    batch_size:min(ml_num, (ml_batch_idx + 1) *
                                                   batch_size)]]
                    px2 = X[ml_ind2[ml_batch_idx *
                                    batch_size:min(ml_num, (ml_batch_idx + 1) *
                                                   batch_size)]]
                    pbatch1 = p[ml_ind1[ml_batch_idx *
                                        batch_size:min(ml_num, (ml_batch_idx +
                                                                1) *
                                                       batch_size)]]
                    pbatch2 = p[ml_ind2[ml_batch_idx *
                                        batch_size:min(ml_num, (ml_batch_idx +
                                                                1) *
                                                       batch_size)]]
                    optimizer.zero_grad()
                    inputs1 = Variable(px1)
                    inputs2 = Variable(px2)
                    target1 = Variable(pbatch1)
                    target2 = Variable(pbatch2)
                    z1, q1, xr1 = self.forward(inputs1)
                    z2, q2, xr2 = self.forward(inputs2)
                    loss = (ml_p * self.pairwise_loss(q1, q2, "ML") +
                            self.recon_loss(inputs1, xr1) +
                            self.recon_loss(inputs2, xr2))
                    # 0.1 for mnist/reuters, 1 for fashion; the parameters are tuned via grid search on a validation set
                    ml_loss += loss.data
                    loss.backward()
                    optimizer.step()

            cl_loss = 0.0
            if epoch % update_cl == 0:
                for cl_batch_idx in range(cl_num_batch):
                    px1 = X[cl_ind1[cl_batch_idx *
                                    batch_size:min(cl_num, (cl_batch_idx + 1) *
                                                   batch_size)]]
                    px2 = X[cl_ind2[cl_batch_idx *
                                    batch_size:min(cl_num, (cl_batch_idx + 1) *
                                                   batch_size)]]
                    pbatch1 = p[cl_ind1[cl_batch_idx *
                                        batch_size:min(cl_num, (cl_batch_idx +
                                                                1) *
                                                       batch_size)]]
                    pbatch2 = p[cl_ind2[cl_batch_idx *
                                        batch_size:min(cl_num, (cl_batch_idx +
                                                                1) *
                                                       batch_size)]]
                    optimizer.zero_grad()
                    inputs1 = Variable(px1)
                    inputs2 = Variable(px2)
                    target1 = Variable(pbatch1)
                    target2 = Variable(pbatch2)
                    z1, q1, xr1 = self.forward(inputs1)
                    z2, q2, xr2 = self.forward(inputs2)
                    loss = cl_p * self.pairwise_loss(q1, q2, "CL")
                    cl_loss += loss.data
                    loss.backward()
                    optimizer.step()

            if ml_num_batch > 0 and cl_num_batch > 0:
                print("Pairwise Total:",
                      round(float(ml_loss.cpu()), 2) + float(cl_loss.cpu()),
                      "ML loss", float(ml_loss.cpu()), "CL loss:",
                      float(cl_loss.cpu()))
            triplet_loss = 0.0
            if epoch % update_triplet == 0:
                for tri_batch_idx in range(tri_num_batch):
                    px1 = X[anchor[tri_batch_idx *
                                   batch_size:min(tri_num, (tri_batch_idx +
                                                            1) * batch_size)]]
                    px2 = X[positive[tri_batch_idx *
                                     batch_size:min(tri_num, (tri_batch_idx +
                                                              1) *
                                                    batch_size)]]
                    px3 = X[negative[tri_batch_idx *
                                     batch_size:min(tri_num, (tri_batch_idx +
                                                              1) *
                                                    batch_size)]]
                    pbatch1 = p[anchor[tri_batch_idx *
                                       batch_size:min(tri_num, (tri_batch_idx +
                                                                1) *
                                                      batch_size)]]
                    pbatch2 = p[
                        positive[tri_batch_idx *
                                 batch_size:min(tri_num, (tri_batch_idx + 1) *
                                                batch_size)]]
                    pbatch3 = p[
                        negative[tri_batch_idx *
                                 batch_size:min(tri_num, (tri_batch_idx + 1) *
                                                batch_size)]]
                    optimizer.zero_grad()
                    inputs1 = Variable(px1)
                    inputs2 = Variable(px2)
                    inputs3 = Variable(px3)
                    target1 = Variable(pbatch1)
                    target2 = Variable(pbatch2)
                    target3 = Variable(pbatch3)
                    z1, q1, xr1 = self.forward(inputs1)
                    z2, q2, xr2 = self.forward(inputs2)
                    z3, q3, xr3 = self.forward(inputs3)
                    loss = self.triplet_loss(q1, q2, q3, 0.1)
                    triplet_loss += loss.data
                    loss.backward()
                    optimizer.step()
            if tri_num_batch > 0:
                print("Triplet Loss:", triplet_loss)
        return final_acc, final_nmi, final_epoch
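The IDEC training loop above combines self.cluster_loss (KL between the target distribution p and the soft assignment q) and self.recon_loss (autoencoder reconstruction error). A minimal sketch of the two terms, assuming mean-squared-error reconstruction (the original definitions may differ in weighting), is:

    def cluster_loss(self, p, q):
        # KL(p || q), averaged over the batch; small epsilons for numerical stability
        return torch.mean(torch.sum(p * torch.log((p + 1e-10) / (q + 1e-10)), dim=1))

    def recon_loss(self, x, x_recon):
        # mean squared reconstruction error per sample
        return torch.mean(torch.sum((x_recon - x) ** 2, dim=1))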
    def fit(self,
            trainloader,
            validloader,
            lr=0.001,
            batch_size=128,
            num_epochs=10,
            visualize=False,
            anneal=False,
            optimizer="adam"):
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        if optimizer == "adam":
            optimizer = optim.Adam(self.parameters(), lr=lr)
        elif optimizer == "sgd":
            optimizer = optim.SGD(self.parameters(), lr=lr, momentum=0.9)

        # validate
        self.eval()
        valid_loss = 0.0
        for batch_idx, (inputs, _) in enumerate(validloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            z, outputs = self.forward(inputs)

            loss = self.loss_function(outputs, inputs)
            valid_loss += loss.data * len(inputs)
            # total_loss += valid_recon_loss.data[0] * inputs.size()[0]
            # total_num += inputs.size()[0]

        # valid_loss = total_loss / total_num
        print("#Epoch -1: Valid Loss: %.5f" %
              (valid_loss / len(validloader.dataset)))

        for epoch in range(num_epochs):
            # train 1 epoch
            self.train()
            if anneal:
                adjust_learning_rate(lr, optimizer, epoch)
            train_loss = 0
            for batch_idx, (inputs, labels) in enumerate(trainloader):
                inputs = inputs.view(inputs.size(0), -1).float()
                if use_cuda:
                    inputs = inputs.cuda()
                optimizer.zero_grad()
                inputs = Variable(inputs)

                z, outputs = self.forward(inputs)
                loss = self.loss_function(outputs, inputs)
                train_loss += loss.data * len(inputs)
                loss.backward()
                optimizer.step()
                # print("    #Iter %3d: Reconstruct Loss: %.3f" % (
                #     batch_idx, recon_loss.data[0]))

            # validate
            self.eval()
            valid_loss = 0.0
            for batch_idx, (inputs, labels) in enumerate(validloader):
                inputs = inputs.view(inputs.size(0), -1).float()
                if use_cuda:
                    inputs = inputs.cuda()
                inputs = Variable(inputs)
                z, outputs = self.forward(inputs)

                loss = self.loss_function(outputs, inputs)
                valid_loss += loss.data * len(inputs)

            print("#Epoch %3d: Train Loss: %.5f, Valid Loss: %.5f" %
                  (epoch, train_loss / len(trainloader.dataset),
                   valid_loss / len(validloader.dataset)))

            if epoch % max(1, num_epochs // 10) == 0 or epoch == num_epochs - 1:
                trainX, trainY = self.encodeBatch(trainloader, True)
                testX, testY = self.encodeBatch(validloader, True)
                trainX = trainX.cpu().numpy()
                trainY = trainY.cpu().numpy()
                testX = testX.cpu().numpy()
                testY = testY.cpu().numpy()
                n_components = len(np.unique(trainY))
                km = KMeans(n_clusters=n_components, n_init=20).fit(trainX)
                y_pred = km.predict(testX)
                print("acc: %.5f, nmi: %.5f" %
                      (acc(testY, y_pred),
                       normalized_mutual_info_score(testY, y_pred)))
                gmm = GaussianMixture(
                    n_components=n_components,
                    covariance_type='diag',
                    means_init=km.cluster_centers_).fit(trainX)
                y_pred = gmm.predict(testX)
                print("acc: %.5f, nmi: %.5f" %
                      (acc(testY, y_pred),
                       normalized_mutual_info_score(testY, y_pred)))
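When anneal=True, the pretraining loop above calls an adjust_learning_rate helper that is not part of the snippet. A simple step-decay sketch, assuming the rate is halved every 10 epochs (the actual schedule may differ), is:

def adjust_learning_rate(init_lr, optimizer, epoch):
    # hypothetical schedule: halve the learning rate every 10 epochs
    lr = init_lr * (0.5 ** (epoch // 10))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr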
Example #10
    def fit(self,
            trainloader,
            validloader,
            lr=0.001,
            batch_size=128,
            num_epochs=10,
            visualize=False,
            anneal=False):
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()

        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      self.parameters()),
                               lr=lr)

        # validate
        self.eval()
        valid_loss = 0.0
        for batch_idx, (inputs, _) in enumerate(validloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            z, outputs, mu, logvar = self.forward(inputs)

            loss = self.loss_function(outputs, inputs, z, mu, logvar)
            valid_loss += loss.data * len(inputs)
            # total_loss += valid_recon_loss.data[0] * inputs.size()[0]
            # total_num += inputs.size()[0]

        # valid_loss = total_loss / total_num
        print("#Epoch -1: Valid Loss: %.5f" %
              (valid_loss / len(validloader.dataset)))

        for epoch in range(num_epochs):
            # train 1 epoch
            self.train()
            if anneal:
                adjust_learning_rate(lr, optimizer, epoch)
            train_loss = 0
            for batch_idx, (inputs, _) in enumerate(trainloader):
                inputs = inputs.view(inputs.size(0), -1).float()
                if use_cuda:
                    inputs = inputs.cuda()
                optimizer.zero_grad()
                inputs = Variable(inputs)

                z, outputs, mu, logvar = self.forward(inputs)
                loss = self.loss_function(outputs, inputs, z, mu, logvar)
                train_loss += loss.data * len(inputs)
                loss.backward()
                optimizer.step()
                # print("    #Iter %3d: Reconstruct Loss: %.3f" % (
                #     batch_idx, recon_loss.data[0]))

            # validate
            self.eval()
            valid_loss = 0.0
            for batch_idx, (inputs, _) in enumerate(validloader):
                inputs = inputs.view(inputs.size(0), -1).float()
                if use_cuda:
                    inputs = inputs.cuda()
                inputs = Variable(inputs)
                z, outputs, mu, logvar = self.forward(inputs)

                loss = self.loss_function(outputs, inputs, z, mu, logvar)
                valid_loss += loss.data * len(inputs)
                # total_loss += valid_recon_loss.data[0] * inputs.size()[0]
                # total_num += inputs.size()[0]

                # view reconstruct
                if visualize and batch_idx == 0:
                    n = min(inputs.size(0), 8)
                    comparison = torch.cat([
                        inputs.view(-1, 1, 28, 28)[:n],
                        outputs.view(-1, 1, 28, 28)[:n]
                    ])
                    save_image(comparison.data.cpu(),
                               'results/vae/reconstruct/reconstruction_' +
                               str(epoch) + '.png',
                               nrow=n)

            # valid_loss = total_loss / total_num
            print("#Epoch %3d: Train Loss: %.5f, Valid Loss: %.5f" %
                  (epoch, train_loss / len(trainloader.dataset),
                   valid_loss / len(validloader.dataset)))

            if epoch % max(1, num_epochs // 10) == 0 or epoch == num_epochs - 1:
                trainX, trainY = self.encodeBatch(trainloader, True)
                testX, testY = self.encodeBatch(validloader, True)
                trainX = trainX.numpy()
                trainY = trainY.numpy()
                testX = testX.numpy()
                testY = testY.numpy()
                n_components = len(np.unique(trainY))
                km = KMeans(n_clusters=n_components, n_init=20).fit(trainX)
                y_pred = km.predict(testX)
                print("acc: %.5f, nmi: %.5f" %
                      (acc(testY, y_pred),
                       normalized_mutual_info_score(testY, y_pred)))
                gmm = GaussianMixture(
                    n_components=n_components,
                    covariance_type='diag',
                    means_init=km.cluster_centers_).fit(trainX)
                y_pred = gmm.predict(testX)
                print("acc: %.5f, nmi: %.5f" %
                      (acc(testY, y_pred),
                       normalized_mutual_info_score(testY, y_pred)))

            # view sample
            if visualize:
                sample = Variable(torch.randn(64, self.z_dim))
                if use_cuda:
                    sample = sample.cuda()
                sample = self.decode(sample).cpu()
                save_image(sample.data.view(64, 1, 28, 28),
                           'results/vae/sample/sample_' + str(epoch) + '.png')
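The VAE variant above depends on a loss_function(outputs, inputs, z, mu, logvar) that is not shown. A standard formulation, assuming Bernoulli outputs, a unit-Gaussian prior, and F = torch.nn.functional (the original weighting may differ), is:

    def loss_function(self, recon_x, x, z, mu, logvar):
        # reconstruction term: binary cross-entropy summed over input dimensions
        bce = F.binary_cross_entropy(recon_x, x, reduction='sum')
        # KL divergence between N(mu, sigma^2) and the standard normal prior
        kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        return (bce + kld) / x.size(0)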
Example #11
    def fit(self,
            dataloader,
            lr=0.001,
            batch_size=256,
            num_epochs=10,
            update_interval=1,
            tol=1e-3):
        '''dataloader: DataLoader yielding (inputs, labels) batches'''
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
            # X=X.cuda()
        print("=====Training DEC=======")
        write_log("=====Training DEC=======", self.log_dir)
        # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     self.parameters()),
                              lr=lr,
                              momentum=0.9)

        print("Initializing cluster centers with kmeans.")
        write_log("Initializing cluster centers with kmeans.", self.log_dir)
        kmeans = KMeans(self.n_clusters, n_init=20)
        # original code:
        # data, _ = self.forward(X)
        # compute q batch by batch; X and y are replaced by the DataLoader
        data = []
        y = []
        for batch_idx, (inputs, yi) in enumerate(dataloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            inputs = inputs.cuda()
            datai, _ = self.forward(inputs)
            data.append(datai.data.cpu())
            y.append(yi.data.cpu())
            del inputs
            torch.cuda.empty_cache()
        data = torch.cat(tuple(data), 0)
        y = torch.cat(tuple(y), 0)
        y_pred = kmeans.fit_predict(data)
        y_pred_last = y_pred
        # print(y[0:10], y_pred[0:10])
        self.mu.data.copy_(torch.Tensor(kmeans.cluster_centers_))
        if y is not None:
            y = y.cpu().numpy()
            # print(y.shape,y_pred.shape)
            print("Kmeans acc: %.5f, nmi: %.5f" %
                  (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
            write_log(
                "Kmeans acc: %.5f, nmi: %.5f" %
                (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)),
                self.log_dir)
        del data, y
        torch.cuda.empty_cache()

        self.train()
        # num_batch = int(math.ceil(1.0*X.shape[0]/batch_size))
        for epoch in range(num_epochs):
            tic = timer()
            if epoch % update_interval == 0:
                # update the target distribution p
                # _, q = self.forward(X)
                # compute q batch by batch
                data = []
                y = []
                num = len(dataloader.dataset)
                for batch_idx, (xbatch, yi) in enumerate(dataloader):
                    # xbatch = X[batch_idx * batch_size: min((batch_idx + 1) * batch_size, num)]
                    xbatch = xbatch.float().cuda()
                    datai, _ = self.forward(xbatch)
                    data.append(datai.data.cpu())
                    y.append(yi.data.cpu())
                    del xbatch, datai
                    torch.cuda.empty_cache()
                data = torch.cat(tuple(data), 0)
                y = torch.cat(tuple(y), 0).numpy()
                # print("data:",data,data.shape)
                q = 1.0 / (1.0 + torch.sum(
                    (data.unsqueeze(1) - self.mu.data.cpu())**2, dim=2) /
                           self.alpha)
                q = q**((self.alpha + 1.0) / 2.0)
                q = q / torch.sum(q, dim=1, keepdim=True)
                p = self.target_distribution(q).data
                del data
                torch.cuda.empty_cache()
                # evaluate the clustering performance
                y_pred = torch.argmax(q, dim=1).data.cpu().numpy()
                if y is not None:
                    print("acc: %.5f, nmi: %.5f" % (acc(
                        y, y_pred), normalized_mutual_info_score(y, y_pred)))
                    write_log("acc: %.5f, nmi: %.5f" % (acc(
                        y, y_pred), normalized_mutual_info_score(y, y_pred)),
                              logpath=self.log_dir)
                    if self.writer is not None:
                        self.writer.add_scalars(
                            'dec', {
                                'acc': acc(y, y_pred),
                                'nmi': normalized_mutual_info_score(y, y_pred)
                            }, epoch)
                # check stop criterion
                # stop training once the fraction of changed assignments drops below tol
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / num
                y_pred_last = y_pred
                if epoch > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    # write_log('delta_label '+str(delta_label) +'< tol '+str(tol) )
                    print("Reach tolerance threshold. Stopping training.")
                    # write_log("Reach tolerance threshold. Stopping training.")
                    break

            # train 1 epoch
            train_loss = 0.0
            for batch_idx, (xbatch, _) in enumerate(dataloader):
                # xbatch = X[batch_idx*batch_size : min((batch_idx+1)*batch_size, num)]
                pbatch = p[batch_idx * batch_size:min((batch_idx + 1) *
                                                      batch_size, num)]
                xbatch = xbatch.float().cuda()
                pbatch = pbatch.cuda()

                optimizer.zero_grad()
                inputs = Variable(xbatch)
                target = Variable(pbatch)
                # print(inputs,target)
                z, qbatch = self.forward(inputs)
                loss = self.loss_function(target, qbatch)
                train_loss += loss * len(inputs)
                loss.backward()
                # for param in self.parameters():
                #     print('param', param.grad)
                optimizer.step()
                del xbatch, qbatch, inputs, target, loss
                torch.cuda.empty_cache()
            toc = timer()
            print("cost:", toc - tic)
            print("#Epoch %3d: Loss: %.4f" % (epoch + 1, train_loss / num))
            write_log("#Epoch %3d: Loss: %.4f" % (epoch + 1, train_loss / num),
                      self.log_dir)
            if self.writer is not None:
                self.writer.add_scalars('dec', {'loss': train_loss / num},
                                        epoch + 1)

            torch.cuda.empty_cache()
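A usage sketch for the DataLoader-based fit above (names such as X_tensor, y_tensor, and model are illustrative, not from the original code). Note that batch_size must match the DataLoader's batch size and shuffle must stay False, because the target distribution p is indexed with batch_idx * batch_size:

from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(X_tensor, y_tensor)    # X_tensor: (N, D) floats, y_tensor: (N,) labels
loader = DataLoader(dataset, batch_size=256, shuffle=False)
model.fit(loader, lr=0.01, batch_size=256, num_epochs=100)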
    def fit(self,
            stat,
            y_pred,
            ml_list,
            cl_list,
            ml_ind1,
            ml_ind2,
            cl_ind1,
            cl_ind2,
            ml_p,
            cl_p,
            X,
            y=None,
            lr=0.001,
            batch_size=256,
            num_epochs=100,
            update_interval=1,
            tol=1e-3):
        '''X: tensor data'''
        # use_cuda = torch.cuda.is_available()
        # if use_cuda:
        #     self.cuda()
        print("=====Training IDEC=======")
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      self.parameters()),
                               lr=lr)

        print("Initializing cluster centers with input assignment.")
        data = self.encodeBatch(X).data.cpu().numpy()
        cluster_centers = []
        y_pred_last = y_pred
        for i in range(self.n_clusters):
            cluster_centers.append(np.full(self.z_dim, 0, dtype=np.float32))
        cluster_size = dict()
        for i in range(len(y_pred)):
            cluster_centers[y_pred[i]] += data[i]
            class_id = int(y_pred[i])
            if class_id not in cluster_size:
                cluster_size[class_id] = 1.0
            else:
                cluster_size[class_id] += 1.0
        print(cluster_size)
        for i in range(self.n_clusters):
            cluster_centers[i] /= cluster_size[i]
        cluster_centers = np.asarray(cluster_centers)
        self.mu.data.copy_(torch.Tensor(cluster_centers))
        if y is not None:
            y = y.cpu().numpy()
            # print("Kmeans acc: %.5f, nmi: %.5f" % (acc(y, y_pred), normalized_mutual_info_score(y, y_pred)))
        self.train()
        num = X.shape[0]
        num_batch = int(math.ceil(1.0 * X.shape[0] / batch_size))
        ml_num_batch = int(math.ceil(1.0 * ml_ind1.shape[0] / batch_size))
        cl_num_batch = int(math.ceil(1.0 * cl_ind1.shape[0] / batch_size))
        cl_num = cl_ind1.shape[0]
        ml_num = ml_ind1.shape[0]
        last_acc, last_nmi, final_epoch = 0, 0, 0
        update_ml = 1
        update_cl = 1
        last_delta = 0.0
        last_nmi = 1.0
        arr_acc = []
        for epoch in range(num_epochs):
            # if epoch < 20:
            #     self.mu.data.copy_(torch.Tensor(cluster_centers))
            if epoch % update_interval == 0:
                # update the target distribution p
                latent = self.encodeBatch(X)
                q = self.soft_assign(latent)
                p = self.target_distribution(q).data

                # evaluate the clustering performance
                y_pred = torch.argmax(q, dim=1).data.cpu().numpy()

                if y is not None:
                    num_vcon = 0
                    for i in range(len(ml_list[0])):
                        if y_pred[ml_list[0][i]] != y_pred[ml_list[1][i]]:
                            num_vcon += 1
                    for i in range(len(cl_list[0])):
                        if y_pred[cl_list[0][i]] == y_pred[cl_list[1][i]]:
                            num_vcon += 1
                    last_acc = acc(y, y_pred)
                    last_nmi = normalized_mutual_info_score(
                        y, y_pred, average_method="arithmetic")
                    arr_acc.append(last_acc)
                    if len(arr_acc) > 4:
                        arr_acc = arr_acc[1:]
                    final_epoch = epoch
                    print("NMI -  ACC - #violated constraints")
                    stat.append((last_nmi, last_acc, num_vcon, last_delta))
                    print("%.5f\t%.5f\t%d\n" % (last_nmi, last_acc, num_vcon))
                # check stop criterion
                delta_label = np.sum(y_pred != y_pred_last).astype(
                    np.float32) / num
                y_pred_last = y_pred
                if epoch > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print("Reach tolerance threshold. Stopping training.")
                    break
                print("Delta label:", delta_label)
            # train 1 epoch for clustering loss
            train_loss = 0.0
            recon_loss_val = 0.0
            cluster_loss_val = 0.0
            for batch_idx in range(num_batch):
                xbatch = X[batch_idx * batch_size:min((batch_idx + 1) *
                                                      batch_size, num)]
                pbatch = p[batch_idx * batch_size:min((batch_idx + 1) *
                                                      batch_size, num)]
                optimizer.zero_grad()
                inputs = Variable(xbatch)
                target = Variable(pbatch)

                z, qbatch, xrecon = self.forward(inputs)

                cluster_loss = self.cluster_loss(target, qbatch)
                recon_loss = self.recon_loss(inputs, xrecon)
                loss = cluster_loss + recon_loss
                loss.backward()
                optimizer.step()
                cluster_loss_val += cluster_loss.data * len(inputs)
                recon_loss_val += recon_loss.data * len(inputs)
                train_loss = cluster_loss_val + recon_loss_val

            print(
                "#Epoch %3d: Total: %.4f Clustering Loss: %.4f Reconstruction Loss: %.4f"
                % (epoch + 1, train_loss / num, cluster_loss_val / num,
                   recon_loss_val / num))
            ml_loss = 0.0
            if epoch % update_ml == 0:
                for ml_batch_idx in range(ml_num_batch):
                    px1 = X[ml_ind1[ml_batch_idx *
                                    batch_size:min(ml_num, (ml_batch_idx + 1) *
                                                   batch_size)]]
                    px2 = X[ml_ind2[ml_batch_idx *
                                    batch_size:min(ml_num, (ml_batch_idx + 1) *
                                                   batch_size)]]
                    # pbatch1 = p[ml_ind1[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                    # pbatch2 = p[ml_ind2[ml_batch_idx * batch_size: min(ml_num, (ml_batch_idx + 1) * batch_size)]]
                    optimizer.zero_grad()
                    inputs1 = Variable(px1)
                    inputs2 = Variable(px2)
                    z1, q1, xr1 = self.forward(inputs1)
                    z2, q2, xr2 = self.forward(inputs2)
                    loss = (ml_p * self.pairwise_loss(q1, q2, "ML") +
                            self.recon_loss(inputs1, xr1) +
                            self.recon_loss(inputs2, xr2))
                    # 0.1 for mnist/reuters, 1 for fashion; the parameters are tuned via grid search on a validation set
                    ml_loss += loss.data
                    loss.backward()
                    optimizer.step()

            cl_loss = 0.0
            if epoch % update_cl == 0:
                for cl_batch_idx in range(cl_num_batch):
                    px1 = X[cl_ind1[cl_batch_idx *
                                    batch_size:min(cl_num, (cl_batch_idx + 1) *
                                                   batch_size)]]
                    px2 = X[cl_ind2[cl_batch_idx *
                                    batch_size:min(cl_num, (cl_batch_idx + 1) *
                                                   batch_size)]]
                    # pbatch1 = p[cl_ind1[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                    # pbatch2 = p[cl_ind2[cl_batch_idx * batch_size: min(cl_num, (cl_batch_idx + 1) * batch_size)]]
                    optimizer.zero_grad()
                    inputs1 = Variable(px1)
                    inputs2 = Variable(px2)
                    z1, q1, xr1 = self.forward(inputs1)
                    z2, q2, xr2 = self.forward(inputs2)
                    loss = cl_p * self.pairwise_loss(q1, q2, "CL")
                    cl_loss += loss.data
                    loss.backward()
                    optimizer.step()

            if ml_num_batch > 0 and cl_num_batch > 0:
                print("Pairwise Total:",
                      round(float(ml_loss.cpu()), 2) + float(cl_loss.cpu()),
                      "ML loss", float(ml_loss.cpu()), "CL loss:",
                      float(cl_loss.cpu()))
        return last_acc, last_nmi, final_epoch
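Both constrained variants call self.pairwise_loss(q1, q2, "ML"/"CL") for must-link and cannot-link pairs. One common formulation treats sum_k q1_k * q2_k as the probability that the pair lands in the same cluster; a hedged sketch (not necessarily the original definition):

    def pairwise_loss(self, q1, q2, constraint_type):
        # probability that the two samples are assigned to the same cluster
        same_cluster_prob = torch.sum(q1 * q2, dim=1)
        if constraint_type == "ML":
            # must-link: encourage the pair to share a cluster
            return -torch.mean(torch.log(same_cluster_prob + 1e-10))
        else:
            # cannot-link: encourage the pair to use different clusters
            return -torch.mean(torch.log(1.0 - same_cluster_prob + 1e-10))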
Example #13
    def fit(self,
            trainloader,
            validloader,
            lr=0.001,
            num_epochs=10,
            corrupt=0.3,
            loss_type="mse"):
        """
        data_x: FloatTensor
        valid_x: FloatTensor
        """
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        logging.info("=====Stacked Denoising Autoencoding Layer=======")
        # optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=lr)
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     self.parameters()),
                              lr=lr,
                              momentum=0.9)
        if loss_type == "mse":
            criterion = MSELoss()
        elif loss_type == "cross-entropy":
            criterion = BCELoss()

        # validate
        total_loss = 0.0
        total_num = 0
        for batch_idx, (inputs, _) in enumerate(validloader):
            inputs = inputs.view(inputs.size(0), -1).float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)
            z, outputs = self.forward(inputs)

            valid_recon_loss = criterion(outputs, inputs)
            total_loss += valid_recon_loss.data * len(inputs)
            total_num += inputs.size()[0]

        valid_loss = total_loss / total_num
        logging.info("#Epoch 0: Valid Reconstruct Loss: %.4f" % (valid_loss))
        self.train()
        for epoch in range(num_epochs):
            # train 1 epoch
            adjust_learning_rate(lr, optimizer, epoch)
            train_loss = 0.0
            for batch_idx, (inputs, _) in enumerate(trainloader):
                inputs = inputs.view(inputs.size(0), -1).float()
                inputs_corr = masking_noise(inputs, corrupt)
                if use_cuda:
                    inputs = inputs.cuda()
                    inputs_corr = inputs_corr.cuda()
                optimizer.zero_grad()
                inputs = Variable(inputs)
                inputs_corr = Variable(inputs_corr)

                z, outputs = self.forward(inputs_corr)
                recon_loss = criterion(outputs, inputs)
                train_loss += recon_loss.data * len(inputs)
                recon_loss.backward()
                optimizer.step()

            # validate
            valid_loss = 0.0
            for batch_idx, (inputs, _) in enumerate(validloader):
                inputs = inputs.view(inputs.size(0), -1).float()
                if use_cuda:
                    inputs = inputs.cuda()
                inputs = Variable(inputs)
                z, outputs = self.forward(inputs)

                valid_recon_loss = criterion(outputs, inputs)
                valid_loss += valid_recon_loss.data * len(inputs)

            logging.info(
                "#Epoch %3d: Reconstruct Loss: %.4f, Valid Reconstruct Loss: %.4f"
                % (epoch + 1, train_loss / len(trainloader.dataset),
                   valid_loss / len(validloader.dataset)))

            if epoch % max(1, num_epochs // 10) == 0 or epoch == num_epochs - 1:
                trainX, trainY = self.encodeBatch(trainloader, True)
                testX, testY = self.encodeBatch(validloader, True)
                trainX = trainX.cpu().numpy()
                trainY = trainY.cpu().numpy()
                testX = testX.cpu().numpy()
                testY = testY.cpu().numpy()
                n_components = len(np.unique(trainY))
                km = KMeans(n_clusters=n_components, n_init=20).fit(trainX)
                y_pred = km.predict(testX)
                logging.info("acc: %.5f, nmi: %.5f" %
                             (acc(testY, y_pred),
                              normalized_mutual_info_score(testY, y_pred)))
                gmm = GaussianMixture(
                    n_components=n_components,
                    covariance_type='diag',
                    means_init=km.cluster_centers_).fit(trainX)
                y_pred = gmm.predict(testX)
                logging.info("acc: %.5f, nmi: %.5f" %
                             (acc(testY, y_pred),
                              normalized_mutual_info_score(testY, y_pred)))
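The denoising pretraining above corrupts each batch with masking_noise before encoding. A typical implementation zeroes out a random fraction frac of the entries in every sample:

def masking_noise(data, frac):
    # randomly set a fraction `frac` of input entries to zero
    data_noise = data.clone()
    mask = torch.rand(data.size()) < frac
    data_noise[mask] = 0.0
    return data_noise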
Example #14
    dcn = DeepClusteringNetwork(input_dim=405,
                                z_dim=latent_dim,
                                n_centroids=n_clusters,
                                binary=False,
                                encodeLayer=[500, 500, 2000],
                                decodeLayer=[2000, 500, 500],
                                activation="relu",
                                dropout=0)
    dcn.load_model(sdae_savepath)
    dcn.fit(X, y, lr=0.001, batch_size=batch_size, num_epochs=10)
    # testing
    testdata, _ = dcn.forward(Xt)
    kmeans = KMeans(n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(testdata.detach().cpu().numpy())
    print(metrics.normalized_mutual_info_score(yt, y_pred))
    print(acc(yt, y_pred))
    total_entropy = compute_entropy(n_clusters, y_pred, attr_t, n_bins)
    print("Total entropy: %.5f" % total_entropy)

    acc_dcn.append(acc(yt, y_pred))
    nmi_dcn.append(metrics.normalized_mutual_info_score(yt, y_pred))
    entropy_dcn.append(total_entropy)

    dec = DEC(input_dim=405,
              z_dim=latent_dim,
              n_clusters=n_clusters,
              encodeLayer=[500, 500, 2000],
              activation="relu",
              dropout=0)
    #print(dec)
    # dec.load_model(sdae_savepath)