Example #1
File: DCEC.py  Project: Tracesource/DCEC
    def __init__(self,
                 input_shape,
                 filters=[32, 64, 128, 10],
                 n_clusters=10,
                 alpha=1.0):

        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []

        self.cae = CAE(input_shape, filters)  # build the convolutional autoencoder
        hidden = self.cae.get_layer(name='embedding').output  # keep the features produced by the embedding layer
        self.encoder = Model(inputs=self.cae.input,
                             outputs=hidden)  # encoder model: from the CAE input to the embedding layer

        # Define DCEC model
        clustering_layer = ClusteringLayer(self.n_clusters, name='clustering')(
            hidden)  # the embedding-layer output feeds the clustering layer
        self.model = Model(inputs=self.cae.input,
                           outputs=[
                               clustering_layer, self.cae.output
                           ])  # DCEC model: from the CAE input to both the clustering output and the CAE reconstruction
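
A minimal usage sketch (not part of the original source) showing how the two model outputs and the encoder defined above might be queried; x_batch and the (28, 28, 1) input shape are assumptions for illustration:

# Hypothetical usage; assumes x_batch is a NumPy array of images matching input_shape.
dcec = DCEC(input_shape=(28, 28, 1), filters=[32, 64, 128, 10], n_clusters=10)
q, x_rec = dcec.model.predict(x_batch)    # soft cluster assignments (n, n_clusters) and reconstructions
features = dcec.encoder.predict(x_batch)  # embedded features of dimension filters[-1]
labels = q.argmax(1)                      # hard cluster labels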
Example #2
File: DCEC.py  Project: bradgwest/paap
    def __init__(self,
                 input_shape: Tuple[int, int, int],
                 filters: Iterable[int] = [32, 64, 128, 32],
                 n_clusters: int = 32,
                 alpha: int = 1):
        """DCEC Model

        :param input_shape: Shape of the input layer in the model
        :param filters: Number of filters in the convolutional layers, plus the size of the clustering layer. Hence the
            length should equal len(convolutional layers) + 1.
        :param n_clusters: k, the number of clusters to target
        # TODO Do we need this parameter?
        :param alpha: parameter in Student's t distribution
        """
        # TODO Add activation as a parameter to this model
        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []

        self.cae = CAE(input_shape, filters)
        hidden = self.cae.get_layer(name="embedding").output
        self.encoder = Model(inputs=self.cae.input, outputs=hidden)

        # Define DCEC model
        clustering_layer = ClusteringLayer(self.n_clusters,
                                           name="clustering")(hidden)
        self.model = Model(inputs=self.cae.input,
                           outputs=[clustering_layer, self.cae.output])
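
A hedged instantiation sketch for this typed variant; the 128x128 RGB input shape is an assumption, not taken from the original project:

# Hypothetical instantiation with the defaults shown above.
dcec = DCEC(input_shape=(128, 128, 3), filters=[32, 64, 128, 32], n_clusters=32, alpha=1)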
Example #3
    def __init__(self,
                 input_shape,
                 filters=[32, 64, 128, 10],
                 n_clusters=10,
                 alpha=1.0):
        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []

        self.cae = CAE(input_shape, filters)
        hidden = self.cae.get_layer(name='embedding').output
        self.encoder = Model(inputs=self.cae.input, outputs=hidden)

        # Define DCEC model
        clustering_layer = ClusteringLayer(self.n_clusters,
                                           name='clustering')(hidden)
        self.model = Model(inputs=self.cae.input,
                           outputs=[clustering_layer, self.cae.output])
Example #4
File: DCEC.py  Project: ryansar/DCEC
class DCEC(object):
    def __init__(self,
                 input_shape,
                 filters=[32, 64, 128, 10],
                 n_clusters=10,
                 alpha=1.0):

        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []

        self.cae = CAE(input_shape, filters)
        hidden = self.cae.get_layer(name='embedding').output
        self.encoder = Model(inputs=self.cae.input, outputs=hidden)

        # Define DCEC model
        clustering_layer = ClusteringLayer(self.n_clusters,
                                           name='clustering')(hidden)
        self.model = Model(inputs=self.cae.input,
                           outputs=[clustering_layer, self.cae.output])

    def pretrain(self,
                 x,
                 batch_size=256,
                 epochs=200,
                 optimizer='adam',
                 save_dir='results/temp'):
        print('...Pretraining...')
        self.cae.compile(optimizer=optimizer, loss='mse')
        from keras.callbacks import CSVLogger
        csv_logger = CSVLogger(save_dir + '/pretrain_log.csv')  # use the method's save_dir (args is not defined in this scope)

        # begin training
        t0 = time()
        self.cae.fit(x,
                     x,
                     batch_size=batch_size,
                     epochs=epochs,
                     callbacks=[csv_logger])
        print('Pretraining time: ', time() - t0)
        self.cae.save(save_dir + '/pretrain_cae_model.h5')
        print('Pretrained weights are saved to %s/pretrain_cae_model.h5' %
              save_dir)
        self.pretrained = True

    def load_weights(self, weights_path):
        self.model.load_weights(weights_path)

    def extract_feature(self,
                        x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict(self, x):
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        weight = q**2 / q.sum(0)
        return (weight.T / weight.sum(1)).T
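
    # target_distribution computes the auxiliary distribution used in DEC/DCEC:
    # with f_j = sum_i q_ij (the soft cluster frequencies),
    #   p_ij = (q_ij**2 / f_j) / sum_j' (q_ij'**2 / f_j')
    # i.e. square the soft assignments, down-weight large clusters, and
    # renormalize each row to sum to 1.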

    def compile(self,
                loss=['kld', 'mse'],
                loss_weights=[1, 1],
                optimizer='adam'):
        self.model.compile(loss=loss,
                           loss_weights=loss_weights,
                           optimizer=optimizer)

    def fit(self,
            x,
            y=None,
            batch_size=256,
            maxiter=2e4,
            tol=1e-3,
            update_interval=140,
            cae_weights=None,
            save_dir='./results/temp'):

        print('Update interval', update_interval)
        save_interval = int(x.shape[0] / batch_size) * 5
        print('Save interval', save_interval)

        # Step 1: pretrain if necessary
        t0 = time()
        if not self.pretrained and cae_weights is None:
            print('...pretraining CAE using default hyper-parameters:')
            print('   optimizer=\'adam\';   epochs=200')
            self.pretrain(x, batch_size, save_dir=save_dir)
            self.pretrained = True
        elif cae_weights is not None:
            self.cae.load_weights(cae_weights)
            print('cae_weights is loaded successfully.')

        # Step 2: initialize cluster centers using k-means
        t1 = time()
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name='clustering').set_weights(
            [kmeans.cluster_centers_])

        # Step 3: deep clustering
        # logging file
        import csv, os
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logfile = open(save_dir + '/dcec_log.csv', 'w')
        logwriter = csv.DictWriter(
            logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
        logwriter.writeheader()

        t2 = time()
        loss = [0, 0, 0]
        index = 0
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q, _ = self.model.predict(x, verbose=0)
                p = self.target_distribution(
                    q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                self.y_pred = q.argmax(1)
                if y is not None:
                    acc = np.round(metrics.acc(y, self.y_pred), 5)
                    nmi = np.round(metrics.nmi(y, self.y_pred), 5)
                    ari = np.round(metrics.ari(y, self.y_pred), 5)
                    loss = np.round(loss, 5)
                    logdict = dict(iter=ite,
                                   acc=acc,
                                   nmi=nmi,
                                   ari=ari,
                                   L=loss[0],
                                   Lc=loss[1],
                                   Lr=loss[2])
                    logwriter.writerow(logdict)
                    print('Iter', ite, ': Acc', acc, ', nmi', nmi, ', ari',
                          ari, '; loss=', loss)

                # check stop criterion
                delta_label = np.sum(self.y_pred != y_pred_last).astype(
                    np.float32) / self.y_pred.shape[0]
                y_pred_last = np.copy(self.y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    logfile.close()
                    break

            # train on batch
            if (index + 1) * batch_size > x.shape[0]:
                loss = self.model.train_on_batch(
                    x=x[index * batch_size::],
                    y=[p[index * batch_size::], x[index * batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(
                    x=x[index * batch_size:(index + 1) * batch_size],
                    y=[
                        p[index * batch_size:(index + 1) * batch_size],
                        x[index * batch_size:(index + 1) * batch_size]
                    ])
                index += 1

            # save intermediate model
            if ite % save_interval == 0:
                # save DCEC model checkpoints
                print('saving model to:',
                      save_dir + '/dcec_model_' + str(ite) + '.h5')
                self.model.save_weights(save_dir + '/dcec_model_' + str(ite) +
                                        '.h5')

            ite += 1

        # save the trained model
        logfile.close()
        print('saving model to:', save_dir + '/dcec_model_final.h5')
        self.model.save_weights(save_dir + '/dcec_model_final.h5')
        t3 = time()
        print('Pretrain time:  ', t1 - t0)
        print('Clustering time:', t3 - t1)
        print('Total time:     ', t3 - t0)
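
A minimal end-to-end driver sketch (not in the original source); x is assumed to be an image array matching input_shape, y an optional array of ground-truth labels, and the hyper-parameter values are illustrative only:

# Hypothetical driver; the data arrays and hyper-parameters are assumptions.
dcec = DCEC(input_shape=(28, 28, 1), filters=[32, 64, 128, 10], n_clusters=10)
dcec.pretrain(x, batch_size=256, epochs=200, save_dir='results/temp')       # reconstruction-only warm-up
dcec.compile(loss=['kld', 'mse'], loss_weights=[0.1, 1], optimizer='adam')  # clustering + reconstruction losses
dcec.fit(x, y=y, batch_size=256, update_interval=140, save_dir='results/temp')
y_pred = dcec.predict(x)  # hard cluster assignment per sample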
Example #5
def Layer_Wise_preTrain(batch_size=1, nkerns=[3, 4], dataset=None, n_epochs=6, k_Top=5):
    """
        Layer-wise Convolutional auto-encoders.
    """

    if dataset is None:
        dataset = Preprocess_Input().load_data()
    train_set_x = dataset[0][0]
    train_set_z = dataset[0][2]
    n_train_batch = train_set_x.get_value(borrow=True).shape[0]
    n_train_batch /= batch_size

    print '... Building AutoEncoders'
    rng = numpy.random.RandomState(96813)
    index = T.lscalar('index')
    learning_rate = T.dscalar('rate')
    x = T.matrix('x')
    z = T.iscalar('z')
    #index.tag.test_value = 0
    #learning_rate.tag.test_value = .3
    em = 50
    layer0_input = x[:, :z*50].reshape((batch_size, 1, 50, -1))

    #   Auto-Encoder for Conv. LAYER 1
    layer0 = CAE(rng, input=layer0_input, image_shape=(batch_size, 1, em, None), \
        filter_shape=(nkerns[0], 1, 1, 7), factor=.5, s=z, k_Top=k_Top, do_fold=True)
    
    #zz = layer0.get_cost_updates(learning_rate)
    #print 'hidden:', theano.function([index, learning_rate], [zz], on_unused_input='ignore', \
    #                                        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size], \
    #                                        z: train_set_z[index]})(0, .3)
    cost, updates = layer0.get_cost_updates(learning_rate)
    #print 'DECODE RESULT:\n', theano.function([index], [layer0.output.shape, layer0_input.shape, z.type, layer0.zz.shape], \
    #                                        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size], \
    #                                        z: train_set_z[index]})(0)
    
    train0 = theano.function([index, learning_rate], cost, updates=updates, \
                        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                z: train_set_z[index]})
    em /= 2
    #   Auto-Encoder for Conv. LAYER 2.
    layer1_input = layer0.Output()
    layer1 = CAE(rng, input=layer1_input, image_shape=(batch_size, nkerns[0], em, None), \
                        filter_shape=(nkerns[1], nkerns[0], 1, 3), factor=.0, s=z, k_Top=k_Top, do_fold=True)

    cost1, updates1 = layer1.get_cost_updates(learning_rate)

    train1 = theano.function([index, learning_rate], cost1, updates=updates1, \
                        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                z: train_set_z[index]})
    em /= 2
    #   Auto-Encoder for Hidden Layer.
    hidden_input = layer1.Output().flatten(2)
    hidden_layer = AE(rng, input=hidden_input, n_visible=nkerns[1]*em*k_Top, n_hidden=100)
    cost_h, updates_h = hidden_layer.get_cost_updates(learning_rate)
    train_h = theano.function([index, learning_rate], cost_h, updates=updates_h, \
                        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                                z: train_set_z[index]})
    print '... Pretraining model'

    ls_1 = []
    rate = 1e-2
    epoch = 0
    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch in xrange(n_train_batch):
            cost_ij = train0(minibatch, rate)
        ls_1.append(cost_ij)
        rate *= .95
        print '\tepoch %i : cost: %f' % (epoch, cost_ij)
        # print layer0.W.get_value(borrow=True)

    ls_2 = []
    rate = 1e-2
    epoch = 0
    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch in xrange(n_train_batch):
            cost_ij = train1(minibatch, rate)
        ls_2.append(cost_ij)
        rate *= .95
        print '\tepoch %i : cost: %f' % (epoch, cost_ij)
        # print layer1.W.get_value(borrow=True)

    ls_3 = []
    rate=4e-2
    epoch = 0
    while epoch < n_epochs:
        epoch = epoch + 1
        for minibatch in xrange(n_train_batch):
            cost_ij = train_h(minibatch, rate)
        ls_3.append(cost_ij)
        rate *= .95
        print '\tepoch %i : cost: %f' % (epoch, cost_ij)
        
    
    #  PLOT AutoEncoder Cost Function
    plt.subplot(3, 1, 1)
    plt.plot(numpy.arange(len(ls_1)) + 1, numpy.asarray(ls_1), 'r.-')
    plt.title('AutoEncoder Cost function Results')
    plt.xlabel('Epochs')
    plt.ylabel('Convolutional Layer 1')
    
    plt.subplot(3, 1, 2)
    plt.plot(numpy.arange(len(ls_2)) + 1, numpy.asarray(ls_2), 'r.-')
    #plt.title('AutoEncoder Cost function Results')
    plt.xlabel('Epochs')
    plt.ylabel('Convolutional Layer 2')
    
    plt.subplot(3, 1, 3)
    plt.plot(numpy.arange(len(ls_3)) + 1, numpy.asarray(ls_3), 'r.-')
    plt.xlabel('Epochs')
    plt.ylabel('Hidden Layer values')
    
    plt.show()
    
    return [layer0.params, layer1.params, hidden_layer.params]
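
A hedged call sketch for the routine above; it relies on the default Preprocess_Input().load_data() dataset used inside the function:

# Hypothetical usage: layer-wise pretraining, then reuse of the learned parameters.
layer0_params, layer1_params, hidden_params = Layer_Wise_preTrain(batch_size=1, nkerns=[3, 4], n_epochs=6, k_Top=5)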
Example #6
File: DCEC.py  Project: bradgwest/paap
class DCEC(object):
    def __init__(self,
                 input_shape: Tuple[int, int, int],
                 filters: Iterable[int] = [32, 64, 128, 32],
                 n_clusters: int = 32,
                 alpha: int = 1):
        """DCEC Model

        :param input_shape: Shape of the input layer in the model
        :param filters: Number of filters in the convolutional layers, plus the size of the clustering layer. Hence the
            length should equal len(convolutional layers) + 1.
        :param n_clusters: k, the number of clusters to target
        # TODO Do we need this parameter?
        :param alpha: parameter in Student's t distribution
        """
        # TODO Add activation as a parameter to this model
        super(DCEC, self).__init__()

        self.n_clusters = n_clusters
        self.input_shape = input_shape
        self.alpha = alpha
        self.pretrained = False
        self.y_pred = []

        self.cae = CAE(input_shape, filters)
        hidden = self.cae.get_layer(name="embedding").output
        self.encoder = Model(inputs=self.cae.input, outputs=hidden)

        # Define DCEC model
        clustering_layer = ClusteringLayer(self.n_clusters,
                                           name="clustering")(hidden)
        self.model = Model(inputs=self.cae.input,
                           outputs=[clustering_layer, self.cae.output])

    # TODO we should really be training for 200 epochs
    # TODO Can we do a bigger batch size here?
    # TODO Should we train for longer?
    def pretrain(self,
                 x,
                 batch_size=512,
                 epochs=200,
                 optimizer="adam",
                 save_dir="results/temp"):
        logger.info("...Pretraining...")
        self.cae.compile(optimizer=optimizer, loss="mse")
        from tensorflow.keras.callbacks import CSVLogger

        csv_logger = CSVLogger(save_dir + "/pretrain_log.csv")  # use the method's save_dir (args is not defined in this scope)

        # begin training
        t0 = time()
        self.cae.fit(x,
                     x,
                     batch_size=batch_size,
                     epochs=epochs,
                     callbacks=[csv_logger])
        logger.info("Pretraining time: {}".format(time() - t0))
        self.cae.save(save_dir + "/pretrain_cae_model.h5")
        logger.info(
            "Pretrained weights are saved to %s/pretrain_cae_model.h5" %
            save_dir)
        save_results_to_gcs(save_dir)
        self.pretrained = True

    def load_weights(self, weights_path):
        self.model.load_weights(weights_path)

    def extract_feature(self,
                        x):  # extract features from before clustering layer
        return self.encoder.predict(x)

    def predict(self, x):
        q, _ = self.model.predict(x, verbose=0)
        return q.argmax(1)

    @staticmethod
    def target_distribution(q):
        weight = q**2 / q.sum(0)
        return (weight.T / weight.sum(1)).T

    @staticmethod
    def should_stop(losses, threshold=0.0005):
        assert len(losses) >= 3, "Need at least 3 losses"
        a = sum(losses) / len(losses)
        for x in losses:
            if abs(x - a) > threshold:
                return False
        return True
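
    # Worked example for should_stop with the default threshold=0.0005:
    # losses = [0.0101, 0.0100, 0.0099] has mean 0.0100 and every value lies within
    # 0.0005 of it, so training stops; losses = [0.012, 0.010, 0.009] does not stop,
    # since 0.012 deviates from the mean (~0.0103) by ~0.0017.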

    def compile(self,
                loss=["kld", "mse"],
                loss_weights=[1, 1],
                optimizer="adam"):
        self.model.compile(loss=loss,
                           loss_weights=loss_weights,
                           optimizer=optimizer)

    def fit(
        self,
        x,
        y=None,
        batch_size=512,  # This was 256, Castellano used 128
        maxiter=2e4,
        tol=1e-3,
        update_interval=140,  # Was 140
        cae_weights=None,
        save_dir="./results/temp",
    ):

        logger.info("Update interval {}".format(update_interval))
        save_interval = int(x.shape[0] / batch_size * 5)
        logger.info("Save interval {}".format(save_interval))

        # Step 1: pretrain if necessary
        t0 = time()
        if not self.pretrained and cae_weights is None:
            logger.info("...pretraining CAE using default hyper-parameters:")
            logger.info("   optimizer='adam';   epochs=200")
            self.pretrain(x, batch_size, save_dir=save_dir)
            self.pretrained = True
        elif cae_weights is not None:
            self.cae.load_weights(cae_weights)
            logger.info("cae_weights is loaded successfully.")

        # Step 2: initialize cluster centers using k-means
        t1 = time()
        logger.info("Initializing cluster centers with k-means.")
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name="clustering").set_weights(
            [kmeans.cluster_centers_])

        # Step 3: deep clustering
        # logging file
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logfile_path = save_dir + "/dcec_log.csv"
        logfile = open(logfile_path, "w")
        logwriter = csv.DictWriter(
            logfile, fieldnames=["iter", "acc", "nmi", "ari", "L", "Lc", "Lr"])
        logwriter.writeheader()

        overall_log_loss = save_dir + "/dcec_log_all.csv"
        l2 = open(overall_log_loss, "w")
        lw2 = csv.DictWriter(l2, fieldnames=["iter", "L", "Lc", "Lr"])
        lw2.writeheader()

        loss = [0, 0, 0]
        index = 0
        previous_losses = []
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                logger.info("Updating. Iter {}".format(ite))
                q, _ = self.model.predict(x, verbose=0)
                # model.predict() causes a memory leak in tf2. So, use model(). See notes above
                # q, _ = self.model(x_tf, training=False)
                p = self.target_distribution(
                    q)  # update the auxiliary target distribution p

                # evaluate the clustering performance
                self.y_pred = q.argmax(1)
                if y is not None:
                    logger.info("{} calculating acc".format(ite))
                    acc = np.round(metrics.acc(y, self.y_pred), 5)
                    nmi = np.round(metrics.nmi(y, self.y_pred), 5)
                    ari = np.round(metrics.ari(y, self.y_pred), 5)
                    loss = np.round(loss, 5)
                    logdict = dict(iter=ite,
                                   acc=acc,
                                   nmi=nmi,
                                   ari=ari,
                                   L=loss[0],
                                   Lc=loss[1],
                                   Lr=loss[2])
                    logwriter.writerow(logdict)
                    logger.info(
                        "Iter {}: Acc {}, nmi {}, ari {}; loss={}".format(
                            ite, acc, nmi, ari, loss))

                loss_dict = {
                    "iter": ite,
                    "L": loss[0],
                    "Lc": loss[1],
                    "Lr": loss[2]
                }
                logwriter.writerow(loss_dict)
                logger.info("iter {i}; L {L}; Lc {Lc}; Lr {Lr}".format(
                    i=ite, **loss_dict))

                logger.info("Evaluating full loss")
                loss_all = self.model.evaluate(x,
                                               y=[p, x],
                                               batch_size=batch_size,
                                               verbose=0)
                previous_losses.append(loss_all[0])
                ld = {
                    "iter": ite,
                    "L": loss_all[0],
                    "Lc": loss_all[1],
                    "Lr": loss_all[2]
                }
                logger.info(
                    "Overall loss. iter {iter}; L {L}; Lc {Lc}; Lr {Lr}".
                    format(**ld))
                lw2.writerow(ld)

                # check stop criterion
                delta_label = np.sum(self.y_pred != y_pred_last).astype(
                    np.float32) / self.y_pred.shape[0]
                logger.info("delta_label={}".format(delta_label))
                y_pred_last = np.copy(self.y_pred)
                if self.n_clusters > 1 and ite > 0 and delta_label < tol:
                    logger.info("delta_label {} < tol {}".format(
                        delta_label, tol))
                    logger.info(
                        "Reached tolerance threshold. Stopping training.")
                    logfile.close()
                    break
                elif self.n_clusters == 1 and len(
                        previous_losses) >= 3 and self.should_stop(
                            previous_losses):
                    logger.info(
                        "Stopping criteria reached: Last 3 losses {}".format(
                            previous_losses[-3:]))
                    break

            # train on batch
            if (index + 1) * batch_size > x.shape[0]:
                loss = self.model.train_on_batch(
                    x=x[index * batch_size::],
                    y=[p[index * batch_size::], x[index * batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(
                    x=x[index * batch_size:(index + 1) * batch_size],
                    y=[
                        p[index * batch_size:(index + 1) * batch_size],
                        x[index * batch_size:(index + 1) * batch_size],
                    ],
                )
                index += 1

            loss_dict = {
                "iter": ite,
                "L": loss[0],
                "Lc": loss[1],
                "Lr": loss[2]
            }
            logwriter.writerow(loss_dict)

            if ite % 10 == 0:
                logger.info("iter={};L={};L_c={};L_r={}".format(ite, *loss))

            # save intermediate model
            if ite % save_interval == 0:
                # save DCEC model checkpoints
                logger.info(
                    "saving model to: {}".format(save_dir + "/dcec_model_" +
                                                 str(ite) + ".h5"))
                path = save_dir + "/dcec_model_" + str(ite) + ".h5"
                self.model.save_weights(path)
                gcs_copy(path)
                gcs_copy(logfile_path)
                gcs_copy(overall_log_loss)

            ite += 1

        # save the trained model
        logfile.close()
        l2.close()
        logger.info("saving model to: {}".format(save_dir +
                                                 "/dcec_model_final.h5"))
        self.model.save_weights(save_dir + "/dcec_model_final.h5")
        t3 = time()
        logger.info("Pretrain time:   {}".format(t1 - t0))
        logger.info("Clustering time: {}".format(t3 - t1))
        logger.info("Total time:      {}".format(t3 - t0))

        save_results_to_gcs(save_dir)