def testKMeans(dataset_name, archs):
    '''
    Performs kMeans clustering and reports metrics on the latent spaces produced by the networks defined in archs
    for the given dataset. Assumes that testOnlyClusterInitialization and testOnlyClusterImprovement have been run
    before this for the specified archs/datasets, as the results they save are used for clustering.
    :param dataset_name: Name of dataset [MNIST, COIL20]
    :param archs: Architectures, each a dict with at least a 'name' key
    :return: None - reports the accuracy and nmi clustering metrics
    '''
    rootLogger.info('Initial Cluster Quality Comparison')
    rootLogger.info(80 * '_')
    rootLogger.info('%-50s     %8s     %8s' % ('method', 'ACC', 'NMI'))
    rootLogger.info(80 * '_')
    dataset = DatasetHelper(dataset_name)
    dataset.loadDataset()
    rootLogger.info(
        evaluateKMeans(dataset.input_flat, dataset.labels,
                       dataset.getClusterCount(), 'image')[0])
    for arch in archs:
        # Evaluate the latent spaces saved after pretraining (z_), KL-divergence
        # refinement (pc_z_) and k-means-loss refinement (pc_km_z_)
        for prefix in ['z_', 'pc_z_', 'pc_km_z_']:
            Z = numpy.load('saved_params/' + dataset.name + '/' + prefix +
                           arch['name'] + '.npy')
            rootLogger.info(
                evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(),
                               arch['name'])[0])
    rootLogger.info(80 * '_')
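evaluateKMeans is only called in these examples, never defined. A minimal sketch of the kind of helper it is assumed to be (fit k-means on the latent vectors, match predicted clusters to ground-truth labels with the Hungarian algorithm for ACC, and compute NMI with scikit-learn) could look like the following; the function name, the metric formatting and the scipy/scikit-learn calls are assumptions, not the original implementation.

import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score

def evaluateKMeans_sketch(Z, labels, n_clusters, description):
    # Fit k-means on the latent vectors
    labels = np.asarray(labels)
    km = KMeans(n_clusters=n_clusters, n_init=20).fit(Z)
    pred = km.labels_
    # Unsupervised clustering accuracy: best one-to-one mapping between
    # predicted clusters and ground-truth labels (Hungarian algorithm)
    D = max(pred.max(), labels.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for p, t in zip(pred, labels):
        w[p, t] += 1
    rows, cols = linear_sum_assignment(-w)
    acc = w[rows, cols].sum() / float(pred.size)
    nmi = normalized_mutual_info_score(labels, pred)
    # Same column layout as the table header logged by testKMeans
    return '%-50s     %8.3f     %8.3f' % (description, acc, nmi), km.cluster_centers_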
Example 2
    def doClusteringWithKMeansLoss(self, dataset, epochs):
        '''
        Trains the autoencoder with combined kMeans loss and reconstruction loss
        At the moment this does not give good results
        :param dataset: Data on which the autoencoder is trained
        :param epochs: Number of training epochs
        :return: None - (side effect) saves the trained network params and latent space in appropriate location
        '''
        batch_size = self.batch_size
        # Load the inputs in latent space produced by the pretrained autoencoder and use it to initialize cluster centers
        Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name))
        quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), 'Initial')
        rootLogger.info(quality_desc)
        # Load network parameters - code borrowed from mnist lasagne example
        with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name)) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            lasagne.layers.set_all_param_values(self.network, param_values, trainable=True)
        # reconstruction loss is just rms loss between input and reconstructed input
        reconstruction_loss = self.getReconstructionLossExpression(layers.get_output(self.network), self.t_target)
        # extend the network to do soft cluster assignments
        clustering_network = ClusteringLayer(self.encode_layer, dataset.getClusterCount(), cluster_centers, batch_size, self.encode_size)
        soft_assignments = layers.get_output(clustering_network)
        # k-means loss is the sum of distances from the cluster centers weighted by the soft assignments to the clusters
        kmeansLoss = self.getKMeansLoss(layers.get_output(self.encode_layer), soft_assignments, clustering_network.W, dataset.getClusterCount(), self.encode_size, batch_size)
        params = lasagne.layers.get_all_params(self.network, trainable=True)
        # total loss = reconstruction loss + lambda * kmeans loss
        weight_reconstruction = 1
        weight_kmeans = 0.1
        total_loss = weight_kmeans * kmeansLoss + weight_reconstruction * reconstruction_loss
        updates = lasagne.updates.nesterov_momentum(total_loss, params, learning_rate=0.01)
        trainKMeansWithAE = theano.function([self.t_input, self.t_target], total_loss, updates=updates)
        for epoch in range(epochs):
            error = 0
            total_batches = 0
            for batch in dataset.iterate_minibatches(self.input_type, batch_size, shuffle=True):
                inputs, targets = batch
                error += trainKMeansWithAE(inputs, targets)
                total_batches += 1
            # For every 10th epoch, update the cluster centers and print the clustering accuracy and nmi - for checking if the network
            # is actually doing something meaningful - the labels are never used for training
            if (epoch + 1) % 10 == 0:
                for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
                    Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
                quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches))
                rootLogger.info(quality_desc)
            else:
                # Just print the training loss
                rootLogger.info("%-30s     %8s     %8s" % ("%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches), "", ""))
            if self.shouldStopNow:
                break

        # Save the inputs in latent space and the network parameters
        for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
            Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
        np.save('saved_params/%s/pc_km_z_%s.npy' % (dataset.name, self.name), Z)
        np.savez('saved_params/%s/pc_km_m_%s.npz' % (dataset.name, self.name),
                 *lasagne.layers.get_all_param_values(self.network, trainable=True))
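The comment above describes the k-means loss as cluster-center distances weighted by the soft assignments, but the body of getKMeansLoss is not shown in this example. A hedged Theano sketch with that meaning, reusing the argument order of the call above, might be:

import theano.tensor as T

def getKMeansLoss_sketch(encoded, soft_assignments, centers, num_clusters, latent_size, num_samples):
    # encoded: (num_samples, latent_size), centers: (num_clusters, latent_size)
    z = encoded.dimshuffle(0, 'x', 1)                    # (num_samples, 1, latent_size)
    mu = centers.dimshuffle('x', 0, 1)                   # (1, num_clusters, latent_size)
    squared_distances = T.sum(T.sqr(z - mu), axis=2)     # (num_samples, num_clusters)
    # each sample's distance to every center, weighted by its soft assignment
    return T.mean(T.sum(soft_assignments * squared_distances, axis=1))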
Example 4
 def pretrainWithData(self, dataset, epochs, continue_training=False):
     '''
     Pretrains the autoencoder on the given dataset
     :param dataset: Data on which the autoencoder is trained
     :param epochs: number of training epochs
     :param continue_training: Resume training if saved params available
     :return: None - (side effect) saves the trained network params and latent space in appropriate location
     '''
     batch_size = self.batch_size
     # array for holding the latent space representation of input
     Z = np.zeros((dataset.input.shape[0], self.encode_size), dtype=np.float32);
     # in case we're continuing training load the network params
     if continue_training:
         with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name)) as f:
             param_values = [f['arr_%d' % i] for i in range(len(f.files))]
             lasagne.layers.set_all_param_values(self.network, param_values, trainable=True)
     for epoch in range(epochs):
         error = 0
         total_batches = 0
         for batch in dataset.iterate_minibatches(self.input_type, batch_size, shuffle=True):
             inputs, targets = batch
             error += self.trainAutoencoder(inputs, targets)
             total_batches += 1
         # learning rate decay
         self.learning_rate.set_value(self.learning_rate.get_value() * lasagne.utils.floatX(0.9999))
         # Every 2nd epoch, print the clustering accuracy and nmi - for checking if the network
         # is actually doing something meaningful - the labels are never used for training
         if (epoch + 1) % 2 == 0:
             for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
                 Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
                 # Uncomment the following lines to write reconstruction outputs to the dumps/ folder (may need to be created)
                 #for i, x in enumerate(self.predictReconstruction(batch[0])):
                 #	print('dump')
                 #	rescaleReshapeAndSaveImage(x[0], "dumps/%02d%03d.jpg"%(epoch,i));
             rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches))[0])
         else:
             # Just report the training loss
             rootLogger.info("%-30s     %8s     %8s" % ("%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches), "", ""))
         if self.shouldStopNow:
             break
     # The inputs in latent space after pretraining
     for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
         Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
     # Save network params and latent space
     np.save('saved_params/%s/z_%s.npy' % (dataset.name, self.name), Z)
     # Borrowed from mnist lasagne example
     np.savez('saved_params/%s/m_%s.npz' % (dataset.name, self.name), *lasagne.layers.get_all_param_values(self.network, trainable=True))
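pretrainWithData relies on self.trainAutoencoder, self.predictEncoding and the shared self.learning_rate without showing how they are built. A sketch of the typical Lasagne/Theano wiring, assuming self.network, self.encode_layer, self.t_input and self.t_target are created elsewhere in the class, is:

import theano
import lasagne

def buildTrainingFunctions_sketch(self):
    # shared learning rate so it can be decayed with set_value() during training
    self.learning_rate = theano.shared(lasagne.utils.floatX(0.01))
    reconstruction = lasagne.layers.get_output(self.network)
    loss = lasagne.objectives.squared_error(reconstruction, self.t_target).mean()
    params = lasagne.layers.get_all_params(self.network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(loss, params,
                                                learning_rate=self.learning_rate)
    self.trainAutoencoder = theano.function([self.t_input, self.t_target],
                                            loss, updates=updates)
    encoding = lasagne.layers.get_output(self.encode_layer, deterministic=True)
    self.predictEncoding = theano.function([self.t_input], encoding)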
 def doClusteringWithKLdivLoss(self, dataset, combined_loss, epochs):
     '''
     Trains the autoencoder with combined kldivergence loss and reconstruction loss, or just the kldivergence loss
     At the moment this does not give good results
     :param dataset: Data on which the autoencoder is trained
     :param combined_loss: boolean - whether to use both reconstruction and kl divergence loss or just kldivergence loss
     :param epochs: Number of training epochs
     :return: None - (side effect) saves the trained network params and latent space in appropriate location
     '''
     batch_size = self.batch_size
     # Load saved network params and inputs in latent space obtained after pretraining
     with np.load('saved_params/%s/m_%s.npz' %
                  (dataset.name, self.name)) as f:
         param_values = [f['arr_%d' % i] for i in range(len(f.files))]
         lasagne.layers.set_all_param_values(self.network,
                                             param_values,
                                             trainable=True)
     Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name))
     # Find initial cluster centers
     quality_desc, cluster_centers = evaluateKMeans(
         Z, dataset.labels, dataset.getClusterCount(), 'Initial')
     rootLogger.info(quality_desc)
     # P is the sharper (purer) target distribution we want to achieve
     P = T.matrix('P')
     # Extend the network so it calculates soft assignment cluster distribution for the inputs in latent space
     clustering_network = ClusteringLayer(self.encode_layer,
                                          dataset.getClusterCount(),
                                          cluster_centers, batch_size,
                                          self.encode_size)
     soft_assignments = layers.get_output(clustering_network)
     reconstructed_output_exp = layers.get_output(self.network)
     # Clustering loss = kl divergence between the pure distribution P and current distribution
     clustering_loss = self.getKLDivLossExpression(soft_assignments, P)
     reconstruction_loss = self.getReconstructionLossExpression(
         reconstructed_output_exp, self.t_target)
     params_ae = lasagne.layers.get_all_params(self.network, trainable=True)
     params_dec = lasagne.layers.get_all_params(clustering_network,
                                                trainable=True)
     # Total loss = weighted sum of the two losses
     w_cluster_loss = 1
     w_reconstruction_loss = 1
     total_loss = w_cluster_loss * clustering_loss
     if (combined_loss):
         total_loss = total_loss + w_reconstruction_loss * reconstruction_loss
     all_params = params_dec
     if combined_loss:
         all_params.extend(params_ae)
     # Parameters = unique parameters in the new network
     all_params = list(set(all_params))
     # SGD with momentum, LR = 0.01, Momentum = 0.9
     updates = lasagne.updates.nesterov_momentum(total_loss,
                                                 all_params,
                                                 learning_rate=0.01)
     # Function to calculate the soft assignment distribution
     getSoftAssignments = theano.function([self.t_input], soft_assignments)
     # Train function - based on whether complete loss is used or not
     trainFunction = None
     if combined_loss:
         trainFunction = theano.function([self.t_input, self.t_target, P],
                                         total_loss,
                                         updates=updates)
     else:
         trainFunction = theano.function([self.t_input, P],
                                         clustering_loss,
                                         updates=updates)
     for epoch in range(epochs):
         # Get the current distribution
         qij = np.zeros((dataset.input.shape[0], dataset.getClusterCount()),
                        dtype=np.float32)
         for i, batch in enumerate(
                 dataset.iterate_minibatches(self.input_type,
                                             batch_size,
                                             shuffle=False)):
             qij[i * batch_size:(i + 1) * batch_size] = getSoftAssignments(
                 batch[0])
         # Calculate the desired distribution
         pij = self.calculateP(qij)
         error = 0
         total_batches = 0
         for i, batch in enumerate(
                 dataset.iterate_minibatches(self.input_type,
                                             batch_size,
                                             pij,
                                             shuffle=True)):
             if (combined_loss):
                 error += trainFunction(batch[0], batch[0], batch[1])
             else:
                 error += trainFunction(batch[0], batch[1])
             total_batches += 1
         for i, batch in enumerate(
                 dataset.iterate_minibatches(self.input_type,
                                             batch_size,
                                             shuffle=False)):
             Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(
                 batch[0])
         # For every 10th iteration, print the clustering accuracy and nmi - for checking if the network
         # is actually doing something meaningful - the labels are never used for training
         if (epoch + 1) % 10 == 0:
             rootLogger.info(
                 evaluateKMeans(
                     Z, dataset.labels, dataset.getClusterCount(),
                     "%d [%.4f]" % (epoch, error / total_batches))[0])
         if self.shouldStopNow:
             break
     # Save the inputs in latent space and the network parameters
     for i, batch in enumerate(
             dataset.iterate_minibatches(self.input_type,
                                         batch_size,
                                         shuffle=False)):
         Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(
             batch[0])
     np.save('saved_params/%s/pc_z_%s.npy' % (dataset.name, self.name), Z)
     np.savez(
         'saved_params/%s/pc_m_%s.npz' % (dataset.name, self.name),
         *lasagne.layers.get_all_param_values(self.network, trainable=True))
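calculateP, used in the KL-divergence loop above, is assumed to build the sharpened target distribution from the DEC paper, p_ij = (q_ij^2 / f_j) / sum_j' (q_ij'^2 / f_j'), where f_j = sum_i q_ij is the soft cluster frequency. A sketch:

import numpy as np

def calculateP_sketch(qij):
    f = qij.sum(axis=0)                        # soft cluster frequencies f_j
    weight = (qij ** 2) / f                    # sharpen and down-weight large clusters
    return (weight.T / weight.sum(axis=1)).T   # renormalize each row to sum to 1

Squaring q emphasizes high-confidence assignments, so minimizing KL(P || Q) pulls points toward the centers they are already closest to while the frequency term discourages degenerate clusters.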
Example 7
    def doClusteringWithKMeansLoss(self, dataset, epochs):
        '''
        Trains the autoencoder with combined kMeans loss and reconstruction loss
        At the moment this does not give good results
        :param dataset: Data on which the autoencoder is trained
        :param epochs: Number of training epochs
        :return: None - (side effect) saves the trained network params and latent space in appropriate location
        '''
        batch_size = self.batch_size
        # Load the inputs in latent space produced by the pretrained autoencoder and use it to initialize cluster centers
        Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name),
                    encoding='latin1')
        quality_desc, cluster_centers = evaluateKMeans(
            Z, dataset.labels, dataset.getClusterCount(), 'Initial')
        rootLogger.info(quality_desc)
        #Load network parameters - code borrowed from mnist lasagne example
        with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name),
                     encoding='latin1') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            model = self.network
            model.set_weights(param_values)
        # extend the network to do soft cluster assignments
        clustering_network = ClusteringLayer(dataset.getClusterCount(),
                                             batch_size,
                                             int(self.encode_size),
                                             name='cluster')
        clustering_network.build(cluster_centers.shape)
        soft_assignments = clustering_network(self.encode_layer)
        weights_cluster = clustering_network.get_config()['W']
        # Parameters used by the custom k-means loss (KMeansLoss) below
        self.soft_assignments = soft_assignments
        self.num_clusters = dataset.getClusterCount()
        self.latent_space_dim = int(self.encode_size)
        self.num_samples = batch_size
        weight_reconstruction = 1
        weight_kmeans = 0.1
        # Adam optimizer and the combined model (reconstruction output + soft assignments)
        adam = keras.optimizers.Adam(0.0001)
        trainKMeansWithAE = keras.models.Model(
            inputs=[model.layers[0].input],
            outputs=[model.layers[-1].output, soft_assignments])
        trainKMeansWithAE.compile(
            loss=['mse', self.KMeansLoss],
            loss_weights=[weight_reconstruction, weight_kmeans],
            optimizer=adam)
        #plot_model(trainKMeansWithAE, to_file='trainKMeansWithAE.png', show_shapes=True)
        #Image(filename='trainKMeansWithAE.png')
        # TensorBoard callback to visualize gradients and weight images
        tensorboard1 = keras.callbacks.TensorBoard(log_dir="logs/{}".format(
            time.time()),
                                                   write_grads=True,
                                                   write_images=True,
                                                   histogram_freq=0)
        kmeans = KMeans(n_clusters=self.num_clusters, n_init=20)
        for epoch in range(epochs):
            error = 0
            total_batches = 0
            for batch in dataset.iterate_minibatches(self.input_type,
                                                     batch_size,
                                                     shuffle=True):
                inputs, targets = batch
                encoded = self.predictEncoding.predict(inputs)
                self.y_pred = kmeans.fit_predict(encoded)
                trainKMeansWithAE.get_layer(name='cluster').set_weights(
                    [kmeans.cluster_centers_])
                history = trainKMeansWithAE.fit(inputs, [targets, encoded],
                                                steps_per_epoch=1,
                                                callbacks=[tensorboard1])
                error += history.history['loss'][0]
                total_batches += 1
            # For every 10th epoch, update the cluster centers and print the clustering accuracy and nmi - for checking if the network
            # is actually doing something meaningful - the labels are never used for training
            if (epoch + 1) % 10 == 0:
                for i, batch in enumerate(
                        dataset.iterate_minibatches(self.input_type,
                                                    batch_size,
                                                    shuffle=False)):
                    Z[i * batch_size:(i + 1) *
                      batch_size] = self.predictEncoding.predict(batch[0])
                quality_desc, cluster_centers = evaluateKMeans(
                    Z, dataset.labels, dataset.getClusterCount(),
                    "%d/%d [%.4f]" %
                    (epoch + 1, epochs, error / total_batches))
                rootLogger.info(quality_desc)
            else:
                # Just print the training loss
                rootLogger.info(
                    "%-30s     %8s     %8s" %
                    ("%d/%d [%.4f]" %
                     (epoch + 1, epochs, error / total_batches), "", ""))
            if self.shouldStopNow:
                break

        # Save the inputs in latent space and the network parameters
        for i, batch in enumerate(
                dataset.iterate_minibatches(self.input_type,
                                            batch_size,
                                            shuffle=False)):
            Z[i * batch_size:(i + 1) *
              batch_size] = self.predictEncoding.predict(batch[0])
        np.save('saved_params/%s/pc_km_z_%s.npy' % (dataset.name, self.name),
                Z)
        np.savez('saved_params/%s/pc_km_m_%s.npz' % (dataset.name, self.name),
                 *trainKMeansWithAE.get_weights())
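The model above is compiled with loss=['mse', self.KMeansLoss], where the second output is the soft assignments and its fit target is the encoded batch. The body of KMeansLoss is not shown; a hedged sketch of a compatible loss, written here as a closure over the clustering layer rather than the bound method the example uses, is:

import keras.backend as K

def make_kmeans_loss(clustering_layer):
    centers = clustering_layer.weights[0]                # (num_clusters, latent_dim)

    def kmeans_loss(y_true, y_pred):
        # y_true: encoded latent vectors, y_pred: soft assignments from the 'cluster' layer
        z = K.expand_dims(y_true, axis=1)                # (batch, 1, latent_dim)
        mu = K.expand_dims(centers, axis=0)              # (1, num_clusters, latent_dim)
        squared_dist = K.sum(K.square(z - mu), axis=2)   # (batch, num_clusters)
        return K.mean(K.sum(y_pred * squared_dist, axis=1))

    return kmeans_loss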
Example 8
    def doClusteringWithKLdivLoss(self, dataset, combined_loss, epochs):
        '''
        Trains the autoencoder with combined kldivergence loss and reconstruction loss, or just the kldivergence loss
        At the moment this does not give good results
        :param dataset: Data on which the autoencoder is trained
        :param combined_loss: boolean - whether to use both reconstruction and kl divergence loss or just kldivergence loss
        :param epochs: Number of training epochs
        :return: None - (side effect) saves the trained network params and latent space in appropriate location
        '''
        batch_size = self.batch_size
        # Load saved network params and inputs in latent space obtained after pretraining
        with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name),
                     encoding="latin1") as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            self.network.set_weights(param_values)
        Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name))
        #just for tracing.
        #Z_reshaped = Z.reshape((Z.shape[0],Z.shape[-1]))
        #print(self.model.layers[1].output)
        #print(Z.shape)
        #print(dataset.labels)
        #print(dataset.getClusterCount())
        #Find initial cluster centers
        quality_desc, cluster_centers = evaluateKMeans(
            Z, dataset.labels, dataset.getClusterCount(), 'Initial')
        rootLogger.info(quality_desc)
        #Extend the network so it calculates soft assignment cluster distribution for the inputs in latent space
        clustering_network = ClusteringLayer(dataset.getClusterCount(),
                                             batch_size, int(self.encode_size))
        clustering_network.build(cluster_centers.shape)
        cluster_output = clustering_network(self.encode_layer)
        reconstructed_output_exp = self.network.layers[-1].output
        #get soft assignments model and plot model
        soft_model = keras.models.Model(
            inputs=self.encoder_model.layers[0].input, outputs=cluster_output)
        #plot_model(soft_model, to_file='soft_model.png', show_shapes=True)
        #Image(filename='soft_model.png')
        # Adam optimizer, learning rate 0.0001
        adam = keras.optimizers.Adam(0.0001)
        trainFunction = None

        if combined_loss:
            trainFunction = keras.models.Model(
                inputs=self.encoder_model.layers[0].input,
                outputs=[reconstructed_output_exp, cluster_output])
            #plot_model(trainFunction, to_file='train.png', show_shapes=True)
            #Image(filename='train.png')
            trainFunction.compile(loss=['mse', 'kld'],
                                  loss_weights=[1.0, 0.1],
                                  optimizer=adam)
        else:
            trainFunction = keras.models.Model(
                inputs=[self.network.layers[0].input],
                outputs=[cluster_output])
            trainFunction.compile(loss='kld', optimizer=adam)

        for epoch in range(epochs):
            # Get the current distribution
            qij = np.zeros((dataset.input.shape[0], dataset.getClusterCount()),
                           dtype=np.float32)
            for i, batch in enumerate(
                    dataset.iterate_minibatches(self.input_type,
                                                batch_size,
                                                shuffle=False)):
                qij[i * batch_size:(i + 1) * batch_size] = soft_model.predict(
                    batch[0], steps=1)
            # Calculate the desired distribution
            pij = self.calculateP(qij)
            error = 0
            total_batches = 0
            history = LossHistory()
            for i, batch in enumerate(
                    dataset.iterate_minibatches(self.input_type,
                                                batch_size,
                                                pij,
                                                shuffle=True)):
                if (combined_loss):
                    history = trainFunction.fit(x=[batch[0]],
                                                y=[batch[0], batch[1]],
                                                steps_per_epoch=1)
                    error += history.history['loss'][0]
                else:
                    history = trainFunction.fit(batch[0],
                                                batch[1],
                                                steps_per_epoch=1)
                    error += history.history['loss'][0]
                total_batches += 1
            for i, batch in enumerate(
                    dataset.iterate_minibatches(self.input_type,
                                                batch_size,
                                                shuffle=False)):
                Z[i * batch_size:(i + 1) *
                  batch_size] = self.predictEncoding.predict(batch[0])
            # For every 10th iteration, print the clustering accuracy and nmi - for checking if the network
            # is actually doing something meaningful - the labels are never used for training
            if (epoch + 1) % 10 == 0:
                rootLogger.info(
                    evaluateKMeans(
                        Z, dataset.labels, dataset.getClusterCount(),
                        "%d [%.4f]" % (epoch, error / total_batches))[0])
            if self.shouldStopNow:
                break
        # Save the inputs in latent space and the network parameters
        for i, batch in enumerate(
                dataset.iterate_minibatches(self.input_type,
                                            batch_size,
                                            shuffle=False)):
            Z[i * batch_size:(i + 1) *
              batch_size] = self.predictEncoding.predict(batch[0])
        np.save('saved_params/%s/pc_z_%s.npy' % (dataset.name, self.name), Z)
        np.savez('saved_params/%s/pc_m_%s.npz' % (dataset.name, self.name),
                 *trainFunction.get_weights())
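Examples 7 and 8 call a Keras ClusteringLayer(n_clusters, batch_size, latent_dim, ...) whose definition is not shown. A sketch of a compatible layer, assuming the DEC-style Student's t soft assignment q_ij = (1 + ||z_i - mu_j||^2)^-1 normalized over clusters; the weight name 'W' and the initializer are assumptions.

import keras
import keras.backend as K

class ClusteringLayerSketch(keras.layers.Layer):
    def __init__(self, n_clusters, batch_size, latent_dim, **kwargs):
        super(ClusteringLayerSketch, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.latent_dim = latent_dim

    def build(self, input_shape):
        # one trainable row per cluster center in latent space
        self.W = self.add_weight(name='W',
                                 shape=(self.n_clusters, self.latent_dim),
                                 initializer='glorot_uniform',
                                 trainable=True)
        super(ClusteringLayerSketch, self).build(input_shape)

    def call(self, inputs):
        # squared distances to every center, then the Student's t kernel
        dist = K.sum(K.square(K.expand_dims(inputs, axis=1) - self.W), axis=2)
        q = 1.0 / (1.0 + dist)
        return q / K.sum(q, axis=1, keepdims=True)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.n_clusters)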
Example 9
 def pretrainWithData(self, dataset, epochs, continue_training=False):
     '''
     Pretrains the autoencoder on the given dataset
     :param dataset: Data on which the autoencoder is trained
     :param epochs: number of training epochs
     :param continue_training: Resume training if saved params available
     :return: None - (side effect) saves the trained network params and latent space in appropriate location
     '''
     batch_size = self.batch_size
     # array for holding the latent space representation of input
     Z = np.zeros((dataset.input.shape[0], int(self.encode_size)),
                  dtype=np.float32)
     # in case we're continuing training load the network params
     if continue_training:
         with np.load(
                 'saved_params/%s/m_%s.npz' % (dataset.name, self.name),
                 encoding='latin1') as f:
             param_values = [f['arr_%d' % i] for i in range(len(f.files))]
             self.trainAutoencoder.set_weights(param_values)
     for epoch in range(epochs):
         error = 0
         total_batches = 0
         for batch in dataset.iterate_minibatches(self.input_type,
                                                  batch_size,
                                                  shuffle=True):
             inputs, targets = batch
             history = self.trainAutoencoder.fit(inputs,
                                                 targets,
                                                 steps_per_epoch=1)
             error += history.history['loss'][0]
             total_batches += 1
         # learning rate decay (also propagate it to the compiled optimizer)
         self.learning_rate = self.learning_rate * float(0.9999)
         keras.backend.set_value(self.trainAutoencoder.optimizer.lr,
                                 self.learning_rate)
         # Every 2nd epoch, print the clustering accuracy and nmi - for checking if the network
         # is actually doing something meaningful - the labels are never used for training
         if (epoch + 1) % 2 == 0:
             for i, batch in enumerate(
                     dataset.iterate_minibatches(self.input_type,
                                                 batch_size,
                                                 shuffle=False)):
                 Z[i * batch_size:(i + 1) *
                   batch_size] = self.predictEncoding.predict(batch[0])
             rootLogger.info(
                 evaluateKMeans(
                     Z, dataset.labels, dataset.getClusterCount(),
                     "%d/%d [%.4f]" %
                     (epoch + 1, epochs, error / total_batches))[0])
         else:
             # Just report the training loss
             rootLogger.info(
                 "%-30s     %8s     %8s" %
                 ("%d/%d [%.4f]" %
                  (epoch + 1, epochs, error / total_batches), "", ""))
         if self.shouldStopNow:
             break
     # The inputs in latent space after pretraining
     for i, batch in enumerate(
             dataset.iterate_minibatches(self.input_type,
                                         batch_size,
                                         shuffle=False)):
         #batch_reverse = inputs.reshape(tuple([batch[0].shape[0]])+tuple(reversed(list(batch[0].shape[1:]))))
         Z[i * batch_size:(i + 1) *
           batch_size] = self.predictEncoding.predict(batch[0], steps=1)
     # Save network params and latent space
     np.save('saved_params/%s/z_%s.npy' % (dataset.name, self.name), Z)
     # Borrowed from mnist lasagne example
     print(np.array(self.trainAutoencoder.get_weights())[0].shape)
     np.savez('saved_params/%s/m_%s.npz' % (dataset.name, self.name),
              *self.trainAutoencoder.get_weights())
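The Keras examples assume attributes such as self.trainAutoencoder, self.predictEncoding, self.encoder_model, self.network and self.encode_layer; how they are built is not shown. A minimal sketch of one way to wire them for a fully connected autoencoder (layer sizes, activations and the initial learning rate are assumptions):

import keras
from keras.layers import Input, Dense

def buildKerasAutoencoder_sketch(self, input_dim):
    inputs = Input(shape=(input_dim,))
    encoded = Dense(int(self.encode_size), activation='relu', name='encode')(inputs)
    decoded = Dense(input_dim, activation='linear', name='decode')(encoded)
    self.encode_layer = encoded                    # Keras tensor fed to the clustering layer
    self.encoder_model = keras.models.Model(inputs=inputs, outputs=encoded)
    self.predictEncoding = self.encoder_model      # used via .predict(...) above
    self.network = keras.models.Model(inputs=inputs, outputs=decoded)
    self.trainAutoencoder = self.network           # used via .fit(...) above
    self.learning_rate = 0.001
    self.trainAutoencoder.compile(loss='mse',
                                  optimizer=keras.optimizers.Adam(self.learning_rate))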
 def doClusteringWithKLdivLoss(self, dataset, combined_loss, epochs):
     '''
     Trains the autoencoder with combined kldivergence loss and reconstruction loss, or just the kldivergence loss
     At the moment this does not give good results
     :param dataset: Data on which the autoencoder is trained
     :param combined_loss: boolean - whether to use both reconstruction and kl divergence loss or just kldivergence loss
     :param epochs: Number of training epochs
     :return: None - (side effect) saves the trained network params and latent space in appropriate location
     '''
     batch_size = self.batch_size
     # Load saved network params and inputs in latent space obtained after pretraining
     with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name)) as f:
         param_values = [f['arr_%d' % i] for i in range(len(f.files))]
         lasagne.layers.set_all_param_values(self.network, param_values, trainable=True)
     Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name))
     # Find initial cluster centers
     quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), 'Initial')
     rootLogger.info(quality_desc)
     # P is the sharper (purer) target distribution we want to achieve
     P = T.matrix('P')
     # Extend the network so it calculates soft assignment cluster distribution for the inputs in latent space
     clustering_network = ClusteringLayer(self.encode_layer, dataset.getClusterCount(), cluster_centers, batch_size,self.encode_size)
     soft_assignments = layers.get_output(clustering_network)
     reconstructed_output_exp = layers.get_output(self.network)
     # Clustering loss = kl divergence between the pure distribution P and current distribution
     clustering_loss = self.getKLDivLossExpression(soft_assignments, P)
     reconstruction_loss = self.getReconstructionLossExpression(reconstructed_output_exp, self.t_target)
     params_ae = lasagne.layers.get_all_params(self.network, trainable=True)
     params_dec = lasagne.layers.get_all_params(clustering_network, trainable=True)
     # Total loss = weighted sum of the two losses
     w_cluster_loss = 1
     w_reconstruction_loss = 1
     total_loss = w_cluster_loss * clustering_loss
     if (combined_loss):
         total_loss = total_loss + w_reconstruction_loss * reconstruction_loss
     all_params = params_dec
     if combined_loss:
         all_params.extend(params_ae)
     # Parameters = unique parameters in the new network
     all_params = list(set(all_params))
     # SGD with momentum, LR = 0.01, Momentum = 0.9
     updates = lasagne.updates.nesterov_momentum(total_loss, all_params, learning_rate=0.01)
     # Function to calculate the soft assignment distribution
     getSoftAssignments = theano.function([self.t_input], soft_assignments)
     # Train function - based on whether complete loss is used or not
     trainFunction = None
     if combined_loss:
         trainFunction = theano.function([self.t_input, self.t_target, P], total_loss, updates=updates)
     else:
         trainFunction = theano.function([self.t_input, P], clustering_loss, updates=updates)
     for epoch in range(epochs):
         # Get the current distribution
         qij = np.zeros((dataset.input.shape[0], dataset.getClusterCount()), dtype=np.float32)
         for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
             qij[i * batch_size: (i + 1) * batch_size] = getSoftAssignments(batch[0])
         # Calculate the desired distribution
         pij = self.calculateP(qij)
         error = 0
         total_batches = 0
         for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, pij, shuffle=True)):
             if (combined_loss):
                 error += trainFunction(batch[0], batch[0], batch[1])
             else:
                 error += trainFunction(batch[0], batch[1])
             total_batches += 1
         for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
             Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
         # For every 10th iteration, print the clustering accuracy and nmi - for checking if the network
         # is actually doing something meaningful - the labels are never used for training
         if (epoch + 1) % 10 == 0:
             rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d [%.4f]" % (
                 epoch, error / total_batches))[0])
         if self.shouldStopNow:
             break
     # Save the inputs in latent space and the network parameters
     for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
         Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
     np.save('saved_params/%s/pc_z_%s.npy' % (dataset.name, self.name), Z)
     np.savez('saved_params/%s/pc_m_%s.npz' % (dataset.name, self.name),
              *lasagne.layers.get_all_param_values(self.network, trainable=True))
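getKLDivLossExpression, used by the Theano variants of doClusteringWithKLdivLoss, is assumed to be the KL divergence between the target distribution P and the soft assignments Q, summed over samples and clusters. A sketch:

import theano.tensor as T

def getKLDivLossExpression_sketch(soft_assignments, target_distribution):
    # KL(P || Q); P is the sharpened target distribution, Q the current soft assignments
    return T.sum(target_distribution *
                 T.log(target_distribution / soft_assignments))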