def testKMeans(dataset_name, archs):
    '''
    Performs kMeans clustering and reports metrics on the output latent space produced by the
    networks defined in archs, with the given dataset
    Assumes that testOnlyClusterInitialization and testOnlyClusterImprovement have been run before
    this for the specified archs/datasets, as the results saved by them are used for clustering
    :param dataset_name: Name of dataset [MNIST, COIL20]
    :param archs: Architectures as a dictionary
    :return: None - reports the accuracy and nmi clustering metrics
    '''
    rootLogger.info('Initial Cluster Quality Comparison')
    rootLogger.info(80 * '_')
    rootLogger.info('%-50s %8s %8s' % ('method', 'ACC', 'NMI'))
    rootLogger.info(80 * '_')
    dataset = DatasetHelper(dataset_name)
    dataset.loadDataset()
    rootLogger.info(evaluateKMeans(dataset.input_flat, dataset.labels, dataset.getClusterCount(), 'image')[0])
    for arch in archs:
        Z = numpy.load('saved_params/' + dataset.name + '/z_' + arch['name'] + '.npy')
        rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0])
        Z = numpy.load('saved_params/' + dataset.name + '/pc_z_' + arch['name'] + '.npy')
        rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0])
        Z = numpy.load('saved_params/' + dataset.name + '/pc_km_z_' + arch['name'] + '.npy')
        rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), arch['name'])[0])
    rootLogger.info(80 * '_')
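# evaluateKMeans is used throughout this section but defined elsewhere in the repository. A minimal
# sketch of what such a helper could look like is given below, assuming sklearn/scipy are available:
# it runs k-means on the given representation and formats clustering accuracy (best one-to-one
# mapping between predicted clusters and true labels, via the Hungarian algorithm) and NMI in the
# same '%-50s %8s %8s' layout logged above. The actual helper may differ; evaluateKMeansSketch and
# its return format are illustrative only.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import normalized_mutual_info_score
from scipy.optimize import linear_sum_assignment

def evaluateKMeansSketch(Z, labels, num_clusters, method_name):
    kmeans = KMeans(n_clusters=num_clusters, n_init=20)
    predicted = kmeans.fit_predict(Z)
    # Clustering accuracy: count co-occurrences, then pick the label assignment maximizing agreement
    counts = np.zeros((num_clusters, num_clusters), dtype=np.int64)
    for p, t in zip(predicted, labels):
        counts[p, t] += 1
    row_ind, col_ind = linear_sum_assignment(-counts)
    acc = counts[row_ind, col_ind].sum() / float(len(labels))
    nmi = normalized_mutual_info_score(labels, predicted)
    return '%-50s %8.3f %8.3f' % (method_name, acc, nmi), kmeans.cluster_centers_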
def doClusteringWithKMeansLoss(self, dataset, epochs):
    '''
    Trains the autoencoder with combined kMeans loss and reconstruction loss
    At the moment does not give good results
    :param dataset: Data on which the autoencoder is trained
    :param epochs: Number of training epochs
    :return: None - (side effect) saves the trained network params and latent space in appropriate location
    '''
    batch_size = self.batch_size
    # Load the inputs in latent space produced by the pretrained autoencoder and use it to initialize cluster centers
    Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name))
    quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), 'Initial')
    rootLogger.info(quality_desc)
    # Load network parameters - code borrowed from mnist lasagne example
    with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name)) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(self.network, param_values, trainable=True)
    # Reconstruction loss is just the rms loss between the input and the reconstructed input
    reconstruction_loss = self.getReconstructionLossExpression(layers.get_output(self.network), self.t_target)
    # Extend the network to do soft cluster assignments
    clustering_network = ClusteringLayer(self.encode_layer, dataset.getClusterCount(), cluster_centers, batch_size, self.encode_size)
    soft_assignments = layers.get_output(clustering_network)
    # k-means loss is the sum of distances from the cluster centers weighted by the soft assignments to the clusters
    kmeansLoss = self.getKMeansLoss(layers.get_output(self.encode_layer), soft_assignments, clustering_network.W, dataset.getClusterCount(), self.encode_size, batch_size)
    params = lasagne.layers.get_all_params(self.network, trainable=True)
    # Total loss = reconstruction loss + lambda * kmeans loss
    weight_reconstruction = 1
    weight_kmeans = 0.1
    total_loss = weight_kmeans * kmeansLoss + weight_reconstruction * reconstruction_loss
    updates = lasagne.updates.nesterov_momentum(total_loss, params, learning_rate=0.01)
    trainKMeansWithAE = theano.function([self.t_input, self.t_target], total_loss, updates=updates)
    for epoch in range(epochs):
        error = 0
        total_batches = 0
        for batch in dataset.iterate_minibatches(self.input_type, batch_size, shuffle=True):
            inputs, targets = batch
            error += trainKMeansWithAE(inputs, targets)
            total_batches += 1
        # Every 10th epoch, update the cluster centers and print the clustering accuracy and nmi - for checking
        # if the network is actually doing something meaningful - the labels are never used for training
        if (epoch + 1) % 10 == 0:
            for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
                Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
            quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches))
            rootLogger.info(quality_desc)
        else:
            # Just print the training loss
            rootLogger.info("%-30s %8s %8s" % ("%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches), "", ""))
        if self.shouldStopNow:
            break
    # Save the inputs in latent space and the network parameters
    for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
        Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
    np.save('saved_params/%s/pc_km_z_%s.npy' % (dataset.name, self.name), Z)
    np.savez('saved_params/%s/pc_km_m_%s.npz' % (dataset.name, self.name),
             *lasagne.layers.get_all_param_values(self.network, trainable=True))
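# getKMeansLoss is referenced above but not shown in this section. Per the comment ("k-means loss is
# the sum of distances from the cluster centers weighted by the soft assignments to the clusters"),
# a minimal Theano sketch of such an expression is given below, assuming latent vectors of shape
# (num_samples, latent_space_dim) and cluster centers of shape (num_clusters, latent_space_dim);
# the exact formulation used by self.getKMeansLoss may differ.
def getKMeansLossSketch(latent_space, soft_assignments, cluster_centers, num_clusters, latent_space_dim, num_samples):
    # Squared euclidean distance of every sample to every cluster center: shape (num_samples, num_clusters)
    z = latent_space.reshape((num_samples, 1, latent_space_dim))
    mu = cluster_centers.reshape((1, num_clusters, latent_space_dim))
    distances = T.sum(T.sqr(z - mu), axis=2)
    # Weight each distance by the soft assignment of the sample to that cluster and average over samples
    return T.mean(T.sum(soft_assignments * distances, axis=1))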
def pretrainWithData(self, dataset, epochs, continue_training=False):
    '''
    Pretrains the autoencoder on the given dataset
    :param dataset: Data on which the autoencoder is trained
    :param epochs: number of training epochs
    :param continue_training: Resume training if saved params available
    :return: None - (side effect) saves the trained network params and latent space in appropriate location
    '''
    batch_size = self.batch_size
    # Array for holding the latent space representation of the input
    Z = np.zeros((dataset.input.shape[0], self.encode_size), dtype=np.float32)
    # In case we're continuing training, load the network params
    if continue_training:
        with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name)) as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(self.network, param_values, trainable=True)
    for epoch in range(epochs):
        error = 0
        total_batches = 0
        for batch in dataset.iterate_minibatches(self.input_type, batch_size, shuffle=True):
            inputs, targets = batch
            error += self.trainAutoencoder(inputs, targets)
            total_batches += 1
            # Learning rate decay
            self.learning_rate.set_value(self.learning_rate.get_value() * lasagne.utils.floatX(0.9999))
        # Every 2nd epoch, print the clustering accuracy and nmi - for checking if the network
        # is actually doing something meaningful - the labels are never used for training
        if (epoch + 1) % 2 == 0:
            for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
                Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
                # Uncomment the next two lines to create reconstruction outputs in folder dumps/ (may need to be created)
                # for i, x in enumerate(self.predictReconstruction(batch[0])):
                #     rescaleReshapeAndSaveImage(x[0], "dumps/%02d%03d.jpg" % (epoch, i))
            rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches))[0])
        else:
            # Just report the training loss
            rootLogger.info("%-30s %8s %8s" % ("%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches), "", ""))
        if self.shouldStopNow:
            break
    # The inputs in latent space after pretraining
    for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
        Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
    # Save network params and latent space
    np.save('saved_params/%s/z_%s.npy' % (dataset.name, self.name), Z)
    # Borrowed from mnist lasagne example
    np.savez('saved_params/%s/m_%s.npz' % (dataset.name, self.name),
             *lasagne.layers.get_all_param_values(self.network, trainable=True))
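# Pretraining minimizes a reconstruction objective via self.trainAutoencoder; the clustering
# functions above build theirs with getReconstructionLossExpression, described as "rms loss between
# input and reconstructed input". A minimal Theano sketch under the assumption of a mean squared
# error is given below; the actual helper may take the root or a per-sample sum instead.
def getReconstructionLossExpressionSketch(reconstructed_output, target):
    # Mean squared error between the reconstruction and the target, averaged over all elements
    return T.mean(T.sqr(reconstructed_output - target))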
def doClusteringWithKLdivLoss(self, dataset, combined_loss, epochs):
    '''
    Trains the autoencoder with combined kldivergence loss and reconstruction loss, or just the kldivergence loss
    At the moment does not give good results
    :param dataset: Data on which the autoencoder is trained
    :param combined_loss: boolean - whether to use both reconstruction and kl divergence loss or just kldivergence loss
    :param epochs: Number of training epochs
    :return: None - (side effect) saves the trained network params and latent space in appropriate location
    '''
    batch_size = self.batch_size
    # Load saved network params and inputs in latent space obtained after pretraining
    with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name)) as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(self.network, param_values, trainable=True)
    Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name))
    # Find initial cluster centers
    quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), 'Initial')
    rootLogger.info(quality_desc)
    # P is the more pure target distribution we want to achieve
    P = T.matrix('P')
    # Extend the network so it calculates the soft assignment cluster distribution for the inputs in latent space
    clustering_network = ClusteringLayer(self.encode_layer, dataset.getClusterCount(), cluster_centers, batch_size, self.encode_size)
    soft_assignments = layers.get_output(clustering_network)
    reconstructed_output_exp = layers.get_output(self.network)
    # Clustering loss = kl divergence between the pure distribution P and the current distribution
    clustering_loss = self.getKLDivLossExpression(soft_assignments, P)
    reconstruction_loss = self.getReconstructionLossExpression(reconstructed_output_exp, self.t_target)
    params_ae = lasagne.layers.get_all_params(self.network, trainable=True)
    params_dec = lasagne.layers.get_all_params(clustering_network, trainable=True)
    # Total loss = weighted sum of the two losses
    w_cluster_loss = 1
    w_reconstruction_loss = 1
    total_loss = w_cluster_loss * clustering_loss
    if combined_loss:
        total_loss = total_loss + w_reconstruction_loss * reconstruction_loss
    all_params = params_dec
    if combined_loss:
        all_params.extend(params_ae)
    # Parameters = unique parameters in the new network
    all_params = list(set(all_params))
    # SGD with momentum, LR = 0.01, Momentum = 0.9
    updates = lasagne.updates.nesterov_momentum(total_loss, all_params, learning_rate=0.01)
    # Function to calculate the soft assignment distribution
    getSoftAssignments = theano.function([self.t_input], soft_assignments)
    # Train function - based on whether the combined loss is used or not
    if combined_loss:
        trainFunction = theano.function([self.t_input, self.t_target, P], total_loss, updates=updates)
    else:
        trainFunction = theano.function([self.t_input, P], clustering_loss, updates=updates)
    for epoch in range(epochs):
        # Get the current distribution
        qij = np.zeros((dataset.input.shape[0], dataset.getClusterCount()), dtype=np.float32)
        for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
            qij[i * batch_size:(i + 1) * batch_size] = getSoftAssignments(batch[0])
        # Calculate the desired distribution
        pij = self.calculateP(qij)
        error = 0
        total_batches = 0
        for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, pij, shuffle=True)):
            if combined_loss:
                error += trainFunction(batch[0], batch[0], batch[1])
            else:
                error += trainFunction(batch[0], batch[1])
            total_batches += 1
        for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
            Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
        # Every 10th epoch, print the clustering accuracy and nmi - for checking if the network
        # is actually doing something meaningful - the labels are never used for training
        if (epoch + 1) % 10 == 0:
            rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d [%.4f]" % (epoch, error / total_batches))[0])
        if self.shouldStopNow:
            break
    # Save the inputs in latent space and the network parameters
    for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
        Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding(batch[0])
    np.save('saved_params/%s/pc_z_%s.npy' % (dataset.name, self.name), Z)
    np.savez('saved_params/%s/pc_m_%s.npz' % (dataset.name, self.name),
             *lasagne.layers.get_all_param_values(self.network, trainable=True))
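# calculateP and getKLDivLossExpression are referenced above but not shown in this section. In the
# DEC-style formulation, the target distribution P is obtained by sharpening the current soft
# assignments Q and normalizing by the soft cluster frequencies, and the clustering loss is
# KL(P || Q). Minimal sketches under those assumptions are given below; the actual helpers on
# self may differ in detail.
def calculatePSketch(qij):
    # Sharpen: square each soft assignment and divide by the per-cluster soft frequency
    weight = (qij ** 2) / np.sum(qij, axis=0)
    # Renormalize so every row is again a probability distribution over the clusters
    return (weight.T / np.sum(weight, axis=1)).T

def getKLDivLossExpressionSketch(Q, P):
    # KL(P || Q), summed over clusters and averaged over samples (Theano expressions)
    return T.mean(T.sum(P * T.log(P / Q), axis=1))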
def doClusteringWithKMeansLoss(self, dataset, epochs):
    '''
    Trains the autoencoder with combined kMeans loss and reconstruction loss
    At the moment does not give good results
    :param dataset: Data on which the autoencoder is trained
    :param epochs: Number of training epochs
    :return: None - (side effect) saves the trained network params and latent space in appropriate location
    '''
    batch_size = self.batch_size
    # Load the inputs in latent space produced by the pretrained autoencoder and use it to initialize cluster centers
    Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name), encoding='latin1')
    quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), 'Initial')
    rootLogger.info(quality_desc)
    # Load network parameters - code borrowed from mnist lasagne example
    with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name), encoding='latin1') as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    model = self.network
    model.set_weights(param_values)
    # Extend the network to do soft cluster assignments
    clustering_network = ClusteringLayer(dataset.getClusterCount(), batch_size, int(self.encode_size), name='cluster')
    clustering_network.build(cluster_centers.shape)
    soft_assignments = clustering_network(self.encode_layer)
    weights_cluster = clustering_network.get_config()['W']
    # Parameters used by the custom kMeans loss
    self.soft_assignments = soft_assignments
    self.num_clusters = dataset.getClusterCount()
    self.latent_space_dim = int(self.encode_size)
    self.num_samples = batch_size
    weight_reconstruction = 1
    weight_kmeans = 0.1
    # Adam optimizer and combined model (reconstruction output + soft cluster assignments)
    sgd = keras.optimizers.Adam(0.0001)
    trainKMeansWithAE = keras.models.Model(inputs=[model.layers[0].input],
                                           outputs=[model.layers[-1].output, soft_assignments])
    trainKMeansWithAE.compile(loss=['mse', self.KMeansLoss],
                              loss_weights=[weight_reconstruction, weight_kmeans],
                              optimizer=sgd)
    # plot_model(trainKMeansWithAE, to_file='trainKMeansWithAE.png', show_shapes=True)
    # Image(filename='trainKMeansWithAE.png')
    # Tensorboard to visualize gradients
    tensorboard1 = keras.callbacks.TensorBoard(log_dir="logs/{}".format(time.time()),
                                               write_grads=True, write_images=True, histogram_freq=0)
    kmeans = KMeans(n_clusters=self.num_clusters, n_init=20)
    for epoch in range(epochs):
        error = 0
        total_batches = 0
        for batch in dataset.iterate_minibatches(self.input_type, batch_size, shuffle=True):
            inputs, targets = batch
            encoded = self.predictEncoding.predict(inputs)
            self.y_pred = kmeans.fit_predict(encoded)
            trainKMeansWithAE.get_layer(name='cluster').set_weights([kmeans.cluster_centers_])
            history = trainKMeansWithAE.fit(inputs, [targets, encoded], steps_per_epoch=1, callbacks=[tensorboard1])
            error += history.history['loss'][0]
            total_batches += 1
        # Every 10th epoch, update the cluster centers and print the clustering accuracy and nmi - for checking
        # if the network is actually doing something meaningful - the labels are never used for training
        if (epoch + 1) % 10 == 0:
            for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
                batch_reverse = batch[0].reshape(tuple([batch[0].shape[0]]) + tuple(reversed(list(batch[0].shape[1:]))))
                Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding.predict(batch[0])
            quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches))
            rootLogger.info(quality_desc)
        else:
            # Just print the training loss
            rootLogger.info("%-30s %8s %8s" % ("%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches), "", ""))
        if self.shouldStopNow:
            break
    # Save the inputs in latent space and the network parameters
    for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
        Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding.predict(batch[0])
    np.save('saved_params/%s/pc_km_z_%s.npy' % (dataset.name, self.name), Z)
    np.savez('saved_params/%s/pc_km_m_%s.npz' % (dataset.name, self.name), *trainKMeansWithAE.get_weights())
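# self.KMeansLoss is a custom Keras loss referenced in compile() above but not shown here. A minimal
# sketch is given below under the assumption that y_true carries the latent codes (as passed via
# fit(inputs, [targets, encoded])) and y_pred carries the soft assignments, with the current cluster
# centers available on the object; `self.cluster_centers` is a hypothetical attribute used only for
# illustration - the actual implementation may obtain the centers differently.
import keras.backend as K

def KMeansLossSketch(self, y_true, y_pred):
    # y_true: latent codes, shape (num_samples, latent_space_dim)
    # y_pred: soft assignments, shape (num_samples, num_clusters)
    centers = self.cluster_centers  # hypothetical (num_clusters, latent_space_dim) tensor of centers
    # Squared distance of every latent code to every cluster center
    z = K.expand_dims(y_true, axis=1)            # (num_samples, 1, latent_space_dim)
    mu = K.expand_dims(centers, axis=0)          # (1, num_clusters, latent_space_dim)
    distances = K.sum(K.square(z - mu), axis=2)  # (num_samples, num_clusters)
    # Distances weighted by the soft assignments, averaged over the batch
    return K.mean(K.sum(y_pred * distances, axis=1))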
def doClusteringWithKLdivLoss(self, dataset, combined_loss, epochs):
    '''
    Trains the autoencoder with combined kldivergence loss and reconstruction loss, or just the kldivergence loss
    At the moment does not give good results
    :param dataset: Data on which the autoencoder is trained
    :param combined_loss: boolean - whether to use both reconstruction and kl divergence loss or just kldivergence loss
    :param epochs: Number of training epochs
    :return: None - (side effect) saves the trained network params and latent space in appropriate location
    '''
    batch_size = self.batch_size
    # Load saved network params and inputs in latent space obtained after pretraining
    with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name), encoding="latin1") as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    self.network.set_weights(param_values)
    Z = np.load('saved_params/%s/z_%s.npy' % (dataset.name, self.name))
    # Just for tracing
    # Z_reshaped = Z.reshape((Z.shape[0], Z.shape[-1]))
    # print(self.model.layers[1].output)
    # print(Z.shape)
    # print(dataset.labels)
    # print(dataset.getClusterCount())
    # Find initial cluster centers
    quality_desc, cluster_centers = evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), 'Initial')
    rootLogger.info(quality_desc)
    # Extend the network so it calculates the soft assignment cluster distribution for the inputs in latent space
    clustering_network = ClusteringLayer(dataset.getClusterCount(), batch_size, int(self.encode_size))
    clustering_network.build(cluster_centers.shape)
    cluster_output = clustering_network(self.encode_layer)
    reconstructed_output_exp = self.network.layers[-1].output
    # Model that outputs the soft assignments
    soft_model = keras.models.Model(inputs=self.encoder_model.layers[0].input, outputs=cluster_output)
    # plot_model(soft_model, to_file='soft_model.png', show_shapes=True)
    # Image(filename='soft_model.png')
    # Adam optimizer, LR = 0.0001
    adam = keras.optimizers.Adam(0.0001)
    # Train model - based on whether the combined loss is used or not
    if combined_loss:
        trainFunction = keras.models.Model(inputs=self.encoder_model.layers[0].input,
                                           outputs=[reconstructed_output_exp, cluster_output])
        # plot_model(trainFunction, to_file='train.png', show_shapes=True)
        # Image(filename='train.png')
        trainFunction.compile(loss=['mse', 'kld'], loss_weights=[1.0, 0.1], optimizer=adam)
    else:
        trainFunction = keras.models.Model(inputs=[self.network.layers[0].input], outputs=[cluster_output])
        trainFunction.compile(loss='kld', optimizer=adam)
    for epoch in range(epochs):
        # Get the current distribution
        qij = np.zeros((dataset.input.shape[0], dataset.getClusterCount()), dtype=np.float32)
        for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
            qij[i * batch_size:(i + 1) * batch_size] = soft_model.predict(batch[0], steps=1)
        # Calculate the desired distribution
        pij = self.calculateP(qij)
        error = 0
        total_batches = 0
        history = LossHistory()
        for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, pij, shuffle=True)):
            if combined_loss:
                history = trainFunction.fit(x=[batch[0]], y=[batch[0], batch[1]], steps_per_epoch=1)
            else:
                history = trainFunction.fit(batch[0], batch[1], steps_per_epoch=1)
            error += history.history['loss'][0]
            total_batches += 1
        for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
            Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding.predict(batch[0])
        # Every 10th epoch, print the clustering accuracy and nmi - for checking if the network
        # is actually doing something meaningful - the labels are never used for training
        if (epoch + 1) % 10 == 0:
            rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d [%.4f]" % (epoch, error / total_batches))[0])
        if self.shouldStopNow:
            break
    # Save the inputs in latent space and the network parameters
    for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
        Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding.predict(batch[0])
    np.save('saved_params/%s/pc_z_%s.npy' % (dataset.name, self.name), Z)
    np.savez('saved_params/%s/pc_m_%s.npz' % (dataset.name, self.name), *trainFunction.get_weights())
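# ClusteringLayer is used above (and in the Lasagne variant) but its definition is not part of this
# section. In the DEC-style formulation the soft assignment of a latent code to a cluster is a
# Student's t-kernel over the distance to the trainable cluster center, normalized over clusters.
# A minimal Keras sketch under that assumption is given below; the constructor arguments and the
# actual layer used by this repository may differ.
import keras.backend as K
from keras.layers import Layer

class ClusteringLayerSketch(Layer):
    def __init__(self, num_clusters, latent_space_dim, **kwargs):
        self.num_clusters = num_clusters
        self.latent_space_dim = latent_space_dim
        super(ClusteringLayerSketch, self).__init__(**kwargs)

    def build(self, input_shape):
        # Trainable cluster centers, shape (num_clusters, latent_space_dim)
        self.W = self.add_weight(name='W',
                                 shape=(self.num_clusters, self.latent_space_dim),
                                 initializer='glorot_uniform',
                                 trainable=True)
        super(ClusteringLayerSketch, self).build(input_shape)

    def call(self, z):
        # Squared distance of every latent code to every cluster center: (batch, num_clusters)
        distances = K.sum(K.square(K.expand_dims(z, axis=1) - self.W), axis=2)
        # Student's t-kernel (one degree of freedom), normalized over clusters -> soft assignments
        q = 1.0 / (1.0 + distances)
        return q / K.sum(q, axis=1, keepdims=True)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.num_clusters)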
def pretrainWithData(self, dataset, epochs, continue_training=False):
    '''
    Pretrains the autoencoder on the given dataset
    :param dataset: Data on which the autoencoder is trained
    :param epochs: number of training epochs
    :param continue_training: Resume training if saved params available
    :return: None - (side effect) saves the trained network params and latent space in appropriate location
    '''
    batch_size = self.batch_size
    # Array for holding the latent space representation of the input
    Z = np.zeros((dataset.input.shape[0], int(self.encode_size)), dtype=np.float32)
    # In case we're continuing training, load the network params
    if continue_training:
        with np.load('saved_params/%s/m_%s.npz' % (dataset.name, self.name), encoding='latin1') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        self.trainAutoencoder.set_weights(param_values)
    for epoch in range(epochs):
        error = 0
        total_batches = 0
        for batch in dataset.iterate_minibatches(self.input_type, batch_size, shuffle=True):
            inputs, targets = batch
            history = self.trainAutoencoder.fit(inputs, targets, steps_per_epoch=1)
            error += history.history['loss'][0]
            total_batches += 1
            # Learning rate decay
            self.learning_rate = self.learning_rate * float(0.9999)
        # Every 2nd epoch, print the clustering accuracy and nmi - for checking if the network
        # is actually doing something meaningful - the labels are never used for training
        if (epoch + 1) % 2 == 0:
            for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
                batch_reverse = inputs.reshape(tuple([batch[0].shape[0]]) + tuple(reversed(list(batch[0].shape[1:]))))
                Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding.predict(batch[0])
            rootLogger.info(evaluateKMeans(Z, dataset.labels, dataset.getClusterCount(), "%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches))[0])
        else:
            # Just report the training loss
            rootLogger.info("%-30s %8s %8s" % ("%d/%d [%.4f]" % (epoch + 1, epochs, error / total_batches), "", ""))
        if self.shouldStopNow:
            break
    # The inputs in latent space after pretraining
    for i, batch in enumerate(dataset.iterate_minibatches(self.input_type, batch_size, shuffle=False)):
        # batch_reverse = inputs.reshape(tuple([batch[0].shape[0]]) + tuple(reversed(list(batch[0].shape[1:]))))
        Z[i * batch_size:(i + 1) * batch_size] = self.predictEncoding.predict(batch[0], steps=1)
    # Save network params and latent space
    np.save('saved_params/%s/z_%s.npy' % (dataset.name, self.name), Z)
    # Borrowed from mnist lasagne example
    print(np.array(self.trainAutoencoder.get_weights())[0].shape)
    np.savez('saved_params/%s/m_%s.npz' % (dataset.name, self.name), *self.trainAutoencoder.get_weights())