def kmeans_initialize(self, force=False):
    """Create the initial K-Means machine and store it on disk (non-parallel).

    The machine is written to ``self.m_configuration.kmeans_intermediate_file % 0``.
    If the tool chain's ``__check_file__`` returns a true value for that file,
    only an info message is logged and nothing is computed.

    Keyword parameters:

    force
      Forwarded unchanged to ``__check_file__``.  Presumably requests that the
      file is re-created even when it is already present -- confirm against
      the tool chain implementation.
    """
    target = self.m_configuration.kmeans_intermediate_file % 0

    # NOTE(review): the 1000 is passed straight to __check_file__; it looks
    # like a minimum file size, but that is not visible from here -- confirm.
    if self.m_tool_chain.__check_file__(target, force, 1000):
        utils.info("UBM training: Skipping KMeans initialization since the file '%s' already exists" % target)
        return

    utils.info("UBM training: initializing kmeans")

    # Load the selected training feature files and stack them row-wise into
    # one array; the selection is delegated to utils.quasi_random_indices.
    feature_files = self.m_file_selector.training_feature_list()
    selection = utils.quasi_random_indices(len(feature_files), self.m_args.limit_training_examples)
    loaded = [bob.io.load(str(feature_files[i])) for i in selection]
    data = numpy.vstack(loaded)

    # One mean per Gaussian; the feature dimension is taken from the data.
    machine = bob.machine.KMeansMachine(self.m_tool.m_gaussians, data.shape[1])

    # Only the trainer's initialization step is run here, no training iterations.
    trainer = bob.trainer.KMeansTrainer()
    trainer.initialize(machine, data)

    utils.ensure_dir(os.path.dirname(target))
    machine.save(bob.io.HDF5File(target, 'w'))
    utils.info("UBM training: saved initial KMeans machine to '%s'" % target)
def gmm_initialize(self, force=False):
    """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).

    This might require a lot of memory.

    The K-Means machine is read from ``self.m_configuration.kmeans_file``.  Its
    means, together with the per-cluster variances and weights that it computes
    on the training data, are copied into a new GMM machine, which is written
    to ``self.m_configuration.gmm_intermediate_file % 0``.  If the tool chain's
    ``__check_file__`` returns a true value for that file, only an info message
    is logged and nothing is computed.

    Keyword parameters:

    force
      Forwarded unchanged to ``__check_file__``.  Presumably requests that the
      file is re-created even when it is already present -- confirm against
      the tool chain implementation.
    """
    output_file = self.m_configuration.gmm_intermediate_file % 0

    # NOTE(review): the 800 is passed straight to __check_file__; it looks
    # like a minimum file size, but that is not visible from here -- confirm.
    if self.m_tool_chain.__check_file__(output_file, force, 800):
        utils.info("UBM Training: Skipping GMM initialization since '%s' already exists" % output_file)
        return

    training_list = self.m_file_selector.training_feature_list()
    utils.info("UBM Training: Initializing GMM")

    # load KMeans machine
    kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(self.m_configuration.kmeans_file))

    # read features: the selected files are loaded and stacked row-wise
    indices = utils.quasi_random_indices(len(training_list), self.m_args.limit_training_examples)
    data = numpy.vstack([bob.io.load(str(training_list[index])) for index in indices])

    # Create initial GMM Machine with the feature dimension taken from the data
    gmm_machine = bob.machine.GMMMachine(self.m_tool.m_gaussians, data.shape[1])

    variances, weights = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

    # Initializes the GMM from the K-Means statistics
    gmm_machine.means = kmeans_machine.means
    gmm_machine.variances = variances
    gmm_machine.weights = weights
    gmm_machine.set_variance_thresholds(self.m_tool.m_variance_threshold)

    utils.ensure_dir(os.path.dirname(output_file))
    # The path is passed directly: the former single-argument
    # os.path.join(output_file) returned the path unchanged.
    gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
    utils.info("UBM Training: Wrote GMM file '%s'" % output_file)
def gmm_initialize(self, force=False):
    """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).

    This might require a lot of memory.

    Steps performed when the output file has to be created:

    1. load the K-Means machine from ``self.m_configuration.kmeans_file``;
    2. load the selected training features and stack them into one array;
    3. create a GMM machine and fill it with the K-Means means and with the
       variances and weights the K-Means machine computes for each cluster;
    4. write the GMM machine to ``self.m_configuration.gmm_intermediate_file % 0``.

    If the tool chain's ``__check_file__`` returns a true value for the output
    file, only an info message is logged.

    Keyword parameters:

    force
      Forwarded unchanged to ``__check_file__``.  Presumably forces
      re-computation of an already present file -- confirm against the tool
      chain implementation.
    """
    output_file = self.m_configuration.gmm_intermediate_file % 0

    # NOTE(review): the meaning of 800 is defined by __check_file__, which is
    # not visible here (looks like a minimum file size) -- confirm.
    if self.m_tool_chain.__check_file__(output_file, force, 800):
        utils.info("UBM Training: Skipping GMM initialization since '%s' already exists" % output_file)
        return

    training_list = self.m_file_selector.training_feature_list()
    utils.info("UBM Training: Initializing GMM")

    # load KMeans machine
    kmeans_file = bob.io.HDF5File(self.m_configuration.kmeans_file)
    kmeans_machine = bob.machine.KMeansMachine(kmeans_file)

    # read features
    data = numpy.vstack([
        bob.io.load(str(training_list[index]))
        for index in utils.quasi_random_indices(len(training_list), self.m_args.limit_training_examples)
    ])

    # Create initial GMM Machine; data.shape[1] is the feature dimension
    gmm_machine = bob.machine.GMMMachine(self.m_tool.m_gaussians, data.shape[1])

    variances, weights = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

    # Initializes the GMM
    gmm_machine.means = kmeans_machine.means
    gmm_machine.variances = variances
    gmm_machine.weights = weights
    gmm_machine.set_variance_thresholds(self.m_tool.m_variance_threshold)

    utils.ensure_dir(os.path.dirname(output_file))
    # os.path.join() with a single argument returned the path unchanged, so
    # the output file name is handed to HDF5File directly.
    gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
    utils.info("UBM Training: Wrote GMM file '%s'" % output_file)
def kmeans_initialize(self, force=False):
    """Set up the K-Means machine that training starts from (non-parallel).

    Loads the selected training features, lets a ``KMeansTrainer`` initialize
    a fresh ``KMeansMachine`` on them and saves that machine to
    ``self.m_configuration.kmeans_intermediate_file % 0``.  The whole step is
    skipped, with an info message, when the tool chain's ``__check_file__``
    returns a true value for that file.

    Keyword parameters:

    force
      Handed on to ``__check_file__`` as is.  Presumably forces the step to
      run even though the file is there -- confirm against the tool chain.
    """
    init_file = self.m_configuration.kmeans_intermediate_file % 0

    # NOTE(review): 1000 is interpreted by __check_file__, which is not
    # visible here (possibly a minimum file size) -- confirm.
    already_done = self.m_tool_chain.__check_file__(init_file, force, 1000)
    if already_done:
        utils.info("UBM training: Skipping KMeans initialization since the file '%s' already exists" % init_file)
        return

    utils.info("UBM training: initializing kmeans")

    # Collect the feature arrays of the selected training files ...
    file_list = self.m_file_selector.training_feature_list()
    arrays = []
    for position in utils.quasi_random_indices(len(file_list), self.m_args.limit_training_examples):
        arrays.append(bob.io.load(str(file_list[position])))
    # ... and stack them row-wise into a single array.
    data = numpy.vstack(arrays)

    # The machine gets one mean per Gaussian and the data's feature dimension.
    kmeans = bob.machine.KMeansMachine(self.m_tool.m_gaussians, data.shape[1])

    # Run only the trainer's initialization procedure.
    bob_trainer = bob.trainer.KMeansTrainer()
    bob_trainer.initialize(kmeans, data)

    utils.ensure_dir(os.path.dirname(init_file))
    kmeans.save(bob.io.HDF5File(init_file, 'w'))
    utils.info("UBM training: saved initial KMeans machine to '%s'" % init_file)