Ejemplo n.º 1
0
    def kmeans_initialize(self, force=False):
        """Create the initial K-Means machine and write it to disk (non-parallel).

        The target is the K-Means intermediate file for iteration 0
        (``kmeans_intermediate_file % 0``).  When the tool chain's
        ``__check_file__`` accepts that file, nothing is computed and only a
        log message is emitted.

        Keyword parameters:

        force
          Forwarded unchanged to ``__check_file__``; presumably requests
          re-creation of an already existing file (confirm in the tool chain).
        """
        init_file = self.m_configuration.kmeans_intermediate_file % 0

        # Guard clause: a usable file is already there, so skip the work.
        # NOTE(review): 1000 is presumably a minimum file size -- confirm
        # against __check_file__.
        if self.m_tool_chain.__check_file__(init_file, force, 1000):
            utils.info(
                "UBM training: Skipping KMeans initialization since the file '%s' already exists"
                % init_file)
            return

        utils.info("UBM training: initializing kmeans")

        # Stack the features of the selected training files into one array.
        feature_files = self.m_file_selector.training_feature_list()
        selected = utils.quasi_random_indices(
            len(feature_files), self.m_args.limit_training_examples)
        features = numpy.vstack(
            [bob.io.load(str(feature_files[pos])) for pos in selected])

        # One mean per Gaussian; the dimensionality is taken from the
        # columns of the stacked feature array.
        machine = bob.machine.KMeansMachine(self.m_tool.m_gaussians,
                                            features.shape[1])
        # Only the trainer's initialization step is run here, no training.
        bob.trainer.KMeansTrainer().initialize(machine, features)

        # Make sure the target directory exists before writing the machine.
        utils.ensure_dir(os.path.dirname(init_file))
        machine.save(bob.io.HDF5File(init_file, 'w'))
        utils.info("UBM training: saved initial KMeans machine to '%s'" %
                   init_file)
Ejemplo n.º 2
0
  def gmm_initialize(self, force=False):
    """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).

    The K-Means machine is read from ``self.m_configuration.kmeans_file``; its
    means, together with the per-cluster variances and weights computed on the
    training features, are copied into a new GMM machine that is written to the
    GMM intermediate file for iteration 0 (``gmm_intermediate_file % 0``).

    This might require a lot of memory, since all selected training features
    are stacked into a single array.

    Keyword parameters:

    force
      Forwarded unchanged to ``__check_file__``; presumably requests
      re-creation of an already existing file (confirm in the tool chain).
    """
    output_file = self.m_configuration.gmm_intermediate_file % 0

    # NOTE(review): 800 is presumably a minimum file size -- confirm against __check_file__.
    if self.m_tool_chain.__check_file__(output_file, force, 800):
      utils.info("UBM Training: Skipping GMM initialization since '%s' already exists" % output_file)
      return

    training_list = self.m_file_selector.training_feature_list()
    utils.info("UBM Training: Initializing GMM")

    # load KMeans machine
    kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(self.m_configuration.kmeans_file))

    # read features of the selected training files into one stacked array
    indices = utils.quasi_random_indices(len(training_list), self.m_args.limit_training_examples)
    data = numpy.vstack([bob.io.load(str(training_list[index])) for index in indices])

    # Create initial GMM Machine; dimensionality comes from the feature columns
    gmm_machine = bob.machine.GMMMachine(self.m_tool.m_gaussians, data.shape[1])

    variances, weights = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

    # Initializes the GMM from the K-Means statistics
    gmm_machine.means = kmeans_machine.means
    gmm_machine.variances = variances
    gmm_machine.weights = weights
    gmm_machine.set_variance_thresholds(self.m_tool.m_variance_threshold)

    utils.ensure_dir(os.path.dirname(output_file))
    # output_file is already a complete path; a single-argument os.path.join adds nothing
    gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
    utils.info("UBM Training: Wrote GMM file '%s'" % output_file)
Ejemplo n.º 3
0
    def gmm_initialize(self, force=False):
        """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).

        The K-Means machine is read from ``self.m_configuration.kmeans_file``;
        its means, together with the per-cluster variances and weights
        computed on the training features, are copied into a new GMM machine
        that is written to the GMM intermediate file for iteration 0
        (``gmm_intermediate_file % 0``).

        This might require a lot of memory, since all selected training
        features are stacked into a single array.

        Keyword parameters:

        force
          Forwarded unchanged to ``__check_file__``; presumably requests
          re-creation of an already existing file (confirm in the tool chain).
        """
        output_file = self.m_configuration.gmm_intermediate_file % 0

        # NOTE(review): 800 is presumably a minimum file size -- confirm
        # against __check_file__.
        if self.m_tool_chain.__check_file__(output_file, force, 800):
            utils.info(
                "UBM Training: Skipping GMM initialization since '%s' already exists"
                % output_file)
            return

        training_list = self.m_file_selector.training_feature_list()
        utils.info("UBM Training: Initializing GMM")

        # load KMeans machine
        kmeans_machine = bob.machine.KMeansMachine(
            bob.io.HDF5File(self.m_configuration.kmeans_file))

        # read features of the selected training files into one stacked array
        indices = utils.quasi_random_indices(
            len(training_list), self.m_args.limit_training_examples)
        data = numpy.vstack(
            [bob.io.load(str(training_list[index])) for index in indices])

        # Create initial GMM Machine; dimensionality comes from the feature
        # columns
        gmm_machine = bob.machine.GMMMachine(self.m_tool.m_gaussians,
                                             data.shape[1])

        variances, weights = (
            kmeans_machine.get_variances_and_weights_for_each_cluster(data))

        # Initializes the GMM from the K-Means statistics
        gmm_machine.means = kmeans_machine.means
        gmm_machine.variances = variances
        gmm_machine.weights = weights
        gmm_machine.set_variance_thresholds(self.m_tool.m_variance_threshold)

        utils.ensure_dir(os.path.dirname(output_file))
        # output_file is already a complete path; a single-argument
        # os.path.join adds nothing
        gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
        utils.info("UBM Training: Wrote GMM file '%s'" % output_file)
Ejemplo n.º 4
0
  def kmeans_initialize(self, force=False):
    """Create the initial K-Means machine and write it to disk (non-parallel).

    The target is the K-Means intermediate file for iteration 0
    (``kmeans_intermediate_file % 0``).  When the tool chain's ``__check_file__``
    accepts that file, nothing is computed and only a log message is emitted.

    Keyword parameters:

    force
      Forwarded unchanged to ``__check_file__``; presumably requests
      re-creation of an already existing file (confirm in the tool chain).
    """
    target = self.m_configuration.kmeans_intermediate_file % 0

    # Guard clause: a usable file is already there, so skip the work.
    # NOTE(review): 1000 is presumably a minimum file size -- confirm against __check_file__.
    if self.m_tool_chain.__check_file__(target, force, 1000):
      utils.info("UBM training: Skipping KMeans initialization since the file '%s' already exists" % target)
      return

    utils.info("UBM training: initializing kmeans")

    # Stack the features of the selected training files into one array.
    file_names = self.m_file_selector.training_feature_list()
    chosen = utils.quasi_random_indices(len(file_names), self.m_args.limit_training_examples)
    stacked = numpy.vstack([bob.io.load(str(file_names[k])) for k in chosen])

    # One mean per Gaussian; dimensionality is taken from the feature columns.
    machine = bob.machine.KMeansMachine(self.m_tool.m_gaussians, stacked.shape[1])
    # Only the trainer's initialization step is run here, no training.
    trainer = bob.trainer.KMeansTrainer()
    trainer.initialize(machine, stacked)

    # Make sure the target directory exists before writing the machine.
    utils.ensure_dir(os.path.dirname(target))
    machine.save(bob.io.HDF5File(target, 'w'))
    utils.info("UBM training: saved initial KMeans machine to '%s'" % target)