 def read_stats(self, filename):
   """Reads accumulated K-Means statistics from file"""
   utils.debug("UBM training: Reading stats file '%s'" % filename)
   f = bob.io.HDF5File(filename)
   zeroeth  = f.read('zeros')
   first    = f.read('first')
   nsamples = f.read('nsamples')
   dist     = f.read('dist')
   return (zeroeth, first, nsamples, dist)
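A hedged sketch of the matching writer side follows; save_stats is an illustrative name (not from the original tool chain), and only the four HDF5 keys that read_stats expects are assumed:

  def save_stats(self, zeroeth, first, nsamples, dist, filename):
    """Writes accumulated K-Means statistics to file (illustrative counterpart to read_stats)"""
    utils.debug("UBM training: Writing stats file '%s'" % filename)
    f = bob.io.HDF5File(filename, 'w')
    f.set('zeros', zeroeth)      # zeroth-order statistics (key name as read above)
    f.set('first', first)        # first-order statistics
    f.set('nsamples', nsamples)  # number of accumulated samples
    f.set('dist', dist)          # accumulated distance measure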
Example #3
    def calibrate_scores(self,
                         norms=['nonorm', 'ztnorm'],
                         groups=['dev', 'eval'],
                         prior=0.5):
        """Calibrates the score files by learning a linear calibration from the dev files (first element of the groups) and applying it to all groups, separately for each given norm."""
        # read score files of the first group
        for norm in norms:
            if norm == 'nonorm':
                training_score_file = self.m_file_selector.no_norm_result_file(groups[0])
            elif norm == 'ztnorm':
                training_score_file = self.m_file_selector.zt_norm_result_file(groups[0])
            else:
                training_score_file = None

            # create a LLR trainer
            utils.info(
                " - Calibration: Training calibration for type %s from group %s"
                % (norm, groups[0]))
            llr_trainer = bob.trainer.CGLogRegTrainer(prior, 1e-16, 100000)

            training_scores = list(
                bob.measure.load.split_four_column(training_score_file))
            for i in (0, 1):
                h = numpy.array(training_scores[i])
                h.shape = (len(training_scores[i]), 1)
                training_scores[i] = h
            # train the LLR
            llr_machine = llr_trainer.train(training_scores[0],
                                            training_scores[1])
            del training_scores
            utils.debug(
                "   ... Resulting calibration parameters: shift = %f, scale = %f"
                % (llr_machine.biases[0], llr_machine.weights[0, 0]))

            # now, apply it to all groups
            for group in groups:
                if norm == 'nonorm':
                    score_file = self.m_file_selector.no_norm_result_file(group)
                elif norm == 'ztnorm':
                    score_file = self.m_file_selector.zt_norm_result_file(group)
                else:
                    score_file = None
                calibrated_file = self.m_file_selector.calibrated_score_file(
                    group, norm == 'ztnorm')

                utils.info(
                    " - Calibration: calibrating scores from '%s' to '%s'" %
                    (score_file, calibrated_file))

                # iterate through the score file and calibrate scores
                scores = bob.measure.load.four_column(score_file)
                with open(calibrated_file, 'w') as f:
                    for line in scores:
                        assert len(line) == 4
                        calibrated_score = llr_machine([line[3]])
                        f.write('%s %s %s %s\n' %
                                (line[0], line[1], line[2], calibrated_score[0]))
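For reference, the machine trained above applies a plain affine transform to each raw score; a minimal standalone sketch, assuming the scale and shift reported in the debug message (llr_machine.weights[0, 0] and llr_machine.biases[0]):

    import numpy

    def apply_linear_calibration(raw_scores, scale, shift):
        """Illustrative: calibrated = scale * raw + shift, which is what the
        trained LLR machine computes for one-dimensional score inputs."""
        return scale * numpy.asarray(raw_scores) + shift

    # e.g.: apply_linear_calibration(scores, llr_machine.weights[0, 0], llr_machine.biases[0])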
Example #4
  def __check_file__(self, filename, force, expected_file_size=1):
    """Checks if the file exists and has size greater than or equal to expected_file_size.
    If the file is too small, or if the force option is set to True, the file is removed.
    Returns True if the file is kept, otherwise False."""
    if os.path.exists(filename):
      if force or os.path.getsize(filename) < expected_file_size:
        utils.debug("  .. Removing old file '%s'." % filename)
        os.remove(filename)
        return False
      else:
        return True
    return False
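A hedged usage sketch for the helper above; compute_and_save stands in for whatever produces the file and is not part of the source:

  # recompute the file only when it is missing, truncated, or when force is set
  if not self.__check_file__(model_file, force, expected_file_size=1000):
    compute_and_save(model_file)  # hypothetical helper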
  def feature_normalization(self, indices, force=False):
    """Normalizes the list of features to have zero mean and unit variance (parallel)"""
    normalized_list = self.m_file_selector.training_feature_list()
    # NOTE: the original snippet references `training_list` without defining it;
    # it is assumed here to be the list of raw training feature files
    # (taken from the same selector, so the two lists coincide).
    training_list = normalized_list

    utils.info("UBM training: normalizing features from range(%d, %d)" % indices)

    # iterate through the files and normalize the features
    for index in range(indices[0], indices[1]):
      feature = bob.io.load(str(training_list[index]))

      mean, std = self.m_tool.__normalize_std_array__(feature)

      if self.m_tool_chain.__check_file__(normalized_list[index], force):
        utils.debug("Skipping file '%s'" % normalized_list[index])
      else:
        utils.ensure_dir(os.path.dirname(normalized_list[index]))
        f = bob.io.HDF5File(str(normalized_list[index]), 'w')
        f.set('mean', mean)
        f.set('std', std)
        utils.debug("Saved normalized feature %s" % str(normalized_list[index]))