Ejemplo n.º 1
0
def train_isv(algorithm, force=False, allow_missing_files=False):
    """Train the ISV projector/enroller on per-client GMM statistics.

    When ``utils.check_file`` accepts the projector file, training is skipped
    and only a log message is emitted.  Otherwise the UBM is loaded into
    ``algorithm``, the ``projected_gmm`` statistics of the ``train_projector``
    group are read client by client, ISV is trained on them and the projector
    is written to the projector file of the ``FileSelector``.

    algorithm -- object providing ``load_ubm``, ``read_gmm_stats``,
        ``train_isv`` and ``save_projector``.
    force -- forwarded to ``utils.check_file``.
    allow_missing_files -- forwarded to ``utils.filter_missing_files``.
    """
    selector = FileSelector.instance()
    projector_path = selector.projector_file

    if utils.check_file(projector_path, force, 800):
        logger.info(
            "ISV training: Skipping ISV training since '%s' already exists",
            projector_path)
        return

    # the ISV class needs the UBM before it can read statistics or train
    algorithm.load_ubm(selector.ubm_file)

    # collect the statistics files, grouped by client
    files_by_client = utils.filter_missing_files(
        selector.training_list('projected_gmm',
                               'train_projector',
                               arrange_by_client=True),
        split_by_client=True,
        allow_missing_files=allow_missing_files)

    # one inner list of GMM statistics per client
    stats_by_client = []
    for client_files in files_by_client:
        stats_by_client.append(
            [algorithm.read_gmm_stats(path) for path in client_files])

    logger.info("ISV training: training ISV with %d clients",
                len(stats_by_client))
    algorithm.train_isv(stats_by_client)

    # make sure the target directory exists, then store the projector
    bob.io.base.create_directories_safe(os.path.dirname(projector_path))
    algorithm.save_projector(projector_path)
Ejemplo n.º 2
0
def train_plda(algorithm, force=False, allow_missing_files=False):
    """Train the PLDA model on the per-client training features.

    The input features are taken from the ``wccn_projected`` directory when
    ``algorithm.use_wccn`` is set, otherwise from ``lda_projected`` when
    ``algorithm.use_lda`` is set, otherwise from ``whitened``.  Training is
    skipped when ``utils.check_file`` accepts the existing PLDA file.

    algorithm -- object providing ``use_wccn``, ``use_lda``, ``train_plda``
        and, after training, ``plda_base``.
    force -- forwarded to ``utils.check_file``.
    allow_missing_files -- forwarded to ``utils.filter_missing_files``.
    """
    selector = FileSelector.instance()
    if utils.check_file(selector.plda_file, force, 1000):
        logger.info("- PLDA projector '%s' already exists.",
                    selector.plda_file)
        return

    # WCCN output takes precedence over LDA output, which takes precedence
    # over the plain whitened features
    source_label = ('wccn_projected' if algorithm.use_wccn else
                    'lda_projected' if algorithm.use_lda else 'whitened')

    files_by_client = utils.filter_missing_files(
        selector.training_list(source_label,
                               'train_projector',
                               arrange_by_client=True),
        split_by_client=True,
        allow_missing_files=allow_missing_files)

    # one inner list of loaded features per client
    features_by_client = []
    for client_files in files_by_client:
        features_by_client.append(
            [bob.bio.base.load(path) for path in client_files])

    # train, then persist the resulting PLDA base
    algorithm.train_plda(features_by_client)
    bob.io.base.create_directories_safe(os.path.dirname(selector.plda_file))
    bob.bio.base.save(algorithm.plda_base, selector.plda_file)
Ejemplo n.º 3
0
def wccn_project(algorithm, indices, force=False, allow_missing_files=False):
    """Apply the WCCN projection to a range of training features.

    The WCCN data is loaded into ``algorithm`` first.  Inputs come from the
    ``lda_projected`` directory when ``algorithm.use_lda`` is set, otherwise
    from ``whitened``; outputs go to ``wccn_projected``.

    algorithm -- object providing ``load_wccn``, ``use_lda``,
        ``read_feature`` and ``project_wccn``.
    indices -- pair ``(start, stop)``; files ``start .. stop - 1`` of the
        training list are processed.
    force -- forwarded to ``utils.check_file``.
    allow_missing_files -- forwarded to ``utils.filter_missing_files``.
    """
    selector = FileSelector.instance()
    algorithm.load_wccn(selector.wccn_file)

    source_label = 'lda_projected' if algorithm.use_lda else 'whitened'

    source_files = selector.training_list(source_label, 'train_projector')
    target_files = selector.training_list('wccn_projected',
                                          'train_projector')

    first, last = indices[0], indices[1]
    logger.info(
        "IVector training: WCCN projection range (%d, %d) from '%s' to '%s'",
        first, last, selector.directories[source_label],
        selector.directories['wccn_projected'])

    for index in range(first, last):
        source_file = source_files[index]
        target_file = target_files[index]

        # the output file was accepted by check_file: no need to recompute
        if utils.check_file(target_file, force):
            continue

        # the input did not survive the missing-file filter: skip it
        available = utils.filter_missing_files(
            [source_file],
            split_by_client=False,
            allow_missing_files=allow_missing_files)
        if len(available) == 0:
            continue

        # load, project and store the feature
        projected = algorithm.project_wccn(
            algorithm.read_feature(source_file))
        bob.io.base.create_directories_safe(os.path.dirname(target_file))
        bob.bio.base.save(projected, target_file)
Ejemplo n.º 4
0
def lda_project(algorithm, indices, force=False, allow_missing_files=False):
    """Apply the LDA projection to a range of whitened training features.

    The LDA data is loaded into ``algorithm`` first.  Inputs are read from
    the ``whitened`` directory and outputs are written to ``lda_projected``.

    algorithm -- object providing ``load_lda``, ``read_feature`` and
        ``project_lda``.
    indices -- pair ``(start, stop)``; files ``start .. stop - 1`` of the
        training list are processed.
    force -- forwarded to ``utils.check_file``.
    allow_missing_files -- forwarded to ``utils.filter_missing_files``.
    """
    selector = FileSelector.instance()
    algorithm.load_lda(selector.lda_file)

    source_files = selector.training_list('whitened', 'train_projector')
    target_files = selector.training_list('lda_projected', 'train_projector')

    begin, end = indices[0], indices[1]
    logger.info(
        "IVector training: LDA projection range (%d, %d) from '%s' to '%s'",
        begin, end, selector.directories['whitened'],
        selector.directories['lda_projected'])

    for position in range(begin, end):
        source_file = source_files[position]
        target_file = target_files[position]

        # Only project when the output is not accepted by check_file AND the
        # input survives the missing-file filter; the filter is evaluated
        # only if the first test passes (short-circuit).
        if (not utils.check_file(target_file, force) and len(
                utils.filter_missing_files(
                    [source_file],
                    split_by_client=False,
                    allow_missing_files=allow_missing_files)) > 0):
            feature = algorithm.read_feature(source_file)
            projected = algorithm.project_lda(feature)
            bob.io.base.create_directories_safe(os.path.dirname(target_file))
            bob.bio.base.save(projected, target_file)
Ejemplo n.º 5
0
def ivector_estep(algorithm,
                  iteration,
                  indices,
                  force=False,
                  allow_missing_files=False):
    """Run one E-step of the IVector training on a slice of the data (parallel).

    The accumulated statistics of the trainer are written to the stats file
    that the ``FileSelector`` associates with ``iteration`` and the index
    range.  Nothing is computed when ``utils.check_file`` accepts an already
    existing stats file.

    algorithm -- object providing ``load_ubm``, ``ubm``, ``ivector_trainer``,
        ``subspace_dimension_of_t``, ``variance_threshold`` and
        ``read_gmm_stats``.
    iteration -- when falsy (e.g. ``0``), a new TV machine is created and
        initialised by the trainer; otherwise the machine is loaded from
        ``fs.ivector_intermediate_file(iteration)``.
    indices -- pair ``(start, stop)``; training files ``start .. stop - 1``
        are used.
    force -- forwarded to ``utils.check_file``.
    allow_missing_files -- forwarded to ``utils.filter_missing_files``.
    """
    fs = FileSelector.instance()
    stats_file = fs.ivector_stats_file(iteration, indices[0], indices[1])

    if utils.check_file(stats_file, force, 1000):
        logger.info(
            "IVector training: Skipping IVector E-Step since the file '%s' already exists",
            stats_file)
        return

    logger.info("IVector training: E-Step from range(%d, %d)", *indices)

    # The UBM is needed in both branches below: to build a new TV machine
    # or to attach to one loaded from disk.
    algorithm.load_ubm(fs.ubm_file)

    # get the IVectorTrainer; it is initialised only for a new machine
    trainer = algorithm.ivector_trainer

    if iteration:
        # continue from the TV machine stored for this iteration
        tv = bob.learn.em.IVectorMachine(
            bob.io.base.HDF5File(fs.ivector_intermediate_file(iteration)))
        tv.ubm = algorithm.ubm
    else:
        # first iteration: create and initialise a new TV machine
        tv = bob.learn.em.IVectorMachine(algorithm.ubm,
                                         algorithm.subspace_dimension_of_t,
                                         algorithm.variance_threshold)
        trainer.initialize(tv)

    # Load the GMM statistics of this job's slice of the training list
    training_list = fs.training_list('projected_gmm', 'train_projector')
    training_list = [
        training_list[i] for i in range(indices[0], indices[1])
    ]
    training_list = utils.filter_missing_files(
        training_list,
        split_by_client=False,
        allow_missing_files=allow_missing_files)
    data = [algorithm.read_gmm_stats(f) for f in training_list]

    # Perform the E-step
    trainer.e_step(tv, data)

    # Write the accumulators.  The file is closed explicitly so the data is
    # flushed before success is logged, and the handle is released even if
    # one of the writes fails.
    bob.io.base.create_directories_safe(os.path.dirname(stats_file))
    hdf5 = bob.io.base.HDF5File(stats_file, 'w')
    try:
        hdf5.set('acc_nij_wij2', trainer.acc_nij_wij2)
        hdf5.set('acc_fnormij_wij', trainer.acc_fnormij_wij)
        hdf5.set('acc_nij', trainer.acc_nij)
        hdf5.set('acc_snormij', trainer.acc_snormij)
        # NOTE(review): this stores the width of the index range, not
        # len(data); the two differ when missing files were filtered out.
        # Confirm which one the consumer of 'nsamples' expects.
        hdf5.set('nsamples', indices[1] - indices[0])
    finally:
        hdf5.close()
    logger.info("IVector training: Wrote Stats file '%s'", stats_file)
Ejemplo n.º 6
0
def train_whitener(algorithm, force=False, allow_missing_files=False):
    """Train the whitening projector on the ``projected_ivector`` features.

    Training is skipped when ``utils.check_file`` accepts the existing
    whitener file.  Otherwise all available training features are loaded,
    the whitener is trained and stored in the whitener file of the
    ``FileSelector``.

    algorithm -- object providing ``train_whitener`` and, after training,
        ``whitener``.
    force -- forwarded to ``utils.check_file``.
    allow_missing_files -- forwarded to ``utils.filter_missing_files``.
    """
    selector = FileSelector.instance()

    if utils.check_file(selector.whitener_file, force, 1000):
        logger.info("- Whitening projector '%s' already exists.",
                    selector.whitener_file)
        return

    # flat list of training files (not grouped by client)
    candidate_files = selector.training_list('projected_ivector',
                                             'train_projector')
    available_files = utils.filter_missing_files(
        candidate_files,
        split_by_client=False,
        allow_missing_files=allow_missing_files)

    loaded_features = []
    for path in available_files:
        loaded_features.append(bob.bio.base.load(path))

    # train, then persist the resulting whitener
    algorithm.train_whitener(loaded_features)
    bob.io.base.create_directories_safe(
        os.path.dirname(selector.whitener_file))
    bob.bio.base.save(algorithm.whitener, selector.whitener_file)
Ejemplo n.º 7
0
def train_isv(algorithm, force=False, allow_missing_files=False):
  """Train the ISV projector/enroller on per-client GMM statistics.

  Training is skipped when ``utils.check_file`` accepts the existing
  projector file.  Otherwise the UBM is loaded into ``algorithm``, the
  ``projected_gmm`` statistics of the ``train_projector`` group are read
  client by client, ISV is trained and the projector is saved.

  algorithm -- object providing ``load_ubm``, ``read_gmm_stats``,
    ``train_isv`` and ``save_projector``.
  force -- forwarded to ``utils.check_file``.
  allow_missing_files -- forwarded to ``utils.filter_missing_files``.
  """
  file_selector = FileSelector.instance()

  if utils.check_file(file_selector.projector_file, force, 800):
    logger.info("ISV training: Skipping ISV training since '%s' already exists", file_selector.projector_file)
    return

  # the ISV class needs the UBM before statistics can be read
  algorithm.load_ubm(file_selector.ubm_file)

  # statistics files, grouped by client and filtered for missing ones
  grouped_files = file_selector.training_list('projected_gmm', 'train_projector', arrange_by_client = True)
  grouped_files = utils.filter_missing_files(grouped_files, split_by_client=True, allow_missing_files=allow_missing_files)

  # one inner list of GMM statistics per client
  grouped_stats = []
  for files_of_client in grouped_files:
    grouped_stats.append([algorithm.read_gmm_stats(path) for path in files_of_client])

  logger.info("ISV training: training ISV with %d clients", len(grouped_stats))
  algorithm.train_isv(grouped_stats)

  # create the output directory if needed, then store the projector
  target_dir = os.path.dirname(file_selector.projector_file)
  bob.io.base.create_directories_safe(target_dir)
  algorithm.save_projector(file_selector.projector_file)
Ejemplo n.º 8
0
def gmm_project(algorithm, extractor, indices, force=False, allow_missing_files = False):
  """Project a range of extracted training features with the UBM.

  The UBM is loaded into ``algorithm`` first.  Inputs are read from the
  ``extracted`` directory with ``read_feature(extractor, ...)`` and the
  results of ``algorithm.project_ubm`` are written to ``projected_gmm``.

  algorithm -- object providing ``load_ubm`` and ``project_ubm``.
  extractor -- passed to ``read_feature`` to load each input file.
  indices -- pair ``(start, stop)``; files ``start .. stop - 1`` of the
    training list are processed.
  force -- forwarded to ``utils.check_file``.
  allow_missing_files -- forwarded to ``utils.filter_missing_files``.
  """
  selector = FileSelector.instance()
  algorithm.load_ubm(selector.ubm_file)

  source_files = selector.training_list('extracted', 'train_projector')
  target_files = selector.training_list('projected_gmm', 'train_projector')

  first, last = indices[0], indices[1]
  logger.info("ISV training: Project features range (%d, %d) from '%s' to '%s'", first, last, selector.directories['extracted'], selector.directories['projected_gmm'])

  for index in range(first, last):
    source_file = source_files[index]
    target_file = target_files[index]

    # the output file was accepted by check_file: no need to recompute
    if utils.check_file(target_file, force):
      continue

    # the input did not survive the missing-file filter: skip it
    usable = utils.filter_missing_files([source_file], split_by_client=False, allow_missing_files=allow_missing_files)
    if len(usable) == 0:
      continue

    # load, project and store the feature
    projected = algorithm.project_ubm(read_feature(extractor, source_file))
    bob.io.base.create_directories_safe(os.path.dirname(target_file))
    bob.bio.base.save(projected, target_file)
Ejemplo n.º 9
0
def ivector_project(algorithm,
                    indices,
                    force=False,
                    allow_missing_files=False):
    """Project a range of training GMM statistics with the IVector machine.

    The UBM and the TV file are loaded into ``algorithm`` first.  Inputs are
    read from the ``projected_gmm`` directory and the results of
    ``algorithm.project_ivector`` are written to ``projected_ivector``.

    algorithm -- object providing ``load_ubm``, ``load_tv``,
        ``read_gmm_stats`` and ``project_ivector``.
    indices -- pair ``(start, stop)``; files ``start .. stop - 1`` of the
        training list are processed.
    force -- forwarded to ``utils.check_file``.
    allow_missing_files -- forwarded to ``utils.filter_missing_files``.
    """
    selector = FileSelector.instance()

    # both the UBM and the TV data must be in place before projecting
    algorithm.load_ubm(selector.ubm_file)
    algorithm.load_tv(selector.tv_file)

    stats_paths = selector.training_list('projected_gmm', 'train_projector')
    output_paths = selector.training_list('projected_ivector',
                                          'train_projector')

    lower, upper = indices[0], indices[1]
    logger.info(
        "IVector training: Project features range (%d, %d) from '%s' to '%s'",
        lower, upper, selector.directories['projected_gmm'],
        selector.directories['projected_ivector'])

    for k in range(lower, upper):
        stats_path = stats_paths[k]
        output_path = output_paths[k]

        # the output file was accepted by check_file: no need to recompute
        if utils.check_file(output_path, force):
            continue

        remaining = utils.filter_missing_files(
            [stats_path],
            split_by_client=False,
            allow_missing_files=allow_missing_files)
        if len(remaining) > 0:
            # read the statistics, project them and store the result
            stats = algorithm.read_gmm_stats(stats_path)
            ivector = algorithm.project_ivector(stats)
            bob.io.base.create_directories_safe(os.path.dirname(output_path))
            bob.bio.base.save(ivector, output_path)