예제 #1
0
def execute(args):
  """Run the desired job of the tool chain that is specified on command line.
  This job might be executed either in the grid, or locally.

  Returns ``True`` when the job was handled (either by the base GMM script or
  by one of the ISV sub-tasks below), ``False`` when ``args.sub_task`` is not
  a keyword of this script.
  """

  # first, let the base script decide if it knows how to execute the job
  if gmm_execute(args):
    return True

  # now, check what we can do
  algorithm = tools.base(args.algorithm)

  # the file selector object
  fs = tools.FileSelector.instance()

  if args.sub_task == 'gmm-project':
    # project the extracted features with the GMM, split over the grid jobs
    tools.gmm_project(
        algorithm,
        args.extractor,
        indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
        force = args.force)

  elif args.sub_task == 'isv-e-step':
    # one parallelized E-step of the ISV training
    tools.isv_estep(
        algorithm,
        args.iteration,
        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector', arrange_by_client=True), args.grid.number_of_projection_jobs),
        force = args.force)

  elif args.sub_task == 'isv-m-step':
    # the non-parallel M-step that accumulates the E-step results
    tools.isv_mstep(
        algorithm,
        args.iteration,
        number_of_parallel_jobs = args.grid.number_of_projection_jobs,
        clean = args.clean_intermediate,
        force = args.force)

  elif args.sub_task == 'save-projector':
    # write the final projector file
    tools.save_isv_projector(
        algorithm,
        force=args.force)

  else:
    # Not our keyword...
    return False
  return True
예제 #2
0
def execute(args):
  """Run the desired job of the tool chain that is specified on command line.
  This job might be executed either in the grid, or locally."""

  # Give the base GMM script the first chance to handle the job.
  if gmm_execute(args):
    return True

  # The base script did not know the job; resolve the algorithm and the
  # file selector singleton ourselves.
  algorithm = tools.base(args.algorithm)
  fs = tools.FileSelector.instance()

  task = args.sub_task

  if task == 'gmm-project':
    training_files = fs.training_list('extracted', 'train_projector')
    tools.gmm_project(
        algorithm,
        args.extractor,
        indices = base_tools.indices(training_files, args.grid.number_of_projection_jobs),
        allow_missing_files = args.allow_missing_files,
        force = args.force)
    return True

  # train the feature projector
  if task == 'train-isv':
    tools.train_isv(
        algorithm,
        allow_missing_files = args.allow_missing_files,
        force = args.force)
    return True

  # Not our keyword...
  return False
예제 #3
0
def ivector_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Performs a single M-step of the IVector algorithm (non-parallel).

  Accumulates the statistics written by the E-step job(s) of the given
  ``iteration``, runs one M-step of the IVector trainer and writes the
  resulting intermediate IVector machine.  After the last iteration, the
  machine is copied to the final TV file.

  Parameters:
    algorithm: the algorithm object providing the ``ivector_trainer``
    iteration: the (0-based) index of the current EM iteration
    number_of_parallel_jobs: number of E-step jobs whose statistics to collect
    force: if ``True``, re-run the M-step even if the output already exists
    clean: if ``True``, remove the intermediate files of iteration-1
  """
  fs = FileSelector.instance()

  old_machine_file = fs.ivector_intermediate_file(iteration)
  new_machine_file = fs.ivector_intermediate_file(iteration + 1)

  if utils.check_file(new_machine_file, force, 1000):
    logger.info("IVector training: Skipping IVector M-Step since the file '%s' already exists", new_machine_file)
  else:
    # get the files from e-step
    training_list = fs.training_list('projected_gmm', 'train_projector')
    # try if there is one file containing all data
    if os.path.exists(fs.ivector_stats_file(iteration, 0, len(training_list))):
      # load stats file
      # BUG FIX: this is a module-level function; the original called
      # ``self._read_stats`` here, which raised a NameError whenever this
      # single-file branch was taken (compare the sibling isv_mstep).
      statistics = _read_stats(fs.ivector_stats_file(iteration, 0, len(training_list)))
    else:
      # load several files, one per parallel E-step job
      stats_files = []
      for job in range(number_of_parallel_jobs):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
        if job_indices[-1] >= job_indices[0]:
          stats_files.append(fs.ivector_stats_file(iteration, job_indices[0], job_indices[-1]))
      # read all stats files
      statistics = _accumulate(stats_files)

    # Load machine
    algorithm.load_ubm(fs.ubm_file)
    if iteration:
      # continue from the machine written by the previous M-step
      tv = bob.learn.em.IVectorMachine(bob.io.base.HDF5File(old_machine_file))
      tv.ubm = algorithm.ubm
    else:
      # first iteration: create a fresh machine
      tv = bob.learn.em.IVectorMachine(algorithm.ubm, algorithm.subspace_dimension_of_t, algorithm.variance_threshold)

    # Creates the IVectorTrainer and initialize values
    trainer = algorithm.ivector_trainer
    trainer.reset_accumulators(tv)
    trainer.acc_nij_wij2 = statistics[0]
    trainer.acc_fnormij_wij = statistics[1]
    trainer.acc_nij = statistics[2]
    trainer.acc_snormij = statistics[3]
    trainer.m_step(tv) # data is not used in M-step
    logger.info("IVector training: Performed M step %d", iteration)

    # Save the IVector model
    bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
    tv.save(bob.io.base.HDF5File(new_machine_file, 'w'))
    logger.info("IVector training: Wrote new IVector machine '%s'", new_machine_file)

  if iteration == algorithm.tv_training_iterations-1:
    # last iteration: publish the final TV matrix
    shutil.copy(new_machine_file, fs.tv_file)
    logger.info("IVector training: Wrote new TV matrix '%s'", fs.tv_file)

  if clean and iteration > 0:
    old_dir = os.path.dirname(fs.ivector_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", old_dir)
    shutil.rmtree(old_dir)
예제 #4
0
def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Performs a single M-step of the GMM training (non-parallel)"""
  fs = FileSelector.instance()

  previous_file = fs.gmm_intermediate_file(iteration)
  next_file = fs.gmm_intermediate_file(iteration + 1)

  if utils.check_file(next_file, force, 1000):
    logger.info("UBM training: Skipping GMM M-Step since the file '%s' already exists", next_file)
  else:
    # collect the statistics produced by the e-step jobs
    training_list = fs.training_list('extracted', 'train_projector')

    combined_stats_file = fs.gmm_stats_file(iteration, 0, len(training_list))
    if os.path.exists(combined_stats_file):
      # a single file holds the statistics of all training data
      gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(combined_stats_file))
    else:
      # otherwise collect one statistics file per parallel job
      stats_files = []
      for job_id in range(1, number_of_parallel_jobs + 1):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job_id)
        if job_indices[-1] > job_indices[0]:
          stats_files.append(fs.gmm_stats_file(iteration, job_indices[0], job_indices[-1]))

      # accumulate all per-job statistics into the first one
      gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_files[0]))
      for extra_file in stats_files[1:]:
        gmm_stats += bob.learn.em.GMMStats(bob.io.base.HDF5File(extra_file))

    # restore the machine of the previous iteration
    gmm_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(previous_file))

    # feed the accumulated statistics into the trainer
    trainer = algorithm.ubm_trainer
    trainer.initialize(gmm_machine)
    trainer.gmm_statistics = gmm_stats

    # the M-step itself needs no data
    trainer.m_step(gmm_machine)

    # persist the updated machine for the next iteration
    bob.io.base.create_directories_safe(os.path.dirname(next_file))
    gmm_machine.save(bob.io.base.HDF5File(next_file, 'w'))

  # Write the final UBM file after the last iteration
  # TODO: implement other stopping criteria
  if iteration == algorithm.gmm_training_iterations - 1:
    shutil.copy(next_file, fs.ubm_file)
    logger.info("UBM training: Wrote new UBM '%s'", fs.ubm_file)

  if clean and iteration > 0:
    old_dir = os.path.dirname(fs.gmm_intermediate_file(iteration - 1))
    logger.info("Removing old intermediate directory '%s'", old_dir)
    shutil.rmtree(old_dir)
예제 #5
0
파일: gmm.py 프로젝트: 183amir/bob.bio.gmm
def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Performs a single M-step of the GMM training (non-parallel).

  Reads the GMM statistics written by the E-step job(s) of the given
  ``iteration``, performs one M-step on the GMM machine of the previous
  iteration and writes the updated machine.  After the last iteration the
  machine is copied to the final UBM file.

  Parameters:
    algorithm: the algorithm object providing the ``ubm_trainer``
    iteration: the (0-based) index of the current EM iteration
    number_of_parallel_jobs: number of E-step jobs whose statistics to collect
    force: if ``True``, re-run the M-step even if the output already exists
    clean: if ``True``, remove the intermediate files of iteration-1
  """
  fs = FileSelector.instance()

  old_machine_file = fs.gmm_intermediate_file(iteration)
  new_machine_file = fs.gmm_intermediate_file(iteration + 1)

  if utils.check_file(new_machine_file, force, 1000):
    logger.info("UBM training: Skipping GMM M-Step since the file '%s' already exists", new_machine_file)
  else:
    # get the files from e-step
    training_list = fs.training_list('extracted', 'train_projector')

    # try if there is one file containing all data
    if os.path.exists(fs.gmm_stats_file(iteration, 0, len(training_list))):
      stats_file = fs.gmm_stats_file(iteration, 0, len(training_list))
      # load stats file
      gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_file))
    else:
      # load several files, one per parallel E-step job
      stats_files = []
      for job in range(number_of_parallel_jobs):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
        # NOTE(review): jobs whose index range is empty (last == first) are
        # skipped here; the ISV/IVector variants in this code base use >=
        # instead -- confirm which comparison is intended.
        if job_indices[-1] > job_indices[0]:
          stats_files.append(fs.gmm_stats_file(iteration, job_indices[0], job_indices[-1]))

      # read all stats files and accumulate them into the first one
      gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_files[0]))
      for stats_file in stats_files[1:]:
        gmm_stats += bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_file))

    # load the old gmm machine
    gmm_machine =  bob.learn.em.GMMMachine(bob.io.base.HDF5File(old_machine_file))

    # initialize the trainer with the accumulated statistics
    trainer = algorithm.ubm_trainer
    trainer.initialize(gmm_machine)
    trainer.gmm_statistics = gmm_stats

    # Calls M-step (no data required)
    trainer.m_step(gmm_machine)

    # Saves the GMM statistics to the file
    bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
    gmm_machine.save(bob.io.base.HDF5File(new_machine_file, 'w'))

  # Write the final UBM file after the last iteration
  # TODO: implement other stopping criteria
  if iteration == algorithm.gmm_training_iterations-1:
    shutil.copy(new_machine_file, fs.ubm_file)
    logger.info("UBM training: Wrote new UBM '%s'", fs.ubm_file)

  if clean and iteration > 0:
    old_dir = os.path.dirname(fs.gmm_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", old_dir)
    shutil.rmtree(old_dir)
예제 #6
0
def isv_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Performs a single M-step of the ISV algorithm (non-parallel).

  Accumulates the statistics written by the E-step job(s) of the given
  ``iteration``, runs one M-step of the ISV trainer and writes the resulting
  intermediate ISV base.  After the last iteration, the base is copied to the
  final ISV file.

  Parameters:
    algorithm: the algorithm object providing the ``isv_trainer``
    iteration: the (0-based) index of the current EM iteration
    number_of_parallel_jobs: number of E-step jobs whose statistics to collect
    force: if ``True``, re-run the M-step even if the output already exists
    clean: if ``True``, remove the intermediate files of iteration-1
  """
  fs = FileSelector.instance()

  old_machine_file = fs.isv_intermediate_file(iteration)
  new_machine_file = fs.isv_intermediate_file(iteration + 1)

  if utils.check_file(new_machine_file, force, 1000):
    logger.info("ISV training: Skipping ISV M-Step since the file '%s' already exists", new_machine_file)
  else:
    # get the files from e-step
    training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client=True)
    # try if there is one file containing all data
    if os.path.exists(fs.isv_stats_file(iteration, 0, len(training_list))):
      # load stats file
      statistics = _read_stats(fs.isv_stats_file(iteration, 0, len(training_list)))
    else:
      # load several files, one per parallel E-step job
      stats_files = []
      for job in range(number_of_parallel_jobs):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
        if job_indices[-1] >= job_indices[0]:
          stats_files.append(fs.isv_stats_file(iteration, job_indices[0], job_indices[-1]))
      # read all stats files
      statistics = _accumulate(stats_files)

    # Load machine
    algorithm.load_ubm(fs.ubm_file)
    if iteration:
      # continue from the ISV base written by the previous M-step
      isv_base = bob.learn.em.ISVBase(bob.io.base.HDF5File(old_machine_file))
      isv_base.ubm = algorithm.ubm
    else:
      # first iteration: create a fresh ISV base
      isv_base = bob.learn.em.ISVBase(algorithm.ubm, algorithm.subspace_dimension_of_u)

    # Creates the ISV trainer and initializes its accumulators
    trainer = algorithm.isv_trainer
    data = [algorithm.read_gmm_stats(training_list[0])] # Loading data just to allocate memory
    trainer.initialize(isv_base, data) # Just to allocate memory
    trainer.acc_u_a1 = statistics[0]
    trainer.acc_u_a2 = statistics[1]
    trainer.m_step(isv_base) # data is not used in M-step
    logger.info("ISV training: Performed M step %d", iteration)

    # Save the ISV model
    bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
    isv_base.save(bob.io.base.HDF5File(new_machine_file, 'w'))
    logger.info("ISV training: Wrote new ISV Base '%s'", new_machine_file)

  if iteration == algorithm.isv_training_iterations-1:
    shutil.copy(new_machine_file, fs.isv_file)
    # BUG FIX: the message said "TV matrix" (copy-paste from the IVector
    # variant), but this writes the final ISV matrix.
    logger.info("ISV training: Wrote new ISV matrix '%s'", fs.isv_file)

  if clean and iteration > 0:
    old_dir = os.path.dirname(fs.isv_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", old_dir)
    shutil.rmtree(old_dir)
예제 #7
0
def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Performs a single M-step of the K-Means algorithm (non-parallel)"""
  fs = FileSelector.instance()

  previous_file = fs.kmeans_intermediate_file(iteration)
  next_file = fs.kmeans_intermediate_file(iteration+1)

  if utils.check_file(next_file, force, 1000):
    logger.info("UBM training: Skipping KMeans M-Step since the file '%s' already exists", next_file)
  else:
    # collect the statistics produced by the e-step jobs
    training_list = fs.training_list('extracted', 'train_projector')

    combined_stats_file = fs.kmeans_stats_file(iteration, 0, len(training_list))
    if os.path.exists(combined_stats_file):
      # a single file holds the statistics of all training data
      statistics = _read_stats(combined_stats_file)
    else:
      # otherwise accumulate one statistics file per parallel job
      filenames = []
      for job_id in range(1, number_of_parallel_jobs + 1):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job_id)
        if job_indices[-1] > job_indices[0]:
          filenames.append(fs.kmeans_stats_file(iteration, job_indices[0], job_indices[-1]))
      statistics = _accumulate(filenames)

    # restore the machine of the previous iteration and feed the trainer
    kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(previous_file))
    trainer = algorithm.kmeans_trainer
    trainer.reset_accumulators(kmeans_machine)

    trainer.zeroeth_order_statistics = statistics[0]
    trainer.first_order_statistics = statistics[1]
    trainer.average_min_distance = statistics[3]
    error = statistics[3] / statistics[2]

    # Performs the M-step
    trainer.m_step(kmeans_machine, None) # data is not used in M-step
    logger.info("UBM training: Performed M step %d with result %f" % (iteration, error))

    # persist the updated K-Means machine
    bob.io.base.create_directories_safe(os.path.dirname(next_file))
    kmeans_machine.save(bob.io.base.HDF5File(next_file, 'w'))

  # copy the k_means file, when last iteration
  # TODO: implement other stopping criteria
  if iteration == algorithm.kmeans_training_iterations-1:
    shutil.copy(next_file, fs.kmeans_file)
    logger.info("UBM training: Wrote new KMeans machine '%s'", fs.kmeans_file)

  if clean and iteration > 0:
    old_dir = os.path.dirname(fs.kmeans_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", old_dir)
    shutil.rmtree(old_dir)
예제 #8
0
파일: gmm.py 프로젝트: 183amir/bob.bio.gmm
def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Performs a single M-step of the K-Means algorithm (non-parallel).

  Accumulates the statistics written by the E-step job(s) of the given
  ``iteration``, performs one M-step on the K-Means machine of the previous
  iteration and writes the updated machine.  After the last iteration the
  machine is copied to the final K-Means file.

  Parameters:
    algorithm: the algorithm object providing the ``kmeans_trainer``
    iteration: the (0-based) index of the current EM iteration
    number_of_parallel_jobs: number of E-step jobs whose statistics to collect
    force: if ``True``, re-run the M-step even if the output already exists
    clean: if ``True``, remove the intermediate files of iteration-1
  """
  fs = FileSelector.instance()

  old_machine_file = fs.kmeans_intermediate_file(iteration)
  new_machine_file = fs.kmeans_intermediate_file(iteration+1)

  if  utils.check_file(new_machine_file, force, 1000):
    logger.info("UBM training: Skipping KMeans M-Step since the file '%s' already exists", new_machine_file)
  else:
    # get the files from e-step
    training_list = fs.training_list('extracted', 'train_projector')

    # try if there is one file containing all data
    if os.path.exists(fs.kmeans_stats_file(iteration, 0, len(training_list))):
      stats_file = fs.kmeans_stats_file(iteration, 0, len(training_list))
      # load stats file
      statistics = _read_stats(stats_file)
    else:
      # load several files, one per parallel E-step job
      filenames = []
      for job in range(number_of_parallel_jobs):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
        # NOTE(review): jobs whose index range is empty (last == first) are
        # skipped here; the ISV/IVector variants use >= instead -- confirm
        # which comparison is intended.
        if job_indices[-1] > job_indices[0]:
          filenames.append(fs.kmeans_stats_file(iteration, job_indices[0], job_indices[-1]))
      statistics = _accumulate(filenames)

    # Creates the KMeansMachine from the previous intermediate file
    kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(old_machine_file))
    trainer = algorithm.kmeans_trainer
    trainer.reset_accumulators(kmeans_machine)

    # feed the accumulated statistics into the trainer
    trainer.zeroeth_order_statistics = statistics[0]
    trainer.first_order_statistics = statistics[1]
    trainer.average_min_distance = statistics[3]
    # NOTE(review): statistics[2] is presumably a sample count, making this a
    # normalized distance used only for the log line below -- verify.
    error = statistics[3] / statistics[2]

    # Performs the M-step
    trainer.m_step(kmeans_machine, None) # data is not used in M-step
    logger.info("UBM training: Performed M step %d with result %f" % (iteration, error))

    # Save the K-Means model
    bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
    kmeans_machine.save(bob.io.base.HDF5File(new_machine_file, 'w'))

  # copy the k_means file, when last iteration
  # TODO: implement other stopping criteria
  if iteration == algorithm.kmeans_training_iterations-1:
    shutil.copy(new_machine_file, fs.kmeans_file)
    logger.info("UBM training: Wrote new KMeans machine '%s'", fs.kmeans_file)

  if clean and iteration > 0:
    old_dir = os.path.dirname(fs.kmeans_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", old_dir)
    shutil.rmtree(old_dir)
예제 #9
0
def execute(args):
  """Run the desired job of the tool chain that is specified on command line.
  This job might be executed either in the grid, or locally."""

  # first, let the base script decide if it knows how to execute the job
  if gmm_execute(args):
    return True

  # resolve the algorithm and the file selector singleton
  algorithm = tools.base(args.algorithm)
  fs = tools.FileSelector.instance()

  task = args.sub_task
  jobs = args.grid.number_of_projection_jobs

  if task == 'gmm-project':
    tools.gmm_project(
        algorithm,
        args.extractor,
        indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), jobs),
        force = args.force)
    return True

  if task == 'ivector-e-step':
    tools.ivector_estep(
        algorithm,
        args.iteration,
        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), jobs),
        force = args.force)
    return True

  if task == 'ivector-m-step':
    # the non-parallel M-step accumulating the E-step results
    tools.ivector_mstep(
        algorithm,
        args.iteration,
        number_of_parallel_jobs = jobs,
        clean = args.clean_intermediate,
        force = args.force)
    return True

  if task == 'ivector-projection':
    tools.ivector_project(
        algorithm,
        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), jobs),
        force = args.force)
    return True

  if task == 'train-whitener':
    tools.train_whitener(algorithm, force = args.force)
    return True

  if task == 'whitening-projection':
    tools.whitening_project(
        algorithm,
        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), jobs),
        force = args.force)
    return True

  # the LDA/WCCN/PLDA steps are no-ops when disabled in the algorithm,
  # but the keyword is still considered handled
  if task == 'train-lda':
    if algorithm.use_lda:
      tools.train_lda(algorithm, force = args.force)
    return True

  if task == 'lda-projection':
    if algorithm.use_lda:
      tools.lda_project(
          algorithm,
          indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), jobs),
          force = args.force)
    return True

  if task == 'train-wccn':
    if algorithm.use_wccn:
      tools.train_wccn(algorithm, force = args.force)
    return True

  if task == 'wccn-projection':
    if algorithm.use_wccn:
      tools.wccn_project(
          algorithm,
          indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), jobs),
          force = args.force)
    return True

  if task == 'train-plda':
    if algorithm.use_plda:
      tools.train_plda(algorithm, force = args.force)
    return True

  if task == 'save-projector':
    tools.save_projector(algorithm, force=args.force)
    return True

  # Not our keyword...
  return False
예제 #10
0
def execute(args):
  """Run the desired job of the tool chain that is specified on command line.
  This job might be executed either in the grid, or locally."""

  # first, let the base verification script try to execute the job
  if bob.bio.base.script.verify.execute(args):
    return True

  # resolve the algorithm and the file selector singleton
  algorithm = tools.base(args.algorithm)
  fs = tools.FileSelector.instance()

  task = args.sub_task
  jobs = args.grid.number_of_projection_jobs

  if task == 'kmeans-init':
    # initialize the k-means machine from (a subset of) the training data
    tools.kmeans_initialize(
        algorithm,
        args.extractor,
        args.limit_training_data,
        force = args.force)

  elif task == 'kmeans-e-step':
    # one parallelized E-step of the k-means training
    tools.kmeans_estep(
        algorithm,
        args.extractor,
        args.iteration,
        indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), jobs),
        force = args.force)

  elif task == 'kmeans-m-step':
    # the non-parallel M-step accumulating the E-step results
    tools.kmeans_mstep(
        algorithm,
        args.iteration,
        number_of_parallel_jobs = jobs,
        clean = args.clean_intermediate,
        force = args.force)

  elif task == 'gmm-init':
    # initialize the GMM from (a subset of) the training data
    tools.gmm_initialize(
        algorithm,
        args.extractor,
        args.limit_training_data,
        force = args.force)

  elif task == 'gmm-e-step':
    # one parallelized E-step of the GMM training
    tools.gmm_estep(
        algorithm,
        args.extractor,
        args.iteration,
        indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), jobs),
        force = args.force)

  elif task == 'gmm-m-step':
    # the non-parallel M-step accumulating the E-step results
    tools.gmm_mstep(
        algorithm,
        args.iteration,
        number_of_parallel_jobs = jobs,
        clean = args.clean_intermediate,
        force = args.force)

  else:
    # Not our keyword...
    return False
  return True
예제 #11
0
def execute(args):
    """Run the desired job of the tool chain that is specified on command line.
    This job might be executed either in the grid, or locally."""

    # first, let the base script decide if it knows how to execute the job
    if gmm_execute(args):
        return True

    # resolve the algorithm and the file selector singleton
    algorithm = tools.base(args.algorithm)
    fs = tools.FileSelector.instance()

    task = args.sub_task
    jobs = args.grid.number_of_projection_jobs

    if task == 'gmm-project':
        tools.gmm_project(
            algorithm,
            args.extractor,
            indices=base_tools.indices(
                fs.training_list('extracted', 'train_projector'), jobs),
            allow_missing_files=args.allow_missing_files,
            force=args.force)
        return True

    if task == 'ivector-e-step':
        tools.ivector_estep(
            algorithm,
            args.iteration,
            allow_missing_files=args.allow_missing_files,
            indices=base_tools.indices(
                fs.training_list('projected_gmm', 'train_projector'), jobs),
            force=args.force)
        return True

    if task == 'ivector-m-step':
        # the non-parallel M-step accumulating the E-step results
        tools.ivector_mstep(
            algorithm,
            args.iteration,
            number_of_parallel_jobs=jobs,
            clean=args.clean_intermediate,
            force=args.force)
        return True

    if task == 'ivector-projection':
        tools.ivector_project(
            algorithm,
            allow_missing_files=args.allow_missing_files,
            indices=base_tools.indices(
                fs.training_list('projected_gmm', 'train_projector'), jobs),
            force=args.force)
        return True

    if task == 'train-whitener':
        tools.train_whitener(
            algorithm,
            allow_missing_files=args.allow_missing_files,
            force=args.force)
        return True

    if task == 'whitening-projection':
        tools.whitening_project(
            algorithm,
            allow_missing_files=args.allow_missing_files,
            indices=base_tools.indices(
                fs.training_list('projected_gmm', 'train_projector'), jobs),
            force=args.force)
        return True

    # the LDA/WCCN/PLDA steps are no-ops when disabled in the algorithm,
    # but the keyword is still considered handled
    if task == 'train-lda':
        if algorithm.use_lda:
            tools.train_lda(
                algorithm,
                allow_missing_files=args.allow_missing_files,
                force=args.force)
        return True

    if task == 'lda-projection':
        if algorithm.use_lda:
            tools.lda_project(
                algorithm,
                allow_missing_files=args.allow_missing_files,
                indices=base_tools.indices(
                    fs.training_list('projected_gmm', 'train_projector'),
                    jobs),
                force=args.force)
        return True

    if task == 'train-wccn':
        if algorithm.use_wccn:
            tools.train_wccn(
                algorithm,
                allow_missing_files=args.allow_missing_files,
                force=args.force)
        return True

    if task == 'wccn-projection':
        if algorithm.use_wccn:
            tools.wccn_project(
                algorithm,
                allow_missing_files=args.allow_missing_files,
                indices=base_tools.indices(
                    fs.training_list('projected_gmm', 'train_projector'),
                    jobs),
                force=args.force)
        return True

    if task == 'train-plda':
        if algorithm.use_plda:
            tools.train_plda(
                algorithm,
                allow_missing_files=args.allow_missing_files,
                force=args.force)
        return True

    if task == 'save-projector':
        tools.save_projector(algorithm, force=args.force)
        return True

    # Not our keyword...
    return False
예제 #12
0
def isv_mstep(algorithm,
              iteration,
              number_of_parallel_jobs,
              force=False,
              clean=False):
    """Performs a single M-step of the ISV algorithm (non-parallel).

    Accumulates the statistics written by the E-step job(s) of the given
    ``iteration``, runs one M-step of the ISV trainer and writes the
    resulting intermediate ISV base.  After the last iteration, the base is
    copied to the final ISV file.

    Parameters:
      algorithm: the algorithm object providing the ``isv_trainer``
      iteration: the (0-based) index of the current EM iteration
      number_of_parallel_jobs: number of E-step jobs to collect statistics of
      force: if ``True``, re-run the M-step even if the output already exists
      clean: if ``True``, remove the intermediate files of iteration-1
    """
    fs = FileSelector.instance()

    old_machine_file = fs.isv_intermediate_file(iteration)
    new_machine_file = fs.isv_intermediate_file(iteration + 1)

    if utils.check_file(new_machine_file, force, 1000):
        logger.info(
            "ISV training: Skipping ISV M-Step since the file '%s' already exists",
            new_machine_file)
    else:
        # get the files from e-step
        training_list = fs.training_list('projected_gmm',
                                         'train_projector',
                                         arrange_by_client=True)
        # try if there is one file containing all data
        if os.path.exists(fs.isv_stats_file(iteration, 0, len(training_list))):
            # load stats file
            statistics = _read_stats(
                fs.isv_stats_file(iteration, 0, len(training_list)))
        else:
            # load several files, one per parallel E-step job
            stats_files = []
            for job in range(number_of_parallel_jobs):
                job_indices = tools.indices(training_list,
                                            number_of_parallel_jobs, job + 1)
                if job_indices[-1] >= job_indices[0]:
                    stats_files.append(
                        fs.isv_stats_file(iteration, job_indices[0],
                                          job_indices[-1]))
            # read all stats files
            statistics = _accumulate(stats_files)

        # Load machine
        algorithm.load_ubm(fs.ubm_file)
        if iteration:
            # continue from the ISV base written by the previous M-step
            isv_base = bob.learn.em.ISVBase(
                bob.io.base.HDF5File(old_machine_file))
            isv_base.ubm = algorithm.ubm
        else:
            # first iteration: create a fresh ISV base
            isv_base = bob.learn.em.ISVBase(algorithm.ubm,
                                            algorithm.subspace_dimension_of_u)

        # Creates the ISV trainer and initializes its accumulators
        trainer = algorithm.isv_trainer
        data = [algorithm.read_gmm_stats(training_list[0])
                ]  #Loading data just to allocate memory
        trainer.initialize(isv_base, data)  #Just to allocate memory
        trainer.acc_u_a1 = statistics[0]
        trainer.acc_u_a2 = statistics[1]
        trainer.m_step(isv_base)  # data is not used in M-step
        logger.info("ISV training: Performed M step %d", iteration)

        # Save the ISV model
        bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
        isv_base.save(bob.io.base.HDF5File(new_machine_file, 'w'))
        logger.info("ISV training: Wrote new ISV Base '%s'", new_machine_file)

    if iteration == algorithm.isv_training_iterations - 1:
        shutil.copy(new_machine_file, fs.isv_file)
        # BUG FIX: the message said "TV matrix" (copy-paste from the IVector
        # variant), but this writes the final ISV matrix.
        logger.info("ISV training: Wrote new ISV matrix '%s'", fs.isv_file)

    if clean and iteration > 0:
        old_dir = os.path.dirname(fs.isv_intermediate_file(iteration - 1))
        logger.info("Removing old intermediate directory '%s'", old_dir)
        shutil.rmtree(old_dir)