def execute(args):
    """Run the desired ISV job of the tool chain that is specified on command line.

    This job might be executed either in the grid, or locally.
    Returns ``True`` when ``args.sub_task`` was handled here (or by the base
    GMM script), ``False`` when the keyword is unknown to this tool.
    """
    # first, let the base script decide if it knows how to execute the job
    if gmm_execute(args):
        return True

    # now, check what we can do
    algorithm = tools.base(args.algorithm)

    # the file selector object
    fs = tools.FileSelector.instance()

    if args.sub_task == 'gmm-project':
        # project the extracted features with the trained UBM
        tools.gmm_project(
            algorithm,
            args.extractor,
            indices=base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs),
            force=args.force)

    elif args.sub_task == 'isv-e-step':
        # one parallel slice of the ISV E-step for the given iteration
        tools.isv_estep(
            algorithm,
            args.iteration,
            indices=base_tools.indices(fs.training_list('projected_gmm', 'train_projector', arrange_by_client=True), args.grid.number_of_projection_jobs),
            force=args.force)

    elif args.sub_task == 'isv-m-step':
        # accumulate the E-step statistics and perform the (serial) M-step
        tools.isv_mstep(
            algorithm,
            args.iteration,
            number_of_parallel_jobs=args.grid.number_of_projection_jobs,
            clean=args.clean_intermediate,
            force=args.force)

    elif args.sub_task == 'save-projector':
        tools.save_isv_projector(
            algorithm,
            force=args.force)

    else:
        # Not our keyword...
        return False
    return True
def execute(args):
    """Run the desired ISV job of the tool chain that is specified on command line.

    This job might be executed either in the grid, or locally.
    """
    # Give the base GMM script the first chance to handle this sub-task.
    if gmm_execute(args):
        return True

    # Resolve the algorithm and grab the file selector singleton.
    algorithm = tools.base(args.algorithm)
    fs = tools.FileSelector.instance()

    task = args.sub_task
    if task == 'gmm-project':
        # project the extracted features with the trained UBM
        tools.gmm_project(
            algorithm,
            args.extractor,
            indices=base_tools.indices(
                fs.training_list('extracted', 'train_projector'),
                args.grid.number_of_projection_jobs),
            allow_missing_files=args.allow_missing_files,
            force=args.force)
    elif task == 'train-isv':
        # train the feature projector
        tools.train_isv(
            algorithm,
            allow_missing_files=args.allow_missing_files,
            force=args.force)
    else:
        # Not our keyword...
        return False
    return True
def ivector_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
    """Performs a single M-step of the IVector algorithm (non-parallel).

    Accumulates the statistics that the (possibly parallel) E-step jobs of the
    given ``iteration`` wrote, performs the M-step, and stores the updated
    IVector machine for ``iteration + 1``.  After the last iteration the final
    TV matrix is copied to its destination.

    Parameters:
      algorithm : the IVector algorithm, providing the trainer and the UBM
      iteration : int, the current EM iteration
      number_of_parallel_jobs : int, number of E-step jobs that wrote statistics
      force : bool, re-run the step even if the output file already exists
      clean : bool, remove the intermediate directory of the previous iteration
    """
    fs = FileSelector.instance()
    old_machine_file = fs.ivector_intermediate_file(iteration)
    new_machine_file = fs.ivector_intermediate_file(iteration + 1)

    if utils.check_file(new_machine_file, force, 1000):
        logger.info("IVector training: Skipping IVector M-Step since the file '%s' already exists", new_machine_file)
    else:
        # get the files from e-step
        training_list = fs.training_list('projected_gmm', 'train_projector')

        # try if there is one file containing all data
        if os.path.exists(fs.ivector_stats_file(iteration, 0, len(training_list))):
            # load stats file
            # BUGFIX: this is a module-level function, so there is no ``self``;
            # the original ``self._read_stats(...)`` raised a NameError.  Use the
            # module-level ``_read_stats`` helper (as the other *_mstep tools do).
            statistics = _read_stats(fs.ivector_stats_file(iteration, 0, len(training_list)))
        else:
            # load several files
            stats_files = []
            for job in range(number_of_parallel_jobs):
                job_indices = tools.indices(training_list, number_of_parallel_jobs, job + 1)
                if job_indices[-1] >= job_indices[0]:
                    stats_files.append(fs.ivector_stats_file(iteration, job_indices[0], job_indices[-1]))
            # read all stats files
            statistics = _accumulate(stats_files)

        # Load machine: continue from the previous intermediate machine, or
        # create a fresh one in the very first iteration.
        algorithm.load_ubm(fs.ubm_file)
        if iteration:
            tv = bob.learn.em.IVectorMachine(bob.io.base.HDF5File(old_machine_file))
            tv.ubm = algorithm.ubm
        else:
            tv = bob.learn.em.IVectorMachine(algorithm.ubm, algorithm.subspace_dimension_of_t, algorithm.variance_threshold)

        # Creates the IVectorTrainer and initialize values
        trainer = algorithm.ivector_trainer
        trainer.reset_accumulators(tv)
        trainer.acc_nij_wij2 = statistics[0]
        trainer.acc_fnormij_wij = statistics[1]
        trainer.acc_nij = statistics[2]
        trainer.acc_snormij = statistics[3]
        trainer.m_step(tv)  # data is not used in M-step
        logger.info("IVector training: Performed M step %d", iteration)

        # Save the IVector model
        bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
        tv.save(bob.io.base.HDF5File(new_machine_file, 'w'))
        logger.info("IVector training: Wrote new IVector machine '%s'", new_machine_file)

    # after the last iteration, publish the final TV matrix
    if iteration == algorithm.tv_training_iterations - 1:
        shutil.copy(new_machine_file, fs.tv_file)
        logger.info("IVector training: Wrote new TV matrix '%s'", fs.tv_file)

    if clean and iteration > 0:
        old_dir = os.path.dirname(fs.ivector_intermediate_file(iteration - 1))
        logger.info("Removing old intermediate directory '%s'", old_dir)
        shutil.rmtree(old_dir)
def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
    """Performs a single M-step of the GMM training (non-parallel)

    Accumulates the GMM statistics written by the E-step job(s) of the given
    ``iteration``, runs one M-step, and stores the machine for
    ``iteration + 1``.  After the last iteration the final UBM is copied to
    its destination; with ``clean`` the previous iteration's intermediate
    directory is removed.
    """
    fs = FileSelector.instance()
    # intermediate machine files of the current and next EM iteration
    old_machine_file = fs.gmm_intermediate_file(iteration)
    new_machine_file = fs.gmm_intermediate_file(iteration + 1)

    # skip the step when the next machine already exists (unless force is set)
    if utils.check_file(new_machine_file, force, 1000):
        logger.info("UBM training: Skipping GMM M-Step since the file '%s' already exists", new_machine_file)
    else:
        # get the files from e-step
        training_list = fs.training_list('extracted', 'train_projector')

        # try if there is one file containing all data
        if os.path.exists(fs.gmm_stats_file(iteration, 0, len(training_list))):
            stats_file = fs.gmm_stats_file(iteration, 0, len(training_list))
            # load stats file
            gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_file))
        else:
            # load several files, one per parallel E-step job
            stats_files = []
            for job in range(number_of_parallel_jobs):
                job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
                if job_indices[-1] > job_indices[0]:
                    stats_files.append(fs.gmm_stats_file(iteration, job_indices[0], job_indices[-1]))
            # read all stats files and sum them up
            # NOTE(review): assumes at least one stats file exists; an empty
            # ``stats_files`` would raise IndexError — verify the job partitioning
            gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_files[0]))
            for stats_file in stats_files[1:]:
                gmm_stats += bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_file))

        # load the old gmm machine
        gmm_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(old_machine_file))
        # initialize the trainer and inject the accumulated statistics
        trainer = algorithm.ubm_trainer
        trainer.initialize(gmm_machine)
        trainer.gmm_statistics = gmm_stats

        # Calls M-step (no data required)
        trainer.m_step(gmm_machine)

        # Saves the GMM statistics to the file
        bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
        gmm_machine.save(bob.io.base.HDF5File(new_machine_file, 'w'))

    # Write the final UBM file after the last iteration
    # TODO: implement other stopping criteria
    if iteration == algorithm.gmm_training_iterations-1:
        shutil.copy(new_machine_file, fs.ubm_file)
        logger.info("UBM training: Wrote new UBM '%s'", fs.ubm_file)

    if clean and iteration > 0:
        old_dir = os.path.dirname(fs.gmm_intermediate_file(iteration-1))
        logger.info("Removing old intermediate directory '%s'", old_dir)
        shutil.rmtree(old_dir)
def isv_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
    """Performs a single M-step of the ISV algorithm (non-parallel).

    Accumulates the statistics written by the (possibly parallel) E-step jobs
    of the given ``iteration``, performs the M-step, and stores the ISVBase
    for ``iteration + 1``.  After the last iteration the final ISV matrix is
    copied to its destination.

    Parameters:
      algorithm : the ISV algorithm, providing the trainer and the UBM
      iteration : int, the current EM iteration
      number_of_parallel_jobs : int, number of E-step jobs that wrote statistics
      force : bool, re-run the step even if the output file already exists
      clean : bool, remove the intermediate directory of the previous iteration
    """
    fs = FileSelector.instance()
    old_machine_file = fs.isv_intermediate_file(iteration)
    new_machine_file = fs.isv_intermediate_file(iteration + 1)

    if utils.check_file(new_machine_file, force, 1000):
        logger.info("ISV training: Skipping ISV M-Step since the file '%s' already exists", new_machine_file)
    else:
        # get the files from e-step
        training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client=True)

        # try if there is one file containing all data
        if os.path.exists(fs.isv_stats_file(iteration, 0, len(training_list))):
            # load stats file
            statistics = _read_stats(fs.isv_stats_file(iteration, 0, len(training_list)))
        else:
            # load several files
            stats_files = []
            for job in range(number_of_parallel_jobs):
                job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
                if job_indices[-1] >= job_indices[0]:
                    stats_files.append(fs.isv_stats_file(iteration, job_indices[0], job_indices[-1]))
            # read all stats files
            statistics = _accumulate(stats_files)

        # Load machine: continue from the previous intermediate ISVBase, or
        # create a fresh one in the very first iteration.
        algorithm.load_ubm(fs.ubm_file)
        if iteration:
            isv_base = bob.learn.em.ISVBase(bob.io.base.HDF5File(old_machine_file))
            isv_base.ubm = algorithm.ubm
        else:
            isv_base = bob.learn.em.ISVBase(algorithm.ubm, algorithm.subspace_dimension_of_u)

        # Creates the ISV trainer and initializes its accumulators
        # (comment fixed: this is the ISV trainer, not the IVectorTrainer)
        trainer = algorithm.isv_trainer
        data = [algorithm.read_gmm_stats(training_list[0])]  # Loading data just to allocate memory
        trainer.initialize(isv_base, data)  # Just to allocate memory
        trainer.acc_u_a1 = statistics[0]
        trainer.acc_u_a2 = statistics[1]
        trainer.m_step(isv_base)  # data is not used in M-step
        logger.info("ISV training: Performed M step %d", iteration)

        # Save the ISV model
        bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
        isv_base.save(bob.io.base.HDF5File(new_machine_file, 'w'))
        logger.info("ISV training: Wrote new ISV Base '%s'", new_machine_file)

    if iteration == algorithm.isv_training_iterations-1:
        shutil.copy(new_machine_file, fs.isv_file)
        # BUGFIX: the message said "TV matrix" (copy-paste from the IVector
        # tool); this file holds the ISV matrix.
        logger.info("ISV training: Wrote new ISV matrix '%s'", fs.isv_file)

    if clean and iteration > 0:
        old_dir = os.path.dirname(fs.isv_intermediate_file(iteration-1))
        logger.info("Removing old intermediate directory '%s'", old_dir)
        shutil.rmtree(old_dir)
def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
    """Performs a single M-step of the K-Means algorithm (non-parallel)

    Accumulates the statistics written by the E-step job(s) of the given
    ``iteration``, runs one M-step, and stores the machine for
    ``iteration + 1``.  After the last iteration the final K-Means machine is
    copied to its destination.
    """
    fs = FileSelector.instance()
    # intermediate machine files of the current and next EM iteration
    old_machine_file = fs.kmeans_intermediate_file(iteration)
    new_machine_file = fs.kmeans_intermediate_file(iteration+1)

    # skip the step when the next machine already exists (unless force is set)
    if utils.check_file(new_machine_file, force, 1000):
        logger.info("UBM training: Skipping KMeans M-Step since the file '%s' already exists", new_machine_file)
    else:
        # get the files from e-step
        training_list = fs.training_list('extracted', 'train_projector')

        # try if there is one file containing all data
        if os.path.exists(fs.kmeans_stats_file(iteration, 0, len(training_list))):
            stats_file = fs.kmeans_stats_file(iteration, 0, len(training_list))
            # load stats file
            statistics = _read_stats(stats_file)
        else:
            # load several files, one per parallel E-step job
            filenames = []
            for job in range(number_of_parallel_jobs):
                job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
                if job_indices[-1] > job_indices[0]:
                    filenames.append(fs.kmeans_stats_file(iteration, job_indices[0], job_indices[-1]))
            statistics = _accumulate(filenames)

        # Creates the KMeansMachine
        kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(old_machine_file))
        trainer = algorithm.kmeans_trainer
        trainer.reset_accumulators(kmeans_machine)

        # feed the accumulated statistics into the trainer
        # NOTE(review): statistics presumably holds (zeroeth-order stats,
        # first-order stats, sample count, summed min distance) — verify
        # against the E-step writer; ``error`` is the average minimum distance
        trainer.zeroeth_order_statistics = statistics[0]
        trainer.first_order_statistics = statistics[1]
        trainer.average_min_distance = statistics[3]
        error = statistics[3] / statistics[2]

        # Performs the M-step
        trainer.m_step(kmeans_machine, None)  # data is not used in M-step
        logger.info("UBM training: Performed M step %d with result %f" % (iteration, error))

        # Save the K-Means model
        bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
        kmeans_machine.save(bob.io.base.HDF5File(new_machine_file, 'w'))

    # copy the k_means file, when last iteration
    # TODO: implement other stopping criteria
    if iteration == algorithm.kmeans_training_iterations-1:
        shutil.copy(new_machine_file, fs.kmeans_file)
        logger.info("UBM training: Wrote new KMeans machine '%s'", fs.kmeans_file)

    if clean and iteration > 0:
        old_dir = os.path.dirname(fs.kmeans_intermediate_file(iteration-1))
        logger.info("Removing old intermediate directory '%s'", old_dir)
        shutil.rmtree(old_dir)
def execute(args):
    """Run the desired IVector job of the tool chain that is specified on command line.

    This job might be executed either in the grid, or locally.
    """
    # The base GMM script gets the first shot at the job.
    if gmm_execute(args):
        return True

    # Resolve the algorithm and grab the file selector singleton.
    algorithm = tools.base(args.algorithm)
    fs = tools.FileSelector.instance()
    task = args.sub_task

    # helpers computing the index range this grid job is responsible for
    def _extracted_indices():
        return base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs)

    def _projected_indices():
        return base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs)

    if task == 'gmm-project':
        tools.gmm_project(algorithm, args.extractor, indices=_extracted_indices(), force=args.force)
    elif task == 'ivector-e-step':
        tools.ivector_estep(algorithm, args.iteration, indices=_projected_indices(), force=args.force)
    elif task == 'ivector-m-step':
        tools.ivector_mstep(algorithm, args.iteration, number_of_parallel_jobs=args.grid.number_of_projection_jobs, clean=args.clean_intermediate, force=args.force)
    elif task == 'ivector-projection':
        tools.ivector_project(algorithm, indices=_projected_indices(), force=args.force)
    elif task == 'train-whitener':
        tools.train_whitener(algorithm, force=args.force)
    elif task == 'whitening-projection':
        tools.whitening_project(algorithm, indices=_projected_indices(), force=args.force)
    elif task == 'train-lda':
        # LDA steps are silently skipped when the algorithm does not use LDA
        if algorithm.use_lda:
            tools.train_lda(algorithm, force=args.force)
    elif task == 'lda-projection':
        if algorithm.use_lda:
            tools.lda_project(algorithm, indices=_projected_indices(), force=args.force)
    elif task == 'train-wccn':
        # WCCN steps are silently skipped when the algorithm does not use WCCN
        if algorithm.use_wccn:
            tools.train_wccn(algorithm, force=args.force)
    elif task == 'wccn-projection':
        if algorithm.use_wccn:
            tools.wccn_project(algorithm, indices=_projected_indices(), force=args.force)
    elif task == 'train-plda':
        if algorithm.use_plda:
            tools.train_plda(algorithm, force=args.force)
    elif task == 'save-projector':
        tools.save_projector(algorithm, force=args.force)
    else:
        # Not our keyword...
        return False
    return True
def execute(args):
    """Run the desired K-Means/GMM job of the tool chain that is specified on command line.

    This job might be executed either in the grid, or locally.
    """
    # first give the generic verify script a chance to handle the job
    if bob.bio.base.script.verify.execute(args):
        return True

    # resolve the algorithm and grab the file selector singleton
    algorithm = tools.base(args.algorithm)
    fs = tools.FileSelector.instance()
    task = args.sub_task

    def _training_indices():
        # index range of the training features this grid job processes
        return base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs)

    if task == 'kmeans-init':
        tools.kmeans_initialize(algorithm, args.extractor, args.limit_training_data, force=args.force)
    elif task == 'kmeans-e-step':
        tools.kmeans_estep(algorithm, args.extractor, args.iteration, indices=_training_indices(), force=args.force)
    elif task == 'kmeans-m-step':
        tools.kmeans_mstep(algorithm, args.iteration, number_of_parallel_jobs=args.grid.number_of_projection_jobs, clean=args.clean_intermediate, force=args.force)
    elif task == 'gmm-init':
        tools.gmm_initialize(algorithm, args.extractor, args.limit_training_data, force=args.force)
    elif task == 'gmm-e-step':
        tools.gmm_estep(algorithm, args.extractor, args.iteration, indices=_training_indices(), force=args.force)
    elif task == 'gmm-m-step':
        tools.gmm_mstep(algorithm, args.iteration, number_of_parallel_jobs=args.grid.number_of_projection_jobs, clean=args.clean_intermediate, force=args.force)
    else:
        # Not our keyword...
        return False
    return True
def execute(args):
    """Run the desired IVector job of the tool chain that is specified on command line.

    This job might be executed either in the grid, or locally.
    """
    # the base GMM script gets the first chance to handle the job
    if gmm_execute(args):
        return True

    # resolve the algorithm and grab the file selector singleton
    algorithm = tools.base(args.algorithm)
    fs = tools.FileSelector.instance()
    task = args.sub_task
    missing = args.allow_missing_files

    # helpers computing the index range this grid job is responsible for
    def _extracted_indices():
        return base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs)

    def _projected_indices():
        return base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs)

    if task == 'gmm-project':
        tools.gmm_project(algorithm, args.extractor, indices=_extracted_indices(), allow_missing_files=missing, force=args.force)
    elif task == 'ivector-e-step':
        tools.ivector_estep(algorithm, args.iteration, allow_missing_files=missing, indices=_projected_indices(), force=args.force)
    elif task == 'ivector-m-step':
        tools.ivector_mstep(algorithm, args.iteration, number_of_parallel_jobs=args.grid.number_of_projection_jobs, clean=args.clean_intermediate, force=args.force)
    elif task == 'ivector-projection':
        tools.ivector_project(algorithm, allow_missing_files=missing, indices=_projected_indices(), force=args.force)
    elif task == 'train-whitener':
        tools.train_whitener(algorithm, allow_missing_files=missing, force=args.force)
    elif task == 'whitening-projection':
        tools.whitening_project(algorithm, allow_missing_files=missing, indices=_projected_indices(), force=args.force)
    elif task == 'train-lda':
        # LDA steps are silently skipped when the algorithm does not use LDA
        if algorithm.use_lda:
            tools.train_lda(algorithm, allow_missing_files=missing, force=args.force)
    elif task == 'lda-projection':
        if algorithm.use_lda:
            tools.lda_project(algorithm, allow_missing_files=missing, indices=_projected_indices(), force=args.force)
    elif task == 'train-wccn':
        # WCCN steps are silently skipped when the algorithm does not use WCCN
        if algorithm.use_wccn:
            tools.train_wccn(algorithm, allow_missing_files=missing, force=args.force)
    elif task == 'wccn-projection':
        if algorithm.use_wccn:
            tools.wccn_project(algorithm, allow_missing_files=missing, indices=_projected_indices(), force=args.force)
    elif task == 'train-plda':
        if algorithm.use_plda:
            tools.train_plda(algorithm, allow_missing_files=missing, force=args.force)
    elif task == 'save-projector':
        tools.save_projector(algorithm, force=args.force)
    else:
        # Not our keyword...
        return False
    return True
def isv_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
    """Performs a single M-step of the ISV algorithm (non-parallel).

    Accumulates the statistics written by the (possibly parallel) E-step jobs
    of the given ``iteration``, performs the M-step, and stores the ISVBase
    for ``iteration + 1``.  After the last iteration the final ISV matrix is
    copied to its destination.

    Parameters:
      algorithm : the ISV algorithm, providing the trainer and the UBM
      iteration : int, the current EM iteration
      number_of_parallel_jobs : int, number of E-step jobs that wrote statistics
      force : bool, re-run the step even if the output file already exists
      clean : bool, remove the intermediate directory of the previous iteration
    """
    fs = FileSelector.instance()
    old_machine_file = fs.isv_intermediate_file(iteration)
    new_machine_file = fs.isv_intermediate_file(iteration + 1)

    if utils.check_file(new_machine_file, force, 1000):
        logger.info(
            "ISV training: Skipping ISV M-Step since the file '%s' already exists",
            new_machine_file)
    else:
        # get the files from e-step
        training_list = fs.training_list('projected_gmm', 'train_projector',
                                         arrange_by_client=True)

        # try if there is one file containing all data
        if os.path.exists(fs.isv_stats_file(iteration, 0, len(training_list))):
            # load stats file
            statistics = _read_stats(
                fs.isv_stats_file(iteration, 0, len(training_list)))
        else:
            # load several files
            stats_files = []
            for job in range(number_of_parallel_jobs):
                job_indices = tools.indices(training_list,
                                            number_of_parallel_jobs, job + 1)
                if job_indices[-1] >= job_indices[0]:
                    stats_files.append(
                        fs.isv_stats_file(iteration, job_indices[0],
                                          job_indices[-1]))
            # read all stats files
            statistics = _accumulate(stats_files)

        # Load machine: continue from the previous intermediate ISVBase, or
        # create a fresh one in the very first iteration.
        algorithm.load_ubm(fs.ubm_file)
        if iteration:
            isv_base = bob.learn.em.ISVBase(
                bob.io.base.HDF5File(old_machine_file))
            isv_base.ubm = algorithm.ubm
        else:
            isv_base = bob.learn.em.ISVBase(algorithm.ubm,
                                            algorithm.subspace_dimension_of_u)

        # Creates the ISV trainer and initializes its accumulators
        # (comment fixed: this is the ISV trainer, not the IVectorTrainer)
        trainer = algorithm.isv_trainer
        data = [algorithm.read_gmm_stats(training_list[0])
                ]  #Loading data just to allocate memory
        trainer.initialize(isv_base, data)  #Just to allocate memory
        trainer.acc_u_a1 = statistics[0]
        trainer.acc_u_a2 = statistics[1]
        trainer.m_step(isv_base)  # data is not used in M-step
        logger.info("ISV training: Performed M step %d", iteration)

        # Save the ISV model
        bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
        isv_base.save(bob.io.base.HDF5File(new_machine_file, 'w'))
        logger.info("ISV training: Wrote new ISV Base '%s'", new_machine_file)

    if iteration == algorithm.isv_training_iterations - 1:
        shutil.copy(new_machine_file, fs.isv_file)
        # BUGFIX: the message said "TV matrix" (copy-paste from the IVector
        # tool); this file holds the ISV matrix.
        logger.info("ISV training: Wrote new ISV matrix '%s'", fs.isv_file)

    if clean and iteration > 0:
        old_dir = os.path.dirname(fs.isv_intermediate_file(iteration - 1))
        logger.info("Removing old intermediate directory '%s'", old_dir)
        shutil.rmtree(old_dir)