def train_tv(self):
    # Create stat servers
    self.__create_stats()
    # Load the UBM model
    model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm = sidekit.Mixture()
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    # Train the TV matrix using FactorAnalyser
    filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    fa = sidekit.FactorAnalyser()
    fa.total_variability_single(os.path.join(self.BASE_DIR, "stat", tv_filename),
                                ubm,
                                tv_rank=self.TV_RANK,
                                nb_iter=self.TV_ITERATIONS,
                                min_div=True,
                                tv_init=None,
                                batch_size=self.BATCH_SIZE,
                                save_init=False,
                                output_file_name=outputPath)
    # Remove the per-iteration files produced during training
    filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS)
    lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex))
    for f in lst:
        os.remove(f)
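# A minimal sketch (not part of the original snippet) of loading the trained TV
# matrix back for inspection. The FactorAnalyser(path) constructor and the
# F/mean/Sigma attributes follow the usage shown elsewhere in this collection;
# the path and the "2048" component count are hypothetical.
import os
import sidekit

tv_path = os.path.join("BASE_DIR", "ivector", "tv_matrix_2048.h5")  # hypothetical
fa = sidekit.FactorAnalyser(tv_path)
print(fa.F.shape)      # TV matrix: (supervector dim, tv_rank)
print(fa.mean.shape)   # mean supervector
print(fa.Sigma.shape)  # residual covariance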
def evaluate(self, explain=True):
    """
    This method is used to score our trained model.
    """
    # Load the UBM model
    model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm = sidekit.Mixture()
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    # Load the TV matrix
    filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    fa = sidekit.FactorAnalyser(outputPath + ".h5")
    # Extract i-vectors from the enrollment data
    logging.info("Extracting i-vectors from enrollment data")
    filename = 'enroll_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    enroll_stat = sidekit.StatServer.read(
        os.path.join(self.BASE_DIR, 'stat', filename))
    enroll_iv = fa.extract_ivectors_single(ubm=ubm,
                                           stat_server=enroll_stat,
                                           uncertainty=False)
    # Extract i-vectors from the test data
    logging.info("Extracting i-vectors from test data")
    filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    test_stat = sidekit.StatServer.read(
        os.path.join(self.BASE_DIR, 'stat', filename))
    test_iv = fa.extract_ivectors_single(ubm=ubm,
                                         stat_server=test_stat,
                                         uncertainty=False)
    # Do cosine-distance scoring and write the results
    logging.info("Calculating cosine score")
    test_ndx = sidekit.Ndx.read(
        os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
    scores_cos = sidekit.iv_scoring.cosine_scoring(enroll_iv,
                                                   test_iv,
                                                   test_ndx,
                                                   wccn=None)
    # Write the scores
    filename = "ivector_scores_cos_{}.h5".format(self.NUM_GAUSSIANS)
    scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))
    # Explain the analysis by writing a more readable text file
    if explain:
        modelset = list(scores_cos.modelset)
        segset = list(scores_cos.segset)
        scores = np.array(scores_cos.scoremat)
        filename = "ivector_scores_explained_{}.txt".format(self.NUM_GAUSSIANS)
        with open(os.path.join(self.BASE_DIR, "result", filename), "w") as fout:
            for seg_idx, seg in enumerate(segset):
                fout.write("Wav: {}\n".format(seg))
                for speaker_idx, speaker in enumerate(modelset):
                    fout.write("\tSpeaker {}:\t{}\n".format(
                        speaker, scores[speaker_idx, seg_idx]))
                fout.write("\n")
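# A minimal sketch (assumed, not from the original source) of a closed-set
# identification decision on top of the Scores object written above: pick the
# best-scoring enrolled speaker for each test wav. `true_speaker_of` is a
# hypothetical dict mapping each test wav to its ground-truth speaker.
import numpy as np

def top1_accuracy(scores_cos, true_speaker_of):
    scores = np.array(scores_cos.scoremat)   # shape: (n_models, n_segments)
    best = np.argmax(scores, axis=0)         # best model index per test segment
    hits = sum(scores_cos.modelset[best[i]] == true_speaker_of[seg]
               for i, seg in enumerate(scores_cos.segset))
    return hits / len(scores_cos.segset)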
def train_ubm(self, feature_dir, speaker_list, ubm_list, distrib_nb,
              feature_extension='h5', num_threads=10):
    '''Train the UBM (a GMM) with the EM algorithm.'''
    self.logger.info('training UBM')
    fs = sidekit.FeaturesServer(
        feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=feature_dir, speaker_list=speaker_list,
                ext=feature_extension)),
        dataset_list=["energy", "cep", "vad"],
        mask="[0-12]",
        feat_norm="cmvn",
        keep_all_features=True,
        delta=True,
        double_delta=True,
        rasta=True,
        context=None)
    ubm = sidekit.Mixture()
    llk = ubm.EM_split(fs, ubm_list, distrib_nb, num_thread=num_threads)
    ubm.write(get_experiment_nets() + '/ubm_{}.h5'.format(self.network_file))
    return ubm, fs
def adaptation(args):
    if args.feat_type == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'
    if args.feat_type == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'
    # Create a feature server for loading features from disk
    feature_server = sidekit.FeaturesServer(features_extractor=None,
                                            feature_filename_structure=features_folder + "/{}.h5",
                                            sources=None,
                                            dataset_list=datasetlist,
                                            mask=mask,
                                            feat_norm="cmvn",
                                            global_cmvn=None,
                                            dct_pca=False,
                                            dct_pca_config=None,
                                            sdc=False,
                                            sdc_config=None,
                                            delta=bool(args.delta),
                                            double_delta=bool(args.delta),
                                            delta_filter=None,
                                            context=None,
                                            traps_dct_nb=None,
                                            rasta=True,
                                            keep_all_features=False)
    enroll_idmap = sidekit.IdMap(os.getcwd() + '/task/idmap.h5')
    ndx = sidekit.Ndx(os.getcwd() + '/task/dev_ndx.h5')
    ubm = sidekit.Mixture()
    ubm.read(os.getcwd() + '/model/ubm.h5')
    enroll_stat = sidekit.StatServer(enroll_idmap,
                                     distrib_nb=ubm.distrib_nb(),
                                     feature_size=ubm.dim())
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=feature_server,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                num_thread=args.num_thread)
    enroll_stat.write(os.getcwd() + '/task/enroll_stat.h5')
    print('MAP adaptation', end='')
    regulation_factor = 16
    enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
    enroll_sv.write(os.getcwd() + '/task/enroll_sv.h5')
    print('\rMAP adaptation done')
    print('Compute scores', end='')
    score = sidekit.gmm_scoring(ubm,
                                enroll_sv,
                                ndx,
                                feature_server,
                                num_thread=args.num_thread)
    score.write(os.getcwd() + '/task/dev_score.h5')
    print('\rCompute scores done')
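# A hypothetical sketch of the argparse wiring that adaptation(args) above
# expects; the flag names mirror the attributes the function reads
# (feat_type, delta, num_thread) and are assumptions, not original source.
import argparse

parser = argparse.ArgumentParser(description="GMM-UBM MAP adaptation and scoring")
parser.add_argument('--feat_type', choices=['mfcc', 'fb'], default='mfcc')
parser.add_argument('--delta', action='store_true',
                    help="append delta / double-delta features")
parser.add_argument('--num_thread', type=int, default=4)
args = parser.parse_args()
adaptation(args)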
def evaluate(self, explain=True):
    """
    This method evaluates the test set. It does so by:
    - reading the test_ndx file that defines the test trials
    - reading the trained UBM model and the trained parameters
      (the enroll_stat file)
    - scoring the test set with gmm_scoring and writing the scores
    - if explain=True, writing the scores to a more readable text file.

    Args:
        explain (boolean): If True, write another text file that contains
        the same information as the ubm_scores file but in a readable way.
    """
    ############################# READING ############################
    # Create a feature server
    server = self.createFeatureServer()
    # Read the index for the test data
    test_ndx = sidekit.Ndx.read(
        os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
    # Read the UBM model
    ubm = sidekit.Mixture()
    model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    filename = "enroll_stat_{}.h5".format(self.NUM_GAUSSIANS)
    enroll_stat = sidekit.StatServer.read(
        os.path.join(self.BASE_DIR, "stat", filename))
    # MAP adaptation of the enrollment speaker models
    enroll_sv = enroll_stat.adapt_mean_map_multisession(
        ubm=ubm,
        r=3  # MAP regulation factor
    )
    ############################ Evaluating ###########################
    # Compute scores
    scores_gmm_ubm = sidekit.gmm_scoring(ubm=ubm,
                                         enroll=enroll_sv,
                                         ndx=test_ndx,
                                         feature_server=server,
                                         num_thread=self.NUM_THREADS)
    # Save the model's Score object
    filename = "ubm_scores_{}.h5".format(self.NUM_GAUSSIANS)
    scores_gmm_ubm.write(os.path.join(self.BASE_DIR, "result", filename))
    # Explain the analysis by writing a more readable text file
    if explain:
        filename = "ubm_scores_{}_explained.txt".format(self.NUM_GAUSSIANS)
        modelset = list(scores_gmm_ubm.modelset)
        segset = list(scores_gmm_ubm.segset)
        scores = np.array(scores_gmm_ubm.scoremat)
        with open(os.path.join(self.BASE_DIR, "result", filename), "w") as fout:
            for seg_idx, seg in enumerate(segset):
                fout.write("Wav: {}\n".format(seg))
                for speaker_idx, speaker in enumerate(modelset):
                    fout.write("\tSpeaker {}:\t{}\n".format(
                        speaker, scores[speaker_idx, seg_idx]))
                fout.write("\n")
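# A minimal sketch (assumed helper, not from the original source) for turning
# verification scores into an equal error rate. It expects two plain numpy
# arrays of target and non-target trial scores, e.g. gathered from
# scores_gmm_ubm.scoremat with a ground-truth trial key.
import numpy as np

def compute_eer(tar_scores, non_scores):
    # Sweep every observed score as a decision threshold
    thresholds = np.sort(np.concatenate([tar_scores, non_scores]))
    fa = np.array([(non_scores >= t).mean() for t in thresholds])  # false-accept rate
    fr = np.array([(tar_scores < t).mean() for t in thresholds])   # false-reject rate
    idx = np.argmin(np.abs(fa - fr))  # point where the two error rates cross
    return (fa[idx] + fr[idx]) / 2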
def train(self, SAVE_FLAG=True):
    # SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
    train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
    for i in range(len(train_list)):
        train_list[i] = train_list[i].split(".h5")[0]
    server = self.createFeatureServer("enroll")
    logging.info("Training...")
    ubm = sidekit.Mixture()
    # Expectation-Maximization estimation of the Mixture parameters.
    ubm.EM_split(
        features_server=server,         # sidekit.FeaturesServer used to load data
        feature_list=train_list,        # list of feature files to train the model
        distrib_nb=self.NUM_GUASSIANS,  # final number of Gaussian distributions
        iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),  # EM iterations per split step
        num_thread=self.NUM_THREADS,    # number of threads for parallel computing
        save_partial=False              # if False, only the final model is saved
    )
    # -> 2 iterations of EM with 2 distributions
    # -> 2 iterations of EM with 4 distributions
    # -> 4 iterations of EM with 8 distributions
    # -> 4 iterations of EM with 16 distributions
    # -> 4 iterations of EM with 32 distributions
    # -> 4 iterations of EM with 64 distributions
    # -> 8 iterations of EM with 128 distributions
    # -> 8 iterations of EM with 256 distributions
    # -> 8 iterations of EM with 512 distributions
    # -> 8 iterations of EM with 1024 distributions
    model_dir = os.path.join(self.BASE_DIR, "ubm")
    modelname = "ubm_{}.h5".format(self.NUM_GUASSIANS)
    logging.info("Saving the model {} at {}".format(modelname, model_dir))
    ubm.write(os.path.join(model_dir, modelname))
    # Read the idmap for the enrollment data
    enroll_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "enroll_idmap.h5"))
    # Create a StatServer to store/process the enrollment data
    enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap, ubm=ubm)
    logging.debug(enroll_stat)
    # Compute the sufficient statistics for the sessions whose indices are seg_indices.
    server.feature_filename_structure = os.path.join(self.BASE_DIR, "feat", "{}.h5")
    # BUG: don't pass self.NUM_THREADS as num_thread here; it is prone to race conditions
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=server,
                                seg_indices=range(enroll_stat.segset.shape[0]))
    if SAVE_FLAG:
        # Save the statistics of the enrollment data
        filename = "enroll_stat_{}.h5".format(self.NUM_GUASSIANS)
        enroll_stat.write(os.path.join(self.BASE_DIR, "ubm", filename))
def train_ubm(**args):
    if args['feat_type'] == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/feature'
    if args['feat_type'] == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/aishell2/ios/data/feature'
    utils.remove(features_folder)
    ubmlist = []
    if os.path.exists(os.getcwd() + '/log/aishell2_wavlist.log'):
        with open(os.getcwd() + '/log/aishell2_wavlist.log', 'r') as fobj:
            for i in fobj:
                ubmlist.append(i[0:-1])  # strip the trailing newline
    else:
        ubmlist = preprocess()
    # Create a feature server for loading features from disk
    server = sidekit.FeaturesServer(features_extractor=None,
                                    feature_filename_structure=features_folder + "/{}.h5",
                                    sources=None,
                                    dataset_list=datasetlist,
                                    mask=mask,
                                    feat_norm="cmvn",
                                    global_cmvn=None,
                                    dct_pca=False,
                                    dct_pca_config=None,
                                    sdc=False,
                                    sdc_config=None,
                                    delta=args['delta'],
                                    double_delta=args['delta'],
                                    delta_filter=None,
                                    context=None,
                                    traps_dct_nb=None,
                                    rasta=True,
                                    keep_all_features=False)
    # Create a Mixture object for training
    ubm = sidekit.Mixture()
    ubm.EM_split(server,
                 ubmlist,
                 args['distribNum'],
                 iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),
                 num_thread=args['num_thread'],
                 save_partial=True)
    # Write the trained UBM to disk
    ubm.write(os.getcwd() + '/model/ubm_512.h5')
def train_ubm(**args):
    if (args['feat_type'] == 'mfcc') or (args['feat_type'] == 'plp'):
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-19]"
    if args['feat_type'] == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
    features_folder = os.getcwd() + '/{}_train_feature'.format(args['feat_type'])
    ubmlist = []
    try:
        with open(os.getcwd() + '/log/aishell2.log', 'r') as fobj:
            for i in fobj:
                ubmlist.append(i[0:-1])  # strip the trailing newline
    except FileNotFoundError:
        print('please generate the UBM wav list first')
    # Create a feature server for loading features from disk
    server = sidekit.FeaturesServer(features_extractor=None,
                                    feature_filename_structure=features_folder + "/{}.h5",
                                    sources=None,
                                    dataset_list=datasetlist,
                                    mask=mask,
                                    feat_norm="cmvn",
                                    global_cmvn=None,
                                    dct_pca=False,
                                    dct_pca_config=None,
                                    sdc=False,
                                    sdc_config=None,
                                    delta=args['delta'],
                                    double_delta=args['delta'],
                                    delta_filter=None,
                                    context=None,
                                    traps_dct_nb=None,
                                    rasta=True,
                                    keep_all_features=False)
    # Create a Mixture object for training
    ubm = sidekit.Mixture()
    ubm.EM_split(server,
                 ubmlist,
                 args['distribNum'],
                 iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),
                 num_thread=args['num_thread'],
                 save_partial=True)
    # Write the trained UBM to disk
    ubm.write(os.getcwd() + '/model/ubm_512.h5')
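# A hypothetical call site for train_ubm above; since the function unpacks
# keyword arguments with **args, the keys must match the lookups it performs
# ('feat_type', 'delta', 'distribNum', 'num_thread'). The values are examples.
train_ubm(feat_type='mfcc',
          delta=True,
          distribNum=512,
          num_thread=8)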
def evaluate(self, explain=True):
    ############################# READING ############################
    # Create a feature server
    server = self.createFeatureServer()
    # Read the index for the test data
    test_ndx = sidekit.Ndx.read(
        os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
    # Read the UBM model
    ubm = sidekit.Mixture()
    model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    ############################ Evaluating ###########################
    filename = "enroll_stat_{}.h5".format(self.NUM_GUASSIANS)
    enroll_stat = sidekit.StatServer.read(
        os.path.join(self.BASE_DIR, "ubm", filename))
    # MAP adaptation of the enrollment speaker models
    enroll_sv = enroll_stat.adapt_mean_map_multisession(
        ubm=ubm,
        r=3  # MAP regulation factor
    )
    # Compute scores
    scores_gmm_ubm = sidekit.gmm_scoring(ubm=ubm,
                                         enroll=enroll_sv,
                                         ndx=test_ndx,
                                         feature_server=server,
                                         num_thread=self.NUM_THREADS)
    # Save the model's Score object
    filename = "ubm_scores_{}.h5".format(self.NUM_GUASSIANS)
    scores_gmm_ubm.write(os.path.join(self.BASE_DIR, "result", filename))
    # Write a readable analysis file
    if explain:
        filename = "ubm_scores_explained_{}.txt".format(self.NUM_GUASSIANS)
        modelset = list(scores_gmm_ubm.modelset)
        segset = list(scores_gmm_ubm.segset)
        scores = np.array(scores_gmm_ubm.scoremat)
        with open(os.path.join(self.BASE_DIR, "result", filename), "w") as fout:
            for seg_idx, seg in enumerate(segset):
                fout.write("Wav: {}\n".format(seg))
                for speaker_idx, speaker in enumerate(modelset):
                    fout.write("\tSpeaker {}:\t{}\n".format(
                        speaker, scores[speaker_idx, seg_idx]))
                fout.write("\n")
def data_init(self):
    # Read tv_idmap and plda_idmap
    tv_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "idmap_tv.h5"))
    plda_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "idmap_plda.h5"))
    # Load the UBM
    ubm = sidekit.Mixture()
    model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    # Create a feature server
    fs = self.__createFeatureServer()
    # Create a joint StatServer for the TV and PLDA training data
    back_idmap = plda_idmap.merge(tv_idmap)
    if not back_idmap.validate():
        logging.warning("Error merging tv_idmap & plda_idmap")
        return
    back_stat = sidekit.StatServer(statserver_file_name=back_idmap, ubm=ubm)
    # Jointly compute the sufficient statistics of the TV and PLDA data
    # BUG: don't pass self.NUM_THREADS as num_thread here; it is prone to race conditions
    back_stat.accumulate_stat(ubm=ubm,
                              feature_server=fs,
                              seg_indices=range(back_stat.segset.shape[0]))
    back_stat.write(os.path.join(self.BASE_DIR, "task", 'stat_back.h5'))
    # Load the sufficient statistics of the TV training data
    tv_stat = sidekit.StatServer.read_subset(
        os.path.join(self.BASE_DIR, "task", 'stat_back.h5'), tv_idmap)
    tv_stat.write(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'))
    # Train the TV matrix using FactorAnalyser
    filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    fa = sidekit.FactorAnalyser()
    fa.total_variability_single(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'),
                                ubm,
                                tv_rank=self.RANK_TV,
                                nb_iter=self.TV_ITERATIONS,
                                min_div=True,
                                tv_init=None,
                                batch_size=self.BATCH_SIZE,
                                save_init=False,
                                output_file_name=outputPath)
def evaluate(self):
    """
    This method is used to score our trained model.
    """
    # Load the UBM model
    model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
    ubm = sidekit.Mixture()
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    # Load the TV matrix
    filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    fa = sidekit.FactorAnalyser(outputPath + ".h5")
    # Extract i-vectors from the enrollment data
    logging.info("Extracting i-vectors from enrollment data")
    filename = 'enroll_stat_{}.h5'.format(self.NUM_GUASSIANS)  # was hard-coded to 32
    enroll_stat = sidekit.StatServer.read(
        os.path.join(self.BASE_DIR, 'stat', filename))
    enroll_iv = fa.extract_ivectors_single(ubm=ubm,
                                           stat_server=enroll_stat,
                                           uncertainty=False)
    # Extract i-vectors from the test data
    logging.info("Extracting i-vectors from test data")
    test_stat = sidekit.StatServer.read(
        os.path.join(self.BASE_DIR, 'stat', 'test_stat.h5'))
    test_iv = fa.extract_ivectors_single(ubm=ubm,
                                         stat_server=test_stat,
                                         uncertainty=False)
    # Do cosine-distance scoring and write the results
    logging.info("Calculating cosine score")
    test_ndx = sidekit.Ndx.read(
        os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
    scores_cos = sidekit.iv_scoring.cosine_scoring(enroll_iv,
                                                   test_iv,
                                                   test_ndx,
                                                   wccn=None)
    # Write the scores
    filename = "ivector_scores_cos_{}.h5".format(self.NUM_GUASSIANS)
    scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))
def train_tv(self):
    """
    This method trains the Total Variability (TV) matrix and saves it
    into the 'ivector' directory.
    """
    # Create stat servers
    self.__create_stats()
    # Load the UBM model
    model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm = sidekit.Mixture()
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    # Train the TV matrix using FactorAnalyser
    filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    fa = sidekit.FactorAnalyser()
    fa.total_variability_single(os.path.join(self.BASE_DIR, "stat", tv_filename),
                                ubm,
                                tv_rank=self.TV_RANK,
                                nb_iter=self.TV_ITERATIONS,
                                min_div=True,
                                tv_init=None,
                                batch_size=self.BATCH_SIZE,
                                save_init=False,
                                output_file_name=outputPath)
    # tv = fa.F            # TV matrix
    # tv_mean = fa.mean    # mean vector
    # tv_sigma = fa.Sigma  # residual covariance matrix
    # Clear the files produced at each iteration
    filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS)
    lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex))
    for f in lst:
        os.remove(f)
train_feature_filename_structure = "./mfcc/train/{}.h5"
enroll_feature_filename_structure = "./mfcc/enroll/{}.h5"
test_feature_filename_structure = "./mfcc/test/{}.h5"

train_wavscp_path = os.path.join(project_dir, 'data/train/wav.scp')
enroll_wavscp_path = os.path.join(project_dir, 'data/enroll/wav.scp')
test_wavscp_path = os.path.join(project_dir, 'data/test/wav.scp')

print('get train feats')
# uttId, filepath_list, feat_list
ubmList, train_input_file_list, train_output_feats_list = \
    basic_ops.get_info4mfcc(train_wavscp_path, project_dir, 'train')
basic_ops.make_mfcc_feats(ubmList, train_input_file_list,
                          train_output_feats_list, nj)
server_train = basic_ops.get_feature_server(train_feature_filename_structure)

print('Train the UBM by EM')
ubm = sidekit.Mixture()
llk = ubm.EM_split(server_train, ubmList, components_num,
                   num_thread=nj, save_partial=True)
ubm.write("/home/wcq/bird/task/ubm512.h5")
ubm = sidekit.Mixture('/home/wcq/bird/task/ubm512.h5')

print('get enroll feats')
enrollList, enroll_input_file_list, enroll_output_feats_list = \
    basic_ops.get_info4mfcc(enroll_wavscp_path, project_dir, 'enroll')
basic_ops.make_mfcc_feats(enrollList, enroll_input_file_list,
                          enroll_output_feats_list, nj)
server_enroll = basic_ops.get_feature_server(enroll_feature_filename_structure)

# Prepare the idmap for enrollment
models = []
segments = []
enroll_idmap = sidekit.IdMap()
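# The snippet above is truncated right after creating the empty IdMap. A sketch
# of how an enrollment IdMap is typically populated in sidekit (leftids are
# model/speaker ids, rightids are segment names); filling models/segments from
# enrollList and the speaker-id naming convention are assumptions.
import numpy as np

for utt in enrollList:                 # hypothetical: one entry per enrollment utterance
    models.append(utt.split('_')[0])   # hypothetical speaker-id convention
    segments.append(utt)
enroll_idmap.leftids = np.asarray(models)
enroll_idmap.rightids = np.asarray(segments)
enroll_idmap.start = np.empty(enroll_idmap.rightids.shape, '|O')
enroll_idmap.stop = np.empty(enroll_idmap.rightids.shape, '|O')
assert enroll_idmap.validate()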
def __create_stats(self):
    """
    This private method creates the StatServers needed for training:
    the joint background statistics, the TV statistics, the PLDA
    statistics (if enabled), and the test statistics.
    """
    # Read tv_idmap
    tv_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "tv_idmap.h5"))
    back_idmap = tv_idmap
    # If PLDA is enabled
    if self.ENABLE_PLDA:
        # Read plda_idmap
        plda_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "plda_idmap.h5"))
        # Create a joint StatServer for the TV and PLDA training data
        back_idmap = plda_idmap.merge(tv_idmap)
        if not back_idmap.validate():
            raise RuntimeError("Error merging tv_idmap & plda_idmap")
    # Check the UBM model
    ubm_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm_path = os.path.join(self.BASE_DIR, "ubm", ubm_name)
    if not os.path.exists(ubm_path):
        # If the UBM model does not exist, train one
        logging.info("Training UBM-{} model".format(self.NUM_GAUSSIANS))
        ubm = UBM(self.conf_path)
        ubm.train()
    # Load the trained UBM model
    logging.info("Loading trained UBM-{} model".format(self.NUM_GAUSSIANS))
    ubm = sidekit.Mixture()
    ubm.read(ubm_path)
    back_stat = sidekit.StatServer(statserver_file_name=back_idmap, ubm=ubm)
    # Create a feature server
    fs = self.createFeatureServer()
    # Jointly compute the sufficient statistics of the TV and (if enabled) PLDA data
    back_filename = 'back_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", back_filename)):
        # BUG: don't pass self.NUM_THREADS as num_thread here;
        # it is prone to race conditions
        back_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(back_stat.segset.shape[0]))
        back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))
    # Load the sufficient statistics of the TV training data
    tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", tv_filename)):
        tv_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "stat", back_filename), tv_idmap)
        tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))
    # Load the sufficient statistics of the PLDA training data
    if self.ENABLE_PLDA:
        plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", plda_filename)):
            plda_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename), plda_idmap)
            plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))
    # Compute the sufficient statistics of the test data
    filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
        test_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
        test_stat = sidekit.StatServer(statserver_file_name=test_idmap, ubm=ubm)
        # Create a feature server
        fs = self.createFeatureServer()
        # BUG: don't pass self.NUM_THREADS as num_thread here;
        # it is prone to race conditions
        test_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(test_stat.segset.shape[0]))
        test_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
def train(self, SAVE=True):
    """
    This method trains our UBM model by doing the following:
    - creating a FeatureServer for the enrollment features
    - using the EM algorithm to train the UBM on the enrollment features
    - creating a StatServer to save the trained parameters
    - if the SAVE argument is True (the default), saving that StatServer.

    Args:
        SAVE (boolean): if True, the StatServer is saved.
        If False, the StatServer is discarded.
    """
    # SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
    train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
    for i in range(len(train_list)):
        train_list[i] = train_list[i].split(".h5")[0]
    server = self.createFeatureServer("enroll")
    logging.info("Training...")
    ubm = sidekit.Mixture()
    # Set the model name
    ubm.name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    # Expectation-Maximization estimation of the Mixture parameters.
    ubm.EM_split(
        features_server=server,         # sidekit.FeaturesServer used to load data
        feature_list=train_list,        # list of feature files to train the model
        distrib_nb=self.NUM_GAUSSIANS,  # number of Gaussian distributions
        num_thread=self.NUM_THREADS,    # number of parallel processes
        save_partial=False,             # if False, only the final model is saved
        iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8)
    )
    # -> 2 iterations of EM with 2 distributions
    # -> 2 iterations of EM with 4 distributions
    # -> 4 iterations of EM with 8 distributions
    # -> 4 iterations of EM with 16 distributions
    # -> 4 iterations of EM with 32 distributions
    # -> 4 iterations of EM with 64 distributions
    # -> 8 iterations of EM with 128 distributions
    # -> 8 iterations of EM with 256 distributions
    # -> 8 iterations of EM with 512 distributions
    # -> 8 iterations of EM with 1024 distributions
    model_dir = os.path.join(self.BASE_DIR, "ubm")
    logging.info("Saving the model {} at {}".format(ubm.name, model_dir))
    ubm.write(os.path.join(model_dir, ubm.name))
    # Read the idmap for the enrollment data
    enroll_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "enroll_idmap.h5"))
    # Create a StatServer to store/process the enrollment data
    enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap, ubm=ubm)
    logging.debug(enroll_stat)
    server.feature_filename_structure = os.path.join(self.BASE_DIR, "feat", "{}.h5")
    # Compute the sufficient statistics for the sessions whose indices are seg_indices.
    # BUG: don't pass self.NUM_THREADS as num_thread here; it is prone to race conditions
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=server,
                                seg_indices=range(enroll_stat.segset.shape[0]))
    if SAVE:
        # Save the statistics of the enrollment data
        filename = "enroll_stat_{}.h5".format(self.NUM_GAUSSIANS)
        enroll_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
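# The train/evaluate methods in this collection call a createFeatureServer
# helper that the snippets do not show. A plausible sketch of such a method,
# modeled on the FeaturesServer arguments used elsewhere in this collection;
# the directory layout and the optional "group" parameter are assumptions.
def createFeatureServer(self, group=None):
    if group:
        feat_dir = os.path.join(self.BASE_DIR, "feat", group)
    else:
        feat_dir = os.path.join(self.BASE_DIR, "feat")
    return sidekit.FeaturesServer(
        feature_filename_structure=os.path.join(feat_dir, "{}.h5"),
        dataset_list=["energy", "cep", "vad"],
        mask="[0-12]",
        feat_norm="cmvn",
        delta=True,
        double_delta=True,
        rasta=True,
        keep_all_features=True)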
def get_embeddings(self):
    '''Extract i-vector embeddings for the validation data.'''
    speaker_list = self.get_validation_data_name()
    distrib_nb = self.config.getint('i_vector', 'distrib_nb')
    nbThread = self.config.getint('i_vector', 'nbThread')
    vector_size = self.config.getint('i_vector', 'vector_size')
    feature_extension = 'h5'
    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []
    set_of_times = []
    checkpoints = ["/TV_{}".format(self.network_file)]

    # Load data:
    ubm = sidekit.Mixture()
    ubm.read(get_experiment_nets() + '/ubm_{}.h5'.format(self.network_file))
    ubm_list, test_list_long = self.load_data(
        speaker_list,
        os.path.splitext(os.path.split(self.get_validation_train_data())[1])[0])
    ubm_list, test_list_short = self.load_data(
        speaker_list,
        os.path.splitext(os.path.split(self.get_validation_test_data())[1])[0])
    tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(
        get_experiment_nets() + "/TV_{}".format(self.network_file))
    fs = sidekit.FeaturesServer(
        feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=get_training('i_vector'),
                speaker_list=speaker_list,
                ext=feature_extension)),
        dataset_list=["energy", "cep", "vad"],
        mask="[0-12]",
        feat_norm="cmvn",
        keep_all_features=True,
        delta=True,
        double_delta=True,
        rasta=True,
        context=None)

    # Extract i-vectors
    test_stat_long = sidekit.StatServer(test_list_long,
                                        ubm=ubm,
                                        distrib_nb=distrib_nb,
                                        feature_size=0,
                                        index=None)
    test_stat_long.accumulate_stat(ubm=ubm,
                                   feature_server=fs,
                                   seg_indices=range(test_stat_long.segset.shape[0]),
                                   num_thread=nbThread)
    test_stat_short = sidekit.StatServer(test_list_short,
                                         ubm=ubm,
                                         distrib_nb=distrib_nb,
                                         feature_size=0,
                                         index=None)
    test_stat_short.accumulate_stat(ubm=ubm,
                                    feature_server=fs,
                                    seg_indices=range(test_stat_short.segset.shape[0]),
                                    num_thread=nbThread)
    test_iv_long = test_stat_long.estimate_hidden(tv_mean, tv_sigma,
                                                  V=tv,
                                                  batch_size=100,
                                                  num_thread=nbThread)[0]
    test_iv_short = test_stat_short.estimate_hidden(tv_mean, tv_sigma,
                                                    V=tv,
                                                    batch_size=100,
                                                    num_thread=nbThread)[0]
    iv_lis, y_list, s_list = create_data_lists(
        False, test_iv_long.stat1, test_iv_short.stat1,
        test_list_long.leftids.astype(int),
        test_list_short.leftids.astype(int))

    # Generate embeddings
    embeddings, speakers, num_embeddings = generate_embeddings(
        iv_lis, y_list, vector_size)
    set_of_embeddings.append(embeddings)
    set_of_speakers.append(speakers)
    set_of_num_embeddings.append(num_embeddings)
    set_of_times = [np.zeros((len(test_list_long.leftids)
                              + len(test_list_short.leftids),), dtype=int)]
    return checkpoints, set_of_embeddings, set_of_speakers, \
        set_of_num_embeddings, set_of_times
test_wavscp_path = os.path.join(project_dir, 'data/test/wav.scp')

train_feature_filename_structure = "./mfcc/train/{}.h5"
enroll_feature_filename_structure = "./mfcc/enroll/{}.h5"
test_feature_filename_structure = "./mfcc/test/{}.h5"

train_ivecs_stat_path = './exp/train_ivecs_stat'
enroll_ivecs_stat_path = './exp/enroll_ivecs_stat'
test_ivecs_stat_path = './exp/test_ivecs_stat'

train_stats_path = './task/train_stat.h5'
enroll_stats_path = './task/enroll_stat.h5'
test_stats_path = './task/test_stat.h5'

ubm_path = 'task/ubm512.h5'
ubm = sidekit.Mixture(ubm_path)

print("Acc the train stats")
train_idmap = get_idmap(train_wavscp_path)
train_feature_server = basic_ops.get_feature_server(
    train_feature_filename_structure)
train_stat_server = get_stat_server(ubm, train_idmap, train_feature_server,
                                    train_stats_path)

print("Train the T")
# Multiprocess on one node to train the T space
fa = sidekit.FactorAnalyser()
fa.total_variability(train_stats_path,
                     ubm,
                     tv_rank,
                     nb_iter=10,
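# The total_variability call above is cut off in the source. A hedged sketch of
# how it is typically completed, mirroring the keyword arguments that the
# total_variability_single calls elsewhere in this collection use; the exact
# remaining parameters, the output path, and the nj thread count are assumptions.
fa.total_variability(train_stats_path,
                     ubm,
                     tv_rank,
                     nb_iter=10,
                     min_div=True,                          # assumed
                     output_file_name='./task/TV_matrix',   # hypothetical path
                     num_thread=nj)                         # nj: job count assumed defined earlier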
def __create_stats(self):
    # Read tv_idmap and plda_idmap
    tv_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "tv_idmap.h5"))
    plda_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "plda_idmap.h5"))
    # Create a joint StatServer for the TV and PLDA training data
    back_idmap = plda_idmap.merge(tv_idmap)
    if not back_idmap.validate():
        raise RuntimeError("Error merging tv_idmap & plda_idmap")
    # Load the UBM
    model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
    ubm = sidekit.Mixture()
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    back_stat = sidekit.StatServer(statserver_file_name=back_idmap, ubm=ubm)
    # Create a feature server
    fs = self.createFeatureServer()
    # Jointly compute the sufficient statistics of the TV and PLDA data
    back_filename = 'back_stat_{}.h5'.format(self.NUM_GUASSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", back_filename)):
        # BUG: don't pass self.NUM_THREADS as num_thread here;
        # it is prone to race conditions
        back_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(back_stat.segset.shape[0]))
        back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))
    # Load the sufficient statistics of the TV training data
    tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GUASSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", tv_filename)):
        tv_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "stat", back_filename), tv_idmap)
        tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))
    # Load the sufficient statistics of the PLDA training data
    plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GUASSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", plda_filename)):
        plda_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "stat", back_filename), plda_idmap)
        plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))
    # Compute the sufficient statistics of the test data
    filename = 'test_stat_{}.h5'.format(self.NUM_GUASSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
        test_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
        test_stat = sidekit.StatServer(statserver_file_name=test_idmap, ubm=ubm)
        # Create a feature server
        fs = self.createFeatureServer()
        # BUG: don't pass self.NUM_THREADS as num_thread here;
        # it is prone to race conditions
        test_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(test_stat.segset.shape[0]))
        test_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
    pre_emphasis=0.97,
    save_param=["energy", "cep", "fb"],
    keep_all_features=True)

server = sidekit.FeaturesServer(features_extractor=extractor,
                                feature_filename_structure="chunks_features/all_train/{}.h5",
                                # sources=None,
                                dataset_list=["energy", "cep", "fb"],
                                mask="[0-12]",
                                feat_norm="cmvn",
                                global_cmvn=None,
                                dct_pca=False,
                                dct_pca_config=None,
                                sdc=False,
                                # sdc_config=(1, 3, 7),
                                delta=True,
                                double_delta=True,
                                delta_filter=None,
                                context=None,
                                traps_dct_nb=None,
                                rasta=False,
                                keep_all_features=True)

print('Train the UBM by EM')
# Extract all features and train a GMM without writing features to disk
ubm = sidekit.Mixture()
llk = ubm.EM_split(server, utter_list, distribNb)  # , num_thread=nbThread)
with open("ubm_64.pkl", "wb") as f:
    pickle.dump(ubm, f)
ubm.write('gmm/ubm_train_64.h5')
import sidekit as skk
import os
import sys
import multiprocessing
import logging
import numpy as np          # needed for np.linspace below
import soundfile as sf      # needed for sf.read below
import htkmfc as hhtk
from matplotlib import pyplot as plt

print('Start')
signal, samplerate = sf.read('sw02289.sph')
Time = np.linspace(0, len(signal) / samplerate, num=len(signal))

# plt.figure(1)
# plt.title('Signal Wave...')
# plt.plot(Time, signal)
# plt.show()

# datahtk = np.reshape(signal[:, 0], (1, lala))
a = skk.mfcc(signal[:, 0])
ubm = skk.Mixture()
# x = skk.FeaturesExtractor(feature_filename_structure='sw02289.sph', shift=0.01,
#                           sampling_frequency=16000, window_size=0.025)
f, (ax1, ax2) = plt.subplots(2, sharex=True)
ax1.plot(a[0])
ax2.plot(a[1])
plt.show()