def get_stat_server(ubm, idmap, feature_server, stat_path):
    # Reuse cached sufficient statistics if they already exist on disk.
    if os.path.exists(stat_path):
        print("stat server exists")
        stats = sidekit.StatServer(stat_path,
                                   distrib_nb=n_components,
                                   feature_size=n_feats)
    else:
        stats = sidekit.StatServer(idmap,
                                   distrib_nb=n_components,
                                   feature_size=n_feats)
        stats.accumulate_stat(ubm=ubm,
                              feature_server=feature_server,
                              seg_indices=range(stats.segset.shape[0]),
                              num_thread=nj)
        stats.write(stat_path)
    return stats
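# Usage sketch (hypothetical paths; n_components, n_feats, nj and
# feature_server are assumed to be defined at module level): the helper
# caches statistics, so a second run loads the HDF5 file instead of
# re-accumulating.
ubm = sidekit.Mixture()
ubm.read('model/ubm.h5')                       # hypothetical trained UBM
idmap = sidekit.IdMap('task/enroll_idmap.h5')  # hypothetical idmap
stats = get_stat_server(ubm, idmap, feature_server, 'task/enroll_stat.h5')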
def adaptation(args):
    if args.feat_type == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'
    if args.feat_type == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'

    # Create a feature server for loading features from disk
    feature_server = sidekit.FeaturesServer(features_extractor=None,
                                            feature_filename_structure=features_folder + "/{}.h5",
                                            sources=None,
                                            dataset_list=datasetlist,
                                            mask=mask,
                                            feat_norm="cmvn",
                                            global_cmvn=None,
                                            dct_pca=False,
                                            dct_pca_config=None,
                                            sdc=False,
                                            sdc_config=None,
                                            delta=bool(args.delta),
                                            double_delta=bool(args.delta),
                                            delta_filter=None,
                                            context=None,
                                            traps_dct_nb=None,
                                            rasta=True,
                                            keep_all_features=False)

    enroll_idmap = sidekit.IdMap(os.getcwd() + '/task/idmap.h5')
    ndx = sidekit.Ndx(os.getcwd() + '/task/dev_ndx.h5')

    ubm = sidekit.Mixture()
    ubm.read(os.getcwd() + '/model/ubm.h5')

    # Accumulate sufficient statistics for the enrollment data
    enroll_stat = sidekit.StatServer(enroll_idmap,
                                     distrib_nb=ubm.distrib_nb(),
                                     feature_size=ubm.dim())
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=feature_server,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                num_thread=args.num_thread)
    enroll_stat.write(os.getcwd() + '/task/enroll_stat.h5')

    print('MAP adaptation', end='')
    regulation_factor = 16
    enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
    enroll_sv.write(os.getcwd() + '/task/enroll_sv.h5')
    print('\rMAP adaptation done')

    print('Compute scores', end='')
    score = sidekit.gmm_scoring(ubm, enroll_sv, ndx, feature_server,
                                num_thread=args.num_thread)
    score.write(os.getcwd() + '/task/dev_score.h5')
    print('\rCompute scores done')
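# Sketch (not part of the original pipeline): a rough EER computation over the
# scores saved above, assuming they have already been split into numpy arrays
# of target and non-target trial scores.
import numpy as np

def compute_eer(target_scores, nontarget_scores):
    # Sweep every observed score as a candidate decision threshold and
    # return the point where miss and false-alarm rates are closest.
    thresholds = np.sort(np.concatenate([target_scores, nontarget_scores]))
    miss = np.array([(target_scores < t).mean() for t in thresholds])
    fa = np.array([(nontarget_scores >= t).mean() for t in thresholds])
    idx = np.argmin(np.abs(miss - fa))
    return (miss[idx] + fa[idx]) / 2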
def train(self, SAVE_FLAG=True):
    # SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
    train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
    for i in range(len(train_list)):
        train_list[i] = train_list[i].split(".h5")[0]
    server = self.createFeatureServer("enroll")
    logging.info("Training...")
    ubm = sidekit.Mixture()
    # Expectation-Maximization estimation of the Mixture parameters.
    ubm.EM_split(
        features_server=server,          # sidekit.FeaturesServer used to load data
        feature_list=train_list,         # list of feature files to train the model
        distrib_nb=self.NUM_GUASSIANS,   # final number of Gaussian distributions
        iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),  # EM iterations per doubling step
        num_thread=self.NUM_THREADS,     # number of threads for parallel computing
        save_partial=False               # if False, only the last model is saved
    )
    # -> 2 iterations of EM with 2 distributions
    # -> 2 iterations of EM with 4 distributions
    # -> 4 iterations of EM with 8 distributions
    # -> 4 iterations of EM with 16 distributions
    # -> 4 iterations of EM with 32 distributions
    # -> 4 iterations of EM with 64 distributions
    # -> 8 iterations of EM with 128 distributions
    # -> 8 iterations of EM with 256 distributions
    # -> 8 iterations of EM with 512 distributions
    # -> 8 iterations of EM with 1024 distributions
    model_dir = os.path.join(self.BASE_DIR, "ubm")
    modelname = "ubm_{}.h5".format(self.NUM_GUASSIANS)
    logging.info("Saving the model {} at {}".format(modelname, model_dir))
    ubm.write(os.path.join(model_dir, modelname))

    # Read idmap for the enrollment data
    enroll_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "enroll_idmap.h5"))
    # Create a StatServer to store/process the enrollment data
    enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap, ubm=ubm)
    logging.debug(enroll_stat)
    # Compute the sufficient statistics for a list of sessions whose indices are seg_indices.
    server.feature_filename_structure = os.path.join(self.BASE_DIR, "feat", "{}.h5")
    # BUG: don't pass self.NUM_THREADS as num_thread here, as it is prone to race conditions
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=server,
                                seg_indices=range(enroll_stat.segset.shape[0]))
    if SAVE_FLAG:
        # Save the enrollment statistics
        filename = "enroll_stat_{}.h5".format(self.NUM_GUASSIANS)
        enroll_stat.write(os.path.join(self.BASE_DIR, "ubm", filename))
def data_init(self):
    # Read tv_idmap and plda_idmap
    tv_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "idmap_tv.h5"))
    plda_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "idmap_plda.h5"))
    # Load UBM
    ubm = sidekit.Mixture()
    model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    # Create Feature Server
    fs = self.__createFeatureServer()
    # Create a joint StatServer for TV and PLDA training data
    back_idmap = plda_idmap.merge(tv_idmap)
    if not back_idmap.validate():
        logging.warning("Error merging tv_idmap & plda_idmap")
        return
    back_stat = sidekit.StatServer(statserver_file_name=back_idmap, ubm=ubm)
    # Jointly compute the sufficient statistics of the TV and PLDA data
    # BUG: don't pass self.NUM_THREADS as num_thread here, as it is prone to race conditions
    back_stat.accumulate_stat(ubm=ubm,
                              feature_server=fs,
                              seg_indices=range(back_stat.segset.shape[0]))
    back_stat.write(os.path.join(self.BASE_DIR, "task", 'stat_back.h5'))
    # Extract the sufficient statistics of the TV training data
    tv_stat = sidekit.StatServer.read_subset(
        os.path.join(self.BASE_DIR, "task", 'stat_back.h5'), tv_idmap)
    tv_stat.write(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'))
    # Train the TV matrix using a FactorAnalyser
    filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    fa = sidekit.FactorAnalyser()
    fa.total_variability_single(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'),
                                ubm,
                                tv_rank=self.RANK_TV,
                                nb_iter=self.TV_ITERATIONS,
                                min_div=True,
                                tv_init=None,
                                batch_size=self.BATCH_SIZE,
                                save_init=False,
                                output_file_name=outputPath)
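# Sketch of a possible continuation at the end of data_init(): once
# total_variability_single has run, i-vectors can be extracted from the saved
# statistics. This assumes sidekit's FactorAnalyser.extract_ivectors_single;
# the ".h5" suffix on the trained model and the output path are assumptions.
fa = sidekit.FactorAnalyser(outputPath + ".h5")  # reload the trained TV model
tv_stat = sidekit.StatServer(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'))
iv = fa.extract_ivectors_single(ubm, tv_stat)
iv.write(os.path.join(self.BASE_DIR, "ivector", "tv_ivectors.h5"))  # hypothetical output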
def train_total_variability(self, ubm, fs, distrib_nb, rank_TV, tv_iteration,
                            train_idmap, num_threads=10):
    self.logger.info('train total variability')
    train_stat = sidekit.StatServer(train_idmap,
                                    ubm=ubm,
                                    distrib_nb=distrib_nb,
                                    feature_size=0,
                                    index=None)
    train_stat.accumulate_stat(ubm=ubm,
                               feature_server=fs,
                               seg_indices=range(train_stat.segset.shape[0]),
                               num_thread=num_threads)
    tv_mean, tv, _, __, tv_sigma = train_stat.factor_analysis(
        rank_f=rank_TV,
        rank_g=0,
        rank_h=None,
        re_estimate_residual=False,
        it_nb=(tv_iteration, 0, 0),
        min_div=True,
        ubm=ubm,
        batch_size=100,
        num_thread=num_threads)
    sidekit.sidekit_io.write_tv_hdf5(
        (tv, tv_mean, tv_sigma),
        get_experiment_nets() + "/TV_{}".format(self.network_file))
def get_embeddings(self):
    """Extract i-vector embeddings for the validation data."""
    speaker_list = self.get_validation_data_name()
    distrib_nb = self.config.getint('i_vector', 'distrib_nb')
    nbThread = self.config.getint('i_vector', 'nbThread')
    vector_size = self.config.getint('i_vector', 'vector_size')
    feature_extension = 'h5'

    set_of_embeddings = []
    set_of_speakers = []
    set_of_num_embeddings = []
    set_of_times = []
    checkpoints = ["/TV_{}".format(self.network_file)]

    # Load data: UBM, evaluation lists, and the trained TV matrix
    ubm = sidekit.Mixture()
    ubm.read(get_experiment_nets() + '/ubm_{}.h5'.format(self.network_file))
    ubm_list, test_list_long = self.load_data(
        speaker_list,
        os.path.splitext(os.path.split(self.get_validation_train_data())[1])[0])
    ubm_list, test_list_short = self.load_data(
        speaker_list,
        os.path.splitext(os.path.split(self.get_validation_test_data())[1])[0])
    tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(
        get_experiment_nets() + "/TV_{}".format(self.network_file))

    fs = sidekit.FeaturesServer(feature_filename_structure=(
        "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
            dir=get_training('i_vector'),
            speaker_list=speaker_list,
            ext=feature_extension)),
                                dataset_list=["energy", "cep", "vad"],
                                mask="[0-12]",
                                feat_norm="cmvn",
                                keep_all_features=True,
                                delta=True,
                                double_delta=True,
                                rasta=True,
                                context=None)

    # Extract i-vectors
    test_stat_long = sidekit.StatServer(test_list_long,
                                        ubm=ubm,
                                        distrib_nb=distrib_nb,
                                        feature_size=0,
                                        index=None)
    test_stat_long.accumulate_stat(ubm=ubm,
                                   feature_server=fs,
                                   seg_indices=range(test_stat_long.segset.shape[0]),
                                   num_thread=nbThread)
    test_stat_short = sidekit.StatServer(test_list_short,
                                         ubm=ubm,
                                         distrib_nb=distrib_nb,
                                         feature_size=0,
                                         index=None)
    test_stat_short.accumulate_stat(ubm=ubm,
                                    feature_server=fs,
                                    seg_indices=range(test_stat_short.segset.shape[0]),
                                    num_thread=nbThread)
    test_iv_long = test_stat_long.estimate_hidden(tv_mean, tv_sigma, V=tv,
                                                  batch_size=100,
                                                  num_thread=nbThread)[0]
    test_iv_short = test_stat_short.estimate_hidden(tv_mean, tv_sigma, V=tv,
                                                    batch_size=100,
                                                    num_thread=nbThread)[0]
    iv_list, y_list, s_list = create_data_lists(
        False, test_iv_long.stat1, test_iv_short.stat1,
        test_list_long.leftids.astype(int),
        test_list_short.leftids.astype(int))

    # Generate embeddings
    embeddings, speakers, num_embeddings = generate_embeddings(
        iv_list, y_list, vector_size)
    set_of_embeddings.append(embeddings)
    set_of_speakers.append(speakers)
    set_of_num_embeddings.append(num_embeddings)
    set_of_times = [
        np.zeros((len(test_list_long.leftids) + len(test_list_short.leftids),),
                 dtype=int)
    ]
    return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_times
for line in eval_lines:
    splits = line.strip().split(' ')
    uttId = splits[0]
    spkId = uttId.split('_')[0]
    models.append(spkId)
    segments.append(uttId)
enroll_idmap.leftids = numpy.asarray(models)
enroll_idmap.rightids = numpy.asarray(segments)
enroll_idmap.start = numpy.empty(enroll_idmap.rightids.shape, '|O')
enroll_idmap.stop = numpy.empty(enroll_idmap.rightids.shape, '|O')
enroll_idmap.validate()

print('Compute the sufficient statistics')
# Create a StatServer for the enrollment data and compute the statistics
enroll_stat = sidekit.StatServer(enroll_idmap, components_num, n_feats)
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=server_enroll,
                            seg_indices=range(enroll_stat.segset.shape[0]),
                            num_thread=nj)

print('MAP adaptation of the speaker models')
regulation_factor = 16  # MAP regulation factor, default=16
enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
enroll_sv.write('/home/wcq/bird/task/enroll_map_models.h5')
enroll_sv = sidekit.StatServer('/home/wcq/bird/task/enroll_map_models.h5',
                               components_num, n_feats)

print('get test feats')
testList, test_input_file_list, test_output_feats_list = basic_ops.get_info4mfcc(
    test_wavscp_path, project_dir, 'test')
basic_ops.make_mfcc_feats(testList, test_input_file_list,
                          test_output_feats_list, nj)
server_test = basic_ops.get_feature_server(test_feature_filename_structure)
ubm_w = ubm.w
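# Sketch (an assumption, not the original continuation): the fragment above
# breaks off after caching the UBM weights. Scoring the test segments against
# the MAP-adapted models would conventionally use sidekit.gmm_scoring, as in
# the other snippets here; 'test_ndx' and its path are hypothetical.
test_ndx = sidekit.Ndx('/home/wcq/bird/task/test_ndx.h5')
scores = sidekit.gmm_scoring(ubm, enroll_sv, test_ndx, server_test, num_thread=nj)
scores.write('/home/wcq/bird/task/test_scores.h5')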
def __create_stats(self):
    """
    This private method is used to create the Statistic Servers.
    TODO: post some more info
    """
    # Read tv_idmap
    tv_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "tv_idmap.h5"))
    back_idmap = tv_idmap
    # If PLDA is enabled
    if self.ENABLE_PLDA:
        # Read plda_idmap
        plda_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "plda_idmap.h5"))
        # Create a joint StatServer for TV and PLDA training data
        back_idmap = plda_idmap.merge(tv_idmap)
        if not back_idmap.validate():
            raise RuntimeError("Error merging tv_idmap & plda_idmap")

    # Check the UBM model
    ubm_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm_path = os.path.join(self.BASE_DIR, "ubm", ubm_name)
    if not os.path.exists(ubm_path):
        # If the UBM model does not exist, train one
        logging.info("Training UBM-{} model".format(self.NUM_GAUSSIANS))
        ubm = UBM(self.conf_path)
        ubm.train()
    # Load the trained UBM model
    logging.info("Loading trained UBM-{} model".format(self.NUM_GAUSSIANS))
    ubm = sidekit.Mixture()
    ubm.read(ubm_path)
    back_stat = sidekit.StatServer(statserver_file_name=back_idmap, ubm=ubm)
    # Create Feature Server
    fs = self.createFeatureServer()
    # Jointly compute the sufficient statistics of the TV and (if enabled) PLDA data
    back_filename = 'back_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", back_filename)):
        # BUG: don't pass self.NUM_THREADS as num_thread here,
        # as it is prone to race conditions
        back_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(back_stat.segset.shape[0]))
        back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))
    # Extract the sufficient statistics of the TV training data
    tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", tv_filename)):
        tv_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "stat", back_filename), tv_idmap)
        tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))
    # Extract the sufficient statistics of the PLDA training data
    if self.ENABLE_PLDA:
        plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", plda_filename)):
            plda_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename), plda_idmap)
            plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))
    # Compute the sufficient statistics of the test data
    filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
        test_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
        test_stat = sidekit.StatServer(statserver_file_name=test_idmap, ubm=ubm)
        # Create Feature Server
        fs = self.createFeatureServer()
        # BUG: don't pass self.NUM_THREADS as num_thread here,
        # as it is prone to race conditions
        test_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(test_stat.segset.shape[0]))
        test_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
print('Train the UBM by EM')
# Train a GMM on all the features; save_partial=True also writes intermediate models
ubm = sidekit.Mixture()
llk = ubm.EM_split(features_server,
                   ubmList,
                   distribNb,
                   num_thread=nbThread,
                   save_partial=True)
ubm.write('gmm/ubm.h5')

# Compute the sufficient statistics on the UBM
print('Compute the sufficient statistics')
# Create a StatServer for the enrollment data and compute the statistics
# (note: distrib_nb and feature_size here should match the UBM)
enroll_stat = sidekit.StatServer(enroll_idmap, distrib_nb=4, feature_size=50)
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=features_server,
                            seg_indices=range(enroll_stat.segset.shape[0]),
                            num_thread=nbThread)
enroll_stat.write('data/stat_rsr2015_male_enroll.h5')

# Adapt the GMM speaker models from the UBM via MAP adaptation
print('MAP adaptation of the speaker models')
regulation_factor = 3  # MAP regulation factor
enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
enroll_sv.write('data/sv_rsr2015_male_enroll.h5')

# Compute all trials and save scores in HDF5 format
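# Sketch of the scoring step announced by the comment above, following the
# gmm_scoring call pattern used elsewhere in this collection; 'test_ndx' and
# the output path are assumptions.
test_ndx = sidekit.Ndx('data/test_ndx_rsr2015_male.h5')
scores_gmm_ubm = sidekit.gmm_scoring(ubm, enroll_sv, test_ndx, features_server,
                                     num_thread=nbThread)
scores_gmm_ubm.write('data/scores_gmm-ubm_rsr2015_male.h5')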
def train_net(net, train_dataloader, test_dataloader):
    if not os.path.exists(ConfigNetwork.modelname):
        last_model_loaded = False  # True # False
        # inits
        iteration_number = 0
        for epoch in range(0, ConfigNetwork.train_number_epochs):
            """
            if ConfigNetwork.learning_rate_scheduler:
                optimizer = optim.Adam(net.parameters(), lr=ConfigNetwork.learning_rate)
                scheduler = ReduceLROnPlateau(optimizer, 'min')
            else:
            """
            epoch_learning_rate_exponent = max(
                0, epoch - (ConfigNetwork.learning_rate_defactor_after_epoch - 1))
            lr = (ConfigNetwork.learning_rate *
                  ConfigNetwork.learning_rate_defactor ** epoch_learning_rate_exponent)
            parameters = filter(lambda p: p.requires_grad, net.parameters())
            if ConfigNetwork.train_vae:
                optimizer = pyro_optim.Adam({'lr': lr})
            else:
                optimizer = optim.Adam(parameters, lr=lr)
            base_file_pattern = os.path.join(
                ConfigNetwork.storage_dir,
                '{}_epoch_{}'.format(ConfigNetwork.modelname, epoch))
            epoch_net_file = '{}_model'.format(base_file_pattern)
            if epoch < ConfigNetwork.freeze_ResNet_epochs:
                net.set_ResNet_requires_grad(requires_grad=False)
            else:
                net.set_ResNet_requires_grad(requires_grad=True)
            if not os.path.exists(epoch_net_file) and epoch == 0:
                # init meta embeddings network
                if ConfigNetwork.train_with_meta_embeddings:
                    logging.debug('init B with plda expectation')
                    if not os.path.exists(ConfigNetwork.embeddings_file):
                        dataset = SoftMaxDatabase(
                            imageFolderDataset=ConfigFaceDatasets.dataset_class(
                                root=ConfigFaceDatasets.training_dir),
                            transform=train_dataloader.dataset.transform,
                            should_invert=False)
                        embeddings_loader = DataLoader(
                            dataset,
                            shuffle=False,
                            num_workers=ConfigNetwork.num_workers,
                            batch_size=ConfigNetwork.batch_size_train)
                        softmax_net = net.to_softmaxNetwork()
                        softmax_net.normalize = False
                        with h5py.File(ConfigNetwork.embeddings_file, "a") as embd_file:
                            for i, data in enumerate(embeddings_loader, 0):
                                img0, label = data
                                img0, label = Variable(img0).cuda(), Variable(label).cuda()
                                output0 = super(SoftMaxNetwork, softmax_net).forward_once(img0)
                                embd_file.create_dataset(
                                    "{}".format(i),
                                    data=numpy.column_stack(
                                        (output0.data.cpu().numpy(),
                                         label.data.cpu().numpy())),
                                    compression="gzip",
                                    fletcher32=True)
                        logging.critical('extracted embeddings')
                    if not os.path.exists(ConfigNetwork.embeddings_file_plda) \
                            or not os.path.exists(ConfigNetwork.embeddings_mean_file):
                        data = []
                        with h5py.File(ConfigNetwork.embeddings_file, "r") as h5f:
                            for key, value in h5f.items():
                                data.append(value.value)
                        data = numpy.concatenate(data)
                        embeddings = data[:, :ConfigNetwork.embedding_size]
                        embeddings_mean = embeddings.mean(0)
                        numpy.save(ConfigNetwork.embeddings_mean_file, embeddings_mean)
                        logging.debug('embeddings mean: {}'.format(embeddings.mean(0)))
                        embeddings -= embeddings.mean(0)
                        # prepare cosine distance
                        embeddings = (embeddings.T / numpy.linalg.norm(embeddings, axis=1)).T
                        embedding_labels = data[:, ConfigNetwork.embedding_size:].squeeze()
                        s = sidekit.StatServer()
                        s.modelset = embedding_labels
                        s.segset = numpy.arange(embedding_labels.shape[0]).astype(str)
                        s.stat0 = numpy.ones((embedding_labels.shape[0], 1))
                        s.stat1 = copy.deepcopy(embeddings)
                        s.start = numpy.empty(embedding_labels.shape[0], dtype='|O')
                        s.stop = numpy.empty(embedding_labels.shape[0], dtype='|O')
                        s.validate()
                        ids = numpy.unique(s.modelset)
                        class_nb = ids.shape[0]
                        f = sidekit.FactorAnalyser()
                        rank_f = ConfigNetwork.embedding_size
                        f.plda(s, rank_f=rank_f)
                        f.write(ConfigNetwork.embeddings_file_plda)
                    else:
                        f = sidekit.FactorAnalyser(ConfigNetwork.embeddings_file_plda)
                    e_mu = torch.from_numpy(f.mean).type(torch.FloatTensor)
                    e_B = torch.from_numpy(
                        numpy.linalg.inv(f.Sigma).diagonal()).type(torch.FloatTensor)
                    # e_B = torch.from_numpy(numpy.linalg.inv(f.Sigma)).type(torch.FloatTensor)
                    assert isinstance(net, GME_SoftmaxNetwork)
                    net = GME_SoftmaxNetwork(
                        num_train_classes=net.num_train_classes,
                        pretrained_siamese_net=net.pretrained_net,
                        expected_mu=e_mu,
                        expected_B=e_B).cuda()
                    logging.debug('init B with plda done')
            if not os.path.exists(epoch_net_file):
                if last_model_loaded:
                    logging.critical('run validation on epoch {}'.format(epoch - 1))
                    test_model(database_dir=test_dataloader,
                               net=net,
                               net_distance=net_distance,
                               epoch=None)
                    last_model_loaded = False
                if ConfigNetwork.select_difficult_pairs_epoch is not None:
                    if epoch == ConfigNetwork.select_difficult_pairs_epoch:
                        train_dataloader = select_difficult_pairs(net, train_dataloader)
                # train an epoch
                net.train()
                train_epoch(train_dataloader=train_dataloader,
                            net=net,
                            optimizer=optimizer,
                            epoch=epoch,
                            iteration_number=iteration_number)
                torch.save(obj=net.state_dict(), f=epoch_net_file)
            else:
                net.load_state_dict(torch.load(epoch_net_file))
                logging.info('loaded model for epoch: {}'.format(epoch))
                last_model_loaded = True
                continue
            logging.critical('run validation on epoch {}'.format(epoch))
            test_model(database_dir=test_dataloader,
                       net=net,
                       net_distance=net_distance,
                       epoch=None)
        torch.save(obj=net.state_dict(), f='{}'.format(ConfigNetwork.modelname))
        logging.info('training completed, model stored.')
import os
os.environ['THEANO_FLAGS'] = 'device=cpu'
os.environ['SIDEKIT'] = 'libsvm=false,theano=false'
import sys
import sidekit
import h5py
import logging
import numpy as np

directory = os.fsencode(
    "/home/adit/Desktop/DCASE2017-baseline-system-master/Model_DCASE")  # "/home/adit/Desktop"

distribNb = 2048
ubm = sidekit.Mixture()
enroll_stat = sidekit.StatServer(distrib_nb=distribNb, feature_size=40)
regulation_factor = 3  # MAP regulation factor
enroll_sv = enroll_stat.adapt_mean_map(ubm, regulation_factor)
enroll_sv.write('gmm_adapted.h5')
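# Note (sketch): as written, the script above adapts from an untrained Mixture
# and a StatServer with no accumulated statistics. The missing prerequisites,
# with hypothetical file names and a hypothetical 'features_server', would
# look roughly like this:
ubm.read('gmm/ubm.h5')                                # load trained UBM parameters
enroll_idmap = sidekit.IdMap('task/enroll_idmap.h5')  # hypothetical idmap
enroll_stat = sidekit.StatServer(enroll_idmap, distrib_nb=distribNb, feature_size=40)
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=features_server,
                            seg_indices=range(enroll_stat.segset.shape[0]))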
def train(self, SAVE=True):
    """
    This method trains our UBM model by doing the following:
    - create a FeaturesServer for the enroll features
    - use the EM algorithm to train our UBM on the enroll features
    - create a StatServer to save the trained parameters
    - if the SAVE argument is True (the default), save that StatServer.

    Args:
        SAVE (boolean): if True, then it will save the StatServer. If False,
        then the StatServer will be discarded.
    """
    # SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
    train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
    for i in range(len(train_list)):
        train_list[i] = train_list[i].split(".h5")[0]
    server = self.createFeatureServer("enroll")
    logging.info("Training...")
    ubm = sidekit.Mixture()
    # Set the model name
    ubm.name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    # Expectation-Maximization estimation of the Mixture parameters.
    ubm.EM_split(
        features_server=server,         # sidekit.FeaturesServer used to load data
        feature_list=train_list,        # list of feature files to train the model
        distrib_nb=self.NUM_GAUSSIANS,  # number of Gaussian distributions
        num_thread=self.NUM_THREADS,    # number of parallel processes
        save_partial=False,             # if False, only the last model is saved
        iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8)
    )
    # -> 2 iterations of EM with 2 distributions
    # -> 2 iterations of EM with 4 distributions
    # -> 4 iterations of EM with 8 distributions
    # -> 4 iterations of EM with 16 distributions
    # -> 4 iterations of EM with 32 distributions
    # -> 4 iterations of EM with 64 distributions
    # -> 8 iterations of EM with 128 distributions
    # -> 8 iterations of EM with 256 distributions
    # -> 8 iterations of EM with 512 distributions
    # -> 8 iterations of EM with 1024 distributions
    model_dir = os.path.join(self.BASE_DIR, "ubm")
    logging.info("Saving the model {} at {}".format(ubm.name, model_dir))
    ubm.write(os.path.join(model_dir, ubm.name))

    # Read idmap for the enrollment data
    enroll_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "enroll_idmap.h5"))
    # Create a StatServer to store/process the enrollment data
    enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap, ubm=ubm)
    logging.debug(enroll_stat)
    server.feature_filename_structure = os.path.join(self.BASE_DIR, "feat", "{}.h5")
    # Compute the sufficient statistics for a list of sessions whose indices are seg_indices.
    # BUG: don't pass self.NUM_THREADS as num_thread here, as it is prone to race conditions
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=server,
                                seg_indices=range(enroll_stat.segset.shape[0]))
    if SAVE:
        # Save the enrollment statistics
        filename = "enroll_stat_{}.h5".format(self.NUM_GAUSSIANS)
        enroll_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
from collections import OrderedDict
from fuel.datasets import IndexableDataset
from fuel.schemes import (SequentialScheme, ShuffledScheme,
                          SequentialExampleScheme, ShuffledExampleScheme)
from fuel.schemes import ConstantScheme
from fuel.transformers import Mapping, Batch, Padding, Filter, Unpack
from fuel.streams import DataStream
import sympy
import sidekit

""" ------------------ Set a few parameters here --------------------"""
Test = False
""" ---------------------------------------------------------------- """

# LOAD STATSERVER OF I-VECTORS
ivss = sidekit.StatServer("iv_sre04050608_m_training_tandem.h5")
mu = ivss.get_mean_stat1()
std = ivss.get_total_covariance_stat1()
ivss.whiten_stat1(mu, std)
ivss.norm_stat1()

# GET LIST OF UNIQUE SPEAKERS WITH NUMBER OF SESSIONS FOR EACH
unique_spk = set(ivss.modelset.tolist())
speakers = []
sess = []
for spk in unique_spk:
    speakers.append((spk, (ivss.modelset == spk).sum()))
    sess.append((ivss.modelset == spk).sum())

# COMPUTE THE NUMBER OF POSSIBLE UNIQUE TARGET TRIPLETS
positive_example = int(0)
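# A possible continuation (an assumption, not the original code): count, per
# speaker, the unordered session triplets C(n, 3); sympy.binomial matches the
# sympy import above.
for n in sess:
    positive_example += int(sympy.binomial(n, 3))
print('number of possible unique target triplets:', positive_example)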
# %%
#################################################################
# Train the Universal Background Model (UBM)
#################################################################
print('Train the UBM by EM')
ubm = sidekit.Mixture()
llk = ubm.EM_split(fs, ubmList, distribNb, numThread=nbThread)
ubm.save_pickle('gmm/ubm_bnf.p')

# %%
#################################################################
# Compute the sufficient statistics on the UBM
#################################################################
print('Compute the sufficient statistics')
# Create a StatServer for the enrollment data and compute the statistics
enroll_stat = sidekit.StatServer(enroll_idmap, ubm)
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=fs,
                            seg_indices=range(enroll_stat.segset.shape[0]),
                            numThread=nbThread)
enroll_stat.save('data/stat_sre10_coreX-coreX_m_enroll_bnf.h5')

nap_stat = sidekit.StatServer(nap_idmap, ubm)
nap_stat.accumulate_stat(ubm=ubm,
                         feature_server=fs,
                         seg_indices=range(nap_stat.segset.shape[0]),
                         numThread=nbThread)
nap_stat.save('data/stat_sre04050608_m_training_bnf.h5')

test_stat = sidekit.StatServer(test_idmap, ubm)
# The original fragment is truncated here; the call is completed following the
# parallel pattern of the two accumulate_stat calls above.
test_stat.accumulate_stat(ubm=ubm,
                          feature_server=fs,
                          seg_indices=range(test_stat.segset.shape[0]),
                          numThread=nbThread)
#!coding=utf-8
import sidekit
from utils import BasicUtils

basic_ops = BasicUtils()

enroll_ivecs_stat = sidekit.StatServer("./exp/enroll_ivecs_stat",
                                       distrib_nb=512, feature_size=63)
test_ivecs_stat = sidekit.StatServer("./exp/test_ivecs_stat",
                                     distrib_nb=512, feature_size=63)

# Average the enrollment i-vectors per speaker model
sts_per_model = enroll_ivecs_stat.mean_stat_per_model()
spk_list = sts_per_model.modelset
mean_ivecs = sts_per_model.stat1
test_ivecs = test_ivecs_stat.stat1
print(spk_list.shape, mean_ivecs.shape, test_ivecs.shape)

test_utts = test_ivecs_stat.segset
print(test_utts.shape)

# Score every test utterance against every speaker mean by cosine similarity
result_lines = []
for k in range(len(test_utts)):
    uttId = test_utts[k]
    uttId_ivec = test_ivecs[k]
    temp_scores = []
    for i in range(len(spk_list)):
        cos = basic_ops.compute_cosine(uttId_ivec, mean_ivecs[i])
        temp_scores.append(cos)
    max_score = max(temp_scores)
    max_score_index = temp_scores.index(max_score)
    result_spk = spk_list[max_score_index]
    # assumed output format: "<utterance> <predicted speaker>"
    result_lines.append('{} {}'.format(uttId, result_spk))
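# A vectorized equivalent of the scoring loop above (sketch): L2-normalize
# both sides, compute all cosine scores in one matrix product, then take the
# argmax per test utterance.
import numpy as np
enroll_norm = mean_ivecs / np.linalg.norm(mean_ivecs, axis=1, keepdims=True)
test_norm = test_ivecs / np.linalg.norm(test_ivecs, axis=1, keepdims=True)
cosine_scores = test_norm @ enroll_norm.T  # shape: (n_test, n_speakers)
predicted_spk = spk_list[cosine_scores.argmax(axis=1)]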
def __create_stats(self):
    # Read tv_idmap and plda_idmap
    tv_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "tv_idmap.h5"))
    plda_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "plda_idmap.h5"))
    # Create a joint StatServer for TV and PLDA training data
    back_idmap = plda_idmap.merge(tv_idmap)
    if not back_idmap.validate():
        raise RuntimeError("Error merging tv_idmap & plda_idmap")
    # Load UBM
    model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
    ubm = sidekit.Mixture()
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
    back_stat = sidekit.StatServer(statserver_file_name=back_idmap, ubm=ubm)
    # Create Feature Server
    fs = self.createFeatureServer()
    # Jointly compute the sufficient statistics of the TV and PLDA data
    back_filename = 'back_stat_{}.h5'.format(self.NUM_GUASSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", back_filename)):
        # BUG: don't pass self.NUM_THREADS as num_thread here,
        # as it is prone to race conditions
        back_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(back_stat.segset.shape[0]))
        back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))
    # Extract the sufficient statistics of the TV training data
    tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GUASSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", tv_filename)):
        tv_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "stat", back_filename), tv_idmap)
        tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))
    # Extract the sufficient statistics of the PLDA training data
    plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GUASSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", plda_filename)):
        plda_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "stat", back_filename), plda_idmap)
        plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))
    # Compute the sufficient statistics of the test data
    filename = 'test_stat_{}.h5'.format(self.NUM_GUASSIANS)
    if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
        test_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
        test_stat = sidekit.StatServer(statserver_file_name=test_idmap, ubm=ubm)
        # Create Feature Server
        fs = self.createFeatureServer()
        # BUG: don't pass self.NUM_THREADS as num_thread here,
        # as it is prone to race conditions
        test_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(test_stat.segset.shape[0]))
        test_stat.write(os.path.join(self.BASE_DIR, "stat", filename))