def evaluate(self, explain=True):
    """
    This method is used to score our trained model.
    """
    # Load UBM model
    model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm = sidekit.Mixture()
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

    # Load TV matrix
    filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    fa = sidekit.FactorAnalyser(outputPath + ".h5")

    # Extract i-vectors from enrollment data
    logging.info("Extracting i-vectors from enrollment data")
    filename = 'enroll_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    enroll_stat = sidekit.StatServer.read(
        os.path.join(self.BASE_DIR, 'stat', filename))
    enroll_iv = fa.extract_ivectors_single(ubm=ubm,
                                           stat_server=enroll_stat,
                                           uncertainty=False)

    # Extract i-vectors from test data
    logging.info("Extracting i-vectors from test data")
    filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    test_stat = sidekit.StatServer.read(
        os.path.join(self.BASE_DIR, 'stat', filename))
    test_iv = fa.extract_ivectors_single(ubm=ubm,
                                         stat_server=test_stat,
                                         uncertainty=False)

    # Do cosine distance scoring and write results
    logging.info("Calculating cosine score")
    test_ndx = sidekit.Ndx.read(
        os.path.join(self.BASE_DIR, "task", "test_ndx.h5"))
    scores_cos = sidekit.iv_scoring.cosine_scoring(enroll_iv,
                                                   test_iv,
                                                   test_ndx,
                                                   wccn=None)

    # Write scores
    filename = "ivector_scores_cos_{}.h5".format(self.NUM_GAUSSIANS)
    scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))

    # Explain the analysis by writing a more readable text file
    if explain:
        modelset = list(scores_cos.modelset)
        segset = list(scores_cos.segset)
        scores = np.array(scores_cos.scoremat)
        filename = "ivector_scores_explained_{}.txt".format(self.NUM_GAUSSIANS)
        # "w" mode truncates any previous content before writing
        with open(os.path.join(self.BASE_DIR, "result", filename), "w") as fout:
            for seg_idx, seg in enumerate(segset):
                fout.write("Wav: {}\n".format(seg))
                for speaker_idx, speaker in enumerate(modelset):
                    fout.write("\tSpeaker {}:\t{}\n".format(
                        speaker, scores[speaker_idx, seg_idx]))
                fout.write("\n")
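# A minimal numpy-only sketch of turning the scores written above into an
# equal error rate (EER). `tar` and `non` are assumed to be 1-D arrays of
# target and non-target trial scores gathered from scores_cos.scoremat with a
# trial key; how that key is built depends on the task files and is not shown.
def compute_eer(tar, non):
    """Return the approximate EER given target and non-target score arrays."""
    thresholds = np.sort(np.concatenate([tar, non]))
    pmiss = np.array([np.mean(tar < t) for t in thresholds])  # miss rate at each threshold
    pfa = np.array([np.mean(non >= t) for t in thresholds])   # false-alarm rate at each threshold
    idx = np.argmin(np.abs(pmiss - pfa))                      # point where the two rates cross
    return (pmiss[idx] + pfa[idx]) / 2.0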
def adapt_plda(input_diar, model, features_server):
    # TODO: speakers that do not have enough sessions should be removed
    idmap_in = input_diar.id_map()
    # Extract i-vectors on the current document
    ivectors = model.train(features_server, idmap_in, normalization=False)

    # Estimate spherical-normalization statistics for the i-vectors
    norm_mean, norm_cov = ivectors.estimate_spectral_norm_stat1(1, 'sphNorm')

    # Train PLDA
    plda_fa = sidekit.FactorAnalyser()
    plda_fa.plda(ivectors,
                 rank_f=20,
                 nb_iter=10,
                 scaling_factor=1.,
                 output_file_name=None,
                 save_partial=False)

    # Store the normalization statistics and PLDA parameters on the model
    model.sn_mean = norm_mean
    model.sn_cov = norm_cov
    model.plda_mean = plda_fa.mean
    model.plda_f = plda_fa.F
    model.plda_g = plda_fa.G
    model.plda_sigma = plda_fa.Sigma
    return model
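# Hedged usage sketch: with the adapted parameters stored on `model`, trials
# can be scored with sidekit's PLDA scorer. `enroll_iv`, `test_iv`, and `ndx`
# (StatServers of i-vectors plus an Ndx of trials) are assumptions not
# produced by adapt_plda itself.
scores = sidekit.iv_scoring.fast_PLDA_scoring(enroll_iv, test_iv, ndx,
                                              mu=model.plda_mean,
                                              F=model.plda_f,
                                              Sigma=model.plda_sigma)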
def train_tv(self):
    # Create status servers
    self.__create_stats()

    # Load UBM model
    model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm = sidekit.Mixture()
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

    # Train TV matrix using FactorAnalyser
    filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
    fa = sidekit.FactorAnalyser()
    fa.total_variability_single(os.path.join(self.BASE_DIR, "stat", tv_filename),
                                ubm,
                                tv_rank=self.TV_RANK,
                                nb_iter=self.TV_ITERATIONS,
                                min_div=True,
                                tv_init=None,
                                batch_size=self.BATCH_SIZE,
                                save_init=False,
                                output_file_name=outputPath)

    filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS)
    lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex))
    for f in lst:
        os.remove(f)
def data_init(self):
    # Read tv_idmap and plda_idmap
    tv_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "idmap_tv.h5"))
    plda_idmap = sidekit.IdMap.read(
        os.path.join(self.BASE_DIR, "task", "idmap_plda.h5"))

    # Load UBM
    ubm = sidekit.Mixture()
    model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
    ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))

    # Create Feature Server
    fs = self.__createFeatureServer()

    # Create a joint StatServer for TV and PLDA training data
    back_idmap = plda_idmap.merge(tv_idmap)
    if not back_idmap.validate():
        logging.warning("Error merging tv_idmap & plda_idmap")
        return
    back_stat = sidekit.StatServer(statserver_file_name=back_idmap, ubm=ubm)

    # Jointly compute the sufficient statistics of TV and PLDA data
    # BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
    back_stat.accumulate_stat(ubm=ubm,
                              feature_server=fs,
                              seg_indices=range(back_stat.segset.shape[0]))
    back_stat.write(os.path.join(self.BASE_DIR, "task", 'stat_back.h5'))

    # Load the sufficient statistics from TV training data
    tv_stat = sidekit.StatServer.read_subset(
        os.path.join(self.BASE_DIR, "task", 'stat_back.h5'), tv_idmap)
    tv_stat.write(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'))

    # Train TV matrix using FactorAnalyser
    filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS)
    outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
    fa = sidekit.FactorAnalyser()
    fa.total_variability_single(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'),
                                ubm,
                                tv_rank=self.RANK_TV,
                                nb_iter=self.TV_ITERATIONS,
                                min_div=True,
                                tv_init=None,
                                batch_size=self.BATCH_SIZE,
                                save_init=False,
                                output_file_name=outputPath)
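# Hedged continuation (as it would appear at the end of data_init above): the
# PLDA half of the joint statistics can be pulled out of stat_back.h5 the same
# way as the TV subset, using the read_subset call already shown.
plda_stat = sidekit.StatServer.read_subset(
    os.path.join(self.BASE_DIR, "task", 'stat_back.h5'), plda_idmap)
plda_stat.write(os.path.join(self.BASE_DIR, "task", 'plda_stat.h5'))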
def evaluate(self): """ This method is used to score our trained model. """ # Load UBM model model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS) ubm = sidekit.Mixture() ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name)) # Load TV matrix filename = "tv_matrix_{}".format(self.NUM_GUASSIANS) outputPath = os.path.join(self.BASE_DIR, "ivector", filename) fa = sidekit.FactorAnalyser(outputPath + ".h5") # Extract i-vectors from enrollment data logging.info("Extracting i-vectors from enrollment data") enroll_stat = sidekit.StatServer.read( os.path.join(self.BASE_DIR, 'stat', 'enroll_stat_32.h5')) enroll_iv = fa.extract_ivectors_single(ubm=ubm, stat_server=enroll_stat, uncertainty=False) # Extract i-vectors from test data logging.info("Extracting i-vectors from test data") test_stat = sidekit.StatServer.read( os.path.join(self.BASE_DIR, 'stat', 'test_stat.h5')) test_iv = fa.extract_ivectors_single(ubm=ubm, stat_server=test_stat, uncertainty=False) # Do cosine distance scoring and write results logging.info("Calculating cosine score") test_ndx = sidekit.Ndx.read( os.path.join(self.BASE_DIR, "task", "test_ndx.h5")) scores_cos = sidekit.iv_scoring.cosine_scoring(enroll_iv, test_iv, test_ndx, wccn=None) # Write scores filename = "ivector_scores_cos_{}.h5".format(self.NUM_GUASSIANS) scores_cos.write(os.path.join(self.BASE_DIR, "result", filename))
def train_tv(self): """ This method is used to train the Total Variability (TV) matrix and save it into 'ivector' directory !! """ # Create status servers self.__create_stats() # Load UBM model model_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS) ubm = sidekit.Mixture() ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name)) # Train TV matrix using FactorAnalyser filename = "tv_matrix_{}".format(self.NUM_GAUSSIANS) outputPath = os.path.join(self.BASE_DIR, "ivector", filename) tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS) fa = sidekit.FactorAnalyser() fa.total_variability_single(os.path.join(self.BASE_DIR, "stat", tv_filename), ubm, tv_rank=self.TV_RANK, nb_iter=self.TV_ITERATIONS, min_div=True, tv_init=None, batch_size=self.BATCH_SIZE, save_init=False, output_file_name=outputPath) # tv = fa.F # TV matrix # tv_mean = fa.mean # Mean vector # tv_sigma = fa.Sigma # Residual covariance matrix # Clear files produced at each iteration filename_regex = "tv_matrix_{}_it-*.h5".format(self.NUM_GAUSSIANS) lst = glob(os.path.join(self.BASE_DIR, "ivector", filename_regex)) for f in lst: os.remove(f)
def train_net(net, train_dataloader, test_dataloader):
    if not os.path.exists(ConfigNetwork.modelname):
        last_model_loaded = False
        # inits
        iteration_number = 0
        for epoch in range(0, ConfigNetwork.train_number_epochs):
            # Disabled alternative: a ReduceLROnPlateau scheduler
            # if ConfigNetwork.learning_rate_scheduler:
            #     optimizer = optim.Adam(net.parameters(), lr=ConfigNetwork.learning_rate)
            #     scheduler = ReduceLROnPlateau(optimizer, 'min')
            # else:
            epoch_learning_rate_exponent = max(
                0, epoch - (ConfigNetwork.learning_rate_defactor_after_epoch - 1))
            lr = ConfigNetwork.learning_rate * \
                ConfigNetwork.learning_rate_defactor ** epoch_learning_rate_exponent
            parameters = filter(lambda p: p.requires_grad, net.parameters())
            if ConfigNetwork.train_vae:
                optimizer = pyro_optim.Adam({'lr': lr})
            else:
                optimizer = optim.Adam(parameters, lr=lr)
            base_file_pattern = os.path.join(
                ConfigNetwork.storage_dir,
                '{}_epoch_{}'.format(ConfigNetwork.modelname, epoch))
            epoch_net_file = '{}_model'.format(base_file_pattern)
            if epoch < ConfigNetwork.freeze_ResNet_epochs:
                net.set_ResNet_requires_grad(requires_grad=False)
            else:
                net.set_ResNet_requires_grad(requires_grad=True)
            if not os.path.exists(epoch_net_file) and epoch == 0:
                # init meta embeddings network
                if ConfigNetwork.train_with_meta_embeddings:
                    logging.debug('init B with plda expectation')
                    if not os.path.exists(ConfigNetwork.embeddings_file):
                        dataset = SoftMaxDatabase(
                            imageFolderDataset=ConfigFaceDatasets.dataset_class(
                                root=ConfigFaceDatasets.training_dir),
                            transform=train_dataloader.dataset.transform,
                            should_invert=False)
                        embeddings_loader = DataLoader(
                            dataset,
                            shuffle=False,
                            num_workers=ConfigNetwork.num_workers,
                            batch_size=ConfigNetwork.batch_size_train)
                        softmax_net = net.to_softmaxNetwork()
                        softmax_net.normalize = False
                        with h5py.File(ConfigNetwork.embeddings_file, "a") as embd_file:
                            for i, data in enumerate(embeddings_loader, 0):
                                img0, label = data
                                img0, label = Variable(img0).cuda(), Variable(label).cuda()
                                output0 = super(SoftMaxNetwork,
                                                softmax_net).forward_once(img0)
                                embd_file.create_dataset(
                                    "{}".format(i),
                                    data=numpy.column_stack(
                                        (output0.data.cpu().numpy(),
                                         label.data.cpu().numpy())),
                                    compression="gzip",
                                    fletcher32=True)
                        logging.critical('extracted embeddings')
                    if not os.path.exists(ConfigNetwork.embeddings_file_plda) \
                            or not os.path.exists(ConfigNetwork.embeddings_mean_file):
                        data = []
                        with h5py.File(ConfigNetwork.embeddings_file, "r") as h5f:
                            for key, value in h5f.items():
                                data.append(value[()])  # Dataset.value was removed in h5py >= 3
                        data = numpy.concatenate(data)
                        embeddings = data[:, :ConfigNetwork.embedding_size]
                        embeddings_mean = embeddings.mean(0)
                        numpy.save(ConfigNetwork.embeddings_mean_file, embeddings_mean)
                        logging.debug('embeddings mean: {}'.format(embeddings.mean(0)))
                        embeddings -= embeddings.mean(0)
                        # prepare cosine distance
                        embeddings = (embeddings.T /
                                      numpy.linalg.norm(embeddings, axis=1)).T
                        embedding_labels = data[:, ConfigNetwork.embedding_size:].squeeze()
                        s = sidekit.StatServer()
                        s.modelset = embedding_labels
                        s.segset = numpy.arange(embedding_labels.shape[0]).astype(str)
                        s.stat0 = numpy.ones((embedding_labels.shape[0], 1))
                        s.stat1 = copy.deepcopy(embeddings)
                        s.start = numpy.empty(embedding_labels.shape[0], dtype='|O')
                        s.stop = numpy.empty(embedding_labels.shape[0], dtype='|O')
                        s.validate()
                        ids = numpy.unique(s.modelset)
                        class_nb = ids.shape[0]
                        f = sidekit.FactorAnalyser()
                        rank_f = ConfigNetwork.embedding_size
                        f.plda(s, rank_f=rank_f)
                        f.write(ConfigNetwork.embeddings_file_plda)
                    else:
                        f = sidekit.FactorAnalyser(ConfigNetwork.embeddings_file_plda)
                    e_mu = torch.from_numpy(f.mean).type(torch.FloatTensor)
                    e_B = torch.from_numpy(
                        numpy.linalg.inv(f.Sigma).diagonal()).type(torch.FloatTensor)
                    # e_B = torch.from_numpy(numpy.linalg.inv(f.Sigma)).type(torch.FloatTensor)
                    assert isinstance(net, GME_SoftmaxNetwork)
                    net = GME_SoftmaxNetwork(
                        num_train_classes=net.num_train_classes,
                        pretrained_siamese_net=net.pretrained_net,
                        expected_mu=e_mu,
                        expected_B=e_B).cuda()
                    logging.debug('init B with plda done')
            if not os.path.exists(epoch_net_file):
                if last_model_loaded:
                    logging.critical('run validation on epoch {}'.format(epoch - 1))
                    test_model(database_dir=test_dataloader,
                               net=net,
                               net_distance=net_distance,
                               epoch=None)
                    last_model_loaded = False
                if ConfigNetwork.select_difficult_pairs_epoch is not None:
                    if epoch == ConfigNetwork.select_difficult_pairs_epoch:
                        train_dataloader = select_difficult_pairs(net, train_dataloader)
                # train an epoch
                net.train()
                train_epoch(train_dataloader=train_dataloader,
                            net=net,
                            optimizer=optimizer,
                            epoch=epoch,
                            iteration_number=iteration_number)
                torch.save(obj=net.state_dict(), f=epoch_net_file)
            else:
                net.load_state_dict(torch.load(epoch_net_file))
                logging.info('loaded model for epoch: {}'.format(epoch))
                last_model_loaded = True
                continue
            logging.critical('run validation on epoch {}'.format(epoch))
            test_model(database_dir=test_dataloader,
                       net=net,
                       net_distance=net_distance,
                       epoch=None)
        torch.save(obj=net.state_dict(), f='{}'.format(ConfigNetwork.modelname))
        logging.info('training completed, model stored.')
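# Hedged usage sketch: restoring the final model that train_net() stores under
# ConfigNetwork.modelname, using standard torch calls:
net.load_state_dict(torch.load(ConfigNetwork.modelname))
net.eval()  # disable dropout / batch-norm updates before scoring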
enroll_stats_path = './task/enroll_stat.h5'
test_stats_path = './task/test_stat.h5'
ubm_path = 'task/ubm512.h5'
ubm = sidekit.Mixture(ubm_path)

print("Accumulate the train stats")
train_idmap = get_idmap(train_wavscp_path)
train_feature_server = basic_ops.get_feature_server(
    train_feature_filename_structure)
train_stat_server = get_stat_server(ubm, train_idmap, train_feature_server,
                                    train_stats_path)

print("Train the T matrix")
# multiprocess on one node to train the T space
fa = sidekit.FactorAnalyser()
fa.total_variability(train_stats_path,
                     ubm,
                     tv_rank,
                     nb_iter=10,
                     min_div=True,
                     tv_init=None,
                     batch_size=2000,
                     save_init=True,
                     output_file_name=TV_matrix_path,
                     num_thread=nj)

print("Extract train ivectors")
train_ivecs_stat = fa.extract_ivectors(ubm, train_stats_path, uncertainty=False)
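# Hedged continuation: the enroll/test stat paths declared at the top of this
# script can be consumed the same way once their statistics have been
# accumulated (idmaps and feature servers for those sets are not shown here):
enroll_ivecs_stat = fa.extract_ivectors(ubm, enroll_stats_path, uncertainty=False)
test_ivecs_stat = fa.extract_ivectors(ubm, test_stats_path, uncertainty=False)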