def trial_dev(path):
    """
    Build the development key and ndx files from an annotation CSV.

    Every annotated file is paired with the five enrollment models of its
    group (model positions ``GroupID * 5 + 0..4``).  A pair is labelled
    'target' when the file's SpeakerID, as a string, equals the model ID and
    'nontarget' otherwise.  The key is written to 'task/dev_key.h5' and the
    ndx derived from it to 'task/dev_ndx.h5'.

    param:
        path : the annotation file path (CSV providing the GroupID, FileID
               and SpeakerID columns)
    """
    annotations = pd.read_csv(path)
    idmap = sidekit.IdMap('task/idmap.h5')

    # Keep every third left id as the model list
    # (presumably three enrollment entries per speaker -- TODO confirm).
    enroll_models = [
        idmap.leftids[pos] for pos in range(0, len(idmap.leftids), 3)
    ]
    print(enroll_models)

    model_ids, test_segments, labels = [], [], []
    for row in range(len(annotations.index)):
        for offset in range(5):
            model = enroll_models[annotations['GroupID'][row] * 5 + offset]
            model_ids.append(model)
            test_segments.append(annotations['FileID'][row])
            is_target = str(annotations['SpeakerID'][row]) == model
            labels.append('target' if is_target else 'nontarget')

    key = sidekit.Key(models=np.array(model_ids),
                      testsegs=np.array(test_segments),
                      trials=np.array(labels))
    assert key.validate(), 'key is not valid'
    key.write('task/dev_key.h5')

    ndx = key.to_ndx()
    assert ndx.validate(), 'ndx is not valid'
    ndx.write('task/dev_ndx.h5')
def readData():
    """
    Read every UBM/TV training feature dataset once and discard it.

    The segment names come from the right ids of 'fea/ubm_TV_idmap.h5'
    (under the module-level ``root``); each name, with '/' replaced by '_',
    is looked up in 'fea/fea/train_mfcc.h5'.  Nothing is returned.
    """
    ubm_TV_idmap = sidekit.IdMap(root + "fea/ubm_TV_idmap.h5")
    with h5py.File(root + "fea/fea/train_mfcc.h5", 'r') as f:
        for segment in ubm_TV_idmap.rightids:
            # Dataset.value was removed in h5py 3.0; indexing with an empty
            # tuple reads the whole dataset on old and new versions alike.
            _ = f[segment.replace("/", "_")][()]
def create_idMap(self, group):
    """
    Build the IdMap of one group of recordings and write it to disk.

    An IdMap holds two parallel lists of strings, leftids and rightids, plus
    'start' and 'stop' vectors that can carry segment boundaries.  Here each
    file found in ``<AUDIO_DIR>/<group>`` gives one entry:

    * leftids  -- the model ID, i.e. the file name up to its first '_';
    * rightids -- the segment, i.e. "<group>/<file name>";
    * start / stop -- uninitialised object arrays (no boundaries set).

    The map is written to ``<TASK_DIR>/<group>_idmap.h5``.

    NOTE: Duplicated entries are allowed in each list.

    Raises:
        RuntimeError: if the generated IdMap does not validate.
    """
    listing = os.listdir(os.path.join(self.AUDIO_DIR, group))
    model_ids = [file_name.split('_')[0] for file_name in listing]
    segment_ids = [group + "/" + file_name for file_name in listing]

    group_idmap = sidekit.IdMap()
    group_idmap.leftids = np.asarray(model_ids)
    group_idmap.rightids = np.asarray(segment_ids)
    boundaries_shape = group_idmap.rightids.shape
    group_idmap.start = np.empty(boundaries_shape, '|O')
    group_idmap.stop = np.empty(boundaries_shape, '|O')

    if not group_idmap.validate():
        raise RuntimeError('Problems with creating idMap file')
    # TODO: possibly add tv_idmap.h5 and plda_idmap.h5 as well
    group_idmap.write(os.path.join(self.TASK_DIR, group + '_idmap.h5'))
def create_idMap(self, group):
    """
    Build the IdMap of the 'enroll' or 'test' group and write it to disk.

    The files of ``<audio_dir>/<group>`` are taken in sorted order.  For each
    one the model ID (file name up to its first '.') goes to leftids and
    "<group>/<file name>" goes to rightids; start and stop are uninitialised
    object arrays.  The map is written to ``<task_dir>/<group>_idmap.h5``
    and, for the 'enroll' group, also to 'tv_idmap.h5' and 'plda_idmap.h5'.

    Raises:
        AssertionError: if group is neither 'enroll' nor 'test'.
        RuntimeError: if the generated IdMap does not validate.
    """
    assert group in ("enroll", "test"), \
        "Invalid group name!! Choose either 'enroll', 'test'"

    file_names = sorted(os.listdir(os.path.join(self.audio_dir, group)))
    # model IDs, one per file
    model_ids = [name.split('.')[0] for name in file_names]
    # audio segment IDs, one per file
    segment_ids = [group + "/" + name for name in file_names]

    group_idmap = sidekit.IdMap()
    group_idmap.leftids = np.asarray(model_ids)
    group_idmap.rightids = np.asarray(segment_ids)
    boundaries_shape = group_idmap.rightids.shape
    group_idmap.start = np.empty(boundaries_shape, '|O')
    group_idmap.stop = np.empty(boundaries_shape, '|O')

    if not group_idmap.validate():
        raise RuntimeError('Problems with creating idMap file')

    group_idmap.write(os.path.join(self.task_dir, group + '_idmap.h5'))
    if group == "enroll":
        # the enrollment map doubles as tv_idmap and plda_idmap
        group_idmap.write(os.path.join(self.task_dir, 'tv_idmap.h5'))
        group_idmap.write(os.path.join(self.task_dir, 'plda_idmap.h5'))
def selectData(self, idmapDir, frameInfo):
    """
    Select training data and pickle the resulting per-model dictionary.

    Only utterances with at least 12000 frames are kept, and only models
    that still have more than one utterance afterwards.  This method is
    meant to be used on its own, just for selecting data.

    Args:
        idmapDir: path of the pickle file to write.  The IdMap to read is
            the same path with its last two characters replaced by "h5".
        frameInfo: path of a NumPy file; each row of its transpose is read
            as (utterance name, frame count).

    Returns:
        The dictionary produced by ``self.idmap2Dict`` for the filtered
        IdMap, restricted to entries whose ``shape[0]`` exceeds 1, with two
        extra keys: "info" ([utterNum, validUtterNum, batchSize,
        frameRange] taken from ``self``) and "key" (model ID -> integer
        index).
    """
    idmap = sidekit.IdMap(idmapDir[:-2] + "h5")
    frame_table = np.load(frameInfo)

    # Keep utterances with >= 12000 frames.
    long_enough = [row for row in frame_table.T if int(row[1]) >= 12000]
    idmap = idmap.filter_on_right([row[0] for row in long_enough], True)

    # Store each kept utterance's frame count in the IdMap's start vector.
    frames_by_name = dict(long_enough)
    for pos, name in enumerate(idmap.rightids):
        idmap.start[pos] = frames_by_name[name]

    # Keep models with more than one utterance.
    idmapDict = {
        model: entries
        for model, entries in self.idmap2Dict(idmap).items()
        if entries.shape[0] > 1
    }
    modelid2Key = {model: index for index, model in enumerate(idmapDict)}

    # Extra entry holding settings that __init__() will need later.
    idmapDict["info"] = [
        self.utterNum, self.validUtterNum, self.batchSize, self.frameRange
    ]
    idmapDict["key"] = modelid2Key

    # Close the output file deterministically instead of leaking the handle.
    with open(idmapDir, "wb") as out_file:
        pickle.dump(idmapDict, out_file)
    return idmapDict
def getSummary(self):
    """
    Compute and save the frame-info summaries of the three data sets.

    The right ids of the UBM/TV, 10s-enrollment and 10s-test IdMaps (read
    from under ``root``) are each passed to ``DataInteger.multiReadProc``
    and the result is saved with ``np.save`` under ``inputDir`` as
    'fea/frameInfoSummary_train', '..._enroll' and '..._test'.
    """
    feature_template = root + "fea/fea/{}.h5"

    # Training set; the second argument (12, 48 below) is presumably a
    # worker count -- TODO confirm against DataInteger.multiReadProc.
    train_idmap = sidekit.IdMap(root + "fea/ubm_TV_idmap.h5")
    train_segments = list(train_idmap.rightids)
    summary = DataInteger.multiReadProc(train_segments, 12, feature_template)
    np.save(inputDir + "fea/frameInfoSummary_train", summary)

    # Enrollment and test sets (10 s versions).
    enroll_idmap = sidekit.IdMap(root + "fea/enroll_idmap_10s.h5")
    test_idmap = sidekit.IdMap(root + "fea/test_idmap_10s.h5")
    enroll_segments = list(enroll_idmap.rightids)
    test_segments = list(test_idmap.rightids)

    summary = DataInteger.multiReadProc(enroll_segments, 48, feature_template)
    np.save(inputDir + "fea/frameInfoSummary_enroll", summary)

    summary = DataInteger.multiReadProc(test_segments, 48, feature_template)
    np.save(inputDir + "fea/frameInfoSummary_test", summary)

    print("finish...")
def trial_test(path):
    """
    Build the test ndx file from the test CSV.

    Every listed file is paired with the five enrollment models of its
    group (model positions ``GroupID * 5 + 0..4``).  All trials are
    labelled 'nontarget'; the label is only a placeholder because just the
    ndx derived from the key is written, to 'task/ndx.h5'.

    param:
        path : the test file path (CSV providing the GroupID and FileID
               columns)
    """
    test_table = pd.read_csv(path)
    idmap = sidekit.IdMap('task/idmap.h5')

    # Keep every third left id as the model list
    # (presumably three enrollment entries per speaker -- TODO confirm).
    enroll_models = [
        idmap.leftids[pos] for pos in range(0, len(idmap.leftids), 3)
    ]
    print(enroll_models)

    model_ids, test_segments, labels = [], [], []
    for row in range(len(test_table.index)):
        for offset in range(5):
            model_pos = test_table['GroupID'][row] * 5 + offset
            model_ids.append(enroll_models[model_pos])
            test_segments.append(test_table['FileID'][row])
            labels.append('nontarget')

    key = sidekit.Key(models=np.array(model_ids),
                      testsegs=np.array(test_segments),
                      trials=np.array(labels))
    ndx = key.to_ndx()
    assert ndx.validate(), 'ndx is not valid'
    ndx.write('task/ndx.h5')
def adaptation(args):
    """
    Enroll speakers by MAP adaptation of the UBM and score the dev trials.

    Steps, all relative to the current working directory:

    1. build a FeaturesServer reading the pre-computed feature files;
    2. load 'task/idmap.h5', 'task/dev_ndx.h5' and 'model/ubm.h5';
    3. accumulate enrollment statistics -> 'task/enroll_stat.h5';
    4. MAP-adapt the means (regulation factor 16) -> 'task/enroll_sv.h5';
    5. score the ndx with the GMMs -> 'task/dev_score.h5'.

    Args:
        args: namespace providing ``feat_type`` ('mfcc' or 'fb'), ``delta``
            (truthy to enable delta and double-delta features) and
            ``num_thread``.

    Raises:
        ValueError: if ``args.feat_type`` is neither 'mfcc' nor 'fb'.
    """
    # Fail early on an unknown feature type; previously this left
    # datasetlist/mask unbound and crashed later with UnboundLocalError.
    if args.feat_type == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
    elif args.feat_type == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
    else:
        raise ValueError(
            "unsupported feat_type {!r}; expected 'mfcc' or 'fb'".format(
                args.feat_type))
    features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'

    use_delta = bool(args.delta)
    # create feature server for loading feature from disk
    feature_server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=use_delta,
        double_delta=use_delta,
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)

    work_dir = os.getcwd()
    enroll_idmap = sidekit.IdMap(work_dir + '/task/idmap.h5')
    ndx = sidekit.Ndx(work_dir + '/task/dev_ndx.h5')

    ubm = sidekit.Mixture()
    ubm.read(work_dir + '/model/ubm.h5')

    # Sufficient statistics of every enrollment segment against the UBM.
    enroll_stat = sidekit.StatServer(enroll_idmap,
                                     distrib_nb=ubm.distrib_nb(),
                                     feature_size=ubm.dim())
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=feature_server,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                num_thread=args.num_thread)
    enroll_stat.write(work_dir + '/task/enroll_stat.h5')

    # '\r' in the "done" messages overwrites the progress text printed
    # without a trailing newline.
    print('MAP adaptation', end='')
    regulation_factor = 16
    enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
    enroll_sv.write(work_dir + '/task/enroll_sv.h5')
    print('\rMAP adaptation done')

    print('Compute scores', end='')
    score = sidekit.gmm_scoring(ubm,
                                enroll_sv,
                                ndx,
                                feature_server,
                                num_thread=args.num_thread)
    score.write(work_dir + '/task/dev_score.h5')
    print('\rCompute scores done')
def saveH5(self):
    """
    Gather the training, enrollment and test features into three HDF5 files.

    The right ids of the UBM/TV, 10s-enrollment and 10s-test IdMaps (read
    from under ``root``) are each handed to ``DataInteger.multiReadProc1``
    together with the feature file template and the destination
    'fea/fea/h5/train.h5', '.../enroll.h5' or '.../test.h5'.  The values
    returned by those calls are not used.
    """
    out_dir = root + "fea/fea/h5/"
    feature_template = root + "fea/fea/{}.h5"

    # The second argument (12) is presumably a worker count -- TODO confirm
    # against DataInteger.multiReadProc1.
    train_idmap = sidekit.IdMap(root + "fea/ubm_TV_idmap.h5")
    train_segments = list(train_idmap.rightids)
    DataInteger.multiReadProc1(train_segments, 12, feature_template,
                               out_dir + "train.h5")

    enroll_idmap = sidekit.IdMap(root + "fea/enroll_idmap_10s.h5")
    test_idmap = sidekit.IdMap(root + "fea/test_idmap_10s.h5")
    enroll_segments = list(enroll_idmap.rightids)
    test_segments = list(test_idmap.rightids)

    DataInteger.multiReadProc1(enroll_segments, 12, feature_template,
                               out_dir + "enroll.h5")
    DataInteger.multiReadProc1(test_segments, 12, feature_template,
                               out_dir + "test.h5")
    print("finish...")
def create_idmap(speakers, basenames):
    """Return a Sidekit IdMap pairing each speaker with a file basename.

    ``speakers`` fills leftids and ``basenames`` fills rightids.  The start
    and stop vectors are uninitialised object arrays with one slot per
    speaker, i.e. no segment boundaries are set.  ``validate()`` is called
    but its result is not checked.
    """
    result = sidekit.IdMap()
    result.leftids = numpy.array(speakers)
    result.rightids = numpy.array(basenames)
    # no start / no end: one empty slot per entry
    result.start = numpy.empty(len(speakers), dtype="|O")
    result.stop = numpy.empty(len(speakers), dtype="|O")
    result.validate()
    return result
def get_idmapset(self, df):
    """
    Wrap a data frame of utterances into a sidekit ``IdMapSet``.

    The 'speaker_id' column fills the IdMap's leftids and 'file_id' its
    rightids; start and stop are uninitialised arrays shaped like leftids.
    The data set is configured from ``self.conf`` (root directory, file
    extension, sliding-window settings, target sample rate) with a fixed
    ``min_duration`` of 2.1.

    Returns:
        The constructed ``sidekit.nnet.xsets.IdMapSet``.
    """
    speaker_map = sidekit.IdMap()
    speaker_map.leftids = np.array(df['speaker_id'])
    speaker_map.rightids = np.array(df['file_id'])
    speaker_map.start = np.empty_like(speaker_map.leftids, dtype=None)
    speaker_map.stop = np.empty_like(speaker_map.leftids, dtype=None)
    speaker_map.validate()  # result is not checked

    return sidekit.nnet.xsets.IdMapSet(
        idmap_name=speaker_map,
        data_path=self.conf["rootdir"],
        file_extension=self.conf["file_extention"],
        sliding_window=self.conf["sliding_window"],
        window_len=self.conf["window_len"],
        window_shift=self.conf["window_shift"],
        sample_rate=self.conf["sample_rate_target"],
        min_duration=2.1,
    )
def get_idmap(wavscp_path):
    """
    Build a sidekit IdMap from a wav.scp-style listing.

    The first space-separated field of every line is the utterance ID
    (stored in rightids); the part of it before the first '_' is the
    speaker ID (stored in leftids).  start and stop are uninitialised
    object arrays.  ``validate()`` is called but its result is not checked.

    Returns:
        The populated ``sidekit.IdMap``.
    """
    tv_idmap = sidekit.IdMap()

    utterance_ids = [
        entry.strip().split(' ')[0]
        for entry in basic_ops.read_file(wavscp_path)
    ]
    speaker_ids = [utterance.split('_')[0] for utterance in utterance_ids]

    tv_idmap.leftids = numpy.asarray(speaker_ids)
    tv_idmap.rightids = numpy.asarray(utterance_ids)
    boundaries_shape = tv_idmap.rightids.shape
    tv_idmap.start = numpy.empty(boundaries_shape, '|O')
    tv_idmap.stop = numpy.empty(boundaries_shape, '|O')
    tv_idmap.validate()
    return tv_idmap
def enroll(path):
    """
    Create the enrollment idmap file.

    The SpeakerID column fills the IdMap's leftids and the FileID column
    its rightids; start and stop are uninitialised object arrays.  The map
    is validated and written to 'task/idmap.h5'.

    param:
        path : enrollment file path (CSV providing the SpeakerID and FileID
               columns)
    """
    table = pd.read_csv(path)
    rows = range(len(table.index))
    speaker_ids = [table['SpeakerID'][row] for row in rows]
    file_ids = [table['FileID'][row] for row in rows]

    idmap = sidekit.IdMap()
    idmap.leftids = np.asarray(speaker_ids)
    idmap.rightids = np.asarray(file_ids)
    boundaries_shape = idmap.rightids.shape
    idmap.start = np.empty(boundaries_shape, '|O')
    idmap.stop = np.empty(boundaries_shape, '|O')

    assert idmap.validate(), 'idmap is not valid'
    idmap.write('task/idmap.h5')
def load_data(self, folder_name, speaker_list):
    """
    Load the file list and speaker IDs of a speaker list into an IdMap.

    Two text files are read from ``get_training('i_vector', folder_name)``:
    '<speaker_list>_files.txt' and '<speaker_list>_ids.txt'.  Each is read
    line by line with trailing whitespace stripped.  The IDs fill leftids
    and the file names fill rightids; start and stop are uninitialised
    object arrays with one slot per file.

    Returns:
        (ubm_list, tv_idmap): the array of file names and the IdMap.
    """
    self.logger.info('load data')

    def read_listing(suffix):
        # One stripped entry per line of <speaker_list><suffix>.
        listing_path = join(get_training('i_vector', folder_name),
                            speaker_list + suffix)
        with open(listing_path, "r") as listing:
            return np.array([entry.rstrip() for entry in listing])

    ubm_list = read_listing("_files.txt")
    id_list = read_listing("_ids.txt")

    tv_idmap = sidekit.IdMap()
    tv_idmap.leftids = id_list
    tv_idmap.rightids = ubm_list
    tv_idmap.start = np.empty(len(ubm_list), dtype="|O")
    tv_idmap.stop = np.empty(len(ubm_list), dtype="|O")
    return ubm_list, tv_idmap
def create_idMap(self, group):
    """
    Build the IdMap of the 'enroll' or 'test' group and write it to disk.

    An IdMap stores two parallel lists of strings, leftids and rightids,
    and maps between them; it also carries 'start' and 'stop' vectors that
    can hold the boundaries of audio segments.  Here, for every file of
    ``<audio_dir>/<group>`` (taken in sorted order), leftids receives the
    model ID (file name up to its first '.') and rightids receives
    "<group>/<file name>"; start and stop are uninitialised object arrays.

    The map is written to ``<task_dir>/<group>_idmap.h5``; for the 'enroll'
    group it is additionally written as 'tv_idmap.h5' and 'plda_idmap.h5'.

    Args:
        group (string): name of the group that we want to create idmap for

    Raises:
        AssertionError: if group is neither 'enroll' nor 'test'.
        RuntimeError: if the generated IdMap does not validate.

    NOTE: Duplicated entries are allowed in each list.
    """
    assert group in ("enroll", "test"), \
        "Invalid group name!! Choose either 'enroll', 'test'"

    audio_files = sorted(os.listdir(os.path.join(self.audio_dir, group)))

    group_idmap = sidekit.IdMap()
    # model IDs on the left, audio segment IDs on the right
    group_idmap.leftids = np.asarray(
        [audio_file.split('.')[0] for audio_file in audio_files])
    group_idmap.rightids = np.asarray(
        [group + "/" + audio_file for audio_file in audio_files])
    group_idmap.start = np.empty(group_idmap.rightids.shape, '|O')
    group_idmap.stop = np.empty(group_idmap.rightids.shape, '|O')

    if not group_idmap.validate():
        raise RuntimeError('Problems with creating idMap file')

    output_names = [group + '_idmap.h5']
    if group == "enroll":
        # the enrollment map is reused as tv_idmap and plda_idmap
        output_names += ['tv_idmap.h5', 'plda_idmap.h5']
    for output_name in output_names:
        group_idmap.write(os.path.join(self.task_dir, output_name))
def feaExtract_10s(self, num_thread, extract: bool = True):
    """
    Run ``feaGet_core`` on the 10s enrollment and test segments.

    The right ids of 'fea/enroll_idmap_10s.h5' and 'fea/test_idmap_10s.h5'
    (under ``root``) are concatenated, enrollment first, and passed as a
    list to ``self.feaGet_core`` together with the constant 10, ``extract``
    and ``num_thread``.
    """
    enroll_idmap = sidekit.IdMap(root + "fea/enroll_idmap_10s.h5")
    test_idmap = sidekit.IdMap(root + "fea/test_idmap_10s.h5")
    all_segments = np.concatenate((enroll_idmap.rightids, test_idmap.rightids))
    self.feaGet_core(10, list(all_segments), extract, num_thread)
# Train the UBM with EM splitting, write it to disk, then reload it from the
# file that was just written.
ubm = sidekit.Mixture()
llk = ubm.EM_split(server_train, ubmList, components_num, num_thread=nj, save_partial=True)
ubm.write("/home/wcq/bird/task/ubm512.h5")
ubm = sidekit.Mixture('/home/wcq/bird/task/ubm512.h5')

# Compute the MFCC features of the enrollment recordings and create the
# feature server that reads them back.
print('get enroll feats')
enrollList, enroll_input_file_list, enroll_output_feats_list = basic_ops.get_info4mfcc(enroll_wavscp_path, project_dir, 'enroll')
basic_ops.make_mfcc_feats(enrollList, enroll_input_file_list, enroll_output_feats_list, nj)
server_enroll = basic_ops.get_feature_server(enroll_feature_filename_structure)

# Prepare the idmap for the enrollment data: one entry per line of
# data/enroll/feats.scp.
models = []
segments = []
enroll_idmap = sidekit.IdMap()
eval_lines = basic_ops.read_file(project_dir + '/data/enroll/feats.scp')
for line in eval_lines:
    splits = line.strip().split(' ')
    # first field is the utterance ID; its prefix before '_' is the speaker ID
    uttId = splits[0]
    spkId = uttId.split('_')[0]
    models.append(spkId)
    segments.append(uttId)
enroll_idmap.leftids = numpy.asarray(models)
enroll_idmap.rightids = numpy.asarray(segments)
# no segment boundaries: start/stop are uninitialised object arrays
enroll_idmap.start = numpy.empty(enroll_idmap.rightids.shape, '|O')
enroll_idmap.stop = numpy.empty(enroll_idmap.rightids.shape, '|O')
enroll_idmap.validate()  # NOTE(review): the validation result is not checked
print('Compute the sufficient statistics')
# logging.basicConfig(filename='log/JVPD_ubm-gmm.log',level=logging.DEBUG) distribNb = 512 # number of Gaussian distributions for each GMM JVPD_Path = r'C:\Users\yokoo takaya\Desktop\JVPD' # Default for RSR2015 audioDir = os.path.join(JVPD_Path, 'JVPD_ALLsound') # Automatically set the number of parallel process to run. # The number of threads to run is set equal to the number of cores available # on the machine minus one or to 1 if the machine has a single core. nbThread = max(multiprocessing.cpu_count()-1, 1) print('Load task definition') enroll_idmap = sidekit.IdMap('idmap_JVPD.h5') test_ndx = sidekit.Ndx('ndx_JVPD.h5') key = sidekit.Key('key_JVPD.h5') with open('JVPD_filename_all.txt') as inputFile: ubmList = inputFile.read().split('\n') logging.info("Initialize FeaturesExtractor") extractor = sidekit.FeaturesExtractor(audio_filename_structure=audioDir+"/{}.wav", feature_filename_structure="./features_PLP/{}.h5", sampling_frequency=16000, lower_frequency=133.3333, higher_frequency=6955.4976, filter_bank="lin", filter_bank_size=40, window_size=0.025,
if fnmatch(name, extension.upper()) or fnmatch(name, extension.lower()): name = os.path.splitext(name)[0] file_dict[corpus + '/' + os.path.splitext(name)[0].lower()] = os.path.join(path, name) corpusList.append(corpus) completeFileList.append(os.path.join(path, name)) fileList.append((corpus + '/' + os.path.splitext(name)[0]).lower()) return corpusList, completeFileList, fileList, file_dict extension = '*.sph' corpusList, completeFileList, sphList, file_dict = search_files(corpora_dir, extension) with open('nist_existing_sph_files.p', "wb" ) as f: pickle.dump( (corpusList, completeFileList, sphList), f) print("After listing, {} files found\n".format(len(completeFileList))) trn_male = sidekit.IdMap('task/original_sre10_coreX-coreX_m_trn.h5') ndx_male = sidekit.Ndx('task/original_sre10_coreX-coreX_m_ndx.h5') sre10_male_sessions = np.unique(np.concatenate((trn_male.rightids, ndx_male.segset), axis=1)) # Load dataframe i4u_df = pd.read_csv('Sph_MetaData/I4U.key', low_memory=False) # Create keys corresponding to NIST info i4u_df.database.replace(corpora.keys(), corpora.values(), inplace=True) i4u_df["filename"] = np.nan i4u_df["nistkey"] = i4u_df.database + '/' + i4u_df.session i4u_df.channel.replace(['a', 'b', 'x'], ['_a', '_b', ''], inplace=True) i4u_df["sessionKey"] = i4u_df.nistkey + i4u_df.channel # Load dataframe
distribNb = 4 # number of Gaussian distributions for each GMM rsr2015Path = '/info/home/larcher/RSR2015_v1/' # Default for RSR2015 audioDir = os.path.join(rsr2015Path, 'sph/male') # Automatically set the number of parallel process to run. # The number of threads to run is set equal to the number of cores available # on the machine minus one or to 1 if the machine has a single core. nbThread = max(multiprocessing.cpu_count() - 1, 1) # Load IdMap, Ndx, Key from HDF5 files and ubm_list print('Load task definition') enroll_idmap = sidekit.IdMap('/info/home/larcher/task/3sesspwd_eval_m_trn.h5') test_ndx = sidekit.Ndx('/info/home/larcher/task/3sess-pwd_eval_m_ndx.h5') key = sidekit.Key('/info/home/larcher/task/3sess-pwd_eval_m_key.h5') with open('/info/home/larcher/task/ubm_list.txt') as inputFile: ubmList = inputFile.read().split('\n') # Process the audio to save MFCC on disk logging.info("Initialize FeaturesExtractor") extractor = sidekit.FeaturesExtractor( audio_filename_structure=audioDir + "/{}.wav", feature_filename_structure="./features/{}.h5", sampling_frequency=16000, lower_frequency=133.3333, higher_frequency=6955.4976, filter_bank="log",
rank_TV = 400 # Rank of the Total Variability matrix audioDir = '/lium/parolee/larcher/data/nist/' # Root directory where features are stored scoring = [ 'cosine', 'mahalanobis', '2cov', 'plda' ] # list of scoring to run on the task, could be 'cosine', 'mahalanobis', '2cov' or 'plda' # Automatically set the number of parallel process to run. # The number of threads to run is set equal to the number of cores available # on the machine minus one or to 1 if the machine has a single core. nbThread = max(multiprocessing.cpu_count() - 1, 1) ################################################################# # Load IdMap, Ndx, Key from PICKLE files and ubm_list ################################################################# print('Load task definition') enroll_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_trn.h5', 'hdf5') nap_idmap = sidekit.IdMap('task/sre04050608_m_training.h5', 'hdf5') back_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_back.h5', 'hdf5') test_ndx = sidekit.Ndx('task/sre10_coreX-coreX_m_ndx.h5', 'hdf5') test_idmap = sidekit.IdMap('task/sre10_coreX-coreX_m_test.h5', 'hdf5') keysX = [] for cond in range(9): keysX.append( sidekit.Key('task/sre10_coreX-coreX_det{}_key.h5'.format(cond + 1))) with open('task/ubm_list.txt', 'r') as inputFile: ubmList = inputFile.read().split('\n') if train: # %% #################################################################