def compute_cmvn(featdir):
    #create feature reader
    reader = ark.ArkReader(featdir + '/feats.scp')

    #create writer for cmvn stats
    writer = ark.ArkWriter(featdir + '/cmvn.scp')

    #read the spk2utt file and loop over speakers
    with open(featdir + '/spk2utt', 'r') as spk2utt:
        for line in spk2utt:
            #cut off the end of line character and split into speaker + utts
            split = line.strip().split(' ')

            #get the first speaker utterance
            spk_data = reader.read_utt(split[1])

            #get the rest of the utterances
            for utt_id in split[2:]:
                spk_data = np.append(spk_data, reader.read_utt(utt_id),
                                     axis=0)

            #accumulate the cmvn statistics: per-dimension sums and sums of
            #squares, with the frame count in the last column of the first row
            stats = np.zeros([2, spk_data.shape[1] + 1])
            stats[0, 0:spk_data.shape[1]] = np.sum(spk_data, 0)
            stats[1, 0:spk_data.shape[1]] = np.sum(np.square(spk_data), 0)
            stats[0, spk_data.shape[1]] = spk_data.shape[0]

            #write the stats to file
            writer.write_next_utt(featdir + '/cmvn.ark', split[0], stats)

    writer.close()
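# Hedged sketch (not part of the original code): turning the accumulated
# stats back into a mean/variance normalisation, assuming the layout
# produced above (row 0 = per-dimension sums plus the frame count in the
# last column, row 1 = per-dimension sums of squares). The name apply_cmvn
# is hypothetical.
import numpy as np

def apply_cmvn(utt, stats):
    dim = stats.shape[1] - 1
    count = stats[0, dim]                      #number of frames
    mean = stats[0, :dim] / count              #per-dimension mean
    var = stats[1, :dim] / count - np.square(mean)
    return (utt - mean) / np.sqrt(var)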
def decode(self, likelihood, lengths, trans=None):
    # Input:
    #   likelihood: N * L * P numpy array
    #     N: number of utterances
    #     L: maximum number of frames of one utterance
    #     P: dimension of the phone posterior (after transform).
    #        The order should follow lang/phones.txt except <eps> and #0~#4
    #   lengths: 1d numpy array of the N utterance lengths
    #   trans: transcriptions of the N utterances as a list of N strings.
    #     If None, skip scoring.

    # Write the posteriors to feats.scp (in order to call split_data.sh)
    writer = ark.ArkWriter(
        os.path.join(self.posterior_dir, 'feats.scp'),
        os.path.join(self.posterior_dir, 'likelihoods.ark'))
    N = likelihood.shape[0]
    n_digits = len(str(N))
    for idx, (output, l) in enumerate(zip(likelihood, lengths)):
        output = output[:l, :]
        # floor zero posteriors to avoid -inf after the log
        output = np.where(output == 0, np.finfo(float).eps, output)
        output = np.ascontiguousarray(output, dtype=np.float32)
        writer.write_next_utt(
            str(self._number2str(idx, n_digits)).encode('utf-8'),
            np.log(output))
    writer.close()

    self._gen_utt2spk(N)
    if trans:
        skip_scoring = 'false'
        self._write_trans(trans)
    else:
        skip_scoring = 'true'
    os.system(
        '%s/scripts/decode.sh --cmd run.pl --skip_scoring %s --nj %s %s %s %s'
        ' | tee %s/decode.log || exit 1;' %
        (os.getcwd(), skip_scoring, self.nj, self.graph_dir,
         self.posterior_dir, self.decode_dir, self.decode_dir))

    # Get the best WER and print it
    wer = os.popen('grep WER %s/wer_* | utils/best_wer.sh' %
                   self.decode_dir).read()
    print(wer)
    _, lmwt, penalty = wer[wer.find('wer'):].rstrip().split('_')
    output_path = os.path.join(
        self.decode_dir,
        'scoring_kaldi/penalty_{}/{}.txt'.format(penalty, lmwt))
    copy_path = os.path.join(self.decode_dir, 'output.txt')
    os.system('cat {} | sort > {}'.format(output_path, copy_path))
    print('The result file of decoding corresponding to the lowest WER is'
          ' in: {}\n'.format(copy_path))
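# Hedged sketch of the _number2str helper referenced above (its real
# implementation is not shown here); a standalone equivalent that zero-pads
# the utterance index so the generated ids sort consistently in feats.scp:
def number2str(idx, n_digits):
    '''e.g. number2str(7, 3) -> "007"'''
    return str(idx).zfill(n_digits)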
def decode(self, featdir, decodedir):
    #create a feature reader
    reader = batchdispenser.FeatureReader(
        featdir + '/feats.scp', featdir + '/cmvn.scp', featdir + '/utt2spk',
        int(self.conf['context_width']))

    #remove the ark file if it already exists
    if os.path.isfile(decodedir + '/feats.ark'):
        os.remove(decodedir + '/feats.ark')

    #open likelihood writer
    writer = ark.ArkWriter(decodedir + '/feats.scp')

    #create a decoder
    decoder = nnetgraph.NnetDecoder(self.DNN, self.input_dim)

    #read the prior
    prior = np.load(self.conf['savedir'] + '/prior.npy')

    #start tensorflow session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(graph=decoder.graph, config=config) as session:
        #load the model
        decoder.restore(self.conf['savedir'] + '/final')

        #feed the utterances one by one to the neural net
        while True:
            utt_id, utt_mat, looped = reader.getUtt()
            if looped:
                break

            #compute predictions
            output = decoder(utt_mat)

            #get state likelihoods by dividing by the prior
            output = output / prior

            #floor the values to avoid problems with log
            output = np.where(output == 0, np.finfo(float).eps, output)

            #write the pseudo-likelihoods in kaldi feature format
            writer.write_next_utt(decodedir + '/feats.ark', utt_id,
                                  np.log(output))

    #close the writer
    writer.close()
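# Hedged sketch (not from the original code): one common way prior.npy is
# produced, by counting state occupancies in the training alignments.
# compute_prior, alignments and num_states are hypothetical names;
# alignments is assumed to be an iterable of 1-D integer state sequences.
import numpy as np

def compute_prior(alignments, num_states):
    counts = np.zeros(num_states)
    for ali in alignments:
        counts += np.bincount(ali, minlength=num_states)
    return counts / counts.sum()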
def prepare_data(datadir, featdir, conf, feat_type, dynamic):
    if not os.path.exists(featdir):
        os.makedirs(featdir)

    #read the segments
    if os.path.isfile(datadir + '/segments'):
        segments = kaldiInterface.read_segments(datadir + '/segments')
        found_segments = True
    else:
        print('WARNING: no segments file found, assuming each wav file is a'
              ' separate utterance')
        found_segments = False

    #read the wavfiles
    wavfiles = kaldiInterface.read_wavfiles(datadir + '/wav.scp')

    #create ark writer
    writer = ark.ArkWriter(featdir + '/feats.scp')
    if os.path.isfile(featdir + '/feats.ark'):
        os.remove(featdir + '/feats.ark')

    #read all the wav files as (rate, samples) pairs
    rate_utt = {utt: read_wav(wavfiles[utt]) for utt in wavfiles}

    #create a feature computer
    comp = feat.FeatureComputer(feat_type, dynamic, conf)

    #compute all the features
    for utt in wavfiles:
        if found_segments:
            for seg in segments[utt]:
                rate, samples = rate_utt[utt]
                features = comp(
                    samples[int(seg[1] * rate):int(seg[2] * rate)], rate)
                writer.write_next_utt(featdir + '/feats.ark', seg[0],
                                      features)
        else:
            features = comp(rate_utt[utt][1], rate_utt[utt][0])
            writer.write_next_utt(featdir + '/feats.ark', utt, features)

    writer.close()

    #copy some kaldi files to the features dir
    copyfile(datadir + '/utt2spk', featdir + '/utt2spk')
    copyfile(datadir + '/spk2utt', featdir + '/spk2utt')
    copyfile(datadir + '/text', featdir + '/text')
    copyfile(datadir + '/wav.scp', featdir + '/wav.scp')
def prepare_data(datadir, featdir, conf, feat_type, dynamic):
    '''
    compute the features of all segments and save them on disk

    Args:
        datadir: directory where the kaldi data prep has been done
        featdir: directory where the features will be put
        conf: feature configuration
        feat_type: string containing the type of features, options are:
            fbank, mfcc and ssc.
        dynamic: the type of dynamic information added, options are:
            nodelta, delta and ddelta.
    '''

    if not os.path.exists(featdir):
        os.makedirs(featdir)

    #read the segments
    if os.path.isfile(datadir + '/segments'):
        segments = readfiles.read_segments(datadir + '/segments')
        found_segments = True
    else:
        print('WARNING: no segments file found, assuming each wav file is a'
              ' separate utterance')
        found_segments = False

    #create ark writer
    if os.path.isfile(featdir + '/feats.ark'):
        os.remove(featdir + '/feats.ark')
    writer = ark.ArkWriter(featdir + '/feats.scp', featdir + '/feats.ark')

    #read the wavfiles
    wavfiles = readfiles.read_wavfiles(datadir + '/wav.scp')

    #read all the wav files as (rate, samples) pairs
    rate_utt = {utt: read_wav(wavfiles[utt]) for utt in wavfiles}

    #create a feature computer
    comp = feat.FeatureComputer(feat_type, dynamic, conf)

    #compute all the features and track the longest utterance
    max_length = 0
    for utt in wavfiles:
        if found_segments:
            for seg in segments[utt]:
                rate, samples = rate_utt[utt]
                features = comp(
                    samples[int(seg[1] * rate):int(seg[2] * rate)], rate)
                writer.write_next_utt(seg[0], features)
                max_length = max(max_length, features.shape[0])
        else:
            features = comp(rate_utt[utt][1], rate_utt[utt][0])
            writer.write_next_utt(utt, features)
            max_length = max(max_length, features.shape[0])

    writer.close()

    #copy some kaldi files to the features dir
    copyfile(datadir + '/utt2spk', featdir + '/utt2spk')
    copyfile(datadir + '/spk2utt', featdir + '/spk2utt')
    copyfile(datadir + '/text', featdir + '/text')
    copyfile(datadir + '/wav.scp', featdir + '/wav.scp')

    #write the maximum utterance length in a file
    with open(featdir + '/maxlength', 'w') as fid:
        fid.write(str(max_length))
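# Hedged usage sketch for prepare_data above; the paths and the conf object
# are hypothetical, and it assumes kaldi data prep already produced wav.scp,
# utt2spk, spk2utt and text under data/train.
conf = dict(config.items('fbank'))        #feature configuration section
prepare_data('data/train', 'fbank/train', conf, 'fbank', 'ddelta')
#fbank/train now holds feats.scp/feats.ark, the copied kaldi files and a
#maxlength file that is later used to size the feature reader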
line = f.readline()

#write the maxlength to a file
with open(dev_features_dir + '/maxlength', 'w') as f:
    f.write('%s' % max_input_length)
print("the utt's maxlength is: " + str(max_input_length))

#create a feature reader
with open(dev_features_dir + '/maxlength', 'r') as fid:
    max_length = int(fid.read())
featreader = feature_reader.FeatureReader(
    dev_features_dir + '/feats.scp', dev_features_dir + '/utt2spk',
    context_left, context_right, max_length)

#create an ark writer for the likelihoods
if os.path.isfile(decodedir + '/likelihoods.ark'):
    os.remove(decodedir + '/likelihoods.ark')
writer = ark.ArkWriter(decodedir + '/feats.scp',
                       decodedir + '/likelihoods.ark')

#decode with the neural net
nnet.decode(featreader, writer)
print('------- decoding dev sets ----------')

#copy the gmm model and some speaker mapping files to the decoding dir
os.system('cp %s %s' % (config.get('directories', 'expdir') + '/' +
                        config.get('nnet', 'gmm_name') + '/final.mdl',
                        decodedir))
os.system('cp -r %s %s' % (config.get('directories', 'expdir') + '/' +
                           config.get('nnet', 'gmm_name') + '/graph',
                           decodedir))
os.system('cp %s %s' % (config.get('directories', 'dev_features') +
                        '/utt2spk', decodedir))
os.system('cp %s %s' % (config.get('directories', 'dev_features') +
                        '/text', decodedir))
os.system('cp %s %s' % (config.get('directories', 'dev_features') +
                        '/stm', decodedir))
os.system('cp %s %s' % (config.get('directories', 'dev_features') +
                        '/glm', decodedir))

#change directory to kaldi egs
os.chdir(config.get('directories', 'prjdir'))
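# Hedged sketch (illustrative only, not the project's FeatureReader): how
# left/right context splicing typically turns a (frames x dim) utterance
# into (frames x dim*(context_left + context_right + 1)), padding the edges
# by repeating the first and last frame. The name splice is hypothetical.
import numpy as np

def splice(utt, context_left, context_right):
    padded = np.pad(utt, ((context_left, context_right), (0, 0)),
                    mode='edge')
    windows = [padded[i:i + utt.shape[0]]
               for i in range(context_left + context_right + 1)]
    return np.concatenate(windows, axis=1)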