# Tail of the CLI setup plus the 'analyze' sub-command body.
# NOTE(review): this file is whitespace-mangled Python 2 (print statements,
# xrange); the line grouping below is a reconstruction — confirm against the
# original source.  Names such as `ap`, `analyze_cmd`, `context_length`,
# `load_data`, `load_timit_labelled_kaldi`, `KaldiArk`, `KaldiScp`, `cuda`
# and `logger` are defined elsewhere in the file.
analyze_cmd.add_argument("--dataset", type=str, default='/home-nfs/tawara/work/ttic/MyPython/src/timit/timit_3.test.npz', help="Which dataset to use")
# Fixed seed for reproducibility of any random draws below.
rng = numpy.random.RandomState(1)
args = ap.parse_args()
t_start = time.time()
if args.cmd == 'analyze':
    batchsize = args.batchsize
    filename = args.load_from
    print "load from %s" % filename
    # SECURITY NOTE(review): unpickling an arbitrary --load_from path executes
    # code on load; only use with trusted model files.
    model = pickle.load(open(filename,'rb'))
    model.device_id = [0]
    # Pin all computation to GPU 0 (chainer-style CUDA API, presumably).
    cuda.get_device(0).use()
    # Symmetric frame-context window, e.g. [-c, ..., 0, ..., +c].
    offset = range(-context_length, context_length+1)
    # Load CMVN-normalized test features plus per-utterance frame indexing.
    x_test, frame_index = load_timit_labelled_kaldi(\
        KaldiArk('/data2/tawara/work/ttic/MyPython/src/kaldi/timit/feats_test_cmvn.ark'), \
        nnet_transf = '/data2/tawara/work/ttic/MyPython/src/kaldi/timit/final.feature_transform')
    # Training features are used only to re-estimate batch-norm statistics.
    x_train,_ = load_data(\
        KaldiScp('/data2/tawara/work/ttic/MyPython/src/kaldi/timit/data/fbank/train_tr90/feats.scp'), \
        offsets = offset)
    N_test=x_test.shape[0]
    N_train=x_train.shape[0]
    print "Applying batch normalization"
    # One pass over the training data in train mode (test=False) so the
    # model's batch-normalization layers accumulate running statistics.
    # Forward outputs are intentionally discarded.
    for i in xrange(0, N_train, batchsize):
        x_batch = x_train[i : i + batchsize]
        model.forward(x_batch,test=False)
    logger.info("Extracting final layer")
    save_to = args.save_to
    # NOTE(review): message prints the *input* model path + '.post.ark'
    # (operator precedence: '%' binds before '+'), matching the ark path
    # opened below — but `save_to` is assigned and never used here; verify
    # whether the output was meant to go to args.save_to instead.
    print 'Saving output layer to %s' % filename+'.post.ark'
    # Open the posterior output archive for writing.
    ark=KaldiArk(filename+'.post.ark','wb')
    # --- Tail of an incomplete function (its `def` header, which binds `y`,
    # --- is above this view).  Maps each label id in `y` to a phone id and a
    # --- densely re-numbered phone-state id using the 'timit/triplets' table.
    # NOTE(review): whitespace-mangled source — the loop/if nesting below is a
    # reconstruction; confirm against the original file.
    # Each triplets row is "phone state id <ignored>", space-separated;
    # tbl maps the string label id -> [phone, state].
    with open('timit/triplets') as cv:
        tbl = {i:[int(p),int(s)] for p,s,i,_ in csv.reader(cv,delimiter=' ')}
    phones=[]
    states=[]
    # Phone index for every frame label in y.
    for value in y:
        phones.append(tbl[str(value)][0])
    # Build "phone_state" keys so distinct (phone, state) pairs can be
    # renumbered densely in first-seen order.
    tmp=[]
    for value in y:
        tmp.append(str(tbl[str(value)][0])+'_'+str(tbl[str(value)][1]))
    d={}
    cnt =0
    for value in tmp:
        # NOTE(review): dict.has_key is Python-2-only; `value not in d` would
        # be equivalent and forward-compatible.
        if not d.has_key(value):
            d[value] = cnt
            cnt += 1
        states.append(d[value])
    res={}
    res['phones']=np.asarray(phones, dtype=np.int32)
    res['states']=np.asarray(states, dtype=np.int32)
    return res
# Smoke-test driver: load labelled training data and print the first few
# converted phone/state ids plus the distinct-label counts.
# NOTE(review): calls `convert` three times per print line — the result could
# be computed once and reused.
if __name__ == "__main__":
    x_train_lb, y_train_lb = load_timit_labelled_kaldi('fbank/train_tr90_lb10', 'models/pdf.ark', nnet_transf = 'models/final.feature_transform')
    print convert(y_train_lb)['phones'][0:10]
    print convert(y_train_lb)['states'][0:10]
    print len(set(convert(y_train_lb)['phones'])),len(set(convert(y_train_lb)['states']))