def extract_mfcc(input_wav_dir, output_feature_file, output_feature_dir):
    """Extract features for a wav directory in single-file and per-file form.

    ``util.make_feature`` is run on ``input_wav_dir`` with ``default_temp_dir``
    as its destination. Every entry then found in ``default_temp_dir`` is read
    back once with ``util.read_feature`` and emitted twice: appended as a text
    section to ``output_feature_file`` and stored under the same file name in
    ``output_feature_dir`` through ``util.write_feature``.

    Args:
        input_wav_dir: directory passed to ``util.make_feature`` as input.
        output_feature_file: path of the combined text file (opened with 'w').
        output_feature_dir: directory receiving one feature file per entry.
    """
    # secure destination
    mkdir_for_file(output_feature_file)
    mkdir_for_dir(output_feature_dir)

    # extract mfcc in single file format and dir format
    import util
    mkdir_for_dir(default_temp_dir)
    util.make_feature(input_wav_dir, default_temp_dir)

    def write_feat(feat, feat_id, outfile):
        """Append one text section: '<feat_id>.wav', one line per row, blank line."""
        outfile.write(feat_id + '.wav\n')
        if feat.shape[0]:
            # The row layout depends only on the column count, so build the
            # format string once instead of once per frame.
            row_format = '{:04d} {:04d} #' + ' {:f}' * feat.shape[1] + '\n'
            outfile.writelines(
                row_format.format(i, i + 1, *row) for i, row in enumerate(feat)
            )
        outfile.write('\n')

    # Single pass over the temp directory: each feature file is read from disk
    # once and written to both output formats.
    with open(output_feature_file, 'w') as outfile:
        for f in sorted(os.listdir(default_temp_dir)):
            feat = util.read_feature(os.path.join(default_temp_dir, f))
            # f[:-4] drops the last four characters of the file name
            # (presumably a 3-letter extension plus the dot -- confirm).
            write_feat(feat, f[:-4], outfile)
            util.write_feature(feat, os.path.join(output_feature_dir, f))

    # cleanup, remove large files
    # NOTE(review): relies on mkdir_for_dir resetting an existing directory;
    # that helper is not visible here -- confirm.
    mkdir_for_dir(default_temp_dir)
# Split a combined text feature file into one feature file per utterance.
#
# Usage: <script> <combined_feature_file> <output_dir>
#
# Input layout, as consumed by the loop below:
#   * a line containing "wav" starts an utterance; the output path is
#     <output_dir>/<line with its last three characters replaced by "mfc">
#   * every following non-empty line is a row whose values start at the
#     fourth whitespace-separated token
#   * an empty line ends the utterance and triggers the write
import sys
import numpy as np

sys.path.insert(0, "zrst/")
from util import write_feature

outfile = ""
mfc = []          # rows collected for the current utterance
pending = False   # True between a header line and the flush of its rows

# Context manager so the input handle is closed even if parsing fails.
with open(sys.argv[1], "r") as feat:
    for line in feat:
        line = line.rstrip("\n")
        if "wav" in line:
            outfile = sys.argv[2] + "/" + line[:-3] + "mfc"
            mfc = []
            pending = True
            continue
        if line == "":
            # Flush each utterance exactly once; repeated blank lines (or a
            # blank line before any header) have nothing to write.
            if pending:
                feature = np.asarray(mfc)
                write_feature(feature, outfile, period=100000)
                pending = False
            # Do not fall through: an empty line is a terminator, not a row.
            continue
        tokens = line.split()
        mfc.append([float(i) for i in tokens[3:]])
def write_feature_dir(feat, feat_id, feature_dir):
    """Store every feature as ``<feature_dir>/<id>.mfc`` using ``util.write_feature``.

    ``feat`` and ``feat_id`` are paired positionally with ``zip``, so pairing
    stops at the end of the shorter of the two.
    """
    import util

    for matrix, utt_name in zip(feat, feat_id):
        target_path = os.path.join(feature_dir, utt_name + '.mfc')
        util.write_feature(matrix, target_path)