def melcepstrum_noise_shaping(wav_list, args):
    """Apply noise shaping using the STFT-based average mel-cepstrum.

    Each wav file in ``wav_list`` is filtered with
    ``Synthesizer.synthesis_diff`` using the mean mel-cepstrum stored in
    ``args.stats`` under ``/mcep/mean`` (scaled by ``args.mag``), passed
    through ``low_cut_filter`` with ``cutoff=70`` and written as 16-bit PCM
    to ``args.writedir`` under the input file's base name.

    Args:
        wav_list: Sequence of wav file paths to process.
        args: Namespace providing ``fs``, ``shiftms``, ``fftl``, ``stats``,
            ``mag``, ``inv``, ``mcep_alpha`` and ``writedir``.

    Note:
        Calls ``sys.exit(1)`` when a file's sampling frequency differs from
        ``args.fs``.
    """
    # define synthesizer
    synthesizer = Synthesizer(fs=args.fs, shiftms=args.shiftms, fftl=args.fftl)

    # load average mcep once: it does not depend on the file being processed
    base_coef = read_hdf5(args.stats, "/mcep/mean") * args.mag
    base_coef[0] = 0.0  # zero the 0th coefficient before filtering
    if args.inv:
        base_coef[1:] = -1.0 * base_coef[1:]

    num_wavs = len(wav_list)
    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)", wav_name, i + 1, num_wavs)

        # load wavfile
        fs, x = wavfile.read(wav_name)
        if x.dtype != np.int16:
            # logging.warn is a deprecated alias of logging.warning
            logging.warning("wav file format is not 16 bit PCM.")
        x = np.float64(x)

        # check sampling frequency
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # get frame number
        num_frames = int(1000 * len(x) / fs / args.shiftms) + 1

        # repeat the same coefficients for every frame
        mlsa_coef = np.float64(np.tile(base_coef, [num_frames, 1]))

        # synthesis and write
        x_ns = synthesizer.synthesis_diff(x, mlsa_coef, alpha=args.mcep_alpha)
        x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)
        write_name = os.path.join(args.writedir, os.path.basename(wav_name))
        # clip before the cast so out-of-range samples saturate instead of
        # wrapping around in int16
        wavfile.write(write_name, args.fs, np.int16(np.clip(x_ns, -32768, 32767)))
def noise_shaping(wav_list):
    """Apply noise shaping to every wav file in ``wav_list``.

    Relies on the module-level ``args``, ``feature_extractor`` and
    ``synthesizer`` objects. Each file is filtered with
    ``synthesizer.synthesis_diff`` using the mean vector stored in
    ``args.stats`` under ``/mean`` (sliced to
    ``[args.mcep_dim_start:args.mcep_dim_end]`` and scaled by ``args.mag``),
    passed through ``low_cut_filter`` with ``cutoff=70`` and written to
    ``args.writedir`` under the input file's base name.

    Args:
        wav_list: Iterable of wav file paths to process.

    Note:
        Calls ``sys.exit(1)`` when a file's sampling frequency differs from
        ``args.fs``.
    """
    # load average mcep once: it does not depend on the file being processed
    base_coef = read_hdf5(args.stats, "/mean")
    base_coef = base_coef[args.mcep_dim_start:args.mcep_dim_end] * args.mag
    base_coef[0] = 0.0  # zero the 0th coefficient before filtering
    if args.inv:
        base_coef[1:] = -1.0 * base_coef[1:]

    for wav_name in wav_list:
        # load wavfile
        fs, x = wavfile.read(wav_name)
        wav_type = x.dtype
        x = np.array(x, dtype=np.float64)

        # check sampling frequency
        if fs != args.fs:
            print("ERROR: sampling frequency is not matched.")
            sys.exit(1)

        # extract features (only to get the number of frames)
        f0, _, _ = feature_extractor.analyze(x)
        num_frames = f0.shape[0]

        # repeat the same coefficients for every frame
        mlsa_coef = np.tile(base_coef, [num_frames, 1])

        # synthesis and write
        x_ns = synthesizer.synthesis_diff(x, mlsa_coef, alpha=args.mcep_alpha)
        x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)

        # The output path must be known for both branches below; it used to be
        # assigned only in the int16 branch, so other inputs either raised
        # NameError or overwrote the previous file's output.
        write_name = os.path.join(args.writedir, os.path.basename(wav_name))
        if wav_type == np.int16:
            # clip before the cast so out-of-range samples saturate instead
            # of wrapping around in int16
            wavfile.write(write_name, args.fs, np.int16(np.clip(x_ns, -32768, 32767)))
        else:
            wavfile.write(write_name, args.fs, x_ns)
def noise_shaping(wav_list, args):
    """Apply noise shaping to the wav file of every id in ``wav_list``.

    Input and output paths are built by substituting the literal text
    ``"feat_id"`` in ``args.outdir`` and ``args.writedir`` with each id.
    Each file is filtered with ``Synthesizer.synthesis_diff`` using the mean
    vector stored in ``args.stats`` under ``/<args.feature_type>/mean``
    (sliced to ``[args.mcep_dim_start:args.mcep_dim_end]`` and scaled by
    ``args.mag``) and passed through ``low_cut_filter`` with ``cutoff=70``.
    The result is clipped to the int16 range and written back with the
    input's sample type when that is int16, otherwise as produced.

    Args:
        wav_list: Sequence of feature ids to process.
        args: Namespace providing ``fs``, ``shiftms``, ``fftl``, ``outdir``,
            ``writedir``, ``stats``, ``feature_type``, ``mcep_dim_start``,
            ``mcep_dim_end``, ``mag``, ``inv`` and ``mcep_alpha``.

    Note:
        Calls ``sys.exit(1)`` when a file's sampling frequency differs from
        ``args.fs``.
    """
    import os  # local import: only needed to create the output directory

    # define feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world", fs=args.fs, shiftms=args.shiftms, fftl=args.fftl)

    # define synthesizer
    synthesizer = Synthesizer(
        fs=args.fs, shiftms=args.shiftms, fftl=args.fftl)

    # load average mcep once: it does not depend on the file being processed
    base_coef = read_hdf5(args.stats, "/%s/mean" % args.feature_type)
    base_coef = base_coef[args.mcep_dim_start:args.mcep_dim_end] * args.mag
    base_coef[0] = 0.0  # zero the 0th coefficient before filtering
    if args.inv:
        base_coef[1:] = -1.0 * base_coef[1:]

    num_ids = len(wav_list)
    for i, feat_id in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)", feat_id, i + 1, num_ids)

        # load wavfile
        wav_filename = args.outdir.replace("feat_id", feat_id)
        fs, x = wavfile.read(wav_filename)
        wav_type = x.dtype
        x = np.array(x, dtype=np.float64)

        # check sampling frequency
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # extract features (only to get the number of frames)
        f0, _, _ = feature_extractor.analyze(x)
        num_frames = f0.shape[0]

        # repeat the same coefficients for every frame
        mlsa_coef = np.tile(base_coef, [num_frames, 1])

        # synthesis and write
        x_ns = synthesizer.synthesis_diff(x, mlsa_coef, alpha=args.mcep_alpha)
        x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)
        write_name = args.writedir.replace("feat_id", feat_id)

        # make sure the output directory exists; the path is built per id,
        # so it may not have been created yet
        out_dir = os.path.dirname(write_name)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        wav = np.clip(x_ns, -32768, 32767)
        if wav_type == np.int16:
            wavfile.write(write_name, args.fs, np.int16(wav))
        else:
            wavfile.write(write_name, args.fs, wav)
def noise_shaping(wav_list):
    """Apply noise shaping to every wav file in ``wav_list``.

    Relies on the module-level ``args`` object. Each file is read with
    ``sf.read`` and filtered with ``synthesis_diff`` using the mean vector
    stored in ``args.stats`` under ``/mean_org_lf0`` (sliced from
    ``args.mcep_dim_start`` onwards and scaled by ``args.mag``). The result
    is passed through ``low_cut_filter`` with ``cutoff=70`` and written as
    ``PCM_16`` to ``args.writedir`` under the input file's base name.

    Args:
        wav_list: Iterable of wav file paths to process.

    Note:
        Calls ``sys.exit(1)`` when a file's sampling frequency differs from
        ``args.fs``.
    """
    # load average mcep once: it does not depend on the file being processed
    base_coef = read_hdf5(args.stats, "/mean_org_lf0")
    base_coef = base_coef[args.mcep_dim_start:] * args.mag
    base_coef[0] = 0.0  # zero the 0th coefficient before filtering
    if args.inv:
        base_coef[1:] = -1.0 * base_coef[1:]

    for wav_name in wav_list:
        # load wavfile
        x, fs = sf.read(wav_name)

        # check sampling frequency
        if fs != args.fs:
            print("ERROR: sampling frequency is not matched.")
            sys.exit(1)

        # extract f0 only to get the number of frames; the frame period must
        # match the shift passed to synthesis_diff below, otherwise the
        # coefficient matrix has the wrong number of rows
        f0, _ = pw.harvest(x, fs, frame_period=args.shiftms)
        num_frames = f0.shape[0]

        # repeat the same coefficients for every frame
        mlsa_coef = np.tile(base_coef, [num_frames, 1])

        # synthesis and write
        x_ns = synthesis_diff(
            x, mlsa_coef, alpha=args.mcep_alpha, fs=fs, shiftms=args.shiftms)
        x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)
        write_name = os.path.join(args.writedir, os.path.basename(wav_name))
        sf.write(write_name, x_ns, args.fs, 'PCM_16')