features = np.fromfile(args.featurefile, dtype='float32',
                       count=args.nb_samples * eband_K)
nb_samples = int(len(features) / eband_K)
nb_chunks = int(nb_samples / nb_timesteps)
nb_samples = nb_chunks * nb_timesteps
print("rate K nb_samples: %d" % (nb_samples))
features = np.clip(features, 0, None)  # no crazy low values
features = features[:nb_samples * eband_K].reshape((nb_samples, eband_K))
print("features: ", features.shape)

# read in Codec 2 model file records and set up sparse rate L vectors --------------------

Wo, L, A, phase, voiced = codec2_model.read(args.modelin, nb_samples)

# Avoid harmonics above Fcutoff, as anti-aliasing filters tend to
# produce very small values that don't affect speech but contribute
# greatly to error
for f in range(nb_samples):
    L[f] = round(L[f] * ((Fs / 2) - Fcutoff) / (Fs / 2))

# set up sparse amp output vectors
print("building sparse output vecs...")
amp_sparse = np.zeros((nb_samples, width + 2), dtype='float32')
for i in range(nb_samples):
    for m in range(1, L[i] + 1):
        bin = int(np.round(m * Wo[i] * width / np.pi))
        bin = min(width - 1, bin)
        amp_sparse[i, bin] = 20 * np.log10(A[i, m])
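The nested loops above scatter each harmonic amplitude (in dB) into one of width frequency bins via round(m*Wo*width/pi). As a quick sanity check, the mapping can be inverted by sampling the sparse vector at the same bins; a minimal sketch (not part of the original script), assuming the amp_sparse, Wo and L arrays built above:

# --- illustrative sketch only (not in the original script) ------------------
# Recover per-harmonic dB amplitudes for frame i by sampling amp_sparse at the
# same bins used when it was built; sparse_to_amp is a hypothetical helper name.
def sparse_to_amp(amp_sparse_row, Wo_i, L_i, width=width):
    amp_dB = np.zeros(L_i + 1)
    for m in range(1, L_i + 1):
        b = min(width - 1, int(np.round(m * Wo_i * width / np.pi)))
        amp_dB[m] = amp_sparse_row[b]
    return amp_dB

# example: round-trip the first frame
# amp0 = sparse_to_amp(amp_sparse[0], Wo[0], L[0])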
# constants

N = 80        # number of time domain samples in frame
width = 256
Fs = 8000

parser = argparse.ArgumentParser(
    description='Plot phase spectra and synthesised speech')
parser.add_argument('modelfile', help='Codec 2 model file')
parser.add_argument('--n0file', help='text file of n0 estimates')
parser.add_argument('--start', type=int, default=30, help='start frame')
parser.add_argument('--png', action='store_true')
args = parser.parse_args()

# read in model file records
Wo, L, A, phase, voiced = codec2_model.read(args.modelfile)
nb_samples = Wo.size
amp = 20.0 * np.log10(A + 1E-6)

# read in n0 estimates
have_n0 = 0
if args.n0file:
    n0_est = np.loadtxt(args.n0file)
    have_n0 = 1
    print(n0_est[:10])

'''
# Python version of est_n0.c
n0_est2 = np.zeros((nb_samples))
print("estimating linear phase component...")
for i in range(nb_samples):
    err_min = 1E32
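The commented-out block above (a Python port of est_n0.c) is cut short here. For orientation, one way to estimate the linear phase term is a brute-force search over candidate pulse positions n0 within a pitch period; a minimal sketch under that assumption (the amplitude weighting and 0.25-sample step are guesses, not the exact est_n0.c algorithm):

# --- illustrative sketch only (not the original est_n0.c port) --------------
# Brute-force search for the linear phase term n0 of each frame: pick the time
# shift that best cancels the measured harmonic phases, weighted by A^2.
n0_sketch = np.zeros(nb_samples)
for i in range(nb_samples):
    m = np.arange(1, L[i] + 1)
    P = 2 * np.pi / Wo[i]                 # pitch period in samples
    err_min = 1E32
    for n0 in np.arange(0.0, P, 0.25):    # candidate pulse positions (assumed step)
        residual = np.angle(np.exp(1j * (phase[i, 1:L[i] + 1] + n0 * m * Wo[i])))
        err = np.sum((A[i, 1:L[i] + 1] ** 2) * (residual ** 2))
        if err < err_min:
            err_min, n0_sketch[i] = err, n0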
                    type=int, default=10, help='Number of training epochs')
parser.add_argument('--nnout', type=str, default="phasenn.h5",
                    help='Name of output Codec 2 model file')
parser.add_argument('--plotunvoiced', action='store_true',
                    help='plot unvoiced frames')
args = parser.parse_args()

assert nb_plots == len(args.frames)

# read in model file records
Wo, L, A, phase, voiced = codec2_model.read(args.modelfile, args.nb_samples)
nb_samples = Wo.size
nb_voiced = np.count_nonzero(voiced)
print("nb_samples: %d voiced %d" % (nb_samples, nb_voiced))

# work out average energy for each frame (in dB)
energy_thresh = 10
energy = np.zeros(nb_samples)
nb_train = 0
for i in range(nb_samples):
    energy[i] = np.mean(20 * np.log10(A[i, 1:L[i] + 1]))
    if (energy[i] > energy_thresh) and voiced[i]:
        nb_train += 1
print("energy mean: %4.2f thresh: %4.2f nb_train: %d" %
      (np.mean(energy), energy_thresh, nb_train))
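Not part of the original script, but the same voiced/energy rule can be expressed as a boolean mask, which is handy when the selected frames are needed later rather than just counted; a minimal sketch assuming the energy, voiced and energy_thresh values computed above (train_idx is a hypothetical name):

# --- illustrative sketch only (not in the original script) ------------------
# Vectorised selection of training frames using the same >10 dB, voiced-only rule.
train_mask = (energy > energy_thresh) & (voiced != 0)
train_idx = np.where(train_mask)[0]
assert len(train_idx) == nb_train
print("selected %d of %d frames for training" % (len(train_idx), nb_samples))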