Example #1
0
# Load the raw float32 feature values; they are later viewed as rows of
# eband_K values each.
features = np.fromfile(args.featurefile,
                       dtype='float32',
                       count=args.nb_samples * eband_K)

# Keep only whole rows, and only as many rows as fill complete chunks of
# nb_timesteps.  Floor division keeps these counts as exact integers instead
# of round-tripping through a float.
nb_samples = len(features) // eband_K
nb_chunks = nb_samples // nb_timesteps
nb_samples = nb_chunks * nb_timesteps
print("rate K nb_samples: %d" % (nb_samples))

# no crazy low values: clamp anything below zero up to zero
features = np.clip(features, 0, None)

# Drop the trailing values that do not fit, then shape to (nb_samples, eband_K)
features = features[:nb_samples * eband_K].reshape((nb_samples, eband_K))
print("features: ", features.shape)

# read in Codec 2 model file records and set up sparse rate L vectors --------------------

Wo, L, A, phase, voiced = codec2_model.read(args.modelin, nb_samples)

# Avoid harmonics above Fcutoff, as anti-aliasing filters tend to
# produce very small values that don't affect speech but contribute
# greatly to error
for f in range(nb_samples):
    # scale the harmonic count by the fraction of the band that is kept
    L[f] = round(L[f] * ((Fs / 2) - Fcutoff) / (Fs / 2))

# set up sparse amp output vectors
# Each row holds the harmonic amplitudes (in dB) of one record, written at
# the column nearest to the harmonic's frequency m*Wo; other columns stay 0.
print("building sparse output vecs...")
amp_sparse = np.zeros((nb_samples, width + 2), dtype='float32')
for i in range(nb_samples):
    for m in range(1, L[i] + 1):
        # map the harmonic frequency (0..pi) onto a column index, clamped
        # to the last of the first `width` columns; named bin_idx so the
        # builtin bin() is not shadowed
        bin_idx = min(width - 1, int(np.round(m * Wo[i] * width / np.pi)))
        amp_sparse[i, bin_idx] = 20 * np.log10(A[i, m])
Example #2
0
# fixed parameters

N = 80  # time domain samples per frame
width = 256
Fs = 8000

# command line interface
parser = argparse.ArgumentParser(description='Plot phase spectra and synthesised speech')
parser.add_argument('modelfile',
                    help='Codec 2 model file')
parser.add_argument('--n0file',
                    help='text file of n0 estimates')
parser.add_argument('--start',
                    default=30,
                    type=int,
                    help=' start frame')
parser.add_argument('--png',
                    action='store_true')
args = parser.parse_args()

# load the model file records; the record count is taken from Wo
Wo, L, A, phase, voiced = codec2_model.read(args.modelfile)
nb_samples = Wo.size

# amplitudes in dB; the small offset keeps log10 finite where A is zero
amp = np.log10(A + 1E-6) * 20.0

# read in n0 estimates
# have_n0 records whether an n0 file was given: it becomes 1 only after the
# file named by --n0file has been loaded into n0_est, and stays 0 otherwise
# (in which case this block does not assign n0_est at all).
have_n0 = 0
if args.n0file:
    n0_est = np.loadtxt(args.n0file)
    have_n0 = 1
    # show the first ten loaded values (presumably a quick sanity check)
    print(n0_est[:10])
    '''
# Python version of est_n0.c
n0_est2 =  np.zeros((nb_samples))
print("estimating linear phase component...")
for i in range(nb_samples):
    err_min = 1E32
Example #3
0
                    type=int,
                    default=10,
                    help='Number of training epochs')
parser.add_argument('--nnout',
                    type=str,
                    default="phasenn.h5",
                    help='Name of output Codec 2 model file')
parser.add_argument('--plotunvoiced',
                    action='store_true',
                    help='plot unvoiced frames')
args = parser.parse_args()

# The number of requested frames must equal nb_plots.  This is checked
# explicitly rather than with `assert`, because assertions are removed when
# Python runs with -O and the mismatch would then go unnoticed.
# parser.error() prints the usage message and exits with status 2.
if len(args.frames) != nb_plots:
    parser.error("expected %d frames, got %d" % (nb_plots, len(args.frames)))

# load the model file records; the record count is taken from Wo
Wo, L, A, phase, voiced = codec2_model.read(args.modelfile, args.nb_samples)
nb_samples = Wo.size
nb_voiced = np.count_nonzero(voiced)
print("nb_samples: %d voiced %d" % (nb_samples, nb_voiced))

# Average energy (in dB) of each frame, taken over harmonics 1..L[i].
# nb_train counts the frames that are both voiced and above energy_thresh.
energy_thresh = 10
energy = np.zeros(nb_samples)
nb_train = 0
for i in range(nb_samples):
    harmonics_dB = 20 * np.log10(A[i, 1:L[i] + 1])
    energy[i] = np.mean(harmonics_dB)
    nb_train += 1 if ((energy[i] > energy_thresh) and voiced[i]) else 0

energy_summary = (np.mean(energy), energy_thresh, nb_train)
print("energy mean: %4.2f thresh: %4.2f nb_train: %d" % energy_summary)