Example #1
import os
import re

import numpy as np

# data, makedirs, print_tty and readids are helpers from the surrounding package.
def create_weights_lab(
        labpath,
        fids,
        outfilepath,
        lineheadregexp=r'([^\^]+)\^([^-]+)-([^\+]+)\+([^=]+)=([^@]+)@(.+)',
        silencesymbol='sil',
        shift=0.005):
    """
    This function creates a one-column vector with one weight value per frame.
    This weight is created based on the silence symbol that is at the head of
    each lab line.

    Some lab file formats uses: r'([^\~]+)\~([^-]+)-([^\+]+)\+([^=]+)=([^:]+):(.+)'
    """

    makedirs(os.path.dirname(outfilepath))

    outfilepath, _ = data.getpathandshape(outfilepath)

    for fid in readids(fids):
        print_tty(
            '\r    Processing feature file {}                '.format(fid))

        with open(labpath.replace('*', fid)) as f:

            lines = f.readlines()
            # The end time of the last line gives the total duration
            # (lab times are in units of 100 ns, hence the 1e-7 factor).
            lineels = re.findall(r'([0-9]+)\s+([0-9]+)\s+(.+)', lines[-1])[0]
            tend = float(lineels[1]) * 1e-7
            weight = np.ones(int(np.ceil(tend / shift)), dtype='float32')

            for line in lines:
                lineels = re.findall(r'([0-9]+)\s+([0-9]+)\s+(.+)', line)[0]
                tstart = float(lineels[0]) * 1e-7
                tend = float(lineels[1]) * 1e-7
                # Zero the weight wherever the centre phone is the silence symbol
                phones = re.findall(lineheadregexp, lineels[2])[0]
                if phones[2] == silencesymbol:
                    weight[int(np.floor(tstart / shift)):
                           int(np.ceil(tend / shift))] = 0.0

            weight.astype('float32').tofile(outfilepath.replace('*', fid))

    print_tty(
        '\r                                                           \r')
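A minimal usage sketch (the paths and file-ID list below are hypothetical; the
function only assumes that '*' in labpath and outfilepath gets replaced by
each file ID read from the fids file):

create_weights_lab('labs/*.lab', 'file_id_list.scp', 'weights/*.w',
                   silencesymbol='sil', shift=0.005)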
Example #2
    def test_data(self):
        import data

        fids = readids(cptest + 'file_id_list.scp')

        path, shape = data.getpathandshape('dummy.fwlspec')
        self.assertEqual(path, 'dummy.fwlspec')
        self.assertIsNone(shape)
        path, shape = data.getpathandshape('dummy.fwlspec:(-1,129)')
        self.assertEqual(path, 'dummy.fwlspec')
        self.assertEqual(shape, (-1, 129))
        path, shape = data.getpathandshape('dummy.fwlspec:(-1,129)', (-1, 12))
        self.assertEqual(path, 'dummy.fwlspec')
        self.assertEqual(shape, (-1, 12))
        path, shape = data.getpathandshape('dummy.fwlspec', (-1, 12))
        self.assertEqual(path, 'dummy.fwlspec')
        self.assertEqual(shape, (-1, 12))
        dim = data.getlastdim('dummy.fwlspec')
        self.assertEqual(dim, 1)
        dim = data.getlastdim('dummy.fwlspec:(-1,129)')
        self.assertEqual(dim, 129)

        indir = cptest + 'binary_label_' + str(
            lab_size) + '_norm_minmaxm11/*.lab:(-1,' + str(lab_size) + ')'
        Xs = data.load(indir,
                       fids,
                       shape=None,
                       frameshift=0.005,
                       verbose=1,
                       label='Xs: ')
        self.assertEqual(len(Xs), 10)
        print(Xs[0].shape)
        self.assertEqual(Xs[0].shape, (667, lab_size))

        print(data.gettotallen(Xs))
        self.assertEqual(data.gettotallen(Xs), 5694)

        outdir = cptest + 'wav_cmp_lf0_fwlspec65_fwnm17_bndnmnoscale/*.cmp:(-1,83)'
        Ys = data.load(outdir,
                       fids,
                       shape=None,
                       frameshift=0.005,
                       verbose=1,
                       label='Ys: ')
        print('len(Ys)={}'.format(len(Ys)))
        self.assertEqual(len(Ys), 10)
        print('Ys[0].shape={}'.format(Ys[0].shape))
        self.assertEqual(Ys[0].shape, (666, 83))

        wdir = cptest + 'wav_fwlspec65_weights/*.w:(-1,1)'
        Ws = data.load(wdir,
                       fids,
                       shape=None,
                       frameshift=0.005,
                       verbose=1,
                       label='Ws: ')
        self.assertEqual(len(Ws), 10)

        Xs, Ys, Ws = data.croplen([Xs, Ys, Ws])

        [Xs, Ys], Ws = data.croplen_weight([Xs, Ys], Ws, thresh=0.5)

        Xs_w_stop = data.addstop(Xs)

        X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
            indir,
            outdir,
            wdir,
            fids,
            length=None,
            lengthmax=100,
            maskpadtype='randshift',
            inouttimesync=False)
        X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
            indir,
            outdir,
            wdir,
            fids,
            length=None,
            lengthmax=100,
            maskpadtype='randshift')
        X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
            indir,
            outdir,
            wdir,
            fids,
            length=None,
            lengthmax=100,
            maskpadtype='randshift',
            cropmode='begendbigger')
        X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
            indir,
            outdir,
            wdir,
            fids,
            length=None,
            lengthmax=100,
            maskpadtype='randshift',
            cropmode='all')

        worst_val = data.cost_0pred_rmse(Ys)
        print('worst_val={}'.format(worst_val))

        worst_val = data.cost_0pred_rmse(Ys[0])
        print('worst_val={}'.format(worst_val))

        def data_cost_model_mfn(Xs, Ys):
            return np.std(Ys)  # TODO More useful

        X_vals = data.load(indir, fids)
        Y_vals = data.load(outdir, fids)
        X_vals, Y_vals = data.croplen([X_vals, Y_vals])
        cost = data.cost_model_mfn(data_cost_model_mfn, [X_vals, Y_vals])
        print(cost)

        class SmokyModel:
            # Minimal stub model: predicts all-zeros, just to smoke-test the
            # cost functions below.
            def predict(self, Xs):
                return np.zeros([1, Xs.shape[1], 83])

        mod = SmokyModel()
        cost = data.cost_model_prediction_rmse(mod, [Xs], Ys)
        print(cost)

        std = data.prediction_mstd(mod, [Xs])
        print(std)

        rms = data.prediction_rms(mod, [Xs])
        print(rms)
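The 'path:(shape)' suffix convention exercised above packs a reshape hint into
the file path. A minimal sketch of how such a suffix can be parsed (an
illustration of the convention only, not the actual data.getpathandshape
implementation, and handling only integer dimensions):

import re

def parse_path_and_shape(pathstr):
    # Split 'file.fwlspec:(-1,129)' into ('file.fwlspec', (-1, 129)).
    m = re.match(r'(.+):\(([^)]+)\)$', pathstr)
    if m is None:
        return pathstr, None
    shape = tuple(int(d) for d in m.group(2).split(','))
    return m.group(1), shape

assert parse_path_and_shape('dummy.fwlspec') == ('dummy.fwlspec', None)
assert parse_path_and_shape('dummy.fwlspec:(-1,129)') == ('dummy.fwlspec', (-1, 129))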
Example #3
    def test_compose(self):
        import data
        import compose

        fids = readids(cptest + '/file_id_list.scp')

        wav_dir = 'wav'
        f0_path = cptest + wav_dir + '_lf0/*.lf0'
        spec_path = cptest + wav_dir + '_fwlspec' + str(
            spec_size) + '/*.fwlspec'
        nm_path = cptest + wav_dir + '_fwnm' + str(nm_size) + '/*.fwnm'

        compose.compose([
            cptest + 'binary_label_' + str(lab_size) + '/*.lab:(-1,' +
            str(lab_size) + ')'
        ],
                        fids,
                        'tests/test_made__smoke_compose_compose_lab0/*.lab',
                        id_valid_start=8,
                        normfn=None,
                        wins=[],
                        dropzerovardims=False)

        compose.compose([
            cptest + 'binary_label_' + str(lab_size) + '/*.lab:(-1,' +
            str(lab_size) + ')'
        ],
                        fids,
                        'tests/test_made__smoke_compose_compose_lab1/*.lab',
                        id_valid_start=8,
                        normfn=compose.normalise_minmax,
                        wins=[],
                        dropzerovardims=False)

        path2, shape2 = data.getpathandshape(
            'tests/test_made__smoke_compose_compose_lab1/*.lab:(mean.dat,' +
            str(lab_size) + ')')

        compose.compose([
            cptest + 'binary_label_' + str(lab_size) + '/*.lab:(-1,' +
            str(lab_size) + ')'
        ],
                        fids,
                        'tests/test_made__smoke_compose_compose_lab2/*.lab',
                        id_valid_start=8,
                        normfn=compose.normalise_minmax,
                        wins=[],
                        dropzerovardims=True)

        compose.compose([
            f0_path, spec_path + ':(-1,' + str(spec_size) + ')',
            nm_path + ':(-1,' + str(nm_size) + ')'
        ],
                        fids,
                        'tests/test_made__smoke_compose_compose2_cmp1/*.cmp',
                        id_valid_start=8,
                        normfn=compose.normalise_minmax,
                        wins=[])

        compose.compose([
            f0_path, spec_path + ':(-1,' + str(spec_size) + ')',
            nm_path + ':(-1,' + str(nm_size) + ')'
        ],
                        fids,
                        'tests/test_made__smoke_compose_compose2_cmp2/*.cmp',
                        id_valid_start=8,
                        normfn=compose.normalise_meanstd,
                        wins=[])

        compose.compose([
            f0_path, spec_path + ':(-1,' + str(spec_size) + ')',
            nm_path + ':(-1,' + str(nm_size) + ')'
        ],
                        fids,
                        'tests/test_made__smoke_compose_compose2_cmp4/*.cmp',
                        id_valid_start=8,
                        normfn=compose.normalise_meanstd_nmnoscale,
                        wins=[])

        compose.compose(
            [
                f0_path, spec_path + ':(-1,' + str(spec_size) + ')',
                nm_path + ':(-1,' + str(nm_size) + ')'
            ],
            fids,
            'tests/test_made__smoke_compose_compose2_cmp_deltas/*.cmp',
            id_valid_start=8,
            normfn=compose.normalise_meanstd_nmnoscale,
            wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]])

        # WORLD vocoder features
        compose.compose(
            [
                cptest + wav_dir + '_world_lf0/*.lf0', cptest + wav_dir +
                '_world_fwlspec/*.fwlspec:(-1,' + str(spec_size) + ')',
                cptest + wav_dir + '_world_fwdbaper/*.fwdbaper:(-1,' +
                str(nm_size) + ')', cptest + wav_dir + '_world_vuv/*.vuv'
            ],
            fids,
            'tests/test_made__smoke_compose_compose2_cmp_WORLD/*.cmp',
            id_valid_start=8,
            normfn=compose.normalise_meanstd,
            wins=[])
        compose.compose(
            [
                cptest + wav_dir + '_world_lf0/*.lf0', cptest + wav_dir +
                '_world_fwlspec/*.fwlspec:(-1,' + str(spec_size) + ')',
                cptest + wav_dir + '_world_fwdbaper/*.fwdbaper:(-1,' +
                str(nm_size) + ')', cptest + wav_dir + '_world_vuv/*.vuv'
            ],
            fids,
            'tests/test_made__smoke_compose_compose2_cmp_WORLD_mlpg/*.cmp',
            id_valid_start=8,
            normfn=compose.normalise_meanstd,
            wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]])

        compose.create_weights_spec(
            spec_path + ':(-1,' + str(spec_size) + ')',
            fids,
            'tests/test_made__smoke_compose_compose2_w1/*.w',
            spec_type='fwlspec',
            thresh=-32)
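The wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]] arguments are the usual delta
and delta-delta windows. A minimal sketch of what appending such windowed
features means, written as a plain per-frame correlation with edge replication
(sign conventions may differ slightly from the convolution used in compose):

import numpy as np

def append_deltas(Y, wins=((-0.5, 0.0, 0.5), (1.0, -2.0, 1.0))):
    # Y: (nbframes, dim) static features. For each window, append one block
    # of the same width holding the windowed values, with edges replicated.
    blocks = [Y]
    for win in wins:
        pad = (len(win) - 1) // 2  # assumes odd-length windows
        Ypad = np.pad(Y, ((pad, pad), (0, 0)), mode='edge')
        YW = sum(w * Ypad[i:i + Y.shape[0]] for i, w in enumerate(win))
        blocks.append(YW)
    return np.hstack(blocks)

Y = np.arange(10, dtype='float32').reshape(-1, 1)
print(append_deltas(Y).shape)  # (10, 3): static, delta, delta-delta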
Example #4
import os
import re

import numpy as np

# data and print_tty are helpers from the surrounding package.
def create_weights_spec(specfeaturepath,
                        fids,
                        outfilepath,
                        thresh=-32,
                        dftlen=4096,
                        spec_type='fwlspec'):
    """
    This function creates a one-column vector with one weight value per frame.
    This weight is computed as a silence coefficient. During training, silent
    segments will be dropped (i.e. dropped if weight<0.5), if present at the
    very begining or very end of the sample (or optionnaly within a sentence if
    the silence is particularly long).

    thresh : [dB] The weight of the frames whose energy < threshold are set
             weight = 0, and 1 otherwise.
    """
    def mag2db(a):
        return 20.0 * np.log10(np.abs(a))

    outfilepath = re.sub(
        r':[^:]+$', "",
        outfilepath)  # ignore any shape suffix in the output path
    if not os.path.isdir(os.path.dirname(outfilepath)):
        os.mkdir(os.path.dirname(outfilepath))

    for nf, fid in enumerate(fids):
        print_tty(
            '\r    Processing feature files {} for {}                '.format(
                nf, fid))

        infilepath, shape = data.getpathandshape(specfeaturepath)
        if shape is None: shape = (-1, 1)
        infilepath = infilepath.replace('*', fid)

        if spec_type == 'fwlspec':
            Yspec = np.fromfile(infilepath, dtype='float32')
            Yspec = Yspec.reshape(shape)
            ener = mag2db(np.exp(np.mean(Yspec, axis=1)))
        elif spec_type in ('mcep', 'fwcep'):
            Ymcep = np.fromfile(infilepath, dtype='float32')
            Ymcep = Ymcep.reshape(shape)
            ener = mag2db(np.exp(Ymcep[:, 0]))  # Just need the first coef
        else:
            raise ValueError('Unknown spec_type: {}'.format(spec_type))

        # Normalise by the strongest value
        # That might not be very reliable if the estimated spec env is very noisy.
        ener -= np.max(ener)

        weight = ener.copy()
        weight[ener >= thresh] = 1.0
        weight[ener < thresh] = 0.0

        weight.astype('float32').tofile(outfilepath.replace('*', fid))

    print_tty(
        '\r                                                           \r')
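Stripped of the file handling, the weighting is a hard threshold on per-frame
energy in dB relative to the loudest frame. A standalone sketch of that step:

import numpy as np

def energy_weights(ener_db, thresh=-32.0):
    # Normalise to the loudest frame, then give weight 1.0 to frames within
    # `thresh` dB of the peak and 0.0 to all others.
    ener_db = ener_db - np.max(ener_db)
    return (ener_db >= thresh).astype('float32')

print(energy_weights(np.array([-60.0, -10.0, 0.0, -40.0, -5.0])))
# -> [0. 1. 1. 0. 1.]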
Example #5
import datetime
import os
import re

import numpy as np
import scipy.signal

# data and print_tty are helpers from the surrounding package.
def compose(featurepaths,
            fids,
            outfilepath,
            wins=None,
            id_valid_start=-1,
            normfn=None,
            shift=0.005,
            dropzerovardims=False,
            do_finalcheck=False,
            verbose=1):
    """
    For each file index in fids, compose a set of features (can be input or
    output data) into a single file and normalise it according to statistics and
    normfn.

    The outfilepath will be populated by the composed/normlised files, and,
    by statistics files that can be used for de-composition.

    Parameters
    ----------
    featurepaths :  path of features to concatenate for each file
    fids :          file IDs
    outfilepath :   outputpath of the resulted composition and normalisation.
    wins :          list of numpy arrays
                    E.g. values in Merlin are wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]]
    """
    print('Compose data (id_valid_start={})'.format(id_valid_start))

    if wins is None: wins = []

    outfilepath = re.sub(
        r':[^:]+$', "",
        outfilepath)  # ignore any shape suffix in the output path
    if not os.path.isdir(os.path.dirname(outfilepath)):
        os.mkdir(os.path.dirname(outfilepath))

    size = None
    mins = None
    maxs = None
    means = None
    nbframes = 0

    for nf, fid in enumerate(fids):
        print_tty('\r    Composing file {}/{} {}               '.format(
            1 + nf, len(fids), fid))

        features = []
        minlen = None
        for featurepath in featurepaths:
            infilepath, shape = data.getpathandshape(featurepath)
            if shape is None: shape = (-1, 1)
            infilepath = infilepath.replace('*', fid)
            feature = np.fromfile(infilepath, dtype='float32')
            feature = feature.reshape(shape)
            features.append(feature)
            if minlen is None: minlen = feature.shape[0]
            else: minlen = np.min((minlen, feature.shape[0]))

        # Crop all features to the same length
        for feati in range(len(features)):
            features[feati] = features[feati][:minlen]

        Y = np.hstack(features)

        if len(wins) > 0:
            YWs = [Y]  # Always add the static values first
            for win in wins:
                # Then concatenate the windowed (delta-like) values
                YW = np.ones(Y.shape)
                win_p = (len(win) + 1) // 2
                for d in range(Y.shape[1]):
                    YW[win_p - 1:-(win_p - 1), d] = -scipy.signal.convolve(
                        Y[:, d], win)[win_p:-win_p]  # The fastest
                    # Replicate the boundary values at both edges
                    YW[:win_p - 1, d] = YW[win_p - 1, d]
                    YW[-(win_p - 1):, d] = YW[-(win_p - 1) - 1, d]
                YWs.append(YW)
            Y = np.hstack(YWs)

        size = Y.shape[1]

        if nf < id_valid_start:
            if mins is None: mins = Y.min(axis=0)
            else: mins = np.minimum(mins, Y.min(axis=0))
            if maxs is None: maxs = Y.max(axis=0)
            else: maxs = np.maximum(maxs, Y.max(axis=0))
            if means is None: means = Y.sum(axis=0).astype('float64')
            else: means += Y.sum(axis=0).astype('float64')
            nbframes += Y.shape[0]

        Y.astype('float32').tofile(outfilepath.replace('*', fid))
    print_tty(
        '\r                                                           \r')

    means /= nbframes
    zerovaridx = np.where(
        (maxs - mins) == 0.0)[0]  # Indices of dimensions having zero-variance

    mins.astype('float32').tofile(os.path.dirname(outfilepath) + '/min.dat')
    if verbose > 1: print('    mins={}'.format(mins))  # pragma: no cover
    maxs.astype('float32').tofile(os.path.dirname(outfilepath) + '/max.dat')
    if verbose > 1: print('    maxs={}'.format(maxs))  # pragma: no cover
    means.astype('float32').tofile(os.path.dirname(outfilepath) + '/mean.dat')
    if verbose > 1: print('    means={}'.format(means))  # pragma: no cover

    # Now that we have the mean, we can do the std
    stds = None
    for nf, fid in enumerate(fids):
        Y = np.fromfile(outfilepath.replace('*', fid), dtype='float32')
        Y = Y.reshape((-1, size))
        if nf < id_valid_start:
            if stds is None:
                stds = ((Y - means)**2).sum(axis=0).astype('float64')
            else:
                stds += ((Y - means)**2).sum(axis=0).astype('float64')
    stds /= nbframes - 1  # unbiased variance estimator
    stds = np.sqrt(stds)

    stds.astype('float32').tofile(os.path.dirname(outfilepath) + '/std.dat')
    if verbose > 1: print('    stds={}'.format(stds))

    keepidx = np.arange(len(means))
    if dropzerovardims:
        keepidx = np.setdiff1d(np.arange(len(means)), zerovaridx)
        size = len(keepidx)
        keepidx.astype('int32').tofile(
            os.path.dirname(outfilepath) + '/keepidx.dat')
        print('Dropped dimensions with zero variance; {} dims remain'.format(
            size))

    print('{} files'.format(len(fids)))
    print('{} frames ({}s assuming {}s time shift)'.format(
        nbframes, datetime.timedelta(seconds=nbframes * shift), shift))
    strsize = ''
    for fpath in featurepaths:
        dummy, shape = data.getpathandshape(fpath)
        if shape is None: strsize += '1+'
        else: strsize += str(shape[1]) + '+'
    strsize = strsize[:-1]
    if dropzerovardims:
        strsize += '-' + str(len(zerovaridx))
    print('nb dimensions={} (features: ({})x{})'.format(
        size, strsize, 1 + len(wins)))
    print('{} dimensions with zero-variance ({}){}'.format(
        len(zerovaridx), zerovaridx, ', which have been dropped'
        if dropzerovardims else ', which have been kept'))
    if normfn is not None:
        print('normalisation done using: {}'.format(normfn.__name__))
    else:
        print('no normalisation called')
    print('output path: {}'.format(outfilepath))

    # Maybe this shouldn't be called within compose; it should come afterwards. See #30
    if normfn is not None:
        normfn(outfilepath,
               fids,
               featurepaths=featurepaths,
               keepidx=keepidx,
               verbose=verbose)

    if do_finalcheck:
        print('Check data final statistics')
        verif_means = None
        verif_stds = None
        verif_mins = None
        verif_maxs = None
        verif_nbframes = 0
        for nf, fid in enumerate(fids):
            if nf >= id_valid_start: continue
            fpath = outfilepath.replace('*', fid)
            Y = np.fromfile(fpath, dtype='float32')
            Y = Y.reshape((-1, size))
            if verif_means is None:
                verif_means = Y.sum(axis=0).astype('float64')
            else:
                verif_means += Y.sum(axis=0).astype('float64')
            if verif_mins is None: verif_mins = Y.min(axis=0)
            else: verif_mins = np.minimum(verif_mins, Y.min(axis=0))
            if verif_maxs is None: verif_maxs = Y.max(axis=0)
            else: verif_maxs = np.maximum(verif_maxs, Y.max(axis=0))
            verif_nbframes += Y.shape[0]
        verif_means /= verif_nbframes
        for nf, fid in enumerate(fids):
            if nf >= id_valid_start: continue
            fpath = outfilepath.replace('*', fid)
            Y = np.fromfile(fpath, dtype='float32')
            Y = Y.reshape((-1, size))
            if verif_stds is None:
                verif_stds = ((Y - verif_means)**2).sum(axis=0).astype('float64')
            else:
                verif_stds += ((Y - verif_means)**2).sum(axis=0).astype('float64')
        verif_stds /= verif_nbframes - 1
        if verbose > 0:  # pragma: no cover
            print('verif_min={}'.format(verif_mins))
            print('verif_max={}'.format(verif_maxs))
            print('verif_means={}'.format(verif_means))
            print('verif_stds={}'.format(verif_stds))
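The statistics above are accumulated in two passes over the training subset
only (files with nf < id_valid_start): a first pass sums the frames for the
mean, a second sums squared deviations for the standard deviation. A condensed
standalone sketch of that scheme, assuming the per-file arrays are already in
memory:

import numpy as np

def train_stats(Ys, id_valid_start):
    # Mirror compose(): validation/test files never touch the statistics.
    train = Ys[:id_valid_start]
    nbframes = sum(Y.shape[0] for Y in train)
    means = sum(Y.sum(axis=0).astype('float64') for Y in train) / nbframes
    variances = sum(((Y - means)**2).sum(axis=0).astype('float64')
                    for Y in train) / (nbframes - 1)  # unbiased, as in compose
    return means, np.sqrt(variances)

Ys = [np.random.randn(100, 4).astype('float32') for _ in range(10)]
means, stds = train_stats(Ys, id_valid_start=8)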