Ejemplo n.º 1
0
 def __init__(self, csv):
     """Create the wrapped ``spectral.Spectral`` object, load ``csv`` into it
     and store the default settings on the instance.

     :param csv: passed unchanged to ``Spectral.read_data``; presumably the
         path of a CSV data file -- TODO confirm against ``read_data``.
     """
     # Underlying Spectral instance; the data is read right away.
     self.s = spectral.Spectral()
     self.s.read_data(csv)
     # Default settings.  Nothing in this method hands them to ``self.s``;
     # presumably other methods of the class do that.
     # NOTE(review): the meanings suggested below are inferred from the
     # attribute names only -- confirm against the code that consumes them.
     # presumably the number of cluster centres
     self.centers = 2
     # presumably an integer code selecting the kernel
     self.kernel_type = 1
     # presumably a 0/1 flag enabling normalisation
     self.normalise = 1
     # presumably an upper bound on iterations
     self.max_iters = 1000
     # presumably kernel parameters (gamma, constant term, order)
     self.gamma = 0.001
     self.constant = 1.0
     self.order = 2.0
Ejemplo n.º 2
0
def construct_encoder(config_dict):
    """Build a ``spectral.Spectral`` encoder from a flat configuration dict.

    The keys 'nfilters', 'cepstral', 'compression', 'deltas' and
    'samplerate' are required.  'nceps' is optional and is only forwarded
    when present.  The single 'deltas' entry drives both the ``do_deltas``
    and the ``do_deltasdeltas`` argument.

    :param config_dict: mapping holding the keys listed above
    :return: the ``spectral.Spectral`` instance built from those settings
    :raises KeyError: if a required key is missing
    """
    # Translate the configuration names into Spectral's keyword names.
    spectral_kwargs = {
        'nfilt': config_dict['nfilters'],
        'do_dct': config_dict['cepstral'],
        'compression': config_dict['compression'],
        'do_deltas': config_dict['deltas'],
        'do_deltasdeltas': config_dict['deltas'],
        'fs': config_dict['samplerate'],
    }
    if 'nceps' in config_dict:
        spectral_kwargs.update(nceps=config_dict['nceps'])
    return spectral.Spectral(**spectral_kwargs)
Ejemplo n.º 3
0
def _load_spec(fname, nfilt, frate, highpass, fs=16000):
    """Load an audio file and encode it with a ``spectral.Spectral`` encoder.

    The encoder is configured with log compression, a 0.025 window length
    and with DCT, deltas and delta-deltas all switched off.

    :param fname: file handed to ``_load_wav``
    :param nfilt: forwarded to ``Spectral`` as ``nfilt``
    :param frate: forwarded to ``Spectral`` as ``frate``
    :param highpass: third argument of ``hpfilter``; any falsy value skips
        the filtering step entirely
    :param fs: sample rate given to ``_load_wav``, ``hpfilter`` and
        ``Spectral``
    :return: whatever ``Spectral.transform`` returns for the signal
    """
    signal = _load_wav(fname, fs)
    signal = hpfilter(signal, fs, highpass) if highpass else signal

    settings = dict(
        nfilt=nfilt,
        fs=fs,
        wlen=0.025,
        frate=frate,
        compression='log',
        do_dct=False,
        do_deltas=False,
        do_deltasdeltas=False,
    )
    return spectral.Spectral(**settings).transform(signal)
Ejemplo n.º 4
0
def extract_features_single(filename, config):
    """Read one audio file and return its spectral features.

    The file is read with ``wavread``.  If its sample rate differs from
    ``config['features']['preprocessing']['samplerate']`` the signal is
    resampled with ``scikits.samplerate`` when
    ``config['features']['preprocessing']['resample']`` is true; otherwise a
    warning is printed and the signal is encoded as it is.

    :param filename: audio file handed to ``wavread``
    :param config: nested dict.  ``config['features']['preprocessing']`` must
        hold 'samplerate' and 'resample' and may hold 'preemph';
        ``config['features']['spectral']`` may hold 'filterbanks', 'nceps',
        'dct', 'lowerf', 'upperf', 'samplerate', 'framerate', 'winlen',
        'nfft', 'compression' and 'deltas'.
    :return: whatever ``spectral.Spectral.transform`` returns for the signal
    :raises ImportError: if resampling is required and enabled but
        ``scikits.samplerate`` cannot be imported
    :raises KeyError: if a required configuration key is missing
    """
    sig, fs, _ = wavread(filename)
    preproc_cfg = config['features']['preprocessing']
    expected_fs = preproc_cfg['samplerate']
    if fs != expected_fs:
        if preproc_cfg['resample']:
            try:
                import scikits.samplerate
            except ImportError:
                print('cannot resample because scikits.samplerate is not '
                      'installed. Either resample all audio files externally '
                      'or install it.')
                # Resampling was requested but is impossible; propagate the
                # ImportError instead of failing on the unbound module name.
                raise
            # The resampler expects ratio = output rate / input rate.
            # float() avoids integer division under Python 2.
            ratio = float(expected_fs) / fs
            sig = scikits.samplerate.resample(sig, ratio, 'sinc_best')
        else:
            message = (
                'samplerate error in file {2}: expected {0}, got {1}.\n'
                'Try to resample all audio files to the samplerate specified'
                ' in the config file. If you can\'t resample yourself, set the'
                ' value of "resample" to "true" in the configuration file.'
            ).format(expected_fs, fs, filename)
            print(message)

    spec_cfg = config['features']['spectral']
    nfilt = spec_cfg.get('filterbanks', 40)
    ncep = spec_cfg.get('nceps', 13)
    do_dct = spec_cfg.get('dct', True)
    lowerf = spec_cfg.get('lowerf', 120)
    upperf = spec_cfg.get('upperf', 6900)
    alpha = preproc_cfg.get('preemph', 0.97)
    # NOTE(review): the encoder's rate comes from the 'spectral' section
    # (default 16000), not from the preprocessing 'samplerate' the signal was
    # checked against above -- confirm the two are meant to be independent.
    fs = spec_cfg.get('samplerate', 16000)
    frate = spec_cfg.get('framerate', 100)
    wlen = spec_cfg.get('winlen', 0.025)
    nfft = spec_cfg.get('nfft', 512)
    compression = spec_cfg.get('compression', 'log')
    # The single 'deltas' entry switches both deltas and delta-deltas.
    do_deltas = spec_cfg.get('deltas', True)
    do_deltasdeltas = spec_cfg.get('deltas', True)

    encoder = spectral.Spectral(nfilt=nfilt,
                                ncep=ncep,
                                do_dct=do_dct,
                                lowerf=lowerf,
                                upperf=upperf,
                                alpha=alpha,
                                fs=fs,
                                frate=frate,
                                wlen=wlen,
                                nfft=nfft,
                                compression=compression,
                                do_deltas=do_deltas,
                                do_deltasdeltas=do_deltasdeltas)
    return encoder.transform(sig)
Ejemplo n.º 5
0
def _load_spec(monkey, fname, nfilt, frate, highpass, fs=16000):
    """Return the encoded features of one file, memoised in ``_spec_cache``.

    On a cache miss the signal is loaded with ``_load_wav``, optionally
    high-pass filtered and encoded with a ``spectral.Spectral`` encoder
    (log compression, 0.025 window length, no DCT, no deltas).  The result
    is stored in the module-level ``_spec_cache`` and returned from there.

    :param monkey: first argument of ``_load_wav``
    :param fname: second argument of ``_load_wav``
    :param nfilt: forwarded to ``Spectral`` as ``nfilt``
    :param frate: forwarded to ``Spectral`` as ``frate``
    :param highpass: third argument of ``hpfilter``; a falsy value skips the
        filter
    :param fs: sample rate given to ``hpfilter`` and ``Spectral``
    :return: the cached result of ``Spectral.transform``
    """
    # fs belongs in the key: it changes both the high-pass filter and the
    # encoder, so results for different rates must not share a cache slot.
    key = (monkey, fname, nfilt, frate, highpass, fs)
    if key not in _spec_cache:
        sig = _load_wav(monkey, fname)
        if highpass:
            sig = hpfilter(sig, fs, highpass)
        encoder = spectral.Spectral(nfilt=nfilt,
                                    fs=fs,
                                    wlen=0.025,
                                    frate=frate,
                                    compression='log',
                                    do_dct=False,
                                    do_deltas=False,
                                    do_deltasdeltas=False)
        _spec_cache[key] = encoder.transform(sig)
    return _spec_cache[key]
Ejemplo n.º 6
0
def load_data_stacked(monkey,
                      nframes=30,
                      nfilt=40,
                      include_noise=False,
                      min_samples=50):
    """Loads audio data for monkey as stacked. Only intervals.

    Intervals whose mark occurs fewer than ``min_samples`` times over all
    files are dropped before the data is loaded.

    Arguments:
    :param monkey: name of the monkey
    :param nframes: number of frames to stack
    :param nfilt: number of filterbanks
    :param include_noise: do not exclude noise intervals
    :param min_samples: minimum number of samples for a class to be used

    :return
      X: audio representation, ndarray (nsamples x nfilt * nframes)
      y: labels as int, ndarray (nsamples)
      labelset: list of call names (maps onto ints in y)
    """
    annot = get_annotation(monkey, include_noise=include_noise)

    # Tally every mark over all files.  Updating one Counter in place copes
    # with an empty annotation (reduce() without an initial value raises
    # TypeError there) and avoids building a fresh Counter per file.
    counts = Counter()
    for fname in annot:
        counts.update(f.mark for f in annot[fname])

    # Keep only the intervals of sufficiently frequent marks.
    annot = {
        fname: [f for f in annot[fname] if counts[f.mark] >= min_samples]
        for fname in annot
    }

    frate = 100
    encoder = spectral.Spectral(nfilt=nfilt,
                                fs=16000,
                                wlen=0.025,
                                frate=frate,
                                compression='log',
                                nfft=1024,
                                do_dct=False,
                                do_deltas=False,
                                do_deltasdeltas=False)

    X, y, labelset = load_data_stacked_annot(monkey, annot, encoder, nframes)
    return X, y, labelset
Ejemplo n.º 7
0
                    'Use option -f to force resampling of the audio file.'.
                    format(encoder.config['fs'], fs, f))
                exit()

        # feats = np.hstack(encoder.transform(sig))
        feats = encoder.transform(sig)
        bname = path.splitext(path.basename(f))[0]
        np.save(path.join(outdir, bname + '.npy'), feats)


if __name__ == '__main__':
    # Command-line entry point: read the JSON encoder configuration, check
    # the output directory, build the encoder and convert the given files.
    args = parse_args()
    config_file = args['config']
    try:
        with open(config_file, 'r') as fid:
            config = json.load(fid)
    except IOError:
        print('No such file: {0}'.format(config_file))
        # Non-zero status so that calling scripts can detect the failure.
        exit(1)

    outdir = args['outdir']
    # isdir rather than exists: a regular file at this path cannot be used
    # as the output directory either.
    if not os.path.isdir(outdir):
        print('No such directory: {0}'.format(outdir))
        exit(1)

    # The JSON object is passed as keyword arguments to Spectral.
    encoder = spectral.Spectral(**config)

    force = args['force']
    files = args['files']
    convert(files, outdir, encoder, force)
Ejemplo n.º 8
0
def load_data_full_stacks(monkey,
                          nfilt=40,
                          stacksize=30,
                          highpass=2000,
                          min_samples=50):
    """Load per-file train and test data for ``monkey``.

    The annotation comes from ``reduced_annotation``, the train/test file
    lists from ``train_test_split_files`` and the activity intervals from
    the pickle ``pred_lambdas_<monkey>.pkl`` in ``BASEDIR``, thresholded at
    0.5.  Each file is then loaded with ``load_Xy``.

    Arguments:
    :param monkey: passed to ``reduced_annotation`` and ``load_Xy``; also
        used to build the pickle file name
    :param nfilt: forwarded to ``spectral.Spectral`` as ``nfilt``
    :param stacksize: last argument of ``load_Xy``
    :param highpass: forwarded to ``load_Xy``
    :param min_samples: forwarded to ``reduced_annotation``

    :return
      X_train, X_test, y_train, y_test: dicts keyed by file name holding
        the two values returned by ``load_Xy`` for that file
      labelset: sorted list of the marks in the annotation (an empty mark
        becomes 'NOISE') plus 'NOISE_ACT'
    """
    annot = reduced_annotation(monkey, min_samples=min_samples)

    frate = 100
    encoder = spectral.Spectral(nfilt=nfilt,
                                fs=16000,
                                wlen=0.025,
                                frate=frate,
                                compression='log',
                                nfft=1024,
                                do_dct=False,
                                do_deltas=False,
                                do_deltasdeltas=False)

    train_files, test_files = train_test_split_files(annot)

    # Collect the distinct marks; an empty mark is reported as 'NOISE'.
    marks = set()
    for fname in annot:
        for f in annot[fname]:
            marks.add(f.mark if f.mark != '' else 'NOISE')
    labelset = sorted(list(marks) + ['NOISE_ACT'])

    pickle_name = path.join(BASEDIR, 'pred_lambdas_{0}.pkl'.format(monkey))
    with open(pickle_name, 'rb') as fid:
        pred_lambda = pickle.load(fid)

    act_intervals = {
        fname: speech_activity_to_intervals(pred_lambda[fname],
                                            threshold=0.5,
                                            winhop=0.025)
        for fname in pred_lambda
    }

    def _load_split(files):
        # Load X and y for every file of one split, keyed by file name.
        subset = {fname: annot[fname] for fname in files}
        X_by_file = {}
        y_by_file = {}
        for fname in subset:
            X_by_file[fname], y_by_file[fname] = load_Xy(
                monkey, fname, encoder, subset[fname], act_intervals[fname],
                labelset, frate, highpass, stacksize)
        return X_by_file, y_by_file

    X_train, y_train = _load_split(train_files)
    X_test, y_test = _load_split(test_files)

    return X_train, X_test, y_train, y_test, labelset
Ejemplo n.º 9
0
# read in the wave file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import spectral
import scipy.misc
import wave
import sys
import struct
import numpy as np

# Read all frames of the wave file named on the command line.  try/finally
# makes sure the handle is closed even when reading or unpacking fails.
fid = wave.open(sys.argv[1], 'r')
try:
    _, _, fs, nframes, _, _ = fid.getparams()
    # "%dh" unpacks exactly nframes signed 16-bit values, so the file is
    # presumably expected to be mono 16-bit PCM -- TODO confirm.
    sig = np.array(struct.unpack_from("%dh" % nframes,
                                      fid.readframes(nframes)))
finally:
    fid.close()

config = dict(fs=fs, dct=False, scale='bark', deltas=False)
extractor = spectral.Spectral(**config)
data = extractor.transform(sig)

# Named lo/hi rather than min/max so the builtins are not shadowed.
lo = np.min(data)
hi = np.max(data)
print(lo)
print(hi)

# Rescale to [0, 1].  If every value is identical the span is zero; the
# shifted (all-zero) data is then kept instead of dividing by zero.
data = data - lo
span = hi - lo
if span > 0:
    data = data / span

# Write the features as text and as an image next to the input file.
np.savetxt(sys.argv[1] + '.mfsc', data)
# NOTE(review): scipy.misc.imsave was deprecated and later removed from
# SciPy -- confirm the SciPy version this script is run with.
scipy.misc.imsave(sys.argv[1] + '.png', data)
Ejemplo n.º 10
0
def load_all_intervals(nfilt=40, stacksize=30, highpass=200):
    print 'loading annotations...',
    annot_all = {}
    t0 = time.time()
    for monkey in MONKEYS:
        annot_all[monkey] = data.reduced_annotation(monkey)
    print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
    sys.stdout.flush()

    print 'splitting files...',
    sys.stdout.flush()
    t0 = time.time()
    train_test_files = {
        k: data.train_test_split_files(annot_all[k])
        for k in annot_all
    }
    print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
    sys.stdout.flush()

    frate = 100
    encoder = spectral.Spectral(nfilt=nfilt,
                                fs=16000,
                                wlen=0.025,
                                frate=frate,
                                compression='log',
                                nfft=1024,
                                do_dct=False,
                                do_deltas=False,
                                do_deltasdeltas=False)

    X_train = {}
    X_test = {}
    y_train = {}
    y_test = {}
    labelset = {}
    for monkey in MONKEYS:
        print monkey
        sys.stdout.flush()

        train_files, test_files = train_test_files[monkey]
        annot = annot_all[monkey]

        annot_train = {fname: annot[fname] for fname in train_files}
        print '  loading train data...',
        sys.stdout.flush()
        t0 = time.time()
        X_train_, y_train_, labels_train = data.load_data_stacked_annot(
            monkey, annot_train, encoder, stacksize, highpass=highpass)
        print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
        sys.stdout.flush()

        print '  loading test data...',
        sys.stdout.flush()
        t0 = time.time()
        annot_test = {fname: annot[fname] for fname in test_files}
        X_test_, y_test_, labels_test = data.load_data_stacked_annot(
            monkey, annot_test, encoder, stacksize, highpass=highpass)
        print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
        sys.stdout.flush()

        # print '  scaling...',
        # sys.stdout.flush()
        # t0 = time.time()
        # scaler = StandardScaler().fit(np.vstack((X_train_, X_test_)))

        # X_train[monkey] = scaler.transform(X_train_)
        # y_train[monkey] = y_train_
        # X_test[monkey] = scaler.transform(X_test_)
        # y_test[monkey] = y_test_
        # print 'done. time taken: {0:.3f}s'.format(time.time() - t0)

        X_train[monkey] = X_train_
        y_train[monkey] = y_train_
        X_test[monkey] = X_test_
        y_test[monkey] = y_test_

        assert (labels_test == labels_train)
        labelset[monkey] = labels_train

    return X_train, X_test, y_train, y_test, labelset