def __init__(self, csv):
    self.s = spectral.Spectral()
    self.s.read_data(csv)
    # default parameters
    self.centers = 2
    self.kernel_type = 1
    self.normalise = 1
    self.max_iters = 1000
    self.gamma = 0.001
    self.constant = 1.0
    self.order = 2.0
def construct_encoder(config_dict):
    d = {}
    d['nfilt'] = config_dict['nfilters']
    d['do_dct'] = config_dict['cepstral']
    d['compression'] = config_dict['compression']
    d['do_deltas'] = config_dict['deltas']
    # both delta flags are driven by the single 'deltas' key
    d['do_deltasdeltas'] = config_dict['deltas']
    d['fs'] = config_dict['samplerate']
    if 'nceps' in config_dict:
        d['nceps'] = config_dict['nceps']
    return spectral.Spectral(**d)
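# Minimal usage sketch for construct_encoder; the keys mirror the ones read
# above, the values are illustrative assumptions:
example_config = {
    'nfilters': 40,
    'cepstral': True,
    'compression': 'log',
    'deltas': True,
    'samplerate': 16000,
    'nceps': 13,
}
encoder = construct_encoder(example_config)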
def _load_spec(fname, nfilt, frate, highpass, fs=16000):
    sig = _load_wav(fname, fs)
    if highpass:
        sig = hpfilter(sig, fs, highpass)
    encoder = spectral.Spectral(nfilt=nfilt, fs=fs, wlen=0.025, frate=frate,
                                compression='log', do_dct=False,
                                do_deltas=False, do_deltasdeltas=False)
    return encoder.transform(sig)
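# A hypothetical call, assuming _load_wav and hpfilter are defined in this
# module (filename and parameter values are illustrative):
# spec = _load_spec('call_001.wav', nfilt=40, frate=100, highpass=2000)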
def extract_features_single(filename, config):
    sig, fs, _ = wavread(filename)
    expected_fs = config['features']['preprocessing']['samplerate']
    if fs != expected_fs:
        if config['features']['preprocessing']['resample']:
            try:
                import scikits.samplerate
            except ImportError:
                print 'cannot resample because scikits.samplerate is not ' \
                    'installed. Either resample all audio files externally ' \
                    'or install it.'
            # the resampling ratio is target rate over source rate;
            # force float division under Python 2
            sig = scikits.samplerate.resample(
                sig, float(expected_fs) / fs, 'sinc_best')
        else:
            print 'samplerate error in file {2}: expected {0}, got {1}.\n' \
                'Try to resample all audio files to the samplerate ' \
                'specified in the config file. If you can\'t resample ' \
                'yourself, set the value of "resample" to "true" in the ' \
                'configuration file.'.format(expected_fs, fs, filename)
    nfilt = config['features']['spectral'].get('filterbanks', 40)
    ncep = config['features']['spectral'].get('nceps', 13)
    do_dct = config['features']['spectral'].get('dct', True)
    lowerf = config['features']['spectral'].get('lowerf', 120)
    upperf = config['features']['spectral'].get('upperf', 6900)
    alpha = config['features']['preprocessing'].get('preemph', 0.97)
    fs = config['features']['spectral'].get('samplerate', 16000)
    frate = config['features']['spectral'].get('framerate', 100)
    wlen = config['features']['spectral'].get('winlen', 0.025)
    nfft = config['features']['spectral'].get('nfft', 512)
    compression = config['features']['spectral'].get('compression', 'log')
    do_deltas = config['features']['spectral'].get('deltas', True)
    # both delta flags are driven by the single 'deltas' key
    do_deltasdeltas = config['features']['spectral'].get('deltas', True)
    encoder = spectral.Spectral(nfilt=nfilt, ncep=ncep, do_dct=do_dct,
                                lowerf=lowerf, upperf=upperf, alpha=alpha,
                                fs=fs, frate=frate, wlen=wlen, nfft=nfft,
                                compression=compression, do_deltas=do_deltas,
                                do_deltasdeltas=do_deltasdeltas)
    return encoder.transform(sig)
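# A minimal config sketch matching the keys read above; the values shown are
# the defaults used by the .get() calls, not prescribed by the source:
config = {
    'features': {
        'preprocessing': {'samplerate': 16000, 'resample': True,
                          'preemph': 0.97},
        'spectral': {'filterbanks': 40, 'nceps': 13, 'dct': True,
                     'lowerf': 120, 'upperf': 6900, 'samplerate': 16000,
                     'framerate': 100, 'winlen': 0.025, 'nfft': 512,
                     'compression': 'log', 'deltas': True},
    },
}
# feats = extract_features_single('utterance.wav', config)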
def _load_spec(monkey, fname, nfilt, frate, highpass, fs=16000):
    # memoize transformed spectra per (monkey, file, parameter) combination
    key = (monkey, fname, nfilt, frate, highpass)
    if key not in _spec_cache:
        sig = _load_wav(monkey, fname)
        if highpass:
            sig = hpfilter(sig, fs, highpass)
        encoder = spectral.Spectral(nfilt=nfilt, fs=fs, wlen=0.025,
                                    frate=frate, compression='log',
                                    do_dct=False, do_deltas=False,
                                    do_deltasdeltas=False)
        _spec_cache[key] = encoder.transform(sig)
    return _spec_cache[key]
def load_data_stacked(monkey, nframes=30, nfilt=40, include_noise=False,
                      min_samples=50):
    """Load audio data for a monkey as stacked frames, annotated intervals only.

    Arguments:
    :param monkey: name of the monkey
    :param nframes: number of frames to stack
    :param nfilt: number of filterbanks
    :param include_noise: do not exclude noise intervals
    :param min_samples: minimum number of samples for a class to be used
    :return X: audio representation, ndarray (nsamples x nfilt * nframes)
            y: labels as int, ndarray (nsamples)
            labelset: list of call names (maps onto ints in y)
    """
    annot = get_annotation(monkey, include_noise=include_noise)
    counts = reduce(add, (Counter(f.mark for f in annot[fname])
                          for fname in annot))
    annot = {k: [f for f in v if counts[f.mark] >= min_samples]
             for k, v in annot.iteritems()}
    # labelset = sorted(list(set(f.mark for fname in annot
    #                            for f in annot[fname])))
    # labelset = sorted(k for k in counts if counts[k] >= min_samples)
    # label2idx = dict(zip(labelset, range(len(labelset))))
    # nsamples = sum(imap(len, annot.itervalues()))
    frate = 100
    encoder = spectral.Spectral(nfilt=nfilt, fs=16000, wlen=0.025,
                                frate=frate, compression='log', nfft=1024,
                                do_dct=False, do_deltas=False,
                                do_deltasdeltas=False)
    X, y, labelset = load_data_stacked_annot(monkey, annot, encoder, nframes)
    return X, y, labelset
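# Hypothetical usage (monkey name illustrative; requires the corresponding
# annotation files to be present):
# X, y, labelset = load_data_stacked('blue', nframes=30, nfilt=40)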
              'Use option -f to force resampling of the audio file.'
              .format(encoder.config['fs'], fs, f))
        exit()
    # feats = np.hstack(encoder.transform(sig))
    feats = encoder.transform(sig)
    bname = path.splitext(path.basename(f))[0]
    np.save(path.join(outdir, bname + '.npy'), feats)


if __name__ == '__main__':
    args = parse_args()
    config_file = args['config']
    try:
        with open(config_file, 'r') as fid:
            config = json.load(fid)
    except IOError:
        print 'No such file:', config_file
        exit()
    outdir = args['outdir']
    if not os.path.exists(outdir):
        print 'No such directory:', outdir
        exit()
    encoder = spectral.Spectral(**config)
    force = args['force']
    files = args['files']
    convert(files, outdir, encoder, force)
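# Invocation sketch; parse_args is assumed to expose 'config', 'outdir',
# 'force' and 'files' entries (the flag names below are guesses):
# python convert.py --config config.json --outdir feats/ -f a.wav b.wav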
def load_data_full_stacks(monkey, nfilt=40, stacksize=30, highpass=2000,
                          min_samples=50):
    """Load train/test stacks for all files of a monkey.

    Arguments:
    :param monkey: name of the monkey
    :param nfilt: number of filterbanks
    :param stacksize: number of frames to stack
    :param highpass: cutoff frequency (Hz) of the highpass filter
    :param min_samples: minimum number of samples for a class to be used
    """
    annot = reduced_annotation(monkey, min_samples=min_samples)
    X_train = {}
    X_test = {}
    y_train = {}
    y_test = {}
    frate = 100
    encoder = spectral.Spectral(nfilt=nfilt, fs=16000, wlen=0.025,
                                frate=frate, compression='log', nfft=1024,
                                do_dct=False, do_deltas=False,
                                do_deltasdeltas=False)
    train_files, test_files = train_test_split_files(annot)
    labelset = sorted(
        list(set((f.mark if f.mark != '' else 'NOISE')
                 for fname in annot for f in annot[fname])) + ['NOISE_ACT'])
    with open(path.join(BASEDIR, 'pred_lambdas_{0}.pkl'.format(monkey)),
              'rb') as fid:
        pred_lambda = pickle.load(fid)
    act_intervals = {}
    for fname in pred_lambda:
        act_intervals[fname] = speech_activity_to_intervals(
            pred_lambda[fname], threshold=0.5, winhop=0.025)
    annot_train = {fname: annot[fname] for fname in train_files}
    for fname in annot_train:
        X, y = load_Xy(monkey, fname, encoder, annot_train[fname],
                       act_intervals[fname], labelset, frate, highpass,
                       stacksize)
        X_train[fname] = X
        y_train[fname] = y
    annot_test = {fname: annot[fname] for fname in test_files}
    for fname in annot_test:
        X, y = load_Xy(monkey, fname, encoder, annot_test[fname],
                       act_intervals[fname], labelset, frate, highpass,
                       stacksize)
        X_test[fname] = X
        y_test[fname] = y
    return X_train, X_test, y_train, y_test, labelset
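# Hypothetical usage (monkey name illustrative); requires the pickled
# activity predictions pred_lambdas_<monkey>.pkl to exist under BASEDIR:
# X_tr, X_te, y_tr, y_te, labels = load_data_full_stacks('titi', highpass=2000)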
# read in the wave file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import struct
import sys
import wave

import numpy as np
import scipy.misc
import spectral

fid = wave.open(sys.argv[1], 'r')
_, _, fs, nframes, _, _ = fid.getparams()
# assumes 16-bit mono audio ('h' = signed short)
sig = np.array(struct.unpack_from("%dh" % nframes, fid.readframes(nframes)))
fid.close()

config = dict(fs=fs, dct=False, scale='bark', deltas=False)
extractor = spectral.Spectral(**config)
data = extractor.transform(sig)

# rescale to [0, 1] before writing out; avoid shadowing the min/max builtins
dmin = np.min(data)
dmax = np.max(data)
print(dmin)
print(dmax)
data = (data - dmin) / (dmax - dmin)

np.savetxt(sys.argv[1] + '.mfsc', data)
scipy.misc.imsave(sys.argv[1] + '.png', data)
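# Invocation sketch (script name illustrative); writes <input>.mfsc (text
# matrix) and <input>.png (image) next to the input file:
# python make_mfsc.py input.wav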
def load_all_intervals(nfilt=40, stacksize=30, highpass=200):
    print 'loading annotations...',
    annot_all = {}
    t0 = time.time()
    for monkey in MONKEYS:
        annot_all[monkey] = data.reduced_annotation(monkey)
    print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
    sys.stdout.flush()

    print 'splitting files...',
    sys.stdout.flush()
    t0 = time.time()
    train_test_files = {k: data.train_test_split_files(annot_all[k])
                        for k in annot_all}
    print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
    sys.stdout.flush()

    frate = 100
    encoder = spectral.Spectral(nfilt=nfilt, fs=16000, wlen=0.025,
                                frate=frate, compression='log', nfft=1024,
                                do_dct=False, do_deltas=False,
                                do_deltasdeltas=False)
    X_train = {}
    X_test = {}
    y_train = {}
    y_test = {}
    labelset = {}
    for monkey in MONKEYS:
        print monkey
        sys.stdout.flush()
        train_files, test_files = train_test_files[monkey]
        annot = annot_all[monkey]

        annot_train = {fname: annot[fname] for fname in train_files}
        print '  loading train data...',
        sys.stdout.flush()
        t0 = time.time()
        X_train_, y_train_, labels_train = data.load_data_stacked_annot(
            monkey, annot_train, encoder, stacksize, highpass=highpass)
        print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
        sys.stdout.flush()

        print '  loading test data...',
        sys.stdout.flush()
        t0 = time.time()
        annot_test = {fname: annot[fname] for fname in test_files}
        X_test_, y_test_, labels_test = data.load_data_stacked_annot(
            monkey, annot_test, encoder, stacksize, highpass=highpass)
        print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
        sys.stdout.flush()

        # print '  scaling...',
        # sys.stdout.flush()
        # t0 = time.time()
        # scaler = StandardScaler().fit(np.vstack((X_train_, X_test_)))
        # X_train[monkey] = scaler.transform(X_train_)
        # y_train[monkey] = y_train_
        # X_test[monkey] = scaler.transform(X_test_)
        # y_test[monkey] = y_test_
        # print 'done. time taken: {0:.3f}s'.format(time.time() - t0)
        X_train[monkey] = X_train_
        y_train[monkey] = y_train_
        X_test[monkey] = X_test_
        y_test[monkey] = y_test_
        assert labels_test == labels_train
        labelset[monkey] = labels_train
    return X_train, X_test, y_train, y_test, labelset
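# Hypothetical usage; loads stacked train/test intervals for every monkey in
# MONKEYS, keyed by monkey name:
# X_train, X_test, y_train, y_test, labelset = load_all_intervals(highpass=200)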