def load_voxceleb_list():
    """Open the VoxCeleb file-list dataset, downloading it on first use.

    The archive URL is kept base64-encoded in the source.  If the resolved
    dataset path does not exist, the zip is fetched into the directory
    returned by ``get_datasetpath(root='~')`` and extracted into the folder
    that contains the downloaded archive (``remove_zip=True`` is passed to
    ``unzip_folder``).

    Return
    ------
    Dataset opened in read-only mode at the resolved dataset path
    """
    encoded_url = b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL3ZveGNlbGViX2xpc3RzLnppcA==\n'
    url = str(base64.decodebytes(encoded_url), 'utf-8')
    ds_path = get_datasetpath(name='voxceleb_lists',
                              root='~',
                              is_folder=False,
                              override=False)
    # nothing to fetch when the dataset is already on disk
    if os.path.exists(ds_path):
        return Dataset(ds_path, read_only=True)
    archive = get_file(fname=os.path.basename(url),
                       origin=url,
                       outdir=get_datasetpath(root='~'))
    unzip_folder(zip_path=archive,
                 out_path=os.path.dirname(archive),
                 remove_zip=True)
    return Dataset(ds_path, read_only=True)
def load(clazz):
    """Prepare the FSDD dataset folder (if needed) and list its content.

    When the dataset folder is missing or does not hold exactly 1501
    entries, the archive is downloaded (unless the zip is already present),
    extracted to a temporary folder, the recordings are copied into the
    dataset folder and ``metadata.csv`` is generated from the archive's
    ``metadata.py``.  The temporary folder is always removed afterwards.

    Return
    ------
    records: list of all path to recorded audio files (names containing
        '.wav') found in the dataset folder
    metadata: numpy.ndarray of str read from ``metadata.csv``
    """
    # local import: `imp` was removed in Python 3.12, importlib replaces it
    import importlib.util

    dat_path = get_datasetpath(name='FSDD', override=False)
    tmp_path = dat_path + '_tmp'
    zip_path = dat_path + '.zip'
    # ====== download zip dataset ====== #
    if not os.path.exists(dat_path) or \
            len(os.listdir(dat_path)) != 1501:
        if not os.path.exists(zip_path):
            get_file(fname='FSDD.zip',
                     origin=FSDD.LINK,
                     outdir=get_datasetpath())
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        try:
            unzip_folder(zip_path=zip_path, out_path=tmp_path, remove_zip=True)
            # the archive holds a single top-level folder; keep `tmp_path`
            # untouched so the whole temporary tree can be removed later
            extracted = os.path.join(tmp_path, os.listdir(tmp_path)[0])
            # ====== get all records ====== #
            record_path = os.path.join(extracted, 'recordings')
            # the branch above also covers a missing dataset folder
            os.makedirs(dat_path, exist_ok=True)
            for fname in os.listdir(record_path):
                shutil.copy2(src=os.path.join(record_path, fname),
                             dst=os.path.join(dat_path, fname))
            # ====== copy the metadata ====== #
            meta_path = os.path.join(extracted, 'metadata.py')
            spec = importlib.util.spec_from_file_location('metadata', meta_path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            meta = module.metadata
            assert len(set(len(i) for i in meta.values())) == 1, \
                "Invalid metadata"
            rows = []
            for name, info in meta.items():
                info = sorted(info.items(), key=lambda x: x[0])
                header = ['name'] + [i[0] for i in info]
                rows.append([name] + [i[1] for i in info])
            with open(os.path.join(dat_path, 'metadata.csv'), 'w') as f:
                for r in [header] + rows:
                    f.write(','.join(r) + '\n')
        finally:
            # ====== clean ====== #
            if os.path.exists(tmp_path):
                shutil.rmtree(tmp_path)
    # ====== return dataset ====== #
    all_files = [
        os.path.join(dat_path, i) for i in os.listdir(dat_path) if '.wav' in i
    ]
    meta = np.genfromtxt(os.path.join(dat_path, 'metadata.csv'),
                         dtype=str,
                         delimiter=',')
    return all_files, meta
def load_sre_list():
    """Open the SRE file-list dataset, downloading it when necessary.

    An existing dataset path that does not contain exactly 24 entries is
    deleted first.  If the path is then absent, the archive (URL stored
    base64-encoded) is downloaded into ``get_datasetpath(root='~')`` and
    extracted to the dataset path.

    Return
    ------
    Dataset opened in read-only mode at the dataset path
    """
    encoded_url = b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL1NSRV9GSUxFUy56aXA=\n'
    url = str(base64.decodebytes(encoded_url), 'utf-8')
    ds_path = get_datasetpath(name='SRE_FILES',
                              root='~',
                              is_folder=False,
                              override=False)
    # an unexpected number of entries means the folder must be rebuilt
    stale = os.path.exists(ds_path) and len(os.listdir(ds_path)) != 24
    if stale:
        shutil.rmtree(ds_path)
    if not os.path.exists(ds_path):
        archive = get_file(fname=os.path.basename(url),
                           origin=url,
                           outdir=get_datasetpath(root='~'))
        unzip_folder(zip_path=archive, out_path=ds_path, remove_zip=True)
    return Dataset(ds_path, read_only=True)
def load_sre_list():
    """Return the SRE file-list dataset, fetching the archive if required.

    The dataset path is removed when it exists but its number of entries
    differs from 24; a missing path triggers a download of the archive
    (base64-encoded URL) followed by extraction to that path.

    Return
    ------
    Dataset opened in read-only mode at the dataset path
    """
    origin = str(
        base64.decodebytes(
            b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL1NSRV9GSUxFUy56aXA=\n'
        ), 'utf-8')
    ds_path = get_datasetpath(name='SRE_FILES',
                              root='~',
                              is_folder=False,
                              override=False)
    if os.path.exists(ds_path):
        # exactly 24 entries are expected, anything else is discarded
        if len(os.listdir(ds_path)) != 24:
            shutil.rmtree(ds_path)
    if not os.path.exists(ds_path):
        zip_file = get_file(fname=os.path.basename(origin),
                            origin=origin,
                            outdir=get_datasetpath(root='~'))
        unzip_folder(zip_path=zip_file, out_path=ds_path, remove_zip=True)
    return Dataset(ds_path, read_only=True)
def load_voxceleb_list():
    """Return the VoxCeleb file-list dataset, fetching it if it is missing.

    The download happens only when the dataset path does not exist: the
    archive (base64-encoded URL) is stored in ``get_datasetpath(root='~')``
    and unzipped into the directory holding the downloaded file.

    Return
    ------
    Dataset opened in read-only mode at the dataset path
    """
    origin = str(
        base64.decodebytes(
            b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL3ZveGNlbGViX2xpc3RzLnppcA==\n'
        ), 'utf-8')
    ds_path = get_datasetpath(name='voxceleb_lists',
                              root='~',
                              is_folder=False,
                              override=False)
    missing = not os.path.exists(ds_path)
    if missing:
        zip_file = get_file(fname=os.path.basename(origin),
                            origin=origin,
                            outdir=get_datasetpath(root='~'))
        unzip_folder(zip_path=zip_file,
                     out_path=os.path.dirname(zip_file),
                     remove_zip=True)
    return Dataset(ds_path, read_only=True)
def load_lre_list():
    """ Download the LRE file list and load it as an array of strings.

    The header includes the following columns:

      * name: LDC2017E22/data/ara-acm/ar-20031215-034005_0-a.sph
      * lre: {'train17', 'eval15', 'train15', 'dev17', 'eval17'}
      * language: {'ara-arb', 'ara-ary', 'ara-apc', 'ara-arz', 'ara-acm',
                   'eng-gbr', 'eng-usg', 'eng-sas',
                   'fre-hat', 'fre-waf',
                   'zho-wuu', 'zho-cdo', 'zho-cmn', 'zho-yue', 'zho-nan',
                   'spa-lac', 'spa-eur', 'spa-car',
                   'qsl-pol', 'qsl-rus',
                   'por-brz'}
      * corpus: {'pcm', 'alaw', 'babel', 'ulaw', 'vast', 'mls14'}
      * duration: {'3', '30', '5', '15', '10', '20', '1000', '25'}

    Note
    ----
    Suggested naming scheme:
      `lre/lang/corpus/dur/base_name`

    Return
    ------
    numpy.ndarray of str, parsed with a single space as delimiter; the first
    line of the file is skipped
    """
    encoded_url = b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL2xyZV9saXN0LnR4dA==\n'
    url = str(base64.decodebytes(encoded_url), 'utf-8')
    list_file = get_file(fname=os.path.basename(url),
                         origin=url,
                         outdir=get_datasetpath(root='~'))
    table = np.genfromtxt(fname=list_file,
                          dtype=str,
                          delimiter=' ',
                          skip_header=1)
    return table
def load_lre_list():
    """ Fetch the LRE list file and parse it into a string array.

    Columns described by the header of the file:

      * name: LDC2017E22/data/ara-acm/ar-20031215-034005_0-a.sph
      * lre: {'train17', 'eval15', 'train15', 'dev17', 'eval17'}
      * language: {'ara-arb', 'ara-ary', 'ara-apc', 'ara-arz', 'ara-acm',
                   'eng-gbr', 'eng-usg', 'eng-sas',
                   'fre-hat', 'fre-waf',
                   'zho-wuu', 'zho-cdo', 'zho-cmn', 'zho-yue', 'zho-nan',
                   'spa-lac', 'spa-eur', 'spa-car',
                   'qsl-pol', 'qsl-rus',
                   'por-brz'}
      * corpus: {'pcm', 'alaw', 'babel', 'ulaw', 'vast', 'mls14'}
      * duration: {'3', '30', '5', '15', '10', '20', '1000', '25'}

    Note
    ----
    Suggested naming scheme:
      `lre/lang/corpus/dur/base_name`

    Return
    ------
    numpy.ndarray of str (space-delimited, header line skipped)
    """
    origin = str(
        base64.decodebytes(
            b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL2xyZV9saXN0LnR4dA==\n'
        ), 'utf-8')
    local_copy = get_file(fname=os.path.basename(origin),
                          origin=origin,
                          outdir=get_datasetpath(root='~'))
    return np.genfromtxt(fname=local_copy,
                         dtype=str,
                         delimiter=' ',
                         skip_header=1)
def load(clazz):
    """Return the FSDD recordings and metadata, building the folder if needed.

    The dataset folder is (re)built when it is missing or does not contain
    exactly 1501 entries: the archive is downloaded unless the zip already
    exists, unpacked into a temporary folder, every file under
    ``recordings`` is copied to the dataset folder, and ``metadata.csv`` is
    written from the ``metadata`` mapping defined in the archive's
    ``metadata.py``.  The temporary folder is deleted even if a step fails.

    Return
    ------
    records: list of all path to recorded audio files (names containing
        '.wav') in the dataset folder
    metadata: numpy.ndarray of str loaded from ``metadata.csv``
    """
    # imported locally; replaces `imp`, which no longer exists in Python 3.12
    import importlib.util

    dat_path = get_datasetpath(name='FSDD', override=False)
    tmp_path = dat_path + '_tmp'
    zip_path = dat_path + '.zip'
    # ====== download zip dataset ====== #
    needs_build = (not os.path.exists(dat_path) or
                   len(os.listdir(dat_path)) != 1501)
    if needs_build:
        if not os.path.exists(zip_path):
            get_file(fname='FSDD.zip',
                     origin=FSDD.LINK,
                     outdir=get_datasetpath())
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        try:
            unzip_folder(zip_path=zip_path, out_path=tmp_path, remove_zip=True)
            # do not rebind `tmp_path`: the outer temporary folder is the
            # one that has to be removed during cleanup
            root = os.path.join(tmp_path, os.listdir(tmp_path)[0])
            # ====== get all records ====== #
            record_path = os.path.join(root, 'recordings')
            # this branch is also taken when the dataset folder is absent
            os.makedirs(dat_path, exist_ok=True)
            for fname in os.listdir(record_path):
                shutil.copy2(src=os.path.join(record_path, fname),
                             dst=os.path.join(dat_path, fname))
            # ====== copy the metadata ====== #
            meta_path = os.path.join(root, 'metadata.py')
            spec = importlib.util.spec_from_file_location('metadata', meta_path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            meta = module.metadata
            assert len(set(len(i) for i in meta.values())) == 1, \
                "Invalid metadata"
            rows = []
            for name, info in meta.items():
                info = sorted(info.items(), key=lambda x: x[0])
                header = ['name'] + [i[0] for i in info]
                rows.append([name] + [i[1] for i in info])
            with open(os.path.join(dat_path, 'metadata.csv'), 'w') as f:
                for r in [header] + rows:
                    f.write(','.join(r) + '\n')
        finally:
            # ====== clean ====== #
            if os.path.exists(tmp_path):
                shutil.rmtree(tmp_path)
    # ====== return dataset ====== #
    all_files = [os.path.join(dat_path, i)
                 for i in os.listdir(dat_path)
                 if '.wav' in i]
    meta = np.genfromtxt(os.path.join(dat_path, 'metadata.csv'),
                         dtype=str,
                         delimiter=',')
    return all_files, meta
def test_speech_processor(self):
    """Run `SpeechProcessor` on the digit wav data and check feature sums.

    Every key of the produced dataset is compared against the reference
    values in `test_speech_features` using a loose, string-based comparison.
    If the wav data cannot be loaded, the test is reported as skipped
    instead of silently passing.
    """
    try:
        datapath = F.load_digit_wav()
    except Exception as e:
        # mark the test as skipped so the missing data is visible in reports
        self.skipTest('Error (skip this test): %s' % str(e))
    output_path = utils.get_datasetpath(name='digit', override=True)
    feat = F.SpeechProcessor(datapath, output_path, audio_ext='wav',
                             sr_new=8000, win=0.02, shift=0.01,
                             nb_melfilters=40, nb_ceps=13,
                             get_delta=2, get_energy=True,
                             pitch_threshold=0.8,
                             get_spec=True, get_mspec=True, get_mfcc=True,
                             get_pitch=True, get_vad=True,
                             save_stats=True, substitute_nan=None,
                             dtype='float32', datatype='memmap',
                             ncache=0.12, ncpu=4)
    feat.run()
    ds = F.Dataset(output_path)

    def is_equal(x1, x2):
        # compare the float32 reprs character by character and accept when
        # at least half of the longer repr matches position-wise
        x1 = repr(np.array(x1, 'float32').tolist())
        x2 = repr(np.array(x2, 'float32').tolist())
        n = sum(1 for i, j in zip(x1, x2) if i == j)
        return n >= max(len(x1), len(x2)) // 2

    # these numbers are highly numerical instable
    for i in ds.keys():
        if i == 'indices.csv':
            self.assertTrue(isinstance(ds[i], str))
        elif '_' not in i:
            pca = i + '_pca'
            # only checked when a matching '<key>_pca' entry exists
            if pca in ds:
                self.assertTrue(
                    is_equal(np.sum(ds[i][:], dtype='float32'),
                             test_speech_features[i]))
        elif '_pca' not in i:
            self.assertTrue(
                is_equal(np.sum(ds[i][:], dtype='float32'),
                         test_speech_features[i]))
        else:
            self.assertTrue(
                is_equal(np.sum(ds[i].components_),
                         test_speech_features[i]))
def load_lre_sad():
    """ Download the LRE SAD file and open it as a read-only MmapDict.

    Example entry:

    key: 'LDC2017E23/data/eval/lre17_lqoyrygc.sph'
    value: [(1.99, 3.38), (8.78, 16.41)] (in second)
    """
    encoded_url = b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL2xyZV9zYWQ=\n'
    url = str(base64.decodebytes(encoded_url), 'utf-8')
    sad_file = get_file(fname=os.path.basename(url),
                        origin=url,
                        outdir=get_datasetpath(root='~'))
    return MmapDict(path=sad_file, read_only=True)
def load_lre_sad():
    """ Fetch the LRE SAD mapping and return it as a read-only MmapDict.

    Entries look like:

    key: 'LDC2017E23/data/eval/lre17_lqoyrygc.sph'
    value: [(1.99, 3.38), (8.78, 16.41)] (in second)
    """
    origin = str(
        base64.decodebytes(
            b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL2xyZV9zYWQ=\n'),
        'utf-8')
    target_dir = get_datasetpath(root='~')
    local_copy = get_file(fname=os.path.basename(origin),
                          origin=origin,
                          outdir=target_dir)
    return MmapDict(path=local_copy, read_only=True)
def load_glove(ndim=100):
    """ Download the GloVe embedding of size `ndim` and open it as a
    read-only MmapDict mapping (word -> [vector]).

    Parameters
    ----------
    ndim : int
        embedding dimension, converted with `int`; must be one of
        50, 100, 200 or 300

    Raises
    ------
    ValueError : if `ndim` is not a supported dimension
    """
    ndim = int(ndim)
    supported = (50, 100, 200, 300)
    if ndim not in supported:
        raise ValueError('Only support 50, 100, 200, 300 dimensions.')
    # the decoded URL is a bytes template that takes the dimension via `%`
    template = base64.decodebytes(
        b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL2dsb3ZlLjZCLiVkZA==\n')
    link = str(template % ndim, 'utf-8')
    embedding = get_file(os.path.basename(link),
                         link,
                         outdir=get_datasetpath(root='~'))
    return MmapDict(embedding, read_only=True)
def load_glove(ndim=100):
    """ Automatically load a MmapDict which contains the mapping
    (word -> [vector]), where vector is the embedding vector with the
    given `ndim`.

    Parameters
    ----------
    ndim : int
        one of 50, 100, 200, 300 (the value is passed through `int` first)

    Raises
    ------
    ValueError : for any other dimension
    """
    ndim = int(ndim)
    if not (ndim == 50 or ndim == 100 or ndim == 200 or ndim == 300):
        raise ValueError('Only support 50, 100, 200, 300 dimensions.')
    encoded = b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzL2dsb3ZlLjZCLiVkZA==\n'
    # bytes %-formatting inserts the dimension into the decoded URL
    url = str(base64.decodebytes(encoded) % ndim, 'utf-8')
    local_file = get_file(os.path.basename(url),
                          url,
                          outdir=get_datasetpath(root='~'))
    return MmapDict(local_file, read_only=True)
def __init__(self,
             frame_length=256,
             frame_step=80,
             fft_length=None,
             sample_rate=8000,
             power=2.0,
             top_DB=80.0,
             window_fn=tf.signal.hann_window,
             pad_end=False,
             num_mel_bins=20,
             num_cepstral=None,
             log_mels=False,
             lower_edge_hertz=125.0,
             upper_edge_hertz=3800.0,
             dtype=tf.float32,
             save_path=None,
             seed=8):
    """Store the feature-extraction settings and build the mel weight matrix.

    Parameters
    ----------
    frame_length, frame_step, sample_rate : stored after conversion to `int`
    fft_length : int or None
        defaults to `frame_length`; always rounded up to the next power of 2
    log_mels : stored after conversion to `bool`
    power, top_DB, window_fn, pad_end, num_mel_bins, num_cepstral,
    lower_edge_hertz, upper_edge_hertz, dtype, seed : stored unchanged
    save_path : str or None
        output folder; when None, the path returned by
        `get_datasetpath(name='audio_datasets', override=False)` is used.
        The folder is created if it does not exist.
    """
    ### preprocessing the arguments
    if save_path is None:
        save_path = get_datasetpath(name='audio_datasets', override=False)
    if not os.path.exists(save_path):
        # makedirs: also creates missing parents and tolerates another
        # process creating the folder between the check and the call
        os.makedirs(save_path, exist_ok=True)
    if fft_length is None:
        fft_length = frame_length
    # round up to the next power of two
    fft_length = 2**int(np.ceil(np.log2(fft_length)))
    ### store
    self.save_path = save_path
    self.frame_length = int(frame_length)
    self.frame_step = int(frame_step)
    self.fft_length = int(fft_length)
    self.log_mels = bool(log_mels)
    self.power = power
    self.top_DB = top_DB
    self.window_fn = window_fn
    self.pad_end = pad_end
    self.num_mel_bins = num_mel_bins
    self.num_cepstral = num_cepstral
    self.sample_rate = int(sample_rate)
    self.lower_edge_hertz = lower_edge_hertz
    self.upper_edge_hertz = upper_edge_hertz
    self.dtype = dtype
    self.seed = seed
    ### mel-frequency
    self.mel_weight = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=self.num_mel_bins,
        num_spectrogram_bins=self.fft_length // 2 + 1,
        sample_rate=self.sample_rate,
        lower_edge_hertz=self.lower_edge_hertz,
        upper_edge_hertz=self.upper_edge_hertz,
        dtype=self.dtype)
def test_speech_processor(self):
    """Run `SpeechProcessor` on the digit wav data and check feature sums.

    Each key of the resulting dataset is compared with the reference value
    stored in `test_speech_features` through a loose, string-based
    comparison.  When the wav data cannot be loaded the test is reported as
    skipped rather than silently passing.
    """
    try:
        datapath = F.load_digit_wav()
    except Exception as e:
        # make the missing data visible in the test report
        self.skipTest('Error (skip this test): %s' % str(e))
    output_path = utils.get_datasetpath(name='digit', override=True)
    feat = F.SpeechProcessor(datapath, output_path, audio_ext='wav',
                             sr_new=8000, win=0.02, hop=0.01,
                             nb_melfilters=40, nb_ceps=13,
                             get_delta=2, get_energy=True,
                             pitch_threshold=0.8,
                             get_spec=True, get_mspec=True, get_mfcc=True,
                             get_pitch=True, get_vad=True,
                             save_stats=True, substitute_nan=None,
                             dtype='float32', datatype='memmap',
                             n_cache=0.12, ncpu=4)
    feat.run()
    ds = F.Dataset(output_path)

    def is_equal(x1, x2):
        # position-wise comparison of the float32 reprs; at least half of
        # the longer repr has to match
        x1 = repr(np.array(x1, 'float32').tolist())
        x2 = repr(np.array(x2, 'float32').tolist())
        n = sum(1 for i, j in zip(x1, x2) if i == j)
        return n >= max(len(x1), len(x2)) // 2

    # these numbers are highly numerical instable
    for i in ds.keys():
        if i == 'indices.csv':
            self.assertTrue(isinstance(ds[i], str))
        elif '_' not in i:
            pca = i + '_pca'
            # only checked when a matching '<key>_pca' entry exists
            if pca in ds:
                self.assertTrue(
                    is_equal(np.sum(ds[i][:], dtype='float32'),
                             test_speech_features[i]))
        elif '_pca' not in i:
            self.assertTrue(
                is_equal(np.sum(ds[i][:], dtype='float32'),
                         test_speech_features[i]))
        else:
            self.assertTrue(
                is_equal(np.sum(ds[i].components_),
                         test_speech_features[i]))
def load_parameters(clazz):
    """Return the parameter dataset of `clazz`, downloading it if missing.

    The archive is named ``<class name>.zip``.  When the parameter folder
    does not exist or is empty, the archive is downloaded into
    `Model.BASE_DIR` and extracted there; the zip is deleted once the
    parameter folder is confirmed to be non-empty.

    Return
    ------
    Dataset opened in read-only mode at the parameter folder

    Raises
    ------
    RuntimeError : if the parameter folder is still missing or empty after
        extraction
    """
    # ====== all path ====== #
    name = clazz.__name__ + '.zip'
    path = os.path.join(base64.decodebytes(Model.ORIGIN).decode(), name)
    param_path = get_datasetpath(name=clazz.__name__, override=False)
    zip_path = os.path.join(Model.BASE_DIR, name)
    # ====== get params files ====== #
    if not os.path.exists(param_path) or \
            len(os.listdir(param_path)) == 0:
        get_file(name, origin=path, outdir=Model.BASE_DIR)
        # context manager: the archive is closed even if extraction fails
        with ZipFile(zip_path, mode='r', compression=ZIP_DEFLATED) as zf:
            zf.extractall(path=Model.BASE_DIR)
        # check if proper unzipped
        if not os.path.exists(param_path) or \
                len(os.listdir(param_path)) == 0:
            raise RuntimeError("Zip file at path:%s is not proper unzipped, "
                "cannot find downloaded parameters at path: %s" %
                (zip_path, param_path))
        else:
            os.remove(zip_path)
    # ====== create and return the params dataset ====== #
    ds = Dataset(param_path, read_only=True)
    return ds
class Model(NNOp):
    """ Model

    Base class for operators that declare their inputs through
    `get_input_info` and can load parameters from a downloadable archive
    named after the class.
    """
    # base64-encoded URL prefix under which the parameter archives are found
    ORIGIN = b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLW0wZGVscy8=\n'
    # local folder receiving the downloaded archives
    BASE_DIR = get_datasetpath(root='~')

    def __init__(self, **kwargs):
        """Validate `get_input_info` and register one VariableDesc per input.

        Raises
        ------
        ValueError : if `get_input_info` does not return a non-empty Mapping
            of string names to values accepted by `_validate_shape_dtype`
        """
        super(Model, self).__init__(**kwargs)
        input_info = self.get_input_info()
        if not isinstance(input_info, Mapping) or \
                len(input_info) == 0 or \
                not all(is_string(k) and _validate_shape_dtype(v)
                        for k, v in input_info.items()):
            raise ValueError("`get_input_info` must return a (length > 0) Mapping "
                "of: 'input-name' -> (shape-tuple, dtype-string), but the "
                "returned value is: %s" % str(input_info))
        # ====== init kwargs_desc ====== #
        for name, (shape, dtype) in input_info.items():
            self._kwargs_desc[name] = VariableDesc(
                shape=shape, name=name, dtype=dtype)

    @abstractmethod
    def get_input_info(self):
        """Return a Mapping: 'input-name' -> (shape-tuple, dtype-string)."""
        pass

    def get_loaded_param(self, name):
        """Fetch one or several parameters from the class's parameter dataset.

        Parameters
        ----------
        name : str or collection of str
            parameter name(s) to look up

        Return
        ------
        the single parameter (``ds[name][:]``) when `name` is a string,
        otherwise a tuple of parameters in the order of `name`

        Raises
        ------
        RuntimeError : if any requested name is absent from the dataset
        """
        ds = self.__class__.load_parameters()
        # remember the calling convention before normalizing to a tuple
        return_1_param = bool(is_string(name))
        name = as_tuple(name, t=str)
        if any(n not in ds for n in name):
            raise RuntimeError("Cannot find parameter with name:'%s' from loaded "
                "dataset at path: '%s'" % (name, ds.path))
        params = [ds[n][:] for n in name]
        return params[0] if return_1_param else tuple(params)

    @classmethod
    def load_parameters(clazz):
        """Return the parameter dataset of `clazz`, downloading it if missing.

        When the parameter folder does not exist or is empty, the archive
        ``<class name>.zip`` is downloaded into `Model.BASE_DIR` and
        extracted there; the zip is removed once the folder is confirmed to
        be non-empty.

        Raises
        ------
        RuntimeError : if the parameter folder is still missing or empty
            after extraction
        """
        # ====== all path ====== #
        name = clazz.__name__ + '.zip'
        path = os.path.join(base64.decodebytes(Model.ORIGIN).decode(), name)
        param_path = get_datasetpath(name=clazz.__name__, override=False)
        zip_path = os.path.join(Model.BASE_DIR, name)
        # ====== get params files ====== #
        if not os.path.exists(param_path) or \
                len(os.listdir(param_path)) == 0:
            get_file(name, origin=path, outdir=Model.BASE_DIR)
            # context manager: the archive is closed even if extraction fails
            with ZipFile(zip_path, mode='r', compression=ZIP_DEFLATED) as zf:
                zf.extractall(path=Model.BASE_DIR)
            # check if proper unzipped
            if not os.path.exists(param_path) or \
                    len(os.listdir(param_path)) == 0:
                raise RuntimeError("Zip file at path:%s is not proper unzipped, "
                    "cannot find downloaded parameters at path: %s" %
                    (zip_path, param_path))
            else:
                os.remove(zip_path)
        # ====== create and return the params dataset ====== #
        ds = Dataset(param_path, read_only=True)
        return ds
from odin.basic import has_roles, WEIGHT, BIAS # =========================================================================== # Const # =========================================================================== FEAT = 'mspec' # using mel-spectrogram np.random.seed(12082518) # =========================================================================== # Load wav files # =========================================================================== wav_path = F.load_commands_wav() print("Found:", len(get_all_files(wav_path, filter_func=lambda x: '.wav' in x)), " .wav files") datapath = get_datasetpath("commands", override=False) # ====== start preprocessing audio files ====== # if False: speech = F.SpeechProcessor(wav_path, datapath, win=0.025, shift=0.01, nb_melfilters=40, nb_ceps=13, get_spec=True, get_mspec=True, get_mfcc=True, get_qspec=True, get_phase=True, get_pitch=False, get_energy=True,
# names of the features that must be present in the dataset (checked below)
FEAT = ['mspec', 'sad']
# output locations; all three are requested with override=True
MODEL_PATH = get_modelpath(name='DIGITS', override=True)
LOG_PATH = get_logpath(name='digits.log', override=True)
FIG_PATH = get_figpath(name='DIGITS', override=True)
stdio(LOG_PATH)
DEBUG = False
# ====== training ====== #
BATCH_SIZE = 32
NB_EPOCH = 20
NB_SAMPLES = 8
VALID_PERCENTAGE = 0.4
# ===========================================================================
# Load dataset
# ===========================================================================
path = get_datasetpath(name='TIDIGITS_feats', override=False)
# the preprocessed features must already exist on disk
assert os.path.isdir(path), \
    "Cannot find preprocessed feature at: %s, try to run 'odin/examples/features.py'" % path
ds = F.Dataset(path, read_only=True)
assert all(f in ds for f in FEAT), "Cannot find features with name: %s" % FEAT
# ====== get all the indices of single digit ====== #
# keep only entries whose name ends (after the last '_') with one character
indices = [(name, (s, e))
           for name, (s, e) in list(ds['indices'].items())
           if len(name.split('_')[-1]) == 1]
K.get_rng().shuffle(indices)
print("Found %s utterances of single digit" % ctext(len(indices), 'cyan'))
# ===========================================================================
# Load and visualize the dataset
# ===========================================================================
train = []
test = []
max_length = 0
class DataLoader(object):
    """Base class of downloadable datasets.

    The archive and folder names are derived from the subclass name (plus an
    optional `ext` suffix); archives are fetched from the base64-encoded
    `ORIGIN` prefix into `BASE_DIR`.
    """
    ORIGIN = b'aHR0cHM6Ly9zMy5hbWF6b25hd3MuY29tL2FpLWRhdGFzZXRzLw==\n'
    BASE_DIR = get_datasetpath(root='~')

    def __init__(self):
        super(DataLoader, self).__init__()

    @classmethod
    def md5(clazz, ext=''):
        """Expected MD5 of the dataset; None (the default) disables the check."""
        return None

    @classmethod
    def get_name(clazz, ext=''):
        """Class name, joined to `ext` with '_' when `ext` is non-empty."""
        base = clazz.__name__
        if ext is None or len(ext) == 0:
            return base
        return '_'.join([base, ext])

    @classmethod
    def get_zip_path(clazz, ext=''):
        """Local path of the downloaded zip archive."""
        zip_name = clazz.get_name(ext) + '.zip'
        return os.path.join(DataLoader.BASE_DIR, zip_name)

    @classmethod
    def get_ds_path(clazz, ext=''):
        """Local path of the extracted dataset folder."""
        return os.path.join(DataLoader.BASE_DIR, clazz.get_name(ext))

    @classmethod
    def get_link(clazz, ext=''):
        """Download URL of the zip archive."""
        prefix = base64.decodebytes(DataLoader.ORIGIN).decode()
        return prefix + clazz.get_name(ext) + '.zip'

    @classmethod
    def load(clazz, ext='', override=False):
        """Alias of `get_dataset`."""
        return clazz.get_dataset(ext=ext, override=override)

    @classmethod
    def get_dataset(clazz, ext='', override=False):
        """Return the read-only dataset, downloading and unzipping on demand.

        An existing non-empty folder is reused unless `override` is set; an
        empty or overridden folder is removed and rebuilt from the archive.

        Raises
        ------
        RuntimeError : if a regular file occupies the dataset folder path
        AssertionError : if `md5` returns a checksum that differs from
            `ds.md5`
        """
        # ====== all path ====== #
        archive_name = clazz.get_name(ext) + '.zip'
        url = base64.decodebytes(DataLoader.ORIGIN).decode() + archive_name
        zip_path = clazz.get_zip_path(ext)
        out_path = clazz.get_ds_path(ext)
        # ====== check out_path ====== #
        if os.path.isfile(out_path):
            raise RuntimeError("Found a file at path: %s, we need a folder "
                               "to unzip downloaded files." % out_path)
        if os.path.isdir(out_path):
            reusable = not override and len(os.listdir(out_path)) != 0
            if reusable:
                return Dataset(out_path, read_only=True)
            shutil.rmtree(out_path)
        # ====== download the file ====== #
        if override and os.path.exists(zip_path):
            os.remove(zip_path)
        if not os.path.exists(zip_path):
            get_file(archive_name, url, DataLoader.BASE_DIR)
        # ====== unzip dataset ====== #
        unzip_folder(zip_path, out_path, remove_zip=True)
        ds = Dataset(out_path, read_only=True)
        expected_md5 = clazz.md5(ext=ext)
        if expected_md5 is not None:
            assert ds.md5 == expected_md5, "MD5 checksum mismatch for dataset: %s" % ds.path
        return ds
# ncpu=4: 5.9s # ncpu=8: 4.3 # ncpu=12: 4.0 # =========================================================================== from __future__ import print_function, division, absolute_import import matplotlib matplotlib.use('Agg') import numpy as np import shutil import os from odin import fuel as F, utils from collections import defaultdict datapath = F.load_digit_wav() output_path = utils.get_datasetpath(name='digit', override=True) feat = F.SpeechProcessor(datapath, output_path, audio_ext='wav', sr_new=16000, win=0.025, shift=0.01, nb_melfilters=40, nb_ceps=13, get_delta=2, get_energy=True, get_phase=True, get_spec=True, get_mspec=True, get_mfcc=True, get_pitch=True,