# `os` is used below but was not imported in the visible part of the script;
# re-importing is harmless if an earlier import already exists.
import os
# Standard-library warnings module: replaces the former `np.warnings` alias,
# which is not public NumPy API and is missing from current NumPy releases.
import warnings

import numpy as np
from odin import backend as K, nnet as N, visual as V
from odin import preprocessing as pp
from odin.utils import (args_parse, stdio, get_module_from_path,
                        get_script_path)
from odin.utils.mpi import cpu_count

from utils import (WAV_FILES, SAMPLED_WAV_FILE, PATH_ACOUSTIC_FEAT, PATH_EXP)
# ===========================================================================
# Config
# ===========================================================================
# NOTE(review): `stdio` presumably routes console output to this log file;
# confirm against odin.utils.
stdio(os.path.join(PATH_EXP, 'features_extraction.log'))
args = args_parse(descriptions=[
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('--debug', 'enable debug or not', None, False),
])
DEBUG = args.debug
# ===========================================================================
# Create the recipes
# ===========================================================================
# Look up the objects matching the requested recipe name next to this script;
# the result is used as a sequence whose first element is callable.
extractor = get_module_from_path(identifier=str(args.recipe),
                                 prefix='feature_recipes',
                                 path=get_script_path())
assert len(extractor) > 0, \
    "Cannot find any recipe with name: '%s' from path: '%s'" % (
        args.recipe, get_script_path())
# Build the recipe from the first match, passing the debug flag through.
recipe = extractor[0](DEBUG)
# ====== debugging ====== #
if DEBUG:
    # The 'ignore' filter only lives inside this `with` block; the previous
    # filters are restored as soon as the block exits.
    # NOTE(review): any debug work that must run silenced has to be placed
    # inside this context.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
import os
# NOTE(review): presumably read by odin when it is imported, which is why it
# is set before the imports below -- keep this ordering.
os.environ['ODIN'] = 'float32,gpu,seed=1234'
import timeit

import numpy as np
import tensorflow as tf

from odin import (nnet as N, backend as K, fuel as F, visual as V,
                  training as T, ml)
from odin.utils import args_parse, ctext, batching, Progbar

# Each entry is (flag, help text, allowed values or None, default).
args = args_parse(descriptions=[
    ('-dim', 'latent dimension', None, 2),
    # Help text now lists every accepted choice (cifar10 was missing).
    ('-data', 'dataset: mnist, fmnist or cifar10',
     ('mnist', 'fmnist', 'cifar10'), 'mnist'),
    ('-loss', 'huber, mse, ce (cross-entropy), lglo (log loss)',
     ('huber', 'mse', 'ce', 'lglo'), 'mse'),
    ('-bs', 'batch size', None, 128),
    # Help text fixed: it used to say "batch size" (copy-paste from -bs).
    ('-epoch', 'number of epoch, if negative stop based on valid loss',
     None, -1),
    ('--cnn', 'using convolutional network instead of dense network',
     None, False),
])
# ===========================================================================
# Load dataset
# ===========================================================================
is_cifar10 = False
if args.data == 'fmnist':
    ds = F.FMNIST_original.load()
    # `[:]` takes a full slice of each stored array before indexing it.
    X_train, y_train = ds['X_train'][:], ds['y_train'][:]
    # Shuffle samples and labels with the same permutation so they stay
    # aligned.
    ids = np.random.permutation(len(X_train))
    X_train, y_train = X_train[ids], y_train[ids]
    # Everything after the first 50000 shuffled samples becomes validation
    # data; the first 50000 stay as training data.
    X_valid, y_valid = X_train[50000:], y_train[50000:]
    X_train, y_train = X_train[:50000], y_train[:50000]
import os
# NOTE(review): presumably read by odin when it is imported, which is why it
# is set before the imports below -- keep this ordering.
os.environ['ODIN'] = 'float32,gpu,seed=5218'
import timeit

import numpy as np
import tensorflow as tf

from odin import (nnet as N, backend as K, fuel as F, visual as V,
                  training as T, ml)
from odin.utils import args_parse, ctext, batching, Progbar

# Each entry is (flag, help text, allowed values or None, default).
args = args_parse(descriptions=[
    ('-dim', 'latent dimension', None, 2),
    # Help text now lists every accepted choice (cifar10 was missing).
    ('-data', 'dataset: mnist, fmnist or cifar10',
     ('mnist', 'fmnist', 'cifar10'), 'mnist'),
    ('-loss', 'huber, mse, ce (cross-entropy), lglo (log loss)',
     ('huber', 'mse', 'ce', 'lglo'), 'mse'),
    ('-bs', 'batch size', None, 128),
    # Help text fixed: it used to say "batch size" (copy-paste from -bs).
    ('-epoch', 'number of epoch, if negative stop based on valid loss',
     None, -1),
    ('--cnn', 'using convolutional network instead of dense network',
     None, False),
])
# ===========================================================================
# Load dataset
# ===========================================================================
is_cifar10 = False
if args.data == 'fmnist':
    ds = F.FMNIST_original.load()
    # `[:]` takes a full slice of each stored array before indexing it.
    X_train, y_train = ds['X_train'][:], ds['y_train'][:]
    # Shuffle samples and labels with the same permutation so they stay
    # aligned.
    ids = np.random.permutation(len(X_train))
    X_train, y_train = X_train[ids], y_train[ids]
    # Everything after the first 50000 shuffled samples becomes validation
    # data; the first 50000 stay as training data.
    X_valid, y_valid = X_train[50000:], y_train[50000:]
    X_train, y_train = X_train[:50000], y_train[:50000]
from odin import backend as K, nnet as N, fuel as F, visual as V
from odin.stats import train_valid_test_split, freqcount, sampling_iter
from odin import training
from odin.ml import evaluate, fast_pca, PLDA, Scorer
from odin.utils import (Progbar, unique_labels, as_tuple_of_shape, stdio,
                        ctext, args_parse, get_formatted_datetime, minibatch)

from utils import (prepare_data, make_dnn_prediction, visualize_latent_space,
                   get_exp_path)

# Each entry is (flag, help text, allowed values or None, default).
args = args_parse(descriptions=[
    ('-feat', 'Input feature for training', None, 'mspec24'),
    ('-task', 'gender, age, dialect, speaker, digit', None, 'gender'),
    ('-batch', 'batch size', None, 32),
    ('-epoch', 'Number of training epoch', None, 12),
    ('--retrain', "deleted trained model, and re-train everything",
     None, False),
])
# ===========================================================================
# Const
# ===========================================================================
# Experiment paths come from the 'tvec' system name plus the parsed
# arguments; --retrain is forwarded as the `override` switch.
EXP_DIR, MODEL_PATH, LOG_PATH = get_exp_path(
    'tvec', args, override=args.retrain)
stdio(LOG_PATH)
# ====== load data feeder ====== #
(train, valid,
 X_test_name, X_test_true, X_test_data,
 labels) = prepare_data(feat=args.feat, label=args.task)
# Number of target classes for the selected task.
n_classes = len(labels)
from tensorflow.python.ops import init_ops from odin import training from odin.utils import (args_parse, ctext, Progbar, as_tuple_of_shape, crypto, stdio) from odin import fuel as F, visual as V, nnet as N, backend as K from utils import prepare_dnn_data, get_model_path, csv2mat # =========================================================================== # Configs # =========================================================================== args = args_parse([ ('recipe', 'the name of function defined in feature_recipes.py', None), ('-feat', "Acoustic feature", ('mspec', 'mfcc'), 'mspec'), ('-batch', "batch size", None, 64), ('-epoch', "number of epoch", None, 25), ('-l', "audio segmenting length in second", None, 3), ('--debug', "enable debug mode", None, False), ('--train', "force continue training the saved model", None, False), ]) FEAT = args.feat TRAIN_MODEL = args.train DEBUG = bool(args.debug) (EXP_DIR, MODEL_PATH, LOG_PATH, TRAIN_PATH, TEST_PATH) = get_model_path('xvec', args) stdio(LOG_PATH) # =========================================================================== # Create data feeder # =========================================================================== (train, valid, test_ids, test_dat, all_speakers) = prepare_dnn_data(recipe=args.recipe,
# `os` is used below but was not imported in the visible part of the script;
# re-importing is harmless if an earlier import already exists.
import os
# Standard-library warnings module: replaces the former `np.warnings` alias,
# which is not public NumPy API and is missing from current NumPy releases.
import warnings

import numpy as np
from odin import backend as K, nnet as N, visual as V
from odin import preprocessing as pp
from odin.utils import (args_parse, stdio, get_module_from_path,
                        get_script_path)
from odin.utils.mpi import cpu_count

from utils import (WAV_FILES, SAMPLED_WAV_FILE, PATH_ACOUSTIC_FEAT, PATH_EXP)
# ===========================================================================
# Config
# ===========================================================================
# NOTE(review): `stdio` presumably routes console output to this log file;
# confirm against odin.utils.
stdio(os.path.join(PATH_EXP, 'features_extraction.log'))
args = args_parse(descriptions=[
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('--debug', 'enable debug or not', None, False),
])
DEBUG = args.debug
# ===========================================================================
# Create the recipes
# ===========================================================================
# Look up the objects matching the requested recipe name next to this script;
# the result is used as a sequence whose first element is callable.
extractor = get_module_from_path(identifier=str(args.recipe),
                                 prefix='feature_recipes',
                                 path=get_script_path())
assert len(extractor) > 0, \
    "Cannot find any recipe with name: '%s' from path: '%s'" % (
        args.recipe, get_script_path())
# Build the recipe from the first match, passing the debug flag through.
recipe = extractor[0](DEBUG)
# ====== debugging ====== #
if DEBUG:
    # The 'ignore' filter only lives inside this `with` block; the previous
    # filters are restored as soon as the block exits.
    # NOTE(review): any debug work that must run silenced has to be placed
    # inside this context.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
# Command-line options shared by the speaker-recognition scripts.
# Each entry is (flag, help text, allowed values or None, default).
_args = args_parse(descriptions=[
    ('recipe',
     'recipe is the name of acoustic Dataset defined in feature_recipes.py',
     None),
    ('-feat',
     'specific name for the acoustic features, extracted from the given recipe',
     None, ''),
    ('-aug',
     'augmentation dataset: musan, rirs; could be multiple dataset '
     'for training: "musan,rirs"',
     None, 'None'),
    ('-ncpu',
     'number of CPU to be used, if <= 0, auto-select',
     None, 0),
    # for scoring
    ('-sys',
     'name of the system for scoring: xvec, ivec, e2e ...',
     None, 'xvec'),
    ('-sysid',
     'when a system is saved multiple checkpoint (e.g. sys.0.ai)',
     None, '-1'),
    ('-score',
     'name of dataset for scoring, multiple dataset split by ","',
     None, 'sre18dev,sre18eval'),
    ('-backend',
     'list of dataset for training the backend: '
     'PLDA, SVM or Cosine',
     None, 'sre04,sre05,sre06,sre08,sre10,mx6'),
    ('-lda',
     'if > 0, running LDA before training the backend '
     'with given number of components',
     None, 0),
    ('-plda',
     'number of PLDA components, must be > 0 ',
     None, 150),
    ('--mll',
     'pre-fitting maximum likelihood before training PLDA',
     None, False),
    ('--showllk',
     'show LLK during training of PLDA, this will slow thing down',
     None, False),
    # for training
    ('-downsample',
     'absolute number of files used for training',
     None, 0),
    ('-exclude',
     'list of excluded dataset not for training,'
     'multiple dataset split by ","',
     None, ''),
    # for ivector
    ('-nmix',
     'for i-vector training, number of Gaussian components',
     None, 2048),
    ('-tdim',
     'for i-vector training, number of latent dimension for i-vector',
     None, 600),
    # for DNN
    ('-utt',
     'maximum length of sequence for training',
     None, 3),
    ('-seq',
     'sequencing mode for training data, cut or pad',
     None, 'cut'),
    ('-batch',
     'batch size, for training DNN, kaldi use 64, we use 128',
     None, 128),
    ('-epoch',
     'number of epoch, for training DNN, kaldi only 3 epochs',
     None, 12),
    ('-clip',
     'The maximum change in parameters allowed per minibatch, '
     'measured in Euclidean norm over the entire model (change '
     'will be clipped to this value), kaldi use 2.0',
     None, 2.0),
    ('-lr',
     'learning rate for Adam, kaldi use 0.001 by default,'
     ' we use 0.01',
     None, 0.01),
    # others
    ('-mindur',
     'for filtering utterances, minimum duration of utterance '
     'for training (in second)',
     None, 1),
    ('-minutt',
     'for filtering utterances, minimum number of utterance of '
     'each speaker for training',
     None, 3),
    ('--override', 'override previous experiments', None, False),
    ('--debug', 'enable debugging', None, False),
])
import numpy as np

from odin import ml
from odin import fuel as F
from odin.utils import args_parse, ctext, stdio, Progbar

from utils import (get_model_path, prepare_ivec_data, csv2mat, TRAIN_DATA)
# ===========================================================================
# Configs
# ===========================================================================
# Each entry is (flag, help text, allowed values or None, default).
args = args_parse([
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('-nmix', "Number of GMM mixture", None, 2048),
    ('-tdim', "Dimension of t-matrix", None, 600),
    ('-feat', "Acoustic feature", ('mspec', 'bnf'), 'bnf'),
    ('--gmm', "Force re-run training GMM", None, False),
    ('--stat', "Force re-extraction of centered statistics", None, False),
    ('--tmat', "Force re-run training Tmatrix", None, False),
    ('--ivec', "Force re-run extraction of i-vector", None, False),
    ('--all', "Run all the system again, just a shortcut", None, False),
])
# Cascade the "force re-run" switches: --all turns every one of them on, and
# each switch also turns on the one that follows it
# (gmm -> stat -> tmat -> ivec).
args.gmm = args.gmm | args.all
args.stat = args.stat | (args.all | args.gmm)
args.tmat = args.tmat | (args.all | args.stat)
args.ivec = args.ivec | (args.all | args.tmat)
FEAT = args.feat
(EXP_DIR, MODEL_PATH, LOG_PATH,
 TRAIN_PATH, TEST_PATH) = get_model_path('ivec', args)
stdio(LOG_PATH)
# ===========================================================================
# Load dataset
from tensorflow.python.ops import init_ops from odin import training from odin.utils import (args_parse, ctext, Progbar, as_tuple_of_shape, crypto, stdio) from odin import fuel as F, visual as V, nnet as N, backend as K from utils import prepare_dnn_data, get_model_path, csv2mat # =========================================================================== # Configs # =========================================================================== args = args_parse([ ('recipe', 'the name of function defined in feature_recipes.py', None), ('-feat', "Acoustic feature", ('mspec', 'mfcc'), 'mspec'), ('-batch', "batch size", None, 64), ('-epoch', "number of epoch", None, 25), ('-l', "audio segmenting length in second", None, 3), ('--debug', "enable debug mode", None, False), ('--train', "force continue training the saved model", None, False), ]) FEAT = args.feat TRAIN_MODEL = args.train DEBUG = bool(args.debug) (EXP_DIR, MODEL_PATH, LOG_PATH, TRAIN_PATH, TEST_PATH) = get_model_path('xvec', args) stdio(LOG_PATH) # =========================================================================== # Create data feeder # =========================================================================== (train, valid, test_ids, test_dat,
# Select the 'Agg' matplotlib backend.
matplotlib.use('Agg')
# NOTE(review): presumably consumed by odin to pick dtype, device and random
# seed -- confirm where it is read.
os.environ['ODIN'] = 'float32,gpu,seed=1234'

# Each entry is (flag, help text, allowed values or None, default).
args = args_parse(descriptions=[
    ('-ds', 'dataset', None, 'mnist_original'),
    ('-zdim', 'latent dimension', None, 64),
    ('-hdim', 'number of hidden units', None, 256),

    ('-xdist', 'distribution of input X', None, 'poisson'),
    ('-zdist', 'posterior distribution of latent Z', None, 'normal'),
    ('-zprior', 'prior distribution of latent Z', None, 'normal01'),

    ('-xdrop', 'dropout on input X', None, 0),
    ('-edrop', 'dropout on the encoder E', None, 0),
    ('-zdrop', 'dropout on latent Z', None, 0),
    ('-ddrop', 'dropout on the decoder D', None, 0),

    ('-nsample-train', 'number of posterior samples', None, 1),
    ('-nsample-test', 'number of posterior samples', None, 1000),
    ('-batch', 'batch size', None, 64),
    ('-epoch', 'number of epoch', None, 200),

    ('--no-monitor', 'turn off epoch monitoring, significantly faster',
     None, False),
    ('--no-batchnorm', 'turn off batch normalization', None, False),
    ('--analytic', 'using analytic KL or sampling', None, False),
    ('--iw', 'enable important weights sampling', None, False),
])

# Output directory for figures (hard-coded).
FIGURE_PATH = '/tmp'
# ===========================================================================
# Load dataset
# ===========================================================================
from odin import backend as K, nnet as N, fuel as F, visual as V
from odin.stats import train_valid_test_split, freqcount, sampling_iter
from odin import training
from odin.ml import evaluate, fast_pca, PLDA, Scorer
from odin.utils import (Progbar, unique_labels, as_tuple_of_shape, stdio,
                        ctext, args_parse, get_formatted_datetime, batching)

from utils import (prepare_data, make_dnn_prediction, visualize_latent_space,
                   get_exp_path)

# Each entry is (flag, help text, allowed values or None, default).
args = args_parse(descriptions=[
    ('-feat', 'Input feature for training', None, 'mspec24'),
    ('-task', 'gender, age, dialect, speaker, digit', None, 'gender'),
    ('-batch', 'batch size', None, 32),
    ('-epoch', 'Number of training epoch', None, 12),
    ('--retrain', "deleted trained model, and re-train everything",
     None, False),
])
# ===========================================================================
# Const
# ===========================================================================
# Experiment paths come from the 'tvec' system name plus the parsed
# arguments; --retrain is forwarded as the `override` switch.
EXP_DIR, MODEL_PATH, LOG_PATH = get_exp_path(
    'tvec', args, override=args.retrain)
stdio(LOG_PATH)
# ====== load data feeder ====== #
(train, valid,
 X_test_name, X_test_true, X_test_data,
 labels) = prepare_data(feat=args.feat, label=args.task)
# Number of target classes for the selected task.
n_classes = len(labels)
# ===========================================================================
# Create model
from odin import ml
from odin import fuel as F
from odin.utils import args_parse, ctext, stdio, Progbar

from utils import (get_model_path, prepare_ivec_data, csv2mat, TRAIN_DATA)
# ===========================================================================
# Configs
# ===========================================================================
# Each entry is (flag, help text, allowed values or None, default).
args = args_parse([
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('-nmix', "Number of GMM mixture", None, 2048),
    ('-tdim', "Dimension of t-matrix", None, 600),
    ('-feat', "Acoustic feature", ('mspec', 'bnf'), 'bnf'),
    ('--gmm', "Force re-run training GMM", None, False),
    ('--stat', "Force re-extraction of centered statistics", None, False),
    ('--tmat', "Force re-run training Tmatrix", None, False),
    ('--ivec', "Force re-run extraction of i-vector", None, False),
    ('--all', "Run all the system again, just a shortcut", None, False),
])
# Cascade the "force re-run" switches: --all turns every one of them on, and
# each switch also turns on the one that follows it
# (gmm -> stat -> tmat -> ivec).
args.gmm = args.gmm | args.all
args.stat = args.stat | (args.all | args.gmm)
args.tmat = args.tmat | (args.all | args.stat)
args.ivec = args.ivec | (args.all | args.tmat)
FEAT = args.feat
(EXP_DIR, MODEL_PATH, LOG_PATH,
 TRAIN_PATH, TEST_PATH) = get_model_path('ivec', args)
stdio(LOG_PATH)
# ===========================================================================
# Load dataset
# ===========================================================================
import os os.environ['ODIN'] = 'float32,cpu=1,thread=1,gpu=1' import sys import shutil import pickle import numpy as np from odin import visual as V, nnet as N from odin.utils import ctext, unique_labels, Progbar, UnitTimer, args_parse from odin.utils.mpi import cpu_count from odin import fuel as F, preprocessing as pp from utils import (PATH_ACOUSTIC, PATH_EXP, FeatureConfigs) args = args_parse(descriptions=[ ('--debug', 'enable debugging', None, False), ('-ncpu', 'if smaller than 1, auto select all possible CPU', None, 0) ]) audio = F.TIDIGITS.load() print(audio) all_files = sorted(list(audio['indices'].keys())) # =========================================================================== # Extractor # =========================================================================== bnf_network = N.models.BNF_2048_MFCC40() extractors = pp.make_pipeline(steps=[ pp.speech.AudioReader(sr=FeatureConfigs.sr, dataset=audio), pp.speech.PreEmphasis(coeff=0.97), pp.speech.Dithering(), # ====== STFT ====== # pp.speech.STFTExtractor(frame_length=FeatureConfigs.frame_length, step_length=FeatureConfigs.step_length,
import os os.environ['ODIN'] = 'float32,cpu=1,thread=1,gpu=1' import sys import shutil import pickle import numpy as np from odin import visual as V, nnet as N from odin.utils import ctext, unique_labels, Progbar, UnitTimer, args_parse from odin.utils.mpi import cpu_count from odin import fuel as F, preprocessing as pp from utils import (PATH_ACOUSTIC, PATH_EXP, FeatureConfigs) args = args_parse( descriptions=[('--debug', 'enable debugging', None, False), ('-ncpu', 'if smaller than 1, auto select all possible CPU', None, 0)]) audio = F.TIDIGITS.load() print(audio) all_files = sorted(list(audio['indices'].keys())) # =========================================================================== # Extractor # =========================================================================== bnf_network = N.models.BNF_2048_MFCC40() extractors = pp.make_pipeline( steps=[ pp.speech.AudioReader(sr=FeatureConfigs.sr, dataset=audio), pp.speech.PreEmphasis(coeff=0.97), pp.speech.Dithering(), # ====== STFT ====== # pp.speech.STFTExtractor(frame_length=FeatureConfigs.frame_length,