Example #1
import os
import warnings

import numpy as np

from odin import backend as K, nnet as N, visual as V
from odin import preprocessing as pp
from odin.utils import (args_parse, stdio, get_module_from_path,
                        get_script_path)
from odin.utils.mpi import cpu_count

from utils import (WAV_FILES, SAMPLED_WAV_FILE, PATH_ACOUSTIC_FEAT, PATH_EXP)
# ===========================================================================
# Config
# ===========================================================================
stdio(os.path.join(PATH_EXP, 'features_extraction.log'))
args = args_parse(descriptions=[
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('--debug', 'enable debug or not', None, False)
])
DEBUG = args.debug
# ===========================================================================
# Create the recipes
# ===========================================================================
extractor = get_module_from_path(identifier=str(args.recipe),
                                 prefix='feature_recipes',
                                 path=get_script_path())
assert len(extractor) > 0, \
    "Cannot find any recipe with name: '%s' from path: '%s'" % \
    (args.recipe, get_script_path())
recipe = extractor[0](DEBUG)
# ====== debugging ====== #
if DEBUG:
    # the original used np.warnings, an alias newer NumPy no longer
    # provides; the stdlib warnings module does the same job
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
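Note: odin.utils.args_parse is not documented in these snippets, but every call site on this page follows the same (name, description, choices, default) tuple convention: a bare name is a required positional, a single leading '-' takes a value, and '--' marks a boolean switch. As a rough illustration only (this is not odin's implementation), the convention maps onto the standard library's argparse like so:

import argparse

def args_parse_sketch(descriptions):
    # Hypothetical stand-in for odin.utils.args_parse, inferred from
    # the call sites in these examples; the real function may differ.
    parser = argparse.ArgumentParser()
    for desc in descriptions:
        name, help_text = desc[0], desc[1]
        choices = desc[2] if len(desc) > 2 else None
        default = desc[3] if len(desc) > 3 else None
        if name.startswith('--'):
            # '--flag' entries act as on/off switches defaulting to False
            parser.add_argument(name, help=help_text, action='store_true')
        elif name.startswith('-'):
            # '-opt' entries take a value, with optional choices and default
            arg_type = type(default) if default is not None else str
            parser.add_argument(name, help=help_text, choices=choices,
                                default=default, type=arg_type)
        else:
            # bare names are required positionals (e.g. 'recipe')
            parser.add_argument(name, help=help_text, choices=choices)
    return parser.parse_args()

Under this reading, Example #1 accepts a required recipe argument plus an optional --debug flag.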
Example #2
import os
os.environ['ODIN'] = 'float32,gpu,seed=1234'
import timeit

import numpy as np

import tensorflow as tf

from odin import (nnet as N, backend as K, fuel as F,
                  visual as V, training as T, ml)
from odin.utils import args_parse, ctext, batching, Progbar

args = args_parse(descriptions=[
    ('-dim', 'latent dimension', None, 2),
    ('-data', 'dataset: mnist, fmnist or cifar10', ('mnist', 'fmnist', 'cifar10'), 'mnist'),
    ('-loss', 'huber, mse, ce (cross-entropy), lglo (log loss)', ('huber', 'mse', 'ce', 'lglo'), 'mse'),
    ('-bs', 'batch size', None, 128),
    ('-epoch', 'number of epochs, if negative stop based on valid loss', None, -1),
    ('--cnn', 'using convolutional network instead of dense network', None, False)
])
# ===========================================================================
# Load dataset
# ===========================================================================
is_cifar10 = False
if args.data == 'fmnist':
  ds = F.FMNIST_original.load()
  X_train, y_train = ds['X_train'][:], ds['y_train'][:]
  ids = np.random.permutation(len(X_train))
  X_train, y_train = X_train[ids], y_train[ids]

  X_valid, y_valid = X_train[50000:], y_train[50000:]
  X_train, y_train = X_train[:50000], y_train[:50000]
Example #3
import os
os.environ['ODIN'] = 'float32,gpu,seed=5218'
import timeit

import numpy as np

import tensorflow as tf

from odin import (nnet as N, backend as K, fuel as F,
                  visual as V, training as T, ml)
from odin.utils import args_parse, ctext, batching, Progbar

args = args_parse(descriptions=[
    ('-dim', 'latent dimension', None, 2),
    ('-data', 'dataset: mnist, fmnist or cifar10', ('mnist', 'fmnist', 'cifar10'), 'mnist'),
    ('-loss', 'huber, mse, ce (cross-entropy), lglo (log loss)', ('huber', 'mse', 'ce', 'lglo'), 'mse'),
    ('-bs', 'batch size', None, 128),
    ('-epoch', 'number of epochs, if negative stop based on valid loss', None, -1),
    ('--cnn', 'using convolutional network instead of dense network', None, False)
])
# ===========================================================================
# Load dataset
# ===========================================================================
is_cifar10 = False
if args.data == 'fmnist':
  ds = F.FMNIST_original.load()
  X_train, y_train = ds['X_train'][:], ds['y_train'][:]
  ids = np.random.permutation(len(X_train))
  X_train, y_train = X_train[ids], y_train[ids]

  X_valid, y_valid = X_train[50000:], y_train[50000:]
  X_train, y_train = X_train[:50000], y_train[:50000]
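Both FMNIST snippets shuffle the full training set before carving off a validation split; Fashion-MNIST ships 60,000 training images, so the slices above yield 50,000 for training and 10,000 for validation. A self-contained sketch of the same shuffle-then-split pattern:

import numpy as np

def shuffle_split(X, y, n_train, seed=1234):
    # shuffle X and y in unison, then split into train/validation parts
    rng = np.random.RandomState(seed)
    ids = rng.permutation(len(X))
    X, y = X[ids], y[ids]
    return (X[:n_train], y[:n_train]), (X[n_train:], y[n_train:])

# toy arrays standing in for ds['X_train'] / ds['y_train']
X = np.zeros((60000, 28, 28), dtype='float32')
y = np.arange(60000) % 10
(X_train, y_train), (X_valid, y_valid) = shuffle_split(X, y, n_train=50000)
assert len(X_train) == 50000 and len(X_valid) == 10000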
Example #4
File: tvec.py Project: trungnt13/odin-ai
from odin import backend as K, nnet as N, fuel as F, visual as V
from odin.stats import train_valid_test_split, freqcount, sampling_iter
from odin import training
from odin.ml import evaluate, fast_pca, PLDA, Scorer
from odin.utils import (Progbar, unique_labels, as_tuple_of_shape, stdio,
                        ctext, args_parse, get_formatted_datetime, minibatch)

from utils import (prepare_data, make_dnn_prediction, visualize_latent_space,
                   get_exp_path)

args = args_parse(descriptions=[
    ('-feat', 'Input feature for training', None, 'mspec24'),
    ('-task', 'gender, age, dialect, speaker, digit', None, 'gender'),
    ('-batch', 'batch size', None, 32),
    ('-epoch', 'Number of training epochs', None, 12),
    ('--retrain', "delete trained model and re-train everything", None, False)
])
# ===========================================================================
# Const
# ===========================================================================
EXP_DIR, MODEL_PATH, LOG_PATH = get_exp_path('tvec',
                                             args,
                                             override=args.retrain)
stdio(LOG_PATH)
# ====== load data feeder ====== #
(train, valid, X_test_name, X_test_true, X_test_data,
 labels) = prepare_data(feat=args.feat, label=args.task)
n_classes = len(labels)
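fast_pca and visualize_latent_space are odin helpers imported above; as a hedged stand-in only (not odin's implementation, and assuming scikit-learn is available), the same latent-space projection step can be sketched with scikit-learn's PCA before plotting:

import numpy as np
from sklearn.decomposition import PCA

def project_2d(Z):
    # reduce a latent matrix (n_samples, n_dims) to 2 components for plotting
    return PCA(n_components=2).fit_transform(Z)

Z = np.random.randn(200, 128)  # toy latent vectors
assert project_2d(Z).shape == (200, 2)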
Example #5
from tensorflow.python.ops import init_ops

from odin import training
from odin.utils import (args_parse, ctext, Progbar, as_tuple_of_shape, crypto,
                        stdio)
from odin import fuel as F, visual as V, nnet as N, backend as K

from utils import prepare_dnn_data, get_model_path, csv2mat
# ===========================================================================
# Configs
# ===========================================================================
args = args_parse([
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('-feat', "Acoustic feature", ('mspec', 'mfcc'), 'mspec'),
    ('-batch', "batch size", None, 64),
    ('-epoch', "number of epochs", None, 25),
    ('-l', "audio segment length in seconds", None, 3),
    ('--debug', "enable debug mode", None, False),
    ('--train', "force continued training of the saved model", None, False),
])
FEAT = args.feat
TRAIN_MODEL = args.train
DEBUG = bool(args.debug)
(EXP_DIR, MODEL_PATH, LOG_PATH, TRAIN_PATH,
 TEST_PATH) = get_model_path('xvec', args)
stdio(LOG_PATH)
# ===========================================================================
# Create data feeder
# ===========================================================================
(train, valid, test_ids, test_dat,
 all_speakers) = prepare_dnn_data(recipe=args.recipe,
Example #6
import os
import warnings

import numpy as np

from odin import backend as K, nnet as N, visual as V
from odin import preprocessing as pp
from odin.utils import (args_parse, stdio,
                        get_module_from_path, get_script_path)
from odin.utils.mpi import cpu_count

from utils import (WAV_FILES, SAMPLED_WAV_FILE,
                   PATH_ACOUSTIC_FEAT, PATH_EXP)
# ===========================================================================
# Config
# ===========================================================================
stdio(os.path.join(PATH_EXP, 'features_extraction.log'))
args = args_parse(descriptions=[
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('--debug', 'enable debug or not', None, False)
])
DEBUG = args.debug
# ===========================================================================
# Create the recipes
# ===========================================================================
extractor = get_module_from_path(identifier=str(args.recipe),
                                 prefix='feature_recipes',
                                 path=get_script_path())
assert len(extractor) > 0, \
  "Cannot find any recipe with name: '%s' from path: '%s'" % \
  (args.recipe, get_script_path())
recipe = extractor[0](DEBUG)
# ====== debugging ====== #
if DEBUG:
  # the original used np.warnings, an alias newer NumPy no longer
  # provides; the stdlib warnings module does the same job
  with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
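get_module_from_path is odin's own lookup; judging from the assert and the extractor[0](DEBUG) call, it returns a list of objects defined in feature_recipes.py whose name matches the identifier. A rough, hypothetical equivalent using only the standard library (the real function may behave differently):

import importlib.util
import os

def find_recipes(identifier, path, prefix='feature_recipes'):
    # load <path>/feature_recipes.py and collect matching callables
    spec = importlib.util.spec_from_file_location(
        prefix, os.path.join(path, prefix + '.py'))
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return [obj for name, obj in vars(module).items()
            if name == identifier and callable(obj)]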
Example #7
_args = args_parse(descriptions=[
    ('recipe',
     'recipe is the name of acoustic Dataset defined in feature_recipes.py',
     None),
    ('-feat',
     'specific name for the acoustic features, extracted from the given recipe',
     None, ''),
    ('-aug', 'augmentation dataset: musan, rirs; can be multiple datasets '
     'for training: "musan,rirs"', None, 'None'),
    ('-ncpu', 'number of CPU to be used, if <= 0, auto-select', None, 0),
    # for scoring
    ('-sys', 'name of the system for scoring: xvec, ivec, e2e ...', None,
     'xvec'),
    ('-sysid', 'when a system has multiple saved checkpoints (e.g. sys.0.ai)',
     None, '-1'),
    ('-score', 'name of dataset for scoring, multiple datasets split by ","',
     None, 'sre18dev,sre18eval'),
    ('-backend', 'list of datasets for training the backend: '
     'PLDA, SVM or Cosine', None, 'sre04,sre05,sre06,sre08,sre10,mx6'),
    ('-lda', 'if > 0, running LDA before training the backend '
     'with given number of components', None, 0),
    ('-plda', 'number of PLDA components, must be > 0', None, 150),
    ('--mll', 'pre-fitting maximum likelihood before training PLDA', None,
     False),
    ('--showllk',
     'show LLK during training of PLDA, this will slow things down', None,
     False),
    # for training
    ('-downsample', 'absolute number of files used for training', None, 0),
    ('-exclude', 'list of datasets excluded from training, '
     'multiple datasets split by ","', None, ''),
    # for ivector
    ('-nmix', 'for i-vector training, number of Gaussian components', None,
     2048),
    ('-tdim', 'for i-vector training, number of latent dimension for i-vector',
     None, 600),
    # for DNN
    ('-utt', 'maximum length of sequence for training', None, 3),
    ('-seq', 'sequencing mode for training data, cut or pad', None, 'cut'),
    ('-batch', 'batch size for training DNN, kaldi uses 64, we use 128', None,
     128),
    ('-epoch', 'number of epochs for training DNN, kaldi uses only 3', None,
     12),
    ('-clip', 'The maximum change in parameters allowed per minibatch, '
     'measured in Euclidean norm over the entire model (change '
     'will be clipped to this value), kaldi uses 2.0', None, 2.0),
    ('-lr', 'learning rate for Adam, kaldi uses 0.001 by default,'
     ' we use 0.01', None, 0.01),
    # others
    ('-mindur', 'for filtering utterances, minimum duration of an utterance '
     'for training (in seconds)', None, 1),
    ('-minutt', 'for filtering utterances, minimum number of utterances per '
     'speaker for training', None, 3),
    ('--override', 'override previous experiments', None, False),
    ('--debug', 'enable debugging', None, False),
])
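Several of these options ('-aug', '-score', '-backend', '-exclude') pack multiple dataset names into one comma-separated string, with 'None' or '' presumably meaning no datasets. A small sketch of the post-processing such a convention implies (the helper name is made up; the real script may differ):

def split_datasets(value):
    # turn 'sre18dev,sre18eval' into ['sre18dev', 'sre18eval']
    if value in ('', 'None'):
        return []
    return [name.strip() for name in value.split(',') if name.strip()]

assert split_datasets('sre18dev,sre18eval') == ['sre18dev', 'sre18eval']
assert split_datasets('None') == []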
Example #8
import numpy as np

from odin import ml
from odin import fuel as F
from odin.utils import args_parse, ctext, stdio, Progbar

from utils import (get_model_path, prepare_ivec_data, csv2mat, TRAIN_DATA)
# ===========================================================================
# Configs
# ===========================================================================
args = args_parse([
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('-nmix', "Number of GMM mixture", None, 2048),
    ('-tdim', "Dimension of t-matrix", None, 600),
    ('-feat', "Acoustic feature", ('mspec', 'bnf'), 'bnf'),
    ('--gmm', "Force re-run training GMM", None, False),
    ('--stat', "Force re-extraction of centered statistics", None, False),
    ('--tmat', "Force re-run training Tmatrix", None, False),
    ('--ivec', "Force re-run extraction of i-vector", None, False),
    ('--all', "Run all the system again, just a shortcut", None, False),
])
args.gmm |= args.all
args.stat |= args.all | args.gmm
args.tmat |= args.all | args.stat
args.ivec |= args.all | args.tmat
FEAT = args.feat
EXP_DIR, MODEL_PATH, LOG_PATH, TRAIN_PATH, TEST_PATH = get_model_path(
    'ivec', args)
stdio(LOG_PATH)
# ===========================================================================
# Load dataset
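The chained |= updates above implement a stage cascade: forcing any stage to re-run also forces every stage downstream of it (GMM, then centered statistics, then T-matrix, then i-vector extraction), and --all forces the lot. The same cascade reappears in Example #12. The logic in isolation:

# stand-alone demonstration of the cascade; 'Flags' is a toy stand-in
# for the parsed args object
class Flags:
    def __init__(self, **kw):
        self.gmm = kw.get('gmm', False)
        self.stat = kw.get('stat', False)
        self.tmat = kw.get('tmat', False)
        self.ivec = kw.get('ivec', False)
        self.all = kw.get('all', False)

f = Flags(gmm=True)            # force only the GMM stage...
f.gmm |= f.all
f.stat |= f.all | f.gmm
f.tmat |= f.all | f.stat
f.ivec |= f.all | f.tmat
assert (f.stat, f.tmat, f.ivec) == (True, True, True)  # ...the rest follows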
Example #9
File: train_xvec.py Project: imito/odin
from tensorflow.python.ops import init_ops

from odin import training
from odin.utils import (args_parse, ctext, Progbar, as_tuple_of_shape,
                        crypto, stdio)
from odin import fuel as F, visual as V, nnet as N, backend as K

from utils import prepare_dnn_data, get_model_path, csv2mat
# ===========================================================================
# Configs
# ===========================================================================
args = args_parse([
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('-feat', "Acoustic feature", ('mspec', 'mfcc'), 'mspec'),
    ('-batch', "batch size", None, 64),
    ('-epoch', "number of epochs", None, 25),
    ('-l', "audio segment length in seconds", None, 3),
    ('--debug', "enable debug mode", None, False),
    ('--train', "force continued training of the saved model", None, False),
])
FEAT = args.feat
TRAIN_MODEL = args.train
DEBUG = bool(args.debug)
(EXP_DIR, MODEL_PATH, LOG_PATH,
 TRAIN_PATH, TEST_PATH) = get_model_path('xvec', args)
stdio(LOG_PATH)
# ===========================================================================
# Create data feeder
# ===========================================================================
(train, valid,
 test_ids, test_dat,
Example #10
import matplotlib
matplotlib.use('Agg')

import os
os.environ['ODIN'] = 'float32,gpu,seed=1234'

from odin.utils import args_parse

args = args_parse(descriptions=[
    ('-ds', 'dataset', None, 'mnist_original'),
    ('-zdim', 'latent dimension', None, 64),
    ('-hdim', 'number of hidden units', None, 256),
    ('-xdist', 'distribution of input X', None, 'poisson'),
    ('-zdist', 'posterior distribution of latent Z', None, 'normal'),
    ('-zprior', 'prior distribution of latent Z', None, 'normal01'),
    ('-xdrop', 'dropout on input X', None, 0),
    ('-edrop', 'dropout on the encoder E', None, 0),
    ('-zdrop', 'dropout on latent Z', None, 0),
    ('-ddrop', 'dropout on the decoder D', None, 0),
    ('-nsample-train', 'number of posterior samples during training', None, 1),
    ('-nsample-test', 'number of posterior samples during testing', None, 1000),
    ('-batch', 'batch size', None, 64),
    ('-epoch', 'number of epoch', None, 200),
    ('--no-monitor', 'turn off epoch monitoring, significantly faster', None,
     False),
    ('--no-batchnorm', 'turn off batch normalization', None, False),
    ('--analytic', 'use analytic KL instead of sampling', None, False),
    ('--iw', 'enable importance-weighted sampling', None, False),
])

FIGURE_PATH = '/tmp'
# ===========================================================================
# Load dataset
# ===========================================================================
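The --analytic switch above chooses between a closed-form KL term and a Monte-Carlo estimate. For a diagonal Gaussian posterior q = N(mu, sigma^2) against the standard-normal prior implied by '-zprior normal01', the closed form is KL = 0.5 * sum(mu^2 + sigma^2 - log sigma^2 - 1). A numpy sketch comparing the two (illustrative only, not odin's code):

import numpy as np

rng = np.random.RandomState(1234)
mu, log_var = rng.randn(2), rng.randn(2) * 0.1

# analytic KL( N(mu, sigma^2) || N(0, 1) ) for a diagonal Gaussian
kl_analytic = 0.5 * np.sum(mu**2 + np.exp(log_var) - log_var - 1.0)

# Monte-Carlo estimate via reparameterized samples z = mu + sigma * eps
n = 100000
eps = rng.randn(n, 2)
z = mu + np.exp(0.5 * log_var) * eps
log_q = -0.5 * np.sum((z - mu)**2 / np.exp(log_var) + log_var
                      + np.log(2 * np.pi), axis=1)
log_p = -0.5 * np.sum(z**2 + np.log(2 * np.pi), axis=1)
kl_sampled = np.mean(log_q - log_p)
assert abs(kl_analytic - kl_sampled) < 0.05  # agree up to sampling noise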
Example #11
File: tvec.py Project: imito/odin
from odin import backend as K, nnet as N, fuel as F, visual as V
from odin.stats import train_valid_test_split, freqcount, sampling_iter
from odin import training
from odin.ml import evaluate, fast_pca, PLDA, Scorer
from odin.utils import (Progbar, unique_labels, as_tuple_of_shape, stdio,
                        ctext, args_parse, get_formatted_datetime,
                        batching)

from utils import (prepare_data, make_dnn_prediction, visualize_latent_space,
                   get_exp_path)

args = args_parse(descriptions=[
    ('-feat', 'Input feature for training', None, 'mspec24'),
    ('-task', 'gender, age, dialect, speaker, digit', None, 'gender'),
    ('-batch', 'batch size', None, 32),
    ('-epoch', 'Number of training epochs', None, 12),
    ('--retrain', "delete trained model and re-train everything", None, False)
])
# ===========================================================================
# Const
# ===========================================================================
EXP_DIR, MODEL_PATH, LOG_PATH = get_exp_path('tvec', args, override=args.retrain)
stdio(LOG_PATH)
# ====== load data feeder ====== #
(train, valid,
 X_test_name, X_test_true, X_test_data,
 labels) = prepare_data(feat=args.feat, label=args.task)
n_classes = len(labels)
# ===========================================================================
# Create model
Example #12
File: train_ivec.py Project: imito/odin
from odin import ml
from odin import fuel as F
from odin.utils import args_parse, ctext, stdio, Progbar

from utils import (get_model_path, prepare_ivec_data, csv2mat,
                   TRAIN_DATA)
# ===========================================================================
# Configs
# ===========================================================================
args = args_parse([
    ('recipe', 'the name of function defined in feature_recipes.py', None),
    ('-nmix', "Number of GMM mixture", None, 2048),
    ('-tdim', "Dimension of t-matrix", None, 600),
    ('-feat', "Acoustic feature", ('mspec', 'bnf'), 'bnf'),
    ('--gmm', "Force re-run training GMM", None, False),
    ('--stat', "Force re-extraction of centered statistics", None, False),
    ('--tmat', "Force re-run training Tmatrix", None, False),
    ('--ivec', "Force re-run extraction of i-vector", None, False),
    ('--all', "Run all the system again, just a shortcut", None, False),
])
args.gmm |= args.all
args.stat |= args.all | args.gmm
args.tmat |= args.all | args.stat
args.ivec |= args.all | args.tmat
FEAT = args.feat
EXP_DIR, MODEL_PATH, LOG_PATH, TRAIN_PATH, TEST_PATH = get_model_path('ivec', args)
stdio(LOG_PATH)
# ===========================================================================
# Load dataset
# ===========================================================================
Example #13
import os
os.environ['ODIN'] = 'float32,cpu=1,thread=1,gpu=1'
import sys
import shutil
import pickle

import numpy as np

from odin import visual as V, nnet as N
from odin.utils import ctext, unique_labels, Progbar, UnitTimer, args_parse
from odin.utils.mpi import cpu_count
from odin import fuel as F, preprocessing as pp
from utils import (PATH_ACOUSTIC, PATH_EXP, FeatureConfigs)

args = args_parse(descriptions=[
    ('--debug', 'enable debugging', None, False),
    ('-ncpu', 'if smaller than 1, auto select all possible CPU', None, 0)
])
audio = F.TIDIGITS.load()
print(audio)
all_files = sorted(list(audio['indices'].keys()))
# ===========================================================================
# Extractor
# ===========================================================================
bnf_network = N.models.BNF_2048_MFCC40()
extractors = pp.make_pipeline(steps=[
    pp.speech.AudioReader(sr=FeatureConfigs.sr, dataset=audio),
    pp.speech.PreEmphasis(coeff=0.97),
    pp.speech.Dithering(),
    # ====== STFT ====== #
    pp.speech.STFTExtractor(frame_length=FeatureConfigs.frame_length,
                            step_length=FeatureConfigs.step_length,
Example #14
import os
os.environ['ODIN'] = 'float32,cpu=1,thread=1,gpu=1'
import sys
import shutil
import pickle

import numpy as np

from odin import visual as V, nnet as N
from odin.utils import ctext, unique_labels, Progbar, UnitTimer, args_parse
from odin.utils.mpi import cpu_count
from odin import fuel as F, preprocessing as pp
from utils import (PATH_ACOUSTIC, PATH_EXP, FeatureConfigs)

args = args_parse(descriptions=[
    ('--debug', 'enable debugging', None, False),
    ('-ncpu', 'if smaller than 1, auto select all possible CPU', None, 0)
])
audio = F.TIDIGITS.load()
print(audio)
all_files = sorted(list(audio['indices'].keys()))
# ===========================================================================
# Extractor
# ===========================================================================
bnf_network = N.models.BNF_2048_MFCC40()
extractors = pp.make_pipeline(
    steps=[
        pp.speech.AudioReader(sr=FeatureConfigs.sr, dataset=audio),
        pp.speech.PreEmphasis(coeff=0.97),
        pp.speech.Dithering(),
        # ====== STFT ====== #
        pp.speech.STFTExtractor(frame_length=FeatureConfigs.frame_length,
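pp.speech.PreEmphasis(coeff=0.97) in both pipelines applies the standard pre-emphasis high-pass filter, y[t] = x[t] - 0.97 * x[t-1], which boosts high frequencies before the STFT. A minimal numpy sketch of that filter (the odin extractor itself may differ in edge handling):

import numpy as np

def pre_emphasis(signal, coeff=0.97):
    # y[0] = x[0]; y[t] = x[t] - coeff * x[t-1] for t >= 1
    return np.concatenate([signal[:1], signal[1:] - coeff * signal[:-1]])

x = np.sin(np.linspace(0, 8 * np.pi, 16000))  # toy one-second waveform
assert pre_emphasis(x).shape == x.shape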
Example #15
File: helpers.py Project: imito/odin
_args = args_parse(descriptions=[
    ('recipe', 'recipe is the name of acoustic Dataset defined in feature_recipes.py', None),
    ('-feat', 'specific name for the acoustic features, extracted from the given recipe', None, ''),
    ('-aug', 'augmentation dataset: musan, rirs; can be multiple datasets '
             'for training: "musan,rirs"', None, 'None'),
    ('-ncpu', 'number of CPU to be used, if <= 0, auto-select', None, 0),
    # for scoring
    ('-sys', 'name of the system for scoring: xvec, ivec, e2e ...', None, 'xvec'),
    ('-sysid', 'when a system has multiple saved checkpoints (e.g. sys.0.ai)', None, '-1'),
    ('-score', 'name of dataset for scoring, multiple datasets split by ","', None, 'sre18dev,sre18eval'),
    ('-backend', 'list of datasets for training the backend: '
                 'PLDA, SVM or Cosine', None, 'sre04,sre05,sre06,sre08,sre10,mx6'),
    ('-lda', 'if > 0, running LDA before training the backend '
             'with given number of components', None, 0),
    ('-plda', 'number of PLDA components, must be > 0', None, 150),
    ('--mll', 'pre-fitting maximum likelihood before training PLDA', None, False),
    ('--showllk', 'show LLK during training of PLDA, this will slow things down', None, False),
    # for training
    ('-downsample', 'absolute number of files used for training', None, 0),
    ('-exclude', 'list of datasets excluded from training, '
                 'multiple datasets split by ","', None, ''),
    # for ivector
    ('-nmix', 'for i-vector training, number of Gaussian components', None, 2048),
    ('-tdim', 'for i-vector training, number of latent dimension for i-vector', None, 600),
    # for DNN
    ('-utt', 'maximum length of sequence for training', None, 3),
    ('-seq', 'sequencing mode for training data, cut or pad', None, 'cut'),
    ('-batch', 'batch size for training DNN, kaldi uses 64, we use 128', None, 128),
    ('-epoch', 'number of epochs for training DNN, kaldi uses only 3', None, 12),
    ('-clip', 'The maximum change in parameters allowed per minibatch, '
              'measured in Euclidean norm over the entire model (change '
              'will be clipped to this value), kaldi uses 2.0', None, 2.0),
    ('-lr', 'learning rate for Adam, kaldi uses 0.001 by default,'
            ' we use 0.01', None, 0.01),
    # others
    ('-mindur', 'for filtering utterances, minimum duration of an utterance '
                'for training (in seconds)', None, 1),
    ('-minutt', 'for filtering utterances, minimum number of utterances per '
                'speaker for training', None, 3),
    ('--override', 'override previous experiments', None, False),
    ('--debug', 'enable debugging', None, False),
])