Example #1
from odin.utils import ArgController


def get_arguments():
    args = ArgController(
    ).add('-ds',
          "name of multiple datasets for cross-data analysis: 'cross8k_ly,crossecc_ly'",
          "cross8k_ly,crossecc_ly"
    ).add('-model',
          "name of the model for testing, for example: 'vae', 'movae', 'vae,movae'",
          "movae"
    ).add('-path', "save all figures to the given output folder",
          "/tmp/cross_analysis"
    ).add('--verbose', "enable verbose logging", False
    ).add('-nprocess', "number of processes for running the experiments", 2
    ).parse()
    datasets = [
        i.strip().lower() for i in str(args.ds).split(',')
        if len(i.strip()) > 0
    ]
    model = [
        i.strip().lower() for i in str(args.model).split(',')
        if len(i.strip()) > 0
    ]
    nprocess = int(args.nprocess)
    return dict(datasets=datasets,
                models=model,
                outpath=args.path,
                verbose=bool(args.verbose),
                nprocess=nprocess)
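
All of the examples in this file share the same fluent pattern: chain `.add(name, help, default, choices)` calls on an `ArgController`, then finish with `.parse()`. The sketch below summarizes that pattern as it can be inferred from the surrounding code rather than from the odin.utils documentation: a bare name appears to declare a required positional argument, a `-name` an option with a default value, a `--name` a boolean flag, and a trailing tuple a set of allowed values (see Example #2).

from odin.utils import ArgController

# a minimal sketch of the recurring pattern; the semantics are inferred
# from the examples in this file, not from the odin.utils documentation
args = ArgController(
).add("input", "a positional argument"
).add("-n", "an option with a default value", 2
).add("-norm", "an option restricted to a tuple of choices", 'log', ('log', 'raw')
).add("--verbose", "a boolean flag, False unless given", False
).parse()

# parsed values are read as attributes or by key; both styles appear below
print(args.input, int(args.n), str(args.norm), bool(args.verbose))
print(args['n'])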
Example #2
import os

import numpy as np

# is_number is assumed to come from odin.utils alongside ArgController
from odin.utils import ArgController, is_number


def get_arguments():
    args = ArgController(
    ).add("input", "name of the dataset or path to a csv file"
    ).add("-n", "number of GMM components", 2
    ).add("-idx", "index of the positive component", 1
    ).add("-norm", "method for normalizing: raw, log", 'log', ('log', 'raw')
    ).add("-outpath", "y_bin and y_prob will be saved to this path", ''
    ).add("-figpath", "path for saving the analysis figure", '/tmp/tmp.pdf'
    ).add("--verbose", "enable verbose output and save diagnostics", False
    ).parse()
    inp = str(args.input)
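    # the positional 'input' is either a path to a csv file or the name of a
    # dataset registered in sisua (resolved via get_dataset below)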
    if os.path.exists(inp):
        assert os.path.isfile(inp), "%s must be a path to a file" % inp
        data = []
        with open(inp, 'r') as f:
            for line in f:
                data.append(line.strip().split(','))
        data = np.array(data)
        if all(is_number(i, string_number=True) for i in data[0]):
            y_prot = data.astype('float32')
            y_prot_names = np.array(
                ['#%d' % i for i in range(y_prot.shape[1])])
        else:
            y_prot = data[1:].astype('float32')
            y_prot_names = data[0]
        outpath = args.outpath
    else:
        from sisua.data import get_dataset
        ds, gene_ds, prot_ds = get_dataset(inp, override=False)
        y_prot = ds['y']
        y_prot_names = np.array(ds['y_col'])
        outpath = ds.path if args.outpath == '' else args.outpath
    return {
        'y_prot': y_prot,
        'y_prot_names': y_prot_names,
        'n_components': int(args.n),
        'index': int(args.idx),
        'log_norm': args.norm == 'log',
        'outpath': outpath if len(outpath) > 0 else None,
        'figpath': args.figpath if len(args.figpath) > 0 else None,
        'verbose': bool(args.verbose)
    }
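
For illustration, assuming the function above lives in a script named fit_gmm.py (a hypothetical name), the resulting command line could look like this; 'pbmc8kly' is a dataset name taken from Example #15:

# hypothetical invocations (the script name is assumed):
#   python fit_gmm.py data.csv -n 2 -idx 1 -norm log --verbose
#   python fit_gmm.py pbmc8kly -norm raw -outpath /tmp/gmm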
Example #3
import os

import numpy as np
import tensorflow as tf

from odin.backend import interpolation
from odin.bay.vi.autoencoder import RandomVariable, VariationalAutoencoder
from odin.exp import Trainer
from odin.fuel import AudioFeatureLoader
from odin.utils import ArgController, clean_folder, partialclass

# enable memory growth only when a GPU is actually available
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_memory_growth(gpus[0], True)
tf.debugging.set_log_device_placement(False)
tf.autograph.set_verbosity(0)

tf.random.set_seed(8)
np.random.seed(8)

args = ArgController(
).add("--override", "Override trained model", False
).parse()

SAVE_PATH = "/tmp/vae_audio"
if os.path.exists(SAVE_PATH) and args.override:
    clean_folder(SAVE_PATH, verbose=True)
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH)
MODEL_PATH = os.path.join(SAVE_PATH, 'model')

# ===========================================================================
# Configs
# ===========================================================================
ZDIM = 32
MAX_LENGTH = 48
BUFFER_SIZE = 100
Example #4
import os

import numpy as np
import seaborn as sns
import tensorflow as tf

from odin.exp.trainer import Trainer
from odin.networks import ConvNetwork, DenseNetwork
from odin.utils import ArgController

# TODO: improve performance of VAE

sns.set()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

tf.random.set_seed(8)
np.random.seed(8)


args = ArgController(
).add('-ds', "1-apple/orange, 2-fashionMNIST, 3-MNIST", 3
).add('-zdim', "number of latent units", 32
).parse()

BATCH_SIZE = 128
DATASET = int(args['ds'])
EPOCHS = 128
FREQ = 800
ZDIM = int(args['zdim'])
SUMMARY_STEPS = [500, 100]
output_dir = os.path.join('/tmp', 'vae_z%d_d%s' % (ZDIM, DATASET))
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
print("Output directory:", output_dir)

# ===========================================================================
# Load dataset and helpers
Example #5
import glob
import os

import numpy as np
import tensorflow as tf
from tqdm import tqdm

from odin.bay.vi.autoencoder import VQVAE, RVconf, VectorQuantizer
from odin.fuel import BinarizedMNIST
from odin.utils import ArgController

# enable memory growth only when a GPU is actually available
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  tf.config.experimental.set_memory_growth(gpus[0], True)
tf.debugging.set_log_device_placement(False)
tf.autograph.set_verbosity(0)
tf.random.set_seed(1)
np.random.seed(1)

args = ArgController(
).add("--override", "Override trained model", False
).add("--ema", "enable exponential moving average", False
).add("-niter", "Number of training iterations", 10000
).parse()

# ===========================================================================
# Config
# ===========================================================================
SAVE_PATH = f"/tmp/vq_vae{'_ema' if args.ema else ''}"
if not os.path.exists(SAVE_PATH):
  os.makedirs(SAVE_PATH)
MODEL_PATH = os.path.join(SAVE_PATH, "model")
if args.override:
  for f in sorted(
      glob.glob(f"{SAVE_PATH}/*.png") + glob.glob(f"{SAVE_PATH}/model*")):
    os.remove(f)
    print("Removed file:", f)
Example #6
#!/usr/bin/env python
from __future__ import print_function, division, absolute_import

import numpy as np

from odin.utils import get_modelpath, ArgController, stdio, get_logpath

stdio(get_logpath('tmp.log', override=True))

arg = ArgController(version=0.12
).add('-backend', 'theano or tensorflow', 'tensorflow'
).add('-ds', 'dataset: cifar10 or mnist', 'mnist'
).add('-epoch', 'number of epochs', 3
).add('-lr', 'learning rate', 0.01
).parse()

import os
os.environ['ODIN'] = 'float32,gpu,%s,seed=12' % arg['backend']
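# NOTE: the ODIN variable above is read at import time; judging from the
# examples in this file, it encodes dtype, device, backend, and random seed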

from odin import backend as K
from odin import nnet as N
from odin import fuel, training
from six.moves import cPickle

# ===========================================================================
# Load data
# ===========================================================================
USE_MNIST_DATA = 'mnist' in arg['ds'].lower()

if USE_MNIST_DATA:
    ds = fuel.load_mnist()
Example #7
#!/usr/bin/env python
from __future__ import print_function, division, absolute_import
import os
os.environ['ODIN'] = 'float32,gpu,seed=12,log'
import shutil

import numpy as np
import tensorflow as tf

from odin.utils import ArgController
from odin import backend as K
from odin import nnet as N
from odin import fuel as F, training

arg = ArgController(
).add('-ds', 'dataset cifar10, mnist, or fmnist', 'mnist'
).add('--rnn', 'using RNN network', False
).parse()
# ===========================================================================
# Load data
# ===========================================================================
USE_MNIST_DATA = True
if arg.ds.lower() == 'mnist':
    ds = F.MNIST_original.get_dataset()
elif arg.ds.lower() == 'fmnist':
    ds = F.FMNIST_original.get_dataset()
else:
    ds = F.CIFAR10.get_dataset()
    USE_MNIST_DATA = False
print(ds)

X = K.placeholder(shape=(None, ) + ds['X_train'].shape[1:], name='X')
Example #8
import os

from odin import preprocessing as pp
from odin.visual import print_dist, print_confusion, print_hist
from odin.utils import (get_logpath, get_modelpath, get_datasetpath,
                        get_exppath, Progbar, unique_labels, chain,
                        get_formatted_datetime, as_tuple_of_shape, stdio,
                        ctext, ArgController)

# ===========================================================================
# Input arguments
# ===========================================================================
args = ArgController(
).add('-nmix', "Number of GMM mixtures", 128
).add('-tdim', "Dimension of the T-matrix", 64
).add('-feat', "Acoustic feature: spec, mspec, mfcc", 'mfcc'
).add('--gmm', "Force re-running GMM training", False
).add('--stat', "Force re-extraction of centered statistics", False
).add('--tmat', "Force re-running T-matrix training", False
).add('--ivec', "Force re-running i-vector extraction", False
).add('--all', "Re-run the whole system, just a shortcut", False
).add('--acous', "Force re-running acoustic feature extraction", False
).parse()
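# cascade the force-rerun flags: --all re-runs every stage, and forcing an
# earlier stage (GMM -> stats -> T-matrix -> i-vector) re-runs all later ones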
args.gmm |= args.all
args.stat |= args.all | args.gmm
args.tmat |= args.all | args.stat
args.ivec |= args.all | args.tmat
FEAT = args.feat
# ===========================================================================
# Const
# ===========================================================================
EXP_DIR = get_exppath('FSDD')
PATH_ACOUSTIC_FEATURES = os.path.join(EXP_DIR, 'features')
Example #9
from __future__ import print_function, division, absolute_import

import time
import numpy as np
import pandas as pd
from collections import defaultdict

from odin.utils import ctext, ArgController

from sisua.inference import Inference, InferenceSCVI, InferenceDCA

args = ArgController(
).add('-path', "Path for saving the csv file", '/tmp/tmp.csv'
).add('--sisua', "run SISUA first, then scVAE", False
).add('--test', "evaluate the test time", False
).parse()

# ===========================================================================
# Configurations
# ===========================================================================
SEED = 87654321
n_cells = [200, 500, 1000, 2000, 5000, 10000, 40000, 100000, 1000000]
n_genes = [500]
n_proteins = [10]

n_epoch = 100
batch_size = 128
n_trials = 1

np.random.seed(SEED)
Example #10
import os
import shutil

from odin.utils import ArgController

# main() and get_dataset_meta() are defined elsewhere in the original script


# ===========================================================================
# Argument parsing
# ===========================================================================
def call_main(dsname, outpath):
    try:
        main(dsname=dsname, outpath=outpath)
    except Exception as e:
        import traceback
        traceback.print_exc()
        print(f"error:'{e}'\nds:'{dsname}'\npath:'{outpath}'")


if __name__ == "__main__":
    all_dataset = list(get_dataset_meta().keys())
    args = ArgController(print_parsed=True
    ).add("dsname", f"all available datasets: {', '.join(all_dataset)}"
    ).add("-path", "Output directory", '/tmp'
    ).parse()
    all_dsname = args.dsname.split(',')
    path = args.path

    for dsname in all_dsname:
        if dsname not in all_dataset:
            print(f"No support for dataset with name: {dsname}, "
                  f"all available datasets are: {all_dataset}")
            continue
        outpath = os.path.join(path, dsname)
        # remove the existing output path, then recreate it
        if os.path.exists(outpath):
            shutil.rmtree(outpath)
        os.makedirs(outpath)
Example #11
import os
import random

import numpy as np
import seaborn as sns
import tensorflow as tf
from matplotlib import pyplot as plt

from odin import visual as vs
from odin.ml import fast_pca, fast_umap
from odin.utils import ArgController, md5_checksum
from sisua.data import OMIC, get_dataset, normalization_recipes
from sisua.models import (MISA, SCALE, SCVI, SISUA, DeepCountAutoencoder,
                          NetConf, RVmeta, VariationalAutoEncoder, load, save)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
sns.set()
tf.random.set_seed(8)
np.random.seed(8)

args = ArgController().add('--load', "Load and check model integrity",
                           False).parse()
BASE_DIR = '/tmp/exp'
if not os.path.exists(BASE_DIR):
    os.mkdir(BASE_DIR)

random.seed(1234)


# ===========================================================================
# Helper
# ===========================================================================
def predict2info(model, x):
    dists = tf.nest.flatten(model.predict(x, verbose=0))
    to_numbers = lambda d: [
        fn(i).numpy() for i in (d.mean(), d.variance())
        for fn in (tf.reduce_mean, tf.reduce_min, tf.reduce_max)
    ]
Example #12
from odin.stats import train_valid_test_split, freqcount
from odin import ml
from odin import training
from odin import preprocessing as pp
from odin.visual import print_dist, print_confusion, print_hist
from odin.utils import (Progbar, unique_labels, chain, get_formatted_datetime,
                        as_tuple_of_shape, stdio, ctext, ArgController)

from utils import prepare_data, get_exp_path
# ===========================================================================
# Input arguments
# ===========================================================================
args = ArgController(
).add('-nmix', "Number of GMM mixtures", 256
).add('-tdim', "Dimension of the T-matrix", 128
).add('-feat', "Acoustic feature: spec, mspec, mfcc, bnf, sdc", 'bnf'
).add('-task', "gender, age, dialect, speaker, digit", 'gender'
).add('--retrain', "delete the trained model and re-train everything", False
).parse()
# ===========================================================================
# Const
# ===========================================================================
DTYPE = 'float64'
# ====== GMM training ====== #
NMIX = args.nmix
GMM_NITER = 10
GMM_DOWNSAMPLE = 4
GMM_STOCHASTIC = True
# ====== IVEC training ====== #
TV_DIM = args.tdim
TV_NITER = 10
Example #13
#  Benchmark TRAIN-epoch: 5.98400415693 (s)
#  Benchmark PRED-batch: 0.183033730263 (s)
#  Benchmark PRED-epoch: 3.5595933524 (s)
# NOTE: the performance of float16 and float32 dataset are identical
# ===========================================================================
from __future__ import print_function, absolute_import, division

from odin.utils import ArgController

# ====== parse arguments ====== #
args = ArgController(
).add('-bk', 'backend: tensorflow or theano', 'tensorflow'
).add('-dev', 'gpu or cpu', 'gpu'
).add('-dt', 'dtype: float32 or float16', 'float16'
).add('-feat', 'feature type: mfcc, mspec, spec, qspec, qmspec, qmfcc', 'mspec'
).add('-cnn', 'enable CNN or not', True
).add('-vad', 'number of GMM components for VAD', 2
# ====== for training ====== #
).add('-lr', 'learning rate', 0.0001
).add('-epoch', 'number of epochs', 5
).add('-bs', 'batch size', 8
).parse()

# ====== import ====== #
import os

os.environ['ODIN'] = 'float32,%s,%s' % (args['dev'], args['bk'])

import numpy as np

np.random.seed(1208)
Example #14
import os

os.environ['ODIN'] = 'gpu,float32'
import pickle

import numpy as np

from odin import ml
from odin import fuel as F
from odin.utils import ctext, ArgController
from odin import visual as V

from sklearn.metrics import confusion_matrix, accuracy_score

args = ArgController().add('--reset', "re-run the fitting of the model",
                           False).parse()
# ===========================================================================
# Const
# ===========================================================================
ds = F.MNIST.load()
print(ds)
nb_classes = 10
PATH = '/tmp/lore.ai'
# ===========================================================================
# Model
# ===========================================================================
if not os.path.exists(PATH) or args.reset:
    f = ml.LogisticRegression(nb_classes=nb_classes,
                              tol=1e-4,
                              fit_intercept=True,
                              path=PATH,
Example #15
import os

import numpy as np
import tensorflow as tf

from odin.utils import ArgController
from sisua.data import get_dataset
from sisua.models.variational_autoencoder import VariationalAutoEncoder

# turn off TF logging and set a reproducible random seed
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.random.set_seed(8)
np.random.seed(8)

x, y = get_dataset('pbmc8kly')
x_train, x_test = x.split()
y_train, y_test = y.split()
x_train.assert_matching_cells(y_train)
x_test.assert_matching_cells(y_test)

flags = ArgController(
).add('--no-train', "Skip training", False
).add('--no-score', "Skip scoring", False
).add('--analyze', "Run the analysis", False
).parse()
no_train = flags.no_train
no_score = flags.no_score
analyze = flags.analyze
# assume the scores are already available when analysis is enabled
if analyze:
  no_train = True
  no_score = True

# ===========================================================================
# Configurations
# ===========================================================================
path = '/tmp/grid'
if not os.path.exists(path):
  os.mkdir(path)
Example #16
from __future__ import print_function, division, absolute_import

from odin.utils import ArgController, get_all_files, pad_sequences, get_modelpath, one_hot

args = ArgController(
).add('-dev', 'device: gpu or cpu', 'gpu'
).add('-bk', 'backend: tensorflow or theano', 'tensorflow'
).add('-nclass', 'number of newsgroup classes to use', 20
).add('-lr', 'learning rate', 0.0001
).add('-epoch', 'number of epochs', 3
).add('--rebuild', 'rebuild the tokenizer or not', False
).parse()

import os
os.environ['ODIN'] = 'float32,%s,%s' % (args['dev'], args['bk'])
from six.moves import cPickle
from itertools import chain

import numpy as np

from odin import backend as K, nnet as N, fuel as F
from odin.preprocessing.text import (Tokenizer, language, POSfilter,
                                     TYPEfilter, CasePreprocessor,
                                     TransPreprocessor)
from odin import training

# ===========================================================================
# Const
# ===========================================================================
embedding_dims = 100
MAX_SEQ_LEN = 1000
MAX_NB_WORDS = 20000
Example #17
import os
import shutil

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

from odin.bay.vi import RandomVariable, VariationalAutoencoder
from odin.utils import ArgController

tfk = tf.keras
tfkl = tf.keras.layers
tfpl = tfp.layers
tfd = tfp.distributions
tf.random.set_seed(1)
# enable memory growth only when a GPU is actually available
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_memory_growth(gpus[0], True)
tf.debugging.set_log_device_placement(False)
tf.autograph.set_verbosity(0)
np.random.seed(1)

args = ArgController(
).add("-epochs", "Number of training epochs", 200
).parse()

# ===========================================================================
# configs
# ===========================================================================
learning_rate = 1e-3
epochs = int(args.epochs)
SAVE_PATH = "/tmp/vae_basic"
if os.path.exists(SAVE_PATH):
    shutil.rmtree(SAVE_PATH)
os.makedirs(SAVE_PATH)

# ===========================================================================
# load data
# ===========================================================================
Example #18
        # plotting the figures
        if plot_enable:
            robust_run("evaluate_plotting",
                       f"model:{model} train:{train_ds} test:{test_ds}",
                       plotting, posterior, outpath, train_ds, test_ds)


# ===========================================================================
# Main
# ===========================================================================
if __name__ == "__main__":
    args = ArgController(
    ).add("model", "The model alias; multiple models separated by commas"
    ).add("ds1", "The first dataset", "eccx"
    ).add("-ds2", "The second dataset; if empty, only analyze the first one", ""
    ).add("-bs", "batch size", 4
    ).add("--score", "re-calculate the model scores", False
    ).add("--plot", "plot the analysis", False
    ).add("--override", "remove existing folders", False
    ).add("--only-cross", "Only run cross-dataset experiments", False
    ).parse()
    # preprocess the arguments
    models = args.model.split(",")
    ds1s = args.ds1.split(",")
    ds2s = args.ds2.split(",")
    assert len(ds1s) > 0 and len(ds1s[0]) > 0, \
      "the 'ds1' argument (first dataset) must be provided."
    # evaluation modes
    plot = bool(args.plot)
    score = bool(args.score)
    if not (plot or score):
        plot = True
Example #19
import os
os.environ['ODIN'] = "cpu=1,float32"
import shutil

import numpy as np

from odin import fuel as F, nnet as N
from odin import preprocessing as pp
from odin.utils import (get_all_files, get_all_ext, exec_commands, MPI,
                        cpu_count, Progbar, ArgController, stdio, ctext,
                        crypto)

args = ArgController(
).add('path', "path to the TIDIGITS dataset"
).add('--wav', "re-run conversion of sphere files to wav", False
).add('--ds', "re-run grouping of wav files into a dataset", False
).add('--compress', "re-run compression of the dataset", False
).parse()

TOTAL_FILES = 25096
README = """
Original sample rate: 20,000 Hz
Downsampled sample rate: 8,000 Hz

Saved WAV file format:
    * [train|test]
    * [m|w|b|g] (alias for man, women, boy, girl)
    * [age]
    * [dialectID]
    * [speakerID]
Example #20
from __future__ import print_function, division, absolute_import

import matplotlib
matplotlib.use('Agg')

from odin.utils import ArgController, stdio, one_hot
args = ArgController(
).add('-model', 'model name, specified in models_cifar.py', 'cnn'
).parse()

import os
os.environ['ODIN'] = 'float32,gpu,seed=87654321,log'

import numpy as np
import tensorflow as tf

from odin import fuel as F, nnet as N, backend as K, training, utils
from odin.stats import train_valid_test_split

MODEL_NAME = args.model
MODEL_PATH = utils.get_modelpath(name='cifar10_%s' % MODEL_NAME, override=True)
LOG_PATH = utils.get_logpath(name='cifar10_%s.log' % MODEL_NAME, override=True)
stdio(LOG_PATH)
# ===========================================================================
# Some handmade constants
# ===========================================================================
NB_EPOCH = 10
LEARNING_RATE = 0.001
# ===========================================================================
# Load dataset
# ===========================================================================
Example #21
from matplotlib import pyplot as plt
from scipy import sparse
from tensorflow.python import keras

from odin import visual as vs
from odin.bay.vi.autoencoder import AmortizedLDA
from odin.bay.vi.metrics import unsupervised_clustering_scores
from odin.fuel import MNIST, PBMC, Cortex, HumanEmbryos, Newsgroup5, Newsgroup20
from odin.ml import fast_lda_topics, get_topics_string
from odin.utils import ArgController

args = ArgController(
).add('-ds', 'cortex, embryo, pbmc5k, pbmc10k, news5, news20, mnist', 'cortex'
).add('-warmup', 'warmup iterations', 30000
).add('-niter', 'max iterations', 35000
).add('-post', 'posterior distribution', "dirichlet"
).add('-dist', 'output distribution', "categorical"
).add('--log', 'log-normalize the input data', False
).add('--override', 'override saved results', False
).add('--lda', 'train an LDA model', False
).parse()

# python amortized_lda_test.py --log -post dirichlet -dist categorical -ds news5
# python amortized_lda_test.py --log -post dirichlet -dist negativebinomial -ds news5
# python amortized_lda_test.py --log -post dirichlet -dist zinb -ds news5
# python amortized_lda_test.py -post dirichlet -dist categorical -ds news5
# python amortized_lda_test.py -post dirichlet -dist negativebinomial -ds news5
# python amortized_lda_test.py -post dirichlet -dist zinb -ds news5
# python amortized_lda_test.py --log -post gaussian -dist categorical -ds news5
# python amortized_lda_test.py --log -post gaussian -dist negativebinomial -ds news5
# python amortized_lda_test.py --log -post gaussian -dist zinb -ds news5
# python amortized_lda_test.py -post gaussian -dist categorical -ds news5