from odin.utils import ArgController  # import added; ArgController comes from odin.utils


def get_arguments():
  args = ArgController(
  ).add('-ds',
        "name of multiple datasets for cross-data analysis: 'cross8k_ly,crossecc_ly'",
        "cross8k_ly,crossecc_ly"
  ).add("-model",
        "name of model for testing, for example: 'vae', 'movae', 'vae,movae'",
        "movae"
  ).add("-path", "Saving all figures to given output folder", "/tmp/cross_analysis"
  ).add("--verbose", "Enable verbose logging", False
  ).add("-nprocess", "Number of processes for running the experiments", 2
  ).parse()
  datasets = [
      i.strip().lower() for i in str(args.ds).split(',') if len(i.strip()) > 0
  ]
  models = [
      i.strip().lower() for i in str(args.model).split(',') if len(i.strip()) > 0
  ]
  nprocess = int(args.nprocess)
  return dict(datasets=datasets,
              models=models,
              outpath=args.path,
              verbose=bool(args.verbose),
              nprocess=nprocess)
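# Example invocation (hypothetical script name; flags as declared above):
#   python cross_analysis.py -ds cross8k_ly,crossecc_ly -model vae,movae \
#          -path /tmp/cross_analysis -nprocess 2 --verbose
# The parsed namespace is attribute-accessible (args.ds, args.model, ...).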
import os

import numpy as np

# `is_number` is assumed importable from odin.utils alongside ArgController
from odin.utils import ArgController, is_number


def get_arguments():
  args = ArgController(
  ).add("input", "Name of the dataset or path to csv file"
  ).add("-n", "number of GMM components", 2
  ).add("-idx", "index of the positive component", 1
  ).add("-norm", "method for normalizing: raw, log", 'log', ('log', 'raw')
  ).add("-outpath", "y_bin and y_prob will be saved to this path", ''
  ).add("-figpath", "path for saving analysis figure", '/tmp/tmp.pdf'
  ).add("--verbose", "Enable verbose and saving diagnosis", False
  ).parse()
  inp = str(args.input)
  if os.path.exists(inp):
    assert os.path.isfile(inp), "%s must be path to a file" % inp
    data = []
    with open(inp, 'r') as f:
      for line in f:
        data.append(line.strip().split(','))
    data = np.array(data)
    if all(is_number(i, string_number=True) for i in data[0]):
      y_prot = data.astype('float32')
      y_prot_names = np.array(['#%d' % i for i in range(y_prot.shape[1])])
    else:
      y_prot = data[1:].astype('float32')
      y_prot_names = data[0]
    outpath = args.outpath
  else:
    from sisua.data import get_dataset
    ds, gene_ds, prot_ds = get_dataset(inp, override=False)
    y_prot = ds['y']
    y_prot_names = np.array(ds['y_col'])
    outpath = ds.path if args.outpath == '' else args.outpath
  return {
      'y_prot': y_prot,
      'y_prot_names': y_prot_names,
      'n_components': int(args.n),
      'index': int(args.idx),
      'log_norm': args.norm == 'log',
      'outpath': outpath if len(outpath) > 0 else None,
      'figpath': args.figpath if len(args.figpath) > 0 else None,
      'verbose': bool(args.verbose)
  }
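# Example invocations (hypothetical script name), csv-file vs. dataset-name input:
#   python gmm_labeling.py counts.csv -n 2 -idx 1 -norm log
#   python gmm_labeling.py pbmc8kly -outpath /tmp/out -figpath /tmp/fig.pdf --verbose
# The trailing tuple ('log', 'raw') in the '-norm' declaration appears to restrict
# the flag to those two values.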
import os

import numpy as np
import tensorflow as tf

from odin.backend import interpolation
from odin.bay.vi.autoencoder import RandomVariable, VariationalAutoencoder
from odin.exp import Trainer
from odin.fuel import AudioFeatureLoader
from odin.utils import ArgController, clean_folder, partialclass

# only configure memory growth when a GPU is actually present
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  tf.config.experimental.set_memory_growth(gpus[0], True)
tf.debugging.set_log_device_placement(False)
tf.autograph.set_verbosity(0)

tf.random.set_seed(8)
np.random.seed(8)

args = ArgController(
).add("--override", "Override trained model", False
).parse()

SAVE_PATH = "/tmp/vae_audio"
if os.path.exists(SAVE_PATH) and args.override:
  clean_folder(SAVE_PATH, verbose=True)
if not os.path.exists(SAVE_PATH):
  os.makedirs(SAVE_PATH)
MODEL_PATH = os.path.join(SAVE_PATH, 'model')

# ===========================================================================
# Configs
# ===========================================================================
ZDIM = 32
MAX_LENGTH = 48
BUFFER_SIZE = 100
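# Example invocation (hypothetical script name):
#   python vae_audio.py --override   # wipe /tmp/vae_audio and retrain from scratch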
import os

import numpy as np
import seaborn as sns
import tensorflow as tf

from odin.exp.trainer import Trainer
from odin.networks import ConvNetwork, DenseNetwork
from odin.utils import ArgController

# TODO: improve performance of VAE
sns.set()
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
tf.random.set_seed(8)
np.random.seed(8)

args = ArgController(
).add('-ds', "1-apple/orange, 2-fashionMNIST, 3-MNIST", 3
).add('-zdim', "number of latent units", 32
).parse()

BATCH_SIZE = 128
DATASET = int(args['ds'])
EPOCHS = 128
FREQ = 800
ZDIM = int(args['zdim'])
SUMMARY_STEPS = [500, 100]

output_dir = os.path.join('/tmp', 'vae_z%d_d%s' % (ZDIM, DATASET))
if not os.path.exists(output_dir):
  os.mkdir(output_dir)
print("Output directory:", output_dir)

# ===========================================================================
# Load dataset and helpers
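# Example invocation (hypothetical script name):
#   python vae_images.py -ds 2 -zdim 64   # fashionMNIST with 64 latent units
# Note the parsed result also supports dict-style access (args['ds'],
# args['zdim']), as used above.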
import glob
import os

import numpy as np
import tensorflow as tf
from tqdm import tqdm

from odin.bay.vi.autoencoder import VQVAE, RVconf, VectorQuantizer
from odin.fuel import BinarizedMNIST
from odin.utils import ArgController

# only configure memory growth when a GPU is actually present
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  tf.config.experimental.set_memory_growth(gpus[0], True)
tf.debugging.set_log_device_placement(False)
tf.autograph.set_verbosity(0)

tf.random.set_seed(1)
np.random.seed(1)

args = ArgController(
).add("--override", "Override trained model", False
).add("--ema", "enable exponential moving average", False
).add("-niter", "Number of training iterations", 10000
).parse()

# ===========================================================================
# Config
# ===========================================================================
SAVE_PATH = f"/tmp/vq_vae{'_ema' if args.ema else ''}"
if not os.path.exists(SAVE_PATH):
  os.makedirs(SAVE_PATH)
MODEL_PATH = os.path.join(SAVE_PATH, "model")
if args.override:
  for f in sorted(
      glob.glob(f"{SAVE_PATH}/*.png") + glob.glob(f"{SAVE_PATH}/model*")):
    os.remove(f)
    print("Removed file:", f)
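# Example invocation (hypothetical script name):
#   python vq_vae.py --ema -niter 20000
# With '--ema' enabled the outputs go to /tmp/vq_vae_ema instead of /tmp/vq_vae.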
#!/usr/bin/env python
from __future__ import print_function, division, absolute_import

import numpy as np

from odin.utils import get_modelpath, ArgController, stdio, get_logpath

stdio(get_logpath('tmp.log', override=True))

arg = ArgController(version=0.12
).add('-backend', 'theano or tensorflow', 'tensorflow'
).add('-ds', 'dataset cifar10, or mnist', 'mnist'
).add('-epoch', 'number of epoch', 3
).add('-lr', 'learning rate', 0.01
).parse()

# set the ODIN flags before the odin backend is imported
import os
os.environ['ODIN'] = 'float32,gpu,%s,seed=12' % arg['backend']
from odin import backend as K
from odin import nnet as N
from odin import fuel, training
from six.moves import cPickle

# ===========================================================================
# Load data
# ===========================================================================
USE_MNIST_DATA = 'mnist' in arg['ds'].lower()
if USE_MNIST_DATA:
  ds = fuel.load_mnist()
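# Example invocation (hypothetical script name):
#   python mnist_train.py -backend tensorflow -ds mnist -epoch 3 -lr 0.01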
#!/usr/bin/env python
from __future__ import print_function, division, absolute_import

import os
os.environ['ODIN'] = 'float32,gpu,seed=12,log'
import shutil

import numpy as np
import tensorflow as tf

from odin.utils import ArgController
from odin import backend as K
from odin import nnet as N
from odin import fuel as F, training

arg = ArgController(
).add('-ds', 'dataset cifar10, mnist, or fmnist', 'mnist'
).add('--rnn', 'using RNN network', False
).parse()

# ===========================================================================
# Load data
# ===========================================================================
USE_MNIST_DATA = True
if arg.ds.lower() == 'mnist':
  ds = F.MNIST_original.get_dataset()
elif arg.ds.lower() == 'fmnist':
  ds = F.FMNIST_original.get_dataset()
else:
  ds = F.CIFAR10.get_dataset()
  USE_MNIST_DATA = False
print(ds)

X = K.placeholder(shape=(None,) + ds['X_train'].shape[1:], name='X')
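# Example invocation (hypothetical script name):
#   python image_classifier.py -ds fmnist --rnn
# '--rnn' is declared with a False default, so passing the flag switches it on.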
import os

from odin import preprocessing as pp
from odin.visual import print_dist, print_confusion, print_hist
from odin.utils import (get_logpath, get_modelpath, get_datasetpath, get_exppath,
                        Progbar, unique_labels, chain, get_formatted_datetime,
                        as_tuple_of_shape, stdio, ctext, ArgController)

# ===========================================================================
# Input arguments
# ===========================================================================
args = ArgController(
).add('-nmix', "Number of GMM mixtures", 128
).add('-tdim', "Dimension of t-matrix", 64
).add('-feat', "Acoustic feature: spec, mspec, mfcc", 'mfcc'
).add('--gmm', "Force re-run training GMM", False
).add('--stat', "Force re-extraction of centered statistics", False
).add('--tmat', "Force re-run training Tmatrix", False
).add('--ivec', "Force re-run extraction of i-vector", False
).add('--all', "Run the whole system again, just a shortcut", False
).add('--acous', "Force re-run acoustic feature extraction", False
).parse()

# each stage forces every downstream stage to re-run as well
args.gmm |= args.all
args.stat |= args.all | args.gmm
args.tmat |= args.all | args.stat
args.ivec |= args.all | args.tmat
FEAT = args.feat

# ===========================================================================
# Const
# ===========================================================================
EXP_DIR = get_exppath('FSDD')
PATH_ACOUSTIC_FEATURES = os.path.join(EXP_DIR, 'features')
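# Example invocation (hypothetical script name):
#   python ivector_fsdd.py -nmix 128 -tdim 64 -feat mfcc --gmm
# Because of the cascade above, '--gmm' also re-runs the statistics, T-matrix and
# i-vector stages; '--all' additionally re-runs the GMM (but not '--acous').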
from __future__ import print_function, division, absolute_import

import time
import numpy as np
import pandas as pd
from collections import defaultdict

from odin.utils import ctext, ArgController

from sisua.inference import Inference, InferenceSCVI, InferenceDCA

args = ArgController(
).add('-path', "Save path for the csv file", '/tmp/tmp.csv'
).add('--sisua', "running SISUA first then scVAE", False
).add('--test', "evaluate the test time", False
).parse()

# ===========================================================================
# Configurations
# ===========================================================================
SEED = 87654321
n_cells = [200, 500, 1000, 2000, 5000, 10000, 40000, 100000, 1000000]
n_genes = [500]
n_proteins = [10]
n_epoch = 100
batch_size = 128
n_trials = 1
np.random.seed(SEED)
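# Example invocation (hypothetical script name):
#   python benchmark_speed.py -path /tmp/benchmark.csv --sisua --test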
# ===========================================================================
# Argument parsing
# ===========================================================================
import os
import shutil

from odin.utils import ArgController
# `get_dataset_meta` is assumed importable from sisua.data, following the
# other sisua scripts
from sisua.data import get_dataset_meta


def call_main(dsname, outpath):
  try:
    main(dsname=dsname, outpath=outpath)
  except Exception as e:
    import traceback
    traceback.print_exc()
    print(f"error:'{e}'\nds:'{dsname}'\npath:'{outpath}'")


if __name__ == "__main__":
  all_dataset = list(get_dataset_meta().keys())
  args = ArgController(print_parsed=True
  ).add("dsname", f"all available datasets: {', '.join(all_dataset)}"
  ).add("-path", "Output directory", '/tmp'
  ).parse()
  all_dsname = args.dsname.split(',')
  path = args.path
  for dsname in all_dsname:
    if dsname not in all_dataset:
      print(f"No support for dataset with name: {dsname}, "
            f"all available datasets are: {all_dataset}")
      continue
    outpath = os.path.join(path, dsname)
    # override existing path
    if os.path.exists(outpath):
      shutil.rmtree(outpath)
    if not os.path.exists(outpath):
      os.makedirs(outpath)
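# Example invocation (hypothetical script and dataset names), multiple datasets
# separated by commas:
#   python dataset_stats.py cortex,pbmc -path /tmp/stats
# Valid names are whatever get_dataset_meta() reports; unknown names are skipped
# with a warning.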
import os
import random

import numpy as np
import seaborn as sns
import tensorflow as tf
from matplotlib import pyplot as plt

from odin import visual as vs
from odin.ml import fast_pca, fast_umap
from odin.utils import ArgController, md5_checksum

from sisua.data import OMIC, get_dataset, normalization_recipes
from sisua.models import (MISA, SCALE, SCVI, SISUA, DeepCountAutoencoder,
                          NetConf, RVmeta, VariationalAutoEncoder, load, save)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
sns.set()
tf.random.set_seed(8)
np.random.seed(8)

args = ArgController(
).add('--load', "Load and check model integrity", False
).parse()

BASE_DIR = '/tmp/exp'
if not os.path.exists(BASE_DIR):
  os.mkdir(BASE_DIR)
random.seed(1234)


# ===========================================================================
# Helper
# ===========================================================================
def predict2info(model, x):
  dists = tf.nest.flatten(model.predict(x, verbose=0))
  # summarize mean/min/max of each distribution's mean and variance
  to_numbers = lambda d: [
      fn(i).numpy() for i in (d.mean(), d.variance())
      for fn in (tf.reduce_mean, tf.reduce_min, tf.reduce_max)
  ]
from odin.stats import train_valid_test_split, freqcount
from odin import ml
from odin import training
from odin import preprocessing as pp
from odin.visual import print_dist, print_confusion, print_hist
from odin.utils import (Progbar, unique_labels, chain, get_formatted_datetime,
                        as_tuple_of_shape, stdio, ctext, ArgController)

from utils import prepare_data, get_exp_path

# ===========================================================================
# Input arguments
# ===========================================================================
args = ArgController(
).add('-nmix', "Number of GMM mixtures", 256
).add('-tdim', "Dimension of t-matrix", 128
).add('-feat', "Acoustic feature: spec, mspec, mfcc, bnf, sdc", 'bnf'
).add('-task', 'gender, age, dialect, speaker, digit', 'gender'
).add('--retrain', "delete trained model, and re-train everything", False
).parse()

# ===========================================================================
# Const
# ===========================================================================
DTYPE = 'float64'
# ====== GMM training ====== #
NMIX = args.nmix
GMM_NITER = 10
GMM_DOWNSAMPLE = 4
GMM_STOCHASTIC = True
# ====== IVEC training ====== #
TV_DIM = args.tdim
TV_NITER = 10
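# Example invocation (hypothetical script name):
#   python tidigits_ivec.py -nmix 256 -tdim 128 -feat bnf -task gender --retrain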
# Benchmark TRAIN-epoch: 5.98400415693 (s)
# Benchmark PRED-batch: 0.183033730263 (s)
# Benchmark PRED-epoch: 3.5595933524 (s)
# NOTE: the performance of float16 and float32 datasets are identical
# ===========================================================================
from __future__ import print_function, absolute_import, division

from odin.utils import ArgController

# ====== parse arguments ====== #
args = ArgController(
).add('-bk', 'backend: tensorflow or theano', 'tensorflow'
).add('-dev', 'gpu or cpu', 'gpu'
).add('-dt', 'dtype: float32 or float16', 'float16'
).add('-feat', 'feature type: mfcc, mspec, spec, qspec, qmspec, qmfcc', 'mspec'
).add('-cnn', 'enable CNN or not', True
).add('-vad', 'number of GMM components for VAD', 2  # for training
).add('-lr', 'learning rate', 0.0001
).add('-epoch', 'number of epoch', 5
).add('-bs', 'batch size', 8
).parse()

# ====== import ====== #
import os
os.environ['ODIN'] = 'float32,%s,%s' % (args['dev'], args['bk'])
import numpy as np
np.random.seed(1208)
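# Example invocation (hypothetical script name):
#   python speech_benchmark.py -bk tensorflow -dev gpu -dt float16 -feat mspec -bs 8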
import os
os.environ['ODIN'] = 'gpu,float32'

import pickle
import numpy as np

from odin import ml
from odin import fuel as F
from odin.utils import ctext, ArgController
from odin import visual as V

from sklearn.metrics import confusion_matrix, accuracy_score

args = ArgController(
).add('--reset', "re-run the fitting of the model", False
).parse()

# ===========================================================================
# Const
# ===========================================================================
ds = F.MNIST.load()
print(ds)
nb_classes = 10
PATH = '/tmp/lore.ai'

# ===========================================================================
# Model
# ===========================================================================
if not os.path.exists(PATH) or args.reset:
  f = ml.LogisticRegression(nb_classes=nb_classes,
                            tol=1e-4,
                            fit_intercept=True,
                            path=PATH,
import os

import numpy as np
import tensorflow as tf

from odin.utils import ArgController
from sisua.data import get_dataset
from sisua.models.variational_autoencoder import VariationalAutoEncoder

# turn off TF logging and set reproducible random seed
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.random.set_seed(8)
np.random.seed(8)

x, y = get_dataset('pbmc8kly')
x_train, x_test = x.split()
y_train, y_test = y.split()
x_train.assert_matching_cells(y_train)
x_test.assert_matching_cells(y_test)

flags = ArgController(
).add('--no-train', 'Stop training', False
).add('--no-score', 'Stop scoring', False
).add('--analyze', "Analyzing", False
).parse()
no_train = flags.no_train
no_score = flags.no_score
analyze = flags.analyze
# assume the scores are ready when analyze is enabled
if analyze:
  no_train = True
  no_score = True

# ===========================================================================
# Configurations
# ===========================================================================
path = '/tmp/grid'
if not os.path.exists(path):
  os.mkdir(path)
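# Example invocation (hypothetical script name):
#   python grid_search.py --analyze
# As coded above, '--analyze' implies both '--no-train' and '--no-score', so only
# the previously saved scores are analyzed.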
from __future__ import print_function, division, absolute_import

from odin.utils import (ArgController, get_all_files, pad_sequences,
                        get_modelpath, one_hot)

args = ArgController(
).add('-dev', 'device: gpu or cpu', 'gpu'
).add('-bk', 'backend: tensorflow or theano', 'tensorflow'
).add('-nclass', 'number of classes of the newsgroup dataset to be used', 20
).add('-lr', 'learning rate', 0.0001
).add('-epoch', 'number of epoch', 3
).add('--rebuild', 'rebuild the tokenizer or not', False
).parse()

import os
os.environ['ODIN'] = 'float32,%s,%s' % (args['dev'], args['bk'])

from six.moves import cPickle
from itertools import chain
import numpy as np

from odin import backend as K, nnet as N, fuel as F
from odin.preprocessing.text import (Tokenizer, language, POSfilter,
                                     TYPEfilter, CasePreprocessor,
                                     TransPreprocessor)
from odin import training

# ===========================================================================
# Const
# ===========================================================================
embedding_dims = 100
MAX_SEQ_LEN = 1000
MAX_NB_WORDS = 20000
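# Example invocation (hypothetical script name):
#   python newsgroup_classifier.py -dev gpu -bk tensorflow -nclass 20 --rebuild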
import os
import shutil

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

from odin.bay.vi import RandomVariable, VariationalAutoencoder
from odin.utils import ArgController

tfk = tf.keras
tfkl = tf.keras.layers
tfpl = tfp.layers
tfd = tfp.distributions

tf.random.set_seed(1)
# only configure memory growth when a GPU is actually present
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  tf.config.experimental.set_memory_growth(gpus[0], True)
tf.debugging.set_log_device_placement(False)
tf.autograph.set_verbosity(0)
np.random.seed(1)

args = ArgController(
).add("-epochs", "Number of training epochs", 200
).parse()

# ===========================================================================
# configs
# ===========================================================================
learning_rate = 1e-3
epochs = int(args.epochs)
SAVE_PATH = "/tmp/vae_basic"
if os.path.exists(SAVE_PATH):
  shutil.rmtree(SAVE_PATH)
os.makedirs(SAVE_PATH)

# ===========================================================================
# load data
# ===========================================================================
  # plotting the figures
  if plot_enable:
    robust_run("evaluate_plotting",
               f"model:{model} train:{train_ds} test:{test_ds}",
               plotting, posterior, outpath, train_ds, test_ds)


# ===========================================================================
# Main
# ===========================================================================
if __name__ == "__main__":
  args = ArgController(
  ).add("model", "The model alias, multiple models separated by comma"
  ).add("ds1", "The first dataset", "eccx"
  ).add("-ds2", "The second dataset, if empty, only analyze the first one", ""
  ).add("-bs", "batch size", 4
  ).add("--score", "re-calculating the model scores", False
  ).add("--plot", "plotting the analysis", False
  ).add("--override", "remove existing folders", False
  ).add("--only-cross", "Only do cross-dataset experiments", False
  ).parse()
  # preprocess the arguments
  models = args.model.split(",")
  ds1s = args.ds1.split(",")
  ds2s = args.ds2.split(",")
  assert len(ds1s) > 0 and len(ds1s[0]) > 0, \
      "the ds1 argument (first dataset) must not be empty."
  # evaluation modes
  plot = bool(args.plot)
  score = bool(args.score)
  if not (plot or score):
    plot = True
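# Example invocation (hypothetical script name; model aliases and the '8kx'
# dataset name are placeholders):
#   python cross_evaluate.py sisua,scale eccx -ds2 8kx --score --plot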
import os
os.environ['ODIN'] = "cpu=1,float32"
import shutil

import numpy as np

from odin import fuel as F, nnet as N
from odin import preprocessing as pp
from odin.utils import (get_all_files, get_all_ext, exec_commands, MPI,
                        cpu_count, Progbar, ArgController, stdio, ctext,
                        crypto)

args = ArgController(
).add('path', "path to TIDIGITS dataset"
).add('--wav', "re-run Converting sphere file to wave", False
).add('--ds', "re-run Group wave files into a dataset", False
).add('--compress', "re-run compression of the dataset", False
).parse()

TOTAL_FILES = 25096
README = \
"""
Original sample rate: 20,000 Hz
Downsampled sample rate: 8,000 Hz

Saved WAV file format:
 * [train|test]
 * [m|w|b|g] (alias for man, woman, boy, girl)
 * [age]
 * [dialectID]
 * [speakerID]
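# Example invocation (hypothetical script name):
#   python tidigits_prepare.py /data/TIDIGITS --wav --ds --compress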
from __future__ import print_function, division, absolute_import

import matplotlib
matplotlib.use('Agg')

from odin.utils import ArgController, stdio, one_hot

args = ArgController(
).add('-model', 'model name, specified in models_cifar.py', 'cnn'
).parse()

import os
os.environ['ODIN'] = 'float32,gpu,seed=87654321,log'

import numpy as np
import tensorflow as tf

from odin import fuel as F, nnet as N, backend as K, training, utils
from odin.stats import train_valid_test_split

MODEL_NAME = args.model
MODEL_PATH = utils.get_modelpath(name='cifar10_%s' % MODEL_NAME, override=True)
LOG_PATH = utils.get_logpath(name='cifar10_%s.log' % MODEL_NAME, override=True)
stdio(LOG_PATH)

# ===========================================================================
# Hand-picked constants
# ===========================================================================
NB_EPOCH = 10
LEARNING_RATE = 0.001

# ===========================================================================
# Load dataset
# ===========================================================================
from matplotlib import pyplot as plt
from scipy import sparse
from tensorflow.python import keras

from odin import visual as vs
from odin.bay.vi.autoencoder import AmortizedLDA
from odin.bay.vi.metrics import unsupervised_clustering_scores
from odin.fuel import MNIST, PBMC, Cortex, HumanEmbryos, Newsgroup5, Newsgroup20
from odin.ml import fast_lda_topics, get_topics_string
from odin.utils import ArgController

args = ArgController(
).add('-ds', 'cortex, embryo, pbmc5k, pbmc10k, news5, news20, mnist', 'cortex'
).add('-warmup', 'warmup iterations', 30000
).add('-niter', 'max iterations', 35000
).add('-post', 'posterior distribution', "dirichlet"
).add('-dist', 'output distribution', "categorical"
).add('--log', 'log-norm the input data', False
).add('--override', 'override saved results', False
).add('--lda', 'training LDA model', False
).parse()

# python amortized_lda_test.py --log -post dirichlet -dist categorical -ds news5
# python amortized_lda_test.py --log -post dirichlet -dist negativebinomial -ds news5
# python amortized_lda_test.py --log -post dirichlet -dist zinb -ds news5
# python amortized_lda_test.py -post dirichlet -dist categorical -ds news5
# python amortized_lda_test.py -post dirichlet -dist negativebinomial -ds news5
# python amortized_lda_test.py -post dirichlet -dist zinb -ds news5
# python amortized_lda_test.py --log -post gaussian -dist categorical -ds news5
# python amortized_lda_test.py --log -post gaussian -dist negativebinomial -ds news5
# python amortized_lda_test.py --log -post gaussian -dist zinb -ds news5
# python amortized_lda_test.py -post gaussian -dist categorical -ds news5