def test_save_cudnn_rnn(self):
  """Serialize a CudnnRNN op, reload it via a second script, and check the
  restored variables and outputs match the originals numerically."""
  np.random.seed(5218)
  # X: (25, 12, 8) random input — assumed (batch, time, feature); TODO confirm
  X = K.variable(np.random.rand(25, 12, 8))
  num_layers = 2
  num_gates = 'lstm'
  skip_input = False
  is_bidirectional = False
  path = '/tmp/rnn'
  weights, biases = K.init_rnn(input_dim=8, hidden_dim=18,
                               b_init=init_ops.random_normal_initializer(),
                               num_layers=num_layers,
                               num_gates=num_gates,
                               skip_input=skip_input,
                               is_bidirectional=is_bidirectional)
  rnn = N.CudnnRNN(num_units=18,
                   W_init=weights, b_init=biases,
                   rnn_mode=num_gates, num_layers=num_layers,
                   skip_input=skip_input,
                   is_bidirectional=is_bidirectional,
                   return_states=False,
                   dropout=0., name="CudnnRNNTest")
  y = rnn(X)
  K.initialize_all_variables()
  y = K.eval(y)
  # dump the op to `path` (binary format, replacing any previous dump)
  N.serialize(nnops=rnn, path=path, binary_output=True, override=True)
  # The restore side runs via `run_script` — presumably a fresh process, so
  # no graph/session state leaks from here; it prints the quantities that
  # are parsed and compared below.
  test_script = r"""
from __future__ import print_function, division, absolute_import
import os
os.environ['ODIN'] = 'gpu,float32,seed=5218'
import pickle
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import init_ops
from odin.config import randint
from odin import backend as K, nnet as N
np.random.seed(5218)
X = K.variable(np.random.rand(25, 12, 8))
rnn = N.deserialize("%s", force_restore_vars=True)
y = rnn(X)
K.initialize_all_variables()
y = K.eval(y)
print(len(rnn.variables),
      sum(np.sum(K.eval(i)) for i in rnn.variables
          if K.role.has_roles(i, K.role.Weight)),
      sum(np.sum(K.eval(i)) for i in rnn.variables
          if K.role.has_roles(i, K.role.Bias)),
      y.sum(),
      (y**2).sum())
""" % path
  outputs = run_script(test_script)[1]
  # stdout format: "<num_vars> <weight_sum> <bias_sum> <y_sum> <y_sq_sum>"
  num_variables, w, b, s1, s2 = outputs.split(' ')
  assert int(num_variables) == len(rnn.variables)
  assert np.allclose(float(w),
                     sum(np.sum(K.eval(i)) for i in rnn.variables
                         if K.role.has_roles(i, K.role.Weight)))
  assert np.allclose(float(b),
                     sum(np.sum(K.eval(i)) for i in rnn.variables
                         if K.role.has_roles(i, K.role.Bias)))
  assert np.allclose(float(s1), y.sum())
  assert np.allclose(float(s2), (y**2).sum())
N.Dense(128, activation=K.relu), N.Dense(10, activation=tf.nn.softmax) ], debug=True) # ====== applying the NNOps ====== # y_pred = ops(X) if arg.rnn: loss = tf.losses.softmax_cross_entropy(y_onehot, ops(X, training=True)) else: loss = tf.losses.softmax_cross_entropy(y_onehot, y_pred) acc = K.metrics.categorical_accuracy(y, y_pred, name="Acc") cm = K.metrics.confusion_matrix(y_pred=y_pred, y_true=y, labels=10) # ====== optimizer ====== # optimizer = K.optimizers.Adam(lr=0.001) updates = optimizer.minimize(loss, verbose=True) # ====== initialize all variable ====== # K.initialize_all_variables() # ====== function ====== # print('Building training functions ...') f_train = K.function([X, y], [loss, optimizer.norm, cm], updates=updates, training=True) print('Building testing functions ...') f_test = K.function([X, y], [loss, acc, cm], training=False) print('Building predicting functions ...') f_pred = K.function(X, y_pred, training=False) # =========================================================================== # Build trainer # =========================================================================== print('Start training ...') # ====== some configurations ====== # model_save_path = '/tmp/EXP_MNIST' if os.path.exists(model_save_path):
dtype=X_probas.dtype)) f_samples = K.function(inputs=[], outputs=X_samples, training=False) # ====== `distortion` is the negative log likelihood ====== # if args.loss == 'ce': loss = tf.losses.softmax_cross_entropy(onehot_labels=X, logits=X_logits) elif args.loss == 'mse': loss = tf.losses.mean_squared_error(labels=X, predictions=X_probas) elif args.loss == 'huber': loss = tf.losses.huber_loss(labels=X, predictions=X_probas) elif args.loss == 'lglo': loss = tf.losses.log_loss(labels=X, predictions=X_probas) # =========================================================================== # Optimizing the network # =========================================================================== update_ops = K.optimizers.Adam(lr=0.001).minimize(loss) K.initialize_all_variables() # ====== intitalize ====== # record_train_loss = [] record_valid_loss = [] patience = 3 epoch = 0 # We want the rate to go up but the distortion to go down while True: # ====== training ====== # train_losses = [] prog = Progbar(target=X_train.shape[0], name='Epoch%d' % epoch) start_time = timeit.default_timer() for start, end in batching(batch_size=args.bs, n=X_train.shape[0], seed=K.get_rng().randint(10e8)): _ = K.eval(loss, feed_dict={X: X_train[start:end]}, update_after=update_ops)
def train(X, y_true, y_pred, train_data, valid_data=None, valid_freq=1.,
          patience=3, threshold=5, rollback=True,
          objectives=(tf.losses.softmax_cross_entropy,),
          metrics=(0,), training_metrics=(),
          l1_regu=0., l2_regu=0., parameters=(),
          prior_weights=None, sample_weights=None,
          batch_size=256, epochs=8, shuffle=True,
          optimizer='rmsprop', optz_kwargs=None, updates=None,
          init_vars=True, labels=None, seed=5218, verbose=2):
  """Build train/score/predict functions for a model and run the main loop.

  Parameters
  ----------
  rollback : bool (default: True)
      if True, allow rollback to the best checkpoint during training
  objectives : {callable, tensorflow.Tensor}
      if `callable`, the function must take `y_true`, and `y_pred`
      The objectives must be differentiable and used for training.
  metrics : {callable, tensorflow.Tensor, int}
      if `callable`, the function must take `y_true`, and `y_pred`
      The `metrics` is for monitoring the training process.
      if `int`, it is the index of the loss in `objectives`
      NOTE: the first metrics in the list will be used for
      early-stopping (smaller is better).
  training_metrics : {callable, tensorflow.Tensor, int}
      if `int`, it is the index of the loss in `metrics`
  parameters : {list or tensorflow.Variables}
      All the parameters will be updated by the `optimizer`, if None
      or empty list is given, use ComputationalGraph to get
      all variables with Parameters roles related to the objectives
  optz_kwargs : {None, dict}
      keyword arguments for the optimizer constructor;
      None means the default ``{'lr': 0.001}``
  init_vars : bool (default: True)
      automatically initialize all variables
  labels : {None, list of string}
      Given labels for classification task
  seed : int
      specific random seed for reproducible
  verbose : int
      0 - Turn off all log
      1 - only show notification
      2 - show notification, important log and summary
      3 - Show progress, summary, notification and logging
      4 - Show debug information and everything

  Return
  ------
  Function used for prediction
  """
  from odin import backend as K
  # NOTE: defaults are immutable (tuples / None sentinel) to avoid the
  # shared mutable-default-argument pitfall; behavior is unchanged because
  # every sequence is normalized through `as_tuple` below.
  if optz_kwargs is None:
    optz_kwargs = {'lr': 0.001}
  # ====== preprocess inputs ====== #
  X = as_tuple(X, t=K.is_tensor)
  y_true = as_tuple(y_true, t=K.is_tensor)
  y_pred = as_tuple(y_pred, t=K.is_tensor)
  # ====== parsing objectives and metrics ====== #
  # for training
  prior_weights = _preprocess_prior_weights(y_true=y_true,
                                            prior_weights=prior_weights)
  if prior_weights is not None:
    if sample_weights is not None:
      sample_weights = sample_weights + prior_weights
    else:
      sample_weights = prior_weights
  objectives = _preprocessing_losses(as_tuple(objectives), y_true, y_pred,
                                     sample_weights=sample_weights)
  # metrics for monitoring; accuracy-style metrics are converted to
  # "1 - accuracy" so that smaller is always better for early-stopping
  metrics = as_tuple(metrics)
  get_value = lambda x: np.mean(x)
  if len(metrics) > 0 and \
  (metrics[0] == tf.metrics.accuracy or
   metrics[0] == K.metrics.categorical_accuracy):
    get_value = lambda x: 1 - np.mean(x)
  metrics = _preprocessing_losses(metrics, y_true, y_pred,
                                  inherit_losses=objectives)
  # training_metrics
  training_metrics = _preprocessing_losses(as_tuple(training_metrics),
                                           y_true, y_pred,
                                           inherit_losses=metrics)
  # sum the objectives for differentiable
  if len(objectives) > 0:
    objectives = [sum(objectives) if len(objectives) > 1 else objectives[0]]
  # ====== preprocess optimizer and get updates====== #
  if updates is None:  # not given updates
    if is_string(optimizer):
      optimizer = _parse_optimizer(optimizer)
      optimizer = optimizer(**optz_kwargs)
    elif not isinstance(optimizer, K.optimizers.Optimizer):
      raise ValueError(
          "`optimizer` must be string - name of algorithm or instance "
          "of odin.backend.optimizers.Optimizer")
    parameters = K.ComputationGraph(objectives).parameters\
        if len(parameters) == 0 else as_tuple(parameters, t=K.is_variable)
    # check objectives
    if len(objectives) == 0:
      raise RuntimeError("`objectives` must be given due to `updates=None`")
    weights = [p for p in parameters
               if K.role.has_roles(p, roles=K.role.Weight)]
    # l1 regularization
    if l1_regu > 0.:
      l1_norm = sum(tf.norm(w, ord=1) for w in weights)
      objectives[0] += l1_norm
    # l2 regularization
    if l2_regu > 0.:
      l2_norm = sum(tf.norm(w, ord=2) for w in weights)
      objectives[0] += l2_norm
    # update rules
    updates = optimizer.get_updates(objectives[0], parameters)
    # adding global norm and learning rate
    training_metrics.append(optimizer.norm)
    training_metrics.append(optimizer.lr)
  elif K.is_operation(updates):  # given updates
    optimizer = None
  else:
    raise ValueError(
        "`updates` can be None or tensorflow Operation, but given "
        "type: %s" % str(type(updates)))
  # ====== placeholders ====== #
  inputs_plh = []
  for plh in X:
    for i in (K.ComputationGraph(plh).placeholders
              if not K.is_placeholder(plh) else as_tuple(plh)):
      inputs_plh.append(i)
  outputs_plh = []
  for plh in y_true:  # no duplicated inputs (e.g. autoencoder X == y)
    if not K.is_placeholder(plh):
      plh = K.ComputationGraph(plh).placeholders
    for i in as_tuple(plh):
      if i not in inputs_plh:
        outputs_plh.append(i)
  inputs = inputs_plh + outputs_plh
  # ====== initialize variables ====== #
  if bool(init_vars):
    K.initialize_all_variables()
  # ====== creating function ====== #
  # training function
  f_train = K.function(inputs=inputs,
                       outputs=objectives + training_metrics,
                       updates=updates, training=True)
  # scoring function
  f_score = None
  if len(metrics) > 0:
    f_score = K.function(inputs=inputs, outputs=metrics, training=False)
  # prediction function
  f_pred = K.function(inputs=inputs_plh,
                      outputs=y_pred[0] if len(y_pred) == 1 else y_pred,
                      training=False)
  # ====== preprocessing data ====== #
  train_data, valid_data = _preprocessing_data(train_data, valid_data)
  # print some debug information if necessary
  if verbose >= 4:
    print("%s %s %s" % (ctext("============", 'cyan'),
                        ctext("Prepare for Training", 'red'),
                        ctext("============", 'cyan')))
    print(ctext("Input placeholders:", 'yellow'))
    for i in inputs_plh:
      print(" * ", str(i))
    print(ctext("Output placeholders:", 'yellow'))
    for i in outputs_plh:
      print(" * ", str(i))
    print(ctext("Parameters:", 'yellow'))
    for p in parameters:
      print(" * ", p.name, '-', p.shape, ';', p.dtype.name)
    print(ctext("Optimizer:", 'yellow'))
    print(" * ", str(optimizer))
    print(" * Optimizer kwargs:", optz_kwargs)
    print(" * L1:", l1_regu)
    print(" * L2:", l2_regu)
    print(ctext("Training:", 'yellow'))
    print(" * Valid freq:", valid_freq)
    print(" * Patience:", patience)
    print(" * Threshold:", threshold)
    print(" * Rollback:", rollback)
    print(" * Batch size:", batch_size)
    print(" * Epoch:", epochs)
    print(" * Shuffle:", shuffle)
    print(" * Seed:", seed)
    print(ctext("Objectives:", 'yellow'))
    for o in objectives:
      print(" * ", str(o))
    print(ctext("Weights:", 'yellow'))
    print(" * Prior:", str(prior_weights))
    print(" * Sample:", str(sample_weights))
    print(ctext("Metrics:", 'yellow'))
    for m in metrics:
      print(" * ", str(m))
    print(ctext("Training metrics:", 'yellow'))
    for t in training_metrics:
      print(" * ", str(t))
    print(ctext("Training Data:", 'yellow'), str(train_data))
    print(ctext("Validating Data:", 'yellow'), str(valid_data))
    print(ctext("Labels:", 'yellow'), labels)
  # ====== create trainer ====== #
  callback_log = True if verbose > 0 else False
  trainer = MainLoop(batch_size=batch_size,
                     seed=seed if shuffle else None,
                     shuffle_level=2 if shuffle else 0,
                     allow_rollback=rollback,
                     verbose=verbose, labels=labels)
  trainer.set_checkpoint(path=None, obj=None, variables=parameters)
  # create callback
  callbacks = [NaNDetector(patience=patience, log=callback_log)]
  if valid_data is not None and f_score is not None:
    callbacks.append(
        EarlyStopGeneralizationLoss(task_name='valid',
                                    output_name=metrics[0],
                                    threshold=threshold,
                                    patience=patience,
                                    log=callback_log,
                                    get_value=get_value))
  trainer.set_callbacks(callbacks)
  # set the tasks
  trainer.set_train_task(func=f_train, data=train_data,
                         epoch=epochs, name='train')
  if valid_data is not None and f_score is not None:
    trainer.set_valid_task(func=f_score, data=valid_data,
                           freq=Timer(percentage=valid_freq),
                           name='valid')
  # running
  trainer.run()
  return f_pred
def train(X, y_true, y_pred, train_data, valid_data=None, valid_freq=1.,
          patience=3, threshold=5, rollback=True,
          objectives=(tf.losses.softmax_cross_entropy,),
          metrics=(0,), training_metrics=(),
          l1_regu=0., l2_regu=0., parameters=(),
          prior_weights=None, sample_weights=None,
          batch_size=256, epochs=8, shuffle=True,
          optimizer='rmsprop', optz_kwargs=None, updates=None,
          init_vars=True, labels=None, seed=5218, verbose=2):
  """Build train/score/predict functions for a model and run the main loop.

  Parameters
  ----------
  rollback : bool (default: True)
      if True, allow rollback to the best checkpoint during training
  objectives : {callable, tensorflow.Tensor}
      if `callable`, the function must take `y_true`, and `y_pred`
      The objectives must be differentiable and used for training.
  metrics : {callable, tensorflow.Tensor, int}
      if `callable`, the function must take `y_true`, and `y_pred`
      The `metrics` is for monitoring the training process.
      if `int`, it is the index of the loss in `objectives`
      NOTE: the first metrics in the list will be used for
      early-stopping (smaller is better).
  training_metrics : {callable, tensorflow.Tensor, int}
      if `int`, it is the index of the loss in `metrics`
  parameters : {list or tensorflow.Variables}
      All the parameters will be updated by the `optimizer`, if None
      or empty list is given, use ComputationalGraph to get
      all variables with Parameters roles related to the objectives
  optz_kwargs : {None, dict}
      keyword arguments for the optimizer constructor;
      None means the default ``{'lr': 0.001}``
  init_vars : bool (default: True)
      automatically initialize all variables
  labels : {None, list of string}
      Given labels for classification task
  seed : int
      specific random seed for reproducible
  verbose : int
      0 - Turn off all log
      1 - only show notification
      2 - show notification, important log and summary
      3 - Show progress, summary, notification and logging
      4 - Show debug information and everything

  Return
  ------
  Function used for prediction
  """
  from odin import backend as K
  # NOTE: defaults are immutable (tuples / None sentinel) to avoid the
  # shared mutable-default-argument pitfall; behavior is unchanged because
  # every sequence is normalized through `as_tuple` below.
  if optz_kwargs is None:
    optz_kwargs = {'lr': 0.001}
  # ====== preprocess inputs ====== #
  X = as_tuple(X, t=K.is_tensor)
  y_true = as_tuple(y_true, t=K.is_tensor)
  y_pred = as_tuple(y_pred, t=K.is_tensor)
  # ====== parsing objectives and metrics ====== #
  # for training
  prior_weights = _preprocess_prior_weights(y_true=y_true,
                                            prior_weights=prior_weights)
  if prior_weights is not None:
    if sample_weights is not None:
      sample_weights = sample_weights + prior_weights
    else:
      sample_weights = prior_weights
  objectives = _preprocessing_losses(as_tuple(objectives), y_true, y_pred,
                                     sample_weights=sample_weights)
  # metrics for monitoring; accuracy-style metrics are converted to
  # "1 - accuracy" so that smaller is always better for early-stopping
  metrics = as_tuple(metrics)
  get_value = lambda x: np.mean(x)
  if len(metrics) > 0 and \
  (metrics[0] == tf.metrics.accuracy or
   metrics[0] == K.metrics.categorical_accuracy):
    get_value = lambda x: 1 - np.mean(x)
  metrics = _preprocessing_losses(metrics, y_true, y_pred,
                                  inherit_losses=objectives)
  # training_metrics
  training_metrics = _preprocessing_losses(as_tuple(training_metrics),
                                           y_true, y_pred,
                                           inherit_losses=metrics)
  # sum the objectives for differentiable
  if len(objectives) > 0:
    objectives = [sum(objectives) if len(objectives) > 1 else objectives[0]]
  # ====== preprocess optimizer and get updates====== #
  if updates is None:  # not given updates
    if is_string(optimizer):
      optimizer = _parse_optimizer(optimizer)
      optimizer = optimizer(**optz_kwargs)
    elif not isinstance(optimizer, K.optimizers.Optimizer):
      raise ValueError(
          "`optimizer` must be string - name of algorithm or instance "
          "of odin.backend.optimizers.Optimizer")
    parameters = K.ComputationGraph(objectives).parameters\
        if len(parameters) == 0 else as_tuple(parameters, t=K.is_variable)
    # check objectives
    if len(objectives) == 0:
      raise RuntimeError("`objectives` must be given due to `updates=None`")
    weights = [p for p in parameters
               if K.role.has_roles(p, roles=K.role.Weight)]
    # l1 regularization
    if l1_regu > 0.:
      l1_norm = sum(tf.norm(w, ord=1) for w in weights)
      objectives[0] += l1_norm
    # l2 regularization
    if l2_regu > 0.:
      l2_norm = sum(tf.norm(w, ord=2) for w in weights)
      objectives[0] += l2_norm
    # update rules
    updates = optimizer.get_updates(objectives[0], parameters)
    # adding global norm and learning rate
    training_metrics.append(optimizer.norm)
    training_metrics.append(optimizer.lr)
  elif K.is_operation(updates):  # given updates
    optimizer = None
  else:
    raise ValueError(
        "`updates` can be None or tensorflow Operation, but given "
        "type: %s" % str(type(updates)))
  # ====== placeholders ====== #
  inputs_plh = []
  for plh in X:
    for i in (K.ComputationGraph(plh).placeholders
              if not K.is_placeholder(plh) else as_tuple(plh)):
      inputs_plh.append(i)
  outputs_plh = []
  for plh in y_true:  # no duplicated inputs (e.g. autoencoder X == y)
    if not K.is_placeholder(plh):
      plh = K.ComputationGraph(plh).placeholders
    for i in as_tuple(plh):
      if i not in inputs_plh:
        outputs_plh.append(i)
  inputs = inputs_plh + outputs_plh
  # ====== initialize variables ====== #
  if bool(init_vars):
    K.initialize_all_variables()
  # ====== creating function ====== #
  # training function
  f_train = K.function(inputs=inputs,
                       outputs=objectives + training_metrics,
                       updates=updates, training=True)
  # scoring function
  f_score = None
  if len(metrics) > 0:
    f_score = K.function(inputs=inputs, outputs=metrics, training=False)
  # prediction function
  f_pred = K.function(inputs=inputs_plh,
                      outputs=y_pred[0] if len(y_pred) == 1 else y_pred,
                      training=False)
  # ====== preprocessing data ====== #
  train_data, valid_data = _preprocessing_data(train_data, valid_data)
  # print some debug information if necessary
  if verbose >= 4:
    print("%s %s %s" % (ctext("============", 'cyan'),
                        ctext("Prepare for Training", 'red'),
                        ctext("============", 'cyan')))
    print(ctext("Input placeholders:", 'yellow'))
    for i in inputs_plh:
      print(" * ", str(i))
    print(ctext("Output placeholders:", 'yellow'))
    for i in outputs_plh:
      print(" * ", str(i))
    print(ctext("Parameters:", 'yellow'))
    for p in parameters:
      print(" * ", p.name, '-', p.shape, ';', p.dtype.name)
    print(ctext("Optimizer:", 'yellow'))
    print(" * ", str(optimizer))
    print(" * Optimizer kwargs:", optz_kwargs)
    print(" * L1:", l1_regu)
    print(" * L2:", l2_regu)
    print(ctext("Training:", 'yellow'))
    print(" * Valid freq:", valid_freq)
    print(" * Patience:", patience)
    print(" * Threshold:", threshold)
    print(" * Rollback:", rollback)
    print(" * Batch size:", batch_size)
    print(" * Epoch:", epochs)
    print(" * Shuffle:", shuffle)
    print(" * Seed:", seed)
    print(ctext("Objectives:", 'yellow'))
    for o in objectives:
      print(" * ", str(o))
    print(ctext("Weights:", 'yellow'))
    print(" * Prior:", str(prior_weights))
    print(" * Sample:", str(sample_weights))
    print(ctext("Metrics:", 'yellow'))
    for m in metrics:
      print(" * ", str(m))
    print(ctext("Training metrics:", 'yellow'))
    for t in training_metrics:
      print(" * ", str(t))
    print(ctext("Training Data:", 'yellow'), str(train_data))
    print(ctext("Validating Data:", 'yellow'), str(valid_data))
    print(ctext("Labels:", 'yellow'), labels)
  # ====== create trainer ====== #
  callback_log = True if verbose > 0 else False
  trainer = MainLoop(batch_size=batch_size,
                     seed=seed if shuffle else None,
                     shuffle_level=2 if shuffle else 0,
                     allow_rollback=rollback,
                     verbose=verbose, labels=labels)
  trainer.set_checkpoint(path=None, obj=None, variables=parameters)
  # create callback
  callbacks = [NaNDetector(patience=patience, log=callback_log)]
  if valid_data is not None and f_score is not None:
    callbacks.append(
        EarlyStopGeneralizationLoss(task_name='valid',
                                    output_name=metrics[0],
                                    threshold=threshold,
                                    patience=patience,
                                    log=callback_log,
                                    get_value=get_value))
  trainer.set_callbacks(callbacks)
  # set the tasks
  trainer.set_train_task(func=f_train, data=train_data,
                         epoch=epochs, name='train')
  if valid_data is not None and f_score is not None:
    trainer.set_valid_task(func=f_score, data=valid_data,
                           freq=Timer(percentage=valid_freq),
                           name='valid')
  # running
  trainer.run()
  return f_pred
def test_cudnn_rnn(self):
  """Cross-check odin's cudnn RNN helpers against tensorflow's cudnn_rnn:
  parameter counts, canonical shapes, opaque-param conversion, and that the
  `N.CudnnRNN` wrapper reproduces `K.cudnn_rnn` outputs exactly."""
  if get_ngpu() == 0:
    # cudnn kernels require a GPU; skip silently otherwise
    return
  print()
  batch_size = 2
  time_steps = 5
  input_dim = 12
  hidden_dim = 8
  X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                 dtype='float32', name='X')
  for rnn_mode in ('lstm', 'rnn_relu', 'gru'):
    for num_layers in [1, 2]:
      for W_init in [init_ops.glorot_uniform_initializer(seed=1234),
                     init_ops.random_normal_initializer(seed=1234)]:
        for b_init in [0, 1]:
          for bidirectional in (True, False):
            for skip_input in (False,):
              print('RNNmode:%s' % rnn_mode,
                    "#Layers:%d" % num_layers,
                    'Bidirectional:%s' % bidirectional,
                    'SkipInput:%s' % skip_input)
              weights, biases = K.init_rnn(
                  input_dim=input_dim, hidden_dim=hidden_dim,
                  num_gates=rnn_mode, num_layers=num_layers,
                  W_init=W_init, b_init=b_init,
                  skip_input=skip_input, cudnn_vector=False,
                  is_bidirectional=bidirectional, name=None)
              # ====== check number of params ====== #
              params1 = K.params_to_cudnn(weights, biases)
              n = params1.shape[0].value
              nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
                  rnn_mode=rnn_mode, num_layers=num_layers,
                  num_units=hidden_dim, input_size=input_dim,
                  input_mode='skip_input' if skip_input else 'linear_input',
                  direction='bidirectional' if bidirectional
                  else 'unidirectional')
              nb_params = K.eval(nb_params)
              assert n == nb_params
              # ====== check canonical shape match ====== #
              kwargs = {
                  'num_layers': num_layers,
                  'num_units': hidden_dim,
                  'input_mode': 'skip_input' if skip_input
                                else 'linear_input',
                  'direction': 'bidirectional' if bidirectional
                               else 'unidirectional'}
              # consistent elif-chain dispatch on rnn_mode (the original
              # mixed `if` and `elif`; 'rnn_tanh' is kept for completeness
              # even though the outer loop does not currently produce it)
              if rnn_mode == 'lstm':
                rnn = cudnn_rnn.CudnnLSTM(**kwargs)
              elif rnn_mode == 'gru':
                rnn = cudnn_rnn.CudnnGRU(**kwargs)
              elif rnn_mode == 'rnn_relu':
                rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
              elif rnn_mode == 'rnn_tanh':
                rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
              rnn.build(input_shape=(None, None, input_dim))
              assert len(weights) == len(rnn.canonical_weight_shapes)
              assert len(biases) == len(rnn.canonical_bias_shapes)
              for w, s in zip(weights, rnn.canonical_weight_shapes):
                assert tuple(w.shape.as_list()) == s
              # ====== check params conversion ====== #
              K.initialize_all_variables()
              params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
                  rnn_mode=rnn_mode, num_layers=num_layers,
                  num_units=hidden_dim, input_size=input_dim,
                  input_mode='skip_input' if skip_input else 'linear_input',
                  direction='bidirectional' if bidirectional
                  else 'unidirectional',
                  weights=weights, biases=biases)
              assert np.all(K.eval(params1) == K.eval(params2))
              # ====== odin cudnn implementation ====== #
              name = 'TEST' + uuid(length=25)
              outputs = K.cudnn_rnn(
                  X=X, num_units=hidden_dim, rnn_mode=rnn_mode,
                  num_layers=num_layers, parameters=None,
                  skip_input=skip_input, is_bidirectional=bidirectional,
                  dropout=0.1, name=name)
              K.initialize_all_variables()
              s0 = K.eval(outputs[0]).sum()
              s1 = K.eval(outputs[1]).sum()
              all_variables = K.get_all_variables(scope=name)
              new_weights = [i for i in all_variables
                             if K.role.has_roles(i, roles=K.role.Weight)]
              new_biases = [i for i in all_variables
                            if K.role.has_roles(i, roles=K.role.Bias)]
              new_weights, new_biases = K.sort_cudnn_params(
                  new_weights, new_biases, rnn_mode=rnn_mode)
              # FIX: the original asserts compared `weights`/`biases` to
              # themselves (always true); compare against the variables
              # recovered from the odin op instead.
              assert len(weights) == len(new_weights)
              assert len(biases) == len(new_biases)
              for i, j in zip(weights + biases, new_weights + new_biases):
                assert i.name.split('/')[-1] == j.name.split('/')[-1]
              # ====== CudnnRNN wrapper ====== #
              rnn = N.CudnnRNN(num_units=hidden_dim,
                               W_init=new_weights, b_init=new_biases,
                               rnn_mode=rnn_mode, num_layers=num_layers,
                               skip_input=skip_input,
                               is_bidirectional=bidirectional,
                               return_states=True, dropout=0.)
              outputs = rnn(X)
              K.initialize_all_variables()
              y0 = K.eval(outputs[0]).sum()
              y1 = K.eval(outputs[1]).sum()
              # wrapper must reproduce the backend op exactly
              assert y0 == s0
              assert y1 == s1