def test_computational_graph3(self):
    """Validate the number of updates found by ``K.ComputationGraph``.

    The same network is applied to one placeholder twice: once after
    ``K.set_training(True)`` and once after ``K.set_training(False)``.
    The test expects 4 updates on the training output, none on the
    scoring output, and identical output shapes for both.
    """
    X = K.placeholder(shape=(None, 28, 28, 3))
    f = N.Sequence([
        N.Conv(32, 3, pad='same', activation=K.linear),
        N.BatchNorm(activation=K.relu),
        N.Flatten(outdim=2),
        N.Dense(16),
        N.BatchNorm(),
        N.Dense(10)
    ])
    K.set_training(True)
    try:
        y_train = f(X)
    finally:
        # Always switch training mode off again, so a failure while
        # building the training output cannot leak the flag into the
        # tests that run afterwards.
        K.set_training(False)
    y_score = f(X)
    # assertEqual (instead of assertTrue(a == b)) reports both operands
    # when the check fails.
    train_shape = K.get_shape(y_train)
    score_shape = K.get_shape(y_score)
    self.assertEqual(train_shape, score_shape)
    self.assertEqual(score_shape, (None, 10))
    cc_train = K.ComputationGraph(y_train)
    cc_score = K.ComputationGraph(y_score)
    self.assertEqual(len(cc_score.updates), 0)
    self.assertEqual(len(cc_train.updates), 4)
    # create real function
    fn_train = K.function(X, y_train)
    fn_score = K.function(X, y_score)
    shape1 = fn_train(np.random.rand(12, 28, 28, 3)).shape
    shape2 = fn_score(np.random.rand(12, 28, 28, 3)).shape
    self.assertEqual(shape1, shape2)
    self.assertEqual(shape1, (12, 10))
def test_simple_rnn(self):
    """Check ``N.RNN`` outputs across placeholders and a pickle round-trip.

    The same layer is applied to three placeholders of shape
    ``(None, 8, 32)`` (the third one after pickling and unpickling the
    layer); all compiled functions are expected to return identical
    values for the same input. Applying the layer to a placeholder with
    33 features is expected to raise.
    """
    np.random.seed(12082518)
    x = np.random.rand(128, 8, 32)
    X = K.placeholder(shape=(None, 8, 32))
    X1 = K.placeholder(shape=(None, 8, 32))
    X2 = K.placeholder(shape=(None, 8, 32))
    X3 = K.placeholder(shape=(None, 8, 33))
    f = N.RNN(32, activation=K.relu, input_mode='skip')
    y = f(X, mask=K.ones(shape=(128, 8)))
    graph = K.ComputationGraph(y)
    self.assertEqual(len(graph.inputs), 1)
    f1 = K.function([X], y)
    x1 = f1(x)
    # ====== different placeholder ====== #
    y = f(X1)
    f2 = K.function([X1], y)
    # Evaluate the function built on X1. Calling f1 again here would
    # compare f1's output with itself and never exercise f2.
    x2 = f2(x)
    self.assertTrue(np.array_equal(x1[0], x2[0]))
    # ====== pickle load ====== #
    f = cPickle.loads(cPickle.dumps(f))
    y = f(X2)
    f2 = K.function([X2], y)
    x3 = f2(x)
    self.assertTrue(np.array_equal(x2[0], x3[0]))
    # ====== other input shape ====== #
    # Any exception raised while applying the layer, compiling the
    # function or running it on 33-feature input counts as the expected
    # rejection.
    with self.assertRaises(Exception):
        y = f(X3)
        f3 = K.function([X3], y)
        f3(np.random.rand(128, 8, 33))
def test_computational_graph1(self):
    """Check the variable collections exposed by ``K.ComputationGraph``.

    A two-layer dense network is applied to a single placeholder and one
    auxiliary constant is attached to its output; the test then verifies
    the size of each collection reported by the graph and that building
    a second graph from the same output compares equal to the first.
    """
    X = K.placeholder(shape=(None, 32), name='input')
    # `z` is not referenced again in this test.
    z = K.variable(np.random.rand(10, 10), name='z')
    layers = [
        N.Dense(16, activation=K.relu),
        N.Dense(8, activation=K.softmax),
    ]
    f = N.Sequence(layers)
    y = f(X)
    add_auxiliary_variable(y, K.constant(10, name='aux_const'))
    graph = K.ComputationGraph(y)
    # (attribute name, expected number of entries), checked in order
    expected_sizes = (
        ('placeholders', 1),
        ('trainable_variables', 4),
        ('parameters', 4),
        ('dict_of_placeholders', 1),
        ('auxiliary_variables', 1),
    )
    for attr_name, expected in expected_sizes:
        self.assertEqual(len(getattr(graph, attr_name)), expected,
                         msg=attr_name)
    # Only accessed, not asserted on: no idea how to test this.
    graph.intermediary_variables
    self.assertEqual(len(graph.updates), 1)
    self.assertEqual(K.ComputationGraph(y), graph)
def test_computational_graph2(self):
    """Check ``K.ComputationGraph`` on ``dot(X, Y) + Z`` with an
    auxiliary-role intermediate, then evaluate the compiled function."""
    np.random.seed(1208)
    X = K.variable(np.zeros((8, 12)), name='X')
    Y = K.variable(np.random.rand(12, 8), name='Y')
    Z = K.placeholder(shape=(8, 8), name='Z')
    # The matrix product is tagged with the Auxiliary role before the
    # placeholder is added on top of it.
    product = K.dot(X, Y)
    add_roles(product, Auxiliary)
    total = product + Z
    graph = K.ComputationGraph(total)
    # (attribute name, expected number of entries), checked in order
    expected_sizes = (
        ('trainable_variables', 2),
        ('placeholders', 1),
        ('updates', 1),
        ('auxiliary_variables', 1),
    )
    for attr_name, expected in expected_sizes:
        self.assertEqual(len(getattr(graph, attr_name)), expected,
                         msg=attr_name)
    fn = K.function(Z, [total] + graph.auxiliary_variables)
    results = fn(np.random.rand(8, 8))
    main_out = results[0]
    aux_out = results[1]
    # Compares only the first five characters of the sum's repr.
    self.assertEqual(repr(np.sum(main_out))[:5], "32.20")
    self.assertEqual(np.sum(aux_out), 0)
    # NOTE(review): X is created as zeros and no statement visible in
    # this test assigns it, yet it is expected to hold only 12.0 here --
    # presumably via the single update counted above; confirm.
    self.assertEqual(np.unique(K.eval(X)).tolist(), [12.])
def test_load_save1(self):
    """Check that an ``N.Dense`` layer survives a pickle round-trip.

    The parameter sums, ``num_units``, the initializer names and the
    activation of the unpickled layer must match those of the original.
    Training mode is switched on at the start and is not reset by this
    test.
    """
    K.set_training(True)
    X = K.placeholder((None, 1, 28, 28))
    layer = N.Dense(128, activation=K.relu)
    y = layer(X)
    # Reference values taken from the graph of the applied layer.
    W, b = (K.get_value(p).sum() for p in K.ComputationGraph(y).parameters)
    num_units, W_init, b_init, activation = (
        layer.num_units, layer.W_init, layer.b_init, layer.activation)
    # Round-trip through pickle and read the same attributes back.
    restored = cPickle.loads(cPickle.dumps(layer))
    W1, b1 = (K.get_value(p).sum() for p in restored.parameters)
    num_units1, W_init1, b_init1, activation1 = (
        restored.num_units, restored.W_init, restored.b_init,
        restored.activation)
    self.assertEqual(W1, W)
    self.assertEqual(b1, b)
    self.assertEqual(num_units1, num_units)
    self.assertEqual(W_init1.__name__, W_init.__name__)
    self.assertEqual(b_init.__name__, b_init1.__name__)
    self.assertEqual(activation1, activation)
N.Conv(num_filters=64, filter_size=(5, 3)), N.BatchNorm(), N.Pool(pool_size=(3, 2), strides=(2, 2), name='PoolOutput2'), N.Flatten(outdim=2), N.Dense(512, name="LatentDense"), N.BatchNorm(), N.Dense(512), N.BatchNorm(), N.Dense(n_classes) ], debug=1) # ====== create outputs ====== # y_logit = f(X) y_proba = tf.nn.softmax(y_logit) z1 = K.ComputationGraph(y_proba).get(roles=N.Pool, scope='PoolOutput1', beginning_scope=False)[0] z2 = K.ComputationGraph(y_proba).get(roles=N.Pool, scope='PoolOutput2', beginning_scope=False)[0] z3 = K.ComputationGraph(y_proba).get(scope='LatentDense', beginning_scope=False)[0] print('Latent space:', ctext([z1, z2, z3], 'cyan')) # ====== create loss ====== # ce = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=y_logit) acc = K.metrics.categorical_accuracy(y_true=y, y_pred=y_proba) cm = K.metrics.confusion_matrix(y_true=y, y_pred=y_proba, labels=len(labels)) # ====== params and optimizing ====== # updates = K.optimizers.Adam(lr=0.001).minimize( loss=ce, roles=[K.role.TrainableParameter],
N.StatsPool(axes=1, output_mode='concat'), N.Flatten(outdim=2), N.Dense(512, name="LatentOutput"), N.BatchNorm(), N.Dense(512), N.BatchNorm(), N.Dense(n_speakers, activation=K.linear, b_init=init_ops.constant_initializer(value=0)) ], debug=1) # ====== create outputs ====== # y_logit = x_vec(X) y_proba = tf.nn.softmax(y_logit) z = K.ComputationGraph(y_proba).get(roles=N.Dense, scope='LatentOutput', beginning_scope=False)[0] print('Latent space:', ctext(z, 'cyan')) # ====== create loss ====== # ce = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=y_logit) acc = K.metrics.categorical_accuracy(y_true=y, y_pred=y_proba) # ====== params and optimizing ====== # updates = K.optimizers.Adam(lr=0.0001, name='XAdam').minimize( loss=ce, roles=[K.role.TrainableParameter], exclude_roles=[K.role.InitialState], verbose=True) K.initialize_all_variables() # # ====== Functions ====== # print('Building training functions ...') f_train = K.function(inputs, [ce, acc], updates=updates, training=True)
def train(X, y_true, y_pred, train_data,
          valid_data=None, valid_freq=1.,
          patience=3, threshold=5, rollback=True,
          objectives=[tf.losses.softmax_cross_entropy],
          metrics=[0], training_metrics=[],
          l1_regu=0., l2_regu=0., parameters=[],
          prior_weights=None, sample_weights=None,
          batch_size=256, epochs=8, shuffle=True,
          optimizer='rmsprop', optz_kwargs={'lr': 0.001}, updates=None,
          init_vars=True, labels=None, seed=5218, verbose=2):
    """Build training/scoring/prediction functions and run a training loop.

    Parameters
    ----------
    X : {tensor, list of tensor}
        model inputs; placeholders are used directly, for any other
        tensor the placeholders found by `ComputationGraph` are used
    y_true : {tensor, list of tensor}
        target tensors, resolved to placeholders like `X`; placeholders
        already collected from `X` are not added twice
        (e.g. autoencoder X == y)
    y_pred : {tensor, list of tensor}
        model outputs, also the outputs of the returned function
    train_data, valid_data :
        passed through `_preprocessing_data`; no validation task is
        created if `valid_data` is None or no `metrics` are given
    valid_freq : float
        given as `percentage` to the `Timer` of the validation task
    patience : int
        given to `NaNDetector` and `EarlyStopGeneralizationLoss`
    threshold : number
        given to `EarlyStopGeneralizationLoss`
    rollback : bool (default: True)
        if True, allow rollback to the best checkpoint during training
    objectives : {callable, tensorflow.Tensor}
        if `callable`, the function must take `y_true`, and `y_pred`
        The objectives must be differentiable and used for training.
        Multiple objectives are summed into a single one.
    metrics : {callable, tensorflow.Tensor, int}
        if `callable`, the function must take `y_true`, and `y_pred`
        The `metrics` is for monitoring the training process.
        if `int`, it is the index of the loss in `objectives`
        NOTE: the first metrics in the list will be used for
        early-stopping (smaller is better).
    training_metrics : {callable, tensorflow.Tensor, int}
        if `int`, it is the index of the loss in `metrics`
    l1_regu, l2_regu : float
        if greater than 0, the sum of the L1 (respectively L2) norms of
        all parameters with the `Weight` role is added to the objective
    parameters : {list or tensorflow.Variables}
        All the parameters will be updated by the `optimizer`, if None
        or empty list is given, use ComputationalGraph to get
        all variables with Parameters roles related to the objectives
    prior_weights, sample_weights :
        `prior_weights` is first processed by `_preprocess_prior_weights`;
        if both are available they are added together, and the result
        is passed to the objectives as `sample_weights`
    batch_size, epochs : int
        batch size of the main loop and number of training epochs
    shuffle : bool
        if False, the main loop gets `shuffle_level=0` and no seed
    optimizer : {str, odin.backend.optimizers.Optimizer}
        name of the algorithm or an optimizer instance; only used when
        `updates` is None
    optz_kwargs : dict
        keyword arguments for the optimizer when it is given by name
    updates : {None, tensorflow Operation}
        if None, the updates are created from `optimizer`, `objectives`
        and `parameters`
    init_vars : bool (default: True)
        automatically initialize all variables
    labels : {None, list of string}
        Given labels for classification task
    seed : int
        specific random seed for reproducible
    verbose : int
        0 - Turn off all log
        1 - only show notification
        2 - show notification, important log and summary
        3 - Show progress, summary, notification and logging
        4 - Show debug information and everything

    Returns
    -------
    Function used for prediction (takes the input placeholders found
    from `X`, returns `y_pred`)

    Raises
    ------
    ValueError
        if `updates` is None and `optimizer` is neither a string nor an
        `Optimizer` instance, or if `updates` is neither None nor a
        tensorflow Operation
    RuntimeError
        if `updates` is None and no objectives are available
    """
    from odin import backend as K
    # ====== preprocess inputs ====== #
    X = as_tuple(X, t=K.is_tensor)
    y_true = as_tuple(y_true, t=K.is_tensor)
    y_pred = as_tuple(y_pred, t=K.is_tensor)
    # ====== parsing objectives and metrics ====== #
    # for training
    prior_weights = _preprocess_prior_weights(y_true=y_true,
                                              prior_weights=prior_weights)
    if prior_weights is not None:
        if sample_weights is not None:
            sample_weights = sample_weights + prior_weights
        else:
            sample_weights = prior_weights
    objectives = _preprocessing_losses(as_tuple(objectives),
                                       y_true, y_pred,
                                       sample_weights=sample_weights)
    # metrics for monitoring
    metrics = as_tuple(metrics)
    get_value = lambda x: np.mean(x)
    # early stopping treats smaller as better, so an accuracy metric is
    # converted to an error rate
    if len(metrics) > 0 and \
            (metrics[0] == tf.metrics.accuracy or
             metrics[0] == K.metrics.categorical_accuracy):
        get_value = lambda x: 1 - np.mean(x)
    metrics = _preprocessing_losses(metrics, y_true, y_pred,
                                    inherit_losses=objectives)
    # training_metrics
    training_metrics = _preprocessing_losses(as_tuple(training_metrics),
                                             y_true, y_pred,
                                             inherit_losses=metrics)
    # sum the objectives for differentiable
    if len(objectives) > 0:
        objectives = [
            sum(objectives) if len(objectives) > 1 else objectives[0]
        ]
    # ====== preprocess optimizer and get updates====== #
    if updates is None:  # not given updates
        if is_string(optimizer):
            optimizer = _parse_optimizer(optimizer)
            optimizer = optimizer(**optz_kwargs)
        elif not isinstance(optimizer, K.optimizers.Optimizer):
            raise ValueError(
                "`optimizer` must be string - name of algorithm or instance "
                "of odin.backend.optimizers.Optimizer")
        # check objectives first, so the explicit error is raised before
        # any graph traversal over an empty objective list
        if len(objectives) == 0:
            raise RuntimeError(
                "`objectives` must be given due to `updates=None`")
        # `None` is documented as equivalent to an empty list
        if parameters is None or len(parameters) == 0:
            parameters = K.ComputationGraph(objectives).parameters
        else:
            parameters = as_tuple(parameters, t=K.is_variable)
        weights = [
            p for p in parameters
            if K.role.has_roles(p, roles=K.role.Weight)
        ]
        # NOTE(review): `l1_regu` / `l2_regu` only switch the penalties
        # on; the norms are added without being scaled by these values.
        # Confirm whether they were meant to be coefficients.
        # l1 regularization
        if l1_regu > 0.:
            l1_norm = sum(tf.norm(w, ord=1) for w in weights)
            objectives[0] += l1_norm
        # l2 regularization
        if l2_regu > 0.:
            l2_norm = sum(tf.norm(w, ord=2) for w in weights)
            objectives[0] += l2_norm
        # update rules
        updates = optimizer.get_updates(objectives[0], parameters)
        # adding global norm and learning rate
        training_metrics.append(optimizer.norm)
        training_metrics.append(optimizer.lr)
    elif K.is_operation(updates):  # given updates
        optimizer = None
    else:
        raise ValueError(
            "`updates` can be None or tensorflow Operation, but given "
            "type: %s" % str(type(updates)))
    # ====== placeholders ====== #
    inputs_plh = []
    for plh in X:
        for i in (K.ComputationGraph(plh).placeholders
                  if not K.is_placeholder(plh)
                  else as_tuple(plh)):
            inputs_plh.append(i)
    outputs_plh = []
    for plh in y_true:
        # no duplicated inputs (e.g. autoencoder X == y)
        if not K.is_placeholder(plh):
            plh = K.ComputationGraph(plh).placeholders
        for i in as_tuple(plh):
            if i not in inputs_plh:
                outputs_plh.append(i)
    inputs = inputs_plh + outputs_plh
    # ====== initialize variables ====== #
    if bool(init_vars):
        K.initialize_all_variables()
    # ====== creating function ====== #
    # training function
    f_train = K.function(inputs=inputs,
                         outputs=objectives + training_metrics,
                         updates=updates, training=True)
    # scoring function
    f_score = None
    if len(metrics) > 0:
        f_score = K.function(inputs=inputs, outputs=metrics,
                             training=False)
    # prediction function
    f_pred = K.function(inputs=inputs_plh,
                        outputs=y_pred[0] if len(y_pred) == 1 else y_pred,
                        training=False)
    # ====== preprocessing data ====== #
    train_data, valid_data = _preprocessing_data(train_data, valid_data)
    # print some debug information if necessary
    if verbose >= 4:
        print(
            "%s %s %s" % (ctext("============", 'cyan'), ctext(
                "Prepare for Training", 'red'), ctext("============", 'cyan')))
        print(ctext("Input placeholders:", 'yellow'))
        for i in inputs_plh:
            print(" * ", str(i))
        print(ctext("Output placeholders:", 'yellow'))
        for i in outputs_plh:
            print(" * ", str(i))
        print(ctext("Parameters:", 'yellow'))
        # `parameters` may still be None here when `updates` was given
        for p in (() if parameters is None else parameters):
            print(" * ", p.name, '-', p.shape, ';', p.dtype.name)
        print(ctext("Optimizer:", 'yellow'))
        print(" * ", str(optimizer))
        print(" * Optimizer kwargs:", optz_kwargs)
        print(" * L1:", l1_regu)
        print(" * L2:", l2_regu)
        print(ctext("Training:", 'yellow'))
        print(" * Valid freq:", valid_freq)
        print(" * Patience:", patience)
        print(" * Threshold:", threshold)
        print(" * Rollback:", rollback)
        print(" * Batch size:", batch_size)
        print(" * Epoch:", epochs)
        print(" * Shuffle:", shuffle)
        print(" * Seed:", seed)
        print(ctext("Objectives:", 'yellow'))
        for o in objectives:
            print(" * ", str(o))
        print(ctext("Weights:", 'yellow'))
        print(" * Prior:", str(prior_weights))
        print(" * Sample:", str(sample_weights))
        print(ctext("Metrics:", 'yellow'))
        for m in metrics:
            print(" * ", str(m))
        print(ctext("Training metrics:", 'yellow'))
        for t in training_metrics:
            print(" * ", str(t))
        print(ctext("Training Data:", 'yellow'), str(train_data))
        print(ctext("Validating Data:", 'yellow'), str(valid_data))
        print(ctext("Labels:", 'yellow'), labels)
    # ====== create trainer ====== #
    callback_log = verbose > 0
    trainer = MainLoop(batch_size=batch_size,
                       seed=seed if shuffle else None,
                       shuffle_level=2 if shuffle else 0,
                       allow_rollback=rollback,
                       verbose=verbose, labels=labels)
    trainer.set_checkpoint(path=None, obj=None, variables=parameters)
    # create callback
    callbacks = [NaNDetector(patience=patience, log=callback_log)]
    if valid_data is not None and f_score is not None:
        callbacks.append(
            EarlyStopGeneralizationLoss(task_name='valid',
                                        output_name=metrics[0],
                                        threshold=threshold,
                                        patience=patience,
                                        log=callback_log,
                                        get_value=get_value))
    trainer.set_callbacks(callbacks)
    # set the tasks
    trainer.set_train_task(func=f_train, data=train_data,
                           epoch=epochs, name='train')
    if valid_data is not None and f_score is not None:
        trainer.set_valid_task(func=f_score, data=valid_data,
                               freq=Timer(percentage=valid_freq),
                               name='valid')
    # running
    trainer.run()
    return f_pred
N.StatsPool(axes=1, output_mode='concat'), N.Flatten(outdim=2, name="StatsPooling"), N.Dense(512, name="LatentDense"), N.BatchNorm(activation=K.relu), N.Dense(512), N.BatchNorm(activation=K.relu), N.Dense(num_units=n_classes, activation=K.linear, b_init=init_ops.constant_initializer(0)) ], debug=1) # ====== create outputs ====== # y_logit = f(X) y_proba = tf.nn.softmax(y_logit) z1 = K.ComputationGraph(y_proba).get(roles=N.Dense, scope='LatentDense', beginning_scope=False)[0] z2 = K.ComputationGraph(y_proba).get(roles=N.TimeDelayedConv, scope='LatentTDNN', beginning_scope=False)[0] print('Latent space:', ctext([z1, z2], 'cyan')) # ====== create loss ====== # ce = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=y_logit) acc = K.metrics.categorical_accuracy(y_true=y, y_pred=y_proba) cm = K.metrics.confusion_matrix(y_true=y, y_pred=y_proba, labels=len(labels)) # ====== params and optimizing ====== # updates = K.optimizers.Adam(lr=0.0001).minimize( loss=ce, roles=[K.role.TrainableParameter], exclude_roles=[K.role.InitialState], verbose=True)