def _apply(self, X, h0=None, c0=None, mask=None):
    """Run the CuDNN-backed recurrent layer on ``X``.

    Parameters
    ----------
    X : tensor
        Input sequence tensor; axis 0 is the batch dimension
        (its size is read via ``K.get_shape(X, native=True)[0]``).
        # NOTE(review): assumed layout is (batch, time, features) — confirm
        # against K.rnn_dnn's contract.
    h0, c0 : tensor or None
        Initial hidden / cell states; validated and broadcast by
        ``_check_cudnn_hidden_init`` against
        (num_layers * directions, batch_size, num_units).
    mask : tensor or None
        Accepted but never referenced in this body — presumably unused
        because cuDNN RNNs do not support masking; verify at call sites.

    Returns
    -------
    The cuDNN RNN outputs; only the output sequence (``results[0]``)
    when ``self.return_states`` is False, otherwise the full tuple
    returned by ``K.rnn_dnn``.
    """
    batch_size = K.get_shape(X, native=True)[0]
    is_bidirectional = self.direction_mode == 'bidirectional'
    # 'norm' mode precomputes the input projection ourselves, so cuDNN is
    # told to skip its own input projection.
    input_mode = ('skip' if self.input_mode == 'skip' or
                  self.input_mode == 'norm' else 'linear')
    # ====== precompute input ====== #
    # linear or norm input mode
    if self.input_mode == 'norm':
        X = K.dot(X, self.W_in)
        # normalize all axes except the time dimension
        bn = BatchNorm(axes=(0, 1), activation=K.linear,
                       gamma_init=self.gamma, beta_init=self.beta,
                       mean_init=self.mean, inv_std_init=self.inv_std)
        X = bn(X)
        # cudnn RNN doesn't support multiple inputs: the projection covers
        # all N gates at once, so fold the gate axis back by averaging.
        # N = number of gate blocks per cell type (rnn: 1, gru: 3, lstm: 4).
        shapeX = K.get_shape(X, native=True)
        ndims = K.ndim(X)
        if 'rnn' in self.rnn_mode:
            N = 1
        elif self.rnn_mode == 'gru':
            N = 3
        else:
            N = 4
        newshape = [shapeX[i] for i in range(ndims - 1)] + [self.num_units, N]
        X = K.mean(K.reshape(X, newshape), axis=-1)
    # ====== hidden state ====== #
    # bidirectional RNNs carry one state per direction per layer
    num_layers = self.num_layers * 2 if is_bidirectional else self.num_layers
    require_shape = (num_layers, batch_size, self.num_units)
    h0 = _check_cudnn_hidden_init(h0, require_shape, self, 'h0')
    c0 = _check_cudnn_hidden_init(c0, require_shape, self, 'c0')
    # ====== parameters ====== #
    if self.params_split:
        # cuDNN expects one flat parameter vector; concatenate every
        # trainable parameter except the initial-state variables.
        parameters = K.concatenate([K.flatten(i, outdim=1)
                                    for i in self.parameters
                                    if not has_roles(i, INITIAL_STATE)])
    else:
        # NOTE(review): `self.params` here vs `self.parameters` above —
        # presumably both exist on this class; verify they are consistent.
        parameters = self.params
    # ====== return CuDNN RNN ====== #
    results = K.rnn_dnn(X, hidden_size=self.num_units,
                        rnn_mode=self.rnn_mode,
                        num_layers=self.num_layers,
                        parameters=parameters,
                        h0=h0, c0=c0,
                        input_mode=input_mode,
                        direction_mode=self.direction_mode,
                        dropout=self.dropout, name=self.name)
    if not self.return_states:
        results = results[0] # only get the output
    return results
def _apply(self, x):
    """Batch-normalize `x` over `self.axes`, then apply the activation.

    While training, the batch's own mean / inverse-std are used and
    exponential-moving-average updates for the stored statistics are
    attached to the returned output; at inference the stored
    `self.mean` / `self.inv_std` are used instead.
    """
    shape_in = K.get_shape(x)
    training = K.is_training()
    rank = K.ndim(x)
    # choose statistics: batch statistics while training, stored otherwise
    if training:
        mean = K.mean(x, self.axes)
        inv_std = K.inv(K.sqrt(K.var(x, self.axes) + self.epsilon))
        # exponential moving averages, attached as default updates below
        running_mean = (1 - self.alpha) * self.mean + self.alpha * mean
        running_inv_std = (1 - self.alpha) * self.inv_std + self.alpha * inv_std
    else:
        mean = self.mean
        inv_std = self.inv_std
    # broadcast pattern: 'x' (broadcastable) on each normalized axis,
    # consecutive parameter axes everywhere else
    kept_axes = iter(range(rank - len(self.axes)))
    bcast = []
    for axis in range(rank):
        bcast.append('x' if axis in self.axes else next(kept_axes))
    # optional learned shift/scale, broadcast up to the input rank
    if hasattr(self, 'beta'):
        beta = K.dimshuffle(self.beta, bcast)
    else:
        beta = 0
    if hasattr(self, 'gamma'):
        gamma = K.dimshuffle(self.gamma, bcast)
    else:
        gamma = 1
    # normalize, rescale and shift
    x_norm = (x - K.dimshuffle(mean, bcast)) * \
        (gamma * K.dimshuffle(inv_std, bcast)) + beta
    # propagate the static shape to the output
    K.add_shape(x_norm, shape_in)
    y = self.activation(x_norm)
    # training-time only: attach the running-statistic updates to the output
    if training:
        add_updates(y, self.mean, running_mean)
        add_updates(y, self.inv_std, running_inv_std)
    return y
        # -- continuation of an N.Sequence([...]) opened above this chunk --
        N.Conv(64, (3, 3), pad='same', stride=(1, 1), activation=K.relu),
        N.Conv(64, (3, 3), pad='same', stride=(1, 1), activation=K.relu),
        N.Pool(pool_size=(2, 2), ignore_border=True, strides=None, mode='max'),
        N.Dropout(level=0.25),
        N.Flatten(outdim=2),
        N.Dense(512, activation=K.relu),
        N.Dropout(level=0.5),
        N.Dense(10, activation=K.softmax)
], debug=True)
# Build the graph twice: training mode (dropout active) and inference mode.
K.set_training(True)
y_train = f(X)
K.set_training(False)
y_pred = f(X)
cost_train = K.mean(K.categorical_crossentropy(y_train, y_true))
# NOTE(review): despite its name, `cost_pred` is an accuracy metric
# (categorical_accuracy), while `cost_eval` is the cross-entropy loss.
cost_pred = K.mean(K.categorical_accuracy(y_pred, y_true))
cost_eval = K.mean(K.categorical_crossentropy(y_pred, y_true))
parameters = f.parameters
print('Parameters:', [p.name for p in parameters])
optz = K.optimizers.RMSProp()
updates = optz.get_updates(cost_train, parameters)
print("Build training function ...")
f_train = K.function([X, y_true], cost_train, updates=updates)
print("Build scoring function ...")
f_score = K.function([X, y_true], [cost_pred, cost_eval])
# ===========================================================================
# Create trainer
           # -- continuation of an N.Conv(...) call opened above this chunk --
           filter_size=(5, 1), strides=1, pad='valid', activation=K.relu),
    N.Pool(pool_size=(35, 1), pad='valid', mode='max'),
    N.Flatten(outdim=2),
    N.Dense(num_units=128, activation=K.relu),
    N.Dense(num_units=nb_labels, activation=K.softmax)
], debug=True)
y_pred = f(X)
# train everything except the embedding weights
params = [p for p in f.parameters if not has_roles(p, EmbeddingWeight)]
print('Params:', [p.name for p in params])
cost_train = K.mean(K.categorical_crossentropy(y_pred, y))
cost_score = K.mean(K.categorical_accuracy(y_pred, y))
opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, params)
print('Build training function ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ...')
f_score = K.function([X, y], cost_score)
# main training loop: one 'train' task plus a validation subtask run
# once per epoch (freq=1.)
trainer = training.MainLoop(batch_size=128, seed=1208, shuffle_level=2)
trainer.set_task(f_train, (X_train, y_train), epoch=args['epoch'], name='train')
trainer.set_subtask(f_score, (X_valid, y_valid), freq=1., name='valid')
    # -- continuation of an N.Sequence([...]) opened above this chunk --
    N.Pool(pool_size=(2, 2), strides=None),
    N.Conv(64, (3, 3), strides=(1, 1), pad='same', activation=K.relu),
    N.Pool(pool_size=(2, 2), strides=None),
    N.Flatten(outdim=2),
    N.Dense(256, activation=K.relu),
    N.Dense(10, activation=K.softmax)
], debug=True)
ops = cPickle.loads(cPickle.dumps(ops)) # test if the ops is pickle-able
# training-mode vs inference-mode graphs of the same network
K.set_training(True)
y_pred_train = ops(X)
K.set_training(False)
y_pred_score = ops(X)
cost_train = K.mean(K.categorical_crossentropy(y_pred_train, y))
cost_test_1 = K.mean(K.categorical_crossentropy(y_pred_score, y))
cost_test_2 = K.mean(K.categorical_accuracy(y_pred_score, y))
cost_test_3 = K.confusion_matrix(y_pred_score, y, labels=range(10))
parameters = ops.parameters
# NOTE(review): `arg['lr']` — other scripts in this file use `args[...]`;
# verify this is not a typo for `args`.
optimizer = K.optimizers.SGD(lr=arg['lr'])
# NOTE(review): optimizer is called directly here, while other scripts use
# optimizer.get_updates(...) — presumably __call__ aliases it; confirm.
updates = optimizer(cost_train, parameters)
print('Building training functions ...')
f_train = K.function([X, y], [cost_train, optimizer.norm], updates=updates)
print('Building testing functions ...')
f_test = K.function([X, y], [cost_test_1, cost_test_2, cost_test_3])
print('Building predicting functions ...')
f_pred = K.function(X, y_pred_score)
# ===========================================================================
            # -- continuation of a gate N.Dense(...) opened above this chunk --
            activation=K.linear, name='cellupdate'), # cell-update
        N.Dense(lstm_output_size, activation=K.linear,
                name='outgate') # output-gate
    ], merge_function=K.concatenate),
    # feed the precomputed gate activations straight into the LSTM
    # (input_mode='skip'), keep only the last time step
    N.LSTM(num_units=lstm_output_size, input_mode='skip')[:, -1],
    N.Dense(1, activation=K.sigmoid)
], debug=True)
# training-mode vs inference-mode graphs of the same network
K.set_training(True)
y_pred_train = f(X_train)
K.set_training(False)
y_pred_score = f(X_score)
cost_train = K.mean(K.binary_crossentropy(y_pred_train, y))
# NOTE(review): `cost_score` is binary accuracy, not a loss, despite the name
cost_score = K.mean(K.binary_accuracy(y_pred_score, y))
parameters = f.parameters
print('Params:', [p.name for p in parameters])
updates = K.optimizers.Adam(lr=0.001).get_updates(cost_train, parameters)
print('Building training function ...')
f_train = K.function([X_train, y], cost_train, updates)
print('Building scoring function ...')
f_score = K.function([X_score, y], cost_score)
# ===========================================================================
# Test
# ===========================================================================
                     # -- continuation of a recipe/normalizer call opened
                     # above this chunk --
                     mean=ds['mspec_mean'], std=ds['mspec_std']),
    fuel.Stacking(left_context=10, right_context=10, shift=None),
    fuel.OneHotTrans(n_classes=10),
    fuel.CreateBatch())
print('Number of CPU for feeders:', data.ncpu)
# ===========================================================================
# Training
# ===========================================================================
X = K.placeholder(shape=(None, 2583), name='X')
y = K.placeholder(shape=(None, 10), name='y')
f = N.Sequence([
    N.Dense(128, activation=K.linear),
    N.Dense(10, activation=K.softmax)
])
y_ = f(X)
cost_train = K.mean(K.categorical_crossentropy(y_, y))
f_train = K.function([X, y], cost_train)
# Benchmark: identical training loop fed single-process vs multi-process.
# NOTE(review): indentation reconstructed from a whitespace-collapsed
# source — the placement of `print(_)` (inside vs after the loop) should
# be verified against the original file.
# ====== single process ====== #
with UnitTimer():
    for _, (i, j) in enumerate(get_data()):
        f_train(i, j)
print(_)
# ====== multi-processes ====== #
with UnitTimer():
    for _, (i, j) in enumerate(data):
        f_train(i, j)
print(_)
    # -- continuation of an N.Sequence([...]) opened above this chunk --
    N.Pool(pool_size=(5, 1), pad='valid', mode='max'),
    N.Conv(num_filters=128, filter_size=(5, 1), strides=1, pad='valid',
           activation=K.relu),
    N.Pool(pool_size=(35, 1), pad='valid', mode='max'),
    N.Flatten(outdim=2),
    N.Dense(num_units=128, activation=K.relu),
    N.Dense(num_units=nb_labels, activation=K.softmax)
], debug=True)
y_pred = f(X)
# train everything except the embedding weights
params = [p for p in f.parameters if not has_roles(p, EmbeddingWeight)]
print('Params:', [p.name for p in params])
cost_train = K.mean(K.categorical_crossentropy(y_pred, y))
cost_score = K.mean(K.categorical_accuracy(y_pred, y))
opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, params)
print('Build training function ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ...')
f_score = K.function([X, y], cost_score)
# main training loop: 'train' task plus once-per-epoch validation subtask
trainer = training.MainLoop(batch_size=128, seed=1208, shuffle_level=2)
trainer.set_task(f_train, (X_train, y_train), epoch=args['epoch'], name='train')
trainer.set_subtask(f_score, (X_valid, y_valid), freq=1., name='valid')
trainer.set_callback([
    training.ProgressMonitor('train', format='Train:{:.4f}'),
def standard_trainer(train_data, valid_data,
                     X, y_train, y_score, y_target, parameters,
                     test_data=None,
                     cost_train=None, cost_score=None, optimizer=None,
                     confusion_matrix=False, gradient_norm=True,
                     save_path=None, save_obj=None,
                     batch_size=64, nb_epoch=3, valid_freq=0.6,
                     seed=1208, shuffle_level=2,
                     patience=3, earlystop=5, report_path=None):
    """Build a ready-to-run training MainLoop from symbolic in/outputs.

    Parameters
    ----------
    train_data, valid_data, test_data:
        data sources passed straight to MainLoop.set_task / set_subtask.
    X, y_train, y_score, y_target:
        symbolic input variables, training outputs, scoring outputs and
        target variables; each may be a single variable or a list/tuple.
    parameters:
        variables to optimize.
    cost_train: list of callable
        each function will be apply to a pair y_train and y_target
        (defaults to categorical cross-entropy; same for cost_score).
    optimizer:
        a backend Optimizer, or any object with
        get_updates(loss_or_grads, params); defaults to SGD + Nesterov.
    confusion_matrix:
        False, an integer number of classes, or an explicit label list —
        when truthy a confusion matrix is appended to the scoring outputs.
    gradient_norm:
        if True and the optimizer exposes `.norm`, report it during training.
    report_path:
        accepted but not referenced in this body — verify before relying on it.

    Return
    ------
    MainLoop, and History

    Note
    ----
    """
    from odin import backend as K
    # ====== prepare variables and cost ====== #
    # check optimizer
    if optimizer is None:
        optimizer = K.optimizers.SGD(lr=0.0001, momentum=0.9, nesterov=True)
    elif not isinstance(optimizer, K.optimizers.Optimizer) and \
            not hasattr(optimizer, "get_updates"):
        raise ValueError("Invalid optimizer, the optimizer must be instance of "
                         "backend.optimizers.Optimizer or having function "
                         "get_updates(self, loss_or_grads, params).")
    # check the cost functions
    if cost_train is None:
        cost_train = K.categorical_crossentropy
    if cost_score is None:
        cost_score = K.categorical_crossentropy
    cost_train = as_tuple(cost_train)
    cost_score = as_tuple(cost_score)
    # check input X, y, parameters
    X = as_tuple(X)
    y_train = as_tuple(y_train)
    y_score = as_tuple(y_score)
    y_target = as_tuple(y_target)
    parameters = as_tuple(parameters)
    if len(X) == 0 or len(y_train) == 0 or len(y_score) == 0 or \
            len(y_target) == 0 or len(parameters) == 0:
        raise ValueError(
            "X(len=%d), y_train(len=%d), y_score(len=%d), y_target(len=%d),"
            "and parameters(len=%d) must be list or tuple with length > 0."
            % (len(X), len(y_train), len(y_score), len(y_target),
               len(parameters)))
    # get all cost
    # broadcast a single output / target against the list of cost functions
    if len(y_train) == 1:
        y_train = y_train * len(cost_train)
    if len(y_score) == 1:
        y_score = y_score * len(cost_score)
    cost_train = [K.mean(f_cost(y_, y), axis=0)
                  for f_cost, y_, y in zip(
                      cost_train, y_train,
                      y_target * len(cost_train)
                      if len(y_target) == 1 else y_target)]
    cost_score = [K.mean(f_cost(y_, y), axis=0)
                  for f_cost, y_, y in zip(
                      cost_score, y_score,
                      y_target * len(cost_score)
                      if len(y_target) == 1 else y_target)]
    # add confusion matrix
    if confusion_matrix:
        if not is_number(confusion_matrix) and \
                not isinstance(confusion_matrix, (tuple, list, np.ndarray)):
            raise ValueError("confusion_matrix must be an integer, or list, tuple"
                             " specifies number of classes, or list of all classes.")
        # an integer means "this many classes" -> labels 0..n-1
        if is_number(confusion_matrix):
            confusion_matrix = list(range(int(confusion_matrix)))
        for y_, y in zip(y_score, y_target):
            cost_score.append(
                K.confusion_matrix(y_pred=y_, y_true=y,
                                   labels=confusion_matrix))
    # get the update
    # only the FIRST training cost drives the optimizer updates
    updates = optimizer.get_updates(cost_train[0], parameters)
    # ====== create function ====== #
    grad_norm = [] if not gradient_norm or not hasattr(optimizer, 'norm') else \
        [optimizer.norm]
    cost_train = cost_train + grad_norm
    print('Building training functions ...')
    f_train = K.function(inputs=X + y_target, outputs=cost_train,
                         updates=updates)
    print('Building scoring functions ...')
    f_score = K.function(inputs=X + y_target, outputs=cost_score)
    # ====== Create trainer ====== #
    task = MainLoop(batch_size=batch_size, seed=seed,
                    shuffle_level=shuffle_level)
    if save_path is not None and save_obj is not None:
        task.set_save(save_path, save_obj, save_hist=True)
    # set task
    task.set_task(f_train, train_data, epoch=nb_epoch, name='train')
    task.set_subtask(f_score, valid_data, freq=valid_freq, name='valid')
    if test_data is not None:
        # when=-1, epoch=1: run the test subtask once at the very end
        task.set_subtask(f_score, test_data, when=-1, epoch=1, name='test')
    # format for score
    # exclude the confusion matrix (last output) from the printed format
    score_format = 'Results:' + __format_string(
        len(cost_score) - (1 if confusion_matrix else 0))
    # track the confusion matrix (last score output) by summing it over batches
    score_tracking = {(len(cost_score) - 1): lambda x: sum(x)} \
        if confusion_matrix else []
    # set the callback
    history = History()
    task.set_callback([
        ProgressMonitor(name='train',
                        format='Results:' + __format_string(len(cost_train))),
        ProgressMonitor(name='valid', format=score_format,
                        tracking=score_tracking),
        (ProgressMonitor(name='test', format=score_format,
                         tracking=score_tracking)
         if test_data is not None else None),
        history,
        # early stopping on the FIRST validation cost (mean over batches)
        EarlyStopGeneralizationLoss(
            'valid', threshold=earlystop, patience=patience,
            get_value=lambda x: np.mean([i[0] for i in x]
                                        if isinstance(x[0], (tuple, list))
                                        else x)),
        NaNDetector(('train', 'valid'), patience=patience, rollback=True)
    ])
    return task, history
y = K.placeholder(shape=(None,), name='y', dtype='int32') # =========================================================================== # Build network # =========================================================================== ops = N.Sequence([ N.Flatten(outdim=2), N.Dense(512, activation=K.relu), N.Dense(256, activation=K.relu), N.Dense(10, activation=K.softmax) ]) ops = cPickle.loads(cPickle.dumps(ops)) # test if the ops is pickle-able y_pred_train = ops(X_train) y_pred_score = ops(X_score) cost_train = K.mean(K.categorical_crossentropy(y_pred_train, y)) cost_test_1 = K.mean(K.categorical_crossentropy(y_pred_score, y)) cost_test_2 = K.mean(K.categorical_accuracy(y_pred_score, y)) cost_test_3 = K.confusion_matrix(y_pred_score, y, labels=range(10)) parameters = ops.parameters optimizer = K.optimizers.RMSProp(lr= 0.0001, clipnorm=100.) updates = optimizer(cost_train, parameters) print('Building training functions ...') f_train = K.function([X_train, y], [cost_train, optimizer.norm], updates=updates) print('Building testing functions ...') f_test = K.function([X_score, y], [cost_test_1, cost_test_2, cost_test_3]) # ====== normalize 0-1 ====== # if False: