def _apply(self, X, h0=None, c0=None, mask=None):
    batch_size = K.get_shape(X, native=True)[0]
    is_bidirectional = self.direction_mode == 'bidirectional'
    input_mode = ('skip' if self.input_mode in ('skip', 'norm') else 'linear')
    # ====== precompute input ====== #
    # linear or norm input mode
    if self.input_mode == 'norm':
        X = K.dot(X, self.W_in)
        # normalize all axes except the time dimension
        bn = BatchNorm(axes=(0, 1), activation=K.linear,
                       gamma_init=self.gamma, beta_init=self.beta,
                       mean_init=self.mean, inv_std_init=self.inv_std)
        X = bn(X)
        # cuDNN RNN doesn't support multiple inputs, so merge the N gate
        # blocks back into a single num_units input by averaging
        shapeX = K.get_shape(X, native=True)
        ndims = K.ndim(X)
        if 'rnn' in self.rnn_mode:
            N = 1
        elif self.rnn_mode == 'gru':
            N = 3
        else:  # lstm
            N = 4
        newshape = [shapeX[i] for i in range(ndims - 1)] + [self.num_units, N]
        X = K.mean(K.reshape(X, newshape), axis=-1)
    # ====== hidden state ====== #
    num_layers = self.num_layers * 2 if is_bidirectional else self.num_layers
    require_shape = (num_layers, batch_size, self.num_units)
    h0 = _check_cudnn_hidden_init(h0, require_shape, self, 'h0')
    c0 = _check_cudnn_hidden_init(c0, require_shape, self, 'c0')
    # ====== parameters ====== #
    if self.params_split:
        parameters = K.concatenate([K.flatten(i, outdim=1)
                                    for i in self.parameters
                                    if not has_roles(i, INITIAL_STATE)])
    else:
        parameters = self.params
    # ====== return CuDNN RNN ====== #
    results = K.rnn_dnn(X, hidden_size=self.num_units,
                        rnn_mode=self.rnn_mode,
                        num_layers=self.num_layers,
                        parameters=parameters,
                        h0=h0, c0=c0,
                        input_mode=input_mode,
                        direction_mode=self.direction_mode,
                        dropout=self.dropout, name=self.name)
    if not self.return_states:
        results = results[0]  # only get the output
    return results
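# ---------------------------------------------------------------------------
# A minimal NumPy sketch (illustrative only, not part of the ODIN API) of the
# reshape-and-mean trick used by the 'norm' input mode above: the projected
# input holds N stacked gate blocks of num_units each, and averaging over the
# last axis collapses them into the single num_units input that the cuDNN
# kernel expects in 'skip' mode.
# ---------------------------------------------------------------------------
import numpy as np

batch, time, num_units, N = 2, 5, 8, 4  # N = 4 gate blocks for an LSTM
Xp = np.random.randn(batch, time, num_units * N)  # result of the projection
Xp = Xp.reshape(batch, time, num_units, N).mean(axis=-1)
assert Xp.shape == (batch, time, num_units)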
@property
def auxiliary_variables(self):
    return [var for var in self.variables
            if has_roles(var, [AUXILIARY])]
@property
def parameters(self):
    return [var for var in self.trainable_variables
            if has_roles(var, [PARAMETER])]
@property
def is_training(self):
    self._check_initialized()
    for i in self._inputs:
        if has_roles(i, TRAINING):
            return True
    return False
def parameters(self): """ return all TensorVariables which have the PARAMETER role""" return [i for i in self.variables if has_roles(i, PARAMETER)]
], debug=True)
K.set_training(True)
y_train = f(X)
K.set_training(False)
y_score = f(X)
# ====== create cost ====== #
cost_train = K.mean(K.categorical_crossentropy(y_train, y))
cost_test1 = K.mean(K.categorical_crossentropy(y_score, y))
cost_test2 = K.mean(K.categorical_accuracy(y_score, y))
cost_test3 = K.confusion_matrix(y_score, y, labels=range(10))
# ====== create optimizer ====== #
parameters = [p for p in f.parameters if has_roles(p, [WEIGHT, BIAS])]
optimizer = K.optimizers.RMSProp(lr=0.0001)
# ===========================================================================
# Standard trainer
# ===========================================================================
trainer, hist = training.standard_trainer(
    train_data=train_feeder, valid_data=valid_feeder, test_data=test_feeder,
    cost_train=cost_train, cost_score=[cost_test1, cost_test2],
    cost_regu=None, parameters=parameters, optimizer=optimizer,
    confusion_matrix=cost_test3, gradient_norm=True,
    batch_size=4,
           activation=K.relu),
    N.Pool(pool_size=(5, 1), pad='valid', mode='max'),
    N.Conv(num_filters=128, filter_size=(5, 1), strides=1, pad='valid',
           activation=K.relu),
    N.Pool(pool_size=(35, 1), pad='valid', mode='max'),
    N.Flatten(outdim=2),
    N.Dense(num_units=128, activation=K.relu),
    N.Dense(num_units=nb_labels, activation=K.softmax)
], debug=True)
y_pred = f(X)
params = [p for p in f.parameters if not has_roles(p, EMBEDDING)]
print('Params:', [p.name for p in params])
cost_train = K.mean(K.categorical_crossentropy(y_pred, y))
cost_score = K.mean(K.categorical_accuracy(y_pred, y))
opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, params)
print('Build training function ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ...')
f_score = K.function([X, y], cost_score)
trainer = training.MainLoop(batch_size=128, seed=1208, shuffle_level=2)
trainer.set_task(f_train, (X_train, y_train),
           merge_function=K.concatenate),
    N.LSTM(num_units=lstm_output_size, input_mode='skip')[:, -1],
    N.Dense(1, activation=K.sigmoid)
], debug=True)
print('Building ODIN network ...')
K.set_training(True)
y_odin_train = net_odin(X)
K.set_training(False)
y_odin_score = net_odin(X)
cost_train = K.mean(K.binary_crossentropy(y_odin_train, y))
cost_score = K.mean(K.binary_accuracy(y_odin_score, y))
parameters = [p for p in net_odin.parameters
              if not has_roles(p, INITIAL_STATE)]
print('Params:', [p.name for p in parameters])
opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, parameters)
print('Build training function ODIN ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ODIN ...')
f_score = K.function([X, y], cost_score)
print('Build predicting function ODIN ...')
f_pred = K.function(X, y_odin_score)
trainer = training.MainLoop(batch_size=batch_size, seed=12082518,