Example 1
File: rnn.py Project: liqin123/odin
 def _apply(self, X, h0=None, c0=None, mask=None):
     batch_size = K.get_shape(X, native=True)[0]
     is_bidirectional = self.direction_mode == 'bidirectional'
     input_mode = ('skip' if self.input_mode in ('skip', 'norm')
                   else 'linear')
     # ====== precompute input ====== #
     # linear or norm input mode
     if self.input_mode == 'norm':
         X = K.dot(X, self.W_in)
         # normalize all axes except the time dimension
         bn = BatchNorm(axes=(0, 1),
                        activation=K.linear,
                        gamma_init=self.gamma,
                        beta_init=self.beta,
                        mean_init=self.mean,
                        inv_std_init=self.inv_std)
         X = bn(X)
         # cudnnRNN doesn't support multiple inputs, so average the
         # per-gate projections back into a single input tensor
         shapeX = K.get_shape(X, native=True)
         ndims = K.ndim(X)
         # number of gate blocks per cell: rnn_relu/rnn_tanh=1, gru=3, lstm=4
         if 'rnn' in self.rnn_mode: N = 1
         elif self.rnn_mode == 'gru': N = 3
         else: N = 4
         newshape = [shapeX[i]
                     for i in range(ndims - 1)] + [self.num_units, N]
         X = K.mean(K.reshape(X, newshape), axis=-1)
     # ====== hidden state ====== #
     num_layers = self.num_layers * 2 if is_bidirectional else self.num_layers
     require_shape = (num_layers, batch_size, self.num_units)
     h0 = _check_cudnn_hidden_init(h0, require_shape, self, 'h0')
     c0 = _check_cudnn_hidden_init(c0, require_shape, self, 'c0')
     # ====== parameters ====== #
     if self.params_split:
         parameters = K.concatenate([
             K.flatten(i, outdim=1) for i in self.parameters
             if not has_roles(i, INITIAL_STATE)
         ])
     else:
         parameters = self.params
     # ====== return CuDNN RNN ====== #
     results = K.rnn_dnn(X,
                         hidden_size=self.num_units,
                         rnn_mode=self.rnn_mode,
                         num_layers=self.num_layers,
                         parameters=parameters,
                         h0=h0,
                         c0=c0,
                         input_mode=input_mode,
                         direction_mode=self.direction_mode,
                         dropout=self.dropout,
                         name=self.name)
     if not self.return_states:
         results = results[0]  # only get the output
     return results
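
The input_mode == 'norm' branch above projects X through W_in, batch-normalizes it, then folds the result back down: cudnnRNN accepts only one input tensor, so the N per-gate projections are averaged out. Below is a standalone NumPy sketch of that reshape-and-average step; the shapes are illustrative, not taken from the source.

import numpy as np

# (batch, time, num_units * N) -> (batch, time, num_units, N), then
# average over the per-gate axis; N = 4 gate blocks for an LSTM
batch, time, num_units, N = 2, 5, 8, 4
X = np.random.randn(batch, time, num_units * N)
X = X.reshape(batch, time, num_units, N).mean(axis=-1)
assert X.shape == (batch, time, num_units)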
Example 2
 def auxiliary_variables(self):
     return [var for var in self.variables if has_roles(var, [AUXILIARY])]
Example 3
 def parameters(self):
     return [
         var for var in self.trainable_variables
         if has_roles(var, [PARAMETER])
     ]
Example 4
 def is_training(self):
     self._check_initialized()
     for i in self._inputs:
         if has_roles(i, TRAINING):
             return True
     return False
Example 5
 def parameters(self):
     """ return all TensorVariables which have the PARAMETER role"""
     return [i for i in self.variables if has_roles(i, PARAMETER)]
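
Every example above filters variables with has_roles. Its implementation is not shown in these snippets; the following is a minimal sketch, assuming each variable carries its role tags in a roles attribute (the attribute name is an assumption, not ODIN's confirmed internals). It accepts either a single role or a list, matching both call styles seen above.

def has_roles(var, roles):
    # Sketch only: the real ODIN helper may store roles differently.
    if not isinstance(roles, (list, tuple)):
        roles = [roles]
    return any(r in getattr(var, 'roles', []) for r in roles)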
Example 6
    ],
    debug=True)

K.set_training(True)
y_train = f(X)
K.set_training(False)
y_score = f(X)

# ====== create cost ====== #
cost_train = K.mean(K.categorical_crossentropy(y_train, y))
cost_test1 = K.mean(K.categorical_crossentropy(y_score, y))
cost_test2 = K.mean(K.categorical_accuracy(y_score, y))
cost_test3 = K.confusion_matrix(y_score, y, labels=range(10))

# ====== create optimizer ====== #
parameters = [p for p in f.parameters if has_roles(p, [WEIGHT, BIAS])]
optimizer = K.optimizers.RMSProp(lr=0.0001)
# ===========================================================================
# Standard trainer
# ===========================================================================
trainer, hist = training.standard_trainer(train_data=train_feeder,
                                          valid_data=valid_feeder,
                                          test_data=test_feeder,
                                          cost_train=cost_train,
                                          cost_score=[cost_test1, cost_test2],
                                          cost_regu=None,
                                          parameters=parameters,
                                          optimizer=optimizer,
                                          confusion_matrix=cost_test3,
                                          gradient_norm=True,
                                          batch_size=4,
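
This example and the two that follow share one idiom: select trainable parameters by role before handing them to the optimizer, keeping weights and biases while excluding variables such as embeddings and RNN initial states. A condensed sketch of that pattern follows; the odin.basic import path and this particular combination of roles are assumptions made for illustration.

from odin.basic import has_roles, WEIGHT, BIAS, EMBEDDING, INITIAL_STATE  # import path assumed

# keep weight/bias tensors; drop embedding matrices and RNN initial
# states, which should not receive gradient updates here
params = [p for p in f.parameters
          if has_roles(p, [WEIGHT, BIAS])
          and not has_roles(p, [EMBEDDING, INITIAL_STATE])]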
Example 7
           activation=K.relu),
    N.Pool(pool_size=(5, 1), pad='valid', mode='max'),
    N.Conv(num_filters=128,
           filter_size=(5, 1),
           strides=1,
           pad='valid',
           activation=K.relu),
    N.Pool(pool_size=(35, 1), pad='valid', mode='max'),
    N.Flatten(outdim=2),
    N.Dense(num_units=128, activation=K.relu),
    N.Dense(num_units=nb_labels, activation=K.softmax)
],
               debug=True)

y_pred = f(X)
params = [p for p in f.parameters if not has_roles(p, EMBEDDING)]
print('Params:', [p.name for p in params])

cost_train = K.mean(K.categorical_crossentropy(y_pred, y))
cost_score = K.mean(K.categorical_accuracy(y_pred, y))

opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, params)

print('Build training function ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ...')
f_score = K.function([X, y], cost_score)

trainer = training.MainLoop(batch_size=128, seed=1208, shuffle_level=2)
trainer.set_task(f_train, (X_train, y_train),
Example 8
            merge_function=K.concatenate),
        N.LSTM(num_units=lstm_output_size, input_mode='skip')[:, -1],
        N.Dense(1, activation=K.sigmoid)
    ],
    debug=True)

print('Building ODIN network ...')
K.set_training(True)
y_odin_train = net_odin(X)
K.set_training(False)
y_odin_score = net_odin(X)

cost_train = K.mean(K.binary_crossentropy(y_odin_train, y))
cost_score = K.mean(K.binary_accuracy(y_odin_score, y))
parameters = [
    p for p in net_odin.parameters if not has_roles(p, INITIAL_STATE)
]
print('Params:', [p.name for p in parameters])

opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, parameters)

print('Build training function ODIN ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ODIN ...')
f_score = K.function([X, y], cost_score)
print('Build predicting function ODIN ...')
f_pred = K.function(X, y_odin_score)

trainer = training.MainLoop(batch_size=batch_size,
                            seed=12082518,