Example #1
File: rnn.py Project: liqin123/odin
 def _apply(self, X, h0=None, c0=None, mask=None):
     batch_size = K.get_shape(X, native=True)[0]
     is_bidirectional = self.direction_mode == 'bidirectional'
     input_mode = ('skip' if self.input_mode in ('skip', 'norm')
                   else 'linear')
     # ====== precompute input ====== #
     # linear or norm input mode
     if self.input_mode == 'norm':
         X = K.dot(X, self.W_in)
         # normalize over all axes except the feature (last) dimension
         bn = BatchNorm(axes=(0, 1),
                        activation=K.linear,
                        gamma_init=self.gamma,
                        beta_init=self.beta,
                        mean_init=self.mean,
                        inv_std_init=self.inv_std)
         X = bn(X)
         # cuDNN RNN doesn't support multiple inputs
         shapeX = K.get_shape(X, native=True)
         ndims = K.ndim(X)
         # number of weight blocks per unit: rnn=1, gru=3, lstm=4
         if 'rnn' in self.rnn_mode: N = 1
         elif self.rnn_mode == 'gru': N = 3
         else: N = 4
         newshape = [shapeX[i]
                     for i in range(ndims - 1)] + [self.num_units, N]
         X = K.mean(K.reshape(X, newshape), axis=-1)
     # ====== hidden state ====== #
     num_layers = self.num_layers * 2 if is_bidirectional else self.num_layers
     require_shape = (num_layers, batch_size, self.num_units)
     h0 = _check_cudnn_hidden_init(h0, require_shape, self, 'h0')
     c0 = _check_cudnn_hidden_init(c0, require_shape, self, 'c0')
     # ====== parameters ====== #
     if self.params_split:
         parameters = K.concatenate([
             K.flatten(i, outdim=1) for i in self.parameters
             if not has_roles(i, INITIAL_STATE)
         ])
     else:
         parameters = self.params
     # ====== return CuDNN RNN ====== #
     results = K.rnn_dnn(X,
                         hidden_size=self.num_units,
                         rnn_mode=self.rnn_mode,
                         num_layers=self.num_layers,
                         parameters=parameters,
                         h0=h0,
                         c0=c0,
                         input_mode=input_mode,
                         direction_mode=self.direction_mode,
                         dropout=self.dropout,
                         name=self.name)
     if not self.return_states:
         results = results[0]  # only get the output
     return results
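A note on the 'norm' branch above: after the X = K.dot(X, self.W_in) projection, the last axis holds num_units * N values (N weight blocks: 1 for a plain RNN, 3 for GRU, 4 for LSTM), which are reshaped and averaged into a single (..., num_units) tensor because the cuDNN RNN takes only one input. A minimal NumPy sketch of that step, with hypothetical sizes:

import numpy as np

# hypothetical sizes; only the reshape-and-average step matters here
batch, time, num_units, N = 2, 5, 8, 4   # N = 4 weight blocks for an LSTM
X = np.random.randn(batch, time, num_units * N)
# split the last axis into (num_units, N), then average away the N blocks
X_avg = X.reshape(batch, time, num_units, N).mean(axis=-1)
assert X_avg.shape == (batch, time, num_units)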
Example #2
 def _apply(self, x):
     input_shape = K.get_shape(x)
     is_training = K.is_training()
     ndim = K.ndim(x)
     # when training, normalize the input by its own batch mean and std
     if not is_training:
         mean = self.mean
         inv_std = self.inv_std
     else:
         mean = K.mean(x, self.axes)
         inv_std = K.inv(K.sqrt(K.var(x, self.axes) + self.epsilon))
         # running averages, registered as updates on the output below:
         running_mean = ((1 - self.alpha) * self.mean + self.alpha * mean)
         running_inv_std = ((1 - self.alpha) * self.inv_std +
                            self.alpha * inv_std)
     # prepare dimshuffle pattern inserting broadcastable axes as needed
     param_axes = iter(range(ndim - len(self.axes)))
     pattern = [
         'x' if input_axis in self.axes else next(param_axes)
         for input_axis in range(ndim)
     ]
     # apply dimshuffle pattern to all parameters
     beta = 0 if not hasattr(self, 'beta') else K.dimshuffle(
         self.beta, pattern)
     gamma = 1 if not hasattr(self, 'gamma') else K.dimshuffle(
         self.gamma, pattern)
     # normalize
     normalized = (x - K.dimshuffle(mean, pattern)) * \
         (gamma * K.dimshuffle(inv_std, pattern)) + beta
     # set shape for output
     K.add_shape(normalized, input_shape)
     # activated output
     output = self.activation(normalized)
     # add updates for final output
     if is_training:
         add_updates(output, self.mean, running_mean)
         add_updates(output, self.inv_std, running_inv_std)
     return output
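The dimshuffle pattern above inserts a broadcastable 'x' axis wherever an axis was normalized away, so the per-feature parameters broadcast back against x. A standalone NumPy sketch of the same idea, with hypothetical shapes:

import numpy as np

x = np.random.randn(16, 8, 32, 32)   # e.g. (batch, channels, height, width)
axes = (0, 2, 3)                     # axes the statistics are reduced over
mean = x.mean(axis=axes)             # per-channel statistics, shape (8,)
# equivalent of pattern ['x', 0, 'x', 'x']: size-1 axes where reduced
shape = [1 if ax in axes else x.shape[ax] for ax in range(x.ndim)]
centered = x - mean.reshape(shape)   # (1, 8, 1, 1) broadcasts against x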
Example #3
    N.Conv(64, (3, 3), pad='same', stride=(1, 1), activation=K.relu),
    N.Conv(64, (3, 3), pad='same', stride=(1, 1), activation=K.relu),
    N.Pool(pool_size=(2, 2), ignore_border=True, strides=None, mode='max'),
    N.Dropout(level=0.25),
    N.Flatten(outdim=2),
    N.Dense(512, activation=K.relu),
    N.Dropout(level=0.5),
    N.Dense(10, activation=K.softmax)
],
               debug=True)
K.set_training(True)
y_train = f(X)
K.set_training(False)
y_pred = f(X)

cost_train = K.mean(K.categorical_crossentropy(y_train, y_true))
cost_pred = K.mean(K.categorical_accuracy(y_pred, y_true))
cost_eval = K.mean(K.categorical_crossentropy(y_pred, y_true))
parameters = f.parameters
print('Parameters:', [p.name for p in parameters])

optz = K.optimizers.RMSProp()
updates = optz.get_updates(cost_train, parameters)

print("Build training function ...")
f_train = K.function([X, y_true], cost_train, updates=updates)
print("Build scoring function ...")
f_score = K.function([X, y_true], [cost_pred, cost_eval])

# ===========================================================================
# Create trainer
Example #4
           filter_size=(5, 1),
           strides=1,
           pad='valid',
           activation=K.relu),
    N.Pool(pool_size=(35, 1), pad='valid', mode='max'),
    N.Flatten(outdim=2),
    N.Dense(num_units=128, activation=K.relu),
    N.Dense(num_units=nb_labels, activation=K.softmax)
],
               debug=True)

y_pred = f(X)
params = [p for p in f.parameters if not has_roles(p, EmbeddingWeight)]
print('Params:', [p.name for p in params])

cost_train = K.mean(K.categorical_crossentropy(y_pred, y))
cost_score = K.mean(K.categorical_accuracy(y_pred, y))

opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, params)

print('Build training function ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ...')
f_score = K.function([X, y], cost_score)

trainer = training.MainLoop(batch_size=128, seed=1208, shuffle_level=2)
trainer.set_task(f_train, (X_train, y_train),
                 epoch=args['epoch'],
                 name='train')
trainer.set_subtask(f_score, (X_valid, y_valid), freq=1., name='valid')
Example #5
    N.Pool(pool_size=(2, 2), strides=None),
    N.Conv(64, (3, 3), strides=(1, 1), pad='same', activation=K.relu),
    N.Pool(pool_size=(2, 2), strides=None),
    N.Flatten(outdim=2),
    N.Dense(256, activation=K.relu),
    N.Dense(10, activation=K.softmax)
],
                 debug=True)
ops = cPickle.loads(cPickle.dumps(ops))  # check that the ops object is pickle-able

K.set_training(True)
y_pred_train = ops(X)
K.set_training(False)
y_pred_score = ops(X)

cost_train = K.mean(K.categorical_crossentropy(y_pred_train, y))
cost_test_1 = K.mean(K.categorical_crossentropy(y_pred_score, y))
cost_test_2 = K.mean(K.categorical_accuracy(y_pred_score, y))
cost_test_3 = K.confusion_matrix(y_pred_score, y, labels=range(10))

parameters = ops.parameters
optimizer = K.optimizers.SGD(lr=arg['lr'])
updates = optimizer(cost_train, parameters)
print('Building training functions ...')
f_train = K.function([X, y], [cost_train, optimizer.norm], updates=updates)
print('Building testing functions ...')
f_test = K.function([X, y], [cost_test_1, cost_test_2, cost_test_3])
print('Building predicting functions ...')
f_pred = K.function(X, y_pred_score)

# ===========================================================================
Example #6
                        activation=K.linear,
                        name='cellupdate'),  # cell-update
                N.Dense(lstm_output_size, activation=K.linear,
                        name='outgate')  # output-gate
            ],
            merge_function=K.concatenate),
        N.LSTM(num_units=lstm_output_size, input_mode='skip')[:, -1],
        N.Dense(1, activation=K.sigmoid)
    ],
    debug=True)
K.set_training(True)
y_pred_train = f(X_train)
K.set_training(False)
y_pred_score = f(X_score)

cost_train = K.mean(K.binary_crossentropy(y_pred_train, y))
cost_score = K.mean(K.binary_accuracy(y_pred_score, y))

parameters = f.parameters
print('Params:', [p.name for p in parameters])

updates = K.optimizers.Adam(lr=0.001).get_updates(cost_train, parameters)

print('Building training function ...')
f_train = K.function([X_train, y], cost_train, updates)
print('Building scoring function ...')
f_score = K.function([X_score, y], cost_score)

# ===========================================================================
# Test
# ===========================================================================
Example #7
                       mean=ds['mspec_mean'],
                       std=ds['mspec_std']),
    fuel.Stacking(left_context=10, right_context=10, shift=None),
    fuel.OneHotTrans(n_classes=10), fuel.CreateBatch())
print('Number of CPU for feeders:', data.ncpu)

# ===========================================================================
# Training
# ===========================================================================
X = K.placeholder(shape=(None, 2583), name='X')
y = K.placeholder(shape=(None, 10), name='y')

f = N.Sequence(
    [N.Dense(128, activation=K.linear),
     N.Dense(10, activation=K.softmax)])
y_ = f(X)
cost_train = K.mean(K.categorical_crossentropy(y_, y))
f_train = K.function([X, y], cost_train)

# ====== single process ====== #
with UnitTimer():
    for n_batches, (i, j) in enumerate(get_data()):
        f_train(i, j)
print(n_batches)  # index of the last minibatch processed

# ====== multi-processes ====== #
with UnitTimer():
    for n_batches, (i, j) in enumerate(data):
        f_train(i, j)
print(n_batches)  # index of the last minibatch processed
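UnitTimer is not defined in this snippet; assuming it simply reports the wall-clock time of the enclosed block, a minimal stand-in sketch could look like this:

import time
from contextlib import contextmanager

@contextmanager
def unit_timer():
    # report elapsed wall-clock time for the enclosed block
    start = time.time()
    yield
    print('Elapsed: %.4f (s)' % (time.time() - start))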
Example #8
    N.Pool(pool_size=(5, 1), pad='valid', mode='max'),

    N.Conv(num_filters=128, filter_size=(5, 1), strides=1, pad='valid',
           activation=K.relu),
    N.Pool(pool_size=(35, 1), pad='valid', mode='max'),

    N.Flatten(outdim=2),
    N.Dense(num_units=128, activation=K.relu),
    N.Dense(num_units=nb_labels, activation=K.softmax)
], debug=True)

y_pred = f(X)
params = [p for p in f.parameters if not has_roles(p, EmbeddingWeight)]
print('Params:', [p.name for p in params])

cost_train = K.mean(K.categorical_crossentropy(y_pred, y))
cost_score = K.mean(K.categorical_accuracy(y_pred, y))

opt = K.optimizers.RMSProp()
updates = opt.get_updates(cost_train, params)

print('Build training function ...')
f_train = K.function([X, y], cost_train, updates)
print('Build scoring function ...')
f_score = K.function([X, y], cost_score)

trainer = training.MainLoop(batch_size=128, seed=1208, shuffle_level=2)
trainer.set_task(f_train, (X_train, y_train), epoch=args['epoch'], name='train')
trainer.set_subtask(f_score, (X_valid, y_valid), freq=1., name='valid')
trainer.set_callback([
    training.ProgressMonitor('train', format='Train:{:.4f}'),
Example #9
File: __init__.py Project: liqin123/odin
def standard_trainer(train_data,
                     valid_data,
                     X,
                     y_train,
                     y_score,
                     y_target,
                     parameters,
                     test_data=None,
                     cost_train=None,
                     cost_score=None,
                     optimizer=None,
                     confusion_matrix=False,
                     gradient_norm=True,
                     save_path=None,
                     save_obj=None,
                     batch_size=64,
                     nb_epoch=3,
                     valid_freq=0.6,
                     seed=1208,
                     shuffle_level=2,
                     patience=3,
                     earlystop=5,
                     report_path=None):
    """
    Parameters
    ----------
    cost_train: list of callable
        each function will be apply to a pair y_train and y_target

    Return
    ------
    MainLoop, and History

    Note
    ----

    """
    from odin import backend as K
    # ====== prepare variables and cost ====== #
    # check optimizer
    if optimizer is None:
        optimizer = K.optimizers.SGD(lr=0.0001, momentum=0.9, nesterov=True)
    elif not isinstance(optimizer, K.optimizers.Optimizer) and \
    not hasattr(optimizer, "get_updates"):
        raise ValueError(
            "Invalid optimizer, the optimizer must be instance of "
            "backend.optimizers.Optimizer or having function "
            "get_updates(self, loss_or_grads, params).")
    #  check the cost functions
    if cost_train is None:
        cost_train = K.categorical_crossentropy
    if cost_score is None:
        cost_score = K.categorical_crossentropy
    cost_train = as_tuple(cost_train)
    cost_score = as_tuple(cost_score)
    # check input X, y, parameters
    X = as_tuple(X)
    y_train = as_tuple(y_train)
    y_score = as_tuple(y_score)
    y_target = as_tuple(y_target)
    parameters = as_tuple(parameters)
    if len(X) == 0 or len(y_train) == 0 or len(y_score) == 0 or \
    len(y_target) == 0 or len(parameters) == 0:
        raise ValueError(
            "X(len=%d), y_train(len=%d), y_score(len=%d), y_target(len=%d),"
            "and parameters(len=%d) must be list or tuple with length > 0." %
            (len(X), len(y_train), len(y_score), len(y_target),
             len(parameters)))
    # get all cost
    if len(y_train) == 1:
        y_train = y_train * len(cost_train)
    if len(y_score) == 1:
        y_score = y_score * len(cost_score)
    cost_train = [
        K.mean(f_cost(y_, y), axis=0) for f_cost, y_, y in zip(
            cost_train, y_train,
            y_target * len(cost_train) if len(y_target) == 1 else y_target)
    ]
    cost_score = [
        K.mean(f_cost(y_, y), axis=0) for f_cost, y_, y in zip(
            cost_score, y_score,
            y_target * len(cost_score) if len(y_target) == 1 else y_target)
    ]
    # add confusion matrix
    if confusion_matrix:
        if not is_number(confusion_matrix) and \
        not isinstance(confusion_matrix, (tuple, list, np.ndarray)):
            raise ValueError(
                "confusion_matrix must be an integer, or list, tuple"
                " specifies number of classes, or list of all classes.")
        if is_number(confusion_matrix):
            confusion_matrix = list(range(int(confusion_matrix)))
        for y_, y in zip(y_score, y_target):
            cost_score.append(
                K.confusion_matrix(y_pred=y_,
                                   y_true=y,
                                   labels=confusion_matrix))
    # get the update
    updates = optimizer.get_updates(cost_train[0], parameters)
    # ====== create function ====== #
    grad_norm = [] if not gradient_norm or not hasattr(optimizer, 'norm') else \
        [optimizer.norm]
    cost_train = cost_train + grad_norm
    print('Building training functions ...')
    f_train = K.function(inputs=X + y_target,
                         outputs=cost_train,
                         updates=updates)
    print('Building scoring functions ...')
    f_score = K.function(inputs=X + y_target, outputs=cost_score)
    # ====== Create trainer ====== #
    task = MainLoop(batch_size=batch_size,
                    seed=seed,
                    shuffle_level=shuffle_level)
    if save_path is not None and save_obj is not None:
        task.set_save(save_path, save_obj, save_hist=True)
    # set task
    task.set_task(f_train, train_data, epoch=nb_epoch, name='train')
    task.set_subtask(f_score, valid_data, freq=valid_freq, name='valid')
    if test_data is not None:
        task.set_subtask(f_score, test_data, when=-1, epoch=1, name='test')
    # format for score
    score_format = 'Results:' + __format_string(
        len(cost_score) - (1 if confusion_matrix else 0))
    score_tracking = {
        (len(cost_score) - 1): lambda x: sum(x)
    } if confusion_matrix else []
    # set the callback
    history = History()
    task.set_callback([
        ProgressMonitor(name='train',
                        format='Results:' + __format_string(len(cost_train))),
        ProgressMonitor(name='valid',
                        format=score_format,
                        tracking=score_tracking),
        (ProgressMonitor(
            name='test', format=score_format, tracking=score_tracking)
         if test_data is not None else None), history,
        EarlyStopGeneralizationLoss(
            'valid',
            threshold=earlystop,
            patience=patience,
            get_value=lambda x: np.mean([i[0] for i in x]
                                        if isinstance(x[0],
                                                      (tuple, list)) else x)),
        NaNDetector(('train', 'valid'), patience=patience, rollback=True)
    ])
    return task, history
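A hypothetical usage sketch of standard_trainer; the placeholders, network, and the ds_train/ds_valid feeders below are illustrative assumptions, not part of the function above:

from odin import backend as K
from odin import nnet as N   # assumed import path for the N.* layers

X = K.placeholder(shape=(None, 784), name='X')
y = K.placeholder(shape=(None, 10), name='y')
f = N.Sequence([N.Dense(256, activation=K.relu),
                N.Dense(10, activation=K.softmax)])
K.set_training(True); y_train = f(X)
K.set_training(False); y_score = f(X)

# ds_train / ds_valid: assumed minibatch feeders of (X, y) pairs
task, history = standard_trainer(
    train_data=ds_train, valid_data=ds_valid,
    X=X, y_train=y_train, y_score=y_score, y_target=y,
    parameters=f.parameters,
    confusion_matrix=10, nb_epoch=3, valid_freq=0.6)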
Example #10
y = K.placeholder(shape=(None,), name='y', dtype='int32')

# ===========================================================================
# Build network
# ===========================================================================
ops = N.Sequence([
    N.Flatten(outdim=2),
    N.Dense(512, activation=K.relu),
    N.Dense(256, activation=K.relu),
    N.Dense(10, activation=K.softmax)
])
ops = cPickle.loads(cPickle.dumps(ops))  # check that the ops object is pickle-able

y_pred_train = ops(X_train)
y_pred_score = ops(X_score)
cost_train = K.mean(K.categorical_crossentropy(y_pred_train, y))
cost_test_1 = K.mean(K.categorical_crossentropy(y_pred_score, y))
cost_test_2 = K.mean(K.categorical_accuracy(y_pred_score, y))
cost_test_3 = K.confusion_matrix(y_pred_score, y, labels=range(10))

parameters = ops.parameters
optimizer = K.optimizers.RMSProp(lr=0.0001, clipnorm=100.)
updates = optimizer(cost_train, parameters)
print('Building training functions ...')
f_train = K.function([X_train, y], [cost_train, optimizer.norm],
                     updates=updates)
print('Building testing functions ...')
f_test = K.function([X_score, y], [cost_test_1, cost_test_2, cost_test_3])

# ====== normalize 0-1 ====== #
if False: