Example #1
def main():
    # set up the model and run for num_epochs, saving only the last state
    # this is at the top so that the backend (be) is generated first
    mlp = gen_model(args.backend)

    # setup data iterators
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
    if args.backend == 'nervanacpu' or args.backend == 'cpu':
        # limit data since cpu backend runs slower
        train = DataIterator(X_train[:1000], y_train[:1000], nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test[:1000], y_test[:1000], nclass=nclass, lshape=(1, 28, 28))
    else:
        train = DataIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
        valid = DataIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

    # serialization related
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    checkpoint_model_path = os.path.join('./', 'test_oneshot.pkl')
    checkpoint_schedule = 1  # save at every epoch

    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path, history=2)

    # run the fit all the way through, saving a checkpoint at every epoch
    mlp.fit(train, optimizer=opt_gdm, num_epochs=num_epochs, cost=cost, callbacks=callbacks)

    # set up a model with the same random seed and run it epoch by epoch,
    # serializing and deserializing at each step
    mlp = gen_model(args.backend)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    opt_gdm = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

    # reset data iterators
    train.reset()
    valid.reset()

    checkpoint_model_path = os.path.join('./', 'test_manyshot.pkl')
    checkpoint_schedule = 1  # save at every epoch
    callbacks = Callbacks(mlp, train)
    callbacks.add_serialize_callback(checkpoint_schedule,
                                     checkpoint_model_path,
                                     history=num_epochs)
    for epoch in range(num_epochs):
        # _0 points to state at end of epoch 0
        mlp.fit(train, optimizer=opt_gdm, num_epochs=epoch+1, cost=cost, callbacks=callbacks)

        # load saved file
        prts = os.path.splitext(checkpoint_model_path)
        fn = prts[0] + '_%d' % epoch + prts[1]
        mlp.load_weights(fn)  # load the saved weights

    # compare test_oneshot_<num_epochs-1>.pkl to test_manyshot_<num_epochs-1>.pkl
    try:
        compare_model_pickles('test_oneshot_%d.pkl' % (num_epochs-1),
                              'test_manyshot_%d.pkl' % (num_epochs-1))
    except:
        print 'test failed....'
        sys.exit(1)
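
The `compare_model_pickles` helper used above is not shown in this snippet. A minimal sketch of what such a comparison might look like, assuming each checkpoint is a plain pickle file whose nested structure contains numpy weight arrays:

import pickle
import numpy as np

def compare_model_pickles(fn1, fn2, rtol=1e-5, atol=1e-8):
    # Sketch only: the real helper in the example suite may inspect the model
    # state differently; here we just walk both pickles and compare every
    # numeric array we find.
    with open(fn1, 'rb') as f1, open(fn2, 'rb') as f2:
        state1, state2 = pickle.load(f1), pickle.load(f2)

    def collect_arrays(obj, out):
        # recurse through dicts/lists and gather anything array-like
        if isinstance(obj, dict):
            for v in obj.values():
                collect_arrays(v, out)
        elif isinstance(obj, (list, tuple)):
            for v in obj:
                collect_arrays(v, out)
        else:
            arr = np.asarray(obj)
            if arr.dtype.kind in 'fiu' and arr.size > 0:
                out.append(arr)

    arrays1, arrays2 = [], []
    collect_arrays(state1, arrays1)
    collect_arrays(state2, arrays2)
    assert len(arrays1) == len(arrays2), 'checkpoint structure mismatch'
    for a1, a2 in zip(arrays1, arrays2):
        assert np.allclose(a1, a2, rtol=rtol, atol=atol), 'weights differ'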
    def train(self, dataset, model=None):
        """Trains the passed model on the given dataset. If no model is passed, `generate_default_model` is used."""
        print "Starting training..."
        start = time.time()

        # Training runs on the CPU backend; if a GPU is available, a GPU backend should be used instead.
        backend = gen_backend(backend='cpu',
                              batch_size=self.batch_size,
                              rng_seed=self.random_seed,
                              stochastic_round=False)

        cost = GeneralizedCost(
            name='cost',
            costfunc=CrossEntropyMulti())

        optimizer = GradientDescentMomentum(
            learning_rate=self.lrate,
            momentum_coef=0.9)

        # set up the model and experiment
        if not model:
            model = self.generate_default_model(dataset.num_labels)

        callbacks = Callbacks(model, dataset.train(),
                              output_file=os.path.join(self.root_path, self.Callback_Store_Filename),
                              progress_bar=True,
                              valid_set=dataset.test(),
                              valid_freq=1)

        # add a callback that saves the best model state
        callbacks.add_save_best_state_callback(self.model_path)
        callbacks.add_serialize_callback(
            serialize_schedule=1,
            save_path=os.path.join(self.root_path, self.Intermediate_Model_Filename),
            history=100)

        # Uncomment line below to run on GPU using cudanet backend
        # backend = gen_backend(rng_seed=0, gpu='cudanet')
        model.fit(
            dataset.train(),
            optimizer=optimizer,
            num_epochs=self.max_epochs,
            cost=cost,
            callbacks=callbacks)

        print('Misclassification error = %.1f%%'
              % (model.eval(dataset.test(), metric=Misclassification()) * 100))
        print "Finished training!"
        end = time.time()
        print "Duration", end - start, "seconds"

        return model
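
`generate_default_model` is referenced in the docstring but not included in this snippet. A hedged sketch of what such a default network might look like with neon's layer containers (the layer widths and initializer scale are assumptions, not the example's actual values):

from neon.initializers import Gaussian
from neon.layers import Affine
from neon.models import Model
from neon.transforms import Rectlin, Softmax

def generate_default_model(self, num_labels):
    # Hedged sketch: the actual default architecture is not part of the snippet;
    # the layer width and initializer scale below are assumptions.
    init = Gaussian(scale=0.01)
    layers = [
        Affine(nout=128, init=init, activation=Rectlin()),
        Affine(nout=num_labels, init=init, activation=Softmax()),
    ]
    return Model(layers=layers)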
Example #3
         gate_activation=Tanh(),
         reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))
checkpoint_model_path = "~/image_caption2.pickle"
checkpoint_schedule = range(num_epochs)

model = Model(layers=layers)

callbacks = Callbacks(model,
                      train_set,
                      output_file=args.output_file,
                      progress_bar=args.progress_bar)
callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

opt = RMSProp(decay_rate=0.997,
              learning_rate=0.0005,
              epsilon=1e-8,
              clip_gradients=True,
              gradient_limit=1.0)

# train model
model.fit(train_set,
          optimizer=opt,
          num_epochs=num_epochs,
          cost=cost,
          callbacks=callbacks)

# load the saved model (if it exists) and evaluate the BLEU score on the test set
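
The snippet is cut off at this point, so only a hedged sketch of the step the comment describes is possible; expanding the `~` in the checkpoint path is an assumption, and the BLEU computation itself is omitted:

import os

# Sketch of the step described above; the listing cuts off before the real code.
# The serialize callback may append an epoch suffix to the filename, so the
# exact path here is an assumption.
expanded_path = os.path.expanduser(checkpoint_model_path)
if os.path.exists(expanded_path):
    model.load_weights(expanded_path)
# Generating captions for a test set and scoring them with BLEU would follow,
# but that depends on the caption dataset helpers and is not shown here.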
Example #4
if args.model_file:
    import os
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    mlp.load_weights(args.model_file)

# configure callbacks
callbacks = Callbacks(mlp, train, output_file=args.output_file)

if args.validation_freq:
    class TopKMetrics(Callback):
        def __init__(self, valid_set, epoch_freq=args.validation_freq):
            super(TopKMetrics, self).__init__(epoch_freq=epoch_freq)
            self.valid_set = valid_set

        def on_epoch_end(self, epoch):
            self.valid_set.reset()
            allmetrics = TopKMisclassification(k=5)
            stats = mlp.eval(self.valid_set, metric=allmetrics)
            print ", ".join(allmetrics.metric_names) + ": " + ", ".join(map(str, stats.flatten()))

    callbacks.add_callback(TopKMetrics(test))

if args.save_path:
    checkpoint_schedule = range(1, args.epochs)
    callbacks.add_serialize_callback(checkpoint_schedule, args.save_path, history=2)

mlp.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)

test.exit_batch_provider()
train.exit_batch_provider()
Example #5
if args.validation_freq:

    class TopKMetrics(Callback):
        def __init__(self, valid_set, epoch_freq=args.validation_freq):
            super(TopKMetrics, self).__init__(epoch_freq=epoch_freq)
            self.valid_set = valid_set

        def on_epoch_end(self, epoch):
            self.valid_set.reset()
            allmetrics = TopKMisclassification(k=5)
            stats = mlp.eval(self.valid_set, metric=allmetrics)
            print ", ".join(allmetrics.metric_names) + ": " + ", ".join(
                map(str, stats.flatten()))

    callbacks.add_callback(TopKMetrics(test))

if args.save_path:
    checkpoint_schedule = range(args.epochs)
    callbacks.add_serialize_callback(checkpoint_schedule,
                                     args.save_path,
                                     history=2)

mlp.fit(train,
        optimizer=opt,
        num_epochs=args.epochs,
        cost=cost,
        callbacks=callbacks)

test.exit_batch_provider()
train.exit_batch_provider()
             'step_config': 1,
             'learning_rate': 0.1,
             'weight_decay': 0}

# initialize model object
rbm = RBM(layers=layers)

if args.model_file:
    assert os.path.exists(args.model_file), '%s not found' % args.model_file
    logger.info('loading initial model state from %s' % args.model_file)
    rbm.load_weights(args.model_file)

# setup standard fit callbacks
callbacks = Callbacks(rbm, train_set, output_file=args.output_file,
                      progress_bar=args.progress_bar)

# add a callback to calculate

if args.serialize > 0:
    # add callback for saving checkpoint file
    # every args.serialize epochs
    checkpoint_schedule = args.serialize
    checkpoint_model_path = args.save_path
    callbacks.add_serialize_callback(checkpoint_schedule, checkpoint_model_path)

rbm.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)

for mb_idx, (x_val, y_val) in enumerate(valid_set):
    hidden = rbm.fprop(x_val)
    break
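
The loop above pushes a single validation minibatch through the trained RBM to get its hidden-layer activations. One way to pull that result back to host memory for inspection might be (the `.get()` host-transfer call is an assumption about the backend tensor API):

# Copy the hidden activations to a host numpy array for inspection.
# `.get()` is assumed here; if the backend tensor exposes a different
# host-transfer method, use that instead.
hidden_host = hidden.get()
print 'hidden activations shape:', hidden_host.shape
print 'mean activation:', hidden_host.mean()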
Example #7
layers = [
    LSTM(hidden_size, init, Logistic(), Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(clip_gradients=clip_gradients, stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, train_set, output_file=args.output_file,
                      progress_bar=args.progress_bar,
                      valid_set=valid_set, valid_freq=1,
                      )
callbacks.add_serialize_callback(1, args.save_path)

# fit and validate
model.fit(train_set, optimizer=optimizer, num_epochs=num_epochs, cost=cost, callbacks=callbacks)


def sample(prob):
    """
    Sample index from probability distribution
    """
    prob = prob / (prob.sum() + 1e-6)
    return np.argmax(np.random.multinomial(1, prob, 1))

# Set batch size and time_steps to 1 for generation and reset buffers
be.bsz = 1
time_steps = 1
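
A quick standalone check of the `sample` helper defined above, assuming plain numpy input (in the actual generation loop the probabilities would come from the model's softmax output):

import numpy as np

# Draw repeatedly from a skewed distribution; index 1 should dominate
# (roughly 80% of the draws). This only exercises sample() itself.
probs = np.array([0.05, 0.80, 0.15])
draws = [sample(probs) for _ in range(1000)]
print 'fraction per index:', [draws.count(i) / 1000.0 for i in range(3)]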
Example #8
]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(clip_gradients=clip_gradients,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model,
                      train_set,
                      output_file=args.output_file,
                      valid_set=valid_set,
                      valid_freq=1,
                      progress_bar=args.progress_bar)
callbacks.add_serialize_callback(1, args.save_path)

# fit and validate
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=num_epochs,
          cost=cost,
          callbacks=callbacks)


def sample(prob):
    """
    Sample index from probability distribution
    """
    prob = prob / (prob.sum() + 1e-6)
    return np.argmax(np.random.multinomial(1, prob, 1))