Example #1
    def train(self):
        print "Loading data"
        datafile = self.get_datafile()
        nbexamples = datafile.num_examples

        train_stream = DataStream(
            dataset=datafile,
            iteration_scheme=OverlapSequentialScheme(
                nbexamples, self.time_dim))

        print "Building Theano Graph"
        algorithm, self.fprop = self.build_theano_functions()

        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=train_stream,
            extensions=[
                FinishAfter(after_n_epochs=EPOCHS),
                TrainingDataMonitoring(
                    [self.model.outputs[0]],
                    prefix="train",
                    after_epoch=True,
                    every_n_batches=4000),
                #ProgressBar(),
                Printing()
            ])

        main_loop.run()
Example #2
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             Flatten(DataStream.default_stream(
                                 mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                                     which_sources=('features', )),
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]))

    main_loop = MainLoop(algorithm,
                         Flatten(DataStream.default_stream(
                             mnist_train,
                             iteration_scheme=SequentialScheme(
                                 mnist_train.num_examples, 50)),
                                 which_sources=('features', )),
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
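A note on the weight-decay term in Example #2: VariableFilter(roles=[WEIGHT]) returns every weight matrix in the graph, so the penalty generalizes to any number of layers without unpacking W1 and W2 by hand. A minimal sketch continuing from the example above (same imports, same cg and cost):

    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + 0.00005 * sum((W ** 2).sum() for W in weights)
    cost.name = 'final_cost'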
Example #3
def main():
  x = tensor.matrix("features")
  input_to_hidden1 = get_typical_layer(x, 784, 500)
  #hidden1_to_hidden2 = get_typical_layer(input_to_hidden1, 500, 300)
  hidden1_to_latent = get_typical_layer(input_to_hidden1, 500, 20)

  latent_to_hidden2 = get_typical_layer(hidden1_to_latent, 20, 500)
  #hidden3_to_hidden4 = get_typical_layer(latent_to_hidden3, 300, 500)
  hidden2_to_output = get_typical_layer(latent_to_hidden2, 500, 784, Logistic())
  hidden2_to_output.name = "last_before_output"

  from blocks.bricks.cost import SquaredError, AbsoluteError, BinaryCrossEntropy
  from blocks.graph import ComputationGraph
  from blocks.algorithms import Adam, GradientDescent, Scale
  from blocks.roles import WEIGHT

  cost = BinaryCrossEntropy(name="error").apply(x, hidden2_to_output)
  cg = ComputationGraph(cost)
  weights = VariableFilter(roles=[WEIGHT]) (cg.variables)
#  cost += 0.0001 * tensor.sum(map(lambda x: (x**2).sum(), weights))
#  cost.name = "regularized error"
  gd = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Adam())

  from blocks.main_loop import MainLoop
  from blocks.extensions import FinishAfter, Printing, ProgressBar
  from blocks.extensions.monitoring import DataStreamMonitoring, TrainingDataMonitoring
  monitor = TrainingDataMonitoring([cost], after_epoch=True)
  main_loop = MainLoop(data_stream=get_data_stream(), algorithm=gd, extensions=[monitor, FinishAfter(after_n_epochs=5),  ProgressBar(), Printing()])

  main_loop.run()
  showcase(cg, "last_before_output")
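Example #3 leans on a user helper, get_typical_layer, whose definition is not part of the snippet. A plausible minimal reconstruction, offered only for orientation (an assumption, not the author's code): a Linear brick followed by an activation, matching the call signature used above.

from blocks.bricks import Linear, Rectifier
from blocks.initialization import IsotropicGaussian, Constant

def get_typical_layer(input_var, input_dim, output_dim, activation=None):
    # hypothetical helper -- the original implementation is not shown in the source
    layer = Linear(input_dim=input_dim, output_dim=output_dim,
                   weights_init=IsotropicGaussian(0.01),
                   biases_init=Constant(0))
    layer.initialize()
    activation = Rectifier() if activation is None else activation
    return activation.apply(layer.apply(input_var))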
Example #4
def run(discriminative_regularization=True):
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=False)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams[:3]

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    rval = create_training_computation_graphs(discriminative_regularization)
    cg, bn_cg, variance_parameters = rval
    pop_updates = list(
        set(get_batch_normalization_updates(bn_cg, allow_duplicates=True)))
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    model = Model(bn_cg.outputs[0])
    selector = Selector(
        find_bricks(
            model.top_bricks,
            lambda brick: brick.name in ('encoder_convnet', 'encoder_mlp',
                                         'decoder_convnet', 'decoder_mlp')))
    parameters = list(selector.get_parameters().values()) + variance_parameters

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_cg.outputs[0],
                                parameters=parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    monitored_quantities_list = []
    for graph in [bn_cg, cg]:
        cost, kl_term, reconstruction_term = graph.outputs
        cost.name = 'nll_upper_bound'
        avg_kl_term = kl_term.mean(axis=0)
        avg_kl_term.name = 'avg_kl_term'
        avg_reconstruction_term = -reconstruction_term.mean(axis=0)
        avg_reconstruction_term.name = 'avg_reconstruction_term'
        monitored_quantities_list.append(
            [cost, avg_kl_term, avg_reconstruction_term])
    train_monitoring = DataStreamMonitoring(
        monitored_quantities_list[0], train_monitor_stream, prefix="train",
        updates=extra_updates, after_epoch=False, before_first_epoch=False,
        every_n_epochs=5)
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities_list[1], valid_monitor_stream, prefix="valid",
        after_epoch=False, before_first_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    save_path = 'celeba_vae_{}regularization.zip'.format(
        '' if discriminative_regularization else 'no_')
    checkpoint = Checkpoint(save_path, every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=75), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    main_loop.run()
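The extra_updates built in Example #4 implement an exponential moving average of the batch-normalization population statistics: each population variable p moves a fraction decay_rate toward the corresponding minibatch statistic m. A tiny numeric sketch of that update rule (the statistics below are made-up values):

import numpy

decay_rate = 0.05
p = numpy.float32(0.0)                      # population statistic
for m in numpy.float32([0.8, 1.2, 1.0]):    # minibatch statistics (made up)
    p = m * decay_rate + p * (1 - decay_rate)   # same form as extra_updates
    print(p)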
Example #5
def main():
    print("Fetching dataset...")
    trainset, validset, testset = load_jsb_chorales()

    print("Initializing model...")
    lstm = LstmBlocks(trainset.input_size, 100, trainset.target_size)

    print("Building DataStream...")
    dataset_train = IterableDataset({'x': trainset.inputs, 'y': trainset.targets})
    dataset_valid = IterableDataset({'x': validset.inputs, 'y': validset.targets})

    stream_train = DataStream(dataset=dataset_train)
    stream_valid = DataStream(dataset=dataset_valid)

    print("Build training process...")
    algorithm = GradientDescent(cost=lstm.cost, parameters=lstm.computation_graph.parameters, step_rule=Adam())

    valid_monitor = DataStreamMonitoring(variables=[lstm.cost], data_stream=stream_valid, prefix="valid")
    train_monitor = TrainingDataMonitoring(variables=[lstm.cost], prefix="train", after_epoch=True)

    main_loop = MainLoop(data_stream=stream_train, algorithm=algorithm,
                         extensions=[valid_monitor, train_monitor, FinishAfter(after_n_epochs=N_EPOCHS), Printing(),
                                     ProgressBar()])

    print("Training...")
    main_loop.run()
Example #6
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0),
              seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(cost=cost,
                        params=ComputationGraph(cost).parameters,
                        step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) + [
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)), prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()
        ])
    main_loop.run()
    return main_loop
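The x[:, None] and y[:, None] indexing in Example #6 turns the 1-D 'numbers' and 'roots' vectors into (batch, 1) column matrices, the shape MLP.apply and SquaredError expect. The same indexing in plain NumPy:

import numpy

v = numpy.array([1., 4., 9.])
print(v.shape, v[:, None].shape)   # (3,) -> (3, 1)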
Example #7
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W],
                          step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate,
        lambda _, val: numpy.cast[theano.config.floatX](val * 0.2))
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value,
                    0.001 * 0.2 ** n_batches,
                    atol=1e-5)
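The value asserted in Example #7 follows from the modifier firing once per processed batch: each call multiplies the learning rate by 0.2, and the single epoch contains three batches. As plain arithmetic:

lr = 0.001
for _ in range(3):   # n_batches applications of the modifier
    lr *= 0.2
print(lr)            # 0.001 * 0.2 ** 3 == 8e-06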
Example #8
def test_shared_variable_modifier_two_params():
    weights = numpy.array([-1, 1], dtype=floatX)
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = ContainerDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, params=[W], step_rule=step_rule)
    modifier = SharedVariableModifier(
        step_rule.learning_rate, lambda _, val: numpy.cast[floatX](val * 0.2))
    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_default_stream(),
                         algorithm=sgd,
                         extensions=[FinishAfter(after_n_epochs=1), modifier])

    main_loop.run()

    new_value = step_rule.learning_rate.get_value()
    assert_allclose(new_value, 0.001 * 0.2**n_batches, atol=1e-5)
Example #9
    def train(self):
        print "Loading data"
        datafile = self.get_datafile()
        nbexamples = datafile.num_examples
        nbexamples -= nbexamples % (self.sequence_dim * self.time_dim)

        train_stream = ReshapeTransformer(
            DataStream(dataset=datafile,
                       iteration_scheme=ShuffledBatchChunkScheme(
                           nbexamples, self.sequence_dim * self.time_dim)),
            self.sequence_dim, self.time_dim)

        if self.image_size is not None:
            train_stream = Mapping(train_stream,
                                   spec_mapping,
                                   add_sources=['spectrogram'])

        print "Building Theano Graph"
        algorithm, self.fprop = self.build_theano_functions()

        main_loop = MainLoop(algorithm=algorithm,
                             data_stream=train_stream,
                             model=self.model,
                             extensions=[
                                 FinishAfter(after_n_epochs=EPOCHS),
                                 TrainingDataMonitoring(
                                     [aggregation.mean(self.model.outputs[0])],
                                     prefix="train",
                                     after_epoch=True),
                                 Printing(),
                                 SaveParams(EXP_PATH + NAME, after_epoch=True)
                             ])

        main_loop.run()
Example #10
def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector("features")
    y = tensor.scalar("targets")
    W = shared_floatx([0, 0], name="W")
    cost = ((x * W).sum() - y) ** 2
    cost.name = "cost"

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W], step_rule=step_rule)
    main_loop = MainLoop(
        model=None,
        data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(step_rule.learning_rate, lambda n: numpy.cast[theano.config.floatX](10.0 / n)),
        ],
    )

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(), numpy.cast[theano.config.floatX](10.0 / n_batches))
Example #11
    def train(self):
        print "Loading data"
        datafile = self.get_datafile()
        nbexamples = datafile.num_examples
        nbexamples -= nbexamples%(self.sequence_dim*self.time_dim)

        train_stream = ReshapeTransformer(
            DataStream(
                dataset=datafile,
                iteration_scheme=ShuffledBatchChunkScheme(
                    nbexamples, self.sequence_dim*self.time_dim)),
            self.sequence_dim,
            self.time_dim)

        if self.image_size is not None :
            train_stream = Mapping(train_stream, spec_mapping, add_sources=['spectrogram'])

        print "Building Theano Graph"
        algorithm, self.fprop = self.build_theano_functions()

        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=train_stream,
            model=self.model,
            extensions=[
                FinishAfter(after_n_epochs=EPOCHS),
                TrainingDataMonitoring(
                    [aggregation.mean(self.model.outputs[0])],
                    prefix="train",
                    after_epoch=True),
                Printing(),
                SaveParams(EXP_PATH+NAME, after_epoch=True)
            ])

        main_loop.run()
Example #12
def main(save_to, num_batches):
    linear = Linear()
    rnn = SORN()
    x = tensor.vector('numbers')
    states_E, states_I, updates = rnn.apply(linear.apply(x[None, :]))
    y = linear.apply(states_E[-1])
    cost = SquaredError().apply(y[:, None], mlp.apply(states_E[-1]))  # NOTE: `mlp` is not defined in this snippet
    # consider updates about linear from x and to y
    # 1. make all in SORN
    # 2. gradient?
    main_loop = MainLoop(
        UpdatesAlgorithm(
            updates=updates),
        get_data_stream(range(100)),
        model=Model(),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop
Example #13
 def run(self):
     self.build_extensions_list()
     print "Calling MainLoop"
     main_loop = MainLoop(data_stream=self.streams['mainloop'],
                          algorithm=self.model.algorithm,
                          extensions=self.extensions)
     main_loop.run()
Example #14
def test_main_loop():

    class TestDataStream(object):

        def __init__(self):
            self.epochs = self._generate_data()

        def _generate_data(self):
            def wrap_in_dicts(iterable):
                for x in iterable:
                    yield dict(data=x)
            yield iter(wrap_in_dicts([1, 2, 3]))
            yield iter(wrap_in_dicts([4, 5]))
            yield iter(wrap_in_dicts([6, 7, 8, 9]))

        def get_epoch_iterator(self, as_dict):
            assert as_dict is True
            return next(self.epochs)

    finish_extension = FinishAfter()
    finish_extension.add_condition(
        'after_epoch', predicate=lambda log: log.status['epochs_done'] == 2)
    main_loop = MainLoop(MockAlgorithm(), TestDataStream(),
                         extensions=[WriteBatchExtension(),
                                     finish_extension])
    main_loop.run()

    assert main_loop.log.status['iterations_done'] == 5
    assert main_loop.log.status['_epoch_ends'] == [3, 5]
    assert len(main_loop.log) == 5
    for i in range(1, 6):
        assert main_loop.log[i]['batch'] == dict(data=i)
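The numbers asserted in Example #14 come straight from the fake stream: its epochs hold 3, 2 and 4 batches, and the FinishAfter condition stops training once two epochs are done, so the third epoch never starts.

epoch_sizes = [3, 2, 4]
iterations_done = sum(epoch_sizes[:2])                            # 5
epoch_ends = [epoch_sizes[0], epoch_sizes[0] + epoch_sizes[1]]    # [3, 5]
print(iterations_done, epoch_ends)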
Example #15
def train_model(cost, train_stream, valid_stream, args):
    step_rule = learning_algorithm(args)
    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, step_rule=step_rule,
                                parameters=cg.parameters)

    extensions = []

    # Training and Validation score monitoring
    extensions.extend([
        TrainingDataMonitoring([cost],
                               prefix='train',
                               every_n_batches=args.monitoring_freq),
        DataStreamMonitoring([cost],
                             stream=valid_stream,
                             prefix='valid',
                             every_n_batches=args.monitoring_freq)]
                      )

    # Printing
    extensions.append(ProgressBar())
    extensions.append(Printing(every_n_batches=args.monitoring_freq))

    main_loop = MainLoop(model=Model(cost),
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions
                         )

    # This is where the magic happens!
    main_loop.run()
Example #16
def test_main_loop():

    class TestDataStream(object):

        def __init__(self):
            self.epochs = self._generate_data()

        def _generate_data(self):
            def wrap_in_dicts(iterable):
                for x in iterable:
                    yield dict(data=x)
            yield iter(wrap_in_dicts([1, 2, 3]))
            yield iter(wrap_in_dicts([4, 5]))
            yield iter(wrap_in_dicts([6, 7, 8, 9]))

        def get_epoch_iterator(self, as_dict):
            assert as_dict is True
            return next(self.epochs)

    finish_extension = FinishAfter()
    finish_extension.add_condition(
        'after_epoch', predicate=lambda log: log.status.epochs_done == 2)
    main_loop = MainLoop(MockAlgorithm(), TestDataStream(),
                         extensions=[WriteBatchExtension(),
                                     finish_extension])
    main_loop.run()

    assert main_loop.log.status.iterations_done == 5
    assert main_loop.log.status._epoch_ends == [3, 5]
    assert len(list(main_loop.log)) == 7
    for i in range(1, 6):
        assert main_loop.log[i].batch == dict(data=i)
Example #17
def test_shared_variable_modifier():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    step_rule = Scale(0.001)
    sgd = GradientDescent(cost=cost, parameters=[W],
                          step_rule=step_rule)
    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=sgd,
        extensions=[
            FinishAfter(after_n_epochs=1),
            SharedVariableModifier(
                step_rule.learning_rate,
                lambda n: numpy.cast[theano.config.floatX](10. / n)
            )])

    main_loop.run()

    assert_allclose(step_rule.learning_rate.get_value(),
                    numpy.cast[theano.config.floatX](10. / n_batches))
Example #18
def main(save_to, num_batches, continue_=False):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, params=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=([LoadFromDump(save_to)] if continue_ else []) +
        [Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Dump(save_to),
            Printing()])
    main_loop.run()
    return main_loop
Example #19
 def train_base_model(self, train_data, test_data, input_dim):
     x = T.matrix('features')
     y = T.matrix('targets')
     mlp, cost, mis_cost = self.create_base_model(x, y, input_dim)
     cg = ComputationGraph([cost])
     inputs = VariableFilter(roles=[INPUT])(cg.variables)
     cg = apply_dropout(cg, inputs, 0.2)
     algorithm = GradientDescent(cost=cost,
                                 parameters=cg.parameters,
                                 step_rule=Adam(learning_rate=0.001))
     data_stream = train_data
     data_stream_test = test_data
     monitor = DataStreamMonitoring(variables=[mis_cost],
                                    data_stream=data_stream_test,
                                    prefix="test")
     plot_ext = Plot('F1-measure',
                     channels=[['test_MisclassificationRate']],
                     after_batch=True)
     main_loop = MainLoop(data_stream=data_stream,
                          algorithm=algorithm,
                          extensions=[
                              monitor,
                              FinishAfter(after_n_epochs=50),
                              Printing(), plot_ext
                          ])
     main_loop.run()
     return mlp
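One caveat about the dropout wiring in Example #19: apply_dropout returns a new graph and leaves the original cost variable untouched, so passing the original cost to GradientDescent trains without dropout (the parameter list taken from the transformed graph refers to the same shared variables either way). If the intent is to train with dropout, the cost should come from the transformed graph; a sketch under that assumption, reusing the cost from the example:

from blocks.graph import ComputationGraph, apply_dropout
from blocks.filter import VariableFilter
from blocks.roles import INPUT
from blocks.algorithms import GradientDescent, Adam

cg = ComputationGraph([cost])
inputs = VariableFilter(roles=[INPUT])(cg.variables)
dropout_cg = apply_dropout(cg, inputs, 0.2)
dropout_cost = dropout_cg.outputs[0]          # the cost with dropout applied
algorithm = GradientDescent(cost=dropout_cost,
                            parameters=dropout_cg.parameters,
                            step_rule=Adam(learning_rate=0.001))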
Example #20
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 4], [5, 6]]
    ]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y)**2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):
        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = ((
                (W.get_value() * data["features"]).sum() - data["targets"])**2)

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=GradientDescent(cost=cost,
                                                   parameters=[W],
                                                   step_rule=Scale(0.001)),
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                             TrainingDataMonitoring([W_sum, cost, V],
                                                    prefix="train1",
                                                    after_batch=True),
                             TrainingDataMonitoring(
                                 [aggregation.mean(W_sum), cost],
                                 prefix="train2",
                                 after_epoch=True),
                             TrueCostExtension()
                         ])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([
            main_loop.log[i]['train1_W_sum'] for i in range(1, n_batches + 1)
        ]) / n_batches)
Example #21
def main(max_seq_length, lstm_dim, batch_size, num_batches, num_epochs):
    dataset_train = IterableDataset(generate_data(max_seq_length, batch_size,
                                                  num_batches))
    dataset_test = IterableDataset(generate_data(max_seq_length, batch_size,
                                                 100))

    stream_train = DataStream(dataset=dataset_train)
    stream_test = DataStream(dataset=dataset_test)

    x = T.tensor3('x')
    y = T.matrix('y')

    # we need to provide data for the LSTM layer of size 4 * lstm_dim, see
    # LSTM layer documentation for the explanation
    x_to_h = Linear(1, lstm_dim * 4, name='x_to_h',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))
    lstm = LSTM(lstm_dim, name='lstm',
                weights_init=IsotropicGaussian(),
                biases_init=Constant(0.0))
    h_to_o = Linear(lstm_dim, 1, name='h_to_o',
                    weights_init=IsotropicGaussian(),
                    biases_init=Constant(0.0))

    x_transform = x_to_h.apply(x)
    h, c = lstm.apply(x_transform)

    # only values of hidden units of the last timeframe are used for
    # the classification
    y_hat = h_to_o.apply(h[-1])
    y_hat = Logistic().apply(y_hat)

    cost = BinaryCrossEntropy().apply(y, y_hat)
    cost.name = 'cost'

    lstm.initialize()
    x_to_h.initialize()
    h_to_o.initialize()

    cg = ComputationGraph(cost)

    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Adam())
    test_monitor = DataStreamMonitoring(variables=[cost],
                                        data_stream=stream_test, prefix="test")
    train_monitor = TrainingDataMonitoring(variables=[cost], prefix="train",
                                           after_epoch=True)

    main_loop = MainLoop(algorithm, stream_train,
                         extensions=[test_monitor, train_monitor,
                                     FinishAfter(after_n_epochs=num_epochs),
                                     Printing(), ProgressBar()])
    main_loop.run()

    print 'Learned weights:'
    for layer in (x_to_h, lstm, h_to_o):
        print "Layer '%s':" % layer.name
        for param in layer.parameters:
            print param.name, ': ', param.get_value()
        print
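The 4 * lstm_dim in Example #21's x_to_h projection is a Blocks convention: the LSTM brick expects its input already projected to one slab of lstm_dim per gate (input, forget, cell candidate, output). The shape bookkeeping through the model, for orientation:

# x:                   (seq_len, batch_size, 1)
# x_to_h.apply(x):     (seq_len, batch_size, 4 * lstm_dim)   one slab per gate
# lstm.apply(...):     h and c, each (seq_len, batch_size, lstm_dim)
# h[-1]:               (batch_size, lstm_dim)                last time step only
# h_to_o.apply(h[-1]): (batch_size, 1), squashed by Logistic into y_hat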
Example #22
def maxout_vae_mnist_test(path_vae_mnist):

    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    temp = cg.parameters
    for t, i in zip(temp, range(len(temp))):
        t.name = t.name+str(i)+"maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y, y_hat) 

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_test = Flatten(DataStream.default_stream(
            test_set, iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], data_stream=data_stream_train, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")


    extensions = [monitor_train,
                  monitor_valid,
                  FinishAfter(after_n_epochs=50),
                  Printing(every_n_epochs=1)]

    main_loop = MainLoop(data_stream=data_stream_train,
                         algorithm=algorithm, model=Model(cost),
                         extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'w')) as f:
        dump(maxout, f)
Example #23
class Runner(object):
    def __init__(self, worker, experiment, config):
        # Data
        dataset = CIFAR10('train', flatten=False)
        test_dataset = CIFAR10('test', flatten=False)
        batch_size = 128

        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        datastream = DataStream(dataset, iteration_scheme=scheme)

        test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
        test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

        # Model
        m = ModelHelper(config)

        def score_func(mainloop):
            scores = mainloop.log.to_dataframe()["test_accur"].values
            return np.mean(np.sort(scores)[-4:-1])

        # Algorithm
        cg = ComputationGraph([m.cost])
        algorithm = GradientDescent(cost=m.cost,
                                    params=cg.parameters,
                                    step_rule=AdaM())

        #job_name = os.path.basename(worker.running_job)
        job_name = os.path.basename(".")
        update_path = (os.path.join(os.path.join(worker.path, "updates"),
                                    job_name))
        if not os.path.exists(update_path):
            os.mkdir(update_path)

        self.main_loop = MainLoop(
            algorithm,
            datastream,
            model=Model(m.cost),
            extensions=[
                Timing(),
                TrainingDataMonitoring([m.cost, m.accur],
                                       prefix="train",
                                       after_epoch=True),
                DataStreamMonitoring([m.cost, m.accur],
                                     test_stream,
                                     prefix="test"),
                FinishAfter(after_n_epochs=1),
                LogToFile(os.path.join(update_path, "log.csv")),
                Printing(),
                EpochProgress(dataset.num_examples // batch_size + 1)
                #, DistributeUpdate(worker, every_n_epochs=1)
                #, DistributeWhetlabFinish(worker, experiment, score_func)
                #, Plot('cifar10',
                #channels=[['train_cost', 'test_cost'], ['train_accur', 'test_accur']])
            ])

    def run(self):
        self.main_loop.run()
Example #24
def main(config, tr_stream):
    # Create Theano variables
    logger.info('Creating theano variables')
    source_char_seq = tensor.lmatrix('source_char_seq')
    source_sample_matrix = tensor.btensor3('source_sample_matrix')
    source_char_aux = tensor.bmatrix('source_char_aux')
    source_word_mask = tensor.bmatrix('source_word_mask')
    target_char_seq = tensor.lmatrix('target_char_seq')
    target_char_aux = tensor.bmatrix('target_char_aux')
    target_char_mask = tensor.bmatrix('target_char_mask')
    target_sample_matrix = tensor.btensor3('target_sample_matrix')
    target_word_mask = tensor.bmatrix('target_word_mask')
    target_resample_matrix = tensor.btensor3('target_resample_matrix')
    target_prev_char_seq = tensor.lmatrix('target_prev_char_seq')
    target_prev_char_aux = tensor.bmatrix('target_prev_char_aux')
    target_bos_idx = tr_stream.trg_bos
    target_space_idx = tr_stream.space_idx['target']
    src_vocab = pickle.load(open(config['src_vocab'], 'rb'))

    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'], config['enc_embed'], config['src_dgru_nhids'],
                                   config['enc_nhids'], config['src_dgru_depth'], config['bidir_encoder_depth'])

    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'], config['trg_dgru_nhids'], config['trg_igru_nhids'],
                      config['dec_nhids'], config['enc_nhids'] * 2, config['transition_depth'], config['trg_igru_depth'],
                      config['trg_dgru_depth'], target_space_idx, target_bos_idx)

    representation = encoder.apply(source_char_seq, source_sample_matrix, source_char_aux,
                                   source_word_mask)
    cost = decoder.cost(representation, source_word_mask, target_char_seq, target_sample_matrix,
                        target_resample_matrix, target_char_aux, target_char_mask,
                        target_word_mask, target_prev_char_seq, target_prev_char_aux)

    # Set up model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    # Reload model if necessary
    extensions = [LoadNMT(config['saveto'])]

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=None,
        data_stream=None,
        extensions=extensions
    )

    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')

    char_embedding = encoder.decimator.apply(source_char_seq.T, source_sample_matrix, source_char_aux.T)
    embedding(Model(char_embedding), src_vocab)
Example #25
def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(
        cost=cost, parameters=all_params,
        step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(
        variables=monitored_variables,
        prefix="train",
        after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables,
        data_stream=valid_data_stream,
        prefix="valid",
        after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()])
    main_loop.run()
Example #26
def main(save_to, num_epochs, bokeh=False):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(
        cost=cost, params=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      DataStream(mnist_test,
                                 iteration_scheme=SequentialScheme(
                                     mnist_test.num_examples, 500)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if bokeh:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(
                       mnist_train.num_examples, 50)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run()
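The channel names in Example #26's Plot call ('test_final_cost', 'test_misclassificationrate_apply_error_rate') are simply the monitoring prefix joined to each monitored variable's name; the long middle part is the auto-generated name of the MisclassificationRate application output. Naming the variable explicitly yields shorter channels, a sketch:

error_rate = MisclassificationRate().apply(y.flatten(), probs)
error_rate.name = 'error_rate'   # the monitored channel becomes 'test_error_rate'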
Example #27
def run(model_name):

    running_on_laptop = socket.gethostname() == 'yop'

    X = tensor.tensor4('image_features', dtype='float32')
    T = tensor.matrix('targets', dtype='float32')

    image_border_size = 100

    if running_on_laptop:
        host_plot = 'http://*****:*****'  # plotting server URL (redacted in the source)

    # [... model, cost, algorithm and data stream definitions missing from the source snippet ...]
    extensions = [
        Plot('%s %s @ %s' %
             (model_name, datetime.datetime.now(), socket.gethostname()),
             channels=[['loss', 'valid_loss_test'], ['valid_error']],
             after_epoch=True,
             server_url=host_plot),
        Printing(),
        Checkpoint('train2')
    ]

    main_loop = MainLoop(data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Example #28
File: main.py Project: caomw/MLFun
class Runner(object):
    def __init__(self, worker, experiment, config):
        # Data
        dataset = CIFAR10('train', flatten=False)
        test_dataset = CIFAR10('test', flatten=False)
        batch_size = 128

        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        datastream = DataStream(dataset, iteration_scheme=scheme)

        test_scheme = ShuffledScheme(test_dataset.num_examples, batch_size)
        test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

        # Model
        m = ModelHelper(config)

        def score_func(mainloop):
            scores = mainloop.log.to_dataframe()["test_accur"].values
            return np.mean(np.sort(scores)[-4:-1])

        # Algorithm
        cg = ComputationGraph([m.cost])
        algorithm = GradientDescent(
                cost = m.cost, params=cg.parameters,
                step_rule = AdaM())

        #job_name = os.path.basename(worker.running_job)
        job_name = os.path.basename(".")
        update_path = (os.path.join(os.path.join(worker.path, "updates"), job_name))
        if not os.path.exists(update_path):
            os.mkdir(update_path)

        self.main_loop = MainLoop(
            algorithm,
            datastream,
            model = Model(m.cost),
            extensions=[
                Timing(),
                TrainingDataMonitoring(
                    [m.cost, m.accur], prefix="train", after_epoch=True)
                , DataStreamMonitoring(
                    [m.cost, m.accur],
                    test_stream,
                    prefix="test")
                , FinishAfter(after_n_epochs=1)
                , LogToFile(os.path.join(update_path, "log.csv"))
                , Printing()
                , EpochProgress(dataset.num_examples // batch_size + 1)
                #, DistributeUpdate(worker, every_n_epochs=1)
                #, DistributeWhetlabFinish(worker, experiment, score_func)
                #, Plot('cifar10',
                    #channels=[['train_cost', 'test_cost'], ['train_accur', 'test_accur']])
                ])
    def run(self):
        self.main_loop.run()
Example #29
def train(cost, error_rate, batch_size=100, num_epochs=150):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/memory_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(cost=cost,
                                         parameters=all_params,
                                         step_rule=Adam(learning_rate=0.001))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost, error_rate]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_mnist_video_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=valid_data_stream,
                                            prefix="valid",
                                            after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring, valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            Printing()
        ])
    main_loop.run()
Example #30
def main():

    import configurations
    from stream import DStream
    logger = logging.getLogger(__name__)
    cfig = getattr(configurations, 'get_config_penn')()

    rnnlm = Rnnlm(cfig['vocabsize'], cfig['nemb'], cfig['nhids'])
    rnnlm.weights_init = IsotropicGaussian(0.1)
    rnnlm.biases_init = Constant(0.)
    rnnlm.push_initialization_config()
    rnnlm.generator.transition.weights_init = Orthogonal()

    sentence = tensor.lmatrix('sentence')
    sentence_mask = tensor.matrix('sentence_mask')
    batch_cost = rnnlm.cost(sentence, sentence_mask).sum()
    batch_size = sentence.shape[1].copy(name='batch_size')
    cost = aggregation.mean(batch_cost, batch_size)
    cost.name = "sequence_log_likelihood"
    logger.info("Cost graph is built")

    model = Model(cost)
    parameters = model.get_parameter_dict()
    logger.info("Parameters:\n" +
                pprint.pformat(
                    [(key, value.get_value().shape) for key, value
                        in parameters.items()],
                    width=120))

    for brick in model.get_top_bricks():
        brick.initialize()
    cg = ComputationGraph(cost)
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(10.0), Scale(0.01)]))

    gradient_norm = aggregation.mean(algorithm.total_gradient_norm)
    step_norm = aggregation.mean(algorithm.total_step_norm)
    monitored_vars = [cost, gradient_norm, step_norm]

    train_monitor = TrainingDataMonitoring(variables=monitored_vars, after_batch=True,
                                           before_first_epoch=True, prefix='tra')

    extensions = [train_monitor, Timing(), Printing(after_batch=True),
                  FinishAfter(after_n_epochs=1000),
                  Printing(every_n_batches=1)]

    train_stream = DStream(datatype='train', config=cfig)
    main_loop = MainLoop(model=model,
                         data_stream=train_stream,
                         algorithm=algorithm,
                         extensions=extensions)

    main_loop.run()
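The step rule in Example #30 chains two rules with CompositeRule: StepClipping first rescales the gradient so its global norm is at most 10, then Scale multiplies it by the 0.01 learning rate; the rules are applied in list order. The same composite on its own:

from blocks.algorithms import CompositeRule, StepClipping, Scale

step_rule = CompositeRule([StepClipping(threshold=10.0),
                           Scale(learning_rate=0.01)])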
Example #31
 def test_save_and_load(self):
     """Check that main loop have been saved properly."""
     old_value = self.W.get_value()
     self.W.set_value(old_value * 2)
     new_main_loop = MainLoop(model=self.model,
                              data_stream=self.data_stream,
                              algorithm=self.algorithm,
                              extensions=[Load('myweirdmodel.tar')])
     new_main_loop.extensions[0].main_loop = new_main_loop
     new_main_loop._run_extensions('before_training')
     assert_allclose(self.W.get_value(), old_value)
Example #32
def test_training_data_monitoring():
    weights = numpy.array([-1, 1], dtype=theano.config.floatX)
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    targets = [(weights * f).sum() for f in features]
    n_batches = 3
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    W = shared_floatx([0, 0], name='W')
    V = shared_floatx(7, name='V')
    W_sum = named_copy(W.sum(), 'W_sum')
    cost = ((x * W).sum() - y) ** 2
    cost.name = 'cost'

    class TrueCostExtension(TrainingExtension):

        def before_batch(self, data):
            self.main_loop.log.current_row['true_cost'] = (
                ((W.get_value() * data["features"]).sum() -
                 data["targets"]) ** 2)

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=GradientDescent(cost=cost, params=[W],
                                  step_rule=Scale(0.001)),
        extensions=[
            FinishAfter(after_n_epochs=1),
            TrainingDataMonitoring([W_sum, cost, V], prefix="train1",
                                   after_batch=True),
            TrainingDataMonitoring([aggregation.mean(W_sum), cost],
                                   prefix="train2", after_epoch=True),
            TrueCostExtension()])

    main_loop.run()

    # Check monitoring of a shared variable
    assert_allclose(main_loop.log.current_row['train1_V'], 7.0)

    for i in range(n_batches):
        # The ground truth is written to the log before the batch is
        # processed, whereas the extension writes after the batch is
        # processed. This is why the iteration numbers differ here.
        assert_allclose(main_loop.log[i]['true_cost'],
                        main_loop.log[i + 1]['train1_cost'])
    assert_allclose(
        main_loop.log[n_batches]['train2_cost'],
        sum([main_loop.log[i]['true_cost']
             for i in range(n_batches)]) / n_batches)
    assert_allclose(
        main_loop.log[n_batches]['train2_W_sum'],
        sum([main_loop.log[i]['train1_W_sum']
             for i in range(1, n_batches + 1)]) / n_batches)
Example #33
def run(model_name, port_train, port_valid):

	running_on_laptop = socket.gethostname() == 'yop'

	X = tensor.tensor4('image_features', dtype='float32')
	T = tensor.matrix('targets', dtype='float32')

	image_border_size = (100, 100)

	if running_on_laptop:
		host_plot = 'http://*****:*****'  # plotting server URL (redacted in the source)

	# [... model, cost, algorithm and data stream definitions missing from the source snippet ...]
	extensions = [
		Plot('%s %s @ %s' % (model_name, datetime.datetime.now(), socket.gethostname()),
		     channels=[['loss'], ['error', 'valid_error']], after_epoch=True, server_url=host_plot),
		Printing(),
		Checkpoint('train2')
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	main_loop.run()
Example #34
def test_main_loop():

    main_loop = MainLoop(
        MockAlgorithm(), IterableDataset(range(10)).get_example_stream(),
        extensions=[WriteBatchExtension(), FinishAfter(after_n_epochs=2)])
    main_loop.run()
    assert_raises(AttributeError, getattr, main_loop, 'model')

    assert main_loop.log.status['iterations_done'] == 20
    assert main_loop.log.status['_epoch_ends'] == [10, 20]
    assert len(main_loop.log) == 20
    for i in range(20):
        assert main_loop.log[i + 1]['batch'] == {'data': i % 10}
Example #35
 def test_save_and_load(self):
     """Check that main loop have been saved properly."""
     old_value = self.W.get_value()
     self.W.set_value(old_value * 2)
     new_main_loop = MainLoop(
         model=self.model,
         data_stream=self.data_stream,
         algorithm=self.algorithm,
         extensions=[Load("myweirdmodel.tar")],
     )
     new_main_loop.extensions[0].main_loop = new_main_loop
     new_main_loop._run_extensions("before_training")
     assert_allclose(self.W.get_value(), old_value)
Example #36
def train_model(cost,
                train_stream,
                valid_stream,
                valid_freq,
                valid_rare,
                load_location=None,
                save_location=None):
    cost.name = 'nll'
    perplexity = 2**(cost / tensor.log(2))
    perplexity.name = 'ppl'

    # Define the model
    model = Model(cost)

    # Load the parameters from a dumped model
    if load_location is not None:
        logger.info('Loading parameters...')
        model.set_param_values(load_parameter_values(load_location))

    cg = ComputationGraph(cost)
    algorithm = GradientDescent(cost=cost,
                                step_rule=Scale(learning_rate=0.01),
                                params=cg.parameters)
    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            DataStreamMonitoring([cost, perplexity],
                                 valid_stream,
                                 prefix='valid_all',
                                 every_n_batches=5000),
            # Overfitting of rare words occurs between 3000 and 4000 iterations
            DataStreamMonitoring([cost, perplexity],
                                 valid_rare,
                                 prefix='valid_rare',
                                 every_n_batches=500),
            DataStreamMonitoring([cost, perplexity],
                                 valid_freq,
                                 prefix='valid_frequent',
                                 every_n_batches=5000),
            Printing(every_n_batches=500)
        ])
    main_loop.run()

    # Save the main loop
    if save_location is not None:
        logger.info('Saving the main loop...')
        dump_manager = MainLoopDumpManager(save_location)
        dump_manager.dump(main_loop)
        logger.info('Saved')
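The perplexity expression in Example #36, 2 ** (cost / tensor.log(2)), converts the per-word negative log-likelihood from nats to bits before exponentiating, which is numerically the same as exp(cost). A quick check:

import math

nll = 4.7                            # hypothetical per-word NLL in nats
print(2 ** (nll / math.log(2)))      # ~109.95
print(math.exp(nll))                 # same value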
Example #37
def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.
    
    Args:
        config (dict): NMT configuration
        args (object): ArgumentParser object containing the command
                       line arguments
    
    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    train_params = []
    for p in nmt_model.cg.parameters:
        if p.name in 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(
        cost=nmt_model.cost,
        parameters=train_params
    )
    main_loop = MainLoop(
        model=nmt_model.training_model,
        algorithm=algorithm,
        data_stream=align_stream,
        extensions=extensions
    )
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path,
                          config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments
Example #38
def infer_population(data_stream, model, n_batches):
    """ Sets the population parameters for a given model"""
    # construct a main loop with algorithm
    algorithm = BatchNormAccumulate(model)
    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=data_stream,
        model=model,
        extensions=[FinishAfter(after_n_batches=n_batches), ProgressBar()])
    main_loop.run()
    parameters = get_batchnorm_parameters(model)
    batchnorm_bricks = set([get_brick(p) for p in parameters])
    for b in batchnorm_bricks:
        b.use_population = True
Example #39
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHTS])(cg.variables)
    cost = cost + .00005 * (W1**2).sum() + .00005 * (W2**2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST("train")
    mnist_test = MNIST("test")

    algorithm = GradientDescent(cost=cost,
                                step_rule=SteepestDescent(learning_rate=0.1))
    main_loop = MainLoop(
        mlp,
        DataStream(mnist_train,
                   iteration_scheme=SequentialScheme(mnist_train.num_examples,
                                                     50)),
        algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=num_epochs),
            DataStreamMonitoring([cost, error_rate],
                                 DataStream(mnist_test,
                                            iteration_scheme=SequentialScheme(
                                                mnist_test.num_examples, 500)),
                                 prefix="test"),
            TrainingDataMonitoring([
                cost, error_rate,
                aggregation.mean(algorithm.total_gradient_norm)
            ],
                                   prefix="train",
                                   after_every_epoch=True),
            SerializeMainLoop(save_to),
            Plot('MNIST example',
                 channels=[[
                     'test_final_cost',
                     'test_misclassificationrate_apply_error_rate'
                 ], ['train_total_gradient_norm']]),
            Printing()
        ])
    main_loop.run()
Example #40
0
def train(model, batch_size=50, num_epochs=1500):
    # Setting Logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = "results/memory_" + timestr
    log_path = os.path.join(save_path, "log.txt")
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    cost = model.outputs["cost"]
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    training_algorithm = GradientDescent(cost=cost, params=all_params, step_rule=Adam(learning_rate=model.default_lr))

    # training_algorithm = GradientDescent(
    #     cost=cost, params=all_params,
    #     step_rule=Scale(learning_rate=model.default_lr))

    monitored_variables = [cost]

    # the rest is for validation
    # train_data_stream, valid_data_stream = get_mnist_streams(
    #     50000, batch_size)
    train_data_stream, valid_data_stream = get_memory_streams(20, 10)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables, prefix="train", after_epoch=True)

    valid_monitoring = DataStreamMonitoring(
        variables=monitored_variables, data_stream=valid_data_stream, prefix="valid", after_epoch=True
    )

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams("valid_MSE", blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            Printing(),
        ],
    )
    main_loop.run()
Example #41
0
def run():
    streams = create_celeba_streams(training_batch_size=100,
                                    monitoring_batch_size=500,
                                    include_targets=True)
    main_loop_stream = streams[0]
    train_monitor_stream = streams[1]
    valid_monitor_stream = streams[2]

    cg, bn_dropout_cg = create_training_computation_graphs()

    # Compute parameter updates for the batch normalization population
    # statistics. They are updated following an exponential moving average.
    pop_updates = get_batch_normalization_updates(bn_dropout_cg)
    decay_rate = 0.05
    extra_updates = [(p, m * decay_rate + p * (1 - decay_rate))
                     for p, m in pop_updates]

    # Prepare algorithm
    step_rule = Adam()
    algorithm = GradientDescent(cost=bn_dropout_cg.outputs[0],
                                parameters=bn_dropout_cg.parameters,
                                step_rule=step_rule)
    algorithm.add_updates(extra_updates)

    # Prepare monitoring
    cost = bn_dropout_cg.outputs[0]
    cost.name = 'cost'
    train_monitoring = DataStreamMonitoring(
        [cost], train_monitor_stream, prefix="train",
        before_first_epoch=False, after_epoch=False, after_training=True,
        updates=extra_updates)

    cost, accuracy = cg.outputs
    cost.name = 'cost'
    accuracy.name = 'accuracy'
    monitored_quantities = [cost, accuracy]
    valid_monitoring = DataStreamMonitoring(
        monitored_quantities, valid_monitor_stream, prefix="valid",
        before_first_epoch=False, after_epoch=False, every_n_epochs=5)

    # Prepare checkpoint
    checkpoint = Checkpoint(
        'celeba_classifier.zip', every_n_epochs=5, use_cpickle=True)

    extensions = [Timing(), FinishAfter(after_n_epochs=50), train_monitoring,
                  valid_monitoring, checkpoint, Printing(), ProgressBar()]
    main_loop = MainLoop(data_stream=main_loop_stream, algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
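The comment before extra_updates above describes an exponential moving average of the batch normalization population statistics. A small, self-contained numpy re-enactment of that update rule (the decay rate matches the example; the running and minibatch values are made-up):

import numpy

decay_rate = 0.05
population_mean = numpy.zeros(3)                    # running statistic held by the BN brick
minibatch_means = [numpy.array([1.0, 2.0, 3.0]),
                   numpy.array([0.5, 1.5, 2.5])]

for m in minibatch_means:
    # same rule as extra_updates: p <- decay_rate * m + (1 - decay_rate) * p
    population_mean = decay_rate * m + (1 - decay_rate) * population_mean

print(population_mean)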
Example #42
0
def run(get_model, model_name):
	train_stream = ServerDataStream(('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'), False, hwm=10)
	valid_stream = ServerDataStream(('cases', 'image_position', 'multiplier', 'sax', 'sax_features', 'targets'), False, hwm=10, port=5558)

	ftensor5 = tensor.TensorType('float32', (False,)*5)

	input_var  = ftensor5('sax_features')
	target_var = tensor.matrix('targets')
	multiply_var = tensor.matrix('multiplier')
	multiply_var = T.addbroadcast(multiply_var, 1)

	prediction, test_prediction, test_pred_mid, params_bottom, params_top = get_model(input_var, multiply_var)

	# load parameters
	cg = ComputationGraph(test_pred_mid)
	params_val = numpy.load('sunnybrook/best_weights.npz')
	
	for p, value in zip(cg.shared_variables, params_val['arr_0']):
		p.set_value(value)

	crps = tensor.abs_(test_prediction - target_var).mean()

	loss = squared_error(prediction, target_var).mean()

	loss.name = 'loss'
	crps.name = 'crps'

	algorithm = GradientDescent(
		cost=loss,
		parameters=params_top,
		step_rule=Adam(),
		on_unused_sources='ignore'
	)

	host_plot = 'http://localhost:5006'

	extensions = [
		Timing(),
		TrainingDataMonitoring([loss], after_epoch=True),
		DataStreamMonitoring(variables=[crps, loss], data_stream=valid_stream, prefix="valid"),
		Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')), channels=[['loss','valid_loss'], ['valid_crps']], after_epoch=True, server_url=host_plot),
		Printing(),
		Checkpoint('train'),
		FinishAfter(after_n_epochs=20)
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	main_loop.run()
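Two Theano idioms in the example above are easy to miss: TensorType('float32', (False,)*5) builds a 5-D float32 symbolic variable type for the sax_features input, and addbroadcast(multiply_var, 1) marks axis 1 of the (batch, 1)-shaped multiplier as broadcastable so it can scale another tensor elementwise. A standalone sketch (the shapes in the comments are assumptions, not taken from the data):

from theano import tensor

ftensor5 = tensor.TensorType('float32', (False,) * 5)  # 5-D float32 variable type
sax_features = ftensor5('sax_features')

multiplier = tensor.matrix('multiplier')         # runtime shape (batch, 1)
multiplier = tensor.addbroadcast(multiplier, 1)  # axis 1 may now broadcast
prediction = tensor.matrix('prediction')         # e.g. (batch, outputs)
scaled = prediction * multiplier                 # broadcasts over axis 1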
Example #43
0
def main(save_to, num_epochs, batch_size):
    mlp = MLP([Tanh(), Tanh(), Tanh(), Softmax()], [3072, 4096, 1024, 512, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tt.tensor4('features', dtype='float32')
    y = tt.vector('label', dtype='int32')

    probs = mlp.apply(x.reshape((-1, 3072)))
    cost = CategoricalCrossEntropy().apply(y, probs)
    error_rate = MisclassificationRate().apply(y, probs)

    cg = ComputationGraph([cost])
    ws = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * sum([(w**2).sum() for w in ws])
    cost.name = 'final_cost'

    train_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=True)
    valid_dataset = Cifar10Dataset(data_dir='/home/belohlavek/data/cifar10',
                                   is_train=False)

    train_stream = train_dataset.get_stream(batch_size)
    valid_stream = valid_dataset.get_stream(batch_size)

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Adam(learning_rate=0.001))
    extensions = [
        Timing(),
        LogExtension('/home/belohlavek/ALI/mlp.log'),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate], valid_stream, prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        Printing()
    ]

    main_loop = MainLoop(algorithm,
                         train_stream,
                         model=Model(cost),
                         extensions=extensions)

    main_loop.run()
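The weight-decay term added to cost above is just the summed squared weights scaled by 5e-5. In plain numpy, with toy stand-ins for the MLP's weight matrices:

import numpy

ws = [numpy.array([[0.1, -0.2], [0.3, 0.4]]),
      numpy.array([[0.5], [-0.6]])]
l2_penalty = .00005 * sum((w ** 2).sum() for w in ws)
print(l2_penalty)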
Example #44
0
def infer_population(data_stream, model, n_batches):
    """ Sets the population parameters for a given model"""
    # construct a main loop with algorithm
    algorithm = BatchNormAccumulate(model)
    main_loop = MainLoop(
        algorithm=algorithm,
        data_stream=data_stream,
        model=model,
        extensions=[FinishAfter(after_n_batches=n_batches),
                    ProgressBar()])
    main_loop.run()
    parameters = get_batchnorm_parameters(model)
    batchnorm_bricks = set([get_brick(p) for p in parameters])
    for b in batchnorm_bricks:
        b.use_population = True
Example #45
0
def test_training_data_monitoring_updates_algorithm():
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 5], [5, 8]]
    ]
    targets = numpy.array([f.sum() for f in features])
    dataset = IterableDataset(dict(features=features, targets=targets))

    x = tensor.vector('features')
    y = tensor.scalar('targets')
    m = x.mean().copy(name='features_mean')
    t = y.sum().copy(name='targets_sum')

    main_loop = MainLoop(
        model=None,
        data_stream=dataset.get_example_stream(),
        algorithm=UpdatesAlgorithm(),
        extensions=[
            TrainingDataMonitoring([m, t], prefix="train1", after_batch=True)
        ],
    )
    main_loop.extensions[0].main_loop = main_loop
    assert len(main_loop.algorithm.updates) == 0
    main_loop.extensions[0].do('before_training')
    assert len(main_loop.algorithm.updates) > 0
Example #46
0
def create_main_loop(save_path):
    model, bn_model, bn_updates = create_models()
    ali, = bn_model.top_bricks
    discriminator_loss, generator_loss = bn_model.outputs

    step_rule = Adam(learning_rate=LEARNING_RATE, beta1=BETA1)
    algorithm = ali_algorithm(discriminator_loss, ali.discriminator_parameters,
                              step_rule, generator_loss,
                              ali.generator_parameters, step_rule)
    algorithm.add_updates(bn_updates)
    streams = create_celeba_data_streams(BATCH_SIZE, MONITORING_BATCH_SIZE)
    main_loop_stream, train_monitor_stream, valid_monitor_stream = streams
    bn_monitored_variables = (
        [v for v in bn_model.auxiliary_variables if 'norm' not in v.name] +
        bn_model.outputs)
    monitored_variables = (
        [v for v in model.auxiliary_variables if 'norm' not in v.name] +
        model.outputs)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=NUM_EPOCHS),
        DataStreamMonitoring(
            bn_monitored_variables, train_monitor_stream, prefix="train",
            updates=bn_updates),
        DataStreamMonitoring(
            monitored_variables, valid_monitor_stream, prefix="valid"),
        Checkpoint(save_path, after_epoch=True, after_training=True,
                   use_cpickle=True),
        ProgressBar(),
        Printing(),
    ]
    main_loop = MainLoop(model=bn_model, data_stream=main_loop_stream,
                         algorithm=algorithm, extensions=extensions)
    return main_loop
Example #47
0
def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    features = [
        numpy.array(f, dtype=theano.config.floatX)
        for f in [[1, 2], [3, 4], [5, 6]]
    ]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x - W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, params=[W], step_rule=Scale(1e-3))

    main_loop = MainLoop(model=None,
                         data_stream=dataset.get_example_stream(),
                         algorithm=algorithm,
                         extensions=[FinishAfter(after_n_epochs=1), extension])

    return main_loop
Example #48
0
def setup_mainloop(extensions):
    """Create a MainLoop, register the given extension, supply it with a
        DataStream and a minimal model/cost to optimize.
    """
    features = [numpy.array(f, dtype=floatX) for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)

    W = shared_floatx([0, 0], name='W')
    add_role(W, PARAMETER)
    x = tensor.vector('features')
    cost = tensor.sum((x - W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost,
                                parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(model=Model(cost),
                         data_stream=datastream,
                         algorithm=algorithm,
                         extensions=[
                             FinishAfter(after_n_epochs=1),
                         ] + extensions)

    return main_loop
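A note on the toy cost used by these setup_mainloop helpers: sum((x - W)**2) has gradient 2 * (W - x) with respect to W, so every Scale(1e-3) step nudges W toward the current feature vector. A tiny numpy re-enactment of one epoch over the three examples:

import numpy

W = numpy.zeros(2)
features = [numpy.array([1., 2.]), numpy.array([3., 4.]), numpy.array([5., 6.])]
learning_rate = 1e-3

for x in features:
    grad = 2 * (W - x)            # d/dW of sum((x - W)**2)
    W = W - learning_rate * grad  # what GradientDescent with Scale(1e-3) applies
print(W)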
Example #49
0
        def create_main_loop():
            model, bn_model, bn_updates = create_models()
            ali, = bn_model.top_bricks
            discriminator_loss, generator_loss = bn_model.outputs
            step_rule = Adam(learning_rate=LEARNING_RATE, beta1=BETA1)
            algorithm = ali_algorithm(discriminator_loss,
                                      ali.discriminator_parameters, step_rule,
                                      generator_loss, ali.generator_parameters,
                                      step_rule)
            algorithm.add_updates(bn_updates)
            streams = create_gaussian_mixture_data_streams(
                batch_size=BATCH_SIZE,
                monitoring_batch_size=MONITORING_BATCH_SIZE,
                means=MEANS,
                variances=VARIANCES,
                priors=PRIORS)
            main_loop_stream, train_monitor_stream, valid_monitor_stream = streams
            bn_monitored_variables = ([
                v for v in bn_model.auxiliary_variables if 'norm' not in v.name
            ] + bn_model.outputs)
            monitored_variables = (
                [v
                 for v in model.auxiliary_variables if 'norm' not in v.name] +
                model.outputs)
            extensions = [
                Timing(),
                FinishAfter(after_n_epochs=NUM_EPOCHS),
                DataStreamMonitoring(bn_monitored_variables,
                                     train_monitor_stream,
                                     prefix="train",
                                     updates=bn_updates),
                DataStreamMonitoring(monitored_variables,
                                     valid_monitor_stream,
                                     prefix="valid"),
                Checkpoint(os.path.join(self._work_dir, "main_loop.tar"),
                           after_epoch=True,
                           after_training=True,
                           use_cpickle=True),
                ProgressBar(),
                Printing(),

                #ModelLogger(folder=self._work_dir, after_epoch=True),
                GraphLogger(num_modes=1,
                            num_samples=2500,
                            dimension=2,
                            r=0,
                            std=1,
                            folder=self._work_dir,
                            after_epoch=True,
                            after_training=True),
                MetricLogger(means=MEANS,
                             variances=VARIANCES,
                             folder=self._work_dir,
                             after_epoch=True)
            ]
            main_loop = MainLoop(model=bn_model,
                                 data_stream=main_loop_stream,
                                 algorithm=algorithm,
                                 extensions=extensions)
            return main_loop
Example #50
0
def setup_mainloop(extension):
    """Set up a simple main loop for progress bar tests.

    Create a MainLoop, register the given extension, supply it with a
    DataStream and a minimal model/cost to optimize.

    """
    # Since progressbar2 3.6.0, the `maxval` kwarg has been replaced by
    # `max_value`, which has a default value of 100. If we're still using
    # `maxval` by accident, this test should fail complaining that
    # the progress bar has received a value out of range.
    features = [numpy.array(f, dtype=theano.config.floatX)
                for f in [[1, 2]] * 101]
    dataset = IterableDataset(dict(features=features))

    W = shared_floatx([0, 0], name='W')
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=None, data_stream=dataset.get_example_stream(),
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
            extension])

    return main_loop
Example #51
0
def work():
    config_dict = yaml.load(open(sys.argv[1], 'r'))
    print config_dict

    if config_dict['working_mode'] == 'train_new':
        train, valid, alphabet = build_datasets(config_dict)
        generator, cost = build_model(len(alphabet), config_dict)
        algorithm = build_algorithm(generator, cost, config_dict)
        extensions = build_extensions(cost, algorithm, valid, config_dict)
        main_loop = MainLoop(algorithm=algorithm, data_stream=train,
                             model=Model(cost), extensions=extensions)
        main_loop.run()

    elif config_dict['working_mode'] == 'train_resume':
        # TODO
        pass
Example #52
0
def train(config, save_path, bokeh_name, params, bokeh_server, bokeh, test_tag,
          use_load_ext, load_log, fast_start):

    model, algorithm, data, extensions = initialize_all(
        config, save_path, bokeh_name, params, bokeh_server, bokeh, test_tag,
        use_load_ext, load_log, fast_start)

    # Save the config into the status
    log = NDarrayLog()
    log.status['_config'] = repr(config)
    main_loop = MainLoop(model=model,
                         log=log,
                         algorithm=algorithm,
                         data_stream=data.get_stream("train"),
                         extensions=extensions)
    main_loop.run()
Example #53
0
def run(get_model, model_name):
	train_stream = ServerDataStream(('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10)
	valid_stream = ServerDataStream(('cases', 'image_features', 'image_targets', 'multiplier'), False, hwm=10, port=5558)

	input_var  = tensor.tensor4('image_features')
	target_var = tensor.tensor4('image_targets')
	multiply_var = tensor.matrix('multiplier')
	multiply_var = T.addbroadcast(multiply_var, 1)

	test_prediction, prediction, params = get_model(input_var, target_var, multiply_var)

	loss = binary_crossentropy(prediction, target_var).mean()


	loss.name = 'loss'

	valid_error = T.neq((test_prediction>0.5)*1., target_var).mean()
	valid_error.name = 'error'

	scale = Scale(0.1)
	algorithm = GradientDescent(
		cost=loss,
		parameters=params,
		step_rule=scale,
		#step_rule=Adam(),
		on_unused_sources='ignore'
	)

	host_plot = 'http://localhost:5006'

	extensions = [
		Timing(),
		TrainingDataMonitoring([loss], after_epoch=True),
		DataStreamMonitoring(variables=[loss, valid_error], data_stream=valid_stream, prefix="valid"),
		Plot('%s %s %s' % (model_name, datetime.date.today(), time.strftime('%H:%M')), channels=[['loss','valid_loss'],['valid_error']], after_epoch=True, server_url=host_plot),
		Printing(),
		# Checkpoint('train'),
		FinishAfter(after_n_epochs=10)
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	cg = ComputationGraph(test_prediction)
	while True:
		main_loop.run()
		scale.learning_rate.set_value(numpy.float32(scale.learning_rate.get_value()*0.7))
		numpy.savez('best_weights.npz', [param.get_value() for param in cg.shared_variables])
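The outer while loop above multiplies the Scale step rule's learning rate by 0.7 each time main_loop.run() returns, and saves the current weights after every restart. The resulting learning-rate schedule is geometric; starting from Scale(0.1):

initial_lr = 0.1
for restart in range(5):
    print(initial_lr * 0.7 ** restart)
# roughly: 0.1, 0.07, 0.049, 0.0343, 0.02401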
Example #54
0
def test_main_loop():
    old_config_profile_value = config.profile
    config.profile = True

    main_loop = MainLoop(
        MockAlgorithm(), IterableDataset(range(10)).get_example_stream(),
        extensions=[WriteBatchExtension(), FinishAfter(after_n_epochs=2)])
    main_loop.run()
    assert_raises(AttributeError, getattr, main_loop, 'model')

    assert main_loop.log.status['iterations_done'] == 20
    assert main_loop.log.status['_epoch_ends'] == [10, 20]
    assert len(main_loop.log) == 20
    for i in range(20):
        assert main_loop.log[i + 1]['batch'] == {'data': i % 10}

    config.profile = old_config_profile_value
Example #55
0
def train(config, save_path, bokeh_name,
          params, bokeh_server, bokeh, test_tag, use_load_ext,
          load_log, fast_start):

    model, algorithm, data, extensions = initialize_all(
        config, save_path, bokeh_name,
        params, bokeh_server, bokeh, test_tag, use_load_ext,
        load_log, fast_start)

    # Save the config into the status
    log = NDarrayLog()
    log.status['_config'] = repr(config)
    main_loop = MainLoop(
        model=model, log=log, algorithm=algorithm,
        data_stream=data.get_stream("train"),
        extensions=extensions)
    main_loop.run()
Example #56
0
def align_with_nam(config, args):
    """Main method for using the Neural Alignment Model.
    
    Args:
        config (dict): NMT configuration
        args (object): ArgumentParser object containing the command
                       line arguments
    
    Returns:
        list. List of alignments, where alignments are represented as
        numpy matrices containing confidences between 0 and 1.
    """
    global alignments
    config['attention'] = 'parameterized'
    alignments = []
    nmt_model = NMTModel(config)
    nmt_model.set_up()
    align_stream = _get_align_stream(**config)
    extensions = [
        FinishAfter(after_epoch=True),
        TrainingDataMonitoring([nmt_model.cost], after_batch=True),
        PrintCurrentLogRow(after_batch=True),
        NextSentenceExtension(align_stream=align_stream,
                              every_n_batches=args.iterations,
                              before_training=True)
    ]
    train_params = []
    for p in nmt_model.cg.parameters:
        if p.name == 'alignment_matrix':
            train_params.append(p)
            break
    algorithm = GradientDescent(cost=nmt_model.cost, parameters=train_params)
    main_loop = MainLoop(model=nmt_model.training_model,
                         algorithm=algorithm,
                         data_stream=align_stream,
                         extensions=extensions)
    nmt_model_path = get_nmt_model_path(args.nmt_model_selector, config)
    loader = LoadNMTUtils(nmt_model_path, config['saveto'],
                          nmt_model.training_model)
    loader.load_weights()
    try:
        main_loop.run()
    except StopIteration:
        logging.info("Alignment finished")
    return alignments
Example #57
0
def run(model_name):

	running_on_laptop = socket.gethostname() == 'yop'

	X = tensor.tensor4('image_features', dtype='float32')
	T = tensor.matrix('targets', dtype='float32')

	image_border_size = 100

	if running_on_laptop:
		host_plot = 'http://*****:*****'

	# ... (data streams, model, loss and algorithm definitions are not recoverable here) ...

	extensions = [
		Plot('%s %s @ %s' % (model_name, datetime.datetime.now(), socket.gethostname()),
		     channels=[['loss', 'valid_loss_test'], ['valid_error']],
		     after_epoch=True, server_url=host_plot),
		Printing(),
		Checkpoint('train2')
	]

	main_loop = MainLoop(data_stream=train_stream, algorithm=algorithm,
	                     extensions=extensions)
	main_loop.run()