Example #1
def main(config, tr_stream):
    # Create Theano variables
    logger.info('Creating theano variables')
    source_char_seq = tensor.lmatrix('source_char_seq')
    source_sample_matrix = tensor.btensor3('source_sample_matrix')
    source_char_aux = tensor.bmatrix('source_char_aux')
    source_word_mask = tensor.bmatrix('source_word_mask')
    target_char_seq = tensor.lmatrix('target_char_seq')
    target_char_aux = tensor.bmatrix('target_char_aux')
    target_char_mask = tensor.bmatrix('target_char_mask')
    target_sample_matrix = tensor.btensor3('target_sample_matrix')
    target_word_mask = tensor.bmatrix('target_word_mask')
    target_resample_matrix = tensor.btensor3('target_resample_matrix')
    target_prev_char_seq = tensor.lmatrix('target_prev_char_seq')
    target_prev_char_aux = tensor.bmatrix('target_prev_char_aux')
    target_bos_idx = tr_stream.trg_bos
    target_space_idx = tr_stream.space_idx['target']
    with open(config['src_vocab'], 'rb') as f:
        src_vocab = pickle.load(f)

    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'], config['enc_embed'], config['src_dgru_nhids'],
                                   config['enc_nhids'], config['src_dgru_depth'], config['bidir_encoder_depth'])

    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'], config['trg_dgru_nhids'], config['trg_igru_nhids'],
                      config['dec_nhids'], config['enc_nhids'] * 2, config['transition_depth'], config['trg_igru_depth'],
                      config['trg_dgru_depth'], target_space_idx, target_bos_idx)

    representation = encoder.apply(source_char_seq, source_sample_matrix, source_char_aux,
                                   source_word_mask)
    cost = decoder.cost(representation, source_word_mask, target_char_seq, target_sample_matrix,
                        target_resample_matrix, target_char_aux, target_char_mask,
                        target_word_mask, target_prev_char_seq, target_prev_char_aux)

    # Set up model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    # Reload model if necessary
    extensions = [LoadNMT(config['saveto'])]

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=None,
        data_stream=None,
        extensions=extensions
    )

    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')

    char_embedding = encoder.decimator.apply(source_char_seq.T, source_sample_matrix, source_char_aux.T)
    embedding(Model(char_embedding), src_vocab)
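All of the examples on this page lean on the same Blocks idiom: build a MainLoop with algorithm=None and data_stream=None, attach a Load-style extension, wire each extension's main_loop attribute back to the loop by hand, and fire the before_training callbacks so the checkpoint is restored without any training. A minimal sketch of the pattern, assuming Blocks is installed and that model names an already-built blocks Model (the model and the checkpoint path here are placeholders):

from blocks.main_loop import MainLoop
from blocks.extensions.saveload import Load

main_loop = MainLoop(model=model,       # an existing Model (placeholder)
                     algorithm=None,    # no training algorithm needed
                     data_stream=None,  # and no data stream either
                     extensions=[Load('checkpoint.tar')])
for extension in main_loop.extensions:
    extension.main_loop = main_loop     # extensions need a back-reference
main_loop._run_extensions('before_training')  # Load restores the model here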
Example #2
def test_save_and_load(self):
    """Check that the main loop has been saved properly."""
    old_value = self.W.get_value()
    self.W.set_value(old_value * 2)
    new_main_loop = MainLoop(model=self.model,
                             data_stream=self.data_stream,
                             algorithm=self.algorithm,
                             extensions=[Load('myweirdmodel.tar')])
    new_main_loop.extensions[0].main_loop = new_main_loop
    new_main_loop._run_extensions('before_training')
    assert_allclose(self.W.get_value(), old_value)
Example #3
def test_save_and_load(self):
    """Check that the main loop has been saved properly."""
    old_value = self.W.get_value()
    self.W.set_value(old_value * 2)
    new_main_loop = MainLoop(
        model=self.model,
        data_stream=self.data_stream,
        algorithm=self.algorithm,
        extensions=[Load("myweirdmodel.tar")],
    )
    new_main_loop.extensions[0].main_loop = new_main_loop
    new_main_loop._run_extensions("before_training")
    assert_allclose(self.W.get_value(), old_value)
Example #4
def test_load():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None],
              dims=[10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             FinishAfter(after_n_batches=5),
                             Checkpoint('myweirdmodel.picklebarrel')
                         ])
    main_loop.run()

    # Load the parameters, log and iteration state
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(model=Model(cost),
                         data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             Load('myweirdmodel.picklebarrel',
                                  load_iteration_state=True,
                                  load_log=True)
                         ])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(model=Model(cost),
                         data_stream=data_stream,
                         algorithm=GradientDescent(cost=cost, parameters=[W]),
                         extensions=[
                             Load('mynonexisting.picklebarrel',
                                  load_iteration_state=True,
                                  load_log=True)
                         ])
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
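The final block relies on Load treating a missing checkpoint as a no-op: in the Blocks versions these tests target, its before_training callback checks whether the file exists and, if not, logs a warning and returns instead of raising, so adding a Load extension to a fresh run is safe.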
Example #5
def test_checkpointing():
    # Create a main loop and checkpoint it
    mlp = MLP(activations=[None], dims=[10, 10], weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[0].W
    x = tensor.vector('data')
    cost = mlp.apply(x).mean()
    data = numpy.random.rand(10, 10).astype(theano.config.floatX)
    data_stream = IterableDataset(data).get_example_stream()

    main_loop = MainLoop(
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[FinishAfter(after_n_batches=5),
                    Checkpoint('myweirdmodel.tar', parameters=[W])]
    )
    main_loop.run()

    # Load it again
    old_value = W.get_value()
    W.set_value(old_value * 2)
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('myweirdmodel.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')
    assert_allclose(W.get_value(), old_value)

    # Make sure things work too if the model was never saved before
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=data_stream,
        algorithm=GradientDescent(cost=cost, parameters=[W]),
        extensions=[Load('mynonexisting.tar')]
    )
    main_loop.extensions[0].main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Cleaning
    if os.path.exists('myweirdmodel.tar'):
        os.remove('myweirdmodel.tar')
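Here Checkpoint fires only once, at the end of training (its after_training trigger defaults to on). In a longer run you would normally schedule it as well; Checkpoint is a regular SimpleExtension, so it accepts the standard scheduling keywords. A sketch reusing the pieces built above (the batch counts are arbitrary):

main_loop = MainLoop(
    data_stream=data_stream,
    algorithm=GradientDescent(cost=cost, parameters=[W]),
    extensions=[FinishAfter(after_n_batches=500),
                # save the listed parameters every 100 batches
                Checkpoint('myweirdmodel.tar', parameters=[W],
                           every_n_batches=100)])
main_loop.run()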
Example #6
def test_load_log_and_iteration_state(self):
    """Check we can load the log and iteration state separately."""
    skip_if_configuration_set(
        "log_backend", "sqlite", 'Bug with log.status["resumed_from"]'
    )
    new_main_loop = MainLoop(
        model=self.model,
        data_stream=self.data_stream,
        algorithm=self.algorithm,
        extensions=[Load("myweirdmodel.tar", True, True)],
    )
    new_main_loop.extensions[0].main_loop = new_main_loop
    new_main_loop._run_extensions("before_training")
    # Check the log
    new_keys = sorted(new_main_loop.log.status.keys())
    old_keys = sorted(self.main_loop.log.status.keys())
    for new_key, old_key in zip(new_keys, old_keys):
        assert new_key == old_key
        assert new_main_loop.log.status[new_key] == self.main_loop.log.status[old_key]
    # Check the iteration state
    new = next(new_main_loop.iteration_state[1])["data"]
    old = next(self.main_loop.iteration_state[1])["data"]
    assert_allclose(new, old)
Example #7
def test_load_log_and_iteration_state(self):
    """Check we can load the log and iteration state separately."""
    skip_if_configuration_set('log_backend', 'sqlite',
                              'Bug with log.status["resumed_from"]')
    new_main_loop = MainLoop(
        model=self.model,
        data_stream=self.data_stream,
        algorithm=self.algorithm,
        extensions=[Load('myweirdmodel.tar', True, True)])
    new_main_loop.extensions[0].main_loop = new_main_loop
    new_main_loop._run_extensions('before_training')
    # Check the log
    new_keys = sorted(new_main_loop.log.status.keys())
    old_keys = sorted(self.main_loop.log.status.keys())
    for new_key, old_key in zip(new_keys, old_keys):
        assert new_key == old_key
        assert (new_main_loop.log.status[new_key] ==
                self.main_loop.log.status[old_key])
    # Check the iteration state
    new = next(new_main_loop.iteration_state[1])['data']
    old = next(self.main_loop.iteration_state[1])['data']
    assert_allclose(new, old)
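The two bare booleans in Load('myweirdmodel.tar', True, True) are easy to misread: they fill Load's load_iteration_state and load_log parameters, so the call is equivalent to the keyword form used in Example #4:

Load('myweirdmodel.tar', load_iteration_state=True, load_log=True)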
Example #8
def main(config, test_stream):
    # Create Theano variables
    logger.info('Creating theano variables')
    source_char_seq = tensor.lmatrix('source_char_seq')
    source_sample_matrix = tensor.tensor3('source_sample_matrix')
    source_char_aux = tensor.matrix('source_char_aux')
    source_word_mask = tensor.matrix('source_word_mask')
    target_char_seq = tensor.lmatrix('target_char_seq')
    target_char_aux = tensor.matrix('target_char_aux')
    target_char_mask = tensor.matrix('target_char_mask')
    target_sample_matrix = tensor.tensor3('target_sample_matrix')
    target_word_mask = tensor.matrix('target_word_mask')
    target_resample_matrix = tensor.tensor3('target_resample_matrix')
    target_prev_char_seq = tensor.lmatrix('target_prev_char_seq')
    target_prev_char_aux = tensor.matrix('target_prev_char_aux')

    target_bos_idx = test_stream.trg_bos
    target_space_idx = test_stream.space_idx['target']

    # Construct model
    logger.info('Building RNN encoder-decoder')

    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'],
                                   config['src_dgru_nhids'],
                                   config['enc_nhids'],
                                   config['src_dgru_depth'],
                                   config['bidir_encoder_depth'])

    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['trg_dgru_nhids'], config['trg_igru_nhids'],
                      config['dec_nhids'], config['enc_nhids'] * 2,
                      config['transition_depth'], config['trg_igru_depth'],
                      config['trg_dgru_depth'], target_space_idx,
                      target_bos_idx)

    representation = encoder.apply(source_char_seq, source_sample_matrix,
                                   source_char_aux, source_word_mask)
    cost = decoder.cost(representation, source_word_mask, target_char_seq,
                        target_sample_matrix, target_resample_matrix,
                        target_char_aux, target_char_mask, target_word_mask,
                        target_prev_char_seq, target_prev_char_aux)

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    extensions = []
    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Set up beam search and sampling computation graphs if necessary
    if config['bleu_script'] is not None:
        logger.info("Building sampling model")
        generated = decoder.generate(representation, source_word_mask)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[config['transition_depth']])
            )  # generated[config['transition_depth']] is next_outputs

        logger.info("Building bleu tester")
        extensions.append(
            BleuTester(source_char_seq,
                       source_sample_matrix,
                       source_char_aux,
                       source_word_mask,
                       samples=samples,
                       config=config,
                       model=search_model,
                       data_stream=test_stream,
                       normalize=config['normalized_bleu']))

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model,
                         algorithm=None,
                         data_stream=None,
                         extensions=extensions)

    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
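The VariableFilter call above fishes the sampler's output variable out of the generation graph by brick and variable name. The same mechanism is commonly used with roles instead; a small self-contained sketch, assuming a cost variable from any Blocks model:

from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.roles import WEIGHT

cg = ComputationGraph(cost)  # cost: any annotated Theano scalar
weights = VariableFilter(roles=[WEIGHT])(cg.variables)  # all weight matrices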
Example #9
    # ds, valid_stream = data.setup_squad_datastream(valid_path, vocab_path, config)
    ds, valid_stream = data.setup_squad_ranker_datastream(
        os.path.join(os.getcwd(), 'squad_short/squadnewdev.txt'),
        os.path.join(os.getcwd(), 'squad/vocab.txt'),
        config, 221697)
    snapshot_path = os.path.join("model_params", model_name + ".pkl")

    # Build model
    m = config.Model(config, ds.vocab_size)

    # Build the Blocks stuff for training
    # test_model = Model(m.generations)
    test_model = Model(m.predictions)
    model = Model(m.sgd_cost)

    algorithm = None

    extensions = [RankerEvaluator(path=snapshot_path, model=test_model,
                                  data_stream=valid_stream,
                                  vocab_size=ds.vocab_size, vocab=ds.vocab,
                                  eval_mode=eval_mode, before_training=True)]

    main_loop = MainLoop(
        model=model,
        data_stream=valid_stream,
        algorithm=algorithm,
        extensions=extensions
    )

    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')

    # Run the model !
    # main_loop.run()
    # main_loop.profile.report()
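Nothing here ever calls main_loop.run(): the RankerEvaluator is constructed with before_training=True, and assuming it forwards its keyword arguments to Blocks' SimpleExtension, that registers it on the before_training trigger, so the manual _run_extensions('before_training') call is what actually performs the evaluation.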
Example #10
def main(config):
    print('working on it ...')
    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)
    # Extensions
    extensions = []
    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Set up beam search and sampling computation graphs if necessary
    if config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling. Note: this block references search_model, which only
    # exists when config['bleu_script'] is set, and input_sentence_mask,
    # which is not defined anywhere in this snippet; the sampling data
    # stream has to be supplied by the surrounding script.
    logger.info("Building sampler")
    global samplers_ob
    samplers_ob = Sampler(model=search_model, data_stream=input_sentence_mask,
                          hook_samples=config['hook_samples'],
                          every_n_batches=config['sampling_freq'],
                          src_vocab_size=config['src_vocab_size'])

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=None,
        data_stream=None,
        extensions=extensions
    )

    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
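Note that samplers_ob is stored in a global but never appended to extensions, so the main loop above will never trigger the sampler. Presumably the missing step, before the MainLoop is constructed, is:

extensions.append(samplers_ob)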