예제 #1
0
def get_sampling_model_and_input(exp_config):
    # Create Theano variables
    encoder = BidirectionalEncoder(
        exp_config['src_vocab_size'], exp_config['enc_embed'], exp_config['enc_nhids'])

    decoder = Decoder(
        exp_config['trg_vocab_size'], exp_config['dec_embed'], exp_config['dec_nhids'],
        exp_config['enc_nhids'] * 2,
        loss_function='min_risk'
    )

    # Create Theano variables
    logger.info('Creating theano variables')
    sampling_source_input = tensor.lmatrix('source')
    sampling_target_prefix_input = tensor.lmatrix('target')

    # Get beam search
    logger.info("Building sampling model")
    sampling_representation = encoder.apply(
        sampling_source_input, tensor.ones(sampling_source_input.shape))

    generated = decoder.generate(sampling_source_input, sampling_representation,
                                 target_prefix=sampling_target_prefix_input)

    # build the model that will let us get a theano function from the sampling graph
    logger.info("Creating Sampling Model...")
    sampling_model = Model(generated)

    # Set the parameters from a trained models
    logger.info("Loading parameters from model: {}".format(exp_config['saved_parameters']))
    # load the parameter values from an .npz file
    param_values = LoadNMT.load_parameter_values(exp_config['saved_parameters'], brick_delimiter='-')
    LoadNMT.set_model_parameters(sampling_model, param_values)

    return sampling_model, sampling_source_input, encoder, decoder
예제 #2
0
def get_sampling_model_and_input(exp_config):
    # Create Theano variables
    encoder = BidirectionalEncoder(exp_config['src_vocab_size'],
                                   exp_config['enc_embed'],
                                   exp_config['enc_nhids'])

    decoder = Decoder(exp_config['trg_vocab_size'],
                      exp_config['dec_embed'],
                      exp_config['dec_nhids'],
                      exp_config['enc_nhids'] * 2,
                      loss_function='min_risk')

    # Create Theano variables
    logger.info('Creating theano variables')
    sampling_input = tensor.lmatrix('source')

    # Get beam search
    logger.info("Building sampling model")
    sampling_representation = encoder.apply(sampling_input,
                                            tensor.ones(sampling_input.shape))
    generated = decoder.generate(sampling_input, sampling_representation)

    # build the model that will let us get a theano function from the sampling graph
    logger.info("Creating Sampling Model...")
    sampling_model = Model(generated)

    return sampling_model, sampling_input, encoder, decoder
예제 #3
0
def load_params_and_get_beam_search(exp_config):

    encoder = BidirectionalEncoder(exp_config['src_vocab_size'],
                                   exp_config['enc_embed'],
                                   exp_config['enc_nhids'])

    decoder = Decoder(exp_config['trg_vocab_size'], exp_config['dec_embed'],
                      exp_config['dec_nhids'], exp_config['enc_nhids'] * 2)

    # Create Theano variables
    logger.info('Creating theano variables')
    sampling_input = tensor.lmatrix('source')

    # Get beam search
    logger.info("Building sampling model")
    sampling_representation = encoder.apply(sampling_input,
                                            tensor.ones(sampling_input.shape))
    generated = decoder.generate(sampling_input, sampling_representation)

    _, samples = VariableFilter(
        bricks=[decoder.sequence_generator], name="outputs")(ComputationGraph(
            generated[1]))  # generated[1] is next_outputs
    beam_search = BeamSearch(samples=samples)

    # Set the parameters
    logger.info("Creating Model...")
    model = Model(generated)
    logger.info("Loading parameters from model: {}".format(
        exp_config['saved_parameters']))

    # load the parameter values from an .npz file if the `saved_parameters` field is present in the config
    param_values = LoadNMT.load_parameter_values(
        exp_config['saved_parameters'],
        brick_delimiter=exp_config.get('brick_delimiter', None))
    LoadNMT.set_model_parameters(model, param_values)

    return beam_search, sampling_input
def test_search_model():

    # Create Theano variables
    floatX = theano.config.floatX
    source_sentence = theano.tensor.lmatrix('source')
    source_sentence_mask = theano.tensor.matrix('source_mask', dtype=floatX)
    target_sentence = theano.tensor.lmatrix('target')
    target_sentence_mask = theano.tensor.matrix('target_mask', dtype=floatX)

    # Construct model
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    # Compile a function for the cost
    f_cost = theano.function(
        inputs=[source_sentence, source_sentence_mask,
                target_sentence, target_sentence_mask],
        outputs=cost)

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(22, 4))
    y = numpy.random.randint(0, 12, size=(22, 6))
    x_mask = numpy.ones_like(x).astype(floatX)
    y_mask = numpy.ones_like(y).astype(floatX)

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    cost_ = f_cost(x, x_mask, y, y_mask)
    assert_allclose(cost_, 14.90944)
def test_sampling():

    # Create Theano variables
    sampling_input = theano.tensor.lmatrix('input')

    # Construct model
    encoder = BidirectionalEncoder(
        vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(
        vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16,
        theano_seed=1234)
    sampling_representation = encoder.apply(
        sampling_input, theano.tensor.ones(sampling_input.shape))
    generateds = decoder.generate(sampling_input, sampling_representation)
    model = Model(generateds[1])

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Compile a function for the generated
    sampling_fn = model.get_theano_function()

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(1, 2))

    # Call function and check result
    generated_step = sampling_fn(x)
    assert len(generated_step[0].flatten()) == 4
def main(config, tr_stream, dev_stream, use_bokeh=False):

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['dec_nhids'], config['enc_nhids'] * 2)
    cost = decoder.cost(encoder.apply(source_sentence, source_sentence_mask),
                        source_sentence_mask, target_sentence,
                        target_sentence_mask)

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logger.info('Applying dropout')
        dropout_inputs = [
            x for x in cg.intermediary_variables
            if x.name == 'maxout_apply_output'
        ]
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logger.info('Applying weight noise to ff layers')
        enc_params = Selector(encoder.lookup).get_params().values()
        enc_params += Selector(encoder.fwd_fork).get_params().values()
        enc_params += Selector(encoder.back_fork).get_params().values()
        dec_params = Selector(
            decoder.sequence_generator.readout).get_params().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_params().values()
        dec_params += Selector(decoder.state_init).get_params().values()
        cg = apply_noise(cg, enc_params + dec_params,
                         config['weight_noise_ff'])

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(
        Selector(encoder).get_parameters(),
        Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}".format(
        len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'], every_n_batches=config['save_freq'])
    ]

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(
            Sampler(model=search_model,
                    data_stream=tr_stream,
                    hook_samples=config['hook_samples'],
                    every_n_batches=config['sampling_freq'],
                    src_vocab_size=config['src_vocab_size']))

    # Add early stopping based on bleu
    if config['bleu_script'] is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(sampling_input,
                          samples=samples,
                          config=config,
                          model=search_model,
                          data_stream=dev_stream,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot('Cs-En', channels=[['decoder_cost_cost']], after_batch=True))

    # Set up training algorithm
    logger.info("Initializing training algorithm")
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=CompositeRule([
                                    StepClipping(config['step_clipping']),
                                    eval(config['step_rule'])()
                                ]))

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model,
                         algorithm=algorithm,
                         data_stream=tr_stream,
                         extensions=extensions)

    # Train!
    main_loop.run()
예제 #7
0
def main(mode, config, use_bokeh=False):

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)

    if mode == "train":

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')
        sampling_input = tensor.lmatrix('input')

        # Get training and development set streams
        tr_stream = get_tr_stream(**config)
        dev_stream = get_dev_stream(**config)

        # Get cost of the model
        cost = decoder.cost(
            encoder.apply(source_sentence, source_sentence_mask),
            source_sentence_mask, target_sentence, target_sentence_mask)

        logger.info('Creating computational graph')
        cg = ComputationGraph(cost)

        # Initialize model
        logger.info('Initializing model')
        encoder.weights_init = decoder.weights_init = IsotropicGaussian(
            config['weight_scale'])
        encoder.biases_init = decoder.biases_init = Constant(0)
        encoder.push_initialization_config()
        decoder.push_initialization_config()
        encoder.bidir.prototype.weights_init = Orthogonal()
        decoder.transition.weights_init = Orthogonal()
        encoder.initialize()
        decoder.initialize()

        # apply dropout for regularization
        if config['dropout'] < 1.0:
            # dropout is applied to the output of maxout in ghog
            logger.info('Applying dropout')
            dropout_inputs = [x for x in cg.intermediary_variables
                              if x.name == 'maxout_apply_output']
            cg = apply_dropout(cg, dropout_inputs, config['dropout'])

        # Apply weight noise for regularization
        if config['weight_noise_ff'] > 0.0:
            logger.info('Applying weight noise to ff layers')
            enc_params = Selector(encoder.lookup).get_params().values()
            enc_params += Selector(encoder.fwd_fork).get_params().values()
            enc_params += Selector(encoder.back_fork).get_params().values()
            dec_params = Selector(
                decoder.sequence_generator.readout).get_params().values()
            dec_params += Selector(
                decoder.sequence_generator.fork).get_params().values()
            dec_params += Selector(decoder.state_init).get_params().values()
            cg = apply_noise(
                cg, enc_params+dec_params, config['weight_noise_ff'])

        # Print shapes
        shapes = [param.get_value().shape for param in cg.parameters]
        logger.info("Parameter shapes: ")
        for shape, count in Counter(shapes).most_common():
            logger.info('    {:15}: {}'.format(shape, count))
        logger.info("Total number of parameters: {}".format(len(shapes)))

        # Print parameter names
        enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                                   Selector(decoder).get_parameters())
        logger.info("Parameter names: ")
        for name, value in enc_dec_param_dict.items():
            logger.info('    {:15}: {}'.format(value.get_value().shape, name))
        logger.info("Total number of parameters: {}"
                    .format(len(enc_dec_param_dict)))

        # Set up training model
        logger.info("Building model")
        training_model = Model(cost)

        # Set extensions
        logger.info("Initializing extensions")
        extensions = [
            FinishAfter(after_n_batches=config['finish_after']),
            TrainingDataMonitoring([cost], after_batch=True),
            Printing(after_batch=True),
            CheckpointNMT(config['saveto'],
                          every_n_batches=config['save_freq'])
        ]

        # Set up beam search and sampling computation graphs if necessary
        if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
            logger.info("Building sampling model")
            sampling_representation = encoder.apply(
                sampling_input, tensor.ones(sampling_input.shape))
            generated = decoder.generate(
                sampling_input, sampling_representation)
            search_model = Model(generated)
            _, samples = VariableFilter(
                bricks=[decoder.sequence_generator], name="outputs")(
                    ComputationGraph(generated[1]))

        # Add sampling
        if config['hook_samples'] >= 1:
            logger.info("Building sampler")
            extensions.append(
                Sampler(model=search_model, data_stream=tr_stream,
                        hook_samples=config['hook_samples'],
                        every_n_batches=config['sampling_freq'],
                        src_vocab_size=config['src_vocab_size']))

        # Add early stopping based on bleu
        if config['bleu_script'] is not None:
            logger.info("Building bleu validator")
            extensions.append(
                BleuValidator(sampling_input, samples=samples, config=config,
                              model=search_model, data_stream=dev_stream,
                              normalize=config['normalized_bleu'],
                              every_n_batches=config['bleu_val_freq']))

        # Reload model if necessary
        if config['reload']:
            extensions.append(LoadNMT(config['saveto']))

        # Plot cost in bokeh if necessary
        if use_bokeh and BOKEH_AVAILABLE:
            extensions.append(
                Plot('Cs-En', channels=[['decoder_cost_cost']],
                     after_batch=True))

        # Set up training algorithm
        logger.info("Initializing training algorithm")
        algorithm = GradientDescent(
            cost=cost, parameters=cg.parameters,
            step_rule=CompositeRule([StepClipping(config['step_clipping']),
                                     eval(config['step_rule'])()])
        )

        # Initialize main loop
        logger.info("Initializing main loop")
        main_loop = MainLoop(
            model=training_model,
            algorithm=algorithm,
            data_stream=tr_stream,
            extensions=extensions
        )

        # Train!
        main_loop.run()

    elif mode == 'translate':

        # Create Theano variables
        logger.info('Creating theano variables')
        sampling_input = tensor.lmatrix('source')

        # Get test set stream
        test_stream = get_dev_stream(
            config['test_set'], config['src_vocab'],
            config['src_vocab_size'], config['unk_id'])
        ftrans = open(config['test_set'] + '.trans.out', 'w')

        # Helper utilities
        sutils = SamplingBase()
        unk_idx = config['unk_id']
        src_eos_idx = config['src_vocab_size'] - 1
        trg_eos_idx = config['trg_vocab_size'] - 1

        # Get beam search
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs
        beam_search = BeamSearch(samples=samples)

        logger.info("Loading the model..")
        model = Model(generated)
        loader = LoadNMT(config['saveto'])
        loader.set_model_parameters(model, loader.load_parameters())

        # Get target vocabulary
        trg_vocab = _ensure_special_tokens(
            pickle.load(open(config['trg_vocab'], 'rb')), bos_idx=0,
            eos_idx=trg_eos_idx, unk_idx=unk_idx)
        trg_ivocab = {v: k for k, v in trg_vocab.items()}

        logger.info("Started translation: ")
        total_cost = 0.0

        for i, line in enumerate(test_stream.get_epoch_iterator()):

            seq = sutils._oov_to_unk(
                line[0], config['src_vocab_size'], unk_idx)
            input_ = numpy.tile(seq, (config['beam_size'], 1))

            # draw sample, checking to ensure we don't get an empty string back
            trans, costs = \
                beam_search.search(
                    input_values={sampling_input: input_},
                    max_length=3*len(seq), eol_symbol=src_eos_idx,
                    ignore_first_eol=True)

            # normalize costs according to the sequence lengths
            if config['normalized_bleu']:
                lengths = numpy.array([len(s) for s in trans])
                costs = costs / lengths

            best = numpy.argsort(costs)[0]
            try:
                total_cost += costs[best]
                trans_out = trans[best]

                # convert idx to words
                trans_out = sutils._idx_to_word(trans_out, trg_ivocab)

            except ValueError:
                logger.info(
                    "Can NOT find a translation for line: {}".format(i+1))
                trans_out = '<UNK>'

            print(trans_out, file=ftrans)

            if i != 0 and i % 100 == 0:
                logger.info(
                    "Translated {} lines of test set...".format(i))

        logger.info("Total cost of the test: {}".format(total_cost))
        ftrans.close()
예제 #8
0
def main(config,
         tr_stream,
         dev_stream,
         use_bokeh=False,
         src_vocab=None,
         trg_vocab=None):

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['dec_nhids'], config['enc_nhids'] * 2)
    cost = decoder.cost(encoder.apply(source_sentence, source_sentence_mask),
                        source_sentence_mask, target_sentence,
                        target_sentence_mask)

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # GRAPH TRANSFORMATIONS FOR BETTER TRAINING

    # TODO: allow user to remove some params from the graph, for example if embeddings should be kept static
    if config.get('l2_regularization', False) is True:
        l2_reg_alpha = config['l2_regularization_alpha']
        logger.info(
            'Applying l2 regularization with alpha={}'.format(l2_reg_alpha))
        model_weights = VariableFilter(roles=[WEIGHT])(cg.variables)

        for W in model_weights:
            cost = cost + (l2_reg_alpha * (W**2).sum())

        # why do we need to name the cost variable? Where did the original name come from?
        cost.name = 'decoder_cost_cost'

    cg = ComputationGraph(cost)

    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        # this is the probability of dropping out, so you probably want to make it <=0.5
        logger.info('Applying dropout')
        dropout_inputs = [
            x for x in cg.intermediary_variables
            if x.name == 'maxout_apply_output'
        ]
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(
        Selector(encoder).get_parameters(),
        Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}".format(
        len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # allow user to externally initialize some params
    model_params = training_model.get_parameter_dict()
    if config.get('external_embeddings', None) is not None:
        for key in config['external_embeddings']:
            path_to_params = config['external_embeddings'][key]
            logger.info(
                'Replacing {} parameters with external params at: {}'.format(
                    key, path_to_params))
            external_params = numpy.load(path_to_params)
            len_external_idx = external_params.shape[0]
            print(external_params.shape)
            # Working: look in the dictionary and overwrite the correct rows
            existing_params = model_params[key].get_value()
            if key == '/bidirectionalencoder/embeddings.W':
                vocab = src_vocab
            elif key == '/decoder/sequencegenerator/readout/lookupfeedbackwmt15/lookuptable.W':
                vocab = trg_vocab
            else:
                raise KeyError(
                    'Unknown embedding parameter key: {}'.format(key))
            for k, i in vocab.items():
                if i < len_external_idx:
                    existing_params[i] = external_params[i]

            # model_params_shape = model_params[key].get_value().shape
            # assert model_params[key].get_value().shape == external_params.shape, ("Parameter dims must not change,"
            #                                                                       "shapes {} and {} do not match".
            #                                                                       format(model_params_shape,
            #                                                                              external_params.shape))
            model_params[key].set_value(existing_params)

    # create the training directory, and copy this config there if directory doesn't exist
    if not os.path.isdir(config['saveto']):
        os.makedirs(config['saveto'])
        shutil.copy(config['config_file'], config['saveto'])

    # Set extensions
    logger.info("Initializing extensions")
    extensions = []

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        # note that generated containes several different outputs
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling
    # Note: this is broken for unicode chars
    #if config['hook_samples'] >= 1:
    #    logger.info("Building sampler")
    #    extensions.append(
    #        Sampler(model=search_model, data_stream=tr_stream,
    #                hook_samples=config['hook_samples'],
    #                every_n_batches=config['sampling_freq'],
    #                src_vocab_size=config['src_vocab_size']))

    # WORKING: remove these validators in favor of Async
    # TODO: implement burn-in in the validation extension (don't fire until we're past the burn-in iteration)
    # Add early stopping based on bleu
    # if config.get('bleu_script', None) is not None:
    #     logger.info("Building bleu validator")
    #     extensions.append(
    #         BleuValidator(sampling_input, samples=samples, config=config,
    #                       model=search_model, data_stream=dev_stream,
    #                       normalize=config['normalized_bleu'],
    #                       every_n_batches=config['bleu_val_freq']))

    # Add early stopping based on Meteor
    # if config.get('meteor_directory', None) is not None:
    #     logger.info("Building meteor validator")
    #     extensions.append(
    #         MeteorValidator(sampling_input, samples=samples, config=config,
    #                       model=search_model, data_stream=dev_stream,
    #                       normalize=config['normalized_bleu'],
    #                       every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Set up training algorithm
    logger.info("Initializing training algorithm")
    # if there is dropout or random noise, we need to use the output of the modified graph
    if config['dropout'] < 1.0 or config['weight_noise_ff'] > 0.0:
        algorithm = GradientDescent(cost=cg.outputs[0],
                                    parameters=cg.parameters,
                                    step_rule=CompositeRule([
                                        StepClipping(config['step_clipping']),
                                        eval(config['step_rule'])()
                                    ]))
    else:
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=CompositeRule([
                                        StepClipping(config['step_clipping']),
                                        eval(config['step_rule'])()
                                    ]))

    # enrich the logged information
    extensions.extend([
        Timing(every_n_batches=100),
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'], every_n_batches=config['save_freq'])
    ])

    # External non-blocking validation
    extensions.append(
        RunExternalValidation(config=config,
                              every_n_batches=config['bleu_val_freq']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot(config['model_save_directory'],
                 channels=[['decoder_cost_cost'],
                           ['validation_set_bleu_score'],
                           ['validation_set_meteor_score']],
                 every_n_batches=1))

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(model=training_model,
                         algorithm=algorithm,
                         data_stream=tr_stream,
                         extensions=extensions)

    # Train!
    main_loop.run()
예제 #9
0
def main(mode, config, use_bokeh=False):

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(config['src_vocab_size'],
                                   config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                      config['dec_nhids'], config['enc_nhids'] * 2)

    if mode == "train":

        # Create Theano variables
        logger.info('Creating theano variables')
        source_sentence = tensor.lmatrix('source')
        source_sentence_mask = tensor.matrix('source_mask')
        target_sentence = tensor.lmatrix('target')
        target_sentence_mask = tensor.matrix('target_mask')
        sampling_input = tensor.lmatrix('input')

        # Get training and development set streams
        tr_stream = get_tr_stream(**config)
        dev_stream = get_dev_stream(**config)

        # Get cost of the model
        cost = decoder.cost(
            encoder.apply(source_sentence, source_sentence_mask),
            source_sentence_mask, target_sentence, target_sentence_mask)

        logger.info('Creating computational graph')
        cg = ComputationGraph(cost)

        # Initialize model
        logger.info('Initializing model')
        encoder.weights_init = decoder.weights_init = IsotropicGaussian(
            config['weight_scale'])
        encoder.biases_init = decoder.biases_init = Constant(0)
        encoder.push_initialization_config()
        decoder.push_initialization_config()
        encoder.bidir.prototype.weights_init = Orthogonal()
        decoder.transition.weights_init = Orthogonal()
        encoder.initialize()
        decoder.initialize()

        # apply dropout for regularization
        if config['dropout'] < 1.0:
            # dropout is applied to the output of maxout in ghog
            logger.info('Applying dropout')
            dropout_inputs = [
                x for x in cg.intermediary_variables
                if x.name == 'maxout_apply_output'
            ]
            cg = apply_dropout(cg, dropout_inputs, config['dropout'])

        # Apply weight noise for regularization
        if config['weight_noise_ff'] > 0.0:
            logger.info('Applying weight noise to ff layers')
            enc_params = Selector(encoder.lookup).get_params().values()
            enc_params += Selector(encoder.fwd_fork).get_params().values()
            enc_params += Selector(encoder.back_fork).get_params().values()
            dec_params = Selector(
                decoder.sequence_generator.readout).get_params().values()
            dec_params += Selector(
                decoder.sequence_generator.fork).get_params().values()
            dec_params += Selector(decoder.state_init).get_params().values()
            cg = apply_noise(cg, enc_params + dec_params,
                             config['weight_noise_ff'])

        # Print shapes
        shapes = [param.get_value().shape for param in cg.parameters]
        logger.info("Parameter shapes: ")
        for shape, count in Counter(shapes).most_common():
            logger.info('    {:15}: {}'.format(shape, count))
        logger.info("Total number of parameters: {}".format(len(shapes)))

        # Print parameter names
        enc_dec_param_dict = merge(
            Selector(encoder).get_parameters(),
            Selector(decoder).get_parameters())
        logger.info("Parameter names: ")
        for name, value in enc_dec_param_dict.items():
            logger.info('    {:15}: {}'.format(value.get_value().shape, name))
        logger.info("Total number of parameters: {}".format(
            len(enc_dec_param_dict)))

        # Set up training model
        logger.info("Building model")
        training_model = Model(cost)

        # Set extensions
        logger.info("Initializing extensions")
        extensions = [
            FinishAfter(after_n_batches=config['finish_after']),
            TrainingDataMonitoring([cost], after_batch=True),
            Printing(after_batch=True),
            CheckpointNMT(config['saveto'],
                          every_n_batches=config['save_freq'])
        ]

        # Set up beam search and sampling computation graphs if necessary
        if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
            logger.info("Building sampling model")
            sampling_representation = encoder.apply(
                sampling_input, tensor.ones(sampling_input.shape))
            generated = decoder.generate(sampling_input,
                                         sampling_representation)
            search_model = Model(generated)
            _, samples = VariableFilter(bricks=[decoder.sequence_generator],
                                        name="outputs")(ComputationGraph(
                                            generated[1]))

        # Add sampling
        if config['hook_samples'] >= 1:
            logger.info("Building sampler")
            extensions.append(
                Sampler(model=search_model,
                        data_stream=tr_stream,
                        hook_samples=config['hook_samples'],
                        every_n_batches=config['sampling_freq'],
                        src_vocab_size=config['src_vocab_size']))

        # Add early stopping based on bleu
        if config['bleu_script'] is not None:
            logger.info("Building bleu validator")
            extensions.append(
                BleuValidator(sampling_input,
                              samples=samples,
                              config=config,
                              model=search_model,
                              data_stream=dev_stream,
                              normalize=config['normalized_bleu'],
                              every_n_batches=config['bleu_val_freq']))

        # Reload model if necessary
        if config['reload']:
            extensions.append(LoadNMT(config['saveto']))

        # Plot cost in bokeh if necessary
        if use_bokeh and BOKEH_AVAILABLE:
            extensions.append(
                Plot('Cs-En',
                     channels=[['decoder_cost_cost']],
                     after_batch=True))

        # Set up training algorithm
        logger.info("Initializing training algorithm")
        algorithm = GradientDescent(cost=cost,
                                    parameters=cg.parameters,
                                    step_rule=CompositeRule([
                                        StepClipping(config['step_clipping']),
                                        eval(config['step_rule'])()
                                    ]))

        # Initialize main loop
        logger.info("Initializing main loop")
        main_loop = MainLoop(model=training_model,
                             algorithm=algorithm,
                             data_stream=tr_stream,
                             extensions=extensions)

        # Train!
        main_loop.run()

    elif mode == 'translate':

        # Create Theano variables
        logger.info('Creating theano variables')
        sampling_input = tensor.lmatrix('source')

        # Get test set stream
        test_stream = get_dev_stream(config['test_set'], config['src_vocab'],
                                     config['src_vocab_size'],
                                     config['unk_id'])
        ftrans = open(config['test_set'] + '.trans.out', 'w')

        # Helper utilities
        sutils = SamplingBase()
        unk_idx = config['unk_id']
        src_eos_idx = config['src_vocab_size'] - 1
        trg_eos_idx = config['trg_vocab_size'] - 1

        # Get beam search
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs
        beam_search = BeamSearch(samples=samples)

        logger.info("Loading the model..")
        model = Model(generated)
        loader = LoadNMT(config['saveto'])
        loader.set_model_parameters(model, loader.load_parameters())

        # Get target vocabulary
        trg_vocab = _ensure_special_tokens(pickle.load(
            open(config['trg_vocab'])),
                                           bos_idx=0,
                                           eos_idx=trg_eos_idx,
                                           unk_idx=unk_idx)
        trg_ivocab = {v: k for k, v in trg_vocab.items()}

        logger.info("Started translation: ")
        total_cost = 0.0

        for i, line in enumerate(test_stream.get_epoch_iterator()):

            seq = sutils._oov_to_unk(line[0], config['src_vocab_size'],
                                     unk_idx)
            input_ = numpy.tile(seq, (config['beam_size'], 1))

            # draw sample, checking to ensure we don't get an empty string back
            trans, costs = \
                beam_search.search(
                    input_values={sampling_input: input_},
                    max_length=3*len(seq), eol_symbol=src_eos_idx,
                    ignore_first_eol=True)

            # normalize costs according to the sequence lengths
            if config['normalized_bleu']:
                lengths = numpy.array([len(s) for s in trans])
                costs = costs / lengths

            best = numpy.argsort(costs)[0]
            try:
                total_cost += costs[best]
                trans_out = trans[best]

                # convert idx to words
                trans_out = sutils._idx_to_word(trans_out, trg_ivocab)

            except ValueError:
                logger.info(
                    "Can NOT find a translation for line: {}".format(i + 1))
                trans_out = '<UNK>'

            print(trans_out, file=ftrans)

            if i != 0 and i % 100 == 0:
                logger.info("Translated {} lines of test set...".format(i))

        logger.info("Total cost of the test: {}".format(total_cost))
        ftrans.close()
예제 #10
0
def main(config, tr_stream, dev_stream, use_bokeh=True):

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logger.info('Applying dropout')
        dropout_inputs = [x for x in cg.intermediary_variables
                          if x.name == 'maxout_apply_output']
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logger.info('Applying weight noise to ff layers')
        enc_params = Selector(encoder.lookup).get_params().values()
        enc_params += Selector(encoder.fwd_fork).get_params().values()
        enc_params += Selector(encoder.back_fork).get_params().values()
        dec_params = Selector(
            decoder.sequence_generator.readout).get_params().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_params().values()
        dec_params += Selector(decoder.state_init).get_params().values()
        cg = apply_noise(cg, enc_params+dec_params, config['weight_noise_ff'])

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    # Print parameter names
    enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                               Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}"
                .format(len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)

    # Set extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'],
                      every_n_batches=config['save_freq'])
    ]

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(
            Sampler(model=search_model, data_stream=tr_stream,
                    hook_samples=config['hook_samples'],
                    every_n_batches=config['sampling_freq'],
                    src_vocab_size=config['src_vocab_size']))

    # Add early stopping based on bleu
    if config['bleu_script'] is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(sampling_input, samples=samples, config=config,
                          model=search_model, data_stream=dev_stream,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        logger.info("Adding bokeh plot extension")
        extensions.append(
            Plot('De-En', channels=[['decoder_cost_cost']],
                 after_batch=True))

    # Set up training algorithm
    logger.info("Initializing training algorithm")
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=CompositeRule([StepClipping(config['step_clipping']),
                                 eval(config['step_rule'])()])
    )

    # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=algorithm,
        data_stream=tr_stream,
        extensions=extensions
    )

    # Train!
    main_loop.run()
def main(config, tr_stream, dev_stream, use_bokeh=False):
    print("~def main")

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    print("~sampling_input = tensor.lmatrix")


    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    print("~source_sentence_mask, target_sentence, target_sentence_mask")

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    print("~ComputationGraph")

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()


    print("~decoder.initialize()")



    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logger.info('Applying dropout')
        dropout_inputs = [x for x in cg.intermediary_variables
                          if x.name == 'maxout_apply_output']
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])



    print("~cg = apply_dropout")

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logger.info('Applying weight noise to ff layers')
        enc_params = Selector(encoder.lookup).get_params().values()
        enc_params += Selector(encoder.fwd_fork).get_params().values()
        enc_params += Selector(encoder.back_fork).get_params().values()
        dec_params = Selector(
            decoder.sequence_generator.readout).get_params().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_params().values()
        dec_params += Selector(decoder.state_init).get_params().values()
        cg = apply_noise(cg, enc_params+dec_params, config['weight_noise_ff'])


    print("~cg = apply_noise")

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    print("~logger.info")



    # Print parameter names
    enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                               Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}"
                .format(len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)
    print("~training_model")


    # Set extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'],
                      every_n_batches=config['save_freq'])
    ]
    print("~every_n_batches=config")

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    sample = Sampler(model=search_model, data_stream=tr_stream,
                hook_samples=config['hook_samples'],
                every_n_batches=config['sampling_freq'],
                src_vocab_size=config['src_vocab_size'])

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append( sample )

    # Add early stopping based on bleu
    if config['bleu_script'] is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(sampling_input, samples=samples, config=config,
                          model=search_model, data_stream=dev_stream,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot('Cs-En', channels=[['decoder_cost_cost']],
                 after_batch=True))










    sampling_fn = search_model.get_theano_function()



    print(" - - - - - - - - - - - - - - "  )


    sort_k_batches = 12
    batch_size = 80
    seq_len = 50
    trg_ivocab = None
    src_vocab_size = config['src_vocab_size']
    trg_vocab_size = config['trg_vocab_size']
    unk_id = config['unk_id'] 

    src_vocab = config['src_vocab']
    trg_vocab = config['trg_vocab']
    src_vocab = ensure_special_tokens(
        src_vocab if isinstance(src_vocab, dict)
        else cPickle.load(open(src_vocab)),
        bos_idx=0, eos_idx=src_vocab_size - 1, unk_idx=unk_id)
    trg_vocab = ensure_special_tokens(
        trg_vocab if isinstance(trg_vocab, dict) else
        cPickle.load(open(trg_vocab)),
        bos_idx=0, eos_idx=trg_vocab_size - 1, unk_idx=unk_id)
    if not trg_ivocab:
        trg_ivocab = {v: k for k, v in trg_vocab.items()}


    src_data = config['src_data']
    trg_data = config['trg_data']
    src_dataset = TextFile([src_data], src_vocab, None)
    trg_dataset = TextFile([trg_data], trg_vocab, None)




    inputstringfile="inputstringfile.cs"
    input_dataset = TextFile([inputstringfile], src_vocab, None)







    stream = Merge([input_dataset.get_example_stream(),
                    trg_dataset.get_example_stream()],
                   ('source', 'target'))
    stream2 = Filter(stream,
                    predicate=_too_long(seq_len=seq_len))
    stream3 = Mapping(stream2,
                     _oov_to_unk(src_vocab_size=src_vocab_size,
                                 trg_vocab_size=trg_vocab_size,
                                 unk_id=unk_id))
    stream4 = Batch(stream3,
                   iteration_scheme=ConstantScheme(
                       batch_size*sort_k_batches))
                       
    stream5 = Mapping(stream4, SortMapping(_length))
    stream6 = Unpack(stream5)
    stream7 = Batch(
        stream6, iteration_scheme=ConstantScheme(batch_size))

    input_stream = DataStream(input_dataset)





    print("dev_stream : ", type( dev_stream )   )
    print("input_stream : ",  type( input_stream )   )






    epochone = input_stream.get_epoch_iterator() 
    vocab = input_stream.dataset.dictionary
    unk_sym = input_stream.dataset.unk_token
    eos_sym = input_stream.dataset.eos_token




    for i, line in enumerate(epochone):
        seq = oov_to_unk(
            line[0], config['src_vocab_size'], unk_id)
        input_ = numpy.tile(seq, ( 1 , 1))


        print("seq : " ,   type( seq )  ,  seq   )
        print("input_ : ", type( input_ )  , input_ ,  inspect.getmembers( input_ )    )



        _1, outputs, _2, _3, costs = ( sampling_fn(  input_  ) )

        outputs = outputs.flatten()
        costs = costs.T

        print(" outputs : "    ,   outputs   ,   type( outputs )  )
        print("idx_to_word: ", idx_to_word(outputs  ,  trg_ivocab))












    print(" - - - - - - - - - - - - - - "  )
예제 #12
0
def main(config):
    print('working on it ...')
    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)
    # Extensions
    extensions = []
    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Set up beam search and sampling computation graphs if necessary
    if config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs'''

     
    # Add sampling
    logger.info("Building sampler")
    global samplers_ob
    samplers_ob=Sampler(model=search_model, data_stream=input_sentence_mask,
                hook_samples=config['hook_samples'],
                every_n_batches=config['sampling_freq'],
                src_vocab_size=config['src_vocab_size'])
                # Initialize main loop
    logger.info("Initializing main loop")
    main_loop = MainLoop(
        model=training_model,
        algorithm=None,
        data_stream=None,
        extensions=extensions
    )
                
    for extension in main_loop.extensions:
        extension.main_loop = main_loop
    main_loop._run_extensions('before_training')
예제 #13
0
next(tr_stream.get_epoch_iterator())

# Create Theano variables
logger.info('Creating theano variables')
source_sentence = tensor.lmatrix('source')
source_sentence_mask = tensor.matrix('source_mask')
target_sentence = tensor.lmatrix('target')
target_sentence_mask = tensor.matrix('target_mask')
sampling_input = tensor.lmatrix('input')

# Construct model
logger.info('Building RNN encoder-decoder')
encoder = BidirectionalEncoder(
    config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
decoder = Decoder(
    config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
    config['enc_nhids'] * 2)
cost = decoder.cost(
    encoder.apply(source_sentence, source_sentence_mask),
    source_sentence_mask, target_sentence, target_sentence_mask)

logger.info('Creating computational graph')
cg = ComputationGraph(cost)

# Initialize model
logger.info('Initializing model')
encoder.weights_init = decoder.weights_init = IsotropicGaussian(
    config['weight_scale'])
encoder.biases_init = decoder.biases_init = Constant(0)
encoder.push_initialization_config()
decoder.push_initialization_config()
예제 #14
0
config = parse_config()
logger.info('Configuration:\n{}'.format(pprint.pformat(config)))

# Create Theano variables
logger.info('Creating theano variables')
source_sentence = tensor.lmatrix('source')
source_sentence_mask = tensor.matrix('source_mask')
target_sentence = tensor.lmatrix('target')
target_sentence_mask = tensor.matrix('target_mask')
sampling_input = tensor.lmatrix('input')

# Construct model
logger.info('Building RNN encoder-decoder')
encoder = BidirectionalEncoder(config['src_vocab_size'], config['enc_embed'],
                               config['enc_nhids'])
decoder = Decoder(config['trg_vocab_size'], config['dec_embed'],
                  config['dec_nhids'], config['enc_nhids'] * 2)

logger.info('Creating computational graph')

# Initialize model
logger.info('Initializing model')
encoder.weights_init = decoder.weights_init = IsotropicGaussian(
    config['weight_scale'])
encoder.biases_init = decoder.biases_init = Constant(0)
encoder.push_initialization_config()
decoder.push_initialization_config()
encoder.bidir.prototype.weights_init = Orthogonal()
decoder.transition.weights_init = Orthogonal()
encoder.initialize()
decoder.initialize()
예제 #15
0
logger.info('Configuration:\n{}'.format(pprint.pformat(config)))

# Create Theano variables
logger.info('Creating theano variables')
source_sentence = tensor.lmatrix('source')
source_sentence_mask = tensor.matrix('source_mask')
target_sentence = tensor.lmatrix('target')
target_sentence_mask = tensor.matrix('target_mask')
sampling_input = tensor.lmatrix('input')

# Construct model
logger.info('Building RNN encoder-decoder')
encoder = BidirectionalEncoder(
    config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
decoder = Decoder(
    config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
    config['enc_nhids'] * 2)

logger.info('Creating computational graph')

# Initialize model
logger.info('Initializing model')
encoder.weights_init = decoder.weights_init = IsotropicGaussian(config['weight_scale'])
encoder.biases_init = decoder.biases_init = Constant(0)
encoder.push_initialization_config()
decoder.push_initialization_config()
encoder.bidir.prototype.weights_init = Orthogonal()
decoder.transition.weights_init = Orthogonal()
encoder.initialize()
decoder.initialize()