def test_sampling():

    # Create Theano variables
    sampling_input = theano.tensor.lmatrix("input")

    # Construct model
    encoder = BidirectionalEncoder(vocab_size=10, embedding_dim=5, state_dim=8)
    decoder = Decoder(vocab_size=12, embedding_dim=6, state_dim=8, representation_dim=16, theano_seed=1234)
    sampling_representation = encoder.apply(sampling_input, theano.tensor.ones(sampling_input.shape))
    generated = decoder.generate(sampling_input, sampling_representation)
    model = Model(generated[1])  # generated[1] holds the sampled outputs

    # Initialize model
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(0.01)
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    # Compile a function for the generated samples
    sampling_fn = model.get_theano_function()

    # Create literal variables
    numpy.random.seed(1234)
    x = numpy.random.randint(0, 10, size=(1, 2))

    # Call function and check result
    generated_step = sampling_fn(x)
    assert len(generated_step[0].flatten()) == 4
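
The pattern shared by all the examples below: wrap the output variable(s) in a Model, compile with get_theano_function(), then call the result with one numpy array per input variable (argument order follows model.inputs). A minimal self-contained sketch; the tiny graph here is illustrative only, not taken from any of the examples:

import numpy
import theano.tensor as tensor
from blocks.model import Model

# Any Theano output expression works; Model collects its free inputs.
x = tensor.lmatrix('x')
y = (x * 2).sum(axis=1)

model = Model(y)
fn = model.get_theano_function()       # compiles model.inputs -> outputs
print([v.name for v in model.inputs])  # ['x'] -- the argument order for fn
print(fn(numpy.arange(6, dtype='int64').reshape(2, 3)))  # [array([ 6, 24])]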
Example #2
def __init__(self, model_name, model, stream, **kwargs):
    super(RunOnTest, self).__init__(**kwargs)
    self.model_name = model_name

    # Build the prediction graph and compile it into a callable
    cg = Model(model.predict(**stream.inputs()))
    self.inputs = cg.inputs
    self.outputs = model.predict.outputs

    req_vars_test = model.predict.inputs + ['trip_id']
    self.test_stream = stream.test(req_vars_test)
    self.function = cg.get_theano_function()
    self.best_dvc = None
    self.best_tvc = None
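
A hedged sketch of how such a compiled function is typically applied per batch; the assumption that each batch tuple is ordered like cg.inputs is mine, mirroring the label_ner(*ordered_batch) call in Example #5 below:

# Hypothetical usage inside the extension's monitoring step; assumes each
# batch yielded by the stream is ordered like self.inputs.
for batch in self.test_stream.get_epoch_iterator():
    outputs = self.function(*batch[:len(self.inputs)])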
Example #4
def get_prediction_function(exp_config):

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target_suffix')
    target_sentence_mask = tensor.matrix('target_suffix_mask')
    target_prefix = tensor.lmatrix('target_prefix')
    target_prefix_mask = tensor.matrix('target_prefix_mask')

    # build the model
    encoder = BidirectionalEncoder(exp_config['src_vocab_size'],
                                   exp_config['enc_embed'],
                                   exp_config['enc_nhids'])

    # Note: the 'loss_function' kwarg tells the decoder which sequence_generator and cost function to use
    decoder = NMTPrefixDecoder(exp_config['trg_vocab_size'],
                               exp_config['dec_embed'],
                               exp_config['dec_nhids'],
                               exp_config['enc_nhids'] * 2,
                               loss_function='cross_entropy')

    # rename to match baseline NMT systems
    decoder.name = 'decoder'

    prediction_tags = decoder.prediction_tags(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask,
        target_prefix, target_prefix_mask)

    logger.info('Creating computational graph')

    prediction_model = Model(prediction_tags)

    # Note that the parameters of this model must be pretrained, otherwise this doesn't make sense
    param_values = LoadNMT.load_parameter_values(
        exp_config['saved_parameters'], brick_delimiter=None)
    LoadNMT.set_model_parameters(prediction_model, param_values)

    prediction_function = prediction_model.get_theano_function()

    return prediction_function
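
A hedged sketch of calling the returned function: the argument order is assumed to follow prediction_model.inputs, and the names, shapes, and dtypes below (masks as float32, assuming floatX=float32) are illustrative, not part of the original code:

import numpy

prediction_fn = get_prediction_function(exp_config)

src = numpy.array([[3, 7, 1]], dtype='int64')          # 'source'
trg_suffix = numpy.array([[5, 2, 1]], dtype='int64')   # 'target_suffix'
trg_prefix = numpy.array([[4, 9]], dtype='int64')      # 'target_prefix'

outputs = prediction_fn(src, numpy.ones_like(src, dtype='float32'),
                        trg_suffix, numpy.ones_like(trg_suffix, dtype='float32'),
                        trg_prefix, numpy.ones_like(trg_prefix, dtype='float32'))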
Example #5
print(cg.inputs)

# Strangely, all the examples use DataStreamMonitoring in MainLoop

model = Model(labels)
print("Model.dict_of_inputs():")
print(model.dict_of_inputs())
print("Model list inputs:")
print([v.name for v in model.inputs])

# Model loading from saved file
model.set_parameter_values(load_parameter_values(save_state_path))

examine_embedding(lookup.W.get_value())

label_ner = model.get_theano_function()
print(model.inputs)
print("printed label_ner.params")

for test_data in data_stream.get_epoch_iterator():
    # Explicitly strip off the pre-defined labels
    ordered_batch = test_data[0:3]

    results = label_ner(*ordered_batch)  # a pure array of labels

    inputs = _transpose(ordered_batch)
    for tokens, mask, labels in zip(inputs[0], inputs[1],
                                    np.transpose(results)):
        for token, m, label in zip(tokens, mask, labels.tolist()):
            if m < 0.1:
                pass  # the snippet is truncated at this point in the original
Example #7
def get_confidence_function(exp_config):

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target_suffix')
    target_sentence_mask = tensor.matrix('target_suffix_mask')
    target_prefix = tensor.lmatrix('target_prefix')
    target_prefix_mask = tensor.matrix('target_prefix_mask')

    logger.info('Creating computational graph')

    # build the model
    encoder = BidirectionalEncoder(exp_config['src_vocab_size'],
                                   exp_config['enc_embed'],
                                   exp_config['enc_nhids'])

    # Note: the 'loss_function' kwarg tells the decoder which sequence_generator and cost function to use
    decoder = NMTPrefixDecoder(exp_config['trg_vocab_size'],
                               exp_config['dec_embed'],
                               exp_config['dec_nhids'],
                               exp_config['enc_nhids'] * 2,
                               loss_function='cross_entropy')

    # rename to match baseline NMT systems
    decoder.name = 'decoder'

    predictions, merged_states = decoder.prediction_tags(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask,
        target_prefix, target_prefix_mask)

    # WORKING: also get the softmax prediction feature
    # WORKING: add features for source len, prefix len, position in suffix (position in suffix only makes sense if we're training on predictions)
    # Flatten to 2D, softmax over the vocabulary, then restore the original shape
    p_shape = predictions.shape
    flat_predictions = predictions.reshape([p_shape[0] * p_shape[1], p_shape[2]])
    prediction_softmax = tensor.nnet.softmax(flat_predictions).reshape(p_shape)
    prediction_feature = prediction_softmax.max(axis=-1)[:, :, None]
    all_features = tensor.concatenate([merged_states, prediction_feature],
                                      axis=-1)

    confidence_output = decoder.sequence_generator.confidence_predictions(
        all_features)

    logger.info('Creating computational graph')
    confidence_model = Model(confidence_output)

    # Note that the parameters of this model must be pretrained, otherwise this doesn't make sense
    param_values = LoadNMT.load_parameter_values(
        exp_config['saved_parameters'], brick_delimiter=None)
    LoadNMT.set_model_parameters(confidence_model, param_values)

    confidence_param_values = LoadNMT.load_parameter_values(
        exp_config['confidence_saved_parameters'], brick_delimiter=None)
    LoadNMT.set_model_parameters(confidence_model, confidence_param_values)

    confidence_function = confidence_model.get_theano_function()

    return confidence_function
Example #8
File: VAE.py Project: udibr/VAE
def main(name, model, epochs, batch_size, learning_rate, bokeh, layers, gamma,
         rectifier, predict, dropout, qlinear, sparse):
    runname = "vae%s-L%s%s%s%s-l%s-g%s-b%d" % (
        name, layers,
        'r' if rectifier else '',
        'd' if dropout else '',
        'l' if qlinear else '',
        shnum(learning_rate), shnum(gamma), batch_size // 100)
    # Both branches use Orthogonal initialization; only the activation differs.
    full_weights_init = Orthogonal()
    activation = Rectifier() if rectifier else Tanh()

    if sparse:
        runname += '-s%d'%sparse
        weights_init = Sparse(num_init=sparse, weights_init=full_weights_init)
    else:
        weights_init = full_weights_init

    layers = list(map(int, layers.split(',')))  # list() needed under Python 3 for the slicing below

    encoder_layers = layers[:-1]
    encoder_mlp = MLP([activation] * (len(encoder_layers)-1),
              encoder_layers,
              name="MLP_enc", biases_init=Constant(0.), weights_init=weights_init)

    enc_dim = encoder_layers[-1]
    z_dim = layers[-1]
    if qlinear:
        sampler = Qlinear(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)
    else:
        sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)

    decoder_layers = layers[:]  ## includes z_dim as first layer
    decoder_layers.reverse()
    decoder_mlp = MLP([activation] * (len(decoder_layers)-2) + [Logistic()],
              decoder_layers,
              name="MLP_dec", biases_init=Constant(0.), weights_init=weights_init)


    vae = VAEModel(encoder_mlp, sampler, decoder_mlp)
    vae.initialize()

    x = tensor.matrix('features')/256.
    x.tag.test_value = np.random.random((batch_size,layers[0])).astype(np.float32)

    if predict:
        mean_z, enc = vae.mean_z(x)
        # cg = ComputationGraph([mean_z, enc])
        newmodel = Model([mean_z,enc])
    else:
        x_recons, kl_terms = vae.reconstruct(x)
        recons_term = BinaryCrossEntropy().apply(x, x_recons)
        recons_term.name = "recons_term"

        cost = recons_term + kl_terms.mean()
        cg = ComputationGraph([cost])

        if gamma > 0:
            weights = VariableFilter(roles=[WEIGHT])(cg.variables)
            cost += gamma * blocks.theano_expressions.l2_norm(weights)

        cost.name = "nll_bound"
        newmodel = Model(cost)

        if dropout:
            from blocks.roles import INPUT
            inputs = VariableFilter(roles=[INPUT])(cg.variables)
            # dropout_target = [v for k,v in newmodel.get_params().iteritems()
            #            if k.find('MLP')>=0 and k.endswith('.W') and not k.endswith('MLP_enc/linear_0.W')]
            dropout_target = [v for v in inputs if v.name.startswith('linear_')]
            cg = apply_dropout(cg, dropout_target, 0.5)
            target_cost = cg.outputs[0]
        else:
            target_cost = cost

    if name == 'mnist':
        if predict:
            train_ds = MNIST("train")
        else:
            train_ds = MNIST("train", sources=['features'])
        test_ds = MNIST("test")
    else:
        datasource_dir = os.path.join(fuel.config.data_path, name)
        datasource_fname = os.path.join(datasource_dir , name+'.hdf5')
        if predict:
            train_ds = H5PYDataset(datasource_fname, which_set='train')
        else:
            train_ds = H5PYDataset(datasource_fname, which_set='train', sources=['features'])
        test_ds = H5PYDataset(datasource_fname, which_set='test')
    train_s = Flatten(DataStream(train_ds,
                 iteration_scheme=ShuffledScheme(
                     train_ds.num_examples, batch_size)))
    test_s = Flatten(DataStream(test_ds,
                 iteration_scheme=ShuffledScheme(
                     test_ds.num_examples, batch_size)))

    if predict:
        from itertools import chain
        fprop = newmodel.get_theano_function()
        allpdata = None
        alledata = None
        f = train_s.sources.index('features')
        assert f == test_s.sources.index('features')
        sources = test_s.sources
        alllabels = dict((s,[]) for s in sources if s != 'features')
        for data in chain(train_s.get_epoch_iterator(), test_s.get_epoch_iterator()):
            for s,d in zip(sources,data):
                if s != 'features':
                    alllabels[s].extend(list(d))

            pdata, edata = fprop(data[f])
            if allpdata is None:
                allpdata = pdata
            else:
                allpdata = np.vstack((allpdata, pdata))
            if alledata is None:
                alledata = edata
            else:
                alledata = np.vstack((alledata, edata))
        print('Saving', allpdata.shape, 'intermediate layer, for all training and test examples, to', name + '_z.npy')
        np.save(name + '_z', allpdata)
        print('Saving', alledata.shape, 'last encoder layer to', name + '_e.npy')
        np.save(name + '_e', alledata)
        print('Saving additional labels/targets:', ','.join(alllabels.keys()),
              'of size', ','.join(str(len(v)) for v in alllabels.values()),
              'to', name + '_labels.pkl')
        with open(name + '_labels.pkl', 'wb') as fp:
            pickle.dump(alllabels, fp, -1)
    else:
        cg = ComputationGraph([target_cost])
        algorithm = GradientDescent(
            cost=target_cost, params=cg.parameters,
            step_rule=Adam(learning_rate)  # Scale(learning_rate=learning_rate)
        )
        extensions = []
        if model:
            extensions.append(Load(model))

        extensions += [Timing(),
                      FinishAfter(after_n_epochs=epochs),
                      DataStreamMonitoring(
                          [cost, recons_term],
                          test_s,
                          prefix="test"),
                      TrainingDataMonitoring(
                          [cost,
                           aggregation.mean(algorithm.total_gradient_norm)],
                          prefix="train",
                          after_epoch=True),
                      Checkpoint(runname, every_n_epochs=10),
                      Printing()]

        if bokeh:
            extensions.append(Plot(
                'Auto',
                channels=[
                    ['test_recons_term','test_nll_bound','train_nll_bound'
                     ],
                    ['train_total_gradient_norm']]))

        main_loop = MainLoop(
            algorithm,
            train_s,
            model=newmodel,
            extensions=extensions)

        main_loop.run()
Example #9
def main(mode, save_path, num_batches, data_path=None):
    # Experiment configuration
    dimension = 100
    readout_dimension = len(char2code)

    # Build bricks
    encoder = Bidirectional(SimpleRecurrent(dim=dimension, activation=Tanh()),
                            weights_init=Orthogonal())
    fork = Fork(
        [name for name in encoder.prototype.apply.sequences if name != 'mask'],
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0))
    fork.input_dim = dimension
    fork.output_dims = {name: dimension for name in fork.input_names}
    lookup = LookupTable(readout_dimension,
                         dimension,
                         weights_init=IsotropicGaussian(0.1))
    transition = SimpleRecurrent(activation=Tanh(),
                                 dim=dimension,
                                 name="transition")
    attention = SequenceContentAttention(state_names=transition.apply.states,
                                         sequence_dim=2 * dimension,
                                         match_dim=dimension,
                                         name="attention")
    readout = LinearReadout(readout_dim=readout_dimension,
                            source_names=["states"],
                            emitter=SoftmaxEmitter(name="emitter"),
                            feedbacker=LookupFeedback(readout_dimension,
                                                      dimension),
                            name="readout")
    generator = SequenceGenerator(readout=readout,
                                  transition=transition,
                                  attention=attention,
                                  weights_init=IsotropicGaussian(0.1),
                                  biases_init=Constant(0),
                                  name="generator")
    generator.push_initialization_config()
    transition.weights_init = Orthogonal()

    if mode == "train":
        # Data processing pipeline
        dataset_options = dict(dictionary=char2code,
                               level="character",
                               preprocess=_lower)
        if data_path:
            dataset = TextFile(data_path, **dataset_options)
        else:
            dataset = OneBillionWord("training", [99], **dataset_options)
        data_stream = DataStreamMapping(
            mapping=_transpose,
            data_stream=PaddingDataStream(
                BatchDataStream(
                    iteration_scheme=ConstantScheme(10),
                    data_stream=DataStreamMapping(
                        mapping=reverse_words,
                        add_sources=("targets", ),
                        data_stream=DataStreamFilter(
                            predicate=_filter_long,
                            data_stream=dataset.get_default_stream())))))

        # Build the cost computation graph
        chars = tensor.lmatrix("features")
        chars_mask = tensor.matrix("features_mask")
        targets = tensor.lmatrix("targets")
        targets_mask = tensor.matrix("targets_mask")
        batch_cost = generator.cost(
            targets,
            targets_mask,
            attended=encoder.apply(**dict_union(fork.apply(
                lookup.lookup(chars), return_dict=True),
                                                mask=chars_mask)),
            attended_mask=chars_mask).sum()
        batch_size = named_copy(chars.shape[1], "batch_size")
        cost = aggregation.mean(batch_cost, batch_size)
        cost.name = "sequence_log_likelihood"
        logger.info("Cost graph is built")

        # Give an idea of what's going on
        model = Model(cost)
        params = model.get_params()
        logger.info("Parameters:\n" +
                    pprint.pformat([(key, value.get_value().shape)
                                    for key, value in params.items()],
                                   width=120))

        # Initialize parameters
        for brick in model.get_top_bricks():
            brick.initialize()

        # Fetch variables useful for debugging
        max_length = named_copy(chars.shape[0], "max_length")
        cost_per_character = named_copy(
            aggregation.mean(batch_cost, batch_size * max_length),
            "character_log_likelihood")
        cg = ComputationGraph(cost)
        (energies, ) = VariableFilter(application=readout.readout,
                                      name="output")(cg.variables)
        min_energy = named_copy(energies.min(), "min_energy")
        max_energy = named_copy(energies.max(), "max_energy")
        (activations, ) = VariableFilter(
            application=generator.transition.apply,
            name="states")(cg.variables)
        mean_activation = named_copy(
            abs(activations).mean(), "mean_activation")

        # Define the training algorithm.
        algorithm = GradientDescent(cost=cost,
                                    step_rule=CompositeRule(
                                        [StepClipping(10.0),
                                         Scale(0.01)]))

        # More variables for debugging
        observables = [
            cost, min_energy, max_energy, mean_activation, batch_size,
            max_length, cost_per_character, algorithm.total_step_norm,
            algorithm.total_gradient_norm
        ]
        for name, param in params.items():
            observables.append(named_copy(param.norm(2), name + "_norm"))
            observables.append(
                named_copy(algorithm.gradients[param].norm(2),
                           name + "_grad_norm"))

        # Construct the main loop and start training!
        average_monitoring = TrainingDataMonitoring(observables,
                                                    prefix="average",
                                                    every_n_batches=10)
        main_loop = MainLoop(
            model=model,
            data_stream=data_stream,
            algorithm=algorithm,
            extensions=[
                Timing(),
                TrainingDataMonitoring(observables, after_every_batch=True),
                average_monitoring,
                FinishAfter(after_n_batches=num_batches).add_condition(
                    "after_batch", _is_nan),
                Plot(os.path.basename(save_path),
                     [[average_monitoring.record_name(cost)],
                      [average_monitoring.record_name(cost_per_character)]],
                     every_n_batches=10),
                SerializeMainLoop(save_path,
                                  every_n_batches=500,
                                  save_separately=["model", "log"]),
                Printing(every_n_batches=1)
            ])
        main_loop.run()
    elif mode == "test":
        logger.info("Model is loaded")
        chars = tensor.lmatrix("features")
        generated = generator.generate(
            n_steps=3 * chars.shape[0],
            batch_size=chars.shape[1],
            attended=encoder.apply(**dict_union(
                fork.apply(lookup.lookup(chars), return_dict=True))),
            attended_mask=tensor.ones(chars.shape))
        model = Model(generated)
        model.set_param_values(load_parameter_values(save_path))
        sample_function = model.get_theano_function()
        logging.info("Sampling function is compiled")

        while True:
            # Python 2-3 compatibility
            line = input("Enter a sentence\n")
            batch_size = int(input("Enter a number of samples\n"))
            encoded_input = [
                char2code.get(char, char2code["<UNK>"])
                for char in line.lower().strip()
            ]
            encoded_input = ([char2code['<S>']] + encoded_input +
                             [char2code['</S>']])
            print("Encoder input:", encoded_input)
            target = reverse_words((encoded_input, ))[0]
            print("Target: ", target)
            states, samples, glimpses, weights, costs = sample_function(
                numpy.repeat(numpy.array(encoded_input)[:, None],
                             batch_size,
                             axis=1))

            messages = []
            for i in range(samples.shape[1]):
                sample = list(samples[:, i])
                try:
                    true_length = sample.index(char2code['</S>']) + 1
                except ValueError:
                    true_length = len(sample)
                sample = sample[:true_length]
                cost = costs[:true_length, i].sum()
                message = "({})".format(cost)
                message += "".join(code2char[code] for code in sample)
                if sample == target:
                    message += " CORRECT!"
                messages.append((cost, message))
            messages.sort(key=operator.itemgetter(0), reverse=True)
            for _, message in messages:
                print(message)
Example #10
def main(config, tr_stream, dev_stream, use_bokeh=False):
    print("~def main")

    # Create Theano variables
    logger.info('Creating theano variables')
    source_sentence = tensor.lmatrix('source')
    source_sentence_mask = tensor.matrix('source_mask')
    target_sentence = tensor.lmatrix('target')
    target_sentence_mask = tensor.matrix('target_mask')
    sampling_input = tensor.lmatrix('input')

    print("~sampling_input = tensor.lmatrix")


    # Construct model
    logger.info('Building RNN encoder-decoder')
    encoder = BidirectionalEncoder(
        config['src_vocab_size'], config['enc_embed'], config['enc_nhids'])
    decoder = Decoder(
        config['trg_vocab_size'], config['dec_embed'], config['dec_nhids'],
        config['enc_nhids'] * 2)
    cost = decoder.cost(
        encoder.apply(source_sentence, source_sentence_mask),
        source_sentence_mask, target_sentence, target_sentence_mask)

    print("~source_sentence_mask, target_sentence, target_sentence_mask")

    logger.info('Creating computational graph')
    cg = ComputationGraph(cost)

    print("~ComputationGraph")

    # Initialize model
    logger.info('Initializing model')
    encoder.weights_init = decoder.weights_init = IsotropicGaussian(
        config['weight_scale'])
    encoder.biases_init = decoder.biases_init = Constant(0)
    encoder.push_initialization_config()
    decoder.push_initialization_config()
    encoder.bidir.prototype.weights_init = Orthogonal()
    decoder.transition.weights_init = Orthogonal()
    encoder.initialize()
    decoder.initialize()

    print("~decoder.initialize()")

    # apply dropout for regularization
    if config['dropout'] < 1.0:
        # dropout is applied to the output of maxout in ghog
        logger.info('Applying dropout')
        dropout_inputs = [x for x in cg.intermediary_variables
                          if x.name == 'maxout_apply_output']
        cg = apply_dropout(cg, dropout_inputs, config['dropout'])

    print("~cg = apply_dropout")

    # Apply weight noise for regularization
    if config['weight_noise_ff'] > 0.0:
        logger.info('Applying weight noise to ff layers')
        enc_params = Selector(encoder.lookup).get_params().values()
        enc_params += Selector(encoder.fwd_fork).get_params().values()
        enc_params += Selector(encoder.back_fork).get_params().values()
        dec_params = Selector(
            decoder.sequence_generator.readout).get_params().values()
        dec_params += Selector(
            decoder.sequence_generator.fork).get_params().values()
        dec_params += Selector(decoder.state_init).get_params().values()
        cg = apply_noise(cg, enc_params+dec_params, config['weight_noise_ff'])


    print("~cg = apply_noise")

    # Print shapes
    shapes = [param.get_value().shape for param in cg.parameters]
    logger.info("Parameter shapes: ")
    for shape, count in Counter(shapes).most_common():
        logger.info('    {:15}: {}'.format(shape, count))
    logger.info("Total number of parameters: {}".format(len(shapes)))

    print("~logger.info")



    # Print parameter names
    enc_dec_param_dict = merge(Selector(encoder).get_parameters(),
                               Selector(decoder).get_parameters())
    logger.info("Parameter names: ")
    for name, value in enc_dec_param_dict.items():
        logger.info('    {:15}: {}'.format(value.get_value().shape, name))
    logger.info("Total number of parameters: {}"
                .format(len(enc_dec_param_dict)))

    # Set up training model
    logger.info("Building model")
    training_model = Model(cost)
    print("~training_model")


    # Set extensions
    logger.info("Initializing extensions")
    extensions = [
        FinishAfter(after_n_batches=config['finish_after']),
        TrainingDataMonitoring([cost], after_batch=True),
        Printing(after_batch=True),
        CheckpointNMT(config['saveto'],
                      every_n_batches=config['save_freq'])
    ]
    print("~every_n_batches=config")

    # Set up beam search and sampling computation graphs if necessary
    if config['hook_samples'] >= 1 or config['bleu_script'] is not None:
        logger.info("Building sampling model")
        sampling_representation = encoder.apply(
            sampling_input, tensor.ones(sampling_input.shape))
        generated = decoder.generate(sampling_input, sampling_representation)
        search_model = Model(generated)
        _, samples = VariableFilter(
            bricks=[decoder.sequence_generator], name="outputs")(
                ComputationGraph(generated[1]))  # generated[1] is next_outputs

    sample = Sampler(model=search_model, data_stream=tr_stream,
                     hook_samples=config['hook_samples'],
                     every_n_batches=config['sampling_freq'],
                     src_vocab_size=config['src_vocab_size'])

    # Add sampling
    if config['hook_samples'] >= 1:
        logger.info("Building sampler")
        extensions.append(sample)

    # Add early stopping based on bleu
    if config['bleu_script'] is not None:
        logger.info("Building bleu validator")
        extensions.append(
            BleuValidator(sampling_input, samples=samples, config=config,
                          model=search_model, data_stream=dev_stream,
                          normalize=config['normalized_bleu'],
                          every_n_batches=config['bleu_val_freq']))

    # Reload model if necessary
    if config['reload']:
        extensions.append(LoadNMT(config['saveto']))

    # Plot cost in bokeh if necessary
    if use_bokeh and BOKEH_AVAILABLE:
        extensions.append(
            Plot('Cs-En', channels=[['decoder_cost_cost']],
                 after_batch=True))

    sampling_fn = search_model.get_theano_function()

    print(" - - - - - - - - - - - - - - ")

    sort_k_batches = 12
    batch_size = 80
    seq_len = 50
    trg_ivocab = None
    src_vocab_size = config['src_vocab_size']
    trg_vocab_size = config['trg_vocab_size']
    unk_id = config['unk_id'] 

    src_vocab = config['src_vocab']
    trg_vocab = config['trg_vocab']
    src_vocab = ensure_special_tokens(
        src_vocab if isinstance(src_vocab, dict)
        else cPickle.load(open(src_vocab)),
        bos_idx=0, eos_idx=src_vocab_size - 1, unk_idx=unk_id)
    trg_vocab = ensure_special_tokens(
        trg_vocab if isinstance(trg_vocab, dict) else
        cPickle.load(open(trg_vocab)),
        bos_idx=0, eos_idx=trg_vocab_size - 1, unk_idx=unk_id)
    if not trg_ivocab:
        trg_ivocab = {v: k for k, v in trg_vocab.items()}


    src_data = config['src_data']
    trg_data = config['trg_data']
    src_dataset = TextFile([src_data], src_vocab, None)
    trg_dataset = TextFile([trg_data], trg_vocab, None)

    inputstringfile = "inputstringfile.cs"
    input_dataset = TextFile([inputstringfile], src_vocab, None)

    stream = Merge([input_dataset.get_example_stream(),
                    trg_dataset.get_example_stream()],
                   ('source', 'target'))
    stream2 = Filter(stream, predicate=_too_long(seq_len=seq_len))
    stream3 = Mapping(stream2,
                      _oov_to_unk(src_vocab_size=src_vocab_size,
                                  trg_vocab_size=trg_vocab_size,
                                  unk_id=unk_id))
    stream4 = Batch(stream3,
                    iteration_scheme=ConstantScheme(
                        batch_size * sort_k_batches))
    stream5 = Mapping(stream4, SortMapping(_length))
    stream6 = Unpack(stream5)
    stream7 = Batch(stream6, iteration_scheme=ConstantScheme(batch_size))
    # Note: stream2..stream7 are built but never used below; the prediction
    # loop reads input_stream directly.

    input_stream = DataStream(input_dataset)

    print("dev_stream : ", type(dev_stream))
    print("input_stream : ", type(input_stream))

    epochone = input_stream.get_epoch_iterator() 
    vocab = input_stream.dataset.dictionary
    unk_sym = input_stream.dataset.unk_token
    eos_sym = input_stream.dataset.eos_token

    for i, line in enumerate(epochone):
        seq = oov_to_unk(
            line[0], config['src_vocab_size'], unk_id)
        input_ = numpy.tile(seq, (1, 1))

        print("seq : ", type(seq), seq)
        print("input_ : ", type(input_), input_, inspect.getmembers(input_))

        _1, outputs, _2, _3, costs = sampling_fn(input_)

        outputs = outputs.flatten()
        costs = costs.T

        print("outputs : ", outputs, type(outputs))
        print("idx_to_word: ", idx_to_word(outputs, trg_ivocab))

    print(" - - - - - - - - - - - - - - ")