Esempio n. 1
0
def test_update_dataset_from_file():
    """
    Run `update_dataset_from_file` over a grid of argument combinations.

    A dataset is built for every REBUILD_DATASET / `splits` combination, and a deep copy of it is
    updated from the 'test' source text file with each combination of the optional arguments.
    Finally, the 'val' split is updated with the test data and its sizes are checked.
    """
    params = load_parameters()
    for rebuild_dataset in [True, False]:
        params['REBUILD_DATASET'] = rebuild_dataset
        params['DATASET_STORE_PATH'] = '.'
        for splits in [[], None, ['val']]:
            ds = build_dataset(params)
            assert isinstance(ds, Dataset)
            source_filename = os.path.join(
                params['DATA_ROOT_PATH'],
                params['TEXT_FILES']['test'] + params['SRC_LAN'])
            target_filename = os.path.join(
                params['DATA_ROOT_PATH'],
                params['TEXT_FILES']['test'] + params['TRG_LAN'])
            for output_text_filename in [None, target_filename]:
                for remove_outputs in [True, False]:
                    for compute_state_below in [True, False]:
                        for recompute_references in [True, False]:
                            # Always update a copy, so every combination starts from the same dataset.
                            ds2 = update_dataset_from_file(
                                copy.deepcopy(ds),
                                source_filename,
                                params,
                                splits=splits,
                                output_text_filename=output_text_filename,
                                remove_outputs=remove_outputs,
                                compute_state_below=compute_state_below,
                                recompute_references=recompute_references)
                            assert isinstance(ds2, Dataset)

    # Final check: We update the val set with the test data. We check that dimensions match.
    split = 'val'
    len_test = 2996  # Expected number of samples after the update.
    # Paths are joined with os.path.join, as in the loop above, so the check does not depend on
    # DATA_ROOT_PATH ending with a path separator.
    ds2 = update_dataset_from_file(
        copy.deepcopy(ds),
        os.path.join(params['DATA_ROOT_PATH'],
                     params['TEXT_FILES']['test'] + params['SRC_LAN']),
        params,
        splits=[split],
        output_text_filename=os.path.join(
            params['DATA_ROOT_PATH'],
            params['TEXT_FILES']['test'] + params['TRG_LAN']),
        remove_outputs=False,
        compute_state_below=True,
        recompute_references=True)
    assert isinstance(ds2, Dataset)
    # Per-split attributes (len_<split>, loaded_<split>, X_<split>, Y_<split>) are looked up by name.
    assert getattr(ds2, 'len_' + split) == len_test
    assert all(getattr(ds2, 'loaded_' + split))
    assert len(getattr(ds2, 'X_' + split)[params['INPUTS_IDS_DATASET'][0]]) == len_test
    assert len(getattr(ds2, 'Y_' + split)[params['OUTPUTS_IDS_DATASET'][0]]) == len_test

    # NOTE(review): this guard sits inside the test function, so it is only evaluated while the
    # test itself is running. It looks like it was meant to be at module level -- confirm.
    if __name__ == '__main__':
        pytest.main([__file__])
Esempio n. 2
0
    def test_update_dataset_from_file(self):
        """
        Run `update_dataset_from_file` over a grid of argument combinations.

        The dataset is rebuilt once; deep copies of it are then updated from the 'test' source
        text file with each combination of the optional arguments. Finally, the 'val' split is
        updated with the test data and its sizes are checked.
        """
        params = load_parameters()
        params['REBUILD_DATASET'] = True
        params['DATASET_STORE_PATH'] = './'
        ds = build_dataset(params)
        self.assertIsInstance(ds, Dataset)

        # Source and target side of the 'test' text files.
        source_filename = params['DATA_ROOT_PATH'] + params['TEXT_FILES']['test'] + params['SRC_LAN']
        target_filename = params['DATA_ROOT_PATH'] + params['TEXT_FILES']['test'] + params['TRG_LAN']

        for splits in [[], ['val']]:
            for output_text_filename in [None, target_filename]:
                for remove_outputs in [True, False]:
                    for compute_state_below in [True, False]:
                        for recompute_references in [True, False]:
                            # Always update a copy, so every combination starts from the same dataset.
                            ds2 = update_dataset_from_file(
                                copy.deepcopy(ds),
                                source_filename,
                                params,
                                splits=splits,
                                output_text_filename=output_text_filename,
                                remove_outputs=remove_outputs,
                                compute_state_below=compute_state_below,
                                recompute_references=recompute_references)
                            self.assertIsInstance(ds2, Dataset)

        # Final check: We update the val set with the test data. We check that dimensions match.
        split = 'val'
        len_test = 2996  # Expected number of samples after the update.
        ds2 = update_dataset_from_file(
            copy.deepcopy(ds),
            params['DATA_ROOT_PATH'] + params['TEXT_FILES']['test'] + params['SRC_LAN'],
            params,
            splits=[split],
            output_text_filename=params['DATA_ROOT_PATH'] + params['TEXT_FILES']['test'] + params['TRG_LAN'],
            remove_outputs=False,
            compute_state_below=True,
            recompute_references=True)
        self.assertIsInstance(ds2, Dataset)
        # Per-split attributes (len_<split>, loaded_<split>, X_<split>, Y_<split>) are looked up by name.
        self.assertEqual(getattr(ds2, 'len_' + split), len_test)
        self.assertTrue(all(getattr(ds2, 'loaded_' + split)))
        self.assertEqual(
            len(getattr(ds2, 'X_' + split)[params['INPUTS_IDS_DATASET'][0]]), len_test)
        self.assertEqual(
            len(getattr(ds2, 'Y_' + split)[params['OUTPUTS_IDS_DATASET'][0]]), len_test)
Esempio n. 3
0
def score_corpus(args, params):
    """
    Score source--target pairs with an ensemble of translation models.

    :param args: Object with the attributes read here: models, dataset, source, target, splits, verbose and dest.
    :param params: Dictionary of model hyperparameters. INPUT_VOCABULARY_SIZE and OUTPUT_VOCABULARY_SIZE
                   are overwritten with the vocabulary sizes of the loaded dataset.
    :raises NotImplementedError: If params['BEAM_SEARCH'] is disabled (scores are only computed by the beam searcher).
    :raises Exception: If args.dest is given and params['SAMPLING_SAVE_MODE'] is neither 'list' nor 'numpy'.
    :return: None
    """
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    if args.source is not None:
        dataset = update_dataset_from_file(dataset, args.source, params, splits=args.splits,
                                           output_text_filename=args.target, compute_state_below=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]
    # Apply scoring
    # NOTE(review): extra_vars is not consumed below; the lookup is kept so that a missing
    # TOKENIZATION_METHOD still fails here, before any scoring is done.
    extra_vars = dict()
    extra_vars['tokenize_f'] = getattr(dataset, params['TOKENIZATION_METHOD'])
    for s in args.splits:
        # Apply model predictions
        params_prediction = {'max_batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s]}

        if params['BEAM_SEARCH']:
            params_prediction['beam_size'] = params['BEAM_SIZE']
            params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST']
            params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
            params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
            params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
            params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
            params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
            params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
            params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
            params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
            params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
            params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
            params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
            params_prediction['pos_unk'] = params.get('POS_UNK', False)
            params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
                else params.get('MAX_OUTPUT_TEXT_LEN', 50)
            params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
            params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
            params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
            params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
            beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction, verbose=args.verbose)
            scores = beam_searcher.scoreNet()[s]
        else:
            # Without beam search no scores are computed; fail with a clear message instead of
            # an unbound-variable error further down.
            raise NotImplementedError('Scoring is only supported with BEAM_SEARCH enabled.')

        # Store result
        # NOTE(review): the same args.dest is used for every split -- confirm whether later
        # splits are expected to replace the output of earlier ones.
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] + ' is not currently supported.')
        else:
            print(scores)
Esempio n. 4
0
                k, v = arg.split('=')
            except ValueError:
                print 'Overwritten arguments must have the form key=Value. \n Currently are: %s' % str(
                    args.changes)
                exit(1)
            try:
                params[k] = ast.literal_eval(v)
            except ValueError:
                params[k] = v
    except ValueError:
        print 'Error processing arguments: (', k, ",", v, ")"
        exit(2)
    dataset = loadDataset(args.dataset)
    dataset = update_dataset_from_file(dataset,
                                       args.text,
                                       params,
                                       splits=args.splits,
                                       remove_outputs=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['OUTPUTS_IDS_DATASET'][0]]
    # For converting predictions into sentences
    index2word_y = dataset.vocabulary[params['OUTPUTS_IDS_DATASET']
                                      [0]]['idx2words']

    if params.get('APPLY_DETOKENIZATION', False):
        detokenize_function = eval('dataset.' +
                                   params['DETOKENIZATION_METHOD'])
Esempio n. 5
0
def _epoch_offset(params, dataset):
    """
    Compute the epoch offset used when resuming training.

    :param params: Dictionary of network hyperparameters. Reads RELOAD, RELOAD_EPOCH and BATCH_SIZE.
    :param dataset: Dataset instance; its `len_train` attribute is read.
    :return: params['RELOAD'] if params['RELOAD_EPOCH'] is set; otherwise
             RELOAD * BATCH_SIZE / len_train, truncated to an int.
    """
    if params['RELOAD_EPOCH']:
        return params['RELOAD']
    return int(params['RELOAD'] * params['BATCH_SIZE'] / dataset.len_train)


def train_model(params, load_dataset=None):
    """
    Training function. Sets the training parameters from params. Build or loads the model and launches the training.
    :param params: Dictionary of network hyperparameters. It is updated in place (INPUT_VOCABULARY_SIZE,
                   OUTPUT_VOCABULARY_SIZE and, when resuming, EPOCH_OFFSET) and stored under STORE_PATH.
    :param load_dataset: Path passed to loadDataset. If None, the dataset is built from the parameters
                         (or, when resuming without REBUILD_DATASET, loaded from DATASET_STORE_PATH and updated).
    :return: None
    """
    check_params(params)

    if params['RELOAD'] > 0:
        logging.info('Resuming training.')
        # Load data
        if load_dataset is None:
            if params['REBUILD_DATASET']:
                logging.info('Rebuilding dataset.')
                dataset = build_dataset(params)
            else:
                logging.info('Updating dataset.')
                dataset = loadDataset(params['DATASET_STORE_PATH'] +
                                      '/Dataset_' + params['DATASET_NAME'] +
                                      '_' + params['SRC_LAN'] +
                                      params['TRG_LAN'] + '.pkl')
                params['EPOCH_OFFSET'] = _epoch_offset(params, dataset)
                # Refresh every split of the stored dataset from its text files.
                # .items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
                for split, filename in params['TEXT_FILES'].items():
                    dataset = update_dataset_from_file(
                        dataset,
                        params['DATA_ROOT_PATH'] + '/' + filename + params['SRC_LAN'],
                        params,
                        splits=[split],
                        output_text_filename=params['DATA_ROOT_PATH'] + '/' + filename + params['TRG_LAN'],
                        remove_outputs=False,
                        compute_state_below=True,
                        recompute_references=True)
                    dataset.name = params['DATASET_NAME'] + '_' + params['SRC_LAN'] + params['TRG_LAN']
                saveDataset(dataset, params['DATASET_STORE_PATH'])

        else:
            logging.info('Reloading and using dataset.')
            dataset = loadDataset(load_dataset)
    else:
        # Load data
        if load_dataset is None:
            dataset = build_dataset(params)
        else:
            dataset = loadDataset(load_dataset)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]

    # Build model
    # The optimizer is set and the directories are cleared only when training starts from scratch.
    fresh_run = params['RELOAD'] == 0
    set_optimizer = fresh_run
    clear_dirs = fresh_run

    # build new model
    nmt_model = TranslationModel(params,
                                 model_type=params['MODEL_TYPE'],
                                 verbose=params['VERBOSE'],
                                 model_name=params['MODEL_NAME'],
                                 vocabularies=dataset.vocabulary,
                                 store_path=params['STORE_PATH'],
                                 set_optimizer=set_optimizer,
                                 clear_dirs=clear_dirs)

    # Define the inputs and outputs mapping from our Dataset instance to our model
    inputMapping = dict()
    for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
        pos_source = dataset.ids_inputs.index(id_in)
        id_dest = nmt_model.ids_inputs[i]
        inputMapping[id_dest] = pos_source
    nmt_model.setInputsMapping(inputMapping)

    outputMapping = dict()
    for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']):
        pos_target = dataset.ids_outputs.index(id_out)
        id_dest = nmt_model.ids_outputs[i]
        outputMapping[id_dest] = pos_target
    nmt_model.setOutputsMapping(outputMapping)

    if params['RELOAD'] > 0:
        nmt_model = updateModel(nmt_model,
                                params['STORE_PATH'],
                                params['RELOAD'],
                                reload_epoch=params['RELOAD_EPOCH'])
        nmt_model.setParams(params)
        nmt_model.setOptimizer()
        # EPOCH_OFFSET may already have been set above, when the stored dataset was updated.
        if params.get('EPOCH_OFFSET') is None:
            params['EPOCH_OFFSET'] = _epoch_offset(params, dataset)

    # Store configuration as pkl
    dict2pkl(params, params['STORE_PATH'] + '/config')

    # Callbacks
    callbacks = buildCallbacks(params, nmt_model, dataset)

    # Training
    total_start_time = timer()

    logger.debug('Starting training!')
    training_params = {
        'n_epochs': params['MAX_EPOCH'],
        'batch_size': params['BATCH_SIZE'],
        'homogeneous_batches': params['HOMOGENEOUS_BATCHES'],
        'maxlen': params['MAX_OUTPUT_TEXT_LEN'],
        'joint_batches': params['JOINT_BATCHES'],
        'lr_decay': params.get('LR_DECAY', None),  # LR decay parameters
        'reduce_each_epochs': params.get('LR_REDUCE_EACH_EPOCHS', True),
        'start_reduction_on_epoch': params.get('LR_START_REDUCTION_ON_EPOCH', 0),
        'lr_gamma': params.get('LR_GAMMA', 0.9),
        'lr_reducer_type': params.get('LR_REDUCER_TYPE', 'linear'),
        'lr_reducer_exp_base': params.get('LR_REDUCER_EXP_BASE', 0),
        'lr_half_life': params.get('LR_HALF_LIFE', 50000),
        'epochs_for_save': params['EPOCHS_FOR_SAVE'],
        'verbose': params['VERBOSE'],
        'eval_on_sets': params['EVAL_ON_SETS_KERAS'],
        'n_parallel_loaders': params['PARALLEL_LOADERS'],
        'extra_callbacks': callbacks,
        'reload_epoch': params['RELOAD'],
        'epoch_offset': params.get('EPOCH_OFFSET', 0),
        'data_augmentation': params['DATA_AUGMENTATION'],
        'patience': params.get('PATIENCE', 0),  # early stopping parameters
        'metric_check': params.get('STOP_METRIC', None) if params.get('EARLY_STOP', False) else None,
        'eval_on_epochs': params.get('EVAL_EACH_EPOCHS', True),
        'each_n_epochs': params.get('EVAL_EACH', 1),
        'start_eval_on_epoch': params.get('START_EVAL_ON_EPOCH', 0),
        'tensorboard': params.get('TENSORBOARD', False),
        'tensorboard_params': {
            'log_dir': params.get('LOG_DIR', 'tensorboard_logs'),
            'histogram_freq': params.get('HISTOGRAM_FREQ', 0),
            'batch_size': params.get('TENSORBOARD_BATCH_SIZE', params['BATCH_SIZE']),
            'write_graph': params.get('WRITE_GRAPH', True),
            'write_grads': params.get('WRITE_GRADS', False),
            'write_images': params.get('WRITE_IMAGES', False),
            'embeddings_freq': params.get('EMBEDDINGS_FREQ', 0),
            'embeddings_layer_names': params.get('EMBEDDINGS_LAYER_NAMES', None),
            'embeddings_metadata': params.get('EMBEDDINGS_METADATA', None),
            'label_word_embeddings_with_vocab': params.get('LABEL_WORD_EMBEDDINGS_WITH_VOCAB', False),
            'word_embeddings_labels': params.get('WORD_EMBEDDINGS_LABELS', None),
        }
    }
    nmt_model.trainNet(dataset, training_params)

    total_end_time = timer()
    time_difference = total_end_time - total_start_time
    logging.info('In total is {0:.2f}s = {1:.2f}m'.format(
        time_difference, time_difference / 60.0))
Esempio n. 6
0
def sample_ensemble(args, params):
    """
    Use an ensemble of translation models for obtaining predictions from a source text file.

    :param args: Object with the attributes read here: models, dataset, text, splits, weights, n_best, verbose and dest.
    :param params: Dictionary of model hyperparameters. INPUT_VOCABULARY_SIZE and OUTPUT_VOCABULARY_SIZE
                   are overwritten with the vocabulary sizes of the loaded dataset.
    :return: None
    """

    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.model_ensemble import BeamSearchEnsemble
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.utils import decode_predictions_beam_search

    logging.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    dataset = update_dataset_from_file(dataset,
                                       args.text,
                                       params,
                                       splits=args.splits,
                                       remove_outputs=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]
    # For converting predictions into sentences
    index2word_y = dataset.vocabulary[params['OUTPUTS_IDS_DATASET'][0]]['idx2words']

    apply_detokenization = params.get('APPLY_DETOKENIZATION', False)
    if apply_detokenization:
        # The detokenization method is looked up by name on the dataset.
        detokenize_function = getattr(dataset, params['DETOKENIZATION_METHOD'])

    params_prediction = dict()
    params_prediction['max_batch_size'] = params.get('BATCH_SIZE', 20)
    params_prediction['n_parallel_loaders'] = params.get('PARALLEL_LOADERS', 1)
    params_prediction['beam_size'] = params.get('BEAM_SIZE', 6)
    params_prediction['maxlen'] = params.get('MAX_OUTPUT_TEXT_LEN_TEST', 100)
    params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
    params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
    params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
    params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
    params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
    params_prediction['search_pruning'] = params.get('SEARCH_PRUNING', False)
    params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
    params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
    params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
    params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
    params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
    params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
    params_prediction['pos_unk'] = params.get('POS_UNK', False)
    params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
        else params.get('MAX_OUTPUT_TEXT_LEN', 50)
    params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
    params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
    params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
    params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
    params_prediction['attend_on_output'] = params.get(
        'ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower())

    heuristic = params.get('HEURISTIC', 0)
    mapping = None if dataset.mapping == dict() else dataset.mapping
    model_weights = args.weights

    if model_weights is not None and model_weights != []:
        assert len(model_weights) == len(
            models
        ), 'You should give a weight to each model. You gave %d models and %d weights.' % (
            len(models), len(model_weights))
        # A real list is needed: len() is taken below, and map() is lazy on Python 3.
        model_weights = [float(weight) for weight in model_weights]
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' %
                        str(model_weights))
    for s in args.splits:
        # Apply model predictions
        params_prediction['predict_on_sets'] = [s]
        beam_searcher = BeamSearchEnsemble(models,
                                           dataset,
                                           params_prediction,
                                           model_weights=model_weights,
                                           n_best=args.n_best,
                                           verbose=args.verbose)
        if args.n_best:
            predictions, n_best = beam_searcher.predictBeamSearchNet()[s]
        else:
            predictions = beam_searcher.predictBeamSearchNet()[s]
            n_best = None
        if params_prediction['pos_unk']:
            samples = predictions[0]
            alphas = predictions[1]
            # Read the source sentences, closing the file once it has been read.
            with open(args.text, 'r') as source_file:
                sources = [line.strip() for line in source_file.read().split('\n')]
            # Drop the empty entry produced by a trailing newline.
            sources = sources[:-1] if len(sources[-1]) == 0 else sources
        else:
            samples = predictions
            alphas = None
            heuristic = None
            sources = None

        predictions = decode_predictions_beam_search(samples,
                                                     index2word_y,
                                                     alphas=alphas,
                                                     x_text=sources,
                                                     heuristic=heuristic,
                                                     mapping=mapping,
                                                     verbose=args.verbose)
        # Apply detokenization function if needed
        if params.get('APPLY_DETOKENIZATION', False):
            predictions = [detokenize_function(prediction) for prediction in predictions]

        if args.n_best:
            n_best_predictions = []
            for i, (n_best_preds, n_best_scores, n_best_alphas) in enumerate(n_best):
                n_best_sample_score = []
                for n_best_pred, n_best_score, n_best_alpha in zip(
                        n_best_preds, n_best_scores, n_best_alphas):
                    pred = decode_predictions_beam_search(
                        [n_best_pred],
                        index2word_y,
                        alphas=[n_best_alpha] if params_prediction['pos_unk'] else None,
                        x_text=[sources[i]] if params_prediction['pos_unk'] else None,
                        heuristic=heuristic,
                        mapping=mapping,
                        verbose=args.verbose)
                    # Apply detokenization function if needed
                    if params.get('APPLY_DETOKENIZATION', False):
                        pred = [detokenize_function(sentence) for sentence in pred]

                    n_best_sample_score.append([i, pred, n_best_score])
                n_best_predictions.append(n_best_sample_score)
        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            # NOTE(review): this only tests the truthiness of SAMPLING_SAVE_MODE, so any non-empty
            # value (not just 'list') takes this branch -- confirm whether `== 'list'` was intended.
            if params.get('SAMPLING_SAVE_MODE', 'list'):
                list2file(filepath, predictions)
                if args.n_best:
                    nbest2file(filepath + '.nbest', n_best_predictions)
            else:
                raise Exception(
                    'Only "list" is allowed in "SAMPLING_SAVE_MODE"')
        else:
            list2stdout(predictions)
            if args.n_best:
                logging.info('Storing n-best sentences in ./' + s + '.nbest')
                nbest2file('./' + s + '.nbest', n_best_predictions)
        logging.info('Sampling finished')
Esempio n. 7
0
def score_corpus(args, params):
    """
    Use one or several translation models for scoring source--target pairs.

    :param argparse.Namespace args: Arguments given to the method:

                                * dataset: Dataset instance with data.
                                * source: Text file with source sentences.
                                * target: Text file with target sentences.
                                * splits: Splits to sample. Should be already included in the dataset object.
                                * dest: Output file to save scores.
                                * weights: Weight given to each model in the ensemble. You should provide the same number of weights than models. By default, it applies the same weight to each model (1/N).
                                * verbose: Be verbose or not.
                                * config: Config .pkl for loading the model configuration. If not specified, hyperparameters are read from config.py.
                                * models: Path to the models.
    :param dict params: parameters of the translation model. INPUT_VOCABULARY_SIZE and OUTPUT_VOCABULARY_SIZE
                        are overwritten with the vocabulary sizes of the loaded dataset.
    :raises NotImplementedError: If params['BEAM_SEARCH'] is disabled (scores are only computed by the beam searcher).
    :raises Exception: If args.dest is given and params['SAMPLING_SAVE_MODE'] is neither 'list' nor 'numpy'.
    """

    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.model_ensemble import BeamSearchEnsemble

    logging.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    dataset = update_dataset_from_file(dataset,
                                       args.source,
                                       params,
                                       splits=args.splits,
                                       output_text_filename=args.target,
                                       compute_state_below=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]
    # Apply scoring
    # NOTE(review): extra_vars is not consumed below; the lookup is kept so that a missing
    # TOKENIZATION_METHOD still fails here, before any scoring is done.
    extra_vars = dict()
    extra_vars['tokenize_f'] = getattr(dataset, params['TOKENIZATION_METHOD'])

    model_weights = args.weights
    if model_weights is not None and model_weights != []:
        assert len(model_weights) == len(
            models
        ), 'You should give a weight to each model. You gave %d models and %d weights.' % (
            len(models), len(model_weights))
        # A real list is needed: len() is taken below, and map() is lazy on Python 3.
        model_weights = [float(weight) for weight in model_weights]
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' %
                        str(model_weights))

    for s in args.splits:
        # Apply model predictions
        params_prediction = {
            'max_batch_size': params['BATCH_SIZE'],
            'n_parallel_loaders': params['PARALLEL_LOADERS'],
            'predict_on_sets': [s]
        }

        if params['BEAM_SEARCH']:
            params_prediction['beam_size'] = params['BEAM_SIZE']
            params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST']
            params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
            params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
            params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
            params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
            params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
            params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
            params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
            params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
            params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
            params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
            params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
            params_prediction['pos_unk'] = params.get('POS_UNK', False)
            params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
                else params.get('MAX_OUTPUT_TEXT_LEN', 50)
            params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
            params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
            params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
            params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
            params_prediction['attend_on_output'] = params.get(
                'ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower())
            beam_searcher = BeamSearchEnsemble(models,
                                               dataset,
                                               params_prediction,
                                               model_weights=model_weights,
                                               verbose=args.verbose)
            scores = beam_searcher.scoreNet()[s]
        else:
            # Without beam search no scores are computed; fail with a clear message instead of
            # an unbound-variable error further down.
            raise NotImplementedError('Scoring is only supported with BEAM_SEARCH enabled.')

        # Store result
        # NOTE(review): the same args.dest is used for every split -- confirm whether later
        # splits are expected to replace the output of earlier ones.
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' +
                                params['SAMPLING_SAVE_MODE'] +
                                ' is not currently supported.')
        else:
            print(scores)
Esempio n. 8
0
def sample_ensemble(args, params):
    """
    Use several translation models for obtaining predictions from a source text file.

    For every split in `args.splits`, the ensemble is run with beam search and the decoded
    sentences are written to `args.dest` or, if it is None, to stdout. When `args.n_best` is set,
    the n-best lists are stored in `args.dest + '.nbest'` (or in './<split>.nbest' if there is no `dest`).

    :param argparse.Namespace args: Arguments given to the method:

                      * dataset: Dataset to sample from. It is handed to `loadDataset`.
                      * text: Text file with source sentences.
                      * splits: Splits to sample. Should be already included in the dataset object.
                      * dest: Output file to save the predictions. If None, predictions are written to stdout.
                      * weights: Weight given to each model in the ensemble. You should provide the same number of weights than models. By default, it applies the same weight to each model (1/N).
                      * n_best: Write n-best list (n = beam size).
                      * glossary: Optional glossary file, read with `pkl2dict`. It takes precedence over params['GLOSSARY'].
                      * config: Config .pkl for loading the model configuration. If not specified, hyperparameters are read from config.py. (Not read by this function.)
                      * models: Path to the models.
                      * verbose: Be verbose or not.

    :param params: parameters of the translation model. Its 'INPUT_VOCABULARY_SIZE' and
                   'OUTPUT_VOCABULARY_SIZE' entries are overwritten with the dataset values.
    :raises Exception: if `args.dest` is given and params['SAMPLING_SAVE_MODE'] is not 'list'.
    :raises AssertionError: if the number of weights differs from the number of models.
    """
    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.model_ensemble import BeamSearchEnsemble
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.utils import decode_predictions_beam_search

    # Only the 'list' save mode is implemented. Compare by equality (a plain truthiness test
    # accepts any non-empty mode) and do it before loading the models, so that an unsupported
    # configuration fails before the decoding work is done instead of after it.
    if args.dest is not None and params.get('SAMPLING_SAVE_MODE', 'list') != 'list':
        raise Exception('Only "list" is allowed in "SAMPLING_SAVE_MODE"')

    logger.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    dataset = update_dataset_from_file(dataset, args.text, params, splits=args.splits, remove_outputs=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]
    # For converting predictions into sentences
    index2word_y = dataset.vocabulary[params['OUTPUTS_IDS_DATASET'][0]]['idx2words']

    apply_detokenization = params.get('APPLY_DETOKENIZATION', False)
    if apply_detokenization:
        # NOTE(review): eval() on a configuration value. This is only safe while `params`
        # comes from a trusted config; consider getattr(dataset, ...) if that can change.
        detokenize_function = eval('dataset.' + params['DETOKENIZATION_METHOD'])

    params_prediction = dict()
    params_prediction['max_batch_size'] = params.get('BATCH_SIZE', 20)
    params_prediction['n_parallel_loaders'] = params.get('PARALLEL_LOADERS', 1)
    params_prediction['beam_size'] = params.get('BEAM_SIZE', 6)
    params_prediction['maxlen'] = params.get('MAX_OUTPUT_TEXT_LEN_TEST', 100)
    params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
    params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
    params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
    params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
    params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
    params_prediction['search_pruning'] = params.get('SEARCH_PRUNING', False)
    params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
    params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
    params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
    params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
    params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
    params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
    params_prediction['pos_unk'] = params.get('POS_UNK', False)
    params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
        else params.get('MAX_OUTPUT_TEXT_LEN', 50)
    params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
    params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
    params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
    params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
    params_prediction['attend_on_output'] = params.get('ATTEND_ON_OUTPUT',
                                                       'transformer' in params['MODEL_TYPE'].lower())
    params_prediction['glossary'] = params.get('GLOSSARY', None)

    pos_unk = params_prediction['pos_unk']
    heuristic = params.get('HEURISTIC', 0)
    mapping = None if dataset.mapping == dict() else dataset.mapping
    model_weights = args.weights

    # A glossary given on the command line wins over the one named in the configuration.
    if args.glossary is not None:
        glossary = pkl2dict(args.glossary)
    elif params_prediction['glossary'] is not None:
        glossary = pkl2dict(params_prediction['glossary'])
    else:
        glossary = None

    if model_weights:
        assert len(model_weights) == len(
            models), 'You should give a weight to each model. You gave %d models and %d weights.' % (
            len(models), len(model_weights))
        model_weights = list(map(float, model_weights))
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' % str(model_weights))

    for s in args.splits:
        # Apply model predictions
        params_prediction['predict_on_sets'] = [s]
        beam_searcher = BeamSearchEnsemble(models,
                                           dataset,
                                           params_prediction,
                                           model_weights=model_weights,
                                           n_best=args.n_best,
                                           verbose=args.verbose)
        predictions = beam_searcher.predictBeamSearchNet()[s]
        samples = predictions['samples']
        alphas = predictions['alphas'] if pos_unk else None

        if pos_unk:
            # The source sentences are only needed for unknown-word replacement.
            # Read them through a context manager so the file handle is always closed.
            with open(args.text, 'r') as source_file:
                sources = [x.strip() for x in source_file.read().split('\n')]
            # Drop the empty entry produced by a trailing newline.
            sources = sources[:-1] if len(sources[-1]) == 0 else sources
        else:
            sources = None

        decoded_predictions = decode_predictions_beam_search(samples,
                                                             index2word_y,
                                                             glossary=glossary,
                                                             alphas=alphas,
                                                             x_text=sources,
                                                             heuristic=heuristic,
                                                             mapping=mapping,
                                                             verbose=args.verbose)
        # Apply detokenization function if needed
        if apply_detokenization:
            decoded_predictions = list(map(detokenize_function, decoded_predictions))

        if args.n_best:
            # One entry per source sentence; each entry lists [sentence index, decoded hypothesis, score].
            n_best_predictions = []
            for i, (n_best_preds, n_best_scores, n_best_alphas) in enumerate(predictions['n_best']):
                n_best_sample_score = []
                for n_best_pred, n_best_score, n_best_alpha in zip(n_best_preds, n_best_scores, n_best_alphas):
                    pred = decode_predictions_beam_search([n_best_pred],
                                                          index2word_y,
                                                          glossary=glossary,
                                                          alphas=[n_best_alpha] if pos_unk else None,
                                                          x_text=[sources[i]] if pos_unk else None,
                                                          heuristic=heuristic,
                                                          mapping=mapping,
                                                          verbose=args.verbose)
                    # Apply detokenization function if needed
                    if apply_detokenization:
                        pred = list(map(detokenize_function, pred))

                    n_best_sample_score.append([i, pred, n_best_score])
                n_best_predictions.append(n_best_sample_score)

        # Store result
        if args.dest is not None:
            # NOTE(review): every split is written to the same `args.dest` path.
            filepath = args.dest  # results file
            list2file(filepath, decoded_predictions)
            if args.n_best:
                nbest2file(filepath + '.nbest', n_best_predictions)
        else:
            list2stdout(decoded_predictions)
            if args.n_best:
                logger.info('Storing n-best sentences in ./' + s + '.nbest')
                nbest2file('./' + s + '.nbest', n_best_predictions)
        logger.info('Sampling finished')
Esempio n. 9
0
    args = parse_args()
    models = args.models
    print "Using an ensemble of %d models" % len(args.models)
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    if args.config is None:
        print "Reading parameters from config.py"
        params = load_parameters()
    else:
        print "Loading parameters from %s" % str(args.config)
        params = pkl2dict(args.config)

    dataset = loadDataset(args.dataset)
    if args.source is not None:
        dataset = update_dataset_from_file(dataset,
                                           args.source,
                                           params,
                                           splits=args.splits,
                                           output_text_filename=args.target,
                                           compute_state_below=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['OUTPUTS_IDS_DATASET'][0]]
    # Apply scoring
    extra_vars = dict()
    extra_vars['tokenize_f'] = eval('dataset.' + params['TOKENIZATION_METHOD'])
    for s in args.splits:
        # Apply model predictions
        params_prediction = {
            'max_batch_size': params['BATCH_SIZE'],
            'n_parallel_loaders': params['PARALLEL_LOADERS'],
Esempio n. 10
0
def score_corpus(args, params):
    """
    Score the splits of a dataset with an ensemble of models and store or print the scores.

    The dataset is loaded and updated with the `args.source` / `args.target` text files. Then, for
    each split, the scores returned by `BeamSearchEnsemble.scoreNet()` are written to `args.dest`
    (format chosen by params['SAMPLING_SAVE_MODE']) or printed if `args.dest` is None.

    :param argparse.Namespace args: Arguments given to the method:

                      * dataset: Dataset to score. It is handed to `loadDataset`.
                      * source: Text file with source sentences.
                      * target: Text file with target sentences.
                      * splits: Splits to score.
                      * dest: Output file to save scores. If None, scores are printed.
                      * weights: Weight given to each model in the ensemble. One weight per model is required.
                      * models: Path to the models.
                      * verbose: Be verbose or not.

    :param params: parameters of the translation model. Its 'INPUT_VOCABULARY_SIZE' and
                   'OUTPUT_VOCABULARY_SIZE' entries are overwritten with the dataset values.
    :raises Exception: if params['BEAM_SEARCH'] is disabled or params['SAMPLING_SAVE_MODE'] is not supported.
    :raises AssertionError: if the number of weights differs from the number of models.
    """

    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.model_ensemble import BeamSearchEnsemble

    logging.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    dataset = update_dataset_from_file(dataset,
                                       args.source,
                                       params,
                                       splits=args.splits,
                                       output_text_filename=args.target,
                                       compute_state_below=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['OUTPUTS_IDS_DATASET'][0]]
    # Apply scoring
    # `extra_vars` is not consumed below; the lookup is kept so that a missing
    # tokenization method still fails here, before any scoring is done.
    # NOTE(review): eval() on a configuration value; only safe for trusted configs.
    extra_vars = dict()
    extra_vars['tokenize_f'] = eval('dataset.' + params['TOKENIZATION_METHOD'])

    model_weights = args.weights
    if model_weights is not None and model_weights != []:
        assert len(model_weights) == len(
            models
        ), 'You should give a weight to each model. You gave %d models and %d weights.' % (
            len(models), len(model_weights))
        # Materialise the weights as a list: on Python 3 `map` returns a one-shot
        # iterator, which has no len() and could only be consumed once by the ensemble.
        model_weights = [float(weight) for weight in model_weights]
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' %
                        str(model_weights))

    for s in args.splits:
        # Scores are only produced through the beam-search ensemble. Fail with an explicit
        # message instead of hitting an unbound `scores` variable further down.
        if not params['BEAM_SEARCH']:
            raise Exception('Scores can only be computed when "BEAM_SEARCH" is enabled.')

        # Apply model predictions
        params_prediction = {
            'max_batch_size': params['BATCH_SIZE'],
            'n_parallel_loaders': params['PARALLEL_LOADERS'],
            'predict_on_sets': [s]
        }
        params_prediction['beam_size'] = params['BEAM_SIZE']
        params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST']
        params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
        params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
        params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
        params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
        params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
        params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
        params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
        params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
        params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
        params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
        params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
        params_prediction['pos_unk'] = params.get('POS_UNK', False)
        params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
            else params.get('MAX_OUTPUT_TEXT_LEN', 50)
        params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
        params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
        params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
        params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
        params_prediction['attend_on_output'] = params.get(
            'ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower())
        beam_searcher = BeamSearchEnsemble(models,
                                           dataset,
                                           params_prediction,
                                           model_weights=model_weights,
                                           verbose=args.verbose)
        scores = beam_searcher.scoreNet()[s]

        # Store result
        if args.dest is not None:
            # NOTE(review): every split is written to the same `args.dest` path.
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' +
                                params['SAMPLING_SAVE_MODE'] +
                                ' is not currently supported.')
        else:
            print(scores)
def _build_dataset_from_params(params):
    """Build a Dataset from `params`, reusing the vocabulary of the dataset at params['PRED_VOCAB'] if set."""
    if params.get('PRED_VOCAB', None) is not None:
        dataset_voc = loadDataset(params['PRED_VOCAB'])
        return build_dataset(params, dataset_voc.vocabulary,
                             dataset_voc.vocabulary_len)
    return build_dataset(params)


def train_model(params,
                weights_dict,
                load_dataset=None,
                trainable_pred=True,
                trainable_est=True,
                weights_path=None):
    """
    Training function. Sets the training parameters from params. Build or loads the model and launches the training.

    :param params: Dictionary of network hyperparameters. It is modified in place: the vocabulary
                   sizes are set from the dataset and, when resuming, 'EPOCH_OFFSET' is set.
    :param weights_dict: Dictionary from layer names to layer weights, or None. If given, the weights
                         of the layers whose name is in the dictionary are set before training, and
                         the dictionary is updated in place with the weights of every layer after training.
    :param load_dataset: Load dataset from file or build it from the parameters.
    :param trainable_pred: Forwarded to TranslationModel.
    :param trainable_est: Forwarded to TranslationModel.
    :param weights_path: Forwarded to TranslationModel.
    :return: None
    """
    check_params(params)

    # Load data
    if params['RELOAD'] > 0:
        logging.info('Resuming training.')
        if load_dataset is None:
            if params['REBUILD_DATASET']:
                logging.info('Rebuilding dataset.')
                dataset = _build_dataset_from_params(params)
            else:
                logging.info('Updating dataset.')
                dataset = loadDataset(params['DATASET_STORE_PATH'] +
                                      '/Dataset_' + params['DATASET_NAME'] +
                                      '_' + params['SRC_LAN'] +
                                      params['TRG_LAN'] + '.pkl')

                # dict.items() is available on both Python 2 and 3 (iteritems() is Python 2 only).
                for split, filename in params['TEXT_FILES'].items():
                    dataset = update_dataset_from_file(
                        dataset,
                        params['DATA_ROOT_PATH'] + '/' + filename +
                        params['SRC_LAN'],
                        params,
                        splits=[split],
                        output_text_filename=params['DATA_ROOT_PATH'] + '/' +
                        filename + params['TRG_LAN'],
                        remove_outputs=False,
                        compute_state_below=True,
                        recompute_references=True)
                    dataset.name = params['DATASET_NAME'] + '_' + params[
                        'SRC_LAN'] + params['TRG_LAN']
                saveDataset(dataset, params['DATASET_STORE_PATH'])
        else:
            logging.info('Reloading and using dataset.')
            dataset = loadDataset(load_dataset)
    elif load_dataset is None:
        dataset = _build_dataset_from_params(params)
    else:
        dataset = loadDataset(load_dataset)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['INPUTS_IDS_DATASET'][0]]
    # The output vocabulary size is read from the fixed 'target_text' id,
    # not from an id listed in params.
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len['target_text']

    # Build model
    resuming = params['RELOAD'] != 0
    model_kwargs = {
        'model_type': params['MODEL_TYPE'],
        'verbose': params['VERBOSE'],
        'model_name': params['MODEL_NAME'],
        'vocabularies': dataset.vocabulary,
        'store_path': params['STORE_PATH'],
        'trainable_pred': trainable_pred,
        'trainable_est': trainable_est,
        'weights_path': weights_path,
    }
    if resuming:
        # The optimizer is set explicitly below, once the stored model has been reloaded.
        model_kwargs['set_optimizer'] = False
    else:
        # New model. NOTE(review): clear_dirs presumably wipes the store directory - confirm.
        model_kwargs['clear_dirs'] = True
    nmt_model = TranslationModel(params, **model_kwargs)

    # Define the inputs and outputs mapping from our Dataset instance to our model
    inputMapping = dict()
    for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
        pos_source = dataset.ids_inputs.index(id_in)
        id_dest = nmt_model.ids_inputs[i]
        inputMapping[id_dest] = pos_source
    nmt_model.setInputsMapping(inputMapping)

    outputMapping = dict()
    for i, id_out in enumerate(params['OUTPUTS_IDS_DATASET']):
        pos_target = dataset.ids_outputs.index(id_out)
        id_dest = nmt_model.ids_outputs[i]
        outputMapping[id_dest] = pos_target
    nmt_model.setOutputsMapping(outputMapping)

    if resuming:  # resume from previously trained model
        nmt_model = updateModel(nmt_model,
                                params['STORE_PATH'],
                                params['RELOAD'],
                                reload_epoch=params['RELOAD_EPOCH'])
        nmt_model.setParams(params)
        nmt_model.setOptimizer()
        # 'RELOAD' counts epochs if 'RELOAD_EPOCH' is set; otherwise it is treated as a number of
        # updates and converted to epochs through the batch size and the training set size.
        params['EPOCH_OFFSET'] = params['RELOAD'] if params['RELOAD_EPOCH'] else \
            int(params['RELOAD'] * params['BATCH_SIZE'] / dataset.len_train)

    # Store configuration as pkl
    dict2pkl(params, params['STORE_PATH'] + '/config')

    # Callbacks
    callbacks = buildCallbacks(params, nmt_model, dataset)

    # Training
    total_start_time = timer()

    logger.debug('Starting training!')
    training_params = {
        'n_epochs': params['MAX_EPOCH'],
        'batch_size': params['BATCH_SIZE'],
        'homogeneous_batches': params['HOMOGENEOUS_BATCHES'],
        'maxlen': params['MAX_OUTPUT_TEXT_LEN'],
        'joint_batches': params['JOINT_BATCHES'],
        # LR decay parameters
        'lr_decay': params.get('LR_DECAY', None),
        'reduce_each_epochs': params.get('LR_REDUCE_EACH_EPOCHS', True),
        'start_reduction_on_epoch': params.get('LR_START_REDUCTION_ON_EPOCH', 0),
        'lr_gamma': params.get('LR_GAMMA', 0.9),
        'lr_reducer_type': params.get('LR_REDUCER_TYPE', 'linear'),
        'lr_reducer_exp_base': params.get('LR_REDUCER_EXP_BASE', 0),
        'lr_half_life': params.get('LR_HALF_LIFE', 50000),
        'epochs_for_save': params['EPOCHS_FOR_SAVE'],
        'verbose': params['VERBOSE'],
        'eval_on_sets': params['EVAL_ON_SETS_KERAS'],
        'n_parallel_loaders': params['PARALLEL_LOADERS'],
        'extra_callbacks': callbacks,
        'reload_epoch': params['RELOAD'],
        'epoch_offset': params.get('EPOCH_OFFSET', 0),
        'data_augmentation': params['DATA_AUGMENTATION'],
        # Early stopping parameters
        'patience': params.get('PATIENCE', 0),
        'metric_check': params.get('STOP_METRIC', None)
        if params.get('EARLY_STOP', False) else None,
        'eval_on_epochs': params.get('EVAL_EACH_EPOCHS', True),
        'each_n_epochs': params.get('EVAL_EACH', 1),
        'start_eval_on_epoch': params.get('START_EVAL_ON_EPOCH', 0)
    }

    # Initialise the layers for which weights were handed in (matched by layer name).
    if weights_dict is not None:
        for layer in nmt_model.model.layers:
            if layer.name in weights_dict:
                layer.set_weights(weights_dict[layer.name])

    nmt_model.trainNet(dataset, training_params)

    # Hand the trained weights of every layer back to the caller through the same dictionary.
    if weights_dict is not None:
        for layer in nmt_model.model.layers:
            weights_dict[layer.name] = layer.get_weights()

    total_end_time = timer()
    time_difference = total_end_time - total_start_time
    logging.info('In total is {0:.2f}s = {1:.2f}m'.format(
        time_difference, time_difference / 60.0))