Example 1
def score_corpus(args, params):
    # Local imports, matching the fuller variants of this function below
    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.model_ensemble import BeamSearchEnsemble
    from keras_wrapper.extra.read_write import list2file, numpy2file

    print("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    if args.source is not None:
        dataset = update_dataset_from_file(dataset, args.source, params, splits=args.splits,
                                           output_text_filename=args.target, compute_state_below=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]
    # Apply scoring
    extra_vars = dict()
    extra_vars['tokenize_f'] = getattr(dataset, params['TOKENIZATION_METHOD'])
    for s in args.splits:
        # Apply model predictions
        params_prediction = {'max_batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s]}

        if params['BEAM_SEARCH']:
            params_prediction['beam_size'] = params['BEAM_SIZE']
            params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST']
            params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
            params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
            params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
            params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
            params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
            params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
            params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
            params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
            params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
            params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
            params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
            params_prediction['pos_unk'] = params.get('POS_UNK', False)
            params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
                else params.get('MAX_OUTPUT_TEXT_LEN', 50)
            params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
            params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
            params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
            params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
            beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction, verbose=args.verbose)
            scores = beam_searcher.scoreNet()[s]

        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] + ' is not currently supported.')
        else:
            print(scores)
Example 2
    def evaluate(self, epoch, counter_name='epoch', logs=None):
        """
        Evaluation function. Works for evaluators external to Keras.
        Computes the predictions according to the configuration and evaluates them, storing the results.
        :param epoch: Current epoch or update.
        :param counter_name: 'epoch' or 'update', string used for logging.
        :param logs:
        :return:
        """
        if logs is None:
            logs = {}
        # Change inputs and outputs mappings for evaluation
        self.changeInOutMappings()

        # Evaluate on each set separately
        all_metrics = []

        for s in self.set_name:
            # Apply model predictions
            if self.beam_search:
                params_prediction = {
                    'max_batch_size': self.batch_size,
                    'n_parallel_loaders': self.extra_vars.get('n_parallel_loaders', 1),
                    'predict_on_sets': [s],
                    'beam_batch_size': self.beam_batch_size
                    if self.beam_batch_size is not None else self.batch_size,
                    'pos_unk': False,
                    'normalize': self.normalize,
                    'normalization_type': self.normalization_type,
                    'max_eval_samples': self.max_eval_samples
                }

                params_prediction.update(
                    checkDefaultParamsBeamSearch(self.extra_vars))
                predictions_all = self.model_to_eval.predictBeamSearchNet(
                    self.ds, params_prediction)[s]
            else:
                orig_size = self.extra_vars.get('eval_orig_size', False)
                params_prediction = {
                    'batch_size': self.batch_size,
                    'n_parallel_loaders': self.extra_vars.get('n_parallel_loaders', 1),
                    'predict_on_sets': [s],
                    'normalize': self.normalize,
                    'normalization_type': self.normalization_type,
                    'max_eval_samples': self.max_eval_samples,
                    'model_name': self.model_name,
                }
                # Convert predictions
                postprocess_fun = None
                if self.is_3DLabel:
                    postprocess_fun = [
                        self.ds.convert_3DLabels_to_bboxes,
                        self.extra_vars[s]['references_orig_sizes']
                    ]
                elif orig_size:
                    postprocess_fun = [
                        self.ds.resize_semantic_output,
                        self.extra_vars[s]['eval_orig_size_id']
                    ]
                predictions_all = \
                    self.model_to_eval.predictNet(self.ds,
                                                  params_prediction,
                                                  postprocess_fun=postprocess_fun)[s]

            # Single-output model
            if not self.gt_pos or self.gt_pos == 0 or len(self.gt_pos) == 1:
                if len(predictions_all) != 2:
                    predictions_all = [predictions_all]
                gt_positions = [0]

            # Multi-output model
            else:
                gt_positions = self.gt_pos

            # Select each output to evaluate separately
            for gt_pos, type_out, these_metrics, gt_id, write_type, index2word_y, index2word_x in zip(
                    gt_positions, self.output_types, self.metric_name,
                    self.gt_id, self.write_type, self.index2word_y,
                    self.index2word_x):

                predictions = predictions_all[gt_pos]
                prediction_costs = None
                if self.verbose > 0:
                    print('')
                    logger.info('Prediction output ' + str(gt_pos) + ': ' +
                                str(gt_id) + ' (' + str(type_out) + ')')
                # Postprocess outputs of type text
                if type_out == 'text':
                    samples = predictions['samples']
                    prediction_costs = predictions['costs']
                    alphas = None
                    sources = None
                    if params_prediction.get('pos_unk', False):
                        alphas = predictions['alphas']
                        if getattr(self.ds, 'loaded_raw_' + s)[0]:
                            sources = predictions['sources']
                        else:
                            sources = []
                            for preds in predictions['sources']:
                                for src in preds[self.input_text_id]:
                                    sources.append(src)
                            sources = decode_predictions_beam_search(
                                sources,
                                index2word_x,
                                pad_sequences=True,
                                verbose=self.verbose)
                    if self.out_pred_idx is not None:
                        samples = samples[self.out_pred_idx]

                    # Convert predictions into sentences
                    if self.beam_search:
                        decoded_predictions = decode_predictions_beam_search(
                            samples,
                            index2word_y,
                            glossary=self.extra_vars.get('glossary', None),
                            alphas=alphas,
                            x_text=sources,
                            heuristic=self.extra_vars.get('heuristic', 0),
                            mapping=self.extra_vars.get('mapping', None),
                            verbose=self.verbose)
                    else:
                        probs = predictions
                        decoded_predictions = decode_predictions(
                            predictions,
                            1,  # always set temperature to 1
                            index2word_y,
                            self.sampling_type,
                            verbose=self.verbose)
                    # Apply detokenization function if needed
                    if self.extra_vars.get('apply_detokenization', False):
                        decoded_predictions = list(
                            map(self.extra_vars['detokenize_f'],
                                decoded_predictions))

                # Postprocess outputs of type binary
                elif type_out == 'binary':
                    decoded_predictions = decode_multilabel(
                        predictions,
                        index2word_y,
                        min_val=self.min_pred_multilabel[gt_pos],
                        verbose=self.verbose)

                    # Prepare references
                    y_split = getattr(self.ds, 'Y_' + s)
                    y_raw = y_split[gt_id]
                    self.extra_vars[gt_pos][s]['references'] = self.ds.loadBinary(y_raw, gt_id)

                # Postprocess outputs of type 3DLabel
                elif type_out == '3DLabel':
                    self.extra_vars[gt_pos][s] = dict()
                    y_split = getattr(self.ds, 'Y_' + s)
                    ref = y_split[gt_id]
                    ref, original_sizes = self.ds.convert_GT_3DLabels_to_bboxes(ref)
                    self.extra_vars[gt_pos][s]['references'] = ref
                    self.extra_vars[gt_pos][s]['references_orig_sizes'] = original_sizes

                # Postprocess outputs of type 3DSemanticLabel
                elif type_out == '3DSemanticLabel':
                    self.extra_vars[gt_pos]['eval_orig_size'] = self.eval_orig_size
                    self.extra_vars[gt_pos][s] = dict()
                    y_split = getattr(self.ds, 'Y_' + s)
                    ref = y_split[gt_id]
                    if self.eval_orig_size:
                        old_crop = copy.deepcopy(self.ds.img_size_crop)
                        self.ds.img_size_crop = copy.deepcopy(self.ds.img_size)
                        self.extra_vars[gt_pos][s]['eval_orig_size_id'] = np.array([gt_id] * len(ref))
                    ref = self.ds.load_GT_3DSemanticLabels(ref, gt_id)
                    if self.eval_orig_size:
                        self.ds.img_size_crop = copy.deepcopy(old_crop)
                    self.extra_vars[gt_pos][s]['references'] = ref

                # Other output data types
                else:
                    y_split = getattr(self.ds, 'Y_' + s)
                    self.extra_vars[gt_pos][s]['references'] = y_split[gt_id]
                # Store predictions
                if self.write_samples:
                    # Store result
                    filepath = os.path.join(
                        self.save_path,
                        s + '_' + counter_name + '_' + str(epoch) +
                        '_output_' + str(gt_pos) + '.pred')  # results file
                    if write_type == 'list':
                        list2file(filepath, decoded_predictions)
                    elif write_type == 'vqa':
                        try:
                            y_split = getattr(self.ds, 'Y_' + s)
                            refs = y_split[gt_id]
                        except Exception:
                            refs = ['N/A' for _ in range(probs.shape[0])]
                        extra_data_plot = {
                            'reference': refs,
                            'probs': probs,
                            'vocab': index2word_y
                        }
                        list2vqa(filepath,
                                 decoded_predictions,
                                 self.extra_vars[gt_pos][s]['question_ids'],
                                 extra=extra_data_plot)
                    elif write_type == 'listoflists':
                        listoflists2file(filepath, decoded_predictions)
                    elif write_type == 'numpy':
                        numpy2file(filepath, decoded_predictions)
                    elif write_type == '3DLabels':
                        raise NotImplementedError(
                            'Write 3DLabels function is not implemented')
                    elif write_type == '3DSemanticLabel':
                        folder_path = os.path.join(
                            self.save_path,
                            s + '_' + counter_name + '_' + str(epoch))
                        numpy2imgs(
                            folder_path, decoded_predictions,
                            getattr(self.ds, 'X_' + s)[self.input_id],
                            self.ds)
                    else:
                        raise NotImplementedError('The store type "' +
                                                  write_type +
                                                  '" is not implemented.')

                # Store current epoch/iteration in model log
                self.model_to_eval.log(s, counter_name, epoch)
                # Evaluate on each metric
                for metric in these_metrics:
                    if self.verbose > 0:
                        logger.info('Evaluating on metric ' + metric)
                    filepath = os.path.join(self.save_path, s + '.' + metric)

                    if s == 'train':
                        logger.info(
                            "WARNING: evaluation results on 'train' split might be incorrect when "
                            "applying random image shuffling.")

                    # Evaluate on the chosen metric
                    metrics = evaluation.select[metric](
                        pred_list=decoded_predictions,
                        verbose=self.verbose,
                        extra_vars=self.extra_vars[gt_pos],
                        split=s,
                        costs=prediction_costs)
                    self.model_to_eval.log_tensorboard(metrics, epoch, split=s)
                    # Print results to file and store in model log
                    with open(filepath, 'a') as f:
                        header = counter_name + ','
                        line = str(epoch) + ','
                        for metric_ in sorted(metrics):
                            value = metrics[metric_]
                            # Multiple-output model
                            if self.gt_pos and self.gt_pos != 0:
                                metric_ += '_output_' + str(gt_pos)
                            all_metrics.append(metric_)
                            header += metric_ + ','
                            line += str(value) + ','
                            # Store in model log
                            self.model_to_eval.log(s, metric_, value)
                        if not self.written_header:
                            f.write(header + '\n')
                            self.written_header = True
                        f.write(line + '\n')

                    if self.verbose > 0:
                        logger.info('Done evaluating on metric ' + metric)

        # Store losses
        if logs.get('loss') is not None:
            self.model_to_eval.log('train', 'train_loss', logs['loss'])
        if logs.get('valid_loss') is not None:
            self.model_to_eval.log('val', 'val_loss', logs['valid_loss'])

        # Plot results so far
        if self.do_plot:
            if self.metric_name:
                self.model_to_eval.plot(counter_name,
                                        set(all_metrics),
                                        self.set_name,
                                        upperbound=self.max_plot)

        # Save the model
        if self.save_each_evaluation:
            from keras_wrapper.cnn_model import saveModel
            saveModel(self.model_to_eval,
                      epoch,
                      store_iter=not self.eval_on_epochs)

        # Recover inputs and outputs mappings for resume training
        self.recoverInOutMappings()
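
The evaluate method above is written to be driven from a training loop. Below is a minimal sketch of that wiring, assuming a standard Keras callback; the EvalCallback class, its attributes, and the each_n_epochs parameter are illustrative assumptions, not the wrapper's actual API.

# Illustrative only: how an external evaluator exposing evaluate() can be
# invoked once per epoch from a standard Keras callback. Names here
# (EvalCallback, evaluator) are assumptions, not the original wrapper's API.
from keras.callbacks import Callback

class EvalCallback(Callback):
    def __init__(self, evaluator, each_n_epochs=1):
        super(EvalCallback, self).__init__()
        self.evaluator = evaluator          # object providing evaluate()
        self.each_n_epochs = each_n_epochs  # evaluate every N epochs

    def on_epoch_end(self, epoch, logs=None):
        # Keras epochs are 0-based; log them 1-based, as in the method above
        if (epoch + 1) % self.each_n_epochs == 0:
            self.evaluator.evaluate(epoch + 1, counter_name='epoch', logs=logs)
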
Example 3
def score_corpus(args, params):
    """
    Use one or several translation models for scoring source--target pairs.

    :param argparse.Namespace args: Arguments given to the method:

                                * dataset: Dataset instance with data.
                                * source: Text file with source sentences.
                                * target: Text file with target sentences.
                                * splits: Splits to sample. Should be already included in the dataset object.
                                * dest: Output file to save scores.
                                * weights: Weight given to each model in the ensemble. You should provide the same number of weights as models. By default, it applies the same weight to each model (1/N).
                                * verbose: Be verbose or not.
                                * config: Config .pkl for loading the model configuration. If not specified, hyperparameters are read from config.py.
                                * models: Path to the models.
    :param dict params: parameters of the translation model.
    """

    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.model_ensemble import BeamSearchEnsemble

    logging.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    dataset = update_dataset_from_file(dataset,
                                       args.source,
                                       params,
                                       splits=args.splits,
                                       output_text_filename=args.target,
                                       compute_state_below=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[
        params['OUTPUTS_IDS_DATASET'][0]]
    # Apply scoring
    extra_vars = dict()
    extra_vars['tokenize_f'] = getattr(dataset, params['TOKENIZATION_METHOD'])

    model_weights = args.weights
    if model_weights is not None and model_weights != []:
        assert len(model_weights) == len(
            models
        ), 'You should give a weight to each model. You gave %d models and %d weights.' % (
            len(models), len(model_weights))
        model_weights = list(map(float, model_weights))  # list() so len() works on Python 3
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' %
                        str(model_weights))

    for s in args.splits:
        # Apply model predictions
        params_prediction = {
            'max_batch_size': params['BATCH_SIZE'],
            'n_parallel_loaders': params['PARALLEL_LOADERS'],
            'predict_on_sets': [s]
        }

        if params['BEAM_SEARCH']:
            params_prediction['beam_size'] = params['BEAM_SIZE']
            params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST']
            params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
            params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
            params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
            params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
            params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
            params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
            params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
            params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
            params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
            params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
            params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
            params_prediction['pos_unk'] = params.get('POS_UNK', False)
            params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
                else params.get('MAX_OUTPUT_TEXT_LEN', 50)
            params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
            params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
            params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
            params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
            params_prediction['attend_on_output'] = params.get('ATTEND_ON_OUTPUT',
                                                               'transformer' in params['MODEL_TYPE'].lower())
            beam_searcher = BeamSearchEnsemble(models,
                                               dataset,
                                               params_prediction,
                                               model_weights=model_weights,
                                               verbose=args.verbose)
            scores = beam_searcher.scoreNet()[s]

        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' +
                                params['SAMPLING_SAVE_MODE'] +
                                ' is not currently supported.')
        else:
            print(scores)
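
For context, a minimal usage sketch (an assumption, not part of the original module): score_corpus is driven by an argparse-style namespace, so a small launcher could look like the following. All paths and the load_parameters helper are hypothetical placeholders.

# Hypothetical launcher for score_corpus; every path below and the
# load_parameters helper are illustrative assumptions, not the real CLI.
import argparse

args = argparse.Namespace(
    dataset='datasets/Dataset.pkl',      # pickled Dataset instance (assumed path)
    source='data/test.src',              # source sentences to score
    target='data/test.trg',              # target sentences to score
    splits=['test'],                     # splits already present in the dataset
    dest='scores.txt',                   # output file for the scores
    weights=[],                          # empty -> same weight for each model
    verbose=1,
    config=None,                         # fall back to config.py hyperparameters
    models=['trained_models/epoch_10'])  # assumed model path

params = load_parameters()  # hypothetical: reads config.py into a dict
score_corpus(args, params)
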
Example 4
                    'MINLEN_GIVEN_X_FACTOR', 2)

            mapping = None if dataset.mapping == dict() else dataset.mapping
            if params['POS_UNK']:
                params_prediction['heuristic'] = params['HEURISTIC']
                input_text_id = params['INPUTS_IDS_DATASET'][0]
                vocab_src = dataset.vocabulary[input_text_id]['idx2words']
            else:
                input_text_id = None
                vocab_src = None
                mapping = None
            beam_searcher = BeamSearchEnsemble(models,
                                               dataset,
                                               params_prediction,
                                               verbose=args.verbose)
            scores = beam_searcher.scoreNet()[s]

        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' +
                                params['SAMPLING_SAVE_MODE'] +
                                ' is not currently supported.')
        else:
            print(scores)
Example 5
def score_corpus(args, params):

    from data_engine.prepare_data import update_dataset_from_file
    from keras_wrapper.dataset import loadDataset
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.model_ensemble import BeamSearchEnsemble

    logging.info("Using an ensemble of %d models" % len(args.models))
    models = [loadModel(m, -1, full_path=True) for m in args.models]
    dataset = loadDataset(args.dataset)
    if args.source is not None:
        dataset = update_dataset_from_file(dataset, args.source, params, splits=args.splits,
                                           output_text_filename=args.target, compute_state_below=True)

    params['INPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['INPUTS_IDS_DATASET'][0]]
    params['OUTPUT_VOCABULARY_SIZE'] = dataset.vocabulary_len[params['OUTPUTS_IDS_DATASET'][0]]
    # Apply scoring
    extra_vars = dict()
    extra_vars['tokenize_f'] = getattr(dataset, params['TOKENIZATION_METHOD'])

    model_weights = args.weights
    if model_weights is not None and model_weights != []:
        assert len(model_weights) == len(models), 'You should give a weight to each model. You gave %d models and %d weights.' % (len(models), len(model_weights))
        model_weights = list(map(float, model_weights))  # list() so len() works on Python 3
        if len(model_weights) > 1:
            logger.info('Giving the following weights to each model: %s' % str(model_weights))

    for s in args.splits:
        # Apply model predictions
        params_prediction = {'max_batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s]}

        if params['BEAM_SEARCH']:
            params_prediction['beam_size'] = params['BEAM_SIZE']
            params_prediction['maxlen'] = params['MAX_OUTPUT_TEXT_LEN_TEST']
            params_prediction['optimized_search'] = params['OPTIMIZED_SEARCH']
            params_prediction['model_inputs'] = params['INPUTS_IDS_MODEL']
            params_prediction['model_outputs'] = params['OUTPUTS_IDS_MODEL']
            params_prediction['dataset_inputs'] = params['INPUTS_IDS_DATASET']
            params_prediction['dataset_outputs'] = params['OUTPUTS_IDS_DATASET']
            params_prediction['normalize_probs'] = params.get('NORMALIZE_SAMPLING', False)
            params_prediction['alpha_factor'] = params.get('ALPHA_FACTOR', 1.0)
            params_prediction['coverage_penalty'] = params.get('COVERAGE_PENALTY', False)
            params_prediction['length_penalty'] = params.get('LENGTH_PENALTY', False)
            params_prediction['length_norm_factor'] = params.get('LENGTH_NORM_FACTOR', 0.0)
            params_prediction['coverage_norm_factor'] = params.get('COVERAGE_NORM_FACTOR', 0.0)
            params_prediction['pos_unk'] = params.get('POS_UNK', False)
            params_prediction['state_below_maxlen'] = -1 if params.get('PAD_ON_BATCH', True) \
                else params.get('MAX_OUTPUT_TEXT_LEN', 50)
            params_prediction['output_max_length_depending_on_x'] = params.get('MAXLEN_GIVEN_X', True)
            params_prediction['output_max_length_depending_on_x_factor'] = params.get('MAXLEN_GIVEN_X_FACTOR', 3)
            params_prediction['output_min_length_depending_on_x'] = params.get('MINLEN_GIVEN_X', True)
            params_prediction['output_min_length_depending_on_x_factor'] = params.get('MINLEN_GIVEN_X_FACTOR', 2)
            params_prediction['attend_on_output'] = params.get('ATTEND_ON_OUTPUT', 'transformer' in params['MODEL_TYPE'].lower())
            beam_searcher = BeamSearchEnsemble(models, dataset, params_prediction, model_weights=model_weights, verbose=args.verbose)
            scores = beam_searcher.scoreNet()[s]

        # Store result
        if args.dest is not None:
            filepath = args.dest  # results file
            if params['SAMPLING_SAVE_MODE'] == 'list':
                list2file(filepath, scores)
            elif params['SAMPLING_SAVE_MODE'] == 'numpy':
                numpy2file(filepath, scores)
            else:
                raise Exception('The sampling mode ' + params['SAMPLING_SAVE_MODE'] + ' is not currently supported.')
        else:
            print(scores)
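
As a side note, the docstring in Example 3 states that, with no explicit weights, each of the N models gets the same weight (1/N). The original code delegates this default to BeamSearchEnsemble; a standalone helper expressing the same convention might look like this (illustrative only, not from the original module).

# Illustrative helper, not from the original module: resolve the --weights
# argument following the 1/N default described in the docstring.
def resolve_model_weights(models, weights):
    if not weights:                      # default: equal weight per model
        return [1.0 / len(models)] * len(models)
    weights = list(map(float, weights))  # CLI arguments arrive as strings
    assert len(weights) == len(models), \
        'You gave %d models and %d weights.' % (len(models), len(weights))
    return weights
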
Example 6
def apply_Feature_Extractor_model(params, dataset=None, extractor_model=None):
    """
    Function for using a previously trained model for sampling.
    """
    import logging
    import sys
    import time
    from keras_wrapper.cnn_model import loadModel
    from keras_wrapper.extra.read_write import list2file, numpy2file, numpy2hdf5
    # build_dataset and Feature_Extractor are assumed to come from the
    # surrounding project and to be importable here.

    ########### Load data
    if dataset is None:
        dataset = build_dataset(params)

    ########### Load model
    if extractor_model is None and params['RELOAD'] > 0:
        extractor_model = loadModel(params['STORE_PATH'], params['RELOAD'])
    elif extractor_model is None:  # only build a new model if none was given
        extractor_model = Feature_Extractor(params,
                                            type=params['MODEL_TYPE'],
                                            verbose=params['VERBOSE'],
                                            model_name=params['MODEL_NAME'],
                                            store_path=params['STORE_PATH'])
        # Define the inputs and outputs mapping from our Dataset instance to our model
        inputMapping = dict()
        for i, id_in in enumerate(params['INPUTS_IDS_DATASET']):
            if len(extractor_model.ids_inputs) > i:
                pos_source = dataset.ids_inputs.index(id_in)
                id_dest = extractor_model.ids_inputs[i]
                inputMapping[id_dest] = pos_source
        extractor_model.setInputsMapping(inputMapping)

    ########### Apply sampling
    extra_vars = dict()
    for s in params["EVAL_ON_SETS"]:
        # Apply model predictions
        params_prediction = {'batch_size': params['BATCH_SIZE'],
                             'n_parallel_loaders': params['PARALLEL_LOADERS'],
                             'predict_on_sets': [s],
                             'verbose': 0}
        logging.info("<<< Predicting outputs of " + s + " set >>>")

        if params['SAMPLING_SAVE_MODE'] == 'list':
            filepath = extractor_model.model_path + '/' + s + '_sampling.pred' # results file
            list2file(filepath, [], permission='w')

        start_time = time.time()
        eta = -1
        mode = 'w'
        n_samples_split = getattr(dataset, 'len_' + s)  # number of samples in this split
        for n_sample in range(0, n_samples_split, params.get('PREDICTION_STEP', 100)):
            params_prediction['init_sample'] = n_sample
            params_prediction['final_sample'] = min(n_sample + params.get('PREDICTION_STEP', 100), n_samples_split)
            predictions = extractor_model.predictNet(dataset, params_prediction)[s]
            # Store result
            if params['SAMPLING_SAVE_MODE'] == 'list':
                filepath = extractor_model.model_path + '/' + s + '_sampling.pred' # results file
                list2file(filepath, predictions, permission='a')
            elif params['SAMPLING_SAVE_MODE'] == 'npy':
                filepath = extractor_model.model_path + '/' + s + '_' + params.get('MODEL_TYPE', '') + '_features.npy'
                numpy2file(filepath, predictions, permission=mode)
            elif params['SAMPLING_SAVE_MODE'] == 'hdf5':
                filepath = extractor_model.model_path + '/' + s + '_' + params.get('MODEL_TYPE', '') + '_features.hdf5'
                numpy2hdf5(filepath, predictions, permission=mode)
            else:
                raise Exception('Only "list", "npy" or "hdf5" are allowed in "SAMPLING_SAVE_MODE"')
            mode = 'a'
            sys.stdout.write('\r')
            sys.stdout.write("\t Processed %d/%d  -  ETA: %ds " % (n_sample, eval('dataset.len_' + s), int(eta)))
            sys.stdout.flush()
            eta = (n_samples_split - n_sample) * (time.time() - start_time) / max(n_sample, 1)
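
A hedged invocation sketch for the extractor above: every key listed is one the function actually reads, but the values are illustrative assumptions, and a real configuration would typically carry many more entries for build_dataset.

# Illustrative call; keys mirror what apply_Feature_Extractor_model reads
# from params, while the values are assumptions.
params = {
    'BATCH_SIZE': 32,
    'PARALLEL_LOADERS': 1,
    'EVAL_ON_SETS': ['val'],
    'SAMPLING_SAVE_MODE': 'npy',       # one of 'list', 'npy', 'hdf5'
    'PREDICTION_STEP': 100,            # samples predicted per chunk
    'RELOAD': 0,                       # 0 -> build a fresh Feature_Extractor
    'MODEL_TYPE': 'VGG16',             # hypothetical extractor type
    'MODEL_NAME': 'feature_extractor',
    'STORE_PATH': 'trained_models/feature_extractor',
    'VERBOSE': 1,
    'INPUTS_IDS_DATASET': ['images'],
}
apply_Feature_Extractor_model(params)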