Code example #1
File: tf_manager.py Project: weiczhu/neuralmonkey
    def execute(self,
                dataset: Dataset,
                execution_scripts,
                train=False,
                compute_losses=True,
                summaries=True,
                batch_size=None,
                log_progress: int = 0) -> List[ExecutionResult]:
        if batch_size is None:
            batch_size = len(dataset)
        batched_dataset = dataset.batch_dataset(batch_size)
        last_log_time = time.process_time()

        batch_results = [
            [] for _ in execution_scripts]  # type: List[List[ExecutionResult]]
        for batch_id, batch in enumerate(batched_dataset):
            if 0 < log_progress < time.process_time() - last_log_time:
                log("Processed {} examples.".format(batch_id * batch_size))
                last_log_time = time.process_time()
            executables = [s.get_executable(compute_losses=compute_losses,
                                            summaries=summaries,
                                            num_sessions=len(self.sessions))
                           for s in execution_scripts]

            while not all(ex.result is not None for ex in executables):
                self._run_executables(batch, executables, train)

            for script_list, executable in zip(batch_results, executables):
                script_list.append(executable.result)

        collected_results = []  # type: List[ExecutionResult]
        for result_list in batch_results:
            collected_results.append(reduce_execution_results(result_list))

        return collected_results
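
A minimal call sketch for the method above, assuming a TensorFlowManager instance tf_manager, a loaded Dataset val_dataset, and a list of runner objects runners that provide get_executable; the names and argument values are illustrative, not taken from the snippet.

# Hypothetical call site; tf_manager, val_dataset and runners are assumed
# to be built elsewhere (e.g. by the experiment configuration).
results = tf_manager.execute(
    val_dataset,            # Dataset instance to run on
    runners,                # execution scripts providing get_executable()
    train=False,            # inference only, no optimizer updates
    compute_losses=True,    # also fetch losses if target series are present
    batch_size=32,
    log_progress=60)        # log progress at most once per 60 seconds

# One ExecutionResult per runner, already reduced over all batches.
for runner, result in zip(runners, results):
    print(runner.__class__.__name__, result.outputs[:3])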
Code example #2
File: tf_manager.py Project: pmahnke/neuralmonkey
    def execute(self,
                dataset: Dataset,
                execution_scripts,
                train=False,
                compute_losses=True,
                summaries=True,
                batch_size=None) -> List[ExecutionResult]:
        if batch_size is None:
            batch_size = len(dataset)
        batched_dataset = dataset.batch_dataset(batch_size)

        batch_results = [[] for _ in execution_scripts
                         ]  # type: List[List[ExecutionResult]]
        for batch in batched_dataset:
            executables = [
                s.get_executable(compute_losses=compute_losses,
                                 summaries=summaries)
                for s in execution_scripts
            ]
            while not all(ex.result is not None for ex in executables):
                all_feedables = set()  # type: Set[Any]
                all_tensors_to_execute = {}  # type: Dict[Executable, tf.Tensor]
                additional_feed_dicts = []
                tensor_list_lengths = []  # type: List[int]

                for executable in executables:
                    if executable.result is None:
                        (feedables, tensors_to_execute,
                         add_feed_dict) = executable.next_to_execute()
                        all_feedables = all_feedables.union(feedables)
                        all_tensors_to_execute[executable] = tensors_to_execute
                        additional_feed_dicts.append(add_feed_dict)
                        tensor_list_lengths.append(len(tensors_to_execute))
                    else:
                        tensor_list_lengths.append(0)

                feed_dict = _feed_dicts(batch, all_feedables, train=train)
                for fdict in additional_feed_dicts:
                    feed_dict.update(fdict)

                session_results = [
                    sess.run(all_tensors_to_execute, feed_dict=feed_dict)
                    for sess in self.sessions
                ]

                for executable in executables:
                    if executable.result is None:
                        executable.collect_results(
                            [res[executable] for res in session_results])

            for script_list, executable in zip(batch_results, executables):
                script_list.append(executable.result)

        collected_results = []  # type: List[ExecutionResult]
        for result_list in batch_results:
            collected_results.append(reduce_execution_results(result_list))

        return collected_results
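
Example #2 inlines the multi-session run: the pending fetches of every unfinished executable are merged into one dictionary keyed by executable, fed with a single merged feed dict, and the per-session results are routed back to their owners. A minimal pure-Python sketch of that pattern (no TensorFlow; FakeExecutable and all values are illustrative stand-ins):

class FakeExecutable:
    """Stand-in for an Executable with the same next_to_execute/collect_results shape."""

    def __init__(self, name, tensors):
        self.name = name
        self.tensors = tensors
        self.result = None

    def next_to_execute(self):
        # (feedables, tensors_to_execute, additional_feed_dict)
        return set(), self.tensors, {"extra_" + self.name: 1.0}

    def collect_results(self, session_results):
        # One entry per session; the sketch keeps only the first.
        self.result = session_results[0]


def run_round(executables, sessions):
    """One iteration of the merging pattern used in the while-loop above."""
    all_tensors = {}
    feed_dict = {}
    for ex in executables:
        if ex.result is None:
            _, tensors, extra = ex.next_to_execute()
            all_tensors[ex] = tensors    # merged fetch dict, keyed by executable
            feed_dict.update(extra)      # merged additional feed dicts

    # Stand-in for sess.run(all_tensors, feed_dict=feed_dict) in every session.
    session_results = [{ex: [t + "_value" for t in tensors]
                        for ex, tensors in all_tensors.items()}
                       for _ in sessions]

    # Each executable only receives its own slice of every session's results.
    for ex in executables:
        if ex.result is None:
            ex.collect_results([res[ex] for res in session_results])


executables = [FakeExecutable("a", ["loss"]), FakeExecutable("b", ["output"])]
run_round(executables, sessions=[object()])
print([ex.result for ex in executables])   # -> [['loss_value'], ['output_value']]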
Code example #3
    def execute(self,
                dataset: Dataset,
                execution_scripts,
                train=False,
                compute_losses=True,
                summaries=True,
                batch_size=None,
                temp=None,
                log_progress: int = 0) -> List[ExecutionResult]:
        if batch_size is None:
            batch_size = len(dataset)
        batched_dataset = dataset.batch_dataset(batch_size)
        last_log_time = time.process_time()

        batch_results = [[] for _ in execution_scripts
                         ]  # type: List[List[ExecutionResult]]
        for batch_id, batch in enumerate(batched_dataset):
            if (time.process_time() - last_log_time > log_progress
                    and log_progress > 0):
                log("Processed {} examples.".format(batch_id * batch_size))
                last_log_time = time.process_time()
            executables = [
                s.get_executable(compute_losses=compute_losses,
                                 summaries=summaries)
                for s in execution_scripts
            ]
            while not all(ex.result is not None for ex in executables):
                all_feedables = set()  # type: Set[Any]
                all_tensors_to_execute = {}
                additional_feed_dicts = []
                tensor_list_lengths = []  # type: List[int]

                for executable in executables:
                    if executable.result is None:
                        (feedables, tensors_to_execute,
                         add_feed_dict) = executable.next_to_execute()
                        if temp is not None:
                            add_feed_dict[executable.placeholder[0]] = temp
                        all_feedables = all_feedables.union(feedables)
                        all_tensors_to_execute[executable] = tensors_to_execute
                        additional_feed_dicts.append(add_feed_dict)
                        tensor_list_lengths.append(len(tensors_to_execute))
                    else:
                        tensor_list_lengths.append(0)

                feed_dict = _feed_dicts(batch, all_feedables, train=train)
                for fdict in additional_feed_dicts:
                    feed_dict.update(fdict)

                session_results = [
                    sess.run(all_tensors_to_execute, feed_dict=feed_dict)
                    for sess in self.sessions
                ]

                for executable in executables:
                    if executable.result is None:
                        executable.collect_results(
                            [res[executable] for res in session_results])

            for script_list, executable in zip(batch_results, executables):
                script_list.append(executable.result)

        collected_results = []  # type: List[ExecutionResult]
        for result_list in batch_results:
            collected_results.append(reduce_execution_results(result_list))

        return collected_results
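
Relative to examples #1 and #2, this variant only adds the temp argument, which is written into add_feed_dict under each executable's placeholder[0]. A hedged call sketch (names illustrative; that the placeholder is a sampling/softmax temperature is an assumption, not stated by the snippet):

# Illustrative only: feed 0.7 into each executable's first placeholder.
results = tf_manager.execute(
    test_dataset, runners,
    train=False, compute_losses=False, summaries=False,
    temp=0.7, log_progress=30)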
Code example #4
def run_on_dataset(tf_manager: TensorFlowManager,
                   runners: List[BaseRunner],
                   dataset: Dataset,
                   postprocess: Postprocess,
                   batching_scheme: BatchingScheme,
                   write_out: bool = False,
                   log_progress: int = 0) -> Tuple[
                       List[ExecutionResult], Dict[str, List[Any]]]:
    """Apply the model on a dataset and optionally write outputs to files.

    This function processes the dataset in batches and optionally prints out
    the execution progress.

    Args:
        tf_manager: TensorFlow manager with initialized sessions.
        runners: A list of runners that produce the model output series.
        dataset: The dataset on which the model will be executed.
        postprocess: Dataset-level postprocessors to apply to the runner
            outputs.
        batching_scheme: Scheme used for batching.
        write_out: If set, write the outputs to the files defined in the
            dataset object.
        log_progress: Log the progress at most once every this many seconds.

    Returns:
        A tuple of the per-runner execution results and a dictionary mapping
        each output series name to the resulting outputs (sentences or NumPy
        arrays).

    """
    # If the dataset contains the target series, compute also losses.
    contains_targets = all(dataset.has_series(runner.decoder_data_id)
                           for runner in runners
                           if runner.decoder_data_id is not None)

    last_log_time = time.process_time()
    batch_results = [[] for _ in runners]  # type: List[List[ExecutionResult]]

    feedables = set.union(*[runner.feedables for runner in runners])

    processed_examples = 0
    for batch in dataset.batches(batching_scheme):
        if 0 < log_progress < time.process_time() - last_log_time:
            log("Processed {} examples.".format(processed_examples))
            last_log_time = time.process_time()

        execution_results = tf_manager.execute(
            batch, feedables, runners, compute_losses=contains_targets)
        processed_examples += len(batch)

        for script_list, ex_result in zip(batch_results, execution_results):
            script_list.append(ex_result)

    # Transpose runner interim results.
    all_results = [reduce_execution_results(res) for res in batch_results]

    # Convert execution results to dictionary.
    result_data = {runner.output_series: result.outputs
                   for runner, result in zip(runners, all_results)}

    # Run dataset-level postprocessing.
    if postprocess is not None:
        for series_name, postprocessor in postprocess:
            postprocessed = postprocessor(dataset, result_data)
            if not hasattr(postprocessed, "__len__"):
                postprocessed = list(postprocessed)

            result_data[series_name] = postprocessed

    # Check output series lengths.
    for series_id, data in result_data.items():
        if len(data) != len(dataset):
            warn("Output '{}' for dataset '{}' has length {}, but "
                 "len(dataset) == {}".format(series_id, dataset.name,
                                             len(data), len(dataset)))

    if write_out and dataset.outputs is not None:
        for series_id, data in result_data.items():
            if series_id in dataset.outputs:
                path, writer = dataset.outputs[series_id]
                writer(path, data)
            else:
                log("There is no file for output series '{}' in dataset: '{}'"
                    .format(series_id, dataset.name), color="red")
    elif write_out:
        log("Dataset does not have any outputs, nothing to write out.",
            color="red")

    return all_results, result_data
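
A minimal call sketch for run_on_dataset, assuming an initialized TensorFlowManager with restored variables, a configured dataset, and a list of runners; the variable names and the BatchingScheme arguments are illustrative, not taken from the snippet.

# Hypothetical call site; all objects are assumed to be built elsewhere
# (e.g. by the experiment configuration).
batching_scheme = BatchingScheme(batch_size=64)   # illustrative arguments
execution_results, output_data = run_on_dataset(
    tf_manager,
    runners,
    test_dataset,
    postprocess=None,              # no dataset-level postprocessing
    batching_scheme=batching_scheme,
    write_out=True,                # write series to the files the dataset defines
    log_progress=20)

# output_data maps each runner's output series name to its decoded outputs.
for series, outputs in output_data.items():
    print(series, outputs[:2])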