Example #1
0
def test_initialize_tensorflow_without_gpu(mock_tf_config):
    """Check that ``gpus=-1`` leaves no GPU visible.

    The ``mock_tf_config`` fixture reports four fake devices; after
    initialization the last ``set_visible_devices`` call must have received
    an empty device list for the ``'GPU'`` device type.
    """
    fake_devices = ['gpu0', 'gpu1', 'gpu2', 'gpu3']
    mock_tf_config.list_physical_devices.return_value = fake_devices

    with clean_params():
        initialize_tensorflow(gpus=-1)

    mock_tf_config.set_visible_devices.assert_called_with([], 'GPU')
Example #2
0
def test_initialize_tensorflow_only_once(mock_tf_config):
    """Check that the parallelism setters run on the first call only.

    ``initialize_tensorflow`` is invoked twice inside one ``clean_params``
    context. The intra-op and inter-op thread setters must each be called
    exactly once by the first invocation and not at all by the second.
    No ``gpus`` argument is passed, so ``set_visible_devices`` must never be
    called.
    """
    mock_tf_config.list_physical_devices.return_value = [
        'gpu0', 'gpu1', 'gpu2', 'gpu3'
    ]
    threading_cfg = mock_tf_config.threading

    with clean_params():
        # First initialization: TensorFlow parallelism gets configured.
        initialize_tensorflow()
        threading_cfg.set_intra_op_parallelism_threads.assert_called_once()
        threading_cfg.set_inter_op_parallelism_threads.assert_called_once()

        # Forget every call recorded so far on the threading mock.
        threading_cfg.reset_mock()

        # Second initialization: the setters must be skipped, because
        # TensorFlow would raise an exception if they were called again.
        initialize_tensorflow()
        threading_cfg.set_intra_op_parallelism_threads.assert_not_called()
        threading_cfg.set_inter_op_parallelism_threads.assert_not_called()

    # No GPUs were requested, so device visibility was never touched.
    mock_tf_config.set_visible_devices.assert_not_called()
Example #3
0
def test_initialize_tensorflow_with_gpu_list(mock_tf_config):
    """Check that a list of GPU indices selects exactly those devices.

    With four fake devices reported and ``gpus=[1, 2]`` requested, the last
    ``set_visible_devices`` call must carry the devices at positions 1 and 2.
    """
    # The device values are arbitrary placeholders; they only need to be
    # distinguishable from one another.
    fake_devices = ['gpu0', 'gpu1', 'gpu2', 'gpu3']
    expected_visible = ['gpu1', 'gpu2']
    mock_tf_config.list_physical_devices.return_value = fake_devices

    with clean_params():
        initialize_tensorflow(gpus=[1, 2])

    mock_tf_config.set_visible_devices.assert_called_with(
        expected_visible, 'GPU')
Example #4
0
def init_tensorflow_cpu(request):
    """Set up TensorFlow for the test session so that it only uses CPUs.

    This fixture runs once before any tests and keeps the main pytest
    process from claiming GPU resources.

    That matters for tests that spawn GPU-using subprocesses (e.g.,
    hyperopt): if the main process took all the GPU memory, those
    subprocesses would crash with OOM errors.

    :param request: fixture request argument; not used in the body.
    """
    # -1 is the value passed as ``gpus`` to ask for no GPU devices.
    no_gpus = -1
    initialize_tensorflow(gpus=no_gpus)
Example #5
0
    def __init__(self,
                 model_definition,
                 logging_level=logging.ERROR,
                 use_horovod=None,
                 gpus=None,
                 gpu_memory_limit=None,
                 allow_parallel_threads=True,
                 random_seed=default_random_seed):
        """Load the model definition and prepare the runtime environment.

        :param model_definition: (dict, string) in-memory representation of
               the model definition, or a string path to a saved definition
               file. A path is opened and parsed with `yaml.safe_load`, and
               is also remembered in `self.model_definition_fp`.
        :param logging_level: log level handed to `set_logging_level`.
        :param use_horovod: (bool) use Horovod for distributed training. Will
               be set automatically if `horovodrun` is used to launch the
               training script.
        :param gpus: (string, default: `None`) list of GPUs to use (it uses
               the same syntax of CUDA_VISIBLE_DEVICES).
        :param gpu_memory_limit: (int, default: `None`) maximum memory in MB
               to allocate per GPU device.
        :param allow_parallel_threads: (bool, default: `True`) allow
               TensorFlow to use multithreading parallelism to improve
               performance at the cost of determinism.
        :param random_seed: accepted but not used by this constructor; the
               seeding call below is currently commented out.
        """
        # A string is taken to be a file path; any other value is deep-copied
        # rather than used directly.
        definition_path = None
        if isinstance(model_definition, str):
            definition_path = model_definition
            with open(definition_path, 'r') as definition_file:
                loaded_definition = yaml.safe_load(definition_file)
        else:
            loaded_definition = copy.deepcopy(model_definition)
        self.model_definition_fp = definition_path

        # Fill in defaults for anything the user left unspecified.
        self.model_definition = merge_with_defaults(loaded_definition)

        # Horovod is configured first because TensorFlow setup receives it.
        self._horovod = configure_horovod(use_horovod)

        # Logging.
        self.set_logging_level(logging_level)

        # TensorFlow.
        initialize_tensorflow(
            gpus,
            gpu_memory_limit,
            allow_parallel_threads,
            self._horovod,
        )
        # todo refactoring: decide where to put this,
        #  here or at the beginning of training.
        #  Either way make sure it is called before the model is initialized.
        # tf.random.set_seed(random_seed)

        # Model state: nothing is built or loaded yet.
        self.model = None
        self.training_set_metadata = None

        # Online training state.
        self._online_trainer = None
Example #6
0
def test_initialize_tensorflow_with_horovod_explicit_gpus(mock_tf_config):
    """Check that an explicit ``gpus='-1'`` is honored when Horovod is given.

    A Horovod stand-in reporting local rank 1 of 4 is supplied together with
    ``gpus='-1'``; the last ``set_visible_devices`` call must still carry an
    empty device list.
    """
    fake_devices = ['gpu0', 'gpu1', 'gpu2', 'gpu3']
    mock_tf_config.list_physical_devices.return_value = fake_devices

    fake_hvd = Mock()
    fake_hvd.local_rank.return_value = 1
    fake_hvd.local_size.return_value = 4

    with clean_params():
        initialize_tensorflow(gpus='-1', horovod=fake_hvd)

    mock_tf_config.set_visible_devices.assert_called_with([], 'GPU')
Example #7
0
    def __init__(self,
                 gpus=None,
                 gpu_memory_limit=None,
                 allow_parallel_threads=True,
                 **kwargs):
        """Initialize Horovod and TensorFlow, then the parent class.

        :param gpus: forwarded to `initialize_tensorflow` as `gpus`.
        :param gpu_memory_limit: forwarded to `initialize_tensorflow` as
               `gpu_memory_limit`.
        :param allow_parallel_threads: forwarded to `initialize_tensorflow`
               as `allow_parallel_threads`.
        :param kwargs: passed through to the parent constructor, together
               with the Horovod handle as `horovod`.
        """
        hvd = initialize_horovod()

        tensorflow_options = {
            'gpus': gpus,
            'gpu_memory_limit': gpu_memory_limit,
            'allow_parallel_threads': allow_parallel_threads,
            'horovod': hvd,
        }
        initialize_tensorflow(**tensorflow_options)

        super().__init__(horovod=hvd, **kwargs)

        # Only return results from rank 0 to reduce network overhead
        self.batch_predict = return_first(self.batch_predict)
        self.batch_evaluation = return_first(self.batch_evaluation)
Example #8
0
def init_tensorflow_cpu(request):
    """Set up TensorFlow for the test session: CPU only, eager functions.

    This fixture runs once before any tests and keeps the main pytest
    process from claiming GPU resources. That matters for tests that spawn
    GPU-using subprocesses (e.g., hyperopt): if the main process took all
    the GPU memory, those subprocesses would crash with OOM errors.

    Most tests are run eagerly because the cost of graph construction can
    easily increase runtime by an order of magnitude for small tests. Tests
    that execute in subprocesses, and tests in `test_graph_execution.py`,
    still run in graph mode.

    :param request: fixture request argument; not used in the body.
    """
    import tensorflow as tf

    # NOTE(review): newer TensorFlow releases appear to expose this as
    # `tf.config.run_functions_eagerly` and deprecate the experimental name;
    # confirm against the TensorFlow version this project pins.
    tf.config.experimental_run_functions_eagerly(True)

    # -1 is the value passed as ``gpus`` to ask for no GPU devices.
    no_gpus = -1
    initialize_tensorflow(gpus=no_gpus)
Example #9
0
def test_initialize_tensorflow_with_horovod_bad_local_rank(
        mock_tf_config, mock_warnings):
    """Check the fallback when Horovod reports more workers than devices.

    The Horovod stand-in reports a local size of 5 while only four fake
    devices exist. The last ``set_visible_devices`` call must carry an empty
    device list, and a warning must have been issued.
    """
    fake_devices = ['gpu0', 'gpu1', 'gpu2', 'gpu3']
    mock_tf_config.list_physical_devices.return_value = fake_devices

    # Configure the child mocks' return values directly in the constructor.
    fake_hvd = Mock(**{
        'local_rank.return_value': 1,
        'local_size.return_value': 5,
    })

    with clean_params():
        initialize_tensorflow(horovod=fake_hvd)

    mock_tf_config.set_visible_devices.assert_called_with([], 'GPU')
    mock_warnings.warn.assert_called()
Example #10
0
 def initialize_tensorflow(self, *args, **kwargs):
     """Forward every argument unchanged to the module-level
     `initialize_tensorflow` function; nothing is returned."""
     initialize_tensorflow(
         *args,
         **kwargs,
     )
Example #11
0
 def initialize_tensorflow(self, **kwargs):
     """Initialize TensorFlow without GPUs here and keep `kwargs` for later.

     The keyword arguments are not applied in this call; they are only
     stored on `self._tensorflow_kwargs`.
     """
     # -1 asks for no GPUs, so that no GPU resources are claimed on the
     # head node.
     no_gpus = -1
     initialize_tensorflow(gpus=no_gpus)
     self._tensorflow_kwargs = kwargs
Example #12
0
 def initialize_tensorflow(self, *args, **kwargs):
     """Forward the arguments to the module-level `initialize_tensorflow`
     function, adding this object's `_horovod` as the `horovod` keyword."""
     initialize_tensorflow(
         *args,
         horovod=self._horovod,
         **kwargs,
     )