def test_calculating_batch_size(self):
  with self.cached_session():
    # 64 is the number of input samples.
    inputs = np.zeros((64, 3), dtype=np.float32)
    targets = np.zeros((64, 4), dtype=np.float32)

    model = get_model()
    optimizer = gradient_descent.GradientDescentOptimizer(0.001)
    loss = 'mse'
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:0', '/device:CPU:0'])
    strategy.extended._require_static_shapes = True

    model.compile(optimizer, loss, distribute=strategy)
    iterator = model._distribution_standardize_user_data(
        inputs, targets, batch_size=None, check_steps=True,
        steps_name='steps', steps=3)

    # The global batch size (21) across all replicas is the ratio of the
    # input samples (64) to the steps (3), rounded down to an integer.
    # The per-replica batch size (10) is the ratio of the global batch
    # size (21) to the number of replicas (2), again rounded down.
    self.assertEqual(10, distributed_training_utils.get_batch_dimension(
        iterator))
def test_calculating_batch_size(self):
  with self.cached_session():
    # 64 is the number of input samples.
    inputs = np.zeros((64, 3), dtype=np.float32)
    targets = np.zeros((64, 4), dtype=np.float32)

    model = get_model()
    optimizer = gradient_descent.GradientDescentOptimizer(0.001)
    loss = 'mse'
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:0', '/device:CPU:0'])
    strategy._require_static_shapes = True

    model.compile(optimizer, loss, distribute=strategy)
    iterator = model._distribution_standardize_user_data(
        inputs, targets, batch_size=None, check_steps=True,
        steps_name='steps', steps=3)

    # The global batch size (21) across all replicas is the ratio of the
    # input samples (64) to the steps (3), rounded down to an integer.
    # The per-replica batch size (10) is the ratio of the global batch
    # size (21) to the number of replicas (2), again rounded down.
    self.assertEqual(10, distributed_training_utils.get_batch_dimension(
        iterator._iterator))
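# Illustrative sketch (not from the original source): a minimal,
# dependency-free spelling-out of the batch-size arithmetic that both test
# variants above assert on. The helper name `_infer_batch_sizes` is
# hypothetical and exists only to show the floor-division rounding described
# in the test comments.
def _infer_batch_sizes(num_samples, steps, num_replicas):
  # Global batch size across all replicas: input samples per step,
  # rounded down to an integer.
  global_batch_size = num_samples // steps
  # Per-replica batch size: the global batch size split across replicas,
  # again rounded down.
  per_replica_batch_size = global_batch_size // num_replicas
  return global_batch_size, per_replica_batch_size

# 64 samples over 3 steps on 2 replicas: 64 // 3 == 21 and 21 // 2 == 10,
# matching the `assertEqual(10, ...)` in the tests above.
assert _infer_batch_sizes(64, 3, 2) == (21, 10)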
def experimental_tpu_predict_loop(model, dataset, verbose=0, steps=None,
                                  callbacks=None):
  """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset for input data.
      verbose: Integer. Verbosity mode, 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.
      callbacks: List of callbacks to be called during prediction.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
  mode = ModeKeys.PREDICT
  dataset_fully_shaped = (distributed_training_utils.
                          is_dataset_shape_fully_defined(dataset))
  padding_handler = None
  if not dataset_fully_shaped:
    # TODO(hongjunchoi): Investigate whether operations from
    # PartialBatchPaddingHandler are unnecessarily pruned out
    # during graph optimization.
    padding_handler = padding_util.PartialBatchPaddingHandler(
        model._feed_output_shapes)
    batch_size, _, prefetch_buffer = input_lib._get_dataset_attributes(dataset)
    padding_handler.padded_batch_size = batch_size
    padding_handler.padding_mask = dataset.reduce(padding_handler.padding_mask,
                                                  padding_handler.update_mask)

    dataset = dataset.map(padding_handler.pad_batch)
    dataset = dataset.apply(batching.unbatch())
    # At this point, it is guaranteed that the dataset does not
    # have partial batches. Thus, we set `drop_remainder=True` to
    # get static shape information about the elements in the dataset.
    dataset = dataset.batch(batch_size, drop_remainder=True)

    if prefetch_buffer is not None:
      dataset = dataset.prefetch(prefetch_buffer)

  current_strategy = model._distribution_strategy
  iterator = distributed_training_utils.get_iterator(dataset, current_strategy)

  scope = distributed_training_utils.distributed_scope(
      strategy=current_strategy, learning_phase=0)
  scope.__enter__()

  def _per_device_predict_function(model):
    model._make_predict_function()
    return (model.predict_function.inputs,
            model.predict_function.outputs,
            model.predict_function.updates_op,
            model.predict_function.session_kwargs)

  def step_fn(ctx, inputs):
    """Clones the model and calls make_predict_function."""
    if model._compile_distribution:
      distributed_training_utils.clone_model_on_replicas(
          model, current_strategy, mode, inputs=inputs)
    else:
      distributed_training_utils._build_distributed_network(
          model, current_strategy, mode, inputs)

    (grouped_inputs, grouped_outputs, grouped_updates,
     grouped_session_args) = current_strategy.extended.call_for_each_replica(
         _per_device_predict_function,
         args=(distributed_training_utils.get_distributed_model(
             model, ModeKeys.PREDICT),))

    (all_inputs, all_outputs, all_updates,
     all_session_args) = distributed_training_utils.unwrap_values(
         current_strategy, grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args)

    combined_fn = K.function(
        all_inputs, all_outputs,
        updates=all_updates,
        name='distributed_predict_function',
        **all_session_args)

    for label, output in zip(model.output_names, combined_fn.outputs):
      ctx.set_last_step_output(label, output)

    return combined_fn.updates_op

  # Add initial dummy values for outputs.
  initial_loop_values = {}
  batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
  for name, tensor in zip(model.output_names, model.outputs):
    # TODO(priyag): This is a workaround as we do not know the batch dimension
    # of the model's output at this point.
    shape = tensor_shape.TensorShape(tensor.shape.dims)
    shape.dims = [batch_dimension] + shape.dims[1:]
    initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

  # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
  ctx = current_strategy.extended.experimental_run_steps_on_iterator(
      step_fn, iterator, iterations=1,
      initial_loop_values=initial_loop_values)

  predict_op = ctx.run_op
  output_tensors = ctx.last_step_outputs

  if verbose == 1:
    progbar = Progbar(target=steps)

  if model._compile_distribution:
    distributed_training_utils._copy_weights_to_distributed_model(model, mode)

  distributed_training_utils._reset_metrics(model)

  callbacks = cbks.configure_callbacks(
      callbacks,
      model,
      do_validation=False,
      epochs=1,
      steps_per_epoch=steps,
      verbose=verbose,
      count_mode='steps',
      mode=mode)
  callbacks._call_begin_hook(mode)

  assert steps is not None
  # Since we do not know how many samples we will see, we cannot pre-allocate
  # the returned Numpy arrays. Instead, we store one array per batch seen
  # and concatenate them upon returning.
  unconcatenated_outs = [[] for _ in model.outputs]
  for step in range(steps):
    batch_logs = {'batch': step, 'size': 1}
    callbacks._call_batch_hook(mode, 'begin', step, batch_logs)
    _, batch_outs = K.get_session().run([predict_op, output_tensors])
    # TODO(priyag): maybe need to unwrap the outputs first for
    # MirroredStrategy.
    for i, label in enumerate(model.output_names):
      unconcatenated_outs[i].extend(batch_outs[label])
    batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
    callbacks._call_batch_hook(mode, 'end', step, batch_logs)
    if verbose >= 1:
      progbar.update(step + 1)

  callbacks._call_end_hook(mode)

  scope.__exit__(None, None, None)

  if len(unconcatenated_outs) == 1:
    prediction_result = np.concatenate(unconcatenated_outs[0], axis=0)
  else:
    prediction_result = [
        np.concatenate(unconcatenated_outs[i], axis=0)
        for i in range(len(unconcatenated_outs))
    ]

  if padding_handler:
    prediction_result = padding_handler.apply_mask(prediction_result)

  return prediction_result
def experimental_tpu_predict_loop(model, dataset, verbose=0, steps=None):
  """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset for input data.
      verbose: Integer. Verbosity mode, 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
  dataset_fully_shaped = (
      distributed_training_utils.is_dataset_shape_fully_defined(dataset))
  padding_handler = None
  if not dataset_fully_shaped:
    # TODO(hongjunchoi): Investigate whether operations from
    # PartialBatchPaddingHandler are unnecessarily pruned out
    # during graph optimization.
    padding_handler = padding_util.PartialBatchPaddingHandler(
        model._feed_output_shapes)
    batched_dataset = input_lib._get_batched_dataset(dataset)
    batch_size, _, prefetch_buffer = input_lib._get_batched_dataset_attributes(
        batched_dataset)
    padding_handler.padded_batch_size = batch_size
    padding_handler.padding_mask = dataset.reduce(padding_handler.padding_mask,
                                                  padding_handler.update_mask)

    dataset = dataset.map(padding_handler.pad_batch)
    dataset = dataset.apply(batching.unbatch())
    # At this point, it is guaranteed that the dataset does not
    # have partial batches. Thus, we set `drop_remainder=True` to
    # get static shape information about the elements in the dataset.
    dataset = dataset.batch(batch_size, drop_remainder=True)

    if prefetch_buffer is not None:
      dataset = dataset.prefetch(prefetch_buffer)

  current_strategy = model._distribution_strategy
  iterator = distributed_training_utils.get_iterator(dataset, current_strategy)

  scope = distributed_training_utils.distributed_scope(
      strategy=current_strategy, learning_phase=0)
  scope.__enter__()

  def _per_device_predict_function(model):
    model._make_predict_function()
    return (model.predict_function.inputs,
            model.predict_function.outputs,
            model.predict_function.updates_op,
            model.predict_function.session_kwargs)

  def step_fn(ctx, inputs):
    """Clones the model and calls make_predict_function."""
    if model._compile_distribution:
      distributed_training_utils.clone_model_on_replicas(
          model, current_strategy, ModeKeys.PREDICT, inputs=inputs)
    else:
      distributed_training_utils._build_distributed_network(
          model, current_strategy, ModeKeys.PREDICT, inputs)

    (grouped_inputs, grouped_outputs, grouped_updates,
     grouped_session_args) = current_strategy.extended.call_for_each_replica(
         _per_device_predict_function,
         args=(model._distributed_model_predict,))

    (all_inputs, all_outputs, all_updates,
     all_session_args) = distributed_training_utils.unwrap_values(
         current_strategy, grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args)

    combined_fn = K.function(
        all_inputs, all_outputs,
        updates=all_updates,
        name='distributed_predict_function',
        **all_session_args)

    for label, output in zip(model.output_names, combined_fn.outputs):
      ctx.set_last_step_output(label, output)

    return combined_fn.updates_op

  # Add initial dummy values for outputs.
  initial_loop_values = {}
  batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
  for name, tensor in zip(model.output_names, model.outputs):
    # TODO(priyag): This is a workaround as we do not know the batch dimension
    # of the model's output at this point.
    shape = tensor_shape.TensorShape(tensor.shape.dims)
    shape.dims = [batch_dimension] + shape.dims[1:]
    initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

  # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
  ctx = current_strategy.extended.experimental_run_steps_on_iterator(
      step_fn, iterator, iterations=1,
      initial_loop_values=initial_loop_values)

  predict_op = ctx.run_op
  output_tensors = ctx.last_step_outputs

  if verbose == 1:
    progbar = Progbar(target=steps)

  if model._compile_distribution:
    distributed_training_utils._copy_weights_to_distributed_model(
        model, ModeKeys.PREDICT)

  distributed_training_utils._reset_metrics(model)

  assert steps is not None
  # Since we do not know how many samples we will see, we cannot pre-allocate
  # the returned Numpy arrays. Instead, we store one array per batch seen
  # and concatenate them upon returning.
  unconcatenated_outs = [[] for _ in model.outputs]
  for step in range(steps):
    _, batch_outs = K.get_session().run([predict_op, output_tensors])
    # TODO(priyag): maybe need to unwrap the outputs first for
    # MirroredStrategy.
    for i, label in enumerate(model.output_names):
      unconcatenated_outs[i].extend(batch_outs[label])
    if verbose >= 1:
      progbar.update(step + 1)

  scope.__exit__(None, None, None)

  if len(unconcatenated_outs) == 1:
    prediction_result = np.concatenate(unconcatenated_outs[0], axis=0)
  else:
    prediction_result = [
        np.concatenate(unconcatenated_outs[i], axis=0)
        for i in range(len(unconcatenated_outs))
    ]

  if padding_handler:
    prediction_result = padding_handler.apply_mask(prediction_result)

  return prediction_result
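# Illustrative sketch (not from the original source): the unbatch/rebatch
# trick used in the loops above, written against the public tf.data API
# (TF 2.x) rather than the internal `batching.unbatch()` helper. Once padding
# guarantees that every batch is full, re-batching with `drop_remainder=True`
# gives the dataset elements a statically known batch dimension.
import tensorflow as tf

ds = tf.data.Dataset.range(64).batch(8)
print(ds.element_spec)  # shape=(None,): the batch dimension is unknown.
ds = ds.unbatch().batch(8, drop_remainder=True)
print(ds.element_spec)  # shape=(8,): the batch dimension is now static.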
def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
  """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      iterator: Iterator for input data.
      verbose: Integer, Verbosity mode 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
  current_strategy = model._distribution_strategy
  K.get_session().run(current_strategy.initialize())

  # TODO(priyag, sourabhbajaj): This should likely not be hardcoded here.
  K.set_learning_phase(0)

  def _per_device_predict_function(model):
    model._make_predict_function()
    return (model.predict_function.inputs,
            model.predict_function.outputs,
            model.predict_function.updates_op,
            model.predict_function.session_kwargs)

  def step_fn(ctx, inputs):
    """Clones the model and calls make_predict_function."""
    # TODO(priyag, sourabhbajaj): The model gets cloned every time
    # fit/test/predict is called. We should look into caching this keyed on
    # input shapes.
    clone_model_on_replicas(
        model,
        current_strategy,
        make_callback_model=False,
        inputs=inputs,
        mode=_Mode.PREDICT)

    (grouped_inputs, grouped_outputs, grouped_updates,
     grouped_session_args) = current_strategy.call_for_each_replica(
         _per_device_predict_function,
         args=(model._grouped_model_predict,))

    (all_inputs, all_outputs, all_updates,
     all_session_args) = distributed_training_utils.unwrap_values(
         current_strategy, grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args)

    combined_fn = K.function(
        all_inputs, all_outputs,
        updates=all_updates,
        name='distributed_predict_function',
        **all_session_args)

    for label, output in zip(model.output_names, combined_fn.outputs):
      ctx.set_last_step_output(label, output)

    return combined_fn.updates_op

  # Add initial dummy values for outputs.
  initial_loop_values = {}
  batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
  for name, tensor in zip(model.output_names, model.outputs):
    # TODO(priyag): This is a workaround as we do not know the batch dimension
    # of the model's output at this point.
    shape = tensor_shape.TensorShape(tensor.shape.dims)
    shape.dims = [batch_dimension] + shape.dims[1:]
    initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

  with current_strategy.scope():
    # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
    ctx = current_strategy.run_steps_on_dataset(
        step_fn, iterator, iterations=1,
        initial_loop_values=initial_loop_values)

  predict_op = ctx.run_op
  output_tensors = ctx.last_step_outputs

  if verbose == 1:
    progbar = Progbar(target=steps)

  # Copy the weights from the original model to each of the replicated models.
  orig_model_weights = model.get_weights()
  with current_strategy.scope():
    distributed_model = current_strategy.unwrap(
        model._grouped_model_predict)[0]
    distributed_training_utils.set_weights(current_strategy, distributed_model,
                                           orig_model_weights)

  assert steps is not None
  # Since we do not know how many samples we will see, we cannot pre-allocate
  # the returned Numpy arrays. Instead, we store one array per batch seen
  # and concatenate them upon returning.
  unconcatenated_outs = [[] for _ in model.outputs]
  for step in range(steps):
    _, batch_outs = K.get_session().run([predict_op, output_tensors])
    # TODO(priyag): maybe need to unwrap the outputs first for
    # MirroredStrategy.
    for i, label in enumerate(model.output_names):
      unconcatenated_outs[i].extend(batch_outs[label])
    if verbose >= 1:
      progbar.update(step + 1)

  K.get_session().run(current_strategy.finalize())

  if len(unconcatenated_outs) == 1:
    return np.concatenate(unconcatenated_outs[0], axis=0)
  return [
      np.concatenate(unconcatenated_outs[i], axis=0)
      for i in range(len(unconcatenated_outs))
  ]
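# Illustrative sketch (not from the original source): the weight-copy step in
# the loop above, reduced to its essentials. `strategy`, `model`, and
# `replicated_model` stand in for the objects built above, and this variant
# uses plain Keras `set_weights` rather than the internal
# `distributed_training_utils.set_weights` helper.
def _copy_weights(strategy, model, replicated_model):
  orig_weights = model.get_weights()  # numpy weights from the user's model
  with strategy.scope():
    # Assign them to the replicated model that the predict ops actually run.
    replicated_model.set_weights(orig_weights)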
def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
  """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      iterator: Iterator for input data.
      verbose: Integer, Verbosity mode 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
  current_strategy = model._distribution_strategy
  K.get_session().run(current_strategy.initialize())

  # TODO(priyag, sourabhbajaj): This should likely not be hardcoded here.
  K.set_learning_phase(0)

  def _per_device_predict_function(model):
    model._make_predict_function()
    return (model.predict_function.inputs,
            model.predict_function.outputs,
            model.predict_function.updates_op,
            model.predict_function.session_kwargs)

  def step_fn(ctx, *inputs):
    """Clones the model and calls make_predict_function."""
    # TODO(priyag, sourabhbajaj): The model gets cloned every time
    # fit/test/predict is called. We should look into caching this keyed on
    # input shapes.
    clone_model_on_replicas(
        model,
        current_strategy,
        make_callback_model=False,
        inputs=inputs,
        mode=_Mode.PREDICT)

    (grouped_inputs, grouped_outputs, grouped_updates,
     grouped_session_args) = current_strategy.call_for_each_replica(
         _per_device_predict_function,
         args=(model._grouped_model_predict,))

    (all_inputs, all_outputs, all_updates,
     all_session_args) = distributed_training_utils.unwrap_values(
         current_strategy, grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args)

    combined_fn = K.function(
        all_inputs, all_outputs,
        updates=all_updates,
        name='distributed_predict_function',
        **all_session_args)

    for label, output in zip(model.output_names, combined_fn.outputs):
      ctx.set_last_step_output(label, output)

    return combined_fn.updates_op

  # Add initial dummy values for outputs.
  initial_loop_values = {}
  batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
  for name, tensor in zip(model.output_names, model.outputs):
    # TODO(priyag): This is a workaround as we do not know the batch dimension
    # of the model's output at this point.
    shape = tensor_shape.TensorShape(tensor.shape.dims)
    shape.dims = [batch_dimension] + shape.dims[1:]
    initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

  with current_strategy.scope():
    # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
    ctx = current_strategy.run_steps_on_dataset(
        step_fn, iterator, iterations=1,
        initial_loop_values=initial_loop_values)

  predict_op = ctx.run_op
  output_tensors = ctx.last_step_outputs

  if verbose == 1:
    progbar = Progbar(target=steps)

  # Copy the weights from the original model to each of the replicated models.
  orig_model_weights = model.get_weights()
  with current_strategy.scope():
    distributed_model = current_strategy.unwrap(model._grouped_model_predict)[0]
    distributed_training_utils.set_weights(
        current_strategy, distributed_model, orig_model_weights)

  assert steps is not None
  # Since we do not know how many samples we will see, we cannot pre-allocate
  # the returned Numpy arrays. Instead, we store one array per batch seen
  # and concatenate them upon returning.
  unconcatenated_outs = [[] for _ in model.outputs]
  for step in range(steps):
    _, batch_outs = K.get_session().run([predict_op, output_tensors])
    # TODO(priyag): maybe need to unwrap the outputs first for
    # MirroredStrategy.
    for i, label in enumerate(model.output_names):
      unconcatenated_outs[i].extend(batch_outs[label])
    if verbose >= 1:
      progbar.update(step + 1)

  K.get_session().run(current_strategy.finalize())

  if len(unconcatenated_outs) == 1:
    return np.concatenate(unconcatenated_outs[0], axis=0)
  return [
      np.concatenate(unconcatenated_outs[i], axis=0)
      for i in range(len(unconcatenated_outs))
  ]
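# Illustrative sketch (not from the original source): how the dummy
# `initial_loop_values` above are shaped. The unknown leading (batch)
# dimension of each model output is replaced with the batch dimension read
# off the iterator, so the step loop has correctly shaped zero tensors to
# start from. This assumes the non-batch output dimensions are fully defined,
# which the loops above also rely on; `_dummy_initial_value` is a
# hypothetical helper using the public TF API.
import tensorflow as tf

def _dummy_initial_value(output_tensor, batch_dimension):
  # Keep every dimension of the model output except the leading one.
  static_shape = [batch_dimension] + output_tensor.shape.as_list()[1:]
  return tf.zeros(static_shape, output_tensor.dtype)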
def experimental_tpu_predict_loop(model,
                                  dataset,
                                  verbose=0,
                                  steps=None,
                                  callbacks=None):
  """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset for input data.
      verbose: Integer. Verbosity mode, 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.
      callbacks: List of callbacks to be called during prediction.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
  mode = ModeKeys.PREDICT
  steps = training_utils.infer_steps_for_dataset(dataset, steps,
                                                 steps_name='steps')
  dataset_fully_shaped = (
      distributed_training_utils.is_dataset_shape_fully_defined(dataset))
  padding_handler = None
  if not dataset_fully_shaped:
    # TODO(hongjunchoi): Investigate whether operations from
    # PartialBatchPaddingHandler are unnecessarily pruned out
    # during graph optimization.
    padding_handler = padding_util.PartialBatchPaddingHandler(
        model._feed_output_shapes)
    batch_size, _, prefetch_buffer = input_lib._get_dataset_attributes(dataset)
    padding_handler.padded_batch_size = batch_size
    padding_handler.padding_mask = dataset.reduce(padding_handler.padding_mask,
                                                  padding_handler.update_mask)

    dataset = dataset.map(padding_handler.pad_batch)
    dataset = dataset.apply(batching.unbatch())
    # At this point, it is guaranteed that the dataset does not
    # have partial batches. Thus, we set `drop_remainder=True` to
    # get static shape information about the elements in the dataset.
    dataset = dataset.batch(batch_size, drop_remainder=True)

    if prefetch_buffer is not None:
      dataset = dataset.prefetch(prefetch_buffer)

  current_strategy = model._distribution_strategy
  iterator = distributed_training_utils.get_iterator(dataset, current_strategy)

  scope = distributed_training_utils.distributed_scope(
      strategy=current_strategy, learning_phase=0)
  scope.__enter__()

  out_labels = model.output_names
  step_fn = _make_step_fn(model, ModeKeys.PREDICT, current_strategy,
                          out_labels)

  # Add initial dummy values for outputs.
  initial_loop_values = {}
  batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
  for name, tensor in zip(model.output_names, model.outputs):
    # TODO(priyag): This is a workaround as we do not know the batch dimension
    # of the model's output at this point.
    shape = tensor_shape.TensorShape(tensor.shape.dims)
    shape.dims = [batch_dimension] + shape.dims[1:]
    initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

  # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
  ctx = current_strategy.extended.experimental_run_steps_on_iterator(
      step_fn, iterator, iterations=1,
      initial_loop_values=initial_loop_values)

  predict_op = ctx.run_op
  output_tensors = ctx.last_step_outputs

  if verbose == 1:
    progbar = Progbar(target=steps)

  if model._compile_distribution:
    distributed_training_utils._copy_weights_to_distributed_model(model, mode)

  distributed_training_utils._reset_metrics(model)

  callbacks = cbks.configure_callbacks(
      callbacks,
      model,
      do_validation=False,
      epochs=1,
      steps_per_epoch=steps,
      verbose=verbose,
      count_mode='steps',
      mode=mode)
  callbacks._call_begin_hook(mode)

  # Since we do not know how many samples we will see, we cannot pre-allocate
  # the returned Numpy arrays. Instead, we store one array per batch seen
  # and concatenate them upon returning.
  unconcatenated_outs = [[] for _ in model.outputs]
  if steps is not None:
    target_steps = steps
  else:
    target_steps = np.inf

  current_step = 0
  while current_step < target_steps:
    batch_logs = {'batch': current_step, 'size': 1}
    callbacks._call_batch_hook(mode, 'begin', current_step, batch_logs)
    try:
      _, batch_outs = K.batch_get_value([predict_op, output_tensors])
    except errors.OutOfRangeError:
      if steps is not None:
        warning_msg = ('Make sure that your dataset can generate at least '
                       '`steps` batches (in this case, {} batches).'.format(
                           steps))
      else:
        warning_msg = 'Number of steps ran: {} steps'.format(current_step)

      logging.warning('Your dataset iterator ran out of data; '
                      'interrupting evaluation. ' + warning_msg)
      break

    # TODO(priyag): maybe need to unwrap the outputs first for
    # MirroredStrategy.
    for i, label in enumerate(model.output_names):
      unconcatenated_outs[i].extend(batch_outs[label])
    batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
    callbacks._call_batch_hook(mode, 'end', current_step, batch_logs)
    if verbose >= 1:
      progbar.update(current_step + 1)
    current_step += 1

  callbacks._call_end_hook(mode)

  scope.__exit__(None, None, None)

  if len(unconcatenated_outs) == 1:
    prediction_result = np.concatenate(unconcatenated_outs[0], axis=0)
  else:
    prediction_result = [
        np.concatenate(unconcatenated_outs[i], axis=0)
        for i in range(len(unconcatenated_outs))
    ]

  if padding_handler:
    prediction_result = padding_handler.apply_mask(prediction_result)

  return prediction_result
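# Illustrative sketch (not from the original source): the drain-until-
# exhausted pattern the final loop above uses when `steps` cannot be
# inferred. `sess` and `output_tensors` are stand-ins for the session and
# fetches built above; the loop simply runs until the iterator raises
# `OutOfRangeError` or the step budget is spent.
import numpy as np
import tensorflow as tf

def _drain(sess, output_tensors, steps=None):
  target_steps = steps if steps is not None else np.inf
  current_step = 0
  outs = []
  while current_step < target_steps:
    try:
      outs.append(sess.run(output_tensors))
    except tf.errors.OutOfRangeError:
      break  # The dataset ran out of data before `target_steps` batches.
    current_step += 1
  return outs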