def show_results(self, mode="train", num_steps=1, current_epoch=0, reuse=False):
    """Run the pipeline for a few steps and hand back the batches it produced.

    Args:
        mode: can be either "train" or "eval"; used as the key into
            `self.dataset_schedule`.
        num_steps: the number of batches to pull from the pipeline.
        current_epoch: the epoch used to look up the current value of the
            dataset schedule. This matters when a schedule changes the
            pipeline during training.
        reuse: when False (the default) `self._reset()` is called before
            returning; when True the reset is skipped.

    Returns:
        A list with one entry per step. When more than one device is
        detected, each entry is first passed through `per_replica_to_global`.
    """
    self.global_batch_multiplier = get_num_devices()
    if not self._is_prepared:
        self.prepare()
    batch_iterator = self.dataset_schedule[mode].get_current_value(current_epoch)
    data = [next(batch_iterator) for _ in range(num_steps)]
    if self.global_batch_multiplier > 1:
        # Multi-device run: convert every collected batch with per_replica_to_global.
        data = [per_replica_to_global(batch) for batch in data]
    if not reuse:
        self._reset()
    return data
def benchmark(self, mode="train", num_steps=1000, log_interval=100, current_epoch=0):
    """Run the pipeline and print its throughput for the given epoch.

    The pipeline is iterated `num_steps + 1` times: step 0 only starts the
    clock, after which a throughput line is printed every `log_interval`
    steps.

    Args:
        mode: can be either "train" or "eval"; used as the key into
            `self.dataset_schedule`.
        num_steps: the number of timed steps to run.
        log_interval: number of steps between two printed throughput lines.
            Must be non-zero, since the step index is taken modulo this value.
        current_epoch: the epoch used to look up the batch size and the
            current value of the dataset schedule.
    """
    self.global_batch_multiplier = get_num_devices()
    global_batch_size = self.get_global_batch_size(current_epoch)
    self.prepare()
    try:
        ds_iter = self.dataset_schedule[mode].get_current_value(current_epoch)
        for idx in range(num_steps + 1):
            next(ds_iter)
            if idx % log_interval != 0:
                continue
            if idx > 0:
                duration = time.perf_counter() - start
                example_per_sec = log_interval * global_batch_size / duration
                print("FastEstimator: Step: %d, Epoch: %d, Batch Size %d, Example/sec %.2f" %
                      (idx, current_epoch, global_batch_size, example_per_sec))
            # Step 0 always lands here first, so `start` is bound before any
            # duration is computed; afterwards this restarts the clock for
            # the next logging window.
            start = time.perf_counter()
    finally:
        # Always undo prepare(), even if the iterator raises part-way through
        # (for example when it runs out of data before num_steps is reached).
        self._reset()
def __init__(self, pipeline, network, epochs, steps_per_epoch=None, validation_steps=None, traces=None, log_steps=100):
    """Store the training configuration and initialise the bookkeeping state.

    Args:
        pipeline: stored as `self.pipeline`.
        network: stored as `self.network`.
        epochs: stored as `self.epochs`.
        steps_per_epoch: stored as `self.steps_per_epoch`.
        validation_steps: stored as `self.validation_steps`.
        traces: stored as `self.traces`.
        log_steps: must be None or a positive number; stored as
            `self.log_steps`.

    Raises:
        AssertionError: if `log_steps` is neither None nor greater than zero.
    """
    # User-supplied configuration, kept verbatim.
    self.pipeline, self.network, self.epochs = pipeline, network, epochs
    self.steps_per_epoch, self.validation_steps = steps_per_epoch, validation_steps
    self.traces = traces
    assert log_steps is None or log_steps > 0, "log_steps must be positive or None"
    self.log_steps = log_steps

    # Internal state; everything starts from an untrained, uninitialised run.
    self.summary = False
    self.inputs = None
    self.num_devices = get_num_devices()
    self.train_step = self.train_epoch = self.total_train_steps = 0
    self.num_examples = {}
    self.do_eval = False
    self._is_initialized = False
    self.mode_list = ["train"]
def __init__(self, pipeline, network, epochs, steps_per_epoch=None, validation_steps=None, traces=None, log_steps=100):
    """Store the training configuration and select a distribution strategy.

    Args:
        pipeline: stored as `self.pipeline`.
        network: stored as `self.network`.
        epochs: stored as `self.epochs`.
        steps_per_epoch: stored as `self.steps_per_epoch`.
        validation_steps: stored as `self.validation_steps`.
        traces: stored as `self.traces`.
        log_steps: must be None or a positive number; stored as
            `self.log_steps`.

    Raises:
        AssertionError: if `log_steps` is neither None nor greater than zero.
    """
    # User-supplied configuration, kept verbatim.
    self.pipeline, self.network, self.epochs = pipeline, network, epochs
    self.steps_per_epoch, self.validation_steps = steps_per_epoch, validation_steps
    self.traces = traces
    assert log_steps is None or log_steps > 0, "log_steps must be positive or None"
    self.log_steps = log_steps

    self.summary = False
    self.inputs = None
    self.num_devices = get_num_devices()
    # A MirroredStrategy is only created when several devices are available.
    self.distribute_strategy = tf.distribute.MirroredStrategy() if self.num_devices > 1 else None

    # Training progress counters.
    self.train_step = self.train_epoch = self.total_train_steps = 0
    self.do_eval = False
def __init__(self,
             train_data: Union[None, DataSource, Scheduler[DataSource], Dict[str,
                                                                              Union[DataSource,
                                                                                    Scheduler[DataSource]]]] = None,
             eval_data: Union[None, DataSource, Scheduler[DataSource], Dict[str, DataSource]] = None,
             test_data: Union[None, DataSource, Scheduler[DataSource], Dict[str, DataSource]] = None,
             batch_size: Union[None, int, Scheduler[int]] = None,
             ops: Union[None, NumpyOp, Scheduler[NumpyOp], List[Union[NumpyOp, Scheduler[NumpyOp]]]] = None,
             num_process: Optional[int] = None):
    """Register the data sources and ops and initialise the loader context.

    Args:
        train_data: data registered under the "train" mode; dropped if falsy.
        eval_data: data registered under the "eval" mode; dropped if falsy.
        test_data: data registered under the "test" mode; dropped if falsy.
        batch_size: stored as `self.batch_size`.
        ops: normalised with `to_list` and stored as `self.ops`.
        num_process: stored as `self.num_process`. It is forced to 0 when the
            'fork' multiprocessing start method is not in effect. When None,
            it defaults to the smaller of the CPU count and
            `32 * get_num_devices()`.
    """
    data = {x: y for (x, y) in zip(["train", "eval", "test"], [train_data, eval_data, test_data]) if y}
    self.data = self._register_ds_ids(data)
    self.batch_size = batch_size
    self.ops = to_list(ops)
    # Pick 'fork' when no start method has been chosen yet (never on Windows).
    if mp.get_start_method(allow_none=True) is None and os.name != 'nt':
        mp.set_start_method('fork')
    if mp.get_start_method(allow_none=True) != 'fork':
        print("FastEstimator-Warn: Pipeline multiprocessing is disabled. OS must support the 'fork' start method.")
        num_process = 0
    # os.cpu_count() returns None when the count cannot be determined, which
    # would make min() raise TypeError; fall back to a single CPU in that case.
    self.num_process = num_process if num_process is not None else min(os.cpu_count() or 1, 32 * get_num_devices())
    # NOTE: every local except `self` (the arguments plus `data`) is forwarded
    # as a keyword argument. Do not add or rename locals above this line
    # without checking `_verify_inputs`.
    self._verify_inputs(**{k: v for k, v in locals().items() if k != 'self'})
    # Loader Variables
    self.ctx_lock = Lock()
    self.ctx_mode = 'train'
    self.ctx_epoch = 1
    self.ctx_shuffle = True
    self.ctx_output_keys = None
    self.ctx_loader = None
    self.ctx_ds_id = None
    self.ctx_batch_size = None
    self.ctx_ops = []
    self.ctx_batch_info = Batch()
    self.ctx_batch_ops = []
    self.ctx_batch_input_keys = set()
def __init__(self,
             model: Union[tf.keras.Model, torch.nn.Module],
             inputs: Union[None, str, Iterable[str]] = None,
             outputs: Union[None, str, Iterable[str]] = None,
             mode: Union[None, str, Iterable[str]] = None,
             trainable: bool = True,
             intermediate_layers: Union[None, str, int, List[Union[str, int]]] = None):
    """Wrap a compiled model and optionally capture intermediate layer outputs.

    Args:
        model: a TensorFlow or PyTorch model. It must carry an `fe_compiled`
            attribute (per the assertion message, set by `fe.build`).
        inputs: forwarded to the parent constructor.
        outputs: forwarded to the parent constructor.
        mode: forwarded to the parent constructor.
        trainable: stored as `self.trainable`.
        intermediate_layers: layer name(s) and/or index(es) whose outputs
            should be captured. For each entry a storage dict is appended to
            `self.intermediate_outputs`.

    Raises:
        AssertionError: if `model` has no `fe_compiled` attribute.
    """
    # NOTE(review): the nesting below was reconstructed from a source view
    # with collapsed whitespace - confirm it against the original file.
    super().__init__(inputs=inputs, outputs=outputs, mode=mode)
    assert hasattr(model, "fe_compiled"), "must use fe.build to compile the model before use"
    self.intermediate_outputs = []  # [{device: Tensor}]
    intermediate_layers = to_list(intermediate_layers)
    if intermediate_layers and get_num_devices() > 1:
        print("\033[93m {}\033[00m".format(
            "FastEstimator-Warn: Layer names / ids may be different between single-gpu and multi-gpu environments"
        ))
    for intermediate_layer in intermediate_layers:
        # One storage dict per requested layer; the capture callbacks receive it as `fe_storage`.
        storage = {}
        if isinstance(model, tf.keras.Model):
            layers = model.submodules
            # Resolve the requested layer either by position or by its `name`.
            if isinstance(intermediate_layer, int):
                intermediate_layer = layers[intermediate_layer]
            else:
                layers = {layer.name: layer for layer in layers}
                intermediate_layer = layers[intermediate_layer]
            # Keep the original `call` on the layer so the capturing wrapper can still reach it.
            if not hasattr(intermediate_layer, 'fe_original_call'):
                intermediate_layer.fe_original_call = intermediate_layer.call
                intermediate_layer.call = partial(_capture_call_tf, fe_storage=storage, fe_layer=intermediate_layer)
        elif isinstance(model, torch.nn.Module):
            layers = model.named_modules()
            if get_num_devices() > 1:
                # Try to automatically adjust parameters for multi-gpu so that user doesn't need to change code
                layers2 = list(model.named_modules())  # It's a generator, so don't corrupt the other copy
                if isinstance(layers2[0][1], torch.nn.parallel.DataParallel):
                    parallel_prefix = "module."
                    if isinstance(intermediate_layer, str) and not intermediate_layer.startswith(parallel_prefix):
                        # Names gain a "module." prefix when the first module is a DataParallel wrapper.
                        intermediate_layer = parallel_prefix + intermediate_layer
                    elif isinstance(intermediate_layer, int):
                        # Drop the first (DataParallel) entry before indexing by position.
                        layers = layers2[1:]
            # Resolve the requested layer either by position or by its registered name.
            if isinstance(intermediate_layer, int):
                intermediate_layer = list(layers)[intermediate_layer][1]
            else:
                intermediate_layer = dict(layers)[intermediate_layer]
            intermediate_layer.register_forward_hook(partial(_capture_call_torch, fe_storage=storage))
        self.intermediate_outputs.append(storage)
    self.model = model
    self.trainable = trainable
    self.epoch_spec = None
    self.multi_inputs = False
    self.device = ''
def _configure_single_pipeline(self, pipeline):
    """Configure `pipeline` for the detected devices and prepare it.

    Args:
        pipeline: the pipeline object to configure. Its
            `global_batch_multiplier` and `eval_shuffle` attributes are set
            and then its `prepare()` method is called.
    """
    pipeline.global_batch_multiplier = get_num_devices()
    # eval_shuffle is switched on only when a validation step count was given.
    validation_steps_given = self.validation_steps is not None
    pipeline.eval_shuffle = validation_steps_given
    pipeline.prepare()
# Copyright 2019 The FastEstimator Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import tensorflow as tf

from fastestimator.estimator import Estimator
from fastestimator.network import Network, build
from fastestimator.pipeline import Pipeline
from fastestimator.util.util import get_num_devices

__version__ = '1.0-beta0'

# A MirroredStrategy is created at import time only when more than one device
# is detected; otherwise no distribution strategy is used.
distribute_strategy = tf.distribute.MirroredStrategy() if get_num_devices() > 1 else None
def _prepare_pipeline(self):
    """Configure and prepare `self.pipeline`, then record whether it has an eval mode.

    Sets the pipeline's `global_batch_multiplier` and `eval_shuffle`
    attributes, calls its `prepare()` method, and finally sets `self.do_eval`
    to whether "eval" appears in the pipeline's `mode_list`.
    """
    pipeline = self.pipeline
    pipeline.global_batch_multiplier = get_num_devices()
    # eval_shuffle is switched on only when a validation step count was given.
    validation_steps_given = self.validation_steps is not None
    pipeline.eval_shuffle = validation_steps_given
    pipeline.prepare()
    self.do_eval = "eval" in pipeline.mode_list