def __getitem__(self, index: int) -> Mapping[str, Any]:
    """Fetch a data instance at a specified index, and apply transformations to it.

    When the wrapped dataset is a `BatchDataset`, `self.dataset[index]` is treated as a list of samples: the ops are
    run on each distinct sample object, the samples are optionally padded with `pad_batch`, and they are then stacked
    per key into numpy arrays. Otherwise the ops are applied directly to the single retrieved item.

    Args:
        index: Which datapoint to retrieve.

    Returns:
        The data dictionary from the specified index, with transformations applied.
    """
    items = deepcopy(self.dataset[index])  # Deepcopy to prevent ops from overwriting values in datasets
    if isinstance(self.dataset, BatchDataset):
        # The same object may show up more than once in a batch. Track the ids of processed objects in a set (O(1)
        # membership test, instead of scanning a list) so that the ops run only once per distinct object.
        unique_samples = set()
        for item in items:
            if id(item) not in unique_samples:
                forward_numpyop(self.ops, item, self.mode)
                unique_samples.add(id(item))
        if self.dataset.pad_value is not None:
            pad_batch(items, self.dataset.pad_value)
        # Collate the list of per-sample dictionaries into one dictionary of stacked arrays
        items = {key: np.array([item[key] for item in items]) for key in items[0]}
    else:
        forward_numpyop(self.ops, items, self.mode)
    return items
def __getitem__(self, index: int) -> Mapping[str, Any]:
    """Retrieve the entry at `index` from the wrapped dataset and run the ops over it.

    Args:
        index: Which datapoint to retrieve.

    Returns:
        The data dictionary from the specified index, with transformations applied. For a `BatchDataset` the
        per-sample dictionaries are stacked key-by-key into numpy arrays.
    """
    # Work on a deep copy so that ops cannot overwrite the values held by the dataset
    items = deepcopy(self.dataset[index])

    if not isinstance(self.dataset, BatchDataset):
        forward_numpyop(self.ops, items, {'mode': self.mode})
        return items

    # A BatchDataset may hand back the same element several times; only process each distinct object once
    seen_ids = set()
    for sample in items:
        if id(sample) in seen_ids:
            continue
        forward_numpyop(self.ops, sample, {'mode': self.mode})
        seen_ids.add(id(sample))

    if self.dataset.pad_value is not None:
        pad_batch(items, self.dataset.pad_value)

    return {key: np.array([sample[key] for sample in items]) for key in items[0]}
def transform(self,
              data: Dict[str, Any],
              mode: str,
              epoch: int = 1,
              ds_id: str = '',
              target_type: str = 'np') -> Union[Dict[str, Any], FilteredData]:
    """Run every pipeline operation for the given `mode` and `epoch` over a single data instance.

    The instance-level ops are applied to a deep copy of `data`, the result is collated into a batch of one, and the
    batch-level ops are then applied. If either stage yields a `FilteredData` object, that object is returned
    immediately instead of the data.

    Args:
        data: Input data in dictionary format.
        mode: The execution mode in which to run. This can be "train", "eval", "test" or "infer".
        epoch: The epoch index to run. Note that epoch indices are 1-indexed.
        ds_id: The current dataset id.
        target_type: What kind of tensor(s) to create. One of "tf", "torch", or "np".

    Returns:
        The transformed data, or the `FilteredData` produced by one of the op stages.
    """
    sample = deepcopy(data)  # never mutate the caller's dictionary
    instance_ops, batch_spec, batch_ops = self._get_op_split(mode=mode, epoch=epoch, ds_id=ds_id)
    state = {'mode': mode}

    # Stage 1: per-instance ops
    outcome = forward_numpyop(instance_ops, sample, state)
    if isinstance(outcome, FilteredData):
        return outcome

    # Stage 2: collate into a single-element batch, then run the batch ops
    collated = batch_spec.collate_fn([sample])
    outcome = forward_numpyop(batch_ops, collated, state, batched='torch')
    if isinstance(outcome, FilteredData):
        return outcome

    return to_tensor(collated, target_type=target_type)
def transform(self, data: Dict[str, Any], mode: str, epoch: int = 1) -> Dict[str, Any]:
    """Run the pipeline operations active for `mode` and `epoch` over a single data instance.

    Args:
        data: Input data in dictionary format.
        mode: The execution mode in which to run. This can be "train", "eval", "test" or "infer".
        epoch: The epoch index to run. Note that epoch indices are 1-indexed.

    Returns:
        The transformed data, with a new leading axis of length 1 added to every value.
    """
    result = deepcopy(data)  # keep the caller's dictionary untouched
    active_ops = get_current_items(self.ops, mode, epoch)
    forward_numpyop(active_ops, result, {'mode': mode})
    # Give each entry a leading axis (only values are replaced, the key set is unchanged)
    for key in result:
        result[key] = np.expand_dims(result[key], 0)
    return result
def __getitem__(self, index: int) -> Union[Mapping[str, Any], List[Mapping[str, Any]], FilteredData]:
    """Retrieve the entry at `index` from the wrapped dataset and run the ops over it.

    Args:
        index: Which datapoint to retrieve.

    Returns:
        The data dictionary from the specified index, with transformations applied OR an indication that this index
        should be thrown out. If the dataset returns a list, a list with one processed entry (or filter indicator)
        per element is returned instead.
    """
    raw = self.dataset[index]

    if not isinstance(raw, list):
        sample = _DelayedDeepDict(raw)
        filtered = forward_numpyop(self.ops, sample, {'mode': self.mode})
        if filtered:
            return filtered
        sample.finalize(retain=self.output_keys, deep_remainder=self.deep_remainder)
        return sample

    # A BatchDataset may hand back the same element several times, so remember where each distinct object was first
    # processed and re-use that result rather than running the ops again.
    first_position = {}  # id of raw element -> position of its result in `processed`
    processed = []
    for position, element in enumerate(raw):
        element_id = id(element)
        if element_id in first_position:
            processed.append(processed[first_position[element_id]])
            continue
        wrapped = _DelayedDeepDict(element)
        filtered = forward_numpyop(self.ops, wrapped, {'mode': self.mode})
        if filtered:
            processed.append(filtered)
        else:
            wrapped.finalize(retain=self.output_keys, deep_remainder=self.deep_remainder)
            processed.append(wrapped)
        first_position[element_id] = position
    return processed
def __getitem__(self, index: int) -> Mapping[str, Any]:
    """Retrieve the entry at `index` from the wrapped dataset and run the ops over it.

    Args:
        index: Which datapoint to retrieve.

    Returns:
        The data dictionary from the specified index, with transformations applied. If the dataset returns a list,
        the processed entries are (optionally padded and then) stacked key-by-key into numpy arrays.
    """
    raw = self.dataset[index]

    if not isinstance(raw, list):
        sample = _DelayedDeepDict(raw)
        forward_numpyop(self.ops, sample, {'mode': self.mode})
        sample.finalize(retain=self.output_keys, deep_remainder=self.deep_remainder)
        return sample

    # A BatchDataset may hand back the same element several times, so remember where each distinct object was first
    # processed and re-use that result rather than running the ops again.
    first_position = {}  # id of raw element -> position of its result in `processed`
    processed = []
    for position, element in enumerate(raw):
        element_id = id(element)
        if element_id in first_position:
            processed.append(processed[first_position[element_id]])
            continue
        wrapped = _DelayedDeepDict(element)
        forward_numpyop(self.ops, wrapped, {'mode': self.mode})
        wrapped.finalize(retain=self.output_keys, deep_remainder=self.deep_remainder)
        processed.append(wrapped)
        first_position[element_id] = position

    # Padding only applies when the dataset defines a non-None pad_value
    pad_value = getattr(self.dataset, "pad_value", None)
    if pad_value is not None:
        pad_batch(processed, pad_value)

    return {key: np.array([entry[key] for entry in processed]) for key in processed[0]}
def forward_batch(self, data: Union[np.ndarray, List[np.ndarray]],
                  state: Dict[str, Any]) -> Union[FilteredData, np.ndarray, List[np.ndarray]]:
    """Run the wrapped ops repeatedly over a batch of data.

    If `self.repeat` is an int, the ops run that many times. Otherwise `self.repeat` is called with the current
    values of `self.repeat_inputs`: the ops run once unconditionally and then again for as long as that call returns
    a truthy value, stopping early once `self.max_iter` (if truthy) additional passes have been made.

    Args:
        data: The batched input values, in the same order as `self.inputs`.
        state: Information about the current execution context, passed through to the wrapped ops.

    Returns:
        The values for `self.outputs`, or the truthy (filter) result of a pass which requested that the data be
        dropped.
    """
    store = dict(zip(self.inputs, data))

    if isinstance(self.repeat, int):
        for _ in range(self.repeat):
            outcome = forward_numpyop(self.ops, store, state, batched='np')
            if outcome:
                return outcome
        return [store[key] for key in self.outputs]

    # Callable condition: do-while semantics, so the first pass always happens
    outcome = forward_numpyop(self.ops, store, state, batched='np')
    if outcome:
        return outcome
    extra_passes = 0
    while self.repeat(*[store[name] for name in self.repeat_inputs]):
        if self.max_iter and extra_passes >= self.max_iter:
            break
        outcome = forward_numpyop(self.ops, store, state, batched='np')
        if outcome:
            return outcome
        extra_passes += 1
    return [store[key] for key in self.outputs]
def forward(self, data: List[np.ndarray], state: Dict[str, Any]) -> List[np.ndarray]:
    """Run the wrapped ops repeatedly over the data.

    If `self.repeat` is an int, the ops run that many times. Otherwise `self.repeat` is called with the current
    values of `self.repeat_inputs`: the ops run once unconditionally and then again for as long as that call returns
    a truthy value.

    Args:
        data: The input values, in the same order as `self.inputs`.
        state: Information about the current execution context, passed through to the wrapped ops.

    Returns:
        The values for `self.outputs` after the final pass.
    """
    store = dict(zip(self.inputs, data))

    if isinstance(self.repeat, int):
        for _ in range(self.repeat):
            forward_numpyop(self.ops, store, state)
        return [store[key] for key in self.outputs]

    # Callable condition: do-while semantics, so the first pass always happens
    forward_numpyop(self.ops, store, state)
    while self.repeat(*[store[name] for name in self.repeat_inputs]):
        forward_numpyop(self.ops, store, state)
    return [store[key] for key in self.outputs]
def _batch_postprocess(data: Dict[str, Any], ops: List[NumpyOp], output_keys: Set[str],
                       mode: str) -> Union[Dict[str, Any], FilteredData]:
    """Apply batch-level ops to a collated batch, then drop the keys which are not requested as outputs.

    Args:
        data: The batch dictionary. It is modified in place.
        ops: The batch ops to run over `data`.
        output_keys: The keys to keep. If this is empty then no keys are pruned.
        mode: The current execution mode, forwarded to the ops through the state dictionary.

    Returns:
        `data` after the ops and pruning have been applied, or the `FilteredData` returned by the ops.
    """
    outcome = forward_numpyop(ops=ops, data=data, state={'mode': mode}, batched='torch')
    if isinstance(outcome, FilteredData):
        return outcome
    if not output_keys:
        return data

    # Warn only once per key: the set of keys already reported is shared through _DelayedDeepDict.warned
    already_warned = _DelayedDeepDict.warned
    for key in data.keys() - output_keys:
        if key not in already_warned:
            already_warned.add(key)
            print("FastEstimator-Warn: the key '{}' is being pruned since it is unused outside of the Pipeline."
                  " To prevent this, you can declare the key as an input of a Trace or TensorOp.".format(key))
        data.pop(key)
    return data
def forward(self, data: List[np.ndarray], state: Dict[str, Any]) -> List[np.ndarray]:
    """Run the wrapped ops once over the data.

    Args:
        data: The input values, in the same order as `self.inputs`.
        state: Information about the current execution context, passed through to the wrapped ops.

    Returns:
        The values for `self.outputs` after the ops have run.
    """
    store = dict(zip(self.inputs, data))  # key the positional inputs by name for the wrapped ops
    forward_numpyop(self.ops, store, state)
    return [store[name] for name in self.outputs]
def benchmark(self,
              mode: str = "train",
              epoch: int = 1,
              ds_id: Optional[str] = None,
              num_steps: int = 1000,
              log_interval: int = 100,
              detailed: bool = True) -> None:
    """Benchmark the pipeline processing speed.

    For every dataset id under test, the loader is iterated and the throughput (steps/sec) is printed every
    `log_interval` steps. If the loader is an `FEDataLoader` over an `OpDataset` and `detailed` is True, a second
    phase draws `log_interval` random samples/batches directly from the underlying dataset, times each instance op,
    the collation step, and each batch op individually, and prints a per-op breakdown table.

    Args:
        mode: The execution mode to benchmark. This can be 'train', 'eval' or 'test'.
        epoch: The epoch index to benchmark. Note that epoch indices are 1-indexed.
        ds_id: The ds_id to benchmark. If None, all ds_ids will be benchmarked.
        num_steps: The number of steps over which to perform the benchmark.
        log_interval: The logging interval.
        detailed: Whether to display the detailed time used by each operator.
    """
    if ds_id is None:
        ds_ids = self.get_ds_ids(epoch=epoch, mode=mode)
    else:
        ds_ids = [ds_id]

    for ds_id in ds_ids:
        with self(mode=mode, epoch=epoch, ds_id=ds_id, steps_per_epoch=num_steps) as loader:
            if isinstance(loader, tf.data.Dataset):
                loader = loader.take(num_steps)
            # Phase 1: end-to-end throughput of the loader
            start = time.perf_counter()
            for idx, _ in enumerate(loader, start=1):
                if idx % log_interval == 0:
                    duration = time.perf_counter() - start
                    iters_per_sec = log_interval / duration
                    ds_str = f"Dataset: {ds_id}, " if ds_id else ""
                    print("FastEstimator-Benchmark ({}): {}Step: {}, Epoch: {}, Steps/sec: {}".format(
                        mode.capitalize(), ds_str, idx, epoch, iters_per_sec))
                    start = time.perf_counter()
            # Pipeline Operations Benchmarking when using FEDataset
            if isinstance(loader, FEDataLoader) and isinstance(loader.dataset, OpDataset) and detailed:
                # One row per instance op, then one row for collation, then one row per batch op.
                # Columns: (n_visited, duration)
                duration_list = np.zeros(shape=(len(self.ctx_ops) + 1 + len(self.ctx_batch_ops), 2))
                data_len = len(loader.dataset)
                ds_str = f", Dataset: {ds_id}" if ds_id else ""
                print("\nBreakdown of time taken by Pipeline Operations (Mode: {}, Epoch: {}{})\n".format(
                    mode.capitalize(), epoch, ds_str))
                # Time spent converting the batch to numpy and back to torch around the batch ops
                extra_memory_management_time = 0
                for _ in range(log_interval):
                    filtered = False
                    batch = []
                    index = np.random.randint(data_len)
                    items = deepcopy(loader.dataset.dataset[index])
                    if isinstance(items, list):
                        # The dataset already returned a list of samples: time the ops on each distinct sample.
                        # NOTE(review): `items` is not re-sampled between passes, and if `items` is an empty list
                        # `batch` stays falsy - this loop looks like it may not terminate in those cases; confirm.
                        while not batch:
                            filtered = False
                            # BatchDataset may randomly sample the same elements multiple times, avoid reprocessing
                            unique_samples = set()
                            for item in items:
                                if id(item) not in unique_samples:
                                    for i, op in enumerate(self.ctx_ops):
                                        start = time.perf_counter()
                                        op_data = forward_numpyop([op], item, {'mode': loader.dataset.mode})
                                        duration = time.perf_counter() - start
                                        duration_list[i][0] += 1
                                        duration_list[i][1] += duration
                                        if isinstance(op_data, FilteredData):
                                            # Later ops are not visited for this sample once it is filtered
                                            filtered = True
                                            break
                                    unique_samples.add(id(item))
                            if not filtered:
                                batch = items
                    else:
                        # Single-sample dataset: keep drawing samples until a full batch of unfiltered ones exists
                        while len(batch) < (self.ctx_batch_size or 1):
                            filtered = False
                            for i, op in enumerate(self.ctx_ops):
                                start = time.perf_counter()
                                op_data = forward_numpyop([op], items, {'mode': mode})
                                duration = time.perf_counter() - start
                                duration_list[i][0] += 1
                                duration_list[i][1] += duration
                                if isinstance(op_data, FilteredData):
                                    filtered = True
                                    break
                            if not filtered:
                                batch.append(items)
                            # Draw the next sample for the following pass of the loop
                            index = np.random.randint(data_len)
                            items = deepcopy(loader.dataset.dataset[index])
                    if not filtered:
                        # Perform the batching
                        start = time.perf_counter()
                        batch = self.ctx_batch_info.collate_fn(batch)
                        duration = time.perf_counter() - start
                        duration_list[len(self.ctx_ops)][0] += 1
                        duration_list[len(self.ctx_ops)][1] += duration
                        # Perform batch ops
                        start = time.perf_counter()
                        # Transform to numpy to not bias against the first op in the batch_op chain
                        batch = to_tensor(batch, target_type='np')
                        extra_memory_management_time += time.perf_counter() - start
                        # Batch op rows start right after the collation row
                        for i, op in enumerate(self.ctx_batch_ops, start=len(self.ctx_ops) + 1):
                            start = time.perf_counter()
                            op_data = forward_numpyop([op], data=batch, state={'mode': mode}, batched='np')
                            duration = time.perf_counter() - start
                            duration_list[i][0] += 1
                            duration_list[i][1] += duration
                            if isinstance(op_data, FilteredData):
                                break
                        # Count extra time needed to cast data back to torch
                        start = time.perf_counter()
                        to_tensor(batch, target_type='torch', shared_memory=True)
                        extra_memory_management_time += time.perf_counter() - start
                if self.ctx_batch_ops:
                    # Extra memory management penalty is only incurred when using batch ops
                    duration_list[len(self.ctx_ops)][1] += extra_memory_management_time

                total_time = np.sum(duration_list[:, 1])
                # Average time per visit in ms; the maximum() guards against dividing by zero for unvisited ops
                normalized_times_ms = 1000 * duration_list[:, 1] / np.maximum(duration_list[:, 0], 1)
                # Index 0 holds the column header so that it takes part in the width computation below
                op_names = ["Op"]
                for op in self.ctx_ops + [self.ctx_batch_info] + self.ctx_batch_ops:
                    # Meta ops are displayed together with the name(s) of the op(s) which they wrap
                    if isinstance(op, Sometimes) and op.op:
                        op_names.append(op.__class__.__name__ + " (" + op.op.__class__.__name__ + ")")
                    elif isinstance(op, Repeat) and op.op:
                        op_names.append(op.__class__.__name__ + " (" + op.op.__class__.__name__ + ")")
                    elif isinstance(op, OneOf) and op.ops:
                        op_names.append(op.__class__.__name__ + " (" +
                                        ", ".join([sub_op.__class__.__name__ for sub_op in op.ops]) + ")")
                    elif isinstance(op, Fuse) and op.ops:
                        op_names.append(op.__class__.__name__ + " (" +
                                        ", ".join([sub_op.__class__.__name__ for sub_op in op.ops]) + ")")
                    elif isinstance(op, Batch):
                        op_names.append("<Collating Batch>")
                    else:
                        op_names.append(op.__class__.__name__)

                # Column widths: the widest entry in each column, but never narrower than the column header
                max_op_len = max(len(op_name) for op_name in op_names)
                max_in_len = max(
                    [len(", ".join(op.inputs)) for op in self.ctx_ops + [self.ctx_batch_info] + self.ctx_batch_ops] +
                    [len("Inputs")])
                max_out_len = max(
                    [len(", ".join(op.outputs)) for op in self.ctx_ops + [self.ctx_batch_info] + self.ctx_batch_ops] +
                    [len("Outputs")])
                ms_visit_len = max(len("{:.3f}".format(max(normalized_times_ms))), len("ms / Visit"))
                visit_len = max(len(f"{int(np.max(duration_list[:, 0]))}"), len("Visits"))

                # Table header
                print("{}: {}: {}: {}: {}: {}".format("Op".ljust(max_op_len + 1),
                                                      "Inputs".ljust(max_in_len + 1),
                                                      "Outputs".ljust(max_out_len + 1),
                                                      "ms / Visit".ljust(ms_visit_len + 1),
                                                      "Visits".ljust(visit_len + 1),
                                                      "Time (Total)".rjust(12)))
                print("-" * (max_op_len + max_in_len + max_out_len + visit_len + 37))
                # One table row per op; op_names is offset by one because of its header entry
                for i, op in enumerate(self.ctx_ops + [self.ctx_batch_info] + self.ctx_batch_ops):
                    print("{}: {}: {}: {}: {}: {:11.2f}%".format(op_names[i + 1].ljust(max_op_len + 1),
                                                                 ", ".join(op.inputs).ljust(max_in_len + 1),
                                                                 ", ".join(op.outputs).ljust(max_out_len + 1),
                                                                 "{:.3f}".format(
                                                                     normalized_times_ms[i]).ljust(ms_visit_len + 1),
                                                                 str(int(duration_list[i][0])).ljust(visit_len + 1),
                                                                 100 * duration_list[i][1] / total_time))
                if self.ctx_batch_ops:
                    # NOTE(review): this evaluates to the share of the collation row which is NOT memory management
                    # overhead, yet the message reports it as the amount that "would be cut" - confirm whether the
                    # numerator should be `extra_memory_management_time` instead.
                    penalty = round(
                        100 * (duration_list[len(self.ctx_ops)][1] - extra_memory_management_time) /
                        duration_list[len(self.ctx_ops)][1],
                        1)
                    print(f"\nNote that collation time would be cut by ~{penalty}% if there were no batched ops.")
        # NOTE(review): nesting level of this statement was reconstructed from whitespace-collapsed source
        print("\n")  # to make printing more obvious
def forward_batch(self, data: Union[np.ndarray, List[np.ndarray]],
                  state: Dict[str, Any]) -> Union[np.ndarray, List[np.ndarray]]:
    """Run the wrapped ops once over a batch of data.

    Args:
        data: The batched input values, in the same order as `self.inputs`.
        state: Information about the current execution context, passed through to the wrapped ops.

    Returns:
        The values for `self.outputs`, unless running the ops produced a truthy result, in which case that result is
        returned instead.
    """
    store = dict(zip(self.inputs, data))  # key the positional inputs by name for the wrapped ops
    outcome = forward_numpyop(self.ops, store, state, batched="np")
    if outcome:
        return outcome
    return [store[key] for key in self.outputs]
def benchmark(self,
              mode: str = "train",
              epoch: int = 1,
              num_steps: int = 1000,
              log_interval: int = 100,
              detailed: bool = True) -> None:
    """Measure how quickly the pipeline produces data.

    The loader is iterated for up to `num_steps` steps, printing the throughput every `log_interval` steps. If the
    loader is a `DataLoader` over an `OpDataset` and `detailed` is True, random samples are then drawn from the
    underlying dataset and each op is timed individually in order to print a per-op share of the total op time.

    Args:
        mode: The execution mode to benchmark. This can be 'train', 'eval' or 'test'.
        epoch: The epoch index to benchmark. Note that epoch indices are 1-indexed.
        num_steps: The maximum number of steps over which to perform the benchmark.
        log_interval: The logging interval.
        detailed: Whether to display the detailed time used by each operator.
    """
    loader = self.get_loader(mode=mode, epoch=epoch)
    if isinstance(loader, tf.data.Dataset):
        loader = loader.take(num_steps)

    # Phase 1: end-to-end throughput of the loader
    tick = time.perf_counter()
    for step, _ in enumerate(loader, start=1):
        if step % log_interval == 0:
            elapsed = time.perf_counter() - tick
            steps_per_sec = log_interval / elapsed
            print("FastEstimator: Step: {}, Epoch: {}, Steps/sec: {}".format(step, epoch, steps_per_sec))
            tick = time.perf_counter()
        if step == num_steps:
            break

    # Phase 2: Pipeline Operations Benchmarking, which is only possible when using an FEDataset
    if not (isinstance(loader, DataLoader) and isinstance(loader.dataset, OpDataset) and detailed):
        return

    ops = loader.dataset.ops
    op_seconds = np.zeros(shape=(len(ops)))
    n_entries = len(loader.dataset.dataset)

    if self.batch_size:
        # Resolve the batch size for this epoch / mode, then time one batch worth of samples per logging step
        batch_size = self.batch_size
        if isinstance(batch_size, Scheduler):
            batch_size = batch_size.get_current_value(epoch)
        if isinstance(batch_size, dict):
            batch_size = batch_size[mode]
        log_interval = log_interval * batch_size

    print("\nBreakdown of time taken by Pipeline Operations ({} epoch {})".format(mode, epoch))

    def _time_each_op(sample):
        # Run the ops one at a time over `sample`, accumulating the time spent inside each of them
        for op_idx, op in enumerate(ops):
            op_start = time.perf_counter()
            forward_numpyop([op], sample, {'mode': loader.dataset.mode})
            op_elapsed = time.perf_counter() - op_start
            op_seconds[op_idx] += op_elapsed

    for _ in range(log_interval):
        pick = np.random.randint(n_entries)
        items = deepcopy(loader.dataset.dataset[pick])
        if isinstance(loader.dataset.dataset, BatchDataset):
            # BatchDataset may randomly sample the same elements multiple times, so only process distinct objects
            seen_ids = set()
            for item in items:
                if id(item) in seen_ids:
                    continue
                _time_each_op(item)
                seen_ids.add(id(item))
        else:
            _time_each_op(items)

    total_seconds = np.sum(op_seconds)

    # The first label is the column header so that it takes part in the width computation
    labels = ["Op"]
    for op in ops:
        label = op.__class__.__name__
        # Meta ops are displayed together with the name(s) of the op(s) which they wrap
        if isinstance(op, Sometimes) and op.op:
            label = label + " (" + op.op.__class__.__name__ + ")"
        elif isinstance(op, OneOf) and op.ops:
            label = label + " (" + ", ".join([sub_op.__class__.__name__ for sub_op in op.ops]) + ")"
        labels.append(label)

    # Column widths: the widest entry in each column, but never narrower than the column header
    op_width = max(len(label) for label in labels)
    in_width = max([len(", ".join(op.inputs)) for op in ops] + [len("Inputs")])
    out_width = max([len(", ".join(op.outputs)) for op in ops] + [len("Outputs")])

    print("{}: {}: {}: {}".format("Op".ljust(op_width + 1),
                                  "Inputs".ljust(in_width + 1),
                                  "Outputs".ljust(out_width + 1),
                                  "Time".rjust(5)))
    print("-" * (op_width + in_width + out_width + 15))
    for label, op, seconds in zip(labels[1:], ops, op_seconds):
        print("{}: {}: {}: {:5.2f}%".format(label.ljust(op_width + 1),
                                            ", ".join(op.inputs).ljust(in_width + 1),
                                            ", ".join(op.outputs).ljust(out_width + 1),
                                            100 * seconds / total_seconds))
def benchmark(self, mode: str = "train", epoch: int = 1, num_steps: int = 1000, log_interval: int = 100) -> None:
    """Benchmark the pipeline processing speed.

    The loader is iterated for up to `num_steps` steps, printing the throughput every `log_interval` steps.
    Afterwards, random samples are drawn from the dataset behind the loader and each op is timed individually in
    order to print its share of the total op time.

    Args:
        mode: The execution mode to benchmark. This can be 'train', 'eval' or 'test'.
        epoch: The epoch index to benchmark. Note that epoch indices are 1-indexed.
        num_steps: The maximum number of steps over which to perform the benchmark.
        log_interval: The logging interval.
    """
    loader = self.get_loader(mode=mode, epoch=epoch)
    if isinstance(loader, tf.data.Dataset):
        loader = loader.take(num_steps)
    start = time.perf_counter()
    for idx, _ in enumerate(loader, start=1):
        if idx % log_interval == 0:
            duration = time.perf_counter() - start
            iters_per_sec = log_interval / duration
            print("FastEstimator: Step: {}, Epoch: {}, Steps/sec: {}".format(idx, epoch, iters_per_sec))
            start = time.perf_counter()
        if idx == num_steps:
            break
    # Pipeline Operations Benchmarking
    # NOTE(review): this section reads `loader.dataset.ops` unconditionally, so it presumably expects a loader
    # wrapping an op dataset rather than a tf.data.Dataset - confirm against callers.
    op_list = loader.dataset.ops
    duration_list = np.zeros(shape=(len(op_list)))
    data_len = len(loader.dataset.dataset)
    if self.batch_size:
        # Time one batch worth of samples per logging step
        log_interval = log_interval * self.batch_size
    print("\nBreakdown of time taken by Pipeline Operations:")
    for _ in range(log_interval):
        index = np.random.randint(data_len)
        items = deepcopy(loader.dataset.dataset[index])
        if isinstance(loader.dataset.dataset, BatchDataset):
            # The same object may show up more than once in a batch. Track the ids of processed objects in a set
            # (O(1) membership test, instead of scanning a list) so that each distinct object is only timed once.
            unique_samples = set()
            for item in items:
                if id(item) not in unique_samples:
                    for i, op in enumerate(op_list):
                        start = time.perf_counter()
                        forward_numpyop([op], item, loader.dataset.mode)
                        duration = time.perf_counter() - start
                        duration_list[i] += duration
                    unique_samples.add(id(item))
        else:
            for i, op in enumerate(op_list):
                start = time.perf_counter()
                forward_numpyop([op], items, loader.dataset.mode)
                duration = time.perf_counter() - start
                duration_list[i] += duration
    total_time = np.sum(duration_list)
    for i, op in enumerate(op_list):
        print(" - {}: Time Consumption: {:.2f}%".format(op.__class__.__name__, 100 * duration_list[i] / total_time))