def _parse_metrics(self):
    """Return the metric objects used during training

    This relies on the `trainer::metrics` section of `self.config`. This
    section contains a list of metrics, where each metric is specified as a
    dictionary or a string. If a dictionary, the key is expected to be the
    importpath to the metric, and the value a dictionary of __init__ params;
    if a str only, it is expected to be the importpath to the metric.

    :return: metric objects used during training
    :rtype: list[object]
    """

    metric_specs = self.config['trainer'].get('metrics', [])

    metrics = []
    for metric_spec in metric_specs:
        if isinstance(metric_spec, dict):
            assert len(metric_spec) == 1
            metric_importpath = list(metric_spec.keys())[0]
            metric_params = list(metric_spec.values())[0]
        else:
            metric_importpath = metric_spec
            metric_params = {}

        metric_fn = import_object(metric_importpath)
        # classes are instantiated with their params; plain functions are
        # used as-is
        if inspect.isclass(metric_fn):
            metric_fn = metric_fn(**metric_params)
        metrics.append(metric_fn)

    return metrics
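# Illustrative sketch only: a `trainer::metrics` entry consistent with
# `_parse_metrics`, mixing the dict form (importpath -> __init__ params) and
# the plain-string form. The importpaths below are hypothetical examples,
# not necessarily the ones this project uses.
example_metrics_config = {
    'trainer': {
        'metrics': [
            {'tensorflow.keras.metrics.TopKCategoricalAccuracy': {'k': 5}},
            'sklearn.metrics.accuracy_score',
        ]
    }
}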
def test_import_object__numpy(self):
    """Test `import_object` with a numpy.array"""

    NumpyArray = import_object(object_importpath='numpy.array')

    array1 = NumpyArray([0, 1, 2, 3])
    array2 = np.array([0, 1, 2, 3])

    assert isinstance(array1, np.ndarray)
    assert np.array_equal(array1, array2)
def _instantiate_dataset(self, set_name):
    """Return a dataset object to be used as an iterator during training

    The dataset that is returned should be able to be directly passed into
    the `train` method of whatever trainer class is specified in
    `self.config`, as either the `train_dataset` or `validation_dataset`
    argument.

    :param set_name: set to return the dataset for, one of
     {'train', 'validation'}
    :type set_name: str
    :return: iterable over batches of the dataset for `set_name`
    :rtype: DataLoader
    """

    assert set_name in {'train', 'validation'}
    dataset_spec = self.config['dataset']

    fpath_df_obs_key = 'fpath_df_{}'.format(set_name)
    if fpath_df_obs_key in dataset_spec:
        fpath_df_obs = dataset_spec[fpath_df_obs_key]
        df_obs = pd.read_csv(fpath_df_obs)
        dataset_spec['init_params']['df_obs'] = df_obs

    dataset_importpath = dataset_spec['importpath']
    Dataset = import_object(dataset_importpath)
    if Dataset == KerasImageDataset:
        dataset_spec['init_params']['split'] = set_name
    dataset = Dataset(**dataset_spec['init_params'])

    albumentations_key = '{}_albumentations'.format(set_name)
    albumentations = dataset_spec.get(albumentations_key, {})
    if albumentations:
        albumentations = self._parse_albumentations(albumentations)
        dataset = AugmentedDataset(dataset, albumentations)

    transformations_key = '{}_transformations'.format(set_name)
    transformations = dataset_spec[transformations_key]
    transformations = self._parse_transformations(transformations)
    dataset = AugmentedDataset(dataset, transformations)

    loading_params = dataset_spec['{}_loading_params'.format(set_name)]
    duplicate_target_keys = loading_params.pop('duplicate_target_keys', {})
    collate_fn = (
        lambda batch: format_batch(
            batch, dataset.input_keys, dataset.target_keys,
            duplicate_target_keys
        )
    )
    dataset_gen = DataLoader(dataset, collate_fn=collate_fn, **loading_params)

    return dataset_gen
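# Hedged sketch of a `dataset` config section that `_instantiate_dataset`
# could consume for set_name='train'. All values (paths, importpaths, batch
# size) are hypothetical placeholders; only the key names mirror what the
# method above reads.
example_dataset_config = {
    'dataset': {
        'importpath': 'datasets.my_dataset.MyDataset',  # hypothetical class
        'init_params': {},                               # filled per dataset
        'fpath_df_train': 'data/train_obs.csv',          # read via pd.read_csv
        'train_albumentations': {},                      # optional
        'train_transformations': [],                     # always looked up
        'train_loading_params': {'batch_size': 32, 'shuffle': True},
    }
}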
def _parse_albumentations(self, albumentations):
    """Parse the provided albumentations into the expected format

    When passed into the AugmentedDataset (whose import path is listed
    below), `albumentations` is expected to be a list of two element tuples,
    where each tuple contains a transformation function to apply as the
    first element and function kwargs as the second element. When they are
    parsed from the config (and passed into this function), they are a list
    of dictionaries. This function mostly reformats them to the format
    expected by the AugmentedDataset class:

    - training.datasets.augmented_dataset.AugmentedDataset

    :param albumentations: holds the albumentations to apply to each batch
     of data, where each transformation is specified as a dictionary with
     the key equal to the class importpath of the albumentation and the
     value holding two dictionaries, 'init_params' holding initialization
     parameters and 'sample_keys' holding a 'value' key specifying what
     sample key to apply the albumentation to
    :type albumentations: dict[dict]
    :return: a one element list holding a 3-element tuple of the composed
     albumentation that will apply the albumentations, an empty dictionary,
     and a dictionary mapping the sample keys the albumentations will be
     applied to to the keyword arguments to pass the sample elements by
    :rtype: list[tuple]
    """

    compose_init_params = albumentations.pop('compose_init_params', {})
    sample_keys = albumentations['sample_keys']

    albumentations_fns = []
    for albumentation in albumentations['albumentations']:
        assert len(albumentation) == 1
        albumentation_importpath = list(albumentation.keys())[0]
        albumentation_init_params = list(albumentation.values())[0]

        Albumentation = import_object(albumentation_importpath)
        albumentation_fn = Albumentation(**albumentation_init_params)
        albumentations_fns.append(albumentation_fn)

    albumentation_composition = Compose(
        albumentations_fns, **compose_init_params
    )
    processed_albumentations = [(albumentation_composition, {}, sample_keys)]
    return processed_albumentations
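# A minimal, hypothetical input for `_parse_albumentations`, assuming an
# albumentations-style Compose and transform classes; the importpath,
# parameters, and sample-key mapping are illustrative, not taken from a
# real config in this repo.
example_albumentations_spec = {
    'compose_init_params': {},
    'sample_keys': {'image': 'image'},
    'albumentations': [
        {'albumentations.HorizontalFlip': {'p': 0.5}},
    ],
}
# The parsed result would conceptually be a one element list:
# [(Compose([HorizontalFlip(p=0.5)]), {}, {'image': 'image'})]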
def _parse_callbacks(self):
    """Return the callback objects used during training

    This relies on the `trainer::callbacks` section of `self.config`. This
    section contains a list of callbacks, where each callback is specified
    as a dictionary or a string. If a dictionary, the key is expected to be
    the importpath to the callback, and the value a dictionary of __init__
    params; if a str only, it is expected to be the importpath to the
    callback.

    :return: callback objects used during training
    :rtype: list[object]
    """

    callback_specs = self.config['trainer'].get('callbacks', [])

    callbacks = []
    for callback_spec in callback_specs:
        if isinstance(callback_spec, dict):
            assert len(callback_spec) == 1
            callback_importpath = list(callback_spec.keys())[0]
            callback_params = list(callback_spec.values())[0]
        else:
            callback_importpath = callback_spec
            callback_params = {}

        # certain callbacks get output paths rooted at the job directory
        if 'CSVLogger' in callback_importpath:
            callback_params['filename'] = os.path.join(
                self.dirpath_job, 'history.csv')
        elif 'TensorBoard' in callback_importpath:
            callback_params['log_dir'] = os.path.join(
                self.dirpath_job, 'tensorboard')
        elif 'ModelCheckpoint' in callback_importpath:
            callback_params['filepath'] = os.path.join(
                self.dirpath_job, 'weights')
            callback_params['save_weights_only'] = True
        elif 'LRFinder' in callback_importpath:
            callback_params['dirpath_results'] = os.path.join(
                self.dirpath_job, 'lr_finder')

        Callback = import_object(callback_importpath)
        callback = Callback(**callback_params)
        callbacks.append(callback)

    return callbacks
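# Sketch of a `trainer::callbacks` list that `_parse_callbacks` accepts. The
# Keras importpaths below are plausible examples of callbacks whose paths the
# method special-cases by substring ('CSVLogger', 'ModelCheckpoint', ...);
# the exact classes used in practice may differ.
example_callbacks_config = {
    'trainer': {
        'callbacks': [
            'tensorflow.keras.callbacks.CSVLogger',
            {'tensorflow.keras.callbacks.ModelCheckpoint':
                {'save_best_only': True}},
        ]
    }
}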
def _instantiate_network(self):
    """Return the network object to train

    This relies on the `network` section of `self.config`. This section must
    contain the following keys:

    - str importpath: import path to the network class to use for training
    - dict init_params: parameters to pass directly into the `__init__` of
      the specified network as keyword arguments

    :return: network for training
    :rtype: object
    """

    network_spec = self.config['network']
    network_importpath = network_spec['importpath']
    Network = import_object(network_importpath)
    return Network(**network_spec['init_params'])
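# Hypothetical `network` section matching the two keys that
# `_instantiate_network` requires; the importpath and init_params are
# placeholders only.
example_network_config = {
    'network': {
        'importpath': 'networks.resnet.ResNet',  # hypothetical class
        'init_params': {'n_classes': 10},         # passed to __init__
    }
}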
def _init_optimizer(self):
    """Initialize the optimizer used to train the network

    :return: initialized optimizer
    :rtype: object
    """

    optimizer_spec = self.config['optimizer']
    if isinstance(optimizer_spec, str):
        optimizer_importpath = optimizer_spec
        init_params = {}
    else:
        optimizer_importpath = optimizer_spec['importpath']
        init_params = optimizer_spec['init_params']

    Optimizer = import_object(optimizer_importpath)
    optimizer = Optimizer(**init_params)
    return optimizer
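# `_init_optimizer` accepts either form below. The Keras importpath is one
# plausible example; any importable optimizer class would work the same way.
optimizer_as_str = 'tensorflow.keras.optimizers.Adam'
optimizer_as_dict = {
    'importpath': 'tensorflow.keras.optimizers.Adam',
    'init_params': {'learning_rate': 1e-3},
}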
def _instantiate_trainer(self):
    """Return the trainer object that runs training

    This relies on the `trainer` section of `self.config`. This section must
    contain the following keys:

    - str importpath: import path to the trainer class to use
    - dict init_params: parameters to pass directly into the `__init__` of
      the specified trainer as keyword arguments

    :return: trainer to run training
    :rtype: object
    """

    trainer_spec = self.config['trainer']
    trainer_importpath = trainer_spec['importpath']
    Trainer = import_object(trainer_importpath)
    trainer = Trainer(
        **trainer_spec['init_params'],
        dirpath_save=self.dirpath_job, gpu_id=self.gpu_id
    )
    return trainer
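# Sketch of a `trainer` section for `_instantiate_trainer`. Note that
# `dirpath_save` and `gpu_id` are supplied by the method itself, so
# `init_params` only needs the remaining keyword arguments; the importpath
# and parameter names here are hypothetical.
example_trainer_config = {
    'trainer': {
        'importpath': 'training.trainers.keras_trainer.KerasTrainer',
        'init_params': {'n_epochs': 10, 'batch_size': 32},
    }
}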
def _parse_transformations(self, transformations):
    """Parse the provided transformations into the expected format

    When passed into the AugmentedDataset (whose import path is listed
    below), `transformations` is expected to be a list of two element
    tuples, where each tuple contains a transformation function to apply as
    the first element and function kwargs as the second element. When they
    are parsed from the config (and passed into this function), they are a
    list of dictionaries. This function mostly reformats them to the format
    expected by the AugmentedDataset class:

    - training.datasets.augmented_dataset.AugmentedDataset

    :param transformations: holds the transformations to apply to each batch
     of data, where each transformation is specified as a dictionary with
     the key equal to the importpath of the callable transformation and the
     value equal to a dictionary holding keyword arguments for the callable
    :type transformations: list[dict]
    :return: a list of 3-element tuples, each holding a transformation
     function, a dictionary specifying keyword arguments for the function,
     and a dictionary mapping the sample keys the transformation will be
     applied to to the keyword arguments to pass the sample elements by
    :rtype: list[tuple]
    """

    processed_transformations = []
    for transformation in transformations:
        assert len(transformation) == 1
        transformation_fn_importpath = list(transformation.keys())[0]
        transformation_config = list(transformation.values())[0]
        sample_keys = transformation_config.pop('sample_keys')

        transformation_fn = import_object(transformation_fn_importpath)
        processed_transformations.append(
            (transformation_fn, transformation_config, sample_keys)
        )

    return processed_transformations
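# Hypothetical input/output for `_parse_transformations`: each dict maps a
# callable's importpath to its kwargs plus the 'sample_keys' entry that gets
# popped off. The importpath and sample-key mapping are illustrative only.
example_transformations = [
    {'numpy.transpose': {'axes': (2, 0, 1), 'sample_keys': {'value': 'image'}}},
]
# Parsed result (conceptually):
# [(numpy.transpose, {'axes': (2, 0, 1)}, {'value': 'image'})]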