def dump(self, path):
    """Dumps class instance as a serialized dill file

    Args:
        path (str): dumping path
    """
    buffer = copy.deepcopy(self)
    io.save_dill(path, buffer)
def __init__(self, root, transform=None, target_transform=None):
    """Loads the dataset index and the type mappings of each scrapset

    Args:
        root (str): root directory of the dataset
        transform (callable): optional sample transform
        target_transform (callable): optional target transform
    """
    self.idx2key = io.load_json(os.path.join(root, "index.json"))
    self.types = dict()
    self.root_dir = root
    for scrapset in ["artworks", "pokebip"]:
        self.types.update(
            **io.load_json(os.path.join(root, scrapset, "types.json")))
    self.transform = transform
    self.target_transform = target_transform
def __init__(self):
    """
    INIT
    """
    self.class_matrix = np.zeros(9, dtype=np.int32)
    self.io_handler = IOHandler()
    self.total = 0
def human_baseline(gold_dataset_dir: str, annotation_path: str):
    """
    Shows the images from the dataset and asks the human to label them.

    :param str gold_dataset_dir: The directory of the gold dataset
    :param str annotation_path: The path to the annotation file, if it exists we will resume the labeling session
    """
    files = glob.glob(os.path.join(gold_dataset_dir, "*.jpeg"))
    io_handler = IOHandler()
    input_dictionary = restore_dictionary(annotation_path=annotation_path)

    try:
        pbar_desc = (
            "-1"
            if input_dictionary["total"] == 0
            else f"Current human accuracy: "
            f"{round((input_dictionary['correct']/input_dictionary['total'])*100,2)}%"
        )
        with tqdm(total=len(files) - input_dictionary["total"], desc=pbar_desc) as pbar:
            for image_name in files:
                metadata = os.path.basename(image_name)[:-5]
                header, values = metadata.split("%")
                image_no = int(header[1:])

                if image_no not in input_dictionary["human_predictions"]:
                    gold_key = io_handler.imagename_input_conversion(
                        image_name=image_name, output_type="keyboard"
                    )

                    # image = io.imread(image_name)
                    os.system(f"xv {image_name} &")
                    # cv2.imshow("window1", img_as_ubyte(image))
                    # cv2.waitKey(1)

                    user_key = keys_to_id(input("Push the keys: "))

                    input_dictionary["human_predictions"][image_no] = user_key
                    input_dictionary["total"] += 1
                    if user_key == gold_key:
                        input_dictionary["correct"] += 1

                    pbar.update(1)
                    pbar.set_description(
                        f"Current human accuracy: "
                        f"{round((input_dictionary['correct']/input_dictionary['total'])*100,2)}%"
                    )

                    if input_dictionary["total"] % 20 == 0:
                        with open(
                            annotation_path, "w+", encoding="utf8"
                        ) as annotation_file:
                            json.dump(input_dictionary, annotation_file)

    except KeyboardInterrupt:
        with open(annotation_path, "w+", encoding="utf8") as annotation_file:
            json.dump(input_dictionary, annotation_file)

    with open(annotation_path, "w+", encoding="utf8") as annotation_file:
        json.dump(input_dictionary, annotation_file)
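# Note: restore_dictionary is not included in this snippet. The sketch below is an
# assumption reconstructed only from how human_baseline uses it above (the keys
# "total", "correct" and "human_predictions" come from that function; everything
# else is illustrative).
import json
import os


def restore_dictionary(annotation_path: str) -> dict:
    """Resume from an existing annotation file if present, else start a fresh session."""
    if annotation_path and os.path.isfile(annotation_path):
        with open(annotation_path, "r", encoding="utf8") as annotation_file:
            return json.load(annotation_file)
    return {"total": 0, "correct": 0, "human_predictions": {}}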
def serialize(self, path=None):
    """Dumps object dictionary as serialized pickle file

    Args:
        path (str): dumping path
    """
    if not path:
        path = os.path.join(self.session_dir, ConfigFile.pickle_filename)
    attributes = self.__dict__.copy()
    del attributes["optimizer"]
    attributes.update({
        "optimizer_class": self.optimizer.__class__,
        "optimizer_config": self.optimizer.get_config()
    })
    io.save_pickle(path, attributes)
def test_sort_shakespeare(self):
    shakespeare = IOHandler(
        '../../Assets/Files/shakespeare-complete-works.txt')
    actual, expected = shakespeare.words, shakespeare.sorted_words
    self.assertTrue(len(actual) > 0)
    with self.assertRaises(RecursionError):
        actual = self.alg(actual)
def setup_session(self, overwrite=False, timestamp=False):
    """Sets up training session directory

    Args:
        overwrite (bool): if True, overwrites existing directory (default: False)
        timestamp (bool): if True, adds timestamp to directory name (default: False)
    """
    session_name = self.session_name
    if timestamp:
        session_name = session_name + "_" + time.strftime("%Y%m%d-%H%M%S")
    io.mkdir(session_name, ConfigFile.bin_dir, overwrite)
    session_dir = os.path.join(ConfigFile.bin_dir, session_name)
    io.mkdir(ConfigFile.checkpoints_dirname, session_dir)
    io.mkdir(ConfigFile.tensorboard_dirname, session_dir)
    io.mkdir(ConfigFile.observations_dirname, session_dir)
    io.mkdir(ConfigFile.scores_dirname, session_dir)
    ConfigFile._write_gitignore(session_dir)
def __init__(
    self,
    dataset_dir: str,
    hide_map_prob: float,
    dropout_images_prob: List[float],
    train: bool = False,
):
    """
    INIT

    :param str dataset_dir: The directory of the dataset.
    :param float hide_map_prob: Probability of hiding the minimap (0<=hide_map_prob<=1)
    :param List[float] dropout_images_prob: Probability of dropping an image (0<=dropout_images_prob<=1)
    :param bool train: If True, the dataset is used for training.
    """
    self.dataset_dir = dataset_dir
    self.hide_map_prob = hide_map_prob
    self.dropout_images_prob = dropout_images_prob

    assert 0 <= hide_map_prob <= 1.0, (
        f"hide_map_prob not in 0 <= hide_map_prob <= 1.0 range. "
        f"hide_map_prob: {hide_map_prob}")

    assert len(dropout_images_prob) == 5, (
        f"dropout_images_prob must have 5 probabilities, one for each image in the sequence. "
        f"dropout_images_prob len: {len(dropout_images_prob)}")

    for dropout_image_prob in dropout_images_prob:
        assert 0 <= dropout_image_prob <= 1.0, (
            f"All probabilities in dropout_image_prob must be in the range 0 <= dropout_image_prob <= 1.0. "
            f"dropout_images_prob: {dropout_images_prob}")

    if train:
        self.transform = transforms.Compose([
            RemoveMinimap(hide_map_prob=hide_map_prob),
            RemoveImage(dropout_images_prob=dropout_images_prob),
            SplitImages(),
            ToTensor(),
            SequenceColorJitter(),
            Normalize(),
            MergeImages(),
            ReOrderImages(),
        ])
    else:
        self.transform = transforms.Compose([
            RemoveMinimap(hide_map_prob=hide_map_prob),
            # RemoveImage(dropout_images_prob=dropout_images_prob),
            SplitImages(),
            ToTensor(),
            # SequenceColorJitter(),
            Normalize(),
            MergeImages(),
            ReOrderImages(),
        ])

    self.dataset_files = glob.glob(os.path.join(dataset_dir, "*.jpeg"))
    self.IOHandler = IOHandler()
class BalancedDataset:
    """
    Generate a dataset of images with balanced classes.
    """

    class_matrix: np.ndarray
    io_handler: IOHandler
    total: int

    def __init__(self):
        """
        INIT
        """
        self.class_matrix = np.zeros(9, dtype=np.int32)
        self.io_handler = IOHandler()
        self.total = 0

    def balance_dataset(self, input_value: Union[np.ndarray, int]) -> bool:
        """
        Decide whether a given input value should be added to the dataset.
        The more examples a class already has, the lower the probability of
        returning True for new examples of that class, which keeps the class
        distribution balanced. Xbox controller inputs are mapped to keys.

        :param int input_value: The controller input value to decide if the example is to be added to the dataset or not.
        :return: True if the example is to be added to the dataset, False otherwise.
        """
        example_class = self.io_handler.input_conversion(
            input_value=input_value, output_type="keyboard")

        if self.total != 0:
            prop: float = ((self.total - self.class_matrix[example_class]) /
                           self.total)**2
            if prop <= 0.7:
                prop = 0.1

            if np.random.rand() <= prop:
                self.class_matrix[example_class] += 1
                self.total += 1
                return True
            else:
                return False
        else:
            self.class_matrix[example_class] += 1
            self.total += 1
            return True

    @property
    def get_matrix(self) -> np.ndarray:
        """
        Get the class matrix.

        :return: The class matrix.
        """
        return self.class_matrix
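# Usage sketch (assumption): filter a stream of recorded input values with the class
# above so that over-represented classes are subsampled. The example values are made
# up, and an IOHandler implementation is assumed to be importable in this scope.
balancer = BalancedDataset()
example_inputs = [0, 0, 0, 4, 4, 8]  # illustrative raw input values

kept = [value for value in example_inputs if balancer.balance_dataset(value)]

print(f"Kept {len(kept)} of {len(example_inputs)} examples")
print(f"Per-class counts: {balancer.get_matrix}")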
def load(self, path):
    """Loads builder attributes

    Args:
        path (str): file path
    """
    kwargs = io.load_pickle(path)
    del kwargs['ndims_']
    self.__init__(**kwargs)
def test_sort_shakespeare(self):
    shakespeare = IOHandler(
        '../../Assets/Files/shakespeare-complete-works.txt')
    actual, expected = shakespeare.words, shakespeare.sorted_words
    self.assertTrue(len(actual) > 0)
    actual = self.alg(actual)
    self.assertEqual(expected, actual)
    self.assertCountEqual(expected, actual)
    self.assertSequenceEqual(expected, actual)
def load(cls, path):
    """Loads serialized file to initialize class instance

    Args:
        path (str): Path to file
    """
    buffer = io.load_dill(path)
    if not isinstance(buffer, cls):
        raise TypeError("Loaded serialized file is not of proper class")
    return copy.deepcopy(buffer)
def load(self, path):
    """Loads serialized file to initialize ConfigFile instance

    Args:
        path (str): path to file
    """
    kwargs = io.load_pickle(path)
    kwargs["optimizer"] = kwargs["optimizer_class"](
        **kwargs["optimizer_config"])
    del kwargs["session_dir"], kwargs["optimizer_class"], kwargs[
        "optimizer_config"]
    self.__init__(**kwargs)
def load(self, path):
    """Loads builder attributes

    Args:
        path (str): file path
    """
    kwargs = io.load_pickle(path)
    del kwargs['ndims_']
    if "use_segmentation" not in kwargs.keys():
        # TODO: add a more generic way to set missing attributes to their defaults
        kwargs["use_segmentation"] = False
    self.__init__(**kwargs)
def fit(self,
        train_ids,
        val_ids,
        generator="luna",
        loop=True,
        shuffle=True,
        use_affine=False):
    """Trains model

    Args:
        train_ids (list): list of training scans ids
        val_ids (list): list of validation scans ids
        generator (str): generator type to use {"luna", "segmentation", "atlas", "atlas_seg"}
        loop (boolean): If True, endlessly loop on data (default: True)
        shuffle (boolean): If True, scans are shuffled (default: True)
        use_affine (boolean): forwarded to the data generators (default: False)
    """
    self.logger.verbose(f"Number of training scans : {len(train_ids)}\n")
    self.logger.verbose(f"Number of validation scans : {len(val_ids)}\n")

    pd.DataFrame(train_ids).to_csv(os.path.join(
        self.config.session_dir, LunaTrainer.train_ids_filename),
                                   index=False,
                                   header=False)
    pd.DataFrame(val_ids).to_csv(os.path.join(
        self.config.session_dir, LunaTrainer.val_ids_filename),
                                 index=False,
                                 header=False)

    (width, height, depth) = self.config.input_shape

    if generator == "luna":
        train_gen = gen.scan_generator(train_ids, width, height, depth,
                                       loop, shuffle, use_affine)
        val_gen = gen.scan_generator(val_ids, width, height, depth, loop,
                                     shuffle, use_affine)
    elif generator == "segmentation":
        train_gen = gen.scan_and_seg_generator(train_ids, width, height,
                                               depth, loop, shuffle,
                                               use_affine)
        val_gen = gen.scan_and_seg_generator(val_ids, width, height, depth,
                                             loop, shuffle, use_affine)
    elif generator == "atlas":
        if not self.config.atlas_id:
            raise RuntimeError(
                "Must specify an atlas id if using atlas registration")
        train_gen = gen.atlas_generator(self.config.atlas_id, train_ids,
                                        width, height, depth, loop,
                                        shuffle, use_affine)
        val_gen = gen.atlas_generator(self.config.atlas_id, val_ids, width,
                                      height, depth, loop, shuffle,
                                      use_affine)
    elif generator == "atlas_seg":
        if not self.config.atlas_id:
            raise RuntimeError(
                "Must specify an atlas id if using atlas registration")
        train_gen = gen.atlas_seg_generator(self.config.atlas_id,
                                            train_ids, width, height,
                                            depth, loop, shuffle,
                                            use_affine)
        val_gen = gen.atlas_seg_generator(self.config.atlas_id, val_ids,
                                          width, height, depth, loop,
                                          shuffle, use_affine)
    else:
        raise UnboundLocalError("Unknown specified generator")

    self.logger.verbose("Compiling model :\n")
    self.logger.verbose(f"\t - Generator : {generator}\n")
    self.logger.verbose(
        f"\t - Optimizer : {self.config.optimizer.__dict__}\n")
    self.logger.verbose(f"\t - Losses : {self.config.losses}\n")
    self.logger.verbose(
        f"\t - Losses weights : {self.config.loss_weights}\n")
    self.logger.verbose(f"\t - Callbacks : {self.config.callbacks}\n")

    self.model_.compile(optimizer=self.config.optimizer,
                        loss=self.config.losses,
                        loss_weights=self.config.loss_weights,
                        metrics=self.config.metrics)

    self.logger.verbose("******** Initiating training *********")

    validation_steps = max(int(0.2 * self.config.steps_per_epoch), 1)

    with tf.device(self.device_):
        training_loss = self.model_.fit_generator(
            generator=train_gen,
            initial_epoch=self.config.initial_epoch,
            epochs=self.config.epochs,
            callbacks=self.config.callbacks,
            steps_per_epoch=self.config.steps_per_epoch,
            verbose=self.verbose,
            validation_data=val_gen,
            validation_steps=validation_steps)

    io.save_json(os.path.join(self.main_dir_, "training_history.json"),
                 training_loss.history)
def on_epoch_end(self, epoch, logs={}):
    observations_subdir_format = ConfigFile.observations_subdir_format.format(
        epoch=epoch + 1, **logs)
    src_filepath = self.src_filepath.format(epoch=epoch + 1, **logs)
    tgt_filepath = self.tgt_filepath.format(epoch=epoch + 1, **logs)
    pred_filepath = self.pred_filepath.format(epoch=epoch + 1, **logs)
    grad_x_filepath = self.grad_x_filepath.format(epoch=epoch + 1, **logs)
    grad_y_filepath = self.grad_y_filepath.format(epoch=epoch + 1, **logs)
    pred_seg_filepath = self.pred_seg_filepath.format(epoch=epoch + 1, **logs)

    observations_dir = os.path.join(self.session_dir,
                                    ConfigFile.observations_dirname)
    io.mkdir(observations_subdir_format, observations_dir)

    src_gen = loader.preprocess_scans([self.src_id], *self.input_shape)
    tgt_gen = loader.preprocess_scans([self.tgt_id], *self.input_shape)
    src = next(src_gen)[0][np.newaxis, :, :, :, np.newaxis]
    tgt = next(tgt_gen)[0][np.newaxis, :, :, :, np.newaxis]

    if self.use_segmentation:
        tgt_seg_gen = loader.preprocess_segmentations([self.tgt_id],
                                                      *self.input_shape)
        tgt_seg = next(tgt_seg_gen)[0][np.newaxis, :, :, :, np.newaxis]
        output = self.model.predict([src, tgt, tgt_seg])
    else:
        output = self.model.predict([src, tgt])

    fig, _ = handler.display_n_slices(src.squeeze(), n=4, return_fig=True)
    fig.savefig(
        os.path.join(observations_dir, observations_subdir_format,
                     src_filepath))
    plt.close()

    fig, _ = handler.display_n_slices(tgt.squeeze(), n=4, return_fig=True)
    fig.savefig(
        os.path.join(observations_dir, observations_subdir_format,
                     tgt_filepath))
    plt.close()

    fig, _ = handler.display_n_slices(output[0].squeeze(), n=4,
                                      return_fig=True)
    fig.savefig(
        os.path.join(observations_dir, observations_subdir_format,
                     pred_filepath))
    plt.close()

    fig, _ = handler.display_n_slices(output[1].squeeze()[:, :, :, 0],
                                      n=4, return_fig=True)
    fig.savefig(
        os.path.join(observations_dir, observations_subdir_format,
                     grad_x_filepath))
    plt.close()

    fig, _ = handler.display_n_slices(output[1].squeeze()[:, :, :, 1],
                                      n=4, return_fig=True)
    fig.savefig(
        os.path.join(observations_dir, observations_subdir_format,
                     grad_y_filepath))
    plt.close()

    if self.use_segmentation:
        fig, _ = handler.display_n_slices(output[2].squeeze(), n=4,
                                          return_fig=True)
        fig.savefig(
            os.path.join(observations_dir, observations_subdir_format,
                         pred_seg_filepath))
        plt.close()

    del fig, _
def test_sort_shakespeare(self):
    words = IOHandler(
        '../../Assets/Files/shakespeare-complete-works.txt').words
    self.assertTrue(len(words) > 0)
    with self.assertRaises(TimeoutError):
        self.alg(words)
class Tedd1104Dataset(Dataset):
    """TEDD1104 dataset."""

    def __init__(
        self,
        dataset_dir: str,
        hide_map_prob: float,
        dropout_images_prob: List[float],
        control_mode: str = "keyboard",
        train: bool = False,
    ):
        """
        INIT

        :param str dataset_dir: The directory of the dataset.
        :param float hide_map_prob: Probability of hiding the minimap (0<=hide_map_prob<=1)
        :param List[float] dropout_images_prob: Probability of dropping an image (0<=dropout_images_prob<=1)
        :param str control_mode: Type of the user input: "keyboard" or "controller"
        :param bool train: If True, the dataset is used for training.
        """
        self.dataset_dir = dataset_dir
        self.hide_map_prob = hide_map_prob
        self.dropout_images_prob = dropout_images_prob
        self.control_mode = control_mode.lower()

        assert self.control_mode in [
            "keyboard",
            "controller",
        ], f"{self.control_mode} control mode not supported. Supported dataset types: [keyboard, controller]."

        assert 0 <= hide_map_prob <= 1.0, (
            f"hide_map_prob not in 0 <= hide_map_prob <= 1.0 range. "
            f"hide_map_prob: {hide_map_prob}")

        assert len(dropout_images_prob) == 5, (
            f"dropout_images_prob must have 5 probabilities, one for each image in the sequence. "
            f"dropout_images_prob len: {len(dropout_images_prob)}")

        for dropout_image_prob in dropout_images_prob:
            assert 0 <= dropout_image_prob <= 1.0, (
                f"All probabilities in dropout_image_prob must be in the range 0 <= dropout_image_prob <= 1.0. "
                f"dropout_images_prob: {dropout_images_prob}")

        if train:
            self.transform = transforms.Compose([
                RemoveMinimap(hide_map_prob=hide_map_prob),
                RemoveImage(dropout_images_prob=dropout_images_prob),
                SplitImages(),
                ToTensor(),
                SequenceColorJitter(),
                Normalize(),
                MergeImages(),
            ])
        else:
            self.transform = transforms.Compose([
                # RemoveMinimap(hide_map_prob=hide_map_prob),
                # RemoveImage(dropout_images_prob=dropout_images_prob),
                SplitImages(),
                ToTensor(),
                # SequenceColorJitter(),
                Normalize(),
                MergeImages(),
            ])

        self.dataset_files = glob.glob(os.path.join(dataset_dir, "*.jpeg"))
        self.IOHandler = IOHandler()

    def __len__(self):
        """
        Returns the length of the dataset.

        :return: int - Length of the dataset.
        """
        return len(self.dataset_files)

    def __getitem__(self, idx):
        """
        Returns a sample from the dataset.

        :param int idx: Index of the sample.
        :return: Dict[str, torch.tensor] - Transformed sequence of images
        """
        if torch.is_tensor(idx):
            idx = int(idx)

        img_name = self.dataset_files[idx]
        image = None
        while image is None:
            try:
                image = io.imread(img_name)
            except (ValueError, FileNotFoundError) as err:
                error_message = str(err).split("\n")[-1]
                print(
                    f"Error reading image: {img_name} probably a corrupted file.\n"
                    f"Exception: {error_message}\n"
                    f"We will load a random image instead.")
                img_name = self.dataset_files[int(
                    len(self.dataset_files) * torch.rand(1))]

        y = self.IOHandler.imagename_input_conversion(
            image_name=img_name,
            output_type=self.control_mode,
        )

        sample = {"image": image, "y": y}

        return self.transform(sample)
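# Usage sketch (assumption): wrap the dataset above in a standard PyTorch DataLoader.
# The dataset directory, batch size and worker count are placeholders, and the exact
# keys of each batch depend on the transform pipeline defined above.
from torch.utils.data import DataLoader

dataset = Tedd1104Dataset(
    dataset_dir="path/to/dataset_dir",
    hide_map_prob=0.2,
    dropout_images_prob=[0.1, 0.1, 0.1, 0.1, 0.1],
    control_mode="keyboard",
    train=True,
)
loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2)

batch = next(iter(loader))
print(list(batch.keys()))  # inspect what the transforms return for one batch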