def __next__(self):
    length = len(self)
    if length == 0:
        raise StopIteration()
    if self._curent_position == length:
        if self._loop:
            self.reset()
        else:
            raise StopIteration()
    entry = self._datasets[self._curent_position]
    env = getattr(self, 'rasterio_env', {})
    self._curent_position += 1
    entry_name, entry_components = entry
    new_components = {}
    cache_data = self._cache_data
    use_tensorflow_io = False
    for component_name, component_path in entry_components.items():
        if isinstance(component_path, DatasetReader):
            component_path = component_path.name
        local_component_path = component_path
        url_components = urlparse(component_path)
        if not url_components.scheme:
            cache_data = False
            if url_components.path.startswith('/vsigs/'):
                # We should check if we run inside GCP ML Engine
                cache_data = True
                use_tensorflow_io = True
                component_path = url_components.path[6:]
                component_path = "gs:/" + component_path
        else:
            if url_components.scheme == 'file':
                local_component_path = url_components.path
            use_tensorflow_io = False
            cache_data = False
        with rasterio.Env(**env):
            if use_tensorflow_io:
                real_path = component_path
                data = IOUtils.open_file(real_path, "rb").read()
                if cache_data:
                    hash = sha224(component_path.encode("utf8")).hexdigest()
                    hash_part = "/".join(list(hash)[:3])
                    dataset_path = os.path.join(self._temp_dir, hash_part)
                    if not IOUtils.file_exists(dataset_path):
                        IOUtils.recursive_create_dir(dataset_path)
                    dataset_path = os.path.join(dataset_path, os.path.basename(component_path))
                    if not IOUtils.file_exists(dataset_path):
                        f = IOUtils.open_file(dataset_path, "wb")
                        f.write(data)
                        f.close()
                    component_src = rasterio.open(dataset_path)
                else:
                    with NamedTemporaryFile() as tmpfile:
                        tmpfile.write(data)
                        tmpfile.flush()
                        component_src = rasterio.open(tmpfile.name)
            else:
                component_src = rasterio.open(local_component_path)
            new_components[component_name] = component_src
    return (entry_name, new_components)
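
# A minimal, standalone sketch (not part of the loader) of the path handling done in
# __next__ above: a "/vsigs/<bucket>/<key>" component is rewritten to
# "gs://<bucket>/<key>", and when caching is enabled the downloaded bytes land in a
# shard directory built from the first three hex digits of the sha224 of the rewritten
# path. The function name, the temp_dir default and the example path are illustrative.
import os
from hashlib import sha224
from urllib.parse import urlparse


def cache_location(component_path, temp_dir="/tmp/cache"):
    url_components = urlparse(component_path)
    if not url_components.path.startswith('/vsigs/'):
        # Local or file:// paths are opened directly and never cached
        return component_path, None
    gs_path = "gs:/" + url_components.path[6:]  # "/vsigs/b/k" -> "gs://b/k"
    digest = sha224(gs_path.encode("utf8")).hexdigest()
    shard = "/".join(list(digest)[:3])          # e.g. "a/7/f"
    return gs_path, os.path.join(temp_dir, shard, os.path.basename(gs_path))


# Example:
#   cache_location("/vsigs/my-bucket/tiles/B04.tif")
#   -> ("gs://my-bucket/tiles/B04.tif", "/tmp/cache/<h0>/<h1>/<h2>/B04.tif")
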
def cleanup_dir(temp_dir):
    IOUtils.delete_recursively(temp_dir)
def train_handler(config, args):
    if args.switch_to_prefix:
        current_dir = os.path.abspath(os.path.dirname(__file__))
        current_dir = os.path.abspath(os.path.join(current_dir, "..", "..", "..", "..", ".."))
        log.info("Switching to %s", current_dir)
        os.chdir(current_dir)
        log.info("Current dir: %s", os.path.abspath(os.getcwd()))
    with IOUtils.open_file(args.config, "r") as cfg_file:
        config = yaml.load(cfg_file, Loader=Loader)
    model_name = config["model_name"]
    model_type = config["model_type"]
    random_seed = config.get("random_seed", None)
    model_config = config["model"]
    tilling_config = config.get("tilling", {})
    if 'window_size' in tilling_config:
        window_size = tilling_config["window_size"]
    else:
        log.warning("Using deprecated `window_size` location")
        window_size = config["window_size"]
    if 'stride_size' in tilling_config:
        stride_size = tilling_config["stride_size"]
    else:
        log.warning("Using deprecated `stride_size` location")
        stride_size = config["stride_size"]
    if random_seed is not None:
        log.info("Setting Python and NumPy seed to: %d", random_seed)
        random.seed(random_seed)
        np.random.seed(random_seed)
    else:
        log.warning("No random seed specified!")
    limit_validation_datasets = config.get("limit_validation_datasets", None)
    limit_train_datasets = config.get("limit_train_datasets", None)
    data_source = config.get("data_source")
    mapping = config["mapping"]
    augment = config.get("augment", False)
    input_channels = len(mapping["inputs"])
    log.info("Input has %d channels", input_channels)
    log.info("Model type is: %s", model_type)
    if args.split is None:
        dataset_cache = config.get("dataset_cache", None)
        log.debug("dataset_cache is set from config to %s", dataset_cache)
        dataset_cache = dataset_cache.format(model_name=model_name,
                                             time=str(time.time()),
                                             hostname=socket.gethostname(),
                                             user=getpass.getuser())
    else:
        if not IOUtils.file_exists(args.split):
            raise FileNotFoundError("Invalid split file")
        dataset_cache = args.split
    log.info("dataset_cache will be directed to: %s", dataset_cache)
    if data_source.input_source is None:
        data_source.set_input_source(args.input)
    log.info("Using datasource: %s", data_source)
    if not IOUtils.file_exists(dataset_cache):
        log.info("Loading datasets")
        train_datasets, validation_datasets = data_source.get_dataset_loader()
        dump = (train_datasets._datasets, validation_datasets._datasets)
        log.info("Saving dataset cache to %s", dataset_cache)
        with IOUtils.open_file(dataset_cache, "w") as f:
            f.write(yaml.dump(dump, Dumper=Dumper))
    else:
        log.info("Loading training datasets from %s", dataset_cache)
        train_datasets, validation_datasets = yaml.load(IOUtils.open_file(dataset_cache), Loader=Loader)
        if isinstance(train_datasets, DatasetLoader):
            log.warning("Converting from legacy format: `train_datasets`")
            train_datasets = train_datasets._datasets
        if isinstance(validation_datasets, DatasetLoader):
            log.warning("Converting from legacy format: `validation_datasets`")
            validation_datasets = validation_datasets._datasets
    train_datasets, validation_datasets = data_source.build_dataset_loaders(train_datasets,
                                                                            validation_datasets)
    train_datasets.loop = True
    validation_datasets.loop = True
    if limit_validation_datasets:
        validation_datasets = validation_datasets[:limit_validation_datasets]
    if limit_train_datasets:
        train_datasets = train_datasets[:limit_train_datasets]
    pre_callbacks = []
    if augment:
        log.info("Enabling global level augmentation. Verify if this is desired!")

        def augment_callback(X, y):
            from ..preprocessing.augmentation import Augmentation
            aug = Augmentation(config)
            return aug.augment(X, y)

        pre_callbacks.append(augment_callback)
    log.info("Using %d training datasets", len(train_datasets))
    log.info("Using %d validation datasets", len(validation_datasets))
    if model_type == "keras":
        train_keras(model_name, window_size, stride_size, model_config, mapping,
                    train_datasets, validation_datasets,
                    pre_callbacks=pre_callbacks,
                    enable_multi_gpu=args.keras_multi_gpu,
                    gpus=args.keras_gpus,
                    cpu_merge=args.keras_disable_cpu_merge,
                    cpu_relocation=args.keras_enable_cpu_relocation,
                    batch_size=args.keras_batch_size,
                    random_seed=random_seed)
        log.info("Keras Training completed")
    elif model_type == "sklearn":
        train_sklearn(model_name, window_size, stride_size, model_config, mapping,
                      train_datasets, validation_datasets)
        log.info("Scikit Training completed")
    else:
        log.critical("Unknown model type: %s", model_type)
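
# For reference, an illustrative (untested) configuration fragment consumed by
# train_handler above. Only the key names are taken from the lookups in this
# function; the values and the data_source entry are placeholders.
#
#   model_name: my_model
#   model_type: keras                # or "sklearn"
#   random_seed: 42
#   dataset_cache: /tmp/{model_name}-{user}-{time}.yaml
#   tilling:
#     window_size: [256, 256]
#     stride_size: 256
#   mapping:
#     inputs: [...]                  # one entry per input channel
#     target: [...]
#   augment: false
#   limit_train_datasets: null
#   limit_validation_datasets: null
#   model: {...}                     # forwarded to train_keras()/train_sklearn() as model_config
#   data_source: ...                 # loader object built by the YAML loader; not shown here
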
def __next__(self):
    length = len(self)
    if length == 0:
        raise StopIteration()
    if self._curent_position == length:
        if self._loop:
            if self.randomise_on_loop:
                random.shuffle(self._datasets)
            self.reset()
        else:
            raise StopIteration()
    entry = self._datasets[self._curent_position]
    env = getattr(self, 'rasterio_env', {})
    self._curent_position += 1
    entry_name, entry_components = entry
    new_components = {}
    cache_data = self._cache_data
    use_tensorflow_io = False
    for component_name, component_path_entry in entry_components.items():
        if isinstance(component_path_entry, (RasterGenerator, GeoDataFrame, MemoryFile)):
            new_components[component_name] = component_path_entry
            continue
        elif isinstance(component_path_entry, DatasetReader):
            component_path = component_path_entry.name
        elif isinstance(component_path_entry, str):
            component_path = component_path_entry
        else:
            raise NotImplementedError("Unsupported type for component value")
        local_component_path = component_path
        url_components = urlparse(component_path)
        if not url_components.scheme:
            cache_data = False
            if url_components.path.startswith('/vsigs/'):
                # We should check if we run inside GCP ML Engine
                cache_data = True
                use_tensorflow_io = True
                component_path = url_components.path[6:]
                component_path = "gs:/" + component_path
        else:
            if url_components.scheme == 'file':
                local_component_path = url_components.path
            use_tensorflow_io = False
            cache_data = False
        with rasterio.Env(**env):
            if use_tensorflow_io:
                real_path = component_path
                data = IOUtils.open_file(real_path, "rb").read()
                if cache_data:
                    hash = sha224(component_path.encode("utf8")).hexdigest()
                    hash_part = "/".join(list(hash)[:3])
                    dataset_path = os.path.join(self._temp_dir, hash_part)
                    if not IOUtils.file_exists(dataset_path):
                        IOUtils.recursive_create_dir(dataset_path)
                    dataset_path = os.path.join(dataset_path, os.path.basename(component_path))
                    if not IOUtils.file_exists(dataset_path):
                        f = IOUtils.open_file(dataset_path, "wb")
                        f.write(data)
                        f.close()
                    component_src = self.get_component_file_descriptor(dataset_path)
                else:
                    with NamedTemporaryFile() as tmpfile:
                        tmpfile.write(data)
                        tmpfile.flush()
                        component_src = self.get_component_file_descriptor(tmpfile.name)
            else:
                component_src = self.get_component_file_descriptor(local_component_path)
            new_components[component_name] = component_src
    # Trigger the generation of the dynamic components
    for component_name, component_path in new_components.items():
        if isinstance(component_path, RasterGenerator):
            new_components[component_name] = component_path(new_components)
    return entry_name, new_components
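
# A sketch of the dynamic-component hook used above. The only contract visible in
# __next__ is that a RasterGenerator stored as a component value is called with the
# dict of already-resolved components and that its return value replaces the entry.
# The subclass below is hypothetical: it assumes RasterGenerator can be subclassed
# without constructor arguments and that the "B04"/"B08" components resolve to
# rasterio dataset readers. It only illustrates the calling convention.
class NDVIGenerator(RasterGenerator):  # RasterGenerator is the same class referenced above
    def __call__(self, components):
        red = components["B04"].read(1).astype("float32")
        nir = components["B08"].read(1).astype("float32")
        # Derived raster returned for this entry; epsilon avoids division by zero
        return (nir - red) / (nir + red + 1e-6)
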
def train_keras(model_name,
                window_size,
                stride_size,
                model_config,
                mapping,
                train_datasets,
                validation_datasets,
                pre_callbacks=(),
                enable_multi_gpu=False,
                gpus=None,
                cpu_merge=True,
                cpu_relocation=False,
                batch_size=None,
                random_seed=None):
    log.info("Starting keras training")
    import tensorflow as tf

    # Seed initialization should happen as early as possible
    if random_seed is not None:
        log.info("Setting Tensorflow random seed to: %d", random_seed)
        tf.set_random_seed(random_seed)

    from keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau
    from ..tools.callbacks import ModelCheckpoint, CSVLogger
    from keras.optimizers import Adam
    from ..tools.utils import import_model_builder
    # Model is needed by the classification-layer replacement below
    from keras.models import load_model, Model
    from keras.layers import Conv2D
    from keras.utils import multi_gpu_model

    if batch_size is None:
        batch_size = model_config.get("batch_size", None)
    model_path = model_config["model_path"]
    model_loss = model_config.get("loss", "categorical_crossentropy")
    log.info("Using loss: %s", model_loss)
    model_metrics = model_config.get("metrics", "accuracy")
    # Make code compatible with previous version
    format_converter = model_config.get("format_converter", CategoricalConverter(2))
    swap_axes = model_config["swap_axes"]
    train_epochs = model_config["train_epochs"]
    prefetch_queue_size = model_config.get("prefetch_queue_size", 10)
    input_channels = len(mapping["inputs"])
    # Config key spelling preserved for backwards compatibility
    include_last_classification = model_config.get("include_classfication_layer", True)
    z_scaler = model_config.get('z_scaler', None)

    train_data = DataGenerator(train_datasets,
                               batch_size,
                               mapping["inputs"],
                               mapping["target"],
                               format_converter=format_converter,
                               swap_axes=swap_axes,
                               postprocessing_callbacks=pre_callbacks,
                               default_window_size=window_size,
                               default_stride_size=stride_size,
                               z_scaler=z_scaler)
    train_data = ThreadedDataGenerator(train_data, queue_size=prefetch_queue_size)
    validation_data = DataGenerator(validation_datasets,
                                    batch_size,
                                    mapping["inputs"],
                                    mapping["target"],
                                    format_converter=format_converter,
                                    swap_axes=swap_axes,
                                    default_window_size=window_size,
                                    default_stride_size=stride_size,
                                    z_scaler=z_scaler)
    validation_data = ThreadedDataGenerator(validation_data, queue_size=prefetch_queue_size)

    model_builder, model_builder_custom_options = import_model_builder(model_config["model_builder"])
    model_builder_option = model_config.get("options", {})
    # model_config is a mapping, so look these up with .get()
    # (getattr() on a dict always falls back to the default)
    steps_per_epoch = model_config.get("steps_per_epoch", len(train_data) // batch_size)
    validation_steps_per_epoch = model_config.get("validation_steps_per_epoch",
                                                  len(validation_data) // batch_size)
    log.info("Training data has %d tiles", len(train_data))
    log.info("Validation data has %d tiles", len(validation_data))
    log.info("validation_steps_per_epoch: %d", validation_steps_per_epoch)
    log.info("steps_per_epoch: %d", steps_per_epoch)

    load_only_weights = model_config.get("load_only_weights", False)
    checkpoint = model_config.get("checkpoint", None)
    callbacks = []
    early_stopping = model_config.get("early_stopping", None)
    adaptive_lr = model_config.get("adaptive_lr", None)
    tensor_board = model_config.get("tensor_board", False)
    tb_log_dir = model_config.get("tb_log_dir", os.path.join("/tmp/", model_name))  # TensorBoard log directory
    tb_log_dir = tb_log_dir.format(model_name=model_name,
                                   time=str(time.time()),
                                   hostname=socket.gethostname(),
                                   user=getpass.getuser())
    keras_logging = model_config.get("log", None)
    if not keras_logging:
        log.info("Keras logging is disabled")
    else:
        csv_log_file = keras_logging.format(model_name=model_name,
                                            time=str(time.time()),
                                            hostname=socket.gethostname(),
                                            user=getpass.getuser())
        dir_head, dir_tail = os.path.split(csv_log_file)
        if dir_tail and not IOUtils.file_exists(dir_head):
            log.info("Creating directory: %s", dir_head)
            IOUtils.recursive_create_dir(dir_head)
        log.info("Logging training data to csv file: %s", csv_log_file)
        csv_logger = CSVLogger(csv_log_file, separator=',', append=False)
        callbacks.append(csv_logger)
    if tensor_board:
        log.info("Registering TensorBoard callback")
        log.info("Event log dir set to: {}".format(tb_log_dir))
        tb_callback = TensorBoard(log_dir=tb_log_dir,
                                  histogram_freq=0,
                                  write_graph=True,
                                  write_images=True)
        callbacks.append(tb_callback)
        log.info("To access TensorBoard run: tensorboard --logdir {} --port <port_number> --host <host_ip>".format(tb_log_dir))
    if checkpoint:
        checkpoint_file = checkpoint["path"]
        log.info("Registering checkpoint callback")
        destination_file = checkpoint_file % {'model_name': model_name,
                                              'time': str(time.time()),
                                              'hostname': socket.gethostname(),
                                              'user': getpass.getuser()}
        dir_head, dir_tail = os.path.split(destination_file)
        if dir_tail and not IOUtils.file_exists(dir_head):
            log.info("Creating directory: %s", dir_head)
            IOUtils.recursive_create_dir(dir_head)
        log.info("Checkpoint data directed to: %s", destination_file)
        checkpoint_options = checkpoint.get("options", {})
        checkpoint_callback = ModelCheckpoint(destination_file, **checkpoint_options)
        callbacks.append(checkpoint_callback)
    log.info("Starting training")
    options = {
        'epochs': train_epochs,
        'callbacks': callbacks
    }
    if len(validation_data) > 0 and validation_steps_per_epoch:
        log.info("We have validation data")
        options['validation_data'] = validation_data
        options["validation_steps"] = validation_steps_per_epoch
        if early_stopping:
            log.info("Enabling early stopping %s", str(early_stopping))
            callback_early_stopping = EarlyStopping(**early_stopping)
            options["callbacks"].append(callback_early_stopping)
        if adaptive_lr:
            log.info("Enabling reduce lr on plateau: %s", str(adaptive_lr))
            callback_lr_loss = ReduceLROnPlateau(**adaptive_lr)
            options["callbacks"].append(callback_lr_loss)
    else:
        log.warning("No validation data available. Ignoring")
    final_model_location = model_path.format(model_name=model_name,
                                             time=str(time.time()),
                                             hostname=socket.gethostname(),
                                             user=getpass.getuser())
    log.info("Model path is %s", final_model_location)
    existing_model_location = None
    if IOUtils.file_exists(final_model_location):
        existing_model_location = final_model_location
    if existing_model_location is not None and not load_only_weights:
        log.info("Loading existing model from: %s", existing_model_location)
        custom_objects = {}
        if model_builder_custom_options is not None:
            custom_objects.update(model_builder_custom_options)
        if enable_multi_gpu:
            with tf.device('/cpu:0'):
                model = load_model(existing_model_location, custom_objects=custom_objects)
        else:
            model = load_model(existing_model_location, custom_objects=custom_objects)
        nr_classes = model_builder_option.get('nr_classes', None)
        if (not include_last_classification) and nr_classes:
            # Replace the final classification layer with a fresh softmax convolution
            model.layers.pop()
            l = Conv2D(25, (1, 1), activation='softmax', name="conv_final")(model.layers[-1].output)
            layers = [ll for ll in model.layers]
            layers.append(l)
            m = Model(input=layers[0].input, output=layers[-1])
            model = m
        log.info("Model loaded!")
    else:
        log.info("Building model")
        model_options = model_builder_option
        model_options['n_channels'] = input_channels
        input_height, input_width = window_size
        model_options['input_width'] = model_builder_option.get('input_width', input_width)
        model_options['input_height'] = model_builder_option.get('input_height', input_height)
        activation = model_config.get('activation', None)
        if activation:
            model_options["activation"] = activation
        if enable_multi_gpu:
            with tf.device('/cpu:0'):
                model = model_builder(**model_options)
        else:
            model = model_builder(**model_options)
        log.info("Model built")
    if load_only_weights and existing_model_location is not None:
        log.info("Loading weights from %s", existing_model_location)
        model.load_weights(existing_model_location)
        log.info("Finished loading weights")
    optimiser = model_config.get("optimiser", None)
    if optimiser is None:
        log.info("No optimiser specified. Using default Adam")
        optimiser = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
    if enable_multi_gpu:
        log.info("Using Keras Multi-GPU Training")
        fit_model = multi_gpu_model(model, gpus=gpus, cpu_merge=cpu_merge, cpu_relocation=cpu_relocation)
    else:
        log.info("Using Keras default GPU Training")
        fit_model = model
    log.info("Compiling model")
    fit_model.compile(loss=model_loss, optimizer=optimiser, metrics=model_metrics)
    log.info("Model compiled")
    model.summary()
    fit_model.fit_generator(train_data, steps_per_epoch, **options)
    log.info("Saving model to %s", os.path.abspath(final_model_location))
    dir_head, dir_tail = os.path.split(final_model_location)
    if dir_tail and not IOUtils.file_exists(dir_head):
        log.info("Creating directory: %s", dir_head)
        IOUtils.recursive_create_dir(dir_head)
    model.save(final_model_location)
    log.info("Done saving")
    log.info("Training completed")