def collect_experiment(key, root_uri, output_dir, get_pred_package=False):
    print('\nCollecting experiment {}...\n'.format(key))

    if root_uri.startswith('s3://'):
        predict_package_uris = list_paths(
            join(root_uri, key, 'bundle'), ext='predict_package.zip')
        eval_json_uris = list_paths(
            join(root_uri, key, 'eval'), ext='eval.json')
    else:
        predict_package_uris = glob.glob(
            join(root_uri, key, 'bundle', '*', 'predict_package.zip'))
        eval_json_uris = glob.glob(
            join(root_uri, key, 'eval', '*', 'eval.json'))

    if len(predict_package_uris) > 1 or len(eval_json_uris) > 1:
        print('Cannot collect from key with multiple experiments!!!')
        return

    if len(predict_package_uris) == 0 or len(eval_json_uris) == 0:
        print('Missing output!!!')
        return

    predict_package_uri = predict_package_uris[0]
    eval_json_uri = eval_json_uris[0]

    make_dir(join(output_dir, key))
    if get_pred_package:
        download_or_copy(predict_package_uri, join(output_dir, key))

    download_or_copy(eval_json_uri, join(output_dir, key))

    eval_json = file_to_json(join(output_dir, key, 'eval.json'))
    pprint.pprint(eval_json['overall'], indent=4)
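# A minimal usage sketch for collect_experiment; the key and URIs below are
# hypothetical placeholders, not values from any real experiment.
collect_experiment(
    'my-experiment',             # hypothetical experiment key
    's3://my-bucket/rv-output',  # hypothetical root URI
    '/tmp/collected',            # local directory for collected output
    get_pred_package=False)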
def download_config(self, class_map):
    """Download a model and backend config and update its fields.

    This is used before training a model. It downloads and unzips a number
    of files that are needed to train a model, and then updates the backend
    config file with local paths to these files. These files include the
    pretrained model, the class map, and the training and validation
    datasets. The backend config is a config file for the TF Object
    Detection API; examples can be found at
    https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs # noqa

    Args:
        class_map: (ClassMap) mapping from class names to ids, used to
            generate the TF label map
    """
    from rastervision.protos.tf_object_detection.pipeline_pb2 \
        import TrainEvalPipelineConfig

    # Parse configuration.
    # We must remove 'nulls' that appear due to translating empty
    # messages. These appear when translating between text and JSON based
    # protobuf messages, and using the google.protobuf.Struct type to store
    # the JSON. This appears when TFOD uses empty message types as an enum.
    config = json_format.ParseDict(
        replace_nones_in_dict(self.config.tfod_config, {}),
        TrainEvalPipelineConfig())

    # Update config using local paths.
    if config.train_config.fine_tune_checkpoint:
        pretrained_model_path = self.download_pretrained_model(
            config.train_config.fine_tune_checkpoint)
        config.train_config.fine_tune_checkpoint = pretrained_model_path

    # Save TF label map based on class_map.
    class_map_path = os.path.join(self.temp_dir, 'label-map.pbtxt')
    tf_class_map = make_tf_class_map(class_map)
    save_tf_class_map(tf_class_map, class_map_path)

    train_record_uris = list_paths(self.training_download_uri, 'record')
    config.train_input_reader.tf_record_input_reader.input_path[:] = \
        train_record_uris
    config.train_input_reader.label_map_path = class_map_path

    eval_record_uris = list_paths(self.validation_download_uri, 'record')
    config.eval_input_reader[0].tf_record_input_reader.input_path[:] = \
        eval_record_uris
    config.eval_input_reader[0].label_map_path = class_map_path

    # Save an updated copy of the config file.
    config_path = join(self.temp_dir, 'ml.config')
    config_str = text_format.MessageToString(config)
    with open(config_path, 'w') as config_file:
        config_file.write(config_str)
    return config_path
def test_list_paths_s3(self):
    path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
    s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
    s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
    directory = os.path.dirname(path)
    make_dir(directory, check_empty=False)

    str_to_file(self.lorem, path)
    upload_or_copy(path, s3_path)

    self.assertEqual(len(list_paths(s3_directory)), 1)
def collect_eval_dir(root_uri):
    eval_json_uris = list_paths(join(root_uri, 'eval'), ext='eval.json')
    for eval_json_uri in eval_json_uris:
        eval_json = file_to_json(eval_json_uri)
        print(basename(dirname(eval_json_uri)))
        print(eval_json['overall'][-1]['f1'])
        print()
def get_scene_ids(self):
    label_dir = os.path.join(self.base_uri, self.label_dir)
    label_paths = list_paths(label_dir, ext='.geojson')
    label_re = re.compile(
        r'.*{}(\d+)\.geojson'.format(self.label_fn_prefix))
    scene_ids = [
        label_re.match(label_path).group(1) for label_path in label_paths
    ]
    return scene_ids
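# Example of the id extraction above: with a hypothetical label_fn_prefix of
# 'buildings_AOI_2_Vegas_img', the path
# '.../buildings_AOI_2_Vegas_img123.geojson' yields scene id '123'.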
def _download(split, output_dir):
    for uri in list_paths(self.base_uri, 'record'):
        base_name = os.path.basename(uri)
        if base_name.startswith(split):
            record_path = self.download_if_needed(uri)
            target_record_path = os.path.join(
                output_dir, os.path.basename(record_path))
            shutil.move(record_path, target_record_path)
def test_sync_from_dir_noop_local(self):
    path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
    src = os.path.join(self.temp_dir.name, 'lorem')
    make_dir(src, check_empty=False)

    fs = FileSystem.get_file_system(src, 'r')
    fs.write_bytes(path, bytes([0x00, 0x01]))
    sync_from_dir(src, src, delete=True)

    self.assertEqual(len(list_paths(src)), 1)
def test_sync_to_dir_local(self):
    path = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
    src = os.path.dirname(path)
    dst = os.path.join(self.temp_dir.name, 'xxx')
    make_dir(src, check_empty=False)
    make_dir(dst, check_empty=False)

    fs = FileSystem.get_file_system(path, 'r')
    fs.write_bytes(path, bytes([0x00, 0x01]))
    sync_to_dir(src, dst, delete=True)

    self.assertEqual(len(list_paths(dst)), 1)
def test_copy_to_local(self):
    path1 = os.path.join(self.temp_dir.name, 'lorem', 'ipsum.txt')
    path2 = os.path.join(self.temp_dir.name, 'yyy', 'ipsum.txt')
    dir1 = os.path.dirname(path1)
    dir2 = os.path.dirname(path2)
    make_dir(dir1, check_empty=False)
    make_dir(dir2, check_empty=False)

    str_to_file(self.lorem, path1)
    upload_or_copy(path1, path2)

    self.assertEqual(len(list_paths(dir2)), 1)
def fitness(data_img_dir, truth_img_dir, compiler, individual):
    """Return a score representing the fitness of a particular program.

    Args:
        data_img_dir: directory of GeoTIFFs containing multiband raster data
        truth_img_dir: directory of corresponding ground-truth rasters
        compiler: function that compiles an individual into a callable
        individual: the individual (program) to be evaluated
    """
    # We assume for the time being that list_paths() returns paths in a
    # consistent order, because it seems to.
    eval_data = zip(list_paths(data_img_dir), list_paths(truth_img_dir))
    total_error = 0
    func = compiler(expr=individual)
    for input_file, truth_file in eval_data:
        # Load input and truth data.
        input_pixels, truth_pixels = read_input_truth(input_file, truth_file)
        output = apply_to_raster(func, input_pixels, truth_pixels.shape)
        errors = output - truth_pixels
        # Accumulate mean squared error.
        total_error += np.mean(np.square(errors))
    return (total_error, )
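# A more defensive pairing than relying on list_paths() ordering: sort both
# listings before zipping. This is a sketch; it assumes the input and truth
# filenames share a naming convention that sorts identically.
eval_data = zip(sorted(list_paths(data_img_dir)),
                sorted(list_paths(truth_img_dir)))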
def _download(split, output_dir):
    scene_class_dirs = []
    for uri in list_paths(self.base_uri, 'zip'):
        base_name = os.path.basename(uri)
        if base_name.startswith(split):
            data_zip_path = self.download_if_needed(uri)
            data_dir = os.path.splitext(data_zip_path)[0]
            shutil.unpack_archive(data_zip_path, data_dir)

            # Append a dict of the directories containing this partition's
            # labeled images, keyed by class directory name.
            data_dir_subdirectories = next(os.walk(data_dir))[1]
            scene_class_dirs.append(
                dict([(class_name, os.path.join(data_dir, class_name))
                      for class_name in data_dir_subdirectories]))
    merge_class_dirs(scene_class_dirs, output_dir)
def build_data(self):
    cfg = self.cfg
    batch_sz = cfg.solver.batch_sz
    num_workers = cfg.data.num_workers
    label_names = cfg.data.labels

    # Download and unzip data.
    if cfg.data.data_format == 'image_folder':
        if cfg.data.uri.startswith('s3://') or cfg.data.uri.startswith('/'):
            data_uri = cfg.data.uri
        else:
            data_uri = join(cfg.base_uri, cfg.data.uri)

        data_dirs = []
        zip_uris = [data_uri] if data_uri.endswith('.zip') else list_paths(
            data_uri, 'zip')
        for zip_ind, zip_uri in enumerate(zip_uris):
            zip_path = get_local_path(zip_uri, self.data_cache_dir)
            if not isfile(zip_path):
                zip_path = download_if_needed(zip_uri, self.data_cache_dir)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                data_dir = join(self.tmp_dir, 'data', str(zip_ind))
                data_dirs.append(data_dir)
                zipf.extractall(data_dir)

    train_ds, valid_ds, test_ds = [], [], []
    for data_dir in data_dirs:
        train_dir = join(data_dir, 'train')
        valid_dir = join(data_dir, 'valid')

        # Build datasets.
        transform = Compose(
            [Resize((cfg.data.img_sz, cfg.data.img_sz)), ToTensor()])
        aug_transform = Compose([
            RandomHorizontalFlip(),
            RandomVerticalFlip(),
            ColorJitter(0.1, 0.1, 0.1, 0.1),
            Resize((cfg.data.img_sz, cfg.data.img_sz)),
            ToTensor()
        ])

        if isdir(train_dir):
            if cfg.overfit_mode:
                train_ds.append(
                    ImageFolder(
                        train_dir, transform=transform, classes=label_names))
            else:
                train_ds.append(
                    ImageFolder(
                        train_dir,
                        transform=aug_transform,
                        classes=label_names))

        if isdir(valid_dir):
            valid_ds.append(
                ImageFolder(
                    valid_dir, transform=transform, classes=label_names))
            test_ds.append(
                ImageFolder(
                    valid_dir, transform=transform, classes=label_names))

    train_ds, valid_ds, test_ds = (ConcatDataset(train_ds),
                                   ConcatDataset(valid_ds),
                                   ConcatDataset(test_ds))

    if cfg.overfit_mode:
        train_ds = Subset(train_ds, range(batch_sz))
        valid_ds = train_ds
        test_ds = train_ds
    elif cfg.test_mode:
        train_ds = Subset(train_ds, range(batch_sz))
        valid_ds = Subset(valid_ds, range(batch_sz))
        test_ds = Subset(test_ds, range(batch_sz))

    train_dl = DataLoader(
        train_ds,
        shuffle=True,
        batch_size=batch_sz,
        num_workers=num_workers,
        pin_memory=True)
    valid_dl = DataLoader(
        valid_ds,
        shuffle=True,
        batch_size=batch_sz,
        num_workers=num_workers,
        pin_memory=True)
    test_dl = DataLoader(
        test_ds,
        shuffle=True,
        batch_size=batch_sz,
        num_workers=num_workers,
        pin_memory=True)

    self.train_ds, self.valid_ds, self.test_ds = (train_ds, valid_ds,
                                                  test_ds)
    self.train_dl, self.valid_dl, self.test_dl = (train_dl, valid_dl,
                                                  test_dl)
def train(self, tmp_dir):
    """Train a model."""
    self.print_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    def get_label_path(im_path):
        return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

    size = self.task_config.chip_size
    class_map = self.task_config.class_map
    classes = class_map.get_class_names()
    if 0 not in class_map.get_keys():
        classes = ['nodata'] + classes
    num_workers = 0 if self.train_opts.debug else 4
    train_img_dir = self.subset_training_data(chip_dir)

    def get_data(train_sampler=None):
        data = (SegmentationItemList.from_folder(chip_dir)
                .split_by_folder(train=train_img_dir, valid='val-img')
                .label_from_func(get_label_path, classes=classes)
                .transform(
                    get_transforms(flip_vert=self.train_opts.flip_vert),
                    size=size,
                    tfm_y=True)
                .databunch(
                    bs=self.train_opts.batch_sz,
                    num_workers=num_workers,
                    train_sampler=train_sampler))
        return data

    data = get_data()
    oversample = self.train_opts.oversample
    if oversample:
        sampler = get_weighted_sampler(data.train_ds,
                                       oversample['rare_class_ids'],
                                       oversample['rare_target_prop'])
        data = get_data(train_sampler=sampler)

    if self.train_opts.debug:
        make_debug_chips(data, class_map, tmp_dir, train_uri)

    # Setup learner.
    ignore_idx = 0
    metrics = [
        Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        FBeta(average='weighted', clas_idx=1, beta=1, ignore_idx=ignore_idx)
    ]
    model_arch = getattr(models, self.train_opts.model_arch)
    learn = unet_learner(
        data,
        model_arch,
        metrics=metrics,
        wd=self.train_opts.weight_decay,
        bottle=True,
        path=train_dir)
    learn.unfreeze()

    if self.train_opts.fp16 and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup callbacks and train model.
    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        print('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        learn.model.load_state_dict(
            torch.load(pretrained_path, map_location=learn.data.device),
            strict=False)

    # Save every epoch so that resume functionality provided by
    # TrackEpochCallback will work.
    callbacks = [
        TrackEpochCallback(learn),
        MySaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path, monitor='f_beta'),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]

    lr = self.train_opts.lr
    num_epochs = self.train_opts.num_epochs
    if self.train_opts.one_cycle:
        if lr is None:
            learn.lr_find()
            learn.recorder.plot(suggestion=True, return_fig=True)
            lr = learn.recorder.min_grad_lr
            print('lr_find() found lr: {}'.format(lr))
        learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
    else:
        learn.fit(num_epochs, lr, callbacks=callbacks)

    # Since the model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
def train(self, tmp_dir):
    """Train a model."""
    # Setup hyperparams.
    bs = int(self.config.get('bs', 8))
    wd = self.config.get('wd', 1e-2)
    lr = self.config.get('lr', 2e-3)
    num_epochs = int(self.config.get('num_epochs', 10))
    model_arch = self.config.get('model_arch', 'resnet50')
    model_arch = getattr(models, model_arch)
    fp16 = self.config.get('fp16', False)
    sync_interval = self.config.get('sync_interval', 1)
    debug = self.config.get('debug', False)

    chip_uri = self.config['chip_uri']
    train_uri = self.config['train_uri']

    # Sync output of previous training run from cloud.
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    def get_label_path(im_path):
        return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

    size = self.task_config.chip_size
    classes = ['nodata'] + self.task_config.class_map.get_class_names()
    data = (SegmentationItemList.from_folder(chip_dir)
            .split_by_folder(train='train-img', valid='val-img')
            .label_from_func(get_label_path, classes=classes)
            .transform(get_transforms(), size=size, tfm_y=True)
            .databunch(bs=bs))
    print(data)

    if debug:
        # We make debug chips during the run-time of the train command
        # rather than the chip command because this is a better test (see
        # "visualize just before the net" in
        # https://karpathy.github.io/2019/04/25/recipe/), and because it's
        # more convenient since we have the databunch here.
        # TODO: make color map based on colors in class_map
        # TODO: get rid of white frame
        # TODO: zip them
        def _make_debug_chips(split):
            debug_chips_dir = join(train_uri,
                                   '{}-debug-chips'.format(split))
            make_dir(debug_chips_dir)
            ds = data.train_ds if split == 'train' else data.valid_ds
            for i, (x, y) in enumerate(ds):
                x.show(y=y)
                plt.savefig(join(debug_chips_dir, '{}.png'.format(i)))
                plt.close()

        _make_debug_chips('train')
        _make_debug_chips('val')

    # Setup learner.
    metrics = [semseg_acc]
    learn = unet_learner(
        data, model_arch, metrics=metrics, wd=wd, bottle=True)
    learn.unfreeze()

    if fp16 and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup ability to resume training if a saved model exists. This hack
    # won't properly set the learning rate as a function of epochs when
    # resuming.
    learner_path = join(train_dir, 'learner.pth')
    log_path = join(train_dir, 'log')

    start_epoch = 0
    if isfile(learner_path):
        print('Loading saved model...')
        start_epoch = get_last_epoch(str(log_path) + '.csv') + 1
        if start_epoch >= num_epochs:
            print('Training is already done. If you would like to re-train'
                  ', delete the previous results of training in '
                  '{}.'.format(train_uri))
            return

        learn.load(learner_path[:-4])
        print('Resuming from epoch {}'.format(start_epoch))
        print('Note: fastai does not support a start_epoch, so epoch 0 '
              'below corresponds to {}'.format(start_epoch))
    epochs_left = num_epochs - start_epoch

    # Setup callbacks and train model.
    callbacks = [
        SaveModelCallback(learn, name=learner_path[:-4]),
        MyCSVLogger(learn, filename=log_path, start_epoch=start_epoch),
        SyncCallback(train_dir, train_uri, sync_interval)
    ]
    learn.fit(epochs_left, lr, callbacks=callbacks)

    # Export model for inference.
    model_uri = self.config['model_uri']
    model_path = get_local_path(model_uri, tmp_dir)
    learn.export(model_path)

    # Sync output to cloud.
    sync_to_dir(train_dir, train_uri)
def train(self, tmp_dir):
    """Train a model.

    This downloads any previous output saved to the train_uri, starts
    training (or resumes from a checkpoint), periodically syncs the
    contents of train_dir to train_uri, and syncs once more after training
    finishes.

    Args:
        tmp_dir: (str) path to temp directory
    """
    self.log_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    def get_label_path(im_path):
        return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

    size = self.task_config.chip_size
    class_map = self.task_config.class_map
    classes = class_map.get_class_names()
    if 0 not in class_map.get_keys():
        classes = ['nodata'] + classes
    num_workers = 0 if self.train_opts.debug else 4

    data = (SegmentationItemList.from_folder(chip_dir)
            .split_by_folder(train='train-img', valid='val-img'))
    train_count = None
    if self.train_opts.train_count is not None:
        train_count = min(len(data.train), self.train_opts.train_count)
    elif self.train_opts.train_prop != 1.0:
        train_count = int(
            round(self.train_opts.train_prop * len(data.train)))
    train_items = data.train.items
    if train_count is not None:
        train_inds = np.random.permutation(
            np.arange(len(data.train)))[0:train_count]
        train_items = train_items[train_inds]
    items = np.concatenate([train_items, data.valid.items])

    data = (SegmentationItemList(items, chip_dir)
            .split_by_folder(train='train-img', valid='val-img')
            .label_from_func(get_label_path, classes=classes)
            .transform(
                get_transforms(flip_vert=self.train_opts.flip_vert),
                size=size,
                tfm_y=True)
            .databunch(
                bs=self.train_opts.batch_sz, num_workers=num_workers))
    print(data)

    # Setup learner.
    ignore_idx = 0
    metrics = [
        Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        FBeta(average='weighted', clas_idx=1, beta=1, ignore_idx=ignore_idx)
    ]
    model_arch = getattr(models, self.train_opts.model_arch)
    learn = unet_learner(
        data,
        model_arch,
        metrics=metrics,
        wd=self.train_opts.weight_decay,
        bottle=True,
        path=train_dir)
    learn.unfreeze()

    if self.train_opts.mixed_prec and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup callbacks and train model.
    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        print('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        learn.model.load_state_dict(
            torch.load(pretrained_path, map_location=learn.data.device),
            strict=False)

    # Save every epoch so that resume functionality provided by
    # TrackEpochCallback will work.
    callbacks = [
        TrackEpochCallback(learn),
        MySaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path, monitor='f_beta'),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]

    oversample = self.train_opts.oversample
    if oversample:
        weights = get_oversampling_weights(data.train_ds,
                                           oversample['rare_class_ids'],
                                           oversample['rare_target_prop'])
        oversample_callback = OverSamplingCallback(learn, weights=weights)
        callbacks.append(oversample_callback)

    if self.train_opts.debug:
        if oversample:
            oversample_callback.on_train_begin()
        make_debug_chips(data, class_map, tmp_dir, train_uri)

    if self.train_opts.log_tensorboard:
        callbacks.append(TensorboardLogger(learn, 'run'))

    if self.train_opts.run_tensorboard:
        log.info('Starting tensorboard process')
        log_dir = join(train_dir, 'logs', 'run')
        tensorboard_process = Popen(
            ['tensorboard', '--logdir={}'.format(log_dir)])
        terminate_at_exit(tensorboard_process)

    lr = self.train_opts.lr
    num_epochs = self.train_opts.num_epochs
    if self.train_opts.one_cycle:
        if lr is None:
            learn.lr_find()
            learn.recorder.plot(suggestion=True, return_fig=True)
            lr = learn.recorder.min_grad_lr
            print('lr_find() found lr: {}'.format(lr))
        learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
    else:
        learn.fit(num_epochs, lr, callbacks=callbacks)

    if self.train_opts.run_tensorboard:
        tensorboard_process.terminate()

    # Since the model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
def train(self, tmp_dir):
    """Train a model."""
    self.print_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    '''
    Get zip file for each group, and unzip them into chip_dir in a way
    that works well with FastAI. The resulting directory structure would
    be:
    <chip_dir>/
        train/
            training-<uuid1>/
                <class1>/
                    ...
                <class2>/
                    ...
            training-<uuid2>/
                <class1>/
                    ...
                <class2>/
                    ...
        val/
            validation-<uuid1>/
                <class1>/
                    ...
                <class2>/
                    ...
            validation-<uuid2>/
                <class1>/
                    ...
                <class2>/
                    ...
    '''
    chip_dir = join(tmp_dir, 'chips/')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_name = Path(zip_uri).name
        if zip_name.startswith('train'):
            extract_dir = chip_dir + 'train/'
        elif zip_name.startswith('val'):
            extract_dir = chip_dir + 'val/'
        else:
            continue
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(extract_dir)

    # Setup data loader.
    def get_label_path(im_path):
        return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

    size = self.task_config.chip_size
    class_map = self.task_config.class_map
    classes = class_map.get_class_names()
    num_workers = 0 if self.train_opts.debug else 4
    tfms = get_transforms(flip_vert=self.train_opts.flip_vert)

    def get_data(train_sampler=None):
        data = (ImageList.from_folder(chip_dir)
                .split_by_folder(train='train', valid='val')
                .label_from_folder()
                .transform(tfms, size=size)
                .databunch(
                    bs=self.train_opts.batch_sz,
                    num_workers=num_workers))
        return data

    data = get_data()

    if self.train_opts.debug:
        make_debug_chips(data, class_map, tmp_dir, train_uri)

    # Setup learner.
    ignore_idx = -1
    metrics = [
        Precision(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        Recall(average='weighted', clas_idx=1, ignore_idx=ignore_idx),
        FBeta(average='weighted', clas_idx=1, beta=1, ignore_idx=ignore_idx)
    ]
    model_arch = getattr(models, self.train_opts.model_arch)
    learn = cnn_learner(
        data,
        model_arch,
        metrics=metrics,
        wd=self.train_opts.weight_decay,
        path=train_dir)
    learn.unfreeze()

    if self.train_opts.fp16 and torch.cuda.is_available():
        # This loss_scale works for Resnet 34 and 50. You might need to
        # adjust this for other models.
        learn = learn.to_fp16(loss_scale=256)

    # Setup callbacks and train model.
    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        print('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        learn.model.load_state_dict(
            torch.load(pretrained_path, map_location=learn.data.device),
            strict=False)

    # Save every epoch so that resume functionality provided by
    # TrackEpochCallback will work.
    callbacks = [
        TrackEpochCallback(learn),
        MySaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path, monitor='f_beta'),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]

    lr = self.train_opts.lr
    num_epochs = self.train_opts.num_epochs
    if self.train_opts.one_cycle:
        if lr is None:
            learn.lr_find()
            learn.recorder.plot(suggestion=True, return_fig=True)
            lr = learn.recorder.min_grad_lr
            print('lr_find() found lr: {}'.format(lr))
        learn.fit_one_cycle(num_epochs, lr, callbacks=callbacks)
    else:
        learn.fit(num_epochs, lr, callbacks=callbacks)

    # Since the model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
def train(self, tmp_dir: str) -> None:
    """Train a DeepLab model using the task and backend config.

    Args:
        tmp_dir: (str) temporary directory to use

    Returns:
        None
    """
    train_py = self.backend_config.script_locations.train_py
    eval_py = self.backend_config.script_locations.eval_py
    export_py = self.backend_config.script_locations.export_py

    # Setup local input and output directories.
    log.info('Setting up local input and output directories')
    train_logdir = self.backend_config.training_output_uri
    train_logdir_local = get_local_path(train_logdir, tmp_dir)
    dataset_dir = get_record_dir(self.backend_config.training_data_uri,
                                 TRAIN)
    dataset_dir_local = get_local_path(dataset_dir, tmp_dir)
    make_dir(tmp_dir)
    make_dir(train_logdir_local)
    make_dir(dataset_dir_local)

    # Download training data.
    log.info('Downloading training data')
    for i, record_file in enumerate(list_paths(dataset_dir)):
        download_if_needed(record_file, tmp_dir)

    # Download and untar initial checkpoint.
    log.info('Downloading and untarring initial checkpoint')
    tf_initial_checkpoints_uri = self.backend_config.pretrained_model_uri
    download_if_needed(tf_initial_checkpoints_uri, tmp_dir)
    tfic_tarball = get_local_path(tf_initial_checkpoints_uri, tmp_dir)
    tfic_dir = os.path.dirname(tfic_tarball)
    with tarfile.open(tfic_tarball, 'r:gz') as tar:
        tar.extractall(tfic_dir)
    tfic_ckpt = glob.glob('{}/*/*.index'.format(tfic_dir))[0]
    tfic_ckpt = tfic_ckpt[0:-len('.index')]

    # Restart support.
    train_restart_dir = self.backend_config.train_options.train_restart_dir
    if type(train_restart_dir) is not str or len(train_restart_dir) == 0:
        train_restart_dir = train_logdir

    # Get output from potential previous run so we can resume training.
    if (type(train_restart_dir) is str and len(train_restart_dir) > 0
            and not self.backend_config.train_options.replace_model):
        sync_from_dir(train_restart_dir, train_logdir_local)
    else:
        if self.backend_config.train_options.replace_model:
            if os.path.exists(train_logdir_local):
                shutil.rmtree(train_logdir_local)
            make_dir(train_logdir_local)

    # Periodically synchronize with remote.
    sync = start_sync(
        train_logdir_local,
        train_logdir,
        sync_interval=self.backend_config.train_options.sync_interval)

    with sync:
        # Setup TFDL config.
        tfdl_config = json_format.ParseDict(
            self.backend_config.tfdl_config, TrainingParametersMsg())
        log.info('tfdl_config={}'.format(tfdl_config))
        log.info('Training steps={}'.format(
            tfdl_config.training_number_of_steps))

        # Additional training options.
        max_class = max(
            list(map(lambda c: c.id, self.class_map.get_items())))
        num_classes = len(self.class_map.get_items())
        num_classes = max(max_class, num_classes) + 1
        (train_args, train_env) = get_training_args(
            train_py, train_logdir_local, tfic_ckpt, dataset_dir_local,
            num_classes, tfdl_config)

        # Start training.
        log.info('Starting training process')
        log.info(' '.join(train_args))
        train_process = Popen(train_args, env=train_env)
        terminate_at_exit(train_process)

        if self.backend_config.train_options.do_monitoring:
            # Start tensorboard.
            log.info('Starting tensorboard process')
            tensorboard_process = Popen(
                ['tensorboard', '--logdir={}'.format(train_logdir_local)])
            terminate_at_exit(tensorboard_process)

        if self.backend_config.train_options.do_eval:
            # Start eval script.
            log.info('Starting eval script')
            eval_logdir = train_logdir_local
            eval_args = get_evaluation_args(eval_py, train_logdir_local,
                                            dataset_dir_local, eval_logdir,
                                            tfdl_config)
            eval_process = Popen(eval_args, env=train_env)
            terminate_at_exit(eval_process)

        # Wait for training and tensorboard.
        log.info('Waiting for training and tensorboard processes')
        train_process.wait()
        if self.backend_config.train_options.do_monitoring:
            tensorboard_process.terminate()

        # Export frozen graph.
        log.info(
            'Exporting frozen graph ({}/model)'.format(train_logdir_local))
        export_args = get_export_args(export_py, train_logdir_local,
                                      num_classes, tfdl_config)
        export_process = Popen(export_args)
        terminate_at_exit(export_process)
        export_process.wait()

        # Package up the model files for usage as fine tuning checkpoints.
        fine_tune_checkpoint_name = \
            self.backend_config.fine_tune_checkpoint_name
        latest_checkpoints = get_latest_checkpoint(train_logdir_local)
        model_checkpoint_files = glob.glob(
            '{}*'.format(latest_checkpoints))
        inference_graph_path = os.path.join(train_logdir_local, 'model')

        with RVConfig.get_tmp_dir() as tmp_dir:
            model_dir = os.path.join(tmp_dir, fine_tune_checkpoint_name)
            make_dir(model_dir)
            model_tar = os.path.join(
                train_logdir_local,
                '{}.tar.gz'.format(fine_tune_checkpoint_name))
            shutil.copy(inference_graph_path,
                        '{}/frozen_inference_graph.pb'.format(model_dir))
            for path in model_checkpoint_files:
                shutil.copy(path, model_dir)
            with tarfile.open(model_tar, 'w:gz') as tar:
                tar.add(model_dir, arcname=os.path.basename(model_dir))

    # Perform final sync.
    sync_to_dir(train_logdir_local, train_logdir, delete=False)
def exp_main(self, raw_uri, root_uri, test=False):
    """Run an experiment on the Spacenet Vegas building dataset.

    This is a simple example of how to do semantic segmentation on data
    that doesn't require any pre-processing or special permission to
    access.

    Args:
        raw_uri: (str) directory of raw data (the root of the Spacenet
            dataset)
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output
    """
    # Specify the location of the raw data.
    base_uri = join(raw_uri, 'spacenet/SN2_buildings/train/AOI_2_Vegas')
    # The images and labels are in two separate directories within the
    # base_uri.
    raster_uri = join(base_uri, 'PS-RGB')
    label_uri = join(base_uri, 'geojson_buildings')
    # The tiff (raster) and geojson (label) files have a naming convention
    # of '[prefix]_[image id].geojson'. The prefix indicates the type of
    # data and the image id indicates which scene each is associated with.
    raster_fn_prefix = 'SN2_buildings_train_AOI_2_Vegas_PS-RGB_img'
    label_fn_prefix = 'SN2_buildings_train_AOI_2_Vegas_geojson_buildings_img'
    # Find all of the image ids that have associated images and labels.
    # Collect these values to use as our scene ids.
    label_paths = list_paths(label_uri, ext='.geojson')
    label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
    scene_ids = [
        label_re.match(label_path).group(1) for label_path in label_paths
    ]

    # Set some training parameters:
    # The exp_id will be the label associated with this experiment; it will
    # be used to name the experiment config json.
    exp_id = 'spacenet-simple-seg'
    # Number of times to go through the entire dataset during training.
    num_epochs = 2
    # Number of images in each batch.
    batch_size = 8
    # Specify whether or not to make debug chips (a zipped sample of png
    # chips that you can examine to help debug the chipping process).
    debug = False

    # This experiment includes an option to run a small test experiment
    # before running the whole thing. You can set this using the 'test'
    # parameter. If this parameter is set to True it will run a tiny test
    # example with a new experiment id. This will be small enough to run
    # locally. It is recommended to run a test example locally before
    # submitting the whole experiment to AWS Batch.
    test = str_to_bool(test)
    if test:
        exp_id += '-test'
        num_epochs = 1
        batch_size = 2
        debug = True
        scene_ids = scene_ids[0:10]

    # Split the data into training and validation sets:
    # Randomize the order of all scene ids.
    random.seed(5678)
    scene_ids = sorted(scene_ids)
    random.shuffle(scene_ids)

    # Workaround to handle scene 1000 missing on S3.
    if '1000' in scene_ids:
        scene_ids.remove('1000')

    # Figure out how many scenes make up 80% of the whole set.
    num_train_ids = round(len(scene_ids) * 0.8)
    # Split the scene ids into training and validation lists.
    train_ids = scene_ids[0:num_train_ids]
    val_ids = scene_ids[num_train_ids:]

    # The TaskConfigBuilder constructs a child class of TaskConfig that
    # corresponds to the type of computer vision task you are taking on.
    # This experiment includes a semantic segmentation task but Raster
    # Vision also has backends for object detection and chip
    # classification. Before building the task config you can also set
    # parameters using 'with_' methods. In the example below we set the
    # chip size, the pixel class names and colors, and additional chip
    # options.
    task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                        .with_chip_size(300) \
                        .with_classes({
                            'Building': (1, 'orange'),
                            'Background': (2, 'black')
                        }) \
                        .with_chip_options(
                            chips_per_scene=9,
                            debug_chip_probability=0.25,
                            negative_survival_probability=1.0,
                            target_classes=[1],
                            target_count_threshold=1000) \
                        .build()

    # Next we will create a backend that is built on top of a third-party
    # deep learning library. In this case we will construct the
    # BackendConfig for the pytorch semantic segmentation backend.
    backend = rv.BackendConfig.builder(rv.PYTORCH_SEMANTIC_SEGMENTATION) \
                              .with_task(task) \
                              .with_train_options(
                                  lr=1e-4,
                                  batch_size=batch_size,
                                  num_epochs=num_epochs,
                                  model_arch='resnet50',
                                  debug=debug) \
                              .build()

    # We will use this function to create a list of scenes that we will
    # pass to the DataSetConfig builder.
    def make_scene(id):
        """Make a SceneConfig object for each image/label pair.

        Args:
            id (str): The id that corresponds to both the .tiff image
                source and .geojson label source for a given scene

        Returns:
            rv.data.SceneConfig: a SceneConfig object which is composed of
                images, labels and optionally AOIs
        """
        # Find the uri for the image associated with this id.
        train_image_uri = os.path.join(
            raster_uri, '{}{}.tif'.format(raster_fn_prefix, id))

        # Construct a raster source from an image uri that can be handled
        # by Rasterio. We also specify the order of image channels by their
        # indices and add a stats transformer which normalizes pixel values
        # into uint8.
        raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
            .with_uri(train_image_uri) \
            .with_channel_order([0, 1, 2]) \
            .with_stats_transformer() \
            .build()

        # Next create a label source config to pair with the raster source:
        # define the geojson label source uri.
        vector_source = os.path.join(
            label_uri, '{}{}.geojson'.format(label_fn_prefix, id))

        # Since this is a semantic segmentation experiment and the labels
        # are distributed in a vector-based GeoJSON format, we need to
        # rasterize the labels. We create a RasterSourceConfigBuilder using
        # `rv.RASTERIZED_SOURCE`, indicating that it will come from a
        # vector source. We then specify the uri of the vector source and
        # (in the 'with_rasterizer_options' method) the id of the pixel
        # class we would like to use as background.
        label_raster_source = rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
            .with_vector_source(vector_source) \
            .with_rasterizer_options(2) \
            .build()

        # Create a semantic segmentation label source from the rasterized
        # source config that we built in the previous line.
        label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
            .with_raster_source(label_raster_source) \
            .build()

        # Finally we can build a scene config object using the scene id and
        # the configs we just defined.
        scene = rv.SceneConfig.builder() \
                              .with_task(task) \
                              .with_id(id) \
                              .with_raster_source(raster_source) \
                              .with_label_source(label_source) \
                              .build()

        return scene

    # Create lists of train and test scene configs.
    train_scenes = [make_scene(id) for id in train_ids]
    val_scenes = [make_scene(id) for id in val_ids]

    # Construct a DataSet config using the lists of train and validation
    # scenes.
    dataset = rv.DatasetConfig.builder() \
                              .with_train_scenes(train_scenes) \
                              .with_validation_scenes(val_scenes) \
                              .build()

    # We will need to convert this imagery from uint16 to uint8 in order
    # to use it. We specified that this conversion should take place when
    # we built the train raster source but that process will require
    # dataset-level statistics. To get these stats we need to create an
    # analyzer.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
                                .build()

    # We use the previously-constructed configs to create the constituent
    # parts of the experiment. We also give the builder strings that define
    # the experiment id and root uri. The root uri indicates where all of
    # the output will be written.
    experiment = rv.ExperimentConfig.builder() \
                                    .with_id(exp_id) \
                                    .with_task(task) \
                                    .with_backend(backend) \
                                    .with_analyzer(analyzer) \
                                    .with_dataset(dataset) \
                                    .with_root_uri(root_uri) \
                                    .build()

    # Return one or more experiment configs to run the experiment(s).
    return experiment
def exp_main(self, test=False):
    # Docker filepath mounted to my data directory.
    base_uri = '/opt/data/labels2'
    raster_uri = base_uri  # rasters and labels in same directory for now
    label_uri = base_uri

    # Find all of the image ids that have associated images and labels.
    # Collect these values to use as our scene ids.
    # TODO: use PV Array dataframe to select these
    label_paths = list_paths(label_uri, ext='.geojson')
    scene_ids = [x.split('.')[-2].split('/')[-1] for x in label_paths]
    scene2_ids = [
        'so9051_rgb_250_04', 'so9265_rgb_250_05', 'sp3590_rgb_250_04',
        'sj7304_rgb_250_04', 'su1385_rgb_250_06', 'st0709_rgb_250_05',
        'sj9004_rgb_250_05', 'st8022_rgb_250_05', 'st8303_rgb_250_05',
        'sj9402_rgb_250_05', 'so9078_rgb_250_06', 'sj9003_rgb_250_05',
        'sk0003_rgb_250_05', 'st8468_rgb_250_04', 'st6980_rgb_250_04',
        'su0883_rgb_250_05', 'su0983_rgb_250_05', 'so9249_rgb_250_05',
        'su1478_rgb_250_04', 'su1377_rgb_250_04', 'sj9002_rgb_250_06',
        'sj8903_rgb_250_04', 'sj9902_rgb_250_05', 'sj9602_rgb_250_05',
        'tg2827_rgb_250_04', 'sj9702_rgb_250_05', 'sj9803_rgb_250_04',
        'sj9802_rgb_250_05', 'sk0504_rgb_250_04', 'sk0302_rgb_250_05',
        'sk0306_rgb_250_04', 'sk0206_rgb_250_04', 'sk0207_rgb_250_04',
        'sk0503_rgb_250_04', 'sj9903_rgb_250_04', 'sk0202_rgb_250_06',
        'sk0309_rgb_250_03', 'sk0605_rgb_250_04', 'sk0405_rgb_250_04',
        'sk0404_rgb_250_04', 'sk0502_rgb_250_05', 'st5071_rgb_250_05',
        'sp3293_rgb_250_03', 'sy7691_rgb_250_05', 'sp3294_rgb_250_03',
        'sp3892_rgb_250_05', 'sp3690_rgb_250_04', 'st9979_rgb_250_05',
        'se6154_rgb_250_03', 'so8476_rgb_250_06', 'so8072_rgb_250_04',
        'so7972_rgb_250_04', 'sp3491_rgb_250_03', 'sp3490_rgb_250_03',
        'sp3291_rgb_250_03', 'sp3292_rgb_250_03', 'sp3492_rgb_250_03',
        'sk0212_rgb_250_03', 'so7878_rgb_250_06', 'tl1239_rgb_250_03',
        'su0972_rgb_250_03', 'st1532_rgb_250_04', 'so7556_rgb_250_05',
        'st7091_rgb_250_07', 'sn2040_rgb_250_04', 'so7371_rgb_250_04',
        'tl6064_rgb_250_05', 'so9255_rgb_250_05', 'st1826_rgb_250_04',
        'st1528_rgb_250_04', 'st1629_rgb_250_04', 'st0727_rgb_250_04',
        'st0827_rgb_250_04', 'st0928_rgb_250_04', 'st0930_rgb_250_04',
        'st0929_rgb_250_04', 'st0832_rgb_250_05', 'tl1750_rgb_250_03',
        'st2322_rgb_250_05', 'st1623_rgb_250_04', 'st1523_rgb_250_04',
        'st1624_rgb_250_04', 'st1424_rgb_250_04', 'st1421_rgb_250_05',
        'sp3793_rgb_250_04', 'sp3792_rgb_250_04', 'sj9912_rgb_250_03',
        'sk2347_rgb_250_05', 'sp3391_rgb_250_03', 'tl1846_rgb_250_03',
        'sp5177_rgb_250_03', 'sn3251_rgb_250_04', 'sp3693_rgb_250_04',
        'st2014_rgb_250_06', 'st2015_rgb_250_06', 'st2115_rgb_250_05',
        'st2114_rgb_250_05', 'sn4257_rgb_250_04', 'su4223_rgb_250_04',
        'su4323_rgb_250_04', 'tl3068_rgb_250_04', 'sp5178_rgb_250_03',
        'sp3791_rgb_250_04', 'st3689_rgb_250_03', 'st3789_rgb_250_03',
        'st0411_rgb_250_04', 'st0212_rgb_250_04', 'st0112_rgb_250_04',
        'st0211_rgb_250_04', 'st0111_rgb_250_04', 'st0209_rgb_250_05',
        'st0210_rgb_250_05', 'sj6714_rgb_250_04', 'sp3893_rgb_250_05',
        'su6712_rgb_250_04', 'su6713_rgb_250_04', 'st9363_rgb_250_04',
        'st9463_rgb_250_04', 'nr3059_rgb_250_03', 'st8576_rgb_250_03',
        'sp7948_rgb_250_04', 'sp6138_rgb_250_07', 'tl2276_rgb_250_04',
        'sm9817_rgb_250_04', 'sm9816_rgb_250_04', 'sm9716_rgb_250_04',
        'sm9616_rgb_250_04', 'sm9818_rgb_250_04', 'sm9009_rgb_250_04',
        'sm9721_rgb_250_05', 'sm9720_rgb_250_05', 'sm9101_rgb_250_04',
        'sm9201_rgb_250_04', 'sm9010_rgb_250_04', 'sm9109_rgb_250_04',
        'sn6502_rgb_250_04', 'sn6601_rgb_250_04', 'sn6201_rgb_250_04',
        'sn6202_rgb_250_04', 'st6788_rgb_250_05', 'st6688_rgb_250_05',
        'st6689_rgb_250_06', 'su0807_rgb_250_05', 'su0806_rgb_250_05',
        'sz0998_rgb_250_05', 'sz1099_rgb_250_05', 'su3743_rgb_250_04',
        'su3744_rgb_250_04', 'su6509_rgb_250_04', 'su6409_rgb_250_04',
        'su6410_rgb_250_04', 'su5413_rgb_250_04', 'su2088_rgb_250_04',
        'su5703_rgb_250_04', 'su5603_rgb_250_04', 'su5604_rgb_250_04',
        'st7642_rgb_250_06', 'st7744_rgb_250_05', 'st6728_rgb_250_05',
        'st8558_rgb_250_04', 'st2735_rgb_250_04', 'tl4990_rgb_250_05',
        'sm7209_rgb_250_04', 'st8864_rgb_250_04', 'tg5013_rgb_250_04',
        'st1198_rgb_250_04', 'st1298_rgb_250_04', 'st1722_rgb_250_04',
        'tq1078_rgb_250_05', 'su6401_rgb_250_04', 'st8753_rgb_250_04',
        'st8455_rgb_250_05', 'st8660_rgb_250_04', 'st8760_rgb_250_04',
        'st8765_rgb_250_04', 'sp7638_rgb_250_05', 'tl6332_rgb_250_04',
        'st8705_rgb_250_05', 'sy3297_rgb_250_06', 'sy3498_rgb_250_06',
        'se3636_rgb_250_01', 'st6578_rgb_250_05', 'st6478_rgb_250_05',
        'st5479_rgb_250_06', 'se2931_rgb_250_02', 'sd6835_rgb_250_01',
        'st2228_rgb_250_05', 'st2227_rgb_250_05'
    ]

    # Experiment label, used to label config files.
    exp_id = 'pv-detection-2'
    # Number of times passing a batch of images through the model.
    num_steps = 1e4  # 1e5 takes too long
    # Number of images in each batch.
    batch_size = 8
    # Specify whether or not to make debug chips (a zipped sample of png
    # chips that you can examine to help debug the chipping process).
    debug = True

    # This experiment includes an option to run a small test experiment
    # before running the whole thing. You can set this using the 'test'
    # parameter. If this parameter is set to True it will run a tiny test
    # example with a new experiment id. This will be small enough to run
    # locally. It is recommended to run a test example locally before
    # submitting the whole experiment to AWS Batch.
    test = str_to_bool(test)
    if test:
        print("***************** TEST MODE *****************")
        exp_id += '-test'
        num_steps = 100
        batch_size = 4
        debug = True
        scene_ids = scene_ids[0:5]

    # Split the data into training and validation sets:
    # Randomize the order of all scene ids.
    random.seed(5678)
    scene_ids = sorted(scene_ids)
    random.shuffle(scene_ids)

    # Figure out how many scenes make up 80% of the whole set.
    num_train_ids = round(len(scene_ids) * 0.8)
    # Split the scene ids into training and validation lists.
    train_ids = scene_ids[0:num_train_ids]
    val_ids = scene_ids[num_train_ids:]
    # train_ids = scene_ids
    # val_ids = scene_ids

    # ------------- TASK -------------
    task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                        .with_chip_size(300) \
                        .with_classes({
                            'pv': (1, 'yellow'),
                            'background': (2, 'black')
                        }) \
                        .with_chip_options(
                            chips_per_scene=50,
                            window_method='random_sample',
                            debug_chip_probability=1,
                            negative_survival_probability=0.01,
                            target_classes=[1],
                            target_count_threshold=1000) \
                        .build()

    # ------------- BACKEND -------------
    # Configuration options for different models and tasks:
    # https://github.com/azavea/raster-vision/blob/60f741e30a016f25d2643a9b32916adb22e42d50/rastervision/backend/model_defaults.json
    backend = rv.BackendConfig.builder(rv.TF_DEEPLAB) \
                              .with_task(task) \
                              .with_debug(debug) \
                              .with_batch_size(batch_size) \
                              .with_num_steps(num_steps) \
                              .with_model_defaults(rv.MOBILENET_V2) \
                              .with_train_options(
                                  replace_model=False,
                                  sync_interval=5) \
                              .build()

    # ------------- Make Scenes -------------
    # We will use this function to create a list of scenes that we will
    # pass to the DataSetConfig builder.
    def make_scene(id):
        """Make a SceneConfig object for each image/label pair.

        Args:
            id (str): The id that corresponds to both the .jpg image
                source and .geojson label source for a given scene

        Returns:
            rv.data.SceneConfig: a SceneConfig object which is composed of
                images, labels and optionally AOIs
        """
        # Find the uri for the image associated with this id.
        train_image_uri = os.path.join(raster_uri, '{}.jpg'.format(id))

        # Construct a raster source from an image uri that can be handled
        # by Rasterio. We also specify the order of image channels by their
        # indices and add a stats transformer which normalizes pixel values
        # into uint8.
        raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
            .with_uri(train_image_uri) \
            .with_channel_order([0, 1, 2]) \
            .with_stats_transformer() \
            .build()

        # Next create a label source config to pair with the raster source:
        # define the geojson label source uri.
        vector_source = os.path.join(label_uri, '{}.geojson'.format(id))

        # Since this is a semantic segmentation experiment and the labels
        # are distributed in a vector-based GeoJSON format, we need to
        # rasterize the labels. We create a RasterSourceConfigBuilder using
        # `rv.RASTERIZED_SOURCE`, indicating that it will come from a
        # vector source. We then specify the uri of the vector source and
        # (in the 'with_rasterizer_options' method) the id of the pixel
        # class we would like to use as background.
        label_raster_source = rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
            .with_vector_source(vector_source) \
            .with_rasterizer_options(2) \
            .build()

        # Create a semantic segmentation label source from the rasterized
        # source config that we built in the previous line.
        label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
            .with_raster_source(label_raster_source) \
            .build()

        # Finally we can build a scene config object using the scene id and
        # the configs we just defined.
        scene = rv.SceneConfig.builder() \
                              .with_task(task) \
                              .with_id(id) \
                              .with_raster_source(raster_source) \
                              .with_label_source(label_source) \
                              .build()

        return scene

    # Create lists of train and test scene configs.
    train_scenes = [make_scene(id) for id in train_ids]
    val_scenes = [make_scene(id) for id in val_ids]

    # ------------- DATASET -------------
    # Construct a DataSet config using the lists of train and validation
    # scenes.
    dataset = rv.DatasetConfig.builder() \
                              .with_train_scenes(train_scenes) \
                              .with_validation_scenes(val_scenes) \
                              .build()

    # ------------- ANALYZE -------------
    # We will need to convert this imagery from uint16 to uint8 in order
    # to use it. We specified that this conversion should take place when
    # we built the train raster source but that process will require
    # dataset-level statistics. To get these stats we need to create an
    # analyzer. Use a small sample prob so this step doesn't take ages.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
                                .with_sample_prob(0.05) \
                                .build()

    # ------------- EXPERIMENT -------------
    experiment = rv.ExperimentConfig.builder() \
                                    .with_id(exp_id) \
                                    .with_task(task) \
                                    .with_backend(backend) \
                                    .with_analyzer(analyzer) \
                                    .with_dataset(dataset) \
                                    .with_root_uri('/opt/data/rv/test3') \
                                    .build()

    return experiment
def train(self, tmp_dir):
    """Train a model."""
    self.print_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    train_images = []
    train_lbl_bbox = []
    for annotation_path in glob.glob(join(chip_dir, 'train/*.json')):
        images, lbl_bbox = get_annotations(annotation_path)
        train_images += images
        train_lbl_bbox += lbl_bbox

    val_images = []
    val_lbl_bbox = []
    for annotation_path in glob.glob(join(chip_dir, 'valid/*.json')):
        images, lbl_bbox = get_annotations(annotation_path)
        val_images += images
        val_lbl_bbox += lbl_bbox

    images = train_images + val_images
    lbl_bbox = train_lbl_bbox + val_lbl_bbox

    img2bbox = dict(zip(images, lbl_bbox))
    get_y_func = lambda o: img2bbox[o.name]
    num_workers = 0 if self.train_opts.debug else 4
    data = ObjectItemList.from_folder(chip_dir)
    data = data.split_by_folder()
    data = data.label_from_func(get_y_func)
    data = data.transform(
        get_transforms(), size=self.task_config.chip_size, tfm_y=True)
    data = data.databunch(
        bs=self.train_opts.batch_sz,
        collate_fn=bb_pad_collate,
        num_workers=num_workers)
    print(data)

    if self.train_opts.debug:
        make_debug_chips(data, self.task_config.class_map, tmp_dir,
                         train_uri)

    # Setup callbacks and train model.
    ratios = [1 / 2, 1, 2]
    scales = [1, 2**(-1 / 3), 2**(-2 / 3)]
    model_arch = getattr(models, self.train_opts.model_arch)
    encoder = create_body(model_arch, cut=-2)
    model = RetinaNet(encoder, data.c, final_bias=-4)
    crit = RetinaNetFocalLoss(scales=scales, ratios=ratios)
    learn = Learner(data, model, loss_func=crit, path=train_dir)
    learn = learn.split(retina_net_split)

    model_path = get_local_path(self.backend_opts.model_uri, tmp_dir)

    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        print('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        learn.load(pretrained_path[:-4])

    callbacks = [
        TrackEpochCallback(learn),
        SaveModelCallback(learn, every='epoch'),
        MyCSVLogger(learn, filename='log'),
        ExportCallback(learn, model_path),
        SyncCallback(train_dir, self.backend_opts.train_uri,
                     self.train_opts.sync_interval)
    ]
    learn.unfreeze()
    learn.fit(self.train_opts.num_epochs, self.train_opts.lr,
              callbacks=callbacks)

    # Since the model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
def exp_main(self, raw_uri, root_uri):
    """Run an experiment on the Spacenet Vegas building dataset.

    This is a simple example of how to do semantic segmentation on data
    that doesn't require any pre-processing or special permission to
    access.

    Args:
        raw_uri: (str) directory of raw data (the root of the Spacenet
            dataset)
        root_uri: (str) root directory for experiment output
    """
    raster_uri = join(raw_uri, 'MUL')
    label_uri = join(raw_uri, 'geojson/buildings')
    raster_fn_prefix = 'MUL_AOI_2_Vegas_img'
    label_fn_prefix = 'buildings_AOI_2_Vegas_img'
    label_paths = list_paths(label_uri, ext='.geojson')
    label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
    scene_ids = [
        label_re.match(label_path).group(1) for label_path in label_paths
    ]

    random.seed(5678)
    scene_ids = sorted(scene_ids)
    random.shuffle(scene_ids)

    # Workaround to handle scene 1000 missing on S3.
    if '1000' in scene_ids:
        scene_ids.remove('1000')

    num_train_ids = int(len(scene_ids) * 0.8)
    train_ids = scene_ids[0:num_train_ids]
    val_ids = scene_ids[num_train_ids:]

    exp_id = 'spacenet-simple-seg'
    chip_size = 162

    task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                        .with_chip_size(chip_size) \
                        .with_classes({
                            'Building': (1, 'orange'),
                            'Background': (2, 'black')
                        }) \
                        .with_chip_options(
                            chips_per_scene=1,
                            debug_chip_probability=0.25,
                            negative_survival_probability=1.0,
                            target_classes=[1],
                            target_count_threshold=1000) \
                        .build()

    config = {
        'band_count': 8,
        'num_generations': 50,
        'pop_size': 250,
        'num_individuals': 125,
        'num_offspring': 125,
        'mutation_rate': 0.3,
        'crossover_rate': 0.5,
        'debug': True
    }

    backend = rv.BackendConfig.builder(GP_SEMANTIC_SEGMENTATION) \
                              .with_task(task) \
                              .with_train_options(**config) \
                              .build()

    def make_scene(id):
        train_image_uri = os.path.join(
            raster_uri, '{}{}.tif'.format(raster_fn_prefix, id))

        raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
            .with_uri(train_image_uri) \
            .with_stats_transformer() \
            .build()

        vector_source = os.path.join(
            label_uri, '{}{}.geojson'.format(label_fn_prefix, id))
        label_raster_source = rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
            .with_vector_source(vector_source) \
            .with_rasterizer_options(2) \
            .build()
        label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
            .with_raster_source(label_raster_source) \
            .build()

        scene = rv.SceneConfig.builder() \
                              .with_task(task) \
                              .with_id(id) \
                              .with_raster_source(raster_source) \
                              .with_label_source(label_source) \
                              .build()
        return scene

    train_scenes = [make_scene(id) for id in train_ids]
    val_scenes = [make_scene(id) for id in val_ids]

    dataset = rv.DatasetConfig.builder() \
                              .with_train_scenes(train_scenes) \
                              .with_validation_scenes(val_scenes) \
                              .build()

    # Need to use stats_analyzer because imagery is uint16.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
                                .build()

    experiment = rv.ExperimentConfig.builder() \
                                    .with_id(exp_id) \
                                    .with_task(task) \
                                    .with_backend(backend) \
                                    .with_analyzer(analyzer) \
                                    .with_dataset(dataset) \
                                    .with_root_uri(root_uri) \
                                    .build()

    return experiment
def train(self, tmp_dir):
    """Train a model.

    This downloads any previous output saved to the train_uri, starts
    training (or resumes from a checkpoint), and periodically syncs the
    contents of train_dir to train_uri, including one final sync after
    training finishes.

    Args:
        tmp_dir: (str) path to temp directory
    """
    self.log_options()

    # Sync output of previous training run from cloud.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    chip_dir = join(tmp_dir, 'chips')
    make_dir(chip_dir)
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(chip_dir)

    # Setup data loader.
    batch_size = self.train_opts.batch_size
    chip_size = self.task_config.chip_size
    class_names = self.class_map.get_class_names()
    databunch = build_databunch(chip_dir, chip_size, batch_size,
                                class_names)
    log.info(databunch)
    num_labels = len(databunch.label_names)
    if self.train_opts.debug:
        make_debug_chips(databunch, self.class_map, tmp_dir, train_uri)

    # Setup model.
    model = get_model(self.train_opts.model_arch, num_labels,
                      pretrained=True)
    model = model.to(self.device)
    model_path = join(train_dir, 'model')

    # Load weights from a pretrained model.
    pretrained_uri = self.backend_opts.pretrained_uri
    if pretrained_uri:
        log.info('Loading weights from pretrained_uri: {}'.format(
            pretrained_uri))
        pretrained_path = download_if_needed(pretrained_uri, tmp_dir)
        model.load_state_dict(
            torch.load(pretrained_path, map_location=self.device))

    # Possibly resume training from checkpoint.
    start_epoch = 0
    train_state_path = join(train_dir, 'train_state.json')
    if isfile(train_state_path):
        log.info('Resuming from checkpoint: {}\n'.format(model_path))
        train_state = file_to_json(train_state_path)
        start_epoch = train_state['epoch'] + 1
        model.load_state_dict(
            torch.load(model_path, map_location=self.device))

    # Write header of log CSV file.
    metric_names = ['precision', 'recall', 'f1']
    log_path = join(train_dir, 'log.csv')
    if not isfile(log_path):
        with open(log_path, 'w') as log_file:
            log_writer = csv.writer(log_file)
            row = ['epoch', 'time', 'train_loss'] + metric_names
            log_writer.writerow(row)

    # Setup Tensorboard logging.
    if self.train_opts.log_tensorboard:
        log_dir = join(train_dir, 'tb-logs')
        make_dir(log_dir)
        tb_writer = SummaryWriter(log_dir=log_dir)
        if self.train_opts.run_tensorboard:
            log.info('Starting tensorboard process')
            tensorboard_process = Popen(
                ['tensorboard', '--logdir={}'.format(log_dir)])
            terminate_at_exit(tensorboard_process)

    # Setup optimizer, loss, and LR scheduler.
    loss_fn = torch.nn.CrossEntropyLoss()
    lr = self.train_opts.lr
    opt = optim.Adam(model.parameters(), lr=lr)
    step_scheduler, epoch_scheduler = None, None
    num_epochs = self.train_opts.num_epochs

    if self.train_opts.one_cycle and num_epochs > 1:
        steps_per_epoch = len(databunch.train_ds) // batch_size
        total_steps = num_epochs * steps_per_epoch
        step_size_up = (num_epochs // 2) * steps_per_epoch
        step_size_down = total_steps - step_size_up
        step_scheduler = CyclicLR(opt, base_lr=lr / 10, max_lr=lr,
                                  step_size_up=step_size_up,
                                  step_size_down=step_size_down,
                                  cycle_momentum=False)
        # When resuming, fast-forward the scheduler past the steps that
        # were already taken in previous runs.
        for _ in range(start_epoch * steps_per_epoch):
            step_scheduler.step()

    # Training loop.
    for epoch in range(start_epoch, num_epochs):
        # Train one epoch.
        log.info('-----------------------------------------------------')
        log.info('epoch: {}'.format(epoch))
        start = time.time()
        train_loss = train_epoch(model, self.device, databunch.train_dl,
                                 opt, loss_fn, step_scheduler)
        if epoch_scheduler:
            epoch_scheduler.step()
        log.info('train loss: {}'.format(train_loss))

        # Validate one epoch.
        metrics = validate_epoch(model, self.device, databunch.valid_dl,
                                 num_labels)
        log.info('validation metrics: {}'.format(metrics))

        # Print elapsed time for epoch.
        end = time.time()
        epoch_time = datetime.timedelta(seconds=end - start)
        log.info('epoch elapsed time: {}'.format(epoch_time))

        # Save model and state.
        torch.save(model.state_dict(), model_path)
        train_state = {'epoch': epoch}
        json_to_file(train_state, train_state_path)

        # Append to log CSV file.
        with open(log_path, 'a') as log_file:
            log_writer = csv.writer(log_file)
            row = [epoch, epoch_time, train_loss]
            row += [metrics[k] for k in metric_names]
            log_writer.writerow(row)

        # Write to Tensorboard log.
        if self.train_opts.log_tensorboard:
            for key, val in metrics.items():
                tb_writer.add_scalar(key, val, epoch)
            tb_writer.add_scalar('train_loss', train_loss, epoch)
            for name, param in model.named_parameters():
                tb_writer.add_histogram(name, param, epoch)

        if (train_uri.startswith('s3://')
                and (((epoch + 1) % self.train_opts.sync_interval) == 0)):
            sync_to_dir(train_dir, train_uri)

    # Close Tensorboard.
    if self.train_opts.log_tensorboard:
        tb_writer.close()
        if self.train_opts.run_tensorboard:
            tensorboard_process.terminate()

    # Since the model is exported every epoch, we need some other way to
    # show that training is finished.
    str_to_file('done!', self.backend_opts.train_done_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
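The one_cycle option in the train method above approximates a 1cycle policy with a single triangular CyclicLR cycle spanning all of training. A minimal standalone sketch (stand-in model and hypothetical step counts) of the schedule it builds, and of the fast-forward loop used when resuming:

import torch
from torch.optim.lr_scheduler import CyclicLR

model = torch.nn.Linear(4, 2)  # stand-in model
lr = 1e-4
num_epochs, steps_per_epoch = 10, 100  # hypothetical sizes
total_steps = num_epochs * steps_per_epoch
step_size_up = (num_epochs // 2) * steps_per_epoch

opt = torch.optim.Adam(model.parameters(), lr=lr)
# cycle_momentum must be False for Adam, which has no momentum parameter.
sched = CyclicLR(opt, base_lr=lr / 10, max_lr=lr,
                 step_size_up=step_size_up,
                 step_size_down=total_steps - step_size_up,
                 cycle_momentum=False)

lrs = []
for _ in range(total_steps):
    sched.step()  # one step per training batch
    lrs.append(sched.get_last_lr()[0])

# The LR rises linearly from lr/10 to lr over the first half of training,
# then falls back over the second half; resuming at epoch k just replays
# k * steps_per_epoch of these steps, as in the train method above.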
def train(self, tmp_dir):
    """Train a model."""
    self.print_options()

    # Sync output of previous training run from cloud. This will either be
    # local or S3, and allows restarting the job if it has been shut down.
    train_uri = self.backend_opts.train_uri
    train_dir = get_local_path(train_uri, tmp_dir)
    make_dir(train_dir)
    sync_from_dir(train_uri, train_dir)

    # Get zip file for each group, and unzip them into chip_dir.
    self.chip_dir = join(tmp_dir, 'chips')
    make_dir(self.chip_dir)

    train_chip_dir = self.chip_dir + '/train-img'
    train_truth_dir = self.chip_dir + '/train-labels'
    fitness_func = partial(fitness, train_chip_dir, train_truth_dir,
                           self._toolbox.compile)
    self._toolbox.register('evaluate', fitness_func)

    # This is the key part: it is how the backend knows where to get the
    # chips from. backend_opts comes from Raster Vision, and train_opts is
    # where backend-specific options are defined.
    for zip_uri in list_paths(self.backend_opts.chip_uri, 'zip'):
        zip_path = download_if_needed(zip_uri, tmp_dir)
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            zipf.extractall(self.chip_dir)

    # Setup data loader.
    def get_label_path(im_path):
        return Path(str(im_path.parent)[:-4] + '-labels') / im_path.name

    class_map = self.task_config.class_map
    classes = class_map.get_class_names()
    if 0 not in class_map.get_keys():
        classes = ['nodata'] + classes

    # Evolve. Keep a hall of fame to track the best individual.
    hof = tools.HallOfFame(1)

    # Set up statistics for debugging.
    mstats = None
    if self.train_opts.debug:
        stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
        stats_size = tools.Statistics(len)
        mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
        mstats.register('avg', np.mean)
        mstats.register('std', np.std)
        mstats.register('min', np.min)
        mstats.register('max', np.max)

    pop = self._toolbox.population(n=self.train_opts.pop_size)
    pop, logbook = algorithms.eaMuPlusLambda(
        pop, self._toolbox,
        self.train_opts.num_individuals,
        self.train_opts.num_offspring,
        self.train_opts.crossover_rate,
        self.train_opts.mutation_rate,
        self.train_opts.num_generations,
        stats=mstats,
        halloffame=hof,
        verbose=self.train_opts.debug)

    # Raster Vision uses file-presence based caching to figure out whether a
    # stage has completed (much like Makefiles), and since this backend
    # writes output every generation, it needs something else to signal
    # done-ness. The evolved "model" is just the string form of the best
    # individual, so write it both to the done-file and to the model URI.
    if self.train_opts.debug:
        print(str(hof[0]))
    str_to_file(str(hof[0]), self.backend_opts.train_done_uri)
    str_to_file(str(hof[0]), self.backend_opts.model_uri)

    # Sync output to cloud.
    sync_to_dir(train_dir, self.backend_opts.train_uri)
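This train method assumes self._toolbox was registered elsewhere in the backend with the standard DEAP genetic-programming machinery (population, compile, mate, mutate, select). A minimal sketch of the kind of setup it presumes, with hypothetical primitives; the real primitive set would operate on image chips rather than scalars:

import operator
from deap import base, creator, gp, tools

# Hypothetical primitive set: one input (ARG0, the chip) and two operators.
pset = gp.PrimitiveSet('MAIN', arity=1)
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.mul, 2)

creator.create('FitnessMin', base.Fitness, weights=(-1.0,))
creator.create('Individual', gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register('expr', gp.genHalfAndHalf, pset=pset, min_=1, max_=3)
toolbox.register('individual', tools.initIterate, creator.Individual,
                 toolbox.expr)
toolbox.register('population', tools.initRepeat, list, toolbox.individual)
toolbox.register('compile', gp.compile, pset=pset)
toolbox.register('mate', gp.cxOnePoint)
toolbox.register('expr_mut', gp.genFull, min_=0, max_=2)
toolbox.register('mutate', gp.mutUniform, expr=toolbox.expr_mut, pset=pset)
toolbox.register('select', tools.selTournament, tournsize=3)

With a toolbox like this, eaMuPlusLambda evolves the population exactly as called above: num_individuals is mu (survivors per generation), num_offspring is lambda (children per generation), and the evaluate function registered in train scores each compiled individual against the training chips.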
def exp_main(self, raw_uri, root_uri, test=False):
    """Run an experiment on the Spacenet Vegas building dataset.

    This is a simple example of how to do semantic segmentation on data that
    doesn't require any pre-processing or special permission to access.

    Args:
        raw_uri: (str) directory of raw data (the root of the Spacenet dataset)
        root_uri: (str) root directory for experiment output
        test: (bool) if True, run a very small experiment as a test and
            generate debug output
    """
    base_uri = join(
        raw_uri,
        'SpaceNet_Buildings_Dataset_Round2/spacenetV2_Train/AOI_2_Vegas')
    raster_uri = join(base_uri, 'RGB-PanSharpen')
    label_uri = join(base_uri, 'geojson/buildings')
    raster_fn_prefix = 'RGB-PanSharpen_AOI_2_Vegas_img'
    label_fn_prefix = 'buildings_AOI_2_Vegas_img'
    label_paths = list_paths(label_uri, ext='.geojson')
    label_re = re.compile(r'.*{}(\d+)\.geojson'.format(label_fn_prefix))
    scene_ids = [
        label_re.match(label_path).group(1)
        for label_path in label_paths]

    random.seed(5678)
    scene_ids = sorted(scene_ids)
    random.shuffle(scene_ids)

    # Workaround to handle scene 1000 missing on S3.
    if '1000' in scene_ids:
        scene_ids.remove('1000')

    num_train_ids = int(len(scene_ids) * 0.8)
    train_ids = scene_ids[0:num_train_ids]
    val_ids = scene_ids[num_train_ids:]

    test = str_to_bool(test)
    exp_id = 'spacenet-simple-seg'
    num_epochs = 5
    batch_sz = 8
    debug = False
    chip_size = 300

    if test:
        exp_id += '-test'
        num_epochs = 2
        batch_sz = 1
        debug = True
        train_ids = ['12']
        val_ids = ['13']

    task = rv.TaskConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                        .with_chip_size(chip_size) \
                        .with_classes({
                            'Building': (1, 'orange'),
                            'Background': (2, 'black')
                        }) \
                        .with_chip_options(
                            chips_per_scene=9,
                            debug_chip_probability=0.25,
                            negative_survival_probability=1.0,
                            target_classes=[1],
                            target_count_threshold=1000) \
                        .build()

    config = {
        'bs': batch_sz,
        'num_epochs': num_epochs,
        'debug': debug,
        'lr': 1e-4
    }

    backend = rv.BackendConfig.builder(FASTAI_SEMANTIC_SEGMENTATION) \
                              .with_task(task) \
                              .with_config(config) \
                              .build()

    def make_scene(id):
        train_image_uri = os.path.join(
            raster_uri, '{}{}.tif'.format(raster_fn_prefix, id))

        raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
                                             .with_uri(train_image_uri) \
                                             .with_channel_order([0, 1, 2]) \
                                             .with_stats_transformer() \
                                             .build()

        vector_source = os.path.join(
            label_uri, '{}{}.geojson'.format(label_fn_prefix, id))
        label_raster_source = rv.RasterSourceConfig.builder(rv.RASTERIZED_SOURCE) \
                                                   .with_vector_source(vector_source) \
                                                   .with_rasterizer_options(2) \
                                                   .build()
        label_source = rv.LabelSourceConfig.builder(rv.SEMANTIC_SEGMENTATION) \
                                           .with_raster_source(label_raster_source) \
                                           .build()

        scene = rv.SceneConfig.builder() \
                              .with_task(task) \
                              .with_id(id) \
                              .with_raster_source(raster_source) \
                              .with_label_source(label_source) \
                              .build()

        return scene

    train_scenes = [make_scene(id) for id in train_ids]
    val_scenes = [make_scene(id) for id in val_ids]

    dataset = rv.DatasetConfig.builder() \
                              .with_train_scenes(train_scenes) \
                              .with_validation_scenes(val_scenes) \
                              .build()

    # Need to use stats_analyzer because imagery is uint16.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
                                .build()

    experiment = rv.ExperimentConfig.builder() \
                                    .with_id(exp_id) \
                                    .with_task(task) \
                                    .with_backend(backend) \
                                    .with_analyzer(analyzer) \
                                    .with_dataset(dataset) \
                                    .with_root_uri(root_uri) \
                                    .build()

    return experiment
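One detail worth noting in both exp_main methods above: list_paths gives no ordering guarantee, so the ids are sorted before the seeded shuffle. That is what makes the 80/20 split deterministic across runs and filesystems. A standalone illustration with hypothetical ids:

import random

scene_ids = ['103', '7', '42', '58', '9']  # hypothetical ids

random.seed(5678)
scene_ids = sorted(scene_ids)   # normalize the order first
random.shuffle(scene_ids)       # then shuffle deterministically

num_train_ids = int(len(scene_ids) * 0.8)
train_ids = scene_ids[0:num_train_ids]
val_ids = scene_ids[num_train_ids:]
# The same train/val split is produced on every run; without the sort,
# the split would depend on the order in which the paths were listed.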
def exp_main(self, test=False):
    # Docker filepath mounted to my data directory.
    base_uri = '/opt/data/labels2'
    raster_uri = base_uri  # rasters and labels in same directory for now
    label_uri = base_uri

    # Find all of the image ids that have associated images and labels.
    # Collect these values to use as our scene ids.
    # TODO use PV Array dataframe to select these
    label_paths = list_paths(label_uri, ext='.geojson')
    scene_ids = [x.split('.')[-2].split('/')[-1] for x in label_paths]
    scene2_ids = [
        'so9051_rgb_250_04', 'so9265_rgb_250_05', 'sp3590_rgb_250_04', 'sj7304_rgb_250_04',
        'su1385_rgb_250_06', 'st0709_rgb_250_05', 'sj9004_rgb_250_05', 'st8022_rgb_250_05',
        'st8303_rgb_250_05', 'sj9402_rgb_250_05', 'so9078_rgb_250_06', 'sj9003_rgb_250_05',
        'sk0003_rgb_250_05', 'st8468_rgb_250_04', 'st6980_rgb_250_04', 'su0883_rgb_250_05',
        'su0983_rgb_250_05', 'so9249_rgb_250_05', 'su1478_rgb_250_04', 'su1377_rgb_250_04',
        'sj9002_rgb_250_06', 'sj8903_rgb_250_04', 'sj9902_rgb_250_05', 'sj9602_rgb_250_05',
        'tg2827_rgb_250_04', 'sj9702_rgb_250_05', 'sj9803_rgb_250_04', 'sj9802_rgb_250_05',
        'sk0504_rgb_250_04', 'sk0302_rgb_250_05', 'sk0306_rgb_250_04', 'sk0206_rgb_250_04',
        'sk0207_rgb_250_04', 'sk0503_rgb_250_04', 'sj9903_rgb_250_04', 'sk0202_rgb_250_06',
        'sk0309_rgb_250_03', 'sk0605_rgb_250_04', 'sk0405_rgb_250_04', 'sk0404_rgb_250_04',
        'sk0502_rgb_250_05', 'st5071_rgb_250_05', 'sp3293_rgb_250_03', 'sy7691_rgb_250_05',
        'sp3294_rgb_250_03', 'sp3892_rgb_250_05', 'sp3690_rgb_250_04', 'st9979_rgb_250_05',
        'se6154_rgb_250_03', 'so8476_rgb_250_06', 'so8072_rgb_250_04', 'so7972_rgb_250_04',
        'sp3491_rgb_250_03', 'sp3490_rgb_250_03', 'sp3291_rgb_250_03', 'sp3292_rgb_250_03',
        'sp3492_rgb_250_03', 'sk0212_rgb_250_03', 'so7878_rgb_250_06', 'tl1239_rgb_250_03',
        'su0972_rgb_250_03', 'st1532_rgb_250_04', 'so7556_rgb_250_05', 'st7091_rgb_250_07',
        'sn2040_rgb_250_04', 'so7371_rgb_250_04', 'tl6064_rgb_250_05', 'so9255_rgb_250_05',
        'st1826_rgb_250_04', 'st1528_rgb_250_04', 'st1629_rgb_250_04', 'st0727_rgb_250_04',
        'st0827_rgb_250_04', 'st0928_rgb_250_04', 'st0930_rgb_250_04', 'st0929_rgb_250_04',
        'st0832_rgb_250_05', 'tl1750_rgb_250_03', 'st2322_rgb_250_05', 'st1623_rgb_250_04',
        'st1523_rgb_250_04', 'st1624_rgb_250_04', 'st1424_rgb_250_04', 'st1421_rgb_250_05',
        'sp3793_rgb_250_04', 'sp3792_rgb_250_04', 'sj9912_rgb_250_03', 'sk2347_rgb_250_05',
        'sp3391_rgb_250_03', 'tl1846_rgb_250_03', 'sp5177_rgb_250_03', 'sn3251_rgb_250_04',
        'sp3693_rgb_250_04', 'st2014_rgb_250_06', 'st2015_rgb_250_06', 'st2115_rgb_250_05',
        'st2114_rgb_250_05', 'sn4257_rgb_250_04', 'su4223_rgb_250_04', 'su4323_rgb_250_04',
        'tl3068_rgb_250_04', 'sp5178_rgb_250_03', 'sp3791_rgb_250_04', 'st3689_rgb_250_03',
        'st3789_rgb_250_03', 'st0411_rgb_250_04', 'st0212_rgb_250_04', 'st0112_rgb_250_04',
        'st0211_rgb_250_04', 'st0111_rgb_250_04', 'st0209_rgb_250_05', 'st0210_rgb_250_05',
        'sj6714_rgb_250_04', 'sp3893_rgb_250_05', 'su6712_rgb_250_04', 'su6713_rgb_250_04',
        'st9363_rgb_250_04', 'st9463_rgb_250_04', 'nr3059_rgb_250_03', 'st8576_rgb_250_03',
        'sp7948_rgb_250_04', 'sp6138_rgb_250_07', 'tl2276_rgb_250_04', 'sm9817_rgb_250_04',
        'sm9816_rgb_250_04', 'sm9716_rgb_250_04', 'sm9616_rgb_250_04', 'sm9818_rgb_250_04',
        'sm9009_rgb_250_04', 'sm9721_rgb_250_05', 'sm9720_rgb_250_05', 'sm9101_rgb_250_04',
        'sm9201_rgb_250_04', 'sm9010_rgb_250_04', 'sm9109_rgb_250_04', 'sn6502_rgb_250_04',
        'sn6601_rgb_250_04', 'sn6201_rgb_250_04', 'sn6202_rgb_250_04', 'st6788_rgb_250_05',
        'st6688_rgb_250_05', 'st6689_rgb_250_06', 'su0807_rgb_250_05', 'su0806_rgb_250_05',
        'sz0998_rgb_250_05',
        'sz1099_rgb_250_05', 'su3743_rgb_250_04', 'su3744_rgb_250_04', 'su6509_rgb_250_04',
        'su6409_rgb_250_04', 'su6410_rgb_250_04', 'su5413_rgb_250_04', 'su2088_rgb_250_04',
        'su5703_rgb_250_04', 'su5603_rgb_250_04', 'su5604_rgb_250_04', 'st7642_rgb_250_06',
        'st7744_rgb_250_05', 'st6728_rgb_250_05', 'st8558_rgb_250_04', 'st2735_rgb_250_04',
        'tl4990_rgb_250_05', 'sm7209_rgb_250_04', 'st8864_rgb_250_04', 'tg5013_rgb_250_04',
        'st1198_rgb_250_04', 'st1298_rgb_250_04', 'st1722_rgb_250_04', 'tq1078_rgb_250_05',
        'su6401_rgb_250_04', 'st8753_rgb_250_04', 'st8455_rgb_250_05', 'st8660_rgb_250_04',
        'st8760_rgb_250_04', 'st8765_rgb_250_04', 'sp7638_rgb_250_05', 'tl6332_rgb_250_04',
        'st8705_rgb_250_05', 'sy3297_rgb_250_06', 'sy3498_rgb_250_06', 'se3636_rgb_250_01',
        'st6578_rgb_250_05', 'st6478_rgb_250_05', 'st5479_rgb_250_06', 'se2931_rgb_250_02',
        'sd6835_rgb_250_01', 'st2228_rgb_250_05', 'st2227_rgb_250_05']

    # Experiment label and root directory for output.
    exp_id = 'pv-classification'
    root_uri = '/opt/data/rv/test3'
    # num_steps = 1e4  # 1e5 takes too long
    num_epochs = 20
    batch_size = 16
    debug = True

    test = str_to_bool(test)
    if test:
        print('***************** TEST MODE *****************')
        exp_id += '-test'
        # num_steps = 100
        num_epochs = 1
        batch_size = 1
        debug = True
        # Only use a handful of scenes in test mode; the train/val split
        # below is applied to this subset.
        scene_ids = scene_ids[0:5]

    # Split the data into training and validation sets:
    # randomize the order of all scene ids.
    random.seed(5678)
    scene_ids = sorted(scene_ids)
    random.shuffle(scene_ids)

    # Set scenes.
    num_train_ids = round(len(scene_ids) * 0.8)
    train_ids = scene_ids[0:num_train_ids]
    val_ids = scene_ids[num_train_ids:]

    # ------------- TASK -------------

    task = rv.TaskConfig.builder(rv.CHIP_CLASSIFICATION) \
                        .with_chip_size(200) \
                        .with_classes({
                            'pv': (1, 'yellow'),
                            'background': (2, 'black')
                        }) \
                        .build()

    # ------------- BACKEND -------------

    # Configuration options for different models and tasks:
    # https://github.com/azavea/raster-vision/blob/60f741e30a016f25d2643a9b32916adb22e42d50/rastervision/backend/model_defaults.json
    backend = rv.BackendConfig.builder(rv.KERAS_CLASSIFICATION) \
                              .with_task(task) \
                              .with_debug(debug) \
                              .with_batch_size(batch_size) \
                              .with_num_epochs(num_epochs) \
                              .with_model_defaults(rv.RESNET50_IMAGENET) \
                              .with_config({
                                  'trainer': {
                                      'options': {
                                          'saveBest': True,
                                          'lrSchedule': [
                                              {'epoch': 0, 'lr': 0.0005},
                                              {'epoch': 10, 'lr': 0.0001},
                                              {'epoch': 15, 'lr': 0.00001}
                                          ]
                                      }
                                  }
                              }, set_missing_keys=True) \
                              .build()

    # ------------- Make Scenes -------------

    # We will use this function to create a list of scenes that we will pass
    # to the DatasetConfig builder.
    def make_scene(id):
        """Make a SceneConfig object for each image/label pair.

        Args:
            id (str): The id that corresponds to both the .jpg image source
                and .geojson label source for a given scene.

        Returns:
            rv.data.SceneConfig: a SceneConfig object which is composed of
                images, labels and optionally AOIs.
        """
        # Find the uri for the image associated with this id.
        image_uri = os.path.join(raster_uri, '{}.jpg'.format(id))

        # Construct a raster source from an image uri that can be handled by
        # Rasterio. We also specify the order of image channels by their
        # indices and add a stats transformer which normalizes pixel values
        # into uint8.
        raster_source = rv.RasterSourceConfig.builder(rv.RASTERIO_SOURCE) \
                                             .with_uri(image_uri) \
                                             .with_channel_order([0, 1, 2]) \
                                             .with_stats_transformer() \
                                             .build()

        label = os.path.join(label_uri, '{}.geojson'.format(id))

        # Build our classification labels.
        # ioa_thresh sets the minimum intersection-over-area (IOA) of a
        # polygon and cell for the cell to be labeled.
        # infer_cells means the label source infers cells and labels from
        # the vector source.
        # with_pick_min_class_id(True) means 'pv' is always picked when it
        # is present, since it has the smaller class id.
        label_source = rv.LabelSourceConfig.builder(rv.CHIP_CLASSIFICATION) \
                                           .with_uri(label) \
                                           .with_ioa_thresh(0.01) \
                                           .with_use_intersection_over_cell(False) \
                                           .with_pick_min_class_id(True) \
                                           .with_background_class_id(2) \
                                           .with_infer_cells(True) \
                                           .build()

        # Finally we can build a scene config object using the scene id and
        # the configs we just defined.
        scene = rv.SceneConfig.builder() \
                              .with_task(task) \
                              .with_id(id) \
                              .with_raster_source(raster_source) \
                              .with_label_source(label_source) \
                              .build()

        return scene

    # Create lists of train and validation scene configs.
    train_scenes = [make_scene(id) for id in train_ids]
    val_scenes = [make_scene(id) for id in val_ids]

    # ------------- DATASET -------------

    # Construct a DatasetConfig using the lists of train and validation
    # scenes. No augmentor is configured here.
    dataset = rv.DatasetConfig.builder() \
                              .with_train_scenes(train_scenes) \
                              .with_validation_scenes(val_scenes) \
                              .build()
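The snippet stops at the dataset config. Following the pattern of the Spacenet experiments above, the method presumably goes on to build and return the ExperimentConfig; a sketch under that assumption (a STATS_ANALYZER is included because the raster sources use with_stats_transformer()):

    # Presumed remainder, mirroring the earlier exp_main methods; the
    # analyzer is assumed here because the raster sources above use a
    # stats transformer, which needs the analyzer to compute image stats.
    analyzer = rv.AnalyzerConfig.builder(rv.STATS_ANALYZER) \
                                .build()

    experiment = rv.ExperimentConfig.builder() \
                                    .with_id(exp_id) \
                                    .with_task(task) \
                                    .with_backend(backend) \
                                    .with_analyzer(analyzer) \
                                    .with_dataset(dataset) \
                                    .with_root_uri(root_uri) \
                                    .build()

    return experiment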