def run(runner: str, cfg_module: str, commands: List[str],
        arg: List[Tuple[str, str]], splits: int):
    """Subcommand to run commands within pipelines using runner named RUNNER.

    Args:
        runner: name of runner to use
        cfg_module: name of module with `get_configs` function that returns
            PipelineConfigs
        commands: names of commands to run within pipeline. The order in which
            to run them is based on the Pipeline.commands attribute. If this
            is omitted, all commands will be run.
        arg: (key, value) pairs; they are turned into a dict, passed through
            `convert_bool_args`, and handed to `get_configs`
        splits: forwarded to the runner as `num_splits`
    """
    # Keep a reference to the temp dir object for the whole call; only its
    # name is handed to the pipelines.
    # NOTE(review): presumably a TemporaryDirectory-like object whose
    # directory is removed once the object is released -- confirm.
    tmp_dir_obj = rv_config.get_tmp_dir()
    tmp_dir = tmp_dir_obj.name

    module = importlib.import_module(cfg_module)
    args = convert_bool_args(dict(arg))
    cfgs = get_configs(module, runner, args)
    runner_obj = registry.get_runner(runner)()

    for cfg in cfgs:
        cfg.update()
        cfg.rv_config = rv_config.get_config_dict(registry.rv_config_schema)
        cfg.recursive_validate_config()

        # Persist the fully resolved config so the runner can refer to it by
        # URI.
        cfg_json_uri = cfg.get_config_uri()
        json_to_file(cfg.dict(), cfg_json_uri)

        pipeline = cfg.build(tmp_dir)
        # Resolve the default per pipeline. Rebinding `commands` itself would
        # leak the first pipeline's command list into every later config.
        cfg_commands = commands if commands else pipeline.commands
        runner_obj.run(
            cfg_json_uri, pipeline, cfg_commands, num_splits=splits)
def _test_class_inf(self, props, exp_class_ids, default_class_id=None):
    """Assert the class ids assigned to a single point feature.

    A one-feature GeoJSON carrying `props` is written to `self.uri`, read
    back through a GeoJSONVectorSourceConfig with a fixed class filter
    mapping, and the resulting `class_id` properties are compared against
    `exp_class_ids`.

    Args:
        props: properties dict of the test feature
        exp_class_ids: expected list of class ids, one per output feature
        default_class_id: forwarded to GeoJSONVectorSourceConfig
    """
    point_feature = {
        'properties': props,
        'geometry': {
            'type': 'Point',
            'coordinates': [1, 1]
        }
    }
    json_to_file({
        'type': 'FeatureCollection',
        'features': [point_feature]
    }, self.uri)

    # Class 0 matches buildings; class 1 matches either "car" or "auto".
    filters = {
        0: ['==', 'type', 'building'],
        1: ['any', ['==', 'type', 'car'], ['==', 'type', 'auto']]
    }
    source_cfg = GeoJSONVectorSourceConfig(
        uri=self.uri,
        class_id_to_filter=filters,
        default_class_id=default_class_id)
    source = source_cfg.build(
        ClassConfig(names=['building', 'car', 'tree']),
        IdentityCRSTransformer())

    found_class_ids = []
    for feature in source.get_geojson()['features']:
        found_class_ids.append(feature['properties']['class_id'])
    self.assertEqual(found_class_ids, exp_class_ids)
def run(runner: str, cfg_module: str, commands: List[str],
        arg: List[Tuple[str, str]], splits: int):
    """Run COMMANDS within pipelines in CFG_MODULE using RUNNER.

    RUNNER: name of the Runner to use

    CFG_MODULE: the module with `get_configs` function that returns PipelineConfigs.
    This can either be a Python module path or a local path to a .py file.

    COMMANDS: space separated sequence of commands to run within pipeline. The order in
    which to run them is based on the Pipeline.commands attribute. If this is omitted,
    all commands will be run.
    """
    # The docstring above is left as-is: it reads like CLI help text, so its
    # wording may be user-facing.
    # `arg` holds (key, value) pairs that become the dict given to
    # `get_configs`; `splits` is forwarded to the runner as `num_splits`.

    # Keep a reference to the temp dir object for the whole call; only its
    # name is handed to the pipelines.
    tmp_dir_obj = rv_config.get_tmp_dir()
    tmp_dir = tmp_dir_obj.name

    args = convert_bool_args(dict(arg))
    cfgs = get_configs(cfg_module, runner, args)
    runner_obj = registry.get_runner(runner)()

    for cfg in cfgs:
        cfg.update()
        cfg.rv_config = rv_config.get_config_dict(registry.rv_config_schema)
        cfg.recursive_validate_config()

        # Persist the fully resolved config so the runner can refer to it by
        # URI.
        cfg_json_uri = cfg.get_config_uri()
        json_to_file(cfg.dict(), cfg_json_uri)

        pipeline = cfg.build(tmp_dir)
        # Resolve the default per pipeline. Rebinding `commands` itself would
        # leak the first pipeline's command list into every later config.
        cfg_commands = commands if commands else pipeline.commands
        runner_obj.run(
            cfg_json_uri, pipeline, cfg_commands, num_splits=splits)
def eval_model(self, split):
    """Validate on `split`, save the metrics as JSON, and plot predictions.

    The metrics returned by `validate_epoch` are logged and written to
    `<output_dir>/<split>_metrics.json`.
    """
    log.info('Evaluating on {} set...'.format(split))
    metrics = self.validate_epoch(self.get_dataloader(split))
    log.info('metrics: {}'.format(metrics))

    metrics_path = join(self.output_dir, '{}_metrics.json'.format(split))
    json_to_file(metrics, metrics_path)
    self.plot_predictions(split)
def build_source(self, geojson, all_touched=False):
    """Write `geojson` to `self.uri` and build a rasterized source from it.

    Args:
        geojson: GeoJSON dict to save and rasterize
        all_touched: forwarded to RasterizerConfig

    Returns:
        the object built by RasterizedSourceConfig using this instance's
        class config, CRS transformer and extent
    """
    json_to_file(geojson, self.uri)

    vector_source_cfg = GeoJSONVectorSourceConfig(
        uri=self.uri, default_class_id=None)
    rasterizer_cfg = RasterizerConfig(
        background_class_id=self.background_class_id,
        all_touched=all_touched)
    source_cfg = RasterizedSourceConfig(
        vector_source=vector_source_cfg, rasterizer_config=rasterizer_cfg)

    return source_cfg.build(self.class_config, self.crs_transformer,
                            self.extent)
def save(self, labels):
    """Convert labels to GeoJSON and write the result to `self.uri`.

    Class ids and scores are converted with `.tolist()` before being passed
    to `boxes_to_geojson`.
    """
    geojson = boxes_to_geojson(
        labels.get_boxes(),
        labels.get_class_ids().tolist(),
        self.crs_transformer,
        self.class_config,
        scores=labels.get_scores().tolist())
    json_to_file(geojson, self.uri)
def setUp(self):
    """Create the GeoJSON fixture, spatial index and label file for tests."""
    self.crs_transformer = DoubleCRSTransformer()
    self.class_config = ClassConfig(names=['car', 'house'])

    car_feature = {
        'type': 'Feature',
        'geometry': {
            'type': 'MultiPolygon',
            'coordinates': [[[[0., 0.], [0., 2.], [2., 2.], [2., 0.],
                              [0., 0.]]]]
        },
        'properties': {
            'class_name': 'car',
            'class_id': 0,
            'score': 0.0
        }
    }
    house_feature = {
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[2., 2.], [2., 4.], [4., 4.], [4., 2.],
                             [2., 2.]]]
        },
        'properties': {
            'score': 0.0,
            'class_name': 'house',
            'class_id': 1
        }
    }
    self.geojson = {
        'type': 'FeatureCollection',
        'features': [car_feature, house_feature]
    }

    self.box1 = Box.make_square(0, 0, 4)
    self.box2 = Box.make_square(4, 4, 4)
    self.class_id1 = 0
    self.class_id2 = 1
    self.background_class_id = 2

    # Index the geometries, each tagged with the class id of its feature.
    tagged_geoms = []
    for feature in self.geojson['features']:
        geom = shape(feature['geometry'])
        geom.class_id = feature['properties']['class_id']
        tagged_geoms.append(geom)
    self.str_tree = STRtree(tagged_geoms)

    # Save the fixture to a file inside a fresh temp dir.
    self.file_name = 'labels.json'
    self.tmp_dir = rv_config.get_tmp_dir()
    self.uri = os.path.join(self.tmp_dir.name, self.file_name)
    json_to_file(self.geojson, self.uri)
def save(self, labels):
    """Convert cell labels to GeoJSON and write the result to `self.uri`.

    Note (carried over from the original documentation): if the grid is
    inferred from polygons, only the grid will be written, not the original
    polygons.
    """
    geojson = boxes_to_geojson(
        labels.get_cells(),
        labels.get_class_ids(),
        self.crs_transformer,
        self.class_config,
        scores=list(labels.get_scores()))
    json_to_file(geojson, self.uri)
def eval_model(self, split: str):
    """Compute and save validation metrics and prediction plots for a split.

    The metrics returned by `validate_epoch` are logged and written to
    `<output_dir>/<split>_metrics.json`.

    Args:
        split: the dataset split to use: train, valid, or test.
    """
    log.info('Evaluating on {} set...'.format(split))
    dataloader = self.get_dataloader(split)
    split_metrics = self.validate_epoch(dataloader)
    log.info('metrics: {}'.format(split_metrics))

    metrics_fn = '{}_metrics.json'.format(split)
    json_to_file(split_metrics, join(self.output_dir, metrics_fn))
    self.plot_predictions(split)
def transform_geojson(self,
                      geojson,
                      line_bufs=None,
                      point_bufs=None,
                      crs_transformer=None,
                      to_map_coords=False):
    """Round-trip `geojson` through a GeoJSON vector source.

    The input is written to `self.uri`, loaded through a
    GeoJSONVectorSourceConfig with a single 'building' class and
    default_class_id=0, and the source's GeoJSON is returned.

    Args:
        geojson: GeoJSON dict to transform
        line_bufs: forwarded to GeoJSONVectorSourceConfig
        point_bufs: forwarded to GeoJSONVectorSourceConfig
        crs_transformer: transformer to build the source with; an
            IdentityCRSTransformer is used when None
        to_map_coords: forwarded to the source's `get_geojson`
    """
    transformer = (IdentityCRSTransformer()
                   if crs_transformer is None else crs_transformer)

    json_to_file(geojson, self.uri)
    source_cfg = GeoJSONVectorSourceConfig(
        uri=self.uri,
        line_bufs=line_bufs,
        point_bufs=point_bufs,
        default_class_id=0)
    vector_source = source_cfg.build(
        ClassConfig(names=['building']), transformer)
    return vector_source.get_geojson(to_map_coords=to_map_coords)
def __exit__(self, type, value, traceback):
    """Write COCO-style labels for each non-empty split, then defer to super.

    For 'train' and 'valid', a `labels.json` holding the split's images and
    annotations plus the shared categories is written into the split's
    directory under `self.sample_dir`. Splits without images are skipped.
    """
    for split in ('train', 'valid'):
        split_data = self.splits[split]
        images = split_data['images']
        if len(images) == 0:
            continue

        split_dir = join(self.sample_dir, split)
        json_to_file({
            'images': images,
            'annotations': split_data['annotations'],
            'categories': self.categories
        }, join(split_dir, 'labels.json'))

    super().__exit__(type, value, traceback)
def setUp(self):
    """Create a two-polygon GeoJSON label file and related test fixtures."""
    self.file_name = 'labels.json'
    self.tmp_dir = rv_config.get_tmp_dir()
    self.file_path = os.path.join(self.tmp_dir.name, self.file_name)

    self.crs_transformer = DoubleCRSTransformer()

    first_square = {
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[0., 0.], [0., 1.], [1., 1.], [1., 0.],
                             [0., 0.]]]
        },
        'properties': {
            'class_id': 0,
            'score': 0.9
        }
    }
    second_square = {
        'type': 'Feature',
        'geometry': {
            'type': 'Polygon',
            'coordinates': [[[1., 1.], [1., 2.], [2., 2.], [2., 1.],
                             [1., 1.]]]
        },
        'properties': {
            'score': 0.9,
            'class_id': 1
        }
    }
    self.geojson = {
        'type': 'FeatureCollection',
        'features': [first_square, second_square]
    }

    self.extent = Box.make_square(0, 0, 10)
    self.class_config = ClassConfig(names=['car', 'house'])

    json_to_file(self.geojson, self.file_path)
def compute_coco_eval(outputs, targets, num_class_ids):
    """Run the pycocotools bounding-box evaluation on predictions vs. targets.

    Note: boxes are in (ymin, xmin, ymax, xmax) format with values ranging
    from 0 to h or w.

    Args:
        outputs: (list) of length m containing dicts of form
            {'boxes': <tensor with shape (n, 4)>,
             'class_ids': <tensor with shape (n,)>,
             'scores': <tensor with shape (n,)>}
        targets: (list) of length m containing dicts of form
            {'boxes': <tensor with shape (n, 4)>,
             'class_ids': <tensor with shape (n,)>}
        num_class_ids: forwarded to `get_coco_gt` together with `targets`

    Returns:
        the COCOeval object after evaluate(), accumulate() and summarize()
        have been called, or None when there are no predictions.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        predictions = get_coco_preds(outputs)

        # ap is undefined when there are no predicted boxes
        if len(predictions) == 0:
            return None

        # The ground truth is written to disk because COCO is constructed
        # from a file path here.
        ground_truth_path = join(work_dir, 'gt.json')
        json_to_file(get_coco_gt(targets, num_class_ids), ground_truth_path)
        coco_gt = COCO(ground_truth_path)

        pycocotools.coco.unicode = None
        coco_dt = coco_gt.loadRes(predictions)

        evaluator = COCOeval(coco_gt, coco_dt, 'bbox')
        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()
        return evaluator
def __init__(self,
             cfg: LearnerConfig,
             tmp_dir: str,
             model_path: Optional[str] = None):
    """Constructor.

    Builds the model and moves it to the selected device. If `model_path`
    is given, the weights are loaded from it and the model is put in eval
    mode; otherwise the output directory, bookkeeping paths, optimizer,
    data and schedulers needed for training are set up.

    Args:
        cfg: configuration
        tmp_dir: root of temp dirs
        model_path: a local path to model weights. If provided, the model is
            loaded and it is assumed that this Learner will be used for
            prediction only.

    Raises:
        Exception: if `model_path` is given but is not an existing file.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; the training setup is assumed to live
    # entirely in the `else` branch -- confirm against the original file.
    self.cfg = cfg
    self.tmp_dir = tmp_dir

    # TODO make cache dirs configurable
    torch_cache_dir = '/opt/data/torch-cache'
    # Set TORCH_HOME so torch uses the cache dir above.
    os.environ['TORCH_HOME'] = torch_cache_dir
    # Use the GPU when torch reports one is available, else the CPU.
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.data_cache_dir = '/opt/data/data-cache'
    make_dir(self.data_cache_dir)

    self.model = self.build_model()
    self.model.to(self.device)

    if model_path is not None:
        # Prediction-only mode: load the given weights and switch to eval.
        if isfile(model_path):
            self.model.load_state_dict(
                torch.load(model_path, map_location=self.device))
        else:
            raise Exception(
                'Model could not be found at {}'.format(model_path))
        self.model.eval()
    else:
        log.info(self.cfg)

        # ds = dataset, dl = dataloader
        # These start as None; presumably setup_data() below fills them in
        # (len(self.train_ds) is taken right after it) -- confirm.
        self.train_ds = None
        self.train_dl = None
        self.valid_ds = None
        self.valid_dl = None
        self.test_ds = None
        self.test_dl = None

        if cfg.output_uri.startswith('s3://'):
            # Remote output: work in a local directory derived from the
            # URI and tmp_dir.
            self.output_dir = get_local_path(cfg.output_uri, tmp_dir)
            make_dir(self.output_dir, force_empty=True)
            # Syncing from the cloud is skipped in overfit mode.
            if not cfg.overfit_mode:
                self.sync_from_cloud()
        else:
            self.output_dir = cfg.output_uri
            make_dir(self.output_dir)

        # Locations of the artifacts kept inside the output directory.
        self.last_model_path = join(self.output_dir, 'last-model.pth')
        self.config_path = join(self.output_dir, 'config.json')
        self.train_state_path = join(self.output_dir, 'train-state.json')
        self.log_path = join(self.output_dir, 'log.csv')
        model_bundle_fn = basename(cfg.get_model_bundle_uri())
        self.model_bundle_path = join(self.output_dir, model_bundle_fn)
        self.metric_names = self.build_metric_names()

        # Save the config alongside the other outputs.
        json_to_file(self.cfg.dict(), self.config_path)
        self.load_init_weights()
        self.load_checkpoint()
        self.opt = self.build_optimizer()
        self.setup_data()
        self.start_epoch = self.get_start_epoch()
        # Floor division: a trailing partial batch is not counted as a step.
        self.steps_per_epoch = len(
            self.train_ds) // self.cfg.solver.batch_sz
        self.step_scheduler = self.build_step_scheduler()
        self.epoch_scheduler = self.build_epoch_scheduler()
        self.setup_tensorboard()
def save_image_crop(image_uri,
                    image_crop_uri,
                    label_uri=None,
                    label_crop_uri=None,
                    size=600,
                    min_features=10,
                    vector_labels=True,
                    class_config=None):
    """Save a crop of an image to use for testing.

    Nothing is done if image_crop_uri already exists. If label_uri is set
    and the labels are vector labels, the crop needs to cover >=
    min_features. The first window that qualifies is used.

    Args:
        image_uri: URI of original image
        image_crop_uri: URI of cropped image to save
        label_uri: optional URI of label file
        label_crop_uri: optional URI of cropped labels to save
        size: height and width of crop
        min_features: minimum number of label geometries returned by the
            spatial index query for a window to be accepted (only used
            with vector labels)
        vector_labels: if True, label_uri is read as GeoJSON vector labels;
            otherwise the label file is cropped with the same window as the
            image
        class_config: passed to the GeoJSON vector source when it is built

    Raises:
        ValueError if cannot find a crop satisfying min_features constraint.
    """
    if file_exists(image_crop_uri):
        return

    print('Saving test crop to {}...'.format(image_crop_uri))
    use_vector_labels = bool(label_uri and vector_labels)

    # The environment may be modified below; it is restored in `finally`.
    old_environ = os.environ.copy()
    try:
        request_payer = S3FileSystem.get_request_payer()
        if request_payer == 'requester':
            os.environ['AWS_REQUEST_PAYER'] = request_payer

        # Open the dataset in a `with` block so it is closed even when
        # cropping fails (it was previously never closed).
        with rasterio.open(image_uri) as im_dataset:
            extent = Box(0, 0, im_dataset.height, im_dataset.width)
            windows = extent.get_windows(size, size)

            if use_vector_labels:
                crs_transformer = RasterioCRSTransformer.from_dataset(
                    im_dataset)
                vs_config = GeoJSONVectorSourceConfig(uri=label_uri)
                vs = vs_config.build(class_config, crs_transformer)
                geojson = vs.get_geojson()
                # Spatial index over the label geometries.
                tree = STRtree(
                    [shape(f['geometry']) for f in geojson['features']])

                def p2m(x, y, z=None):
                    return crs_transformer.pixel_to_map((x, y))

            # Initialised up front so an empty window list leads to the
            # documented ValueError rather than a NameError.
            use_window = False
            for window in windows:
                use_window = True
                if use_vector_labels:
                    w_polys = tree.query(window.to_shapely())
                    use_window = len(w_polys) >= min_features
                    if use_window and label_crop_uri is not None:
                        print('Saving test crop labels to {}...'.format(
                            label_crop_uri))

                        # Convert the geometries with pixel_to_map before
                        # saving them.
                        label_crop_features = [
                            mapping(transform(p2m, wp)) for wp in w_polys
                        ]
                        label_crop_json = {
                            'type': 'FeatureCollection',
                            'features': [{
                                'geometry': f
                            } for f in label_crop_features]
                        }
                        json_to_file(label_crop_json, label_crop_uri)

                if use_window:
                    crop_image(image_uri, window, image_crop_uri)

                    if not vector_labels and label_uri and label_crop_uri:
                        crop_image(label_uri, window, label_crop_uri)

                    break

        if not use_window:
            raise ValueError('Could not find a good crop.')
    finally:
        os.environ.clear()
        os.environ.update(old_environ)