class SemanticSegmentationLabelStoreConfig(LabelStoreConfig):
    """Config for the label store that holds semantic segmentation predictions.

    ``uri`` is where raster predictions are written; each entry of
    ``vector_output`` configures a vectorized form of one predicted class.
    """
    uri: Optional[str] = Field(
        None,
        description=(
            'URI of file with predictions. If None, and this Config is part of '
            'a SceneConfig inside an RVPipelineConfig, this field will be '
            'auto-generated.'))
    vector_output: List[VectorOutputConfig] = []
    rgb: bool = Field(
        False,
        description=(
            'If True, save prediction class_ids in RGB format using the colors in '
            'class_config.'))

    def build(self, class_config, crs_transformer, extent, tmp_dir):
        """Return a SemanticSegmentationLabelStore built from this config.

        Args:
            class_config: passed through as ``class_config``.
            crs_transformer: passed through positionally.
            extent: passed through positionally.
            tmp_dir: passed through positionally.
        """
        # NOTE(review): ``rgb`` is not forwarded to the store here -- confirm
        # whether SemanticSegmentationLabelStore is expected to receive it.
        return SemanticSegmentationLabelStore(
            self.uri,
            extent,
            crs_transformer,
            tmp_dir,
            vector_output=self.vector_output,
            class_config=class_config)

    def update(self, pipeline=None, scene=None):
        """Fill in the auto-generated URI and update each vector output.

        ``uri`` is only generated when it is None and both ``pipeline`` and
        ``scene`` are given; an explicitly set ``uri`` is left untouched.
        """
        if pipeline is not None and scene is not None:
            if self.uri is None:
                self.uri = join(pipeline.predict_uri,
                                '{}.tif'.format(scene.id))

        for vo in self.vector_output:
            vo.update(pipeline, scene)
class BuildingVectorOutputConfig(VectorOutputConfig):
    """Vector output options tailored to building predictions.

    The additional fields tune how clusters of buildings are recognized and
    broken up during vectorization.
    """
    min_aspect_ratio: float = Field(
        1.618,
        description=(
            'Ratio between length and height (or height and length) of anything that can '
            'be considered to be a cluster of buildings. The goal is to distinguish between '
            'rows of buildings and (say) a single building.'))
    min_area: float = Field(
        0.0,
        description=(
            'Minimum area of anything that can be considered to be a cluster of buildings. '
            'The goal is to distinguish between buildings and artifacts.'))
    element_width_factor: float = Field(
        0.5,
        description=(
            'Width of the structural element used to break building clusters as a fraction '
            'of the width of the cluster.'))
    element_thickness: float = Field(
        0.001,
        description=(
            'Thickness of the structural element that is used to break building clusters.'
        ))

    def get_mode(self):
        """Return the mode name identifying this kind of vector output."""
        mode = 'buildings'
        return mode
class VectorOutputConfig(Config):
    """Config for vectorized semantic segmentation predictions."""
    uri: Optional[str] = Field(
        None,
        description=(
            'URI of vector output. If None, and this Config is part of a SceneConfig and '
            'RVPipeline, this field will be auto-generated.'))
    class_id: int = Field(
        ...,
        description='The prediction class that is to be turned into vectors.')
    denoise: int = Field(
        0,
        description=(
            'Radius of the structural element used to remove high-frequency signals from '
            'the image.'))

    def update(self, pipeline=None, scene=None):
        """Auto-generate ``uri`` when it has not been set.

        The generated URI is
        ``<pipeline.root_uri>/predict/<scene.id>-<class_id>-<mode>.json``.
        Nothing happens unless both ``pipeline`` and ``scene`` are given.
        An explicitly configured ``uri`` is preserved, matching the field's
        documented "If None, ... auto-generated" contract.
        """
        if pipeline and scene and self.uri is None:
            self.uri = join(
                pipeline.root_uri, 'predict',
                '{}-{}-{}.json'.format(scene.id, self.class_id,
                                       self.get_mode()))

    def get_mode(self):
        """Return the mode name used in the generated URI.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()
class SemanticSegmentationLabelStoreConfig(LabelStoreConfig):
    """Config for the label store that holds semantic segmentation predictions."""
    uri: Optional[str] = Field(None, description='URI of predictions.')
    vector_output: List[VectorOutputConfig] = []
    rgb: bool = Field(
        False,
        description=(
            'If True, save prediction class_ids in RGB format using the colors in '
            'class_config.'))

    def build(self, class_config, crs_transformer, extent, tmp_dir):
        """Return a SemanticSegmentationLabelStore built from this config."""
        return SemanticSegmentationLabelStore(
            self.uri,
            extent,
            crs_transformer,
            tmp_dir,
            vector_output=self.vector_output,
            class_config=class_config)

    def update(self, pipeline=None, scene=None):
        """Auto-generate any URIs that have not been set explicitly.

        Only acts when both ``pipeline`` and ``scene`` are given. The raster
        URI becomes ``<pipeline.predict_uri>/<scene.id>.tif`` and each vector
        output URI becomes
        ``<pipeline.root_uri>/predict/<scene.id>-<class_id>-<mode>.json``.
        URIs that are already set are left untouched.
        """
        if pipeline is None or scene is None:
            return

        if self.uri is None:
            self.uri = join(pipeline.predict_uri, '{}.tif'.format(scene.id))

        for vo in self.vector_output:
            # Treat vector output URIs like ``self.uri``: never clobber a
            # value the user provided.
            if vo.uri is None:
                vo.uri = join(
                    pipeline.root_uri, 'predict', '{}-{}-{}.json'.format(
                        scene.id, vo.class_id, vo.get_mode()))
class RasterioSourceConfig(RasterSourceConfig):
    """Config for a raster source read from one or more image URIs."""
    uris: List[str] = Field(
        ...,
        description=(
            'List of image URIs that comprise imagery for a scene. The format of each file '
            'can be any that can be read by Rasterio/GDAL. If > 1 URI is provided, a VRT '
            'will be created to mosaic together the individual images.'))
    # The keyword must be ``description`` (singular); the previous
    # ``descriptions`` spelling was not the field's description argument.
    x_shift: float = Field(
        0.0,
        description=(
            'A number of meters to shift along the x-axis. A positive shift moves the '
            '"camera" to the right.'))
    y_shift: float = Field(
        0.0,
        description=(
            'A number of meters to shift along the y-axis. A positive shift moves the '
            '"camera" down.'))

    def build(self, tmp_dir, use_transformers=True):
        """Return a RasterioSource built from this config.

        Args:
            tmp_dir: passed through to RasterioSource.
            use_transformers: if False, the source is built with an empty
                list of raster transformers instead of the configured ones.
        """
        raster_transformers = ([rt.build() for rt in self.transformers]
                               if use_transformers else [])

        return RasterioSource(
            self.uris,
            raster_transformers,
            tmp_dir,
            channel_order=self.channel_order,
            x_shift=self.x_shift,
            y_shift=self.y_shift)
class StatsAnalyzerConfig(AnalyzerConfig):
    """Config for the analyzer that writes ``stats.json``."""
    output_uri: Optional[str] = Field(
        None,
        description=(
            'URI for output. If None and this is part of an RVPipeline, this is '
            'auto-generated.'))
    sample_prob: Optional[float] = Field(
        0.1,
        description=(
            'The probability of using a random window for computing statistics. '
            'If None, will use a sliding window.'))

    def update(self, pipeline=None):
        """Set ``output_uri`` under ``pipeline.analyze_uri`` if it is unset."""
        if pipeline is not None and self.output_uri is None:
            self.output_uri = join(pipeline.analyze_uri, 'stats.json')

    def validate_config(self):
        """Check that ``sample_prob`` is None or lies in the interval (0, 1].

        Raises:
            ConfigError: if ``sample_prob`` is set and is > 1 or <= 0.
        """
        # None is a documented, valid value (sliding window); comparing it
        # against a number would raise TypeError, so skip the range check.
        if self.sample_prob is None:
            return
        if self.sample_prob > 1 or self.sample_prob <= 0:
            raise ConfigError('sample_prob must be <= 1 and > 0')

    def build(self):
        """Return a StatsAnalyzer for ``output_uri`` and ``sample_prob``."""
        from rastervision2.core.analyzer import StatsAnalyzer
        return StatsAnalyzer(self.output_uri, self.sample_prob)

    def get_bundle_filenames(self):
        """Return the file names this analyzer contributes to a bundle."""
        return ['stats.json']
class PipelineConfig(Config):
    """Base configuration shared by all Pipelines.

    Subclass this to configure a new kind of Pipeline.
    """
    root_uri: str = Field(
        None, description='the root URI for output generated by the pipeline')
    rv_config: dict = Field(
        None,
        description=(
            'used to store serialized RVConfig so pipeline can '
            'run in remote environment with the local RVConfig. This should '
            'not be set explicitly by users -- it is only used by the runner '
            'when running a remote pipeline.'))

    def get_config_uri(self) -> str:
        """Return the URI of the serialized form of this PipelineConfig."""
        config_filename = 'pipeline-config.json'
        return join(self.root_uri, config_filename)

    def build(self, tmp_dir: str) -> 'Pipeline':
        """Create the pipeline described by this configuration.

        Subclasses are expected to override this and return an instance of
        their own Pipeline subclass.

        Args:
            tmp_dir: root of any temporary directory to pass to pipeline
        """
        # Imported inside the method, as in the original implementation.
        from rastervision2.pipeline.pipeline import Pipeline  # noqa
        pipeline = Pipeline(self, tmp_dir)
        return pipeline
class PyTorchLearnerBackendConfig(BackendConfig):
    """Base backend config holding model, solver and logging options.

    ``get_learner_config`` and ``build`` must be provided by subclasses.
    """
    model: ModelConfig
    solver: SolverConfig
    log_tensorboard: bool = Field(
        True, description='If True, log events to Tensorboard log files.')
    run_tensorboard: bool = Field(
        False,
        description='If True, run Tensorboard server pointing at log files.')
    augmentors: List[str] = Field(
        default_augmentors,
        description=(
            'Names of albumentations augmentors to use for training batches. '
            'Choices include: ' + str(augmentor_list)))
    # A space was missing after "docs for", which rendered as
    # "see the docs forpytorch_learner...".
    test_mode: bool = Field(
        False,
        description=(
            'This field is passed along to the LearnerConfig which is returned by '
            'get_learner_config(). For more info, see the docs for '
            'pytorch_learner.learner_config.LearnerConfig.test_mode.'))

    def get_bundle_filenames(self):
        """Return the file names this backend contributes to a bundle."""
        return ['model-bundle.zip']

    def get_learner_config(self, pipeline):
        """Return the LearnerConfig for ``pipeline``.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()

    def build(self, pipeline, tmp_dir):
        """Build the backend.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()
class SolverConfig(Config):
    """Config related to solver aka optimizer."""
    lr: float = Field(1e-4, description='Learning rate.')
    num_epochs: int = Field(
        10,
        description=
        'Number of epochs (ie. sweeps through the whole training set).')
    test_num_epochs: int = Field(
        2, description='Number of epochs to use in test mode.')
    test_batch_sz: int = Field(
        4, description='Batch size to use in test mode.')
    overfit_num_steps: int = Field(
        1, description='Number of optimizer steps to use in overfit mode.')
    sync_interval: int = Field(
        1, description='The interval in epochs for each sync to the cloud.')
    batch_sz: int = Field(32, description='Batch size.')
    one_cycle: bool = Field(
        True,
        description=(
            'If True, use triangular LR scheduler with a single cycle across all '
            'epochs with start and end LR being lr/10 and the peak being lr.'))
    multi_stage: List = Field(
        [], description=('List of epoch indices at which to divide LR by 10.'))

    def update(self, learner: Optional['LearnerConfig'] = None):
        """No-op hook; this config has nothing to derive from ``learner``."""
        pass

    def validate_config(self):
        """Run ``validate_nonneg`` on every numeric field of this config."""
        # ``test_batch_sz`` was previously the only size/count field left
        # unchecked; it is validated last so earlier failures are reported in
        # the same order as before.
        numeric_fields = ('lr', 'num_epochs', 'test_num_epochs',
                          'overfit_num_steps', 'sync_interval', 'batch_sz',
                          'test_batch_sz')
        for field_name in numeric_fields:
            self.validate_nonneg(field_name)
class DataConfig(Config):
    """Config related to dataset for training and testing."""
    uri: Union[None, str, List[str]] = Field(
        None,
        description=(
            'URI of the dataset. This can be a zip file, a list of zip files, or a '
            'directory which contains a set of zip files.'))
    train_sz: Optional[int] = Field(
        None,
        description=(
            'If set, the number of training images to use. If fewer images exist, '
            'then an exception will be raised.'))
    group_uris: Union[None, List[Union[str, List[str]]]] = Field(
        None,
        description=(
            'This can be set instead of uri in order to specify groups of chips. Each '
            'element in the list is expected to be an object of the same form accepted by '
            'the uri field. The purpose of separating chips into groups is to be able to '
            'use the group_train_sz field.'))
    group_train_sz: Optional[int] = Field(
        None,
        description=(
            'If group_uris is set, this can be used to specify the number of chips to use '
            'per group.'))
    data_format: Optional[str] = Field(
        None, description='Name of dataset format.')
    class_names: List[str] = Field([], description='Names of classes.')
    class_colors: Union[None, List[str], List[List]] = Field(
        None,
        description=('Colors used to display classes. '
                     'Can be color 3-tuples in list form.'))
    img_sz: PositiveInt = Field(
        256,
        description=(
            'Length of a side of each image in pixels. This is the size to transform '
            'it to during training, not the size in the raw dataset.'))
    num_workers: int = Field(
        4,
        description='Number of workers to use when DataLoader makes batches.')
    # TODO support setting parameters of augmentors?
    # The first literal below used to be split by a raw line break, which is
    # an unterminated string literal; the break is kept as an escaped "\n" so
    # the resulting text is unchanged.
    # ``augmentors`` in the expression refers to the module-level list of
    # available augmentor names, since the class attribute is not bound yet.
    augmentors: List[str] = Field(
        default_augmentors,
        description=(
            'Names of albumentations augmentors to use for training batches. \n'
            'Choices include: ' + str(augmentors)))

    def update(self, learner: Optional['LearnerConfig'] = None):
        """Generate one color per class name when no colors are configured."""
        if not self.class_colors:
            self.class_colors = [color_to_triple() for _ in self.class_names]

    def validate_augmentors(self):
        """Validate the ``augmentors`` field against the module-level list."""
        self.validate_list('augmentors', augmentors)

    def validate_config(self):
        """Validate this config; currently only checks the augmentors."""
        self.validate_augmentors()
class RasterizerConfig(Config):
    """Options for burning polygons into a raster."""
    # Required: there is no default background class.
    background_class_id: int = Field(
        ...,
        description=(
            'The class_id to use for any background pixels, ie. pixels not covered by a '
            'polygon.'))
    all_touched: bool = Field(
        False,
        description=(
            'If True, all pixels touched by geometries will be burned in. '
            'If false, only pixels whose center is within the polygon or '
            'that are selected by Bresenham’s line algorithm will be '
            'burned in. (See rasterio.features.rasterize).'))
class VectorOutputConfig(Config):
    """Config for a vectorized form of one predicted class."""
    uri: Optional[str] = Field(None, description='URI of vector output')
    class_id: int = Field(
        ...,
        description='The prediction class that is to be turned into vectors.')
    denoise: int = Field(
        0,
        description=(
            'Radius of the structural element used to remove high-frequency signals from '
            'the image.'))

    def get_mode(self):
        """Return the mode name of this vector output.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()
class ObjectDetectionPredictOptions(Config):
    """Thresholds applied to predicted boxes."""
    merge_thresh: float = Field(
        0.5,
        description=(
            'If predicted boxes have an IOA (intersection over area) greater than '
            'merge_thresh, then they are merged into a single box during postprocessing. '
            'This is needed since the sliding window approach results in some false '
            'duplicates.'))
    score_thresh: float = Field(
        0.5,
        description=(
            'Predicted boxes are only output if their score is above score_thresh.'
        ))
class ClassConfig(Config):
    """Configures the class names that are being predicted."""
    names: List[str] = Field(..., description='Names of classes.')
    colors: Optional[List[Union[Tuple, str]]] = Field(
        None,
        description=(
            'Colors used to visualize classes. Can be color strings accepted by '
            'matplotlib or RGB tuples. If None, a random color will be auto-generated '
            'for each class.'))
    null_class: Optional[str] = Field(
        None,
        description=(
            'Optional name of class in `names` to use as the null class. This is used in '
            'semantic segmentation to represent the label for imagery pixels that are '
            'NODATA or that are missing a label. If None, and this Config is part of a '
            'SemanticSegmentationConfig, a null class will be added automatically.'
        ))

    def get_class_id(self, name):
        """Return the index of ``name`` in ``names``.

        Raises:
            ValueError: if ``name`` is not in ``names``.
        """
        return self.names.index(name)

    def get_name(self, id):
        """Return the class name stored at index ``id``."""
        return self.names[id]

    def get_null_class_id(self):
        """Return the class id of the null class.

        Raises:
            ValueError: if ``null_class`` is not set.
        """
        if self.null_class is None:
            raise ValueError('null_class is not set')
        return self.get_class_id(self.null_class)

    def get_color_to_class_id(self):
        """Return a dict mapping each color to its index in ``colors``.

        ``colors`` must already be populated (e.g. by ``update()``).
        """
        return {color: class_id for class_id, color in enumerate(self.colors)}

    def ensure_null_class(self):
        """Add a null class if one isn't set.

        Safe to call before ``update()``: if no colors are set yet, they are
        generated first so that ``colors`` stays aligned with ``names``.
        Calling it again, or with a class already named 'null', does not add
        a duplicate entry.
        """
        if self.null_class is not None:
            return

        # ``colors`` defaults to None; appending to it directly would raise
        # AttributeError, so fill it the same way ``update()`` does.
        if not self.colors:
            self.colors = [color_to_triple() for _ in self.names]

        if 'null' not in self.names:
            self.names.append('null')
            self.colors.append('black')
        self.null_class = 'null'

    def update(self, pipeline=None):
        """Generate one color per class name when no colors are configured."""
        if not self.colors:
            self.colors = [color_to_triple() for _ in self.names]

    def validate_config(self):
        """Check that ``null_class``, when set, is one of ``names``.

        Raises:
            ConfigError: if ``null_class`` is set but missing from ``names``.
        """
        if self.null_class is not None and self.null_class not in self.names:
            raise ConfigError(
                'The null_class: {} must be in list of class names.'.format(
                    self.null_class))

    def __len__(self):
        """Return the number of class names."""
        return len(self.names)
class VectorSourceConfig(Config):
    """Base config for vector sources; ``build`` is left to subclasses."""
    default_class_id: Optional[int] = Field(
        ...,
        description=(
            'The default class_id to use if class cannot be inferred using other '
            'mechanisms. If a feature has an inferred class_id of None, then it '
            'will be deleted.'))
    # A space was missing after "each feature.", which rendered as
    # "feature.This allows ...".
    class_id_to_filter: Optional[Dict] = Field(
        None,
        description=(
            'Map from class_id to JSON filter used to infer missing class_ids. '
            'Each key should be a class id, and its value should be a boolean '
            'expression which is run against the property field for each feature. '
            'This allows matching different features to different class ids based on '
            'its properties. The expression schema is that described by '
            'https://docs.mapbox.com/mapbox-gl-js/style-spec/other/#other-filter'
        ))
    line_bufs: Optional[Dict[int, Union[int, float, None]]] = Field(
        None,
        description=(
            'This is useful, for example, for buffering lines representing roads so that '
            'their width roughly matches the width of roads in the imagery. If None, uses '
            'default buffer value of 1. Otherwise, a map from class_id to '
            'number of pixels to buffer by. If the buffer value is None, then no buffering '
            'will be performed and the LineString or Point won\'t get converted to a '
            'Polygon. Not converting to Polygon is incompatible with the currently '
            'available LabelSources, but may be useful in the future.'))
    point_bufs: Optional[Dict[int, Union[int, float, None]]] = Field(
        None,
        description=
        'Same as above, but used for buffering Points into Polygons.')

    def has_null_class_bufs(self):
        """Return True if any point or line buffer value is None."""
        return any(
            buf is None
            for bufs in (self.point_bufs, self.line_bufs)
            if bufs is not None
            for buf in bufs.values())

    def build(self, class_config, crs_transformer):
        """Build the vector source.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()

    def update(self, pipeline=None, scene=None):
        """No-op hook; this config derives nothing from pipeline or scene."""
        pass
class SemanticSegmentationLabelSourceConfig(LabelSourceConfig):
    """Config for semantic segmentation labels stored as rasters."""
    raster_source: Union[RasterSourceConfig, RasterizedSourceConfig] = Field(
        ..., description='The labels in the form of rasters.')
    rgb_class_config: Optional[ClassConfig] = Field(
        None,
        description=(
            'If set, will infer the class_ids for the labels using the colors field. This '
            'assumes the labels are stored as RGB rasters.'))

    def build(self, class_config, crs_transformer, extent, tmp_dir):
        """Return a SemanticSegmentationLabelSource for this config.

        The two kinds of raster source config take different ``build``
        arguments, so the call is chosen by the config's type.
        """
        source_cfg = self.raster_source
        if isinstance(source_cfg, RasterizedSourceConfig):
            raster_source = source_cfg.build(class_config, crs_transformer,
                                             extent)
        else:
            raster_source = source_cfg.build(tmp_dir)
        return SemanticSegmentationLabelSource(raster_source,
                                               self.rgb_class_config)
class EvaluatorConfig(Config):
    """Config for an evaluator that writes its results as JSON."""
    output_uri: Optional[str] = Field(
        None, description='URI of JSON output by evaluator.')

    def update(self, pipeline=None):
        """Default ``output_uri`` to ``eval.json`` under ``pipeline.eval_uri``.

        Does nothing when no pipeline is given or ``output_uri`` is set.
        """
        if pipeline is None:
            return
        if self.output_uri is not None:
            return
        self.output_uri = join(pipeline.eval_uri, 'eval.json')
class VectorSourceConfig(Config):
    """Base config for vector sources; ``build`` is left to subclasses."""
    default_class_id: Optional[int] = Field(
        ...,
        description=(
            'The default class_id to use if class cannot be inferred using other '
            'mechanisms. If a feature defaults to a class_id of None, then that feature '
            'will be deleted.'))
    class_id_to_filter: Optional[Dict[int, Optional[ClassFilter]]] = Field(
        None,
        description=(
            'Map from class_id to JSON filter used to infer missing class_ids. The '
            'filter schema is according to '
            'https://github.com/mapbox/mapbox-gl-js/blob/c9900db279db776f493ce8b6749966cedc2d6b8a/src/style-spec/feature_filter/index.js.'  # noqa
        ))
    line_bufs: Optional[Dict[int, Union[int, float, None]]] = Field(
        None,
        description=(
            'This is useful, for example, for buffering lines representing roads so that '
            'their width roughly matches the width of roads in the imagery. If None, uses '
            'default buffer value of 1. Otherwise, a map from class_id to '
            'number of pixels to buffer by. If the buffer value is None, then no buffering '
            'will be performed and the LineString or Point won\'t get converted to a '
            'Polygon. Not converting to Polygon is incompatible with the currently '
            'available LabelSources, but may be useful in the future.'))
    point_bufs: Optional[Dict[int, Union[int, float, None]]] = Field(
        None,
        description=
        'Same as above, but used for buffering Points into Polygons.')

    def has_null_class_bufs(self):
        """Return True if any point or line buffer value is None."""
        for buf_map in (self.point_bufs, self.line_bufs):
            if buf_map is None:
                continue
            if any(buf is None for buf in buf_map.values()):
                return True
        return False

    def build(self, class_config, crs_transformer):
        """Build the vector source.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()

    def update(self, pipeline=None, scene=None):
        """No-op hook; this config derives nothing from pipeline or scene."""
        pass
class ModelConfig(Config):
    """Model options: the backbone and optional initial weights."""
    backbone: Backbone = Field(
        Backbone.resnet18,
        description='The torchvision.models backbone to use.')
    init_weights: Optional[str] = Field(
        None,
        description=(
            'URI of PyTorch model weights used to initialize model. If None, '
            'will use Imagenet pretrained model weights provided by torchvision.'
        ))

    def update(self, learner: Optional['LearnerConfig'] = None):
        """No-op hook; this config derives nothing from ``learner``."""
        pass

    def get_backbone_str(self):
        """Return the name of the configured ``Backbone`` member."""
        backbone_name = self.backbone.name
        return backbone_name
class ObjectDetectionChipOptions(Config):
    """Options controlling how object detection chips are made."""
    neg_ratio: float = Field(
        1.0,
        description=(
            'The ratio of negative chips (those containing no bounding '
            'boxes) to positive chips. This can be useful if the statistics '
            'of the background is different in positive chips. For example, '
            'in car detection, the positive chips will always contain roads, '
            'but no examples of rooftops since cars tend to not be near rooftops.'
        ))
    ioa_thresh: float = Field(
        0.8,
        description=(
            'When a box is partially outside of a training chip, it is not clear if (a '
            'clipped version) of the box should be included in the chip. If the IOA '
            '(intersection over area) of the box with the chip is greater than ioa_thresh, '
            'it is included in the chip.'))
    # Plain default rather than a Field: this option carries no description.
    window_method: ObjectDetectionWindowMethod = (
        ObjectDetectionWindowMethod.chip)
class ClassConfig(Config):
    """Holds the names and display colors of the predicted classes."""
    names: List[str] = Field(..., description='Names of classes.')
    colors: List[str] = Field(
        ..., description='Colors used to visualize classes.')
    null_class: Optional[str] = Field(
        None,
        description=(
            'Optional name of class in `names` to use as the null class. This is used in '
            'semantic segmentation to represent the label for imagery pixels that are '
            'NODATA or that are missing a label.'))

    def get_class_id(self, name):
        """Return the position of ``name`` within ``names``."""
        return self.names.index(name)

    def get_name(self, id):
        """Return the class name stored at index ``id``."""
        return self.names[id]

    def get_null_class_id(self):
        """Return the class id of the null class.

        Raises:
            ValueError: if ``null_class`` is not set.
        """
        null_name = self.null_class
        if null_name is None:
            raise ValueError('null_class is not set')
        return self.get_class_id(null_name)

    def get_color_to_class_id(self):
        """Return a dict mapping each color to its index in ``colors``."""
        return {color: idx for idx, color in enumerate(self.colors)}

    def ensure_null_class(self):
        """Append a 'null' class colored 'black' unless a null class is set."""
        if self.null_class is not None:
            return
        self.null_class = 'null'
        self.names.append('null')
        self.colors.append('black')

    def update(self, pipeline=None):
        """No-op hook; this config derives nothing from ``pipeline``."""
        pass

    def validate_config(self):
        """Check that ``null_class``, when set, is one of ``names``.

        Raises:
            ConfigError: if ``null_class`` is set but missing from ``names``.
        """
        if self.null_class is None:
            return
        if self.null_class not in self.names:
            raise ConfigError(
                'The null_class: {} must be in list of class names.'.format(
                    self.null_class))

    def __len__(self):
        """Return the number of class names."""
        return len(self.names)
class EvaluatorConfig(Config):
    """Config for an evaluator that writes its results as JSON."""
    output_uri: Optional[str] = Field(
        None,
        description=(
            'URI of JSON output by evaluator. If None, and this Config is part of an '
            'RVPipeline, then this field will be auto-generated.'))

    def update(self, pipeline=None):
        """Default ``output_uri`` to ``eval.json`` under ``pipeline.eval_uri``.

        Does nothing when no pipeline is given or ``output_uri`` is set.
        """
        needs_default = self.output_uri is None
        if pipeline is not None and needs_default:
            self.output_uri = join(pipeline.eval_uri, 'eval.json')
class ModelConfig(Config):
    """Model options: the backbone name and optional initial weights."""
    backbone: str = Field(
        'resnet18', description='name of torchvision.models backbone to use')
    init_weights: Optional[str] = Field(
        None,
        description=(
            'URI of PyTorch model weights used to initialize model. If None, '
            'will use Imagenet pretrained model weights provided by torchvision.'
        ))

    def update(self, learner: Optional['LearnerConfig'] = None):
        """No-op hook; this config derives nothing from ``learner``."""
        pass

    def validate_backbone(self):
        """Validate ``backbone`` against the module-level ``backbones``."""
        allowed_backbones = backbones
        self.validate_list('backbone', allowed_backbones)

    def validate_config(self):
        """Validate this config; currently only checks the backbone."""
        self.validate_backbone()
class ModelConfig(Config):
    """Model options: backbone, pretraining flag and initial weights."""
    backbone: Backbone = Field(
        Backbone.resnet18,
        description='The torchvision.models backbone to use.')
    pretrained: bool = Field(
        True,
        description=(
            'If True, use ImageNet weights. If False, use random initialization.'
        ))
    init_weights: Optional[str] = Field(
        None,
        description=('URI of PyTorch model weights used to initialize model. '
                     'If set, this supercedes the pretrained option.'))

    def update(self, learner: Optional['LearnerConfig'] = None):
        """No-op hook; this config derives nothing from ``learner``."""
        pass

    def get_backbone_str(self):
        """Return the name of the configured ``Backbone`` member."""
        backbone_name = self.backbone.name
        return backbone_name
class ChipClassificationGeoJSONStoreConfig(LabelStoreConfig):
    """Config for storing chip classification predictions as GeoJSON."""
    uri: Optional[str] = Field(
        None, description='URI of GeoJSON file with predictions.')

    def build(self, class_config, crs_transformer, extent=None, tmp_dir=None):
        """Return a ChipClassificationGeoJSONStore for this config.

        ``extent`` and ``tmp_dir`` are accepted but not used here.
        """
        store = ChipClassificationGeoJSONStore(self.uri, class_config,
                                               crs_transformer)
        return store

    def update(self, pipeline=None, scene=None):
        """Default ``uri`` to ``<pipeline.predict_uri>/<scene.id>.json``.

        Only applies when ``uri`` is unset and both arguments are given.
        """
        if self.uri is not None:
            return
        if pipeline is None or scene is None:
            return
        self.uri = join(pipeline.predict_uri, '{}.json'.format(scene.id))
class SceneConfig(Config):
    """Config for a Scene which comprises the raster data and labels for an AOI."""
    id: str
    raster_source: RasterSourceConfig
    label_source: LabelSourceConfig
    label_store: Optional[LabelStoreConfig] = None
    # A space was missing after "Each polygon", which rendered as
    # "Each polygondefines an AOI".
    aoi_uris: Optional[List[str]] = Field(
        None,
        description=(
            'List of URIs of GeoJSON files that define the AOIs for the scene. Each polygon '
            'defines an AOI which is a piece of the scene that is assumed to be fully '
            'labeled and usable for training or validation.'))

    def build(self, class_config, tmp_dir, use_transformers=True):
        """Return the Scene described by this config.

        Args:
            class_config: passed to the label source, label store and AOI
                vector source builders.
            tmp_dir: passed to the raster source, label source and label
                store builders.
            use_transformers: forwarded to the raster source's ``build``.
        """
        raster_source = self.raster_source.build(
            tmp_dir, use_transformers=use_transformers)
        crs_transformer = raster_source.get_crs_transformer()
        extent = raster_source.get_extent()

        label_source = (self.label_source.build(class_config, crs_transformer,
                                                extent, tmp_dir)
                        if self.label_source is not None else None)
        label_store = (self.label_store.build(class_config, crs_transformer,
                                              extent, tmp_dir)
                       if self.label_store is not None else None)

        aoi_polygons = None
        if self.aoi_uris is not None:
            aoi_polygons = []
            for uri in self.aoi_uris:
                # Set default class id to 0 to avoid deleting features. If it was
                # set to None, they would all be deleted.
                aoi_geojson = GeoJSONVectorSourceConfig(
                    uri=uri, default_class_id=0, ignore_crs_field=True).build(
                        class_config, crs_transformer).get_geojson()
                aoi_polygons.extend(
                    shape(f['geometry']) for f in aoi_geojson['features'])

        return Scene(
            self.id,
            raster_source,
            ground_truth_label_source=label_source,
            prediction_label_store=label_store,
            aoi_polygons=aoi_polygons)

    def update(self, pipeline=None):
        """Update the nested configs and fill in a default label store.

        When no label store is configured and a pipeline is given, the
        pipeline's default label store for this scene is used.
        """
        super().update()

        self.raster_source.update(pipeline=pipeline, scene=self)
        # ``build`` tolerates a missing label source, so do the same here.
        if self.label_source is not None:
            self.label_source.update(pipeline=pipeline, scene=self)
        if self.label_store is None and pipeline is not None:
            self.label_store = pipeline.get_default_label_store(scene=self)
        if self.label_store is not None:
            self.label_store.update(pipeline=pipeline, scene=self)
class SemanticSegmentationChipOptions(Config):
    """Chipping options for semantic segmentation."""
    window_method: SemanticSegmentationWindowMethod = Field(
        SemanticSegmentationWindowMethod.sliding,
        description=('Window method to use for chipping.'))
    target_class_ids: Optional[List[int]] = Field(
        None,
        description=(
            'List of class ids considered as targets (ie. those to prioritize when '
            'creating chips) which is only used in conjunction with the '
            'target_count_threshold and negative_survival_prob options. Applies '
            'to the random_sample window method.'))
    # NOTE(review): this description used to be a verbatim copy of the
    # target_class_ids text. The wording below is inferred from the option
    # names and the target_count_threshold description -- confirm against
    # the chip sampling code.
    negative_survival_prob: float = Field(
        1.0,
        description=(
            'Probability of keeping a negative chip, ie. one that does not contain at '
            'least target_count_threshold pixels of the target classes. Applies to the '
            'random_sample window method.'))
    chips_per_scene: int = Field(
        1000,
        description=(
            'Number of chips to generate per scene. Applies to the random_sample window '
            'method.'))
    target_count_threshold: int = Field(
        1000,
        description=(
            'Minimum number of pixels covering target_classes that a chip must have. '
            'Applies to the random_sample window method.'))
    stride: Optional[int] = Field(
        None,
        description=(
            'Stride of windows across image. Defaults to half the chip size. Applies to '
            'the sliding_window method.'))
class StatsTransformerConfig(RasterTransformerConfig):
    """Config for a raster transformer driven by saved raster statistics."""
    stats_uri: Optional[str] = Field(
        None, description='The URI of the output of the StatsAnalyzer.')

    def update(self, pipeline=None, scene=None):
        """Point ``stats_uri`` at ``stats.json`` under ``pipeline.analyze_uri``.

        Any existing ``stats_uri`` is replaced whenever a pipeline is given.
        """
        if pipeline is None:
            return
        self.stats_uri = join(pipeline.analyze_uri, 'stats.json')

    def build(self):
        """Load the stats at ``stats_uri`` and return a StatsTransformer."""
        raster_stats = RasterStats.load(self.stats_uri)
        return StatsTransformer(raster_stats)

    def update_root(self, root_dir):
        """Re-home ``stats_uri`` under ``root_dir``, keeping its file name."""
        stats_filename = basename(self.stats_uri)
        self.stats_uri = join(root_dir, stats_filename)
class PyTorchLearnerBackendConfig(BackendConfig):
    """Base backend config holding model, solver and logging options.

    ``get_learner_config`` and ``build`` must be provided by subclasses.
    """
    model: ModelConfig
    solver: SolverConfig
    log_tensorboard: bool = Field(
        True, description='If True, log events to Tensorboard log files.')
    run_tensorboard: bool = Field(
        False,
        description='If True, run Tensorboard server pointing at log files.')
    augmentors: List[str] = Field(
        default_augmentors,
        description=(
            'Names of albumentations augmentors to use for training batches. '
            'Choices include: ' + str(augmentor_list)))

    def get_bundle_filenames(self):
        """Return the file names this backend contributes to a bundle."""
        bundle_files = ['model-bundle.zip']
        return bundle_files

    def get_learner_config(self, pipeline):
        """Return the LearnerConfig for ``pipeline``.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()

    def build(self, pipeline, tmp_dir):
        """Build the backend.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()
class RasterSourceConfig(Config):
    """Base config for raster sources; ``build`` is left to subclasses."""
    channel_order: Optional[List[int]] = Field(
        None,
        description=(
            'The sequence of channel indices to use when reading imagery.'))
    transformers: List[RasterTransformerConfig] = []

    def build(self, tmp_dir, use_transformers=True):
        """Build the raster source.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError()

    def update(self, pipeline=None, scene=None):
        """Forward ``pipeline`` and ``scene`` to each transformer config."""
        for transformer_cfg in self.transformers:
            transformer_cfg.update(pipeline, scene)