def test_metadata_from_json_invalid_geojson(self):
    """An unrecognized GeoJSON type must raise InvalidSeedMetadataDefinition."""

    # Renamed from `json` to avoid shadowing the module-level json import.
    metadata_json = {'type': 'Invalid'}

    with self.assertRaises(InvalidSeedMetadataDefinition) as ex:
        SeedMetadata.metadata_from_json(metadata_json)

    # NOTE(review): assertContains is normally Django's HTTP-response assertion
    # (response, text); calling it with (text, context-manager) looks suspect.
    # Confirm a custom helper exists -- otherwise
    # self.assertIn('JSON_VALIDATION_ERROR', str(ex.exception)) is likely intended.
    self.assertContains('JSON_VALIDATION_ERROR', ex)
def test_metadata_from_json_feature_collection(self):
    """A FeatureCollection is unsupported and must raise InvalidSeedMetadataDefinition."""

    # Renamed from `json` to avoid shadowing the module-level json import.
    metadata_json = {'type': 'FeatureCollection', 'features': []}

    with self.assertRaises(InvalidSeedMetadataDefinition) as ex:
        SeedMetadata.metadata_from_json(metadata_json)

    # NOTE(review): see sibling tests -- assertContains call shape looks suspect;
    # confirm it is a custom helper rather than Django's response assertion.
    self.assertContains('UNSUPPORTED_GEOJSON', ex)
def test_metadata_from_json_geometry_collection(self):
    """A GeometryCollection is unsupported and must raise InvalidSeedMetadataDefinition."""

    # Renamed from `json` to avoid shadowing the module-level json import.
    metadata_json = {'type': 'GeometryCollection', 'geometry': None}

    with self.assertRaises(InvalidSeedMetadataDefinition) as ex:
        SeedMetadata.metadata_from_json(metadata_json)

    # NOTE(review): see sibling tests -- assertContains call shape looks suspect;
    # confirm it is a custom helper rather than Django's response assertion.
    self.assertContains('UNSUPPORTED_GEOJSON', ex)
def test_successful_v6(self, mock_save):
    """Tests calling SourceDataFileParseSaver.save_parse_results_v6() successfully"""

    data_started = '2018-06-01T00:00:00Z'
    data_ended = '2018-06-01T01:00:00Z'
    data_types = ['one', 'two', 'three']
    workspace_path = 'awful/path'

    # Seed metadata side-car payload: a GeoJSON Feature carrying the
    # Scale-relevant properties.
    geojson = {
        'type': 'Feature',
        'geometry': {
            'type': 'Point',
            'coordinates': [0, 1]
        },
        'properties': {
            'dataStarted': data_started,
            'dataEnded': data_ended,
            'dataTypes': data_types,
            'newWorkspacePath': workspace_path
        }
    }

    parse_results = {
        self.source_file_1.id: SeedMetadata.metadata_from_json(geojson, do_validate=False)
    }

    SourceDataFileParseSaver().save_parse_results_v6(parse_results)

    # Exactly one save call, with the parsed datetimes and metadata fields.
    self.assertEqual(mock_save.call_count, 1)
    expected_calls = [
        call(self.source_file_1.id, geojson, parse_datetime(data_started),
             parse_datetime(data_ended), data_types, workspace_path)
    ]
    mock_save.assert_has_calls(expected_calls, any_order=True)
def test_metadata_from_json_geometry(self):
    """A bare geometry is accepted and wrapped with geometry/properties members."""

    # Renamed from `json`/`object` to avoid shadowing the module-level json
    # import and the builtin `object`.
    geometry_json = {'type': 'Point', 'coordinates': [0, 0]}

    metadata = SeedMetadata.metadata_from_json(geometry_json)

    self.assertIn('geometry', metadata._data)
    self.assertIn('properties', metadata._data)
    # assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(geometry_json, metadata._data['geometry'])
def _capture_source_metadata_files(self, seed_input_files, name_to_ids):
    """Identify any input files that have additional metadata provided for them.

    The convention defined for metadata capture on source files is:

    /scale/output_data/INPUT_FILE_NAME.metadata.json

    Content follows the Seed metadata side-car standard of a GeoJSON object.
    The above file name would correlate to a _single_ input file with an
    interface name of INPUT_FILE_NAME

    :param seed_input_files: list of input files elements
    :type seed_input_files: [`job.seed.types.SeedInputFiles`]
    :param name_to_ids: Mapping of input file field names to associated file_ids
    :type name_to_ids: { dict: [int] }
    :raises InvalidSeedMetadataDefinition
    """

    # Maps detected file id -> parsed SeedMetadata
    captured_files = {}

    for input_file in seed_input_files:
        # Multiple-file inputs cannot be matched to a single side-car file.
        if input_file.multiple:
            logger.debug(
                'We do not currently support capturing source file metadata for multiple inputs. '
                'Ignoring input {}.'.format(input_file.name))
            continue

        logger.debug(
            'Checking for source file metadata for input {}...'.format(
                input_file.name))

        # Expected location of the side-car metadata file for this input.
        sidecar_path = os.path.join(SCALE_JOB_EXE_OUTPUT_PATH,
                                    input_file.name + METADATA_SUFFIX)
        if not os.path.isfile(sidecar_path):
            continue

        logger.info(
            'Capturing source file metadata from detected metadata file: %s' % sidecar_path)
        with open(sidecar_path) as sidecar_handle:
            try:
                parsed_metadata = SeedMetadata.metadata_from_json(
                    json.load(sidecar_handle))

                # Single-file inputs only, so index 0 gives the file id
                captured_files[name_to_ids[input_file.name][0]] = parsed_metadata
            except InvalidSeedMetadataDefinition:
                logger.exception(
                    'Unable to process data in source file metadata side-car.'
                )

    return captured_files
def _capture_output_files(self, seed_output_files):
    """Evaluate files patterns and capture any available side-car metadata associated with matched files

    :param seed_output_files: interface definition of Seed output files that should be captured
    :type seed_output_files: [`job.seed.types.SeedOutputFiles`]
    :return: collection of files name keys mapped to a ProductFileMetadata list. { name : [`ProductFileMetadata`]
    :rtype: dict
    """

    # Dict of detected files and associated metadata
    captured_files = {}

    # Iterate over each files object
    for output_file in seed_output_files:
        # For files obj that are detected, handle results (may be multiple)
        product_files = []
        for matched_file in output_file.get_files():
            logger.info('File detected for output capture: %s' % matched_file)
            product_file_meta = ProductFileMetadata(output_file.name, matched_file, output_file.media_type)

            # Check to see if there is a side-car metadata file
            metadata_file = matched_file + METADATA_SUFFIX

            # If metadata is found, attempt to grab any Scale relevant data and place in ProductFileMetadata tuple
            if os.path.isfile(metadata_file):
                logger.info('Capturing metadata from detected side-car file: %s' % metadata_file)
                with open(metadata_file) as metadata_file_handle:
                    try:
                        metadata = SeedMetadata.metadata_from_json(json.load(metadata_file_handle))

                        # Property keys per #1160
                        product_file_meta.geojson = metadata.data
                        product_file_meta.data_start = metadata.get_property('dataStarted')
                        product_file_meta.data_end = metadata.get_property('dataEnded')
                        product_file_meta.source_started = metadata.get_property('sourceStarted')
                        product_file_meta.source_ended = metadata.get_property('sourceEnded')
                        product_file_meta.source_sensor_class = metadata.get_property('sourceSensorClass')
                        product_file_meta.source_sensor = metadata.get_property('sourceSensor')
                        product_file_meta.source_collection = metadata.get_property('sourceCollection')
                        product_file_meta.source_task = metadata.get_property('sourceTask')
                    except InvalidSeedMetadataDefinition:
                        # BUG FIX: logger.exception() requires a message argument;
                        # the bare call raised a TypeError inside this handler.
                        logger.exception('Unable to process data in output file metadata side-car: %s',
                                         metadata_file)

            # Capture the product even if its side-car metadata was invalid
            product_files.append(product_file_meta)

        captured_files[output_file.name] = product_files

    return captured_files
def test_metadata_from_json_feature(self):
    """A complete Feature object is accepted and stored unchanged."""

    # Renamed from `json`/`object` to avoid shadowing the module-level json
    # import and the builtin `object`.
    feature_json = {'type': 'Feature', 'geometry': None, 'properties': None}

    metadata = SeedMetadata.metadata_from_json(feature_json)

    # assertEquals is a deprecated alias of assertEqual.
    self.assertEqual(metadata.data, feature_json)