def __call__(self, context, file_name, exposure_latitude=None,
             exposure_longitude=None, use_parallel=True):
    """
    Read a csv exposure file into the context object.

    The latitude and longitude columns are removed from the loaded
    columns and stored separately on the context; everything that is
    left becomes ``context.exposure_att``.

    :param context: The context instance, used to move data around.
    :param file_name: The csv file to load.
    :param exposure_latitude: the title string of the latitude column.
        Defaults to EX_LAT when None.
    :param exposure_longitude: the title string of the longitude column.
        Defaults to EX_LONG when None.
    :param use_parallel: Forwarded unchanged to ``parallel.csv2dict``.
    :raises RuntimeError: if the latitude or longitude column is missing.

    Context return:
        exposure_lat: values of the latitude column.
        exposure_long: values of the longitude column.
        exposure_att: Add the file values into this dictionary.
            key: column titles
            value: column values, except the title
    """
    file_name = misc.download_file_from_s3_if_needed(file_name)
    dt = misc.get_file_mtime(file_name)
    expent = context.prov.entity(
        ":Exposure data",
        {
            'dcterms:title': 'Exposure data',
            'prov:type': 'void:Dataset',
            'prov:generatedAtTime': dt,
            'prov:atLocation': os.path.basename(file_name)
        })
    context.prov.used(context.provlabel, expent)

    data_frame = parallel.csv2dict(file_name, use_parallel=use_parallel)

    # FIXME Need to do better error handling

    # FIXME this function can only be called once.
    # Multiple calls will corrupt the context data.

    lat_key = EX_LAT if exposure_latitude is None else exposure_latitude
    try:
        latitudes = data_frame[lat_key].values
        del data_frame[lat_key]
    except KeyError as err:
        msg = "No Exposure latitude column labelled '%s'." % lat_key
        raise RuntimeError(msg) from err

    long_key = EX_LONG if exposure_longitude is None else exposure_longitude
    try:
        longitudes = data_frame[long_key].values
        del data_frame[long_key]
    except KeyError as err:
        msg = "No Exposure longitude column labelled '%s'." % long_key
        raise RuntimeError(msg) from err

    # Publish to the context only after both columns were found, so a
    # missing longitude column does not leave the context half-updated.
    context.exposure_lat = latitudes
    context.exposure_long = longitudes
    context.exposure_att = data_frame
def save_aggregation(self, filename, boundaries, impactcode, boundarycode,
                     categories, fields, use_parallel=True):
    """
    Save data aggregated to geospatial regions.

    Provenance entries for the boundary file, the aggregation activity
    and the output file are recorded before the data is written.

    :param str filename: Destination filename. It is first mapped through
        ``misc.create_temp_file_path_for_s3``, which also yields the
        bucket name and key used for the uploads.
    :param boundaries: Boundary file; fetched with
        ``misc.download_file_from_s3_if_needed`` before use.
    :param impactcode: Passed through to ``aggregate.choropleth``.
    :param boundarycode: Passed through to ``aggregate.choropleth`` and
        recorded in the provenance as ``void:boundary_code``.
    :param categories: Passed through to ``aggregate.choropleth``.
    :param fields: Passed through to ``aggregate.choropleth``; its repr
        is recorded in the provenance as ``void:functions``.
    :param bool use_parallel: True for parallel behaviour, which is
        only node 0 writing to file.
    """
    LOGGER.info("Saving aggregated data")
    boundaries = misc.download_file_from_s3_if_needed(boundaries)
    [filename, bucket_name, bucket_key] = \
        misc.create_temp_file_path_for_s3(filename)
    write_dict = self.exposure_att.copy()

    dt = datetime.now().strftime(DATEFMT)
    atts = {
        "prov:type": "void:Dataset",
        "prov:atLocation": os.path.basename(boundaries),
        "prov:generatedAtTime": misc.get_file_mtime(boundaries),
        "void:boundary_code": boundarycode
    }
    bdyent = self.prov.entity(":Aggregation boundaries", atts)

    aggact = self.prov.activity(
        ":AggregationByRegions", dt, None,
        {
            'prov:type': "Spatial aggregation",
            'void:functions': repr(fields)
        })

    aggatts = {
        "prov:type": "void:Dataset",
        "prov:atLocation": os.path.basename(filename),
        "prov:generatedAtTime": dt
    }
    aggfileent = self.prov.entity(":AggregationFile", aggatts)

    self.prov.used(aggact, bdyent)
    self.prov.wasInformedBy(aggact, self.provlabel)
    self.prov.wasGeneratedBy(aggfileent, aggact)

    if parallel.STATE.rank == 0 or not use_parallel:
        aggregate.choropleth(write_dict, boundaries, impactcode,
                             boundarycode, filename, fields, categories)
        misc.upload_to_s3_if_applicable(filename, bucket_name, bucket_key)

        if (bucket_name is not None and bucket_key is not None
                and bucket_key.endswith('.shp')):
            # A shapefile is written together with sidecar files that
            # share its base name; upload those next to the .shp key.
            rootname = os.path.splitext(filename)[0]
            # Strip the suffix that was just tested on bucket_key itself.
            # Slicing by the length of filename's extension is wrong when
            # that extension is empty or has a different length.
            base_bucket_key = bucket_key[:-len('.shp')]
            misc.upload_to_s3_if_applicable(rootname + '.dbf',
                                            bucket_name,
                                            base_bucket_key + '.dbf')
            misc.upload_to_s3_if_applicable(rootname + '.shx',
                                            bucket_name,
                                            base_bucket_key + '.shx')
            misc.upload_to_s3_if_applicable(rootname + '.prj',
                                            bucket_name,
                                            base_bucket_key + '.prj')
            misc.upload_to_s3_if_applicable(rootname + '.cpg',
                                            bucket_name,
                                            base_bucket_key + '.cpg',
                                            True)
def __call__(self, context, attribute_label, file_list,
             clip_exposure2all_hazards=False, file_format=None,
             variable=None, no_data_value=None):
    """
    Load one or more files and get the value for all the exposure points.
    All files have to be of the same attribute.

    :param context: The context instance, used to move data around.
    :param attribute_label: The string to be associated with this data.
    :param file_list: A list of files or a single file to be loaded.
    :param clip_exposure2all_hazards: True if the exposure data is
        clipped to the hazard data, so no hazard values are ignored.
    :param file_format: When this is 'nc' and ``variable`` is set, the
        variable is recorded in the provenance and the file list is
        rewritten with ``misc.mod_file_list``.
    :param variable: See ``file_format``.
    :param no_data_value: Values in the raster that represent no data.
        Matching values are replaced by NaN.

    Context return:
        exposure_att: Add the file values into this dictionary.
            key: attribute_label
            value: the values read at the exposure points
    """
    if isinstance(file_list, str):
        file_list = [file_list]

    # Record one provenance entity per source file.
    # NOTE(review): the local path returned by
    # download_file_from_s3_if_needed is only used for the provenance
    # attributes; file_list itself is passed on unchanged below.
    # Confirm the raster reader can resolve the original paths.
    for f in file_list:
        f = misc.download_file_from_s3_if_needed(f)
        dt = misc.get_file_mtime(f)
        atts = {
            "dcterms:title": "Source hazard data",
            "prov:type": "prov:Dataset",
            "prov:atLocation": os.path.basename(f),
            "prov:format": os.path.splitext(f)[1].replace('.', ''),
            "prov:generatedAtTime": dt,
        }
        if file_format == 'nc' and variable:
            atts['prov:variable'] = variable
        hazent = context.prov.entity(":Hazard data", atts)
        context.prov.used(context.provlabel, hazent)

    if file_format == 'nc' and variable:
        file_list = misc.mod_file_list(file_list, variable)

    file_data, extent = raster_module.files_raster_data_at_points(
        context.exposure_long,
        context.exposure_lat, file_list)
    # np.nan is the canonical spelling; the upper-case alias np.NAN is
    # not available in NumPy 2.x.
    file_data[file_data == no_data_value] = np.nan

    context.exposure_att[attribute_label] = file_data

    if clip_exposure2all_hazards:
        # Clipping the exposure points after the data has been added.
        # Not optimised for speed, but easy to implement.
        context.clip_exposure(*extent)
def __call__(self, context, file_name):
    """
    Read an xml vulnerability file into the context object.

    Nothing happens when ``file_name`` is None. Otherwise the
    vulnerability sets parsed from the file are merged into
    ``context.vulnerability_sets`` and the file is recorded in the
    provenance as used by the current activity.

    :param context: The context instance, used to move data around.
    :param file_name: The xml file to load.
    """
    if file_name is None:
        return

    context.vulnerability_sets.update(vuln_sets_from_xml_file(file_name))

    # Describe the source file for the provenance record.
    provenance_atts = {
        'prov:type': 'prov:Collection',
        'prov:generatedAtTime': misc.get_file_mtime(file_name),
        'prov:atLocation': os.path.basename(file_name)
    }
    vulent = context.prov.entity(":vulnerability file", provenance_atts)
    context.prov.used(context.provlabel, vulent)
def __call__(self, context, attribute_label,
             clip_exposure2all_hazards=False,
             file_list=None, file_format=None, variable=None,
             raster=None, upper_left_x=None, upper_left_y=None,
             cell_size=None, no_data_value=None):
    """
    Load one or more files and get the value for all the exposure points.
    All files have to be of the same attribute.
    Alternatively a numeric array of the raster data can be passed in.

    :param context: The context instance, used to move data around.
    :param attribute_label: The string to be associated with this data.
    :param clip_exposure2all_hazards: True if the exposure data is
        clipped to the hazard data, so no hazard values are ignored.

    :param file_list: A list of files or a single file to be loaded.
    :param file_format: When this is 'nc' and ``variable`` is set, the
        variable is recorded in the provenance and the file list is
        rewritten with ``misc.mod_file_list``.
    :param variable: See ``file_format``.
    OR
    :param raster: A 2D numeric array of the raster values, North is up.
    :param upper_left_x: The longitude at the upper left corner.
    :param upper_left_y: The latitude at the upper left corner.
    :param cell_size: The cell size.

    :param no_data_value: Values in the raster that represent no data.
        Matching values are replaced by NaN.
    :raises AssertionError: if ``file_list`` is None and any of the
        raster parameters (including ``no_data_value``) is None. These
        checks are ``assert`` statements, so they are skipped when
        Python runs with ``-O``.

    Context return:
        exposure_att: Add the file values into this dictionary.
            key: attribute_label
            value: the values read at the exposure points
    """
    # We need a file or a full set of raster info.
    if file_list is None:
        # The raster info is being passed as an array
        assert raster is not None
        assert upper_left_x is not None
        assert upper_left_y is not None
        assert cell_size is not None
        assert no_data_value is not None
        a_raster = raster_module.Raster.from_array(raster, upper_left_x,
                                                   upper_left_y,
                                                   cell_size,
                                                   no_data_value)

        if clip_exposure2all_hazards:
            # Reduce the context to the hazard area
            # before the raster info has been added to the context
            extent = a_raster.extent()
            context.clip_exposure(*extent)

        file_data = a_raster.raster_data_at_points(context.exposure_long,
                                                   context.exposure_lat)
        # np.nan is the canonical spelling; the upper-case alias np.NAN
        # is not available in NumPy 2.x.
        file_data = np.where(file_data == no_data_value, np.nan,
                             file_data)
        context.exposure_att[attribute_label] = file_data
    else:
        if isinstance(file_list, str):
            file_list = [file_list]

        # Record one provenance entity per source file.
        # NOTE(review): the local path returned by
        # download_file_from_s3_if_needed is only used for the
        # provenance attributes; file_list itself is passed on
        # unchanged below. Confirm the raster reader can resolve the
        # original paths.
        for f in file_list:
            f = misc.download_file_from_s3_if_needed(f)
            dt = misc.get_file_mtime(f)
            atts = {
                "dcterms:title": "Source hazard data",
                "prov:type": "prov:Dataset",
                "prov:atLocation": os.path.basename(f),
                "prov:format": os.path.splitext(f)[1].replace('.', ''),
                "prov:generatedAtTime": dt,
            }
            if file_format == 'nc' and variable:
                atts['prov:variable'] = variable
            hazent = context.prov.entity(":Hazard data", atts)
            context.prov.used(context.provlabel, hazent)

        if file_format == 'nc' and variable:
            file_list = misc.mod_file_list(file_list, variable)

        file_data, extent = raster_module.files_raster_data_at_points(
            context.exposure_long,
            context.exposure_lat, file_list)
        file_data[file_data == no_data_value] = np.nan

        context.exposure_att[attribute_label] = file_data

        if clip_exposure2all_hazards:
            # Clipping the exposure points after the data has been added.
            # Not optimised for speed, but easy to implement.
            context.clip_exposure(*extent)