Beispiel #1
0
    def __call__(self,
                 context,
                 file_name,
                 exposure_latitude=None,
                 exposure_longitude=None,
                 use_parallel=True):
        """
        Load a csv exposure file and store its columns on the context.

        :param context: The context instance, used to move data around.
        :param file_name: The csv file to load. It is first passed through
            ``misc.download_file_from_s3_if_needed``.
        :param exposure_latitude: The title string of the latitude column.
            ``EX_LAT`` is used when this is None.
        :param exposure_longitude: The title string of the longitude column.
            ``EX_LONG`` is used when this is None.
        :param use_parallel: Forwarded unchanged to ``parallel.csv2dict``.
        :raises RuntimeError: If the latitude or the longitude column is
            not present in the loaded data.

        Context return:
            exposure_lat: The values of the latitude column.
            exposure_long: The values of the longitude column.
            exposure_att: The loaded data with the latitude and longitude
                columns removed.
        """
        local_path = misc.download_file_from_s3_if_needed(file_name)
        modified = misc.get_file_mtime(local_path)

        # Record the exposure file as an input of this run.
        exposure_entity = context.prov.entity(":Exposure data", {
            'dcterms:title': 'Exposure data',
            'prov:type': 'void:Dataset',
            'prov:generatedAtTime': modified,
            'prov:atLocation': os.path.basename(local_path)
        })
        context.prov.used(context.provlabel, exposure_entity)

        columns = parallel.csv2dict(local_path, use_parallel=use_parallel)
        # FIXME Need to do better error handling
        # FIXME this function can only be called once.
        # Multiple calls will corrupt the context data.

        # Latitude: move the column out of the table and onto the context.
        lat_key = EX_LAT if exposure_latitude is None else exposure_latitude
        try:
            context.exposure_lat = columns[lat_key].values
            del columns[lat_key]
        except KeyError:
            raise RuntimeError(
                "No Exposure latitude column labelled '%s'." % lat_key)

        # Longitude: same treatment.
        long_key = EX_LONG if exposure_longitude is None \
            else exposure_longitude
        try:
            context.exposure_long = columns[long_key].values
            del columns[long_key]
        except KeyError:
            raise RuntimeError(
                "No Exposure longitude column labelled '%s'." % long_key)

        # Whatever is left are the exposure attributes.
        context.exposure_att = columns
Beispiel #2
0
    def save_aggregation(self,
                         filename,
                         boundaries,
                         impactcode,
                         boundarycode,
                         categories,
                         fields,
                         use_parallel=True):
        """
        Save data aggregated to geospatial regions.

        Provenance for the boundary file, the aggregation activity and the
        output file is recorded on every call; the file itself is only
        written by node 0 (or by every node when ``use_parallel`` is False).

        :param str filename: Destination filename. It is passed through
            ``misc.create_temp_file_path_for_s3``, which also yields the
            bucket name and key used for the uploads.
        :param boundaries: The boundary file. It is passed through
            ``misc.download_file_from_s3_if_needed`` before use.
        :param impactcode: Forwarded to ``aggregate.choropleth``.
        :param boundarycode: Forwarded to ``aggregate.choropleth`` and
            recorded in the provenance of the boundary file.
        :param categories: Forwarded to ``aggregate.choropleth``.
        :param fields: Forwarded to ``aggregate.choropleth``; its ``repr``
            is recorded in the provenance of the aggregation activity.
        :param bool use_parallel: True for parallel behaviour, which
                                  is only node 0 writing to file
        """
        LOGGER.info("Saving aggregated data")
        boundaries = misc.download_file_from_s3_if_needed(boundaries)
        [filename, bucket_name, bucket_key] = \
            misc.create_temp_file_path_for_s3(filename)
        write_dict = self.exposure_att.copy()
        dt = datetime.now().strftime(DATEFMT)
        atts = {
            "prov:type": "void:Dataset",
            "prov:atLocation": os.path.basename(boundaries),
            "prov:generatedAtTime": misc.get_file_mtime(boundaries),
            "void:boundary_code": boundarycode
        }

        bdyent = self.prov.entity(":Aggregation boundaries", atts)
        aggact = self.prov.activity(":AggregationByRegions", dt, None, {
            'prov:type': "Spatial aggregation",
            'void:functions': repr(fields)
        })
        aggatts = {
            "prov:type": "void:Dataset",
            "prov:atLocation": os.path.basename(filename),
            "prov:generatedAtTime": dt
        }
        aggfileent = self.prov.entity(":AggregationFile", aggatts)
        self.prov.used(aggact, bdyent)
        self.prov.wasInformedBy(aggact, self.provlabel)
        self.prov.wasGeneratedBy(aggfileent, aggact)

        if parallel.STATE.rank == 0 or not use_parallel:
            aggregate.choropleth(write_dict, boundaries, impactcode,
                                 boundarycode, filename, fields, categories)
            misc.upload_to_s3_if_applicable(filename, bucket_name, bucket_key)

            shp_suffix = '.shp'
            if (bucket_name is not None and bucket_key is not None
                    and bucket_key.endswith(shp_suffix)):
                rootname = os.path.splitext(filename)[0]
                # Strip the suffix that was just checked on the key itself.
                # Deriving the length from the local file's extension would
                # produce an empty or truncated key whenever that extension
                # is missing or not four characters long.
                base_bucket_key = bucket_key[:-len(shp_suffix)]

                # Companion files of the shapefile. The extra positional
                # argument for '.cpg' is passed through unchanged.
                # NOTE(review): presumably it relaxes error handling for a
                # file that may not exist - confirm against
                # misc.upload_to_s3_if_applicable.
                sidecars = (
                    ('.dbf', ()),
                    ('.shx', ()),
                    ('.prj', ()),
                    ('.cpg', (True,)),
                )
                for suffix, extra_args in sidecars:
                    misc.upload_to_s3_if_applicable(rootname + suffix,
                                                    bucket_name,
                                                    base_bucket_key + suffix,
                                                    *extra_args)
Beispiel #3
0
    def __call__(self,
                 context,
                 attribute_label,
                 file_list,
                 clip_exposure2all_hazards=False,
                 file_format=None,
                 variable=None,
                 no_data_value=None):
        """
        Load one or more files and get the value for all the
        exposure points. All files have to be of the same attribute.

        :param context: The context instance, used to move data around.
        :param attribute_label: The string to be associated with this data.
        :param file_list: A list of files or a single file to be loaded.
        :param clip_exposure2all_hazards: True if the exposure data is
            clipped to the hazard data, so no hazard values are ignored.
        :param file_format: When this is 'nc' and ``variable`` is set, the
            variable is recorded in the provenance and the file list is
            passed through ``misc.mod_file_list``.
        :param variable: The variable used together with file_format 'nc'.
        :param no_data_value: Values in the raster that represent no data.
            Matching values are replaced with NaN.

        Context return:
           exposure_att: Add the file values into this dictionary.
               key: attribute_label
               value: the values read at the exposure points
        """

        if isinstance(file_list, str):
            file_list = [file_list]

        # Record every source file as an input of this run.
        # NOTE(review): the path returned by the download helper is only
        # used for this provenance record; the raster read below still
        # receives the original file_list entries - confirm that
        # raster_module can open those directly.
        for f in file_list:
            f = misc.download_file_from_s3_if_needed(f)
            dt = misc.get_file_mtime(f)
            atts = {
                "dcterms:title": "Source hazard data",
                "prov:type": "prov:Dataset",
                "prov:atLocation": os.path.basename(f),
                "prov:format": os.path.splitext(f)[1].replace('.', ''),
                "prov:generatedAtTime": dt,
            }
            if file_format == 'nc' and variable:
                atts['prov:variable'] = variable
            hazent = context.prov.entity(":Hazard data", atts)
            context.prov.used(context.provlabel, hazent)

        if file_format == 'nc' and variable:
            file_list = misc.mod_file_list(file_list, variable)

        file_data, extent = raster_module.files_raster_data_at_points(
            context.exposure_long, context.exposure_lat, file_list)
        # np.nan is the canonical spelling; the upper-case alias is not
        # available in NumPy 2.
        file_data[file_data == no_data_value] = np.nan

        context.exposure_att[attribute_label] = file_data

        if clip_exposure2all_hazards:
            # Clipping the exposure points after the data has been added.
            # Not optimised for speed, but easy to implement.
            context.clip_exposure(*extent)
Beispiel #4
0
    def __call__(self, context, file_name):
        """
        Read an xml vulnerability file into the context object.

        Nothing is loaded or recorded when ``file_name`` is None.

        :param context: The context instance, used to move data around.
        :param file_name: The xml file to load, or None to skip loading.

        Context return:
            vulnerability_sets: Updated with the sets read from the file.
        """
        if file_name is None:
            return

        context.vulnerability_sets.update(vuln_sets_from_xml_file(file_name))

        # Record the vulnerability file as an input of this run.
        record = {
            'prov:type': 'prov:Collection',
            'prov:generatedAtTime': misc.get_file_mtime(file_name),
            'prov:atLocation': os.path.basename(file_name)
        }
        vuln_entity = context.prov.entity(":vulnerability file", record)
        context.prov.used(context.provlabel, vuln_entity)
Beispiel #5
0
    def __call__(self,
                 context,
                 attribute_label,
                 clip_exposure2all_hazards=False,
                 file_list=None,
                 file_format=None,
                 variable=None,
                 raster=None,
                 upper_left_x=None,
                 upper_left_y=None,
                 cell_size=None,
                 no_data_value=None):
        """
        Load one or more files and get the value for all the
        exposure points. All files have to be of the same attribute.
        Alternatively a numeric array of the raster data can be passed in.

        :param context: The context instance, used to move data around.
        :param attribute_label: The string to be associated with this data.
        :param clip_exposure2all_hazards: True if the exposure data is
            clipped to the hazard data, so no hazard values are ignored.

        :param file_list: A list of files or a single file to be loaded.
        :param file_format: When this is 'nc' and ``variable`` is set, the
            variable is recorded in the provenance and the file list is
            passed through ``misc.mod_file_list``.
        :param variable: The variable used together with file_format 'nc'.
        OR
        :param raster: A 2D numeric array of the raster values, North is up.
        :param upper_left_x: The longitude at the upper left corner.
        :param upper_left_y: The latitude at the upper left corner.
        :param cell_size: The cell size.
        :param no_data_value: Values in the raster that represent no data.
            Matching values are replaced with NaN. Required (not None) when
            the raster is passed as an array.

        :raises AssertionError: If ``file_list`` is None and any of the
            raster array arguments is None.

        Context return:
           exposure_att: Add the file values into this dictionary.
               key: attribute_label
               value: the values read at the exposure points
        """

        # We need a file or a full set of raster info.
        if file_list is None:
            # The raster info is being passed as an array
            assert raster is not None
            assert upper_left_x is not None
            assert upper_left_y is not None
            assert cell_size is not None
            assert no_data_value is not None
            a_raster = raster_module.Raster.from_array(raster, upper_left_x,
                                                       upper_left_y, cell_size,
                                                       no_data_value)

            if clip_exposure2all_hazards:
                # Reduce the context to the hazard area
                # before the raster info has been added to the context
                extent = a_raster.extent()
                context.clip_exposure(*extent)

            file_data = a_raster.raster_data_at_points(context.exposure_long,
                                                       context.exposure_lat)
            # np.nan is the canonical spelling; the upper-case alias is not
            # available in NumPy 2.
            file_data = np.where(file_data == no_data_value, np.nan, file_data)
            context.exposure_att[attribute_label] = file_data
        else:
            if isinstance(file_list, str):
                file_list = [file_list]

            # Record every source file as an input of this run.
            # NOTE(review): the path returned by the download helper is only
            # used for this provenance record; the raster read below still
            # receives the original file_list entries - confirm that
            # raster_module can open those directly.
            for f in file_list:
                f = misc.download_file_from_s3_if_needed(f)
                dt = misc.get_file_mtime(f)
                atts = {
                    "dcterms:title": "Source hazard data",
                    "prov:type": "prov:Dataset",
                    "prov:atLocation": os.path.basename(f),
                    "prov:format": os.path.splitext(f)[1].replace('.', ''),
                    "prov:generatedAtTime": dt,
                }
                if file_format == 'nc' and variable:
                    atts['prov:variable'] = variable
                hazent = context.prov.entity(":Hazard data", atts)
                context.prov.used(context.provlabel, hazent)

            if file_format == 'nc' and variable:
                file_list = misc.mod_file_list(file_list, variable)

            file_data, extent = raster_module.files_raster_data_at_points(
                context.exposure_long, context.exposure_lat, file_list)
            file_data[file_data == no_data_value] = np.nan

            context.exposure_att[attribute_label] = file_data

            if clip_exposure2all_hazards:
                # Clipping the exposure points after the data has been added.
                # Not optimised for speed, but easy to implement.
                context.clip_exposure(*extent)