Example #1
    def test_download_file_from_s3_if_needed(self):
        s3 = get_s3_client()
        s3.create_bucket(Bucket='bucket')
        # A plain local path is returned unchanged.
        file_path = download_file_from_s3_if_needed('/local/path/to/file')
        self.assertEqual(file_path, '/local/path/to/file')

        # A /vsis3/ path is downloaded and a local copy returned.
        s3.put_object(Bucket='bucket', Key='subdir/file.ext', Body='')
        file_path = \
            download_file_from_s3_if_needed('/vsis3/bucket/subdir/file.ext')
        self.assertIn('file.ext', file_path)

        # Shapefile sidecars (.shx, .dbf, .prj) are fetched along with
        # the requested .shp.
        s3.put_object(Bucket='bucket', Key='subdir/file.shp', Body='')
        s3.put_object(Bucket='bucket', Key='subdir/file.shx', Body='')
        s3.put_object(Bucket='bucket', Key='subdir/file.dbf', Body='')
        s3.put_object(Bucket='bucket', Key='subdir/file.prj', Body='')
        file_path = \
            download_file_from_s3_if_needed('/vsis3/bucket/subdir/file.shp')
        self.assertIn('file.shp', file_path)

        # Check zip file
        directory_path = get_temporary_directory()
        zip_file_path = os.path.join(directory_path, 'file.zip')
        with ZipFile(zip_file_path, 'w') as zip_obj:
            for file_name in ['test.shp', 'test.shx', 'test.dbf',
                              'test.prj', 'test.cpg']:
                file_path = os.path.join(directory_path, file_name)
                with open(file_path, 'w') as test_file:
                    test_file.write('Test file\n')
                zip_obj.write(file_path)

        s3.upload_file(zip_file_path, 'bucket', 'subdir/file.zip')
        file_path = \
            download_file_from_s3_if_needed('/vsis3/bucket/subdir/file.zip')
        self.assertIn('test.shp', file_path)
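
The test assumes get_s3_client() hands back a client that is safe to create buckets against. The excerpt does not show how that mock is wired up; a minimal sketch of one common approach, assuming the moto library (an assumption, not confirmed by this code):

# A hypothetical moto-backed stand-in for get_s3_client(); HazImp's
# actual test fixture may differ.
import unittest

import boto3
from moto import mock_aws  # moto >= 5; earlier releases expose mock_s3


@mock_aws
class S3FixtureExample(unittest.TestCase):
    def test_roundtrip(self):
        s3 = boto3.client('s3', region_name='us-east-1')
        s3.create_bucket(Bucket='bucket')
        s3.put_object(Bucket='bucket', Key='subdir/file.ext', Body=b'')
        body = s3.get_object(Bucket='bucket', Key='subdir/file.ext')['Body']
        self.assertEqual(body.read(), b'')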
Example #2
def start(config_list=None, config_file=None, cont_in=None):
    """
    Run the HazImp tool, based on the config info.

    :param config_list: The configuration info, as a list.
    :param config_file: The configuration info, as a file location.
    :param cont_in: Only used in testing. A context instance.
    :returns: The context instance, after the pipeline has run.
    """
    if config_file:
        config_file = misc.download_file_from_s3_if_needed(config_file)
        config_list = config.read_config_file(config_file)

    if isinstance(config_list, dict):
        # A dict means the YAML parsed as a single mapping rather than
        # the expected list of jobs.
        msg = "Bad configuration file. \n"
        msg += "Add a dash ( - ) before each variable. e.g. - template: flood"
        raise RuntimeError(msg)

    if config_list is None:
        raise RuntimeError('No configuration information.')

    if cont_in is None:
        cont_in = context.Context()
    # TODO: Make the entity name a tailored variable
    cont_in.set_prov_label("HazImp_analysis")
    calc_jobs = config.instance_builder(config_list)
    the_pipeline = pipeline.PipeLine(calc_jobs)
    the_pipeline.run(cont_in)

    return cont_in
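
The dict check above hints at the expected configuration shape: a YAML sequence ("- key: value" lines) parses to a list of one-entry dicts, while a plain mapping parses to a dict and is rejected. A hedged sketch of both entry points (the file name and job entries are illustrative, not taken from a real HazImp job):

# Illustrative only: real HazImp jobs define more entries than this.
jobs = [
    {'template': 'flood'},   # matches the "- template: flood" hint above
]
cont = start(config_list=jobs)

# Or let start() read the file itself; /vsis3/ paths are downloaded first.
cont = start(config_file='flood_job.yaml')  # hypothetical file name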
Example #3
    def __call__(self,
                 context,
                 file_name,
                 exposure_latitude=None,
                 exposure_longitude=None,
                 use_parallel=True):
        """
        Read a csv exposure file into the context object.

        :param context: The context instance, used to move data around.
        :param file_name: The csv file to load.
        :param exposure_latitude: the title string of the latitude column.
        :param exposure_longitude: the title string of the longitude column.

        Content return:
            exposure_att: Add the file values into this dictionary.
            key: column titles
            value: column values, except the title
        """
        file_name = misc.download_file_from_s3_if_needed(file_name)
        dt = misc.get_file_mtime(file_name)
        expent = context.prov.entity(
            ":Exposure data", {
                'dcterms:title': 'Exposure data',
                'prov:type': 'void:Dataset',
                'prov:generatedAtTime': dt,
                'prov:atLocation': os.path.basename(file_name)
            })
        context.prov.used(context.provlabel, expent)
        data_frame = parallel.csv2dict(file_name, use_parallel=use_parallel)
        # FIXME Need to do better error handling
        # FIXME this function can only be called once.
        # Multiple calls will corrupt the context data.

        if exposure_latitude is None:
            lat_key = EX_LAT
        else:
            lat_key = exposure_latitude

        try:
            context.exposure_lat = data_frame[lat_key].values
            del data_frame[lat_key]
        except KeyError:
            msg = "No Exposure latitude column labelled '%s'." % lat_key
            raise RuntimeError(msg)

        if exposure_longitude is None:
            long_key = EX_LONG
        else:
            long_key = exposure_longitude

        try:
            context.exposure_long = data_frame[long_key].values
            del data_frame[long_key]
        except KeyError:
            msg = "No Exposure longitude column labelled '%s'." % long_key
            raise RuntimeError(msg)

        context.exposure_att = data_frame
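
A hedged usage sketch; the class name below is a stand-in for whichever class defines this __call__, and the file and column names are illustrative:

# Hypothetical caller; CsvExposureReader stands in for the real class.
cont = context.Context()
cont.set_prov_label("HazImp_analysis")
reader = CsvExposureReader()
reader(cont, 'exposure.csv',
       exposure_latitude='LATITUDE',
       exposure_longitude='LONGITUDE',
       use_parallel=False)
# cont.exposure_lat / cont.exposure_long now hold the coordinate columns;
# everything else stays in cont.exposure_att.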
Example #4
    def save_aggregation(self,
                         filename,
                         boundaries,
                         impactcode,
                         boundarycode,
                         categories,
                         fields,
                         use_parallel=True):
        """
        Save data aggregated to geospatial regions

        :param str filename: Destination filename
        :param bool use_parallel: True for parallel behaviout, which
                                  is only node 0 writing to file

        """
        LOGGER.info("Saving aggregated data")
        boundaries = misc.download_file_from_s3_if_needed(boundaries)
        [filename, bucket_name, bucket_key] = \
            misc.create_temp_file_path_for_s3(filename)
        write_dict = self.exposure_att.copy()
        dt = datetime.now().strftime(DATEFMT)
        atts = {
            "prov:type": "void:Dataset",
            "prov:atLocation": os.path.basename(boundaries),
            "prov:generatedAtTime": misc.get_file_mtime(boundaries),
            "void:boundary_code": boundarycode
        }

        bdyent = self.prov.entity(":Aggregation boundaries", atts)
        aggact = self.prov.activity(":AggregationByRegions", dt, None, {
            'prov:type': "Spatial aggregation",
            'void:functions': repr(fields)
        })
        aggatts = {
            "prov:type": "void:Dataset",
            "prov:atLocation": os.path.basename(filename),
            "prov:generatedAtTime": dt
        }
        aggfileent = self.prov.entity(":AggregationFile", aggatts)
        self.prov.used(aggact, bdyent)
        self.prov.wasInformedBy(aggact, self.provlabel)
        self.prov.wasGeneratedBy(aggfileent, aggact)
        if parallel.STATE.rank == 0 or not use_parallel:
            aggregate.choropleth(write_dict, boundaries, impactcode,
                                 boundarycode, filename, fields, categories)
            misc.upload_to_s3_if_applicable(filename, bucket_name, bucket_key)
            if (bucket_name is not None and bucket_key is not None
                    and bucket_key.endswith('.shp')):
                [rootname, ext] = os.path.splitext(filename)
                base_bucket_key = bucket_key[:-len(ext)]
                misc.upload_to_s3_if_applicable(rootname + '.dbf', bucket_name,
                                                base_bucket_key + '.dbf')
                misc.upload_to_s3_if_applicable(rootname + '.shx', bucket_name,
                                                base_bucket_key + '.shx')
                misc.upload_to_s3_if_applicable(rootname + '.prj', bucket_name,
                                                base_bucket_key + '.prj')
                misc.upload_to_s3_if_applicable(rootname + '.cpg', bucket_name,
                                                base_bucket_key + '.cpg', True)
Example #5
    def __call__(self,
                 context,
                 attribute_label,
                 file_list,
                 clip_exposure2all_hazards=False,
                 file_format=None,
                 variable=None,
                 no_data_value=None):
        """
        Load one or more files and get the value for all the
        exposure points. All files have to be of the same attribute.
        Alternatively a numeric array of the raster data can be passed in.

        :param context: The context instance, used to move data around.
        :param attribute_label: The string to be associated with this data.
        :param clip_exposure2all_hazards: True if the exposure data is
            clippped to the hazard data, so no hazard values are ignored.
        :param file_list: A list of files or a single file to be loaded.
        :param no_data_value: Values in the raster that represent no data.

        Context return:
           exposure_att: Add the file values into this dictionary.
               key: column titles
               value: column values, except the title
        """

        if isinstance(file_list, str):
            file_list = [file_list]

        for f in file_list:
            f = misc.download_file_from_s3_if_needed(f)
            dt = misc.get_file_mtime(f)
            atts = {
                "dcterms:title": "Source hazard data",
                "prov:type": "prov:Dataset",
                "prov:atLocation": os.path.basename(f),
                "prov:format": os.path.splitext(f)[1].replace('.', ''),
                "prov:generatedAtTime": dt,
            }
            if file_format == 'nc' and variable:
                atts['prov:variable'] = variable
            hazent = context.prov.entity(":Hazard data", atts)
            context.prov.used(context.provlabel, hazent)

        if file_format == 'nc' and variable:
            file_list = misc.mod_file_list(file_list, variable)

        file_data, extent = raster_module.files_raster_data_at_points(
            context.exposure_long, context.exposure_lat, file_list)
        # Mask the no-data sentinel as NaN (np.NAN was removed in NumPy 2).
        file_data[file_data == no_data_value] = np.nan

        context.exposure_att[attribute_label] = file_data

        if clip_exposure2all_hazards:
            # Clipping the exposure points after the data has been added.
            # Not optimised for speed, but easy to implement.
            context.clip_exposure(*extent)
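
A hedged usage sketch of the loader; the class name is a stand-in for whichever class defines this __call__, and the paths and values are illustrative:

# Hypothetical caller; RasterHazardReader stands in for the real class.
reader = RasterHazardReader()
reader(cont, 'water_depth',
       file_list='/vsis3/bucket/hazard/depth.nc',
       file_format='nc',
       variable='depth',          # picked out of the netCDF file
       no_data_value=-9999.0,
       clip_exposure2all_hazards=True)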
Example #6
    def __call__(self,
                 context,
                 attribute_label,
                 clip_exposure2all_hazards=False,
                 file_list=None,
                 file_format=None,
                 variable=None,
                 raster=None,
                 upper_left_x=None,
                 upper_left_y=None,
                 cell_size=None,
                 no_data_value=None):
        """
        Load one or more files and get the value for all the
        exposure points. All files have to be of the same attribute.
        Alternatively a numeric array of the raster data can be passed in.

        :param context: The context instance, used to move data around.
        :param attribute_label: The string to be associated with this data.
        :param clip_exposure2all_hazards: True if the exposure data is
            clippped to the hazard data, so no hazard values are ignored.

        :param file_list: A list of files or a single file to be loaded.
        OR
        :param raster: A 2D numeric array of the raster values, North is up.
        :param upper_left_x: The longitude at the upper left corner.
        :param upper_left_y: The latitude at the upper left corner.
        :param cell_size: The cell size.
        :param no_data_value: Values in the raster that represent no data.


        Context return:
           exposure_att: Add the file values into this dictionary.
               key: column titles
               value: column values, except the title
        """

        # We need a file or a full set of raster info.
        if file_list is None:
            # The raster info is being passed as an array
            assert raster is not None
            assert upper_left_x is not None
            assert upper_left_y is not None
            assert cell_size is not None
            assert no_data_value is not None
            a_raster = raster_module.Raster.from_array(raster, upper_left_x,
                                                       upper_left_y, cell_size,
                                                       no_data_value)

            if clip_exposure2all_hazards:
                # Reduce the context to the hazard area
                # before the raster info has been added to the context
                extent = a_raster.extent()
                context.clip_exposure(*extent)

            file_data = a_raster.raster_data_at_points(context.exposure_long,
                                                       context.exposure_lat)
            file_data = np.where(file_data == no_data_value, np.nan, file_data)
            context.exposure_att[attribute_label] = file_data
        else:
            if isinstance(file_list, str):
                file_list = [file_list]

            for f in file_list:
                f = misc.download_file_from_s3_if_needed(f)
                dt = misc.get_file_mtime(f)
                atts = {
                    "dcterms:title": "Source hazard data",
                    "prov:type": "prov:Dataset",
                    "prov:atLocation": os.path.basename(f),
                    "prov:format": os.path.splitext(f)[1].replace('.', ''),
                    "prov:generatedAtTime": dt,
                }
                if file_format == 'nc' and variable:
                    atts['prov:variable'] = variable
                hazent = context.prov.entity(":Hazard data", atts)
                context.prov.used(context.provlabel, hazent)

            if file_format == 'nc' and variable:
                file_list = misc.mod_file_list(file_list, variable)

            file_data, extent = raster_module.files_raster_data_at_points(
                context.exposure_long, context.exposure_lat, file_list)
            file_data[file_data == no_data_value] = np.nan

            context.exposure_att[attribute_label] = file_data

            if clip_exposure2all_hazards:
                # Clipping the exposure points after the data has been added.
                # Not optimised for speed, but easy to implement.
                context.clip_exposure(*extent)
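
A hedged sketch of the in-memory branch, which skips the file handling entirely; the grid and georeferencing values are illustrative, and 'reader'/'cont' are the hypothetical names from the previous sketches:

import numpy as np

# Hypothetical call; -9999.0 cells become NaN via the no-data handling.
grid = np.array([[1.0, 2.0],
                 [3.0, -9999.0]])
reader(cont, 'water_depth',
       raster=grid,
       upper_left_x=147.0,   # longitude of the upper-left corner
       upper_left_y=-35.0,   # latitude of the upper-left corner
       cell_size=0.01,
       no_data_value=-9999.0)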