Example #1
    def ingest_habitats(self):
        base_msg = "Ingesting 'habitats'..."
        self.logger.info(base_msg)
        habs_logger = self.get_section_logger('habs', base_msg)

        self.habs = {}
        self.habs_spatial_hash = SpatialHash(cell_size=self.hash_cell_size)
        habs_file = os.path.join(self.data_dir, 'habitats', "habitats.shp")
        habs_config = self.config.get('sections', {}).get('habitats', {})

        def add_to_habs_spatial_hash(data=None, **kwargs):
            self.habs_spatial_hash.add_rect(data.mbr, data)
            return data

        def process_neg_depth(neg_depth):
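            # Source 'Z' values store depth as negative-down; flip the sign.
            # Falls through (returning None) when the value is missing.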
            if neg_depth is not None:
                depth = -1.0 * float(neg_depth)
                return depth

        Ingestor(
            reader=ShapefileReader(
                shp_file=habs_file,
                reproject_to='EPSG:4326',
            ),
            processors=[
                ClassMapper(clazz=self.dao.schema['sources']['Habitat'],
                            mappings=[
                                {
                                    'source': 'SUBSTRATE',
                                    'target': 'substrate_id'
                                },
                                {
                                    'source': 'ENERGY',
                                    'target': 'energy_id'
                                },
                                {
                                    'source': 'Z',
                                    'target': 'depth',
                                    'processor': process_neg_depth
                                },
                                {
                                    'source': '__shape',
                                    'target': 'shape'
                                },
                            ]),
                self.add_area_mbr,
                add_to_habs_spatial_hash,
                DictWriter(dict_=self.habs),
            ],
            logger=habs_logger,
            limit=habs_config.get('limit'),
        ).ingest()
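
# The pipeline above is driven by an Ingestor imported from elsewhere in the
# project. A minimal sketch of the control flow it implies, with the call
# convention inferred from the processors above (callables taking a `data`
# keyword and returning the, possibly transformed, record); the real class
# may differ:
class Ingestor(object):

    def __init__(self, reader=None, processors=None, logger=None, limit=None,
                 **kwargs):
        self.reader = reader
        self.processors = processors or []
        self.logger = logger
        self.limit = limit

    def ingest(self):
        for i, record in enumerate(self.reader):
            if self.limit is not None and i >= self.limit:
                break
            data = record
            for processor in self.processors:
                # Each processor receives the previous processor's output.
                data = processor(data=data)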
Example #2
    def ingest_stat_areas(self, parent_logger=None, limit=None):
        self.stat_areas = {}
        self.sa_spatial_hash = SpatialHash(cell_size=.1)
        self.sa_values = {}
        logger = self.get_logger_logger(
            name='stat_area_ingest', 
            base_msg='Ingesting stat_areas...',
            parent_logger=parent_logger
        )

        Ingestor(
            reader=ShapefileReader(shp_file=self.stat_areas_path,
                                   reproject_to='EPSG:4326'),
            processors=[
                ClassMapper(
                    clazz=models.StatArea,
                    mappings=[{'source': 'SAREA', 'target': 'id'},
                              {'source': '__shape', 'target': 'shape'},],
                ),
                DictWriter(dict_=self.stat_areas, key_func=lambda sa: sa.id),
            ],
            logger=logger,
            limit=limit
        ).ingest()

        # Add to spatial hash.
        for stat_area in self.stat_areas.values():
            stat_area.mbr = gis_util.get_shape_mbr(stat_area.shape)
            self.sa_spatial_hash.add_rect(stat_area.mbr, stat_area)
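
# SpatialHash is imported, not defined, in these examples. A rough sketch of
# the grid-bucket index it suggests, with method names taken from the calls
# above and rects assumed to be (minx, miny, maxx, maxy) tuples:
from collections import defaultdict

class SpatialHash(object):

    def __init__(self, cell_size=.1):
        self.cell_size = cell_size
        self.buckets = defaultdict(list)

    def _grid_cells(self, rect):
        # Yield the (i, j) grid cells the rect's bounding box touches.
        minx, miny, maxx, maxy = rect
        cs = self.cell_size
        for i in range(int(minx // cs), int(maxx // cs) + 1):
            for j in range(int(miny // cs), int(maxy // cs) + 1):
                yield (i, j)

    def add_rect(self, rect, item):
        for grid_cell in self._grid_cells(rect):
            self.buckets[grid_cell].append(item)

    def items_for_point(self, point):
        x, y = point
        cs = self.cell_size
        return list(self.buckets.get((int(x // cs), int(y // cs)), []))

    def items_for_rect(self, rect):
        # De-duplicate items that span more than one grid cell.
        seen, items = set(), []
        for grid_cell in self._grid_cells(rect):
            for item in self.buckets.get(grid_cell, []):
                if id(item) not in seen:
                    seen.add(id(item))
                    items.append(item)
        return items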
Example #3
    def ingest_cells(self, parent_logger=None, limit=None):
        self.cells = {}
        self.cell_spatial_hash = SpatialHash(cell_size=.1)
        self.c_values = {}
        logger = self.get_logger_logger(
            name='cell_ingest', 
            base_msg='Ingesting cells...',
            parent_logger=parent_logger
        )

        Ingestor(
            reader=ShapefileReader(shp_file=self.grid_path,
                                   reproject_to='EPSG:4326'),
            processors=[
                ClassMapper(
                    clazz=models.Cell,
                    mappings=[{'source': 'ID', 'target': 'id'},
                              {'source': '__shape', 'target': 'shape'},],
                ),
                DictWriter(dict_=self.cells, key_func=lambda c: c.id),
            ],
            logger=logger,
            limit=limit
        ).ingest()

        # Calculate cell areas, add cells to the spatial hash,
        # and initialize c_values.
        for cell in self.cells.values():
            cell.area = gis_util.get_shape_area(cell.shape)
            cell.mbr = gis_util.get_shape_mbr(cell.shape)
            self.cell_spatial_hash.add_rect(cell.mbr, cell)
            self.c_values[cell.id] = {}
Example #4
    def ingest_habitats(self):
        base_msg = "Ingesting 'habitats'..."
        self.logger.info(base_msg)
        habs_logger = self.get_section_logger('habs', base_msg)

        self.habs = {}
        self.habs_spatial_hash = SpatialHash(cell_size=self.hash_cell_size)
        habs_file = os.path.join(self.data_dir, 'habitats', "habitats.shp")
        habs_config = self.config.get('sections', {}).get('habitats', {})

        def add_to_habs_spatial_hash(data=None, **kwargs):
            self.habs_spatial_hash.add_rect(data.mbr, data)
            return data

        def process_neg_depth(neg_depth):
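            # Source 'Z' values store depth as negative-down; flip the sign.
            # Falls through (returning None) when the value is missing.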
            if neg_depth is not None:
                depth = -1.0 * float(neg_depth)
                return depth

        Ingestor(
            reader=ShapefileReader(
                shp_file=habs_file,
                reproject_to='EPSG:4326',
            ),
            processors=[
                ClassMapper(
                    clazz=self.dao.schema['sources']['Habitat'],
                    mappings=[
                        {'source': 'SUBSTRATE', 'target': 'substrate_id'},
                        {'source': 'ENERGY', 'target': 'energy_id'},
                        {'source': 'Z', 'target': 'depth', 
                         'processor': process_neg_depth},
                        {'source': '__shape', 'target': 'shape'}, 
                    ]
                ),
                self.add_area_mbr,
                add_to_habs_spatial_hash,
                DictWriter(dict_=self.habs),
            ],
            logger=habs_logger,
            limit=habs_config.get('limit'),
        ).ingest()
Example #5
class SASI_Ingestor(object):
    def __init__(self, data_dir=None, dao=None, logger=logging.getLogger(),
                 config={}, hash_cell_size=.1, **kwargs):
        self.data_dir = data_dir
        self.dao = dao
        self.logger = logger
        self.hash_cell_size = hash_cell_size
        self.config = config
        self.commit_interval = config.get('commit_interval', 1e4)

    def ingest(self):

        # Define generic CSV ingests.
        csv_sections = [
            {
                'id': 'substrates',
                'class': self.dao.schema['sources']['Substrate'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                ]
            },
            {
                'id': 'energies',
                'class': self.dao.schema['sources']['Energy'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                ]
            },
            {
                'id': 'feature_categories',
                'class': self.dao.schema['sources']['FeatureCategory'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                ]
            },
            {
                'id': 'features',
                'class': self.dao.schema['sources']['Feature'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'category', 'target': 'category'},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                ]
            },
            {
                'id': 'gears',
                'class': self.dao.schema['sources']['Gear'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'generic_id', 'target': 'generic_id'},
                    {'source': 'is_generic', 'target': 'is_generic', 
                     'processor': parse_bool},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                    {'source': 'min_depth', 'processor': robust_float},
                    {'source': 'max_depth', 'processor': robust_float},
                ]
            },
            {
                'id': 'va',
                'class': self.dao.schema['sources']['VA'],
                'mappings': [
                    {'source': 'gear_id', 'target': 'gear_id'},
                    {'source': 'feature_id', 'target': 'feature_id'},
                    {'source': 'substrate_id', 'target': 'substrate_id'},
                    {'source': 'energy_id', 'target': 'energy_id'},
                    {'source': 's', 'target': 's', 'processor': robust_int},
                    {'source': 'r', 'target': 'r', 'processor': robust_int},
                ]
            },
            {
                'id': 'fishing_efforts',
                'optional': True,
                'class': self.dao.schema['sources']['Effort'],
                'mappings': [
                    {'source': 'cell_id', 'target':'cell_id', 
                     'processor': robust_int},
                    {'source': 'time', 'target': 'time', 
                     'processor': robust_int},
                    'gear_id',
                    # note: we assume a is already in km^2.
                    {'source': 'a', 'processor': robust_float},
                    {'source': 'hours_fished', 'processor': robust_float},
                    {'source': 'value', 'processor': robust_float},
                ]
            },
            {
                'id': 'model_parameters',
                'class': self.dao.schema['sources']['ModelParameters'],
                'mappings': [
                    'time_start',
                    'time_end',
                    'time_step',
                    {'source': 't_0', 'target': 't_0', 
                     'processor': robust_float},
                    {'source': 't_1', 'target': 't_1', 
                     'processor': robust_float},
                    {'source': 't_2', 'target': 't_2', 
                     'processor': robust_float},
                    {'source': 't_3', 'target': 't_3', 
                     'processor': robust_float},
                    {'source': 'w_0', 'target': 'w_0', 
                     'processor': robust_float},
                    {'source': 'w_1', 'target': 'w_1', 
                     'processor': robust_float},
                    {'source': 'w_2', 'target': 'w_2', 
                     'processor': robust_float},
                    {'source': 'w_3', 'target': 'w_3', 
                     'processor': robust_float},
                    {'source': 'effort_model', 'default': 'nominal'},
                    {'source': 'projection', 'target': 'projection',
                     # Use the mollweide projection as the default.
                     'default': gis_util.get_default_geographic_crs(),
                    }
                ],
            },
        ]

        for section in csv_sections:
            self.ingest_csv_section(section)

        # Convenience shortcuts.
        self.model_parameters = self.dao.query('__ModelParameters').fetchone()
        self.geographic_crs = self.model_parameters.projection

        self.ingest_grid()
        self.ingest_habitats()

        self.post_ingest()

    def ingest_csv_section(self, section):
        csv_file = os.path.join(self.data_dir, "%s.csv" % section['id'])
        if not os.path.isfile(csv_file):
            if not section.get('optional'):
                raise Exception(
                    ("Error ingesting '%s': "
                     "File '%s' is required and was not found.") % 
                    (section['id'], csv_file)
                )
            else:
                return

        base_msg = "Ingesting '%s'..." % section['id']
        self.logger.info(base_msg)
        section_config = self.config.get('sections', {}).get(
            section['id'], {})

        Ingestor(
            reader=CSVReader(csv_file=csv_file),
            processors=[
                ClassMapper(clazz=section['class'],
                            mappings=section['mappings']),
                DAOWriter(dao=self.dao, commit_interval=self.commit_interval),
            ],
            logger=self.get_section_logger(section['id'], base_msg),
            limit=section_config.get('limit'),
        ).ingest()
        self.dao.commit()

    def ingest_grid(self):
        base_msg = "Ingesting 'grid'..."
        self.logger.info(base_msg)
        grid_logger = self.get_section_logger('grid', base_msg)

        self.cells = {}
        grid_file = os.path.join(self.data_dir, 'grid', "grid.shp")
        grid_config = self.config.get('sections', {}).get('grid', {})

        Ingestor(
            reader=ShapefileReader(
                shp_file=grid_file,
                reproject_to='EPSG:4326',
            ),
            processors=[
                ClassMapper(
                    clazz=self.dao.schema['sources']['Cell'],
                    mappings=[
                        {'source': 'ID', 'target': 'id', 'processor': int}, 
                        {'source': '__shape', 'target': 'shape'},
                        {'source': '__shape', 'target': 'geom_wkt',
                         'processor': gis_util.shape_to_wkt}
                    ]
                ),
                self.add_area_mbr,
                DictWriter(dict_=self.cells),
            ],
            logger=grid_logger,
            limit=grid_config.get('limit'),
        ).ingest()

    def ingest_habitats(self):
        base_msg = "Ingesting 'habitats'..."
        self.logger.info(base_msg)
        habs_logger = self.get_section_logger('habs', base_msg)

        self.habs = {}
        self.habs_spatial_hash = SpatialHash(cell_size=self.hash_cell_size)
        habs_file = os.path.join(self.data_dir, 'habitats', "habitats.shp")
        habs_config = self.config.get('sections', {}).get('habitats', {})

        def add_to_habs_spatial_hash(data=None, **kwargs):
            self.habs_spatial_hash.add_rect(data.mbr, data)
            return data

        def process_neg_depth(neg_depth):
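            # Source 'Z' values store depth as negative-down; flip the sign.
            # Falls through (returning None) when the value is missing.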
            if neg_depth is not None:
                depth = -1.0 * float(neg_depth)
                return depth

        Ingestor(
            reader=ShapefileReader(
                shp_file=habs_file,
                reproject_to='EPSG:4326',
            ),
            processors=[
                ClassMapper(
                    clazz=self.dao.schema['sources']['Habitat'],
                    mappings=[
                        {'source': 'SUBSTRATE', 'target': 'substrate_id'},
                        {'source': 'ENERGY', 'target': 'energy_id'},
                        {'source': 'Z', 'target': 'depth', 
                         'processor': process_neg_depth},
                        {'source': '__shape', 'target': 'shape'}, 
                    ]
                ),
                self.add_area_mbr,
                add_to_habs_spatial_hash,
                DictWriter(dict_=self.habs),
            ],
            logger=habs_logger,
            limit=habs_config.get('limit'),
        ).ingest()

    def get_section_logger(self, section_id, base_msg):
        logger = logging.getLogger("%s_%s" % (id(self), section_id))
        formatter = logging.Formatter(base_msg + ' %(message)s.')
        log_handler = LoggerLogHandler(self.logger)
        log_handler.setFormatter(formatter)
        logger.addHandler(log_handler)
        logger.setLevel(self.logger.level)
        return logger

    def post_ingest(self):
        self.post_process_cells()

        # Allow for cells and habs to be garbage collected.
        self.cells = None
        self.habs = None
        self.habs_spatial_hash = None

    def post_process_cells(self, log_interval=1000):
        base_msg = 'Calculating cell compositions...'
        self.logger.info(base_msg)
        logger = self.get_section_logger('habitat_areas', base_msg)

        num_cells = len(self.cells)
        counter = 0
        for cell in self.cells.values():
            counter += 1
            if (counter % log_interval) == 0:
                logger.info(" %d of %d (%.1f%%)" % (
                    counter, num_cells, 100.0 * counter / num_cells))

            composition = {}
            cell.depth = 0

            # Get candidate intersecting habitats.
            candidate_habs = self.habs_spatial_hash.items_for_rect(cell.mbr)
            for hab in candidate_habs:
                intersection = gis_util.get_intersection(cell.shape, hab.shape)
                if not intersection:
                    continue
                intersection_area = gis_util.get_shape_area(
                    intersection,
                    target_crs=self.geographic_crs,
                )
                hab_key = (hab.substrate_id, hab.energy_id,)
                pct_area = intersection_area/cell.area
                composition[hab_key] = composition.get(hab_key, 0) + pct_area
                cell.depth += pct_area * hab.depth

            cell.habitat_composition = composition

            # Convert cell area to km^2.
            cell.area = cell.area/(1000.0**2)

            self.dao.save(cell, commit=False)
        self.dao.commit()

    # Processor for adding area and MBR to geometry entities.
    def add_area_mbr(self, data=None, **kwargs):
        data.area = gis_util.get_shape_area(
            data.shape, target_crs=self.geographic_crs)
        data.mbr = gis_util.get_shape_mbr(data.shape)
        return data 
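
# parse_bool, robust_float, and robust_int, used as mapping processors above,
# are imported from elsewhere in the project. Plausible implementations,
# assuming the convention that empty or malformed values map to None:
def robust_float(value):
    try:
        return float(value)
    except (TypeError, ValueError):
        return None

def robust_int(value):
    f = robust_float(value)
    return int(f) if f is not None else None

def parse_bool(value):
    # Treat common truthy strings as True, everything else as False.
    return str(value).strip().lower() in ('1', 'true', 't', 'yes', 'y')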
Example #6
class SASI_Ingestor(object):
    def __init__(self,
                 data_dir=None,
                 dao=None,
                 logger=logging.getLogger(),
                 config={},
                 hash_cell_size=.1,
                 **kwargs):
        self.data_dir = data_dir
        self.dao = dao
        self.logger = logger
        self.hash_cell_size = hash_cell_size
        self.config = config
        self.commit_interval = config.get('commit_interval', 1e4)

    def ingest(self):

        # Define generic CSV ingests.
        csv_sections = [
            {
                'id': 'substrates',
                'class': self.dao.schema['sources']['Substrate'],
                'mappings': [
                    {
                        'source': 'id',
                        'target': 'id'
                    },
                    {
                        'source': 'label',
                        'target': 'label'
                    },
                    {
                        'source': 'description',
                        'target': 'description'
                    },
                ]
            },
            {
                'id': 'energies',
                'class': self.dao.schema['sources']['Energy'],
                'mappings': [
                    {
                        'source': 'id',
                        'target': 'id'
                    },
                    {
                        'source': 'label',
                        'target': 'label'
                    },
                    {
                        'source': 'description',
                        'target': 'description'
                    },
                ]
            },
            {
                'id': 'feature_categories',
                'class': self.dao.schema['sources']['FeatureCategory'],
                'mappings': [
                    {
                        'source': 'id',
                        'target': 'id'
                    },
                    {
                        'source': 'label',
                        'target': 'label'
                    },
                    {
                        'source': 'description',
                        'target': 'description'
                    },
                ]
            },
            {
                'id': 'features',
                'class': self.dao.schema['sources']['Feature'],
                'mappings': [
                    {
                        'source': 'id',
                        'target': 'id'
                    },
                    {
                        'source': 'category',
                        'target': 'category'
                    },
                    {
                        'source': 'label',
                        'target': 'label'
                    },
                    {
                        'source': 'description',
                        'target': 'description'
                    },
                ]
            },
            {
                'id': 'gears',
                'class': self.dao.schema['sources']['Gear'],
                'mappings': [
                    {
                        'source': 'id',
                        'target': 'id'
                    },
                    {
                        'source': 'generic_id',
                        'target': 'generic_id'
                    },
                    {
                        'source': 'is_generic',
                        'target': 'is_generic',
                        'processor': parse_bool
                    },
                    {
                        'source': 'label',
                        'target': 'label'
                    },
                    {
                        'source': 'description',
                        'target': 'description'
                    },
                    {
                        'source': 'min_depth',
                        'processor': robust_float
                    },
                    {
                        'source': 'max_depth',
                        'processor': robust_float
                    },
                ]
            },
            {
                'id': 'va',
                'class': self.dao.schema['sources']['VA'],
                'mappings': [
                    {
                        'source': 'gear_id',
                        'target': 'gear_id'
                    },
                    {
                        'source': 'feature_id',
                        'target': 'feature_id'
                    },
                    {
                        'source': 'substrate_id',
                        'target': 'substrate_id'
                    },
                    {
                        'source': 'energy_id',
                        'target': 'energy_id'
                    },
                    {
                        'source': 's',
                        'target': 's',
                        'processor': robust_int
                    },
                    {
                        'source': 'r',
                        'target': 'r',
                        'processor': robust_int
                    },
                ]
            },
            {
                'id': 'fishing_efforts',
                'optional': True,
                'class': self.dao.schema['sources']['Effort'],
                'mappings': [
                    {
                        'source': 'cell_id',
                        'target': 'cell_id',
                        'processor': robust_int
                    },
                    {
                        'source': 'time',
                        'target': 'time',
                        'processor': robust_int
                    },
                    'gear_id',
                    # note: we assume a is already in km^2.
                    {
                        'source': 'a',
                        'processor': robust_float
                    },
                    {
                        'source': 'hours_fished',
                        'processor': robust_float
                    },
                    {
                        'source': 'value',
                        'processor': robust_float
                    },
                ]
            },
            {
                'id': 'model_parameters',
                'class': self.dao.schema['sources']['ModelParameters'],
                'mappings': [
                    'time_start',
                    'time_end',
                    'time_step',
                    {
                        'source': 't_0',
                        'target': 't_0',
                        'processor': robust_float
                    },
                    {
                        'source': 't_1',
                        'target': 't_1',
                        'processor': robust_float
                    },
                    {
                        'source': 't_2',
                        'target': 't_2',
                        'processor': robust_float
                    },
                    {
                        'source': 't_3',
                        'target': 't_3',
                        'processor': robust_float
                    },
                    {
                        'source': 'w_0',
                        'target': 'w_0',
                        'processor': robust_float
                    },
                    {
                        'source': 'w_1',
                        'target': 'w_1',
                        'processor': robust_float
                    },
                    {
                        'source': 'w_2',
                        'target': 'w_2',
                        'processor': robust_float
                    },
                    {
                        'source': 'w_3',
                        'target': 'w_3',
                        'processor': robust_float
                    },
                    {
                        'source': 'effort_model',
                        'default': 'nominal'
                    },
                    {
                        'source': 'projection',
                        'target': 'projection',
                        # Use the mollweide projection as the default.
                        'default': gis_util.get_default_geographic_crs(),
                    }
                ],
            },
        ]

        for section in csv_sections:
            self.ingest_csv_section(section)

        # Convenience shortcuts.
        self.model_parameters = self.dao.query('__ModelParameters').fetchone()
        self.geographic_crs = self.model_parameters.projection

        self.ingest_grid()
        self.ingest_habitats()

        self.post_ingest()

    def ingest_csv_section(self, section):
        csv_file = os.path.join(self.data_dir, "%s.csv" % section['id'])
        if not os.path.isfile(csv_file):
            if not section.get('optional'):
                raise Exception(("Error ingesting '%s': "
                                 "File '%s' is required and was not found.") %
                                (section['id'], csv_file))
            else:
                return

        base_msg = "Ingesting '%s'..." % section['id']
        self.logger.info(base_msg)
        section_config = self.config.get('sections', {}).get(section['id'], {})

        Ingestor(
            reader=CSVReader(csv_file=csv_file),
            processors=[
                ClassMapper(clazz=section['class'],
                            mappings=section['mappings']),
                DAOWriter(dao=self.dao, commit_interval=self.commit_interval),
            ],
            logger=self.get_section_logger(section['id'], base_msg),
            limit=section_config.get('limit'),
        ).ingest()
        self.dao.commit()

    def ingest_grid(self):
        base_msg = "Ingesting 'grid'..."
        self.logger.info(base_msg)
        grid_logger = self.get_section_logger('grid', base_msg)

        self.cells = {}
        grid_file = os.path.join(self.data_dir, 'grid', "grid.shp")
        grid_config = self.config.get('sections', {}).get('grid', {})

        Ingestor(
            reader=ShapefileReader(
                shp_file=grid_file,
                reproject_to='EPSG:4326',
            ),
            processors=[
                ClassMapper(clazz=self.dao.schema['sources']['Cell'],
                            mappings=[{
                                'source': 'ID',
                                'target': 'id',
                                'processor': int
                            }, {
                                'source': '__shape',
                                'target': 'shape'
                            }, {
                                'source': '__shape',
                                'target': 'geom_wkt',
                                'processor': gis_util.shape_to_wkt
                            }]),
                self.add_area_mbr,
                DictWriter(dict_=self.cells),
            ],
            logger=grid_logger,
            limit=grid_config.get('limit'),
        ).ingest()

    def ingest_habitats(self):
        base_msg = "Ingesting 'habitats'..."
        self.logger.info(base_msg)
        habs_logger = self.get_section_logger('habs', base_msg)

        self.habs = {}
        self.habs_spatial_hash = SpatialHash(cell_size=self.hash_cell_size)
        habs_file = os.path.join(self.data_dir, 'habitats', "habitats.shp")
        habs_config = self.config.get('sections', {}).get('habitats', {})

        def add_to_habs_spatial_hash(data=None, **kwargs):
            self.habs_spatial_hash.add_rect(data.mbr, data)
            return data

        def process_neg_depth(neg_depth):
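            # Source 'Z' values store depth as negative-down; flip the sign.
            # Falls through (returning None) when the value is missing.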
            if neg_depth is not None:
                depth = -1.0 * float(neg_depth)
                return depth

        Ingestor(
            reader=ShapefileReader(
                shp_file=habs_file,
                reproject_to='EPSG:4326',
            ),
            processors=[
                ClassMapper(clazz=self.dao.schema['sources']['Habitat'],
                            mappings=[
                                {
                                    'source': 'SUBSTRATE',
                                    'target': 'substrate_id'
                                },
                                {
                                    'source': 'ENERGY',
                                    'target': 'energy_id'
                                },
                                {
                                    'source': 'Z',
                                    'target': 'depth',
                                    'processor': process_neg_depth
                                },
                                {
                                    'source': '__shape',
                                    'target': 'shape'
                                },
                            ]),
                self.add_area_mbr,
                add_to_habs_spatial_hash,
                DictWriter(dict_=self.habs),
            ],
            logger=habs_logger,
            limit=habs_config.get('limit'),
        ).ingest()

    def get_section_logger(self, section_id, base_msg):
        logger = logging.getLogger("%s_%s" % (id(self), section_id))
        formatter = logging.Formatter(base_msg + ' %(message)s.')
        log_handler = LoggerLogHandler(self.logger)
        log_handler.setFormatter(formatter)
        logger.addHandler(log_handler)
        logger.setLevel(self.logger.level)
        return logger

    def post_ingest(self):
        self.post_process_cells()

        # Allow for cells and habs to be garbage collected.
        self.cells = None
        self.habs = None
        self.habs_spatial_hash = None

    def post_process_cells(self, log_interval=1000):
        base_msg = 'Calculating cell compositions...'
        self.logger.info(base_msg)
        logger = self.get_section_logger('habitat_areas', base_msg)

        num_cells = len(self.cells)
        counter = 0
        for cell in self.cells.values():
            counter += 1
            if (counter % log_interval) == 0:
                logger.info(
                    " %d of %d (%.1f%%)" %
                    (counter, num_cells, 1.0 * counter / num_cells * 100))

            composition = {}
            cell.depth = 0

            # Get candidate intersecting habitats.
            candidate_habs = self.habs_spatial_hash.items_for_rect(cell.mbr)
            for hab in candidate_habs:
                intersection = gis_util.get_intersection(cell.shape, hab.shape)
                if not intersection:
                    continue
                intersection_area = gis_util.get_shape_area(
                    intersection,
                    target_crs=self.geographic_crs,
                )
                hab_key = (
                    hab.substrate_id,
                    hab.energy_id,
                )
                pct_area = intersection_area / cell.area
                composition[hab_key] = composition.get(hab_key, 0) + pct_area
                cell.depth += pct_area * hab.depth

            cell.habitat_composition = composition

            # Convert cell area to km^2.
            cell.area = cell.area / (1000.0**2)

            self.dao.save(cell, commit=False)
        self.dao.commit()

    # Processor for adding area and MBR to geometry entities.
    def add_area_mbr(self, data=None, **kwargs):
        data.area = gis_util.get_shape_area(data.shape,
                                            target_crs=self.geographic_crs)
        data.mbr = gis_util.get_shape_mbr(data.shape)
        return data
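
# LoggerLogHandler, used by get_section_logger above, is imported rather than
# defined in these examples. A minimal sketch of a forwarding handler with the
# same constructor shape (the real implementation may differ):
import logging

class LoggerLogHandler(logging.Handler):

    def __init__(self, target_logger, **kwargs):
        logging.Handler.__init__(self, **kwargs)
        self.target_logger = target_logger

    def emit(self, record):
        # Re-log the formatted message on the target logger at the record's
        # own level.
        self.target_logger.log(record.levelno, self.format(record))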
Example #7
class SASIGridderTask(task_manager.Task):

    def __init__(self, config={}, data={}, **kwargs):
        super(SASIGridderTask, self).__init__(**kwargs)
        self.logger.debug("RunSasiTask.__init__")

        self.data = data
        self.config = config
        self.value_attrs = models.Effort.value_attrs
        self.key_attrs = ['gear_id', 'time']

        # Define trip type to gear code mappings.
        self.trip_type_gear_mappings = kwargs.get('gear_mappings', {
            'hy_drg': 'GC30',
            'otter': 'GC10',
            'sca-gc': 'GC21',
            'sca-la': 'GC20',
            'shrimp': 'GC11',
            'squid': 'GC12',
            'raised': 'GC13',
            'trap': 'GC60',
            'gillne': 'GC50',
            'longli': 'GC40',
        })

        for kwarg in ['raw_efforts_path', 'grid_path', 'stat_areas_path',
                      'output_path', 'effort_limit']:
            setattr(self, kwarg, kwargs.get(kwarg))

        if not self.output_path:
            os_hndl, self.output_path = tempfile.mkstemp(
                prefix="gridded_efforts.", suffix='.csv')
            # Close the low-level handle; the file is reopened by path later.
            os.close(os_hndl)

        self.message_logger = logging.getLogger("Task%s_msglogger" % id(self))
        main_log_handler = LoggerLogHandler(self.logger)
        main_log_handler.setFormatter(
            logging.Formatter('%(message)s'))
        self.message_logger.addHandler(main_log_handler)
        self.message_logger.setLevel(self.logger.level)

    def call(self):
        self.progress = 1
        self.message_logger.info("Starting...")

        # Create build dir.
        build_dir = tempfile.mkdtemp(prefix="gridderWork.")

        # Read in data.
        base_msg = "Ingesting..."
        ingest_logger = self.get_logger_logger('ingest', base_msg,
                                               self.logger)
        self.message_logger.info(base_msg)

        # Read in cells.
        self.ingest_cells(parent_logger=ingest_logger, limit=None)

        # Read in stat_areas.
        self.ingest_stat_areas(parent_logger=ingest_logger)

        #
        # Main part of the gridding task.
        #

        base_msg = "Gridding."
        gridding_logger = self.get_logger_logger('gridding', base_msg,
                                                  self.logger)
        self.message_logger.info(base_msg)

        #
        # 0. Terms used here:
        # 'clean' efforts can be assigned to a cell.
        # 'kinda_dirty' efforts can be assigned to a stat_area.
        # 'super_dirty' efforts cannot be assigned to a cell or a stat_area.
        #
        # Running example:
        # We start with two cells, 'C1' and 'C2', and one stat_area, 'StatArea1'.
        # 'StatArea1' contains 50% of 'C1', and 100% of 'C2'.
        #

        #
        # 1. Assign 'clean' efforts to cells, assign kinda-dirty efforts to
        # stat areas, and save super-dirty efforts to the super-dirty efforts list.
        #
        # Running example:
        # We have 100 points of clean effort which can be assigned to 'C1',
        # 100 points of clean effort which can be assigned to 'C2',
        # 100 points of kinda-dirty effort which can be assigned to 'StatArea1',
        # and 100 points of super-dirty effort which can't be assigned to anything.
        # After this first step, both 'C1' and 'C2' will have 100 points of effort assigned
        # from clean efforts.
        #


        # Do the first pass on efforts as we read them in.

        base_msg = "Assigning raw efforts to cells/stat_areas ... "
        fp_logger = self.get_logger_logger('first_pass', base_msg,
                                              gridding_logger)
        fp_logger.info(base_msg)

        unassigned = {}

        logging_interval = 1e4

        # Define functions to handle raw effort columns
        def trip_type_to_gear_id(trip_type):
            return self.trip_type_gear_mappings.get(trip_type)

        def float_w_empty_dot(value):
            if value == '.' or value == '':
                return None
            elif value is not None:
                return float(value)

        # Define function to execute after each raw effort is mapped to an
        # effort column. This is the first pass described above.
        def first_pass(data=None, **kwargs):
            # If effort has lat and lon...
            if data.lat is not None and data.lon is not None:
                # Can the effort be assigned to a cell?
                cell = self.get_cell_for_pos(data.lat, data.lon)
                if cell:
                    self.add_effort_to_cell(cell, data)
                    return

                # Otherwise, can the effort be assigned to a stat area?
                stat_area = self.get_stat_area_for_pos(
                    data.lat, data.lon)
                if stat_area:
                    self.add_effort_to_stat_area(stat_area, data)
                    return

                # Otherwise add to unassigned.
                else:
                    self.add_effort_to_unassigned(unassigned, data)
                    return

            # Otherwise if effort has a stat area...
            elif data.stat_area_id is not None:
                stat_area = self.stat_areas.get(data.stat_area_id)
                if not stat_area:
                    self.add_effort_to_unassigned(unassigned, data)
                    return
                else:
                    self.add_effort_to_stat_area(stat_area, data)
                    return

            # Otherwise add to unassigned list.
            else:
                self.add_effort_to_unassigned(unassigned, data)
                return

        # Create and run the effort ingestor.
        Ingestor(
            reader=CSVReader(csv_file=self.raw_efforts_path),
            processors=[
                ClassMapper(
                    clazz=models.Effort,
                    mappings=[
                        {'source': 'trip_type', 'target': 'gear_id', 
                         'processor': trip_type_to_gear_id},
                        {'source': 'year', 'target': 'time',
                         'processor': float_w_empty_dot},
                        {'source': 'nemarea', 'target': 'stat_area_id',
                         'processor': float_w_empty_dot},
                        {'source': 'A', 'target': 'a',
                         'processor': float_w_empty_dot},
                        {'source': 'value', 'target': 'value',
                         'processor': float_w_empty_dot},
                        {'source': 'hours_fished', 'target': 'hours_fished',
                         'processor': float_w_empty_dot},
                        {'source': 'lat', 'target': 'lat', 
                         'processor': float_w_empty_dot},
                        {'source': 'lon', 'target': 'lon',
                         'processor': float_w_empty_dot}
                    ],
                ),
                first_pass,
            ],
            logger=fp_logger,
            get_count=True,
            limit=self.effort_limit,
        ).ingest() 

        # 
        # 2. For each effort assigned to a stat area,
        # distribute values across cracked cells in that stat area.
        # We distribute values in proportion to the amount of value
        # contained in the cracked cell relative to the total amount
        # of 'clean' value the stat area already contains.
        #
        # Running Example:
        # We now distribute the 100 points of kinda-dirty effort which can be assigned to 'StatArea1'.
        # We distribute the effort proportionally to the cracked cells,
        # so that 'C1' gets 33 additional effort points, and 'C2' gets 66 additional effort points.
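        # Why 33 and 66: each cell starts with 100 points of clean effort, so
        # the cracked-cell values are 50% * 100 = 50 for 'C1' and
        # 100% * 100 = 100 for 'C2'; the cracked total is 150, giving shares
        # of 100 * (50/150) ~= 33 and 100 * (100/150) ~= 66.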
        #

        base_msg = "Distributing stat_area values to cells ... "
        sa_logger = self.get_logger_logger('stat_areas', base_msg,
                                              gridding_logger)
        sa_logger.info(base_msg)

        num_stat_areas = len(self.stat_areas)
        logging_interval = 1
        sa_counter = 0
        for stat_area in self.stat_areas.values():
            sa_counter += 1
            if (sa_counter % logging_interval) == 0:
                sa_logger.info("stat_area %s of %s (%.1f%%)" % (
                    sa_counter, num_stat_areas, 
                    100.0 * sa_counter/num_stat_areas))

            # Get stat area values.
            sa_keyed_values = self.sa_values.setdefault(stat_area.id, {})

            # Get list of cracked cells.
            cracked_cells = self.get_cracked_cells_for_stat_area(stat_area)

            # Calculate totals for values across cracked cells.
            ccell_totals = {}
            for ccell in cracked_cells:
                for effort_key, ccell_values in ccell.keyed_values.items():
                    ccell_totals_values = ccell_totals.setdefault(
                        effort_key,
                        self.new_values_dict()
                    )
                    for attr, ccell_value in ccell_values.items():
                        ccell_totals_values[attr] += ccell_value

            # Distribute the stat area's values across the cracked
            # cells, in proportion to the cracked cell's values as a
            # percentage of the stat area's cracked cell totals.
            for ccell in cracked_cells:
                pcell_keyed_values = self.c_values[ccell.parent_cell.id]
                for effort_key, sa_values in sa_keyed_values.items():
                    ccell_totals_values = ccell_totals.get(effort_key)
                    ccell_values = ccell.keyed_values.get(effort_key)
                    pcell_values = pcell_keyed_values.setdefault(
                        effort_key, self.new_values_dict())
                    if not ccell_totals_values or not ccell_values:
                        continue
                    for attr, sa_value in sa_values.items():
                        # Don't add anything for empty values.
                        # This also avoids division by zero errors.
                        if not sa_value:
                            continue
                        ccell_value = ccell_values.get(attr, 0.0)
                        ccell_totals_value = ccell_totals_values.get(attr, 0.0)
                        if not ccell_value or not ccell_totals_value:
                            continue
                        pct_value = ccell_value/ccell_totals_value
                        # Add proportional value to cracked cell's parent 
                        # cell.
                        pcell_values[attr] += sa_value * pct_value

        #
        # 3. For efforts which could not be assigned to a cell or a stat area
        # ('super-dirty' efforts), distribute the efforts across all cells,
        # such that the amount of effort each cell receives is proportional to the cell's
        # total contribution to the overall total.
        #
        # Running Example:
        # We start with cells 'C1' and 'C2'.
        # 'C1' starts with 133 effort points from clean efforts + kinda-dirty efforts.
        # Likewise 'C2' starts with 166 effort points from clean efforts + kinda-dirty efforts.
        # Our overall total is 133 + 166 = 300.
        # 'C1' is responsible for 133/300 ~= 45% of the total effort.
        # 'C2' is responsible for 166/300 ~= 55% of the total effort.
        # We then have 100 additional points of super-dirty effort which could not be assigned to any cell
        # or stat area.
        # We distribute the effort proportionally to the cells so that
        # 'C1' gets 45 additional effort points, and 'C2' gets 55 additional effort points.
        # Our final result is that 'C1' has 133 + 45 = 178 effort points, and
        # 'C2' has 166 + 55 = 221 effort points.
        base_msg = "Distributing unassigned values to cells ... "
        unassigned_logger = self.get_logger_logger('unassigned', base_msg,
                                              gridding_logger)
        unassigned_logger.info(base_msg)

        # Calculate totals across all cells.
        totals = {}
        num_cells = len(self.cells)
        for cell in self.cells.values():
            cell_keyed_values = self.c_values[cell.id]
            for effort_key, cell_values in cell_keyed_values.items():
                totals_values = totals.setdefault(
                    effort_key, 
                    self.new_values_dict()
                )
                for attr, cell_value in cell_values.items():
                    totals_values[attr] += cell_value

        # Distribute unassigned efforts across all cells,
        # in proportion to the cell's values as a percentage of the total.
        logging_interval = 1e3
        cell_counter = 0
        for cell in self.cells.values():
            cell_counter += 1
            if (cell_counter % logging_interval) == 0:
                unassigned_logger.info("cell %s of %s (%.1f%%)" % (
                    cell_counter, num_cells, 100.0 * cell_counter/num_cells))

            cell_keyed_values = self.c_values[cell.id]
            for effort_key, unassigned_values in unassigned.items():
                cell_values = cell_keyed_values.get(effort_key)
                totals_values = totals.get(effort_key)
                if not cell_values or not totals_values:
                    continue
                for attr, unassigned_value in unassigned_values.items():
                    if not unassigned_value:
                        continue
                    cell_value = cell_values.get(attr, 0.0)
                    total_value = totals_values.get(attr, 0.0)
                    if not total_value:
                        continue
                    # Give the cell a share of the unassigned value in
                    # proportion to its contribution to the overall total.
                    pct_value = cell_value/total_value
                    cell_values[attr] += unassigned_value * pct_value

        # Done with gridding. At this point the effort has been distributed. 

        # Note that there may be some efforts which are not included.
        # For example, if an unassigned effort has an effort_key which is 
        # not used by any effort assigned to a cell or a stat_area, then 
        # no cell will have a non-zero pct_value for that effort_key.

        #
        # Output gridded efforts.
        #
        with open(self.output_path, "w") as f:
            w = csv.writer(f)
            fields = ['cell_id'] + self.key_attrs + self.value_attrs
            w.writerow(fields)

            for cell in self.cells.values():
                cell_keyed_values = self.c_values[cell.id]
                for keys, values in cell_keyed_values.items():
                    row_dict = {
                        'cell_id': cell.id
                    }
                    for i in range(len(self.key_attrs)):
                        row_dict[self.key_attrs[i]] = keys[i]
                    row_dict.update(values)
                    w.writerow([row_dict[f] for f in fields])

        shutil.rmtree(build_dir)

        self.progress = 100
        self.message_logger.info("Gridding completed, output file is:'%s'" % (
            self.output_path))
        self.data['output_file'] = self.output_path
        self.status = 'resolved'

    def get_logger_logger(self, name=None, base_msg=None, parent_logger=None):
        logger = logging.getLogger("%s_%s" % (id(self), name))
        formatter = logging.Formatter(base_msg + ' %(message)s.')
        log_handler = LoggerLogHandler(parent_logger)
        log_handler.setFormatter(formatter)
        logger.addHandler(log_handler)
        logger.setLevel(self.message_logger.level)
        return logger

    def get_cell_for_pos(self, lat, lon):
        """
        Get cell which contains given point, via
        spatial hash.
        """
        pos_wkt = 'POINT(%s %s)' % (lon, lat)
        pnt_shp = gis_util.wkt_to_shape(pos_wkt)
        candidates = self.cell_spatial_hash.items_for_point((lon,lat))
        for c in candidates:
            if gis_util.get_intersection(c.shape, pnt_shp):
                return c
        return None

    def get_stat_area_for_pos(self, lat, lon):
        pos_wkt = 'POINT(%s %s)' % (lon, lat)
        pnt_shp = gis_util.wkt_to_shape(pos_wkt)
        candidates = self.sa_spatial_hash.items_for_point((lon,lat))
        for c in candidates:
            if gis_util.get_intersection(c.shape, pnt_shp):
                return c
        return None

    def new_values_dict(self):
        return dict(zip(self.value_attrs, [0.0] * len(self.value_attrs)))

    def update_values_dict(self, values_dict, effort):
        for k in values_dict.keys():
            effort_value = getattr(effort, k, 0.0)
            if effort_value is None:
                effort_value = 0.0
            values_dict[k] += effort_value

    def add_effort_to_keyed_values_dict(self, kvd, effort):
        values = kvd.setdefault(
            self.get_effort_key(effort), 
            self.new_values_dict()
        )
        self.update_values_dict(values, effort)

    def add_effort_to_cell(self, cell, effort):
        cell_keyed_values = self.c_values[cell.id]
        self.add_effort_to_keyed_values_dict(cell_keyed_values, effort)

    def add_effort_to_stat_area(self, stat_area, effort):
        sa_keyed_values = self.sa_values.setdefault(stat_area.id, {})
        self.add_effort_to_keyed_values_dict(sa_keyed_values, effort)

    def add_effort_to_unassigned(self, unassigned, effort):
        self.add_effort_to_keyed_values_dict(unassigned, effort)

    def get_effort_key(self, effort):
        """  Key for grouping values by effort types. """
        return tuple([getattr(effort, attr, None) for attr in self.key_attrs])

    def ingest_cells(self, parent_logger=None, limit=None):
        self.cells = {}
        self.cell_spatial_hash = SpatialHash(cell_size=.1)
        self.c_values = {}
        logger = self.get_logger_logger(
            name='cell_ingest', 
            base_msg='Ingesting cells...',
            parent_logger=parent_logger
        )

        Ingestor(
            reader=ShapefileReader(shp_file=self.grid_path,
                                   reproject_to='EPSG:4326'),
            processors=[
                ClassMapper(
                    clazz=models.Cell,
                    mappings=[{'source': 'ID', 'target': 'id'},
                              {'source': '__shape', 'target': 'shape'},],
                ),
                DictWriter(dict_=self.cells, key_func=lambda c: c.id),
            ],
            logger=logger,
            limit=limit
        ).ingest()

        # Calculate cell areas, add cells to the spatial hash,
        # and initialize c_values.
        for cell in self.cells.values():
            cell.area = gis_util.get_shape_area(cell.shape)
            cell.mbr = gis_util.get_shape_mbr(cell.shape)
            self.cell_spatial_hash.add_rect(cell.mbr, cell)
            self.c_values[cell.id] = {}

    def ingest_stat_areas(self, parent_logger=None, limit=None):
        self.stat_areas = {}
        self.sa_spatial_hash = SpatialHash(cell_size=.1)
        self.sa_values = {}
        logger = self.get_logger_logger(
            name='stat_area_ingest', 
            base_msg='Ingesting stat_areas...',
            parent_logger=parent_logger
        )

        Ingestor(
            reader=ShapefileReader(shp_file=self.stat_areas_path,
                                   reproject_to='EPSG:4326'),
            processors=[
                ClassMapper(
                    clazz=models.StatArea,
                    mappings=[{'source': 'SAREA', 'target': 'id'},
                              {'source': '__shape', 'target': 'shape'},],
                ),
                DictWriter(dict_=self.stat_areas, key_func=lambda sa: sa.id),
            ],
            logger=logger,
            limit=limit
        ).ingest()

        # Add to spatial hash.
        for stat_area in self.stat_areas.values():
            stat_area.mbr = gis_util.get_shape_mbr(stat_area.shape)
            self.sa_spatial_hash.add_rect(stat_area.mbr, stat_area)

    def get_cracked_cells_for_stat_area(self, stat_area):
        cracked_cells = []
        candidates = self.cell_spatial_hash.items_for_rect(stat_area.mbr)
        for icell in candidates:
            intersection = gis_util.get_intersection(stat_area.shape, icell.shape)
            if not intersection:
                continue

            intersection_area = gis_util.get_shape_area(intersection)
            pct_area = intersection_area/icell.area

            # Set cracked cell values in proportion to percentage
            # of parent cell's area.
            ccell_keyed_values = {}
            icell_keyed_values = self.c_values[icell.id]
            for effort_key, icell_values in icell_keyed_values.items():
                ccell_values = ccell_keyed_values.setdefault(effort_key, {})
                for attr, value in icell_values.items():
                    ccell_values[attr] = pct_area * value

            cracked_cells.append(models.CrackedCell(
                parent_cell=icell,
                area=intersection_area,
                keyed_values=ccell_keyed_values,
            ))
        return cracked_cells
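
# A self-contained illustration of the keyed-values bookkeeping used
# throughout the task: values are grouped per (gear_id, time) effort key and
# summed per value attribute. The toy Effort tuple and attribute lists are
# assumptions for this demo; the real models.Effort is not shown here.
from collections import namedtuple

Effort = namedtuple('Effort', ['gear_id', 'time', 'a', 'hours_fished', 'value'])

key_attrs = ['gear_id', 'time']
value_attrs = ['a', 'hours_fished', 'value']

def get_effort_key(effort):
    return tuple(getattr(effort, attr, None) for attr in key_attrs)

def add_effort(keyed_values, effort):
    values = keyed_values.setdefault(
        get_effort_key(effort),
        dict.fromkeys(value_attrs, 0.0),
    )
    for attr in value_attrs:
        values[attr] += getattr(effort, attr, 0.0) or 0.0

cell_values = {}
add_effort(cell_values, Effort('GC10', 2009, 1.5, 3.0, 100.0))
add_effort(cell_values, Effort('GC10', 2009, 0.5, 1.0, 50.0))
# cell_values == {('GC10', 2009): {'a': 2.0, 'hours_fished': 4.0, 'value': 150.0}}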