import logging
import os

# NOTE: project-level helpers used below (Ingestor, CSVReader, ShapefileReader,
# ClassMapper, DAOWriter, DictWriter, SpatialHash, LoggerLogHandler, gis_util,
# parse_bool, robust_float, robust_int) are assumed to be importable from the
# surrounding package; their exact module paths are not shown here.


class SASI_Ingestor(object):

    def __init__(self, data_dir=None, dao=None, logger=logging.getLogger(),
                 config={}, hash_cell_size=.1, **kwargs):
        self.data_dir = data_dir
        self.dao = dao
        self.logger = logger
        self.hash_cell_size = hash_cell_size
        self.config = config
        self.commit_interval = config.get('commit_interval', 1e4)

    def ingest(self):
        # Define generic CSV ingests.
        csv_sections = [
            {
                'id': 'substrates',
                'class': self.dao.schema['sources']['Substrate'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                ]
            },
            {
                'id': 'energies',
                'class': self.dao.schema['sources']['Energy'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                ]
            },
            {
                'id': 'feature_categories',
                'class': self.dao.schema['sources']['FeatureCategory'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                ]
            },
            {
                'id': 'features',
                'class': self.dao.schema['sources']['Feature'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'category', 'target': 'category'},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                ]
            },
            {
                'id': 'gears',
                'class': self.dao.schema['sources']['Gear'],
                'mappings': [
                    {'source': 'id', 'target': 'id'},
                    {'source': 'generic_id', 'target': 'generic_id'},
                    {'source': 'is_generic', 'target': 'is_generic',
                     'processor': parse_bool},
                    {'source': 'label', 'target': 'label'},
                    {'source': 'description', 'target': 'description'},
                    {'source': 'min_depth', 'processor': robust_float},
                    {'source': 'max_depth', 'processor': robust_float},
                ]
            },
            {
                'id': 'va',
                'class': self.dao.schema['sources']['VA'],
                'mappings': [
                    {'source': 'gear_id', 'target': 'gear_id'},
                    {'source': 'feature_id', 'target': 'feature_id'},
                    {'source': 'substrate_id', 'target': 'substrate_id'},
                    {'source': 'energy_id', 'target': 'energy_id'},
                    {'source': 's', 'target': 's', 'processor': robust_int},
                    {'source': 'r', 'target': 'r', 'processor': robust_int},
                ]
            },
            {
                'id': 'fishing_efforts',
                'optional': True,
                'class': self.dao.schema['sources']['Effort'],
                'mappings': [
                    {'source': 'cell_id', 'target': 'cell_id',
                     'processor': robust_int},
                    {'source': 'time', 'target': 'time',
                     'processor': robust_int},
                    'gear_id',
                    # note: we assume 'a' is already in km^2.
                    {'source': 'a', 'processor': robust_float},
                    {'source': 'hours_fished', 'processor': robust_float},
                    {'source': 'value', 'processor': robust_float},
                ]
            },
            {
                'id': 'model_parameters',
                'class': self.dao.schema['sources']['ModelParameters'],
                'mappings': [
                    'time_start',
                    'time_end',
                    'time_step',
                    {'source': 't_0', 'target': 't_0', 'processor': robust_float},
                    {'source': 't_1', 'target': 't_1', 'processor': robust_float},
                    {'source': 't_2', 'target': 't_2', 'processor': robust_float},
                    {'source': 't_3', 'target': 't_3', 'processor': robust_float},
                    {'source': 'w_0', 'target': 'w_0', 'processor': robust_float},
                    {'source': 'w_1', 'target': 'w_1', 'processor': robust_float},
                    {'source': 'w_2', 'target': 'w_2', 'processor': robust_float},
                    {'source': 'w_3', 'target': 'w_3', 'processor': robust_float},
                    {'source': 'effort_model', 'default': 'nominal'},
                    {'source': 'projection', 'target': 'projection',
                     # Fall back to the default geographic CRS if no
                     # projection is given.
                     'default': gis_util.get_default_geographic_crs()},
                ],
            },
        ]
        for section in csv_sections:
            self.ingest_csv_section(section)

        # Convenience shortcuts.
        self.model_parameters = self.dao.query('__ModelParameters').fetchone()
        self.geographic_crs = self.model_parameters.projection

        self.ingest_grid()
        self.ingest_habitats()
        self.post_ingest()

    def ingest_csv_section(self, section):
        csv_file = os.path.join(self.data_dir, "%s.csv" % section['id'])
        if not os.path.isfile(csv_file):
            if not section.get('optional'):
                raise Exception(
                    ("Error ingesting '%s': "
                     "File '%s' is required and was not found.") % (
                        section['id'], csv_file))
            else:
                return
        base_msg = "Ingesting '%s'..." % section['id']
        self.logger.info(base_msg)
        section_config = self.config.get('sections', {}).get(
            section['id'], {})
        Ingestor(
            reader=CSVReader(csv_file=csv_file),
            processors=[
                ClassMapper(clazz=section['class'],
                            mappings=section['mappings']),
                DAOWriter(dao=self.dao, commit_interval=self.commit_interval),
            ],
            logger=self.get_section_logger(section['id'], base_msg),
            limit=section_config.get('limit'),
        ).ingest()
        self.dao.commit()

    def ingest_grid(self):
        base_msg = "Ingesting 'grid'..."
        self.logger.info(base_msg)
        grid_logger = self.get_section_logger('grid', base_msg)
        self.cells = {}
        grid_file = os.path.join(self.data_dir, 'grid', "grid.shp")
        grid_config = self.config.get('sections', {}).get('grid', {})
        Ingestor(
            reader=ShapefileReader(
                shp_file=grid_file,
                reproject_to='EPSG:4326',
            ),
            processors=[
                ClassMapper(
                    clazz=self.dao.schema['sources']['Cell'],
                    mappings=[
                        {'source': 'ID', 'target': 'id', 'processor': int},
                        {'source': '__shape', 'target': 'shape'},
                        {'source': '__shape', 'target': 'geom_wkt',
                         'processor': gis_util.shape_to_wkt},
                    ]
                ),
                self.add_area_mbr,
                DictWriter(dict_=self.cells),
            ],
            logger=grid_logger,
            limit=grid_config.get('limit'),
        ).ingest()

    def ingest_habitats(self):
        base_msg = "Ingesting 'habitats'..."
        self.logger.info(base_msg)
        habs_logger = self.get_section_logger('habs', base_msg)
        self.habs = {}
        self.habs_spatial_hash = SpatialHash(cell_size=self.hash_cell_size)
        habs_file = os.path.join(self.data_dir, 'habitats', "habitats.shp")
        habs_config = self.config.get('sections', {}).get('habitats', {})

        def add_to_habs_spatial_hash(data=None, **kwargs):
            self.habs_spatial_hash.add_rect(data.mbr, data)
            return data

        def process_neg_depth(neg_depth):
            # Habitat depths arrive as negative elevations; flip the sign.
            if neg_depth is not None:
                depth = -1.0 * float(neg_depth)
                return depth

        Ingestor(
            reader=ShapefileReader(
                shp_file=habs_file,
                reproject_to='EPSG:4326',
            ),
            processors=[
                ClassMapper(
                    clazz=self.dao.schema['sources']['Habitat'],
                    mappings=[
                        {'source': 'SUBSTRATE', 'target': 'substrate_id'},
                        {'source': 'ENERGY', 'target': 'energy_id'},
                        {'source': 'Z', 'target': 'depth',
                         'processor': process_neg_depth},
                        {'source': '__shape', 'target': 'shape'},
                    ]
                ),
                self.add_area_mbr,
                add_to_habs_spatial_hash,
                DictWriter(dict_=self.habs),
            ],
            logger=habs_logger,
            limit=habs_config.get('limit'),
        ).ingest()

    def get_section_logger(self, section_id, base_msg):
        logger = logging.getLogger("%s_%s" % (id(self), section_id))
        formatter = logging.Formatter(base_msg + ' %(message)s.')
        log_handler = LoggerLogHandler(self.logger)
        log_handler.setFormatter(formatter)
        logger.addHandler(log_handler)
        logger.setLevel(self.logger.level)
        return logger

    def post_ingest(self):
        self.post_process_cells()
        # Allow for cells and habs to be garbage collected.
        self.cells = None
        self.habs = None
        self.habs_spatial_hash = None

    def post_process_cells(self, log_interval=1000):
        base_msg = 'Calculating cell compositions...'
        self.logger.info(base_msg)
        logger = self.get_section_logger('habitat_areas', base_msg)
        num_cells = len(self.cells)
        counter = 0
        for cell in self.cells.values():
            counter += 1
            if (counter % log_interval) == 0:
                logger.info(" %d of %d (%.1f%%)" % (
                    counter, num_cells, 1.0 * counter / num_cells * 100))
            composition = {}
            cell.depth = 0
            # Get candidate intersecting habitats.
            candidate_habs = self.habs_spatial_hash.items_for_rect(cell.mbr)
            for hab in candidate_habs:
                intersection = gis_util.get_intersection(cell.shape, hab.shape)
                if not intersection:
                    continue
                intersection_area = gis_util.get_shape_area(
                    intersection,
                    target_crs=self.geographic_crs,
                )
                hab_key = (hab.substrate_id, hab.energy_id,)
                pct_area = intersection_area / cell.area
                composition[hab_key] = composition.get(hab_key, 0) + pct_area
                cell.depth += pct_area * hab.depth
            cell.habitat_composition = composition

            # Convert cell area to km^2.
            cell.area = cell.area / (1000.0 ** 2)

            self.dao.save(cell, commit=False)
        self.dao.commit()

    # Define processor for adding area, mbr to geom entities.
    def add_area_mbr(self, data=None, **kwargs):
        data.area = gis_util.get_shape_area(data.shape,
                                            target_crs=self.geographic_crs)
        data.mbr = gis_util.get_shape_mbr(data.shape)
        return data
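
# Example usage (editor's sketch, not part of the original module): a minimal
# illustration of how SASI_Ingestor might be driven. It assumes a DAO object
# exposing .schema['sources'], .query(), .save(), and .commit(), and a data_dir
# laid out as <data_dir>/<section>.csv plus grid/grid.shp and
# habitats/habitats.shp. Building the dao is left to the caller because it is
# project-specific.
def example_run_sasi_ingest(data_dir, dao):
    logging.basicConfig(level=logging.INFO)
    ingestor = SASI_Ingestor(
        data_dir=data_dir,  # dir containing substrates.csv, energies.csv, etc.
        dao=dao,            # project DAO (assumed interface described above)
        logger=logging.getLogger('sasi_ingest_example'),
        config={
            'commit_interval': 1000,
            # Optional per-section overrides, e.g. a row limit for a dry run.
            'sections': {'fishing_efforts': {'limit': 1000}},
        },
    )
    # Runs the CSV, grid, and habitat ingests, then cell post-processing.
    ingestor.ingest()
    return ingestor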
import csv
import logging
import shutil
import tempfile

# NOTE: as above, project-level helpers (task_manager, models, gis_util,
# SpatialHash, Ingestor, CSVReader, ShapefileReader, ClassMapper, DictWriter,
# LoggerLogHandler) are assumed to be importable from the surrounding package.


class SASIGridderTask(task_manager.Task):

    def __init__(self, config={}, data={}, **kwargs):
        super(SASIGridderTask, self).__init__(**kwargs)
        self.logger.debug("SASIGridderTask.__init__")
        self.data = data
        self.config = config
        self.value_attrs = models.Effort.value_attrs
        self.key_attrs = ['gear_id', 'time']

        # Define trip type to gear code mappings.
        self.trip_type_gear_mappings = kwargs.get('gear_mappings', {
            'hy_drg': 'GC30',
            'otter': 'GC10',
            'sca-gc': 'GC21',
            'sca-la': 'GC20',
            'shrimp': 'GC11',
            'squid': 'GC12',
            'raised': 'GC13',
            'trap': 'GC60',
            'gillne': 'GC50',
            'longli': 'GC40',
        })

        for kwarg in ['raw_efforts_path', 'grid_path', 'stat_areas_path',
                      'output_path', 'effort_limit']:
            setattr(self, kwarg, kwargs.get(kwarg))

        if not self.output_path:
            os_hndl, self.output_path = tempfile.mkstemp(
                prefix="gridded_efforts.", suffix='.csv')

        self.message_logger = logging.getLogger("Task%s_msglogger" % id(self))
        main_log_handler = LoggerLogHandler(self.logger)
        main_log_handler.setFormatter(logging.Formatter('%(message)s'))
        self.message_logger.addHandler(main_log_handler)
        self.message_logger.setLevel(self.logger.level)

    def call(self):
        self.progress = 1
        self.message_logger.info("Starting...")

        # Create build dir.
        build_dir = tempfile.mkdtemp(prefix="gridderWork.")

        # Read in data.
        base_msg = "Ingesting..."
        ingest_logger = self.get_logger_logger('ingest', base_msg,
                                               self.logger)
        self.message_logger.info(base_msg)

        # Read in cells.
        self.ingest_cells(parent_logger=ingest_logger, limit=None)

        # Read in stat_areas.
        self.ingest_stat_areas(parent_logger=ingest_logger)

        #
        # Main part of the gridding task.
        #
        base_msg = "Gridding."
        gridding_logger = self.get_logger_logger('gridding', base_msg,
                                                 self.logger)
        self.message_logger.info(base_msg)

        #
        # 0. Terms used here:
        # 'clean' efforts can be assigned to a cell.
        # 'kinda_dirty' efforts can be assigned to a stat_area.
        # 'super_dirty' efforts cannot be assigned to a cell or a stat_area.
        #
        # Running example:
        # We start with two cells, 'C1' and 'C2', and one stat_area,
        # 'StatArea1'. 'StatArea1' contains 50% of 'C1', and 100% of 'C2'.
        #

        #
        # 1. Assign 'clean' efforts to cells, assign kinda-dirty efforts to
        # stat areas, and save super-dirty efforts to the super-dirty
        # efforts list.
        #
        # Running example:
        # We have 100 points of clean effort which can be assigned to 'C1',
        # 100 points of clean effort which can be assigned to 'C2',
        # 100 points of kinda-dirty effort which can be assigned to
        # 'StatArea1', and 100 points of super-dirty effort which can't be
        # assigned to anything.
        # After this first step, both 'C1' and 'C2' will have 100 points of
        # effort assigned from clean efforts.
        #

        # Do the first pass on efforts as we read them in.
        base_msg = "Assigning raw efforts to cells/stat_areas ... "
        fp_logger = self.get_logger_logger('first_pass', base_msg,
                                           gridding_logger)
        fp_logger.info(base_msg)

        unassigned = {}
        logging_interval = 1e4

        # Define functions to handle raw effort columns.
        def trip_type_to_gear_id(trip_type):
            return self.trip_type_gear_mappings.get(trip_type)

        def float_w_empty_dot(value):
            if value == '.' or value == '':
                return None
            elif value is not None:
                return float(value)

        # Define function to execute after each raw effort is mapped to an
        # effort column. This is the first pass described above.
        def first_pass(data=None, **kwargs):
            # effort = data
            # if (effort_counter % 1e3) == 0:
            #     print ["%s: %.3e" % (k, v) for k, v in c_.items()]

            # If the effort has a lat and lon...
            if data.lat is not None and data.lon is not None:
                # Can the effort be assigned to a cell?
                cell = self.get_cell_for_pos(data.lat, data.lon)
                if cell:
                    self.add_effort_to_cell(cell, data)
                    return
                # Otherwise, can the effort be assigned to a stat area?
                stat_area = self.get_stat_area_for_pos(data.lat, data.lon)
                if stat_area:
                    self.add_effort_to_stat_area(stat_area, data)
                    return
                # Otherwise add to unassigned.
                else:
                    self.add_effort_to_unassigned(unassigned, data)
                    return

            # Otherwise, if the effort has a stat area...
            elif data.stat_area_id is not None:
                stat_area = self.stat_areas.get(data.stat_area_id)
                if not stat_area:
                    self.add_effort_to_unassigned(unassigned, data)
                    return
                else:
                    self.add_effort_to_stat_area(stat_area, data)
                    return

            # Otherwise add to unassigned list.
            else:
                self.add_effort_to_unassigned(unassigned, data)
                return

        # Create and run effort ingestor.
        ingestor = Ingestor(
            reader=CSVReader(csv_file=self.raw_efforts_path),
            processors=[
                ClassMapper(
                    clazz=models.Effort,
                    mappings=[
                        {'source': 'trip_type', 'target': 'gear_id',
                         'processor': trip_type_to_gear_id},
                        {'source': 'year', 'target': 'time',
                         'processor': float_w_empty_dot},
                        {'source': 'nemarea', 'target': 'stat_area_id',
                         'processor': float_w_empty_dot},
                        {'source': 'A', 'target': 'a',
                         'processor': float_w_empty_dot},
                        {'source': 'value', 'target': 'value',
                         'processor': float_w_empty_dot},
                        {'source': 'hours_fished', 'target': 'hours_fished',
                         'processor': float_w_empty_dot},
                        {'source': 'lat', 'target': 'lat',
                         'processor': float_w_empty_dot},
                        {'source': 'lon', 'target': 'lon',
                         'processor': float_w_empty_dot},
                    ],
                ),
                first_pass,
            ],
            logger=fp_logger,
            get_count=True,
            limit=self.effort_limit,
        ).ingest()

        #
        # 2. For each effort assigned to a stat area, distribute values
        # across cracked cells in that stat area. We distribute values in
        # proportion to the amount of value contained in the cracked cell
        # relative to the total amount of 'clean' value the stat area
        # already contains.
        #
        # Running Example:
        # We now distribute the 100 points of kinda-dirty effort which can
        # be assigned to 'StatArea1'. We distribute the effort
        # proportionally to the cracked cells, so that 'C1' gets 33
        # additional effort points, and 'C2' gets 66 additional effort
        # points.
        #
        base_msg = "Distributing stat_area values to cells ... "
        sa_logger = self.get_logger_logger('stat_areas', base_msg,
                                           gridding_logger)
        sa_logger.info(base_msg)

        num_stat_areas = len(self.stat_areas)
        logging_interval = 1
        sa_counter = 0
        for stat_area in self.stat_areas.values():
            sa_counter += 1
            if (sa_counter % logging_interval) == 0:
                sa_logger.info("stat_area %s of %s (%.1f%%)" % (
                    sa_counter, num_stat_areas,
                    100.0 * sa_counter / num_stat_areas))

            # Get stat area values.
            sa_keyed_values = self.sa_values.setdefault(stat_area.id, {})

            # Get list of cracked cells.
            cracked_cells = self.get_cracked_cells_for_stat_area(stat_area)

            # Calculate totals for values across cracked cells.
            ccell_totals = {}
            for ccell in cracked_cells:
                for effort_key, ccell_values in ccell.keyed_values.items():
                    ccell_totals_values = ccell_totals.setdefault(
                        effort_key, self.new_values_dict())
                    for attr, ccell_value in ccell_values.items():
                        ccell_totals_values[attr] += ccell_value

            # Distribute the stat area's values across the cracked cells,
            # in proportion to the cracked cell's values as a percentage
            # of the stat area's cracked cell totals.
            for ccell in cracked_cells:
                pcell_keyed_values = self.c_values[ccell.parent_cell.id]
                for effort_key, sa_values in sa_keyed_values.items():
                    ccell_totals_values = ccell_totals.get(effort_key)
                    ccell_values = ccell.keyed_values.get(effort_key)
                    pcell_values = pcell_keyed_values.setdefault(
                        effort_key, self.new_values_dict())
                    if not ccell_totals_values or not ccell_values:
                        continue
                    for attr, sa_value in sa_values.items():
                        # Don't add anything for empty values.
                        # This also avoids division by zero errors.
                        if not sa_value:
                            continue
                        ccell_value = ccell_values.get(attr, 0.0)
                        ccell_totals_value = ccell_totals_values.get(attr, 0.0)
                        if not ccell_value or not ccell_totals_value:
                            continue
                        pct_value = ccell_value / ccell_totals_value
                        # Add proportional value to the cracked cell's
                        # parent cell.
                        pcell_values[attr] += sa_value * pct_value

        #
        # 3. For efforts which could not be assigned to a cell or a stat
        # area ('super-dirty' efforts), distribute the efforts across all
        # cells, such that the amount of effort each cell receives is
        # proportional to the cell's contribution to the overall total.
        #
        # Running Example:
        # We start with cells 'C1' and 'C2'.
        # 'C1' starts with 133 effort points from clean + kinda-dirty
        # efforts. Likewise, 'C2' starts with 166 effort points from clean
        # + kinda-dirty efforts.
        # Our overall total is 133 + 166 = 299, roughly 300.
        # 'C1' is responsible for roughly 133/300 = 45% of the total effort.
        # 'C2' is responsible for roughly 166/300 = 55% of the total effort.
        # We then have 100 additional points of super-dirty effort which
        # could not be assigned to any cell or stat area.
        # We distribute that effort proportionally to the cells, so that
        # 'C1' gets 45 additional effort points and 'C2' gets 55.
        # Our final result is that 'C1' has 133 + 45 = 178 effort points,
        # and 'C2' has 166 + 55 = 221 effort points.
        #
        base_msg = "Distributing unassigned values to cells ... "
        unassigned_logger = self.get_logger_logger('unassigned', base_msg,
                                                   gridding_logger)
        unassigned_logger.info(base_msg)

        # Calculate totals across all cells.
        totals = {}
        num_cells = len(self.cells)
        for cell in self.cells.values():
            cell_keyed_values = self.c_values[cell.id]
            for effort_key, cell_values in cell_keyed_values.items():
                totals_values = totals.setdefault(
                    effort_key, self.new_values_dict())
                for attr, cell_value in cell_values.items():
                    totals_values[attr] += cell_value

        # Distribute unassigned efforts across all cells,
        # in proportion to the cell's values as a percentage of the total.
        logging_interval = 1e3
        cell_counter = 0
        for cell in self.cells.values():
            cell_counter += 1
            if (cell_counter % logging_interval) == 0:
                unassigned_logger.info("cell %s of %s (%.1f%%)" % (
                    cell_counter, num_cells,
                    100.0 * cell_counter / num_cells))
            cell_keyed_values = self.c_values[cell.id]
            for effort_key, unassigned_values in unassigned.items():
                cell_values = cell_keyed_values.get(effort_key)
                totals_values = totals.get(effort_key)
                if not cell_values or not totals_values:
                    continue
                for attr, unassigned_value in unassigned_values.items():
                    if not unassigned_value:
                        continue
                    cell_value = cell_values.get(attr, 0.0)
                    total_value = totals_values.get(attr, 0.0)
                    if not cell_value or not total_value:
                        continue
                    # The cell's share is its fraction of the overall total.
                    pct_value = cell_value / total_value
                    cell_values[attr] += unassigned_value * pct_value

        # Done with gridding. At this point the effort has been distributed.
        # Note that there may be some efforts which are not included.
        # For example, if an unassigned effort has an effort_key which is
        # not used by any effort assigned to a cell or a stat_area, then
        # no cell will have a non-zero pct_value for that effort_key.

        #
        # Output gridded efforts.
        #
        with open(self.output_path, "w") as f:
            w = csv.writer(f)
            fields = ['cell_id'] + self.key_attrs + self.value_attrs
            w.writerow(fields)
            for cell in self.cells.values():
                cell_keyed_values = self.c_values[cell.id]
                for keys, values in cell_keyed_values.items():
                    row_dict = {'cell_id': cell.id}
                    for i in range(len(self.key_attrs)):
                        row_dict[self.key_attrs[i]] = keys[i]
                    row_dict.update(values)
                    w.writerow([row_dict[field] for field in fields])

        shutil.rmtree(build_dir)

        self.progress = 100
        self.message_logger.info("Gridding completed, output file is: '%s'" % (
            self.output_path))
        self.data['output_file'] = self.output_path
        self.status = 'resolved'

    def get_logger_logger(self, name=None, base_msg=None, parent_logger=None):
        logger = logging.getLogger("%s_%s" % (id(self), name))
        formatter = logging.Formatter(base_msg + ' %(message)s.')
        log_handler = LoggerLogHandler(parent_logger)
        log_handler.setFormatter(formatter)
        logger.addHandler(log_handler)
        logger.setLevel(self.message_logger.level)
        return logger

    def get_cell_for_pos(self, lat, lon):
        """ Get cell which contains given point, via spatial hash. """
        pos_wkt = 'POINT(%s %s)' % (lon, lat)
        pnt_shp = gis_util.wkt_to_shape(pos_wkt)
        candidates = self.cell_spatial_hash.items_for_point((lon, lat))
        for c in candidates:
            if gis_util.get_intersection(c.shape, pnt_shp):
                return c
        return None

    def get_stat_area_for_pos(self, lat, lon):
        pos_wkt = 'POINT(%s %s)' % (lon, lat)
        pnt_shp = gis_util.wkt_to_shape(pos_wkt)
        candidates = self.sa_spatial_hash.items_for_point((lon, lat))
        for c in candidates:
            if gis_util.get_intersection(c.shape, pnt_shp):
                return c
        return None

    def new_values_dict(self):
        return dict(zip(self.value_attrs, [0.0] * len(self.value_attrs)))

    def update_values_dict(self, values_dict, effort):
        for k in values_dict.keys():
            effort_value = getattr(effort, k, 0.0)
            if effort_value is None:
                effort_value = 0.0
            values_dict[k] += effort_value

    def add_effort_to_keyed_values_dict(self, kvd, effort):
        values = kvd.setdefault(
            self.get_effort_key(effort),
            self.new_values_dict()
        )
        self.update_values_dict(values, effort)

    def add_effort_to_cell(self, cell, effort):
        cell_keyed_values = self.c_values[cell.id]
        self.add_effort_to_keyed_values_dict(cell_keyed_values, effort)

    def add_effort_to_stat_area(self, stat_area, effort):
        sa_keyed_values = self.sa_values.setdefault(stat_area.id, {})
        self.add_effort_to_keyed_values_dict(sa_keyed_values, effort)

    def add_effort_to_unassigned(self, unassigned, effort):
        self.add_effort_to_keyed_values_dict(unassigned, effort)

    def get_effort_key(self, effort):
        """ Key for grouping values by effort types. """
        return tuple([getattr(effort, attr, None) for attr in self.key_attrs])

    def ingest_cells(self, parent_logger=None, limit=None):
        self.cells = {}
        self.cell_spatial_hash = SpatialHash(cell_size=.1)
        self.c_values = {}
        logger = self.get_logger_logger(
            name='cell_ingest',
            base_msg='Ingesting cells...',
            parent_logger=parent_logger
        )
        Ingestor(
            reader=ShapefileReader(shp_file=self.grid_path,
                                   reproject_to='EPSG:4326'),
            processors=[
                ClassMapper(
                    clazz=models.Cell,
                    mappings=[
                        {'source': 'ID', 'target': 'id'},
                        {'source': '__shape', 'target': 'shape'},
                    ],
                ),
                DictWriter(dict_=self.cells, key_func=lambda c: c.id),
            ],
            logger=logger,
            limit=limit
        ).ingest()

        # Calculate cell areas, add cells to the spatial hash,
        # and initialize c_values.
        for cell in self.cells.values():
            cell.area = gis_util.get_shape_area(cell.shape)
            cell.mbr = gis_util.get_shape_mbr(cell.shape)
            self.cell_spatial_hash.add_rect(cell.mbr, cell)
            self.c_values[cell.id] = {}

    def ingest_stat_areas(self, parent_logger=None, limit=None):
        self.stat_areas = {}
        self.sa_spatial_hash = SpatialHash(cell_size=.1)
        self.sa_values = {}
        logger = self.get_logger_logger(
            name='stat_area_ingest',
            base_msg='Ingesting stat_areas...',
            parent_logger=parent_logger
        )
        Ingestor(
            reader=ShapefileReader(shp_file=self.stat_areas_path,
                                   reproject_to='EPSG:4326'),
            processors=[
                ClassMapper(
                    clazz=models.StatArea,
                    mappings=[
                        {'source': 'SAREA', 'target': 'id'},
                        {'source': '__shape', 'target': 'shape'},
                    ],
                ),
                DictWriter(dict_=self.stat_areas, key_func=lambda sa: sa.id),
            ],
            logger=logger,
            limit=limit
        ).ingest()

        # Add to spatial hash.
        for stat_area in self.stat_areas.values():
            stat_area.mbr = gis_util.get_shape_mbr(stat_area.shape)
            self.sa_spatial_hash.add_rect(stat_area.mbr, stat_area)

    def get_cracked_cells_for_stat_area(self, stat_area):
        cracked_cells = []
        candidates = self.cell_spatial_hash.items_for_rect(stat_area.mbr)
        for icell in candidates:
            intersection = gis_util.get_intersection(stat_area.shape,
                                                     icell.shape)
            if not intersection:
                continue
            intersection_area = gis_util.get_shape_area(intersection)
            pct_area = intersection_area / icell.area

            # Set cracked cell values in proportion to percentage
            # of parent cell's area.
            ccell_keyed_values = {}
            icell_keyed_values = self.c_values[icell.id]
            for effort_key, icell_values in icell_keyed_values.items():
                ccell_values = ccell_keyed_values.setdefault(effort_key, {})
                for attr, value in icell_values.items():
                    ccell_values[attr] = pct_area * value

            cracked_cells.append(models.CrackedCell(
                parent_cell=icell,
                area=intersection_area,
                keyed_values=ccell_keyed_values,
            ))
        return cracked_cells
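
# Example usage (editor's sketch, not part of the original module): a minimal
# illustration of running the gridder task directly. It assumes the
# task_manager.Task base class accepts these keyword arguments and provides
# self.logger; the file paths are placeholders supplied by the caller.
def example_run_gridder(raw_efforts_path, grid_path, stat_areas_path,
                        output_path=None):
    task = SASIGridderTask(
        raw_efforts_path=raw_efforts_path,  # raw efforts CSV (trip_type, year, lat, lon, ...)
        grid_path=grid_path,                # grid shapefile (.shp)
        stat_areas_path=stat_areas_path,    # stat areas shapefile (.shp)
        output_path=output_path,            # defaults to a temp file when omitted
        effort_limit=None,                  # optionally cap the number of efforts read
    )
    # call() ingests the inputs, grids the efforts, and writes the output CSV.
    task.call()
    return task.data['output_file']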