def relation(self, relation):
    """Batch a relation with its dependencies and flush the batch when due.

    After the base-class handler has run, a relation that belongs to this
    pool worker (or is the last element) and passes ``is_new_element`` is
    converted to dicts together with its member nodes, ways and relations,
    has its ids rewritten by the batch manager, and is added to the current
    batch.

    When the last element is reached, or the batch for the current entity
    type is full, the batch is written to a temporary OSM file, converted
    to GeoJSON by the GDAL handler, and every restored relation is written
    out as JSONL. The batch manager is then reset.

    :param relation: relation object passed in by the parser callback.
    """
    OsmParser.relation(self, relation)
    if self.is_item_index_for_current_pool_index() or self.is_last_element():
        osm_timestamp = elements_transformer.osm_timestamp_from_osm_entity(relation)
        if self.is_new_element(osm_timestamp):
            relation_dict, relation_nodes, relation_ways, relation_relations = \
                self.get_relation_and_its_dependencies_as_dict(relation)
            # Ids are rewritten before batching; the originals are restored
            # later by restore_relations_ids_and_add_geometry().
            self.batch_manager.replace_ids_in_relation_and_its_dependencies(relation_dict,
                                                                             relation_nodes,
                                                                             relation_ways,
                                                                             relation_relations)
            self.batch_manager.add_osm_dicts_to_batches(relation_nodes, relation_ways, relation_relations,
                                                        relation_dict)
        # NOTE(review): the flush check is placed outside the is_new_element
        # branch so that the last element triggers a flush even when it is
        # not itself new -- confirm against the intended nesting.
        if self.is_last_element() or \
                (self.is_item_index_for_current_pool_index() and self.batch_manager.is_full(self.current_entity_type)):
            temp_osm_file_name = self.generate_batch_osm_file_name(self.pool_size)
            self.sort_and_write_to_osm_file(temp_osm_file_name)
            # Only the ids returned here are requested from the GDAL conversion.
            target_ids = self.batch_manager.get_main_relations_simplified_ids()
            id_geometry_map = self.gdal_handler.osm_to_geojson(temp_osm_file_name, self.current_entity_type,
                                                               target_ids)

            def prepare_and_write_out(restored_relation_dict):
                # Callback handed to the batch manager: reshape the restored
                # relation for the BigQuery schema, then emit it as JSONL.
                restored_relation_dict = elements_transformer.edit_relation_dict_according_to_bq_schema(
                    restored_relation_dict)
                self.write_out_to_jsonl(self.current_entity_type, restored_relation_dict)

            self.batch_manager.restore_relations_ids_and_add_geometry(id_geometry_map, prepare_and_write_out)
            # Start the next batch from a clean state.
            self.batch_manager.reset()
            # NOTE(review): assumed to be logged once per flush -- confirm.
            self.log_indexer_efficiency_data()
def node(self, node):
    """Write one JSONL record for a node handled by this pool worker.

    The base-class handler always runs first. The node is then skipped
    unless it belongs to the current pool index and ``is_new_element``
    accepts its timestamp. The geometry passed on is the ``str()`` of a
    Point dict built from the node's lon/lat, or ``None`` when the node's
    location is not valid.

    :param node: node object passed in by the parser callback.
    """
    OsmParser.node(self, node)

    if not self.is_item_index_for_current_pool_index():
        return

    timestamp = elements_transformer.osm_timestamp_from_osm_entity(node)
    if not self.is_new_element(timestamp):
        return

    location = node.location
    geometry = None
    if location.valid():
        # NOTE(review): str() of a dict gives a Python repr (single quotes),
        # not strict JSON -- kept as-is; confirm downstream expects this.
        geometry = str({"type": "Point", "coordinates": [location.lon, location.lat]})

    record = elements_transformer.osm_entity_node_dict(node, geometry)
    self.write_out_to_jsonl(self.current_entity_type, record)
def __init__(self, osm_indexer_map, processing_counter, last_max_element_timestamp, num_db_shards,
             pool_size=1, pool_index=0, batch_size_to_commit=1000000, logging_range_count=1000000,
             with_relations=False):
    """Set up the handler and zero its running counters.

    ``processing_counter``, ``logging_range_count``, ``pool_size`` and
    ``pool_index`` are forwarded to ``OsmParser.__init__``; every other
    argument is stored on the instance under its own name.

    :param osm_indexer_map: indexer collection stored as-is.
    :param processing_counter: forwarded to the base class.
    :param last_max_element_timestamp: stored as-is.
    :param num_db_shards: stored as-is.
    :param pool_size: forwarded to the base class.
    :param pool_index: forwarded to the base class.
    :param batch_size_to_commit: stored as-is.
    :param logging_range_count: forwarded to the base class.
    :param with_relations: stored as-is.
    """
    OsmParser.__init__(self, processing_counter, logging_range_count, pool_size, pool_index)

    # Configuration supplied by the caller.
    self.osm_indexer_map = osm_indexer_map
    self.num_db_shards = num_db_shards
    self.last_max_element_timestamp = last_max_element_timestamp
    self.batch_size_to_commit = batch_size_to_commit
    self.with_relations = with_relations

    # Running state, starting from zero.
    self.max_timestamp = 0
    self.added_records = 0
def __init__(self, osm_indexer_map, num_shards, entities_out_files_dict, last_max_element_timestamps, work_dir,
             processing_counter, entities_number, pool_index, pool_size, logging_range_count=100000,
             gdal_batch_size=5000, ignore_subrelations=True):
    """Set up the handler together with its GDAL and batching helpers.

    ``processing_counter``, ``logging_range_count``, ``pool_size`` and
    ``pool_index`` are forwarded to ``OsmParser.__init__``. The remaining
    arguments are stored on the instance or used to build the GeoJSON
    factory, the GDAL handler and the batch manager.

    :param osm_indexer_map: indexer collection; when it holds exactly one
        entry, the entry at key/index ``0`` is cached as ``single_indexer``.
    :param num_shards: stored as-is.
    :param entities_out_files_dict: stored as-is.
    :param last_max_element_timestamps: stored as-is.
    :param work_dir: stored and passed to the GDAL handler.
    :param processing_counter: forwarded to the base class.
    :param entities_number: stored and passed to the batch manager.
    :param pool_index: forwarded to the base class.
    :param pool_size: forwarded to the base class.
    :param logging_range_count: forwarded to the base class.
    :param gdal_batch_size: passed to the batch manager.
    :param ignore_subrelations: stored as-is.
    """
    OsmParser.__init__(self, processing_counter, logging_range_count, pool_size, pool_index)

    # Plain configuration copied from the arguments.
    self.osm_indexer_map = osm_indexer_map
    self.num_shards = num_shards
    self.entities_out_files_dict = entities_out_files_dict
    self.work_dir = work_dir
    self.entities_number = entities_number
    self.ignore_subrelations = ignore_subrelations
    self.last_max_element_timestamps = last_max_element_timestamps

    # Helper objects, constructed in the same order as before.
    self.geo_json_factory = osmium.geom.GeoJSONFactory()
    self.gdal_handler = gdal_handler.GDALHandler("gdal/run_ogr.sh", "gdal/osmconf.ini", work_dir)
    self.batch_manager = elements_processing.BatchManager(gdal_batch_size, entities_number)

    # Cache the only indexer when there is exactly one.
    has_single_indexer = len(self.osm_indexer_map) == 1
    self.with_single_indexer = has_single_indexer
    if has_single_indexer:
        self.single_indexer = self.osm_indexer_map[0]
    else:
        self.single_indexer = None
def relation(self, relation):
    """Run the base relation handler, then process the relation.

    :param relation: relation object passed in by the parser callback.
    """
    OsmParser.relation(self, relation)
    # Read only after the base handler has run, as the original did.
    entity_type = self.current_entity_type
    self.process_osm_object(relation, entity_type)
def way(self, way):
    """Run the base way handler, then process the way.

    :param way: way object passed in by the parser callback.
    """
    OsmParser.way(self, way)
    # Read only after the base handler has run, as the original did.
    entity_type = self.current_entity_type
    self.process_osm_object(way, entity_type)
def node(self, node):
    """Run the base node handler, then process the node.

    :param node: node object passed in by the parser callback.
    """
    OsmParser.node(self, node)
    # Read only after the base handler has run, as the original did.
    entity_type = self.current_entity_type
    self.process_osm_object(node, entity_type)