def create_temporary_table(self):
    """Create the temporary province table.

    Executes a ``CREATE TABLE`` statement on ``self.cur`` for the table
    named by ``self.temporary_table_name``. The table has two ``NOT NULL``
    columns: ``cpro`` (``CHAR(2)``) and ``province`` (``TEXT``).
    """
    log.info('Creating province table')
    # The table name is interpolated straight into the statement text.
    statement = (
        f'CREATE TABLE {self.temporary_table_name} ('
        ' cpro CHAR(2) NOT NULL,'
        ' province TEXT NOT NULL);'
    )
    self.cur.execute(statement)
def extract_shapefiles(self, zip_filepath):
    """Extract the matching shapefile members from a zip archive.

    Every archive member whose name contains both
    ``MUNICIPALITIES_PATTERN`` and ``YEAR_PATTERN`` is extracted under
    ``self.tmpdir.name``.

    Args:
        zip_filepath: Path of the zip archive to read.

    Returns:
        A list with the on-disk paths of the extracted members whose
        archive name ends with ``.shp``, in archive order. The list is
        empty when no member matches.
    """
    log.info('Extracting zip with shapefiles')
    shapefiles = []
    # Named ``archive`` (not ``zip``) so the builtin ``zip`` stays usable.
    with ZipFile(zip_filepath, 'r') as archive:
        for filename in archive.namelist():
            if (MUNICIPALITIES_PATTERN not in filename
                    or YEAR_PATTERN not in filename):
                continue
            # ``extract`` returns the normalised path it really wrote.
            # That can differ from a plain join of directory and member
            # name when the member name gets sanitised on extraction.
            extracted_path = archive.extract(
                filename, path=self.tmpdir.name)
            if filename.endswith('.shp'):
                shapefiles.append(extracted_path)
    return shapefiles
def populate_table(self):
    """Import the dataset's shapefile into PostgreSQL with ``ogr2ogr``.

    Runs ``ogr2ogr`` with the ``PostgreSQL`` output format against
    ``self.connection_string``, passing ``-overwrite`` and
    ``-nlt MultiPolygon``, with ``self.dataset['shapefile']`` as input.

    Raises:
        subprocess.CalledProcessError: If ``ogr2ogr`` exits with a
            non-zero status.
    """
    log.info('Importing shapefile')
    command = [
        'ogr2ogr',
        '-overwrite',
        '-f', 'PostgreSQL',
        f'PG:{self.connection_string}',
        '-nlt', 'MultiPolygon',
        self.dataset['shapefile'],
    ]
    # check=True turns a failed import into an exception instead of
    # letting later steps run against a missing or partial table.
    subprocess.run(command, check=True)
def merge_shapefiles(self, shapefiles):
    """Merge several shapefiles into a single one with ``ogr2ogr``.

    Each input is appended, in order, to ``<TABLE_NAME>.shp`` under
    ``self.tmpdir.name`` by one ``ogr2ogr -append -update`` call.

    Args:
        shapefiles: Iterable of paths of the shapefiles to merge.

    Returns:
        Path of the merged shapefile. When ``shapefiles`` is empty no
        command is run, so nothing is written at that path by this method.

    Raises:
        subprocess.CalledProcessError: If any ``ogr2ogr`` call exits
            with a non-zero status.
    """
    log.info('Merging shapefiles')
    merged_shapefile = os.path.join(self.tmpdir.name, f'{TABLE_NAME}.shp')
    for shapefile in shapefiles:
        command = [
            'ogr2ogr',
            '-f', 'ESRI Shapefile',
            '-append',
            '-update',
            merged_shapefile,
            shapefile,
        ]
        # check=True stops at the first failed append; otherwise a
        # silently incomplete merge would be handed to the caller.
        subprocess.run(command, check=True)
    return merged_shapefile
def download_zip(self):
    """Fetch the zipped shapefiles.

    Calls ``download_file`` with ``ZIPPED_SHAPEFILES_URL`` and a target
    path of ``shapefiles.zip`` under ``self.tmpdir.name``.

    Returns:
        The target path that was passed to ``download_file``.
    """
    log.info('Downloading zipped shapefiles')
    destination = os.path.join(self.tmpdir.name, 'shapefiles.zip')
    download_file(ZIPPED_SHAPEFILES_URL, destination)
    return destination
def create_indexes(self):
    """Add the primary key to the temporary province table.

    Executes an ``ALTER TABLE ... ADD PRIMARY KEY(cpro)`` statement on
    ``self.cur`` for the table named by ``self.temporary_table_name``.
    """
    log.info('Creating province index')
    # The table name is interpolated straight into the statement text.
    statement = (
        f'ALTER TABLE {self.temporary_table_name} '
        'ADD PRIMARY KEY(cpro);'
    )
    self.cur.execute(statement)
def populate_table(self):
    """Insert the province rows into the temporary table.

    The rows come from ``self._get_provinces()`` and are written to the
    ``cpro`` and ``province`` columns of ``self.temporary_table_name``
    through ``psycopg2.extras.execute_values`` on ``self.cur``.
    """
    log.info('Importing provinces')
    rows = self._get_provinces()
    # ``%s`` is the single placeholder that execute_values expands into
    # the list of value tuples.
    insert_sql = (
        f'INSERT INTO {self.temporary_table_name} '
        '(cpro, province) VALUES %s;'
    )
    psycopg2.extras.execute_values(self.cur, insert_sql, rows)
def get_dataset(self):
    """Fetch the provinces spreadsheet and describe the dataset.

    Calls ``download_file`` with ``PROVINCES_FILE_URL`` and a target path
    of ``codprov.xls`` under ``self.tmpdir.name``.

    Returns:
        A dict with one key, ``'provinces_file'``, holding the target
        path that was passed to ``download_file``.
    """
    xls_path = os.path.join(self.tmpdir.name, 'codprov.xls')
    log.info('Downloading provinces file')
    download_file(PROVINCES_FILE_URL, xls_path)
    dataset = {'provinces_file': xls_path}
    return dataset