def import_dataset_find_shapefiles_DUMMY(dataset_id, dataset_conf):
    """
    Create an empty key/value ``geodata_<dataset_id>`` table.

    The table has a ``key`` column (primary key) and a ``value`` text
    column; the statement is executed through ``admin_db``.

    :param dataset_id: The dataset id, used to build the table name
    :param dataset_conf: The dataset configuration (not used here)
    """
    # The dataset id is interpolated straight into the table name.
    create_table_sql = (
        """ CREATE TABLE geodata_{0} ( key CHARACTER VARYING (256) PRIMARY KEY, value TEXT); """
    ).format(dataset_id)

    with admin_db:
        with admin_db.cursor() as cursor:
            cursor.execute(create_table_sql)
def import_dataset_find_shapefiles(dataset_id, dataset_conf):
    """
    Find all the Shapefiles from archives listed as dataset resources,
    and import them into the ``geodata_<dataset_id>`` table.

    Each resource is copied into a temporary directory and scanned with
    ``find_shapefiles``. For every match that includes a ``shp`` member,
    all of its component files are extracted under a common random base
    name and ``shp2pgsql`` is used to generate the SQL for it. The table
    is then created from the first generated ``CREATE TABLE`` statement,
    and the data of every Shapefile found is appended to it.

    :param dataset_id: The dataset id; used to build the destination
        table name
    :param dataset_conf: The dataset configuration. Its ``resources``
        entry is iterated; each item is either a URL string or a
        resource dict.
    :raises ValueError: if no Shapefile was found in any resource
        (this used to surface as a bare ``IndexError``)
    """
    # Imported locally so the block does not depend on module-level
    # imports that may not be present.
    import contextlib
    import shutil

    destination_table = 'geodata_{0}'.format(dataset_id)

    create_table_sqls = []
    import_data_sqls = []

    with TemporaryDir() as tempdir:
        # First, copy zip files to temporary directory
        for resource in dataset_conf['resources']:
            # A bare string is shorthand for a resource with that URL.
            if isinstance(resource, basestring):
                resource = {'url': resource}

            # We assume the file is a zip, but we should double-check that!
            archive_path = os.path.join(tempdir, _random_file_name('zip'))

            # Copy the resource to disk
            _copy_resource_to_file(resource, archive_path)

            # Let's look for shapefiles inside that thing..
            found = find_shapefiles(archive_path)
            for _archive_base, files in found.iteritems():
                if 'shp' not in files:
                    continue  # Bad match..

                # Export all the component files to the temporary
                # directory under one shared base name, so that the
                # ``.shp`` file ends up next to its companions.
                base_name = _random_file_name()
                for ext, item in files.iteritems():
                    dest = os.path.join(tempdir, base_name + '.' + ext)
                    # Stream the member in chunks and make sure the source
                    # handle is closed, instead of reading it whole.
                    with contextlib.closing(item.open()) as src:
                        with open(dest, 'wb') as fp:
                            shutil.copyfileobj(src, fp)

                shp_full_path = os.path.join(tempdir, base_name + '.shp')

                create_table_sql = shp2pgsql(
                    shp_full_path,
                    table=destination_table,
                    create_table_only=True,
                    mode='create',
                    geometry_column='geom',
                    create_gist_index=True)

                # Use TEXT fields instead of varchar(XX)
                # todo: use a less-hackish way!!
                create_table_sql = re.sub(
                    r'varchar\([0-9]+\)', 'text', create_table_sql,
                    flags=re.IGNORECASE)

                import_data_sql = shp2pgsql(
                    shp_full_path,
                    table=destination_table,
                    mode='append',
                    geometry_column='geom',
                    create_gist_index=False)

                create_table_sqls.append(create_table_sql)
                import_data_sqls.append(import_data_sql)

    # Fail with a clear message rather than an IndexError further down.
    if not create_table_sqls:
        raise ValueError(
            'No shapefiles found in the resources of dataset {0}'
            .format(dataset_id))

    # Only the first CREATE TABLE statement is executed; all the
    # Shapefiles are appended to that single table.
    # NOTE(review): presumably they are all expected to share the same
    # schema -- confirm.
    with admin_db, admin_db.cursor() as cur:
        cur.execute(create_table_sqls[0])

    with db, db.cursor() as cur:
        for sql in import_data_sqls:
            cur.execute(sql)