def download_and_clean_FEWSNET_IPC_data():
    """Download the FEWS NET IPC shapefile archive and tidy its file names.

    The ``ALL_HFIC.zip`` archive is downloaded into ``data_dir`` and
    extracted there. Every file under ``ALL_HFIC/East Africa`` whose name
    starts with ``EA2017`` is then renamed so that it starts with
    ``EA_2017`` instead.
    """
    url = "http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip"
    zipfile_name = Path(data_dir) / url.split("/")[-1]
    download_file(url, zipfile_name)

    with zipfile.ZipFile(zipfile_name) as zf:
        zf.extractall(data_dir)

    with cd(str(Path(data_dir) / "ALL_HFIC" / "East Africa")):
        for old_name in glob("EA2017*"):
            # Limit the substitution to the leading prefix: an unbounded
            # replace would also rewrite any later "EA" in the file name.
            new_name = old_name.replace("EA", "EA_", 1)
            # os.replace overwrites an existing target on every platform,
            # so re-running after a previous extraction does not fail on
            # Windows the way os.rename would.
            os.replace(old_name, new_name)
def clean_FAOSTAT_data(faostat_zipfile, faostat_dir):
    """Extract the FAOSTAT bulk download and write the South Sudan subset.

    Args:
        faostat_zipfile: Path object pointing at the FAOSTAT bulk zip file.
        faostat_dir: Path object for the directory the archive is
            extracted into.

    If neither the zip file nor the directory exists, the data is downloaded
    first. If the zip file exists but the directory does not, the archive is
    extracted and every nested ``*.zip`` inside it is unpacked in parallel.

    Each ``*.csv`` in ``faostat_dir`` is then read and its columns are
    renamed towards the common layout ``Area, Item, Element, Year, Unit,
    Value``. Tables that end up with exactly that column set are filtered to
    rows where ``Area == "South Sudan"``, tagged with their source file
    name, concatenated, and written pipe-separated to
    ``south_sudan_data_fao.csv`` under ``data_dir``.

    Raises:
        ValueError: from ``pd.concat`` when no table matches the layout.
    """
    if not (faostat_zipfile.is_file() or faostat_dir.is_dir()):
        download_FAOSTAT_data()

    if faostat_zipfile.is_file() and not faostat_dir.is_dir():
        os.mkdir(faostat_dir)
        with zipfile.ZipFile(str(faostat_zipfile)) as zf:
            zf.extractall(faostat_dir)

        with cd(str(faostat_dir.resolve())):
            zipfiles = glob("*.zip")
            with mp.Pool(mp.cpu_count()) as p:
                # Drain the iterator only to drive the progress bar.
                for _ in tqdm(
                    p.imap_unordered(sp_unzip, zipfiles), total=len(zipfiles)
                ):
                    pass

    # Column set a table must have (after renaming) to be kept.
    expected_columns = {"Area", "Item", "Element", "Year", "Unit", "Value"}

    dfs = []
    for filename in tqdm(glob(str(faostat_dir) + "/*.csv")):
        df = read_csv(filename)
        df = df.rename(columns={"Country": "Area", "Months": "Month"})

        if "Currency" in df.columns:
            df = df.rename(columns={"Currency": "Item", "Item": "Element"})

        if "Reporter Countries" in df.columns:
            df = df.rename(columns={"Reporter Countries": "Area"})

        if "Survey" in df.columns:
            df = df.rename(
                columns={
                    "Breadown by Sex of the Household Head": "Sex of the Household Head",
                    "Indicator": "Item",
                    "Measure": "Element",
                }
            )
            # The first and last whitespace-separated tokens of "Survey"
            # are used as the area and the year respectively.
            survey_tokens = df["Survey"].str.split()
            df["Area"] = survey_tokens.str.get(0)
            df["Year"] = survey_tokens.str.get(-1)
            del df["Survey"]

            df = df[df["Sex of the Household Head"] == "Total"]
            df = df[df["Breakdown Variable"] == "Country-level"]
            df = df.drop(
                columns=["Sex of the Household Head", "Breakdown Variable"]
            )

        if "Donor Country" in df.columns:
            df = df.rename(columns={"Recipient Country": "Area"})
            del df["Donor Country"]
            df["Element"] = "Food aid shipments"

        if set(df.columns) == expected_columns:
            # Build the tagged frame with drop/assign (both return new
            # objects) *before* appending, instead of assigning a column on
            # a filtered slice that is already in the list.
            south_sudan = (
                df[df["Area"] == "South Sudan"]
                .drop(columns="Area")
                .assign(filename=Path(filename).name)
            )
            dfs.append(south_sudan)

    df = pd.concat(dfs)
    df.to_csv(
        str(Path(data_dir) / "south_sudan_data_fao.csv"), index=False, sep="|"
    )
def create_food_security_data_table(region: str, country: str):
    """Build a per-county table of IPC phase values and write it as TSV.

    Args:
        region: Name of the sub-directory of ``data/raw/FEWS/ALL_HFIC`` whose
            ``*.shp`` files are processed.
        country: Value matched against field 3 of each admin-boundary record
            to select the counties of interest.

    For every food-security shapefile the year is parsed from characters 3-6
    of the file name and the month from characters 7-8. Field 0 of each
    food-security record is taken as the phase value. One row is emitted for
    each (county, food-security polygon) pair in which the county polygon,
    shrunk by 0.05, intersects the food-security polygon.

    The table is written tab-separated, without an index, to the path given
    in ``sys.argv[1]``.
    """
    admin_boundaries_shapefile = "data/raw/FEWS/FEWSNET_World_Admin/FEWSNET_Admin2"
    sf_admin = shapefile.Reader(admin_boundaries_shapefile)
    south_sudan_srs = [
        x for x in sf_admin.shapeRecords() if x.record[3] == country
    ]

    # The shrunk county polygon depends only on the county, so compute it
    # once here rather than for every food-security polygon of every record
    # of every shapefile.
    shrunk_counties = [
        (sr, Polygon(sr.shape.points).buffer(-0.05)) for sr in south_sudan_srs
    ]

    path = f"data/raw/FEWS/ALL_HFIC/{region}"
    ipc_records = []
    with cd(path):
        shapefiles = glob("*.shp")
        for filename in tqdm(shapefiles, unit="shapefile"):
            year = int(filename[3:7])
            month = int(filename[7:9])
            reader = shapefile.Reader(filename)
            for fs_sr in tqdm(
                reader.shapeRecords(), unit="Food security shapeRecord"
            ):
                CS = int(fs_sr.record[0])
                fs_polygons = get_polygons(fs_sr.shape)
                for sr, county_polygon in tqdm(
                    shrunk_counties, desc=f"{country} Counties"
                ):
                    for fs_polygon in tqdm(
                        fs_polygons, unit="Food security polygon"
                    ):
                        if county_polygon.intersects(fs_polygon):
                            ipc_records.append(
                                {
                                    "Country": sr.record[3],
                                    "State": sr.record[4],
                                    "County": sr.record[8],
                                    "Year": year,
                                    "Month": month,
                                    "Value": CS,
                                    "Variable": "IPC Phase Classification",
                                    "Unit": "IPC Phase",
                                    "Source": "FEWSNET",
                                }
                            )

    df = pd.DataFrame(ipc_records)
    df.to_csv(sys.argv[1], sep="\t", index=False)
def create_food_security_data_table(region: str, country: str):
    """Build a per-county table of IPC phases and write ``ipc_data.tsv``.

    Args:
        region: Name of the sub-directory of ``<data_dir>/ALL_HFIC`` whose
            ``*.shp`` files are processed.
        country: Value matched against field 3 of each admin-boundary record
            to select the counties of interest.

    For every food-security shapefile the year is parsed from characters 3-6
    of the file name and the month from characters 7-8. Field 0 of each
    food-security record is taken as the IPC phase. One row is emitted for
    each (county, food-security polygon) pair in which the county polygon,
    shrunk by 0.05, intersects the food-security polygon.

    The table is written tab-separated (index column included) to
    ``ipc_data.tsv`` under ``data_dir``.
    """
    admin_boundaries_shapefile = str(
        Path(data_dir) / "FEWSNET_World_Admin" / "FEWSNET_Admin2"
    )
    sf_admin = shapefile.Reader(admin_boundaries_shapefile)
    south_sudan_srs = [
        x for x in sf_admin.shapeRecords() if x.record[3] == country
    ]

    # The shrunk county polygon depends only on the county, so compute it
    # once here rather than for every food-security polygon of every record
    # of every shapefile.
    shrunk_counties = [
        (sr, Polygon(sr.shape.points).buffer(-0.05)) for sr in south_sudan_srs
    ]

    path = str(Path(data_dir) / "ALL_HFIC" / region)
    ipc_records = []
    with cd(path):
        shapefiles = glob("*.shp")
        for filename in tqdm(shapefiles, unit="shapefile"):
            year = int(filename[3:7])
            month = int(filename[7:9])
            reader = shapefile.Reader(filename)
            for fs_sr in tqdm(
                reader.shapeRecords(), unit="Food security shapeRecord"
            ):
                CS = int(fs_sr.record[0])
                fs_polygons = get_polygons(fs_sr.shape)
                for sr, county_polygon in tqdm(
                    shrunk_counties, desc=f"{country} Counties"
                ):
                    for fs_polygon in tqdm(
                        fs_polygons, unit="Food security polygon"
                    ):
                        if county_polygon.intersects(fs_polygon):
                            ipc_records.append(
                                {
                                    "Country": sr.record[3],
                                    "State": sr.record[4],
                                    "County": sr.record[8],
                                    "Year": year,
                                    "Month": month,
                                    "IPC Phase": CS,
                                }
                            )

    df = pd.DataFrame(ipc_records)
    df.to_csv(Path(data_dir) / "ipc_data.tsv", sep="\t")