コード例 #1
0
def download_and_clean_FEWSNET_IPC_data():
    """Download the FEWS NET ``ALL_HFIC`` archive and fix 2017 file names.

    The zip archive is downloaded into ``data_dir`` (named after the last
    component of the URL) and extracted there. Afterwards every file in
    ``ALL_HFIC/East Africa`` whose name starts with ``EA2017`` is renamed so
    that the ``EA`` prefix is followed by an underscore
    (``EA2017...`` -> ``EA_2017...``).
    """
    url = "http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip"
    zipfile_name = Path(data_dir) / url.split("/")[-1]
    download_file(url, zipfile_name)
    with zipfile.ZipFile(zipfile_name) as zf:
        zf.extractall(data_dir)
    with cd(str(Path(data_dir) / "ALL_HFIC" / "East Africa")):
        # NOTE(review): presumably the 2017 files lack the underscore that
        # the other files in this directory carry -- confirm against the data.
        for old_name in glob("EA2017*"):
            # count=1: only the leading "EA" prefix matched by the glob gets
            # the underscore; any later "EA" in the name is left untouched.
            os.rename(old_name, old_name.replace("EA", "EA_", 1))
コード例 #2
0
def clean_FAOSTAT_data(faostat_zipfile: Path, faostat_dir: Path) -> None:
    """Extract the FAOSTAT bulk download and write the South Sudan subset.

    Steps:

    1. If neither the zip file nor the extraction directory exists, call
       ``download_FAOSTAT_data()``.
    2. If the zip file exists but the directory does not, create the
       directory, extract the archive into it, and run ``sp_unzip`` on every
       nested ``*.zip`` found there using a process pool.
    3. Read every ``*.csv`` in the directory, rename columns towards the
       common schema ``Area, Item, Element, Year, Unit, Value``, and keep
       only tables that end up with exactly those columns.
    4. From each kept table take the rows whose ``Area`` is
       ``"South Sudan"``, drop ``Area``, add a ``filename`` column with the
       source file's base name, and concatenate everything into
       ``south_sudan_data_fao.csv`` (``|``-separated) inside ``data_dir``.

    Args:
        faostat_zipfile: Path of the FAOSTAT bulk zip archive.
        faostat_dir: Directory the archive is (or will be) extracted into.

    Raises:
        ValueError: If no CSV file ends up with the expected columns.
    """
    if not (faostat_zipfile.is_file() or faostat_dir.is_dir()):
        download_FAOSTAT_data()
    if faostat_zipfile.is_file() and not faostat_dir.is_dir():
        os.mkdir(faostat_dir)
        with zipfile.ZipFile(str(faostat_zipfile)) as zf:
            zf.extractall(faostat_dir)
        with cd(str(faostat_dir.resolve())):
            zipfiles = glob("*.zip")
            with mp.Pool(mp.cpu_count()) as p:
                # The loop body is empty on purpose: iterating drains the
                # lazy imap_unordered results and drives the progress bar.
                for _ in tqdm(p.imap_unordered(sp_unzip, zipfiles),
                              total=len(zipfiles)):
                    pass

    expected_columns = {"Area", "Item", "Element", "Year", "Unit", "Value"}
    dfs = []
    for filename in tqdm(glob(str(faostat_dir) + "/*.csv")):
        df = read_csv(filename)
        df = df.rename(columns={"Country": "Area", "Months": "Month"})
        if "Currency" in df.columns:
            df = df.rename(columns={"Currency": "Item", "Item": "Element"})
        if "Reporter Countries" in df.columns:
            df = df.rename(columns={"Reporter Countries": "Area"})
        if "Survey" in df.columns:
            # The misspelled "Breadown" key is matched literally against the
            # input column name, so it must stay as it is.
            df = df.rename(
                columns={
                    "Breadown by Sex of the Household Head":
                    "Sex of the Household Head",
                    "Indicator": "Item",
                    "Measure": "Element",
                })
            # First and last whitespace-separated tokens of "Survey" are
            # used as the area and the year respectively.
            df["Area"] = df["Survey"].str.split().str.get(0)
            df["Year"] = df["Survey"].str.split().str.get(-1)
            del df["Survey"]
            df = df[df["Sex of the Household Head"] == "Total"]
            df = df[df["Breakdown Variable"] == "Country-level"]
            del df["Sex of the Household Head"]
            del df["Breakdown Variable"]
        if "Donor Country" in df.columns:
            df = df.rename(columns={"Recipient Country": "Area"})
            del df["Donor Country"]
            df["Element"] = "Food aid shipments"

        if set(df.columns) != expected_columns:
            # Tables that do not fit the common schema are skipped.
            continue

        # drop() returns a fresh frame, so adding the column below does not
        # write onto a filtered slice of the original table.
        df = df[df["Area"] == "South Sudan"].drop(columns=["Area"])
        # Path.name is separator-agnostic, unlike splitting on "/".
        df["filename"] = Path(filename).name
        dfs.append(df)

    if not dfs:
        raise ValueError(
            f"No FAOSTAT CSV file in {faostat_dir} has the expected columns "
            f"{sorted(expected_columns)}"
        )

    df = pd.concat(dfs)
    df.to_csv(str(Path(data_dir) / "south_sudan_data_fao.csv"),
              index=False,
              sep="|")
コード例 #3
0
def create_food_security_data_table(region: str, country: str) -> None:
    """Build a county-level IPC phase table and write it as a TSV file.

    Admin-2 shapes whose record field 3 equals ``country`` are read from the
    FEWS NET admin boundaries shapefile. Every ``*.shp`` file under
    ``data/raw/FEWS/ALL_HFIC/<region>`` is then scanned: for each
    food-security shape record, one row is emitted per (county, food-security
    polygon) pair in which the inward-buffered county polygon intersects the
    food-security polygon.

    The output is written tab-separated, without the index, to the path given
    as the first command line argument (``sys.argv[1]``).

    Args:
        region: Name of the sub-directory of ``ALL_HFIC`` to scan.
        country: Country name matched against field 3 of the admin records.
    """
    admin_boundaries_shapefile = "data/raw/FEWS/FEWSNET_World_Admin/FEWSNET_Admin2"
    sf_admin = shapefile.Reader(admin_boundaries_shapefile)
    # The buffered county polygon depends only on the county, so build it
    # once here instead of once per food-security polygon in the inner loop.
    # NOTE(review): the -0.05 buffer shrinks each county, presumably so that
    # polygons merely touching its border are not counted -- confirm.
    counties = [
        (sr.record, Polygon(sr.shape.points).buffer(-0.05))
        for sr in sf_admin.shapeRecords()
        if sr.record[3] == country
    ]

    path = f"data/raw/FEWS/ALL_HFIC/{region}"
    ipc_records = []
    with cd(path):
        for filename in tqdm(glob("*.shp"), unit="shapefile"):
            # Characters 3-6 of the file name are read as the year and
            # characters 7-8 as the month.
            year = int(filename[3:7])
            month = int(filename[7:9])
            reader = shapefile.Reader(filename)
            for fs_sr in tqdm(
                reader.shapeRecords(), unit="Food security shapeRecord"
            ):
                # Field 0 of the food-security record is the value reported
                # as the IPC phase.
                CS = int(fs_sr.record[0])
                fs_polygons = get_polygons(fs_sr.shape)

                for record, shrunken_county in tqdm(
                    counties, desc=f"{country} Counties"
                ):
                    for fs_polygon in tqdm(
                        fs_polygons, unit="Food security polygon"
                    ):
                        if shrunken_county.intersects(fs_polygon):
                            ipc_records.append(
                                {
                                    "Country": record[3],
                                    "State": record[4],
                                    "County": record[8],
                                    "Year": year,
                                    "Month": month,
                                    "Value": CS,
                                    "Variable": "IPC Phase Classification",
                                    "Unit": "IPC Phase",
                                    "Source": "FEWSNET",
                                }
                            )
    df = pd.DataFrame(ipc_records)
    df.to_csv(sys.argv[1], sep="\t", index=False)
コード例 #4
0
def create_food_security_data_table(region: str, country: str) -> None:
    """Build a county-level IPC phase table and write ``ipc_data.tsv``.

    Admin-2 shapes whose record field 3 equals ``country`` are read from
    ``FEWSNET_World_Admin/FEWSNET_Admin2`` under ``data_dir``. Every ``*.shp``
    file under ``ALL_HFIC/<region>`` in ``data_dir`` is then scanned: for each
    food-security shape record, one row is emitted per (county, food-security
    polygon) pair in which the inward-buffered county polygon intersects the
    food-security polygon.

    The table is written tab-separated (including the DataFrame index) to
    ``ipc_data.tsv`` inside ``data_dir``.

    Args:
        region: Name of the sub-directory of ``ALL_HFIC`` to scan.
        country: Country name matched against field 3 of the admin records.
    """
    admin_boundaries_shapefile = str(
        Path(data_dir) / "FEWSNET_World_Admin" / "FEWSNET_Admin2"
    )
    sf_admin = shapefile.Reader(admin_boundaries_shapefile)
    # The buffered county polygon depends only on the county, so build it
    # once here instead of once per food-security polygon in the inner loop.
    # NOTE(review): the -0.05 buffer shrinks each county, presumably so that
    # polygons merely touching its border are not counted -- confirm.
    counties = [
        (sr.record, Polygon(sr.shape.points).buffer(-0.05))
        for sr in sf_admin.shapeRecords()
        if sr.record[3] == country
    ]

    path = str(Path(data_dir) / "ALL_HFIC" / region)
    ipc_records = []
    with cd(path):
        for filename in tqdm(glob("*.shp"), unit="shapefile"):
            # Characters 3-6 of the file name are read as the year and
            # characters 7-8 as the month.
            year = int(filename[3:7])
            month = int(filename[7:9])
            reader = shapefile.Reader(filename)
            for fs_sr in tqdm(
                reader.shapeRecords(), unit="Food security shapeRecord"
            ):
                # Field 0 of the food-security record is the value reported
                # as the IPC phase.
                CS = int(fs_sr.record[0])
                fs_polygons = get_polygons(fs_sr.shape)

                for record, shrunken_county in tqdm(
                    counties, desc=f"{country} Counties"
                ):
                    for fs_polygon in tqdm(
                        fs_polygons, unit="Food security polygon"
                    ):
                        if shrunken_county.intersects(fs_polygon):
                            ipc_records.append(
                                {
                                    "Country": record[3],
                                    "State": record[4],
                                    "County": record[8],
                                    "Year": year,
                                    "Month": month,
                                    "IPC Phase": CS,
                                }
                            )
    df = pd.DataFrame(ipc_records)
    df.to_csv(Path(data_dir) / "ipc_data.tsv", sep="\t")