Python create_dag 예제들, dagmod.create_dag Python 예제들

예제 #1

0

파일 보기

# aggregation levels and intervention scenarios that get combined below
AGGS: List[str] = ["states", "counties"]
INTERVENTIONS: List[str] = [
    "NO_INTERVENTION",
    "WEAK_INTERVENTION",
    "STRONG_INTERVENTION",
    "OBSERVED_INTERVENTION",
]

# build filepaths and urls for all pairs of agg. and intervention
paths: List[Path] = []
# URLs are kept as plain strings: wrapping a URL in Path collapses the "//"
# that follows the scheme ("https://host" becomes "https:/host").
urls: List[str] = []
for agg in AGGS:
    for intervention in INTERVENTIONS:

        url: str = f"{URL0}{agg}.{intervention}{URL1}"
        urls.append(url)

        filename: str = f"can_{agg}_{intervention}{WFORMAT}"
        path: Path = PATH0 / filename
        paths.append(path)

# define operators and dag
dag: DAG = dagmod.create_dag("CANdatapull", "Covid act now data pull")

date_task: BashOperator = dagmod.get_date_op(dag)

# paths[i] and urls[i] belong to the same (agg, intervention) pair
pull_task: PythonOperator = dagmod.get_pull_op(
    "CANdatapull", dagmod.rw_all, [WFORMAT, paths, urls], dag
)

# pull_task is registered downstream of date_task
date_task >> pull_task

예제 #2

0

파일 보기

파일: reich_process.py 프로젝트: dcaseykc/restart

    Cube (ndarray), indices (dataframe) are saved as hdf5, csv respectively.
    """
    cube, index = build_cube()

    # write cube to disk with gzip compression as cube is sparse and large
    with h5py.File("../../extern/data/epidemiological/processed/reich.hdf5",
                   "w") as cube_file:
        cube_file.create_dataset("zipped_reichcube",
                                 data=cube,
                                 compression="gzip")

    # write indices to dataframe as csv for simplicity
    dfindex = pd.DataFrame()
    dflen = max(cube.shape)
    for key, val in index.items():
        # need consistent column lengths
        dfindex[key] = val + ([None] * (dflen - len(val)))
    dfindex.to_csv(
        "../../extern/data/epidemiological/processed/reichindex.csv")


# Text handed to create_dag as the second argument. The trailing space after
# "builds" keeps the two literals from fusing into "buildsdatacube".
desc = ("Processes latest Reich Lab collected forecasts and builds " +
        "datacube and indices.")
dag: DAG = dagmod.create_dag("ReichLabDataCube", desc)
date_task: BashOperator = dagmod.get_date_op(dag)
# the cube task calls produce_cube with no arguments
cube_task: PythonOperator = PythonOperator(task_id="ReichCube",
                                           python_callable=produce_cube,
                                           dag=dag)

# cube_task is registered downstream of date_task
date_task >> cube_task

예제 #3

0

파일 보기

파일: yyg2_pull.py 프로젝트: dcaseykc/restart

import pathlib

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator

import dagmod

# constants
YY_URL: str = (
    "https://raw.githubusercontent.com/youyanggu/"
    "covid19_projections/master/projections/combined/latest_subregion.csv"
)
PATH0: pathlib.PosixPath = pathlib.PosixPath(
    "../../extern/data/epidemiological/us/forecasts/YYG/county"
)
WFORMAT: str = ".csv"
FILENAME: str = "YYG_county_us_casesdeathsprojR"

# output location: PATH0 / (FILENAME + WFORMAT)
path: pathlib.PosixPath = PATH0 / f"{FILENAME}{WFORMAT}"

# define operators and dag
dag: DAG = dagmod.create_dag("YYG2datapull", "daily YYG2 data pull")
date_task: BashOperator = dagmod.get_date_op(dag)

# a single path/url pair, each wrapped in a one-element list
pull_task: PythonOperator = dagmod.get_pull_op(
    "YYG2countypull",
    dagmod.rw_all,
    [WFORMAT, [path], [YY_URL]],
    dag,
)

# pull_task is registered downstream of date_task
date_task >> pull_task

예제 #4

0

파일 보기

파일: uwp_pull.py 프로젝트: dcaseykc/restart

"""DAG for pulling the UW state policy data."""
import pathlib

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator

import dagmod

# constants
UWP_URL: str = "https://query.data.world/s/vfpfdftmwmk3qj7fbobnrpas5kxuj2"
PATH0: pathlib.PosixPath = pathlib.PosixPath(
    "../../extern/data/epidemiological/us/policy"
)
WFORMAT: str = ".csv"
FILENAME: str = "UW_state_us_policy"

# output location: PATH0 / (FILENAME + WFORMAT)
path: pathlib.PosixPath = PATH0 / f"{FILENAME}{WFORMAT}"

# define operators and dag
dag: DAG = dagmod.create_dag("UWPdatapull", "daily UW policy data pull")
date_task: BashOperator = dagmod.get_date_op(dag)

# a single path/url pair, each wrapped in a one-element list
pull_task: PythonOperator = dagmod.get_pull_op(
    "UWpolicypull",
    dagmod.rw_all,
    [WFORMAT, [path], [UWP_URL]],
    dag=dag,
)

# pull_task is registered downstream of date_task
date_task >> pull_task

예제 #5

0

파일 보기

파일: yu_pull.py 프로젝트: dcaseykc/restart

               "G0Zg3wlgJpB2Zvg-vEN1i_76n2I-djL0Dk/export?format=csv&id")
# directory part of the output path that rw_all builds
PATH0: pathlib.PosixPath = pathlib.PosixPath(
    "../../extern/data/epidemiological/us/forecasts/Yu/county")
# output file name without suffix; rw_all appends the format string to it
FILENAME: str = "YU_county_us_deathsproj"


def rw_all(format: str = ".csv"):
    """Read the csv at YU_URL and hand it to dagmod.rw.

    Args:
        format: suffix appended to FILENAME to build the output path; also
            passed through to dagmod.rw unchanged.

    If the read raises, the exception is reported through dagmod.bad_url.
    Should that call return, an empty DataFrame is passed to dagmod.rw.
    """
    try:
        # header=2: row index 2 of the csv supplies the column names
        frame: pd.DataFrame = pd.read_csv(YU_URL, header=2)
    except Exception as err:
        dagmod.bad_url(YU_URL, err)
        frame = pd.DataFrame(index=[], columns=[])

    # output location: PATH0 / (FILENAME + format)
    target: pathlib.PosixPath = PATH0 / (FILENAME + format)
    dagmod.rw(format, target, frame)


# define operators and dag
dag: DAG = dagmod.create_dag("YUdatapull", "daily Yu Group data pull")
date_task: BashOperator = dagmod.get_date_op(dag)

# the empty list means rw_all receives no arguments and uses its default
pull_task: PythonOperator = dagmod.get_pull_op(
    "YUcountypull", rw_all, [], dag=dag
)

# pull_task is registered downstream of date_task
date_task >> pull_task

예제 #6

0

파일 보기

파일: who_pull.py 프로젝트: dcaseykc/restart

"""DAG for pulling WHO Covid data."""
import pathlib

from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator

import dagmod

# constants
WHO_URL: str = "https://covid19.who.int/WHO-COVID-19-global-data.csv"
PATH0: pathlib.PosixPath = pathlib.PosixPath(
    "../../extern/data/epidemiological/global")
WFORMAT: str = ".csv"
FILENAME: str = "WHO-COVID-19-global-data"

# output location: PATH0 / (FILENAME + WFORMAT)
path: pathlib.PosixPath = PATH0 / f"{FILENAME}{WFORMAT}"

# define operators and dag
dag: DAG = dagmod.create_dag("WHOdatapull", "daily WHO data pull")
date_task: BashOperator = dagmod.get_date_op(dag)

# a single path/url pair, each wrapped in a one-element list
pull_task: PythonOperator = dagmod.get_pull_op("WHOdatapull",
                                               dagmod.rw_all,
                                               [WFORMAT, [path], [WHO_URL]],
                                               dag)

# pull_task is registered downstream of date_task
date_task >> pull_task

예제 #7

0

파일 보기

파일: mit_pull.py 프로젝트: dcaseykc/restart

        # build paths for writing
        paths: List[str] = build_paths("state", state)

        # write data
        paths = [path + format for path in paths]
        if format == ".csv":
            cases.to_csv(paths[0], mode="w")
            deaths.to_csv(paths[1], mode="w")
            hospital.to_csv(paths[2], mode="w")
        else:  # only other format is .h5
            cases.to_hdf(paths[0], key="df", mode="w")
            deaths.to_hdf(paths[1], key="df", mode="w")
            hospital.to_hdf(paths[2], key="df", mode="w")


def pull_mit():
    """Callable for python op.

    Takes no arguments and forwards the module-level URL to
    rw_cases_deaths_hosp.
    """
    rw_cases_deaths_hosp(URL)


# define operators and dag
dag: DAG = dagmod.create_dag("MITdatapull", "daily MIT data pull (states)")

# pull_mit takes no arguments, hence the empty list
pull_states_task: PythonOperator = dagmod.get_pull_op("MITstatespull",
                                                      pull_mit, [], dag)
date_task: BashOperator = dagmod.get_date_op(dag)

# pull_states_task is registered downstream of date_task
date_task >> pull_states_task

예제 #8

0

파일 보기

파일: WADH_pull.py 프로젝트: dcaseykc/restart

    "deaths_by_age_county",
    "datadict",
]
# base directory that rw_all joins with each NAMES entry plus the format suffix
PATH0: pathlib.Path = pathlib.Path("../../extern/data/epidemiological/WA")


def rw_all(format: str = ".csv"):
    """Read sheets 0-3 of the workbook at URL and hand each to dagmod.rw.

    Args:
        format: suffix appended to the matching NAMES entry to build the
            output path; also passed through to dagmod.rw unchanged.

    A failed read is reported through dagmod.bad_url. Should that call
    return, an empty DataFrame is passed to dagmod.rw for that sheet.
    """
    for sheet_idx in range(4):
        # NOTE(review): the workbook is read from URL once per sheet
        try:
            frame: pd.DataFrame = pd.read_excel(URL, sheet_name=sheet_idx)
        except Exception as err:
            dagmod.bad_url(URL, err)
            frame = pd.DataFrame(index=[], columns=[])

        # output location: PATH0 / (NAMES[sheet_idx] + format)
        target: pathlib.Path = PATH0 / (NAMES[sheet_idx] + format)
        dagmod.rw(format, target, frame)


# define operators and dag
dag: DAG = dagmod.create_dag("WADHdatapull", "weekly WADH data pull")
date_task: BashOperator = dagmod.get_date_op(dag)

# the empty list means rw_all receives no arguments and uses its default
pull_task: PythonOperator = dagmod.get_pull_op(
    "WADHdatapull", rw_all, [], dag
)

# pull_task is registered downstream of date_task
date_task >> pull_task

예제 #9

0

파일 보기

파일: ihme_pull.py 프로젝트: dcaseykc/restart

    # write each csv to /IHME dir in the desired format
    for member in z.namelist():

        # skip if member file is not a csv
        if ".csv" not in member:
            continue

        with z.open(member) as file:

            # read in data
            df = pd.DataFrame()
            try:
                df = pd.read_csv(file)
            except Exception as e:
                print("Exception" + str(e) + " with file " + str(member))

            # build path and write file in desired format
            s: slice = slice(start=member.index("/") + 1, stop=-1)
            path: Path = PATH0.joinpath(member[s])
            dagmod.rw(format, path, df)


# define operators and dag
dag: DAG = dagmod.create_dag("IHMEdatapull", "daily IHME data pull")
date_task: BashOperator = dagmod.get_date_op(dag)

# rw_zip is given an empty argument list
pull_task: PythonOperator = dagmod.get_pull_op(
    "IHMEallpull", rw_zip, [], dag
)

# pull_task is registered downstream of date_task
date_task >> pull_task

예제 #10

0

파일 보기

파일: yyg_pull.py 프로젝트: dcaseykc/restart

def pull_us():
    """Pull country data.

    Builds the US csv url from YY_URL, asks build_paths for the
    country-level "US" paths and forwards both to rw_cases_deaths_R with
    the ".csv" format.
    """
    us_paths: List[str] = build_paths(agg="country", loc="US")
    us_url: str = f"{YY_URL}/US.csv"

    rw_cases_deaths_R(us_url, us_paths, format=".csv")


def pull_states():
    """Pull all states data by calling pull_state for each STATES entry."""
    for state_name in STATES:
        pull_state(state_name)


# define operators and dag
dag: DAG = dagmod.create_dag(
    "YYGdatapull", "daily YYG data pull (states + us)"
)

# both callables take no arguments, hence the empty lists
pull_states_task: PythonOperator = dagmod.get_pull_op(
    "YYGstatespull", pull_states, [], dag
)
pull_us_task: PythonOperator = dagmod.get_pull_op(
    "YYGuspull", pull_us, [], dag
)
date_task: BashOperator = dagmod.get_date_op(dag)

# chain: date_task, then pull_us_task, then pull_states_task
date_task >> pull_us_task >> pull_states_task