Example #1
def test_shell_initializes_and_runs_multiline_cmd():
    cmd = """
    TEST=$(cat <<-END
This is line one
This is line two
This is line three
boom!
END
)
for i in $TEST
do
    echo $i
done"""
    with Flow(name="test") as f:
        task = ShellTask()(command=cmd,
                           env={key: "test"
                                for key in "abcdefgh"})
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "boom!"
Example #2
def test_shell_task_handles_multiline_commands():
    with tempfile.TemporaryDirectory() as tempdir:
        cmd = """
        cd {}
        for file in $(ls)
        do
            cat $file
        done
        """.format(
            tempdir
        )
        with open(tempdir + "/testfile.txt", "w") as f:
            f.write("this is a test")

        with Flow(name="test") as f:
            task = ShellTask()(command=cmd)

        out = f.run()

    assert out.is_successful()
    assert out.result[task].result == "this is a test"
Example #3
    def test_basic_trigger_dag_triggers(self, airflow_settings):
        task = AirflowTriggerDAG(dag_id="tutorial",
                                 execution_date="1986-09-20",
                                 env=airflow_settings)
        check_task = ShellTask(
            command="airflow list_dag_runs tutorial",
            helper_script=task.helper_script,
            env=airflow_settings,
        )

        with Flow(name="tutorial") as flow:
            res = check_task(upstream_tasks=[task])

        flow_state = flow.run()
        assert flow_state.is_successful()

        check_state = flow_state.result[res]
        assert check_state.is_successful()

        # check CLI output
        assert "manual__1986-09-20T00:00:00+00:00" in check_state.result
        assert "running" in check_state.result
        assert "1986-09-20T00:00:00+00:00" in check_state.result
Example #4
def test_shell_runs_other_shells():
    with Flow(name="test") as f:
        task = ShellTask(shell="zsh")(command="echo -n $ZSH_NAME")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "zsh"
Example #5
def test_shell_initializes_and_multiline_output_optionally_returns_all_lines():
    with Flow(name="test") as f:
        task = ShellTask(return_all=True)(command="echo -n 'hello world\n42'")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == ["hello world", "42"]
Example #6
def test_shell_initializes_and_multiline_output_returns_last_line():
    with Flow(name="test") as f:
        task = ShellTask()(command="echo -n 'hello world\n42'")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "42"
Example #7
def test_shell_task_raises_fail_if_cmd_fails():
    with Flow(name="test") as f:
        task = ShellTask()(command="ls surely_a_dir_that_doesnt_exist")
    out = f.run()
    assert out.is_failed()
    assert "Command failed with exit code" in str(out.result[task].message)
Example #8
)


@task
def curl_cmd(url: str, file: str) -> str:
    """
    The curl command we wish to execute.
    """
    prefect.context.get("logger")
    if Path(file).exists():
        raise SKIP("Image data file already exists.")
    return f"curl -fL -o {file} {url}"


download = ShellTask(name="curl_task",
                     max_retries=2,
                     retry_delay=datetime.timedelta(seconds=10))


@task(skip_on_upstream_skip=False)
def load_and_split(fname: str) -> list:
    """
    Loads image data file at `fname` and splits it into
    multiple frames.  Returns a list of bytes, one element
    for each frame.
    """
    prefect.context.get("logger")
    with open(fname, "rb") as f:
        imgs = f.read()

    return [img for img in imgs.split(b"\n" * 4) if img]
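The fragment above defines the tasks but not the flow wiring. A minimal sketch of how they are typically composed (the flow and parameter names are assumptions, not part of the original):

from prefect import Flow, Parameter

with Flow("image-etl") as flow:
    url = Parameter("url")
    file = Parameter("file")
    curl = download(command=curl_cmd(url, file))
    frames = load_and_split(file, upstream_tasks=[curl])

Because load_and_split is declared with skip_on_upstream_skip=False, it still runs when curl_cmd raises SKIP (the file already exists) and the download is skipped.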
Example #9
def match_ssp(ssp, component_spec):
    command = "python ssp.py --reader json-l match --components {} {}".format(
        component_spec, ssp)
    output = ShellTask(command=command, return_all=True).run()
    return "\n".join(output)
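Note that the task is constructed and .run() is invoked directly, outside any Flow; with return_all=True the call returns the command's output as a list of lines, which the function joins back together. A hypothetical invocation (file names are placeholders):

print(match_ssp("my_ssp.json", "my_component_spec.json"))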
Example #10
#!/usr/bin/env python
#
# Requires:
# conda create -n prefect -c conda-forge prefect
#

from prefect import Flow, Parameter, task, unmapped
from prefect.executors import LocalDaskExecutor
from prefect.tasks.shell import ShellTask
from prefect.utilities.debug import raise_on_exception
from omero.cli import cli_login
from omero.gateway import BlitzGateway

name = Parameter("name")

shell = ShellTask(return_all=True, log_stderr=True)

COMMAND = "/opt/omero/server/OMERO.server/bin/omero"


@task
def render(object, name):
    return (f"{COMMAND} render set {object} "
            f"/uod/idr/metadata/idr0072-schormann-subcellref/"
            f"{name}/idr0072-{name}-render.yml")


@task
def list_children(name, ignore):
    with cli_login() as cli:
        conn = BlitzGateway(client_obj=cli.get_client())
Example #11
        start_date=pendulum.datetime(2020, 4, 22, 17, 30, tz="America/Toronto"),
        interval=timedelta(days=1)
        )],
    # but only on weekdays
    filters=[filters.is_weekday],

    # and not in January TODO: Add TSX Holidays
    not_filters=[filters.between_dates(1, 1, 1, 31)]
)

#tsx_imb_fl.schedule = schedule

############## Storage ecr docker flow ##############
dkr_ecr_scrt = PrefectSecret("docker_ecr_login").run()

get_ecr_auth_token = ShellTask(helper_script="cd ~")
ecr_auth_token = get_ecr_auth_token.run(command=dkr_ecr_scrt)



ecr_client = boto3.client('ecr', region_name=aws_region)
ecr_token = ecr_client.get_authorization_token()

# # Decode the aws token
username, password = base64.b64decode(ecr_token['authorizationData'][0]['authorizationToken']).decode().split(':')
ecr_url = ecr_token['authorizationData'][0]['proxyEndpoint']

############################################################

# # # Registry URL for prefect or docker push
ecr_repo_name = ecr_url.replace('https://', '')  # optionally append f"/{aws_ecr_repo_name}:latest"
#
Example #12
import pandas as pd
from prefect import task, Flow
from prefect.tasks.shell import ShellTask


@task()
def get_dataframe():
    return pd.read_excel('./top2000.xlsx')


my_task = ShellTask()

with Flow("shell") as f:
    output = my_task(command="in2csv top2000.xlsx | tee top2000.csv | ls")

flow_state = f.run()
shell_output = flow_state.result[output].result
print(shell_output)
Example #13
def test_shell_task_env_can_be_set_at_init():
    with Flow(name="test") as f:
        task = ShellTask(env=dict(MYTESTVAR="test"))(command="echo -n $MYTESTVAR")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "test"
Example #14
def test_shell_task_accepts_env():
    with Flow(name="test") as f:
        task = ShellTask()(command="echo -n $MYTESTVAR", env=dict(MYTESTVAR="test"))
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "test"
Example #15
import prefect
from prefect import task, Flow
from prefect.tasks.shell import ShellTask
from prefect.tasks.templates.strings import StringFormatter

spark_submit_command = StringFormatter(template='''
spark-submit --packages org.apache.hadoop:hadoop-aws:3.2.0 \
  /opt/scripts/warehouse.py {yesterday}
''')

bash = ShellTask(log_stderr=True, return_all=True)

with Flow('warehouse') as flow:
    bash(command=spark_submit_command())

if __name__ == '__main__':
    flow.run()
Example #16
def test_shell_log_stream_type_error_on_invalid_log_level_string(caplog):
    with pytest.raises(TypeError):
        with raise_on_exception():
            with Flow(name="test") as f:
                ShellTask(stream_output="FOO")
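For contrast, a minimal sketch of values the task is expected to accept (an assumption based on Prefect 1.x, where stream_output takes a boolean or a standard logging level):

quiet = ShellTask(stream_output=False)     # default: do not stream output
verbose = ShellTask(stream_output="INFO")  # stream each line at INFO level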
Example #17
from prefect import task, Flow, Parameter
from prefect.tasks.shell import ShellTask
from datetime import timedelta

run_script = ShellTask(helper_script="cd ./scripts",
                       cache_for=timedelta(days=1),
                       return_all=True,
                       log_stdout=True)
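
# Hypothetical helper (not in the original snippet): builds a dated command
# inside a task so the run_date Parameter is interpolated as its runtime
# value rather than its repr.
@task
def dated_cmd(script: str, run_date: str) -> str:
    return f"python {script} --date={run_date}"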

with Flow("covid_update_data") as f:
    run_date = Parameter(name='run_date')

    # update_jhu = run_script(command="python 0_prepare_data_jhu.py")

    get_apple = run_script(command="python 1_apple_download_report.py")
    update_apple = run_script(command="python 2_prepare_data_apple.py")

    # Build these commands in a task (dated_cmd above) so the run_date
    # Parameter resolves at run time; an f-string here would embed the
    # Parameter object's repr instead of its value.
    get_rki = run_script(command=dated_cmd("3_rki_report_download.py", run_date))
    parse_rki = run_script(command=dated_cmd("4_rki_report_parse.py", run_date))
    # update_rki = run_script(command="python 5_prepare_data_rki.py")

    update_apple.set_upstream(get_apple)
    parse_rki.set_upstream(get_rki)
    # update_rki.set_upstream(parse_rki)

f.run(parameters={"run_date": "2020-06-14"})
# f.visualize()
Example #18
from datetime import timedelta

from prefect import Flow, task
from prefect.triggers import any_failed
from prefect.tasks.shell import ShellTask
from prefect.engine.executors import LocalDaskExecutor
from prefect.environments import LocalEnvironment


@task(max_retries=3, retry_delay=timedelta(seconds=0))
def extract_phizz():
    return [
        # schema output
        '''psql "dbname='db' user='******' password='******' host='postgres.rds.amazonaws.com'" -c "\COPY (select row_to_json(t) from public.information_schema.columns as t ) to '~/s3_bucket/extractload/schema.json';"''',
    ]


getdata = ShellTask(name='shell task',
                    helper_script='cd /home/ubuntu/prefect_scripts')

with Flow("Schema Extract") as flow:
    phizz_data_extract = getdata.map(extract_phizz)

flow.environment = LocalEnvironment(
    labels=[],
    executor=LocalDaskExecutor(scheduler="threads", num_workers=50),
)

# to run locally use flow.run()
#flow.run()

# to register to the prefect server use flow.register
# this assumes you have a project named ExtractLoad
flow.register(project_name="ExtractLoad")
Example #19
from datetime import datetime, timedelta

import prefect
from prefect import Flow, Parameter, task
from prefect.schedules import IntervalSchedule
from prefect.tasks.shell import ShellTask
from prefect.tasks.templates.jinja2 import JinjaTemplate

## default config settings such as this can generally be set in your
## user config file
retry_delay = timedelta(minutes=5)

## create all relevant tasks
t1 = ShellTask(
    name="print_date", command="date", max_retries=1, retry_delay=retry_delay
)
t2 = ShellTask(name="sleep", command="sleep 5", max_retries=3, retry_delay=retry_delay)


@task(max_retries=1, retry_delay=retry_delay)
def add_7():
    date = prefect.context.get("scheduled_start_time", datetime.utcnow())
    return date + timedelta(days=7)


## templated command; template vars will be read from both prefect.context as well as
## any passed kwargs to the task
command = """
    {% for i in range(5) %}
        echo "{{ scheduled_start_time }}"
Example #20
## - place into database
@task(name="Format Command")
def cmd(last_date):
    """
    Based on the last available date in the database, creates the appropriate
    journalctl command to collect all sshd logs since the last seen date.
    """
    if not last_date:
        since = pendulum.now("utc").add(
            hours=-48).strftime("%Y-%m-%d %H:%M:%S")
    else:
        since = last_date[-1][0]
    return f'journalctl _COMM=sshd -o json --since "{since}" --no-pager'


shell_task = ShellTask(name="Extract", return_all=True)


@task(name="Transform")
def transform(raw_data):
    """
    Takes the raw data returned from the journalctl command and filters / parses it
    down into a database-ready collection of rows.
    """
    data = [json.loads(line) for line in raw_data]
    rows = []

    user_patt = re.compile("user (.*?) from")
    network_patt = re.compile("from (.*?) port (.*?)$")

    db_path = os.path.expanduser("~/GeoLite/GeoLite2-City.mmdb")
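A minimal sketch of how this fragment's pieces are typically wired together (the flow name and the last_date source are assumptions, not part of the original):

with Flow("sshd-etl") as flow:
    last_date = get_last_date()  # hypothetical task reading the database
    raw = shell_task(command=cmd(last_date))
    rows = transform(raw)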
Example #21
    retry_delay=timedelta(minutes=1),
    nout=2,
    trigger=triggers.all_finished,
)
def create_parquet(_success):
    ts = prefect.context.scheduled_start_time
    dt_str = pd.to_datetime(ts).strftime("%Y-%m-%dT%H")
    vintage_fn = FN_STR.format(dt_str) + ".parquet"
    fn = FN_STR.format("") + ".parquet"

    df = pd.read_csv(CSV_FN, parse_dates=["dt"])
    df.to_parquet(DATA_PATH / vintage_fn, index=False)
    df.to_parquet(DATA_PATH / fn, index=False)
    return vintage_fn, fn


@task
def get_gcs_cmd(fn):
    return f"gsutil acl ch -u AllUsers:R gs://can-scrape-outputs/final/{fn}"


shell = ShellTask()
with Flow("UpdateParquetFiles", CronSchedule("10 */2 * * *")) as f:
    connstr = EnvVarSecret("COVID_DB_CONN_URI")
    success = export_to_csv(connstr)
    vintage_fn, fn = create_parquet(success)
    shell(get_gcs_cmd(vintage_fn))
    shell(get_gcs_cmd(fn))

f.register(project_name="can-scrape")
Example #22
def test_shell_initializes_with_basic_cmd():
    with Flow(name="test") as f:
        task = ShellTask(command="echo -n 'hello world'")()
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result == "hello world"
Example #23
                             params=params)
    return response.text


@task
def load_file(filename: str) -> str:
    with open(filename, "r", encoding="utf-8") as file:
        return file.read()


@task
def printa(stuff):
    print(stuff)


shell = ShellTask(return_all=True)  # renamed so it no longer shadows the @task decorator
with Flow("shell") as f:
    translation_server_url = "http://localhost:1969"
    bibtex = load_file("./workspace/aksw-short.bib")
    zotero = import_translation(bibtex, translation_server_url)
    rdf = export_translation(zotero, translation_server_url,
                             "rdf_bibliontology")
    turtle = shell(command="rapper - -o turtle -I www.test.com > tests.ttl")
    printa(turtle)

f.run_config = DockerRun(image="prefecthq/prefect")
f.register(project_name="tutoriala")

# Configure extra environment variables for this flow,
# and set a custom image
# f.run()
Example #24
def test_shell_returns_none_if_empty_output():
    with Flow(name="test") as f:
        task = ShellTask()(command="ls > /dev/null")
    out = f.run()
    assert out.is_successful()
    assert out.result[task].result is None
Example #25
    jobs = []
    for k, v in data.items():
        jobs.append(
            bq.load_table_from_dataframe(
                dataframe=v,
                destination=".".join(["cbs", k]),
                project=GCP.project,
                job_config=job_config,
                location=GCP.location,
            ))
    return jobs


gcp = Parameter("gcp", required=True)
filepath = Parameter("filepath", required=True)
curl_download = ShellTask(name="curl_download")

with Flow("CBS regionaal") as flow:
    # # TODO: fix UnicodeDecodeError when writing to Google Drive
    curl_command = curl_cmd(URL_PC6HUISNR, filepath)
    # curl_download = curl_download(command=curl_command)
    # gwb = pc6huisnr_to_gbq(zipfile=filepath, GCP=gcp, upstream_tasks=[curl_download])
    regionaal = cbsodatav3_to_gbq.map(
        id=ODATA_REGIONAAL,
        GCP=unmapped(gcp),
        task_args={'skip_on_upstream_skip': False})
    regionaal_column_description = column_descriptions.map(
        table_id=ODATA_REGIONAAL,
        GCP=unmapped(gcp),
        upstream_tasks=[regionaal])
Example #26
def test_shell_raises_if_no_command_provided():
    with Flow(name="test") as f:
        ShellTask()()
    with pytest.raises(TypeError):
        with raise_on_exception():
            assert f.run()
Example #27
"""
A quick demo of three little shell tasks
"""
import sys
from pathlib import Path

from prefect import Flow, task
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock
from prefect.tasks.shell import ShellTask

shelltask = ShellTask()


@task(log_stdout=True)
def show_file():
    # Path objects are not context managers; read the file directly.
    print(Path("/tmp/flow.me").read_text())


with Flow("three_little_tasks_flow") as flow:
    t1 = shelltask(command="echo '====== start' >> /tmp/flow.me")
    t2 = shelltask(command="date >> /tmp/flow.me; sleep 3")
    t3 = shelltask(command="echo '====== stop' >> /tmp/flow.me")

    t1.set_downstream(t2)
    t2.set_downstream(t3)
    t3.set_downstream(show_file)

if __name__ == "__main__":
    cmd = "run"
Example #28
#
# Assumes you have SnowSQL CLI installed 
# Assumes you have setup the user config
#
import prefect
from prefect import task, Flow
from prefect.tasks.shell import ShellTask

with Flow("SnowSQL") as flow:
    data_load_date = ShellTask(name='what time is it')(
        command='snowsql -d dw -s public -q "select current_timestamp()"'
    )

# to run locally use flow.run()
#flow.run()

# to register to the prefect server use flow.register
# this assumes you have a project named ExtractLoad
flow.register(project_name="ExtractLoad")
Example #29
from prefect import Task, Flow
from prefect.tasks.shell import ShellTask


class ShowOutput(Task):
    def run(self, std_out):
        print(std_out)


ls_task = ShellTask(command="ls", return_all=True)
show_output = ShowOutput()

ls_count = ShellTask(command="ls | wc -l", return_all=True)
show_output2 = ShowOutput()

flow = Flow("list_files")
show_output.set_upstream(ls_task, key="std_out", flow=flow)
show_output2.set_upstream(ls_count, key="std_out", flow=flow)

flow.run()
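For comparison, a sketch of the same dependencies expressed with the functional API (an equivalent rewrite, not part of the original example):

with Flow("list_files") as flow:
    show_output(std_out=ls_task)
    show_output2(std_out=ls_count)

flow.run()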