Exemples Python de ShellTask.ShellTask (prefect.tasks.shell.ShellTask.ShellTask)

Exemple #1

0

Afficher le fichier

def test_shell_initializes_and_runs_multiline_cmd():
    """A multi-line heredoc script runs and the result is its last echoed word."""
    script = """
    TEST=$(cat <<-END
This is line one
This is line two
This is line three
boom!
END
)
for i in $TEST
do
    echo $i
done"""
    # One dummy variable per letter a..h, each set to "test".
    dummy_env = dict.fromkeys("abcdefgh", "test")
    with Flow(name="test") as flow:
        shell_result = ShellTask()(command=script, env=dummy_env)
    state = flow.run()
    assert state.is_successful()
    assert state.result[shell_result].result == "boom!"

Exemple #2

0

Afficher le fichier

Fichier : test_shell.py Projet : shrmnk/prefect

def test_shell_task_handles_multiline_commands():
    """A multi-line command that cats every file in a temp dir returns the file's text."""
    with tempfile.TemporaryDirectory() as workdir:
        script = """
        cd {}
        for file in $(ls)
        do
            cat $file
        done
        """.format(workdir)
        # Seed the directory with the single file the loop will print.
        with open(workdir + "/testfile.txt", "w") as handle:
            handle.write("this is a test")

        with Flow(name="test") as flow:
            shell_result = ShellTask()(command=script)

        # Run while the temporary directory still exists.
        state = flow.run()

    assert state.is_successful()
    assert state.result[shell_result].result == "this is a test"

Exemple #3

0

Afficher le fichier

    def test_basic_trigger_dag_triggers(self, airflow_settings):
        """Trigger the tutorial DAG, then list its runs and check the listing's contents."""
        trigger = AirflowTriggerDAG(
            dag_id="tutorial",
            execution_date="1986-09-20",
            env=airflow_settings,
        )
        list_runs = ShellTask(
            command="airflow list_dag_runs tutorial",
            helper_script=trigger.helper_script,
            env=airflow_settings,
        )

        with Flow(name="tutorial") as flow:
            listing = list_runs(upstream_tasks=[trigger])

        run_state = flow.run()
        assert run_state.is_successful()

        listing_state = run_state.result[listing]
        assert listing_state.is_successful()

        # The CLI output must contain the run id, the "running" status and the date.
        cli_output = listing_state.result
        assert "manual__1986-09-20T00:00:00+00:00" in cli_output
        assert "running" in cli_output
        assert "1986-09-20T00:00:00+00:00" in cli_output

Exemple #4

0

Afficher le fichier

Fichier : test_shell.py Projet : zviri/prefect

def test_shell_runs_other_shells():
    """With shell="zsh", echoing $ZSH_NAME is expected to produce "zsh"."""
    with Flow(name="test") as flow:
        zsh_echo = ShellTask(shell="zsh")(command="echo -n $ZSH_NAME")
    state = flow.run()
    assert state.is_successful()
    assert state.result[zsh_echo].result == "zsh"

Exemple #5

0

Afficher le fichier

Fichier : test_shell.py Projet : zviri/prefect

def test_shell_initializes_and_multiline_output_optionally_returns_all_lines():
    """With return_all=True the result is the list of every output line."""
    with Flow(name="test") as flow:
        echoed = ShellTask(return_all=True)(command="echo -n 'hello world\n42'")
    state = flow.run()
    assert state.is_successful()
    expected_lines = ["hello world", "42"]
    assert state.result[echoed].result == expected_lines

Exemple #6

0

Afficher le fichier

Fichier : test_shell.py Projet : zviri/prefect

def test_shell_initializes_and_multiline_output_returns_last_line():
    """By default only the last line of multi-line output is returned."""
    with Flow(name="test") as flow:
        echoed = ShellTask()(command="echo -n 'hello world\n42'")
    state = flow.run()
    assert state.is_successful()
    last_line = state.result[echoed].result
    assert last_line == "42"

Exemple #7

0

Afficher le fichier

Fichier : test_shell.py Projet : zviri/prefect

def test_shell_task_raises_fail_if_cmd_fails():
    """Listing a missing directory fails the flow with an exit-code message."""
    with Flow(name="test") as flow:
        listing = ShellTask()(command="ls surely_a_dir_that_doesnt_exist")
    state = flow.run()
    assert state.is_failed()
    failure_text = str(state.result[listing].message)
    assert "Command failed with exit code" in failure_text

Exemple #8

0

Afficher le fichier

)


@task
def curl_cmd(url: str, file: str) -> str:
    """
    Build the curl command that downloads `url` into `file`.

    Both values are shell-quoted before being placed in the command line:
    an unquoted `&` or `?` in a URL query string, or a space in a path,
    would otherwise be interpreted by the shell that runs the command.
    Values made only of shell-safe characters are left unchanged.

    Args:
        url: Address to download.
        file: Destination path of the download.

    Returns:
        The command line, of the form ``curl -fL -o <file> <url>``.

    Raises:
        SKIP: If `file` already exists.
    """
    import shlex  # local import: keeps the block usable without touching file-level imports

    if Path(file).exists():
        raise SKIP("Image data file already exists.")
    # str() keeps path-like arguments working, as the original f-string did.
    return f"curl -fL -o {shlex.quote(str(file))} {shlex.quote(str(url))}"


# Shell task named "curl_task", configured with max_retries=2 and a
# 10-second retry delay.  Presumably run with the command string built by
# `curl_cmd` -- confirm against the flow definition.
download: Runable = ShellTask(name="curl_task",
                              max_retries=2,
                              retry_delay=datetime.timedelta(seconds=10))


@task(skip_on_upstream_skip=False)
def load_and_split(fname: str) -> list:
    """
    Read the image data file `fname` and cut it into frames.

    Frames are separated by four consecutive newline bytes; empty pieces
    are dropped.  Returns the frames as a list of bytes objects.
    """
    prefect.context.get("logger")
    with open(fname, "rb") as source:
        raw = source.read()

    separator = b"\n" * 4
    return list(filter(None, raw.split(separator)))

Exemple #9

0

Afficher le fichier

Fichier : prefect-pipeline.py Projet : GovReady/CUB

def match_ssp(ssp, component_spec):
    """Run the `ssp.py ... match` command and return its output lines joined by newlines."""
    matcher = ShellTask(
        command=f"python ssp.py --reader json-l match --components {component_spec} {ssp}",
        return_all=True,
    )
    lines = matcher.run()
    return "\n".join(lines)

Exemple #10

0

Afficher le fichier

#!/usr/bin/env python
#
# Requires:
# conda create -n prefect -c conda-forge prefect
#

from prefect import Flow, Parameter, task, unmapped
from prefect.executors import LocalDaskExecutor
from prefect.tasks.shell import ShellTask
from prefect.utilities.debug import raise_on_exception
from omero.cli import cli_login
from omero.gateway import BlitzGateway

# Flow parameter called "name".
name = Parameter("name")

# Shared shell task, created with return_all=True and log_stderr=True.
shell = ShellTask(return_all=True, log_stderr=True)

# Path of the OMERO command-line executable that commands are built around.
COMMAND = "/opt/omero/server/OMERO.server/bin/omero"


@task
def render(object, name):
    """Build the `render set` command that applies the idr0072 render YAML for `name` to `object`."""
    yaml_path = (
        "/uod/idr/metadata/idr0072-schormann-subcellref/"
        f"{name}/idr0072-{name}-render.yml"
    )
    return f"{COMMAND} render set {object} {yaml_path}"


@task
def list_children(name, ignore):
    with cli_login() as cli:
        conn = BlitzGateway(client_obj=cli.get_client())

Exemple #11

0

Afficher le fichier

        start_date=pendulum.datetime(2020, 4, 22, 17, 30, tz="America/Toronto"),
        interval=timedelta(days=1)
        )],
    # but only on weekdays
    filters=[filters.is_weekday],

    # and not in January TODO: Add TSX Holidays
    not_filters=[filters.between_dates(1, 1, 1, 31)]
)

#tsx_imb_fl.schedule = schedule

############## Storage ecr docker flow ##############
# Read the "docker_ecr_login" secret immediately, at module execution time.
dkr_ecr_scrt = PrefectSecret("docker_ecr_login").run()

# Execute the secret's value as a shell command, with `cd ~` as helper script.
# NOTE(review): `ecr_auth_token` is not used again in the visible code --
# confirm whether this step is still needed.
get_ecr_auth_token = ShellTask(helper_script="cd ~")
ecr_auth_token = get_ecr_auth_token.run(command=dkr_ecr_scrt)



# Request an ECR authorization token through boto3.
ecr_client = boto3.client('ecr', region_name=aws_region)
ecr_token = ecr_client.get_authorization_token()

# Decode the AWS token: it is base64 text of the form "<username>:<password>".
username, password = base64.b64decode(ecr_token['authorizationData'][0]['authorizationToken']).decode().split(':')
ecr_url = ecr_token['authorizationData'][0]['proxyEndpoint']

############################################################

# Registry URL for prefect or docker push: the proxy endpoint with its
# "https://" prefix removed.
ecr_repo_name = f"{ecr_url.replace('https://', '')}"#/{aws_ecr_repo_name}" #:latest"

Exemple #12

0

Afficher le fichier

Fichier : Shell_excel_to_csv.py Projet : Mykrass/Prefect_Shell

#
import pandas as pd
from prefect import task, Flow
from prefect.tasks.shell import ShellTask


@task()
def get_dataframe():
    """Load the local `top2000.xlsx` workbook into a DataFrame."""
    workbook = './top2000.xlsx'
    return pd.read_excel(workbook)


# Reusable shell task; the command is supplied when it is called in the flow.
my_task = ShellTask()

with Flow("shell") as f:
    # Convert the workbook with in2csv and tee the CSV into top2000.csv.
    # NOTE(review): the pipeline ends in `| ls`, so the captured output is
    # presumably a directory listing rather than the CSV -- confirm intent.
    output = my_task(command="in2csv top2000.xlsx | tee top2000.csv | ls")

# Run the flow locally and print the shell task's result.
flow_state = f.run()
shell_output = flow_state.result[output].result
print(shell_output)

Exemple #13

0

Afficher le fichier

Fichier : test_shell.py Projet : shrmnk/prefect

def test_shell_task_env_can_be_set_at_init():
    """An env mapping given to the constructor is visible to the command."""
    init_env = {"MYTESTVAR": "test"}
    with Flow(name="test") as flow:
        echoed = ShellTask(env=init_env)(command="echo -n $MYTESTVAR")
    state = flow.run()
    assert state.is_successful()
    assert state.result[echoed].result == "test"

Exemple #14

0

Afficher le fichier

Fichier : test_shell.py Projet : shrmnk/prefect

def test_shell_task_accepts_env():
    """An env mapping given at call time is visible to the command."""
    call_env = {"MYTESTVAR": "test"}
    with Flow(name="test") as flow:
        echoed = ShellTask()(command="echo -n $MYTESTVAR", env=call_env)
    state = flow.run()
    assert state.is_successful()
    assert state.result[echoed].result == "test"

Exemple #15

0

Afficher le fichier

Fichier : warehouse.py Projet : wdpressplus-bigdata/wdpressplus-bigdata

import prefect
from prefect import task, Flow
from prefect.tasks.shell import ShellTask
from prefect.tasks.templates.strings import StringFormatter

# Command template.  The backslash before the line break inside this
# (non-raw) string is a line continuation, so both lines form one
# spark-submit invocation.
# NOTE(review): `{yesterday}` is not passed explicitly where the formatter is
# called below; presumably it is filled from the Prefect run context -- confirm.
spark_submit_command = StringFormatter(template='''
spark-submit --packages org.apache.hadoop:hadoop-aws:3.2.0 \
  /opt/scripts/warehouse.py {yesterday}
''')

# Shell task created with log_stderr=True and return_all=True.
bash = ShellTask(log_stderr=True, return_all=True)

# Single-step flow: format the command, then hand it to the shell task.
with Flow('warehouse') as flow:
    bash(command=spark_submit_command())

# Run the flow locally when executed as a script.
if __name__ == '__main__':
    flow.run()

Exemple #16

0

Afficher le fichier

Fichier : test_shell.py Projet : thomasfrederikhoeck/prefect

def test_shell_log_stream_type_error_on_invalid_log_level_string(caplog):
    """Constructing a ShellTask with stream_output="FOO" must raise TypeError."""
    with pytest.raises(TypeError), raise_on_exception(), Flow(name="test"):
        ShellTask(stream_output="FOO")

Exemple #17

0

Afficher le fichier

Fichier : flow_upate_data.py Projet : chekanskiy/covid-19-data-exploration

from prefect import task, Flow, Parameter
from prefect.tasks.shell import ShellTask
from datetime import timedelta

# Shared shell runner: every command uses `cd ./scripts` as its helper script.
run_script = ShellTask(helper_script="cd ./scripts",
                       cache_for=timedelta(days=1),
                       return_all=True,
                       log_stdout=True)


@task
def dated_command(script, run_date):
    """
    Build ``python <script> --date=<run_date>`` while the flow is running.

    A Parameter only has a value during a flow run, so the command has to be
    assembled inside a task.  Formatting the Parameter into an f-string while
    the flow is being defined would embed the Parameter object's repr in the
    command instead of the date.

    Args:
        script: File name of the script to run.
        run_date: Resolved value of the `run_date` parameter.

    Returns:
        The command string to hand to the shell task.
    """
    return f"python {script} --date={run_date}"


with Flow("covid_update_data") as f:
    run_date = Parameter(name='run_date')

    # update_jhu = run_script(command="python 0_prepare_data_jhu.py")

    get_apple = run_script(command="python 1_apple_download_report.py")
    update_apple = run_script(command="python 2_prepare_data_apple.py")

    # The dated commands are produced by a task so they see the real date.
    get_rki = run_script(
        command=dated_command("3_rki_report_download.py", run_date))
    parse_rki = run_script(
        command=dated_command("4_rki_report_parse.py", run_date))
    # update_rki = run_script(command="python 5_prepare_data_rki.py")

    # Each "prepare"/"parse" step waits for its download step.
    update_apple.set_upstream(get_apple)
    parse_rki.set_upstream(get_rki)
    # update_rki.set_upstream(parse_rki)

f.run(parameters={"run_date": "2020-06-14"})
# f.visualize()

Exemple #18

0

Afficher le fichier

from prefect import Flow, task
from prefect.triggers import any_failed
from prefect.tasks.shell import ShellTask
from prefect.engine.executors import LocalDaskExecutor
from prefect.environments import LocalEnvironment


@task(max_retries=3, retry_delay=timedelta(seconds=0))
def extract_phizz():
    """
    Return the shell commands to run for the schema extract.

    Returns:
        A list with one psql command that copies the rows of
        `public.information_schema.columns`, as JSON, to
        `~/s3_bucket/extractload/schema.json`.
    """
    return [
        # schema output
        # Raw string: psql needs the backslash of `\COPY` unchanged, and `\C`
        # is not a valid escape sequence in a regular string literal.
        r'''psql "dbname='db' user='******' password='******' host='postgres.rds.amazonaws.com'" -c "\COPY (select row_to_json(t) from public.information_schema.columns as t ) to '~/s3_bucket/extractload/schema.json';"''',
    ]


# Shell task whose helper script changes into the scripts directory.
getdata = ShellTask(name='shell task',
                    helper_script='cd /home/ubuntu/prefect_scripts')

with Flow("Schema Extract") as flow:
    # Map the shell task over the list of commands returned by extract_phizz.
    phizz_data_extract = getdata.map(extract_phizz)

# Use a thread-based local Dask executor with 50 workers and no labels.
flow.environment = LocalEnvironment(
    labels=[],
    executor=LocalDaskExecutor(scheduler="threads", num_workers=50),
)

# To run locally, use flow.run()
#flow.run()

# To register with the Prefect server, use flow.register.
# This assumes a project named ExtractLoad exists.
flow.register(project_name="ExtractLoad")

Exemple #19

0

Afficher le fichier

from datetime import datetime, timedelta

import prefect
from prefect import Flow, Parameter, task
from prefect.schedules import IntervalSchedule
from prefect.tasks.shell import ShellTask
from prefect.tasks.templates.jinja2 import JinjaTemplate

## Default config settings such as this can generally be set in your
## user config file.
retry_delay = timedelta(minutes=5)

## Create all relevant tasks: "print_date" runs `date` (max_retries=1) and
## "sleep" runs `sleep 5` (max_retries=3); both use the shared retry_delay.
t1 = ShellTask(
    name="print_date", command="date", max_retries=1, retry_delay=retry_delay
)
t2 = ShellTask(name="sleep", command="sleep 5", max_retries=3, retry_delay=retry_delay)


@task(max_retries=1, retry_delay=retry_delay)
def add_7():
    """Return the context's scheduled start time, or the current UTC time if absent, plus 7 days."""
    fallback = datetime.utcnow()
    start = prefect.context.get("scheduled_start_time", fallback)
    one_week = timedelta(days=7)
    return start + one_week


## templated command; template vars will be read from both prefect.context as well as
## any passed kwargs to the task
command = """
    {% for i in range(5) %}
        echo "{{ scheduled_start_time }}"

Exemple #20

0

Afficher le fichier

Fichier : monitoring_etl_flow.py Projet : lauralorenz/ssh-etl-monitoring

## - place into database
@task(name="Format Command")
def cmd(last_date):
    """
    Build the journalctl command that collects sshd logs since the last
    date already seen.

    The start point is `last_date[-1][0]`; when `last_date` is empty or
    None, the command starts 48 hours before the current UTC time instead.
    """
    if last_date:
        since = last_date[-1][0]
    else:
        lookback_start = pendulum.now("utc").add(hours=-48)
        since = lookback_start.strftime("%Y-%m-%d %H:%M:%S")
    return f'journalctl _COMM=sshd -o json --since "{since}" --no-pager'


# Shell task named "Extract", created with return_all=True.
shell_task = ShellTask(name="Extract", return_all=True)


@task(name="Transform")
def transform(raw_data):
    """
    Takes the raw data returned from the journalctl command and filters / parses it
    down into a database-ready collection of rows.
    """
    data = [json.loads(line) for line in raw_data]
    rows = []

    user_patt = re.compile("user (.*?) from")
    network_patt = re.compile("from (.*?) port (.*?)$")

    db_path = os.path.expanduser("~/GeoLite/GeoLite2-City.mmdb")

Exemple #21

0

Afficher le fichier

    retry_delay=timedelta(minutes=1),
    nout=2,
    trigger=triggers.all_finished,
)
def create_parquet(_success):
    ts = prefect.context.scheduled_start_time
    dt_str = pd.to_datetime(ts).strftime("%Y-%m-%dT%H")
    vintage_fn = FN_STR.format(dt_str) + ".parquet"
    fn = FN_STR.format("") + ".parquet"

    df = pd.read_csv(CSV_FN, parse_dates=["dt"])
    df.to_parquet(DATA_PATH / vintage_fn, index=False)
    df.to_parquet(DATA_PATH / fn, index=False)
    return vintage_fn, fn


@task
def get_gcs_cmd(fn):
    """Build the `gsutil acl ch` command that grants AllUsers read access to `fn` under final/."""
    target = f"gs://can-scrape-outputs/final/{fn}"
    return f"gsutil acl ch -u AllUsers:R {target}"


# One shell task instance, called twice in the flow (once per parquet file).
shell = ShellTask()
# Flow created with the cron schedule "10 */2 * * *".
with Flow("UpdateParquetFiles", CronSchedule("10 */2 * * *")) as f:
    # The connection string comes from the COVID_DB_CONN_URI secret.
    connstr = EnvVarSecret("COVID_DB_CONN_URI")
    success = export_to_csv(connstr)
    vintage_fn, fn = create_parquet(success)
    # Run the gsutil ACL command for each of the two file names.
    shell(get_gcs_cmd(vintage_fn))
    shell(get_gcs_cmd(fn))

# Register the flow under the "can-scrape" project.
f.register(project_name="can-scrape")

Exemple #22

0

Afficher le fichier

Fichier : test_shell.py Projet : zviri/prefect

def test_shell_initializes_with_basic_cmd():
    """A command given to the constructor runs when the task is called with no arguments."""
    with Flow(name="test") as flow:
        echoed = ShellTask(command="echo -n 'hello world'")()
    state = flow.run()
    assert state.is_successful()
    greeting = state.result[echoed].result
    assert greeting == "hello world"

Exemple #23

0

Afficher le fichier

Fichier : wf.py Projet : BonaBeavis/bibtex-shacl-shapes

                             params=params)
    return response.text


@task
def load_file(filename: str) -> str:
    """Return the full text of `filename`, decoded as UTF-8."""
    with open(filename, mode="r", encoding="utf-8") as source:
        contents = source.read()
    return contents


@task
def printa(stuff):
    """Print `stuff` to standard output."""
    print(stuff)


# NOTE(review): this rebinds the module-level name `task`; if that name was
# the `@task` decorator used above, the decorator is unavailable from here on.
task = ShellTask(return_all=True)
with Flow("shell") as f:
    translation_server_url = "http://localhost:1969"
    # Load the BibTeX file, import it, then export it as "rdf_bibliontology".
    bibtex = load_file("./workspace/aksw-short.bib")
    zotero = import_translation(bibtex, translation_server_url)
    rdf = export_translation(zotero, translation_server_url,
                             "rdf_bibliontology")
    # NOTE(review): `rdf` is not passed to this command, and its stdout is
    # redirected to tests.ttl, so `turtle` presumably carries no output --
    # confirm intent.
    turtle = task(command="rapper - -o turtle -I www.test.com > tests.ttl")
    printa(turtle)

# Configure a Docker run with the prefecthq/prefect image, then register the
# flow under the "tutoriala" project.
f.run_config = DockerRun(image="prefecthq/prefect")
f.register(project_name="tutoriala")

# Configure extra environment variables for this flow,
# and set a custom image
# f.run()

Exemple #24

0

Afficher le fichier

Fichier : test_shell.py Projet : zviri/prefect

def test_shell_returns_none_if_empty_output():
    """A command whose output is sent to /dev/null yields a None result."""
    with Flow(name="test") as flow:
        silenced = ShellTask()(command="ls > /dev/null")
    state = flow.run()
    assert state.is_successful()
    captured = state.result[silenced].result
    assert captured is None

Exemple #25

0

Afficher le fichier

Fichier : regionaal.py Projet : Michelangelo367/nl-open-data

    jobs = []
    for k, v in data.items():
        jobs.append(
            bq.load_table_from_dataframe(
                dataframe=v,
                destination=".".join(["cbs", k]),
                project=GCP.project,
                job_config=job_config,
                location=GCP.location,
            ))
    return jobs


# Required flow parameters, plus a shell task named "curl_download".
# NOTE(review): the only use of `curl_download` in the visible code is
# commented out -- confirm whether it is still needed.
gcp = Parameter("gcp", required=True)
filepath = Parameter("filepath", required=True)
curl_download = ShellTask(name="curl_download")

with Flow("CBS regionaal") as flow:
    # # TODO: fix UnicodeDecodeError when writing to Google Drive
    curl_command = curl_cmd(URL_PC6HUISNR, filepath)
    # curl_download = curl_download(command=curl_command)
    # gwb = pc6huisnr_to_gbq(zipfile=filepath, GCP=gcp, upstream_tasks=[curl_download])
    # Map the load over ODATA_REGIONAAL; `gcp` is passed unmapped to every run.
    regionaal = cbsodatav3_to_gbq.map(
        id=ODATA_REGIONAAL,
        GCP=unmapped(gcp),
        task_args={'skip_on_upstream_skip': False})
    # Column descriptions are mapped over the same ids, downstream of the load.
    regionaal_column_description = column_descriptions.map(
        table_id=ODATA_REGIONAAL,
        GCP=unmapped(gcp),
        upstream_tasks=[regionaal])

Exemple #26

0

Afficher le fichier

Fichier : test_shell.py Projet : zviri/prefect

def test_shell_raises_if_no_command_provided():
    """Running a ShellTask that was given no command must raise TypeError."""
    with Flow(name="test") as flow:
        ShellTask()()
    with pytest.raises(TypeError), raise_on_exception():
        assert flow.run()

Exemple #27

0

Afficher le fichier

"""
A quick demo of three little shell tasks
"""
import sys
from pathlib import Path

from prefect import Flow, task
from prefect.schedules import Schedule
from prefect.schedules.clocks import CronClock
from prefect.tasks.shell import ShellTask

# Single ShellTask instance, called once per shell step in the flow.
shelltask = ShellTask()


@task(log_stdout=True)
def show_file():
    """
    Print the contents of /tmp/flow.me.

    The file is read with `Path.read_text()`, which opens and closes it
    itself.
    """
    # `with Path(...)` never opened the file: using a Path as a context
    # manager was deprecated in Python 3.11 and removed in 3.13.
    print(Path("/tmp/flow.me").read_text())


with Flow("three_little_tasks_flow") as flow:
    # Each shell step appends to /tmp/flow.me; the second also sleeps 3 seconds.
    t1 = shelltask(command="echo '====== start' >> /tmp/flow.me")
    t2 = shelltask(command="date >> /tmp/flow.me; sleep 3")
    t3 = shelltask(command="echo '====== stop' >> /tmp/flow.me")

    # Chain the steps in order: t1 -> t2 -> t3 -> show_file.
    t1.set_downstream(t2)
    t2.set_downstream(t3)
    t3.set_downstream(show_file)

if __name__ == "__main__":
    cmd = "run"

Exemple #28

0

Afficher le fichier

#
# Assumes you have SnowSQL CLI installed 
# Assumes you have setup the user config
#
import prefect
from prefect import task, Flow
from prefect.tasks.shell import ShellTask

with Flow("SnowSQL") as flow:
    # Single shell step: ask SnowSQL (database dw, schema public) for the
    # current timestamp.
    data_load_date = ShellTask(
    name='what time is it')(command='snowsql -d dw -s public -q "select current_timestamp()"')

# To run locally, use flow.run()
#flow.run()

# To register with the Prefect server, use flow.register.
# This assumes a project named ExtractLoad exists.
flow.register(project_name="ExtractLoad")

Exemple #29

0

Afficher le fichier

Fichier : shelltask.py Projet : joshmeek/dummy-flows

from prefect import Task, Flow
from prefect.tasks.shell import ShellTask


class ShowOutput(Task):
    """Task that prints whatever it receives as `std_out`."""

    def run(self, std_out):
        """Print `std_out` to standard output."""
        print(std_out)


# `ls`, paired with its own ShowOutput instance.
ls_task = ShellTask(command="ls", return_all=True)
show_output = ShowOutput()

# `ls | wc -l`, paired with a second ShowOutput instance.
ls_count = ShellTask(command="ls | wc -l", return_all=True)
show_output2 = ShowOutput()

# Build the flow imperatively: each shell task is set upstream of its
# ShowOutput task, with its result bound to the `std_out` argument.
flow = Flow("list_files")
show_output.set_upstream(ls_task, key="std_out", flow=flow)
show_output2.set_upstream(ls_count, key="std_out", flow=flow)

# Run the flow immediately when this module executes.
flow.run()