예제 #1
0
def data_pipeline() -> co.Serial:
    """
    `conducto-data-pipeline` is a pipeline-local key-value store.
    This data is only visible to your pipeline and persists until your
    pipeline is archived. One useful application is storing binaries in a
    build node, and retrieving them in a later test node. We exercise the
    `put` and `get` commands to do this.
    """
    # Build the binary, then stash it under a named key.
    build_script = (
        "set -ex\n"
        "go build -o bin/app ./app.go\n"
        "conducto-data-pipeline put --name my_app_binary --file bin/app\n"
    )
    # Retrieve the stashed binary and exercise it.
    test_script = (
        "set -ex\n"
        "conducto-data-pipeline get --name my_app_binary --file /tmp/app\n"
        "/tmp/app --test\n"
    )

    # Dockerfile installs golang and conducto.
    image = co.Image(dockerfile="./docker/Dockerfile.data", context=".",
                     copy_dir="./code")
    with co.Serial(image=image, doc=co.util.magic_doc()) as build_and_test:
        build_and_test["usage"] = co.Exec("conducto-data-pipeline --help")
        build_and_test["build"] = co.Exec(build_script)
        build_and_test["test"] = co.Exec(test_script)
    return build_and_test
예제 #2
0
def disambiguate() -> co.Parallel:
    """Show how kwargs are routed between native functions and node params."""
    with co.Parallel(image=co.Image(copy_dir=".")) as node:

        # unambiguous: every kwarg here belongs to conducto.Node.__init__
        co.Exec('''echo "node has 1.5 cpu's"''', name="A", cpu=1.5)

        # positional/keyword args bind to the native function first;
        # adjust the node object afterwards, then attach it to its parent
        child = co.Exec(myfunc, "DDR4-2933 (quad channel)", cpu=2950)
        child.set(cpu=0.75, mem=1.5)
        node["B"] = child

        # or attach it first and adjust the node in place
        node["C"] = co.Exec(myfunc, "DDR4-2667 (dual channel)")
        node["C"].set(cpu=0.75, mem=1.5)

        # some non-custom types lack an obvious string representation
        payload = {"foo": 2, "bar": 3}
        func(payload)

        # so serialize such values yourself
        node["D"] = co.Exec(wrappedfunc, json.dumps(payload))

        # custom types work too, given serialization helpers
        emoticon = Emoticon(happy=True)
        node["E"] = co.Exec(describe, emoticon)

    return node
예제 #3
0
def run() -> co.Serial:
    # Pull the ieuler repo into a python:3.7 image, install its
    # requirements, then run its pytest suite.
    repo_image = co.Image(
        "python:3.7",
        copy_branch="master",
        copy_url="https://github.com/liamcryan/ieuler.git",
    )
    with co.Serial(image=repo_image, doc=co.util.magic_doc()) as pipeline:
        pipeline["build"] = co.Exec('pip install -r requirements.txt')
        pipeline["tests"] = co.Exec('pytest')
    return pipeline
예제 #4
0
def get_image():
    """Return a slim Python 3.8 image with wget and the analysis stack."""
    py_deps = [
        "conducto", "numpy", "pandas", "scanpy", "ipdb",
        "leidenalg", "tabulate",
    ]
    return co.Image(
        "python:3.8-slim",
        copy_dir=".",
        reqs_packages=["wget"],
        reqs_py=py_deps,
    )
예제 #5
0
def primes_less_than(n: int) -> co.Serial:
    """Return a one-node pipeline that sieves for primes below *n*."""
    image = co.Image(copy_dir=".")
    with co.Serial(image=image) as root:
        root["find primes"] = co.Exec(sieve, n)
    return root
예제 #6
0
def _get_image():
    """Slim Python 3.8 image carrying this directory and the demo's deps."""
    py_deps = ["conducto", "boto3", "pandas", "sklearn", "matplotlib", "ipdb"]
    return co.Image("python:3.8-slim", copy_dir=".", reqs_py=py_deps)
예제 #7
0
def get_image():
    """Image with python deps, a couple of OS tools, and docker support."""
    py_deps = ["flask", "black"]
    os_deps = ["curl", "vim"]
    return co.Image("python:3.8-slim",
                    copy_dir=".",
                    reqs_py=py_deps,
                    reqs_packages=os_deps,
                    reqs_docker=True)
예제 #8
0
def primes_less_than(n) -> co.Serial:
    """Sieve primes below *n* into a file, then sanity-check the output."""
    n = int(n)
    image = co.Image(copy_dir=".")

    # SameContainer.NEW: all steps share one container, so they can pass
    # the `primes` file between them via the local filesystem.
    with co.Serial(same_container=co.SameContainer.NEW, image=image) as root:
        root["find primes"] = co.Exec(f"python sieve.py {n}")
        if n >= 3:
            root["check distribution"] = co.Exec(f"cat primes | python check.py {n}")
        root["is 2 included?"] = co.Exec("egrep '^2$' primes")

    return root
예제 #9
0
def run() -> co.Serial:
    # Read the replicate count from config.ini, fan the experiment out in
    # parallel, then plot the combined results.
    cfg = configparser.ConfigParser()
    cfg.read('config.ini')
    reps = cfg['params']['replicates']  # replicate count (string from ini)
    print(f'running with {reps} replicates')
    image = co.Image(image="gbly/miniconda3", copy_dir=".",
                     reqs_py=['conducto==0.0.67'])
    with co.Serial(image=image, doc=co.util.magic_doc()) as pipeline:
        # Lazy defers building the parallel subtree until the node runs
        pipeline["parallel_experiment"] = co.Lazy(parallelize_reps, reps=int(reps))
        pipeline["plot_data"] = co.Exec(plot_reps, reps=int(reps))
    return pipeline
예제 #10
0
def build(projects: typing.List[str]) -> co.Parallel:
    """Build every project in parallel with a plain `docker build`."""
    # The parent's image lacks docker, so override with one that has it.
    docker_img = co.Image(image="docker:19.03", copy_dir=".")

    node = co.Parallel(image=docker_img, requires_docker=True)
    for project in projects:
        # each command needs docker; children inherit the flag from above
        node[project] = co.Exec(f"cd {project} && docker build .")
    return node
예제 #11
0
def pr(branch) -> co.Parallel:
    """Poke around the repo contents of `branch` inside python:alpine."""
    # Image carries the whole repo, checked out at the given branch.
    image = co.Image("python:alpine", copy_repo=True, copy_branch=branch)

    # Three parallel commands that inspect the repo's files.
    root = co.Parallel(image=image)
    root["print branch"] = co.Exec(f"echo {branch}")
    root["print working directory"] = co.Exec("pwd")
    root["list files"] = co.Exec("ls -la")
    return root
예제 #12
0
def build_and_test() -> co.Serial:
    """Run the US/China trading scripts, then the top-K pipelines."""
    image = co.Image(copy_dir="./code")
    # stop_on_error=False: still run TopK even if a Trade step fails
    with co.Serial(image=image, stop_on_error=False) as pipeline:
        with co.Parallel(name="Trade") as trade:
            trade['US'] = co.Exec("python3 first_stock_trading.py")
            trade['CHINA'] = co.Exec("python3 second_stock_trading.py")
        with co.Parallel(name="TopK") as topk:
            topk['US'] = co.Exec("python3 first_topK_stock_pipeline.py")
            topk['CHINA'] = co.Exec("python3 second_topK_stock_pipeline.py")

    return pipeline
예제 #13
0
def download_and_plot() -> co.Serial:
    """Fetch the RDS-2005-0004 rainfall archive, then plot daily/monthly."""
    download_command = """
            apt update -y && apt install -y curl unzip
            curl https://www.fs.usda.gov/rds/archive/products/RDS-2005-0004/RDS-2005-0004.zip > data.zip
            unzip data.zip
        """
    image = co.Image(dockerfile='./Dockerfile', context='.')
    with co.Serial(image=image) as pipeline:
        pipeline["download"] = co.Exec(download_command)
        with co.Parallel(name='plot') as plots:
            plots['daily'] = co.Exec('python rainfall.py')
            plots['monthly'] = co.Exec('python rainfall.py --resample M --save')
    return pipeline
예제 #14
0
 def get(cls, image=None, dockerfile=None, reqs_py=None, name=None):
     """
     Build a co.Image from per-call args plus class-level defaults.

     Exactly one of `image` (base image name) or `dockerfile` must be
     given.  Copy/context settings come from class attributes
     (cls.context, cls.copy_dir, cls.copy_url, cls.copy_branch,
     cls.path_map).  NOTE(review): takes `cls` — presumably decorated
     with @classmethod outside this view; confirm.
     """
     assert image is not None or dockerfile is not None
     return co.Image(
         image=image,
         dockerfile=dockerfile,
         context=cls.context,
         reqs_py=reqs_py,
         copy_dir=cls.copy_dir,
         copy_url=cls.copy_url,
         copy_branch=cls.copy_branch,
         path_map=cls.path_map,
         name=name,
     )
예제 #15
0
def main() -> co.Serial:
    """Verify access, deploy, run the integration test, then tear down."""
    image = co.Image(dockerfile="./Dockerfile", copy_dir=".")
    with co.Serial(doc=__doc__, image=image, env=get_env()) as root:
        root["access check"] = access = co.Parallel()
        access["Heroku"] = co.Exec(TEST_HEROKU)
        access["RedisLabs"] = co.Exec(TEST_REDIS)
        root["deploy"] = deploy()
        root["integration test"] = co.Exec(INTEGRATION_TEST)
        root["teardown"] = teardown = co.Parallel()
        teardown["clear data"] = co.Exec(CLEAR_DATA)
        teardown["stop"] = co.Exec(STOP_APP)
        teardown["destroy"] = co.Exec(DESTROY_APP)
    return root
예제 #16
0
def ci_cd(projects=None) -> co.Serial:
    """
    Build all projects, run tests if builds succeed, then deploy if tests pass.

    `projects` defaults to `utils.get_projects()`, resolved at call time.
    The original evaluated it in the `def` line, freezing the project list
    at import time — a classic Python default-argument pitfall.
    """
    if projects is None:
        projects = utils.get_projects()

    # Use the standard python 3.8 image as a base and add all files from
    # the current dir. We also need to install conducto in the image in
    # order to dynamically generate the tree with Lazy in test().
    img = co.Image(image="python:3.8", copy_dir=".", reqs_py=["conducto"])

    output = co.Serial(image=img)
    output["Build"] = build(projects)
    output["Test"] = test(projects)
    output["Deploy"] = co.Exec("echo aws cloudformation deploy")
    return output
예제 #17
0
def main() -> co.Serial:
    """Check creds; init infra/image/lint/tests; deploy; test; clean up."""
    image = co.Image(dockerfile="./Dockerfile", reqs_docker=True)
    with co.Serial(image=image, env=get_env(), doc=__doc__) as root:
        root["Check AWS Creds"] = co.Exec(CHECK_AWS_CREDS)
        with co.Parallel(name="Init", doc=INIT_DOC) as init:
            init["Deploy Infra"] = deploy_infra()
            init["Deploy Image"] = deploy_image()
            init["Lint"] = co.Exec("black --check .")
            init["Unit Test"] = co.Exec("python service/test.py --verbose")
        root["Deploy Service"] = deploy_service()
        root["Integration Test"] = co.Exec(INTEGRATION_CMD, doc=INTEGRATION_DOC)
        root["Cleanup"] = cleanup()
    return root
예제 #18
0
def pipeline() -> co.Serial:
    """Lazily build this month's nodes; deps are baked into the image."""
    # node definition is deferred until the first node actually runs
    root = co.Lazy(nodes_for_this_month)

    # conducto installs the python/OS dependencies into the image
    root.image = co.Image(
        copy_url="https://github.com/MatrixManAtYrService/sandboxen",
        copy_branch="master",
        path_map={".": "./fortune_witherror"},
        reqs_py=["conducto", "sh"],
        reqs_packages=["fortune"],
    )
    return root
예제 #19
0
def run(branch: str) -> co.Serial:
    """
    Build a CI tree for `branch` and wire every Exec node to report its
    state (pending/success/failure) to GitHub commit statuses.
    """
    image = co.Image(image="python:3.6", reqs_py=["conducto"])
    root = co.Serial(image=image)
    # One shared container for the whole build so steps can share files.
    with co.Serial(same_container=co.SameContainer.NEW, cpu=12,
                   mem=32) as build:
        build["fetch"] = co.Exec("echo im fetching")
        build["checkout"] = co.Exec("echo im checking out")
        # Static-analysis and test placeholders run in parallel.
        with co.Parallel(name="checks") as checks:
            checks["yapf"] = co.Exec("echo checking yapf")
            checks["python_tests"] = co.Exec(
                "echo checking python tests")
            checks["flake8"] = co.Exec(
                "echo checking flake8")
            checks["pylint"] = co.Exec(
                "echo im checking pylint")
            checks["mypy"] = co.Exec("echo im checking mypy")
            checks["cppcheck"] = co.Exec(
                "echo im checking cppcheck")
            checks["clang_format"] = co.Exec(
                "echo im checking clang_format")

        build["build"] = co.Exec('echo im building now')

    root["build"] = build

    # Pull the GitHub token out of the user's conducto secrets.
    auth_token = co.api.Auth().get_token_from_shell()
    access_token = co.api.Secrets().get_user_secrets(
        auth_token)["GITHUB_ACCESS_TOKEN"]
    # Resolve the branch head's SHA.  NOTE(review): shell=True with
    # `branch` interpolated into the command line is shell-injection-prone
    # if `branch` can come from untrusted input — a list argv would be safer.
    stdout = subprocess.check_output(
        f"git ls-remote [email protected]:jmazar/conduco_statuses.git refs/heads/{branch} | cut -f1",
        shell=True)
    sha = stdout.decode("utf-8").strip()
    print(sha)
    print(access_token)
    # Factory producing status-setting callbacks bound to this repo + sha.
    creator = co.callback.github_status_creator(
        owner="jmazar",
        repo="conduco_statuses",
        sha=sha,
        access_token=access_token,
    )

    # Attach a status callback to every Exec node in the tree.
    for node in root.stream():
        if isinstance(node, co.Exec):
            node.on_queued(creator(state="pending"))
            node.on_done(creator(state="success"))
            node.on_error(creator(state="failure"))
    return root
예제 #20
0
def pipeline() -> co.Parallel:
    """
    Customize an Image
    """

    # the previous example passed a plain string via the `image` node
    # parameter:
    #
    #     root["world"] = co.Exec(f"{cmd}",
    #                             image="node:current-alpine")
    #                                    ^
    # like this -------------------------┘

    # a co.Image object unlocks more options: a shareable name, extra OS
    # packages, and code copied in from a git repo/branch
    img = co.Image(
        name="my-shared-image",
        image="node:current-alpine",
        reqs_packages=["tree"],
        copy_url="https://github.com/conducto/conducto",
        copy_branch="main",
    )

    # children inherit the `image` parameter set on their parent
    root = co.Parallel(image=img)

    # this node runs ok
    root["Look around"] = co.Exec("""
        tree -L 2 ;
        find . -name 'hello_.*'
        """)

    # these nodes have problems — explore the pipeline to understand them,
    # then change this file and relaunch to see the results
    root["Hi from Node"] = co.Exec("node hello_py_js/hello.js")
    root["Hi from C++"] = co.Exec("""
        g++ hello_cpp/hello.cpp -o hello ;
        ./hello
        """)

    root = tour.guide(root)

    return root
예제 #21
0
def data_pipeline() -> co.Serial:
    """
    ### **`co.data.pipeline`**
    `co.data.pipeline` is a pipeline-local key-value store. This data is only
    visible to your pipeline and persists until your pipeline is deleted. It
    is useful for writing data in one pipeline step, and reading it in another.

    `co.data.pipeline` has both a python and command line interface as
    `conducto-data-pipeline`. The first node of the example prints the command line
    usage to show the full interface.

    ### Example: Parameter Search
    One useful application is performing and summarizing a parameter search.
    In this example, we try different parameterizations of an algorithm in
    parallel. Each one stores its results using `co.data.pipeline.puts()`. Once
    all of the parallel tasks are done, it reads the results using
    `co.data.pipeline.gets()` and prints a summary.
    """
    # Dockerfile installs python, R, and conducto.
    image = co.Image(dockerfile="docker/Dockerfile.data",
                     context=".",
                     copy_dir="./code",
                     reqs_py=["conducto"])

    data_dir = "demo/data_science/data"

    output = co.Serial(image=image, doc=co.util.magic_doc())
    output["usage"] = co.Exec("conducto-data-pipeline --help")

    # Three-level parallel sweep: window -> mean -> volatility.
    search = co.Parallel()
    output["parameter_search"] = search
    for window in [25, 50, 100]:
        by_window = co.Parallel()
        search[f"window={window}"] = by_window

        for mean in [.05, .08, .11]:
            by_mean = co.Parallel()
            by_window[f"mean={mean}"] = by_mean

            for volatility in [.1, .125, .15, .2]:
                by_mean[f"volatility={volatility}"] = co.Exec(
                    f"python data.py --window={window} --mean={mean} "
                    f"--volatility={volatility} --data-dir={data_dir}")

    # Summarize all the stored results in R.
    output["summarize"] = co.Exec(f"Rscript data.R {data_dir}")

    return output
예제 #22
0
def cicd() -> co.Serial:
    """Install, test, build, and deploy the its-hot app in one container."""
    image = co.Image("node:current-alpine",
                     copy_url="https://github.com/flippedcoder/its-hot",
                     copy_branch="master")

    # SameContainer.NEW keeps node_modules and build output across steps.
    pipeline = co.Serial(image=image, same_container=co.SameContainer.NEW)

    pipeline["Install dependencies..."] = co.Exec("npm i")
    # Fix: "CI=true; npm test" set an unexported shell variable, so npm
    # never saw CI.  Prefixing the command puts it in npm's environment.
    pipeline["Running tests..."] = co.Exec("CI=true npm test")
    # Fix: "npm build" is an internal no-op; the project's build script
    # runs via "npm run build".
    pipeline["Build project..."] = co.Exec("npm run build")
    pipeline["Deploy project..."] = co.Exec(
        "echo secret stuff to deploy to an S3 bucket on AWS")

    return pipeline
예제 #23
0
def main() -> co.Serial:
    """
    Exercise conducto's error callbacks: plain retries, retry-then-skip,
    memory-doubling retries, memory-error handling, and tolerating a
    bounded number of child errors.
    """
    img = co.Image(copy_dir=".", reqs_py=["conducto"])
    with co.Serial(image=img, stop_on_error=False) as node:
        node["retry"] = co.Exec(fail_then_pass, "retry", 2)
        node["retry"].on_error(co.callback.retry(3))

        node["retry_2"] = co.Exec(fail_then_pass, "retry2", 3)
        node["retry_2"].on_error(co.callback.retry(2))

        node["retry_then_skip"] = co.Exec(fail_then_pass, "retry_then_skip", 3)
        node["retry_then_skip"].on_error(co.callback.retry_then_skip(2))

        node["retry_then_skip_2"] = co.Exec(fail_then_pass, "retry_then_skip", 2)
        node["retry_then_skip_2"].on_error(co.callback.retry_then_skip(3))

        node["retry_with_double_mem"] = co.Exec(
            fail_then_pass, "retry_with_double_mem", 2)
        node["retry_with_double_mem"].on_error(
            co.callback.retry_with_double_mem(3))

        # tolerate up to 2 failing children of this subtree
        skip_some = co.Serial(stop_on_error=False)
        skip_some['pass'] = co.Exec('echo hi')
        skip_some['fail1'] = co.Exec('echo hi | grep foo')
        skip_some['fail2'] = co.Exec('echo hi | grep bar')
        skip_some.on_error(co.callback.skip_some_errors(2))
        node["skip_some_errors"] = skip_some

        node["handle_memory_errors"] = co.Exec(
            fail_then_pass, "retry_with_double_mem", 0)
        node["handle_memory_errors"].on_error(
            co.callback.handle_memory_errors())

    node.on_done(co.callback.email(to="*****@*****.**"))

    return node
예제 #24
0
{df.transpose().round(2).to_markdown()}
</ConductoMarkdown>
    """)


############################################################
# Constants and globals
############################################################
# Bulk-download file parked in pipeline-local storage.
DATA_PATH = "/conducto/data/pipeline/steo.txt"

# Series-name regexes selecting each dataset from the STEO bulk file.
DATASETS = {
    "Heating Degree Days": r"^STEO.ZWHD_[^_]*\.M$",
    # Fix: the '.' before 'M' was unescaped (unlike its siblings), so it
    # matched any character instead of a literal dot.
    "Cooling Degree Days": r"^STEO.ZWCD_[^_]*\.M$",
    "Electricity Generation": r"^STEO.NGEPGEN_[^_]*\.M$",
}

IMG = co.Image("python:3.8",
               copy_dir=".",
               reqs_py=["conducto", "pandas", "matplotlib", "tabulate"])

# Data is downloaded from the United States Energy Information Administration.
# https://www.eia.gov/opendata/bulkfiles.php
DOWNLOAD_COMMAND = f"""
echo "Downloading"
curl http://api.eia.gov/bulk/STEO.zip > steo.zip
unzip -cq steo.zip > {DATA_PATH}
""".strip()

if __name__ == "__main__":
    co.main(default=run)
예제 #25
0
import conducto as co
from inspect import cleandoc
import sys

# Docker Images
###############

# Image for stepping the game of life one tick at a time; built from the
# Dockerfile in ./conway with that directory as the build context.
game_of_life = co.Image(dockerfile='conway/Dockerfile', context='conway')

# Command Templates
###################

# Prefix for every command: bash strict mode so errors fail fast and loudly.
header = "set -euo pipefail"

# create the start state and stash it
initialize_grid = cleandoc('''
    {header}
    to_grid '0010000000
             0010000011
             0010100011
             0100010000
             0101110001
             0100000001
             0001000111
             0010100000
             0101010010
             0010011000' > grid.json

    # store it as the only item in a list (subsequent grids coming soon)
예제 #26
0
import conducto as co

# Alpine Python image populated with the hello-world repo (master branch).
# NOTE(review): path_map appears to map the local ./local-copy/p directory
# onto the in-image path "p" — confirm against conducto's path_map docs.
python_img = co.Image(
    image="python:3.8-alpine",
    copy_url="https://github.com/leachim6/hello-world",
    copy_branch="master",
    path_map={"./local-copy/p": "p"},
)


def hello() -> co.Serial:
    """One-step pipeline that runs the repo's python3 hello-world."""
    root = co.Serial()
    root["Say Hi"] = co.Exec("python p/python3.py", image=python_img)
    return root


if __name__ == "__main__":
    co.main(default=hello)
예제 #27
0
import conducto as co
import json
from pathlib import Path

# commands.py and experiment.py are in the same folder as this file
from experiment import genomes, genes

# ncbi's published BLAST image plus this directory, the python deps the
# pipeline steps import, and wget/gzip for fetching FASTA files.
img = co.Image(
    image="ncbi/blast",  # use the BLAST image published by ncbi on dockerhub
    copy_dir=".",  # add this directory
    reqs_py=["conducto", "biopython", "pandas"],
    reqs_packages=["wget", "gzip"],
)

# Pipeline-local scratch directory shared between nodes.
data_dir = "/conducto/data/pipeline"


def download_file(source_url, target_path) -> co.Serial:
    """Return a Serial node that fetches a gzipped FASTA file and unpacks it."""
    parent_dir = Path(target_path).parent

    steps = co.Serial()
    # create the destination directory, then fetch the .gz alongside it
    steps["Download"] = co.Exec(
        f"mkdir -p {parent_dir} && wget -O {target_path}.gz {source_url}")
    steps["Decompress"] = co.Exec(f"gunzip -c {target_path}.gz > {target_path}")
    return steps


def analyze(hits):
예제 #28
0
    with co.Serial(stop_on_error=False, doc=__doc__) as root:

        # inner context: stop on errors
        # don't bother testing a failed deployment
        with co.Serial(name="run", stop_on_error=True) as run:
            run["deploy"] = deploy()
            run["test"] = test()

        # stop services
        root["clean up"] = teardown()

    return root


# test tools
# Image for the test nodes: redis + curl OS packages and the conducto lib.
test_img = co.Image(reqs_packages=["redis", "curl"], reqs_py=["conducto"])


def test() -> co.Serial:
    """
    Check if both redis and flask are available, then see if they're
    working together.

    Returns the Serial test node.  (The original fell off the end and
    returned None despite its `-> co.Serial` annotation, so any caller
    attaching this subtree got nothing.)
    """
    with co.Serial(image=test_img) as test:

        with co.Parallel(name="services up?") as check:
            check["redis up?"] = co.Exec(TEST_REDIS_CMD)
            check["flask up?"] = co.Exec(TEST_FLASK_CMD)

    # NOTE(review): kept outside the `with`, as in the original; attaching
    # a child after the context exits still adds it to `test`.
    test["integration test"] = co.Exec(INTEGRATION_TEST_CMD)

    return test
import conducto as co

# Image built from the local Dockerfile; provides the pokemonsay command
# used by hello() below.
img = co.Image(dockerfile="Dockerfile")


def hello() -> co.Serial:
    """One-node pipeline: Oddish says hi via pokemonsay."""
    root = co.Serial(image=img)
    root["Say Hi"] = co.Exec("pokemonsay -pn Oddish 'Hi'")
    return root


if __name__ == "__main__":
    co.main(default=hello)
예제 #30
0
import conducto as co

# co.Image() with no arguments — presumably conducto's default base image;
# confirm against the co.Image documentation.
IMG = co.Image()


def main() -> co.Serial:
    """Toy pipeline: three parallel init steps, then deploy and test."""
    root = co.Serial(image=IMG, requires_docker=True)
    init = co.Parallel()
    init["Build"] = co.Exec("sleep 3")
    init["Lint"] = co.Exec("sleep 1")
    init["Unit Test"] = co.Exec("sleep 1.5")
    root["Init"] = init
    root["Deploy"] = co.Exec("sleep 4")
    root["Integration Test"] = co.Exec("sleep 2")
    return root


if __name__ == "__main__":
    co.main(default=main)