Example #1
def _redis_wrapper() -> co.Serial:
    """
    This is a simple wrapper that starts and stops a local redis instance
    around our *redis_data_store* example. This is just to mock a real
    redis service you might have running externally. The details of how
    this works are not critical right now. We use Conducto features
    `stop_on_error` and `requires_docker` that are discussed in a later
    tutorial. **Focus on the *redis_data_store* node for now.**
    """

    name = "conducto_demo_redis"
    mock_redis_start_cmd = f"""set -ex
docker run -p 6379:6379 -d --rm --name {name} redis:5.0-alpine
sleep 1 # wait for redis to start up
docker logs --details {name}
# error if redis container not running
docker inspect {name} --format="{{{{.State.Running}}}}"
"""
    mock_redis_stop_cmd = f"docker stop {name} || true"

    with co.Serial(
            image="docker:19.03",
            stop_on_error=False,
            requires_docker=True,
            doc=co.util.magic_doc(doc_only=True),
    ) as wrapper:
        co.Exec(mock_redis_start_cmd, name="mock_redis_start")
        wrapper["redis_data_store"] = redis_data_store()
        co.Exec(mock_redis_stop_cmd, name="mock_redis_stop")
    return wrapper
Example #2
def disambiguate() -> co.Parallel:
    with co.Parallel(image=co.Image(copy_dir=".")) as node:

        # no ambiguity here, all kwargs refer to conducto.Node.__init__
        co.Exec('''echo "node has 1.5 CPUs"''', name="A", cpu=1.5)

        # native method parameters come first
        # modify the node object in a second step, then connect it to its parent
        node_obj = co.Exec(myfunc, "DDR4-2933 (quad channel)", cpu=2950)
        node_obj.set(cpu=0.75, mem=1.5)
        node["B"] = node_obj

        # or connect it to its parent, then modify it in place
        node["C"] = co.Exec(myfunc, "DDR4-2667 (dual channel)")
        node["C"].set(cpu=0.75, mem=1.5)

        # some non-custom types don't have obvious string representations;
        # a plain Python call like func(payload) works locally, but the
        # argument can't be serialized into an Exec node as-is
        payload = {"foo": 2, "bar": 3}

        # so you may have to handle the serialization yourself
        node["D"] = co.Exec(wrappedfunc, json.dumps(payload))

        # custom types work, but you need to provide helpers
        param_obj = Emoticon(happy=True)
        node["E"] = co.Exec(describe, param_obj)

    return node
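The `Emoticon` and `describe` helpers are defined elsewhere. One plausible shape for them, hedged since the exact serialization contract isn't shown here: the object knows how to round-trip itself through a string, and the receiving function rebuilds it.

# hypothetical sketch of the helpers assumed by node "E"
class Emoticon:
    def __init__(self, happy: bool):
        self.happy = happy

    # serialize for the trip into the Exec node
    def to_str(self) -> str:
        return "happy" if self.happy else "sad"

    @staticmethod
    def from_str(s: str) -> "Emoticon":
        return Emoticon(happy=(s == "happy"))

def describe(mood: str):
    # rebuild the object before using it
    emoticon = Emoticon.from_str(mood)
    print(":-)" if emoticon.happy else ":-(")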
Example #3
def redis_data_store() -> co.Exec:
    """
    There are many standard ways to store persistent data: databases,
    AWS S3, and in-memory caches like redis. An exec node can run any
    shell command, so it is easy to use any of these approaches. Here
    we populate environment variables pointing to our redis service,
    allowing us to write to and read from redis in a python script.
    """

    # export_cmd is just a hack to set REDIS_HOST to our mock instance
    export_cmd = (
        "export REDIS_HOST=$(ip route show default | awk '/default/{print $3}')"
    )
    redis_write_cmd = f"{export_cmd} && python code/redis_example.py --write"
    redis_read_cmd = f"{export_cmd} && python code/redis_example.py --read"

    env = {
        "REDIS_HOST": "override_me",
        "REDIS_PORT": "6379",
    }
    with co.Serial(image=utils.IMG, env=env,
                   doc=co.util.magic_doc()) as redis_store:
        co.Exec(redis_write_cmd, name="redis_write")
        co.Exec(redis_read_cmd, name="redis_read")
    return redis_store
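`code/redis_example.py` isn't shown above; a minimal sketch of what it might contain, using the standard redis-py client (the key name and payload are made up):

# code/redis_example.py -- hypothetical sketch
import argparse
import os
import redis

parser = argparse.ArgumentParser()
parser.add_argument("--write", action="store_true")
parser.add_argument("--read", action="store_true")
args = parser.parse_args()

# connect using the env vars the pipeline populated
r = redis.Redis(host=os.environ["REDIS_HOST"],
                port=int(os.environ["REDIS_PORT"]))
if args.write:
    r.set("demo_key", "hello from the write node")
if args.read:
    print(r.get("demo_key"))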
Example #4
def life() -> co.Serial:

    with co.Serial(image=game_of_life) as pipeline:

        pipeline["initialize grid"] = co.Exec(initialize_grid)

        image_names = []
        # TODO: instead of modeling a fixed number of clock ticks
        # use a lazy node to extend this until a grid state is repeated
        for tick in ticks:
            with co.Serial(name=f"tick {tick}",
                           image=game_of_life) as iteration:

                iteration["show grid"] = co.Exec(show_grid(tick))
                iteration["find neighbors"] = co.Exec(find_neighborhoods(tick))

                with co.Parallel(name="apply_rules",
                                 image=game_of_life) as rules:

                    rules["isolate"] = co.Exec(isolate(tick))
                    rules["survive"] = co.Exec(survive(tick))
                    rules["crowd"] = co.Exec(crowd(tick))
                    rules["reproduce"] = co.Exec(reproduce(tick))
                    rules["ignore"] = co.Exec(ignore(tick))

                iteration["next grid"] = co.Exec(next_grid(tick))

            image_names.append(f"image_{tick}.png")

        image_list = " ".join(image_names)
        pipeline["animate"] = co.Exec(animate(image_list))

    return pipeline
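Note that `show_grid(tick)` and the other helpers are called at build time, so each one appears to return the shell command string for its Exec node. A hypothetical sketch of one such builder (the script name and flags are made up):

def show_grid(tick: int) -> str:
    # build-time helper: return the command the Exec node will run
    return f"python grid.py show --tick {tick}"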
Example #5
def deploy_image() -> co.Serial:
    with co.Serial() as output:
        co.Exec(CREATE_REPO_CMD, name="Create Repo")
        co.Exec(BUILD_AND_PUSH_CMD,
                name="Build and Push",
                requires_docker=True)
    return output
Example #6
def data_pipeline() -> co.Serial:
    """
    `conducto-data-pipeline` is a pipeline-local key-value store.
    This data is only visible to your pipeline and persists until your
    pipeline is archived. One useful application is storing binaries in a
    build node, and retrieving them in a later test node. We exercise the
    `put` and `get` commands to do this.
    """

    build_cmd = """set -ex
go build -o bin/app ./app.go
conducto-data-pipeline put --name my_app_binary --file bin/app
"""
    test_cmd = """set -ex
conducto-data-pipeline get --name my_app_binary --file /tmp/app
/tmp/app --test
"""

    # Dockerfile installs golang and conducto.
    dockerfile = "./docker/Dockerfile.data"
    image = co.Image(dockerfile=dockerfile, context=".", copy_dir="./code")
    with co.Serial(image=image, doc=co.util.magic_doc()) as build_and_test:
        co.Exec("conducto-data-pipeline --help", name="usage")
        co.Exec(build_cmd, name="build")
        co.Exec(test_cmd, name="test")
    return build_and_test
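The same store should also be reachable from Python inside a native function. Assuming the `conducto-data-pipeline` CLI mirrors a `co.data.pipeline` API with matching `put`/`get` methods (verify against your Conducto version), the equivalent calls would be:

import conducto as co

def build_step():
    # assumption: co.data.pipeline mirrors the CLI used above
    co.data.pipeline.put(name="my_app_binary", file="bin/app")

def test_step():
    co.data.pipeline.get(name="my_app_binary", file="/tmp/app")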
Example #7
def make_compute_features_node(in_dir,
                               tmp_dir,
                               out_file,
                               start_date="00000000") -> co.Serial:
    """
    Builds a tree for computing features, parallelized over months.
    """
    all_files = glob.glob(f"{in_dir}/*.csv")
    all_yyyymms = sorted({os.path.basename(f)[:-4] for f in all_files})

    os.makedirs(tmp_dir, exist_ok=True)

    # Skip the first month because we need 1 month of history to compute features
    all_yyyymms = all_yyyymms[1:]

    # Then subset to only the ones beyond the start date
    all_yyyymms = [
        yyyymm for yyyymm in all_yyyymms if yyyymm >= start_date[:6]
    ]

    # Make output
    output = co.Serial()
    output["Parallelize"] = co.Parallel()
    for node, yyyymm in co.util.makeyyyymmnodes(output["Parallelize"],
                                                all_yyyymms):
        node[yyyymm] = co.Exec(compute_features, yyyymm, in_dir, tmp_dir)
    output["Merge"] = co.Exec(merge_data, tmp_dir, out_file)
    return output
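`compute_features` and `merge_data` are defined elsewhere. A hypothetical sketch of the per-month worker, assuming pandas and one CSV per month (the column names are made up):

import pandas as pd

def compute_features(yyyymm: str, in_dir: str, tmp_dir: str):
    # read this month's raw rows and write per-month features
    df = pd.read_csv(f"{in_dir}/{yyyymm}.csv")
    features = df.groupby("symbol")["price"].agg(["mean", "std"])
    features.to_csv(f"{tmp_dir}/{yyyymm}_features.csv")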
Example #8
def deploy_infra() -> co.Serial:
    vpc_cmd = DEPLOY_STACK_CMD.format(stack="vpc")
    elb_cmd = DEPLOY_STACK_CMD.format(stack="elb")
    with co.Serial() as output:
        co.Exec(vpc_cmd, name="VPC")
        co.Exec(elb_cmd, name="ELB")
    return output
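`DEPLOY_STACK_CMD` is defined elsewhere; it presumably templates a CloudFormation deploy, something like this sketch (the template path and stack-name prefix are hypothetical):

DEPLOY_STACK_CMD = (
    "aws cloudformation deploy"
    " --template-file cfn/{stack}.yml"
    " --stack-name demo-{stack}"
    " --no-fail-on-empty-changeset"
)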
Example #9
def poll_sensors() -> co.Serial:

    r = co.Serial()
    r['/pmt'] = co.Serial()
    r['/pmt/poll'] = co.Parallel(image=img)
    for name in range(1104):

        if name == 1002:
            # presumably this sensor is broken somehow
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 1)
        else:

            # most of the sensors work just fine
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 0)

    run_callback = co.callback.slack_status(recipient="SlackUser",
                                            message="polling sensors")
    r.on_running(run_callback)

    err_callback = co.callback.slack_status(recipient="#array-status")
    r.on_error(err_callback)

    done_callback = co.callback.slack_status(
        recipient="#array-status",
        message="all sensors reporting nominally",
    )
    r.on_done(done_callback)

    # other events include:
    # - on_queued
    # - on_killed
    # - on_state_change

    return r
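`certain` is defined elsewhere; from its usage above, `certain(1)` must fail and `certain(0)` must succeed. A hypothetical one-liner with that behavior:

import sys

def certain(should_fail: int):
    # exit nonzero to put this node in the error state
    sys.exit(int(should_fail))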
Example #10
def run() -> co.Serial:
    image = co.Image("python:3.7",
                     copy_branch="master",
                     copy_url="https://github.com/liamcryan/ieuler.git")
    with co.Serial(image=image, doc=co.util.magic_doc()) as pipeline:
        co.Exec('pip install -r requirements.txt', name='build')
        co.Exec('pytest', name='tests')
    return pipeline
Example #11
def teardown() -> co.Parallel:
    """
    Stop containers.
    """
    with co.Parallel(image=docker_img, requires_docker=True) as node:
        node["stop redis"] = co.Exec(STOP_REDIS_CMD)
        node["stop flask"] = co.Exec(STOP_FLASK_CMD)
    return node
Example #12
def main() -> co.Parallel:
    with co.Parallel(image=IMG) as root:
        # Count lines of code in the remote Git repo.
        root["lines of code"] = co.Exec("cloc .")
        # Run a simple data analysis script located there.
        root["biggest US cities"] = co.Exec(
            "cd features/copy_url && python analyze.py cities.csv")
    return root
Example #13
def main() -> co.Serial:
    with co.Serial(image=IMG, requires_docker=True) as root:
        with co.Parallel(name="Init") as init:
            init["Build"] = co.Exec("sleep 3")
            init["Lint"] = co.Exec("sleep 1")
            init["Unit Test"] = co.Exec("sleep 1.5")
        root["Deploy"] = co.Exec("sleep 4")
        root["Integration Test"] = co.Exec("sleep 2")
    return root
Example #14
def main() -> co.Serial:
    with co.Serial(image=IMG, requires_docker=True) as root:
        with co.Parallel(name="Init") as init:
            init["Build"] = co.Exec("docker build .")
            init["Lint"] = co.Exec("black --check .")
            init["Unit Test"] = co.Exec("python unit_test.py")
        root["Deploy"] = co.Exec("bash deploy_aws.sh")
        root["Integration Test"] = co.Exec("bash int_test.sh")
    return root
Example #15
def cleanup() -> co.Serial:
    delete_service_cmd = DELETE_STACK_CMD.format(stack="service")
    delete_elb_cmd = DELETE_STACK_CMD.format(stack="elb")
    delete_vpc_cmd = DELETE_STACK_CMD.format(stack="vpc")
    with co.Serial(skip=True, doc=CLEANUP_DOC) as output:
        co.Exec(delete_service_cmd, name="Service")
        co.Exec(delete_elb_cmd, name="ELB")
        co.Exec(delete_vpc_cmd, name="VPC")
        co.Exec(DELETE_REPO_CMD, name="Repo")
    return output
Example #16
def primes_less_than(n) -> co.Serial:
    n = int(n)
    img = co.Image(copy_dir=".")

    with co.Serial(same_container=co.SameContainer.NEW, image=img) as root:
        root["find primes"] = co.Exec(f"python sieve.py {n}")
        if n >= 3:
            root["check distribution"] = co.Exec(f"cat primes | python check.py {n}")
        root["is 2 included?"] = co.Exec("egrep '^2$' primes")

    return root
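`sieve.py` must write a `primes` file into the shared container for the sibling nodes to read. A minimal sketch, assuming one prime per line (the file name comes from the grep above):

# sieve.py -- hypothetical sketch (Sieve of Eratosthenes)
import sys

def sieve(n: int):
    is_prime = [True] * n
    for p in range(2, n):
        if is_prime[p]:
            yield p
            for multiple in range(p * p, n, p):
                is_prime[multiple] = False

if __name__ == "__main__":
    n = int(sys.argv[1])
    with open("primes", "w") as f:
        for p in sieve(n):
            f.write(f"{p}\n")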
Example #17
def hello() -> co.Serial:

    # Reuse the "build" container for the "test" node
    # so that the binary is available in the second node.
    with co.Serial(image=img,
                   container_reuse_context=CRC.NEW,
                   doc=co.util.magic_doc(comment=True)) as root:
        co.Exec("g++ hello.cpp -o hello", name="build")
        co.Exec("./hello | grep 'World!'", name="test")

    return root
Example #18
def download_file(source_url, target_path) -> co.Serial:
    "Returns a serial node which downloads a gzipped FASTA file"

    target_dir = Path(target_path).parent

    node = co.Serial()
    node["Download"] = co.Exec(
        f"mkdir -p {target_dir} && wget -O {target_path}.gz {source_url}")
    node["Decompress"] = co.Exec(f"gunzip -c {target_path}.gz > {target_path}")

    return node
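Usage, with a placeholder URL and path:

fasta_node = download_file(
    source_url="https://example.com/genome.fa.gz",  # placeholder
    target_path="/data/genome.fa",
)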
Example #19
def pr(branch) -> co.Parallel:
    # Make a Docker image, based on python:alpine, with the whole repo and the contents
    # of the given branch.
    image = co.Image("python:alpine", copy_repo=True, copy_branch=branch)

    # Using that Docker image, run three commands in parallel to interact with the
    # repo's files.
    with co.Parallel(image=image) as root:
        co.Exec(f"echo {branch}", name="print branch")
        co.Exec("pwd", name="print working directory")
        co.Exec("ls -la", name="list files")
    return root
Example #20
def main() -> co.Serial:

    with co.Serial(image=img) as p:  # p is for 'Pipeline root'

        p["get data"] = co.Exec(get_sensor_data)
        p["notify"] = co.Parallel()
        p["notify/stdout"] = co.Exec(plot_to_stdout)
        p["notify/channel"] = co.Exec(plot_to_slack)
        p["notify/team"] = co.Serial()
        for user in update_users:
            p[f"notify/team/{user}"] = co.Exec(message_to_slack_user, user)

    return p
Example #21
def path() -> co.Serial:
    """
    The Node tree can be accessed with file system-like
    [paths](/docs/basics/pipeline-structure#path).
    """
    root = co.Serial(image="foo", doc=co.util.magic_doc())
    root["all together"] = co.Parallel()
    root["all together/a"] = co.Exec("echo step 1, image bar", image="bar")
    root["all together/b"] = co.Exec("echo step 1, image foo")
    root["one at a time"] = co.Serial(image="bar")
    root["one at a time/c"] = co.Exec("echo step 2, image bar")
    root["one at a time/d"] = co.Exec("echo step 3, image bar")
    return root
Example #22
def dict() -> co.Serial:
    """
    Each Node is [dict-like](/docs/basics/pipeline-structure#dict), and you can
    build a hierarchy by assigning children into them.
    """
    root = co.Serial(image="foo", doc=co.util.magic_doc())
    root["all together"] = co.Parallel()
    root["all together"]["a"] = co.Exec("echo step 1, image bar", image="bar")
    root["all together"]["b"] = co.Exec("echo step 1, image foo")
    root["one at a time"] = co.Serial(image="bar")
    root["one at a time"]["c"] = co.Exec("echo step 2, image bar")
    root["one at a time"]["d"] = co.Exec("echo step 3, image bar")
    return root
Example #23
def download_and_plot() -> co.Serial:
    download_command = """
            apt update -y && apt install -y curl unzip
            curl https://www.fs.usda.gov/rds/archive/products/RDS-2005-0004/RDS-2005-0004.zip > data.zip
            unzip data.zip
        """
    image = co.Image(dockerfile='./Dockerfile', context='.')
    with co.Serial(image=image) as pipeline:
        co.Exec(download_command, name="download")
        with co.Parallel(name='plot'):
            co.Exec('python rainfall.py', name='daily')
            co.Exec('python rainfall.py --resample M --save', name='monthly')
    return pipeline
Example #24
def build_and_test() -> co.Serial:
    image = co.Image(copy_dir="./code")
    with co.Serial(image=image, stop_on_error=False) as pipeline:
        with co.Parallel(name="Trade") as first_trading:
            first_trading['US'] = co.Exec("python3 first_stock_trading.py")
            first_trading['CHINA'] = co.Exec("python3 second_stock_trading.py")
        with co.Parallel(name="TopK") as second_trading:
            second_trading['US'] = co.Exec(
                "python3 first_topK_stock_pipeline.py")
            second_trading['CHINA'] = co.Exec(
                "python3 second_topK_stock_pipeline.py")

    return pipeline
Example #25
def main() -> co.Serial:
    img = co.Image(dockerfile="./Dockerfile", reqs_docker=True)
    with co.Serial(image=img, env=get_env(), doc=__doc__) as root:
        root["Check AWS Creds"] = co.Exec(CHECK_AWS_CREDS)
        with co.Parallel(name="Init", doc=INIT_DOC) as init:
            init["Deploy Infra"] = deploy_infra()
            init["Deploy Image"] = deploy_image()
            init["Lint"] = co.Exec("black --check .")
            init["Unit Test"] = co.Exec("python service/test.py --verbose")
        root["Deploy Service"] = deploy_service()
        root["Integration Test"] = co.Exec(INTEGRATION_CMD,
                                           doc=INTEGRATION_DOC)
        root["Cleanup"] = cleanup()
    return root
Example #26
def test() -> co.Serial:
    """
    Check if both redis and flask are available.  Then see if they're
    working.
    """

    with co.Serial(image=test_img) as test:

        with co.Parallel(name="services up?") as check:
            check["redis up?"] = co.Exec(TEST_REDIS_CMD)
            check["flask up?"] = co.Exec(TEST_FLASK_CMD)

    test["integration test"] = co.Exec(INTEGRATION_TEST_CMD)
    return test
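The command constants live elsewhere; hedged sketches of what they might look like, assuming redis-cli and curl are present in `test_img` and the hosts come from the environment (the endpoint and script names are made up):

TEST_REDIS_CMD = "redis-cli -h $REDIS_HOST -p $REDIS_PORT ping | grep PONG"
TEST_FLASK_CMD = "curl -fsS http://$FLASK_HOST:$FLASK_PORT/health"
INTEGRATION_TEST_CMD = "python test/integration.py"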
Example #27
def context() -> co.Serial:
    """
    You can use [context managers](/docs/basics/pipeline-structure#context)
    (Python's `with` statement) to add children. This lets you use whitespace
    to express node depth.
    """
    with co.Serial(image=foo, doc=co.util.magic_doc()) as root:
        with co.Parallel(name="all together"):
            co.Exec("echo step 1, image bar", name="a", image=bar)
            co.Exec("echo step 1, image foo", name="b")
        with co.Serial(name="one at a time", image=bar) as two:
            co.Exec("echo step 2, image bar", name="c")
            co.Exec("echo step 3, image bar", name="d")
    return root
Example #28
def parallelize_reps(reps: int) -> co.Parallel:
    output = co.Parallel()

    for rep_i in range(reps):
        print(f"inside rep {rep_i}")
        output[f'rep{rep_i}'] = co.Serial()
        # unpredictable
        output[f'rep{rep_i}']['p1'] = co.Exec(
            f"{experiment_command} GLOBAL-randomSeed {rep_i}"
            " WORLD_CONVEYORBELT-randomize 1"
            f" && conducto-perm-data put --name rep{rep_i}p1 --file LOD_data.csv"
        )
        # predictable
        output[f'rep{rep_i}']['p0'] = co.Exec(
            f"{experiment_command} GLOBAL-randomSeed {rep_i}"
            " WORLD_CONVEYORBELT-randomize 0"
            f" && conducto-perm-data put --name rep{rep_i}p0 --file LOD_data.csv"
        )
    return output
Example #29
def run() -> co.Serial:
    "Download data from the US EIA, then visualize some datasets."
    with co.Serial(image=IMG, doc=co.util.magic_doc()) as output:
        # First download some data from the US Energy Information Administration.
        output["Download"] = co.Exec(DOWNLOAD_COMMAND)

        # Then make a few different visualizations of it.
        output["Display"] = co.Parallel()
        for dataset in DATASETS.keys():
            # Use the co.Exec shorthand for calling native Python functions.
            # It calls `display(dataset)` in an Exec node, equivalent to:
            #   python pipeline.py display --dataset={dataset}
            output["Display"][dataset] = co.Exec(display, dataset)
    return output
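`display` runs natively in its Exec node. A hypothetical sketch, assuming the Download node left one CSV per dataset and matplotlib is available in `IMG` (paths and columns are made up):

import pandas as pd
import matplotlib.pyplot as plt

def display(dataset: str):
    # plot the series the Download node saved
    df = pd.read_csv(f"/tmp/data/{dataset}.csv")
    df.plot(x="date", y="value", title=dataset)
    plt.savefig(f"/tmp/data/{dataset}.png")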
Example #30
def pipeline() -> co.Parallel:
    root = co.Parallel()
    root["one"] = co.Exec(
        cleandoc("""
            docker run --rm \\
                -e HEROKU_API_KEY='88d1c57c-c074-4333-9004-56f1b6b32e11' \\
                dickeyxxx/heroku-cli \\
                heroku apps
            """),
        requires_docker=True,
        image="docker:latest",
    )
    root["two"] = co.Exec("heroku apps", env=env, image="dickeyxxx/heroku-cli")
    return root