Example 1
def poll_sensors() -> co.Serial:

    r = co.Serial()
    r['/pmt'] = co.Serial()
    r['/pmt/poll'] = co.Parallel(image=img)
    for name in range(1104):

        if name == 1002:
            # presumably this sensor is broken somehow
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 1)
        else:

            # most of the sensors work just fine
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 0)

    run_callback = co.callback.slack_status(recipient="SlackUser",
                                            message="polling sensors")
    r.on_running(run_callback)

    err_callback = co.callback.slack_status(recipient="#array-status")
    r.on_error(err_callback)

    done_callback = co.callback.slack_status(
        recipient="#array-status",
        message="all sensors reporting nominally",
    )
    r.on_done(done_callback)

    # other events include:
    # - on_queued
    # - on_running
    # - on_killed
    # - on_state_change (a catch-all; a sketch follows this example)

    return r
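
The comment above lists the remaining node events. As a minimal sketch, assuming on_state_change accepts the same callback objects as the handlers used above, a catch-all hook could be added inside poll_sensors() before the return:

    # hypothetical: ping the channel on any state transition
    state_callback = co.callback.slack_status(
        recipient="#array-status",
        message="sensor polling changed state",
    )
    r.on_state_change(state_callback)
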
Example 2
def life() -> co.Serial:

    with co.Serial(image=game_of_life) as pipeline:

        pipeline["initialize grid"] = co.Exec(initialize_grid)

        image_names = []
        # TODO: instead of modeling a fixed number of clock ticks,
        # use a lazy node to extend this until a grid state repeats
        # (a sketch of that idea follows this example)
        for tick in ticks:
            with co.Serial(name=f"tick {tick}",
                           image=game_of_life) as iteration:

                iteration["show grid"] = co.Exec(show_grid(tick))
                iteration["find neighbors"] = co.Exec(find_neighborhoods(tick))

                with co.Parallel(name="apply_rules",
                                 image=game_of_life) as rules:

                    rules["isolate"] = co.Exec(isolate(tick))
                    rules["survive"] = co.Exec(survive(tick))
                    rules["crowd"] = co.Exec(crowd(tick))
                    rules["reproduce"] = co.Exec(reproduce(tick))
                    rules["ignore"] = co.Exec(ignore(tick))

                iteration["next grid"] = co.Exec(next_grid(tick))

            image_names.append(f"image_{tick}.png")

        image_list = " ".join(image_names)
        pipeline["animate"] = co.Exec(animate(image_list))

    return pipeline
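
The TODO above hints at co.Lazy, which later examples use to build subtrees at runtime. One rough way to do it is a recursive generator: run one tick, then lazily decide whether to schedule another. Both grid_state_repeats and the nesting of co.Lazy inside a generated tree are assumptions here, not confirmed Conducto behavior:

def next_tick(tick: int) -> co.Serial:
    # hypothetical generator, invoked at runtime by co.Lazy; by then the
    # grids for ticks 0..tick-1 exist and can be inspected
    with co.Serial(image=game_of_life) as node:
        node["next grid"] = co.Exec(next_grid(tick))
        if not grid_state_repeats(tick):  # hypothetical helper
            node["continue"] = co.Lazy(next_tick, tick=tick + 1)
    return node

# inside life(), the fixed loop could then become:
#     pipeline["iterate"] = co.Lazy(next_tick, tick=0)
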
Example 3
def path() -> co.Serial:
    """
    The Node tree can be accessed with file system-like
    [paths](/docs/basics/pipeline-structure#path).
    """
    root = co.Serial(image="foo", doc=co.util.magic_doc())
    root["all together"] = co.Parallel()
    root["all together/a"] = co.Exec("echo step 1, image bar", image="bar")
    root["all together/b"] = co.Exec("echo step 1, image foo")
    root["one at a time"] = co.Serial(image="bar")
    root["one at a time/c"] = co.Exec("echo step 2, image bar")
    root["one at a time/d"] = co.Exec("echo step 3, image bar")
    return root
Example 4
def islands() -> co.Serial:
    with co.Serial() as pipeline:
        pipeline["hawaii"] = co.Exec("echo big island")
        with co.Parallel(name="maui_county") as maui_county:
            maui_county["maui"] = co.Exec("echo valley isle")
            maui_county["lanai"] = co.Exec("echo pineapple isle")
            maui_county["molokai"] = co.Exec("echo friendly isle")
            maui_county["kahoolawe"] = co.Exec("echo target isle")
        pipeline["oahu"] = co.Exec("echo gathering place")
        with co.Serial(name="kauai_county") as kauai_county:
            kauai_county["kauai"] = co.Exec("echo garden isle")
            kauai_county["niihau"] = co.Exec("echo forbidden isle")
    return pipeline
Example 5
def dict() -> co.Serial:
    """
    Each Node is [dict-like](/docs/basics/pipeline-structure#dict), and you can
    build a hierarchy by assigning children into them.
    """
    root = co.Serial(image="foo", doc=co.util.magic_doc())
    root["all together"] = co.Parallel()
    root["all together"]["a"] = co.Exec("echo step 1, image bar", image="bar")
    root["all together"]["b"] = co.Exec("echo step 1, image foo")
    root["one at a time"] = co.Serial(image="bar")
    root["one at a time"]["c"] = co.Exec("echo step 2, image bar")
    root["one at a time"]["d"] = co.Exec("echo step 3, image bar")
    return root
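
Because each node really is dict-like, the same keys used for assignment also read children back, so a tree can be extended or inspected after it is built. A small sketch reusing root from above, plus the stream() traversal that Example 18 uses:

    # fetch an existing child by key and hang another Exec node off of it
    root["all together"]["e"] = co.Exec("echo step 1, image foo")

    # walk every node in the tree
    for node in root.stream():
        if isinstance(node, co.Exec):
            print(node)
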
Example 6
def main() -> co.Serial:

    with co.Serial(image=img) as p:  # p is for 'Pipeline root'

        p["get data"] = co.Exec(get_sensor_data)
        p["notify"] = co.Parallel()
        p["notify/stdout"] = co.Exec(plot_to_stdout)
        p["notify/channel"] = co.Exec(plot_to_slack)
        p["notify/team"] = co.Serial()
        for user in update_users:
            p[f"notify/team/{user}"] = co.Exec(message_to_slack_user, user)

    return p
Example 7
def context() -> co.Serial:
    """
    You can use [context managers](/docs/basics/pipeline-structure#context)
    (Python's `with` statement) to add children. This lets you use whitespace
    to express node depth.
    """
    with co.Serial(image=foo, doc=co.util.magic_doc()) as root:
        with co.Parallel(name="all together"):
            co.Exec("echo step 1, image bar", name="a", image=bar)
            co.Exec("echo step 1, image foo", name="b")
        with co.Serial(name="one at a time", image=bar):
            co.Exec("echo step 2, image bar", name="c")
            co.Exec("echo step 3, image bar", name="d")
    return root
Example 8
def pipeline(num_shards=500, max_shard=3) -> co.Serial:
    root = co.Serial()
    # Download raw data
    root["Download"] = download_node(DATA_ROOT, num_shards, max_shard)

    # Compute covariance matrices. Use co.Lazy to generate tree
    #   (map) Compute covs in parallel, one for each tfrecord file (implemented, need tree)
    root["Compute covariance matrices"] = co.Lazy(
        compute_covs_node,
        in_glob=f"{DATA_ROOT}/train*.tfrecord",
        out_dir=COVS_ROOT)

    #   (reduce) Merge covariance matrices, using a 2-level reduce step: N->sqrt(N)->1 (implemented, need tree)
    root["Merge covariance matrices"] = co.Lazy(
        merge_covs_node,
        in_dir=COVS_ROOT,
        tmp_dir=MERGED_TMP,
        out_file=MERGED_FILE,
    )

    # Fit an OLS model using the covariance matrices (implemented, need tree)
    root["Models"] = co.Parallel()

    for ridge in [0, 1, 10, 100, 500]:
        name = "Linear" if ridge == 0 else f"Ridge={ridge}"
        model_node = co.Serial()

        model_node["Fit"] = co.Exec(
            commands.fit,
            in_path=MERGED_FILE,
            out_path=f"{MODEL_DIR}/{name}.pkl.gzip",
            ridge=ridge,
        )
        # Run a backtest on the validation data for each model (need to implement)
        model_node["Backtest"] = co.Lazy(
            backtest_node,
            model_path=f"{MODEL_DIR}/{name}.pkl.gzip",
            in_glob=f"{DATA_ROOT}/validate*.tfrecord",
            out_dir=f"{BACKTEST_ROOT}/{name}")
        model_node["Merge backtests"] = co.Exec(
            commands.merge_backtest,
            in_paths=[f"{BACKTEST_ROOT}/{name}/validate*.pkl.gzip"],
            out_path=f"{BACKTEST_ROOT}/{name}/summary.pkl.gzip")

        root["Models"][name] = model_node

    root["Summarize"] = co.Exec(
        commands.summarize, in_paths=[f"{BACKTEST_ROOT}/*/summary.pkl.gzip"])
    return root
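
compute_covs_node and the other co.Lazy targets are functions that run once their inputs exist and return a node subtree; they are not shown here. Judging from make_compute_features_node in Example 15, the map step might look roughly like this sketch, with a hypothetical commands.compute_cov and glob/os assumed imported:

def compute_covs_node(in_glob, out_dir) -> co.Parallel:
    # sketch: one parallel Exec per matching tfrecord file
    output = co.Parallel()
    for path in sorted(glob.glob(in_glob)):
        name = os.path.basename(path)
        output[name] = co.Exec(commands.compute_cov,
                               in_path=path,
                               out_dir=out_dir)
    return output
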
Example 9
def redis_data_store() -> co.Serial:
    """
    There are many standard ways to store persistent data: databases,
    AWS S3, and in-memory caches like redis. An exec node can run any
    shell command, so it is easy to use any of these approaches. Here
    we populate environment variables pointing to our redis service,
    allowing us to write to and read from redis in a python script
    (a sketch of such a script follows this example).
    """

    # export_cmd is just a hack to set REDIS_HOST to our mock instance
    export_cmd = (
        "export REDIS_HOST=$(ip route show default | awk '/default/{print $3}')"
    )
    redis_write_cmd = f"{export_cmd} && python code/redis_example.py --write"
    redis_read_cmd = f"{export_cmd} && python code/redis_example.py --read"

    env = {
        "REDIS_HOST": "override_me",
        "REDIS_PORT": "6379",
    }
    with co.Serial(image=utils.IMG, env=env,
                   doc=co.util.magic_doc()) as redis_store:
        co.Exec(redis_write_cmd, name="redis_write")
        co.Exec(redis_read_cmd, name="redis_read")
    return redis_store
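
The code/redis_example.py script itself is not shown. A minimal sketch of what it might contain, assuming the redis Python package and the REDIS_HOST/REDIS_PORT variables set above:

# code/redis_example.py -- hypothetical sketch
import argparse
import os

import redis

parser = argparse.ArgumentParser()
parser.add_argument("--write", action="store_true")
parser.add_argument("--read", action="store_true")
args = parser.parse_args()

r = redis.Redis(host=os.environ["REDIS_HOST"],
                port=int(os.environ["REDIS_PORT"]))
if args.write:
    r.set("greeting", "hello from conducto")
if args.read:
    print(r.get("greeting"))
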
Example 10
def deploy_image() -> co.Serial:
    with co.Serial() as output:
        co.Exec(CREATE_REPO_CMD, name="Create Repo")
        co.Exec(BUILD_AND_PUSH_CMD,
                name="Build and Push",
                requires_docker=True)
    return output
Example 11
def deploy_infra() -> co.Serial:
    vpc_cmd = DEPLOY_STACK_CMD.format(stack="vpc")
    elb_cmd = DEPLOY_STACK_CMD.format(stack="elb")
    with co.Serial() as output:
        co.Exec(vpc_cmd, name="VPC")
        co.Exec(elb_cmd, name="ELB")
    return output
Example 12
def main() -> co.Serial:
    with co.Serial() as node:
        node["ping"] = co.Exec(
            "redis-cli -h redis-15233.c61.us-east-1-3.ec2.cloud.redislabs.com -p 15233 -a nO4bpNHpUne4PRearIOZrHYgU5N3wWsJ ping | grep PONG",
            image=img,
        )
    return node
Example 13
def _redis_wrapper() -> co.Serial:
    """
    This is a simple wrapper that starts and stops a local redis instance
    around our *redis_data_store* example. This is just to mock a real
    redis service you might have running externally. The details of how
    this works are not critical right now. We use Conducto features
    `stop_on_error` and `requires_docker` that are discussed in a later
    tutorial. **Focus on the *redis_data_store* node for now.**
    """

    name = "conducto_demo_redis"
    mock_redis_start_cmd = f"""set -ex
docker run -p 6379:6379 -d --rm --name {name} redis:5.0-alpine
sleep 1 # wait for redis to start up
docker logs --details {name}
# error if redis container not running
docker inspect {name} --format="{{{{.State.Running}}}}"
"""
    mock_redis_stop_cmd = f"docker stop {name} || true"

    with co.Serial(
            image="docker:19.03",
            stop_on_error=False,
            requires_docker=True,
            doc=co.util.magic_doc(doc_only=True),
    ) as wrapper:
        co.Exec(mock_redis_start_cmd, name="mock_redis_start")
        wrapper["redis_data_store"] = redis_data_store()
        co.Exec(mock_redis_stop_cmd, name="mock_redis_stop")
    return wrapper
Example 14
def deploy() -> co.Serial:
    with co.Serial() as node:
        node["create app"] = co.Exec(CREATE_APP)
        node["stop if not already"] = co.Exec(STOP_APP)
        node["configure app"] = co.Exec(CONFIGURE_APP)
        # CRC.NEW means that all nodes in "push" run in the same container.
        CRC = co.ContainerReuseContext
        with co.Serial(container_reuse_context=CRC.NEW, name="push") as push:
            push["register ssh key"] = co.Exec(REGISTER_SSH_KEY)
            push["test ssh key"] = co.Exec(TEST_SSH_KEY)
            push["push code"] = co.Exec(PUSH_CODE)
        node["start app"] = co.Exec(START_APP)
        with co.Parallel(name="sanity check") as check:
            check["peek at logs"] = co.Exec(PEEK_LOGS)
            check["check alive"] = co.Exec(TEST_FLASK)
    return node
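
The shared container is what lets "push code" see the ssh key that "register ssh key" wrote to disk; in separate containers each step would start from a fresh filesystem. The same pattern fits any write-then-read sequence, as in this sketch with placeholder commands:

    CRC = co.ContainerReuseContext
    with co.Serial(container_reuse_context=CRC.NEW, name="venv") as venv:
        # the virtualenv created here is only visible to the next two
        # nodes because all three share one container
        venv["create"] = co.Exec("python -m venv /tmp/venv")
        venv["install"] = co.Exec("/tmp/venv/bin/pip install requests")
        venv["use"] = co.Exec("/tmp/venv/bin/python -c 'import requests'")
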
Example 15
def make_compute_features_node(in_dir,
                               tmp_dir,
                               out_file,
                               start_date="00000000") -> co.Serial:
    """
    Builds a tree for computing features. Parallelize over different months.
    """
    all_files = glob.glob(f"{in_dir}/*.csv")
    all_yyyymms = sorted({os.path.basename(f)[:-4] for f in all_files})

    os.makedirs(tmp_dir, exist_ok=True)

    # Skip the first month because we need 1 month of history to compute features
    all_yyyymms = all_yyyymms[1:]

    # Then subset to only the ones beyond the start date
    all_yyyymms = [
        yyyymm for yyyymm in all_yyyymms if yyyymm >= start_date[:6]
    ]

    # Make output
    output = co.Serial()
    output["Parallelize"] = co.Parallel()
    for node, yyyymm in co.util.makeyyyymmnodes(output["Parallelize"],
                                                all_yyyymms):
        node[yyyymm] = co.Exec(compute_features, yyyymm, in_dir, tmp_dir)
    output["Merge"] = co.Exec(merge_data, tmp_dir, out_file)
    return output
Example 16
def data_pipeline() -> co.Serial:
    """
    `conducto-data-pipeline` is a pipeline-local key-value store.
    This data is only visible to your pipeline and persists until your
    pipeline is archived. One useful application is storing binaries in a
    build node, and retrieving them in a later test node. We exercise the
    `put` and `get` commands to do this.
    """

    build_cmd = """set -ex
go build -o bin/app ./app.go
conducto-data-pipeline put --name my_app_binary --file bin/app
"""
    test_cmd = """set -ex
conducto-data-pipeline get --name my_app_binary --file /tmp/app
/tmp/app --test
"""

    # Dockerfile installs golang and conducto.
    dockerfile = "./docker/Dockerfile.data"
    image = co.Image(dockerfile=dockerfile, context=".", copy_dir="./code")
    with co.Serial(image=image, doc=co.util.magic_doc()) as build_and_test:
        co.Exec("conducto-data-pipeline --help", name="usage")
        co.Exec(build_cmd, name="build")
        co.Exec(test_cmd, name="test")
    return build_and_test
Example 17
def main(start_date="20120101") -> co.Serial:
    """
    Build a volume-prediction model for SPY.US. Steps:
    * Download data from S3 to the /conducto/data drive.
    * Compute features in parallel.
    * Build 3 models in parallel to predict volume.
    * For each model, fit, then do a parallel backtest.
    * Once all backtests are complete, summarize the results.
    """
    path = "/conducto/data/pipeline"

    root = co.Serial(image=_get_image(),
                     env={"PYTHONBREAKPOINT": "ipdb.set_trace"})
    root["Download"] = co.Exec(download_data, f"{path}/raw")

    # "Compute Features" should be parallelized at runtime, based on the actual
    # data downloaded in the previous step. Use co.Lazy to define and execute
    # this subtree.
    root["Compute Features"] = co.Lazy(
        make_compute_features_node,
        in_dir=f"{path}/raw",
        tmp_dir=f"{path}/feat/tmp",
        out_file=f"{path}/feat/merged.csv",
        start_date=start_date,
    )
    # Try three different model types
    root["Models"] = co.Parallel()
    for mdl in ["linear", "svm", "gradient_boost"]:
        # For each model, fit it, then backtest
        root["Models"][mdl] = fit_and_test = co.Serial()
        fit_and_test["Fit"] = co.Exec(
            fit,
            model_type=mdl,
            in_file=f"{path}/feat/merged.csv",
            out_file=f"{path}/fit/{mdl}",
        )
        fit_and_test["Backtest"] = co.Lazy(
            make_backtest_node,
            feature_dir=f"{path}/feat",
            model_file=f"{path}/fit/{mdl}",
            tmp_dir=f"{path}/results/tmp/{mdl}",
            out_file=f"{path}/results/{mdl}.csv",
        )

    # Analyze the results of the backtests and plot.
    root["Analyze"] = co.Exec(analyze, f"{path}/results")
    return root
Example 18
def run(branch: str) -> co.Serial:
    image = co.Image(image="python:3.6", reqs_py=["conducto"])
    root = co.Serial(image=image)
    with co.Serial(same_container=co.SameContainer.NEW, cpu=12,
                   mem=32) as build:
        build["fetch"] = co.Exec("echo im fetching")
        build["checkout"] = co.Exec("echo im checking out")
        with co.Parallel(name="checks") as checks:
            checks["yapf"] = co.Exec("echo checking yapf")
            checks["python_tests"] = co.Exec(
                "echo checking python tests")
            checks["flake8"] = co.Exec(
                "echo checking flake8")
            checks["pylint"] = co.Exec(
                "echo im checking pylint")
            checks["mypy"] = co.Exec("echo im checking mypy")
            checks["cppcheck"] = co.Exec(
                "echo im checking cppcheck")
            checks["clang_format"] = co.Exec(
                "echo im checking clang_format")

        build["build"] = co.Exec('echo im building now')

    root["build"] = build

    auth_token = co.api.Auth().get_token_from_shell()
    access_token = co.api.Secrets().get_user_secrets(
        auth_token)["GITHUB_ACCESS_TOKEN"]
    stdout = subprocess.check_output(
        f"git ls-remote [email protected]:jmazar/conduco_statuses.git refs/heads/{branch} | cut -f1",
        shell=True)
    sha = stdout.decode("utf-8").strip()
    print(sha)
    print(access_token)
    creator = co.callback.github_status_creator(
        owner="jmazar",
        repo="conduco_statuses",
        sha=sha,
        access_token=access_token,
    )

    for node in root.stream():
        if isinstance(node, co.Exec):
            node.on_queued(creator(state="pending"))
            node.on_done(creator(state="success"))
            node.on_error(creator(state="failure"))
    return root
Example 19
def primes_less_than(n: int) -> co.Serial:

    img = co.Image(copy_dir=".")

    with co.Serial(image=img) as root:
        root["find primes"] = co.Exec(sieve, n)

    return root
Example 20
def hello_py() -> co.Serial:

    with co.Serial() as root:
        hi = co.Exec(say_it)
        hi.image = py_img
        root["Say Hi"] = hi

    return root
Example 21
def run() -> co.Serial:
    image = co.Image("python:3.7",
                     copy_branch="master",
                     copy_url="https://github.com/liamcryan/ieuler.git")
    with co.Serial(image=image, doc=co.util.magic_doc()) as pipeline:
        co.Exec('pip install -r requirements.txt', name='build')
        co.Exec('pytest', name='tests')
    return pipeline
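
None of these snippets shows how a pipeline function gets launched. The usual Conducto pattern is a co.main entry point at the bottom of the module; treat the exact invocation and flags as an assumption:

# hypothetical launcher
if __name__ == "__main__":
    co.main(default=run)

Running "python pipeline.py --local" would then build the tree returned by run() and execute it locally.
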
Example 22
def main() -> co.Serial:
    path = "/conducto/data/pipeline"
    root = co.Serial(image=get_image())

    # Get data from keras for testing and training
    root["Get Data"] = co.Exec(run_whole_thing, f"{path}/raw")

    return root
Example 23
def main() -> co.Serial:

    with co.Serial(image=img) as root_node:

        # download and decompress the suspicious data
        with co.Serial(name="setup"):
            co.Exec(f"""
                    wget -O {data}/genes.fasta.gz \
                    https://sgd-prod-upload.s3.amazonaws.com/S000208654/orf_coding.20150113.fasta.gz
                    """,
                    name="get data")

            co.Exec(f"gunzip -c {data}/genes.fasta.gz > {data}/genes.fna",
                    name="decrompress")

            co.Exec(f"echo '{dummy_contents}' > {data}/genome.fna",
                    name="place data")

        with co.Parallel(name="experiment"):

            # use it as-is
            co.Exec(f"""
                    makeblastdb -in {data}/genome.fna -dbtype nucl -out tempdb
                    blastn -query {data}/genes.fna -outfmt 5 -db tempdb 1> /dev/null 2>errors

                    # fail if previous command wrote to stderr
                    cat errors >&2
                    [[ $(wc -l < errors) -ge 1 ]] && exit 1 || exit 0
                    """,
                    name="has errors")

            # use it after replacing the unicode escape sequences with 'BADCHAR'
            co.Exec(f"""
                    # fix bad characters for YMR156C, YCL018W, YGR257C, and YDR412W
                    cat {data}/genes.fna | sed 's/&#/BADCHAR/g' > {data}/fixed.fna

                    makeblastdb -in {data}/genome.fna -dbtype nucl -out tempdb
                    blastn -query {data}/fixed.fna -outfmt 5 -db tempdb 1> /dev/null 2>errors

                    # fail if previous command wrote to stderr
                    cat errors >&2
                    [[ $(wc -l < errors) -ge 1 ]] && exit 1 || exit 0
                    """,
                    name="fixed")
    return root_node
Example 24
def hello_linux() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Say Hi"] = co.Exec(
        """
        echo '{"message": "Hello World"}' | jq '.message'
        """,
        image=lin_img,
    )
    return pipeline
Example 25
def main() -> co.Serial:
    with co.Serial(image=IMG, requires_docker=True) as root:
        with co.Parallel(name="Init") as init:
            init["Build"] = co.Exec("docker build .")
            init["Lint"] = co.Exec("black --check .")
            init["Unit Test"] = co.Exec("python unit_test.py")
        root["Deploy"] = co.Exec("bash deploy_aws.sh")
        root["Integration Test"] = co.Exec("bash int_test.sh")
    return root
Example 26
def main() -> co.Serial:
    with co.Serial(image=IMG, requires_docker=True) as root:
        with co.Parallel(name="Init") as init:
            init["Build"] = co.Exec("sleep 3")
            init["Lint"] = co.Exec("sleep 1")
            init["Unit Test"] = co.Exec("sleep 1.5")
        root["Deploy"] = co.Exec("sleep 4")
        root["Integration Test"] = co.Exec("sleep 2")
    return root
Example 27
def cleanup() -> co.Serial:
    delete_service_cmd = DELETE_STACK_CMD.format(stack="service")
    delete_elb_cmd = DELETE_STACK_CMD.format(stack="elb")
    delete_vpc_cmd = DELETE_STACK_CMD.format(stack="vpc")
    with co.Serial(skip=True, doc=CLEANUP_DOC) as output:
        co.Exec(delete_service_cmd, name="Service")
        co.Exec(delete_elb_cmd, name="ELB")
        co.Exec(delete_vpc_cmd, name="VPC")
        co.Exec(DELETE_REPO_CMD, name="Repo")
    return output
Example 28
def deploy() -> co.Serial:
    """
    Start Containers.
    """

    with co.Serial(image=docker_img, requires_docker=True) as node:
        # Flask needs to know the Redis IP before it can start, so
        # make sure this node is Serial.

        # use the redis image from dockerhub
        with co.Serial(name="redis") as redis:
            redis["start"] = co.Exec(START_REDIS_CMD)

        # include our flask code via a Dockerfile
        with co.Serial(name="flask") as flask:
            flask["build"] = co.Exec(BUILD_FLASK_CMD)
            flask["start"] = co.Exec(START_FLASK_CMD)

    return node
Example 29
def main() -> co.Serial:
    """
    Starts services, tests them, and cleans up
    """

    # outer context: continue on errors
    # so 'clean up' still runs if tests fail
    with co.Serial(stop_on_error=False, doc=__doc__) as root:

        # inner context: stop on errors
        # don't bother testing a failed deployment
        with co.Serial(name="run", stop_on_error=True) as run:
            run["deploy"] = deploy()
            run["test"] = test()

        # stop services
        root["clean up"] = teardown()

    return root
Example 30
def download_file(source_url, target_path) -> co.Serial:
    "Returns a serial node which downloads a gzipped FASTA file"

    target_dir = Path(target_path).parent

    node = co.Serial()
    node["Download"] = co.Exec(
        f"mkdir -p {target_dir} && wget -O {target_path}.gz {source_url}")
    node["Decompress"] = co.Exec(f"gunzip -c {target_path}.gz > {target_path}")

    return node
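
Since download_file just returns a node, it composes into a larger pipeline like any other child. A sketch with a hypothetical source URL, reusing the /conducto/data/pipeline path from earlier examples:

def main() -> co.Serial:
    root = co.Serial()
    root["get genome"] = download_file(
        "https://example.com/genome.fna.gz",  # hypothetical URL
        "/conducto/data/pipeline/genome.fna",
    )
    return root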