def poll_sensors() -> co.Serial:
    r = co.Serial()
    r['/pmt'] = co.Serial()
    r['/pmt/poll'] = co.Parallel(image=img)

    for name in range(1104):
        if name == 1002:
            # presumably this sensor is broken somehow
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 1)
        else:
            # most of the sensors work just fine
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 0)

    run_callback = co.callback.slack_status(
        recipient="SlackUser", message="polling sensors"
    )
    r.on_running(run_callback)

    err_callback = co.callback.slack_status(recipient="#array-status")
    r.on_error(err_callback)

    done_callback = co.callback.slack_status(
        recipient="#array-status",
        message="all sensors reporting nominally",
    )
    r.on_done(done_callback)

    # other events include:
    # - on_queued
    # - on_running
    # - on_killed
    # - on_state_change

    return r

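# The `certain` helper passed to co.Exec above is defined elsewhere in the project
# and is not shown here. Below is a minimal, hypothetical sketch of a compatible
# function (name and signature assumed from the co.Exec(certain, ...) calls):
# a nonzero argument simulates a broken sensor by exiting with an error.
import sys


def certain(broken: int) -> None:
    """Simulate polling one sensor; exit nonzero if the sensor is broken."""
    if broken:
        print("sensor did not respond", file=sys.stderr)
        sys.exit(1)
    print("sensor reporting nominally")
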
def life() -> co.Serial:
    with co.Serial(image=game_of_life) as pipeline:
        pipeline["initialize grid"] = co.Exec(initialize_grid)

        image_names = []
        # TODO: instead of modeling a fixed number of clock ticks
        # use a lazy node to extend this until a grid state is repeated
        for tick in ticks:
            with co.Serial(name=f"tick {tick}", image=game_of_life) as iteration:
                iteration["show grid"] = co.Exec(show_grid(tick))
                iteration["find neighbors"] = co.Exec(find_neighborhoods(tick))
                with co.Parallel(name="apply_rules", image=game_of_life) as rules:
                    rules["isolate"] = co.Exec(isolate(tick))
                    rules["survive"] = co.Exec(survive(tick))
                    rules["crowd"] = co.Exec(crowd(tick))
                    rules["reproduce"] = co.Exec(reproduce(tick))
                    rules["ignore"] = co.Exec(ignore(tick))
                iteration["next grid"] = co.Exec(next_grid(tick))
            image_names.append(f"image_{tick}.png")

        image_list = " ".join(image_names)
        pipeline["animate"] = co.Exec(animate(image_list))

    return pipeline

def path() -> co.Serial:
    """
    The Node tree can be accessed with file system-like
    [paths](/docs/basics/pipeline-structure#path).
    """
    root = co.Serial(image="foo", doc=co.util.magic_doc())
    root["all together"] = co.Parallel()
    root["all together/a"] = co.Exec("echo step 1, image bar", image="bar")
    root["all together/b"] = co.Exec("echo step 1, image foo")
    root["one at a time"] = co.Serial(image="bar")
    root["one at a time/c"] = co.Exec("echo step 2, image bar")
    root["one at a time/d"] = co.Exec("echo step 3, image bar")
    return root

def islands() -> co.Serial:
    with co.Serial() as pipeline:
        pipeline["hawaii"] = co.Exec("echo big island")
        with co.Parallel(name="maui_county") as maui_county:
            maui_county["maui"] = co.Exec("echo valley isle")
            maui_county["lanai"] = co.Exec("echo pineapple isle")
            maui_county["molokai"] = co.Exec("echo friendly isle")
            maui_county["kahoolawe"] = co.Exec("echo target isle")
        pipeline["oahu"] = co.Exec("echo gathering place")
        with co.Serial(name="kauai_county") as kauai_county:
            kauai_county["kauai"] = co.Exec("echo garden isle")
            kauai_county["niihau"] = co.Exec("echo forbidden isle")
    return pipeline

def dict() -> co.Serial:
    """
    Each Node is [dict-like](/docs/basics/pipeline-structure#dict), and you can
    build a hierarchy by assigning children into it.
    """
    root = co.Serial(image="foo", doc=co.util.magic_doc())
    root["all together"] = co.Parallel()
    root["all together"]["a"] = co.Exec("echo step 1, image bar", image="bar")
    root["all together"]["b"] = co.Exec("echo step 1, image foo")
    root["one at a time"] = co.Serial(image="bar")
    root["one at a time"]["c"] = co.Exec("echo step 2, image bar")
    root["one at a time"]["d"] = co.Exec("echo step 3, image bar")
    return root

def main() -> co.Serial:
    with co.Serial(image=img) as p:  # p is for 'Pipeline root'
        p["get data"] = co.Exec(get_sensor_data)
        p["notify"] = co.Parallel()
        p["notify/stdout"] = co.Exec(plot_to_stdout)
        p["notify/channel"] = co.Exec(plot_to_slack)
        p["notify/team"] = co.Serial()
        for user in update_users:
            p[f"notify/team/{user}"] = co.Exec(message_to_slack_user, user)
    return p

def context() -> co.Serial:
    """
    You can use [context managers](/docs/basics/pipeline-structure#context)
    (Python's `with` statement) to add children. This lets you use whitespace
    to express node depth.
    """
    with co.Serial(image=foo, doc=co.util.magic_doc()) as root:
        with co.Parallel(name="all together"):
            co.Exec("echo step 1, image bar", name="a", image=bar)
            co.Exec("echo step 1, image foo", name="b")
        with co.Serial(name="one at a time", image=bar) as two:
            co.Exec("echo step 2, image bar", name="c")
            co.Exec("echo step 3, image bar", name="d")
    return root

def pipeline(num_shards=500, max_shard=3) -> co.Serial:
    root = co.Serial()

    # Download raw data
    root["Download"] = download_node(DATA_ROOT, num_shards, max_shard)

    # Compute covariance matrices. Use co.Lazy to generate the tree.
    # (map) Compute covs in parallel, one for each tfrecord file (implemented, need tree)
    root["Compute covariance matrices"] = co.Lazy(
        compute_covs_node,
        in_glob=f"{DATA_ROOT}/train*.tfrecord",
        out_dir=COVS_ROOT,
    )

    # (reduce) Merge covariance matrices, using a 2-level reduce step:
    # N -> sqrt(N) -> 1 (implemented, need tree)
    root["Merge covariance matrices"] = co.Lazy(
        merge_covs_node,
        in_dir=COVS_ROOT,
        tmp_dir=MERGED_TMP,
        out_file=MERGED_FILE,
    )

    # Fit an OLS model using the covariance matrices (implemented, need tree)
    root["Models"] = co.Parallel()
    for ridge in [0, 1, 10, 100, 500]:
        name = "Linear" if ridge == 0 else f"Ridge={ridge}"
        model_node = co.Serial()
        model_node["Fit"] = co.Exec(
            commands.fit,
            in_path=MERGED_FILE,
            out_path=f"{MODEL_DIR}/{name}.pkl.gzip",
            ridge=ridge,
        )

        # Run a backtest on the validation data for each model (need to implement)
        model_node["Backtest"] = co.Lazy(
            backtest_node,
            model_path=f"{MODEL_DIR}/{name}.pkl.gzip",
            in_glob=f"{DATA_ROOT}/validate*.tfrecord",
            out_dir=f"{BACKTEST_ROOT}/{name}",
        )
        model_node["Merge backtests"] = co.Exec(
            commands.merge_backtest,
            in_paths=[f"{BACKTEST_ROOT}/{name}/validate*.pkl.gzip"],
            out_path=f"{BACKTEST_ROOT}/{name}/summary.pkl.gzip",
        )
        root["Models"][name] = model_node

    root["Summarize"] = co.Exec(
        commands.summarize,
        in_paths=[f"{BACKTEST_ROOT}/*/summary.pkl.gzip"],
    )
    return root

def redis_data_store() -> co.Serial:
    """
    There are many standard ways to store persistent data: databases, AWS S3,
    and in-memory caches like redis. An exec node can run any shell command,
    so it is easy to use any of these approaches.

    Here we populate environment variables pointing to our redis service,
    allowing us to write to and read from redis in a python script.
    """
    # export_cmd is just a hack to set REDIS_HOST to our mock instance
    export_cmd = (
        "export REDIS_HOST=$(ip route show default | awk '/default/{print $3}')"
    )
    redis_write_cmd = f"{export_cmd} && python code/redis_example.py --write"
    redis_read_cmd = f"{export_cmd} && python code/redis_example.py --read"

    env = {
        "REDIS_HOST": "override_me",
        "REDIS_PORT": "6379",
    }
    with co.Serial(image=utils.IMG, env=env, doc=co.util.magic_doc()) as redis_store:
        co.Exec(redis_write_cmd, name="redis_write")
        co.Exec(redis_read_cmd, name="redis_read")
    return redis_store

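# The redis_write/redis_read commands above invoke code/redis_example.py, which is
# not shown here. Below is a minimal, hypothetical sketch of such a script, assuming
# the standard redis-py client and the REDIS_HOST/REDIS_PORT environment variables
# populated by the node.
import argparse
import os

import redis


def redis_example() -> None:
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--write", action="store_true")
    group.add_argument("--read", action="store_true")
    args = parser.parse_args()

    client = redis.Redis(
        host=os.environ["REDIS_HOST"], port=int(os.environ["REDIS_PORT"])
    )
    if args.write:
        client.set("demo_key", "hello from the redis_write node")
    else:
        # get() returns bytes, or None if the key was never written
        print(client.get("demo_key"))
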
def deploy_image() -> co.Serial:
    with co.Serial() as output:
        co.Exec(CREATE_REPO_CMD, name="Create Repo")
        co.Exec(BUILD_AND_PUSH_CMD, name="Build and Push", requires_docker=True)
    return output

def deploy_infra() -> co.Serial:
    vpc_cmd = DEPLOY_STACK_CMD.format(stack="vpc")
    elb_cmd = DEPLOY_STACK_CMD.format(stack="elb")
    with co.Serial() as output:
        co.Exec(vpc_cmd, name="VPC")
        co.Exec(elb_cmd, name="ELB")
    return output

def main() -> co.Serial:
    with co.Serial() as node:
        node["ping"] = co.Exec(
            "redis-cli -h redis-15233.c61.us-east-1-3.ec2.cloud.redislabs.com"
            " -p 15233 -a nO4bpNHpUne4PRearIOZrHYgU5N3wWsJ ping | grep PONG",
            image=img,
        )
    return node

def _redis_wrapper() -> co.Serial:
    """
    This is a simple wrapper that starts and stops a local redis instance
    around our *redis_data_store* example. It exists only to mock a real redis
    service you might have running externally, so the details of how it works
    are not critical right now. We use the Conducto features `stop_on_error`
    and `requires_docker`, which are discussed in a later tutorial.

    **Focus on the *redis_data_store* node for now.**
    """
    name = "conducto_demo_redis"
    mock_redis_start_cmd = f"""set -ex
docker run -p 6379:6379 -d --rm --name {name} redis:5.0-alpine
sleep 1  # wait for redis to start up
docker logs --details {name}  # error if redis container not running
docker inspect {name} --format="{{{{.State.Running}}}}"
"""
    mock_redis_stop_cmd = f"docker stop {name} || true"

    with co.Serial(
        image="docker:19.03",
        stop_on_error=False,
        requires_docker=True,
        doc=co.util.magic_doc(doc_only=True),
    ) as wrapper:
        co.Exec(mock_redis_start_cmd, name="mock_redis_start")
        wrapper["redis_data_store"] = redis_data_store()
        co.Exec(mock_redis_stop_cmd, name="mock_redis_stop")
    return wrapper

def deploy() -> co.Serial:
    with co.Serial() as node:
        node["create app"] = co.Exec(CREATE_APP)
        node["stop if not already"] = co.Exec(STOP_APP)
        node["configure app"] = co.Exec(CONFIGURE_APP)

        # CRC.NEW means that all nodes in "push" run in the same container.
        CRC = co.ContainerReuseContext
        with co.Serial(container_reuse_context=CRC.NEW, name="push") as push:
            push["register ssh key"] = co.Exec(REGISTER_SSH_KEY)
            push["test ssh key"] = co.Exec(TEST_SSH_KEY)
            push["push code"] = co.Exec(PUSH_CODE)

        node["start app"] = co.Exec(START_APP)
        with co.Parallel(name="sanity check") as check:
            check["peek at logs"] = co.Exec(PEEK_LOGS)
            check["check alive"] = co.Exec(TEST_FLASK)
    return node

def make_compute_features_node(
    in_dir, tmp_dir, out_file, start_date="00000000"
) -> co.Serial:
    """
    Builds a tree for computing features. Parallelize over different months.
    """
    all_files = glob.glob(f"{in_dir}/*.csv")
    all_yyyymms = sorted({os.path.basename(f)[:-4] for f in all_files})
    os.makedirs(tmp_dir, exist_ok=True)

    # Skip the first month because we need 1 month of history to compute features
    all_yyyymms = all_yyyymms[1:]

    # Then subset to only the ones beyond the start date
    all_yyyymms = [yyyymm for yyyymm in all_yyyymms if yyyymm >= start_date[:6]]

    # Make output
    output = co.Serial()
    output["Parallelize"] = co.Parallel()
    for node, yyyymm in co.util.makeyyyymmnodes(output["Parallelize"], all_yyyymms):
        node[yyyymm] = co.Exec(compute_features, yyyymm, in_dir, tmp_dir)
    output["Merge"] = co.Exec(merge_data, tmp_dir, out_file)
    return output

def data_pipeline() -> co.Serial:
    """
    `conducto-data-pipeline` is a pipeline-local key-value store. This data is
    only visible to your pipeline and persists until your pipeline is archived.

    One useful application is storing binaries in a build node, and retrieving
    them in a later test node. We exercise the `put` and `get` commands to do
    this.
    """
    build_cmd = """set -ex
go build -o bin/app ./app.go
conducto-data-pipeline put --name my_app_binary --file bin/app
"""
    test_cmd = """set -ex
conducto-data-pipeline get --name my_app_binary --file /tmp/app
/tmp/app --test
"""

    # Dockerfile installs golang and conducto.
    dockerfile = "./docker/Dockerfile.data"
    image = co.Image(dockerfile=dockerfile, context=".", copy_dir="./code")

    with co.Serial(image=image, doc=co.util.magic_doc()) as build_and_test:
        co.Exec("conducto-data-pipeline --help", name="usage")
        co.Exec(build_cmd, name="build")
        co.Exec(test_cmd, name="test")
    return build_and_test

def main(start_date="20120101") -> co.Serial:
    """
    Build a volume-prediction model for SPY.US. Steps:
    * Download data from S3 to the /conducto/data drive.
    * Compute features in parallel.
    * Build 3 models in parallel to predict volume.
    * For each model, fit, then do a parallel backtest.
    * Once all backtests are complete, summarize the results.
    """
    path = "/conducto/data/pipeline"
    root = co.Serial(image=_get_image(), env={"PYTHONBREAKPOINT": "ipdb.set_trace"})
    root["Download"] = co.Exec(download_data, f"{path}/raw")

    # "Compute Features" should be parallelized at runtime, based on the actual
    # data downloaded in the previous step. Use co.Lazy to define and execute
    # this subtree.
    root["Compute Features"] = co.Lazy(
        make_compute_features_node,
        in_dir=f"{path}/raw",
        tmp_dir=f"{path}/feat/tmp",
        out_file=f"{path}/feat/merged.csv",
        start_date=start_date,
    )

    # Try three different model types
    root["Models"] = co.Parallel()
    for mdl in ["linear", "svm", "gradient_boost"]:
        # For each model, fit it, then backtest
        root["Models"][mdl] = fit_and_test = co.Serial()
        fit_and_test["Fit"] = co.Exec(
            fit,
            model_type=mdl,
            in_file=f"{path}/feat/merged.csv",
            out_file=f"{path}/fit/{mdl}",
        )
        fit_and_test["Backtest"] = co.Lazy(
            make_backtest_node,
            feature_dir=f"{path}/feat",
            model_file=f"{path}/fit/{mdl}",
            tmp_dir=f"{path}/results/tmp/{mdl}",
            out_file=f"{path}/results/{mdl}.csv",
        )

    # Analyze the results of the backtests and plot.
    root["Analyze"] = co.Exec(analyze, f"{path}/results")
    return root

def run(branch: str) -> co.Serial:
    image = co.Image(image="python:3.6", reqs_py=["conducto"])
    root = co.Serial(image=image)

    with co.Serial(same_container=co.SameContainer.NEW, cpu=12, mem=32) as build:
        build["fetch"] = co.Exec("echo im fetching")
        build["checkout"] = co.Exec("echo im checking out")
        with co.Parallel(name="checks") as checks:
            checks["yapf"] = co.Exec("echo checking yapf")
            checks["python_tests"] = co.Exec("echo checking python tests")
            checks["flake8"] = co.Exec("echo checking flake8")
            checks["pylint"] = co.Exec("echo im checking pylint")
            checks["mypy"] = co.Exec("echo im checking mypy")
            checks["cppcheck"] = co.Exec("echo im checking cppcheck")
            checks["clang_format"] = co.Exec("echo im checking clang_format")
        build["build"] = co.Exec("echo im building now")
    root["build"] = build

    auth_token = co.api.Auth().get_token_from_shell()
    access_token = co.api.Secrets().get_user_secrets(auth_token)["GITHUB_ACCESS_TOKEN"]

    stdout = subprocess.check_output(
        f"git ls-remote [email protected]:jmazar/conduco_statuses.git refs/heads/{branch} | cut -f1",
        shell=True,
    )
    sha = stdout.decode("utf-8").strip()
    print(sha)
    print(access_token)

    creator = co.callback.github_status_creator(
        owner="jmazar",
        repo="conduco_statuses",
        sha=sha,
        access_token=access_token,
    )
    for node in root.stream():
        if isinstance(node, co.Exec):
            node.on_queued(creator(state="pending"))
            node.on_done(creator(state="success"))
            node.on_error(creator(state="failure"))
    return root

def primes_less_than(n: int) -> co.Serial:
    img = co.Image(copy_dir=".")
    with co.Serial(image=img) as root:
        root["find primes"] = co.Exec(sieve, n)
    return root

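# The `sieve` function passed to co.Exec above is defined elsewhere in the project.
# Below is a minimal, hypothetical sketch of what it might look like: a plain Sieve
# of Eratosthenes that prints every prime below n, with the signature assumed from
# the co.Exec(sieve, n) call.
def sieve(n: int) -> None:
    """Print all primes less than n."""
    is_prime = [True] * max(n, 2)
    is_prime[0] = is_prime[1] = False
    for candidate in range(2, int(n ** 0.5) + 1):
        if is_prime[candidate]:
            # mark every multiple of this prime as composite
            for multiple in range(candidate * candidate, n, candidate):
                is_prime[multiple] = False
    print([i for i in range(n) if is_prime[i]])
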
def hello_py() -> co.Serial:
    with co.Serial() as root:
        hi = co.Exec(say_it)
        hi.image = py_img
        root["Say Hi"] = hi
    return root

def run() -> co.Serial:
    image = co.Image(
        "python:3.7",
        copy_branch="master",
        copy_url="https://github.com/liamcryan/ieuler.git",
    )
    with co.Serial(image=image, doc=co.util.magic_doc()) as pipeline:
        co.Exec('pip install -r requirements.txt', name='build')
        co.Exec('pytest', name='tests')
    return pipeline

def main() -> co.Serial:
    path = "/conducto/data/pipeline"
    root = co.Serial(image=get_image())

    # Get data from keras for testing and training
    root["Get Data"] = co.Exec(run_whole_thing, f"{path}/raw")
    return root

def main() -> co.Serial:
    with co.Serial(image=img) as root_node:

        # download and decompress the suspicious data
        with co.Serial(name="setup"):
            co.Exec(
                f"""
                wget -O {data}/genes.fasta.gz \
                    https://sgd-prod-upload.s3.amazonaws.com/S000208654/orf_coding.20150113.fasta.gz
                """,
                name="get data",
            )
            co.Exec(
                f"gunzip -c {data}/genes.fasta.gz > {data}/genes.fna",
                name="decompress",
            )
            co.Exec(f"echo '{dummy_contents}' > {data}/genome.fna", name="place data")

        with co.Parallel(name="experiment"):

            # use it as-is
            co.Exec(
                f"""
                makeblastdb -in {data}/genome.fna -dbtype nucl -out tempdb
                blastn -query {data}/genes.fna -outfmt 5 -db tempdb 1> /dev/null 2>errors

                # fail if previous command wrote to stderr
                cat errors >&2
                [[ $(wc -l < errors) -ge 1 ]] && exit 1 || exit 0
                """,
                name="has errors",
            )

            # use it after replacing the unicode escape sequences with 'BADCHAR'
            co.Exec(
                f"""
                # fix bad characters for YMR156C, YCL018W, YGR257C, and YDR412W
                cat {data}/genes.fna | sed 's/&#/BADCHAR/g' > {data}/fixed.fna

                makeblastdb -in {data}/genome.fna -dbtype nucl -out tempdb
                blastn -query {data}/fixed.fna -outfmt 5 -db tempdb 1> /dev/null 2>errors

                # fail if previous command wrote to stderr
                cat errors >&2
                [[ $(wc -l < errors) -ge 1 ]] && exit 1 || exit 0
                """,
                name="fixed",
            )

    return root_node

def hello_linux() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Say Hi"] = co.Exec(
        """
        echo '{"message": "Hello World"}' | jq '.message'
        """,
        image=lin_img,
    )
    return pipeline

def main() -> co.Serial:
    with co.Serial(image=IMG, requires_docker=True) as root:
        with co.Parallel(name="Init") as init:
            init["Build"] = co.Exec("docker build .")
            init["Lint"] = co.Exec("black --check .")
            init["Unit Test"] = co.Exec("python unit_test.py")
        root["Deploy"] = co.Exec("bash deploy_aws.sh")
        root["Integration Test"] = co.Exec("bash int_test.sh")
    return root

def main() -> co.Serial:
    with co.Serial(image=IMG, requires_docker=True) as root:
        with co.Parallel(name="Init") as init:
            init["Build"] = co.Exec("sleep 3")
            init["Lint"] = co.Exec("sleep 1")
            init["Unit Test"] = co.Exec("sleep 1.5")
        root["Deploy"] = co.Exec("sleep 4")
        root["Integration Test"] = co.Exec("sleep 2")
    return root

def cleanup() -> co.Serial:
    delete_service_cmd = DELETE_STACK_CMD.format(stack="service")
    delete_elb_cmd = DELETE_STACK_CMD.format(stack="elb")
    delete_vpc_cmd = DELETE_STACK_CMD.format(stack="vpc")
    with co.Serial(skip=True, doc=CLEANUP_DOC) as output:
        co.Exec(delete_service_cmd, name="Service")
        co.Exec(delete_elb_cmd, name="ELB")
        co.Exec(delete_vpc_cmd, name="VPC")
        co.Exec(DELETE_REPO_CMD, name="Repo")
    return output

def deploy() -> co.Serial:
    """
    Start Containers.
    """
    # Flask needs to know the Redis IP before it can start, so
    # make sure this node is Serial.
    with co.Serial(image=docker_img, requires_docker=True) as node:

        # use the redis image from dockerhub
        with co.Serial(name="redis") as redis:
            redis["start"] = co.Exec(START_REDIS_CMD)

        # include our flask code via a Dockerfile
        with co.Serial(name="flask") as flask:
            flask["build"] = co.Exec(BUILD_FLASK_CMD)
            flask["start"] = co.Exec(START_FLASK_CMD)

    return node

def main() -> co.Serial:
    """
    Starts services, tests them, and cleans up
    """
    # outer context: continue on errors
    # so 'clean up' still runs if tests fail
    with co.Serial(stop_on_error=False, doc=__doc__) as root:

        # inner context: stop on errors
        # don't bother testing a failed deployment
        with co.Serial(name="run", stop_on_error=True) as run:
            run["deploy"] = deploy()
            run["test"] = test()

        # stop services
        root["clean up"] = teardown()

    return root

def download_file(source_url, target_path) -> co.Serial:
    "Return a Serial node that downloads a gzipped FASTA file and decompresses it."
    target_dir = Path(target_path).parent
    node = co.Serial()
    node["Download"] = co.Exec(
        f"mkdir -p {target_dir} && wget -O {target_path}.gz {source_url}"
    )
    node["Decompress"] = co.Exec(f"gunzip -c {target_path}.gz > {target_path}")
    return node

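# For illustration only: a hypothetical way to wire download_file() into a pipeline.
# The URL and target path below are placeholders, not values from the original.
def example_usage() -> co.Serial:
    root = co.Serial()
    root["Get genes"] = download_file(
        "https://example.com/genes.fasta.gz",  # placeholder URL
        "/conducto/data/pipeline/genes.fasta",
    )
    return root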