def main():
    # _thisfile, file_to_execute, *arguments = sys.argv
    args = sys.argv[1:]
    if not args or args[0] in (
        "-h",
        "--help",
        "--version",
        "show",
        "debug",
        "livedebug",
        "init",
        "migrate",
    ):
        variables = {
            "show": show,
            "debug": debug,
            "livedebug": livedebug,
            "init": init,
            "migrate": migrate,
        }
        co.main(variables=variables)
    else:
        file_to_execute, *arguments = args
        if not os.path.exists(file_to_execute):
            print(f"No such file or directory: '{file_to_execute}'", file=sys.stderr)
            sys.exit(1)
        module = _load_file_module(file_to_execute)
        variables = {k: getattr(module, k) for k in dir(module)}
        co.main(variables=variables, argv=arguments, filename=file_to_execute)
def _main(cls):
    import conducto as co

    variables = {
        "delete": cls._delete_cli,
        "exists": cls._exists_cli,
        "get": cls._get_cli,
        "gets": cls._gets_cli,
        "list": cls._list_cli,
        "put": cls._put_cli,
        "puts": cls._puts_cli,
        "size": cls._size_cli,
        "url": cls._url_cli,
        "cache-exists": cls._cache_exists_cli,
        "clear-cache": cls._clear_cache_cli,
        "save-cache": cls._save_cache_cli,
        "restore-cache": cls._restore_cache_cli,
    }
    co.main(variables=variables, printer=cls._print)
import conducto as co


def go() -> co.Serial:
    with co.Serial() as node:
        co.Exec("echo hi", name="node")
    return node


if __name__ == "__main__":
    co.main(default=go)
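# co.main() turns this file into a small command-line launcher. Assuming
# Conducto's standard launch flags (they are not shown in this snippet),
# a typical local run would look like:
#
#     python pipeline.py --local
#
# where pipeline.py is whatever name this file was saved under.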
    # The image parameters that appear in `reqs_py` and `reqs_packages` are
    # dependencies of this function, but the pipeline launcher doesn't need them.
    #
    # Import them inside the function to reduce external dependencies.
    os.environ["PATH"] = ":".join([os.environ["PATH"], "/usr/games"])
    from sh import fortune

    now = datetime.now()
    parent = co.Parallel()
    for i in range(monthrange(now.year, now.month)[1]):
        date = f"{now.year}-{now.month}-{i + 1}"
        fortune_str = indent(fortune().stdout.decode(), prefix=16 * " ")
        cmd = cleandoc(
            f"""
                echo "About {date} the spirits say:"
                cat << EOF
                {fortune_str[16:]}
                EOF"""
        )
        parent[date] = co.Exec(cmd)
    return parent


if __name__ == "__main__":
    co.main(default=pipeline)
import conducto as co


def compare() -> co.Serial:
    with co.Parallel() as root:
        with co.Serial(name="run until error"):
            # will fail because grep returns nonzero
            co.Exec("echo foo | grep bar", name="fail")
            # will remain pending because the previous node failed
            co.Exec("echo baz", name="succeed")
        with co.Serial(stop_on_error=False, name="run all children"):
            # will fail because grep returns nonzero
            co.Exec("echo wakka | grep bang", name="fail")
            # will run and succeed despite the earlier failure
            co.Exec("echo splat", name="succeed")
    return root


if __name__ == "__main__":
    co.main(default=compare)
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 1)
        else:
            # most of the sensors work just fine
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 0)

    run_callback = co.callback.slack_status(
        recipient="SlackUser", message="polling sensors"
    )
    r.on_running(run_callback)

    err_callback = co.callback.slack_status(recipient="#array-status")
    r.on_error(err_callback)

    done_callback = co.callback.slack_status(
        recipient="#array-status",
        message="all sensors reporting nominally",
    )
    r.on_done(done_callback)

    # other events include:
    # - on_queued
    # - on_running
    # - on_killed
    # - on_state_change

    return r


if __name__ == "__main__":
    co.main(default=poll_sensors)
    # TODO: instead of modeling a fixed number of clock ticks,
    # use a lazy node to extend this until a grid state is repeated
    for tick in ticks:
        with co.Serial(name=f"tick {tick}", image=game_of_life) as iteration:
            iteration["show grid"] = co.Exec(show_grid(tick))
            iteration["find neighbors"] = co.Exec(find_neighborhoods(tick))
            with co.Parallel(name="apply_rules", image=game_of_life) as rules:
                rules["isolate"] = co.Exec(isolate(tick))
                rules["survive"] = co.Exec(survive(tick))
                rules["crowd"] = co.Exec(crowd(tick))
                rules["reproduce"] = co.Exec(reproduce(tick))
                rules["ignore"] = co.Exec(ignore(tick))
            iteration["next grid"] = co.Exec(next_grid(tick))
        image_names.append(f"image_{tick}.png")

    image_list = " ".join(image_names)
    pipeline["animate"] = co.Exec(animate(image_list))
    return pipeline


if __name__ == "__main__":
    co.main(default=life)
import conducto as co

python_img = co.Image(
    image="python:3.8-alpine",
    copy_url="https://github.com/leachim6/hello-world",
    copy_branch="master",
    path_map={"./local-copy/p": "p"},
)


def hello() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Say Hi"] = co.Exec("python p/python3.py", image=python_img)
    return pipeline


if __name__ == "__main__":
    co.main(default=hello)
import conducto as co

cmd = "cat /etc/*-release"


def which_distro() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Node Name"] = co.Exec(cmd, image="alpine:latest")
    return pipeline


if __name__ == "__main__":
    co.main(default=which_distro)
# wget https://www.fs.usda.gov/rds/archive/products/RDS-2005-0004/RDS-2005-0004.zip
# unzip RDS-2005-0004.zip
import conducto as co


def download_and_plot() -> co.Serial:
    download_command = """
        apt update -y && apt install -y curl unzip
        curl https://www.fs.usda.gov/rds/archive/products/RDS-2005-0004/RDS-2005-0004.zip > data.zip
        unzip data.zip
    """
    image = co.Image(dockerfile='./Dockerfile', context='.')
    with co.Serial(image=image) as pipeline:
        co.Exec(download_command, name="download")
        with co.Parallel(name='plot'):
            co.Exec('python rainfall.py', name='daily')
            co.Exec('python rainfall.py --resample M --save', name='monthly')
    return pipeline


if __name__ == '__main__':
    co.main(default=download_and_plot)
    # no ambiguity here: all kwargs refer to conducto.Node.__init__
    co.Exec('''echo "node has 1.5 cpu's"''', name="A", cpu=1.5)

    # native method parameters come first;
    # modify the node object in a second step, then connect it to its parent
    node_obj = co.Exec(myfunc, "DDR4-2933 (quad channel)", cpu=2950)
    node_obj.set(cpu=0.75, mem=1.5)
    node["B"] = node_obj

    # or connect it to its parent, then modify it in place
    node["C"] = co.Exec(myfunc, "DDR4-2667 (dual channel)")
    node["C"].set(cpu=0.75, mem=1.5)

    # some non-custom types don't have obvious string representations
    payload = {"foo": 2, "bar": 3}
    func(payload)

    # so you may have to handle the serialization yourself
    node["D"] = co.Exec(wrappedfunc, json.dumps(payload))

    # custom types work, but you need to provide helpers
    param_obj = Emoticon(happy=True)
    node["E"] = co.Exec(describe, param_obj)

    return node


if __name__ == "__main__":
    co.main(default=disambiguate)
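# A hypothetical sketch of the `wrappedfunc` used for node "D" above: it
# accepts the JSON string produced by json.dumps and rebuilds the dict
# before delegating. (The name and body are illustrative, not from the source.)
import json


def wrappedfunc(payload_json: str):
    payload = json.loads(payload_json)  # reverse the serialization
    return func(payload)  # call the original function with the real dict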
        fortune_str = fortune()
        cmd = cleandoc(
            f"""
            echo "About {date} the spirits say:"
            cat << EOF
            {indent(fortune_str, prefix=' ')}
            EOF"""
        )
        parent[date] = co.Exec(cmd)
    return parent


# copy_dir places this file in the image so that
# the above function can be found when the Lazy node runs
img = co.Image(copy_dir=".", reqs_py=["conducto", "sh"], reqs_packages=["fortune"])


def make_pipeline() -> co.Serial:
    root = co.Serial(image=img)
    root['fortune'] = co.Lazy(nodes_for_this_month)
    return root


if __name__ == "__main__":
    co.Image.share_directory("fortune", ".")
    co.main(default=make_pipeline)
    with co.Serial() as root:
        hi = co.Exec(say_it)
        hi.image = py_img
        root["Say Hi"] = hi
    return root


# Use the package repository native to your image's Linux flavor
################################################################

# use `apt` to install `jq` into the image
lin_img = co.Image(reqs_packages=["jq"])


# have it parse some JSON
def hello_linux() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Say Hi"] = co.Exec(
        """
        echo '{"message": "Hello World"}' | jq '.message'
        """,
        image=lin_img,
    )
    return pipeline


if __name__ == "__main__":
    co.main(default=hello_py)
        if line.strip() == "":
            continue
        obj = json.loads(line)
        layer = [obj["ID"], _size_mb(obj["Size"])]
        # docker history prints "<missing>" when a layer has no local image
        # ID; fold its size into the preceding layer
        if layer[0] == "<missing>":
            sizes[-1][1] += layer[1]
        else:
            sizes.append(layer)

    overlaps.extend(sizes)
    singles.update(dict(sizes))
    image_virtual_total = sum([v for k, v in overlaps])
    image_size_net = sum([v for k, v in singles.items()])
    return image_virtual_total, image_size_net


async def show_usage(label):
    image_ids = await _get_image_ids(label)
    virtual, size_net = await _image_list_sizes(image_ids)
    print(f"Images labeled {label}: {len(image_ids)}")
    print(f"Virtual Total: {virtual:.2f} MB")
    print(f"Net Size: {size_net:.2f} MB")


if __name__ == "__main__":
    co.main(default=cleanup)
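# A hypothetical sketch of the `_size_mb` helper referenced above, assuming
# the "Size" field is a human-readable string such as "12.3MB" or "45kB"
# (as produced by `docker history --format '{{json .}}'`). This helper is
# not shown in the source; the names and unit table are illustrative.
def _size_mb(size_str: str) -> float:
    units = {"B": 1e-6, "kB": 1e-3, "MB": 1.0, "GB": 1e3}
    # check longer suffixes first so "MB" is not mistaken for "B"
    for suffix, factor in sorted(units.items(), key=lambda u: -len(u[0])):
        if size_str.endswith(suffix):
            return float(size_str[: -len(suffix)]) * factor
    return 0.0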
    in_files = sorted(glob.glob(in_glob))
    output = co.Parallel()
    for f in in_files:
        # Input:  RAW_DATA_DIR/validate3554.tfrecord
        # Output: OUT_DIR/validate3554.pkl.gzip
        base = os.path.basename(f).replace(".tfrecord", "")
        out_path = os.path.join(out_dir, base + ".pkl.gzip")
        if len(in_files) > 50:
            import re

            parent = re.sub(r"(\d\d)\d\d", r"\1__", base)
            if parent not in output:
                output[parent] = co.Parallel()
            base = f"{parent}/{base}"
        output[base] = co.Exec(
            commands.backtest, model_path=model_path, data_path=f, out_path=out_path
        )
    return output


IMG = co.Image(copy_dir=".", reqs_py=["conducto", "tensorflow", "matplotlib"])

if __name__ == "__main__":
    co.main(image=IMG)
import conducto as co


def cicd() -> co.Serial:
    image = co.Image(
        "node:current-alpine",
        copy_url="https://github.com/flippedcoder/its-hot",
        copy_branch="master",
    )

    install_node = co.Exec("npm i")
    # set CI=true in the command's environment (no semicolon) so npm sees it
    test_node = co.Exec("CI=true npm test")
    build_node = co.Exec("npm run build")
    deploy_node = co.Exec("echo secret stuff to deploy to an S3 bucket on AWS")

    # SameContainer.NEW runs every child in one shared container, so the
    # node_modules installed in the first step persist for the later steps
    pipeline = co.Serial(image=image, same_container=co.SameContainer.NEW)
    pipeline["Install dependencies..."] = install_node
    pipeline["Running tests..."] = test_node
    pipeline["Build project..."] = build_node
    pipeline["Deploy project..."] = deploy_node
    return pipeline


if __name__ == "__main__":
    co.main(default=cicd)
    #### Clear

    To clear the downloaded data and see these nodes in action again, select
    the "clear" node and click the _Unskip_ button.
    """
    with co.Serial(image=utils.IMG, doc=co.util.magic_doc()) as out:
        doc = co.util.magic_doc(func=btc.download)
        cleardoc = co.util.magic_doc(func=btc.clear)
        out["usage"] = co.Exec("conducto-data-user --help")
        out["download_20-11"] = co.Exec(
            "python code/btc.py download --start=-20 --end=-11", doc=doc
        )
        out["download_15-6"] = co.Exec(
            "python code/btc.py download --start=-15 --end=-6", doc=doc
        )
        out["download_10-now"] = co.Exec(
            "python code/btc.py download --start=-10 --end=-1", doc=doc
        )
        out["clear"] = co.Exec("python code/btc.py clear", skip=True, doc=cleardoc)
    return out


def examples() -> co.Serial:
    ex = co.Serial(doc=__doc__)
    ex["co.data.pipeline"] = data_pipeline()
    ex["co.data.user"] = data_user()
    return ex


if __name__ == "__main__":
    print(__doc__)
    co.main(default=examples)
            # The idea here is that the .coverage.* file is unique
            # from others being built in other containers
            co.Exec(
                coverage_template.format(share=share),
                name="{} Coverage".format(name),
                image=image,
            )

        # Coverage Reporting
        co.Exec(
            coverage_report_template.format(share=share),
            name="Test Code Coverage",
            image=base_image,
        )

        with co.Parallel(name="Packaging"):
            for entry in pkg_dockerfiles:
                name, dockerfile = entry
                image = co.Image(dockerfile=dockerfile, context=context, **image_kwargs)
                # Build our packages
                co.Exec(rpm_pkg_template, name=name, image=image)

    return pipeline


if __name__ == "__main__":
    # Execute our pipeline
    co.main(default=all_checks)
    # genes
    source_url, genes_file = genes(data_dir)
    download["genes: S288C"] = download_file(source_url, genes_file)

    hits = {}
    with co.Parallel(name="Process"):
        for name, _, target_file in genomes(data_dir):
            with co.Serial(name=name) as process_one:
                process_one["Make BLAST DB"] = co.Exec(f"""
                    cd {data_dir}
                    makeblastdb -in {target_file} -dbtype nucl -out {name}
                """)
                hits_file = f"{name}_hits.xml"
                process_one["Find Genes"] = co.Exec(f"""
                    cd {data_dir}
                    blastn -query {genes_file} -outfmt 5 -db {name} > {hits_file}
                """)
                hits[name] = hits_file

    root["Analyze"] = co.Exec(analyze, json.dumps(hits))
    # root["Analyze"] = co.nb(something???)
    return root


if __name__ == "__main__":
    co.main(default=main)
import conducto as co


def build_and_test() -> co.Serial:
    image = co.Image(copy_dir="./code")
    with co.Serial(image=image, stop_on_error=False) as pipeline:
        with co.Parallel(name="Trade") as first_trading:
            first_trading['US'] = co.Exec("python3 first_stock_trading.py")
            first_trading['CHINA'] = co.Exec("python3 second_stock_trading.py")
        with co.Parallel(name="TopK") as second_trading:
            second_trading['US'] = co.Exec("python3 first_topK_stock_pipeline.py")
            second_trading['CHINA'] = co.Exec("python3 second_topK_stock_pipeline.py")
    return pipeline


if __name__ == "__main__":
    co.main(default=build_and_test)
# Island Information:
#   hawaii -> echo big island
#   maui county:
#     maui -> echo valley isle
#     lanai -> echo pineapple isle
#     molokai -> echo friendly isle
#     kahoolawe -> echo target isle
#   oahu -> echo gathering place
#   kauai county:
#     kauai -> echo garden isle
#     niihau -> echo forbidden isle
import conducto as co


def islands() -> co.Serial:
    with co.Serial() as pipeline:
        pipeline["hawaii"] = co.Exec("echo big island")
        with co.Parallel(name="maui_county") as maui_county:
            maui_county["maui"] = co.Exec("echo valley isle")
            maui_county["lanai"] = co.Exec("echo pineapple isle")
            maui_county["molokai"] = co.Exec("echo friendly isle")
            maui_county["kahoolawe"] = co.Exec("echo target isle")
        pipeline["oahu"] = co.Exec("echo gathering place")
        with co.Serial(name="kauai_county") as kauai_county:
            kauai_county["kauai"] = co.Exec("echo garden isle")
            kauai_county["niihau"] = co.Exec("echo forbidden isle")
    return pipeline


if __name__ == "__main__":
    co.main()
    pipeline = co.Serial(image=external_img)
    pipeline["Say Hi"] = co.Exec("python p/python3.py")
    return pipeline


# Include this git repo in the image
#####################################
examples_img = co.Image(image="python:3.8-alpine", copy_repo=True, reqs_py=["conducto"])


# plucks the hello-world pipeline out of it
def get_pipeline() -> co.Serial:
    from hello_world import hello

    return hello.pipeline()


# reference code in this repo
def hello_self() -> co.Serial:
    pipeline = co.Serial(image=examples_img, env={"PYTHONPATH": "."})
    pipeline["Say Hi"] = co.Lazy(get_pipeline)
    return pipeline


# co.Lazy builds the rest of the pipeline tree at runtime, so if new nodes are
# added to ../hello_world.pipeline.py in the future, those nodes will show up
# without needing to change this file

if __name__ == "__main__":
    co.main(default=hello_other)
            {df.transpose().round(2).to_markdown()}
            </ConductoMarkdown>
            """)


############################################################
# Constants and globals
############################################################

DATA_PATH = "/conducto/data/pipeline/steo.txt"

DATASETS = {
    "Heating Degree Days": r"^STEO.ZWHD_[^_]*\.M$",
    "Cooling Degree Days": r"^STEO.ZWCD_[^_]*\.M$",
    "Electricity Generation": r"^STEO.NGEPGEN_[^_]*\.M$",
}

IMG = co.Image(
    "python:3.8", copy_dir=".", reqs_py=["conducto", "pandas", "matplotlib", "tabulate"]
)

# Data is downloaded from the United States Energy Information Administration.
# https://www.eia.gov/opendata/bulkfiles.php
DOWNLOAD_COMMAND = f"""
echo "Downloading"
curl http://api.eia.gov/bulk/STEO.zip > steo.zip
unzip -cq steo.zip > {DATA_PATH}
""".strip()

if __name__ == "__main__":
    co.main(default=run)
output["Deploy"] = co.Exec("echo aws cloudformation deploy") return output def build(projects: typing.List[str]) -> co.Parallel: "Build projects in parallel, using simple shell command." # Override the parent image to use one with docker installed. img = co.Image(image="docker:19.03", copy_dir=".") output = co.Parallel(image=img, requires_docker=True) for project in projects: # Command needs docker; inherits flag from parent node output[project] = co.Exec(f"cd {project} && docker build .") return output def test(projects: typing.List[str]) -> co.Parallel: "Group tests by project, all in parallel." output = co.Parallel() for project in projects: output[project] = co.Parallel() for name in utils.get_tests(project): # co.Exec often accepts a command string. In this case it takes (func, *args, **kwargs), output[project][name] = co.Exec(utils.run_test, project, test=name) return output if __name__ == "__main__": co.main(default=ci_cd)