Example #1
def main():
    # _thisfile, file_to_execute, *arguments = sys.argv
    args = sys.argv[1:]
    if not args or args[0] in (
            "-h",
            "--help",
            "--version",
            "show",
            "debug",
            "livedebug",
            "init",
            "migrate",
    ):
        variables = {
            "show": show,
            "debug": debug,
            "livedebug": livedebug,
            "init": init,
            "migrate": migrate,
        }
        co.main(variables=variables)
    else:
        file_to_execute, *arguments = args

        if not os.path.exists(file_to_execute):
            print(f"No such file or directory: '{file_to_execute}'",
                  file=sys.stderr)
            sys.exit(1)

        module = _load_file_module(file_to_execute)
        variables = {k: getattr(module, k) for k in dir(module)}
        co.main(variables=variables, argv=arguments, filename=file_to_execute)
Example #2
    def _main(cls):
        import conducto as co

        variables = {
            "delete": cls._delete_cli,
            "exists": cls._exists_cli,
            "get": cls._get_cli,
            "gets": cls._gets_cli,
            "list": cls._list_cli,
            "put": cls._put_cli,
            "puts": cls._puts_cli,
            "size": cls._size_cli,
            "url": cls._url_cli,
            "cache-exists": cls._cache_exists_cli,
            "clear-cache": cls._clear_cache_cli,
            "save-cache": cls._save_cache_cli,
            "restore-cache": cls._restore_cache_cli,
        }
        co.main(variables=variables, printer=cls._print)
Example #3
import conducto as co


def go() -> co.Serial:
    with co.Serial() as node:
        co.Exec("echo hi", name='node')
    return node


if __name__ == "__main__":
    co.main(default=go)
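
The same one-node tree can also be built without the context manager, assigning the child by name as several of the later examples do; a minimal sketch:

import conducto as co


def go() -> co.Serial:
    root = co.Serial()
    root["node"] = co.Exec("echo hi")
    return root


if __name__ == "__main__":
    co.main(default=go)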
Example #4
    # The image parameters that appear in `reqs_py` and `reqs_packages` are
    # dependencies of this function. But the pipeline launcher doesn't need them.
    #
    # Import them inside the function to reduce external dependencies.

    os.environ["PATH"] = ":".join([os.environ["PATH"], "/usr/games"])
    from sh import fortune

    now = datetime.now()
    parent = co.Parallel()
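    # monthrange(now.year, now.month)[1] is the number of days in the current month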
    for i in range(monthrange(now.year, now.month)[1]):

        date = f"{now.year}-{now.month}-{i + 1}"

        fortune_str = indent(fortune().stdout.decode(), prefix=16 * " ")

        cmd = cleandoc(f"""
            echo "About {date} the spirits say:"
            cat << EOF

                {fortune_str[16:]}
            EOF""")

        parent[date] = co.Exec(cmd)

    return parent


if __name__ == "__main__":
    co.main(default=pipeline)
Example #5
import conducto as co


def compare() -> co.Parallel:

    with co.Parallel() as root:
        with co.Serial(name="run until error"):

            # will fail because grep returns nonzero
            co.Exec("echo foo | grep bar", name="fail")

            # will remain pending because the previous node failed
            co.Exec("echo baz", name="succeed")

        with co.Serial(stop_on_error=False, name="run all children"):

            # will fail because grep returns nonzero
            co.Exec("echo wakka | grep bang", name="fail")

            # will run and succeed despite the earlier failure
            co.Exec("echo splat", name="succeed")

    return root


co.main(default=compare)
Example #6
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 1)
        else:

            # most of the sensors work just fine
            r[f'/pmt/poll/{name}'] = co.Exec(certain, 0)

    run_callback = co.callback.slack_status(recipient="SlackUser",
                                            message="polling sensors")
    r.on_running(run_callback)

    err_callback = co.callback.slack_status(recipient="#array-status")
    r.on_error(err_callback)

    done_callback = co.callback.slack_status(
        recipient="#array-status",
        message="all sensors reporting nominally",
    )
    r.on_done(done_callback)

    # other events include:
    # - on_queued
    # - on_running
    # - on_killed
    # - on_state_change

    return r


if __name__ == "__main__":
    co.main(default=poll_sensors)
Example #7
        # TODO: instead of modeling a fixed number of clock ticks
        # use a lazy node to extend this until a grid state is repeated
        for tick in ticks:
            with co.Serial(name=f"tick {tick}",
                           image=game_of_life) as iteration:

                iteration["show grid"] = co.Exec(show_grid(tick))
                iteration["find neighbors"] = co.Exec(find_neighborhoods(tick))

                with co.Parallel(name="apply_rules",
                                 image=game_of_life) as rules:

                    rules["isolate"] = co.Exec(isolate(tick))
                    rules["survive"] = co.Exec(survive(tick))
                    rules["crowd"] = co.Exec(crowd(tick))
                    rules["reproduce"] = co.Exec(reproduce(tick))
                    rules["ignore"] = co.Exec(ignore(tick))

                iteration["next grid"] = co.Exec(next_grid(tick))

            image_names.append(f"image_{tick}.png")

        image_list = " ".join(image_names)
        pipeline["animate"] = co.Exec(animate(image_list))

    return pipeline


if __name__ == "__main__":
    co.main(default=life)
Example #8
import conducto as co

python_img = co.Image(
    image="python:3.8-alpine",
    copy_url="https://github.com/leachim6/hello-world",
    copy_branch="master",
    path_map={"./local-copy/p": "p"},
)


def hello() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Say Hi"] = co.Exec("python p/python3.py", image=python_img)
    return pipeline


if __name__ == "__main__":
    co.main(default=hello)
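
If the script lives next to the pipeline file instead of in a remote repository, the copy_dir form used in later examples does the same job; a small sketch, assuming a local ./p/python3.py (the names here are illustrative):

import conducto as co

local_img = co.Image(image="python:3.8-alpine", copy_dir=".")


def hello_local() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Say Hi"] = co.Exec("python p/python3.py", image=local_img)
    return pipeline


if __name__ == "__main__":
    co.main(default=hello_local)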
Example #9
import conducto as co

cmd = "cat /etc/*-release"


def which_distro() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Node Name"] = co.Exec(cmd, image="alpine:latest")
    return pipeline


if __name__ == "__main__":
    co.main(default=which_distro)
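
The same command can be fanned out across several base images to compare distributions side by side; a hedged sketch built only from the constructs shown above (the image tags are illustrative):

import conducto as co

cmd = "cat /etc/*-release"


def which_distros() -> co.Parallel:
    pipeline = co.Parallel()
    for tag in ("alpine:latest", "debian:buster-slim", "ubuntu:20.04"):
        # one node per base image, all running the same command
        pipeline[tag.replace(":", "_")] = co.Exec(cmd, image=tag)
    return pipeline


if __name__ == "__main__":
    co.main(default=which_distros)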
Example #10
# wget https://www.fs.usda.gov/rds/archive/products/RDS-2005-0004/RDS-2005-0004.zip
# unzip RDS-2005-0004.zip
import conducto as co


def download_and_plot() -> co.Serial:
    download_command = """
            apt update -y && apt install -y curl unzip
            curl https://www.fs.usda.gov/rds/archive/products/RDS-2005-0004/RDS-2005-0004.zip > data.zip
            unzip data.zip
        """
    image = co.Image(dockerfile='./Dockerfile', context='.')
    with co.Serial(image=image) as pipeline:
        co.Exec(download_command, name="download")
        with co.Parallel(name='plot'):
            co.Exec('python rainfall.py', name='daily')
            co.Exec('python rainfall.py --resample M --save', name='monthly')
    return pipeline


if __name__ == '__main__':
    co.main(default=download_and_plot)
Example #11
        # no ambiguity here, all kwargs refer to conducto.Node.__init__
        co.Exec('''echo "node has 1.5 cpu's"''', name="A", cpu=1.5)

        # native method parameters come first
        # modify the node object in a second step, then connect it to its parent
        node_obj = co.Exec(myfunc, "DDR4-2933 (quad channel)", cpu=2950)
        node_obj.set(cpu=0.75, mem=1.5)
        node["B"] = node_obj

        # or connect it to its parent, then modify it in place
        node["C"] = co.Exec(myfunc, "DDR4-2667 (dual channel)")
        node["C"].set(cpu=0.75, mem=1.5)

        # some non-custom types don't have obvious string representations
        payload = {"foo": 2, "bar": 3}
        func(payload)

        # so you may have to handle the serialization yourself
        node["D"] = co.Exec(wrappedfunc, json.dumps(payload))

        # custom types work, but you need to provide helpers
        param_obj = Emoticon(happy=True)
        node["E"] = co.Exec(describe, param_obj)

    return node


if __name__ == "__main__":
    co.main(default=disambiguate)
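
The wrappedfunc helper above is not shown; one possible sketch of it, assuming it only decodes the JSON string back into a dict before calling the elided worker (the bodies below are assumptions):

import json


def func(payload: dict):
    # stand-in for the elided worker from the example above
    print(f"foo={payload['foo']}, bar={payload['bar']}")


def wrappedfunc(payload_json: str):
    # arguments arrive as command-line strings, so deserialize before calling through
    func(json.loads(payload_json))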
Example #12
        fortune_str = fortune()

        cmd = cleandoc(f"""
            echo "About {date} the spirits say:"
            cat << EOF
            {indent(fortune_str, prefix='            ')}
            EOF""")

        parent[date] = co.Exec(cmd)

    return parent


# copy_dir places this file in the image so that
# the above function can be found when the Lazy node runs
img = co.Image(copy_dir=".",
               reqs_py=["conducto", "sh"],
               reqs_packages=["fortune"])


def make_pipeline() -> co.Serial:
    root = co.Serial(image=img)
    root['fortune'] = co.Lazy(nodes_for_this_month)
    return root


if __name__ == "__main__":
    co.Image.share_directory("fortune", ".")
    co.main(default=make_pipeline)
Example #13
    with co.Serial() as root:
        hi = co.Exec(say_it)
        hi.image = py_img
        root["Say Hi"] = hi

    return root


# Use the package repository native to your image's Linux flavor
##############################################################

# use `apt` to install `jq` into the image
lin_img = co.Image(reqs_packages=["jq"])


# have it parse some json
def hello_linux() -> co.Serial:
    pipeline = co.Serial()
    pipeline["Say Hi"] = co.Exec(
        """
        echo '{"message": "Hello World"}' | jq '.message'
        """,
        image=lin_img,
    )
    return pipeline


if __name__ == "__main__":
    co.main(default=hello_py)
Example #14
            if line.strip() == "":
                continue
            obj = json.loads(line)
            layer = [obj["ID"], _size_mb(obj["Size"])]
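            # "\u003cmissing\u003e" is the escaped form of "<missing>"; fold its size into the previous layer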
            if layer[0] == "\u003cmissing\u003e":
                sizes[-1][1] += layer[1]
            else:
                sizes.append(layer)

        overlaps.extend(sizes)
        singles.update(dict(sizes))

    image_virtual_total = sum([v for k, v in overlaps])
    image_size_net = sum([v for k, v in singles.items()])

    return image_virtual_total, image_size_net


async def show_usage(label):
    image_ids = await _get_image_ids(label)

    virtual, size_net = await _image_list_sizes(image_ids)

    print(f"Images labeled {label}:  {len(image_ids)}")
    print(f"Virtual Total:  {virtual:.2f} MB")
    print(f"Net Size:  {size_net:.2f} MB")


if __name__ == "__main__":
    co.main(default=cleanup)
Example #15
    in_files = sorted(glob.glob(in_glob))

    output = co.Parallel()
    for f in in_files:
        # Input: RAW_DATA_DIR/validate3554.tfrecord
        # Output: OUT_DIR/validate3554.pkl.gzip
        base = os.path.basename(f).replace(".tfrecord", "")
        out_path = os.path.join(out_dir, base + ".pkl.gzip")

        if len(in_files) > 50:
            import re
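            # e.g. "validate3554" -> "validate35__", grouping files that share their first two digits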
            parent = re.sub(r"(\d\d)\d\d", r"\1__", base)
            if parent not in output:
                output[parent] = co.Parallel()
            base = f"{parent}/{base}"

        output[base] = co.Exec(commands.backtest,
                               model_path=model_path,
                               data_path=f,
                               out_path=out_path)
    return output


IMG = co.Image(copy_dir=".", reqs_py=["conducto", "tensorflow", "matplotlib"])

if __name__ == "__main__":
    co.main(image=IMG)
Example #16
import conducto as co


def cicd() -> co.Serial:
    image = co.Image("node:current-alpine",
                     copy_url="https://github.com/flippedcoder/its-hot",
                     copy_branch="master")

    install_node = co.Exec("npm i")
    test_node = co.Exec("CI=true npm test")
    build_node = co.Exec("npm run build")
    deploy_node = co.Exec("echo secret stuff to deploy to an S3 bucket on AWS")

    pipeline = co.Serial(image=image, same_container=co.SameContainer.NEW)
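    # With SameContainer.NEW the steps below share one container, so the
    # node_modules installed by "npm i" is still present for test and build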

    pipeline["Install dependencies..."] = install_node
    pipeline["Running tests..."] = test_node
    pipeline["Build project..."] = build_node
    pipeline["Deploy project..."] = deploy_node

    return pipeline


if __name__ == "__main__":
    co.main(default=cicd)
Example #17
    #### Clear
    To clear the downloaded data and see these nodes in action again, select
    the "clear" node and click the _Unskip_ button.
    """
    with co.Serial(image=utils.IMG, doc=co.util.magic_doc()) as out:
        doc = co.util.magic_doc(func=btc.download)
        cleardoc = co.util.magic_doc(func=btc.clear)
        out["usage"] = co.Exec("conducto-data-user --help")
        out["download_20-11"] = co.Exec(
            "python code/btc.py download --start=-20 --end=-11", doc=doc)
        out["download_15-6"] = co.Exec(
            "python code/btc.py download --start=-15 --end=-6", doc=doc)
        out["download_10-now"] = co.Exec(
            "python code/btc.py download --start=-10 --end=-1", doc=doc)
        out["clear"] = co.Exec("python code/btc.py clear",
                               skip=True,
                               doc=cleardoc)
    return out


def examples() -> co.Serial:
    ex = co.Serial(doc=__doc__)
    ex["co.data.pipeline"] = data_pipeline()
    ex["co.data.user"] = data_user()
    return ex


if __name__ == "__main__":
    print(__doc__)
    co.main(default=examples)
Example #18
                # The idea here is that the .coverage.* file is unique
                # from others being built in other containers
                co.Exec(coverage_template.format(share=share),
                        name="{} Coverage".format(name),
                        image=image)

        # Coverage Reporting
        co.Exec(coverage_report_template.format(share=share),
                name="Test Code Coverage",
                image=base_image)

        with co.Parallel(name="Packaging"):
            for entry in pkg_dockerfiles:
                name, dockerfile = entry
                image = co.Image(dockerfile=dockerfile,
                                 context=context,
                                 **image_kwargs)

                # Build our packages
                co.Exec(rpm_pkg_template, name=name, image=image)

    return pipeline


if __name__ == "__main__":
    """
    Execute our pipeline
    """
    co.main(default=all_checks)
Example #19
            # genes
            source_url, genes_file = genes(data_dir)
            download["genes: S288C"] = download_file(source_url, genes_file)

        hits = {}

        with co.Parallel(name="Process"):
            for name, _, target_file in genomes(data_dir):
                with co.Serial(name=name) as process_one:

                    process_one["Make BLAST DB"] = co.Exec(f"""
                         cd {data_dir}
                         makeblastdb -in {target_file} -dbtype nucl -out {name}
                        """)

                    hits_file = f"{name}_hits.xml"
                    process_one["Find Genes"] = co.Exec(f"""
                        cd {data_dir}
                        blastn -query {genes_file} -outfmt 5 -db {name} > {hits_file}
                        """)
                    hits[name] = hits_file

        root["Analyze"] = co.Exec(analyze, json.dumps(hits))
        # root["Analyze"] = co.nb(something???)

    return root


if __name__ == "__main__":
    co.main(default=main)
Example #20
import conducto as co


def build_and_test() -> co.Serial:
    image = co.Image(copy_dir="./code")
    with co.Serial(image=image, stop_on_error=False) as pipeline:
        with co.Parallel(name="Trade") as first_trading:
            first_trading['US'] = co.Exec("python3 first_stock_trading.py")
            first_trading['CHINA'] = co.Exec("python3 second_stock_trading.py")
        with co.Parallel(name="TopK") as second_trading:
            second_trading['US'] = co.Exec(
                "python3 first_topK_stock_pipeline.py")
            second_trading['CHINA'] = co.Exec(
                "python3 second_topK_stock_pipeline.py")

    return pipeline


if __name__ == "__main__":
    co.main(default=build_and_test)
Example #21
# Island Information:
# hawaii -> echo big island
# maui county:
#     maui -> echo valley isle
#     lanai -> echo pineapple isle
#     molokai -> echo friendly isle
#     kahoolawe -> echo target isle
# oahu -> echo gathering place
# kauai county:
#     kauai -> echo garden isle
#     niihau -> echo forbidden isle

import conducto as co

def islands() -> co.Serial:
    with co.Serial() as pipeline:
        pipeline["hawaii"] = co.Exec("echo big island")
        with co.Parallel(name="maui_county") as maui_county:
            maui_county["maui"] = co.Exec("echo valley isle")
            maui_county["lanai"] = co.Exec("echo pineapple isle")
            maui_county["molokai"] = co.Exec("echo friendly isle")
            maui_county["kahoolawe"] = co.Exec("echo target isle")
        pipeline["oahu"] = co.Exec("echo gathering place")
        with co.Serial(name="kauai_county") as kauai_county:
            kauai_county["kauai"] = co.Exec("echo garden isle")
            kauai_county["niihau"] = co.Exec("echo forbidden isle")
    return pipeline


if __name__ == "__main__":
    co.main()
Example #22
    pipeline = co.Serial(image=external_img)
    pipeline["Say Hi"] = co.Exec("python p/python3.py")
    return pipeline


# Include this git repo in the image
#####################################

examples_img = co.Image(image="python:3.8-alpine", copy_repo=True, reqs_py=["conducto"])

# Pluck the hello-world pipeline out of the copied repo
def get_pipeline() -> co.Serial:
    from hello_world import hello

    return hello.pipeline()


# reference code in this repo
def hello_self() -> co.Serial:
    pipeline = co.Serial(image=examples_img, env={"PYTHONPATH": "."})
    pipeline["Say Hi"] = co.Lazy(get_pipeline)
    return pipeline


# co.Lazy builds the rest of the pipeline tree at runtime, so if new nodes are
# added to ../hello_world.pipeline.py in the future, those nodes will show up
# without needing to change this file

if __name__ == "__main__":
    co.main(default=hello_other)
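
The imported hello_world.hello module is not shown; a rough sketch of what its pipeline() might look like, modeled on the hello-world example earlier (module layout and node names are assumptions):

# hello_world/hello.py (hypothetical)
import conducto as co


def pipeline() -> co.Serial:
    root = co.Serial()
    root["Say Hi"] = co.Exec("echo 'Hello World'")
    return root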
Example #23
{df.transpose().round(2).to_markdown()}
</ConductoMarkdown>
    """)


############################################################
# Constants and globals
############################################################
DATA_PATH = "/conducto/data/pipeline/steo.txt"

# Each regex selects the monthly series ids for one dataset,
# e.g. "STEO.ZWHD_US.M" would match the first pattern.
DATASETS = {
    "Heating Degree Days": r"^STEO.ZWHD_[^_]*\.M$",
    "Cooling Degree Days": r"^STEO.ZWCD_[^_]*\.M$",
    "Electricity Generation": r"^STEO.NGEPGEN_[^_]*\.M$",
}

IMG = co.Image("python:3.8",
               copy_dir=".",
               reqs_py=["conducto", "pandas", "matplotlib", "tabulate"])

# Data is downloaded from the United States Energy Information Administration.
# https://www.eia.gov/opendata/bulkfiles.php
DOWNLOAD_COMMAND = f"""
echo "Downloading"
curl http://api.eia.gov/bulk/STEO.zip > steo.zip
unzip -cq steo.zip > {DATA_PATH}
""".strip()

if __name__ == "__main__":
    co.main(default=run)
Example #24
    output["Deploy"] = co.Exec("echo aws cloudformation deploy")
    return output


def build(projects: typing.List[str]) -> co.Parallel:
    "Build projects in parallel, using a simple shell command."

    # Override the parent image to use one with docker installed.
    img = co.Image(image="docker:19.03", copy_dir=".")

    output = co.Parallel(image=img, requires_docker=True)
    for project in projects:
        # Command needs docker; inherits flag from parent node
        output[project] = co.Exec(f"cd {project} && docker build .")
    return output


def test(projects: typing.List[str]) -> co.Parallel:
    "Group tests by project, all in parallel."
    output = co.Parallel()
    for project in projects:
        output[project] = co.Parallel()
        for name in utils.get_tests(project):
            # co.Exec usually takes a command string, but here it takes (func, *args, **kwargs).
            output[project][name] = co.Exec(utils.run_test, project, test=name)
    return output


if __name__ == "__main__":
    co.main(default=ci_cd)
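
The utils.run_test helper is not shown; a sketch of what such a function might look like, keeping the (project, test=name) signature from the call above but assuming each test is a pytest target run from the project directory (the body is an assumption):

import subprocess
import sys


def run_test(project: str, test: str):
    # run a single named test inside the project directory and propagate its exit code
    result = subprocess.run(["python", "-m", "pytest", test], cwd=project)
    sys.exit(result.returncode)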