Beispiel #1
0
def test_create_pipeline_from_request():
    """Create a pipeline from a raw ``CreatePipelineRequest`` and check it is listed.

    A test repo is created to serve as the pipeline's PFS input; the test
    passes when a pipeline with the generated name appears in
    ``client.list_pipeline()``.
    """
    client = python_pachyderm.Client()

    input_repo = util.create_test_repo(client, "test_create_pipeline_from_request")
    expected_name = util.test_repo_name("test_create_pipeline_from_request")

    # Modelled on the edges pipeline spec from the opencv demo, but the
    # transform only echoes a string.
    request = pps_proto.CreatePipelineRequest(
        pipeline=pps_proto.Pipeline(name=expected_name),
        description="A pipeline that performs image edge detection by using the OpenCV library.",
        input=pps_proto.Input(pfs=pps_proto.PFSInput(glob="/*", repo=input_repo)),
        transform=pps_proto.Transform(cmd=["echo", "hi"], image="pachyderm/opencv"),
    )
    client.create_pipeline_from_request(request)

    # Materialise the full listing before checking for the new pipeline.
    listed_names = [info.pipeline.name for info in list(client.list_pipeline())]
    assert expected_name in listed_names
Beispiel #2
0
def test_datums():
    """Exercise ``list_datum``, ``inspect_datum`` and ``restart_datum``.

    Uses a ``Sandbox("datums")`` fixture, waits for its job, and then checks
    that exactly one datum is listed both by job and by input spec.
    """
    sandbox = Sandbox("datums")
    pipeline_name = sandbox.pipeline_repo_name
    job_id = sandbox.wait()
    client = sandbox.client

    # Drain the wait_commit stream so the job has fully finished.
    for _ in client.wait_commit(sandbox.commit.id):
        pass

    # Listing by pipeline and job: one datum, and it succeeded.
    job_datums = list(client.list_datum(pipeline_name, job_id))
    assert len(job_datums) == 1
    inspected = client.inspect_datum(pipeline_name, job_id, job_datums[0].datum.id)
    assert inspected.state == pps_proto.DatumState.SUCCESS

    # restart_datum without a datum filter is expected to fail with an
    # RpcError whose message matches this pattern.
    expected_error = (
        r"datum matching filter \[.*\] could not be found for job ID {}".format(
            job_id
        )
    )
    with pytest.raises(python_pachyderm.RpcError, match=expected_error):
        client.restart_datum(pipeline_name, job_id)

    # Listing by input spec instead of by job: again exactly one datum.
    input_spec = pps_proto.Input(
        pfs=pps_proto.PFSInput(glob="/*", repo=sandbox.input_repo_name)
    )
    input_datums = list(client.list_datum(input=input_spec))
    assert len(input_datums) == 1
Beispiel #3
0
def main():
    """Create the three pipelines of the spout example: spout, processor, reducer.

    ``processor`` reads the ``spout`` repo and ``reducer`` reads the
    ``processor`` repo, both on the ``master`` branch with glob ``/*``.
    """
    client = python_pachyderm.Client()

    # The spout and processor pipelines run from the same example image.
    example_image = "pachyderm/example-spout101:2.0.0-beta.5"

    # Spout pipeline: no input, marked as a spout via an empty Spout spec.
    spout_transform = pps_proto.Transform(
        cmd=["python3", "consumer/main.py"],
        image=example_image,
    )
    client.create_pipeline(
        pipeline_name="spout",
        transform=spout_transform,
        spout=pps_proto.Spout(),
        description="A spout pipeline that emulates the reception of data from an external source",
    )

    # Processor pipeline: consumes the spout's output repo.
    processor_transform = pps_proto.Transform(
        cmd=["python3", "processor/main.py"],
        image=example_image,
    )
    processor_input = pps_proto.Input(
        pfs=pps_proto.PFSInput(repo="spout", branch="master", glob="/*")
    )
    client.create_pipeline(
        pipeline_name="processor",
        transform=processor_transform,
        input=processor_input,
        description="A pipeline that sorts 1KB vs 2KB files",
    )

    # Reducer pipeline: a bash script fed on stdin that appends every file
    # under /pfs/processor/*/ to /pfs/out/<parent directory name>.txt.
    reducer_script = [
        "set -x",
        "FILES=/pfs/processor/*/*",
        "for f in $FILES",
        "do",
        "directory=`dirname $f`",
        "out=`basename $directory`",
        "cat $f >> /pfs/out/${out}.txt",
        "done",
    ]
    reducer_input = pps_proto.Input(
        pfs=pps_proto.PFSInput(repo="processor", branch="master", glob="/*")
    )
    client.create_pipeline(
        pipeline_name="reducer",
        transform=pps_proto.Transform(cmd=["bash"], stdin=reducer_script),
        input=reducer_input,
        description="A pipeline that reduces 1K/ and 2K/ directories",
    )
Beispiel #4
0
def check_pipeline_spec(req):
    """Assert that ``req`` equals the expected ``foobar`` pipeline request.

    The expected request reads the ``images`` repo with glob ``/*`` and runs
    ``python3 /edges.py`` in the ``pachyderm/opencv`` image.
    """
    expected_input = pps_proto.Input(
        pfs=pps_proto.PFSInput(glob="/*", repo="images"),
    )
    expected_transform = pps_proto.Transform(
        cmd=["python3", "/edges.py"],
        image="pachyderm/opencv",
    )
    expected = pps_proto.CreatePipelineRequest(
        pipeline=pps_proto.Pipeline(name="foobar"),
        description="A pipeline that performs image edge detection by using the OpenCV library.",
        input=expected_input,
        transform=expected_transform,
    )
    assert req == expected
Beispiel #5
0
def test_create_pipeline():
    """Create a single pipeline after ``delete_all()`` and check it is the only one listed."""
    client = python_pachyderm.Client()
    client.delete_all()

    source_repo = util.create_test_repo(client, "input_repo_test_create_pipeline")

    # The transform copies every .dat file from the input repo to the output.
    copy_command = "cp /pfs/{}/*.dat /pfs/out/".format(source_repo)
    transform = pps_proto.Transform(cmd=["sh"], image="alpine", stdin=[copy_command])
    pfs_input = pps_proto.PFSInput(glob="/*", repo=source_repo)

    client.create_pipeline(
        "pipeline_test_create_pipeline",
        transform=transform,
        input=pps_proto.Input(pfs=pfs_input),
    )

    pipelines = list(client.list_pipeline())
    assert len(pipelines) == 1
Beispiel #6
0
def create_test_pipeline(client: python_pachyderm.Client, test_name):
    """Create an input repo plus a pipeline reading from it, then commit one file.

    The input repo and the pipeline share the same random six-character
    suffix. After the pipeline is created, ``file.dat`` containing ``b"DATA"``
    is written to the input repo's ``master`` branch.

    Returns:
        A tuple ``(commit, input_repo_name, pipeline_repo_name)``.
    """
    shared_suffix = random_string(6)
    input_repo_name = create_test_repo(
        client, test_name, prefix="input", suffix=shared_suffix
    )
    pipeline_repo_name = test_repo_name(
        test_name, prefix="pipeline", suffix=shared_suffix
    )

    # The transform copies every .dat file from the input repo to the output.
    copy_command = "cp /pfs/{}/*.dat /pfs/out/".format(input_repo_name)
    transform = pps_proto.Transform(cmd=["sh"], image="alpine", stdin=[copy_command])
    pfs_input = pps_proto.PFSInput(glob="/*", repo=input_repo_name)
    client.create_pipeline(
        pipeline_repo_name,
        transform=transform,
        input=pps_proto.Input(pfs=pfs_input),
    )

    # TODO: figure out what is actually happening here
    with client.commit(input_repo_name, "master") as commit:
        client.put_file_bytes(commit, "file.dat", b"DATA")

    return commit, input_repo_name, pipeline_repo_name
Beispiel #7
0
import python_pachyderm
from python_pachyderm.service import pps_proto

client = python_pachyderm.Client()

# Both pipelines use the same image and the "/" glob; they differ only in
# name, script and input repo. "contour" is created first because
# "histogram" takes the "contour" repo as its input.
for name, script, source_repo in (
    ("contour", "contour.py", "photos"),
    ("histogram", "histogram.py", "contour"),
):
    client.create_pipeline(
        pipeline_name=name,
        transform=pps_proto.Transform(
            cmd=["python3", script],
            image="svekars/contour-histogram:1.0",
        ),
        input=pps_proto.Input(pfs=pps_proto.PFSInput(glob="/", repo=source_repo)),
    )

# Show what the cluster now reports.
print(list(client.list_pipeline()))
Beispiel #8
0
def main():
    """Run the opencv example end to end and save the resulting montage.

    Creates the ``images`` repo, the ``edges`` and ``montage`` pipelines,
    uploads the local ``images`` directory, waits for the commit to finish and
    copies ``/montage.png`` from the ``montage`` repo into a temporary file
    whose path is printed.
    """
    # With no arguments the client targets the default host:port
    # (`localhost:30650`), which suits e.g. k8s on docker for mac or a
    # port-forwarded cluster. Alternatives for other setups:
    # 1) Running inside the cluster:
    #    `python_pachyderm.Client.new_in_cluster()`.
    # 2) Connecting via a pachd address:
    #    `python_pachyderm.Client.new_from_pachd_address`.
    # 3) Explicit host and port: pass them to `python_pachyderm.Client()`.
    client = python_pachyderm.Client()

    # The repo that receives the source images.
    client.create_repo("images")

    # "edges" pipeline (its output repo is created automatically). Its input
    # is the images repo, so it runs whenever data is committed there.
    edges_transform = pps_proto.Transform(
        cmd=["python3", "/edges.py"],
        image="pachyderm/opencv",
    )
    edges_input = pps_proto.Input(pfs=pps_proto.PFSInput(repo="images", glob="/*"))
    client.create_pipeline("edges", transform=edges_transform, input=edges_input)

    # "montage" pipeline (its output repo is created automatically). Its input
    # crosses the images and edges repos, so a commit to either triggers it.
    montage_transform = pps_proto.Transform(
        cmd=["sh"],
        image="v4tech/imagemagick",
        stdin=[
            "montage -shadow -background SkyBlue -geometry 300x300+2+2 $(find /pfs -type f | sort) /pfs/out/montage.png"
        ],
    )
    crossed_inputs = [
        pps_proto.Input(pfs=pps_proto.PFSInput(glob="/", repo="images")),
        pps_proto.Input(pfs=pps_proto.PFSInput(glob="/", repo="edges")),
    ]
    client.create_pipeline(
        "montage",
        transform=montage_transform,
        input=pps_proto.Input(cross=crossed_inputs),
    )

    with client.commit("images", "master") as commit:
        # Recursively upload everything in the local images directory.
        # `client.put_file_url` or `client.put_file_bytes` would work too.
        python_pachyderm.put_files(client, relpath("images"), commit, "/")

    # Block until the commit and its downstream commits have finished.
    for _ in client.wait_commit(commit.id):
        pass

    # Fetch the montage and copy it into a temp file that is kept on disk.
    montage_stream = client.get_file(("montage", "master"), "/montage.png")
    with tempfile.NamedTemporaryFile(suffix="montage.png", delete=False) as out_file:
        shutil.copyfileobj(montage_stream, out_file)
        print("montage written to {}".format(out_file.name))