Exemple #1
0
def main():
    # Connects to a pachyderm cluster on the default host:port
    # (`localhost:30650`). This will work for certain environments (e.g. k8s
    # running on docker for mac), as well as when port forwarding is being
    # used. For other setups, you'll want one of the alternatives:
    # 1) To connect to pachyderm when this script is running inside the
    #    cluster, use `python_pachyderm.Client.new_in_cluster()`.
    # 2) To connect to pachyderm via a pachd address, use
    #    `python_pachyderm.Client.new_from_pachd_address`.
    # 3) To explicitly set the host and port, pass parameters into
    #   `python_pachyderm.Client()`.
    client = python_pachyderm.Client()

    # Create a repo called images
    client.create_repo("images")

    # Create a pipeline specifically designed for executing python code. This
    # is equivalent to the edges pipeline in the standard opencv example.
    python_pachyderm.create_python_pipeline(
        client,
        relpath("edges"),
        input=python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/*", repo="images")),
    )

    # Create the montage pipeline
    client.create_pipeline(
        "montage",
        transform=python_pachyderm.Transform(
            cmd=["sh"],
            image="v4tech/imagemagick",
            stdin=[
                "montage -shadow -background SkyBlue -geometry 300x300+2+2 $(find /pfs -type f | sort) /pfs/out/montage.png"
            ],
        ),
        input=python_pachyderm.Input(cross=[
            python_pachyderm.Input(
                pfs=python_pachyderm.PFSInput(glob="/", repo="images")),
            python_pachyderm.Input(
                pfs=python_pachyderm.PFSInput(glob="/", repo="edges")),
        ]),
    )

    with client.commit("images", "master") as commit:
        # Add some images, recursively inserting content from the images
        # directory. Alternatively, you could use `client.put_file_url` or
        # `client_put_file_bytes`.
        python_pachyderm.put_files(client, relpath("images"), commit, "/")

    # Wait for the commit (and its downstream commits) to finish
    for _ in client.flush_commit([commit]):
        pass

    # Get the montage
    source_file = client.get_file("montage/master", "/montage.png")
    with tempfile.NamedTemporaryFile(suffix="montage.png",
                                     delete=False) as dest_file:
        shutil.copyfileobj(source_file, dest_file)
        print("montage written to {}".format(dest_file.name))
Exemple #2
0
def test_create_pipeline_from_request():
    client = python_pachyderm.Client()

    repo_name = util.create_test_repo(client,
                                      "test_create_pipeline_from_request")
    pipeline_name = util.test_repo_name("test_create_pipeline_from_request")

    # more or less a copy of the opencv demo's edges pipeline spec
    client.create_pipeline_from_request(
        python_pachyderm.CreatePipelineRequest(
            pipeline=python_pachyderm.Pipeline(name=pipeline_name),
            description=
            "A pipeline that performs image edge detection by using the OpenCV library.",
            input=python_pachyderm.Input(pfs=python_pachyderm.PFSInput(
                glob="/*",
                repo=repo_name,
            ), ),
            transform=python_pachyderm.Transform(
                cmd=["echo", "hi"],
                image="pachyderm/opencv",
            ),
        ))

    assert any(p.pipeline.name == pipeline_name
               for p in client.list_pipeline().pipeline_info)
Exemple #3
0
def create_test_pipeline(client, test_name):
    repo_name_suffix = random_string(6)
    input_repo_name = create_test_repo(client,
                                       test_name,
                                       prefix="input",
                                       suffix=repo_name_suffix)
    pipeline_repo_name = test_repo_name(test_name,
                                        prefix="pipeline",
                                        suffix=repo_name_suffix)

    client.create_pipeline(
        pipeline_repo_name,
        transform=python_pachyderm.Transform(
            cmd=["sh"],
            image="alpine",
            stdin=["cp /pfs/{}/*.dat /pfs/out/".format(input_repo_name)],
        ),
        input=python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/*", repo=input_repo_name)),
        enable_stats=True,
    )

    with client.commit(input_repo_name, "master") as commit:
        client.put_file_bytes(commit, "file.dat", b"DATA")

    return (commit, input_repo_name, pipeline_repo_name)
Exemple #4
0
    def __init__(self, test_name):
        pfs_client = python_pachyderm.PfsClient()
        pps_client = python_pachyderm.PpsClient()

        repo_name_suffix = random_string(6)
        input_repo_name = "{}-input-{}".format(test_name, repo_name_suffix)
        pipeline_repo_name = "{}-pipeline-{}".format(test_name,
                                                     repo_name_suffix)

        pfs_client.create_repo(input_repo_name,
                               "input repo for {}".format(test_name))

        pps_client.create_pipeline(
            pipeline_repo_name,
            transform=python_pachyderm.Transform(
                cmd=["sh"],
                image="alpine",
                stdin=["cp /pfs/{}/*.dat /pfs/out/".format(input_repo_name)]),
            input=python_pachyderm.Input(pfs=python_pachyderm.PFSInput(
                glob="/*", repo=input_repo_name)),
            enable_stats=True,
        )

        with pfs_client.commit(input_repo_name, 'master') as commit:
            pfs_client.put_file_bytes(commit, 'file.dat', b'DATA')

        self.pps_client = pps_client
        self.pfs_client = pfs_client
        self.commit = commit
        self.input_repo_name = input_repo_name
        self.pipeline_repo_name = pipeline_repo_name
Exemple #5
0
def check_pipeline_spec(req):
    assert req == python_pachyderm.CreatePipelineRequest(
        pipeline=python_pachyderm.Pipeline(name="foobar"),
        description=
        "A pipeline that performs image edge detection by using the OpenCV library.",
        input=python_pachyderm.Input(pfs=python_pachyderm.PFSInput(
            glob="/*", repo="images"), ),
        transform=python_pachyderm.Transform(
            cmd=["python3", "/edges.py"],
            image="pachyderm/opencv",
        ),
    )
def test_create_python_pipeline_bad_path():
    client = python_pachyderm.Client()
    repo_name = util.create_test_repo(client, "create_python_pipeline_bad_path")

    # create some sample data
    with client.commit(repo_name, "master") as commit:
        client.put_file_bytes(commit, 'file.dat', b'DATA')

    # create a pipeline from a file that does not exist - should fail
    with pytest.raises(Exception):
        python_pachyderm.create_python_pipeline(
            client, "./foobar2000",
            input=python_pachyderm.Input(pfs=python_pachyderm.PFSInput(glob="/", repo=repo_name)),
        )
Exemple #7
0
def main():
    client = python_pachyderm.Client()

    client.create_pipeline(
        pipeline_name="producer",
        transform=python_pachyderm.Transform(
            cmd=["python3", "/app/main.py"],
            image="ysimonson/pachyderm_spout_producer",
        ),
        spout=python_pachyderm.Spout(
            overwrite=False,
            marker="marker",
        ),
    )

    python_pachyderm.create_python_pipeline(
        client,
        relpath("consumer"),
        input=python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/", repo="producer")),
    )
def test_create_python_pipeline():
    client = python_pachyderm.Client()
    repo_name = util.create_test_repo(client, "create_python_pipeline")
    pfs_input = python_pachyderm.Input(pfs=python_pachyderm.PFSInput(glob="/", repo=repo_name))
    pipeline_name = util.test_repo_name("create_python_pipeline", prefix="pipeline")

    # create some sample data
    with client.commit(repo_name, "master") as commit:
        client.put_file_bytes(commit, 'file.dat', b'DATA')

    # convenience function for verifying expected files exist
    def check_all_expected_files(extra_source_files, extra_build_files):
        list(client.flush_commit([c.commit for c in client.list_commit(pipeline_name)]))

        check_expected_files(client, "{}_build/source".format(pipeline_name), set([
            "/",
            "/main.py",
            *extra_source_files,
        ]))

        check_expected_files(client, "{}_build/build".format(pipeline_name), set([
            "/",
            "/run.sh",
            *extra_build_files,
        ]))

        check_expected_files(client, "{}/master".format(pipeline_name), set([
            "/",
            "/file.dat",
        ]))

    # 1) create a pipeline from a directory with a main.py and requirements.txt
    with tempfile.TemporaryDirectory(suffix="python_pachyderm") as d:
        with open(os.path.join(d, "main.py"), "w") as f:
            f.write(TEST_LIB_SOURCE.format(repo_name))
        with open(os.path.join(d, "requirements.txt"), "w") as f:
            f.write(TEST_REQUIREMENTS_SOURCE)

        python_pachyderm.create_python_pipeline(
            client, d,
            input=pfs_input,
            pipeline_name=pipeline_name,
        )

    check_all_expected_files(
        ["/requirements.txt"],
        ["/leftpad-0.1.2-py3-none-any.whl", "/termcolor-1.1.0-py3-none-any.whl"],
    )
    file = list(client.get_file('{}/master'.format(pipeline_name), 'file.dat'))
    assert file == [b' DATA']

    # 2) update pipeline from a directory without a requirements.txt
    with tempfile.TemporaryDirectory(suffix="python_pachyderm") as d:
        with open(os.path.join(d, "main.py"), "w") as f:
            f.write(TEST_STDLIB_SOURCE.format(repo_name))

        python_pachyderm.create_python_pipeline(
            client, d,
            input=pfs_input,
            pipeline_name=pipeline_name,
            update=True,
        )

    check_all_expected_files([], [])
    file = list(client.get_file('{}/master'.format(pipeline_name), 'file.dat'))
    assert file == [b'DATA']