Example 1
def main():
    # Connects to a pachyderm cluster on the default host:port
    # (`localhost:30650`). This will work for certain environments (e.g.
    # Kubernetes running on Docker for Mac), as well as when port forwarding
    # is being used. For other setups, you'll want one of the alternatives:
    # 1) To connect to pachyderm when this script is running inside the
    #    cluster, use `python_pachyderm.Client.new_in_cluster()`.
    # 2) To connect to pachyderm via a pachd address, use
    #    `python_pachyderm.Client.new_from_pachd_address()`.
    # 3) To explicitly set the host and port, pass parameters into
    #    `python_pachyderm.Client()`.
    client = python_pachyderm.Client()
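    # For example (hypothetical addresses; adjust to your environment):
    #   client = python_pachyderm.Client.new_in_cluster()
    #   client = python_pachyderm.Client.new_from_pachd_address("grpcs://pachd.example.com:30650")
    #   client = python_pachyderm.Client(host="localhost", port=30650)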

    # Create a repo called images
    client.create_repo("images")

    # Create a pipeline specifically designed for executing Python code. This
    # is equivalent to the edges pipeline in the standard opencv example.
    python_pachyderm.create_python_pipeline(
        client,
        relpath("edges"),
        input=python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/*", repo="images")),
    )
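    # `create_python_pipeline` uploads the code under the local `edges`
    # directory (a main.py, plus a requirements.txt if present) and deploys it
    # as a pipeline, producing the `edges` repo that the montage pipeline
    # below reads from.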

    # Create the montage pipeline
    client.create_pipeline(
        "montage",
        transform=python_pachyderm.Transform(
            cmd=["sh"],
            image="v4tech/imagemagick",
            stdin=[
                "montage -shadow -background SkyBlue -geometry 300x300+2+2 $(find /pfs -type f | sort) /pfs/out/montage.png"
            ],
        ),
        input=python_pachyderm.Input(cross=[
            python_pachyderm.Input(
                pfs=python_pachyderm.PFSInput(glob="/", repo="images")),
            python_pachyderm.Input(
                pfs=python_pachyderm.PFSInput(glob="/", repo="edges")),
        ]),
    )
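    # The cross input above pairs every datum from `images` with every datum
    # from `edges`, so each montage job sees both repos mounted under /pfs.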

    with client.commit("images", "master") as commit:
        # Add some images, recursively inserting content from the images
        # directory. Alternatively, you could use `client.put_file_url` or
        # `client.put_file_bytes`.
        python_pachyderm.put_files(client, relpath("images"), commit, "/")

    # Wait for the commit (and its downstream commits) to finish
    for _ in client.flush_commit([commit]):
        pass

    # Get the montage
    source_file = client.get_file("montage/master", "/montage.png")
    with tempfile.NamedTemporaryFile(suffix="montage.png",
                                     delete=False) as dest_file:
        shutil.copyfileobj(source_file, dest_file)
        print("montage written to {}".format(dest_file.name))
Example 2
def test_create_python_pipeline_bad_path():
    client = python_pachyderm.Client()
    repo_name = util.create_test_repo(client, "create_python_pipeline_bad_path")

    # create some sample data
    with client.commit(repo_name, "master") as commit:
        client.put_file_bytes(commit, 'file.dat', b'DATA')

    # create a pipeline from a file that does not exist - should fail
    with pytest.raises(Exception):
        python_pachyderm.create_python_pipeline(
            client, "./foobar2000",
            input=python_pachyderm.Input(pfs=python_pachyderm.PFSInput(glob="/", repo=repo_name)),
        )
Example 3
def main():
    client = python_pachyderm.Client()

    client.create_pipeline(
        pipeline_name="producer",
        transform=python_pachyderm.Transform(
            cmd=["python3", "/app/main.py"],
            image="ysimonson/pachyderm_spout_producer",
        ),
        spout=python_pachyderm.Spout(
            overwrite=False,
            marker="marker",
        ),
    )
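    # A spout pipeline takes no input; the producer writes records into its
    # own output repo as they arrive. With `overwrite=False`, new data is
    # appended rather than replacing previous output, and `marker` names a
    # file the spout can use to checkpoint its progress across restarts.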

    python_pachyderm.create_python_pipeline(
        client,
        relpath("consumer"),
        input=python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/", repo="producer")),
    )
Example 4
def test_create_python_pipeline():
    client = python_pachyderm.Client()
    repo_name = util.create_test_repo(client, "create_python_pipeline")
    pfs_input = python_pachyderm.Input(pfs=python_pachyderm.PFSInput(glob="/", repo=repo_name))
    pipeline_name = util.test_repo_name("create_python_pipeline", prefix="pipeline")

    # create some sample data
    with client.commit(repo_name, "master") as commit:
        client.put_file_bytes(commit, 'file.dat', b'DATA')

    # convenience function for verifying expected files exist
    def check_all_expected_files(extra_source_files, extra_build_files):
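        # `create_python_pipeline` stages the uploaded source files and the
        # built wheels in a companion `<pipeline>_build` repo, checked below
        # via its `source` and `build` branches, alongside the pipeline's own
        # output repo.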
        list(client.flush_commit([c.commit for c in client.list_commit(pipeline_name)]))

        check_expected_files(client, "{}_build/source".format(pipeline_name), set([
            "/",
            "/main.py",
            *extra_source_files,
        ]))

        check_expected_files(client, "{}_build/build".format(pipeline_name), set([
            "/",
            "/run.sh",
            *extra_build_files,
        ]))

        check_expected_files(client, "{}/master".format(pipeline_name), set([
            "/",
            "/file.dat",
        ]))

    # 1) create a pipeline from a directory with a main.py and requirements.txt
    with tempfile.TemporaryDirectory(suffix="python_pachyderm") as d:
        with open(os.path.join(d, "main.py"), "w") as f:
            f.write(TEST_LIB_SOURCE.format(repo_name))
        with open(os.path.join(d, "requirements.txt"), "w") as f:
            f.write(TEST_REQUIREMENTS_SOURCE)

        python_pachyderm.create_python_pipeline(
            client, d,
            input=pfs_input,
            pipeline_name=pipeline_name,
        )

    check_all_expected_files(
        ["/requirements.txt"],
        ["/leftpad-0.1.2-py3-none-any.whl", "/termcolor-1.1.0-py3-none-any.whl"],
    )
    file = list(client.get_file('{}/master'.format(pipeline_name), 'file.dat'))
    assert file == [b' DATA']
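    # The leading space in the output comes from padding presumably applied by
    # TEST_LIB_SOURCE (note the leftpad wheel among the build files), in
    # contrast to the unpadded output of the stdlib variant below.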

    # 2) update pipeline from a directory without a requirements.txt
    with tempfile.TemporaryDirectory(suffix="python_pachyderm") as d:
        with open(os.path.join(d, "main.py"), "w") as f:
            f.write(TEST_STDLIB_SOURCE.format(repo_name))

        python_pachyderm.create_python_pipeline(
            client, d,
            input=pfs_input,
            pipeline_name=pipeline_name,
            update=True,
        )

    check_all_expected_files([], [])
    file = list(client.get_file('{}/master'.format(pipeline_name), 'file.dat'))
    assert file == [b'DATA']