def main():
    """Run the OpenCV example end to end.

    Creates the `images` repo, an `edges` python pipeline and a `montage`
    pipeline, uploads the local images directory, waits for the resulting
    commits to finish, and copies the generated montage into a temporary
    file whose path is printed.
    """
    # A bare `Client()` targets the default host:port (`localhost:30650`).
    # That works for certain environments (e.g. k8s running on docker for
    # mac) and whenever port forwarding is in use. Otherwise:
    #   1) inside the cluster: `python_pachyderm.Client.new_in_cluster()`
    #   2) via a pachd address:
    #      `python_pachyderm.Client.new_from_pachd_address`
    #   3) explicit host and port: pass them to `python_pachyderm.Client()`
    client = python_pachyderm.Client()

    # Repo holding the raw input images.
    client.create_repo("images")

    # Pipeline built specifically for running python code; equivalent to
    # the edges pipeline in the standard opencv example.
    edges_input = python_pachyderm.Input(
        pfs=python_pachyderm.PFSInput(glob="/*", repo="images"),
    )
    python_pachyderm.create_python_pipeline(
        client,
        relpath("edges"),
        input=edges_input,
    )

    # Montage pipeline: crosses the whole `images` and `edges` repos.
    montage_transform = python_pachyderm.Transform(
        cmd=["sh"],
        image="v4tech/imagemagick",
        stdin=[
            "montage -shadow -background SkyBlue -geometry 300x300+2+2 $(find /pfs -type f | sort) /pfs/out/montage.png"
        ],
    )
    montage_sources = [
        python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/", repo=source_repo),
        )
        for source_repo in ("images", "edges")
    ]
    client.create_pipeline(
        "montage",
        transform=montage_transform,
        input=python_pachyderm.Input(cross=montage_sources),
    )

    with client.commit("images", "master") as commit:
        # Recursively insert everything under the local images directory.
        # `client.put_file_url` or `client.put_file_bytes` would also work.
        python_pachyderm.put_files(client, relpath("images"), commit, "/")

    # Drain the iterator so we block until the commit and its downstream
    # commits have finished.
    for _ in client.flush_commit([commit]):
        pass

    # Fetch the montage and persist it; delete=False keeps the file around
    # after the handle is closed.
    montage_stream = client.get_file("montage/master", "/montage.png")
    with tempfile.NamedTemporaryFile(suffix="montage.png", delete=False) as out_file:
        shutil.copyfileobj(montage_stream, out_file)
        print("montage written to {}".format(out_file.name))
def test_create_pipeline_from_request():
    """Create a pipeline from a raw request and check that it gets listed."""
    client = python_pachyderm.Client()
    repo_name = util.create_test_repo(client, "test_create_pipeline_from_request")
    pipeline_name = util.test_repo_name("test_create_pipeline_from_request")

    # More or less a copy of the opencv demo's edges pipeline spec.
    request = python_pachyderm.CreatePipelineRequest(
        pipeline=python_pachyderm.Pipeline(name=pipeline_name),
        description="A pipeline that performs image edge detection by using the OpenCV library.",
        input=python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/*", repo=repo_name),
        ),
        transform=python_pachyderm.Transform(
            cmd=["echo", "hi"],
            image="pachyderm/opencv",
        ),
    )
    client.create_pipeline_from_request(request)

    # The new pipeline must show up among the listed pipelines.
    listed_names = [
        info.pipeline.name for info in client.list_pipeline().pipeline_info
    ]
    assert pipeline_name in listed_names
def create_test_pipeline(client, test_name):
    """Set up an input repo, a copy pipeline over it, and one input commit.

    Args:
        client: client used for every repo, pipeline and commit call.
        test_name: name passed to the repo-naming helpers.

    Returns:
        A `(commit, input_repo_name, pipeline_repo_name)` tuple.
    """
    # One shared random suffix ties the input repo and pipeline names together.
    suffix = random_string(6)
    input_repo_name = create_test_repo(
        client, test_name, prefix="input", suffix=suffix
    )
    pipeline_repo_name = test_repo_name(
        test_name, prefix="pipeline", suffix=suffix
    )

    # The pipeline copies every `.dat` file from its input to its output.
    copy_transform = python_pachyderm.Transform(
        cmd=["sh"],
        image="alpine",
        stdin=["cp /pfs/{}/*.dat /pfs/out/".format(input_repo_name)],
    )
    pfs_input = python_pachyderm.Input(
        pfs=python_pachyderm.PFSInput(glob="/*", repo=input_repo_name),
    )
    client.create_pipeline(
        pipeline_repo_name,
        transform=copy_transform,
        input=pfs_input,
        enable_stats=True,
    )

    # Put a single data file into the input repo's master branch.
    with client.commit(input_repo_name, "master") as commit:
        client.put_file_bytes(commit, "file.dat", b"DATA")

    return commit, input_repo_name, pipeline_repo_name
def __init__(self, test_name):
    """Create an input repo, a copy pipeline, and one commit for a test.

    The clients, the commit, and both generated names are stored on the
    instance once everything has been set up.

    Args:
        test_name: base used to build the repo and pipeline names.
    """
    pfs_client = python_pachyderm.PfsClient()
    pps_client = python_pachyderm.PpsClient()

    # The same random suffix is used in both generated names.
    suffix = random_string(6)
    input_repo_name = "{}-input-{}".format(test_name, suffix)
    pipeline_repo_name = "{}-pipeline-{}".format(test_name, suffix)

    pfs_client.create_repo(
        input_repo_name,
        "input repo for {}".format(test_name),
    )

    # The pipeline copies every `.dat` file from its input to its output.
    copy_transform = python_pachyderm.Transform(
        cmd=["sh"],
        image="alpine",
        stdin=["cp /pfs/{}/*.dat /pfs/out/".format(input_repo_name)],
    )
    pfs_input = python_pachyderm.Input(
        pfs=python_pachyderm.PFSInput(glob="/*", repo=input_repo_name),
    )
    pps_client.create_pipeline(
        pipeline_repo_name,
        transform=copy_transform,
        input=pfs_input,
        enable_stats=True,
    )

    # Put a single data file into the input repo's master branch.
    with pfs_client.commit(input_repo_name, 'master') as commit:
        pfs_client.put_file_bytes(commit, 'file.dat', b'DATA')

    # Publish state only after all of the setup calls have succeeded.
    self.pps_client = pps_client
    self.pfs_client = pfs_client
    self.commit = commit
    self.input_repo_name = input_repo_name
    self.pipeline_repo_name = pipeline_repo_name
def check_pipeline_spec(req):
    """Assert that `req` equals the expected `foobar` edges pipeline request.

    Args:
        req: the request object to compare against the expected spec.

    Raises:
        AssertionError: if `req` differs from the expected request.
    """
    expected = python_pachyderm.CreatePipelineRequest(
        pipeline=python_pachyderm.Pipeline(name="foobar"),
        description="A pipeline that performs image edge detection by using the OpenCV library.",
        input=python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/*", repo="images"),
        ),
        transform=python_pachyderm.Transform(
            cmd=["python3", "/edges.py"],
            image="pachyderm/opencv",
        ),
    )
    assert req == expected
def test_create_python_pipeline_bad_path():
    """Creating a python pipeline from a nonexistent path must raise."""
    client = python_pachyderm.Client()
    repo_name = util.create_test_repo(client, "create_python_pipeline_bad_path")

    # Seed the repo with some sample data.
    with client.commit(repo_name, "master") as commit:
        client.put_file_bytes(commit, 'file.dat', b'DATA')

    # "./foobar2000" does not exist, so pipeline creation should fail.
    with pytest.raises(Exception):
        pfs_input = python_pachyderm.Input(
            pfs=python_pachyderm.PFSInput(glob="/", repo=repo_name),
        )
        python_pachyderm.create_python_pipeline(
            client,
            "./foobar2000",
            input=pfs_input,
        )
def main():
    """Create the `producer` spout pipeline and a python pipeline reading it."""
    client = python_pachyderm.Client()

    # Spout pipeline named "producer".
    producer_transform = python_pachyderm.Transform(
        cmd=["python3", "/app/main.py"],
        image="ysimonson/pachyderm_spout_producer",
    )
    producer_spout = python_pachyderm.Spout(
        overwrite=False,
        marker="marker",
    )
    client.create_pipeline(
        pipeline_name="producer",
        transform=producer_transform,
        spout=producer_spout,
    )

    # Python pipeline built from the local "consumer" directory; its input
    # is the whole "producer" repo.
    consumer_input = python_pachyderm.Input(
        pfs=python_pachyderm.PFSInput(glob="/", repo="producer"),
    )
    python_pachyderm.create_python_pipeline(
        client,
        relpath("consumer"),
        input=consumer_input,
    )
def test_create_python_pipeline():
    """Create a python pipeline, then update it, checking outputs each time."""
    client = python_pachyderm.Client()
    repo_name = util.create_test_repo(client, "create_python_pipeline")
    pfs_input = python_pachyderm.Input(
        pfs=python_pachyderm.PFSInput(glob="/", repo=repo_name),
    )
    pipeline_name = util.test_repo_name("create_python_pipeline", prefix="pipeline")

    # Seed the input repo with some sample data.
    with client.commit(repo_name, "master") as commit:
        client.put_file_bytes(commit, 'file.dat', b'DATA')

    def check_all_expected_files(extra_source_files, extra_build_files):
        """Flush the pipeline's commits, then verify the files in each repo."""
        pipeline_commits = [c.commit for c in client.list_commit(pipeline_name)]
        # Consume the iterator fully so every commit is flushed before checking.
        list(client.flush_commit(pipeline_commits))

        check_expected_files(
            client,
            "{}_build/source".format(pipeline_name),
            {"/", "/main.py", *extra_source_files},
        )
        check_expected_files(
            client,
            "{}_build/build".format(pipeline_name),
            {"/", "/run.sh", *extra_build_files},
        )
        check_expected_files(
            client,
            "{}/master".format(pipeline_name),
            {"/", "/file.dat"},
        )

    # 1) Create a pipeline from a directory with a main.py and a
    #    requirements.txt.
    with tempfile.TemporaryDirectory(suffix="python_pachyderm") as src_dir:
        with open(os.path.join(src_dir, "main.py"), "w") as main_file:
            main_file.write(TEST_LIB_SOURCE.format(repo_name))
        with open(os.path.join(src_dir, "requirements.txt"), "w") as reqs_file:
            reqs_file.write(TEST_REQUIREMENTS_SOURCE)

        python_pachyderm.create_python_pipeline(
            client,
            src_dir,
            input=pfs_input,
            pipeline_name=pipeline_name,
        )

    check_all_expected_files(
        ["/requirements.txt"],
        ["/leftpad-0.1.2-py3-none-any.whl", "/termcolor-1.1.0-py3-none-any.whl"],
    )

    output_chunks = list(client.get_file('{}/master'.format(pipeline_name), 'file.dat'))
    assert output_chunks == [b' DATA']

    # 2) Update the pipeline from a directory without a requirements.txt.
    with tempfile.TemporaryDirectory(suffix="python_pachyderm") as src_dir:
        with open(os.path.join(src_dir, "main.py"), "w") as main_file:
            main_file.write(TEST_STDLIB_SOURCE.format(repo_name))

        python_pachyderm.create_python_pipeline(
            client,
            src_dir,
            input=pfs_input,
            pipeline_name=pipeline_name,
            update=True,
        )

    check_all_expected_files([], [])

    output_chunks = list(client.get_file('{}/master'.format(pipeline_name), 'file.dat'))
    assert output_chunks == [b'DATA']