def test_spout_commit():
    """Check that a spout which puts one file produces exactly one commit.

    Wipes the cluster, creates a spout pipeline whose bash script appends a
    line to a local file and uploads it with ``pachctl put file``, pulls the
    first item from a commit subscription (filtered to FINISHED state and
    USER origin) on the pipeline's ``master`` branch, and finally asserts
    that listing the pipeline repo's commits yields a single entry.
    """
    pipeline = "pipeline-spout-commit"

    client = python_pachyderm.Client()
    client.delete_all()

    spout_script = [
        "echo 'commit time' >> file.txt",
        "pachctl put file pipeline-spout-commit@master:/file.txt -f file.txt",
    ]
    client.create_pipeline(
        pipeline_name=pipeline,
        transform=pps_proto.Transform(cmd=["bash"], stdin=spout_script),
        spout=pps_proto.Spout(),
    )

    commit_stream = client.subscribe_commit(
        repo_name=pipeline,
        branch="master",
        state=pfs_proto.FINISHED,
        origin_kind=pfs_proto.USER,
    )
    # Block until the subscription yields its first matching commit.
    next(commit_stream)

    commit_infos = list(client.list_commit(pipeline))
    assert len(commit_infos) == 1
def test_create_pipeline_from_request():
    """Create a pipeline from a raw ``CreatePipelineRequest`` and check it is listed."""
    test_name = "test_create_pipeline_from_request"

    client = python_pachyderm.Client()
    repo_name = util.create_test_repo(client, test_name)
    pipeline_name = util.test_repo_name(test_name)

    # Roughly mirrors the edges pipeline spec from the opencv demo.
    pfs_input = pps_proto.PFSInput(glob="/*", repo=repo_name)
    request = pps_proto.CreatePipelineRequest(
        pipeline=pps_proto.Pipeline(name=pipeline_name),
        description="A pipeline that performs image edge detection by using the OpenCV library.",
        input=pps_proto.Input(pfs=pfs_input),
        transform=pps_proto.Transform(
            cmd=["echo", "hi"],
            image="pachyderm/opencv",
        ),
    )
    client.create_pipeline_from_request(request)

    listed_names = [info.pipeline.name for info in client.list_pipeline()]
    assert pipeline_name in listed_names
def main():
    """Create the three pipelines of the spout example.

    * ``spout``     - a spout pipeline running ``consumer/main.py``.
    * ``processor`` - reads the ``spout`` repo and runs ``processor/main.py``.
    * ``reducer``   - reads the ``processor`` repo and concatenates files
      into one ``.txt`` per directory using a bash script.
    """
    client = python_pachyderm.Client()

    # The spout and processor pipelines run from the same image.
    example_image = "pachyderm/example-spout101:2.0.0-beta.5"

    spout_transform = pps_proto.Transform(
        cmd=["python3", "consumer/main.py"],
        image=example_image,
    )
    client.create_pipeline(
        pipeline_name="spout",
        transform=spout_transform,
        spout=pps_proto.Spout(),
        description="A spout pipeline that emulates the reception of data from an external source",
    )

    processor_transform = pps_proto.Transform(
        cmd=["python3", "processor/main.py"],
        image=example_image,
    )
    processor_input = pps_proto.Input(
        pfs=pps_proto.PFSInput(repo="spout", branch="master", glob="/*")
    )
    client.create_pipeline(
        pipeline_name="processor",
        transform=processor_transform,
        input=processor_input,
        description="A pipeline that sorts 1KB vs 2KB files",
    )

    # Bash script fed to the reducer on stdin: append every file under
    # /pfs/processor/<dir>/ to /pfs/out/<dir>.txt.
    reducer_script = [
        "set -x",
        "FILES=/pfs/processor/*/*",
        "for f in $FILES",
        "do",
        "directory=`dirname $f`",
        "out=`basename $directory`",
        "cat $f >> /pfs/out/${out}.txt",
        "done",
    ]
    reducer_input = pps_proto.Input(
        pfs=pps_proto.PFSInput(repo="processor", branch="master", glob="/*")
    )
    client.create_pipeline(
        pipeline_name="reducer",
        transform=pps_proto.Transform(cmd=["bash"], stdin=reducer_script),
        input=reducer_input,
        description="A pipeline that reduces 1K/ and 2K/ directories",
    )
def check_pipeline_spec(req):
    """Assert that ``req`` equals the expected ``foobar`` create-pipeline request.

    The expected request reads the ``images`` repo with glob ``/*`` and runs
    ``python3 /edges.py`` in the ``pachyderm/opencv`` image.
    """
    expected_input = pps_proto.Input(
        pfs=pps_proto.PFSInput(glob="/*", repo="images"),
    )
    expected_transform = pps_proto.Transform(
        cmd=["python3", "/edges.py"],
        image="pachyderm/opencv",
    )
    expected = pps_proto.CreatePipelineRequest(
        pipeline=pps_proto.Pipeline(name="foobar"),
        description="A pipeline that performs image edge detection by using the OpenCV library.",
        input=expected_input,
        transform=expected_transform,
    )
    assert req == expected
def test_create_spout():
    """Create one spout pipeline on a freshly wiped cluster and expect one listed pipeline."""
    client = python_pachyderm.Client()
    client.delete_all()

    shell_transform = pps_proto.Transform(cmd=["sh"], image="alpine")
    client.create_pipeline(
        pipeline_name="pipeline-create-spout",
        transform=shell_transform,
        spout=pps_proto.Spout(),
    )

    pipelines = list(client.list_pipeline())
    assert len(pipelines) == 1
def test_create_pipeline():
    """Create one PFS-input pipeline on a freshly wiped cluster and expect one listed pipeline."""
    client = python_pachyderm.Client()
    client.delete_all()

    input_repo_name = util.create_test_repo(client, "input_repo_test_create_pipeline")

    # Shell command (fed on stdin) that copies the input repo's .dat files
    # into the pipeline output directory.
    copy_command = "cp /pfs/{}/*.dat /pfs/out/".format(input_repo_name)
    pfs_input = pps_proto.PFSInput(glob="/*", repo=input_repo_name)

    client.create_pipeline(
        "pipeline_test_create_pipeline",
        transform=pps_proto.Transform(
            cmd=["sh"],
            image="alpine",
            stdin=[copy_command],
        ),
        input=pps_proto.Input(pfs=pfs_input),
    )

    pipelines = list(client.list_pipeline())
    assert len(pipelines) == 1
def create_test_pipeline(client: python_pachyderm.Client, test_name):
    """Create an input repo plus a pipeline reading from it, then commit one file.

    Both names are derived from ``test_name`` and share one random 6-character
    suffix. The pipeline copies ``*.dat`` files from the input repo to its
    output. A single file ``file.dat`` containing ``b"DATA"`` is written to
    the input repo's ``master`` branch.

    Returns:
        A tuple ``(commit, input_repo_name, pipeline_repo_name)``.
    """
    shared_suffix = random_string(6)
    input_repo_name = create_test_repo(
        client, test_name, prefix="input", suffix=shared_suffix
    )
    pipeline_repo_name = test_repo_name(
        test_name, prefix="pipeline", suffix=shared_suffix
    )

    copy_command = "cp /pfs/{}/*.dat /pfs/out/".format(input_repo_name)
    client.create_pipeline(
        pipeline_repo_name,
        transform=pps_proto.Transform(
            cmd=["sh"],
            image="alpine",
            stdin=[copy_command],
        ),
        input=pps_proto.Input(
            pfs=pps_proto.PFSInput(glob="/*", repo=input_repo_name)
        ),
    )

    # TODO: figure out what is actually happening here.
    with client.commit(input_repo_name, "master") as commit:
        client.put_file_bytes(commit, "file.dat", b"DATA")

    return (commit, input_repo_name, pipeline_repo_name)
import python_pachyderm
from python_pachyderm.service import pps_proto

# Script: create the "contour" and "histogram" pipelines, then print every
# pipeline the cluster reports.
client = python_pachyderm.Client()

# "contour" runs contour.py over the whole "photos" repo (glob "/").
client.create_pipeline(
    pipeline_name="contour",
    input=pps_proto.Input(
        pfs=pps_proto.PFSInput(glob="/", repo="photos"),
    ),
    transform=pps_proto.Transform(
        cmd=["python3", "contour.py"],
        image="svekars/contour-histogram:1.0",
    ),
)

# "histogram" runs histogram.py over the "contour" repo
# (presumably the output repo of the pipeline above).
client.create_pipeline(
    pipeline_name="histogram",
    input=pps_proto.Input(
        pfs=pps_proto.PFSInput(glob="/", repo="contour"),
    ),
    transform=pps_proto.Transform(
        cmd=["python3", "histogram.py"],
        image="svekars/contour-histogram:1.0",
    ),
)

print(list(client.list_pipeline()))
def main():
    """Run the opencv example end to end.

    Creates the ``images`` repo, the ``edges`` and ``montage`` pipelines,
    commits the local ``images`` directory, waits for the commit to finish,
    and copies ``/montage.png`` from the ``montage`` repo into a temporary
    file whose path is printed.
    """
    # Connect to a pachyderm cluster on the default host:port
    # (`localhost:30650`). That works for some environments (e.g. k8s on
    # docker for mac) and when port forwarding is in use. Otherwise use one
    # of:
    #   1) `python_pachyderm.Client.new_in_cluster()` when this script runs
    #      inside the cluster;
    #   2) `python_pachyderm.Client.new_from_pachd_address` to connect via a
    #      pachd address;
    #   3) explicit host and port parameters to `python_pachyderm.Client()`.
    client = python_pachyderm.Client()

    # Source repo for the raw images.
    client.create_repo("images")

    # The edges pipeline (its repo is created automatically) runs whenever
    # data is committed to the images repo, per its input.
    edges_input = pps_proto.Input(
        pfs=pps_proto.PFSInput(repo="images", glob="/*")
    )
    client.create_pipeline(
        "edges",
        transform=pps_proto.Transform(
            cmd=["python3", "/edges.py"],
            image="pachyderm/opencv",
        ),
        input=edges_input,
    )

    # The montage pipeline (its repo is created automatically) runs whenever
    # data is committed to either the images repo or the edges repo, per its
    # cross input.
    montage_script = [
        "montage -shadow -background SkyBlue -geometry 300x300+2+2 $(find /pfs -type f | sort) /pfs/out/montage.png"
    ]
    montage_sources = [
        pps_proto.Input(pfs=pps_proto.PFSInput(glob="/", repo="images")),
        pps_proto.Input(pfs=pps_proto.PFSInput(glob="/", repo="edges")),
    ]
    client.create_pipeline(
        "montage",
        transform=pps_proto.Transform(
            cmd=["sh"],
            image="v4tech/imagemagick",
            stdin=montage_script,
        ),
        input=pps_proto.Input(cross=montage_sources),
    )

    with client.commit("images", "master") as commit:
        # Recursively upload everything in the local images directory.
        # `client.put_file_url` or `client.put_file_bytes` are alternatives.
        python_pachyderm.put_files(client, relpath("images"), commit, "/")

    # Drain the wait stream so the commit (and its downstream commits) are
    # finished before the result is read.
    for _ in client.wait_commit(commit.id):
        pass

    # Fetch the montage and persist it to a temp file that outlives this run.
    source_file = client.get_file(("montage", "master"), "/montage.png")
    with tempfile.NamedTemporaryFile(suffix="montage.png", delete=False) as dest_file:
        shutil.copyfileobj(source_file, dest_file)
        print("montage written to {}".format(dest_file.name))