def create_test_pipeline(client: python_pachyderm.Client, test_name):
    """Set up an input repo plus a copy pipeline for a test and seed one file.

    The input repo and the pipeline share one random 6-character suffix.
    The pipeline runs ``sh`` in the ``alpine`` image and is fed a single
    ``cp`` command on stdin that copies every ``*.dat`` file from the input
    repo's mount into ``/pfs/out/``.

    Args:
        client: Pachyderm client used for every call made here.
        test_name: Name of the calling test, passed to the repo-name helpers.

    Returns:
        A ``(commit, input_repo_name, pipeline_repo_name)`` tuple, where
        ``commit`` is the object yielded by ``client.commit`` for the
        ``master`` branch of the input repo.
    """
    suffix = random_string(6)
    # NOTE(review): create_test_repo presumably creates the repo and returns
    # its name, while test_repo_name only builds a name -- confirm in helpers.
    source_repo = create_test_repo(
        client, test_name, prefix="input", suffix=suffix
    )
    pipeline_name = test_repo_name(test_name, prefix="pipeline", suffix=suffix)

    copy_transform = pps_proto.Transform(
        cmd=["sh"],
        image="alpine",
        stdin=["cp /pfs/{}/*.dat /pfs/out/".format(source_repo)],
    )
    pipeline_input = pps_proto.Input(
        pfs=pps_proto.PfsInput(glob="/*", repo=source_repo),
    )
    client.create_pipeline(
        pipeline_name,
        transform=copy_transform,
        input=pipeline_input,
    )

    # TODO: figure out what is actually happening here.
    with client.commit(source_repo, "master") as seed_commit:
        client.put_file_bytes(seed_commit, "file.dat", b"DATA")

    return seed_commit, source_repo, pipeline_name
def main():
    """Forward queued messages into the ``spout`` repo, looping forever.

    Connects a ``Client``, then repeatedly calls ``receive_message()``.
    Each non-empty batch is written inside one ``client.commit`` context on
    the ``master`` branch of ``spout``, one file per message. This function
    never returns.
    """
    print("connecting to pachd")
    pachd = Client()
    print("connected")

    while True:
        # Poll the queue; an empty/falsy result means nothing to write.
        batch = receive_message()
        if not batch:
            continue

        # NOTE(review): the context manager presumably starts the commit on
        # entry and finishes it on exit -- confirm against the client docs.
        with pachd.commit("spout", "master") as open_commit:
            for payload in batch:
                # Name each file after the SHA-256 hex digest of its content.
                digest = hashlib.sha256(payload).hexdigest()
                pachd.put_file_bytes(open_commit, digest + ".txt", payload)