def test_create_secret(self):
    """Register one created and one obtained secret, then check both manifests."""
    # Job 1: secret built from literal key/value data under an explicit name.
    credentials = {"uname": "abc", "passwd": "def"}
    created = couler.create_secret(secret_data=credentials, name="dummy1")
    couler.run_container(
        image="python:3.6", secret=created, command="echo $uname"
    )

    # Job 2: secret referenced by key names only (described upstream as one
    # that already exists), looked up in the "test" namespace.
    existing_keys = ["access_key", "access_value"]
    obtained = couler.obtain_secret(
        secret_keys=existing_keys, namespace="test", name="dummy2"
    )
    couler.run_container(
        image="python:3.6", secret=obtained, command="echo $access_value"
    )

    # Both secrets must be tracked in the global state.
    registry = couler.states._secrets
    self.assertEqual(len(registry), 2)
    created_yaml = registry[created].to_yaml()
    obtained_yaml = registry[obtained].to_yaml()

    # Manifest of the created secret: name plus base64-encoded payload.
    self.assertEqual(created_yaml["metadata"]["name"], "dummy1")
    created_data = created_yaml["data"]
    self.assertEqual(len(created_data), 2)
    for key, plain in (("uname", "abc"), ("passwd", "def")):
        self.assertEqual(created_data[key], utils.encode_base64(plain))

    # Manifest of the obtained secret: namespace, name and key count.
    obtained_meta = obtained_yaml["metadata"]
    self.assertEqual(obtained_meta["namespace"], "test")
    self.assertEqual(obtained_meta["name"], "dummy2")
    self.assertEqual(len(obtained_yaml["data"]), 2)
def __init__(self, namespace, data, name=None, dry_run=False,
             use_existing=False, artifact_secret=False):
    """Initialize the secret from plain key/value data.

    Args:
        namespace: Stored unchanged on ``self.namespace``.
        data: Non-empty dict. Every value is passed through
            ``utils.encode_base64`` and the result is stored on
            ``self.data`` under the same key.
        name: Explicit secret name. When ``None``, the name is derived as
            ``"couler-<md5>"`` from the JSON dump of ``data`` (keys
            sorted), so equal data always maps to the same name.
        dry_run: Stored unchanged on ``self.dry_run``.
        use_existing: Stored unchanged on ``self.use_existing``.
        artifact_secret: Stored unchanged on ``self.artifact_secret``.

    Raises:
        TypeError: If ``data`` is not a dict.
        ValueError: If ``data`` is empty.
    """
    if not isinstance(data, dict):
        raise TypeError("The secret data is required to be a dict")
    if not data:
        raise ValueError("The secret data is empty")

    if name is None:
        # Derive a deterministic name from the content to avoid creating
        # duplicate secrets for identical data. The digest is only needed
        # here, so it is not computed when the caller supplies a name.
        digest = hashlib.md5(
            json.dumps(data, sort_keys=True).encode("utf-8")
        ).hexdigest()
        name = "couler-%s" % digest

    self.namespace = namespace
    self.name = name
    self.data = {k: utils.encode_base64(v) for k, v in data.items()}
    self.dry_run = dry_run
    self.use_existing = use_existing
    self.artifact_secret = artifact_secret
def __init__(self, namespace, data):
    """Build a secret whose name is derived from its content.

    ``data`` must be a non-empty dict. The name is ``"couler-"`` followed by
    the MD5 hex digest of the JSON dump of ``data`` (keys sorted), and each
    value is stored after being passed through ``utils.encode_base64``.

    Raises:
        TypeError: If ``data`` is not a dict.
        ValueError: If ``data`` is empty.
    """
    if not isinstance(data, dict):
        raise TypeError("The secret data is required to be a dict")
    if not data:
        raise ValueError("The secret data is empty")

    self.namespace = namespace

    # A content-based name keeps identical data from producing duplicates.
    canonical_json = json.dumps(data, sort_keys=True)
    fingerprint = hashlib.md5(canonical_json.encode("utf-8")).hexdigest()
    self.name = "couler-%s" % fingerprint

    encoded = {}
    for key, value in data.items():
        encoded[key] = utils.encode_base64(value)
    self.data = encoded
def test_tensorflow_train(self):
    """Check the workflow and TFJob manifest produced by ``tf.train``.

    Verifies the registered secret, the two workflow templates, the resource
    conditions, and the Chief / PS / Worker / Evaluator replica specs
    parsed from the embedded manifest.
    """
    access_key_secret = {"access_key": "key1234"}
    secret = couler.create_secret(secret_data=access_key_secret)
    tf.train(
        num_ps=2,
        num_workers=3,
        num_evaluators=1,
        image="tensorflow:1.13",
        command="python tf.py",
        no_chief=False,
        worker_resources="cpu=0.5,memory=1024",
        ps_restart_policy="Never",
        worker_restart_policy="OnFailure",
        evaluator_resources="cpu=2,memory=4096",
        clean_pod_policy="Running",
        secret=secret,
    )

    # Check the secret payload
    secret_yaml = list(couler.states._secrets.values())[0].to_yaml()
    self.assertEqual(
        secret_yaml["data"]["access_key"], utils.encode_base64("key1234")
    )

    wf = couler.workflow_yaml()
    self.assertEqual(len(wf["spec"]["templates"]), 2)

    # Check steps template
    template0 = wf["spec"]["templates"][0]
    self.assertEqual(len(template0["steps"]), 1)
    self.assertEqual(len(template0["steps"][0]), 1)

    # Check train template
    template1 = wf["spec"]["templates"][1]
    self.assertEqual(template1["name"], "test-tensorflow-train")
    resource = template1["resource"]
    self.assertEqual(resource["action"], "create")
    self.assertEqual(resource["setOwnerReference"], "true")
    self.assertEqual(
        resource["successCondition"],
        "status.replicaStatuses.Worker.succeeded == 3",
    )
    self.assertEqual(
        resource["failureCondition"],
        "status.replicaStatuses.Worker.failed > 0",
    )

    # Check the tfjob spec (the manifest is a YAML string generated above)
    tfjob = yaml.load(StringIO(resource["manifest"]), Loader=yaml.FullLoader)
    self.assertEqual(tfjob["kind"], "TFJob")
    self.assertEqual(tfjob["spec"]["cleanPodPolicy"], "Running")

    # Chief replica: must reference the secret through its first env entry
    chief = tfjob["spec"]["tfReplicaSpecs"]["Chief"]
    self.assertEqual(chief["replicas"], 1)
    chief_container = chief["template"]["spec"]["containers"][0]
    self.assertEqual(chief_container["env"][0]["name"], "access_key")
    self.assertEqual(
        chief_container["env"][0]["valueFrom"]["secretKeyRef"]["name"],
        secret_yaml["metadata"]["name"],
    )

    # Parameter-server replicas
    ps = tfjob["spec"]["tfReplicaSpecs"]["PS"]
    self.assertEqual(ps["replicas"], 2)
    self.assertEqual(ps["restartPolicy"], "Never")
    self.assertEqual(len(ps["template"]["spec"]["containers"]), 1)
    ps_container = ps["template"]["spec"]["containers"][0]
    self.assertEqual(ps_container["image"], "tensorflow:1.13")
    self.assertEqual(ps_container["command"], "python tf.py")

    # Worker replicas
    worker = tfjob["spec"]["tfReplicaSpecs"]["Worker"]
    self.assertEqual(worker["replicas"], 3)
    self.assertEqual(worker["restartPolicy"], "OnFailure")
    self.assertEqual(len(worker["template"]["spec"]["containers"]), 1)
    # Read the container from the Worker spec (not the PS spec) so the
    # image/command assertions actually exercise the worker container.
    worker_container = worker["template"]["spec"]["containers"][0]
    self.assertEqual(worker_container["image"], "tensorflow:1.13")
    self.assertEqual(worker_container["command"], "python tf.py")
    self.assertEqual(worker_container["env"][0]["name"], "access_key")
    self.assertEqual(
        worker_container["env"][0]["valueFrom"]["secretKeyRef"]["name"],
        secret_yaml["metadata"]["name"],
    )
    self.assertEqual(worker_container["resources"]["limits"]["cpu"], 0.5)
    self.assertEqual(
        worker_container["resources"]["limits"]["memory"], 1024
    )

    # Evaluator replica
    evaluator = tfjob["spec"]["tfReplicaSpecs"]["Evaluator"]
    self.assertEqual(evaluator["replicas"], 1)
    self.assertEqual(len(evaluator["template"]["spec"]["containers"]), 1)
    evaluator_container = evaluator["template"]["spec"]["containers"][0]
    self.assertEqual(evaluator_container["image"], "tensorflow:1.13")
    self.assertEqual(evaluator_container["resources"]["limits"]["cpu"], 2)
    self.assertEqual(
        evaluator_container["resources"]["limits"]["memory"], 4096
    )
def test_encode_base64(self):
    """Base64-decoding the output of ``utils.encode_base64`` restores the input."""
    original = "test encode string"
    encoded = utils.encode_base64(original)
    round_tripped = base64.b64decode(encoded).decode("utf-8")
    self.assertEqual(original, round_tripped)
def test_pytorch_train(self):
    """Check the workflow and PyTorchJob manifest produced by ``pytorch.train``.

    Covers the registered secret, the two workflow templates, the resource
    conditions, and the Master / Worker replica specs parsed from the
    embedded manifest.
    """
    secret = couler.create_secret(secret_data={"access_key": "key1234"})
    pytorch.train(
        num_workers=3,
        image="pytorch:1.13",
        command="python pytorch.py",
        worker_resources="cpu=0.5,memory=1024",
        worker_restart_policy="OnFailure",
        clean_pod_policy="Running",
        secret=secret,
    )

    # The first registered secret carries the base64-encoded access key.
    registered_secrets = list(couler.states._secrets.values())
    secret_yaml = registered_secrets[0].to_yaml()
    self.assertEqual(
        secret_yaml["data"]["access_key"], utils.encode_base64("key1234")
    )

    workflow = couler.workflow_yaml()
    templates = workflow["spec"]["templates"]
    self.assertEqual(len(templates), 2)

    # Steps template: one step group holding one step.
    steps = templates[0]["steps"]
    self.assertEqual(len(steps), 1)
    self.assertEqual(len(steps[0]), 1)

    # Train template: a resource that creates the job.
    train_template = templates[1]
    self.assertEqual(train_template["name"], "test-pytorch-train")
    resource = train_template["resource"]
    self.assertEqual(resource["action"], "create")
    self.assertEqual(resource["setOwnerReference"], "true")
    self.assertEqual(
        resource["successCondition"],
        "status.pytorchReplicaStatuses.Worker.succeeded > 0",
    )
    self.assertEqual(
        resource["failureCondition"],
        "status.pytorchReplicaStatuses.Worker.failed > 0",
    )

    # PyTorchJob spec parsed from the embedded manifest string.
    manifest_stream = StringIO(resource["manifest"])
    job = yaml.load(manifest_stream, Loader=yaml.FullLoader)
    self.assertEqual(job["kind"], "PyTorchJob")
    job_spec = job["spec"]
    self.assertEqual(job_spec["cleanPodPolicy"], "Running")
    replica_specs = job_spec["pytorchReplicaSpecs"]
    secret_name = secret_yaml["metadata"]["name"]

    # Master replica references the secret through its first env entry.
    master = replica_specs["Master"]
    self.assertEqual(master["replicas"], 1)
    master_env = master["template"]["spec"]["containers"][0]["env"][0]
    self.assertEqual(master_env["name"], "access_key")
    self.assertEqual(
        master_env["valueFrom"]["secretKeyRef"]["name"], secret_name
    )

    # Worker replicas: count, restart policy, container, secret, limits.
    worker = replica_specs["Worker"]
    self.assertEqual(worker["replicas"], 3)
    self.assertEqual(worker["restartPolicy"], "OnFailure")
    worker_containers = worker["template"]["spec"]["containers"]
    self.assertEqual(len(worker_containers), 1)
    worker_container = worker_containers[0]
    self.assertEqual(worker_container["image"], "pytorch:1.13")
    self.assertEqual(worker_container["command"], "python pytorch.py")
    worker_env = worker_container["env"][0]
    self.assertEqual(worker_env["name"], "access_key")
    self.assertEqual(
        worker_env["valueFrom"]["secretKeyRef"]["name"], secret_name
    )
    worker_limits = worker_container["resources"]["limits"]
    self.assertEqual(worker_limits["cpu"], 0.5)
    self.assertEqual(worker_limits["memory"], 1024)