예제 #1
0
    def test_create_secret(self):
        """Register two secrets and verify the YAML generated for each.

        One secret is built from literal data with ``create_secret``; the
        other is requested with ``obtain_secret`` by key names, namespace
        and secret name. Each is attached to a container step before the
        recorded secrets are inspected.
        """
        # Step 1: secret created from explicit key/value data.
        credentials = {"uname": "abc", "passwd": "def"}
        created = couler.create_secret(secret_data=credentials, name="dummy1")
        couler.run_container(
            image="python:3.6", secret=created, command="echo $uname"
        )

        # Step 2: secret that is expected to exist already, looked up by
        # its key names.
        key_names = ["access_key", "access_value"]
        obtained = couler.obtain_secret(
            secret_keys=key_names, namespace="test", name="dummy2"
        )
        couler.run_container(
            image="python:3.6", secret=obtained, command="echo $access_value"
        )

        # Both secrets must have been recorded in the global state.
        recorded = couler.states._secrets
        self.assertEqual(len(recorded), 2)
        created_yaml = recorded[created].to_yaml()
        obtained_yaml = recorded[obtained].to_yaml()

        # The created secret keeps its name and stores base64 values.
        self.assertEqual(created_yaml["metadata"]["name"], "dummy1")
        self.assertEqual(len(created_yaml["data"]), 2)
        for key, plain_value in (("uname", "abc"), ("passwd", "def")):
            self.assertEqual(
                created_yaml["data"][key], utils.encode_base64(plain_value)
            )

        # The obtained secret carries the requested namespace, name and
        # one data entry per requested key.
        obtained_meta = obtained_yaml["metadata"]
        self.assertEqual(obtained_meta["namespace"], "test")
        self.assertEqual(obtained_meta["name"], "dummy2")
        self.assertEqual(len(obtained_yaml["data"]), 2)
예제 #2
0
    def __init__(self,
                 namespace,
                 data,
                 name=None,
                 dry_run=False,
                 use_existing=False,
                 artifact_secret=False):
        """Initialise a secret from a mapping of plain values.

        Args:
            namespace: Stored unchanged on ``self.namespace``.
            data: Non-empty dict. Every value is passed through
                ``utils.encode_base64`` and stored under its original key.
            name: Explicit secret name. When ``None`` a name of the form
                ``couler-<md5 hex digest>`` is derived from ``data``.
            dry_run: Stored unchanged on ``self.dry_run``.
            use_existing: Stored unchanged on ``self.use_existing``.
            artifact_secret: Stored unchanged on ``self.artifact_secret``.

        Raises:
            TypeError: If ``data`` is not a dict.
            ValueError: If ``data`` is empty.
        """
        if not isinstance(data, dict):
            raise TypeError("The secret data is required to be a dict")
        if not data:
            raise ValueError("The secret data is empty")

        self.namespace = namespace
        if name is None:
            # Derive the name from the content so identical data always
            # yields the same name instead of creating a duplicate secret.
            # The digest is only needed here, so it is not computed when
            # the caller supplies a name.
            serialized = json.dumps(data, sort_keys=True).encode("utf-8")
            self.name = "couler-%s" % hashlib.md5(serialized).hexdigest()
        else:
            self.name = name

        self.data = {k: utils.encode_base64(v) for k, v in data.items()}
        self.dry_run = dry_run
        self.use_existing = use_existing
        self.artifact_secret = artifact_secret
예제 #3
0
파일: secret.py 프로젝트: zjjott/couler
    def __init__(self, namespace, data):
        """Initialise a secret whose name is derived from its content.

        Args:
            namespace: Stored unchanged on ``self.namespace``.
            data: Non-empty dict. Every value is passed through
                ``utils.encode_base64`` and stored under its original key.

        Raises:
            TypeError: If ``data`` is not a dict.
            ValueError: If ``data`` is empty.
        """
        if not isinstance(data, dict):
            raise TypeError("The secret data is required to be a dict")
        if not data:
            raise ValueError("The secret data is empty")

        self.namespace = namespace

        # Hash a key-sorted JSON dump so identical data always produces
        # the same name, which avoids creating duplicate secrets.
        canonical = json.dumps(data, sort_keys=True)
        digest = hashlib.md5(canonical.encode("utf-8")).hexdigest()
        self.name = "couler-%s" % digest

        encoded = {}
        for key, value in data.items():
            encoded[key] = utils.encode_base64(value)
        self.data = encoded
예제 #4
0
    def test_tensorflow_train(self):
        """Check the workflow and TFJob manifest produced by ``tf.train``.

        A secret is created and passed to ``tf.train``; the test then
        verifies the recorded secret, the two workflow templates, and the
        Chief, PS, Worker and Evaluator replica specs parsed from the
        resource manifest.
        """
        access_key_secret = {"access_key": "key1234"}
        secret = couler.create_secret(secret_data=access_key_secret)

        tf.train(
            num_ps=2,
            num_workers=3,
            num_evaluators=1,
            image="tensorflow:1.13",
            command="python tf.py",
            no_chief=False,
            worker_resources="cpu=0.5,memory=1024",
            ps_restart_policy="Never",
            worker_restart_policy="OnFailure",
            evaluator_resources="cpu=2,memory=4096",
            clean_pod_policy="Running",
            secret=secret,
        )

        # The secret value must be stored base64-encoded.
        secret_yaml = list(couler.states._secrets.values())[0].to_yaml()
        self.assertEqual(secret_yaml["data"]["access_key"],
                         utils.encode_base64("key1234"))

        wf = couler.workflow_yaml()
        self.assertEqual(len(wf["spec"]["templates"]), 2)
        # Check steps template
        template0 = wf["spec"]["templates"][0]
        self.assertEqual(len(template0["steps"]), 1)
        self.assertEqual(len(template0["steps"][0]), 1)
        # Check train template
        template1 = wf["spec"]["templates"][1]
        self.assertEqual(template1["name"], "test-tensorflow-train")
        resource = template1["resource"]
        self.assertEqual(resource["action"], "create")
        self.assertEqual(resource["setOwnerReference"], "true")
        self.assertEqual(
            resource["successCondition"],
            "status.replicaStatuses.Worker.succeeded == 3",
        )
        self.assertEqual(
            resource["failureCondition"],
            "status.replicaStatuses.Worker.failed > 0",
        )
        # Check the tfjob spec; the manifest is YAML text generated by
        # the code under test.
        tfjob = yaml.load(StringIO(resource["manifest"]),
                          Loader=yaml.FullLoader)
        self.assertEqual(tfjob["kind"], "TFJob")
        self.assertEqual(tfjob["spec"]["cleanPodPolicy"], "Running")

        # Chief: one replica whose first env var references the secret.
        chief = tfjob["spec"]["tfReplicaSpecs"]["Chief"]
        self.assertEqual(chief["replicas"], 1)
        chief_container = chief["template"]["spec"]["containers"][0]
        self.assertEqual(chief_container["env"][0]["name"], "access_key")
        self.assertEqual(
            chief_container["env"][0]["valueFrom"]["secretKeyRef"]["name"],
            secret_yaml["metadata"]["name"],
        )

        # Parameter servers.
        ps = tfjob["spec"]["tfReplicaSpecs"]["PS"]
        self.assertEqual(ps["replicas"], 2)
        self.assertEqual(ps["restartPolicy"], "Never")
        self.assertEqual(len(ps["template"]["spec"]["containers"]), 1)
        ps_container = ps["template"]["spec"]["containers"][0]
        self.assertEqual(ps_container["image"], "tensorflow:1.13")
        self.assertEqual(ps_container["command"], "python tf.py")

        # Workers. The container is read from the Worker replica spec:
        # the image/command assertions previously re-read the PS
        # container and therefore never inspected the worker.
        worker = tfjob["spec"]["tfReplicaSpecs"]["Worker"]
        self.assertEqual(worker["replicas"], 3)
        self.assertEqual(worker["restartPolicy"], "OnFailure")
        self.assertEqual(len(worker["template"]["spec"]["containers"]), 1)
        worker_container = worker["template"]["spec"]["containers"][0]
        self.assertEqual(worker_container["image"], "tensorflow:1.13")
        self.assertEqual(worker_container["command"], "python tf.py")
        self.assertEqual(worker_container["env"][0]["name"], "access_key")
        self.assertEqual(
            worker_container["env"][0]["valueFrom"]["secretKeyRef"]["name"],
            secret_yaml["metadata"]["name"],
        )
        self.assertEqual(worker_container["resources"]["limits"]["cpu"], 0.5)
        self.assertEqual(worker_container["resources"]["limits"]["memory"],
                         1024)

        # Evaluator.
        evaluator = tfjob["spec"]["tfReplicaSpecs"]["Evaluator"]
        self.assertEqual(evaluator["replicas"], 1)
        self.assertEqual(len(evaluator["template"]["spec"]["containers"]), 1)
        evaluator_container = evaluator["template"]["spec"]["containers"][0]
        self.assertEqual(evaluator_container["image"], "tensorflow:1.13")
        self.assertEqual(evaluator_container["resources"]["limits"]["cpu"], 2)
        self.assertEqual(evaluator_container["resources"]["limits"]["memory"],
                         4096)
예제 #5
0
 def test_encode_base64(self):
     """Encoding a string and base64-decoding it returns the original."""
     original = "test encode string"
     encoded = utils.encode_base64(original)
     # Decode with the standard library and interpret the bytes as UTF-8.
     round_tripped = base64.b64decode(encoded).decode("utf-8")
     self.assertEqual(original, round_tripped)
예제 #6
0
    def test_pytorch_train(self):
        """Check the workflow and PyTorchJob manifest from ``pytorch.train``.

        A secret is created and passed to ``pytorch.train``; the test then
        verifies the recorded secret, the two workflow templates, and the
        Master and Worker replica specs parsed from the resource manifest.
        """
        secret = couler.create_secret(secret_data={"access_key": "key1234"})

        pytorch.train(
            num_workers=3,
            image="pytorch:1.13",
            command="python pytorch.py",
            worker_resources="cpu=0.5,memory=1024",
            worker_restart_policy="OnFailure",
            clean_pod_policy="Running",
            secret=secret,
        )

        # The secret value must be stored base64-encoded.
        first_secret = list(couler.states._secrets.values())[0]
        secret_yaml = first_secret.to_yaml()
        self.assertEqual(
            secret_yaml["data"]["access_key"], utils.encode_base64("key1234")
        )

        workflow = couler.workflow_yaml()
        self.assertEqual(len(workflow["spec"]["templates"]), 2)

        # First template: a single step group containing a single step.
        steps_template = workflow["spec"]["templates"][0]
        self.assertEqual(len(steps_template["steps"]), 1)
        self.assertEqual(len(steps_template["steps"][0]), 1)

        # Second template: the resource template that creates the job.
        train_template = workflow["spec"]["templates"][1]
        self.assertEqual(train_template["name"], "test-pytorch-train")
        resource = train_template["resource"]
        self.assertEqual(resource["action"], "create")
        self.assertEqual(resource["setOwnerReference"], "true")
        self.assertEqual(
            resource["successCondition"],
            "status.pytorchReplicaStatuses.Worker.succeeded > 0",
        )
        self.assertEqual(
            resource["failureCondition"],
            "status.pytorchReplicaStatuses.Worker.failed > 0",
        )

        # Parse the embedded manifest and check the PyTorchJob spec.
        manifest_stream = StringIO(resource["manifest"])
        pytorch_job = yaml.load(manifest_stream, Loader=yaml.FullLoader)
        self.assertEqual(pytorch_job["kind"], "PyTorchJob")
        self.assertEqual(pytorch_job["spec"]["cleanPodPolicy"], "Running")

        # Master: one replica whose first env var references the secret.
        master = pytorch_job["spec"]["pytorchReplicaSpecs"]["Master"]
        self.assertEqual(master["replicas"], 1)
        master_container = master["template"]["spec"]["containers"][0]
        self.assertEqual(master_container["env"][0]["name"], "access_key")
        self.assertEqual(
            master_container["env"][0]["valueFrom"]["secretKeyRef"]["name"],
            secret_yaml["metadata"]["name"],
        )

        # Workers: replica count, restart policy, container, secret env
        # var and resource limits.
        worker = pytorch_job["spec"]["pytorchReplicaSpecs"]["Worker"]
        self.assertEqual(worker["replicas"], 3)
        self.assertEqual(worker["restartPolicy"], "OnFailure")
        self.assertEqual(len(worker["template"]["spec"]["containers"]), 1)
        worker_container = worker["template"]["spec"]["containers"][0]
        self.assertEqual(worker_container["image"], "pytorch:1.13")
        self.assertEqual(worker_container["command"], "python pytorch.py")
        self.assertEqual(worker_container["env"][0]["name"], "access_key")
        self.assertEqual(
            worker_container["env"][0]["valueFrom"]["secretKeyRef"]["name"],
            secret_yaml["metadata"]["name"],
        )
        worker_limits = worker_container["resources"]["limits"]
        self.assertEqual(worker_limits["cpu"], 0.5)
        self.assertEqual(worker_limits["memory"], 1024)