Example #1
def get_os_client_ex(spark, region):
    import tempfile
    from oci_core import dfapp_get_os_client, get_delegation_token, get_os_client
    if USE_INSTANCE_PRINCIPLE:
        # Inside a Data Flow run: authenticate via the run's delegation token
        delegation_token = get_delegation_token(spark)
        os_client = dfapp_get_os_client(region, delegation_token)
    else:
        # Outside Data Flow: write the API key to disk so the SDK can load it
        with tempfile.NamedTemporaryFile(mode='w+t', delete=False) as key_f:
            key_f.write(OCI_KEY)
        _oci_config = dict(OCI_CONFIG)
        _oci_config['key_file'] = key_f.name
        os_client = get_os_client(None, config=_oci_config)
    return os_client
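
A minimal usage sketch, not from the source: it assumes the module-level globals USE_INSTANCE_PRINCIPLE, OCI_KEY, and OCI_CONFIG referenced above are already defined, that spark is an active SparkSession, that get_os_client returns an oci.object_storage.ObjectStorageClient, and that the region string is a placeholder.

# Running outside Data Flow, so the API-key branch is taken
USE_INSTANCE_PRINCIPLE = False
os_client = get_os_client_ex(spark, "us-ashburn-1")  # hypothetical region
# Smoke test: an Object Storage client can fetch the tenancy namespace
print(os_client.get_namespace().data)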
Example #2
    def run(self,
            deployment_location,
            options=None,
            args=None,
            handlers=None,
            on_job_submitted=None,
            cli_mode=False):
        # options fields
        #     num_executors     : number
        #     driver_shape      : string
        #     executor_shape    : string
        #     lib_url_duration  : number (represents the number of minutes)
        #     on_job_submitted  : callback, on_job_submitted(run_id, vendor_info={'oci_run_id': 'xxxyyy'})
        # a usage sketch with a sample options dict follows this example
        options = options or {}  # avoid mutable default arguments
        args = args or {}

        o = urlparse(deployment_location)
        if o.scheme != 'oci':
            raise SparkETLLaunchFailure("deployment_location must be in OCI")

        run_dir = self.config.get('run_dir') or self.config.get('run_base_dir')
        run_id = str(uuid.uuid4())

        # deployment_location has the form oci://bucket@namespace/path
        namespace = o.netloc.split('@')[1]
        bucket = o.netloc.split('@')[0]
        root_path = o.path[1:]  # remove the leading "/"

        # let's get the deployment.json
        os_client = get_os_client(self.region, self.config.get("oci_config"))
        deployment = os_download_json(
            os_client, namespace, bucket,
            os.path.join(root_path, "deployment.json"))

        # let's upload the args
        client_channel = ClientChannel(self.region,
                                       self.config.get("oci_config"), run_dir,
                                       run_id)
        client_channel.write_json("args.json", args)

        o = urlparse(run_dir)
        namespace = o.netloc.split('@')[1]
        bucket = o.netloc.split('@')[0]
        root_path = o.path[1:]  # remove the leading "/"
        os_upload_json(os_client, args, namespace, bucket,
                       f"{root_path}/{run_id}/args.json")

        df_client = get_df_client(self.region, self.config.get("oci_config"))
        crd_argv = {
            'compartment_id': deployment['compartment_id'],
            'application_id': deployment['application_id'],
            'display_name': options["display_name"],
            'arguments': [
                "--deployment-location", deployment_location,
                "--run-id", run_id,
                "--run-dir", os.path.join(run_dir, run_id),
                "--app-region", self.region,
            ],
        }
        for key in ['num_executors', 'driver_shape', 'executor_shape']:
            if key in options:
                crd_argv[key] = options[key]

        create_run_details = oci.data_flow.models.CreateRunDetails(**crd_argv)
        r = df_client.create_run(create_run_details=create_run_details)
        check_response(
            r, lambda: SparkETLLaunchFailure(
                "dataflow failed to run the application"))
        run = r.data
        oci_run_id = run.id
        print(f"Job launched, run_id = {run_id}, oci_run_id = {oci_run_id}")
        if on_job_submitted is not None:
            on_job_submitted(run_id, vendor_info={'oci_run_id': oci_run_id})

        cli_entered = False
        while True:
            time.sleep(10)  # poll the Data Flow run status every 10 seconds
            r = df_client.get_run(run_id=oci_run_id)
            check_response(
                r, lambda: SparkETLGetStatusFailure(
                    "dataflow failed to get run status"))
            run = r.data
            print(f"Status: {run.lifecycle_state}")
            if run.lifecycle_state in ('FAILED', 'SUCCEEDED', 'CANCELED'):
                break
            handle_server_ask(client_channel, handlers)

            if cli_mode and not cli_entered and run.lifecycle_state == 'IN_PROGRESS':
                cli_entered = True
                cli_handler = CLIHandler(client_channel, None, handlers)
                cli_handler.loop()

        if run.lifecycle_state in ('FAILED', 'CANCELED'):
            raise Exception(f"Job failed with status: {run.lifecycle_state}")
        return client_channel.read_json('result.json')
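
A hypothetical invocation showing the options dict described in the comment at the top of run(); the launcher instance, bucket, namespace, and shapes are placeholders, not part of the source:

# `launcher` stands for an instance of the class that defines run();
# its construction is omitted here and all values below are placeholders.
result = launcher.run(
    "oci://my-bucket@my-namespace/apps/my-etl-app",  # deployment_location, oci:// scheme required
    options={
        "display_name": "nightly-etl",
        "num_executors": 2,
        "driver_shape": "VM.Standard2.1",
        "executor_shape": "VM.Standard2.1",
    },
    args={"run_date": "2021-01-01"},
)
print(result)  # contents of result.json written by the finished job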
Example #3
    def delete_json(self, name):
        # delete <root_path>/<run_id>/<name> from the channel's bucket
        os_client = get_os_client(self.region, self.oci_config)
        object_name = os.path.join(self.root_path, self.run_id, name)
        os_delete_object(os_client, self.namespace, self.bucket, object_name)
Example #4
    def write_json(self, name, payload):
        # serialize payload and upload it as <root_path>/<run_id>/<name>
        os_client = get_os_client(self.region, self.oci_config)
        object_name = os.path.join(self.root_path, self.run_id, name)
        os_upload_json(os_client, payload, self.namespace, self.bucket,
                       object_name)
Example #5
    def read_json(self, name):
        # download and deserialize <root_path>/<run_id>/<name>
        os_client = get_os_client(self.region, self.oci_config)
        object_name = os.path.join(self.root_path, self.run_id, name)
        result = os_download_json(os_client, self.namespace, self.bucket,
                                  object_name)
        return result
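
Together, write_json, read_json, and delete_json give ClientChannel a small per-run JSON store on Object Storage. A hypothetical round trip, with constructor arguments mirroring the ones used in Example #2 (all values are placeholders):

channel = ClientChannel("us-ashburn-1", oci_config, "oci://my-bucket@my-namespace/runs", run_id)
channel.write_json("args.json", {"run_date": "2021-01-01"})          # upload under <root_path>/<run_id>/
assert channel.read_json("args.json") == {"run_date": "2021-01-01"}  # download it back
channel.delete_json("args.json")                                     # remove the object again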
Example #6
    def deploy(self, build_dir, destination_location):
        o = urlparse(destination_location)
        if o.scheme != 'oci':
            raise SparkETLDeploymentFailure("destination_location must be in OCI")

        namespace = o.netloc.split('@')[1]
        bucket = o.netloc.split('@')[0]
        root_path = o.path[1:]    # remove the leading "/"

        build = Build(build_dir)

        print("Uploading files:")
        # Data Flow expects the Python library to be named python.zip
        os_client = get_os_client(self.region, config=self.config.get("oci_config"))
        for artifact in build.artifacts:
            os_upload(
                os_client,
                f"{build_dir}/{artifact}",
                namespace,
                bucket,
                f"{root_path}/{build.version}/{artifact}"
            )

        # let's upload the job loader
        job_loader_filename = get_job_loader(self.config.get("oci_config"))

        os_upload(
            os_client,
            job_loader_filename,
            namespace,
            bucket,
            f"{root_path}/{build.version}/job_loader.py"
        )

        application = self.create_application(build.manifest, destination_location)
        app_info = {
            "application_id": application.id,
            "compartment_id": application.compartment_id
        }

        os_upload_json(
            os_client, app_info,
            namespace, bucket, f"{root_path}/{build.version}/deployment.json"
        )

        oci_config = self.config.get("oci_config")
        if oci_config is not None:
            os_upload(
                os_client,
                _save_json_temp(oci_config),
                namespace,
                bucket,
                "oci_config.json"
            )
            os_upload(
                os_client,
                oci_config['key_file'],
                namespace,
                bucket,
                "oci_api_key.pem",
            )
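
A hypothetical deploy call; the deployer instance, local build directory, and bucket path are placeholders, not part of the source:

# `deployer` stands for an instance of the class that defines deploy().
deployer.deploy(
    ".builds/my-etl-app",                            # build_dir produced by a prior build step
    "oci://my-bucket@my-namespace/apps/my-etl-app",  # destination_location must use the oci:// scheme
)
# Afterwards the bucket holds <root_path>/<version>/ with the build
# artifacts, job_loader.py, and deployment.json uploaded above.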