Example #1
 def upload_blob(self, workflow_id: Text, prj_pkg_path: Text) -> Text:
     with tempfile.TemporaryDirectory() as temp_dir:
         zip_file_name = 'workflow_{}_project.zip'.format(workflow_id)
         temp_dir_path = Path(temp_dir)
         zip_file_path = temp_dir_path / zip_file_name
         make_dir_zipfile(prj_pkg_path, zip_file_path)
         object_key = 'ai-flow-k8s/' + zip_file_name
         self.bucket.put_object_from_file(key=object_key,
                                          filename=str(zip_file_path))
     return object_key
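make_dir_zipfile is a helper from ai-flow's zip utilities and is not shown in these examples. A minimal sketch of what it presumably does, assuming it simply archives a directory tree into the given zip path (the signature is inferred from the calls above):

import os
import zipfile

def make_dir_zipfile(dir_path, zip_file_path):
    # Recursively add every file under dir_path, storing entries relative to
    # dir_path so the archive unpacks with a clean root.
    with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(dir_path):
            for name in files:
                file_path = os.path.join(root, name)
                zf.write(file_path, os.path.relpath(file_path, dir_path))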
Example #2
 def upload_blob(self, workflow_id: Text, prj_pkg_path: Text) -> Text:
     if self._remote_repo is not None:
         with tempfile.TemporaryDirectory() as temp_dir:
             zip_file_name = 'workflow_{}_project.zip'.format(workflow_id)
             upload_file_path = Path('{}/{}'.format(self._remote_repo,
                                                    zip_file_name))
             if os.path.exists(upload_file_path):
                 os.remove(upload_file_path)
             temp_dir_path = Path(temp_dir)
             zip_file_path = temp_dir_path / zip_file_name
             make_dir_zipfile(prj_pkg_path, zip_file_path)
             os.rename(zip_file_path, upload_file_path)
             return str(upload_file_path)
     else:
         return prj_pkg_path
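Example #5 below performs the same move with shutil.move instead of os.rename; the difference matters because os.rename raises OSError when the temporary directory and the remote repository directory live on different filesystems, while shutil.move falls back to copy-and-delete. A small, self-contained sketch of the safer variant (all paths below are hypothetical stand-ins):

import shutil
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as temp_dir:
    # Stand-ins for zip_file_path and self._remote_repo in the example above.
    zip_file_path = Path(temp_dir) / 'workflow_1_project.zip'
    zip_file_path.write_bytes(b'')
    remote_repo = Path(tempfile.mkdtemp())
    upload_file_path = remote_repo / zip_file_path.name
    # Works even if the two directories are on different filesystems.
    shutil.move(str(zip_file_path), str(upload_file_path))
    print(upload_file_path.exists())  # True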
Example #3
    def upload_project(self, workflow_snapshot_id: Text,
                       project_path: Text) -> Text:
        """
        Upload a given project to the blob server for remote execution.

        :param workflow_snapshot_id: The unique identifier of each workflow generation (snapshot).
        :param project_path: The local path of the project.
        :return: The URI of the uploaded project file on the blob server.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            zip_file_name = 'workflow_{}_project.zip'.format(
                workflow_snapshot_id)
            temp_dir_path = Path(temp_dir)
            zip_file_path = temp_dir_path / zip_file_name
            make_dir_zipfile(project_path, zip_file_path)
            object_key = self.repo_name + '/' + zip_file_name
            self.bucket.put_object_from_file(key=object_key,
                                             filename=str(zip_file_path))
        return object_key
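self.bucket here is an Aliyun OSS bucket client; put_object_from_file(key, filename) is part of the oss2 SDK. A rough sketch of how such a bucket handle might be created and used (the credentials, endpoint, bucket name, and paths are placeholders; in ai-flow they would come from the blob manager's configuration):

import oss2

# Placeholder credentials and endpoint.
auth = oss2.Auth('<access_key_id>', '<access_key_secret>')
bucket = oss2.Bucket(auth, 'https://oss-cn-hangzhou.aliyuncs.com', '<bucket-name>')

# Upload a local zip under an object key, mirroring the example above.
bucket.put_object_from_file('my-repo/workflow_1_project.zip',
                            '/tmp/workflow_1_project.zip')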
Example #4
 def save_model(self, local_path, remote_path=None) -> Text:
     """
     Save a locally exported model to remote storage (Aliyun OSS).
     :param local_path: The local path of the model. OSS only stores binary
     objects, so the file or directory at this path is zipped before upload.
     :param remote_path: The object key of the uploaded file in OSS, with a
     pattern like abc/efg/123.jpg.
     :return: The object key of the uploaded file.
     """
     with tempfile.TemporaryDirectory() as temp_dir:
         local_path = Path(local_path)
         zip_file_name = os.path.splitext(local_path.name)[0] + '.zip'
         temp_dir_path = Path(temp_dir)
         zip_file_path = temp_dir_path / zip_file_name
         if local_path.is_dir():
             make_dir_zipfile(str(local_path), zip_file_path)
         else:
             make_file_zipfile(str(local_path), zip_file_path)
         if remote_path is None:
             object_key = 'ai-flow-model-manager/' + zip_file_name
         else:
             object_key = remote_path
         self.bucket.put_object_from_file(key=object_key,
                                          filename=str(zip_file_path))
     return object_key
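This example also calls make_file_zipfile for the single-file case; under the same assumption as the make_dir_zipfile sketch above, a minimal version could look like this:

import os
import zipfile

def make_file_zipfile(file_path, zip_file_path):
    # Store the single file at the root of the archive under its base name.
    with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.write(file_path, os.path.basename(file_path))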
Example #5
    def upload_project(self, workflow_snapshot_id: Text,
                       project_path: Text) -> Text:
        """
        Upload a given project to the blob server for remote execution.

        :param workflow_snapshot_id: The unique identifier of each workflow generation (snapshot).
        :param project_path: The local path of the project.
        :return: The URI of the uploaded project file on the blob server.
        """
        if self._remote_repo is not None:
            with tempfile.TemporaryDirectory() as temp_dir:
                zip_file_name = 'workflow_{}_project.zip'.format(
                    workflow_snapshot_id)
                upload_file_path = Path('{}/{}'.format(self._remote_repo,
                                                       zip_file_name))
                if os.path.exists(upload_file_path):
                    os.remove(upload_file_path)
                temp_dir_path = Path(temp_dir)
                zip_file_path = temp_dir_path / zip_file_name
                make_dir_zipfile(project_path, zip_file_path)
                shutil.move(zip_file_path, upload_file_path)
                return str(upload_file_path)
        else:
            return project_path
    def submit_job(self,
                   job: Job,
                   job_runtime_env: JobRuntimeEnv = None) -> JobHandle:
        handle = FlinkJobHandle(
            job=job, job_execution=job_runtime_env.job_execution_info)
        flink_job: FlinkJob = job
        job_config: FlinkJobConfig = FlinkJobConfig.from_job_config(
            flink_job.job_config)
        env = os.environ.copy()
        env.update(job_config.properties.get('env', {}))
        if not flink_job.is_java:
            run_graph_file = os.path.join(job_runtime_env.generated_dir,
                                          flink_job.run_graph_file)
            flink_env_file = os.path.join(job_runtime_env.generated_dir,
                                          flink_job.flink_env_file)
            # Prepend the workflow dir and Python dependency dir to PYTHONPATH.
            copy_path = sys.path.copy()
            copy_path.insert(0, job_runtime_env.workflow_dir)
            copy_path.insert(0, job_runtime_env.python_dep_dir)
            env['PYTHONPATH'] = ':'.join(copy_path)

            current_path = os.path.dirname(__file__)
            script_path = os.path.join(current_path, 'flink_run_main.py')
            python3_location = sys.executable
            if job_config.run_mode == 'local':
                bash_command = [
                    python3_location, script_path, run_graph_file,
                    job_runtime_env.working_dir, flink_env_file
                ]
            elif job_config.run_mode == 'cluster':
                bash_command = ['flink', 'run']

                if job_config.flink_run_args is not None:
                    bash_command.extend(job_config.flink_run_args)

                bash_command.append('-pyfs')
                files = [job_runtime_env.workflow_dir]
                if os.path.exists(job_runtime_env.python_dep_dir):
                    files.append(job_runtime_env.python_dep_dir)
                bash_command.append(','.join(files))

                if os.path.exists(job_runtime_env.resource_dir):
                    zip_file_util.make_dir_zipfile(
                        job_runtime_env.resource_dir,
                        os.path.join(job_runtime_env.working_dir,
                                     'resources.zip'))
                bash_command.extend([
                    '-pyarch',
                    os.path.join(job_runtime_env.working_dir,
                                 'resources.zip#resources')
                ])
                bash_command.extend([
                    '-py', script_path, run_graph_file,
                    job_runtime_env.working_dir, flink_env_file
                ])
            else:
                raise Exception(
                    'Flink supports run_mode local or cluster; {} is not supported.'
                    .format(job_config.run_mode))
        else:
            # Flink Java job: submit the main jar through the flink CLI.
            bash_command = ['flink', 'run']

            if job_config.flink_run_args is not None:
                bash_command.extend(job_config.flink_run_args)

            if os.path.exists(job_runtime_env.resource_dir):
                zip_file_util.make_dir_zipfile(
                    job_runtime_env.resource_dir,
                    os.path.join(job_runtime_env.working_dir, 'resources.zip'))
            processor: FlinkJavaProcessor = \
                serialization_utils.read_object_from_serialized_file(
                    os.path.join(job_runtime_env.generated_dir,
                                 flink_job.processor_file))
            if processor.entry_class is not None:
                bash_command.extend(['-c', processor.entry_class])
            bash_command.append(
                os.path.join(job_runtime_env.jar_dep_dir,
                             processor.main_jar_file))
            bash_command.extend(processor.args)
        self.log.info(' '.join(bash_command))
        stdout_log = log_path_utils.stdout_log_path(job_runtime_env.log_dir,
                                                    job.job_name)
        stderr_log = log_path_utils.stderr_log_path(job_runtime_env.log_dir,
                                                    job.job_name)
        if not os.path.exists(job_runtime_env.log_dir):
            os.makedirs(job_runtime_env.log_dir)

        sub_process = self.submit_process(
            bash_command=bash_command,
            env=env,
            working_dir=job_runtime_env.working_dir,
            stdout_log=stdout_log,
            stderr_log=stderr_log)
        handle.sub_process = sub_process
        handle.stdout_log = stdout_log
        handle.stderr_log = stderr_log

        if flink_job.is_java:
            # Wait for the stdout log file, then parse the Flink job id from it
            # and persist the id to a file in the working directory.
            num = 0
            while True:
                if os.path.exists(stdout_log):
                    break
                else:
                    time.sleep(1)
                num += 1
                if 0 == num % 20:
                    self.log.info("Waiting for the stdout log file to be created...")

            while True:
                with open(stdout_log, 'r') as f:
                    lines = f.readlines()
                if len(lines) >= 1:
                    line = lines[0]
                    if line.startswith("Job has been submitted with JobID"):
                        job_id = line.split(' ')[6][:-1]
                        with open(
                                os.path.join(job_runtime_env.working_dir,
                                             'job_id'), 'w') as fp:
                            fp.write(job_id)
                        self.log.info('Flink job id {}'.format(job_id))
                    break
                else:
                    time.sleep(1)

        return handle
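The job-id extraction above relies on the fixed wording of the Flink CLI line "Job has been submitted with JobID <id>". A small self-contained check of that indexing (the sample id is made up):

# Sample line as the Flink CLI prints it to stdout on successful submission.
line = "Job has been submitted with JobID a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4\n"
# 'JobID' is token index 5, so the id itself is token index 6;
# [:-1] strips the trailing newline.
job_id = line.split(' ')[6][:-1]
print(job_id)  # a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4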