    def generate_code(self, op_index, job: AbstractJob):
        LOCAL_PYTHON_OPERATOR = """env_{0}={{'PYTHONPATH': '{3}'}}
op_{0} = BashOperator(task_id='{1}', dag=dag, bash_command='{2}', env=env_{0})\n"""
        blob_manager = BlobManagerFactory.get_blob_manager(
            job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            downloaded_blob_path = blob_manager.download_blob(
                job.instance_id, job.job_config.project_path)
            python_codes_path = downloaded_blob_path + '/python_codes'
            copy_path.append(python_codes_path)
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        current_path = os.path.abspath(__file__)
        python_package_path = os.path.abspath(
            os.path.dirname(current_path) + os.path.sep + ".")
        script_path = python_package_path + '/local_job_run.py'

        entry_module_path = job.job_config.properties['entry_module_path']
        python3_location = sys.executable
        cmd = [
            python3_location, script_path, job.job_config.project_path,
            job.exec_func_file, job.exec_args_file, entry_module_path
        ]
        cmd_str = ' '.join(cmd)
        add_path = ':'.join(copy_path)
        code_text = LOCAL_PYTHON_OPERATOR.format(
            op_index, job_name_to_task_id(job.job_name), cmd_str, add_path)
        return code_text
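For reference, with op_index 0 and hypothetical task and path values, the LOCAL_PYTHON_OPERATOR template above renders to DAG code like this:

env_0={'PYTHONPATH': '/tmp/workflow_1_project/project/python_codes:/usr/lib/python3.8/site-packages'}
op_0 = BashOperator(task_id='my_task', dag=dag, bash_command='/usr/bin/python3 /pkg/local_job_run.py /tmp/project func_file args_file my_module', env=env_0)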
    def submit_job(self, job: VVPJob) -> VVPJobHandler:
        blob_manager = BlobManagerFactory.get_blob_manager(
            job.job_config.properties)
        if job.job_config.project_path is not None:
            downloaded_blob_path = blob_manager.download_blob(
                job.instance_id, job.job_config.project_path)

        vvp_config: VVPJobConfig = job.job_config
        dp_id, job_id = job.vvp_restful.submit_job(
            name=vvp_config.deployment_name,
            artifact_path=vvp_config.jar_path,
            entry_class=vvp_config.entry_class,
            main_args=vvp_config.main_args,
            addition_dependencies=vvp_config.addition_dependencies,
            flink_image_info=vvp_config.flink_image_info,
            parallelism=vvp_config.parallelism,
            resources=vvp_config.resources,
            flink_config=vvp_config.flink_config,
            logging=vvp_config.logging,
            kubernetes=vvp_config.kubernetes,
            upgrade_strategy=vvp_config.upgrade_strategy,
            restore_strategy=vvp_config.restore_strategy,
            spec=vvp_config.spec)
        job.vvp_deployment_id = dp_id
        job.vvp_restful.start_deployment(job.vvp_deployment_id)
        job.vvp_job_id = job_id

        return VVPJobHandler(vvp_restful=job.vvp_restful,
                             vvp_job_id=job_id,
                             vvp_deployment_id=dp_id,
                             job_instance_id=job.instance_id,
                             job_uuid=job.uuid,
                             workflow_id=job.job_context.workflow_execution_id)
    def generate_code(self, op_index, job: AbstractJob):
        LOCAL_FLINK_OPERATOR = """from flink_ai_flow.local_flink_job import LocalFlinkOperator\nenv_{0}={{'PYTHONPATH': '{4}', 'PATH': '{5}', 'PYFLINK_CLIENT_EXECUTABLE': '{6}'}}\nop_{0} = LocalFlinkOperator(task_id='{1}', bash_command='{2}', properties='{3}', dag=dag, env=env_{0})\n"""
        sys_env = os.environ.copy()
        if job.job_config.flink_home is not None:
            sys_env['PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
        python_path = sys.path.copy()
        if job.job_config.project_path is not None:
            blob_manager = BlobManagerFactory.get_blob_manager(
                job.job_config.properties)
            downloaded_blob_path = blob_manager.download_blob(
                job.instance_id, job.job_config.project_path)
            python_path.append(downloaded_blob_path + '/python_codes')
        if job.job_config.project_desc.python_paths is not None:
            python_path.extend(job.job_config.project_desc.python_paths)
        sys_env['PYTHONPATH'] = ':'.join(python_path)
        sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
        return LOCAL_FLINK_OPERATOR.format(
            op_index, job_name_to_task_id(job.job_name),
            ' '.join(job.exec_cmd),
            json.dumps({
                'project_path': job.job_config.project_path,
                'workflow_execution_id': job.job_context.workflow_execution_id,
                'instance_id': job.instance_id
            }), sys_env['PYTHONPATH'], sys_env['PATH'],
            sys_env['PYFLINK_CLIENT_EXECUTABLE'])
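With hypothetical values substituted, this variant renders to DAG code that imports the operator and carries the job metadata as a JSON string in properties:

from flink_ai_flow.local_flink_job import LocalFlinkOperator
env_0={'PYTHONPATH': '/tmp/project/python_codes:/usr/lib/python3.8/site-packages', 'PATH': '/opt/flink/bin:/usr/bin', 'PYFLINK_CLIENT_EXECUTABLE': '/usr/bin/python3'}
op_0 = LocalFlinkOperator(task_id='my_flink_task', bash_command='flink run ...', properties='{"project_path": "/tmp/project", "workflow_execution_id": 1, "instance_id": "job_0"}', dag=dag, env=env_0)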
    def test_project_upload_download_local_2(self):
        project_path = get_file_dir(__file__)
        config = {'local_repository': '/tmp', 'remote_repository': '/tmp'}

        # blob_server.type = local
        blob_manager = BlobManagerFactory.get_blob_manager(config)
        uploaded_path = blob_manager.upload_blob('1', project_path)

        downloaded_path = blob_manager.download_blob('1', uploaded_path)
        self.assertEqual('/tmp/workflow_1_project/project', downloaded_path)
    def test_custom_blob_manager(self):
        config = {
            'blob_server.type': 'ai_flow.test.project.test_blob_manager.MockBlockManager'
        }
        blob_manager = BlobManagerFactory.get_blob_manager(config)
        uploaded_path = blob_manager.upload_blob('1', None)
        self.assertEqual('upload', uploaded_path)

        downloaded_path = blob_manager.download_blob('1', uploaded_path)
        self.assertEqual('download', downloaded_path)
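Because 'blob_server.type' can point at any dotted class path, a custom manager only has to honor the call sites shown in these examples. A minimal sketch, assuming the factory instantiates the class with the config dict (the real base class, if one is required, is not shown in this excerpt):

class MockBlockManager:
    """Stand-in blob manager matching the contract used by the test above."""

    def __init__(self, config):
        self.config = config

    def upload_blob(self, workflow_id, prj_path):
        # A real implementation would push prj_path to remote storage and
        # return its remote location.
        return 'upload'

    def download_blob(self, workflow_id, remote_path, local_path=None):
        # A real implementation would fetch remote_path (optionally into
        # local_path) and return the local project directory.
        return 'download'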
    def submit_job(self, job: LocalPythonJob) -> Any:
        """
        :param job:
        :return:
        """
        blob_manager = BlobManagerFactory.get_blob_manager(
            job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            downloaded_blob_path = blob_manager.download_blob(
                job.instance_id, job.job_config.project_path)
            python_codes_path = downloaded_blob_path + '/python_codes'
            copy_path.append(python_codes_path)
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        env = os.environ.copy()
        env['PYTHONPATH'] = ':'.join(copy_path)

        current_path = os.path.abspath(__file__)
        parent_path = os.path.abspath(
            os.path.dirname(current_path) + os.path.sep + ".")
        script_path = parent_path + '/local_job_run.py'

        entry_module_path = job.job_config.properties['entry_module_path']
        python3_location = sys.executable
        cmd = [
            python3_location, script_path, job.job_config.project_path,
            job.exec_func_file, job.exec_args_file, entry_module_path
        ]
        logging.info(' '.join(cmd))
        # Every job submitter needs to set the job log files; the local python
        # job names them LocalPythonJob_{workflow_execution_id}_{stdout,stderr}.log.
        stdout_log = log_path_utils.stdout_log_path(
            job.job_config.project_desc.get_absolute_log_path(), job.job_name)
        stderr_log = log_path_utils.stderr_log_path(
            job.job_config.project_desc.get_absolute_log_path(), job.job_name)
        if not os.path.exists(
                job.job_config.project_desc.get_absolute_log_path()):
            os.mkdir(job.job_config.project_desc.get_absolute_log_path())

        with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
            process = sp.Popen(cmd,
                               stderr=err,
                               stdout=out,
                               shell=False,
                               env=env)
            job_handler = LocalJobHandler(
                job_instance_id=job.instance_id,
                job_uuid=job.uuid,
                workflow_id=job.job_context.workflow_execution_id,
                process_object=process)
            self.job_handler_map[job.uuid] = job_handler
            return job_handler
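A sketch of consuming the returned handler, assuming the process_object passed to LocalJobHandler remains accessible as an attribute (not shown in this excerpt):

handler = submitter.submit_job(job)
# Block until the subprocess exits, then check its status.
exit_code = handler.process_object.wait()
if exit_code != 0:
    logging.error('job %s exited with code %s', job.job_name, exit_code)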
Example #7
    def test_project_upload_download_local(self):
        project_path = get_file_dir(__file__)
        project_desc = get_project_description_from(project_path + "/../")

        # blob_server.type = local
        blob_manager = BlobManagerFactory.get_blob_manager(
            project_desc.project_config)
        uploaded_path = blob_manager.upload_blob('1', project_path)
        self.assertEqual(uploaded_path, project_path)

        downloaded_path = blob_manager.download_blob('1', uploaded_path)
        self.assertEqual(project_path, downloaded_path)
    def submit_job(self, job: LocalFlinkJob):
        """
        Submit the flink job to run in local.

        :param job: A flink job object which contains the necessary information for an execution.
        :return: A job handler that maintains the handler of a job in runtime.
        """
        sys_env = os.environ.copy()
        if job.job_config.flink_home is not None:
            sys_env[
                'PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
        blob_manager = BlobManagerFactory.get_blob_manager(
            job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            downloaded_blob_path = blob_manager.download_blob(
                job.instance_id, job.job_config.project_path)
            python_codes_path = downloaded_blob_path + '/python_codes'
            copy_path.append(python_codes_path)
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        sys_env['PYTHONPATH'] = ':'.join(copy_path)
        logging.info(sys_env['PYTHONPATH'])
        # Every job submitter needs to set the job log files; the local flink
        # job names them LocalFlinkJob_{workflow_execution_id}_{stdout,stderr}.log.

        stdout_log = log_path_utils.stdout_log_path(
            job.job_config.project_desc.get_absolute_log_path(), job.job_name)
        stderr_log = log_path_utils.stderr_log_path(
            job.job_config.project_desc.get_absolute_log_path(), job.job_name)
        if not os.path.exists(
                job.job_config.project_desc.get_absolute_log_path()):
            os.mkdir(job.job_config.project_desc.get_absolute_log_path())
        sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
        with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
            submitted_process = subprocess.Popen(args=job.exec_cmd,
                                                 shell=False,
                                                 stdout=out,
                                                 stderr=err,
                                                 env=sys_env)
        exec_handle = LocalJobHandler(
            job_uuid=job.uuid,
            job_instance_id=job.instance_id,
            workflow_id=job.job_context.workflow_execution_id,
            process_object=submitted_process)
        self.job_handler_map[job.uuid] = exec_handle
        return exec_handle
Example #9
def _upload_project_package(workflow: Workflow):
    """
    Upload the project package.

    :param workflow: The generated workflow.
    """
    project_desc = project_description()
    workflow_json_file = os.path.join(
        project_desc.get_absolute_temp_path(),
        project_desc.project_config.get_project_uuid() + "_workflow.json")
    with open(workflow_json_file, 'w') as f:
        f.write(json_utils.dumps(workflow))
    blob_manager = BlobManagerFactory.get_blob_manager(
        project_desc.project_config['blob'])
    uploaded_project_path = blob_manager.upload_blob(str(workflow.workflow_id),
                                                     project_desc.project_path)
    project_desc.project_config.set_uploaded_project_path(
        uploaded_project_path)
    for job in workflow.jobs.values():
        job.job_config.project_path = uploaded_project_path
    def upload_project_package(self, workflow: Workflow):
        """
        Upload the project package.

        :param workflow: The generated workflow.
        """
        # TODO: add support for updating the project URI.
        with open(
                self.project_desc.get_absolute_temp_path() + "/" +
                self.project_desc.project_config.get_project_uuid() +
                "_workflow.json", 'w') as f:
            f.write(json_utils.dumps(workflow))
        blob_manager = BlobManagerFactory.get_blob_manager(
            self.project_desc.project_config)
        uploaded_project_path = blob_manager.upload_blob(
            str(workflow.workflow_id), self.project_desc.project_path)
        self.project_desc.project_config[
            'uploaded_project_path'] = uploaded_project_path
        for job in workflow.jobs.values():
            job.job_config.project_path = uploaded_project_path
            job.job_config.project_local_path = self.project_desc.project_path
    def submitWorkflow(self, request, context):
        try:
            rq: ScheduleWorkflowRequest = request
            workflow: Workflow = json_utils.loads(rq.workflow_json)
            workflow.workflow_name = rq.workflow_name
            config = {}
            config.update(workflow.project_desc.project_config['blob'])
            # config['local_repository'] = self._scheduler_config.repository()
            blob_manager = BlobManagerFactory.get_blob_manager(config)
            project_path: Text = blob_manager\
                .download_blob(workflow_id=workflow.workflow_id,
                               remote_path=workflow.project_desc.project_config.get('uploaded_project_path'),
                               local_path=self._scheduler_config.repository())

            project_desc: ProjectDesc = get_project_description_from(
                project_path)
            project_name = project_desc.project_name
            # update workflow
            workflow.project_desc = project_desc
            for n, j in workflow.jobs.items():
                j.job_config.project_desc = project_desc
                j.job_config.project_path = project_path

            workflow_info = self._scheduler.submit_workflow(
                workflow, project_desc)
            if workflow_info is None:
                return WorkflowInfoResponse(result=ResultProto(
                    status=StatusProto.ERROR,
                    error_message='{}, {} do not exist!'.format(
                        project_name, workflow.workflow_name)))
            return WorkflowInfoResponse(
                result=ResultProto(status=StatusProto.OK),
                workflow=workflow_to_proto(workflow_info))
        except Exception:
            return WorkflowInfoResponse(
                result=ResultProto(status=StatusProto.ERROR,
                                   error_message=traceback.format_exc()))
    def generate_code(self, op_index, job):
        blob_manager = BlobManagerFactory.get_blob_manager(
            job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            downloaded_blob_path = blob_manager.download_blob(
                job.instance_id, job.job_config.project_path)
            python_codes_path = downloaded_blob_path + '/python_codes'
            copy_path.append(python_codes_path)
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        # De-duplicate while preserving order; joining a set would make the
        # PYTHONPATH entry order non-deterministic.
        add_path = ':'.join(dict.fromkeys(copy_path))

        VVP_OPERATOR = """env_{0} = {{'PYTHONPATH': '{7}'}}\nop_{0} = VVPFlinkOperator(task_id='{1}', dag=dag, bash_command='{2}', """ \
                       + """id_file='{3}', base_url='{4}', namespace='{5}', token='{6}', env=env_{0})\n"""
        id_file = '{}/temp/vvp/{}/{}'.format(
            job.job_config.project_path,
            str(job.job_context.workflow_execution_id), job.instance_id)
        return VVP_OPERATOR.format(op_index, job_name_to_task_id(job.job_name),
                                   job.exec_cmd, id_file,
                                   job.job_config.base_url,
                                   job.job_config.namespace,
                                   job.job_config.token, add_path)
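Rendered with hypothetical values, the VVP variant looks like this; note how the id_file path encodes the workflow execution id and job instance id:

env_0 = {'PYTHONPATH': '/tmp/project/python_codes:/usr/lib/python3.8/site-packages'}
op_0 = VVPFlinkOperator(task_id='my_vvp_task', dag=dag, bash_command='python vvp_job_main.py', id_file='/tmp/project/temp/vvp/1/job_0', base_url='https://vvp.example.com', namespace='default', token='<token>', env=env_0)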
    def test_project_upload_download_oss(self):
        project_path = get_file_dir(__file__)
        config = {
            'blob_server.type': 'oss',
            'local_repository': '/tmp',
            'blob_server.access_key_id': os.environ.get('blob_server.access_key_id'),
            'blob_server.access_key_secret': os.environ.get('blob_server.access_key_secret'),
            'blob_server.endpoint': os.environ.get('blob_server.endpoint'),
            'blob_server.bucket': os.environ.get('blob_server.bucket'),
            'blob_server.repo_name': os.environ.get('blob_server.repo_name')
        }

        blob_manager = BlobManagerFactory.get_blob_manager(config)
        uploaded_path = blob_manager.upload_blob('1', project_path)

        downloaded_path = blob_manager.download_blob('1', uploaded_path)
        self.assertEqual('/tmp/workflow_1_project/project', downloaded_path)
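The OSS test pulls every credential from the environment, so prime those variables before running it. A sketch with placeholder values:

import os

# Placeholders only; substitute real OSS credentials before running the test.
for key in ('blob_server.access_key_id', 'blob_server.access_key_secret',
            'blob_server.endpoint', 'blob_server.bucket',
            'blob_server.repo_name'):
    os.environ.setdefault(key, '<your-oss-setting>')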
Example #14
    def submit_job(self, job: LocalFlinkJob):
        """
        Submit the flink job to run in local.

        :param job: A flink job object which contains the necessary information for an execution.
        :return: A job handler that maintains the handler of a job in runtime.
        """
        # generate cmd
        if job.job_config.language_type == LanguageType.JAVA:
            exec_cmd = ['flink', 'run']
            exec_cmd.extend(['-m', job.job_config.jm_host_port])
            if job.job_config.class_path is not None:
                exec_cmd.extend(['-C', job.job_config.class_path])

            if job.job_config.project_desc.jar_dependencies is not None:
                for jar in job.job_config.project_desc.jar_dependencies:
                    exec_cmd.extend(['-C', "file://{}".format(jar)])
            if job.job_config.main_class is not None:
                exec_cmd.extend(['-c', job.job_config.main_class])

            exec_cmd.extend([job.job_config.jar_path])
            exec_cmd.extend(['--execution-config', job.config_file])

            if job.job_config.args is not None:
                exec_cmd.extend(job.job_config.args)
        else:
            if 'entry_module_path' not in job.job_config.project_desc.project_config:
                entry_module_path = (file_path_to_absolute_module(sys.argv[0])).split('.')[-1]
            else:
                entry_module_path = job.job_config.project_desc.project_config['entry_module_path']

            python3_location = sys.executable
            if job.job_config.local_mode == 'python':
                exec_cmd = [python3_location, version.py_main_file, job.job_config.project_path,
                            job.config_file, entry_module_path]
            else:
                exec_cmd = ['flink', 'run',
                            '-pym', version.py_cluster_module,
                            '-pyfs', job.job_config.project_path + ',' + job.job_config.project_path + '/python_codes/',
                            '-pyexec', python3_location,
                            '--project-path', job.job_config.project_path,
                            '--config-file', job.config_file,
                            '--entry-module-path', entry_module_path]

        job.exec_cmd = exec_cmd
        logging.info(' '.join(exec_cmd))
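        # For illustration with hypothetical values: 'python' local mode spawns
        # roughly `/usr/bin/python3 <py_main_file> /tmp/project <config_file>
        # my_entry_module`, while cluster mode wraps the same information in a
        # `flink run -pym ... -pyfs ...` invocation executed via the Flink CLI
        # found on PATH.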

        sys_env = os.environ.copy()
        if job.job_config.flink_home is not None:
            sys_env['PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
        blob_manager = BlobManagerFactory.get_blob_manager(job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            downloaded_blob_path = blob_manager.download_blob(job.instance_id, job.job_config.project_path)
            python_codes_path = downloaded_blob_path + '/python_codes'
            copy_path.append(python_codes_path)
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        sys_env['PYTHONPATH'] = ':'.join(copy_path)
        logging.info(sys_env['PYTHONPATH'])
        # Every job submitter needs to set the job log files; the local flink
        # job names them LocalFlinkJob_{workflow_execution_id}_{stdout,stderr}.log.

        stdout_log = log_path_utils.stdout_log_path(job.job_config.project_desc.get_absolute_log_path(),
                                                    job.job_name)
        stderr_log = log_path_utils.stderr_log_path(job.job_config.project_desc.get_absolute_log_path(),
                                                    job.job_name)
        if not os.path.exists(job.job_config.project_desc.get_absolute_log_path()):
            os.mkdir(job.job_config.project_desc.get_absolute_log_path())
        sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
        with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
            submitted_process = subprocess.Popen(
                args=job.exec_cmd,
                shell=False,
                stdout=out,
                stderr=err,
                env=sys_env
            )
        exec_handle = LocalJobHandler(job_uuid=job.uuid,
                                      job_instance_id=job.instance_id,
                                      workflow_id=job.job_context.workflow_execution_id,
                                      process_object=submitted_process)
        self.job_handler_map[job.uuid] = exec_handle
        return exec_handle