def submit_job(self, job: LocalCMDJob) -> AbstractJobHandler:
    """Submit a local command job as a child process.

    Stdout/stderr are appended to per-job log files named
    LocalCMDJob_{workflow_execution_id}_{stdout,stderr}.log (built by
    ``log_path_utils``).

    :param job: The local command job holding the command and its config.
    :return: A job handler wrapping the spawned process.
    """
    # Fall back to the system temp dir when the project path does not exist.
    if not Path(job.job_config.project_desc.project_path).exists():
        job.job_config.project_desc.project_path = tempfile.gettempdir()
    log_dir = job.job_config.project_desc.get_absolute_log_path()
    stdout_log = log_path_utils.stdout_log_path(log_dir, job.job_name)
    stderr_log = log_path_utils.stderr_log_path(log_dir, job.job_name)
    # makedirs (not mkdir) so missing intermediate directories do not fail;
    # exist_ok avoids a race with concurrent submitters.
    os.makedirs(log_dir, exist_ok=True)
    with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
        # NOTE(review): shell=True assumes job.exec_cmd is a command string;
        # if it were a list, only its first element would run — confirm.
        submitted_process = Popen(args=job.exec_cmd,
                                  shell=True,
                                  stdout=out,
                                  stderr=err)
        job.pid = submitted_process.pid
        job_handler = LocalJobHandler(
            job_instance_id=job.instance_id,
            job_uuid=job.uuid,
            workflow_id=job.job_context.workflow_execution_id,
            process_object=submitted_process)
        self.job_handler_map[job.uuid] = job_handler
        return job_handler
    def submit_job(self, job: LocalPythonJob) -> Any:
        """Submit a local python job by launching ``local_job_run.py``.

        The downloaded project code and any extra python paths are exposed
        to the child via PYTHONPATH; stdout/stderr are appended to per-job
        log files named LocalPythonJob_{workflow_execution_id}_{stdout,stderr}.log.

        :param job: The local python job holding exec files and config.
        :return: A job handler wrapping the spawned process.
        """
        blob_manager = BlobManagerFactory.get_blob_manager(
            job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            # Download the project blob and expose its python_codes directory.
            downloaded_blob_path = blob_manager.download_blob(
                job.instance_id, job.job_config.project_path)
            copy_path.append(downloaded_blob_path + '/python_codes')
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        env = os.environ.copy()
        env['PYTHONPATH'] = ':'.join(copy_path)

        # local_job_run.py lives in the same directory as this module.
        script_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'local_job_run.py')

        entry_module_path = job.job_config.properties['entry_module_path']
        cmd = [
            sys.executable, script_path, job.job_config.project_path,
            job.exec_func_file, job.exec_args_file, entry_module_path
        ]
        logging.info(' '.join(cmd))
        log_dir = job.job_config.project_desc.get_absolute_log_path()
        stdout_log = log_path_utils.stdout_log_path(log_dir, job.job_name)
        stderr_log = log_path_utils.stderr_log_path(log_dir, job.job_name)
        # makedirs (not mkdir) so missing parent directories do not fail;
        # exist_ok avoids a race with concurrent submitters.
        os.makedirs(log_dir, exist_ok=True)

        with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
            process = sp.Popen(cmd,
                               stderr=err,
                               stdout=out,
                               shell=False,
                               env=env)
            job_handler = LocalJobHandler(
                job_instance_id=job.instance_id,
                job_uuid=job.uuid,
                workflow_id=job.job_context.workflow_execution_id,
                process_object=process)
            self.job_handler_map[job.uuid] = job_handler
            return job_handler
    def submit_job(self, job: LocalFlinkJob):
        """
        Submit the flink job to run in local.

        :param job: A flink job object which contains the necessary information for an execution.
        :return: A job handler that maintains the handler of a job in runtime.
        """
        sys_env = os.environ.copy()
        # Prefer the configured flink distribution's bin directory on PATH.
        if job.job_config.flink_home is not None:
            sys_env[
                'PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
        blob_manager = BlobManagerFactory.get_blob_manager(
            job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            # Download the project blob and expose its python_codes directory.
            downloaded_blob_path = blob_manager.download_blob(
                job.instance_id, job.job_config.project_path)
            copy_path.append(downloaded_blob_path + '/python_codes')
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        sys_env['PYTHONPATH'] = ':'.join(copy_path)
        logging.info(sys_env['PYTHONPATH'])
        # Log files: LocalFlinkJob_{workflow_execution_id}_{stdout,stderr}.log
        log_dir = job.job_config.project_desc.get_absolute_log_path()
        stdout_log = log_path_utils.stdout_log_path(log_dir, job.job_name)
        stderr_log = log_path_utils.stderr_log_path(log_dir, job.job_name)
        # makedirs (not mkdir) so missing parent directories do not fail;
        # exist_ok avoids a race with concurrent submitters.
        os.makedirs(log_dir, exist_ok=True)
        # pyflink resolves its client interpreter from this variable.
        sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
        with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
            submitted_process = subprocess.Popen(args=job.exec_cmd,
                                                 shell=False,
                                                 stdout=out,
                                                 stderr=err,
                                                 env=sys_env)
        exec_handle = LocalJobHandler(
            job_uuid=job.uuid,
            job_instance_id=job.instance_id,
            workflow_id=job.job_context.workflow_execution_id,
            process_object=submitted_process)
        self.job_handler_map[job.uuid] = exec_handle
        return exec_handle
# ----- Ejemplo n.º 4 (score: 0) -----
    def submit_job(self,
                   job: Job,
                   job_runtime_env: JobRuntimeEnv = None) -> JobHandle:
        """Launch a python job as a child process and return its handle."""
        python_job: PythonJob = job
        handle = PythonJobHandle(
            job=job, job_execution=job_runtime_env.job_execution_info)
        run_graph_file = os.path.join(job_runtime_env.generated_dir,
                                      python_job.run_graph_file)

        # Child environment: user-supplied variables plus a PYTHONPATH that
        # puts the python-dep and workflow directories ahead of sys.path.
        child_env = os.environ.copy()
        if 'env' in job.job_config.properties:
            child_env.update(job.job_config.properties.get('env'))
        search_paths = sys.path.copy()
        search_paths.insert(0, job_runtime_env.workflow_dir)
        search_paths.insert(0, job_runtime_env.python_dep_dir)
        child_env['PYTHONPATH'] = ':'.join(search_paths)

        # python_run_main.py sits beside this module.
        script_path = os.path.join(os.path.dirname(__file__),
                                   'python_run_main.py')
        if 'python_executable_path' in job.job_config.properties:
            interpreter = job.job_config.properties.get(
                'python_executable_path')
        else:
            interpreter = sys.executable
        bash_command = [
            interpreter, script_path, run_graph_file,
            job_runtime_env.working_dir
        ]

        stdout_log = log_path_utils.stdout_log_path(job_runtime_env.log_dir,
                                                    job.job_name)
        stderr_log = log_path_utils.stderr_log_path(job_runtime_env.log_dir,
                                                    job.job_name)
        if not os.path.exists(job_runtime_env.log_dir):
            os.makedirs(job_runtime_env.log_dir)

        handle.sub_process = self.submit_python_process(
            bash_command=bash_command,
            env=child_env,
            working_dir=job_runtime_env.working_dir,
            stdout_log=stdout_log,
            stderr_log=stderr_log)
        return handle
# ----- Ejemplo n.º 5 (score: 0) -----
    def submit_job(self, job: LocalFlinkJob):
        """
        Submit the flink job to run in local.

        :param job: A flink job object which contains the necessary information for an execution.
        :return: A job handler that maintains the handler of a job in runtime.
        """
        # generate cmd
        if job.job_config.language_type == LanguageType.JAVA:
            # Java job: build a `flink run` command against the configured
            # jobmanager, with -C classpath entries for each jar dependency.
            exec_cmd = ['flink', 'run']
            exec_cmd.extend(['-m', job.job_config.jm_host_port])
            if job.job_config.class_path is not None:
                exec_cmd.extend(['-C', job.job_config.class_path])

            if job.job_config.project_desc.jar_dependencies is not None:
                for jar in job.job_config.project_desc.jar_dependencies:
                    # -C expects a URL, hence the file:// prefix.
                    exec_cmd.extend(['-C', "file://{}".format(jar)])
            if job.job_config.main_class is not None:
                exec_cmd.extend(['-c', job.job_config.main_class])

            exec_cmd.extend([job.job_config.jar_path])
            exec_cmd.extend(['--execution-config', job.config_file])

            if job.job_config.args is not None:
                exec_cmd.extend(job.job_config.args)
        else:
            # Python job: resolve the entry module either from the project
            # config or from the script currently being executed.
            if 'entry_module_path' not in job.job_config.project_desc.project_config:
                entry_module_path = (file_path_to_absolute_module(sys.argv[0])).split('.')[-1]
            else:
                entry_module_path = job.job_config.project_desc.project_config['entry_module_path']

            python3_location = sys.executable
            if job.job_config.local_mode == 'python':
                # Run the job directly with the local python interpreter.
                exec_cmd = [python3_location, version.py_main_file, job.job_config.project_path,
                            job.config_file, entry_module_path]
            else:
                # Submit through the flink CLI in pyflink mode (-pym/-pyfs).
                exec_cmd = ['flink', 'run',
                            '-pym', version.py_cluster_module,
                            '-pyfs', job.job_config.project_path + ',' + job.job_config.project_path + '/python_codes/',
                            '-pyexec', python3_location,
                            '--project-path', job.job_config.project_path,
                            '--config-file', job.config_file,
                            '--entry-module-path', entry_module_path]

        job.exec_cmd = exec_cmd
        logging.info(' '.join(exec_cmd))

        sys_env = os.environ.copy()
        # Prefer the configured flink distribution's bin directory on PATH.
        if job.job_config.flink_home is not None:
            sys_env['PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
        blob_manager = BlobManagerFactory.get_blob_manager(job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            # Download the project blob and expose its python_codes directory.
            downloaded_blob_path = blob_manager.download_blob(job.instance_id, job.job_config.project_path)
            python_codes_path = downloaded_blob_path + '/python_codes'
            copy_path.append(python_codes_path)
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        sys_env['PYTHONPATH'] = ':'.join(copy_path)
        logging.info(sys_env['PYTHONPATH'])
        # every job submitter need set the job log file,
        # local flink job set log file name LocalFlinkJob_{workflow_execution_id}_{stdout,stderr}.log

        stdout_log = log_path_utils.stdout_log_path(job.job_config.project_desc.get_absolute_log_path(),
                                                    job.job_name)
        stderr_log = log_path_utils.stderr_log_path(job.job_config.project_desc.get_absolute_log_path(),
                                                    job.job_name)
        if not os.path.exists(job.job_config.project_desc.get_absolute_log_path()):
            # NOTE(review): mkdir fails when parent dirs are missing — makedirs
            # would be safer; confirm the log parent always exists.
            os.mkdir(job.job_config.project_desc.get_absolute_log_path())
        # pyflink resolves its client interpreter from this variable.
        sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
        with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
            # The child inherits the log file descriptors, so closing them in
            # the parent (when the with-block exits) is safe.
            submitted_process = subprocess.Popen(
                args=job.exec_cmd,
                shell=False,
                stdout=out,
                stderr=err,
                env=sys_env
            )
        exec_handle = LocalJobHandler(job_uuid=job.uuid,
                                      job_instance_id=job.instance_id,
                                      workflow_id=job.job_context.workflow_execution_id,
                                      process_object=submitted_process)
        self.job_handler_map[job.uuid] = exec_handle
        return exec_handle
# ----- Ejemplo n.º 6 (score: 0) -----
    def submit_job(self,
                   job: Job,
                   job_runtime_env: JobRuntimeEnv = None) -> JobHandle:
        """Submit a flink job (python local/cluster mode or java) and return its handle.

        Builds the launch command from the job config, starts it via
        ``self.submit_process``, and — for java jobs — waits on the stdout
        log to extract the flink JobID.

        :param job: The flink job to run.
        :param job_runtime_env: Runtime directories (generated/working/log dirs).
        :return: A FlinkJobHandle wrapping the spawned process.
        """
        handle = FlinkJobHandle(
            job=job, job_execution=job_runtime_env.job_execution_info)
        flink_job: FlinkJob = job
        job_config: FlinkJobConfig = FlinkJobConfig.from_job_config(
            flink_job.job_config)
        env = os.environ.copy()
        # Overlay any user-provided environment variables.
        env.update(job_config.properties.get('env', {}))
        if not flink_job.is_java:
            run_graph_file = os.path.join(job_runtime_env.generated_dir,
                                          flink_job.run_graph_file)
            flink_env_file = os.path.join(job_runtime_env.generated_dir,
                                          flink_job.flink_env_file)
            # Add PYTHONPATH
            copy_path = sys.path.copy()
            copy_path.insert(0, job_runtime_env.workflow_dir)
            copy_path.insert(0, job_runtime_env.python_dep_dir)
            env['PYTHONPATH'] = ':'.join(copy_path)

            # flink_run_main.py sits beside this module.
            current_path = os.path.dirname(__file__)
            script_path = os.path.join(current_path, 'flink_run_main.py')
            python3_location = sys.executable
            if job_config.run_mode == 'local':
                # Local mode: run the script directly with this interpreter.
                bash_command = [
                    python3_location, script_path, run_graph_file,
                    job_runtime_env.working_dir, flink_env_file
                ]
            elif job_config.run_mode == 'cluster':
                # Cluster mode: submit via the flink CLI with python files
                # (-pyfs), archived resources (-pyarch), and the entry script.
                bash_command = ['flink', 'run']

                if job_config.flink_run_args is not None:
                    bash_command.extend(job_config.flink_run_args)

                bash_command.append('-pyfs')
                files = [job_runtime_env.workflow_dir]
                if os.path.exists(job_runtime_env.python_dep_dir):
                    files.append(job_runtime_env.python_dep_dir)
                bash_command.append(','.join(files))

                if os.path.exists(job_runtime_env.resource_dir):
                    # Zip the resource dir so flink can ship it to the cluster.
                    zip_file_util.make_dir_zipfile(
                        job_runtime_env.resource_dir,
                        os.path.join(job_runtime_env.working_dir,
                                     'resources.zip'))
                bash_command.extend([
                    '-pyarch',
                    os.path.join(job_runtime_env.working_dir,
                                 'resources.zip#resources')
                ])
                bash_command.extend([
                    '-py', script_path, run_graph_file,
                    job_runtime_env.working_dir, flink_env_file
                ])
            else:
                raise Exception(
                    'Flink supports run_mode local or cluster, do not support {}.'
                    .format(job_config.run_mode))
        else:
            # flink java job
            bash_command = ['flink', 'run']

            if job_config.flink_run_args is not None:
                bash_command.extend(job_config.flink_run_args)

            if os.path.exists(job_runtime_env.resource_dir):
                zip_file_util.make_dir_zipfile(
                    job_runtime_env.resource_dir,
                    os.path.join(job_runtime_env.working_dir, 'resources.zip'))
            # The serialized processor describes entry class, main jar and args.
            processor: FlinkJavaProcessor = serialization_utils.\
                read_object_from_serialized_file(os.path.join(job_runtime_env.generated_dir, flink_job.processor_file))
            if processor.entry_class is not None:
                bash_command.extend(['-c', processor.entry_class])
            bash_command.append(
                os.path.join(job_runtime_env.jar_dep_dir,
                             processor.main_jar_file))
            bash_command.extend(processor.args)
        self.log.info(' '.join(bash_command))
        stdout_log = log_path_utils.stdout_log_path(job_runtime_env.log_dir,
                                                    job.job_name)
        stderr_log = log_path_utils.stderr_log_path(job_runtime_env.log_dir,
                                                    job.job_name)
        if not os.path.exists(job_runtime_env.log_dir):
            os.makedirs(job_runtime_env.log_dir)

        sub_process = self.submit_process(
            bash_command=bash_command,
            env=env,
            working_dir=job_runtime_env.working_dir,
            stdout_log=stdout_log,
            stderr_log=stderr_log)
        handle.sub_process = sub_process
        handle.stdout_log = stdout_log
        handle.stderr_log = stderr_log

        if flink_job.is_java:
            # write job_id to file.
            # Poll (1s interval) until the child creates its stdout log,
            # logging a heartbeat every ~20 seconds.
            num = 0
            while True:
                if os.path.exists(stdout_log):
                    break
                else:
                    time.sleep(1)
                num += 1
                if 0 == num % 20:
                    self.log.info("Waiting for stdout log file created...")

            # Poll until the first stdout line appears, then parse the JobID.
            # NOTE(review): assumes the flink CLI's first output line is
            # "Job has been submitted with JobID <id>" — the id is token 7
            # with its trailing newline stripped; confirm for the flink
            # version in use. Any other first line skips job_id extraction.
            while True:
                with open(stdout_log, 'r') as f:
                    lines = f.readlines()
                if len(lines) >= 1:
                    line = lines[0]
                    if line.startswith("Job has been submitted with JobID"):
                        job_id = line.split(' ')[6][:-1]
                        with open(
                                os.path.join(job_runtime_env.working_dir,
                                             'job_id'), 'w') as fp:
                            fp.write(job_id)
                        self.log.info('Flink job id {}'.format(job_id))
                    break
                else:
                    time.sleep(1)

        return handle