Esempio n. 1
0
        def generate_project(code):
            """
            Generate a project automatically, without a pre-existing project path.

            A fresh temporary directory is created and filled with the project
            config file, the ``python_codes``, ``jar_dependencies`` and
            ``resources`` directories, an empty ``python_codes/__init__.py`` and
            a uniquely named ``.py`` file containing ``code``.

            :param code: text written verbatim into the generated python file.
            :return: project description loaded from the generated directory,
                     with its project name and the ``entry_module_path`` config
                     entry filled in.
            """
            project_path = tempfile.mkdtemp()
            # generate project config file.
            project_config().dump_to_file(project_path + '/project.yaml')
            # generate project structure.
            python_codes_path = project_path + '/python_codes'
            os.makedirs(python_codes_path)
            os.makedirs(project_path + '/jar_dependencies')
            os.makedirs(project_path + '/resources')
            # Create the empty __init__.py and release its handle immediately
            # instead of leaving the file object open until garbage collection.
            with open(python_codes_path + '/__init__.py', 'w'):
                pass
            fd, temp_file = tempfile.mkstemp(suffix='.py', dir=python_codes_path)
            # mkstemp returns an already-open OS-level descriptor; wrap it so it
            # is closed after writing rather than re-opening the path and
            # leaking the descriptor.
            with os.fdopen(fd, 'w') as f:
                f.write(code)
            # generate project description.
            project_desc: ProjectDesc = get_project_description_from(
                project_path)
            project_desc.project_name = project_config().get_project_name()
            # Only the last dotted component of the module path is stored as the
            # entry module.
            project_desc.project_config['entry_module_path'] = (
                file_path_to_absolute_module(temp_file)).split('.')[-1]
            return project_desc
Esempio n. 2
0
 def set_entry_module_path(self, workflow: Workflow):
     """
     Set the entry module path and propagate the project config to the jobs.

     When the project config has no 'entry_module_path' (missing or None),
     it is derived from the running script (``sys.argv[0]``), keeping only
     the last dotted component of its module path. The whole project config
     is then merged into the properties of every job of the workflow.

     :param workflow: The generated workflow.
     """
     config = self.project_desc.project_config
     if config.get('entry_module_path') is None:
         script_module = file_path_to_absolute_module(sys.argv[0])
         config['entry_module_path'] = script_module.split('.')[-1]
     for workflow_job in workflow.jobs.values():
         workflow_job.job_config.properties.update(config)
Esempio n. 3
0
    def submit_job(self, job: LocalFlinkJob):
        """
        Submit the flink job to run in local.

        Builds the command line (also stored on ``job.exec_cmd``), prepares the
        environment of the child process (PATH, PYTHONPATH,
        PYFLINK_CLIENT_EXECUTABLE), starts the process with its output appended
        to the job's stdout/stderr log files and registers a handler for it in
        ``self.job_handler_map``.

        :param job: A flink job object which contains the necessary information for an execution.
        :return: A job handler that maintains the handler of a job in runtime.
        """
        # generate cmd
        if job.job_config.language_type == LanguageType.JAVA:
            exec_cmd = ['flink', 'run']
            exec_cmd.extend(['-m', job.job_config.jm_host_port])
            if job.job_config.class_path is not None:
                exec_cmd.extend(['-C', job.job_config.class_path])

            if job.job_config.project_desc.jar_dependencies is not None:
                for jar in job.job_config.project_desc.jar_dependencies:
                    exec_cmd.extend(['-C', "file://{}".format(jar)])
            if job.job_config.main_class is not None:
                exec_cmd.extend(['-c', job.job_config.main_class])

            exec_cmd.extend([job.job_config.jar_path])
            exec_cmd.extend(['--execution-config', job.config_file])

            if job.job_config.args is not None:
                exec_cmd.extend(job.job_config.args)
        else:
            # Fall back to the module of the running script when the project
            # config does not name an entry module.
            if 'entry_module_path' not in job.job_config.project_desc.project_config:
                entry_module_path = (file_path_to_absolute_module(sys.argv[0])).split('.')[-1]
            else:
                entry_module_path = job.job_config.project_desc.project_config['entry_module_path']

            python3_location = sys.executable
            if job.job_config.local_mode == 'python':
                exec_cmd = [python3_location, version.py_main_file, job.job_config.project_path,
                            job.config_file, entry_module_path]
            else:
                exec_cmd = ['flink', 'run',
                            '-pym', version.py_cluster_module,
                            '-pyfs', job.job_config.project_path + ',' + job.job_config.project_path + '/python_codes/',
                            '-pyexec', python3_location,
                            '--project-path', job.job_config.project_path,
                            '--config-file', job.config_file,
                            '--entry-module-path', entry_module_path]

        job.exec_cmd = exec_cmd
        logging.info(' '.join(exec_cmd))

        sys_env = os.environ.copy()
        if job.job_config.flink_home is not None:
            flink_bin = job.job_config.flink_home + '/bin'
            inherited_path = sys_env.get('PATH')
            # PATH may be unset or empty: avoid a KeyError and do not leave a
            # dangling ':' entry behind the flink bin directory.
            sys_env['PATH'] = (flink_bin + ':' + inherited_path) if inherited_path else flink_bin
        blob_manager = BlobManagerFactory.get_blob_manager(job.job_config.properties)
        copy_path = sys.path.copy()
        if job.job_config.project_path is not None:
            downloaded_blob_path = blob_manager.download_blob(job.instance_id, job.job_config.project_path)
            python_codes_path = downloaded_blob_path + '/python_codes'
            copy_path.append(python_codes_path)
        if job.job_config.project_desc.python_paths is not None:
            copy_path.extend(job.job_config.project_desc.python_paths)
        sys_env['PYTHONPATH'] = ':'.join(copy_path)
        logging.info(sys_env['PYTHONPATH'])
        # every job submitter need set the job log file;
        # here the file names are derived by log_path_utils from the project log
        # directory and the job name.
        log_dir = job.job_config.project_desc.get_absolute_log_path()
        stdout_log = log_path_utils.stdout_log_path(log_dir, job.job_name)
        stderr_log = log_path_utils.stderr_log_path(log_dir, job.job_name)
        # exist_ok avoids the check-then-create race and also creates missing
        # parent directories.
        os.makedirs(log_dir, exist_ok=True)
        sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
        # The parent's file objects are closed when the with block exits; the
        # child process keeps writing through its own inherited descriptors.
        with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
            submitted_process = subprocess.Popen(
                args=job.exec_cmd,
                shell=False,
                stdout=out,
                stderr=err,
                env=sys_env
            )
        exec_handle = LocalJobHandler(job_uuid=job.uuid,
                                      job_instance_id=job.instance_id,
                                      workflow_id=job.job_context.workflow_execution_id,
                                      process_object=submitted_process)
        self.job_handler_map[job.uuid] = exec_handle
        return exec_handle
    def generate_job_resource(self, job: LocalFlinkJob) -> None:
        """
        Generate flink job resource.

        Serializes ``job`` into a uniquely named execution config file under
        ``<project_path>/temp`` (``/tmp/temp`` when the job has no project
        path) and builds the command line that runs the job. The results are
        stored on the job as ``job.config_file`` and ``job.exec_cmd``.

        For java jobs ``job.config_file`` is the full path of the config file;
        for the other jobs it is the file name (with a leading '/') relative to
        the temp directory.

        :param job: Local flink job.
        """
        # gen config file
        project_path = job.job_config.project_path
        if project_path is None:
            project_path = "/tmp"
        project_path_temp = project_path + "/temp"
        # exist_ok avoids the check-then-create race and also creates missing
        # parent directories.
        os.makedirs(project_path_temp, exist_ok=True)

        if job.job_config.language_type == LanguageType.JAVA:
            execution_config_file = project_path_temp + '/job_execution_config_' + str(
                uuid.uuid4()) + "_" + job.instance_id

            with open(execution_config_file, 'w') as f:
                f.write(dumps(job))
            job.config_file = execution_config_file

            exec_cmd = ['flink', 'run']
            exec_cmd.extend(['-m', job.job_config.jm_host_port])
            if job.job_config.class_path is not None:
                exec_cmd.extend(['-C', job.job_config.class_path])

            if job.job_config.project_desc.jar_dependencies is not None:
                for jar in job.job_config.project_desc.jar_dependencies:
                    exec_cmd.extend(['-C', "file://{}".format(jar)])
            if job.job_config.main_class is not None:
                exec_cmd.extend(['-c', job.job_config.main_class])

            exec_cmd.extend([job.job_config.jar_path])
            exec_cmd.extend(['--execution-config', execution_config_file])

            if job.job_config.args is not None:
                exec_cmd.extend(job.job_config.args)
        else:
            # Fall back to the module of the running script when the project
            # config does not name an entry module.
            if 'entry_module_path' not in job.job_config.project_desc.project_config:
                entry_module_path = (file_path_to_absolute_module(
                    sys.argv[0])).split('.')[-1]
            else:
                entry_module_path = job.job_config.project_desc.project_config[
                    'entry_module_path']
            # The command line carries only the name relative to the temp
            # directory; the file itself is written below that directory.
            execution_config_file = '/job_execution_config_' + str(
                uuid.uuid4()) + "_" + job.instance_id
            real_execution_config_file = project_path_temp + execution_config_file
            with open(real_execution_config_file, 'wb') as f:
                f.write(serialization_utils.serialize(job))
            job.config_file = execution_config_file
            python3_location = sys.executable
            # Use the resolved project_path (with its "/tmp" fallback) so a job
            # without a project path no longer fails with a TypeError while the
            # command is built or logged; it matches where the config file was
            # written.
            if job.job_config.local_mode == 'python':
                exec_cmd = [
                    python3_location, version.py_main_file,
                    project_path, execution_config_file,
                    entry_module_path
                ]
            else:
                # NOTE(review): '-pym' takes a python module name in the Flink
                # CLI; confirm version.py_cluster_main_file holds a module name
                # rather than a file path.
                exec_cmd = [
                    'flink', 'run', '-pym', version.py_cluster_main_file,
                    '-pyfs', project_path + ',' +
                    project_path + '/python_codes/', '-pyexec',
                    python3_location, '--project-path',
                    project_path, '--config-file',
                    execution_config_file, '--entry-module-path',
                    entry_module_path
                ]

        job.exec_cmd = exec_cmd
        logging.info(' '.join(exec_cmd))