def generate_project(code):
    """
    Generate a throwaway project layout in a fresh temp directory.

    Creates the standard project structure (config file, ``python_codes``,
    ``jar_dependencies``, ``resources``), writes *code* into a new module
    under ``python_codes``, and builds the project description from it.

    :param code: Python source text to place in the generated entry module.
    :return: the populated project description (``ProjectDesc``).
    """
    project_path = tempfile.mkdtemp()
    # generate project config file.
    project_config().dump_to_file(project_path + '/project.yaml')
    # generate project structure.
    os.makedirs(project_path + '/python_codes')
    os.makedirs(project_path + '/jar_dependencies')
    os.makedirs(project_path + '/resources')
    # Fix: the original called bare open(...) and leaked the file handle;
    # an empty __init__.py only needs to exist, so close it immediately.
    with open(project_path + '/python_codes/__init__.py', 'w'):
        pass
    fd, temp_file = tempfile.mkstemp(suffix='.py', dir=project_path + '/python_codes')
    # Fix: mkstemp hands back an already-open OS fd; the original opened the
    # path a second time and never closed the fd (descriptor leak). Wrap the
    # fd itself so exactly one handle is opened and closed.
    with os.fdopen(fd, 'w') as f:
        f.write(code)
    # generate project description.
    project_desc: ProjectDesc = get_project_description_from(project_path)
    project_desc.project_name = project_config().get_project_name()
    # Only the last dotted component of the module path is recorded.
    project_desc.project_config['entry_module_path'] = (
        file_path_to_absolute_module(temp_file)).split('.')[-1]
    return project_desc
def set_entry_module_path(self, workflow: Workflow):
    """
    Ensure the project config carries an entry module path, then push the
    whole project configuration into every job of the workflow.

    :param workflow: The generated workflow.
    """
    config = self.project_desc.project_config
    if config.get('entry_module_path') is None:
        # Fall back to the script currently being executed: keep only the
        # last dotted component of its absolute module path.
        module_name = file_path_to_absolute_module(sys.argv[0]).split('.')[-1]
        config['entry_module_path'] = module_name
    for job in workflow.jobs.values():
        job.job_config.properties.update(config)
def submit_job(self, job: LocalFlinkJob):
    """
    Submit the flink job to run in local.

    Builds the ``flink run`` (or plain python) command line for the job,
    prepares the environment (PATH, PYTHONPATH, PYFLINK_CLIENT_EXECUTABLE),
    launches the process with stdout/stderr appended to per-job log files,
    and registers the resulting handler in ``self.job_handler_map``.

    :param job: A flink job object which contains the necessary information
                for an execution.
    :return: A job handler that maintains the handler of a job in runtime.
    """
    # generate cmd
    if job.job_config.language_type == LanguageType.JAVA:
        # Java jobs always go through the flink CLI against the configured
        # jobmanager host:port.
        exec_cmd = ['flink', 'run']
        exec_cmd.extend(['-m', job.job_config.jm_host_port])
        if job.job_config.class_path is not None:
            exec_cmd.extend(['-C', job.job_config.class_path])
        if job.job_config.project_desc.jar_dependencies is not None:
            # Each dependency jar is added to the flink classpath as a
            # file:// URL.
            for jar in job.job_config.project_desc.jar_dependencies:
                exec_cmd.extend(['-C', "file://{}".format(jar)])
        if job.job_config.main_class is not None:
            exec_cmd.extend(['-c', job.job_config.main_class])
        exec_cmd.extend([job.job_config.jar_path])
        # Config file path is expected to have been set by resource
        # generation (see generate_job_resource elsewhere in this file).
        exec_cmd.extend(['--execution-config', job.config_file])
        if job.job_config.args is not None:
            exec_cmd.extend(job.job_config.args)
    else:
        # Python job: resolve the entry module, preferring an explicit
        # project-config value over the currently running script.
        if 'entry_module_path' not in job.job_config.project_desc.project_config:
            entry_module_path = (file_path_to_absolute_module(sys.argv[0])).split('.')[-1]
        else:
            entry_module_path = job.job_config.project_desc.project_config['entry_module_path']
        python3_location = sys.executable
        if job.job_config.local_mode == 'python':
            # Pure-python local mode: run the project's main file directly.
            exec_cmd = [python3_location, version.py_main_file,
                        job.job_config.project_path, job.config_file,
                        entry_module_path]
        else:
            # PyFlink cluster mode: ship both the project root and its
            # python_codes directory as pyfiles.
            exec_cmd = ['flink', 'run',
                        '-pym', version.py_cluster_module,
                        '-pyfs', job.job_config.project_path + ',' + job.job_config.project_path + '/python_codes/',
                        '-pyexec', python3_location,
                        '--project-path', job.job_config.project_path,
                        '--config-file', job.config_file,
                        '--entry-module-path', entry_module_path]
    job.exec_cmd = exec_cmd
    logging.info(' '.join(exec_cmd))
    sys_env = os.environ.copy()
    if job.job_config.flink_home is not None:
        # Put the configured flink distribution first on PATH so 'flink'
        # above resolves to it.
        sys_env['PATH'] = job.job_config.flink_home + '/bin:' + sys_env['PATH']
    blob_manager = BlobManagerFactory.get_blob_manager(job.job_config.properties)
    copy_path = sys.path.copy()
    if job.job_config.project_path is not None:
        # Materialize the project blob locally and expose its python code
        # on the child's PYTHONPATH.
        downloaded_blob_path = blob_manager.download_blob(job.instance_id, job.job_config.project_path)
        python_codes_path = downloaded_blob_path + '/python_codes'
        copy_path.append(python_codes_path)
    if job.job_config.project_desc.python_paths is not None:
        copy_path.extend(job.job_config.project_desc.python_paths)
    # NOTE(review): ':' is the POSIX path separator — presumably this runner
    # only targets unix-like hosts; confirm if Windows support is expected.
    sys_env['PYTHONPATH'] = ':'.join(copy_path)
    logging.info(sys_env['PYTHONPATH'])
    # every job submitter need set the job log file,
    # local flink job set log file name LocalFlinkJob_{workflow_execution_id}_{stdout,stderr}.log
    stdout_log = log_path_utils.stdout_log_path(job.job_config.project_desc.get_absolute_log_path(),
                                                job.job_name)
    stderr_log = log_path_utils.stderr_log_path(job.job_config.project_desc.get_absolute_log_path(),
                                                job.job_name)
    if not os.path.exists(job.job_config.project_desc.get_absolute_log_path()):
        # NOTE(review): os.mkdir fails if the parent is missing and races
        # with concurrent submitters — os.makedirs(..., exist_ok=True)
        # would be safer; left as-is here.
        os.mkdir(job.job_config.project_desc.get_absolute_log_path())
    sys_env['PYFLINK_CLIENT_EXECUTABLE'] = sys.executable
    # Logs are appended, so repeated runs of the same job accumulate output.
    with open(stdout_log, 'a') as out, open(stderr_log, 'a') as err:
        submitted_process = subprocess.Popen(
            args=job.exec_cmd,
            shell=False,
            stdout=out,
            stderr=err,
            env=sys_env
        )
    exec_handle = LocalJobHandler(job_uuid=job.uuid,
                                  job_instance_id=job.instance_id,
                                  workflow_id=job.job_context.workflow_execution_id,
                                  process_object=submitted_process)
    self.job_handler_map[job.uuid] = exec_handle
    return exec_handle
def generate_job_resource(self, job: LocalFlinkJob) -> None:
    """
    Generate flink job resource.

    Serializes the job into an execution-config file under
    ``<project_path>/temp`` (JSON text for Java jobs, binary serialization
    for Python jobs), records the file on ``job.config_file``, and
    pre-computes ``job.exec_cmd``.

    :param job: Local flink job.
    """
    # gen config file
    project_path = job.job_config.project_path
    if project_path is None:
        # Fall back to /tmp when the job has no project directory.
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    if not os.path.exists(project_path_temp):
        # NOTE(review): exists+mkdir races with concurrent callers;
        # os.makedirs(..., exist_ok=True) would be safer. Left as-is.
        os.mkdir(project_path_temp)
    if job.job_config.language_type == LanguageType.JAVA:
        # Java path: JSON-ish text dump, full path stored on the job.
        execution_config_file = project_path_temp + '/job_execution_config_' + str(
            uuid.uuid4()) + "_" + job.instance_id
        with open(execution_config_file, 'w') as f:
            f.write(dumps(job))
        job.config_file = execution_config_file
        exec_cmd = ['flink', 'run']
        exec_cmd.extend(['-m', job.job_config.jm_host_port])
        if job.job_config.class_path is not None:
            exec_cmd.extend(['-C', job.job_config.class_path])
        if job.job_config.project_desc.jar_dependencies is not None:
            # Dependency jars are appended to the classpath as file:// URLs.
            for jar in job.job_config.project_desc.jar_dependencies:
                exec_cmd.extend(['-C', "file://{}".format(jar)])
        if job.job_config.main_class is not None:
            exec_cmd.extend(['-c', job.job_config.main_class])
        exec_cmd.extend([job.job_config.jar_path])
        exec_cmd.extend(['--execution-config', execution_config_file])
        if job.job_config.args is not None:
            exec_cmd.extend(job.job_config.args)
    else:
        # Python path: resolve entry module, preferring the project config
        # over the currently executing script.
        if 'entry_module_path' not in job.job_config.project_desc.project_config:
            entry_module_path = (file_path_to_absolute_module(
                sys.argv[0])).split('.')[-1]
        else:
            entry_module_path = job.job_config.project_desc.project_config[
                'entry_module_path']
        execution_config_file = '/job_execution_config_' + str(
            uuid.uuid4()) + "_" + job.instance_id
        real_execution_config_file = project_path_temp + execution_config_file
        # Binary serialization (not the text dump used for Java jobs).
        with open(real_execution_config_file, 'wb') as f:
            f.write(serialization_utils.serialize(job))
        # NOTE(review): unlike the Java branch, this stores only the bare
        # file name ('/job_execution_config_...'), not the full path, and
        # the same bare name is passed on the command line below —
        # presumably the consumer resolves it against the project temp dir;
        # confirm this asymmetry is intentional.
        job.config_file = execution_config_file
        python3_location = sys.executable
        if job.job_config.local_mode == 'python':
            # Pure-python local mode: run the project's main file directly.
            exec_cmd = [
                python3_location, version.py_main_file,
                job.job_config.project_path, execution_config_file,
                entry_module_path
            ]
        else:
            # PyFlink cluster mode.
            # NOTE(review): this uses version.py_cluster_main_file while
            # submit_job uses version.py_cluster_module — verify the two
            # are meant to differ.
            exec_cmd = [
                'flink', 'run',
                '-pym', version.py_cluster_main_file,
                '-pyfs', job.job_config.project_path + ',' + job.job_config.project_path + '/python_codes/',
                '-pyexec', python3_location,
                '--project-path', job.job_config.project_path,
                '--config-file', execution_config_file,
                '--entry-module-path', entry_module_path
            ]
    job.exec_cmd = exec_cmd
    logging.info(' '.join(exec_cmd))