def generate(self, sub_graph: AISubGraph,
             project_desc: ProjectDesc) -> KubernetesPythonJob:
    """
    Build the kubernetes python job for a sub graph.

    :param sub_graph: Sub graph to run; its config selects batch or stream
                      execution and is reused as the job config.
    :param project_desc: Project description whose project config is copied
                         onto the job context.
    :return: Job holding the run graph, the serialized run function, the job
             context and the job config.
    """
    # Anything that is not BATCH is treated as STREAM.
    if sub_graph.config.exec_mode == ExecutionMode.BATCH:
        entry_func, mode = batch_run_func, ExecutionMode.BATCH
    else:
        entry_func, mode = stream_run_func, ExecutionMode.STREAM

    run_func = serialize(entry_func)
    py_context = JobContext(mode)
    py_context.project_config = project_desc.project_config

    run_graph: RunGraph = self.build_run_graph(sub_graph, py_context)
    job_config: KubernetesPythonJobConfig = sub_graph.config
    return KubernetesPythonJob(
        run_graph=run_graph,
        run_func=run_func,
        job_context=py_context,
        job_config=job_config,
    )
def generate_job_resource(self, job: LocalFlinkJob) -> None:
    """
    Generate flink job resource.

    Writes the job's execution config file under ``<project_path>/temp``
    (``/tmp/temp`` when the job config has no project path) and records it on
    ``job.config_file``:

    * JAVA jobs: the job is written as text via ``dumps`` and
      ``job.config_file`` is the full path of the file.
    * other jobs: the job is written as bytes via
      ``serialization_utils.serialize`` and ``job.config_file`` is only the
      ``/job_execution_config_...`` part, relative to the temp directory.

    :param job: Local flink job.
    """
    # gen config file
    project_path = job.job_config.project_path
    if project_path is None:
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    # exist_ok avoids the exists()/mkdir() race when several jobs are
    # generated concurrently; makedirs also creates missing parents.
    os.makedirs(project_path_temp, exist_ok=True)

    config_suffix = ('/job_execution_config_' + str(uuid.uuid4()) + "_"
                     + job.instance_id)
    if job.job_config.language_type == LanguageType.JAVA:
        execution_config_file = project_path_temp + config_suffix
        with open(execution_config_file, 'w') as f:
            f.write(dumps(job))
        job.config_file = execution_config_file
    else:
        execution_config_file = config_suffix
        real_execution_config_file = project_path_temp + execution_config_file
        with open(real_execution_config_file, 'wb') as f:
            f.write(serialization_utils.serialize(job))
        job.config_file = execution_config_file
def generate_job_resource(self, job: VVPJob) -> None:
    """
    Generate VVP job resource.

    Serializes the job into a config file under ``<project_path>/temp``
    (``/tmp/temp`` when the job config has no project path), stores the file
    path on ``job.config_file`` and stores the launch command, as a single
    space-joined string, on ``job.exec_cmd``.

    :param job: VVP job.
    """
    project_path = job.job_config.project_path
    if project_path is None:
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    # exist_ok avoids the exists()/mkdir() race when several jobs are
    # generated concurrently; makedirs also creates missing parents.
    os.makedirs(project_path_temp, exist_ok=True)

    execution_config_file = (project_path_temp + '/job_execution_config_'
                             + str(uuid.uuid4()) + "_" + job.instance_id)
    # config_file is set first so that it is part of the serialized job.
    job.config_file = execution_config_file
    with open(execution_config_file, 'wb') as f:
        f.write(serialization_utils.serialize(job))

    script_path = version.vvp_job_main_file
    entry_module_path = job.job_config.project_desc.project_config[
        'entry_module_path']
    python3_location = sys.executable
    cmd = [
        python3_location,
        script_path,
        # Use the resolved path: job.job_config.project_path may be None,
        # which would make the join below raise TypeError.
        project_path,
        execution_config_file,
        entry_module_path,
    ]
    job.exec_cmd = ' '.join(cmd)
    logging.info(job.exec_cmd)
def __init__(self, python_func: object, node_id: Text = None) -> None:
    """
    Keep the user defined python function on the instance in serialized form.

    The base class is initialised with the literal "python_func" and the
    node id.

    :param python_func: User defined python function.
    :param node_id: Id of node.
    """
    super().__init__("python_func", node_id)
    serialized_func = serialize(python_func)
    self.python_func: bytes = serialized_func
def __init__(self, python_object: object, node_id: Text = None) -> None:
    """
    Constructor of FlinkPythonExecutor.

    The object is first handed to the base class as-is; afterwards the
    ``python_object`` attribute is set to its serialized form.

    :param python_object: Object includes the python executor.
    :param node_id: Id of node.
    """
    super().__init__(python_object=python_object, node_id=node_id)
    serialized_object = serialize(python_object)
    self.python_object: bytes = serialized_object
def __init__(self, platform: Text):
    """
    Create a flink job config populated with default values.

    :param platform: Platform handed to the base config together with the
                     flink engine.
    """
    super().__init__(platform=platform, engine=FlinkEngine.engine())

    self.flink_home = None
    self.jm_host_port = 'localhost:8081'

    # Nothing is configured for these until the caller sets them.
    self.class_path = self.py_entry_file = None
    self.py_files = self.py_module = self.jar_path = None

    self.args = list()
    self.language_type: LanguageType = LanguageType.PYTHON

    # A serialized default TableEnvCreator is stored as the create function.
    default_creator = TableEnvCreator()
    self.table_env_create_func: bytes = serialization_utils.serialize(
        default_creator)
def build_run_graph(self, sub_graph: AISubGraph,
                    context: JobContext) -> RunGraph:
    """
    Build the run graph of a sub graph.

    The edges of every node are copied into ``run_graph.dependencies``. The
    nodes are then emitted round by round: in each round every remaining node
    whose edge targets have all been emitted in earlier rounds is appended to
    ``run_graph.nodes``, and its compiled, serialized executor is appended to
    ``run_graph.executor_bytes``.

    :param sub_graph: Sub graph providing the nodes and the edges (keyed by
                      node instance id).
    :param context: Job context passed to ``compile_node``.
    :return: The populated run graph.
    :raises Exception: If a round finds no node that can be emitted.
    """
    run_graph = RunGraph()
    for source_id in sub_graph.edges:
        run_graph.dependencies[source_id] = list(sub_graph.edges[source_id])

    pending: List[AINode] = list(sub_graph.nodes.values())
    done_ids = set()
    total = len(sub_graph.nodes)

    def is_ready(node) -> bool:
        # A node without edges is always ready; otherwise every edge target
        # must already have been emitted.
        if node.instance_id not in sub_graph.edges:
            return True
        return all(edge.target_node_id in done_ids
                   for edge in sub_graph.edges[node.instance_id])

    while len(done_ids) != total:
        # The whole round is selected before anything is marked as done, so
        # nodes of the same round never unblock each other.
        ready = [node for node in pending if is_ready(node)]
        if not ready:
            raise Exception("graph has circle!")
        for node in ready:
            run_graph.nodes.append(node)
            run_graph.executor_bytes.append(
                serialize(self.compile_node(node, context=context)))
            pending.remove(node)
            done_ids.add(node.instance_id)
    return run_graph
def generate_job_resource(self, job: LocalPythonJob):
    """
    Generate local python job resource.

    Writes the job's run function (``job.run_func``, written as-is) and its
    serialized run arguments (run graph plus job context) into two files
    under ``<project_path>/temp`` (``/tmp/temp`` when the job config has no
    project path). Only the file names, not the full paths, are stored on
    ``job.exec_func_file`` and ``job.exec_args_file``.

    :param job: Local python job.
    """
    se_func = job.run_func
    se_args = serialize(
        RunArgs(run_graph=job.run_graph, job_context=job.job_context))

    project_path = job.job_config.project_path
    if project_path is None:
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    # exist_ok avoids the exists()/mkdir() race when several jobs are
    # generated concurrently; makedirs also creates missing parents.
    os.makedirs(project_path_temp, exist_ok=True)

    # Each file gets its own uuid.
    func_file = (project_path_temp + '/tmp_func' + str(uuid.uuid4())
                 + job.instance_id)
    args_file = (project_path_temp + '/tmp_args' + str(uuid.uuid4())
                 + job.instance_id)
    with open(func_file, 'wb') as f:
        f.write(se_func)
    with open(args_file, 'wb') as f:
        f.write(se_args)

    job.exec_func_file = Path(func_file).name
    job.exec_args_file = Path(args_file).name
def generate_job_resource(self, job: KubernetesFlinkJob) -> None:
    """
    Generate kubernetes flink job resource.

    Writes the job's execution config file under ``<project_path>/temp``
    (``/tmp/temp`` when the job config has no project path) and stores the
    file name on ``job.config_file``. JAVA jobs are written as text via
    ``dumps``; other jobs as bytes via ``serialization_utils.serialize``.
    If ``<project_path>/python_codes`` exists it is additionally packed into
    ``<project_path>/python_codes.zip``.

    :param job: Kubernetes flink job.
    """
    # gen config file
    project_path = job.job_config.project_path
    if project_path is None:
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    # exist_ok avoids the exists()/mkdir() race when several jobs are
    # generated concurrently; makedirs also creates missing parents.
    os.makedirs(project_path_temp, exist_ok=True)

    execution_config_file = ('job_execution_config_' + str(uuid.uuid4())
                             + "_" + job.instance_id)
    real_execution_config_file = project_path_temp + '/' + execution_config_file
    if job.job_config.language_type == LanguageType.JAVA:
        with open(real_execution_config_file, 'w') as f:
            f.write(dumps(job))
        job.config_file = Path(real_execution_config_file).name
    else:
        with open(real_execution_config_file, 'wb') as f:
            f.write(serialization_utils.serialize(job))
        job.config_file = execution_config_file

    # generate python_codes.zip
    python_codes = '{}/python_codes'.format(project_path)
    if os.path.exists(python_codes):
        zip_dir = '{}/zip'.format(project_path)
        # Start from a clean staging directory so stale files are not packed.
        if os.path.exists(zip_dir):
            shutil.rmtree(zip_dir)
        shutil.copytree(python_codes, zip_dir + '/python_codes')
        shutil.make_archive(python_codes, 'zip', zip_dir)
def set_table_env_create_func(self, func):
    """
    Replace the stored table env create function.

    :param func: Object to serialize and store as ``table_env_create_func``.
    """
    serialized_func = serialization_utils.serialize(func)
    self.table_env_create_func = serialized_func
def generate_job_resource(self, job: LocalFlinkJob) -> None:
    """
    Generate flink job resource.

    Writes the job's execution config file under ``<project_path>/temp``
    (``/tmp/temp`` when the job config has no project path), records it on
    ``job.config_file`` and stores the launch command, as a list of
    arguments, on ``job.exec_cmd``.

    * JAVA jobs: the job is written as text via ``dumps``,
      ``job.config_file`` is the full path and the command is ``flink run``
      with the configured class paths, jar dependencies, main class, jar and
      extra args.
    * other jobs: the job is written as bytes via
      ``serialization_utils.serialize``, ``job.config_file`` is only the
      ``/job_execution_config_...`` part, and the command either runs
      ``version.py_main_file`` with the current interpreter (``local_mode``
      equal to ``'python'``) or submits ``version.py_cluster_main_file``
      through ``flink run``.

    :param job: Local flink job.
    """
    # gen config file
    project_path = job.job_config.project_path
    if project_path is None:
        project_path = "/tmp"
    project_path_temp = project_path + "/temp"
    # exist_ok avoids the exists()/mkdir() race when several jobs are
    # generated concurrently; makedirs also creates missing parents.
    os.makedirs(project_path_temp, exist_ok=True)

    if job.job_config.language_type == LanguageType.JAVA:
        execution_config_file = (project_path_temp + '/job_execution_config_'
                                 + str(uuid.uuid4()) + "_" + job.instance_id)
        with open(execution_config_file, 'w') as f:
            f.write(dumps(job))
        job.config_file = execution_config_file

        exec_cmd = ['flink', 'run']
        exec_cmd.extend(['-m', job.job_config.jm_host_port])
        if job.job_config.class_path is not None:
            exec_cmd.extend(['-C', job.job_config.class_path])
        if job.job_config.project_desc.jar_dependencies is not None:
            for jar in job.job_config.project_desc.jar_dependencies:
                exec_cmd.extend(['-C', "file://{}".format(jar)])
        if job.job_config.main_class is not None:
            exec_cmd.extend(['-c', job.job_config.main_class])
        exec_cmd.extend([job.job_config.jar_path])
        exec_cmd.extend(['--execution-config', execution_config_file])
        if job.job_config.args is not None:
            exec_cmd.extend(job.job_config.args)
    else:
        project_config = job.job_config.project_desc.project_config
        if 'entry_module_path' not in project_config:
            # Fall back to the last component of the module path of the
            # script that was launched.
            entry_module_path = (file_path_to_absolute_module(
                sys.argv[0])).split('.')[-1]
        else:
            entry_module_path = project_config['entry_module_path']

        execution_config_file = ('/job_execution_config_' + str(uuid.uuid4())
                                 + "_" + job.instance_id)
        real_execution_config_file = project_path_temp + execution_config_file
        with open(real_execution_config_file, 'wb') as f:
            f.write(serialization_utils.serialize(job))
        job.config_file = execution_config_file

        python3_location = sys.executable
        # The commands below use the resolved project_path:
        # job.job_config.project_path may be None, which would raise
        # TypeError on concatenation or when the command is joined.
        if job.job_config.local_mode == 'python':
            exec_cmd = [
                python3_location,
                version.py_main_file,
                project_path,
                execution_config_file,
                entry_module_path,
            ]
        else:
            exec_cmd = [
                'flink', 'run',
                '-pym', version.py_cluster_main_file,
                '-pyfs', project_path + ',' + project_path + '/python_codes/',
                '-pyexec', python3_location,
                '--project-path', project_path,
                '--config-file', execution_config_file,
                '--entry-module-path', entry_module_path,
            ]

    job.exec_cmd = exec_cmd
    logging.info(' '.join(exec_cmd))