예제 #1
0
    def generate(self, sub_graph: AISubGraph, project_desc: ProjectDesc) -> KubernetesPythonJob:
        """
        Build a KubernetesPythonJob from a sub graph and a project description.

        The sub graph's ``config.exec_mode`` selects both the serialized entry
        function (batch or stream) and the execution mode of the job context.

        :param sub_graph: Sub graph whose nodes, edges and config describe the job.
        :param project_desc: Project description; its ``project_config`` is
                             attached to the job context.
        :return: The assembled KubernetesPythonJob.
        """
        batch_mode = sub_graph.config.exec_mode == ExecutionMode.BATCH
        entry_func = batch_run_func if batch_mode else stream_run_func
        exec_mode = ExecutionMode.BATCH if batch_mode else ExecutionMode.STREAM

        run_func = serialize(entry_func)
        py_context = JobContext(exec_mode)
        py_context.project_config = project_desc.project_config

        # The run graph is compiled against the context prepared above.
        run_graph: RunGraph = self.build_run_graph(sub_graph, py_context)

        return KubernetesPythonJob(
            run_graph=run_graph,
            run_func=run_func,
            job_context=py_context,
            job_config=sub_graph.config,
        )
예제 #2
0
    def generate_job_resource(self, job: LocalFlinkJob) -> None:
        """
        Write the execution config file of a flink job and record it on the job.

        The file is created inside ``<project_path>/temp`` (``/tmp/temp`` when
        the job has no project path). Java jobs are written as text via
        ``dumps`` and ``job.config_file`` holds the full file path; all other
        jobs are written as bytes via ``serialization_utils.serialize`` and
        ``job.config_file`` holds only the ``/job_execution_config_...`` part
        relative to the temp directory.

        :param job: Local flink job.
        """
        # gen config file
        base_dir = job.job_config.project_path
        if base_dir is None:
            base_dir = "/tmp"
        temp_dir = base_dir + "/temp"
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)

        if job.job_config.language_type == LanguageType.JAVA:
            config_path = "".join(
                (temp_dir, '/job_execution_config_', str(uuid.uuid4()), "_",
                 job.instance_id))
            with open(config_path, 'w') as out:
                out.write(dumps(job))
            job.config_file = config_path
            return

        # Non-Java jobs keep only the name (with its leading slash) on the job.
        relative_name = "".join(
            ('/job_execution_config_', str(uuid.uuid4()), "_", job.instance_id))
        with open(temp_dir + relative_name, 'wb') as out:
            out.write(serialization_utils.serialize(job))
        job.config_file = relative_name
예제 #3
0
    def generate_job_resource(self, job: VVPJob) -> None:
        """
        Generate VVP job resource.

        Serializes the job into ``<project_path>/temp`` (``/tmp/temp`` when the
        job has no project path), stores the file path in ``job.config_file``
        and stores the launch command line in ``job.exec_cmd``.

        :param job: VVP job.
        """
        project_path = job.job_config.project_path
        if project_path is None:
            project_path = "/tmp"
        project_path_temp = project_path + "/temp"
        if not os.path.exists(project_path_temp):
            os.mkdir(project_path_temp)
        execution_config_file = project_path_temp + '/job_execution_config_' + str(
            uuid.uuid4()) + "_" + job.instance_id

        # config_file is set before serializing so it is part of the written job.
        job.config_file = execution_config_file
        with open(execution_config_file, 'wb') as f:
            f.write(serialization_utils.serialize(job))

        script_path = version.vvp_job_main_file
        entry_module_path = job.job_config.project_desc.project_config[
            'entry_module_path']
        python3_location = sys.executable
        # Use the resolved project_path (with the "/tmp" fallback) rather than
        # job.job_config.project_path: the latter may be None, which would make
        # the join below raise TypeError.
        cmd = [
            python3_location, script_path, project_path,
            execution_config_file, entry_module_path
        ]
        job.exec_cmd = ' '.join(cmd)
        logging.info(job.exec_cmd)
예제 #4
0
    def __init__(self, python_func: object, node_id: Text = None) -> None:
        """
        Store the serialized user defined python function on the instance.

        The parent initializer is called with the fixed name ``"python_func"``
        and the given node id.

        :param python_func: User defined python function.
        :param node_id: Id of node.
        """
        super().__init__("python_func", node_id)
        serialized_func: bytes = serialize(python_func)
        self.python_func = serialized_func
예제 #5
0
    def __init__(self, python_object: object, node_id: Text = None) -> None:
        """
        Construct of FlinkPythonExecutor.

        After the parent initializer has run, ``python_object`` is replaced on
        the instance by its serialized bytes.

        :param python_object: Object includes the python executor.
        :param node_id: Id of node.
        """
        super().__init__(python_object=python_object, node_id=node_id)
        serialized_object: bytes = serialize(python_object)
        self.python_object = serialized_object
 def __init__(self, platform: Text):
     """
     Initialize the flink job config with its default values.

     The parent initializer receives the platform and ``FlinkEngine.engine()``.
     The job manager address defaults to ``'localhost:8081'``, the language
     type to ``LanguageType.PYTHON``, and the table environment factory to a
     serialized ``TableEnvCreator`` instance; path-like settings start as None.

     :param platform: Platform passed through to the parent initializer.
     """
     super().__init__(platform=platform, engine=FlinkEngine.engine())
     self.flink_home = None
     self.jm_host_port = 'localhost:8081'
     # Chained assignment binds left to right, keeping the attribute order.
     self.class_path = self.py_entry_file = self.py_files = None
     self.py_module = self.jar_path = None
     self.args = list()
     self.language_type: LanguageType = LanguageType.PYTHON
     default_creator = TableEnvCreator()
     self.table_env_create_func: bytes = serialization_utils.serialize(
         default_creator)
    def build_run_graph(self, sub_graph: AISubGraph,
                        context: JobContext) -> RunGraph:
        """
        Build a RunGraph whose nodes are ordered according to the sub graph edges.

        Every edge list of the sub graph is copied into
        ``run_graph.dependencies``. Nodes are then emitted in passes: a node is
        ready when it has no entry in ``sub_graph.edges`` or when every edge
        listed under its ``instance_id`` targets an already processed node.
        Each emitted node is compiled with ``self.compile_node`` and the
        serialized result is appended to ``run_graph.executor_bytes``.

        :param sub_graph: Sub graph providing ``nodes`` and ``edges``.
        :param context: Job context forwarded to ``compile_node``.
        :return: The populated RunGraph.
        :raises Exception: If a pass finds no ready node while nodes remain.
        """
        run_graph = RunGraph()
        done_ids = set()
        pending: List[AINode] = list(sub_graph.nodes.values())

        # Copy each edge list into the run graph under the same key.
        for source_id in sub_graph.edges:
            run_graph.dependencies[source_id] = list(sub_graph.edges[source_id])

        total = len(sub_graph.nodes)
        while len(done_ids) != total:
            # Collect, in their current order, the nodes whose edge targets
            # have all been processed in earlier passes.
            ready = [
                node for node in pending
                if node.instance_id not in sub_graph.edges
                or all(edge.target_node_id in done_ids
                       for edge in sub_graph.edges[node.instance_id])
            ]
            if not ready:
                raise Exception("graph has circle!")
            for node in ready:
                run_graph.nodes.append(node)
                run_graph.executor_bytes.append(
                    serialize(self.compile_node(node, context=context)))
                pending.remove(node)
                done_ids.add(node.instance_id)
        return run_graph
    def generate_job_resource(self, job: LocalPythonJob):
        """
        Write the job's run function and run arguments to files.

        Both files are created in ``<project_path>/temp`` (``/tmp/temp`` when
        the job has no project path). The function file receives
        ``job.run_func`` as is; the arguments file receives a serialized
        ``RunArgs`` built from the job's run graph and job context. Only the
        file names (without directory) are stored on the job.

        :param job: Local python job.
        """
        func_bytes = job.run_func
        args_bytes = serialize(
            RunArgs(run_graph=job.run_graph, job_context=job.job_context))

        base_dir = job.job_config.project_path
        if base_dir is None:
            base_dir = "/tmp"
        temp_dir = base_dir + "/temp"
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)

        func_file = "".join(
            (temp_dir, '/tmp_func', str(uuid.uuid4()), job.instance_id))
        args_file = "".join(
            (temp_dir, '/tmp_args', str(uuid.uuid4()), job.instance_id))

        # Function file first, then arguments file.
        for target, payload in ((func_file, func_bytes), (args_file, args_bytes)):
            with open(target, 'wb') as out:
                out.write(payload)

        job.exec_func_file: Text = Path(func_file).name
        job.exec_args_file: Text = Path(args_file).name
    def generate_job_resource(self, job: KubernetesFlinkJob) -> None:
        """
        Generate kubernetes flink job resource.

        Writes the job's execution config into ``<project_path>/temp``
        (``/tmp/temp`` when the job has no project path) and stores the file
        name on ``job.config_file``. Java jobs are written as text via
        ``dumps``, all others as bytes via ``serialization_utils.serialize``.
        If ``<project_path>/python_codes`` exists, it is copied under
        ``<project_path>/zip`` and archived as ``python_codes.zip``.

        :param job: Kubernetes flink job.
        """
        # gen config file
        root_dir = job.job_config.project_path
        if root_dir is None:
            root_dir = "/tmp"
        temp_dir = root_dir + "/temp"
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)

        config_name = "".join(
            ('job_execution_config_', str(uuid.uuid4()), "_", job.instance_id))
        config_path = temp_dir + '/' + config_name

        if job.job_config.language_type == LanguageType.JAVA:
            with open(config_path, 'w') as out:
                out.write(dumps(job))
            job.config_file = Path(config_path).name
        else:
            with open(config_path, 'wb') as out:
                out.write(serialization_utils.serialize(job))
            job.config_file = config_name

        # generate python_codes.zip
        codes_dir = '{}/python_codes'.format(root_dir)
        if not os.path.exists(codes_dir):
            return

        # Rebuild the staging directory from scratch before archiving it.
        staging_dir = '{}/zip'.format(root_dir)
        if os.path.exists(staging_dir):
            shutil.rmtree(staging_dir)
        shutil.copytree(codes_dir, staging_dir + '/python_codes')
        shutil.make_archive(codes_dir, 'zip', staging_dir)
 def set_table_env_create_func(self, func):
     """
     Replace the table environment factory with a serialized ``func``.

     :param func: Object to serialize and store as ``table_env_create_func``.
     """
     serialized_func = serialization_utils.serialize(func)
     self.table_env_create_func = serialized_func
    def generate_job_resource(self, job: LocalFlinkJob) -> None:
        """
        Generate flink job resource.

        Writes the job's execution config into ``<project_path>/temp``
        (``/tmp/temp`` when the job has no project path), stores the config
        file reference in ``job.config_file`` and the launch command (a list
        of arguments) in ``job.exec_cmd``.

        :param job: Local flink job.
        """
        # gen config file
        project_path = job.job_config.project_path
        if project_path is None:
            project_path = "/tmp"
        project_path_temp = project_path + "/temp"
        if not os.path.exists(project_path_temp):
            os.mkdir(project_path_temp)

        if job.job_config.language_type == LanguageType.JAVA:
            execution_config_file = project_path_temp + '/job_execution_config_' + str(
                uuid.uuid4()) + "_" + job.instance_id

            with open(execution_config_file, 'w') as f:
                f.write(dumps(job))
            job.config_file = execution_config_file

            exec_cmd = ['flink', 'run']
            exec_cmd.extend(['-m', job.job_config.jm_host_port])
            if job.job_config.class_path is not None:
                exec_cmd.extend(['-C', job.job_config.class_path])

            # Each jar dependency is passed as its own file:// URL via -C.
            if job.job_config.project_desc.jar_dependencies is not None:
                for jar in job.job_config.project_desc.jar_dependencies:
                    exec_cmd.extend(['-C', "file://{}".format(jar)])
            if job.job_config.main_class is not None:
                exec_cmd.extend(['-c', job.job_config.main_class])

            exec_cmd.extend([job.job_config.jar_path])
            exec_cmd.extend(['--execution-config', execution_config_file])

            if job.job_config.args is not None:
                exec_cmd.extend(job.job_config.args)
        else:
            # Fall back to the module name of the running script when the
            # project config does not name an entry module.
            if 'entry_module_path' not in job.job_config.project_desc.project_config:
                entry_module_path = (file_path_to_absolute_module(
                    sys.argv[0])).split('.')[-1]
            else:
                entry_module_path = job.job_config.project_desc.project_config[
                    'entry_module_path']
            # Here the config file reference keeps only the name (with its
            # leading slash); the file itself lives in the temp directory.
            execution_config_file = '/job_execution_config_' + str(
                uuid.uuid4()) + "_" + job.instance_id
            real_execution_config_file = project_path_temp + execution_config_file
            with open(real_execution_config_file, 'wb') as f:
                f.write(serialization_utils.serialize(job))
            job.config_file = execution_config_file
            python3_location = sys.executable
            # The commands below use the resolved project_path (with the
            # "/tmp" fallback) instead of job.job_config.project_path, which
            # may be None and would raise TypeError when concatenated/joined.
            if job.job_config.local_mode == 'python':
                exec_cmd = [
                    python3_location, version.py_main_file,
                    project_path, execution_config_file,
                    entry_module_path
                ]
            else:
                exec_cmd = [
                    'flink', 'run', '-pym', version.py_cluster_main_file,
                    '-pyfs', project_path + ',' +
                    project_path + '/python_codes/', '-pyexec',
                    python3_location, '--project-path',
                    project_path, '--config-file',
                    execution_config_file, '--entry-module-path',
                    entry_module_path
                ]

        job.exec_cmd = exec_cmd
        logging.info(' '.join(exec_cmd))