def generate_code(args):
    """Generate code that calls a dataframe method on the node's single input.

    Reads ``args["node"]``, ``args["requireds_info"]`` and ``args["edges"]``.
    The incoming edges are validated against a checklist of
    ``{"df_count": {1}, "model_count": {0}}`` (presumably: exactly one
    incoming dataframe and no incoming model -- confirm against
    ``IncomingEdgeValidityChecker``).

    The emitted statement starts with
    ``df_<node id>=<input df>.<node["ddfo_name"]>(`` and is completed by
    ``CodeGenerationUtils.handle_instantination_or_call`` from
    ``node["parameters"]``.

    Returns:
        A ``(final_code, shared_function_set, error)`` tuple. ``final_code``
        is an empty list when edge validation reports an error.
    """
    node = args["node"]
    required_info = args["requireds_info"]
    edge_list = args["edges"]

    checklist = {"df_count": {1}, "model_count": {0}}
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], required_info, edge_list, checklist)

    shared_function_set = set()
    if error != ErrorTypes.NO_ERROR:
        # Validation failed: nothing is generated.
        return [], shared_function_set, error

    # Name of the input dataframe variable; an optional "portion" entry is
    # appended as an index expression.
    incoming_df = extra["dfs"][0]
    df_name = "df_" + incoming_df["source_id"]
    if "portion" in incoming_df:
        df_name += "[" + str(incoming_df["portion"]) + "]"

    additional_local_code = []
    my_args = {
        "node_id": node["id"],
        "input_dfs": [df_name],
        "shared_function_set": shared_function_set,
        "additional_local_code": additional_local_code,
        "errors": [],
    }

    parameters = node["parameters"]
    call_prefix = 'df_' + node["id"] + '=' + df_name + '.' + node["ddfo_name"] + '('
    gen_code = CodeGenerationUtils.handle_instantination_or_call(
        parameters, call_prefix, my_args)
    final_code = CodeGenerationUtils.merge_with_additional_code(
        gen_code, additional_local_code)
    return final_code, shared_function_set, error
def generate_code(args):
    """Generate code that reads a data source into ``df_<node id>``.

    Reads ``args["node"]``, ``args["requireds_info"]`` and ``args["edges"]``.
    The incoming edges are validated against a checklist of
    ``{"df_count": {0}, "model_count": {0}}`` (presumably: a source node
    takes no inputs -- confirm against ``IncomingEdgeValidityChecker``), and
    the node itself is then validated by ``DataSourceValidityChecker``.

    Two statement shapes are emitted, depending on the schema flag returned
    by ``DataSourceValidityChecker.check_validity``:

    * flag truthy: ``df_<id>=spark.read.<file_type>(`` ...
    * flag falsy: ``df_<id>=spark.read.format(<file_type>).load(`` ...; in
      this case ``node["parameters"]`` is modified in place (any ``"schema"``
      entry is removed and, when ``node["can_infer_schema"]`` is truthy, an
      ``"inferSchema"`` boolean parameter is added).

    Returns:
        A ``(final_code, shared_function_set, error)`` tuple. ``final_code``
        is an empty list when either validation step reports an error.
    """
    node = args["node"]
    required_info = args["requireds_info"]
    edge_list = args["edges"]

    checklist = {"df_count": {0}, "model_count": {0}}
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], required_info, edge_list, checklist)

    shared_function_set = set()
    if error != ErrorTypes.NO_ERROR:
        return [], shared_function_set, error

    error, schema_is_usable = DataSourceValidityChecker.check_validity(node)
    if error != ErrorTypes.NO_ERROR:
        return [], shared_function_set, error

    additional_local_code = []
    my_args = {
        "node_id": node["id"],
        "shared_function_set": shared_function_set,
        "additional_local_code": additional_local_code,
        "errors": [],
    }

    if schema_is_usable:
        parameters = node["parameters"]
        read_prefix = "df_" + node["id"] + "=" + "spark.read." + node["file_type"] + "("
    else:
        # The schema is dropped for safety; the original author flagged this
        # decision as one to reconsider.
        node["parameters"].pop("schema", None)
        if node["can_infer_schema"]:
            node["parameters"]["inferSchema"] = {
                "value": True,
                "type": "boolean",
            }
        parameters = node["parameters"]
        quoted_type = CodeGenerationUtils.handle_primitive(node["file_type"])
        read_prefix = ("df_" + node["id"] + "=" + "spark.read.format("
                       + quoted_type + ").load(")

    gen_code = CodeGenerationUtils.handle_instantination_or_call(
        parameters, read_prefix, my_args)
    final_code = CodeGenerationUtils.merge_with_additional_code(
        gen_code, additional_local_code)
    return final_code, shared_function_set, error
def __generate_code_for_transformer_instantination(node, df_name, args):
    """Generate the instantiation code for a transformer node.

    When ``MultiInstanceHandlerUtils.should_generate_multiple_instances``
    is truthy for ``node``, ``args["in_pipeline"]`` is set to ``True`` and
    generation is delegated to
    ``MultiInstanceHandlerUtils.multi_instance_generation``. Otherwise a
    single statement beginning with
    ``pipeline_stage_<node id> = <node["transformer_name"]>(`` is built from
    ``node["parameters"]``.

    Returns:
        Whatever the delegated helper returns (the generated code).
    """
    if not MultiInstanceHandlerUtils.should_generate_multiple_instances(node):
        # Single instance: one pipeline-stage assignment.
        parameters = node["parameters"]
        stage_prefix = ('pipeline_stage_' + node["id"] + ' = '
                        + node["transformer_name"] + '(')
        return CodeGenerationUtils.handle_instantination_or_call(
            parameters, stage_prefix, args)

    # Multiple instances: mark the args before delegating. Note that this
    # mutates the caller's dict.
    args["in_pipeline"] = True
    return MultiInstanceHandlerUtils.multi_instance_generation(
        node, df_name, args)
def generate_code(args):
    """Generate code that writes the node's single input dataframe out.

    Reads ``args["node"]``, ``args["requireds_info"]`` and ``args["edges"]``.
    The incoming edges are validated against a checklist of
    ``{"df_count": {1}, "model_count": {0}}`` (presumably: exactly one
    incoming dataframe and no incoming model -- confirm against
    ``IncomingEdgeValidityChecker``).

    The emitted statement starts with
    ``<input df>.write.format(<file_type>).save(`` and is completed from
    ``node["parameters"]``. On success the written path
    (``node["parameters"]["path"]["value"]``) is appended to
    ``args["additional_info"]["written_tables"]``.

    Returns:
        A ``(final_code, shared_function_set, error)`` tuple. ``final_code``
        is an empty list when edge validation reports an error.
    """
    node = args["node"]
    required_info = args["requireds_info"]
    edge_list = args["edges"]

    checklist = {"df_count": {1}, "model_count": {0}}
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], required_info, edge_list, checklist)

    shared_function_set = set()
    if error != ErrorTypes.NO_ERROR:
        # Validation failed: nothing is generated and nothing is recorded.
        return [], shared_function_set, error

    # Name of the input dataframe variable; an optional "portion" entry is
    # appended as an index expression.
    incoming_df = extra["dfs"][0]
    df_name = "df_" + incoming_df["source_id"]
    if "portion" in incoming_df:
        df_name += "[" + str(incoming_df["portion"]) + "]"

    additional_local_code = []
    my_args = {
        "node_id": node["id"],
        "input_dfs": [df_name],
        "shared_function_set": shared_function_set,
        "additional_local_code": additional_local_code,
        "errors": [],
    }

    parameters = node["parameters"]
    quoted_type = CodeGenerationUtils.handle_primitive(node["file_type"])
    write_prefix = df_name + ".write.format(" + quoted_type + ").save("
    gen_code = CodeGenerationUtils.handle_instantination_or_call(
        parameters, write_prefix, my_args)
    final_code = CodeGenerationUtils.merge_with_additional_code(
        gen_code, additional_local_code)

    # Record the output location in the shared additional_info structure.
    written_entry = {"table_path": node["parameters"]["path"]["value"]}
    args["additional_info"]["written_tables"].append(written_entry)

    return final_code, shared_function_set, error
def __single_generation(node, df_name, args):
    """Generate estimator creation, fit and transform code for one node.

    Three statements are produced, in this order:

    1. ``estimator_<id> = <node["estimator_name"]>(...)`` built from
       ``node["parameters"]``;
    2. ``model_<id>=estimator_<id>.fit(<df_name>)``;
    3. ``df_<id>=model_<id>.transform(<df_name>)``.

    Statements 2 and 3 are each followed by an ``os.linesep`` entry.

    Returns:
        The code sequence returned by
        ``CodeGenerationUtils.handle_instantination_or_call``, extended with
        the fit and transform statements.
    """
    parameters = node["parameters"]
    node_id = node["id"]
    estimator_var = 'estimator_' + node_id
    model_var = 'model_' + node_id

    code = CodeGenerationUtils.handle_instantination_or_call(
        parameters, estimator_var + ' = ' + node["estimator_name"] + '(', args)

    fit_statement = model_var + "=" + estimator_var + ".fit(" + df_name + ")"
    transform_statement = ('df_' + node_id + "=" + model_var
                           + '.transform(' + df_name + ')')
    code.extend([fit_statement, os.linesep, transform_statement, os.linesep])
    return code
def __generate_code_for_evaluator_instantination(node, args):
    """Generate the instantiation code for an evaluator node.

    Builds a statement beginning with
    ``evaluator_<node id> = <node["evaluator_name"]>(`` and lets
    ``CodeGenerationUtils.handle_instantination_or_call`` complete it from
    ``node["parameters"]``.

    Returns:
        Whatever ``handle_instantination_or_call`` returns (the generated
        code).
    """
    parameters = node["parameters"]
    constructor_prefix = ('evaluator_' + node["id"] + ' = '
                          + node["evaluator_name"] + '(')
    return CodeGenerationUtils.handle_instantination_or_call(
        parameters, constructor_prefix, args)