コード例 #1
0
def generate_code(args):
    """Build the code fragments for a random-split node.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated through
    ``IncomingEdgeValidityChecker.check_validity`` with a checklist of
    ``df_count == {1}`` and ``model_count == {0}``.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments; it stays empty unless ``error`` is ``ErrorTypes.NO_ERROR``,
    in which case it holds one statement of the form
    ``df_<id>=<input>.randomSplit(<weights>, <seed>)`` followed by
    ``os.linesep``.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    expected_inputs = {"df_count": {1}, "model_count": {0}}
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, expected_inputs)
    if error != ErrorTypes.NO_ERROR:
        return [], error

    # The input is referenced either as a plain dataframe variable or as
    # an indexed entry ("df_<name>[<index>]") taken from extra["portions"].
    if extra["dfs"]:
        source_df = "df_" + extra["dfs"][0]
    else:
        portion = extra["portions"][0]
        source_df = "df_" + portion[0] + "[" + str(portion[1]) + "]"

    statement = "df_" + node["id"] + "=" + source_df + ".randomSplit("
    statement += CodeGenerationUtils.arrange_parameter_value(
        node["parameters"]["weights"])
    statement += ", "
    statement += CodeGenerationUtils.arrange_parameter_value(
        node["parameters"]["seed"])
    statement += ")"

    return [statement, os.linesep], error
コード例 #2
0
def generate_code(args):
    """Build the code fragments for a streaming file-source node.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated with a checklist of
    ``df_count == {0}`` and ``model_count == {0}``, then the node itself is
    validated through ``DataSourceValidityChecker.check_validity``.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments and stays empty unless both checks report
    ``ErrorTypes.NO_ERROR``. On success it contains a
    ``schema_<id>=<schema>`` assignment followed by
    ``df_<id> = spark.readStream.schema(schema_<id>).<file_type>(<path>)``.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    checklist = {"df_count": {0}, "model_count": {0}}
    error, _ = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, checklist)
    code = []
    if error != ErrorTypes.NO_ERROR:
        return code, error

    error, _ = DataSourceValidityChecker.check_validity(node)
    if error != ErrorTypes.NO_ERROR:
        return code, error

    parameters = node["parameters"]
    schema_var = "schema_" + node["id"]

    # Must be a valid schema at this point.
    # The schema is read from node["parameters"], the same mapping that
    # holds "file_path"; the former node["parameter"] lookup was a typo.
    code.append(schema_var + "=")
    code.extend([
        CodeGenerationUtils.arrange_schema(parameters["schema"]),
        os.linesep
    ])

    code.extend([
        "df_" + node["id"] + ' = spark.readStream.schema(' + schema_var +
        ")." + node["file_type"] + "(" +
        CodeGenerationUtils.arrange_parameter_value(
            parameters["file_path"]) + ")", os.linesep
    ])

    return code, error
コード例 #3
0
def generate_code(args):
    """Build the code fragments that write a dataframe to a Kafka topic.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated with a checklist of
    ``df_count == {1}`` and ``model_count == {0}``.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments; it stays empty unless ``error`` is ``ErrorTypes.NO_ERROR``.
    On success the fragments concatenate to a single
    ``<input>.selectExpr(...).write.format("kafka")...save()`` statement
    followed by ``os.linesep``.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    expected_inputs = {"df_count": {1}, "model_count": {0}}
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, expected_inputs)
    if error != ErrorTypes.NO_ERROR:
        return [], error

    # The input is referenced either as a plain dataframe variable or as
    # an indexed entry ("df_<name>[<index>]") taken from extra["portions"].
    if extra["dfs"]:
        source_df = "df_" + extra["dfs"][0]
    else:
        portion = extra["portions"][0]
        source_df = "df_" + portion[0] + "[" + str(portion[1]) + "]"

    # The unique column is cast to a string key; the whole row becomes the
    # JSON value.
    select_and_format = (
        source_df + '.selectExpr("CAST(' +
        node["parameters"]["unique_column_name"] +
        ' AS STRING) AS key", "to_json(struct(*)) AS value").write.format("kafka").option("kafka.bootstrap.servers", '
    )
    servers = CodeGenerationUtils.arrange_parameter_value(
        node["parameters"]["host"] + ":" + node["parameters"]["port"]) + ")"
    topic_and_save = (
        '.option("topic", ' + CodeGenerationUtils.arrange_parameter_value(
            node["parameters"]["topic"]) + ").save()"
    )

    return [select_and_format, servers, topic_and_save, os.linesep], error
コード例 #4
0
def generate_code(args):
    """Build the code fragments for a batch file-read node.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated with a checklist of
    ``df_count == {0}`` and ``model_count == {0}``, then the node itself is
    validated through ``DataSourceValidityChecker.check_validity``, which
    also reports whether the node's schema is usable.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments and stays empty unless both checks report
    ``ErrorTypes.NO_ERROR``. On success the fragments concatenate to
    ``df_<id>=spark.read.<file_type>(path=<path>, ...)``, optionally
    preceded by a ``schema_<id>=<schema>`` assignment. Every parameter
    other than ``file_path`` (and ``schema`` when it is used) is forwarded
    as a keyword argument of the read call.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    checklist = {"df_count": {0}, "model_count": {0}}
    error, _ = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, checklist)
    code = []
    if error != ErrorTypes.NO_ERROR:
        return code, error

    error, is_schema_appropriate = DataSourceValidityChecker.check_validity(node)
    if error != ErrorTypes.NO_ERROR:
        return code, error

    parameters = node["parameters"]
    # Copy the keys into a real list: on Python 3 dict.keys() is a view
    # without a remove() method.
    remaining_params = list(parameters.keys())
    remaining_params.remove("file_path")

    read_prefix = "df_" + node["id"] + "=" + "spark.read." + node["file_type"] + "(path="

    if is_schema_appropriate:
        schema_var = "schema_" + node["id"]
        code.append(schema_var + "=")
        code.extend([CodeGenerationUtils.arrange_schema(parameters["schema"]), os.linesep])
        # Only the path goes through arrange_parameter_value; the schema
        # keyword references the variable assigned just above.
        code.append(
            read_prefix
            + CodeGenerationUtils.arrange_parameter_value(parameters["file_path"])
            + ", " + "schema=" + schema_var)
        remaining_params.remove("schema")
    elif node["can_infer_schema"]:
        code.append(
            read_prefix
            + CodeGenerationUtils.arrange_parameter_value(parameters["file_path"])
            + ", " + "inferSchema=" + "True")
    else:
        code.append(
            read_prefix
            + CodeGenerationUtils.arrange_parameter_value(parameters["file_path"]))

    for param in remaining_params:
        code.append(
            ", " + param + "="
            + CodeGenerationUtils.arrange_parameter_value(parameters[param]))
    code.extend([")", os.linesep])

    return code, error
コード例 #5
0
def __arg_dict_to_string(args):
    """Render ``args`` as the source text of a dict literal.

    Every key is rendered with
    ``CodeGenerationUtils.arrange_parameter_value``. Values whose key is in
    ``__set_of_datetime_arguments`` are rendered as a
    ``datetime.strptime("<value>", "<__datetime_format>")`` call; all other
    values are rendered with ``arrange_parameter_value``.

    Assumes a datetime argument is a string that already matches the
    pre-defined datetime format.

    Returns the literal as one string, e.g. ``"{}"`` for an empty mapping.
    Entries are separated by ``","`` and written as ``<key>: <value>``.
    """
    entries = []
    for arg in args:
        rendered_key = CodeGenerationUtils.arrange_parameter_value(arg)
        if arg in __set_of_datetime_arguments:
            # The key must be emitted here as well; without it the output
            # is not a valid dict literal.
            rendered_value = ('datetime.strptime("' + args[arg] + '", "'
                              + __datetime_format + '")')
        else:
            rendered_value = CodeGenerationUtils.arrange_parameter_value(args[arg])
        entries.append(rendered_key + ": " + rendered_value)
    return "{" + ",".join(entries) + "}"
コード例 #6
0
def generate_code(args):
    """Build the code fragments that fit an estimator and apply its model.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated with a checklist of
    ``df_count == {1}`` and ``model_count == {0}``.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments; it stays empty unless ``error`` is ``ErrorTypes.NO_ERROR``.
    On success the fragments concatenate to three statements:

    * ``estimator_<id> = <estimator_name>(<param>=<value>, ...)``
    * ``model_<id>=estimator_<id>.fit(<input>)``
    * ``df_<id>=model_<id>.transform(<input>)``
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    checklist = {"df_count": {1}, "model_count": {0}}
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, checklist)
    if error != ErrorTypes.NO_ERROR:
        return [], error

    # df_name already carries the "df_" prefix.
    if extra["dfs"]:
        df_name = "df_" + extra["dfs"][0]
    else:
        portion = extra["portions"][0]
        df_name = "df_" + portion[0] + "[" + str(portion[1]) + "]"

    parameters = node["parameters"]
    code = ['estimator_' + node["id"] + ' = ' + node["estimator_name"] + '(']
    for index, param in enumerate(parameters):
        if index:
            code.append(", ")
        code.append(
            param + "="
            + CodeGenerationUtils.arrange_parameter_value(parameters[param]))
    code.extend([")", os.linesep])

    # Use df_name as is: prepending another "df_" would reference a
    # non-existent "df_df_..." variable in the generated code.
    code.extend([
        'model_' + node["id"] + "=" + 'estimator_' + node["id"]
        + ".fit(" + df_name + ")", os.linesep
    ])
    code.extend([
        'df_' + node["id"] + "=" + 'model_' + node["id"]
        + '.transform(' + df_name + ')', os.linesep
    ])

    return code, error
コード例 #7
0
def __generate_code_for_evaluator_instantination(node):
    """Return the code fragments that instantiate the node's evaluator.

    The fragments concatenate to
    ``evaluator_<id> = <evaluator_name>(<param>=<value>, ...)`` followed by
    ``os.linesep``. Each entry of ``node["parameters"]`` becomes one keyword
    argument whose value is rendered by
    ``CodeGenerationUtils.arrange_parameter_value``.
    """
    parameters = node["parameters"]
    fragments = ['evaluator_' + node["id"] + ' = ' + node["evaluator_name"] + '(']
    for position, name in enumerate(parameters):
        # A separator goes between arguments, never after the last one.
        if position:
            fragments.append(", ")
        fragments.append(
            name + "="
            + CodeGenerationUtils.arrange_parameter_value(parameters[name]))
    fragments.append(")")
    fragments.append(os.linesep)
    return fragments
コード例 #8
0
def generate_code(args):
    """Build the code fragments for a streaming Kafka-source node.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated with a checklist of
    ``df_count == {0}`` and ``model_count == {0}``, then the node itself is
    validated through ``DataSourceValidityChecker.check_validity``.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments and stays empty unless both checks report
    ``ErrorTypes.NO_ERROR``. On success it contains a
    ``schema_<id>=<schema>`` assignment followed by a
    ``df_<id> = spark.readStream.format("kafka")...`` statement that
    subscribes to the topic, parses the ``value`` column with the schema
    and keeps the ``timestamp`` column.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    checklist = {"df_count": {0}, "model_count": {0}}
    error, _ = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, checklist)
    code = []
    if error != ErrorTypes.NO_ERROR:
        return code, error

    error, _ = DataSourceValidityChecker.check_validity(node)
    if error != ErrorTypes.NO_ERROR:
        return code, error

    parameters = node["parameters"]

    # Must be a valid schema at this point.
    # The schema is read from node["parameters"], the same mapping that
    # holds host/port/topic; the former node["parameter"] lookup was a typo.
    code.append("schema_" + node["id"] + "=")
    code.extend([
        CodeGenerationUtils.arrange_schema(parameters["schema"]),
        os.linesep
    ])

    code.append(
        "df_" + node["id"] +
        ' = spark.readStream.format("kafka").option("kafka.bootstrap.servers", '
    )
    code.append(
        CodeGenerationUtils.arrange_parameter_value(
            parameters["host"] + ":" + parameters["port"]) + ")")
    # Only the topic itself is rendered as a parameter value; the closing
    # parenthesis of .option(...) is appended afterwards.
    code.append('.option("subscribe", ' +
                CodeGenerationUtils.arrange_parameter_value(
                    parameters["topic"]) + ")")
    code.append(
        '.load().select(from_json(col("value").cast("string"), schema_'
        + node["id"] + ")")
    # For streams, we will use timestamp as a key while writing to kafka topic in case.
    code.extend([
        '.alias("value"), "timestamp").select("value.*", "timestamp")',
        os.linesep
    ])

    return code, error
コード例 #9
0
def generate_code(args):
    """Build the code fragments that write a stream to files.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated with a checklist of
    ``df_count == {1}`` and ``model_count == {0}``.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments; it stays empty unless ``error`` is ``ErrorTypes.NO_ERROR``.
    On success the fragments concatenate to a
    ``query_<id>=<input>.writeStream.format(...).trigger(...)...start()``
    statement and a ``query_<id>.awaitTermination()`` statement, each
    followed by ``os.linesep``.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    expected_inputs = {"df_count": {1}, "model_count": {0}}
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, expected_inputs)
    if error != ErrorTypes.NO_ERROR:
        return [], error

    # The input is referenced either as a plain dataframe variable or as
    # an indexed entry ("df_<name>[<index>]") taken from extra["portions"].
    if extra["dfs"]:
        source_df = "df_" + extra["dfs"][0]
    else:
        portion = extra["portions"][0]
        source_df = "df_" + portion[0] + "[" + str(portion[1]) + "]"

    fragments = []
    fragments.append(
        "query_" + node["id"] + "=" + source_df + ".writeStream.format("
        + CodeGenerationUtils.arrange_parameter_value(node["file_type"]) + ")")
    fragments.append(".trigger(" + __generate_trigger_code(node) + ")")
    fragments.append(
        '.option("path", '
        + CodeGenerationUtils.arrange_parameter_value(
            node["parameters"]["file_path"])
        + ")")
    fragments.append(
        '.option("checkpointLocation", '
        + CodeGenerationUtils.arrange_parameter_value(
            node["parameters"]["checkpoint_path"])
        + ").start()")
    fragments.append(os.linesep)
    fragments.append("query_" + node["id"])
    fragments.append(".awaitTermination()")
    fragments.append(os.linesep)

    return fragments, error
コード例 #10
0
def __generate_code_for_param_grid(node, cur_estimator_name):
    """Return the code fragments that define ``param_grid_<id>``.

    When ``node["parameters"]["parameter_grid"]`` is empty (falsy), the
    result is the single fragment ``param_grid_<id>=None``. Otherwise the
    fragments concatenate to a ``ParamGridBuilder()`` chain with one
    ``.addGrid(<cur_estimator_name>.<param>, <values>)`` call per grid
    entry, closed by ``.build()`` and ``os.linesep``.
    """
    target = "param_grid_" + node["id"]
    # Assuming that fix parameters are given in the estimator itself.
    # Maybe reconsider this part.
    grid = node["parameters"]["parameter_grid"]
    if not grid:
        return [target + "=None"]

    fragments = [target + "=ParamGridBuilder()"]
    fragments.extend(
        ".addGrid(" + cur_estimator_name + "." + name + ", "
        + CodeGenerationUtils.arrange_parameter_value(grid[name]) + ")"
        for name in grid
    )
    fragments.append(".build()")
    fragments.append(os.linesep)
    return fragments
コード例 #11
0
def generate_code(args):
    """Build the code fragments that join two incoming dataframes.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated with a checklist of
    ``df_count == {2}`` and ``model_count == {0}``.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments; it stays empty unless ``error`` is ``ErrorTypes.NO_ERROR``,
    in which case it holds one statement of the form
    ``df_<id>=<first>.join(<second>, <join_column>)`` followed by
    ``os.linesep``. The two dataframe names come from
    ``__get_dfs_to_join(extra)``.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    expected_inputs = {"df_count": {2}, "model_count": {0}}
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, expected_inputs)
    if error != ErrorTypes.NO_ERROR:
        return [], error

    inputs = __get_dfs_to_join(extra)
    statement = "df_" + node["id"] + "=" + inputs[0] + ".join(" + inputs[1]
    statement += ", "
    statement += CodeGenerationUtils.arrange_parameter_value(
        node["parameters"]["join_column"])
    statement += ")"

    return [statement, os.linesep], error
コード例 #12
0
ファイル: ModelLoad.py プロジェクト: ahmetfyildirim/arakat
def generate_code(args):
    """Build the code fragments that load a saved model.

    ``args`` must provide the keys ``"node"``, ``"requireds_info"`` and
    ``"edges"``. The incoming edges are validated with a checklist of
    ``df_count == {0}`` and ``model_count == {0}``.

    Returns a ``(code, error)`` tuple. ``code`` is a list of string
    fragments; it stays empty unless ``error`` is ``ErrorTypes.NO_ERROR``,
    in which case it holds one statement of the form
    ``model_<id>=<model_type>.load(<model_path>)`` followed by
    ``os.linesep``.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    expected_inputs = {"df_count": {0}, "model_count": {0}}
    error, _ = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, expected_inputs)
    if error != ErrorTypes.NO_ERROR:
        return [], error

    parameters = node["parameters"]
    statement = "model_" + node["id"] + "=" + parameters["model_type"] + ".load("
    statement += CodeGenerationUtils.arrange_parameter_value(
        parameters["model_path"])
    statement += ")"

    return [statement, os.linesep], error