Example #1
from typing import Any

from ruamel.yaml import YAML


def convert_yaml(
    yaml_data: str, output: Any, array=True, inject_comments=False
) -> Any:
    # JsonnetRenderer is a project-local class that consumes the YAML event
    # stream and writes Jsonnet to `output`.
    yaml = YAML(typ="rt")
    yaml.version = "1.1"  # type: ignore  # yaml.version is mis-typed as None
    events = yaml.parse(yaml_data)
    return JsonnetRenderer(events, output, array, inject_comments).render()
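A minimal driver sketch for the snippet above, assuming JsonnetRenderer is a project-local class and that render() returns the rendered Jsonnet text (as the return statement suggests); the sample YAML is illustrative:

from io import StringIO

buf = StringIO()  # stream handed to the renderer as `output`
rendered = convert_yaml("name: demo\nports: [80, 443]\n", buf)
print(rendered)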
Example #2
from io import BytesIO, StringIO

from ruamel.yaml import YAML


def test_unicode_transfer(unicode_filename, verbose=False):
    yaml = YAML(typ='safe', pure=True)
    with open(unicode_filename, 'rb') as fp:
        data = fp.read().decode('utf-8')
    for encoding in [None, 'utf-8', 'utf-16-be', 'utf-16-le']:
        input_data = data
        if encoding is not None:
            # prepend a BOM so the byte stream is self-describing
            input_data = ('\ufeff' + input_data).encode(encoding)
        output1 = yaml.emit(yaml.parse(input_data), allow_unicode=True)
        if encoding is None:
            stream = StringIO()
        else:
            stream = BytesIO()
        yaml.emit(yaml.parse(input_data), stream, allow_unicode=True)
        output2 = stream.getvalue()
        assert isinstance(output1, str), (type(output1), encoding)
        if encoding is None:
            assert isinstance(output2, str), (type(output2), encoding)
        else:
            assert isinstance(output2, bytes), (type(output2), encoding)
            # the emitted bytes must decode cleanly in the requested encoding
            output2.decode(encoding)
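The same parse-then-emit round trip can be exercised on an in-memory string. A short sketch following the call pattern of the test above (an emit() that returns a str when no stream is passed); whether that signature belongs to a test-suite wrapper or the plain YAML API is an assumption here:

from ruamel.yaml import YAML

yaml = YAML(typ='safe', pure=True)
text = 'greeting: héllo\n'
emitted = yaml.emit(yaml.parse(text), allow_unicode=True)  # assumed to return str
assert isinstance(emitted, str)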
Example #3
    def test_parse(self):
        # ensure the `parse` method is functional and can parse "unsafe" YAML
        import pytest

        from ruamel.yaml import YAML
        from ruamel.yaml.constructor import ConstructorError

        yaml = YAML(typ='safe')
        s = '- !User0 {age: 18, name: Anthon}'
        # loading should fail: the safe loader has no constructor for !User0
        with pytest.raises(ConstructorError):
            yaml.load(s)
        # parsing the event stream should succeed; use a fresh instance
        yaml = YAML(typ='safe')
        for _ in yaml.parse(s):
            pass
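Because parse only yields the low-level event stream and never constructs objects, the unregistered !User0 tag is harmless to it. A small sketch that inspects those events (event class names are from ruamel.yaml.events):

from ruamel.yaml import YAML

yaml = YAML(typ='safe')
for event in yaml.parse('- !User0 {age: 18, name: Anthon}'):
    print(type(event).__name__)
# StreamStartEvent, DocumentStartEvent, SequenceStartEvent,
# MappingStartEvent, ScalarEvent, ..., MappingEndEvent,
# SequenceEndEvent, DocumentEndEvent, StreamEndEvent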
Example #4
import copy
import importlib
import json
import re
from io import StringIO
from pathlib import Path

import pandas as pd
from benedict import benedict
from ruamel.yaml import YAML

# Assumed to be provided by the surrounding project: settings, clean_comments,
# y2j (the convert_yaml module), flatten, get_associated_key,
# get_associated_key_for_comment.


def convert_yaml_to_jsonnet(file, initialize=settings["initialize"]):

    # Initialize variables
    all_key_df = []
    all_alias_df = []
    f = StringIO()

    # Set file name
    file_name = Path(file).stem

    # quote bare "infinity" tokens so they pass through as strings
    with open(file, "r") as src:
        read_file = src.read().replace("infinity", repr("infinity"))
    read_file = clean_comments(read_file)
    yaml = YAML(typ="rt")
    yaml.version = "1.1"  # type: ignore  # yaml.version is mis-typed as None

    # Parse the yml file into a stream of events (ScalarEvent,
    # SequenceStartEvent/SequenceEndEvent, MappingStartEvent/MappingEndEvent, etc.)

    events = yaml.parse(read_file)
    events_df = (
        pd.DataFrame(events, columns=["event"])
        .reset_index()
        .rename({"index": "event_id"}, axis=1)
    )
    post_processed_read_file = copy.deepcopy(read_file).splitlines()

    output, keys, alias = y2j.convert_yaml(
        yaml_data=read_file, output=f, array=False, inject_comments=False
    )
    # keep the first half of the rendered lines, drop the trailing comma and
    # the "#insert_comment" placeholders
    output = output[: len(output) // 2]
    output[-1] = output[-1].replace(",\n", "")
    output = [item for item in output if item != "#insert_comment"]
    copy_output = copy.deepcopy(output)
    output = "".join(output).splitlines()

    # Get all keys from the YML file
    # e.g. {geo_K10_: "geo"}: the key is "geo" and it is the 10th in position

    all_key_df = pd.DataFrame(keys, columns=["raw_key_id", "key_event"])
    all_key_df["key_id"] = all_key_df.raw_key_id.apply(
        lambda x: x.replace("[", "").replace("]", "").replace("'", "")
    )
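    # e.g. (hypothetical) raw_key_id "['geo_K10_']" becomes key_id "geo_K10_"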
    all_key_df[
        ["key_value", "key_line_start", "key_col_start", "key_line_end", "key_col_end"]
    ] = pd.DataFrame(
        all_key_df.key_event.apply(
            lambda c: [
                c.value,
                c.start_mark.line,
                c.start_mark.column,
                c.end_mark.line,
                c.end_mark.column,
            ]
        ).to_list()
    )
    all_key_df["key_file_id"] = Path(file).stem
    all_key_df["line"] = all_key_df.key_line_start.apply(
        lambda x: post_processed_read_file[x]
    )

    # Get all aliases from the YML file
    # e.g. in `default: { <<: *geo }` the alias is *geo

    all_alias_df = pd.DataFrame(alias, columns=["event"])
    all_alias_df[
        ["value", "line_start", "col_start", "line_end", "col_end"]
    ] = pd.DataFrame(
        all_alias_df.event.apply(
            lambda c: [
                c.anchor,
                c.start_mark.line,
                c.start_mark.column,
                c.end_mark.line,
                c.end_mark.column,
            ]
        ).to_list()
    )
    all_alias_df["file_id"] = Path(file).stem
    all_alias_df["line"] = all_alias_df.line_start.apply(
        lambda x: post_processed_read_file[x]
    )
    all_alias_df["key"] = all_alias_df.line.apply(lambda x: x.lstrip().split()[0])

    # Convert the jsonnet to JSON and get the path for each key

    _jsonnet = importlib.import_module("_jsonnet")
    jsonnet_str = _jsonnet.evaluate_snippet("default", "\n".join(output))
    obj = json.loads(jsonnet_str)
    d = benedict(obj, keypath_separator="|")
    k = d.keypaths(indexes=True)
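    # e.g. (hypothetical) {"default": {"geo": {"lat": 1}}} yields keypaths
    # ["default", "default|geo", "default|geo|lat"]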

    # Associate a path with each key
    # e.g. default: { geo: &geo } (the path to geo is default.geo)
    path_df = pd.DataFrame(k, columns=["path"])
    path_df["key_id"] = path_df.path.apply(lambda x: x.split("|")[-1])

    # Create a dataframe that links each reference tag to its actual key
    # e.g. geo: &geo (geo is the reference variable)

    refer_by = []
    for line_id, line in enumerate(post_processed_read_file):
        if len(re.findall(r": &\S*|- &\S*", line)) > 0:
            pad = len(line) - len(line.lstrip())
            refer_by.append([line, line_id, pad, re.findall(r": &\S*|- &\S*", line)])
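    # e.g. (hypothetical) "default: &geo" yields ": &geo" and
    # "- &region us_east" yields "- &region"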

    refer_by_df = pd.DataFrame(
        refer_by, columns=["line", "ref_line_start", "ref_col_start", "reference"]
    ).explode("reference")
    refer_by_df["reference"] = refer_by_df.reference.apply(
        lambda x: re.sub("[^._|A-Za-z0-9/-]+", "", x)
    )
    refer_by_df["ref_file_id"] = Path(file).stem
    refer_by_df["list_flag"] = refer_by_df.reference.apply(
        lambda x: True if x.lstrip().startswith("-") else False
    )
    refer_by_df["reference"] = refer_by_df.reference.apply(
        lambda x: x[1:] if x.lstrip().startswith("-") else x
    )

    original_read_file = read_file.splitlines()

    # Get all aliases which have to be merged into their associated key
    # e.g. default: { <<: *geo } -> geo needs to be merged into default

    indirect_ref = all_alias_df[all_alias_df.key.isin(["<<:"])]
    indirect_ref_associated_key_df = indirect_ref.apply(
        lambda x: get_associated_key(
            all_key_df,
            original_read_file[x.line_start],
            x.file_id,
            x.line_start,
            x.col_start,
        ),
        axis=1,
    )
    indirect_ref_key_reference_df = pd.concat(
        [indirect_ref, indirect_ref_associated_key_df], axis=1
    )

    # blank out the merge-key lines before re-rendering the file
    for line_start in indirect_ref_key_reference_df.line_start.unique():
        post_processed_read_file[line_start] = ""

    f = StringIO()
    read_file = "\n".join(post_processed_read_file)

    processed_output, processed_keys, processed_alias = y2j.convert_yaml(
        yaml_data=read_file, output=f, array=False, inject_comments=False
    )
    processed_output = processed_output[: len(processed_output) // 2]
    processed_output[-1] = processed_output[-1].replace(",\n", "")
    processed_output = [item for item in processed_output if item != "#insert_comment"]
    processed_output = "".join(processed_output)

    # Initialize the reference paths for each key

    if file_name == "default" and initialize:
        default_reference_key = pd.DataFrame()
    else:
        default_reference_key = pd.read_csv(settings["reference_file"])

    if len(refer_by_df) > 0:
        if refer_by_df.list_flag.any():
            reference_key_list = refer_by_df[refer_by_df.list_flag].apply(
                lambda x: get_associated_key(
                    all_key_df,
                    original_read_file[x.ref_line_start],
                    x.ref_file_id,
                    x.ref_line_start,
                    x.ref_col_start,
                ),
                axis=1,
            )
            reference_key_list = pd.concat(
                [
                    refer_by_df[refer_by_df.list_flag],
                    reference_key_list[
                        [
                            "key_id",
                            "key_line_start",
                            "key_col_start",
                            "key_col_end",
                            "line",
                        ]
                    ],
                ],
                axis=1,
            ).reset_index(drop=True)

            reference_key_list["key_id"] = reference_key_list.apply(
                lambda x: x.key_id + "[" + str(x.name) + "]", axis=1
            )

        reference_key_no_list = pd.merge(
            refer_by_df[~refer_by_df.list_flag],
            all_key_df[
                ["key_id", "key_line_start", "key_col_start", "key_col_end", "line"]
            ],
            on="line",
        )

        if refer_by_df.list_flag.any():
            reference_key = pd.concat(
                [
                    reference_key_no_list[
                        ["reference", "ref_file_id", "list_flag", "key_id"]
                    ],
                    reference_key_list[
                        ["reference", "ref_file_id", "list_flag", "key_id"]
                    ],
                ]
            ).reset_index(drop=True)
        else:
            reference_key = reference_key_no_list[
                ["reference", "ref_file_id", "list_flag", "key_id"]
            ]

        reference_key = pd.merge(reference_key, path_df, on="key_id")
        reference_key["reference_path"] = reference_key.apply(
            lambda x: (
                x.ref_file_id
                + "."
                + ".".join(
                    [
                        "[" + repr(p.replace("_h_", "-")) + "]" if "_h_" in p else p
                        for p in x.path.split("|")
                    ]
                )
            ).replace(".[", "["),
            axis=1,
        )
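        # "_h_" is the project's stand-in for "-" in key names; such segments
        # are emitted via ['...'] indexing, since "-" is not valid in a bare
        # jsonnet field name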
        default_reference_key = pd.concat(
            [reference_key[["reference", "reference_path"]], default_reference_key]
        )

        if file_name == "default" and initialize:
            default_reference_key.to_csv(settings["reference_file"])

    direct_df = all_alias_df[all_alias_df.key != "<<:"]
    # .copy() avoids pandas' SettingWithCopyWarning for the assignment below
    indirect_df = all_alias_df[all_alias_df.key == "<<:"].copy()

    indirect_df["key_id"] = indirect_df.apply(
        lambda x: get_associated_key(
            all_key_df,
            original_read_file[x.line_start],
            x.file_id,
            x.line_start,
            x.col_start,
        ),
        axis=1,
    )["key_id"]

    processed_output_list = processed_output.splitlines()
    for idx, val in indirect_df[["key_id", "value"]].iterrows():
        l_id = [
            line_id
            for line_id, e in enumerate(processed_output_list)
            if val.key_id in e
        ][0]
        ob = processed_output_list[l_id].split(":")
        if "*" in processed_output_list[l_id]:
            processed_output_list[l_id] = (
                (ob[0] + ": " + repr("*" + val.value) + " + " + "".join(ob[1:]))
                .replace("null", "")
                .replace(".[", "[")
            )
        else:
            processed_output_list[l_id] = (
                (ob[0] + ": " + repr("*" + val.value) + " " + "".join(ob[1:]))
                .replace("null", "")
                .replace(".[", "[")
            )
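    # each merge alias is injected as a quoted "*<anchor>" placeholder; the
    # clean-up pass below replaces every placeholder with its resolved
    # reference path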

    # Associate each comment with its key

    comments_df = (
        pd.DataFrame(
            [e for e in flatten(events_df.event.apply(lambda x: x.comment)) if e],
            columns=["event"],
        )
        .reset_index()
        .rename({"index": "event_id"}, axis=1)
    )
    comments_df[
        ["comment", "line_start", "col_start", "line_end", "col_end"]
    ] = pd.DataFrame(
        comments_df.event.apply(
            lambda c: [
                c.value,
                c.start_mark.line,
                c.start_mark.column,
                c.end_mark.line,
                c.end_mark.column,
            ]
        ).to_list()
    )
    comments_df["file_id"] = file_name

    associated_comments_df = comments_df.apply(
        lambda x: get_associated_key_for_comment(
            all_key_df,
            original_read_file[x.line_start],
            x.file_id,
            x.line_start,
            x.col_start,
        ),
        axis=1,
    )

    comments_df = pd.concat([comments_df, associated_comments_df], axis=1)
    # drop comments that consist only of blank lines
    comments_df = comments_df[
        comments_df.comment.apply(lambda x: not bool(re.match(r"^\n+$", x)))
    ]

    # prepend each comment, restyled as a jsonnet "//" comment, to the line
    # holding its associated key
    for idx, val in comments_df[["comment", "raw_key_id"]].iterrows():
        l_id = [
            line_id
            for line_id, e in enumerate(processed_output_list)
            if val.raw_key_id in e
        ][0]
        comment_line = (
            "".join(
                [
                    "\n// " + line.lstrip()[1:].lstrip()
                    for line in val.comment.splitlines()
                    if len(line) > 0
                ]
            )
            + "\n"
        )
        processed_output_list[l_id] = comment_line + processed_output_list[l_id]

    final_op = "\n".join(processed_output_list)

    # Perform clean up operations

    for idx, val in default_reference_key.iterrows():
        final_op = final_op.replace(repr("*" + val.reference), val.reference_path)

    # strip the positional _K<n>_ / _M<n>_ markers from the keys
    for marker in re.findall("_K(.+?)_", final_op):
        final_op = final_op.replace("_K" + marker + "_", "")
    for marker in re.findall("_M(.+?)_", final_op):
        final_op = final_op.replace("_M" + marker + "_", "")
    final_op = final_op.replace("_h_", "-").replace("// \n", "")
    final_op_lines = final_op.splitlines()

    # fold residual merge keys into jsonnet "+" object composition
    for line_id, line in enumerate(final_op_lines):
        if "['<<']" in line:
            final_op_lines[line_id] = line.replace("['<<']:", "").rstrip()[:-1] + " + {"
            final_op_lines[line_id - 1] = ""

    # Create reference local variables

    if file_name != "default":
        final_op_lines = [
            "local default = import 'default.jsonnet';",
            "{",
            "local " + file_name + " = $ ,",
        ] + final_op_lines[1:]
    else:
        final_op_lines = ["{", "local " + file_name + " = $ ,"] + final_op_lines[1:]

    # Format lists within the jsonnet

    for line_id, line in enumerate(final_op_lines):
        # a "bare" line carries only a value and a trailing comma; fold it
        # onto the previous line, otherwise keep the line break
        is_bare_list_item = (
            len(line) > 0
            and line.rstrip().endswith(",")
            and not any(
                token in line
                for token in (":", "//", "{", "}", "=", "[", "]", ".yml", "|||")
            )
        )
        if is_bare_list_item:
            final_op_lines[line_id - 1] = final_op_lines[line_id - 1].replace("\n", "")
        else:
            final_op_lines[line_id] = final_op_lines[line_id] + "\n"

    complete_jsonnet = "".join(final_op_lines)
    complete_jsonnet = complete_jsonnet.replace("\n\n//", "\n//")
    complete_jsonnet = complete_jsonnet.replace("\n\n|||", "\n|||")

    # Write the output to jsonnet file

    with open(settings["output_path"] + file_name + ".jsonnet", "w") as f:
        f.write(complete_jsonnet)
    print(file_name + " has been processed!")
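A hypothetical driver for the converter, assuming the module-level settings dict provides the "initialize", "reference_file" and "output_path" keys read above; because non-default files import default.jsonnet and load the reference CSV seeded from it, default.yml is converted first:

from pathlib import Path

yml_files = sorted(
    Path("configs").glob("*.yml"),  # "configs" is an assumed input directory
    key=lambda p: p.stem != "default",  # False sorts first: default.yml leads
)
for yml in yml_files:
    convert_yaml_to_jsonnet(str(yml))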