Beispiel #1
0
def get_table_name(name_transformer: DestinationNameTransformer, parent: str,
                   child: str, suffix: str, json_path: List[str]) -> str:
    """
    Build a table name of the form ``<parent>_<json path hash>_<child><suffix>``.

    The parent and child are normalized through ``name_transformer`` and the
    suffix gets a leading underscore when it is non-empty and lacks one.
    When ``parent`` is empty, the result is just the (possibly truncated)
    ``<child><suffix>``. Otherwise the name is shortened in stages so that it
    stays within ``name_transformer.get_name_max_length()``:

    1. everything fits: nothing is shortened;
    2. only the parent is too long: the parent is cut to the space left;
    3. otherwise: the parent is cut to at most ``MINIMUM_PARENT_LENGTH``
       characters and the child is shortened with
       ``name_transformer.truncate_identifier_name``.
    """
    # Two characters are reserved for the underscores joining the three parts.
    budget = name_transformer.get_name_max_length() - 2
    path_hash = hash_json_path(json_path)

    if suffix and not suffix.startswith("_"):
        suffix_part = f"_{suffix}"
    else:
        suffix_part = suffix

    parent_part = (name_transformer.normalize_table_name(parent, False, False)
                   if parent else parent)
    child_part = name_transformer.normalize_table_name(child, False, False)
    # Number of parent characters that survive the most aggressive truncation.
    kept_parent_len = min(MINIMUM_PARENT_LENGTH, len(parent_part))

    # Top-level table: there is no parent (and no hash) to include.
    if not parent:
        return name_transformer.truncate_identifier_name(
            f"{child_part}{suffix_part}")

    # Length of everything that follows the parent, underscores excluded.
    tail_len = len(child_part) + len(path_hash) + len(suffix_part)

    # Stage 1: the full name fits as is.
    if len(parent_part) + tail_len < budget:
        return f"{parent_part}_{path_hash}_{child_part}{suffix_part}"

    # Stage 2: shortening the parent alone is enough.
    if tail_len < budget - kept_parent_len:
        short_parent = parent_part[:budget - tail_len]
        return f"{short_parent}_{path_hash}_{child_part}{suffix_part}"

    # Stage 3: keep the minimal parent prefix and shorten the child as well.
    child_budget = budget - kept_parent_len - len(path_hash) - len(suffix_part)
    short_child = name_transformer.truncate_identifier_name(
        child_part, child_budget)
    return f"{parent_part[:kept_parent_len]}_{path_hash}_{short_child}{suffix_part}"
Beispiel #2
0
def get_nested_hashed_table_name(name_transformer: DestinationNameTransformer, schema: str, json_path: List[str], child: str) -> str:
    """
    Build the name of a nested table as ``<parent>_<hash>_<child>``.

    The pieces are:
    - parent: every element of ``json_path`` except the last, joined with underscores and normalized;
    - hash: ``hash_json_path`` applied to the schema followed by the json path;
    - child: the normalized ``child`` name.

    The name is shortened in stages to stay within ``name_transformer.get_name_max_length()``:
    first only the parent is cut; if that is not enough, the parent is cut to at most
    ``MINIMUM_PARENT_LENGTH`` characters and the child is shortened with
    ``name_transformer.truncate_identifier_name``.

    Raises RuntimeError when ``json_path`` yields no parent.
    """
    parent = "_".join(json_path[:-1])
    limit = name_transformer.get_name_max_length()
    path_hash = hash_json_path([schema] + json_path)
    parent_part = name_transformer.normalize_table_name(parent, False, False) if parent else parent
    child_part = name_transformer.normalize_table_name(child, False, False)
    # Number of parent characters that survive the most aggressive truncation.
    kept_parent_len = min(MINIMUM_PARENT_LENGTH, len(parent_part))

    # A nested table always has a parent.
    if not parent:
        raise RuntimeError("There is no nested table names without parents")

    # Hash, child and the two underscores that join the three parts.
    fixed_len = len(path_hash) + len(child_part) + 2

    # Stage 1: the full name fits as is.
    if len(parent_part) + fixed_len < limit:
        return f"{parent_part}_{path_hash}_{child_part}"

    # Stage 2: cutting the parent alone is enough (it keeps more than kept_parent_len characters).
    if kept_parent_len + fixed_len < limit:
        short_parent = parent_part[:limit - fixed_len]
        return f"{short_parent}_{path_hash}_{child_part}"

    # Stage 3: keep the minimal parent prefix and shorten the child too.
    child_budget = limit - len(path_hash) - 2 - kept_parent_len
    short_child = name_transformer.truncate_identifier_name(child_part, child_budget)
    return f"{parent_part[:kept_parent_len]}_{path_hash}_{short_child}"
def get_table_name(name_transformer: DestinationNameTransformer, parent: str,
                   child: str, suffix: str, json_path: List[str]) -> str:
    """
    Compose a table name from the information that identifies a table:
    - parent table: where the table was extracted from (when nested)
    - json path: hashed with ``hash_json_path`` and placed between parent and child
    - child table: the nested field name or the original stream name
    - suffix: distinguishes the tables produced by different transformation steps;
      a leading underscore is added when it is non-empty and does not have one.

    The result is ``<parent>_<hash>_<child><suffix>``, or just ``<child><suffix>``
    when there is no parent. As much of each part as possible is kept while
    staying within ``name_transformer.get_name_max_length()``: the parent is
    shortened first, then the child.
    """
    full_limit = name_transformer.get_name_max_length()
    # Reserve room for the two underscores that join parent, hash and child.
    available = full_limit - 2
    digest = hash_json_path(json_path)

    needs_underscore = suffix and not suffix.startswith("_")
    tail = f"_{suffix}" if needs_underscore else suffix

    if parent:
        head = name_transformer.normalize_table_name(parent, False, False)
    else:
        head = parent
    body = name_transformer.normalize_table_name(child, False, False)
    # The parent is never cut below this many characters (or its own length).
    head_floor = min(MINIMUM_PARENT_LENGTH, len(head))

    if not parent:
        # No parent: only the child and suffix make up the name.
        return name_transformer.truncate_identifier_name(f"{body}{tail}")

    # Characters needed by everything except the parent and the underscores.
    rest_len = len(body) + len(digest) + len(tail)

    if len(head) + rest_len < available:
        # Everything fits: keep all parts untouched.
        return f"{head}_{digest}_{body}{tail}"

    if rest_len < (available - head_floor):
        # Only the parent overflows: give it whatever room is left.
        head_room = available - rest_len
        return f"{head[:head_room]}_{digest}_{body}{tail}"

    # Parent goes down to its floor and the child is shortened too.
    body_room = available - head_floor - len(digest) - len(tail)
    short_body = name_transformer.truncate_identifier_name(body, body_room)
    return f"{head[:head_floor]}_{digest}_{short_body}{tail}"