Beispiel #1
0
def get_children(column: ResultSetColumnType) -> List[ResultSetColumnType]:
    """
    Get the children of a complex Presto type (row or array).

    For arrays, we return a single-element list holding the base type under
    the parent column's name:

        >>> get_children(dict(name="a", type="ARRAY(BIGINT)", is_dttm=False))
        [{'name': 'a', 'type': 'BIGINT', 'is_dttm': False}]

    For rows, we return one column per field. A field written as
    ``<name> <type>`` keeps its (lowercased, unquoted) name; any other field
    is named ``_col<N>``, where N counts the unnamed fields seen so far:

        >>> get_children(dict(name="a", type="ROW(BIGINT,FOO VARCHAR)", is_dttm=False))
        [{'name': 'a._col0', 'type': 'BIGINT', 'is_dttm': False}, {'name': 'a.foo', 'type': 'VARCHAR', 'is_dttm': False}]  # pylint: disable=line-too-long

    Every child is reported with ``is_dttm`` set to ``False``, regardless of
    the parent's value.

    :param column: dictionary representing a Presto column
    :return: list of dictionaries representing children columns
    :raises ValueError: if the column type is empty or ``None``, does not
        look like ``NAME(...)``, or names a type other than ARRAY or ROW
    """
    column_type = column["type"]
    if not column_type:
        # Say which column is at fault instead of raising a bare ValueError.
        raise ValueError(f"Missing type for column {column.get('name')!r}")

    pattern = re.compile(r"(?P<type>\w+)\((?P<children>.*)\)")
    match = pattern.match(column_type)
    if not match:
        raise ValueError(f"Unable to parse column type {column_type}")

    type_ = match.group("type").upper()
    children_type = match.group("children")

    if type_ == "ARRAY":
        return [{"name": column["name"], "type": children_type, "is_dttm": False}]

    if type_ == "ROW":
        nameless_columns = 0
        columns = []
        # NOTE(review): utils.split presumably splits on the delimiter while
        # keeping nested parentheses / quoted text together -- confirm.
        for child in utils.split(children_type, ","):
            parts = list(utils.split(child.strip(), " "))
            if len(parts) == 2:
                # "<name> <type>": drop the optional double quotes around the name.
                name, child_type = parts
                name = name.strip('"')
            else:
                # Anything else is treated as an unnamed field whose type is
                # the first token.
                name = f"_col{nameless_columns}"
                child_type = parts[0]
                nameless_columns += 1
            _column: ResultSetColumnType = {
                "name": f"{column['name']}.{name.lower()}",
                "type": child_type,
                "is_dttm": False,
            }
            columns.append(_column)
        return columns

    raise ValueError(f"Unknown type {type_}!")
Beispiel #2
0
def get_children(column: Dict[str, str]) -> List[Dict[str, str]]:
    """
    Expand a complex Presto column (ARRAY or ROW) into its child columns.

    An array yields a one-element list carrying the element type under the
    parent's name:

        >>> get_children(dict(name="a", type="ARRAY(BIGINT)"))
        [{'name': 'a', 'type': 'BIGINT'}]

    A row yields one entry per field, named ``<parent>.<field>``; fields
    that are not written as ``<name> <type>`` are named ``_col<N>``:

        >>> get_children(dict(name="a", type="ROW(BIGINT,FOO VARCHAR)"))
        [{'name': 'a._col0', 'type': 'BIGINT'}, {'name': 'a.foo', 'type': 'VARCHAR'}]

    :param column: dictionary representing a Presto column
    :return: list of dictionaries representing children columns
    :raises Exception: if the type does not look like ``NAME(...)`` or the
        outer type is neither ARRAY nor ROW
    """
    parsed = re.compile(r"(?P<type>\w+)\((?P<children>.*)\)").match(column["type"])
    if parsed is None:
        raise Exception(f"Unable to parse column type {column['type']}")

    outer_type = parsed.group("type").upper()
    inner = parsed.group("children")

    if outer_type == "ARRAY":
        return [{"name": column["name"], "type": inner}]

    if outer_type != "ROW":
        raise Exception(f"Unknown type {outer_type}!")

    children = []
    unnamed_seen = 0
    for field in utils.split(inner, ","):
        tokens = list(utils.split(field.strip(), " "))
        if len(tokens) == 2:
            # "<name> <type>": remove optional double quotes around the name.
            field_name = tokens[0].strip('"')
            field_type = tokens[1]
        else:
            # Not a name/type pair: use a positional name and the first token.
            field_name = f"_col{unnamed_seen}"
            field_type = tokens[0]
            unnamed_seen += 1
        children.append(
            {"name": f"{column['name']}.{field_name.lower()}", "type": field_type}
        )
    return children
 def test_split(self):
     # Table of (positional args, keyword args, expected tokens). The cases
     # cover the default delimiter, an explicit delimiter, parenthesized
     # groups, quoted substrings, a custom quote character and an escaped
     # quote inside a quoted substring.
     cases = [
         (("a b",), {}, ["a", "b"]),
         (("a,b",), {"delimiter": ","}, ["a", "b"]),
         (("a,(b,a)",), {"delimiter": ","}, ["a", "(b,a)"]),
         (
             ('a,(b,a),"foo , bar"',),
             {"delimiter": ","},
             ["a", "(b,a)", '"foo , bar"'],
         ),
         (("a,'b,c'",), {"delimiter": ",", "quote": "'"}, ["a", "'b,c'"]),
         (('a "b c"',), {}, ["a", '"b c"']),
         ((r'a "b \" c"',), {}, ["a", r'"b \" c"']),
     ]
     for args, kwargs, expected in cases:
         self.assertEqual(list(split(*args, **kwargs)), expected)