def generate_to_meta(self,
                         arrow_schema: pa.Schema,
                         meta_init_dict: dict = None) -> Metadata:
        """Generates our metadata instance from an arrow schema

        Args:
            arrow_schema (pa.Schema): pa.Schema from an arrow table
            meta_init_dict (dict, optional): Extra key/value pairs handed to
                Metadata.from_dict alongside the generated columns. The dict
                is copied, so the caller's object is never modified. Any
                "columns" entry is overwritten (a warning is issued) and
                "_converted_from" is always set to "arrow_schema".

        Returns:
            Metadata: An agnostic metadata instance
        """
        # Work on a shallow copy: the keys added/overwritten below must not
        # leak back into the dictionary the caller passed in.
        meta_init_dict = dict(meta_init_dict) if meta_init_dict else {}

        if "columns" in meta_init_dict:
            warnings.warn(
                "columns key found in meta_init_dict will be overwritten")

        # One column entry per arrow field, with the arrow type translated
        # to its Metadata type name by the converter.
        meta_init_dict["columns"] = [
            {
                "name": field.name,
                "type": self.reverse_convert_col_type(field.type),
            }
            for field in arrow_schema
        ]
        meta_init_dict["_converted_from"] = "arrow_schema"

        return Metadata.from_dict(meta_init_dict)
def _get_arrow_schema(schema: Union[pa.Schema, Metadata, dict]) -> pa.Schema:
    """Normalise a schema definition into an arrow schema.

    Args:
        schema (Union[pa.Schema, Metadata, dict]): The schema to normalise.
            A pa.Schema is returned unchanged. A Metadata instance is
            converted with ArrowConverter. A dict is first turned into a
            Metadata instance via Metadata.from_dict and then converted.

    Returns:
        pa.Schema: The arrow schema for the given definition.

    Raises:
        TypeError: If schema is not a pa.Schema, Metadata or dict.
    """
    if isinstance(schema, pa.Schema):
        return schema

    if isinstance(schema, dict):
        schema = Metadata.from_dict(schema)
    elif not isinstance(schema, Metadata):
        raise TypeError(f"schema type not allowed: {type(schema)}")

    # Metadata -> arrow is the "from meta" direction. generate_to_meta goes
    # the other way (arrow schema -> Metadata) and would hand back a
    # Metadata object instead of the arrow schema this function promises.
    return ArrowConverter().generate_from_meta(schema)
# Example #3
# 0
    def generate_to_meta(
        self,
        table_meta: TableMeta,
        data_format_mapper: Callable = None,
        col_type_mapper: Callable = None,
    ) -> Metadata:
        """Takes a TableMeta object and converts it to our Metadata object

        Args:
            table_meta (TableMeta): TableMeta object from etl-manager
            data_format_mapper (Callable, optional): If not set the function
                will just set the file_format parameter to the str in the
                original data_format of the TableMeta. If you want to use
                your own mapper set a function object to this param e.g.
                data_format_mapper = my_lookup_dict.get
            col_type_mapper (Callable, optional): If not set the col type
                conversion from TableMeta -> Metadata is done using the
                converter's reverse_convert_col_type method. If you need a
                custom conversion set a function to this parameter to use
                said function instead of reverse_convert_col_type. This
                callable should expect the TableMeta col type str and return
                the Metadata col type str name.

        Returns:
            Metadata: The Metadata object built from the converted
                TableMeta dictionary.

        Raises:
            KeyError: If the TableMeta dictionary has no "data_format" or
                "columns" entry.
        """
        # Copy so none of the edits below can reach back into table_meta.
        table_meta_dict = deepcopy(table_meta.to_dict())

        # Parameters that carry a different name in Metadata.
        renamed_params = {"data_format": "file_format"}
        for old_name, new_name in renamed_params.items():
            table_meta_dict[new_name] = table_meta_dict.pop(old_name)

        if data_format_mapper is not None:
            table_meta_dict["file_format"] = data_format_mapper(
                table_meta_dict["file_format"])

        # Remove the etl_manager schema reference; tolerate it being absent
        # since it carries nothing the Metadata object needs.
        table_meta_dict.pop("$schema", None)

        # Convert column types, preferring the caller's mapper when given.
        etl_cols = table_meta_dict.pop("columns")
        if etl_cols:
            if col_type_mapper is None:
                convert_type = self.reverse_convert_col_type
            else:
                convert_type = col_type_mapper
            for col in etl_cols:
                col["type"] = convert_type(col["type"])

        table_meta_dict["columns"] = etl_cols
        table_meta_dict["_converted_from"] = "etl_manager"
        return Metadata.from_dict(table_meta_dict)