def generate_to_meta(self, arrow_schema: pa.Schema, meta_init_dict: dict = None) -> Metadata:
    """Generate a Metadata instance from an Arrow schema.

    Args:
        arrow_schema (pa.Schema): Schema whose fields are turned into the
            metadata columns (one column per field, in schema order).
        meta_init_dict (dict, optional): Extra top-level key/value pairs to
            pass to ``Metadata.from_dict`` alongside the generated columns.
            A ``"columns"`` entry, if present, is overwritten (a warning is
            emitted), and ``"_converted_from"`` is always set to
            ``"arrow_schema"``. The dict passed in is not modified.

    Returns:
        Metadata: An agnostic metadata instance.
    """
    # Work on a shallow copy so the caller's dict is never mutated; only
    # top-level keys are (re)assigned below, so a shallow copy is enough.
    meta_dict = dict(meta_init_dict) if meta_init_dict else {}

    if "columns" in meta_dict:
        warnings.warn("columns key found in meta_init_dict will be overwritten")

    meta_dict["columns"] = [
        {"name": field.name, "type": self.reverse_convert_col_type(field.type)}
        for field in arrow_schema
    ]
    meta_dict["_converted_from"] = "arrow_schema"

    return Metadata.from_dict(meta_dict)
def _get_arrow_schema(schema: Union[pa.Schema, Metadata, dict]) -> pa.Schema:
    """Normalise a schema given in any supported form to an Arrow schema.

    Args:
        schema: Either an Arrow schema (returned unchanged), a Metadata
            instance, or a dict that ``Metadata.from_dict`` accepts.

    Returns:
        pa.Schema: The Arrow schema corresponding to ``schema``.

    Raises:
        TypeError: If ``schema`` is not one of the supported types.
    """
    if isinstance(schema, pa.Schema):
        return schema

    if isinstance(schema, dict):
        schema = Metadata.from_dict(schema)

    if isinstance(schema, Metadata):
        # Metadata -> Arrow is the "from_meta" direction; generate_to_meta
        # goes the other way (Arrow schema -> Metadata) and would not give
        # back an Arrow schema.
        return ArrowConverter().generate_from_meta(schema)

    raise TypeError(f"schema type not allowed: {type(schema)}")
def generate_to_meta(
    self,
    table_meta: TableMeta,
    data_format_mapper: Callable = None,
    col_type_mapper: Callable = None,
) -> Metadata:
    """Convert an etl-manager TableMeta object into our Metadata object.

    Args:
        table_meta (TableMeta): TableMeta object from etl-manager.
        data_format_mapper (Callable, optional): If not set, file_format
            is simply the original data_format string of the TableMeta.
            To translate it, pass a callable taking that string and
            returning the new value,
            e.g. data_format_mapper = my_lookup_dict.get
        col_type_mapper (Callable, optional): If not set, column types are
            converted with the converter's reverse_convert_col_type
            method. Pass a callable to use a custom conversion instead;
            it should expect the TableMeta col type str and return the
            Metadata col type str name.

    Returns:
        Metadata: Built with Metadata.from_dict from the converted
            dictionary.
    """
    meta_kwargs = deepcopy(table_meta.to_dict())

    # etl_manager calls the parameter "data_format"; Metadata calls it
    # "file_format".
    meta_kwargs["file_format"] = meta_kwargs.pop("data_format")
    if data_format_mapper:
        meta_kwargs["file_format"] = data_format_mapper(meta_kwargs["file_format"])

    # remove etl_manager schema
    meta_kwargs.pop("$schema")

    # convert columns
    columns = meta_kwargs.pop("columns")
    for col in columns:
        original_type = col["type"]
        col["type"] = (
            self.reverse_convert_col_type(original_type)
            if col_type_mapper is None
            else col_type_mapper(original_type)
        )
    meta_kwargs["columns"] = columns

    meta_kwargs["_converted_from"] = "etl_manager"
    return Metadata.from_dict(meta_kwargs)