def make(self, key, store=None, context=None):
    """Run the recipe's SQL and store the result as a Parquet file under ``key``.

    The statement in ``self.data["sql"]`` is executed through the execution
    context built by ``self.make_execution_context``. The collected batches
    are written to a Parquet file named ``self.data["filename"]`` inside a
    temporary directory, and that file's bytes are stored together with the
    recipe metadata.

    Args:
        key: Store key under which the data (or the failure metadata) is saved.
        store: Target store; when ``None`` it is taken from ``context.store()``.
        context: Context, normalized through ``get_context``.

    Raises:
        Exception: If the recipe has no ``sql`` or no ``filename`` entry.

    Errors raised while executing the query or writing the result are not
    propagated: the traceback is recorded in the metadata stored under ``key``.
    """
    context = get_context(context)
    if "sql" not in self.data:
        raise Exception(
            f"Recipe {self.recipe_name()} of type {self.recipe_type()} does not have sql."
        )
    if "filename" not in self.data:
        raise Exception(
            f"Recipe {self.recipe_name()} of type {self.recipe_type()} does not have a filename."
        )
    if store is None:
        store = context.store()

    with TemporaryDirectory() as tmpdir:
        metadata = self.metadata(key)
        try:
            ctx = self.make_execution_context(tmpdir, store, context)
            df = ctx.sql(self.data["sql"])
            table = pyarrow.Table.from_batches(df.collect())
            path = Path(tmpdir) / self.data["filename"]
            pyarrow.parquet.write_table(table, str(path))
            b = path.read_bytes()
            store.store(key, b, metadata)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit still propagate instead of being recorded as a
            # recipe failure.
            m = Metadata(metadata)
            m.exception(
                "Parquet SQL recipe failed", traceback=traceback.format_exc()
            )
            store.store_metadata(key, m.as_dict())
def make(self, key, store=None, context=None):
    """Evaluate the recipe's query and direct its result to ``key``.

    Args:
        key: Passed to ``context.evaluate`` as ``store_key``.
        store: Passed to ``context.evaluate`` as ``store_to``; when ``None``
            the store is obtained from ``context.store()``.
        context: Context, normalized through ``get_context``.
    """
    context = get_context(context)
    target_store = context.store() if store is None else store
    query = self.data["query"]
    context.evaluate(query, store_key=key, store_to=target_store)
def polars_df(data, extension=None, context=None):
    """Convert bytes or a data-frame to a polars data-frame.

    Args:
        data: ``bytes`` (decoded via ``POLARS_DATAFRAME_STATE_TYPE.from_bytes``),
            a pandas ``DataFrame`` (wrapped in ``pl.DataFrame``), or a polars
            ``DataFrame`` (returned unchanged).
        extension: Forwarded to ``from_bytes`` when ``data`` is bytes.
        context: Context, normalized through ``get_context``; used for logging.

    Returns:
        The polars data-frame.

    Raises:
        Exception: If ``data`` is none of the supported types.
    """
    context = get_context(context)
    # isinstance (not an exact type comparison) so bytes subclasses are accepted.
    if isinstance(data, bytes):
        context.info(f"Polars data-frame from bytes. Extension:'{extension}'")
        return POLARS_DATAFRAME_STATE_TYPE.from_bytes(data, extension=extension)
    if isinstance(data, pd.DataFrame):
        context.info("Polars data-frame from Pandas data-frame")
        return pl.DataFrame(data)
    if isinstance(data, pl.DataFrame):
        context.info("Polars data-frame kept as it is")
        return data
    raise Exception(f"Unsupported polars dataframe type: {type(data)}")
def evaluate_and_save(
    query, target_directory=None, target_file=None, target_resource_directory=None
):
    """Evaluate ``query`` and save the result, using the current context.

    The output location is either

    - a target directory (the current working directory by default), with
      the file name deduced from the query, or
    - ``target_file``, when it is specified.

    All arguments are forwarded unchanged to the context's
    ``evaluate_and_save`` method. Returns a state.
    """
    context = get_context()
    destination = {
        "target_directory": target_directory,
        "target_file": target_file,
        "target_resource_directory": target_resource_directory,
    }
    return context.evaluate_and_save(query, **destination)
def workbook(data, index=True, header=True, context=None):
    """Convert bytes or a pandas data-frame to a workbook.

    Args:
        data: ``bytes`` (decoded via ``OPENPYXL_WORKBOOK_STATE_TYPE.from_bytes``),
            a pandas ``DataFrame`` (its rows are appended to the active sheet
            of a new ``Workbook``), or a ``Workbook`` (returned unchanged).
        index: Forwarded to ``dataframe_to_rows``.
        header: Forwarded to ``dataframe_to_rows``.
        context: Context, normalized through ``get_context``; used for logging.

    Returns:
        The workbook.

    Raises:
        Exception: If ``data`` is none of the supported types.
    """
    context = get_context(context)
    if isinstance(data, bytes):
        context.info("Workbook from bytes")
        return OPENPYXL_WORKBOOK_STATE_TYPE.from_bytes(data)
    if isinstance(data, pd.DataFrame):
        context.info("Workbook from pandas DataFrame")
        wb = Workbook()
        ws = wb.active
        # The frame to convert is the ``data`` argument; the previous code
        # referenced an undefined name ``df`` here.
        for row in dataframe_to_rows(data, index=index, header=header):
            ws.append(row)
        return wb
    if isinstance(data, Workbook):
        return data
    raise Exception(f"Unsupported workbook type: {type(data)}")
def make(self, key, store=None, context=None):
    """Evaluate the recipe's queries, concatenate the frames and store the result.

    Each element of ``self.data["concat"]`` is either a query string or a
    dict with ``query``, ``column`` and ``value`` entries; for a dict, the
    column ``column`` of the evaluated frame is set to ``value`` before
    concatenation. The concatenated frame is encoded according to the
    extension of ``key`` and stored with the recipe metadata.

    Args:
        key: Store key under which the data (or the failure metadata) is saved.
        store: Target store; when ``None`` it is taken from ``context.store()``.
        context: Context, normalized through ``get_context``.

    Errors (including a missing ``filename`` or ``concat`` entry) are not
    propagated: the traceback is recorded in the metadata stored under ``key``.
    """
    import liquer.store as ls
    import liquer.state_types as st

    context = get_context(context)
    # Resolve the store before the try block: the error handler below needs
    # it, and would otherwise fail on ``None`` when an early check raises.
    if store is None:
        store = context.store()

    try:
        if "filename" not in self.data:
            raise Exception(
                f"Recipe {self.recipe_name()} of type {self.recipe_type()} does not have a filename."
            )
        if "concat" not in self.data:
            raise Exception(
                f"Recipe {self.recipe_name()} of type {self.recipe_type()} does not have a 'concat' section with queries to concatenate."
            )

        to_join = []
        for position, item in enumerate(self.data["concat"], start=1):
            if isinstance(item, str):
                query = item
                tag_column = False
            elif isinstance(item, dict):
                query = item["query"]
                column = item["column"]
                value = item["value"]
                tag_column = True
            else:
                raise Exception(
                    f"Unrecognized element {position} to concat: {item}"
                )

            context.info(f"Evaluate query {position}: {query}")
            df = context.evaluate(query).get()
            if not isinstance(df, pd.DataFrame):
                raise Exception(
                    f"Query {position} ({query}) in recipe {self.recipe_name()} is not a dataframe but {type(df)}"
                )
            if tag_column:
                # NOTE(review): this assigns into the frame returned by the
                # query; confirm that object is not shared (e.g. via a cache).
                df[column] = value
            to_join.append(df)

        df = pd.concat(to_join, sort=False)
        extension = ls.key_extension(key)
        b, mimetype, type_identifier = st.encode_state_data(df, extension=extension)
        metadata = self.metadata(key)
        # Use the literal field names as keys; the previous code used the
        # variable values themselves as keys.
        metadata.update({"type_identifier": type_identifier, "mimetype": mimetype})
        metadata["data_characteristics"] = st.data_characteristics(df)
        store.store(key, b, metadata=metadata)
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit propagate.
        metadata = self.metadata(key)
        m = Metadata(metadata)
        m.exception("Pandas concat recipe failed", traceback=traceback.format_exc())
        store.store_metadata(key, m.as_dict())
def workbook_sheet_df(wb, sheet=None, context=None):
    """Extract a workbook sheet as a pandas data-frame.

    The first row of the sheet supplies the column labels and the first
    column supplies the index; the top-left cell is discarded.

    Args:
        wb: A workbook, or ``bytes`` that are first converted with ``workbook``.
        sheet: ``None`` or ``""`` selects the active sheet. Otherwise the
            value is looked up as a sheet name; if no such sheet exists it is
            interpreted as an integer position in ``wb.sheetnames``.
        context: Context, normalized through ``get_context``; used for logging.

    Returns:
        The data-frame built from the sheet values.

    Raises:
        KeyError: If ``sheet`` is neither an existing sheet name nor a valid
            sheet position.
    """
    context = get_context(context)
    if isinstance(wb, bytes):
        wb = workbook(wb, context=context)

    if sheet in ("", None):
        context.info("Using active sheet")
        ws = wb.active
    else:
        try:
            # An exact name match always wins, as before.
            ws = wb[sheet]
        except KeyError:
            # Fall back to a positional index. Previously this resolution ran
            # only after the name lookup had already succeeded or raised, so
            # it never took effect.
            try:
                i = int(sheet)
                resolved = wb.sheetnames[i]
            except (TypeError, ValueError, IndexError):
                resolved = None
            if resolved is None:
                raise  # re-raise the original KeyError from the name lookup
            context.info(f"Using sheet {i} with name '{resolved}'")
            ws = wb[resolved]

    rows = ws.values
    cols = next(rows)[1:]  # header row without the top-left cell
    body = list(rows)
    idx = [r[0] for r in body]  # first cell of each row becomes the index
    values = (islice(r, 1, None) for r in body)
    return pd.DataFrame(values, index=idx, columns=cols)
def evaluate(query):
    """Evaluate ``query`` with the context returned by ``get_context()``.

    Returns whatever the context's ``evaluate`` method returns (a State).
    """
    context = get_context()
    return context.evaluate(query)
def evaluate_template(template: str, prefix="$", sufix="$"):
    """Evaluate a string template, replacing every embedded query by its value.

    Queries inside ``template`` are delimited by ``prefix`` and ``sufix``.
    They are expected to evaluate to strings and not to cause errors.
    The work is delegated to the ``evaluate_template`` method of the context
    returned by ``get_context()``.
    """
    context = get_context()
    delimiters = {"prefix": prefix, "sufix": sufix}
    return context.evaluate_template(template, **delimiters)
def sync_store(context=None):
    """Call ``sync()`` on the context's store and report success.

    Args:
        context: Context, normalized through ``get_context``.

    Returns:
        A dict with ``status`` set to ``"OK"`` and a ``message``.
    """
    context = get_context(context)
    context.info("Sync store")
    store = context.store()
    store.sync()
    return {"status": "OK", "message": "Store synchronized"}
def get_context(self):
    """Return the context to work with.

    When ``self.context`` is set, the result of its ``new_empty()`` is
    returned; otherwise the call is delegated to the ``get_context()``
    callable resolved from the enclosing (non-class) scope.
    """
    if self.context is not None:
        return self.context.new_empty()
    return get_context()