Esempio n. 1
0
 def make(self, key, store=None, context=None):
     """Run the recipe's SQL query and store the result as parquet bytes.

     The query in ``self.data["sql"]`` is executed in the execution context
     built by ``self.make_execution_context``. The collected record batches
     are written to a parquet file named ``self.data["filename"]`` inside a
     temporary directory, and the bytes of that file are stored under ``key``
     together with ``self.metadata(key)``.

     Args:
         key: Store key under which the data (or the failure metadata) is stored.
         store: Target store; when ``None`` the store of the context is used.
         context: Context handed to ``get_context``.

     Raises:
         Exception: If the recipe data has no ``"sql"`` or no ``"filename"`` entry.

     Errors raised while executing the query or writing the result are not
     propagated: they are recorded with ``Metadata.exception`` and only the
     metadata is stored for ``key``.
     """
     context = get_context(context)
     if "sql" not in self.data:
         raise Exception(
             f"Recipe {self.recipe_name()} of type {self.recipe_type()} does not have sql."
         )
     if "filename" not in self.data:
         raise Exception(
             f"Recipe {self.recipe_name()} of type {self.recipe_type()} does not have a filename."
         )
     if store is None:
         store = context.store()
     with TemporaryDirectory() as tmpdir:
         metadata = self.metadata(key)
         try:
             ctx = self.make_execution_context(tmpdir, store, context)
             df = ctx.sql(self.data["sql"])
             table = pyarrow.Table.from_batches(df.collect())
             path = Path(tmpdir) / self.data["filename"]
             pyarrow.parquet.write_table(table, str(path))
             store.store(key, path.read_bytes(), metadata)
         except Exception:
             # Only genuine errors are recorded as a recipe failure;
             # KeyboardInterrupt and SystemExit must propagate instead of
             # being stored as if the recipe itself had failed.
             m = Metadata(metadata)
             m.exception("Parquet SQL recipe failed",
                         traceback=traceback.format_exc())
             store.store_metadata(key, m.as_dict())
Esempio n. 2
0
 def make(self, key, store=None, context=None):
     """Evaluate the recipe's query, directing the result to ``key``.

     The query is read from ``self.data["query"]`` and evaluated by the
     context with ``store_key=key``. The target store is ``store`` when
     given, otherwise the store of the context.
     """
     ctx = get_context(context)
     target_store = ctx.store() if store is None else store
     ctx.evaluate(self.data["query"], store_key=key, store_to=target_store)
Esempio n. 3
0
def polars_df(data, extension=None, context=None):
    """Convert bytes or a data-frame to a polars data-frame.

    Args:
        data: One of
            - ``bytes``: decoded with ``POLARS_DATAFRAME_STATE_TYPE.from_bytes``,
            - a pandas ``DataFrame``: converted with ``pl.DataFrame``,
            - a polars ``DataFrame``: returned unchanged.
        extension: Passed to ``from_bytes`` when ``data`` is bytes; unused otherwise.
        context: Context handed to ``get_context``; used for info messages.

    Returns:
        The object produced by the matching branch above.

    Raises:
        Exception: If ``data`` is none of the supported types.
    """
    context = get_context(context)
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of bytes.
    if isinstance(data, bytes):
        context.info(f"Polars data-frame from bytes. Extension:'{extension}'")
        return POLARS_DATAFRAME_STATE_TYPE.from_bytes(data, extension=extension)
    if isinstance(data, pd.DataFrame):
        context.info("Polars data-frame from Pandas data-frame")
        return pl.DataFrame(data)
    if isinstance(data, pl.DataFrame):
        context.info("Polars data-frame kept as it is")
        return data
    raise Exception(f"Unsupported polars dataframe type: {type(data)}")
Esempio n. 4
0
def evaluate_and_save(
    query, target_directory=None, target_file=None, target_resource_directory=None
):
    """Evaluate a query and save its result via the current context.

    Thin wrapper that forwards all arguments to ``evaluate_and_save`` of the
    context returned by ``get_context()``.

    The output is saved either to
    - a target directory (current working directory by default), in a file
      deduced from the query, or
    - ``target_file`` (if specified).

    Returns a state.
    """
    destination = {
        "target_directory": target_directory,
        "target_file": target_file,
        "target_resource_directory": target_resource_directory,
    }
    current = get_context()
    return current.evaluate_and_save(query, **destination)
Esempio n. 5
0
def workbook(data, index=True, header=True, context=None):
    """Convert bytes or a pandas data-frame to a workbook.

    Args:
        data: One of
            - ``bytes``: decoded with ``OPENPYXL_WORKBOOK_STATE_TYPE.from_bytes``,
            - a pandas ``DataFrame``: its rows are appended to the active
              sheet of a new ``Workbook``,
            - a ``Workbook``: returned unchanged.
        index: Passed to ``dataframe_to_rows`` (data-frame input only).
        header: Passed to ``dataframe_to_rows`` (data-frame input only).
        context: Context handed to ``get_context``; used for info messages.

    Returns:
        The workbook produced by the matching branch above.

    Raises:
        Exception: If ``data`` is none of the supported types.
    """
    context = get_context(context)
    if isinstance(data, bytes):
        context.info("Workbook from bytes")
        return OPENPYXL_WORKBOOK_STATE_TYPE.from_bytes(data)
    if isinstance(data, pd.DataFrame):
        context.info("Workbook from pandas DataFrame")
        wb = Workbook()
        ws = wb.active
        # The rows come from the ``data`` argument; the previous code referred
        # to an undefined name ``df`` and failed with NameError on this branch.
        for row in dataframe_to_rows(data, index=index, header=header):
            ws.append(row)
        return wb
    if isinstance(data, Workbook):
        return data
    raise Exception(f"Unsupported workbook type: {type(data)}")
Esempio n. 6
0
    def make(self, key, store=None, context=None):
        """Concatenate the data-frames of several queries and store the result.

        Each element of ``self.data["concat"]`` is either
        - a string: a query evaluated as is, or
        - a dict with ``"query"``, ``"column"`` and ``"value"``: the query is
          evaluated and the column ``column`` of its data-frame is set to
          ``value`` before concatenation.

        The frames are combined with ``pd.concat(..., sort=False)``, encoded
        according to the extension of ``key`` and stored under ``key`` with the
        recipe metadata extended by ``type_identifier``, ``mimetype`` and
        ``data_characteristics``.

        Args:
            key: Store key under which the data (or the failure metadata) is stored.
            store: Target store; when ``None`` the store of the context is used.
            context: Context handed to ``get_context``.

        Any error (including a missing ``"filename"`` or ``"concat"`` entry) is
        not propagated: it is recorded with ``Metadata.exception`` and only
        the metadata is stored for ``key``.
        """
        import liquer.store as ls
        import liquer.state_types as st

        context = get_context(context)
        # Resolve the store before entering the try block: the failure handler
        # needs it, and the validation errors below would otherwise reach the
        # handler while ``store`` is still None.
        if store is None:
            store = context.store()
        try:
            if "filename" not in self.data:
                raise Exception(
                    f"Recipe {self.recipe_name()} of type {self.recipe_type()} does not have a filename.")
            if "concat" not in self.data:
                raise Exception(
                    f"Recipe {self.recipe_name()} of type {self.recipe_type()} does not have a 'concat' section with queries to concatenate.")

            to_join = []
            for i, x in enumerate(self.data["concat"]):
                if isinstance(x, str):
                    query, tag = x, None
                elif isinstance(x, dict):
                    query, tag = x["query"], (x["column"], x["value"])
                else:
                    raise Exception(f"Unrecognized element {i+1} to concat: {x}")

                context.info(f"Evaluate query {i+1}: {query}")
                df = context.evaluate(query).get()
                if not isinstance(df, pd.DataFrame):
                    raise Exception(f"Query {i+1} ({query}) in recipe {self.recipe_name()} is not a dataframe but {type(df)}")
                if tag is not None:
                    column, value = tag
                    # Tag a copy so that the object returned by the evaluated
                    # state is not modified in place.
                    df = df.copy()
                    df[column] = value
                to_join.append(df)
            df = pd.concat(to_join, sort=False)

            extension = ls.key_extension(key)
            b, mimetype, type_identifier = st.encode_state_data(df, extension=extension)
            metadata = self.metadata(key)
            # The keys must be the literal field names; using the variables as
            # keys stored entries such as {<mimetype value>: <mimetype value>}.
            metadata.update({"type_identifier": type_identifier, "mimetype": mimetype})
            metadata["data_characteristics"] = st.data_characteristics(df)
            store.store(key, b, metadata=metadata)
        except Exception:
            # Only genuine errors are recorded as a recipe failure;
            # KeyboardInterrupt and SystemExit propagate.
            metadata = self.metadata(key)
            m = Metadata(metadata)
            m.exception("Pandas concat recipe failed", traceback=traceback.format_exc())
            store.store_metadata(key, m.as_dict())
Esempio n. 7
0
def workbook_sheet_df(wb, sheet=None, context=None):
    """Extract a workbook sheet as a data-frame.

    The first row of the sheet supplies the column labels and the first
    column supplies the index; the remaining cells become the data.

    Args:
        wb: A workbook, or bytes which are first converted with ``workbook``.
        sheet: Which sheet to use:
            - ``None`` or ``""``: the active sheet,
            - an existing sheet name: that sheet,
            - otherwise a value convertible with ``int``: index into
              ``wb.sheetnames``.
        context: Context handed to ``get_context``; used for info messages.

    Returns:
        A pandas ``DataFrame``; an empty one when the sheet yields no rows.

    Raises:
        KeyError: Propagated from ``wb[sheet]`` when ``sheet`` is neither an
            existing sheet name nor a valid index.
    """
    context = get_context(context)
    if isinstance(wb, bytes):
        wb = workbook(wb, context=context)
    if sheet in ("", None):
        context.info("Using active sheet")
        ws = wb.active
    else:
        # A matching sheet name wins. Only otherwise is ``sheet`` interpreted
        # as an index. The index must be resolved BEFORE the lookup, because
        # wb[<index>] is a lookup by title and fails for plain indices.
        if sheet not in wb.sheetnames:
            try:
                i = int(sheet)
                sheet = wb.sheetnames[i]
            except (TypeError, ValueError, IndexError):
                # Not a usable index; let wb[sheet] below report the problem.
                pass
            else:
                context.info(f"Using sheet {i} with name '{sheet}'")
        ws = wb[sheet]

    rows = ws.values
    first_row = next(rows, None)
    if first_row is None:
        # A sheet that yields no rows has no header to build columns from.
        return pd.DataFrame()
    cols = first_row[1:]
    rows = list(rows)
    idx = [r[0] for r in rows]
    data = (islice(r, 1, None) for r in rows)
    return pd.DataFrame(data, index=idx, columns=cols)
Esempio n. 8
0
def evaluate(query):
    """Evaluate a query with the current context and return the resulting State.

    Delegates to ``evaluate`` of the context returned by ``get_context()``.
    """
    current = get_context()
    state = current.evaluate(query)
    return state
Esempio n. 9
0
def evaluate_template(template: str, prefix="$", sufix="$"):
    """Evaluate a string template, replacing every query by its value.

    Queries inside ``template`` are delimited by ``prefix`` and ``sufix``.
    They should evaluate to strings and should not cause errors.
    The work is delegated to ``evaluate_template`` of the context returned
    by ``get_context()``.
    """
    delimiters = {"prefix": prefix, "sufix": sufix}
    current = get_context()
    return current.evaluate_template(template, **delimiters)
Esempio n. 10
0
def sync_store(context=None):
    """Synchronize the store of the context and report success.

    Calls ``sync()`` on the store of the context obtained through
    ``get_context`` and returns a dictionary with ``status`` and ``message``.
    """
    ctx = get_context(context)
    ctx.info("Sync store")
    target = ctx.store()
    target.sync()
    return {"status": "OK", "message": "Store synchronized"}
Esempio n. 11
0
 def get_context(self):
     """Return a context to work with.

     When the instance holds a context, a new empty context derived from it
     (``self.context.new_empty()``) is returned; otherwise the result of the
     module-level ``get_context()`` is returned.
     """
     if self.context is not None:
         return self.context.new_empty()
     return get_context()