Example #1
def read_from_db(datasource, sql):
    # 'sql' is mandatory; raise_runtime_error is a project helper that aborts the task.
    if sql is None:
        raise_runtime_error('sql is required parameter')

    # DbEngine builds a database engine from the datasource settings; the query
    # result comes back as a pandas DataFrame and is returned under the 'table' key.
    with DbEngine(**datasource) as engine:
        df = pd.read_sql_query(sql, engine)
        util.validate_column_name(df)
        return {'table': df}
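DbEngine, util, and raise_runtime_error are project helpers and are not shown here. A minimal standalone sketch of the same read-SQL-into-DataFrame pattern using a plain SQLAlchemy engine (the function name and connection URL are illustrative):

import pandas as pd
from sqlalchemy import create_engine

def read_sql_to_table(url, sql):
    # url is a SQLAlchemy connection string, e.g. 'sqlite:///example.db' (hypothetical).
    engine = create_engine(url)
    try:
        df = pd.read_sql_query(sql, engine)
    finally:
        engine.dispose()
    return {'table': df}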
Example #2
def read_parquet_or_csv(path):
    try:
        # try parquet data storage first using path as key
        df = pd.read_parquet(path=data_util.make_data_path_from_key(path),
                             engine='pyarrow')
    except pyarrow.lib.ArrowIOError:
        # not readable as Parquet; fall back to reading the same key as CSV
        df = read_csv(path)

    data_util.validate_column_name(df)

    return df
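data_util and read_csv are project helpers, and pandas and pyarrow are assumed to be imported at module level. A self-contained sketch of the same parquet-then-CSV fallback with plain pandas calls and a broader exception catch (ArrowIOError and ArrowInvalid map onto OSError and ValueError in recent pyarrow releases); the function name is illustrative:

import pandas as pd

def read_parquet_or_csv_local(path):
    # Try Parquet first; if the file is missing or not valid Parquet,
    # pyarrow raises an OSError or ValueError subclass, so fall back to CSV.
    try:
        df = pd.read_parquet(path, engine='pyarrow')
    except (OSError, ValueError):
        df = pd.read_csv(path)
    return df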
Example #3
def read_from_db(datasource, sql):
    if sql is None:
        raise_runtime_error('sql is required parameter')

    # Tokenize the query: lower-case it, pad parentheses with spaces, collapse
    # repeated spaces, re-join "schema. table" into "schema.table", then split.
    import re
    sql_tokens = re.sub(' +', ' ',
                        sql.lower().replace('(', ' ( ').replace(')', ' ) ')
                        ).replace('. ', '.').split(' ')

    # Reject queries that read FROM a system table, i.e. a schema.table whose
    # table name appears in the module-level sys_table_lists.
    for i in range(len(sql_tokens) - 1):
        if sql_tokens[i] == 'from':
            schema_and_table = sql_tokens[i + 1].split('.')
            if len(schema_and_table) == 2 and schema_and_table[1] in sys_table_lists:
                raise Exception(
                    'Cannot access system tables from Brightics: {}'.format(
                        sql_tokens[i + 1]))

    with DbEngine(**datasource) as engine:
        df = pd.read_sql_query(sql, engine)
        util.validate_column_name(df)
        return {'table': df}
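sys_table_lists is a deny-list defined elsewhere in the module. The tokenization can be exercised on its own; below is a small standalone check with a hypothetical deny-list and example queries:

import re

sys_table_lists = ['pg_shadow', 'pg_authid']  # hypothetical deny-list, for illustration only

def references_system_table(sql):
    # Same tokenization as above: the token after each 'from' is checked
    # for a schema.table reference whose table name is on the deny-list.
    tokens = re.sub(' +', ' ',
                    sql.lower().replace('(', ' ( ').replace(')', ' ) ')
                    ).replace('. ', '.').split(' ')
    for i in range(len(tokens) - 1):
        if tokens[i] == 'from':
            parts = tokens[i + 1].split('.')
            if len(parts) == 2 and parts[1] in sys_table_lists:
                return True
    return False

print(references_system_table('SELECT * FROM pg_catalog.pg_shadow'))  # True
print(references_system_table('SELECT * FROM public.users'))          # False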
Example #4
def read_parquet(path):
    # Read a Parquet file into a DataFrame and run the project's column-name check.
    df = pd.read_parquet(path=path, engine='pyarrow')
    data_util.validate_column_name(df)
    return df
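data_util is a project helper; pandas alone is enough to exercise the read path. A small round-trip usage sketch (the file name is illustrative):

import pandas as pd

df_out = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
df_out.to_parquet('example.parquet', engine='pyarrow')  # hypothetical file name

df_in = pd.read_parquet(path='example.parquet', engine='pyarrow')
print(df_in)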