def read_from_db(datasource, sql):
    """Execute *sql* against a database and return the rows as a DataFrame.

    Args:
        datasource: Mapping whose items are passed as keyword arguments
            to ``DbEngine``.
        sql: Query text handed to ``pandas.read_sql_query``. Must not be
            ``None``.

    Returns:
        A dict with the single key ``'table'`` mapped to the DataFrame
        produced by the query.
    """
    if sql is None:
        # NOTE(review): presumably raises; confirm against the helper.
        raise_runtime_error('sql is required parameter')

    with DbEngine(**datasource) as db_engine:
        query_result = pd.read_sql_query(sql, db_engine)
        # Column names are checked before the frame is handed back.
        util.validate_column_name(query_result)
        return {'table': query_result}
def read_parquet_or_csv(path):
    """Load *path* as a DataFrame, trying parquet first and CSV second.

    *path* is first converted with ``data_util.make_data_path_from_key``
    and read as parquet via the pyarrow engine. If that attempt raises
    ``pyarrow.lib.ArrowIOError``, the original *path* is passed to
    ``read_csv`` instead.

    Args:
        path: Data key (for the parquet attempt) or path (for the CSV
            fallback).

    Returns:
        The loaded DataFrame, after its column names have been validated.
    """
    try:
        # Treat path as a key into the parquet data storage first.
        parquet_location = data_util.make_data_path_from_key(path)
        frame = pd.read_parquet(path=parquet_location, engine='pyarrow')
    except pyarrow.lib.ArrowIOError:
        frame = read_csv(path)

    data_util.validate_column_name(frame)
    return frame
def read_from_db(datasource, sql):
    """Execute *sql* against a database, rejecting system-table reads.

    Before the query runs, the statement is lower-cased and tokenized.
    Every token that directly follows ``from`` is inspected: if it has the
    two-part form ``<schema>.<table>`` and ``<table>`` is listed in
    ``sys_table_lists``, the query is refused.

    Args:
        datasource: Mapping whose items are passed as keyword arguments
            to ``DbEngine``.
        sql: Query text handed to ``pandas.read_sql_query``. Must not be
            ``None``.

    Returns:
        A dict with the single key ``'table'`` mapped to the DataFrame
        produced by the query.

    Raises:
        Exception: If a ``from`` clause references a table found in
            ``sys_table_lists``.
    """
    if sql is None:
        # NOTE(review): presumably raises; confirm against the helper.
        raise_runtime_error('sql is required parameter')

    import re

    # Pad parentheses so they become standalone tokens, then collapse every
    # run of whitespace (spaces, tabs AND newlines) into one space. Only
    # collapsing literal spaces would let "from\nschema.table" slip past
    # the guard below.
    padded = sql.lower().replace("(", " ( ").replace(")", " ) ")
    # Re-join "schema. table" so the qualified name is a single token.
    normalized = re.sub(r'\s+', ' ', padded).replace(". ", ".")
    tokens = normalized.split()

    # Pairing each token with its successor avoids indexing past the end
    # when the statement ends with a dangling "from".
    for keyword, target in zip(tokens, tokens[1:]):
        if keyword != 'from':
            continue
        name_parts = target.split('.')
        if len(name_parts) == 2 and name_parts[1] in sys_table_lists:
            raise Exception(
                'Cannot access system tables from Brightics: {}'.format(
                    target))

    with DbEngine(**datasource) as engine:
        df = pd.read_sql_query(sql, engine)
        util.validate_column_name(df)
        return {'table': df}
def read_parquet(path):
    """Read the parquet data at *path* into a DataFrame.

    Args:
        path: Location passed straight to ``pandas.read_parquet``.

    Returns:
        The loaded DataFrame, after its column names have been validated.
    """
    frame = pd.read_parquet(engine='pyarrow', path=path)
    data_util.validate_column_name(frame)
    return frame