def runsql(self):
    """Run arbitrary SQL posted by the client and return an HTML table.

    Expects a JSON payload in the ``data`` form field with keys
    ``sql`` and ``database_id``.  The statement is wrapped in an outer
    ``SELECT * FROM (...) LIMIT 1000`` before execution so the client
    can never pull an unbounded result set.
    """
    session = db.session()
    limit = 1000
    data = json.loads(request.form.get('data'))
    sql = data.get('sql')
    database_id = data.get('database_id')
    mydb = session.query(models.Database).filter_by(id=database_id).first()
    content = ""
    if mydb:
        eng = mydb.get_sqla_engine()
        if limit:
            # Wrap the user query so the LIMIT applies regardless of
            # what the inner statement looks like.
            sql = sql.strip().strip(';')
            qry = (select('*').select_from(
                TextAsFrom(text(sql), ['*']).alias('inner_qry')).limit(limit))
            sql = str(
                qry.compile(eng, compile_kwargs={"literal_binds": True}))
        try:
            df = pd.read_sql_query(sql=sql, con=eng)
            content = df.to_html(
                index=False,
                na_rep='',
                classes=(
                    "dataframe table table-striped table-bordered "
                    "table-condensed sql_results"))
        except Exception as e:
            # BUG FIX: ``e.message`` was removed in Python 3; format the
            # exception object itself (equivalent to str(e)).
            content = ('<div class="alert alert-danger">'
                       "{}</div>").format(e)
    session.commit()
    return content
def dashboard(self, dashboard_id):
    """Server side rendering for a dashboard, addressed by id or slug."""
    db_session = db.session()
    # A numeric identifier is a primary key; anything else is a slug.
    lookup = (
        {'id': int(dashboard_id)} if dashboard_id.isdigit()
        else {'slug': dashboard_id})
    qry = db_session.query(models.Dashboard).filter_by(**lookup)
    templates = db_session.query(models.CssTemplate).all()
    dash = qry.first()

    # Hack to log the dashboard_id properly, even when getting a slug
    @log_this
    def dashboard(**kwargs):  # noqa
        pass
    dashboard(dashboard_id=dash.id)

    return self.render_template(
        "caravel/dashboard.html",
        dashboard=dash,
        templates=templates,
        dash_save_perm=appbuilder.sm.has_access('can_save_dash', 'Caravel'),
        dash_edit_perm=appbuilder.sm.has_access(
            'can_edit', 'DashboardModelView'))
def runsql(self):
    """Run arbitrary SQL posted by the client and return an HTML table.

    Reads ``sql`` and ``database_id`` from the JSON ``data`` form
    field, wraps the statement with a LIMIT of 1000 rows, runs it and
    renders the resulting dataframe as an HTML table.
    """
    session = db.session()
    limit = 1000
    data = json.loads(request.form.get('data'))
    sql = data.get('sql')
    database_id = data.get('database_id')
    mydb = session.query(models.Database).filter_by(id=database_id).first()
    content = ""
    if mydb:
        eng = mydb.get_sqla_engine()
        if limit:
            # Enforce the row limit by wrapping the user statement in
            # an outer SELECT.
            sql = sql.strip().strip(';')
            qry = (
                select('*')
                .select_from(TextAsFrom(text(sql), ['*']).alias('inner_qry'))
                .limit(limit)
            )
            sql = str(qry.compile(eng, compile_kwargs={"literal_binds": True}))
        try:
            df = pd.read_sql_query(sql=sql, con=eng)
            content = df.to_html(
                index=False,
                na_rep='',
                classes=(
                    "dataframe table table-striped table-bordered "
                    "table-condensed sql_results"))
        except Exception as e:
            # BUG FIX: ``e.message`` was removed in Python 3; format the
            # exception object itself (equivalent to str(e)).
            content = (
                '<div class="alert alert-danger">'
                "{}</div>"
            ).format(e)
    session.commit()
    return content
def dashboard(self, dashboard_id):
    """Server side rendering for a dashboard, looked up by id or slug."""
    db_session = db.session()
    lookup = (
        {'id': int(dashboard_id)} if dashboard_id.isdigit()
        else {'slug': dashboard_id})
    qry = db_session.query(models.Dashboard).filter_by(**lookup)
    templates = db_session.query(models.CssTemplate).all()
    dash = qry.first()

    # Hack to log the dashboard_id properly, even when getting a slug
    @log_this
    def dashboard(**kwargs):  # noqa
        pass
    dashboard(dashboard_id=dash.id)

    return self.render_template(
        "caravel/dashboard.html",
        dashboard=dash,
        user_id=g.user.get_id(),
        templates=templates,
        dash_save_perm=self.can_access('can_save_dash', 'Caravel'),
        dash_edit_perm=check_ownership(dash, raise_if_false=False))
def dashboard(self, dashboard_id):
    """Server side rendering for a dashboard with slice positions."""
    db_session = db.session()
    lookup = (
        {'id': int(dashboard_id)} if dashboard_id.isdigit()
        else {'slug': dashboard_id})
    qry = db_session.query(models.Dashboard).filter_by(**lookup)
    templates = db_session.query(models.CssTemplate).all()
    dash = qry.first()

    # Hack to log the dashboard_id properly, even when getting a slug
    @log_this
    def dashboard(**kwargs):  # noqa
        pass
    dashboard(dashboard_id=dash.id)

    # Index the stored layout entries by slice id for the template.
    pos_dict = {}
    if dash.position_json:
        pos_dict = {
            int(entry['slice_id']): entry
            for entry in json.loads(dash.position_json)}
    return self.render_template(
        "caravel/dashboard.html",
        dashboard=dash,
        templates=templates,
        pos_dict=pos_dict)
def dashboard(self, dashboard_id):
    """Server side rendering for a dashboard with edit/save permissions."""
    db_session = db.session()
    lookup = (
        {'id': int(dashboard_id)} if dashboard_id.isdigit()
        else {'slug': dashboard_id})
    qry = db_session.query(models.Dashboard).filter_by(**lookup)
    templates = db_session.query(models.CssTemplate).all()
    dash = qry.first()

    # Hack to log the dashboard_id properly, even when getting a slug
    @log_this
    def dashboard(**kwargs):  # noqa
        pass
    dashboard(dashboard_id=dash.id)

    # Saving requires both ownership and the save permission.
    dash_edit_perm = check_ownership(dash, raise_if_false=False)
    dash_save_perm = (
        dash_edit_perm and self.can_access('can_save_dash', 'Caravel'))
    return self.render_template(
        "caravel/dashboard.html",
        dashboard=dash,
        user_id=g.user.get_id(),
        templates=templates,
        dash_save_perm=dash_save_perm,
        dash_edit_perm=dash_edit_perm)
def dashboard(self, dashboard_id):
    """Server side rendering for a dashboard with slice positions."""
    db_session = db.session()
    qry = db_session.query(models.Dashboard)
    if dashboard_id.isdigit():
        qry = qry.filter_by(id=int(dashboard_id))
    else:
        qry = qry.filter_by(slug=dashboard_id)
    templates = db_session.query(models.CssTemplate).all()
    dash = qry.first()

    # Hack to log the dashboard_id properly, even when getting a slug
    @log_this
    def dashboard(**kwargs):  # noqa
        pass
    dashboard(dashboard_id=dash.id)

    # Build a slice_id -> layout-entry mapping for the template.
    pos_dict = {}
    if dash.position_json:
        for entry in json.loads(dash.position_json):
            pos_dict[int(entry['slice_id'])] = entry
    return self.render_template(
        "caravel/dashboard.html",
        dashboard=dash,
        templates=templates,
        pos_dict=pos_dict)
def overwrite_slice(self, slc):
    """Persist changes to an existing slice, if the user owns it."""
    if not check_ownership(slc, raise_if_false=False):
        flash("You cannot overwrite [{}]".format(slc), "danger")
        return
    db_session = db.session()
    db_session.merge(slc)
    db_session.commit()
    flash("Slice [{}] has been overwritten".format(slc.slice_name), "info")
def slice(self, slice_id):
    """Redirects a request for a slice id to its corresponding URL"""
    db_session = db.session()
    slc = (
        db_session.query(models.Slice)
        .filter_by(id=int(slice_id))
        .first())
    if not slc:
        flash("The specified slice could not be found", "danger")
        return redirect('/slicemodelview/list/')
    return redirect(slc.slice_url)
def overwrite_slice(self, slc):
    """Persist changes to an existing slice, if the user owns it.

    Non-owners get a flash error message; owners get the slice merged
    and committed, with a confirmation flash.
    """
    can_update = check_ownership(slc, raise_if_false=False)
    if not can_update:
        # BUG FIX: the flash category was missing, so the denial message
        # rendered with the default style instead of as an error; use
        # "danger" as the sibling implementation does.
        flash("You cannot overwrite [{}]".format(slc), "danger")
    else:
        session = db.session()
        session.merge(slc)
        session.commit()
        msg = "Slice [{}] has been overwritten".format(slc.slice_name)
        flash(msg, "info")
def slice(self, slice_id):
    """Redirects a request for a slice id to its corresponding URL"""
    db_session = db.session()
    slc = db_session.query(models.Slice).filter_by(id=int(slice_id)).first()
    if not slc:
        flash("The specified slice could not be found", "danger")
        return redirect('/slicemodelview/list/')
    # Propagate the optional ?standalone flag onto the slice URL.
    standalone = request.args.get('standalone', 'false')
    url = '{slc.slice_url}&standalone={standalone}'.format(
        slc=slc, standalone=standalone)
    return redirect(url)
def sql_json(self):
    """Run arbitrary SQL and return the result set as JSON.

    Reads ``sql`` and ``database_id`` from the form, enforces a LIMIT
    of 1000 rows, and requires either `all_datasource_access` or the
    per-database `database_access` permission.
    """
    session = db.session()
    limit = 1000
    sql = request.form.get('sql')
    database_id = request.form.get('database_id')
    mydb = session.query(models.Database).filter_by(id=database_id).first()

    error_msg = ""
    if not mydb:
        # BUG FIX: the permission check below dereferences ``mydb.perm``
        # and used to run before this existence check, raising an
        # AttributeError on an unknown database id instead of returning
        # the intended error payload.
        error_msg = "The database selected doesn't seem to exist"
    else:
        if not (
                self.can_access(
                    'all_datasource_access', 'all_datasource_access') or
                self.can_access('database_access', mydb.perm)):
            raise utils.CaravelSecurityException(_(
                "SQL Lab requires the `all_datasource_access` or "
                "specific DB permission"))
        eng = mydb.get_sqla_engine()
        if limit:
            sql = sql.strip().strip(';')
            qry = (
                select('*')
                .select_from(TextAsFrom(text(sql), ['*'])
                             .alias('inner_qry'))
                .limit(limit)
            )
            sql = '{}'.format(qry.compile(
                eng, compile_kwargs={"literal_binds": True}))
        try:
            df = pd.read_sql_query(sql=sql, con=eng)
            df = df.fillna(0)  # TODO make sure NULL
        except Exception as e:
            logging.exception(e)
            error_msg = utils.error_msg_from_exception(e)
    session.commit()
    if error_msg:
        return Response(
            json.dumps({
                'error': error_msg,
            }),
            status=500,
            mimetype="application/json")
    else:
        data = {
            'columns': [c for c in df.columns],
            'data': df.to_dict(orient='records'),
        }
        return json.dumps(
            data, default=utils.json_int_dttm_ser, allow_nan=False)
def add_slices(self, dashboard_id):
    """Add and save slices to a dashboard"""
    payload = json.loads(request.form.get('data'))
    db_session = db.session()
    Slice = models.Slice  # noqa
    dash = (
        db_session.query(models.Dashboard)
        .filter_by(id=dashboard_id)
        .first())
    # Only the dashboard owner may attach new slices.
    check_ownership(dash, raise_if_false=True)
    new_slices = db_session.query(Slice).filter(
        Slice.id.in_(payload['slice_ids']))
    dash.slices += new_slices
    db_session.merge(dash)
    db_session.commit()
    db_session.close()
    return "SLICES ADDED"
def save(self, args, slc):
    """Saves (inserts or overwrite a slice)

    ``args`` is the request arg MultiDict describing the slice form;
    ``slc`` is the existing slice when overwriting (ignored and
    replaced by a new Slice when action == "save").
    """
    session = db.session()
    slice_name = args.get('slice_name')
    action = args.get('action')

    # TODO use form processing form wtforms
    # Normalize the MultiDict: fields in `as_list` must always be
    # lists; everything else is flattened to its first value.
    d = args.to_dict(flat=False)
    del d['action']
    del d['previous_viz_type']
    as_list = ('metrics', 'groupby', 'columns')
    for k in d:
        v = d.get(k)
        if k in as_list and not isinstance(v, list):
            d[k] = [v] if v else []
        if k not in as_list and isinstance(v, list):
            d[k] = v[0]

    # Resolve which datasource FK column the id belongs to.
    table_id = druid_datasource_id = None
    datasource_type = args.get('datasource_type')
    if datasource_type in ('datasource', 'druid'):
        druid_datasource_id = args.get('datasource_id')
    elif datasource_type == 'table':
        table_id = args.get('datasource_id')

    if action == "save":
        # "save" creates a fresh slice regardless of the one passed in.
        slc = models.Slice()
        msg = "Slice [{}] has been saved".format(slice_name)
    elif action == "overwrite":
        msg = "Slice [{}] has been overwritten".format(slice_name)

    slc.params = json.dumps(d, indent=4, sort_keys=True)
    slc.datasource_name = args.get('datasource_name')
    slc.viz_type = args.get('viz_type')
    slc.druid_datasource_id = druid_datasource_id
    slc.table_id = table_id
    slc.datasource_type = datasource_type
    slc.slice_name = slice_name

    # add() for new slices, merge() to update a detached instance.
    if action == "save":
        session.add(slc)
    elif action == "overwrite":
        session.merge(slc)
    session.commit()
    flash(msg, "info")
    return redirect(slc.slice_url)
def runsql(self):
    """Run arbitrary SQL posted by the client and return an HTML table.

    Requires either `all_datasource_access` or the per-database
    `database_access` permission, and enforces a LIMIT of 1000 rows.
    """
    # TODO deprecate in favor on `sql_json`
    session = db.session()
    limit = 1000
    data = json.loads(request.form.get('data'))
    sql = data.get('sql')
    database_id = data.get('database_id')
    mydb = session.query(models.Database).filter_by(id=database_id).first()
    content = ""
    if mydb:
        # BUG FIX: the permission check dereferences ``mydb.perm`` and
        # used to run before verifying the database exists, raising
        # AttributeError for an unknown database id.
        if not (
                self.can_access(
                    'all_datasource_access', 'all_datasource_access') or
                self.can_access('database_access', mydb.perm)):
            raise utils.CaravelSecurityException(_(
                "SQL Lab requires the `all_datasource_access` or "
                "specific db permission"))
        eng = mydb.get_sqla_engine()
        if limit:
            sql = sql.strip().strip(';')
            qry = (
                select('*')
                .select_from(TextAsFrom(text(sql), ['*'])
                             .alias('inner_qry'))
                .limit(limit)
            )
            sql = '{}'.format(qry.compile(
                eng, compile_kwargs={"literal_binds": True}))
        try:
            df = pd.read_sql_query(sql=sql, con=eng)
            content = df.to_html(
                index=False,
                na_rep='',
                classes=(
                    "dataframe table table-striped table-bordered "
                    "table-condensed sql_results").split(' '))
        except Exception as e:
            # BUG FIX: ``e.message`` was removed in Python 3; format the
            # exception object itself (equivalent to str(e)).
            content = (
                '<div class="alert alert-danger">'
                "{}</div>"
            ).format(e)
    session.commit()
    return content
def create_temp_table(self):
    """Run a SQL query and materialize its result as a temporary table.

    The result set is written to the database configured through
    ``SQLALCHEMY_TEMPTABLE_URI`` and registered as a featured
    ``SqlaTable`` so it can be explored right away.
    """
    session = db.session()
    data = json.loads(request.form.get('data'))
    sql = data.get('sql')
    database_id = data.get('database_id')
    mydb = session.query(models.Database).filter_by(id=database_id).first()
    if (
            not self.appbuilder.sm.has_access(
                'all_datasource_access', 'all_datasource_access')):
        # BUG FIX: used to be ``raise Exception("test")``, a leftover
        # placeholder; raise a meaningful error message instead.
        raise Exception(
            "Creating a temporary table requires the "
            "`all_datasource_access` permission")
    if not mydb:
        # Guard: the original fell through to an undefined ``df`` when
        # the database id could not be resolved.
        return 'The database selected does not seem to exist', 404
    if not config.get('SQLALCHEMY_TEMPTABLE_URI'):
        # BUG FIX: checked before running the (potentially expensive)
        # query instead of after it.
        return 'No temporary table configured', 500
    eng = mydb.get_sqla_engine()
    sql = sql.strip().strip(';')
    qry = (
        select('*')
        .select_from(
            TextAsFrom(text(sql), ['*']).alias('inner_qry'))
    )
    sql = str(qry.compile(eng, compile_kwargs={"literal_binds": True}))
    df = pd.read_sql_query(sql=sql, con=eng)
    # Reuse (or lazily create) the registered 'tempDB' database entry.
    dbobj = session.query(models.Database).filter_by(
        database_name='tempDB').first()
    if not dbobj:
        dbobj = models.Database(
            database_name='tempDB',
            sqlalchemy_uri=config.get('SQLALCHEMY_TEMPTABLE_URI'))
        session.add(dbobj)
        session.commit()
    tbl_name = 'Temp_%s' % datetime.today().strftime("%Y%m%dT%H%M")
    engine = create_engine(config.get('SQLALCHEMY_TEMPTABLE_URI'))
    df.to_sql(
        tbl_name, engine, if_exists='fail', chunksize=500, index=False)
    tbl = models.SqlaTable(table_name=tbl_name)
    tbl.description = 'Temporary table created from SQL query'
    tbl.is_featured = True
    tbl.database = dbobj
    db.session.merge(tbl)
    db.session.commit()
    tbl.fetch_metadata()
    session.commit()
    return 'OK', 200
def refresh_datasources(self):
    """endpoint that refreshes druid datasources metadata

    Iterates over every registered Druid cluster; on the first cluster
    that fails, flashes an error and redirects to the cluster list
    view without refreshing the remaining clusters.
    """
    session = db.session()
    for cluster in session.query(models.DruidCluster).all():
        try:
            cluster.refresh_datasources()
        except Exception as e:
            # NOTE: aborts the whole run on the first failing cluster;
            # clusters refreshed so far are not committed.
            flash(
                "Error while processing cluster '{}'\n{}".format(
                    cluster, str(e)),
                "danger")
            logging.exception(e)
            return redirect('/druidclustermodelview/list/')
        cluster.metadata_last_refreshed = datetime.now()
        flash(
            "Refreshed metadata from cluster "
            "[" + cluster.cluster_name + "]",
            'info')
    session.commit()
    return redirect("/datasourcemodelview/list/")
def save_dash(self, dashboard_id):
    """Save a dashboard's metadata"""
    data = json.loads(request.form.get('data'))
    positions = data['positions']
    # Slices no longer present in the submitted layout get detached.
    kept_ids = {int(entry['slice_id']) for entry in positions}
    db_session = db.session()
    Dash = models.Dashboard  # noqa
    dash = db_session.query(Dash).filter_by(id=dashboard_id).first()
    dash.slices = [slc for slc in dash.slices if slc.id in kept_ids]
    dash.position_json = json.dumps(data['positions'], indent=4)
    md = dash.metadata_dejson
    if 'filter_immune_slices' not in md:
        md['filter_immune_slices'] = []
    md['expanded_slices'] = data['expanded_slices']
    dash.json_metadata = json.dumps(md, indent=4)
    dash.css = data['css']
    db_session.merge(dash)
    db_session.commit()
    db_session.close()
    return "SUCCESS"
def favstar(self, class_name, obj_id, action):
    """Toggle or count the current user's 'favorite' star on an object."""
    db_session = db.session()
    FavStar = models.FavStar
    count = 0
    favs = db_session.query(FavStar).filter_by(
        class_name=class_name, obj_id=obj_id,
        user_id=g.user.id).all()
    if action == 'select':
        # Idempotent: only insert when no star exists yet.
        if not favs:
            db_session.add(FavStar(
                class_name=class_name,
                obj_id=obj_id,
                user_id=g.user.id,
                dttm=datetime.now()))
        count = 1
    elif action == 'unselect':
        for fav in favs:
            db_session.delete(fav)
    else:
        # Any other action is treated as a count request.
        count = len(favs)
    db_session.commit()
    return Response(
        json.dumps({'count': count}),
        mimetype="application/json")
def favstar(self, class_name, obj_id, action):
    """Toggle or count the current user's 'favorite' star on an object."""
    db_session = db.session()
    FavStar = models.FavStar  # noqa
    count = 0
    user_id = g.user.get_id()
    favs = db_session.query(FavStar).filter_by(
        class_name=class_name, obj_id=obj_id,
        user_id=user_id).all()
    if action == 'select':
        # Idempotent: only insert when no star exists yet.
        if not favs:
            db_session.add(FavStar(
                class_name=class_name,
                obj_id=obj_id,
                user_id=user_id,
                dttm=datetime.now()))
        count = 1
    elif action == 'unselect':
        for fav in favs:
            db_session.delete(fav)
    else:
        # Any other action is treated as a count request.
        count = len(favs)
    db_session.commit()
    return Response(
        json.dumps({'count': count}),
        mimetype="application/json")
def overwrite_slice(self, slc):
    """Merge an edited slice back into the database and notify the user."""
    db_session = db.session()
    db_session.merge(slc)
    db_session.commit()
    flash("Slice [{}] has been overwritten".format(slc.slice_name), "info")
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results.

    Runs the ``Query`` row identified by ``query_id``, tracking status
    and progress on the model.  When invoked through celery (not
    called directly) a dedicated engine/session is created because the
    flask-sqlalchemy scoped session is unavailable in the worker.
    Optionally stores the JSON payload in the results backend.
    """
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    # LINT FIX: dropped the stray trailing semicolon on this assignment.
    is_select = is_query_select(executed_sql)
    if not is_select and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not is_select:
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif (
            query.limit and is_select and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))
    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    # Let the engine spec monitor/advance the cursor (e.g. progress polling).
    db_engine_spec.handle_cursor(cursor, query, session)
    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.CaravelDataFrame(
            pd.DataFrame(data, columns=column_names))
    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.flush()
    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
    if store_results and results_backend:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key
    session.flush()
    session.commit()
    if return_results:
        return payload
def get_sql_results(query_id, return_results=True):
    """Executes the sql query returns the results.

    Runs the ``Query`` row identified by ``query_id``, polling Presto
    for progress when applicable, and returns a JSON-serializable
    payload with the data or the error message.
    """
    # BUG FIX: removed the dangling ``'''`` that followed this function
    # (an unterminated string literal left over from commented-out
    # code) which made the module unparseable.
    session = db.session()
    session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    is_select = is_query_select(executed_sql)
    if not is_select and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not is_select:
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif query.limit and is_select:
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))
    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    if database.backend == 'presto':
        polled = cursor.poll()
        # poll returns dict -- JSON status information or ``None``
        # if the query is done
        # https://github.com/dropbox/PyHive/blob/
        # b34bdbf51378b3979eaf5eca9e956f06ddc36ca0/pyhive/presto.py#L178
        while polled:
            # Update the object and wait for the kill signal.
            stats = polled.get('stats', {})
            if stats:
                completed_splits = float(stats.get('completedSplits'))
                total_splits = float(stats.get('totalSplits'))
                if total_splits and completed_splits:
                    progress = 100 * (completed_splits / total_splits)
                    if progress > query.progress:
                        query.progress = progress
                    session.commit()
            time.sleep(1)
            polled = cursor.poll()
    columns = None
    data = None
    if result_proxy.cursor:
        columns = [col[0] for col in result_proxy.cursor.description]
        data = result_proxy.fetchall()
        df = pd.DataFrame(data, columns=columns)
        df = df.where((pd.notnull(df)), None)
        # TODO consider generating tuples instead of dicts to send
        # less data through the wire. The command bellow does that,
        # but we'd need to align on the client side.
        # data = df.values.tolist()
        data = df.to_dict(orient='records')
    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and data:
        # Presto doesn't provide result_proxy.row_count
        query.rows = len(data)
    # CTAs queries result in 1 cell having the # of the added rows.
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.commit()
    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    if query.status == models.QueryStatus.SUCCESS:
        payload['data'] = data
        payload['columns'] = columns
    else:
        payload['error'] = query.error_message
    if return_results:
        return payload
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results.

    Runs the ``Query`` row identified by ``query_id``, tracking status
    and progress on the model, and optionally stores the serialized
    payload in the results backend.
    """
    # When run through celery (not called directly) the flask-sqlalchemy
    # scoped session is unavailable, so build a dedicated engine/session.
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    is_select = is_query_select(executed_sql)
    if not is_select and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not is_select:
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif (query.limit and is_select and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))
    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    # Let the engine spec monitor/advance the cursor (e.g. progress polling).
    db_engine_spec.handle_cursor(cursor, query, session)
    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.CaravelDataFrame(
            pd.DataFrame(data, columns=column_names))
    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.flush()
    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
    if store_results and results_backend:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key
    session.flush()
    session.commit()
    if return_results:
        return payload
def get_sql_results(query_id, return_results=True):
    """Executes the sql query returns the results.

    Runs the ``Query`` row identified by ``query_id``, polling Presto
    for progress when applicable, and returns a JSON-serializable
    payload with the data or the error message.
    """
    # BUG FIX: removed the dangling ``'''`` that followed this function
    # (an unterminated string literal left over from commented-out
    # code) and a stray semicolon after the ``is_select`` assignment.
    session = db.session()
    session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    is_select = is_query_select(executed_sql)
    if not is_select and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not is_select:
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif query.limit and is_select:
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))
    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    if database.backend == 'presto':
        polled = cursor.poll()
        # poll returns dict -- JSON status information or ``None``
        # if the query is done
        # https://github.com/dropbox/PyHive/blob/
        # b34bdbf51378b3979eaf5eca9e956f06ddc36ca0/pyhive/presto.py#L178
        while polled:
            # Update the object and wait for the kill signal.
            stats = polled.get('stats', {})
            if stats:
                completed_splits = float(stats.get('completedSplits'))
                total_splits = float(stats.get('totalSplits'))
                if total_splits and completed_splits:
                    progress = 100 * (completed_splits / total_splits)
                    if progress > query.progress:
                        query.progress = progress
                    session.commit()
            time.sleep(1)
            polled = cursor.poll()
    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        data = result_proxy.fetchall()
        cdf = dataframe.CaravelDataFrame(
            pd.DataFrame(data, columns=column_names))
        # TODO consider generating tuples instead of dicts to send
        # less data through the wire. The command bellow does that,
        # but we'd need to align on the client side.
        # data = df.values.tolist()
    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.commit()
    if return_results:
        payload = {
            'query_id': query.id,
            'status': query.status,
            'data': [],
        }
        if query.status == models.QueryStatus.SUCCESS:
            payload['data'] = cdf.data if cdf else []
            payload['columns'] = cdf.columns_dict if cdf else []
        else:
            payload['error'] = query.error_message
        return payload
def save_slice(self, slc):
    """Insert a brand new slice and notify the user."""
    db_session = db.session()
    db_session.add(slc)
    db_session.commit()
    flash("Slice [{}] has been saved".format(slc.slice_name), "info")