def test_template_kwarg(self):
     maindb = self.get_main_database(db.session)
     s = '{{ foo }}'
     tp = jinja_context.get_template_processor(database=maindb)
     rendered = tp.process_template(s, foo='bar')
     self.assertEqual('bar', rendered)
 def test_process_template(self) -> None:
     maindb = superset.utils.database.get_example_database()
     sql = "SELECT '{{ 1+1 }}'"
     tp = get_template_processor(database=maindb)
     rendered = tp.process_template(sql)
     self.assertEqual("SELECT '2'", rendered)
Beispiel #3
0
 def test_process_template(self):
     maindb = self.get_main_database(db.session)
     sql = "SELECT '{{ datetime(2017, 1, 1).isoformat() }}'"
     tp = jinja_context.get_template_processor(database=maindb)
     rendered = tp.process_template(sql)
     self.assertEqual("SELECT '2017-01-01T00:00:00'", rendered)
Beispiel #4
0
def execute_sql(
    ctask, query_id, return_results=True, store_results=False, user_name=None,
    template_params=None,
):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        msg = 'Error: {}. You can find common superset errors and their \
            resolutions at: {}'.format(msg, troubleshooting_link) \
            if troubleshooting_link else msg
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
            return
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        tp = template_params or {}
        executed_sql = template_processor.process_template(
            executed_sql, **tp)
    except Exception as e:
        logging.exception(e)
        msg = 'Template rendering failed: ' + utils.error_msg_from_exception(e)
        return handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    cdf = convert_results_to_df(cursor_description, data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
Beispiel #5
0
 def test_template_kwarg(self):
     maindb = utils.get_example_database()
     s = "{{ foo }}"
     tp = jinja_context.get_template_processor(database=maindb)
     rendered = tp.process_template(s, foo="bar")
     self.assertEqual("bar", rendered)
Beispiel #6
0
 def get_template_processor(self, **kwargs: Any) -> BaseTemplateProcessor:
     return get_template_processor(table=self,
                                   database=self.database,
                                   **kwargs)
Beispiel #7
0
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = sql_parse.SupersetQuery(executed_sql)
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(executed_sql, query.tmp_table_name,
                                       database.force_ctas_schema)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select()
          and db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(database=database,
                                                    query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))

    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    db_engine_spec.handle_cursor(cursor, query, session)

    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.SupersetDataFrame(
            pd.DataFrame(data, columns=column_names))

    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results and results_backend:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key

    session.flush()
    session.commit()

    if return_results:
        return payload
def test_get_template_kwarg_nested_module(app_context: AppContext) -> None:
    maindb = superset.utils.database.get_example_database()
    template = "{{ foo.dt }}"
    tp = get_template_processor(database=maindb, foo={"dt": datetime})
    with pytest.raises(SupersetTemplateException):
        tp.process_template(template)
 def test_get_template_kwarg_module(self) -> None:
     maindb = superset.utils.database.get_example_database()
     s = "{{ dt(2017, 1, 1).isoformat() }}"
     tp = get_template_processor(database=maindb, dt=datetime)
     with pytest.raises(SupersetTemplateException):
         tp.process_template(s)
 def test_template_kwarg_nested_module(self) -> None:
     maindb = superset.utils.database.get_example_database()
     s = "{{ foo.dt }}"
     tp = get_template_processor(database=maindb)
     with pytest.raises(SupersetTemplateException):
         tp.process_template(s, foo={"bar": datetime})
 def test_get_template_kwarg_lambda(self) -> None:
     maindb = superset.utils.database.get_example_database()
     s = "{{ foo() }}"
     tp = get_template_processor(database=maindb, foo=lambda: "bar")
     with pytest.raises(SupersetTemplateException):
         tp.process_template(s)
 def test_template_kwarg_dict(self) -> None:
     maindb = superset.utils.database.get_example_database()
     s = "{{ foo.bar }}"
     tp = get_template_processor(database=maindb)
     rendered = tp.process_template(s, foo={"bar": "baz"})
     self.assertEqual("baz", rendered)
 def test_get_template_kwarg(self) -> None:
     maindb = superset.utils.database.get_example_database()
     s = "{{ foo }}"
     tp = get_template_processor(database=maindb, foo="bar")
     rendered = tp.process_template(s)
     self.assertEqual("bar", rendered)
def test_template_kwarg_lambda(app_context: AppContext) -> None:
    maindb = superset.utils.database.get_example_database()
    template = "{{ foo() }}"
    tp = get_template_processor(database=maindb)
    with pytest.raises(SupersetTemplateException):
        tp.process_template(template, foo=lambda: "bar")
Beispiel #15
0
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    is_select = is_query_select(executed_sql);
    if not is_select and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not is_select:
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif (
            query.limit and is_select and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))

    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    db_engine_spec.handle_cursor(cursor, query, session)

    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.SupersetDataFrame(
            pd.DataFrame(data, columns=column_names))

    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results and results_backend:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key

    session.flush()
    session.commit()

    if return_results:
        return payload
def test_template_kwarg_module(app_context: AppContext) -> None:
    maindb = superset.utils.database.get_example_database()
    template = "{{ dt(2017, 1, 1).isoformat() }}"
    tp = get_template_processor(database=maindb)
    with pytest.raises(SupersetTemplateException):
        tp.process_template(template, dt=datetime)
Beispiel #17
0
 def test_get_template_kwarg(self):
     maindb = self.get_main_database(db.session)
     s = "{{ foo }}"
     tp = jinja_context.get_template_processor(database=maindb, foo='bar')
     rendered = tp.process_template(s)
     self.assertEqual("bar", rendered)
Beispiel #18
0
 def test_template_kwarg(self):
     maindb = get_main_database(db.session)
     s = '{{ foo }}'
     tp = jinja_context.get_template_processor(database=maindb)
     rendered = tp.process_template(s, foo='bar')
     self.assertEqual('bar', rendered)
Beispiel #19
0
def execute_sql(
    ctask, query_id, return_results=True, store_results=False, user_name=None):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
            return
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select()
          and db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        return handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    try:
        engine = database.get_sqla_engine(
            schema=query.schema, nullpool=not ctask.request.called_directly, user_name=user_name)
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info("Running query: \n{}".format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            "after {} seconds.".format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info("Fetching cursor description")
    cursor_description = cursor.description

    conn.commit()
    conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    column_names = (
        [col[0] for col in cursor_description] if cursor_description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(
        pd.DataFrame(list(data), columns=column_names))

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False, ))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        results_backend.set(key, utils.zlib_compress(json_payload))
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
Beispiel #20
0
 def get_template_processor(self, **kwargs):
     return get_template_processor(table=self,
                                   database=self.database,
                                   **kwargs)
Beispiel #21
0
def execute_sql(
    ctask,
    query_id,
    return_results=True,
    store_results=False,
    user_name=None,
    template_params=None,
):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        msg = 'Error: {}. You can find common superset errors and their \
            resolutions at: {}'                               .format(msg, troubleshooting_link) \
            if troubleshooting_link else msg
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
            return
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select()
          and db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(database=database,
                                                    query=query)
        tp = template_params or {}
        executed_sql = template_processor.process_template(executed_sql, **tp)
    except Exception as e:
        logging.exception(e)
        msg = 'Template rendering failed: ' + utils.error_msg_from_exception(e)
        return handle_error(msg)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(executed_sql, user_name, sm, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    cdf = convert_results_to_df(cursor_description, data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name,
                                 limit=query.limit,
                                 schema=database.force_ctas_schema,
                                 show_cols=False,
                                 latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload),
                            cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
Beispiel #22
0
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    try:
        query = session.query(Query).filter_by(id=query_id).one()
    except Exception as e:
        logging.error(
            "Query with id `{}` could not be retrieved".format(query_id))
        logging.error("Sleeping for a sec and retrying...")
        # Nasty hack to get around a race condition where the worker
        # cannot find the query it's supposed to run
        sleep(1)
        query = session.query(Query).filter_by(id=query_id).one()

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    if store_results and not results_backend:
        handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select()
          and db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(database=database,
                                                    query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    engine = database.get_sqla_engine(schema=query.schema)
    conn = engine.raw_connection()
    cursor = conn.cursor()
    logging.info("Running query: \n{}".format(executed_sql))
    try:
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    try:
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    conn.commit()
    conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    column_names = ([col[0] for col in cursor.description]
                    if cursor.description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(
        pd.DataFrame(list(data), columns=column_names))

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name,
                                 limit=query.limit,
                                 schema=database.force_ctas_schema))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    }
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key

    session.merge(query)
    session.commit()

    if return_results:
        return payload
def test_process_template(app_context: AppContext) -> None:
    maindb = superset.utils.database.get_example_database()
    template = "SELECT '{{ 1+1 }}'"
    tp = get_template_processor(database=maindb)
    assert tp.process_template(template) == "SELECT '2'"
Beispiel #24
0
 def get_template_processor(self, **kwargs):
     return get_template_processor(
         table=self, database=self.database, **kwargs)
def test_get_template_kwarg(app_context: AppContext) -> None:
    maindb = superset.utils.database.get_example_database()
    template = "{{ foo }}"
    tp = get_template_processor(database=maindb, foo="bar")
    assert tp.process_template(template) == "bar"
 def test_process_template(self):
     maindb = self.get_main_database(db.session)
     sql = "SELECT '{{ datetime(2017, 1, 1).isoformat() }}'"
     tp = jinja_context.get_template_processor(database=maindb)
     rendered = tp.process_template(sql)
     self.assertEqual("SELECT '2017-01-01T00:00:00'", rendered)
def test_template_kwarg_dict(app_context: AppContext) -> None:
    maindb = superset.utils.database.get_example_database()
    template = "{{ foo.bar }}"
    tp = get_template_processor(database=maindb)
    assert tp.process_template(template, foo={"bar": "baz"}) == "baz"
Beispiel #28
0
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    try:
        query = session.query(models.Query).filter_by(id=query_id).one()
    except Exception as e:
        logging.error("Query with id `{}` could not be retrieved".format(query_id))
        logging.error("Sleeping for a sec and retrying...")
        # Nasty hack to get around a race condition where the worker
        # cannot find the query it's supposed to run
        sleep(1)
        query = session.query(models.Query).filter_by(id=query_id).one()

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    if store_results and not results_backend:
        handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (
            query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    engine = database.get_sqla_engine(schema=query.schema)
    conn = engine.raw_connection()
    cursor = conn.cursor()
    logging.info("Running query: \n{}".format(executed_sql))
    try:
        logging.info(query.executed_sql)
        cursor.execute(
            query.executed_sql, **db_engine_spec.cursor_execute_kwargs)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    try:
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    conn.commit()
    conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps({
            'query_id': query.id,
            'status': query.status,
            'query': query.to_dict(),
        }, default=utils.json_iso_dttm_ser)

    column_names = (
        [col[0] for col in cursor.description] if cursor.description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(pd.DataFrame(
        list(data), columns=column_names))

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema
        ))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    }
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key

    session.merge(query)
    session.commit()

    if return_results:
        return payload
Beispiel #29
0
    def get_query_str(  # sqla
            self,
            engine,
            qry_start_dttm,
            groupby,
            metrics,
            granularity,
            from_dttm,
            to_dttm,
            filter=None,  # noqa
            is_timeseries=True,
            timeseries_limit=15,
            timeseries_limit_metric=None,
            row_limit=None,
            inner_from_dttm=None,
            inner_to_dttm=None,
            orderby=None,
            extras=None,
            columns=None):
        """Querying any sqla table from this common interface"""
        template_kwargs = {
            'from_dttm': from_dttm,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'to_dttm': to_dttm,
        }
        template_processor = get_template_processor(table=self,
                                                    database=self.database,
                                                    **template_kwargs)

        # For backward compatibility
        if granularity not in self.dttm_cols:
            granularity = self.main_dttm_col

        cols = {col.column_name: col for col in self.columns}
        metrics_dict = {m.metric_name: m for m in self.metrics}

        if not granularity and is_timeseries:
            raise Exception(
                _("Datetime column not provided as part table configuration "
                  "and is required by this type of chart"))
        for m in metrics:
            if m not in metrics_dict:
                raise Exception(_("Metric '{}' is not valid".format(m)))
        metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
        timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
        timeseries_limit_metric_expr = None
        if timeseries_limit_metric:
            timeseries_limit_metric_expr = \
                timeseries_limit_metric.sqla_col
        if metrics:
            main_metric_expr = metrics_exprs[0]
        else:
            main_metric_expr = literal_column("COUNT(*)").label("ccount")

        select_exprs = []
        groupby_exprs = []

        if groupby:
            select_exprs = []
            inner_select_exprs = []
            inner_groupby_exprs = []
            for s in groupby:
                col = cols[s]
                outer = col.sqla_col
                inner = col.sqla_col.label(col.column_name + '__')

                groupby_exprs.append(outer)
                select_exprs.append(outer)
                inner_groupby_exprs.append(inner)
                inner_select_exprs.append(inner)
        elif columns:
            for s in columns:
                select_exprs.append(cols[s].sqla_col)
            metrics_exprs = []

        if granularity:

            @compiles(ColumnClause)
            def visit_column(element, compiler, **kw):
                """Patch for sqlalchemy bug

                TODO: sqlalchemy 1.2 release should be doing this on its own.
                Patch only if the column clause is specific for DateTime
                set and granularity is selected.
                """
                text = compiler.visit_column(element, **kw)
                try:
                    if (element.is_literal
                            and hasattr(element.type, 'python_type')
                            and type(element.type) is DateTime):
                        text = text.replace('%%', '%')
                except NotImplementedError:
                    # Some elements raise NotImplementedError for python_type
                    pass
                return text

            dttm_col = cols[granularity]
            time_grain = extras.get('time_grain_sqla')

            if is_timeseries:
                timestamp = dttm_col.get_timestamp_expression(time_grain)
                select_exprs += [timestamp]
                groupby_exprs += [timestamp]

            time_filter = dttm_col.get_time_filter(from_dttm, to_dttm)

        select_exprs += metrics_exprs
        qry = sa.select(select_exprs)

        tbl = table(self.table_name)
        if self.schema:
            tbl.schema = self.schema

        # Supporting arbitrary SQL statements in place of tables
        if self.sql:
            from_sql = template_processor.process_template(self.sql)
            tbl = TextAsFrom(sa.text(from_sql), []).alias('expr_qry')

        if not columns:
            qry = qry.group_by(*groupby_exprs)

        where_clause_and = []
        having_clause_and = []
        for flt in filter:
            if not all([flt.get(s) for s in ['col', 'op', 'val']]):
                continue
            col = flt['col']
            op = flt['op']
            eq = flt['val']
            col_obj = cols.get(col)
            if col_obj and op in ('in', 'not in'):
                values = [types.strip("'").strip('"') for types in eq]
                if col_obj.is_num:
                    values = [utils.js_string_to_num(s) for s in values]
                cond = col_obj.sqla_col.in_(values)
                if op == 'not in':
                    cond = ~cond
                where_clause_and.append(cond)
        if extras:
            where = extras.get('where')
            if where:
                where_clause_and += [
                    wrap_clause_in_parens(
                        template_processor.process_template(where))
                ]
            having = extras.get('having')
            if having:
                having_clause_and += [
                    wrap_clause_in_parens(
                        template_processor.process_template(having))
                ]
        if granularity:
            qry = qry.where(and_(*([time_filter] + where_clause_and)))
        else:
            qry = qry.where(and_(*where_clause_and))
        qry = qry.having(and_(*having_clause_and))
        if groupby:
            qry = qry.order_by(desc(main_metric_expr))
        elif orderby:
            for col, ascending in orderby:
                direction = asc if ascending else desc
                qry = qry.order_by(direction(col))

        qry = qry.limit(row_limit)

        if is_timeseries and timeseries_limit and groupby:
            # some sql dialects require for order by expressions
            # to also be in the select clause -- others, e.g. vertica,
            # require a unique inner alias
            inner_main_metric_expr = main_metric_expr.label('mme_inner__')
            inner_select_exprs += [inner_main_metric_expr]
            subq = select(inner_select_exprs)
            subq = subq.select_from(tbl)
            inner_time_filter = dttm_col.get_time_filter(
                inner_from_dttm or from_dttm,
                inner_to_dttm or to_dttm,
            )
            subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
            subq = subq.group_by(*inner_groupby_exprs)
            ob = inner_main_metric_expr
            if timeseries_limit_metric_expr is not None:
                ob = timeseries_limit_metric_expr
            subq = subq.order_by(desc(ob))
            subq = subq.limit(timeseries_limit)
            on_clause = []
            for i, gb in enumerate(groupby):
                on_clause.append(groupby_exprs[i] == column(gb + '__'))

            tbl = tbl.join(subq.alias(), and_(*on_clause))

        qry = qry.select_from(tbl)

        sql = "{}".format(
            qry.compile(
                engine,
                compile_kwargs={"literal_binds": True},
            ), )
        logging.info(sql)
        sql = sqlparse.format(sql, reindent=True)
        return sql