Example #1
0
    def test_get_latest_returns_the_last_cached_result_for_negative_ttl(self):
        yesterday = utcnow() + datetime.timedelta(days=-100)
        very_old = self.factory.create_query_result(retrieved_at=yesterday)

        yesterday = utcnow() + datetime.timedelta(days=-1)
        qr = self.factory.create_query_result(retrieved_at=yesterday)
        found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, -1)

        self.assertEqual(found_query_result.id, qr.id)
Example #2
0
    def test_schedule_until_before(self):
        """
        Queries with non-null ``schedule['until']`` are reported by
        Query.outdated_queries() before the given time is past.
        """
        one_day_from_now = (utcnow() + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        two_hours_ago = utcnow() - datetime.timedelta(hours=2)
        query = self.factory.create_query(schedule={'interval':'3600', 'until':one_day_from_now, 'time': None, 'day_of_week':None})
        query_result = self.factory.create_query_result(query=query.query_text, retrieved_at=two_hours_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertIn(query, queries)
Example #3
0
def check_alerts_for_query(self, query_id):
    from redash.wsgi import app

    logger.debug("Checking query %d for alerts", query_id)
    query = models.Query.get_by_id(query_id)
    for alert in query.alerts:
        alert.query = query
        new_state = alert.evaluate()
        passed_rearm_threshold = False
        if alert.rearm and alert.last_triggered_at:
            passed_rearm_threshold = alert.last_triggered_at + datetime.timedelta(seconds=alert.rearm) < utils.utcnow()
        if new_state != alert.state or (alert.state == models.Alert.TRIGGERED_STATE and passed_rearm_threshold ):
            logger.info("Alert %d new state: %s", alert.id, new_state)
            old_state = alert.state
            alert.update_instance(state=new_state, last_triggered_at=utils.utcnow())

            if old_state == models.Alert.UNKNOWN_STATE and new_state == models.Alert.OK_STATE:
                logger.debug("Skipping notification (previous state was unknown and now it's ok).")
                continue

            # message = Message
            html = """
            Check <a href="{host}/alerts/{alert_id}">alert</a> / check <a href="{host}/queries/{query_id}">query</a>.
            """.format(host=base_url(alert.query.org), alert_id=alert.id, query_id=query.id)

            notify_mail(alert, html, new_state, app)

            if settings.HIPCHAT_API_TOKEN:
                notify_hipchat(alert, html, new_state)

            if settings.WEBHOOK_ENDPOINT:
                notify_webhook(alert, query, html, new_state)
Example #4
0
    def test_failure_extends_schedule(self):
        """
        Execution failures recorded for a query result in exponential backoff
        for scheduling future execution.
        """
        query = self.factory.create_query(schedule="60", schedule_failures=4)
        retrieved_at = utcnow() - datetime.timedelta(minutes=16)
        query_result = self.factory.create_query_result(
            retrieved_at=retrieved_at, query_text=query.query_text,
            query_hash=query.query_hash)
        query.latest_query_data = query_result

        self.assertEqual(list(models.Query.outdated_queries()), [])

        query_result.retrieved_at = utcnow() - datetime.timedelta(minutes=17)
        self.assertEqual(list(models.Query.outdated_queries()), [query])
Example #5
0
File: tasks.py Project: hudl/redash
def check_alerts_for_query(self, query_id):
    from redash.wsgi import app

    logger.debug("Checking query %d for alerts", query_id)
    query = models.Query.get_by_id(query_id)
    for alert in query.alerts:
        alert.query = query
        new_state = alert.evaluate()
        passed_rearm_threshold = False
        if alert.rearm and alert.last_triggered_at:
            passed_rearm_threshold = alert.last_triggered_at + datetime.timedelta(seconds=alert.rearm) < utils.utcnow()
        if new_state != alert.state or (alert.state == models.Alert.TRIGGERED_STATE and passed_rearm_threshold ):
            logger.info("Alert %d new state: %s", alert.id, new_state)
            old_state = alert.state
            alert.update_instance(state=new_state, last_triggered_at=utils.utcnow())

            if old_state == models.Alert.UNKNOWN_STATE and new_state == models.Alert.OK_STATE:
                logger.debug("Skipping notification (previous state was unknown and now it's ok).")
                continue

            host = base_url(alert.query.org)
            
            for subscription in alert.subscriptions:
                try:
                    subscription.notify(alert, query, subscription.user, new_state, app, host) 
                except Exception as e:
                    logger.warn("Exception: {}".format(e))
Example #6
0
    def test_get_latest_doesnt_return_if_ttl_expired(self):
        yesterday = utcnow() - datetime.timedelta(days=1)
        qr = self.factory.create_query_result(retrieved_at=yesterday)

        found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, max_age=60)

        self.assertIsNone(found_query_result)
Example #7
0
    def test_get_latest_returns_if_ttl_not_expired(self):
        yesterday = utcnow() - datetime.timedelta(seconds=30)
        qr = self.factory.create_query_result(retrieved_at=yesterday)

        found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, max_age=120)

        self.assertEqual(found_query_result, qr)
Example #8
0
 def test_backoff(self):
     now = utcnow()
     two_hours_ago = now - datetime.timedelta(hours=2)
     self.assertTrue(models.should_schedule_next(two_hours_ago, now, "3600",
                                                 5))
     self.assertFalse(models.should_schedule_next(two_hours_ago, now,
                                                  "3600", 10))
Example #9
0
 def test_exact_time_with_day_change(self):
     now = utcnow().replace(hour=0, minute=1)
     previous = (now - datetime.timedelta(days=2)).replace(hour=23,
                                                           minute=59)
     schedule = "23:59".format(now.hour + 3)
     self.assertTrue(models.should_schedule_next(previous, now, schedule,
                                                 0))
Example #10
0
 def test_exact_time_that_needs_reschedule(self):
     now = utcnow()
     yesterday = now - datetime.timedelta(days=1)
     scheduled_datetime = now - datetime.timedelta(hours=3)
     scheduled_time = "{:02d}:00".format(scheduled_datetime.hour)
     self.assertTrue(models.should_schedule_next(yesterday, now,
                                                 scheduled_time, 0))
Example #11
0
    def outdated_queries(cls):
        queries = (
            Query.query
            .options(joinedload(Query.latest_query_data).load_only('retrieved_at'))
            .filter(Query.schedule.isnot(None))
            .order_by(Query.id)
        )

        now = utils.utcnow()
        outdated_queries = {}
        scheduled_queries_executions.refresh()

        for query in queries:
            if query.schedule['interval'] is None:
                continue

            if query.schedule['until'] is not None:
                schedule_until = pytz.utc.localize(datetime.datetime.strptime(query.schedule['until'], '%Y-%m-%d'))

                if schedule_until <= now:
                    continue

            if query.latest_query_data:
                retrieved_at = query.latest_query_data.retrieved_at
            else:
                retrieved_at = now

            retrieved_at = scheduled_queries_executions.get(query.id) or retrieved_at

            if should_schedule_next(retrieved_at, now, query.schedule['interval'], query.schedule['time'],
                                    query.schedule['day_of_week'], query.schedule_failures):
                key = "{}:{}".format(query.query_hash, query.data_source_id)
                outdated_queries[key] = query

        return outdated_queries.values()
Example #12
0
    def test_store_result_does_not_modify_query_update_at(self):
        original_updated_at = utcnow() - datetime.timedelta(hours=1)
        query = self.factory.create_query(updated_at=original_updated_at)

        models.QueryResult.store_result(query.org_id, query.data_source, query.query_hash, query.query_text, "", 0, utcnow())

        self.assertEqual(original_updated_at, query.updated_at)
Example #13
0
 def test_save_updates_updated_at_field(self):
     # This should be a test of ModelTimestampsMixin, but it's easier to test in context of existing model... :-\
     one_day_ago = utcnow().date() - datetime.timedelta(days=1)
     q = self.factory.create_query(created_at=one_day_ago, updated_at=one_day_ago)
     db.session.flush()
     q.name = 'x'
     db.session.flush()
     self.assertNotEqual(q.updated_at, one_day_ago)
Example #14
0
 def test_exact_time_every_x_days_that_doesnt_need_reschedule(self):
     now = utcnow()
     four_days_ago = now - datetime.timedelta(days=2)
     three_day_interval = "259200"
     scheduled_datetime = now - datetime.timedelta(hours=3)
     scheduled_time = "{:02d}:00".format(scheduled_datetime.hour)
     self.assertFalse(models.should_schedule_next(four_days_ago, now, three_day_interval,
                                                 scheduled_time))
Example #15
0
    def test_skips_fresh_queries(self):
        half_an_hour_ago = utcnow() - datetime.timedelta(minutes=30)
        query = self.factory.create_query(schedule={'interval':'3600', 'time': None, 'until':None, 'day_of_week':None})
        query_result = self.factory.create_query_result(query=query.query_text, retrieved_at=half_an_hour_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertNotIn(query, queries)
Example #16
0
    def test_outdated_queries_works_with_ttl_based_schedule(self):
        two_hours_ago = utcnow() - datetime.timedelta(hours=2)
        query = self.factory.create_query(schedule={'interval':'3600', 'time': None, 'until':None, 'day_of_week':None})
        query_result = self.factory.create_query_result(query=query.query_text, retrieved_at=two_hours_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertIn(query, queries)
Example #17
0
    def test_outdated_queries_works_with_specific_time_schedule(self):
        half_an_hour_ago = utcnow() - datetime.timedelta(minutes=30)
        query = self.factory.create_query(schedule=half_an_hour_ago.strftime('%H:%M'))
        query_result = self.factory.create_query_result(query=query.query_text, retrieved_at=half_an_hour_ago - datetime.timedelta(days=1))
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertIn(query, queries)
Example #18
0
    def test_returns_only_over_a_week_old_results(self):
        two_weeks_ago = utcnow() - datetime.timedelta(days=14)
        unused_qr = self.factory.create_query_result(retrieved_at=two_weeks_ago)
        db.session.flush()
        new_unused_qr = self.factory.create_query_result()

        self.assertIn((unused_qr.id,), models.QueryResult.unused())
        self.assertNotIn((new_unused_qr.id,), models.QueryResult.unused())
Example #19
0
    def test_outdated_queries_works_with_ttl_based_schedule(self):
        two_hours_ago = utcnow() - datetime.timedelta(hours=2)
        query = self.factory.create_query(schedule="3600")
        query_result = self.factory.create_query_result(query=query.query_text, retrieved_at=two_hours_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertIn(query, queries)
Example #20
0
    def test_skips_fresh_queries(self):
        half_an_hour_ago = utcnow() - datetime.timedelta(minutes=30)
        query = self.factory.create_query(schedule="3600")
        query_result = self.factory.create_query_result(query=query.query_text, retrieved_at=half_an_hour_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertNotIn(query, queries)
Example #21
0
    def test_get_latest_returns_the_most_recent_result(self):
        yesterday = utcnow() - datetime.timedelta(seconds=30)
        old_qr = self.factory.create_query_result(retrieved_at=yesterday)
        qr = self.factory.create_query_result()

        found_query_result = models.QueryResult.get_latest(qr.data_source, qr.query_text, 60)

        self.assertEqual(found_query_result.id, qr.id)
Example #22
0
 def test_returns_only_unused_query_results(self):
     two_weeks_ago = utcnow() - datetime.timedelta(days=14)
     qr = self.factory.create_query_result()
     query = self.factory.create_query(latest_query_data=qr)
     db.session.flush()
     unused_qr = self.factory.create_query_result(retrieved_at=two_weeks_ago)
     self.assertIn((unused_qr.id,), models.QueryResult.unused())
     self.assertNotIn((qr.id,), list(models.QueryResult.unused()))
Example #23
0
 def setUp(self):
     super(TestQueryResultStoreResult, self).setUp()
     self.data_source = self.factory.data_source
     self.query = "SELECT 1"
     self.query_hash = gen_query_hash(self.query)
     self.runtime = 123
     self.utcnow = utcnow()
     self.data = "data"
    def test_skips_fresh_queries(self):
        query = self.factory.create_query(schedule="1200")
        retrieved_at = utcnow() - datetime.timedelta(minutes=10)
        query_result = self.factory.create_query_result(retrieved_at=retrieved_at, query=query.query,
                                                   query_hash=query.query_hash)

        with patch('redash.tasks.queries.enqueue_query') as add_job_mock:
            refresh_queries()
            self.assertFalse(add_job_mock.called)
    def test_skips_queries_with_no_ttl(self):
        query = query_factory.create(schedule=None)
        retrieved_at = utcnow() - datetime.timedelta(minutes=10)
        query_result = query_result_factory.create(retrieved_at=retrieved_at, query=query.query,
                                                   query_hash=query.query_hash)

        with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
            refresh_queries()
            self.assertFalse(add_job_mock.called)
Example #26
0
    def test_skips_queries_with_no_ttl(self):
        query = self.factory.create_query(schedule=None)
        retrieved_at = utcnow() - datetime.timedelta(minutes=10)
        query_result = self.factory.create_query_result(
            retrieved_at=retrieved_at,
            query=query.query,
            query_hash=query.query_hash)

        with patch('redash.tasks.queries.enqueue_query') as add_job_mock:
            refresh_queries()
            self.assertFalse(add_job_mock.called)
Example #27
0
    def test_enqueues_outdated_queries(self):
        query = self.factory.create_query(schedule="60")
        retrieved_at = utcnow() - datetime.timedelta(minutes=10)
        query_result = self.factory.create_query_result(retrieved_at=retrieved_at, query=query.query,
                                                   query_hash=query.query_hash)
        query.latest_query_data = query_result
        query.save()

        with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
            refresh_queries()
            add_job_mock.assert_called_with(query.query, query.data_source, scheduled=True, metadata=ANY)
Example #28
0
    def test_outdated_queries_works_with_specific_time_schedule(self):
        half_an_hour_ago = utcnow() - datetime.timedelta(minutes=30)
        query = self.factory.create_query(
            schedule=half_an_hour_ago.strftime('%H:%M'))
        query_result = self.factory.create_query_result(
            query=query.query_text,
            retrieved_at=half_an_hour_ago - datetime.timedelta(days=1))
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertIn(query, queries)
Example #29
0
    def test_skips_fresh_queries(self):
        query = self.factory.create_query(schedule="1200")
        retrieved_at = utcnow() - datetime.timedelta(minutes=10)
        query_result = self.factory.create_query_result(
            retrieved_at=retrieved_at,
            query=query.query,
            query_hash=query.query_hash)

        with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
            refresh_queries()
            self.assertFalse(add_job_mock.called)
    def test_enqueues_outdated_queries(self):
        query = query_factory.create(schedule="60")
        retrieved_at = utcnow() - datetime.timedelta(minutes=10)
        query_result = query_result_factory.create(retrieved_at=retrieved_at, query=query.query,
                                                   query_hash=query.query_hash)
        query.latest_query_data = query_result
        query.save()

        with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
            refresh_queries()
            add_job_mock.assert_called_with(query.query, query.data_source, scheduled=True, metadata=ANY)
Example #31
0
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
    query_parameters = set(collect_query_parameters(query_text))
    missing_params = set(query_parameters) - set(parameter_values.keys())
    if missing_params:
        raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))

    if query_parameters:
        query_text = pystache.render(query_text, parameter_values)

    if max_age <= 0:
        query_result = None
    else:
        query_result = models.QueryResult.get_latest(data_source, query_text, max_age)

    query_hash = gen_query_hash(query_text)

    if query_result:
        logging.info("Returning cached result for query %s" % query_hash)
        return query_result

    try:
        started_at = time.time()
        data, error = data_source.query_runner.run_query(query_text, current_user)

        if error:
            logging.info('got bak error')
            logging.info(error)
            data_consumed_mb = 0.0
            return None
        else:
            dict_data = json.loads(data)
            data_consumed_mb = dict_data['data_consumed_mb']

        run_time = time.time() - started_at
        query_result, updated_query_ids = models.QueryResult.store_result(data_source.org, data_source,
                                                                              query_hash, query_text, data,
                                                                              run_time, utils.utcnow())
        query_result_metadata = models.QueryResultMetaData.store_result_metadata(
                                    updated_query_ids=updated_query_ids,
                                    query_result_id=query_result.id,
                                    data_consumed_mb=data_consumed_mb,
                                    data_source_id=data_source.id,
                                    query_hash=query_hash,
                                    run_by_user_id=current_user.id,
                                    run_at=utils.utcnow())

        models.db.session.commit()
        return query_result
    except Exception as e:
        if max_age > 0:
            abort(404, message="Unable to get result from the database, and no cached query result found.")
        else:
            abort(503, message="Unable to get result from the database.")
        return None
Example #32
0
 def test_empty_schedules(self):
     one_day_ago = (utcnow() -
                    datetime.timedelta(days=1)).strftime("%Y-%m-%d")
     query = self.factory.create_query(schedule={
         'interval': '3600',
         'until': one_day_ago
     })
     oq = staticmethod(lambda: [query])
     with patch.object(Query, 'past_scheduled_queries', oq):
         empty_schedules()
         self.assertEqual(query.schedule, None)
Example #33
0
    def run(self):
        signal.signal(signal.SIGINT, signal_handler)
        started_at = time.time()

        logger.debug("Executing query:\n%s", self.query)
        self._log_progress('executing_query')

        query_runner = self.data_source.query_runner
        annotated_query = self._annotate_query(query_runner)

        try:
            data, error = query_runner.run_query(annotated_query, self.user)
        except Exception as e:
            if isinstance(e, SoftTimeLimitExceeded):
                error = TIMEOUT_MESSAGE
            else:
                error = text_type(e)

            data = None
            logging.warning('Unexpected error while running query:', exc_info=1)

        run_time = time.time() - started_at

        logger.info(u"task=execute_query query_hash=%s data_length=%s error=[%s]", self.query_hash, data and len(data), error)

        _unlock(self.query_hash, self.data_source.id)

        if error is not None and data is None:
            result = QueryExecutionError(error)
            if self.scheduled_query is not None:
                self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
                self.scheduled_query.schedule_failures += 1
                models.db.session.add(self.scheduled_query)
                notify_of_failure(error, self.scheduled_query)
            models.db.session.commit()
            raise result
        else:
            if (self.scheduled_query and self.scheduled_query.schedule_failures > 0):
                self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
                self.scheduled_query.schedule_failures = 0
                models.db.session.add(self.scheduled_query)
            query_result, updated_query_ids = models.QueryResult.store_result(
                self.data_source.org_id, self.data_source,
                self.query_hash, self.query, data,
                run_time, utcnow())
            models.db.session.commit()  # make sure that alert sees the latest query result
            self._log_progress('checking_alerts')
            for query_id in updated_query_ids:
                check_alerts_for_query.delay(query_id)
            self._log_progress('finished')

            result = query_result.id
            models.db.session.commit()
            return result
Example #34
0
 def test_backoff(self):
     now = utcnow()
     two_hours_ago = now - datetime.timedelta(hours=2)
     self.assertTrue(
         models.should_schedule_next(two_hours_ago, now, "3600",
                                     failures=5))
     self.assertFalse(
         models.should_schedule_next(two_hours_ago,
                                     now,
                                     "3600",
                                     failures=10))
Example #35
0
 def test_returns_only_unused_query_results(self):
     two_weeks_ago = utcnow() - datetime.timedelta(days=14)
     qt = "SELECT 1"
     qr = self.factory.create_query_result(query_text=qt,
                                           retrieved_at=two_weeks_ago)
     query = self.factory.create_query(query_text=qt, latest_query_data=qr)
     unused_qr = self.factory.create_query_result(
         query_text=qt, retrieved_at=two_weeks_ago)
     db.session.flush()
     self.assertIn((unused_qr.id, ), models.QueryResult.unused())
     self.assertNotIn((qr.id, ), list(models.QueryResult.unused()))
Example #36
0
 def test_exact_time_every_x_days_that_doesnt_need_reschedule(self):
     now = utcnow()
     four_days_ago = now - datetime.timedelta(days=2)
     three_day_interval = "259200"
     scheduled_datetime = now - datetime.timedelta(hours=3)
     scheduled_time = "{:02d}:00".format(scheduled_datetime.hour)
     self.assertFalse(
         models.should_schedule_next(
             four_days_ago, now, three_day_interval, scheduled_time
         )
     )
Example #37
0
 def past_scheduled_queries(cls):
     now = utils.utcnow()
     queries = Query.query.filter(Query.schedule.isnot(None)).order_by(Query.id)
     return [
         query
         for query in queries
         if query.schedule["until"] is not None
         and pytz.utc.localize(
             datetime.datetime.strptime(query.schedule["until"], "%Y-%m-%d")
         )
         <= now
     ]
Example #38
0
    def test_schedule_until_before(self):
        """
        Queries with non-null ``schedule['until']`` are reported by
        Query.outdated_queries() before the given time is past.
        """
        one_day_from_now = (utcnow() +
                            datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        two_hours_ago = utcnow() - datetime.timedelta(hours=2)
        query = self.factory.create_query(
            schedule={
                "interval": "3600",
                "until": one_day_from_now,
                "time": None,
                "day_of_week": None,
            })
        query_result = self.factory.create_query_result(
            query=query.query_text, retrieved_at=two_hours_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertIn(query, queries)
Example #39
0
def update_sample(data_source_id, table_name, table_id, sample_updated_at):
    """
    For a given table, look up a sample row for it and update
    the "example" fields for it in the column_metadata table.
    """
    logger.info(u"task=update_sample state=start table_name=%s", table_name)
    start_time = time.time()
    ds = models.DataSource.get_by_id(data_source_id)

    persisted_columns = models.ColumnMetadata.query.filter(
        models.ColumnMetadata.exists.is_(True),
        models.ColumnMetadata.table_id == table_id,
    ).options(load_only("id", "name", "example"))

    update_threshold = utils.utcnow() - datetime.timedelta(
        days=settings.SCHEMA_SAMPLE_UPDATE_FREQUENCY_DAYS)

    first_column = persisted_columns.first()

    if (first_column and sample_updated_at and first_column.example
            and sample_updated_at > update_threshold):
        # Look at the first example in the persisted columns.
        # If this is *not* empty AND sample_updated_at is recent, don't update sample
        logger.info(
            u"task=update_sample state=abort - recent sample exists table_name=%s",
            table_name,
        )
        return

    sample = None
    try:
        sample = ds.query_runner.get_table_sample(table_name)
    except NotSupported:
        logger.info(u"Unable to fetch samples for {}".format(table_name))

    if not sample:
        return

    #  If a column exists, add a sample to it.
    for persisted_column in persisted_columns.all():
        column_example = sample.get(persisted_column.name, None)
        column_example = (column_example if isinstance(column_example, str)
                          else str(column_example))  # noqa: F821
        persisted_column.example = truncate_long_string(column_example, 4000)
        models.db.session.add(persisted_column)

    models.db.session.commit()
    logger.info(
        u"task=update_sample state=finished table_name=%s runtime=%.2f",
        table_name,
        time.time() - start_time,
    )
    return sample
Example #40
0
 def test_exact_time_every_x_weeks_that_doesnt_need_reschedule(self):
     # Setup:
     #
     # 1) The query should run every 3 weeks on Thurday
     # 2) The last time it ran was 3 weeks ago from this week's Tuesday
     # 3) It is now Wednesday of this week
     #
     # Expectation: Even though more than 3 weeks have passed since the
     #              last run 3 weeks ago on Tuesday, it's not overdue since
     #              it should be running on Thursdays.
     this_tuesday = utcnow() + datetime.timedelta(
         days=list(calendar.day_name).index("Tuesday") - utcnow().weekday())
     three_weeks_ago = this_tuesday - datetime.timedelta(weeks=3)
     now = this_tuesday + datetime.timedelta(days=1)
     three_week_interval = "1814400"
     scheduled_datetime = now - datetime.timedelta(hours=3)
     scheduled_time = "{:02d}:00".format(scheduled_datetime.hour)
     self.assertFalse(
         models.should_schedule_next(three_weeks_ago, now,
                                     three_week_interval, scheduled_time,
                                     "Thursday"))
Example #41
0
    def test_schedule_until_before(self):
        """
        Queries with non-null ``schedule['until']`` are reported by
        Query.outdated_queries() before the given time is past.
        """
        one_day_from_now = (utcnow() + datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        query = self.create_scheduled_query(interval="3600", until=one_day_from_now)
        self.fake_previous_execution(query, hours=2)

        queries = models.Query.outdated_queries()

        self.assertIn(query, queries)
Example #42
0
    def test_schedule_until_after(self):
        """
        Queries with non-null ``schedule['until']`` are not reported by
        Query.outdated_queries() after the given time is past.
        """
        one_day_ago = (utcnow() -
                       datetime.timedelta(days=1)).strftime("%Y-%m-%d")
        two_hours_ago = utcnow() - datetime.timedelta(hours=2)
        query = self.factory.create_query(
            schedule={
                'interval': '3600',
                'until': one_day_ago,
                'time': None,
                'day_of_week': None
            })
        query_result = self.factory.create_query_result(
            query=query.query_text, retrieved_at=two_hours_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertNotIn(query, queries)
Example #43
0
def check_alerts_for_query(self, query_id):
    from redash.wsgi import app

    logger.debug("Checking query %d for alerts", query_id)
    query = models.Query.get_by_id(query_id)
    for alert in query.alerts:
        alert.query = query
        new_state = alert.evaluate()
        passed_rearm_threshold = False
        if alert.rearm and alert.last_triggered_at:
            passed_rearm_threshold = alert.last_triggered_at + datetime.timedelta(
                seconds=alert.rearm) < utils.utcnow()
        if new_state != alert.state or (alert.state
                                        == models.Alert.TRIGGERED_STATE
                                        and passed_rearm_threshold):
            logger.info("Alert %d new state: %s", alert.id, new_state)
            old_state = alert.state
            alert.update_instance(state=new_state,
                                  last_triggered_at=utils.utcnow())

            if old_state == models.Alert.UNKNOWN_STATE and new_state == models.Alert.OK_STATE:
                logger.debug(
                    "Skipping notification (previous state was unknown and now it's ok)."
                )
                continue

            # message = Message
            html = """
            Check <a href="{host}/alerts/{alert_id}">alert</a> / check <a href="{host}/queries/{query_id}">query</a>.
            """.format(host=base_url(alert.query.org),
                       alert_id=alert.id,
                       query_id=query.id)

            notify_mail(alert, html, new_state, app)

            if settings.HIPCHAT_API_TOKEN:
                notify_hipchat(alert, html, new_state)

            if settings.WEBHOOK_ENDPOINT:
                notify_webhook(alert, query, html, new_state)
Example #44
0
def cleanup_data_in_table(table_model):
    removed_metadata = table_model.query.filter(
        table_model.exists == False, ).options(load_only('updated_at'))

    for removed_metadata_row in removed_metadata:
        is_old_data = (utils.utcnow() -
                       removed_metadata_row.updated_at) > datetime.timedelta(
                           days=settings.SCHEMA_METADATA_TTL_DAYS)

        table_model.query.filter(
            table_model.id == removed_metadata_row.id, ).delete()

    db.session.commit()
Example #45
0
    def run(self):
        signal.signal(signal.SIGINT, signal_handler)
        self.tracker.update(started_at=time.time(), state='started')

        logger.debug("Executing query:\n%s", self.query)
        self._log_progress('executing_query')

        query_runner = self.data_source.query_runner
        annotated_query = self._annotate_query(query_runner)

        try:
            data, error = query_runner.run_query(annotated_query, self.user)
        except Exception as e:
            error = text_type(e)
            data = None
            logging.warning('Unexpected error while running query:', exc_info=1)

        run_time = time.time() - self.tracker.started_at
        self.tracker.update(error=error, run_time=run_time, state='saving_results')

        logger.info(u"task=execute_query query_hash=%s data_length=%s error=[%s]", self.query_hash, data and len(data), error)

        _unlock(self.query_hash, self.data_source.id)

        if error:
            self.tracker.update(state='failed')
            result = QueryExecutionError(error)
            if self.scheduled_query is not None:
                self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
                self.scheduled_query.schedule_failures += 1
                models.db.session.add(self.scheduled_query)
            models.db.session.commit()
            raise result
        else:
            if (self.scheduled_query and self.scheduled_query.schedule_failures > 0):
                self.scheduled_query = models.db.session.merge(self.scheduled_query, load=False)
                self.scheduled_query.schedule_failures = 0
                models.db.session.add(self.scheduled_query)
            query_result, updated_query_ids = models.QueryResult.store_result(
                self.data_source.org_id, self.data_source,
                self.query_hash, self.query, data,
                run_time, utcnow())
            models.db.session.commit()  # make sure that alert sees the latest query result
            self._log_progress('checking_alerts')
            for query_id in updated_query_ids:
                check_alerts_for_query.delay(query_id)
            self._log_progress('finished')

            result = query_result.id
            models.db.session.commit()
            return result
Example #46
0
    def run(self):
        signal.signal(signal.SIGINT, signal_handler)
        self.tracker.update(started_at=time.time(), state='started')

        logger.debug("Executing query:\n%s", self.query)
        self._log_progress('executing_query')

        query_runner = self.data_source.query_runner
        annotated_query = self._annotate_query(query_runner)

        try:
            data, error = query_runner.run_query(annotated_query, self.user)
        except Exception as e:
            error = unicode(e)
            data = None
            logging.warning('Unexpected error while running query:',
                            exc_info=1)

        run_time = time.time() - self.tracker.started_at
        self.tracker.update(error=error,
                            run_time=run_time,
                            state='saving_results')

        logger.info(
            u"task=execute_query query_hash=%s data_length=%s error=[%s]",
            self.query_hash, data and len(data), error)

        _unlock(self.query_hash, self.data_source.id)

        if error:
            self.tracker.update(state='failed')
            result = QueryExecutionError(error)
            if self.scheduled_query:
                self.scheduled_query.schedule_failures += 1
                models.db.session.add(self.scheduled_query)
        else:
            if (self.scheduled_query
                    and self.scheduled_query.schedule_failures > 0):
                self.scheduled_query.schedule_failures = 0
                models.db.session.add(self.scheduled_query)
            query_result, updated_query_ids = models.QueryResult.store_result(
                self.data_source.org, self.data_source, self.query_hash,
                self.query, data, run_time, utils.utcnow())
            self._log_progress('checking_alerts')
            for query_id in updated_query_ids:
                check_alerts_for_query.delay(query_id)
            self._log_progress('finished')

            result = query_result.id
        models.db.session.commit()
        return result
Example #47
0
 def past_scheduled_queries(cls):
     now = utils.utcnow()
     queries = (
         Query.query
         .filter(Query.schedule.isnot(None))
         .order_by(Query.id)
     )
     return filter(
             lambda x:
             x.schedule["until"] is not None and pytz.utc.localize(
                 datetime.datetime.strptime(x.schedule['until'], '%Y-%m-%d')
             ) <= now,
             queries
             )
Example #48
0
    def outdated_queries(cls):
        queries = cls.select(cls, QueryResult.retrieved_at, DataSource)\
            .join(QueryResult)\
            .switch(Query).join(DataSource)\
            .where(cls.schedule != None)

        now = utils.utcnow()
        outdated_queries = {}
        for query in queries:
            if should_schedule_next(query.latest_query_data.retrieved_at, now, query.schedule):
                key = "{}:{}".format(query.query_hash, query.data_source.id)
                outdated_queries[key] = query

        return outdated_queries.values()
Example #49
0
    def test_failure_extends_schedule(self):
        """
        Execution failures recorded for a query result in exponential backoff
        for scheduling future execution.
        """
        query = self.factory.create_query(schedule={
            'interval': '60',
            'until': None,
            'time': None,
            'day_of_week': None
        },
                                          schedule_failures=4)
        retrieved_at = utcnow() - datetime.timedelta(minutes=16)
        query_result = self.factory.create_query_result(
            retrieved_at=retrieved_at,
            query_text=query.query_text,
            query_hash=query.query_hash)
        query.latest_query_data = query_result

        self.assertEqual(list(models.Query.outdated_queries()), [])

        query_result.retrieved_at = utcnow() - datetime.timedelta(minutes=17)
        self.assertEqual(list(models.Query.outdated_queries()), [query])
    def test_enqueues_query_only_once(self):
        query = self.factory.create_query(schedule="60")
        query2 = self.factory.create_query(schedule="60", query=query.query, query_hash=query.query_hash)
        retrieved_at = utcnow() - datetime.timedelta(minutes=10)
        query_result = self.factory.create_query_result(retrieved_at=retrieved_at, query=query.query,
                                                   query_hash=query.query_hash)
        query.latest_query_data = query_result
        query2.latest_query_data = query_result
        query.save()
        query2.save()

        with patch('redash.tasks.queries.enqueue_query') as add_job_mock:
            refresh_queries()
            add_job_mock.assert_called_once_with(query.query, query.data_source, query.user_id, scheduled=True, metadata=ANY)#{'Query ID': query.id, 'Username': '******'})
Example #51
0
    def outdated_queries(cls):
        queries = (Query.query.options(
            joinedload(
                Query.latest_query_data).load_only("retrieved_at")).filter(
                    Query.schedule.isnot(None)).order_by(Query.id))

        now = utils.utcnow()
        outdated_queries = {}
        scheduled_queries_executions.refresh()

        for query in queries:
            try:
                if query.schedule.get("disabled"):
                    continue

                if query.schedule["until"]:
                    schedule_until = pytz.utc.localize(
                        datetime.datetime.strptime(query.schedule["until"],
                                                   "%Y-%m-%d"))

                    if schedule_until <= now:
                        continue

                retrieved_at = scheduled_queries_executions.get(
                    query.id) or (query.latest_query_data
                                  and query.latest_query_data.retrieved_at)

                if should_schedule_next(
                        retrieved_at or now,
                        now,
                        query.schedule["interval"],
                        query.schedule["time"],
                        query.schedule["day_of_week"],
                        query.schedule_failures,
                ):
                    key = "{}:{}".format(query.query_hash,
                                         query.data_source_id)
                    outdated_queries[key] = query
            except Exception as e:
                query.schedule["disabled"] = True
                db.session.commit()

                message = (
                    "Could not determine if query %d is outdated due to %s. The schedule for this query has been disabled."
                    % (query.id, repr(e)))
                logging.info(message)
                sentry.capture_exception(
                    type(e)(message).with_traceback(e.__traceback__))

        return list(outdated_queries.values())
Example #52
0
def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state="STARTED", meta={"start_time": start_time, "custom_message": ""})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata["Task ID"] = self.request.id
        metadata["Query Hash"] = query_hash
        metadata["Queue"] = self.request.delivery_info["routing_key"]

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer("query_runner.{}.{}.run_time".format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state="STARTED", meta={"start_time": start_time, "error": error, "custom_message": ""})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.id, query_hash, query, data, run_time, utils.utcnow()
        )
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
    else:
        raise Exception(error)

    return query_result.id
Example #53
0
    def update(cls, user_id):
        """
        Update the user details hash using the given redis
        pipeline, user id, optional redis id and optional user
        details.

        The fields uid, rid and updated (timestamp) are
        enforced and can't be overwritten.
        """
        # try getting the user detail with the given user ID
        # or create one if it doesn't exist yet (e.g. when key was purged)
        try:
            user_detail = cls.get(cls.user_id == user_id)
            # update the timestamp with the current time
            user_detail.updated_at = utcnow()
            # save to Redis
            user_detail.save()
        except ValueError:
            user_detail = cls.create(
                user_id=user_id,
                updated_at=utcnow(),
            )
        return user_detail
Example #54
0
    def test_recent_empty_sample_refreshs(self):
        self.factory.data_source.query_runner.configuration['samples'] = True
        refresh_schema(self.factory.data_source.id)

        # Confirm no sample exists
        column_metadata = ColumnMetadata.query.first()
        self.assertEqual(column_metadata.example, None)

        LAST_UPDATE = utils.utcnow() - datetime.timedelta(days=5)
        update_sample(self.factory.data_source.id, 'table', 1,
                      LAST_UPDATE.isoformat())

        column_metadata = ColumnMetadata.query.first()
        self.assertEqual(column_metadata.example, self.COLUMN_EXAMPLE)
Example #55
0
    def test_outdated_queries_works_with_ttl_based_schedule(self):
        two_hours_ago = utcnow() - datetime.timedelta(hours=2)
        query = self.factory.create_query(schedule={
            "interval": "3600",
            "time": None,
            "until": None,
            "day_of_week": None,
        })
        query_result = self.factory.create_query_result(
            query=query.query_text, retrieved_at=two_hours_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertIn(query, queries)
Example #56
0
    def test_skips_fresh_queries(self):
        half_an_hour_ago = utcnow() - datetime.timedelta(minutes=30)
        query = self.factory.create_query(schedule={
            "interval": "3600",
            "time": None,
            "until": None,
            "day_of_week": None,
        })
        query_result = self.factory.create_query_result(
            query=query.query_text, retrieved_at=half_an_hour_ago)
        query.latest_query_data = query_result

        queries = models.Query.outdated_queries()
        self.assertNotIn(query, queries)
Example #57
0
    def test_enqueues_only_for_relevant_data_source(self):
        query = self.factory.create_query(schedule="60")
        query2 = self.factory.create_query(schedule="3600", query=query.query, query_hash=query.query_hash)
        import psycopg2
        retrieved_at = utcnow().replace(tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None)) - datetime.timedelta(minutes=10)
        query_result = self.factory.create_query_result(retrieved_at=retrieved_at, query=query.query,
                                                   query_hash=query.query_hash)
        query.latest_query_data = query_result
        query2.latest_query_data = query_result
        query.save()
        query2.save()

        with patch('redash.tasks.QueryTask.add_task') as add_job_mock:
            refresh_queries()
            add_job_mock.assert_called_once_with(query.query, query.data_source, scheduled=True, metadata=ANY)
Example #58
0
 def test_doesnt_return_live_incremental_results(self):
     two_weeks_ago = utcnow() - datetime.timedelta(days=14)
     qt = "SELECT 1"
     qrs = [
         self.factory.create_query_result(query_text=qt,
                                          retrieved_at=two_weeks_ago)
         for _ in range(5)
     ]
     q = self.factory.create_query(query_text=qt,
                                   latest_query_data=qrs[0],
                                   schedule_resultset_size=3)
     for qr in qrs:
         self.factory.create_query_resultset(query_rel=q, result=qr)
     db.session.flush()
     self.assertEqual([], list(models.QueryResult.unused()))
Example #59
0
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
    query_parameters = set(collect_query_parameters(query_text))
    missing_params = set(query_parameters) - set(parameter_values.keys())
    if missing_params:
        raise Exception('Missing parameter value for: {}'.format(
            ", ".join(missing_params)))

    query_text = apply_parameters(query_text, parameter_values, data_source)

    if max_age <= 0:
        query_result = None
    else:
        query_result = models.QueryResult.get_latest(data_source, query_text,
                                                     max_age)

    query_hash = gen_query_hash(query_text)

    if query_result:
        logging.info("Returning cached result for query %s" % query_hash)
        return query_result

    try:
        started_at = time.time()
        data, error = data_source.query_runner.run_query(
            query_text, current_user)

        if error:
            logging.info('got bak error')
            logging.info(error)
            return None

        run_time = time.time() - started_at
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.org_id, data_source, query_hash, query_text, data,
            run_time, utcnow())

        models.db.session.commit()
        return query_result
    except Exception as e:
        if max_age > 0:
            abort(
                404,
                message=
                "Unable to get result from the database, and no cached query result found."
            )
        else:
            abort(503, message="Unable to get result from the database.")
        return None