Example #1
    def execute_query(self):
        self.status = 'STARTED'
        self.save()

        start_time = time.time()

        logging.info("Executing query:\n%s", self.query)

        query_runner = get_query_runner(self.data_source.type,
                                        self.data_source.options)

        with statsd_client.timer('query_runner.{}.{}.run_time'.format(
                self.data_source.type, self.data_source.name)):
            data, error = query_runner(self.query)

        run_time = time.time() - start_time
        logging.info("Query finished... data length=%s, error=%s", data
                     and len(data), error)

        self.error = error
        self.save()

        # TODO: it is possible that storing the data will fail, and we will need to retry
        # while we already marked the job as done
        if not error:
            models.QueryResult.store_result(self.data_source, self.query_hash,
                                            self.query, data, run_time,
                                            datetime.datetime.utcnow())
        else:
            raise Exception(error)
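The TODO above is a real hazard: storing the result can fail after the job has already been marked done. A minimal retry wrapper, purely as an illustration of one way to narrow that window (not Redash's actual fix), might look like:

import time

def store_with_retry(store, attempts=3, base_delay=1.0):
    # `store` is a zero-argument callable wrapping the actual
    # QueryResult.store_result(...) call.
    for attempt in range(attempts):
        try:
            return store()
        except Exception:
            if attempt == attempts - 1:
                raise
            time.sleep(base_delay * (2 ** attempt))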
Example #2
def refresh_queries():
    logger.info("Refreshing queries...")

    outdated_queries_count = 0
    query_ids = []

    with statsd_client.timer('manager.outdated_queries_lookup'):
        for query in models.Query.outdated_queries():
            if query.data_source.paused:
                logging.info("Skipping refresh of %s because datasource - %s is paused (%s).", query.id, query.data_source.name, query.data_source.pause_reason)
            else:
                enqueue_query(query.query, query.data_source,
                              scheduled=True,
                              metadata={'Query ID': query.id, 'Username': '******'})

            query_ids.append(query.id)
            outdated_queries_count += 1

    statsd_client.gauge('manager.outdated_queries', outdated_queries_count)

    logger.info("Done refreshing queries. Found %d outdated queries: %s" % (outdated_queries_count, query_ids))

    status = redis_connection.hgetall('redash:status')
    now = time.time()

    redis_connection.hmset('redash:status', {
        'outdated_queries_count': outdated_queries_count,
        'last_refresh_at': now,
        'query_ids': json.dumps(query_ids)
    })

    statsd_client.gauge('manager.seconds_since_refresh', now - float(status.get('last_refresh_at', now)))
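The redash:status hash written above is easy to inspect from a script; a quick sketch using the redis-py client (assuming a local Redis on the default port):

import json
import redis

redis_connection = redis.StrictRedis(host='localhost', port=6379, db=0)

status = redis_connection.hgetall('redash:status')
print(status.get(b'outdated_queries_count'))          # e.g. b'3'
print(json.loads(status.get(b'query_ids') or b'[]'))  # e.g. [12, 45, 7]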
Example #3
def execute_query(self, query, data_source_id, metadata):
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'custom_message': ''
                      })

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join(
            [u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(
            data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data
                and len(data), error)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'error': error,
                          'custom_message': ''
                      })

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.id, query_hash, query, data, run_time, utils.utcnow())
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
    else:
        raise Exception(error)

    return query_result.id
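For reference, the annotation assembled above is just a SQL comment prepended to the query text. A standalone illustration with made-up metadata values, using Python 3's dict.items() in place of the Python 2 iteritems():

metadata = {'Task ID': 'abc123', 'Query Hash': 'deadbeef', 'Queue': 'queries'}

annotation = u", ".join(u"{}: {}".format(k, v) for k, v in metadata.items())
annotated_query = u"/* {} */ {}".format(annotation, "SELECT 1")

print(annotated_query)
# /* Task ID: abc123, Query Hash: deadbeef, Queue: queries */ SELECT 1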
Example #4
def refresh_queries():
    logger.info("Refreshing queries...")

    outdated_queries_count = 0
    query_ids = []

    with statsd_client.timer('manager.outdated_queries_lookup'):
        for query in models.Query.outdated_queries():
            enqueue_query(query.query, query.data_source,
                          scheduled=True,
                          metadata={'Query ID': query.id, 'Username': '******'})
            query_ids.append(query.id)
            outdated_queries_count += 1

    statsd_client.gauge('manager.outdated_queries', outdated_queries_count)

    logger.info("Done refreshing queries. Found %d outdated queries: %s" % (outdated_queries_count, query_ids))

    status = redis_connection.hgetall('redash:status')
    now = time.time()

    redis_connection.hmset('redash:status', {
        'outdated_queries_count': outdated_queries_count,
        'last_refresh_at': now,
        'query_ids': json.dumps(query_ids)
    })

    statsd_client.gauge('manager.seconds_since_refresh', now - float(status.get('last_refresh_at', now)))
Example #5
def refresh_queries():
    logger.info("Refreshing queries...")

    outdated_queries_count = 0
    query_ids = []

    with statsd_client.timer('manager.outdated_queries_lookup'):
        for query in models.Query.outdated_queries():
            if settings.FEATURE_DISABLE_REFRESH_QUERIES: 
                logging.info("Disabled refresh queries.")
            elif query.data_source.paused:
                logging.info("Skipping refresh of %s because datasource - %s is paused (%s).", query.id, query.data_source.name, query.data_source.pause_reason)
            else:
                enqueue_query(query.query_text, query.data_source, query.user_id,
                              scheduled_query=query,
                              metadata={'Query ID': query.id, 'Username': '******'})

            query_ids.append(query.id)
            outdated_queries_count += 1

    statsd_client.gauge('manager.outdated_queries', outdated_queries_count)

    logger.info("Done refreshing queries. Found %d outdated queries: %s" % (outdated_queries_count, query_ids))

    status = redis_connection.hgetall('redash:status')
    now = time.time()

    redis_connection.hmset('redash:status', {
        'outdated_queries_count': outdated_queries_count,
        'last_refresh_at': now,
        'query_ids': json.dumps(query_ids)
    })

    statsd_client.gauge('manager.seconds_since_refresh', now - float(status.get('last_refresh_at', now)))
Example #6
def refresh_queries():
    logger.info("Refreshing queries...")

    outdated_queries_count = 0
    query_ids = []

    with statsd_client.timer('manager.outdated_queries_lookup'):
        for query in models.Query.outdated_queries():
            if settings.FEATURE_DISABLE_REFRESH_QUERIES:
                logging.info("Disabled refresh queries.")
            elif query.org.is_disabled:
                logging.debug("Skipping refresh of %s because org is disabled.", query.id)
            elif query.data_source is None:
                logging.debug("Skipping refresh of %s because the datasource is none.", query.id)
            elif query.data_source.paused:
                logging.debug("Skipping refresh of %s because datasource - %s is paused (%s).",
                              query.id, query.data_source.name, query.data_source.pause_reason)
            else:
                query_text = query.query_text

                parameters = {p['name']: p.get('value') for p in query.parameters}
                if any(parameters):
                    try:
                        query_text = query.parameterized.apply(parameters).query
                    except InvalidParameterError as e:
                        error = u"Skipping refresh of {} because of invalid parameters: {}".format(query.id, e.message)
                        track_failure(query, error)
                        continue
                    except QueryDetachedFromDataSourceError as e:
                        error = ("Skipping refresh of {} because a related dropdown "
                                 "query ({}) is unattached to any datasource.").format(query.id, e.query_id)
                        track_failure(query, error)
                        continue

                enqueue_query(query_text, query.data_source, query.user_id,
                              scheduled_query=query,
                              metadata={'Query ID': query.id, 'Username': '******'})

                query_ids.append(query.id)
                outdated_queries_count += 1

    statsd_client.gauge('manager.outdated_queries', outdated_queries_count)

    logger.info("Done refreshing queries. Found %d outdated queries: %s" % (outdated_queries_count, query_ids))

    status = redis_connection.hgetall('redash:status')
    now = time.time()

    redis_connection.hmset('redash:status', {
        'outdated_queries_count': outdated_queries_count,
        'last_refresh_at': now,
        'query_ids': json_dumps(query_ids)
    })

    statsd_client.gauge('manager.seconds_since_refresh', now - float(status.get('last_refresh_at', now)))
Example #7
def execute_query(self, query, data_source_id):
    # TODO: maybe this should be a class?
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'custom_message': ''
                      })

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if getattr(query_runner, 'annotate_query', True):
        # TODO: annotate with queue name
        annotated_query = "/* Task Id: %s, Query hash: %s */ %s" % \
                          (self.request.id, query_hash, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(
            data_source.type, data_source.name)):
        data, error = query_runner(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data
                and len(data), error)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'error': error,
                          'custom_message': ''
                      })

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    # TODO: it is possible that storing the data will fail, and we will need to retry
    # while we already marked the job as done
    if not error:
        query_result = models.QueryResult.store_result(
            data_source.id, query_hash, query, data, run_time,
            datetime.datetime.utcnow())
    else:
        raise Exception(error)

    return query_result.id
Example #8
def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state="STARTED", meta={"start_time": start_time, "custom_message": ""})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata["Task ID"] = self.request.id
        metadata["Query Hash"] = query_hash
        metadata["Queue"] = self.request.delivery_info["routing_key"]

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer("query_runner.{}.{}.run_time".format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state="STARTED", meta={"start_time": start_time, "error": error, "custom_message": ""})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.id, query_hash, query, data, run_time, utils.utcnow()
        )
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
    else:
        raise Exception(error)

    return query_result.id
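The signal_handler registered for SIGINT above is defined elsewhere in tasks.py; a plausible minimal version (an assumption for illustration, not necessarily the project's exact code) just converts the signal into an exception so the task fails cleanly:

import signal

class InterruptException(Exception):
    pass

def signal_handler(signum, frame):
    # Raised inside the worker so the surrounding task machinery can
    # record the interruption as a failed execution.
    raise InterruptException()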
Example #9
def refresh_queries():
    logger.info("Refreshing queries...")

    outdated_queries_count = 0
    query_ids = []

    with statsd_client.timer('manager.outdated_queries_lookup'):
        for query in models.Query.outdated_queries():
            if settings.FEATURE_DISABLE_REFRESH_QUERIES:
                logging.info("Disabled refresh queries.")
            elif query.org.is_disabled:
                logging.debug("Skipping refresh of %s because org is disabled.", query.id)
            elif query.data_source is None:
                logging.info("Skipping refresh of %s because the datasource is none.", query.id)
            elif query.data_source.paused:
                logging.info("Skipping refresh of %s because datasource - %s is paused (%s).", query.id, query.data_source.name, query.data_source.pause_reason)
            else:
                if query.options and len(query.options.get('parameters', [])) > 0:
                    query_params = {p['name']: p.get('value')
                                    for p in query.options['parameters']}
                    query_text = mustache_render(query.query_text, query_params)
                else:
                    query_text = query.query_text
                    
                if is_enqueued(query_text, query.data_source.id):
                    logging.info("Skipping refresh of %s because query is already queued up.", query.id)
                else:
                    enqueue_query(query_text, query.data_source, query.user_id,
                                  scheduled_query=query,
                                  metadata={'Query ID': query.id, 'Username': '******'})

                    query_ids.append(query.id)
                    outdated_queries_count += 1

    statsd_client.gauge('manager.outdated_queries', outdated_queries_count)

    logger.info("Done refreshing queries. Found %d outdated queries: %s" % (outdated_queries_count, query_ids))

    status = redis_connection.hgetall('redash:status')
    now = time.time()

    redis_connection.hmset('redash:status', {
        'outdated_queries_count': outdated_queries_count,
        'last_refresh_at': now,
        'query_ids': json_dumps(query_ids)
    })

    statsd_client.gauge('manager.seconds_since_refresh', now - float(status.get('last_refresh_at', now)))
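mustache_render is not shown in these snippets; a minimal stand-in built on the pystache package (an assumption for illustration, Redash's actual helper may differ) behaves like this:

import pystache

def mustache_render(template, context=None, **kwargs):
    # Disable HTML escaping: we are rendering SQL, not markup.
    renderer = pystache.Renderer(escape=lambda u: u)
    return renderer.render(template, context or {}, **kwargs)

print(mustache_render("SELECT * FROM events WHERE org_id = {{org_id}}",
                      {"org_id": 42}))
# SELECT * FROM events WHERE org_id = 42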
Example #10
def execute_query(self, query, data_source_id, metadata):
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result = models.QueryResult.store_result(data_source.id, query_hash, query, data, run_time, utils.utcnow())
    else:
        raise Exception(error)

    return query_result.id
Example #11
def execute_query(self, query, data_source_id):
    # TODO: maybe this should be a class?
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if getattr(query_runner, 'annotate_query', True):
        # TODO: annotate with queue name
        annotated_query = "/* Task Id: %s, Query hash: %s */ %s" % \
                          (self.request.id, query_hash, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    # TODO: it is possible that storing the data will fail, and we will need to retry
    # while we already marked the job as done
    if not error:
        query_result = models.QueryResult.store_result(data_source.id, query_hash, query, data, run_time, datetime.datetime.utcnow())
    else:
        raise Exception(error)

    return query_result.id
Example #12
def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("task=execute_query state=load_ds ds_id=%d", data_source_id)

    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'custom_message': ''
                      })

    logger.debug("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = data_source.query_runner

    logger.info(
        "task=execute_query state=before query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
        query_hash, data_source.type, data_source.id, self.request.id,
        self.request.delivery_info['routing_key'],
        metadata.get('Query ID',
                     'unknown'), metadata.get('Username', 'unknown'))

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join(
            [u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(
            data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    logger.info(
        "task=execute_query state=after query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
        query_hash, data_source.type, data_source.id, self.request.id,
        self.request.delivery_info['routing_key'],
        metadata.get('Query ID',
                     'unknown'), metadata.get('Username', 'unknown'))

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data
                and len(data), error)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'error': error,
                          'custom_message': ''
                      })

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.org_id, data_source.id, query_hash, query, data,
            run_time, utils.utcnow())
        logger.info(
            "task=execute_query state=after_store query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
            query_hash, data_source.type, data_source.id, self.request.id,
            self.request.delivery_info['routing_key'],
            metadata.get('Query ID', 'unknown'),
            metadata.get('Username', 'unknown'))
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
        logger.info(
            "task=execute_query state=after_alerts query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
            query_hash, data_source.type, data_source.id, self.request.id,
            self.request.delivery_info['routing_key'],
            metadata.get('Query ID', 'unknown'),
            metadata.get('Username', 'unknown'))
    else:
        raise QueryExecutionError(error)

    return query_result.id
Example #13
def dispatch_request(self, *args, **kwargs):
    with statsd_client.timer('requests.{}.{}'.format(request.endpoint, request.method.lower())):
        response = super(BaseResource, self).dispatch_request(*args, **kwargs)
    return response
Example #14
def dispatch_request(self, *args, **kwargs):
    with statsd_client.timer('requests.{}.{}'.format(
            request.endpoint, request.method.lower())):
        response = super(BaseResource,
                         self).dispatch_request(*args, **kwargs)
    return response
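Both variants above wrap Flask-RESTful's dispatch in a statsd timer; a self-contained sketch (assuming the statsd and flask_restful packages, with a locally configured client standing in for Redash's statsd_client):

from flask import request
from flask_restful import Resource
from statsd import StatsClient

statsd_client = StatsClient(host='localhost', port=8125, prefix='redash')

class BaseResource(Resource):
    def dispatch_request(self, *args, **kwargs):
        # Emit a timing metric named requests.<endpoint>.<http-method>.
        with statsd_client.timer('requests.{}.{}'.format(
                request.endpoint, request.method.lower())):
            response = super(BaseResource, self).dispatch_request(*args, **kwargs)
        return response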
Example #15
File: tasks.py Project: hudl/redash
def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("task=execute_query state=load_ds ds_id=%d", data_source_id)

    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.debug("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = data_source.query_runner

    logger.info("task=execute_query state=before query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
                query_hash, data_source.type, data_source.id, self.request.id, self.request.delivery_info['routing_key'],
                metadata.get('Query ID', 'unknown'), metadata.get('Username', 'unknown'))

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    logger.info("task=execute_query state=after query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
                query_hash, data_source.type, data_source.id, self.request.id, self.request.delivery_info['routing_key'],
                metadata.get('Query ID', 'unknown'), metadata.get('Username', 'unknown'))

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(data_source.org_id, data_source.id, query_hash, query, data, run_time, utils.utcnow())
        logger.info("task=execute_query state=after_store query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
                    query_hash, data_source.type, data_source.id, self.request.id, self.request.delivery_info['routing_key'],
                    metadata.get('Query ID', 'unknown'), metadata.get('Username', 'unknown'))
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
        logger.info("task=execute_query state=after_alerts query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
                    query_hash, data_source.type, data_source.id, self.request.id, self.request.delivery_info['routing_key'],
                    metadata.get('Query ID', 'unknown'), metadata.get('Username', 'unknown'))
    else:
        raise QueryExecutionError(error)

    return query_result.id