Example 1
    def __init__(self, redis_connection, query, priority,
                 job_id=None,
                 wait_time=None, query_time=None,
                 updated_at=None, status=None, error=None, query_result_id=None,
                 process_id=0):
        self.redis_connection = redis_connection
        self.query = query
        self.priority = priority
        self.query_hash = gen_query_hash(self.query)
        self.query_result_id = query_result_id
        if process_id == 'None':
            self.process_id = None
        else:
            self.process_id = int(process_id)

        if job_id is None:
            self.id = str(uuid.uuid1())
            self.new_job = True
            self.wait_time = 0
            self.query_time = 0
            self.error = None
            self.updated_at = time.time() # job_dict.get('updated_at', time.time())
            self.status = self.WAITING # int(job_dict.get('status', self.WAITING))
        else:
            self.id = job_id
            self.new_job = False
            self.error = error
            self.wait_time = wait_time
            self.query_time = query_time
            self.updated_at = updated_at
            self.status = status
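All of these snippets revolve around the same helper. For reference, a minimal sketch of gen_query_hash as it appears in redash/utils.py (paraphrased; the exact comment-stripping regex varies between Redash versions):

import hashlib
import re

# Strips /* ... */ block comments before hashing (assumed pattern).
COMMENTS_REGEX = re.compile(r"/\*.+?\*/")

def gen_query_hash(sql):
    # Normalize the query (drop comments, collapse whitespace, lowercase)
    # so trivially different spellings of the same query share one hash.
    sql = COMMENTS_REGEX.sub("", sql)
    sql = "".join(sql.split()).lower()
    return hashlib.md5(sql.encode("utf-8")).hexdigest()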
Example 2
    def pre_save(self, created):
        super(Query, self).pre_save(created)
        self.query_hash = utils.gen_query_hash(self.query)
        self._set_api_key()

        if self.last_modified_by is None:
            self.last_modified_by = self.user
Example 3
    def add_job(self, query, priority, data_source):
        query_hash = gen_query_hash(query)
        logging.info("[Manager][%s] Inserting job with priority=%s", query_hash, priority)
        try_count = 0
        job = None

        while try_count < self.max_retries:
            try_count += 1

            pipe = self.redis_connection.pipeline()
            try:
                pipe.watch('query_hash_job:%s' % query_hash)
                job_id = pipe.get('query_hash_job:%s' % query_hash)
                if job_id:
                    logging.info("[Manager][%s] Found existing job: %s", query_hash, job_id)
                    job = worker.Job.load(self.redis_connection, job_id)
                else:
                    job = worker.Job(self.redis_connection, query=query, priority=priority,
                                     data_source_id=data_source.id,
                                     data_source_name=data_source.name,
                                     data_source_type=data_source.type,
                                     data_source_options=data_source.options)
                    pipe.multi()
                    job.save(pipe)
                    logging.info("[Manager][%s] Created new job: %s", query_hash, job.id)
                    self.queue.push(job.id, job.priority)
                break

            except redis.WatchError:
                continue

        if not job:
            logging.error("[Manager][%s] Failed adding job for query.", query_hash)

        return job
Example 4
 def __init__(self, task, query, data_source_id, user_id, metadata,
              scheduled_query):
     self.task = task
     self.query = query
     self.data_source_id = data_source_id
     self.metadata = metadata
     self.data_source = self._load_data_source()
     if user_id is not None:
         self.user = models.User.query.get(user_id)
     else:
         self.user = None
     # Close DB connection to prevent holding a connection for a long time while the query is executing.
     models.db.session.close()
     self.query_hash = gen_query_hash(self.query)
     self.scheduled_query = scheduled_query
     # Load existing tracker or create a new one if the job was created before code update:
     self.tracker = (
         QueryTaskTracker.get_by_task_id(task.request.id) or
         QueryTaskTracker.create(
             task.request.id,
             'created',
             self.query_hash,
             self.data_source_id,
             False,
             metadata
         )
     )
     if self.tracker.scheduled:
         models.scheduled_queries_executions.update(self.tracker.query_id)
Example 5
 def setUp(self):
     super(TestQueryResultStoreResult, self).setUp()
     self.data_source = data_source_factory.create()
     self.query = "SELECT 1"
     self.query_hash = gen_query_hash(self.query)
     self.runtime = 123
     self.utcnow = datetime.datetime.utcnow()
     self.data = "data"
Example 6
def enqueue_query(query, data_source, user_id, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                time_limit = None

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None
                    time_limit = settings.ADHOC_QUERY_TIME_LIMIT

                result = execute_query.apply_async(args=(query, data_source.id, metadata, user_id, scheduled_query_id),
                                                   queue=queue_name,
                                                   time_limit=time_limit)
                job = QueryTask(async_result=result)
                tracker = QueryTaskTracker.create(
                    result.id, 'created', query_hash, data_source.id,
                    scheduled_query is not None, metadata)
                tracker.save(connection=pipe)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id, settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
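This and the following enqueue_query variants key their Redis lock on a _job_lock_id helper that the snippets never show. A plausible sketch, assuming the key format Redash uses (one lock per query/data-source pair):

def _job_lock_id(query_hash, data_source_id):
    # One lock per (query, data source) pair, so the same SQL can still run
    # concurrently against two different sources.
    return "query_hash_job:%s:%s" % (data_source_id, query_hash)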
Example 7
def enqueue_query(query, data_source, scheduled=False, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)
                tracker = QueryTaskTracker.get_by_task_id(job_id, connection=pipe)
                # tracker might not exist, if it's an old job
                if scheduled and tracker:
                    tracker.update(retries=tracker.retries+1)
                elif tracker:
                    tracker.update(scheduled_retries=tracker.scheduled_retries+1)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled:
                    queue_name = data_source.scheduled_queue_name
                else:
                    queue_name = data_source.queue_name

                result = execute_query.apply_async(args=(query, data_source.id, metadata), queue=queue_name)
                job = QueryTask(async_result=result)
                tracker = QueryTaskTracker.create(result.id, 'created', query_hash, data_source.id, scheduled, metadata)
                tracker.save(connection=pipe)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id, settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
Example 8
    def get_latest(cls, data_source, query, max_age=0):
        query_hash = utils.gen_query_hash(query)

        if max_age == -1:
            query = cls.select().where(cls.query_hash == query_hash,
                                       cls.data_source == data_source).order_by(cls.retrieved_at.desc())
        else:
            query = cls.select().where(cls.query_hash == query_hash, cls.data_source == data_source,
                                       peewee.SQL("retrieved_at + interval '%s second' >= now() at time zone 'utc'",
                                                  max_age)).order_by(cls.retrieved_at.desc())

        return query.first()
Example 9
 def __init__(self, task, query, data_source_id, metadata):
     self.task = task
     self.query = query
     self.data_source_id = data_source_id
     self.metadata = metadata
     self.data_source = self._load_data_source()
     self.query_hash = gen_query_hash(self.query)
     # Load existing tracker or create a new one if the job was created before code update:
     self.tracker = QueryTaskTracker.get_by_task_id(task.request.id) or QueryTaskTracker.create(task.request.id,
                                                                                                'created',
                                                                                                self.query_hash,
                                                                                                self.data_source_id,
                                                                                                False, metadata)
Example 10
def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state="STARTED", meta={"start_time": start_time, "custom_message": ""})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata["Task ID"] = self.request.id
        metadata["Query Hash"] = query_hash
        metadata["Queue"] = self.request.delivery_info["routing_key"]

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer("query_runner.{}.{}.run_time".format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state="STARTED", meta={"start_time": start_time, "error": error, "custom_message": ""})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.id, query_hash, query, data, run_time, utils.utcnow()
        )
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
    else:
        raise Exception(error)

    return query_result.id
Example 11
    def __init__(self, task, query, data_source_id, user_id, is_api_key, metadata,
                 scheduled_query):
        self.task = task
        self.query = query
        self.data_source_id = data_source_id
        self.metadata = metadata
        self.data_source = self._load_data_source()
        self.user = _resolve_user(user_id, is_api_key)

        # Close DB connection to prevent holding a connection for a long time while the query is executing.
        models.db.session.close()
        self.query_hash = gen_query_hash(self.query)
        self.scheduled_query = scheduled_query
        # Load existing tracker or create a new one if the job was created before code update:
        if scheduled_query:
            models.scheduled_queries_executions.update(scheduled_query.id)
Example 12
    def add_task(cls, query, data_source, scheduled=False, metadata={}):
        query_hash = gen_query_hash(query)
        logging.info("[Manager][%s] Inserting job", query_hash)
        logging.info("[Manager] Metadata: [%s]", metadata)
        try_count = 0
        job = None
        
        while try_count < cls.MAX_RETRIES:
            try_count += 1

            pipe = redis_connection.pipeline()
            try:
                pipe.watch(cls._job_lock_id(query_hash, data_source.id))
                job_id = pipe.get(cls._job_lock_id(query_hash, data_source.id))
                if job_id:
                    logging.info("[Manager][%s] Found existing job: %s", query_hash, job_id)

                    job = cls(job_id=job_id)
                    if job.ready():
                        logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.celery_status)
                        redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))
                        job = None

                if not job:
                    pipe.multi()

                    if scheduled:
                        queue_name = data_source.scheduled_queue_name
                    else:
                        queue_name = data_source.queue_name

                    result = execute_query.apply_async(args=(query, data_source.id, metadata), queue=queue_name)
                    job = cls(async_result=result)
                    
                    logging.info("[Manager][%s] Created new job: %s", query_hash, job.id)
                    pipe.set(cls._job_lock_id(query_hash, data_source.id), job.id, settings.JOB_EXPIRY_TIME)
                    pipe.execute()
                break

            except redis.WatchError:
                continue

        if not job:
            logging.error("[Manager][%s] Failed adding job for query.", query_hash)

        return job
Example 13
    def get_query_result(self, query, ttl=0):
        query_hash = gen_query_hash(query)

        with self.db_transaction() as cursor:
            sql = (
                "SELECT id, query, data, runtime, retrieved_at, query_hash FROM query_results "
                "WHERE query_hash=%s "
                "AND retrieved_at < now() at time zone 'utc' - interval '%s second'"
                "ORDER BY retrieved_at DESC LIMIT 1"
            )
            cursor.execute(sql, (query_hash, psycopg2.extensions.AsIs(ttl)))
            query_result = cursor.fetchone()

        if query_result:
            query_result = QueryResult(*query_result)

        return query_result
Example 14
def execute_query(self, query, data_source_id, metadata):
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result = models.QueryResult.store_result(data_source.id, query_hash, query, data, run_time, utils.utcnow())
    else:
        raise Exception(error)

    return query_result.id
Example 15
 def __init__(self, task, query, data_source_id, user_id, metadata,
              scheduled_query):
     self.task = task
     self.query = query
     self.data_source_id = data_source_id
     self.metadata = metadata
     self.data_source = self._load_data_source()
     if user_id is not None:
         self.user = models.User.query.get(user_id)
     else:
         self.user = None
     self.query_hash = gen_query_hash(self.query)
     self.scheduled_query = scheduled_query
     # Load existing tracker or create a new one if the job was created before code update:
     self.tracker = QueryTaskTracker.get_by_task_id(task.request.id) or QueryTaskTracker.create(task.request.id,
                                                                                                'created',
                                                                                                self.query_hash,
                                                                                                self.data_source_id,
                                                                                                False, metadata)
Example 16
def execute_query(self, query, data_source_id):
    # TODO: maybe this should be a class?
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if getattr(query_runner, 'annotate_query', True):
        # TODO: annotate with queue name
        annotated_query = "/* Task Id: %s, Query hash: %s */ %s" % \
                          (self.request.id, query_hash, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    # TODO: it is possible that storing the data will fail, and we will need to retry
    # while we already marked the job as done
    if not error:
        query_result = models.QueryResult.store_result(data_source.id, query_hash, query, data, run_time, datetime.datetime.utcnow())
    else:
        raise Exception(error)

    return query_result.id
Example 17
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
    query_parameters = set(collect_query_parameters(query_text))
    missing_params = set(query_parameters) - set(parameter_values.keys())
    if missing_params:
        raise Exception('Missing parameter value for: {}'.format(", ".join(missing_params)))

    if query_parameters:
        query_text = mustache_render(query_text, parameter_values)

    if max_age <= 0:
        query_result = None
    else:
        query_result = models.QueryResult.get_latest(data_source, query_text, max_age)

    query_hash = gen_query_hash(query_text)

    if query_result:
        logging.info("Returning cached result for query %s" % query_hash)
        return query_result

    try:
        started_at = time.time()
        data, error = data_source.query_runner.run_query(query_text, current_user)

        if error:
            logging.info('got back error')
            logging.info(error)
            return None

        run_time = time.time() - started_at
        query_result, updated_query_ids = models.QueryResult.store_result(data_source.org_id, data_source,
                                                                              query_hash, query_text, data,
                                                                              run_time, utcnow())

        models.db.session.commit()
        return query_result
    except Exception as e:
        if max_age > 0:
            abort(404, message="Unable to get result from the database, and no cached query result found.")
        else:
            abort(503, message="Unable to get result from the database.")
        return None
Example 18
    def store_query_result(self, query, data, run_time, retrieved_at):
        query_result_id = None
        query_hash = gen_query_hash(query)
        sql = (
            "INSERT INTO query_results (query_hash, query, data, runtime, retrieved_at) "
            "VALUES (%s, %s, %s, %s, %s) RETURNING id"
        )
        with self.db_transaction() as cursor:
            cursor.execute(sql, (query_hash, query, data, run_time, retrieved_at))
            if cursor.rowcount == 1:
                query_result_id = cursor.fetchone()[0]
                logging.info("[Manager][%s] Inserted query data; id=%s", query_hash, query_result_id)

                sql = "UPDATE queries SET latest_query_data_id=%s WHERE query_hash=%s"
                cursor.execute(sql, (query_result_id, query_hash))

                logging.info("[Manager][%s] Updated %s queries.", query_hash, cursor.rowcount)
            else:
                logging.error("[Manager][%s] Failed inserting query data.", query_hash)
        return query_result_id
Example 19
    def get_latest(cls, data_source, query, max_age=0):
        query_hash = utils.gen_query_hash(query)

        if max_age == -1:
            query = cls.query.filter(
                cls.query_hash == query_hash,
                cls.data_source == data_source
            )
        else:
            query = cls.query.filter(
                cls.query_hash == query_hash,
                cls.data_source == data_source,
                (
                    db.func.timezone('utc', cls.retrieved_at) +
                    datetime.timedelta(seconds=max_age) >=
                    db.func.timezone('utc', db.func.now())
                )
            )

        return query.order_by(cls.retrieved_at.desc()).first()
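A hypothetical caller of this get_latest (names assumed for illustration): accept a cached result up to ten minutes old, then fall back to max_age=-1, which skips the age filter entirely:

cached = QueryResult.get_latest(data_source, "SELECT 1", max_age=600)
if cached is None:
    # Nothing fresher than 10 minutes; take whatever is cached, if anything.
    cached = QueryResult.get_latest(data_source, "SELECT 1", max_age=-1)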
Example 20
    def store_query_result(self, data_source_id, query, data, run_time, retrieved_at):
        query_hash = gen_query_hash(query)

        query_result = models.QueryResult.create(query_hash=query_hash,
                                                 query=query,
                                                 runtime=run_time,
                                                 data_source=data_source_id,
                                                 retrieved_at=retrieved_at,
                                                 data=data)

        logging.info("[Manager][%s] Inserted query data; id=%s", query_hash, query_result.id)

        # TODO: move this logic to the model?
        updated_count = models.Query.update(latest_query_data=query_result).\
            where(models.Query.query_hash==query_hash, models.Query.data_source==data_source_id).\
            execute()

        logging.info("[Manager][%s] Updated %s queries.", query_hash, updated_count)

        return query_result.id
Example 21
def apply_parameters(template, parameters, data_source):
    query = SQLQuery(template).apply(parameters)

    # for now we only log `SQLInjectionError` to detect false positives
    try:
        text = query.text
    except SQLInjectionError:
        record_event({
            'action': 'sql_injection',
            'object_type': 'query',
            'query': template,
            'parameters': parameters,
            'timestamp': time.time(),
            'org_id': data_source.org_id
        })
    except Exception as e:
        logging.info(u"Failed applying parameters for query %s: %s", gen_query_hash(query.query), e.message)
    finally:
        text = query.query

    return text
Example 22
    def add_task(cls, query, data_source, scheduled=False):
        query_hash = gen_query_hash(query)
        logging.info("[Manager][%s] Inserting job", query_hash)
        try_count = 0
        job = None

        while try_count < cls.MAX_RETRIES:
            try_count += 1

            pipe = redis_connection.pipeline()
            try:
                pipe.watch(cls._job_lock_id(query_hash, data_source.id))
                job_id = pipe.get(cls._job_lock_id(query_hash, data_source.id))
                if job_id:
                    logging.info("[Manager][%s] Found existing job: %s", query_hash, job_id)

                    job = cls(job_id=job_id)
                else:
                    pipe.multi()

                    if scheduled:
                        queue_name = data_source.scheduled_queue_name
                    else:
                        queue_name = data_source.queue_name

                    result = execute_query.apply_async(args=(query, data_source.id), queue=queue_name)
                    job = cls(async_result=result)
                    logging.info("[Manager][%s] Created new job: %s", query_hash, job.id)
                    pipe.set(cls._job_lock_id(query_hash, data_source.id), job.id)
                    pipe.execute()
                break

            except redis.WatchError:
                continue

        if not job:
            logging.error("[Manager][%s] Failed adding job for query.", query_hash)

        return job
Example 23
 def __init__(self, task, query, data_source_id, user_id, metadata,
              scheduled_query):
     self.task = task
     self.query = query
     self.data_source_id = data_source_id
     self.metadata = metadata
     self.data_source = self._load_data_source()
     if user_id is not None:
         self.user = models.User.query.get(user_id)
     else:
         self.user = None
     # Close DB connection to prevent holding a connection for a long time while the query is executing.
     models.db.session.close()
     self.query_hash = gen_query_hash(self.query)
     self.scheduled_query = scheduled_query
     # Load existing tracker or create a new one if the job was created before code update:
     self.tracker = QueryTaskTracker.get_by_task_id(
         task.request.id) or QueryTaskTracker.create(
             task.request.id, 'created', self.query_hash,
             self.data_source_id, False, metadata)
     if self.tracker.scheduled:
         models.scheduled_queries_executions.update(self.tracker.query_id)
Example 24
def apply_parameters(template, parameters, data_source):
    query = SQLQuery(template).apply(parameters)

    # for now we only log `SQLInjectionError` to detect false positives
    try:
        text = query.text
    except SQLInjectionError:
        record_event({
            'action': 'sql_injection',
            'object_type': 'query',
            'query': template,
            'parameters': parameters,
            'timestamp': time.time(),
            'org_id': data_source.org_id
        })
    except Exception as e:
        logging.info(u"Failed applying parameters for query %s: %s",
                     gen_query_hash(query.query), e.message)
    finally:
        text = query.query

    return text
Example 25
    def __init__(self,
                 redis_connection,
                 query,
                 priority,
                 job_id=None,
                 wait_time=None,
                 query_time=None,
                 updated_at=None,
                 status=None,
                 error=None,
                 query_result_id=None,
                 process_id=0):
        self.redis_connection = redis_connection
        self.query = query
        self.priority = priority
        self.query_hash = gen_query_hash(self.query)
        self.query_result_id = query_result_id
        if process_id == 'None':
            self.process_id = None
        else:
            self.process_id = int(process_id)

        if job_id is None:
            self.id = str(uuid.uuid1())
            self.new_job = True
            self.wait_time = 0
            self.query_time = 0
            self.error = None
            self.updated_at = time.time()  # job_dict.get('updated_at', time.time())
            self.status = self.WAITING  # int(job_dict.get('status', self.WAITING))
        else:
            self.id = job_id
            self.new_job = False
            self.error = error
            self.wait_time = wait_time
            self.query_time = query_time
            self.updated_at = updated_at
            self.status = status
Example 26
    def __init__(
        self, query, data_source_id, user_id, is_api_key, metadata, is_scheduled_query
    ):
        self.job = get_current_job()
        self.query = query
        self.data_source_id = data_source_id
        self.metadata = metadata
        self.data_source = self._load_data_source()
        self.query_id = metadata.get("query_id")
        self.user = _resolve_user(user_id, is_api_key, metadata.get("Query ID"))
        self.query_model = (
            models.Query.query.get(self.query_id)
            if self.query_id and self.query_id != "adhoc"
            else None
        )

        # Close DB connection to prevent holding a connection for a long time while the query is executing.
        models.db.session.close()
        self.query_hash = gen_query_hash(self.query)
        self.is_scheduled_query = is_scheduled_query
        if self.is_scheduled_query:
            # Load existing tracker or create a new one if the job was created before code update:
            models.scheduled_queries_executions.update(self.query_model.id)
Example 27
def gen_query_hash(target, val, oldval, initiator):
    target.query_hash = utils.gen_query_hash(val)
    target.schedule_failures = 0
Example 28
                             schedule=None,
                             data_source=data_source_factory.create,
                             org=1)

alert_factory = ModelFactory(redash.models.Alert,
                             name=Sequence('Alert {}'),
                             query=query_factory.create,
                             user=user_factory.create,
                             options={})

query_result_factory = ModelFactory(redash.models.QueryResult,
                                    data='{"columns":{}, "rows":[]}',
                                    runtime=1,
                                    retrieved_at=utcnow,
                                    query="SELECT 1",
                                    query_hash=gen_query_hash('SELECT 1'),
                                    data_source=data_source_factory.create,
                                    org=1)

visualization_factory = ModelFactory(redash.models.Visualization,
                                     type='CHART',
                                     query=query_factory.create,
                                     name='Chart',
                                     description='',
                                     options='{}')

widget_factory = ModelFactory(redash.models.Widget,
                              type='chart',
                              width=1,
                              options='{}',
                              dashboard=dashboard_factory.create,
Example 29
def enqueue_query(query,
                  data_source,
                  user_id,
                  scheduled_query=None,
                  metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()  # connect to Redis
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

            if job.ready():  # check whether the Celery job has finished
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(
                        _job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                time_limit = None

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None
                    time_limit = settings.ADHOC_QUERY_TIME_LIMIT

                result = execute_query.apply_async(args=(query, data_source.id,
                                                         metadata, user_id,
                                                         scheduled_query_id),
                                                   queue=queue_name,
                                                   time_limit=time_limit)
                job = QueryTask(async_result=result)
                tracker = QueryTaskTracker.create(result.id, 'created',
                                                  query_hash, data_source.id,
                                                  scheduled_query is not None,
                                                  metadata)
                tracker.save(connection=pipe)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
Example 30
def gen_query_hash(target, val, oldval, initiator):
    target.query_hash = utils.gen_query_hash(val)
    target.schedule_failures = 0
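This function (and its twin in Example 27) has the signature of a SQLAlchemy attribute event handler. A sketch of how such a listener is typically wired up, assuming the hash is recomputed whenever the query text column is set:

from sqlalchemy.event import listens_for

@listens_for(Query.query_text, "set")
def gen_query_hash(target, val, oldval, initiator):
    # Recompute the hash whenever the query text changes and reset the
    # failure counter used for schedule back-off.
    target.query_hash = utils.gen_query_hash(val)
    target.schedule_failures = 0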
Example 31
def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("task=execute_query state=load_ds ds_id=%d", data_source_id)

    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'custom_message': ''
                      })

    logger.debug("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = data_source.query_runner

    logger.info(
        "task=execute_query state=before query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
        query_hash, data_source.type, data_source.id, self.request.id,
        self.request.delivery_info['routing_key'],
        metadata.get('Query ID',
                     'unknown'), metadata.get('Username', 'unknown'))

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join(
            [u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(
            data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    logger.info(
        "task=execute_query state=after query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
        query_hash, data_source.type, data_source.id, self.request.id,
        self.request.delivery_info['routing_key'],
        metadata.get('Query ID',
                     'unknown'), metadata.get('Username', 'unknown'))

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data
                and len(data), error)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'error': error,
                          'custom_message': ''
                      })

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.org_id, data_source.id, query_hash, query, data,
            run_time, utils.utcnow())
        logger.info(
            "task=execute_query state=after_store query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
            query_hash, data_source.type, data_source.id, self.request.id,
            self.request.delivery_info['routing_key'],
            metadata.get('Query ID', 'unknown'),
            metadata.get('Username', 'unknown'))
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
        logger.info(
            "task=execute_query state=after_alerts query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
            query_hash, data_source.type, data_source.id, self.request.id,
            self.request.delivery_info['routing_key'],
            metadata.get('Query ID', 'unknown'),
            metadata.get('Username', 'unknown'))
    else:
        raise QueryExecutionError(error)

    return query_result.id
Example 32
 def save(self, *args, **kwargs):
     self.query_hash = utils.gen_query_hash(self.query)
     self._set_api_key()
     super(Query, self).save(*args, **kwargs)
Example 33
alert_factory = ModelFactory(
    redash.models.Alert,
    name=Sequence("Alert {}"),
    query_rel=query_factory.create,
    user=user_factory.create,
    options={},
)

query_result_factory = ModelFactory(
    redash.models.QueryResult,
    data='{"columns":{}, "rows":[]}',
    runtime=1,
    retrieved_at=utcnow,
    query_text="SELECT 1",
    query_hash=gen_query_hash("SELECT 1"),
    data_source=data_source_factory.create,
    org_id=1,
)

visualization_factory = ModelFactory(
    redash.models.Visualization,
    type="CHART",
    query_rel=query_factory.create,
    name="Chart",
    description="",
    options="{}",
)

widget_factory = ModelFactory(
    redash.models.Widget,
Example 34
 def gen_query_hash(self, query_text, set_auto_limit=False):
     query_text = self.apply_auto_limit(query_text, set_auto_limit)
     return utils.gen_query_hash(query_text)
Example 35
def enqueue_query(query, data_source, scheduled=False, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)
                tracker = QueryTaskTracker.get_by_task_id(job_id,
                                                          connection=pipe)
                # tracker might not exist, if it's an old job
                if scheduled and tracker:
                    tracker.update(retries=tracker.retries + 1)
                elif tracker:
                    tracker.update(
                        scheduled_retries=tracker.scheduled_retries + 1)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(
                        _job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled:
                    queue_name = data_source.scheduled_queue_name
                else:
                    queue_name = data_source.queue_name

                result = execute_query.apply_async(args=(query, data_source.id,
                                                         metadata),
                                                   queue=queue_name)
                job = QueryTask(async_result=result)
                tracker = QueryTaskTracker.create(result.id, 'created',
                                                  query_hash, data_source.id,
                                                  scheduled, metadata)
                tracker.save(connection=pipe)

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
Example 36
File: tasks.py Project: hudl/redash
def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("task=execute_query state=load_ds ds_id=%d", data_source_id)

    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'custom_message': ''})

    logger.debug("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = data_source.query_runner

    logger.info("task=execute_query state=before query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
                query_hash, data_source.type, data_source.id, self.request.id, self.request.delivery_info['routing_key'],
                metadata.get('Query ID', 'unknown'), metadata.get('Username', 'unknown'))

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    logger.info("task=execute_query state=after query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
                query_hash, data_source.type, data_source.id, self.request.id, self.request.delivery_info['routing_key'],
                metadata.get('Query ID', 'unknown'), metadata.get('Username', 'unknown'))

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data and len(data), error)

    self.update_state(state='STARTED', meta={'start_time': start_time, 'error': error, 'custom_message': ''})

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(data_source.org_id, data_source.id, query_hash, query, data, run_time, utils.utcnow())
        logger.info("task=execute_query state=after_store query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
                    query_hash, data_source.type, data_source.id, self.request.id, self.request.delivery_info['routing_key'],
                    metadata.get('Query ID', 'unknown'), metadata.get('Username', 'unknown'))
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
        logger.info("task=execute_query state=after_alerts query_hash=%s type=%s ds_id=%d task_id=%s queue=%s query_id=%s username=%s",
                    query_hash, data_source.type, data_source.id, self.request.id, self.request.delivery_info['routing_key'],
                    metadata.get('Query ID', 'unknown'), metadata.get('Username', 'unknown'))
    else:
        raise QueryExecutionError(error)

    return query_result.id
Example 37
def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock", query_hash, job.celery_status)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                time_limit = None

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None
                    time_limit = settings.ADHOC_QUERY_TIME_LIMIT

                args = (query, data_source.id, metadata, user_id, scheduled_query_id, is_api_key)
                argsrepr = json_dumps({
                    'org_id': data_source.org_id,
                    'data_source_id': data_source.id,
                    'enqueue_time': time.time(),
                    'scheduled': scheduled_query_id is not None,
                    'query_id': metadata.get('Query ID'),
                    'user_id': user_id
                })

                result = execute_query.apply_async(args=args,
                                                   argsrepr=argsrepr,
                                                   queue=queue_name,
                                                   time_limit=time_limit)

                job = QueryTask(async_result=result)
                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id, settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
Example 38
File: tasks.py Project: aadu/redash
def execute_query(self, query, data_source_id, metadata):
    signal.signal(signal.SIGINT, signal_handler)
    start_time = time.time()

    logger.info("Loading data source (%d)...", data_source_id)

    # TODO: we should probably cache data sources in Redis
    data_source = models.DataSource.get_by_id(data_source_id)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'custom_message': ''
                      })

    logger.info("Executing query:\n%s", query)

    query_hash = gen_query_hash(query)
    query_runner = get_query_runner(data_source.type, data_source.options)

    if query_runner.annotate_query():
        metadata['Task ID'] = self.request.id
        metadata['Query Hash'] = query_hash
        metadata['Queue'] = self.request.delivery_info['routing_key']

        annotation = u", ".join(
            [u"{}: {}".format(k, v) for k, v in metadata.iteritems()])

        logging.debug(u"Annotation: %s", annotation)

        annotated_query = u"/* {} */ {}".format(annotation, query)
    else:
        annotated_query = query

    with statsd_client.timer('query_runner.{}.{}.run_time'.format(
            data_source.type, data_source.name)):
        data, error = query_runner.run_query(annotated_query)

    run_time = time.time() - start_time
    logger.info("Query finished... data length=%s, error=%s", data
                and len(data), error)

    self.update_state(state='STARTED',
                      meta={
                          'start_time': start_time,
                          'error': error,
                          'custom_message': ''
                      })

    # Delete query_hash
    redis_connection.delete(QueryTask._job_lock_id(query_hash, data_source.id))

    if not error:
        query_result, updated_query_ids = models.QueryResult.store_result(
            data_source.id, query_hash, query, data, run_time, utils.utcnow())
        for query_id in updated_query_ids:
            check_alerts_for_query.delay(query_id)
    else:
        raise Exception(error)

    return query_result.id
Example 39
 def test_gen_query_hash_NoneSQL(self):
     origin_query_text = "select *"
     base_runner = BaseQueryRunner({})
     self.assertEqual(gen_query_hash(origin_query_text),
                      base_runner.gen_query_hash(origin_query_text, True))
Example 40
    def update_instance(cls, query_id, **kwargs):
        if 'query' in kwargs:
            kwargs['query_hash'] = utils.gen_query_hash(kwargs['query'])

        update = cls.update(**kwargs).where(cls.id == query_id)
        return update.execute()
Example 41
 def __init__(self, redis_connection, query, priority, **kwargs):
     kwargs['query'] = fix_unicode(query)
     kwargs['priority'] = priority
     kwargs['query_hash'] = gen_query_hash(kwargs['query'])
     self.new_job = 'id' not in kwargs
     super(Job, self).__init__(redis_connection, **kwargs)
Example 42
def run_query_sync(data_source, parameter_values, query_text, max_age=0):
    query_parameters = set(collect_query_parameters(query_text))
    missing_params = set(query_parameters) - set(parameter_values.keys())
    if missing_params:
        raise Exception('Missing parameter value for: {}'.format(
            ", ".join(missing_params)))

    if query_parameters:
        query_text = pystache.render(query_text, parameter_values)

    if max_age <= 0:
        query_result = None
    else:
        query_result = models.QueryResult.get_latest(data_source, query_text,
                                                     max_age)

    query_hash = gen_query_hash(query_text)

    if query_result:
        logging.info("Returning cached result for query %s" % query_hash)
        return query_result.data

    try:
        started_at = time.time()
        data, error = data_source.query_runner.run_query(
            query_text, current_user)

        if error:
            data_consumed_mb = 0.0
            return None
        else:
            dict_data = json.loads(data)
            data_consumed_mb = dict_data['data_consumed_mb']

        # update cache
        if max_age > 0:
            run_time = time.time() - started_at
            query_result, updated_query_ids = models.QueryResult.store_result(
                data_source.org_id, data_source.id, query_hash, query_text,
                data, run_time, utils.utcnow())
            query_result_metadata = models.QueryResultMetaData.store_result_metadata(
                updated_query_ids=updated_query_ids,
                query_result_id=query_result.id,
                data_consumed_mb=data_consumed_mb,
                data_source_id=data_source.id,
                query_hash=query_hash,
                run_by_user_id=current_user.id,
                run_at=utils.utcnow())

            models.db.session.commit()
        return data
    except Exception:
        if max_age > 0:
            abort(404, message="Unable to get result from the database, and no cached query result found.")
        else:
            abort(503, message="Unable to get result from the database.")
        return None
Example 43
 def save(self, *args, **kwargs):
     self.query_hash = utils.gen_query_hash(self.query)
     self._set_api_key()
     super(Query, self).save(*args, **kwargs)
Example 44
File: models.py Project: JZL/redash
    def update_instance(cls, query_id, **kwargs):
        if 'query' in kwargs:
            kwargs['query_hash'] = utils.gen_query_hash(kwargs['query'])

        update = cls.update(**kwargs).where(cls.id == query_id)
        return update.execute()
Example 45
def enqueue_query(query,
                  data_source,
                  user_id,
                  is_api_key=False,
                  scheduled_query=None,
                  metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = Job.fetch(job_id)

                status = job.get_status()
                if status in [JobStatus.FINISHED, JobStatus.FAILED]:
                    logging.info(
                        "[%s] job found is ready (%s), removing lock",
                        query_hash,
                        status,
                    )
                    redis_connection.delete(
                        _job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None

                time_limit = settings.dynamic_settings.query_time_limit(
                    scheduled_query, user_id, data_source.org_id)
                metadata["Queue"] = queue_name

                queue = Queue(queue_name)
                job = queue.enqueue(
                    execute_query,
                    query,
                    data_source.id,
                    metadata,
                    user_id=user_id,
                    scheduled_query_id=scheduled_query_id,
                    is_api_key=is_api_key,
                    job_timeout=time_limit,
                )

                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(
                    _job_lock_id(query_hash, data_source.id),
                    job.id,
                    settings.JOB_EXPIRY_TIME,
                )
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
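
All the enqueue_query variants in these examples serialize on a Redis key built by _job_lock_id, which the extracts never define. A minimal sketch consistent with how the snippets use it (one lock per query hash and data source pair; the exact key format is an assumption):

def _job_lock_id(query_hash, data_source_id):
    # One lock key per (data source, query hash) pair, so identical SQL
    # against different data sources can still run concurrently.
    return "query_hash_job:%s:%s" % (data_source_id, query_hash)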
Example no. 46
def enqueue_query(query,
                  data_source,
                  user_id,
                  is_api_key=False,
                  scheduled_query=None,
                  metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():
                    logging.info(
                        "[%s] job found is ready (%s), removing lock",
                        query_hash,
                        job.celery_status,
                    )
                    redis_connection.delete(
                        _job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None

                args = (
                    query,
                    data_source.id,
                    metadata,
                    user_id,
                    scheduled_query_id,
                    is_api_key,
                )
                argsrepr = json_dumps({
                    "org_id": data_source.org_id,
                    "data_source_id": data_source.id,
                    "enqueue_time": time.time(),
                    "scheduled": scheduled_query_id is not None,
                    "query_id": metadata.get("Query ID"),
                    "user_id": user_id,
                })

                time_limit = settings.dynamic_settings.query_time_limit(
                    scheduled_query, user_id, data_source.org_id)

                result = execute_query.apply_async(
                    args=args,
                    argsrepr=argsrepr,
                    queue=queue_name,
                    soft_time_limit=time_limit,
                )

                job = QueryTask(async_result=result)
                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(
                    _job_lock_id(query_hash, data_source.id),
                    job.id,
                    settings.JOB_EXPIRY_TIME,
                )
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
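
Both the RQ (Example no. 45) and Celery (Example no. 46) versions rely on the same optimistic-locking idiom: WATCH the lock key, read it, and write only inside MULTI/EXEC; if another client touches the key in between, execute() raises WatchError and the loop retries. Distilled into a standalone redis-py sketch (the names here are illustrative, not from the snippets):

import redis

redis_connection = redis.Redis()


def set_if_absent(key, value, max_tries=5):
    # Return the existing value if another client won the race,
    # otherwise atomically write `value` and return it.
    for _ in range(max_tries):
        pipe = redis_connection.pipeline()
        try:
            pipe.watch(key)               # make EXEC fail if `key` changes
            existing = pipe.get(key)      # immediate-mode read while watching
            if existing is not None:
                return existing
            pipe.multi()                  # switch to buffered/transactional mode
            pipe.set(key, value)
            pipe.execute()                # raises WatchError on conflict
            return value
        except redis.WatchError:
            continue                      # lost the race; retry
    return None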
Example no. 47
def enqueue_query(
    query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}
):
    query_id = metadata.get("Query ID", "unknown")
    query_hash = gen_query_hash(query)
    get_logger().info("[query_id=%s] [query_hash=%s] Inserting job", query_id, query_hash)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                # Defaults for the log line below; job_cancelled stays a
                # string because it is only ever interpolated with %s.
                job_status = "UNKNOWN"
                job_complete = False
                job_cancelled = "False"

                try:
                    job = Job.fetch(job_id)
                    job_exists = True
                    job_status = job.get_status()
                    job_complete = job_status in [JobStatus.FINISHED, JobStatus.FAILED]
                    if job.is_cancelled:
                        job_cancelled = "True"
                except NoSuchJobError:
                    job_exists = False
                    job_status = "EXPIRED"

                get_logger().info("[query_id=%s] [query_hash=%s] Found existing job [job.id=%s] [job_status=%s] [job_cancelled=%s]", query_id, query_hash, job_id, job_status, job_cancelled)

                if job_complete or (not job_exists):
                    #get_logger().info("[query_id=%s] [query_hash=%s] [job.id=%s], removing redis lock", query_id, query_hash, job_id)
                    redis_connection.delete(_job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name  # by default this is "scheduled_queries"
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name  # by default this is "queries"
                    scheduled_query_id = None

                time_limit = settings.dynamic_settings.query_time_limit(
                    scheduled_query, user_id, data_source.org_id
                )
                metadata["Queue"] = queue_name
                metadata["Enqueue Time"] = time.time()

                queue = Queue(queue_name)
                enqueue_kwargs = {
                    "user_id": user_id,
                    "scheduled_query_id": scheduled_query_id,
                    "is_api_key": is_api_key,
                    "job_timeout": time_limit,
                    "meta": {
                        "data_source_id": data_source.id,
                        "org_id": data_source.org_id,
                        "scheduled": scheduled_query_id is not None,
                        "query_id": query_id,
                        "user_id": user_id,
                    },
                }

                if not scheduled_query:
                    enqueue_kwargs["result_ttl"] = settings.JOB_EXPIRY_TIME

                job = queue.enqueue(
                    execute_query, query, data_source.id, metadata, **enqueue_kwargs
                )

                get_logger().info("[query_id=%s] [query_hash=%s] Created new job [job.id=%s]", query_id, query_hash, job.id)
                pipe.set(
                    _job_lock_id(query_hash, data_source.id),
                    job.id,
                    settings.JOB_EXPIRY_TIME,
                )
                pipe.execute()
            break

        except redis.WatchError:
            get_logger().error("[query_id=%s] [query_hash=%s] redis.WatchError, try_count = %d", query_id, query_hash, try_count)
            continue

    if not job:
        get_logger().error("[Manager] [query_id=%s] [query_hash=%s] Failed adding job for query.", query_id, query_hash)

    return job
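
A usage sketch for the RQ-based variants: enqueue once, then poll the returned job until it settles. The query text, data source, and ids below are hypothetical stand-ins:

import time

from rq.job import JobStatus

job = enqueue_query(
    "SELECT count(*) FROM events",   # hypothetical query
    data_source,                     # an existing redash.models.DataSource
    user_id=1,
    metadata={"Query ID": 42},
)

while job is not None and job.get_status() not in (JobStatus.FINISHED, JobStatus.FAILED):
    time.sleep(1)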
Example no. 48
    schedule=None,
    data_source=data_source_factory.create,
    org=1)

alert_factory = ModelFactory(redash.models.Alert,
                             name=Sequence('Alert {}'),
                             query=query_factory.create,
                             user=user_factory.create,
                             options={})

query_result_factory = ModelFactory(redash.models.QueryResult,
                                    data='{"columns":{}, "rows":[]}',
                                    runtime=1,
                                    retrieved_at=utcnow,
                                    query="SELECT 1",
                                    query_hash=gen_query_hash('SELECT 1'),
                                    data_source=data_source_factory.create,
                                    org=1)

visualization_factory = ModelFactory(redash.models.Visualization,
                                     type='CHART',
                                     query=query_factory.create,
                                     name='Chart',
                                     description='',
                                     options='{}')

widget_factory = ModelFactory(redash.models.Widget,
                              type='chart',
                              width=1,
                              options='{}',
                              dashboard=dashboard_factory.create,
Example no. 49
def enqueue_query(query,
                  data_source,
                  user_id,
                  is_api_key=False,
                  scheduled_query=None,
                  metadata={}):
    query_hash = gen_query_hash(query)
    logger.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logger.info("[%s] Found existing job: %s", query_hash, job_id)
                job_complete = None

                try:
                    job = Job.fetch(job_id)
                    job_exists = True
                    status = job.get_status()
                    job_complete = status in [
                        JobStatus.FINISHED, JobStatus.FAILED
                    ]

                    if job_complete:
                        message = "job found is complete (%s)" % status
                except NoSuchJobError:
                    message = "job found has expired"
                    job_exists = False

                if job_complete or not job_exists:
                    logger.info("[%s] %s, removing lock", query_hash, message)
                    redis_connection.delete(
                        _job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None

                time_limit = settings.dynamic_settings.query_time_limit(
                    scheduled_query, user_id, data_source.org_id)
                metadata["Queue"] = queue_name

                queue = Queue(queue_name)
                enqueue_kwargs = {
                    "user_id": user_id,
                    "scheduled_query_id": scheduled_query_id,
                    "is_api_key": is_api_key,
                    "job_timeout": time_limit,
                    "meta": {
                        "data_source_id": data_source.id,
                        "org_id": data_source.org_id,
                        "scheduled": scheduled_query_id is not None,
                        "query_id": metadata.get("Query ID"),
                        "user_id": user_id,
                    },
                }

                if not scheduled_query:
                    enqueue_kwargs["result_ttl"] = settings.JOB_EXPIRY_TIME

                job = queue.enqueue(execute_query, query, data_source.id,
                                    metadata, **enqueue_kwargs)

                logger.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(
                    _job_lock_id(query_hash, data_source.id),
                    job.id,
                    settings.JOB_EXPIRY_TIME,
                )
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logger.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
Example no. 50
def hashify_query():
    arguments = request.get_json(force=True)
    sql = arguments.get("query_text", "")

    return gen_query_hash(sql)
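
As extracted, hashify_query is only the view body; running it needs a Flask app, the request import, and gen_query_hash. A self-contained sketch (the route path here is an assumption):

from flask import Flask, request

from redash.utils import gen_query_hash

app = Flask(__name__)


@app.route("/api/queries/hashify", methods=["POST"])
def hashify_query():
    arguments = request.get_json(force=True)
    sql = arguments.get("query_text", "")
    return gen_query_hash(sql)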
Example no. 51
    def setUp(self):
        self.priority = 1
        self.query = "SELECT 1"
        self.query_hash = gen_query_hash(self.query)
Example no. 52
def enqueue_query(query,
                  data_source,
                  user_id,
                  scheduled_query=None,
                  metadata={}):
    query_hash = gen_query_hash(query)
    logging.info("Inserting job for %s with metadata=%s", query_hash, metadata)
    try_count = 0
    job = None

    while try_count < 5:
        try_count += 1

        pipe = redis_connection.pipeline()
        try:
            pipe.watch(_job_lock_id(query_hash, data_source.id))
            job_id = pipe.get(_job_lock_id(query_hash, data_source.id))
            if job_id:
                logging.info("[%s] Found existing job: %s", query_hash, job_id)

                job = QueryTask(job_id=job_id)

                if job.ready():
                    logging.info("[%s] job found is ready (%s), removing lock",
                                 query_hash, job.celery_status)
                    redis_connection.delete(
                        _job_lock_id(query_hash, data_source.id))
                    job = None

            if not job:
                pipe.multi()

                time_limit = None

                if scheduled_query:
                    queue_name = data_source.scheduled_queue_name
                    scheduled_query_id = scheduled_query.id
                else:
                    queue_name = data_source.queue_name
                    scheduled_query_id = None
                    time_limit = settings.ADHOC_QUERY_TIME_LIMIT

                args = (query, data_source.id, metadata, user_id,
                        scheduled_query_id)
                argsrepr = json_dumps({
                    'org_id': data_source.org_id,
                    'data_source_id': data_source.id,
                    'enqueue_time': time.time(),
                    'scheduled': scheduled_query_id is not None,
                    'query_id': metadata.get('Query ID'),
                    'user_id': user_id
                })

                result = execute_query.apply_async(args=args,
                                                   argsrepr=argsrepr,
                                                   queue=queue_name,
                                                   time_limit=time_limit)

                job = QueryTask(async_result=result)
                logging.info("[%s] Created new job: %s", query_hash, job.id)
                pipe.set(_job_lock_id(query_hash, data_source.id), job.id,
                         settings.JOB_EXPIRY_TIME)
                pipe.execute()
            break

        except redis.WatchError:
            continue

    if not job:
        logging.error("[Manager][%s] Failed adding job for query.", query_hash)

    return job
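
The Celery-based variants (this one and Example no. 46) wrap the AsyncResult in a QueryTask. Redash's real class does more, but a minimal sketch covering only the behavior used above — construction from either a job_id or an async_result, plus id, celery_status, and ready() — might look like:

from celery.result import AsyncResult


class QueryTask(object):
    def __init__(self, job_id=None, async_result=None):
        # Re-attach to an existing task by id, or wrap the AsyncResult
        # returned by execute_query.apply_async().
        self._async_result = async_result or AsyncResult(job_id)

    @property
    def id(self):
        return self._async_result.id

    @property
    def celery_status(self):
        return self._async_result.status

    def ready(self):
        return self._async_result.ready()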
Example no. 53
    def __init__(self, redis_connection, query, priority, **kwargs):
        kwargs['query'] = fix_unicode(query)
        kwargs['priority'] = priority
        kwargs['query_hash'] = gen_query_hash(kwargs['query'])
        self.new_job = 'id' not in kwargs
        super(Job, self).__init__(redis_connection, **kwargs)