Code example #1
File: compute.py  Project: derek-schultz/chronology
  def __init__(self, query, timeframe, bucket_width=None, untrusted_time=None,
               metis=False):
    """Initialize QueryCompute
    :param query: A string of python code to execute as a Jia query.
    :param timeframe: A timeframe dictionary. It specifies a mode, which can be
    'recent' or 'range'. Depending on which mode is selected, some of the other
    parameters will be unused. The unused parameters come from the frontend for
    the purposes of storing default/previous values. If the mode is 'recent',
    only 'value' and 'scale' are used. If the mode is 'range', only 'from' and
    'to' are used.

    Example timeframe:
    timeframe = {
      'mode': 'recent',
      'value': 1,
      'scale': 'days',
      'from': 'Sat Jun 10 2014 00:00:00',
      'to': 'Sun Jun 11 2014 00:00:00',
    }

    :param bucket_width: Optional bucket width in seconds
    :param untrusted_time: Optional untrusted time interval in seconds
    :param metis: Send `query` to metis for computation
    """
    self._query = query
    self._bucket_width = bucket_width
    self._untrusted_time = untrusted_time
    self._metis = metis
    self._start_time, self._end_time = self._get_timeframe_bounds(timeframe,
                                                                  bucket_width)

    self._cache_client = KronosClient(
        app.config['CACHE_KRONOS_URL'],
        namespace=app.config['CACHE_KRONOS_NAMESPACE'],
        blocking=False, sleep_block=0.2)

    # The query string is passed through as an otherwise-unused unique_id
    # kwarg so that the QueryCache hash uniquely identifies this query.
    unique = {
      'unique_id': self._query
    }

    if self._metis:
      query_func = self._run_metis
    elif app.config['ALLOW_PYCODE']:
      query_func = self._run_query
    else:
      raise ValueError("`metis` must be `True` if ALLOW_PYCODE is not enabled")

    if self._bucket_width:
      bucket_width_timedelta = datetime.timedelta(seconds=bucket_width)
      self._query_cache = QueryCache(self._cache_client, query_func,
                                     bucket_width_timedelta,
                                     app.config['CACHE_KRONOS_NAMESPACE'],
                                     query_function_kwargs=unique)
Code example #2
File: compute.py  Project: sirrice/chronology
  def __init__(self, query, timeframe, bucket_width=None, untrusted_time=None,
               metis=False):
    """Initialize QueryCompute
    :param query: A string of python code to execute as a Jia query.
    :param timeframe: A timeframe dictionary. It specifies a mode, which can be
    'recent' or 'range'. Depending on which mode is selected, some of the other
    parameters will be unused. The unused parameters come from the frontend for
    the purposes of storing default/previous values. If the mode is 'recent',
    only 'value' and 'scale' are used. If the mode is 'range', only 'from' and
    'to' are used.

    Example timeframe:
    timeframe = {
      'mode': {'value': 'recent'},
      'value': 1,
      'scale': {'name': 'days'},
      'from': 'Sat Jun 10 2014 00:00:00',
      'to': 'Sun Jun 11 2014 00:00:00',
    }

    :param bucket_width: Optional bucket width in seconds
    :param untrusted_time: Optional untrusted time interval in seconds
    :param metis: Send `query` to metis for computation
    """
    try:
      self._app = current_app
      # current_app is a lazy proxy, so the assignment above never fails;
      # touching .config forces evaluation and raises RuntimeError when no
      # Flask application context is active.
      self._app.config
    except RuntimeError:
      from scheduler import get_app 
      self._app = get_app() 
    self._query = query
    self._bucket_width = bucket_width
    self._untrusted_time = untrusted_time
    self._metis = metis
    self._start_time, self._end_time = self._get_timeframe_bounds(timeframe,
                                                                  bucket_width)

    self._cache_client = KronosClient(
        self._app.config['CACHE_KRONOS_URL'],
        namespace=self._app.config['CACHE_KRONOS_NAMESPACE'],
        blocking=False, sleep_block=0.2)

    # The query string is passed through as an otherwise-unused unique_id
    # kwarg so that the QueryCache hash uniquely identifies this query.
    unique = {
      'unique_id': self._query
    }

    if self._metis:
      query_func = self._run_metis
    elif self._app.config['ALLOW_PYCODE']:
      query_func = self._run_query
    else:
      raise ValueError("`metis` must be `True` if ALLOW_PYCODE is not enabled")

    if self._bucket_width:
      bucket_width_timedelta = datetime.timedelta(seconds=bucket_width)
      self._query_cache = QueryCache(self._cache_client, query_func,
                                     bucket_width_timedelta,
                                     self._app.config['CACHE_KRONOS_NAMESPACE'],
                                     query_function_kwargs=unique)
Code example #3
File: compute.py  Project: sirrice/chronology
class QueryCompute(object):
  """A class for executing Jia queries

  Provides `compute` and `cache` methods. `compute(use_cache=False)` can be
  called to simply run the query within a given `timeframe`. Otherwise,
  `bucket_width` must be specified to get from the cache. In order to write to
  the cache via the `cache` method, both `bucket_width` and `untrusted_time`
  must be specified.
  """
  def __init__(self, query, timeframe, bucket_width=None, untrusted_time=None,
               metis=False):
    """Initialize QueryCompute
    :param query: A string of python code to execute as a Jia query.
    :param timeframe: A timeframe dictionary. It specifies a mode, which can be
    'recent' or 'range'. Depending on which mode is selected, some of the other
    parameters will be unused. The unused parameters come from the frontend for
    the purposes of storing default/previous values. If the mode is 'recent',
    only 'value' and 'scale' are used. If the mode is 'range', only 'from' and
    'to' are used.

    Example timeframe:
    timeframe = {
      'mode': {'value': 'recent'},
      'value': 1,
      'scale': {'name': 'days'},
      'from': 'Sat Jun 10 2014 00:00:00',
      'to': 'Sun Jun 11 2014 00:00:00',
    }

    :param bucket_width: Optional bucket width in seconds
    :param untrusted_time: Optional untrusted time interval in seconds
    :param metis: Send `query` to metis for computation
    """
    try:
      self._app = current_app
      # current_app is a lazy proxy, so the assignment above never fails;
      # touching .config forces evaluation and raises RuntimeError when no
      # Flask application context is active.
      self._app.config
    except RuntimeError:
      from scheduler import get_app 
      self._app = get_app() 
    self._query = query
    self._bucket_width = bucket_width
    self._untrusted_time = untrusted_time
    self._metis = metis
    self._start_time, self._end_time = self._get_timeframe_bounds(timeframe,
                                                                  bucket_width)

    self._cache_client = KronosClient(
        self._app.config['CACHE_KRONOS_URL'],
        namespace=self._app.config['CACHE_KRONOS_NAMESPACE'],
        blocking=False, sleep_block=0.2)

    # The query string is passed through as an otherwise-unused unique_id
    # kwarg so that the QueryCache hash uniquely identifies this query.
    unique = {
      'unique_id': self._query
    }

    if self._metis:
      query_func = self._run_metis
    elif self._app.config['ALLOW_PYCODE']:
      query_func = self._run_query
    else:
      raise ValueError("`metis` must be `True` if ALLOW_PYCODE is not enabled")

    if self._bucket_width:
      bucket_width_timedelta = datetime.timedelta(seconds=bucket_width)
      self._query_cache = QueryCache(self._cache_client, query_func,
                                     bucket_width_timedelta,
                                     self._app.config['CACHE_KRONOS_NAMESPACE'],
                                     query_function_kwargs=unique)

  def _get_timeframe_bounds(self, timeframe, bucket_width):
    """
    Get a `bucket_width` aligned `start_time` and `end_time` from a
    `timeframe` dict
    """
    if bucket_width:
      bucket_width_seconds = bucket_width
      bucket_width = epoch_time_to_kronos_time(bucket_width)

    # TODO(derek): Potential optimization by setting the end_time equal to the
    # untrusted_time if end_time > untrusted_time and the results are not being
    # output to the user (only for caching)
    if timeframe['mode']['value'] == 'recent':
      # Set end_time equal to now and align to bucket width
      end_time = datetime_to_kronos_time(datetime.datetime.now())
      original_end_time = end_time
      duration = get_seconds(timeframe['value'], timeframe['scale']['name'])
      duration = epoch_time_to_kronos_time(duration)
      start_time = original_end_time - duration

      if bucket_width:
        # Align values to the bucket width
        # TODO(derek): Warn the user that the timeframe has been altered to fit
        # the bucket width
        if (end_time % bucket_width) != 0:
          end_time += bucket_width - (end_time % bucket_width)

        if (start_time % bucket_width) != 0:
          start_time -= (start_time % bucket_width)

      start = kronos_time_to_datetime(start_time)
      end = kronos_time_to_datetime(end_time)
    elif timeframe['mode']['value'] == 'range':
      end = datetime.datetime.strptime(timeframe['to'], DT_FORMAT)
      end_seconds = datetime_to_epoch_time(end)

      start = datetime.datetime.strptime(timeframe['from'], DT_FORMAT)
      start_seconds = datetime_to_epoch_time(start)

      if bucket_width:
        # Align values to the bucket width
        # TODO(derek): Warn the user that the timeframe has been altered to fit
        # the bucket width
        start_bump = start_seconds % bucket_width_seconds
        start -= datetime.timedelta(seconds=start_bump)
        if (end_seconds % bucket_width_seconds) != 0:
          end_bump = bucket_width_seconds - (end_seconds % bucket_width_seconds)
          end += datetime.timedelta(seconds=end_bump)
    else:
      raise ValueError("Timeframe mode must be 'recent' or 'range'")

    return start, end

  def _run_query(self, start_time, end_time, unique_id=None):
    """Executes a Python query string and returns events

    Acts as a wrapper around exec that injects necessary local variables into
    the scope of the user-provided query blob.

    :param start_time: Python datetime to be injected into query
    :param end_time: Python datetime to be injected into query
    :param unique_id: An unused flag that allows the scheduler to hash this
    function uniquely based on its args when it passes through
    """
    client = KronosClient(self._app.config['KRONOS_URL'],
                          namespace=self._app.config['KRONOS_NAMESPACE'],
                          blocking=False,
                          sleep_block=0.2)

    locals_dict = {
      'kronos_client': client,
      'events': [],
      'start_time': start_time,
      'end_time': end_time,
    }

    try:
      exec(self._query, {}, locals_dict)  # No globals.
    except:
      _, exception, tb = sys.exc_info()
      raise PyCodeError(exception, traceback.format_tb(tb))

    events = sorted(locals_dict.get('events', []),
                    key=lambda event: event['@time'])

    return events

  def _run_metis(self, start_time, end_time, unique_id=None):
    """Send the query plan to Metis for computation and return its events.

    Metis responds with newline-delimited JSON, so the response lines are
    joined into a single JSON array before decoding.
    """
    start_time = datetime_to_kronos_time(start_time)
    end_time = datetime_to_kronos_time(end_time)
    q = create_metis_query_plan(self._query, start_time, end_time)
    r = requests.post("%s/1.0/query" % self._app.config['METIS_URL'], data=q)
    return json.loads('[%s]' % ','.join(r.text.splitlines()))

  def compute(self, use_cache=True):
    """Call a user defined query and return events with optional help from
    the cache.

    :param use_cache: Specifies whether the cache should be used when possible
    """
    if use_cache:
      if not self._bucket_width:
        raise ValueError('QueryCompute must be initialized with a bucket_width'
                         ' to use caching features.')
      return list(self._query_cache.retrieve_interval(self._start_time,
                                                      self._end_time,
                                                      compute_missing=True))
    else:
      if self._metis:
        return self._run_metis(self._start_time, self._end_time)
      else:
        return self._run_query(self._start_time, self._end_time)

  def cache(self):
    """Call a user defined query and cache the results"""
    if not self._bucket_width or self._untrusted_time is None:
      raise ValueError('QueryCompute must be initialized with a bucket_width '
                       'and an untrusted_time in order to write to the cache.')

    now = datetime.datetime.now()
    untrusted_time = now - datetime.timedelta(seconds=self._untrusted_time)
    list(self._query_cache.compute_and_cache_missing_buckets(
        self._start_time,
        self._end_time,
        untrusted_time))
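
Usage note: the following is a minimal, hypothetical sketch of how the QueryCompute class above might be driven; it is not taken from either project. It assumes a Flask application context providing the config keys referenced in __init__ (CACHE_KRONOS_URL, CACHE_KRONOS_NAMESPACE, KRONOS_URL, KRONOS_NAMESPACE) with ALLOW_PYCODE enabled, and the import path and stream name below are placeholders. Note that _get_timeframe_bounds reads timeframe['mode']['value'] and timeframe['scale']['name'], so 'mode' and 'scale' are passed as small dicts rather than plain strings.

from compute import QueryCompute  # assumed import path

# The query runs via _run_query with kronos_client, events, start_time and
# end_time injected as locals; 'my.stream' is a placeholder stream name.
query = """
events = list(kronos_client.get('my.stream', start_time, end_time))
"""

timeframe = {
  'mode': {'value': 'recent'},   # _get_timeframe_bounds reads mode['value']
  'value': 1,
  'scale': {'name': 'days'},     # ...and scale['name']
  'from': 'Sat Jun 10 2014 00:00:00',
  'to': 'Sun Jun 11 2014 00:00:00',
}

# Ad-hoc run: no bucket_width was given, so the cache cannot be used.
adhoc = QueryCompute(query, timeframe)
events = adhoc.compute(use_cache=False)

# Cached run: bucket_width (seconds) is required to read through the cache,
# and untrusted_time (seconds) is additionally required to write to it.
cached = QueryCompute(query, timeframe, bucket_width=3600, untrusted_time=900)
cached.cache()             # compute; only trusted (old enough) buckets cached
events = cached.compute()  # served from the cache where possible
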
Code example #4
  def test_cache_layer(self):
    cache = QueryCache(self.client, self.filter_and_sum,
                       self.bucket_width, self.computed_namespace)
    start_time = self.start_time - (self.bucket_width * 3)
    end_time = self.start_time + (self.total_events * self.increment) + (
      self.bucket_width * 3)
    untrusted_time = self.start_time + (
      timedelta(minutes=(self.total_events / 2) - 25))

    # Verify all results were computed correctly.
    self.verify_results(lambda: list(
        cache.compute_and_cache_missing_buckets(start_time, end_time,
                                                untrusted_time)),
                        cache, 25, 31)

    # Verify only trusted results are cached.
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time, end_time)),
      cache, 11, 0)

    # Running the same operations twice should result in the same
    # results as before.
    self.verify_results(
      lambda: list(cache.compute_and_cache_missing_buckets(start_time, end_time,
                                                           untrusted_time)),
      cache, 25, 17)
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time, end_time)),
      cache, 11, 0)

    # Expanding the time range without caching should also result in the same
    # results.
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time - self.bucket_width,
                                           end_time + self.bucket_width)),
      cache, 11, 0)

    # But specifying compute_missing should get all results for the time range.
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time - self.bucket_width,
                                           end_time + self.bucket_width,
                                           compute_missing=True)),
      cache, 25, 19)

    # Overlapping time queries should result in the same
    # results as before, and benefit from the cache.
    self.verify_results(
      lambda: list(cache.compute_and_cache_missing_buckets(start_time -
                                                           self.bucket_width,
                                                           end_time +
                                                           self.bucket_width,
                                                           untrusted_time)),
      cache, 25, 19)
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time, end_time)),
      cache, 11, 0)

    # Increasing the trusted time should increase the cached results.
    untrusted_time = untrusted_time + timedelta(minutes=40)
    self.verify_results(
      lambda: list(cache.compute_and_cache_missing_buckets(start_time, end_time,
                                                           untrusted_time)),
      cache, 25, 17)
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time, end_time)),
      cache, 13, 0)

    # Decreasing trusted time shouldn't remove results.
    untrusted_time = untrusted_time - timedelta(minutes=40)
    self.verify_results(
      lambda: list(cache.compute_and_cache_missing_buckets(start_time, end_time,
                                                           untrusted_time)),
      cache, 25, 15)
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time, end_time)),
      cache, 13, 0)

    # If a bucket ends up with two cached entries (a corrupted cache), results
    # for that bucket should no longer be returned by retrieve_interval.
    results = list(cache.retrieve_interval(start_time, end_time))
    duplicate_result = dict(results[10])
    duplicate_result['b_sum'] = 0
    self.client.put({cache._scratch_stream: [duplicate_result]},
                    namespace=cache._scratch_namespace)
    self.client.flush()
    safe_results = list(cache.retrieve_interval(start_time, end_time))
    self.assertEqual(results[:10] + results[11:], safe_results)

    # Rerunning the cache/computation should re-cache the corrupted
    # element.
    self.verify_results(
      lambda: list(cache.compute_and_cache_missing_buckets(start_time, end_time,
                                                           untrusted_time)),
      cache, 25, 16)
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time, end_time)),
      cache, 13, 0)

    # Forcing computation should generate the same result set.
    self.verify_results(
      lambda: list(cache.compute_and_cache_missing_buckets(
          start_time, end_time, untrusted_time, force_recompute=True)),
      cache, 25, 31)
    self.verify_results(
      lambda: list(cache.retrieve_interval(start_time, end_time)),
      cache, 13, 0)
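
Note on bucket alignment: the _get_timeframe_bounds logic in code example #3 rounds the start time down and the end time up to the nearest bucket boundary, so the cache always operates on whole buckets. A standalone illustration in plain epoch seconds (the helper name is ours, not part of the project):

def align_to_bucket(start_seconds, end_seconds, bucket_width_seconds):
  """Round start down and end up to bucket boundaries (illustrative only)."""
  aligned_start = start_seconds - (start_seconds % bucket_width_seconds)
  aligned_end = end_seconds
  if aligned_end % bucket_width_seconds != 0:
    aligned_end += bucket_width_seconds - (aligned_end % bucket_width_seconds)
  return aligned_start, aligned_end

# A 90s-250s window with 60s buckets becomes 60s-300s: the partially covered
# buckets at each edge are widened rather than dropped.
assert align_to_bucket(90, 250, 60) == (60, 300)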