Example #1
0
def _execute(task):
  """Run a task's code inside an application context and report the outcome.

  This exists outside the Scheduler class because it is pickled after it is
  sent to the executor.

  START/COMPLETE lines go to stdout. When the code raises, the error and a
  FAIL line go to stderr and a failure email is sent to the addresses in the
  SCHEDULER_FAILURE_EMAILS config value.

  :param task: dictionary with at least the keys 'id' and 'code'.
  :returns: the same `task` object, whether or not the code succeeded.
  """
  task_id = task['id']
  print("[%s] -- %s -- START" % (datetime.datetime.now(), task_id))
  try:
    with get_app().app_context():
      # Tuple form of exec: identical in meaning to `exec code in {}, {}`.
      # The code runs with fresh, empty globals and locals.
      exec(task['code'], {}, {})
      print("[%s] -- %s -- COMPLETE" % (datetime.datetime.now(), task_id))
  except Exception as e:
    if isinstance(e, PyCodeError):
      # PyCodeError carries its own name/message/traceback in e.data
      err_msg = "%s: %s\n%s" % (e.data['name'], e.data['message'],
                                ''.join(e.data['traceback']))
    else:
      err_msg = traceback.format_exc()
    failed_at = datetime.datetime.now()
    sys.stderr.write(err_msg)
    sys.stderr.write("[%s] -- %s -- FAIL\n" % (failed_at, task_id))
    email_msg = 'Task %s failed at %s\n\n%s' % (task_id, failed_at, err_msg)
    try:
      send_mail(get_app().config['SCHEDULER_FAILURE_EMAILS'],
                'Scheduler Failure', email_msg)
    except Exception:
      # Without this guard the `finally: return` below would discard the
      # notification error without leaving any trace.
      sys.stderr.write("[%s] -- %s -- FAILURE EMAIL NOT SENT\n%s" % (
        datetime.datetime.now(), task_id, traceback.format_exc()))
  finally:
    # Deliberate: the task is handed back no matter what happened above.
    # NOTE(review): this also swallows anything still propagating here
    # (including KeyboardInterrupt/SystemExit) -- confirm that is intended.
    return task
Example #2
0
  def _loop(self, reader):
    """Main execution loop of the scheduler.

    Each iteration does one of two things:

    * If the task at the head of the queue is due, it is popped and submitted
      to the executor, unless its id is in _pending_cancels, in which case
      the pending cancel is consumed and the task is dropped.
    * Otherwise the loop listens for up to half a second for a schedule or
      cancel request coming from Flask over the gipc pipe (reader), then
      waits up to another half second for submitted tasks to finish.

    A finished task whose result has a value is rescheduled `interval`
    seconds after the iteration's `now` when its interval is non-zero. A
    result without a value is reported on stderr and by email, and the task
    is not rescheduled.

    :param reader: read end of the pipe on which ('schedule', task) and
      ('cancel', ...) messages arrive.
    """
    results = set()
    # Remembers which task produced each in-flight result, so a failure is
    # attributed to the task that actually failed rather than to whatever
    # `task` happened to be bound to last.
    task_for_result = {}

    while True:
      now = datetime.datetime.now()
      if self._task_queue and self._task_queue[0][0] <= now:
        task = heappop(self._task_queue)[1]
        if task['id'] not in self._pending_cancels:
          result = self._executor.submit(_execute, task)
          results.add(result)
          task_for_result[result] = task
        else:
          self._pending_cancels.remove(task['id'])
      else:
        # Check for new tasks coming from HTTP. The second argument (False)
        # makes the timeout silent: when it fires, the rest of the `with`
        # body is skipped instead of an exception reaching this loop.
        with gevent.Timeout(0.5, False) as t:
          message = reader.get(timeout=t)
          if message[0] == 'schedule':
            self._schedule(message[1], next_run=now)
          elif message[0] == 'cancel':
            self._cancel(message[1])
        # Reschedule completed tasks
        if not results:
          gevent.sleep(0.5)
          continue
        ready = self._executor.wait(results, num=1, timeout=0.5)
        for result in ready:
          results.remove(result)
          submitted_task = task_for_result.pop(result)
          if result.value:
            finished_task = result.value
            interval = int(finished_task['interval'])
            if interval:
              run_at = now + datetime.timedelta(seconds=interval)
              self._schedule(finished_task, next_run=run_at)
          else:
            err_msg = result.exception
            sys.stderr.write("ERROR: %s" % err_msg)
            email_msg = 'Task %s failed at %s\n\n%s' % (
              submitted_task['id'],
              datetime.datetime.now(),
              err_msg
            )
            send_mail(get_app().config['SCHEDULER_FAILURE_EMAILS'],
                      'Scheduler Failure',
                      email_msg)
Example #3
0
  def __init__(self):
    """Initialize the queue and spawn the main loop

    Upon initialization, tasks stored in the database with active=True are
    immediately scheduled (their run time is set to the current time).

    _task_queue is a priority queue ordered using Python's heapq functionality.
    Elements in _task_queue are tuples of the form (datetime, task) where
    datetime is the scheduled run time and task is a dictionary as defined
    in the above docstring for the Scheduler class.

    For concurrency safety reasons, never write to _task_queue outside the
    _loop() thread.

    The main loop is started with gevent.spawn and is given the read end of a
    gipc pipe. The write end is kept in _schedule_pipe for later
    communication with the loop. _interrupt is registered to run at
    interpreter exit.
    """
    self._task_queue = []  # Never write to this outside the _loop thread
    self._pending_cancels = set()
    self._executor = GIPCExecutor()

    # Load previously scheduled tasks from database
    now = datetime.datetime.now()

    with get_app().app_context():
      # filter_by() only builds the query; the rows are fetched when it is
      # iterated. Iterate inside the `with` block so the fetch happens while
      # the application context is still active.
      # Writing directly to the _task_queue is safe since we haven't started
      # the _loop yet
      self._task_queue.extend(
        (now, {'id': row.id, 'interval': row.interval, 'code': row.code})
        for row in Task.query.filter_by(active=True)
      )

    # Make _task_queue a priority queue
    heapify(self._task_queue)

    # Spawn main loop and save writer for future communication
    (read, write) = gipc.pipe()
    self._main_thread = gevent.spawn(self._loop, read)
    self._schedule_pipe = write
    atexit.register(self._interrupt)
Example #4
0
  def __init__(self, query, timeframe, bucket_width=None, untrusted_time=None,
               metis=False):
    """Set up a QueryCompute for a single Jia query.

    :param query: A string of python code to execute as a Jia query.
    :param timeframe: A timeframe dictionary. Its 'mode' is either 'recent' or
    'range', and the mode decides which of the remaining keys matter: 'recent'
    uses only 'value' and 'scale', 'range' uses only 'from' and 'to'. The
    other keys are still sent by the frontend, which stores them as
    default/previous values.

    Example timeframe:
    timeframe = {
      'mode': 'recent',
      'value': 1,
      'scale': 'days',
      'from': 'Sat Jun 10 2014 00:00:00',
      'to': 'Sun Jun 11 2014 00:00:00',
    }

    :param bucket_width: Optional bucket width in seconds
    :param untrusted_time: Optional untrusted time interval in seconds
    :param metis: Send `query` to metis for computation
    :raises ValueError: if `metis` is false and the ALLOW_PYCODE config value
    is not enabled
    """
    try:
      self._app = current_app
      # Binding current_app cannot fail by itself; reading an attribute from
      # it is what raises RuntimeError, so probe it here.
      self._app.config
    except RuntimeError:
      # Fall back to the scheduler's application object
      from scheduler import get_app
      self._app = get_app()

    self._query = query
    self._bucket_width = bucket_width
    self._untrusted_time = untrusted_time
    self._metis = metis
    bounds = self._get_timeframe_bounds(timeframe, bucket_width)
    self._start_time, self._end_time = bounds

    config = self._app.config
    self._cache_client = KronosClient(
        config['CACHE_KRONOS_URL'],
        namespace=config['CACHE_KRONOS_NAMESPACE'],
        blocking=False,
        sleep_block=0.2)

    # Running python code directly is only permitted when the app allows it
    if not self._metis and not config['ALLOW_PYCODE']:
      raise ValueError("`metis` must be `True` if ALLOW_PYCODE is not enabled")
    runner = self._run_metis if self._metis else self._run_query

    if self._bucket_width:
      # The query is sent through as an unused unique_id argument so that the
      # QueryCache hash can properly uniquely identify it
      self._query_cache = QueryCache(
          self._cache_client,
          runner,
          datetime.timedelta(seconds=bucket_width),
          config['CACHE_KRONOS_NAMESPACE'],
          query_function_kwargs={'unique_id': self._query})