コード例 #1
0
ファイル: process.py プロジェクト: ssalevan/aurora
  def _wait_for_control(self):
    """Wait for control of the checkpoint stream: must be run in the child.

    Polls the checkpoint file from offset self._ckpt_head until a record can be read or
    self.MAXIMUM_CONTROL_WAIT has elapsed, sleeping self.CONTROL_WAIT_CHECK_INTERVAL after
    every unsuccessful attempt.

    Returns:
      True once a FORKED record matching this process (name, fork time and coordinator pid)
      has been read.  self._seq is set to that record's sequence number plus one.

    Raises:
      self.CheckpointError: if the record has no process status, if it does not match this
        process, or if no record became available within the maximum wait.
    """
    poll_interval = self.CONTROL_WAIT_CHECK_INTERVAL
    total_wait_time = Amount(0, Time.SECONDS)

    # Thrift records are read from this file, so open it in binary mode.
    with open(self.ckpt_file(), 'rb') as fp:
      fp.seek(self._ckpt_head)
      rr = ThriftRecordReader(fp, RunnerCkpt)
      while total_wait_time < self.MAXIMUM_CONTROL_WAIT:
        checkpoint = None
        if os.path.getsize(self.ckpt_file()) != self._ckpt_head:
          checkpoint = rr.try_read()
        if not checkpoint:
          # Nothing new in the file, or no record could be read from it yet.  Sleep and charge
          # the wait in both cases, so that a size change without a readable record cannot
          # spin this loop without ever reaching the timeout.
          self._platform.clock().sleep(poll_interval.as_(Time.SECONDS))
          total_wait_time += poll_interval
          continue

        status = checkpoint.process_status
        if not status:
          raise self.CheckpointError('No process status in checkpoint!')
        if (status.process != self.name() or
            status.state != ProcessState.FORKED or
            status.fork_time != self._fork_time or
            status.coordinator_pid != self._pid):
          self._log('Losing control of the checkpoint stream:')
          self._log('   fork_time [%s] vs self._fork_time [%s]' % (
              status.fork_time, self._fork_time))
          self._log('   coordinator_pid [%s] vs self._pid [%s]' % (
              status.coordinator_pid, self._pid))
          raise self.CheckpointError('Lost control of the checkpoint stream!')
        self._log('Taking control of the checkpoint stream at record: %s' % status)
        self._seq = status.seq + 1
        return True
    raise self.CheckpointError('Timed out waiting for checkpoint stream!')
コード例 #2
0
ファイル: muxer.py プロジェクト: sumanau7/incubator-aurora
  def _fast_forward_stream(self, process_name):
    """Advance the stream bound to process_name up to its stored watermark.

    Records are consumed while their process_status.seq does not exceed
    self._watermarks[process_name].  When a record beyond the watermark is read, the file
    position is restored to where it was before that read.  A warning is logged if reading
    stops before the watermark is reached.
    """
    high_watermark = self._watermarks[process_name]
    log.debug('Fast forwarding %s stream to seq=%s' % (process_name, high_watermark))
    assert self._processes.get(process_name) is not None
    stream = self._processes[process_name]
    reader = ThriftRecordReader(stream, RunnerCkpt)
    reached = -1
    consumed = 0
    while reached < high_watermark:
      # Remember where this record starts so an over-read can be undone.
      rewind_to = stream.tell()
      entry = reader.try_read()
      if entry is None:
        break
      seq = entry.process_status.seq
      if seq <= high_watermark:
        reached = seq
        consumed += 1
        continue
      log.debug('Over-seeked %s [watermark = %s, high watermark = %s], rewinding.' % (
        process_name, seq, high_watermark))
      stream.seek(rewind_to)
      break

    if reached < high_watermark:
      log.warning('Only able to fast forward to %s@sequence=%s, high watermark is %s' % (
         process_name, reached, high_watermark))

    if consumed:
      log.debug('Fast forwarded %s %s record(s) to seq=%s.' % (process_name, consumed, reached))
コード例 #3
0
  def _apply_states(self):
    """
      os.stat() the corresponding checkpoint stream of this task and determine if there are new ckpt
      records.  Attempt to read those records and update the high watermark for that stream.

      Records are read starting at self._ckpt_head and handed to self._dispatcher one at a time;
      reading stops when no further record can be read or when the dispatcher raises
      InvalidSequenceNumber.  self._ckpt_head is then set to the position reached in the file.

      Returns True if the read position in the checkpoint file changed, False otherwise
      (including when the checkpoint file does not exist yet).  Any other OSError is re-raised.
    """
    try:
      ckpt_offset = os.stat(self._runner_ckpt).st_size

      updated = False
      if self._ckpt_head < ckpt_offset:
        # Thrift records are read from this file, so open it in binary mode: seek()/tell()
        # then work in plain byte offsets, comparable with the size reported by os.stat().
        with open(self._runner_ckpt, 'rb') as fp:
          fp.seek(self._ckpt_head)
          rr = ThriftRecordReader(fp, RunnerCkpt)
          while True:
            runner_update = rr.try_read()
            if not runner_update:
              break
            try:
              self._dispatcher.dispatch(self._runnerstate, runner_update)
            except CheckpointDispatcher.InvalidSequenceNumber as e:
              log.error('Checkpoint stream is corrupt: %s' % e)
              break
          new_ckpt_head = fp.tell()
          updated = self._ckpt_head != new_ckpt_head
          self._ckpt_head = new_ckpt_head
      return updated
    except OSError as e:
      if e.errno == errno.ENOENT:
        # The log doesn't yet exist, will retry later.
        log.warning('Could not read from checkpoint %s' % self._runner_ckpt)
        return False
      else:
        raise
コード例 #4
0
ファイル: monitor.py プロジェクト: rowoot/aurora
    def _apply_states(self):
        """
        os.stat() the corresponding checkpoint stream of this task and determine if there are
        new ckpt records.  Attempt to read those records and update the high watermark for that
        stream.

        Records are read starting at self._ckpt_head and handed to self._dispatcher one at a
        time; reading stops when no further record can be read or when the dispatcher raises
        InvalidSequenceNumber.  self._ckpt_head is then set to the position reached in the file.

        Returns True if the read position in the checkpoint file changed, False otherwise
        (including when the checkpoint file does not exist yet).  Any other OSError is re-raised.
        """
        try:
            ckpt_offset = os.stat(self._runner_ckpt).st_size

            updated = False
            if self._ckpt_head < ckpt_offset:
                # Thrift records are read from this file, so open it in binary mode:
                # seek()/tell() then work in plain byte offsets, comparable with the size
                # reported by os.stat().
                with open(self._runner_ckpt, "rb") as fp:
                    fp.seek(self._ckpt_head)
                    rr = ThriftRecordReader(fp, RunnerCkpt)
                    while True:
                        runner_update = rr.try_read()
                        if not runner_update:
                            break
                        try:
                            self._dispatcher.dispatch(self._runnerstate, runner_update)
                        except CheckpointDispatcher.InvalidSequenceNumber as e:
                            log.error("Checkpoint stream is corrupt: %s" % e)
                            break
                    new_ckpt_head = fp.tell()
                    updated = self._ckpt_head != new_ckpt_head
                    self._ckpt_head = new_ckpt_head
            return updated
        except OSError as e:
            if e.errno == errno.ENOENT:
                # The log doesn't yet exist, will retry later.
                log.warning("Could not read from checkpoint %s" % self._runner_ckpt)
                return False
            else:
                raise
コード例 #5
0
ファイル: muxer.py プロジェクト: sumanau7/incubator-aurora
  def select(self):
    """
      Gather the checkpoint records currently readable from every bound process stream.

      Checkpoint records can come from one of two places:
        in-process: checkpoint records synthesized for FORKED and LOST events
        out-of-process: checkpoint records from file descriptors of forked coordinators

      Streams that cannot be fstat'ed are skipped with an error log, and a stream whose
      position lies beyond the file size is reported as truncated.

      Returns a list of RunnerCkpt objects that were successfully read, or an empty
      list if none were read.
    """
    self._bind_processes()
    collected = []
    for stream in self._processes.values():
      if not stream:
        # Falsy entries carry no usable handle.
        continue
      try:
        size_on_disk = os.fstat(stream.fileno()).st_size
      except OSError:
        log.error('Unable to fstat %s!' % stream.name)
        continue
      position = stream.tell()
      if position > size_on_disk:
        log.error('Truncated checkpoint record detected on %s!' % stream.name)
        continue
      if position == size_on_disk:
        continue
      # Unread bytes are available: drain records until the reader yields nothing.
      reader = ThriftRecordReader(stream, RunnerCkpt)
      record = reader.try_read()
      while record:
        collected.append(record)
        record = reader.try_read()
    if collected:
      log.debug('select() returning %s updates:' % len(collected))
      for record in collected:
        log.debug('  = %s' % record)
    return collected
コード例 #6
0
  def _wait_for_control(self):
    """Wait for control of the checkpoint stream: must be run in the child.

    Polls the checkpoint file from offset self._ckpt_head until a record can be read or
    self.MAXIMUM_CONTROL_WAIT has elapsed, sleeping self.CONTROL_WAIT_CHECK_INTERVAL after
    every unsuccessful attempt.

    Returns:
      True once a FORKED record matching this process (name, fork time and coordinator pid)
      has been read.  self._seq is set to that record's sequence number plus one.

    Raises:
      self.CheckpointError: if the record has no process status, if it does not match this
        process, or if no record became available within the maximum wait.
    """
    poll_interval = self.CONTROL_WAIT_CHECK_INTERVAL
    total_wait_time = Amount(0, Time.SECONDS)

    # Thrift records are read from this file, so open it in binary mode.
    with open(self.ckpt_file(), 'rb') as fp:
      fp.seek(self._ckpt_head)
      rr = ThriftRecordReader(fp, RunnerCkpt)
      while total_wait_time < self.MAXIMUM_CONTROL_WAIT:
        checkpoint = None
        if os.path.getsize(self.ckpt_file()) != self._ckpt_head:
          checkpoint = rr.try_read()
        if not checkpoint:
          # Nothing new in the file, or no record could be read from it yet.  Sleep and charge
          # the wait in both cases, so that a size change without a readable record cannot
          # spin this loop without ever reaching the timeout.
          self._platform.clock().sleep(poll_interval.as_(Time.SECONDS))
          total_wait_time += poll_interval
          continue

        status = checkpoint.process_status
        if not status:
          raise self.CheckpointError('No process status in checkpoint!')
        if (status.process != self.name() or
            status.state != ProcessState.FORKED or
            status.fork_time != self._fork_time or
            status.coordinator_pid != self._pid):
          self._log('Losing control of the checkpoint stream:')
          self._log('   fork_time [%s] vs self._fork_time [%s]' % (
              status.fork_time, self._fork_time))
          self._log('   coordinator_pid [%s] vs self._pid [%s]' % (
              status.coordinator_pid, self._pid))
          raise self.CheckpointError('Lost control of the checkpoint stream!')
        self._log('Taking control of the checkpoint stream at record: %s' % status)
        self._seq = status.seq + 1
        return True
    raise self.CheckpointError('Timed out waiting for checkpoint stream!')
コード例 #7
0
ファイル: muxer.py プロジェクト: radhikari54/Mastering-Mesos
    def _fast_forward_stream(self, process_name):
        """Advance the stream bound to process_name up to its stored watermark.

        Records are consumed while their process_status.seq does not exceed
        self._watermarks[process_name].  When a record beyond the watermark is
        read, the file position is restored to where it was before that read.
        A warning is logged if reading stops before the watermark is reached.
        """
        target = self._watermarks[process_name]
        log.debug('Fast forwarding %s stream to seq=%s' % (process_name, target))
        assert self._processes.get(process_name) is not None
        stream = self._processes[process_name]
        reader = ThriftRecordReader(stream, RunnerCkpt)
        reached = -1
        consumed = 0
        while reached < target:
            # Remember where this record starts so an over-read can be undone.
            rewind_to = stream.tell()
            entry = reader.try_read()
            if entry is None:
                break
            seq = entry.process_status.seq
            if seq <= target:
                reached = seq
                consumed += 1
                continue
            over_seek_args = (process_name, seq, target)
            log.debug(
                'Over-seeked %s [watermark = %s, high watermark = %s], rewinding.'
                % over_seek_args)
            stream.seek(rewind_to)
            break

        if reached < target:
            shortfall_args = (process_name, reached, target)
            log.warning(
                'Only able to fast forward to %s@sequence=%s, high watermark is %s'
                % shortfall_args)

        if consumed:
            summary_args = (process_name, consumed, reached)
            log.debug('Fast forwarded %s %s record(s) to seq=%s.' % summary_args)
コード例 #8
0
ファイル: muxer.py プロジェクト: radhikari54/Mastering-Mesos
    def select(self):
        """
        Gather the checkpoint records currently readable from every bound process stream.

        Checkpoint records can come from one of two places:
          in-process: checkpoint records synthesized for FORKED and LOST events
          out-of-process: checkpoint records from file descriptors of forked coordinators

        Streams that cannot be fstat'ed are skipped with an error log, and a stream whose
        position lies beyond the file size is reported as truncated.

        Returns a list of RunnerCkpt objects that were successfully read, or an empty
        list if none were read.
        """
        self._bind_processes()
        collected = []
        for stream in self._processes.values():
            if not stream:
                # Falsy entries carry no usable handle.
                continue
            try:
                size_on_disk = os.fstat(stream.fileno()).st_size
            except OSError:
                log.error('Unable to fstat %s!' % stream.name)
                continue
            position = stream.tell()
            if position > size_on_disk:
                log.error('Truncated checkpoint record detected on %s!' %
                          stream.name)
                continue
            if position == size_on_disk:
                continue
            # Unread bytes are available: drain records until the reader yields nothing.
            reader = ThriftRecordReader(stream, RunnerCkpt)
            record = reader.try_read()
            while record:
                collected.append(record)
                record = reader.try_read()
        if collected:
            log.debug('select() returning %s updates:' % len(collected))
            for record in collected:
                log.debug('  = %s' % record)
        return collected
コード例 #9
0
ファイル: muxer.py プロジェクト: sumanau7/incubator-aurora
 def has_data(self, process):
   """
     Return true if we think that there are updates available from the supplied process.

     Returns False when the process has no entry in self._processes, when its entry holds
     no handle (None), when the handle cannot be fstat'ed, or when no record can currently
     be read.  After a successful read the stream is sought back to its previous position,
     so the record is not consumed.
   """
   self._bind_processes()
   # TODO(wickman) Should this raise ProcessNotFound?
   fp = self._processes.get(process)
   if fp is None:
     # Unknown process, or an entry whose handle is None: there is nothing to read from.
     return False
   rr = ThriftRecordReader(fp, RunnerCkpt)
   old_pos = fp.tell()
   try:
     # Only checks that the descriptor can still be stat'ed; the size itself is not needed.
     os.fstat(fp.fileno())
   except OSError:
     log.debug('ProcessMuxer could not fstat for process %s' % process)
     return False
   update = rr.try_read()
   if update:
     fp.seek(old_pos)
     return True
   return False
コード例 #10
0
ファイル: muxer.py プロジェクト: radhikari54/Mastering-Mesos
 def has_data(self, process):
     """
     Return true if we think that there are updates available from the supplied process.

     Returns False when the process has no entry in self._processes, when its entry holds
     no handle (None), when the handle cannot be fstat'ed, or when no record can currently
     be read.  After a successful read the stream is sought back to its previous position,
     so the record is not consumed.
     """
     self._bind_processes()
     # TODO(wickman) Should this raise ProcessNotFound?
     fp = self._processes.get(process)
     if fp is None:
         # Unknown process, or an entry whose handle is None: there is nothing to read from.
         return False
     rr = ThriftRecordReader(fp, RunnerCkpt)
     old_pos = fp.tell()
     try:
         # Only checks that the descriptor can still be stat'ed; the size is not needed.
         os.fstat(fp.fileno())
     except OSError:
         log.debug('ProcessMuxer could not fstat for process %s' % process)
         return False
     update = rr.try_read()
     if update:
         fp.seek(old_pos)
         return True
     return False