コード例 #1
0
ファイル: rewarder_session.py プロジェクト: jshuadvd/universe
        def retriable_error(e, error_message):
            """Handle a failed rewarder connection: schedule a retry or give up.

            ``e`` is the error; when it is a twisted ``Failure`` the wrapped
            exception is used for logging and reporting instead.
            ``error_message`` is a short description included in the log lines.
            """
            err = e.value if isinstance(e, failure.Failure) else e

            # The connection slot was already torn down; retrying would be pointless.
            if self._already_closed(factory.i):
                logger.error('[%s] Got error, but giving up on reconnecting, since %d already disconnected', factory.label, factory.i)
                return

            # Retry budget (total time slept so far vs. start_timeout) is spent:
            # report the error instead of rescheduling.
            if not elapsed_sleep_time < start_timeout:
                logger.error('[%s] %s. Retries exceeded (slept %ds/%ds): %s', factory.label, error_message, elapsed_sleep_time, start_timeout, err)
                record_error(err)
                return

            # Every kind of error is retried (nothing is trapped by type), which
            # also covers DNS failures.
            #
            # The delay grows linearly with the attempt number and is capped at 10.
            delay = min(2 * attempt + 1, 10)
            logger.error('[%s] Waiting on rewarder: %s. Retry in %ds (slept %ds/%ds): %s', factory.label, error_message, delay, elapsed_sleep_time, start_timeout, err)

            # Re-issue the same connection request, bumping the attempt counter
            # and the accumulated sleep time.
            retry_kwargs = dict(
                name=name, address=address,
                env_id=env_id, seed=seed, fps=fps, i=i, network=network,
                env_status=env_status, reward_buffer=reward_buffer, label=label,
                attempt=attempt + 1, elapsed_sleep_time=elapsed_sleep_time + delay,
                start_timeout=start_timeout, password=password,
                observer=observer, skip_network_calibration=skip_network_calibration,
            )
            reactor.callLater(delay, self._connect, **retry_kwargs)
コード例 #2
0
ファイル: rewarder_session.py プロジェクト: smarthi/universe
        def websocket_failed(e):
            """Handle a failed websocket connection: schedule a retry or give up.

            ``e`` is the error; a twisted ``Failure`` is unwrapped to the exception
            it carries. The description used in the log lines is read from the
            ``error_message`` attribute set on this function object.
            """
            if isinstance(e, failure.Failure):
                cause = e.value
            else:
                cause = e

            # The connection slot was already torn down; do not reconnect.
            if self._already_closed(factory.i):
                logger.error('[%s] Giving up on reconnecting, since %d already disconnected', factory.label, factory.i)
                return

            message = websocket_failed.error_message
            can_retry = elapsed_sleep_time < start_timeout

            if not can_retry:
                # Total time slept has reached start_timeout: report and stop.
                logger.error('[%s] %s. Retries exceeded (slept %ds/%ds): %s', factory.label, message, elapsed_sleep_time, start_timeout, cause)
                record_error(cause)
                return

            # All error types are retried (no trapping by type), so DNS errors
            # are handled as well.
            #
            # Linear backoff in the attempt number, capped at 10.
            pause = min(2 * attempt + 1, 10)
            logger.error('[%s] Waiting on rewarder: %s. Retry in %ds (slept %ds/%ds): %s', factory.label, message, pause, elapsed_sleep_time, start_timeout, cause)

            # Same connection parameters, with the attempt counter and the
            # accumulated sleep time advanced.
            reconnect_args = {
                'name': name, 'address': address,
                'env_id': env_id, 'seed': seed, 'fps': fps, 'i': i, 'network': network,
                'env_status': env_status, 'reward_buffer': reward_buffer, 'label': label,
                'attempt': attempt + 1, 'elapsed_sleep_time': elapsed_sleep_time + pause,
                'start_timeout': start_timeout, 'password': password,
                'observer': observer, 'skip_network_calibration': skip_network_calibration,
            }
            reactor.callLater(pause, self._connect, **reconnect_args)
コード例 #3
0
ファイル: reward_proxy_server.py プロジェクト: deev/universe
 def _connect_errback(reason):
     """Handle a failed upstream connection: retry with backoff or disconnect.

     ``reason`` is the failure passed in by the caller; it is only logged.
     """
     # Out of attempts: log and drop our own transport.
     if tries >= max_attempts:
         logger.error('[RewardProxyServer] [%d] Connection to %s failed: %s. Completed %d/%d atttempts; disconnecting.', self.id, remote, reason, tries, max_attempts)
         self.transport.loseConnection()
         return

     # Somewhat arbitrary exponential backoff: failures here should be
     # pretty rare, and indicate that we're just starting up.
     backoff = 1.5 ** tries
     logger.info('[RewardProxyServer] [%d] Connection to %s failed: %s. Try %d/%d; going to retry in %fs', self.id, remote, reason, tries, max_attempts, backoff)
     reactor.callLater(
         backoff, self.connect_upstream,
         tries=tries + 1, max_attempts=max_attempts)
コード例 #4
0
ファイル: connection_timer.py プロジェクト: deev/universe
def measure_clock_skew(label, host):
    """Spawn ``ntpdate`` against *host* and return the protocol's Deferred.

    The command ``ntpdate -q -p 8 <host>`` is run as a subprocess driven by
    a ``Clockskew`` process protocol. If the subprocess is still alive after
    the timeout it is killed with SIGKILL.

    The timeout (seconds) is read from the ``UNIVERSE_NTPDATE_TIMEOUT``
    environment variable and defaults to 20.

    Args:
        label: Prefix used in log messages.
        host: Host name or address passed to ``ntpdate``.

    Returns:
        ``skew.deferred`` -- presumably fires with the measured skew once the
        subprocess finishes; confirm against ``Clockskew``.
    """
    cmd = ['ntpdate', '-q', '-p', '8', host]
    extra_logger.info('[%s] Starting network calibration with %s', label, ' '.join(cmd))
    skew = Clockskew(label, cmd)
    # TODO: search PATH for this?
    # NOTE: the subprocess is started with an empty environment ({}).
    process = reactor.spawnProcess(skew, '/usr/sbin/ntpdate', cmd, {})
    # Debugging aid: swap in a slow command to exercise the timeout path.
    # process = reactor.spawnProcess(skew, '/bin/sleep', ['sleep', '2'], {})

    t = float(os.environ.get('UNIVERSE_NTPDATE_TIMEOUT', 20))
    def timeout():
        # process.pid is falsy once the process has been reaped, so this only
        # acts when the subprocess is still running.
        if process.pid:
            # Suggest a value above the timeout actually in effect; a fixed
            # suggestion could be lower than the current setting.
            logger.error('[%s] %s call timed out after %ss; killing the subprocess. This is ok, but you could have more accurate timings by enabling UDP port 123 traffic to your env. (Alternatively, you can try increasing the timeout by setting environment variable UNIVERSE_NTPDATE_TIMEOUT to a value larger than %s.)', label, ' '.join(cmd), t, t)
            process.signalProcess(signal.SIGKILL)
            process.reapProcess()
    # TODO: make this part of the connection string
    reactor.callLater(t, timeout)
    return skew.deferred
コード例 #5
0
 def _connect_errback(reason):
     """Retry the upstream connection with backoff, or disconnect when out of tries.

     ``reason`` is the failure handed to this handler; it is only logged.
     """
     attempts_remain = tries < max_attempts

     if not attempts_remain:
         # Give up: log and close our own transport.
         logger.error(
             '[RewardProxyServer] [%d] Connection to %s failed: %s. Completed %d/%d atttempts; disconnecting.',
             self.id, remote, reason, tries, max_attempts)
         self.transport.loseConnection()
     else:
         # Somewhat arbitrary exponential backoff: failures should be
         # pretty rare, and indicate that we're just starting up.
         wait = 1.5**tries
         logger.info(
             '[RewardProxyServer] [%d] Connection to %s failed: %s. Try %d/%d; going to retry in %fs',
             self.id, remote, reason, tries, max_attempts, wait)
         next_attempt = {'tries': tries + 1, 'max_attempts': max_attempts}
         reactor.callLater(wait, self.connect_upstream, **next_attempt)
コード例 #6
0
ファイル: rewarder_session.py プロジェクト: jshuadvd/universe
 def _start(self):
     """Schedule a network recalibration five minutes from now.

     The scheduled call is stored on ``self.recalibrate``. When it runs it
     starts a connection-time measurement and calls ``_start`` again, so the
     recalibration keeps re-arming itself.
     """
     def calibrate():
         def fail(reason):
             logger.error('[%s] Could not recalibrate network: %s', self.client.factory.label, reason)

         # A fresh Deferred for this measurement; errors are only logged.
         measurement = defer.Deferred()
         measurement.addErrback(fail)
         self._start_measure_connection_time(measurement)
         # Re-arm the timer for the next round.
         self._start()

     interval = 5 * 60
     self.recalibrate = reactor.callLater(interval, calibrate)
コード例 #7
0
ファイル: rewarder_session.py プロジェクト: smarthi/universe
 def _start(self):
     """Arm a timer that recalibrates the network in five minutes.

     The pending call is kept in ``self.recalibrate``. Each run kicks off a
     connection-time measurement and then calls ``_start`` again to schedule
     the next one.
     """
     def calibrate():
         def log_failure(reason):
             logger.error('[%s] Could not recalibrate network: %s', self.client.factory.label, reason)

         done = defer.Deferred()
         # Failures of the measurement are logged by the errback.
         done.addErrback(log_failure)
         self._start_measure_connection_time(done)
         # Schedule the following recalibration.
         self._start()

     self.recalibrate = reactor.callLater(300, calibrate)
コード例 #8
0
def measure_clock_skew(label, host):
    """Spawn ``ntpdate`` against *host* and return the protocol's Deferred.

    Runs ``ntpdate -q -p 8 <host>`` as a subprocess driven by a ``Clockskew``
    process protocol, and kills it with SIGKILL if it is still alive when the
    timeout expires.

    The timeout (seconds) comes from the ``UNIVERSE_NTPDATE_TIMEOUT``
    environment variable and defaults to 20.

    Args:
        label: Prefix used in log messages.
        host: Host name or address passed to ``ntpdate``.

    Returns:
        ``skew.deferred`` -- presumably fires with the measured skew once the
        subprocess finishes; confirm against ``Clockskew``.
    """
    cmd = ['ntpdate', '-q', '-p', '8', host]
    extra_logger.info('[%s] Starting network calibration with %s', label,
                      ' '.join(cmd))
    skew = Clockskew(label, cmd)
    # TODO: search PATH for this?
    # NOTE: the subprocess is started with an empty environment ({}).
    process = reactor.spawnProcess(skew, '/usr/sbin/ntpdate', cmd, {})
    # Debugging aid: swap in a slow command to exercise the timeout path.
    # process = reactor.spawnProcess(skew, '/bin/sleep', ['sleep', '2'], {})

    t = float(os.environ.get('UNIVERSE_NTPDATE_TIMEOUT', 20))

    def timeout():
        # process.pid is falsy once the process has been reaped, so this only
        # acts when the subprocess is still running.
        if process.pid:
            # Suggest a value above the timeout actually in effect; a fixed
            # suggestion could be lower than the current setting.
            logger.error(
                '[%s] %s call timed out after %ss; killing the subprocess. This is ok, but you could have more accurate timings by enabling UDP port 123 traffic to your env. (Alternatively, you can try increasing the timeout by setting environment variable UNIVERSE_NTPDATE_TIMEOUT to a value larger than %s.)',
                label, ' '.join(cmd), t, t)
            process.signalProcess(signal.SIGKILL)
            process.reapProcess()

    # TODO: make this part of the connection string
    reactor.callLater(t, timeout)
    return skew.deferred