Esempio n. 1
0
        def retriable_error(e, error_message):
            """Handle a connection error: retry ``self._connect`` later or give up.

            Args:
                e: The error. May arrive wrapped in a ``failure.Failure``, in
                    which case the wrapped value is what gets logged/recorded.
                error_message: Short description included in the log lines.

            Nothing is retried once the connection identified by ``factory.i``
            is already closed. Otherwise a retry is scheduled while the
            accumulated sleep time is below ``start_timeout``; after that the
            error is passed to ``record_error``.
            """
            err = e.value if isinstance(e, failure.Failure) else e

            if self._already_closed(factory.i):
                logger.error('[%s] Got error, but giving up on reconnecting, since %d already disconnected', factory.label, factory.i)
                return

            # Every kind of error (DNS failures included) is treated as
            # retriable for now, instead of trapping specific ones via
            # reason.trap(twisted.internet.error.ConnectError, error.ConnectionError).
            if elapsed_sleep_time >= start_timeout:
                logger.error('[%s] %s. Retries exceeded (slept %ds/%ds): %s', factory.label, error_message, elapsed_sleep_time, start_timeout, err)
                record_error(err)
                return

            # Delay grows by 2s per attempt, starting at 1s and capped at 10s.
            backoff = min(2 * attempt + 1, 10)
            logger.error('[%s] Waiting on rewarder: %s. Retry in %ds (slept %ds/%ds): %s', factory.label, error_message, backoff, elapsed_sleep_time, start_timeout, err)

            # Re-issue the same connection request, bumping only the retry
            # bookkeeping (attempt counter and accumulated sleep time).
            retry_kwargs = dict(
                name=name,
                address=address,
                env_id=env_id,
                seed=seed,
                fps=fps,
                i=i,
                network=network,
                env_status=env_status,
                reward_buffer=reward_buffer,
                label=label,
                attempt=attempt + 1,
                elapsed_sleep_time=elapsed_sleep_time + backoff,
                start_timeout=start_timeout,
                password=password,
                observer=observer,
                skip_network_calibration=skip_network_calibration,
            )
            reactor.callLater(backoff, self._connect, **retry_kwargs)
Esempio n. 2
0
        def websocket_failed(e):
            """React to a failed websocket connection: retry later or give up.

            Args:
                e: The error. May arrive wrapped in a ``failure.Failure``, in
                    which case the wrapped value is what gets logged/recorded.

            The human-readable description is read from the
            ``error_message`` attribute on this function object itself
            (presumably assigned by the enclosing scope before the callback
            can fire -- confirm against the caller).
            """
            cause = e.value if isinstance(e, failure.Failure) else e

            if self._already_closed(factory.i):
                logger.error('[%s] Giving up on reconnecting, since %d already disconnected', factory.label, factory.i)
                return

            # Every kind of error (DNS failures included) is treated as
            # retriable for now, instead of trapping specific ones via
            # reason.trap(twisted.internet.error.ConnectError, error.ConnectionError).
            if elapsed_sleep_time >= start_timeout:
                # Retry budget used up: log and hand the error to record_error.
                logger.error('[%s] %s. Retries exceeded (slept %ds/%ds): %s', factory.label, websocket_failed.error_message, elapsed_sleep_time, start_timeout, cause)
                record_error(cause)
                return

            # Delay grows by 2s per attempt, starting at 1s and capped at 10s.
            pause = min(2 * attempt + 1, 10)
            logger.error('[%s] Waiting on rewarder: %s. Retry in %ds (slept %ds/%ds): %s', factory.label, websocket_failed.error_message, pause, elapsed_sleep_time, start_timeout, cause)

            # Same connection parameters as this attempt; only the retry
            # bookkeeping changes.
            reactor.callLater(
                pause,
                self._connect,
                name=name,
                address=address,
                env_id=env_id,
                seed=seed,
                fps=fps,
                i=i,
                network=network,
                env_status=env_status,
                reward_buffer=reward_buffer,
                label=label,
                attempt=attempt + 1,
                elapsed_sleep_time=elapsed_sleep_time + pause,
                start_timeout=start_timeout,
                password=password,
                observer=observer,
                skip_network_calibration=skip_network_calibration,
            )
Esempio n. 3
0
 def _connect_errback(reason):
     """Retry the upstream connection with backoff, or drop the transport.

     Args:
         reason: The connection failure; only used in the log messages.
     """
     if tries >= max_attempts:
         # Out of attempts: close our own transport.
         logger.error('[RewardProxyServer] [%d] Connection to %s failed: %s. Completed %d/%d atttempts; disconnecting.', self.id, remote, reason, tries, max_attempts)
         self.transport.loseConnection()
         return

     # Somewhat arbitrary exponential backoff: failures here should be
     # pretty rare, and indicate that we're just starting up.
     backoff = 1.5 ** tries
     logger.info('[RewardProxyServer] [%d] Connection to %s failed: %s. Try %d/%d; going to retry in %fs', self.id, remote, reason, tries, max_attempts, backoff)
     reactor.callLater(
         backoff,
         self.connect_upstream,
         tries=tries + 1,
         max_attempts=max_attempts,
     )
Esempio n. 4
0
def measure_clock_skew(label, host):
    """Run ``ntpdate`` against *host* and return the resulting Deferred.

    The subprocess is spawned through the reactor with a ``Clockskew``
    instance as its process protocol. A timer kills the subprocess if it is
    still alive after the timeout, which defaults to 20 seconds and can be
    overridden with the ``UNIVERSE_NTPDATE_TIMEOUT`` environment variable.

    Args:
        label: Prefix used in log messages.
        host: Host passed to ``ntpdate`` as the server to query.

    Returns:
        ``skew.deferred`` of the ``Clockskew`` protocol (presumably fires
        with the measured skew -- see ``Clockskew``).

    Raises:
        ValueError: If ``UNIVERSE_NTPDATE_TIMEOUT`` is set to something that
            cannot be parsed as a float.
    """
    # -q: query only, do not set the clock; -p 8: number of samples to take.
    cmd = ['ntpdate', '-q', '-p', '8', host]
    extra_logger.info('[%s] Starting network calibration with %s', label, ' '.join(cmd))
    skew = Clockskew(label, cmd)
    # TODO: search PATH for this?
    process = reactor.spawnProcess(skew, '/usr/sbin/ntpdate', cmd, {})
    # process = reactor.spawnProcess(skew, '/bin/sleep', ['sleep', '2'], {})

    t = float(os.environ.get('UNIVERSE_NTPDATE_TIMEOUT', 20))
    def timeout():
        # A truthy pid means the child has not exited yet when the timer fires.
        if process.pid:
            # The hint names the timeout actually in effect; the previous
            # hard-coded "=10" was below the default of 20, so following it
            # would have shortened the timeout rather than increased it.
            logger.error('[%s] %s call timed out after %ss; killing the subprocess. This is ok, but you could have more accurate timings by enabling UDP port 123 traffic to your env. (Alternatively, you can try increasing the timeout by setting environment variable UNIVERSE_NTPDATE_TIMEOUT to a value larger than %s.)', label, ' '.join(cmd), t, t)
            process.signalProcess(signal.SIGKILL)
            process.reapProcess()
    # TODO: make this part of the connection string
    reactor.callLater(t, timeout)
    return skew.deferred
Esempio n. 5
0
 def _connect_errback(reason):
     """Handle a failed upstream connection attempt.

     While ``tries`` is below ``max_attempts`` another ``connect_upstream``
     call is scheduled after an exponentially growing delay; otherwise the
     transport is closed.

     Args:
         reason: The connection failure; only used in the log messages.
     """
     attempts_exhausted = tries >= max_attempts
     if attempts_exhausted:
         logger.error(
             '[RewardProxyServer] [%d] Connection to %s failed: %s. Completed %d/%d atttempts; disconnecting.',
             self.id, remote, reason, tries, max_attempts)
         self.transport.loseConnection()
         return

     # Somewhat arbitrary exponential backoff: failures here should be
     # pretty rare, and indicate that we're just starting up.
     wait_seconds = 1.5**tries
     logger.info(
         '[RewardProxyServer] [%d] Connection to %s failed: %s. Try %d/%d; going to retry in %fs',
         self.id, remote, reason, tries, max_attempts, wait_seconds)
     retry_kwargs = {'tries': tries + 1, 'max_attempts': max_attempts}
     reactor.callLater(wait_seconds, self.connect_upstream, **retry_kwargs)
Esempio n. 6
0
 def _start(self):
     """Schedule the next network recalibration five minutes from now.

     The delayed call is kept on ``self.recalibrate``. When it fires, a
     connection-time measurement is started and ``_start`` is called again,
     which schedules the following round.
     """
     def calibrate():
         def fail(reason):
             # A failed recalibration is only logged, not re-raised.
             logger.error('[%s] Could not recalibrate network: %s', self.client.factory.label, reason)

         finished = defer.Deferred()
         finished.addErrback(fail)
         self._start_measure_connection_time(finished)
         # Re-arm the timer for the next round.
         self._start()

     interval_seconds = 5 * 60
     self.recalibrate = reactor.callLater(interval_seconds, calibrate)
Esempio n. 7
0
 def _start(self):
     """Arm a five-minute timer that re-measures the connection time.

     The pending delayed call is stored in ``self.recalibrate``. Each time
     the timer fires, ``_start`` is invoked again, so the measurement
     repeats every five minutes.
     """
     def calibrate():
         def fail(reason):
             # Errors from the measurement are logged and go no further.
             logger.error('[%s] Could not recalibrate network: %s', self.client.factory.label, reason)

         measurement = defer.Deferred()
         measurement.addErrback(fail)
         self._start_measure_connection_time(measurement)
         # Schedule the next recalibration.
         self._start()

     self.recalibrate = reactor.callLater(
         5 * 60,
         calibrate,
     )
Esempio n. 8
0
def measure_clock_skew(label, host):
    """Run ``ntpdate`` against *host* and return the resulting Deferred.

    The subprocess is spawned through the reactor with a ``Clockskew``
    instance as its process protocol. A timer kills the subprocess if it is
    still alive after the timeout, which defaults to 20 seconds and can be
    overridden with the ``UNIVERSE_NTPDATE_TIMEOUT`` environment variable.

    Args:
        label: Prefix used in log messages.
        host: Host passed to ``ntpdate`` as the server to query.

    Returns:
        ``skew.deferred`` of the ``Clockskew`` protocol (presumably fires
        with the measured skew -- see ``Clockskew``).

    Raises:
        ValueError: If ``UNIVERSE_NTPDATE_TIMEOUT`` is set to something that
            cannot be parsed as a float.
    """
    # -q: query only, do not set the clock; -p 8: number of samples to take.
    cmd = ['ntpdate', '-q', '-p', '8', host]
    extra_logger.info('[%s] Starting network calibration with %s', label,
                      ' '.join(cmd))
    skew = Clockskew(label, cmd)
    # TODO: search PATH for this?
    process = reactor.spawnProcess(skew, '/usr/sbin/ntpdate', cmd, {})
    # process = reactor.spawnProcess(skew, '/bin/sleep', ['sleep', '2'], {})

    t = float(os.environ.get('UNIVERSE_NTPDATE_TIMEOUT', 20))

    def timeout():
        # A truthy pid means the child has not exited yet when the timer fires.
        if process.pid:
            # The hint names the timeout actually in effect; the previous
            # hard-coded "=10" was below the default of 20, so following it
            # would have shortened the timeout rather than increased it.
            logger.error(
                '[%s] %s call timed out after %ss; killing the subprocess. This is ok, but you could have more accurate timings by enabling UDP port 123 traffic to your env. (Alternatively, you can try increasing the timeout by setting environment variable UNIVERSE_NTPDATE_TIMEOUT to a value larger than %s.)',
                label, ' '.join(cmd), t, t)
            process.signalProcess(signal.SIGKILL)
            process.reapProcess()

    # TODO: make this part of the connection string
    reactor.callLater(t, timeout)
    return skew.deferred