Ejemplo n.º 1
0
class InputStream(object):
    """
    Buffer for an incoming FCGI_STDIN or FCGI_DATA stream.

    Chunks handed to ``feed`` are written to a spooled temporary file created
    with ``max_mem`` as its in-memory size limit. All read-style accessors
    block until the EOF mark (an empty chunk) has been fed.
    """
    def __init__(self, max_mem=1024):
        self._file = SpooledTemporaryFile(max_mem)
        self._eof_received = Event()

    def feed(self, data):
        """Store one chunk of the stream; an empty chunk is the EOF mark.

        Raises IOError if the EOF mark has already been received.
        """
        if self._eof_received.is_set():
            raise IOError('Feeding file beyond EOF mark')
        if data:
            self._file.write(data)
            return
        # EOF mark: rewind so readers start from the first byte, then wake
        # up everything blocked on the stream being complete.
        self._file.seek(0)
        self._eof_received.set()

    def _completed_file(self):
        """Block until the EOF mark was fed, then return the backing file."""
        self._eof_received.wait()
        return self._file

    def __iter__(self):
        """Iterate over the lines of the complete stream."""
        return iter(self._completed_file())

    def read(self, size=-1):
        """Read up to ``size`` bytes (everything by default) once complete."""
        return self._completed_file().read(size)

    def readlines(self, sizehint=0):
        """Return the remaining lines of the complete stream as a list."""
        return self._completed_file().readlines(sizehint)

    @property
    def eof_received(self):
        """True once the EOF mark has been fed."""
        return self._eof_received.is_set()
Ejemplo n.º 2
0
    def __call__(self, environ, start_response):
        """Serve one request: run a websocket session or defer to the WSGI app.

        The handler is looked up in ``self.websocket.routes`` by
        ``environ['PATH_INFO']``. Without a match the request is passed to
        ``self.wsgi_app`` unchanged. With a match, the websocket handshake is
        performed, the handler is spawned with a client object, and this
        method then moves messages between the uwsgi websocket calls and the
        client's send/receive queues until the client is marked disconnected
        or the handler finishes.

        Returns the wrapped app's response for unrouted paths, otherwise an
        empty string once the websocket session is over.
        """
        handler = self.websocket.routes.get(environ['PATH_INFO'])

        if not handler:
            return self.wsgi_app(environ, start_response)

        # do handshake
        uwsgi.websocket_handshake(environ['HTTP_SEC_WEBSOCKET_KEY'], environ.get('HTTP_ORIGIN', ''))

        # setup events: each direction gets an event (something is pending)
        # and a one-slot queue (the pending message itself)
        send_event = Event()
        send_queue = Queue(maxsize=1)

        recv_event = Event()
        recv_queue = Queue(maxsize=1)

        # create websocket client
        client = self.client(environ, uwsgi.connection_fd(), send_event,
                             send_queue, recv_event, recv_queue, self.websocket.timeout)

        # spawn handler; from here on `handler` names the spawned task, not
        # the route callable
        handler = spawn(handler, client)

        # spawn recv listener
        def listener(client):
            # recv_event is set once select() returns, whether it reported
            # readable data or simply hit client.timeout; `ready` is unused
            ready = select([client.fd], [], [], client.timeout)
            recv_event.set()
        listening = spawn(listener, client)

        while True:
            if not client.connected:
                # push None to the receive queue, stop listening and give
                # the handler up to client.timeout to finish
                recv_queue.put(None)
                listening.kill()
                handler.join(client.timeout)
                return ''

            # wait for event to draw our attention
            # NOTE(review): presumably gevent.wait(objects, timeout=None,
            # count=1), i.e. block until any one of the three is ready --
            # confirm against the import. The return value is not used.
            ready = wait([handler, send_event, recv_event], None, 1)

            # handle send events
            if send_event.is_set():
                try:
                    uwsgi.websocket_send(send_queue.get())
                    send_event.clear()
                except IOError:
                    # an IOError marks the client as disconnected; the next
                    # loop iteration performs the cleanup
                    client.connected = False

            # handle receive events
            elif recv_event.is_set():
                recv_event.clear()
                try:
                    recv_queue.put(uwsgi.websocket_recv_nb())
                    # the listener task is one-shot, so start a new one
                    listening = spawn(listener, client)
                except IOError:
                    client.connected = False

            # handler done, we're outta here
            elif handler.ready():
                listening.kill()
                return ''
Ejemplo n.º 3
0
def retry_with_recovery(
        transport: UDPTransport,
        messagedata: bytes,
        message_id: typing.MessageID,
        recipient: typing.Address,
        event_stop: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        backoff: typing.Generator[int, None, None],
) -> bool:
    """ Keep sending messagedata until it is acknowledged or the task is stopped.

    Sending is paused while event_unhealthy is set and resumes once
    wait_recovery returns.

    Returns:
        The last result of retry, or False if nothing was ever sent.

    Note:
        backoff must be an infinite iterator, otherwise this task will
        become a hot loop.
    """

    # This combined event has to be cleared by hand below, because clearing
    # the underlying unhealthy event does not clear it.
    stop_or_unhealthy = event_first_of(event_stop, event_unhealthy)

    acknowledged = False
    while not (event_stop.is_set() or acknowledged):

        # Never send to an unhealthy node; wait for it to recover first.
        if event_unhealthy.is_set():
            wait_recovery(event_stop, event_healthy)

            # wait_recovery is assumed to have returned because the node
            # recovered; the stop case is handled right after.
            stop_or_unhealthy.clear()

            if event_stop.is_set():
                break

        # retry gives up as soon as stop_or_unhealthy is set, which lets this
        # loop either wait for recovery or quit. The same backoff object is
        # passed every time on purpose, so timeouts continue from where the
        # previous attempt left off.
        acknowledged = retry(
            transport,
            messagedata,
            message_id,
            recipient,
            stop_or_unhealthy,
            backoff,
        )

    return acknowledged
Ejemplo n.º 4
0
class Signals(object):
    """Request handlers that use a shared event to signal a new user.

    ``receiveuser`` blocks the calling request on the event and
    ``signaluser`` pulses it (set immediately followed by clear).
    """

    def __init__(self):
        self.new_user_event = Event()

    def signaluser(self, request):
        """Pulse the new-user event and answer with a confirmation.

        The request itself is not inspected.

        NOTE(review): whether requests blocked in ``receiveuser`` observe a
        set() that is cleared right away depends on the Event implementation
        in use -- confirm.
        """
        # Debug trace of the flag state before the pulse. Written in call
        # form so the block parses on Python 2 and Python 3 alike.
        print(self.new_user_event.is_set())
        self.new_user_event.set()
        self.new_user_event.clear()
        return HttpResponse("New User Signaled")

    def receiveuser(self, request):
        """Block until the new-user event is set, then answer."""
        self.new_user_event.wait()
        return HttpResponse("New User Received")
Ejemplo n.º 5
0
class GServer(ProtoBufRPCServer):
    """RPC server that reads requests from a ZeroMQ ROUTER socket.

    Every received multipart message is handed to ``handle_request`` in a
    task spawned from a pool of at most ``poolsize`` workers.
    """

    def __init__(self, host, port, service, poolsize=128):
        """Bind a ROUTER socket to ``tcp://host:port`` and set up the pool."""
        self.gpool = Pool(poolsize)
        self.stop_event = Event()
        context = zmq.Context()
        self.port = port
        self.socket = context.socket(zmq.ROUTER)
        self.socket.bind("tcp://%s:%s" % (host, port))
        self.service = service

    def serve_forever(self):
        """Receive and dispatch requests until ``shutdown`` is called.

        Raises:
            zmq.ZMQError: if receiving fails while the socket is still open.
        """
        while not self.stop_event.is_set():
            try:
                msg = self.socket.recv_multipart()
            except zmq.ZMQError:
                # A closed socket means shutdown() was called: leave quietly.
                if self.socket.closed:
                    break
                # Re-raise the error being handled with its traceback intact.
                raise
            self.gpool.spawn(self.handle_request, msg)

    def shutdown(self):
        """Close the socket and flag the serve loop to stop."""
        self.socket.close()
        self.stop_event.set()

    def handle_request(self, msg):
        """Process one three-frame message and send the reply back.

        ``msg`` must be ``[id, '', request]``; the reply reuses the first two
        frames and carries the serialized result of ``self.handle(request)``.
        """
        assert len(msg) == 3
        (id_, null, request) = msg
        assert null == ''
        response = self.handle(request)
        self.socket.send_multipart([id_, null, response.SerializeToString()])
Ejemplo n.º 6
0
class ConditionPoller(Thread):
    """
    Generic polling thread: every ``interval`` seconds call ``condition``;
    when it returns a true value, pass that value to ``condition_callback``.
    If the condition or the callback raises, polling stops and the exception
    is handed to ``exception_callback`` (when one was given).
    """
    def __init__(self, condition, condition_callback, exception_callback, interval):
        self.polling_interval = interval
        self._shutdown_now = Event()
        # Defined up front so the flag can be read before shutdown() is
        # ever called.
        self.is_shutting_down = False
        self._condition = condition
        self._callback = condition_callback
        self._on_exception = exception_callback
        super(ConditionPoller, self).__init__()

    def shutdown(self):
        """Ask the polling loop to stop; also cuts the current pause short."""
        self.is_shutting_down = True
        self._shutdown_now.set()

    def run(self):
        """Poll until shutdown is requested, pausing between checks."""
        try:
            while not self._shutdown_now.is_set():
                self._check_condition()
                # Doubles as the inter-poll sleep and as the wake-up point
                # for shutdown().
                self._shutdown_now.wait(self.polling_interval)
        except Exception:
            log.error('thread failed', exc_info=True)

    def _check_condition(self):
        """Run one poll; on any exception stop polling and report it."""
        try:
            value = self._condition()
            if value:
                self._callback(value)
        except Exception as e:
            log.debug('stopping poller after exception', exc_info=True)
            self.shutdown()
            if self._on_exception:
                self._on_exception(e)

    def start(self):
        """Start the polling thread."""
        super(ConditionPoller, self).start()
Ejemplo n.º 7
0
class C2DMService(object):
    """Queues notifications and pushes them from a sender loop.

    NOTE(review): presumably targets Google's C2DM service, judging by the
    class and logger names -- the delivery itself happens in ``_do_push``,
    which is not visible in this excerpt.
    """
    def __init__(self, source, email, password):
        self.source = source
        self.email = email
        self.password = password
        # Notifications waiting to be pushed by _send_loop().
        self._send_queue = Queue()
        # Set by _send_loop() when it sees the send queue empty.
        self._send_queue_cleared = Event()
        self.log = logging.getLogger('pulsus.service.c2dm')

    def _send_loop(self):
        """Push queued notifications forever, re-queueing the ones that fail.

        NOTE(review): the outer ``try`` has no visible ``except``/``finally``
        clause; the rest of this method appears to be cut off in this excerpt.
        """
        # Remember which greenlet is running the loop.
        self._send_greenlet = gevent.getcurrent()
        try:
            self.log.info("C2DM service started")
            while True:
                notification = self._send_queue.get()
                try:
                    self._do_push(notification)
                except Exception, e:
                    # Log the failure, put the notification back on the
                    # queue and pause 5 seconds before continuing.
                    self.log.exception("Error while pushing")
                    self._send_queue.put(notification)
                    gevent.sleep(5.0)
                finally:
                    # Runs after every attempt: flag the queue as cleared
                    # the first time it is found empty.
                    if self._send_queue.qsize() < 1 and \
                            not self._send_queue_cleared.is_set():
                        self._send_queue_cleared.set()
Ejemplo n.º 8
0
class BlackBerryPushService(object):
    """Queues notifications and pushes them from a sender loop.

    The delivery itself happens in ``_do_push``, which is not visible in
    this excerpt.
    """
    def __init__(self, app_id, password, push_url):
        self.app_id = app_id
        self.password = password
        self.push_url = push_url
        # Notifications waiting to be pushed by _send_loop().
        self._send_queue = Queue()
        # Set by _send_loop() when it sees the send queue empty.
        self._send_queue_cleared = Event()
        self.log = logging.getLogger('pulsus.service.bbp')

    def _send_loop(self):
        """Push queued notifications forever, re-queueing the ones that fail.

        NOTE(review): the outer ``try`` has no visible ``except``/``finally``
        clause; the rest of this method appears to be cut off in this excerpt.
        """
        # Remember which greenlet is running the loop.
        self._send_greenlet = gevent.getcurrent()
        try:
            self.log.info("BlackBerry Push service started")
            while True:
                notification = self._send_queue.get()
                try:
                    self._do_push(notification)
                except Exception, e:
                    # NOTE(review): the error is printed rather than sent to
                    # self.log -- consider self.log.exception here.
                    print e
                    # Put the notification back on the queue and pause
                    # 5 seconds before continuing.
                    self._send_queue.put(notification)
                    gevent.sleep(5.0)
                finally:
                    # Runs after every attempt: flag the queue as cleared
                    # the first time it is found empty.
                    if self._send_queue.qsize() < 1 and \
                            not self._send_queue_cleared.is_set():
                        self._send_queue_cleared.set()
Ejemplo n.º 9
0
class Lock(object):
    """Lock kept under an etcd key and refreshed by a heartbeat task."""

    def __init__(self, etcd, key, name, ttl=30):
        """Remember the etcd client, the lock key and this holder's name.

        ``ttl`` is the time-to-live passed to etcd on every heartbeat; the
        heartbeat and the acquisition polling both run every ``ttl / 2``
        seconds.
        """
        self.etcd = etcd
        self.key = key
        self.name = name
        self._gthread = None
        self._ttl = ttl
        self._stopped = Event()

    def _heartbeat(self):
        """Re-assert ownership of the key every half TTL until stopped."""
        while True:
            # Doubles as the sleep between refreshes and as the stop signal.
            self._stopped.wait(self._ttl / 2)
            if self._stopped.is_set():
                break
            self.etcd.testandset(self.key, self.name, self.name,
                                 ttl=self._ttl)

    def lock(self):
        """Block until the lock is taken, then start the heartbeat.

        The key is polled every half TTL. An EtcdError from the read is
        treated as "nobody holds the lock": the key is then written with
        this holder's name and the heartbeat task is spawned.
        """
        # This is to work around bugs in etcd.  Not very atomic
        # at all :(
        while True:
            try:
                self.etcd.get(self.key)
            except EtcdError as err:
                logging.error("lock: %s: error: %r", self.key, err)
                self.etcd.set(self.key, self.name)
                self._gthread = gevent.spawn(self._heartbeat)
                break
            else:
                time.sleep(self._ttl / 2)
Ejemplo n.º 10
0
    def test_semaphore(self):
        """A second call is held back while ``max_concurrent_calls`` is used up.

        With the limit set to 1, ``second_method`` must not start while
        ``first_method`` is still running, and must run once it finishes.
        """
        edge = APIEdge(MockApp(), self.get_settings())
        api = edge.app.api
        edge.max_concurrent_calls = 1

        # first_method reports that it started, then blocks until released.
        in_first_method = Event()
        finish_first_method = Event()
        def first_method():
            in_first_method.set()
            finish_first_method.wait()
        api.first_method = first_method

        # second_method only reports that it started.
        in_second_method = Event()
        def second_method():
            in_second_method.set()
        api.second_method = second_method

        # Start the first call and wait until it is inside the method.
        gevent.spawn(edge.execute, Call("first_method"))
        in_first_method.wait()

        # Start the second call and yield once so it gets a chance to run.
        gevent.spawn(edge.execute, Call("second_method"))
        gevent.sleep(0)

        # The second call must have been logged as throttled, not executed.
        assert_logged("too many concurrent callers")
        assert not in_second_method.is_set()

        # Releasing the first call must let the second one through.
        finish_first_method.set()
        in_second_method.wait()
        self.assert_edge_clean(edge)
Ejemplo n.º 11
0
class RecurringTask(object):
    """Calls ``fn`` over and over, pausing ``interval`` seconds in between.

    The pause can be cut short with ``touch`` and the loop ended with
    ``stop``.
    """

    def __init__(self, interval, fn):
        self.fn = fn
        self.interval = interval
        self._gthread = None
        self._stopped = Event()
        self._wakeup = Event()

    def touch(self):
        """Make sure the task is executed now."""
        self._wakeup.set()

    def start(self):
        """Spawn the loop in its own greenlet."""
        self._gthread = gevent.spawn(self._run)

    def stop(self):
        """Flag the loop to end and wake it up if it is pausing."""
        self._stopped.set()
        self._wakeup.set()

    def _run(self):
        """Loop body: run ``fn``, then pause until the interval or a wake-up."""
        while True:
            if self._stopped.is_set():
                return
            self.fn()
            self._wakeup.wait(timeout=self.interval)
            # Reset so the next pause lasts the full interval again.
            self._wakeup.clear()
Ejemplo n.º 12
0
def test_spawning(defer):
    """spawn() returns before the actor is constructed and started."""
    node = DummyNode()
    defer(node.stop)
    init_called = Event()
    actor_spawned = Event()

    class MyActor(Actor):
        def __init__(self):
            init_called.set()

        def pre_start(self):
            actor_spawned.set()

    node.spawn(MyActor)
    # Right after spawn() returns neither hook is expected to have run yet.
    for flag in (init_called, actor_spawned):
        ok_(not flag.is_set())
    # Once pre_start has run, the constructor must have run as well.
    actor_spawned.wait()
    ok_(init_called.is_set())
Ejemplo n.º 13
0
Archivo: test.py Proyecto: sublee/lets
def test_job_queue_join_workers():
    """JobQueue.join() returns only after the queued job has finished."""
    finished = Event()
    queue = lets.JobQueue()
    job = queue.put(Greenlet(gevent.sleep, 0.1))
    job.link(lambda _: finished.set())
    # Before 0.0.24, JobQueue.join() doesn't guarantee finish of all workers.
    queue.join()
    assert finished.is_set()
Ejemplo n.º 14
0
class NotificationService(object):
	"""Queues push messages and sends them over an SSL connection.

	The hosts used are Apple's push gateway and feedback hosts, or their
	sandbox variants when ``sandbox`` is true.
	"""
	def __init__(self, sandbox = True, **kwargs):
		"""Store the SSL arguments and create the queues.

		``kwargs`` are passed on to ``ssl.wrap_socket`` and must contain
		``certfile``; ValueError is raised otherwise.
		"""
		if "certfile" not in kwargs:
			raise ValueError, u"Must specify a PEM bundle."
		self._sslargs = kwargs
		# Connections are opened lazily by the _check_*_connection methods.
		self._push_connection = None
		self._feedback_connection = None
		self._sandbox = sandbox
		self._send_queue = Queue()
		self._error_queue = Queue()
		self._feedback_queue = Queue()
		self._send_greenlet = None
		self._error_greenlet = None
		self._feedback_greenlet = None

		# Set by _send_loop() when it sees the send queue empty.
		self._send_queue_cleared = Event()

	def _check_send_connection(self):
		"""Open the push connection and start the error loop if needed."""
		if self._push_connection is None:
			# NOTE(review): SSLv3 is obsolete and disabled in many SSL
			# builds -- confirm the protocol version is still usable.
			s = ssl.wrap_socket(socket(AF_INET, SOCK_STREAM, 0),
				ssl_version=ssl.PROTOCOL_SSLv3,
				**self._sslargs)
			addr = ["gateway.push.apple.com", 2195]
			if self._sandbox:
				addr[0] = "gateway.sandbox.push.apple.com"
			# NOTE(review): connect_ex() reports failure through its
			# return value, which is ignored here.
			s.connect_ex(tuple(addr))
			self._push_connection = s
			self._error_greenlet = gevent.spawn(self._error_loop)

	def _check_feedback_connection(self):
		"""Open the feedback connection if there is none yet."""
		if self._feedback_connection is None:
			# NOTE(review): same SSLv3 and connect_ex() caveats as in
			# _check_send_connection.
			s = ssl.wrap_socket(socket(AF_INET, SOCK_STREAM, 0),
				ssl_version = ssl.PROTOCOL_SSLv3,
				**self._sslargs)
			addr = ["feedback.push.apple.com", 2196]
			if self._sandbox:
				addr[0] = "feedback.sandbox.push.apple.com"
			s.connect_ex(tuple(addr))
			self._feedback_connection = s

	def _send_loop(self):
		"""Send queued messages forever, re-queueing the ones that fail.

		NOTE(review): the outer ``try`` has no visible ``except``/``finally``
		clause; the rest of this method appears to be cut off in this
		excerpt.
		"""
		# Remember which greenlet is running the loop.
		self._send_greenlet = gevent.getcurrent()
		try:
			while True:
				msg = self._send_queue.get()
				self._check_send_connection()
				try:
					self._push_connection.send(str(msg))
				except Exception, e:
					# Put the message back, drop the connection so the
					# next iteration reconnects, and pause 5 seconds.
					self._send_queue.put(msg)
					self._push_connection.close()
					self._push_connection = None
					gevent.sleep(5.0)
				finally:
					# Runs after every attempt: flag the queue as
					# cleared the first time it is found empty.
					if self._send_queue.qsize() < 1 and \
							not self._send_queue_cleared.is_set():
						self._send_queue_cleared.set()
Ejemplo n.º 15
0
class MsgGenerator(gevent.Greenlet):
    """Greenlet that emits heartbeat and job events to registered instances."""

    def __init__(self):
        super(MsgGenerator, self).__init__()
        self._complete = Event()
        self._jobs = {}
        self._instances = []

        # FIXME: the whole world gets monkey patched here because the
        # python side of librados uses threading.Thread. rados itself will
        # still block on e.g. connect(), so librados probably needs to be
        # wrapped in its own non-gevent python process with RPC to it.
        from gevent import monkey
        monkey.patch_all()
        monkey.patch_subprocess()

    def register(self, instance):
        """Add ``instance`` to the receivers unless it is already known."""
        if instance not in self._instances:
            self._instances.append(instance)

    def _emit(self, msg_event):
        """Deliver ``msg_event`` to every instance with subscribers."""
        # GMENO theory about memory leak: skip instances nobody listens to.
        listening = (inst for inst in self._instances if inst.subscribed > 0)
        for inst in listening:
            inst.put(msg_event)

    def complete(self, jid, event):
        """Forget job ``jid`` and emit its completion event."""
        self._jobs.pop(jid)
        self._emit(event)

    def running_jobs(self):
        """Emit a RUNNING_JOBS event listing the ids of all tracked jobs."""
        job_ids = [{'jid': jid} for jid in self._jobs]
        self._emit(MsgEvent(RUNNING_JOBS, job_ids))

    def run_job(self, fqdn, cmd, args):
        """Start ``cmd`` as a job in its own greenlet and return the job id.

        Raises Unavailable when ``fqdn`` is not this host's FQDN.
        """
        if fqdn != socket.getfqdn():
            raise Unavailable()

        jid = str(uuid.uuid4())

        def job():
            return run_job_thread(self, jid, cmd, args)

        self._jobs[jid] = gevent.spawn(job)
        return jid

    def _run(self):
        """Emit heartbeats every HEARTBEAT_PERIOD until marked complete."""
        try:
            while not self._complete.is_set():
                server_heartbeat, cluster_heartbeat = get_heartbeats()
                log.debug("server_heartbeat: %s" % server_heartbeat)
                log.debug("cluster_heartbeat: %s" % cluster_heartbeat)
                if server_heartbeat:
                    self._emit(MsgEvent(SERVER_HEARTBEAT, server_heartbeat))
                if cluster_heartbeat:
                    self._emit(MsgEvent(HEARTBEAT, cluster_heartbeat))

                self._complete.wait(HEARTBEAT_PERIOD)
        except:
            # Log the traceback, then let the failure propagate unchanged.
            log.error(traceback.format_exc())
            raise
Ejemplo n.º 16
0
class Queue(gqueue.Queue):

    '''A subclass of gevent.queue.Queue used to organize communication messaging between Compysition Actors.

    Parameters:

        name (str):
            | The name of this queue. Used in certain actors to determine origin faster than reverse key-value lookup

    '''

    def __init__(self, name, *args, **kwargs):
        super(Queue, self).__init__(*args, **kwargs)
        self.name = name
        # Set while the queue is known to hold at least one element; a new
        # Event starts out cleared.
        self.__has_content = Event()

    def get(self, block=False, *args, **kwargs):
        '''Gets an element from the queue.

        Non-blocking by default. Raises QueueEmpty when no element is
        available.
        '''

        try:
            # ``block`` goes first positionally so that extra positional
            # arguments (e.g. a timeout) cannot collide with it.
            element = super(Queue, self).get(block, *args, **kwargs)
        except gqueue.Empty:
            self.__has_content.clear()
            raise QueueEmpty("Queue {0} has no waiting events".format(self.name))

        # qsize is a method: it has to be called, otherwise the comparison
        # is never true and the flag stays set on an empty queue.
        if self.qsize() == 0:
            self.__has_content.clear()

        return element

    def put(self, element, *args, **kwargs):
        '''Puts element in queue.

        Raises QueueFull when the underlying queue reports it is full.
        '''
        try:
            super(Queue, self).put(element, *args, **kwargs)
        except gqueue.Full:
            raise QueueFull("Queue {0} is full".format(self.name))
        self.__has_content.set()

    def wait_until_content(self):
        '''Blocks until at least 1 slot is taken.'''
        self.__has_content.wait()

    def wait_until_empty(self):
        '''Blocks until the queue is completely empty.'''

        # Yield to other tasks for as long as content is still flagged.
        while self.__has_content.is_set():
            sleep(0)

    def dump(self, other_queue):
        """**Dump all items on this queue to another queue**

        Best effort: the transfer stops silently at the first error.
        """
        # NOTE(review): presumably next() ends the loop through the
        # non-blocking get() raising QueueEmpty -- confirm. Only Exception
        # subclasses are swallowed so task kills and interrupts still
        # propagate.
        try:
            while True:
                other_queue.put(self.next())
        except Exception:
            pass
Ejemplo n.º 17
0
class ProcessDispatcherExecutorBase(object):
    """ Base class for PD Executors

    Actions are ``(name, AsyncResult, kwargs)`` tuples. They are queued and
    each one is run in a pool greenlet by the ``_action_<name>`` method; the
    outcome is delivered through the action's AsyncResult.
    """

    def __init__(self, pd_core):
        self._pd_core = pd_core
        self.container = self._pd_core.container
        self.queue = Queue()
        self.quit_event = Event()
        # Pool size comes from "executor.pool_size" (default 1), capped at 10.
        self.exec_pool_size = min(int(get_safe(self._pd_core.pd_cfg, "executor.pool_size") or 1), 10)
        self.exec_pool = Pool(size=self.exec_pool_size)

    def start(self):
        """Spawn the greenlet that dispatches queued actions."""
        self._pool_gl = spawn(self._action_loop)

    def stop(self):
        """Stop dispatching, kill running actions and wait briefly for both."""
        self.quit_event.set()
        # The sentinel only wakes the dispatch loop up so that it notices
        # quit_event; it is never executed as an action.
        self.queue.put("__QUIT__", None, None)
        self.exec_pool.kill()
        self.exec_pool.join(timeout=2)
        self._pool_gl.join(timeout=2)

    def add_action(self, action_tuple):
        """Validate and enqueue an action without waiting for its result.

        Raises BadRequest unless ``action_tuple`` is
        ``(str, AsyncResult, dict)``.
        """
        if not action_tuple or len(action_tuple) != 3 or not isinstance(action_tuple[0], basestring) or \
                not isinstance(action_tuple[1], AsyncResult) or not isinstance(action_tuple[2], dict):
            raise BadRequest("Invalid action")
        self.queue.put(action_tuple)

    def execute_action(self, action_tuple):
        """Enqueue an action and block until its result is available."""
        self.add_action(action_tuple)
        action_res = action_tuple[1]
        return action_res.get()  # Blocking on AsyncResult

    def _action_loop(self):
        """Hand every queued action to the pool until asked to quit."""
        for action in self.queue:
            if self.quit_event.is_set():
                break
            try:
                self.exec_pool.spawn(self._process_action, action)
            except Exception:
                log.exception("Error in PD Executor action")

    def _process_action(self, action):
        """Run one action and publish its outcome on the action's AsyncResult."""
        log.debug("PD execute action %s", action)
        action_name, action_asyncres, action_kwargs = action

        action_funcname = "_action_%s" % action_name
        action_func = getattr(self, action_funcname, None)
        if not action_func:
            log.warn("Action function not found")
            # Resolve the result as well: otherwise a caller blocked in
            # execute_action() would wait forever.
            action_asyncres.set_exception(
                BadRequest("Action function not found: %s" % action_funcname))
            return
        try:
            action_res = action_func(action_kwargs)
            action_asyncres.set(action_res)
        except Exception as ex:
            log.exception("Error executing action")
            action_asyncres.set_exception(ex)
Ejemplo n.º 18
0
class StopGreenlet(Greenlet):
    """Greenlet carrying a cooperative stop flag.

    ``stop()`` only sets the flag; nothing in this class interrupts the
    greenlet, so the running code has to check ``stopped()`` itself.
    """
    def __init__(self, *args, **kwargs):
        # NOTE(review): *args/**kwargs are accepted but not forwarded to
        # Greenlet.__init__ -- confirm that this is intentional.
        self._stop_event = Event()
        super(StopGreenlet, self).__init__()

    def stop(self):
        """Request a stop by setting the flag."""
        self._stop_event.set()

    def stopped(self):
        """Return True once ``stop()`` has been called."""
        return self._stop_event.is_set()
Ejemplo n.º 19
0
def wait_recovery(event_stop: Event, event_healthy: Event):
    """ Block until the node is healthy again or the task is stopped.

    When the wait ends for a reason other than a stop, an extra random
    delay of up to one second is added before returning.
    """
    stop_or_healthy = event_first_of(event_stop, event_healthy)
    stop_or_healthy.wait()

    if not event_stop.is_set():
        # There may be multiple threads waiting, do not restart them all at
        # once to avoid message flood.
        gevent.sleep(random.random())
Ejemplo n.º 20
0
Archivo: ramp.py Proyecto: 40a/aurproxy
class RampingShareAdjuster(ShareAdjuster):
  """Share adjuster whose factor follows a curve over a ramp period.

  The ramp begins ``ramp_delay`` seconds after ``start`` (or at ``as_of``
  when given) and lasts ``ramp_seconds``. While it is in progress the update
  callback is invoked every ``update_frequency`` seconds.
  """

  def __init__(self,
               endpoint,
               signal_update_fn,
               ramp_delay,
               ramp_seconds,
               curve='linear',
               update_frequency=10,
               as_of=None):
    super(RampingShareAdjuster, self).__init__(endpoint, signal_update_fn)
    self._stop_event = Event()
    self._start_time = as_of
    self._ramp_delay = ramp_delay
    self._ramp_seconds = ramp_seconds
    self._update_frequency = update_frequency
    self._curve_fn = _CURVE_FNS[curve]

  def start(self):
    """Start maintaining share adjustment factor for endpoint.
    """
    if not self._start_time:
      delay = timedelta(seconds=self._ramp_delay)
      self._start_time = datetime.now() + delay
    spawn_later(self._update_frequency, self._update)

  def stop(self):
    """Stop maintaining share adjustment factor for endpoint.
    """
    self._stop_event.set()

  def _update(self):
    """Signal an update, then reschedule or stop once the ramp is over."""
    if self._stop_event.is_set():
      return
    try:
      self._signal_update_fn()
    finally:
      # Runs even when the callback raised, so the schedule is kept up.
      ramp_over = datetime.now() > self._end_time
      if ramp_over:
        self.stop()
      else:
        spawn_later(self._update_frequency, self._update)

  @property
  def _end_time(self):
    """Moment at which the ramp is complete."""
    ramp_length = timedelta(seconds=self._ramp_seconds)
    return self._start_time + ramp_length

  @property
  def auditable_share(self):
    """Return current share adjustment factor.
    """
    share = self._curve_fn(self._start_time, self._end_time, datetime.now())
    audit = AuditItem('ramp', str(share))
    return share, audit
Ejemplo n.º 21
0
def test_actors_are_garbage_collected_on_termination(defer):
    """A stopped actor is expected to be finalized by the next collection."""
    class MyActor(Actor):
        def __del__(self):
            # Records that the actor object was finalized.
            del_called.set()

    node = DummyNode()
    defer(node.stop)
    del_called = Event()
    # The value returned by spawn() is not bound to a local name, so this
    # function itself holds no reference to it when gc.collect() runs.
    node.spawn(MyActor).stop()
    idle()
    gc.collect()
    ok_(del_called.is_set())
Ejemplo n.º 22
0
class Watcher(Greenlet):
    """
    A Greenlet that periodically reports web server internals: the stats
    mapping and the queue sizes of the reader and writer clients.
    """
    def __init__(
        self, stats, reader_clients, writer_clients, event_push_client
    ):
        Greenlet.__init__(self)
        self._log = logging.getLogger(str(self))
        self._halt_event = Event()
        self._stats = stats
        self._reader_clients = reader_clients
        self._writer_clients = writer_clients
        self._event_push_client = event_push_client

    def _run(self):
        """Log and push the stats every ``_interval`` seconds until halted."""
        self._log.debug("starting")

        while not self._halt_event.is_set():
            reader_info = [
                client.queue_size for client in self._reader_clients
            ]
            writer_info = [
                client.queue_size for client in self._writer_clients
            ]

            summary = "archives: %(archives)s; retrieves: %(retrieves)s" \
                % self._stats
            self._log.info(summary)
            self._event_push_client.info(
                "web-server-stats",
                "web server stats",
                stats=self._stats,
                reader=reader_info,
                writer=writer_info
            )
            # Pause between reports; returns early once join() sets the event.
            self._halt_event.wait(_interval)

        self._log.debug("ending")

    def join(self, timeout=None):
        """Ask the report loop to halt, then wait for the greenlet."""
        self._log.debug("joining")
        self._halt_event.set()
        Greenlet.join(self, timeout)
        self._log.debug("join complete")

    def __str__(self):
        return "StatsReporter"
Ejemplo n.º 23
0
Archivo: test.py Proyecto: sublee/lets
def test_object_pool_discard_later_with_slow_destroy():
    """An object still being destroyed must not be handed out again."""
    started = Event()
    ended = Event()

    def destroy_slowly(obj):
        started.set()
        gevent.sleep(10)
        ended.set()

    pool = lets.ObjectPool(1, object, destroy_slowly, discard_later=0.1)

    with pool.reserve() as first:
        pass

    # Wait for the delayed discard to begin destroying the first object.
    started.wait()
    assert started.is_set()
    with pool.reserve() as second:
        pass
    # The first object is still being destroyed.
    assert not ended.is_set()

    # The second reservation must not reuse the object being destroyed.
    assert first is not second
Ejemplo n.º 24
0
class Miner(gevent.Greenlet):
    """Greenlet that mines one block at a time until ``stop`` is set."""

    def __init__(self, web3, mine_sleep=1):
        super().__init__()
        self.stop = Event()
        self.web3 = web3
        self.mine_sleep = mine_sleep

    def _run(self):
        """Mine a block, sleep ``mine_sleep`` seconds, repeat until stopped."""
        while not self.stop.is_set():
            # The tester miner sleeps for 1 sec by default, the same period
            # tester geth is using
            # (see: raiden/tests/utils/geth.py:geth_generate_poa_genesis())
            self.web3.testing.mine(1)
            gevent.sleep(self.mine_sleep)
Ejemplo n.º 25
0
def expire_zookeeper_client_session(client, timeout=10):
    """Expire the zookeeper session of the given client.

    This function should only be used for testing purposes.
    It will induce an EXPIRED_SESSION_STATE event in the
    given client.

    Args:
        client: GZookeeperClient object
        timeout: optional timeout in seconds
            to wait for the session to expire.
            If None, this call will block.
            This is not recommended.

    Returns:
        True if session expiration occurred within
        timeout seconds, False otherwise.
    """
    # Session expiration event to wait on.
    session_expiration_event = Event()

    def observer(event):
        if event.state_name == "EXPIRED_SESSION_STATE":
            session_expiration_event.set()

    client.add_session_observer(observer)
    try:
        # Construct a second client with the same session id so that
        # stopping it causes a session expiration event in `client`.
        zookeeper_client = GZookeeperClient(
                client.servers,
                client.session_id,
                client.session_password)

        def zookeeper_observer(event):
            # Upon connection, immediately stop the second client, which
            # will cause a session expiration in `client`.
            if event.state_name == "CONNECTED_STATE":
                zookeeper_client.stop()

        zookeeper_client.add_session_observer(zookeeper_observer)
        zookeeper_client.start()
        zookeeper_client.join()

        session_expiration_event.wait(timeout)
    finally:
        # Always unregister, even if the second client failed, so that no
        # stale observer is left on the caller's client.
        client.remove_session_observer(observer)

    return session_expiration_event.is_set()
Ejemplo n.º 26
0
def test_stopping_in_pre_start_directs_any_refs_to_deadletters(defer):
    """A message to an actor that stopped itself in pre_start is a dead letter."""
    delivered = Event()

    class MyActor(Actor):
        def pre_start(self):
            self.stop()

        def receive(self, message):
            delivered.set()

    node = DummyNode()
    defer(node.stop)
    ref = node.spawn(MyActor)
    # The message must surface as a DeadLetter event ...
    with expect_one_event(DeadLetter(ref, 'dummy', sender=None)):
        ref << 'dummy'
    # ... and must never reach the actor's receive().
    ok_(not delivered.is_set())
Ejemplo n.º 27
0
def test_stopping_an_actor_prevents_it_from_processing_any_more_messages(defer):
    """After stop() the actor receives nothing; new messages are dead letters."""
    got_message = Event()

    class MyActor(Actor):
        def receive(self, _):
            got_message.set()

    node = DummyNode()
    defer(node.stop)
    ref = node.spawn(MyActor)

    # A live actor does receive the message.
    ref << None
    got_message.wait()
    got_message.clear()

    # Stopping must not be observable through receive().
    ref.stop()
    sleep(.001)
    ok_(not got_message.is_set(), "the '_stop' message should not be receivable in the actor")

    # Anything sent afterwards is reported as a dead letter.
    with expect_one_event(DeadLetter(ref, None, sender=None)):
        ref << None
Ejemplo n.º 28
0
def test_stopping_waits_till_the_ongoing_receive_is_complete(defer):
    """post_stop must not run while a receive() call is still in progress."""
    stopped = Event()
    released = Event()

    class MyActor(Actor):
        def receive(self, message):
            # Keeps the actor busy until the test releases it.
            released.wait()

        def post_stop(self):
            stopped.set()

    node = DummyNode()
    defer(node.stop)
    ref = node.spawn(MyActor) << 'foo'
    sleep(.001)
    ref.stop()
    sleep(.001)
    # receive() is still blocked, so the stop must not have completed.
    ok_(not stopped.is_set())
    released.set()
    stopped.wait()
Ejemplo n.º 29
0
def test_stopping_waits_till_process_is_done_handling_a_message(defer):
    """A process is not interrupted by stop() while handling a message."""
    exited = Event()
    released = Event()

    class MyProc(Actor):
        def run(self):
            self.get()
            try:
                # Handling of the first message lasts until released.
                released.wait()
                self.get()
            except GreenletExit:
                exited.set()

    node = DummyNode()
    defer(node.stop)
    ref = node.spawn(MyProc)
    ref << 'foo'
    sleep(.001)
    ref.stop()
    sleep(.001)
    # Still busy with 'foo', so the process must not have exited yet.
    ok_(not exited.is_set())
    released.set()
    exited.wait()
Ejemplo n.º 30
0
class FtpWithRpcTestCase(TemporaryBaseDirectoryTestCase):
    """Uploads a file over FTP and expects the processing hook to be called."""

    def setUp(self):
        from gevent.event import Event
        super(FtpWithRpcTestCase, self).setUp()
        self.config = self._get_config_for_test()
        incoming = path.join(self.config.incoming_directory, 'main-stable')
        ensure_directory_exists(incoming)
        self.test_succeded = Event()

    def mark_success(self, config, index, filepath):
        """Stand-in for the processing hook: just record that it was called."""
        self.test_succeded.set()

    def test_upload(self):
        from infi.app_repo import service
        with patch("infi.app_repo.service.process_filepath_by_name") as process_filepath_by_name:
            process_filepath_by_name.side_effect = self.mark_success
            payload = StringIO("hello world")
            with self.ftp_server_context(self.config), \
                    self.ftp_client_context(self.config, True) as client, \
                    self.rpc_server_context(self.config):
                client.storbinary("STOR main-stable/testfile", payload)
                # Give the hook up to one second to be called.
                self.test_succeded.wait(1)
        self.assertTrue(self.test_succeded.is_set())
Ejemplo n.º 31
0
class RaidenService:
    """ A Raiden node. """
    def __init__(self, chain, default_registry, private_key_bin, transport,
                 discovery, config):
        """ Validate the configuration, wire up the node and start it.

        The endpoint registration is spawned early as a greenlet and joined
        right before `start()` is called at the end of the constructor.

        Args:
            chain: Blockchain access object; its `client` receives the stop
                event and `AlarmTask` is built on top of it.
            default_registry: Registry whose `address` is used by the node.
            private_key_bin (bytes): The 32 bytes private key of the node.
            transport: Transport handed to `RaidenProtocol`; its `protocol`
                attribute is set back to the created protocol instance.
            discovery: Object providing `register(address, ip, port)`.
            config (dict): Node configuration. The keys read here are
                `settle_timeout`, `external_ip`, `external_port`, `protocol`,
                `shutdown_timeout` and `database_path`.

        Raises:
            ValueError: If `private_key_bin` is not a 32 bytes `bytes` value
                or `settle_timeout` is outside of the allowed range.
        """
        if not isinstance(private_key_bin,
                          bytes) or len(private_key_bin) != 32:
            raise ValueError('invalid private_key')

        invalid_timeout = (
            config['settle_timeout'] < NETTINGCHANNEL_SETTLE_TIMEOUT_MIN
            or config['settle_timeout'] > NETTINGCHANNEL_SETTLE_TIMEOUT_MAX)
        if invalid_timeout:
            raise ValueError('settle_timeout must be in range [{}, {}]'.format(
                NETTINGCHANNEL_SETTLE_TIMEOUT_MIN,
                NETTINGCHANNEL_SETTLE_TIMEOUT_MAX))

        self.tokens_to_connectionmanagers = dict()
        self.identifier_to_results = defaultdict(list)

        # This is a map from a hashlock to a list of channels, the same
        # hashlock can be used in more than one token (for tokenswaps), a
        # channel should be removed from this list only when the lock is
        # released/withdrawn but not when the secret is registered.
        self.token_to_hashlock_to_channels = defaultdict(
            lambda: defaultdict(list))

        self.chain = chain
        self.default_registry = default_registry
        self.config = config
        self.privkey = private_key_bin
        self.address = privatekey_to_address(private_key_bin)

        # Register the endpoint in the background; the greenlet is joined
        # further down, before the node is started.
        endpoint_registration_event = gevent.spawn(
            discovery.register,
            self.address,
            config['external_ip'],
            config['external_port'],
        )
        endpoint_registration_event.link_exception(
            endpoint_registry_exception_handler)

        self.private_key = PrivateKey(private_key_bin)
        self.pubkey = self.private_key.public_key.format(compressed=False)
        self.protocol = RaidenProtocol(
            transport,
            discovery,
            self,
            config['protocol']['retry_interval'],
            config['protocol']['retries_before_backoff'],
            config['protocol']['nat_keepalive_retries'],
            config['protocol']['nat_keepalive_timeout'],
            config['protocol']['nat_invitation_timeout'],
        )

        # TODO: remove this cyclic dependency
        transport.protocol = self.protocol

        self.blockchain_events = BlockchainEvents()
        self.alarm = AlarmTask(chain)
        self.shutdown_timeout = config['shutdown_timeout']
        self._block_number = None
        self.stop_event = Event()
        self.start_event = Event()
        self.chain.client.inject_stop_event(self.stop_event)

        # Populated by `start()`.
        self.wal = None

        self.database_path = config['database_path']
        if self.database_path != ':memory:':
            database_dir = os.path.dirname(config['database_path'])
            os.makedirs(database_dir, exist_ok=True)

            self.database_dir = database_dir
            # Prevent concurrent access to the same db
            self.lock_file = os.path.join(self.database_dir, '.lock')
            self.db_lock = filelock.FileLock(self.lock_file)
        else:
            # NOTE(review): this assignment is redundant (the value is already
            # ':memory:' in this branch), and `serialization_file` is only
            # ever defined on this branch - confirm whether it is still used.
            self.database_path = ':memory:'
            self.database_dir = None
            self.lock_file = None
            self.serialization_file = None
            self.db_lock = None

        # If the endpoint registration fails the node will quit, this must
        # finish before starting the protocol
        endpoint_registration_event.join()

        # Lock used to serialize calls to `poll_blockchain_events`, this is
        # important to give a consistent view of the node state.
        self.event_poll_lock = gevent.lock.Semaphore()

        self.start()

    def start(self):
        """ Start the node.

        Restores the write-ahead log from the latest snapshot, primes the
        state on the very first run, starts the alarm task and the protocol,
        signals `start_event` and finally replays the unapplied events.
        """
        # XXX Should this really be here? Or will start() never be called again
        # after stop() in the lifetime of Raiden apart from the tests? This is
        # at least at the moment prompted by tests/integration/test_transer.py
        if self.stop_event and self.stop_event.is_set():
            self.stop_event.clear()

        if self.database_dir is not None:
            # timeout=0: fail right away instead of waiting for another
            # process that holds the database lock.
            self.db_lock.acquire(timeout=0)
            assert self.db_lock.is_locked

        # The database may be :memory:
        storage = sqlite.SQLiteStorage(self.database_path,
                                       serialize.PickleSerializer())
        self.wal, unapplied_events = wal.restore_from_latest_snapshot(
            node.state_transition,
            storage,
        )

        # `first_run` must be bound on every path: it is read again below,
        # and leaving it unset when a snapshot exists raised
        # UnboundLocalError on every restart.
        first_run = self.wal.state_manager.current_state is None

        # First run, initialize the basic state
        if first_run:
            block_number = self.chain.block_number()

            state_change = ActionInitNode(block_number)
            self.wal.log_and_dispatch(state_change, block_number)

        # The alarm task must be started after the snapshot is loaded or the
        # state is primed, the callbacks assume the node is initialized.
        self.alarm.start()
        self.alarm.register_callback(self.poll_blockchain_events)
        self.alarm.register_callback(self.set_block_number)
        self._block_number = self.chain.block_number()

        # Registry registration must start *after* the alarm task. This
        # avoids corner cases where the registry is queried in block A, a new
        # block B is mined, and the alarm starts polling at block C.
        if first_run:
            self.register_payment_network(self.default_registry.address)

        # Start the protocol after the registry is queried to avoid warning
        # about unknown channels.
        self.protocol.start()

        # Health check needs the protocol layer
        self.start_neighbours_healthcheck()

        self.start_event.set()

        for event in unapplied_events:
            on_raiden_event(self, event)

    def start_neighbours_healthcheck(self):
        """ Start a health check for every neighbour in the current state,
        skipping the bootstrap address.
        """
        current_state = self.wal.state_manager.current_state
        neighbours = views.all_neighbour_nodes(current_state)

        for address in neighbours:
            if address != ConnectionManager.BOOTSTRAP_ADDR:
                self.start_health_check_for(address)

    def stop(self):
        """ Stop the node. """
        # The stop event has to be flagged before any greenlet is joined.
        self.stop_event.set()
        self.protocol.stop_and_wait()
        self.alarm.stop_async()

        # Bounded wait: without the timeout this could loop forever trying
        # to reach a client that is already disconnected.
        pending = [self.alarm] + list(self.protocol.greenlets)
        gevent.wait(pending, timeout=self.shutdown_timeout)

        # The filters are polled from an alarm task callback
        # (`poll_blockchain_events`), so they may only be uninstalled once the
        # alarm task is done, otherwise that callback fails. The same bounded
        # wait applies here for the same reason as above.
        try:
            with gevent.Timeout(self.shutdown_timeout):
                self.blockchain_events.uninstall_all_event_listeners()
        except (gevent.timeout.Timeout, RaidenShuttingDown):
            pass

        database_lock = self.db_lock
        if database_lock is not None:
            database_lock.release()

    def __repr__(self):
        """ Return `<ClassName address>` with the address formatted by `pex`. """
        class_name = self.__class__.__name__
        short_address = pex(self.address)
        return '<{} {}>'.format(class_name, short_address)

    def set_block_number(self, block_number):
        """ Dispatch a `Block` state change and then update the cached block
        number.
        """
        self.handle_state_change(Block(block_number), block_number)

        # The cache is written last on purpose: updating it only after the
        # state change has been handled avoids races.
        self._block_number = block_number

    def handle_state_change(self, state_change, block_number=None):
        """ Log and dispatch `state_change` through the WAL and handle every
        resulting event.

        Args:
            state_change: The state change to dispatch.
            block_number: Block number recorded with the state change, when
                omitted the node's current block number is used.

        Returns:
            The events produced by the dispatch.
        """
        debug_enabled = log.isEnabledFor(logging.DEBUG)
        if debug_enabled:
            log.debug(
                'STATE CHANGE',
                node=pex(self.address),
                state_change=state_change,
            )

        if block_number is None:
            block_number = self.get_block_number()

        produced_events = self.wal.log_and_dispatch(state_change, block_number)

        for raiden_event in produced_events:
            if debug_enabled:
                log.debug('EVENT', node=pex(self.address), event=raiden_event)
            on_raiden_event(self, raiden_event)

        return produced_events

    def set_node_network_state(self, node_address, network_state):
        """ Record the new network state of `node_address` in the WAL. """
        block_number = self.get_block_number()
        self.wal.log_and_dispatch(
            ActionChangeNodeNetworkState(node_address, network_state),
            block_number,
        )

    def start_health_check_for(self, node_address):
        self.protocol.start_health_check(node_address)

    def get_block_number(self):
        """ Return the block number recorded in the current node state. """
        current_state = self.wal.state_manager.current_state
        return views.block_number(current_state)

    def poll_blockchain_events(self, current_block=None):  # pylint: disable=unused-argument
        with self.event_poll_lock:
            for event in self.blockchain_events.poll_blockchain_events():
                on_blockchain_event(self, event)

    def sign(self, message):
        """ Sign message inplace.

        Raises:
            ValueError: If `message` is not a `SignedMessage`.
        """
        if isinstance(message, SignedMessage):
            message.sign(self.private_key, self.address)
            return

        raise ValueError('{} is not signable.'.format(repr(message)))

    def send_async(self, recipient, message):
        """ Send `message` to `recipient` using the raiden protocol.

        Resending is left to the protocol, which retries on a given interval
        until an Acknowledgment is received or a given number of tries.

        Returns:
            Whatever the protocol's `send_async` returns.

        Raises:
            ValueError: If `recipient` is not a valid address or is this
                node's own address.
        """
        valid_recipient = isaddress(recipient)
        if not valid_recipient:
            raise ValueError('recipient is not a valid address.')

        sending_to_self = recipient == self.address
        if sending_to_self:
            raise ValueError('programming error, sending message to itself')

        return self.protocol.send_async(recipient, message)

    def send_and_wait(self, recipient, message, timeout):
        """ Send `message` to `recipient` and wait for the response or `timeout`.

        Args:
            recipient (address): The address of the node that will receive the
                message.
            message: The transfer message.
            timeout (float): How long should we wait for a response from `recipient`.

        Returns:
            None: If the wait timed out
            object: The result from the event

        Raises:
            ValueError: If `recipient` is not a valid address.
        """
        if not isaddress(recipient):
            raise ValueError('recipient is not a valid address.')

        # Hand the protocol's result back to the caller; it used to be
        # dropped, so this method always returned None regardless of the
        # outcome, contradicting the documented contract.
        return self.protocol.send_and_wait(recipient, message, timeout)

    def register_payment_network(self, registry_address):
        """ Install the listeners for the registry at `registry_address` and
        dispatch a `ContractReceiveNewPaymentNetwork` with its token networks.
        """
        proxies = get_relevant_proxies(self.chain, self.address, registry_address)

        # The filters go in before the state is queried so that no change is
        # missed; the price is that some events might be applied twice.
        self.blockchain_events.add_proxies_listeners(proxies)

        token_network_list = [
            get_token_network_state_from_proxies(
                self,
                manager,
                proxies.channelmanager_nettingchannels[manager.address],
            )
            for manager in proxies.channel_managers
        ]

        new_network = ContractReceiveNewPaymentNetwork(
            PaymentNetworkState(registry_address, token_network_list),
        )
        self.handle_state_change(new_network)

    def connection_manager_for_token(self, token_address):
        """ Return the `ConnectionManager` for `token_address`, creating and
        caching it on first use.

        Raises:
            InvalidAddress: If `token_address` is not a valid address or the
                token is not registered with the default registry.
        """
        if not isaddress(token_address):
            raise InvalidAddress('token address is not valid.')

        registered_tokens = views.get_token_network_addresses_for(
            self.wal.state_manager.current_state,
            self.default_registry.address,
        )
        if token_address not in registered_tokens:
            raise InvalidAddress('token is not registered.')

        cache = self.tokens_to_connectionmanagers
        manager = cache.get(token_address)
        if manager is not None:
            return manager

        manager = ConnectionManager(self, token_address)
        cache[token_address] = manager
        return manager

    def leave_all_token_networks(self):
        """ Record an `ActionLeaveAllNetworks` state change in the WAL. """
        leave_all = ActionLeaveAllNetworks()
        current_block = self.get_block_number()
        self.wal.log_and_dispatch(leave_all, current_block)

    def close_and_settle(self):
        """ Leave all token networks and, if any connection manager exists,
        wait until all channels are settled.
        """
        log.info('raiden will close and settle all channels now')

        self.leave_all_token_networks()

        # Nothing to wait for when no connection manager was ever created.
        if not self.tokens_to_connectionmanagers:
            return

        waiting.wait_for_settle_all_channels(self, self.alarm.wait_time)

    def mediated_transfer_async(self, token_address, amount, target,
                                identifier):
        """ Transfer `amount` between this node and `target`.

        This method will start an asyncronous transfer, the transfer might fail
        or succeed depending on a couple of factors:

            - Existence of a path that can be used, through the usage of direct
              or intermediary channels.
            - Network speed, making the transfer sufficiently fast so it doesn't
              expire.
        """

        async_result = self.start_mediated_transfer(
            token_address,
            amount,
            identifier,
            target,
        )

        return async_result

    def direct_transfer_async(self, token_address, amount, target, identifier):
        """ Do a direct transfer with target.

        A direct transfer is a signed balance proof with the transferred
        amount incremented, therefore it can neither be cancelled nor expire.

        Since it cannot be cancelled there is a level of trust with the
        target: once the message is sent the target is effectively paid and
        the payment cannot be reverted.

        The async result will be set to False iff there is no direct channel
        with the target or the payer does not have balance to complete the
        transfer, otherwise because the transfer is non expirable the async
        result *will never be set to False* and if the message is sent it will
        hang until the target node acknowledge the message.

        Use this as an optimization: from the payer's perspective only two
        packets are needed, whereas the mediated transfer requires 6 messages.

        NOTE(review): the text above talks about an async result, but this
        method does not return anything - confirm where the result is exposed.
        """
        self.protocol.start_health_check(target)

        if identifier is None:
            identifier = create_default_identifier()

        state_change = ActionTransferDirect(
            self.default_registry.address,
            token_address,
            target,
            identifier,
            amount,
        )
        self.handle_state_change(state_change)

    def start_mediated_transfer(self, token_address, amount, identifier,
                                target):
        """ Register an async result for `identifier` and dispatch the
        initiator state change for a new mediated transfer.

        When `identifier` is None a default one is created. The identifier
        must not have registered results yet (checked with an assert).

        Returns:
            The `AsyncResult` registered for the transfer.
        """
        self.protocol.start_health_check(target)

        if identifier is None:
            identifier = create_default_identifier()

        assert identifier not in self.identifier_to_results

        transfer_result = AsyncResult()
        self.identifier_to_results[identifier].append(transfer_result)

        init_state_change = initiator_init(
            self,
            identifier,
            amount,
            random_secret(),
            token_address,
            target,
        )

        # TODO: implement the network timeout raiden.config['msg_timeout'] and
        # cancel the current transfer if it happens (issue #374)
        #
        # The state change is dispatched even when there are no routes, so
        # that the wal entry is created.
        self.handle_state_change(init_state_change)

        return transfer_result

    def mediate_mediated_transfer(self, transfer):
        """ Dispatch the mediator init state change built from `transfer`. """
        self.handle_state_change(mediator_init(self, transfer))

    def target_mediated_transfer(self, transfer):
        """ Dispatch the target init state change built from `transfer`. """
        self.handle_state_change(target_init(self, transfer))
Ejemplo n.º 32
0
        x.kill()
        with no_time():
            result = gevent.wait()
        assert result is True

    # exiting because of event (the spawned greenlet still runs)
    for _ in xrange(2):
        x = gevent.spawn_later(10, lambda: 5)
        event = Event()
        event_set = gevent.spawn_later(SMALL, event.set)
        with expected_time(SMALL):
            result = gevent.wait([event])
        assert result == [event], repr(result)
        assert not x.dead, x
        assert event_set.dead
        assert event.is_set()
        x.kill()
        with no_time():
            result = gevent.wait()
        assert result is True

    # checking "ref=False" argument
    for _ in xrange(2):
        gevent.get_hub().loop.timer(10, ref=False).start(lambda: None)
        with no_time():
            result = gevent.wait()
        assert result is True

    # checking "ref=False" attribute
    for _d in xrange(2):
        w = gevent.get_hub().loop.timer(10)
Ejemplo n.º 33
0
class MDSThrasher(Greenlet):
    """
    MDSThrasher::

    The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).

    The config is optional.  Many of the config parameters are a maximum value
    to use when selecting a random value from a range.  To always use the maximum
    value, set no_random to true.  The config is a dict containing some or all of:

    max_thrash: [default: 1] the maximum number of active MDSs per FS that will be thrashed at
      any given time.

    max_thrash_delay: [default: 30] maximum number of seconds to delay before
      thrashing again.

    max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
      the replay state before thrashing.

    max_revive_delay: [default: 10] maximum number of seconds to delay before
      bringing back a thrashed MDS.

    randomize: [default: true] enables randomization and use the max/min values

    seed: [no default] seed the random number generator

    thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
      during replay.  Value should be between 0.0 and 1.0.

    thrash_max_mds: [default: 0.05] likelihood that the max_mds of the mds
      cluster will be modified to a value [1, current) or (current, starting
      max_mds]. Value should be between 0.0 and 1.0.

    thrash_while_stopping: [default: false] thrash an MDS while there
      are MDS in up:stopping (because max_mds was changed and some
      MDS were deactivated).

    thrash_weights: allows specific MDSs to be thrashed more/less frequently.
      This option overrides anything specified by max_thrash.  This option is a
      dict containing mds.x: weight pairs.  For example, [mds.a: 0.7, mds.b:
      0.3, mds.c: 0.0].  Each weight is a value from 0.0 to 1.0.  Any MDSs not
      specified will be automatically given a weight of 0.0 (not thrashed).
      For a given MDS, by default the thrasher delays for up to
      max_thrash_delay, thrashes, waits for the MDS to recover, and iterates.
      If a non-zero weight is specified for an MDS, for each iteration the
      thrasher chooses whether to thrash during that iteration based on a
      random value [0-1] not exceeding the weight of that MDS.

    Examples::


      The following example sets the likelihood that mds.a will be thrashed
      to 80%, mds.b to 20%, and other MDSs will not be thrashed.  It also sets the
      likelihood that an MDS will be thrashed in replay to 40%.
      Thrash weights do not have to sum to 1.

      tasks:
      - ceph:
      - mds_thrash:
          thrash_weights:
            - mds.a: 0.8
            - mds.b: 0.2
          thrash_in_replay: 0.4
      - ceph-fuse:
      - workunit:
          clients:
            all: [suites/fsx.sh]

      The following example disables randomization, and uses the max delay values:

      tasks:
      - ceph:
      - mds_thrash:
          max_thrash_delay: 10
          max_revive_delay: 1
          max_replay_thrash_delay: 4

    """

    def __init__(self, ctx, manager, config, fs, max_mds):
        """
        Store the collaborators and read the thrashing options from config.

        :param ctx: test context, used to reach the cluster remotes and daemons
        :param manager: manager object, used when reviving a powercycled MDS
        :param config: dict of thrasher options
        :param fs: file system to thrash; its name labels the logger and
                   the greenlet
        :param max_mds: upper bound used when picking a new max_mds value
        """
        Greenlet.__init__(self)

        self.config = config
        self.ctx = ctx
        # Exception raised by do_thrash(), if any (recorded by _run()).
        self.e = None
        self.logger = log.getChild('fs.[{f}]'.format(f = fs.name))
        self.fs = fs
        self.manager = manager
        self.max_mds = max_mds
        self.name = 'thrasher.fs.[{f}]'.format(f = fs.name)
        self.stopping = Event()

        self.randomize = bool(self.config.get('randomize', True))
        self.thrash_max_mds = float(self.config.get('thrash_max_mds', 0.05))
        self.max_thrash = int(self.config.get('max_thrash', 1))
        # The class documentation names this option ``max_thrash_delay`` but
        # only ``thrash_delay`` used to be read, so the documented key was
        # silently ignored.  Accept both; ``thrash_delay`` keeps precedence so
        # that existing configurations behave exactly as before.
        # NOTE(review): the documentation states a default of 30 while the
        # code default is 120 - confirm which one is intended.
        self.max_thrash_delay = float(self.config.get(
            'thrash_delay', self.config.get('max_thrash_delay', 120.0)))
        self.thrash_in_replay = float(self.config.get('thrash_in_replay', 0.0))
        assert self.thrash_in_replay >= 0.0 and self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
            v=self.thrash_in_replay)
        self.max_replay_thrash_delay = float(self.config.get('max_replay_thrash_delay', 4.0))
        self.max_revive_delay = float(self.config.get('max_revive_delay', 10.0))

    def _run(self):
        """
        Greenlet body: run do_thrash() and record, rather than propagate,
        any exception it raises.
        """
        try:
            self.do_thrash()
        except Exception as exc:
            # The exception is logged here to keep the full backtrace, which
            # gevent loses.  gevent's own error reporting has been seen to
            # fail with an IOError while printing the traceback
            # (hub.handle_error -> print_exception -> file.write), so the
            # exception is stored on the instance and the greenlet is allowed
            # to complete successfully instead of letting gevent see it.
            self.e = exc
            self.logger.exception("exception:")

    def log(self, x):
        """Emit x at info level on the logger assigned to this MDSThrasher"""
        logger = self.logger
        logger.info(x)

    def stop(self):
        """Set the stopping event that the thrash loop checks."""
        stopping = self.stopping
        stopping.set()

    def kill_mds(self, mds):
        """
        Kill mds -- power off its host when 'powercycle' is configured,
        otherwise stop the daemon.
        """
        if not self.config.get('powercycle'):
            self.ctx.daemons.get_daemon('mds', mds).stop()
            return

        role = 'mds.{m}'.format(m=mds)
        # Exactly one remote is expected for the role; unpacking enforces it.
        (remote,) = self.ctx.cluster.only(role).remotes.keys()
        message = 'kill_mds on mds.{m} doing powercycle of {s}'.format(
            m=mds, s=remote.name)
        self.log(message)
        self._assert_ipmi(remote)
        remote.console.power_off()

    @staticmethod
    def _assert_ipmi(remote):
        """Assert that the console of remote has ipmi credentials."""
        has_credentials = remote.console.has_ipmi_credentials
        assert has_credentials, (
            "powercycling requested but RemoteConsole is not "
            "initialized.  Check ipmi config.")

    def revive_mds(self, mds):
        """
        Revive mds -- power its host back on via ipmi (if indicated by the
        config) and then restart the daemon.
        """
        powercycle = self.config.get('powercycle')
        if powercycle:
            role = 'mds.{m}'.format(m=mds)
            # Exactly one remote is expected for the role.
            (remote,) = self.ctx.cluster.only(role).remotes.keys()
            message = 'revive_mds on mds.{m} doing powercycle of {s}'.format(
                m=mds, s=remote.name)
            self.log(message)
            self._assert_ipmi(remote)
            remote.console.power_on()
            self.manager.make_admin_daemon_dir(self.ctx, remote)

        # The daemon is restarted without extra arguments.
        daemon = self.ctx.daemons.get_daemon('mds', mds)
        daemon.restart()

    def wait_for_stable(self, rank = None, gid = None):
        """
        Poll the file system status every 2 seconds until the MDS cluster is
        considered stable.

        While any MDS is in up:stopping the cluster is considered unstable,
        unless the 'thrash_while_stopping' option is set.

        :param rank: when given, wait until that rank is held by an
                     up:active MDS whose gid differs from gid, or until the
                     number of actives reaches max_mds (no replacement can
                     occur)
        :param gid: gid of the MDS that previously held rank
        :return: the last status when rank is given.  Without rank the
                 return value is the tuple (status, None).
                 NOTE(review): the two shapes are inconsistent; kept as is
                 because callers outside this class may unpack the tuple.
        :raises RuntimeError: when the cluster has not stabilized after more
                              than 150 polls (about 5 minutes)
        """
        self.log('waiting for mds cluster to stabilize...')
        thrash_while_stopping = bool(self.config.get('thrash_while_stopping', False))
        for itercount in itertools.count():
            status = self.fs.status()
            max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
            ranks = list(status.get_ranks(self.fs.id))
            stopping = sum(1 for r in ranks if "up:stopping" == r['state'])
            actives = sum(1 for r in ranks
                          if "up:active" == r['state'] and "laggy_since" not in r)

            if not thrash_while_stopping and stopping > 0:
                if itercount % 5 == 0:
                    self.log('cluster is considered unstable while MDS are in up:stopping (!thrash_while_stopping)')
            else:
                if rank is not None:
                    try:
                        info = status.get_rank(self.fs.id, rank)
                        if info['gid'] != gid and "up:active" == info['state']:
                            self.log('mds.{name} has gained rank={rank}, replacing gid={gid}'.format(name = info['name'], rank = rank, gid = gid))
                            return status
                    except Exception:
                        # No rank present.  Only ordinary errors are ignored:
                        # a bare except would also swallow GreenletExit and
                        # KeyboardInterrupt and so prevent this greenlet from
                        # being killed while it is polling.
                        pass
                    if actives >= max_mds:
                        # no replacement can occur!
                        self.log("cluster has {actives} actives (max_mds is {max_mds}), no MDS can replace rank {rank}".format(
                            actives=actives, max_mds=max_mds, rank=rank))
                        return status
                else:
                    if actives == max_mds:
                        self.log('mds cluster has {count} alive and active, now stable!'.format(count = actives))
                        return status, None
            if itercount > 300/2: # 5 minutes
                raise RuntimeError('timeout waiting for cluster to stabilize')
            elif itercount % 5 == 0:
                self.log('mds map: {status}'.format(status=status))
            else:
                self.log('no change')
            sleep(2)

    def do_thrash(self):
        """
        Perform the random thrashing action

        Until stop() is called, repeatedly: wait up to max_thrash_delay,
        possibly change max_mds, then for the selected ranks kill the MDS,
        wait until the MDS map reflects it, wait for the cluster to
        stabilize, and revive the MDS after up to max_revive_delay.  The
        counters are logged when the loop ends.

        :raises RuntimeError: when a revived MDS does not come back within
                              about 5 minutes, or the cluster fails to
                              stabilize (see wait_for_stable)
        """

        self.log('starting mds_do_thrash for fs {fs}'.format(fs = self.fs.name))
        stats = {
            "max_mds": 0,
            "deactivate": 0,
            "kill": 0,
        }

        while not self.stopping.is_set():
            delay = self.max_thrash_delay
            if self.randomize:
                # uniform() instead of randrange(): the bounds are floats,
                # which randrange() truncates to integers (and rejects on
                # newer Python 3 versions).
                delay = random.uniform(0.0, self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            status = self.fs.status()

            if random.random() <= self.thrash_max_mds:
                max_mds = status.get_fsmap(self.fs.id)['mdsmap']['max_mds']
                # Every value in [1, self.max_mds] except the current one.
                options = list(range(1, max_mds))+list(range(max_mds+1, self.max_mds+1))
                if len(options) > 0:
                    new_max_mds = random.choice(options)
                    self.log('thrashing max_mds: %d -> %d' % (max_mds, new_max_mds))
                    self.fs.set_max_mds(new_max_mds)
                    stats['max_mds'] += 1
                    self.wait_for_stable()

            count = 0
            for info in status.get_ranks(self.fs.id):
                name = info['name']
                label = 'mds.' + name
                rank = info['rank']
                gid = info['gid']

                # if thrash_weights isn't specified and we've reached max_thrash,
                # we're done
                count = count + 1
                if 'thrash_weights' not in self.config and count > self.max_thrash:
                    break

                weight = 1.0
                if 'thrash_weights' in self.config:
                    # Unlisted MDSs get a numeric weight of 0.0 (never
                    # thrashed).  The default used to be the string '0.0',
                    # which never compares as <= a number on Python 2 (so
                    # unlisted MDSs were always thrashed) and raises
                    # TypeError on Python 3.
                    weight = float(self.config['thrash_weights'].get(label, 0.0))
                # random() yields a float in [0.0, 1.0); randrange(0.0, 1.0)
                # could only ever return 0, which made every non-zero weight
                # behave like 1.0.
                skip = random.random()
                if weight <= skip:
                    self.log('skipping thrash iteration with skip ({skip}) > weight ({weight})'.format(skip=skip, weight=weight))
                    continue

                self.log('kill {label} (rank={rank})'.format(label=label, rank=rank))
                self.kill_mds(name)
                stats['kill'] += 1

                # wait for mon to report killed mds as crashed
                last_laggy_since = None
                itercount = 0
                while True:
                    status = self.fs.status()
                    info = status.get_mds(name)
                    if not info:
                        break
                    if 'laggy_since' in info:
                        last_laggy_since = info['laggy_since']
                        break
                    if any(f == name for f in status.get_fsmap(self.fs.id)['mdsmap']['failed']):
                        break
                    self.log(
                        'waiting till mds map indicates {label} is laggy/crashed, in failed state, or {label} is removed from mdsmap'.format(
                            label=label))
                    itercount = itercount + 1
                    if itercount > 10:
                        self.log('mds map: {status}'.format(status=status))
                    sleep(2)

                if last_laggy_since:
                    self.log(
                        '{label} reported laggy/crashed since: {since}'.format(label=label, since=last_laggy_since))
                else:
                    self.log('{label} down, removed from mdsmap'.format(label=label))

                # wait for a standby mds to takeover and become active
                status = self.wait_for_stable(rank, gid)

                # wait for a while before restarting old active to become new
                # standby
                delay = self.max_revive_delay
                if self.randomize:
                    delay = random.uniform(0.0, self.max_revive_delay)

                self.log('waiting for {delay} secs before reviving {label}'.format(
                    delay=delay, label=label))
                sleep(delay)

                self.log('reviving {label}'.format(label=label))
                self.revive_mds(name)

                for itercount in itertools.count():
                    if itercount > 300/2: # 5 minutes
                        raise RuntimeError('timeout waiting for MDS to revive')
                    status = self.fs.status()
                    info = status.get_mds(name)
                    if info and info['state'] in ('up:standby', 'up:standby-replay', 'up:active'):
                        self.log('{label} reported in {state} state'.format(label=label, state=info['state']))
                        break
                    self.log(
                        'waiting till mds map indicates {label} is in active, standby or standby-replay'.format(label=label))
                    sleep(2)

        for key, value in stats.items():
            self.log("stat['{key}'] = {value}".format(key = key, value = value))
Ejemplo n.º 34
0
class _FelixEtcdWatcher(gevent.Greenlet):
    """
    Greenlet that communicates with the etcd driver over a socket.

    * Does the initial handshake with the driver, sending it the init
      message.
    * Receives the pre-loaded config from the driver and uses that
      to do Felix's one-off configuration.
    * Sends the relevant config back to the driver.
    * Processes the event stream from the driver, sending it on to
      the splitter.

    This class is similar to the EtcdWatcher class in that it uses
    a PathDispatcher to fan out updates but it doesn't own an etcd
    connection of its own.
    """
    def __init__(self, config, etcd_api, status_reporter, hosts_ipset):
        """
        Initialise the watcher's state and register its dispatcher paths.

        The driver subprocess is not started here; _run() starts it once
        the load_config event has been set.

        :param config: Felix config object; HOSTNAME is read from it here.
        :param etcd_api: stored as self._etcd_api.
        :param status_reporter: stored; used for endpoint status clean-up
            when the datamodel first comes into sync.
        :param hosts_ipset: stored; its members are replaced with the
            known host IPs.
        """
        super(_FelixEtcdWatcher, self).__init__()

        # Collaborators supplied by the caller.
        self._config = config
        self._etcd_api = etcd_api
        self._status_reporter = status_reporter
        self.hosts_ipset = hosts_ipset

        # One-way flags.  load_config and begin_polling are set by the
        # EtcdAPI Actor to tell us to load the config and start polling;
        # configured is set by us once the config has been loaded.
        self.load_config = Event()
        self.begin_polling = Event()
        self.configured = Event()

        # Becomes True the first time we are in sync with etcd.
        self._been_in_sync = False
        # Set to force a resync after the current poll.  Safe to set from
        # another thread; cleared again when the resync is triggered.
        self.resync_requested = False

        # Copies of the config loaded from etcd, kept so that later
        # changes can be spotted.
        self.last_global_config = None
        self.last_host_config = None
        self.my_config_dir = dir_for_per_host_config(self._config.HOSTNAME)

        # Polling state, filled in when polling starts.
        self.splitter = None
        # Next-hop IPv4 addresses of our hosts, if populated in etcd.
        self.ipv4_by_hostname = {}

        # Popen object for the driver process (None until started).
        self._driver_process = None

        # Coarse statistics.
        self.read_count = 0
        self.msgs_processed = 0
        self.last_rate_log_time = monotonic_time()

        # Fan-out of key updates; must exist before the paths are
        # registered on it.
        self.dispatcher = PathDispatcher()
        self._register_paths()

    def _register_paths(self):
        """
        Register a set/delete handler pair with the dispatcher for every
        key pattern this watcher handles.
        """
        # (key pattern, handler for sets, handler for deletes), in
        # registration order.
        registrations = (
            # Profiles and their contents.
            (TAGS_KEY, self.on_tags_set, self.on_tags_delete),
            (RULES_KEY, self.on_rules_set, self.on_rules_delete),
            (PROFILE_LABELS_KEY,
             self.on_prof_labels_set, self.on_prof_labels_delete),
            # Tiered policy.
            (TIER_DATA, self.on_tier_data_set, self.on_tier_data_delete),
            (TIERED_PROFILE,
             self.on_tiered_policy_set, self.on_tiered_policy_delete),
            # Hosts and endpoints.
            (HOST_IP_KEY, self.on_host_ip_set, self.on_host_ip_delete),
            (PER_ENDPOINT_KEY, self.on_endpoint_set, self.on_endpoint_delete),
            (CIDR_V4_KEY,
             self.on_ipam_v4_pool_set, self.on_ipam_v4_pool_delete),
            # Configuration keys.  The same handler serves set and delete:
            # any change or creation makes us restart to pick it up.
            (CONFIG_PARAM_KEY,
             self._on_config_updated, self._on_config_updated),
            (PER_HOST_CONFIG_PARAM_KEY,
             self._on_host_config_updated, self._on_host_config_updated),
        )
        for key, set_handler, del_handler in registrations:
            self.dispatcher.register(key,
                                     on_set=set_handler,
                                     on_del=del_handler)

    @logging_exceptions
    def _run(self):
        """
        Greenlet body: wait for the load_config event, start the driver,
        then process its messages in _loop_reading_from_driver().
        """
        # Block until we're told to load the config.
        _log.info("Waiting for load_config event...")
        self.load_config.wait()
        _log.info("...load_config set.  Starting driver read %s loop", self)
        # Launch the driver process; this returns once it has connected
        # back to our socket.
        reader, writer = self._start_driver()
        self._msg_reader = reader
        self._msg_writer = writer
        # Read and process messages from the socket.
        self._loop_reading_from_driver()

    def _loop_reading_from_driver(self):
        """
        Loop forever: dispatch messages read from the driver, forward any
        pending resync request and check that the driver is still running.

        Calls die_and_restart() if the driver closes its socket or its
        process has exited.
        """
        while True:
            try:
                # new_messages() returns an iterator, so the iteration
                # itself has to happen inside the try block.
                incoming = self._msg_reader.new_messages(timeout=1)
                for msg_type, msg in incoming:
                    self._dispatch_msg_from_driver(msg_type, msg)
            except SocketClosed:
                _log.critical("The driver process closed its socket, Felix "
                              "must exit.")
                die_and_restart()

            if self.resync_requested:
                _log.info("Resync requested, sending resync request to driver")
                # Clear the flag before sending the request.
                self.resync_requested = False
                self._msg_writer.send_message(MSG_TYPE_RESYNC)

            # Check the driver is still alive.  recv() should raise once
            # the buffer runs dry, but this check usually fires first.
            driver_rc = self._driver_process.poll()
            if driver_rc is None:
                continue
            _log.critical(
                "Driver process died with RC = %s.  Felix must "
                "exit.", driver_rc)
            die_and_restart()

    def _dispatch_msg_from_driver(self, msg_type, msg):
        """
        Route one driver message to its handler and count it.

        :param msg_type: one of MSG_TYPE_UPDATE, MSG_TYPE_CONFIG_LOADED or
            MSG_TYPE_STATUS.
        :param msg: the message body, passed to the handler unchanged.
        :raises RuntimeError: for any other message type.
        """
        # Updates are tested first because they are on the critical path.
        if msg_type == MSG_TYPE_UPDATE:
            stat_name = "Update messages from driver"
            handler = self._on_update_from_driver
        elif msg_type == MSG_TYPE_CONFIG_LOADED:
            stat_name = "Config loaded messages from driver"
            handler = self._on_config_loaded_from_driver
        elif msg_type == MSG_TYPE_STATUS:
            stat_name = "Status messages from driver"
            handler = self._on_status_from_driver
        else:
            raise RuntimeError("Unexpected message %s" % msg)

        _stats.increment(stat_name)
        handler(msg)

        self.msgs_processed += 1
        if self.msgs_processed % MAX_EVENTS_BEFORE_YIELD != 0:
            return
        # Periodically yield so other actors make progress (gevent only
        # yields for us if the socket would block).  The sleep is non-zero
        # to work around a gevent issue where we could be rescheduled
        # immediately.
        gevent.sleep(0.000001)

    def _on_update_from_driver(self, msg):
        """
        Handle a key/value update sent by the driver.

        Once the initial handshake is done, the event stream is made up
        entirely of these updates unless something changes the state of
        the driver.

        Blocks until begin_polling is set, then hands the update to the
        dispatcher as a "set" event, or a "delete" event when the value
        is None.

        :param dict msg: The message received from the driver.
        """
        assert self.configured.is_set(), "Received update before config"
        # The driver begins polling straight away; hold the update back
        # until everyone else is ready to receive it.
        self.begin_polling.wait()

        key, value = msg[MSG_KEY_KEY], msg[MSG_KEY_VALUE]
        _log.debug("Update from driver: %s -> %s", key, value)

        # Very coarse throughput stats, logged every 1000 updates.
        self.read_count += 1
        if self.read_count % 1000 == 0:
            now = monotonic_time()
            elapsed = now - self.last_rate_log_time
            _log.info("Processed %s updates from driver "
                      "%.1f/s", self.read_count, 1000.0 / elapsed)
            self.last_rate_log_time = now

        # The PathDispatcher works on EtcdEvent objects, so wrap the update.
        action = "delete" if value is None else "set"
        self.dispatcher.handle_event(EtcdEvent(action, key, value))

    def _on_config_loaded_from_driver(self, msg):
        """
        Handle the config loaded message from the driver.

        The driver is expected to send this once per resync, after it has
        pre-loaded the config.

        First call: records the config, reports it to the config object,
        replies to the driver synchronously with a config message and
        sets the configured event.

        Later calls: compares the new config with the recorded one and
        calls die_and_restart() if either part differs.
        """
        global_config = msg[MSG_KEY_GLOBAL_CONFIG]
        host_config = msg[MSG_KEY_HOST_CONFIG]
        _log.info("Config loaded by driver:\n"
                  "Global: %s\nPer-host: %s", global_config, host_config)

        if not self.configured.is_set():
            # First load.  Copy before reporting because
            # report_etcd_config is destructive.
            self.last_host_config = host_config.copy()
            self.last_global_config = global_config.copy()
            self._config.report_etcd_config(host_config, global_config)
            # The config is now fully resolved, so tell the driver its
            # logging settings.
            driver_log_file = self._config.DRIVERLOGFILE
            driver_config = {
                MSG_KEY_LOG_FILE: driver_log_file,
                MSG_KEY_SEV_FILE: self._config.LOGLEVFILE,
                MSG_KEY_SEV_SCREEN: self._config.LOGLEVSCR,
                MSG_KEY_SEV_SYSLOG: self._config.LOGLEVSYS,
            }
            self._msg_writer.send_message(MSG_TYPE_CONFIG, driver_config)
            self.configured.set()
            return

        # Already configured.  Dynamic config update isn't supported yet,
        # so die if anything has changed.
        _log.info("Checking configuration for changes...")
        unchanged = (host_config == self.last_host_config
                     and global_config == self.last_global_config)
        if unchanged:
            return
        _log.warning("Felix configuration has changed, "
                     "felix must restart.")
        _log.info("Old host config: %s", self.last_host_config)
        _log.info("New host config: %s", host_config)
        _log.info("Old global config: %s", self.last_global_config)
        _log.info("New global config: %s", global_config)
        die_and_restart()

    def _on_status_from_driver(self, msg):
        """
        Called when we receive a status update from the driver.

        The driver sends us status messages whenever its status changes.
        It moves through these states:

        (1) wait-for-ready (waiting for the global ready flag to become set)
        (2) resync (resyncing with etcd, processing a snapshot and any
            concurrent events)
        (3) in-sync (snapshot processsing complete, now processing only events
            from etcd)

        If the driver falls out of sync with etcd then it will start again
        from (1).

        If the status is in-sync, triggers the relevant processing.
        """
        status = msg[MSG_KEY_STATUS]
        _log.info("etcd driver status changed to %s", status)
        if status == STATUS_IN_SYNC and not self._been_in_sync:
            # We're now in sync, tell the Actors that need to do start-of-day
            # cleanup.
            self.begin_polling.wait()  # Make sure splitter is set.
            self._been_in_sync = True
            self.splitter.on_datamodel_in_sync()
            if self._config.REPORT_ENDPOINT_STATUS:
                self._status_reporter.clean_up_endpoint_statuses(async=True)
            self._update_hosts_ipset()

    def _start_driver(self):
        """
        Starts the driver subprocess, connects to it over the socket
        and sends it the init message.

        Stores the Popen object in self._driver_process for future
        access.

        The listening socket is closed as soon as the driver has
        connected (or if start-up fails), so only the accepted connection
        stays open.

        :return: tuple (MessageReader, MessageWriter), both wrapping the
            connected socket to the driver.
        """
        sck_path = "/run/felix-driver.sck"
        _log.info("Creating server socket.")
        try:
            os.unlink(sck_path)
        except OSError:
            _log.debug("Failed to delete driver socket, assuming it "
                       "didn't exist.")
        update_socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            update_socket.bind(sck_path)
            update_socket.listen(1)
            self._driver_process = subprocess.Popen([
                sys.executable, "-m", "calico.etcddriver", sck_path
            ])
            _log.info("Started etcd driver with PID %s",
                      self._driver_process.pid)
            update_conn, _ = update_socket.accept()
        finally:
            # Only one connection is ever accepted; closing the listening
            # socket does not affect the accepted connection and avoids
            # leaking its file descriptor.
            update_socket.close()
        _log.info("Accepted connection on socket")
        # No longer need the server socket, remove it.
        try:
            os.unlink(sck_path)
        except OSError:
            # Unexpected but carry on...
            _log.exception("Failed to unlink socket")
        else:
            _log.info("Unlinked server socket")

        # Wrap the socket in reader/writer objects that simplify using the
        # protocol.
        reader = MessageReader(update_conn)
        writer = MessageWriter(update_conn)
        # Give the driver its config.
        writer.send_message(
            MSG_TYPE_INIT, {
                MSG_KEY_ETCD_URLS: [
                    self._config.ETCD_SCHEME + "://" + addr
                    for addr in self._config.ETCD_ADDRS
                ],
                MSG_KEY_HOSTNAME:
                self._config.HOSTNAME,
                MSG_KEY_KEY_FILE:
                self._config.ETCD_KEY_FILE,
                MSG_KEY_CERT_FILE:
                self._config.ETCD_CERT_FILE,
                MSG_KEY_CA_FILE:
                self._config.ETCD_CA_FILE
            })
        return reader, writer

    def on_endpoint_set(self, response, hostname, orchestrator, workload_id,
                        endpoint_id):
        """Parse a created/updated endpoint and pass it to the splitter."""
        ep_id = EndpointId(hostname, orchestrator, workload_id, endpoint_id)
        _log.debug("Endpoint %s updated", ep_id)
        _stats.increment("Endpoint created/updated")
        parsed = parse_endpoint(self._config, ep_id, response.value)
        self.splitter.on_endpoint_update(ep_id, parsed)

    def on_endpoint_delete(self, response, hostname, orchestrator, workload_id,
                           endpoint_id):
        """Tell the splitter an endpoint was deleted (update of None)."""
        ep_id = EndpointId(hostname, orchestrator, workload_id, endpoint_id)
        _log.debug("Endpoint %s deleted", ep_id)
        _stats.increment("Endpoint deleted")
        self.splitter.on_endpoint_update(ep_id, None)

    def on_rules_set(self, response, profile_id):
        """Parse created/updated profile rules and pass them to the splitter.

        The rules are parsed with the ID as received; the splitter is then
        given the ID as an interned UTF-8 byte string.
        """
        _log.debug("Rules for %s set", profile_id)
        _stats.increment("Rules created/updated")
        parsed_rules = parse_profile(profile_id, response.value)
        interned_id = intern(profile_id.encode("utf8"))
        self.splitter.on_rules_update(interned_id, parsed_rules)

    def on_rules_delete(self, response, profile_id):
        """Handler for rules deletes, passes the update to the splitter.

        The profile ID is converted to an interned UTF-8 byte string
        first, so the splitter receives the same key form that
        on_rules_set() passes for the same profile.
        """
        _log.debug("Rules for %s deleted", profile_id)
        _stats.increment("Rules deleted")
        profile_id = intern(profile_id.encode("utf8"))
        self.splitter.on_rules_update(profile_id, None)

    def on_tags_set(self, response, profile_id):
        """Parse created/updated profile tags and pass them to the splitter.

        The tags are parsed with the ID as received; the splitter is then
        given the ID as an interned UTF-8 byte string.
        """
        _log.debug("Tags for %s set", profile_id)
        _stats.increment("Tags created/updated")
        tags = parse_tags(profile_id, response.value)
        interned_id = intern(profile_id.encode("utf8"))
        self.splitter.on_tags_update(interned_id, tags)

    def on_tags_delete(self, response, profile_id):
        """Handler for tags deletes, passes the update to the splitter.

        The profile ID is converted to an interned UTF-8 byte string
        first, so the splitter receives the same key form that
        on_tags_set() passes for the same profile.
        """
        _log.debug("Tags for %s deleted", profile_id)
        _stats.increment("Tags deleted")
        profile_id = intern(profile_id.encode("utf8"))
        self.splitter.on_tags_update(profile_id, None)

    def on_prof_labels_set(self, response, profile_id):
        """Parse created/updated profile labels and pass them to the splitter.

        The splitter is given the ID as an interned UTF-8 byte string.
        """
        _log.debug("Labels for profile %s created/updated", profile_id)
        parsed_labels = parse_labels(profile_id, response.value)
        interned_id = intern(profile_id.encode("utf8"))
        self.splitter.on_prof_labels_set(interned_id, parsed_labels)

    def on_prof_labels_delete(self, response, profile_id):
        """Handler for profile label deletion.

        Passes the deletion to the splitter as a labels update of None,
        keyed by the profile ID as an interned UTF-8 byte string.
        """
        _log.debug("Labels for profile %s deleted", profile_id)
        interned_id = intern(profile_id.encode("utf8"))
        self.splitter.on_prof_labels_set(interned_id, None)

    def on_tier_data_set(self, response, tier):
        """Parse created/updated tier data and pass it to the splitter."""
        _log.debug("Tier data set for tier %s", tier)
        _stats.increment("Tier data created/updated")
        tier_data = parse_tier_data(tier, response.value)
        self.splitter.on_tier_data_update(tier, tier_data)

    def on_tier_data_delete(self, response, tier):
        """Tell the splitter a tier's data was deleted (update of None)."""
        _log.debug("Tier data deleted for tier %s", tier)
        _stats.increment("Tier data deleted")
        self.splitter.on_tier_data_update(tier, None)

    def on_tiered_policy_set(self, response, tier, policy_id):
        """Parse a created/updated tiered policy and pass it to the splitter.

        The parsed policy's "selector" and "order" entries are removed
        from the rules and sent as a separate selector update.  If parsing
        yields None, both updates are sent with None values.
        """
        _log.debug("Rules for %s/%s set", tier, policy_id)
        _stats.increment("Tiered rules created/updated")
        policy_id = TieredPolicyId(tier, policy_id)
        rules = parse_policy(policy_id, response.value)
        if rules is None:
            selector = order = None
        else:
            # Strip the selector and order out before the rules are sent.
            selector = rules.pop("selector")
            order = rules.pop("order")
        self.splitter.on_rules_update(policy_id, rules)
        self.splitter.on_policy_selector_update(policy_id, selector, order)

    def on_tiered_policy_delete(self, response, tier, policy_id):
        """Tell the splitter a tiered policy was deleted.

        Sends both a rules update and a selector update with None values.
        """
        _log.debug("Rules for %s/%s deleted", tier, policy_id)
        _stats.increment("tiered rules deleted")
        tiered_id = TieredPolicyId(tier, policy_id)
        self.splitter.on_rules_update(tiered_id, None)
        self.splitter.on_policy_selector_update(tiered_id, None, None)

    def on_host_ip_set(self, response, hostname):
        """Record a host's IP and refresh the hosts ipset.

        Does nothing when IP-in-IP is disabled.  A value that fails to
        parse is treated as a deletion of that host's entry.
        """
        if not self._config.IP_IN_IP_ENABLED:
            _log.debug("Ignoring update to %s because IP-in-IP is disabled",
                       response.key)
            return
        _stats.increment("Host IP created/updated")
        parsed_ip = parse_host_ip(hostname, response.value)
        if not parsed_ip:
            _log.warning(
                "Invalid IP for hostname %s: %s, treating as "
                "deletion", hostname, response.value)
            self.ipv4_by_hostname.pop(hostname, None)
        else:
            self.ipv4_by_hostname[hostname] = parsed_ip
        self._update_hosts_ipset()

    def on_host_ip_delete(self, response, hostname):
        """Forget a host's IP, refreshing the hosts ipset if one was stored.

        Does nothing when IP-in-IP is disabled.
        """
        if not self._config.IP_IN_IP_ENABLED:
            _log.debug("Ignoring update to %s because IP-in-IP is disabled",
                       response.key)
            return
        _stats.increment("Host IP deleted")
        removed_ip = self.ipv4_by_hostname.pop(hostname, None)
        if removed_ip:
            self._update_hosts_ipset()

    def _update_hosts_ipset(self):
        if not self._been_in_sync:
            _log.debug("Deferring update to hosts ipset until we're in-sync")
            return
        self.hosts_ipset.replace_members(frozenset(
            self.ipv4_by_hostname.values()),
                                         async=True)

    def _on_config_updated(self, response, config_param):
        """Restart Felix if a global config value differs from the one
        recorded when the config was loaded; otherwise count a non-update.
        """
        recorded_value = self.last_global_config.get(config_param)
        if recorded_value != response.value:
            _log.critical(
                "Global config value %s updated.  Felix must be "
                "restarted.", config_param)
            die_and_restart()
        _stats.increment("Global config (non) updates")

    def _on_host_config_updated(self, response, hostname, config_param):
        """Restart Felix if one of this host's config values has changed.

        Updates for other hosts are ignored.  The new value is compared
        with the per-host config recorded when the config was loaded; any
        difference triggers die_and_restart().
        """
        if hostname != self._config.HOSTNAME:
            _log.debug("Ignoring config update for host %s", hostname)
            return
        _stats.increment("Per-host config created/updated")
        new_value = response.value
        if self.last_host_config.get(config_param) != new_value:
            # This is the per-host handler, so the message names per-host
            # config rather than global config.
            _log.critical(
                "Per-host config value %s updated.  Felix must be "
                "restarted.", config_param)
            die_and_restart()

    def on_ipam_v4_pool_set(self, response, pool_id):
        """Parse a created/updated IPAM pool and pass it to the splitter."""
        _stats.increment("IPAM pool created/updated")
        parsed_pool = parse_ipam_pool(pool_id, response.value)
        self.splitter.on_ipam_pool_updated(pool_id, parsed_pool)

    def on_ipam_v4_pool_delete(self, response, pool_id):
        """Tell the splitter an IPAM pool was deleted (update of None)."""
        _stats.increment("IPAM pool deleted")
        deleted_pool = None
        self.splitter.on_ipam_pool_updated(pool_id, deleted_pool)
Ejemplo n.º 35
0
class MeekSession(object):
    def __init__(self, sessionid, socksip, socksport, timeout, sessionmap):
        """
        Create a session and register it in sessionmap under sessionid.

        No connection is made here; initialize() opens the SOCKS
        connection.

        :param sessionid: key for this session in sessionmap.
        :param socksip: address of the SOCKS server to connect to.
        :param socksport: port of the SOCKS server.
        :param timeout: used as the connect timeout and as the
            SharedTimer limit.
        :param sessionmap: mapping of session id to session object.
        """
        # Identity and upstream SOCKS server.
        self.sessionid = sessionid
        self.socksip = socksip
        self.socksport = socksport
        self.timeout = timeout

        # Register in the shared session table.
        self.sessionmap = sessionmap
        self.sessionmap[self.sessionid] = self

        # Sockets; all None/empty until initialize() or a command handler
        # fills them in.
        self.socksconn = None
        self.udpsock = None
        self.udp_associate = None
        self.allsocks = []

        # Protocol state.
        self.status = SESSION_WAIT_INIT
        self.initialized = False

        # Data towards the SOCKS side, plus the event that signals it.
        self.in_queue = Queue()
        self.in_notifier = Event()
        self.in_notifier.clear()
        # Data read back from the SOCKS side.
        self.out_queue = Queue()

        # Idle timer shared by the relay threads, and the end-of-session flag.
        self.timer = SharedTimer(self.timeout)
        self.finish = Event()
        self.finish.clear()

        # Relay greenlets, joined by meeks_clean_thread().
        self.threads = []
    
    def meeks_clean_thread(self):
        """Wait for the session to finish, join the relay threads, then
        release the session's resources with clean().
        """
        # Poll the finish flag every SERVER_TURNAROUND_MAX seconds.
        while True:
            if self.finish.is_set():
                break
            gevent.sleep(SERVER_TURNAROUND_MAX)
        for worker in self.threads:
            worker.join()
        self.clean()
            
    def write_to_socks(self, data):
        if self.udpsock:
            self.udpsock.sendto(data, self.udp_associate)
        else:
            self.socksconn.sendall(data)
            
    def meeks_write_to_socks_thread(self):
        """
        Relay queued upstream data to the SOCKS side until the session ends.

        Each pass waits up to CLIENT_MAX_POLL_INTERVAL for in_notifier.
        An idle pass adds that interval to the shared timer and stops the
        loop once the timer reports a timeout.  A pass with data resets
        the timer and drains in_queue through write_to_socks().

        The timer is reset only when there was activity, as
        meeks_read_from_socks_thread() does.  Resetting it on every pass
        would discard the idle time just counted, so the idle timeout
        could never accumulate.

        Any exception is logged and ends the loop.  The finish event is
        always set on exit.
        """
        while not self.finish.is_set():
            try:
                hasdata = self.in_notifier.wait(timeout=CLIENT_MAX_POLL_INTERVAL)
                self.in_notifier.clear()
                # Queued data also counts as activity, in case the wait
                # result does not reflect it.
                if hasdata or not self.in_queue.empty():
                    self.timer.reset()
                else:
                    self.timer.count(CLIENT_MAX_POLL_INTERVAL)
                    if self.timer.timeout():
                        break
                while not self.in_queue.empty():
                    data = self.in_queue.get()
                    log.debug("%s: RELAY-UP %d bytes" % (self.sessionid, len(data)))
                    self.write_to_socks(data)
            except Exception as ex:
                log.error("[Exception][meeks_write_to_socks_thread] %s: %s" % (self.sessionid, str(ex)))
                break
        self.finish.set()
        
    def meeks_read_from_socks_thread(self):
        """
        Read from the session's sockets and queue the data on out_queue
        until the session ends.

        An idle select adds CLIENT_MAX_POLL_INTERVAL to the shared timer
        and stops the loop on timeout; any readable socket resets it.
        When both sockets are readable only the TCP one is handled in
        that pass.  Any exception is logged and ends the loop.  The
        finish event is always set on exit.
        """
        while not self.finish.is_set():
            try:
                ready = select.select(
                    self.allsocks, [], [], CLIENT_MAX_POLL_INTERVAL)[0]
                if not ready:
                    self.timer.count(CLIENT_MAX_POLL_INTERVAL)
                    if self.timer.timeout():
                        break
                    continue

                self.timer.reset()
                if self.socksconn in ready:
                    # Readable TCP is an error once a UDP socket exists.
                    if self.udpsock:
                        raise RelaySessionError("unexcepted read-event from tcp socket in UDP session")
                    payload = self.socksconn.recv(MAX_PAYLOAD_LENGTH)
                    if not payload:
                        raise RelaySessionError("peer closed")
                    self.out_queue.put(payload)
                elif self.udpsock and self.udpsock in ready:
                    payload, _ = self.udpsock.recvfrom(MAX_PAYLOAD_LENGTH)
                    if payload:
                        self.out_queue.put(payload)
            except Exception as ex:
                log.error("[Exception][meeks_read_from_socks_thread] %s:%s" % (self.sessionid, str(ex)))
                break
        self.finish.set()
        
    def initialize(self):
        """Connect to the SOCKS server, exchange the init request/reply
        and move the session to SESSION_WAIT_REQUEST.
        """
        server_addr = (self.socksip, self.socksport)
        conn = socket.create_connection(server_addr, self.timeout)
        self.socksconn = conn
        self.allsocks = [conn]
        conn.sendall(InitRequest().pack())
        read_init_reply(conn)
        self.status = SESSION_WAIT_REQUEST
        self.initialized = True
    
    def cmd_connect(self, req):
        """Forward a CONNECT request, start the relay threads and switch
        the session to SESSION_TCP.

        :param req: request object; its packed form is sent to the SOCKS
            connection.
        :return: tuple (packed reply, list of response headers).
        """
        self.socksconn.sendall(req.pack())
        resp = read_reply(self.socksconn).pack()
        headers = [
            (HEADER_SESSION_ID, self.sessionid),
            (HEADER_MSGTYPE, MSGTYPE_DATA)
        ]

        relay_loops = (self.meeks_write_to_socks_thread,
                       self.meeks_read_from_socks_thread)
        for relay_loop in relay_loops:
            self.threads.append(gevent.spawn(relay_loop))
        # The clean thread joins the two relay threads and then releases
        # resources; it is deliberately not added to self.threads.
        gevent.spawn(self.meeks_clean_thread)
        self.status = SESSION_TCP
        return resp, headers
        
    def cmd_udp_associate(self, req):
        """Set up a UDP association, start the relay threads and switch
        the session to SESSION_UDP.

        A local UDP socket is bound and its address is sent to the SOCKS
        server in a new UDP_ASSOCIATE request; the incoming req is not
        forwarded.  The address from the reply becomes the destination
        for relayed datagrams.

        :return: tuple (packed reply, list of response headers).
        """
        self.udpsock = bind_local_udp(self.socksconn)
        self.allsocks.append(self.udpsock)
        addrtype, ip, port = sock_addr_info(self.udpsock)
        associate_req = Request(cmd=UDP_ASSOCIATE,
            addrtype=addrtype, dstaddr=ip, dstport=port)
        self.socksconn.sendall(associate_req.pack())
        reply = read_reply(self.socksconn)
        resp = reply.pack()
        headers = [
            (HEADER_SESSION_ID, self.sessionid),
            (HEADER_MSGTYPE, MSGTYPE_DATA)
        ]

        self.udp_associate = (reply.bndaddr, reply.bndport)
        relay_loops = (self.meeks_write_to_socks_thread,
                       self.meeks_read_from_socks_thread)
        for relay_loop in relay_loops:
            self.threads.append(gevent.spawn(relay_loop))
        # The clean thread joins the two relay threads and then releases
        # resources; it is deliberately not added to self.threads.
        gevent.spawn(self.meeks_clean_thread)
        self.status = SESSION_UDP
        return resp, headers
    
    def cmd_bind(self, req):
        """Reject a BIND request.

        :return: tuple (empty body, headers carrying a "Not Supported"
            error for this session).
        """
        error_headers = [
            (HEADER_SESSION_ID, self.sessionid),
            (HEADER_ERROR, "Not Supported")
        ]
        return "", error_headers
    
    def sync_socks_request(self, data, env):
        """Unpack a SOCKS request from data and run the handler for its
        command.

        :param data: packed request.
        :param env: not used by this method.
        :return: whatever the selected cmd_* handler returns.
        :raises KeyError: if the command is not CONNECT, BIND or
            UDP_ASSOCIATE.
        """
        req = Request()
        req.unpack(data)
        handlers = {
            CONNECT: self.cmd_connect,
            BIND: self.cmd_bind,
            UDP_ASSOCIATE : self.cmd_udp_associate
        }
        handler = handlers[req.cmd]
        return handler(req)
        
    def _fetch_resp(self):
        """Collect queued downstream packets.

        Takes packets from out_queue until it is empty or the collected
        size reaches MAX_PAYLOAD_LENGTH.  If nothing is queued, waits up
        to SERVER_TURNAROUND_TIMEOUT for a packet to appear, then
        collects again.

        :return: tuple (list of packets, total size); empty list and 0 if
            the wait timed out.
        """
        packets = []
        nbytes = 0
        while True:
            while nbytes < MAX_PAYLOAD_LENGTH and not self.out_queue.empty():
                packet = self.out_queue.get()
                packets.append(packet)
                nbytes += len(packet)
            if packets:
                break
            # Nothing yet: block until a packet is available (without
            # removing it) or the turnaround timeout expires.
            try:
                self.out_queue.peek(block=True, timeout=SERVER_TURNAROUND_TIMEOUT)
            except Empty:
                break
        return packets, nbytes
        
    def fetch_resp(self):
        """Build the response body and headers from queued downstream data.

        In a UDP session with data, an extra header lists the length of
        each packet, comma separated, in order.

        :return: tuple (concatenated packets, list of headers).
        """
        packets, _ = self._fetch_resp()
        headers = [
            (HEADER_SESSION_ID, self.sessionid),
            (HEADER_MSGTYPE, MSGTYPE_DATA),
        ]
        if packets and self.status == SESSION_UDP:
            sizes = ",".join(str(len(packet)) for packet in packets)
            headers.append((HEADER_UDP_PKTS, sizes))
        return "".join(packets), headers
    
    def process_tcp(self, data, env):
        if data:
            self.in_queue.put(data)
            self.in_notifier.set()
        return self.fetch_resp()
        
    def process_udp(self, data, env):
        """Split upstream data into UDP packets, queue them and return
        the pending response.

        The packet lengths are read, comma separated, from the env entry
        for the UDP packets header.

        :param data: concatenated packets; nothing is queued if empty.
        :param env: request environment holding the lengths header.
        :return: the result of fetch_resp().
        """
        if not data:
            return self.fetch_resp()
        sizes = env[header_to_env(HEADER_UDP_PKTS)].split(",")
        start = 0
        for size in sizes:
            end = start + int(size)
            self.in_queue.put(data[start:end])
            start = end
        # Wake the writer thread once all packets are queued.
        self.in_notifier.set()
        return self.fetch_resp()
        
    def process(self, data, env):
        """Handle one request for this session.

        Initializes the session on first use, then calls the handler for
        the current status.

        :return: whatever the selected handler returns.
        :raises KeyError: if the status has no handler.
        """
        if not self.initialized:
            self.initialize()

        handlers_by_status = {
            SESSION_WAIT_REQUEST: self.sync_socks_request,
            SESSION_TCP: self.process_tcp,
            SESSION_UDP: self.process_udp,
        }
        handler = handlers_by_status[self.status]
        return handler(data, env)
    
    def alive(self):
        return not self.finish.is_set()
    
    def clean(self):
        """Mark the session finished and release its resources.

        Closes every socket, empties both queues and removes the session
        from sessionmap if it is still registered there.
        """
        self.finish.set()
        for sock in self.allsocks:
            sock.close()

        for pending in (self.in_queue, self.out_queue):
            pending.queue.clear()
        if self.sessionid not in self.sessionmap:
            return
        del self.sessionmap[self.sessionid]
        log.info("%s: quit, %d sessions left" % (self.sessionid, len(self.sessionmap.keys())))
Ejemplo n.º 36
0
class MsgGenerator(gevent.Greenlet):
    def __init__(self):
        """Set up empty job and subscriber tables and apply gevent's
        monkey patches.
        """
        super(MsgGenerator, self).__init__()
        self._complete = Event()
        self._jobs = {}
        self._instances = []

        # FIXME: the whole world is monkey patched here because the
        # python side of librados uses threading.Thread.  rados itself
        # will still block on e.g. connect(), so librados probably needs
        # to be wrapped in its own non-gevent python process with RPC
        # to it.
        from gevent import monkey as gevent_monkey
        gevent_monkey.patch_all()
        gevent_monkey.patch_subprocess()

    def register(self, instance):
        """Add instance to the subscriber list unless already present."""
        if instance in self._instances:
            return
        self._instances.append(instance)

    def _emit(self, msg_event):
        """Put msg_event on every registered instance whose subscribed
        count is positive.
        """
        for subscriber in self._instances:
            # GMENO theory about memory leak
            if not subscriber.subscribed > 0:
                continue
            subscriber.put(msg_event)

    def complete(self, jid, event):
        """Drop a job from the running-jobs table and emit its event.

        :raises KeyError: if jid is not in the table.
        """
        self._jobs.__delitem__(jid)
        self._emit(event)

    def running_jobs(self):
        """Emit a RUNNING_JOBS event listing the id of every tracked job."""
        job_list = [{'jid': jid} for jid in self._jobs]
        self._emit(MsgEvent(RUNNING_JOBS, job_list))

    def run_job(self, fqdn, cmd, args):
        """Start a job in a new greenlet and return its id.

        :param fqdn: must equal this host's fully qualified domain name.
        :param cmd: passed to run_job_thread.
        :param args: passed to run_job_thread.
        :return: the new job's id, a random UUID string.
        :raises Unavailable: if fqdn is not this host.
        """
        if socket.getfqdn() != fqdn:
            raise Unavailable()

        jid = str(uuid.uuid4())

        def _job():
            return run_job_thread(self, jid, cmd, args)

        self._jobs[jid] = gevent.spawn(_job)
        return jid

    def _run(self):
        """Greenlet body: emit heartbeat events every HEARTBEAT_PERIOD
        until the _complete event is set.

        Any exception is logged with its traceback and re-raised.
        """
        try:
            while True:
                if self._complete.is_set():
                    break
                heartbeats = get_heartbeats()
                server_heartbeat, cluster_heartbeat = heartbeats
                log.debug("server_heartbeat: %s" % server_heartbeat)
                log.debug("cluster_heartbeat: %s" % cluster_heartbeat)
                # Only non-empty heartbeats are emitted.
                if server_heartbeat:
                    self._emit(MsgEvent(SERVER_HEARTBEAT, server_heartbeat))
                if cluster_heartbeat:
                    self._emit(MsgEvent(HEARTBEAT, cluster_heartbeat))

                # Sleep for one period, waking early if _complete is set.
                self._complete.wait(HEARTBEAT_PERIOD)
        except:
            # Catches everything, but always re-raises after logging.
            log.error(traceback.format_exc())
            raise
Ejemplo n.º 37
0
class MultiChannelSocket(MultiSocketHandler):
    """
    Use this class to implement virtual channels over web socket.
    To use it, inherit from this class and override the init_channels
    function, where you can register all channel handlers with the
    register_channel function.

    Example:
    class MyWebSocket(MultiChannelSocket):
        def init_channels(self):
            self.register_channel(0, NullChannelHandler)
            self.register_channel(1, FirstChannelHandler)
            ...

    Incoming frames are JSON objects. A frame without a ``channel`` key
    but with a ``pkg`` carries a global action (``pkg.action``, e.g.
    ``open`` or ``close``) that is dispatched to the method of the same
    name on this object; every other frame is forwarded through the
    ``message`` callback.
    """
    def __init__(self, request, transport):
        super(MultiChannelSocket, self).__init__(request, transport)
        self.session = WSSession()
        # Maps channel name -> instantiated channel handler.
        self.channel_handlers = {}
        self.permissions = None
        self.allowed_channels = None
        self.access_token = None
        # Set when the peer disconnects; ends the receive loop.
        self.close_event = Event()

    def clear_test_data(self):
        """Drop the TEST_DATA_KEYS entries from the channel history."""
        for key in TEST_DATA_KEYS:
            if self.channel_history.get(key):
                del self.channel_history[key]

    def write_test_data(self, test_data):
        """Copy the TEST_DATA_KEYS entries of ``test_data`` into the history.

        Raises KeyError when ``test_data`` lacks one of the keys.
        """
        for key in TEST_DATA_KEYS:
            self.channel_history[key] = test_data[key]

    # noinspection PyUnusedLocal
    def __call__(self, env, start_response):
        """Run the receive loop for the web socket found in ``env``.

        Reads frames until the socket errors out or the peer disconnects,
        dispatching global actions and channel messages, then fires the
        ``close`` callback.

        Raises AssertionError when a client requests an action whose name
        starts with ``_`` or that is not wrapped by the ``action``
        decorator.
        """
        websocket = env.get('wsgi.websocket')
        if not websocket:
            # NOTE(review): there is no ``return`` here, unlike in
            # ``open``. Unless bad_request() raises, the loop below runs
            # with ``self.ws`` unset and ends through the generic
            # ``except Exception`` branch - confirm this is intended.
            self.bad_request()
        self.ws = websocket
        # Endless event loop
        while 1:
            try:
                data = self.ws.receive()
                self.clear_test_data()
            except WebSocketError as e:
                if is_ws_error_abnormal(e):
                    log.error('WebSocket fault: %s' % e.message,
                              extra=self.channel_history)
                break
            except Exception:
                f = Formatter()
                traceback = f.formatException(sys.exc_info())
                log.error('Servlet fault: \n%s' % traceback,
                          extra=self.channel_history)
                break

            if data:
                jd = json.loads(data)
                # Test data travels inside the package; move it into the
                # channel history and strip it from the payload.
                if jd.get('pkg') \
                        and jd['pkg'].get('data') \
                        and isinstance(jd['pkg']['data'], dict)\
                        and jd['pkg']['data'].get('testData'):
                    self.write_test_data(jd['pkg']['data']['testData'])
                    del jd['pkg']['data']['testData']
                self.channel_history['messages'].append(jd)
                if hasattr(self.session, 'sess') and self.session.sess:
                    self.channel_history['session_id'] = self.session.sess.id
                    self.channel_history['user_id'] = self.session.sess.user_id
                if not jd.get('channel') and jd.get('pkg'):
                    # Global action: dispatch to the method named by the
                    # client.
                    act = jd['pkg'].get('action')
                    # These checks stop clients from invoking arbitrary
                    # attributes. They are explicit raises rather than
                    # ``assert`` statements because ``python -O`` strips
                    # asserts, which would silently disable them.
                    if act.startswith('_'):
                        raise AssertionError("security violation")
                    try:
                        handler = getattr(self, act)
                    except WebSocketError as e:
                        # NOTE(review): getattr() normally raises
                        # AttributeError for an unknown action, which this
                        # clause does not catch - confirm intent.
                        if is_ws_error_abnormal(e):
                            f = Formatter()
                            traceback = f.formatException(sys.exc_info())
                            log.error('Global channel action error: \n%s' %
                                      traceback,
                                      extra=self.channel_history)
                        break
                    # Only methods wrapped by the ``action`` decorator may
                    # be called from outside.
                    if handler.__name__ != "action_wrapper":
                        raise AssertionError(
                            "%s is not allowed to be executed externally."
                            % act)
                    handler(jd['pkg']['data'])
                    continue
                if self.check_permissions \
                        and not self.validate_send(jd.get('channel')):
                    # Echo the frame back with a 403 result.
                    jd['result'] = 403
                    if not self.ws.closed:
                        try:
                            self.ws.send(json.dumps(jd))
                        except WebSocketError as e:
                            if is_ws_error_abnormal(e):
                                log.error('WebSocket fault: %s' % e.message,
                                          extra=self.channel_history)
                    continue
                else:
                    self.run_callback('message', ApiSocketMessage(data))
            else:
                log.debug('Web Socket is disconnected')
                self.close_event.set()
            if self.close_event.is_set():
                break
        self.run_callback('close')

    @action
    def open(self, data):
        """Global action: open the channel named in ``data['channel']``.

        Requires ``token`` and ``channel`` in ``data``. Answers with a 404
        for a channel missing from ``allowed_channels``, with a 403 when
        permissions or authentication fail, and otherwise registers the
        handler and sends an ``open`` package with result 200.
        """
        self.propagate_greenlet_data(data)
        if not data.get('token'):
            self.bad_request(message='No access token, exit')
            return

        if not data.get('channel'):
            self.bad_request(message='No channel name')
            return

        self.access_token = data.get('token')
        self.reopen = data.get('reopen', False)
        self.pre_open()
        handler = self.allowed_channels.get(data.get('channel'))
        if not handler:
            return self.channel_404(data['channel'])

        if self.check_permissions \
                and not self.validate_open(data.get('channel')):
            return self.send_error_code(403, 'open', data.get('channel'))
        if not self.is_auth:
            return self.send_error_code(403, 'open', data.get('channel'))

        # ``handler`` was a class up to here; from now on it is the
        # instance created by register_channel.
        handler = self.register_channel(data.get('channel'), handler)
        self.run_callback('open')
        pkg = {
            'action': 'open',
            'data': {
                'closable': handler.closable,
                'result': 200
            },
        }
        package_to_send = {
            'channel': data.get('channel'),
            'pkg': pkg,
            'session_params': self.session.params
        }
        raw_data = json.dumps(package_to_send, default=datahandler)
        self.after_open()
        try:
            self.ws.send(raw_data)
        except WebSocketError as e:
            if is_ws_error_abnormal(e):
                log.error('WebSocket fault: %s' % e.message,
                          extra=self.channel_history)

    @action
    def close(self, data):
        """Global action: close the channel named in ``data['channel']``.

        Answers with result 501 when the handler is not closable, otherwise
        calls the handler's ``onclose``, unregisters it and answers with
        result 200.

        Raises Exception when ``data`` has no channel name.
        """
        if not data.get('channel'):
            raise Exception('No channel name, exit')

        handler = self.channel_handlers.get(data['channel'])

        if not handler:
            return self.channel_404(data['channel'])

        if self.check_permissions and not self.validate_close(data['channel']):
            # NOTE(review): the action reported here is 'open' although
            # this is the close handler - looks like a copy/paste slip,
            # kept as is because clients may rely on it.
            return self.send_error_code(403, 'open', data['channel'])

        if not handler.closable and not self.ws.closed:
            try:
                self.ws.send(
                    json.dumps({
                        'pkg': {
                            'action': 'close',
                            'data': {
                                'channel': data['channel'],
                                'result': 501
                            },
                        },
                    }))
            except WebSocketError as e:
                if is_ws_error_abnormal(e):
                    log.error('WebSocket fault: %s' % e.message,
                              extra=self.channel_history)
            return

        handler.onclose()
        del self.channel_handlers[data['channel']]

        if not self.ws.closed:
            try:
                self.ws.send(
                    json.dumps({
                        'pkg': {
                            'action': 'close',
                            'data': {
                                'channel': data['channel'],
                                'result': 200
                            },
                        },
                    }))
            except WebSocketError as e:
                if is_ws_error_abnormal(e):
                    log.error('WebSocket fault: %s' % e.message,
                              extra=self.channel_history)

    def onopen(self):
        """Forward the open notification to every registered handler."""
        for channel_handler in self.channel_handlers.values():
            channel_handler.onopen()

    def onclose(self):
        """Forward the close notification to every registered handler."""
        for channel_handler in self.channel_handlers.values():
            channel_handler.onclose()

    def onmessage(self, msg):
        """Route ``msg`` to the handler of the channel named in its data.

        When RIEMANN_USE is set, the handling time is also queued on
        RIEMANN_QUEUE under ``ws.<channel>.<action>``.

        Raises MultiChannelWSError when the message carries no channel.
        """
        channel = msg.data.get('channel')
        if channel is None:
            raise MultiChannelWSError('No such channel ID in request')
        channel_handler = self.channel_handlers.get(channel)
        if not channel_handler:
            return self.channel_404(channel)
        if RIEMANN_USE:
            start = time.time()
            # The action name is used only to label the timing metric.
            act = 'unknown'
            if 'action' in msg.data['pkg']:
                act = msg.data['pkg']['action']
            elif 'Action' in msg.data['pkg']:
                if 'name' in msg.data['pkg']['Action']:
                    act = msg.data['pkg']['Action']['name']
            channel_handler.onmessage(msg.data)
            RIEMANN_QUEUE.put(
                ("ws.%s.%s" % (channel, act), time.time() - start))
        else:
            channel_handler.onmessage(msg.data)

    def validate(self, permission_name):
        """Return True if ``permission_name`` is granted.

        Always False while no permissions object is set.
        """
        if self.permissions:
            return bool(self.permissions.get_perm(permission_name))
        return False

    def validate_open(self, channel):
        """Check the ``<channel>.ws.open`` permission."""
        return self.validate('%s.ws.open' % channel)

    def validate_close(self, channel):
        """Check the ``<channel>.ws.close`` permission."""
        return self.validate('%s.ws.close' % channel)

    def validate_send(self, channel):
        """Check the ``<channel>.ws.send`` permission."""
        return self.validate('%s.ws.send' % channel)

    def register_channel(self, channel, channel_handler_class):
        """Registers new channel with channel id - channel and channel handler
           class - channel_handler_class

        Instantiates the handler class, stores the instance in
        ``channel_handlers`` under ``channel`` and returns it.
        """
        channel_handler = channel_handler_class(self.request, channel, self.ws,
                                                self.session, self.permissions,
                                                self.channel_history)
        self.channel_handlers[channel] = channel_handler
        return channel_handler
Ejemplo n.º 38
0
class GPing:
    """
    This class, when instantiated will start listening for ICMP responses.
    Then call its send method to send pings. Callbacks will be sent ping
    details
    """
    def __init__(self,timeout=2,max_outstanding=100):
        """
        :timeout            - amount of time a ICMP echo request can be outstanding
        :max_outstanding    - maximum number of outstanding ICMP echo requests without responses (limits traffic)

        Opens a raw ICMP socket and spawns the receiver and the timeout
        greenlets. Raises socket.error when the socket cannot be created.
        """
        self.timeout = timeout
        self.max_outstanding = max_outstanding

        # id we will increment with each ping
        self.id = 0

        # object to hold and keep track of all of our self.pings
        self.pings = {}

        # Hold failures
        self.failures = []

        # event to fire when we want to shut down
        self.die_event = Event()

        # setup socket
        icmp = socket.getprotobyname("icmp")
        try:
            self.socket = socket.socket(socket.AF_INET, socket.SOCK_RAW, icmp)
        except socket.error as e:
            if e.errno == 1:
                # Operation not permitted
                e.message = str(e) + (
                    " - Note that ICMP messages can only be sent from processes"
                    " running as root."
                )
                raise socket.error(e.message)
            raise # raise the original error

        self.receive_glet = gevent.spawn(self.__receive__)
        self.processto_glet = gevent.spawn(self.__process_timeouts__)


    def die(self):
        """
        try to shut everything down gracefully
        """
        print("shutting down")
        self.die_event.set()
        # The receiver greenlet blocks in wait_read() and never looks at
        # die_event, so it has to be interrupted explicitly. The previous
        # argument-less socket.cancel_wait() call could not do that:
        # cancel_wait() requires the watcher to cancel and raised TypeError
        # before the join below was reached.
        self.receive_glet.kill()
        gevent.joinall([self.receive_glet,self.processto_glet])


    def join(self):
        """
        does a lot of nothing until self.pings is empty
        """
        while len(self.pings):
            # yield to the other greenlets so pings can complete
            gevent.sleep()


    def send(self, dest_addr, callback, idx, current_data, data, datapsize=64):
        """
        Send a ICMP echo request.
        :dest_addr    - where to send it
        :callback     - what to call with the ping record when the ping
                        succeeds or fails
        :idx          - stored in the ping record under 'idx'
        :current_data - indexable; item 1 is the number of packets to send
                        and item 4 is stored as 'application_id'
        :data         - stored in the ping record under 'data_to_write_to'
        :datapsize    - packet size in bytes, including the 8 byte ICMP header

        Returns early, with the ping record marked as failed, when the
        host name cannot be resolved.
        """
        number_of_packages = current_data[1]


        # make sure we dont have too many outstanding requests
        while len(self.pings) >= self.max_outstanding:
            gevent.sleep()

        psize = datapsize
        # figure out our id
        packet_id = self.id

        # increment our id, but wrap if we go over the max size for USHORT
        self.id = (self.id + 1) % 2 ** 16


        # make a spot for this ping in self.pings
        self.pings[packet_id] = {'sent':False,'success':False,'error':False,'dest_addr':dest_addr,'dest_ip':None,'callback':callback,
        'idx': idx, 'current_data': current_data, 'data_to_write_to': data, 'dtime': time.time(), 'packages_received': 0 }

        # Resolve hostname
        try:
            dest_ip = socket.gethostbyname(dest_addr)
            self.pings[packet_id]['dest_ip'] = dest_ip
        except socket.gaierror as ex:
            # The timeout greenlet reports the failure to the callback.
            self.pings[packet_id]['error'] = True
            self.pings[packet_id]['message'] = str(ex)
            return


        # Remove header size from packet size
        psize = psize - 8

        # Header is type (8), code (8), checksum (16), id (16), sequence (16)
        my_checksum = 0

        # Make a dummy header with a 0 checksum.
        header = struct.pack("bbHHh", ICMP_ECHO_REQUEST, 0, my_checksum, packet_id, 1)
        my_bytes = struct.calcsize("d")
        # Payload: the send timestamp followed by padding up to psize.
        data = (psize - my_bytes) * "Q"
        data = struct.pack("d", time.time()) + bytes(data, "utf-8")

        # Calculate the checksum on the data and the dummy header.
        my_checksum = checksum(header + data)

        # Now that we have the right checksum, we put that in. It's just easier
        # to make up a new header than to stuff it into the dummy.
        header = struct.pack(
            "bbHHh", ICMP_ECHO_REQUEST, 0, socket.htons(my_checksum), packet_id, 1
        )
        packet = header + data
        # note the send_time for checking for timeouts
        self.pings[packet_id]['data'] = data
        self.pings[packet_id]['application_id'] = current_data[4]
        self.pings[packet_id]['send_time'] = time.time()

        # send the packet
        for i in range(number_of_packages):
            self.socket.sendto(packet, (dest_ip, 1)) # Don't know about the 1

        #mark the packet as sent
        self.pings[packet_id]['sent'] = True


    def __process_timeouts__(self):
        """
        check to see if any of our pings have timed out

        Runs until die_event is set. Each pass reports at most one failed
        ping: the scan stops right after the deletion so the dict is never
        iterated after it changed size.
        """
        while not self.die_event.is_set():
            for i in self.pings:

                # Detect timeout
                if self.pings[i]['sent'] and time.time() - self.pings[i]['send_time'] > self.timeout:
                    self.pings[i]['error'] = True
                    self.pings[i]['message'] = 'Timeout after {} seconds'.format(self.timeout)

                # Handle all failures
                if self.pings[i]['error'] == True:
                    self.pings[i]['callback'](self.pings[i])
                    self.failures.append(self.pings[i])
                    del(self.pings[i])
                    break

            gevent.sleep()


    def __receive__(self):
        """
        receive response packets

        Loops forever; for every reply whose id matches an outstanding ping
        the reply counter is bumped, and once all expected replies arrived
        the callback is invoked and the ping is forgotten.
        """
        while 1:
            # wait till we can recv
            try:
                socket.wait_read(self.socket.fileno())
            except socket.error as e:
                if e.errno == socket.EBADF:
                    print("interrupting wait_read")
                    return
                # reraise original exceptions
                print("re-throwing socket exception on wait_read()")
                raise

            time_received = time.time()
            received_packet, addr = self.socket.recvfrom(64)

            # Bytes 20-27 are read as the ICMP header; this assumes a
            # 20 byte IP header in front of it.
            icmpHeader = received_packet[20:28]
            type, code, checksum, packet_id, sequence = struct.unpack(
                "bbHHh", icmpHeader
            )


            if packet_id in self.pings:
                # The payload starts with the timestamp written by send().
                bytes_received = struct.calcsize("d")
                time_sent = struct.unpack("d", received_packet[28:28 + bytes_received])[0]


                # i'd call that a success
                # call our callback if we've got one

                self.pings[packet_id]['packages_received'] = self.pings[packet_id]['packages_received'] + 1

                if self.pings[packet_id]['packages_received'] == self.pings[packet_id]['current_data'][1]:
                    self.pings[packet_id]['delay'] = time_received - time_sent
                    self.pings[packet_id]['success'] = True
                    self.pings[packet_id]['callback'](self.pings[packet_id])
                    del(self.pings[packet_id])

    def print_failures(self):
        """Print one line per recorded failure: host name, then message."""
        template = '{hostname:45}{message}'
        for failure in self.failures:
            message = template.format(hostname=failure['dest_addr'], message=failure.get('message', 'unknown error'))
            # Previously the formatted line was built and thrown away.
            print(message)
Ejemplo n.º 39
0
class TreeHolderCleaner(object):
    """Release tree holders that have not been used for a long time.

    Usage is tracked in a Redis sorted set (REDIS_KEY) whose members are
    ``application_name:type_name`` strings scored with the time of their
    last use. Cleaning only runs when the feature switches are on and the
    configured resource condition holds.
    """

    def __init__(self, tree_hub):
        self._tree_hub = tree_hub
        # The setting is multiplied by the number of seconds in a day.
        self._old_offset = (60 * 60 * 24 *
                            settings.TREE_HOLDER_CLEANER_OLD_OFFSET)
        self._period = settings.TREE_HOLDER_CLEANER_PERIOD
        self._stopped = Event()

    def track(self, application_name, type_name):
        """Record that the given tree holder was used just now.

        Does nothing when the tracking switch is off; Redis failures are
        logged and otherwise ignored.
        """
        if not switch.is_switched_on(SWITCH_ENABLE_TREE_HOLDER_CLEANER_TRACK):
            return

        name = '{}:{}'.format(application_name, type_name)
        score = time.time()
        try:
            redis_client.zadd(REDIS_KEY, **{name: score})
        except Exception as e:
            logger.warning('tree holder cleaner track item failed: %s', e)

    def clean(self):
        """Run one cleaning pass if both switches and the condition allow it."""
        if not (switch.is_switched_on(SWITCH_ENABLE_TREE_HOLDER_CLEANER_TRACK)
                and switch.is_switched_on(
                    SWITCH_ENABLE_TREE_HOLDER_CLEANER_CLEAN, False)):
            return

        if self._is_time_to_clean():
            self._clean()

    def spawn_cleaning_thread(self):
        """Start the periodic cleaning loop in a new greenlet."""
        gevent.spawn(self._worker)

    def _worker(self):
        """Call ``clean`` every ``_period`` until the stop event is set."""
        while not self._stopped.is_set():
            self.clean()
            gevent.sleep(self._period)

    def _clean(self):
        """Release every holder whose last use is older than the offset."""
        max_score = time.time() - self._old_offset
        try:
            items = redis_client.zrangebyscore(REDIS_KEY, 0, max_score)
        except Exception as e:
            logger.warning('get tree holder cleaner data failed: %s', e)
            return

        for key in items:
            application_name, type_name = key.split(':')
            holder = self._tree_hub.release_tree_holder(
                application_name, type_name)
            if holder is not None:
                logger.info('release unused tree holder: %s %s',
                            application_name, type_name)
                monitor_client.increment('tree_holder.release_unused',
                                         tags={
                                             'application_name':
                                             application_name,
                                             'appid': application_name,
                                             'type_name': type_name,
                                         })

        self._clean_old_redis_data()

    def _clean_old_redis_data(self):
        """Drop tracking entries older than three times the offset."""
        max_score = time.time() - self._old_offset * 3
        try:
            redis_client.zremrangebyscore(REDIS_KEY, 0, max_score)
        except Exception as e:
            logger.warning('clean tree holder cleaner old data failed: %s', e)

    def _is_time_to_clean(self):
        """Evaluate the configured condition against current CPU/memory use.

        Returns False when no condition is configured or when it cannot be
        evaluated (the error is logged and reported).
        """
        condition = settings.TREE_HOLDER_CLEANER_CONDITION
        if not condition:
            return False

        cpu = self._get_cpu_percent()
        memory = self._get_virtual_memory_percent()
        # e.g. 'cpu < 50 and memory > 90'
        condition = condition.replace('cpu',
                                      str(cpu)).replace('memory', str(memory))
        try:
            # NOTE(security): eval() executes whatever expression the
            # setting contains; empty globals/locals do not sandbox it.
            # Only safe while the setting comes from a trusted source.
            return eval(condition, {}, {})
        except Exception as e:
            # Narrowed from BaseException so that KeyboardInterrupt,
            # SystemExit and greenlet kills are no longer swallowed here.
            logger.error('invalid tree holder cleaner condition: %r %s',
                         condition, e)
            capture_exception('invalid tree holder cleaner condition')
            return False

    def _get_cpu_percent(self):
        """Return the CPU utilisation reported by psutil."""
        return psutil.cpu_percent()

    def _get_virtual_memory_percent(self):
        """Return the virtual memory usage percentage reported by psutil."""
        return psutil.virtual_memory().percent
Ejemplo n.º 40
0
class RaidenService:
    """ A Raiden node. """

    # pylint: disable=too-many-instance-attributes,too-many-public-methods

    def __init__(self, chain, default_registry, private_key_bin, transport,
                 discovery, config):
        """Validate the configuration, wire up all components and start the node.

        The endpoint registration is spawned early and runs concurrently
        with the rest of the setup; the constructor waits for it before
        calling ``start()``.

        Raises:
            ValueError: If ``private_key_bin`` is not 32 bytes long or
                ``config['settle_timeout']`` is outside the allowed range.
        """
        if not isinstance(private_key_bin,
                          bytes) or len(private_key_bin) != 32:
            raise ValueError('invalid private_key')

        invalid_timeout = (
            config['settle_timeout'] < NETTINGCHANNEL_SETTLE_TIMEOUT_MIN
            or config['settle_timeout'] > NETTINGCHANNEL_SETTLE_TIMEOUT_MAX)
        if invalid_timeout:
            raise ValueError('settle_timeout must be in range [{}, {}]'.format(
                NETTINGCHANNEL_SETTLE_TIMEOUT_MIN,
                NETTINGCHANNEL_SETTLE_TIMEOUT_MAX))

        # Lookup tables, all empty until the registry is registered or a
        # snapshot is restored.
        self.token_to_channelgraph = dict()
        self.tokens_to_connectionmanagers = dict()
        self.manager_to_token = dict()
        self.swapkey_to_tokenswap = dict()
        self.swapkey_to_greenlettask = dict()

        self.identifier_to_statemanagers = defaultdict(list)
        self.identifier_to_results = defaultdict(list)

        # This is a map from a hashlock to a list of channels, the same
        # hashlock can be used in more than one token (for tokenswaps), a
        # channel should be removed from this list only when the lock is
        # released/withdrawn but not when the secret is registered.
        self.token_to_hashlock_to_channels = defaultdict(
            lambda: defaultdict(list))

        self.chain = chain
        self.default_registry = default_registry
        self.config = config
        self.privkey = private_key_bin
        self.address = privatekey_to_address(private_key_bin)

        # Register the endpoint in the background; it is joined at the end
        # of the constructor.
        endpoint_registration_event = gevent.spawn(
            discovery.register,
            self.address,
            config['external_ip'],
            config['external_port'],
        )
        endpoint_registration_event.link_exception(
            endpoint_registry_exception_handler)

        self.private_key = PrivateKey(private_key_bin)
        self.pubkey = self.private_key.public_key.format(compressed=False)
        self.protocol = RaidenProtocol(
            transport,
            discovery,
            self,
            config['protocol']['retry_interval'],
            config['protocol']['retries_before_backoff'],
            config['protocol']['nat_keepalive_retries'],
            config['protocol']['nat_keepalive_timeout'],
            config['protocol']['nat_invitation_timeout'],
        )

        # TODO: remove this cyclic dependency
        transport.protocol = self.protocol

        self.message_handler = RaidenMessageHandler(self)
        self.state_machine_event_handler = StateMachineEventHandler(self)
        self.blockchain_events = BlockchainEvents()
        self.greenlet_task_dispatcher = GreenletTasksDispatcher()
        self.on_message = self.message_handler.on_message
        self.alarm = AlarmTask(chain)
        self.shutdown_timeout = config['shutdown_timeout']
        # Cached block number, primed in start().
        self._block_number = None
        self.stop_event = Event()
        self.start_event = Event()
        self.chain.client.inject_stop_event(self.stop_event)

        self.transaction_log = StateChangeLog(
            storage_instance=StateChangeLogSQLiteBackend(
                database_path=config['database_path']))

        # An in-memory database has no directory, so neither snapshots nor
        # a lock file are used for it.
        if config['database_path'] != ':memory:':
            self.database_dir = os.path.dirname(config['database_path'])
            self.lock_file = os.path.join(self.database_dir, '.lock')
            self.snapshot_dir = os.path.join(self.database_dir, 'snapshots')
            self.serialization_file = os.path.join(self.snapshot_dir,
                                                   'data.pickle')

            if not os.path.exists(self.snapshot_dir):
                os.makedirs(self.snapshot_dir)

            # Prevent concurrent access to the same db
            self.db_lock = filelock.FileLock(self.lock_file)
        else:
            self.database_dir = None
            self.lock_file = None
            self.snapshot_dir = None
            self.serialization_file = None
            self.db_lock = None

        # If the endpoint registration fails the node will quit, this must
        # finish before starting the protocol
        endpoint_registration_event.join()

        self.start()

    def start(self):
        """ Start the node.

        Starts the alarm task, registers the default registry, restores
        the snapshot (when a database directory is configured), starts the
        protocol and the neighbour health checks, and finally sets
        ``start_event``. The order of these steps matters; see the
        comments below.
        """
        # XXX Should this really be here? Or will start() never be called again
        # after stop() in the lifetime of Raiden apart from the tests? This is
        # at least at the moment prompted by tests/integration/test_transer.py
        if self.stop_event and self.stop_event.is_set():
            self.stop_event.clear()

        self.alarm.start()

        # Prime the block number cache and set the callbacks
        self._block_number = self.alarm.last_block_number
        self.alarm.register_callback(self.poll_blockchain_events)
        self.alarm.register_callback(self.set_block_number)

        # Registry registration must start *after* the alarm task, this avoid
        # corner cases were the registry is queried in block A, a new block B
        # is mined, and the alarm starts polling at block C.
        self.register_registry(self.default_registry.address)

        # Restore from snapshot must come after registering the registry as we
        # need to know the registered tokens to populate `token_to_channelgraph`
        if self.database_dir is not None:
            # timeout=0: do not wait for a lock held by someone else.
            self.db_lock.acquire(timeout=0)
            assert self.db_lock.is_locked
            self.restore_from_snapshots()

        # Start the protocol after the registry is queried to avoid warning
        # about unknown channels.
        self.protocol.start()

        # Health check needs the protocol layer
        self.start_neighbours_healthcheck()

        # Signal that the start-up sequence has completed.
        self.start_event.set()

    def start_neighbours_healthcheck(self):
        """Start a health check for every neighbour of every channel graph.

        The bootstrap address is skipped.
        """
        all_neighbours = (
            neighbour
            for graph in self.token_to_channelgraph.values()
            for neighbour in graph.get_neighbours()
        )
        for neighbour in all_neighbours:
            if neighbour != ConnectionManager.BOOTSTRAP_ADDR:
                self.start_health_check_for(neighbour)

    def stop(self):
        """ Stop the node.

        Signals the stop event, stops the protocol and the alarm task,
        waits (bounded by ``shutdown_timeout``) for the remaining
        greenlets, uninstalls the blockchain event listeners, saves a
        snapshot and releases the database lock.
        """
        # The stop event has to be set before any greenlet is joined.
        self.stop_event.set()
        self.protocol.stop_and_wait()
        self.alarm.stop_async()

        pending = [self.alarm]
        pending += self.protocol.greenlets
        pending += self.greenlet_task_dispatcher.stop()
        # Bounded wait: without the timeout this could loop forever trying
        # to contact a disconnected client.
        gevent.wait(pending, timeout=self.shutdown_timeout)

        # The filters may only be uninstalled once the alarm task has
        # stopped: the events are polled by an alarm task callback, and
        # `poll_blockchain_events` would fail if the filters disappeared
        # while the alarm task is still running.
        #
        # The timeout again guards against endlessly retrying a
        # disconnected client.
        try:
            with gevent.Timeout(self.shutdown_timeout):
                self.blockchain_events.uninstall_all_event_listeners()
        except (gevent.timeout.Timeout, RaidenShuttingDown):
            pass

        # Persist the state now that all tasks are done.
        snapshot_path = self.serialization_file
        if snapshot_path:
            save_snapshot(snapshot_path, self)

        lock = self.db_lock
        if lock is not None:
            lock.release()

    def __repr__(self):
        """Return ``<ClassName address>`` with the address passed through ``pex``."""
        class_name = self.__class__.__name__
        printable_address = pex(self.address)
        return '<{} {}>'.format(class_name, printable_address)

    def restore_from_snapshots(self):
        """Restore channels, queues, protocol state and transfers from disk.

        Nothing is restored unless a snapshot exists and its
        ``registry_address`` equals ROPSTEN_REGISTRY_ADDRESS.
        """
        data = load_snapshot(self.serialization_file)
        snapshot_usable = (
            data is not None
            and 'registry_address' in data
            and data['registry_address'] == ROPSTEN_REGISTRY_ADDRESS
        )
        if not snapshot_usable:
            return

        nothing_restored_yet = True
        for channel in data['channels']:
            try:
                self.restore_channel(channel)
                nothing_restored_yet = False
            except AddressWithoutCode as e:
                log.warn(
                    'Channel without code while restoring. Must have been '
                    'already settled while we were offline.',
                    error=str(e))
            except AttributeError as e:
                # Tolerated only before any channel was restored; later on
                # it is treated as a real error.
                if not nothing_restored_yet:
                    raise
                log.warn(
                    'AttributeError during channel restoring. If code has changed'
                    ' then this is fine. If not then please report a bug.',
                    error=str(e))
                break

        for restored_queue in data['queues']:
            self.restore_queue(restored_queue)

        protocol = self.protocol
        protocol.receivedhashes_to_acks = data['receivedhashes_to_acks']
        protocol.nodeaddresses_to_nonces = data['nodeaddresses_to_nonces']

        self.restore_transfer_states(data['transfers'])

    def set_block_number(self, block_number):
        """Propagate a new block to all tasks and channels, then cache its number."""
        block_state_change = Block(block_number)
        event_handler = self.state_machine_event_handler
        event_handler.log_and_dispatch_to_all_tasks(block_state_change)

        all_channels = (
            channel
            for graph in self.token_to_channelgraph.values()
            for channel in graph.address_to_channel.values()
        )
        for channel in all_channels:
            channel.state_transition(block_state_change)

        # The cached value is updated last, after every state task has
        # seen the new block, to avoid races.
        self._block_number = block_number

    def set_node_network_state(self, node_address, network_state):
        """Set ``network_state`` on every channel whose partner is ``node_address``."""
        for graph in self.token_to_channelgraph.values():
            partner_channel = graph.partneraddress_to_channel.get(node_address)
            if not partner_channel:
                continue
            partner_channel.network_state = network_state

    def start_health_check_for(self, node_address):
        """Ask the protocol layer to start a health check for ``node_address``."""
        protocol = self.protocol
        protocol.start_health_check(node_address)

    def get_block_number(self):
        """Return the cached block number (``None`` until it has been set)."""
        cached_block_number = self._block_number
        return cached_block_number

    def poll_blockchain_events(self, current_block=None):
        """Dispatch every state change polled for the cached block number.

        ``current_block`` is accepted but not used.
        """
        # pylint: disable=unused-argument
        dispatch = self.state_machine_event_handler.on_blockchain_statechange
        polled_changes = self.blockchain_events.poll_state_change(
            self._block_number)

        for polled_change in polled_changes:
            dispatch(polled_change)

    def find_channel_by_address(self, netting_channel_address_bin):
        """Return the channel registered under the given address.

        The channel graphs are searched in order and the first match wins.

        Raises:
            ValueError: If no graph knows the address.
        """
        lookups = (
            graph.address_to_channel.get(netting_channel_address_bin)
            for graph in self.token_to_channelgraph.values()
        )
        found = next((hit for hit in lookups if hit is not None), None)
        if found is None:
            raise ValueError('unknown channel {}'.format(
                encode_hex(netting_channel_address_bin)))
        return found

    def sign(self, message):
        """ Sign message inplace.

        Raises:
            ValueError: If ``message`` is not a ``SignedMessage``.
        """
        if isinstance(message, SignedMessage):
            message.sign(self.private_key, self.address)
            return

        raise ValueError('{} is not signable.'.format(repr(message)))

    def send_async(self, recipient, message):
        """ Send `message` to `recipient` using the raiden protocol.

        Resending is handled by the protocol: it repeats the message on a
        given interval until an Acknowledgment arrives or a given number
        of tries is reached.

        Raises:
            ValueError: If `recipient` is not a valid address or is this
                node's own address.
        """
        recipient_is_valid = isaddress(recipient)
        if not recipient_is_valid:
            raise ValueError('recipient is not a valid address.')

        sending_to_self = recipient == self.address
        if sending_to_self:
            raise ValueError('programming error, sending message to itself')

        protocol = self.protocol
        return protocol.send_async(recipient, message)

    def send_and_wait(self, recipient, message, timeout):
        """ Send `message` to `recipient` and wait for the response or `timeout`.

        Args:
            recipient (address): The address of the node that will receive the
                message.
            message: The transfer message.
            timeout (float): How long should we wait for a response from `recipient`.

        Returns:
            The value returned by `self.protocol.send_and_wait`:

            None: If the wait timed out
            object: The result from the event

        Raises:
            ValueError: If `recipient` is not a valid address.
        """
        if not isaddress(recipient):
            raise ValueError('recipient is not a valid address.')

        # The protocol's result has to be handed back to the caller, otherwise
        # this method always returns None and a timeout cannot be told apart
        # from a successful wait.
        return self.protocol.send_and_wait(recipient, message, timeout)

    def register_secret(self, secret: bytes):
        """ Register the secret with any channel that has a hashlock on it.

        This must search through all channels registered for a given hashlock
        and ignoring the tokens. Useful for refund transfer, split transfer,
        and token swaps.

        For every channel registered for the hashlock the secret is registered
        on the channel and a signed RevealSecret message is sent to the
        channel's partner.

        Raises:
            TypeError: If secret is unicode data.
        """
        if not isinstance(secret, bytes):
            raise TypeError('secret must be bytes')

        hashlock = sha3(secret)
        revealsecret_message = RevealSecret(secret)
        self.sign(revealsecret_message)

        for hashlock_to_channels in self.token_to_hashlock_to_channels.values():
            # Use `.get` instead of indexing: most tokens have no channel
            # registered for this hashlock. Indexing would raise KeyError on a
            # plain dict, and on a defaultdict it would leave an empty entry
            # behind for every token searched.
            for channel in hashlock_to_channels.get(hashlock, ()):
                channel.register_secret(secret)

                # The protocol ignores duplicated messages.
                self.send_async(
                    channel.partner_state.address,
                    revealsecret_message,
                )

    def register_channel_for_hashlock(self, token_address, channel, hashlock):
        """ Remember that `channel` holds a lock for `hashlock`.

        The channel is added to the list kept for `token_address` and
        `hashlock`, unless it is already in that list.
        """
        hashlock_to_channels = self.token_to_hashlock_to_channels[token_address]
        registered = hashlock_to_channels[hashlock]

        if channel in registered:
            return

        registered.append(channel)

    def handle_secret(  # pylint: disable=too-many-arguments
            self, identifier, token_address, secret, partner_secret_message,
            hashlock):
        """ Unlock/Withdraw locks, register the secret, and send Secret
        messages as necessary.

        This function will:
            - Unlock the locks created by this node and send a Secret message to
            the corresponding partner so that she can withdraw the token.
            - Withdraw the lock from sender.
            - Register the secret for the locks received and reveal the secret
            to the senders

        Args:
            identifier: Transfer identifier, forwarded to
                `channel.create_secret`.
            token_address: Token whose hashlock registrations are processed.
            secret: The secret that unlocks `hashlock`.
            partner_secret_message: Optional message received from a partner.
                When its sender and channel match a registered channel it is
                registered on that channel as a transfer.
            hashlock: The hashlock the channels were registered for.

        Note:
            The channel needs to be registered with
            `raiden.register_channel_for_hashlock`.
        """
        # handling the secret needs to:
        # - unlock the token for all `forward_channel` (the current one
        #   and the ones that failed with a refund)
        # - send a message to each of the forward nodes allowing them
        #   to withdraw the token
        # - register the secret for the `originating_channel` so that a
        #   proof can be made, if necessary
        # - reveal the secret to the `sender` node (otherwise we
        #   cannot withdraw the token)
        channels_list = self.token_to_hashlock_to_channels[token_address][
            hashlock]
        channels_to_remove = list()

        revealsecret_message = RevealSecret(secret)
        self.sign(revealsecret_message)

        messages_to_send = []
        for channel in channels_list:
            # unlock a pending lock
            if channel.our_state.is_known(hashlock):
                # Keep the message in its own name. Rebinding `secret` here
                # would hand the message object, instead of the raw secret, to
                # every channel processed after this one.
                secret_message = channel.create_secret(identifier, secret)
                self.sign(secret_message)

                channel.register_transfer(
                    self.get_block_number(),
                    secret_message,
                )

                messages_to_send.append((
                    channel.partner_state.address,
                    secret_message,
                ))

                channels_to_remove.append(channel)

            # withdraw a pending lock
            elif channel.partner_state.is_known(hashlock):
                # The partner message only counts for this channel when it was
                # sent by the channel's partner and refers to this channel.
                is_balance_proof = (
                    partner_secret_message and
                    partner_secret_message.sender == channel.partner_state.address and
                    partner_secret_message.channel == channel.channel_address
                )

                if is_balance_proof:
                    channel.register_transfer(
                        self.get_block_number(),
                        partner_secret_message,
                    )
                    channels_to_remove.append(channel)
                else:
                    # The channel stays registered; only the secret is
                    # recorded and revealed to the partner.
                    channel.register_secret(secret)
                    messages_to_send.append((
                        channel.partner_state.address,
                        revealsecret_message,
                    ))

            else:
                log.error(
                    'Channel is registered for a given lock but the lock is not contained in it.'
                )

        # Removal is deferred so the list is not mutated while iterating it.
        for channel in channels_to_remove:
            channels_list.remove(channel)

        if not channels_list:
            del self.token_to_hashlock_to_channels[token_address][hashlock]

        # send the messages last to avoid races
        for recipient, message in messages_to_send:
            self.send_async(
                recipient,
                message,
            )

    def get_channel_details(self, token_address, netting_channel):
        """ Build the `ChannelDetails` describing `netting_channel`.

        Addresses, balances and the settle timeout are taken from
        `netting_channel.detail()`, and the reveal timeout from this node's
        configuration. Both end states start without a balance proof and with
        an empty merkle tree.
        """
        details = netting_channel.detail()

        our_end = ChannelEndState(
            details['our_address'],
            details['our_balance'],
            None,
            EMPTY_MERKLE_TREE,
        )
        partner_end = ChannelEndState(
            details['partner_address'],
            details['partner_balance'],
            None,
            EMPTY_MERKLE_TREE,
        )

        # Callback given to the channel, with the token address bound.
        def register_for_token(channel, hashlock):
            self.register_channel_for_hashlock(token_address, channel, hashlock)

        channel_address = netting_channel.address
        reveal_timeout = self.config['reveal_timeout']
        settle_timeout = details['settle_timeout']

        external_state = ChannelExternalState(
            register_for_token,
            netting_channel,
        )

        return ChannelDetails(
            channel_address,
            our_end,
            partner_end,
            external_state,
            reveal_timeout,
            settle_timeout,
        )

    def restore_channel(self, serialized_channel):
        """ Re-create a channel from its serialized form and add it to the
        channel graph of its token.

        Balances and the settle timeout are read from the netting channel
        proxy; balance proofs, merkle tree leaves and the reveal timeout come
        from `serialized_channel`.
        """
        token_address = serialized_channel.token_address
        channel_address = serialized_channel.channel_address

        netting_channel = self.chain.netting_channel(channel_address)

        # restoring balances from the blockchain since the serialized
        # value could be falling behind.
        onchain = netting_channel.detail()

        # our_address is checked by detail
        assert onchain['partner_address'] == serialized_channel.partner_address

        our_leaves = serialized_channel.our_leaves
        our_tree = (
            MerkleTreeState(compute_layers(our_leaves))
            if our_leaves
            else EMPTY_MERKLE_TREE
        )
        our_end = ChannelEndState(
            onchain['our_address'],
            onchain['our_balance'],
            serialized_channel.our_balance_proof,
            our_tree,
        )

        partner_leaves = serialized_channel.partner_leaves
        partner_tree = (
            MerkleTreeState(compute_layers(partner_leaves))
            if partner_leaves
            else EMPTY_MERKLE_TREE
        )
        partner_end = ChannelEndState(
            onchain['partner_address'],
            onchain['partner_balance'],
            serialized_channel.partner_balance_proof,
            partner_tree,
        )

        # Callback given to the channel, with the token address bound.
        def register_for_token(channel, hashlock):
            self.register_channel_for_hashlock(token_address, channel, hashlock)

        external_state = ChannelExternalState(
            register_for_token,
            netting_channel,
        )
        restored_details = ChannelDetails(
            serialized_channel.channel_address,
            our_end,
            partner_end,
            external_state,
            serialized_channel.reveal_timeout,
            onchain['settle_timeout'],
        )

        channelgraph = self.token_to_channelgraph[token_address]
        channelgraph.add_channel(restored_details)
        restored = channelgraph.address_to_channel.get(
            serialized_channel.channel_address)

        # Set the serialized balance proofs on the channel the graph created.
        restored.our_state.balance_proof = serialized_channel.our_balance_proof
        restored.partner_state.balance_proof = serialized_channel.partner_balance_proof

    def restore_queue(self, serialized_queue):
        """ Put the messages of `serialized_queue` back into the protocol queue.

        The queue is looked up in the protocol by the serialized receiver and
        token addresses, and the messages are added in their stored order.
        """
        receiver = serialized_queue['receiver_address']
        token = serialized_queue['token_address']
        channel_queue = self.protocol.get_channel_queue(receiver, token)

        for pending_message in serialized_queue['messages']:
            channel_queue.put(pending_message)

    def restore_transfer_states(self, transfer_states):
        """ Replace the identifier -> state managers mapping with
        `transfer_states` (the object is stored as is, not copied).
        """
        restored_statemanagers = transfer_states
        self.identifier_to_statemanagers = restored_statemanagers

    def register_registry(self, registry_address):
        """ Register every channel manager of the registry at
        `registry_address`.

        For each manager a channel graph and a connection manager are created
        and stored under the manager's token address.
        """
        proxies = get_relevant_proxies(
            self.chain,
            self.address,
            registry_address,
        )

        # Install the filters first to avoid missing changes, as a consequence
        # some events might be applied twice.
        self.blockchain_events.add_proxies_listeners(proxies)

        for channel_manager in proxies.channel_managers:
            token_address = channel_manager.token_address()
            manager_address = channel_manager.address

            channels_detail = [
                self.get_channel_details(token_address, netting_channel)
                for netting_channel in
                proxies.channelmanager_nettingchannels[manager_address]
            ]

            channelgraph = ChannelGraph(
                self.address,
                manager_address,
                token_address,
                channel_manager.channels_addresses(),
                channels_detail,
            )

            self.manager_to_token[manager_address] = token_address
            self.token_to_channelgraph[token_address] = channelgraph

            connection_manager = ConnectionManager(
                self, token_address, channelgraph)
            self.tokens_to_connectionmanagers[token_address] = connection_manager

    def channel_manager_is_registered(self, manager_address):
        """ Return True if `manager_address` is a key of `manager_to_token`. """
        known_managers = self.manager_to_token
        return manager_address in known_managers

    def register_channel_manager(self, manager_address):
        """ Register the channel manager at `manager_address`.

        Listeners are installed for the manager and for each netting channel
        this node participates in, then a channel graph and a connection
        manager are created and stored under the manager's token address.
        """
        manager = self.default_registry.manager(manager_address)

        netting_channels = []
        for channel_address in manager.channels_by_participant(self.address):
            netting_channels.append(self.chain.netting_channel(channel_address))

        # Install the filters first to avoid missing changes, as a consequence
        # some events might be applied twice.
        self.blockchain_events.add_channel_manager_listener(manager)
        for netting_channel in netting_channels:
            self.blockchain_events.add_netting_channel_listener(netting_channel)

        token_address = manager.token_address()
        edge_list = manager.channels_addresses()

        channels_detail = []
        for netting_channel in netting_channels:
            channels_detail.append(
                self.get_channel_details(token_address, netting_channel))

        channelgraph = ChannelGraph(
            self.address,
            manager_address,
            token_address,
            edge_list,
            channels_detail,
        )

        self.manager_to_token[manager_address] = token_address
        self.token_to_channelgraph[token_address] = channelgraph

        connection_manager = ConnectionManager(self, token_address, channelgraph)
        self.tokens_to_connectionmanagers[token_address] = connection_manager

    def register_netting_channel(self, token_address, channel_address):
        """ Start listening to the netting channel at `channel_address` and
        add it to the channel graph of `token_address`.
        """
        channel_proxy = self.chain.netting_channel(channel_address)
        self.blockchain_events.add_netting_channel_listener(channel_proxy)

        channel_detail = self.get_channel_details(token_address, channel_proxy)
        self.token_to_channelgraph[token_address].add_channel(channel_detail)

    def connection_manager_for_token(self, token_address):
        """ Return the connection manager registered for `token_address`.

        Raises:
            InvalidAddress: If `token_address` is not a valid address or no
                connection manager is registered for it.
        """
        if not isaddress(token_address):
            raise InvalidAddress('token address is not valid.')

        if token_address not in self.tokens_to_connectionmanagers:
            raise InvalidAddress('token is not registered.')

        return self.tokens_to_connectionmanagers[token_address]

    def leave_all_token_networks_async(self):
        """ Start leaving every token network this node knows.

        Tokens for which `InvalidAddress` is raised are skipped.

        Returns:
            AsyncResult: Linked to a greenlet that waits on all the individual
            leave results.
        """
        pending_leaves = []
        for token_address in self.token_to_channelgraph:
            try:
                manager = self.connection_manager_for_token(token_address)
                pending_leaves.append(manager.leave_async())
            except InvalidAddress:
                continue

        combined_result = AsyncResult()
        waiter = gevent.spawn(gevent.wait, pending_leaves)
        waiter.link(combined_result)
        return combined_result

    def close_and_settle(self):
        """ Leave all token networks and wait for the channels to settle.

        The leave is started asynchronously for every token network. The
        blockchain is then polled until either all the channels that were open
        are settled, or the block number reaches the starting block plus the
        largest `min_settle_blocks` among the connection managers. An error is
        logged if any channel is still not settled once the wait is over.

        With no registered token network there is nothing to wait for and the
        method returns without polling.
        """
        log.info('raiden will close and settle all channels now')

        connection_managers = [
            self.connection_manager_for_token(token_address)
            for token_address in self.token_to_channelgraph
        ]

        def blocks_to_wait():
            # `default=0` keeps this from raising ValueError when no token
            # network is registered (max() of an empty sequence).
            return max(
                (
                    connection_manager.min_settle_blocks
                    for connection_manager in connection_managers
                ),
                default=0,
            )

        all_channels = list(
            itertools.chain.from_iterable([
                connection_manager.open_channels
                for connection_manager in connection_managers
            ]))

        leaving_greenlet = self.leave_all_token_networks_async()
        # using the un-cached block number here
        last_block = self.chain.block_number()

        earliest_settlement = last_block + blocks_to_wait()

        # TODO: estimate and set a `timeout` parameter in seconds
        # based on connection_manager.min_settle_blocks and an average
        # blocktime from the past

        current_block = last_block
        while current_block < earliest_settlement:
            gevent.sleep(self.alarm.wait_time)
            last_block = self.chain.block_number()
            # Progress is only re-evaluated when a new block was seen.
            if last_block != current_block:
                current_block = last_block
                avg_block_time = self.chain.estimate_blocktime()
                wait_blocks_left = blocks_to_wait()
                not_settled = sum(
                    1 for channel in all_channels
                    if not channel.state == CHANNEL_STATE_SETTLED)
                if not_settled == 0:
                    log.debug('nothing left to settle')
                    break
                log.info(
                    'waiting at least %s more blocks (~%s sec) for settlement'
                    '(%s channels not yet settled)' %
                    (wait_blocks_left, wait_blocks_left * avg_block_time,
                     not_settled))

            leaving_greenlet.wait(timeout=blocks_to_wait() *
                                  self.chain.estimate_blocktime() * 1.5)

        if any(channel.state != CHANNEL_STATE_SETTLED
               for channel in all_channels):
            log.error('Some channels were not settled!',
                      channels=[
                          pex(channel.channel_address)
                          for channel in all_channels
                          if channel.state != CHANNEL_STATE_SETTLED
                      ])

    def mediated_transfer_async(self, token_address, amount, target,
                                identifier):
        """ Transfer `amount` between this node and `target`.

        An asynchronous transfer is started; whether it succeeds depends on:

            - A usable path existing, through direct or intermediary
              channels.
            - The network being fast enough for the transfer to complete
              before it expires.

        Returns:
            The async result produced by `start_mediated_transfer`.
        """
        return self.start_mediated_transfer(
            token_address,
            amount,
            identifier,
            target,
        )

    def direct_transfer_async(self, token_address, amount, target, identifier):
        """ Do a direct transfer with target.

        A direct transfer is a signed balance proof with the transferred
        amount incremented, so it can neither be cancelled nor expire.

        Since it cannot be cancelled there is a level of trust with the
        target: once the message is sent the target is effectively paid and
        the payment cannot be reverted.

        The async result is set to False iff there is no direct channel with
        the target or the payer does not have balance to complete the
        transfer. Otherwise, because the transfer cannot expire, the async
        result *will never be set to False*, and if the message is sent it
        will hang until the target node acknowledges the message.

        This transfer should be used as an optimization, since only two packets
        are required to complete the transfer (from the payer's perspective),
        whereas the mediated transfer requires 6 messages.
        """
        channelgraph = self.token_to_channelgraph[token_address]
        direct_channel = channelgraph.partneraddress_to_channel.get(target)

        has_capacity = (
            direct_channel and
            direct_channel.can_transfer and
            amount <= direct_channel.distributable
        )

        if not has_capacity:
            failed_result = AsyncResult()
            failed_result.set(False)
            return failed_result

        direct_transfer = direct_channel.create_directtransfer(
            amount, identifier)
        self.sign(direct_transfer)
        direct_channel.register_transfer(
            self.get_block_number(),
            direct_transfer,
        )

        partner_address = direct_channel.partner_state.address
        state_change = ActionTransferDirect(
            identifier,
            amount,
            token_address,
            partner_address,
        )
        # TODO: add the transfer sent event
        state_change_id = self.transaction_log.log(state_change)

        # TODO: This should be set once the direct transfer is acknowledged
        sent_success = EventTransferSentSuccess(
            identifier,
            amount,
            target,
        )
        self.transaction_log.log_events(
            state_change_id,
            [sent_success],
            self.get_block_number(),
        )

        return self.protocol.send_async(partner_address, direct_transfer)

    def start_mediated_transfer(self, token_address, amount, identifier,
                                target):
        """ Start an initiator state machine for a mediated transfer.

        Returns:
            AsyncResult: Already set to False when no route to `target` is
            available; otherwise it is stored under the transfer identifier
            in `identifier_to_results`.
        """
        # pylint: disable=too-many-locals

        async_result = AsyncResult()
        channelgraph = self.token_to_channelgraph[token_address]

        routes = get_best_routes(
            channelgraph,
            self.protocol.nodeaddresses_networkstatuses,
            self.address,
            target,
            amount,
            None,
        )

        if not routes:
            async_result.set(False)
            return async_result

        self.protocol.start_health_check(target)

        if identifier is None:
            identifier = create_default_identifier()

        routes_state = RoutesState(routes)
        current_block = self.get_block_number()

        # Expiration, hashlock and secret are not known at this point.
        transfer = LockedTransferState(
            identifier=identifier,
            amount=amount,
            token=token_address,
            initiator=self.address,
            target=target,
            expiration=None,
            hashlock=None,
            secret=None,
        )

        # Issue #489
        #
        # Raiden may fail after a state change using the random generator is
        # handled but right before the snapshot is taken. If that happens, on
        # the next initialization, while raiden is recovering and applying the
        # pending state changes, a new secret will be generated and the
        # resulting events won't match. This breaks the architecture model,
        # since it's assumed the re-execution of a state change will always
        # produce the same events.
        #
        # TODO: Remove the secret generator from the InitiatorState and add
        # the secret into all state changes that require one, this way the
        # secret will be serialized with the state change and the recovery will
        # use the same /random/ secret.
        secret_generator = RandomSecretGenerator()

        init_initiator = ActionInitInitiator(
            our_address=self.address,
            transfer=transfer,
            routes=routes_state,
            random_generator=secret_generator,
            block_number=current_block,
        )

        state_manager = StateManager(initiator.state_transition, None)
        self.state_machine_event_handler.log_and_dispatch(
            state_manager, init_initiator)

        # TODO: implement the network timeout raiden.config['msg_timeout'] and
        # cancel the current transfer if it happens (issue #374)
        self.identifier_to_statemanagers[identifier].append(state_manager)
        self.identifier_to_results[identifier].append(async_result)

        return async_result

    def mediate_mediated_transfer(self, message):
        """ Start a mediator state machine for the transfer in `message`.

        Routes towards the message's target are computed with the message's
        sender passed as the last argument of `get_best_routes`. The state
        manager is stored under the message's identifier.
        """
        # pylint: disable=too-many-locals
        transfer_identifier = message.identifier
        locked_amount = message.lock.amount
        sender = message.sender
        channelgraph = self.token_to_channelgraph[message.token]

        routes = get_best_routes(
            channelgraph,
            self.protocol.nodeaddresses_networkstatuses,
            self.address,
            message.target,
            locked_amount,
            sender,
        )

        # Route describing the channel the transfer arrived through.
        payer_channel = channelgraph.partneraddress_to_channel[sender]
        payer_route = channel_to_routestate(payer_channel, sender)

        payer_transfer = lockedtransfer_from_message(message)
        routes_state = RoutesState(routes)
        current_block = self.get_block_number()

        init_mediator = ActionInitMediator(
            self.address,
            payer_transfer,
            routes_state,
            payer_route,
            current_block,
        )

        state_manager = StateManager(mediator.state_transition, None)
        self.state_machine_event_handler.log_and_dispatch(
            state_manager, init_mediator)

        self.identifier_to_statemanagers[transfer_identifier].append(
            state_manager)

    def target_mediated_transfer(self, message):
        """ Start a target state machine for the transfer in `message`.

        The state manager is stored under the message's identifier.
        """
        sender = message.sender
        channelgraph = self.token_to_channelgraph[message.token]

        # Route describing the channel the transfer arrived through.
        payer_channel = channelgraph.partneraddress_to_channel[sender]
        payer_route = channel_to_routestate(payer_channel, sender)

        payer_transfer = lockedtransfer_from_message(message)
        current_block = self.get_block_number()

        init_target = ActionInitTarget(
            self.address,
            payer_route,
            payer_transfer,
            current_block,
        )

        state_manager = StateManager(target_task.state_transition, None)
        self.state_machine_event_handler.log_and_dispatch(
            state_manager, init_target)

        self.identifier_to_statemanagers[message.identifier].append(
            state_manager)
Ejemplo n.º 41
0
class RaidenService:
    """ A Raiden node. """

    def __init__(
            self,
            chain: BlockChainService,
            default_registry: Registry,
            default_secret_registry: SecretRegistry,
            private_key_bin,
            transport,
            config,
            discovery=None,
    ):
        """ Validate the arguments, set up the node's attributes and start it.

        With the 'udp' transport type the endpoint registration is spawned
        early and joined before `start()` is called.

        Raises:
            ValueError: If `private_key_bin` is not 32 bytes long, or
                `config['settle_timeout']` is outside of the allowed range.
        """
        valid_private_key = (
            isinstance(private_key_bin, bytes) and
            len(private_key_bin) == 32
        )
        if not valid_private_key:
            raise ValueError('invalid private_key')

        settle_timeout = config['settle_timeout']
        if (settle_timeout < NETTINGCHANNEL_SETTLE_TIMEOUT_MIN or
                settle_timeout > NETTINGCHANNEL_SETTLE_TIMEOUT_MAX):
            raise ValueError('settle_timeout must be in range [{}, {}]'.format(
                NETTINGCHANNEL_SETTLE_TIMEOUT_MIN, NETTINGCHANNEL_SETTLE_TIMEOUT_MAX,
            ))

        self.tokens_to_connectionmanagers = {}
        self.identifier_to_results = defaultdict(list)

        self.chain: BlockChainService = chain
        self.default_registry = default_registry
        self.default_secret_registry = default_secret_registry
        self.config = config
        self.privkey = private_key_bin
        self.address = privatekey_to_address(private_key_bin)
        self.discovery = discovery

        uses_udp = config['transport_type'] == 'udp'
        if uses_udp:
            # Runs in the background while the rest of the setup proceeds.
            endpoint_registration_event = gevent.spawn(
                discovery.register,
                self.address,
                config['external_ip'],
                config['external_port'],
            )
            endpoint_registration_event.link_exception(endpoint_registry_exception_handler)

        self.private_key = PrivateKey(private_key_bin)
        self.pubkey = self.private_key.public_key.format(compressed=False)
        self.transport = transport

        self.blockchain_events = BlockchainEvents()
        self.alarm = AlarmTask(chain)
        self.shutdown_timeout = config['shutdown_timeout']
        self.stop_event = Event()
        self.start_event = Event()
        self.chain.client.inject_stop_event(self.stop_event)

        self.wal = None

        self.database_path = config['database_path']
        if self.database_path == ':memory:':
            # In-memory database: nothing on disk to create or to lock.
            self.database_path = ':memory:'
            self.database_dir = None
            self.lock_file = None
            self.serialization_file = None
            self.db_lock = None
        else:
            database_dir = os.path.dirname(config['database_path'])
            os.makedirs(database_dir, exist_ok=True)

            self.database_dir = database_dir
            # Prevent concurrent access to the same db
            self.lock_file = os.path.join(self.database_dir, '.lock')
            self.db_lock = filelock.FileLock(self.lock_file)

        if uses_udp:
            # If the endpoint registration fails the node will quit, this must
            # finish before starting the transport
            endpoint_registration_event.join()

        self.event_poll_lock = gevent.lock.Semaphore()

        self.start()

    def start(self):
        """ Start the node.

        The steps run in this order:

            - Clear the stop event if it is set, and acquire the database
              lock when a database directory is in use.
            - Restore the write-ahead log from the latest snapshot.
            - If there is no restored state, initialize the node state and
              register the default payment network.
            - Install and query the payment network filters.
            - Start the alarm task, then the transport, then the health
              checks for the neighbours.
            - Handle the events returned by the restore and set
              `start_event`.
        """
        if self.stop_event and self.stop_event.is_set():
            self.stop_event.clear()

        if self.database_dir is not None:
            # NOTE(review): timeout=0 presumably makes the acquire fail right
            # away instead of waiting for another process - confirm against
            # the filelock documentation.
            self.db_lock.acquire(timeout=0)
            assert self.db_lock.is_locked

        # The database may be :memory:
        # NOTE(review): the storage is given a pickle based serializer, so
        # the database file presumably has to be trusted - confirm.
        storage = sqlite.SQLiteStorage(self.database_path, serialize.PickleSerializer())
        self.wal, unapplied_events = wal.restore_from_latest_snapshot(
            node.state_transition,
            storage,
        )

        # No current state: nothing was restored, initialize from scratch.
        if self.wal.state_manager.current_state is None:
            block_number = self.chain.block_number()

            state_change = ActionInitNode(
                random.Random(),
                block_number,
            )
            self.wal.log_and_dispatch(state_change, block_number)
            payment_network = PaymentNetworkState(
                self.default_registry.address,
                [],  # empty list of token network states as it's the node's startup
            )
            state_change = ContractReceiveNewPaymentNetwork(payment_network)
            self.handle_state_change(state_change)

            # On first run Raiden needs to fetch all events for the payment
            # network, to reconstruct all token network graphs and find opened
            # channels
            last_log_block_number = 0
        else:
            # The `Block` state change is dispatched only after all the events
            # for that given block have been processed, filters can be safely
            # installed starting from this position without losing events.
            last_log_block_number = views.block_number(self.wal.state_manager.current_state)

        self.install_and_query_payment_network_filters(
            self.default_registry.address,
            last_log_block_number,
        )

        # Regarding the timing of starting the alarm task it is important to:
        # - Install the filters which will be polled by poll_blockchain_events
        #   after the state has been primed, otherwise the state changes won't
        #   have effect.
        # - Install the filters using the correct from_block value, otherwise
        #   blockchain logs can be lost.
        self.alarm.register_callback(self._callback_new_block)
        self.alarm.start()

        # Start the transport after the registry is queried to avoid warning
        # about unknown channels.
        queueids_to_queues = views.get_all_messagequeues(views.state_from_raiden(self))
        self.transport.start(self, queueids_to_queues)

        # Health check needs the transport layer
        self.start_neighbours_healthcheck()

        # Events returned by the restore are handled last, once the transport
        # is running.
        for event in unapplied_events:
            on_raiden_event(self, event)

        self.start_event.set()

    def start_neighbours_healthcheck(self):
        """ Start a health check for every neighbour node in the current
        state, except for the bootstrap address.
        """
        node_state = self.wal.state_manager.current_state

        for neighbour in views.all_neighbour_nodes(node_state):
            if neighbour == ConnectionManager.BOOTSTRAP_ADDR:
                continue

            self.start_health_check_for(neighbour)

    def stop(self):
        """ Stop the node.

        The stop event is set, the transport and the alarm task are stopped,
        the blockchain event listeners are uninstalled and the database lock,
        if there is one, is released.
        """
        # Needs to come before any greenlets joining
        self.stop_event.set()
        self.transport.stop_and_wait()
        self.alarm.stop_async()

        pending_greenlets = [
            self.alarm,
            *getattr(self.transport, 'greenlets', []),
        ]
        # We need a timeout to prevent an endless loop from trying to
        # contact the disconnected client
        gevent.wait(pending_greenlets, timeout=self.shutdown_timeout)

        # Filters must be uninstalled after the alarm task has stopped. Since
        # the events are polled by an alarm task callback, if the filters are
        # uninstalled before the alarm task is fully stopped the callback
        # `poll_blockchain_events` will fail.
        #
        # We need a timeout to prevent an endless loop from trying to
        # contact the disconnected client
        try:
            with gevent.Timeout(self.shutdown_timeout):
                self.blockchain_events.uninstall_all_event_listeners()
        except (gevent.timeout.Timeout, RaidenShuttingDown):
            # Best effort: the node is shutting down either way.
            pass

        self.blockchain_events.reset()

        if self.db_lock is None:
            return

        self.db_lock.release()

    def __repr__(self):
        """Return ``<ClassName address>``, with the address formatted by ``pex``."""
        class_name = self.__class__.__name__
        formatted_address = pex(self.address)
        return '<{} {}>'.format(class_name, formatted_address)

    def get_block_number(self):
        """Return the block number of the WAL state manager's current state."""
        current_state = self.wal.state_manager.current_state
        return views.block_number(current_state)

    def handle_state_change(self, state_change, block_number=None):
        """Dispatch ``state_change`` through the WAL and handle the resulting events.

        Args:
            state_change: The state change to log and dispatch.
            block_number: Block number handed to the WAL. When ``None`` the
                value of ``self.get_block_number()`` is used.

        Returns:
            The events returned by ``self.wal.log_and_dispatch``; each of them
            has already been passed to ``on_raiden_event``.
        """
        log.debug('STATE CHANGE', node=pex(self.address), state_change=state_change)

        dispatch_block = self.get_block_number() if block_number is None else block_number
        raiden_events = self.wal.log_and_dispatch(state_change, dispatch_block)

        for raiden_event in raiden_events:
            log.debug('EVENT', node=pex(self.address), raiden_event=raiden_event)
            on_raiden_event(self, raiden_event)

        return raiden_events

    def set_node_network_state(self, node_address, network_state):
        """Log and dispatch an ``ActionChangeNodeNetworkState`` for ``node_address``.

        The events returned by the WAL are not processed here.
        """
        network_state_change = ActionChangeNodeNetworkState(node_address, network_state)
        current_block = self.get_block_number()
        self.wal.log_and_dispatch(network_state_change, current_block)

    def start_health_check_for(self, node_address):
        """Ask the transport layer to start a health check for ``node_address``."""
        transport = self.transport
        transport.start_health_check(node_address)

    def _callback_new_block(self, current_block_number):
        """Handle a new block reported by the alarm task.

        Polls the blockchain events, dispatches each of them and finally
        dispatches a `Block` state change for ``current_block_number``.

        Note:
            This must run exactly once per mined block, with the block number
            supplied by the AlarmTask; running it more often would write
            duplicated `Block` state changes to the log.
        """
        # Raiden updates its off-chain state from blockchain events, and some
        # APIs /used/ to force a poll for events.
        #
        # That was done for APIs with on-chain side-effects (e.g. opening a
        # channel), where the event has to be polled so the caller sees a
        # consistent view once the API call returns (e.g. the channel exists).
        #
        # The pattern was racy: a filter returns each event only once, and
        # this method could run concurrently from the API and the AlarmTask.
        # The lock below guarantees the expected side-effects are applied
        # properly (introduced by the commit
        # 3686b3275ff7c0b669a6d5e2b34109c3bdf1921d).
        with self.event_poll_lock:
            polled_events = self.blockchain_events.poll_blockchain_events()
            for blockchain_event in polled_events:
                # These state changes are processed with a block_number that
                # is /larger/ than the NodeState's block_number.
                on_blockchain_event(self, blockchain_event, current_block_number)

            # After a restart the node re-creates its filters on the ethereum
            # node using the latest Block state change as from_block. To avoid
            # missing events, the Block state change is therefore dispatched
            # only after every event has been processed.
            #
            # As a consequence, in some corner cases a few events may be
            # applied twice: when the node crashed after processing events but
            # before dispatching the Block state change.
            new_block = Block(current_block_number)
            self.handle_state_change(new_block, current_block_number)

    def sign(self, message):
        """ Sign ``message`` in place with this node's private key.

        Raises:
            ValueError: If ``message`` is not a ``SignedMessage``.
        """
        if isinstance(message, SignedMessage):
            message.sign(self.private_key)
            return

        raise ValueError('{} is not signable.'.format(repr(message)))

    def install_and_query_payment_network_filters(self, payment_network_id, from_block=0):
        """Install listeners for the payment network and dispatch the events found.

        Args:
            payment_network_id: Identifier handed to ``get_relevant_proxies``.
            from_block: Block from which the listeners are installed.
        """
        relevant_proxies = get_relevant_proxies(self.chain, self.address, payment_network_id)

        # Installing and polling happen under the poll lock; every polled event
        # is dispatched with the block number carried in its own event data.
        with self.event_poll_lock:
            self.blockchain_events.add_proxies_listeners(relevant_proxies, from_block)
            for blockchain_event in self.blockchain_events.poll_blockchain_events():
                event_block = blockchain_event.event_data['block_number']
                on_blockchain_event(self, blockchain_event, event_block)

    def connection_manager_for_token(self, registry_address, token_address):
        """Return the connection manager for ``token_address``, creating it on first use.

        Raises:
            InvalidAddress: If ``token_address`` is not a binary address or is
                not among the token networks known for ``registry_address``.
        """
        if not is_binary_address(token_address):
            raise InvalidAddress('token address is not valid.')

        current_state = self.wal.state_manager.current_state
        registered_tokens = views.get_token_network_addresses_for(
            current_state,
            registry_address,
        )
        if token_address not in registered_tokens:
            raise InvalidAddress('token is not registered.')

        cached = self.tokens_to_connectionmanagers.get(token_address)
        if cached is not None:
            return cached

        # First request for this token: create the manager and remember it.
        created = ConnectionManager(self, registry_address, token_address)
        self.tokens_to_connectionmanagers[token_address] = created
        return created

    def leave_all_token_networks(self):
        """Log and dispatch an ``ActionLeaveAllNetworks`` state change through the WAL."""
        leave_action = ActionLeaveAllNetworks()
        current_block = self.get_block_number()
        self.wal.log_and_dispatch(leave_action, current_block)

    def close_and_settle(self):
        """Leave all token networks and, if any connection manager exists, wait for settlement."""
        log.info('raiden will close and settle all channels now')

        self.leave_all_token_networks()

        # Only wait when at least one connection manager has been created.
        active_managers = list(self.tokens_to_connectionmanagers.values())
        if not active_managers:
            return

        waiting.wait_for_settle_all_channels(
            self,
            self.alarm.wait_time,
        )

    def mediated_transfer_async(
            self,
            token_network_identifier,
            amount,
            target,
            identifier,
    ):
        """ Transfer `amount` between this node and `target`.

        Starts an asynchronous transfer by delegating to
        `start_mediated_transfer` and returns its result. Whether the transfer
        succeeds depends on, among other things:

            - A usable path existing, through direct or intermediary channels.
            - The network being fast enough for the transfer not to expire.
        """
        return self.start_mediated_transfer(
            token_network_identifier,
            amount,
            target,
            identifier,
        )

    def direct_transfer_async(self, token_network_identifier, amount, target, identifier):
        """ Do a direct transfer with target.

        A direct transfer is a signed balance proof with the transferred
        amount incremented, so it can neither be cancelled nor expire. This
        implies a level of trust in the target: once the message is sent the
        target is effectively paid and the payment cannot be reverted.

        Direct transfers are meant as an optimization: from the payer's
        perspective only two packets are needed, whereas a mediated transfer
        requires 6 messages.

        When `identifier` is None a default identifier is generated.

        NOTE(review): despite the name, this method returns nothing; it only
        dispatches the `ActionTransferDirect` state change.
        """
        self.transport.start_health_check(target)

        payment_identifier = identifier
        if payment_identifier is None:
            payment_identifier = create_default_identifier()

        transfer_action = ActionTransferDirect(
            token_network_identifier,
            target,
            payment_identifier,
            amount,
        )
        self.handle_state_change(transfer_action)

    def start_mediated_transfer(
            self,
            token_network_identifier,
            amount,
            target,
            identifier,
    ):
        """Start a mediated transfer to `target` and return its `AsyncResult`.

        A default identifier is generated when `identifier` is None. The
        identifier must not already be present in `identifier_to_results`.
        """
        self.transport.start_health_check(target)

        if identifier is None:
            identifier = create_default_identifier()

        assert identifier not in self.identifier_to_results

        pending_result = AsyncResult()
        self.identifier_to_results[identifier].append(pending_result)

        initiator_statechange = initiator_init(
            self,
            identifier,
            amount,
            random_secret(),
            token_network_identifier,
            target,
        )

        # TODO: implement the network timeout raiden.config['msg_timeout'] and
        # cancel the current transfer if it happens (issue #374)
        #
        # The state change is dispatched even when there are no routes, so
        # that the wal entry is created.
        self.handle_state_change(initiator_statechange)

        return pending_result

    def mediate_mediated_transfer(self, transfer: LockedTransfer):
        """Build the mediator init state change for `transfer` and dispatch it."""
        self.handle_state_change(mediator_init(self, transfer))

    def target_mediated_transfer(self, transfer: LockedTransfer):
        """Build the target init state change for `transfer` and dispatch it."""
        self.handle_state_change(target_init(transfer))
Ejemplo n.º 42
0
class _Poller:
    """Call a function periodically from a separate thread and report its results.

    ``run`` is started through ``_threading.start_new_thread``. It calls the
    polled function, pushes new values (or a ``PollingException``) onto
    ``self.queue`` and signals ``self.async_watcher``. The watcher's callback,
    ``new_event``, drains the queue and spawns the user callbacks with
    ``gevent.spawn``.

    The polled callable and both callbacks are stored through
    ``saferef.safe_ref``; polling ends when the polled callable's reference
    resolves to ``None``.
    """

    def __init__(
        self,
        polled_call,
        polled_call_args=(),
        polling_period=1000,
        value_changed_callback=None,
        error_callback=None,
        compare=True,
    ):
        """Store the polling configuration; polling starts with ``start_delayed``.

        Args:
            polled_call: Callable invoked as ``polled_call(*polled_call_args)``.
            polled_call_args: Positional arguments for ``polled_call``.
            polling_period: Pause between two calls, in milliseconds.
            value_changed_callback: Called with each reported value.
            error_callback: Called with ``(original_exception, poller_id)``
                when ``polled_call`` raises.
            compare: When true, a value is reported only if it differs from
                the previously reported one; otherwise every value is reported.
        """
        self.polled_call_ref = saferef.safe_ref(polled_call)
        self.args = polled_call_args
        self.polling_period = polling_period
        self.value_changed_callback_ref = saferef.safe_ref(
            value_changed_callback)
        self.error_callback_ref = saferef.safe_ref(error_callback)
        self.compare = compare
        self.old_res = NotInitializedValue
        self.queue = _threading.Queue()  # Queue.Queue()
        self.delay = 0
        self.stop_event = Event()
        self.async_watcher = gevent.get_hub().loop.async_()

    def start_delayed(self, delay):
        """Start the polling thread; the first call is delayed by ``delay`` milliseconds."""
        self.delay = delay
        _threading.start_new_thread(self.run, ())  # self.start()

    def stop(self):
        """Signal the polling loop to stop and remove this poller from ``POLLERS``."""
        self.stop_event.set()
        del POLLERS[self.get_id()]

    def is_stopped(self):
        """Return True once ``stop`` has been called."""
        return self.stop_event.is_set()

    def get_id(self):
        """Return the identifier of this poller (its ``id()``)."""
        return id(self)

    def get_polling_period(self):
        """Return the polling period in milliseconds."""
        return self.polling_period

    def set_polling_period(self, polling_period):
        """Set the polling period in milliseconds."""
        # logging.info(">>>>> CHANGIG POLLING PERIOD TO %d", polling_period)
        self.polling_period = polling_period

    def restart(self, delay=0):
        """Stop this poller and start a new one with the same configuration.

        Returns:
            The result of ``poll(...)``, or ``None`` when the polled callable
            reference no longer resolves.
        """
        self.stop()

        polled_call = self.polled_call_ref()
        value_changed_cb = self.value_changed_callback_ref()
        error_cb = self.error_callback_ref()
        if polled_call is not None:
            return poll(
                polled_call,
                self.args,
                self.polling_period,
                value_changed_cb,
                error_cb,
                self.compare,
                delay,
                start_value=self.old_res,
            )

    def new_event(self):
        """Drain the result queue and dispatch each item to the matching callback.

        A ``PollingException`` goes to the error callback, anything else to the
        value-changed callback; callbacks run in their own greenlets.
        """
        while True:
            try:
                # Read from this poller's own queue. Instantiating a new queue
                # here would always yield an empty one, so nothing queued by
                # ``run`` would ever be dispatched.
                # NOTE(review): assumes ``self.queue`` offers ``get_nowait``
                # raising ``Empty`` -- confirm for the gevent version in use.
                res = self.queue.get_nowait()
            except Empty:
                break

            if isinstance(res, PollingException):
                cb = self.error_callback_ref()
                if cb is not None:
                    gevent.spawn(cb, res.original_exception, res.poller_id)
            else:
                cb = self.value_changed_callback_ref()
                if cb is not None:
                    gevent.spawn(cb, res)

    def run(self):
        """Polling loop executed in the thread started by ``start_delayed``."""
        # Use the original (non monkey-patched) sleep inside this thread.
        sleep = gevent.monkey._get_original("time", ["sleep"])[0]

        self.async_watcher.start(self.new_event)
        error_cb = None
        first_run = True

        while not self.stop_event.is_set():
            if first_run and self.delay:
                sleep(self.delay / 1000.0)
            first_run = False

            if self.stop_event.is_set():
                break

            polled_call = self.polled_call_ref()
            if polled_call is None:
                break

            try:
                res = polled_call(*self.args)
            except Exception as e:
                if self.stop_event.is_set():
                    break
                error_cb = self.error_callback_ref()
                if error_cb is not None:
                    self.queue.put(PollingException(e, self.get_id()))
                # An error ends the polling loop; the watcher is signalled
                # after the loop so the error callback gets dispatched.
                break

            # Drop the strong reference before sleeping.
            del polled_call

            if self.stop_event.is_set():
                break

            if isinstance(res, numpy.ndarray):  # for arrays
                comparison = res == self.old_res
                if isinstance(comparison, bool):
                    is_equal = comparison
                else:
                    is_equal = all(comparison)
            else:
                is_equal = res == self.old_res

            if self.compare and is_equal:
                # do nothing: previous value is the same as "new" value
                pass
            else:
                new_value = True
                if self.compare:
                    new_value = not is_equal

                if new_value:
                    self.old_res = res
                    self.queue.put(res)
                    self.async_watcher.send()

            sleep(self.polling_period / 1000.0)

        if error_cb is not None:
            self.async_watcher.send()
Ejemplo n.º 43
0
class Channel(object):
    # pylint: disable=too-many-instance-attributes,too-many-arguments

    def __init__(self, our_state, partner_state, external_state, asset_address,
                 reveal_timeout, settle_timeout):

        if settle_timeout <= reveal_timeout:
            # reveal_timeout must be a fraction of the settle_timeout
            raise ValueError(
                'reveal_timeout can not be larger-or-equal to settle_timeout')

        if reveal_timeout < 3:
            # To guarantee that assets won't be lost the expiration needs to
            # decrease at each hop, this is what forces the next hop to reveal
            # the secret with enough time for this node to unlock the lock with
            # the previous.
            #
            # This /should be/ at least:
            #
            #   reveal_timeout = blocks_to_learn + blocks_to_mine * 2
            #
            # Where:
            #
            # - `blocks_to_learn` is the estimated worst case for a given block
            # to propagate to the full network. This is the time to learn a
            # secret revealed throught the blockchain.
            # - `blocks_to_mine * 2` is the estimated worst case for a given
            # transfer to be included in a block. This is the time to close a
            # channel and then to unlock a lock on chain.
            #
            raise ValueError('reveal_timeout must be at least 1')

        if not isinstance(settle_timeout, (int, long)):
            raise ValueError('settle_timeout must be integral')

        if not isinstance(reveal_timeout, (int, long)):
            raise ValueError('reveal_timeout must be integral')

        self.our_state = our_state
        self.partner_state = partner_state

        self.asset_address = asset_address
        self.reveal_timeout = reveal_timeout
        self.settle_timeout = settle_timeout
        self.external_state = external_state

        self.open_event = Event()
        self.close_event = Event()
        self.settle_event = Event()

        external_state.callback_on_opened(lambda _: self.open_event.set())
        external_state.callback_on_closed(lambda _: self.close_event.set())
        external_state.callback_on_settled(lambda _: self.settle_event.set())

        external_state.callback_on_closed(self.channel_closed)

        self.received_transfers = []
        self.sent_transfers = [
        ]  #: transfers that were sent, required for settling
        self.on_withdrawable_callbacks = list(
        )  # mapping of transfer to callback list
        self.on_task_completed_callbacks = list()  # XXX naming

    @property
    def isopen(self):
        return self.external_state.isopen()

    @property
    def contract_balance(self):
        """ Return the amount of asset used to open the channel. """
        return self.our_state.contract_balance

    @property
    def transferred_amount(self):
        """ Return how much we transferred to partner. """
        return self.our_state.transferred_amount

    @property
    def balance(self):
        """ Return our current balance.

        Balance is equal to `initial_deposit + received_amount - sent_amount`,
        were both `receive_amount` and `sent_amount` are unlocked.
        """
        return self.our_state.balance(self.partner_state)

    @property
    def distributable(self):
        """ Return the available amount of the asset that our end of the
        channel can transfer to the partner.
        """
        return self.our_state.distributable(self.partner_state)

    @property
    def locked(self):
        """ Return the current amount of our asset that is locked waiting for a
        secret.

        The locked value is equal to locked transfers that have being
        initialized but the secret has not being revealed.
        """
        return self.partner_state.locked()

    @property
    def outstanding(self):
        return self.our_state.locked()

    def register_withdrawable_callback(self, callback):
        self.on_withdrawable_callbacks.append(callback)

    def channel_closed(self, block_number):
        self.external_state.register_block_alarm(self.blockalarm_for_settle)

        balance_proof = self.partner_state.balance_proof

        transfer = balance_proof.transfer
        unlock_proofs = balance_proof.get_known_unlocks()

        self.external_state.update_transfer(self.our_state.address, transfer)
        self.external_state.unlock(self.our_state.address, unlock_proofs)

    def blockalarm_for_settle(self, block_number):
        """ Block alarm callback that spawns the settle attempts.

        Returns:
            `REMOVE_CALLBACK` when the settle greenlet was spawned, otherwise
            `None`.
        """
        def _attempt_settle():
            # Up to three attempts; stop as soon as the settle event is set.
            for _ in range(3):
                try:
                    self.external_state.settle()
                except:  # noqa: E722 - deliberately broad, failures are only logged
                    log.exception('Timedout while calling settle')

                # The settle event may be triggered by our transaction or by
                # our partner's, so wait briefly for either.
                self.settle_event.wait(0.5)

                if self.settle_event.is_set():
                    log.info('channel automatically settled')
                    return

        # NOTE(review): this fires while block_number is still within
        # closed_block + settle_timeout; confirm the comparison direction is
        # the intended one.
        settlement_limit = self.external_state.closed_block + self.settle_timeout
        if settlement_limit < block_number:
            return None

        gevent.spawn(_attempt_settle)  # don't block the alarm
        return REMOVE_CALLBACK

    def get_state_for(self, node_address_bin):
        if self.our_state.address == node_address_bin:
            return self.our_state

        if self.partner_state.address == node_address_bin:
            return self.partner_state

        raise Exception('Unknow address {}'.format(
            encode_hex(node_address_bin)))

    def register_secret(self, secret):
        """ Register a secret with every channel end that knows its hashlock.

        The lock is not claimed here (the transferred_amount is left alone);
        the secret is only stored so a proof can be created if needed. Either
        end of the channel may use this method.

        Note:
            While a secret is being revealed a message carrying the older
            locksroot may still be in transit, so the recipient cannot update
            its locksroot at the moment the secret is revealed.

            The protocol is: register the secret so a proof of balance can be
            computed if necessary, forward the secret to the sender and wait
            for the sender's update. Ordering the in-transit (and possibly
            queued) transfers against the secret/locksroot update is the
            sender's duty.

            The channel and its queue must change in sync: no transfer may be
            created while the balance_proof is being updated.

        Args:
            secret: The secret that releases a locked transfer.

        Raises:
            ValueError: If neither end knows the hashlock of `secret`.
        """
        hashlock = sha3(secret)

        our_known = self.our_state.balance_proof.is_known(hashlock)
        partner_known = self.partner_state.balance_proof.is_known(hashlock)

        if not (our_known or partner_known):
            raise ValueError(
                'Secret doesnt correspond to a registered hashlock. hashlock:{} asset:{}'.format(
                    pex(hashlock),
                    pex(self.asset_address),
                )
            )

        # (is known, end holding the lock, opposite end); our end goes first.
        candidates = (
            (our_known, self.our_state, self.partner_state),
            (partner_known, self.partner_state, self.our_state),
        )

        for is_known, holder, counterpart in candidates:
            if not is_known:
                continue

            lock = holder.balance_proof.get_lock_by_hashlock(hashlock)

            if log.isEnabledFor(logging.DEBUG):
                log.debug(
                    'SECRET REGISTERED node:%s %s > %s asset:%s hashlock:%s amount:%s',
                    pex(self.our_state.address),
                    pex(holder.address),
                    pex(counterpart.address),
                    pex(self.asset_address),
                    pex(hashlock),
                    lock.amount,
                )

            holder.register_secret(secret)

    def release_lock(self, secret):
        """ Release the lock of a transfer that this node initiated.

        Only the sender of a mediated transfer may release a lock. The
        receiver may already know the secret, but it has to wait for a message
        from the initiator, because the sender must coordinate the state
        updates (the hashlocks of in-transit and/or queued transfers have to
        stay in sync with the state known by the partner).

        Note:
            Releasing a lock should always be accompanied by at least one
            Secret message to the partner node.

            The node should also release the locks for the refund transfer.

        Raises:
            ValueError: If the partner's balance proof does not know the
                hashlock of `secret`.
        """
        hashlock = sha3(secret)
        partner_proof = self.partner_state.balance_proof

        if not partner_proof.is_known(hashlock):
            msg = 'The secret doesnt unlock any hashlock. hashlock:{} asset:{}'.format(
                pex(hashlock),
                pex(self.asset_address),
            )
            raise ValueError(msg)

        lock = partner_proof.get_lock_by_hashlock(hashlock)

        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                'ASSET UNLOCKED %s > %s asset:%s hashlock:%s lockhash:%s amount:%s',
                pex(self.our_state.address),
                pex(self.partner_state.address),
                pex(self.asset_address),
                pex(hashlock),
                pex(sha3(lock.as_bytes)),
                lock.amount,
            )

        self.partner_state.release_lock(self.our_state, secret)

    def withdraw_lock(self, secret):
        """ Withdraw the funds of a lock released by the sender and update our state.

        Raises:
            ValueError: If our balance proof does not know the hashlock of
                `secret`.
        """
        hashlock = sha3(secret)
        our_proof = self.our_state.balance_proof

        if not our_proof.is_known(hashlock):
            raise ValueError(
                'The secret doesnt withdraw any hashlock. hashlock:{} asset:{}'.format(
                    pex(hashlock),
                    pex(self.asset_address),
                )
            )

        lock = our_proof.get_lock_by_hashlock(hashlock)

        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                'ASSET WITHDRAWED %s < %s asset:%s hashlock:%s lockhash:%s amount:%s',
                pex(self.our_state.address),
                pex(self.partner_state.address),
                pex(self.asset_address),
                pex(hashlock),
                pex(sha3(lock.as_bytes)),
                lock.amount,
            )

        self.our_state.release_lock(self.partner_state, secret)

    def register_transfer(self, transfer):
        """ Register a signed transfer, updating the channel's state accordingly. """

        if transfer.recipient == self.partner_state.address:
            self.register_transfer_from_to(
                transfer,
                from_state=self.our_state,
                to_state=self.partner_state,
            )

            self.sent_transfers.append(transfer)

        elif transfer.recipient == self.our_state.address:
            self.register_transfer_from_to(
                transfer,
                from_state=self.partner_state,
                to_state=self.our_state,
            )
            self.received_transfers.append(transfer)

        else:
            raise ValueError('Invalid address')

    def register_transfer_from_to(self, transfer, from_state, to_state):  # noqa pylint: disable=too-many-branches
        """ Validates and register a signed transfer, updating the channel's state accordingly.

        Note:
            The transfer must be register before it is sent, not on
            acknowledgement. That is necessary for to reasons:

            - Guarantee that the transfer is valid.
            - Avoiding sending a new transaction without funds.

        Raises:
            InsufficientBalance: If the transfer is negative or above the distributable amount.
            InvalidLocksRoot: If locksroot check fails.
            InvalidLockTime: If the transfer has expired.
            InvalidNonce: If the expected nonce does not match.
            InvalidSecret: If there is no lock registered for the given secret.
            ValueError: If there is an address mismatch (asset or node address).
        """
        if transfer.asset != self.asset_address:
            raise ValueError('Asset address mismatch')

        if transfer.recipient != to_state.address:
            raise ValueError('Unknow recipient')

        if transfer.sender != from_state.address:
            raise ValueError('Unsigned transfer')

        # nonce is changed only when a transfer is un/registered, if the test
        # fail either we are out of sync, a message out of order, or it's an
        # forged transfer
        if transfer.nonce < 1 or transfer.nonce != from_state.nonce:
            raise InvalidNonce(transfer)

        # if the locksroot is out-of-sync (because a transfer was created while
        # a Secret was in trafic) the balance _will_ be wrong, so first check
        # the locksroot and then the balance
        if isinstance(transfer, LockedTransfer):
            block_number = self.external_state.get_block_number()

            if to_state.balance_proof.is_pending(transfer.lock.hashlock):
                raise ValueError('hashlock is already registered')

            # As a receiver: Check that all locked transfers are registered in
            # the locksroot, if any hashlock is missing there is no way to
            # claim it while the channel is closing
            expected_locksroot = to_state.compute_merkleroot_with(
                transfer.lock)
            if expected_locksroot != transfer.locksroot:
                if log.isEnabledFor(logging.ERROR):
                    log.error(
                        'LOCKSROOT MISMATCH node:%s %s > %s lockhash:%s lockhashes:%s',
                        pex(self.our_state.address),
                        pex(from_state.address),
                        pex(to_state.address),
                        pex(sha3(transfer.lock.as_bytes)),
                        lpex(to_state.balance_proof.unclaimed_merkletree()),
                        expected_locksroot=pex(expected_locksroot),
                        received_locksroot=pex(transfer.locksroot),
                    )

                raise InvalidLocksRoot(transfer)

            # As a receiver: If the lock expiration is larger than the settling
            # time a secret could be revealed after the channel is settled and
            # we won't be able to claim the asset
            if not transfer.lock.expiration - block_number < self.settle_timeout:
                log.error(
                    "Transfer expiration doesn't allow for correct settlement.",
                    lock_expiration=transfer.lock.expiration,
                    current_block=block_number,
                    settle_timeout=self.settle_timeout,
                )

                raise ValueError(
                    "Transfer expiration doesn't allow for correct settlement."
                )

            if not transfer.lock.expiration - block_number > self.reveal_timeout:
                log.error(
                    'Expiration smaller than the minimum required.',
                    lock_expiration=transfer.lock.expiration,
                    current_block=block_number,
                    reveal_timeout=self.reveal_timeout,
                )

                raise ValueError(
                    'Expiration smaller than the minimum required.')

        # only check the balance if the locksroot matched
        if transfer.transferred_amount < from_state.transferred_amount:
            if log.isEnabledFor(logging.ERROR):
                log.error(
                    'NEGATIVE TRANSFER node:%s %s > %s %s',
                    pex(self.our_state.address),
                    pex(from_state.address),
                    pex(to_state.address),
                    transfer,
                )

            raise ValueError('Negative transfer')

        amount = transfer.transferred_amount - from_state.transferred_amount
        distributable = from_state.distributable(to_state)

        if amount > distributable:
            raise InsufficientBalance(transfer)

        if isinstance(transfer, LockedTransfer):
            if amount + transfer.lock.amount > distributable:
                raise InsufficientBalance(transfer)

        # all checks need to be done before the internal state of the channel
        # is changed, otherwise if a check fails and state was changed the
        # channel will be left trashed

        if isinstance(transfer, LockedTransfer):
            if log.isEnabledFor(logging.DEBUG):
                log.debug(
                    'REGISTERED LOCK node:%s %s > %s currentlocksroot:%s lockhashes:%s',
                    pex(self.our_state.address),
                    pex(from_state.address),
                    pex(to_state.address),
                    pex(to_state.balance_proof.merkleroot_for_unclaimed()),
                    lpex(to_state.balance_proof.unclaimed_merkletree()),
                    lock_amount=transfer.lock.amount,
                    lock_expiration=transfer.lock.expiration,
                    lock_hashlock=pex(transfer.lock.hashlock),
                    lockhash=pex(sha3(transfer.lock.as_bytes)),
                )

            to_state.register_locked_transfer(transfer)

            # register this channel as waiting for the secret (the secret can
            # be revealed through a message or an blockchain log)
            self.external_state.register_channel_for_hashlock(
                self,
                transfer.lock.hashlock,
            )

        if isinstance(transfer, DirectTransfer):
            to_state.register_direct_transfer(transfer)

        from_state.transferred_amount = transfer.transferred_amount
        from_state.nonce += 1

        if isinstance(transfer, DirectTransfer):
            # if we are the recipient, spawn callback for incoming transfers
            if transfer.recipient == self.our_state.address:
                for callback in self.on_withdrawable_callbacks:
                    gevent.spawn(
                        callback,
                        transfer.asset,
                        transfer.recipient,
                        transfer.sender,  # 'initiator' is sender here
                        transfer.transferred_amount,
                        None  # no hashlock in DirectTransfer
                    )

            # if we are the sender, call the 'success' callback
            elif from_state.address == self.our_state.address:
                callbacks_to_remove = list()
                for callback in self.on_task_completed_callbacks:
                    result = callback(
                        task=None,
                        success=True)  # XXX maybe use gevent.spawn()

                    if result is True:
                        callbacks_to_remove.append(callback)

                for callback in callbacks_to_remove:
                    self.on_task_completed_callbacks.remove(callback)

        if log.isEnabledFor(logging.DEBUG):
            log.debug(
                'REGISTERED TRANSFER node:%s %s > %s '
                'transfer:%s transferred_amount:%s nonce:%s '
                'current_locksroot:%s',
                pex(self.our_state.address),
                pex(from_state.address),
                pex(to_state.address),
                repr(transfer),
                from_state.transferred_amount,
                from_state.nonce,
                pex(to_state.balance_proof.merkleroot_for_unclaimed()),
            )

    def create_directtransfer(self, amount, identifier):
        """ Build an unsigned DirectTransfer from this node to the partner.

        The returned message still has to be signed and registered with the
        channel before it is sent.

        Args:
            amount: Value added on top of our current `transferred_amount`.
                It must be positive and not larger than what our side can
                distribute to the partner.
            identifier: Identifier stored in the message.

        Raises:
            ValueError: If the channel is not open, or if `amount` is not
                positive or exceeds the distributable balance.
        """
        if not self.isopen:
            raise ValueError('The channel is closed')

        sender = self.our_state
        receiver = self.partner_state
        available = sender.distributable(receiver)

        amount_is_invalid = amount <= 0 or amount > available
        if amount_is_invalid:
            log.debug(
                'Insufficient funds',
                amount=amount,
                distributable=available,
            )
            raise ValueError('Insufficient funds')

        new_transferred_amount = sender.transferred_amount + amount
        # The locksroot sent is the partner's current root for unclaimed locks.
        unclaimed_locksroot = receiver.balance_proof.merkleroot_for_unclaimed()

        return DirectTransfer(
            identifier=identifier,
            nonce=sender.nonce,
            asset=self.asset_address,
            transferred_amount=new_transferred_amount,
            recipient=receiver.address,
            locksroot=unclaimed_locksroot,
        )

    def create_lockedtransfer(self, amount, identifier, expiration, hashlock):
        """ Build an unsigned LockedTransfer carrying a new lock.

        The returned message still has to be signed and registered with the
        channel before it is sent.

        Args:
            amount: Value to lock; must be positive and not larger than what
                our side can distribute to the partner.
            identifier: Identifier stored in the message.
            expiration: Block number at which the lock expires. The distance
                to the current block must be strictly greater than
                `reveal_timeout` and strictly smaller than `settle_timeout`.
            hashlock: Hashlock of the new lock.

        Raises:
            ValueError: If the channel is not open, the expiration is out of
                the accepted window, or the amount is invalid.
        """
        if not self.isopen:
            raise ValueError('The channel is closed.')

        current_block = self.external_state.get_block_number()
        blocks_until_expiration = expiration - current_block

        # Upper bound: a lock outliving the settle timeout could not be
        # checked by the smart contract.
        if blocks_until_expiration >= self.settle_timeout:
            log.debug(
                'Lock expiration is larger than settle timeout.',
                expiration=expiration,
                block_number=current_block,
                settle_timeout=self.settle_timeout,
            )
            raise ValueError('Invalid expiration.')

        # Lower bound: with less than the reveal timeout left there is not
        # enough time to listen for the ChannelSecretRevealed event.
        if blocks_until_expiration <= self.reveal_timeout:
            log.debug(
                'Lock expiration is lower than reveal timeout.',
                expiration=expiration,
                block_number=current_block,
                reveal_timeout=self.reveal_timeout,
            )
            raise ValueError('Invalid expiration.')

        sender = self.our_state
        receiver = self.partner_state
        available = sender.distributable(receiver)

        amount_is_invalid = amount <= 0 or amount > available
        if amount_is_invalid:
            log.debug(
                'Insufficient funds',
                amount=amount,
                distributable=available,
            )
            raise ValueError('Insufficient funds')

        new_lock = Lock(amount, expiration, hashlock)

        # The locksroot has to account for the lock being added, while the
        # transferred amount is sent as it currently is.
        locksroot_with_lock = receiver.compute_merkleroot_with(include=new_lock)
        current_transferred_amount = sender.transferred_amount

        return LockedTransfer(
            identifier=identifier,
            nonce=sender.nonce,
            asset=self.asset_address,
            transferred_amount=current_transferred_amount,
            recipient=receiver.address,
            locksroot=locksroot_with_lock,
            lock=new_lock,
        )

    def create_mediatedtransfer(self, transfer_initiator, transfer_target, fee,
                                amount, identifier, expiration, hashlock):
        """ Build an unsigned MediatedTransfer.

        A LockedTransfer is created first (so every check done by
        `create_lockedtransfer` applies) and then converted. The returned
        message still has to be signed and registered with the channel before
        it is sent.

        Args:
            transfer_initiator (address): The node that requested the transfer.
            transfer_target (address): The final destination node of the
                transfer.
            fee: Fee forwarded to the conversion into a mediated transfer.
            amount: How much of an asset is being transferred.
            identifier: Identifier stored in the message.
            expiration (int): The maximum block number until the transfer
                message can be received.
            hashlock: Hashlock of the lock carried by the transfer.
        """
        base_transfer = self.create_lockedtransfer(
            amount,
            identifier,
            expiration,
            hashlock,
        )

        # Note the argument order of the conversion: target first, then
        # initiator.
        return base_transfer.to_mediatedtransfer(
            transfer_target,
            transfer_initiator,
            fee,
        )

    def create_refundtransfer_for(self, transfer):
        """ Build the RefundTransfer that mirrors the lock of `transfer`.

        Raises:
            ValueError: If the hashlock of `transfer` is not pending in our
                balance proof.
        """
        original_lock = transfer.lock

        is_known = self.our_state.balance_proof.is_pending(original_lock.hashlock)
        if not is_known:
            raise ValueError('Unknow hashlock')

        refund_base = self.create_lockedtransfer(
            original_lock.amount,
            1,  # TODO: Perhaps add identifier in the refund transfer too?
            original_lock.expiration,
            original_lock.hashlock,
        )

        return refund_base.to_refundtransfer()

    def create_timeouttransfer_for(self, transfer):
        """ Build the TransferTimeout message for `transfer`.

        Raises:
            ValueError: If the hashlock of `transfer` is not pending in our
                balance proof.
        """
        pending_lock = transfer.lock
        our_proof = self.our_state.balance_proof

        if our_proof.is_pending(pending_lock.hashlock):
            return TransferTimeout(
                transfer.hash,
                pending_lock.hashlock,
            )

        raise ValueError('Unknow hashlock')
Ejemplo n.º 44
0
class RaidenService:
    """ A Raiden node. """
    def __init__(
        self,
        chain: BlockChainService,
        query_start_block: typing.BlockNumber,
        default_registry: TokenNetworkRegistry,
        default_secret_registry: SecretRegistry,
        private_key_bin,
        transport,
        config,
        discovery=None,
    ):
        """ Wire up the node's collaborators and start it.

        Args:
            chain: Blockchain service; also handed to the AlarmTask and given
                this node's stop event.
            query_start_block: Stored on the instance as given.
            default_registry: Token network registry proxy used by default.
            default_secret_registry: Secret registry proxy used by default.
            private_key_bin: Raw private key, exactly 32 bytes.
            transport: Transport layer, started by `start()`.
            config: Mapping read for 'transport_type', 'shutdown_timeout' and
                'database_path', plus 'external_ip' / 'external_port' when the
                transport type is 'udp'.
            discovery: Used to register the endpoint when the transport type
                is 'udp'.

        Raises:
            ValueError: If `private_key_bin` is not a 32 bytes long `bytes`.
        """
        if not isinstance(private_key_bin,
                          bytes) or len(private_key_bin) != 32:
            raise ValueError('invalid private_key')

        self.tokennetworkids_to_connectionmanagers = dict()
        self.identifier_to_results = defaultdict(list)

        self.chain: BlockChainService = chain
        self.default_registry = default_registry
        self.query_start_block = query_start_block
        self.default_secret_registry = default_secret_registry
        self.config = config
        self.privkey = private_key_bin
        self.address = privatekey_to_address(private_key_bin)
        self.discovery = discovery

        if config['transport_type'] == 'udp':
            # The registration runs concurrently with the rest of the setup
            # and is joined further down, before the node is started.
            endpoint_registration_event = gevent.spawn(
                discovery.register,
                self.address,
                config['external_ip'],
                config['external_port'],
            )
            endpoint_registration_event.link_exception(
                endpoint_registry_exception_handler)

        self.private_key = PrivateKey(private_key_bin)
        self.pubkey = self.private_key.public_key.format(compressed=False)
        self.transport = transport

        self.blockchain_events = BlockchainEvents()
        self.alarm = AlarmTask(chain)
        self.shutdown_timeout = config['shutdown_timeout']
        self.stop_event = Event()
        self.start_event = Event()
        self.chain.client.inject_stop_event(self.stop_event)

        self.wal = None

        self.database_path = config['database_path']
        if self.database_path != ':memory:':
            database_dir = os.path.dirname(config['database_path'])
            # A bare file name has an empty dirname and os.makedirs('') raises
            # FileNotFoundError; the current directory needs no creation.
            if database_dir:
                os.makedirs(database_dir, exist_ok=True)

            self.database_dir = database_dir
            # Prevent concurrent access to the same db
            self.lock_file = os.path.join(self.database_dir, '.lock')
            self.db_lock = filelock.FileLock(self.lock_file)
        else:
            self.database_path = ':memory:'
            self.database_dir = None
            self.lock_file = None
            self.serialization_file = None
            self.db_lock = None

        if config['transport_type'] == 'udp':
            # If the endpoint registration fails the node will quit, this must
            # finish before starting the transport
            endpoint_registration_event.join()

        self.event_poll_lock = gevent.lock.Semaphore()

        self.start()

    def start(self):
        """ Start the node.

        The steps run in a fixed order: take the database lock (skipped when
        `database_dir` is None), restore the write-ahead log from the latest
        snapshot, prime the state on a first run, install the blockchain
        filters, run and start the alarm task, start the transport and the
        neighbour health checks, replay the events returned by the restore
        and finally set `start_event`.
        """
        # Allow a previously stopped node to be started again.
        if self.stop_event and self.stop_event.is_set():
            self.stop_event.clear()

        if self.database_dir is not None:
            # timeout=0: do not wait for a lock held by somebody else.
            self.db_lock.acquire(timeout=0)
            assert self.db_lock.is_locked

        # The database may be :memory:
        storage = sqlite.SQLiteStorage(self.database_path,
                                       serialize.PickleSerializer())
        self.wal, unapplied_events = wal.restore_from_latest_snapshot(
            node.state_transition,
            storage,
        )

        # No restored state means this is the first run with this database.
        if self.wal.state_manager.current_state is None:
            block_number = self.chain.block_number()

            state_change = ActionInitChain(
                random.Random(),
                block_number,
                self.chain.network_id,
            )
            self.wal.log_and_dispatch(state_change, block_number)
            payment_network = PaymentNetworkState(
                self.default_registry.address,
                [],  # empty list of token network states as it's the node's startup
            )
            state_change = ContractReceiveNewPaymentNetwork(payment_network)
            self.handle_state_change(state_change)

            # On first run Raiden needs to fetch all events for the payment
            # network, to reconstruct all token network graphs and find opened
            # channels
            last_log_block_number = 0
        else:
            # The `Block` state change is dispatched only after all the events
            # for that given block have been processed, filters can be safely
            # installed starting from this position without losing events.
            last_log_block_number = views.block_number(
                self.wal.state_manager.current_state)

        # Install the filters using the correct from_block value, otherwise
        # blockchain logs can be lost.
        self.install_all_blockchain_filters(
            self.default_registry,
            self.default_secret_registry,
            last_log_block_number,
        )

        # Complete the first_run of the alarm task and synchronize with the
        # blockchain since the last run.
        #
        # Notes about setup order:
        # - The filters must be polled after the node state has been primed,
        # otherwise the state changes won't have effect.
        # - The alarm must complete its first run before the transport is
        # started, to avoid rejecting messages for unknown channels.
        self.alarm.register_callback(self._callback_new_block)
        self.alarm.first_run()

        self.alarm.start()

        # The transport is started with the message queues found in the
        # node state.
        queueids_to_queues = views.get_all_messagequeues(
            views.state_from_raiden(self))
        self.transport.start(self, queueids_to_queues)

        # Health check needs the transport layer
        self.start_neighbours_healthcheck()

        # Handle the events handed back by the restore above.
        for event in unapplied_events:
            on_raiden_event(self, event)

        self.start_event.set()

    def start_neighbours_healthcheck(self):
        """ Start a health check for every neighbour in the current state.

        The connection manager's bootstrap address is skipped.
        """
        current_state = self.wal.state_manager.current_state
        neighbours = views.all_neighbour_nodes(current_state)

        real_neighbours = (
            address
            for address in neighbours
            if address != ConnectionManager.BOOTSTRAP_ADDR
        )
        for address in real_neighbours:
            self.start_health_check_for(address)

    def stop(self):
        """ Stop the node.

        Signals shutdown, stops the transport and the alarm task, waits for
        their greenlets, uninstalls the blockchain event listeners and
        releases the database lock when there is one.
        """
        # The stop event has to be set before any greenlet is joined.
        self.stop_event.set()
        self.transport.stop_and_wait()
        self.alarm.stop_async()

        pending_greenlets = [self.alarm]
        pending_greenlets += getattr(self.transport, 'greenlets', [])
        # Bounded wait: a disconnected client must not keep the shutdown
        # looping forever.
        gevent.wait(pending_greenlets, timeout=self.shutdown_timeout)

        # The events are polled by an alarm task callback, so the filters may
        # only be uninstalled once the alarm task has fully stopped, otherwise
        # the callback `poll_blockchain_events` will fail.
        #
        # This is also bounded by a timeout, for the same reason as the wait
        # above.
        try:
            with gevent.Timeout(self.shutdown_timeout):
                self.blockchain_events.uninstall_all_event_listeners()
        except (gevent.timeout.Timeout, RaidenShuttingDown):
            pass

        self.blockchain_events.reset()

        database_lock = self.db_lock
        if database_lock is not None:
            database_lock.release()

    def __repr__(self):
        """ Return `<ClassName address>` with the address passed through pex. """
        class_name = self.__class__.__name__
        short_address = pex(self.address)
        return '<{} {}>'.format(class_name, short_address)

    def get_block_number(self):
        """ Return the block number stored in the node's current state. """
        current_state = self.wal.state_manager.current_state
        return views.block_number(current_state)

    def handle_state_change(self, state_change, block_number=None):
        """ Log and dispatch `state_change`, then handle the produced events.

        Args:
            state_change: State change handed to the write-ahead log.
            block_number: Block number passed along with it; defaults to the
                node's current block number.

        Returns:
            The events returned by the dispatch, after each one has been
            passed to `on_raiden_event`.
        """
        log.debug(
            'STATE CHANGE',
            node=pex(self.address),
            state_change=state_change,
        )

        if block_number is None:
            dispatch_block = self.get_block_number()
        else:
            dispatch_block = block_number

        raiden_events = self.wal.log_and_dispatch(state_change, dispatch_block)

        for raiden_event in raiden_events:
            log.debug(
                'RAIDEN EVENT',
                node=pex(self.address),
                raiden_event=raiden_event,
            )
            on_raiden_event(self, raiden_event)

        return raiden_events

    def set_node_network_state(self, node_address, network_state):
        """ Record a network state change for `node_address` in the log.

        The state change goes straight to the write-ahead log; the events
        returned by the dispatch are not handled here.
        """
        network_state_change = ActionChangeNodeNetworkState(
            node_address,
            network_state,
        )
        current_block = self.get_block_number()
        self.wal.log_and_dispatch(network_state_change, current_block)

    def start_health_check_for(self, node_address):
        """ Ask the transport to start health-checking `node_address`. """
        transport = self.transport
        transport.start_health_check(node_address)

    def _callback_new_block(self, current_block_number, chain_id):
        """ Process a newly detected block; invoked by the alarm task.

        Note:
            This must run exactly once per block, with the block number
            supplied by the AlarmTask, otherwise duplicated `Block` state
            changes end up in the log.
        """
        # Raiden updates its off-chain state from blockchain events. Some
        # APIs with on-chain side-effects (e.g. opening a channel) /used/ to
        # poll for events themselves, so that the caller saw a consistent
        # view once the call returned.
        #
        # Events are handed out only once per filter, so polling from both
        # the API and the AlarmTask raced. The lock below keeps the expected
        # side-effects properly applied (introduced by the commit
        # 3686b3275ff7c0b669a6d5e2b34109c3bdf1921d).
        with self.event_poll_lock:
            polled_events = self.blockchain_events.poll_blockchain_events(
                current_block_number,
            )
            for blockchain_event in polled_events:
                # These state changes are processed with a block_number that
                # is /larger/ than the ChainState's block_number.
                on_blockchain_event(
                    self,
                    blockchain_event,
                    current_block_number,
                    chain_id,
                )

            # After a restart the filters are re-created with from_block set
            # to the latest Block state change. Dispatching the Block state
            # change only after all of its events were processed ensures no
            # event is missed.
            #
            # The price is that, in some corner cases, a few events may be
            # applied twice: when the node crashed after processing events
            # but before dispatching the Block state change.
            block_state_change = Block(current_block_number)
            self.handle_state_change(block_state_change, current_block_number)

    def sign(self, message):
        """ Sign `message` in place with this node's private key.

        Raises:
            ValueError: If `message` is not a SignedMessage.
        """
        if isinstance(message, SignedMessage):
            message.sign(self.private_key)
            return

        raise ValueError('{} is not signable.'.format(repr(message)))

    def install_all_blockchain_filters(
        self,
        token_network_registry_proxy,
        secret_registry_proxy,
        from_block,
    ):
        """ Register blockchain listeners for everything the node knows about.

        Listeners are added, all starting at `from_block`, for the token
        network registry, the secret registry, every token network of that
        registry found in the node state and every channel in the node state.
        The whole installation happens while holding `event_poll_lock`.
        """
        with self.event_poll_lock:
            chain_state = views.state_from_raiden(self)
            known_channels = views.list_all_channelstate(chain_state)
            known_token_networks = views.get_token_network_identifiers(
                chain_state,
                token_network_registry_proxy.address,
            )

            events = self.blockchain_events
            events.add_token_network_registry_listener(
                token_network_registry_proxy,
                from_block,
            )
            events.add_secret_registry_listener(
                secret_registry_proxy,
                from_block,
            )

            for token_network_identifier in known_token_networks:
                network_proxy = self.chain.token_network(
                    token_network_identifier)
                events.add_token_network_listener(network_proxy, from_block)

            for channel in known_channels:
                payment_channel_proxy = self.chain.payment_channel(
                    channel.token_network_identifier,
                    channel.identifier,
                )
                events.add_payment_channel_listener(
                    payment_channel_proxy,
                    from_block,
                )

    def connection_manager_for_token_network(self, token_network_identifier):
        """ Return the ConnectionManager of a token network, creating it once.

        Raises:
            InvalidAddress: If the identifier is not a binary address or is
                not among the token networks of the default registry.
        """
        if not is_binary_address(token_network_identifier):
            raise InvalidAddress('token address is not valid.')

        registered_token_networks = views.get_token_network_identifiers(
            views.state_from_raiden(self),
            self.default_registry.address,
        )
        if token_network_identifier not in registered_token_networks:
            raise InvalidAddress('token is not registered.')

        managers = self.tokennetworkids_to_connectionmanagers
        existing_manager = managers.get(token_network_identifier)
        if existing_manager is not None:
            return existing_manager

        # First request for this token network: create and remember it.
        new_manager = ConnectionManager(self, token_network_identifier)
        managers[token_network_identifier] = new_manager
        return new_manager

    def leave_all_token_networks(self):
        """ Log and dispatch an ActionLeaveAllNetworks state change.

        The events returned by the dispatch are not handled here.
        """
        leave_all = ActionLeaveAllNetworks()
        self.wal.log_and_dispatch(leave_all, self.get_block_number())

    def close_and_settle(self):
        """ Leave all token networks and wait until every channel settled.

        The wait is skipped when no connection manager has been created.
        """
        log.info('raiden will close and settle all channels now')

        self.leave_all_token_networks()

        if not self.tokennetworkids_to_connectionmanagers:
            return

        waiting.wait_for_settle_all_channels(
            self,
            self.alarm.sleep_time,
        )

    def mediated_transfer_async(
        self,
        token_network_identifier,
        amount,
        target,
        identifier,
    ):
        """ Transfer `amount` between this node and `target`.

        Starts an asynchronous transfer and returns the result object of
        `start_mediated_transfer`. Whether the transfer succeeds depends on:

            - A usable path existing, through direct or intermediary
              channels.
            - The network being fast enough for the transfer not to expire.
        """
        return self.start_mediated_transfer(
            token_network_identifier,
            amount,
            target,
            identifier,
        )

    def direct_transfer_async(self, token_network_identifier, amount, target,
                              identifier):
        """ Do a direct transfer with target.

        A direct transfer is a signed balance proof with the transferred
        amount incremented, which makes it non cancellable and non expirable.
        This implies a level of trust in the target: once the message is sent
        the target is effectively paid and the payment cannot be reverted.

        It is meant as an optimization: from the payer's perspective only two
        packets are required, whereas a mediated transfer requires 6
        messages.

        A default identifier is generated when `identifier` is None.

        Note: this method creates no async result and returns None; the
        ActionTransferDirect state change is dispatched through
        `handle_state_change`.
        """
        self.start_health_check_for(target)

        if identifier is None:
            payment_identifier = create_default_identifier()
        else:
            payment_identifier = identifier

        transfer_action = ActionTransferDirect(
            token_network_identifier,
            target,
            payment_identifier,
            amount,
        )
        self.handle_state_change(transfer_action)

    def start_mediated_transfer(
        self,
        token_network_identifier,
        amount,
        target,
        identifier,
    ):
        """ Start a mediated transfer as its initiator.

        A default identifier is generated when `identifier` is None, and the
        identifier must not already have results registered. A fresh random
        secret is used for the transfer.

        Returns:
            The AsyncResult registered under the identifier in
            `identifier_to_results`.
        """
        self.start_health_check_for(target)

        if identifier is None:
            identifier = create_default_identifier()

        assert identifier not in self.identifier_to_results

        transfer_result = AsyncResult()
        self.identifier_to_results[identifier].append(transfer_result)

        transfer_secret = random_secret()
        init_initiator = initiator_init(
            self,
            identifier,
            amount,
            transfer_secret,
            token_network_identifier,
            target,
        )

        # TODO: implement the network timeout raiden.config['msg_timeout'] and
        # cancel the current transfer if it happens (issue #374)
        #
        # The state change is dispatched even when there are no routes, so
        # that the wal entry is created.
        self.handle_state_change(init_initiator)

        return transfer_result

    def mediate_mediated_transfer(self, transfer: LockedTransfer):
        """ Dispatch the mediator init state change built for `transfer`. """
        self.handle_state_change(mediator_init(self, transfer))

    def target_mediated_transfer(self, transfer: LockedTransfer):
        """ Handle `transfer` as its target.

        Starts a health check for the transfer's initiator and dispatches the
        target init state change.
        """
        initiator = transfer.initiator
        self.start_health_check_for(initiator)
        self.handle_state_change(target_init(transfer))

    # demo send crosstransaction
    def start_crosstransaction(self, token_network_identifier, target_address,
                               initiator_address, sendETH_amount,
                               sendBTC_amount, receiveBTC_address, cross_type,
                               identifier):
        """ Send a signed Crosstransaction message to `target_address`.

        When `cross_type` is 1 the transaction is first stored through the
        write-ahead log.

        NOTE(review): the `identifier` argument is ignored, a new cross id is
        always generated; confirm whether that is intended.

        Returns:
            The AsyncResult registered under the generated cross id.
        """
        cross_id = create_default_crossid()

        pending_result = AsyncResult()
        self.identifier_to_results[cross_id].append(pending_result)

        self.transport.start_health_check(target_address)

        if cross_type == 1:
            self.wal.create_crosstransactiontry(
                initiator_address,
                target_address,
                token_network_identifier,
                sendETH_amount,
                sendBTC_amount,
                receiveBTC_address,
                cross_id,
            )
            print("write data to sqlite")
            print(self.wal.get_crosstransaction_by_identifier(cross_id))

        message_identifier = random.randint(0, UINT64_MAX)
        message = Crosstransaction(
            message_identifier,
            initiator_address,
            target_address,
            token_network_identifier,
            sendETH_amount,
            sendBTC_amount,
            receiveBTC_address,
            cross_type,
            cross_id,
        )
        self.sign(message)

        queue_name = bytes("123", 'utf-8')
        self.transport.send_async(target_address, queue_name, message)

        return pending_result

    # demo
    def start_send_crosstansfer(self, cross_id, identifier=None):
        """ Start the transfer of a stored cross transaction.

        The stored row for `cross_id` provides the target (index 2), the
        token network identifier (index 3), the amount (index 4) and the BTC
        amount (index 5). A fresh random secret is generated and the
        initiator state change is handed to `handle_cross_state_change`.

        `identifier` is accepted but not used.
        """
        stored_row = self.wal.get_crosstransaction_by_identifier(cross_id)
        print(stored_row)

        transfer_amount = stored_row[4]
        transfer_target = stored_row[2]
        bitcoin_amount = stored_row[5]
        network_identifier = stored_row[3]

        self.transport.start_health_check(transfer_target)

        transfer_secret = random_secret()
        init_initiator = initiator_init(
            self,
            cross_id,
            transfer_amount,
            transfer_secret,
            network_identifier,
            transfer_target,
        )
        print("init_initiator_statechange: ", init_initiator)

        self.handle_cross_state_change(
            init_initiator,
            cross_id,
            transfer_secret,
            bitcoin_amount,
        )

    def get_crosstransaction_by_crossid(self, cross_id):
        """ Return the stored cross transaction for `cross_id` as a list.

        The entries at index 1, 2 and 3 are passed through
        `to_normalized_address`; the other entries are returned as stored.
        """
        row = list(self.wal.get_crosstransaction_by_identifier(cross_id))

        for address_index in (1, 2, 3):
            row[address_index] = to_normalized_address(row[address_index])

        return row

    def get_crosstransaction_all(self):
        """ Return every cross transaction known to the write-ahead log. """
        return self.wal.get_all_crosstransaction()

    def handle_cross_state_change(self,
                                  state_change,
                                  cross_id,
                                  secret,
                                  btc_amount,
                                  block_number=None):
        """ Dispatch `state_change` and send its locked transfer cross-chain.

        After the state change is logged and dispatched, an invoice is
        requested from the LND node configured under `config['lnd_address']`,
        using `secret` (base64 encoded) as the preimage. Every
        SendLockedTransfer event produced by the dispatch is then wrapped,
        together with the invoice's payment request, into a
        CrossLockedTransfer and sent to its recipient; all other events are
        handled by `on_raiden_event`.

        Args:
            state_change: State change handed to the write-ahead log.
            cross_id: Identifier of the cross transaction being processed.
            secret: Secret of the transfer, as bytes.
            btc_amount: Value of the requested invoice.
            block_number: Block number for the dispatch; defaults to the
                node's current block number.

        Raises:
            requests.HTTPError: If LND answers the invoice request with an
                HTTP error status.
        """
        if block_number is None:
            block_number = self.get_block_number()

        event_list = self.wal.log_and_dispatch(state_change, block_number)

        row = self.wal.storage.get_lnd(1)
        macaroon = row[4]
        lnd_url = "https://{}/v1/invoices".format(self.config['lnd_address'])
        lnd_headers = {'Grpc-Metadata-macaroon': macaroon}
        lnd_r = base64.b64encode(secret)
        lnd_data = {
            'value': btc_amount,
            'r_preimage': lnd_r.decode('utf-8'),
            'type': "CROSS_CHAIN_INVOICE"
        }

        # NOTE(review): verify=False disables TLS certificate verification for
        # a request that carries the macaroon. Prefer pointing `verify` at the
        # LND certificate.
        res = requests.post(lnd_url,
                            headers=lnd_headers,
                            data=json.dumps(lnd_data),
                            verify=False)
        # Surface a rejected invoice as an HTTP error here, not as a KeyError
        # on the response body below.
        res.raise_for_status()

        res_json = res.json()
        lnd_r_hash = res_json['r_hash']
        lnd_payment_request = res_json['payment_request']
        log.debug('send invoice succ', lnd_r_hash=lnd_r_hash)

        for event in event_list:
            log.debug('RAIDEN EVENT',
                      node=pex(self.address),
                      raiden_event=event)

            if type(event) == SendLockedTransfer:
                locked_transfer_message = message_from_sendevent(
                    event, self.address)
                self.sign(locked_transfer_message)
                # Remember both hashes so the two payments can be correlated.
                self.wal.storage.change_crosstransaction_r(
                    cross_id,
                    encode_hex(locked_transfer_message.lock.secrethash),
                    lnd_r_hash)
                lnd_string = bytes(lnd_payment_request, "utf-8")
                cross_transfer_message = CrossLockedTransfer(
                    locked_transfer_message, cross_id, lnd_string)
                self.sign(cross_transfer_message)
                self.transport.send_async(cross_transfer_message.recipient,
                                          bytes("456", 'utf-8'),
                                          cross_transfer_message)
                log.debug('corss_message send ok')
                continue

            on_raiden_event(self, event)

    def cross_handle_recieved_locked_transfer(self, transfer, cross_id):
        """ Handle a received locked transfer of a cross transaction.

        Starts a health check for the transfer's initiator and dispatches the
        target init state change. Every SendSecretRequest event produced is
        wrapped into a signed CrossSecretRequest carrying `cross_id` and sent
        through the transport; all other events go to `on_raiden_event`.

        Returns:
            The events returned by the dispatch.
        """
        self.start_health_check_for(transfer.initiator)
        init_target = target_init(transfer)

        current_block = self.get_block_number()
        raiden_events = self.wal.log_and_dispatch(init_target, current_block)

        for raiden_event in raiden_events:
            log.debug(
                'RAIDEN EVENT',
                node=pex(self.address),
                raiden_event=raiden_event,
            )

            if type(raiden_event) == SendSecretRequest:
                secret_request = message_from_sendevent(
                    raiden_event,
                    self.address,
                )
                self.sign(secret_request)

                cross_secret_request = CrossSecretRequest(
                    secret_request,
                    cross_id,
                )
                self.sign(cross_secret_request)

                self.transport.send_async(
                    raiden_event.recipient,
                    raiden_event.queue_name,
                    cross_secret_request,
                )
            else:
                on_raiden_event(self, raiden_event)

        return raiden_events

    def send_payment_request(self, lnd_string):
        """ Ask the configured LND node to pay the payment request.

        The outcome is only logged: a response with a status other than 200
        produces a warning, no exception is raised for it.

        Args:
            lnd_string: Payment request sent as the `payment_request` field.
        """
        row = self.wal.storage.get_lnd(1)
        macaroon = row[4]
        lnd_url = "https://{}/v1/channels/transactions".format(
            self.config['lnd_address'])
        lnd_headers = {'Grpc-Metadata-macaroon': macaroon}
        data = {'payment_request': lnd_string}
        # NOTE(review): verify=False disables TLS certificate verification for
        # a request that carries the macaroon. Prefer pointing `verify` at the
        # LND certificate.
        res = requests.post(lnd_url,
                            headers=lnd_headers,
                            data=json.dumps(data),
                            verify=False)
        if res.status_code == 200:
            log.debug("send payment request to lnd succ")
        else:
            # Previously a failed request went completely unnoticed.
            log.warning('send payment request to lnd failed',
                        status_code=res.status_code)
Ejemplo n.º 45
0
class SubProcess(Greenlet):
    """
    Greenlet based execution of an external command.

    The command is launched with subprocess.Popen when the greenlet runs and
    is polled until it exits on its own, the optional timeout expires or
    abort() is called.  The return code, the captured stdout/stderr and the
    timing information are then available through the accessor methods.
    """
    def __init__(self, command, timeout=None):
        """
        Initialize the function

        command should be a list() identifying the executable as its first
        entry followed by all of the arguments to pass into it.

        timeout is the maximum number of seconds the command may run for
        before it is killed; None (or zero) disables the restriction.
        """
        Greenlet.__init__(self, run=None)

        # we abort if this is set
        self._abort = Event()

        # this is set when a command has completed execution
        self._done = Event()

        # Tracks the PID of the item being executed
        self._pid = None

        # The return code is set after the programs execution
        self._returncode = ReturnCode.Unknown

        # The command itself should be a list() identifying the executable
        # as the first entry followed by all of the arguments you wish to
        # pass into it.
        self._cmd = command

        # Since we need to poll until the execution of the process is
        # complete, we need to set a poll time.
        self._throttle = 0.5

        # Track when the execution started
        self._execution_begin = None

        # Track when the execution completed
        self._execution_finish = None

        # The number of seconds at most we will allow the execution of the
        # process to run for before we force it to abort its operation.
        # Zero disables this timeout restriction.
        self._timeout = timeout if timeout else 0.0

        # These are populated with the output of the stdout and
        # stderr stream.
        self._stdout = StringIO()
        self._stderr = StringIO()

    def elapsed(self):
        """
        Returns the elapsed time (as a float) of the execution which
        includes the number of microseconds.

        Returns 0.0 if the execution has not started yet.  While the process
        is still running the time is measured up until now; afterwards it is
        the total execution time.
        """
        if self._execution_begin is None:
            # No elapsed time has taken place yet
            return 0.0

        if self._execution_finish is not None:
            # Execution has completed, we only want the execution time.
            elapsed_time = self._execution_finish - self._execution_begin

        else:
            # Still running; measure against the current time
            elapsed_time = datetime.utcnow() - self._execution_begin

        return elapsed_time.total_seconds()

    def _sigkill_child(self):
        """
        Best-effort SIGKILL of the tracked PID.

        Failures (no PID tracked, process already gone, ...) are ignored on
        purpose.  Only Exception is caught so that GreenletExit and
        KeyboardInterrupt are not swallowed.
        """
        try:
            kill(self._pid, signal.SIGKILL)
        except Exception:
            pass

    def _record_completion(self, process, returncode):
        """
        Stores the final state of a finished (or killed) process and sets
        the done flag.
        """
        self._returncode = returncode

        # Execution Completion Time
        self._execution_finish = datetime.utcnow()

        # Retrieve stdout/stderr
        self._stdout = StringIO(process.stdout.read())
        self._stderr = StringIO(process.stderr.read())

        # Make sure no one uses the PID anymore
        self._pid = None

        # Set our done flag
        self._done.set()

    def _run(self):
        """
        Executes the command and polls it until it completes, times out
        or is aborted; the outcome is stored on the object.

        NOTE(review): stdout/stderr are only read once polling has ended, so
        a child that produces more output than the pipes can buffer
        presumably stalls until the timeout or an abort -- confirm.
        """

        # Make sure our done flag is not set
        self._done.clear()

        # Execute our Process
        p1 = subprocess.Popen(
            self._cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        # Calculate Current Time
        self._execution_begin = datetime.utcnow()

        # Store some information
        self._pid = p1.pid

        # Calculate Wait Time
        max_wait_time = self._execution_begin + \
            timedelta(seconds=self._timeout)

        while p1.poll() is None and not self._abort.is_set():
            # Head of Poll Loop

            if self._timeout and datetime.utcnow() >= max_wait_time:
                # Process aborted (took too long)
                self._sigkill_child()
                self._record_completion(p1, ReturnCode.Timeout)
                return

            # CPU Throttle; wakes up early when an abort is requested
            self._abort.wait(self._throttle)

        if p1.poll() is None or self._abort.is_set():
            # Safety
            self._sigkill_child()

            # Force bad return code
            returncode = ReturnCode.Aborted

        else:
            # Store return code
            returncode = p1.returncode

        self._record_completion(p1, returncode)

        # We're done!
        return

    def is_complete(self, timeout=None):
        """
        Returns True if the process has completed its execution
        if timeout is set to a time, then the function blocks up until that
        period of time elapses or the call completes.

        Times should be specified as float values (in seconds).

        """
        if timeout is not None:
            self._done.wait(timeout)

        return self._done.is_set()

    def response_code(self):
        """
        Returns the return code recorded for the command (one of the
        ReturnCode values until/unless the program exits on its own).

        """
        return self._returncode

    def successful(self):
        """
        Returns True if the calling action was successful or not.  This call
        can be subjective because it bases its response simply on whether or
        not a zero (0) was returned by the program called. Usually a non-zero
        value means there was a failure.

        """
        # Compare by value; identity checks against int literals only work
        # by accident of the interpreter's small integer cache.
        return self._returncode == 0

    def stdout(self, as_list=True):
        """
        if as_list is set to True, then the stdout results are split on new
        lines into a list object
        """
        # Ensure we're at the head of our buffer
        self._stdout.seek(0, SEEK_SET)

        if as_list:
            return NEW_LINE_RE.split(self._stdout.read())
        return self._stdout.read()

    def stderr(self, as_list=True):
        """
        if as_list is set to True, then the stderr results are split on new
        lines into a list object

        """
        # Ensure we're at the head of our buffer
        self._stderr.seek(0, SEEK_SET)

        if as_list:
            return NEW_LINE_RE.split(self._stderr.read())
        return self._stderr.read()

    def pid(self):
        """
        returns the pid number of the running process, but returns None if
        the process is no longer running.
        """
        return self._pid

    def abort(self):
        """
        Abort the executing command

        Sets the abort flag, kills the process and, if a PID is still being
        tracked, waits up to 10 seconds for the greenlet to finish.
        """
        self._abort.set()
        self._sigkill_child()

        if self._pid:
            self.join(timeout=10.0)

    def __str__(self):
        """
        returns the command being executed

        """
        return ' '.join(self._cmd)

    def __repr__(self):
        """
        Return a printable summary of the command, its elapsed time and
        its return code

        """
        return '<SubProcess cmd=%s execution_time=%ds return_code=%d />' % (
            self._cmd[0],
            self.elapsed(),
            self._returncode,
        )
Ejemplo n.º 46
0
class HuskarApiIOLoop(IOLoop):
    '''
    HuskarApiIOLoop is responsible for running eventloop connected to
    huskar api. The design is to use long polling for all requests, in
    disregard of whenever it's required.
    '''
    def initialize(self,
                   url,
                   token,
                   cache_dir="/tmp/huskar",
                   max_alive_time=10 * 60,
                   reconnect_gap=60):
        '''
        Set up the session, the state events and the watched components.

        :param url: base url; the long poll path is appended to it.
        :param token: value sent in the ``Authorization`` header.
        :param cache_dir: directory handed to the base class and to each
            ``Component``.
        :param max_alive_time: upper bound, in seconds, for the age of a
            session before it is re-created (see jitter below).
        :param reconnect_gap: base number of seconds used to compute the
            retry delay after a failed poll.
        '''
        super(HuskarApiIOLoop, self).initialize(url, token, cache_dir)
        self.url_path = join_url(self.url, '/api/data/long_poll')
        self.init_session()
        self.connected = Event()
        self.stop_loop_event = Event()
        # ``stopped`` starts out set: the loop is not running until
        # start_long_poll() clears it.
        self.stopped = Event()
        self.stopped.set()
        self.is_disconnected = Event()
        self.next_watch_completed_event = Event()

        self.greenlet = None
        self.reconnect_gap = reconnect_gap
        self.has_once_connected = False
        # Randomise the effective lifetime to 80%-100% of max_alive_time.
        self.max_alive_time = (0.8 + 0.2 * random.random()) * max_alive_time

        self.watched_services = Component(self, 'services', cache_dir)
        self.watched_configs = Component(self, 'configs', cache_dir)
        self.watched_switches = Component(self, 'switches', cache_dir)

    def on_watch_list_changed(self, component_name):
        '''
        Request a session re-init on the next round, but only while
        connected. ``component_name`` is not used here.
        '''
        if self.connected.is_set():
            self.force_reinit_session_next_round()

    def force_reinit_session_next_round(self):
        '''
        Mark the session as expired and clear the "next watch completed"
        event.

        A creation time of 0 makes the age test in check_refresh_session()
        treat the session as older than ``max_alive_time``.
        '''
        # Race risks
        self.last_session_created_time = 0
        self.next_watch_completed_event.clear()

    def wait_for_next_loop(self, timeout):
        '''
        Block up to ``timeout`` on ``next_watch_completed_event`` and return
        the result of that wait.
        '''
        return self.next_watch_completed_event.wait(timeout)

    def check_refresh_session(self):
        '''
        Re-create the session once it is older than ``max_alive_time``.

        Also sets ``next_watch_completed_event`` when it is not set and no
        re-init is pending (creation time is not 0).

        :returns: True if a new session was created, False otherwise.
        '''
        if not self.next_watch_completed_event.is_set() and \
                self.last_session_created_time != 0:
            self.next_watch_completed_event.set()
        if time.time() - self.last_session_created_time > self.max_alive_time:
            self.init_session()
            return True
        return False

    def init_session(self):
        '''
        Create a fresh ``requests.Session`` with the User-Agent and
        Authorization headers and record its creation time.

        The SOA headers are only added when ``_soa_mode`` is not None.
        ``_soa_mode`` / ``_soa_cluster`` are not assigned in this class --
        presumably they come from the base class; verify.
        '''
        import requests
        self.session = requests.Session()
        # Prefix the session's existing User-Agent with USER_AGENT.
        self.session.headers['User-Agent'] = ' '.join(
            [USER_AGENT,
             self.session.headers.get('User-Agent', '')])
        self.session.headers['Authorization'] = self.token
        self.last_session_created_time = time.time()

        if self._soa_mode is None:
            return
        self.session.headers[SOA_MODE_HEADER] = self._soa_mode
        self.session.headers[SOA_CLUSTER_HEADER] = self._soa_cluster

    def is_running(self):
        '''
        Return ``self.greenlet`` (the spawned greenlet, or None if run() was
        never called), not a strict boolean.
        '''
        return self.greenlet

    def run(self):
        '''Spawn start_long_poll() in a greenlet unless one is held.'''
        if not self.greenlet:
            self.greenlet = gevent.spawn(self.start_long_poll)

    def stop(self, timeout=None, close_components=True):
        '''
        Ask the loop to stop and optionally close the watched components.

        :param timeout: when not None, wait that long on the ``stopped``
            event and return the result of the wait; when None the method
            returns None without waiting.
        :param close_components: close configs, services and switches.
        '''
        self.stop_loop_event.set()
        if close_components:
            self.watched_configs.close()
            self.watched_services.close()
            self.watched_switches.close()

        if timeout is not None:
            return self.stopped.wait(timeout)

    def wait(self, timeout=10.0):
        '''
        Wait up to ``timeout`` seconds for the first connection.

        Returns the result of the event wait; returns None without waiting
        when the loop is connected or has been connected before.
        '''
        if not (self.has_once_connected or self.connected.is_set()):
            return self.connected.wait(timeout=timeout)

    def is_connected(self):
        '''Return True while the ``connected`` event is set.'''
        return self.connected.is_set()

    def event_loop(self):
        '''
        Run long poll rounds until one of them reports that the loop has
        been asked to stop.
        '''
        try:
            from httplib import IncompleteRead  # Py2
        except ImportError:
            from http.client import IncompleteRead  # Py3
        import requests
        # Consecutive failures; reset on every received line and used to
        # scale the retry delay. Counter is a project helper offering
        # reset()/incr()/get(), not collections.Counter.
        fail_count = Counter(0)

        def loop():
            # Use closure to jump around generator gc issue. See
            # https://groups.google.com/forum/#!topic/comp.lang.python/EhAY4ZmWaIw
            #
            # One polling round. Returns True when the loop must stop and
            # None (falsy) when another round should be started.

            try:
                # Only send the watch lists that are non-empty.
                payload = {
                    k: v
                    for k, v in iteritems({
                        'service': self.watched_services.dict,
                        'config': self.watched_configs.dict,
                        'switch': self.watched_switches.dict
                    }) if v
                }

                r = self.session.post(
                    self.url_path,
                    json=payload,
                    stream=True,
                    timeout=3,
                )
                if not r.ok:
                    logger.error('failed to watch: %d %r', r.status_code,
                                 r.text)
                    # Raises a requests exception that is handled by the
                    # except clause below.
                    r.raise_for_status()

                for i in r.iter_lines(chunk_size=4096, decode_unicode=True):
                    self.handle_message(i)
                    fail_count.reset()
                    if not self.connected.is_set():
                        self.connected.set()
                    self.is_disconnected.clear()
                    self.has_once_connected = True
                    if self.stop_loop_event.is_set():
                        return True
                    # A refreshed session ends this round so that the next
                    # one posts with the new session.
                    if self.check_refresh_session():
                        break
            except (socket.gaierror, socket.error, IncompleteRead,
                    requests.RequestException) as error:
                self.connected.clear()
                self.is_disconnected.set()
                if self.stop_loop_event.is_set():
                    logger.info("Stopping huskar connection event loop")
                    return True
                fail_count.incr()
                message = ''
                # Server error by default; responses with a status below
                # 500 are reported as user errors.
                exc_cls = HuskarDiscoveryServerError
                if (isinstance(error, requests.RequestException)
                        and error.response is not None):
                    response = error.response
                    if response.status_code < 500:
                        exc_cls = HuskarDiscoveryUserError
                    message = 'status_code: {0}, body: {1!r}'.format(
                        response.status_code, response.content[:200])
                # The wrapped exception is raised and caught right away so
                # it can be handed to notify(). NOTE(review): reraise()
                # presumably keeps the current traceback -- confirm.
                try:
                    reraise(exc_cls(error, self.url_path, message))
                except HuskarDiscoveryException as e:
                    self.notify('polling_error', e)
                # Delay grows with the number of consecutive failures and
                # is randomised between 0.5x and 1.5x.
                retry_wait = (0.5+random.random()) * fail_count.get() *\
                    self.reconnect_gap
                logger.warning('Huskar connection disconnected, '
                               'will retry in %s' % retry_wait,
                               exc_info=True)
                gevent.sleep(retry_wait)

        while True:
            if loop():
                return

    def start_long_poll(self):
        '''
        Greenlet entry point: run event_loop() and restore the state events
        once it ends, whether normally or through an exception.
        '''
        self.connected.clear()
        self.stopped.clear()
        try:
            self.event_loop()
        finally:
            self.stopped.set()
            self.stop_loop_event.clear()
            self.connected.clear()

    def update_watches(self, message, full=False):
        '''
        Forward the 'service', 'config' and 'switch' parts of ``message`` to
        the matching component's update(); ``full`` is passed through.
        '''
        self.watched_services.update(message.get('service'), full=full)
        self.watched_configs.update(message.get('config'), full=full)
        self.watched_switches.update(message.get('switch'), full=full)

    def delete_watches(self, message):
        '''
        Forward the 'service', 'config' and 'switch' parts of ``message`` to
        the matching component's delete().
        '''
        self.watched_services.delete(message.get('service'))
        self.watched_configs.delete(message.get('config'))
        self.watched_switches.delete(message.get('switch'))

    def handle_message(self, message):
        '''
        Decode one line received from the long poll and apply it.

        Messages are ignored once ``stopped`` is set. Lines that fail to
        parse as JSON are logged and dropped. 'update', 'delete' and 'all'
        are dispatched to the watch helpers, 'ping' does nothing and any
        other type is silently ignored. Errors raised while applying a
        message are logged, not propagated.
        '''
        if self.stopped.is_set():
            return

        if not self.has_once_connected:
            logger.info("Got Huskar messages. Processing...")

        try:
            message = json.loads(message)
        except Exception:
            logger.warning("Error parsing huskar message: %r", message)
            return

        try:
            if message['message'] == 'ping':
                pass
            elif message['message'] == 'update':
                self.update_watches(message['body'])
            elif message['message'] == 'delete':
                self.delete_watches(message['body'])
            elif message['message'] == 'all':
                # 'all' replaces the watched data instead of merging it.
                self.update_watches(message['body'], full=True)
        except Exception as err:
            logger.exception("Error handling huskar api message: %r", err)
Ejemplo n.º 47
0
class BaseService(object):
    """Base class for services that push queued notifications.

    Notifications are put on an internal queue and delivered one at a time
    by a sending greenlet. A failed delivery is re-queued and retried after
    a delay that grows by a factor of about 1.6 up to ``MAX_TIMEOUT``.
    Subclasses implement :meth:`send_notification`.
    """
    service_type = None

    # Last exception that escaped a callable run through save_err(). The
    # class level default keeps get_last_error() usable before any failure
    # has been recorded.
    last_err = None

    def __init__(self):
        self._send_queue = Queue()
        # Set by the send loop whenever it finds the queue drained.
        self._send_queue_cleared = Event()
        self._send_greenlet = None
        self.timeout = INITIAL_TIMEOUT
        self._feedback_queue = Queue()

    def start(self):
        """Start the message sending loop."""
        if self._send_greenlet is None:
            self._send_greenlet = gevent.spawn(self.save_err, self._send_loop)

    def _send_loop(self):
        """Deliver queued notifications until the greenlet is killed."""
        self._send_greenlet = gevent.getcurrent()
        try:
            logger.info("%s service started", self.service_type)
            while True:
                message = self._send_queue.get()
                try:
                    self.send_notification(message)
                except Exception:
                    self.error_sending_notification(message)
                else:
                    # A successful send resets the retry delay.
                    self.timeout = INITIAL_TIMEOUT
                finally:
                    if self._send_queue.qsize() < 1 and \
                            not self._send_queue_cleared.is_set():
                        self._send_queue_cleared.set()
        except gevent.GreenletExit:
            pass
        finally:
            self._send_greenlet = None
        logger.info("%s service stopped", self.service_type)

    def stop(self, timeout=10.0):
        """Stop the sending loop.

        When messages are still queued, waits up to ``timeout`` seconds for
        the queue to drain before the sending greenlet is killed.

        Returns True if the queue is empty afterwards.
        """
        if (self._send_greenlet is not None) and \
                (self._send_queue.qsize() > 0):
            self.wait_send(timeout=timeout)

        if self._send_greenlet is not None:
            gevent.kill(self._send_greenlet)
            self._send_greenlet = None
        return self._send_queue.qsize() < 1

    def wait_send(self, timeout=None):
        """Wait until the send loop next reports an empty queue.

        The flag is cleared first, so an earlier drain does not count.
        Returns the result of the event wait.
        """
        self._send_queue_cleared.clear()
        return self._send_queue_cleared.wait(timeout=timeout)

    def queue_notification(self, notification):
        """Put ``notification`` on the send queue."""
        self._send_queue.put(notification)

    def send_notification(self, notification):
        """Deliver a single notification; must be provided by subclasses."""
        raise NotImplementedError

    def save_err(self, func, *args, **kwargs):
        """Call ``func`` and remember any exception it raises.

        The exception is stored for :meth:`get_last_error` and re-raised.
        The return value of ``func`` is discarded.
        """
        try:
            func(*args, **kwargs)
        except Exception as e:
            self.last_err = e
            raise

    def get_last_error(self):
        """Return the last exception stored by :meth:`save_err`, or None."""
        return self.last_err

    def error_sending_notification(self, notification):
        """Re-queue a failed notification and back off before the retry."""
        logger.exception("Error while pushing")
        self._send_queue.put(notification)
        gevent.sleep(self.timeout)
        # approaching Fibonacci series
        timeout = int(round(float(self.timeout) * 1.6))
        self.timeout = min(timeout, MAX_TIMEOUT)

    def check_blocking(self):
        """Return True while the retry delay differs from its initial value."""
        return self.timeout != INITIAL_TIMEOUT
Ejemplo n.º 48
0
class BaseServer(object):
    """An abstract base class that implements some common functionality for the servers in gevent.

    *listener* can either be an address that the server should bind on or a :class:`gevent.socket.socket`
    instance that is already bound (and put into listening mode in case of TCP socket).

    *spawn*, if provided, is called to create a new greenlet to run the handler. By default, :func:`gevent.spawn` is used.

    Possible values for *spawn*:

    * a :class:`gevent.pool.Pool` instance -- *handle* will be executed
      using :meth:`Pool.spawn` method only if the pool is not full.
      While it is full, all the connection are dropped;
    * :func:`gevent.spawn_raw` -- *handle* will be executed in a raw
      greenlet which have a little less overhead then :class:`gevent.Greenlet` instances spawned by default;
    * ``None`` -- *handle* will be executed right away, in the :class:`Hub` greenlet.
      *handle* cannot use any blocking functions as it means switching to the :class:`Hub`.
    * an integer -- a shortcut for ``gevent.pool.Pool(integer)``
    """
    # the number of seconds to sleep in case there was an error in accept() call
    # for consecutive errors the delay will double until it reaches max_delay
    # when accept() finally succeeds the delay will be reset to min_delay again
    min_delay = 0.01
    max_delay = 1

    # Sets the maximum number of consecutive accepts that a process may perform on
    # a single wake up. High values give higher priority to high connection rates,
    # while lower values give higher priority to already established connections.
    # Default is 100. Note, that in case of multiple working processes on the same
    # listening value, it should be set to a lower value. (pywsgi.WSGIServer sets it
    # to 1 when environ["wsgi.multiprocess"] is true)
    max_accept = 100

    _spawn = Greenlet.spawn

    # the default timeout that we wait for the client connections to close in stop()
    stop_timeout = 1

    # errno values for which is_fatal_error() reports a socket error as fatal
    fatal_errors = (errno.EBADF, errno.EINVAL, errno.ENOTSOCK)

    def __init__(self, listener, handle=None, spawn='default'):
        """Configure listener, spawn strategy and handler.

        If any part of the setup raises, the server is closed and the
        exception is re-raised.
        """
        # The event being set means "stopped"; see the ``started`` property.
        self._stop_event = Event()
        self._stop_event.set()
        self._watcher = None
        self._timer = None
        self.pool = None
        try:
            self.set_listener(listener)
            self.set_spawn(spawn)
            self.set_handle(handle)
            self.delay = self.min_delay
            self.loop = get_hub().loop
            if self.max_accept < 1:
                raise ValueError('max_accept must be positive int: %r' %
                                 (self.max_accept, ))
        except:
            self.close()
            raise

    def set_listener(self, listener):
        """Store the listening socket, or the parsed address to bind later.

        Anything with an ``accept`` attribute is taken to be a socket; one
        that also has ``do_handshake`` is rejected as an SSLSocket. Any
        other value is handed to ``parse_address``.
        """
        if hasattr(listener, 'accept'):
            if hasattr(listener, 'do_handshake'):
                raise TypeError(
                    'Expected a regular socket, not SSLSocket: %r' %
                    (listener, ))
            self.family = listener.family
            self.address = listener.getsockname()
            self.socket = listener
        else:
            self.family, self.address = parse_address(listener)

    def set_spawn(self, spawn):
        """Select how handlers are started; see the class docstring for
        the accepted values of *spawn*."""
        if spawn == 'default':
            self.pool = None
            # Copies the class level default into the instance __dict__.
            self._spawn = self._spawn
        elif hasattr(spawn, 'spawn'):
            self.pool = spawn
            self._spawn = spawn.spawn
        elif isinstance(spawn, (int, long)):
            from gevent.pool import Pool
            self.pool = Pool(spawn)
            self._spawn = self.pool.spawn
        else:
            self.pool = None
            self._spawn = spawn
        # A pool that can report being full replaces the default full().
        if hasattr(self.pool, 'full'):
            self.full = self.pool.full
        if self.pool is not None:
            # NOTE(review): presumably called back when the pool's semaphore
            # is released so accepting can resume -- confirm.
            self.pool._semaphore.rawlink(self._start_accepting_if_started)

    def set_handle(self, handle):
        """Install the connection handler.

        Raises TypeError when *handle* is None and no ``handle`` attribute
        is available on the object.
        """
        if handle is not None:
            self.handle = handle
        if hasattr(self, 'handle'):
            self._handle = self.handle
        else:
            raise TypeError("'handle' must be provided")

    def _start_accepting_if_started(self, _event=None):
        """Callback for the pool semaphore and the retry timer: resume
        accepting unless the server has been stopped."""
        if self.started:
            self.start_accepting()

    def start_accepting(self):
        """Create and start the io watcher on the listening socket if none
        is active."""
        if self._watcher is None:
            # just stop watcher without creating a new one?
            # NOTE(review): the second argument 1 is presumably the "read"
            # event mask -- confirm against the loop API.
            self._watcher = self.loop.io(self.socket.fileno(), 1)
            self._watcher.start(self._do_read)

    def stop_accepting(self):
        """Stop and drop the io watcher and the retry timer, if present."""
        if self._watcher is not None:
            self._watcher.stop()
            self._watcher = None
        if self._timer is not None:
            self._timer.stop()
            self._timer = None

    def do_handle(self, *args):
        """Run the handler with *args*: directly when no spawn callable is
        configured, otherwise through it."""
        spawn = self._spawn
        if spawn is None:
            self._handle(*args)
        else:
            spawn(self._handle, *args)

    def _do_read(self):
        """Watcher callback: accept and dispatch up to ``max_accept``
        connections.

        ``do_read`` is not defined in this class; it is expected to return
        the handler arguments, or a false value when nothing is pending.
        """
        for _ in xrange(self.max_accept):
            if self.full():
                self.stop_accepting()
                return
            try:
                args = self.do_read()
                # A do_read() that did not raise resets the error back-off.
                self.delay = self.min_delay
                if not args:
                    return
            except:
                self.loop.handle_error(self, *sys.exc_info())
                ex = sys.exc_info()[1]
                if self.is_fatal_error(ex):
                    self.close()
                    sys.stderr.write('ERROR: %s failed with %s\n' %
                                     (self, str(ex) or repr(ex)))
                    return
                # Non-fatal error: pause accepting and retry after
                # ``delay``, doubling it up to ``max_delay``.
                if self.delay >= 0:
                    self.stop_accepting()
                    self._timer = self.loop.timer(self.delay)
                    self._timer.start(self._start_accepting_if_started)
                    self.delay = min(self.max_delay, self.delay * 2)
                break
            else:
                try:
                    self.do_handle(*args)
                except:
                    self.loop.handle_error((args[1:], self), *sys.exc_info())
                    # Same back-off when starting the handler fails.
                    if self.delay >= 0:
                        self.stop_accepting()
                        self._timer = self.loop.timer(self.delay)
                        self._timer.start(self._start_accepting_if_started)
                        self.delay = min(self.max_delay, self.delay * 2)
                    break

    def full(self):
        """Default capacity check: never full. set_spawn() shadows this
        with the pool's own ``full`` when one is available."""
        return False

    def __repr__(self):
        return '<%s at %s %s>' % (type(self).__name__, hex(
            id(self)), self._formatinfo())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._formatinfo())

    def _formatinfo(self):
        """Build the "fileno=... address=... handle=..." text used by
        __repr__ and __str__; failures are rendered into the text instead
        of being raised."""
        if hasattr(self, 'socket'):
            try:
                fileno = self.socket.fileno()
            except Exception as ex:
                fileno = str(ex)
            result = 'fileno=%s ' % fileno
        else:
            result = ''
        try:
            if isinstance(self.address, tuple) and len(self.address) == 2:
                result += 'address=%s:%s' % self.address
            else:
                result += 'address=%s' % (self.address, )
        except Exception as ex:
            result += str(ex) or '<error>'
        try:
            # Only a handler stored on the instance is reported.
            handle = getfuncname(self.__dict__['handle'])
        except Exception:
            handle = None
        if handle is not None:
            result += ' handle=' + handle
        return result

    @property
    def server_host(self):
        """IP address that the server is bound to (string)."""
        if isinstance(self.address, tuple):
            return self.address[0]

    @property
    def server_port(self):
        """Port that the server is bound to (an integer)."""
        if isinstance(self.address, tuple):
            return self.address[1]

    def init_socket(self):
        """If the user initialized the server with an address rather than socket,
        then this function will create a socket, bind it and put it into listening mode.

        It is not supposed to be called by the user, it is called by :meth:`start` before starting
        the accept loop."""
        pass

    @property
    def started(self):
        """True while the stop event is not set."""
        return not self._stop_event.is_set()

    def start(self):
        """Start accepting the connections.

        If an address was provided in the constructor, then also create a socket,
        bind it and put it into the listening mode.
        """
        self.init_socket()
        self._stop_event.clear()
        try:
            self.start_accepting()
        except:
            self.close()
            raise

    def close(self):
        """Close the listener socket and stop accepting."""
        self._stop_event.set()
        try:
            self.stop_accepting()
        finally:
            try:
                self.socket.close()
            except Exception:
                pass
            finally:
                # Drop the per-instance state so the class level defaults
                # apply again and ``closed`` reports True.
                self.__dict__.pop('socket', None)
                self.__dict__.pop('handle', None)
                self.__dict__.pop('_handle', None)
                self.__dict__.pop('_spawn', None)
                self.__dict__.pop('full', None)
                if self.pool is not None:
                    self.pool._semaphore.unlink(
                        self._start_accepting_if_started)

    @property
    def closed(self):
        """True once the instance no longer has a ``socket`` attribute."""
        return not hasattr(self, 'socket')

    def stop(self, timeout=None):
        """Stop accepting the connections and close the listening socket.

        If the server uses a pool to spawn the requests, then :meth:`stop` also waits
        for all the handlers to exit. If there are still handlers executing after *timeout*
        has expired (default 1 second), then the currently running handlers in the pool are killed."""
        self.close()
        if timeout is None:
            timeout = self.stop_timeout
        if self.pool:
            self.pool.join(timeout=timeout)
            self.pool.kill(block=True, timeout=1)

    def serve_forever(self, stop_timeout=None):
        """Start the server if it hasn't been already started and wait until it's stopped."""
        # add test that serve_forever exists on stop()
        if not self.started:
            self.start()
        try:
            self._stop_event.wait()
        finally:
            # stop() runs in its own greenlet, which is joined here.
            Greenlet.spawn(self.stop, timeout=stop_timeout).join()

    def is_fatal_error(self, ex):
        """Return True for a socket error whose first argument is one of
        ``fatal_errors``."""
        # NOTE(review): indexing the exception (ex[0]) only works on
        # Python 2 -- confirm the targeted interpreter.
        return isinstance(ex, _socket.error) and ex[0] in self.fatal_errors
Ejemplo n.º 49
0
def single_queue_send(
    transport: 'UDPTransport',
    recipient: typing.Address,
    queue: Queue_T,
    event_stop: Event,
    event_healthy: Event,
    event_unhealthy: Event,
    message_retries: int,
    message_retry_timeout: int,
    message_retry_max_timeout: int,
):
    """ Send the messages of one queue to `recipient`, one at a time.

    The message at the head of the queue is only removed once it has been
    acknowledged; until then it is retried with a fresh exponential backoff.

    Notes:
    - This task must be the only consumer of queue.
    - The task may be killed at any moment, although the intended way to end
      it is setting event_stop.
    - Callers with several queues for the same recipient are responsible for
      not starting them together, to avoid congestion.
    - The endpoint is assumed to stay known once it was first seen. Should
      that change, this code has to learn to handle unknown addresses.

    Raises:
        ValueError: If queue is not a NotifyingQueue.
    """

    # Without a NotifyingQueue the greenlet could not be woken up (and thus
    # stopped) while it waits for the next element.
    if not isinstance(queue, NotifyingQueue):
        raise ValueError('queue must be a NotifyingQueue.')

    # This event is reused across iterations, so it has to be cleared with
    # care (see the end of the loop).
    wakeup = event_first_of(queue, event_stop)

    # Block until the endpoint is registered or a stop is requested.
    healthy_or_stop = event_first_of(event_healthy, event_stop)
    healthy_or_stop.wait()

    while True:
        wakeup.wait()

        if event_stop.is_set():
            return

        # Being the only consumer guarantees the queue still holds an item
        # here, so peeking cannot raise Empty.
        messagedata, message_id = queue.peek(block=False)

        retry_schedule = timeout_exponential_backoff(
            message_retries,
            message_retry_timeout,
            message_retry_max_timeout,
        )

        try:
            was_acknowledged = retry_with_recovery(
                transport,
                messagedata,
                message_id,
                recipient,
                event_stop,
                event_healthy,
                event_unhealthy,
                retry_schedule,
            )
        except RaidenShuttingDown:  # For a clean shutdown process
            return

        if not was_acknowledged:
            # Leave the message at the head of the queue and try again.
            continue

        queue.get()

        # Testing the queue for emptiness does not context-switch, so its
        # length cannot change in between; a later insert sets the event
        # again.
        if queue:
            continue

        wakeup.clear()

        if event_stop.is_set():
            return
Ejemplo n.º 50
0
class HttpHealthCheckShareAdjuster(ShareAdjuster):
    """Share adjuster that gates an endpoint's share on an HTTP health check.

    The check requests ``route`` on the endpoint and treats a 200 response as
    a success. The endpoint's share is 1.0 while its status is one of
    HEALTHY_STATUSES and 0.0 otherwise (see ``auditable_share``).
    """

    def __init__(self,
                 endpoint,
                 signal_update_fn,
                 route='/health',
                 interval=5,
                 timeout=3.0,
                 unhealthy_threshold=2,
                 healthy_threshold=2,
                 port_name=None,
                 http_method='GET'):
        """
        A basic http health check implementation. Parameters match those
        available on an Elastic Loadbalancer.

        Checks for 200 response code.

        Args:
          endpoint - Endpoint to check.
          signal_update_fn - function - function to call on status update.
          route - str - http route to check.
          interval - int - seconds between checks.
          timeout - float - seconds before a check attempt times out.
          unhealthy_threshold - int - failures before endpoint marked unhealthy.
          healthy_threshold - int - successes before endpoint marked healthy.
          port_name - str - Optional name of port to check. EG: 'health'.
          http_method - str - Optional name of the http verb (matched
            case-insensitively). EG: GET or HEAD

        Raises:
          ValueError - if http_method is not in SUPPORTED_HEALTHCHECK_METHODS.
        """
        super(HttpHealthCheckShareAdjuster,
              self).__init__(endpoint, signal_update_fn)
        self._route = route
        self._interval = int(interval)
        self._timeout = float(timeout)
        self._unhealthy_threshold = int(unhealthy_threshold)
        self._healthy_threshold = int(healthy_threshold)
        self._port_name = port_name
        # Only the most recent results matter: keep just enough history to
        # evaluate either threshold.
        max_result_len = self._healthy_threshold + self._unhealthy_threshold
        self._check_results = collections.deque(maxlen=max_result_len)
        self._status = HealthCheckStatus.INITIALIZING
        self._stop_event = Event()

        if http_method.upper() not in SUPPORTED_HEALTHCHECK_METHODS:
            # ValueError is still an Exception, so callers that caught the
            # previous bare Exception keep working.
            raise ValueError('http_method only supports: {}'.format(
                ', '.join(SUPPORTED_HEALTHCHECK_METHODS), ))
        # Stored lower-case because it is used as the attribute name of the
        # matching function on the `requests` module (requests.get, ...).
        self._http_method = http_method.lower()

    @property
    def status(self):
        """
        Get current status of endpoint.

        Returns:
          A HealthCheckStatus value.
        """
        return self._status

    def start(self):
        """
        Start running healthchecks against endpoint.

        The first check is scheduled with spawn_later after `interval`
        seconds; it is not run immediately.
        """
        spawn_later(self._interval, self._check)
        self._record(HttpHealthCheckLogEvent.STARTED_CHECKER,
                     HttpHealthCheckLogResult.SUCCESS)

    def stop(self):
        """
        Stop running healthchecks against endpoint.

        Sets the stop event; a check that is already scheduled returns
        without doing any work, and no further checks are scheduled.
        """
        self._stop_event.set()
        self._record(HttpHealthCheckLogEvent.STOPPED_CHECKER,
                     HttpHealthCheckLogResult.SUCCESS)

    @property
    def auditable_share(self):
        """Return current share adjustment factor.

        Returns:
          A (share, AuditItem) tuple: 1.0 when the status is in
          HEALTHY_STATUSES, 0.0 otherwise.
        """
        if self.status in HEALTHY_STATUSES:
            return 1.0, AuditItem('health', '1.0')
        return 0.0, AuditItem('health', '0.0')

    def _build_check_uri(self):
        """
        Builds the URI to check.

        Uses the named port from the endpoint's context 'port_map' when a
        port_name was configured, otherwise the endpoint's own port.

        Returns:
          Check URI string.
        """
        uri_template = 'http://{0}:{1}{2}'
        if self._port_name:
            port = self._endpoint.context['port_map'][self._port_name]
        else:
            port = self._endpoint.port
        return uri_template.format(self._endpoint.host, port, self._route)

    def _check(self):
        """
        Run one healthcheck, update the status and schedule the next check.

        Does nothing once the checker has been stopped.
        """
        if self._stop_event.is_set():
            return

        check_uri = self._build_check_uri()
        error_log_fn = None
        try:
            self._record(HttpHealthCheckLogEvent.STARTING_CHECK,
                         HttpHealthCheckLogResult.SUCCESS,
                         log_fn=logger.debug)

            response = getattr(requests, self._http_method)(
                check_uri, timeout=self._timeout)

            if response.status_code == requests.codes.ok:
                check_result = HealthCheckResult.SUCCESS
                self._record(HttpHealthCheckLogEvent.RUNNING_CHECK,
                             HttpHealthCheckLogResult.SUCCESS,
                             log_fn=logger.debug)
            else:
                check_result = HealthCheckResult.ERROR_CODE
                self._record(HttpHealthCheckLogEvent.RUNNING_CHECK,
                             HttpHealthCheckLogResult.FAILURE,
                             'status_code:{0}'.format(response.status_code))

        except requests.exceptions.Timeout:
            check_result = HealthCheckResult.TIMEOUT
            self._record(HttpHealthCheckLogEvent.RUNNING_CHECK,
                         HttpHealthCheckLogResult.TIMEOUT)
        except requests.exceptions.ConnectionError as ex:
            # NOTE(review): `unicode` only exists on Python 2; this needs
            # porting if the module is ever run on Python 3.
            message = unicode(ex)
            if 'gaierror' in message:
                # Address lookup failed: classified as a local problem.
                check_result = HealthCheckResult.KNOWN_LOCAL_ERROR
                error_log_fn = logger.error
            elif 'connection refused' in message.lower():
                check_result = HealthCheckResult.KNOWN_REMOTE_ERROR
                error_log_fn = logger.error
            else:
                check_result = HealthCheckResult.UNKNOWN_ERROR
                error_log_fn = logger.exception
        except Exception:
            # Deliberately broad: one failing check must not end the
            # periodic check loop.
            check_result = HealthCheckResult.UNKNOWN_ERROR
            error_log_fn = logger.exception

        if error_log_fn:
            error_log_fn('Exception when executing HttpHealthCheck.')
            self._record(HttpHealthCheckLogEvent.RUNNING_CHECK, check_result)

        self._update_status(check_result)

        # stop() may have been called while the request was in flight; do not
        # schedule another check in that case.
        if not self._stop_event.is_set():
            spawn_later(self._interval, self._check)

    def _record(self, event, result, msg='', log_fn=logger.info):
        """
        Utility to record HttpHealthCheck events and results.

        Args:
          event - HttpHealthCheckLogEvent.
          result - HttpHealthCheckLogResult.
          msg - str - Extra message.
          log_fn - function - logger function to use.
        """
        f = 'event:%(event)s result:%(result)s check_uri:%(check_uri)s msg:%(msg)s'
        context = {
            'event': event,
            'result': result,
            'check_uri': self._build_check_uri(),
            'msg': msg
        }
        log_fn(f, context)

    def _update_status(self, check_result):
        """
        If necessary based on configuration, update status of this check.

        Results listed in UNCHANGED_RESULTS are ignored entirely. Otherwise
        the result is appended to the history and the status becomes HEALTHY
        (or UNHEALTHY) once the last `healthy_threshold` (or
        `unhealthy_threshold`) results are all healthy (or unhealthy).

        Calls self._signal_update_fn (with no arguments) when the status
        changes, if it is set.

        Args:
          check_result - HealthCheckResult
        """
        if check_result in UNCHANGED_RESULTS:
            return

        self._check_results.append(check_result)
        # Snapshot once; deques do not support slicing.
        results = list(self._check_results)
        calculated_status = self._status

        healthy_lookback = results[-self._healthy_threshold:]
        if len(healthy_lookback) == self._healthy_threshold and \
                all(cr in HEALTHY_RESULTS for cr in healthy_lookback):
            calculated_status = HealthCheckStatus.HEALTHY

        # Evaluated second so that UNHEALTHY wins if both conditions hold.
        unhealthy_lookback = results[-self._unhealthy_threshold:]
        if len(unhealthy_lookback) == self._unhealthy_threshold and \
                all(cr in UNHEALTHY_RESULTS for cr in unhealthy_lookback):
            calculated_status = HealthCheckStatus.UNHEALTHY

        if self._status != calculated_status:
            old_status = self._status
            self._status = calculated_status
            self._record(HttpHealthCheckLogEvent.UPDATED_HEALTH_STATUS,
                         HttpHealthCheckLogResult.SUCCESS,
                         '{0} -> {1}'.format(old_status, calculated_status))
            if self._signal_update_fn:
                try:
                    # Notify the owner; the callback takes no arguments and
                    # is expected to read the new state from this adjuster.
                    self._signal_update_fn()
                except Exception:
                    logger.exception('Exception when executing callback on '
                                     'BasicHttpHealthCheck status change.')
                    self._record(HttpHealthCheckLogEvent.RUNNING_CALLBACK,
                                 HttpHealthCheckLogResult.ERROR)
Ejemplo n.º 51
0
class Actor(object):
    """
    The actor class is the abstract base class for all implementing compysition actors.
    In order to be a valid 'module' and connectable with the compysition event flow, a module must be an extension of this class.

    The Actor is responsible for putting events on outbox queues, and consuming incoming events on inbound queues.
    """

    __metaclass__ = abc.ABCMeta

    DEFAULT_EVENT_SERVICE = "default"
    # Event type(s) accepted by consume(); normalized to a tuple in start().
    input = Event
    # Event type(s) this actor may emit; normalized to a tuple in start().
    output = Event
    # Optional iterable of attribute names every consumed event must carry.
    REQUIRED_EVENT_ATTRIBUTES = None
    # Sentinel default for send_event(), so that an explicitly passed (even
    # empty) collection of queues is distinguishable from "not passed".
    __NOT_DEFINED = object()

    def __init__(self,
                 name,
                 size=0,
                 blocking_consume=False,
                 rescue=False,
                 max_rescue=5,
                 *args,
                 **kwargs):
        """
        **Base class for all compysition actors**

        Parameters:

            name (str):
                | The instance name
            size (Optional[int]):
                | The max amount of events any outbound queue connected to this actor may contain. A value of 0 represents an infinite qsize
                | (Default: 0)
            blocking_consume (Optional[bool]):
                | Define if this module should spawn a greenlet for every single 'consume' execution, or if
                | it should execute 'consume' and block until that 'consume' is complete. This is usually
                | only necessary if executing work on an event in the order that it was received is critical.
                | (Default: False)
            rescue (Optional[bool]):
                | When True, an event whose processing raised an unexpected exception is put back on its
                | origin queue instead of being sent to the error queues, up to 'max_rescue' times.
                | (Default: False)
            max_rescue (Optional[int]):
                | Maximum number of times a single event is re-queued when 'rescue' is enabled.
                | (Default: 5)

        Extra positional and keyword arguments are accepted and ignored.
        """
        self.blockdiag_config = {"shape": "box"}
        self.name = name
        self.size = size
        self.pool = QueuePool(size)
        self.logger = Logger(name, self.pool.logs)
        self.__loop = True
        self.threads = RestartPool(logger=self.logger, sleep_interval=1)

        # Set by start(); consumers wait on it before processing anything.
        self.__run = GEvent()
        self.__run.clear()
        # Set by stop(); block() waits on it.
        self.__block = GEvent()
        self.__block.clear()
        self.__blocking_consume = blocking_consume
        self.rescue = rescue
        self.max_rescue = max_rescue

    def block(self):
        """Block the caller until stop() has been called."""
        self.__block.wait()

    def connect_error_queue(self,
                            destination_queue_name="inbox",
                            *args,
                            **kwargs):
        """Connect one of this actor's error queues to the destination's
        'error_<destination_queue_name>' inbound queue."""
        self.__connect_queue(
            pool_scope=self.pool.error,
            destination_queue_name="error_{0}".format(destination_queue_name),
            *args,
            **kwargs)

    def connect_log_queue(self,
                          destination_queue_name="inbox",
                          *args,
                          **kwargs):
        """Connect one of this actor's log queues to the destination's
        'log_<destination_queue_name>' inbound queue."""
        self.__connect_queue(
            pool_scope=self.pool.logs,
            destination_queue_name="log_{0}".format(destination_queue_name),
            *args,
            **kwargs)

    def connect_queue(self, *args, **kwargs):
        """Connect one of this actor's outbound queues to a destination
        actor's inbound queue. Arguments are those of __connect_queue."""
        self.__connect_queue(pool_scope=self.pool.outbound, *args, **kwargs)

    def __connect_queue(self,
                        source_queue_name="outbox",
                        destination=None,
                        destination_queue_name="inbox",
                        pool_scope=None,
                        check_existing=True):
        """Connects the <source_queue_name> queue to the <destination> queue.
        If the destination queue already exists, the source queue is changed to be a reference to that queue, as Many to One connections
        are supported, but One to Many is not

        Raises QueueConnected when check_existing is True and either end of
        the connection already exists."""

        source_queue = pool_scope.get(source_queue_name, None)
        destination_queue = destination.pool.inbound.get(
            destination_queue_name, None)

        if check_existing:
            if source_queue:
                raise QueueConnected(
                    "Outbound queue {queue_name} on {source_name} is already connected"
                    .format(queue_name=source_queue_name,
                            source_name=self.name))

            if destination_queue:
                raise QueueConnected(
                    "Inbound queue {queue_name} on {destination_name} is already connected"
                    .format(queue_name=destination_queue_name,
                            destination_name=destination.name))

        if not source_queue:
            if not destination_queue:
                # Neither end exists: create the queue and hand it to the
                # destination as a new inbound queue.
                source_queue = pool_scope.add(source_queue_name)
                destination.register_consumer(destination_queue_name,
                                              source_queue)
            else:
                # Many to One: reuse the destination's existing queue.
                pool_scope.add(source_queue_name, queue=destination_queue)
        else:
            if not destination_queue:
                destination.register_consumer(destination_queue_name,
                                              source_queue)
            else:
                # Both exist: move pending events over, then point this actor
                # at the destination's queue.
                source_queue.dump(destination_queue)
                pool_scope.add(destination_queue.name, queue=destination_queue)

        self.logger.info("Connected queue '{0}' to '{1}.{2}'".format(
            source_queue_name, destination.name, destination_queue_name))

    def loop(self):
        """The global lock for this module: True until stop() is called."""

        return self.__loop

    def is_running(self):
        """Return True once start() has been called."""
        return self.__run.is_set()

    def register_consumer(self, queue_name, queue):
        """
        Add the passed queue under the passed name to this actor's inbound
        pool and spawn a consumer greenthread that feeds it to consume().
        """
        self.pool.inbound.add(queue_name, queue=queue)
        self.threads.spawn(self.__consumer, self.consume, queue)

    def start(self):
        """Starts the module.

        Normalizes 'input' and 'output' to tuples, runs pre_hook() if the
        subclass defines one, then releases the waiting consumers."""

        if not isinstance(self.input, tuple):
            if isinstance(self.input, list):
                self.input = tuple(self.input)
            else:
                self.input = (self.input, )

        if not isinstance(self.output, tuple):
            if isinstance(self.output, list):
                self.output = tuple(self.output)
            else:
                self.output = (self.output, )

        if hasattr(self, "pre_hook"):
            self.logger.debug("pre_hook() found, executing")
            self.pre_hook()

        self.__run.set()
        self.logger.debug(
            "Started with max queue size of {size} events".format(
                size=self.size))

    def stop(self):
        """Stops the loop lock, releases block() and runs post_hook() if the
        subclass defines one. Does not wait for the consumers to exit."""

        self.__loop = False
        self.__block.set()

        # This should do a self.threads.join() but currently it is blocking. This issue needs to be resolved
        # But in the meantime post_hook will execute

        if hasattr(self, "post_hook"):
            self.logger.debug("post_hook() found, executing")
            self.post_hook()

    def send_event(self, event, queues=__NOT_DEFINED, check_output=True):
        """
        Sends a deepcopy of the event to each of the given queues, or to all
        registered outbox queues when 'queues' is not passed.

        :param event: The event to send
        :param queues: Optional collection of queues to send to
        :param check_output: When True, raise InvalidActorOutput if the event
            is not an instance of this actor's 'output' type(s)
        """

        if queues is self.__NOT_DEFINED:
            queues = self.pool.outbound.values()

        # Forward check_output: it used to be dropped here, which made
        # send_event(..., check_output=False) validate anyway.
        self._loop_send(event, queues, check_output=check_output)

    def send_error(self, event):
        """
        Sends the event to all error queues, without output type checking
        """
        queues = self.pool.error.values()
        self._loop_send(event, queues=queues, check_output=False)

    def _loop_send(self, event, queues, check_output=True):
        """
        Put an independent deepcopy of the event on every queue.

        :param event: The event to send
        :param queues: Any iterable of queues (list, dict view, ...)
        :param check_output: Validate the event type against self.output
        :return: None
        """
        if check_output and not isinstance(event, self.output):
            raise InvalidActorOutput(
                "Event was of type '{_type}', expected '{output}'".format(
                    _type=type(event), output=self.output))

        # Explicit loop instead of map(): map() is lazy on Python 3, so using
        # it for side effects delivered to the first queue only. The list()
        # snapshot also accepts dict views and keeps the iteration stable
        # even though _send() yields control between puts.
        for _queue in list(queues):
            self._send(_queue, deepcopy(event))

    def _send(self, queue, event):
        """Put the event on the queue, then yield via sleep(0)."""
        queue.put(event)
        sleep(0)

    def __consumer(self, function, queue):
        """Greenthread which applies <function> to each element from <queue>
        """

        self.__run.wait()

        while self.loop():
            queue.wait_until_content()
            try:
                event = queue.get(timeout=10)
            except QueueEmpty:
                pass
            else:
                if self.__blocking_consume:
                    self.__do_consume(function, event, queue)
                else:
                    self.threads.spawn(self.__do_consume,
                                       function,
                                       event,
                                       queue,
                                       restart=False)

        # The actor was stopped: drain whatever is still queued.
        while queue.qsize() > 0:
            try:
                event = queue.get()
            except QueueEmpty:
                break
            self.threads.spawn(self.__do_consume,
                               function,
                               event,
                               queue,
                               restart=False)

    def __do_consume(self, function, event, queue):
        """
        A function designed to be spun up in a greenlet to maximize concurrency for the __consumer method
        This function actually calls the consume function for the actor

        Events of an unexpected type are converted to the first input type,
        required attributes are validated, and failures are logged, re-queued
        (rescue) or forwarded to the error queues.
        """
        try:

            if not isinstance(event, self.input):
                new_event = event.convert(self.input[0])
                self.logger.warning(
                    "Incoming event was of type '{_type}' when type {input} was expected. Converted to {converted}"
                    .format(_type=type(event),
                            input=self.input,
                            converted=type(new_event)),
                    event=event)
                event = new_event

            if self.REQUIRED_EVENT_ATTRIBUTES:
                # Collect the *names* of the absent attributes; collecting
                # their (falsy) values made the error message useless.
                missing = [
                    attribute
                    for attribute in self.REQUIRED_EVENT_ATTRIBUTES
                    if not event.get(attribute, None)
                ]
                if missing:
                    raise InvalidActorInput(
                        "Required incoming event attributes were missing: {missing}"
                        .format(missing=missing))

            function(event, origin=queue.name, origin_queue=queue)
        except QueueFull as err:
            # A downstream queue is full: wait for room, then retry the event
            # by putting it back on its origin queue.
            err.wait_until_free()
            queue.put(event)
        except InvalidActorInput as error:
            self.logger.error("Invalid input detected: {0}".format(error))
        except InvalidEventConversion:
            self.logger.error(
                "Event was of type '{_type}', expected '{input}'".format(
                    _type=type(event), input=self.input))
        except Exception as err:
            self.logger.warning("Event exception caught: {traceback}".format(
                traceback=traceback.format_exc()),
                                event=event)
            # The retry count is tracked on the event itself, per actor name.
            rescue_tracker = "{actor}_rescue_num".format(actor=self.name)
            if self.rescue and event.get(rescue_tracker, 0) < self.max_rescue:
                setattr(event, rescue_tracker,
                        event.get(rescue_tracker, 0) + 1)
                sleep(1)
                queue.put(event)
            else:
                event.error = err
                self.send_error(event)

    def create_event(self, *args, **kwargs):
        """
        Build an event of this actor's single output type from the keyword
        arguments. Positional arguments are accepted but not forwarded.

        Raises ValueError when more than one output type is defined.
        """
        if len(self.output) == 1:
            return self.output[0](**kwargs)
        raise ValueError(
            "Unable to call create_event function with multiple output types defined"
        )

    @abc.abstractmethod
    def consume(self, event, *args, **kwargs):
        """
        Args:
            event:  The implementation of event.Event this actor is consuming
            *args:
            **kwargs:
        """
        pass
Ejemplo n.º 52
0
class EchoNode:
    """Node that echoes received payments back to their initiator.

    On construction it makes sure at least one channel is open for the token,
    registers itself with the raiden alarm task and spawns the echo worker.
    """

    def __init__(self, api, token_address):
        assert isinstance(api, RaidenAPI)
        self.ready = Event()

        self.api = api
        self.token_address = token_address

        known_channels = self.api.get_channel_list(
            api.raiden.default_registry.address,
            self.token_address,
        )
        open_channels = [
            state for state in known_channels
            if channel.get_status(state) == CHANNEL_STATE_OPENED
        ]

        if not open_channels:
            # No open channel for this token yet: join the token network,
            # which requires a positive token balance.
            token = self.api.raiden.chain.token(self.token_address)
            has_funds = token.balance_of(self.api.raiden.address) > 0
            if not has_funds:
                raise ValueError(
                    'not enough funds for echo node %s for token %s' % (
                        pex(self.api.raiden.address),
                        pex(self.token_address),
                    ))
            self.api.token_network_connect(
                self.api.raiden.default_registry.address,
                self.token_address,
                token.balance_of(self.api.raiden.address),
                initial_channel_target=10,
                joinable_funds_target=0.5,
            )

        self.last_poll_offset = 0
        self.received_transfers = Queue()
        # Stays None while running; any other value signals REMOVE_CALLBACK
        # and stops the echo worker.
        self.stop_signal = None
        self.greenlets = []
        self.lock = BoundedSemaphore()
        self.seen_transfers = deque([], TRANSFER_MEMORY)
        self.num_handled_transfers = 0
        self.lottery_pool = Queue()
        # Hook into the raiden alarm task, then start the worker.
        self.api.raiden.alarm.register_callback(self.echo_node_alarm_callback)
        self.echo_worker_greenlet = gevent.spawn(self.echo_worker)
        log.info('Echo node started')

    def echo_node_alarm_callback(self, block_number):
        """ Callback meant to be registered with the raiden AlarmTask.

        Marks the node as ready on first invocation and spawns one poll per
        invocation. Once `EchoNode.stop()` has been called it returns
        REMOVE_CALLBACK so the AlarmTask drops the callback.
        """
        if not self.ready.is_set():
            self.ready.set()
        log.debug('echo_node callback', block_number=block_number)

        if self.stop_signal is not None:
            return REMOVE_CALLBACK

        poller = gevent.spawn(self.poll_all_received_events)
        self.greenlets.append(poller)
        return True

    def poll_all_received_events(self):
        """ Runs once for each `echo_node_alarm_callback`.

        Fetches the payment history from the last poll offset, enqueues every
        `EventPaymentReceivedSuccess` event on `self.received_transfers` and
        respawns the echo worker greenlet if it is no longer started. Returns
        immediately when another poll currently holds the lock. """

        holds_lock = False
        try:
            with Timeout(10):
                holds_lock = self.lock.acquire(blocking=False)
                if not holds_lock:
                    # Another poll is still in progress.
                    return

                history = self.api.get_raiden_events_payment_history(
                    token_address=self.token_address,
                    offset=self.last_poll_offset,
                )

                # Only successfully received payments are echoed.
                new_payments = [
                    entry for entry in history
                    if type(entry) == EventPaymentReceivedSuccess
                ]

                for entry in new_payments:
                    self.received_transfers.put(copy.deepcopy(entry))

                # Advance the offset only after the events are enqueued
                # (timeout safe).
                if new_payments:
                    self.last_poll_offset += len(new_payments)

                worker = self.echo_worker_greenlet
                if not worker.started:
                    log.debug(
                        'restarting echo_worker_greenlet',
                        dead=worker.dead,
                        successful=worker.successful(),
                        exception=worker.exception,
                    )
                    self.echo_worker_greenlet = gevent.spawn(
                        self.echo_worker)
        except Timeout:
            log.info('timeout while polling for events')
        finally:
            if holds_lock:
                self.lock.release()

    def echo_worker(self):
        """ Drains `self.received_transfers` until the node is stopped and
        spawns an `self.on_transfer` greenlet for every transfer that has not
        been seen before. """
        log.debug('echo worker', qsize=self.received_transfers.qsize())
        while self.stop_signal is None:
            if self.received_transfers.qsize() <= 0:
                # Nothing queued: idle briefly before checking again.
                gevent.sleep(0.5)
                continue

            transfer = self.received_transfers.get()
            if transfer in self.seen_transfers:
                log.debug(
                    'duplicate transfer ignored',
                    initiator=pex(transfer.initiator),
                    amount=transfer.amount,
                    identifier=transfer.identifier,
                )
                continue

            self.seen_transfers.append(transfer)
            handler = gevent.spawn(self.on_transfer, transfer)
            self.greenlets.append(handler)

    def on_transfer(self, transfer):
        """ This handles the echo logic, as described in
        https://github.com/raiden-network/raiden/issues/651:

            - for transfers with an amount that satisfies `amount % 3 == 0`, it sends a transfer
            with an amount of `amount - 1` back to the initiator
            - for transfers with a "lucky number" amount `amount == 7` it does not send anything
            back immediately -- after having received "lucky number transfers" from 7 different
            addresses it sends a transfer with `amount = 49` to one randomly chosen one
            (from the 7 lucky addresses)
            - consecutive entries to the lucky lottery will receive the current pool size as the
            `echo_amount`
            - for all other transfers it sends a transfer with the same `amount` back to the
            initiator """
        echo_amount = 0
        # Transfer whose initiator receives the echo; replaced by the lottery
        # winner on payout.
        payee = transfer

        if transfer.amount % 3 == 0:
            log.info(
                'ECHO amount - 1',
                initiator=pex(transfer.initiator),
                amount=transfer.amount,
                identifier=transfer.identifier,
            )
            echo_amount = transfer.amount - 1

        elif transfer.amount == 7:
            log.info(
                'ECHO lucky number draw',
                initiator=pex(transfer.initiator),
                amount=transfer.amount,
                identifier=transfer.identifier,
                poolsize=self.lottery_pool.qsize(),
            )

            # Work on a local copy of the pool so the shared one is untouched.
            pool = self.lottery_pool.copy()
            tickets = [pool.get() for _ in range(pool.qsize())]
            assert pool.empty()
            del pool

            already_entered = any(
                ticket.initiator == transfer.initiator for ticket in tickets)

            if already_entered:
                assert transfer not in tickets
                log.debug(
                    'duplicate lottery entry',
                    initiator=pex(transfer.initiator),
                    identifier=transfer.identifier,
                    poolsize=len(tickets),
                )
                # Tell the participant how large the pool currently is.
                echo_amount = len(tickets)

            elif len(tickets) == 6:
                # This is the seventh distinct participant: pay out.
                log.info('payout!')
                assert self.lottery_pool.qsize() == 6
                # Start a fresh pool for the next round.
                self.lottery_pool = Queue()
                tickets.append(transfer)
                payee = random.choice(tickets)
                echo_amount = 49

            else:
                self.lottery_pool.put(transfer)

        else:
            log.debug(
                'echo transfer received',
                initiator=pex(transfer.initiator),
                amount=transfer.amount,
                identifier=transfer.identifier,
            )
            echo_amount = transfer.amount

        if echo_amount:
            echo_identifier = payee.identifier + echo_amount
            log.debug(
                'sending echo transfer',
                target=pex(payee.initiator),
                amount=echo_amount,
                orig_identifier=payee.identifier,
                echo_identifier=echo_identifier,
                token_address=pex(self.token_address),
                num_handled_transfers=self.num_handled_transfers + 1,
            )

            self.api.transfer(
                self.api.raiden.default_registry.address,
                self.token_address,
                echo_amount,
                payee.initiator,
                identifier=echo_identifier,
            )
        self.num_handled_transfers += 1

    def stop(self):
        """ Signal the callback and worker to stop, then wait for every
        spawned greenlet, re-raising the first error any of them hit. """
        self.stop_signal = True
        self.greenlets.append(self.echo_worker_greenlet)
        gevent.joinall(self.greenlets, raise_error=True)
Ejemplo n.º 53
0
class Manager(object):
    def __init__(self, name, google_key, locale, units, timezone, time_limit,
                 max_attempts, location, cache_type, geofence_file, debug):
        """Set up loggers, the optional GMaps client, filters and rules.

        For `google_key`, `location` and `geofence_file` the string 'none'
        (compared case-insensitively after `str()`) means "not configured".
        The worker greenlet is not started here; see `start`.
        """
        # Identity and loggers
        manager_name = str(name).lower()
        self.name = manager_name
        self._log = self._create_logger(manager_name)
        self._rule_log = self.get_child_logger('rules')

        self.__debug = debug

        # Google Maps API client (only when a key was supplied)
        # TODO: Improve error checking
        if str(google_key).lower() == 'none':
            self._google_key = None
            self._gmaps_service = None
        else:
            self._google_key = google_key
            self._gmaps_service = GMaps(google_key)
        self._gmaps_reverse_geocode = False
        self._gmaps_distance_matrix = set()

        # Localisation settings
        self._language = locale
        self.__locale = Locale(locale)  # language-specific lookups
        self.__units = units  # unit type used for distances
        self.__timezone = timezone  # timezone for time calculations
        self.__time_limit = time_limit  # minimum time remaining

        # Location is given as "lat,lng" text or a name to geocode
        # (see set_location); 'none' leaves the Manager without a location.
        self.__location = None
        if str(location).lower() == 'none':
            self._log.warning("NO LOCATION SET - this may cause issues "
                              "with distance related DTS.")
        else:
            self.set_location(location)

        # Cache backend
        self.__cache = cache_factory(self, cache_type)

        # Per-event-type switches and filter tables (all start disabled/empty)
        self._mons_enabled = False
        self._mon_filters = OrderedDict()
        self._stops_enabled = False
        self._stop_filters = OrderedDict()
        self._gyms_enabled = False
        self._gym_filters = OrderedDict()
        self._ignore_neutral = False
        self._eggs_enabled = False
        self._egg_filters = OrderedDict()
        self._raids_enabled = False
        self._raid_filters = OrderedDict()
        self._weather_enabled = False
        self._weather_filters = OrderedDict()

        # Geofences loaded from the given file, if any
        self.geofences = (
            load_geofence_file(get_path(geofence_file))
            if str(geofence_file).lower() != 'none' else None)

        # Alarms that send the notifications out
        self._alarms = {}
        self._max_attempts = int(max_attempts)  # TODO: Move to alarm level

        # Rule tables, one per event type
        self.__mon_rules = {}
        self.__stop_rules = {}
        self.__gym_rules = {}
        self.__egg_rules = {}
        self.__raid_rules = {}
        self.__weather_rules = {}

        # Work queue, shutdown flag and (not yet started) worker handle
        self.__queue = Queue()
        self.__event = Event()
        self.__process = None

    # ~~~~~~~~~~~~~~~~~~~~~~~ MAIN PROCESS CONTROL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Update the object into the queue
    def update(self, obj):
        self.__queue.put(obj)

    # Get the name of this Manager
    def get_name(self):
        return self.name

    # Tell the process to finish up and go home
    def stop(self):
        self._log.info("Manager {} shutting down... {} items in queue."
                       "".format(self.name, self.__queue.qsize()))
        self.__event.set()

    def join(self):
        self.__process.join(timeout=20)
        if not self.__process.ready():
            self._log.warning("Manager {} could not be stopped in time! "
                              "Forcing process to stop.".format(self.name))
            self.__process.kill(timeout=2, block=True)  # Force stop
        else:
            self._log.info("Manager {} successfully stopped!".format(
                self.name))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GMAPS API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def enable_gmaps_reverse_geocoding(self):
        """Enable GMaps Reverse Geocoding DTS for triggered Events. """
        if not self._gmaps_service:
            raise ValueError("Unable to enable Google Maps Reverse Geocoding."
                             "No GMaps API key has been set.")
        self._gmaps_reverse_geocode = True

    def disable_gmaps_reverse_geocoding(self):
        """Disable GMaps Reverse Geocoding DTS for triggered Events. """
        self._gmaps_reverse_geocode = False

    def enable_gmaps_distance_matrix(self, mode):
        """Enable 'mode' Distance Matrix DTS for triggered Events. """
        if not self.__location:
            raise ValueError("Unable to enable Google Maps Reverse Geocoding."
                             "No Manager location has been set.")
        elif not self._gmaps_service:
            raise ValueError("Unable to enable Google Maps Reverse Geocoding."
                             "No GMaps API key has been provided.")
        elif mode not in GMaps.TRAVEL_MODES:
            raise ValueError("Unable to enable distance matrix mode: "
                             "{} is not a valid mode.".format(mode))
        self._gmaps_distance_matrix.add(mode)

    def disable_gmaps_dm_walking(self, mode):
        """Disable 'mode' Distance Matrix DTS for triggered Events.

        Despite the method name, any mode in ``GMaps.TRAVEL_MODES`` is
        accepted. Raises ValueError for any other mode.
        """
        if mode in GMaps.TRAVEL_MODES:
            self._gmaps_distance_matrix.discard(mode)
            return
        raise ValueError("Unable to disable distance matrix mode: "
                         "Invalid mode specified.")

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LOGGING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    @staticmethod
    def _create_logger(mgr_name):
        """ Internal method for initializing manager loggers. """
        # Create a Filter to pass on manager name
        log = logging.getLogger('pokealarm.{}'.format(mgr_name))
        return log

    def get_child_logger(self, name):
        """ Return the child logger `name` with a ContextFilter attached.

        A new ContextFilter instance is added on every call.
        """
        child = self._log.getChild(name)
        context_filter = ContextFilter()
        child.addFilter(context_filter)
        return child

    def set_log_level(self, log_level):
        if log_level == 1:
            self._log.setLevel(logging.WARNING)
        elif log_level == 2:
            self._log.setLevel(logging.INFO)
            self._log.getChild("cache").setLevel(logging.WARNING)
            self._log.getChild("filters").setLevel(logging.WARNING)
            self._log.getChild("alarms").setLevel(logging.WARNING)
        elif log_level == 3:
            self._log.setLevel(logging.INFO)
            self._log.getChild("cache").setLevel(logging.INFO)
            self._log.getChild("filters").setLevel(logging.WARNING)
            self._log.getChild("alarms").setLevel(logging.WARNING)
        elif log_level == 4:
            self._log.setLevel(logging.INFO)
            self._log.getChild("cache").setLevel(logging.INFO)
            self._log.getChild("filters").setLevel(logging.INFO)
            self._log.getChild("alarms").setLevel(logging.INFO)
        elif log_level == 5:
            self._log.setLevel(logging.DEBUG)
            self._log.getChild("cache").setLevel(logging.DEBUG)
            self._log.getChild("filters").setLevel(logging.DEBUG)
            self._log.getChild("alarms").setLevel(logging.DEBUG)
        else:
            raise ValueError("Unable to set verbosity, must be an "
                             "integer between 1 and 5.")
        self._log.debug("Verbosity set to %s", log_level)

    def add_file_logger(self, path, max_size_mb, ct):
        """Attach a file handler to the manager logger.

        All three arguments are passed straight to `setup_file_handler`.
        """
        manager_log = self._log
        setup_file_handler(manager_log, path, max_size_mb, ct)
        manager_log.debug("Added new file logger to %s", path)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FILTERS API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Enable/Disable Monster notifications
    def set_monsters_enabled(self, boolean):
        """Store the parsed on/off switch for Monster notifications."""
        self._mons_enabled = parse_bool(boolean)
        if self._mons_enabled:
            state = "enabled"
        else:
            state = "disabled"
        self._log.debug("Monster notifications %s", state)

    # Add new Monster Filter
    def add_monster_filter(self, name, settings):
        if name in self._mon_filters:
            raise ValueError("Unable to add Monster Filter: Filter with the "
                             "name {} already exists!".format(name))
        f = Filters.MonFilter(self, name, settings)
        self._mon_filters[name] = f
        self._log.debug("Monster filter '%s' set: %s", name, f)

    # Enable/Disable Stops notifications
    def set_stops_enabled(self, boolean):
        """Store the parsed on/off switch for Stop notifications."""
        self._stops_enabled = parse_bool(boolean)
        if self._stops_enabled:
            state = "enabled"
        else:
            state = "disabled"
        self._log.debug("Stops notifications %s!", state)

    # Add new Stop Filter
    def add_stop_filter(self, name, settings):
        if name in self._stop_filters:
            raise ValueError("Unable to add Stop Filter: Filter with the "
                             "name {} already exists!".format(name))
        f = Filters.StopFilter(self, name, settings)
        self._stop_filters[name] = f
        self._log.debug("Stop filter '%s' set: %s", name, f)

    # Enable/Disable Gym notifications
    def set_gyms_enabled(self, boolean):
        """Store the parsed on/off switch for Gym notifications."""
        self._gyms_enabled = parse_bool(boolean)
        if self._gyms_enabled:
            state = "enabled"
        else:
            state = "disabled"
        self._log.debug("Gyms notifications %s!", state)

    # Enable/Disable ignoring gym updates whose new team is neutral
    def set_ignore_neutral(self, boolean):
        """Store the parsed flag that makes process_gym skip neutral gyms."""
        ignore = parse_bool(boolean)
        self._ignore_neutral = ignore
        self._log.debug("Ignore neutral set to %s!", self._ignore_neutral)

    # Add new Gym Filter
    def add_gym_filter(self, name, settings):
        if name in self._gym_filters:
            raise ValueError("Unable to add Gym Filter: Filter with the "
                             "name {} already exists!".format(name))
        f = Filters.GymFilter(self, name, settings)
        self._gym_filters[name] = f
        self._log.debug("Gym filter '%s' set: %s", name, f)

    # Enable/Disable Egg notifications
    def set_eggs_enabled(self, boolean):
        """Store the parsed on/off switch for Egg notifications."""
        self._eggs_enabled = parse_bool(boolean)
        if self._eggs_enabled:
            state = "enabled"
        else:
            state = "disabled"
        self._log.debug("Egg notifications %s!", state)

    # Add new Egg Filter
    def add_egg_filter(self, name, settings):
        if name in self._egg_filters:
            raise ValueError("Unable to add Egg Filter: Filter with the "
                             "name {} already exists!".format(name))
        f = Filters.EggFilter(self, name, settings)
        self._egg_filters[name] = f
        self._log.debug("Egg filter '%s' set: %s", name, f)

    # Enable/Disable Raid notifications
    def set_raids_enabled(self, boolean):
        """Store the parsed on/off switch for Raid notifications."""
        self._raids_enabled = parse_bool(boolean)
        if self._raids_enabled:
            state = "enabled"
        else:
            state = "disabled"
        self._log.debug("Raid notifications %s!", state)

    # Add new Raid Filter
    def add_raid_filter(self, name, settings):
        if name in self._raid_filters:
            raise ValueError("Unable to add Raid Filter: Filter with the "
                             "name {} already exists!".format(name))
        f = Filters.RaidFilter(self, name, settings)
        self._raid_filters[name] = f
        self._log.debug("Raid filter '%s' set: %s", name, f)

    # Enable/Disable Weather notifications
    def set_weather_enabled(self, boolean):
        """Store the parsed on/off switch for Weather notifications."""
        self._weather_enabled = parse_bool(boolean)
        if self._weather_enabled:
            state = "enabled"
        else:
            state = "disabled"
        self._log.debug("Weather notifications %s!", state)

    # Add new Weather Filter
    def add_weather_filter(self, name, settings):
        if name in self._weather_filters:
            raise ValueError("Unable to add Weather Filter: Filter with the "
                             "name {} already exists!".format(name))
        f = Filters.WeatherFilter(self, name, settings)
        self._weather_filters[name] = f
        self._log.debug("Weather filter '%s' set: %s", name, f)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ALARMS API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def add_alarm(self, name, settings):
        if name in self._alarms:
            raise ValueError("Unable to add new Alarm: Alarm with the name "
                             "{} already exists!".format(name))
        alarm = Alarms.alarm_factory(self, settings, self._max_attempts,
                                     self._google_key)
        self._alarms[name] = alarm

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RULES API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Add new Monster Rule
    def add_monster_rule(self, name, filters, alarms):
        if name in self.__mon_rules:
            raise ValueError("Unable to add Rule: Monster Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self._mon_filters:
                raise ValueError("Unable to create Rule: No Monster Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self._alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__mon_rules[name] = Rule(filters, alarms)

    # Add new Stop Rule
    def add_stop_rule(self, name, filters, alarms):
        if name in self.__stop_rules:
            raise ValueError("Unable to add Rule: Stop Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self._stop_filters:
                raise ValueError("Unable to create Rule: No Stop Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self._alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__stop_rules[name] = Rule(filters, alarms)

    # Add new Gym Rule
    def add_gym_rule(self, name, filters, alarms):
        if name in self.__gym_rules:
            raise ValueError("Unable to add Rule: Gym Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self._gym_filters:
                raise ValueError("Unable to create Rule: No Gym Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self._alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__gym_rules[name] = Rule(filters, alarms)

    # Add new Egg Rule
    def add_egg_rule(self, name, filters, alarms):
        if name in self.__egg_rules:
            raise ValueError("Unable to add Rule: Egg Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self._egg_filters:
                raise ValueError("Unable to create Rule: No Egg Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self._alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__egg_rules[name] = Rule(filters, alarms)

    # Add new Raid Rule
    def add_raid_rule(self, name, filters, alarms):
        if name in self.__raid_rules:
            raise ValueError("Unable to add Rule: Raid Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self._raid_filters:
                raise ValueError("Unable to create Rule: No Raid Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self._alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__raid_rules[name] = Rule(filters, alarms)

    # Add new Weather Rule
    def add_weather_rule(self, name, filters, alarms):
        if name in self.__weather_rules:
            raise ValueError("Unable to add Rule: Weather Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self._weather_filters:
                raise ValueError("Unable to create Rule: No Weather Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self._alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__weather_rules[name] = Rule(filters, alarms)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MANAGER LOADING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HANDLE EVENTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Start it up
    def start(self):
        """Spawn `run` in a gevent greenlet and keep the handle for `join`."""
        worker = gevent.spawn(self.run)
        self.__process = worker

    def setup_in_process(self):
        """Prepare global config, logging and alarms before the event loop.

        Writes DEBUG and ROOT_PATH into the shared `config`, quiets the
        'requests' and 'urllib3' loggers, switches the root logger to DEBUG
        when debugging is on, then connects every alarm and sends its
        startup message.
        """
        # Update config
        config['DEBUG'] = self.__debug
        module_dir = os.path.dirname(__file__)
        config['ROOT_PATH'] = os.path.abspath("{}/..".format(module_dir))

        # Hush some noisy third-party loggers
        for noisy in ('requests', 'urllib3'):
            logging.getLogger(noisy).setLevel(logging.WARNING)

        if config['DEBUG'] is True:
            root_logger = logging.getLogger()
            root_logger.setLevel(logging.DEBUG)

        # Connect the alarms and send the start up message
        for alarm in self._alarms.values():
            alarm.connect()
            alarm.startup_message()

    # Main event handler loop
    def run(self):
        """Main event loop: pull events off the queue and dispatch them.

        Calls `setup_in_process` once, then loops. Each pass cleans and
        saves the cache if more than five minutes have elapsed, then waits up
        to five seconds for the next queued event. The shutdown flag is only
        checked when that wait times out, so the loop exits once the flag is
        set AND the queue has stayed empty for a full timeout. Exceptions
        raised while handling an event are logged and the loop continues.

        On exit the cache is cleaned and saved, and `gevent.GreenletExit`
        is raised.
        """
        self.setup_in_process()
        last_clean = datetime.utcnow()
        while True:  # Run forever and ever

            # Clean out visited every 5 minutes
            if datetime.utcnow() - last_clean > timedelta(minutes=5):
                self._log.debug("Cleaning cache...")
                self.__cache.clean_and_save()
                last_clean = datetime.utcnow()

            try:  # Get next object to process
                event = self.__queue.get(block=True, timeout=5)
            except gevent.queue.Empty:
                # Queue stayed empty for the whole timeout: exit if a
                # shutdown was requested, otherwise keep polling
                if self.__event.is_set():
                    break
                # Explicit context yield
                gevent.sleep(0)
                continue

            try:
                # Dispatch on the exact event class (subclasses do not match)
                kind = type(event)
                self._log.debug("Processing event: %s", event.id)
                if kind == Events.MonEvent:
                    self.process_monster(event)
                elif kind == Events.StopEvent:
                    self.process_stop(event)
                elif kind == Events.GymEvent:
                    self.process_gym(event)
                elif kind == Events.EggEvent:
                    self.process_egg(event)
                elif kind == Events.RaidEvent:
                    self.process_raid(event)
                elif kind == Events.WeatherEvent:
                    self.process_weather(event)
                else:
                    self._log.error(
                        "!!! Manager does not support {} events!".format(kind))
                self._log.debug("Finished event: %s", event.id)
            except Exception as e:
                # Log and keep going: one bad event must not stop the loop
                self._log.error("Encountered error during processing: "
                                "{}: {}".format(type(e).__name__, e))
                self._log.error("Stack trace: \n {}"
                                "".format(traceback.format_exc()))
            # Explicit context yield
            gevent.sleep(0)
        # Save cache and exit
        self.__cache.clean_and_save()
        raise gevent.GreenletExit()

    # Set the location of the Manager
    def set_location(self, location):
        # Regex for Lat,Lng coordinate
        prog = re.compile("^(-?\d+\.\d+)[,\s]\s*(-?\d+\.\d+?)$")
        res = prog.match(location)
        if res:  # If location is in a Lat,Lng coordinate
            self.__location = [float(res.group(1)), float(res.group(2))]
        else:
            # Check if key was provided
            if self._gmaps_service is None:
                raise ValueError("Unable to find location coordinates by name"
                                 " - no Google API key was provided.")
            # Attempt to geocode location
            location = self._gmaps_service.geocode(location)
            if location is None:
                raise ValueError("Unable to geocode coordinates from {}. "
                                 "Location will not be set.".format(location))

            self.__location = location
            self._log.info("Location successfully set to '{},{}'.".format(
                location[0], location[1]))

    def _check_filters(self, event, filter_set, filter_names):
        """ Function for checking if an event passes any filters. """
        for name in filter_names:
            f = filter_set.get(name)
            # Filter should always exist, but sanity check anyway
            if f:
                # If the Event passes, return True
                if f.check_event(event) and self.check_geofences(f, event):
                    event.custom_dts = f.custom_dts
                    return True
            else:
                self._log.critical("ERROR: No filter named %s found!", name)
        return False

    def _notify_alarms(self, event, alarm_names, func_name):
        """ Function for triggering notifications to alarms. """
        # Generate the DTS for the event
        dts = event.generate_dts(self.__locale, self.__timezone, self.__units)

        # Get GMaps Triggers
        if self._gmaps_reverse_geocode:
            dts.update(
                self._gmaps_service.reverse_geocode((event.lat, event.lng),
                                                    self._language))
        for mode in self._gmaps_distance_matrix:
            dts.update(
                self._gmaps_service.distance_matrix(mode,
                                                    (event.lat, event.lng),
                                                    self.__location,
                                                    self._language,
                                                    self.__units))

        # Spawn notifications in threads so they can work asynchronously
        threads = []
        for name in alarm_names:
            alarm = self._alarms.get(name)
            if not alarm:
                self._log.critical("ERROR: No alarm named %s found!", name)
                continue
            func = getattr(alarm, func_name)
            threads.append(gevent.spawn(func, dts))

        for thread in threads:  # Wait for all alarms to finish
            thread.join()

    # Process new Monster data and decide if a notification needs to be sent
    def process_monster(self, mon):
        # type: (Events.MonEvent) -> None
        """ Process a monster event and notify alarms if it passes. """

        # Make sure that monsters are enabled
        if self._mons_enabled is False:
            self._log.debug("Monster ignored: monster notifications "
                            "are disabled.")
            return

        # Set the name for this event so we can log rejects better
        mon.name = self.__locale.get_pokemon_name(mon.monster_id)

        # Check if previously processed and update expiration
        #if self.__cache.monster_expiration(mon.enc_id) is not None:
        #   self._log.debug("{} monster was skipped because it was "
        #                "previously processed.".format(mon.name))
        #  return
        # self.__cache.monster_expiration(mon.enc_id, mon.disappear_time)

        # Check the time remaining
        seconds_left = (mon.disappear_time - datetime.utcnow()).total_seconds()
        if seconds_left < self.__time_limit:
            self._log.debug("{} monster was skipped because only {} seconds "
                            "remained".format(mon.name, seconds_left))
            return

        # Calculate distance and direction
        if self.__location is not None:
            mon.distance = get_earth_dist([mon.lat, mon.lng], self.__location,
                                          self.__units)
            mon.direction = get_cardinal_dir([mon.lat, mon.lng],
                                             self.__location)

        # Check for Rules
        rules = self.__mon_rules
        if len(rules) == 0:  # If no rules, default to all
            rules = {
                "default": Rule(self._mon_filters.keys(), self._alarms.keys())
            }

        rule_ct, alarm_ct = 0, 0
        for r_name, rule in rules.iteritems():  # For all rules
            passed = self._check_filters(mon, self._mon_filters,
                                         rule.filter_names)
            if passed:
                rule_ct += 1
                alarm_ct += len(rule.alarm_names)
                self._notify_alarms(mon, rule.alarm_names, 'pokemon_alert')

        if rule_ct > 0:
            self._rule_log.info(
                'Monster %s passed %s rule(s) and triggered %s alarm(s).',
                mon.name, rule_ct, alarm_ct)
        else:
            self._rule_log.info('Monster %s rejected by all rules.', mon.name)

    def process_stop(self, stop):
        # type: (Events.StopEvent) -> None
        """ Process a stop event and notify alarms if it passes. """

        # Make sure that stops are enabled
        if self._stops_enabled is False:
            self._log.debug("Stop ignored: stop notifications are disabled.")
            return

        # Check for lured
        if stop.expiration is None:
            self._log.debug("Stop ignored: stop was not lured")
            return

        # Check if previously processed and update expiration
        if self.__cache.stop_expiration(stop.stop_id) is not None:
            self._log.debug("Stop {} was skipped because it was "
                            "previously processed.".format(stop.name))
            return
        self.__cache.stop_expiration(stop.stop_id, stop.expiration)

        # Check the time remaining
        seconds_left = (stop.expiration - datetime.utcnow()).total_seconds()
        if seconds_left < self.__time_limit:
            self._log.debug("Stop {} was skipped because only {} seconds "
                            "remained".format(stop.name, seconds_left))
            return

        # Calculate distance and direction
        if self.__location is not None:
            stop.distance = get_earth_dist([stop.lat, stop.lng],
                                           self.__location, self.__units)
            stop.direction = get_cardinal_dir([stop.lat, stop.lng],
                                              self.__location)

        # Check for Rules
        rules = self.__stop_rules
        if len(rules) == 0:  # If no rules, default to all
            rules = {
                "default": Rule(self._stop_filters.keys(), self._alarms.keys())
            }

        rule_ct, alarm_ct = 0, 0
        for r_name, rule in rules.iteritems():  # For all rules
            passed = self._check_filters(stop, self._stop_filters,
                                         rule.filter_names)
            if passed:
                rule_ct += 1
                alarm_ct += len(rule.alarm_names)
                self._notify_alarms(stop, rule.alarm_names, 'pokestop_alert')

        if rule_ct > 0:
            self._rule_log.info(
                'Stop %s passed %s rule(s) and triggered %s alarm(s).',
                stop.name, rule_ct, alarm_ct)
        else:
            self._rule_log.info('Stop %s rejected by all rules.', stop.name)

    def process_gym(self, gym):
        # type: (Events.GymEvent) -> None
        """ Process a gym event and notify alarms if it passes. """

        # Update Gym details (if they exist)
        gym.gym_name = self.__cache.gym_name(gym.gym_id, gym.gym_name)
        gym.gym_description = self.__cache.gym_desc(gym.gym_id,
                                                    gym.gym_description)
        gym.gym_image = self.__cache.gym_image(gym.gym_id, gym.gym_image)

        # Ignore changes to neutral
        if self._ignore_neutral and gym.new_team_id == 0:
            self._log.debug("%s gym update skipped: new team was neutral")
            return

        # Update Team Information
        gym.old_team_id = self.__cache.gym_team(gym.gym_id)
        self.__cache.gym_team(gym.gym_id, gym.new_team_id)

        # Check if notifications are on
        if self._gyms_enabled is False:
            self._log.debug("Gym ignored: gym notifications are disabled.")
            return

        # Doesn't look like anything to me
        if gym.new_team_id == gym.old_team_id:
            self._log.debug("%s gym update skipped: no change detected",
                            gym.gym_id)
            return

        # Calculate distance and direction
        if self.__location is not None:
            gym.distance = get_earth_dist([gym.lat, gym.lng], self.__location,
                                          self.__units)
            gym.direction = get_cardinal_dir([gym.lat, gym.lng],
                                             self.__location)

        # Check for Rules
        rules = self.__gym_rules
        if len(rules) == 0:  # If no rules, default to all
            rules = {
                "default": Rule(self._gym_filters.keys(), self._alarms.keys())
            }

        rule_ct, alarm_ct = 0, 0
        for r_name, rule in rules.iteritems():  # For all rules
            passed = self._check_filters(gym, self._gym_filters,
                                         rule.filter_names)
            if passed:
                rule_ct += 1
                alarm_ct += len(rule.alarm_names)
                self._notify_alarms(gym, rule.alarm_names, 'gym_alert')

        if rule_ct > 0:
            self._rule_log.info(
                'Gym %s passed %s rule(s) and triggered %s alarm(s).',
                gym.name, rule_ct, alarm_ct)
        else:
            self._rule_log.info('Gym %s rejected by all rules.', gym.name)

    def process_egg(self, egg):
        # type: (Events.EggEvent) -> None
        """ Process a egg event and notify alarms if it passes.

        Steps: refresh the gym's name, description and image through the
        cache; fill in the current team from the cache when it is Unknown;
        skip if egg notifications are disabled or the cache already holds an
        egg expiration for this gym; record the hatch time in the cache; skip
        if fewer than the configured seconds remain; fill in distance and
        direction when a location is set; then check every rule and notify
        the alarms of each rule that passes. With no Egg Rules configured,
        one default rule covering all Egg Filters and all Alarms is used.
        """

        # Update Gym details (if they exist)
        egg.gym_name = self.__cache.gym_name(egg.gym_id, egg.gym_name)
        egg.gym_description = self.__cache.gym_desc(egg.gym_id,
                                                    egg.gym_description)
        egg.gym_image = self.__cache.gym_image(egg.gym_id, egg.gym_image)

        # Update Team if Unknown
        if Unknown.is_(egg.current_team_id):
            egg.current_team_id = self.__cache.gym_team(egg.gym_id)

        # Make sure that eggs are enabled
        if self._eggs_enabled is False:
            self._log.debug("Egg ignored: egg notifications are disabled.")
            return

        # Skip if previously processed
        if self.__cache.egg_expiration(egg.gym_id) is not None:
            self._log.debug("Egg {} was skipped because it was "
                            "previously processed.".format(egg.name))
            return
        self.__cache.egg_expiration(egg.gym_id, egg.hatch_time)

        # Check the time remaining
        seconds_left = (egg.hatch_time - datetime.utcnow()).total_seconds()
        if seconds_left < self.__time_limit:
            self._log.debug("Egg {} was skipped because only {} seconds "
                            "remained".format(egg.name, seconds_left))
            return

        # Calculate distance and direction
        if self.__location is not None:
            egg.distance = get_earth_dist([egg.lat, egg.lng], self.__location,
                                          self.__units)
            egg.direction = get_cardinal_dir([egg.lat, egg.lng],
                                             self.__location)

        # Check for Rules
        rules = self.__egg_rules
        if not rules:  # If no rules, default to all
            rules = {
                "default": Rule(self._egg_filters.keys(), self._alarms.keys())
            }

        rule_ct, alarm_ct = 0, 0
        # .values() works on Python 2 and 3 (iteritems() is Python 2 only)
        # and the rule name was never used.
        for rule in rules.values():  # For all rules
            passed = self._check_filters(egg, self._egg_filters,
                                         rule.filter_names)
            if passed:
                rule_ct += 1
                alarm_ct += len(rule.alarm_names)
                self._notify_alarms(egg, rule.alarm_names, 'raid_egg_alert')

        if rule_ct > 0:
            self._rule_log.info(
                'Egg %s passed %s rule(s) and triggered %s alarm(s).',
                egg.name, rule_ct, alarm_ct)
        else:
            self._rule_log.info('Egg %s rejected by all rules.', egg.name)

    def process_raid(self, raid):
        # type: (Events.RaidEvent) -> None
        """
        Handle a raid event and alert the alarms of every rule it passes.

        The event is first refreshed from the cache (gym details, team), then
        dropped early when raid notifications are disabled, when an
        expiration is already cached for its gym, or when fewer seconds than
        the configured time limit remain before the raid ends. Surviving
        events get a distance/direction (if a location is set) and are
        evaluated against each rule; with no rules configured a single
        catch-all rule is used.
        """
        cache = self.__cache

        # Refresh gym details through the cache
        raid.gym_name = cache.gym_name(raid.gym_id, raid.gym_name)
        raid.gym_description = cache.gym_desc(
            raid.gym_id, raid.gym_description)
        raid.gym_image = cache.gym_image(raid.gym_id, raid.gym_image)

        # An unknown team is replaced by whatever the cache holds for the gym
        if Unknown.is_(raid.current_team_id):
            raid.current_team_id = cache.gym_team(raid.gym_id)

        # Raid notifications switched off: nothing more to do
        if self._raids_enabled is False:
            self._log.debug("Raid ignored: raid notifications are disabled.")
            return

        # A cached expiration means this gym's raid was already handled
        if cache.raid_expiration(raid.gym_id) is not None:
            msg = "Raid {} was skipped because it was previously processed."
            self._log.debug(msg.format(raid.name))
            return
        cache.raid_expiration(raid.gym_id, raid.raid_end)

        # Drop raids that end sooner than the configured time limit
        remaining = raid.raid_end - datetime.utcnow()
        seconds_left = remaining.total_seconds()
        if seconds_left < self.__time_limit:
            msg = "Raid {} was skipped because only {} seconds remained"
            self._log.debug(msg.format(raid.name, seconds_left))
            return

        # Distance and direction are only available with a location set
        if self.__location is not None:
            raid.distance = get_earth_dist(
                [raid.lat, raid.lng], self.__location, self.__units)
            raid.direction = get_cardinal_dir(
                [raid.lat, raid.lng], self.__location)

        # Without explicit rules, one default rule covers every filter/alarm
        rules = self.__raid_rules
        if not rules:
            default_rule = Rule(self._raid_filters.keys(),
                                self._alarms.keys())
            rules = {"default": default_rule}

        passed_ct = 0
        alarm_total = 0
        for _rule_name, rule in rules.iteritems():
            # Check and notify stay interleaved per rule on purpose
            if not self._check_filters(raid, self._raid_filters,
                                       rule.filter_names):
                continue
            passed_ct += 1
            alarm_total += len(rule.alarm_names)
            self._notify_alarms(raid, rule.alarm_names, 'raid_alert')

        if not passed_ct:
            self._rule_log.info('Raid %s rejected by all rules.', raid.name)
            return
        self._rule_log.info(
            'Raid %s passed %s rule(s) and triggered %s alarm(s).',
            raid.name, passed_ct, alarm_total)

    def process_weather(self, weather):
        # type: (Events.WeatherEvent) -> None
        """
        Handle a weather event and alert the alarms of every rule it passes.

        The cached weather, day/night and severity ids of the event's cell
        are always overwritten with the event's values; the event is only
        evaluated against the rules when at least one of the three differs
        from what was cached before. With no rules configured a single
        catch-all rule is used.
        """
        cell_id = weather.s2_cell_id

        # Name the event up front so rejections can be logged meaningfully
        weather.name = self.__locale.get_weather_name(cell_id)

        # Weather notifications switched off: nothing more to do
        if self._weather_enabled is False:
            self._log.debug(
                "Weather ignored: weather change notifications are disabled.")
            return

        # Distance and direction are only available with a location set
        if self.__location is not None:
            weather.distance = get_earth_dist(
                [weather.lat, weather.lng], self.__location, self.__units)
            weather.direction = get_cardinal_dir(
                [weather.lat, weather.lng], self.__location)

        cache = self.__cache

        # Snapshot the previous values before they are overwritten below
        prev_weather_id = cache.cell_weather_id(cell_id)
        prev_day_or_night_id = cache.day_or_night_id(cell_id)
        prev_severity_id = cache.severity_id(cell_id)

        # Store the new values
        cache.cell_weather_id(cell_id, weather.weather_id)
        cache.day_or_night_id(cell_id, weather.day_or_night_id)
        cache.severity_id(cell_id, weather.severity_id)

        # Nothing changed compared to the snapshot: ignore the event
        unchanged = (weather.weather_id == prev_weather_id
                     and weather.day_or_night_id == prev_day_or_night_id
                     and weather.severity_id == prev_severity_id)
        if unchanged:
            self._log.debug(
                "weather of %s, alert of %s, and day or night of %s skipped: "
                "no change detected", weather.weather_id, weather.severity_id,
                weather.day_or_night_id)
            return

        # Without explicit rules, one default rule covers every filter/alarm
        rules = self.__weather_rules
        if not rules:
            default_rule = Rule(self._weather_filters.keys(),
                                self._alarms.keys())
            rules = {"default": default_rule}

        passed_ct = 0
        alarm_total = 0
        for _rule_name, rule in rules.iteritems():
            # Check and notify stay interleaved per rule on purpose
            if not self._check_filters(weather, self._weather_filters,
                                       rule.filter_names):
                continue
            passed_ct += 1
            alarm_total += len(rule.alarm_names)
            self._notify_alarms(weather, rule.alarm_names, 'weather_alert')

        if not passed_ct:
            self._rule_log.info('Weather %s rejected by all rules.',
                                weather.name)
            return
        self._rule_log.info(
            'Weather %s passed %s rule(s) and triggered %s alarm(s).',
            weather.name, passed_ct, alarm_total)

    # Check to see if a notification is within the given range
    # TODO: Move this into filters and add unit tests
    def check_geofences(self, f, e):
        """
        Tell whether event ``e`` lies inside one of filter ``f``'s geofences.

        Returns True immediately when either the manager or the filter has no
        geofences configured. Otherwise the filter's geofence names are tried
        in order (the single name "all" expands to every known geofence); on
        the first hit the matching name is stored on ``e.geofence`` and True
        is returned. Unknown names are logged as errors and skipped. False is
        returned when no geofence contains the event.
        """
        # Geofencing not configured on one side: everything passes
        if self.geofences is None:
            return True
        if f.geofences is None:
            return True

        names = f.geofences
        if len(names) == 1 and "all" in names:
            names = self.geofences.iterkeys()

        for name in names:
            fence = self.geofences.get(name)
            if not fence:
                # The filter refers to a geofence that was never loaded
                self._log.error("Cannot check geofence %s: "
                                "does not exist!", name)
                continue
            if not fence.contains(e.lat, e.lng):
                self._log.debug("%s not in %s.", e.name, name)
                continue
            self._log.debug(
                "{} is in geofence {}!".format(e.name, fence.get_name()))
            e.geofence = name  # remembered on the event for later use
            return True

        self._log.debug("%s rejected from filter by geofences.", e.name)
        return False
Ejemplo n.º 54
0
class Container(object):
    """Supervise one docker container backing a service instance.

    After `start` a background greenlet pulls the image, creates and starts
    the container, optionally registers it with the service registry and
    waits for it to exit. Unless restarts are disabled or `dispose` was
    called, the cycle repeats after a growing delay (capped at 180 seconds).
    The current lifecycle step is exposed through ``state`` and the last
    failure through ``reason``.
    """

    def __init__(self, docker, runtime, registry, host,
                 image, command, env, ports, options,
                 formation, service, instance,
                 restart=True, tty=False):
        """Store the configuration; nothing runs until `start` is called.

        :param docker: client used for all docker operations.
        :param runtime: callable invoked with this container to build the
            object that provides the create config (``make_config``).
        :param registry: service registry, or None to skip registration.
        :param host: host passed on to the registry announcement.
        :param image: image to pull and run.
        :param command: command as a string or a list of arguments.
        :param env: environment for the container.
        :param ports: ports, converted to port bindings at start.
        :param options: extra options kept on the instance.
        :param formation: formation name (used for logging/registration).
        :param service: service name (used for logging/registration).
        :param instance: instance name (used for logging/registration).
        :param restart: when false the container is run only once.
        :param tty: kept on the instance as ``tty``.
        """
        self.docker = docker
        self.runtime = runtime
        self.registry = registry
        self.host = host
        self.id = shortuuid.uuid()
        cmd = ' '.join(command) if isinstance(command, list) else command
        self.log = logging.getLogger('container[{0}/{1}.{2} (image={3}, command="{4}")]'.format(
                formation, service, instance, image, cmd))
        self.image = image
        self.command = command
        self.env = env
        self.ports = ports
        self.options = options
        self.formation = formation
        self.service = service
        self.instance = instance
        self.state = 'init'
        self.tty = tty
        self.reason = None
        self.status_code = None
        self._stopped = Event()
        self._cont_id = None
        self._registration = None
        self._runtime = None
        self._restart = restart
        self._reset()

    def start(self):
        """Spawn the provisioning greenlet and return ``self``."""
        self.log.info("start called")
        gevent.spawn(self._provision_and_start)
        return self

    def _reset(self):
        """Reset the restart back-off delay and the pending wait handle."""
        self._delay = 1
        self._waiting = None

    def restart(self, image, command, env, ports):
        """Replace the run configuration and trigger a new run.

        A running container is stopped; if the supervisor is currently
        sleeping between restarts, that sleep is cut short instead.
        """
        self.image = image
        self.command = command
        self.env = env
        self.ports = ports
        self.status_code = None
        if self._cont_id:
            self.docker.stop(self._cont_id)
        elif self._waiting:
            self._waiting.set()

    def dispose(self):
        """Dispose of the container."""
        if not self._stopped.is_set():
            self._stopped.set()
            if self._cont_id is not None:
                self.docker.stop(self._cont_id)

    def commit(self, repository, tag):
        """Commit the current container as ``repository:tag``."""
        data = self.docker.inspect_container(self._cont_id)
        self.docker.commit(self._cont_id, repository=repository, tag=tag,
                           conf=data['Config'])

    def attach(self, stdin=True, stdout=True, stderr=True,
               stream=True, logs=False):
        """Attach to container.

        Each flag is sent as 1 or 0; returns whatever the docker client's
        ``attach_websocket`` returns.
        """
        params = {
            'stdin': int(bool(stdin)),
            'stdout': int(bool(stdout)),
            'stderr': int(bool(stderr)),
            'stream': int(bool(stream)),
            'logs': int(bool(logs)),
            }
        return self.docker.attach_websocket(self._cont_id, params)

    def resize(self, w, h):
        """Resize the container's tty to ``w`` x ``h``."""
        return self.docker.resize_tty(self._cont_id, w, h)

    def _register_with_service_registry(self):
        """Announce the running container to the service registry."""
        data = self.docker.inspect_container(self._cont_id)
        announcement = self.registry.build_announcement(
            self.formation, self.service, self.instance,
            dict(_port_mappings_from_inspect_data(data)),
            host=self.host)
        self._registration = self.registry.register(
            self.formation, self.service, self.instance, announcement)

    def _provision_and_start(self):
        """Pull, run and re-run the container until stopped, then tear down.

        An exception raised while pulling or starting is recorded through
        `_update_state` and re-raised, which ends this supervisor.
        """
        while not self._stopped.is_set():
            with self._update_state('pulling'):
                self.log.debug("start pulling %r" % (self.image,))
                self.docker.pull(self.image)

            with self._update_state('starting'):
                self._create_container()
            self._set_state('running')

            if self.registry is not None:
                self._register_with_service_registry()

            # Blocks until the container exits.
            self.status_code = self.docker.wait(self._cont_id)
            if self._registration is not None:
                self._registration.stop(timeout=5)
                self._registration = None

            if not self._restart:
                break
            elif not self._stopped.is_set():
                self._set_error(
                    "container stopped unexpectedly: exit code {0}".format(
                        self.status_code))
                self._pause()

        # Kill the container completely. Both handles are still None when
        # the instance was disposed before a container was ever created, so
        # guard against that instead of failing the teardown.
        with self._update_state('done'):
            if self._cont_id is not None:
                self.docker.kill(self._cont_id)
            if self._runtime is not None:
                self._runtime.dispose()

    def _pause(self):
        """Sleep before the next restart, growing the delay each time.

        The sleep ends early when `restart` sets the wait event.
        """
        self._delay = min(180, self._delay * 2.71828)
        self.log.info("will wait for {0:.1f} seconds before restarting".format(
                self._delay))
        with self._update_state('error'):
            try:
                self._waiting = Event()
                self._waiting.wait(self._delay)
            finally:
                self._waiting = None

    def _create_container(self):
        """Create container."""
        self._runtime = self.runtime(self)
        result = self.docker.create_container_from_config(
            self._runtime.make_config())
        self._cont_id = result['Id']
        self.docker.start(self._cont_id, port_bindings=_convert_ports_to_port_bindings(self.ports))

    def _set_state(self, state):
        """Log the transition and record the new state."""
        self.log.info('change state to %s from %s' % (state, self.state))
        self.state = state

    def _set_error(self, reason):
        """Record ``reason`` and switch to the 'error' state."""
        self.reason = reason
        self._set_state('error')
        self.log.warning('error: {0}'.format(self.reason))

    @contextmanager
    def _update_state(self, state):
        """Enter ``state`` for the duration of the block.

        If the block raises an Exception, its message becomes the error
        reason and the exception is re-raised.
        """
        self._set_state(state)
        try:
            yield
        except Exception as err:
            self._set_error(str(err))
            raise
Ejemplo n.º 55
0
class DaemonWatchdog(Greenlet):
    """
    DaemonWatchdog::

    Watch Ceph daemons for failures. If an extended failure is detected (i.e.
    not intentional), then the watchdog will unmount file systems and send
    SIGTERM to all daemons. The duration of an extended failure is configurable
    with watchdog_daemon_timeout.

    watchdog_daemon_timeout [default: 300]: number of seconds a daemon
        is allowed to be failed before the watchdog will bark.
    """

    def __init__(self, ctx, manager, config, thrashers):
        """
        :param ctx: test context providing ``daemons`` and ``mounts``.
        :param manager: cluster manager; its ``cluster`` selects the daemons.
        :param config: dict-like config (see ``watchdog_daemon_timeout``).
        :param thrashers: thrashers whose ``e`` attribute is polled for
            a recorded exception.
        """
        Greenlet.__init__(self)
        self.ctx = ctx
        self.config = config
        self.e = None
        self.logger = log.getChild('daemon_watchdog')
        self.manager = manager
        self.name = 'watchdog'
        self.stopping = Event()
        self.thrashers = thrashers

    def _run(self):
        """Run `watch`, recording (not propagating) any exception in ``e``."""
        try:
            self.watch()
        except Exception as e:
            # See _run exception comment for MDSThrasher
            self.e = e
            self.logger.exception("exception:")
            # allow successful completion so gevent doesn't see an exception...

    def log(self, x):
        """Write data to logger"""
        self.logger.info(x)

    def stop(self):
        """Ask the watch loop to finish after its current iteration."""
        self.stopping.set()

    @staticmethod
    def _is_alive(daemon):
        """True when the daemon reports running() and its process is live."""
        return daemon.running() and not daemon.proc.finished

    @staticmethod
    def _is_failed(daemon):
        """True when the daemon reports running() yet its process finished."""
        return daemon.running() and daemon.proc.finished

    def bark(self):
        """Force-unmount every mount and SIGTERM all live mds/mon daemons.

        Both steps are best effort: a failure on one mount or daemon is
        logged and the remaining ones are still processed.
        """
        self.log("BARK! unmounting mounts and killing all daemons")
        for mount in self.ctx.mounts.values():
            try:
                mount.umount_wait(force=True)
            except:  # presumably deliberately broad: best-effort teardown
                self.logger.exception("ignoring exception:")
        daemons = []
        for role in ('mds', 'mon'):
            candidates = self.ctx.daemons.iter_daemons_of_role(
                role, cluster=self.manager.cluster)
            daemons.extend(d for d in candidates if self._is_alive(d))
        for daemon in daemons:
            try:
                daemon.signal(signal.SIGTERM)
            except:  # presumably deliberately broad: best-effort teardown
                self.logger.exception("ignoring exception:")

    def watch(self):
        """Poll every 5 seconds until stopped or until the watchdog barks.

        The watchdog barks (and returns) when a mon/mds daemon has been
        failed for longer than ``watchdog_daemon_timeout`` seconds or when
        any thrasher has recorded an exception.
        """
        self.log("watchdog starting")
        daemon_timeout = int(self.config.get('watchdog_daemon_timeout', 300))
        # name -> (daemon, time the failure was first observed)
        daemon_failure_time = {}
        while not self.stopping.is_set():
            bark = False
            now = time.time()

            mons = self.ctx.daemons.iter_daemons_of_role('mon', cluster=self.manager.cluster)
            mdss = self.ctx.daemons.iter_daemons_of_role('mds', cluster=self.manager.cluster)

            daemon_failures = [d for d in mons if self._is_failed(d)]
            daemon_failures.extend(d for d in mdss if self._is_failed(d))

            failed_names = set()
            for daemon in daemon_failures:
                name = daemon.role + '.' + daemon.id_
                failed_names.add(name)
                dt = daemon_failure_time.setdefault(name, (daemon, now))
                assert dt[0] is daemon
                delta = now-dt[1]
                self.log("daemon {name} is failed for ~{t:.0f}s".format(name=name, t=delta))
                if delta > daemon_timeout:
                    bark = True

            # If a daemon is no longer failed, remove it from tracking.
            # Iterate over a snapshot of the keys: deleting while iterating
            # the dict itself raises RuntimeError on Python 3.
            for name in list(daemon_failure_time):
                if name not in failed_names:
                    self.log("daemon {name} has been restored".format(name=name))
                    del daemon_failure_time[name]

            for thrasher in self.thrashers:
                if thrasher.e is not None:
                    self.log("thrasher on fs.{name} failed".format(name=thrasher.fs.name))
                    bark = True

            if bark:
                self.bark()
                return

            sleep(5)

        self.log("watchdog finished")
Ejemplo n.º 56
0
class MDSThrasher(Greenlet):
    """
    MDSThrasher::

    The MDSThrasher thrashes MDSs during execution of other tasks (workunits, etc).

    The config is optional.  Many of the config parameters are a maximum value
    to use when selecting a random value from a range.  To always use the maximum
    value, set randomize to false.  The config is a dict containing some or all of:

    seed: [no default] seed the random number generator

    randomize: [default: true] enables randomization and use the max/min values

    max_thrash: [default: 1] the maximum number of MDSs that will be thrashed at
      any given time.

    max_thrash_delay: [default: 30] maximum number of seconds to delay before
      thrashing again.  The legacy key ``thrash_delay`` is still honoured when
      ``max_thrash_delay`` is not given.

    max_revive_delay: [default: 10] maximum number of seconds to delay before
      bringing back a thrashed MDS

    thrash_in_replay: [default: 0.0] likelihood that the MDS will be thrashed
      during replay.  Value should be between 0.0 and 1.0

    max_replay_thrash_delay: [default: 4] maximum number of seconds to delay while in
      the replay state before thrashing

    thrash_weights: allows specific MDSs to be thrashed more/less frequently.  This option
      overrides anything specified by max_thrash.  This option is a dict containing
      mds.x: weight pairs.  For example, [mds.a: 0.7, mds.b: 0.3, mds.c: 0.0].  Each weight
      is a value from 0.0 to 1.0.  Any MDSs not specified will be automatically
      given a weight of 0.0.  For a given MDS, by default the thrasher delays for up
      to max_thrash_delay, thrashes, waits for the MDS to recover, and iterates.  If a non-zero
      weight is specified for an MDS, for each iteration the thrasher chooses whether to thrash
      during that iteration based on a random value [0-1] not exceeding the weight of that MDS.

    Examples::


      The following example sets the likelihood that mds.a will be thrashed
      to 80%, mds.b to 20%, and other MDSs will not be thrashed.  It also sets the
      likelihood that an MDS will be thrashed in replay to 40%.
      Thrash weights do not have to sum to 1.

      tasks:
      - ceph:
      - mds_thrash:
          thrash_weights:
            - mds.a: 0.8
            - mds.b: 0.2
          thrash_in_replay: 0.4
      - ceph-fuse:
      - workunit:
          clients:
            all: [suites/fsx.sh]

      The following example disables randomization, and uses the max delay values:

      tasks:
      - ceph:
      - mds_thrash:
          randomize: false
          max_thrash_delay: 10
          max_revive_delay: 1
          max_replay_thrash_delay: 4

    """
    def __init__(self, ctx, manager, mds_cluster, config, logger,
                 failure_group, weight):
        """
        :param ctx: test context (cluster, daemons).
        :param manager: cluster manager; must report ``is_clean()``.
        :param mds_cluster: object queried for MDS status information.
        :param config: dict of the options described on the class.
        :param logger: logger used by `log`.
        :param failure_group: ids of the MDSs this thrasher acts on.
        :param weight: probability-like weight; values below 1.0 make the
            thrasher skip iterations at random.
        """
        super(MDSThrasher, self).__init__()

        self.ctx = ctx
        self.manager = manager
        assert self.manager.is_clean()
        self.mds_cluster = mds_cluster

        self.stopping = Event()
        self.logger = logger
        self.config = config

        self.randomize = bool(self.config.get('randomize', True))
        # The documented key is 'max_thrash_delay'; 'thrash_delay' is what
        # this code historically read, so keep it as a fallback.
        self.max_thrash_delay = float(self.config.get(
            'max_thrash_delay', self.config.get('thrash_delay', 30.0)))
        self.thrash_in_replay = float(
            self.config.get('thrash_in_replay', 0.0))
        assert 0.0 <= self.thrash_in_replay <= 1.0, 'thrash_in_replay ({v}) must be between [0.0, 1.0]'.format(
            v=self.thrash_in_replay)

        self.max_replay_thrash_delay = float(
            self.config.get('max_replay_thrash_delay', 4.0))

        self.max_revive_delay = float(self.config.get('max_revive_delay',
                                                      10.0))

        self.failure_group = failure_group
        self.weight = weight

        # TODO support multiple filesystems: will require behavioural change to select
        # which filesystem to act on when doing rank-ish things
        self.fs = Filesystem(self.ctx)

    def _run(self):
        """Greenlet entry point: run `do_thrash`, logging any failure."""
        try:
            self.do_thrash()
        except:
            # Log exceptions here so we get the full backtrace (it's lost
            # by the time someone does a .get() on this greenlet)
            self.logger.exception("Exception in do_thrash:")
            raise

    def log(self, x):
        """Write data to logger assigned to this MDSThrasher"""
        self.logger.info(x)

    def stop(self):
        """Ask the thrash loop to finish."""
        self.stopping.set()

    def _remote_for(self, mds):
        """Return the single remote hosting ``mds.<mds>``."""
        (remote, ) = self.ctx.cluster.only(
            'mds.{m}'.format(m=mds)).remotes.keys()
        return remote

    def kill_mds(self, mds):
        """Stop the given MDS, by power-off when 'powercycle' is configured."""
        if self.config.get('powercycle'):
            remote = self._remote_for(mds)
            self.log('kill_mds on mds.{m} doing powercycle of {s}'.format(
                m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_off()
        else:
            self.ctx.daemons.get_daemon('mds', mds).stop()

    @staticmethod
    def _assert_ipmi(remote):
        """Fail early when powercycling is requested without IPMI access."""
        assert remote.console.has_ipmi_credentials, (
            "powercycling requested but RemoteConsole is not "
            "initialized.  Check ipmi config.")

    def kill_mds_by_rank(self, rank):
        """
        kill_mds wrapper to kill based on rank passed.
        """
        status = self.mds_cluster.get_mds_info_by_rank(rank)
        self.kill_mds(status['name'])

    def revive_mds(self, mds, standby_for_rank=None):
        """
        Revive mds -- do an ipmi powercycle (if indicated by the config)
        and then restart (using --hot-standby if specified).
        """
        if self.config.get('powercycle'):
            remote = self._remote_for(mds)
            self.log('revive_mds on mds.{m} doing powercycle of {s}'.format(
                m=mds, s=remote.name))
            self._assert_ipmi(remote)
            remote.console.power_on()
            self.manager.make_admin_daemon_dir(self.ctx, remote)
        args = []
        # NOTE(review): rank 0 is falsy, so --hot-standby is never passed for
        # rank 0 -- confirm whether that is intended before changing it.
        if standby_for_rank:
            args.extend(['--hot-standby', standby_for_rank])
        self.ctx.daemons.get_daemon('mds', mds).restart(*args)

    def revive_mds_by_rank(self, rank, standby_for_rank=None):
        """
        revive_mds wrapper to revive based on rank passed.
        """
        status = self.mds_cluster.get_mds_info_by_rank(rank)
        self.revive_mds(status['name'], standby_for_rank)

    def get_mds_status_all(self):
        """Return the filesystem's current MDS map."""
        return self.fs.get_mds_map()

    def _active_in_failure_group(self):
        """Return the status dicts of failure-group MDSs in 'up:active'."""
        statuses = [
            self.mds_cluster.get_mds_info(m) for m in self.failure_group
        ]
        # A real list (not a filter object) so callers can len()/index it.
        return [s for s in statuses if s and s['state'] == 'up:active']

    def do_thrash(self):
        """
        Perform the random thrashing action

        Each iteration: wait a (random) delay, possibly skip based on the
        weight, kill the active MDS of the failure group, wait until it is
        reported laggy/failed/removed, wait for a new active MDS, then
        revive the old one and wait for it to reach a standby state.
        """

        self.log('starting mds_do_thrash for failure group: ' + ', '.join(
            ['mds.{_id}'.format(_id=_f) for _f in self.failure_group]))
        while not self.stopping.is_set():
            delay = self.max_thrash_delay
            if self.randomize:
                # uniform(), not randrange(): the bounds are floats.
                delay = random.uniform(0.0, self.max_thrash_delay)

            if delay > 0.0:
                self.log('waiting for {delay} secs before thrashing'.format(
                    delay=delay))
                self.stopping.wait(delay)
                if self.stopping.is_set():
                    continue

            # randrange(0.0, 1.0) could only ever yield 0, which made the
            # weight check below a no-op; draw a real value in [0, 1].
            skip = random.uniform(0.0, 1.0)
            if self.weight < 1.0 and skip > self.weight:
                self.log(
                    'skipping thrash iteration with skip ({skip}) > weight ({weight})'
                    .format(skip=skip, weight=self.weight))
                continue

            # find the active mds in the failure group
            actives = self._active_in_failure_group()
            assert len(
                actives) == 1, 'Can only have one active in a failure group'

            active_mds = actives[0]['name']
            active_rank = actives[0]['rank']

            self.log('kill mds.{id} (rank={r})'.format(id=active_mds,
                                                       r=active_rank))
            self.kill_mds_by_rank(active_rank)

            # wait for mon to report killed mds as crashed
            last_laggy_since = None
            itercount = 0
            while True:
                failed = self.fs.get_mds_map()['failed']
                status = self.mds_cluster.get_mds_info(active_mds)
                if not status:
                    break
                if 'laggy_since' in status:
                    last_laggy_since = status['laggy_since']
                    break
                if any(f == active_mds for f in failed):
                    break
                self.log(
                    'waiting till mds map indicates mds.{_id} is laggy/crashed, in failed state, or mds.{_id} is removed from mdsmap'
                    .format(_id=active_mds))
                itercount = itercount + 1
                if itercount > 10:
                    self.log('mds map: {status}'.format(
                        status=self.mds_cluster.get_fs_map()))
                time.sleep(2)
            if last_laggy_since:
                self.log(
                    'mds.{_id} reported laggy/crashed since: {since}'.format(
                        _id=active_mds, since=last_laggy_since))
            else:
                self.log('mds.{_id} down, removed from mdsmap'.format(
                    _id=active_mds))

            # wait for a standby mds to takeover and become active
            takeover_mds = None
            takeover_rank = None
            itercount = 0
            while True:
                actives = self._active_in_failure_group()
                if len(actives) > 0:
                    assert len(
                        actives
                    ) == 1, 'Can only have one active in failure group'
                    takeover_mds = actives[0]['name']
                    takeover_rank = actives[0]['rank']
                    break
                itercount = itercount + 1
                if itercount > 10:
                    self.log('mds map: {status}'.format(
                        status=self.mds_cluster.get_fs_map()))
                # Pace the polling like the other wait loops instead of
                # querying the cluster in a tight loop.
                time.sleep(2)

            self.log('New active mds is mds.{_id}'.format(_id=takeover_mds))

            # wait for a while before restarting old active to become new
            # standby
            delay = self.max_revive_delay
            if self.randomize:
                delay = random.uniform(0.0, self.max_revive_delay)

            self.log(
                'waiting for {delay} secs before reviving mds.{id}'.format(
                    delay=delay, id=active_mds))
            time.sleep(delay)

            self.log('reviving mds.{id}'.format(id=active_mds))
            self.revive_mds(active_mds, standby_for_rank=takeover_rank)

            status = {}
            while True:
                status = self.mds_cluster.get_mds_info(active_mds)
                if status and (status['state'] == 'up:standby'
                               or status['state'] == 'up:standby-replay'):
                    break
                self.log(
                    'waiting till mds map indicates mds.{_id} is in standby or standby-replay'
                    .format(_id=active_mds))
                time.sleep(2)
            self.log('mds.{_id} reported in {state} state'.format(
                _id=active_mds, state=status['state']))

            # Replay thrashing is not performed right now: the code that
            # used to follow here was unreachable (it sat behind an
            # unconditional `continue`) and referenced a nonexistent
            # `self.to_kill`, so it has been removed. thrash_in_replay and
            # max_replay_thrash_delay are still parsed in __init__.
Ejemplo n.º 57
0
class BaseServer(object):
    """
    An abstract base class that implements some common functionality for the servers in gevent.

    This class drives the accept loop but does not define ``do_read``; the
    accept loop calls ``self.do_read()``, so a concrete subclass must
    provide it.

    :param listener: Either an address that the server should bind
        on or a :class:`gevent.socket.socket` instance that is already
        bound (and put into listening mode in case of TCP socket).

    :keyword handle: If given, the request handler. The request
        handler can be defined in a few ways. Most commonly,
        subclasses will implement a ``handle`` method as an
        instance method. Alternatively, a function can be passed
        as the ``handle`` argument to the constructor. In either
        case, the handler can later be changed by calling
        :meth:`set_handle`.

        When the request handler returns, the socket used for the
        request will be closed.

    :keyword spawn: If provided, is called to create a new
        greenlet to run the handler. By default,
        :func:`gevent.spawn` is used (meaning there is no
        artificial limit on the number of concurrent requests). Possible values for *spawn*:

        - a :class:`gevent.pool.Pool` instance -- ``handle`` will be executed
          using :meth:`gevent.pool.Pool.spawn` only if the pool is not full.
          While it is full, no new connections are accepted;
        - :func:`gevent.spawn_raw` -- ``handle`` will be executed in a raw
          greenlet which has a little less overhead than :class:`gevent.Greenlet` instances spawned by default;
        - ``None`` -- ``handle`` will be executed right away, in the :class:`Hub` greenlet.
          ``handle`` cannot use any blocking functions as it would mean switching to the :class:`Hub`.
        - an integer -- a shortcut for ``gevent.pool.Pool(integer)``

    .. versionchanged:: 1.1a1
       When the *handle* function returns from processing a connection,
       the client socket will be closed. This resolves the non-deterministic
       closing of the socket, fixing ResourceWarnings under Python 3 and PyPy.

    """
    #: the number of seconds to sleep in case there was an error in accept() call
    #: for consecutive errors the delay will double until it reaches max_delay
    #: when accept() finally succeeds the delay will be reset to min_delay again
    min_delay = 0.01
    max_delay = 1

    #: Sets the maximum number of consecutive accepts that a process may perform on
    #: a single wake up. High values give higher priority to high connection rates,
    #: while lower values give higher priority to already established connections.
    #: Default is 100. Note, that in case of multiple working processes on the same
    #: listening value, it should be set to a lower value. (pywsgi.WSGIServer sets it
    #: to 1 when environ["wsgi.multiprocess"] is true)
    max_accept = 100

    #: default callable used to run the handler when ``spawn='default'``
    _spawn = Greenlet.spawn

    #: the default timeout that we wait for the client connections to close in stop()
    stop_timeout = 1

    #: errno values for which a failed read closes the server instead of retrying
    fatal_errors = (errno.EBADF, errno.EINVAL, errno.ENOTSOCK)

    def __init__(self, listener, handle=None, spawn='default'):
        """Configure listener, spawn strategy and handler.

        The server starts in the "stopped" state. If any step of the
        configuration fails, :meth:`close` is called and the error is
        re-raised.

        :raises ValueError: if ``max_accept`` is smaller than 1.
        :raises TypeError: if no handler is available (see :meth:`set_handle`)
            or the listener is an SSL socket (see :meth:`set_listener`).
        """
        self._stop_event = Event()
        self._stop_event.set()  # set == stopped; cleared by start()
        self._watcher = None
        self._timer = None
        self.pool = None
        try:
            self.set_listener(listener)
            self.set_spawn(spawn)
            self.set_handle(handle)
            self.delay = self.min_delay
            self.loop = get_hub().loop
            if self.max_accept < 1:
                raise ValueError('max_accept must be positive int: %r' %
                                 (self.max_accept, ))
        except:
            # Deliberately catch everything: release whatever was acquired,
            # then let the original error propagate unchanged.
            self.close()
            raise

    def set_listener(self, listener):
        """Record the listening socket or the address to bind later.

        An object with an ``accept`` attribute is treated as an already
        created socket; anything else is handed to ``parse_address``.

        :raises TypeError: if the socket-like object also has ``do_handshake``
            (i.e. it looks like an SSL socket).
        """
        if hasattr(listener, 'accept'):
            if hasattr(listener, 'do_handshake'):
                raise TypeError(
                    'Expected a regular socket, not SSLSocket: %r' %
                    (listener, ))
            self.family = listener.family
            self.address = listener.getsockname()
            self.socket = listener
        else:
            self.family, self.address = parse_address(listener)

    def set_spawn(self, spawn):
        """Select how handlers are run; see the class docstring for accepted values."""
        if spawn == 'default':
            self.pool = None
            # Not a no-op: copies the class-level default into the instance
            # __dict__, which is where close() later removes it from.
            self._spawn = self._spawn
        elif hasattr(spawn, 'spawn'):
            self.pool = spawn
            self._spawn = spawn.spawn
        elif isinstance(spawn, integer_types):
            from gevent.pool import Pool
            self.pool = Pool(spawn)
            self._spawn = self.pool.spawn
        else:
            # Any other value (including None) is used directly as the spawner.
            self.pool = None
            self._spawn = spawn
        if hasattr(self.pool, 'full'):
            # Let the pool decide whether more connections may be accepted.
            self.full = self.pool.full
        if self.pool is not None:
            # Resume accepting whenever the pool's semaphore signals.
            self.pool._semaphore.rawlink(self._start_accepting_if_started)

    def set_handle(self, handle):
        """Install the request handler.

        :raises TypeError: if *handle* is ``None`` and the instance has no
            ``handle`` attribute to fall back on.
        """
        if handle is not None:
            self.handle = handle
        if hasattr(self, 'handle'):
            self._handle = self.handle
        else:
            raise TypeError("'handle' must be provided")

    def _start_accepting_if_started(self, _event=None):
        """Callback form of :meth:`start_accepting` that is inert while stopped."""
        if self.started:
            self.start_accepting()

    def start_accepting(self):
        """Create and start the read watcher on the listening socket, if not already active."""
        if self._watcher is None:
            # just stop watcher without creating a new one?
            self._watcher = self.loop.io(self.socket.fileno(), 1)
            self._watcher.start(self._do_read)

    def stop_accepting(self):
        """Stop and discard the read watcher and any pending retry timer."""
        if self._watcher is not None:
            self._watcher.stop()
            self._watcher = None
        if self._timer is not None:
            self._timer.stop()
            self._timer = None

    def do_handle(self, *args):
        """Run the handler for one accepted connection.

        With a spawner of ``None`` the handler runs inline; otherwise it is
        handed to the spawner. If dispatching itself fails, the connection
        is closed via :meth:`do_close` and the error is re-raised.
        """
        spawn = self._spawn
        handle = self._handle
        close = self.do_close

        try:
            if spawn is None:
                _handle_and_close_when_done(handle, close, args)
            else:
                spawn(_handle_and_close_when_done, handle, close, args)
        except:
            close(*args)
            raise

    def do_close(self, *args):
        """Hook for releasing per-connection resources; does nothing here."""
        pass

    def _backoff_accepting(self):
        """Pause accepting and schedule a retry, doubling the delay up to ``max_delay``.

        Does nothing when ``self.delay`` is negative.
        """
        if self.delay >= 0:
            self.stop_accepting()
            self._timer = self.loop.timer(self.delay)
            self._timer.start(self._start_accepting_if_started)
            self.delay = min(self.max_delay, self.delay * 2)

    def _do_read(self):
        """Watcher callback: accept and dispatch up to ``max_accept`` connections.

        Stops early when the server is full, when ``do_read`` returns a
        falsy value, or on the first error. Fatal errors close the server;
        other errors trigger the back-off in :meth:`_backoff_accepting`.
        """
        for _ in xrange(self.max_accept):
            if self.full():
                self.stop_accepting()
                return
            try:
                args = self.do_read()
                # A successful read resets the back-off delay.
                self.delay = self.min_delay
                if not args:
                    return
            except:
                self.loop.handle_error(self, *sys.exc_info())
                ex = sys.exc_info()[1]
                if self.is_fatal_error(ex):
                    self.close()
                    sys.stderr.write('ERROR: %s failed with %s\n' %
                                     (self, str(ex) or repr(ex)))
                    return
                self._backoff_accepting()
                break
            else:
                try:
                    self.do_handle(*args)
                except:
                    self.loop.handle_error((args[1:], self), *sys.exc_info())
                    self._backoff_accepting()
                    break

    def full(self):
        """Return whether accepting should pause; always ``False`` unless replaced by a pool's ``full``."""
        return False

    def __repr__(self):
        return '<%s at %s %s>' % (type(self).__name__, hex(
            id(self)), self._formatinfo())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._formatinfo())

    def _formatinfo(self):
        """Build the ``fileno=... address=... handle=...`` fragment used by repr/str.

        Every lookup is guarded so that formatting never raises.
        """
        if hasattr(self, 'socket'):
            try:
                fileno = self.socket.fileno()
            except Exception as ex:
                fileno = str(ex)
            result = 'fileno=%s ' % fileno
        else:
            result = ''
        try:
            if isinstance(self.address, tuple) and len(self.address) == 2:
                result += 'address=%s:%s' % self.address
            else:
                result += 'address=%s' % (self.address, )
        except Exception as ex:
            result += str(ex) or '<error>'

        # Only a handler stored on the instance is reported.
        handle = self.__dict__.get('handle')
        if handle is not None:
            fself = getattr(handle, '__self__', None)
            try:
                if fself is self:
                    # Checks the __self__ of the handle in case it is a bound
                    # method of self to prevent recursively defined reprs.
                    handle_repr = '<bound method %s.%s of self>' % (
                        self.__class__.__name__,
                        handle.__name__,
                    )
                else:
                    handle_repr = repr(handle)

                result += ' handle=' + handle_repr
            except Exception as ex:
                result += str(ex) or '<error>'

        return result

    @property
    def server_host(self):
        """IP address that the server is bound to (string), or ``None`` if the address is not a tuple."""
        if isinstance(self.address, tuple):
            return self.address[0]
        return None

    @property
    def server_port(self):
        """Port that the server is bound to (an integer), or ``None`` if the address is not a tuple."""
        if isinstance(self.address, tuple):
            return self.address[1]
        return None

    def init_socket(self):
        """If the user initialized the server with an address rather than socket,
        then this function will create a socket, bind it and put it into listening mode.

        It is not supposed to be called by the user, it is called by :meth:`start` before starting
        the accept loop.

        The base implementation does nothing."""
        pass

    @property
    def started(self):
        """``True`` between :meth:`start` and :meth:`close`."""
        return not self._stop_event.is_set()

    def start(self):
        """Start accepting the connections.

        If an address was provided in the constructor, then also create a socket,
        bind it and put it into the listening mode.

        If accepting cannot be started the server is closed and the error re-raised.
        """
        self.init_socket()
        self._stop_event.clear()
        try:
            self.start_accepting()
        except:
            self.close()
            raise

    def close(self):
        """Close the listener socket and stop accepting."""
        self._stop_event.set()
        try:
            self.stop_accepting()
        finally:
            try:
                self.socket.close()
            except Exception:
                # Best effort: the socket may be missing or already closed.
                pass
            finally:
                # Drop per-instance state so the class-level defaults show
                # through again and `closed` reports True.
                self.__dict__.pop('socket', None)
                self.__dict__.pop('handle', None)
                self.__dict__.pop('_handle', None)
                self.__dict__.pop('_spawn', None)
                self.__dict__.pop('full', None)
                if self.pool is not None:
                    self.pool._semaphore.unlink(
                        self._start_accepting_if_started)

    @property
    def closed(self):
        """``True`` once the instance no longer has a ``socket`` attribute."""
        return not hasattr(self, 'socket')

    def stop(self, timeout=None):
        """
        Stop accepting the connections and close the listening socket.

        If the server uses a pool to spawn the requests, then
        :meth:`stop` also waits for all the handlers to exit. If there
        are still handlers executing after *timeout* has expired
        (default 1 second, :attr:`stop_timeout`), then the currently
        running handlers in the pool are killed.

        If the server does not use a pool, then this merely stops accepting connections;
        any spawned greenlets that are handling requests continue running until
        they naturally complete.
        """
        self.close()
        if timeout is None:
            timeout = self.stop_timeout
        if self.pool:
            self.pool.join(timeout=timeout)
            self.pool.kill(block=True, timeout=1)

    def serve_forever(self, stop_timeout=None):
        """Start the server if it hasn't been already started and wait until it's stopped."""
        # add test that serve_forever exits on stop()
        if not self.started:
            self.start()
        try:
            self._stop_event.wait()
        finally:
            # Run stop() in its own greenlet and wait for it to finish.
            Greenlet.spawn(self.stop, timeout=stop_timeout).join()

    def is_fatal_error(self, ex):
        """Return whether *ex* is a socket error whose first argument is listed in ``fatal_errors``."""
        return isinstance(ex,
                          _socket.error) and ex.args[0] in self.fatal_errors
Ejemplo n.º 58
0
class Thread(object):
    """ An enhanced replacement for the Python
    :class:`threading.Thread` class.

    This isn't actually a true thread, instead it uses Gevent to
    implement co-routines. Using :func:`gevent.monkey.patch_all`, all
    Python blocking functions are replaced with non-blocking Gevent
    alternatives which allow co-routines to yield to each other
    whenever one of them performs a blocking operation.

    Instances can be used as context managers: leaving the ``with`` block
    stops a thread that has been started and not yet stopped.
    """

    # Flipped to True by __init__; lets methods detect subclasses that
    # forgot to call the base constructor.
    __initialized = False

    def __init__(self, group=None, name=None):
        """ Thread constructor

        :param group: should be ``None``; reserved for future
        extension when a :class:`ThreadGroup` class is implemented.
        (The value is currently ignored.)
        :param name: the thread name.  By default, a unique name
        is obtained from ``_newname()``.

        If the subclass overrides the constructor, it must make sure
        to invoke the base class constructor (``Thread.__init__()``)
        before doing anything else to the thread.
        """

        # WARNING: Not sure about the side-effects of this...
        # Monkeypatch a bunch of blocking and thread-related
        # constructs to use gevent alternatives. Threads are now
        # co-routines which yield to each other when a Gevent
        # blocking operation is called.
        # NOTE(review): this patches the whole process every time a
        # Thread is constructed, not just on first use.
        from gevent import monkey
        monkey.patch_all()

        self.__name = str(name or _newname())
        self.__ident = None
        self.__started = Event()
        self.__stopped = False
        self.__initialized = True

    def start(self):
        """ Start the thread's activity.

        It must be called at most once per thread object.  It
        arranges for the object's :meth:`run` method to be invoked in
        a separate greenlet.

        :raises RuntimeError: if the base constructor was not called or
        the thread has already been started.
        """
        if not self.__initialized:
            raise RuntimeError("thread.__init__() not called")
        if self.__started.is_set():
            raise RuntimeError("thread already started")

        self._bootstrap()

    def _bootstrap(self):
        """Assign an identifier, mark the thread started and spawn :meth:`run`."""
        self.__ident = uuid.uuid4()
        self.__started.set()
        self._g_main = gevent.spawn(self.run)

    def stop(self, blocking=False):
        """ Stop the thread's activity.

        Kills the greenlet running :meth:`run`, then calls :meth:`shutdown`.

        :param blocking: block until thread has stopped completely.
        :raises RuntimeError: if the base constructor was not called, the
        thread was never started, or it has already been stopped.
        """
        if not self.__initialized:
            raise RuntimeError("Thread.__init__() not called")
        # Check before touching any state: without a greenlet there is
        # nothing to kill, and the stopped flag must stay untouched.
        if not self.__started.is_set():
            raise RuntimeError("cannot stop thread before it is started")
        if self.__stopped:
            raise RuntimeError("threads can only be stopped once")

        self.__stopped = True
        self._g_main.kill()
        self.shutdown()
        if blocking:
            self._g_main.join()

    def run(self):
        """ Method representing the thread's activity.

        You may override this method in a subclass.
        """
        pass

    def join(self, timeout=None):
        """ Wait until the thread terminates.

        This blocks the caller until the greenlet running :meth:`run`
        finishes or until the optional timeout occurs.

        When the *timeout* argument is present and not ``None``, it
        should be a floating point number specifying a timeout for
        the operation in seconds (or fractions thereof). When it is not
        present or ``None``, the operation blocks until the greenlet
        terminates.

        :meth:`join` always returns ``None``, whether the greenlet
        finished or the timeout elapsed. A thread can be joined many
        times.

        :raises RuntimeError: if the base constructor was not called or
        the thread has not been started yet.
        """
        if not self.__initialized:
            raise RuntimeError("Thread.__init__() not called")
        if not self.__started.is_set():
            raise RuntimeError("cannot join thread before it is started")

        self._g_main.join(timeout)

    def shutdown(self):
        """ Cleanup method called when thread is stopping.

        This method is run when the thread is stopped. Any resources
        used by the thread (sockets and such) should be safely closed
        here.

        You may override this method in a subclass.
        """
        pass

    def __repr__(self):
        assert self.__initialized, "Thread.__init__() was not called"
        status = "initial"
        if self.__started.is_set():
            status = "started"
        if self.__stopped:
            status = "stopped"
        if self.__ident is not None:
            status += " %s" % self.__ident
        return "<%s(%s, %s)>" % (self.__class__.__name__, self.__name, status)

    def __enter__(self):
        """Return the thread itself; the thread is not started automatically."""
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """Stop the thread on leaving a ``with`` block.

        The three exception arguments are required by the context manager
        protocol; they default to ``None`` so direct calls without
        arguments keep working. A thread that was never started, or was
        already stopped inside the block, is left alone so that leaving
        the block cannot replace the body's own exception with a
        ``RuntimeError``. Returns ``None``, so exceptions propagate.
        """
        if self.__started.is_set() and not self.__stopped:
            self.stop()

    @property
    def name(self):
        """The thread name (string)."""
        assert self.__initialized, "Thread.__init__() not called"
        return self.__name

    @name.setter
    def name(self, name):
        assert self.__initialized, "Thread.__init__() not called"
        self.__name = str(name)

    @property
    def ident(self):
        """Identifier assigned when the thread was started, or ``None`` before that."""
        assert self.__initialized, "Thread.__init__() not called"
        return self.__ident

    def isAlive(self):
        """Return ``True`` if the thread was started and :meth:`stop` has not been called.

        NOTE: this reflects start/stop bookkeeping only; it stays ``True``
        after :meth:`run` returns on its own.
        """
        assert self.__initialized, "Thread.__init__() not called"
        return self.__started.is_set() and not self.__stopped

    is_alive = isAlive

    def getName(self):
        """Legacy accessor for :attr:`name`."""
        return self.name

    def setName(self, name):
        """Legacy mutator for :attr:`name`."""
        self.name = name
Ejemplo n.º 59
0
class CustomDaemon(Daemon):
    """
    Custom daemon for test.

    Counts how often each lifecycle callback fires and dumps those
    counters to DAEMON_LAST_ACTION_FILE after every callback.
    """

    # File that receives the state dump written by _write_state().
    DAEMON_LAST_ACTION_FILE = "/tmp/daemon_last_action.txt"

    def _internal_init(self,
                       pidfile,
                       stdin,
                       stdout,
                       stderr,
                       logfile,
                       loglevel,
                       on_start_exit_zero,
                       max_open_files,
                       change_dir,
                       timeout_ms,
                       logtosyslog=True,
                       logtosyslog_facility=SysLogHandler.LOG_LOCAL0,
                       logtoconsole=True,
                       app_name="Test"):
        """
        Reset the test counters, then forward every argument unchanged to
        Daemon._internal_init.
        """

        # Us
        self.is_running = True
        self.start_count = 0
        self.stop_count = 0
        self.reload_count = 0
        self.status_count = 0
        self.start_loop_exited = Event()
        self.last_action = "noaction"

        # Base
        Daemon._internal_init(self, pidfile, stdin, stdout, stderr, logfile,
                              loglevel, on_start_exit_zero, max_open_files,
                              change_dir, timeout_ms, logtosyslog,
                              logtosyslog_facility, logtoconsole, app_name)

        # Log
        logger.debug("Done, self.class=%s", SolBase.get_classname(self))

    @classmethod
    def get_daemon_instance(cls):
        """
        Get a new Daemon instance
        :return CustomDaemon
        :rtype CustomDaemon
        """
        return CustomDaemon()

    def _write_state(self):
        """
        Write state: one "key=value" line per field, overwriting
        DAEMON_LAST_ACTION_FILE.
        """
        # Build the text first so a formatting error cannot leave the file
        # truncated.
        buf = (
            "pid={0}\nppid={1}\nis_running={2}\nstart_count={3}\nstop_count={4}\n"
            "reload_count={5}\nstatus_count={6}\nlast_action={7}\nstart_loop_exited={8}\n"
        ).format(
            os.getpid(),
            os.getppid(),
            self.is_running,
            self.start_count,
            self.stop_count,
            self.reload_count,
            self.status_count,
            self.last_action,
            self.start_loop_exited.is_set(),
        )
        # The context manager closes the handle even if the write fails.
        with open(CustomDaemon.DAEMON_LAST_ACTION_FILE, "w") as f:
            f.write(buf)

    def _on_stop(self):
        """
        Test: record the stop and signal the running loop to exit.
        """
        logger.info("Called")
        # Signal: the loop in _on_start polls this flag.
        self.is_running = False
        self.stop_count += 1
        self.last_action = "stop"
        self._write_state()

        # As described in https://github.com/gevent/gevent/issues/799
        # - signals run into the main thread
        # - we cannot wait or switch here => direct exit
        return

    def _on_reload(self, *args, **kwargs):
        """
        Test: record the reload. Arguments are ignored.
        """
        logger.info("Called")
        self.reload_count += 1
        self.last_action = "reload"
        self._write_state()

    def _on_start(self):
        """
        Test: record the start, then loop until is_running becomes false.
        """
        logger.info("Called")
        self.start_count += 1
        self.last_action = "start"
        self._write_state()

        logger.info("Engaging running loop")
        while self.is_running:
            SolBase.sleep(10)
        logger.info("Exited running loop")

        # The state is dumped before the event is set, so this dump still
        # reports start_loop_exited=False.
        self._write_state()
        self.start_loop_exited.set()
        logger.debug("Exited")

    def _on_status(self, *argv, **kwargs):
        """
        Test: record the status request. Arguments are ignored.
        """
        logger.info("Called")
        self.status_count += 1
        self.last_action = "status"
        self._write_state()
Ejemplo n.º 60
0
class Manager(object):
    def __init__(self, name, google_key, locale, units, timezone, time_limit,
                 max_attempts, location, quiet, cache_type, filter_file,
                 geofence_file, alarm_file, debug):
        """Build a Manager and load its filters, geofences and alarms.

        The statements run in a fixed order because the loaders called
        here (``set_location``, ``load_filter_file``, ``load_alarms_file``)
        are methods of this object and run against the attributes
        assigned before them.

        :param name: manager name; stored lower-cased as a string.
        :param google_key: key handed to ``GMaps``.
        :param locale: language identifier; stored and handed to ``Locale``.
        :param units: stored as the distance unit setting.
        :param timezone: stored as the timezone setting.
        :param time_limit: stored as the minimum-time-remaining setting.
        :param max_attempts: converted with ``int()`` and passed to
            ``load_alarms_file``.
        :param location: passed to ``set_location`` unless its string form
            is ``'none'`` (case-insensitive).
        :param quiet: stored as the quiet-mode flag.
        :param cache_type: passed to ``cache_factory`` together with the name.
        :param filter_file: path resolved with ``get_path`` and loaded.
        :param geofence_file: path resolved with ``get_path`` and loaded
            unless its string form is ``'none'`` (case-insensitive).
        :param alarm_file: path resolved with ``get_path`` and loaded.
        :param debug: stored as the debug flag.
        """
        # Set the name of the Manager
        self.__name = str(name).lower()
        log.info("----------- Manager '{}' ".format(self.__name) +
                 " is being created.")
        self.__debug = debug

        # Get the Google Maps API
        self._google_key = google_key
        self._gmaps_service = GMaps(google_key)
        self._gmaps_reverse_geocode = False
        # Travel modes added by enable_gmaps_distance_matrix()
        self._gmaps_distance_matrix = set()

        self._language = locale
        self.__locale = Locale(locale)  # Setup the language-specific stuff
        self.__units = units  # type of unit used for distances
        self.__timezone = timezone  # timezone for time calculations
        self.__time_limit = time_limit  # Minimum time remaining

        # Location should be [lat, lng] (or None for no location)
        self.__location = None
        if str(location).lower() != 'none':
            self.set_location(location)
        else:
            log.warning("NO LOCATION SET - " +
                        " this may cause issues with distance related DTS.")

        # Quiet mode
        self.__quiet = quiet

        # Create cache
        self.__cache = cache_factory(cache_type, self.__name)

        # Load and Setup the Pokemon Filters
        # Every category starts disabled with an empty, ordered filter set;
        # the filter file is loaded once all of them exist.
        self.__mons_enabled, self.__mon_filters = False, OrderedDict()
        self.__stops_enabled, self.__stop_filters = False, OrderedDict()
        self.__gyms_enabled, self.__gym_filters = False, OrderedDict()
        self.__ignore_neutral = False
        self.__eggs_enabled, self.__egg_filters = False, OrderedDict()
        self.__raids_enabled, self.__raid_filters = False, OrderedDict()
        self.__weather_enabled, self.__weather_filters = False, OrderedDict()
        self.__quest_enabled, self.__quest_filters = False, OrderedDict()
        self.load_filter_file(get_path(filter_file))

        # Create the Geofences to filter with from given file
        self.geofences = None
        if str(geofence_file).lower() != 'none':
            self.geofences = load_geofence_file(get_path(geofence_file))
        # Create the alarms to send notifications out with
        self.__alarms = {}
        self.load_alarms_file(get_path(alarm_file), int(max_attempts))

        # Initialize Rules
        # One name -> Rule mapping per category, filled by the add_*_rule methods.
        self.__mon_rules = {}
        self.__stop_rules = {}
        self.__gym_rules = {}
        self.__egg_rules = {}
        self.__raid_rules = {}
        self.__weather_rules = {}
        self.__quest_rules = {}

        # Initialize the queue and start the process
        # NOTE(review): nothing is started here; __process stays None until
        # assigned elsewhere - confirm where the process is launched.
        self.__queue = Queue()
        self.__event = Event()
        self.__process = None

        log.info("----------- Manager '{}' ".format(self.__name) +
                 " successfully created.")

    # ~~~~~~~~~~~~~~~~~~~~~~~ MAIN PROCESS CONTROL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Update the object into the queue
    def update(self, obj):
        self.__queue.put(obj)

    # Get the name of this Manager
    def get_name(self):
        return self.__name

    # Tell the process to finish up and go home
    def stop(self):
        """Log the shutdown together with the queue size, then set the stop event."""
        announcement = "Manager {} shutting down... ".format(self.__name)
        backlog = "{} items in queue.".format(self.__queue.qsize())
        log.info(announcement + backlog)
        self.__event.set()

    def join(self):
        """Wait up to 20 seconds for the process; kill it if it is still not ready."""
        worker = self.__process
        worker.join(timeout=20)
        if worker.ready():
            log.info("Manager {} successfully stopped!".format(self.__name))
            return
        log.warning("Manager {} could not be stopped in time!"
                    " Forcing process to stop.".format(self.__name))
        # Force stop, waiting at most 2 seconds for the kill to complete.
        worker.kill(block=True, timeout=2)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GMAPS API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    def enable_gmaps_reverse_geocoding(self):
        """Enable GMaps Reverse Geocoding DTS for triggered Events. """
        if not self._gmaps_service:
            raise ValueError("Unable to enable Google Maps Reverse Geocoding."
                             "No GMaps API key has been set.")
        self._gmaps_reverse_geocode = True

    def disable_gmaps_reverse_geocoding(self):
        """Disable GMaps Reverse Geocoding DTS for triggered Events. """
        self._gmaps_reverse_geocode = False

    def enable_gmaps_distance_matrix(self, mode):
        """Enable 'mode' Distance Matrix DTS for triggered Events. """
        if not self.__location:
            raise ValueError("Unable to enable Google Maps Reverse Geocoding."
                             "No Manager location has been set.")
        elif not self._gmaps_service:
            raise ValueError("Unable to enable Google Maps Reverse Geocoding."
                             "No GMaps API key has been provided.")
        elif mode not in GMaps.TRAVEL_MODES:
            raise ValueError("Unable to enable distance matrix mode: "
                             "{} is not a valid mode.".format(mode))
        self._gmaps_distance_matrix.add(mode)

    def disable_gmaps_dm_walking(self, mode):
        """Disable the Distance Matrix DTS for travel mode 'mode'.

        Raises ValueError when 'mode' is not one of GMaps.TRAVEL_MODES.
        Disabling a valid mode that was never enabled is a no-op.
        """
        if mode in GMaps.TRAVEL_MODES:
            self._gmaps_distance_matrix.discard(mode)
            return
        raise ValueError("Unable to disable distance matrix mode: "
                         "Invalid mode specified.")

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RULES API ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Add new Monster Rule
    def add_monster_rule(self, name, filters, alarms):
        if name in self.__mon_rules:
            raise ValueError("Unable to add Rule: Monster Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self.__mon_filters:
                raise ValueError("Unable to create Rule: No Monster Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self.__alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__mon_rules[name] = Rule(filters, alarms)

    # Add new Stop Rule
    def add_stop_rule(self, name, filters, alarms):
        if name in self.__stop_rules:
            raise ValueError("Unable to add Rule: Stop Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self.__stop_filters:
                raise ValueError("Unable to create Rule: No Stop Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self.__alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__stop_rules[name] = Rule(filters, alarms)

    # Add new Gym Rule
    def add_gym_rule(self, name, filters, alarms):
        if name in self.__gym_rules:
            raise ValueError("Unable to add Rule: Gym Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self.__gym_filters:
                raise ValueError("Unable to create Rule: No Gym Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self.__alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__gym_rules[name] = Rule(filters, alarms)

    # Add new Egg Rule
    def add_egg_rule(self, name, filters, alarms):
        if name in self.__egg_rules:
            raise ValueError("Unable to add Rule: Egg Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self.__egg_filters:
                raise ValueError("Unable to create Rule: No Egg Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self.__alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__egg_rules[name] = Rule(filters, alarms)

    # Add new Raid Rule
    def add_raid_rule(self, name, filters, alarms):
        if name in self.__raid_rules:
            raise ValueError("Unable to add Rule: Raid Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self.__raid_filters:
                raise ValueError("Unable to create Rule: No Raid Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self.__alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__raid_rules[name] = Rule(filters, alarms)

    # Add new Weather Rule
    def add_weather_rule(self, name, filters, alarms):
        if name in self.__weather_rules:
            raise ValueError("Unable to add Rule: Weather Rule with the name "
                             "{} already exists!".format(name))

        for filt in filters:
            if filt not in self.__weather_filters:
                raise ValueError("Unable to create Rule: No weather Filter "
                                 "named {}!".format(filt))

        for alarm in alarms:
            if alarm not in self.__alarms:
                raise ValueError("Unable to create Rule: No Alarm "
                                 "named {}!".format(alarm))

        self.__weather_rules[name] = Rule(filters, alarms)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Add new Quest Rule
    def add_quest_rule(self, name, filters, alarms):
        """Register a Quest Rule that ties quest filters to alarms.

        :param name: key the new rule is stored under
        :param filters: names of quest filters the rule should check
        :param alarms: names of alarms the rule should notify
        :raises ValueError: if a Quest Rule called `name` already exists, or
            if an entry of `filters` / `alarms` is not a known quest filter /
            alarm (the first unknown name is the one reported)
        """
        if name in self.__quest_rules:
            raise ValueError("Unable to add Rule: Quest Rule with the name "
                             "{} already exists!".format(name))

        # Filters are validated before alarms, so a bad filter wins
        unknown_filters = [entry for entry in filters
                           if entry not in self.__quest_filters]
        if unknown_filters:
            raise ValueError("Unable to create Rule: No quest Filter "
                             "named {}!".format(unknown_filters[0]))

        unknown_alarms = [entry for entry in alarms
                          if entry not in self.__alarms]
        if unknown_alarms:
            raise ValueError("Unable to create Rule: No Alarm "
                             "named {}!".format(unknown_alarms[0]))

        self.__quest_rules[name] = Rule(filters, alarms)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MANAGER LOADING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    @staticmethod
    def load_filter_section(section, sect_name, filter_type):
        defaults = section.pop('defaults', {})
        default_dts = defaults.pop('custom_dts', {})
        filter_set = OrderedDict()
        for name, settings in section.pop('filters', {}).iteritems():
            settings = dict(defaults.items() + settings.items())
            try:
                local_dts = dict(default_dts.items() +
                                 settings.pop('custom_dts', {}).items())
                if len(local_dts) > 0:
                    settings['custom_dts'] = local_dts
                filter_set[name] = filter_type(name, settings)
                log.debug("Filter '%s' set as the following: %s", name,
                          filter_set[name].to_dict())
            except Exception as e:
                log.error("Encountered error inside filter named '%s'.", name)
                raise e  # Pass the error up
        for key in section:  # Reject leftover parameters
            raise ValueError("'{}' is not a recognized parameter for the "
                             "'{}' section.".format(key, sect_name))
        return filter_set

    # Load in a new filters file
    def load_filter_file(self, file_path):
        """Read the filters file at `file_path` and rebuild every filter set.

        The file must contain a JSON object. Its 'monsters', 'stops',
        'gyms', 'eggs', 'raids', 'weather' and 'quest' sections are popped
        in that order and handed to `load_filter_section`; each section's
        'enabled' flag (plus 'ignore_neutral' for gyms) is stored on the
        manager. A missing 'enabled' key counts as False, except in the
        'weather' and 'quest' sections where it counts as True.

        Every failure (unreadable file, invalid JSON, bad section content)
        is logged and then ends the process through `sys.exit(1)`.
        """
        try:
            log.info("Loading Filters from file at {}".format(file_path))
            with open(file_path, 'r') as handle:
                filters = json.load(handle, object_pairs_hook=OrderedDict)
            if type(filters) is not OrderedDict:
                log.critical('Filters files must be a JSON object: '
                             '{ "monsters":{...},... }')
                # Handled by the ValueError branch just below
                raise ValueError("Filter file did not contain a dict.")
        except IOError as err:
            log.error("Encountered error while loading Filters: {}: {}"
                      .format(type(err).__name__, err))
            log.error("PokeAlarm was unable to find a filters file at {}. "
                      "Please check that this file exists and that PA has "
                      "read permissions.".format(file_path))
            log.debug("Stack trace: \n {}".format(traceback.format_exc()))
            sys.exit(1)
        except ValueError as err:
            log.error("Encountered error while loading Filters: {}: {}"
                      .format(type(err).__name__, err))
            log.error(
                "PokeAlarm has encountered a 'ValueError' while loading "
                "the Filters file. This typically means the file isn't in "
                "the correct json format. Try loading the file contents "
                "into a json validator.")
            log.debug("Stack trace: \n {}".format(traceback.format_exc()))
            sys.exit(1)

        try:
            # Monsters
            log.info("Parsing 'monsters' section.")
            monsters = filters.pop('monsters', {})
            self.__mons_enabled = bool(monsters.pop('enabled', False))
            self.__mon_filters = self.load_filter_section(
                monsters, 'monsters', Filters.MonFilter)

            # Stops
            log.info("Parsing 'stops' section.")
            stops = filters.pop('stops', {})
            self.__stops_enabled = bool(stops.pop('enabled', False))
            self.__stop_filters = self.load_filter_section(
                stops, 'stops', Filters.StopFilter)

            # Gyms (the only section with an extra 'ignore_neutral' flag)
            log.info("Parsing 'gyms' section.")
            gyms = filters.pop('gyms', {})
            self.__gyms_enabled = bool(gyms.pop('enabled', False))
            self.__ignore_neutral = bool(gyms.pop('ignore_neutral', False))
            self.__gym_filters = self.load_filter_section(
                gyms, 'gyms', Filters.GymFilter)

            # Eggs
            log.info("Parsing 'eggs' section.")
            eggs = filters.pop('eggs', {})
            self.__eggs_enabled = bool(eggs.pop('enabled', False))
            self.__egg_filters = self.load_filter_section(
                eggs, 'eggs', Filters.EggFilter)

            # Raids
            log.info("Parsing 'raids' section.")
            raids = filters.pop('raids', {})
            self.__raids_enabled = bool(raids.pop('enabled', False))
            self.__raid_filters = self.load_filter_section(
                raids, 'raids', Filters.RaidFilter)

            # Weather ('enabled' defaults to True here)
            log.info("Parsing 'weather' section.")
            weather = filters.pop('weather', {})
            self.__weather_enabled = bool(weather.pop('enabled', True))
            self.__weather_filters = self.load_filter_section(
                weather, 'weather', Filters.WeatherFilter)

            # Quest ('enabled' defaults to True here)
            log.info("Parsing 'quest' section.")
            quest = filters.pop('quest', {})
            self.__quest_enabled = bool(quest.pop('enabled', True))
            self.__quest_filters = self.load_filter_section(
                quest, 'quest', Filters.QuestFilter)

        except Exception as err:
            log.error("Encountered error while parsing Filters. "
                      "This is because of a mistake in your Filters file.")
            log.error("{}: {}".format(type(err).__name__, err))
            log.debug("Stack trace: \n {}".format(traceback.format_exc()))
            sys.exit(1)

    def load_alarms_file(self, file_path, max_attempts):
        """Read the alarms file at `file_path` and build every active alarm.

        The file must contain a JSON object mapping alarm names to alarm
        settings. Each settings object must carry an 'active' key (it is
        removed while loading); alarms whose 'active' value parses to True
        are built with `Alarms.alarm_factory`, the rest are skipped. The
        manager's previous set of alarms is replaced.

        :param file_path: path of the alarms JSON file
        :param max_attempts: forwarded unchanged to `Alarms.alarm_factory`

        Every failure (unreadable file, invalid JSON, bad alarm settings)
        is logged and then ends the process through `sys.exit(1)`.
        """
        log.info("Loading Alarms from the file at {}".format(file_path))
        try:
            with open(file_path, 'r') as f:
                alarm_settings = json.load(f)
            if type(alarm_settings) is not dict:
                log.critical(
                    "Alarms file must be an object of Alarms objects " +
                    "- { 'alarm1': {...}, ... 'alarm5': {...} }")
                sys.exit(1)
            self.__alarms = {}
            for name, alarm in alarm_settings.iteritems():
                active = parse_boolean(
                    require_and_remove_key(
                        'active', alarm, "Alarm objects in file."))
                if active is True:
                    self.__alarms[name] = Alarms.alarm_factory(
                        alarm, max_attempts, self._google_key)
                else:
                    # .get(): an inactive alarm that lacks a 'type' key
                    # must not abort loading just to write a debug line
                    log.debug(
                        "Alarm not activated: {}".format(alarm.get('type')) +
                        " because value not set to \"True\"")
            log.info("{} active alarms found.".format(len(self.__alarms)))
            return  # all done
        except ValueError as e:
            log.error("Encountered error while loading Alarms file: " +
                      "{}: {}".format(type(e).__name__, e))
            log.error(
                "PokeAlarm has encountered a 'ValueError' while loading the " +
                " Alarms file. This typically means your file isn't in the " +
                "correct json format. Try loading your file contents into" +
                " a json validator.")
        except IOError as e:
            log.error("Encountered error while loading Alarms: " +
                      "{}: {}".format(type(e).__name__, e))
            # This message used to say "filters file", although it is the
            # alarms file that could not be read
            log.error("PokeAlarm was unable to find an alarms file " +
                      "at {}. Please check that this file".format(file_path) +
                      " exists and PA has read permissions.")
        except Exception as e:
            log.error("Encountered error while loading Alarms: " +
                      "{}: {}".format(type(e).__name__, e))
        # Only reached after one of the handlers above has run
        log.debug("Stack trace: \n {}".format(traceback.format_exc()))
        sys.exit(1)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HANDLE EVENTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Start it up
    def start(self):
        """Spawn a gevent greenlet running `self.run` and keep a handle to it."""
        worker = gevent.spawn(self.run)
        self.__process = worker

    def setup_in_process(self):
        """Prepare config, logging and alarms before events are handled.

        Stores this manager's debug flag and the root path (the parent of
        this file's directory) in `config`, raises the 'requests' and
        'urllib3' loggers to WARNING, switches the root logger to DEBUG
        when debugging is on, then connects every alarm and has it send its
        startup message.
        """
        # Update config
        config['DEBUG'] = self.__debug
        here = os.path.dirname(__file__)
        config['ROOT_PATH'] = os.path.abspath("{}/..".format(here))

        # Hush some noisy third-party loggers
        for noisy in ('requests', 'urllib3'):
            logging.getLogger(noisy).setLevel(logging.WARNING)

        if config['DEBUG'] is True:
            root_logger = logging.getLogger()
            root_logger.setLevel(logging.DEBUG)

        # Connect the alarms and send the start up message
        for active_alarm in self.__alarms.values():
            active_alarm.connect()
            active_alarm.startup_message()

    # Main event handler loop
    def run(self):
        """Main event loop: pull events off the queue and process them.

        After `setup_in_process`, the loop waits up to 5 seconds for each
        event and dispatches it on its exact type to the matching
        `process_*` method. Errors raised while processing an event are
        logged and the loop carries on. The cache is cleaned and saved when
        more than 5 minutes have passed since the last clean. When the
        queue is empty and the manager's exit event is set, the loop stops,
        the cache is saved one last time and `gevent.GreenletExit` is
        raised.
        """
        self.setup_in_process()

        # Exact event type -> method that handles it
        handlers = {
            Events.MonEvent: self.process_monster,
            Events.StopEvent: self.process_stop,
            Events.GymEvent: self.process_gym,
            Events.EggEvent: self.process_egg,
            Events.RaidEvent: self.process_raid,
            Events.WeatherEvent: self.process_weather,
            Events.QuestEvent: self.process_quest,
        }
        clean_interval = timedelta(minutes=5)
        last_clean = datetime.utcnow()

        while True:  # Run forever and ever
            # Clean out visited every 5 minutes
            if datetime.utcnow() - last_clean > clean_interval:
                log.debug("Cleaning cache...")
                self.__cache.clean_and_save()
                last_clean = datetime.utcnow()

            try:  # Get next object to process
                event = self.__queue.get(block=True, timeout=5)
            except gevent.queue.Empty:
                # Nothing queued: leave if an exit was requested
                if self.__event.is_set():
                    break
                gevent.sleep(0)  # Explicit context yield
                continue

            try:
                kind = type(event)
                log.debug("Processing event: %s", event.id)
                handler = handlers.get(kind)
                if handler is None:
                    log.error("!!! Manager does not support " +
                              "{} events!".format(kind))
                else:
                    handler(event)
                log.debug("Finished event: %s", event.id)
            except Exception as err:
                log.error("Encountered error during processing: " +
                          "{}: {}".format(type(err).__name__, err))
                log.debug("Stack trace: \n {}".format(traceback.format_exc()))
            gevent.sleep(0)  # Explicit context yield

        # Save cache and exit
        self.__cache.clean_and_save()
        raise gevent.GreenletExit()

    # Set the location of the Manager
    def set_location(self, location):
        """Set the manager's location from coordinates or a place name.

        :param location: either a "lat,lng" string made of two decimal
            numbers separated by a comma and/or whitespace (e.g.
            "12.5, -3.25"), or any other string, which is geocoded through
            `self._gmaps_service`
        :raises ValueError: if `location` is not a coordinate pair and
            either no Google Maps service is configured or geocoding
            returns None
        """
        # Regex for Lat,Lng coordinate (raw string so `\d` and `\s` reach
        # the regex engine unchanged)
        prog = re.compile(r"^(-?\d+\.\d+)[,\s]\s*(-?\d+\.\d+?)$")
        res = prog.match(location)
        if res:  # If location is in a Lat,Lng coordinate
            self.__location = [float(res.group(1)), float(res.group(2))]
        else:
            # Check if key was provided
            if self._gmaps_service is None:
                raise ValueError("Unable to find location coordinates by name"
                                 " - no Google API key was provided.")
            # Attempt to geocode location. The result gets its own name so
            # the failure message can still quote what was asked for
            # (it used to print "None").
            coords = self._gmaps_service.geocode(location)
            if coords is None:
                raise ValueError("Unable to geocode coordinates from {}. "
                                 "Location will not be set.".format(location))

            self.__location = coords
            log.info("Location successfully set to '{},{}'.".format(
                coords[0], coords[1]))

    # Process new Monster data and decide if a notification needs to be sent
    def process_monster(self, mon):
        # type: (Events.MonEvent) -> None
        """Run a monster event through the rules and fire matching alarms.

        The event is dropped when monster notifications are disabled, when
        the cache already holds an expiration for its encounter id, or when
        fewer seconds remain than the manager's time limit. Otherwise each
        rule is walked in turn; the first of its filters that the event
        passes (filter check and geofence check) triggers that rule's
        alarms. With no monster rules configured, one "default" rule made
        of every monster filter and every alarm is used.
        """
        if self.__mons_enabled is False:
            log.debug("Monster ignored: monster notifications are disabled.")
            return

        # Name the event up front so that reject logs are readable
        mon.name = self.__locale.get_pokemon_name(mon.monster_id)

        # Skip encounters already seen, otherwise remember this one
        cache = self.__cache
        if cache.monster_expiration(mon.enc_id) is not None:
            log.debug("{} monster was skipped because it was previously "
                      "processed.".format(mon.name))
            return
        cache.monster_expiration(mon.enc_id, mon.disappear_time)

        # Too little time left to be worth a notification?
        remaining = (mon.disappear_time - datetime.utcnow()).total_seconds()
        if remaining < self.__time_limit:
            log.debug("{} monster was skipped because only {} seconds remained"
                      "".format(mon.name, remaining))
            return

        # Distance and direction are only known when a location is set
        if self.__location is not None:
            mon.distance = get_earth_dist(
                [mon.lat, mon.lng], self.__location, self.__units)
            mon.direction = get_cardinal_dir(
                [mon.lat, mon.lng], self.__location)

        rules = self.__mon_rules
        if not rules:  # No rules: every filter feeds every alarm
            rules = {
                "default": Rule(self.__mon_filters.keys(),
                                self.__alarms.keys())
            }

        for rule_name, rule in rules.iteritems():
            for filter_name in rule.filter_names:
                filt = self.__mon_filters.get(filter_name)
                if not (filt.check_event(mon)
                        and self.check_geofences(filt, mon)):
                    continue  # try the rule's next filter
                mon.custom_dts = filt.custom_dts
                if self.__quiet is False:
                    log.info("{} monster notification"
                             " has been triggered in rule '{}'!"
                             "".format(mon.name, rule_name))
                self._trigger_mon(mon, rule.alarm_names)
                break  # one notification per rule

    def _trigger_mon(self, mon, alarms):
        """Send a monster event to each named alarm and wait for them all.

        Builds the event's DTS dict, adds reverse-geocode and
        distance-matrix entries when those Google Maps options are
        configured, then spawns `pokemon_alert` on every alarm named in
        `alarms`. Names with no matching alarm are logged as critical.
        """
        dts = mon.generate_dts(self.__locale, self.__timezone, self.__units)

        # Optional Google Maps additions
        if self._gmaps_reverse_geocode:
            geocoded = self._gmaps_service.reverse_geocode(
                (mon.lat, mon.lng), self._language)
            dts.update(geocoded)
        for mode in self._gmaps_distance_matrix:
            travel = self._gmaps_service.distance_matrix(
                mode, (mon.lat, mon.lng), self.__location,
                self._language, self.__units)
            dts.update(travel)

        # Notifications run in greenlets so they can work in background
        pending = []
        for alarm_name in alarms:
            alarm = self.__alarms.get(alarm_name)
            if not alarm:
                log.critical("Alarm '{}' not found!".format(alarm_name))
                continue
            pending.append(gevent.spawn(alarm.pokemon_alert, dts))

        for greenlet in pending:  # Wait for all alarms to finish
            greenlet.join()

    def process_stop(self, stop):
        # type: (Events.StopEvent) -> None
        """Run a stop event through the rules and fire matching alarms.

        The event is dropped when stop notifications are disabled, when the
        stop has no expiration (it is not lured), when the cache already
        holds an expiration for its stop id, or when fewer seconds remain
        than the manager's time limit. Otherwise each rule is walked in
        turn; the first of its filters that the event passes triggers that
        rule's alarms. With no stop rules configured, one "default" rule
        made of every stop filter and every alarm is used.
        """
        if self.__stops_enabled is False:
            log.debug("Stop ignored: stop notifications are disabled.")
            return

        # Only lured stops carry an expiration
        if stop.expiration is None:
            log.debug("Stop ignored: stop was not lured")
            return

        # Skip stops already seen, otherwise remember this one
        cache = self.__cache
        if cache.stop_expiration(stop.stop_id) is not None:
            log.debug("Stop {} was skipped because it was previously "
                      "processed.".format(stop.name))
            return
        cache.stop_expiration(stop.stop_id, stop.expiration)

        # Too little time left to be worth a notification?
        remaining = (stop.expiration - datetime.utcnow()).total_seconds()
        if remaining < self.__time_limit:
            log.debug("Stop {} was skipped because only {} seconds remained"
                      "".format(stop.name, remaining))
            return

        # Distance and direction are only known when a location is set
        if self.__location is not None:
            stop.distance = get_earth_dist(
                [stop.lat, stop.lng], self.__location, self.__units)
            stop.direction = get_cardinal_dir(
                [stop.lat, stop.lng], self.__location)

        rules = self.__stop_rules
        if not rules:  # No rules: every filter feeds every alarm
            rules = {
                "default": Rule(self.__stop_filters.keys(),
                                self.__alarms.keys())
            }

        for rule_name, rule in rules.iteritems():
            for filter_name in rule.filter_names:
                filt = self.__stop_filters.get(filter_name)
                if not (filt.check_event(stop)
                        and self.check_geofences(filt, stop)):
                    continue  # try the rule's next filter
                stop.custom_dts = filt.custom_dts
                if self.__quiet is False:
                    log.info("{} stop notification"
                             " has been triggered in rule '{}'!"
                             "".format(stop.name, rule_name))
                self._trigger_stop(stop, rule.alarm_names)
                break  # one notification per rule

    def _trigger_stop(self, stop, alarms):
        """Send a stop event to each named alarm and wait for them all.

        Builds the event's DTS dict, adds reverse-geocode and
        distance-matrix entries when those Google Maps options are
        configured, then spawns `pokestop_alert` on every alarm named in
        `alarms`. Names with no matching alarm are logged as critical.
        """
        dts = stop.generate_dts(self.__locale, self.__timezone, self.__units)

        # Optional Google Maps additions
        if self._gmaps_reverse_geocode:
            geocoded = self._gmaps_service.reverse_geocode(
                (stop.lat, stop.lng), self._language)
            dts.update(geocoded)
        for mode in self._gmaps_distance_matrix:
            travel = self._gmaps_service.distance_matrix(
                mode, (stop.lat, stop.lng), self.__location,
                self._language, self.__units)
            dts.update(travel)

        # Notifications run in greenlets so they can work in background
        pending = []
        for alarm_name in alarms:
            alarm = self.__alarms.get(alarm_name)
            if not alarm:
                log.critical("Alarm '{}' not found!".format(alarm_name))
                continue
            pending.append(gevent.spawn(alarm.pokestop_alert, dts))

        for greenlet in pending:  # Wait for all alarms to finish
            greenlet.join()

    def process_gym(self, gym):
        # type: (Events.GymEvent) -> None
        """Run a gym event through the rules and fire matching alarms.

        The gym's name, description and image are passed through the cache
        first. The event is then dropped when neutral takeovers are ignored
        and the new team id is 0, when gym notifications are disabled, or
        when the new team equals the team previously cached for the gym.
        The cached team is updated before the enabled check, so team
        changes are recorded even while gym notifications are off.

        Otherwise each rule is walked in turn; the first of its filters
        that the event passes (filter check and geofence check) triggers
        that rule's alarms. With no gym rules configured, one "default"
        rule made of every gym filter and every alarm is used.
        """
        # Update Gym details (if they exist)
        gym.gym_name = self.__cache.gym_name(gym.gym_id, gym.gym_name)
        gym.gym_description = self.__cache.gym_desc(gym.gym_id,
                                                    gym.gym_description)
        gym.gym_image = self.__cache.gym_image(gym.gym_id, gym.gym_image)

        # Ignore changes to neutral
        if self.__ignore_neutral and gym.new_team_id == 0:
            # The '%s' placeholder needs its argument: with none supplied,
            # logging emits the literal "%s" instead of the gym id
            log.debug("%s gym update skipped: new team was neutral",
                      gym.gym_id)
            return

        # Update Team Information
        gym.old_team_id = self.__cache.gym_team(gym.gym_id)
        self.__cache.gym_team(gym.gym_id, gym.new_team_id)

        # Check if notifications are on
        if self.__gyms_enabled is False:
            log.debug("Gym ignored: gym notifications are disabled.")
            return

        # Nothing to report when the team did not change
        if gym.new_team_id == gym.old_team_id:
            log.debug("%s gym update skipped: no change detected", gym.gym_id)
            return

        # Calculate distance and direction
        if self.__location is not None:
            gym.distance = get_earth_dist([gym.lat, gym.lng], self.__location,
                                          self.__units)
            gym.direction = get_cardinal_dir([gym.lat, gym.lng],
                                             self.__location)

        # Check for Rules
        rules = self.__gym_rules
        if len(rules) == 0:  # If no rules, default to all
            rules = {
                "default": Rule(self.__gym_filters.keys(),
                                self.__alarms.keys())
            }

        for r_name, rule in rules.iteritems():  # For all rules
            for f_name in rule.filter_names:  # Check Filters in Rules
                f = self.__gym_filters.get(f_name)
                passed = f.check_event(gym) and self.check_geofences(f, gym)
                if not passed:
                    continue  # go to next filter
                gym.custom_dts = f.custom_dts
                if self.__quiet is False:
                    log.info("{} gym notification"
                             " has been triggered in rule '{}'!"
                             "".format(gym.name, r_name))
                self._trigger_gym(gym, rule.alarm_names)
                break  # Next rule

    def _trigger_gym(self, gym, alarms):
        """Send a gym event to each named alarm and wait for them all.

        Builds the event's DTS dict, adds reverse-geocode and
        distance-matrix entries when those Google Maps options are
        configured, then spawns `gym_alert` on every alarm named in
        `alarms`. Names with no matching alarm are logged as critical.
        """
        dts = gym.generate_dts(self.__locale, self.__timezone, self.__units)

        # Optional Google Maps additions
        if self._gmaps_reverse_geocode:
            geocoded = self._gmaps_service.reverse_geocode(
                (gym.lat, gym.lng), self._language)
            dts.update(geocoded)
        for mode in self._gmaps_distance_matrix:
            travel = self._gmaps_service.distance_matrix(
                mode, (gym.lat, gym.lng), self.__location,
                self._language, self.__units)
            dts.update(travel)

        # Notifications run in greenlets so they can work in background
        pending = []
        for alarm_name in alarms:
            alarm = self.__alarms.get(alarm_name)
            if not alarm:
                log.critical("Alarm '{}' not found!".format(alarm_name))
                continue
            pending.append(gevent.spawn(alarm.gym_alert, dts))

        for greenlet in pending:  # Wait for all alarms to finish
            greenlet.join()

    def process_egg(self, egg):
        # type: (Events.EggEvent) -> None
        """Run an egg event through the rules and fire matching alarms.

        The gym's name, description and image are passed through the cache
        first, and an unknown team id is replaced by the cached one. The
        event is then dropped when egg notifications are disabled, when the
        cache already holds an egg expiration for the gym, or when fewer
        seconds remain before hatching than the manager's time limit.
        Otherwise each rule is walked in turn; the first of its filters
        that the event passes triggers that rule's alarms. With no egg
        rules configured, one "default" rule made of every egg filter and
        every alarm is used.
        """
        cache = self.__cache

        # Update Gym details (if they exist)
        egg.gym_name = cache.gym_name(egg.gym_id, egg.gym_name)
        egg.gym_description = cache.gym_desc(egg.gym_id, egg.gym_description)
        egg.gym_image = cache.gym_image(egg.gym_id, egg.gym_image)

        # Fall back on the cached team when the event doesn't know it
        if Unknown.is_(egg.current_team_id):
            egg.current_team_id = cache.gym_team(egg.gym_id)

        if self.__eggs_enabled is False:
            log.debug("Egg ignored: egg notifications are disabled.")
            return

        # Skip eggs already seen, otherwise remember this one
        if cache.egg_expiration(egg.gym_id) is not None:
            log.debug("Egg {} was skipped because it was previously "
                      "processed.".format(egg.name))
            return
        cache.egg_expiration(egg.gym_id, egg.hatch_time)

        # Too little time left to be worth a notification?
        remaining = (egg.hatch_time - datetime.utcnow()).total_seconds()
        if remaining < self.__time_limit:
            log.debug("Egg {} was skipped because only {} seconds remained"
                      "".format(egg.name, remaining))
            return

        # Distance and direction are only known when a location is set
        if self.__location is not None:
            egg.distance = get_earth_dist(
                [egg.lat, egg.lng], self.__location, self.__units)
            egg.direction = get_cardinal_dir(
                [egg.lat, egg.lng], self.__location)

        rules = self.__egg_rules
        if not rules:  # No rules: every filter feeds every alarm
            rules = {
                "default": Rule(self.__egg_filters.keys(),
                                self.__alarms.keys())
            }

        for rule_name, rule in rules.iteritems():
            for filter_name in rule.filter_names:
                filt = self.__egg_filters.get(filter_name)
                if not (filt.check_event(egg)
                        and self.check_geofences(filt, egg)):
                    continue  # try the rule's next filter
                egg.custom_dts = filt.custom_dts
                if self.__quiet is False:
                    log.info("{} egg notification"
                             " has been triggered in rule '{}'!"
                             "".format(egg.name, rule_name))
                self._trigger_egg(egg, rule.alarm_names)
                break  # one notification per rule

    def _trigger_egg(self, egg, alarms):
        """Send an egg event to each named alarm and wait for them all.

        Builds the event's DTS dict, adds reverse-geocode and
        distance-matrix entries when those Google Maps options are
        configured, then spawns `raid_egg_alert` on every alarm named in
        `alarms`. Names with no matching alarm are logged as critical.
        """
        dts = egg.generate_dts(self.__locale, self.__timezone, self.__units)

        # Optional Google Maps additions
        if self._gmaps_reverse_geocode:
            geocoded = self._gmaps_service.reverse_geocode(
                (egg.lat, egg.lng), self._language)
            dts.update(geocoded)
        for mode in self._gmaps_distance_matrix:
            travel = self._gmaps_service.distance_matrix(
                mode, (egg.lat, egg.lng), self.__location,
                self._language, self.__units)
            dts.update(travel)

        # Notifications run in greenlets so they can work in background
        pending = []
        for alarm_name in alarms:
            alarm = self.__alarms.get(alarm_name)
            if not alarm:
                log.critical("Alarm '{}' not found!".format(alarm_name))
                continue
            pending.append(gevent.spawn(alarm.raid_egg_alert, dts))

        for greenlet in pending:  # Wait for all alarms to finish
            greenlet.join()

    def process_raid(self, raid):
        # type: (Events.RaidEvent) -> None
        """Run a raid event through the rules and fire matching alarms.

        The gym's name, description and image are passed through the cache
        first, and an unknown team id is replaced by the cached one. The
        event is then dropped when raid notifications are disabled, when
        the cache already holds a raid expiration for the gym, or when
        fewer seconds remain before the raid ends than the manager's time
        limit. Otherwise each rule is walked in turn; the first of its
        filters that the event passes triggers that rule's alarms. With no
        raid rules configured, one "default" rule made of every raid filter
        and every alarm is used.
        """
        cache = self.__cache

        # Update Gym details (if they exist)
        raid.gym_name = cache.gym_name(raid.gym_id, raid.gym_name)
        raid.gym_description = cache.gym_desc(raid.gym_id,
                                              raid.gym_description)
        raid.gym_image = cache.gym_image(raid.gym_id, raid.gym_image)

        # Fall back on the cached team when the event doesn't know it
        if Unknown.is_(raid.current_team_id):
            raid.current_team_id = cache.gym_team(raid.gym_id)

        if self.__raids_enabled is False:
            log.debug("Raid ignored: raid notifications are disabled.")
            return

        # Skip raids already seen, otherwise remember this one
        if cache.raid_expiration(raid.gym_id) is not None:
            log.debug("Raid {} was skipped because it was previously "
                      "processed.".format(raid.name))
            return
        cache.raid_expiration(raid.gym_id, raid.raid_end)

        # Too little time left to be worth a notification?
        remaining = (raid.raid_end - datetime.utcnow()).total_seconds()
        if remaining < self.__time_limit:
            log.debug("Raid {} was skipped because only {} seconds remained"
                      "".format(raid.name, remaining))
            return

        # Distance and direction are only known when a location is set
        if self.__location is not None:
            raid.distance = get_earth_dist(
                [raid.lat, raid.lng], self.__location, self.__units)
            raid.direction = get_cardinal_dir(
                [raid.lat, raid.lng], self.__location)

        rules = self.__raid_rules
        if not rules:  # No rules: every filter feeds every alarm
            rules = {
                "default": Rule(self.__raid_filters.keys(),
                                self.__alarms.keys())
            }

        for rule_name, rule in rules.iteritems():
            for filter_name in rule.filter_names:
                filt = self.__raid_filters.get(filter_name)
                if not (filt.check_event(raid)
                        and self.check_geofences(filt, raid)):
                    continue  # try the rule's next filter
                raid.custom_dts = filt.custom_dts
                if self.__quiet is False:
                    log.info("{} raid notification"
                             " has been triggered in rule '{}'!"
                             "".format(raid.name, rule_name))
                self._trigger_raid(raid, rule.alarm_names)
                break  # one notification per rule

    def _trigger_raid(self, raid, alarms):
        """Send a raid event to each named alarm and wait for them all.

        Builds the event's DTS dict, adds reverse-geocode and
        distance-matrix entries when those Google Maps options are
        configured, then spawns `raid_alert` on every alarm named in
        `alarms`. Names with no matching alarm are logged as critical.
        """
        dts = raid.generate_dts(self.__locale, self.__timezone, self.__units)

        # Optional Google Maps additions
        if self._gmaps_reverse_geocode:
            geocoded = self._gmaps_service.reverse_geocode(
                (raid.lat, raid.lng), self._language)
            dts.update(geocoded)
        for mode in self._gmaps_distance_matrix:
            travel = self._gmaps_service.distance_matrix(
                mode, (raid.lat, raid.lng), self.__location,
                self._language, self.__units)
            dts.update(travel)

        # Notifications run in greenlets so they can work in background
        pending = []
        for alarm_name in alarms:
            alarm = self.__alarms.get(alarm_name)
            if not alarm:
                log.critical("Alarm '{}' not found!".format(alarm_name))
                continue
            pending.append(gevent.spawn(alarm.raid_alert, dts))

        for greenlet in pending:  # Wait for all alarms to finish
            greenlet.join()

    def process_weather(self, weather):
        # type: (Events.WeatherEvent) -> None
        """Run a weather event through the rules and fire matching alarms.

        The event is dropped when weather notifications are disabled or
        when the cache already records the same condition for the event's
        weather cell. Otherwise the cached condition is updated and each
        rule is walked in turn; the first of its filters that the event
        passes (filter check and weather geofence check) triggers that
        rule's alarms. With no weather rules configured, one "default" rule
        made of every weather filter and every alarm is used.
        """
        if self.__weather_enabled is False:
            log.debug("Weather ignored: weather notifications are disabled.")
            return

        # Skip cells whose cached condition hasn't changed
        cell_id = weather.weather_cell_id
        if self.__cache.get_cell_weather(cell_id) == weather.condition:
            log.debug("Weather alert for cell {} was skipped "
                      "because it was already {} weather.".format(
                          weather.weather_cell_id, weather.condition))
            return
        self.__cache.update_cell_weather(weather.weather_cell_id,
                                         weather.condition)

        rules = self.__weather_rules
        if not rules:  # No rules: every filter feeds every alarm
            rules = {
                "default": Rule(self.__weather_filters.keys(),
                                self.__alarms.keys())
            }

        for rule_name, rule in rules.iteritems():
            for filter_name in rule.filter_names:
                filt = self.__weather_filters.get(filter_name)
                if not (filt.check_event(weather)
                        and self.check_weather_geofences(filt, weather)):
                    continue  # try the rule's next filter
                weather.custom_dts = filt.custom_dts

                if self.__quiet is False:
                    log.info("{} weather notification"
                             " has been triggered in rule '{}'!"
                             "".format(weather.weather_cell_id, rule_name))
                self._trigger_weather(weather, rule.alarm_names)
                break  # one notification per rule

    def _trigger_weather(self, weather, alarms):
        """ Build the DTS for a weather event and send it to the alarms.

        Each named alarm's ``weather_alert`` is spawned with gevent, and the
        call returns only after every spawned greenlet has been joined.
        Names that are not present in the alarm table are logged and
        skipped.
        """
        dts = weather.generate_dts(
            self.__locale, self.__timezone, self.__units)

        # Run the notifications concurrently so they work in background
        greenlets = []
        for alarm_name in alarms:
            target = self.__alarms.get(alarm_name)
            if not target:
                log.critical("Alarm '{}' not found!".format(alarm_name))
                continue
            greenlets.append(gevent.spawn(target.weather_alert, dts))

        # Block until every alarm has finished
        for greenlet in greenlets:
            greenlet.join()

    def process_quest(self, quest):
        # type: (Events.QuestEvent) -> None
        """ Process a quest event and notify alarms if it passes.

        The event is dropped when quest notifications are disabled, or when
        the cache already holds a reward for the quest's stop. Otherwise the
        reward is cached, distance and direction relative to the configured
        location are filled in (when a location is set), and every rule is
        evaluated: the first filter of a rule that accepts the event (both
        the filter check and the geofence check) triggers that rule's
        alarms, after which evaluation moves on to the next rule. When no
        rules are configured, a single "default" rule covering all quest
        filters and all alarms is used.
        """

        # Make sure that quests are enabled
        if self.__quest_enabled is False:
            log.debug("Quest ignored: quest notifications are disabled.")
            return

        # Skip if a reward is already cached for this stop, else cache it
        if self.__cache.quest_reward(quest.stop_id) is not None:
            log.debug("Quest {} was skipped because it was previously "
                      "processed.".format(quest.stop_name))
            return
        self.__cache.quest_reward(quest.stop_id, quest.reward)

        # Calculate distance and direction
        if self.__location is not None:
            quest.distance = get_earth_dist([quest.lat, quest.lng],
                                            self.__location, self.__units)
            quest.direction = get_cardinal_dir([quest.lat, quest.lng],
                                               self.__location)

        # Check for Rules
        rules = self.__quest_rules
        if not rules:  # If no rules, default to all
            rules = {
                "default": Rule(self.__quest_filters.keys(),
                                self.__alarms.keys())
            }

        # items() rather than iteritems(): the latter exists only on
        # Python 2 mappings, items() is available on both 2 and 3.
        for r_name, rule in rules.items():  # For all rules
            for f_name in rule.filter_names:  # Check Filters in Rules
                f = self.__quest_filters.get(f_name)
                passed = f.check_event(quest) and self.check_geofences(
                    f, quest)
                if not passed:
                    continue  # go to next filter
                quest.custom_dts = f.custom_dts
                if self.__quiet is False:
                    log.info("{} quest notification"
                             " has been triggered in rule '{}'!"
                             "".format(quest.stop_name, r_name))
                self._trigger_quest(quest, rule.alarm_names)
                break  # Next rule

    def _trigger_quest(self, quest, alarms):
        """ Build the DTS for a quest and send it to the named alarms.

        The DTS produced by ``quest.generate_dts`` is extended with the
        optional reverse-geocode and distance-matrix lookups, then each
        alarm's ``quest_alert`` is spawned with gevent. The call returns
        only after every spawned greenlet has been joined. Names that are
        not present in the alarm table are logged and skipped.
        """
        dts = quest.generate_dts(self.__locale, self.__timezone, self.__units)

        # Merge in the Google Maps lookups that are switched on
        if self._gmaps_reverse_geocode:
            geocode_dts = self._gmaps_service.reverse_geocode(
                (quest.lat, quest.lng), self._language)
            dts.update(geocode_dts)
        for mode in self._gmaps_distance_matrix:
            matrix_dts = self._gmaps_service.distance_matrix(
                mode, (quest.lat, quest.lng), self.__location,
                self._language, self.__units)
            dts.update(matrix_dts)

        # Run the notifications concurrently so they work in background
        greenlets = []
        for alarm_name in alarms:
            target = self.__alarms.get(alarm_name)
            if not target:
                log.critical("Alarm '{}' not found!".format(alarm_name))
                continue
            greenlets.append(gevent.spawn(target.quest_alert, dts))

        # Block until every alarm has finished
        for greenlet in greenlets:
            greenlet.join()

    # Check to see if a notification is within the given range
    def check_geofences(self, f, e):
        """ Returns true if the event passes the filter's geofences.

        The check passes automatically when either ``self.geofences`` or the
        filter's ``geofences`` is None. A filter whose only target is "all"
        is tested against every geofence in ``self.geofences``. On the first
        geofence that contains the event's coordinates, ``e.geofence`` is
        set to that geofence's name and True is returned. If none match,
        the filter is told to reject the event and False is returned.
        Target names missing from ``self.geofences`` are logged and skipped.
        """
        if self.geofences is None or f.geofences is None:  # No geofences set
            return True
        targets = f.geofences
        if len(targets) == 1 and "all" in targets:
            # Iterating the mapping yields its keys on Python 2 and 3 alike;
            # iterkeys() exists only on Python 2 mappings.
            targets = iter(self.geofences)
        for name in targets:
            gf = self.geofences.get(name)
            if not gf:  # gf doesn't exist
                log.error("Cannot check geofence %s: does not exist!", name)
            elif gf.contains(e.lat, e.lng):  # e in gf
                log.debug("{} is in geofence {}!".format(
                    e.name, gf.get_name()))
                e.geofence = name  # Set the geofence for dts
                return True
            else:  # e not in gf
                log.debug("%s not in %s.", e.name, name)
        f.reject(e, "not in geofences")
        return False

    # Check whether a weather notification's S2 cell
    # overlaps a given geofence

    def check_weather_geofences(self, f, weather):
        """ Returns true if the weather event passes the filter's geofences.

        The check passes automatically when either ``self.geofences`` or the
        filter's ``geofences`` is None. A filter whose only target is "all"
        is tested against every geofence in ``self.geofences``. On the first
        geofence whose ``check_overlap`` accepts the event,
        ``weather.geofence`` is set to that geofence's name and True is
        returned. If none match, the filter is told to reject the event and
        False is returned. Target names missing from ``self.geofences`` are
        logged and skipped.
        """
        if self.geofences is None or f.geofences is None:  # No geofences set
            return True
        targets = f.geofences
        if len(targets) == 1 and "all" in targets:
            # Iterating the mapping yields its keys on Python 2 and 3 alike;
            # iterkeys() exists only on Python 2 mappings.
            targets = iter(self.geofences)
        for name in targets:
            gf = self.geofences.get(name)
            if not gf:  # gf doesn't exist
                log.error("Cannot check geofence %s: does not exist!", name)
            elif gf.check_overlap(weather):  # weather cell overlaps gf
                log.debug("{} is in geofence {}!".format(
                    weather.weather_cell_id, gf.get_name()))
                weather.geofence = name  # Set the geofence for dts
                return True
            else:  # weather not in gf
                log.debug("%s not in %s.", weather.weather_cell_id, name)
        f.reject(weather, "not in geofences")
        return False