class XMPPKeepAliveSupervisor(Supervisor, Subject):
    """Periodically notifies every registered observer so XMPP connections can
    emit keep-alive traffic, and samples how long one full round takes."""

    name = 'XMPP_KEEPALIVE_SUPERVISOR'
    log = getLoggerAdapter(log, id=name)

    def __init__(self):
        Supervisor.__init__(self)
        Subject.__init__(self)
        # Timestamp of the start of the last keep-alive round; read by other
        # services (e.g. connection handlers) to judge keep-alive freshness.
        self.lastTime = datetime.utcnow()
        # Sampled round duration: values are added in microseconds and the
        # scale=1000 divisor reports them as milliseconds ('msec').
        self.metric = Metric(name='elapsed_time', value=None, unit='msec',
                             source=self.name, scale=1000, sampling=True)

    def perform_keep_alive(self):
        """Run one keep-alive round over all observers and record its cost."""
        # Stamp the round start before notifying so observers see a fresh
        # lastTime while the round is in progress.
        self.lastTime = datetime.utcnow()
        self.notifyObservers()
        elapsedTime = datetime.utcnow() - self.lastTime
        usecs = elapsedTime.seconds * 1000000 + elapsedTime.microseconds
        self.log.info('Finished XMPP_KEEP_ALIVE. Elapsed %s usecs', usecs)
        self.metric.add(usecs)

    def startService(self):
        """Start the supervisor and, if enabled, the periodic keep-alive task."""
        Supervisor.startService(self)
        # A non-positive XMPP_KEEP_ALIVE_TIME disables keep-alive entirely.
        if conf.XMPP_KEEP_ALIVE_TIME > 0:
            t = LoopingCall(self.perform_keep_alive)
            self.registerTask(t)
            # now=False: first round fires after one full interval.
            t.start(conf.XMPP_KEEP_ALIVE_TIME, now=False)
class BaseHandlerNoLog(cyclone.web.Application, RedisMixin):
    """Cyclone application exposing the v1 Google REST endpoints, recording
    per-request timing and response-class metrics without per-request logging."""

    def __init__(self):
        handlers = [
            (r"/1/google/messages/(.+)", v1.handlers.GoogleMessagesHandler),
            (r"/1/google/contacts/(.+)", v1.handlers.GoogleContactsHandler),
            (r"/1/google/(.+)", v1.handlers.GoogleHandler)
        ]
        settings = dict(debug=conf.CYCLONE_DEBUG, )
        cyclone.web.Application.__init__(self, handlers, **settings)
        self.log = getLoggerAdapter(log)
        # Sampled request-service time for every request handled by the app.
        self.metric = Metric(name='time', value=None,
                             unit=v1.handlers.METRIC_UNIT_TIME,
                             source=v1.handlers.METRIC_SOURCE, sampling=True)
        # One counter per HTTP status class, keyed by status // 100 (1..5).
        self.metric_response_codes = {
            1: Metric(name='response_1XX', value=None, unit='requests',
                      source=v1.handlers.METRIC_SOURCE),
            2: Metric(name='response_2XX', value=None, unit='requests',
                      source=v1.handlers.METRIC_SOURCE),
            3: Metric(name='response_3XX', value=None, unit='requests',
                      source=v1.handlers.METRIC_SOURCE),
            4: Metric(name='response_4XX', value=None, unit='requests',
                      source=v1.handlers.METRIC_SOURCE),
            5: Metric(name='response_5XX', value=None, unit='requests',
                      source=v1.handlers.METRIC_SOURCE)
        }

    @IncrementMetric(name='total', unit=v1.handlers.METRIC_UNIT,
                     source=v1.handlers.METRIC_SOURCE)
    def log_request(self, handler):
        """Record timing and status-class metrics for a finished request.

        Called by cyclone once per completed request; does not log anything.
        """
        # request_time() is in seconds; metrics are kept in milliseconds.
        self._request_time = 1000 * handler.request.request_time()
        self.metric.add(self._request_time)
        # '//' makes the integer floor division explicit (e.g. 404 -> 4);
        # identical to the old '/' under Python 2 ints, and safe if the file
        # ever runs with true division.
        status_metric = self.metric_response_codes.get(
            handler.get_status() // 100, None)
        if status_metric is not None:
            status_metric.add(1)
        # Handlers may expose their own sampled time metric as `.metric`.
        metric = getattr(handler, 'metric', None)
        if metric:
            metric.add(self._request_time)
def __init__(self):
    """Initialise the supervisor/subject bases and the round-time metric."""
    for base_init in (Supervisor.__init__, Subject.__init__):
        base_init(self)
    # Start of the most recent keep-alive round.
    self.lastTime = datetime.utcnow()
    # Sampled duration metric: fed in microseconds, reported as msec
    # thanks to the scale=1000 divisor.
    elapsed_metric = Metric(name='elapsed_time',
                            value=None,
                            unit='msec',
                            source=self.name,
                            scale=1000,
                            sampling=True)
    self.metric = elapsed_metric
class AuthRedisProtocol(redis.RedisProtocol): CONNECTIONS_METRIC = Metric(name='connections', value=None, unit='connections', source='REDIS', reset=False) password = None log = None @IncrementMetric(name='connectionMade', unit='calls', source='REDIS') @defer.inlineCallbacks def connectionMade(self): self.CONNECTIONS_METRIC.add(1) if not self.password is None: try: yield self.auth(self.password) yield redis.RedisProtocol.connectionMade(self) except Exception, e: self.factory.maxRetries = conf.BACKEND_MAX_RETRIES self.factory.maxDelay = conf.BACKEND_MAX_DELAY self.transport.loseConnection() msg = "Redis Error.%s: %r" % (e.__class__.__name__, e) self.factory.connectionError(msg) self.log.warning(msg) defer.returnValue(None) else:
class RedisMetrics(Observer):
    """Observer that samples the Redis database size and the length of each
    distribution queue (login, push, relogin and the failed-jobs queue)."""

    SOURCE = 'REDIS'
    UNIT = 'keys'

    def __init__(self):
        self._connection = RedisMixin.redis_conn
        # Total number of keys in the Redis database.
        self._keys = Metric(name="keys", value=None, unit=self.UNIT,
                            source=self.SOURCE)
        failed_queue_name = FailedQueue().name
        monitored_queues = (conf.DIST_QUEUE_LOGIN,
                            conf.DIST_QUEUE_PUSH,
                            conf.DIST_QUEUE_RELOGIN,
                            failed_queue_name)
        # One length metric per monitored queue, keyed by queue name.
        self._items = dict(
            (queue_name,
             Metric(name='queue_%s' % (queue_name,), value=None,
                    unit=self.UNIT, source=self.SOURCE))
            for queue_name in monitored_queues)

    @defer.inlineCallbacks
    def notify(self):
        """Sample the current database size and every monitored queue length."""
        total_keys = yield self._connection.dbsize()
        self._keys.add(total_keys)
        for queue_name, queue_metric in self._items.items():
            pending = yield Queue(name=queue_name,
                                  connection=self._connection).count
            queue_metric.add(pending)
def _create_metrics(self, collectionName):
    """Build the per-collection MongoDB stat metrics, keyed by the stat field
    name as reported by the server (count, size, nindexes, ...)."""
    # Keyword set shared by every storage-sized metric.
    storage_kwargs = dict(unit=conf.MONGO_STORAGE_UNIT, source='MONGO',
                          scale=conf.MONGO_STORAGE_UNIT_SCALE)
    metrics = {}
    metrics['count'] = Metric(name='%s.documents' % (collectionName,),
                              value=None, unit='documents', source='MONGO')
    metrics['avgObjSize'] = Metric(name='%s.document_size' % (collectionName,),
                                   value=None, **storage_kwargs)
    metrics['size'] = Metric(name='%s.size' % (collectionName,),
                             value=None, **storage_kwargs)
    metrics['storageSize'] = Metric(name='%s.storage_size' % (collectionName,),
                                    value=None, **storage_kwargs)
    metrics['nindexes'] = Metric(name='%s.indexes' % (collectionName,),
                                 value=None, unit='indexes', source='MONGO')
    metrics['totalIndexSize'] = Metric(name='%s.index_size' % (collectionName,),
                                       value=None, **storage_kwargs)
    return metrics
def __init__(self):
    """Capture the shared Redis connection and create the key/queue metrics."""
    self._connection = RedisMixin.redis_conn
    self._keys = Metric(name="keys", value=None, unit=self.UNIT,
                        source=self.SOURCE)
    failed_queue_name = FailedQueue().name

    def queue_metric(queue_name):
        # Each monitored queue gets its own length metric.
        return Metric(name='queue_%s' % (queue_name,), value=None,
                      unit=self.UNIT, source=self.SOURCE)

    self._items = {
        conf.DIST_QUEUE_LOGIN: queue_metric(conf.DIST_QUEUE_LOGIN),
        conf.DIST_QUEUE_PUSH: queue_metric(conf.DIST_QUEUE_PUSH),
        conf.DIST_QUEUE_RELOGIN: queue_metric(conf.DIST_QUEUE_RELOGIN),
        failed_queue_name: queue_metric(failed_queue_name),
    }
class KatooAPNSProtocol(APNSProtocol):
    """APNS protocol that tracks the number of currently open connections."""

    # Class-level gauge shared by all instances; reset=False keeps the running
    # total across metric flushes.
    CONNECTIONS_METRIC = Metric(name='connections', value=None,
                                unit='connections', source=METRIC_SOURCE,
                                reset=False)

    @IncrementMetric(name='connectionMade', unit='calls', source=METRIC_SOURCE)
    def connectionMade(self):
        # One more live connection, then defer to the base implementation.
        self.CONNECTIONS_METRIC.add(1)
        return APNSProtocol.connectionMade(self)

    @IncrementMetric(name='connectionLost', unit='calls', source=METRIC_SOURCE)
    def connectionLost(self, reason):
        # Connection gone: decrement the gauge before base-class cleanup.
        self.CONNECTIONS_METRIC.add(-1)
        return APNSProtocol.connectionLost(self, reason)
class Timer(service.Service, Singleton):
    """Coarse-grained shared clock, refreshed every TIMER_INTERVAL seconds,
    so hot paths can read "now" without calling datetime.utcnow() each time."""

    # Counts the times the timer detected it had not ticked for too long.
    TIMER_ACCURATE_WARNING_METRIC = Metric(name='check_auth_renewal',
                                           value=None, unit='events',
                                           source='TIMER', reset=False)

    def constructor(self):
        """Singleton initialiser (runs once per process)."""
        self._time = datetime.utcnow()
        self.log = getLoggerAdapter(getLogger(__name__, "INFO"), id='TIMER')
        self._interval = conf.TIMER_INTERVAL
        # Tolerate up to three missed intervals before warning.
        self._maxinterval = self._interval * 3

    def _updateTime(self):
        """Advance the cached time and warn if the tick was badly delayed."""
        last_time, self._time = self._time, datetime.utcnow()
        # BUGFIX: timedelta.seconds ignores the .days component (it wraps at
        # 24 hours), so a stall of a day plus a second looked like one second.
        # total_seconds() reports the true elapsed time.
        elapsed_seconds = (self._time - last_time).total_seconds()
        if elapsed_seconds > self._maxinterval:
            self.TIMER_ACCURATE_WARNING_METRIC.add(1)
            self.log.warning(
                'Timer not too much accurate. Elapsed %s seconds without update',
                elapsed_seconds)

    def startService(self):
        self.log.info('Started Timer')
        self._task = LoopingCall(self._updateTime)
        self._task.start(self._interval, now=False)
        return service.Service.startService(self)

    def stopService(self):
        # Only stop the LoopingCall if the service actually started.
        if self.running:
            self._task.stop()
            self.log.info('Stopped Timer')
        return service.Service.stopService(self)

    @property
    def time(self):
        """Cached UTC datetime of the last tick."""
        return self._time

    def utcnow(self):
        """Cached equivalent of datetime.utcnow() (stale by up to one interval)."""
        return self._time

    def isoformat(self):
        """Cached time as an ISO-8601 string with a trailing 'Z' (UTC)."""
        return "%sZ" % self._time.isoformat()
class GoogleContactsHandler(MyRequestHandler):
    """REST endpoint for a user's Google contacts. Only PUT (update a single
    contact) is supported; the remaining verbs answer 404."""

    # Sampled service-time metric for the supported verb.
    METRICS = {
        'put': Metric(name='time_put_google_contacts', value=None,
                      unit=METRIC_UNIT_TIME, source=METRIC_SOURCE,
                      sampling=True)
    }

    @defer.inlineCallbacks
    def get(self, key):
        # Not supported for contacts.
        self.constructor(key)
        raise cyclone.web.HTTPError(404)

    @defer.inlineCallbacks
    def post(self, key):
        # Not supported for contacts.
        self.constructor(key)
        raise cyclone.web.HTTPError(404)

    @IncrementMetric(name='put_google_contacts', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def put(self, key):
        """Update one contact (chosen by the 'jid' argument) of the connected
        user identified by *key*; 404 if the user is unknown/disconnected,
        500 if the XMPP session is not logged in."""
        # contact_arguments: argument-validation spec defined elsewhere in the
        # module — presumably lists/validates the accepted request fields.
        self.constructor(key, contact_arguments, metric=self.METRICS['put'])
        user = yield GoogleUser.load(key)
        if user is None or not user.connected:
            raise cyclone.web.HTTPError(404)
        # 'jid' selects the contact; the remaining args are the fields to set.
        jid = self.args.pop('jid')
        try:
            # Route the update through the worker that owns this user's session.
            yield API(key, queue=user.worker).update_contact(
                user.userid, jid, **self.args)
        except XMPPUserNotLogged as e:
            raise cyclone.web.HTTPError(500, str(e))
        self._response_json({'success': True, 'reason': 'ok'})

    @defer.inlineCallbacks
    def delete(self, key):
        # Not supported for contacts.
        self.constructor(key)
        raise cyclone.web.HTTPError(404)
def __init__(self):
    """Register the Google API routes and create the request metrics."""
    routes = [
        (r"/1/google/messages/(.+)", v1.handlers.GoogleMessagesHandler),
        (r"/1/google/contacts/(.+)", v1.handlers.GoogleContactsHandler),
        (r"/1/google/(.+)", v1.handlers.GoogleHandler),
    ]
    cyclone.web.Application.__init__(self, routes, debug=conf.CYCLONE_DEBUG)
    self.log = getLoggerAdapter(log)
    # Sampled request-service time for the whole application.
    self.metric = Metric(name='time',
                         value=None,
                         unit=v1.handlers.METRIC_UNIT_TIME,
                         source=v1.handlers.METRIC_SOURCE,
                         sampling=True)
    # One 'response_NXX' counter per HTTP status class.
    self.metric_response_codes = dict(
        (status_class,
         Metric(name='response_%sXX' % (status_class,), value=None,
                unit='requests', source=v1.handlers.METRIC_SOURCE))
        for status_class in (1, 2, 3, 4, 5))
class GoogleHandler(GenericXMPPHandler):
    """XMPP event handler for a single Google Talk session: tracks connection
    metrics, keeps the roster cache fresh and turns received messages and
    presences into push notifications."""

    # Class-level gauges/samples shared by all sessions (reset=False keeps the
    # connection gauge running across metric flushes).
    CONNECTIONS_METRIC = Metric(name='connections', value=None,
                                unit='connections', source=METRIC_SOURCE,
                                reset=False)
    CONNECTION_TIME_METRIC = Metric(name='connection_time', value=None,
                                    unit='seconds', source=METRIC_SOURCE,
                                    sampling=True)
    CONNECTION_KEEP_ALIVE_TIME_METRIC = Metric(
        name='connection_last_time_keep_alive', value=None, unit='seconds',
        source=METRIC_SOURCE, sampling=True)

    def __init__(self, client):
        GenericXMPPHandler.__init__(self, client)
        self.user = client.user
        self.roster = RosterManager(self.user.userid, self.log)

    def isOwnBareJid(self, jid):
        """True if *jid* has the same user and host as this session's own JID
        (resource is deliberately ignored)."""
        return (self.client.jid.user == jid.user and
                self.client.jid.host == jid.host)

    @defer.inlineCallbacks
    def getContact(self, jid, barejid=None):
        """Return the cached roster item for *jid*, creating (and caching) a
        minimal one if the contact is not in the roster yet."""
        barejid = jid.userhost() if barejid is None else barejid
        roster_item = yield self.roster.get(barejid)
        if roster_item is None:
            roster_item = GoogleRosterItem(_userid=self.user.userid,
                                           _jid=barejid)
            roster_item.name = jid.user
            #Set in roster to retrieve later
            yield self.roster.set(jid, name=roster_item.name)
        defer.returnValue(roster_item)

    @IncrementMetric(name='connection_established', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    def onConnectionEstablished(self):
        self.CONNECTIONS_METRIC.add(1)
        self.log.info('CONNECTION_ESTABLISHED %s', self.user.jid)

    @IncrementMetric(name='connection_lost', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    def onConnectionLost(self, reason):
        """Record connection statistics and update the retry counter used to
        detect flapping connections."""
        self.CONNECTIONS_METRIC.add(-1)
        currTime = Timer().utcnow()
        connectedTime = self.client.connectedTime
        # Seconds since the keep-alive supervisor last ran a round.
        lastTimeKeepAlive = (currTime - KatooApp().getService(
            'XMPP_KEEPALIVE_SUPERVISOR').lastTime).seconds
        isAuthenticating = self.client.isAuthenticating()
        self.log.info(
            'CONNECTION_LOST %s. Connected Time: %s. LastTimeKeepAlive: %s. Authenticating: %s. Reason %s',
            self.user.jid, connectedTime, lastTimeKeepAlive, isAuthenticating,
            str(reason))
        self.CONNECTION_TIME_METRIC.add(connectedTime)
        self.CONNECTION_KEEP_ALIVE_TIME_METRIC.add(lastTimeKeepAlive)
        if not isAuthenticating:
            # Short-lived connections count as retries; a long-lived one
            # resets the counter.
            if connectedTime < conf.XMPP_MIN_CONNECTED_TIME:
                self.client.retries += 1
                if self.client.retries >= conf.XMPP_MAX_RETRIES:
                    self.client.onMaxRetries()
                    return
            else:
                self.client.retries = 0

    @IncrementMetric(name='connection_authenticated', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    def onAuthenticated(self):
        self.log.info('CONNECTION_AUTHENTICATED %s', self.user.jid)
        #Set away state to be restored with right value when presences will be received
        self.user.away = True
        self.user.save()
        #Send Available and getting roster
        self.protocol.available(
            show=conf.XMPP_STATE, priority=conf.XMPP_PRIORITY,
            statuses={'en-US': conf.XMPP_MOOD} if conf.XMPP_MOOD else None)
        d = self.protocol.getRoster()
        d.addCallback(self.protocol.onRosterReceived)

    @IncrementMetric(name='presence_available', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def onAvailableReceived(self, jid, state):
        """Handle an available presence: either our own resource coming back
        online, or a contact we may have to announce via push."""
        if self.isOwnBareJid(jid) and jid.resource == self.user.resource:
            #TODO: Test function with eq or hash operators with timeit
            self.log.info('APP_GO_ONLINE %s', self.user.jid)
            self.user.away = False
            yield self.user.save()
        else:
            self.log.debug('XMPP_GO_ONLINE %s <- %s@%s/%r State: %r',
                           self.user.jid, jid.user, jid.host, jid.resource,
                           state)
            # connectedTime > 60 filters out the presence burst right after a
            # (re)connection.
            if (self.user.haveAvailablePresenceContacts() and
                    (state is None or state == 'chat') and
                    self.user.pushtoken and self.client.connectedTime > 60):
                #Connection has not been re-established and presence is ok
                barejid = jid.userhost()
                if self.user.isContactInAvailablePresence(barejid):
                    roster_item = yield self.getContact(jid, barejid)
                    message = u'{0} {1} {2}'.format(
                        u'\U0001f514', roster_item.contactName,
                        translate.TRANSLATORS[self.user.lang]._('available'))
                    API(self.user.userid).sendpush(
                        message=message, token=self.user.pushtoken,
                        badgenumber=self.user.badgenumber, sound='katoo.aif',
                        jid=barejid, ignore=False, type='available')
                    # One-shot notifications are removed after firing.
                    if not roster_item.notifyWhenAvailable:
                        self.user.removeAvailablePresenceContact(barejid)
                        yield self.user.save()
                        roster_item.notifyWhenAvailable = None
                        yield roster_item.save()

    @IncrementMetric(name='presence_unavailable', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    def onUnavailableReceived(self, jid):
        """Handle an unavailable presence; our own resource going away flips
        the user's away flag."""
        if self.isOwnBareJid(jid) and jid.resource == self.user.resource:
            #TODO: Test function with eq or hash operators with timeit
            self.log.info('APP_GO_AWAY %s', self.user.jid)
            self.user.away = True
            return self.user.save()
        self.log.debug('XMPP_GO_OFFLINE %s <- %s@%s/%r', self.user.jid,
                       jid.user, jid.host, jid.resource)

    @IncrementMetric(name='roster_received', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def onRosterReceived(self, roster):
        yield self.roster.processRoster(roster)
        if not self.user.connected:
            #Remove data due to user is disconnected while processing is performed
            yield GoogleRosterItem.remove(self.user.userid)
        self.log.info('ROSTER_RECEIVED. PROCESSED ROSTER %s', self.user.jid)

    @IncrementMetric(name='roster_set', unit=METRIC_UNIT, source=METRIC_SOURCE)
    def onRosterSet(self, item):
        self.log.debug('onRosterSet to %s <- item %s', self.user.jid, item)
        fromjid, name = self.roster.getName(item)
        if name:
            self.roster.set(fromjid, name=name)

    @IncrementMetric(name='roster_remove', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    def onRosterRemove(self, item):
        #We don't remove roster items
        self.log.debug('onRosterRemove to %s <- item %s', self.user.jid, item)

    @IncrementMetric(name='message_received', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def onMessageReceived(self, fromjid, msgid, body):
        """Persist an incoming chat message and, if the app is backgrounded
        (user away) and has a push token, notify the device."""
        self.log.debug("MESSAGE_RECEIVED to %s. msgid(%s) from(%s): %r",
                       self.user.jid, msgid, fromjid, body)
        barefromjid = fromjid.userhost()
        message = GoogleMessage(userid=self.user.userid, fromid=barefromjid,
                                msgid=msgid, data=body)
        try:
            yield message.save()
            if self.user.pushtoken and self.user.away:
                roster_item = yield self.getContact(fromjid, barefromjid)
                self.user.badgenumber += 1
                self.log.debug('SENDING_PUSH %s. RosterItem: %s, User data: %s',
                               self.user.jid, roster_item, self.user)
                if roster_item.snoozePushTime:
                    # Snoozed contact: silent badge-only push.
                    API(self.user.userid).sendpush(
                        message='', token=self.user.pushtoken,
                        badgenumber=self.user.badgenumber)
                else:
                    API(self.user.userid).sendchatmessage(
                        msg=body, token=self.user.pushtoken,
                        badgenumber=self.user.badgenumber,
                        jid=roster_item.jid,
                        fullname=roster_item.contactName,
                        sound=self.user.favoritesound if roster_item.favorite
                        else self.user.pushsound,
                        favorite_emoji=roster_item.favoriteEmoji,
                        lang=self.user.lang)
                # Persist the incremented badge number.
                yield self.user.save()
        except Exception as e:
            self.log.err(e, 'ON_MESSAGE_RECEIVED_EXCEPTION')
class XMPPGoogle(ReauthXMPPClient, Observer):
    """One Google Talk XMPP client per user, with periodic OAuth token
    renewal and keep-alive driven by the keep-alive supervisor."""

    # Counts how many times the periodic check decided to renew credentials.
    CHECK_AUTH_RENEWAL_METRIC = Metric(name='check_auth_renewal', value=None,
                                       unit=METRIC_UNIT, source=METRIC_SOURCE)

    def __init__(self, user, app):
        ReauthXMPPClient.__init__(
            self,
            jid=jid.JID("%s/%s" % (user.jid, conf.XMPP_RESOURCE)),
            password=user.token,
            host="talk.google.com",
            port=5222,
            logid=user.userid)
        Observer.__init__(self)
        self.user = user
        self.retries = 0
        self.logTraffic = conf.XMPP_LOG_TRAFFIC
        #Initialize protocol
        self.handler = GoogleHandler(self)
        protocol = CompleteBotProtocol(self.handler)
        protocol.setHandlerParent(self)
        self.setServiceParent(app)
        #Register in XMPP_KEEPALIVE_SERVICE
        KatooApp().getService('XMPP_KEEPALIVE_SUPERVISOR').registerObserver(self)

    def notify(self):
        """Keep-alive callback from the supervisor: renew OAuth credentials
        when due, re-save fresh sessions, then send a whitespace keep-alive."""
        #Check if it is mandatory to do AUTH_RENEWAL
        if self.lastTimeAuth >= self.AUTH_RENEWAL_TIME:
            self.log.info('Launching AUTH_RENEWAL as periodic task')
            self.CHECK_AUTH_RENEWAL_METRIC.add(1)
            reactor.callLater(0, self.onAuthenticationRenewal, reason=None)
        if self.connectedTime <= conf.XMPP_KEEP_ALIVE_TIME + 10:
            self.log.info(
                'Saving user data to prevent USER_MIGRATION_STOPPED due to not clear write concerns in MONGO. User: %s',
                self.user)
            reactor.callLater(0, self.user.save)
        #Send Keep Alive
        return self.handler.protocol.send(' ')

    @property
    def name(self):
        return self.user.userid

    @property
    def roster(self):
        return self.handler.roster

    @IncrementMetric(name='error_stream', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    def _onStreamError(self, reason):
        self.log.err(reason, 'STREAM_EROR_EVENT %s' % (self.user.jid))

    @IncrementMetric(name='authentication_renewal', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def onAuthenticationRenewal(self, reason):
        """Refresh the Google OAuth2 access token and push the new password
        into the authenticator; always notifies the base class at the end."""
        self.log.info('AUTH_RENEWAL_EVENT %s', self.user.jid)
        postdata = {'client_id': conf.GOOGLE_CLIENT_ID,
                    'client_secret': conf.GOOGLE_CLIENT_SECRET,
                    'refresh_token': self.user.refreshtoken,
                    'grant_type': 'refresh_token'}
        # '' means "no error" for the base-class callback below.
        e = ''
        try:
            #response = yield cyclone.httpclient.fetch(url=conf.GOOGLE_OAUTH2_URL, postdata=postdata)
            response = yield cyclone.httpclient.fetch(
                conf.GOOGLE_OAUTH2_URL, postdata=urllib.urlencode(postdata))
            if response.code != 200:
                raise ValueError('Wrong response code:%s. Body: %s'
                                 % (response.code, response.body))
            data = json.loads(response.body)
            self.log.debug('AUTH_RENEWAL_NEW_DATA %s. New auth data: %s',
                           self.user.jid, data)
            self.user.token = data['access_token']
            #Updating authenticator password with new credentials
            self.factory.authenticator.password = self.user.token
            self._lastTimeAuth = Timer().utcnow()
            yield self.user.save()
        except Exception as ex:
            e = ex
            self.log.err(e, 'AUTH_RENEWAL_ERROR %s' % (self.user.jid))
        finally:
            #Calling to super to perform default behaviour (decrement counter to stop connection in the next retry if not success)
            ReauthXMPPClient.onAuthenticationRenewal(self, e)

    @IncrementMetric(name='error_authentication', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    def onAuthenticationError(self, reason):
        self.log.err(reason, 'AUTH_ERROR_EVENT %s' % (self.user.jid))
        return self.disconnect()

    @IncrementMetric(name='error_maxretries', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    def onMaxRetries(self):
        self.log.error('CONNECTION_MAX_RETRIES %s', self.user.jid)
        return self.disconnect()

    @IncrementMetric(name='disconnect', unit=METRIC_UNIT, source=METRIC_SOURCE)
    def disconnect(self, change_state=True):
        """Tear this client down. With change_state=True the user is marked
        disconnected and their cached messages/roster are cleaned up;
        change_state=False keeps the stored state (presumably for worker
        migration — confirm with callers)."""
        self.log.info('DISCONNECTED %s', self.user.jid)
        #Unregister in XMPP_KEEPALIVE_SERVICE
        KatooApp().getService('XMPP_KEEPALIVE_SUPERVISOR').unregisterObserver(self)
        deferred_list = [defer.maybeDeferred(self.disownServiceParent)]
        if change_state:
            self.user.away = True
            self.user.connected = False
            deferred_list.append(self.user.save())
            deferred_list.append(GoogleMessage.updateRemoveTime(
                self.user.userid, self.user.lastTimeConnected))
            deferred_list.append(GoogleRosterItem.remove(self.user.userid))
        # consumeErrors: a failure in one cleanup must not abort the others.
        return defer.DeferredList(deferred_list, consumeErrors=True)

    def __str__(self):
        return '<%s object at %s. name: %s>(user: %s)' % (
            self.__class__.__name__, hex(id(self)), self.name, self.user)
class GoogleMessagesHandler(MyRequestHandler):
    """REST endpoint for a user's stored offline messages: GET lists them,
    DELETE flushes them and resets the badge; POST/PUT answer 404."""

    # Sampled service-time metrics for the supported verbs.
    METRICS = {
        'get': Metric(name='time_get_google_messages', value=None,
                      unit=METRIC_UNIT_TIME, source=METRIC_SOURCE,
                      sampling=True),
        'delete': Metric(name='time_delete_google_messages', value=None,
                         unit=METRIC_UNIT_TIME, source=METRIC_SOURCE,
                         sampling=True)
    }

    @IncrementMetric(name='get_google_messages', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def get(self, key):
        """Return the pending messages of the user identified by *key*."""
        self.constructor(key, metric=self.METRICS['get'])
        user = yield GoogleUser.load(key)
        if user is None:
            raise cyclone.web.HTTPError(404)
        messages = yield GoogleMessage.getMessages(key)
        self._response_json({
            'current_time': Timer().isoformat(),
            'success': True,
            'messages': messages,
            'len': len(messages),
            'reason': 'ok',
            'connected': user.connected
        })

    @defer.inlineCallbacks
    def post(self, key):
        # Not supported for messages.
        self.constructor(key)
        raise cyclone.web.HTTPError(404)

    @defer.inlineCallbacks
    def put(self, key):
        # Not supported for messages.
        self.constructor(key)
        raise cyclone.web.HTTPError(404)

    @IncrementMetric(name='delete_google_messages', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def delete(self, key):
        """Flush the user's stored messages and zero the badge counter on the
        owning worker when the user is connected."""
        self.constructor(key, metric=self.METRICS['delete'])
        user = yield GoogleUser.load(key)
        if user is None:
            raise cyclone.web.HTTPError(404)
        #Remove messages from database (pending to implement)
        #update badgenumber
        try:
            yield GoogleMessage.flushMessages(key)
            if user.connected:
                yield API(key, queue=user.worker).update(
                    key, **{'badgenumber': 0})
            self._response_json({'success': True, 'reason': 'ok'})
        except XMPPUserNotLogged as e:
            raise cyclone.web.HTTPError(500, str(e))
class GoogleHandler(MyRequestHandler):
    """REST endpoint for the Google session lifecycle: GET reports presence,
    POST logs in (displacing stale sessions), PUT updates session settings,
    DELETE logs out."""

    FAKE_PURCHASE_ENABLED = True if conf.FAKE_PURCHASE_URL else False

    # Sampled service-time metric per verb.
    METRICS = {
        'get': Metric(name='time_get_google', value=None,
                      unit=METRIC_UNIT_TIME, source=METRIC_SOURCE,
                      sampling=True),
        'post': Metric(name='time_post_google', value=None,
                       unit=METRIC_UNIT_TIME, source=METRIC_SOURCE,
                       sampling=True),
        'put': Metric(name='time_put_google', value=None,
                      unit=METRIC_UNIT_TIME, source=METRIC_SOURCE,
                      sampling=True),
        'delete': Metric(name='time_delete_google', value=None,
                         unit=METRIC_UNIT_TIME, source=METRIC_SOURCE,
                         sampling=True)
    }

    @IncrementMetric(name='get_google', unit=METRIC_UNIT, source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def get(self, key):
        """Report whether the connected user's own resource is online."""
        self.constructor(key, metric=self.METRICS['get'])
        user = yield GoogleUser.load(key)
        if user is None or not user.connected:
            raise cyclone.web.HTTPError(404)
        response_data = {
            'success': True,
            'reason': 'ok',
            'resource_connected': not user.away
        }
        self._response_json(response_data)

    @IncrementMetric(name='post_google', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def post(self, key):
        """Log the user in. Displaces an existing session that has the same
        push token (different device id) or the same key with another jid."""
        self.constructor(key, login_arguments, metric=self.METRICS['post'])
        user = yield GoogleUser.load(key)
        user_to_logout = None
        pushtoken = self.args['_pushtoken']
        if user is None:
            # New key: another account may already own this push token.
            if pushtoken:
                user_to_logout = yield GoogleUser.load(pushtoken=pushtoken)
        elif user.connected:
            # Same key logging in with a different Google account.
            if user.jid != self.args['_jid']:
                user_to_logout = user
        if not user_to_logout is None:
            if user is None:
                self.log.info('WEB_HANDLER_LOGOUT %s with the same pushtoken',
                              user_to_logout.userid)
            else:
                self.log.info('WEB_HANDLER_LOGOUT %s with other jid: %s->%s',
                              key, user.jid, self.args['_jid'])
            if user_to_logout.connected:
                # synchronous_call: wait for the old session to be fully gone.
                yield API(key, queue=user_to_logout.worker,
                          synchronous_call=True).logout(user_to_logout.userid)
            else:
                yield GoogleUser.remove(user_to_logout.userid)
            user = None
        try:
            response_data = {'success': False, 'reason': 'Already logged'}
            if user is None or not user.connected:
                if user and not user.connected:
                    # Stale disconnected record: replace it, keep messages.
                    yield GoogleUser.remove(user.userid, flush_messages=False)
                user = GoogleUser(_userid=key,
                                  _version=self.user_agent.version,
                                  _iosversion=self.user_agent.iosVersion,
                                  _hwmodel=self.user_agent.hwmodel,
                                  **self.args)
                yield API(key).login(user)
                response_data = {'success': True, 'reason': 'ok'}
        except XMPPUserAlreadyLogged:
            # Benign race: someone else finished the login first.
            pass
        if self.FAKE_PURCHASE_ENABLED:
            self.perform_fakepurchase(userid=user.jid, deviceid=user.userid)
        response_data.update(
            dict(background_time=conf.XMPP_BACKGROUND_TIME,
                 resource_prefix=conf.XMPP_RESOURCE,
                 gtalk_priority=conf.XMPP_GTALK_PRIORITY,
                 ads_gift=self.FAKE_PURCHASE_ENABLED))
        self._response_json(response_data)

    @IncrementMetric(name='put_google', unit=METRIC_UNIT, source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def put(self, key):
        """Update session settings for a connected user via their worker."""
        self.constructor(key, update_arguments, metric=self.METRICS['put'])
        user = yield GoogleUser.load(key)
        if user is None or not user.connected:
            raise cyclone.web.HTTPError(404)
        try:
            yield API(key, queue=user.worker).update(
                key,
                _version=self.user_agent.version,
                _iosversion=self.user_agent.iosVersion,
                _hwmodel=self.user_agent.hwmodel,
                **self.args)
            self._response_json({
                'success': True,
                'reason': 'ok',
                'background_time': conf.XMPP_BACKGROUND_TIME,
                'resource_prefix': conf.XMPP_RESOURCE,
                'gtalk_priority': conf.XMPP_GTALK_PRIORITY,
                'ads_gift': self.FAKE_PURCHASE_ENABLED
            })
        except XMPPUserNotLogged as e:
            raise cyclone.web.HTTPError(500, str(e))

    @IncrementMetric(name='delete_google', unit=METRIC_UNIT,
                     source=METRIC_SOURCE)
    @defer.inlineCallbacks
    def delete(self, key):
        """Log the user out (connected) or remove the stored record."""
        self.constructor(key, metric=self.METRICS['delete'])
        user = yield GoogleUser.load(key)
        if user is None:
            raise cyclone.web.HTTPError(404)
        if user.connected:
            yield API(key, queue=user.worker).logout(key)
        else:
            # NOTE(review): called on the instance here, while other paths use
            # the GoogleUser.remove(userid) class-level form — confirm both
            # are equivalent.
            yield user.remove(user.userid)
        self._response_json({'success': True, 'reason': 'ok'})
class AuthMongoProtocol(txmongo.MongoProtocol): CONNECTIONS_METRIC = Metric(name='connections', value=None, unit='connections', source='MONGO', reset=False) username = None password = None database = None log = None def _authenticate(self, name, password): """ Send an authentication command for this database. mostly stolen from pymongo """ if not isinstance(name, basestring): raise TypeError("name must be an instance of basestring") if not isinstance(password, basestring): raise TypeError("password must be an instance of basestring") d = defer.Deferred() # First get the nonce Collection(self.database, "$cmd").find_one({ "getnonce": 1 }, _proto=self).addCallback(self._authenticate_with_nonce, name, password, d).addErrback( self._auth_error, d) return d def _authenticate_with_nonce(self, result, name, password, d): nonce = result['nonce'] key = helpers._auth_key(nonce, name, password) # hacky because order matters auth_command = SON(authenticate=1) auth_command['user'] = unicode(name) auth_command['nonce'] = nonce auth_command['key'] = key # Now actually authenticate Collection(self.database, "$cmd").find_one(auth_command, _proto=self).addCallback( self._authenticated, d).addErrback(self._auth_error, d) def _authenticated(self, result, d): """might want to just call callback with 0.0 instead of errback""" ok = result['ok'] if ok: d.callback(ok) else: d.errback(ValueError(result['errmsg'])) def _auth_error(self, reason): self.log.warning("Auth error with Mongo reason=%s", reason) @IncrementMetric(name='connectionMade', unit='calls', source='MONGO') @defer.inlineCallbacks def connectionMade(self): self.CONNECTIONS_METRIC.add(1) if not self.username is None: try: yield self._authenticate(self.username, self.password) yield txmongo.MongoProtocol.connectionMade(self) except Exception, e: self.factory.maxRetries = conf.BACKEND_MAX_RETRIES self.transport.loseConnection() msg = "Mongo Error.%s: %r" % (e.__class__.__name__, e) self.log.warning(msg) defer.returnValue(None) 
else:
def _init_metrics_versions(self, name, key):
    """Create one 'versions' metric per distinct client version among the
    currently connected users, stored under self._versions[key].

    NOTE(review): this body uses `yield` on a Deferred, so it relies on an
    @defer.inlineCallbacks decoration not visible here — confirm at the
    definition site.
    """
    versions = yield GoogleUser.get_distinct(key, spec={'_connected': True})
    # Feed dict() a generator instead of dict([(k, v) for ...]) — same
    # result without materialising an intermediate list.
    self._versions[key] = dict(
        (version, Metric(name='%s.%s' % (name, version), value=None,
                         unit='versions', source='MONGO'))
        for version in versions)
class GlobalSupervisor(Supervisor): name = 'GLOBAL_SUPERVISOR' log = getLoggerAdapter(log, id=name) DISCONNECT_AWAY_METRIC = Metric(name='away_user_disconnected', value=None, unit='events', source='XMPPGOOGLE') def __init__(self): Supervisor.__init__(self) self.lock = defer.DeferredLock() self._checkworkerstasks = [ self.processDeathWorkers, self.processBadAssignedWorkers, self.processOnMigrationUsers, self.checkRunningWorkers ] self._globalmetrics = [RedisMetrics, MongoMetrics] def _attach_global_metrics(self): service = KatooApp().getService(MetricsSupervisor.name) for metric in self._globalmetrics: service.registerObserver(metric()) @defer.inlineCallbacks def checkRunningWorkers(self): workers = yield Worker.getWorkers(Worker.redis_workers_keys) if workers: self.log.info('CHECKING_RUNNING_WORKERS %s', len(workers)) for worker in workers: name = worker.get('name') key = worker.get('key') lastTime = worker.get('lastTime') if key is None or name is None or lastTime is None: self.log.warning('WORKER_DATA_WRONG %s', worker) continue death = worker.get('death') if death is None: lastTime = parser.parse(lastTime) delta = datetime.utcnow() - lastTime if delta.seconds > conf.SUPERVISOR_WORKER_REFRESH_TIME: self.log.warning( 'REGISTERING_WORKER_DEATH %s has not been updated since %s second(s)', name, delta.seconds) w = Worker([], name=name) w.log = self.log yield w.register_death() @defer.inlineCallbacks def processOnMigrationUsers(self): onMigration_users = yield GoogleUser.get_onMigration() total_users = len(onMigration_users) if total_users > 0: self.log.info("ON_MIGRATION_USERS %s", total_users) now = datetime.utcnow() for data in onMigration_users: user = GoogleUser(**data) delta_time = now - user.onMigrationTime if delta_time.seconds < conf.XMPP_KEEP_ALIVE_TIME + 30: if 60 < delta_time.seconds < 70: self.log.warning( '[%s] USER_MIGRATION_STOPPED %s second(s) ago. Waiting to fix the problem with XMPP_KEEP_ALIVE. 
User state: %s', user.userid, delta_time.seconds, user) continue self.log.warning( '[%s] USER_MIGRATION_STOPPED %s second(s) ago. Performing new relogin. User state: %s', user.userid, delta_time.seconds, user) user.worker = user.userid user.onMigrationTime = '' yield user.save() yield API(user.userid).relogin(user, pending_jobs=[]) @defer.inlineCallbacks def getPendingJobs(self, userid, queue_name): queue = Queue(queue_name) job_ids = yield queue.job_ids jobs = [] index = 0 for job_id in job_ids: try: job = yield Job.fetch(job_id, connection=queue.connection) if job.meta.get('userid') == userid: jobs.append(job_id) except Exception as e: self.log.err( e, '[%s] Exception fetching job %s with index %s while getPendingJobs in queue %s' % (userid, job_id, index, queue_name)) yield queue.remove(job_id) finally: index += 1 defer.returnValue(jobs) @defer.inlineCallbacks def processDeathWorkers(self): #avoid process death workers when service is not running death_workers = yield Worker.getWorkers( Worker.redis_death_workers_keys) if self.running else [] if death_workers: self.log.info('DEATH_WORKERS %s', [worker.get('name') for worker in death_workers]) for worker in death_workers: name = worker.get('name') if conf.DIST_QUEUE_LOGIN in worker.get('queues', []): connected_users = yield GoogleUser.get_connected(name) total_users = len(connected_users) self.log.info( 'Reconnecting %s connected user(s) of death worker %s', total_users, name) last_user_index = total_users - 1 for i in xrange(total_users): try: data = connected_users[i] user = GoogleUser(**data) #Update worker as userid to enqueue new jobs in user own queue user.worker = user.userid yield user.save() #Get pending jobs reactor.callLater(0, self.reloginUser, user, name, i == last_user_index) self.log.info( '[%s] Reconnecting %s/%s user(s) of worker %s', user.userid, i + 1, total_users, name) except Exception as e: self.log.err( e, '[%s] Exception while reconnecting' % (data['_userid'])) #Remove worker and queue 
when no users were assigned if total_users == 0: yield self.removeWorker(name) else: yield self.removeWorker(name) @defer.inlineCallbacks def processBadAssignedWorkers(self): assigned_workers = yield GoogleUser.get_assigned_workers() running_workers = yield Worker.getWorkers(Worker.redis_workers_keys) running_workers = [ worker.get('name') for worker in running_workers if not worker.get('name') is None ] death_workers = yield Worker.getWorkers( Worker.redis_death_workers_keys) death_workers = [ worker.get('name') for worker in death_workers if not worker.get('name') is None ] registered_workers = set(running_workers + death_workers) assigned_workers = set(assigned_workers) bad_workers = assigned_workers.difference(registered_workers) if bad_workers: self.log.warning( 'BAD_WORKERS %s are assigned to users. Running %s Death %s', bad_workers, len(running_workers), len(death_workers)) for worker in bad_workers: bad_users = yield GoogleUser.get_connected(worker_name=worker) total_bad_users = len(bad_users) if total_bad_users > 0: self.log.info( 'Reconnecting %s users assigned to bad worker %s', total_bad_users, worker) last_user_index = total_bad_users - 1 for i in xrange(total_bad_users): try: data = bad_users[i] user = GoogleUser(**data) user.worker = user.userid yield user.save() reactor.callLater(0, self.reloginUser, user, worker, i == last_user_index) self.log.info( '[%s] Reconnecting %s/%s user(s) of worker %s', user.userid, i + 1, total_bad_users, worker) except Exception as e: self.log.err( e, '[%s] Exception while reconnecting' % (data['_userid'])) #Remove worker and queue when no users were assigned if total_bad_users == 0: yield self.removeWorker(worker) @defer.inlineCallbacks def checkWorkers(self): try: for task in self._checkworkerstasks: if self.running: yield task() else: self.log.info( 'CheckWorkers task %s not launched. 
Supervisor not running', task) except Exception as e: self.log.err(e, 'Exception in checkWorkers task %s' % (task)) @defer.inlineCallbacks def disconnectAwayUsers(self): away_users = yield GoogleUser.get_away() away_users = [] if not away_users else away_users self.log.info('CHECKING_AWAY_USERS: %s', len(away_users)) for data in away_users: try: user = GoogleUser(**data) API(user.userid, queue=user.worker).disconnect(user.userid) APNSAPI(user.userid).sendpush(message=u'{0} {1}'.format( u'\ue252', translate.TRANSLATORS[user.lang]._('disconnected')), token=user.pushtoken, badgenumber=user.badgenumber, sound='') self.DISCONNECT_AWAY_METRIC.add(1) except Exception as e: self.log.err( e, '[%s] Exception disconnecting user' % (data['_userid'])) @defer.inlineCallbacks def reconnectUsers(self): connected_users = yield GoogleUser.get_connected() total_users = len(connected_users) self.log.info('reconnectUsers reconnecting %s users', total_users) for i in xrange(total_users): data = connected_users[i] try: user = GoogleUser(**data) worker, user.worker = user.worker, user.userid yield user.save() #Enqueing in the next loop iteration of twisted event loop reactor.callLater(0, self.reloginUser, user, user.worker) self.log.info('[%s] Reconnecting %s/%s user(s)', user.userid, i + 1, total_users) except Exception as e: self.log.err( e, '[%s] Exception while reconnecting' % (data['_userid'])) @defer.inlineCallbacks def reloginUser(self, user, last_worker, removeWorker=False): try: pending_jobs = yield self.getPendingJobs(user.userid, last_worker) yield API(user.userid).relogin(user, pending_jobs) except Exception as e: self.log.err(e, '[%s] Exception while reconnecting' % (user.userid)) finally: if removeWorker: yield self.removeWorker(last_worker) @defer.inlineCallbacks def removeWorker(self, name): self.log.info("Removing worker/queue %s from system", name) #Remove worker from death workers worker = Worker(queues=[], name=name) yield worker.remove(worker.key) #Remove own queue of 
worker queue = Queue(name) yield queue.empty() def startService(self): Supervisor.startService(self) t = LoopingCall(self.disconnectAwayUsers) self.registerTask(t) t.start(conf.TASK_DISCONNECT_SECONDS, now=False) if conf.TASK_RECONNECT_ALL_USERS: reactor.callLater(conf.TWISTED_WARMUP, self.lock.run, self.reconnectUsers) else: reactor.callLater(conf.TWISTED_WARMUP, self.lock.run, self.processDeathWorkers) if conf.REDIS_WORKERS > 0: t = LoopingCall(self.lock.run, self.checkWorkers) self.registerTask(t) t.start(conf.TASK_CHECK_WORKERS, now=False) reactor.callLater(conf.TWISTED_WARMUP, self._attach_global_metrics)
    def __init__(self):
        """Build the Metric objects and Mongo queries this reporter samples."""
        # Mongo-backed models whose collections get per-collection metrics.
        self._models = [GoogleMessage.model, GoogleRosterItem.model,
                        GoogleUser.model]
        # Per-collection metrics, keyed by collection name.
        self._metrics = dict([(model.collection,
                               self._create_metrics(model.collection))
                              for model in self._models])
        # Gauges counting users per connection state (sampled from Mongo).
        self._user_metrics = {'connected': Metric(name='googleusers.connected',
                                                  value=None,
                                                  unit='users',
                                                  source='MONGO'),
                              'away': Metric(name='googleusers.away',
                                             value=None,
                                             unit='users',
                                             source='MONGO'),
                              'onLine': Metric(name='googleusers.onLine',
                                               value=None,
                                               unit='users',
                                               source='MONGO'),
                              'disconnected': Metric(name='googleusers.disconnected',
                                                     value=None,
                                                     unit='users',
                                                     source='MONGO'),
                              'onRelogin': Metric(name='googleusers.onRelogin',
                                                  value=None,
                                                  unit='users',
                                                  source='MONGO'),
                              'nopushtoken': Metric(name='googleusers.nopushtoken',
                                                    value=None,
                                                    unit='users',
                                                    source='MONGO'),
                              'runningago24': Metric(name='googleusers.running24hago',
                                                     value=None,
                                                     unit='users',
                                                     source='MONGO'),
                              'runningago12': Metric(name='googleusers.running12hago',
                                                     value=None,
                                                     unit='users',
                                                     source='MONGO'),
                              'runningago01': Metric(name='googleusers.running1hago',
                                                     value=None,
                                                     unit='users',
                                                     source='MONGO')
                              }
        # Database-wide storage statistics (names mirror Mongo dbStats fields).
        self._global_metrics = {'objects': Metric(name='documents',
                                                  value=None,
                                                  unit='documents',
                                                  source='MONGO'),
                                'avgObjSize': Metric(name='document_size',
                                                     value=None,
                                                     unit=conf.MONGO_STORAGE_UNIT,
                                                     source='MONGO',
                                                     scale=conf.MONGO_STORAGE_UNIT_SCALE),
                                'dataSize': Metric(name='size',
                                                   value=None,
                                                   unit=conf.MONGO_STORAGE_UNIT,
                                                   source='MONGO',
                                                   scale=conf.MONGO_STORAGE_UNIT_SCALE),
                                'storageSize': Metric(name='storage_size',
                                                      value=None,
                                                      unit=conf.MONGO_STORAGE_UNIT,
                                                      source='MONGO',
                                                      scale=conf.MONGO_STORAGE_UNIT_SCALE),
                                'fileSize': Metric(name='file_size',
                                                   value=None,
                                                   unit=conf.MONGO_STORAGE_UNIT,
                                                   source='MONGO',
                                                   scale=conf.MONGO_STORAGE_UNIT_SCALE),
                                'indexes': Metric(name='indexes',
                                                  value=None,
                                                  unit='indexes',
                                                  source='MONGO'),
                                'indexSize': Metric(name='index_size',
                                                    value=None,
                                                    unit=conf.MONGO_STORAGE_UNIT,
                                                    source='MONGO',
                                                    scale=conf.MONGO_STORAGE_UNIT_SCALE)
                                }
        # Mongo query per user metric; None entries are presumably computed
        # elsewhere (time-window queries) — verify against the sampling code.
        # NOTE(review): '_onReloging' looks misspelled but is a runtime field
        # name — confirm it matches the GoogleUser model before renaming.
        self._user_queries = {'connected': {'_connected': True},
                              'away': {'_connected': True, '_away': True},
                              'onLine': {'_connected': True, '_away': False},
                              'disconnected': {'_connected': False},
                              'onRelogin': {'_connected': True,
                                            '_onReloging': True},
                              'nopushtoken': {'_connected': True,
                                              '_pushtoken': ''},
                              'runningago24': None,
                              'runningago12': None,
                              'runningago01': None
                              }
        # Version-distribution metrics (app version, iOS version, hw model).
        self._versions = {}
        for name, key in [('katoo', '_version'), ('ios', '_iosversion'),
                          ('hwmodel', '_hwmodel')]:
            self._init_metrics_versions(name, key)