def processBadAssignedWorkers(self):
    """Relogin users that are assigned to a worker which is not registered.

    A worker name is considered "bad" when it is returned by
    ``GoogleUser.get_assigned_workers()`` but is found neither among the
    workers listed under ``Worker.redis_workers_keys`` nor among those
    listed under ``Worker.redis_death_workers_keys``.

    For every bad worker, each user returned by
    ``GoogleUser.get_connected(worker_name=...)`` gets its ``worker`` set to
    its own ``userid``, is saved, and is handed to ``self.reloginUser``
    through ``reactor.callLater``.  A bad worker without connected users is
    removed with ``self.removeWorker``.

    This is a generator: each ``yield`` hands an asynchronous result to the
    driver (presumably Twisted's ``inlineCallbacks``; the decorator is not
    visible in this part of the file).
    """
    assigned_workers = yield GoogleUser.get_assigned_workers()

    running_workers = yield Worker.getWorkers(Worker.redis_workers_keys)
    running_names = [
        name for name in (entry.get('name') for entry in running_workers)
        if name is not None
    ]

    death_workers = yield Worker.getWorkers(Worker.redis_death_workers_keys)
    death_names = [
        name for name in (entry.get('name') for entry in death_workers)
        if name is not None
    ]

    registered_workers = set(running_names).union(death_names)
    bad_workers = set(assigned_workers) - registered_workers
    if bad_workers:
        self.log.warning(
            'BAD_WORKERS %s are assigned to users. Running %s Death %s',
            bad_workers, len(running_names), len(death_names))

    for worker in bad_workers:
        bad_users = yield GoogleUser.get_connected(worker_name=worker)
        total_bad_users = len(bad_users)

        if total_bad_users == 0:
            # Remove worker and queue when no users were assigned
            yield self.removeWorker(worker)
            continue

        self.log.info(
            'Reconnecting %s users assigned to bad worker %s',
            total_bad_users, worker)

        # position is 1-based so it can be logged directly and compared
        # with the total to detect the last user of the batch.
        for position, data in enumerate(bad_users, 1):
            try:
                user = GoogleUser(**data)
                # Reassign the user to a worker named after its own userid
                user.worker = user.userid
                yield user.save()
                # The final argument tells reloginUser whether this is the
                # last user of this worker.
                reactor.callLater(0, self.reloginUser, user, worker,
                                  position == total_bad_users)
                self.log.info(
                    '[%s] Reconnecting %s/%s user(s) of worker %s',
                    user.userid, position, total_bad_users, worker)
            except Exception as e:
                # Resolve the id defensively: an error raised here would
                # escape the handler, hide the original failure and stop
                # the processing of the remaining users and workers.
                try:
                    userid = data['_userid']
                except Exception:
                    userid = None
                self.log.err(
                    e, '[%s] Exception while reconnecting' % (userid,))
def processDeathWorkers(self):
    """Relogin the users of death workers and remove workers left unused.

    Death workers are read with
    ``Worker.getWorkers(Worker.redis_death_workers_keys)``, and only while
    ``self.running`` is true.  For each of them:

    * if ``conf.DIST_QUEUE_LOGIN`` is not among the worker's ``queues``,
      the worker is removed with ``self.removeWorker``;
    * otherwise every user returned by ``GoogleUser.get_connected(name)``
      gets its ``worker`` set to its own ``userid``, is saved, and is handed
      to ``self.reloginUser`` through ``reactor.callLater``.  When there
      are no connected users the worker is removed right away.

    This is a generator: each ``yield`` hands an asynchronous result to the
    driver (presumably Twisted's ``inlineCallbacks``; the decorator is not
    visible in this part of the file).
    """
    # Avoid processing death workers when the service is not running
    if not self.running:
        return

    death_workers = yield Worker.getWorkers(Worker.redis_death_workers_keys)
    if not death_workers:
        return

    self.log.info('DEATH_WORKERS %s',
                  [entry.get('name') for entry in death_workers])

    for worker in death_workers:
        name = worker.get('name')

        if conf.DIST_QUEUE_LOGIN not in worker.get('queues', []):
            yield self.removeWorker(name)
            continue

        connected_users = yield GoogleUser.get_connected(name)
        total_users = len(connected_users)
        self.log.info(
            'Reconnecting %s connected user(s) of death worker %s',
            total_users, name)

        if total_users == 0:
            # Remove worker and queue when no users were assigned
            yield self.removeWorker(name)
            continue

        # position is 1-based so it can be logged directly and compared
        # with the total to detect the last user of the batch.
        for position, data in enumerate(connected_users, 1):
            try:
                user = GoogleUser(**data)
                # Update worker as userid to enqueue new jobs in the
                # user's own queue
                user.worker = user.userid
                yield user.save()
                # The final argument tells reloginUser whether this is the
                # last user of this worker.
                reactor.callLater(0, self.reloginUser, user, name,
                                  position == total_users)
                self.log.info(
                    '[%s] Reconnecting %s/%s user(s) of worker %s',
                    user.userid, position, total_users, name)
            except Exception as e:
                # Resolve the id defensively: an error raised here would
                # escape the handler, hide the original failure and stop
                # the processing of the remaining users and workers.
                try:
                    userid = data['_userid']
                except Exception:
                    userid = None
                self.log.err(
                    e, '[%s] Exception while reconnecting' % (userid,))
def processOnMigrationUsers(self):
    """Relogin users whose migration has been pending for too long.

    Users are read with ``GoogleUser.get_onMigration()``.  For each one the
    time elapsed since ``user.onMigrationTime`` (compared against
    ``datetime.utcnow()``) decides what happens:

    * less than ``conf.XMPP_KEEP_ALIVE_TIME + 30`` seconds: the user is
      left alone; a warning is logged only while the elapsed time is
      strictly between 60 and 70 seconds;
    * otherwise: the user's ``worker`` is set to its own ``userid``,
      ``onMigrationTime`` is cleared, the user is saved and
      ``API(userid).relogin`` is called with no pending jobs.

    This is a generator: each ``yield`` hands an asynchronous result to the
    driver (presumably Twisted's ``inlineCallbacks``; the decorator is not
    visible in this part of the file).
    """
    onMigration_users = yield GoogleUser.get_onMigration()
    total_users = len(onMigration_users)
    if total_users > 0:
        self.log.info("ON_MIGRATION_USERS %s", total_users)

    now = datetime.utcnow()
    for data in onMigration_users:
        user = GoogleUser(**data)
        delta_time = now - user.onMigrationTime
        # timedelta.seconds only holds the remainder within a day, so a
        # migration stuck for 24h or more would look recent again and never
        # be relogged.  Count whole elapsed seconds including the days.
        elapsed = delta_time.days * 86400 + delta_time.seconds

        if elapsed < conf.XMPP_KEEP_ALIVE_TIME + 30:
            if 60 < elapsed < 70:
                self.log.warning(
                    '[%s] USER_MIGRATION_STOPPED %s second(s) ago. Waiting to fix the problem with XMPP_KEEP_ALIVE. User state: %s',
                    user.userid, elapsed, user)
            continue

        self.log.warning(
            '[%s] USER_MIGRATION_STOPPED %s second(s) ago. Performing new relogin. User state: %s',
            user.userid, elapsed, user)
        # Reassign the user to a worker named after its own userid and
        # clear the migration mark before asking for the relogin.
        user.worker = user.userid
        user.onMigrationTime = ''
        yield user.save()
        yield API(user.userid).relogin(user, pending_jobs=[])