class Redirector(Greenlet):
    """Greenlet that listens on a local address and links each client to a remote one.

    Every accepted client socket is wrapped in a ``Linker`` that is started in
    ``self.links`` so that all forwarding greenlets can be killed together.
    """

    def __init__(self, msg):
        """Create the listening socket described by ``msg``.

        :param msg: redirect request; this class reads its ``sock_type``,
            ``local``, ``remote`` and ``protocol`` attributes.
        """
        self.sock_type = msg.sock_type
        self.info = msg
        self.sock = socket.socket(socket.AF_INET, self.sock_type)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.links = Group()
        super(Redirector, self).__init__()

    def _run(self):
        """Bind, listen and start one ``Linker`` per accepted connection."""
        self.sock.bind(self.info.local)
        self.sock.listen(64)
        while True:
            cli, _addr = self.sock.accept()
            self.links.start(Linker(self.info.remote, self.sock_type, cli))

    def kill(self, *args, **kwargs):
        """Kill all linkers and this greenlet, then close the listening socket.

        Positional and keyword arguments are forwarded to ``Greenlet.kill`` so
        callers can still pass ``exception``, ``block`` or ``timeout``.
        """
        try:
            self.links.kill()
            super(Redirector, self).kill(*args, **kwargs)
        finally:
            # Always release the listening port, even if a kill raised.
            self.sock.close()

    def dump(self):
        """Return a dict describing this redirect (protocol, local, remote)."""
        return dict(
            protocol=self.info.protocol,
            local='%s:%d' % self.info.local,
            remote='%s:%d' % self.info.remote,
        )
class PbClient:
    """Socket client that buffers the incoming byte stream and decodes packets.

    Decoded packets are stored in ``callback_dict`` keyed by command name; a
    later packet for the same command overwrites the earlier one.
    """

    def __init__(self, ip, port):
        self.addr = (ip, int(port))
        self.callback_dict = {}
        self.greenlets = Group()
        self.soc = socket()
        self.interval = 1/200
        self.received_data = b''

    def connect(self):
        """Connect to the server and start the receive greenlet."""
        self.soc.connect(self.addr)
        self.greenlets.spawn(self.on_message)

    def send(self, data):
        self.soc.send(data)

    def close(self):
        """Best-effort shutdown: stop the receiver and close the socket."""
        try:
            self.greenlets.kill()
            if self.soc is not None:
                self.soc.close()
        except Exception:
            # Closing is best-effort, but leave a trace instead of hiding it.
            logger.debug("Error while closing connection", exc_info=True)
        finally:
            self.soc = None

    def on_message(self):
        """Receive loop: append incoming bytes to the buffer and decode them."""
        try:
            while self.soc:
                chunk = self.soc.recv(2048)
                if not chunk:
                    # An empty read means the peer closed the connection;
                    # looping again would spin without ever blocking.
                    break
                self.received_data += chunk
                self.handle_message()
        except EnvironmentError:
            pass

    def handle_message(self):
        """Decode every complete packet currently held in the buffer."""
        while len(self.received_data) >= HEAD_SIZE:
            command_id = None
            try:
                data = self.get_pack_stream()
                if data is None:
                    # Packet body not fully received yet; wait for more bytes.
                    return
                data_dict = DotDict(DataStruct.parse(data))
                command_id = data_dict['CommandID']
                cmd_name = CommandMapping(command_id)
                self.callback_dict[cmd_name] = data_dict
                logger.info("Receive data from {}:{}".format(cmd_name, data_dict))
            except ValueError:
                logger.warning("Receive unknown commandID: {}".format(command_id))
                return
            except TypeError:
                logger.error("Receive invalid data:", exc_info=True)
                return

    def get_pack_stream(self):
        """Pop the byte stream of a single complete packet from the buffer.

        The packet size is parsed from bytes 1..3 of the buffer. Returns
        ``None`` and leaves the buffer untouched when fewer than that many
        bytes have been received so far.
        """
        size_stream = self.received_data[1:4]
        pack_size = pack_size_struct.parse(size_stream)
        if len(self.received_data) < pack_size:
            return None
        pack_stream = self.received_data[:pack_size]
        self.received_data = self.received_data[pack_size:]
        return pack_stream
class MbtClient(aWebSocketClient):
    """WebSocket client that stores the latest decoded message per command."""

    def __init__(self, *args):
        super().__init__(*args)
        self.callback_dict = {}
        self.greenlets = Group()
        self.greenlets.spawn(self.onMessage)

    def onMessage(self):
        """Receive loop: decode each JSON message into ``callback_dict``."""
        while True:
            # Reset per message so the handlers below never report a stale
            # or unbound command id.
            cmd = None
            # The received object is a ws4py.messaging.TextMessage.
            data = self.receive()
            if data is None:
                # NOTE(review): ws4py's receive() is documented to return
                # None once the socket is terminated - confirm for this base.
                break
            try:
                # data.data is bytes; decode it to str before parsing.
                parsed_data = json.loads(data.data.decode())
                cmd = parsed_data["head"]["cmd"]
                cmd_name = CommandMapping(cmd)
                parsed_data["body"] = json.loads(parsed_data["body"])
                self.callback_dict[cmd_name] = DotDict(parsed_data)
                logger.info("Receive data from {}:{}".format(cmd_name, parsed_data))
            except ValueError:
                logger.warning("Receive unknown commandID: {}".format(cmd))
            except TypeError:
                logger.error("Receive invalid data that is not JSON serializable:{}".format(data.data))

    def closed(self, code, reason):
        """Stop the receive greenlet once the socket has been closed."""
        self.greenlets.kill()
        logger.debug("Socket was closed:code-{};reason-{}".format(code, reason))
def reset_update_server(self, server_name):
    """Resolve the update URL from the TXT record of ``server_name``.

    The record is requested concurrently through the system resolver, the
    zone's own name server and three public resolvers. The first response
    that carries an answer wins; the remaining lookups are killed.

    :param server_name: DNS name whose TXT record holds the update URL.
    :returns: ``True`` when a URL was found and handed to
        ``set_update_url``; ``False`` when no lookup produced an answer
        within 10 seconds.
    """
    from gevent.pool import Group

    def udp_txt_query(ns):
        import dns.message
        import dns.query
        q = dns.message.make_query(server_name, 'TXT')
        return dns.query.udp(q, ns)

    def local():
        import dns.resolver
        return dns.resolver.query(server_name, 'TXT').response

    def recursive():
        import socket
        import dns.resolver
        from settings import NAME_SERVER
        ns = dns.resolver.query(NAME_SERVER, 'NS').response.answer[0]
        ns = ns.items[0].target.to_text()
        return udp_txt_query(socket.gethostbyname(ns))

    def public(ns):
        return lambda: udp_txt_query(ns)

    group = Group()
    workers = [
        group.apply_async(lookup) for lookup in (
            local,
            recursive,
            public('119.29.29.29'),
            public('114.114.114.114'),
            public('8.8.8.8'),
        )
    ]

    response = None
    try:
        for worker in gevent.iwait(workers, 10):
            if not worker.successful():
                # We are not inside an except block, so log the stored
                # exception explicitly instead of using log.exception().
                log.error('Update server lookup failed: %r', worker.exception)
            elif worker.value.answer:
                response = worker.value
                break
            else:
                # A reply without an answer section is unusable; keep
                # waiting for the other resolvers.
                log.error('Update server lookup returned no answer')
    finally:
        group.kill()

    if response is None:
        return False

    url = response.answer[0].items[0].strings[0]
    self.set_update_url(url)
    return True
class SlaveRunner(Runner):
    """Runner that connects to a master over RPC and reacts to its messages."""

    def __init__(self, host, port):
        super().__init__(host, port)
        self.slave_id = socket.gethostname()
        self.client = rpc.Client(self.host, self.port)
        self.greenlet = Group()
        self.greenlet.spawn(self.work)
        ready = rpc.Message('slave_ready', None, self.slave_id)
        self.client.send(ready)

    def stats_reporter(self):
        """Periodically send a ``slave_complete`` message to the master."""
        while True:
            logger.info("starting....." + str(time.time()))
            # NOTE(review): time.sleep blocks the whole hub unless the time
            # module is monkey-patched by gevent - confirm.
            time.sleep(SLAVE_REPORT_INTERVAL)
            report = Message('slave_complete', None, socket.gethostname())
            self.client.send(report)

    def work(self):
        """Message loop: handle stop and quit requests from the master."""
        while True:
            print(">>>>>>>>>><<<<<<<<")
            msg = self.client.recv()
            kind = msg.type
            logger.info(kind)
            # 'slave_start' is received but intentionally ignored.
            if kind == 'slave_stop':
                for reply in ("slave_stop", "slave_ready"):
                    self.client.send(Message(reply, None, self.slave_id))
            elif kind == 'slave_quit':
                logger.info("Got quit message from master, shutting down...")
                self.stop()
                self.greenlet.kill()
def reset_update_server(self, server_name):
    """Resolve the update URL from the TXT record of ``server_name``.

    Five lookups run concurrently: the system resolver, the authoritative
    name server of ``settings.NAME_SERVER`` and three public resolvers. The
    first response carrying an answer is used and the rest are killed.

    :param server_name: DNS name whose TXT record holds the update URL.
    :returns: ``True`` when a URL was found and handed to
        ``set_update_url``; ``False`` when no lookup produced an answer
        within 10 seconds.
    """
    from gevent.pool import Group

    def udp_txt_query(ns):
        import dns.message
        import dns.query
        q = dns.message.make_query(server_name, 'TXT')
        return dns.query.udp(q, ns)

    def local():
        import dns.resolver
        return dns.resolver.query(server_name, 'TXT').response

    def recursive():
        import socket
        import dns.resolver
        from settings import NAME_SERVER
        ns = dns.resolver.query(NAME_SERVER, 'NS').response.answer[0]
        ns = ns.items[0].target.to_text()
        return udp_txt_query(socket.gethostbyname(ns))

    def public(ns):
        return lambda: udp_txt_query(ns)

    lookups = [
        local,
        recursive,
        public('119.29.29.29'),
        public('114.114.114.114'),
        public('8.8.8.8'),
    ]
    group = Group()
    workers = [group.apply_async(lookup) for lookup in lookups]

    response = None
    try:
        for worker in gevent.iwait(workers, 10):
            if not worker.successful():
                # Not inside an except block here, so log.exception() would
                # have no traceback to attach; log the stored error instead.
                log.error('Update server lookup failed: %r', worker.exception)
                continue
            if worker.value.answer:
                response = worker.value
                break
            # A reply without an answer section cannot provide the URL.
            log.error('Update server lookup returned no answer')
    finally:
        group.kill()

    if response is None:
        return False

    url = response.answer[0].items[0].strings[0]
    self.set_update_url(url)
    return True
class ATransaction(metaclass=ABCMeta):
    """Abstract base for a transaction driven by greenlets and events.

    Progress is tracked through four events (``ready_commit``, ``commit``,
    ``fail``, ``done``) and the outcome is delivered through ``result``.
    """

    @abstractmethod
    def __init__(self, _id):
        # Subclasses may define ``key`` before calling this initializer.
        self.key = getattr(self, "key", None)
        self.id = _id
        # Set by run(); stays None until the transaction has been started.
        self.main_thread: Greenlet = None
        self.threads = Group()
        self.ready_commit = event.Event()
        self.commit = event.Event()
        self.fail = event.Event()
        self.done = event.Event()
        self.result = event.AsyncResult()

    @property
    def status(self):
        """Return the ``EStatus`` for the most advanced event that is set.

        Failure takes precedence over every other state.
        """
        if self.fail.ready():
            return EStatus.FAIL
        if self.done.ready():
            return EStatus.DONE
        if self.commit.ready():
            return EStatus.COMMIT
        if self.ready_commit.ready():
            return EStatus.READY_COMMIT
        return EStatus.IN_PROGRESS

    def run(self):
        """Start the transaction and return what ``_spawn`` returned."""
        self.main_thread = self._spawn()
        return self.main_thread

    @g_async
    def _spawn(self):
        pass

    @g_async
    def clean(self):
        """Kill the main greenlet (if started) and all helper greenlets."""
        # clean() may be called before run(); main_thread is None until then.
        if self.main_thread is not None:
            self.main_thread.kill()
        self.threads.kill()

    @g_async
    def do_commit(self):
        pass

    @g_async
    def do_rollback(self):
        pass

    def get_result(self):
        """Block until ``result`` is set and return its value."""
        return self.result.get()

    def __hash__(self):
        return hash(self.id)

    def __repr__(self):
        return f"<ATransaction {self.id}>"

    def __str__(self):
        return f"ATransaction: #{self.id}"
def stop_users(self, user_count, stop_rate=None):
    """
    Stop `user_count` weighted users at a rate of `stop_rate`
    """
    if user_count == 0 or stop_rate == 0:
        return

    wanted_classes = self.weight_users(user_count)
    user_count = len(wanted_classes)

    # Pair each requested class with the first running user that matches it.
    to_stop = []
    for running in self.user_greenlets:
        matched = next(
            (cls for cls in wanted_classes if isinstance(running.args[0], cls)),
            None,
        )
        if matched is not None:
            to_stop.append(running.args[0])
            wanted_classes.remove(matched)

    if not to_stop:
        return

    if stop_rate is not None and stop_rate < user_count:
        sleep_time = 1.0 / stop_rate
        logger.info("Stopping %i users at rate of %g users/s" % (user_count, stop_rate))
    else:
        sleep_time = 0
        logger.info("Stopping %i users" % (user_count))

    async_calls_to_stop = Group()
    stop_group = Group()

    while to_stop:
        victim = to_stop.pop(random.randint(0, len(to_stop) - 1))
        logger.debug("Stopping %s" % victim._greenlet.name)
        if victim._greenlet is greenlet.getcurrent():
            # The user itself called runner.quit(); do not block on our own death.
            victim._group.killone(victim._greenlet, block=False)
        else:
            graceful = bool(self.environment.stop_timeout)
            async_calls_to_stop.add(
                gevent.spawn_later(0, User.stop, victim, force=not graceful)
            )
            if graceful:
                stop_group.add(victim._greenlet)
        # Pace the stops, but do not wait after the last one.
        if to_stop:
            gevent.sleep(sleep_time)

    async_calls_to_stop.join()

    finished_in_time = stop_group.join(timeout=self.environment.stop_timeout)
    if not finished_in_time:
        logger.info(
            "Not all users finished their tasks & terminated in %s seconds. Stopping them..."
            % self.environment.stop_timeout
        )
        stop_group.kill(block=True)

    logger.info("%i Users have been stopped" % user_count)
def stop_users(self, user_count, stop_rate=None):
    """
    Stop `user_count` weighted users at a rate of `stop_rate`

    When ``environment.stop_timeout`` is set, users are asked to stop
    gracefully and only killed once the timeout has elapsed; otherwise
    they are force-stopped right away.
    """
    if user_count == 0 or stop_rate == 0:
        return

    bucket = self.weight_users(user_count)
    user_count = len(bucket)
    to_stop = []
    for g in self.user_greenlets:
        for l in bucket:
            user = g.args[0]
            # Exact class match on purpose: subclasses are not selected.
            if l == type(user):
                to_stop.append(user)
                bucket.remove(l)
                break

    if not to_stop:
        return

    if stop_rate is None or stop_rate >= user_count:
        sleep_time = 0
        logger.info("Stopping %i users immediately" % (user_count))
    else:
        sleep_time = 1.0 / stop_rate
        logger.info("Stopping %i users at rate of %g users/s" % (user_count, stop_rate))

    # Always create the group so it is bound even if stop_timeout changes
    # while this method is sleeping between stops.
    stop_group = Group()

    while True:
        user_to_stop = to_stop.pop(random.randint(0, len(to_stop) - 1))
        logger.debug('Stopping %s' % user_to_stop._greenlet.name)
        if self.environment.stop_timeout:
            if not user_to_stop.stop(self.user_greenlets, force=False):
                # User.stop() returns False if the greenlet was not stopped, so
                # track its greenlet in order to wait for it to finish its task.
                stop_group.add(user_to_stop._greenlet)
        else:
            user_to_stop.stop(self.user_greenlets, force=True)
        if to_stop:
            gevent.sleep(sleep_time)
        else:
            break

    if self.environment.stop_timeout and not stop_group.join(
            timeout=self.environment.stop_timeout):
        logger.info(
            "Not all users finished their tasks & terminated in %s seconds. Stopping them..."
            % self.environment.stop_timeout)
        stop_group.kill(block=True)

    logger.info("%i Users have been stopped" % user_count)
def stop_users(self, user_classes_stop_count: Dict[str, int]):
    """Stop the requested number of running users for each user class.

    Users are stopped asynchronously. When ``environment.stop_timeout`` is
    set they get that long to finish before being killed.
    """
    async_calls_to_stop = Group()
    stop_group = Group()

    for user_class, stop_count in user_classes_stop_count.items():
        if self.user_classes_count[user_class] == 0:
            continue

        # Collect up to ``stop_count`` running users of this class.
        to_stop = []
        for user_greenlet in self.user_greenlets:
            if len(to_stop) == stop_count:
                break
            try:
                user = user_greenlet.args[0]
            except IndexError:
                logger.error(
                    "While stopping users, we encountered a user that didnt have proper args %s", user_greenlet
                )
                continue
            if isinstance(user, self.user_classes_by_name[user_class]):
                to_stop.append(user)

        while to_stop:
            victim = to_stop.pop()
            logger.debug("Stopping %s" % victim.greenlet.name)
            if victim.greenlet is greenlet.getcurrent():
                # The user itself called runner.quit(); do not block on our own death.
                victim.group.killone(victim.greenlet, block=False)
                continue
            graceful = bool(self.environment.stop_timeout)
            async_calls_to_stop.add(gevent.spawn_later(0, victim.stop, force=not graceful))
            if graceful:
                stop_group.add(victim.greenlet)

    async_calls_to_stop.join()

    finished_in_time = stop_group.join(timeout=self.environment.stop_timeout)
    if not finished_in_time:
        logger.info(
            "Not all users finished their tasks & terminated in %s seconds. Stopping them..."
            % self.environment.stop_timeout
        )
        stop_group.kill(block=True)

    logger.debug(
        "%g users have been stopped, %g still running", sum(user_classes_stop_count.values()), self.user_count
    )
def test_kill_processlet_group(proc):
    """Killing a group of Processlets terminates every child process."""
    processlets = Group()
    processlets.greenlet_class = lets.Processlet
    for _ in range(3):
        processlets.spawn(raise_when_killed)
    # Yield once so that the child processes are actually started.
    processlets.join(0)
    assert len(proc.children()) == 3
    processlets.kill()
    assert len(proc.children()) == 0
    for job in processlets:
        with pytest.raises(Killed):
            job.get()
        assert job.exit_code == 1
def stop_user_instances(self, users):
    """Stop ``users``, gracefully when ``environment.stop_timeout`` is set."""
    if not self.environment.stop_timeout:
        for user in users:
            user.stop(self.user_greenlets, force=True)
        return

    stopping = Group()
    for user in users:
        stopped = user.stop(self.user_greenlets, force=False)
        if not stopped:
            # stop() reported that the greenlet is still running, so track
            # it in order to wait for it to finish its current task.
            stopping.add(user._greenlet)

    if stopping.join(timeout=self.environment.stop_timeout):
        return
    logger.info("Not all users finished their tasks & terminated in %s seconds. Stopping them..." % self.environment.stop_timeout)
    stopping.kill(block=True)
class Manager(object):
    """TLS-protected control server that accepts clients until shut down."""

    def __init__(self, config_addr, keyfile, certfile, cacerts, backlog=10):
        """Prepare the listening socket and the message dispatch table.

        :param config_addr: ``(ip, port)`` tuple or an ``"ip:port"`` string.
        :param keyfile: private key file handed to ``ssl.wrap_socket``.
        :param certfile: certificate file handed to ``ssl.wrap_socket``.
        :param cacerts: CA bundle used to verify client certificates.
        :param backlog: listen backlog of the control socket.
        """
        if isinstance(config_addr, basestring):
            ip, port = config_addr.split(':')
            config_addr = (ip, int(port))
        self.keyfile = keyfile
        self.certfile = certfile
        self.cacerts = cacerts
        self.config_addr = config_addr
        self.backlog = backlog
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.end_evt = Event()
        self.clients = Group()
        self.redirectors = {}
        self.msg_processors = {
            'redirect': self.m_redirect,
            'list_redirect': self.m_list_redirect,
            'drop_redirect': self.m_drop_redirect,
            'shutdown': self.m_shutdown,
        }
        logging.info('manager initialized')

    def run(self):
        """Serve clients until ``end_evt`` is set, then tear everything down."""
        logging.info('manager start to run')
        self.sock.bind(self.config_addr)
        logging.info('manager bind to: %s:%d' % self.config_addr)
        self.sock.listen(self.backlog)
        accept_let = gevent.spawn(self.accept_let)
        try:
            self.end_evt.wait()
            logging.info('shutdown evt recved')
        finally:
            accept_let.kill()
            self.clients.kill()
            # Release the listening port once nothing accepts on it anymore.
            self.sock.close()

    def accept_let(self):
        """Accept loop: wrap each connection in TLS and spawn a client greenlet."""
        while True:
            raw_sock, addr = self.sock.accept()
            try:
                sock = ssl.wrap_socket(raw_sock,
                                       keyfile=self.keyfile,
                                       certfile=self.certfile,
                                       server_side=True,
                                       cert_reqs=ssl.CERT_REQUIRED,
                                       ca_certs=self.cacerts)
            except ssl.SSLError as e:
                logging.warning('ssl handshake with %s failed: %s', addr, e)
                # The handshake failed, so nobody owns the raw socket; close
                # it here instead of leaking the file descriptor.
                raw_sock.close()
                continue
            self.clients.spawn(self.client_let, sock, addr)
def test_gevent(num, depth, period, settle):
    """Report greenlet switch rates for ``num`` greenlets of stack ``depth``.

    The rate is printed once after the ``settle`` period and once more after
    the ``period`` measurement window, using the global ``SWITCH_COUNT``.
    """
    global SWITCH_COUNT
    import gevent
    from gevent.pool import Group
    from gevent.hub import sleep

    horde = Group()
    for _ in xrange(num):
        horde.spawn(stack_filler, depth, sleep)

    gevent.sleep(settle)
    settle_rate = SWITCH_COUNT/(1.0*settle)
    print("settle period over, {:.2f} sw/sec, testing".format(settle_rate))

    # Start the measured window from a clean counter.
    SWITCH_COUNT = 0
    gevent.sleep(period)
    test_rate = SWITCH_COUNT/(1.0*period)
    print("testing period over, {:.2f} sw/sec".format(test_rate))

    horde.kill()
def kill_locust_instances(self, users):
    """Stop ``users``, gracefully when ``environment.stop_timeout`` is set."""
    if not self.environment.stop_timeout:
        for user in users:
            user.stop(self.locusts, force=True)
        return

    dying = Group()
    for user in users:
        killed = user.stop(self.locusts, force=False)
        if not killed:
            # stop() reported that the greenlet was not killed, so track it
            # in order to wait for it to finish its current task.
            dying.add(user._greenlet)

    if dying.join(timeout=self.environment.stop_timeout):
        return
    logger.info(
        "Not all locusts finished their tasks & terminated in %s seconds. Killing them..."
        % self.environment.stop_timeout)
    dying.kill(block=True)
class MetricCollection(UserList):
    """
    MetricCollection aggregates all metrics from config by uve_type.
    For each uve_type a MetricTypeCollection class is created.
    """

    def __init__(self, auth=None):
        super().__init__()
        self.scrapers = Group()
        self.scrape_pool = Pool(size=Config().scraper.pool_size)
        self.session = Session(auth=auth)

        retry_policy = Retry(
            total=Config().scraper.max_retry,
            connect=Config().scraper.max_retry,
            read=Config().scraper.max_retry,
            backoff_factor=0.3,
        )
        adapter = HTTPAdapter(max_retries=retry_policy, pool_connections=10)
        self.session.mount("http://", adapter)

        # Group the configured metrics by their uve_type, keeping config order.
        configs_by_type = {}
        for metric_config in Config().metrics:
            configs_by_type.setdefault(metric_config.uve_type, []).append(metric_config)

        for uve_type, metric_configs in configs_by_type.items():
            collection = MetricTypeCollection(
                self.session,
                uve_type,
                metric_configs,
                self.scrapers,
                self.scrape_pool,
            )
            self.append(collection)

    def scrape(self):
        """Trigger a scrape on every collection and wait for the scrapers."""
        for collection in self:
            collection.scrape()
        try:
            self.scrapers.join()
        except KeyboardInterrupt:
            # Abort pooled and grouped scrapers alike before returning.
            self.scrape_pool.kill(StopScrape)
            self.scrapers.kill(StopScrape)
            return
def get(self, ip, timeout=None):
    """Queries all DNSBLs in the group for matches.

    :param ip: The IP address to check for.
    :param timeout: Timeout in seconds before canceling remaining queries.
    :returns: A :class:`set()` containing the DNSBL domain names that
              matched a record for the IP address.

    """
    matches = set()
    group = Group()
    try:
        # NOTE(review): with ``None`` as the second argument the Timeout is
        # raised to the caller instead of being swallowed by the ``with``
        # block; pass ``False`` if a silent timeout is what is intended.
        with gevent.Timeout(timeout, None):
            for dnsbl in self.dnsbls:
                thread = self.pool.spawn(self._run_dnsbl_get,
                                         matches, dnsbl, ip)
                group.add(thread)
            group.join()
    finally:
        # Cancel whatever is still running, including when the timeout (or
        # any other error) propagates, so no query outlives this call.
        group.kill()
    return matches
class MultiZerologEmitter(gevent.Greenlet):
    """Emitter using multiple loggers which are configured by the zerolog server.
    """

    def __init__(self, interval):
        super(MultiZerologEmitter, self).__init__()
        self.interval = interval
        self.greenlets = Group()
        self.loggers = 'foo foo.lib foo.lib.bar'.split()
        self.levels = 'critical error warning info debug'.split()
        self._keep_going = True

    def _run(self):
        """Start the random-logger greenlet and wait for it to finish."""
        emitter = gevent.spawn(self.__random_logger)
        self.greenlets.add(emitter)
        self.greenlets.join()

    def _emit(self, index, logger):
        """Log one numbered message on ``logger`` at a random level."""
        level = random.choice(self.levels)
        message = "{0} {1} {2}".format(index, logger.name, level)
        getattr(logger, level)(message)

    def __logger(self, logger_name):
        """Emit numbered messages on one fixed logger until stopped."""
        logger = zerolog.getLogger(logger_name)
        index = 0
        while self._keep_going:
            level = random.choice(self.levels)
            message = "{0} {1} {2}".format(index, logger_name, level)
            getattr(logger, level)(message)
            index += 1
            gevent.sleep(self.interval)

    def __random_logger(self):
        """Emit numbered messages on a randomly chosen logger until stopped."""
        index = 0
        while self._keep_going:
            chosen = zerolog.getLogger(random.choice(self.loggers))
            self._emit(index, chosen)
            index += 1
            gevent.sleep(self.interval)

    def kill(self, exception=gevent.GreenletExit, **kwargs):
        """Stop the emit loops, kill the helpers, then kill this greenlet."""
        self._keep_going = False
        self.greenlets.kill()
        super(MultiZerologEmitter, self).kill(exception=exception, **kwargs)
class TCPClient():
    """TCP client exchanging length-prefixed packets through two queues.

    Incoming frames are a 4-byte little-endian unsigned length followed by
    that many payload bytes; decoded payloads are queued for ``get_packet``.
    Outgoing data queued with ``write`` is sent as-is.
    """

    def __init__(self):
        self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._group = Group()
        self._send_buffer = Queue()
        self._recv_buffer = Queue()

    def connect(self, address):
        """Connect to ``address`` and start the receive and send loops."""
        self._socket.connect(address)
        self._group.spawn(self._recv_loop)
        self._group.spawn(self._send_loop)

    def disconnect(self):
        """Stop both loops and close the socket."""
        self._group.kill()
        self._socket.close()
        self._group.join()

    def _recv_loop(self):
        """Read from the socket and queue every complete payload.

        Returns when the peer closes the connection.
        """
        header_size = 4
        # Bytes, not str: recv() returns bytes on Python 3.
        buf = b""
        while True:
            data = self._socket.recv(8192)
            if not data:
                # Empty read means EOF; looping again would spin forever.
                break
            buf += data
            # Only unpack once the whole 4-byte length header has arrived.
            while len(buf) >= header_size:
                length = unpack("<I", buf[:header_size])[0]
                end = header_size + length
                if len(buf) < end:
                    break
                self._recv_buffer.put(buf[header_size:end])
                buf = buf[end:]

    def _send_loop(self):
        """Send queued data in the order it was written."""
        while True:
            data = self._send_buffer.get()
            self._socket.sendall(data)

    def write(self, data):
        """Queue ``data`` for sending."""
        self._send_buffer.put(data)

    def get_packet(self):
        """Return the next received payload, waiting until one is available."""
        return self._recv_buffer.get()
class Dispatcher(gevent.Greenlet):
    """Forward messages from a collector to a publisher, logging them locally.

    A zero-size queue hands each collected message from the collecting
    greenlet to the publishing greenlet.
    """

    def __init__(self, collector, publisher, quiet=False):
        super(Dispatcher, self).__init__()
        self.collector = collector
        self.publisher = publisher
        self.quiet = quiet
        self.greenlets = Group()
        self.channel = gevent.queue.Queue(0)
        self._keep_going = True

    def _run(self):
        """Run the collect and publish loops until both have ended."""
        for loop in (self.__collect, self.__publish):
            self.greenlets.spawn(loop)
        self.greenlets.join()

    def kill(self, exception=gevent.GreenletExit, **kwargs):
        """Stop both loops, kill the helpers, then kill this greenlet."""
        self._keep_going = False
        self.greenlets.kill()
        super(Dispatcher, self).kill(exception=exception, **kwargs)

    def __collect(self):
        while self._keep_going:
            self.channel.put(self.collector.recv_multipart())
            gevent.sleep()

    def _log_locally(self, message):
        """Replay ``message`` through the matching local logger, if enabled."""
        # message is assumed to be a tuple of: (topic, record_json)
        topic, record_json = message
        name_and_level = topic.decode()[len(zerolog.stream_prefix):]
        logger_name, level_name = name_and_level.split(':')
        local_logger = zerolog.getLocalLogger(logger_name)
        if not local_logger.isEnabledFor(logging.getLevelName(level_name)):
            return
        # Rebuild the record and inject it into the local logger.
        record = logging.makeLogRecord(json.loads(record_json.decode()))
        local_logger.handle(record)

    def __publish(self):
        while self._keep_going:
            message = self.channel.get()
            if not self.quiet:
                self._log_locally(message)
            self.publisher.send_multipart(message)
            gevent.sleep()
def kill_locust_greenlets(self, greenlets):
    """
    Kill running locust greenlets. If options.stop_timeout is set, we try to stop the
    Locust users gracefully
    """
    if not self.options.stop_timeout:
        for g in greenlets:
            self.locusts.killone(g)
        return

    dying = Group()
    for g in greenlets:
        locust = g.args[0]
        if locust._state == LOCUST_STATE_WAITING:
            # A waiting locust can be killed right away.
            self.locusts.killone(g)
            continue
        # Flag the locust as stopping and wait for it below.
        locust._state = LOCUST_STATE_STOPPING
        dying.add(g)

    if dying.join(timeout=self.options.stop_timeout):
        return
    logger.info("Not all locusts finished their tasks & terminated in %s seconds. Killing them..." % self.options.stop_timeout)
    dying.kill(block=True)
class SyncStack:
    """Ordered stack of paths whose deltas are computed once and replayed.

    ``latest_tag`` counts the layers whose delta is already known; a worker
    is behind while its ``sync_tag`` is lower than that.
    """

    class SyncLayer:
        """One path together with its delta (``None`` until computed)."""

        def __init__(self, path):
            self.delta = None
            self.path = path

    def __init__(self):
        self.sync_group = Group()
        self.layers = []
        self.latest_tag = 0

    def append(self, path):
        """Add a new layer for ``path`` with an unknown delta."""
        layer = self.SyncLayer(path)
        self.layers.append(layer)

    def has_unknown_delta(self):
        """Return True while some layer has no delta computed yet."""
        return len(self.layers) > self.latest_tag

    def need_sync(self, worker):
        """Return True if ``worker`` has not applied every known delta."""
        return self.latest_tag > worker.sync_tag

    def update_delta(self, worker):
        """Let an up-to-date ``worker`` compute the next unknown delta."""
        assert self.has_unknown_delta()
        assert worker.sync_tag == self.latest_tag
        pending = self.layers[self.latest_tag]
        pending.delta = worker.calc_dir_delta(pending.path)
        self.latest_tag += 1

    def start_sync(self, worker):
        """Spawn a greenlet that replays the known deltas on ``worker``."""

        def replay():
            while worker.sync_tag < self.latest_tag:
                current = self.layers[worker.sync_tag]
                worker.sync_with_delta(current.delta, current.path)
            worker.set_syncing(False)

        worker.set_syncing(True)
        self.sync_group.spawn(replay)

    def stop(self):
        """Kill every running sync greenlet."""
        self.sync_group.kill()
def get_reasons(self, matches, ip, timeout=None):
    """Gets the reasons for each matching DNSBL for the IP address.

    :param matches: The DNSBL matches, as returned by :meth:`.get()`.
    :param ip: The IP address to get reasons for.
    :param timeout: Timeout in seconds before canceling remaining queries.
    :returns: A :class:`dict()` keyed by the DNSBL domain names from
              the ``matches`` argument with the values being the reasons
              each DNSBL matched or ``None``.

    """
    reasons = dict.fromkeys(matches)
    group = Group()
    try:
        # NOTE(review): with ``None`` as the second argument the Timeout is
        # raised to the caller instead of being swallowed by the ``with``
        # block; pass ``False`` if a silent timeout is what is intended.
        with gevent.Timeout(timeout, None):
            for dnsbl in self.dnsbls:
                if dnsbl.address in matches:
                    thread = self.pool.spawn(self._run_dnsbl_get_reason,
                                             reasons, dnsbl, ip)
                    group.add(thread)
            group.join()
    finally:
        # Cancel whatever is still running, including when the timeout (or
        # any other error) propagates, so no query outlives this call.
        group.kill()
    return reasons
from pinga.config import get_sites_list
from pinga.events.producer import Producer
from pinga.log import get_logger


def _check_site_thread(producer, site):
    """Check ``site`` forever, emitting each result as JSON every 5 seconds."""
    while True:
        payload = json.dumps(check_site(site))
        producer.emit(payload)
        gevent.sleep(5)


if __name__ == "__main__":
    logger = get_logger()

    group = Group()
    producer = Producer()

    sites_list = get_sites_list()["sites"]
    logger.info(f"List of sites to be checked by Pinga {sites_list}")

    # One checker greenlet per configured site.
    for site in sites_list:
        checker = gevent.spawn(_check_site_thread, producer, site)
        group.add(checker)

    try:
        logger.info("Starting Pinga producer...")
        group.join()
    except KeyboardInterrupt:
        logger.info("Pinga producer interrupted. Goodbye.")
        group.kill()
class SlaveLocustRunner(DistributedLocustRunner):
    """Runner for a slave node: takes jobs from the master and reports back."""

    def __init__(self, *args, **kwargs):
        super(SlaveLocustRunner, self).__init__(*args, **kwargs)
        self.client_id = socket.gethostname() + "_" + uuid4().hex

        self.client = rpc.Client(self.master_host, self.master_port, self.client_id)
        # Set the state before any greenlet is spawned: heartbeat() reads it
        # and would otherwise fail if it ran before the assignment.
        self.slave_state = STATE_INIT
        self.greenlet = Group()

        self.greenlet.spawn(self.heartbeat).link_exception(callback=self.noop)
        self.greenlet.spawn(self.worker).link_exception(callback=self.noop)
        self.client.send(Message("client_ready", None, self.client_id))
        self.greenlet.spawn(
            self.stats_reporter).link_exception(callback=self.noop)

        # register listener for when all locust users have hatched, and report it to the master node
        def on_hatch_complete(user_count):
            self.client.send(
                Message("hatch_complete", {"count": user_count}, self.client_id))
            self.slave_state = STATE_RUNNING

        events.hatch_complete += on_hatch_complete

        # register listener that adds the current number of spawned locusts to the report that is sent to the master node
        def on_report_to_master(client_id, data):
            data["user_count"] = self.user_count

        events.report_to_master += on_report_to_master

        # register listener that sends quit message to master
        def on_quitting():
            self.client.send(Message("quit", None, self.client_id))

        events.quitting += on_quitting

        # register listener that sends locust exceptions to master
        def on_locust_error(locust_instance, exception, tb):
            formatted_tb = "".join(traceback.format_tb(tb))
            self.client.send(
                Message("exception", {
                    "msg": str(exception),
                    "traceback": formatted_tb
                }, self.client_id))

        events.locust_error += on_locust_error

    def heartbeat(self):
        """Send the current slave state to the master at a fixed interval."""
        while True:
            self.client.send(
                Message('heartbeat', {'state': self.slave_state},
                        self.client_id))
            gevent.sleep(self.heartbeat_interval)

    def worker(self):
        """Message loop handling hatch, stop and quit jobs from the master."""
        while True:
            msg = self.client.recv()
            if msg.type == "hatch":
                self.slave_state = STATE_HATCHING
                self.client.send(Message("hatching", None, self.client_id))
                job = msg.data
                self.hatch_rate = job["hatch_rate"]
                self.host = job["host"]
                # Pass the values directly rather than through a lambda so a
                # later job cannot change what this greenlet hatches.
                self.hatching_greenlet = gevent.spawn(
                    self.start_hatching,
                    locust_count=job["num_clients"],
                    hatch_rate=job["hatch_rate"])
            elif msg.type == "stop":
                self.stop()
                self.client.send(
                    Message("client_stopped", None, self.client_id))
                self.client.send(Message("client_ready", None, self.client_id))
                self.slave_state = STATE_INIT
            elif msg.type == "quit":
                logger.info("Got quit message from master, shutting down...")
                self.stop()
                self.greenlet.kill(block=True)

    def stats_reporter(self):
        """Send a stats report to the master at a fixed interval."""
        while True:
            data = {}
            events.report_to_master.fire(client_id=self.client_id, data=data)
            try:
                self.client.send(Message("stats", data, self.client_id))
            except Exception:
                # Not a bare except: being killed must not be reported as a
                # lost connection.
                logger.error("Connection lost to master server. Aborting...")
                break

            gevent.sleep(SLAVE_REPORT_INTERVAL)
class MasterLocustRunner(DistributedLocustRunner):
    """Runner for the master node: distributes jobs to slaves and tracks them."""

    def __init__(self, *args, **kwargs):
        super(MasterLocustRunner, self).__init__(*args, **kwargs)

        class SlaveNodesDict(dict):
            """Dict of slave nodes by client id, with per-state views."""

            def get_by_state(self, state):
                return [c for c in six.itervalues(self) if c.state == state]

            @property
            def all(self):
                return six.itervalues(self)

            @property
            def ready(self):
                return self.get_by_state(STATE_INIT)

            @property
            def hatching(self):
                return self.get_by_state(STATE_HATCHING)

            @property
            def running(self):
                return self.get_by_state(STATE_RUNNING)

        self.clients = SlaveNodesDict()
        self.server = rpc.Server(self.master_bind_host, self.master_bind_port)
        self.greenlet = Group()
        self.greenlet.spawn(
            self.heartbeat_worker).link_exception(callback=self.noop)
        self.greenlet.spawn(
            self.client_listener).link_exception(callback=self.noop)

        # listener that gathers info on how many locust users the slaves has spawned
        def on_slave_report(client_id, data):
            if client_id not in self.clients:
                logger.info("Discarded report from unrecognized slave %s",
                            client_id)
                return

            self.clients[client_id].user_count = data["user_count"]

        events.slave_report += on_slave_report

        # register listener that sends quit message to slave nodes
        def on_quitting():
            self.quit()

        events.quitting += on_quitting

    @property
    def user_count(self):
        """Total number of users reported by all known slaves."""
        return sum([c.user_count for c in six.itervalues(self.clients)])

    def start_hatching(self, locust_count, hatch_rate):
        """Split ``locust_count`` and ``hatch_rate`` across the active slaves."""
        num_slaves = len(self.clients.ready) + len(self.clients.running) + len(
            self.clients.hatching)
        if not num_slaves:
            logger.warning(
                "You are running in distributed mode but have no slave servers connected. "
                "Please connect slaves prior to swarming.")
            return

        self.num_clients = locust_count
        self.hatch_rate = hatch_rate
        slave_num_clients = locust_count // num_slaves
        slave_hatch_rate = float(hatch_rate) / num_slaves
        # Users left over after the even split; handed out one per slave.
        remaining = locust_count % num_slaves

        logger.info("Sending hatch jobs to %d ready clients", num_slaves)

        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.exceptions = {}
            events.master_start_hatching.fire()

        for client in (self.clients.ready + self.clients.running +
                       self.clients.hatching):
            data = {
                "hatch_rate": slave_hatch_rate,
                "num_clients": slave_num_clients,
                "host": self.host,
                "stop_timeout": None
            }

            if remaining > 0:
                data["num_clients"] += 1
                remaining -= 1

            self.server.send_to_client(Message("hatch", data, client.id))

        self.stats.start_time = time()
        self.state = STATE_HATCHING

    def stop(self):
        """Ask every known slave to stop."""
        self.state = STATE_STOPPING
        for client in self.clients.all:
            self.server.send_to_client(Message("stop", None, client.id))
        events.master_stop_hatching.fire()

    def quit(self):
        """Ask every known slave to quit and kill the master's greenlets."""
        for client in self.clients.all:
            self.server.send_to_client(Message("quit", None, client.id))
        self.greenlet.kill(block=True)

    def heartbeat_worker(self):
        """Count down each slave's heartbeat and flag silent slaves as missing."""
        while True:
            gevent.sleep(self.heartbeat_interval)
            for client in self.clients.all:
                if client.heartbeat < 0 and client.state != STATE_MISSING:
                    logger.info(
                        'Slave %s failed to send heartbeat, setting state to missing.'
                        % str(client.id))
                    client.state = STATE_MISSING
                    client.user_count = 0
                else:
                    client.heartbeat -= 1

    def client_listener(self):
        """Message loop handling everything the slaves send to the master."""
        # These message types index self.clients directly; one from a slave
        # the master does not know would raise KeyError and end this loop.
        requires_known_client = ("client_stopped", "hatching", "hatch_complete")
        while True:
            client_id, msg = self.server.recv_from_client()
            msg.node_id = client_id
            if msg.type == "client_ready":
                id = msg.node_id
                self.clients[id] = SlaveNode(
                    id, heartbeat_liveness=self.heartbeat_liveness)
                logger.info(
                    "Client %r reported as ready. Currently %i clients ready to swarm."
                    % (id,
                       len(self.clients.ready + self.clients.running +
                           self.clients.hatching)))
                # balance the load distribution when new client joins
                if self.state == STATE_RUNNING or self.state == STATE_HATCHING:
                    self.start_hatching(self.num_clients, self.hatch_rate)
            elif (msg.type in requires_known_client
                  and msg.node_id not in self.clients):
                logger.info("Discarded %s message from unrecognized slave %s",
                            msg.type, msg.node_id)
            elif msg.type == "client_stopped":
                del self.clients[msg.node_id]
                logger.info("Removing %s client from running clients" %
                            (msg.node_id))
            elif msg.type == "heartbeat":
                if msg.node_id in self.clients:
                    self.clients[
                        msg.node_id].heartbeat = self.heartbeat_liveness
                    self.clients[msg.node_id].state = msg.data['state']
            elif msg.type == "stats":
                events.slave_report.fire(client_id=msg.node_id, data=msg.data)
            elif msg.type == "hatching":
                self.clients[msg.node_id].state = STATE_HATCHING
            elif msg.type == "hatch_complete":
                self.clients[msg.node_id].state = STATE_RUNNING
                self.clients[msg.node_id].user_count = msg.data["count"]
                if len(self.clients.hatching) == 0:
                    count = sum(c.user_count
                                for c in six.itervalues(self.clients))
                    events.hatch_complete.fire(user_count=count)
            elif msg.type == "quit":
                if msg.node_id in self.clients:
                    del self.clients[msg.node_id]
                    logger.info(
                        "Client %r quit. Currently %i clients connected." %
                        (msg.node_id, len(self.clients.ready)))
            elif msg.type == "exception":
                self.log_exception(msg.node_id, msg.data["msg"],
                                   msg.data["traceback"])

            # Once started, the run is stopped when no slave is active.
            if self.state != STATE_INIT and all(
                    c.state != STATE_RUNNING and c.state != STATE_HATCHING
                    for c in self.clients.all):
                self.state = STATE_STOPPED

    @property
    def slave_count(self):
        """Number of slaves that are ready, hatching or running."""
        return len(self.clients.ready) + len(self.clients.hatching) + len(
            self.clients.running)
class MasterLocustRunner(DistributedLocustRunner):
    """Runner for the master node of a distributed run.

    The master keeps a registry of slave nodes (``self.clients``), hands out
    hatch jobs to them through ``self.server`` and reacts to the messages the
    slaves send back in :meth:`client_listener`.
    """

    def __init__(self, *args, **kwargs):
        super(MasterLocustRunner, self).__init__(*args, **kwargs)

        class SlaveNodesDict(dict):
            """Mapping of node id -> slave node with per-state list views."""

            def get_by_state(self, state):
                """Return the list of nodes whose ``state`` equals *state*."""
                return [c for c in self.itervalues() if c.state == state]

            @property
            def ready(self):
                # "ready" slaves are the ones still in STATE_INIT
                return self.get_by_state(STATE_INIT)

            @property
            def hatching(self):
                return self.get_by_state(STATE_HATCHING)

            @property
            def running(self):
                return self.get_by_state(STATE_RUNNING)

        self.clients = SlaveNodesDict()
        self.client_stats = {}
        self.client_errors = {}
        self._request_stats = {}

        self.server = rpc.Server()
        self.greenlet = Group()
        # NOTE(review): self.noop is not defined in this class; presumably it
        # is provided by DistributedLocustRunner -- confirm.
        self.greenlet.spawn(self.client_listener).link_exception(self.noop)

        # listener that gathers info on how many locust users the slaves has spawned
        def on_slave_report(client_id, data):
            # A report from a slave that never registered (or was already
            # removed) must not raise: the KeyError would propagate into
            # client_listener and silently terminate the listener greenlet.
            if client_id not in self.clients:
                logger.info("Discarded report from unrecognized slave %s" % client_id)
                return
            self.clients[client_id].user_count = data["user_count"]
        events.slave_report += on_slave_report

        # register listener that sends quit message to slave nodes
        def on_quitting():
            self.quit()
        events.quitting += on_quitting

    @property
    def user_count(self):
        """Total number of users reported by all registered slaves."""
        return sum([c.user_count for c in self.clients.itervalues()])

    def start_hatching(self, locust_count, hatch_rate):
        """Split *locust_count* and *hatch_rate* over the slaves and send hatch jobs.

        The share per slave is computed from the number of ready plus running
        slaves. Nothing is sent (a warning is logged) when that number is zero.
        """
        self.num_clients = locust_count

        # computed once; ready/running each scan the whole registry
        num_slaves = len(self.clients.ready) + len(self.clients.running)
        slave_num_clients = locust_count / (num_slaves or 1)
        slave_hatch_rate = float(hatch_rate) / (num_slaves or 1)

        logger.info("Sending hatch jobs to %i ready clients" % num_slaves)
        if not num_slaves:
            logger.warning(
                "You are running in distributed mode but have no slave servers connected. Please connect slaves prior to swarming."
            )
            return

        # a fresh run (not a re-balance of an ongoing one) starts with clean stats
        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.exceptions = {}

        # one hatch message is sent per registered slave; the message itself
        # carries no recipient id
        for client in self.clients.itervalues():
            data = {
                "hatch_rate": slave_hatch_rate,
                "num_clients": slave_num_clients,
                "num_requests": self.num_requests,
                "host": self.host,
                "stop_timeout": None
            }
            self.server.send(Message("hatch", data, None))

        self.stats.start_time = time()
        self.state = STATE_HATCHING

    def stop(self):
        """Send one "stop" message per hatching or running slave."""
        for client in self.clients.hatching + self.clients.running:
            self.server.send(Message("stop", None, None))

    def quit(self):
        """Send one "quit" message per registered slave, then kill own greenlets."""
        for client in self.clients.itervalues():
            self.server.send(Message("quit", None, None))
        self.greenlet.kill(block=True)

    def client_listener(self):
        """Receive slave messages forever and update the slave registry."""
        # message types that operate on an already registered slave entry
        needs_known_slave = ("client_stopped", "hatching", "hatch_complete")

        while True:
            msg = self.server.recv()

            # Indexing self.clients with an unknown id would raise KeyError and
            # end this loop for good, so such messages are dropped instead.
            if msg.type in needs_known_slave and msg.node_id not in self.clients:
                logger.warning("Discarded %s message from unrecognized slave %s" % (msg.type, msg.node_id))
                continue

            if msg.type == "client_ready":
                node_id = msg.node_id
                self.clients[node_id] = SlaveNode(node_id)
                logger.info(
                    "Client %r reported as ready. Currently %i clients ready to swarm."
                    % (node_id, len(self.clients.ready)))
                ## emit a warning if the slave's clock seem to be out of sync with our clock
                #if abs(time() - msg.data["time"]) > 5.0:
                #    warnings.warn("The slave node's clock seem to be out of sync. For the statistics to be correct the different locust servers need to have synchronized clocks.")
            elif msg.type == "client_stopped":
                del self.clients[msg.node_id]
                if len(self.clients.hatching + self.clients.running) == 0:
                    self.state = STATE_STOPPED
                logger.info("Removing %s client from running clients" % (msg.node_id))
            elif msg.type == "stats":
                events.slave_report.fire(msg.node_id, msg.data)
            elif msg.type == "hatching":
                self.clients[msg.node_id].state = STATE_HATCHING
            elif msg.type == "hatch_complete":
                self.clients[msg.node_id].state = STATE_RUNNING
                self.clients[msg.node_id].user_count = msg.data["count"]
                # the overall hatch is complete once no slave is hatching anymore
                if len(self.clients.hatching) == 0:
                    count = sum(c.user_count for c in self.clients.itervalues())
                    events.hatch_complete.fire(count)
            elif msg.type == "quit":
                if msg.node_id in self.clients:
                    del self.clients[msg.node_id]
                    logger.info(
                        "Client %r quit. Currently %i clients connected."
                        % (msg.node_id, len(self.clients.ready)))
            elif msg.type == "exception":
                self.log_exception(msg.node_id, msg.data["msg"], msg.data["traceback"])

    @property
    def slave_count(self):
        """Number of slaves that are ready, hatching or running."""
        return len(self.clients.ready) + len(self.clients.hatching) + len(
            self.clients.running)
class SlaveLocustRunner(DistributedLocustRunner):
    """Runner for a slave node: executes jobs received from the master.

    On construction it connects to the master, announces itself with a
    "client_ready" message and starts two greenlets: :meth:`worker` (handles
    master commands) and :meth:`stats_reporter` (periodic stats upload).
    """

    def noop(self, *args, **kw):
        """Do nothing; used as the receiver for ``link_exception``."""
        pass

    def __init__(self, *args, **kwargs):
        super(SlaveLocustRunner, self).__init__(*args, **kwargs)

        # id = "<hostname>_<md5 of current time plus a random offset>"
        host_name = socket.gethostname()
        nonce = md5(str(time() + random.randint(0,10000))).hexdigest()
        self.client_id = host_name + "_" + nonce

        self.client = rpc.Client(self.master_host)
        self.greenlet = Group()

        command_loop = self.greenlet.spawn(self.worker)
        command_loop.link_exception(receiver=self.noop)

        ready_msg = Message("client_ready", None, self.client_id)
        self.client.send(ready_msg)

        report_loop = self.greenlet.spawn(self.stats_reporter)
        report_loop.link_exception(receiver=self.noop)

        # tell the master once every locust user on this node has hatched
        def on_hatch_complete(count):
            payload = {"count":count}
            self.client.send(Message("hatch_complete", payload, self.client_id))
        events.hatch_complete += on_hatch_complete

        # add the current number of spawned locusts to each report for the master
        def on_report_to_master(client_id, data):
            data["user_count"] = self.user_count
        events.report_to_master += on_report_to_master

        # tell the master when this node is quitting
        def on_quitting():
            self.client.send(Message("quit", None, self.client_id))
        events.quitting += on_quitting

        # forward locust exceptions (message and formatted traceback) to the master
        def on_locust_error(locust, e, tb):
            formatted_tb = "".join(traceback.format_tb(tb))
            payload = {"msg" : str(e), "traceback" : formatted_tb}
            self.client.send(Message("exception", payload, self.client_id))
        events.locust_error += on_locust_error

    def worker(self):
        """Handle "hatch", "stop" and "quit" commands from the master forever."""
        while True:
            msg = self.client.recv()
            command = msg.type

            if command == "hatch":
                self.client.send(Message("hatching", None, self.client_id))
                job = msg.data
                self.hatch_rate = job["hatch_rate"]
                #self.num_clients = job["num_clients"]
                self.num_requests = job["num_requests"]
                self.host = job["host"]

                # hatching runs in its own greenlet so this loop keeps
                # receiving master commands meanwhile
                def run_hatch():
                    return self.start_hatching(locust_count=job["num_clients"], hatch_rate=job["hatch_rate"])
                self.hatching_greenlet = gevent.spawn(run_hatch)
                continue

            if command == "stop":
                self.stop()
                # report the stop, then immediately re-register as ready
                self.client.send(Message("client_stopped", None, self.client_id))
                self.client.send(Message("client_ready", None, self.client_id))
                continue

            if command == "quit":
                logger.info("Got quit message from master, shutting down...")
                self.stop()
                self.greenlet.kill(block=True)

    def stats_reporter(self):
        """Send a "stats" message to the master every SLAVE_REPORT_INTERVAL seconds.

        Listeners of ``events.report_to_master`` fill the report dict before it
        is sent. The loop ends on the first failed send.
        """
        while True:
            report = {}
            events.report_to_master.fire(self.client_id, report)
            try:
                self.client.send(Message("stats", report, self.client_id))
            except:
                logger.error("Connection lost to master server. Aborting...")
                return

            gevent.sleep(SLAVE_REPORT_INTERVAL)
class MasterLocustRunner(DistributedLocustRunner):
    """Runner for the master node of a distributed run.

    The master keeps a registry of slave nodes (``self.clients``), hands out
    hatch jobs to them through ``self.server`` and reacts to the messages the
    slaves send back in :meth:`client_listener`.
    """

    def __init__(self, *args, **kwargs):
        super(MasterLocustRunner, self).__init__(*args, **kwargs)

        class SlaveNodesDict(dict):
            """Mapping of node id -> slave node with per-state list views."""

            def get_by_state(self, state):
                """Return the list of nodes whose ``state`` equals *state*."""
                return [c for c in self.itervalues() if c.state == state]

            @property
            def ready(self):
                # "ready" slaves are the ones still in STATE_INIT
                return self.get_by_state(STATE_INIT)

            @property
            def hatching(self):
                return self.get_by_state(STATE_HATCHING)

            @property
            def running(self):
                return self.get_by_state(STATE_RUNNING)

        self.clients = SlaveNodesDict()
        self.client_stats = {}
        self.client_errors = {}
        self._request_stats = {}

        self.server = rpc.Server()
        self.greenlet = Group()
        self.greenlet.spawn(self.client_listener).link_exception(receiver=self.noop)

        # listener that gathers info on how many locust users the slaves has spawned
        def on_slave_report(client_id, data):
            # A report from a slave that never registered (or was already
            # removed) must not raise: the KeyError would propagate into
            # client_listener and silently terminate the listener greenlet.
            if client_id not in self.clients:
                logger.info("Discarded report from unrecognized slave %s" % client_id)
                return
            self.clients[client_id].user_count = data["user_count"]
        events.slave_report += on_slave_report

        # register listener that sends quit message to slave nodes
        def on_quitting():
            self.quit()
        events.quitting += on_quitting

    def noop(self, *args, **kw):
        """Do nothing; used as the receiver for ``link_exception``."""
        pass

    @property
    def user_count(self):
        """Total number of users reported by all registered slaves."""
        return sum([c.user_count for c in self.clients.itervalues()])

    def start_hatching(self, locust_count, hatch_rate):
        """Split *locust_count* and *hatch_rate* over the slaves and send hatch jobs.

        The share per slave is computed from the number of ready plus running
        slaves. Nothing is sent (a warning is logged) when that number is zero.
        """
        self.num_clients = locust_count

        # computed once; ready/running each scan the whole registry
        num_slaves = len(self.clients.ready) + len(self.clients.running)
        slave_num_clients = locust_count / (num_slaves or 1)
        slave_hatch_rate = float(hatch_rate) / (num_slaves or 1)

        logger.info("Sending hatch jobs to %i ready clients" % num_slaves)
        if not num_slaves:
            logger.warning("You are running in distributed mode but have no slave servers connected. Please connect slaves prior to swarming.")
            return

        # a fresh run (not a re-balance of an ongoing one) starts with clean stats
        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.exceptions = {}

        # one hatch message is sent per registered slave; the message itself
        # carries no recipient id
        for client in self.clients.itervalues():
            data = {"hatch_rate":slave_hatch_rate, "num_clients":slave_num_clients, "num_requests": self.num_requests, "host":self.host, "stop_timeout":None}
            self.server.send(Message("hatch", data, None))

        self.stats.start_time = time()
        self.state = STATE_HATCHING

    def stop(self):
        """Send one "stop" message per hatching or running slave."""
        for client in self.clients.hatching + self.clients.running:
            self.server.send(Message("stop", None, None))

    def quit(self):
        """Send one "quit" message per registered slave, then kill own greenlets."""
        for client in self.clients.itervalues():
            self.server.send(Message("quit", None, None))
        self.greenlet.kill(block=True)

    def client_listener(self):
        """Receive slave messages forever and update the slave registry."""
        # message types that operate on an already registered slave entry
        needs_known_slave = ("client_stopped", "hatching", "hatch_complete")

        while True:
            msg = self.server.recv()

            # Indexing self.clients with an unknown id would raise KeyError and
            # end this loop for good, so such messages are dropped instead.
            if msg.type in needs_known_slave and msg.node_id not in self.clients:
                logger.warning("Discarded %s message from unrecognized slave %s" % (msg.type, msg.node_id))
                continue

            if msg.type == "client_ready":
                node_id = msg.node_id
                self.clients[node_id] = SlaveNode(node_id)
                logger.info("Client %r reported as ready. Currently %i clients ready to swarm." % (node_id, len(self.clients.ready)))
                ## emit a warning if the slave's clock seem to be out of sync with our clock
                #if abs(time() - msg.data["time"]) > 5.0:
                #    warnings.warn("The slave node's clock seem to be out of sync. For the statistics to be correct the different locust servers need to have synchronized clocks.")
            elif msg.type == "client_stopped":
                del self.clients[msg.node_id]
                if len(self.clients.hatching + self.clients.running) == 0:
                    self.state = STATE_STOPPED
                logger.info("Removing %s client from running clients" % (msg.node_id))
            elif msg.type == "stats":
                events.slave_report.fire(msg.node_id, msg.data)
            elif msg.type == "hatching":
                self.clients[msg.node_id].state = STATE_HATCHING
            elif msg.type == "hatch_complete":
                self.clients[msg.node_id].state = STATE_RUNNING
                self.clients[msg.node_id].user_count = msg.data["count"]
                # the overall hatch is complete once no slave is hatching anymore
                if len(self.clients.hatching) == 0:
                    count = sum(c.user_count for c in self.clients.itervalues())
                    events.hatch_complete.fire(count)
            elif msg.type == "quit":
                if msg.node_id in self.clients:
                    del self.clients[msg.node_id]
                    logger.info("Client %r quit. Currently %i clients connected." % (msg.node_id, len(self.clients.ready)))
            elif msg.type == "exception":
                self.log_exception(msg.node_id, msg.data["msg"], msg.data["traceback"])

    @property
    def slave_count(self):
        """Number of slaves that are ready, hatching or running."""
        return len(self.clients.ready) + len(self.clients.hatching) + len(self.clients.running)
def user_input(players, inputlet, timeout=25, type='single', trans=None):
    '''
    Collect input for `inputlet` from `players` and return the processed result(s).

    Type can be 'single', 'all' or 'any':

    - 'single': `players` must contain exactly one player; returns that
      player's result.
    - 'any': stops at the first player whose result is not None and returns
      `(player, result)`, or `(None, None)` if there was none.
    - 'all': returns an OrderedDict of player -> result, in the order of the
      `players` argument.

    `timeout` is clamped to be non-negative and stored on the inputlet.
    When `trans` is not given, an InputTransaction is opened and the function
    calls itself with it.

    Players that did not answer before the deadline (or were not reached
    because an 'any' input finished early) are processed as if they had
    sent None.
    '''
    assert type in ('single', 'all', 'any')
    # 'single' implies exactly one player
    assert not type == 'single' or len(players) == 1

    timeout = max(0, timeout)

    g = Game.getgame()
    inputlet.timeout = timeout
    # work on a copy: the list is consumed below as players answer
    players = players[:]

    if not trans:
        with InputTransaction(inputlet.tag(), players) as trans:
            return user_input(players, inputlet, timeout, type, trans)

    # tag prefix is 'I' + a marker for the input type + the inputlet tag
    t = {'single': '', 'all': '&', 'any': '|'}[type]
    tag = 'I{0}:{1}:'.format(t, inputlet.tag())

    # every player gets a private copy of the inputlet, bound via `.actor`
    ilets = {p: copy(inputlet) for p in players}
    for p in players:
        ilets[p].actor = p

    results = {p: None for p in players}
    # one sync tag per player; it is appended to `tag` for requests and replies
    synctags = {p: g.get_synctag() for p in players}

    orig_players = players[:]
    input_group = GreenletGroup()
    g.gr_groups.add(input_group)
    # waiters whose result has not been consumed yet
    _input_group = set()

    # overall deadline: the inputlet timeout plus 5 extra seconds
    till = time.time() + timeout + 5
    try:
        inputany_player = None

        def get_input_waiter(p, t):
            try:
                # should be [tag, <Data for Inputlet.parse>]
                # tag likes 'I?:ChooseOption:2345'
                # NOTE: `tag` here is local to the waiter and shadows the outer one
                tag, rst = p.client.gexpect(t)
                return rst
            except EndpointDied:
                return None

        # spawn one waiter greenlet per player (`t` is reused as the full per-player tag)
        for p in players:
            t = tag + str(synctags[p])
            w = input_group.spawn(get_input_waiter, p, t)
            _input_group.add(w)
            w.player = p
            w.gr_name = 'get_input_waiter: p=%r, tag=%s' % (p, t)

        for p in players:
            g.emit_event('user_input_start', (trans, ilets[p]))

        while players:
            # NOTICE: This is a must.
            # TLE would be raised at other part (notably my.post_process) in the original solution
            # (wrapping large parts of code in 'with TimeLimitExceeded(): ...')
            # Only the wait itself runs under the time limit (remaining time until `till`).
            with TimeLimitExceeded(max(till - time.time(), 0)):
                w = waitany(_input_group)
                _input_group.discard(w)
                try:
                    rst = w.get()
                    p, data = w.player, rst
                except:
                    # a waiter that failed counts as "no data" for its player
                    p, data = w.player, None

            # write the received data out under the matching 'R' + tag + synctag
            g.players.client.gwrite('R{}{}'.format(tag, synctags[p]), data)

            my = ilets[p]

            try:
                rst = my.parse(data)
            except:
                # NOTE(review): the message says .process() but the call that failed is .parse()
                log.error('user_input: exception in .process()', exc_info=1)
                # ----- FOR DEBUG -----
                if g.IS_DEBUG:
                    raise
                # ----- END FOR DEBUG -----
                rst = None

            rst = my.post_process(p, rst)

            g.emit_event('user_input_finish', (trans, my, rst))

            players.remove(p)
            results[p] = rst

            # for 'any', the first non-None result ends the collection
            if type == 'any' and rst is not None:
                inputany_player = p
                break

    except TimeLimitExceeded:
        # deadline hit: the players still in `players` are handled below
        pass

    finally:
        input_group.kill()

    # timed-out players
    # (also covers players never reached because an 'any' input finished early)
    for p in players:
        my = ilets[p]
        rst = my.parse(None)
        rst = my.post_process(p, rst)
        results[p] = rst
        g.emit_event('user_input_finish', (trans, my, rst))
        g.players.client.gwrite('R{}{}'.format(tag, synctags[p]), None)

    if type == 'single':
        return results[orig_players[0]]

    elif type == 'any':
        if not inputany_player:
            return None, None

        return inputany_player, results[inputany_player]

    elif type == 'all':
        return OrderedDict([(p, results[p]) for p in orig_players])

    # unreachable: `type` was validated at the top
    assert False, 'WTF?!'
class Consumer(object):
    """High level NSQ consumer.

    A Consumer will connect to the nsqd tcp addresses or poll the provided
    nsqlookupd http addresses for the configured topic and send signals to
    message handlers connected to the :attr:`on_message` signal or provided by
    ``message_handler``.

    Messages will automatically be finished when the message handle returns
    unless :meth:`message.enable_async() <gnsq.Message.enable_async>` is called.
    If an exception occurs or :class:`~gnsq.errors.NSQRequeueMessage` is raised,
    the message will be requeued.

    The Consumer will handle backing off of failed messages up to a configurable
    ``max_interval`` as well as automatically reconnecting to dropped
    connections.

    Example usage::

        from gnsq import Consumer

        consumer = Consumer('topic', 'channel', 'localhost:4150')

        @consumer.on_message.connect
        def handler(consumer, message):
            print('got message: %s' % message.body)

        consumer.start()

    :param topic: specifies the desired NSQ topic

    :param channel: specifies the desired NSQ channel

    :param nsqd_tcp_addresses: a sequence of string addresses of the nsqd
        instances this consumer should connect to

    :param lookupd_http_addresses: a sequence of string addresses of the
        nsqlookupd instances this consumer should query for producers of the
        specified topic

    :param name: a string that is used for logging messages (defaults to
        ``'gnsq.consumer.{topic}.{channel}'``)

    :param message_handler: the callable that will be executed for each message
        received

    :param max_tries: the maximum number of attempts the consumer will make to
        process a message after which messages will be automatically discarded

    :param max_in_flight: the maximum number of messages this consumer will
        pipeline for processing. this value will be divided evenly amongst the
        configured/discovered nsqd producers

    :param requeue_delay: the default delay to use when requeueing a failed
        message

    :param lookupd_poll_interval: the amount of time in seconds between querying
        all of the supplied nsqlookupd instances. A random amount of time based
        on this value will be initially introduced in order to add jitter when
        multiple consumers are running

    :param lookupd_poll_jitter: the maximum fractional amount of jitter to add
        to the lookupd poll loop. This helps evenly distribute requests even if
        multiple consumers restart at the same time.

    :param low_ready_idle_timeout: the amount of time in seconds to wait for a
        message from a producer when in a state where RDY counts are
        re-distributed (ie. `max_in_flight` < `num_producers`)

    :param max_backoff_duration: the maximum time we will allow a backoff state
        to last in seconds. If zero, backoff will not occur

    :param backoff_on_requeue: if ``False``, backoff will only occur on
        exception

    :param **kwargs: passed to :class:`~gnsq.NsqdTCPClient` initialization

    :raises ValueError: if neither ``nsqd_tcp_addresses`` nor
        ``lookupd_http_addresses`` is given
    """

    # NOTE: the list defaults below are never mutated here; they are only
    # handed to parse_nsqds / parse_lookupds.
    def __init__(self, topic, channel, nsqd_tcp_addresses=[],
                 lookupd_http_addresses=[], name=None, message_handler=None,
                 max_tries=5, max_in_flight=1, requeue_delay=0,
                 lookupd_poll_interval=60, lookupd_poll_jitter=0.3,
                 low_ready_idle_timeout=10, max_backoff_duration=128,
                 backoff_on_requeue=True, **kwargs):
        if not nsqd_tcp_addresses and not lookupd_http_addresses:
            raise ValueError('must specify at least one nsqd or lookupd')

        self.nsqd_tcp_addresses = parse_nsqds(nsqd_tcp_addresses)
        self.lookupds = parse_lookupds(lookupd_http_addresses)
        # lookupds are queried one at a time, round-robin (see query_lookupd)
        self.iterlookupds = cycle(self.lookupds)

        self.topic = topic
        self.channel = channel
        self.max_tries = max_tries
        self.max_in_flight = max_in_flight
        self.requeue_delay = requeue_delay
        self.lookupd_poll_interval = lookupd_poll_interval
        self.lookupd_poll_jitter = lookupd_poll_jitter
        self.low_ready_idle_timeout = low_ready_idle_timeout
        self.backoff_on_requeue = backoff_on_requeue
        self.max_backoff_duration = max_backoff_duration
        self.conn_kwargs = kwargs

        if name:
            self.name = name
        else:
            self.name = '%s.%s.%s' % (__name__, self.topic, self.channel)

        if message_handler is not None:
            self.on_message.connect(message_handler, weak=False)

        self.logger = logging.getLogger(self.name)

        self._state = INIT
        self._redistributed_ready_event = Event()
        # per-connection backoff timers, created on first access
        self._connection_backoffs = defaultdict(self._create_backoff)
        self._message_backoffs = defaultdict(self._create_backoff)

        # maps connection -> its state (INIT / RUNNING / BACKOFF / THROTTLED)
        self._connections = {}
        # every greenlet spawned by the consumer; join() waits on this group
        self._workers = Group()
        # the polling greenlets that close() kills explicitly
        self._killables = Group()

    @cached_property
    def on_message(self):
        """Emitted when a message is received.

        The signal sender is the consumer and the ``message`` is sent as an
        argument. The ``message_handler`` param is connected to this signal.
        """
        return blinker.Signal(doc='Emitted when a message is received.')

    @cached_property
    def on_response(self):
        """Emitted when a response is received.

        The signal sender is the consumer and the ``response`` is sent as an
        argument.
        """
        return blinker.Signal(doc='Emitted when a response is received.')

    @cached_property
    def on_error(self):
        """Emitted when an error is received.

        The signal sender is the consumer and the ``error`` is sent as an
        argument.
        """
        return blinker.Signal(doc='Emitted when a error is received.')

    @cached_property
    def on_finish(self):
        """Emitted after a message is successfully finished.

        The signal sender is the consumer and the ``message_id`` is sent as an
        argument.
        """
        return blinker.Signal(doc='Emitted after the a message is finished.')

    @cached_property
    def on_requeue(self):
        """Emitted after a message is requeued.

        The signal sender is the consumer and the ``message_id`` and ``timeout``
        are sent as arguments.
        """
        return blinker.Signal(doc='Emitted after the a message is requeued.')

    @cached_property
    def on_giving_up(self):
        """Emitted after a giving up on a message.

        Emitted when a message has exceeded the maximum number of attempts
        (``max_tries``) and will no longer be requeued. This is useful to
        perform tasks such as writing to disk, collecting statistics etc. The
        signal sender is the consumer and the ``message`` is sent as an
        argument.
        """
        return blinker.Signal(doc='Sent after a giving up on a message.')

    @cached_property
    def on_auth(self):
        """Emitted after a connection is successfully authenticated.

        The signal sender is the consumer and the ``conn`` and parsed
        ``response`` are sent as arguments.
        """
        # the doc used to be a copy of on_response's text
        return blinker.Signal(
            doc='Emitted after a connection is successfully authenticated.')

    @cached_property
    def on_exception(self):
        """Emitted when an exception is caught while handling a message.

        The signal sender is the consumer and the ``message`` and ``error`` are
        sent as arguments.
        """
        return blinker.Signal(doc='Emitted when an exception is caught.')

    @cached_property
    def on_close(self):
        """Emitted after :meth:`close`.

        The signal sender is the consumer.
        """
        return blinker.Signal(doc='Emitted after the consumer is closed.')

    def start(self, block=True):
        """Start discovering and listing to connections.

        :param block: if true, :meth:`join` is called before returning
        :raises RuntimeError: if nothing is connected to :attr:`on_message`
        """
        if self._state == INIT:
            if not any(self.on_message.receivers_for(blinker.ANY)):
                raise RuntimeError('no receivers connected to on_message')

            self.logger.debug('starting %s...', self.name)
            self._state = RUNNING
            self.query_nsqd()

            if self.lookupds:
                self.query_lookupd()
                self._killables.add(self._workers.spawn(self._poll_lookupd))

            self._killables.add(self._workers.spawn(self._poll_ready))

        else:
            self.logger.warning('%s already started', self.name)

        if block:
            self.join()

    def close(self):
        """Immediately close all connections and stop workers."""
        if not self.is_running:
            return

        self._state = CLOSED

        self.logger.debug('killing %d worker(s)', len(self._killables))
        self._killables.kill(block=False)

        self.logger.debug('closing %d connection(s)', len(self._connections))
        for conn in self._connections:
            conn.close_stream()

        self.on_close.send(self)

    def join(self, timeout=None, raise_error=False):
        """Block until all connections have closed and workers stopped."""
        self._workers.join(timeout, raise_error)

    @property
    def is_running(self):
        """Check if consumer is currently running."""
        return self._state == RUNNING

    @property
    def is_starved(self):
        """Evaluate whether any of the connections are starved.

        This property should be used by message handlers to reliably identify
        when to process a batch of messages.
        """
        return any(conn.is_starved for conn in self._connections)

    @property
    def total_ready_count(self):
        """Sum of ``ready_count`` over all connections."""
        return sum(c.ready_count for c in self._connections)

    @property
    def total_in_flight(self):
        """Sum of ``in_flight`` over all connections."""
        return sum(c.in_flight for c in self._connections)

    def query_nsqd(self):
        """Connect to every configured nsqd ``host:port`` address."""
        self.logger.debug('querying nsqd...')
        for address in self.nsqd_tcp_addresses:
            address, port = address.split(':')
            self.connect_to_nsqd(address, int(port))

    def query_lookupd(self):
        """Ask the next lookupd for producers of the topic and connect to them.

        A failed lookup is logged and otherwise ignored.
        """
        self.logger.debug('querying lookupd...')
        lookupd = next(self.iterlookupds)

        try:
            producers = lookupd.lookup(self.topic)['producers']
            self.logger.debug('found %d producers', len(producers))

        except Exception as error:
            self.logger.warning(
                'Failed to lookup %s on %s (%s)',
                self.topic, lookupd.address, error)
            return

        for producer in producers:
            self.connect_to_nsqd(
                producer['broadcast_address'], producer['tcp_port'])

    def _poll_lookupd(self):
        """Worker loop: query a lookupd every ``lookupd_poll_interval`` seconds."""
        try:
            # initial random delay so several consumers do not poll in lockstep
            delay = self.lookupd_poll_interval * self.lookupd_poll_jitter
            gevent.sleep(random.random() * delay)

            while True:
                gevent.sleep(self.lookupd_poll_interval)
                self.query_lookupd()

        except gevent.GreenletExit:
            pass

    def _poll_ready(self):
        """Worker loop: redistribute RDY state when requested, or after 5s."""
        try:
            while True:
                if self._redistributed_ready_event.wait(5):
                    self._redistributed_ready_event.clear()
                self._redistribute_ready_state()

        except gevent.GreenletExit:
            pass

    def _redistribute_ready_state(self):
        """Compute the RDY count per connection and send the ones that changed."""
        if not self.is_running:
            return

        # more connections than max_in_flight: not every connection can hold RDY
        if len(self._connections) > self.max_in_flight:
            ready_state = self._get_unsaturated_ready_state()
        else:
            ready_state = self._get_saturated_ready_state()

        for conn, count in ready_state.items():
            if conn.ready_count == count:
                self.logger.debug('[%s] RDY count already %d', conn, count)
                continue

            self.logger.debug('[%s] sending RDY %d', conn, count)

            try:
                conn.ready(count)
            except NSQSocketError as error:
                self.logger.warning(
                    '[%s] RDY %d failed (%r)', conn, count, error)

    def _get_unsaturated_ready_state(self):
        """Return conn -> RDY when connections outnumber ``max_in_flight``.

        Connections in BACKOFF get 0; a random ``max_in_flight`` of the others
        get 1 and the rest 0.
        """
        ready_state = {}
        active = []

        for conn, state in self._connections.items():
            if state == BACKOFF:
                ready_state[conn] = 0
            else:
                active.append(conn)

        random.shuffle(active)

        for conn in active[self.max_in_flight:]:
            ready_state[conn] = 0

        for conn in active[:self.max_in_flight]:
            ready_state[conn] = 1

        return ready_state

    def _get_saturated_ready_state(self):
        """Return conn -> RDY when ``max_in_flight`` covers every connection.

        BACKOFF connections get 0; INIT, THROTTLED and idle connections get 1;
        what remains of ``max_in_flight`` is split over the active connections,
        with the remainder handed to randomly chosen ones.
        """
        ready_state = {}
        active = []
        now = time.time()

        for conn, state in self._connections.items():
            if state == BACKOFF:
                ready_state[conn] = 0

            elif state in (INIT, THROTTLED):
                ready_state[conn] = 1

            elif (now - conn.last_message) > self.low_ready_idle_timeout:
                self.logger.info(
                    '[%s] idle connection, giving up RDY count', conn)
                ready_state[conn] = 1

            else:
                active.append(conn)

        if not active:
            return ready_state

        ready_available = self.max_in_flight - sum(ready_state.values())
        connection_max_in_flight = ready_available // len(active)

        for conn in active:
            ready_state[conn] = connection_max_in_flight

        for conn in random.sample(active, ready_available % len(active)):
            ready_state[conn] += 1

        return ready_state

    def redistribute_ready_state(self):
        """Request an RDY redistribution; it is performed by :meth:`_poll_ready`."""
        self._redistributed_ready_event.set()

    def connect_to_nsqd(self, address, port):
        """Connect, identify and subscribe to the nsqd at ``address:port``.

        Does nothing unless the consumer is running or if an equal connection
        is already registered. Failures are routed to
        :meth:`handle_connection_failure`.
        """
        if not self.is_running:
            return

        conn = NsqdTCPClient(address, port, **self.conn_kwargs)
        if conn in self._connections:
            self.logger.debug('[%s] already connected', conn)
            return

        self._connections[conn] = INIT
        self.logger.debug('[%s] connecting...', conn)

        conn.on_message.connect(self.handle_message)
        conn.on_response.connect(self.handle_response)
        conn.on_error.connect(self.handle_error)
        conn.on_finish.connect(self.handle_finish)
        conn.on_requeue.connect(self.handle_requeue)
        conn.on_auth.connect(self.handle_auth)

        try:
            conn.connect()
            conn.identify()

            if conn.max_ready_count < self.max_in_flight:
                msg = (
                    '[%s] max RDY count %d < consumer max in flight %d, '
                    'truncation possible')

                self.logger.warning(
                    msg, conn, conn.max_ready_count, self.max_in_flight)

            conn.subscribe(self.topic, self.channel)

        except NSQException as error:
            self.logger.warning('[%s] connection failed (%r)', conn, error)
            self.handle_connection_failure(conn)
            return

        # Check if we've closed since we started
        if not self.is_running:
            self.handle_connection_failure(conn)
            return

        self.logger.info('[%s] connection successful', conn)
        self.handle_connection_success(conn)

    def _listen(self, conn):
        """Worker: run ``conn.listen()`` and report a lost connection."""
        try:
            conn.listen()
        except NSQException as error:
            self.logger.warning('[%s] connection lost (%r)', conn, error)

        self.handle_connection_failure(conn)

    def handle_connection_success(self, conn):
        """Start listening on *conn* and request an RDY redistribution."""
        self._workers.spawn(self._listen, conn)
        self.redistribute_ready_state()

        # connection backoff is only tracked for explicitly configured nsqds
        if str(conn) not in self.nsqd_tcp_addresses:
            return

        self._connection_backoffs[conn].success()

    def handle_connection_failure(self, conn):
        """Drop *conn*; schedule a reconnect if it is a configured nsqd."""
        del self._connections[conn]
        conn.close_stream()

        if not self.is_running:
            return

        self.redistribute_ready_state()

        # only explicitly configured nsqds are retried from here
        if str(conn) not in self.nsqd_tcp_addresses:
            return

        seconds = self._connection_backoffs[conn].failure().get_interval()
        self.logger.debug('[%s] retrying in %ss', conn, seconds)

        gevent.spawn_later(
            seconds, self.connect_to_nsqd, conn.address, conn.port)

    def handle_auth(self, conn, response):
        """Log the accepted AUTH response and emit :attr:`on_auth`."""
        metadata = []
        if response.get('identity'):
            metadata.append("Identity: %r" % response['identity'])

        if response.get('permission_count'):
            metadata.append("Permissions: %d" % response['permission_count'])

        if response.get('identity_url'):
            metadata.append(response['identity_url'])

        self.logger.info('[%s] AUTH accepted %s', conn, ' '.join(metadata))
        self.on_auth.send(self, conn=conn, response=response)

    def handle_response(self, conn, response):
        """Log *response* and emit :attr:`on_response`."""
        self.logger.debug('[%s] response: %s', conn, response)
        self.on_response.send(self, response=response)

    def handle_error(self, conn, error):
        """Log *error* and emit :attr:`on_error`."""
        self.logger.debug('[%s] error: %s', conn, error)
        self.on_error.send(self, error=error)

    def _handle_message(self, message):
        """Dispatch *message* to the handlers and finish it if they did not respond."""
        if self.max_tries and message.attempts > self.max_tries:
            self.logger.warning(
                "giving up on message '%s' after max tries %d",
                message.id, self.max_tries)
            self.on_giving_up.send(self, message=message)
            return message.finish()

        self.on_message.send(self, message=message)

        if not self.is_running:
            return

        if message.is_async():
            return

        if message.has_responded():
            return

        message.finish()

    def handle_message(self, conn, message):
        """Process *message*; requeue it if handling raised.

        ``NSQRequeueMessage`` requeues using its own ``backoff`` flag (or
        ``backoff_on_requeue`` when that is None). Any other exception is
        logged, emitted on :attr:`on_exception` and requeued with backoff.
        """
        self.logger.debug('[%s] got message: %s', conn, message.id)

        try:
            return self._handle_message(message)

        except NSQRequeueMessage as error:
            if error.backoff is None:
                backoff = self.backoff_on_requeue
            else:
                backoff = error.backoff

        except Exception as error:
            backoff = True
            self.logger.exception(
                '[%s] caught exception while handling message', conn)
            self.on_exception.send(self, message=message, error=error)

        if not self.is_running:
            return

        if message.has_responded():
            return

        try:
            message.requeue(self.requeue_delay, backoff)
        except NSQException as error:
            self.logger.warning(
                '[%s] error requeueing message (%r)', conn, error)

    def _create_backoff(self):
        """Factory for the backoff timers held in the defaultdicts."""
        return BackoffTimer(max_interval=self.max_backoff_duration)

    def _start_backoff(self, conn):
        """Put *conn* into BACKOFF and schedule the switch to THROTTLED."""
        self._connections[conn] = BACKOFF

        interval = self._message_backoffs[conn].get_interval()
        gevent.spawn_later(interval, self._start_throttled, conn)

        self.logger.info('[%s] backing off for %s seconds', conn, interval)
        self.redistribute_ready_state()

    def _start_throttled(self, conn):
        """Move *conn* from BACKOFF to THROTTLED (no-op in any other state)."""
        if self._connections.get(conn) != BACKOFF:
            return

        self._connections[conn] = THROTTLED
        self.logger.info('[%s] testing backoff state with RDY 1', conn)
        self.redistribute_ready_state()

    def _complete_backoff(self, conn):
        """Return *conn* to RUNNING if its timer is reset, else back off again."""
        if self._message_backoffs[conn].is_reset():
            self._connections[conn] = RUNNING
            self.logger.info('backoff complete, resuming normal operation')
            self.redistribute_ready_state()
        else:
            self._start_backoff(conn)

    def _finish_message(self, conn, backoff):
        """Update the backoff state of *conn* after a message was answered.

        Does nothing when backoff is disabled (``max_backoff_duration`` is
        falsy), when *conn* is no longer registered, or while it is in BACKOFF.
        """
        if not self.max_backoff_duration:
            return

        try:
            state = self._connections[conn]
        except KeyError:
            return

        if state == BACKOFF:
            return

        if backoff:
            self._message_backoffs[conn].failure()
            self._start_backoff(conn)

        elif state == THROTTLED:
            self._message_backoffs[conn].success()
            self._complete_backoff(conn)

        elif state == INIT:
            self._connections[conn] = RUNNING
            self.redistribute_ready_state()

    def handle_finish(self, conn, message_id):
        """Record a finished message and emit :attr:`on_finish`."""
        self.logger.debug('[%s] finished message: %s', conn, message_id)
        self._finish_message(conn, backoff=False)
        self.on_finish.send(self, message_id=message_id)

    def handle_requeue(self, conn, message_id, timeout, backoff):
        """Record a requeued message and emit :attr:`on_requeue`."""
        self.logger.debug(
            '[%s] requeued message: %s (%s)', conn, message_id, timeout)
        self._finish_message(conn, backoff=backoff)
        self.on_requeue.send(self, message_id=message_id, timeout=timeout)
class Manager(Configurable):
    """Load-generating client that is controlled by a master over RPC.

    On construction it connects to the master, announces itself with a
    'client-started' message and starts a greenlet that periodically reports
    stats. Worker greenlets that issue the HTTP requests are started and
    stopped via :meth:`start_workers` / :meth:`stop_workers`.
    """

    def __init__(self, options):
        """Set up the RPC client, config, HTTP session and stats reporter.

        :param options: object providing ``master_host``, ``master_port`` and
            ``debug``; it is also passed to ``load_config_data``
        """
        self.manager_id = socket.gethostname() + "_" + str(uuid.uuid1())
        self.master_host = options.master_host
        self.master_port = options.master_port
        self.client = rpc.Client(self.master_host, self.master_port)
        self.config = self.load_config_data(options)
        self.workers = Group()
        self.session = requests.Session()
        # wait between two requests of one worker, in milliseconds
        self.min_wait = 1000
        self.max_wait = 1000
        self.stats = global_stats
        self.worker_count = 0
        self.debug = options.debug
        self.client.send(Message('client-started', 'greetings to master', self.identity))
        # placing stats reporter in its own group, so it doesn't get killed
        # with the workers.
        self.reporter = Group().spawn(self.stats_reporter)
        return

    @property
    def identity(self):
        """Identifier of this manager: ``<hostname>_<uuid1>``."""
        return self.manager_id

    def listener(self):
        """Receive master messages forever and dispatch on their type."""
        while True:
            msg = self.client.recv()
            if msg.type == 'quit':
                print("shutting down client: {}".format(self.identity))
                self.quit()
            elif msg.type == 'start':
                print("starting {} workers".format(msg.data))
                self.start_workers(msg.data)
            elif msg.type == 'stop':
                self.stop_workers(msg.data)
            else:
                print("Don't know what to do with message: {}".format(msg.type))

    def quit(self):
        """Stop all workers, tell the master and exit the process."""
        # close down all the workers
        self.stop_workers(0)
        # message back to master?
        self.client.send(Message('client-quit', "i'm a gonner", self.identity))
        # exit
        exit(0)

    def worker(self):
        """Request the configured url in an endless loop and fire stat events.

        For POST requests with a configured ``payload`` path, the file is read
        once (in binary mode) and sent as the body of every request.
        """
        verify = True
        if hasattr(self.config, 'verifySSL'):
            verify = self.config.verifySSL

        request = self.config.request()
        method = request.get('method')
        is_post = method.lower() == 'post'

        payload = None
        payload_path = request.get('payload')
        if is_post and payload_path:
            # 'rb' is the file mode. It used to be passed to request.get() as
            # the dict default, so the file was opened in text mode, and the
            # handle was never closed.
            with open(payload_path, 'rb') as payload_file:
                payload = payload_file.read()

        while True:
            url = self.config.request().get('url')
            stats = {}
            if self.debug:
                print("[{0}] requesting url: {1}".format(datetime.datetime.now(), url))
            start = time.time()
            try:
                if is_post:
                    response = self.session.post(url, data=payload, verify=verify)
                else:
                    response = self.session.get(url, verify=verify)
            # capture connection errors when the remote is down.
            except RequestException as e:
                events.request_failure.fire(
                    # report the method actually used (was hard-coded to 'GET')
                    request_type = method.upper(),
                    name = url,
                    response_time = 0,
                    exception = e
                )
            else:
                stats['duration'] = int((time.time() - start) * 1000)
                stats['content_size'] = len(response.content)
                stats['status_code'] = response.status_code
                stats['url'] = url
                if self.debug:
                    print("[{2}] {1} :{0}".format(stats['url'], stats['duration'], datetime.datetime.now()))
                try:
                    # calling this will throw an error for anything other than a
                    # successful response.
                    response.raise_for_status()
                except (MissingSchema, InvalidSchema, InvalidURL):
                    raise
                except RequestException as e:
                    events.request_failure.fire(
                        request_type = method.upper(),
                        name = url,
                        response_time = stats['duration'],
                        exception = e
                    )
                else:
                    events.request_success.fire(
                        request_type = method.upper(),
                        name = url,
                        response_time = stats['duration'],
                        response_length = stats['content_size']
                    )
                # NOTE(review): the slow-request checks are placed here because
                # they need stats that only exist once a response arrived --
                # confirm against the original layout.
                if stats['duration'] > SLOW_REQUEST_THRESHOLD:
                    events.request_slow.fire(
                        name = url,
                        response_time = stats['duration'],
                        response_length = stats['content_size']
                    )
                if self.debug and int((time.time() - start) * 1000) > SLOW_REQUEST_THRESHOLD:
                    print("[{0}] ****** slow request {1} :{2}".format(datetime.datetime.now(), stats['url'], stats['duration']))
            millis = random.randint(self.min_wait, self.max_wait)
            seconds = millis / 1000.0
            gevent.sleep(seconds)

    def start_workers(self, count):
        """Replace the current workers with *count* fresh ones."""
        # put random sleep in here, so that all workers aren't started at exactly the
        # same time between clients. Should stop a peak/trough request cycle
        millis = random.randint(1, 1000)
        seconds = millis / 1000.0
        gevent.sleep(seconds)
        self.stop_workers(count)
        for i in range(count):
            self.workers.spawn(self.worker)
        self.worker_count += count
        print("Started {} workers".format(count))
        return

    def stop_workers(self, count):
        """Kill ALL workers and reset the counter; *count* is accepted but ignored."""
        self.workers.kill(block=True)
        self.worker_count = 0
        print("stopped all workers")
        return

    def stats_reporter(self):
        """Send a 'client-stats' message to the master every STATS_REPORT_INTERVAL seconds."""
        while True:
            data = {'workers': self.worker_count}
            # listeners may add their own entries to the report
            events.report_to_master.fire(client_id=self.manager_id, data=data)
            if self.debug:
                print(data)
            self.client.send(Message('client-stats', data, self.manager_id))
            gevent.sleep(STATS_REPORT_INTERVAL)
class Task(object):
    """Fixed-size load job: a pool of workers draining a queue of tasks.

    Constructing a ``Task`` spawns ``num_worker`` worker greenlets and
    enqueues ``num_tasks`` items; every item makes the worker issue one
    ``GET`` request against ``url``.  Call :meth:`start` to wait until the
    queue has been drained and to post the collected statistics to the
    master.

    The class is new-style (derives from ``object``) so that the ``status``
    property's setter and deleter are honoured on Python 2.
    """

    def __init__(self, url='http://localhost:8080', num_worker=10,
                 num_tasks=100, jobKey=None):
        self.task_queue = gevent.queue.JoinableQueue()
        self.greenlet = Group()
        self._status = STATE_INIT
        self.url = url
        self.num_worker = num_worker
        self.num_tasks = num_tasks
        self.jobKey = jobKey
        # Workers are spawned first; they block on the (still empty) queue
        # until add_to_queue() fills it.
        self.spawn_workers()
        self.add_to_queue()
        self._status = STATE_HATCHING

    @property
    def status(self):
        """The status property."""
        return self._status

    @status.setter
    def status(self, value):
        self._status = value

    @status.deleter
    def status(self):
        del self._status

    def start(self, masterUrl, masterPort, slavePort, jobKey):
        """Wait for all queued tasks to finish, then report to the master.

        Blocks on the task queue, builds a report from the statistics
        stored under ``jobKey`` and POSTs it as JSON to
        ``http://<masterUrl>:<masterPort>/jobresult``.  ``masterPort`` may
        be a string or an integer.
        """
        self._status = STATE_RUNNING
        self.task_queue.join()  # block until all tasks are done
        self._status = STATE_FINISHED

        final_report = {}
        final_report["summary"] = json.loads(self.json_output_status(jobKey))
        final_report["time_series"] = json.loads(
            self.json_output_timeseries(jobKey))
        final_report["time_stamp"] = json.loads(
            self.json_output_timestamp(jobKey))
        final_report["job_status"] = self.status
        final_report["port"] = slavePort
        final_report['jobKey'] = jobKey

        # %-formatting accepts an int port; plain concatenation did not.
        result_url = "http://%s:%s/jobresult" % (masterUrl, masterPort)
        requests.post(result_url, data=json.dumps(final_report))

    def stop(self):
        """Kill every worker greenlet and mark the task as stopped."""
        self.greenlet.kill()
        self._status = STATE_STOPPED

    def spawn_workers(self):
        """Spawn ``num_worker`` greenlets running :meth:`worker`."""
        for _ in xrange(self.num_worker):
            self.greenlet.spawn(self.worker)

    def add_to_queue(self):
        """Enqueue ``num_tasks`` items (the integers 0..num_tasks-1)."""
        for item in xrange(self.num_tasks):
            self.task_queue.put(item)

    def print_results(self):
        """Important stats are printed here"""
        # Currently a no-op: the statements that printed the worker/task
        # counts and the per-endpoint entries of requests_stats.global_stats
        # were all disabled.

    def json_output_timeseries(self, jobKey):
        """Return the JSON time series of the '/' GET stats for ``jobKey``."""
        return requests_stats.global_stats.get(
            '/', 'GET', jobKey).json_output_timeseries()

    def json_output_timestamp(self, jobKey):
        """Return ``data_per_sec`` of the '/' GET stats for ``jobKey`` as JSON."""
        return json.dumps(
            requests_stats.global_stats.get('/', 'GET', jobKey).data_per_sec)

    def json_output_status(self, jobKey):
        """Return the JSON status summary of the '/' GET stats for ``jobKey``."""
        return requests_stats.global_stats.get(
            '/', 'GET', jobKey).json_output_status()

    def reset_stats(self):
        """ Resets all stats """
        # NOTE(review): unlike the json_output_* helpers this lookup passes
        # no jobKey -- confirm that global_stats.get() defaults it sensibly.
        requests_stats.global_stats.get('/', 'GET').reset()

    def do_work(self, item):
        """This method defines the task that the workers have to do.

        ``item`` is the queue entry; it is not used, every task issues the
        same GET request against ``self.url``.
        """
        self.request('GET', self.url)

    def worker(self):
        """Each worker picks a task from task_queue and completes it."""
        while True:
            item = self.task_queue.get()
            try:
                self.do_work(item)
            finally:
                # Always acknowledge, otherwise task_queue.join() in start()
                # would never return after a failed task.
                self.task_queue.task_done()

    def request(self, method, url, name=None, **kwargs):
        """Send one request and record the outcome in ``requests_stats``.

        ``name`` is accepted for compatibility but ignored: every request
        is recorded under the name ``'/'``.  Extra keyword arguments are
        forwarded to ``requests.request``.
        """
        # meta data that is used when reporting the request to the statistics
        request_meta = {"jobKey": self.jobKey}

        print("%s %s" % (method, url))
        response = self._send_request_safe_mode(method, url, **kwargs)

        # record the consumed time in milliseconds
        elapsed_ms = response.elapsed.total_seconds() * 1000
        request_meta["response_time"] = round(elapsed_ms, 3) or 0
        request_meta["request_type"] = response.request.method
        request_meta["name"] = '/'

        try:
            response.raise_for_status()
        except RequestException as e:
            request_meta['exception'] = e
            requests_stats.on_request_failure(**request_meta)
        else:
            request_meta["response_length"] = len(response.content or "")
            requests_stats.on_request_success(**request_meta)

    def _send_request_safe_mode(self, method, url, **kwargs):
        """
        Send an HTTP request, and catch any exception that might occur due
        to connection problems.

        Safe mode has been removed from requests 1.x.  Malformed-URL errors
        (MissingSchema, InvalidSchema, InvalidURL) are re-raised; any other
        RequestException is converted into a LocustResponse with status
        code 0 carrying the exception in ``error``.
        """
        try:
            return requests.request(method, url, **kwargs)
        except (MissingSchema, InvalidSchema, InvalidURL):
            raise
        except RequestException as e:
            r = LocustResponse()
            r.error = e
            r.status_code = 0  # with this status_code, content returns None
            r.request = Request(method, url).prepare()
            return r
class Bootstep(bootsteps.StartStopStep):
    """Worker boot step that discovers AirPlay devices via Bonjour.

    Two greenlets are run in a group: one browses for ``_airplay._tcp``
    services and queues every hit, the other resolves the queued services,
    stores them as ``Device`` records and broadcasts a
    ``network-device.hello`` / ``network-device.goodbye`` event.
    """

    queue = 'bonjour'

    def start(self, worker):
        """Create the resolve queue and spawn the browse/resolve loops."""
        print("Starting bonjour...")
        self._to_resolve = Queue()
        self.group = Group()
        self.group.spawn(self._browse_loop)
        self.group.spawn(self._resolve_loop)

    def stop(self, worker):
        """Kill both discovery greenlets."""
        print("Stopping bonjour...")
        self.group.kill()

    def _browse_loop(self):
        """Browse for services forever, dispatching results to the callback."""
        # FIXME: Would be nice to detect more than just airplay...
        regtype = "_airplay._tcp"
        ref = pybonjour.DNSServiceBrowse(
            regtype=regtype, callBack=self._browse_callback)
        try:
            while True:
                # Cooperative wait: yield until the browse socket is readable.
                wait_read(ref.fileno())
                pybonjour.DNSServiceProcessResult(ref)
        finally:
            ref.close()

    def _browse_callback(
            self, sdRef, flags, iface, errorCode, service, regtype,
            replydomain):
        """Queue a browsed service for resolution by ``_resolve_loop``."""
        self._to_resolve.put((iface, service, regtype, replydomain))

    def _resolve_loop(self):
        """Resolve queued services one at a time."""
        while True:
            iface, service, regtype, replydomain = self._to_resolve.get()
            ref = pybonjour.DNSServiceResolve(
                0, iface, service, regtype, replydomain,
                self._resolve_callback)
            try:
                wait_read(ref.fileno())
                pybonjour.DNSServiceProcessResult(ref)
            finally:
                ref.close()

    def _resolve_callback(
            self, ref, flags, iface_index, errorCode, fullname, host, port,
            txt):
        """Persist a resolved device and broadcast its hello/goodbye event.

        Results carrying an error code are ignored.
        """
        if errorCode != pybonjour.kDNSServiceErr_NoError:
            return

        if not (flags & pybonjour.kDNSServiceFlagsAdd):
            kind = "goodbye"
        else:
            kind = "hello"

        d, created = Device.get_or_create(
            fullname=fullname,
            defaults={'host': host, 'port': port, 'txt': txt})
        d.host = host
        d.port = port
        d.txt = txt
        d.save()

        broadcast(kind="network-device.%s" % kind,
                  fullname=fullname,
                  flags=flags,
                  iface_index=iface_index,
                  host=host,
                  port=port,
                  txt=txt,
                  )
        # str.join takes ONE iterable of strings; the previous call passed
        # four positional arguments (one of them the integer port) and
        # raised TypeError after every successful resolve.
        print(' '.join(str(part) for part in (fullname, host, port, txt)))
class DatasetDownloader(object): _queue_item_type = namedtuple("queue_item", ("hour", "sleep_until", "filename", "expect_pressures", "bad_downloads")) def __init__(self, directory, ds_time, timeout=120, first_file_timeout=600, bad_download_retry_limit=3, write_dataset=True, write_gribmirror=True, deadline=None, dataset_host="ftp.ncep.noaa.gov", dataset_path="/pub/data/nccf/com/gfs/prod/gfs.{0}/"): # set these ASAP for close() via __del__ if __init__ raises something self.success = False self._dataset = None self._gribmirror = None self._tmp_directory = None assert ds_time.hour in (0, 6, 12, 18) assert ds_time.minute == ds_time.second == ds_time.microsecond == 0 if not (write_dataset or write_gribmirror): raise ValueError("Choose write_datset or write_gribmirror " "(or both)") if deadline is None: deadline = max(datetime.now() + timedelta(hours=2), ds_time + timedelta(hours=9, minutes=30)) self.directory = directory self.ds_time = ds_time self.timeout = timeout self.first_file_timeout = first_file_timeout self.write_dataset = write_dataset self.write_gribmirror = write_gribmirror self.bad_download_retry_limit = bad_download_retry_limit self.deadline = deadline self.dataset_host = dataset_host self.dataset_path = dataset_path self.have_first_file = False self.files_complete = 0 self.files_count = 0 self.completed = Event() ds_time_str = self.ds_time.strftime("%Y%m%d%H") self.remote_directory = dataset_path.format(ds_time_str) self._greenlets = Group() self.unpack_lock = RLock() # Items in the queue are # (hour, sleep_until, filename, ...) 
# so they sort by hour, and then if a not-found adds a delay to # a specific file, files from that hour without the delay # are tried first self._files = PriorityQueue() # areas in self.dataset.array are considered 'undefined' until # self.checklist[index[:3]] is True, since unpack_grib may # write to them, and then abort via ValueError before marking # updating the checklist if the file turns out later to be bad # the checklist also serves as a sort of final sanity check: # we also have "does this file contain all the records we think it # should" checklists; see Worker._download_file self._checklist = make_checklist() def open(self): logger.info("downloader: opening files for dataset %s", self.ds_time) self._tmp_directory = \ tempfile.mkdtemp(dir=self.directory, prefix="download.") os.chmod(self._tmp_directory, 0o775) logger.debug("Temporary directory is %s", self._tmp_directory) if self.write_dataset: self._dataset = \ Dataset(self.ds_time, directory=self._tmp_directory, new=True) if self.write_gribmirror: fn = Dataset.filename(self.ds_time, directory=self._tmp_directory, suffix=Dataset.SUFFIX_GRIBMIRROR) logger.debug("Opening gribmirror (truncate and write) %s %s", self.ds_time, fn) self._gribmirror = open(fn, "w+") def download(self): logger.info("download of %s starting", self.ds_time) ttl, addresses = resolve_ipv4(self.dataset_host) logger.debug("Resolved to %s IPs", len(addresses)) addresses = [inet_ntoa(x) for x in addresses] total_timeout = self.deadline - datetime.now() total_timeout_secs = total_timeout.total_seconds() if total_timeout_secs < 0: raise ValueError("Deadline already passed") else: logger.debug("Deadline in %s", total_timeout) self._add_files() self._run_workers(addresses, total_timeout_secs) if not self.completed.is_set(): raise ValueError("timed out") if not self._checklist.all(): raise ValueError("incomplete: records missing") self.success = True logger.debug("downloaded %s successfully", self.ds_time) def _add_files(self): 
filename_prefix = self.ds_time.strftime("gfs.t%Hz.pgrb2") for hour in Dataset.axes.hour: hour_str = "{0:02}".format(hour) for bit, exp_pr in (("f", Dataset.pressures_pgrb2f), ("bf", Dataset.pressures_pgrb2bf)): self._files.put(self._queue_item_type( hour, 0, filename_prefix + bit + hour_str, exp_pr, 0)) self.files_count += 1 logger.info("Need to download %s files", self.files_count) def _run_workers(self, addresses, total_timeout_secs): logger.debug("Spawning %s workers", len(addresses) * 2) # don't ask _join_all to raise the first exception it catches # if we're already raising something in the except block raising = False try: for worker_id, address in enumerate(addresses * 2): w = DownloadWorker(self, worker_id, address) w.start() w.link() self._greenlets.add(w) # worker unhandled exceptions are raised in this greenlet # via link(). They can appear in completed.wait and # greenlets.kill(block=True) only (the only times that this # greenlet will yield) self.completed.wait(timeout=total_timeout_secs) except: # includes LinkedCompleted - a worker should not exit cleanly # until we .kill them below logger.debug("_run_workers catch %s (will reraise)", sys.exc_info()[1]) raising = True raise finally: # don't leak workers. self._join_all(raise_exception=(not raising)) def _join_all(self, raise_exception=False): # we need the loop to run to completion and so have it catch and # hold or discard exceptions for later. # track the first exception caught and re-raise that exc_info = None while len(self._greenlets): try: self._greenlets.kill(block=True) except greenlet.LinkedCompleted: # now that we've killed workers, these are expected. # ignore. 
pass except greenlet.LinkedFailed as e: if exc_info is None and raise_exception: logger.debug("_join_all catch %s " "(will reraise)", e) exc_info = sys.exc_info() else: logger.debug("_join_all discarding %s " "(already have exc)", e) if exc_info is not None: try: raise exc_info[1], None, exc_info[2] finally: # avoid circular reference del exc_info def _file_complete(self): self.files_complete += 1 self.have_first_file = True if self.files_complete == self.files_count: self.completed.set() logger.info("progress %s/%s %s%%", self.files_complete, self.files_count, self.files_complete / self.files_count * 100) def close(self, move_files=None): if move_files is None: move_files = self.success if self._dataset is not None or self._gribmirror is not None or \ self._tmp_directory is not None: if move_files: logger.info("moving downloaded files") else: logger.info("deleting failed download files") if self._dataset is not None: self._dataset.close() self._dataset = None if move_files: self._move_file() else: self._delete_file() if self._gribmirror is not None: self._gribmirror.close() self._gribmirror = None if move_files: self._move_file(Dataset.SUFFIX_GRIBMIRROR) else: self._delete_file(Dataset.SUFFIX_GRIBMIRROR) if self._tmp_directory is not None: self._remove_download_directory() self._tmp_directory = None def __del__(self): self.close() def _remove_download_directory(self): l = os.listdir(self._tmp_directory) if l: logger.warning("cleaning %s unknown file%s in temporary directory", len(l), '' if len(l) == 1 else 's') logger.debug("removing temporary directory") shutil.rmtree(self._tmp_directory) def _move_file(self, suffix=''): fn1 = Dataset.filename(self.ds_time, directory=self._tmp_directory, suffix=suffix) fn2 = Dataset.filename(self.ds_time, directory=self.directory, suffix=suffix) logger.debug("renaming %s to %s", fn1, fn2) os.rename(fn1, fn2) def _delete_file(self, suffix=''): fn = Dataset.filename(self.ds_time, directory=self._tmp_directory, suffix=suffix) 
logger.warning("deleting %s", fn) os.unlink(fn)
class Server(gevent.Greenlet):
    """Greenlet that owns the zerolog sockets and its three sub-services.

    It binds a SUB socket ("collect"), an XPUB socket ("publish") and a
    ROUTER socket ("control") to the endpoints named in
    ``config['endpoints']`` and runs a ConfigManager, a Controller and a
    Dispatcher on top of them.
    """

    def __init__(self, config, context=None, quiet=False):
        super(Server, self).__init__()
        self.config = config
        self.context = context or zmq.Context.instance()
        self.quiet = quiet

        def endpoint_for(role):
            # Look the endpoint up lazily, right where the socket is bound.
            return zerolog.get_endpoint(self.config['endpoints'][role])

        # dict of the zeromq sockets we use
        self.sockets = {}

        collector = self.context.socket(zmq.SUB)
        collector.setsockopt_string(zmq.SUBSCRIBE, '')
        collector.bind(endpoint_for('collect'))
        self.sockets['collect'] = collector

        publisher = self.context.socket(zmq.XPUB)
        publisher.hwm = 100000
        publisher.linger = 1000
        publisher.setsockopt(zmq.XPUB_VERBOSE, 1)
        publisher.bind(endpoint_for('publish'))
        self.sockets['publish'] = publisher

        router = self.context.socket(zmq.ROUTER)
        router.linger = 0
        router.bind(endpoint_for('control'))
        self.sockets['control'] = router

        self.manager = ConfigManager(self.sockets['publish'], self.config)
        self.controller = Controller(self.sockets['control'], self.manager)
        self.dispatcher = Dispatcher(
            self.sockets['collect'],
            self.sockets['publish'],
            quiet=self.quiet,
        )

        self.greenlets = Group()
        self.log = logging.getLogger('zerolog')
        self._keep_going = True

    def _run(self):
        """Start manager, controller and dispatcher, then wait for them."""
        for service in (self.manager, self.controller, self.dispatcher):
            self.greenlets.start(service)
        #self.greenlets.add(gevent.spawn(self.__client_emulator))
        self.greenlets.join()

    def kill(self, exception=gevent.GreenletExit, **kwargs):
        """Stop the sub-services, close every socket, then kill this greenlet."""
        self._keep_going = False
        self.greenlets.kill()
        for sock in self.sockets.values():
            sock.close()
        super(Server, self).kill(exception=exception, **kwargs)

    def __client_emulator(self):
        """Emulate a tool/sysadmin changing log levels.

        Every five seconds, while the server keeps going, a random
        subscribed logger gets a random level and propagate flag.
        """
        import random
        levels = 'critical error warning info debug'.split()
        while self._keep_going:
            subscribed = list(self.manager.subscribed_loggers)
            self.log.info('subscribed loggers: {0}'.format(subscribed))
            if subscribed:
                target = random.choice(subscribed)
                new_settings = {
                    'level': random.choice(levels),
                    'propagate': random.choice([0, 1]),
                }
                self.manager.update(target, new_settings)
                self.manager.configure(target)
            gevent.sleep(5)
def user_input(players, inputlet, timeout=25, type='single', trans=None):
    '''
    Collect input for `inputlet` from `players`.

    Type can be 'single', 'all' or 'any':
    - 'single': exactly one player; returns that player's result.
    - 'all': returns an OrderedDict mapping every player to a result.
    - 'any': stops at the first non-None result and returns
      (player, result), or (None, None) if nobody produced one.

    When `trans` is not given, the call re-enters itself inside a fresh
    InputTransaction. Players that have not answered when waiting ends get
    the result of parsing `None`.
    '''
    assert type in ('single', 'all', 'any')
    assert not type == 'single' or len(players) == 1

    timeout = max(0, timeout)

    g = Game.getgame()
    # NOTE: this mutates the caller's inputlet; the per-player copies below
    # inherit the timeout.
    inputlet.timeout = timeout
    players = list(players)

    if not trans:
        with InputTransaction(inputlet.tag(), players) as trans:
            return user_input(players, inputlet, timeout, type, trans)

    # Tag prefix encodes the input type: 'I:', 'I&:' or 'I|:'.
    t = {'single': '', 'all': '&', 'any': '|'}[type]
    tag = 'I{0}:{1}:'.format(t, inputlet.tag())

    # One private inputlet copy per player, each bound to its actor.
    ilets = {p: copy(inputlet) for p in players}
    for p in players:
        ilets[p].actor = p

    results = {p: None for p in players}
    synctags = {p: g.get_synctag() for p in players}

    # `players` is consumed as answers arrive; keep the original order.
    orig_players = players[:]
    input_group = GreenletGroup()
    g.gr_groups.add(input_group)
    _input_group = set()

    try:
        inputany_player = None

        def get_input_waiter(p, t):
            # Runs in its own greenlet; returns the raw data for one player,
            # or None when the player's endpoint died.
            try:
                # should be [tag, <Data for Inputlet.parse>]
                # tag likes 'I?:ChooseOption:2345'
                if p.is_npc:
                    ilet = ilets[p]
                    p.handle_user_input(trans, ilet)
                    return ilet.data()
                else:
                    tag, rst = p.client.gexpect(t)
                    return rst
            except EndpointDied:
                return None

        for p in players:
            t = tag + str(synctags[p])
            w = input_group.spawn(get_input_waiter, p, t)
            _input_group.add(w)
            w.player = p
            w.game = g  # for Game.getgame()
            w.gr_name = 'get_input_waiter: p=%r, tag=%s' % (p, t)

        for p in players:
            g.emit_event('user_input_start', (trans, ilets[p]))

        # Pending (tag, data, trans, inputlet, result) tuples whose broadcast
        # and 'user_input_finish' event have not been sent yet.
        bottom_halves = []

        def flush():
            # Broadcast every pending result and emit its finish event, then
            # empty the list in place (the closure shares the same object).
            for t, data, trans, my, rst in bottom_halves:
                g.players.client.gwrite(t, data)
                g.emit_event('user_input_finish', (trans, my, rst))

            bottom_halves[:] = []

        # Handle waiters in completion order, waiting at most timeout + 5.
        for w in iwait(_input_group, timeout=timeout + 5):
            try:
                rst = w.get()
                p, data = w.player, rst
            except:
                # A waiter that failed counts as "no data" for that player.
                p, data = w.player, None

            my = ilets[p]

            try:
                rst = my.parse(data)
            except:
                log.error('user_input: exception in .process()', exc_info=1)
                # ----- FOR DEBUG -----
                if g.IS_DEBUG:
                    raise
                # ----- END FOR DEBUG -----
                rst = None

            rst = my.post_process(p, rst)

            bottom_halves.append((
                'R{}{}'.format(tag, synctags[p]), data, trans, my, rst
            ))

            players.remove(p)
            results[p] = rst

            # For 'any' the broadcasts are held back until waiting is over.
            if type != 'any':
                flush()

            if type == 'any' and rst is not None:
                inputany_player = p
                break

    except TimeLimitExceeded:
        pass

    finally:
        # Stop the waiters that are still pending.
        input_group.kill()

    # flush bottom halves
    flush()

    # timed-out players
    for p in players:
        my = ilets[p]
        rst = my.parse(None)
        rst = my.post_process(p, rst)
        results[p] = rst
        g.emit_event('user_input_finish', (trans, my, rst))
        g.players.client.gwrite('R{}{}'.format(tag, synctags[p]), None)

    if type == 'single':
        return results[orig_players[0]]

    elif type == 'any':
        if not inputany_player:
            return None, None

        return inputany_player, results[inputany_player]

    elif type == 'all':
        return OrderedDict([(p, results[p]) for p in orig_players])

    # Unreachable: `type` was validated by the assert at the top.
    assert False, 'WTF?!'
class MasterLocustRunner(DistributedLocustRunner):
    """Master side of a distributed run.

    Keeps a registry of connected slave nodes, sends them hatch/stop/quit
    messages through an ``rpc.Server`` and processes their reports in the
    ``client_listener`` greenlet.
    """

    def __init__(self, locust_classes, options):
        super(MasterLocustRunner, self).__init__(locust_classes, options)

        class SlaveNodesDict(dict):
            """Dict of slave nodes with helpers to filter them by state."""

            def get_by_state(self, state):
                return [c for c in six.itervalues(self) if c.state == state]

            @property
            def ready(self):
                return self.get_by_state(STATE_INIT)

            @property
            def hatching(self):
                return self.get_by_state(STATE_HATCHING)

            @property
            def running(self):
                return self.get_by_state(STATE_RUNNING)

        self.run_id = None
        self.clients = SlaveNodesDict()
        self.server = rpc.Server(self.master_bind_host, self.master_bind_port)
        self.greenlet = Group()
        self.greenlet.spawn(
            self.client_listener).link_exception(callback=self.noop)

        if options.consumer:
            if options.consumer_database == "influx":
                self.consumer = InfluxStatsWriter(self, options)
            elif options.consumer_database == "mongo":
                self.consumer = MongoStatsWriter(self, options)
            else:
                logger.warning("Unrecognized consumer database: " +
                               options.consumer_database)

        # listener that gathers info on how many locust users the slaves has spawned
        def on_slave_report(client_id, data):
            if client_id not in self.clients:
                logger.info("Discarded report from unrecognized slave %s",
                            client_id)
                return

            self.clients[client_id].user_count = data["user_count"]

        events.slave_report += on_slave_report

        # register listener that sends quit message to slave nodes
        def on_quitting():
            self.quit()

        events.quitting += on_quitting

    @property
    def user_count(self):
        """Total number of users reported by all known slaves."""
        return sum([c.user_count for c in six.itervalues(self.clients)])

    def start_hatching(self, locust_count, hatch_rate, run_id=None):
        """Split ``locust_count`` users across the slaves and start hatching.

        The request is ignored (with a warning) while slaves are still
        hatching or when no slave is ready or running.  The remainder of the
        integer division is handed out one extra user per message.
        """
        if len(self.clients.hatching) > 0:
            logger.warning(
                "Clients are still hatching, ignoring hatch request")
            return

        num_slaves = len(self.clients.ready) + len(self.clients.running)
        if not num_slaves:
            logger.warning(
                "You are running in distributed mode but have no slave servers connected. "
                "Please connect slaves prior to swarming.")
            return

        if self.state == STATE_RUNNING or self.state == STATE_STOPPED:
            events.master_rehatching.fire()

        self.num_clients = locust_count
        slave_num_clients = locust_count // (num_slaves or 1)
        slave_hatch_rate = float(hatch_rate) / (num_slaves or 1)
        remaining = locust_count % num_slaves
        self.run_id = run_id

        logger.info("Sending hatch jobs to %d ready clients", num_slaves)

        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.exceptions = {}
            events.master_start_hatching.fire()

        # One hatch message per known client; the message carries no
        # recipient id (third Message argument is None).
        for client in six.itervalues(self.clients):
            data = {
                "hatch_rate": slave_hatch_rate,
                "num_clients": slave_num_clients,
                "num_requests": self.num_requests,
                "host": self.host,
                "stop_timeout": None
            }

            if remaining > 0:
                data["num_clients"] += 1
                remaining -= 1

            self.server.send(Message("hatch", data, None))

        self.stats.start_time = time()
        self.state = STATE_HATCHING

    def stop(self):
        """Send one stop message per hatching/running slave."""
        for client in self.clients.hatching + self.clients.running:
            self.server.send(Message("stop", None, None))
        events.master_stop_hatching.fire()

    def quit(self):
        """Send one quit message per known slave and kill the listener."""
        for client in six.itervalues(self.clients):
            self.server.send(Message("quit", None, None))
        self.greenlet.kill(block=True)

    def client_listener(self):
        """Receive slave messages forever and update the registry.

        Messages that refer to a slave which is not (or no longer)
        registered are discarded instead of raising ``KeyError``: an
        unhandled exception here would end the listener greenlet and with it
        all message processing on the master.
        """
        while True:
            msg = self.server.recv()
            if msg.type == "client_ready":
                node_id = msg.node_id
                self.clients[node_id] = SlaveNode(node_id)
                logger.info(
                    "Client %r reported as ready. Currently %i clients ready to swarm."
                    % (node_id, len(self.clients.ready)))
                ## emit a warning if the slave's clock seem to be out of sync with our clock
                #if abs(time() - msg.data["time"]) > 5.0:
                #    warnings.warn("The slave node's clock seem to be out of sync. For the statistics to be correct the different locust servers need to have synchronized clocks.")
            elif msg.type == "client_stopped":
                # pop() tolerates a stop report from an unknown slave.
                self.clients.pop(msg.node_id, None)
                if len(self.clients.hatching + self.clients.running) == 0:
                    self.state = STATE_STOPPED
                logger.info("Removing %s client from running clients" %
                            (msg.node_id))
            elif msg.type == "stats":
                events.slave_report.fire(client_id=msg.node_id, data=msg.data)
            elif (msg.type in ("hatching", "hatch_complete")
                    and msg.node_id not in self.clients):
                logger.info("Discarded %s message from unrecognized slave %s",
                            msg.type, msg.node_id)
            elif msg.type == "hatching":
                self.clients[msg.node_id].state = STATE_HATCHING
            elif msg.type == "hatch_complete":
                self.clients[msg.node_id].state = STATE_RUNNING
                self.clients[msg.node_id].user_count = msg.data["count"]
                if len(self.clients.hatching) == 0:
                    count = sum(c.user_count
                                for c in six.itervalues(self.clients))
                    events.hatch_complete.fire(user_count=count)
            elif msg.type == "quit":
                if msg.node_id in self.clients:
                    del self.clients[msg.node_id]
                    logger.info(
                        "Client %r quit. Currently %i clients connected." %
                        (msg.node_id, len(self.clients.ready)))
            elif msg.type == "exception":
                self.log_exception(msg.node_id, msg.data["msg"],
                                   msg.data["traceback"])

    @property
    def slave_count(self):
        """Number of slaves that are ready, hatching or running."""
        return len(self.clients.ready) + len(self.clients.hatching) + len(
            self.clients.running)
class SlaveLocustRunner(DistributedLocustRunner):
    """Slave side of a distributed run.

    Connects to the master through an ``rpc.Client``, announces itself as
    ready, executes the master's commands in the ``worker`` greenlet and
    periodically sends statistics from the ``stats_reporter`` greenlet.
    """

    def __init__(self, *args, **kwargs):
        super(SlaveLocustRunner, self).__init__(*args, **kwargs)
        # Host name plus a hash of "now + random offset" identifies this slave.
        self.client_id = socket.gethostname() + "_" + md5(
            str(time() + random.randint(0, 10000)).encode('utf-8')).hexdigest()

        self.client = rpc.Client(self.master_host, self.master_port)
        self.greenlet = Group()

        self.greenlet.spawn(self.worker).link_exception(callback=self.noop)
        self.client.send(Message("client_ready", None, self.client_id))
        self.greenlet.spawn(
            self.stats_reporter).link_exception(callback=self.noop)

        # register listener for when all locust users have hatched, and report it to the master node
        def on_hatch_complete(user_count):
            self.client.send(
                Message("hatch_complete", {"count": user_count},
                        self.client_id))

        events.hatch_complete += on_hatch_complete

        # register listener that adds the current number of spawned locusts to the report that is sent to the master node
        def on_report_to_master(client_id, data):
            data["user_count"] = self.user_count

        events.report_to_master += on_report_to_master

        # register listener that sends quit message to master
        def on_quitting():
            self.client.send(Message("quit", None, self.client_id))

        events.quitting += on_quitting

        # register listener thats sends locust exceptions to master
        def on_locust_error(locust_instance, exception, tb):
            formatted_tb = "".join(traceback.format_tb(tb))
            self.client.send(
                Message("exception", {
                    "msg": str(exception),
                    "traceback": formatted_tb
                }, self.client_id))

        events.locust_error += on_locust_error

    def worker(self):
        """Receive and execute master commands forever."""
        while True:
            msg = self.client.recv()
            if msg.type == "hatch":
                self.client.send(Message("hatching", None, self.client_id))
                job = msg.data
                self.hatch_rate = job["hatch_rate"]
                #self.num_clients = job["num_clients"]
                self.num_requests = job["num_requests"]
                self.host = job["host"]
                # The hatch payload is not guaranteed to carry a client
                # index; a plain job["client_index"] would raise KeyError and
                # end this greenlet on the first hatch message without it.
                self.client_index = job.get("client_index")
                # Pass the values as arguments so they are bound now rather
                # than looked up through ``job`` when the greenlet runs.
                self.hatching_greenlet = gevent.spawn(
                    self.start_hatching,
                    locust_count=job["num_clients"],
                    hatch_rate=job["hatch_rate"])
            elif msg.type == "stop":
                self.stop()
                self.client.send(
                    Message("client_stopped", None, self.client_id))
                self.client.send(Message("client_ready", None, self.client_id))
            elif msg.type == "quit":
                logger.info("Got quit message from master, shutting down...")
                self.stop()
                self.greenlet.kill(block=True)
            elif msg.type == "switch":
                logger.info("Test file switch to %s",
                            self.available_locustfiles[msg.data].values())
                self.locust_classes = self.available_locustfiles[
                    msg.data].values()
            elif msg.type == "config":
                logger.info(
                    "Got new config from master, updating this slave config")
                fileio.write(configuration.CONFIG_PATH, msg.data['config'])
                events.master_new_configuration.fire(
                    new_config=msg.data['config'])
            elif msg.type == "python_file":
                logger.info(
                    "Uploaded test file from master detected, writing now")
                new_file = msg.data
                upload_status, upload_message = fileio.write(
                    new_file['full_path'], new_file['content'])
                if upload_status is False:
                    logger.info("error while creating new file: " +
                                upload_message)
                self.reload_tests()

    def stats_reporter(self):
        """Send a stats message to the master every SLAVE_REPORT_INTERVAL.

        The loop ends when sending fails.
        """
        while True:
            data = {}
            events.report_to_master.fire(client_id=self.client_id, data=data)
            try:
                self.client.send(Message("stats", data, self.client_id))
            except Exception:
                # ``except Exception`` (not a bare except) so that a
                # GreenletExit from kill() is not misreported as a lost
                # connection.
                logger.error("Connection lost to master server. Aborting...")
                break

            gevent.sleep(SLAVE_REPORT_INTERVAL)
class LocustRunner(object):
    """Spawns, kills and ramps locust greenlets in a single process.

    ``self.locusts`` is the Group holding one greenlet per running locust;
    ``self.num_clients`` tracks the targeted number of locusts and
    ``self.state`` the runner's life-cycle state.
    """

    def __init__(self, locust_classes, hatch_rate, num_clients,
                 num_requests=None, host=None):
        self.locust_classes = locust_classes
        self.hatch_rate = hatch_rate
        self.num_clients = num_clients
        self.num_requests = num_requests
        self.host = host
        self.locusts = Group()
        self.state = STATE_INIT
        self.hatching_greenlet = None

        # register listener that resets stats when hatching is complete
        def on_hatch_complete(count):
            self.state = STATE_RUNNING
            print("Resetting stats\n")
            RequestStats.reset_all()

        events.hatch_complete += on_hatch_complete

    @property
    def request_stats(self):
        return RequestStats.requests

    @property
    def errors(self):
        return RequestStats.errors

    @property
    def user_count(self):
        return len(self.locusts)

    def weight_locusts(self, amount, stop_timeout=None):
        """
        Distributes the amount of locusts for each WebLocust-class according to it's weight
        returns a list "bucket" with the weighted locusts

        Classes without tasks are skipped with a warning (their weight still
        counts towards the total).  As a side effect ``host`` and
        ``stop_timeout`` are set on every used class when they are not None.
        """
        bucket = []
        weight_sum = sum((locust.weight for locust in self.locust_classes))
        for locust in self.locust_classes:
            if not locust.tasks:
                warnings.warn("Notice: Found locust (%s) got no tasks. Skipping..." % locust.__name__)
                continue

            if self.host is not None:
                locust.host = self.host
            if stop_timeout is not None:
                locust.stop_timeout = stop_timeout

            # create locusts depending on weight
            percent = locust.weight / float(weight_sum)
            num_locusts = int(round(amount * percent))
            bucket.extend([locust] * num_locusts)
        return bucket

    def spawn_locusts(self, spawn_count=None, stop_timeout=None, wait=False):
        """Hatch ``spawn_count`` locusts at ``self.hatch_rate`` per second.

        ``spawn_count`` defaults to ``self.num_clients``.  With ``wait`` set
        the call also blocks until every locust has died and then prints the
        statistics.
        """
        if spawn_count is None:
            spawn_count = self.num_clients

        if self.num_requests is not None:
            RequestStats.global_max_requests = self.num_requests

        bucket = self.weight_locusts(spawn_count, stop_timeout)
        # Rounding in weight_locusts may change the real number.
        spawn_count = len(bucket)
        if self.state == STATE_INIT or self.state == STATE_STOPPED:
            self.state = STATE_HATCHING
            self.num_clients = spawn_count
        else:
            self.num_clients += spawn_count

        print("\nHatching and swarming %i clients at the rate %g clients/s...\n" % (spawn_count, self.hatch_rate))
        occurence_count = {l.__name__: 0 for l in self.locust_classes}

        def start_locust(locust_class):
            # Use the class passed through spawn(); it is also what
            # kill_locusts() later reads back from greenlet.args[0].
            try:
                locust_class()()
            except RescheduleTaskImmediately:
                pass
            except GreenletExit:
                pass

        def hatch():
            sleep_time = 1.0 / self.hatch_rate
            while True:
                if not bucket:
                    print("All locusts hatched: %s" % ", ".join(["%s: %d" % (name, count) for name, count in occurence_count.items()]))
                    events.hatch_complete.fire(self.num_clients)
                    return

                locust = bucket.pop(random.randint(0, len(bucket) - 1))
                occurence_count[locust.__name__] += 1
                self.locusts.spawn(start_locust, locust)
                if len(self.locusts) % 10 == 0:
                    print("%i locusts hatched" % len(self.locusts))
                gevent.sleep(sleep_time)

        hatch()
        if wait:
            self.locusts.join()
            print("All locusts dead\n")
            print_stats(self.request_stats)
            print_percentile_stats(self.request_stats)  # TODO use an event listener, or such, for this?

    def kill_locusts(self, kill_count):
        """
        Kill a kill_count of weighted locusts from the Group() object in self.locusts
        """
        bucket = self.weight_locusts(kill_count)
        kill_count = len(bucket)
        self.num_clients -= kill_count
        print("killing locusts: %s" % (kill_count,))
        dying = []
        # Pair each bucket entry with one running greenlet of the same class.
        for g in self.locusts:
            for l in bucket:
                if l == g.args[0]:
                    dying.append(g)
                    bucket.remove(l)
                    break
        for g in dying:
            self.locusts.killone(g)
        events.hatch_complete.fire(self.num_clients)

    def start_hatching(self, locust_count=None, hatch_rate=None, wait=False):
        """Start hatching, or adjust the number of already running locusts.

        From the init/stopped state this spawns ``locust_count`` locusts
        (``self.num_clients`` when omitted).  Otherwise locusts are killed or
        spawned until ``locust_count`` are targeted.
        """
        print("start hatching %s %s %s" % (locust_count, hatch_rate, self.state))
        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            RequestStats.clear_all()
            RequestStats.global_start_time = time()

        # Dynamically changing the locust count
        if self.state != STATE_INIT and self.state != STATE_STOPPED:
            self.state = STATE_HATCHING
            if self.num_clients > locust_count:
                # Kill some locusts
                kill_count = self.num_clients - locust_count
                self.kill_locusts(kill_count)
            elif self.num_clients < locust_count:
                # Spawn some locusts
                if hatch_rate:
                    self.hatch_rate = hatch_rate
                spawn_count = locust_count - self.num_clients
                self.spawn_locusts(spawn_count=spawn_count)
        else:
            if hatch_rate:
                self.hatch_rate = hatch_rate
            if locust_count:
                self.spawn_locusts(locust_count, wait=wait)
            else:
                self.spawn_locusts(wait=wait)

    def stop(self):
        """Kill the hatching greenlet (if active) and every locust."""
        # if we are currently hatching locusts we need to kill the hatching greenlet first
        if self.hatching_greenlet and not self.hatching_greenlet.ready():
            self.hatching_greenlet.kill(block=True)
        self.locusts.kill(block=True)
        self.state = STATE_STOPPED

    def start_ramping(self, hatch_rate=None, max_locusts=1000,
                      hatch_stride=100, percent=0.95,
                      response_time_limit=2000, acceptable_fail=0.05,
                      precision=200, start_count=0, calibration_time=15):
        """Search for the highest client count that meets the thresholds.

        The client count is raised in steps of ``hatch_stride`` until the
        fail ratio exceeds ``acceptable_fail``, the ``percent`` percentile
        response time reaches ``response_time_limit`` or ``max_locusts`` is
        hit.  The step is then halved (never below ``precision``) while
        ramping down and up again until the step has reached ``precision``.
        """
        from rampstats import current_percentile
        if hatch_rate:
            self.hatch_rate = hatch_rate

        def ramp_down_help(clients, hatch_stride):
            print("ramping down...")
            hatch_stride = max(hatch_stride/2, precision)
            clients -= hatch_stride
            self.start_hatching(clients, self.hatch_rate)
            return clients, hatch_stride

        def ramp_up(clients, hatch_stride, boundery_found=False):
            while True:
                if self.state != STATE_HATCHING:
                    if self.num_clients >= max_locusts:
                        print("ramp up stopped due to max locusts limit reached: %s" % (max_locusts,))
                        # Rebind ``clients`` (previously assigned to a typo,
                        # ``client``) so ramp_down continues from the reduced
                        # count that ramp_down_help just hatched.
                        clients, hatch_stride = ramp_down_help(clients, hatch_stride)
                        return ramp_down(clients, hatch_stride)
                    gevent.sleep(calibration_time)
                    fail_ratio = RequestStats.sum_stats().fail_ratio
                    if fail_ratio > acceptable_fail:
                        print("ramp up stopped due to acceptable fail ratio %d%% exceeded with fail ratio %d%%" % (acceptable_fail*100, fail_ratio*100))
                        clients, hatch_stride = ramp_down_help(clients, hatch_stride)
                        return ramp_down(clients, hatch_stride)
                    p = current_percentile(percent)
                    if p >= response_time_limit:
                        print("ramp up stopped due to percentile response times getting high: %s" % (p,))
                        clients, hatch_stride = ramp_down_help(clients, hatch_stride)
                        return ramp_down(clients, hatch_stride)
                    if boundery_found and hatch_stride <= precision:
                        print("sweet spot found, ramping stopped!")
                        return
                    print("ramping up...")
                    if boundery_found:
                        hatch_stride = max((hatch_stride/2), precision)
                    clients += hatch_stride
                    self.start_hatching(clients, self.hatch_rate)
                gevent.sleep(1)

        def ramp_down(clients, hatch_stride):
            while True:
                if self.state != STATE_HATCHING:
                    if self.num_clients < max_locusts:
                        gevent.sleep(calibration_time)
                        fail_ratio = RequestStats.sum_stats().fail_ratio
                        if fail_ratio <= acceptable_fail:
                            p = current_percentile(percent)
                            if p <= response_time_limit:
                                if hatch_stride <= precision:
                                    print("sweet spot found, ramping stopped!")
                                    return
                                print("ramping up...")
                                hatch_stride = max((hatch_stride/2), precision)
                                clients += hatch_stride
                                self.start_hatching(clients, self.hatch_rate)
                                return ramp_up(clients, hatch_stride, True)
                    print("ramping down...")
                    hatch_stride = max((hatch_stride/2), precision)
                    clients -= hatch_stride
                    if clients > 0:
                        self.start_hatching(clients, self.hatch_rate)
                    else:
                        print("WARNING: no responses met the ramping thresholds, check your ramp configuration, locustfile and \"--host\" address")
                        print("ramping stopped!")
                        return
                gevent.sleep(1)

        if start_count > self.num_clients:
            self.start_hatching(start_count, hatch_rate)
        ramp_up(start_count, hatch_stride)
class Irc(Greenlet):
    """
    IRC connection abstraction running as a gevent greenlet.

    Inherited from Greenlet: start(), join(),
    kill(exception=GreenletExit, block=False, timeout=None), ready(),
    successful(), etc.

    __init__ only receives:
        tag: "myfreenode",
        fd: 2 or None

    The other options are read as attributes of the instance (self.servers,
    self.encoding, self.nick, self.username, self.realname, self.chans are
    used by the code below). According to the original author they are
    properties that delegate to conf.get; those properties are not defined in
    the visible part of the class -- NOTE(review): confirm where they live.

        servers: [Server("server.address +1234"), ...],
        encoding: "utf-8"
        network: "freenode",
        nick: "sqrl",
        username: "******",
        password: "******",
        nickservpassword: "******",
        realname: "real name",
        chans: ["#channel": {"blacklist": ["ex"]}, ...],
        scripts: ["users", "seen", "com", "version", "wik", "title", "choice", "dic", "ex", ...],
        masters: ["*!*@unaffiliated/squirrel", ...]

    NOTE(review): this file reached review with its whitespace collapsed; the
    block nesting below was reconstructed from syntax and should be confirmed
    against the original, especially in _run() and connect().
    """

    def __init__(self, tag, fd=None, me=None):
        """
        tag: name of this connection; also used as the logger name.
        fd: passed to Network(); when it is not None the connection is
            considered already established and `me` is taken as-is.
        me: (nick, user, host)-style tuple describing ourselves; only used
            when fd is given, otherwise it is initialised to (self.nick, None, None).
        """
        super(Irc, self).__init__()
        self.tag = tag
        self.net = Network(fd)
        self.group = Group()
        self.logger = logging.getLogger(self.tag)
        self.logger.setLevel(1)
        self.formatter = CuteFormatter(maxbytes=400, encoding=self.encoding)
        self.connected = fd is not None
        if self.connected:
            self.me = me
        else:
            self.me = (self.nick, None, None)

    def __repr__(self):
        return u"Irc(tag=%s)" % self.tag

    ############################################################################################### core

    def _run(self):
        """
        Greenlet entry point.

        Calls onload(), then loops forever: connects (and joins self.chans)
        when not connected, then reads, decodes and dispatches lines one at a
        time. On GreenletExit it disconnects and re-raises; GreenletRehash is
        re-raised without disconnecting. onunload() always runs on the way out.
        """
        self.onload() # let it fail
        try:
            while True:
                if not self.connected:
                    self.connect() # let it fail (should not happen, really)
                    # NOTE(review): joining inside the `if` is a reconstruction -- confirm
                    for chan in self.chans:
                        self.joinchan(chan)
                while True:
                    try:
                        line = self.net.getline()
                        line = line.decode(self.encoding)
                        try:
                            msg = Message(irc=self, line=line)
                        except MessageMalformed as e:
                            # log the bad line and carry on with the next one
                            self.onexception(e, unexpected=True)
                            continue
                        self.onmessage(msg)
                        if type(msg) == Message and msg.command == "ping":
                            self.send("PONG %s" % msg.params[0])
                        elif type(msg) == Numeric and msg.num == 1:
                            # numeric 1: take the target of the message as our nick
                            self.me = (msg.target, None, None)
                            self.onconnected(msg)
                            # message ourselves; the reply is handled by the
                            # `frommyself` branch below, which records our full sender tuple
                            self.privmsg(msg.target, "id")
                        elif msg.frommyself:
                            self.me = msg.sender
                            # room left for text in a 512-byte line of the form
                            # ":nick!user@host <PRIVMSG :text>\r\n"; 7 is the fixed
                            # overhead assumed by the author -- TODO confirm
                            self.formatter.maxbytes = 512 - 7 - len("".join(self.me).encode(self.encoding))
                            self.logger.log(OTHER, "i am {0[0]}!{0[1]}@{0[2]} and i can send {1} bytes".format(self.me, self.formatter.maxbytes))
                    except ConnectionFailed as e:
                        # drop back to the outer loop, which reconnects
                        self.onexception(e, unexpected=True)
                        self.disconnect()
                        break
                    except GreenletRehash: # don't disconnect
                        raise
                    except GreenletExit: # same as above, but disconnect
                        self.disconnect() # let it fail (should not happen, really)
                        raise
                    except Exception as e:
                        # anything else is logged and the read loop continues
                        self.onexception(e, unexpected=True)
        finally:
            self.onunload() # let it fail

    def shutdown(self, exception=GreenletExit):
        """
        Kills all greenlets in self.group and then this greenlet, without
        blocking, by throwing `exception` into them.

        This causes _run to exit. With the default GreenletExit, _run
        disconnects before exiting; other exceptions are handled as shown in
        _run's except clauses.
        """
        self.group.kill(exception, block=False)
        self.kill(exception, block=False)

    ############################################################################################### my cute methods

    def send(self, data, log=True):
        """
        Encodes one line with self.encoding, appends CRLF and sends it.

        `log` is forwarded to onsent() as its `unprocessed` flag.
        """
        self.net.send(data.encode(self.encoding) + "\r\n")
        self.onsent(data, log)

    def connect(self):
        """
        Tries every server in self.servers in order, forever, until one
        connection succeeds; then sends NICK and USER and marks us connected.

        Each ConnectionFailed is reported through onexception() and followed by
        a sleep that grows by one second per failure, starting at 10 seconds.
        NOTE(review): whether the sleep belongs to each failed attempt or to
        each full pass over the server list is a reconstruction -- confirm.
        """
        delay = 0
        while True:
            for server in self.servers:
                try:
                    self.onconnect(server)
                    self.net.connect(server.address, server.ssl)
                    self.send(u"NICK {0.nick}".format(self))
                    self.send(u"USER {0.username} lol wut :{0.realname}".format(self))
                    self.connected = True
                    return
                except ConnectionFailed as e:
                    self.onexception(e)
                    sleep(10 + delay)
                    delay += 1

    def disconnect(self):
        """
        Calls ondisconnect(), then closes the network connection and clears
        self.connected even if the hook raised.
        """
        try:
            self.ondisconnect()
        finally:
            self.net.disconnect()
            self.connected = False

    def notice(self, target, line, *args, **kwargs):
        """ formats and sends a NOTICE to target, then reports it via onsentnotice() """
        line = self._format(u"NOTICE {target} :{line}", target, line, *args, **kwargs)
        self.send(line, False)
        # 9 == len("NOTICE ") + len(" :"): strip the command prefix, keep the text
        self.onsentnotice(target, line[9 + len(target):])

    def privmsg(self, target, line, nick=None, *args, **kwargs):
        """
        formats and sends a PRIVMSG to target, then reports it via onsentprivmsg()
        if nick is given, the text is prefixed with "<nick>: "
        """
        command = u"PRIVMSG {target} :{line}" if nick is None else u"PRIVMSG {target} :%s: {line}" % nick
        line = self._format(command, target, line, *args, **kwargs)
        self.send(line, False)
        # 10 == len("PRIVMSG ") + len(" :")
        self.onsentprivmsg(target, line[10 + len(target):])

    def action(self, target, line, *args, **kwargs):
        """ formats and sends a CTCP ACTION to target, then reports it via onsentaction() """
        line = self._format(u"PRIVMSG {target} :\x01ACTION {line}\x01", target, line, *args, **kwargs)
        self.send(line, False)
        # 18 == len("PRIVMSG ") + len(" :\x01ACTION "); the trailing \x01 is not stripped
        self.onsentaction(target, line[18 + len(target):])

    def joinchan(self, chan):
        """ sends a JOIN for chan """
        self.send(u"JOIN " + chan)

    def _format(self, command, target, line, *args, **kwargs):
        """
        Builds the full protocol line for an outgoing message.

        Looks up the "lang" setting for (tag, target) with "en" as the default,
        passes `line` through the formatter's translate_format_string(), inserts
        target and line into `command`, and finally formats the result with the
        caller's args/kwargs.
        """
        kwargs["lang"] = conf.getdefault("lang", self.tag, target, "en")
        line = self.formatter.translate_format_string(line, **kwargs)
        return self.formatter.format(command.format(target=target, line=line), *args, **kwargs)

    ############################################################################################### replace me

    def onconnect(self, server):
        """ called when we *start* connecting """
        self.logger.log(OTHER, "connecting to %s..." % server)

    def onconnected(self, msg):
        """ called when we have successfully connected to a server """
        self.logger.log(OTHER, "connected to %s!" % msg.sender[0])

    def ondisconnect(self):
        """ called when we disconnect """
        self.logger.log(OTHER, "disconnecting")

    def onload(self):
        """ called when the thing starts """
        self.logger.log(OTHER, "loading")

    def onunload(self):
        """ called when the thing dies """
        self.logger.log(OTHER, "unloading")

    def onexception(self, e, unexpected=False):
        """
        called when any of the bot's internal exceptions occurs (the exception gets handled)
        unexpected exceptions are logged with a traceback, expected ones as plain errors
        """
        if unexpected:
            self.logger.exception(unicode(e))
        else:
            self.logger.error(unicode(e))

    ###

    def onmessage(self, msg):
        """
        called when the thing receives any irc message
        dispatches Privmsg and Action to their own hooks, logs everything else
        """
        if type(msg) == Privmsg:
            self.onprivmsg(msg)
        elif type(msg) == Action:
            self.onaction(msg)
        else:
            self.logger.log(IN, msg.line)

    def onprivmsg(self, msg):
        """ logs an incoming privmsg under the sender's nick (if sent to us) or the target """
        dt = msg.sender[0] if msg.target == self.me[0] else msg.target
        self.logger.log(IN_PRIVMSG, "%s | <%s> %s", dt, msg.sender[0], msg.message)

    def onaction(self, msg):
        """ logs an incoming action """
        self.logger.log(IN_ACTION, "%s | * %s %s", msg.target, msg.sender[0], msg.message)

    ###

    def onsent(self, text, unprocessed):
        """
        called on every send.
        if unprocessed is True, onsentprivmsg and the like have not been
        called for this line, so it is logged here
        """
        if unprocessed:
            self.logger.log(OUT, text)

    def onsentprivmsg(self, target, text):
        """ logs an outgoing privmsg """
        self.logger.log(OUT_PRIVMSG, "%s | <%s> %s", target, self.me[0], text)

    def onsentnotice(self, target, text):
        """ logs an outgoing notice """
        self.logger.log(OUT_NOTICE, "%s | <%s> %s", target, self.me[0], text)

    def onsentaction(self, target, text):
        """ logs an outgoing action """
        self.logger.log(OUT_ACTION, "%s | * %s %s", target, self.me[0], text)
class LocustRunner(object):
    """
    Spawns, tracks and kills locust greenlets for a load test.

    Running locusts live in the gevent Group ``self.locusts``; request
    statistics are kept in ``self.stats`` (the module-level ``global_stats``).
    ``self.state`` moves between the STATE_* constants as hatching starts,
    completes and is stopped.
    """

    def __init__(self, locust_classes, hatch_rate, num_clients, num_requests=None, host=None):
        """
        :param locust_classes: locust classes to pick from when spawning
        :param hatch_rate: locusts to spawn per second
        :param num_clients: default number of locusts to spawn
        :param num_requests: if not None, copied to ``stats.max_requests`` when spawning
        :param host: if not None, assigned to every locust class before spawning
        """
        self.locust_classes = locust_classes
        self.hatch_rate = hatch_rate
        self.num_clients = num_clients
        self.num_requests = num_requests
        self.host = host
        self.locusts = Group()
        self.state = STATE_INIT
        self.hatching_greenlet = None
        self.exceptions = {}
        self.stats = global_stats

        # register listener that resets stats when hatching is complete
        def on_hatch_complete(count):
            self.state = STATE_RUNNING
            logger.info("Resetting stats\n")
            self.stats.reset_all()
        events.hatch_complete += on_hatch_complete

    @property
    def request_stats(self):
        return self.stats.entries

    @property
    def errors(self):
        return self.stats.errors

    @property
    def user_count(self):
        return len(self.locusts)

    def weight_locusts(self, amount, stop_timeout=None):
        """
        Distributes ``amount`` locusts over the locust classes according to
        their weights and returns a list ("bucket") holding one entry per
        locust to create.

        Classes without a task_set are skipped with a warning. As a side
        effect ``host`` (if the runner has one) and ``stop_timeout`` (if
        given) are assigned on every class that is used.

        The bucket always holds exactly ``amount`` entries: per-class rounding
        errors are compensated by adding to / removing from the classes whose
        share was rounded the most.
        """
        bucket = []
        # (rounding residual, class) pairs; a list keeps tie-breaking deterministic
        residuals = []
        weight_sum = sum(locust.weight for locust in self.locust_classes if locust.task_set)
        for locust in self.locust_classes:
            if not locust.task_set:
                warnings.warn("Notice: Found Locust class (%s) got no task_set. Skipping..." % locust.__name__)
                continue

            if self.host is not None:
                locust.host = self.host
            if stop_timeout is not None:
                locust.stop_timeout = stop_timeout

            # create locusts depending on weight
            percent = locust.weight / float(weight_sum)
            exact = amount * percent
            num_locusts = int(round(exact))
            bucket.extend([locust] * num_locusts)
            residuals.append((exact - num_locusts, locust))

        missing = amount - len(bucket)
        if missing > 0:
            # too few: add one locust to the classes that were rounded down the most
            residuals.sort(key=lambda pair: pair[0], reverse=True)
            bucket.extend(locust for _, locust in residuals[:missing])
        elif missing < 0:
            # too many: drop one locust from the classes that were rounded up the most
            residuals.sort(key=lambda pair: pair[0])
            for _, locust in residuals[:-missing]:
                bucket.remove(locust)
        return bucket

    def spawn_locusts(self, spawn_count=None, stop_timeout=None, wait=False):
        """
        Spawns ``spawn_count`` locusts (default: ``self.num_clients``) at
        ``self.hatch_rate`` locusts per second and fires ``hatch_complete``
        when done. With ``wait=True`` it then blocks until every locust
        greenlet has finished.
        """
        if spawn_count is None:
            spawn_count = self.num_clients

        if self.num_requests is not None:
            self.stats.max_requests = self.num_requests

        bucket = self.weight_locusts(spawn_count, stop_timeout)
        spawn_count = len(bucket)
        if self.state == STATE_INIT or self.state == STATE_STOPPED:
            self.state = STATE_HATCHING
            self.num_clients = spawn_count
        else:
            self.num_clients += spawn_count

        logger.info("Hatching and swarming %i clients at the rate %g clients/s..." % (spawn_count, self.hatch_rate))
        occurence_count = dict((l.__name__, 0) for l in self.locust_classes)

        def start_locust(locust_class):
            # Use the spawn argument rather than a loop variable from the
            # enclosing scope, so each greenlet runs the class it was spawned
            # for (kill_locusts() relies on that same argument via g.args[0]).
            try:
                locust_class().run()
            except GreenletExit:
                pass

        def hatch():
            sleep_time = 1.0 / self.hatch_rate
            while True:
                if not bucket:
                    logger.info("All locusts hatched: %s" % ", ".join(["%s: %d" % (name, count) for name, count in occurence_count.iteritems()]))
                    events.hatch_complete.fire(self.num_clients)
                    return

                locust = bucket.pop(random.randint(0, len(bucket)-1))
                occurence_count[locust.__name__] += 1
                self.locusts.spawn(start_locust, locust)
                if len(self.locusts) % 10 == 0:
                    logger.debug("%i locusts hatched" % len(self.locusts))
                gevent.sleep(sleep_time)

        hatch()
        if wait:
            self.locusts.join()
            logger.info("All locusts dead\n")

    def kill_locusts(self, kill_count):
        """
        Kill a kill_count of weighted locusts from the Group() object in self.locusts
        """
        bucket = self.weight_locusts(kill_count)
        kill_count = len(bucket)
        self.num_clients -= kill_count
        logger.info("Killing %i locusts" % kill_count)
        dying = []
        for g in self.locusts:
            # match each running greenlet against one not-yet-used bucket entry
            for l in bucket:
                if l == g.args[0]:
                    dying.append(g)
                    bucket.remove(l)
                    break
        for g in dying:
            self.locusts.killone(g)
        events.hatch_complete.fire(self.num_clients)

    def start_hatching(self, locust_count=None, hatch_rate=None, wait=False):
        """
        Starts a test, or adjusts the number of locusts of a running one.

        When a test is already hatching/running, locusts are killed or spawned
        until ``locust_count`` are alive; otherwise stats are cleared and
        ``locust_count`` (default ``self.num_clients``) locusts are spawned.
        """
        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.stats.start_time = time()
            self.exceptions = {}

        # Dynamically changing the locust count
        if self.state != STATE_INIT and self.state != STATE_STOPPED:
            self.state = STATE_HATCHING
            if self.num_clients > locust_count:
                # Kill some locusts
                kill_count = self.num_clients - locust_count
                self.kill_locusts(kill_count)
            elif self.num_clients < locust_count:
                # Spawn some locusts
                if hatch_rate:
                    self.hatch_rate = hatch_rate
                spawn_count = locust_count - self.num_clients
                self.spawn_locusts(spawn_count=spawn_count)
            else:
                # Nothing to change: still signal completion, otherwise the
                # state set above would stay at STATE_HATCHING forever.
                events.hatch_complete.fire(self.num_clients)
        else:
            if hatch_rate:
                self.hatch_rate = hatch_rate
            if locust_count:
                self.spawn_locusts(locust_count, wait=wait)
            else:
                self.spawn_locusts(wait=wait)

    def stop(self):
        """Kills the hatching greenlet (if still running) and every locust."""
        # if we are currently hatching locusts we need to kill the hatching greenlet first
        if self.hatching_greenlet and not self.hatching_greenlet.ready():
            self.hatching_greenlet.kill(block=True)
        self.locusts.kill(block=True)
        self.state = STATE_STOPPED

    def log_exception(self, node_id, msg, formatted_tb):
        """
        Records one occurrence of an exception, grouped by traceback text:
        bumps the counter and remembers the node that reported it.
        """
        key = hash(formatted_tb)
        row = self.exceptions.setdefault(key, {"count": 0, "msg": msg, "traceback": formatted_tb, "nodes": set()})
        row["count"] += 1
        row["nodes"].add(node_id)
class LocustRunner(object): def __init__(self, locust_classes, hatch_rate, num_clients, num_requests=None, host=None): self.locust_classes = locust_classes self.hatch_rate = hatch_rate self.num_clients = num_clients self.num_requests = num_requests self.host = host self.locusts = Group() self.state = STATE_INIT self.hatching_greenlet = None # register listener that resets stats when hatching is complete def on_hatch_complete(count): self.state = STATE_RUNNING logger.info("Resetting stats\n") RequestStats.reset_all() events.hatch_complete += on_hatch_complete @property def request_stats(self): return RequestStats.requests @property def errors(self): return RequestStats.errors @property def user_count(self): return len(self.locusts) def weight_locusts(self, amount, stop_timeout = None): """ Distributes the amount of locusts for each WebLocust-class according to it's weight returns a list "bucket" with the weighted locusts """ bucket = [] weight_sum = sum((locust.weight for locust in self.locust_classes if locust.tasks)) for locust in self.locust_classes: if not locust.tasks: warnings.warn("Notice: Found locust (%s) got no tasks. Skipping..." 
% locust.__name__) continue if self.host is not None: locust.host = self.host if stop_timeout is not None: locust.stop_timeout = stop_timeout # create locusts depending on weight percent = locust.weight / float(weight_sum) num_locusts = int(round(amount * percent)) bucket.extend([locust for x in xrange(0, num_locusts)]) return bucket def spawn_locusts(self, spawn_count=None, stop_timeout=None, wait=False): if spawn_count is None: spawn_count = self.num_clients if self.num_requests is not None: RequestStats.global_max_requests = self.num_requests bucket = self.weight_locusts(spawn_count, stop_timeout) spawn_count = len(bucket) if self.state == STATE_INIT or self.state == STATE_STOPPED: self.state = STATE_HATCHING self.num_clients = spawn_count else: self.num_clients += spawn_count logger.info("Hatching and swarming %i clients at the rate %g clients/s..." % (spawn_count, self.hatch_rate)) occurence_count = dict([(l.__name__, 0) for l in self.locust_classes]) def hatch(): sleep_time = 1.0 / self.hatch_rate while True: if not bucket: logger.info("All locusts hatched: %s" % ", ".join(["%s: %d" % (name, count) for name, count in occurence_count.iteritems()])) events.hatch_complete.fire(self.num_clients) return locust = bucket.pop(random.randint(0, len(bucket)-1)) occurence_count[locust.__name__] += 1 def start_locust(_): try: locust()() except RescheduleTaskImmediately: pass except GreenletExit: pass new_locust = self.locusts.spawn(start_locust, locust) if len(self.locusts) % 10 == 0: logger.debug("%i locusts hatched" % len(self.locusts)) gevent.sleep(sleep_time) hatch() if wait: self.locusts.join() logger.info("All locusts dead\n") print_stats(self.request_stats) print_percentile_stats(self.request_stats) #TODO use an event listener, or such, for this? 
def kill_locusts(self, kill_count): """ Kill a kill_count of weighted locusts from the Group() object in self.locusts """ bucket = self.weight_locusts(kill_count) kill_count = len(bucket) self.num_clients -= kill_count logger.debug("killing locusts: %i", kill_count) dying = [] for g in self.locusts: for l in bucket: if l == g.args[0]: dying.append(g) bucket.remove(l) break for g in dying: self.locusts.killone(g) events.hatch_complete.fire(self.num_clients) def start_hatching(self, locust_count=None, hatch_rate=None, wait=False): if self.state != STATE_RUNNING and self.state != STATE_HATCHING: RequestStats.clear_all() RequestStats.global_start_time = time() # Dynamically changing the locust count if self.state != STATE_INIT and self.state != STATE_STOPPED: self.state = STATE_HATCHING if self.num_clients > locust_count: # Kill some locusts kill_count = self.num_clients - locust_count self.kill_locusts(kill_count) elif self.num_clients < locust_count: # Spawn some locusts if hatch_rate: self.hatch_rate = hatch_rate spawn_count = locust_count - self.num_clients self.spawn_locusts(spawn_count=spawn_count) else: if hatch_rate: self.hatch_rate = hatch_rate if locust_count: self.spawn_locusts(locust_count, wait=wait) else: self.spawn_locusts(wait=wait) def stop(self): # if we are currently hatching locusts we need to kill the hatching greenlet first if self.hatching_greenlet and not self.hatching_greenlet.ready(): self.hatching_greenlet.kill(block=True) self.locusts.kill(block=True) self.state = STATE_STOPPED def start_ramping(self, hatch_rate=None, max_locusts=1000, hatch_stride=100, percent=0.95, response_time_limit=2000, acceptable_fail=0.05, precision=200, start_count=0, calibration_time=15): from rampstats import current_percentile if hatch_rate: self.hatch_rate = hatch_rate def ramp_down_help(clients, hatch_stride): print "ramping down..." 
hatch_stride = max(hatch_stride/2, precision) clients -= hatch_stride self.start_hatching(clients, self.hatch_rate) return clients, hatch_stride def ramp_up(clients, hatch_stride, boundery_found=False): while True: if self.state != STATE_HATCHING: if self.num_clients >= max_locusts: print "ramp up stopped due to max locusts limit reached:", max_locusts client, hatch_stride = ramp_down_help(clients, hatch_stride) return ramp_down(clients, hatch_stride) gevent.sleep(calibration_time) fail_ratio = RequestStats.sum_stats().fail_ratio if fail_ratio > acceptable_fail: print "ramp up stopped due to acceptable fail ratio %d%% exceeded with fail ratio %d%%" % (acceptable_fail*100, fail_ratio*100) client, hatch_stride = ramp_down_help(clients, hatch_stride) return ramp_down(clients, hatch_stride) p = current_percentile(percent) if p >= response_time_limit: print "ramp up stopped due to percentile response times getting high:", p client, hatch_stride = ramp_down_help(clients, hatch_stride) return ramp_down(clients, hatch_stride) if boundery_found and hatch_stride <= precision: print "sweet spot found, ramping stopped!" return print "ramping up..." if boundery_found: hatch_stride = max((hatch_stride/2),precision) clients += hatch_stride self.start_hatching(clients, self.hatch_rate) gevent.sleep(1) def ramp_down(clients, hatch_stride): while True: if self.state != STATE_HATCHING: if self.num_clients < max_locusts: gevent.sleep(calibration_time) fail_ratio = RequestStats.sum_stats().fail_ratio if fail_ratio <= acceptable_fail: p = current_percentile(percent) if p <= response_time_limit: if hatch_stride <= precision: print "sweet spot found, ramping stopped!" return print "ramping up..." hatch_stride = max((hatch_stride/2),precision) clients += hatch_stride self.start_hatching(clients, self.hatch_rate) return ramp_up(clients, hatch_stride, True) print "ramping down..." 
hatch_stride = max((hatch_stride/2),precision) clients -= hatch_stride if clients > 0: self.start_hatching(clients, self.hatch_rate) else: print "WARNING: no responses met the ramping thresholds, check your ramp configuration, locustfile and \"--host\" address" print "ramping stopped!" return gevent.sleep(1) if start_count > self.num_clients: self.start_hatching(start_count, hatch_rate) ramp_up(start_count, hatch_stride)
class LocustRunner(object):
    """
    Spawns, tracks and kills locust greenlets for a load test.

    Running locusts live in the gevent Group ``self.locusts``; housekeeping
    greenlets (CPU monitor, step-load worker) live in ``self.greenlet``.
    ``self.state`` moves between the STATE_* constants as hatching starts,
    completes and is stopped.
    """

    def __init__(self, locust_classes, options):
        """
        :param locust_classes: locust classes to pick from when spawning
        :param options: settings object; ``hatch_rate``, ``host``,
            ``step_load``, ``reset_stats`` and ``stop_timeout`` are read from it
        """
        self.options = options
        self.locust_classes = locust_classes
        self.hatch_rate = options.hatch_rate
        self.host = options.host
        self.locusts = Group()
        self.greenlet = Group()
        self.state = STATE_INIT
        self.hatching_greenlet = None
        self.stepload_greenlet = None
        self.current_cpu_usage = 0
        self.cpu_warning_emitted = False
        self.greenlet.spawn(self.monitor_cpu)
        self.exceptions = {}
        self.stats = global_stats
        self.step_load = options.step_load

        # register listener that resets stats when hatching is complete
        def on_hatch_complete(user_count):
            self.state = STATE_RUNNING
            if self.options.reset_stats:
                logger.info("Resetting stats\n")
                self.stats.reset_all()
        events.hatch_complete += on_hatch_complete

    def __del__(self):
        # don't leave any stray greenlets if runner is removed
        # (the attribute is missing when __init__ failed early, so look it up defensively)
        greenlets = getattr(self, "greenlet", None)
        if greenlets is not None and len(greenlets) > 0:
            greenlets.kill(block=False)

    @property
    def request_stats(self):
        return self.stats.entries

    @property
    def errors(self):
        return self.stats.errors

    @property
    def user_count(self):
        return len(self.locusts)

    def cpu_log_warning(self):
        """Called at the end of the test to repeat the warning & return the status"""
        if self.cpu_warning_emitted:
            logger.warning("Loadgen CPU usage was too high at some point during the test! See https://docs.locust.io/en/stable/running-locust-distributed.html for how to distribute the load over multiple CPU cores or machines")
            return True
        return False

    def weight_locusts(self, amount):
        """
        Distributes ``amount`` locusts over the locust classes according to
        their weights and returns a list ("bucket") holding one class entry
        per locust to create.

        Classes without a task_set are skipped with a warning. As a side
        effect the runner's ``host`` (if not None) is assigned on every class
        that is used.
        """
        bucket = []
        weight_sum = sum(locust.weight for locust in self.locust_classes if locust.task_set)
        residuals = {}
        for locust in self.locust_classes:
            if not locust.task_set:
                warnings.warn("Notice: Found Locust class (%s) got no task_set. Skipping..." % locust.__name__)
                continue

            if self.host is not None:
                locust.host = self.host

            # create locusts depending on weight
            percent = locust.weight / float(weight_sum)
            exact = amount * percent
            num_locusts = int(round(exact))
            bucket.extend([locust] * num_locusts)
            # used to keep track of the amount of rounding was done if we need
            # to add/remove some instances from bucket
            residuals[locust] = exact - round(exact)

        shortfall = amount - len(bucket)
        if shortfall > 0:
            # We got too few locust classes in the bucket, so we need to create a few extra locusts,
            # and we do this by iterating over each of the Locust classes - starting with the one
            # where the residual from the rounding was the largest - and creating one of each until
            # we get the correct amount
            largest_first = sorted(residuals.items(), key=lambda x: x[1], reverse=True)
            bucket.extend(locust for locust, _ in largest_first[:shortfall])
        elif shortfall < 0:
            # We've got too many locusts due to rounding errors so we need to remove some
            smallest_first = sorted(residuals.items(), key=lambda x: x[1])
            for locust, _ in smallest_first[:-shortfall]:
                bucket.remove(locust)
        return bucket

    def spawn_locusts(self, spawn_count, wait=False):
        """
        Spawns ``spawn_count`` locusts at ``self.hatch_rate`` locusts per
        second and fires ``hatch_complete`` when done. With ``wait=True`` it
        then blocks until every locust greenlet has finished.
        """
        bucket = self.weight_locusts(spawn_count)
        spawn_count = len(bucket)
        if self.state == STATE_INIT or self.state == STATE_STOPPED:
            self.state = STATE_HATCHING

        existing_count = len(self.locusts)
        logger.info("Hatching and swarming %i users at the rate %g users/s (%i users already running)..." % (spawn_count, self.hatch_rate, existing_count))
        occurrence_count = {l.__name__: 0 for l in self.locust_classes}

        def start_locust(locust_instance):
            # Use the spawn argument rather than a loop variable from the
            # enclosing scope, so each greenlet runs the instance it was
            # spawned for (the same object other methods read via g.args[0]).
            try:
                locust_instance.run(runner=self)
            except GreenletExit:
                pass

        def hatch():
            sleep_time = 1.0 / self.hatch_rate
            while True:
                if not bucket:
                    logger.info("All locusts hatched: %s (%i already running)" % (
                        ", ".join(["%s: %d" % (name, count) for name, count in occurrence_count.items()]),
                        existing_count,
                    ))
                    events.hatch_complete.fire(user_count=len(self.locusts))
                    return

                locust = bucket.pop(random.randint(0, len(bucket) - 1))
                occurrence_count[locust.__name__] += 1
                new_locust = locust()
                self.locusts.spawn(start_locust, new_locust)
                if len(self.locusts) % 10 == 0:
                    logger.debug("%i locusts hatched" % len(self.locusts))
                # no need to wait after the last locust
                if bucket:
                    gevent.sleep(sleep_time)

        hatch()
        if wait:
            self.locusts.join()
            logger.info("All locusts dead\n")

    def kill_locusts(self, kill_count):
        """
        Kill a kill_count of weighted locusts from the Group() object in self.locusts
        """
        bucket = self.weight_locusts(kill_count)
        kill_count = len(bucket)
        logger.info("Killing %i locusts" % kill_count)
        dying = []
        for g in self.locusts:
            # match each running greenlet against one not-yet-used bucket entry
            for l in bucket:
                if l == type(g.args[0]):
                    dying.append(g)
                    bucket.remove(l)
                    break
        self.kill_locust_greenlets(dying)
        events.hatch_complete.fire(user_count=self.user_count)

    def kill_locust_greenlets(self, greenlets):
        """
        Kill running locust greenlets. If options.stop_timeout is set, we try to stop the
        Locust users gracefully
        """
        if self.options.stop_timeout:
            dying = Group()
            for g in greenlets:
                locust = g.args[0]
                if locust._state == LOCUST_STATE_WAITING:
                    # idle between tasks: safe to kill right away
                    self.locusts.killone(g)
                else:
                    # mid-task: flag it and give it stop_timeout seconds to finish
                    locust._state = LOCUST_STATE_STOPPING
                    dying.add(g)
            if not dying.join(timeout=self.options.stop_timeout):
                logger.info("Not all locusts finished their tasks & terminated in %s seconds. Killing them..." % self.options.stop_timeout)
            dying.kill(block=True)
        else:
            for g in greenlets:
                self.locusts.killone(g)

    def monitor_cpu(self):
        """
        Samples this process' CPU usage every CPU_MONITOR_INTERVAL seconds,
        forever, and warns once (until the flag is reset) when it exceeds 90%.
        """
        process = psutil.Process()
        while True:
            self.current_cpu_usage = process.cpu_percent()
            if self.current_cpu_usage > 90 and not self.cpu_warning_emitted:
                # same logger as cpu_log_warning() and the rest of the class
                logger.warning("Loadgen CPU usage above 90%! This may constrain your throughput and may even give inconsistent response time measurements! See https://docs.locust.io/en/stable/running-locust-distributed.html for how to distribute the load over multiple CPU cores or machines")
                self.cpu_warning_emitted = True
            gevent.sleep(CPU_MONITOR_INTERVAL)

    def start_hatching(self, locust_count, hatch_rate, wait=False):
        """
        Starts a test, or adjusts the number of locusts of a running one.

        When a test is already hatching/running, locusts are killed or spawned
        until ``locust_count`` are alive (``hatch_complete`` fires immediately
        if nothing has to change); otherwise stats are cleared and
        ``locust_count`` locusts are spawned.
        """
        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.exceptions = {}
            self.cpu_warning_emitted = False
            self.slave_cpu_warning_emitted = False
            events.locust_start_hatching.fire()

        # Dynamically changing the locust count
        if self.state != STATE_INIT and self.state != STATE_STOPPED:
            self.state = STATE_HATCHING
            if self.user_count > locust_count:
                # Kill some locusts
                kill_count = self.user_count - locust_count
                self.kill_locusts(kill_count)
            elif self.user_count < locust_count:
                # Spawn some locusts
                self.hatch_rate = hatch_rate
                spawn_count = locust_count - self.user_count
                self.spawn_locusts(spawn_count=spawn_count)
            else:
                events.hatch_complete.fire(user_count=self.user_count)
        else:
            self.hatch_rate = hatch_rate
            self.spawn_locusts(locust_count, wait=wait)

    def start_stepload(self, locust_count, hatch_rate, step_locust_count, step_duration):
        """
        Starts a step-load run in a background greenlet: the locust count is
        raised by ``step_locust_count`` every ``step_duration`` seconds until
        it would exceed ``locust_count``. A step-load run already in progress
        is killed first. Logs an error and does nothing when ``locust_count``
        is smaller than ``step_locust_count``.
        """
        if locust_count < step_locust_count:
            logger.error("Invalid parameters: total locust count of %d is smaller than step locust count of %d" % (locust_count, step_locust_count))
            return
        self.total_clients = locust_count
        self.hatch_rate = hatch_rate
        self.step_clients_growth = step_locust_count
        self.step_duration = step_duration

        if self.stepload_greenlet:
            logger.info("There is an ongoing swarming in Step Load mode, will stop it now.")
            self.stepload_greenlet.kill()
        logger.info("Start a new swarming in Step Load mode: total locust count of %d, hatch rate of %d, step locust count of %d, step duration of %d " % (locust_count, hatch_rate, step_locust_count, step_duration))
        self.state = STATE_INIT
        self.stepload_greenlet = self.greenlet.spawn(self.stepload_worker)
        self.stepload_greenlet.link_exception(callback=self.noop)

    def stepload_worker(self):
        """Body of the step-load greenlet; see start_stepload()."""
        current_num_clients = 0
        while self.state == STATE_INIT or self.state == STATE_HATCHING or self.state == STATE_RUNNING:
            current_num_clients += self.step_clients_growth
            if current_num_clients > int(self.total_clients):
                logger.info('Step Load is finished.')
                break
            self.start_hatching(current_num_clients, self.hatch_rate)
            logger.info('Step loading: start hatch job of %d locust.' % (current_num_clients))
            gevent.sleep(self.step_duration)

    def stop(self):
        """Kills the hatching greenlet (if still running) and every locust."""
        # if we are currently hatching locusts we need to kill the hatching greenlet first
        if self.hatching_greenlet and not self.hatching_greenlet.ready():
            self.hatching_greenlet.kill(block=True)
        self.kill_locust_greenlets([g for g in self.locusts])
        self.state = STATE_STOPPED
        events.locust_stop_hatching.fire()

    def quit(self):
        """Stops the test and kills the runner's housekeeping greenlets."""
        self.stop()
        self.greenlet.kill(block=True)

    def log_exception(self, node_id, msg, formatted_tb):
        """
        Records one occurrence of an exception, grouped by traceback text:
        bumps the counter and remembers the node that reported it.
        """
        key = hash(formatted_tb)
        row = self.exceptions.setdefault(key, {"count": 0, "msg": msg, "traceback": formatted_tb, "nodes": set()})
        row["count"] += 1
        row["nodes"].add(node_id)

    def noop(self, *args, **kwargs):
        """ Used to link() greenlets to in order to be compatible with gevent 1.0 """
        pass
def gmond_poll(
        sources, timeout=graphite_min_cycle, to_escalate=None, to_break=None,
        src_escalate=(1, 1, 2.0), default_port=8649, libc_gethostbyname=gethostname):
    '''Fetch the XML dump from possibly-multiple sources and
        return the first full dump received.

    Sources are split into "tiers" which are queried one after another:
        all sources of a tier are queried in parallel, and the next tier is
        added once the escalation timeout of the current one passes (or all
        of the running queries have failed).

    sources: iterable of sources to query - either "hostname/ip[:port]"
        string or tuple of (hostname/ip, port) / (hostname/ip,)
    src_escalate: number of sources in each tier, i.e. how many to query
            in the beginning and how many to add on each escalation
        int - that many sources,
        float - total number of sources divided by that value
            (e.g. 2.0 means "half of all the sources"),
        or iterable of ints/floats - value to use for each
            tier, the last one being used for all the remaining tiers
        Every tier gets at least one source.
    to_escalate: timeout before starting to query additional sources
        int/float or iterable of these ([1,2,3] would mean
            "wait 1s, then 2s, then 3s"), last one being used for the rest
        Default is derived from timeout, so that
            all the escalations fit into a half of it.
    to_break: int/float - timeout to stop waiting for
        data from one source (break connection), defaults to timeout
    timeout: int/float - global timeout (not counting
        libc_gethostbyname calls for all sources, if used),
        also used to calculate sensible values for to_*, if none specified
    default_port: port to use for sources specified without one
    libc_gethostbyname: callable used to resolve
        each source host before polling, pass a false value to skip that

    Raises DataPollError if no dump was received within timeout.'''
    log = logging.getLogger('gmond_amqp.poller')

    # Otherwise gevent does it's own (although parallel)
    #  gethostbyname, ignoring libc (ldap, nis, /etc/hosts), which is wrong
    # Obvious downside, is that it's serial - i.e. all hosts will be resolved here and now,
    #  before any actual xml fetching takes place, can be delayed but won't suck any less
    if not libc_gethostbyname:
        libc_gethostbyname = lambda x: x
    sources = list(
        (libc_gethostbyname(src[0]), int(src[1]) if len(src) > 1 else default_port)
        for src in (
            (src.rsplit(':', 1) if isinstance(src, types.StringTypes) else src)
            for src in sources))

    # First calculate number of escalation tiers, then pick proper intervals
    src_count = len(sources)

    def tier_size(spec):
        'Number of sources for a tier, from one src_escalate value.'
        if isinstance(spec, float):
            spec = int(src_count / spec)
        # Size of 0 would produce empty tiers forever in the loop below
        return max(1, spec)

    src_escalate = list(reversed(
        src_escalate if isinstance(src_escalate, Iterable) else [src_escalate]))
    # Empty src_escalate falls back to one source per tier
    src_slice = tier_size(src_escalate.pop()) if src_escalate else 1
    src_tiers = list()
    while sources:
        src_tier, sources = sources[:src_slice], sources[src_slice:]
        src_tiers.append(src_tier)
        if src_escalate:
            src_slice = tier_size(src_escalate.pop())

    if to_escalate is None:
        to_escalate = [
            1,  # 1s should be enough for everyone!
            ((timeout - 1) / 2.0) / ((len(src_tiers) - 1) or 1)]  # so they'll fit in half-timeout
    if not isinstance(to_escalate, Iterable):
        to_escalate = [to_escalate]
    if to_break is None:
        to_break = timeout
    # Pair each tier with its escalation timeout, repeating the last timeout as necessary
    src_tiers = zip(it.chain(to_escalate, it.repeat(to_escalate[-1])), src_tiers)
    log.debug('Escalation tiers: {}'.format(src_tiers))

    def fetch_from_src(source):
        'Read everything one source sends until EOF, return DataPollError (class) on failure.'
        try:
            with Timeout(to_break),\
                    closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
                log.debug('Fetching from source: {}'.format(source))
                sock.connect(source)
                chunks = list()
                while True:
                    chunk = sock.recv(1*2**20)
                    if not chunk:
                        return b''.join(chunks)
                    chunks.append(chunk)
        except (Timeout, socket.error) as err:
            log.debug('Connection to source {} failed ({err})'.format(source, err=err))
            return DataPollError  # indicates failure

    src_tiers = list(reversed(src_tiers))
    queries, result, sentinel = Group(), Queue(), None
    try:
        with Timeout(timeout):
            while src_tiers:
                to, src_tier = src_tiers.pop()
                for src in src_tier:
                    src = queries.spawn(fetch_from_src, src)
                    src.link(result.put)
                    src.link_exception()
                if sentinel is None or sentinel.ready():
                    sentinel = gevent.spawn(queries.join)
                    sentinel.link(result.put)  # to break/escalate if they all died
                try:
                    # No escalation timeout on the last tier - only the global one applies
                    with Timeout(to if src_tiers else None):
                        while True:
                            # Queue holds finished greenlets, second get() fetches their values
                            res = result.get(block=True).get(block=True, timeout=0)
                            if res is None:
                                raise Timeout  # sentinel result - all queries are done
                            elif res is not DataPollError:
                                return res
                except Timeout:
                    pass
                if src_tiers:
                    log.debug('Escalating to the next tier: {}'.format(src_tiers[-1]))
                else:
                    raise Timeout
    except Timeout:
        raise DataPollError('No sources could be reached in time')
    finally:
        queries.kill(block=True)
class Runner(object):
    """
    Orchestrates the load test by starting and stopping the users.

    Use one of the :meth:`create_local_runner <locust.env.Environment.create_local_runner>`,
    :meth:`create_master_runner <locust.env.Environment.create_master_runner>` or
    :meth:`create_worker_runner <locust.env.Environment.create_worker_runner>` methods on
    the :class:`Environment <locust.env.Environment>` instance to create a runner of the
    desired type.
    """

    def __init__(self, environment):
        """
        Set up greenlet groups, start the CPU monitor and register event listeners.

        :param environment: object providing ``events``, ``stats``, ``user_classes``,
            ``host``, ``reset_stats`` and ``stop_timeout`` (all are read by this class).
        """
        self.environment = environment
        self.user_greenlets = Group()
        self.greenlet = Group()
        self.state = STATE_INIT
        self.hatching_greenlet = None
        self.stepload_greenlet = None
        self.current_cpu_usage = 0
        self.cpu_warning_emitted = False
        self.greenlet.spawn(self.monitor_cpu).link_exception(greenlet_exception_handler)
        self.exceptions = {}
        self.target_user_count = None

        # set up event listeners for recording requests
        def on_request_success(request_type, name, response_time, response_length, **kwargs):
            self.stats.log_request(request_type, name, response_time, response_length)

        def on_request_failure(request_type, name, response_time, response_length, exception, **kwargs):
            self.stats.log_request(request_type, name, response_time, response_length)
            self.stats.log_error(request_type, name, exception)

        self.environment.events.request_success.add_listener(on_request_success)
        self.environment.events.request_failure.add_listener(on_request_failure)
        self.connection_broken = False

        # register listener that resets stats when hatching is complete
        def on_hatch_complete(user_count):
            self.state = STATE_RUNNING
            if environment.reset_stats:
                logger.info("Resetting stats\n")
                self.stats.reset_all()

        self.environment.events.hatch_complete.add_listener(on_hatch_complete)

    def __del__(self):
        # don't leave any stray greenlets if runner is removed.
        # getattr: __init__ may have failed before self.greenlet was assigned.
        greenlets = getattr(self, "greenlet", None)
        if greenlets and len(greenlets) > 0:
            greenlets.kill(block=False)

    @property
    def user_classes(self):
        """The user classes configured on the environment."""
        return self.environment.user_classes

    @property
    def stats(self) -> RequestStats:
        """The stats object owned by the environment."""
        return self.environment.stats

    @property
    def errors(self):
        """Shortcut for ``self.stats.errors``."""
        return self.stats.errors

    @property
    def user_count(self):
        """
        :returns: Number of currently running users
        """
        return len(self.user_greenlets)

    def cpu_log_warning(self):
        """Called at the end of the test to repeat the warning & return the status"""
        if self.cpu_warning_emitted:
            logger.warning(
                "Loadgen CPU usage was too high at some point during the test! See https://docs.locust.io/en/stable/running-locust-distributed.html for how to distribute the load over multiple CPU cores or machines"
            )
            return True
        return False

    def weight_users(self, amount):
        """
        Distribute ``amount`` users over the user classes according to their weight.

        As a side effect, ``environment.host`` (when not None) is assigned to every
        user class.

        :param amount: total number of users wanted
        :returns: a list ("bucket") holding each user class once per user to create
        """
        bucket = []
        weight_sum = sum([user.weight for user in self.user_classes])
        residuals = {}
        for user in self.user_classes:
            if self.environment.host is not None:
                user.host = self.environment.host

            # create users depending on weight
            percent = user.weight / float(weight_sum)
            num_users = int(round(amount * percent))
            bucket.extend([user for x in range(num_users)])
            # used to keep track of the amount of rounding was done if we need
            # to add/remove some instances from bucket
            residuals[user] = amount * percent - round(amount * percent)

        if len(bucket) < amount:
            # We got too few User classes in the bucket, so we need to create a few extra users,
            # and we do this by iterating over each of the User classes - starting with the one
            # where the residual from the rounding was the largest - and creating one of each until
            # we get the correct amount
            by_largest_residual = [
                l for l, r in sorted(residuals.items(), key=lambda x: x[1], reverse=True)
            ]
            for user in by_largest_residual[:amount - len(bucket)]:
                bucket.append(user)
        elif len(bucket) > amount:
            # We've got too many users due to rounding errors so we need to remove some
            by_smallest_residual = [
                l for l, r in sorted(residuals.items(), key=lambda x: x[1])
            ]
            for user in by_smallest_residual[:len(bucket) - amount]:
                bucket.remove(user)

        return bucket

    def spawn_users(self, spawn_count, hatch_rate, wait=False):
        """
        Create and start ``spawn_count`` users, at most ``hatch_rate`` per second.

        :param spawn_count: number of users to add
        :param hatch_rate: users started per second (must be non-zero)
        :param wait: if True, block until every user greenlet has finished
        """
        bucket = self.weight_users(spawn_count)
        spawn_count = len(bucket)
        if self.state == STATE_INIT or self.state == STATE_STOPPED:
            self.state = STATE_HATCHING

        existing_count = len(self.user_greenlets)
        logger.info(
            "Hatching and swarming %i users at the rate %g users/s (%i users already running)..."
            % (spawn_count, hatch_rate, existing_count))
        occurrence_count = dict([(l.__name__, 0) for l in self.user_classes])

        def hatch():
            sleep_time = 1.0 / hatch_rate
            while True:
                if not bucket:
                    logger.info(
                        "All users hatched: %s (%i already running)" % (
                            ", ".join([
                                "%s: %d" % (name, count)
                                for name, count in occurrence_count.items()
                            ]),
                            existing_count,
                        ))
                    self.environment.events.hatch_complete.fire(
                        user_count=len(self.user_greenlets))
                    return

                # pick a random class from the bucket so the classes are interleaved
                user_class = bucket.pop(random.randint(0, len(bucket) - 1))
                occurrence_count[user_class.__name__] += 1
                new_user = user_class(self.environment)
                new_user.start(self.user_greenlets)
                if len(self.user_greenlets) % 10 == 0:
                    logger.debug("%i users hatched" % len(self.user_greenlets))
                # no need to sleep after the last user
                if bucket:
                    gevent.sleep(sleep_time)

        hatch()
        if wait:
            self.user_greenlets.join()
            logger.info("All users stopped\n")

    def stop_users(self, user_count):
        """
        Stop ``user_count`` weighted users taken from the ``self.user_greenlets`` group.

        Fires ``hatch_complete`` with the resulting user count afterwards.
        """
        bucket = self.weight_users(user_count)
        user_count = len(bucket)
        logger.info("Stopping %i users" % user_count)
        to_stop = []
        for g in self.user_greenlets:
            if not bucket:
                break
            # the user instance is the first argument of its greenlet
            user = g.args[0]
            user_class = type(user)
            if user_class in bucket:
                to_stop.append(user)
                bucket.remove(user_class)
        self.stop_user_instances(to_stop)
        self.environment.events.hatch_complete.fire(user_count=self.user_count)

    def stop_user_instances(self, users):
        """
        Stop the given user instances.

        With ``environment.stop_timeout`` set, users get that many seconds to finish
        their current task before being killed; otherwise they are stopped forcibly.
        """
        if self.environment.stop_timeout:
            stopping = Group()
            for user in users:
                if not user.stop(self.user_greenlets, force=False):
                    # User.stop() returns False if the greenlet was not stopped, so we'll need
                    # to add its greenlet to our stopping Group so we can wait for it to finish its task
                    stopping.add(user._greenlet)
            if not stopping.join(timeout=self.environment.stop_timeout):
                logger.info(
                    "Not all users finished their tasks & terminated in %s seconds. Stopping them..."
                    % self.environment.stop_timeout)
            stopping.kill(block=True)
        else:
            for user in users:
                user.stop(self.user_greenlets, force=True)

    def monitor_cpu(self):
        """Poll the process CPU usage forever and warn once when it exceeds 90%."""
        process = psutil.Process()
        while True:
            self.current_cpu_usage = process.cpu_percent()
            if self.current_cpu_usage > 90 and not self.cpu_warning_emitted:
                # use the module logger, like every other message of this class
                logger.warning(
                    "Loadgen CPU usage above 90%! This may constrain your throughput and may even give inconsistent response time measurements! See https://docs.locust.io/en/stable/running-locust-distributed.html for how to distribute the load over multiple CPU cores or machines"
                )
                self.cpu_warning_emitted = True
            gevent.sleep(CPU_MONITOR_INTERVAL)

    def start(self, user_count, hatch_rate, wait=False):
        """
        Start running a load test, or adjust the user count of a running one.

        :param user_count: Number of users to start
        :param hatch_rate: Number of users to spawn per second
        :param wait: Passed on to :meth:`spawn_users` when a fresh test is started;
            if True the call blocks until all users have finished.
        """
        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.exceptions = {}
            self.cpu_warning_emitted = False
            self.worker_cpu_warning_emitted = False
            self.target_user_count = user_count

        # Dynamically changing the user count
        if self.state != STATE_INIT and self.state != STATE_STOPPED:
            self.state = STATE_HATCHING
            if self.user_count > user_count:
                # Stop some users
                stop_count = self.user_count - user_count
                self.stop_users(stop_count)
            elif self.user_count < user_count:
                # Spawn some users
                spawn_count = user_count - self.user_count
                self.spawn_users(spawn_count=spawn_count, hatch_rate=hatch_rate)
            else:
                self.environment.events.hatch_complete.fire(
                    user_count=self.user_count)
        else:
            self.hatch_rate = hatch_rate
            self.spawn_users(user_count, hatch_rate=hatch_rate, wait=wait)

    def start_stepload(self, user_count, hatch_rate, step_user_count, step_duration):
        """
        Start a step-load test in a background greenlet.

        :param user_count: total number of users to reach
        :param hatch_rate: users to spawn per second within each step
        :param step_user_count: users added per step
        :param step_duration: seconds to wait between steps
        """
        if user_count < step_user_count:
            logger.error(
                "Invalid parameters: total user count of %d is smaller than step user count of %d"
                % (user_count, step_user_count))
            return
        self.total_users = user_count

        if self.stepload_greenlet:
            logger.info(
                "There is an ongoing swarming in Step Load mode, will stop it now."
            )
            self.stepload_greenlet.kill()
        logger.info(
            "Start a new swarming in Step Load mode: total user count of %d, hatch rate of %d, step user count of %d, step duration of %d "
            % (user_count, hatch_rate, step_user_count, step_duration))
        self.state = STATE_INIT
        self.stepload_greenlet = self.greenlet.spawn(
            self.stepload_worker, hatch_rate, step_user_count, step_duration)
        self.stepload_greenlet.link_exception(greenlet_exception_handler)

    def stepload_worker(self, hatch_rate, step_users_growth, step_duration):
        """Raise the user count by ``step_users_growth`` every ``step_duration`` seconds."""
        current_num_users = 0
        while self.state == STATE_INIT or self.state == STATE_HATCHING or self.state == STATE_RUNNING:
            current_num_users += step_users_growth
            if current_num_users > int(self.total_users):
                logger.info('Step Load is finished.')
                break
            self.start(current_num_users, hatch_rate)
            logger.info('Step loading: start hatch job of %d user.' % (current_num_users))
            gevent.sleep(step_duration)

    def stop(self):
        """
        Stop a running load test by stopping all running users
        """
        self.state = STATE_CLEANUP
        # if we are currently hatching users we need to kill the hatching greenlet first
        if self.hatching_greenlet and not self.hatching_greenlet.ready():
            self.hatching_greenlet.kill(block=True)
        self.stop_user_instances([g.args[0] for g in self.user_greenlets])
        self.state = STATE_STOPPED
        self.cpu_log_warning()

    def quit(self):
        """
        Stop any running load test and kill all greenlets for the runner
        """
        self.stop()
        self.greenlet.kill(block=True)

    def log_exception(self, node_id, msg, formatted_tb):
        """
        Record one occurrence of an exception, grouped by its formatted traceback.

        :param node_id: identifier of the node that reported the exception
        :param msg: exception message (kept from the first occurrence)
        :param formatted_tb: traceback text; its hash is the grouping key
        """
        key = hash(formatted_tb)
        # setdefault stores the row in self.exceptions, so mutating it is enough
        row = self.exceptions.setdefault(key, {
            "count": 0,
            "msg": msg,
            "traceback": formatted_tb,
            "nodes": set(),
        })
        row["count"] += 1
        row["nodes"].add(node_id)
class MasterLocustRunner(DistributedLocustRunner):
    """Master-side runner: distributes hatch jobs to slaves and tracks their state."""

    def __init__(self, *args, **kwargs):
        super(MasterLocustRunner, self).__init__(*args, **kwargs)

        class SlaveNodesDict(dict):
            """Mapping of node id -> slave node with helpers to filter by state."""

            def get_by_state(self, state):
                return [c for c in six.itervalues(self) if c.state == state]

            @property
            def ready(self):
                return self.get_by_state(STATE_INIT)

            @property
            def hatching(self):
                return self.get_by_state(STATE_HATCHING)

            @property
            def running(self):
                return self.get_by_state(STATE_RUNNING)

        self.clients = SlaveNodesDict()
        self.server = rpc.Server(self.master_bind_host, self.master_bind_port)
        self.greenlet = Group()
        self.greenlet.spawn(self.client_listener).link_exception(callback=self.noop)

        # listener that gathers info on how many locust users the slaves has spawned
        def on_slave_report(client_id, data):
            if client_id not in self.clients:
                logger.info("Discarded report from unrecognized slave %s", client_id)
                return
            self.clients[client_id].user_count = data["user_count"]
        events.slave_report += on_slave_report

        # register listener that sends quit message to slave nodes
        def on_quitting():
            self.quit()
        events.quitting += on_quitting

    @property
    def user_count(self):
        """Total number of users reported by all known slaves."""
        return sum([c.user_count for c in six.itervalues(self.clients)])

    def start_hatching(self, locust_count=None, hatch_rate=None, wait=False):
        """
        Split ``locust_count`` users and ``hatch_rate`` over the connected slaves
        and send each of them a "hatch" job.

        Does nothing (apart from logging a warning) when no slave is ready or running.
        ``wait`` is accepted for interface compatibility and is not used here.
        """
        num_slaves = len(self.clients.ready) + len(self.clients.running)
        if not num_slaves:
            logger.warning("You are running in distributed mode but have no slave servers connected. "
                           "Please connect slaves prior to swarming.")
            return

        self.num_clients = locust_count
        # num_slaves is >= 1 here because of the early return above
        slave_num_clients = locust_count // num_slaves
        slave_hatch_rate = float(hatch_rate) / num_slaves
        remaining = locust_count % num_slaves

        logger.info("Sending hatch jobs to %d ready clients", num_slaves)

        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.exceptions = {}
            events.master_start_hatching.fire()

        for _ in six.itervalues(self.clients):
            data = {
                "hatch_rate": slave_hatch_rate,
                "num_clients": slave_num_clients,
                "num_requests": self.num_requests,
                "host": self.host,
                "stop_timeout": None,
            }

            # hand out the division remainder one user at a time
            if remaining > 0:
                data["num_clients"] += 1
                remaining -= 1

            self.server.send(Message("hatch", data, None))

        self.stats.start_time = time()
        self.state = STATE_HATCHING

    def stop(self):
        """Send one "stop" message per hatching/running slave and fire ``master_stop_hatching``."""
        for _ in self.clients.hatching + self.clients.running:
            self.server.send(Message("stop", None, None))
        events.master_stop_hatching.fire()

    def quit(self):
        """Send one "quit" message per known slave and kill the listener greenlet."""
        for _ in six.itervalues(self.clients):
            self.server.send(Message("quit", None, None))
        self.greenlet.kill(block=True)

    def client_listener(self):
        """
        Receive messages from the slaves forever and update the bookkeeping.

        Messages that refer to a node id the master does not know are discarded
        instead of raising ``KeyError``, which would terminate this greenlet.
        """
        while True:
            msg = self.server.recv()
            node_id = msg.node_id
            if msg.type == "client_ready":
                self.clients[node_id] = SlaveNode(node_id)
                logger.info("Client %r reported as ready. Currently %i clients ready to swarm.", node_id, len(self.clients.ready))
            elif msg.type == "client_stopped":
                # pop() tolerates a slave we never saw (or already removed)
                self.clients.pop(node_id, None)
                if len(self.clients.hatching + self.clients.running) == 0:
                    self.state = STATE_STOPPED
                logger.info("Removing %s client from running clients", node_id)
            elif msg.type == "stats":
                events.slave_report.fire(client_id=node_id, data=msg.data)
            elif msg.type == "hatching" or msg.type == "hatch_complete":
                client = self.clients.get(node_id)
                if client is None:
                    logger.info("Discarded %s message from unrecognized slave %s", msg.type, node_id)
                    continue
                if msg.type == "hatching":
                    client.state = STATE_HATCHING
                else:
                    client.state = STATE_RUNNING
                    client.user_count = msg.data["count"]
                    # only announce completion once no slave is hatching any more
                    if len(self.clients.hatching) == 0:
                        count = sum(c.user_count for c in six.itervalues(self.clients))
                        events.hatch_complete.fire(user_count=count)
            elif msg.type == "quit":
                if node_id in self.clients:
                    del self.clients[node_id]
                    logger.info("Client %r quit. Currently %i clients connected.", node_id, len(self.clients.ready))
            elif msg.type == "exception":
                self.log_exception(node_id, msg.data["msg"], msg.data["traceback"])

    @property
    def slave_count(self):
        """Number of slaves that are ready, hatching or running."""
        return len(self.clients.ready) + len(self.clients.hatching) + len(self.clients.running)
class LocustRunner(object):
    """Base runner that hatches, kills and tracks locust greenlets in one process."""

    def __init__(self, locust_classes, options):
        """
        :param locust_classes: the Locust classes to spawn instances of
        :param options: object providing ``hatch_rate``, ``num_clients``, ``host``
            and ``reset_stats``
        """
        self.options = options
        self.locust_classes = locust_classes
        self.hatch_rate = options.hatch_rate
        self.num_clients = options.num_clients
        self.host = options.host
        self.locusts = Group()
        self.greenlet = self.locusts
        self.state = STATE_INIT
        self.hatching_greenlet = None
        self.exceptions = {}
        self.stats = global_stats

        # register listener that resets stats when hatching is complete
        def on_hatch_complete(user_count):
            self.state = STATE_RUNNING
            if self.options.reset_stats:
                logger.info("Resetting stats\n")
                self.stats.reset_all()
        events.hatch_complete += on_hatch_complete

    @property
    def request_stats(self):
        """Shortcut for ``self.stats.entries``."""
        return self.stats.entries

    @property
    def errors(self):
        """Shortcut for ``self.stats.errors``."""
        return self.stats.errors

    @property
    def user_count(self):
        """Number of greenlets currently in the locust group."""
        return len(self.locusts)

    def weight_locusts(self, amount, stop_timeout=None):
        """
        Distribute ``amount`` locusts over the Locust classes according to their weight.

        Classes without a ``task_set`` are skipped with a warning. As a side effect,
        ``self.host`` and ``stop_timeout`` (when not None) are assigned to each class.

        :returns: a list ("bucket") holding each class once per locust to create
        """
        bucket = []
        weight_sum = sum((locust.weight for locust in self.locust_classes if locust.task_set))
        for locust in self.locust_classes:
            if not locust.task_set:
                warnings.warn("Notice: Found Locust class (%s) got no task_set. Skipping..." % locust.__name__)
                continue

            if self.host is not None:
                locust.host = self.host
            if stop_timeout is not None:
                locust.stop_timeout = stop_timeout

            # create locusts depending on weight
            percent = locust.weight / float(weight_sum)
            num_locusts = int(round(amount * percent))
            # list repetition instead of xrange(): works on Python 2 and 3 alike
            bucket.extend([locust] * num_locusts)
        return bucket

    def spawn_locusts(self, spawn_count=None, stop_timeout=None, wait=False):
        """
        Hatch ``spawn_count`` locusts (default: ``self.num_clients``) at ``self.hatch_rate``.

        :param wait: if True, block until all locust greenlets have finished
        """
        if spawn_count is None:
            spawn_count = self.num_clients

        bucket = self.weight_locusts(spawn_count, stop_timeout)
        spawn_count = len(bucket)
        if self.state == STATE_INIT or self.state == STATE_STOPPED:
            self.state = STATE_HATCHING
            self.num_clients = spawn_count
        else:
            self.num_clients += spawn_count

        logger.info("Hatching and swarming %i clients at the rate %g clients/s..." % (spawn_count, self.hatch_rate))
        occurrence_count = dict([(l.__name__, 0) for l in self.locust_classes])

        def start_locust(locust_class):
            # The class arrives as the greenlet argument (kill_locusts() reads it
            # back through g.args[0]); using it avoids late-binding the loop variable.
            try:
                locust_class().run(runner=self)
            except GreenletExit:
                pass

        def hatch():
            sleep_time = 1.0 / self.hatch_rate
            while True:
                if not bucket:
                    logger.info("All locusts hatched: %s" % ", ".join(
                        ["%s: %d" % (name, count) for name, count in six.iteritems(occurrence_count)]))
                    events.hatch_complete.fire(user_count=self.num_clients)
                    return

                # pick a random class from the bucket so the classes are interleaved
                locust = bucket.pop(random.randint(0, len(bucket) - 1))
                occurrence_count[locust.__name__] += 1
                self.locusts.spawn(start_locust, locust)
                if len(self.locusts) % 10 == 0:
                    logger.debug("%i locusts hatched" % len(self.locusts))
                gevent.sleep(sleep_time)

        hatch()
        if wait:
            self.locusts.join()
            logger.info("All locusts dead\n")

    def kill_locusts(self, kill_count):
        """
        Kill a kill_count of weighted locusts from the Group() object in self.locusts
        """
        bucket = self.weight_locusts(kill_count)
        kill_count = len(bucket)
        self.num_clients -= kill_count
        logger.info("Killing %i locusts" % kill_count)
        # collect first, kill afterwards: killing removes greenlets from the group
        dying = []
        for g in self.locusts:
            for l in bucket:
                if l == g.args[0]:
                    dying.append(g)
                    bucket.remove(l)
                    break
        total_dying = len(dying)
        logger.info("Dying %i locusts" % total_dying)
        logger.info("Total of %i clients before" % self.num_clients)
        for g in dying:
            try:
                self.locusts.killone(g, timeout=5)
                logger.info("Locust killed")
            except Exception:
                # best effort; GreenletExit / KeyboardInterrupt are not Exception
                # subclasses and must still propagate
                logger.info("Error on locust kill")
        logger.info("Total of %i clients after" % self.num_clients)
        events.hatch_complete.fire(user_count=self.num_clients)

    def start_hatching(self, locust_count=None, hatch_rate=None, wait=False):
        """
        Start a test, or adjust the number of locusts of a running one.

        :param locust_count: wanted number of locusts
        :param hatch_rate: locusts per second; keeps the current rate when falsy
        :param wait: passed on to :meth:`spawn_locusts` when a fresh test is started
        """
        if self.state != STATE_RUNNING and self.state != STATE_HATCHING:
            self.stats.clear_all()
            self.stats.start_time = time()
            self.exceptions = {}
            events.locust_start_hatching.fire()

        # Dynamically changing the locust count
        if self.state != STATE_INIT and self.state != STATE_STOPPED:
            self.state = STATE_HATCHING
            if self.num_clients > locust_count:
                # Kill some locusts
                kill_count = self.num_clients - locust_count
                self.kill_locusts(kill_count)
            elif self.num_clients < locust_count:
                # Spawn some locusts
                if hatch_rate:
                    self.hatch_rate = hatch_rate
                spawn_count = locust_count - self.num_clients
                self.spawn_locusts(spawn_count=spawn_count)
            else:
                events.hatch_complete.fire(user_count=self.num_clients)
        else:
            if hatch_rate:
                self.hatch_rate = hatch_rate
            if locust_count is not None:
                self.spawn_locusts(locust_count, wait=wait)
            else:
                self.spawn_locusts(wait=wait)

    def stop(self):
        """Kill the hatching greenlet (if active) and all locusts, then fire ``locust_stop_hatching``."""
        # if we are currently hatching locusts we need to kill the hatching greenlet first
        if self.hatching_greenlet and not self.hatching_greenlet.ready():
            self.hatching_greenlet.kill(block=True)
        self.locusts.kill(block=True)
        self.state = STATE_STOPPED
        events.locust_stop_hatching.fire()

    def quit(self):
        """Stop the test and kill the runner's greenlet group."""
        self.stop()
        self.greenlet.kill(block=True)

    def log_exception(self, node_id, msg, formatted_tb):
        """
        Record one occurrence of an exception, grouped by its formatted traceback.

        :param node_id: identifier of the node that reported the exception
        :param msg: exception message (kept from the first occurrence)
        :param formatted_tb: traceback text; its hash is the grouping key
        """
        key = hash(formatted_tb)
        # setdefault stores the row in self.exceptions, so mutating it is enough
        row = self.exceptions.setdefault(key, {
            "count": 0,
            "msg": msg,
            "traceback": formatted_tb,
            "nodes": set(),
        })
        row["count"] += 1
        row["nodes"].add(node_id)
class FTPPool(object):
    """Pool of logged-in FTP connections handed out to greenlets."""

    def __init__(self, addr, user, pwd, min_=1, max_=0, stat=None):
        """
        :param addr: FTP server address passed to ``FTP()``
        :param user: login name
        :param pwd: login password
        :param min_: number of connections to open up front
        :param max_: upper bound on concurrently busy greenlets (0 = unlimited)
        :param stat: optional object whose ``set_busy(n)`` receives the busy count
        """
        self.addr = addr
        self.user = user
        self.pwd = pwd
        self.pool = set()
        self.busy = Group()
        self.stat = stat
        self.max_ = max_
        for _ in range(min_):
            ftp = self._connect()
            # a failed login yields None, which must never enter the pool
            if ftp:
                self.pool.add(ftp)

    def _connect(self):
        """Open and log in a new connection; return None on a temporary login error."""
        ftp = FTP(self.addr)
        try:
            ftp.login(self.user, self.pwd)
        except error_temp as e:
            print(e)
            # do not leak the already opened control connection
            ftp.close()
            return None
        return ftp

    def _get_ftp(self):
        """
        Return an idle connection, opening a new one when the pool is empty.

        While ``max_`` greenlets are already busy this waits, and it re-checks the
        pool after every wait so that a connection released in the meantime is reused.
        """
        while True:
            if self.pool:
                return self.pool.pop()
            # >= : the caller adds one more busy greenlet right after we return
            if self.max_ and len(self.busy) >= self.max_:
                sleep(0.1)
                continue
            ftp = self._connect()
            if ftp:
                return ftp

    def _release_ftp(self, gr, ftp):
        """Return ``ftp`` to the pool if ``gr`` succeeded, otherwise close it."""
        if gr.successful():
            self.pool.add(ftp)
        else:
            # the connection state is unknown after a failure, so drop it
            ftp.close()
        self.busy.discard(gr)
        if self.stat:
            self.stat.set_busy(len(self.busy))

    def spawn_ftp(self, func, *a, **kw):
        """Run ``func(ftp, *a, **kw)`` in a new greenlet with a pooled connection."""
        ftp = self._get_ftp()
        gr = spawn(func, ftp, *a, **kw)
        gr.link(lambda g: self._release_ftp(g, ftp))
        self.busy.add(gr)
        if self.stat:
            self.stat.set_busy(len(self.busy))

    def wait(self):
        """Block until all busy greenlets have finished."""
        self.busy.join()

    def break_all(self):
        """Kill all busy greenlets."""
        self.busy.kill()
class GeventReactor(posixbase.PosixReactorBase):
    """Implement gevent-powered reactor based on PosixReactorBase.

    Delayed calls are kept in ``self._callqueue`` (inserted with ``insort``) and
    executed by :meth:`mainLoop`. One greenlet per registered reader/writer is
    kept in ``self._reads`` / ``self._writes``.
    """
    # NOTE(review): presumably zope.interface class advice declaring the provided interface — confirm
    implements(IReactorGreenlets)
    def __init__(self,*args):
        self.greenlet = None            # greenlet running mainLoop(); set when the loop starts
        self.greenletpool = Group()
        self._reads = {}                # selectable -> reader greenlet
        self._writes = {}               # selectable -> writer greenlet
        self._callqueue = []            # pending DelayedCall objects
        self._wake = 0                  # time at which the main loop plans to wake up
        self._wait = 0                  # 1 while the main loop is inside gevent.sleep()
        self.resolver = GeventResolver(self)
        # instance attribute bound directly to Group.add; it takes precedence over
        # the addToGreenletPool() method defined further down
        self.addToGreenletPool = self.greenletpool.add
        posixbase.PosixReactorBase.__init__(self,*args)
        self._initThreads()
        self._initThreadPool()
        self._initGreenletPool()
    def mainLoop(self):
        """This main loop yields to gevent until the end, handling function calls along the way."""
        self.greenlet = gevent.getcurrent()
        callqueue = self._callqueue
        seconds = self.seconds
        try:
            while 1:
                self._wait = 0
                now = seconds()
                if len(callqueue) > 0:
                    # sleep until the first queued call is due
                    self._wake = delay = callqueue[0].time
                    delay -= now
                else:
                    # nothing queued: sleep for 300 seconds unless rescheduled
                    self._wake = now+300
                    delay = 300
                try:
                    self._wait = 1
                    gevent.sleep(max(0,delay))
                    self._wait = 0
                except Reschedule:
                    # thrown in by reschedule(): recompute the delay from the queue head
                    continue
                now = seconds()
                # run every call at the head of the queue that is due
                while 1:
                    try:
                        c = callqueue[0]
                    except IndexError:
                        break
                    if c.time <= now:
                        del callqueue[0]
                        try:
                            c()
                        except GreenletExit:
                            raise
                        except:
                            # any other error raised by the call is logged and the loop goes on
                            log.msg('Unexpected error in main loop.')
                            log.err()
                    else:
                        break
        except (GreenletExit,KeyboardInterrupt):
            pass
        log.msg('Main loop terminated.')
        self.fireSystemEvent('shutdown')
    def addReader(self,selectable):
        """Add a FileDescriptor for notification of data available to read."""
        try:
            # already known: resume its existing greenlet
            self._reads[selectable].resume()
        except KeyError:
            self._reads[selectable] = g = Stream.spawn(self,selectable,'doRead')
            self.addToGreenletPool(g)
    def addWriter(self,selectable):
        """Add a FileDescriptor for notification of data available to write."""
        try:
            # already known: resume its existing greenlet
            self._writes[selectable].resume()
        except KeyError:
            self._writes[selectable] = g = Stream.spawn(self,selectable,'doWrite')
            self.addToGreenletPool(g)
    def removeReader(self,selectable):
        """Remove a FileDescriptor for notification of data available to read."""
        try:
            if selectable.disconnected:
                # disconnected: kill the greenlet and forget it
                self._reads[selectable].kill(block=False)
                del self._reads[selectable]
            else:
                # still connected: only pause, addReader() resumes it
                self._reads[selectable].pause()
        except KeyError:
            pass
    def removeWriter(self,selectable):
        """Remove a FileDescriptor for notification of data available to write."""
        try:
            if selectable.disconnected:
                # disconnected: kill the greenlet and forget it
                self._writes[selectable].kill(block=False)
                del self._writes[selectable]
            else:
                # still connected: only pause, addWriter() resumes it
                self._writes[selectable].pause()
        except KeyError:
            pass
    def discardReader(self,selectable):
        """Remove a FileDescriptor without checking."""
        try:
            del self._reads[selectable]
        except KeyError:
            pass
    def discardWriter(self,selectable):
        """Remove a FileDescriptor without checking."""
        try:
            del self._writes[selectable]
        except KeyError:
            pass
    def getReaders(self):
        """Return the selectables currently registered as readers."""
        return self._reads.keys()
    def getWriters(self):
        """Return the selectables currently registered as writers."""
        return self._writes.keys()
    def removeAll(self):
        """Delegate to the inherited ``_removeAll`` with the reader and writer mappings."""
        return self._removeAll(self._reads,self._writes)
    # IReactorTime
    seconds = staticmethod(runtimeSeconds)
    def callLater(self,*args,**kw):
        """Queue a delayed call and return it.

        Called as ``callLater(delay, func, *args, **kw)`` it creates a new
        DelayedCall due at ``seconds() + delay``. Called with an existing
        DelayedCall as the first argument, that call is removed from the queue and
        inserted again; None is returned if it is not in the queue.
        """
        if isinstance(args[0],DelayedCall):
            c = args[0]
            try:
                self._callqueue.remove(c)
            except ValueError:
                return None
        else:
            c = DelayedCall(self,self.seconds()+args[0],args[1],args[2:],kw,seconds=self.seconds)
        insort(self._callqueue,c)
        # wake the main loop early if this call is now the first one due
        self.reschedule()
        return c
    def getDelayedCalls(self):
        """Return a copy of the queue of pending delayed calls."""
        return list(self._callqueue)
    def cancelCallLater(self,callID):    # deprecated
        """Remove ``callID`` from the queue; raises ValueError if it is not queued."""
        self._callqueue.remove(callID)
        self.reschedule()
    # IReactorGreenlets
    def _initGreenletPool(self):
        """Register ``_stopGreenletPool`` to run during the 'shutdown' system event."""
        self.greenletpoolShutdownID = self.addSystemEventTrigger('during','shutdown',self._stopGreenletPool)
    def _stopGreenletPool(self):
        """Kill every greenlet in the pool."""
        self.greenletpool.kill()
    def getGreenletPool(self):
        """Return the Group holding the reactor's greenlets."""
        return self.greenletpool
    def callInGreenlet(self,*args,**kwargs):
        """Spawn the given callable in a new greenlet and add it to the pool."""
        self.addToGreenletPool(Greenlet.spawn_later(0,*args,**kwargs))
    def callFromGreenlet(self,*args,**kw):
        """Queue ``args[0](*args[1:], **kw)`` to run in the main loop as soon as possible."""
        c = DelayedCall(self,self.seconds(),args[0],args[1:],kw,seconds=self.seconds)
        insort(self._callqueue,c)
        self.reschedule()
        return c
    def suggestGreenletPoolSize(self,size):
        """Accepted for interface compatibility; the size is ignored."""
        pass
    def addToGreenletPool(self,g):
        """Add greenlet ``g`` to the pool (shadowed per instance by the attribute set in __init__)."""
        self.greenletpool.add(g)
    # IReactorThreads
    def _initThreads(self):    # do not initialize ThreadedResolver, since we are using GeventResolver
        self.usingThreads = True
    # calls coming from threads are queued exactly like calls from greenlets
    callFromThread = callFromGreenlet
    # IReactorCore
    def stop(self):
        """Stop the reactor by killing the main-loop greenlet.

        mainLoop() catches the resulting GreenletExit and fires the 'shutdown'
        system event.
        """
        # NOTE(review): a call to gevent.sleep with time 0 is put at the head of the
        # queue before killing; the purpose is not evident from this code — confirm
        self._callqueue.insert(0,DelayedCall(self,0,gevent.sleep,(),{},seconds=self.seconds))
        gevent.kill(self.greenlet)
    def reschedule(self):
        """Interrupt the main loop's sleep if the first queued call is due before its planned wake-up."""
        if self._wait and len(self._callqueue) > 0 and self._callqueue[0].time < self._wake:
            # throw Reschedule into the sleeping main loop so it recomputes its delay
            gevent.kill(self.greenlet,Reschedule)
            self._wait = 0
class Slave(Client):
    """Slave agent: receives commands from the master and manages local locust processes."""

    def __init__(self, *args, **kwargs):
        super(Slave, self).__init__(*args, **kwargs)
        # host name plus a hash of time and a random number
        self.client_id = socket.gethostname() + "_" + md5(
            str(time() + random.randint(0, 10000)).encode('utf-8')).hexdigest()
        logger.info("Client id:%r" % self.client_id)
        self.state = STATE_INIT
        self.slave_num = 0          # number of locust processes currently alive
        self.file_name = ''         # path of the most recently saved script
        self.cpu_num = ps.cpu_count()
        self.processes = []
        self.greenlet = Group()
        # start the gevent greenlets
        self.greenlet.spawn(self.worker).link_exception(callback=self.noop)
        self.greenlet.spawn(self.ready_loop).link_exception(callback=self.noop)

        def on_quitting():
            self.send(Message("quit", None, self.client_id))
            self.greenlet.kill(block=True)
        events.quitting += on_quitting

    def worker(self):
        """Message loop (runs in a greenlet): handle every message addressed to this client."""
        while True:
            msg = self.recv()
            if msg.node_id != self.client_id:
                continue
            logger.info('Slave: Get new msg from master - [%s]' % msg.type)
            if msg.type == "send_script":
                # save the received load-test script into ./script
                self._save_script(msg.data["filename"], msg.data["script"])
            elif msg.type == "run":
                # start the locust processes, then report to the master how many
                # of them were started successfully
                if self.state != STATE_RUNNING:
                    self.run_locusts(master_host=self.host, nums=msg.data["num"],
                                     file_name=msg.data["filename"])
                if self.slave_num > 0:
                    self.state = STATE_RUNNING
                    logger.info("Client %s run OK!" % self.client_id)
                else:
                    self.state = STATE_INIT
                self.send(Message("slave_num", self.slave_num, self.client_id))
            elif msg.type == "stop":
                # stop the locust processes and report the new state to the master
                logger.info("Client %s stopped!" % self.client_id)
                self.stop()
                self.send(Message("slave_num", self.slave_num, self.client_id))
            elif msg.type == "quit":
                # sent by the master when it exits: shut this slave down
                logger.info("Got quit message from master, shutting down...")
                self.stop()
                self.greenlet.kill(block=True)
            elif msg.type == "get_filelist":
                # report the names of the .py files below ./script
                if os.path.exists("./script/"):
                    file_list = []
                    for root, dirs, files in os.walk("./script/"):
                        for f in files:
                            if os.path.splitext(f)[1] == '.py':
                                file_list.append(f)
                    self.send(Message("file_list", file_list, self.client_id))
                else:
                    self.send(Message("file_list", None, self.client_id))
            elif msg.type == "get_psinfo":
                # report the resource state of this machine: IP, CPU, memory, network
                self.send(Message("psinfo", self._collect_psinfo(), self.client_id))
            elif msg.type == "clear_folder":
                # remove the script folder
                if os.path.exists("./script/"):
                    shutil.rmtree("./script")
                self.send(Message("clear_folder", None, self.client_id))

    def _save_script(self, filename, script):
        """Write ``script`` to ``./script/<basename of filename>`` and remember the path."""
        logger.info("Save script to file...")
        if not os.path.exists("./script/"):
            os.mkdir("./script/")
        # the name arrives over the network: keep only the base name so the
        # file cannot be written outside the script folder
        self.file_name = os.path.join("./script/", os.path.basename(filename))
        with codecs.open(self.file_name, 'w', encoding='utf-8') as f:
            f.write(script)
        logger.info("Script saved into file:%s" % self.file_name)

    def _collect_psinfo(self):
        """Return a dict with CPU, memory, network and IP information of this machine."""
        ip = socket.gethostbyname(socket.gethostname())
        # two samples one second apart give the per-second network rates
        nets = ps.net_io_counters()
        sleep(1)
        nets1 = ps.net_io_counters()
        net = {'sent': nets1.bytes_sent / 1024,
               'recv': nets1.bytes_recv / 1024,
               'per_sec_sent': (nets1.bytes_sent - nets.bytes_sent) / 1024,
               'per_sec_recv': (nets1.bytes_recv - nets.bytes_recv) / 1024}
        cpu_times = ps.cpu_percent(interval=0.1)
        cpu_logical_nums = ps.cpu_count()
        cpu_nums = ps.cpu_count(logical=False)
        cpu_freq = ps.cpu_freq()
        if cpu_freq is not None:
            freq = {'current': cpu_freq.current, 'min': cpu_freq.min, 'max': cpu_freq.max}
        else:
            # no frequency information available: report zeros
            freq = {'current': 0, 'min': 0, 'max': 0}
        cpu = {'num': cpu_nums, 'logical_num': cpu_logical_nums,
               'percent': cpu_times, 'freq': freq}
        mems = ps.virtual_memory()
        mem = {'total': mems.total / 1024 / 1024,
               'available': mems.available / 1024 / 1024,
               'percent': mems.percent}
        return {"cpu": cpu, "mem": mem, "net": net, "IP": ip}

    def ready_loop(self):
        """Report the current state to the master once per minute."""
        while True:
            # send the ready state to the master
            logger.info('Send ready to server!')
            self.send(Message("slave_ready", self.slave_num, self.client_id))
            gevent.sleep(60)

    def stop(self):
        """Terminate all locust processes started by this slave."""
        self.state = STATE_STOPPED
        self.slave_num = 0
        for p in self.processes:
            try:
                for child in p.children():
                    child.terminate()
                p.terminate()
            except ps.NoSuchProcess:
                # the process already exited on its own; nothing left to terminate
                continue
            logger.info("Quit a locust client process!")
        self.processes = []

    def run_locusts(self, master_host, nums, file_name):
        """
        Start locust slave processes and record how many are alive afterwards.

        :param master_host: value for locust's ``--master-host`` option
        :param nums: wanted number of processes; values below 1 or above the
            logical CPU count are replaced by the CPU count
        :param file_name: list of script names below ./script; reused cyclically
            when it is shorter than the number of processes
        """
        # number of processes, never more than the number of logical CPU cores
        if int(nums) > self.cpu_num or int(nums) < 1:
            slave_num = self.cpu_num
        else:
            slave_num = int(nums)

        if not file_name:
            # nothing to run (the modulo below would divide by zero)
            logger.warning("No script file given, no locust process started")
            self.slave_num = 0
            return

        # script for each process; cycle through the list if it is too short
        script_file = [os.path.join('./script/', file_name[i % len(file_name)])
                       for i in range(slave_num)]

        # start the processes
        for script in script_file:
            # argument list without a shell: the script names come from the
            # network and must not be interpreted by a shell
            cmd = ['locust', '-f', script, '--slave', '--no-reset-stats',
                   '--master-host=%s' % master_host]
            print(' '.join(cmd))
            try:
                p = ps.Popen(cmd, stdout=None, stderr=None)
            except OSError:
                logger.exception("Failed to start locust process for %s", script)
                continue
            self.processes.append(p)
        sleep(1)

        # keep only the processes that are still alive
        alive = [p for p in self.processes if p.poll() is None]
        self.processes = alive
        self.slave_num = len(alive)
class Runner:
    """
    Orchestrates the load test by starting and stopping the users.

    Use one of the :meth:`create_local_runner <locust.env.Environment.create_local_runner>`,
    :meth:`create_master_runner <locust.env.Environment.create_master_runner>` or
    :meth:`create_worker_runner <locust.env.Environment.create_worker_runner>` methods on
    the :class:`Environment <locust.env.Environment>` instance to create a runner of the
    desired type.
    """

    def __init__(self, environment):
        """
        Set up runner state, start the CPU monitor greenlet and register the
        event listeners that record request statistics.

        :param environment: object exposing ``events``, ``stats``, ``user_classes``,
            ``host``, ``reset_stats`` and ``stop_timeout`` (as used by this class)
        """
        self.environment = environment
        self.user_greenlets = Group()
        self.greenlet = Group()
        self.state = STATE_INIT
        self.spawning_greenlet = None
        self.shape_greenlet = None
        self.shape_last_state = None
        self.current_cpu_usage = 0
        self.cpu_warning_emitted = False
        self.greenlet.spawn(self.monitor_cpu).link_exception(greenlet_exception_handler)
        self.exceptions = {}
        self.target_user_count = None

        # set up event listeners for recording requests
        def on_request_success(request_type, name, response_time, response_length, **kwargs):
            self.stats.log_request(request_type, name, response_time, response_length)

        def on_request_failure(request_type, name, response_time, response_length, exception, **kwargs):
            self.stats.log_request(request_type, name, response_time, response_length)
            self.stats.log_error(request_type, name, exception)

        self.environment.events.request_success.add_listener(on_request_success)
        self.environment.events.request_failure.add_listener(on_request_failure)
        self.connection_broken = False

        # register listener that resets stats when spawning is complete
        def on_spawning_complete(user_count):
            self.update_state(STATE_RUNNING)
            if environment.reset_stats:
                logger.info("Resetting stats\n")
                self.stats.reset_all()

        self.environment.events.spawning_complete.add_listener(on_spawning_complete)

    def __del__(self):
        # don't leave any stray greenlets if runner is removed
        # getattr: __init__ may have failed (or been bypassed) before the group
        # was assigned, and a finalizer must not raise in that case.
        group = getattr(self, "greenlet", None)
        if group is not None and len(group) > 0:
            group.kill(block=False)

    @property
    def user_classes(self):
        """The user classes configured on the environment."""
        return self.environment.user_classes

    @property
    def stats(self) -> "RequestStats":
        """The stats object held by the environment."""
        return self.environment.stats

    @property
    def errors(self):
        """Shortcut for ``self.stats.errors``."""
        return self.stats.errors

    @property
    def user_count(self):
        """
        :returns: Number of currently running users
        """
        return len(self.user_greenlets)

    def update_state(self, new_state):
        """
        Updates the current state
        """
        # I (cyberwiz) commented out this logging, because it is too noisy even for debug level
        # Uncomment it if you are specifically debugging state transitions
        # logger.debug("Updating state to '%s', old state was '%s'" % (new_state, self.state))
        self.state = new_state

    def cpu_log_warning(self):
        """Called at the end of the test to repeat the warning & return the status"""
        if self.cpu_warning_emitted:
            logger.warning(
                "CPU usage was too high at some point during the test! See https://docs.locust.io/en/stable/running-locust-distributed.html for how to distribute the load over multiple CPU cores or machines"
            )
            return True
        return False

    def weight_users(self, amount) -> "List[Type[User]]":
        """
        Distributes the amount of users for each WebLocust-class according to it's weight
        returns a list "bucket" with the weighted users

        As a side effect, ``environment.host`` (when not None) is copied onto
        every user class.

        :param amount: total number of users to distribute
        :returns: list of user classes, one entry per user to create
        """
        bucket = []
        weight_sum = sum(user.weight for user in self.user_classes)
        residuals = {}
        for user in self.user_classes:
            if self.environment.host is not None:
                user.host = self.environment.host

            # create users depending on weight
            percent = user.weight / float(weight_sum)
            exact_share = amount * percent
            rounded_share = round(exact_share)
            bucket.extend([user] * int(rounded_share))
            # used to keep track of the amount of rounding was done if we need
            # to add/remove some instances from bucket
            residuals[user] = exact_share - rounded_share

        if len(bucket) < amount:
            # We got too few User classes in the bucket, so we need to create a few extra users,
            # and we do this by iterating over each of the User classes - starting with the one
            # where the residual from the rounding was the largest - and creating one of each until
            # we get the correct amount
            most_under_served = sorted(residuals, key=residuals.get, reverse=True)
            bucket.extend(most_under_served[: amount - len(bucket)])
        elif len(bucket) > amount:
            # We've got too many users due to rounding errors so we need to remove some
            most_over_served = sorted(residuals, key=residuals.get)
            for user in most_over_served[: len(bucket) - amount]:
                bucket.remove(user)
        return bucket

    def spawn_users(self, spawn_count, spawn_rate, wait=False):
        """
        Spawn ``spawn_count`` weighted users, sleeping ``1 / spawn_rate`` seconds
        between two consecutive users, then fire ``spawning_complete``.

        :param spawn_count: number of users to add
        :param spawn_rate: users to spawn per second (must be non-zero)
        :param wait: if True, block until all user greenlets have finished
        """
        bucket = self.weight_users(spawn_count)
        spawn_count = len(bucket)
        if self.state == STATE_INIT or self.state == STATE_STOPPED:
            self.update_state(STATE_SPAWNING)

        existing_count = len(self.user_greenlets)
        logger.info(
            "Spawning %i users at the rate %g users/s (%i users already running)..."
            % (spawn_count, spawn_rate, existing_count)
        )
        occurrence_count = {user_class.__name__: 0 for user_class in self.user_classes}

        def spawn():
            sleep_time = 1.0 / spawn_rate
            while True:
                if not bucket:
                    logger.info(
                        "All users spawned: %s (%i total running)"
                        % (
                            ", ".join("%s: %d" % (name, count) for name, count in occurrence_count.items()),
                            len(self.user_greenlets),
                        )
                    )
                    self.environment.events.spawning_complete.fire(user_count=len(self.user_greenlets))
                    return

                # pick a random remaining entry so classes are interleaved
                user_class = bucket.pop(random.randint(0, len(bucket) - 1))
                occurrence_count[user_class.__name__] += 1
                new_user = user_class(self.environment)
                new_user.start(self.user_greenlets)
                if len(self.user_greenlets) % 10 == 0:
                    logger.debug("%i users spawned" % len(self.user_greenlets))
                if bucket:
                    gevent.sleep(sleep_time)

        spawn()
        if wait:
            self.user_greenlets.join()
            logger.info("All users stopped\n")

    def stop_users(self, user_count, stop_rate=None):
        """
        Stop `user_count` weighted users at a rate of `stop_rate`

        :param user_count: number of users to stop; 0 is a no-op
        :param stop_rate: users to stop per second; None stops them without
            pausing in between, 0 is a no-op
        """
        if user_count == 0 or stop_rate == 0:
            return

        bucket = self.weight_users(user_count)
        user_count = len(bucket)
        to_stop = []
        for user_greenlet in self.user_greenlets:
            try:
                user = user_greenlet.args[0]
            except IndexError:
                logger.error(
                    "While stopping users, we encountered a user that didnt have proper args %s", user_greenlet
                )
                continue
            for user_class in bucket:
                if isinstance(user, user_class):
                    to_stop.append(user)
                    # safe while iterating: we leave the loop right after
                    bucket.remove(user_class)
                    break

        if not to_stop:
            return

        # The running users may not match the weighted bucket exactly, so the
        # number really being stopped can be lower than the requested count.
        stopped_count = len(to_stop)

        if stop_rate is None or stop_rate >= user_count:
            sleep_time = 0
            logger.info("Stopping %i users" % (user_count))
        else:
            sleep_time = 1.0 / stop_rate
            logger.info("Stopping %i users at rate of %g users/s" % (user_count, stop_rate))

        async_calls_to_stop = Group()
        stop_group = Group()

        while True:
            user_to_stop: User = to_stop.pop(random.randint(0, len(to_stop) - 1))
            logger.debug("Stopping %s" % user_to_stop._greenlet.name)
            if user_to_stop._greenlet is greenlet.getcurrent():
                # User called runner.quit(), so dont block waiting for killing to finish"
                user_to_stop._group.killone(user_to_stop._greenlet, block=False)
            elif self.environment.stop_timeout:
                async_calls_to_stop.add(gevent.spawn_later(0, User.stop, user_to_stop, force=False))
                stop_group.add(user_to_stop._greenlet)
            else:
                async_calls_to_stop.add(gevent.spawn_later(0, User.stop, user_to_stop, force=True))
            if to_stop:
                gevent.sleep(sleep_time)
            else:
                break

        async_calls_to_stop.join()

        if not stop_group.join(timeout=self.environment.stop_timeout):
            logger.info(
                "Not all users finished their tasks & terminated in %s seconds. Stopping them..."
                % self.environment.stop_timeout
            )
            stop_group.kill(block=True)

        logger.info("%i Users have been stopped, %g still running" % (stopped_count, len(self.user_greenlets)))

    def monitor_cpu(self):
        """
        Loop forever, sampling this process' CPU usage every
        ``CPU_MONITOR_INTERVAL`` seconds and warning once when it exceeds 90%.
        """
        process = psutil.Process()
        while True:
            self.current_cpu_usage = process.cpu_percent()
            if self.current_cpu_usage > 90 and not self.cpu_warning_emitted:
                # use the module logger (not the root logger) like the rest of the class
                logger.warning(
                    "CPU usage above 90%! This may constrain your throughput and may even give inconsistent response time measurements! See https://docs.locust.io/en/stable/running-locust-distributed.html for how to distribute the load over multiple CPU cores or machines"
                )
                self.cpu_warning_emitted = True
            gevent.sleep(CPU_MONITOR_INTERVAL)

    def start(self, user_count, spawn_rate, wait=False):
        """
        Start running a load test

        :param user_count: Total number of users to start
        :param spawn_rate: Number of users to spawn per second
        :param wait: If True calls to this method will block until all users are spawned.
                     If False (the default), a greenlet that spawns the users will be
                     started and the call to this method will return immediately.
        """
        if self.state != STATE_RUNNING and self.state != STATE_SPAWNING:
            self.stats.clear_all()
            self.exceptions = {}
            self.cpu_warning_emitted = False
            self.worker_cpu_warning_emitted = False
            self.target_user_count = user_count

        if self.state != STATE_INIT and self.state != STATE_STOPPED:
            logger.debug(
                "Updating running test with %d users, %.2f spawn rate and wait=%r" % (user_count, spawn_rate, wait)
            )
            self.update_state(STATE_SPAWNING)
            if self.user_count > user_count:
                # Stop some users
                stop_count = self.user_count - user_count
                self.stop_users(stop_count, spawn_rate)
                # Scaling down never fires spawning_complete, so leave the
                # SPAWNING state here - unless something else (e.g. stop())
                # changed the state while we were yielding in stop_users.
                if self.state == STATE_SPAWNING:
                    self.update_state(STATE_RUNNING)
            elif self.user_count < user_count:
                # Spawn some users
                spawn_count = user_count - self.user_count
                self.spawn_users(spawn_count=spawn_count, spawn_rate=spawn_rate)
            else:
                self.environment.events.spawning_complete.fire(user_count=self.user_count)
        else:
            self.spawn_rate = spawn_rate
            self.spawn_users(user_count, spawn_rate=spawn_rate, wait=wait)

    def start_shape(self):
        """
        Start the greenlet that drives the test from ``environment.shape_class``.
        Does nothing (besides logging) if a shape greenlet is already active.
        """
        if self.shape_greenlet:
            logger.info("There is an ongoing shape test running. Editing is disabled")
            return

        logger.info("Shape test starting. User count and spawn rate are ignored for this type of load test")
        self.update_state(STATE_INIT)
        # Forget the previous run's last tick; otherwise a first tick equal to
        # it would be treated as "no change" and no users would be started.
        self.shape_last_state = None
        self.shape_greenlet = self.greenlet.spawn(self.shape_worker)
        self.shape_greenlet.link_exception(greenlet_exception_handler)
        self.environment.shape_class.reset_time()

    def shape_worker(self):
        """
        Poll ``shape_class.tick()`` while the runner is active: apply every new
        ``(user_count, spawn_rate)`` and stop (or quit, when headless) on None.
        """
        logger.info("Shape worker starting")
        while self.state in (STATE_INIT, STATE_SPAWNING, STATE_RUNNING):
            new_state = self.environment.shape_class.tick()
            if new_state is None:
                logger.info("Shape test stopping")
                if self.environment.parsed_options and self.environment.parsed_options.headless:
                    self.quit()
                else:
                    self.stop()
            elif self.shape_last_state == new_state:
                gevent.sleep(1)
            else:
                user_count, spawn_rate = new_state
                logger.info("Shape test updating to %d users at %.2f spawn rate" % (user_count, spawn_rate))
                self.start(user_count=user_count, spawn_rate=spawn_rate)
                self.shape_last_state = new_state

    def stop(self):
        """
        Stop a running load test by stopping all running users
        """
        logger.debug("Stopping all users")
        self.update_state(STATE_CLEANUP)
        # if we are currently spawning users we need to kill the spawning greenlet first
        if self.spawning_greenlet and not self.spawning_greenlet.ready():
            self.spawning_greenlet.kill(block=True)
        self.stop_users(self.user_count)
        self.update_state(STATE_STOPPED)
        self.cpu_log_warning()

    def quit(self):
        """
        Stop any running load test and kill all greenlets for the runner
        """
        self.stop()
        self.greenlet.kill(block=True)

    def log_exception(self, node_id, msg, formatted_tb):
        """
        Count an exception occurrence, grouped by the hash of its traceback.

        :param node_id: identifier of the node that reported the exception
        :param msg: exception message stored with the first occurrence
        :param formatted_tb: formatted traceback; its hash is the grouping key
        """
        key = hash(formatted_tb)
        # setdefault stores the new row itself, so no re-assignment is needed
        row = self.exceptions.setdefault(
            key, {"count": 0, "msg": msg, "traceback": formatted_tb, "nodes": set()}
        )
        row["count"] += 1
        row["nodes"].add(node_id)
class GClient(object): """ A generic gevent-based network client, that implements common send and receive functionality. Useful members: group: A gevent.pool.Group() tied to the lifetime of the client. When stopping, all greenlets in the group will be killed. started: True if the client has been started stopped: True if the client has been stopped running: True if the client has been started but not stopped """ def __init__(self, logger=None): self.group = Group() self.started = False self._send_queue = Queue() self._stopping = False self._stopped = AsyncResult() if not hasattr(self, 'logger'): # let subclass overrride if they want if not logger: logger = logging.getLogger('gclient').getChild(type(self).__name__) self.logger = logger def start(self): """Start the client, performing some connection step and beginning processing.""" if self.started: raise Exception("Already started") self.started = True self.logger.debug("{} starting".format(self)) self._start() self._send_loop_worker = self.group.spawn(self._send_loop) self._recv_loop_worker = self.group.spawn(self._recv_loop) self.logger.info("{} started".format(self)) def _start(self): """Override this with code that creates and initializes a connection""" def stop(self, ex=None): """Stop the client, optionally referencing some exception. This will kill all greenlets in group and do any specific stop handling. Anyone waiting on the client stopping will have the exception raised, if any. 
""" if self._stopping: self.wait_for_stop() return if not self.started: self.started = True self._stopping = True if ex: self.logger.info("{} stopping with error".format(self), exc_info=True) else: self.logger.info("{} stopping".format(self)) # since the greenlet calling stop() might be in self.group, we make a new greenlet to do the work @gevent.spawn def stop_worker(): self.group.kill(block=True) while not self._send_queue.empty(): msg, event = self._send_queue.get(block=False) event.set() self._stop(ex) if ex: self._stopped.set_exception(ex) else: self._stopped.set(None) self.logger.debug("{} fully stopped".format(self)) stop_worker.get() def _stop(self, ex=None): """Optionally override this with specific cleanup code for stopping the client, such as closing the connection.""" pass def wait_for_stop(self): """Block until the client has stopped, re-raising the exception it was stopped with, if any.""" self._stopped.get() @property def stopped(self): return self._stopped.ready() @property def running(self): return self.started and not self.stopped def send(self, msg, block=False): """Enqueue some kind of message to be sent. If block=True, block until actually sent. If block=False, returns a gevent.event.Event() that will be set when actually sent, or the client is stopped. Note that messages are sent in order, so using either of these shouldn't often be needed. """ if self._stopping: raise Exception("Can't send to stopped client") event = Event() self._send_queue.put((msg, event)) if block: event.wait() else: return event def _send_loop(self): try: for msg, event in self._send_queue: self._send(msg) event.set() except Exception as ex: self.stop(ex) def _send(self, msg): """Override this with specific code for sending a message. It may raise to indicate a failure that will stop the client.""" def _recv_loop(self): try: self._receive() except Exception as ex: self.stop(ex) else: self.stop() def _receive(self): """Override this with code that receives data. 
It may return to indicate a graceful close,