class TorskelEventLogController(object):
    """Buffer log events in a queue and hand them to a writer in batches."""

    def __init__(self):
        self.logger = tornado.log.gen_log
        self.queue = Queue()

    def add_log_event(self, event):
        """
        Put event into queue

        Only ``dict`` events are logged and queued; any other value is
        ignored.

        :param event: event payload to enqueue
        :return: None
        """
        if isinstance(event, dict):
            self.logger.debug(event)
            # put() returns a Future that this synchronous method cannot
            # await; put_nowait() enqueues immediately without creating one.
            self.queue.put_nowait(event)

    async def write_log_from_queue(self, db, collection_name,
                                   events_writer_func) -> None:
        """
        Retrieves events from the queue and performs the insert into the
        database.

        At most ``options.task_list_size`` events are taken per call.

        :param db: passed through unchanged to ``events_writer_func``
        :param collection_name: passed through to ``events_writer_func``
        :param events_writer_func: awaited as
            ``events_writer_func(db, collection_name, events)``
        :return: None
        """
        qsize = self.queue.qsize()
        if options.show_log_event_writer:
            self.logger.info(f'Writing events... queue size = {qsize}')

        if qsize <= 0:
            return

        # Never take more than one configured batch per invocation.
        step = min(qsize, options.task_list_size)
        inserts_list = [await self.queue.get() for _ in range(step)]
        if inserts_list:
            await events_writer_func(db, collection_name, inserts_list)
class ConnectionPool(object):
    """Pool of ``Connection`` objects handed out to coroutines."""

    def __init__(self, servers, maxsize=15, minsize=1, loop=None, debug=0):
        if loop is None:
            loop = tornado.ioloop.IOLoop.instance()

        if debug:
            logging.basicConfig(
                level=logging.DEBUG,
                format="'%(levelname)s %(asctime)s"
                       " %(module)s:%(lineno)d %(process)d %(thread)d %(message)s'")

        self._loop = loop
        self._servers = servers
        self._minsize = minsize
        self._debug = debug
        # Connections currently handed out to callers.
        self._in_use = set()
        # Idle connections waiting to be acquired.
        self._pool = Queue(maxsize)

    @gen.coroutine
    def clear(self):
        """Clear pool connections."""
        while True:
            if self._pool.empty():
                break
            idle = yield self._pool.get()
            idle.close_socket()

    def size(self):
        """Return the number of idle plus in-use connections."""
        return self._pool.qsize() + len(self._in_use)

    @gen.coroutine
    def acquire(self):
        """Acquire connection from the pool, creating a new one when no idle
        connection is available.

        :return: ``Connection`` (reader, writer)
        """
        # Top the pool up to its minimum size before handing anything out.
        while self.size() < self._minsize:
            fresh = yield self._create_new_conn()
            yield self._pool.put(fresh)

        picked = None
        while not picked:
            if not self._pool.empty():
                picked = yield self._pool.get()
            if picked is None:
                picked = yield self._create_new_conn()

        self._in_use.add(picked)
        raise gen.Return(picked)

    @gen.coroutine
    def _create_new_conn(self):
        """Open a new connection to the configured servers."""
        created = yield Connection.get_conn(self._servers, self._debug)
        raise gen.Return(created)

    def release(self, conn):
        """Return ``conn`` to the pool; close it if the pool rejects it."""
        self._in_use.remove(conn)
        try:
            self._pool.put_nowait(conn)
        except (QueueEmpty, QueueFull):
            conn.close_socket()
class KafkaTopicConsumer(object):
    """
    Tornado compatible class for consuming messages from a Kafka topic. The mode of operation is executing
    the kafka consumer code into its own thread, then communicate with the tornado IO pool code through
    callbacks in the i/o loop and queues. Depends on pykafka.
    """

    def __init__(self, **kwargs):
        """Store the connection settings and start the consumer thread.

        Required keyword arguments: ``kafka_hosts`` and ``topic_name``.
        """
        self.kafka_hosts = kwargs['kafka_hosts']
        self.topic_name = kwargs['topic_name']
        self.io_loop = ioloop.IOLoop.instance()
        self.message_q = Queue(maxsize=128)
        # Set to True to make the consumer thread return on its next pass.
        self.exit = False
        self.kafka_process = Thread(target=self._consumer_loop)
        self.kafka_process.start()

    # Bear in mind that this method is run on a separate thread !!!
    def _consumer_loop(self, **kwargs):
        """Consume messages and forward their decoded JSON to the IO loop.

        An optional ``consumer_id`` keyword overrides the generated id.
        """
        # print(...) with a single pre-formatted argument behaves the same on
        # Python 2 and Python 3.
        print("Connecting to %s" % self.kafka_hosts)
        kafka_client = KafkaClient(hosts=self.kafka_hosts)
        topic_name = self.topic_name
        topic = kafka_client.topics[topic_name]

        # Generate consumer id if necessary
        if 'consumer_id' in kwargs:
            consumer_id = kwargs['consumer_id']
        else:
            rand_id = hex(random.getrandbits(32)).rstrip("L").lstrip("0x") or "0"
            consumer_id = "ush_consumer_%s" % rand_id

        count = 0
        consumer = topic.get_simple_consumer(consumer_id, consumer_timeout_ms=1000)
        while True:
            # exit if required
            if self.exit:
                del kafka_client
                return

            # be careful with saturating the queue (queue maxsize / 2)
            if self.message_q.qsize() > 64:
                time.sleep(1)
                continue

            try:
                m = consumer.consume()
                if m is not None and m.value is not None:
                    value = json.loads(m.value)
                    # Pass the value to the main thread through a callback in its io loop, the call is thread-safe
                    # NOTE(review): _put is not defined in the part of this
                    # class visible here - confirm it exists elsewhere.
                    self.io_loop.add_callback(self._put, value)
                    # Count handled messages so the progress line below is
                    # printed once per 100 messages.
                    count += 1
                    if (count % 100) == 0:
                        print("INFO: processed %d messages on topic %s" % (count, self.topic_name))
            except Exception:
                # TODO: more better logging
                import sys
                import traceback
                print("Error occurred while consuming kafka item")
                traceback.print_exc(limit=16, file=sys.stdout)
class PopularCategories:
    """Keep a running score per image label and publish the five highest."""

    def __init__(self):
        # label -> running score
        self.categories = {}
        self.update_queue = Queue()

    @gen.coroutine
    def add_for_processing(self, predictions):
        """Queue a prediction result for a later ``process_queue`` run."""
        yield self.update_queue.put(predictions)

    @gen.coroutine
    def process_queue(self):
        """Fold all currently queued predictions in, then publish the top 5."""
        pending = self.update_queue.qsize()
        if pending <= 0:
            return

        for _ in range(pending):
            queued = yield self.update_queue.get()
            try:
                self._update_categories(queued)
            finally:
                # Mark the item done even when the update raised.
                self.update_queue.task_done()

        # update top 5
        ranked = sorted(self.categories.items(),
                        key=lambda entry: entry[1],
                        reverse=True)
        payload = [to_json_result(label, score) for label, score in ranked[:5]]
        yield update_top_5(payload)

    def _update_categories(self, new_predictions):
        """Merge one prediction result into ``self.categories``.

        A label seen before gets the mean of its stored score and the new
        score; a new label takes the new score as is.
        """
        label_indices = new_predictions.argsort()[0]
        scores = new_predictions[0]

        # update categories total
        for index in label_indices:
            label = configuration.image_labels[index]
            score = scores[index]
            self.categories[label] = (
                (self.categories[label] + score) / 2
                if label in self.categories
                else score
            )
class TornadoQuerierBase(object):
    """Base class that runs generated tasks through worker coroutines.

    Subclasses implement ``gen_task`` (produce the tasks) and ``run_task``
    (process one task).
    """

    def __init__(self):
        self.tasks = TornadoQueue()
        # Defined up front so the attribute exists before prepare() is called.
        self.running = False

    def gen_task(self):
        """Return the tasks to process; must be overridden.

        ``start`` yields the result and then iterates over it.
        """
        raise NotImplementedError()

    def run_task(self, task):
        """Process a single task; must be overridden."""
        raise NotImplementedError()

    def prepare(self):
        """Mark the querier as running; called at the start of ``start``."""
        self.running = True

    def cleanup(self):
        """Mark the querier as stopped; called when ``start`` finishes."""
        self.running = False

    @coroutine
    def run_worker(self, worker_id, f):
        """Take tasks from the queue and run ``f`` on each until it is empty.

        Exceptions raised by ``f`` are logged as warnings and do not stop the
        worker; every task taken is marked done.
        """
        while self.tasks.qsize() > 0:
            task = yield self.tasks.get()
            LOG.debug('worker[%d]: current task is %s', worker_id, task)
            try:
                yield f(task)
            except Exception as e:
                LOG.warning(str(e))
            finally:
                self.tasks.task_done()

        LOG.debug('worker[%d]: all tasks done %s', worker_id, self.tasks)

    @coroutine
    def start(self, num_workers=1):
        """Generate the tasks, start ``num_workers`` workers and wait for them.

        ``cleanup`` runs even when task generation or the join raises.
        """
        self.prepare()
        try:
            # add tasks
            tasks = yield self.gen_task()
            for task in tasks:
                yield self.tasks.put(task)

            # start shoot workers
            for worker_id in range(num_workers):
                LOG.debug('starting worker %d', worker_id)
                # Not yielded on purpose: workers run concurrently and
                # completion is tracked through the queue's join() below.
                self.run_worker(worker_id, self.run_task)

            yield self.tasks.join()
        finally:
            self.cleanup()
class StreamClient(object):
    """A stream subscriber with its own bounded queue of pending items."""

    # Capacity of each client's queue.
    MAX_SIZE = 60

    def __init__(self, steam_id):
        self.id = generate_id()
        self.stream_id = steam_id
        self.queue = Queue(maxsize=StreamClient.MAX_SIZE)

    @coroutine
    def send(self, item):
        """Put ``item`` on this client's queue."""
        yield self.queue.put(item)

    @coroutine
    def fetch(self):
        """Take the next item off the queue, mark it done and return it."""
        next_item = yield self.queue.get()
        self.queue.task_done()
        return next_item

    def empty(self):
        """Return True when no items are queued."""
        return not self.queue.qsize()
class TornadoTransmission():
    '''Queue events and send them to the API in batches on the tornado IOLoop.

    Events passed to `send` go on the `pending` queue. The `_sender`
    coroutine batches them and issues one HTTP request per destination.
    One response dict per event is placed on the `responses` queue.
    '''

    def __init__(self, max_concurrent_batches=10, block_on_send=False,
                 block_on_response=False, max_batch_size=100,
                 send_frequency=timedelta(seconds=0.25),
                 user_agent_addition=''):
        if not has_tornado:
            raise ImportError(
                'TornadoTransmission requires tornado, but it was not found.'
            )

        self.block_on_send = block_on_send
        self.block_on_response = block_on_response
        self.max_batch_size = max_batch_size
        self.send_frequency = send_frequency

        user_agent = "libhoney-py/" + VERSION
        if user_agent_addition:
            user_agent += " " + user_agent_addition

        self.http_client = AsyncHTTPClient(
            force_instance=True,
            defaults=dict(user_agent=user_agent))

        # libhoney adds events to the pending queue for us to send
        self.pending = Queue(maxsize=1000)
        # we hand back responses from the API on the responses queue
        self.responses = Queue(maxsize=2000)

        # HTTPRequest -> {"start": ..., "events": ...} for in-flight batches
        self.batch_data = {}
        self.sd = statsd.StatsClient(prefix="libhoney")
        self.batch_sem = Semaphore(max_concurrent_batches)

    def start(self):
        '''Spawn the `_sender` control loop on the current IOLoop.'''
        ioloop.IOLoop.current().spawn_callback(self._sender)

    def send(self, ev):
        '''send accepts an event and queues it to be sent'''
        self.sd.gauge("queue_length", self.pending.qsize())
        try:
            if self.block_on_send:
                self.pending.put(ev)
            else:
                self.pending.put_nowait(ev)
            self.sd.incr("messages_queued")
        except QueueFull:
            response = {
                "status_code": 0,
                "duration": 0,
                "metadata": ev.metadata,
                "body": "",
                "error": "event dropped; queue overflow",
            }
            if self.block_on_response:
                self.responses.put(response)
            else:
                try:
                    self.responses.put_nowait(response)
                except QueueFull:
                    # if the response queue is full when trying to add an event
                    # queue is full response, just skip it.
                    pass
            self.sd.incr("queue_overflow")

    # We're using the older decorator/yield model for compatibility with
    # Python versions before 3.5.
    # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
    @gen.coroutine
    def _sender(self):
        '''_sender is the control loop that pulls events off the `self.pending`
        queue and submits batches for actual sending. '''
        events = []
        last_flush = time.time()
        while True:
            try:
                ev = yield self.pending.get(timeout=self.send_frequency)
                if ev is None:
                    # signals shutdown
                    yield self._flush(events)
                    return
                events.append(ev)
                # >= so a batch never exceeds max_batch_size events.
                if (len(events) >= self.max_batch_size or
                        time.time() - last_flush > self.send_frequency.total_seconds()):
                    yield self._flush(events)
                    events = []
                    # Reset the timer here too; otherwise once the interval
                    # has elapsed every later event flushes on its own.
                    last_flush = time.time()
            except TimeoutError:
                yield self._flush(events)
                events = []
                last_flush = time.time()

    @gen.coroutine
    def _flush(self, events):
        '''Send `events` grouped by destination; a no-op for an empty list.'''
        if not events:
            return
        for dest, group in group_events_by_destination(events).items():
            yield self._send_batch(dest, group)

    @gen.coroutine
    def _send_batch(self, destination, events):
        ''' Makes a single batch API request with the given list of events. The
        `destination` argument contains the write key, API host and dataset
        name used to build the request.'''
        start = time.time()
        status_code = 0
        try:
            # enforce max_concurrent_batches
            yield self.batch_sem.acquire()
            url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                          destination.dataset)
            payload = []
            for ev in events:
                event_time = ev.created_at.isoformat()
                if ev.created_at.tzinfo is None:
                    event_time += "Z"
                payload.append({
                    "time": event_time,
                    "samplerate": ev.sample_rate,
                    "data": ev.fields()
                })
            req = HTTPRequest(
                url,
                method='POST',
                headers={
                    "X-Honeycomb-Team": destination.writekey,
                    "Content-Type": "application/json",
                },
                body=json.dumps(payload, default=json_default_handler),
            )
            self.http_client.fetch(req, self._response_callback)
            # store the events that were sent so we can process responses later
            # it is important that we delete these eventually, or we'll run into memory issues
            self.batch_data[req] = {"start": start, "events": events}
        except Exception as e:
            # Catch all exceptions and hand them to the responses queue.
            self._enqueue_errors(status_code, e, start, events)
        finally:
            self.batch_sem.release()

    def _enqueue_errors(self, status_code, error, start, events):
        '''Queue one error response per event in `events`.'''
        for ev in events:
            self.sd.incr("send_errors")
            self._enqueue_response(status_code, "", error, start, ev.metadata)

    def _response_callback(self, resp):
        '''Turn a batch HTTP response into one queued response per event.'''
        # resp.request should be the same HTTPRequest object built by _send_batch
        # and mapped to values in batch_data
        events = self.batch_data[resp.request]["events"]
        start = self.batch_data[resp.request]["start"]
        # Bound before the try so the error path can always report a code.
        status_code = 0
        try:
            status_code = resp.code
            resp.rethrow()

            statuses = [d["status"] for d in json.loads(resp.body)]
            for ev, status in zip(events, statuses):
                self._enqueue_response(status, "", None, start, ev.metadata)
                self.sd.incr("messages_sent")
        except Exception as e:
            self._enqueue_errors(status_code, e, start, events)
            self.sd.incr("send_errors")
        finally:
            # clean up the data for this batch
            del self.batch_data[resp.request]

    def _enqueue_response(self, status_code, body, error, start, metadata):
        '''Build a response dict and put it on the responses queue.

        When `block_on_response` is false and the queue is full, the response
        is dropped.
        '''
        resp = {
            "status_code": status_code,
            "body": body,
            "error": error,
            "duration": (time.time() - start) * 1000,
            "metadata": metadata
        }
        if self.block_on_response:
            self.responses.put(resp)
        else:
            try:
                self.responses.put_nowait(resp)
            except QueueFull:
                pass

    def close(self):
        '''call close to send all in-flight requests and shut down the
        senders nicely. Times out after max 20 seconds per sending thread
        plus 10 seconds for the response queue'''
        # tornado treats a bare number as an absolute deadline; a timedelta
        # is needed to mean "10 seconds from now".
        try:
            self.pending.put(None, timedelta(seconds=10))
        except QueueFull:
            pass
        # signal to the responses queue that nothing more is coming.
        try:
            self.responses.put(None, timedelta(seconds=10))
        except QueueFull:
            pass

    def get_response_queue(self):
        ''' return the responses queue on to which will be sent the response
        objects from each event send'''
        return self.responses
class SQSDrain(object):
    """Implementation of IDrain that writes to an AWS SQS queue.
    """

    def __init__(self, logger, loop, sqs_client, metric_prefix='emitter'):
        self.emitter = sqs_client
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)
        self._send_queue = Queue()
        self._should_flush_queue = Event()
        self._flush_handle = None
        self.loop.spawn_callback(self._onSend)

    @gen.coroutine
    def _flush_send_batch(self, batch_size):
        """Send up to ``batch_size`` queued messages in one batch request.

        Messages are put back on the queue when the whole request fails with
        ``SQSError``; entries reported in ``response.Failed`` are re-queued
        too. ``output_error`` is set in both cases.
        """
        send_batch = [
            self._send_queue.get_nowait()
            for pos in range(min(batch_size, self.emitter.max_messages))
        ]
        try:
            response = yield self.emitter.send_message_batch(*send_batch)
        except SQSError as err:
            self.logger.exception('Error encountered flushing data to SQS: %s',
                                  err)
            self.output_error.set()
            for msg in send_batch:
                self._send_queue.put_nowait(msg)
        else:
            if response.Failed:
                self.output_error.set()
                for req in response.Failed:
                    self.logger.error('Message failed to send: %s', req.Id)
                    self._send_queue.put_nowait(req)
        finally:
            # Balance every get_nowait() above so close()'s join() can finish.
            # This runs after any re-queueing, so the unfinished count cannot
            # drop to zero while retries are still outstanding.
            for _ in send_batch:
                self._send_queue.task_done()

    @gen.coroutine
    def _onSend(self):
        """Background loop: drain the queue, then wait for a flush signal."""
        respawn = True
        while respawn:
            qsize = self._send_queue.qsize()
            # This will keep flushing until clear,
            # including items that show up in between flushes
            while qsize > 0:
                try:
                    yield self._flush_send_batch(qsize)
                except Exception as err:
                    self.logger.exception(err)
                    self.output_error.set()
                qsize = self._send_queue.qsize()
            # We've cleared the backlog, remove any possible future flush
            if self._flush_handle:
                self.loop.remove_timeout(self._flush_handle)
                self._flush_handle = None
            self._should_flush_queue.clear()
            yield self._should_flush_queue.wait()

    @gen.coroutine
    def close(self, timeout=None):
        """Mark the drain as closing and wait for queued messages to finish."""
        self.state = CLOSING
        yield self._send_queue.join(timeout)

    def emit_nowait(self, msg):
        """Queue ``msg`` without waiting.

        Raises ``QueueFull`` (after signalling a flush) when
        ``emitter.max_messages`` or more messages are already queued.
        """
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
            raise QueueFull()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                lambda: self._should_flush_queue.set(),
            )
        self.logger.debug("Drain emitting")
        self._send_queue.put_nowait(msg)

    @gen.coroutine
    def emit(self, msg, timeout=None):
        """Queue ``msg``, signalling or scheduling a flush as needed."""
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                lambda: self._should_flush_queue.set(),
            )
        yield self._send_queue.put(msg, timeout)
class TornadoTransmission():
    '''Queue events and send them to the API in batches on the tornado IOLoop.

    Events passed to `send` go on the `pending` queue. The `_sender`
    coroutine batches them and issues one HTTP request per destination.
    One response dict per event is placed on the `responses` queue.
    `send_frequency` is a number of seconds.
    '''

    def __init__(self, max_concurrent_batches=10, block_on_send=False,
                 block_on_response=False, max_batch_size=100,
                 send_frequency=0.25, user_agent_addition=''):
        if not has_tornado:
            raise ImportError('TornadoTransmission requires tornado, but it was not found.')

        self.block_on_send = block_on_send
        self.block_on_response = block_on_response
        self.max_batch_size = max_batch_size
        self.send_frequency = send_frequency

        user_agent = "libhoney-py/" + VERSION
        if user_agent_addition:
            user_agent += " " + user_agent_addition

        self.http_client = AsyncHTTPClient(
            force_instance=True,
            defaults=dict(user_agent=user_agent))

        # libhoney adds events to the pending queue for us to send
        self.pending = Queue(maxsize=1000)
        # we hand back responses from the API on the responses queue
        self.responses = Queue(maxsize=2000)

        # HTTPRequest -> {"start": ..., "events": ...} for in-flight batches
        self.batch_data = {}
        self.sd = statsd.StatsClient(prefix="libhoney")
        self.batch_sem = Semaphore(max_concurrent_batches)

    @staticmethod
    def _relative_timeout(seconds):
        '''Return `seconds` as a timedelta, which tornado reads as "from now".

        A bare number passed as a tornado timeout is an absolute deadline.
        '''
        from datetime import timedelta
        return timedelta(seconds=seconds)

    def start(self):
        '''Spawn the `_sender` control loop on the current IOLoop.'''
        ioloop.IOLoop.current().spawn_callback(self._sender)

    def send(self, ev):
        '''send accepts an event and queues it to be sent'''
        self.sd.gauge("queue_length", self.pending.qsize())
        try:
            if self.block_on_send:
                self.pending.put(ev)
            else:
                self.pending.put_nowait(ev)
            self.sd.incr("messages_queued")
        except QueueFull:
            response = {
                "status_code": 0,
                "duration": 0,
                "metadata": ev.metadata,
                "body": "",
                "error": "event dropped; queue overflow",
            }
            if self.block_on_response:
                self.responses.put(response)
            else:
                try:
                    self.responses.put_nowait(response)
                except QueueFull:
                    # if the response queue is full when trying to add an event
                    # queue is full response, just skip it.
                    pass
            self.sd.incr("queue_overflow")

    # We're using the older decorator/yield model for compatibility with
    # Python versions before 3.5.
    # See: http://www.tornadoweb.org/en/stable/guide/coroutines.html#python-3-5-async-and-await
    @gen.coroutine
    def _sender(self):
        '''_sender is the control loop that pulls events off the `self.pending`
        queue and submits batches for actual sending. '''
        events = []
        last_flush = time.time()
        while True:
            try:
                # Passing the float directly would be read as an absolute
                # time, already in the past, so get() would time out at once
                # and this loop would spin.
                ev = yield self.pending.get(
                    timeout=self._relative_timeout(self.send_frequency))
                if ev is None:
                    # signals shutdown
                    yield self._flush(events)
                    return
                events.append(ev)
                # >= so a batch never exceeds max_batch_size events.
                if (len(events) >= self.max_batch_size or
                        time.time() - last_flush > self.send_frequency):
                    yield self._flush(events)
                    events = []
                    # Reset the timer here too; otherwise once the interval
                    # has elapsed every later event flushes on its own.
                    last_flush = time.time()
            except TimeoutError:
                yield self._flush(events)
                events = []
                last_flush = time.time()

    @gen.coroutine
    def _flush(self, events):
        '''Send `events` grouped by destination; a no-op for an empty list.'''
        if not events:
            return
        for dest, group in group_events_by_destination(events).items():
            yield self._send_batch(dest, group)

    @gen.coroutine
    def _send_batch(self, destination, events):
        ''' Makes a single batch API request with the given list of events. The
        `destination` argument contains the write key, API host and dataset
        name used to build the request.'''
        start = time.time()
        status_code = 0
        try:
            # enforce max_concurrent_batches
            yield self.batch_sem.acquire()
            url = urljoin(urljoin(destination.api_host, "/1/batch/"),
                          destination.dataset)
            payload = []
            for ev in events:
                event_time = ev.created_at.isoformat()
                if ev.created_at.tzinfo is None:
                    event_time += "Z"
                payload.append({
                    "time": event_time,
                    "samplerate": ev.sample_rate,
                    "data": ev.fields()})
            req = HTTPRequest(
                url,
                method='POST',
                headers={
                    "X-Honeycomb-Team": destination.writekey,
                    "Content-Type": "application/json",
                },
                body=json.dumps(payload, default=json_default_handler),
            )
            self.http_client.fetch(req, self._response_callback)
            # store the events that were sent so we can process responses later
            # it is important that we delete these eventually, or we'll run into memory issues
            self.batch_data[req] = {"start": start, "events": events}
        except Exception as e:
            # Catch all exceptions and hand them to the responses queue.
            self._enqueue_errors(status_code, e, start, events)
        finally:
            self.batch_sem.release()

    def _enqueue_errors(self, status_code, error, start, events):
        '''Queue one error response per event in `events`.'''
        for ev in events:
            self.sd.incr("send_errors")
            self._enqueue_response(status_code, "", error, start, ev.metadata)

    def _response_callback(self, resp):
        '''Turn a batch HTTP response into one queued response per event.'''
        # resp.request should be the same HTTPRequest object built by _send_batch
        # and mapped to values in batch_data
        events = self.batch_data[resp.request]["events"]
        start = self.batch_data[resp.request]["start"]
        # Bound before the try so the error path can always report a code.
        status_code = 0
        try:
            status_code = resp.code
            resp.rethrow()

            statuses = [d["status"] for d in json.loads(resp.body)]
            for ev, status in zip(events, statuses):
                self._enqueue_response(status, "", None, start, ev.metadata)
                self.sd.incr("messages_sent")
        except Exception as e:
            self._enqueue_errors(status_code, e, start, events)
            self.sd.incr("send_errors")
        finally:
            # clean up the data for this batch
            del self.batch_data[resp.request]

    def _enqueue_response(self, status_code, body, error, start, metadata):
        '''Build a response dict and put it on the responses queue.

        When `block_on_response` is false and the queue is full, the response
        is dropped.
        '''
        resp = {
            "status_code": status_code,
            "body": body,
            "error": error,
            "duration": (time.time() - start) * 1000,
            "metadata": metadata
        }
        if self.block_on_response:
            self.responses.put(resp)
        else:
            try:
                self.responses.put_nowait(resp)
            except QueueFull:
                pass

    def close(self):
        '''call close to send all in-flight requests and shut down the
        senders nicely. Times out after max 20 seconds per sending thread
        plus 10 seconds for the response queue'''
        # A bare 10 would be read as an absolute deadline, not "10 seconds".
        try:
            self.pending.put(None, self._relative_timeout(10))
        except QueueFull:
            pass
        # signal to the responses queue that nothing more is coming.
        try:
            self.responses.put(None, self._relative_timeout(10))
        except QueueFull:
            pass

    def get_response_queue(self):
        ''' return the responses queue on to which will be sent the response
        objects from each event send'''
        return self.responses
class SQSSource(object):
    """Implementation of ISource that receives messages from a SQS queue.
    """

    max_delete_delay = 5

    def __init__(self, logger, loop, gate, sqs_client, metric_prefix='source'):
        self.gate = gate
        self.collector = sqs_client
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.end_of_input = Event()
        self.input_error = Event()
        self.state = RUNNING
        self._delete_queue = Queue()
        self._should_flush_queue = Event()
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)
        self.loop.spawn_callback(self.onInput)
        self.loop.spawn_callback(self._onDelete)

    @gen.coroutine
    def close(self, timeout=None):
        """Mark the source as closing and wait for queued deletes to finish."""
        self.state = CLOSING
        self.logger.warning('Closing source')
        yield self._delete_queue.join(timeout)

    @gen.coroutine
    def _flush_delete_batch(self, batch_size):
        """Delete up to ``batch_size`` queued messages in one batch request.

        Messages are put back on the queue when the whole request fails with
        ``SQSError``; entries reported in ``response.Failed`` are re-queued
        too. ``input_error`` is set in both cases.
        """
        delete_batch = [
            self._delete_queue.get_nowait()
            for pos in range(min(batch_size, self.collector.max_messages))
        ]
        try:
            response = yield self.collector.delete_message_batch(*delete_batch)
        except SQSError as err:
            lmsg = 'Error encountered deleting processed messages in SQS: %s'
            self.logger.exception(lmsg, err)
            self.input_error.set()
            for msg in delete_batch:
                self._delete_queue.put_nowait(msg)
        else:
            if response.Failed:
                self.input_error.set()
                for req in response.Failed:
                    self.logger.error('Message failed to delete: %s', req.Id)
                    self._delete_queue.put_nowait(req)
        finally:
            # Balance every get_nowait() above so close()'s join() can finish.
            # This runs after any re-queueing, so the unfinished count cannot
            # drop to zero while retries are still outstanding.
            for _ in delete_batch:
                self._delete_queue.task_done()

    @gen.coroutine
    def _onDelete(self):
        """Background loop: drain the delete queue, then wait for a signal.

        Any exception is logged, sets ``input_error`` and ends the loop.
        """
        respawn = True
        while respawn:
            try:
                qsize = self._delete_queue.qsize()
                # This will keep flushing until clear,
                # including items that show up in between flushes
                while qsize > 0:
                    yield self._flush_delete_batch(qsize)
                    qsize = self._delete_queue.qsize()
                self._should_flush_queue.clear()
                yield self._should_flush_queue.wait()
            except Exception as err:
                self.logger.exception(err)
                self.input_error.set()
                respawn = False

    @gen.coroutine
    def onInput(self):
        """Background loop: receive message batches and pass them to the gate.

        Each message put on the gate is also queued for deletion. Any
        exception is logged, sets ``input_error`` and ends the loop.
        """
        respawn = True
        retry_timeout = INITIAL_TIMEOUT
        # We use an algorithm similar to TCP window scaling,
        # so that we request fewer messages when we encounter
        # back pressure from our gate/drain and request more
        # when we flushed a complete batch
        window_size = self.collector.max_messages
        while respawn:
            try:
                response = yield self.collector.receive_message_batch(
                    max_messages=window_size,
                )
                if response.Messages:
                    # We need to have low latency to delete messages
                    # we've processed
                    retry_timeout = INITIAL_TIMEOUT
                else:
                    # Nothing received: back off, doubling up to MAX_TIMEOUT.
                    retry_timeout = min(retry_timeout * 2, MAX_TIMEOUT)
                    yield gen.sleep(retry_timeout.total_seconds())

                sent_full_batch = True
                for msg in response.Messages:
                    try:
                        self.gate.put_nowait(msg)
                    except QueueFull:
                        self.logger.debug('Gate queue full; yielding')
                        sent_full_batch = False
                        # TODO: is it worth trying to batch and schedule
                        #       a flush at this point instead of many
                        #       single deletes?
                        yield self.gate.put(msg)
                    self._should_flush_queue.set()
                    self._delete_queue.put_nowait(msg)
                    statsd.increment('%s.queued' % self.metric_prefix,
                                     tags=[self.sender_tag])

                # If we were able to flush the entire batch without waiting,
                # increase our window size to max_messages
                if sent_full_batch and \
                   window_size < self.collector.max_messages:
                    window_size += 1
                # Otherwise ask for less next time
                elif not sent_full_batch and window_size > 1:
                    window_size -= 1
            except Exception as err:
                self.logger.exception(err)
                self.input_error.set()
                respawn = False
class FileSystemWatcher(object):
    """Watch folders for changes and report them in periodic batches."""

    def __init__(self, watch_paths, on_changed=None, interval=1.0, recursive=True):
        """Constructor.

        Args:
            watch_paths: A list of filesystem paths to watch for changes.
                A single path string is also accepted.
            on_changed: Callback to call when one or more changes to the watch path are detected.
            interval: The minimum interval at which to notify about changes (in seconds).
            recursive: Should the watch path be monitored recursively for changes?

        Raises:
            MissingFolderError: If a watch path is not an existing directory.
        """
        # basestring only exists on Python 2; fall back to str on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if isinstance(watch_paths, string_types):
            watch_paths = [watch_paths]

        watch_paths = [os.path.abspath(path) for path in watch_paths]
        for path in watch_paths:
            # isdir() is already False for paths that do not exist.
            if not os.path.isdir(path):
                raise MissingFolderError(path)

        self.watch_paths = watch_paths
        # Stored in milliseconds for the PeriodicCallback below.
        self.interval = interval * 1000.0
        self.recursive = recursive
        self.periodic_callback = PeriodicCallback(self.check_fs_events, self.interval)
        self.on_changed = on_changed
        self.observer = Observer()
        for path in self.watch_paths:
            self.observer.schedule(WatcherEventHandler(self), path, self.recursive)
        self.started = False
        self.fs_event_queue = Queue()

    def track_event(self, event):
        """Record a file system event for the next ``check_fs_events`` run."""
        # put() would return a Future nobody awaits; put_nowait() enqueues
        # immediately on this unbounded queue.
        self.fs_event_queue.put_nowait(event)

    @gen.coroutine
    def check_fs_events(self):
        """Drain queued events and pass them to ``on_changed`` in one call."""
        drained_events = []
        while self.fs_event_queue.qsize() > 0:
            drained_events.append(self.fs_event_queue.get_nowait())
        if drained_events and callable(self.on_changed):
            logger.debug(
                "Detected %d file system change(s) - triggering callback"
                % len(drained_events))
            self.on_changed(drained_events)

    def start(self):
        """Start the observer and the periodic check, if not already started."""
        if not self.started:
            self.observer.start()
            self.periodic_callback.start()
            self.started = True
            logger.debug("Started file system watcher for paths:\n%s"
                         % "\n".join(self.watch_paths))

    def shutdown(self, timeout=None):
        """Stop the periodic check and the observer, if started.

        Args:
            timeout: Passed to the observer's ``join``.
        """
        if self.started:
            self.periodic_callback.stop()
            self.observer.stop()
            self.observer.join(timeout=timeout)
            self.started = False
            logger.debug("Shut down file system watcher for path:\n%s"
                         % "\n".join(self.watch_paths))
class TestHttpWatcherServer(AsyncTestCase):
    """Tests for HttpWatcherServer served on localhost:5555."""

    temp_path = None
    watcher_server = None
    expected_httpwatcher_js = read_resource(
        os.path.join("scripts", "httpwatcher.min.js"))
    reload_tracker_queue = None

    def setUp(self):
        super(TestHttpWatcherServer, self).setUp()
        logging.basicConfig(
            level=logging.DEBUG,
            format='%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s',
        )
        self.temp_path = init_temp_path()
        write_file(
            self.temp_path, "index.html",
            "<!DOCTYPE html><html><head><title>Hello world</title></head>" +
            "<body>Test</body></html>")
        self.reload_tracker_queue = Queue()

    def test_watching(self):
        self.watcher_server = HttpWatcherServer(self.temp_path,
                                                host="localhost",
                                                port=5555,
                                                watcher_interval=0.1)
        self.watcher_server.listen()
        # Always shut down so a failed assertion does not leave the port
        # bound for the remaining tests.
        try:
            self.exec_watch_server_tests("")
        finally:
            self.watcher_server.shutdown()

    def test_watching_non_standard_base_path(self):
        self.watcher_server = HttpWatcherServer(
            self.temp_path,
            host="localhost",
            port=5555,
            watcher_interval=0.1,
            server_base_path="/non-standard/")
        self.watcher_server.listen()
        try:
            self.exec_watch_server_tests("/non-standard/")
        finally:
            self.watcher_server.shutdown()

    def track_reload_custom(self):
        """Reload callback used by test_custom_callback to record a call."""
        self.reload_tracker_queue.put("Gotcha!")

    def test_custom_callback(self):
        # starts off empty
        self.assertEqual(self.reload_tracker_queue.qsize(), 0)
        self.watcher_server = HttpWatcherServer(
            self.temp_path,
            on_reload=lambda: self.track_reload_custom(),
            host="localhost",
            port=5555,
            watcher_interval=0.1)
        self.watcher_server.listen()
        try:
            self.exec_watch_server_tests("")
            # make sure our custom callback has been called
            self.assertGreater(self.reload_tracker_queue.qsize(), 0)
        finally:
            self.watcher_server.shutdown()

    def exec_watch_server_tests(self, base_path):
        """Check the served page, the injected scripts and the reload message.

        Args:
            base_path: Path under which the server is expected to serve the
                site ("" for the root).
        """
        client = AsyncHTTPClient()
        client.fetch("http://localhost:5555" + base_path, self.stop)
        response = self.wait()
        self.assertEqual(200, response.code)

        html = html5lib.parse(response.body)
        ns = get_html_namespace(html)
        self.assertEqual(
            "Hello world",
            html_findall(html, ns, "./{ns}head/{ns}title")[0].text.strip())

        script_tags = html_findall(html, ns, "./{ns}body/{ns}script")
        self.assertEqual(2, len(script_tags))
        self.assertEqual("http://localhost:5555/httpwatcher.min.js",
                         script_tags[0].attrib['src'])
        self.assertEqual('httpwatcher("ws://localhost:5555/httpwatcher");',
                         script_tags[1].text.strip())

        # if it's a non-standard base path
        if len(base_path.strip("/")) > 0:
            # we shouldn't be able to find anything at the root base path
            client.fetch("http://localhost:5555/", self.stop)
            response = self.wait()
            self.assertEqual(404, response.code)

        # fetch the httpwatcher.min.js file
        client.fetch("http://localhost:5555/httpwatcher.min.js", self.stop)
        response = self.wait()
        self.assertEqual(200, response.code)
        self.assertEqual(self.expected_httpwatcher_js, response.body)

        # now connect via WebSockets
        websocket_connect("ws://localhost:5555/httpwatcher").add_done_callback(
            lambda future: self.stop(future.result()))
        websocket_client = self.wait()

        # trigger a watcher reload
        write_file(self.temp_path, "README.txt", "Hello world!")

        # Read the websocket message after a one second delay.
        IOLoop.current().call_later(
            1.0,
            lambda: websocket_client.read_message(
                lambda future: self.stop(future.result())))
        msg = json.loads(self.wait())
        self.assertIn("command", msg)
        self.assertEqual("reload", msg["command"])
class CommandQueue(object):
    """Queue of timed GPIO pin commands executed one after another."""

    def __init__(self):
        self.all_my_pins = set()
        self.my_controls = set()

    async def start_cycle(self):
        """Create a fresh queue and spawn the consumer loop."""
        await self.reset_queue()
        IOLoop.current().spawn_callback(self.execute_loop)

    async def reset_queue(self):
        """Replace the queue with a new one seeded with a waiting block."""
        self.my_queue = Queue(maxsize=4)
        # the waiting block keeps the queue alive
        await self.add_waiting_block()

    async def enqueue_command(self, command):
        """Put ``command`` on the queue."""
        await self.my_queue.put(command)

    async def waitqueue(self):
        """Wait until every queued command has been marked done."""
        await self.my_queue.join()

    def register_control(self, control):
        """Register ``control``, set its pins up as outputs and drive all
        registered pins HIGH."""
        self.my_controls.add(control)
        control_pins = control.get_my_pins()
        self.all_my_pins.update(control_pins)
        control.set_owner(self)
        for pin in control_pins:
            GPIO.setup(pin, GPIO.OUT)  # GPIO Assign mode
        self.all_off()

    def all_off(self):
        """Emergency mode: drive every registered pin HIGH."""
        for pin in self.all_my_pins:
            GPIO.output(pin, GPIO.HIGH)

    def print_qsize(self):
        """Print the current queue size."""
        print("qsize", self.get_qsize())

    def get_qsize(self):
        """Return the number of commands currently queued."""
        return self.my_queue.qsize()

    async def add_waiting_block(self, verbose=False):
        """Adds a waiting block ONLY if the queue needs it.
        """
        current_size = self.get_qsize()
        if verbose:
            print(f"deciding on adding a waiting block (qsize: {current_size})")

        # A waiting block is added only if the queue is empty
        if current_size >= 1:
            if verbose:
                print("no waiting block!")
            return

        await self.enqueue_command({'pin': None, 'dt': 0.2})
        if verbose:
            print("adding wait block")
            self.print_qsize()

    async def execute_one_command(self, comm, verbose=1):
        """Run one command: hold ``comm['pin']`` LOW for ``comm['dt']``
        seconds, or just sleep when the pin is None (a waiting block)."""
        pin = comm['pin']
        dt = comm['dt']

        # Executing a command shortens the queue; top it up with a waiting
        # block first. Nothing is added when the queue is long enough.
        await self.add_waiting_block()

        # none for waiting blocks
        if pin is None:
            if verbose >= 2:
                print("processing -- waiting block", dt, "s")
            await gen.sleep(dt)
            return

        print("processing -- pin:", pin, "t:", dt, "s")
        GPIO.output(pin, GPIO.LOW)  # on
        await gen.sleep(dt)
        GPIO.output(pin, GPIO.HIGH)  # out

    async def execute_loop(self):
        """Consumer loop: execute queued commands in order."""
        print("starting loop")
        while True:
            async for command in self.my_queue:
                await self.execute_one_command(command)
                self.my_queue.task_done()
                if not self.get_qsize():
                    break
def get_data(cls, account, source_filter, limit=100, skip=0):
    """
    Gathers commit information from GH and writes it out as a JSON list.

    GET https://api.github.com/repos/:owner/:repo/commits
    Header: Accept: application/vnd.github.v3+json

    The function is a generator that yields Tornado futures; it first pages
    through the commit list and then fetches every commit's details with at
    most ``FETCH_CONCURRENCY`` requests in flight, writing one JSON object
    per commit through ``cls.write``.

    :param cls: object whose ``write`` method receives the output
    :param account: must be truthy and have ``enabled`` set; supplies ``_id``
        and ``get_request``
    :param source_filter: raw filter, wrapped in ``GitHubRepositoryDateFilter``
    :param limit: maximum number of commits to report, ``None`` for no limit
    :param skip: not used by this function
    :raises ValueError: if the account is unusable or no repository is given
    :raises HTTPError: 413 when more than 500 commits would have to be fetched
    """
    if not account or not account.enabled:
        raise ValueError('cannot gather information without a valid account')

    client = AsyncHTTPClient()
    source_filter = GitHubRepositoryDateFilter(source_filter)
    if source_filter.repository is None:
        raise ValueError('required parameter projects missing')

    default_headers = {"Content-Type": "application/json",
                       "Accept": "application/vnd.github.v3+json"}

    # The GitHub API serves at most 100 items per page.
    per_page = 100 if limit is None or limit > 100 else limit
    # Join all query parameters with '&' behind a single '?'; appending a
    # second '?' to a URI that already has a query string breaks the request.
    query_parts = [part for part in (source_filter.get_qs(),
                                     "per_page={}".format(per_page)) if part]
    uri = "https://api.github.com/repos/{}/commits?{}".format(
        source_filter.repository, "&".join(query_parts))

    app_log.info("Starting retrieval of commit list for account {}".format(account._id))

    taken = 0
    queue = Queue()
    sem = BoundedSemaphore(FETCH_CONCURRENCY)
    done, working = set(), set()
    # Becomes True once the closing bracket is written; nothing may be
    # written (and no new fetch started) after that point.
    state = {'closed': False}

    # first we grab our list of commits
    while uri is not None:
        app_log.info(
            "({}) Retrieving next page, received {} commits thus far".format(account._id, taken))
        req = account.get_request(uri, headers=default_headers)
        response = yield client.fetch(req)
        page_data = json.loads(response.body.decode('utf-8'))

        for item in page_data:
            if limit is not None and taken >= limit:
                break  # never collect more than the caller asked for
            commit_url = item.get('url', None)
            if commit_url is None:
                continue  # nothing to fetch for this entry
            queue.put_nowait(commit_url)
            taken += 1

        if queue.qsize() > 500:
            raise HTTPError(413, 'too many commits')
        if limit is not None and taken >= limit:
            break

        # parse the Link header from GitHub (https://developer.github.com/v3/#pagination)
        links = parse_link_header(response.headers.get('Link', ''))
        uri = links.get('next', None)

    app_log.info("({}) Commit list retrieved, fetching info for {} commits".format(account._id, taken))

    # open our list
    cls.write('[')

    # our worker to actually fetch the info
    @gen.coroutine
    def fetch_url(current_url):
        try:
            if current_url in working:
                return
            page_no = len(working)
            app_log.info("Fetching page {}".format(page_no))
            working.add(current_url)

            req = account.get_request(current_url)
            response = yield client.fetch(req)
            response_data = json.loads(response.body.decode('utf-8'))

            author = response_data['commit']['author']
            statuses = [entry['status'] for entry in response_data['files']]
            obj = {
                'date': author['date'],
                'author': author['name'],
                'added_files': statuses.count('added'),
                'deleted_files': statuses.count('deleted'),
                'modified_files': statuses.count('modified'),
                'additions': response_data['stats']['additions'],
                'deletions': response_data['stats']['deletions']
            }

            if state['closed']:
                # The list was already closed (timeout); writing now would
                # put data behind the closing bracket.
                return
            if done:
                cls.write(',')
            cls.write(json.dumps(obj))
            done.add(current_url)
            app_log.info("Page {} downloaded".format(page_no))
        except Exception:
            # This coroutine is never awaited, so report the failure here
            # instead of losing it in an unobserved future.
            app_log.exception("Failed to fetch commit info from {}".format(current_url))
        finally:
            queue.task_done()
            sem.release()

    @gen.coroutine
    def worker():
        # Stop once the queue is drained so no coroutine is left waiting
        # on an empty queue after the request has finished.
        while not queue.empty():
            yield sem.acquire()
            if state['closed']:
                return
            fetch_url(queue.get_nowait())

    # start our concurrency worker
    worker()

    try:
        # wait until we're done
        yield queue.join(timeout=timedelta(seconds=MAXIMUM_REQ_TIME))
    except gen.TimeoutError:
        app_log.warning("Request exceeds maximum time, cutting response short")
    finally:
        # close our list
        state['closed'] = True
        cls.write(']')

    app_log.info("Finished retrieving commits for {}".format(account._id))
class Kernel(W.Widget):
    """
    An evented kernel

    Wraps a Jupyter kernel manager/client pair as a widget: notebook cells
    are queued and sent for execution, and every message received on the
    iopub and shell channels is dispatched to the ``on_msg_<msg_type>``
    method of this class, if one exists.
    """
    execution_state = T.Unicode(allow_none=True).tag(sync=True)
    name = T.Unicode("python3").tag(sync=True)
    stdout = T.Tuple([]).tag(sync=True)
    stderr = T.Tuple([]).tag(sync=True)
    progress = T.Float(0.0).tag(sync=True)
    file_name = T.Unicode(allow_none=True).tag(sync=True)
    ipynb = T.Dict(allow_none=True).tag(sync=True)
    widgets = T.Tuple([]).tag(sync=True, **W.widget_serialization)

    # Shared by all instances; used by the @run_on_executor methods.
    executor = ThreadPoolExecutor(multiprocessing.cpu_count())

    _view_klass = DefaultKernelView

    def __init__(self, *args, **kwargs):
        super(Kernel, self).__init__(*args, **kwargs)
        self._kernel_client = None
        self._kernel_manager = None
        self._queue = Queue()
        self._listening = False
        # Cell whose execution request was sent last; message handlers
        # attach their output to it.
        self._current_cell = None

    def save(self):
        """Save ``ipynb`` under ``file_name``, inventing a name if needed."""
        if not self.ipynb:
            return
        if not self.file_name:
            self.file_name = str(uuid.uuid4()).split("-")[0]
        save_notebook(self.file_name, self.ipynb)

    def shutdown(self):
        """Shut the kernel down (if one exists) and forget manager/client."""
        if self._kernel_manager:
            self.widgets = []
            self.execution_state = "shutdown"
            self._kernel_manager.request_shutdown()
            self._kernel_manager.cleanup()
            self.execution_state = "down"
            self._kernel_client = None
            self._kernel_manager = None
            # A later run creates a new client with new channels, which
            # have to be listened to again.
            self._listening = False

    def run(self, cell_nodes=None, shutdown=None):
        """Schedule execution of ``cell_nodes`` on the IOLoop.

        :param cell_nodes: cells to run; defaults to ``ipynb["cells"]``
        :param shutdown: if truthy, shut the kernel down before and after
        :return: ``self``
        """
        cell_nodes = cell_nodes or (self.ipynb or {}).get("cells") or []

        @coroutine
        def _run():
            self.progress = 0.0

            if shutdown:
                self.shutdown()

            # Discard whatever is still queued from an earlier run.
            while self._queue.qsize():
                yield self._queue.get()

            for cell in cell_nodes:
                yield self._queue.put(cell)

            if self._kernel_client is None:
                yield self.client()

            if not self._listening:
                IOLoop.instance().add_callback(self.listen)

            try:
                while self._queue.qsize():
                    self._current_cell = yield self._queue.get()
                    yield self.run_one(self._current_cell)
            except Exception as err:
                print(f"ERROR {self.name} {err}")
                print(traceback.format_exc())
            finally:
                if shutdown:
                    self.shutdown()

        IOLoop.instance().add_callback(_run)
        return self

    @coroutine
    def listen(self):
        """Start receiving messages from the iopub and shell channels."""
        kernel_client = self._kernel_client
        if kernel_client is None:
            # Shut down before this callback ran; nothing to listen to.
            return
        for channel in (kernel_client.iopub_channel,
                        kernel_client.shell_channel):
            self._listen_one(channel)
        self._listening = True

    def _listen_one(self, channel):
        """Dispatch every message arriving on ``channel`` to the handlers."""
        stream = ZMQStream(channel.socket)
        handler = self._make_handler()

        def _listen(raw):
            try:
                ident, smsg = channel.session.feed_identities(raw)
                msg = channel.session.deserialize(smsg)
                IOLoop.instance().add_callback(handler, msg)
            except Exception:
                # Best effort: a message that cannot be decoded is dropped,
                # but the failure is reported instead of being hidden.
                self.log.warning("Could not decode kernel message",
                                 exc_info=True)

        stream.on_recv(_listen)

    @run_on_executor
    def client(self):
        """Start a kernel if necessary and connect a client to it.

        Runs on the executor.  If the client does not become ready within
        5 seconds the channels and the kernel are stopped, the stored
        references are cleared and the ``RuntimeError`` is re-raised.
        """
        km = self._kernel_manager

        if km is None:
            km = IOLoopKernelManager(kernel_name=self.name)
            self._kernel_manager = km
            km.start_kernel()

        kc = km.client()
        self._kernel_client = kc
        kc.start_channels()

        try:
            kc.wait_for_ready(timeout=5)
        except RuntimeError:
            kc.stop_channels()
            km.shutdown_kernel()
            # Do not keep references to a dead kernel; otherwise the next
            # run() would skip client() and talk to it.
            self._kernel_client = None
            self._kernel_manager = None
            raise

    def rerun(self, shutdown=False):
        """Shut down, then run the notebook's cells again; returns ``self``."""
        self.shutdown()
        self.run(shutdown=shutdown)
        return self

    def view(self, view_klass=None):
        """Return a view of this kernel (``_view_klass`` unless overridden)."""
        return (view_klass or self._view_klass)(kernel=self)

    def _make_handler(self, cell=None):
        """Build the callback that routes a message to ``on_msg_<msg_type>``.

        :param cell: cell passed to the handlers; when falsy, the cell being
            executed at dispatch time is used instead
        """
        def _on_msg(msg):
            msg_type = msg['header']['msg_type']
            handler = getattr(self, "on_msg_{}".format(msg_type), None)
            if handler is not None:
                handler(msg, cell or self._current_cell, msg["content"])
            else:
                self.log.warning(
                    "UNHANDLED MSG TYPE: %s\n---\n%s\n%s",
                    msg_type,
                    pformat(msg),
                    f"You should implement on_msg_{msg_type}")
        return _on_msg

    @run_on_executor
    def run_one(self, cell):
        """Send the source of ``cell`` for execution; returns the message id."""
        source = cell["source"]
        if not isinstance(source, str):
            source = "\n".join(source)
        # A plain string must not be joined: that would put a newline
        # between every single character.
        msg_id = self._kernel_client.execute(source)
        return msg_id

    def _append_output(self, cell, output_type, content):
        """Append an output entry built from ``content`` to ``cell``."""
        if cell is None:
            # No cell has been sent for execution yet; nowhere to store it.
            self.log.warning("Dropping %s received without a current cell",
                             output_type)
            return
        cell.setdefault("outputs", []).append({
            "data": content["data"],
            "output_type": output_type,
            "metadata": {}
        })

    def on_msg_stream(self, msg, cell, content):
        """Append stream text to ``stdout`` or ``stderr``."""
        stream_name = content["name"]
        if stream_name == "stdout":
            self.stdout += content["text"],
        elif stream_name == "stderr":
            self.stderr += content["text"],
        else:
            self.log.warning("UNHANDLED STREAM %s\n---\n%s",
                             stream_name, pformat(msg))

    def on_msg_execute_result(self, msg, cell, content):
        """Record an ``execute_result`` output on the cell."""
        self._append_output(cell, "execute_result", content)

    def on_msg_display_data(self, msg, cell, content):
        """Record a ``display_data`` output on the cell."""
        self._append_output(cell, "display_data", content)

    def on_msg_status(self, msg, cell, content):
        """Mirror the kernel's execution state."""
        self.execution_state = content["execution_state"]

    def on_msg_execute_input(self, msg, cell, content):
        """Ignored."""
        pass

    def on_msg_execute_reply(self, msg, cell, content):
        """Ignored."""
        pass

    def on_msg_comm_reply(self, msg, cell, content):
        """Ignored."""
        pass

    def on_msg_comm_open(self, msg, cell, content):
        """Create the widget for a newly opened comm and track it.

        Changes made to the widget are forwarded to the kernel as
        ``comm_msg`` messages with method ``update``.
        """
        comm_id = content["comm_id"]
        state, buffer_paths, buffers = W.widgets.widget._remove_buffers(
            content["data"]["state"])

        comm = W.widgets.widget.Comm(
            comm_id=comm_id,
            target_name='jupyter.widget',
            data={'state': state, 'buffer_paths': buffer_paths},
            buffers=buffers,
            metadata={'version': W._version.__protocol_version__}
        )
        W.Widget.handle_comm_opened(comm, msg)

        widget = W.Widget.widgets[comm_id]

        def _forward_change(change):
            kernel_client = self._kernel_client
            if kernel_client is None:
                return
            update_msg = kernel_client.session.msg("comm_msg", {
                "comm_id": widget.comm.comm_id,
                "data": {
                    "method": "update",
                    "state": {
                        change["name"]: change["new"]
                    },
                    "buffer_paths": []
                }
            })

            def _send():
                # Use the client captured above: self._kernel_client may
                # have been cleared by the time this callback runs.
                kernel_client.shell_channel.send(update_msg)

            IOLoop.instance().add_callback(_send)

        widget.observe(_forward_change)

        self.widgets += (widget,)

    def on_msg_comm_msg(self, msg, cell, content):
        """Apply an ``update`` from the kernel to the matching widget(s)."""
        method = content.get("data", {}).get("method")
        if method == "update":
            for widget in self.widgets:
                if widget.comm.comm_id == content["comm_id"]:
                    for k, v in content["data"]["state"].items():
                        setattr(widget, k, v)
        else:
            self.log.warning("UNKNOWN METHOD %s\n---\n%s",
                             method, pformat(msg))

    def on_msg_error(self, msg, cell, content):
        """Log an error message from the kernel."""
        self.log.error("ERROR\n---\n%s", pformat(msg))

    def on_msg_shutdown_reply(self, msg, cell, content):
        """Record that the kernel acknowledged a shutdown request."""
        self.execution_state = "shutdown"

    # Original (misspelled) name, kept for callers that use it directly.
    on_msg_shutdown_replay = on_msg_shutdown_reply