async def _search_worker(peer_queue: Queue, search_path: str, request_body: Optional[bytes], method: str,
                         auth_header: Optional[str], responses: list):
    """
    Queue worker: pops peers off peer_queue and issues the given search request against
    each one, appending (peer, response) tuples to the shared `responses` list.

    A `None` item on the queue is the shutdown signal for the worker.
    Failures are recorded as (peer, None) so callers can tell which peers failed.
    """
    client = AsyncHTTPClient()
    async for peer in peer_queue:
        if peer is None:  # Exit signal
            return
        try:
            responses.append((
                peer,
                await peer_fetch(
                    client,
                    peer,
                    f"api/{search_path}",
                    request_body=request_body,
                    method=method,
                    # Only pass the bearer token to our own node (otherwise it could be hijacked)
                    # TODO: How to share auth?
                    auth_header=(auth_header if peer == CHORD_URL else None),
                )))
        except HTTPError as e:  # TODO: Less broad of an exception
            # Record the failure so the caller still gets an entry for this peer
            responses.append((peer, None))
            print(
                f"[{SERVICE_NAME} {datetime.now()}] Connection issue or timeout with peer {peer}.\n"
                f" Error: {str(e)}", flush=True)
        finally:
            # Always acknowledge the queue item, success or failure
            peer_queue.task_done()
class SubscribeListener(SubscribeCallback):
    """PubNub subscribe callback that surfaces connection state through awaitable
    events and buffers incoming messages/presence envelopes in queues."""

    def __init__(self):
        self.connected = False
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()

    def status(self, pubnub, status):
        # Flip the matching event exactly once for the first (un)subscribe status.
        if utils.is_subscribed_event(status):
            if not self.connected_event.is_set():
                self.connected_event.set()
        elif utils.is_unsubscribed_event(status):
            if not self.disconnected_event.is_set():
                self.disconnected_event.set()

    def message(self, pubnub, message):
        # Buffer every incoming message for wait_for_message_on().
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        # Buffer every presence event for wait_for_presence_on().
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def wait_for_connect(self):
        """Block until the subscription connects; error out if already connected."""
        if self.connected_event.is_set():
            raise Exception("instance is already connected")
        yield self.connected_event.wait()

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        """Block until the subscription disconnects; error out if already disconnected."""
        if self.disconnected_event.is_set():
            raise Exception("instance is already disconnected")
        yield self.disconnected_event.wait()

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        """Consume buffered messages until one arrives on any of the given channels."""
        wanted = list(channel_names)
        while True:
            try:
                envelope = yield self.message_queue.get()
                if envelope.channel in wanted:
                    raise tornado.gen.Return(envelope)
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        """Consume buffered presence events until one arrives on any of the given channels."""
        wanted = list(channel_names)
        while True:
            try:
                envelope = yield self.presence_queue.get()
                if envelope.channel in wanted:
                    raise tornado.gen.Return(envelope)
            finally:
                self.presence_queue.task_done()
async def search_worker(
        cls,
        # Input queue
        dataset_queue: Queue,
        # Input values
        dataset_object_schema: dict,
        join_query,
        data_type_queries,
        exclude_from_auto_join: Tuple[str, ...],
        auth_header: Optional[str],
        # Output references
        dataset_objects_dict: dict,
        dataset_join_queries: dict,
):
    """Drain dataset_queue, running the dataset-level search for each entry and
    storing results / join queries in the shared output dictionaries.

    A `None` queue item is the worker's shutdown signal.
    """
    async for dataset in dataset_queue:
        if dataset is None:
            # Exit signal
            return

        try:
            identifier = dataset["identifier"]
            results, ds_join_query, _ = await run_search_on_dataset(
                dataset_object_schema,
                dataset,
                join_query,
                data_type_queries,
                exclude_from_auto_join,
                cls.include_internal_results,
                auth_header,
            )
            dataset_objects_dict[identifier] = results
            dataset_join_queries[identifier] = ds_join_query
        except HTTPError as e:  # Thrown from run_search_on_dataset
            # Metadata service error
            # TODO: Better message
            # TODO: Set error code outside worker?
            print(
                f"[{SERVICE_NAME} {datetime.now()}] [ERROR] Error from dataset search: {str(e)}",
                file=sys.stderr, flush=True)
        finally:
            dataset_queue.task_done()
class TornadoQuerierBase(object):
    """Base class for queue-driven task runners: subclasses implement gen_task()
    to produce the task list and run_task() to process one task, then call
    start().

    Fixes: ``NotImplementError`` was a typo for the builtin
    ``NotImplementedError`` (raising it was itself a NameError), and
    ``self.running`` is now initialized in ``__init__`` so the attribute exists
    before prepare() runs.
    """

    def __init__(self):
        self.tasks = TornadoQueue()
        self.running = False  # toggled by prepare()/cleanup()

    def gen_task(self):
        """Return (possibly via coroutine) the iterable of tasks to enqueue."""
        raise NotImplementedError()

    def run_task(self, task):
        """Process a single task; must be overridden by subclasses."""
        raise NotImplementedError()

    def prepare(self):
        self.running = True

    def cleanup(self):
        self.running = False

    @coroutine
    def run_worker(self, worker_id, f):
        """Worker loop: pull tasks and apply f until the queue is drained."""
        while self.tasks.qsize() > 0:
            task = yield self.tasks.get()
            LOG.debug('worker[%d]: current task is %s' % (worker_id, task))
            try:
                yield f(task)
            except Exception as e:
                # Log and continue rather than killing the worker on one bad task
                LOG.warning(str(e))
            finally:
                self.tasks.task_done()
                task = None
        LOG.debug('worker[%d]: all tasks done %s' % (worker_id, self.tasks))

    @coroutine
    def start(self, num_workers=1):
        """Generate tasks, start num_workers workers, and wait for completion."""
        self.prepare()

        # add tasks
        tasks = yield self.gen_task()
        for task in tasks:
            yield self.tasks.put(task)

        # start workers (fire-and-forget coroutines; join() below awaits the work)
        for worker_id in range(num_workers):
            LOG.debug('starting worker %d' % worker_id)
            self.run_worker(worker_id, self.run_task)

        yield self.tasks.join()
        self.cleanup()
class TopicAppllication(tornado.web.Application):
    """Tornado application with a bounded work queue drained by consumer().

    Fix: ``print item`` was Python 2 print-statement syntax — a SyntaxError on
    Python 3, which other code in this module (async def, f-strings) requires.
    It now uses the print() function.
    """

    def __init__(self):
        handlers = [
            url(r'/', MainHandler)
        ]
        # Bounded so producers back-pressure once 10 items are pending
        self.queue = Queue(maxsize=10)
        super(TopicAppllication, self).__init__(handlers=handlers, debug=True)

    @gen.coroutine
    def consumer(self):
        """Take one item off the queue, print it, and acknowledge it."""
        item = yield self.queue.get()
        try:
            print(item)
        finally:
            self.queue.task_done()
class MessageRouter(object):
    # Dispatches queued Message objects to handlers registered per message-type
    # name, falling back to default_handler; handler failures are converted into
    # an Error reply sent through message_sender.

    def __init__(self, message_sender, default_handler=None):
        # message_sender: exposes send_message_ignore_response(); used for error replies
        # default_handler: called for message types with no registered handler (may be None)
        self._queue = Queue()
        self.message_sender = message_sender
        self.default_handler = default_handler
        self._message_handlers = {}  # message type name -> handler callable
        self._working = False

    def register_message_handler(self, message, handler):
        # `message` is the message *class* (a MessageMeta instance); keyed by class name
        assert isinstance(message, MessageMeta)
        assert hasattr(handler, '__call__')
        self._message_handlers[message.__name__] = handler

    @gen.coroutine
    def put_message(self, message):
        # Enqueue a message instance for the start() loop to dispatch
        assert isinstance(message, Message)
        yield self._queue.put(message)

    @gen.coroutine
    def start(self):
        # Dispatch loop: runs until stop() clears the flag. Note the flag is only
        # re-checked after the next message is dequeued, so stop() is lazy.
        self._working = True
        while self._working:
            message = yield self._queue.get()
            try:
                # TODO: Maybe we need to add special handling for BarrierRequest
                handler = self._message_handlers.get(message.type, self.default_handler)
                if handler:
                    yield handler(message)
            except Exception as e:
                # Convert any handler failure into a structured Error response
                exc_type, exc_value, exc_tb = sys.exc_info()
                error_type, error_subtype, error_message, extended_message = errors.exception_to_error_args(
                    exc_type, exc_value, exc_tb)
                response = Error.from_request(
                    message,
                    error_type=error_type,
                    error_subtype=error_subtype,
                    message=error_message,
                    extended_message=extended_message)
                yield self.message_sender.send_message_ignore_response(response)
            finally:
                self._queue.task_done()

    def stop(self):
        # Ask the start() loop to exit after the current message completes
        self._working = False
class FirehoseWebSocket(tornado.websocket.WebSocketHandler):
    """WebSocket handler that registers a per-connection queue in the global
    ``queues`` list and streams every queued item to the client as JSON.

    Fix: ``print "hose open"`` was Python 2 print-statement syntax (a
    SyntaxError under Python 3); it now uses the print() function.
    """

    @tornado.gen.coroutine
    def open(self):
        print("hose open")
        global queues
        self.queue = Queue()
        queues.append(self.queue)
        # Pump queued items to the client for the lifetime of the connection
        while True:
            item = yield self.queue.get()
            self.queue.task_done()
            self.write_message(json.dumps(item))

    @tornado.gen.coroutine
    def on_close(self):
        global queues
        # Let any pending items be acknowledged before deregistering the queue
        yield self.queue.join()
        queues.remove(self.queue)
class CommandQueue():
    """FIFO of (command, view) pairs; process_command() drains it forever,
    writing each command to its websocket view with a small delay."""

    def __init__(self):
        self.queue = Queue()

    @gen.coroutine
    def process_command(self):
        """Infinite consumer loop; every dequeued item is acknowledged."""
        while True:
            entry = yield self.queue.get()
            try:
                # Throttle delivery slightly between commands
                yield gen.sleep(0.1)
                command, view = entry
                payload = {command[0]: command[1]}
                view.write_message(payload)
            finally:
                self.queue.task_done()

    def put(self, item):
        """Fire-and-forget enqueue (the put future is intentionally unused)."""
        self.queue.put(item)
class StreamClient(object):
    """Per-subscriber buffer for a stream: producers send() items and the
    client connection fetch()es them one at a time."""

    # Upper bound on buffered items per client
    MAX_SIZE = 60

    def __init__(self, steam_id):
        self.id = generate_id()
        self.stream_id = steam_id
        self.queue = Queue(StreamClient.MAX_SIZE)

    @coroutine
    def send(self, item):
        """Enqueue one item, waiting if the buffer is full."""
        yield self.queue.put(item)

    @coroutine
    def fetch(self):
        """Dequeue the next item, acknowledging it immediately."""
        nxt = yield self.queue.get()
        self.queue.task_done()
        return nxt

    def empty(self):
        """True when no items are buffered."""
        return not self.queue.qsize()
class PopularCategories:
    # Accumulates per-category prediction scores (running average) and keeps a
    # persisted top-5 ranking, fed asynchronously through an update queue.

    def __init__(self):
        self.categories = {}  # label -> running averaged score
        self.update_queue = Queue()

    @gen.coroutine
    def add_for_processing(self, predictions):
        # Queue a prediction array for the next process_queue() pass
        yield self.update_queue.put(predictions)

    @gen.coroutine
    def process_queue(self):
        # Drain a snapshot of the queue (qsize at loop start), then persist the
        # refreshed top 5; does nothing when the queue is empty.
        if self.update_queue.qsize() > 0:
            for i in range(self.update_queue.qsize()):
                predictions = yield self.update_queue.get()
                try:
                    self._update_categories(predictions)
                finally:
                    self.update_queue.task_done()
            # update top 5
            top_5 = sorted(self.categories.items(), key=lambda x: x[1], reverse=True)[:5]
            mapped = map(lambda x: to_json_result(x[0], x[1]), top_5)
            yield update_top_5(list(mapped))

    def _update_categories(self, new_predictions):
        # assumes new_predictions is a 2-D score array (e.g. numpy, shape
        # (1, n_labels)) so argsort()[0] yields label indices — TODO confirm
        predictions = new_predictions.argsort()[0]
        # update categories total
        for prediction in predictions:
            label = configuration.image_labels[prediction]
            score = new_predictions[0][prediction]
            if label in self.categories:
                # Average the previous aggregate with the new score
                update_score = (self.categories[label] + score) / 2
            else:
                update_score = score
            self.categories[label] = update_score
def call(self, action, body): """ Do the actual calling :param str action: action to perform (CRUD for example) :param str body: an object to send (will be json-encoded) """ # queue is used to send result back to this routine corr_id = str(uuid.uuid4()) queue = Queue(maxsize=1) self._reply_queues[corr_id] = queue # send message self.channel().basic_publish(exchange='', routing_key=action, properties=pika.BasicProperties( correlation_id=corr_id, reply_to=self._callback_queue, ), body=json.dumps(body)) # add timeout callback self._ioloop.add_timeout( time.time() + self._timeout, functools.partial( self._on_timeout, queue=queue, correlation_id=corr_id, )) # retrieve result back result = yield queue.get() queue.task_done() if 'timeout_error' in result: raise TimeoutError(result['error']) return result
async def _fetch_table_definition_worker(
        table_queue: Queue, auth_header: Optional[str],
        table_ownerships_and_records: List[Tuple[dict, dict]]):
    """
    Queue worker: pops table-ownership records off table_queue, fetches each
    table's definition from the local node, and appends (ownership, record)
    pairs to the shared output list.

    A `None` item on the queue is the shutdown signal.

    Fix: HTTP failures were previously unhandled (the original TODO noted
    this), so one bad table crashed the worker; they are now caught and
    logged, consistent with `_search_worker`. The stray debug print of the
    URL is removed.
    """
    client = AsyncHTTPClient()
    async for t in table_queue:
        if t is None:  # Exit signal
            return

        try:
            # Set up pre-requisites
            # - default:
            url = f"api/{t['service_artifact']}/tables/{t['table_id']}"
            # - Gohan compatibility
            # TODO: formalize/clean this up
            if USE_GOHAN and t['service_artifact'] == "variant":
                url = f"api/gohan/tables/{t['table_id']}"

            # TODO: Don't fetch schema except for first time?
            table_ownerships_and_records.append((
                t,
                await peer_fetch(
                    client,
                    CHORD_URL,
                    url,
                    method="GET",
                    auth_header=auth_header,  # Required, otherwise may hit a 403 error
                    extra_headers=DATASET_SEARCH_HEADERS,
                )))
        except HTTPError as e:
            # Log and skip this table rather than killing the worker
            # (consistent with _search_worker's error handling)
            print(
                f"[{SERVICE_NAME} {datetime.now()}] [ERROR] Error fetching table definition from {url}: {str(e)}",
                file=sys.stderr, flush=True)
        finally:
            table_queue.task_done()
class Model:
    # Wraps a RETURNN/CRNN-style Engine for serving: loads the config and
    # devices, builds the network, then serves classification requests that are
    # batched off classification_queue by a background coroutine.

    def __init__(self, config_file):
        # Serializes engine/device access during forwarding
        self.lock = locks.Lock()
        # Pending ClassificationRequest objects, drained by classify_in_background()
        self.classification_queue = Queue()

        print('loading config %s' % config_file, file=log.v5)
        # Load and setup config
        try:
            self.config = Config.Config()
            self.config.load_file(config_file)
            self.pause_after_first_seq = self.config.float('pause_after_first_seq', 0.2)
            self.batch_size = self.config.int('batch_size', 5000)
            self.max_seqs = self.config.int('max_seqs', -1)
        except Exception:
            print('Error: loading config %s failed' % config_file, file=log.v1)
            raise

        try:
            self.devices = self._init_devices()
        except Exception:
            print('Error: Loading devices for config %s failed' % config_file, file=log.v1)
            raise

        print('Starting engine for config %s' % config_file, file=log.v5)
        self.engine = Engine.Engine(self.devices)
        try:
            self.engine.init_network_from_config(config=self.config)
        except Exception:
            print('Error: Loading network for config %s failed' % config_file, file=log.v1)
            raise

        # Run the batching consumer on the IOLoop for this model's lifetime
        IOLoop.current().spawn_callback(self.classify_in_background)

        self.last_used = datetime.datetime.now()

    def _init_devices(self):
        """
        Initiates the required devices for a config. Same as the funtion initDevices in rnn.py.
        :param config:
        :return: A list with the devices used.
        """
        oldDeviceConfig = ",".join(self.config.list('device', ['default']))
        if "device" in TheanoFlags:
            # This is important because Theano likely already has initialized that device.
            config.set("device", TheanoFlags["device"])
            print("Devices: Use %s via THEANO_FLAGS instead of %s." % (TheanoFlags["device"], oldDeviceConfig),
                  file=log.v4)
        devArgs = get_devices_init_args(self.config)
        assert len(devArgs) > 0
        devices = [Device(**kwargs) for kwargs in devArgs]
        # Busy-wait until every device reports initialized
        for device in devices:
            while not device.initialized:
                time.sleep(0.25)
        if devices[0].blocking:
            print("Devices: Used in blocking / single proc mode.", file=log.v4)
        else:
            print("Devices: Used in multiprocessing mode.", file=log.v4)
        return devices

    @tornado.gen.coroutine
    def classify_in_background(self):
        # Batch consumer: block for one request, then greedily grab everything
        # else queued, forward the whole batch in one dataset pass, and resolve
        # each request's future with its per-sequence result.
        while True:
            requests = []
            # fetch first request
            r = yield self.classification_queue.get()
            requests.append(r)
            # grab all other waiting requests
            try:
                while True:
                    requests.append(self.classification_queue.get_nowait())
            except QueueEmpty:
                pass

            output_dim = {}
            # Do dataset creation and classification.
            dataset = StaticDataset(data=[r.data for r in requests], output_dim=output_dim)
            dataset.init_seq_order()
            batches = dataset.generate_batches(recurrent_net=self.engine.network.recurrent,
                                               batch_size=self.batch_size,
                                               max_seqs=self.max_seqs)

            with (yield self.lock.acquire()):
                ctt = ForwardTaskThread(self.engine.network, self.devices, dataset, batches)
                yield ctt.join()

            try:
                # One result per sequence; presumably dataset.num_seqs equals
                # len(requests) since the dataset was built from them — TODO confirm
                for i in range(dataset.num_seqs):
                    requests[i].future.set_result(ctt.result[i])
                    self.classification_queue.task_done()
            except Exception as e:
                print('exception', e)
                raise

    @tornado.gen.coroutine
    def classify(self, data):
        # Public entry point: enqueue a request and await its future's result
        self.last_used = datetime.datetime.now()
        request = ClassificationRequest(data)
        yield self.classification_queue.put(request)
        yield request.future
        return request.future.result()
async def peer_worker(self, peers: Set[str], peers_to_check: Queue, peers_to_check_set: Set[str],
                      attempted_contact: Set[str], results: List[bool]):
    # Federation worker: pops peer URLs off peers_to_check, announces this node
    # to each peer, pulls back the peer's own peer list, and enqueues any newly
    # discovered peers for verification. Appends to `results` whether each
    # contact surfaced a previously unknown peer. A `None` item is the exit
    # signal. `peers_to_check_set` mirrors the queue contents for membership
    # tests; `self.contacting` guards against concurrent workers hitting the
    # same peer.
    client = AsyncHTTPClient()
    async for peer in peers_to_check:
        if peer is None:  # Exit signal
            return

        if (peer in self.last_errored and
                datetime.now().timestamp() - self.last_errored[peer] < LAST_ERRORED_CACHE_TIME):
            # Avoid repetitively hitting dead nodes
            print(f"[{SERVICE_NAME} {datetime.now()}] Skipping dead peer {peer}", flush=True)
            peers_to_check_set.remove(peer)
            peers_to_check.task_done()
            continue

        if peer in attempted_contact:
            # Already tried this peer during this run
            peers_to_check_set.remove(peer)
            peers_to_check.task_done()
            continue

        if peer in self.contacting:
            # Another worker is already talking to this peer
            print(f"[{SERVICE_NAME} {datetime.now()}] Avoiding race on peer {peer}", flush=True)
            # TODO: Do we call task_done() here?
            continue

        self.contacting.add(peer)

        print(f"[{SERVICE_NAME} {datetime.now()}] Contacting peer {peer}", flush=True)

        peer_peers: List[str] = []

        try:
            # TODO: Combine requests?

            # Notify peer of current node's existence, OIDC realm, and peer list
            await peer_fetch(
                client=client,
                peer=peer,
                path_fragment="api/federation/peers",  # TODO: This should probably be parametrized
                request_body=json.dumps({
                    "peers": list(peers),
                    "self": CHORD_URL,
                    "oidc_discovery_uri": OIDC_DISCOVERY_URI,
                }))

            # Fetch the peer's peer list
            r = await peer_fetch(client=client, peer=peer, path_fragment="api/federation/peers", method="GET")

            # If a non-200 response is encountered, an error is raised
            self.connected_to_peer_network = True
            peer_peers = r["peers"]

        except IndexError:
            print(f"[{SERVICE_NAME} {datetime.now()}] [ERROR] Invalid 200 response returned by {peer}.",
                  flush=True, file=sys.stderr)

        except (HTTPError, ValueError) as e:
            # HTTPError: Standard 400s/500s
            # ValueError: ex. Unsupported url scheme: api/federation/peers
            now = datetime.now()
            print(f"[{SERVICE_NAME} {now}] [ERROR] Peer contact error for {peer} ({str(e)})",
                  flush=True, file=sys.stderr)
            # Remember the failure so subsequent passes skip this peer for a while
            self.last_errored[peer] = now.timestamp()

        # Incorporate the peer's peer list into the current set of peers
        peers = peers.union(peer_peers)

        # Search for new peers, and if they exist add them to the queue containing peers to verify
        new_peer = False
        for p in peer_peers:
            if p not in peers_to_check_set and p not in self.contacting and p not in attempted_contact:
                new_peer = True
                peers_to_check.put_nowait(p)
                peers_to_check_set.add(p)

        results.append(new_peer)
        attempted_contact.add(peer)
        self.contacting.remove(peer)
        peers_to_check_set.remove(peer)
        peers_to_check.task_done()
class AsynSpider(MySpider):
    # Concurrent crawler: a shared queue of list/detail-page URLs is drained by
    # CONCURRENCY worker coroutines; `fetching`/`fetched` sets de-duplicate URLs.

    def __init__(self, out, **kwargs):
        super(AsynSpider, self).__init__(out, **kwargs)
        self.client = httpclient.AsyncHTTPClient()
        self.q = Queue()
        self.fetching, self.fetched = set(), set()

    def assign_jobs(self, jobs):
        # Seed the queue with pre-built job URLs
        for job in jobs:
            self.q.put(job)

    @gen.coroutine
    def run(self):
        # Fall back to the first list page if no jobs were assigned
        if self.q.empty():
            url = LIST_URL + urllib.urlencode(self.list_query)
            self.q.put(url)
        for _ in range(CONCURRENCY):
            self.worker()
        yield self.q.join()
        assert self.fetching == self.fetched
        # print len(self.fetched)
        if isinstance(self._out, Analysis):
            self._out.finish()

    @gen.coroutine
    def worker(self):
        # Each worker loops fetching URLs forever; q.join() above bounds the run
        while True:
            yield self.fetch_url()

    @gen.coroutine
    def fetch_url(self):
        current_url = yield self.q.get()
        try:
            if current_url in self.fetching:
                return
            self.fetching.add(current_url)
            request = httpclient.HTTPRequest(current_url, headers=HEADERS)
            resp = yield self.client.fetch(request)
            self.fetched.add(current_url)
            xml = etree.fromstring(resp.body)
            has_total_count = xml.xpath("//totalcount/text()")
            if has_total_count:  # non-empty means this is a list page; otherwise a detail page
                total_count = int(has_total_count[0])
                if total_count == 0:
                    return  # list page out of range
                if self.list_query["pageno"] == 1:
                    # First list page: enqueue the following list pages
                    pageno = 2
                    while pageno < 10:
                        # while pageno <= total_count / PAGE_SIZE:
                        self.list_query["pageno"] = pageno
                        next_list_url = LIST_URL + urllib.urlencode(self.list_query)
                        self.q.put(next_list_url)
                        # logging.info(next_list_url)
                        pageno += 1
                # Enqueue a detail-page URL for every job id on this list page
                job_ids = xml.xpath("//jobid/text()")
                job_detail_urls = []
                for ID in job_ids:
                    new_detail_query = DETAIL_QUERY.copy()
                    new_detail_query["jobid"] = ID
                    job_detail_urls.append(DETAIL_URL + urllib.urlencode(new_detail_query))
                for detail_url in job_detail_urls:
                    self.q.put(detail_url)
                    # logging.info(detail_url)
            else:
                # Detail page: hand the parsed XML to the output collector
                self._out.collect(xml)
        finally:
            self.q.task_done()
class Client(object):
    # One connected chat user: receive() parses socket lines into Messages,
    # forwarding() routes this client's outgoing messages to room queues, and
    # response() writes messages addressed to this client back to its stream.

    def __init__(self, server, name, stream):
        self.server = server
        self.name = name
        self.rooms = {}  # room name -> Room this client has joined
        self.stream = stream
        self.inqueue = Queue(maxsize=QUEUE_SIZE)   # messages addressed to this client
        self.outqueue = Queue(maxsize=QUEUE_SIZE)  # messages this client is sending

    @coroutine
    def forwarding(self):
        # Route outgoing messages to the appropriate room queue(s)
        while True:
            msg = yield self.outqueue.get()
            if msg.command == COMMAND_QUIT:
                # Broadcast the quit to every joined room
                for _, room in self.rooms.items():
                    yield room.inqueue.put(msg)
            elif msg.command == COMMAND_JOIN:
                room_name = msg.receiver
                room = self.server.get_room(room_name)
                self.rooms[room_name] = room
                yield room.inqueue.put(msg)
            else:
                room = self.rooms[msg.receiver]
                yield room.inqueue.put(msg)
            self.outqueue.task_done()

    @coroutine
    def response(self):
        global SPEED
        # Deliver inbound messages to the socket until a quit message arrives
        while True:
            msg = yield self.inqueue.get()
            if msg.command == COMMAND_QUIT:
                self.stream.close()
                return
            else:
                response = ("%s %s:%s\n" % (datetime.datetime.now(), msg.sender.name, msg.content.decode()))\
                    .encode('utf-8')
                try:
                    SPEED += 1  # global throughput counter
                    yield self.stream.write(response)
                except Exception as e:
                    logging.debug(str(e))
                    self.stream.close()

    @coroutine
    def receive(self):
        # Parse "<room> <content>" lines; the first message to a room is a JOIN
        while True:
            try:
                line = yield self.stream.read_until(b'\n')
            except Exception as e:
                # Connection failure: synthesize a quit so forwarding() cleans up
                logging.debug(str(e))
                msg = Message(self, '', COMMAND_QUIT, 'CONNECTION ERROR')
                yield self.outqueue.put(msg)
                return
            data = line.strip().split(b' ')
            if len(data) != 2:
                continue  # malformed line; ignore
            room_name, content = data[0], data[1]
            if room_name in self.rooms:
                msg = Message(self, room_name, COMMAND_NORMAL, content)
            else:
                msg = Message(self, room_name, COMMAND_JOIN, content)
            yield self.outqueue.put(msg)
async def _table_search_worker(
        table_queue: Queue,
        dataset_join_query: Query,
        data_type_queries: Dict[str, Query],
        include_internal_results: bool,
        auth_header: Optional[str],
        dataset_object_schema: dict,
        dataset_results: Dict[str, list],
):
    # Worker: pops (table_ownership, table_record) pairs off table_queue, runs
    # the matching data-type query against each table on the local node, and
    # accumulates per-data-type results in dataset_results. A `None` item is
    # the shutdown signal.
    client = AsyncHTTPClient()
    async for table_pair in table_queue:
        if table_pair is None:  # Exit signal
            return
        try:
            table_ownership, table_record = table_pair
            table_data_type = table_record["data_type"]
            is_querying_data_type = table_data_type in data_type_queries

            # Don't need to fetch results for joining if the join query is None; just check
            # individual tables (which is much faster) using the public discovery endpoint.
            private = dataset_join_query is not None or include_internal_results

            if dataset_join_query is not None and table_data_type not in dataset_object_schema["properties"]:
                # Since we have a join query, we need to create a superstructure containing
                # different search results and a schema to match.

                # Set schema for data type if needed
                dataset_object_schema["properties"][table_data_type] = {
                    "type": "array",
                    "items": table_record["schema"] if is_querying_data_type else {}
                }

            # If data type is not being queried, its results are irrelevant
            if not is_querying_data_type:
                continue

            # Setup up search pre-requisites
            # - defaults:
            path_fragment = (
                f"api/{table_ownership['service_artifact']}{'/private' if private else ''}/tables"
                f"/{table_record['id']}/search")
            url_args = (("query", json.dumps(data_type_queries[table_data_type])), )

            # - Gohan compatibility
            # TODO: formalize/clean this up
            if USE_GOHAN and table_ownership['service_artifact'] == "gohan":
                # reset path_fragment:
                path_fragment = (f"api/gohan/variants/get/by/variantId")

                # reset url_args:
                # - construct based on search query
                supplemental_url_args = [["getSampleIdsOnly", "true"]]
                # - transform custom Query to list of lists to simplify
                #   the gohan query parameter construction
                tmpjson = json.dumps({"tmpkey": data_type_queries[table_data_type]})
                reloaded_converted = json.loads(tmpjson)["tmpkey"]
                # - generate query parameters from list of query tree objects
                gohan_query_params = query_utils.construct_gohan_query_params(
                    reloaded_converted, supplemental_url_args)
                url_args = gohan_query_params

            # Run the search
            r = await peer_fetch(
                client,
                CHORD_URL,
                path_fragment=path_fragment,
                url_args=url_args,
                method="GET",
                auth_header=auth_header,  # Required in some cases to not get a 403
                extra_headers=DATASET_SEARCH_HEADERS,
            )

            if private:
                # We have a results array to account for
                results = r["results"]
            else:
                # Here, the array of 1 True is a dummy value to give a positive result
                results = [r] if r else []

            if table_data_type not in dataset_results:
                dataset_results[table_data_type] = results
            else:
                dataset_results[table_data_type].extend(results)
        finally:
            table_queue.task_done()
class BatchedStream(object):
    """ Mostly obsolete, see BatchedSend """

    def __init__(self, stream, interval):
        self.stream = stream
        self.interval = interval / 1000.  # interval is given in ms; stored in seconds
        self.last_transmission = default_timer()
        self.send_q = Queue()
        self.recv_q = Queue()
        # Long-running background coroutines for both directions
        self._background_send_coroutine = self._background_send()
        self._background_recv_coroutine = self._background_recv()
        self._broken = None
        self.pc = PeriodicCallback(lambda: None, 100)
        self.pc.start()

    @gen.coroutine
    def _background_send(self):
        # Coalesce messages queued within `interval` and write them as one batch;
        # the literal string 'close' is the shutdown sentinel on both queues.
        with log_errors():
            while True:
                msg = yield self.send_q.get()
                if msg == 'close':
                    break
                msgs = [msg]
                now = default_timer()
                wait_time = self.last_transmission + self.interval - now
                if wait_time > 0:
                    yield gen.sleep(wait_time)
                # Grab anything else that arrived while waiting
                while not self.send_q.empty():
                    msgs.append(self.send_q.get_nowait())
                try:
                    yield write(self.stream, msgs)
                except StreamClosedError:
                    self.recv_q.put_nowait('close')
                    self._broken = True
                    break
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                # Acknowledge every message in the batch so flush()/join() works
                for _ in msgs:
                    self.send_q.task_done()

    @gen.coroutine
    def _background_recv(self):
        # Read batches off the stream and fan individual messages onto recv_q
        with log_errors():
            while True:
                try:
                    msgs = yield read(self.stream)
                except StreamClosedError:
                    self.recv_q.put_nowait('close')
                    self.send_q.put_nowait('close')
                    self._broken = True
                    break
                assert isinstance(msgs, list)
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for msg in msgs:
                    self.recv_q.put_nowait(msg)

    @gen.coroutine
    def flush(self):
        # Wait until every queued outgoing message has been written
        yield self.send_q.join()

    @gen.coroutine
    def send(self, msg):
        if self._broken:
            raise StreamClosedError('Batch Stream is Closed')
        else:
            self.send_q.put_nowait(msg)

    @gen.coroutine
    def recv(self):
        result = yield self.recv_q.get()
        if result == 'close':
            raise StreamClosedError('Batched Stream is Closed')
        else:
            raise gen.Return(result)

    @gen.coroutine
    def close(self):
        # Flush pending sends before closing the underlying stream
        yield self.flush()
        raise gen.Return(self.stream.close())

    def closed(self):
        return self.stream.closed()
class TornadoPikaPublisher(BeergardenPublisher, PikaClient):
    # Publishes beer-garden messages to RabbitMQ via pika's TornadoConnection;
    # publish requests are queued as (channel-method-name, kwargs) items and
    # executed serially by the _process coroutine.

    def __init__(self, **kwargs):
        self.logger = logging.getLogger(__name__)

        self._shutdown_timeout = timedelta(seconds=kwargs.pop("shutdown_timeout", 5))
        self._work_queue = Queue()  # (channel method name, kwargs) work items
        self._connection = None
        self._channel = None

        # Adapts pika's callback-style constructors into yieldable coroutines
        self.coroutiner = CoroutineMaker({
            "TornadoConnection": "on_open_callback",
            "channel": "on_open_callback"
        })

        # Trying to get super() to work with incompatible signatures is a nightmare
        BeergardenPublisher.__init__(self)
        PikaClient.__init__(self, **kwargs)

        IOLoop.current().spawn_callback(self._process)

    def shutdown(self):
        # Wait (bounded by the shutdown timeout) for queued publishes to drain
        return self._work_queue.join(timeout=self._shutdown_timeout)

    @coroutine
    def _open_connection(self):
        self._connection = yield self.coroutiner.convert(TornadoConnection)(
            parameters=self._conn_params)

    @coroutine
    def _open_channel(self):
        self._channel = yield self.coroutiner.convert(self._connection.channel)()

    @coroutine
    def _process(self):
        # Serial worker: lazily (re)opens the connection/channel, then invokes
        # the requested channel method for each queued work item.
        while True:
            item = yield self._work_queue.get()
            try:
                if not self._connection or not self._connection.is_open:
                    yield self._open_connection()
                if not self._channel or not self._channel.is_open:
                    yield self._open_channel()
                yield getattr(self._channel, item[0])(**item[1])
            finally:
                self._work_queue.task_done()

    def publish(self, message, **kwargs):
        """Publish a message.

        :param message: The message to publish
        :param kwargs: Additional message properties
        :Keyword Arguments:
            * *routing_key* --
              Routing key to use when publishing
            * *headers* --
              Headers to be included as part of the message properties
            * *expiration* --
              Expiration to be included as part of the message properties
        :return: None
        """
        # Enqueue for _process; actual publishing happens on the IOLoop
        self._work_queue.put((
            "basic_publish",
            {
                "exchange": self._exchange,
                "routing_key": kwargs["routing_key"],
                "body": message,
                "properties": BasicProperties(
                    app_id="beer-garden",
                    content_type="text/plain",
                    headers=kwargs.pop("headers", None),
                    expiration=kwargs.pop("expiration", None),
                ),
            },
        ))

    def _event_publish_args(self, event, **kwargs):
        # Main thing we need to do here is figure out the appropriate routing key
        args = {}
        if event.metadata and "routing_key" in event.metadata:
            args["routing_key"] = event.metadata["routing_key"]
        elif "request" in kwargs:
            request = kwargs["request"]
            args["routing_key"] = get_routing_key("request", request.system, request.system_version,
                                                  request.instance_name)
        else:
            args["routing_key"] = "beergarden"
        return args
class Crawler(Index):
    # Async WeChat user-info crawler: drains a queue of openids, fetches each
    # user's profile from the WeChat API, and inserts/updates rows in the
    # online_userinfo_weixin table. Relies on module-level globals
    # insertCount/updateCount/bOpenidList — presumably set up by the caller.

    def __init__(self, openidList, max_tries=3, max_tasks=10, _loop=None):
        self.loop = _loop or asyncio.get_event_loop()  # event loop
        self.max_tries = max_tries  # retry count on fetch errors
        self.max_tasks = max_tasks  # number of concurrent worker tasks
        self.urls_queue = Queue(loop=self.loop)  # queue of openids to fetch
        self.ClientSession = aiohttp.ClientSession(loop=self.loop)  # aiohttp session used for the GETs
        for openid in openidList:  # enqueue every openid
            self.urls_queue.put_nowait(openid)
        self.started_at = datetime.now()  # start timestamp
        self.end_at = None

    def close(self):
        # Close the aiohttp session
        self.ClientSession.close()

    async def handle(self, openid, bid, wxClass, unionArr, proveinList, citiesList, self_db, logger):
        # Fetch one user's info from the WeChat API (with retries), then hand
        # the JSON off to the database logic.
        tries = 0
        while tries < self.max_tries:  # retry up to max_tries on failure
            try:
                url = "https://api.weixin.qq.com/cgi-bin/user/info?access_token=" + wxClass.AccessToken + "&openid=" + openid + "&lang=zh_CN"
                # with aiohttp.Timeout(2):
                response = await self.ClientSession.get(url, allow_redirects=False)  # fetch without following redirects
                jsonArr = await response.json()  # receive the JSON body
                if 'errcode' not in jsonArr:
                    break
            except aiohttp.ClientError:
                # await response.release()  # release the connection
                # break
                pass
            # time.sleep(2)
            tries += 1
        try:
            # text = await response.text()  # receive the raw body
            print('------tries---------:%d' % tries)
            print(jsonArr)
            if 'errcode' in jsonArr:
                self.AppLogging.warning("get user infois error:%s", jsonArr['errcode'])
            else:
                # weixinOpenidList[openid] = jsonArr
                self.doDBwork(openid, bid, jsonArr, unionArr, proveinList, citiesList, self_db, logger)
        finally:
            await response.release()  # release the connection

    async def work(self, bid, wxClass, unionArr, proveinList, citiesList, self_db, logger):
        # Worker: keep pulling openids off the queue until cancelled by run()
        try:
            while True:
                openid = await self.urls_queue.get()  # take an openid from the queue
                await self.handle(openid, bid, wxClass, unionArr, proveinList, citiesList, self_db,
                                  logger)  # fetch and store this user
                time.sleep(sleep_interval)  # NOTE(review): blocking sleep inside a coroutine stalls the loop
                self.urls_queue.task_done()  # mark the queue item finished
        except asyncio.CancelledError:
            pass

    async def run(self, bid, wxClass, unionArr, proveinList, citiesList, self_db, logger):
        # Spawn max_tasks workers, wait for the queue to drain, then cancel them
        workers = [
            asyncio.Task(self.work(bid, wxClass, unionArr, proveinList, citiesList, self_db, logger),
                         loop=self.loop) for _ in range(self.max_tasks)
        ]
        self.started_at = datetime.now()  # run start time
        await self.urls_queue.join()  # wait for every queued openid to be processed
        self.end_at = datetime.now()  # run end time
        for w in workers:
            w.cancel()  # release the worker tasks

    def doDBwork(self, wxOpenid, bid, tmplist, unionArr, proveinList, citiesList, self_db, logger):
        # Insert, update, or skip the profile row based on dowork()'s verdict
        wx = online_userinfo_weixin
        dbtype, vlist = self.dowork(tmplist, unionArr, proveinList, citiesList)
        print('---------vvvvvvv----------', dbtype)
        print(vlist)
        if dbtype == 1:  # insert
            dblist = {
                'bid': bid,
                'openid': vlist['openid'],
                'unionid': vlist['unionid'],
                'groupIds': vlist['groupIds'],
                'sex': vlist['sex'],
                'nickname': vlist['nickname'],
                'remark': vlist['remark'],
                'subscribe': vlist['subscribe'],
                'subscribe_time': vlist['subscribe_time'],
                'thumb': vlist['headimgurl'],
                'pid': vlist['pid'],
                'cid': vlist['cid'],
                'uptime': vlist['uptime'],
                'intime': vlist['intime'],
                'indate': vlist['indate'],
            }
            if 'userId' in vlist:
                dblist['userId'] = vlist['userId']
            global insertCount
            insertCount += 1
            print(dblist)
            # raise
            try:
                rr = self_db.execute(wx.__table__.insert(), dblist)
                self_db.commit()
                logger.info('bid=%s,insetr to db %s', bid, dblist)
            except:
                print('----db insert error-----')
        elif dbtype == 2:  # update
            dblist = {
                'unionid': vlist['unionid'],
                'sex': vlist['sex'],
                'nickname': vlist['nickname'],
                'remark': vlist['remark'],
                'subscribe': vlist['subscribe'],
                'subscribe_time': vlist['subscribe_time'],
                'thumb': vlist['headimgurl'],
                'pid': vlist['pid'],
                'cid': vlist['cid'],
                'uptime': vlist['uptime'],
                'groupIds': vlist['groupIds'],
            }
            if 'userId' in vlist:
                dblist['userId'] = vlist['userId']
            global updateCount
            updateCount += 1
            try:
                updateUserId = bOpenidList[wxOpenid]
                rr = self_db.query(wx).filter(wx.id == updateUserId).update(dblist, synchronize_session=False)
                self_db.commit()
                logger.info('bid=%s,update to db %s', bid, dblist)
            except:
                print('-----db update error-------')
            # global bOpenidList
            bOpenidList.pop(wxOpenid)
            # self_db.commit()
        else:
            pass

    def dowork(self, tmplist, unionArr, proveinList, citiesList):
        # Normalize the raw WeChat profile dict in place and classify the DB
        # action: returns (dbtype, normalized-dict), where dbtype is 1=insert,
        # 2=update, 3=unsubscribed; (None, None) for unusable input.
        dbtype = 1  # 1 = insert, 2 = update, 3 = unsubscribed
        indate = int(time.mktime(time.strptime(time.strftime('%Y-%m-%d', time.localtime()), '%Y-%m-%d')))
        intime = int(time.time())
        if not tmplist:
            return None, None
        if not isinstance(tmplist, (dict)):
            return None, None
        # subscribe
        if not 'openid' in tmplist:
            return None, None
        openid = tmplist['openid']
        tmplist['intime'] = intime
        tmplist['uptime'] = intime
        tmplist['indate'] = indate
        # nickname
        if 'nickname' not in tmplist:
            tmplist['nickname'] = ''
        else:
            pass
            # if isinstance(tmplist['nickname'],bytes):
            #     pass
            # else:
            #     tmplist['nickname']=tmplist['nickname'].encode('unicode-escape')
        # headimgurl
        if 'headimgurl' not in tmplist:
            tmplist['headimgurl'] = ''
        # subscribe_time
        if 'subscribe_time' not in tmplist:
            tmplist['subscribe_time'] = 0
        # remark
        if 'remark' not in tmplist:
            tmplist['remark'] = ''
        # province id
        if 'province' in tmplist:
            tmplist['pid'] = self.defindName(tmplist['province'], proveinList)
        else:
            tmplist['pid'] = 0
        # look up the city id
        if 'city' in tmplist:
            tmplist['cid'] = self.defindName(tmplist['city'], citiesList)
        else:
            tmplist['cid'] = 0
        # gender
        if 'sex' not in tmplist:
            tmplist['sex'] = 0
        # subscribe
        if 'subscribe' not in tmplist:
            tmplist['subscribe'] = 0
        # userId: map unionid to an existing user id where known
        if 'unionid' in tmplist:
            findunionid = tmplist['unionid']
            if findunionid in unionArr:
                userId = unionArr[findunionid]
                if userId:
                    tmplist['userId'] = userId
        else:
            tmplist['unionid'] = ''
        # user groups
        if (tmplist['subscribe'] == 0):
            # Unsubscribed: clear group and subscription data
            tmplist['groupId0'] = 0
            tmplist['groupId1'] = 0
            tmplist['groupId2'] = 0
            tmplist['groupIds'] = ''
            tmplist['subscribe'] = 0
            tmplist['subscribe_time'] = 0
            dbtype = 3  # unsubscribed
        else:
            # Build comma-separated tag id list
            h = ''
            for ts in tmplist['tagid_list']:
                # print(ts)
                h += str(ts) + ','
            tmplist['groupIds'] = h[:-1]
        # Known openid -> update instead of insert
        if openid in bOpenidList:
            dbtype = 2
        else:
            dbtype = 1
        return dbtype, tmplist
class TaskLogger(object):
    """Posts task log lines and a final result to a task service over HTTP.

    Two engines are supported: blocking ``requests`` calls, or
    non-blocking Tornado calls.  With the Tornado engine an internal
    Queue counts log posts still in flight so that ``result`` can wait
    for all of them (via ``join``) before posting the final result.

    NOTE(review): ``basestring`` below implies this module targets
    Python 2 — confirm before running under Python 3.
    """

    def __init__(self,
                 task_id,
                 engine=EngineType.REQUESTS,
                 io_loop=None,
                 task_url=TASK_URL,
                 wrap=False,
                 tenant=None):
        # task_id: identifier appended to every log/result URL.
        # engine:  EngineType.REQUESTS (blocking) or EngineType.TORNADO.
        # wrap:    if True, payloads are wrapped in Task*Message JSON.
        # tenant:  optional tenant id added as a query parameter.
        self.task_id = task_id
        self.task_url = task_url
        self._seq = 0  # monotonically increasing message sequence number
        self._partial_log_url = self._get_partial_url('log')
        self._partial_result_url = self._get_partial_url('result')
        self.wrap = wrap
        if wrap and tenant:
            self._partial_log_url = update_query_params(
                self._partial_log_url, {'tenant': tenant})
            self._partial_result_url = update_query_params(
                self._partial_result_url, {'tenant': tenant})

        # Bind self.log / self.result to the engine-specific implementation.
        if engine == EngineType.REQUESTS:
            self.log = self._log_by_requests
            self.result = self._result_by_requests
        elif engine == EngineType.TORNADO:
            io_loop = io_loop if io_loop else IOLoop.current()
            self._http_client = AsyncHTTPClient(io_loop=io_loop)
            self._queue = Queue()  # counts in-flight log posts
            self.log = self._log_by_tornado
            self.result = self._result_by_tornado
        else:
            raise TaskLoggerError('',
                                  reason='engine only supports {}'.format(
                                      EngineType.types_str()))

    def _get_partial_url(self, partial_name):
        # Base URL for 'log' or 'result', with task_id already attached.
        url = urljoin(self.task_url, partial_name)
        url = update_query_params(url, {'task_id': self.task_id})
        return url

    def _get_log_url(self, seq):
        url = update_query_params(self._partial_log_url, {'seq': seq})
        return url

    def _get_result_url(self, seq, exit_code=0):
        url = update_query_params(self._partial_result_url, {
            'seq': seq,
            'exit_code': exit_code
        })
        return url

    def _log_by_requests(self, log):
        # Blocking log post via `requests`.
        self._seq += 1
        log_url = self._get_log_url(self._seq)
        data = self._create_log(log, self._seq)
        self._send_by_requests(log_url, data)

    def _result_by_requests(self, result, exit_code=0):
        # Blocking result post via `requests`.
        self._seq += 1
        result_url = self._get_result_url(self._seq, exit_code)
        data = self._create_result(result, self._seq, exit_code=exit_code)
        self._send_by_requests(result_url, data)

    @staticmethod
    def _send_by_requests(url, data):
        # NOTE(review): verify=False disables TLS certificate checking.
        res = requests.post(url, data=data, verify=False)
        if res.status_code != 200:
            raise TaskLoggerError(data, reason=res.reason)

    @gen.coroutine
    def _log_by_tornado(self, log):
        # The queue acts as an in-flight counter: put before sending,
        # get + task_done when the send completes (success or failure),
        # so _result_by_tornado can join() on all pending logs.
        yield self._queue.put(1)
        self._seq += 1
        log_url = self._get_log_url(self._seq)
        data = self._create_log(log, self._seq)
        try:
            yield self._send_by_tornado(log_url, data)
        finally:
            yield self._queue.get()
            self._queue.task_done()

    @gen.coroutine
    def _result_by_tornado(self, result, exit_code=0):
        # Wait for all in-flight log posts before posting the result.
        yield self._queue.join()
        self._seq += 1
        result_url = self._get_result_url(self._seq, exit_code)
        data = self._create_result(result, self._seq, exit_code=exit_code)
        yield self._send_by_tornado(result_url, data)

    @gen.coroutine
    def _send_by_tornado(self, url, data):
        try:
            response = yield self._http_client.fetch(
                url,
                method='POST',
                headers={'Content-Type': 'application/json'},
                validate_cert=False,
                body=data)
        except Exception as exc:
            # Include the server response body in the error when available.
            if hasattr(exc, 'response') and exc.response:
                exc = 'url:{}, exc:{}, body:{}'.format(url, exc,
                                                       exc.response.body)
            raise TaskLoggerError(data, str(exc))
        else:
            if response.code != 200:
                raise TaskLoggerError(data, reason=response.body)

    def _create_log(self, log, seq):
        # Build the payload for one log line; JSON-wrapped when self.wrap.
        assert isinstance(log, basestring)
        log = log + '\n'
        if self.wrap:
            log_msg = TaskLogMessage(task_id=self.task_id, log=log, seq=seq)
            data = json_encode({'messages': log_msg})
        else:
            data = log
        return data

    def _create_result(self, result, seq, exit_code):
        # Build the payload for the final result; JSON-wrapped when self.wrap.
        assert isinstance(result, basestring)
        result = result + '\n'
        if self.wrap:
            result_msg = TaskResultMessage(task_id=self.task_id,
                                           result=result,
                                           seq=seq,
                                           exit_code=exit_code)
            data = json_encode({'messages': result_msg})
        else:
            data = result
        return data
class SubscribeListener(SubscribeCallback):
    """PubNub SubscribeCallback exposing yieldable wait helpers.

    Status callbacks set connect/disconnect events or enqueue errors;
    messages and presence events are buffered in queues.  ``_wait_for``
    races any awaited future against the error queue so that an error
    delivered by PubNub aborts the wait by re-raising the exception.
    """

    def __init__(self):
        self.connected = False  # kept for API compatibility; not updated here
        self.connected_event = Event()
        self.disconnected_event = Event()
        self.presence_queue = Queue()
        self.message_queue = Queue()
        self.error_queue = Queue()

    def status(self, pubnub, status):
        """Dispatch a status callback to the matching event or error queue."""
        if utils.is_subscribed_event(status) and not self.connected_event.is_set():
            self.connected_event.set()
        elif utils.is_unsubscribed_event(status) and not self.disconnected_event.is_set():
            self.disconnected_event.set()
        elif status.is_error():
            self.error_queue.put_nowait(status.error_data.exception)

    def message(self, pubnub, message):
        """Buffer an incoming message envelope."""
        self.message_queue.put(message)

    def presence(self, pubnub, presence):
        """Buffer an incoming presence envelope."""
        self.presence_queue.put(presence)

    @tornado.gen.coroutine
    def _wait_for(self, coro):
        """Yield `coro`, but raise instead if an error arrives first."""
        error = self.error_queue.get()
        wi = tornado.gen.WaitIterator(coro, error)

        while not wi.done():
            result = yield wi.next()

            if wi.current_future == coro:
                # Fix: was `raise gen.Return(result)` — `gen` is not a name
                # in scope here (every other coroutine in this class spells
                # it `tornado.gen.Return`), so the success path raised
                # NameError instead of returning the result.
                raise tornado.gen.Return(result)
            elif wi.current_future == error:
                raise result
            else:
                raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

    @tornado.gen.coroutine
    def wait_for_connect(self):
        """Wait until the first subscribe event (or an error) arrives."""
        if not self.connected_event.is_set():
            yield self._wait_for(self.connected_event.wait())
        else:
            raise Exception("instance is already connected")

    @tornado.gen.coroutine
    def wait_for_disconnect(self):
        """Wait until the first unsubscribe event (or an error) arrives."""
        if not self.disconnected_event.is_set():
            yield self._wait_for(self.disconnected_event.wait())
        else:
            raise Exception("instance is already disconnected")

    @tornado.gen.coroutine
    def wait_for_message_on(self, *channel_names):
        """Return the next buffered message on any of `channel_names`;
        messages for other channels are consumed and dropped."""
        channel_names = list(channel_names)
        while True:
            try:  # NOQA
                env = yield self._wait_for(self.message_queue.get())
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.message_queue.task_done()

    @tornado.gen.coroutine
    def wait_for_presence_on(self, *channel_names):
        """Return the next buffered presence event on any of
        `channel_names`; an error while waiting ends the loop."""
        channel_names = list(channel_names)
        while True:
            try:
                try:
                    env = yield self._wait_for(self.presence_queue.get())
                except:  # NOQA E722 pylint: disable=W0702
                    break
                if env.channel in channel_names:
                    raise tornado.gen.Return(env)
                else:
                    continue
            finally:
                self.presence_queue.task_done()
class SDP(tornado.websocket.WebSocketHandler):
    # One instance per websocket connection.  Implements a small DDP-like
    # protocol (method / sub / unsub messages) on top of RethinkDB
    # changefeeds: subscriptions stream added/changed/removed documents
    # and optionally maintain a per-document observer count (`__count`).

    def check_origin(self, origin):
        # Accept connections from any origin.
        return True

    def __init__(self, application, request):
        super().__init__(application, request)
        # NOTE(review): r.connect(...) returns a connection future here;
        # every query does `conn = yield self.conn` before running.
        self.conn = r.connect(host='db', port=28015, db='test')
        self.registered_feeds = {}      # sub_id -> changefeed cursor
        self.feeds_with_observers = []  # sub names that maintain __count
        self.queue = Queue(maxsize=10)  # serialises incoming client messages
        self.user_id = '*****@*****.**'  #None  (redacted placeholder value)
        self.remove_observer_from_item = {}  # sub_id -> {item_id: undo-callable}
        tornado.ioloop.IOLoop.current().spawn_callback(self.consumer)

    def call_later(self, delay, f, *args, **kwargs):
        # Thin wrapper over IOLoop.call_later; returns the timeout handle.
        return tornado.ioloop.IOLoop.current().call_later(
            delay, f, *args, **kwargs)

    @gen.coroutine
    def uuid(self):
        # Ask RethinkDB for a server-generated UUID.
        conn = yield self.conn
        ui = yield r.uuid().run(conn)
        return ui

    @gen.coroutine
    def run(self, query):
        # Run a ReQL query, discarding its result.
        conn = yield self.conn
        yield query.run(conn)

    def check(self, attr, type):
        # Raise CheckError unless `attr` is an instance of `type`.
        # NOTE(review): the parameter shadows the builtin `type`.
        if not isinstance(attr, type):
            raise CheckError(attr + ' is not of type ' + str(type))

    @gen.coroutine
    def feed(self, sub_id, query, name):
        # Stream changefeed events for one subscription to the client.
        #query = query.filter(~r.row.has_fields('deleted'))
        print('ini of feed')
        conn = yield self.conn
        print('connection getted')
        #feed = yield query.changes(include_initial=True, include_states=True)._filter.run(conn)
        feed = yield query._filter.changes(include_initial=True,
                                           include_states=True).run(conn)
        self.registered_feeds[sub_id] = feed
        while (yield feed.fetch_next()):
            item = yield feed.next()
            print('item >', item)
            state = item.get('state')
            #if state == 'ready' or state == 'initializing':
            if state == 'ready':
                self.send_ready(sub_id)
            elif state == 'initializing':
                self.send_initializing(sub_id, query.table)
            else:
                if item.get('old_val') is None:
                    # new document appeared
                    if name in self.feeds_with_observers:
                        # bump the document's observer count, and remember
                        # how to undo it when the sub/document goes away
                        new_item_id = item['new_val']['id']
                        yield r.table(query.table).get(new_item_id).update({
                            '__count': r.row['__count'].default(0) + 1
                        }).run(conn)

                        def helper_remove(id):
                            # NOTE: parameter shadows the builtin `id`;
                            # binds the document id for the closure below.
                            def helper():
                                @gen.coroutine
                                def aux():
                                    yield r.table(query.table).get(id).update({
                                        '__count': r.row['__count'] - 1
                                    }).run(conn)
                                tornado.ioloop.IOLoop.current().spawn_callback(
                                    aux)
                            return helper

                        self.remove_observer_from_item.setdefault(
                            sub_id, {})[new_item_id] = helper_remove(new_item_id)
                    self.send_added(query.table, sub_id, item['new_val'])
                elif item.get('new_val') is None:
                    # document disappeared: undo its observer bump (if any)
                    old_item_id = item['old_val']['id']
                    remove = self.remove_observer_from_item[sub_id].pop(
                        old_item_id, None)
                    if remove:
                        remove()
                    self.send_removed(query.table, sub_id,
                                      item['old_val']['id'])
                else:
                    self.send_changed(query.table, sub_id, item['new_val'])

    def send(self, data):
        # JSON-encode and push to the client; datetimes are serialised
        # as EJSON-style {'$date': milliseconds-since-epoch}.
        def helper(x):
            if (isinstance(x, datetime)):
                return {'$date': x.timestamp() * 1000}
            else:
                return x
        self.write_message(json.dumps(data, default=helper))

    def send_result(self, id, result):
        self.send({'msg': 'result', 'id': id, 'result': result})

    def send_error(self, id, error):
        self.send({'msg': 'error', 'id': id, 'error': error})

    def send_added(self, table, sub_id, doc):
        self.send({'msg': 'added', 'table': table, 'id': sub_id, 'doc': doc})

    def send_changed(self, table, sub_id, doc):
        self.send({'msg': 'changed', 'table': table, 'id': sub_id, 'doc': doc})

    def send_removed(self, table, sub_id, doc_id):
        self.send({
            'msg': 'removed',
            'table': table,
            'id': sub_id,
            'doc_id': doc_id
        })

    def send_ready(self, sub_id):
        self.send({'msg': 'ready', 'id': sub_id})

    def send_initializing(self, sub_id, table):
        self.send({'msg': 'initializing', 'id': sub_id, 'table': table})

    def send_nosub(self, sub_id, error):
        self.send({'msg': 'nosub', 'id': sub_id, 'error': error})

    def send_nomethod(self, method_id, error):
        self.send({'msg': 'nomethod', 'id': method_id, 'error': error})

    def on_open(self):
        print('open')

    @gen.coroutine
    def on_message(self, msg):
        # Enqueue raw client messages; `consumer` processes them in order.
        yield self.queue.put(msg)

    def on_message_(self, msg):
        # Unused variant of on_message: enqueues via a spawned callback
        # instead of awaiting the put directly.
        #print('raw ->', msg)
        @gen.coroutine
        def helper(msg):
            yield self.queue.put(msg)
        tornado.ioloop.IOLoop.current().spawn_callback(helper, msg)

    # consumer can be recoded as:
    # http://www.tornadoweb.org/en/stable/queues.html?highlight=queue
    @gen.coroutine
    def consumer(self):
        # all data gets must go inside a try
        while True:
            msg = yield self.queue.get()
            if msg == 'stop':
                return

            def helper(dct):
                # decode EJSON {'$date': ms} values back into aware datetimes
                if '$date' in dct.keys():
                    d = datetime.utcfromtimestamp(dct['$date'] / 1000.0)
                    return d.replace(tzinfo=pytz.UTC)
                return dct

            data = json.loads(msg, object_hook=helper)
            #print(data)
            try:
                message = data['msg']
                id = data['id']
                if message == 'method':
                    # RPC: look up the allowed method and run it.
                    params = data['params']
                    method = data['method']
                    if method not in methods:
                        self.send_nomethod(id, 'method does not exist')
                    else:
                        #try:
                        method = getattr(self, method)
                        result = yield method(**params)
                        self.send_result(id, result)
                        #except Exception as e:
                        #    self.send_error(id, str(e) + ':' + str(e.__traceback__))
                elif message == 'sub':
                    # start a changefeed for a registered subscription
                    name = data['name']
                    params = data['params']
                    if name not in subs:
                        self.send_nosub(id, 'sub does not exist')
                    else:
                        query = getattr(self, name)(**params)
                        tornado.ioloop.IOLoop.current().spawn_callback(
                            self.feed, id, query, name)
                elif message == 'unsub':
                    # close the feed and undo any observer-count bumps
                    feed = self.registered_feeds[id]
                    feed.close()
                    if self.remove_observer_from_item.get(id):
                        for remove in self.remove_observer_from_item[
                                id].values():
                            remove()
                        del self.remove_observer_from_item[id]
                    del self.registered_feeds[id]
            except KeyError as e:
                self.send_error(id, str(e))
            finally:
                self.queue.task_done()

    def on_close(self):
        # Connection closed: undo observer counts, close feeds, and
        # tell the consumer coroutine to stop.
        print('close')
        for k in self.remove_observer_from_item.keys():
            for remove in self.remove_observer_from_item[k].values():
                remove()
        for feed in self.registered_feeds.values():
            feed.close()

        @gen.coroutine
        def helper():
            # is it possible to call self.queue.put directly?
            self.queue.put('stop')
        tornado.ioloop.IOLoop.current().spawn_callback(helper)

    @gen.coroutine
    def insert(self, table, doc):
        # Insert `doc` if every 'insert' permission check passes.
        cans = [c(self, table, doc) for c in can['insert']]
        if not all(cans):
            raise MethodError('can not insert ' + table)
        else:
            self.before_insert(table, doc)
            conn = yield self.conn
            result = yield r.table(table).insert(doc).run(conn)
            # self.after_insert()

    def before_insert(self, collection, doc):
        # Run registered before-insert hooks (may mutate `doc`).
        for hook in hooks['before_insert']:
            hook(self, collection, doc)

    @gen.coroutine
    def update(self, table, id, doc):
        # Update a document if every 'update' permission check passes.
        conn = yield self.conn
        old_doc = yield r.table(table).get(id).run(conn)
        cans = [c(self, table, doc, old_doc) for c in can['update']]
        if not all(cans):
            raise MethodError('can not update ' + table + ', id: ' + str(id))
        else:
            self.before_update(table, doc)
            result = yield r.table(table).get(id).update(doc).run(conn)
            #self.after_update()

    def before_update(self, collection, subdoc):
        # Run registered before-update hooks (may mutate `subdoc`).
        for hook in hooks['before_update']:
            hook(self, collection, subdoc)

    @gen.coroutine
    def soft_delete(self, table, id):
        # Mark a document deleted (sets 'deleted': True) when permitted.
        conn = yield self.conn
        old_doc = yield r.table(table).get(id).run(conn)
        cans = [c(self, table, old_doc) for c in can['delete']]
        if not all(cans):
            raise MethodError('can not delete ' + table + ', id: ' + str(id))
        else:
            result = yield r.table(table).get(id).update({
                'deleted': True
            }).run(conn)

    @gen.coroutine
    def update_many(self, table, f, u, limit=None):
        # Apply update `u` to documents matching predicate `f`,
        # optionally capped at `limit`; returns the replaced-row count.
        conn = yield self.conn
        result = 0
        if limit:
            result = yield r.table(table).filter(f).limit(limit).update(
                lambda item: r.branch(f(item), u, {})).run(conn)
        else:
            result = yield r.table(table).filter(f).update(
                lambda item: r.branch(f(item), u, {})).run(conn)
        return result['replaced']
class HeartbeatConnection(object):
    """Maintain a websocket connection to atxserver2 and report the
    devices currently connected to this provider.

    (Original docstring: 与atxserver2建立连接,汇报当前已经连接的设备)
    """

    def __init__(self, url="ws://*****:*****@nobody.io"
        # NOTE(review): this constructor was redacted/corrupted in the
        # source (the "*****" placeholders).  The remainder of the
        # parameter list (secret, platform, priority, ...) and the
        # assignments that should define self._ws_url, self._provider_url,
        # self._name and self._owner — all referenced below — are missing.
        # Restore from version control before relying on this class.
        self._secret = secret
        self._platform = platform
        self._priority = priority
        self._queue = Queue()         # outgoing messages; None = "resend all"
        self._db = defaultdict(dict)  # udid -> last known device state

    async def open(self):
        # Connect, then start the reader and writer background loops.
        self._ws = await self.connect()
        IOLoop.current().spawn_callback(self._drain_ws_message)
        IOLoop.current().spawn_callback(self._drain_queue)

    async def _drain_queue(self):
        """
        Logic:
            - send message to server when server is alive
            - update local db
        """
        while True:
            message = await self._queue.get()
            if message is None:
                # reconnect happened: replay the full device state
                logger.info("Resent messages: %s", self._db)
                for _, v in self._db.items():
                    await self._ws.write_message(v)
                continue

            if 'udid' in message:  # ping messages do not carry a udid
                udid = message['udid']
                update_recursive(self._db, {udid: message})
            self._queue.task_done()

            if self._ws:
                try:
                    await self._ws.write_message(message)
                    logger.debug("websocket send: %s", message)
                except TypeError as e:
                    logger.info("websocket write_message error: %s", e)

    async def _drain_ws_message(self):
        # Read loop; a None read means the socket closed, so reconnect
        # and enqueue None to trigger a full state resend.
        while True:
            message = await self._ws.read_message()
            logger.debug("WS read message: %s", message)
            if message is None:
                self._ws = None
                logger.warning("WS closed")
                self._ws = await self.connect()
                await self._queue.put(None)
            logger.info("WS receive message: %s", message)

    async def connect(self):
        """Connect with linear backoff (capped at ~30s).

        Returns:
            tornado.WebSocketConnection
        """
        cnt = 0
        while True:
            try:
                ws = await self._connect()
                cnt = 0
                return ws
            except Exception as e:
                cnt = min(30, cnt + 1)
                logger.warning("WS connect error: %s, reconnect after %ds",
                               e, cnt + 1)
                await gen.sleep(cnt + 1)

    async def _connect(self):
        # Single connection attempt + handshake.
        ws = await websocket.websocket_connect(self._ws_url, ping_interval=3)
        ws.__class__ = SafeWebSocket

        await ws.write_message({
            "command": "handshake",
            "name": self._name,
            "owner": self._owner,
            "secret": self._secret,
            "url": self._provider_url,
            "priority": self._priority,  # larger value = higher priority
        })
        msg = await ws.read_message()
        logger.info("WS receive: %s", msg)
        return ws

    async def device_update(self, data: dict):
        """Queue a device state update for the server.

        Args:
            data (dict) should contains keys
            - provider (dict: optional)
            - coding (bool: optional)
            - properties (dict: optional)
        """
        data['command'] = 'update'
        data['platform'] = self._platform

        await self._queue.put(data)

    async def ping(self):
        # Application-level keepalive (distinct from websocket pings).
        await self._ws.write_message({"command": "ping"})
class TornadoPikaPublisher(BeergardenPublisher, PikaClient):
    """Tornado-based RabbitMQ (pika) publisher for Beergarden.

    Publish requests are appended to an internal Queue and drained by a
    single background coroutine (`_process`), which lazily (re)opens the
    connection and channel before each publish.
    """

    def __init__(self, **kwargs):
        # kwargs are forwarded to PikaClient; `shutdown_timeout` (seconds,
        # default 5) is popped off first and bounds queue drain on shutdown.
        self.logger = logging.getLogger(__name__)
        self._shutdown_timeout = timedelta(
            seconds=kwargs.pop('shutdown_timeout', 5))
        self._work_queue = Queue()
        self._connection = None
        self._channel = None
        # Adapts pika's callback-style APIs into yieldable coroutines.
        self.coroutiner = CoroutineMaker({
            'TornadoConnection': 'on_open_callback',
            'channel': 'on_open_callback'
        })

        # Trying to get super() to work with incompatible signatures is a nightmare
        BeergardenPublisher.__init__(self)
        PikaClient.__init__(self, **kwargs)

        IOLoop.current().spawn_callback(self._process)

    def shutdown(self):
        # Wait (bounded) for queued publishes to finish.
        return self._work_queue.join(timeout=self._shutdown_timeout)

    @coroutine
    def _open_connection(self):
        self._connection = yield self.coroutiner.convert(TornadoConnection)(
            parameters=self._conn_params, stop_ioloop_on_close=False)

    @coroutine
    def _open_channel(self):
        self._channel = yield self.coroutiner.convert(
            self._connection.channel)()

    @coroutine
    def _process(self):
        # Background worker: pop (method_name, kwargs) items and invoke
        # them on the channel, reopening connection/channel as needed.
        while True:
            item = yield self._work_queue.get()

            try:
                if not self._connection or not self._connection.is_open:
                    yield self._open_connection()
                if not self._channel or not self._channel.is_open:
                    yield self._open_channel()

                yield getattr(self._channel, item[0])(**item[1])
            finally:
                self._work_queue.task_done()

    def publish(self, message, **kwargs):
        """Publish a message.

        :param message: The message to publish
        :param kwargs: Additional message properties
        :Keyword Arguments:
            * *routing_key* --
              Routing key to use when publishing
            * *headers* --
              Headers to be included as part of the message properties
            * *expiration* --
              Expiration to be included as part of the message properties
        :return: None
        """
        # NOTE(review): 'routing_key' is required — a missing key raises
        # KeyError here.  The publish itself happens asynchronously in
        # _process; this only enqueues it.
        self._work_queue.put(('basic_publish', {
            'exchange': self._exchange,
            'routing_key': kwargs['routing_key'],
            'body': message,
            'properties': BasicProperties(app_id='beer-garden',
                                          content_type='text/plain',
                                          headers=kwargs.pop('headers', None),
                                          expiration=kwargs.pop('expiration', None))
        }))

    def _event_publish_args(self, event, **kwargs):
        # Main thing we need to do here is figure out the appropriate routing key
        args = {}
        # precedence: event metadata > request-derived key > default
        if event.metadata and 'routing_key' in event.metadata:
            args['routing_key'] = event.metadata['routing_key']
        elif 'request' in kwargs:
            request = kwargs['request']
            args['routing_key'] = get_routing_key('request', request.system,
                                                  request.system_version,
                                                  request.instance_name)
        else:
            args['routing_key'] = 'beergarden'

        return args
class PollingHandler(BaseSocketHandler):
    """A single websocket connection.

    Attributes:
        tracker: tornado.ioloop.PeriodicCallback wrapping `get_location`.
            Started when the user asks to track; fires every 5 seconds to
            look up and propagate the character's current location.
        q: tornado.queues.Queue through which all user commands flow, so
            they are handled strictly one at a time.
        updating: guard flag set while the router is being updated, used
            to avoid racing front-end backups against location updates.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.updating = False
        self.q = Queue(maxsize=5)
        # Prepared here, started/stopped later by track/untrack commands.
        self.tracker = PeriodicCallback(self.get_location, 5000)

    async def get_location(self):
        """Periodic callback body: query the API for the character's
        location, push it through the router, and notify the front-end."""
        whereabouts = await self.character(self.user_id, '/location/', 'GET')
        if not whereabouts:
            # No location available — the user is not logged into the game.
            await self.safe_write(
                ['warning', 'Log into game to track your route'])
            return
        # Block front-end backups while the router is mid-update.
        self.updating = True
        graph_data = await self.user['router'].update(
            whereabouts['solarSystem']['name'])
        if graph_data:
            logging.warning(graph_data)
            await self.safe_write(['update', graph_data])
        self.updating = False

    async def scheduler(self):
        """Drain the command queue, one task at a time.

        Tornado queues doc: http://www.tornadoweb.org/en/stable/queues.html

        Front-end messages can arrive in any order relative to task
        completion, so commands are funneled through `self.q` and this
        async generator resolves them strictly in sequence.
        """
        logging.info(f"Scheduler started for {self.request.remote_ip}")
        async for job in self.q:
            logging.debug(f"Started resolving task for {job}...")
            account = self.user
            try:
                if job == 'recover':
                    # Replay the saved route to the front-end.
                    await self.safe_write(
                        ['recover', account['router'].recovery])
                elif job == 'track':
                    if not self.tracker.is_running():
                        self.tracker.start()
                elif job in ('stop', 'reset'):
                    if self.tracker.is_running():
                        self.tracker.stop()
                    if job == 'reset':
                        # Also discard all saved data.
                        await account['router'].reset()
                elif job[0] == 'backup':
                    # Skip the backup while a location update is in
                    # flight, to avoid clobbering fresher data.
                    if not self.updating:
                        await account['router'].backup(job[1])
            finally:
                self.q.task_done()
                logging.debug(f'Task "{job}" done.')

    async def task(self, item):
        """Forward one command to `self.scheduler` via the queue.

        Exists so `self.on_message` (which cannot await) has an async
        hook where extra non-blocking work could be added later.

        :argument item: item to pass to the `self.scheduler`.
        """
        await self.q.put(item)

    def open(self):
        """Handle a new websocket connection.

        Requires an authorized user; spawns the scheduler, registers the
        connection, and kicks off recovery of any saved route.
        """
        logging.info(f"Connection received from {self.request.remote_ip}")
        if not self.user_id:
            self.close()
            return
        self.spawn(self.scheduler)
        self.vagrants.append(self)
        self.spawn(self.task, 'recover')

    def on_message(self, message):
        """Handle a front-end message.

        :argument message: front-end message.

        Decodes the command and hands it to the scheduler via `self.task`.
        """
        self.spawn(self.task, json_decode(message))

    def on_close(self):
        """Handle a closed connection: deregister this websocket and
        stop the tracker if it is still running."""
        self.vagrants.remove(self)
        if self.tracker.is_running():
            self.tracker.stop()
        logging.info(f"Connection closed, {self.request.remote_ip}")
class BaseSpider(object):
    """Base class for asynchronous job-board spiders.

    A spider seeds its queue with one search URL, then `concurrency`
    worker coroutines fetch pages, enqueue follow-up links
    (`fetch_links`) and extract job postings (`parse_response`).
    Subclasses must provide `url_parser` and override `fetch_links`
    and `parse_response`.
    """

    # Callable/class producing search URLs; set by subclasses.
    url_parser = None

    def __init__(self, engine, concurrent=3):
        """
        :param engine: coordinator receiving notifications and results.
        :param concurrent: number of concurrent fetch workers.
        """
        self.engine = engine
        self.http = httpclient.AsyncHTTPClient()
        self.queue = Queue()
        self.concurrency = concurrent

    @property
    def hostname(self):
        """Hostname handled by this spider's URL parser."""
        return self.url_parser.hostname

    @property
    def url_root(self):
        """Root URL of the target site."""
        return self.url_parser.url_root

    @property
    def base_url(self):
        """Base URL used to build request URLs."""
        return self.url_parser.base_url

    @gen.coroutine
    def __worker(self):
        """Consumes the queue forever; each iteration processes one URL."""
        while True:
            yield self.fetch_url()

    @gen.coroutine
    def crawl(self, description, location):
        """Starts crawling the specified URL and waits until the queue
        is fully processed.

        :param description: search terms for the job query.
        :param location: location filter for the job query.
        """
        url = self.url_parser(description, location)
        # Fix: the original discarded the Future returned by queue.put(),
        # so errors/backpressure from the seed put were silently dropped.
        yield self.queue.put(url)
        self.engine.notify_started(self)
        for _ in range(self.concurrency):
            # Workers intentionally run unawaited; queue.join() below is
            # what signals completion.
            self.__worker()
        yield self.queue.join()
        self.engine.notify_finished(self)

    @gen.coroutine
    def fetch_url(self):
        """Retrieves a URL from the queue and returns the parsed data."""
        url = yield self.queue.get()
        # Lazy %-style args instead of eager string formatting.
        logger.info('fetching %s', url)

        try:
            response = yield self.http.fetch(url)
            # Explicit parser: letting bs4 auto-pick makes results depend
            # on which parser libraries happen to be installed.
            soup = BeautifulSoup(response.body, 'html.parser')
            logger.info('got response %s', url)

            urls = yield self.fetch_links(response, soup)
            for new_url in urls:
                logger.debug('Added %s to queue', new_url)
                yield self.queue.put(new_url)

            data = yield self.parse_response(response, soup)
            logger.info('Parsed response for %s', url)
        except (httpclient.HTTPError, ValueError):
            message = 'HTTP Error: (%s)' % url
            self.engine.write_message(message, self.engine.STATUS_ERROR)
        else:
            self.engine.write_data(data)
        finally:
            self.queue.task_done()

    @gen.coroutine
    def fetch_links(self, response, soup):
        """Fetch URLs to be added to the queue."""
        raise gen.Return([])

    def parse_response(self, response, soup):
        """Extract information from the response, return should be a
        list of dict's.  (Overrides must be coroutines/return futures,
        since `fetch_url` yields the result.)

        Sample dict:

        {
            'title': 'Job Title',
            'company': 'Company Name',
            'location': 'City/State/Country',
            'tags': ['tag1', 'tag2', 'tag3'],
            'category': 'Software Developer',
            'origin': 'Name of the origin website',
            'url': 'Link to the complete job description',
        }
        """
        raise NotImplementedError
class Model:
    """Holds one loaded network (config + devices + engine) and serves
    classification requests.

    Requests are queued via `classify`; a background coroutine batches
    all waiting requests, runs a forward pass, and resolves each
    request's future with its result.
    """

    def __init__(self, config_file):
        """Load the config, initialise devices and the engine, and start
        the background classification loop.

        :param config_file: path to the network config file.
        :raises: re-raises any config/device/network loading failure
                 after logging it.
        """
        self.lock = locks.Lock()
        self.classification_queue = Queue()

        print('loading config %s' % config_file, file=log.v5)
        # Load and setup config
        try:
            self.config = Config.Config()
            self.config.load_file(config_file)
            self.pause_after_first_seq = self.config.float(
                'pause_after_first_seq', 0.2)
            self.batch_size = self.config.int('batch_size', 5000)
            self.max_seqs = self.config.int('max_seqs', -1)
        except Exception:
            print('Error: loading config %s failed' % config_file,
                  file=log.v1)
            raise

        try:
            self.devices = self._init_devices()
        except Exception:
            print('Error: Loading devices for config %s failed' % config_file,
                  file=log.v1)
            raise

        print('Starting engine for config %s' % config_file, file=log.v5)
        self.engine = Engine.Engine(self.devices)
        try:
            self.engine.init_network_from_config(config=self.config)
        except Exception:
            print('Error: Loading network for config %s failed' % config_file,
                  file=log.v1)
            raise

        IOLoop.current().spawn_callback(self.classify_in_background)

        self.last_used = datetime.datetime.now()

    def _init_devices(self):
        """Initiates the required devices for a config. Same as the
        funtion initDevices in rnn.py.

        :return: A list with the devices used.
        """
        oldDeviceConfig = ",".join(self.config.list('device', ['default']))
        if "device" in TheanoFlags:
            # This is important because Theano likely already has initialized that device.
            # Fix: was `config.set(...)`, which referenced an undefined
            # global; the instance's config is the one being amended.
            self.config.set("device", TheanoFlags["device"])
            print("Devices: Use %s via THEANO_FLAGS instead of %s." %
                  (TheanoFlags["device"], oldDeviceConfig), file=log.v4)
        devArgs = getDevicesInitArgs(self.config)
        assert len(devArgs) > 0
        devices = [Device(**kwargs) for kwargs in devArgs]
        for device in devices:
            while not device.initialized:
                time.sleep(0.25)
        if devices[0].blocking:
            print("Devices: Used in blocking / single proc mode.",
                  file=log.v4)
        else:
            print("Devices: Used in multiprocessing mode.", file=log.v4)
        return devices

    @tornado.gen.coroutine
    def classify_in_background(self):
        """Background loop: batch all queued requests together, run one
        forward pass under the lock, and resolve each request's future."""
        while True:
            requests = []
            # fetch first request (blocks until one arrives)
            r = yield self.classification_queue.get()
            requests.append(r)
            # grab all other waiting requests to batch them in
            try:
                while True:
                    requests.append(self.classification_queue.get_nowait())
            except QueueEmpty:
                pass

            output_dim = {}
            # Do dataset creation and classification.
            dataset = StaticDataset(data=[r.data for r in requests],
                                    output_dim=output_dim)
            dataset.init_seq_order()
            batches = dataset.generate_batches(
                recurrent_net=self.engine.network.recurrent,
                batch_size=self.batch_size,
                max_seqs=self.max_seqs)

            with (yield self.lock.acquire()):
                ctt = ForwardTaskThread(self.engine.network, self.devices,
                                        dataset, batches)
                yield ctt.join()

            try:
                # assumes dataset.num_seqs == len(requests) — one result
                # and one task_done per queued request. TODO confirm.
                for i in range(dataset.num_seqs):
                    requests[i].future.set_result(ctt.result[i])
                    self.classification_queue.task_done()
            except Exception as e:
                print('exception', e)
                raise

    @tornado.gen.coroutine
    def classify(self, data):
        """Queue a classification request and wait for its result.

        :param data: input sequence data for StaticDataset.
        :return: the forward-pass result for this request.
        """
        self.last_used = datetime.datetime.now()
        request = ClassificationRequest(data)

        yield self.classification_queue.put(request)
        yield request.future

        return request.future.result()
class AsyncTaskManager(object):
    """
    Aucote uses asynchronous task executed in ioloop. Some of them,
    especially scanners, should finish before ioloop will stop.

    This class should be accessed by instance class method, which returns
    global instance of task manager.
    """
    _instances = {}

    # Policies for which workers to kill when throttling lowers the limit:
    TASKS_POLITIC_WAIT = 0                  # kill nothing, let workers drain
    TASKS_POLITIC_KILL_WORKING_FIRST = 1    # kill busy workers first
    TASKS_POLITIC_KILL_PROPORTIONS = 2      # kill proportionally to busy count
    TASKS_POLITIC_KILL_WORKING = 3          # kill only busy workers beyond idle slack

    def __init__(self, parallel_tasks=10):
        self._shutdown_condition = Event()
        self._stop_condition = Event()
        self._cron_tasks = {}
        self._parallel_tasks = parallel_tasks
        self._tasks = Queue()
        self._task_workers = {}   # worker number -> running _Executor or None
        self._events = {}
        self._limit = self._parallel_tasks  # current worker-count limit
        self._next_task_number = 0
        self._toucan_keys = {}

    @classmethod
    def instance(cls, name=None, **kwargs):
        """
        Return instance of AsyncTaskManager (one per `name`).

        Returns:
            AsyncTaskManager
        """
        if cls._instances.get(name) is None:
            cls._instances[name] = AsyncTaskManager(**kwargs)
        return cls._instances[name]

    @property
    def shutdown_condition(self):
        """
        Event which is resolved if every job is done and
        AsyncTaskManager is ready to shutdown.

        Returns:
            Event
        """
        return self._shutdown_condition

    def start(self):
        """
        Start CronTabCallback tasks and spawn the worker callbacks.

        Returns:
            None
        """
        for task in self._cron_tasks.values():
            task.start()

        for number in range(self._parallel_tasks):
            # NOTE(review): add_callback returns None; the slot is a
            # placeholder until process_tasks assigns a real executor.
            self._task_workers[number] = IOLoop.current().add_callback(
                partial(self.process_tasks, number))

        self._next_task_number = self._parallel_tasks

    def add_crontab_task(self, task, cron, event=None):
        """
        Add function to scheduler and execute at cron time.

        Args:
            task (function):
            cron (str): crontab value
            event (Event): event which prevents tasks with a similar aim
                (e.g. security scans) from running at the same time

        Returns:
            None
        """
        if event is not None:
            # share one Event object per event name
            event = self._events.setdefault(event, Event())
        self._cron_tasks[task] = AsyncCrontabTask(cron, task, event)

    @gen.coroutine
    def stop(self):
        """
        Stop CronTabCallback tasks and wait on them to finish.

        Returns:
            None
        """
        for task in self._cron_tasks.values():
            task.stop()
        IOLoop.current().add_callback(self._prepare_shutdown)
        yield [self._stop_condition.wait(), self._tasks.join()]
        self._shutdown_condition.set()

    def _prepare_shutdown(self):
        """
        Check if ioloop can be stopped; reschedules itself while any
        cron task is still running.

        Returns:
            None
        """
        if any(task.is_running() for task in self._cron_tasks.values()):
            IOLoop.current().add_callback(self._prepare_shutdown)
            return
        self._stop_condition.set()

    def clear(self):
        """
        Clear list of tasks and reset the shutdown/stop events.

        Returns:
            None
        """
        self._cron_tasks = {}
        self._shutdown_condition.clear()
        self._stop_condition.clear()

    async def process_tasks(self, number):
        """
        Execute queue. Every task in executed in separated thread (_Executor)
        """
        log.info("Starting worker %s", number)
        while True:
            try:
                item = self._tasks.get_nowait()
                try:
                    log.debug("Worker %s: starting %s", number, item)
                    thread = _Executor(task=item, number=number)
                    self._task_workers[number] = thread
                    thread.start()
                    # poll the executor thread without blocking the ioloop
                    while thread.is_alive():
                        await sleep(0.5)
                except:
                    log.exception("Worker %s: exception occurred", number)
                finally:
                    log.debug("Worker %s: %s finished", number, item)
                    self._tasks.task_done()
                    tasks_per_scan = (
                        '{}: {}'.format(scanner, len(tasks))
                        for scanner, tasks in self.tasks_by_scan.items())
                    log.debug("Tasks left in queue: %s (%s)",
                              self.unfinished_tasks, ', '.join(tasks_per_scan))
                    self._task_workers[number] = None
            except QueueEmpty:
                await gen.sleep(0.5)
                if self._stop_condition.is_set() and self._tasks.empty():
                    return
            finally:
                # throttling lowered the limit: retire surplus workers
                if self._limit < len(self._task_workers):
                    break

        del self._task_workers[number]
        log.info("Closing worker %s", number)

    def add_task(self, task):
        """
        Add task to the queue.

        Args:
            task:

        Returns:
            None
        """
        self._tasks.put(task)

    @property
    def unfinished_tasks(self):
        """
        Task which are still processed or in queue

        Returns:
            int
        """
        # NOTE(review): reaches into tornado Queue private state.
        return self._tasks._unfinished_tasks

    @property
    def tasks_by_scan(self):
        """
        Returns queued tasks grouped by scan
        """
        # NOTE(review): reaches into tornado Queue private state.
        tasks = self._tasks._queue

        return_value = {}

        for task in tasks:
            return_value.setdefault(task.context.scanner.NAME, []).append(task)

        return return_value

    @property
    def cron_tasks(self):
        """
        List of cron tasks

        Returns:
            list
        """
        return self._cron_tasks.values()

    def cron_task(self, name):
        # Find a cron task by its function's NAME attribute.
        for task in self._cron_tasks.values():
            if task.func.NAME == name:
                return task

    def change_throttling_toucan(self, key, value):
        # Toucan-config callback; `key` is unused by design.
        self.change_throttling(value)

    def change_throttling(self, new_value):
        """
        Change throttling value. Keeps throttling value between 0 and 1.

        Behaviour of algorithm is described in docs/throttling.md

        Only working tasks are closing here. Idle workers are stop by
        themselves.
        """
        # clamp to [0, 1] and round to two decimals
        if new_value > 1:
            new_value = 1

        if new_value < 0:
            new_value = 0

        new_value = round(new_value * 100) / 100

        old_limit = self._limit
        self._limit = round(self._parallel_tasks * float(new_value))

        working_tasks = [
            number for number, task in self._task_workers.items()
            if task is not None
        ]
        current_tasks = len(self._task_workers)

        task_politic = cfg['service.scans.task_politic']

        # decide how many running executors to stop, per policy
        if task_politic == self.TASKS_POLITIC_KILL_WORKING_FIRST:
            tasks_to_kill = current_tasks - self._limit
        elif task_politic == self.TASKS_POLITIC_KILL_PROPORTIONS:
            tasks_to_kill = round((old_limit - self._limit) *
                                  len(working_tasks) / self._parallel_tasks)
        elif task_politic == self.TASKS_POLITIC_KILL_WORKING:
            tasks_to_kill = (old_limit - self._limit) - (
                len(self._task_workers) - len(working_tasks))
        else:
            tasks_to_kill = 0

        log.debug('%s tasks will be killed', tasks_to_kill)

        for number in working_tasks:
            if tasks_to_kill <= 0:
                break
            self._task_workers[number].stop()
            tasks_to_kill -= 1

        self._limit = round(self._parallel_tasks * float(new_value))

        current_tasks = len(self._task_workers)

        # raised limit: spawn additional workers up to the new limit
        for number in range(self._limit - current_tasks):
            self._task_workers[self._next_task_number] = None
            IOLoop.current().add_callback(
                partial(self.process_tasks, self._next_task_number))
            self._next_task_number += 1
class ProjectGroomer(object):
  """ Cleans up expired transactions for a project. """
  def __init__(self, project_id, coordinator, zk_client, db_access,
               thread_pool):
    """ Creates a new ProjectGroomer.

    Args:
      project_id: A string specifying a project ID.
      coordinator: A GroomingCoordinator.
      zk_client: A KazooClient.
      db_access: A DatastoreProxy.
      thread_pool: A ThreadPoolExecutor.
    """
    self.project_id = project_id

    self._coordinator = coordinator
    self._zk_client = zk_client
    self._tornado_zk = TornadoKazoo(self._zk_client)
    self._db_access = db_access
    self._thread_pool = thread_pool
    self._project_node = '/appscale/apps/{}'.format(self.project_id)
    self._containers = []
    self._inactive_containers = set()
    self._batch_resolver = BatchResolver(self.project_id, self._db_access)

    self._zk_client.ensure_path(self._project_node)
    self._zk_client.ChildrenWatch(self._project_node, self._update_containers)

    self._txid_manual_offset = 0
    self._offset_node = '/'.join([self._project_node, OFFSET_NODE])
    self._zk_client.DataWatch(self._offset_node, self._update_offset)

    self._stop_event = AsyncEvent()
    self._stopped_event = AsyncEvent()

    # Keeps track of cleanup results for each round of grooming.
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = None

    self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY)
    for _ in range(MAX_CONCURRENCY):
      IOLoop.current().spawn_callback(self._worker)

    IOLoop.current().spawn_callback(self.start)

  @gen.coroutine
  def start(self):
    """ Starts the grooming process until the stop event is set. """
    logger.info('Grooming {}'.format(self.project_id))
    while True:
      if self._stop_event.is_set():
        break

      try:
        yield self._groom_project()
      except Exception:
        # Prevent the grooming loop from stopping if an error is encountered.
        logger.exception(
          'Unexpected error while grooming {}'.format(self.project_id))
        yield gen.sleep(MAX_TX_DURATION)

    self._stopped_event.set()

  @gen.coroutine
  def stop(self):
    """ Stops the grooming process. """
    logger.info('Stopping grooming process for {}'.format(self.project_id))
    self._stop_event.set()
    yield self._stopped_event.wait()

  @gen.coroutine
  def _worker(self):
    """ Processes items in the worker queue. """
    while True:
      tx_path, composite_indexes = yield self._worker_queue.get()
      try:
        tx_time = yield self._resolve_txid(tx_path, composite_indexes)
        if tx_time is None:
          self._txids_cleaned += 1

        if tx_time is not None and tx_time < self._oldest_valid_tx_time:
          self._oldest_valid_tx_time = tx_time
      except Exception:
        # A failure to resolve a single transaction must not kill this worker
        # coroutine; an uncaught exception here would permanently shrink the
        # worker pool for the rest of the process lifetime.
        logger.exception(
          'Unexpected error while resolving {}'.format(tx_path))
      finally:
        self._worker_queue.task_done()

  def _update_offset(self, new_offset, _):
    """ Watches for updates to the manual offset node.

    Args:
      new_offset: A string specifying the new manual offset.
    """
    self._txid_manual_offset = int(new_offset or 0)

  def _update_containers(self, nodes):
    """ Updates the list of active txid containers.

    Args:
      nodes: A list of strings specifying ZooKeeper nodes.
    """
    counters = [int(node[len(CONTAINER_PREFIX):] or 1)
                for node in nodes if node.startswith(CONTAINER_PREFIX)
                and node not in self._inactive_containers]
    counters.sort()

    containers = [CONTAINER_PREFIX + str(counter) for counter in counters]
    # The first container historically has no numeric suffix.
    if containers and containers[0] == '{}1'.format(CONTAINER_PREFIX):
      containers[0] = CONTAINER_PREFIX

    self._containers = containers

  @gen.coroutine
  def _groom_project(self):
    """ Runs the grooming process. """
    index = self._coordinator.index
    worker_count = self._coordinator.total_workers

    oldest_valid_tx_time = yield self._fetch_and_clean(index, worker_count)

    # Wait until there's a reasonable chance that some transactions have
    # timed out.
    next_timeout_eta = oldest_valid_tx_time + MAX_TX_DURATION

    # The oldest ignored transaction should still be valid, but ensure that
    # the timeout is not negative.
    next_timeout = max(0, next_timeout_eta - time.time())
    time_to_wait = datetime.timedelta(
      seconds=next_timeout + (MAX_TX_DURATION / 2))

    # Allow the wait to be cut short when a project is removed.
    try:
      yield self._stop_event.wait(timeout=time_to_wait)
    except gen.TimeoutError:
      raise gen.Return()

  @gen.coroutine
  def _remove_path(self, tx_path):
    """ Removes a ZooKeeper node.

    Args:
      tx_path: A string specifying the path to delete.
    """
    try:
      yield self._tornado_zk.delete(tx_path)
    except NoNodeError:
      pass
    except NotEmptyError:
      # Fall back to a blocking recursive delete on the thread pool.
      yield self._thread_pool.submit(self._zk_client.delete, tx_path,
                                     recursive=True)

  @gen.coroutine
  def _resolve_txid(self, tx_path, composite_indexes):
    """ Cleans up a transaction if it has expired.

    Args:
      tx_path: A string specifying the location of the ZooKeeper node.
      composite_indexes: A list of CompositeIndex objects.
    Returns:
      The transaction start time if still valid, None if invalid because this
      method will also delete it.
    """
    try:
      tx_data = yield self._tornado_zk.get(tx_path)
    except NoNodeError:
      # Another groomer (or a concurrent worker) may have already removed the
      # node since it was listed; treat it as cleaned up.
      raise gen.Return()

    tx_time = float(tx_data[0])

    _, container, tx_node = tx_path.rsplit('/', 2)
    tx_node_id = int(tx_node.lstrip(COUNTER_NODE_PREFIX))
    container_count = int(container[len(CONTAINER_PREFIX):] or 1)
    # Negative sequence IDs are invalid; discard them outright.
    if tx_node_id < 0:
      yield self._remove_path(tx_path)
      raise gen.Return()

    container_size = MAX_SEQUENCE_COUNTER + 1
    automatic_offset = (container_count - 1) * container_size
    txid = self._txid_manual_offset + automatic_offset + tx_node_id

    if txid < 1:
      yield self._remove_path(tx_path)
      raise gen.Return()

    # If the transaction is still valid, return the time it was created.
    if tx_time + MAX_TX_DURATION >= time.time():
      raise gen.Return(tx_time)

    yield self._batch_resolver.resolve(txid, composite_indexes)
    yield self._remove_path(tx_path)
    yield self._batch_resolver.cleanup(txid)

  @gen.coroutine
  def _fetch_and_clean(self, worker_index, worker_count):
    """ Cleans up expired transactions.

    Args:
      worker_index: An integer specifying this worker's index.
      worker_count: An integer specifying the number of total workers.
    Returns:
      A float specifying the time of the oldest valid transaction as a unix
      timestamp.
    """
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = time.time()

    children = []
    for index, container in enumerate(self._containers):
      container_path = '/'.join([self._project_node, container])
      new_children = yield self._tornado_zk.get_children(container_path)

      # An empty, non-latest container will never receive new txids again.
      if not new_children and index < len(self._containers) - 1:
        self._inactive_containers.add(container)

      children.extend(['/'.join([container_path, node])
                       for node in new_children])

    logger.debug(
      'Found {} transaction IDs for {}'.format(len(children),
                                               self.project_id))

    if not children:
      raise gen.Return(self._oldest_valid_tx_time)

    # Refresh these each time so that the indexes are fresh.
    encoded_indexes = yield self._thread_pool.submit(
      self._db_access.get_indices, self.project_id)
    composite_indexes = [CompositeIndex(index) for index in encoded_indexes]

    for tx_path in children:
      tx_node_id = int(tx_path.split('/')[-1].lstrip(COUNTER_NODE_PREFIX))
      # Only resolve transactions that this worker has been assigned.
      if tx_node_id % worker_count != worker_index:
        continue

      yield self._worker_queue.put((tx_path, composite_indexes))

    yield self._worker_queue.join()

    if self._txids_cleaned > 0:
      logger.info('Cleaned up {} expired txids for {}'.format(
        self._txids_cleaned, self.project_id))

    raise gen.Return(self._oldest_valid_tx_time)
class BlogBackup(object):
    """Back up a user's blog essays as local markdown files.

    Logs in with username/password (token scraped from the login page),
    crawls the paginated essay list, and saves each essay concurrently.
    """

    _default_dir_name = "seg_blog_backup"

    def _generate_save_dir(self):
        """Create (if needed) the default save directory next to this file."""
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        """Validate the user-supplied save path or fall back to the default.

        Raises:
            BlogSavePathError: if a path was supplied but is not an existing
                directory.
        """
        if self.save_path:
            if os.path.exists(self.save_path) and os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError("'%s' not exists or is not dir!" % self.save_path)
        else:
            self._generate_save_dir()

    @staticmethod
    def parse_token_from_html(content):
        """Extract the 32-character login token from the site's HTML.

        The page obfuscates the token as a series of quoted fragments plus
        ``[m,n]`` splice pairs; this reassembles it.

        Raises:
            PageHtmlChanged: if the token script block cannot be found or no
                longer yields a 32-character token.
        """
        overall_pat = re.compile(r"SF.token =.*?,\s+_\w+ = [\d,\[\]]+;", re.DOTALL)
        overall_res = overall_pat.search(content)
        if overall_res:
            overall_content = overall_res.group()
            # remove /* */ type annotation
            filter_res = re.sub(r"(/\*[/a-zA-Z\d' ]+\*/)", "", overall_content)
            # Quoted fragments not hidden behind a line comment.
            str_list = re.findall(r"(?<!//)'([a-zA-Z\d]+)'", filter_res, re.DOTALL)
            filter_list = re.findall(r"\[(\d+),(\d+)\]", overall_content)
            ret = "".join(str_list)
            if filter_list:
                # Each [m, n] pair removes the slice ret[m:n].
                for m, n in filter_list:
                    ret = ret[: int(m)] + ret[int(n) :]
            if len(ret) == 32:
                return ret

        raise PageHtmlChanged("website login token has changed")

    def _get_user_cookies(self):
        """Log in via the JSON API and return the authenticated cookies."""
        s = requests.Session()
        s.headers.update(headers)
        rep = s.get(target_url)
        post_url = "%s%s?_=%s" % (target_url, login_api_path, self.parse_token_from_html(rep.text))
        data = {"mail": self.username, "password": self.passwd}
        s.post(post_url, data=data)
        return s.cookies

    def __init__(self, **conf):
        """Accepts ``username``, ``passwd`` and optional ``save_path``."""
        self.username = conf["username"]
        self.passwd = conf["passwd"]
        self.save_path = conf.get("save_path")
        self._q = Queue()
        self._cookies = self._get_user_cookies()
        self._parse_save_path()

    @gen.coroutine
    def run(self):
        """Crawl the blog list, then back up every queued essay."""
        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        # Deliberately not yielded: spawn one worker per CPU; they run
        # concurrently and run() waits for them via the queue join below.
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()
        yield self._q.join()

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        """Queue every essay link on *page_link*, then follow pagination."""
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d(".stream-list__item > .summary > h2 > a")
        for link in link_elements:
            yield self._q.put(d(link).attr("href"))

        next_ele = d(".pagination li.next a")
        if next_ele:
            next_page_url = target_url + next_ele.attr("href")
            # Must be yielded: otherwise the coroutine's result is discarded,
            # its exceptions are lost, and run() may consider the crawl done
            # before later pages were queued.
            yield self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        """Worker: pop essay paths from the queue and save them as markdown."""
        while True:
            try:
                essay_path = yield self._q.get(timeout=1)
            except gen.TimeoutError:
                # Queue stayed empty for a second: the crawl is finished.
                # task_done() must NOT be called on this path - no item was
                # retrieved, and an extra call unbalances queue.join().
                raise gen.Return()

            try:
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                real_file_name = os.path.join(self.save_path, title + ".md")
                logger.info("is backup essay: %s" % title)
                with open(real_file_name, "w") as f:
                    f.writelines(content.encode("utf8"))
            finally:
                self._q.task_done()
class BlogBackup(object):
    """Back up a user's blog essays as local markdown files.

    This variant logs in by driving PhantomJS through the login form and
    reusing the browser cookies for the HTTP crawl.
    """

    _default_dir_name = 'seg_blog_backup'

    def _generate_save_dir(self):
        """Create (if needed) the default save directory next to this file."""
        cur_dir = os.path.dirname(__file__)
        self.save_path = os.path.join(cur_dir, self._default_dir_name)
        if not os.path.isdir(self.save_path):
            os.mkdir(self.save_path)

    def _parse_save_path(self):
        """Validate the user-supplied save path or fall back to the default."""
        if self.save_path:
            if os.path.exists(self.save_path) and \
                    os.path.isdir(self.save_path):
                return
            else:
                raise BlogSavePathError(
                    "'%s' not exists or is not dir!" % self.save_path)
        else:
            self._generate_save_dir()

    def _get_user_cookies(self):
        """Drive the browser through the login form and return its cookies.

        Raises:
            PageHtmlChanged: if the expected form elements are missing.
            Exception: on wrong credentials or when the post-login redirect
                never happens.
        """
        url = target_url + login_page_path
        self.driver.get(url)
        try:
            user_input = self.driver.find_element_by_name('mail')
            passwd_input = self.driver.find_element_by_name('password')
            submit_btn = self.driver.find_element_by_class_name('pr20')
        except NoSuchElementException:
            raise PageHtmlChanged(
                "%s login page structure have changed!" % _domain)

        user_input.send_keys(self.username)
        passwd_input.send_keys(self.passwd)
        submit_btn.click()
        try:
            # The submit button goes stale when the page navigates away.
            WebDriverWait(self.driver, 3).until(staleness_of(submit_btn))
        except TimeoutException:
            raise Exception("Wrong username or password!")

        WebDriverWait(self.driver, timeout=10).until(has_page_load)
        try_times = 0
        while True:
            time.sleep(1)
            if url != self.driver.current_url:
                return self.driver.get_cookies()

            try_times += 1
            if try_times > 10:
                raise Exception("Getting cookie info failed!")

    def _get_driver(self):
        """Build the PhantomJS driver, honoring an explicit executable path."""
        if self.phantomjs_path:
            try:
                return webdriver.PhantomJS(
                    executable_path=self.phantomjs_path,
                    service_log_path=os.path.devnull)
            except WebDriverException:
                raise PhantomjsPathError("Phantomjs locate path invalid!")
        else:
            return webdriver.PhantomJS(service_log_path=os.path.devnull)

    def __init__(self, **conf):
        """Accepts ``username``, ``passwd``, optional ``phantomjs_path`` and
        ``save_path``."""
        self.username = conf['username']
        self.passwd = conf['passwd']
        self.phantomjs_path = conf.get('phantomjs_path')
        self.save_path = conf.get('save_path')
        self._q = Queue()

        self._parse_save_path()
        self.driver = self._get_driver()
        self._cookies = self._get_user_cookies()

    @gen.coroutine
    def run(self):
        """Crawl the blog list, then back up every queued essay."""
        self.__filter_cookies()

        start_url = target_url + blog_path
        yield self._fetch_blog_list_page(start_url)
        # Deliberately not yielded: spawn one worker per CPU; run() waits for
        # their completion through the queue join below.
        for _ in xrange(cpu_count()):
            self._fetch_essay_content()

        yield self._q.join()

    def __filter_cookies(self):
        """Reduce selenium cookie dicts to a name->value map for this domain."""
        self._cookies = {k['name']: k['value'] for k in self._cookies
                         if k['domain'] == _domain}

    @gen.coroutine
    def _fetch_blog_list_page(self, page_link):
        """Queue every essay link on *page_link*, then follow pagination."""
        ret = requests.get(page_link, cookies=self._cookies)
        d = pq(ret.text)
        link_elements = d('.stream-list__item > .summary > h2 > a')
        for link in link_elements:
            yield self._q.put(d(link).attr('href'))

        next_ele = d('.pagination li.next a')
        if next_ele:
            next_page_url = target_url + next_ele.attr('href')
            # Must be yielded: otherwise the recursive coroutine's exceptions
            # are lost and run() may proceed before later pages were queued.
            yield self._fetch_blog_list_page(next_page_url)

    @gen.coroutine
    def _fetch_essay_content(self):
        """Worker: pop essay paths from the queue and save them as markdown."""
        while True:
            try:
                essay_path = yield self._q.get(timeout=1)
            except gen.TimeoutError:
                # Queue stayed empty for a second: the crawl is finished.
                # task_done() must NOT run on this path - no item was
                # retrieved, and an extra call unbalances queue.join().
                raise gen.Return()

            try:
                essay_url = target_url + essay_path + edit_suffix
                ret = requests.get(essay_url, cookies=self._cookies)
                d = pq(ret.text)
                title = d("#myTitle").val()
                content = d("#myEditor").text()
                file_name = title + '.md'
                real_file_name = os.path.join(self.save_path, file_name)
                with open(real_file_name, 'w') as f:
                    f.writelines(content.encode('utf8'))
            finally:
                self._q.task_done()
class Scraper(): def __init__( self, destinations=None, transform=None, headers={}, max_clients=50, maxsize=50, connect_timeout=1200, request_timeout=600,): """Instantiate a tornado async http client to do multiple concurrent requests""" if None in [destinations, transform]: sys.stderr.write('You must pass both collection of URLS and a transform function') raise SystemExit self.max_clients = max_clients self.maxsize = maxsize self.connect_timeout = connect_timeout self.request_timeout = request_timeout AsyncHTTPClient.configure("tornado.simple_httpclient.SimpleAsyncHTTPClient", max_clients=self.max_clients) self.http_client = AsyncHTTPClient() self.queue = Queue(maxsize=50) self.destinations = destinations self.transform = transform self.headers = headers self.read(self.destinations) self.get(self.transform, self.headers, self.connect_timeout, self.request_timeout, self.http_client) self.loop = ioloop.IOLoop.current() self.join_future = self.queue.join() def done(future): self.loop.stop() self.join_future.add_done_callback(done) self.loop.start() @gen.coroutine def read(self, destinations): for url in destinations: yield self.queue.put(url) @gen.coroutine def get(self, transform, headers, connect_timeout, request_timeout, http_client): while True: url = yield self.queue.get() try: request = HTTPRequest(url, connect_timeout=connect_timeout, request_timeout=request_timeout, method="GET", headers = headers ) except Exception as e: sys.stderr.write('Destination {0} returned error {1}'.format(url, str(e) + '\n')) future = self.http_client.fetch(request) def done_callback(future): body = future.result().body url = future.result().effective_url transform(body, url=url) self.queue.task_done() try: future.add_done_callback(done_callback) except Exception as e: sys.stderr.write(str(e)) queue.put(url)
class ProjectGroomer(object):
  """ Cleans up expired transactions for a project. """
  def __init__(self, project_id, coordinator, zk_client, db_access,
               thread_pool):
    """ Creates a new ProjectGroomer.

    Args:
      project_id: A string specifying a project ID.
      coordinator: A GroomingCoordinator.
      zk_client: A KazooClient.
      db_access: A DatastoreProxy.
      thread_pool: A ThreadPoolExecutor.
    """
    self.project_id = project_id

    self._coordinator = coordinator
    self._zk_client = zk_client
    self._tornado_zk = TornadoKazoo(self._zk_client)
    self._db_access = db_access
    self._thread_pool = thread_pool
    self._project_node = '/appscale/apps/{}'.format(self.project_id)
    self._containers = []
    self._inactive_containers = set()
    self._batch_resolver = BatchResolver(self.project_id, self._db_access)

    self._zk_client.ensure_path(self._project_node)
    self._zk_client.ChildrenWatch(self._project_node, self._update_containers)

    self._txid_manual_offset = 0
    self._offset_node = '/'.join([self._project_node, OFFSET_NODE])
    self._zk_client.DataWatch(self._offset_node, self._update_offset)

    self._stop_event = AsyncEvent()
    self._stopped_event = AsyncEvent()

    # Keeps track of cleanup results for each round of grooming.
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = None

    # Fixed-size pool of worker coroutines draining this queue.
    self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY)
    for _ in range(MAX_CONCURRENCY):
      IOLoop.current().spawn_callback(self._worker)

    IOLoop.current().spawn_callback(self.start)

  @gen.coroutine
  def start(self):
    """ Starts the grooming process until the stop event is set. """
    logger.info('Grooming {}'.format(self.project_id))
    while True:
      if self._stop_event.is_set():
        break

      try:
        yield self._groom_project()
      except Exception:
        # Prevent the grooming loop from stopping if an error is encountered.
        logger.exception('Unexpected error while grooming {}'.format(
          self.project_id))
        yield gen.sleep(MAX_TX_DURATION)

    self._stopped_event.set()

  @gen.coroutine
  def stop(self):
    """ Stops the grooming process. """
    logger.info('Stopping grooming process for {}'.format(self.project_id))
    self._stop_event.set()
    yield self._stopped_event.wait()

  @gen.coroutine
  def _worker(self):
    """ Processes items in the worker queue. """
    while True:
      tx_path, composite_indexes = yield self._worker_queue.get()
      try:
        tx_time = yield self._resolve_txid(tx_path, composite_indexes)
        # None means the transaction was expired/invalid and got deleted.
        if tx_time is None:
          self._txids_cleaned += 1

        if tx_time is not None and tx_time < self._oldest_valid_tx_time:
          self._oldest_valid_tx_time = tx_time
      except Exception:
        # Keep the worker coroutine alive no matter what went wrong with a
        # single transaction.
        logger.exception(
          'Unexpected error while resolving {}'.format(tx_path))
      finally:
        self._worker_queue.task_done()

  def _update_offset(self, new_offset, _):
    """ Watches for updates to the manual offset node.

    Args:
      new_offset: A string specifying the new manual offset.
    """
    self._txid_manual_offset = int(new_offset or 0)

  def _update_containers(self, nodes):
    """ Updates the list of active txid containers.

    Args:
      nodes: A list of strings specifying ZooKeeper nodes.
    """
    counters = [
      int(node[len(CONTAINER_PREFIX):] or 1) for node in nodes
      if node.startswith(CONTAINER_PREFIX)
      and node not in self._inactive_containers
    ]
    counters.sort()

    containers = [CONTAINER_PREFIX + str(counter) for counter in counters]
    # The first container historically carries no numeric suffix.
    if containers and containers[0] == '{}1'.format(CONTAINER_PREFIX):
      containers[0] = CONTAINER_PREFIX

    self._containers = containers

  @gen.coroutine
  def _groom_project(self):
    """ Runs the grooming process. """
    index = self._coordinator.index
    worker_count = self._coordinator.total_workers

    oldest_valid_tx_time = yield self._fetch_and_clean(index, worker_count)

    # Wait until there's a reasonable chance that some transactions have
    # timed out.
    next_timeout_eta = oldest_valid_tx_time + MAX_TX_DURATION

    # The oldest ignored transaction should still be valid, but ensure that
    # the timeout is not negative.
    next_timeout = max(0, next_timeout_eta - time.time())
    time_to_wait = datetime.timedelta(seconds=next_timeout +
                                      (MAX_TX_DURATION / 2))

    # Allow the wait to be cut short when a project is removed.
    try:
      yield self._stop_event.wait(timeout=time_to_wait)
    except gen.TimeoutError:
      return

  @gen.coroutine
  def _remove_locks(self, txid, tx_path):
    """ Removes entity locks involved with the transaction.

    Args:
      txid: An integer specifying the transaction ID.
      tx_path: A string specifying the location of the transaction node.
    """
    groups_path = '/'.join([tx_path, 'groups'])
    try:
      groups_data = yield self._tornado_zk.get(groups_path)
    except NoNodeError:
      # If the group list does not exist, the locks have not been acquired.
      return

    group_paths = json.loads(groups_data[0])
    for group_path in group_paths:
      try:
        contenders = yield self._tornado_zk.get_children(group_path)
      except NoNodeError:
        # The lock may have been cleaned up or not acquired in the first
        # place.
        continue

      # Delete only the contender node belonging to this transaction.
      for contender in contenders:
        contender_path = '/'.join([group_path, contender])
        contender_data = yield self._tornado_zk.get(contender_path)
        contender_txid = int(contender_data[0])
        if contender_txid != txid:
          continue

        yield self._tornado_zk.delete(contender_path)
        break

  @gen.coroutine
  def _remove_path(self, tx_path):
    """ Removes a ZooKeeper node.

    Args:
      tx_path: A string specifying the path to delete.
    """
    try:
      yield self._tornado_zk.delete(tx_path)
    except NoNodeError:
      pass
    except NotEmptyError:
      # Fall back to a blocking recursive delete on the thread pool.
      yield self._thread_pool.submit(self._zk_client.delete, tx_path,
                                     recursive=True)

  @gen.coroutine
  def _resolve_txid(self, tx_path, composite_indexes):
    """ Cleans up a transaction if it has expired.

    Args:
      tx_path: A string specifying the location of the ZooKeeper node.
      composite_indexes: A list of CompositeIndex objects.
    Returns:
      The transaction start time if still valid, None if invalid because
      this method will also delete it.
    """
    try:
      tx_data = yield self._tornado_zk.get(tx_path)
    except NoNodeError:
      # Already removed (e.g. by a concurrent groomer): nothing to resolve.
      return

    tx_time = float(tx_data[0])

    _, container, tx_node = tx_path.rsplit('/', 2)
    tx_node_id = int(tx_node.lstrip(COUNTER_NODE_PREFIX))
    container_count = int(container[len(CONTAINER_PREFIX):] or 1)
    # Negative sequence IDs are invalid; discard them outright.
    if tx_node_id < 0:
      yield self._remove_path(tx_path)
      return

    # Reconstruct the global txid from the container index, the sequence
    # counter and the manual offset.
    container_size = MAX_SEQUENCE_COUNTER + 1
    automatic_offset = (container_count - 1) * container_size
    txid = self._txid_manual_offset + automatic_offset + tx_node_id

    if txid < 1:
      yield self._remove_path(tx_path)
      return

    # If the transaction is still valid, return the time it was created.
    if tx_time + MAX_TX_DURATION >= time.time():
      raise gen.Return(tx_time)

    yield self._batch_resolver.resolve(txid, composite_indexes)
    yield self._remove_locks(txid, tx_path)
    yield self._remove_path(tx_path)
    yield self._batch_resolver.cleanup(txid)

  @gen.coroutine
  def _fetch_and_clean(self, worker_index, worker_count):
    """ Cleans up expired transactions.

    Args:
      worker_index: An integer specifying this worker's index.
      worker_count: An integer specifying the number of total workers.
    Returns:
      A float specifying the time of the oldest valid transaction as a unix
      timestamp.
    """
    self._txids_cleaned = 0
    self._oldest_valid_tx_time = time.time()

    children = []
    for index, container in enumerate(self._containers):
      container_path = '/'.join([self._project_node, container])
      new_children = yield self._tornado_zk.get_children(container_path)

      # An empty, non-latest container will not receive new txids again.
      if not new_children and index < len(self._containers) - 1:
        self._inactive_containers.add(container)

      children.extend(
        ['/'.join([container_path, node]) for node in new_children])

    logger.debug('Found {} transaction IDs for {}'.format(
      len(children), self.project_id))

    if not children:
      raise gen.Return(self._oldest_valid_tx_time)

    # Refresh these each time so that the indexes are fresh.
    encoded_indexes = yield self._thread_pool.submit(
      self._db_access.get_indices, self.project_id)
    composite_indexes = [
      CompositeIndex(index) for index in encoded_indexes
    ]

    for tx_path in children:
      tx_node_id = int(
        tx_path.split('/')[-1].lstrip(COUNTER_NODE_PREFIX))

      # Only resolve transactions that this worker has been assigned.
      if tx_node_id % worker_count != worker_index:
        continue

      yield self._worker_queue.put((tx_path, composite_indexes))

    yield self._worker_queue.join()

    if self._txids_cleaned > 0:
      logger.info('Cleaned up {} expired txids for {}'.format(
        self._txids_cleaned, self.project_id))

    raise gen.Return(self._oldest_valid_tx_time)
class LanguageServerSession(LoggingConfigurable):
    """ Manage a session for a connection to a language server
    """

    language_server = Unicode(help="the language server implementation name")
    spec = Schema(LANGUAGE_SERVER_SPEC)

    # run-time specifics
    process = Instance(subprocess.Popen, help="the language server subprocess",
                       allow_none=True)
    writer = Instance(stdio.LspStdIoWriter, help="the JSON-RPC writer",
                      allow_none=True)
    reader = Instance(stdio.LspStdIoReader, help="the JSON-RPC reader",
                      allow_none=True)
    from_lsp = Instance(Queue, help="a queue for string messages from the server",
                        allow_none=True)
    to_lsp = Instance(Queue, help="a queue for string message to the server",
                      allow_none=True)
    handlers = Set(
        trait=Instance(WebSocketHandler),
        default_value=[],
        help="the currently subscribed websockets",
    )
    status = UseEnum(SessionStatus, default_value=SessionStatus.NOT_STARTED)
    last_handler_message_at = Instance(datetime, allow_none=True)
    last_server_message_at = Instance(datetime, allow_none=True)
    # asyncio tasks for the reader/writer/broadcast loops (set in initialize)
    _tasks = None

    _skip_serialize = ["argv", "debug_argv"]

    def __init__(self, *args, **kwargs):
        """ set up the required traitlets and exit behavior for a session """
        super().__init__(*args, **kwargs)
        atexit.register(self.stop)

    def __repr__(self):  # pragma: no cover
        return ("<LanguageServerSession("
                "language_server={language_server}, argv={argv})>").format(
                    language_server=self.language_server, **self.spec)

    def to_json(self):
        """ serialize session state for the status endpoint """
        return dict(
            handler_count=len(self.handlers),
            status=self.status.value,
            last_server_message_at=self.last_server_message_at.isoformat()
            if self.last_server_message_at else None,
            last_handler_message_at=self.last_handler_message_at.isoformat()
            if self.last_handler_message_at else None,
            spec={k: v for k, v in self.spec.items() if k not in SKIP_JSON_SPEC},
        )

    def initialize(self):
        """ (re)initialize a language server session """
        self.stop()
        self.status = SessionStatus.STARTING
        self.init_queues()
        self.init_process()
        self.init_writer()
        self.init_reader()

        loop = asyncio.get_event_loop()
        self._tasks = [
            loop.create_task(coro())
            for coro in [self._read_lsp, self._write_lsp, self._broadcast_from_lsp]
        ]

        self.status = SessionStatus.STARTED

    def stop(self):
        """ clean up all of the state of the session """
        self.status = SessionStatus.STOPPING

        if self.process:
            self.process.terminate()
            self.process = None
        if self.reader:
            self.reader.close()
            self.reader = None
        if self.writer:
            self.writer.close()
            self.writer = None
        if self._tasks:
            [task.cancel() for task in self._tasks]

        self.status = SessionStatus.STOPPED

    @observe("handlers")
    def _on_handlers(self, change: Bunch):
        """ re-initialize if someone starts listening, or stop if nobody is """
        if change["new"] and not self.process:
            self.initialize()
        elif not change["new"] and self.process:
            self.stop()

    def write(self, message):
        """ wrapper around the write queue to keep it mostly internal """
        self.last_handler_message_at = self.now()
        IOLoop.current().add_callback(self.to_lsp.put_nowait, message)

    def now(self):
        """ the current timezone-aware UTC time """
        return datetime.now(timezone.utc)

    def init_process(self):
        """ start the language server subprocess """
        self.process = subprocess.Popen(
            self.spec["argv"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            env=self.substitute_env(self.spec.get("env", {}), os.environ),
        )

    def init_queues(self):
        """ create the queues """
        self.from_lsp = Queue()
        self.to_lsp = Queue()

    def init_reader(self):
        """ create the stdout reader (from the language server) """
        self.reader = stdio.LspStdIoReader(stream=self.process.stdout,
                                           queue=self.from_lsp, parent=self)

    def init_writer(self):
        """ create the stdin writer (to the language server) """
        self.writer = stdio.LspStdIoWriter(stream=self.process.stdin,
                                           queue=self.to_lsp, parent=self)

    def substitute_env(self, env, base):
        """ return a copy of *base* with *env* entries overlaid, each value
        having ``${VAR}`` templates substituted from *base*

        Fix: the copy now starts from the *base* argument instead of always
        from ``os.environ``, which silently ignored the parameter.
        """
        final_env = copy(base)

        for key, value in env.items():
            final_env.update({key: string.Template(value).safe_substitute(base)})

        return final_env

    async def _read_lsp(self):
        """ run the reader loop against the server's stdout """
        await self.reader.read()

    async def _write_lsp(self):
        """ run the writer loop against the server's stdin """
        await self.writer.write()

    async def _broadcast_from_lsp(self):
        """ loop for reading messages from the queue of messages from the
        language server
        """
        async for message in self.from_lsp:
            self.last_server_message_at = self.now()
            await self.parent.on_server_message(message, self)
            self.from_lsp.task_done()
async def test_listeners(known_server, handlers, jsonrpc_init_msg):
    """will some listeners listen?"""
    handler, ws_handler = handlers
    manager = handler.manager
    # Install a single dummy "all" listener from the test helpers.
    manager.all_listeners = ["jupyter_lsp.tests.listener.dummy_listener"]
    manager.initialize()
    manager._listeners["client"] = []  # hide predefined client listeners

    assert len(manager._listeners["all"]) == 1

    dummy_listener = manager._listeners["all"][0]
    assert re.match(
        ("<MessageListener listener=<function dummy_listener at .*?>,"
         " method=None, language_server=None>"),
        repr(dummy_listener),
    )

    # One queue per listener category so each firing can be awaited.
    handler_listened = Queue()
    server_listened = Queue()
    all_listened = Queue()

    # some client listeners
    @lsp_message_listener("client", language_server=known_server,
                          method="initialize")
    async def client_listener(scope, message, language_server, manager):
        await handler_listened.put(message)

    # This one must never fire: its method pattern matches nothing.
    @lsp_message_listener("client", method=r"not-a-method")
    async def other_client_listener(scope, message, language_server,
                                    manager):  # pragma: no cover
        await handler_listened.put(message)
        raise NotImplementedError("shouldn't get here")

    # some server listeners
    @lsp_message_listener("server", language_server=None, method=None)
    async def server_listener(scope, message, language_server, manager):
        await server_listened.put(message)

    # This one must never fire: its language_server pattern matches nothing.
    @lsp_message_listener("server", language_server=r"not-a-language-server")
    async def other_server_listener(scope, message, language_server,
                                    manager):  # pragma: no cover
        await handler_listened.put(message)
        raise NotImplementedError("shouldn't get here")

    # an all listener
    @lsp_message_listener("all")
    async def all_listener(scope, message, language_server,
                           manager):  # pragma: no cover
        await all_listened.put(message)

    assert len(manager._listeners["server"]) == 2
    assert len(manager._listeners["client"]) == 2
    assert len(manager._listeners["all"]) == 2

    # Drive one initialize round-trip through the websocket handler.
    ws_handler.open(known_server)
    await ws_handler.on_message(jsonrpc_init_msg)

    # The "all" listener should fire twice (client and server direction),
    # the matching client/server listeners once each.
    results = await asyncio.wait_for(
        asyncio.gather(
            handler_listened.get(),
            server_listened.get(),
            all_listened.get(),
            all_listened.get(),
            return_exceptions=True,
        ),
        20,
    )
    assert all([isinstance(res, dict) for res in results])
    ws_handler.on_close()

    handler_listened.task_done()
    server_listened.task_done()
    all_listened.task_done()
    all_listened.task_done()

    # Deregister everything this test added...
    [
        manager.unregister_message_listener(listener)
        for listener in [
            client_listener,
            other_client_listener,
            server_listener,
            other_server_listener,
            all_listener,
        ]
    ]

    # ...leaving only the dummy listener installed via all_listeners.
    assert not manager._listeners["server"]
    assert not manager._listeners["client"]
    assert len(manager._listeners["all"]) == 1
class Server:
    """ Server class.

        Holds the whole server database (users, games, maps) under
        ``<server_dir>/data`` and runs on a Tornado IO loop (see
        :meth:`start`).  Instances are cached per working directory:
        constructing a second Server with the same ``server_dir`` returns
        the already-initialized instance (see ``__new__`` / ``__init__``).
    """
    __slots__ = [
        'data_path', 'games_path', 'available_maps', 'maps_mtime',
        'notifications', 'games_scheduler', 'allow_registrations',
        'max_games', 'remove_canceled_games', 'users', 'games',
        'daide_servers', 'backup_server', 'backup_games',
        'backup_delay_seconds', 'ping_seconds', 'interruption_handler',
        'backend', 'games_with_dummy_powers', 'dispatched_dummy_powers'
    ]

    # Servers cache.
    __cache__ = {}  # {absolute path of working folder => Server}

    def __new__(cls, server_dir=None, **kwargs):  # pylint: disable=unused-argument
        # Per-directory singleton: reuse the cached instance when one already
        # exists for this working folder, otherwise allocate a fresh object
        # (which __init__ will then populate and register in the cache).
        server_dir = get_absolute_path(server_dir)
        if server_dir in cls.__cache__:
            server = cls.__cache__[server_dir]
        else:
            server = object.__new__(cls)
        return server

    def __init__(self, server_dir=None, **kwargs):
        """ Initialize the server.

            Server data is stored in folder ``<working directory>/data``.

            :param server_dir: path of folder in (from) which server data will be saved (loaded).
                If None, working directory (where script is executed) will be used.
            :param kwargs: (optional) values for some public configurable server attributes.
                Given values will overwrite values saved on disk.
            :raise diplomacy.utils.exceptions.ServerDirException: if ``server_dir`` is
                not an existing directory.
        """
        # File paths and attributes related to database.
        server_dir = get_absolute_path(server_dir)
        # If __new__ returned a cached (already-initialized) instance,
        # skip re-initialization entirely.
        if server_dir in self.__class__.__cache__:
            return
        if not os.path.exists(server_dir) or not os.path.isdir(server_dir):
            raise exceptions.ServerDirException(server_dir)
        self.data_path = os.path.join(server_dir, 'data')
        self.games_path = os.path.join(self.data_path, 'games')

        # Data in memory (not stored on disk).
        self.notifications = Queue()
        self.games_scheduler = Scheduler(1, self._process_game)
        self.backup_server = None
        self.backup_games = {}
        self.interruption_handler = InterruptionHandler(self)
        # Backend objects used to run server. If None, server is not yet started.
        # Initialized when you call Server.start() (see method below).
        self.backend = None  # type: _ServerBackend

        # Database (stored on disk).
        self.allow_registrations = True
        self.max_games = 0
        self.remove_canceled_games = False
        self.backup_delay_seconds = constants.DEFAULT_BACKUP_DELAY_SECONDS
        self.ping_seconds = constants.DEFAULT_PING_SECONDS
        self.users = None  # type: Users  # Users and administrators usernames.
        self.available_maps = {
        }  # type: Dict[str, Set[str]]  # {"map_name" => set("map_power")}
        self.maps_mtime = 0  # Latest maps modification date (used to manage maps cache in server object).

        # Server games loaded on memory (stored on disk).
        # Saved separately (each game in one JSON file).
        # Each game also stores tokens connected (player tokens, observer tokens, omniscient tokens).
        self.games = {}  # type: Dict[str, ServerGame]

        # Dictionary mapping game ID to list of power names.
        self.games_with_dummy_powers = {}  # type: Dict[str, List[str]]

        # Dictionary mapping a game ID present in games_with_dummy_powers, to
        # a couple of associated bot token and time when bot token was associated to this game ID.
        # If there is no bot token associated, couple is (None, None).
        self.dispatched_dummy_powers = {}  # type: dict{str, tuple}

        # DAIDE TCP servers listening to a game's dedicated port.
        self.daide_servers = {}  # {port: daide_server}

        # Load data on memory.
        self._load()

        # If necessary, updated server configurable attributes from kwargs.
        # kwargs values take precedence over what was loaded from disk.
        self.allow_registrations = bool(
            kwargs.pop(strings.ALLOW_REGISTRATIONS, self.allow_registrations))
        self.max_games = int(kwargs.pop(strings.MAX_GAMES, self.max_games))
        self.remove_canceled_games = bool(
            kwargs.pop(strings.REMOVE_CANCELED_GAMES,
                       self.remove_canceled_games))
        self.backup_delay_seconds = int(
            kwargs.pop(strings.BACKUP_DELAY_SECONDS,
                       self.backup_delay_seconds))
        self.ping_seconds = int(
            kwargs.pop(strings.PING_SECONDS, self.ping_seconds))
        # Any kwargs left over are unknown settings: fail loudly.
        assert not kwargs
        LOGGER.debug('Ping        : %s', self.ping_seconds)
        LOGGER.debug('Backup delay: %s', self.backup_delay_seconds)

        # Add server on servers cache.
        self.__class__.__cache__[server_dir] = self

    @property
    def port(self):
        """ Property: return port where this server currently runs, or None if server is not yet started. """
        return self.backend.port if self.backend else None

    def _load_available_maps(self):
        """ Load a dictionary (self.available_maps) mapping every map name to a dict of map info.
            for all maps available in diplomacy package.
            Only (re)parses a map file when it is new or was modified since the
            last recorded maps mtime.
        """
        diplomacy_map_dir = os.path.join(diplomacy.settings.PACKAGE_DIR,
                                         strings.MAPS)
        new_maps_mtime = self.maps_mtime
        for filename in os.listdir(diplomacy_map_dir):
            if filename.endswith('.map'):
                map_filename = os.path.join(diplomacy_map_dir, filename)
                map_mtime = os.path.getmtime(map_filename)
                map_name = filename[:-4]  # strip the ".map" extension
                if map_name not in self.available_maps or map_mtime > self.maps_mtime:
                    # Either it's a new map file or map file was modified.
                    available_map = Map(map_name)
                    self.available_maps[map_name] = {
                        'powers': set(available_map.powers),
                        'supply_centers': set(available_map.scs),
                        'loc_type': available_map.loc_type.copy(),
                        'loc_abut': available_map.loc_abut.copy(),
                        'aliases': available_map.aliases.copy()
                    }
                new_maps_mtime = max(new_maps_mtime, map_mtime)
        self.maps_mtime = new_maps_mtime

    def _get_server_data_filename(self):
        """ Return path to server data file name (server.json), making sure that data folder exists.
            Raises an exception if data folder does not exist and cannot be created.
        """
        return os.path.join(ensure_path(self.data_path), 'server.json')

    def _load(self):
        """ Load database from disk.

            Reads ``server.json`` if present, otherwise creates it with
            default values; then ensures default accounts exist and loads
            available maps.
        """
        LOGGER.info("Loading database.")
        ensure_path(self.data_path)  # <server dir>/data
        ensure_path(self.games_path)  # <server dir>/data/games
        server_data_filename = self._get_server_data_filename(
        )  # <server dir>/data/server.json
        if os.path.exists(server_data_filename):
            LOGGER.info("Loading server.json.")
            server_info = load_json_from_disk(server_data_filename)
            self.allow_registrations = server_info[strings.ALLOW_REGISTRATIONS]
            self.backup_delay_seconds = server_info[
                strings.BACKUP_DELAY_SECONDS]
            self.ping_seconds = server_info[strings.PING_SECONDS]
            self.max_games = server_info[strings.MAX_GAMES]
            self.remove_canceled_games = server_info[
                strings.REMOVE_CANCELED_GAMES]
            self.users = Users.from_dict(server_info[strings.USERS])
            self.available_maps = server_info[strings.AVAILABLE_MAPS]
            self.maps_mtime = server_info[strings.MAPS_MTIME]
            # games and map are loaded from disk.
        else:
            LOGGER.info("Creating server.json.")
            self.users = Users()
            self.backup_now(force=True)
        # Add default accounts.
        # NOTE(review): default 'admin'/'password' credentials are created
        # here — presumably intended for local development only; confirm.
        for (username, password) in (('admin', 'password'),
                                     (constants.PRIVATE_BOT_USERNAME,
                                      constants.PRIVATE_BOT_PASSWORD)):
            if not self.users.has_username(username):
                self.users.add_user(username, common.hash_password(password))
        # Set default admin account.
        self.users.add_admin('admin')

        self._load_available_maps()

        LOGGER.info('Server loaded.')

    def _backup_server_data_now(self, force=False):
        """ Save latest backed-up version of server data on disk. This does not save games.

            :param force: if True, force to save current server data,
                even if it was not modified recently.
        """
        if force:
            self.save_data()
        # self.backup_server holds the pending (not yet flushed) snapshot;
        # None means there is nothing new to write.
        if self.backup_server:
            save_json_on_disk(self._get_server_data_filename(),
                              self.backup_server)
            self.backup_server = None
            LOGGER.info("Saved server.json.")

    def _backup_games_now(self, force=False):
        """ Save latest backed-up versions of loaded games on disk.

            :param force: if True, force to save all games currently loaded in memory
                even if they were not modified recently.
        """
        ensure_path(self.games_path)
        if force:
            for server_game in self.games.values():
                self.save_game(server_game)
        # self.backup_games maps game ID to its pending JSON snapshot.
        for game_id, game_dict in self.backup_games.items():
            game_path = os.path.join(self.games_path, '%s.json' % game_id)
            save_json_on_disk(game_path, game_dict)
            LOGGER.info('Game data saved: %s', game_id)
        self.backup_games.clear()

    def backup_now(self, force=False):
        """ Save backup of server data and loaded games immediately.

            :param force: if True, force to save server data and all loaded games
                even if there are no recent changes.
        """
        self._backup_server_data_now(force=force)
        self._backup_games_now(force=force)

    @gen.coroutine
    def _process_game(self, server_game):
        """ Process given game and send relevant notifications.

            :param server_game: server game to process
            :return: A boolean indicating if we must stop game
                (True => game must be unscheduled).
            :type server_game: ServerGame
        """
        LOGGER.debug('Processing game %s (status %s).', server_game.game_id,
                     server_game.status)
        previous_phase_data, current_phase_data, kicked_powers = server_game.process(
        )
        self.save_game(server_game)

        if previous_phase_data is None and kicked_powers is None:
            # Game must be unscheduled immediately.
            return True

        notifier = Notifier(self)
        if kicked_powers:
            # Game was not processed because of kicked powers.
            # We notify those kicked powers and game must be unscheduled immediately.
            kicked_addresses = [(power_name, token)
                                for (power_name,
                                     tokens) in kicked_powers.items()
                                for token in tokens]
            # Notify kicked players.
            notifier.notify_game_addresses(
                server_game.game_id,
                kicked_addresses,
                notifications.PowersControllers,
                powers=server_game.get_controllers(),
                timestamps=server_game.get_controllers_timestamps())
            return True

        # Game was processed normally.
        # Send game updates to powers, observers and omniscient observers.
        yield notifier.notify_game_processed(server_game, previous_phase_data,
                                             current_phase_data)

        # If game is completed, we must close associated DAIDE port.
        if server_game.is_game_done:
            self.stop_daide_server(server_game.game_id)

        # Game must be stopped if not active.
        return not server_game.is_game_active

    @gen.coroutine
    def _task_save_database(self):
        """ IO loop callable: save database and loaded games periodically.
            Data to save are checked every BACKUP_DELAY_SECONDS seconds.
        """
        LOGGER.info('Waiting for save events.')
        while True:
            yield gen.sleep(self.backup_delay_seconds)
            self.backup_now()

    @gen.coroutine
    def _task_send_notifications(self):
        """ IO loop callback: consume notifications and send it.

            Runs forever; closed-connection errors are logged and the
            notification is dropped.
        """
        LOGGER.info('Waiting for notifications to send.')
        while True:
            connection_handler, notification = yield self.notifications.get()
            try:
                yield connection_handler.write_message(notification)
            except WebSocketClosedError:
                LOGGER.error(
                    'Websocket was closed while sending a notification.')
            except StreamClosedError:
                LOGGER.error('Stream was closed while sending a notification.')
            finally:
                self.notifications.task_done()

    def set_tasks(self, io_loop: IOLoop):
        """ Set server callbacks on given IO loop.
            Must be called once per server before starting IO loop.
        """
        io_loop.add_callback(self._task_save_database)
        io_loop.add_callback(self._task_send_notifications)
        # These both coroutines are used to manage games.
        io_loop.add_callback(self.games_scheduler.process_tasks)
        io_loop.add_callback(self.games_scheduler.schedule)
        # Set callback on KeyboardInterrupt.
        signal.signal(signal.SIGINT, self.interruption_handler.handler)
        # Flush pending backups on interpreter exit.
        atexit.register(self.backup_now)

    def start(self, port=None, io_loop=None):
        """ Start server if not yet started. Raise an exception if server is already started.

            :param port: (optional) port where server must run. If not provided,
                try to start on a random selected port. Use property `port` to get current server port.
            :param io_loop: (optional) tornado IO lopp where server must run. If not provided, get
                default IO loop instance (tornado.ioloop.IOLoop.instance()).
        """
        if self.backend is not None:
            raise exceptions.DiplomacyException(
                'Server is already running on port %s.' % self.backend.port)
        if port is None:
            port = 8432
        if io_loop is None:
            io_loop = tornado.ioloop.IOLoop.instance()
        handlers = [
            tornado.web.url(r"/", ConnectionHandler, {'server': self}),
        ]
        settings = {
            'cookie_secret': common.generate_token(),
            'xsrf_cookies': True,
            'websocket_ping_interval': self.ping_seconds,
            'websocket_ping_timeout': 2 * self.ping_seconds,
            'websocket_max_message_size': 64 * 1024 * 1024
        }
        self.backend = _ServerBackend()
        self.backend.application = tornado.web.Application(
            handlers, **settings)
        self.backend.http_server = self.backend.application.listen(port)
        self.backend.io_loop = io_loop
        self.backend.port = port
        self.set_tasks(io_loop)
        LOGGER.info('Running on port %d', self.backend.port)
        # Only start the loop if the caller has not already started it.
        if not io_loop.asyncio_loop.is_running():
            io_loop.start()

    def get_game_indices(self):
        """ Iterate over all game indices in server database.
            Convenient method to iterate over all server games (by calling load_game() on each game index).
            Yields in-memory game IDs first, then IDs found on disk that are
            not already loaded.
        """
        for game_id in self.games:
            yield game_id
        if os.path.isdir(self.games_path):
            for filename in os.listdir(self.games_path):
                if filename.endswith('.json'):
                    game_id = filename[:-5]  # strip the ".json" extension
                    if game_id not in self.games:
                        yield game_id

    def count_server_games(self):
        """ Return number of server games in server database (on disk). """
        count = 0
        if os.path.isdir(self.games_path):
            for filename in os.listdir(self.games_path):
                if filename.endswith('.json'):
                    count += 1
        return count

    def save_data(self):
        """ Update on-memory backup of server data.
            The snapshot is flushed to disk later by _backup_server_data_now().
        """
        self.backup_server = {
            strings.ALLOW_REGISTRATIONS: self.allow_registrations,
            strings.BACKUP_DELAY_SECONDS: self.backup_delay_seconds,
            strings.PING_SECONDS: self.ping_seconds,
            strings.MAX_GAMES: self.max_games,
            strings.REMOVE_CANCELED_GAMES: self.remove_canceled_games,
            strings.USERS: self.users.to_dict(),
            strings.AVAILABLE_MAPS: self.available_maps,
            strings.MAPS_MTIME: self.maps_mtime,
        }

    def save_game(self, server_game):
        """ Update on-memory version of given server game.

            :param server_game: server game
            :type server_game: ServerGame
        """
        self.backup_games[server_game.game_id] = server_game.to_dict()
        # Check dummy powers for a game every time we have to save it.
        self.register_dummy_power_names(server_game)

    def register_dummy_power_names(self, server_game):
        """ Update internal registry of dummy power names waiting for orders for given server games.

            :param server_game: server game to check
            :type server_game: ServerGame
        """
        if server_game.map.root_map != 'standard':
            # Bot does not currently support other maps.
            return
        dummy_power_names = []
        if server_game.is_game_active or server_game.is_game_paused:
            dummy_power_names = server_game.get_dummy_unordered_power_names()
            if dummy_power_names:
                # Update registry of dummy powers.
                self.games_with_dummy_powers[
                    server_game.game_id] = dummy_power_names
                # Every time we update registry of dummy powers,
                # then we also update bot time in registry of dummy powers associated to bot tokens.
                bot_token, _ = self.dispatched_dummy_powers.get(
                    server_game.game_id, (None, None))
                self.dispatched_dummy_powers[server_game.game_id] = (
                    bot_token, common.timestamp_microseconds())
        if not dummy_power_names:
            # No waiting dummy powers for this game, or game is not playable (canceled, completed, or forming).
            self.games_with_dummy_powers.pop(server_game.game_id, None)
            self.dispatched_dummy_powers.pop(server_game.game_id, None)

    def get_dummy_waiting_power_names(self, buffer_size, bot_token):
        """ Return names of dummy powers waiting for orders for current loaded games.
            This query is allowed only for bot tokens.

            :param buffer_size: maximum number of powers queried.
            :param bot_token: bot token
            :return: a dictionary mapping each game ID to a list of power names.
            :raise diplomacy.utils.exceptions.ResponseException: if the token
                does not belong to the private bot account.
        """
        if self.users.get_name(bot_token) != constants.PRIVATE_BOT_USERNAME:
            raise exceptions.ResponseException('Invalid bot token %s' %
                                               bot_token)
        selected_size = 0
        selected_games = {}
        # Sorted for a deterministic iteration order over game IDs.
        for game_id in sorted(list(self.games_with_dummy_powers.keys())):
            registered_token, registered_time = self.dispatched_dummy_powers[
                game_id]
            if registered_token is not None:
                # Convert microseconds to seconds.
                time_elapsed_seconds = (common.timestamp_microseconds() -
                                        registered_time) / 1000000
                if time_elapsed_seconds > constants.PRIVATE_BOT_TIMEOUT_SECONDS or registered_token == bot_token:
                    # This game still has dummy powers but, either time allocated to previous bot token is over,
                    # or bot dedicated to this game is asking for current dummy powers of this game.
                    # Forget previous bot token.
                    registered_token = None
            if registered_token is None:
                # This game is not associated to any bot token.
                # Let current bot token handle it if buffer size is not reached.
                dummy_power_names = self.games_with_dummy_powers[game_id]
                nb_powers = len(dummy_power_names)
                if selected_size + nb_powers > buffer_size:
                    # Buffer size would be exceeded. We stop to collect games now.
                    break
                # Otherwise we collect this game.
                selected_games[game_id] = dummy_power_names
                selected_size += nb_powers
                self.dispatched_dummy_powers[game_id] = (
                    bot_token, common.timestamp_microseconds())
        return selected_games

    def has_game_id(self, game_id):
        """ Return True if server database contains such game ID
            (either loaded in memory or stored on disk).
        """
        if game_id in self.games:
            return True
        expected_game_path = os.path.join(self.games_path,
                                          '%s.json' % game_id)
        return os.path.exists(expected_game_path) and os.path.isfile(
            expected_game_path)

    def load_game(self, game_id):
        """ Return a game matching given game ID from server database.
            Raise an exception if such game does not exists.

            If such game is already stored in server object, return it.

            Else, load it from disk but **does not store it in server object**.

            To load and immediately store a game object in server object, please use method get_game().

            Method load_game() is convenient when you want to iterate over all games in server database
            without taking memory space.

            :param game_id: ID of game to load.
            :return: a ServerGame object
            :rtype: ServerGame
        """
        if game_id in self.games:
            return self.games[game_id]
        game_filename = os.path.join(ensure_path(self.games_path),
                                     '%s.json' % game_id)
        if not os.path.isfile(game_filename):
            raise exceptions.GameIdException()
        try:
            server_game = ServerGame.from_dict(
                load_json_from_disk(game_filename))  # type: ServerGame
            server_game.server = self
            # Drop usernames/tokens that no longer exist in the user database.
            server_game.filter_usernames(self.users.has_username)
            server_game.filter_tokens(self.users.has_token)
            return server_game
        except ValueError as exc:
            # Error occurred while parsing JSON file: bad JSON file.
            # Remove the corrupted file, then re-raise.
            try:
                os.remove(game_filename)
            finally:
                # This should be an internal server error.
                raise exc

    def add_new_game(self, server_game):
        """ Add a new game data on server in memory and perform any addition processing.
            This does not save the game on disk.

            :type server_game: ServerGame
        """
        # Register game on memory.
        self.games[server_game.game_id] = server_game
        # Start DAIDE server for this game.
        self.start_new_daide_server(server_game.game_id)

    def get_game(self, game_id):
        """ Return game saved on server matching given game ID.
            Raise an exception if game ID not found.
            Return game if already loaded on memory, else load it from disk, store it,
            perform any loading/addition processing and return it.

            :param game_id: ID of game to load.
            :return: a ServerGame object.
            :rtype: ServerGame
        """
        server_game = self.load_game(game_id)
        if game_id not in self.games:
            LOGGER.debug('Game loaded: %s', game_id)
            # Check dummy powers for this game as soon as it's loaded from disk.
            self.register_dummy_power_names(server_game)
            # Register game on memory.
            self.games[server_game.game_id] = server_game
            # Start DAIDE server for this game.
            self.start_new_daide_server(server_game.game_id)
            # We have just loaded game from disk. Start it if necessary.
            if not server_game.start_master and server_game.has_expected_controls_count(
            ):
                # We may have to start game.
                if server_game.does_not_wait():
                    # We must process game.
                    server_game.process()
                    self.save_game(server_game)
                # Game must be scheduled only if active.
                if server_game.is_game_active:
                    LOGGER.debug('Game loaded and scheduled: %s',
                                 server_game.game_id)
                    self.schedule_game(server_game)
        return server_game

    def delete_game(self, server_game):
        """ Delete given game from server (both from memory and disk)
            and perform any post-deletion processing.

            :param server_game: game to delete
            :type server_game: ServerGame
        """
        if not (server_game.is_game_canceled
                or server_game.is_game_completed):
            server_game.set_status(strings.CANCELED)
        game_filename = os.path.join(self.games_path,
                                     '%s.json' % server_game.game_id)
        backup_game_filename = get_backup_filename(game_filename)
        if os.path.isfile(game_filename):
            os.remove(game_filename)
        if os.path.isfile(backup_game_filename):
            os.remove(backup_game_filename)
        # Drop every in-memory reference to this game.
        self.games.pop(server_game.game_id, None)
        self.backup_games.pop(server_game.game_id, None)
        self.games_with_dummy_powers.pop(server_game.game_id, None)
        self.dispatched_dummy_powers.pop(server_game.game_id, None)
        # Stop DAIDE server associated to this game.
        self.stop_daide_server(server_game.game_id)

    @gen.coroutine
    def schedule_game(self, server_game):
        """ Add a game to scheduler only if game has a deadline and is not already scheduled.
            To add games without deadline, use force_game_processing().

            :param server_game: game
            :type server_game: ServerGame
        """
        if not (yield self.games_scheduler.has_data(server_game)
                ) and server_game.deadline:
            yield self.games_scheduler.add_data(server_game,
                                                server_game.deadline)

    @gen.coroutine
    def unschedule_game(self, server_game):
        """ Remove a game from scheduler.

            :param server_game: game
            :type server_game: ServerGame
        """
        if (yield self.games_scheduler.has_data(server_game)):
            yield self.games_scheduler.remove_data(server_game)

    @gen.coroutine
    def force_game_processing(self, server_game):
        """ Add a game to scheduler to be processed as soon as possible.
            Use this method instead of schedule_game() to explicitly add games with null deadline.

            :param server_game: game
            :type server_game: ServerGame
        """
        yield self.games_scheduler.no_wait(server_game, server_game.deadline,
                                           lambda g: g.does_not_wait())

    def start_game(self, server_game):
        """ Start given server game.

            :param server_game: server game
            :type server_game: ServerGame
        """
        server_game.set_status(strings.ACTIVE)
        # NOTE(review): schedule_game() is a coroutine called without yield
        # here — the returned Future is discarded (fire-and-forget on the IO
        # loop); confirm this is intended.
        self.schedule_game(server_game)
        Notifier(self).notify_game_status(server_game)

    def stop_game_if_needed(self, server_game):
        """ Stop game if it has not required number of controlled powers.
            Notify game if status changed.

            :param server_game: game to check
            :param server_game: game
            :type server_game: ServerGame
        """
        if server_game.is_game_active and (
                server_game.count_controlled_powers() <
                server_game.get_expected_controls_count()):
            server_game.set_status(strings.FORMING)
            # NOTE(review): fire-and-forget coroutine call (see start_game).
            self.unschedule_game(server_game)
            Notifier(self).notify_game_status(server_game)

    def user_is_master(self, username, server_game):
        """ Return True if given username is a game master for given game data.

            :param username: username
            :param server_game: game data
            :return: a boolean
            :type server_game: ServerGame
            :rtype: bool
        """
        return self.users.has_admin(username) or server_game.is_moderator(
            username)

    def user_is_omniscient(self, username, server_game):
        """ Return True if given username is omniscient for given game data.

            :param username: username
            :param server_game: game data
            :return: a boolean
            :type server_game: ServerGame
            :rtype: bool
        """
        return (self.users.has_admin(username)
                or server_game.is_moderator(username)
                or server_game.is_omniscient(username))

    def token_is_master(self, token, server_game):
        """ Return True if given token is a master token for given game data.

            :param token: token
            :param server_game: game data
            :return: a boolean
            :type server_game: ServerGame
            :rtype: bool
        """
        return (self.users.has_token(token) and self.user_is_master(
            self.users.get_name(token), server_game))

    def token_is_omniscient(self, token, server_game):
        """ Return True if given token is omniscient for given game data.

            :param token: token
            :param server_game: game data
            :return: a boolean
            :type server_game: ServerGame
            :rtype: bool
        """
        return (self.users.has_token(token) and self.user_is_omniscient(
            self.users.get_name(token), server_game))

    def create_game_id(self):
        """ Create and return a game ID not already used by a game in server database.
            IDs are 16-character URL-safe base64 strings ('-' and '_' as the
            two extra alphabet characters).
        """
        game_id = base64.b64encode(os.urandom(12), b'-_').decode('utf-8')
        while self.has_game_id(game_id):
            game_id = base64.b64encode(os.urandom(12), b'-_').decode('utf-8')
        return game_id

    def remove_token(self, token):
        """ Disconnect given token from related user and loaded games.
            Stop related games if needed, e.g. if a game does not have anymore
            expected number of controlled powers.
        """
        self.users.disconnect_token(token)
        for server_game in self.games.values():  # type: ServerGame
            server_game.remove_token(token)
            self.stop_game_if_needed(server_game)
            self.save_game(server_game)
        self.save_data()

    def assert_token(self, token, connection_handler):
        """ Check if given token is associated to an user, check if token is still valid,
            and link token to given connection handler. If any step failed, raise an exception.

            :param token: token to check
            :param connection_handler: connection handler associated to this token
            :raise diplomacy.utils.exceptions.TokenException: if the token is
                unknown or expired.
        """
        if not self.users.has_token(token):
            raise exceptions.TokenException()
        if self.users.token_is_alive(token):
            # Valid token: extend its lifetime.
            self.users.relaunch_token(token)
            self.save_data()
        else:
            # Logout on server side and raise exception (invalid token).
            LOGGER.error('Token too old %s', token)
            self.remove_token(token)
            raise exceptions.TokenException()
        self.users.attach_connection_handler(token, connection_handler)

    def assert_admin_token(self, token):
        """ Check if given token is an admin token. Raise an exception on error. """
        if not self.users.token_is_admin(token):
            raise exceptions.AdminTokenException()

    def assert_master_token(self, token, server_game):
        """ Check if given token is a master token for given game data. Raise an exception on error.

            :param token: token
            :param server_game: game data
            :type server_game: ServerGame
        """
        if not self.token_is_master(token, server_game):
            raise exceptions.GameMasterTokenException()

    def cannot_create_more_games(self):
        """ Return True if server can not accept new games
            (max_games == 0 means unlimited). """
        return self.max_games and self.count_server_games() >= self.max_games

    def get_map(self, map_name):
        """ Return map power names for given map name, or None if unknown map. """
        return self.available_maps.get(map_name, None)

    def start_new_daide_server(self, game_id, port=None):
        """ Start a new DAIDE TCP server to handle DAIDE clients connections

            :param game_id: game id to pass to the DAIDE server
            :param port: the port to use. If None, an available random port will be used
            :return: the port the server listens on, or None if a DAIDE server
                already exists for this game.
        """
        if port in self.daide_servers:
            raise RuntimeError('Port already in used by a DAIDE server')

        # At most one DAIDE server per game.
        for server in self.daide_servers.values():
            if server.game_id == game_id:
                return None

        # Pick a random free port in [8000, 8999].
        while port is None or is_port_opened(port):
            port = randint(8000, 8999)

        # Create DAIDE TCP server
        daide_server = DaideServer(self, game_id)
        daide_server.listen(port)
        self.daide_servers[port] = daide_server
        LOGGER.info('DAIDE server running for game %s on port %d', game_id,
                    port)
        return port

    def stop_daide_server(self, game_id):
        """ Stop one or all DAIDE TCP server

            :param game_id: game id of the DAIDE server. If None, all servers will be stopped
            :type game_id: str
        """
        # Iterate over a copy of the keys since we delete while iterating.
        for port in list(self.daide_servers.keys()):
            server = self.daide_servers[port]
            if game_id is None or server.game_id == game_id:
                server.stop()
                del self.daide_servers[port]

    def get_daide_port(self, game_id):
        """ Get the DAIDE port opened for a specific game_id, or None if no
            DAIDE server runs for this game.

            :param game_id: game id of the DAIDE server.
        """
        for port, server in self.daide_servers.items():
            if server.game_id == game_id:
                return port
        return None
class BatchedStream(object):
    """ Mostly obsolete, see BatchedSend

        Wraps a stream so that outgoing messages sent within ``interval``
        milliseconds of each other are written over the wire as one batch
        (a list of messages), and incoming batches are split back into
        individual messages by ``recv()``.  The string ``"close"`` is used
        as an in-band sentinel on both queues to signal shutdown.
    """

    def __init__(self, stream, interval):
        self.stream = stream
        self.interval = interval / 1000.0  # interval is given in milliseconds
        self.last_transmission = default_timer()
        self.send_q = Queue()  # outgoing messages, drained by _background_send
        self.recv_q = Queue()  # incoming messages, filled by _background_recv
        # Calling the coroutines starts them running on the IO loop;
        # the returned futures are kept to hold references to them.
        self._background_send_coroutine = self._background_send()
        self._background_recv_coroutine = self._background_recv()
        self._broken = None  # set to True once the underlying stream closes
        # NOTE(review): no-op periodic callback — presumably keeps the IO
        # loop spinning; confirm before removing.
        self.pc = PeriodicCallback(lambda: None, 100)
        self.pc.start()

    @gen.coroutine
    def _background_send(self):
        """ Background task: batch queued messages and write them to the stream.

            Waits out the remainder of ``interval`` since the last
            transmission, then drains everything currently queued into one
            write.  Exits on the ``"close"`` sentinel or a closed stream.
        """
        with log_errors():
            while True:
                msg = yield self.send_q.get()
                if msg == "close":
                    break
                msgs = [msg]
                now = default_timer()
                wait_time = self.last_transmission + self.interval - now
                if wait_time > 0:
                    yield gen.sleep(wait_time)
                # Drain whatever accumulated while we slept into this batch.
                while not self.send_q.empty():
                    msgs.append(self.send_q.get_nowait())

                try:
                    yield write(self.stream, msgs)
                except StreamClosedError:
                    # Propagate closure to the receive side and mark broken.
                    # NOTE(review): the messages collected into `msgs` never
                    # get task_done() on this path, so a later flush()
                    # (send_q.join()) may hang — confirm intended.
                    self.recv_q.put_nowait("close")
                    self._broken = True
                    break

                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                # Acknowledge each message so send_q.join() (flush) can return.
                for _ in msgs:
                    self.send_q.task_done()

    @gen.coroutine
    def _background_recv(self):
        """ Background task: read batches from the stream and enqueue each
            message individually on recv_q.  On stream closure, pushes the
            ``"close"`` sentinel to both queues so send/recv sides unblock.
        """
        with log_errors():
            while True:
                try:
                    msgs = yield read(self.stream)
                except StreamClosedError:
                    self.recv_q.put_nowait("close")
                    self.send_q.put_nowait("close")
                    self._broken = True
                    break
                assert isinstance(msgs, list)
                if len(msgs) > 1:
                    logger.debug("Batched messages: %d", len(msgs))
                for msg in msgs:
                    self.recv_q.put_nowait(msg)

    @gen.coroutine
    def flush(self):
        """ Wait until every queued outgoing message has been transmitted. """
        yield self.send_q.join()

    @gen.coroutine
    def send(self, msg):
        """ Enqueue one outgoing message.

            :raise StreamClosedError: if the underlying stream already closed.
        """
        if self._broken:
            raise StreamClosedError("Batch Stream is Closed")
        else:
            self.send_q.put_nowait(msg)

    @gen.coroutine
    def recv(self):
        """ Receive the next single message (batches are already unpacked).

            :raise StreamClosedError: if the stream closed (sentinel received).
        """
        result = yield self.recv_q.get()
        if result == "close":
            raise StreamClosedError("Batched Stream is Closed")
        else:
            raise gen.Return(result)

    @gen.coroutine
    def close(self):
        """ Flush pending outgoing messages, then close the underlying stream. """
        yield self.flush()
        raise gen.Return(self.stream.close())

    def closed(self):
        """ Return True if the underlying stream is closed. """
        return self.stream.closed()
class CommunicatorService:
    """ Central dispatcher for STB (set-top box) communication.

    Consumes CommunicationData items from an internal queue and drives the
    DLNA/SSDP discovery flow plus the jLabs remote-control protocol.
    """

    def __init__(self, dlna_service, host, ua, target_server_name):
        # Bounded work queue of CommunicationData items processed by run().
        self.__queue = Queue(maxsize=10)
        self.dlna_service = dlna_service
        # TODO: convert to a property later so it cannot be rewritten
        self.ua = ua
        self.target_server_name = target_server_name
        self.is_start_ssdp_notify = self.dlna_service.is_start_ssdp_notify
        self.host = host
        # Client (stream) used to talk the jLabs protocol; set later via
        # Action.STB_SAVE_JLABS_SERVER_INFO.
        self.jlabs_client = None

    def add(self, comm_data: CommunicationData):
        """ Enqueue one work item for the run() loop. """
        # NOTE(review): tornado.queues.Queue.put returns a Future that is ignored
        # here — confirm callers never fill the queue past maxsize.
        self.__queue.put(comm_data)

    async def run(self):
        """ Main loop: dispatch each queued item by its Action. """
        async for item in self.__queue:
            try:
                if item.action == Action.STB_START_CONNECTION:
                    self.dlna_service.add(DlnaCommuData(DlnaAction.START_SSDP_NOTIFY))
                    # ToDo: instead of sleeping, advance when an SSDP NOTIFY
                    # response has actually been observed.
                    logger.debug('pre sleep')
                    await tornado.gen.sleep(5)
                    logger.debug('post sleep')
                    # Obtain the STB-side endpoint via SSDP M-SEARCH.
                    response = self.dlna_service.post(DlnaCommuData(DlnaAction.SSDP_M_SEARCH))
                    logger.debug('post msearch')
                    for res_item in response:
                        logger.debug(res_item)
                        logger.debug('location: %s' % res_item['location'])
                        logger.debug('server: %s' % res_item['server'])
                        if res_item['server'] == self.target_server_name:
                            location = res_item['location']
                            # ToDo: hit the STB endpoint obtained above over HTTP.
                            response = jlabs_protocol.get_stb_info(location, self.ua)
                            logger.debug('get_stb_info response')
                            logger.debug(response)
                            break
                    # After this, requests arrive on port 5090 and handling
                    # continues there.
                elif item.action == Action.STB_STOP_CONNECTION:
                    self.dlna_service.add(DlnaCommuData(DlnaAction.STOP_SSDP_NOTIFY))
                elif item.action == Action.STB_SAVE_JLABS_SERVER_INFO:
                    # The stream is handed over directly from the port-5080
                    # server loop.
                    # MEMO: design-wise this is admittedly too direct / dirty.
                    server_address = item.args
                    self.jlabs_client = server_address
                elif item.action == Action.JLABS_DATA_FROM_STB:
                    self.__handle_jlabs(item.args)
                elif item.action == Action.STB_TOGGLE_POWER:
                    # Same action as pressing the power button on the remote.
                    self.__request_jlsbs_send_key("VK_POWER")
                elif item.action == Action.STB_CHANNEL_DISCOVERY:
                    # Action taken when the Discovery channel is selected.
                    self.__request_jlabs_channel(Channel.DISCOVERY)
            finally:
                self.__queue.task_done()

    def __send_jlabs_client(self, data):
        """ Serialize *data* as JSON and write it to the jLabs client stream. """
        # MEMO: without the trailing '\x04' the peer does not recognize the
        # message as terminated.
        self.jlabs_client.write(json.dumps(data).encode('utf-8') + b'\x04')

    def __request_jlsbs_send_key(self, key_id):
        """ Send a single remote-control keypress (rcKey request) to the STB. """
        rc_key_request_json = {"param": {"type": "keypress", "keyCode": key_id},
                               "sequenceID": "rcKey",
                               "request": "rcKey"}
        self.__send_jlabs_client(rc_key_request_json)

    def __request_jlabs_channel(self, channel):
        """ Tune to *channel* by emitting the corresponding key sequence. """
        if channel == Channel.DISCOVERY:
            # Channel number 652 followed by Enter.
            self.__request_jlsbs_send_key("VK_6")
            self.__request_jlsbs_send_key("VK_5")
            self.__request_jlsbs_send_key("VK_2")
            self.__request_jlsbs_send_key("VK_ENTER")

    def __handle_jlabs(self, args):
        """ Handle one decoded jLabs message (either a request or a response). """
        if args.get('request') is not None:
            if args.get('request') == 'startWiFiPairing':
                # 1. On receiving a startWiFiPairing request, reply with a
                #    response acknowledging it.
                # 2. Then issue the next request, getMWVersion.
                start_wifi_pairing_response = \
                    {"response": "startWiFiPairing",
                     "sequenceID": args.get('sequenceID'),
                     "result": 1,
                     "errorCode": "",
                     "data": {"permission": True}}
                self.__send_jlabs_client(start_wifi_pairing_response)
                logger.info("Remote Controller service is now available.")
                # Everything below is experimental; it is not needed if only the
                # remote control is used.
                # ToDo: check whether getMWVersion is needed at all.
                # Fire the follow-up request.
                get_mw_version_request = \
                    {"param": {},
                     "sequenceID": "getMWVersion",
                     "request": "getMWVersion"}
                # MEMO: without the trailing '\x04' the peer does not recognize
                # the message as terminated (handled in __send_jlabs_client).
                self.__send_jlabs_client(get_mw_version_request)
        elif args.get('response') is not None:
            if args.get('response') == 'getMWVersion':
                # The getMWVersion response content does not matter here.
                # After receiving it, the next call would be getReservationList.
                get_reservation_list_request = {"param": {"sort": 0},
                                                "sequenceID": "getReservationList",
                                                "request": "getReservationList"}
                # ToDo: believed unnecessary for remote-control operation, so
                # disabled for now.
                # await self.__send_jlabs_client(get_reservation_list_request)
            elif args.get('response') == 'getReservationList':
                # The getReservationList response is unused for now.
                # Next, call getChannels.
                get_channels_request = {"param": {"networkType": 0},
                                        "sequenceID": "getChannels",
                                        "request": "getChannels"}
                self.__send_jlabs_client(get_channels_request)
            elif args.get('response') == 'getChannels':
                # First the terrestrial list is fetched; afterwards BS and CS
                # are requested in turn.  The results are not used yet.
                get_channels_request = {"param": {"networkType": 0},
                                        "sequenceID": "getChannels",
                                        "request": "getChannels"}
                if args.get('data').get('items')[0].get('networkType') == 0:
                    get_channels_request['param']['networkType'] = 1
                    self.__send_jlabs_client(get_channels_request)
                elif args.get('data').get('items')[0].get('networkType') == 1:
                    get_channels_request['param']['networkType'] = 2
                    self.__send_jlabs_client(get_channels_request)
                elif args.get('data').get('items')[0].get('networkType') == 2:
                    # getChannels is done, so move on.
                    # ToDo: implementation stops here for now.
                    pass