def _create_workers(self, pool_size):
    """Create and start one worker thread per queue.

    Worker number ``i`` is bound to ``self._queues[i]``; the started threads
    are collected in ``self._threads``.

    pool_size -- number of workers to start (queues 0..pool_size-1 are used).
    """
    workers = self._threads = []

    for queue_index in xrange(pool_size):
        queue = self._queues[queue_index]
        worker = ProfiledThread(
            target=self._worker,
            args=(queue,),
            name_prefix='TagReprModifier')
        # Register the thread before starting it, as the original order does.
        workers.append(worker)
        worker.start()
def Start(self):
    """Start the journal, the repr modifier and the cloud client, in that order.

    When a cloud tags server is configured a real cloud client is created
    (and, if masks are configured, the masks reload thread is started first).
    Otherwise a dummy client is installed that is constructed with a
    RuntimeError describing the wrong setup.  Finally the safe-cloud wrapper
    is created and all in-memory cloud tags are subscribed.
    """
    self._journal.Start()
    logging.debug("after_journal_start")

    self._repr_modifier.Start()
    logging.debug("after_repr_modifier_start")

    if not self._cloud_tags_server:
        # For CloudTag's in tags.db
        setup_error = RuntimeError("Wrong setup for cloud tags in rem-server")
        self._cloud = SerialUpdateOnlyDummyCloudClient(setup_error)
    else:
        if self._cloud_tags_masks:
            reload_thread = ProfiledThread(
                target=self._masks_reload_loop,
                name_prefix='TagsMasksReload')
            self._masks_reload_thread = reload_thread
            reload_thread.start()
            logging.debug("after_masks_reload_thread_start")

        self._cloud = self._create_cloud_client(self._on_cloud_journal_event)

    self._create_safe_cloud()
    logging.debug("after_safe_cloud_start")

    self._subscribe_all()
    logging.debug("after_subscribe_all")
def __init__(self, connection_ctor_ctor, on_event):
    """Initialise client state and start the connect thread.

    connection_ctor_ctor -- called with ``_make_protobuf_connection``; its
                            result is stored as the connection constructor.
    on_event             -- callback stored for journal events.
    """
    self._connection_constructor = connection_ctor_ctor(_make_protobuf_connection)
    self._on_event = on_event

    # Stop-related state.
    self._stopped = False
    self._should_stop = self._ST_NONE

    # Message bookkeeping.
    self._outgoing = deque()
    self._running = {}
    self._subscriptions = set()
    self._next_message_id = 1

    # Current connection state and accumulated I/O statistics.
    self._io = None
    self._io_stats = [0, 0]

    # A single lock shared by every condition variable below.
    lock = threading.Lock()
    self._lock = lock
    self._outgoing_not_empty = threading.Condition(lock)
    self._outgoing_empty = threading.Condition(lock)
    self._should_and_can_stop_cond = threading.Condition(lock)

    # The thread is started last, once all the state above exists.
    connect_thread = ProfiledThread(
        target=self._connect_loop,
        name_prefix='CldTg-Connect')
    self._connect_thread = connect_thread
    connect_thread.start()
def __init__(self):
    """Create the lock, condition and queue, then start the loop thread."""
    lock = threading.Lock()
    self._lock = lock
    # Notified whenever the head of the queue or the stop flag changes.
    self._modified = threading.Condition(lock)

    self._queue = PriorityQueue()
    self._next_id = 1
    self._should_stop = False

    # Started last so that _the_loop sees fully initialised state.
    loop_thread = ProfiledThread(
        target=self._the_loop,
        name_prefix='DelayedExecutor')
    self._loop_thread = loop_thread
    loop_thread.start()
def __init__(self, server_pid, channel):
    """Set up client state and start the daemon writer and reader threads.

    server_pid -- stored as ``self._server_pid``.
    channel    -- stored, and duplicated via ``dupfdopen`` into a read
                  handle and a write handle.

    Blocks until both threads have set their "inited" events.  Only weak
    references to the threads are kept on the instance.
    """
    # executor_pid, executor_stderr
    self._server_pid = server_pid
    self._server_exit_status = None

    self._channel = channel
    self._channel_in = dupfdopen(channel, 'r')
    self._channel_out = dupfdopen(channel, 'w')

    self._errored = Bool()
    self._input_queue = deque()

    lock = threading.Lock()
    self._lock = lock
    self._queue_not_empty = threading.Condition(lock)

    self._should_stop = Bool()
    self._next_task_id = 1
    self._tasks = {}
    self._fail = self._create_fail()
    self._server_stop = Promise()

    self._read_thread_inited = threading.Event()
    self._write_thread_inited = threading.Event()

    writer = ProfiledThread(
        target=_weak_method(self._write_loop),
        name_prefix='RunnerClnWr')
    self._write_thread = weakref.ref(writer)

    reader = ProfiledThread(
        target=_weak_method(self._read_loop),
        name_prefix='RunnerClnRd')
    self._read_thread = weakref.ref(reader)

    io_threads = (writer, reader)

    for thread in io_threads:
        thread.daemon = True

    for thread in io_threads:
        thread.start()

    # Wait for the reader first, then the writer, as in the original order.
    for inited in (self._read_thread_inited, self._write_thread_inited):
        inited.wait()
def __init__(self, pool_size, *args, **kws):
    """Initialise the base XML-RPC server and create (but not start) threads.

    pool_size   -- number of worker threads, and also the capacity of the
                   queue of incoming requests.
    *args/**kws -- forwarded unchanged to ``SimpleXMLRPCServer.__init__``.
    """
    SimpleXMLRPCServer.__init__(self, *args, **kws)

    port = self.server_address[1]

    self._incoming = StdQueue.Queue(pool_size)

    # Thread names carry the listen port.
    self._main_thread = ProfiledThread(
        target=self.__serve_forever,
        name_prefix='RPCAcc-%d' % port)

    workers = []
    for _ in xrange(pool_size):
        workers.append(
            ProfiledThread(
                target=self.__worker,
                name_prefix='RPCWrk-%d' % port))
    self._workers_threads = workers

    self.__is_stopped = threading.Event()
    self.__should_stop = False
class DelayedExecutor(object):
    """Executes callbacks on a dedicated thread once their deadline passes.

    Tasks are kept in a priority queue keyed by an integer id; the loop
    thread sleeps on a condition variable until the front task is due, the
    front of the queue changes, or stop() is requested.
    """

    def __init__(self):
        """Create the lock, condition and queue, then start the loop thread."""
        lock = threading.Lock()
        self._lock = lock
        self._modified = threading.Condition(lock)
        self._queue = PriorityQueue()
        self._next_id = 1
        self._should_stop = False

        loop_thread = ProfiledThread(
            target=self._the_loop,
            name_prefix='DelayedExecutor')
        self._loop_thread = loop_thread
        loop_thread.start()

    def _cancel(self, id):
        """Remove task `id` from the queue.

        Returns False when the task is no longer queued, True otherwise.
        """
        with self._lock:
            queue = self._queue

            if id not in queue:
                # because of user-space-race with _the_loop
                return False

            # The loop thread may be sleeping on exactly this task: wake it
            # so that it re-reads the front of the queue.
            if queue.front()[0] == id:
                self._modified.notify()

            queue.pop_by_key(id)
            return True

    def add(self, callback, deadline=None, timeout=None):
        """Schedule `callback` and return a cancel function.

        callback -- callable to run; see _the_loop for how it is invoked.
        deadline -- absolute time (time.time() scale) to run at.
        timeout  -- delay in seconds from now; takes priority over deadline.

        Returns a callable that cancels the task (its result is that of
        _cancel); the task id is exposed as its ``id`` attribute.

        Raises ValueError when neither deadline nor timeout is given.
        """
        if timeout is None:
            if deadline is None:
                raise ValueError("You must specify deadline or timeout")
        else:
            deadline = time.time() + timeout

        with self._lock:
            task_id = self._next_id
            self._next_id = task_id + 1

            self._queue.add(task_id, (deadline, callback))

            # The new task became the earliest one: the loop must re-plan.
            if self._queue.front()[0] == task_id:
                self._modified.notify()

        canceller = lambda: self._cancel(task_id)  # TODO use weak self
        canceller.id = task_id
        return canceller

    schedule = add

    def stop(self):
        """Ask the loop thread to exit and wait for it to finish."""
        with self._lock:
            self._should_stop = True
            self._modified.notify()

        self._loop_thread.join()

    def _the_loop(self):
        """Body of the loop thread: wait for the front task, then run it."""
        while not self._should_stop:
            with self._lock:
                while not self._should_stop and not self._queue:
                    self._modified.wait()

                if self._should_stop:
                    return

                task_id, (deadline, callback) = self._queue.front()

                now = time.time()

                if deadline > now:
                    self._modified.wait(deadline - now)

                    if self._should_stop:
                        return

                    # The front changed while sleeping (added or cancelled
                    # task): start over with the new front.
                    front_changed = (
                        not self._queue
                        or self._queue.front()[0] != task_id)
                    if front_changed:
                        continue

                self._queue.pop_front()

            # The callback runs outside the lock.
            try:
                # Callbacks declaring at least one argument receive the id.
                if callback.__code__.co_argcount:
                    callback(task_id)
                else:
                    callback()
            except:
                logging.exception("Failed to execute %s" % callback)

            # Do not keep the callback referenced while waiting for the next.
            del callback
class AsyncXMLRPCServer2(SimpleXMLRPCServer):
    """XML-RPC server with one acceptor thread and a pool of worker threads.

    The acceptor thread puts accepted requests into a bounded queue; worker
    threads take them from the queue and handle them.
    """

    def __init__(self, pool_size, *args, **kws):
        """Initialise the base server and create (but not start) threads.

        pool_size   -- number of workers and capacity of the request queue.
        *args/**kws -- forwarded to ``SimpleXMLRPCServer.__init__``.
        """
        SimpleXMLRPCServer.__init__(self, *args, **kws)

        port = self.server_address[1]

        self._incoming = StdQueue.Queue(pool_size)

        self._main_thread = ProfiledThread(
            target=self.__serve_forever,
            name_prefix='RPCAcc-%d' % port)

        workers = []
        for _ in xrange(pool_size):
            workers.append(
                ProfiledThread(
                    target=self.__worker,
                    name_prefix='RPCWrk-%d' % port))
        self._workers_threads = workers

        self.__is_stopped = threading.Event()
        self.__should_stop = False

    def __worker(self):
        """Worker thread body: handle queued requests until None arrives."""
        incoming = self._incoming

        while True:
            task = incoming.get()

            # None is the stop marker put by __stop_workers.
            if task is None:
                return

            self.__handle_request(*task)

            # Drop the reference before blocking on the queue again.
            del task

    def __handle_request(self, sock, addr):
        """Process one request; the socket is shut down in either outcome."""
        try:
            self.finish_request(sock, addr)
            self.shutdown_request(sock)
        except:
            self.handle_error(sock, addr)
            self.shutdown_request(sock)

    def start(self):
        """Start every worker thread, then the acceptor thread."""
        for worker in self._workers_threads:
            worker.start()

        self._main_thread.start()

    def serve_forever(self, poll_interval=0.5):
        """Not supported: use start() and shutdown() instead."""
        raise NotImplementedError()

    def __serve_forever(self, poll_interval=0.5):
        """Acceptor thread body: accept requests until shutdown is requested.

        On exit closes the server socket, stops the workers, resets the
        connections still queued and signals that the server has stopped.
        """
        while not self.__should_stop:
            readable = select.select([self], [], [], poll_interval)[0]

            if self in readable:
                self.__put_request()

        self.server_close()
        self.__stop_workers()
        self.__reset_requests()
        self.__is_stopped.set()

    def __stop_workers(self):
        """Send one stop marker per worker and wait for all of them."""
        workers = self._workers_threads

        for _ in workers:
            self._incoming.put(None)

        for worker in workers:
            worker.join()

    def __reset_requests(self):
        """Send a reset to every request that was queued but never handled."""
        pending = self._incoming.queue
        self._incoming.queue = deque()

        for sock, _ in pending:
            try:
                _socket_send_reset(sock)
            except Exception as e:
                logging.error("Failed to send RST to RPC client: %s" % e)

    def __put_request(self):
        """Accept one request and queue it for the workers."""
        try:
            request = self.get_request()
        except socket.error:
            logging.error("XMLRPCServer: socket error")
            return

        self._incoming.put(request)

    def shutdown(self):
        """Ask the acceptor thread to stop and wait until it has finished."""
        self.__should_stop = True
        self.__is_stopped.wait()
class TagStorage(object):
    """Storage of tags kept in memory, in an on-disk bsddb file and in cloud.

    Local tag modifications are journaled and applied through the repr
    modifier; cloud tag modifications go through the cloud client (wrapped
    by SafeCloud for calls made from inside the server).
    """

    # Passed as timeout to the cloud client's stop() in Stop().
    CLOUD_CLIENT_STOP_TIMEOUT = 10.0

    def __init__(self, rhs=None):
        """Create an empty storage, optionally adopting state from `rhs`.

        rhs -- either a dict (used as the in-memory tags mapping) or another
               TagStorage (its in-memory tags and, when present, its saved
               safe-cloud state are adopted).
        """
        self.lock = PickableLock()
        self.inmem_items = {}
        self.infile_items = None
        self.db_file = ""
        self.db_file_opened = False
        self._local_tag_modify_lock = threading.Lock()
        self._repr_modifier = TagReprModifier() # TODO pool_size from rem.cfg
        self._journal = TagLogger()
        self._cloud = None
        self._safe_cloud = None
        self._prev_safe_cloud_state = SafeCloud.get_empty_state()
        self._match_cloud_tag = TagsMasks.get_empty_matcher()
        self._masks_reload_thread = None
        self._masks_should_stop = threading.Event()
        self._last_tag_mask_error_report_time = 0
        self._all_tags_in_cloud = None

        if rhs:
            if isinstance(rhs, dict):
                self.inmem_items = rhs

            elif isinstance(rhs, TagStorage):
                self.inmem_items = rhs.inmem_items
                if hasattr(rhs, '_prev_safe_cloud_state'):
                    self._prev_safe_cloud_state = rhs._prev_safe_cloud_state

    def list_cloud_tags_masks(self):
        """Return the regexps of the current cloud tags matcher."""
        return self._match_cloud_tag.regexps

    def PreInit(self):
        """Prepare cloud tags support when a cloud server is configured."""
        if self._cloud_tags_server:
            self._pre_init_cloud_tags_setup()

    def _pre_init_cloud_tags_setup(self):
        """Import cloud modules, load masks and resolve server instances."""
        # Allow to run REM without python-protobuf
        global cloud_client
        import cloud_client
        global cloud_connection
        import cloud_connection

        if self._cloud_tags_masks:
            self._do_initial_cloud_tags_masks_load()

        self._cloud_tags_server_instances \
            = cloud_connection.from_description(self._cloud_tags_server)

    def _do_initial_cloud_tags_masks_load(self):
        """Load cloud tags masks, retrying up to 3 times with 5s pauses.

        Raises RuntimeError when every attempt failed.
        """
        try_count = 3

        for idx in range(try_count):
            try:
                self._match_cloud_tag = self._load_masks()
            except Exception as e:
                logging.warning("Iter #%d of cloud tags masks (%s) loading failed: %s" % (
                    idx + 1, self._cloud_tags_masks, e))
            else:
                break

            # No pause after the last failed attempt.
            if idx != try_count - 1:
                time.sleep(5)
        else:
            # The loop ended without `break`: no attempt succeeded.
            raise RuntimeError("Failed to load cloud tags masks for %d attempts" % try_count)

    def _load_masks(self):
        """Load a tags matcher from the configured masks source."""
        return TagsMasks.load(self._cloud_tags_masks)

    def Start(self):
        """Start journal, repr modifier and cloud client; subscribe tags."""
        self._journal.Start()
        logging.debug("after_journal_start")

        self._repr_modifier.Start()
        logging.debug("after_repr_modifier_start")

        if self._cloud_tags_server:
            if self._cloud_tags_masks:
                self._masks_reload_thread = ProfiledThread(
                    target=self._masks_reload_loop, name_prefix='TagsMasksReload')
                self._masks_reload_thread.start()
                logging.debug("after_masks_reload_thread_start")

            self._cloud = self._create_cloud_client(self._on_cloud_journal_event)
        else:
            # For CloudTag's in tags.db
            self._cloud = SerialUpdateOnlyDummyCloudClient(
                RuntimeError("Wrong setup for cloud tags in rem-server"))

        self._create_safe_cloud()
        logging.debug("after_safe_cloud_start")

        self._subscribe_all()
        logging.debug("after_subscribe_all")

    def _create_cloud_client(self, on_event):
        """Create a cloud client; raises RuntimeError without cloud setup."""
        if not self._has_cloud_setup():
            raise RuntimeError("No cloud tags setup")

        return cloud_client.Client(self._cloud_tags_server_instances, on_event=on_event)

    def _create_safe_cloud(self):
        """Wrap the cloud client in SafeCloud, consuming the saved state."""
        self._safe_cloud = SafeCloud(self._cloud, self._journal, self._prev_safe_cloud_state)
        self._prev_safe_cloud_state = None

    def _subscribe_all(self):
        """Subscribe to every cloud tag currently held in memory."""
        with self.lock:
            cloud_tags = set(
                tag.GetFullname()
                for tag in self.inmem_items.itervalues()
                if tag.IsCloud()
            )

            if cloud_tags:
                self._cloud.subscribe(cloud_tags, with_future=False)

    def _on_cloud_journal_event(self, ev):
        """Apply an event received from the cloud to the in-memory tag.

        Events for unknown or non-cloud tags, and events not newer than the
        local tag version, are ignored (with a log message where relevant).
        """
        with self.lock:
            tag = self.inmem_items.get(ev.tag_name)

        if not tag:
            logging.warning('no object in inmem_items for cloud tag %s' % ev.tag_name)
            return

        if not tag.IsCloud(): # it's like assert
            logging.error('tag %s is not cloud tag in inmem_items but receives event from cloud' % ev.tag_name)
            return

        if tag.version >= ev.version:
            # TODO warn even on equal versions, but not for initial _subscribe_all
            if tag.version > ev.version:
                logging.warning('local version (%d) > journal version (%d) for tag %s' \
                    % (tag.version, ev.version, ev.tag_name))
            return

        def add_event(event, version, msg=None):
            self._repr_modifier.add((tag, event, msg, version))

        # FIXME here with warning, on state sync without it
        if ev.version > ev.last_reset_version and tag.version < ev.last_reset_version:
            # A reset happened between the local version and this event:
            # replay it first.
            logging.debug('overtaking reset %s.%d.%d for %d' % (
                ev.tag_name, ev.version, ev.last_reset_version, tag.version))
            add_event(ETagEvent.Reset, ev.last_reset_version, ev.last_reset_comment) # TODO last_reset_comment is wrong

        add_event(ev.event, ev.version,
                  ev.last_reset_comment if ev.event == ETagEvent.Reset else None)

        logging.debug('after journal event for %s' % ev.tag_name)

    def _masks_reload_loop(self):
        """Periodically reload cloud tags masks until asked to stop.

        A failed load keeps the current masks; so does a load that yields
        zero masks while the current matcher has some.
        """
        while True:
            # wait() returns true when the stop event was set.
            if self._masks_should_stop.wait(self._cloud_tags_masks_reload_interval):
                return

            try:
                match = self._load_masks()
            except Exception as e:
                logging.error("Failed to reload tags' masks from: %s" % e)
                continue

            if self._match_cloud_tag.count and not match.count:
                logging.warning("New cloud tags masks discarded: old count %d, new count %d" % (
                    self._match_cloud_tag.count, match.count))
                continue

            logging.debug("Cloud tag's masks reloaded. Regexp count: %d" % match.count)
            self._match_cloud_tag = match

    def Stop(self):
        """Stop cloud client, masks reloading, repr modifier and journal."""
        self._cloud.stop(timeout=self.CLOUD_CLIENT_STOP_TIMEOUT)
        self._safe_cloud.wait_running_empty()

        if self._masks_reload_thread:
            self._masks_should_stop.set()
            self._masks_reload_thread.join()

        self._repr_modifier.Stop()
        self._journal.Stop()

    def __getstate__(self):
        """Return the picklable state: in-memory tags and safe-cloud state."""
        return {
            'inmem_items': self.inmem_items.copy(),
            '_prev_safe_cloud_state': self._safe_cloud.get_state() if self._safe_cloud \
                else SafeCloud.get_empty_state()
        }

    def _lookup_tags(self, tags):
        """Return a future of {tag name: state dict or None}."""
        return self.__lookup_tags(tags, False)

    def _are_tags_set(self, tags):
        """Return a future of {tag name: bool}."""
        return self.__lookup_tags(tags, True)

    def __lookup_tags(self, tags, as_bools):
        """Look up local tags directly and cloud tags through the client.

        Returns a future whose value is a dict keyed by tag name; the value
        format depends on `as_bools` (see _ret_value below).  When the cloud
        lookup fails the future carries its exception.
        """
        ret = {}

        cloud_tags = set()

        # FIXME not as closure
        def _ret_value(state):
            if as_bools:
                if not state:
                    return False
                elif isinstance(state, TagBase):
                    return state.IsLocallySet()
                else:
                    return state.is_set
            else:
                if not state:
                    return None
                elif isinstance(state, TagBase):
                    return {'is_set': state.IsLocallySet()}
                else:
                    return state.__dict__

        for tag in tags:
            # FIXME Consider that tag may exists as LocalTag in infile_items or inmem_items?
            if self._is_cloud_tag_name(tag):
                cloud_tags.add(tag)
            else:
                # dont_create=True to distinguish unset tags from non-existed
                ret[tag] = _ret_value(self._RawTag(tag, dont_create=True))

        promise = Promise()

        if not cloud_tags:
            promise.set(ret)
            return promise.to_future()

        cloud_done = self._cloud.lookup(cloud_tags)

        def on_cloud_done(f):
            if f.is_success():
                cloud_result = f.get()
                for tag in cloud_tags:
                    ret[tag] = _ret_value(cloud_result.get(tag, None))
                promise.set(ret)
            else:
                promise.set(None, f.get_exception())

        cloud_done.subscribe(on_cloud_done)

        return promise.to_future()

    # For calls from REM guts
    def _modify_cloud_tag_safe(self, tag, event, msg=None):
        """Send a cloud tag update through the safe-cloud wrapper."""
        update = (tag.GetFullname(), event, msg)
        self._set_min_release_time(tag)
        self._safe_cloud.update(update)

    # Hack for hostA:RemoteTag -> hostB:CloudTag (shame on me)
    def _set_min_release_time(self, tag):
        """Forbid releasing `tag` from memory before the release delay."""
        tag._min_release_time = time.time() + self._cloud_tags_release_delay

    # for calls from from RPC
    def _modify_tags_unsafe(self, updates):
        """Apply a batch of (tag name, event[, msg]) updates.

        Cloud tags are updated through the cloud client, the rest locally.
        Returns a future covering whichever kinds of updates were made.
        """
        if not updates:
            return READY_FUTURE

        cloud_updates = []
        local_updates = []

        for update in updates:
            tag_name = update[0]

            tag = self.AcquireTag(tag_name)

            if tag.IsCloud():
                self._set_min_release_time(tag.inner)
                cloud_updates.append(update)
            else:
                # Local updates carry the tag object instead of its name.
                update = list(update)
                update[0] = tag
                local_updates.append(update)

        local_done = self._modify_local_tags(local_updates, with_future=True) if local_updates \
            else None

        cloud_done = self._cloud.update(cloud_updates) if cloud_updates \
            else None

        if local_done is None:
            return cloud_done
        elif cloud_done is None:
            return local_done
        else:
            return CheckAllFuturesSucceed([cloud_done, local_done])

    def _modify_tag_unsafe(self, tagname, event, msg=None):
        """Single-update convenience wrapper around _modify_tags_unsafe."""
        return self._modify_tags_unsafe([(tagname, event, msg)]) # FIXME own faster impl?

    def _modify_local_tags(self, updates, with_future=False):
        """Journal and enqueue local tag updates.

        Returns None unless `with_future` is true; then returns the single
        future, or a combined future when there were several updates.
        """
        done = []

        with self._local_tag_modify_lock: # FIXME
            for update in updates:
                self._journal.log_local_tag_event(*update)
                done.append(self._repr_modifier.add(update, with_future))

        if not with_future:
            return

        return done[0] if len(done) == 1 else CheckAllFuturesSucceed(done)

    def _modify_local_tag_safe(self, tag, event, msg=None):
        """Apply a single local tag update without returning a future."""
        self._modify_local_tags([(tag, event, msg)], with_future=False)

    def IsRemoteTagName(self, tagname):
        """Return True when remote tags are enabled and the name has ':'."""
        if not self.remote_tags_enabled:
            return False
        return ':' in tagname

    def AcquireTag(self, tagname):
        """Return a TagWrapper for `tagname`, registering the tag in memory.

        A cloud tag that was just put into memory is also subscribed to in
        the cloud (when the cloud client already exists).
        """
        raw = self._RawTag(tagname)

        with self.lock:
            ret = self.inmem_items.setdefault(tagname, raw)

            if ret is raw and ret.IsCloud() and self._cloud: # no _cloud before Start()
                self._cloud.subscribe(tagname, with_future=False)

        return TagWrapper(ret)

    def _is_cloud_tag_name(self, name):
        """Decide by name whether a tag must live in the cloud.

        Matcher failures are logged (at most once per 5 seconds) and treated
        as "not a cloud tag".
        """
        if self.IsRemoteTagName(name):
            return False

        try:
            if self._tags_random_cloudiness:
                return hash(name) % 3 == 0

            if self._all_tags_in_cloud:
                return True

            return self._match_cloud_tag(name)

        except Exception as e:
            now = time.time()
            if now - self._last_tag_mask_error_report_time > 5:
                logging.error("Failed to match tag masks: %s" % e)
                self._last_tag_mask_error_report_time = now
            return False

    def _create_tag(self, name):
        """Create a RemoteTag, CloudTag or LocalTag depending on the name."""
        if self.IsRemoteTagName(name):
            return RemoteTag(name, self._modify_local_tag_safe)
        elif self._is_cloud_tag_name(name):
            return CloudTag(name, self._modify_cloud_tag_safe)
        else:
            return LocalTag(name, self._modify_local_tag_safe)

    def _has_cloud_setup(self):
        """Return True when a cloud tags server is configured."""
        return bool(self._cloud_tags_server)

    def vivify_tags_from_backup(self, tags):
        """Re-attach modify functions to tags restored from a backup.

        Raises RuntimeError when a cloud tag is met without cloud setup.
        """
        has_cloud_setup = self._has_cloud_setup()

        for tag in tags:
            if tag.IsCloud():
                if not has_cloud_setup:
                    raise RuntimeError("Cloud tags in backup, but no setup was found in config")
                modify = self._modify_cloud_tag_safe
            else:
                modify = self._modify_local_tag_safe

            tag._request_modify = modify

    def _make_tag_cloud(self, tag):
        """Turn a LocalTag object into a CloudTag in place."""
        assert isinstance(tag, LocalTag)
        tag.__class__ = CloudTag
        tag.version = 0
        self._set_modify_func(tag)

    def convert_in_memory_tags_to_cloud_if_need(self):
        """Convert in-memory tags that must be cloud tags into CloudTag's.

        Locally set tags among them are also set in the cloud through a
        temporary client.  Returns True when such cloud updates were sent,
        False otherwise (including when there is no cloud setup).
        """
        if not self._has_cloud_setup():
            return False

        updates = []

        for tag_name, tag in self.inmem_items.iteritems():
            must_be_cloud = self._is_cloud_tag_name(tag_name) \
                and not tag.IsRemote() # Hack for disable_remote_tags

            if must_be_cloud == tag.IsCloud():
                continue

            elif must_be_cloud:
                if tag.IsLocallySet():
                    updates.append((tag_name, ETagEvent.Set))

                self._make_tag_cloud(tag)
            else:
                logging.error("Tag %s is cloud, but must not be" % tag_name)

        if not updates:
            return False

        logging.info("before conversion %d tags to CloudTag's" % len(updates))

        cloud = self._create_cloud_client(lambda ev: None)

        try:
            for bucket in split_in_groups(updates, 100000): # TODO Fix cloud_client.update
                cloud.update(bucket).get()
        finally:
            # Best effort: a failure to stop the temporary client must not
            # mask the outcome of the conversion itself.
            try:
                cloud.stop()
            except:
                logging.exception("Failed to stop temporary cloud client")

        logging.info("after conversion %d tags to CloudTag's" % len(updates))

        return True

    def make_on_disk_tags_conversion_params(self):
        """Build parameters for converting on-disk tags to cloud tags.

        Raises RuntimeError when there is no cloud setup.
        """
        if not self._has_cloud_setup():
            raise RuntimeError("No cloud tags setup")

        return OnDiskTagsConvertParams(
            db_filename=self.db_file,
            in_memory_tags=set(self.inmem_items.keys()),
            cloud_tags_server=self._cloud_tags_server
        )

    def _set_modify_func(self, tag):
        """Attach the cloud or local modify function matching the tag kind."""
        tag._request_modify = self._modify_cloud_tag_safe if tag.IsCloud() \
            else self._modify_local_tag_safe

    def _GetTagLocalState(self, name):
        """Return the tag object for `name`, or None if it does not exist."""
        return self._RawTag(name, dont_create=True)

    def _RawTag(self, tagname, dont_create=False):
        """Find a tag in memory, then on disk; optionally create it.

        tagname     -- non-empty tag name (ValueError otherwise).
        dont_create -- return None instead of creating a missing tag.

        A tag loaded from disk gets its modify function re-attached;
        mismatches between its kind and the current setup are only logged.
        """
        if not tagname:
            raise ValueError("Empty tag name")

        tag = self.inmem_items.get(tagname, None)
        if tag:
            return tag

        if not self.db_file_opened:
            self.DBConnect()

        tagDescr = self.infile_items.get(tagname, None)

        if tagDescr:
            tag = cPickle.loads(tagDescr)

            if tag.IsCloud():
                if not self._has_cloud_setup():
                    logging.error("Tag %s is cloud on disk storage, but no setup for" \
                                  " cloud in config. Restart server with proper setup!"
                                  % tagname)
            elif not tag.IsRemote(): # Hack for disable_remote_tags
                if self._is_cloud_tag_name(tag.GetFullname()):
                    logging.error("Tag %s is not cloud on disk storage, but must be." \
                                  " Convert tags in disk storage!" % tagname)

            self._set_modify_func(tag)

        elif dont_create:
            return None

        else:
            tag = self._create_tag(tagname)

        return tag

    def _match_in_memory_tags(self, name_regex, prefix):
        """Yield (name, is_set) for in-memory tags matching the filters."""
        for name, tag in self.inmem_items.items():
            if name and (not prefix or name.startswith(prefix)) \
                    and (not name_regex or name_regex.match(name)):
                yield name, tag.IsLocallySet()

    def _match_in_file_tags(self, name_regex, prefix):
        """Yield (name, is_set) for on-disk tags matching the filters.

        A separate read-only handle is opened for the scan.  It is closed in
        `finally`, so it is released even when the consumer abandons the
        generator early or an unexpected exception escapes the scan.
        """
        inner_db = bsddb3.btopen(self.db_file, "r")
        try:
            name, tagDescr = inner_db.set_location(prefix) if prefix else inner_db.first()
            while True:
                # Keys are ordered: the first key without the prefix ends
                # the scan.
                if prefix and not name.startswith(prefix):
                    break
                if not name_regex or name_regex.match(name):
                    yield name, cPickle.loads(tagDescr).IsLocallySet()
                name, tagDescr = inner_db.next()
        except bsddb3._pybsddb.DBNotFoundError:
            # Raised when there is no (further) record: end of the scan.
            pass
        finally:
            inner_db.close()

    def ListTags(self, regexp=None, prefix=None, memory_only=True):
        """List (tag name, is_set) pairs matching `regexp` and `prefix`.

        With memory_only only in-memory tags are returned.  Otherwise cloud,
        in-memory and on-disk results are merged; for a name present in
        several sources the first source in that order wins.
        """
        lists = []

        cloud_result = None
        if not memory_only and self._has_cloud_setup():
            cloud_result = self._cloud.match(
                prefix=prefix,
                regexp='^' + regexp.pattern if regexp else None
            )

        lists.append(list(self._match_in_memory_tags(regexp, prefix)))

        if not memory_only:
            lists.append(list(self._match_in_file_tags(regexp, prefix)))

        if cloud_result:
            # lock in future.get only after local db and inmemory
            lists[0:0] = [[(tag.tag_name, tag.is_set) for tag in cloud_result.get()]]

        if len(lists) == 1:
            return lists[0]

        ret = {}
        for lst in lists:
            for tag_name, is_set in lst:
                ret.setdefault(tag_name, is_set)

        return ret.items()

    def DBConnect(self):
        """Open (creating if needed) the on-disk tags database."""
        self.infile_items = bsddb3.btopen(self.db_file, "c")
        self.db_file_opened = True

    def UpdateContext(self, context):
        """Take settings from `context` and (re)open the tags database."""
        self.db_file = context.tags_db_file
        self.remote_tags_enabled = not context.disable_remote_tags
        self.DBConnect()
        self._journal.UpdateContext(context)
        self._repr_modifier.UpdateContext(context)
        self._cloud_tags_server = context.cloud_tags_server
        self._cloud_tags_masks = context.cloud_tags_masks
        self._cloud_tags_masks_reload_interval = context.cloud_tags_masks_reload_interval
        self._cloud_tags_release_delay = context.cloud_tags_release_delay
        self._tags_random_cloudiness = self._has_cloud_setup() and context.tags_random_cloudiness
        self._all_tags_in_cloud = self._has_cloud_setup() and context.all_tags_in_cloud

        logging.debug("TagStorage.UpdateContext, masks = %s, share = %s, server = %s" % (
            self._cloud_tags_masks, self._all_tags_in_cloud, self._cloud_tags_server))

    def Restore(self, timestamp):
        """Restore state from the journal starting at `timestamp`."""
        self._journal.Restore(timestamp, self, self._prev_safe_cloud_state)

    def ListDependentPackets(self, tag_name):
        """Return ids of listeners of the given tag."""
        return self._RawTag(tag_name).GetListenersIds()

    def tofileOldItems(self):
        """Move unused in-memory tags to the on-disk database.

        A tag is moved when it has no listeners, is not referenced from
        elsewhere and its minimal release time (if any) has passed.  Moved
        cloud tags are unsubscribed from.
        """
        old_tags = set()
        unsub_tags = set()

        now = time.time()

        # NOTE: the expected refcount of 4 depends on the exact shape of this
        # loop (references from the dict, the items() list, the local name
        # and the getrefcount argument) -- do not restructure it.
        for name, tag in self.inmem_items.items():
            if tag.GetListenersNumber() == 0 \
                    and sys.getrefcount(tag) == 4 \
                    and getattr(tag, '_min_release_time', 0) < now:

                if tag.IsCloud():
                    unsub_tags.add(name)

                # Hack for hostA:RemoteTag -> hostB:CloudTag
                # XXX Store old cloud tags to local DB too, so ConnectionManager.register_share
                # will work from the box with cloud tags that have gone from inmem_items
                old_tags.add(name)

        if not self.db_file_opened:
            with self.lock:
                self.DBConnect()

        # TODO At this point GetListenersNumber and getrefcount may change

        with self.lock:
            if unsub_tags:
                self._cloud.unsubscribe(unsub_tags, with_future=False)

            for name in old_tags:
                tag = self.inmem_items.pop(name)

                tag.callbacks.clear()
                tag.__dict__.pop('_min_release_time', None) # FIXME

                try:
                    self.infile_items[name] = cPickle.dumps(tag, 2)
                except bsddb3.error as e:
                    # Remember that the handle is dead, then let the error
                    # propagate.
                    if 'BSDDB object has already been closed' in e.message:
                        self.db_file_opened = False
                        self.db_file = None
                    raise

            self.infile_items.sync()
class Client(object):
    """Client of the cloud tags service.

    A connect thread keeps a connection alive; while connected, a read
    thread and a write thread exchange protobuf messages with the server.
    Requests return futures that are completed by server responses.
    """

    MESSAGE_MAX_ITEM_COUNT = 100000 # ATW CodedInputStream::GetTotalBytesLimit() == 67_108_864

    # Promise used for internal re-subscription messages.
    __FAKE_PROMISE = _FakePromise()

    # Stop levels, ordered by urgency.
    _ST_NONE = 0    # not stopping
    _ST_WAIT = 1    # stop once nothing is outgoing or running
    _ST_NOWAIT = 2  # stop immediately

    def __init__(self, connection_ctor_ctor, on_event):
        """Initialise client state and start the connect thread.

        connection_ctor_ctor -- called with ``_make_protobuf_connection``;
                                its result is the connection constructor.
        on_event             -- callback invoked for each journal event.
        """
        self._connection_constructor = connection_ctor_ctor(_make_protobuf_connection)
        self._on_event = on_event
        self._stopped = False
        self._outgoing = deque()
        self._running = {}
        self._subscriptions = set()
        self._next_message_id = 1
        self._should_stop = self._ST_NONE
        self._io = None
        self._io_stats = [0, 0]
        self._lock = threading.Lock()
        self._outgoing_not_empty = threading.Condition(self._lock)
        self._outgoing_empty = threading.Condition(self._lock)
        self._should_and_can_stop_cond = threading.Condition(self._lock)
        self._connect_thread = ProfiledThread(target=self._connect_loop, name_prefix='CldTg-Connect')
        self._connect_thread.start()

    def get_io_stats(self):
        """Return a 4-tuple of I/O statistics.

        The first two items are the accumulated statistics of previous and
        current connections (None when COUNT_IO is off); the last two are
        the message counters of the current connection (None without one).
        """
        conn = self._io._connection if self._io and self._io._connection else None

        msg_stat = (
            conn._red_msg_count if conn else None,
            conn._written_msg_count if conn else None)

        if not COUNT_IO:
            return (None, None) + msg_stat

        prev_stats = self._io_stats
        cur_stats = conn.get_stats() if conn else (0, 0)

        return (prev_stats[0] + cur_stats[0], prev_stats[1] + cur_stats[1]) + msg_stat

    def __repr__(self):
        with self._lock:
            io = self._io
            conn = io._connection if io and io._connection else None

            return '<%s.%s %s%s %s at 0x%x>' % (
                self.__module__,
                type(self).__name__,
                'stopped ' if self._stopped else '',
                '%s:%s %s' % (conn._host, conn._port, self.get_io_stats()) if conn else None,
                'running=%d, outgoing=%d, subs=%d' % (
                    len(self._running),
                    len(self._outgoing),
                    len(self._subscriptions)
                ),
                id(self)
            )

    class _Task(object):
        """A message together with the promise to complete on response."""

        def __init__(self, promise, msg, is_resend):
            self.promise = promise
            self.msg = msg
            self.is_resend = is_resend

    def _create_connection(self):
        """Try to connect until success or until the client must stop.

        Returns the connection, or None when stopping was requested.
        The first attempt is immediate; later ones are a second apart.
        """
        timeout = 0.0

        while True:
            with self._lock:
                if self._should_and_can_stop():
                    return

                # Doubles as the pause between attempts and as a wake-up
                # point for stop requests.
                self._should_and_can_stop_cond.wait(timeout)

                if self._should_and_can_stop():
                    return

            # Reset on every attempt: after a failed constructor call the
            # name must not be unbound (first attempt) or refer to the
            # result of an earlier iteration.
            conn = None
            try:
                conn = self._connection_constructor()
            except Exception as e:
                logging.warning("Failed to connect: %s" % e)

            if conn:
                return conn

            timeout = 1.0

    def _connect_loop(self):
        """Connect thread body; on exit fails every still-running request."""
        self._connect_loop_inner()

        self._outgoing.clear()

        # Raise and catch to obtain an exc_info triple with a traceback.
        try:
            raise ServiceStopped()
        except ServiceStopped:
            exc = sys.exc_info()

        with self._lock:
            for task in self._running.itervalues():
                task.promise.set(exc=exc)
            self._running.clear()

    def _connect_loop_inner(self):
        """(Re)connect and run the I/O threads until the client must stop."""
        while True:
            with self._lock:
                if self._should_and_can_stop():
                    return

            if self._io:
                # Dispose of the previous connection, keeping its counters.
                self._io._connection.close()
                if COUNT_IO:
                    rd, wr = self._io._connection.get_stats()
                    self._io_stats[0] += rd
                    self._io_stats[1] += wr
                self._io = None

            self._reconstruct_outgoing()

            # FIXME What if exception will throw here? self._broken? _push -> raise and fail futures

            io = self._io = self.IO()

            logging.info("Connecting to servers...")

            connection = self._create_connection()

            if not connection:
                break

            addr = (connection._host, connection._port)

            logging.info("Connected to %s" % (addr,))

            io._connection = connection

            with self._lock:
                self._io._connected = True

            io_threads = [
                ProfiledThread(target=self._read_loop, name_prefix='CldTg-ReadLoop'),
                ProfiledThread(target=self._write_loop, name_prefix='CldTg-WriteLoop')
            ]

            for t in io_threads:
                t.start()

            for t in io_threads:
                t.join()

            logging.info("Disconnected from %s" % (addr,))

    def _reconstruct_outgoing(self):
        """Rebuild the outgoing queue for a new connection.

        Unanswered requests are queued again in id order, followed by
        re-subscription messages for every current subscription.
        """
        with self._lock:
            # Previous re-subscription messages are regenerated below.
            resend_ids = [id for id, task in self._running.iteritems() if task.is_resend]

            for id in resend_ids:
                self._running.pop(id)

            self._outgoing = deque(
                self._running[id] for id in sorted(self._running.keys())
            )

            # XXX _subscriptions go after _outgoing, so that no events come
            # for tags the user of Client is not subscribed to (sort of an
            # invariant)

            # XXX For now subscribes for already subscribed tags will not be
            # trigger events in journal

            # FIXME Global split feature in all _push callers?
            for tags in split_in_groups(self._subscriptions, self.MESSAGE_MAX_ITEM_COUNT):
                self._do_push(
                    self._create_subscribe_message(tags),
                    self.__FAKE_PROMISE,
                    is_resend=True
                )

    class IO(object):
        """State of a single connection and of its two I/O threads."""

        def __init__(self):
            self._thread_count = 2
            self._bye_received = False
            self._read_finished = False
            self._connection = None
            self._connected = False

    def _create_subscribe_message(self, tags):
        """Build a Subscribe message for `tags`."""
        ret = cloud_tags_pb2.TClientMessage()
        ret.Subscribe.Tags.extend(tags)
        return ret

    def fetch_my_subscriptions(self):
        """Request this client's subscriptions; returns a future."""
        msg = cloud_tags_pb2.TClientMessage()
        msg.Debug.GetMySubscriptions = True # TODO Use TGetMySubscriptions instead of Debug
        return self._push(msg)

    def ping(self):
        """Send a Ping message; returns a future."""
        msg = cloud_tags_pb2.TClientMessage()
        ping = msg.Ping
        ping.SetInParent()
        return self._push(msg)

    def subscribe(self, tags, with_future=True):
        """Subscribe to `tags` (a name or an iterable of names)."""
        return self._sub_unsub_in_groups(tags, self._do_subscribe, with_future)

    def unsubscribe(self, tags, with_future=True):
        """Unsubscribe from `tags` (a name or an iterable of names)."""
        return self._sub_unsub_in_groups(tags, self._do_unsubscribe, with_future)

    def _do_subscribe(self, tags):
        def update():
            self._subscriptions |= tags

        return self._push(self._create_subscribe_message(tags), code=update)

    def _do_unsubscribe(self, tags):
        msg = cloud_tags_pb2.TClientMessage()
        msg.Unsubscribe.Tags.extend(tags)

        def update():
            self._subscriptions -= tags

        return self._push(msg, code=update)

    def _sub_unsub_in_groups(self, tags, method, with_future=True):
        """Call `method` on `tags`, split into groups that fit one message.

        Returns a future, or None when `with_future` is false.
        """
        if isinstance(tags, str):
            tags = set([tags])

        if not isinstance(tags, set):
            tags = set(tags)

        def fix_result(f):
            return f if with_future else None

        if not tags:
            return fix_result(READY_ACK_FUTURE)

        if len(tags) < self.MESSAGE_MAX_ITEM_COUNT:
            return fix_result(method(tags))

        futures = [
            method(set(tags_group))
            for tags_group in split_in_groups(tags, self.MESSAGE_MAX_ITEM_COUNT)
        ]

        if not with_future:
            return

        return CheckAllFuturesSucceed(futures)

    def lookup(self, tags):
        """Look up the state of `tags`; returns a future."""
        if not isinstance(tags, set):
            tags = set(tags)

        if not tags:
            return READY_EMPTY_DICT_FUTURE

        msg = cloud_tags_pb2.TClientMessage()
        msg.Lookup.Tags.extend(tags)
        return self._push(msg)

    def match(self, prefix=None, regexp=None, limit=None):
        """Request tags matching the given filters; returns a future."""
        msg = cloud_tags_pb2.TClientMessage()
        match = msg.Match
        match.SetInParent()

        if prefix is not None:
            match.Prefix = prefix
        if regexp is not None:
            match.Regexp = regexp
        if limit is not None:
            match.Limit = limit

        return self._push(msg)

    @staticmethod
    def _form_update_item(item, update):
        """Fill a protobuf update item from a (name, event[, comment])."""
        item.TagName = update[0]
        item.Event = update[1] # FIXME convert explicitly
        if len(update) > 2 and update[2] is not None:
            item.Comment = update[2]

    # If this function will split updates into buckets, consider that
    # updates may contains updates of the same tag
    def update(self, updates):
        """Send a batch of tag updates in one message; returns a future."""
        if not isinstance(updates, list):
            updates = list(updates)

        if not updates:
            return READY_ACK_FUTURE

        msg = cloud_tags_pb2.TClientMessage()
        items = msg.Update.Items

        for update in updates:
            self._form_update_item(items.add(), update)

        return self._push(msg)

    def serial_update(self, update):
        """Send a single SyncedUpdate message; returns a future."""
        msg = cloud_tags_pb2.TClientMessage()
        self._form_update_item(msg.SyncedUpdate.Data, update)
        return self._push(msg)

    def _do_stop(self, value):
        """Set the stop level and wake waiters.  Called with the lock held."""
        self._should_stop = value

        if self._should_and_can_stop():
            self._should_and_can_stop_cond.notify_all()

        # The write loop must notice the new level even when stopping is
        # not possible yet.
        self._outgoing_not_empty.notify()

    def is_stopped(self):
        return self._stopped

    def _stop(self, wait=True, timeout=None):
        """Raise the stop level and join the connect thread.

        wait    -- True requests _ST_WAIT, False requests _ST_NOWAIT.
        timeout -- passed to the join of the connect thread.
        """
        if self._stopped:
            return

        new_value = self._ST_WAIT if wait else self._ST_NOWAIT

        with self._lock:
            if self._should_stop < new_value:
                # The conditional must be parenthesised: `%` binds tighter
                # than `if/else`.
                logging.info("Stopping YtTags.Client (%s)"
                             % ('_ST_WAIT' if wait else '_ST_NOWAIT'))
                self._do_stop(new_value)

            elif self._should_stop > new_value:
                logging.warning("stop() called with lower stop-level")

        self._connect_thread.join(timeout) # TODO sleeps

    def stop(self, timeout=None):
        """Stop the client: gracefully for up to `timeout`, then at once."""
        self._stop(timeout=timeout)
        # TODO Don't wait at all, if we disconnected for a long time
        self._stop(wait=False) # TODO Kosher

        if not self._connect_thread.is_alive():
            self._stopped = True

    def _should_and_can_stop(self):
        """True for _ST_NOWAIT, or for _ST_WAIT with no pending work."""
        return self._should_stop == self._ST_NOWAIT \
            or self._should_stop == self._ST_WAIT \
                and not self._outgoing \
                and not self._running

    def __enter__(self):
        return self

    def __exit__(self, t, v, tb):
        self.stop()

    def _push(self, msg, code=None):
        """Queue `msg` for sending and return a future of the response.

        When a stop was already requested the future fails with
        ServiceStopped.  `code`, if given, runs under the lock when the
        message is queued.
        """
        promise = Promise()

        with self._lock:
            if self._should_stop != self._ST_NONE:
                promise.set(exc=ServiceStopped())
            else:
                self._do_push(msg, promise, code)

        return promise.to_future()

    def _do_push(self, msg, promise, code=None, is_resend=False):
        """Assign an id to `msg` and queue it.  Called with the lock held."""
        msg_id = self._next_message_id
        self._next_message_id += 1
        msg.Id = msg_id

        task = self._Task(promise, msg, is_resend)

        if code:
            code()

        self._running[msg_id] = task
        self._outgoing.append(task)

        self._outgoing_not_empty.notify()

    def _write_loop(self):
        self._io_loop("Output", self._write_loop_impl, socket.SHUT_WR)

    def _read_loop(self):
        def after_shutdown():
            # Let the write loop notice that reading is over.
            with self._lock:
                self._io._read_finished = True
                self._outgoing_not_empty.notify()

        self._io_loop("Input", self._read_loop_impl, socket.SHUT_RD, after_shutdown)

    def _io_loop(self, type_str, loop, how, after_shutdown=None):
        """Run `loop`, then shut down one direction of the connection.

        type_str       -- "Input" or "Output", used in log messages.
        loop           -- the loop body to run; its exceptions are logged.
        how            -- argument for the connection's shutdown().
        after_shutdown -- optional callback run after the shutdown.
        """
        try:
            loop()
        except Exception:
            logging.exception("%s error" % type_str)

        try:
            self._io._connection.shutdown(how)
        except socket.error as e:
            if e.errno != errno.ENOTCONN:
                logging.warning("Error on socket shutdown(%d): %s" % (how, e))

        if after_shutdown:
            after_shutdown()

        with self._lock:
            self._io._thread_count -= 1

            # The last I/O thread to finish marks the connection as gone.
            if not self._io._thread_count:
                self._io._connected = False

    def _write_loop_impl(self):
        """Send queued messages until stopped or the connection is closing."""
        outgoing = self._outgoing
        conn = self._io._connection

        def need_emergency_stop():
            # FIXME _read_finished is enough?
            return self._io._read_finished \
                or self._io._bye_received \
                or self._should_stop == self._ST_NOWAIT

        while True:
            with self._lock:
                while not(need_emergency_stop() or self._should_stop == self._ST_WAIT or outgoing):
                    self._outgoing_not_empty.wait()

                if need_emergency_stop() or self._should_stop == self._ST_WAIT and not outgoing:
                    break

                assert bool(outgoing)
                msg = outgoing.popleft().msg

                if not outgoing:
                    self._outgoing_empty.notify_all()

            conn.send(msg)

        # TODO Rewrite server.cpp
        if need_emergency_stop():
            return

        # Graceful stop: keep the write side open until stopping is possible.
        with self._lock:
            while not self._should_and_can_stop():
                self._should_and_can_stop_cond.wait()

    def wait_outgoing_empty(self):
        """Block until the outgoing queue is observed empty."""
        with self._lock:
            # Re-check the predicate after every wake-up, as condition
            # variables require.
            while self._outgoing:
                self._outgoing_empty.wait()

    def _read_loop_impl(self):
        """Receive and process server messages until EOF."""
        conn = self._io._connection

        while True:
            msg = conn.recv()

            if msg is None: # EOF
                break

            self._process_server_message(msg)

    class _ServerMessage(object):
        """Converters from protobuf server messages to plain objects."""

        class Event_(object):
            def __init__(self, msg):
                self.tag_name = msg.TagName
                self.event = msg.TagEvent
                self.version = msg.TagVersion
                self.last_reset_version = msg.LastResetVersion
                self.last_reset_comment = msg.Comment # TODO XXX This code lies

            def __repr__(self):
                state = self.__dict__.copy()
                state['event'] = TagEventName[state['event']]
                return '<Event(' + asjsonstring(state) + ')>'

        @classmethod
        def Event(cls, msg):
            return [cls.Event_(item) for item in msg.Event.Items]

        class Lookup_(object):
            def __init__(self, msg):
                self.tag_name = msg.TagName
                self.is_set = msg.IsSet_
                self.version = msg.TagVersion
                self.last_reset_version = msg.LastResetVersion
                self.last_reset_comment = msg.LastResetComment

            def __repr__(self):
                return '<Lookup(' + asjsonstring(self.__dict__) + ')>'

        @classmethod
        def Lookup(cls, msg):
            return {item.TagName: cls.Lookup_(item) for item in msg.Items}

        @classmethod
        def Match(cls, msg):
            # Returns a (value, exception) pair.
            if msg.Error:
                return (None, RuntimeError(msg.Error))
            return ([cls.Lookup_(item) for item in msg.Items], None)

        @classmethod
        def Subscriptions(cls, msg):
            return msg.Tags

    def _process_server_message(self, msg):
        """Dispatch one server message.

        Events go to the on_event callback, Bye marks the connection as
        closing, and responses complete the promise of their request.

        Raises NotImplementedError for an unknown message type and
        RuntimeError for a response without RequestId.
        """
        # WhichOneof doesn't work
        # https://github.com/google/protobuf/commit/0971bb0d57aa6f2db1abee4008b365d52b402891
        # type = msg.WhichOneof('Data')

        def first(pred, iterable):
            for item in iterable:
                if pred(item):
                    return item

        msg_type = first(
            msg.HasField,
            ['Event', 'Bye', 'Ack', 'Lookup', 'Subscriptions', 'Match'])

        if msg_type is None:
            # XXX Client will looped in this error
            raise NotImplementedError("Unknown server message type for [%s]" % msg)

        if msg_type == 'Event':
            for ev in self._ServerMessage.Event(msg):
                # Deliberately broad: a failing callback must not break the
                # read loop.
                try:
                    self._on_event(ev)
                except:
                    logging.exception("Failed to process journal event")
            return

        elif msg_type == 'Bye':
            with self._lock:
                self._io._bye_received = True
                self._outgoing_not_empty.notify()
            return

        data = getattr(msg, msg_type)

        if not data.HasField('RequestId'):
            raise RuntimeError('No .RequestId')

        request_id = data.RequestId

        with self._lock:
            promise = self._running.pop(request_id).promise # FIXME , None and raise

            if self._should_and_can_stop():
                self._should_and_can_stop_cond.notify_all()

        if msg_type == 'Ack':
            promise.set(None)

        elif msg_type == 'Lookup':
            promise.set(self._ServerMessage.Lookup(msg.Lookup))

        elif msg_type == 'Match':
            promise.set(*self._ServerMessage.Match(msg.Match))

        elif msg_type == 'Subscriptions':
            promise.set(self._ServerMessage.Subscriptions(msg.Subscriptions))

        else:
            assert False
def Start(self):
    """Create the journal writer thread, remember it on the instance and start it."""
    writer = ProfiledThread(name_prefix='Journal', target=self._write_loop)
    self._write_thread = writer
    writer.start()
class TagLogger(object):
    """Journal of tag events, written to disk by a background thread.

    Events are appended to an in-memory queue by the ``log_*`` methods and
    flushed to a ``JournalDB`` by the thread launched in ``Start()``.
    ``Restore()`` replays previously written journal files.
    """

    def __init__(self):
        super(TagLogger, self).__init__()
        self.db_filename = None
        self._db = None
        # While True, log_local_tag_event() drops events (set during Restore).
        self._restoring_mode = False
        self._should_stop = False
        self._queue = deque()
        self._queue_lock = threading.Lock()
        self._db_lock = threading.Lock()
        self._queue_not_empty = threading.Condition(self._queue_lock)
        self._write_thread = None

    def Start(self):
        """Launch the background thread that flushes the queue to the journal."""
        self._write_thread = ProfiledThread(target=self._write_loop, name_prefix='Journal')
        self._write_thread.start()

    def Stop(self):
        """Forbid new events and wait until the writer thread has drained the queue.

        Safe to call when ``Start()`` was never called: there is no thread to
        join in that case.
        """
        with self._queue_lock:
            self._should_stop = True
            self._queue_not_empty.notify()
        if self._write_thread is not None:
            self._write_thread.join()

    def _reopen(self):
        """Close the current journal handle, if any, and open ``db_filename``.

        Raises RuntimeError if ``db_filename`` has not been set yet.
        """
        if self._db:
            self._db.close()
        if self.db_filename is None:
            raise RuntimeError("db_filename is not yet set")
        self._db = JournalDB(self.db_filename)

    def UpdateContext(self, context):
        """Point the journal at ``context.recent_tags_file`` and reopen it."""
        with self._db_lock:
            self.db_filename = context.recent_tags_file
            self._reopen()

    def _write(self, data):
        """Write one serialized record, retrying until it succeeds.

        After every failure the handle is dropped (so the next attempt
        reopens the journal) and the thread sleeps with a delay that doubles
        up to ``max_timeout`` seconds.  The sleep happens outside
        ``_db_lock``.
        """
        timeout = 1.0
        max_timeout = 15.0

        while True:
            with self._db_lock:
                try:
                    if not self._db:
                        self._reopen()
                    self._db.write(data)
                    self._db.sync()
                except Exception as err:
                    self._db = None
                    logging.error("Can't write to journal (%d items left): %s",
                                  len(self._queue), err)
                else:
                    break

            timeout = min(max_timeout, timeout * 2)
            time.sleep(timeout)

    def _write_loop(self):
        """Writer thread body: flush queued events until stopped and drained."""
        while True:
            with self._queue_lock:
                while not (self._queue or self._should_stop):
                    self._queue_not_empty.wait()

                if self._should_stop and not self._queue:
                    return

            # Drain outside _queue_lock so that producers are not blocked
            # while _write() retries.
            while self._queue:
                self._write(cPickle.dumps(self._queue.popleft()))

    def _log_event(self, ev):
        """Queue ``ev`` for writing.

        Raises RuntimeError once ``Stop()`` has been requested.
        """
        with self._queue_lock:
            if self._should_stop:
                raise RuntimeError("Can't register events after should_stop")
            self._queue.append(ev)
            self._queue_not_empty.notify()

    def log_cloud_request_start(self, id, update):
        """Journal the start of cloud request ``id`` carrying ``update``."""
        self._log_event(CloudRequestStart(id, update))

    def log_cloud_request_finish(self, id):
        """Journal the completion of cloud request ``id``."""
        self._log_event(CloudRequestFinish(id))

    def log_local_tag_event(self, tag, ev, msg=None):
        """Journal a Set/Unset/Reset event for ``tag``.

        ``tag`` is either a tag name (``str``) or an object providing
        ``GetFullname()``.  ``msg`` is stored only for events that are
        neither Set nor Unset.  Nothing is recorded while a ``Restore()`` is
        in progress.
        """
        if self._restoring_mode:
            return

        args = ()
        if ev == ETagEvent.Set:
            cls = SetTagEvent
        elif ev == ETagEvent.Unset:
            cls = UnsetTagEvent
        else:
            cls = ResetTagEvent
            args = (msg,)

        if not isinstance(tag, str):
            tag = tag.GetFullname()

        self._log_event(cls(tag, *args))

    def Restore(self, timestamp, tagRef, cloud_requester_state):
        """Replay journal files newer than ``timestamp``.

        Rotated journals (named ``<db_filename>...-<time>``) whose time
        suffix is greater than ``timestamp`` are replayed in sorted order,
        followed by the current journal if it exists.  Cloud request records
        are fed to ``cloud_requester_state``; every other record has
        ``Redo(tagRef)`` called on it.  A record that fails to load or apply
        is logged and skipped.

        Errors while listing or opening journal files propagate to the
        caller, but restoring mode is always switched off and the file being
        read is always closed.
        """
        logging.debug("TagLogger.Restore(%d)", timestamp)
        dirname, db_filename = os.path.split(self.db_filename)

        def get_filenames():
            result = []
            for filename in os.listdir(dirname):
                if filename.startswith(db_filename) and filename != db_filename:
                    file_time = int(filename.split("-")[-1])
                    if file_time > timestamp:
                        result.append(filename)
            result = sorted(result)
            if os.path.isfile(self.db_filename):
                result += [db_filename]
            return result

        with self._db_lock:
            self._restoring_mode = True
            try:
                for filename in get_filenames():
                    f = bsddb3.rnopen(os.path.join(dirname, filename), "r")
                    try:
                        for _key, v in f.items():
                            try:
                                # NOTE: unpickling is only safe because these
                                # journals are written by this class; never
                                # point it at untrusted files.
                                obj = cPickle.loads(v)
                                if isinstance(obj, CloudRequestStart):
                                    cloud_requester_state.start_request(obj.id, obj.update)
                                elif isinstance(obj, CloudRequestFinish):
                                    cloud_requester_state.finish_request(obj.id)
                                else:
                                    obj.Redo(tagRef)
                            except Exception as e:
                                logging.exception("occurred in TagLogger while restoring from a journal : %s", e)
                    finally:
                        f.close()
            finally:
                # Without this reset a failed restore would leave the logger
                # silently dropping every later local tag event.
                self._restoring_mode = False