class RPCProxy(object): def __init__(self, uri, method=None, namespaces=None, sid=''): self.__serviceURL = uri self.__serviceName = method self.namespaces = isinstance(namespaces, ClassLoader) and namespaces or \ ClassLoader(namespaces or []) self._seril = Serializer(self.namespaces) self.sid = sid self.logger = None self.start_call_listener = [] self.end_call_listener = [] def __call__(self, *args, **kw): args = self._seril.serialize((args, kw)) post_data = {"method": self.__serviceName, 'params': args, 'id':'httprpc', 'sid':self.sid} #@todo xx for l in self.start_call_listener: l(name=self.__serviceName, args=args, kw_args=kw) rpc_response = self.post_request(self.__serviceURL, post_data) try: respdata = rpc_response.read() ret, e = self._seril.deserialize(respdata) except Exception, e: raise RPCException("Failed to deserialize response data:%s, exception:%s" % (respdata, e)) finally:
class RPCStub(object):
    """Server-side RPC endpoint that dispatches decoded calls to a local stub.

    Attributes:
        uri: the URI this stub is published under (stored, not used here).
        stub: the local object whose methods are exposed.
        namespace: value handed to ``Serializer`` for (de)serialization.
        logger: optional logger; when it is falsy nothing is logged.
    """

    def __init__(self, uri, stub, namespace=None):
        self.uri = uri
        self.stub = stub
        self.namespace = namespace
        self._seril = Serializer(namespace)
        self.logger = None

    def process(self, method, data):
        """Decode ``data`` and invoke the handler registered for ``method``.

        ``data`` is deserialized into an ``(args, kw)`` pair, the handler is
        looked up through ``self._local_service(self.stub, method)`` and then
        called as ``handler(*args, **kw)``.

        Every failure (including ``BaseException`` subclasses) is captured
        and logged rather than propagated.

        NOTE(review): ``ret`` and ``exception`` are collected but nothing in
        the visible code returns or serializes them -- confirm whether a
        response step is missing.
        """
        ret = exception = None
        try:
            args, kw = self._seril.deserialize(data)
            try:
                if self.logger:
                    self.logger.info(u"method:%s, args:%s, kw:%s" % (method, args, kw))
            except Exception as e:
                #@todo: fix the decode problem.
                if self.logger:
                    self.logger.info(str(e))

            h = self._local_service(self.stub, method)
            if h is None:
                raise RPCException(u"Not found interface '%s'" % method)

            ret = h(*args, **kw)
            if self.logger:
                self.logger.info("return:%s" % (ret, ))
        except BaseException as e:
            # Bind to a second name: Python 3 unbinds ``e`` when the handler exits.
            exception = e
            if self.logger:
                self.logger.exception(e)
def testSimpleSerialization(self):
    """Round-trip a test message through ``Serializer`` and compare attributes."""
    original = self.createTestMessage()
    # The deserializer is fed a file-like object wrapping the serialized form.
    stream = StringIO(Serializer.serialize(original))
    restored = Serializer.deserialize(stream, TestMessage1)
    self.assertDictEqual(vars(original), vars(restored))
def _fetch_metadata_by_json(self, json_obj):
    """
    Parses an incoming json object representation and fetches the related
    object metadata from the server.

    Returns None (no ``metadata`` field, empty ``metadata`` field, or an
    empty server answer) or a Metadata object whose attributes are the
    fetched properties, keyed by property name.

    Raises the exception class registered in ``errors.error_codes`` for the
    response status code when the server does not answer with 200.

    This method is called only when the core object is not found in the
    cache, and if metadata should be requested anyway, it could be done
    faster with this method that uses
    GET /neo/<obj_type>/198272/metadata/

    Currently not used, because inside the pull function (in cascade mode)
    it may be faster to collect all metadata after the whole object tree is
    fetched. For situations when, say, dozens of objects tagged with the
    same value are requested, it's faster to fetch this value once at the
    end rather than requesting related metadata for every object.
    """
    # A missing key and an empty value are treated the same way.
    if not json_obj['fields'].get('metadata'):
        return None # no metadata field or empty metadata

    url = json_obj['permalink']

    # TODO move requests to the remote class
    resp = requests.get(url + 'metadata', cookies=self._meta.cookie_jar)
    raw_json = get_json_from_response(resp)

    if resp.status_code != 200:
        message = '%s (%s)' % (raw_json['message'], raw_json['details'])
        raise errors.error_codes[resp.status_code](message)

    if not raw_json['metadata']: # if no objects exist return empty result
        return None

    mobj = Metadata()
    for p, v in raw_json['metadata']:
        prp = Serializer.deserialize(p, self)
        val = Serializer.deserialize(v, self)
        prp.append(val)

        # save both objects to cache
        self._cache.add_object(prp)
        self._cache.add_object(val)

        setattr(mobj, prp.name, prp)

    return mobj # Metadata object with list of properties (tags)
def load_from_file(self):
    """Ask the user for a file and load the model state stored in it.

    The chosen file is passed to ``Serializer.deserialize`` and the
    resulting mapping is forwarded to ``self.setup_data``. Nothing happens
    when the dialog is cancelled; an ``OSError`` while loading is reported
    in a message box.
    """
    try:
        # getOpenFileName returns (path, selected_filter); the path is ''
        # when the user cancels the dialog.
        load_file_path = QtWidgets.QFileDialog.getOpenFileName(
            self, 'Open file...', './')[0]
        if not load_file_path:
            return

        loaded_data = Serializer.deserialize(load_file_path)
        self.setup_data(loaded_data['event_params_model_data'],
                        loaded_data['tasks_params_model_data'],
                        loaded_data['tasks_quantity'],
                        loaded_data['events_quantity'],
                        loaded_data['use3mark_system'])
    except OSError:
        QtWidgets.QMessageBox.critical(
            self, 'Error', 'Произошла ошибка во время открытия файла')
def load_from_file(self):
    """Ask the user for a file, load it into ``self.data`` and refresh the view.

    Shows an error box when no file was selected or when loading fails.
    """
    try:
        load_file_data = QtWidgets.QFileDialog.getOpenFileName(
            self, 'Load file...', os.path.dirname(os.getcwd()))
        # The dialog returns a (path, selected_filter) tuple, so the tuple
        # itself is never empty; a cancelled dialog yields an empty path.
        load_file_data_path = load_file_data[0]
        if not load_file_data_path:
            QtWidgets.QMessageBox.critical(self, 'Error', 'Файл не выбран')
            return

        self.data = Serializer.deserialize(load_file_data_path)
        self.setup_data()
    except Exception:
        # Deliberately broad: any loading problem is reported to the user,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        QtWidgets.QMessageBox.critical(self, 'Error', 'Загрузка невозможна')
def write(self, buf, offset=0):
    """
    Updates object information

    :param buf: a YAML representation of an object.
    :type buf: str
    :param offset: accepted but not used by the code visible here.
    :return: -1 when ``buf`` cannot be deserialized into a new object. The
        documented contract is 0 on success or a negative error code --
        NOTE(review): no explicit success return is visible in this block;
        confirm against the rest of the method.
    """
    try:
        # Build a fresh instance of the same model class and keep the
        # identifier of the current one.
        new = Serializer.deserialize(self.model_instance.__class__, buf)
        new.id = self.model_instance.id
    except Exception:
        return -1 # TODO find a way to handle exceptions better..
class Miner:
    """Client-side miner that talks to the blockchain server over HTTP.

    Creating a ``Miner`` registers it with the server straight away.
    """

    def __init__(self, pseudonym, public_key, base_url="http://localhost:5000"):
        """Register the miner with the server.

        Args:
            pseudonym: name to register the key under.
            public_key: the miner's public key; also credited when mining.
            base_url: root URL of the server (defaults to the local
                development server).

        Raises:
            ValueError: if the server answers with a non-empty body; the
                body is used as the error message.
        """
        self.public_key = public_key
        self.base_url = base_url.rstrip("/")
        self.serializer = Serializer()
        data = {
            "pseudonym": pseudonym,
            "public_key": public_key,
        }
        r = requests.post(self.base_url + "/register_miner", data=data)
        if r.text != "":
            raise ValueError(r.text)

    def get_block(self):
        """Fetch the current block from the server and deserialize it."""
        r = requests.get(self.base_url + "/get_block")
        return self.serializer.deserialize(r.text)

    def post_block(self, block):
        """Serialize ``block`` and submit it to the server.

        Raises:
            ValueError: if the server answers with a non-empty body.
        """
        block_string = self.serializer.serialize(block)
        data = {
            "block": block_string
        }
        r = requests.post(self.base_url + "/post_block", data=data)
        if r.text != "":
            raise ValueError(r.text)

    def mine_block(self):
        """Mine a single block and post it.

        A fresh block is fetched for every attempt; the loop ends as soon as
        ``block.try_nonce()`` reports success, and that block is posted.
        """
        succeeded = False
        while not succeeded:
            block = self.get_block()
            block.add_miner_transaction(self.public_key)
            block.randomize_nonce()
            succeeded = block.try_nonce()
        self.post_block(block)

    def mine(self):
        """Mine blocks forever."""
        print("Mining...")
        while True:
            self.mine_block()
def test_can_serialize_to_string(self):
    """A serializable object written in 'str' stream mode reads back equal."""
    payload = 'Iamalongstring:Sorta'
    compared_attrs = ['a', 'b', 'c']

    class Temp(object):
        _serializer = SerializerInterface(
            serializable_attrs=['a', 'b', 'c'],
        )

        def __init__(self, c):
            self.c = c

        def __eq__(self, other):
            # Equal when none of the compared attributes differ; a missing
            # attribute counts as None on either side.
            return not any(
                getattr(other, attr_name, None) != getattr(self, attr_name, None)
                for attr_name in compared_attrs
            )

    buffer = StringIO()
    serializer = Serializer(stream=buffer, stream_mode='str')
    written = Temp(c=payload)
    serializer.serialize(written)
    read_back = serializer.deserialize(Temp)
    self.assertEqual(written, read_back)
class Client:
    """Interactive command-line client: mine, check balance, pay someone."""

    def __init__(self, public_key, private_key):
        """Set up wallet, miner and menu for the given key pair.

        Looks the public key up on the server; when it has no pseudonym yet
        the user is asked to choose one. Constructing the ``Miner`` registers
        it with the server.
        """
        # save inputs
        self.private_key = private_key
        self.public_key = public_key
        self.public_key_string = public_key.exportKey().decode("utf-8")

        # build wallet
        self.wallet = Wallet(self.public_key_string)

        # register miner
        pseudonym = resolve_pseudonym(self.public_key_string)
        if pseudonym == "":
            pseudonym = input("\nChoose a pseudonym: ")
        else:
            print(
                "\nThis key has already been registered.\nRegistered pseudonym: " + pseudonym)
        self.miner = Miner(pseudonym, self.public_key_string)
        self.s = Serializer()

        # build menu
        self.menu = (
            "\nchoose an number:"
            "\n1.) mine"
            "\n2.) check balance"
            "\n3.) make transaction"
            "\nchoice: "
        )

    def mine(self):
        """Mine one block and report it."""
        self.miner.mine_block()
        print("\n=======================")
        print("Mined block")
        print("=======================")

    def check_balance(self):
        """Refresh the wallet from the server and print the balance."""
        self.wallet.update(self.collect_transactions())
        print("\n=======================")
        print("Balance: {}".format(self.wallet.get_amount()))
        print("=======================")

    def collect_transactions(self):
        """Fetch and deserialize the transactions for this public key.

        Raises:
            ValueError: if the server answers with an empty body.
        """
        data = {
            "public_key": self.public_key_string,
        }
        r = requests.post("http://localhost:5000/collect_transactions",
                          data=data)
        if r.text == "":
            raise ValueError("Collect transactions failed")
        return self.s.deserialize(r.text)

    def make_transaction(self):
        """Ask for an amount and a recipient, then submit the payment.

        Returns early (after printing the reason) when the amount is not an
        integer, exceeds the balance, is below 1, or the recipient is not
        registered.

        Raises:
            ValueError: if the server answers the submission with an empty
                body.
        """
        # ask amount
        self.wallet.update(self.collect_transactions())
        amount = input("\nHow much would you like to pay? ")
        try:
            amount = int(amount)
        except ValueError:
            # int() on user-typed text only fails with ValueError.
            print("That's not a number...")
            return
        if self.wallet.get_amount() < amount:
            print("You don't have that much money...")
            return
        elif amount < 1:
            # 1 itself is accepted, so the message says "at least".
            print("The amount has to be at least 1...")
            return

        # resolve the pseudonym
        to = input("Who would you like to pay? ")
        to_public_key = resolve_pseudonym(to)
        if to_public_key == "":
            print(to + " is not yet registered to the blockchain")
            return

        # make transactions
        owned_transactions = self.wallet.remove_transactions(amount)
        new_transactions = [
            Transaction(t.get_signed_transaction(), self.private_key,
                        to_public_key)
            for t in owned_transactions
        ]
        transaction = self.s.serialize(new_transactions)
        r = requests.post("http://localhost:5000/make_transaction",
                          data={"transaction": transaction})
        if r.text == "":
            raise ValueError("Transaction failed")
        print("\n=======================")
        print("Make transaction")
        print("=======================")

    def main(self):
        """Run the menu loop forever."""
        while True:
            option = input(self.menu)
            if option == "1":
                self.mine()
            elif option == "2":
                self.check_balance()
            elif option == "3":
                self.make_transaction()
            else:
                print("Sorry, {} is not an option...".format(option))
def pull(self, location, params=None, cascade=True, data_load=True):
    """
    Pulls an object from the specified location on the server.

    caching:    yes
    cascade:    True/False
    data_load:  True/False

    Arguments:

    location:   object location as URL like
                'http://<host>/metadata/section/2394/', or just a location
                '/metadata/section/2394' or a stripped version like
                '/mtd/sec/2394'
    params:     dict that can contain several categories of key-value pairs
                (defaults to an empty dict)
    cascade:    fetch related objects recursively (True/False)
    data_load:  fetch the data (True/False)

    Params can be:

    1. common params, like
        'at_time': '2013-02-22 15:34:57'

    2. data params, to get only parts of the original object(s). These only
        work for the data-related objects (like 'analogsignal' or
        'spiketrain').

        start_time - start time of the required range (calculated
            using the same time unit as the t_start of the signal)
        end_time - end time of the required range (calculated using
            the same time unit as the t_start of the signal)
        duration - duration of the required range (calculated using
            the same time unit as the t_start of the signal)
        start_index - start index of the required datarange (an index
            of the starting datapoint)
        end_index - end index of the required range (an index of the
            end datapoint)
        samples_count - number of points of the required range (an
            index of the end datapoint)
        downsample - number of datapoints. This parameter is used to
            indicate whether downsampling is needed. The downsampling
            is applied on top of the selected data range using other
            parameters (if specified)

    Returns the object found at 'location'; with cascade=True its children
    are fetched as well and attached to their parents.

    Raises TypeError if the object type is not pull-supported
    ('property', 'value' or a type unknown to models_map).
    """
    # A fresh dict per call: a shared mutable default would leak state
    # between calls if anything downstream modified it.
    if params is None:
        params = {}

    location = self._meta.parse_location( location )
    supp_models = [k for k in models_map if k not in ('property', 'value')]
    if location[1] not in supp_models:
        raise TypeError('Objects of that type are not pull-supported.')

    processed = {} # collector of processed objects like
                   # {"metadata/section/2394/": <object..>, ...}
    stack = [ location ] # a stack of objects to sync

    while stack:
        # take the next location; it counts as processed from here on
        loc = stack.pop( 0 )

        # find object in cache
        etag = None
        cached_obj = self._cache.get_obj_by_location( loc )
        if cached_obj is not None:
            obj_descr = self._meta.get_gnode_descr( cached_obj )
            if obj_descr and 'guid' in obj_descr['fields']:
                etag = obj_descr['fields']['guid']

        # request object from the server (with ETag)
        json_obj = self._remote.get( loc, params, etag )

        if json_obj == 304: # get object from cache
            obj = cached_obj
            print_status( '%s loaded from cache.' % str(loc) )

        else: # request from server

            # download related data
            data_refs = {} # is a dict like {'signal': <array...>, ...}
            if data_load:
                data_refs = self.__parse_data_from_json( json_obj )

            # parse json (+data) into python object
            obj = Serializer.deserialize( json_obj, self._meta, data_refs )

            # put metadata in the stack
            #if json_obj['fields'].has_key('metadata'):
            #    for value in json_obj['fields']['metadata']:
            #        cl_value = self._meta.clean_location( value )
            #        stack.append( cl_value )

            # or just download attached metadata here?
            # metadata = self._fetch_metadata_by_json(cls, json_obj)

            # str() keeps the message valid when loc is a tuple
            print_status( "%s fetched from server." % str(loc) )

        processed[ str(loc) ] = obj # add it to processed

        cls = loc[1]
        children = self._meta.app_definitions[cls]['children'] # child object types
        obj_descr = self._meta.get_gnode_descr( obj )
        if cascade and children and obj_descr:
            for child in children: # 'child' is like 'segment', 'event' etc.

                field_name = child + '_set'
                for rel_link in obj_descr['fields'].get( field_name ) or []:
                    cl_link = self._meta.parse_location( rel_link )

                    # children go to the front, so they are fetched next
                    if str(cl_link) not in processed and \
                        str(cl_link) not in [str(o) for o in stack]:
                        stack.insert( 0, cl_link )

    # building relationships for python objects
    for key, obj in processed.items():
        # TODO make some iterator below to avoid duplicate code

        loc = self._meta.parse_location( key )
        cls = loc[1]
        children = self._meta.app_definitions[cls]['children']
        obj_descr = self._meta.get_gnode_descr( obj )
        if cascade and children and obj_descr:
            for child in children: # 'child' is like 'segment', 'event' etc.

                field_name = child + '_set'
                rel_objs = []
                for rel_link in obj_descr['fields'].get( field_name ) or []:
                    cl_link = self._meta.parse_location( rel_link )
                    rel_objs.append( processed[str(cl_link)] )

                if rel_objs: # parse children into parent attrs
                    # a way to assign kids depends on object type
                    self.__assign_child( child, obj, rel_objs )

    # TODO add metadata to objects
    # # parse related metadata
    # if not json_obj['fields'].has_key('metadata') or \
    #     not json_obj['fields']['metadata']:
    #
    # else:
    #     mobj = Metadata()
    #     for p, v in raw_json['metadata']:
    #         prp = Serializer.deserialize(p, self)
    #         val = Serializer.deserialize(v, self)
    #         prp.append( val )
    #
    #         # save both objects to cache
    #         self._cache.add_object( prp )
    #         self._cache.add_object( val )
    #
    #         setattr( mobj, prp.name, prp )

    print_status( 'Object(s) loaded.\n' )

    obj = processed[ str(location) ]
    self._cache.add_object( obj )
    self._cache.save_data_map()
    self._cache.save_h5_map()

    return obj
def restore(self):
    """Replace ``self._serializer`` with the state stored in ``self._file_name``.

    The file is read in binary mode and its whole content is handed to
    ``Serializer.deserialize``. Errors from opening or deserializing
    propagate to the caller.
    """
    # The with-statement closes the file; no explicit close() is needed.
    with open(self._file_name, "rb") as source:
        self._serializer = Serializer.deserialize(source.read())
def parse_from_string(self, serialized):
    """Deserialize ``serialized`` with a fresh Serializer, targeting this object's class."""
    return Serializer().deserialize(serialized, self.__class__)
class SyncObj(object):
    """Object replicated between nodes with a Raft-style protocol.

    Every node keeps a role (follower / candidate / leader), a current term
    and a log of ``(command, logID, term)`` entries. Commands are appended by
    the leader, sent to the other nodes in 'append_entries' messages and
    applied once committed. The log is periodically compacted through the
    serializer into a full dump.

    This is Python 2 code (``xrange``, ``Queue``, ``iteritems``, ``cPickle``).
    """

    def __init__(self, selfNodeAddr, otherNodesAddrs, conf=None):
        """Create the node and start it.

        :param selfNodeAddr: own address as 'host:port' (split on ':' when
            the listening socket is bound).
        :param otherNodesAddrs: iterable of addresses of the partner nodes.
        :param conf: configuration object; a default ``SyncObjConf`` is
            created when omitted.

        With ``conf.autoTick`` a background thread is started that
        initialises the node and calls ``_onTick`` periodically; otherwise
        initialisation happens inline and the caller has to drive
        ``_onTick`` itself.
        """

        if conf is None:
            self.__conf = SyncObjConf()
        else:
            self.__conf = conf

        self.__selfNodeAddr = selfNodeAddr
        self.__otherNodesAddrs = otherNodesAddrs
        self.__unknownConnections = {}  # descr => _Connection
        self.__raftState = _RAFT_STATE.FOLLOWER
        self.__raftCurrentTerm = 0
        self.__votesCount = 0
        self.__raftLeader = None
        self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
        self.__raftLog = []  # (command, logID, term)
        # The log is never empty: it starts with a placeholder entry with ID 1.
        self.__raftLog.append((None, 1, self.__raftCurrentTerm))
        self.__raftCommitIndex = 1
        self.__raftLastApplied = 1
        self.__raftNextIndex = {}
        self.__raftMatchIndex = {}
        self.__lastSerializedTime = time.time()
        self.__forceLogCompaction = False
        self.__socket = None
        self.__resolver = DnsCachingResolver(self.__conf.dnsCacheTime, self.__conf.dnsFailCacheTime)
        self.__serializer = Serializer(self.__conf.fullDumpFile, self.__conf.logCompactionBatchSize)
        self.__poller = createPoller()
        self.__isInitialized = False
        self.__lastInitTryTime = 0

        # Number every callable attribute in sorted-name order; log commands
        # refer to methods by this ID (see __doApplyCommand).
        self._methodToID = {}
        self._idToMethod = {}
        methods = sorted([m for m in dir(self) if callable(getattr(self, m))])
        for i, method in enumerate(methods):
            self._methodToID[method] = i
            self._idToMethod[i] = getattr(self, method)

        self.__thread = None
        self.__mainThread = None
        self.__initialised = None
        self.__commandsQueue = Queue.Queue(self.__conf.commandsQueueSize)
        self.__nodes = []
        self.__newAppendEntriesTime = 0

        self.__commandsWaitingCommit = collections.defaultdict(list)  # logID => [(termID, callback), ...]
        self.__commandsLocalCounter = 0
        self.__commandsWaitingReply = {}  # commandLocalCounter => callback

        # Remember the attribute names that exist at this point; only
        # attributes NOT in this set are written to the full dump
        # (see __tryLogCompaction).
        # NOTE(review): __needLoadDumpFile is first assigned later, in
        # __initInTickThread, so it is not part of this set -- confirm that
        # including it in the dump is intended.
        self.__properies = set()
        for key in self.__dict__:
            self.__properies.add(key)

        if self.__conf.autoTick:
            self.__mainThread = threading.current_thread()
            self.__initialised = threading.Event()
            # The thread only gets a weak proxy of self; _autoTickThread
            # catches the resulting ReferenceError to terminate.
            self.__thread = threading.Thread(target=SyncObj._autoTickThread, args=(weakref.proxy(self),))
            self.__thread.start()
            # Spin until the tick thread has finished its first
            # initialisation attempt.
            while not self.__initialised.is_set():
                pass
        else:
            self.__initInTickThread()

    def __initInTickThread(self):
        """Bind the listening socket and create the partner Node objects.

        Any failure is logged and leaves ``__isInitialized`` False, so
        ``_onTick`` retries after ``conf.bindRetryTime``.
        """
        try:
            self.__lastInitTryTime = time.time()
            self.__bind()
            self.__nodes = []
            for nodeAddr in self.__otherNodesAddrs:
                self.__nodes.append(Node(self, nodeAddr))
                self.__raftNextIndex[nodeAddr] = 0
                self.__raftMatchIndex[nodeAddr] = 0
            self.__needLoadDumpFile = True
            self.__isInitialized = True
        except:
            LOG_CURRENT_EXCEPTION()

    def _applyCommand(self, command, callback):
        """Queue ``command`` for processing on the next tick.

        ``callback`` is either a callable taking ``(result, failReason)``,
        None, or a ``(nodeAddr, requestID)`` tuple for commands forwarded by
        another node. When the queue is full ``callback`` is called
        immediately with ``FAIL_REASON.QUEUE_FULL``.
        """
        try:
            self.__commandsQueue.put_nowait((command, callback))
        except Queue.Full:
            callback(None, FAIL_REASON.QUEUE_FULL)

    def _checkCommandsToApply(self):
        """Take up to ``conf.maxCommandsPerTick`` queued commands and route them.

        As leader the command is appended to the log; as a non-leader with a
        known leader it is forwarded to the leader; without a leader the
        requester gets an error.
        """
        for i in xrange(self.__conf.maxCommandsPerTick):
            if self.__raftLeader is None and self.__conf.commandsWaitLeader:
                break
            try:
                command, callback = self.__commandsQueue.get_nowait()
            except Queue.Empty:
                break

            # A tuple callback marks a command forwarded from another node.
            requestNode, requestID = None, None
            if isinstance(callback, tuple):
                requestNode, requestID = callback

            if self.__raftState == _RAFT_STATE.LEADER:
                idx, term = self.__getCurrentLogIndex() + 1, self.__raftCurrentTerm
                self.__raftLog.append((command, idx, term))
                if requestNode is None:
                    if callback is not None:
                        self.__commandsWaitingCommit[idx].append((term, callback))
                else:
                    # Tell the forwarding node where the command landed so it
                    # can wait for that entry to be applied.
                    self.__send(requestNode, {
                        'type': 'apply_command_response',
                        'request_id': requestID,
                        'log_idx': idx,
                        'log_term': term,
                    })
                if not self.__conf.appendEntriesUseBatch:
                    self.__sendAppendEntries()
            elif self.__raftLeader is not None:
                if requestNode is None:
                    message = {
                        'type': 'apply_command',
                        'command': command,
                    }

                    if callback is not None:
                        self.__commandsLocalCounter += 1
                        self.__commandsWaitingReply[self.__commandsLocalCounter] = callback
                        message['request_id'] = self.__commandsLocalCounter

                    self.__send(self.__raftLeader, message)
                else:
                    # Forwarded commands are not forwarded a second time.
                    self.__send(requestNode, {
                        'type': 'apply_command_response',
                        'request_id': requestID,
                        'error': FAIL_REASON.NOT_LEADER,
                    })
            else:
                if requestNode is None:
                    callback(None, FAIL_REASON.MISSING_LEADER)
                else:
                    self.__send(requestNode, {
                        'type': 'apply_command_response',
                        'request_id': requestID,
                        'error': FAIL_REASON.NOT_LEADER,
                    })

    def _autoTickThread(self):
        """Body of the background thread used when ``conf.autoTick`` is set.

        Runs the initialisation, signals the constructor, then ticks until
        the thread that created the object is no longer alive or the weak
        proxy passed as ``self`` becomes invalid.
        """
        self.__initInTickThread()
        self.__initialised.set()
        time.sleep(0.1)
        try:
            while True:
                if not self.__mainThread.is_alive():
                    break
                self._onTick(self.__conf.autoTickPeriod)
        except ReferenceError:
            pass

    def _onTick(self, timeToWait=0.0):
        """Run one iteration of the node's main loop.

        :param timeToWait: passed to the poller (or slept, while the node is
            not initialised) at the end of the iteration.
        """
        if not self.__isInitialized:
            if time.time() >= self.__lastInitTryTime + self.__conf.bindRetryTime:
                self.__initInTickThread()

        if not self.__isInitialized:
            time.sleep(timeToWait)
            return

        if self.__needLoadDumpFile:
            if self.__conf.fullDumpFile is not None and os.path.isfile(self.__conf.fullDumpFile):
                self.__loadDumpFile()
            self.__needLoadDumpFile = False

        # Election timeout: become a candidate, vote for ourselves and ask
        # every other node for its vote.
        if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE):
            if self.__raftElectionDeadline < time.time():
                self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
                self.__raftLeader = None
                self.__raftState = _RAFT_STATE.CANDIDATE
                self.__raftCurrentTerm += 1
                self.__votesCount = 1
                for node in self.__nodes:
                    node.send({
                        'type': 'request_vote',
                        'term': self.__raftCurrentTerm,
                        'last_log_index': self.__getCurrentLogIndex(),
                        'last_log_term': self.__getCurrentLogTerm(),
                    })
                self.__onLeaderChanged()

        if self.__raftState == _RAFT_STATE.LEADER:
            # Advance the commit index one entry at a time while more than
            # half of the cluster (this node included) has the entry.
            while self.__raftCommitIndex < self.__getCurrentLogIndex():
                nextCommitIndex = self.__raftCommitIndex + 1
                count = 1
                for node in self.__nodes:
                    if self.__raftMatchIndex[node.getAddress()] >= nextCommitIndex:
                        count += 1
                if count > (len(self.__nodes) + 1) / 2:
                    self.__raftCommitIndex = nextCommitIndex
                else:
                    break

            if time.time() > self.__newAppendEntriesTime:
                self.__sendAppendEntries()

        # Apply newly committed entries and notify the waiting callbacks.
        if self.__raftCommitIndex > self.__raftLastApplied:
            count = self.__raftCommitIndex - self.__raftLastApplied
            entries = self.__getEntries(self.__raftLastApplied + 1, count)
            for entry in entries:
                currentTermID = entry[2]
                subscribers = self.__commandsWaitingCommit.pop(entry[1], [])
                res = self.__doApplyCommand(entry[0])
                for subscribeTermID, callback in subscribers:
                    # A term mismatch means a different entry ended up at
                    # this log ID than the one the subscriber submitted.
                    if subscribeTermID == currentTermID:
                        callback(res, FAIL_REASON.SUCCESS)
                    else:
                        callback(None, FAIL_REASON.DISCARDED)

                self.__raftLastApplied += 1

        self._checkCommandsToApply()
        self.__tryLogCompaction()

        for node in self.__nodes:
            node.connectIfRequired()

        self.__poller.poll(timeToWait)

    def _getLastCommitIndex(self):
        """Return the current commit index."""
        return self.__raftCommitIndex

    def _printStatus(self):
        """Log a debug summary of the node (leader, partners, log size, indexes)."""
        LOG_DEBUG('self', self.__selfNodeAddr)
        LOG_DEBUG('leader', self.__raftLeader)
        LOG_DEBUG('partner nodes', len(self.__nodes))
        for n in self.__nodes:
            LOG_DEBUG(n.getAddress(), n.getStatus())
        LOG_DEBUG('log len:', len(self.__raftLog))
        LOG_DEBUG('log size bytes:', len(zlib.compress(cPickle.dumps(self.__raftLog, -1))))
        LOG_DEBUG('last applied:', self.__raftLastApplied)
        LOG_DEBUG('commit idx:', self.__raftCommitIndex)
        LOG_DEBUG('next node idx:', self.__raftNextIndex)

    def _forceLogCompaction(self):
        """Request a log compaction regardless of the size/time thresholds."""
        self.__forceLogCompaction = True

    def __doApplyCommand(self, command):
        """Execute a log command and return the method's result.

        ``command`` is a bare method ID, ``(funcID, args)`` or
        ``(funcID, args, kwargs)``. The method is always called with
        ``_doApply=True`` in its keyword arguments.
        """
        args = []
        kwargs = {
            '_doApply': True,
        }
        if not isinstance(command, tuple):
            funcID = command
        elif len(command) == 2:
            funcID, args = command
        else:
            funcID, args, newKwArgs = command
            kwargs.update(newKwArgs)

        return self._idToMethod[funcID](*args, **kwargs)

    def _onMessageReceived(self, nodeAddr, message):
        """Handle a protocol message (a dict with a 'type' key) from ``nodeAddr``.

        Handled types: 'request_vote', 'append_entries', 'apply_command',
        'apply_command_response', 'response_vote' (only while candidate) and
        'next_node_idx' (only while leader).
        """

        if message['type'] == 'request_vote':

            # A higher term always demotes this node to follower.
            if message['term'] > self.__raftCurrentTerm:
                self.__raftCurrentTerm = message['term']
                self.__raftState = _RAFT_STATE.FOLLOWER

            if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE):
                lastLogTerm = message['last_log_term']
                lastLogIdx = message['last_log_index']
                if message['term'] >= self.__raftCurrentTerm:
                    # Refuse the vote when the requester's log is behind ours.
                    if lastLogTerm < self.__getCurrentLogTerm():
                        return
                    if lastLogTerm == self.__getCurrentLogTerm() and \
                            lastLogIdx < self.__getCurrentLogIndex():
                        return

                    self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
                    self.__send(nodeAddr, {
                        'type': 'response_vote',
                        'term': message['term'],
                    })

        if message['type'] == 'append_entries' and message['term'] >= self.__raftCurrentTerm:
            # The sender is accepted as leader for this term.
            self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
            if self.__raftLeader != nodeAddr:
                self.__onLeaderChanged()
            self.__raftLeader = nodeAddr
            self.__raftCurrentTerm = message['term']
            self.__raftState = _RAFT_STATE.FOLLOWER
            newEntries = message.get('entries', [])
            serialized = message.get('serialized', None)
            leaderCommitIndex = message['commit_index']

            # Regular append entries
            if 'prevLogIdx' in message:
                prevLogIdx = message['prevLogIdx']
                prevLogTerm = message['prevLogTerm']
                prevEntries = self.__getEntries(prevLogIdx)
                if not prevEntries:
                    # The preceding entry is not in our log: ask the leader
                    # to restart from our current position.
                    self.__sendNextNodeIdx(nodeAddr, reset=True)
                    return
                if prevEntries[0][2] != prevLogTerm:
                    # Term conflict at prevLogIdx: drop it and what follows.
                    self.__deleteEntriesFrom(prevLogIdx)
                    self.__sendNextNodeIdx(nodeAddr, reset=True)
                    return
                if len(prevEntries) > 1:
                    # Entries after prevLogIdx are replaced by the new ones.
                    self.__deleteEntriesFrom(prevLogIdx + 1)
                self.__raftLog += newEntries

            # Install snapshot
            elif serialized is not None:
                if self.__serializer.setTransmissionData(serialized):
                    self.__loadDumpFile()

            self.__sendNextNodeIdx(nodeAddr)

            self.__raftCommitIndex = min(leaderCommitIndex, self.__getCurrentLogIndex())

        if message['type'] == 'apply_command':
            if 'request_id' in message:
                self._applyCommand(message['command'], (nodeAddr, message['request_id']))
            else:
                self._applyCommand(message['command'], None)

        if message['type'] == 'apply_command_response':
            requestID = message['request_id']
            error = message.get('error', None)
            callback = self.__commandsWaitingReply.pop(requestID, None)
            if callback is not None:
                if error is not None:
                    callback(None, error)
                else:
                    # The leader accepted the command: wait for that log
                    # entry to be applied locally before calling back.
                    idx = message['log_idx']
                    term = message['log_term']
                    assert idx > self.__raftLastApplied
                    self.__commandsWaitingCommit[idx].append((term, callback))

        if self.__raftState == _RAFT_STATE.CANDIDATE:
            if message['type'] == 'response_vote' and message['term'] == self.__raftCurrentTerm:
                self.__votesCount += 1

                if self.__votesCount > (len(self.__nodes) + 1) / 2:
                    self.__onBecomeLeader()

        if self.__raftState == _RAFT_STATE.LEADER:
            if message['type'] == 'next_node_idx':
                reset = message['reset']
                nextNodeIdx = message['next_node_idx']

                currentNodeIdx = nextNodeIdx - 1
                # The next index is only overwritten on an explicit reset;
                # the match index is updated on every report.
                if reset:
                    self.__raftNextIndex[nodeAddr] = nextNodeIdx
                self.__raftMatchIndex[nodeAddr] = currentNodeIdx

    def __sendNextNodeIdx(self, nodeAddr, reset=False):
        """Report to ``nodeAddr`` the log index this node expects next."""
        self.__send(nodeAddr, {
            'type': 'next_node_idx',
            'next_node_idx': self.__getCurrentLogIndex() + 1,
            'reset': reset,
        })

    def __generateRaftTimeout(self):
        """Return a random timeout between conf.raftMinTimeout and conf.raftMaxTimeout."""
        minTimeout = self.__conf.raftMinTimeout
        maxTimeout = self.__conf.raftMaxTimeout
        return minTimeout + (maxTimeout - minTimeout) * random.random()

    def __bind(self):
        """Create the non-blocking listening socket and register it with the poller."""
        self.__socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, self.__conf.sendBufferSize)
        self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, self.__conf.recvBufferSize)
        self.__socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.__socket.setblocking(0)
        host, port = self.__selfNodeAddr.split(':')
        self.__socket.bind((host, int(port)))
        self.__socket.listen(5)
        self.__poller.subscribe(self.__socket.fileno(),
                                self.__onNewConnection,
                                POLL_EVENT_TYPE.READ | POLL_EVENT_TYPE.ERROR)

    def __onNewConnection(self, localDescr, event):
        """Poller callback for the listening socket.

        Accepts an incoming connection and keeps it in
        ``__unknownConnections`` until the peer identifies itself. Socket
        errors other than EAGAIN mark the node as uninitialised, which makes
        ``_onTick`` attempt a re-initialisation.
        """
        if event & POLL_EVENT_TYPE.READ:
            try:
                sock, addr = self.__socket.accept()
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, self.__conf.sendBufferSize)
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, self.__conf.recvBufferSize)
                sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                sock.setblocking(0)
                conn = Connection(socket=sock, timeout=self.__conf.connectionTimeout)
                descr = conn.fileno()
                self.__unknownConnections[descr] = conn
                self.__poller.subscribe(descr,
                                        self.__processUnknownConnections,
                                        POLL_EVENT_TYPE.READ | POLL_EVENT_TYPE.ERROR)
            except socket.error as e:
                if e.errno != socket.errno.EAGAIN:
                    self.__isInitialized = False
                    LOG_WARNING('Error in main socket:' + str(e))

        if event & POLL_EVENT_TYPE.ERROR:
            self.__isInitialized = False
            LOG_WARNING('Error in main socket')

    def __getCurrentLogIndex(self):
        """Return the log ID of the last log entry."""
        return self.__raftLog[-1][1]

    def __getCurrentLogTerm(self):
        """Return the term of the last log entry."""
        return self.__raftLog[-1][2]

    def __getPrevLogIndexTerm(self, nextNodeIndex):
        """Return ``(index, term)`` of the entry preceding ``nextNodeIndex``.

        Returns ``(None, None)`` when that entry is not in the log.
        """
        prevIndex = nextNodeIndex - 1
        entries = self.__getEntries(prevIndex, 1)
        if entries:
            return prevIndex, entries[0][2]
        return None, None

    def __getEntries(self, fromIDx, count=None):
        """Return log entries starting at log ID ``fromIDx``.

        At most ``count`` entries are returned (all remaining ones when
        ``count`` is None). An empty list is returned when ``fromIDx`` is
        None or lies before the first entry kept in the log.
        """
        firstEntryIDx = self.__raftLog[0][1]
        if fromIDx is None or fromIDx < firstEntryIDx:
            return []
        # Log IDs are translated to list positions relative to the first entry.
        diff = fromIDx - firstEntryIDx
        if count is None:
            return self.__raftLog[diff:]
        return self.__raftLog[diff:diff + count]

    def _isLeader(self):
        """Return True when this node is currently the leader."""
        return self.__raftState == _RAFT_STATE.LEADER

    def _getLeader(self):
        """Return the address of the known leader, or None."""
        return self.__raftLeader

    def _getRaftLogSize(self):
        """Return the number of entries currently kept in the log."""
        return len(self.__raftLog)

    def __deleteEntriesFrom(self, fromIDx):
        """Drop the entry with log ID ``fromIDx`` and everything after it."""
        firstEntryIDx = self.__raftLog[0][1]
        diff = fromIDx - firstEntryIDx
        if diff < 0:
            return
        self.__raftLog = self.__raftLog[:diff]

    def __deleteEntriesTo(self, toIDx):
        """Drop all entries before log ID ``toIDx`` (that entry itself is kept)."""
        firstEntryIDx = self.__raftLog[0][1]
        diff = toIDx - firstEntryIDx
        if diff < 0:
            return
        self.__raftLog = self.__raftLog[diff:]

    def __onBecomeLeader(self):
        """Switch to the leader role and reset the per-node replication indexes."""
        self.__raftLeader = self.__selfNodeAddr
        self.__raftState = _RAFT_STATE.LEADER

        for node in self.__nodes:
            nodeAddr = node.getAddress()
            self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1
            self.__raftMatchIndex[nodeAddr] = 0

        self.__sendAppendEntries()

    def __onLeaderChanged(self):
        """Fail every command still waiting for a reply from the previous leader."""
        for id in sorted(self.__commandsWaitingReply):
            self.__commandsWaitingReply[id](None, FAIL_REASON.LEADER_CHANGED)
        self.__commandsWaitingReply = {}

    def __sendAppendEntries(self):
        """Send 'append_entries' messages to every connected node.

        A node whose next index is still inside the log receives log entries
        in batches of ``conf.appendEntriesBatchSize``; a node that is behind
        the start of the log receives chunks of the serialized full dump.
        Every connected node gets at least one message, which may carry no
        entries. The whole call stops once it has taken longer than
        ``conf.appendEntriesPeriod``.
        """
        self.__newAppendEntriesTime = time.time() + self.__conf.appendEntriesPeriod

        startTime = time.time()

        for node in self.__nodes:
            nodeAddr = node.getAddress()

            if not node.isConnected():
                self.__serializer.cancelTransmisstion(nodeAddr)
                continue

            # sendSingle forces one message even when nothing is pending.
            sendSingle = True
            sendingSerialized = False
            nextNodeIndex = self.__raftNextIndex[nodeAddr]

            while nextNodeIndex <= self.__getCurrentLogIndex() or sendSingle or sendingSerialized:
                if nextNodeIndex >= self.__raftLog[0][1]:
                    prevLogIdx, prevLogTerm = self.__getPrevLogIndexTerm(nextNodeIndex)
                    entries = []
                    if nextNodeIndex <= self.__getCurrentLogIndex():
                        entries = self.__getEntries(nextNodeIndex, self.__conf.appendEntriesBatchSize)
                        # Move the next index forward without waiting for
                        # the node to acknowledge the batch.
                        self.__raftNextIndex[nodeAddr] = entries[-1][1] + 1

                    message = {
                        'type': 'append_entries',
                        'term': self.__raftCurrentTerm,
                        'commit_index': self.__raftCommitIndex,
                        'entries': entries,
                        'prevLogIdx': prevLogIdx,
                        'prevLogTerm': prevLogTerm,
                    }
                    node.send(message)
                else:
                    # The entries the node needs were compacted away:
                    # send the full dump instead.
                    transmissionData = self.__serializer.getTransmissionData(nodeAddr)
                    message = {
                        'type': 'append_entries',
                        'term': self.__raftCurrentTerm,
                        'commit_index': self.__raftCommitIndex,
                        'serialized': transmissionData,
                    }
                    node.send(message)
                    if transmissionData is not None:
                        # Element 2 flags the last chunk of the dump.
                        isLast = transmissionData[2]
                        if isLast:
                            self.__raftNextIndex[nodeAddr] = self.__raftLog[0][1]
                            sendingSerialized = False
                        else:
                            sendingSerialized = True
                    else:
                        sendingSerialized = False

                nextNodeIndex = self.__raftNextIndex[nodeAddr]

                sendSingle = False

                delta = time.time() - startTime
                if delta > self.__conf.appendEntriesPeriod:
                    break

    def __send(self, nodeAddr, message):
        """Send ``message`` to the node with address ``nodeAddr``.

        Nothing happens when no such node is known.
        """
        for node in self.__nodes:
            if node.getAddress() == nodeAddr:
                node.send(message)
                break

    def __processUnknownConnections(self, descr, event):
        """Poller callback for accepted connections that are not yet identified.

        The first message read from the connection is taken as the peer's
        node address. A connection from a known address is handed over to
        that Node; a connection from an unknown address, with an error event
        or already disconnected is closed.
        """
        conn = self.__unknownConnections[descr]
        partnerNode = None
        remove = False
        if event & POLL_EVENT_TYPE.READ:
            conn.read()
            nodeAddr = conn.getMessage()
            if nodeAddr is not None:
                for node in self.__nodes:
                    if node.getAddress() == nodeAddr:
                        partnerNode = node
                        break
                else:
                    # for-else: the address matches none of our nodes.
                    remove = True

        if event & POLL_EVENT_TYPE.ERROR:
            remove = True

        if remove or conn.isDisconnected():
            self.__unknownConnections.pop(descr)
            self.__poller.unsubscribe(descr)
            conn.close()
            return

        if partnerNode is not None:
            self.__unknownConnections.pop(descr)
            assert conn.fileno() is not None
            partnerNode.onPartnerConnected(conn)

    def _getSelfNodeAddr(self):
        """Return this node's own address."""
        return self.__selfNodeAddr

    def _getConf(self):
        """Return the configuration object."""
        return self.__conf

    def _getResolver(self):
        """Return the DNS caching resolver."""
        return self.__resolver

    def _getPoller(self):
        """Return the poller."""
        return self.__poller

    def __tryLogCompaction(self):
        """Finish a running full-dump serialization or start a new one.

        A new dump is started only when the serializer is idle and either
        the log holds more than ``conf.logCompactionMinEntries`` entries,
        more than ``conf.logCompactionMinTime`` has passed since the last
        dump, or compaction was forced.
        """
        currTime = time.time()
        serializeState, serializeID = self.__serializer.checkSerializing()

        if serializeState == SERIALIZER_STATE.SUCCESS:
            self.__lastSerializedTime = currTime
            self.__deleteEntriesTo(serializeID)

        if serializeState == SERIALIZER_STATE.FAILED:
            LOG_WARNING("Failed to store full dump")

        if serializeState != SERIALIZER_STATE.NOT_SERIALIZING:
            return

        if len(self.__raftLog) <= self.__conf.logCompactionMinEntries and \
                currTime - self.__lastSerializedTime <= self.__conf.logCompactionMinTime and\
                not self.__forceLogCompaction:
            return

        self.__forceLogCompaction = False

        # The last applied entry and its predecessor are stored with the
        # dump; __loadDumpFile rebuilds the log from exactly these two.
        lastAppliedEntries = self.__getEntries(self.__raftLastApplied - 1, 2)
        if not lastAppliedEntries:
            return

        # Only attributes that did not exist at the end of __init__ are dumped.
        data = dict([(k, self.__dict__[k]) for k in self.__dict__.keys() if k not in self.__properies])
        self.__serializer.serialize((data, lastAppliedEntries[1], lastAppliedEntries[0]), lastAppliedEntries[1][1])

    def __loadDumpFile(self):
        """Restore the attributes and the log tail from the serializer's dump.

        Failures are logged and otherwise ignored.
        """
        try:
            data = self.__serializer.deserialize()
            for k, v in data[0].iteritems():
                self.__dict__[k] = v
            # data[1] is the last applied entry, data[2] the one before it.
            self.__raftLog = [data[2], data[1]]
            self.__raftLastApplied = data[1][1]
        except:
            LOG_WARNING('Failed to load full dump')
            LOG_CURRENT_EXCEPTION()
class SyncObj(object):
    """Raft-replicated object.

    Keeps a replicated command log, takes part in leader elections,
    forwards commands to the current leader and applies committed
    commands through the ``_idToMethod`` table built in ``__init__``.
    """

    def __init__(self, selfNodeAddr, otherNodesAddrs, conf=None):
        """Set up Raft state, journal, serializer and the TCP server.

        :param selfNodeAddr: address of the current node server, 'host:port'
        :type selfNodeAddr: str
        :param otherNodesAddrs: addresses of partner nodes
        :type otherNodesAddrs: list of str
        :param conf: configuration object; a default ``SyncObjConf`` is
            created when omitted
        :raises ImportError: a password is configured but ``HAS_CRYPTO``
            is false
        """
        if conf is None:
            self.__conf = SyncObjConf()
        else:
            self.__conf = conf

        self.__conf.validate()

        if self.__conf.password is not None:
            if not HAS_CRYPTO:
                raise ImportError("Please install 'cryptography' module")
            self.__encryptor = getEncryptor(self.__conf.password)
        else:
            self.__encryptor = None

        self.__selfNodeAddr = selfNodeAddr
        self.__otherNodesAddrs = otherNodesAddrs
        self.__unknownConnections = {}  # descr => _Connection
        self.__raftState = _RAFT_STATE.FOLLOWER
        self.__raftCurrentTerm = 0
        self.__votedFor = None
        self.__votesCount = 0
        self.__raftLeader = None
        self.__raftElectionDeadline = \
            time.time() + self.__generateRaftTimeout()
        self.__raftLog = createJournal(self.__conf.journalFile)
        if len(self.__raftLog) == 0:
            # The log is never empty: seed it with a no-op at index 1.
            self.__raftLog.add(_bchr(_COMMAND_TYPE.NO_OP), 1,
                               self.__raftCurrentTerm)
        self.__raftCommitIndex = 1
        self.__raftLastApplied = 1
        self.__raftNextIndex = {}
        self.__raftMatchIndex = {}
        self.__lastSerializedTime = time.time()
        self.__lastSerializedEntry = None
        self.__forceLogCompaction = False
        self.__leaderCommitIndex = None
        self.__onReadyCalled = False
        self.__changeClusterIDx = None
        self.__noopIDx = None
        self.__destroying = False

        self.__startTime = time.time()
        globalDnsResolver().setTimeouts(self.__conf.dnsCacheTime,
                                        self.__conf.dnsFailCacheTime)
        self.__serializer = Serializer(self.__conf.fullDumpFile,
                                       self.__conf.logCompactionBatchSize,
                                       self.__conf.useFork,
                                       self.__conf.serializer,
                                       self.__conf.deserializer,
                                       self.__conf.serializeChecker)
        self.__isInitialized = False
        self.__lastInitTryTime = 0
        self._poller = createPoller(self.__conf.pollerType)

        host, port = selfNodeAddr.split(':')
        self.__server = TcpServer(
            self._poller, host, port,
            onNewConnection=self.__onNewConnection,
            sendBufferSize=self.__conf.sendBufferSize,
            recvBufferSize=self.__conf.recvBufferSize,
            connectionTimeout=self.__conf.connectionTimeout)

        # Every callable attribute gets a numeric id (sorted by name);
        # __doApplyCommand resolves logged function ids through this table.
        self._methodToID = {}
        self._idToMethod = {}
        methods = sorted([m for m in dir(self) if callable(getattr(self, m))])
        for i, method in enumerate(methods):
            self._methodToID[method] = i
            self._idToMethod[i] = getattr(self, method)

        self.__thread = None
        self.__mainThread = None
        self.__initialised = None
        self.__commandsQueue = Queue.Queue(self.__conf.commandsQueueSize)
        self.__nodes = []
        self.__newAppendEntriesTime = 0

        # logID => [(termID, callback), ...]
        self.__commandsWaitingCommit = collections.defaultdict(list)
        self.__commandsLocalCounter = 0
        self.__commandsWaitingReply = {}  # commandLocalCounter => callback

        # Snapshot of the attribute names that exist at this point.
        # __tryLogCompaction excludes them from the default full dump.
        # Keep the two-step form: the set must contain its own key.
        self.__properies = set()
        for key in self.__dict__:
            self.__properies.add(key)

        if self.__conf.autoTick:
            self.__mainThread = threading.current_thread()
            self.__initialised = threading.Event()
            self.__thread = threading.Thread(
                target=SyncObj._autoTickThread,
                args=(weakref.proxy(self),))
            self.__thread.start()
            # Block until the tick thread finished its init attempt.
            # A short timed wait replaces the former busy spin.
            while not self.__initialised.is_set():
                self.__initialised.wait(0.01)
        else:
            self.__initInTickThread()

    def _destroy(self):
        """Tear the object down, or ask the auto-tick thread to do so."""
        if self.__conf.autoTick:
            self.__destroying = True
        else:
            self._doDestroy()

    def _doDestroy(self):
        """Destroy partner nodes, unbind the server, destroy the journal."""
        for node in self.__nodes:
            node._destroy()
        self.__server.unbind()
        self.__raftLog._destroy()

    def __initInTickThread(self):
        """Bind the server and create partner nodes.

        Failures are logged; ``__isInitialized`` then stays false and
        ``_onTick`` retries later.
        """
        try:
            self.__lastInitTryTime = time.time()
            self.__server.bind()
            self.__nodes = []
            for nodeAddr in self.__otherNodesAddrs:
                self.__nodes.append(Node(self, nodeAddr))
                self.__raftNextIndex[nodeAddr] = \
                    self.__getCurrentLogIndex() + 1
                self.__raftMatchIndex[nodeAddr] = 0
            self.__needLoadDumpFile = True
            self.__isInitialized = True
        except Exception:
            # Best effort, but let KeyboardInterrupt/SystemExit propagate.
            LOG_CURRENT_EXCEPTION()

    def _addNodeToCluster(self, nodeName, callback=None):
        """Queue a membership command that adds ``nodeName``.

        :raises Exception: dynamicMembershipChange is disabled
        """
        if not self.__conf.dynamicMembershipChange:
            raise Exception('dynamicMembershipChange is disabled')
        self._applyCommand(cPickle.dumps(['add', nodeName]), callback,
                           _COMMAND_TYPE.MEMBERSHIP)

    def _removeNodeFromCluster(self, nodeName, callback=None):
        """Queue a membership command that removes ``nodeName``.

        :raises Exception: dynamicMembershipChange is disabled
        """
        if not self.__conf.dynamicMembershipChange:
            raise Exception('dynamicMembershipChange is disabled')
        self._applyCommand(cPickle.dumps(['rem', nodeName]), callback,
                           _COMMAND_TYPE.MEMBERSHIP)

    def _applyCommand(self, command, callback, commandType=None):
        """Enqueue ``command`` for ``_checkCommandsToApply``.

        When ``commandType`` is given its byte is prepended to the command.
        A full queue is reported through the callback as QUEUE_FULL.
        """
        try:
            if commandType is None:
                self.__commandsQueue.put_nowait((command, callback))
            else:
                self.__commandsQueue.put_nowait(
                    (_bchr(commandType) + command, callback))
        except Queue.Full:
            self.__callErrCallback(FAIL_REASON.QUEUE_FULL, callback)

    def _checkCommandsToApply(self):
        """Drain queued commands for at most one appendEntriesPeriod.

        The leader appends them to the log, a follower forwards them to
        the leader, and without a leader the callback gets MISSING_LEADER.
        ``callback`` is either a callable or a ``(nodeAddr, requestID)``
        tuple for commands forwarded by another node.
        """
        startTime = time.time()

        while time.time() - startTime < self.__conf.appendEntriesPeriod:
            if self.__raftLeader is None and self.__conf.commandsWaitLeader:
                break
            try:
                command, callback = self.__commandsQueue.get_nowait()
            except Queue.Empty:
                break

            requestNode, requestID = None, None
            if isinstance(callback, tuple):
                requestNode, requestID = callback

            if self.__raftState == _RAFT_STATE.LEADER:
                idx = self.__getCurrentLogIndex() + 1
                term = self.__raftCurrentTerm

                if self.__conf.dynamicMembershipChange:
                    changeClusterRequest = \
                        self.__parseChangeClusterRequest(command)
                else:
                    changeClusterRequest = None

                # NOTE(review): when __changeCluster() rejects the request
                # the command is dropped and its callback is never invoked.
                if changeClusterRequest is None or \
                        self.__changeCluster(changeClusterRequest):

                    self.__raftLog.add(command, idx, term)

                    if requestNode is None:
                        if callback is not None:
                            self.__commandsWaitingCommit[idx].append(
                                (term, callback))
                    else:
                        self.__send(requestNode, {
                            'type': 'apply_command_response',
                            'request_id': requestID,
                            'log_idx': idx,
                            'log_term': term,
                        })
                    if not self.__conf.appendEntriesUseBatch:
                        self.__sendAppendEntries()

            elif self.__raftLeader is not None:
                if requestNode is None:
                    message = {
                        'type': 'apply_command',
                        'command': command,
                    }

                    if callback is not None:
                        self.__commandsLocalCounter += 1
                        self.__commandsWaitingReply[
                            self.__commandsLocalCounter] = callback
                        message['request_id'] = self.__commandsLocalCounter

                    self.__send(self.__raftLeader, message)
                else:
                    self.__send(requestNode, {
                        'type': 'apply_command_response',
                        'request_id': requestID,
                        'error': FAIL_REASON.NOT_LEADER,
                    })
            else:
                self.__callErrCallback(FAIL_REASON.MISSING_LEADER, callback)

    def _autoTickThread(self):
        """Body of the auto-tick thread.

        Ticks until the main thread dies or destruction is requested.
        """
        self.__initInTickThread()
        self.__initialised.set()
        time.sleep(0.1)
        try:
            while True:
                if not self.__mainThread.is_alive():
                    break
                if self.__destroying:
                    self._doDestroy()
                    break
                self._onTick(self.__conf.autoTickPeriod)
        except ReferenceError:
            # The thread is started with weakref.proxy(self); the proxy
            # raises once the object is gone, which ends the thread.
            pass

    def _onTick(self, timeToWait=0.0):
        """Run one iteration of the Raft state machine.

        Covers init retry, dump loading, elections, commit advance,
        applying entries, the command queue, compaction and polling.

        :param timeToWait: passed to the poller, or slept when the object
            is not initialized yet
        """
        if not self.__isInitialized:
            if time.time() >= \
                    self.__lastInitTryTime + self.__conf.bindRetryTime:
                self.__initInTickThread()

        if not self.__isInitialized:
            time.sleep(timeToWait)
            return

        if self.__needLoadDumpFile:
            if self.__conf.fullDumpFile is not None and \
                    os.path.isfile(self.__conf.fullDumpFile):
                self.__loadDumpFile(clearJournal=False)
            self.__needLoadDumpFile = False

        if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE):
            if self.__raftElectionDeadline < time.time() and \
                    self.__connectedToAnyone():
                # Election timeout: start a new term and vote for self.
                self.__raftElectionDeadline = \
                    time.time() + self.__generateRaftTimeout()
                self.__raftLeader = None
                self.__raftState = _RAFT_STATE.CANDIDATE
                self.__raftCurrentTerm += 1
                self.__votedFor = self._getSelfNodeAddr()
                self.__votesCount = 1
                for node in self.__nodes:
                    node.send({
                        'type': 'request_vote',
                        'term': self.__raftCurrentTerm,
                        'last_log_index': self.__getCurrentLogIndex(),
                        'last_log_term': self.__getCurrentLogTerm(),
                    })
                self.__onLeaderChanged()

        if self.__raftState == _RAFT_STATE.LEADER:
            # Advance the commit index while a majority (self included)
            # has the next entry.
            while self.__raftCommitIndex < self.__getCurrentLogIndex():
                nextCommitIndex = self.__raftCommitIndex + 1
                count = 1
                for node in self.__nodes:
                    if self.__raftMatchIndex[node.getAddress()] >= \
                            nextCommitIndex:
                        count += 1
                if count > (len(self.__nodes) + 1) / 2:
                    self.__raftCommitIndex = nextCommitIndex
                else:
                    break
            self.__leaderCommitIndex = self.__raftCommitIndex

            if time.time() > self.__newAppendEntriesTime:
                self.__sendAppendEntries()

        if self.__raftCommitIndex > self.__raftLastApplied:
            count = self.__raftCommitIndex - self.__raftLastApplied
            entries = self.__getEntries(self.__raftLastApplied + 1, count)
            for entry in entries:
                currentTermID = entry[2]
                subscribers = self.__commandsWaitingCommit.pop(entry[1], [])
                res = self.__doApplyCommand(entry[0])
                for subscribeTermID, callback in subscribers:
                    # A different term at this index means the waiting
                    # command was replaced by another entry.
                    if subscribeTermID == currentTermID:
                        callback(res, FAIL_REASON.SUCCESS)
                    else:
                        callback(None, FAIL_REASON.DISCARDED)

                self.__raftLastApplied += 1

        if not self.__onReadyCalled and \
                self.__raftLastApplied == self.__leaderCommitIndex:
            if self.__conf.onReady:
                self.__conf.onReady()
            self.__onReadyCalled = True

        self._checkCommandsToApply()
        self.__tryLogCompaction()

        for node in self.__nodes:
            node.connectIfRequired()

        self._poller.poll(timeToWait)

    def _printStatus(self):
        """Log the current Raft state through LOG_DEBUG."""
        LOG_DEBUG('version', VERSION, REVISION)
        LOG_DEBUG('self', self.__selfNodeAddr)
        LOG_DEBUG('state:', self.__raftState)
        LOG_DEBUG('leader', self.__raftLeader)
        LOG_DEBUG('partner nodes', len(self.__nodes))
        for n in self.__nodes:
            LOG_DEBUG(n.getAddress(), n.getStatus())
        LOG_DEBUG('log len:', len(self.__raftLog))
        LOG_DEBUG('last applied:', self.__raftLastApplied)
        LOG_DEBUG('commit idx:', self.__raftCommitIndex)
        LOG_DEBUG('raft term:', self.__raftCurrentTerm)
        LOG_DEBUG('next node idx:', self.__raftNextIndex)
        LOG_DEBUG('match idx:', self.__raftMatchIndex)
        LOG_DEBUG('leader commit idx:', self.__leaderCommitIndex)
        LOG_DEBUG('uptime:', int(time.time() - self.__startTime))
        LOG_DEBUG('')

    def _forceLogCompaction(self):
        """Request compaction on the next ``__tryLogCompaction`` call."""
        self.__forceLogCompaction = True

    def __doApplyCommand(self, command):
        """Apply one committed log command.

        :return: result of the replicated method, or None for commands
            that are not of REGULAR type
        """
        commandType = ord(command[:1])
        # Skip no-op and membership change commands
        if commandType != _COMMAND_TYPE.REGULAR:
            return
        # NOTE(review): replicated commands are unpickled as-is; this is
        # only safe when every peer that can reach this node is trusted.
        command = cPickle.loads(command[1:])
        args = []
        kwargs = {
            '_doApply': True,
        }
        if not isinstance(command, tuple):
            funcID = command
        elif len(command) == 2:
            funcID, args = command
        else:
            funcID, args, newKwArgs = command
            kwargs.update(newKwArgs)

        return self._idToMethod[funcID](*args, **kwargs)

    def _onMessageReceived(self, nodeAddr, message):
        """Handle one Raft protocol message received from ``nodeAddr``."""

        if message['type'] == 'request_vote':

            if message['term'] > self.__raftCurrentTerm:
                self.__raftCurrentTerm = message['term']
                self.__votedFor = None
                self.__raftState = _RAFT_STATE.FOLLOWER
                self.__raftLeader = None

            if self.__raftState in (_RAFT_STATE.FOLLOWER,
                                    _RAFT_STATE.CANDIDATE):
                lastLogTerm = message['last_log_term']
                lastLogIdx = message['last_log_index']
                if message['term'] >= self.__raftCurrentTerm:
                    # Refuse candidates whose log is behind ours.
                    if lastLogTerm < self.__getCurrentLogTerm():
                        return
                    if lastLogTerm == self.__getCurrentLogTerm() and \
                            lastLogIdx < self.__getCurrentLogIndex():
                        return
                    if self.__votedFor is not None:
                        return

                    self.__votedFor = nodeAddr

                    self.__raftElectionDeadline = \
                        time.time() + self.__generateRaftTimeout()
                    self.__send(nodeAddr, {
                        'type': 'response_vote',
                        'term': message['term'],
                    })

        if message['type'] == 'append_entries' and \
                message['term'] >= self.__raftCurrentTerm:
            self.__raftElectionDeadline = \
                time.time() + self.__generateRaftTimeout()
            if self.__raftLeader != nodeAddr:
                self.__onLeaderChanged()
            self.__raftLeader = nodeAddr
            if message['term'] > self.__raftCurrentTerm:
                self.__raftCurrentTerm = message['term']
                self.__votedFor = None
            self.__raftState = _RAFT_STATE.FOLLOWER
            newEntries = message.get('entries', [])
            serialized = message.get('serialized', None)
            self.__leaderCommitIndex = leaderCommitIndex = \
                message['commit_index']

            # Regular append entries
            if 'prevLogIdx' in message:
                prevLogIdx = message['prevLogIdx']
                prevLogTerm = message['prevLogTerm']
                prevEntries = self.__getEntries(prevLogIdx)
                if not prevEntries:
                    self.__sendNextNodeIdx(nodeAddr, success=False,
                                           reset=True)
                    return
                if prevEntries[0][2] != prevLogTerm:
                    self.__sendNextNodeIdx(nodeAddr, nextNodeIdx=prevLogIdx,
                                           success=False, reset=True)
                    return
                if len(prevEntries) > 1:
                    # rollback cluster changes
                    if self.__conf.dynamicMembershipChange:
                        for entry in reversed(prevEntries[1:]):
                            clusterChangeRequest = \
                                self.__parseChangeClusterRequest(entry[0])
                            if clusterChangeRequest is not None:
                                self.__doChangeCluster(clusterChangeRequest,
                                                       reverse=True)

                    self.__deleteEntriesFrom(prevLogIdx + 1)
                for entry in newEntries:
                    self.__raftLog.add(*entry)

                # apply cluster changes
                if self.__conf.dynamicMembershipChange:
                    for entry in newEntries:
                        clusterChangeRequest = \
                            self.__parseChangeClusterRequest(entry[0])
                        if clusterChangeRequest is not None:
                            self.__doChangeCluster(clusterChangeRequest)

                nextNodeIdx = prevLogIdx + 1
                if newEntries:
                    # Next expected index is one past the last appended
                    # entry. The leader stores next_node_idx - 1 as the
                    # match index, so omitting +1 made it lag one entry.
                    nextNodeIdx = newEntries[-1][1] + 1

                self.__sendNextNodeIdx(nodeAddr, nextNodeIdx=nextNodeIdx,
                                       success=True)

            # Install snapshot
            elif serialized is not None:
                if self.__serializer.setTransmissionData(serialized):
                    self.__loadDumpFile(clearJournal=True)
                    self.__sendNextNodeIdx(nodeAddr, success=True)

            self.__raftCommitIndex = min(leaderCommitIndex,
                                         self.__getCurrentLogIndex())

        if message['type'] == 'apply_command':
            if 'request_id' in message:
                self._applyCommand(message['command'],
                                   (nodeAddr, message['request_id']))
            else:
                self._applyCommand(message['command'], None)

        if message['type'] == 'apply_command_response':
            requestID = message['request_id']
            error = message.get('error', None)
            callback = self.__commandsWaitingReply.pop(requestID, None)
            if callback is not None:
                if error is not None:
                    callback(None, error)
                else:
                    idx = message['log_idx']
                    term = message['log_term']
                    assert idx > self.__raftLastApplied
                    self.__commandsWaitingCommit[idx].append(
                        (term, callback))

        if self.__raftState == _RAFT_STATE.CANDIDATE:
            if message['type'] == 'response_vote' and \
                    message['term'] == self.__raftCurrentTerm:
                self.__votesCount += 1

                if self.__votesCount > (len(self.__nodes) + 1) / 2:
                    self.__onBecomeLeader()

        if self.__raftState == _RAFT_STATE.LEADER:
            if message['type'] == 'next_node_idx':
                reset = message['reset']
                nextNodeIdx = message['next_node_idx']
                success = message['success']

                currentNodeIdx = nextNodeIdx - 1
                if reset:
                    self.__raftNextIndex[nodeAddr] = nextNodeIdx
                if success:
                    self.__raftMatchIndex[nodeAddr] = currentNodeIdx

    def __callErrCallback(self, err, callback):
        """Report ``err`` to a local callable or a remote requester tuple."""
        if callback is None:
            return
        if isinstance(callback, tuple):
            requestNode, requestID = callback
            self.__send(requestNode, {
                'type': 'apply_command_response',
                'request_id': requestID,
                'error': err,
            })
            return
        callback(None, err)

    def __sendNextNodeIdx(self, nodeAddr, reset=False, nextNodeIdx=None,
                          success=False):
        """Send a 'next_node_idx' reply.

        ``nextNodeIdx`` defaults to the current log index + 1.
        """
        if nextNodeIdx is None:
            nextNodeIdx = self.__getCurrentLogIndex() + 1
        self.__send(nodeAddr, {
            'type': 'next_node_idx',
            'next_node_idx': nextNodeIdx,
            'reset': reset,
            'success': success,
        })

    def __generateRaftTimeout(self):
        """Return a random timeout in [raftMinTimeout, raftMaxTimeout)."""
        minTimeout = self.__conf.raftMinTimeout
        maxTimeout = self.__conf.raftMaxTimeout
        return minTimeout + (maxTimeout - minTimeout) * random.random()

    def __onNewConnection(self, conn):
        """Register an incoming connection until the peer names itself."""
        descr = conn.fileno()
        self.__unknownConnections[descr] = conn
        if self.__encryptor:
            conn.encryptor = self.__encryptor

        conn.setOnMessageReceivedCallback(
            functools.partial(self.__onMessageReceived, conn))
        conn.setOnDisconnectedCallback(
            functools.partial(self.__onDisconnected, conn))

    def __onMessageReceived(self, conn, message):
        """Handle a message on a not-yet-identified connection.

        With encryption on, the first message is stored as the send key
        and a fresh receive key is sent back. Otherwise the message is
        taken as the partner's address.
        """
        if self.__encryptor and not conn.sendRandKey:
            conn.sendRandKey = message
            conn.recvRandKey = os.urandom(32)
            conn.send(conn.recvRandKey)
            return

        descr = conn.fileno()
        partnerNode = None
        for node in self.__nodes:
            if node.getAddress() == message:
                partnerNode = node
                break

        if partnerNode is None:
            conn.disconnect()
            return

        partnerNode.onPartnerConnected(conn)
        self.__unknownConnections.pop(descr, None)

    def __onDisconnected(self, conn):
        """Forget a connection that dropped before being identified."""
        self.__unknownConnections.pop(conn.fileno(), None)

    def __getCurrentLogIndex(self):
        """Return the index of the last log entry."""
        return self.__raftLog[-1][1]

    def __getCurrentLogTerm(self):
        """Return the term of the last log entry."""
        return self.__raftLog[-1][2]

    def __getPrevLogIndexTerm(self, nextNodeIndex):
        """Return (index, term) of the entry before ``nextNodeIndex``.

        Returns ``(None, None)`` when that entry is not in the log.
        """
        prevIndex = nextNodeIndex - 1
        entries = self.__getEntries(prevIndex, 1)
        if entries:
            return prevIndex, entries[0][2]
        return None, None

    def __getEntries(self, fromIDx, count=None, maxSizeBytes=None):
        """Return log entries starting at log index ``fromIDx``.

        :param count: maximum number of entries (all when None)
        :param maxSizeBytes: stop after the entry whose command bytes
            bring the running total to this size or more
        :return: list of entries; empty when ``fromIDx`` is None or lies
            before the first stored entry
        """
        firstEntryIDx = self.__raftLog[0][1]
        if fromIDx is None or fromIDx < firstEntryIDx:
            return []
        diff = fromIDx - firstEntryIDx
        if count is None:
            result = self.__raftLog[diff:]
        else:
            result = self.__raftLog[diff:diff + count]
        if maxSizeBytes is None:
            return result
        totalSize = 0
        i = 0
        for i, entry in enumerate(result):
            totalSize += len(entry[0])
            if totalSize >= maxSizeBytes:
                break
        return result[:i + 1]

    def _isLeader(self):
        """Return True when this node is in the LEADER state."""
        return self.__raftState == _RAFT_STATE.LEADER

    def _getLeader(self):
        """Return the address of the known leader, or None."""
        return self.__raftLeader

    def _isReady(self):
        """Return True once the onReady condition has been reached."""
        return self.__onReadyCalled

    def _getTerm(self):
        """Return the current Raft term."""
        return self.__raftCurrentTerm

    def _getRaftLogSize(self):
        """Return the number of entries currently held in the log."""
        return len(self.__raftLog)

    def __deleteEntriesFrom(self, fromIDx):
        """Ask the journal to delete entries from log index ``fromIDx``."""
        firstEntryIDx = self.__raftLog[0][1]
        diff = fromIDx - firstEntryIDx
        if diff < 0:
            return
        self.__raftLog.deleteEntriesFrom(diff)

    def __deleteEntriesTo(self, toIDx):
        """Ask the journal to delete entries up to log index ``toIDx``."""
        firstEntryIDx = self.__raftLog[0][1]
        diff = toIDx - firstEntryIDx
        if diff < 0:
            return
        self.__raftLog.deleteEntriesTo(diff)

    def __onBecomeLeader(self):
        """Switch to LEADER, reset follower indexes and append a no-op."""
        self.__raftLeader = self.__selfNodeAddr
        self.__raftState = _RAFT_STATE.LEADER

        for node in self.__nodes:
            nodeAddr = node.getAddress()
            self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1
            self.__raftMatchIndex[nodeAddr] = 0

        # No-op command after leader election.
        idx, term = self.__getCurrentLogIndex() + 1, self.__raftCurrentTerm
        self.__raftLog.add(_bchr(_COMMAND_TYPE.NO_OP), idx, term)
        self.__noopIDx = idx
        # NOTE(review): without appendEntriesUseBatch this sends twice in
        # a row; the second send looks redundant - confirm it is intended.
        if not self.__conf.appendEntriesUseBatch:
            self.__sendAppendEntries()

        self.__sendAppendEntries()

    def __onLeaderChanged(self):
        """Fail every command still waiting for a leader reply."""
        for requestID in sorted(self.__commandsWaitingReply):
            self.__commandsWaitingReply[requestID](
                None, FAIL_REASON.LEADER_CHANGED)
        self.__commandsWaitingReply = {}

    def __sendAppendEntries(self):
        """Send log entries, or dump chunks, to every connected node.

        A node whose next index is at or before the first stored entry
        gets the serialized dump instead of log entries. Each node's loop
        stops once the whole call has run longer than one
        appendEntriesPeriod.
        """
        self.__newAppendEntriesTime = \
            time.time() + self.__conf.appendEntriesPeriod

        startTime = time.time()

        for node in self.__nodes:
            nodeAddr = node.getAddress()

            if not node.isConnected():
                self.__serializer.cancelTransmisstion(nodeAddr)
                continue

            sendSingle = True  # always send at least one (heartbeat)
            sendingSerialized = False
            nextNodeIndex = self.__raftNextIndex[nodeAddr]

            while nextNodeIndex <= self.__getCurrentLogIndex() or \
                    sendSingle or sendingSerialized:
                if nextNodeIndex > self.__raftLog[0][1]:
                    prevLogIdx, prevLogTerm = \
                        self.__getPrevLogIndexTerm(nextNodeIndex)
                    entries = []
                    if nextNodeIndex <= self.__getCurrentLogIndex():
                        entries = self.__getEntries(
                            nextNodeIndex, None,
                            self.__conf.appendEntriesBatchSizeBytes)
                        self.__raftNextIndex[nodeAddr] = entries[-1][1] + 1

                    message = {
                        'type': 'append_entries',
                        'term': self.__raftCurrentTerm,
                        'commit_index': self.__raftCommitIndex,
                        'entries': entries,
                        'prevLogIdx': prevLogIdx,
                        'prevLogTerm': prevLogTerm,
                    }
                    node.send(message)
                else:
                    transmissionData = \
                        self.__serializer.getTransmissionData(nodeAddr)
                    message = {
                        'type': 'append_entries',
                        'term': self.__raftCurrentTerm,
                        'commit_index': self.__raftCommitIndex,
                        'serialized': transmissionData,
                    }
                    node.send(message)
                    if transmissionData is not None:
                        isLast = transmissionData[2]
                        if isLast:
                            self.__raftNextIndex[nodeAddr] = \
                                self.__raftLog[1][1] + 1
                            sendingSerialized = False
                        else:
                            sendingSerialized = True
                    else:
                        sendingSerialized = False

                nextNodeIndex = self.__raftNextIndex[nodeAddr]

                sendSingle = False

                delta = time.time() - startTime
                if delta > self.__conf.appendEntriesPeriod:
                    break

    def __send(self, nodeAddr, message):
        """Send ``message`` to the partner node with address ``nodeAddr``.

        Does nothing when no such node is known.
        """
        for node in self.__nodes:
            if node.getAddress() == nodeAddr:
                node.send(message)
                break

    def __connectedToAnyone(self):
        """Return True when at least one partner node is CONNECTED."""
        for node in self.__nodes:
            if node.getStatus() == NODE_STATUS.CONNECTED:
                return True
        return False

    def _getSelfNodeAddr(self):
        """Return this node's own address."""
        return self.__selfNodeAddr

    def _getConf(self):
        """Return the configuration object."""
        return self.__conf

    def _getEncryptor(self):
        """Return the encryptor, or None when no password is configured."""
        return self.__encryptor

    def __changeCluster(self, request):
        """Apply a membership change on the leader when allowed.

        :return: True when the change was applied. False when the
            leader's no-op is not applied yet, a previous change is still
            pending, or the request is invalid.
        """
        if self.__raftLastApplied < self.__noopIDx:
            # No-op entry was not commited yet
            return False

        if self.__changeClusterIDx is not None:
            if self.__raftLastApplied >= self.__changeClusterIDx:
                self.__changeClusterIDx = None

        # Previous cluster change request was not commited yet
        if self.__changeClusterIDx is not None:
            return False

        return self.__doChangeCluster(request)

    def __doChangeCluster(self, request, reverse=False):
        """Add or remove a partner node.

        :param request: ``[requestType, nodeAddr]``; requestType is
            'add' or 'rem'
        :param reverse: undo the request instead of applying it
        :return: True when the membership changed, else False
        """
        requestType = request[0]
        requestNode = request[1]

        if requestType == 'add':
            adding = not reverse
        elif requestType == 'rem':
            adding = reverse
        else:
            return False

        if adding:
            newNode = requestNode
            # Node already exists in cluster
            if newNode == self.__selfNodeAddr or \
                    newNode in self.__otherNodesAddrs:
                return False
            self.__otherNodesAddrs.append(newNode)
            self.__nodes.append(Node(self, newNode))
            self.__raftNextIndex[newNode] = self.__getCurrentLogIndex() + 1
            self.__raftMatchIndex[newNode] = 0
            return True

        oldNode = requestNode
        if oldNode == self.__selfNodeAddr:
            return False
        if oldNode not in self.__otherNodesAddrs:
            return False
        for i, node in enumerate(self.__nodes):
            if node.getAddress() == oldNode:
                node._destroy()
                self.__nodes.pop(i)
                # Remove the address by value: __nodes and
                # __otherNodesAddrs are not guaranteed to share ordering.
                self.__otherNodesAddrs.remove(oldNode)
                # Drop both per-node indexes; the match index used to be
                # left behind.
                self.__raftNextIndex.pop(oldNode, None)
                self.__raftMatchIndex.pop(oldNode, None)
                return True
        return False

    def __parseChangeClusterRequest(self, command):
        """Decode a membership command.

        :return: the unpickled request, or None for other command types
        """
        commandType = ord(command[:1])
        if commandType != _COMMAND_TYPE.MEMBERSHIP:
            return None
        # NOTE(review): unpickles data taken from the command log; safe
        # only for trusted peers.
        return cPickle.loads(command[1:])

    def __tryLogCompaction(self):
        """Finish a running dump or start a new one when thresholds allow."""
        currTime = time.time()
        serializeState, serializeID = self.__serializer.checkSerializing()

        if serializeState == SERIALIZER_STATE.SUCCESS:
            self.__lastSerializedTime = currTime
            self.__deleteEntriesTo(serializeID)
            self.__lastSerializedEntry = serializeID

        if serializeState == SERIALIZER_STATE.FAILED:
            LOG_WARNING("Failed to store full dump")

        if serializeState != SERIALIZER_STATE.NOT_SERIALIZING:
            return

        if len(self.__raftLog) <= self.__conf.logCompactionMinEntries and \
                currTime - self.__lastSerializedTime <= \
                self.__conf.logCompactionMinTime and \
                not self.__forceLogCompaction:
            return

        if self.__conf.logCompactionSplit:
            # Each node may only start a dump inside its own slice of the
            # period, chosen by its position among the sorted addresses.
            allNodes = sorted(self.__otherNodesAddrs + [self.__selfNodeAddr])
            nodesCount = len(allNodes)
            selfIdx = allNodes.index(self.__selfNodeAddr)
            interval = self.__conf.logCompactionMinTime
            # Floor division aligns to the period start whatever the
            # numeric type of the interval.
            periodStart = int(currTime) // interval * interval
            nodeInterval = float(interval) / nodesCount
            nodeIntervalStart = periodStart + selfIdx * nodeInterval
            nodeIntervalEnd = nodeIntervalStart + 0.3 * nodeInterval
            if currTime < nodeIntervalStart or currTime >= nodeIntervalEnd:
                return

        self.__forceLogCompaction = False

        lastAppliedEntries = self.__getEntries(self.__raftLastApplied - 1, 2)
        if len(lastAppliedEntries) < 2 or \
                lastAppliedEntries[0][1] == self.__lastSerializedEntry:
            self.__lastSerializedTime = currTime
            return

        if self.__conf.serializer is None:
            # Default dump: every attribute not recorded in __properies.
            data = dict((k, v) for k, v in self.__dict__.items()
                        if k not in self.__properies)
        else:
            data = None
        cluster = self.__otherNodesAddrs + [self.__selfNodeAddr]
        self.__serializer.serialize(
            (data, lastAppliedEntries[1], lastAppliedEntries[0], cluster),
            lastAppliedEntries[0][1])

    def __loadDumpFile(self, clearJournal):
        """Restore state from the full dump; failures are only logged.

        :param clearJournal: always reset the journal to the two entries
            stored in the dump
        """
        try:
            data = self.__serializer.deserialize()
            if data[0] is not None:
                self.__dict__.update(data[0])

            if clearJournal or \
                    len(self.__raftLog) < 2 or \
                    self.__raftLog[0] != data[2] or \
                    self.__raftLog[1] != data[1]:
                self.__raftLog.clear()
                self.__raftLog.add(*data[2])
                self.__raftLog.add(*data[1])

            self.__raftLastApplied = data[1][1]

            if self.__conf.dynamicMembershipChange:
                self.__otherNodesAddrs = [
                    node for node in data[3]
                    if node != self.__selfNodeAddr
                ]
                self.__updateClusterConfiguration()
        except Exception:
            # Best effort, but let KeyboardInterrupt/SystemExit propagate.
            LOG_WARNING('Failed to load full dump')
            LOG_CURRENT_EXCEPTION()

    def __updateClusterConfiguration(self):
        """Make ``__nodes`` match the addresses in ``__otherNodesAddrs``."""
        keptNodes = []
        currentNodes = set()
        for node in self.__nodes:
            nodeAddr = node.getAddress()
            if nodeAddr in self.__otherNodesAddrs:
                keptNodes.append(node)
                currentNodes.add(nodeAddr)
            else:
                node._destroy()
                # Forget the per-node indexes of a node that left.
                self.__raftNextIndex.pop(nodeAddr, None)
                self.__raftMatchIndex.pop(nodeAddr, None)
        self.__nodes[:] = keptNodes

        for nodeAddr in self.__otherNodesAddrs:
            if nodeAddr not in currentNodes:
                self.__nodes.append(Node(self, nodeAddr))
                self.__raftNextIndex[nodeAddr] = \
                    self.__getCurrentLogIndex() + 1
                self.__raftMatchIndex[nodeAddr] = 0
class SyncObj(object): def __init__(self, selfNodeAddr, otherNodesAddrs, conf=None): """ Main SyncObj class, you should inherit your own class from it. :param selfNodeAddr: address of the current node server, 'host:port' :type selfNodeAddr: str :param otherNodesAddrs: addresses of partner nodes, ['host1:port1', 'host2:port2', ...] :type otherNodesAddrs: list of str :param conf: configuration object :type conf: SyncObjConf """ if conf is None: self.__conf = SyncObjConf() else: self.__conf = conf self.__conf.validate() if self.__conf.password is not None: if not HAS_CRYPTO: raise ImportError("Please install 'cryptography' module") self.__encryptor = getEncryptor(self.__conf.password) else: self.__encryptor = None self.__selfNodeAddr = selfNodeAddr self.__otherNodesAddrs = otherNodesAddrs self.__unknownConnections = {} # descr => _Connection self.__raftState = _RAFT_STATE.FOLLOWER self.__raftCurrentTerm = 0 self.__votedFor = None self.__votesCount = 0 self.__raftLeader = None self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout( ) self.__raftLog = createJournal(self.__conf.journalFile) if len(self.__raftLog) == 0: self.__raftLog.add(_bchr(_COMMAND_TYPE.NO_OP), 1, self.__raftCurrentTerm) self.__raftCommitIndex = 1 self.__raftLastApplied = 1 self.__raftNextIndex = {} self.__raftMatchIndex = {} self.__lastSerializedTime = time.time() self.__lastSerializedEntry = None self.__forceLogCompaction = False self.__leaderCommitIndex = None self.__onReadyCalled = False self.__changeClusterIDx = None self.__noopIDx = None self.__destroying = False self.__recvTransmission = '' self.__startTime = time.time() globalDnsResolver().setTimeouts(self.__conf.dnsCacheTime, self.__conf.dnsFailCacheTime) self.__serializer = Serializer(self.__conf.fullDumpFile, self.__conf.logCompactionBatchSize, self.__conf.useFork, self.__conf.serializer, self.__conf.deserializer, self.__conf.serializeChecker) self.__isInitialized = False self.__lastInitTryTime = 0 self._poller = 
createPoller(self.__conf.pollerType) if selfNodeAddr is not None: bindAddr = self.__conf.bindAddress or selfNodeAddr host, port = bindAddr.split(':') self.__server = TcpServer( self._poller, host, port, onNewConnection=self.__onNewConnection, sendBufferSize=self.__conf.sendBufferSize, recvBufferSize=self.__conf.recvBufferSize, connectionTimeout=self.__conf.connectionTimeout) self._methodToID = {} self._idToMethod = {} methods = sorted([m for m in dir(self) if callable(getattr(self, m))]) for i, method in enumerate(methods): self._methodToID[method] = i self._idToMethod[i] = getattr(self, method) self.__thread = None self.__mainThread = None self.__initialised = None self.__commandsQueue = FastQueue(self.__conf.commandsQueueSize) if not self.__conf.appendEntriesUseBatch: self.__pipeNotifier = PipeNotifier(self._poller) self.__nodes = [] self.__readonlyNodes = [] self.__readonlyNodesCounter = 0 self.__lastReadonlyCheck = 0 self.__newAppendEntriesTime = 0 self.__commandsWaitingCommit = collections.defaultdict( list) # logID => [(termID, callback), ...] self.__commandsLocalCounter = 0 self.__commandsWaitingReply = {} # commandLocalCounter => callback self.__properies = set() for key in self.__dict__: self.__properies.add(key) if self.__conf.autoTick: self.__mainThread = threading.current_thread() self.__initialised = threading.Event() self.__thread = threading.Thread(target=SyncObj._autoTickThread, args=(weakref.proxy(self), )) self.__thread.start() while not self.__initialised.is_set(): pass else: self.__initInTickThread() def destroy(self): """ Correctly destroy SyncObj. Stop autoTickThread, close connections, etc. 
""" if self.__conf.autoTick: self.__destroying = True else: self._doDestroy() def _destroy(self): self.destroy() def _doDestroy(self): for node in self.__nodes: node._destroy() for node in self.__readonlyNodes: node._destroy() if self.__selfNodeAddr is not None: self.__server.unbind() self.__raftLog._destroy() def __initInTickThread(self): try: self.__lastInitTryTime = time.time() if self.__selfNodeAddr is not None: self.__server.bind() shouldConnect = None else: shouldConnect = True self.__nodes = [] for nodeAddr in self.__otherNodesAddrs: self.__nodes.append(Node(self, nodeAddr, shouldConnect)) self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex( ) + 1 self.__raftMatchIndex[nodeAddr] = 0 self.__needLoadDumpFile = True self.__isInitialized = True except: logging.exception('failed to perform initialization') def addNodeToCluster(self, nodeName, callback=None): """Add single node to cluster (dynamic membership changes). Async. You should wait until node successfully added before adding next node. :param nodeName: nodeHost:nodePort :type nodeName: str :param callback: will be called on success or fail :type callback: function(`FAIL_REASON <#pysyncobj.FAIL_REASON>`_, None) """ if not self.__conf.dynamicMembershipChange: raise Exception('dynamicMembershipChange is disabled') self._applyCommand(cPickle.dumps(['add', nodeName]), callback, _COMMAND_TYPE.MEMBERSHIP) def removeNodeFromCluster(self, nodeName, callback=None): """Remove single node from cluster (dynamic membership changes). Async. You should wait until node successfully added before adding next node. 
:param nodeName: nodeHost:nodePort :type nodeName: str :param callback: will be called on success or fail :type callback: function(`FAIL_REASON <#pysyncobj.FAIL_REASON>`_, None) """ if not self.__conf.dynamicMembershipChange: raise Exception('dynamicMembershipChange is disabled') self._applyCommand(cPickle.dumps(['rem', nodeName]), callback, _COMMAND_TYPE.MEMBERSHIP) def _addNodeToCluster(self, nodeName, callback=None): self.addNodeToCluster(nodeName, callback) def _removeNodeFromCluster(self, nodeName, callback=None): self.removeNodeFromCluster(nodeName, callback) def _applyCommand(self, command, callback, commandType=None): try: if commandType is None: self.__commandsQueue.put_nowait((command, callback)) else: self.__commandsQueue.put_nowait( (_bchr(commandType) + command, callback)) if not self.__conf.appendEntriesUseBatch: self.__pipeNotifier.notify() except Queue.Full: self.__callErrCallback(FAIL_REASON.QUEUE_FULL, callback) def _checkCommandsToApply(self): startTime = time.time() while time.time() - startTime < self.__conf.appendEntriesPeriod: if self.__raftLeader is None and self.__conf.commandsWaitLeader: break try: command, callback = self.__commandsQueue.get_nowait() except Queue.Empty: break requestNode, requestID = None, None if isinstance(callback, tuple): requestNode, requestID = callback if self.__raftState == _RAFT_STATE.LEADER: idx, term = self.__getCurrentLogIndex( ) + 1, self.__raftCurrentTerm if self.__conf.dynamicMembershipChange: changeClusterRequest = self.__parseChangeClusterRequest( command) else: changeClusterRequest = None if changeClusterRequest is None or self.__changeCluster( changeClusterRequest): self.__raftLog.add(command, idx, term) if requestNode is None: if callback is not None: self.__commandsWaitingCommit[idx].append( (term, callback)) else: self.__send( requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'log_idx': idx, 'log_term': term, }) if not self.__conf.appendEntriesUseBatch: 
self.__sendAppendEntries() else: if requestNode is None: if callback is not None: callback(None, FAIL_REASON.REQUEST_DENIED) else: self.__send( requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'error': FAIL_REASON.REQUEST_DENIED, }) elif self.__raftLeader is not None: if requestNode is None: message = { 'type': 'apply_command', 'command': command, } if callback is not None: self.__commandsLocalCounter += 1 self.__commandsWaitingReply[ self.__commandsLocalCounter] = callback message['request_id'] = self.__commandsLocalCounter self.__send(self.__raftLeader, message) else: self.__send( requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'error': FAIL_REASON.NOT_LEADER, }) else: self.__callErrCallback(FAIL_REASON.MISSING_LEADER, callback) def _autoTickThread(self): self.__initInTickThread() self.__initialised.set() time.sleep(0.1) try: while True: if not self.__mainThread.is_alive(): break if self.__destroying: self._doDestroy() break self._onTick(self.__conf.autoTickPeriod) except ReferenceError: pass def doTick(self, timeToWait=0.0): """Performs single tick. Should be called manually if `autoTick <#pysyncobj.SyncObjConf.autoTick>`_ disabled :param timeToWait: max time to wait for next tick. If zero - perform single tick without waiting for new events. Otherwise - wait for new socket event and return. 
:type timeToWait: float """ assert not self.__conf.autoTick self._onTick(timeToWait) def _onTick(self, timeToWait=0.0): if not self.__isInitialized: if time.time( ) >= self.__lastInitTryTime + self.__conf.bindRetryTime: self.__initInTickThread() if not self.__isInitialized: time.sleep(timeToWait) return if self.__needLoadDumpFile: if self.__conf.fullDumpFile is not None and os.path.isfile( self.__conf.fullDumpFile): self.__loadDumpFile(clearJournal=False) self.__needLoadDumpFile = False if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE ) and self.__selfNodeAddr is not None: if self.__raftElectionDeadline < time.time( ) and self.__connectedToAnyone(): self.__raftElectionDeadline = time.time( ) + self.__generateRaftTimeout() self.__raftLeader = None self.__raftState = _RAFT_STATE.CANDIDATE self.__raftCurrentTerm += 1 self.__votedFor = self._getSelfNodeAddr() self.__votesCount = 1 for node in self.__nodes: node.send({ 'type': 'request_vote', 'term': self.__raftCurrentTerm, 'last_log_index': self.__getCurrentLogIndex(), 'last_log_term': self.__getCurrentLogTerm(), }) self.__onLeaderChanged() if self.__votesCount > (len(self.__nodes) + 1) / 2: self.__onBecomeLeader() if self.__raftState == _RAFT_STATE.LEADER: while self.__raftCommitIndex < self.__getCurrentLogIndex(): nextCommitIndex = self.__raftCommitIndex + 1 count = 1 for node in self.__nodes: if self.__raftMatchIndex[ node.getAddress()] >= nextCommitIndex: count += 1 if count > (len(self.__nodes) + 1) / 2: self.__raftCommitIndex = nextCommitIndex else: break self.__leaderCommitIndex = self.__raftCommitIndex needSendAppendEntries = False if self.__raftCommitIndex > self.__raftLastApplied: count = self.__raftCommitIndex - self.__raftLastApplied entries = self.__getEntries(self.__raftLastApplied + 1, count) for entry in entries: currentTermID = entry[2] subscribers = self.__commandsWaitingCommit.pop(entry[1], []) res = self.__doApplyCommand(entry[0]) for subscribeTermID, callback in subscribers: if 
subscribeTermID == currentTermID: callback(res, FAIL_REASON.SUCCESS) else: callback(None, FAIL_REASON.DISCARDED) self.__raftLastApplied += 1 if not self.__conf.appendEntriesUseBatch: needSendAppendEntries = True if self.__raftState == _RAFT_STATE.LEADER: if time.time( ) > self.__newAppendEntriesTime or needSendAppendEntries: self.__sendAppendEntries() if not self.__onReadyCalled and self.__raftLastApplied == self.__leaderCommitIndex: if self.__conf.onReady: self.__conf.onReady() self.__onReadyCalled = True self._checkCommandsToApply() self.__tryLogCompaction() for node in self.__nodes: node.connectIfRequired() if time.time() > self.__lastReadonlyCheck + 1.0: self.__lastReadonlyCheck = time.time() newReadonlyNodes = [] for node in self.__readonlyNodes: if node.isConnected(): newReadonlyNodes.append(node) else: self.__raftNextIndex.pop(node, None) self.__raftMatchIndex.pop(node, None) node._destroy() self._poller.poll(timeToWait) def getStatus(self): """Dumps different debug info about cluster to list and return it""" status = [] status.append(('version', VERSION)) status.append(('revision', REVISION)) status.append(('self', self.__selfNodeAddr)) status.append(('state', self.__raftState)) status.append(('leader', self.__raftLeader)) status.append(('partner_nodes_count', len(self.__nodes))) for n in self.__nodes: status.append(('partner_node_status_server_' + n.getAddress(), n.getStatus())) status.append(('readonly_nodes_count', len(self.__readonlyNodes))) for n in self.__readonlyNodes: status.append(('readonly_node_status_server_' + n.getAddress(), n.getStatus())) status.append( ('unknown_connections_count', len(self.__unknownConnections))) status.append(('log_len', len(self.__raftLog))) status.append(('last_applied', self.__raftLastApplied)) status.append(('commit_idx', self.__raftCommitIndex)) status.append(('raft_term', self.__raftCurrentTerm)) status.append(('next_node_idx_count', len(self.__raftNextIndex))) for k, v in self.__raftNextIndex.iteritems(): 
status.append(('next_node_idx_server_' + k, v)) status.append(('match_idx_count', len(self.__raftMatchIndex))) for k, v in self.__raftMatchIndex.iteritems(): status.append(('match_idx_server_' + k, v)) status.append(('leader_commit_idx', self.__leaderCommitIndex)) status.append(('uptime', int(time.time() - self.__startTime))) return status def _getStatus(self): return self.getStatus() def printStatus(self): """Dumps different debug info about cluster to default logger""" status = self.getStatus() for i in status: logging.info(i[0] + ': %s', str(i[1])) def _printStatus(self): self.printStatus() def forceLogCompaction(self): """Force to start log compaction (without waiting required time or required number of entries)""" self.__forceLogCompaction = True def _forceLogCompaction(self): self.forceLogCompaction() def __doApplyCommand(self, command): commandType = ord(command[:1]) # Skip no-op and membership change commands if commandType != _COMMAND_TYPE.REGULAR: return command = cPickle.loads(command[1:]) args = [] kwargs = { '_doApply': True, } if not isinstance(command, tuple): funcID = command elif len(command) == 2: funcID, args = command else: funcID, args, newKwArgs = command kwargs.update(newKwArgs) return self._idToMethod[funcID](*args, **kwargs) def _onMessageReceived(self, nodeAddr, message): if message[ 'type'] == 'request_vote' and self.__selfNodeAddr is not None: if message['term'] > self.__raftCurrentTerm: self.__raftCurrentTerm = message['term'] self.__votedFor = None self.__raftState = _RAFT_STATE.FOLLOWER self.__raftLeader = None if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE): lastLogTerm = message['last_log_term'] lastLogIdx = message['last_log_index'] if message['term'] >= self.__raftCurrentTerm: if lastLogTerm < self.__getCurrentLogTerm(): return if lastLogTerm == self.__getCurrentLogTerm() and \ lastLogIdx < self.__getCurrentLogIndex(): return if self.__votedFor is not None: return self.__votedFor = nodeAddr 
self.__raftElectionDeadline = time.time( ) + self.__generateRaftTimeout() self.__send(nodeAddr, { 'type': 'response_vote', 'term': message['term'], }) if message['type'] == 'append_entries' and message[ 'term'] >= self.__raftCurrentTerm: self.__raftElectionDeadline = time.time( ) + self.__generateRaftTimeout() if self.__raftLeader != nodeAddr: self.__onLeaderChanged() self.__raftLeader = nodeAddr if message['term'] > self.__raftCurrentTerm: self.__raftCurrentTerm = message['term'] self.__votedFor = None self.__raftState = _RAFT_STATE.FOLLOWER newEntries = message.get('entries', []) serialized = message.get('serialized', None) self.__leaderCommitIndex = leaderCommitIndex = message[ 'commit_index'] # Regular append entries if 'prevLogIdx' in message: transmission = message.get('transmission', None) if transmission is not None: if transmission == 'start': self.__recvTransmission = message['data'] return elif transmission == 'process': self.__recvTransmission += message['data'] return elif transmission == 'finish': self.__recvTransmission += message['data'] newEntries = [cPickle.loads(self.__recvTransmission)] self.__recvTransmission = '' else: raise Exception('Wrong transmission type') prevLogIdx = message['prevLogIdx'] prevLogTerm = message['prevLogTerm'] prevEntries = self.__getEntries(prevLogIdx) if not prevEntries: self.__sendNextNodeIdx(nodeAddr, success=False, reset=True) return if prevEntries[0][2] != prevLogTerm: self.__sendNextNodeIdx(nodeAddr, nextNodeIdx=prevLogIdx, success=False, reset=True) return if len(prevEntries) > 1: # rollback cluster changes if self.__conf.dynamicMembershipChange: for entry in reversed(prevEntries[1:]): clusterChangeRequest = self.__parseChangeClusterRequest( entry[0]) if clusterChangeRequest is not None: self.__doChangeCluster(clusterChangeRequest, reverse=True) self.__deleteEntriesFrom(prevLogIdx + 1) for entry in newEntries: self.__raftLog.add(*entry) # apply cluster changes if self.__conf.dynamicMembershipChange: for entry in 
newEntries: clusterChangeRequest = self.__parseChangeClusterRequest( entry[0]) if clusterChangeRequest is not None: self.__doChangeCluster(clusterChangeRequest) nextNodeIdx = prevLogIdx + 1 if newEntries: nextNodeIdx = newEntries[-1][1] self.__sendNextNodeIdx(nodeAddr, nextNodeIdx=nextNodeIdx, success=True) # Install snapshot elif serialized is not None: if self.__serializer.setTransmissionData(serialized): self.__loadDumpFile(clearJournal=True) self.__sendNextNodeIdx(nodeAddr, success=True) self.__raftCommitIndex = min(leaderCommitIndex, self.__getCurrentLogIndex()) if message['type'] == 'apply_command': if 'request_id' in message: self._applyCommand(message['command'], (nodeAddr, message['request_id'])) else: self._applyCommand(message['command'], None) if message['type'] == 'apply_command_response': requestID = message['request_id'] error = message.get('error', None) callback = self.__commandsWaitingReply.pop(requestID, None) if callback is not None: if error is not None: callback(None, error) else: idx = message['log_idx'] term = message['log_term'] assert idx > self.__raftLastApplied self.__commandsWaitingCommit[idx].append((term, callback)) if self.__raftState == _RAFT_STATE.CANDIDATE: if message['type'] == 'response_vote' and message[ 'term'] == self.__raftCurrentTerm: self.__votesCount += 1 if self.__votesCount > (len(self.__nodes) + 1) / 2: self.__onBecomeLeader() if self.__raftState == _RAFT_STATE.LEADER: if message['type'] == 'next_node_idx': reset = message['reset'] nextNodeIdx = message['next_node_idx'] success = message['success'] currentNodeIdx = nextNodeIdx - 1 if reset: self.__raftNextIndex[nodeAddr] = nextNodeIdx if success: self.__raftMatchIndex[nodeAddr] = currentNodeIdx def __callErrCallback(self, err, callback): if callback is None: return if isinstance(callback, tuple): requestNode, requestID = callback self.__send( requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'error': err, }) return callback(None, err) def 
__sendNextNodeIdx(self, nodeAddr, reset=False, nextNodeIdx=None, success=False): if nextNodeIdx is None: nextNodeIdx = self.__getCurrentLogIndex() + 1 self.__send( nodeAddr, { 'type': 'next_node_idx', 'next_node_idx': nextNodeIdx, 'reset': reset, 'success': success, }) def __generateRaftTimeout(self): minTimeout = self.__conf.raftMinTimeout maxTimeout = self.__conf.raftMaxTimeout return minTimeout + (maxTimeout - minTimeout) * random.random() def __onNewConnection(self, conn): descr = conn.fileno() self.__unknownConnections[descr] = conn if self.__encryptor: conn.encryptor = self.__encryptor conn.setOnMessageReceivedCallback( functools.partial(self.__onMessageReceived, conn)) conn.setOnDisconnectedCallback( functools.partial(self.__onDisconnected, conn)) def __utilityCallback(self, res, err, conn, cmd, node): cmdResult = 'FAIL' if err == FAIL_REASON.SUCCESS: cmdResult = 'SUCCESS' conn.send(cmdResult + ' ' + cmd + ' ' + node) def __onUtilityMessage(self, conn, message): if message[0] == 'status': status = self.getStatus() data = '' for i in status: data += i[0] + ':' + str(i[1]) + '\n' conn.send(data) return True elif message[0] == 'add': self.addNodeToCluster(message[1], callback=functools.partial( self.__utilityCallback, conn=conn, cmd='ADD', node=message[1])) return True elif message[0] == 'remove': if message[1] == self.__selfNodeAddr: conn.send('FAIL REMOVE ' + message[1]) else: self.removeNodeFromCluster(message[1], callback=functools.partial( self.__utilityCallback, conn=conn, cmd='REMOVE', node=message[1])) return True return False def __onMessageReceived(self, conn, message): if self.__encryptor and not conn.sendRandKey: conn.sendRandKey = message conn.recvRandKey = os.urandom(32) conn.send(conn.recvRandKey) return descr = conn.fileno() if isinstance(message, list) and self.__onUtilityMessage( conn, message): self.__unknownConnections.pop(descr, None) return partnerNode = None for node in self.__nodes: if node.getAddress() == message: partnerNode = node 
break if partnerNode is None and message != 'readonly': conn.disconnect() self.__unknownConnections.pop(descr, None) return if partnerNode is not None: partnerNode.onPartnerConnected(conn) else: nodeAddr = str(self.__readonlyNodesCounter) node = Node(self, nodeAddr, shouldConnect=False) node.onPartnerConnected(conn) self.__readonlyNodes.append(node) self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[nodeAddr] = 0 self.__readonlyNodesCounter += 1 self.__unknownConnections.pop(descr, None) def __onDisconnected(self, conn): self.__unknownConnections.pop(conn.fileno(), None) def __getCurrentLogIndex(self): return self.__raftLog[-1][1] def __getCurrentLogTerm(self): return self.__raftLog[-1][2] def __getPrevLogIndexTerm(self, nextNodeIndex): prevIndex = nextNodeIndex - 1 entries = self.__getEntries(prevIndex, 1) if entries: return prevIndex, entries[0][2] return None, None def __getEntries(self, fromIDx, count=None, maxSizeBytes=None): firstEntryIDx = self.__raftLog[0][1] if fromIDx is None or fromIDx < firstEntryIDx: return [] diff = fromIDx - firstEntryIDx if count is None: result = self.__raftLog[diff:] else: result = self.__raftLog[diff:diff + count] if maxSizeBytes is None: return result totalSize = 0 i = 0 for i, entry in enumerate(result): totalSize += len(entry[0]) if totalSize >= maxSizeBytes: break return result[:i + 1] def _isLeader(self): return self.__raftState == _RAFT_STATE.LEADER def _getLeader(self): return self.__raftLeader def isReady(self): """Check if current node is initially synced with others and has an actual data. 
:return: True if ready, False otherwise :rtype: bool """ return self.__onReadyCalled def _isReady(self): return self.isReady() def _getTerm(self): return self.__raftCurrentTerm def _getRaftLogSize(self): return len(self.__raftLog) def __deleteEntriesFrom(self, fromIDx): firstEntryIDx = self.__raftLog[0][1] diff = fromIDx - firstEntryIDx if diff < 0: return self.__raftLog.deleteEntriesFrom(diff) def __deleteEntriesTo(self, toIDx): firstEntryIDx = self.__raftLog[0][1] diff = toIDx - firstEntryIDx if diff < 0: return self.__raftLog.deleteEntriesTo(diff) def __onBecomeLeader(self): self.__raftLeader = self.__selfNodeAddr self.__raftState = _RAFT_STATE.LEADER for node in self.__nodes + self.__readonlyNodes: nodeAddr = node.getAddress() self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[nodeAddr] = 0 # No-op command after leader election. idx, term = self.__getCurrentLogIndex() + 1, self.__raftCurrentTerm self.__raftLog.add(_bchr(_COMMAND_TYPE.NO_OP), idx, term) self.__noopIDx = idx if not self.__conf.appendEntriesUseBatch: self.__sendAppendEntries() self.__sendAppendEntries() def __onLeaderChanged(self): for id in sorted(self.__commandsWaitingReply): self.__commandsWaitingReply[id](None, FAIL_REASON.LEADER_CHANGED) self.__commandsWaitingReply = {} def __sendAppendEntries(self): self.__newAppendEntriesTime = time.time( ) + self.__conf.appendEntriesPeriod startTime = time.time() batchSizeBytes = self.__conf.appendEntriesBatchSizeBytes for node in self.__nodes + self.__readonlyNodes: nodeAddr = node.getAddress() if not node.isConnected(): self.__serializer.cancelTransmisstion(nodeAddr) continue sendSingle = True sendingSerialized = False nextNodeIndex = self.__raftNextIndex[nodeAddr] while nextNodeIndex <= self.__getCurrentLogIndex( ) or sendSingle or sendingSerialized: if nextNodeIndex > self.__raftLog[0][1]: prevLogIdx, prevLogTerm = self.__getPrevLogIndexTerm( nextNodeIndex) entries = [] if nextNodeIndex <= 
self.__getCurrentLogIndex(): entries = self.__getEntries(nextNodeIndex, None, batchSizeBytes) self.__raftNextIndex[nodeAddr] = entries[-1][1] + 1 if len(entries) == 1 and len( entries[0][0]) >= batchSizeBytes: entry = cPickle.dumps(entries[0], -1) for pos in xrange(0, len(entry), batchSizeBytes): currData = entry[pos:pos + batchSizeBytes] if pos == 0: transmission = 'start' elif pos + batchSizeBytes >= len(entries[0][0]): transmission = 'finish' else: transmission = 'process' message = { 'type': 'append_entries', 'transmission': transmission, 'data': currData, 'term': self.__raftCurrentTerm, 'commit_index': self.__raftCommitIndex, 'prevLogIdx': prevLogIdx, 'prevLogTerm': prevLogTerm, } node.send(message) else: message = { 'type': 'append_entries', 'term': self.__raftCurrentTerm, 'commit_index': self.__raftCommitIndex, 'entries': entries, 'prevLogIdx': prevLogIdx, 'prevLogTerm': prevLogTerm, } node.send(message) else: transmissionData = self.__serializer.getTransmissionData( nodeAddr) message = { 'type': 'append_entries', 'term': self.__raftCurrentTerm, 'commit_index': self.__raftCommitIndex, 'serialized': transmissionData, } node.send(message) if transmissionData is not None: isLast = transmissionData[2] if isLast: self.__raftNextIndex[ nodeAddr] = self.__raftLog[1][1] + 1 sendingSerialized = False else: sendingSerialized = True else: sendingSerialized = False nextNodeIndex = self.__raftNextIndex[nodeAddr] sendSingle = False delta = time.time() - startTime if delta > self.__conf.appendEntriesPeriod: break def __send(self, nodeAddr, message): for node in self.__nodes + self.__readonlyNodes: if node.getAddress() == nodeAddr: node.send(message) break def __connectedToAnyone(self): for node in self.__nodes: if node.getStatus() == NODE_STATUS.CONNECTED: return True if not self.__nodes: return True return False def _getSelfNodeAddr(self): return self.__selfNodeAddr def _getConf(self): return self.__conf def _getEncryptor(self): return self.__encryptor def 
__changeCluster(self, request): if self.__raftLastApplied < self.__noopIDx: # No-op entry was not commited yet return False if self.__changeClusterIDx is not None: if self.__raftLastApplied >= self.__changeClusterIDx: self.__changeClusterIDx = None # Previous cluster change request was not commited yet if self.__changeClusterIDx is not None: return False return self.__doChangeCluster(request) def __doChangeCluster(self, request, reverse=False): requestType = request[0] requestNode = request[1] if requestType == 'add': adding = not reverse elif requestType == 'rem': adding = reverse else: return False if self.__selfNodeAddr is not None: shouldConnect = None else: shouldConnect = True if adding: newNode = requestNode # Node already exists in cluster if newNode == self.__selfNodeAddr or newNode in self.__otherNodesAddrs: return False self.__otherNodesAddrs.append(newNode) self.__nodes.append(Node(self, newNode, shouldConnect)) self.__raftNextIndex[newNode] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[newNode] = 0 return True else: oldNode = requestNode if oldNode == self.__selfNodeAddr: return False if oldNode not in self.__otherNodesAddrs: return False for i in xrange(len(self.__nodes)): if self.__nodes[i].getAddress() == oldNode: self.__nodes[i]._destroy() self.__nodes.pop(i) self.__otherNodesAddrs.pop(i) del self.__raftNextIndex[oldNode] del self.__raftMatchIndex[oldNode] return True return False def __parseChangeClusterRequest(self, command): commandType = ord(command[:1]) if commandType != _COMMAND_TYPE.MEMBERSHIP: return None return cPickle.loads(command[1:]) def __tryLogCompaction(self): currTime = time.time() serializeState, serializeID = self.__serializer.checkSerializing() if serializeState == SERIALIZER_STATE.SUCCESS: self.__lastSerializedTime = currTime self.__deleteEntriesTo(serializeID) self.__lastSerializedEntry = serializeID if serializeState == SERIALIZER_STATE.FAILED: logging.warning('Failed to store full dump') if serializeState != 
SERIALIZER_STATE.NOT_SERIALIZING: return if len(self.__raftLog) <= self.__conf.logCompactionMinEntries and \ currTime - self.__lastSerializedTime <= self.__conf.logCompactionMinTime and \ not self.__forceLogCompaction: return if self.__conf.logCompactionSplit: allNodes = sorted(self.__otherNodesAddrs + [self.__selfNodeAddr]) nodesCount = len(allNodes) selfIdx = allNodes.index(self.__selfNodeAddr) interval = self.__conf.logCompactionMinTime periodStart = int(currTime) / interval * interval nodeInterval = float(interval) / nodesCount nodeIntervalStart = periodStart + selfIdx * nodeInterval nodeIntervalEnd = nodeIntervalStart + 0.3 * nodeInterval if currTime < nodeIntervalStart or currTime >= nodeIntervalEnd: return self.__forceLogCompaction = False lastAppliedEntries = self.__getEntries(self.__raftLastApplied - 1, 2) if len(lastAppliedEntries ) < 2 or lastAppliedEntries[0][1] == self.__lastSerializedEntry: self.__lastSerializedTime = currTime return if self.__conf.serializer is None: data = dict([(k, self.__dict__[k]) for k in self.__dict__.keys() if k not in self.__properies]) else: data = None cluster = self.__otherNodesAddrs + [self.__selfNodeAddr] self.__serializer.serialize( (data, lastAppliedEntries[1], lastAppliedEntries[0], cluster), lastAppliedEntries[0][1]) def __loadDumpFile(self, clearJournal): try: data = self.__serializer.deserialize() if data[0] is not None: for k, v in data[0].iteritems(): self.__dict__[k] = v if clearJournal or \ len(self.__raftLog) < 2 or \ self.__raftLog[0] != data[2] or \ self.__raftLog[1] != data[1]: self.__raftLog.clear() self.__raftLog.add(*data[2]) self.__raftLog.add(*data[1]) self.__raftLastApplied = data[1][1] if self.__conf.dynamicMembershipChange: self.__otherNodesAddrs = [ node for node in data[3] if node != self.__selfNodeAddr ] self.__updateClusterConfiguration() except: logging.exception('failed to load full dump') def __updateClusterConfiguration(self): currentNodes = set() for i in xrange(len(self.__nodes) - 1, -1, 
-1): nodeAddr = self.__nodes[i].getAddress() if nodeAddr not in self.__otherNodesAddrs: self.__nodes[i]._destroy() self.__nodes.pop(i) else: currentNodes.add(nodeAddr) if self.__selfNodeAddr is not None: shouldConnect = None else: shouldConnect = True for nodeAddr in self.__otherNodesAddrs: if nodeAddr not in currentNodes: self.__nodes.append(Node(self, nodeAddr, shouldConnect)) self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex( ) + 1 self.__raftMatchIndex[nodeAddr] = 0
class SyncObj(object): def __init__(self, selfNodeAddr, otherNodesAddrs, conf=None): if conf is None: self.__conf = SyncObjConf() else: self.__conf = conf if self.__conf.password is not None: if not HAS_CRYPTO: raise ImportError("Please install 'cryptography' module") self.__encryptor = getEncryptor(self.__conf.password) else: self.__encryptor = None self.__selfNodeAddr = selfNodeAddr self.__otherNodesAddrs = otherNodesAddrs self.__unknownConnections = {} # descr => _Connection self.__raftState = _RAFT_STATE.FOLLOWER self.__raftCurrentTerm = 0 self.__votedFor = None self.__votesCount = 0 self.__raftLeader = None self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout() self.__raftLog = [] # (command, logID, term) self.__raftLog.append(('', 1, self.__raftCurrentTerm)) self.__raftCommitIndex = 1 self.__raftLastApplied = 1 self.__raftNextIndex = {} self.__raftMatchIndex = {} self.__lastSerializedTime = time.time() self.__forceLogCompaction = False self.__leaderCommitIndex = None self.__onReadyCalled = False self.__startTime = time.time() globalDnsResolver().setTimeouts(self.__conf.dnsCacheTime, self.__conf.dnsFailCacheTime) self.__serializer = Serializer(self.__conf.fullDumpFile, self.__conf.logCompactionBatchSize) self.__isInitialized = False self.__lastInitTryTime = 0 self._poller = createPoller() host, port = selfNodeAddr.split(':') self.__server = TcpServer(self._poller, host, port, onNewConnection=self.__onNewConnection, sendBufferSize=self.__conf.sendBufferSize, recvBufferSize=self.__conf.recvBufferSize, connectionTimeout=self.__conf.connectionTimeout) self._methodToID = {} self._idToMethod = {} methods = sorted([m for m in dir(self) if callable(getattr(self, m))]) for i, method in enumerate(methods): self._methodToID[method] = i self._idToMethod[i] = getattr(self, method) self.__thread = None self.__mainThread = None self.__initialised = None self.__commandsQueue = Queue.Queue(self.__conf.commandsQueueSize) self.__nodes = [] 
self.__newAppendEntriesTime = 0 self.__commandsWaitingCommit = collections.defaultdict(list) # logID => [(termID, callback), ...] self.__commandsLocalCounter = 0 self.__commandsWaitingReply = {} # commandLocalCounter => callback self.__properies = set() for key in self.__dict__: self.__properies.add(key) if self.__conf.autoTick: self.__mainThread = threading.current_thread() self.__initialised = threading.Event() self.__thread = threading.Thread(target=SyncObj._autoTickThread, args=(weakref.proxy(self),)) self.__thread.start() while not self.__initialised.is_set(): pass else: self.__initInTickThread() def __initInTickThread(self): try: self.__lastInitTryTime = time.time() self.__server.bind() self.__nodes = [] for nodeAddr in self.__otherNodesAddrs: self.__nodes.append(Node(self, nodeAddr)) self.__raftNextIndex[nodeAddr] = 0 self.__raftMatchIndex[nodeAddr] = 0 self.__needLoadDumpFile = True self.__isInitialized = True except: LOG_CURRENT_EXCEPTION() def _applyCommand(self, command, callback): try: self.__commandsQueue.put_nowait((command, callback)) except Queue.Full: self.__callErrCallback(FAIL_REASON.QUEUE_FULL, callback) def _checkCommandsToApply(self): startTime = time.time() while time.time() - startTime < self.__conf.appendEntriesPeriod: if self.__raftLeader is None and self.__conf.commandsWaitLeader: break try: command, callback = self.__commandsQueue.get_nowait() except Queue.Empty: break requestNode, requestID = None, None if isinstance(callback, tuple): requestNode, requestID = callback if self.__raftState == _RAFT_STATE.LEADER: idx, term = self.__getCurrentLogIndex() + 1, self.__raftCurrentTerm self.__raftLog.append((command, idx, term)) if requestNode is None: if callback is not None: self.__commandsWaitingCommit[idx].append((term, callback)) else: self.__send(requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'log_idx': idx, 'log_term': term, }) if not self.__conf.appendEntriesUseBatch: self.__sendAppendEntries() elif 
self.__raftLeader is not None: if requestNode is None: message = { 'type': 'apply_command', 'command': command, } if callback is not None: self.__commandsLocalCounter += 1 self.__commandsWaitingReply[self.__commandsLocalCounter] = callback message['request_id'] = self.__commandsLocalCounter self.__send(self.__raftLeader, message) else: self.__send(requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'error': FAIL_REASON.NOT_LEADER, }) else: self.__callErrCallback(FAIL_REASON.MISSING_LEADER, callback) def _autoTickThread(self): self.__initInTickThread() self.__initialised.set() time.sleep(0.1) try: while True: if not self.__mainThread.is_alive(): break self._onTick(self.__conf.autoTickPeriod) except ReferenceError: pass def _onTick(self, timeToWait=0.0): if not self.__isInitialized: if time.time() >= self.__lastInitTryTime + self.__conf.bindRetryTime: self.__initInTickThread() if not self.__isInitialized: time.sleep(timeToWait) return if self.__needLoadDumpFile: if self.__conf.fullDumpFile is not None and os.path.isfile(self.__conf.fullDumpFile): self.__loadDumpFile() self.__needLoadDumpFile = False if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE): if self.__raftElectionDeadline < time.time() and self.__connectedToAnyone(): self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout() self.__raftLeader = None self.__raftState = _RAFT_STATE.CANDIDATE self.__raftCurrentTerm += 1 self.__votedFor = self._getSelfNodeAddr() self.__votesCount = 1 for node in self.__nodes: node.send({ 'type': 'request_vote', 'term': self.__raftCurrentTerm, 'last_log_index': self.__getCurrentLogIndex(), 'last_log_term': self.__getCurrentLogTerm(), }) self.__onLeaderChanged() if self.__raftState == _RAFT_STATE.LEADER: while self.__raftCommitIndex < self.__getCurrentLogIndex(): nextCommitIndex = self.__raftCommitIndex + 1 count = 1 for node in self.__nodes: if self.__raftMatchIndex[node.getAddress()] >= nextCommitIndex: count += 1 if count > 
(len(self.__nodes) + 1) / 2: self.__raftCommitIndex = nextCommitIndex else: break self.__leaderCommitIndex = self.__raftCommitIndex if time.time() > self.__newAppendEntriesTime: self.__sendAppendEntries() if self.__raftCommitIndex > self.__raftLastApplied: count = self.__raftCommitIndex - self.__raftLastApplied entries = self.__getEntries(self.__raftLastApplied + 1, count) for entry in entries: currentTermID = entry[2] subscribers = self.__commandsWaitingCommit.pop(entry[1], []) res = self.__doApplyCommand(entry[0]) for subscribeTermID, callback in subscribers: if subscribeTermID == currentTermID: callback(res, FAIL_REASON.SUCCESS) else: callback(None, FAIL_REASON.DISCARDED) self.__raftLastApplied += 1 if not self.__onReadyCalled and self.__raftLastApplied == self.__leaderCommitIndex: if self.__conf.onReady: self.__conf.onReady() self.__onReadyCalled = True self._checkCommandsToApply() self.__tryLogCompaction() for node in self.__nodes: node.connectIfRequired() self._poller.poll(timeToWait) def _getLastCommitIndex(self): return self.__raftCommitIndex def _printStatus(self): LOG_DEBUG('self', self.__selfNodeAddr) LOG_DEBUG('state:', self.__raftState) LOG_DEBUG('leader', self.__raftLeader) LOG_DEBUG('partner nodes', len(self.__nodes)) for n in self.__nodes: LOG_DEBUG(n.getAddress(), n.getStatus()) LOG_DEBUG('log len:', len(self.__raftLog)) LOG_DEBUG('log size bytes:', len(zlib.compress(cPickle.dumps(self.__raftLog, -1)))) LOG_DEBUG('last applied:', self.__raftLastApplied) LOG_DEBUG('commit idx:', self.__raftCommitIndex) LOG_DEBUG('raft term:', self.__raftCurrentTerm) LOG_DEBUG('next node idx:', self.__raftNextIndex) LOG_DEBUG('match idx:', self.__raftMatchIndex) LOG_DEBUG('leader commit idx:', self.__leaderCommitIndex) LOG_DEBUG('uptime:', int(time.time() - self.__startTime)) LOG_DEBUG('') def _forceLogCompaction(self): self.__forceLogCompaction = True def __doApplyCommand(self, command): # Skip no-op command if command == '': return command = cPickle.loads(command) 
args = [] kwargs = { '_doApply': True, } if not isinstance(command, tuple): funcID = command elif len(command) == 2: funcID, args = command else: funcID, args, newKwArgs = command kwargs.update(newKwArgs) return self._idToMethod[funcID](*args, **kwargs) def _onMessageReceived(self, nodeAddr, message): if message['type'] == 'request_vote': if message['term'] > self.__raftCurrentTerm: self.__raftCurrentTerm = message['term'] self.__votedFor = None self.__raftState = _RAFT_STATE.FOLLOWER self.__raftLeader = None if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE): lastLogTerm = message['last_log_term'] lastLogIdx = message['last_log_index'] if message['term'] >= self.__raftCurrentTerm: if lastLogTerm < self.__getCurrentLogTerm(): return if lastLogTerm == self.__getCurrentLogTerm() and \ lastLogIdx < self.__getCurrentLogIndex(): return if self.__votedFor is not None: return self.__votedFor = nodeAddr self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout() self.__send(nodeAddr, { 'type': 'response_vote', 'term': message['term'], }) if message['type'] == 'append_entries' and message['term'] >= self.__raftCurrentTerm: self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout() if self.__raftLeader != nodeAddr: self.__onLeaderChanged() self.__raftLeader = nodeAddr if message['term'] > self.__raftCurrentTerm: self.__raftCurrentTerm = message['term'] self.__votedFor = None self.__raftState = _RAFT_STATE.FOLLOWER newEntries = message.get('entries', []) serialized = message.get('serialized', None) self.__leaderCommitIndex = leaderCommitIndex = message['commit_index'] # Regular append entries if 'prevLogIdx' in message: prevLogIdx = message['prevLogIdx'] prevLogTerm = message['prevLogTerm'] prevEntries = self.__getEntries(prevLogIdx) if not prevEntries: if prevLogIdx is None or self.__getCurrentLogIndex() is None: nextNodeIdx = None else: nextNodeIdx = min(prevLogIdx, self.__getCurrentLogIndex()) self.__sendNextNodeIdx(nodeAddr, 
nextNodeIdx = nextNodeIdx, success = False, reset=True) return if prevEntries[0][2] != prevLogTerm: self.__sendNextNodeIdx(nodeAddr, nextNodeIdx = prevLogIdx, success = False, reset=True) return if len(prevEntries) > 1: self.__deleteEntriesFrom(prevLogIdx + 1) self.__raftLog += newEntries nextNodeIdx = prevLogIdx + 1 if newEntries: nextNodeIdx = newEntries[-1][1] self.__sendNextNodeIdx(nodeAddr, nextNodeIdx=nextNodeIdx, success=True) # Install snapshot elif serialized is not None: if self.__serializer.setTransmissionData(serialized): self.__loadDumpFile() self.__sendNextNodeIdx(nodeAddr, success=True) self.__raftCommitIndex = min(leaderCommitIndex, self.__getCurrentLogIndex()) if message['type'] == 'apply_command': if 'request_id' in message: self._applyCommand(message['command'], (nodeAddr, message['request_id'])) else: self._applyCommand(message['command'], None) if message['type'] == 'apply_command_response': requestID = message['request_id'] error = message.get('error', None) callback = self.__commandsWaitingReply.pop(requestID, None) if callback is not None: if error is not None: callback(None, error) else: idx = message['log_idx'] term = message['log_term'] assert idx > self.__raftLastApplied self.__commandsWaitingCommit[idx].append((term, callback)) if self.__raftState == _RAFT_STATE.CANDIDATE: if message['type'] == 'response_vote' and message['term'] == self.__raftCurrentTerm: self.__votesCount += 1 if self.__votesCount > (len(self.__nodes) + 1) / 2: self.__onBecomeLeader() if self.__raftState == _RAFT_STATE.LEADER: if message['type'] == 'next_node_idx': reset = message['reset'] nextNodeIdx = message['next_node_idx'] success = message['success'] currentNodeIdx = nextNodeIdx - 1 if reset: self.__raftNextIndex[nodeAddr] = nextNodeIdx if success: self.__raftMatchIndex[nodeAddr] = currentNodeIdx def __callErrCallback(self, err, callback): if callback is None: return if isinstance(callback, tuple): requestNode, requestID = callback self.__send(requestNode, { 
'type': 'apply_command_response', 'request_id': requestID, 'error': err, }) return callback(None, err) def __sendNextNodeIdx(self, nodeAddr, reset=False, nextNodeIdx = None, success = False): if nextNodeIdx is None: nextNodeIdx = self.__getCurrentLogIndex() + 1 self.__send(nodeAddr, { 'type': 'next_node_idx', 'next_node_idx': nextNodeIdx, 'reset': reset, 'success': success, }) def __generateRaftTimeout(self): minTimeout = self.__conf.raftMinTimeout maxTimeout = self.__conf.raftMaxTimeout return minTimeout + (maxTimeout - minTimeout) * random.random() def __onNewConnection(self, conn): descr = conn.fileno() self.__unknownConnections[descr] = conn if self.__encryptor: conn.encryptor = self.__encryptor conn.setOnMessageReceivedCallback(functools.partial(self.__onMessageReceived, conn)) conn.setOnDisconnectedCallback(functools.partial(self.__onDisconnected, conn)) def __onMessageReceived(self, conn, message): if self.__encryptor and not conn.sendRandKey: conn.sendRandKey = message conn.recvRandKey = os.urandom(32) conn.send(conn.recvRandKey) return descr = conn.fileno() partnerNode = None for node in self.__nodes: if node.getAddress() == message: partnerNode = node break if partnerNode is None: conn.disconnect() return partnerNode.onPartnerConnected(conn) self.__unknownConnections.pop(descr, None) def __onDisconnected(self, conn): self.__unknownConnections.pop(conn.fileno(), None) def __getCurrentLogIndex(self): return self.__raftLog[-1][1] def __getCurrentLogTerm(self): return self.__raftLog[-1][2] def __getPrevLogIndexTerm(self, nextNodeIndex): prevIndex = nextNodeIndex - 1 entries = self.__getEntries(prevIndex, 1) if entries: return prevIndex, entries[0][2] return None, None def __getEntries(self, fromIDx, count=None, maxSizeBytes = None): firstEntryIDx = self.__raftLog[0][1] if fromIDx is None or fromIDx < firstEntryIDx: return [] diff = fromIDx - firstEntryIDx if count is None: result = self.__raftLog[diff:] else: result = self.__raftLog[diff:diff + count] if 
maxSizeBytes is None: return result totalSize = 0 i = 0 for i, entry in enumerate(result): totalSize += len(entry[0]) if totalSize >= maxSizeBytes: break return result[:i + 1] def _isLeader(self): return self.__raftState == _RAFT_STATE.LEADER def _getLeader(self): return self.__raftLeader def _isReady(self): return self.__onReadyCalled def _getTerm(self): return self.__raftCurrentTerm def _getRaftLogSize(self): return len(self.__raftLog) def __deleteEntriesFrom(self, fromIDx): firstEntryIDx = self.__raftLog[0][1] diff = fromIDx - firstEntryIDx if diff < 0: return self.__raftLog = self.__raftLog[:diff] def __deleteEntriesTo(self, toIDx): firstEntryIDx = self.__raftLog[0][1] diff = toIDx - firstEntryIDx if diff < 0: return self.__raftLog = self.__raftLog[diff:] def __onBecomeLeader(self): self.__raftLeader = self.__selfNodeAddr self.__raftState = _RAFT_STATE.LEADER for node in self.__nodes: nodeAddr = node.getAddress() self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[nodeAddr] = 0 # No-op command after leader election. 
self._applyCommand('', None) self.__sendAppendEntries() def __onLeaderChanged(self): for id in sorted(self.__commandsWaitingReply): self.__commandsWaitingReply[id](None, FAIL_REASON.LEADER_CHANGED) self.__commandsWaitingReply = {} def __sendAppendEntries(self): self.__newAppendEntriesTime = time.time() + self.__conf.appendEntriesPeriod startTime = time.time() for node in self.__nodes: nodeAddr = node.getAddress() if not node.isConnected(): self.__serializer.cancelTransmisstion(nodeAddr) continue sendSingle = True sendingSerialized = False nextNodeIndex = self.__raftNextIndex[nodeAddr] while nextNodeIndex <= self.__getCurrentLogIndex() or sendSingle or sendingSerialized: if nextNodeIndex >= self.__raftLog[0][1]: prevLogIdx, prevLogTerm = self.__getPrevLogIndexTerm(nextNodeIndex) entries = [] if nextNodeIndex <= self.__getCurrentLogIndex(): entries = self.__getEntries(nextNodeIndex, None, self.__conf.appendEntriesBatchSizeBytes) self.__raftNextIndex[nodeAddr] = entries[-1][1] + 1 message = { 'type': 'append_entries', 'term': self.__raftCurrentTerm, 'commit_index': self.__raftCommitIndex, 'entries': entries, 'prevLogIdx': prevLogIdx, 'prevLogTerm': prevLogTerm, } node.send(message) else: transmissionData = self.__serializer.getTransmissionData(nodeAddr) message = { 'type': 'append_entries', 'term': self.__raftCurrentTerm, 'commit_index': self.__raftCommitIndex, 'serialized': transmissionData, } node.send(message) if transmissionData is not None: isLast = transmissionData[2] if isLast: self.__raftNextIndex[nodeAddr] = self.__raftLog[0][1] sendingSerialized = False else: sendingSerialized = True else: sendingSerialized = False nextNodeIndex = self.__raftNextIndex[nodeAddr] sendSingle = False delta = time.time() - startTime if delta > self.__conf.appendEntriesPeriod: break def __send(self, nodeAddr, message): for node in self.__nodes: if node.getAddress() == nodeAddr: node.send(message) break def __connectedToAnyone(self): for node in self.__nodes: if node.getStatus() == 
NODE_STATUS.CONNECTED: return True return False def _getSelfNodeAddr(self): return self.__selfNodeAddr def _getConf(self): return self.__conf def _getEncryptor(self): return self.__encryptor def __tryLogCompaction(self): currTime = time.time() serializeState, serializeID = self.__serializer.checkSerializing() if serializeState == SERIALIZER_STATE.SUCCESS: self.__lastSerializedTime = currTime self.__deleteEntriesTo(serializeID) if serializeState == SERIALIZER_STATE.FAILED: LOG_WARNING("Failed to store full dump") if serializeState != SERIALIZER_STATE.NOT_SERIALIZING: return if len(self.__raftLog) <= self.__conf.logCompactionMinEntries and \ currTime - self.__lastSerializedTime <= self.__conf.logCompactionMinTime and \ not self.__forceLogCompaction: return self.__forceLogCompaction = False lastAppliedEntries = self.__getEntries(self.__raftLastApplied - 1, 2) if len(lastAppliedEntries) < 2: return data = dict([(k, self.__dict__[k]) for k in self.__dict__.keys() if k not in self.__properies]) self.__serializer.serialize((data, lastAppliedEntries[1], lastAppliedEntries[0]), lastAppliedEntries[0][1]) def __loadDumpFile(self): try: data = self.__serializer.deserialize() for k, v in data[0].iteritems(): self.__dict__[k] = v self.__raftLog = [data[2], data[1]] self.__raftLastApplied = data[1][1] except: LOG_WARNING('Failed to load full dump') LOG_CURRENT_EXCEPTION()
class SyncObj(object):
    """Base class of a Raft-replicated object.

    The instance keeps a Raft log of ``(command, logID, term)`` tuples,
    exchanges ``request_vote`` / ``append_entries`` messages with its partner
    nodes and applies committed commands by calling its own methods through
    the ``_idToMethod`` table.

    NOTE: ``_methodToID`` / ``_idToMethod`` are built from the *sorted names*
    of every callable attribute, and those numeric IDs are what gets stored in
    the log.  Adding, removing or renaming a method therefore changes the IDs;
    this is why the error paths below are written inline instead of being
    factored into a new helper method.
    """

    def __init__(self, selfNodeAddr, otherNodesAddrs, conf=None):
        """Create the node and either start the tick thread or bind inline.

        :param selfNodeAddr: address of this node, 'host:port'
        :param otherNodesAddrs: iterable of partner addresses, 'host:port'
        :param conf: SyncObjConf instance; a default one is created if None
        """
        if conf is None:
            self.__conf = SyncObjConf()
        else:
            self.__conf = conf

        self.__selfNodeAddr = selfNodeAddr
        self.__otherNodesAddrs = otherNodesAddrs
        self.__unknownConnections = {}  # descr => _Connection
        self.__raftState = _RAFT_STATE.FOLLOWER
        self.__raftCurrentTerm = 0
        # Candidate this node voted for in the current term (None = no vote yet).
        self.__votedFor = None
        self.__votesCount = 0
        self.__raftLeader = None
        self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
        self.__raftLog = []  # (command, logID, term)
        self.__raftLog.append((None, 1, self.__raftCurrentTerm))
        self.__raftCommitIndex = 1
        self.__raftLastApplied = 1
        self.__raftNextIndex = {}
        self.__raftMatchIndex = {}
        self.__lastSerializedTime = time.time()
        self.__forceLogCompaction = False
        self.__socket = None
        self.__resolver = DnsCachingResolver(self.__conf.dnsCacheTime, self.__conf.dnsFailCacheTime)
        self.__serializer = Serializer(self.__conf.fullDumpFile, self.__conf.logCompactionBatchSize)
        self.__poller = createPoller()
        self.__isInitialized = False
        self.__lastInitTryTime = 0
        # Declared here (and not only in __initInTickThread) so that the flag is
        # part of __properies and is never written into a state dump.
        self.__needLoadDumpFile = False

        # Command IDs: position of the method name in the sorted list of callables.
        self._methodToID = {}
        self._idToMethod = {}
        methods = sorted([m for m in dir(self) if callable(getattr(self, m))])
        for i, method in enumerate(methods):
            self._methodToID[method] = i
            self._idToMethod[i] = getattr(self, method)

        self.__thread = None
        self.__mainThread = None
        self.__initialised = None
        self.__commandsQueue = Queue.Queue(self.__conf.commandsQueueSize)
        self.__nodes = []
        self.__newAppendEntriesTime = 0

        self.__commandsWaitingCommit = collections.defaultdict(list)  # logID => [(termID, callback), ...]
        self.__commandsLocalCounter = 0
        self.__commandsWaitingReply = {}  # commandLocalCounter => callback

        # Every attribute that exists at this point is internal bookkeeping;
        # anything added to __dict__ later is treated as user state and dumped.
        self.__properies = set()
        for key in self.__dict__:
            self.__properies.add(key)

        if self.__conf.autoTick:
            self.__mainThread = threading.current_thread()
            self.__initialised = threading.Event()
            self.__thread = threading.Thread(target=SyncObj._autoTickThread, args=(weakref.proxy(self),))
            self.__thread.start()
            # Block on the event instead of spinning on is_set().
            self.__initialised.wait()
        else:
            self.__initInTickThread()

    def __initInTickThread(self):
        """Bind the listening socket and create the partner Node objects.

        Any failure is logged and leaves ``__isInitialized`` False so that
        ``_onTick`` retries later.
        """
        try:
            self.__lastInitTryTime = time.time()
            self.__bind()
            self.__nodes = []
            for nodeAddr in self.__otherNodesAddrs:
                self.__nodes.append(Node(self, nodeAddr))
                self.__raftNextIndex[nodeAddr] = 0
                self.__raftMatchIndex[nodeAddr] = 0
            self.__needLoadDumpFile = True
            self.__isInitialized = True
        except Exception:
            LOG_CURRENT_EXCEPTION()

    def _applyCommand(self, command, callback):
        """Queue a command for replication.

        :param command: funcID, (funcID, args) or (funcID, args, kwargs)
        :param callback: None, a callable ``callback(result, failReason)``, or a
            tuple ``(nodeAddr, requestID)`` for a command forwarded by a partner
        """
        try:
            self.__commandsQueue.put_nowait((command, callback))
        except Queue.Full:
            # The callback may be absent or may identify a remote requester;
            # only a real callable can be invoked directly.
            if callback is None:
                return
            if isinstance(callback, tuple):
                requestNode, requestID = callback
                self.__send(requestNode, {
                    'type': 'apply_command_response',
                    'request_id': requestID,
                    'error': FAIL_REASON.QUEUE_FULL,
                })
                return
            callback(None, FAIL_REASON.QUEUE_FULL)

    def _checkCommandsToApply(self):
        """Drain up to ``maxCommandsPerTick`` queued commands.

        As leader the command is appended to the log; otherwise it is forwarded
        to the known leader, or rejected when there is no leader.
        """
        for _ in xrange(self.__conf.maxCommandsPerTick):
            if self.__raftLeader is None and self.__conf.commandsWaitLeader:
                break
            try:
                command, callback = self.__commandsQueue.get_nowait()
            except Queue.Empty:
                break

            # A tuple callback means the command came from another node.
            requestNode, requestID = None, None
            if isinstance(callback, tuple):
                requestNode, requestID = callback

            if self.__raftState == _RAFT_STATE.LEADER:
                idx, term = self.__getCurrentLogIndex() + 1, self.__raftCurrentTerm
                self.__raftLog.append((command, idx, term))
                if requestNode is None:
                    if callback is not None:
                        self.__commandsWaitingCommit[idx].append((term, callback))
                else:
                    self.__send(requestNode, {
                        'type': 'apply_command_response',
                        'request_id': requestID,
                        'log_idx': idx,
                        'log_term': term,
                    })
                if not self.__conf.appendEntriesUseBatch:
                    self.__sendAppendEntries()
            elif self.__raftLeader is not None:
                if requestNode is None:
                    message = {
                        'type': 'apply_command',
                        'command': command,
                    }
                    if callback is not None:
                        self.__commandsLocalCounter += 1
                        self.__commandsWaitingReply[self.__commandsLocalCounter] = callback
                        message['request_id'] = self.__commandsLocalCounter
                    self.__send(self.__raftLeader, message)
                else:
                    self.__send(requestNode, {
                        'type': 'apply_command_response',
                        'request_id': requestID,
                        'error': FAIL_REASON.NOT_LEADER,
                    })
            else:
                if requestNode is None:
                    # Commands may legitimately be queued without a callback.
                    if callback is not None:
                        callback(None, FAIL_REASON.MISSING_LEADER)
                else:
                    self.__send(requestNode, {
                        'type': 'apply_command_response',
                        'request_id': requestID,
                        'error': FAIL_REASON.NOT_LEADER,
                    })

    def _autoTickThread(self):
        """Body of the background thread used when ``conf.autoTick`` is set.

        ``self`` is a weakref proxy here, so a ReferenceError simply means the
        object was garbage collected and the thread should stop.
        """
        self.__initInTickThread()
        self.__initialised.set()
        time.sleep(0.1)
        try:
            while True:
                if not self.__mainThread.is_alive():
                    break
                self._onTick(self.__conf.autoTickPeriod)
        except ReferenceError:
            pass

    def _onTick(self, timeToWait=0.0):
        """Run one iteration of the Raft state machine.

        :param timeToWait: timeout passed to the poller (or slept when the
            node is not bound yet)
        """
        if not self.__isInitialized:
            if time.time() >= self.__lastInitTryTime + self.__conf.bindRetryTime:
                self.__initInTickThread()

        if not self.__isInitialized:
            time.sleep(timeToWait)
            return

        if self.__needLoadDumpFile:
            if self.__conf.fullDumpFile is not None and os.path.isfile(self.__conf.fullDumpFile):
                self.__loadDumpFile()
            self.__needLoadDumpFile = False

        # Election timeout: become candidate for a new term.
        if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE):
            if self.__raftElectionDeadline < time.time():
                self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
                self.__raftLeader = None
                self.__raftState = _RAFT_STATE.CANDIDATE
                self.__raftCurrentTerm += 1
                # A candidate votes for itself (votesCount starts at 1).
                self.__votedFor = self.__selfNodeAddr
                self.__votesCount = 1
                for node in self.__nodes:
                    node.send({
                        'type': 'request_vote',
                        'term': self.__raftCurrentTerm,
                        'last_log_index': self.__getCurrentLogIndex(),
                        'last_log_term': self.__getCurrentLogTerm(),
                    })
                self.__onLeaderChanged()

        if self.__raftState == _RAFT_STATE.LEADER:
            # Advance the commit index one entry at a time while a majority
            # (this node included) has replicated the next entry.
            while self.__raftCommitIndex < self.__getCurrentLogIndex():
                nextCommitIndex = self.__raftCommitIndex + 1
                count = 1
                for node in self.__nodes:
                    if self.__raftMatchIndex[node.getAddress()] >= nextCommitIndex:
                        count += 1
                if count > (len(self.__nodes) + 1) / 2:
                    self.__raftCommitIndex = nextCommitIndex
                else:
                    break

            if time.time() > self.__newAppendEntriesTime:
                self.__sendAppendEntries()

        # Apply newly committed entries and notify their subscribers.
        if self.__raftCommitIndex > self.__raftLastApplied:
            count = self.__raftCommitIndex - self.__raftLastApplied
            entries = self.__getEntries(self.__raftLastApplied + 1, count)
            for entry in entries:
                currentTermID = entry[2]
                subscribers = self.__commandsWaitingCommit.pop(entry[1], [])
                res = self.__doApplyCommand(entry[0])
                for subscribeTermID, callback in subscribers:
                    # A different term at this index means the subscriber's
                    # own command was replaced by another entry.
                    if subscribeTermID == currentTermID:
                        callback(res, FAIL_REASON.SUCCESS)
                    else:
                        callback(None, FAIL_REASON.DISCARDED)
                self.__raftLastApplied += 1

        self._checkCommandsToApply()
        self.__tryLogCompaction()

        for node in self.__nodes:
            node.connectIfRequired()

        self.__poller.poll(timeToWait)

    def _getLastCommitIndex(self):
        """Return the current Raft commit index."""
        return self.__raftCommitIndex

    def _printStatus(self):
        """Write a summary of the node state through LOG_DEBUG."""
        LOG_DEBUG('self', self.__selfNodeAddr)
        LOG_DEBUG('leader', self.__raftLeader)
        LOG_DEBUG('partner nodes', len(self.__nodes))
        for n in self.__nodes:
            LOG_DEBUG(n.getAddress(), n.getStatus())
        LOG_DEBUG('log len:', len(self.__raftLog))
        LOG_DEBUG('log size bytes:', len(zlib.compress(cPickle.dumps(self.__raftLog, -1))))
        LOG_DEBUG('last applied:', self.__raftLastApplied)
        LOG_DEBUG('commit idx:', self.__raftCommitIndex)
        LOG_DEBUG('next node idx:', self.__raftNextIndex)

    def _forceLogCompaction(self):
        """Request a log compaction on the next tick regardless of thresholds."""
        self.__forceLogCompaction = True

    def __doApplyCommand(self, command):
        """Invoke the method a log command refers to and return its result.

        :param command: funcID, (funcID, args) or (funcID, args, kwargs)
        """
        args = []
        kwargs = {
            '_doApply': True,
        }
        if not isinstance(command, tuple):
            funcID = command
        elif len(command) == 2:
            funcID, args = command
        else:
            funcID, args, newKwArgs = command
            kwargs.update(newKwArgs)

        return self._idToMethod[funcID](*args, **kwargs)

    def _onMessageReceived(self, nodeAddr, message):
        """Handle one message (a dict with a 'type' key) from a partner node.

        :param nodeAddr: address of the sender
        :param message: decoded message dict
        """
        if message['type'] == 'request_vote':
            if message['term'] > self.__raftCurrentTerm:
                self.__raftCurrentTerm = message['term']
                # New term: any vote given in an older term no longer counts.
                self.__votedFor = None
                self.__raftState = _RAFT_STATE.FOLLOWER

            if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE):
                lastLogTerm = message['last_log_term']
                lastLogIdx = message['last_log_index']
                if message['term'] >= self.__raftCurrentTerm:
                    # Refuse candidates whose log is behind ours.
                    if lastLogTerm < self.__getCurrentLogTerm():
                        return
                    if lastLogTerm == self.__getCurrentLogTerm() and \
                            lastLogIdx < self.__getCurrentLogIndex():
                        return
                    # At most one vote per term, otherwise two candidates
                    # could both collect a majority in the same term.
                    if self.__votedFor is not None:
                        return

                    self.__votedFor = nodeAddr
                    self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
                    self.__send(nodeAddr, {
                        'type': 'response_vote',
                        'term': message['term'],
                    })

        if message['type'] == 'append_entries' and message['term'] >= self.__raftCurrentTerm:
            self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout()
            if self.__raftLeader != nodeAddr:
                self.__onLeaderChanged()
            self.__raftLeader = nodeAddr
            if message['term'] > self.__raftCurrentTerm:
                self.__votedFor = None
            self.__raftCurrentTerm = message['term']
            self.__raftState = _RAFT_STATE.FOLLOWER
            newEntries = message.get('entries', [])
            serialized = message.get('serialized', None)
            leaderCommitIndex = message['commit_index']

            # Regular append entries
            if serialized is None:
                prevLogIdx = message['prevLogIdx']
                prevLogTerm = message['prevLogTerm']
                prevEntries = self.__getEntries(prevLogIdx)
                if not prevEntries:
                    self.__sendNextNodeIdx(nodeAddr, reset=True)
                    return
                if prevEntries[0][2] != prevLogTerm:
                    self.__deleteEntriesFrom(prevLogIdx)
                    self.__sendNextNodeIdx(nodeAddr, reset=True)
                    return
                if len(prevEntries) > 1:
                    self.__deleteEntriesFrom(prevLogIdx + 1)
                self.__raftLog += newEntries

            # Install snapshot
            else:
                if self.__serializer.setTransmissionData(serialized):
                    self.__loadDumpFile()

            self.__sendNextNodeIdx(nodeAddr)

            self.__raftCommitIndex = min(leaderCommitIndex, self.__getCurrentLogIndex())

        if message['type'] == 'apply_command':
            if 'request_id' in message:
                self._applyCommand(message['command'], (nodeAddr, message['request_id']))
            else:
                self._applyCommand(message['command'], None)

        if message['type'] == 'apply_command_response':
            requestID = message['request_id']
            error = message.get('error', None)
            callback = self.__commandsWaitingReply.pop(requestID, None)
            if callback is not None:
                if error is not None:
                    callback(None, error)
                else:
                    idx = message['log_idx']
                    term = message['log_term']
                    assert idx > self.__raftLastApplied
                    self.__commandsWaitingCommit[idx].append((term, callback))

        if self.__raftState == _RAFT_STATE.CANDIDATE:
            if message['type'] == 'response_vote' and message['term'] == self.__raftCurrentTerm:
                self.__votesCount += 1

                if self.__votesCount > (len(self.__nodes) + 1) / 2:
                    self.__onBecomeLeader()

        if self.__raftState == _RAFT_STATE.LEADER:
            if message['type'] == 'next_node_idx':
                reset = message['reset']
                nextNodeIdx = message['next_node_idx']

                currentNodeIdx = nextNodeIdx - 1
                if reset:
                    self.__raftNextIndex[nodeAddr] = nextNodeIdx
                self.__raftMatchIndex[nodeAddr] = currentNodeIdx

    def __sendNextNodeIdx(self, nodeAddr, reset=False):
        """Tell ``nodeAddr`` which log index this node expects next."""
        self.__send(nodeAddr, {
            'type': 'next_node_idx',
            'next_node_idx': self.__getCurrentLogIndex() + 1,
            'reset': reset,
        })

    def __generateRaftTimeout(self):
        """Return a random timeout in [raftMinTimeout, raftMaxTimeout)."""
        minTimeout = self.__conf.raftMinTimeout
        maxTimeout = self.__conf.raftMaxTimeout
        return minTimeout + (maxTimeout - minTimeout) * random.random()

    def __bind(self):
        """Create the non-blocking listening socket and register it with the poller."""
        self.__socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, self.__conf.sendBufferSize)
        self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, self.__conf.recvBufferSize)
        self.__socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        self.__socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.__socket.setblocking(0)
        host, port = self.__selfNodeAddr.split(':')
        self.__socket.bind((host, int(port)))
        self.__socket.listen(5)
        self.__poller.subscribe(self.__socket.fileno(),
                                self.__onNewConnection,
                                POLL_EVENT_TYPE.READ | POLL_EVENT_TYPE.ERROR)

    def __onNewConnection(self, localDescr, event):
        """Poller callback for the listening socket: accept an incoming connection.

        The accepted connection stays in ``__unknownConnections`` until the
        peer identifies itself (see ``__processUnknownConnections``).
        """
        if event & POLL_EVENT_TYPE.READ:
            try:
                sock, addr = self.__socket.accept()
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, self.__conf.sendBufferSize)
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, self.__conf.recvBufferSize)
                sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
                sock.setblocking(0)
                conn = Connection(socket=sock, timeout=self.__conf.connectionTimeout)
                descr = conn.fileno()
                self.__unknownConnections[descr] = conn
                self.__poller.subscribe(descr,
                                        self.__processUnknownConnections,
                                        POLL_EVENT_TYPE.READ | POLL_EVENT_TYPE.ERROR)
            except socket.error as e:
                # EAGAIN only means there was nothing to accept.
                if e.errno != socket.errno.EAGAIN:
                    self.__isInitialized = False
                    LOG_WARNING('Error in main socket:' + str(e))

        if event & POLL_EVENT_TYPE.ERROR:
            self.__isInitialized = False
            LOG_WARNING('Error in main socket')

    def __getCurrentLogIndex(self):
        """Return the logID of the last log entry."""
        return self.__raftLog[-1][1]

    def __getCurrentLogTerm(self):
        """Return the term of the last log entry."""
        return self.__raftLog[-1][2]

    def __getPrevLogIndexTerm(self, nextNodeIndex):
        """Return (index, term) of the entry before ``nextNodeIndex``.

        Returns (None, None) when that entry is no longer in the log.
        """
        prevIndex = nextNodeIndex - 1
        entries = self.__getEntries(prevIndex, 1)
        if entries:
            return prevIndex, entries[0][2]
        return None, None

    def __getEntries(self, fromIDx, count=None):
        """Return log entries starting at logID ``fromIDx``.

        :param fromIDx: first logID wanted; None (as produced by
            ``__getPrevLogIndexTerm``) yields an empty list
        :param count: maximum number of entries, or None for all of them
        """
        firstEntryIDx = self.__raftLog[0][1]
        # Explicit None check: relying on ``None < int`` only works on Python 2.
        if fromIDx is None or fromIDx < firstEntryIDx:
            return []
        diff = fromIDx - firstEntryIDx
        if count is None:
            return self.__raftLog[diff:]
        return self.__raftLog[diff:diff + count]

    def _isLeader(self):
        """Return True when this node is currently the Raft leader."""
        return self.__raftState == _RAFT_STATE.LEADER

    def _getLeader(self):
        """Return the address of the known leader, or None."""
        return self.__raftLeader

    def _getRaftLogSize(self):
        """Return the number of entries currently held in the log."""
        return len(self.__raftLog)

    def __deleteEntriesFrom(self, fromIDx):
        """Drop the entry with logID ``fromIDx`` and everything after it."""
        firstEntryIDx = self.__raftLog[0][1]
        diff = fromIDx - firstEntryIDx
        if diff < 0:
            return
        self.__raftLog = self.__raftLog[:diff]

    def __deleteEntriesTo(self, toIDx):
        """Drop every entry before logID ``toIDx`` (that entry itself is kept)."""
        firstEntryIDx = self.__raftLog[0][1]
        diff = toIDx - firstEntryIDx
        if diff < 0:
            return
        self.__raftLog = self.__raftLog[diff:]

    def __onBecomeLeader(self):
        """Switch to leader state and reset the per-node replication indexes."""
        self.__raftLeader = self.__selfNodeAddr
        self.__raftState = _RAFT_STATE.LEADER

        for node in self.__nodes:
            nodeAddr = node.getAddress()
            self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1
            self.__raftMatchIndex[nodeAddr] = 0

        self.__sendAppendEntries()

    def __onLeaderChanged(self):
        """Fail every command that is still waiting for a reply from the old leader."""
        for requestID in sorted(self.__commandsWaitingReply):
            self.__commandsWaitingReply[requestID](None, FAIL_REASON.LEADER_CHANGED)
        self.__commandsWaitingReply = {}

    def __sendAppendEntries(self):
        """Send append_entries (or snapshot chunks) to every connected node.

        Sending to all nodes is bounded by ``conf.appendEntriesPeriod``.
        """
        self.__newAppendEntriesTime = time.time() + self.__conf.appendEntriesPeriod

        startTime = time.time()

        for node in self.__nodes:
            nodeAddr = node.getAddress()

            if not node.isConnected():
                self.__serializer.cancelTransmisstion(nodeAddr)
                continue

            # sendSingle guarantees at least one (possibly empty) heartbeat.
            sendSingle = True
            sendingSerialized = False
            nextNodeIndex = self.__raftNextIndex[nodeAddr]

            while nextNodeIndex <= self.__getCurrentLogIndex() or sendSingle or sendingSerialized:
                if nextNodeIndex >= self.__raftLog[0][1]:
                    # The entries the node needs are still in the log.
                    prevLogIdx, prevLogTerm = self.__getPrevLogIndexTerm(nextNodeIndex)
                    entries = []
                    if nextNodeIndex <= self.__getCurrentLogIndex():
                        entries = self.__getEntries(nextNodeIndex, self.__conf.appendEntriesBatchSize)
                        self.__raftNextIndex[nodeAddr] = entries[-1][1] + 1

                    message = {
                        'type': 'append_entries',
                        'term': self.__raftCurrentTerm,
                        'commit_index': self.__raftCommitIndex,
                        'entries': entries,
                        'prevLogIdx': prevLogIdx,
                        'prevLogTerm': prevLogTerm,
                    }
                    node.send(message)
                else:
                    # The node is behind the compacted log: ship the dump.
                    transmissionData = self.__serializer.getTransmissionData(nodeAddr)
                    message = {
                        'type': 'append_entries',
                        'term': self.__raftCurrentTerm,
                        'commit_index': self.__raftCommitIndex,
                        'serialized': transmissionData,
                    }
                    node.send(message)
                    if transmissionData is not None:
                        isLast = transmissionData[2]
                        if isLast:
                            self.__raftNextIndex[nodeAddr] = self.__raftLog[0][1]
                            sendingSerialized = False
                        else:
                            sendingSerialized = True
                    else:
                        sendingSerialized = False

                nextNodeIndex = self.__raftNextIndex[nodeAddr]

                sendSingle = False

                delta = time.time() - startTime
                if delta > self.__conf.appendEntriesPeriod:
                    break

    def __send(self, nodeAddr, message):
        """Send ``message`` to the partner with address ``nodeAddr``, if known."""
        for node in self.__nodes:
            if node.getAddress() == nodeAddr:
                node.send(message)
                break

    def __processUnknownConnections(self, descr, event):
        """Poller callback for an accepted connection that has not identified itself.

        The first message is expected to be the peer's address; a connection
        announcing an address that is not a partner node is closed.
        """
        conn = self.__unknownConnections[descr]
        partnerNode = None
        remove = False
        if event & POLL_EVENT_TYPE.READ:
            conn.read()
            nodeAddr = conn.getMessage()
            if nodeAddr is not None:
                for node in self.__nodes:
                    if node.getAddress() == nodeAddr:
                        partnerNode = node
                        break
                else:
                    remove = True

        if event & POLL_EVENT_TYPE.ERROR:
            remove = True

        if remove or conn.isDisconnected():
            self.__unknownConnections.pop(descr)
            self.__poller.unsubscribe(descr)
            conn.close()
            return

        if partnerNode is not None:
            self.__unknownConnections.pop(descr)
            assert conn.fileno() is not None
            partnerNode.onPartnerConnected(conn)

    def _getSelfNodeAddr(self):
        """Return this node's own address."""
        return self.__selfNodeAddr

    def _getConf(self):
        """Return the configuration object in use."""
        return self.__conf

    def _getResolver(self):
        """Return the DNS caching resolver."""
        return self.__resolver

    def _getPoller(self):
        """Return the poller."""
        return self.__poller

    def __tryLogCompaction(self):
        """Finish a running dump or start a new one when thresholds are exceeded."""
        currTime = time.time()
        serializeState, serializeID = self.__serializer.checkSerializing()

        if serializeState == SERIALIZER_STATE.SUCCESS:
            self.__lastSerializedTime = currTime
            self.__deleteEntriesTo(serializeID)

        if serializeState == SERIALIZER_STATE.FAILED:
            LOG_WARNING("Failed to store full dump")

        if serializeState != SERIALIZER_STATE.NOT_SERIALIZING:
            return

        if len(self.__raftLog) <= self.__conf.logCompactionMinEntries and \
                currTime - self.__lastSerializedTime <= self.__conf.logCompactionMinTime and \
                not self.__forceLogCompaction:
            return

        self.__forceLogCompaction = False

        # Both the last applied entry and its predecessor are stored in the
        # dump, so both must be present in the log.
        lastAppliedEntries = self.__getEntries(self.__raftLastApplied - 1, 2)
        if len(lastAppliedEntries) < 2:
            return

        # User state = everything in __dict__ that is not internal bookkeeping.
        data = {k: v for k, v in self.__dict__.items() if k not in self.__properies}
        self.__serializer.serialize((data, lastAppliedEntries[1], lastAppliedEntries[0]), lastAppliedEntries[1][1])

    def __loadDumpFile(self):
        """Restore user state and the last two log entries from the dump.

        Failures are logged and leave the current state untouched as far as
        the failing step allows.
        """
        try:
            data = self.__serializer.deserialize()
            for k, v in data[0].iteritems():
                self.__dict__[k] = v
            self.__raftLog = [data[2], data[1]]
            self.__raftLastApplied = data[1][1]
        except Exception:
            LOG_WARNING('Failed to load full dump')
            LOG_CURRENT_EXCEPTION()
class SyncObj(object): def __init__(self, selfNodeAddr, otherNodesAddrs, conf=None): """ Main SyncObj class, you should inherit your own class from it. :param selfNodeAddr: address of the current node server, 'host:port' :type selfNodeAddr: str :param otherNodesAddrs: addresses of partner nodes, ['host1:port1', 'host2:port2', ...] :type otherNodesAddrs: list of str :param conf: configuration object :type conf: SyncObjConf """ if conf is None: self.__conf = SyncObjConf() else: self.__conf = conf self.__conf.validate() if self.__conf.password is not None: if not HAS_CRYPTO: raise ImportError("Please install 'cryptography' module") self.__encryptor = getEncryptor(self.__conf.password) else: self.__encryptor = None self.__selfNodeAddr = selfNodeAddr self.__otherNodesAddrs = otherNodesAddrs self.__unknownConnections = {} # descr => _Connection self.__raftState = _RAFT_STATE.FOLLOWER self.__raftCurrentTerm = 0 self.__votedFor = None self.__votesCount = 0 self.__raftLeader = None self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout() self.__raftLog = createJournal(self.__conf.journalFile) if len(self.__raftLog) == 0: self.__raftLog.add(_bchr(_COMMAND_TYPE.NO_OP), 1, self.__raftCurrentTerm) self.__raftCommitIndex = 1 self.__raftLastApplied = 1 self.__raftNextIndex = {} self.__raftMatchIndex = {} self.__lastSerializedTime = time.time() self.__lastSerializedEntry = None self.__forceLogCompaction = False self.__leaderCommitIndex = None self.__onReadyCalled = False self.__changeClusterIDx = None self.__noopIDx = None self.__destroying = False self.__recvTransmission = '' self.__startTime = time.time() globalDnsResolver().setTimeouts(self.__conf.dnsCacheTime, self.__conf.dnsFailCacheTime) self.__serializer = Serializer(self.__conf.fullDumpFile, self.__conf.logCompactionBatchSize, self.__conf.useFork, self.__conf.serializer, self.__conf.deserializer, self.__conf.serializeChecker) self.__isInitialized = False self.__lastInitTryTime = 0 self._poller = 
createPoller(self.__conf.pollerType) if selfNodeAddr is not None: bindAddr = self.__conf.bindAddress or selfNodeAddr host, port = bindAddr.split(':') self.__server = TcpServer(self._poller, host, port, onNewConnection=self.__onNewConnection, sendBufferSize=self.__conf.sendBufferSize, recvBufferSize=self.__conf.recvBufferSize, connectionTimeout=self.__conf.connectionTimeout) self._methodToID = {} self._idToMethod = {} methods = sorted([m for m in dir(self) if callable(getattr(self, m))]) for i, method in enumerate(methods): self._methodToID[method] = i self._idToMethod[i] = getattr(self, method) self.__thread = None self.__mainThread = None self.__initialised = None self.__bindedEvent = threading.Event() self.__bindRetries = 0 self.__commandsQueue = FastQueue(self.__conf.commandsQueueSize) if not self.__conf.appendEntriesUseBatch: self.__pipeNotifier = PipeNotifier(self._poller) self.__nodes = [] self.__readonlyNodes = [] self.__readonlyNodesCounter = 0 self.__lastReadonlyCheck = 0 self.__newAppendEntriesTime = 0 self.__commandsWaitingCommit = collections.defaultdict(list) # logID => [(termID, callback), ...] self.__commandsLocalCounter = 0 self.__commandsWaitingReply = {} # commandLocalCounter => callback self.__properies = set() for key in self.__dict__: self.__properies.add(key) if self.__conf.autoTick: self.__mainThread = threading.current_thread() self.__initialised = threading.Event() self.__thread = threading.Thread(target=SyncObj._autoTickThread, args=(weakref.proxy(self),)) self.__thread.start() self.__initialised.wait() # while not self.__initialised.is_set(): # pass else: self.__initInTickThread() def destroy(self): """ Correctly destroy SyncObj. Stop autoTickThread, close connections, etc. """ if self.__conf.autoTick: self.__destroying = True else: self._doDestroy() def waitBinded(self): """ Waits until initialized (binded port). If success - just returns. If failed to initialized after conf.maxBindRetries - raise SyncObjException. 
""" self.__bindedEvent.wait() if not self.__isInitialized: raise SyncObjException('BindError') def _destroy(self): self.destroy() def _doDestroy(self): for node in self.__nodes: node._destroy() for node in self.__readonlyNodes: node._destroy() if self.__selfNodeAddr is not None: self.__server.unbind() self.__raftLog._destroy() def __initInTickThread(self): try: self.__lastInitTryTime = time.time() if self.__selfNodeAddr is not None: self.__server.bind() shouldConnect = None else: shouldConnect = True self.__nodes = [] for nodeAddr in self.__otherNodesAddrs: self.__nodes.append(Node(self, nodeAddr, shouldConnect)) self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[nodeAddr] = 0 self.__needLoadDumpFile = True self.__isInitialized = True self.__bindedEvent.set() except: self.__bindRetries += 1 if self.__conf.maxBindRetries and self.__bindRetries >= self.__conf.maxBindRetries: self.__bindedEvent.set() raise SyncObjException('BindError') logging.exception('failed to perform initialization') def addNodeToCluster(self, nodeName, callback = None): """Add single node to cluster (dynamic membership changes). Async. You should wait until node successfully added before adding next node. :param nodeName: nodeHost:nodePort :type nodeName: str :param callback: will be called on success or fail :type callback: function(`FAIL_REASON <#pysyncobj.FAIL_REASON>`_, None) """ if not self.__conf.dynamicMembershipChange: raise Exception('dynamicMembershipChange is disabled') self._applyCommand(cPickle.dumps(['add', nodeName]), callback, _COMMAND_TYPE.MEMBERSHIP) def removeNodeFromCluster(self, nodeName, callback = None): """Remove single node from cluster (dynamic membership changes). Async. You should wait until node successfully added before adding next node. 
:param nodeName: nodeHost:nodePort :type nodeName: str :param callback: will be called on success or fail :type callback: function(`FAIL_REASON <#pysyncobj.FAIL_REASON>`_, None) """ if not self.__conf.dynamicMembershipChange: raise Exception('dynamicMembershipChange is disabled') self._applyCommand(cPickle.dumps(['rem', nodeName]), callback, _COMMAND_TYPE.MEMBERSHIP) def _addNodeToCluster(self, nodeName, callback=None): self.addNodeToCluster(nodeName, callback) def _removeNodeFromCluster(self, nodeName, callback=None): self.removeNodeFromCluster(nodeName, callback) def _applyCommand(self, command, callback, commandType = None): try: if commandType is None: self.__commandsQueue.put_nowait((command, callback)) else: self.__commandsQueue.put_nowait((_bchr(commandType) + command, callback)) if not self.__conf.appendEntriesUseBatch: self.__pipeNotifier.notify() except Queue.Full: self.__callErrCallback(FAIL_REASON.QUEUE_FULL, callback) def _checkCommandsToApply(self): startTime = time.time() while time.time() - startTime < self.__conf.appendEntriesPeriod: if self.__raftLeader is None and self.__conf.commandsWaitLeader: break try: command, callback = self.__commandsQueue.get_nowait() except Queue.Empty: break requestNode, requestID = None, None if isinstance(callback, tuple): requestNode, requestID = callback if self.__raftState == _RAFT_STATE.LEADER: idx, term = self.__getCurrentLogIndex() + 1, self.__raftCurrentTerm if self.__conf.dynamicMembershipChange: changeClusterRequest = self.__parseChangeClusterRequest(command) else: changeClusterRequest = None if changeClusterRequest is None or self.__changeCluster(changeClusterRequest): self.__raftLog.add(command, idx, term) if requestNode is None: if callback is not None: self.__commandsWaitingCommit[idx].append((term, callback)) else: self.__send(requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'log_idx': idx, 'log_term': term, }) if not self.__conf.appendEntriesUseBatch: self.__sendAppendEntries() 
else: if requestNode is None: if callback is not None: callback(None, FAIL_REASON.REQUEST_DENIED) else: self.__send(requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'error': FAIL_REASON.REQUEST_DENIED, }) elif self.__raftLeader is not None: if requestNode is None: message = { 'type': 'apply_command', 'command': command, } if callback is not None: self.__commandsLocalCounter += 1 self.__commandsWaitingReply[self.__commandsLocalCounter] = callback message['request_id'] = self.__commandsLocalCounter self.__send(self.__raftLeader, message) else: self.__send(requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'error': FAIL_REASON.NOT_LEADER, }) else: self.__callErrCallback(FAIL_REASON.MISSING_LEADER, callback) def _autoTickThread(self): try: self.__initInTickThread() except SyncObjException as e: if e.errorCode == 'BindError': return raise finally: self.__initialised.set() time.sleep(0.1) try: while True: if not self.__mainThread.is_alive(): break if self.__destroying: self._doDestroy() break self._onTick(self.__conf.autoTickPeriod) except ReferenceError: pass def doTick(self, timeToWait=0.0): """Performs single tick. Should be called manually if `autoTick <#pysyncobj.SyncObjConf.autoTick>`_ disabled :param timeToWait: max time to wait for next tick. If zero - perform single tick without waiting for new events. Otherwise - wait for new socket event and return. 
:type timeToWait: float """ assert not self.__conf.autoTick self._onTick(timeToWait) def _onTick(self, timeToWait=0.0): if not self.__isInitialized: if time.time() >= self.__lastInitTryTime + self.__conf.bindRetryTime: self.__initInTickThread() if not self.__isInitialized: time.sleep(timeToWait) return if self.__needLoadDumpFile: if self.__conf.fullDumpFile is not None and os.path.isfile(self.__conf.fullDumpFile): self.__loadDumpFile(clearJournal=False) self.__needLoadDumpFile = False if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE) and self.__selfNodeAddr is not None: if self.__raftElectionDeadline < time.time() and self.__connectedToAnyone(): self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout() self.__raftLeader = None self.__raftState = _RAFT_STATE.CANDIDATE self.__raftCurrentTerm += 1 self.__votedFor = self._getSelfNodeAddr() self.__votesCount = 1 for node in self.__nodes: node.send({ 'type': 'request_vote', 'term': self.__raftCurrentTerm, 'last_log_index': self.__getCurrentLogIndex(), 'last_log_term': self.__getCurrentLogTerm(), }) self.__onLeaderChanged() if self.__votesCount > (len(self.__nodes) + 1) / 2: self.__onBecomeLeader() if self.__raftState == _RAFT_STATE.LEADER: while self.__raftCommitIndex < self.__getCurrentLogIndex(): nextCommitIndex = self.__raftCommitIndex + 1 count = 1 for node in self.__nodes: if self.__raftMatchIndex[node.getAddress()] >= nextCommitIndex: count += 1 if count > (len(self.__nodes) + 1) / 2: self.__raftCommitIndex = nextCommitIndex else: break self.__leaderCommitIndex = self.__raftCommitIndex needSendAppendEntries = False if self.__raftCommitIndex > self.__raftLastApplied: count = self.__raftCommitIndex - self.__raftLastApplied entries = self.__getEntries(self.__raftLastApplied + 1, count) for entry in entries: currentTermID = entry[2] subscribers = self.__commandsWaitingCommit.pop(entry[1], []) res = self.__doApplyCommand(entry[0]) for subscribeTermID, callback in subscribers: if 
subscribeTermID == currentTermID: callback(res, FAIL_REASON.SUCCESS) else: callback(None, FAIL_REASON.DISCARDED) self.__raftLastApplied += 1 if not self.__conf.appendEntriesUseBatch: needSendAppendEntries = True if self.__raftState == _RAFT_STATE.LEADER: if time.time() > self.__newAppendEntriesTime or needSendAppendEntries: self.__sendAppendEntries() if not self.__onReadyCalled and self.__raftLastApplied == self.__leaderCommitIndex: if self.__conf.onReady: self.__conf.onReady() self.__onReadyCalled = True self._checkCommandsToApply() self.__tryLogCompaction() for node in self.__nodes: node.connectIfRequired() if time.time() > self.__lastReadonlyCheck + 1.0: self.__lastReadonlyCheck = time.time() newReadonlyNodes = [] for node in self.__readonlyNodes: if node.isConnected(): newReadonlyNodes.append(node) else: self.__raftNextIndex.pop(node, None) self.__raftMatchIndex.pop(node, None) node._destroy() self._poller.poll(timeToWait) def getStatus(self): """Dumps different debug info about cluster to list and return it""" status = [] status.append(('version', VERSION)) status.append(('revision', REVISION)) status.append(('self', self.__selfNodeAddr)) status.append(('state' , self.__raftState)) status.append(('leader', self.__raftLeader)) status.append(('partner_nodes_count' , len(self.__nodes))) for n in self.__nodes: status.append(('partner_node_status_server_'+n.getAddress(), n.getStatus())) status.append(('readonly_nodes_count', len(self.__readonlyNodes))) for n in self.__readonlyNodes: status.append(('readonly_node_status_server_'+n.getAddress(), n.getStatus())) status.append(('unknown_connections_count', len(self.__unknownConnections))) status.append(('log_len', len(self.__raftLog))) status.append(('last_applied', self.__raftLastApplied)) status.append(('commit_idx', self.__raftCommitIndex)) status.append(('raft_term', self.__raftCurrentTerm)) status.append(('next_node_idx_count', len(self.__raftNextIndex))) for k, v in self.__raftNextIndex.iteritems(): 
status.append(('next_node_idx_server_'+k, v)) status.append(('match_idx_count', len(self.__raftMatchIndex))) for k, v in self.__raftMatchIndex.iteritems(): status.append(('match_idx_server_'+k, v)) status.append(('leader_commit_idx', self.__leaderCommitIndex)) status.append(('uptime', int(time.time() - self.__startTime))) return status def _getStatus(self): return self.getStatus() def printStatus(self): """Dumps different debug info about cluster to default logger""" status = self.getStatus() for i in status: logging.info(i[0]+': %s', str(i[1])) def _printStatus(self): self.printStatus() def forceLogCompaction(self): """Force to start log compaction (without waiting required time or required number of entries)""" self.__forceLogCompaction = True def _forceLogCompaction(self): self.forceLogCompaction() def __doApplyCommand(self, command): commandType = ord(command[:1]) # Skip no-op and membership change commands if commandType != _COMMAND_TYPE.REGULAR: return command = cPickle.loads(command[1:]) args = [] kwargs = { '_doApply': True, } if not isinstance(command, tuple): funcID = command elif len(command) == 2: funcID, args = command else: funcID, args, newKwArgs = command kwargs.update(newKwArgs) return self._idToMethod[funcID](*args, **kwargs) def _onMessageReceived(self, nodeAddr, message): if message['type'] == 'request_vote' and self.__selfNodeAddr is not None: if message['term'] > self.__raftCurrentTerm: self.__raftCurrentTerm = message['term'] self.__votedFor = None self.__raftState = _RAFT_STATE.FOLLOWER self.__raftLeader = None if self.__raftState in (_RAFT_STATE.FOLLOWER, _RAFT_STATE.CANDIDATE): lastLogTerm = message['last_log_term'] lastLogIdx = message['last_log_index'] if message['term'] >= self.__raftCurrentTerm: if lastLogTerm < self.__getCurrentLogTerm(): return if lastLogTerm == self.__getCurrentLogTerm() and \ lastLogIdx < self.__getCurrentLogIndex(): return if self.__votedFor is not None: return self.__votedFor = nodeAddr 
self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout() self.__send(nodeAddr, { 'type': 'response_vote', 'term': message['term'], }) if message['type'] == 'append_entries' and message['term'] >= self.__raftCurrentTerm: self.__raftElectionDeadline = time.time() + self.__generateRaftTimeout() if self.__raftLeader != nodeAddr: self.__onLeaderChanged() self.__raftLeader = nodeAddr if message['term'] > self.__raftCurrentTerm: self.__raftCurrentTerm = message['term'] self.__votedFor = None self.__raftState = _RAFT_STATE.FOLLOWER newEntries = message.get('entries', []) serialized = message.get('serialized', None) self.__leaderCommitIndex = leaderCommitIndex = message['commit_index'] # Regular append entries if 'prevLogIdx' in message: transmission = message.get('transmission', None) if transmission is not None: if transmission == 'start': self.__recvTransmission = message['data'] return elif transmission == 'process': self.__recvTransmission += message['data'] return elif transmission == 'finish': self.__recvTransmission += message['data'] newEntries = [cPickle.loads(self.__recvTransmission)] self.__recvTransmission = '' else: raise Exception('Wrong transmission type') prevLogIdx = message['prevLogIdx'] prevLogTerm = message['prevLogTerm'] prevEntries = self.__getEntries(prevLogIdx) if not prevEntries: self.__sendNextNodeIdx(nodeAddr, success=False, reset=True) return if prevEntries[0][2] != prevLogTerm: self.__sendNextNodeIdx(nodeAddr, nextNodeIdx = prevLogIdx, success = False, reset=True) return if len(prevEntries) > 1: # rollback cluster changes if self.__conf.dynamicMembershipChange: for entry in reversed(prevEntries[1:]): clusterChangeRequest = self.__parseChangeClusterRequest(entry[0]) if clusterChangeRequest is not None: self.__doChangeCluster(clusterChangeRequest, reverse=True) self.__deleteEntriesFrom(prevLogIdx + 1) for entry in newEntries: self.__raftLog.add(*entry) # apply cluster changes if self.__conf.dynamicMembershipChange: for entry in 
newEntries: clusterChangeRequest = self.__parseChangeClusterRequest(entry[0]) if clusterChangeRequest is not None: self.__doChangeCluster(clusterChangeRequest) nextNodeIdx = prevLogIdx + 1 if newEntries: nextNodeIdx = newEntries[-1][1] self.__sendNextNodeIdx(nodeAddr, nextNodeIdx=nextNodeIdx, success=True) # Install snapshot elif serialized is not None: if self.__serializer.setTransmissionData(serialized): self.__loadDumpFile(clearJournal=True) self.__sendNextNodeIdx(nodeAddr, success=True) self.__raftCommitIndex = min(leaderCommitIndex, self.__getCurrentLogIndex()) if message['type'] == 'apply_command': if 'request_id' in message: self._applyCommand(message['command'], (nodeAddr, message['request_id'])) else: self._applyCommand(message['command'], None) if message['type'] == 'apply_command_response': requestID = message['request_id'] error = message.get('error', None) callback = self.__commandsWaitingReply.pop(requestID, None) if callback is not None: if error is not None: callback(None, error) else: idx = message['log_idx'] term = message['log_term'] assert idx > self.__raftLastApplied self.__commandsWaitingCommit[idx].append((term, callback)) if self.__raftState == _RAFT_STATE.CANDIDATE: if message['type'] == 'response_vote' and message['term'] == self.__raftCurrentTerm: self.__votesCount += 1 if self.__votesCount > (len(self.__nodes) + 1) / 2: self.__onBecomeLeader() if self.__raftState == _RAFT_STATE.LEADER: if message['type'] == 'next_node_idx': reset = message['reset'] nextNodeIdx = message['next_node_idx'] success = message['success'] currentNodeIdx = nextNodeIdx - 1 if reset: self.__raftNextIndex[nodeAddr] = nextNodeIdx if success: self.__raftMatchIndex[nodeAddr] = currentNodeIdx def __callErrCallback(self, err, callback): if callback is None: return if isinstance(callback, tuple): requestNode, requestID = callback self.__send(requestNode, { 'type': 'apply_command_response', 'request_id': requestID, 'error': err, }) return callback(None, err) def 
__sendNextNodeIdx(self, nodeAddr, reset=False, nextNodeIdx = None, success = False): if nextNodeIdx is None: nextNodeIdx = self.__getCurrentLogIndex() + 1 self.__send(nodeAddr, { 'type': 'next_node_idx', 'next_node_idx': nextNodeIdx, 'reset': reset, 'success': success, }) def __generateRaftTimeout(self): minTimeout = self.__conf.raftMinTimeout maxTimeout = self.__conf.raftMaxTimeout return minTimeout + (maxTimeout - minTimeout) * random.random() def __onNewConnection(self, conn): descr = conn.fileno() self.__unknownConnections[descr] = conn if self.__encryptor: conn.encryptor = self.__encryptor conn.setOnMessageReceivedCallback(functools.partial(self.__onMessageReceived, conn)) conn.setOnDisconnectedCallback(functools.partial(self.__onDisconnected, conn)) def __utilityCallback(self, res, err, conn, cmd, node): cmdResult = 'FAIL' if err == FAIL_REASON.SUCCESS: cmdResult = 'SUCCESS' conn.send(cmdResult + ' ' + cmd + ' ' + node) def __onUtilityMessage(self, conn, message): if message[0] == 'status': status = self.getStatus() data = '' for i in status: data += i[0] + ':' + str(i[1]) + '\n' conn.send(data) return True elif message[0] == 'add': self.addNodeToCluster(message[1], callback=functools.partial(self.__utilityCallback, conn=conn, cmd='ADD', node=message[1])) return True elif message[0] == 'remove': if message[1] == self.__selfNodeAddr: conn.send('FAIL REMOVE ' + message[1]) else: self.removeNodeFromCluster(message[1], callback=functools.partial(self.__utilityCallback, conn=conn, cmd='REMOVE', node=message[1])) return True return False def __onMessageReceived(self, conn, message): if self.__encryptor and not conn.sendRandKey: conn.sendRandKey = message conn.recvRandKey = os.urandom(32) conn.send(conn.recvRandKey) return descr = conn.fileno() if isinstance(message, list) and self.__onUtilityMessage(conn, message): self.__unknownConnections.pop(descr, None) return partnerNode = None for node in self.__nodes: if node.getAddress() == message: partnerNode = node break 
if partnerNode is None and message != 'readonly': conn.disconnect() self.__unknownConnections.pop(descr, None) return if partnerNode is not None: partnerNode.onPartnerConnected(conn) else: nodeAddr = str(self.__readonlyNodesCounter) node = Node(self, nodeAddr, shouldConnect=False) node.onPartnerConnected(conn) self.__readonlyNodes.append(node) self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[nodeAddr] = 0 self.__readonlyNodesCounter += 1 self.__unknownConnections.pop(descr, None) def __onDisconnected(self, conn): self.__unknownConnections.pop(conn.fileno(), None) def __getCurrentLogIndex(self): return self.__raftLog[-1][1] def __getCurrentLogTerm(self): return self.__raftLog[-1][2] def __getPrevLogIndexTerm(self, nextNodeIndex): prevIndex = nextNodeIndex - 1 entries = self.__getEntries(prevIndex, 1) if entries: return prevIndex, entries[0][2] return None, None def __getEntries(self, fromIDx, count=None, maxSizeBytes = None): firstEntryIDx = self.__raftLog[0][1] if fromIDx is None or fromIDx < firstEntryIDx: return [] diff = fromIDx - firstEntryIDx if count is None: result = self.__raftLog[diff:] else: result = self.__raftLog[diff:diff + count] if maxSizeBytes is None: return result totalSize = 0 i = 0 for i, entry in enumerate(result): totalSize += len(entry[0]) if totalSize >= maxSizeBytes: break return result[:i + 1] def _isLeader(self): return self.__raftState == _RAFT_STATE.LEADER def _getLeader(self): return self.__raftLeader def isReady(self): """Check if current node is initially synced with others and has an actual data. 
:return: True if ready, False otherwise :rtype: bool """ return self.__onReadyCalled def _isReady(self): return self.isReady() def _getTerm(self): return self.__raftCurrentTerm def _getRaftLogSize(self): return len(self.__raftLog) def __deleteEntriesFrom(self, fromIDx): firstEntryIDx = self.__raftLog[0][1] diff = fromIDx - firstEntryIDx if diff < 0: return self.__raftLog.deleteEntriesFrom(diff) def __deleteEntriesTo(self, toIDx): firstEntryIDx = self.__raftLog[0][1] diff = toIDx - firstEntryIDx if diff < 0: return self.__raftLog.deleteEntriesTo(diff) def __onBecomeLeader(self): self.__raftLeader = self.__selfNodeAddr self.__raftState = _RAFT_STATE.LEADER for node in self.__nodes + self.__readonlyNodes: nodeAddr = node.getAddress() self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[nodeAddr] = 0 # No-op command after leader election. idx, term = self.__getCurrentLogIndex() + 1, self.__raftCurrentTerm self.__raftLog.add(_bchr(_COMMAND_TYPE.NO_OP), idx, term) self.__noopIDx = idx if not self.__conf.appendEntriesUseBatch: self.__sendAppendEntries() self.__sendAppendEntries() def __onLeaderChanged(self): for id in sorted(self.__commandsWaitingReply): self.__commandsWaitingReply[id](None, FAIL_REASON.LEADER_CHANGED) self.__commandsWaitingReply = {} def __sendAppendEntries(self): self.__newAppendEntriesTime = time.time() + self.__conf.appendEntriesPeriod startTime = time.time() batchSizeBytes = self.__conf.appendEntriesBatchSizeBytes for node in self.__nodes + self.__readonlyNodes: nodeAddr = node.getAddress() if not node.isConnected(): self.__serializer.cancelTransmisstion(nodeAddr) continue sendSingle = True sendingSerialized = False nextNodeIndex = self.__raftNextIndex[nodeAddr] while nextNodeIndex <= self.__getCurrentLogIndex() or sendSingle or sendingSerialized: if nextNodeIndex > self.__raftLog[0][1]: prevLogIdx, prevLogTerm = self.__getPrevLogIndexTerm(nextNodeIndex) entries = [] if nextNodeIndex <= self.__getCurrentLogIndex(): 
entries = self.__getEntries(nextNodeIndex, None, batchSizeBytes) self.__raftNextIndex[nodeAddr] = entries[-1][1] + 1 if len(entries) == 1 and len(entries[0][0]) >= batchSizeBytes: entry = cPickle.dumps(entries[0], -1) for pos in xrange(0, len(entry), batchSizeBytes): currData = entry[pos:pos + batchSizeBytes] if pos == 0: transmission = 'start' elif pos + batchSizeBytes >= len(entries[0][0]): transmission = 'finish' else: transmission = 'process' message = { 'type': 'append_entries', 'transmission': transmission, 'data': currData, 'term': self.__raftCurrentTerm, 'commit_index': self.__raftCommitIndex, 'prevLogIdx': prevLogIdx, 'prevLogTerm': prevLogTerm, } node.send(message) else: message = { 'type': 'append_entries', 'term': self.__raftCurrentTerm, 'commit_index': self.__raftCommitIndex, 'entries': entries, 'prevLogIdx': prevLogIdx, 'prevLogTerm': prevLogTerm, } node.send(message) else: transmissionData = self.__serializer.getTransmissionData(nodeAddr) message = { 'type': 'append_entries', 'term': self.__raftCurrentTerm, 'commit_index': self.__raftCommitIndex, 'serialized': transmissionData, } node.send(message) if transmissionData is not None: isLast = transmissionData[2] if isLast: self.__raftNextIndex[nodeAddr] = self.__raftLog[1][1] + 1 sendingSerialized = False else: sendingSerialized = True else: sendingSerialized = False nextNodeIndex = self.__raftNextIndex[nodeAddr] sendSingle = False delta = time.time() - startTime if delta > self.__conf.appendEntriesPeriod: break def __send(self, nodeAddr, message): for node in self.__nodes + self.__readonlyNodes: if node.getAddress() == nodeAddr: node.send(message) break def __connectedToAnyone(self): for node in self.__nodes: if node.getStatus() == NODE_STATUS.CONNECTED: return True if not self.__nodes: return True return False def _getSelfNodeAddr(self): return self.__selfNodeAddr def _getConf(self): return self.__conf def _getEncryptor(self): return self.__encryptor def __changeCluster(self, request): if 
self.__raftLastApplied < self.__noopIDx: # No-op entry was not commited yet return False if self.__changeClusterIDx is not None: if self.__raftLastApplied >= self.__changeClusterIDx: self.__changeClusterIDx = None # Previous cluster change request was not commited yet if self.__changeClusterIDx is not None: return False return self.__doChangeCluster(request) def __doChangeCluster(self, request, reverse = False): requestType = request[0] requestNode = request[1] if requestType == 'add': adding = not reverse elif requestType == 'rem': adding = reverse else: return False if self.__selfNodeAddr is not None: shouldConnect = None else: shouldConnect = True if adding: newNode = requestNode # Node already exists in cluster if newNode == self.__selfNodeAddr or newNode in self.__otherNodesAddrs: return False self.__otherNodesAddrs.append(newNode) self.__nodes.append(Node(self, newNode, shouldConnect)) self.__raftNextIndex[newNode] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[newNode] = 0 return True else: oldNode = requestNode if oldNode == self.__selfNodeAddr: return False if oldNode not in self.__otherNodesAddrs: return False for i in xrange(len(self.__nodes)): if self.__nodes[i].getAddress() == oldNode: self.__nodes[i]._destroy() self.__nodes.pop(i) self.__otherNodesAddrs.pop(i) del self.__raftNextIndex[oldNode] del self.__raftMatchIndex[oldNode] return True return False def __parseChangeClusterRequest(self, command): commandType = ord(command[:1]) if commandType != _COMMAND_TYPE.MEMBERSHIP: return None return cPickle.loads(command[1:]) def __tryLogCompaction(self): currTime = time.time() serializeState, serializeID = self.__serializer.checkSerializing() if serializeState == SERIALIZER_STATE.SUCCESS: self.__lastSerializedTime = currTime self.__deleteEntriesTo(serializeID) self.__lastSerializedEntry = serializeID if serializeState == SERIALIZER_STATE.FAILED: logging.warning('Failed to store full dump') if serializeState != SERIALIZER_STATE.NOT_SERIALIZING: 
return if len(self.__raftLog) <= self.__conf.logCompactionMinEntries and \ currTime - self.__lastSerializedTime <= self.__conf.logCompactionMinTime and \ not self.__forceLogCompaction: return if self.__conf.logCompactionSplit: allNodes = sorted(self.__otherNodesAddrs + [self.__selfNodeAddr]) nodesCount = len(allNodes) selfIdx = allNodes.index(self.__selfNodeAddr) interval = self.__conf.logCompactionMinTime periodStart = int(currTime) / interval * interval nodeInterval = float(interval) / nodesCount nodeIntervalStart = periodStart + selfIdx * nodeInterval nodeIntervalEnd = nodeIntervalStart + 0.3 * nodeInterval if currTime < nodeIntervalStart or currTime >= nodeIntervalEnd: return self.__forceLogCompaction = False lastAppliedEntries = self.__getEntries(self.__raftLastApplied - 1, 2) if len(lastAppliedEntries) < 2 or lastAppliedEntries[0][1] == self.__lastSerializedEntry: self.__lastSerializedTime = currTime return if self.__conf.serializer is None: data = dict([(k, self.__dict__[k]) for k in self.__dict__.keys() if k not in self.__properies]) else: data = None cluster = self.__otherNodesAddrs + [self.__selfNodeAddr] self.__serializer.serialize((data, lastAppliedEntries[1], lastAppliedEntries[0], cluster), lastAppliedEntries[0][1]) def __loadDumpFile(self, clearJournal): try: data = self.__serializer.deserialize() if data[0] is not None: for k, v in data[0].iteritems(): self.__dict__[k] = v if clearJournal or \ len(self.__raftLog) < 2 or \ self.__raftLog[0] != data[2] or \ self.__raftLog[1] != data[1]: self.__raftLog.clear() self.__raftLog.add(*data[2]) self.__raftLog.add(*data[1]) self.__raftLastApplied = data[1][1] if self.__conf.dynamicMembershipChange: self.__otherNodesAddrs = [node for node in data[3] if node != self.__selfNodeAddr] self.__updateClusterConfiguration() except: logging.exception('failed to load full dump') def __updateClusterConfiguration(self): currentNodes = set() for i in xrange(len(self.__nodes) -1, -1, -1): nodeAddr = 
self.__nodes[i].getAddress() if nodeAddr not in self.__otherNodesAddrs: self.__nodes[i]._destroy() self.__nodes.pop(i) else: currentNodes.add(nodeAddr) if self.__selfNodeAddr is not None: shouldConnect = None else: shouldConnect = True for nodeAddr in self.__otherNodesAddrs: if nodeAddr not in currentNodes: self.__nodes.append(Node(self, nodeAddr, shouldConnect)) self.__raftNextIndex[nodeAddr] = self.__getCurrentLogIndex() + 1 self.__raftMatchIndex[nodeAddr] = 0
def select(self, model_name, params=None, data_load=False, mode='obj'):
    """ requests objects of a given type from the server in bulk mode.

    caching:    caches files only
    cascade:    no
    data_load:  yes/no

    Arguments:

    model_name: type of the object (like 'block', 'segment' or 'section'.)
                A class alias (a value of self._meta.cls_aliases) is
                accepted too and is resolved to the real model name.
    params:     dict that can contain several categories of key-value
                pairs (see below). When omitted, an empty dict is sent.
    data_load:  fetch the data or not (applied if mode == 'obj')
    mode:       'obj' or 'json' - return mode, python object or JSON.
                Any value other than 'json' is handled as 'obj'.

    Params can be:

    1. filters, like:
        'owner__username': '******'
        'segment__id__in': [19485,56223,89138]
        'n_definition__icontains': 'blafoo' # negative filter! (has 'n_')

    2. common params, like
        'at_time': '2013-02-22 15:34:57'
        'offset': 50
        'max_results': 20

    3. data params, to get only parts of the original object(s). These only
        work for the data-related objects (like 'analogsignal' or
        'spiketrain').

        start_time - start time of the required range (calculated
            using the same time unit as the t_start of the signal)
        end_time - end time of the required range (calculated using
            the same time unit as the t_start of the signal)
        duration - duration of the required range (calculated using
            the same time unit as the t_start of the signal)
        start_index - start index of the required datarange (an index
            of the starting datapoint)
        end_index - end index of the required range (an index of the
            end datapoint)
        samples_count - number of points of the required range
        downsample - number of datapoints. This parameter is used to
            indicate whether downsampling is needed. The downsampling
            is applied on top of the selected data range using other
            parameters (if specified)

    Examples:
    select('analogsignal', params={'id__in': [38551], 'downsample': 100})
    select('analogsignal', params={'segment__id': 93882, 'start_time': 500.0})
    select('section', params={'odml_type': 'experiment', 'date_created': '2013-02-22'})

    Returns:
    whatever self._remote.get_list returned (mode == 'json'), otherwise a
    list with one deserialized object per item of that response.

    Raises:
    TypeError if model_name (after alias resolution) is not a key of
    self._meta.models_map.
    """
    # A fresh dict per call: a shared mutable default would leak any change
    # made by the remote layer into every later call that omits params.
    if params is None:
        params = {}

    # TODO put into model_safe decorator
    # translate a class alias into the first model name registered for it
    for real_name, alias in self._meta.cls_aliases.items():
        if alias == model_name:
            model_name = real_name
            break

    if model_name not in self._meta.models_map:
        raise TypeError('Objects of that type are not supported.')

    # fetch from remote + save in cache if possible
    json_objs = self._remote.get_list(model_name, params)

    if mode == 'json':
        # return pure JSON (no data) if requested
        objects = json_objs

    else:
        # convert to objects in 'obj' mode
        objects = []
        for json_obj in json_objs:
            data_refs = {}  # is a dict like {'signal': <array...>, ...}
            if data_load:
                data_refs = self.__parse_data_from_json(json_obj)

            objects.append(
                Serializer.deserialize(json_obj, self._meta, data_refs))

    self._cache.save_data_map()  # updates on-disk cache with new datafiles
    self._cache.save_h5_map()

    return objects