def __init__(self, inbox, outbox, hostID, queue, monitorInterval): # Save arguments tpSize = config.getint('irs', 'thread_pool_size') // 2 waitTimeout = wait_timeout(monitorInterval) maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-hsm", tpSize, waitTimeout, maxTasks) self._stop = False self._queue = queue self._activeMessages = {} self._monitorInterval = monitorInterval self._hostID = int(hostID) self._used_slots_array = [0] * MESSAGES_PER_MAILBOX self._outgoingMail = EMPTYMAILBOX self._incomingMail = EMPTYMAILBOX # TODO: add support for multiple paths (multiple mailboxes) self._inCmd = [ constants.EXT_DD, 'if=' + str(inbox), 'iflag=direct,fullblock', 'bs=' + str(MAILBOX_SIZE), 'count=1', 'skip=' + str(self._hostID) ] self._outCmd = [ constants.EXT_DD, 'of=' + str(outbox), 'iflag=fullblock', 'oflag=direct', 'conv=notrunc', 'bs=' + str(MAILBOX_SIZE), 'count=1', 'seek=' + str(self._hostID) ] self._init = False self._initMailbox() # Read initial mailbox state self._msgCounter = 0 self._sendMail() # Clear outgoing mailbox self._thread = concurrent.thread(self._run, name="mailbox-hsm", log=self.log) self._thread.start()
def __init__(self, poolID, maxHostID, inbox, outbox, monitorInterval=2): """ Note: inbox paramerter here should point to the HSM's outbox mailbox file, and vice versa. """ self._messageTypes = {} # Save arguments self._stop = False self._stopped = False self._poolID = poolID tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = wait_timeout(monitorInterval) maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks) self._inbox = inbox if not os.path.exists(self._inbox): self.log.error("SPM_MailMonitor create failed - inbox %s does not " "exist" % repr(self._inbox)) raise RuntimeError("SPM_MailMonitor create failed - inbox %s does " "not exist" % repr(self._inbox)) self._outbox = outbox if not os.path.exists(self._outbox): self.log.error("SPM_MailMonitor create failed - outbox %s does " "not exist" % repr(self._outbox)) raise RuntimeError("SPM_MailMonitor create failed - outbox %s " "does not exist" % repr(self._outbox)) self._numHosts = int(maxHostID) self._outMailLen = MAILBOX_SIZE * self._numHosts self._monitorInterval = monitorInterval # TODO: add support for multiple paths (multiple mailboxes) self._outgoingMail = self._outMailLen * "\0" self._incomingMail = self._outgoingMail self._inCmd = ['dd', 'if=' + str(self._inbox), 'iflag=direct,fullblock', 'count=1' ] self._outCmd = ['dd', 'of=' + str(self._outbox), 'oflag=direct', 'iflag=fullblock', 'conv=notrunc', 'count=1' ] self._outLock = threading.Lock() self._inLock = threading.Lock() # Clear outgoing mail self.log.debug("SPM_MailMonitor - clearing outgoing mail, command is: " "%s", self._outCmd) cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, " "dd failed") self._thread = concurrent.thread( self._run, name="mailbox-spm", log=self.log) self._thread.start() self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID)
def __init__(self, tpSize=config.getint('irs', 'thread_pool_size'), waitTimeout=3, maxTasks=config.getint('irs', 'max_tasks')): self.tp = ThreadPool("tasks", tpSize, waitTimeout, maxTasks) self._tasks = {} self._unqueuedTasks = [] self._lock = threading.Lock()
def __init__(self, pool, maxHostID, monitorInterval=2): self._messageTypes = {} # Save arguments self._stop = False self._stopped = False self._poolID = str(pool.spUUID) self._spmStorageDir = pool.storage_repository tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = 3 maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks) # *** IMPORTANT NOTE: The SPM's inbox is the HSMs' outbox and vice # versa *** # self._inbox = os.path.join(self._spmStorageDir, self._poolID, "mastersd", sd.DOMAIN_META_DATA, "inbox") if not os.path.exists(self._inbox): self.log.error("SPM_MailMonitor create failed - inbox %s does not " "exist" % repr(self._inbox)) raise RuntimeError("SPM_MailMonitor create failed - inbox %s does " "not exist" % repr(self._inbox)) self._outbox = os.path.join(self._spmStorageDir, self._poolID, "mastersd", sd.DOMAIN_META_DATA, "outbox") if not os.path.exists(self._outbox): self.log.error("SPM_MailMonitor create failed - outbox %s does " "not exist" % repr(self._outbox)) raise RuntimeError("SPM_MailMonitor create failed - outbox %s " "does not exist" % repr(self._outbox)) self._numHosts = int(maxHostID) self._outMailLen = MAILBOX_SIZE * self._numHosts self._monitorInterval = monitorInterval # TODO: add support for multiple paths (multiple mailboxes) self._outgoingMail = self._outMailLen * "\0" self._incomingMail = self._outgoingMail self._inCmd = [ 'dd', 'if=' + str(self._inbox), 'iflag=direct,fullblock', 'count=1' ] self._outCmd = [ 'dd', 'of=' + str(self._outbox), 'oflag=direct', 'iflag=fullblock', 'conv=notrunc', 'count=1' ] self._outLock = threading.Lock() self._inLock = threading.Lock() # Clear outgoing mail self.log.debug( "SPM_MailMonitor - clearing outgoing mail, command is: " "%s", self._outCmd) cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, " "dd failed") t = concurrent.thread(self.run, name="mailbox-spm", logger=self.log.name) t.start() self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID)
def __init__(self, pool, maxHostID, monitorInterval=2): self._messageTypes = {} # Save arguments self._stop = False self._stopped = False self._poolID = str(pool.spUUID) self._spmStorageDir = pool.storage_repository tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = 3 maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks) # *** IMPORTANT NOTE: The SPM's inbox is the HSMs' outbox and vice # versa *** # self._inbox = os.path.join(self._spmStorageDir, self._poolID, "mastersd", sd.DOMAIN_META_DATA, "inbox") if not os.path.exists(self._inbox): self.log.error("SPM_MailMonitor create failed - inbox %s does not " "exist" % repr(self._inbox)) raise RuntimeError("SPM_MailMonitor create failed - inbox %s does " "not exist" % repr(self._inbox)) self._outbox = os.path.join(self._spmStorageDir, self._poolID, "mastersd", sd.DOMAIN_META_DATA, "outbox") if not os.path.exists(self._outbox): self.log.error("SPM_MailMonitor create failed - outbox %s does " "not exist" % repr(self._outbox)) raise RuntimeError("SPM_MailMonitor create failed - outbox %s " "does not exist" % repr(self._outbox)) self._numHosts = int(maxHostID) self._outMailLen = MAILBOX_SIZE * self._numHosts self._monitorInterval = monitorInterval # TODO: add support for multiple paths (multiple mailboxes) self._outgoingMail = self._outMailLen * "\0" self._incomingMail = self._outgoingMail self._inCmd = ['dd', 'if=' + str(self._inbox), 'iflag=direct,fullblock', 'count=1' ] self._outCmd = ['dd', 'of=' + str(self._outbox), 'oflag=direct', 'iflag=fullblock', 'conv=notrunc', 'count=1' ] self._outLock = threading.Lock() self._inLock = threading.Lock() # Clear outgoing mail self.log.debug("SPM_MailMonitor - clearing outgoing mail, command is: " "%s", self._outCmd) cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, " "dd failed") t = concurrent.thread(self.run, name="mailbox-spm", logger=self.log.name) t.start() self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID)
def __init__(self, poolID, maxHostID, inbox, outbox, monitorInterval=2): """ Note: inbox paramerter here should point to the HSM's outbox mailbox file, and vice versa. """ self._messageTypes = {} # Save arguments self._stop = False self._stopped = False self._poolID = poolID tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = wait_timeout(monitorInterval) maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks) self._inbox = inbox if not os.path.exists(self._inbox): self.log.error("SPM_MailMonitor create failed - inbox %s does not " "exist" % repr(self._inbox)) raise RuntimeError("SPM_MailMonitor create failed - inbox %s does " "not exist" % repr(self._inbox)) self._outbox = outbox if not os.path.exists(self._outbox): self.log.error("SPM_MailMonitor create failed - outbox %s does " "not exist" % repr(self._outbox)) raise RuntimeError("SPM_MailMonitor create failed - outbox %s " "does not exist" % repr(self._outbox)) self._numHosts = int(maxHostID) self._outMailLen = MAILBOX_SIZE * self._numHosts self._monitorInterval = monitorInterval # TODO: add support for multiple paths (multiple mailboxes) self._outgoingMail = self._outMailLen * "\0" self._incomingMail = self._outgoingMail self._inCmd = ['dd', 'if=' + str(self._inbox), 'iflag=direct,fullblock', 'count=1' ] self._outCmd = ['dd', 'of=' + str(self._outbox), 'oflag=direct', 'iflag=fullblock', 'conv=notrunc', 'count=1' ] self._outLock = threading.Lock() self._inLock = threading.Lock() # Clear outgoing mail self.log.debug("SPM_MailMonitor - clearing outgoing mail, command is: " "%s", self._outCmd) cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, " "dd failed") self._thread = concurrent.thread( self.run, name="mailbox-spm", log=self.log) self._thread.start() self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID)
def __init__(self, tpSize=config.getint('irs', 'thread_pool_size'), waitTimeout=3, maxTasks=config.getint('irs', 'max_tasks')): self.tp = ThreadPool("tasks", tpSize, waitTimeout, maxTasks) self._tasks = {} self._unqueuedTasks = [] self._insertTaskLock = threading.Lock()
def __init__(self, inbox, outbox, hostID, queue, monitorInterval): # Save arguments tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = 3 maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-hsm", tpSize, waitTimeout, maxTasks) self._stop = False self._flush = False self._queue = queue self._activeMessages = {} self._monitorInterval = monitorInterval self._hostID = int(hostID) self._used_slots_array = [0] * MESSAGES_PER_MAILBOX self._outgoingMail = EMPTYMAILBOX self._incomingMail = EMPTYMAILBOX # TODO: add support for multiple paths (multiple mailboxes) self._spmStorageDir = config.get('irs', 'repository') self._inCmd = [constants.EXT_DD, 'if=' + str(inbox), 'iflag=direct,fullblock', 'bs=' + str(BLOCK_SIZE), 'count=' + str(BLOCKS_PER_MAILBOX), 'skip=' + str(self._hostID * BLOCKS_PER_MAILBOX) ] self._outCmd = [constants.EXT_DD, 'of=' + str(outbox), 'iflag=fullblock', 'oflag=direct', 'conv=notrunc', 'bs=' + str(BLOCK_SIZE), 'seek=' + str(self._hostID * BLOCKS_PER_MAILBOX) ] self._init = False self._initMailbox() # Read initial mailbox state self._msgCounter = 0 self._sendMail() # Clear outgoing mailbox self._thread = concurrent.thread(self.run, name="mailbox-hsm", logger=self.log.name) self._thread.start()
class SPM_MailMonitor: log = logging.getLogger('storage.MailBox.SpmMailMonitor') def registerMessageType(self, messageType, callback): self._messageTypes[messageType] = callback def unregisterMessageType(self, messageType): del self._messageTypes[messageType] def __init__(self, poolID, maxHostID, inbox, outbox, monitorInterval=2): """ Note: inbox paramerter here should point to the HSM's outbox mailbox file, and vice versa. """ self._messageTypes = {} # Save arguments self._stop = False self._stopped = False self._poolID = poolID tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = wait_timeout(monitorInterval) maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks) self._inbox = inbox if not os.path.exists(self._inbox): self.log.error("SPM_MailMonitor create failed - inbox %s does not " "exist" % repr(self._inbox)) raise RuntimeError("SPM_MailMonitor create failed - inbox %s does " "not exist" % repr(self._inbox)) self._outbox = outbox if not os.path.exists(self._outbox): self.log.error("SPM_MailMonitor create failed - outbox %s does " "not exist" % repr(self._outbox)) raise RuntimeError("SPM_MailMonitor create failed - outbox %s " "does not exist" % repr(self._outbox)) self._numHosts = int(maxHostID) self._outMailLen = MAILBOX_SIZE * self._numHosts self._monitorInterval = monitorInterval # TODO: add support for multiple paths (multiple mailboxes) self._outgoingMail = self._outMailLen * "\0" self._incomingMail = self._outgoingMail self._inCmd = ['dd', 'if=' + str(self._inbox), 'iflag=direct,fullblock', 'count=1' ] self._outCmd = ['dd', 'of=' + str(self._outbox), 'oflag=direct', 'iflag=fullblock', 'conv=notrunc', 'count=1' ] self._outLock = threading.Lock() self._inLock = threading.Lock() # Clear outgoing mail self.log.debug("SPM_MailMonitor - clearing outgoing mail, command is: " "%s", self._outCmd) cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, " "dd failed") self._thread = concurrent.thread( self._run, name="mailbox-spm", log=self.log) self._thread.start() self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID) def wait(self, timeout=None): self._thread.join(timeout=timeout) return not self._thread.is_alive() def stop(self): self._stop = True def isStopped(self): return self._stopped def getMaxHostID(self): return self._numHosts def setMaxHostID(self, newMaxId): with self._inLock: with self._outLock: diff = newMaxId - self._numHosts if diff > 0: delta = MAILBOX_SIZE * diff * "\0" self._outgoingMail += delta self._incomingMail += delta elif diff < 0: delta = MAILBOX_SIZE * diff self._outgoingMail = self._outgoingMail[:-delta] self._incomingMail = self._incomingMail[:-delta] self._numHosts = newMaxId self._outMailLen = MAILBOX_SIZE * self._numHosts @classmethod def validateMailbox(self, mailbox, mailboxIndex): """ Return True if mailbox has a valid checksum, and is not an empty mailbox, False otherwise. """ assert len(mailbox) == MAILBOX_SIZE data = mailbox[:-CHECKSUM_BYTES] checksum = mailbox[-CHECKSUM_BYTES:] n = misc.checksum(data, CHECKSUM_BYTES) expected = struct.pack('<l', n) # Assumes CHECKSUM_BYTES equals 4!!! if checksum != expected: self.log.error( "mailbox %s checksum failed, not clearing mailbox, clearing " "new mail (data=%r, checksum=%r, expected=%r)", mailboxIndex, data, checksum, expected) return False elif expected == pZeroChecksum: return False # Ignore messages of empty mailbox return True def _handleRequests(self, newMail): send = False # run through all messages and check if new messages have arrived # (since last read) for host in range(0, self._numHosts): # Check mailbox checksum mailboxStart = host * MAILBOX_SIZE isMailboxValidated = False for i in range(0, MESSAGES_PER_MAILBOX): msgId = host * SLOTS_PER_MAILBOX + i msgStart = msgId * MESSAGE_SIZE # First byte of message is message version. Check message # version, if 0 then message is empty and can be skipped if newMail[msgStart] in ['\0', '0']: continue # Most mailboxes are probably empty so it costs less to check # that all messages start with 0 than to validate the mailbox, # therefor this is done after we find a non empty message in # mailbox if not isMailboxValidated: if not self.validateMailbox( newMail[mailboxStart:mailboxStart + MAILBOX_SIZE], host): # Cleaning invalid mbx in newMail newMail = newMail[:mailboxStart] + EMPTYMAILBOX + \ newMail[mailboxStart + MAILBOX_SIZE:] break self.log.debug("SPM_MailMonitor: Mailbox %s validated, " "checking mail", host) isMailboxValidated = True newMsg = newMail[msgStart:msgStart + MESSAGE_SIZE] msgOffset = msgId * MESSAGE_SIZE if newMsg == CLEAN_MESSAGE: # Should probably put a setter on outgoingMail which would # take the lock self._outLock.acquire() try: self._outgoingMail = \ self._outgoingMail[0:msgOffset] + CLEAN_MESSAGE + \ self._outgoingMail[msgOffset + MESSAGE_SIZE: self._outMailLen] finally: self._outLock.release() send = True continue # Message isn't empty, check if its new isMessageNew = False for j in range(msgStart, msgStart + MESSAGE_SIZE): if newMail[j] != self._incomingMail[j]: isMessageNew = True break # If search exhausted, i.e. message hasn't changed since last # read, it can be skipped if not isMessageNew: continue # We only get here if there is a novel request try: msgType = newMail[msgStart + 1:msgStart + 5] if msgType in self._messageTypes: # Use message class to process request according to # message specific logic id = str(uuid.uuid4()) self.log.debug("SPM_MailMonitor: processing request: " "%s" % repr(newMail[ msgStart:msgStart + MESSAGE_SIZE])) res = self.tp.queueTask( id, runTask, (self._messageTypes[msgType], msgId, newMail[msgStart: msgStart + MESSAGE_SIZE]) ) if not res: raise Exception() else: self.log.error("SPM_MailMonitor: unknown message type " "encountered: %s", msgType) except RuntimeError as e: self.log.error("SPM_MailMonitor: exception: %s caught " "while handling message: %s", str(e), newMail[msgStart:msgStart + MESSAGE_SIZE]) except: self.log.error("SPM_MailMonitor: exception caught while " "handling message: %s", newMail[msgStart:msgStart + MESSAGE_SIZE], exc_info=True) self._incomingMail = newMail return send def _checkForMail(self): # Lock is acquired in order to make sure that neither _numHosts nor # incomingMail are changed during checkForMail self._inLock.acquire() try: # self.log.debug("SPM_MailMonitor -_checking for mail") cmd = self._inCmd + ['bs=' + str(self._outMailLen)] # self.log.debug("SPM_MailMonitor - reading incoming mail, " # "command: " + str(cmd)) (rc, in_mail, err) = misc.execCmd(cmd, raw=True) if rc: raise IOError(errno.EIO, "_handleRequests._checkForMail - " "Could not read mailbox: %s" % self._inbox) if (len(in_mail) != (self._outMailLen)): self.log.error('SPM_MailMonitor: _checkForMail - dd succeeded ' 'but read %d bytes instead of %d, cannot check ' 'mail. Read mail contains: %s', len(in_mail), self._outMailLen, repr(in_mail[:80])) raise RuntimeError("_handleRequests._checkForMail - Could not " "read mailbox") # self.log.debug("Parsing inbox content: %s", in_mail) if self._handleRequests(in_mail): self._outLock.acquire() try: cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't write " "outgoing mail, dd failed") finally: self._outLock.release() finally: self._inLock.release() def sendReply(self, msgID, msg): # Lock is acquired in order to make sure that neither _numHosts nor # outgoingMail are changed while used self._outLock.acquire() try: msgOffset = msgID * MESSAGE_SIZE self._outgoingMail = \ self._outgoingMail[0:msgOffset] + msg.payload + \ self._outgoingMail[msgOffset + MESSAGE_SIZE:self._outMailLen] mailboxOffset = (msgID / SLOTS_PER_MAILBOX) * MAILBOX_SIZE mailbox = self._outgoingMail[mailboxOffset: mailboxOffset + MAILBOX_SIZE] cmd = self._outCmd + ['bs=' + str(MAILBOX_SIZE), 'seek=' + str(mailboxOffset / MAILBOX_SIZE)] # self.log.debug("Running command: %s, for message id: %s", # str(cmd), str(msgID)) (rc, out, err) = _mboxExecCmd(cmd, data=mailbox) if rc: self.log.error("SPM_MailMonitor: sendReply - couldn't send " "reply, dd failed") finally: self._outLock.release() def _run(self): try: while not self._stop: try: self._checkForMail() except: self.log.error("Error checking for mail", exc_info=True) time.sleep(self._monitorInterval) finally: self._stopped = True self.tp.joinAll(waitForTasks=False) self.log.info("SPM_MailMonitor - Incoming mail monitoring thread " "stopped")
class HSM_MailMonitor(object): log = logging.getLogger('storage.MailBox.HsmMailMonitor') def __init__(self, inbox, outbox, hostID, queue, monitorInterval): # Save arguments tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = wait_timeout(monitorInterval) maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-hsm", tpSize, waitTimeout, maxTasks) self._stop = False self._flush = False self._queue = queue self._activeMessages = {} self._monitorInterval = monitorInterval self._hostID = int(hostID) self._used_slots_array = [0] * MESSAGES_PER_MAILBOX self._outgoingMail = EMPTYMAILBOX self._incomingMail = EMPTYMAILBOX # TODO: add support for multiple paths (multiple mailboxes) self._inCmd = [constants.EXT_DD, 'if=' + str(inbox), 'iflag=direct,fullblock', 'bs=' + str(MAILBOX_SIZE), 'count=1', 'skip=' + str(self._hostID) ] self._outCmd = [constants.EXT_DD, 'of=' + str(outbox), 'iflag=fullblock', 'oflag=direct', 'conv=notrunc', 'bs=' + str(MAILBOX_SIZE), 'count=1', 'seek=' + str(self._hostID) ] self._init = False self._initMailbox() # Read initial mailbox state self._msgCounter = 0 self._sendMail() # Clear outgoing mailbox self._thread = concurrent.thread(self._run, name="mailbox-hsm", log=self.log) self._thread.start() def _initMailbox(self): # Sync initial incoming mail state with storage view (rc, out, err) = _mboxExecCmd(self._inCmd, raw=True) if rc == 0: self._incomingMail = out self._init = True else: self.log.warning("HSM_MailboxMonitor - Could not initialize " "mailbox, will not accept requests until init " "succeeds") def immStop(self): self._stop = True def immFlush(self): self._flush = True def wait(self, timeout=None): self._thread.join(timeout=timeout) return not self._thread.is_alive() def _handleResponses(self, newMsgs): rc = False for i in range(0, MESSAGES_PER_MAILBOX): # Skip checking non used slots if self._used_slots_array[i] == 0: continue # Skip empty return messages (messages with version 0) start = i * MESSAGE_SIZE # First byte of message is message version. # Check return message version, if 0 then message is empty if newMsgs[start] in ['\0', '0']: continue for j in range(start, start + MESSAGE_SIZE): if newMsgs[j] != self._incomingMail[j]: break # If search exhausted then message hasn't changed since last read # and can be skipped if j == (start + MESSAGE_SIZE - 1): continue # # We only get here if there is a novel reply so we can remove the # message from the active list and the outgoing mail and handle the # reply # rc = True newMsg = newMsgs[start:start + MESSAGE_SIZE] if newMsg == CLEAN_MESSAGE: del self._activeMessages[i] self._used_slots_array[i] = 0 self._msgCounter -= 1 self._outgoingMail = self._outgoingMail[0:start] + \ MESSAGE_SIZE * "\0" + self._outgoingMail[start + MESSAGE_SIZE:] continue msg = self._activeMessages[i] self._activeMessages[i] = CLEAN_MESSAGE self._outgoingMail = self._outgoingMail[0:start] + \ CLEAN_MESSAGE + self._outgoingMail[start + MESSAGE_SIZE:] try: self.log.debug("HSM_MailboxMonitor(%s/%s) - Checking reply: " "%s", self._msgCounter, MESSAGES_PER_MAILBOX, repr(newMsg)) msg.checkReply(newMsg) if msg.callback: try: id = str(uuid.uuid4()) if not self.tp.queueTask(id, runTask, (msg.callback, msg.volumeData)): raise Exception() except: self.log.error("HSM_MailMonitor: exception caught " "while running msg callback, for " "message: %s, callback function: %s", repr(msg.payload), msg.callback, exc_info=True) except RuntimeError as e: self.log.error("HSM_MailMonitor: exception: %s caught while " "checking reply for message: %s, reply: %s", str(e), repr(msg.payload), repr(newMsg)) except: self.log.error("HSM_MailMonitor: exception caught while " "checking reply from SPM, request was: %s " "reply: %s", repr(msg.payload), repr(newMsg), exc_info=True) # Finished processing incoming mail, now save mail to compare against # next batch self._incomingMail = newMsgs return rc def _checkForMail(self): # self.log.debug("HSM_MailMonitor - checking for mail") # self.log.debug("Running command: " + str(self._inCmd)) (rc, in_mail, err) = misc.execCmd(self._inCmd, raw=True) if rc: raise RuntimeError("_handleResponses.Could not read mailbox - rc " "%s" % rc) if (len(in_mail) != MAILBOX_SIZE): raise RuntimeError("_handleResponses.Could not read mailbox - len " "%s != %s" % (len(in_mail), MAILBOX_SIZE)) # self.log.debug("Parsing inbox content: %s", in_mail) return self._handleResponses(in_mail) def _sendMail(self): self.log.info("HSM_MailMonitor sending mail to SPM - " + str(self._outCmd)) chk = misc.checksum( self._outgoingMail[0:MAILBOX_SIZE - CHECKSUM_BYTES], CHECKSUM_BYTES) pChk = struct.pack('<l', chk) # Assumes CHECKSUM_BYTES equals 4!!! self._outgoingMail = \ self._outgoingMail[0:MAILBOX_SIZE - CHECKSUM_BYTES] + pChk _mboxExecCmd(self._outCmd, data=self._outgoingMail) def _handleMessage(self, message): # TODO: add support for multiple mailboxes freeSlot = False for i in range(0, MESSAGES_PER_MAILBOX): if self._used_slots_array[i] == 0: if not freeSlot: freeSlot = i continue duplicate = True for j in range(0, MESSAGE_SIZE): if message[j] != self._activeMessages[i][j]: duplicate = False break if duplicate: self.log.debug("HSM_MailMonitor - ignoring duplicate message " "%s" % (repr(message))) return if not freeSlot: raise RuntimeError("HSM_MailMonitor - Active messages list full, " "cannot add new message") self._msgCounter += 1 self._used_slots_array[freeSlot] = 1 self._activeMessages[freeSlot] = message start = freeSlot * MESSAGE_SIZE end = start + MESSAGE_SIZE self._outgoingMail = self._outgoingMail[0:start] + message.payload + \ self._outgoingMail[end:] self.log.debug("HSM_MailMonitor - start: %s, end: %s, len: %s, " "message(%s/%s): %s" % (start, end, len(self._outgoingMail), self._msgCounter, MESSAGES_PER_MAILBOX, repr(self._outgoingMail[start:end]))) def _run(self): try: failures = 0 # Do not start processing requests before incoming mailbox is # initialized while not self._init and not self._stop: try: time.sleep(2) self._initMailbox() # Read initial mailbox state except: pass while not self._stop: try: message = None sendMail = False # If no message is pending, block_wait until a new message # or stop command arrives while not self._stop and not message and \ not self._activeMessages: try: # self.log.debug("No requests in queue, going to " # "sleep until new requests arrive") # Check if a new message is waiting to be sent message = self._queue.get( block=True, timeout=self._monitorInterval) self._handleMessage(message) message = None sendMail = True except queue.Empty: pass if self._stop: break # If pending messages available, check if there are new # messages waiting in queue as well empty = False while (not empty) and \ (len(self._activeMessages) < MESSAGES_PER_MAILBOX): # TODO: Remove single mailbox limitation try: message = self._queue.get(block=False) self._handleMessage(message) message = None sendMail = True except queue.Empty: empty = True if self._flush: self._flush = False sendMail = True try: sendMail |= self._checkForMail() failures = 0 except: self.log.error("HSM_MailboxMonitor - Exception caught " "while checking for mail", exc_info=True) failures += 1 if sendMail: self._sendMail() # If there are active messages waiting for SPM reply, wait # a few seconds before performing another IO op if self._activeMessages and not self._stop: # If recurring failures then sleep for one minute # before retrying if (failures > 9): time.sleep(60) else: time.sleep(self._monitorInterval) except: self.log.error("HSM_MailboxMonitor - Incoming mail" "monitoring thread caught exception; " "will try to recover", exc_info=True) finally: self.log.info("HSM_MailboxMonitor - Incoming mail monitoring " "thread stopped, clearing outgoing mail") self._outgoingMail = EMPTYMAILBOX self._sendMail() # Clear outgoing mailbox
class TaskManager: log = logging.getLogger('storage.taskmanager') def __init__(self, tpSize=config.getint('irs', 'thread_pool_size'), waitTimeout=3, maxTasks=config.getint('irs', 'max_tasks')): self.tp = ThreadPool("tasks", tpSize, waitTimeout, maxTasks) self._tasks = {} self._unqueuedTasks = [] self._lock = threading.Lock() def queue(self, task): return self._queueTask(task, task.commit) def queueRecovery(self, task): return self._queueTask(task, task.recover) def _queueTask(self, task, method): with self._lock: if task.id in self._tasks: raise se.AddTaskError('Task id already in use: {0}'.format( task.id)) self.log.debug("queuing task: %s", task.id) self._tasks[task.id] = task try: if not self.tp.queueTask(task.id, method): self.log.error("unable to queue task: %s", task.dumpTask()) with self._lock: del self._tasks[task.id] raise se.AddTaskError() self.log.debug("task queued: %s", task.id) except Exception: self.log.exception('could not queue task %s', task.id) raise return task.id def scheduleJob(self, type, store, task, jobName, func, *args): task.setTag(type) if store is not None: task.setPersistence(store, cleanPolicy=TaskCleanType.manual) task.setManager(self) task.setRecoveryPolicy("auto") task.addJob(Job(jobName, func, *args)) self.log.debug("scheduled job %s for task %s ", jobName, task.id) def _getTask(self, taskID): Task.validateID(taskID) t = self._tasks.get(taskID, None) if t is None: raise se.UnknownTask(taskID) return t def prepareForShutdown(self, wait=False): """ Prepare to shutdown and stop all threads. """ self.log.debug("Request to stop all threads (wait=%s)", wait) self.tp.joinAll(waitForThreads=wait) def getTaskStatus(self, taskID): """ Internal return Task status for a given task. """ self.log.debug("Entry. taskID: %s", taskID) t = self._getTask(taskID) status = t.deprecated_getStatus() self.log.debug("Return. Response: %s", status) return status def getAllTasksStatuses(self, tag=None): """ Return Task status for all tasks by type. """ self.log.debug("Entry.") subRes = {} with self._lock: for taskID, task in self._tasks.items(): if not tag or tag in task.getTags(): try: subRes[taskID] = task.deprecated_getStatus() except se.UnknownTask: # Return statuses for existing tasks only. self.log.warn( "Unknown task %s. " "Maybe task was already cleared.", taskID) self.log.debug("Return: %s", subRes) return subRes def getAllTasks(self): """ Return Tasks for all public tasks. """ self.log.debug("Entry.") subRes = {} with self._lock: for taskID, task in self._tasks.items(): try: subRes[taskID] = task.getDetails() except se.UnknownTask: # Return info for existing tasks only. self.log.warn( "Unknown task %s. Maybe task was already " "cleared.", taskID) self.log.debug("Return: %s", subRes) return subRes def unloadTasks(self, tag=None): """ Remove Tasks from managed tasks list """ self.log.debug("Entry.") with self._lock: for taskID, task in list(self._tasks.items()): if not tag or tag in task.getTags(): self._tasks.pop(taskID, None) self.log.debug("Return") def stopTask(self, taskID, force=False): """ Stop a task according to given uuid. """ self.log.debug("Entry. taskID: %s", taskID) t = self._getTask(taskID) t.stop(force=force) self.log.debug("Return.") return True def revertTask(self, taskID): self.log.debug("Entry. taskID: %s", taskID) # TODO: Should we stop here implicitly ??? t = self._getTask(taskID) t.rollback() self.log.debug("Return.") def clearTask(self, taskID): """ Clear a task according to given uuid. """ self.log.debug("Entry. taskID: %s", taskID) # TODO: Should we stop here implicitly ??? t = self._getTask(taskID) # Task clean procedure may block so we don't want to do it under tasks # dict lock, instead use pop under lock in case the task was already # removed by another call. t.clean() with self._lock: self._tasks.pop(taskID, None) self.log.debug("Return.") def getTaskInfo(self, taskID): """ Return task's data according to given uuid. """ self.log.debug("Entry. taskID: %s", taskID) t = self._getTask(taskID) info = t.getInfo() self.log.debug("Return. Response: %s", info) return info def getAllTasksInfo(self, tag=None): """ Return Task info for all public tasks. i.e - not internal. """ self.log.debug("Entry.") subRes = {} with self._lock: for taskID, task in self._tasks.items(): if not tag or tag in task.getTags(): try: subRes[taskID] = task.getInfo() except se.UnknownTask: # Return info for existing tasks only. self.log.warn( "Unknown task %s. " "Maybe task was already cleared.", taskID) self.log.debug("Return. Response: %s", subRes) return subRes def loadDumpedTasks(self, store): if not os.path.exists(store): self.log.debug("task dump path %s does not exist.", store) return # taskID is the root part of each (root.ext) entry in the dump task dir tasksIDs = set(os.path.splitext(tid)[0] for tid in os.listdir(store)) for taskID in tasksIDs: self.log.debug("Loading dumped task %s", taskID) try: t = Task.loadTask(store, taskID) t.setPersistence(store, str(t.persistPolicy), str(t.cleanPolicy)) self._unqueuedTasks.append(t) except Exception: self.log.error("taskManager: Skipping directory: %s", taskID, exc_info=True) continue def recoverDumpedTasks(self): for task in self._unqueuedTasks[:]: self.queueRecovery(task) self._unqueuedTasks.remove(task)
class SPM_MailMonitor: log = logging.getLogger('storage.MailBox.SpmMailMonitor') def registerMessageType(self, messageType, callback): self._messageTypes[messageType] = callback def unregisterMessageType(self, messageType): del self._messageTypes[messageType] def __init__(self, pool, maxHostID, monitorInterval=2): self._messageTypes = {} # Save arguments self._stop = False self._stopped = False self._poolID = str(pool.spUUID) self._spmStorageDir = pool.storage_repository tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = 3 maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks) # *** IMPORTANT NOTE: The SPM's inbox is the HSMs' outbox and vice # versa *** # self._inbox = os.path.join(self._spmStorageDir, self._poolID, "mastersd", sd.DOMAIN_META_DATA, "inbox") if not os.path.exists(self._inbox): self.log.error("SPM_MailMonitor create failed - inbox %s does not " "exist" % repr(self._inbox)) raise RuntimeError("SPM_MailMonitor create failed - inbox %s does " "not exist" % repr(self._inbox)) self._outbox = os.path.join(self._spmStorageDir, self._poolID, "mastersd", sd.DOMAIN_META_DATA, "outbox") if not os.path.exists(self._outbox): self.log.error("SPM_MailMonitor create failed - outbox %s does " "not exist" % repr(self._outbox)) raise RuntimeError("SPM_MailMonitor create failed - outbox %s " "does not exist" % repr(self._outbox)) self._numHosts = int(maxHostID) self._outMailLen = MAILBOX_SIZE * self._numHosts self._monitorInterval = monitorInterval # TODO: add support for multiple paths (multiple mailboxes) self._outgoingMail = self._outMailLen * "\0" self._incomingMail = self._outgoingMail self._inCmd = ['dd', 'if=' + str(self._inbox), 'iflag=direct,fullblock', 'count=1' ] self._outCmd = ['dd', 'of=' + str(self._outbox), 'oflag=direct', 'iflag=fullblock', 'conv=notrunc', 'count=1' ] self._outLock = threading.Lock() self._inLock = threading.Lock() # Clear outgoing mail self.log.debug("SPM_MailMonitor - clearing outgoing mail, command is: " "%s", self._outCmd) cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, " "dd failed") t = concurrent.thread(self.run, name="mailbox-spm", logger=self.log.name) t.start() self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID) def stop(self): self._stop = True def isStopped(self): return self._stopped def getMaxHostID(self): return self._numHosts def setMaxHostID(self, newMaxId): with self._inLock: with self._outLock: diff = newMaxId - self._numHosts if diff > 0: delta = MAILBOX_SIZE * diff * "\0" self._outgoingMail += delta self._incomingMail += delta elif diff < 0: delta = MAILBOX_SIZE * diff self._outgoingMail = self._outgoingMail[:-delta] self._incomingMail = self._incomingMail[:-delta] self._numHosts = newMaxId self._outMailLen = MAILBOX_SIZE * self._numHosts def _validateMailbox(self, mailbox, mailboxIndex): chkStart = MAILBOX_SIZE - CHECKSUM_BYTES chk = misc.checksum(mailbox[0:chkStart], CHECKSUM_BYTES) pChk = struct.pack('<l', chk) # Assumes CHECKSUM_BYTES equals 4!!! if pChk != mailbox[chkStart:chkStart + CHECKSUM_BYTES]: self.log.error("SPM_MailMonitor: mailbox %s checksum failed, not " "clearing mailbox, clearing newMail.", str(mailboxIndex)) return False elif pChk == pZeroChecksum: return False # Ignore messages of empty mailbox return True def _handleRequests(self, newMail): send = False # run through all messages and check if new messages have arrived # (since last read) for host in range(0, self._numHosts): # Check mailbox checksum mailboxStart = host * MAILBOX_SIZE isMailboxValidated = False for i in range(0, MESSAGES_PER_MAILBOX): msgId = host * SLOTS_PER_MAILBOX + i msgStart = msgId * MESSAGE_SIZE # First byte of message is message version. Check message # version, if 0 then message is empty and can be skipped if newMail[msgStart] in ['\0', '0']: continue # Most mailboxes are probably empty so it costs less to check # that all messages start with 0 than to validate the mailbox, # therefor this is done after we find a non empty message in # mailbox if not isMailboxValidated: if not self._validateMailbox( newMail[mailboxStart:mailboxStart + MAILBOX_SIZE], host): # Cleaning invalid mbx in newMail newMail = newMail[:mailboxStart] + EMPTYMAILBOX + \ newMail[mailboxStart + MAILBOX_SIZE:] break self.log.debug("SPM_MailMonitor: Mailbox %s validated, " "checking mail", host) isMailboxValidated = True newMsg = newMail[msgStart:msgStart + MESSAGE_SIZE] msgOffset = msgId * MESSAGE_SIZE if newMsg == CLEAN_MESSAGE: # Should probably put a setter on outgoingMail which would # take the lock self._outLock.acquire() try: self._outgoingMail = \ self._outgoingMail[0:msgOffset] + CLEAN_MESSAGE + \ self._outgoingMail[msgOffset + MESSAGE_SIZE: self._outMailLen] finally: self._outLock.release() send = True continue # Message isn't empty, check if its new isMessageNew = False for j in range(msgStart, msgStart + MESSAGE_SIZE): if newMail[j] != self._incomingMail[j]: isMessageNew = True break # If search exhausted, i.e. message hasn't changed since last # read, it can be skipped if not isMessageNew: continue # We only get here if there is a novel request try: msgType = newMail[msgStart + 1:msgStart + 5] if msgType in self._messageTypes: # Use message class to process request according to # message specific logic id = str(uuid.uuid4()) self.log.debug("SPM_MailMonitor: processing request: " "%s" % repr(newMail[ msgStart:msgStart + MESSAGE_SIZE])) res = self.tp.queueTask( id, runTask, (self._messageTypes[msgType], msgId, newMail[msgStart: msgStart + MESSAGE_SIZE]) ) if not res: raise Exception() else: self.log.error("SPM_MailMonitor: unknown message type " "encountered: %s", msgType) except RuntimeError as e: self.log.error("SPM_MailMonitor: exception: %s caught " "while handling message: %s", str(e), newMail[msgStart:msgStart + MESSAGE_SIZE]) except: self.log.error("SPM_MailMonitor: exception caught while " "handling message: %s", newMail[msgStart:msgStart + MESSAGE_SIZE], exc_info=True) self._incomingMail = newMail return send def _checkForMail(self): # Lock is acquired in order to make sure that neither _numHosts nor # incomingMail are changed during checkForMail self._inLock.acquire() try: # self.log.debug("SPM_MailMonitor -_checking for mail") cmd = self._inCmd + ['bs=' + str(self._outMailLen)] # self.log.debug("SPM_MailMonitor - reading incoming mail, " # "command: " + str(cmd)) (rc, in_mail, err) = misc.execCmd(cmd, raw=True) if rc: raise IOError(errno.EIO, "_handleRequests._checkForMail - " "Could not read mailbox: %s" % self._inbox) if (len(in_mail) != (self._outMailLen)): self.log.error('SPM_MailMonitor: _checkForMail - dd succeeded ' 'but read %d bytes instead of %d, cannot check ' 'mail. Read mail contains: %s', len(in_mail), self._outMailLen, repr(in_mail[:80])) raise RuntimeError("_handleRequests._checkForMail - Could not " "read mailbox") # self.log.debug("Parsing inbox content: %s", in_mail) if self._handleRequests(in_mail): self._outLock.acquire() try: cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't write " "outgoing mail, dd failed") finally: self._outLock.release() finally: self._inLock.release() def sendReply(self, msgID, msg): # Lock is acquired in order to make sure that neither _numHosts nor # outgoingMail are changed while used self._outLock.acquire() try: msgOffset = msgID * MESSAGE_SIZE self._outgoingMail = \ self._outgoingMail[0:msgOffset] + msg.payload + \ self._outgoingMail[msgOffset + MESSAGE_SIZE:self._outMailLen] mailboxOffset = (msgID / SLOTS_PER_MAILBOX) * MAILBOX_SIZE mailbox = self._outgoingMail[mailboxOffset: mailboxOffset + MAILBOX_SIZE] cmd = self._outCmd + ['bs=' + str(MAILBOX_SIZE), 'seek=' + str(mailboxOffset / MAILBOX_SIZE)] # self.log.debug("Running command: %s, for message id: %s", # str(cmd), str(msgID)) (rc, out, err) = _mboxExecCmd(cmd, data=mailbox) if rc: self.log.error("SPM_MailMonitor: sendReply - couldn't send " "reply, dd failed") finally: self._outLock.release() def run(self): try: while not self._stop: try: self._checkForMail() except: self.log.error("Error checking for mail", exc_info=True) time.sleep(self._monitorInterval) finally: self._stopped = True self.tp.joinAll(waitForTasks=False) self.log.info("SPM_MailMonitor - Incoming mail monitoring thread " "stopped")
class HSM_MailMonitor(object): log = logging.getLogger('storage.MailBox.HsmMailMonitor') def __init__(self, inbox, outbox, hostID, queue, monitorInterval): # Save arguments tpSize = config.getint('irs', 'thread_pool_size') / 2 waitTimeout = 3 maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-hsm", tpSize, waitTimeout, maxTasks) self._stop = False self._flush = False self._queue = queue self._activeMessages = {} self._monitorInterval = monitorInterval self._hostID = int(hostID) self._used_slots_array = [0] * MESSAGES_PER_MAILBOX self._outgoingMail = EMPTYMAILBOX self._incomingMail = EMPTYMAILBOX # TODO: add support for multiple paths (multiple mailboxes) self._spmStorageDir = config.get('irs', 'repository') self._inCmd = [constants.EXT_DD, 'if=' + str(inbox), 'iflag=direct,fullblock', 'bs=' + str(BLOCK_SIZE), 'count=' + str(BLOCKS_PER_MAILBOX), 'skip=' + str(self._hostID * BLOCKS_PER_MAILBOX) ] self._outCmd = [constants.EXT_DD, 'of=' + str(outbox), 'iflag=fullblock', 'oflag=direct', 'conv=notrunc', 'bs=' + str(BLOCK_SIZE), 'seek=' + str(self._hostID * BLOCKS_PER_MAILBOX) ] self._init = False self._initMailbox() # Read initial mailbox state self._msgCounter = 0 self._sendMail() # Clear outgoing mailbox self._thread = concurrent.thread(self.run, name="mailbox-hsm", logger=self.log.name) self._thread.start() def _initMailbox(self): # Sync initial incoming mail state with storage view (rc, out, err) = _mboxExecCmd(self._inCmd, raw=True) if rc == 0: self._incomingMail = out self._init = True else: self.log.warning("HSM_MailboxMonitor - Could not initialize " "mailbox, will not accept requests until init " "succeeds") def immStop(self): self._stop = True def immFlush(self): self._flush = True def _handleResponses(self, newMsgs): rc = False for i in range(0, MESSAGES_PER_MAILBOX): # Skip checking non used slots if self._used_slots_array[i] == 0: continue # Skip empty return messages (messages with version 0) start = i * MESSAGE_SIZE # First byte of message is message version. # Check return message version, if 0 then message is empty if newMsgs[start] in ['\0', '0']: continue for j in range(start, start + MESSAGE_SIZE): if newMsgs[j] != self._incomingMail[j]: break # If search exhausted then message hasn't changed since last read # and can be skipped if j == (start + MESSAGE_SIZE - 1): continue # # We only get here if there is a novel reply so we can remove the # message from the active list and the outgoing mail and handle the # reply # rc = True newMsg = newMsgs[start:start + MESSAGE_SIZE] if newMsg == CLEAN_MESSAGE: del self._activeMessages[i] self._used_slots_array[i] = 0 self._msgCounter -= 1 self._outgoingMail = self._outgoingMail[0:start] + \ MESSAGE_SIZE * "\0" + self._outgoingMail[start + MESSAGE_SIZE:] continue msg = self._activeMessages[i] self._activeMessages[i] = CLEAN_MESSAGE self._outgoingMail = self._outgoingMail[0:start] + \ CLEAN_MESSAGE + self._outgoingMail[start + MESSAGE_SIZE:] try: self.log.debug("HSM_MailboxMonitor(%s/%s) - Checking reply: " "%s", self._msgCounter, MESSAGES_PER_MAILBOX, repr(newMsg)) msg.checkReply(newMsg) if msg.callback: try: id = str(uuid.uuid4()) if not self.tp.queueTask(id, runTask, (msg.callback, msg.volumeData)): raise Exception() except: self.log.error("HSM_MailMonitor: exception caught " "while running msg callback, for " "message: %s, callback function: %s", repr(msg.payload), msg.callback, exc_info=True) except RuntimeError as e: self.log.error("HSM_MailMonitor: exception: %s caught while " "checking reply for message: %s, reply: %s", str(e), repr(msg.payload), repr(newMsg)) except: self.log.error("HSM_MailMonitor: exception caught while " "checking reply from SPM, request was: %s " "reply: %s", repr(msg.payload), repr(newMsg), exc_info=True) # Finished processing incoming mail, now save mail to compare against # next batch self._incomingMail = newMsgs return rc def _checkForMail(self): # self.log.debug("HSM_MailMonitor - checking for mail") # self.log.debug("Running command: " + str(self._inCmd)) (rc, in_mail, err) = misc.execCmd(self._inCmd, raw=True) if rc: raise RuntimeError("_handleResponses.Could not read mailbox - rc " "%s" % rc) if (len(in_mail) != MAILBOX_SIZE): raise RuntimeError("_handleResponses.Could not read mailbox - len " "%s != %s" % (len(in_mail), MAILBOX_SIZE)) # self.log.debug("Parsing inbox content: %s", in_mail) return self._handleResponses(in_mail) def _sendMail(self): self.log.info("HSM_MailMonitor sending mail to SPM - " + str(self._outCmd)) chk = misc.checksum( self._outgoingMail[0:MAILBOX_SIZE - CHECKSUM_BYTES], CHECKSUM_BYTES) pChk = struct.pack('<l', chk) # Assumes CHECKSUM_BYTES equals 4!!! self._outgoingMail = \ self._outgoingMail[0:MAILBOX_SIZE - CHECKSUM_BYTES] + pChk _mboxExecCmd(self._outCmd, data=self._outgoingMail) def _handleMessage(self, message): # TODO: add support for multiple mailboxes freeSlot = False for i in range(0, MESSAGES_PER_MAILBOX): if self._used_slots_array[i] == 0: if not freeSlot: freeSlot = i continue duplicate = True for j in range(0, MESSAGE_SIZE): if message[j] != self._activeMessages[i][j]: duplicate = False break if duplicate: self.log.debug("HSM_MailMonitor - ignoring duplicate message " "%s" % (repr(message))) return if not freeSlot: raise RuntimeError("HSM_MailMonitor - Active messages list full, " "cannot add new message") self._msgCounter += 1 self._used_slots_array[freeSlot] = 1 self._activeMessages[freeSlot] = message start = freeSlot * MESSAGE_SIZE end = start + MESSAGE_SIZE self._outgoingMail = self._outgoingMail[0:start] + message.payload + \ self._outgoingMail[end:] self.log.debug("HSM_MailMonitor - start: %s, end: %s, len: %s, " "message(%s/%s): %s" % (start, end, len(self._outgoingMail), self._msgCounter, MESSAGES_PER_MAILBOX, repr(self._outgoingMail[start:end]))) def run(self): try: failures = 0 # Do not start processing requests before incoming mailbox is # initialized while not self._init and not self._stop: try: time.sleep(2) self._initMailbox() # Read initial mailbox state except: pass while not self._stop: try: message = None sendMail = False # If no message is pending, block_wait until a new message # or stop command arrives while not self._stop and not message and \ not self._activeMessages: try: # self.log.debug("No requests in queue, going to " # "sleep until new requests arrive") # Check if a new message is waiting to be sent message = self._queue.get( block=True, timeout=self._monitorInterval) self._handleMessage(message) message = None sendMail = True except queue.Empty: pass if self._stop: break # If pending messages available, check if there are new # messages waiting in queue as well empty = False while (not empty) and \ (len(self._activeMessages) < MESSAGES_PER_MAILBOX): # TODO: Remove single mailbox limitation try: message = self._queue.get(block=False) self._handleMessage(message) message = None sendMail = True except queue.Empty: empty = True if self._flush: self._flush = False sendMail = True try: sendMail |= self._checkForMail() failures = 0 except: self.log.error("HSM_MailboxMonitor - Exception caught " "while checking for mail", exc_info=True) failures += 1 if sendMail: self._sendMail() # If there are active messages waiting for SPM reply, wait # a few seconds before performing another IO op if self._activeMessages and not self._stop: # If recurring failures then sleep for one minute # before retrying if (failures > 9): time.sleep(60) else: time.sleep(self._monitorInterval) except: self.log.error("HSM_MailboxMonitor - Incoming mail" "monitoring thread caught exception; " "will try to recover", exc_info=True) finally: self.log.info("HSM_MailboxMonitor - Incoming mail monitoring " "thread stopped, clearing outgoing mail") self._outgoingMail = EMPTYMAILBOX self._sendMail() # Clear outgoing mailbox
class SPM_MailMonitor: log = logging.getLogger('storage.mailbox') def registerMessageType(self, messageType, callback): self._messageTypes[messageType] = callback def unregisterMessageType(self, messageType): del self._messageTypes[messageType] def __init__(self, poolID, maxHostID, inbox, outbox, monitorInterval=2.0, eventInterval=0.5): """ Note: inbox parameter here should point to the HSM's outbox mailbox file, and vice versa. """ self._messageTypes = {} # Save arguments self._stop = False self._stopped = False self._poolID = poolID tpSize = config.getint('irs', 'thread_pool_size') // 2 waitTimeout = wait_timeout(monitorInterval) maxTasks = config.getint('irs', 'max_tasks') self.tp = ThreadPool("mailbox-spm", tpSize, waitTimeout, maxTasks) self._inbox = inbox if not os.path.exists(self._inbox): self.log.error("SPM_MailMonitor create failed - inbox %s does not " "exist" % repr(self._inbox)) raise RuntimeError("SPM_MailMonitor create failed - inbox %s does " "not exist" % repr(self._inbox)) self._outbox = outbox if not os.path.exists(self._outbox): self.log.error("SPM_MailMonitor create failed - outbox %s does " "not exist" % repr(self._outbox)) raise RuntimeError("SPM_MailMonitor create failed - outbox %s " "does not exist" % repr(self._outbox)) self._numHosts = int(maxHostID) self._outMailLen = MAILBOX_SIZE * self._numHosts self._monitorInterval = monitorInterval self._eventInterval = min(eventInterval, monitorInterval) # TODO: add support for multiple paths (multiple mailboxes) self._outgoingMail = self._outMailLen * b"\0" self._incomingMail = self._outgoingMail self._inCmd = [ 'dd', 'if=' + str(self._inbox), 'iflag=direct,fullblock', 'count=1' ] self._outCmd = [ 'dd', 'of=' + str(self._outbox), 'oflag=direct', 'iflag=fullblock', 'conv=notrunc', 'count=1' ] self._outLock = threading.Lock() self._inLock = threading.Lock() # The event detected in an empty mailbox. self._last_event = uuid.UUID(int=0) # Clear outgoing mail self.log.debug( "SPM_MailMonitor - clearing outgoing mail, command is: " "%s", self._outCmd) cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't clear outgoing mail, " "dd failed") self._thread = concurrent.thread(self._run, name="mailbox-spm", log=self.log) self.log.debug('SPM_MailMonitor created for pool %s' % self._poolID) def start(self): self._thread.start() def wait(self, timeout=None): self._thread.join(timeout=timeout) return not self._thread.is_alive() def stop(self): self._stop = True def isStopped(self): return self._stopped @classmethod def validateMailbox(self, mailbox, mailboxIndex): """ Return True if mailbox has a valid checksum, and is not an empty mailbox, False otherwise. """ assert len(mailbox) == MAILBOX_SIZE data = mailbox[:-CHECKSUM_BYTES] csum = mailbox[-CHECKSUM_BYTES:] expected = packed_checksum(data) if csum != expected: self.log.error( "mailbox %s checksum failed, not clearing mailbox, clearing " "new mail (data=%r, checksum=%r, expected=%r)", mailboxIndex, data, checksum, expected) return False elif expected == pZeroChecksum: return False # Ignore messages of empty mailbox return True def _handleRequests(self, newMail): send = False # run through all messages and check if new messages have arrived # (since last read) for host in range(0, self._numHosts): # Check mailbox checksum mailboxStart = host * MAILBOX_SIZE isMailboxValidated = False for i in range(0, MESSAGES_PER_MAILBOX): msgId = host * SLOTS_PER_MAILBOX + i msgStart = msgId * MESSAGE_SIZE # First byte of message is message version. # A null byte indicates an empty message to be skipped. if newMail[msgStart:msgStart + 1] == b"\0": continue # Most mailboxes are probably empty so it costs less to check # that all messages start with 0 than to validate the mailbox, # therefor this is done after we find a non empty message in # mailbox if not isMailboxValidated: if not self.validateMailbox( newMail[mailboxStart:mailboxStart + MAILBOX_SIZE], host): # Cleaning invalid mbx in newMail newMail = newMail[:mailboxStart] + EMPTYMAILBOX + \ newMail[mailboxStart + MAILBOX_SIZE:] break self.log.debug( "SPM_MailMonitor: Mailbox %s validated, " "checking mail", host) isMailboxValidated = True newMsg = newMail[msgStart:msgStart + MESSAGE_SIZE] msgOffset = msgId * MESSAGE_SIZE if newMsg == CLEAN_MESSAGE: # Should probably put a setter on outgoingMail which would # take the lock with self._outLock: self._outgoingMail = \ self._outgoingMail[0:msgOffset] + CLEAN_MESSAGE + \ self._outgoingMail[msgOffset + MESSAGE_SIZE: self._outMailLen] send = True continue # Message isn't empty, check if its new isMessageNew = False for j in range(msgStart, msgStart + MESSAGE_SIZE): if newMail[j] != self._incomingMail[j]: isMessageNew = True break # If search exhausted, i.e. message hasn't changed since last # read, it can be skipped if not isMessageNew: continue # We only get here if there is a novel request try: msgType = newMail[msgStart + 1:msgStart + 5] if msgType in self._messageTypes: # Use message class to process request according to # message specific logic id = str(uuid.uuid4()) self.log.debug( "SPM_MailMonitor: processing request: " "%s" % repr(newMail[msgStart:msgStart + MESSAGE_SIZE])) res = self.tp.queueTask( id, runTask, (self._messageTypes[msgType], msgId, newMail[msgStart:msgStart + MESSAGE_SIZE])) if not res: raise Exception() else: self.log.error( "SPM_MailMonitor: unknown message type " "encountered: %s", msgType) except RuntimeError as e: self.log.error( "SPM_MailMonitor: exception: %s caught " "while handling message: %s", str(e), newMail[msgStart:msgStart + MESSAGE_SIZE]) except: self.log.error( "SPM_MailMonitor: exception caught while " "handling message: %s", newMail[msgStart:msgStart + MESSAGE_SIZE], exc_info=True) self._incomingMail = newMail return send def _checkForMail(self): # Lock is acquired in order to make sure that # incomingMail is not changed during checkForMail with self._inLock: # self.log.debug("SPM_MailMonitor -_checking for mail") cmd = self._inCmd + ['bs=' + str(self._outMailLen)] # self.log.debug("SPM_MailMonitor - reading incoming mail, " # "command: " + str(cmd)) (rc, in_mail, err) = _mboxExecCmd(cmd, raw=True) if rc: raise IOError( errno.EIO, "_handleRequests._checkForMail - " "Could not read mailbox: %s" % self._inbox) if (len(in_mail) != (self._outMailLen)): self.log.error( 'SPM_MailMonitor: _checkForMail - dd succeeded ' 'but read %d bytes instead of %d, cannot check ' 'mail. Read mail contains: %s', len(in_mail), self._outMailLen, repr(in_mail[:80])) raise RuntimeError("_handleRequests._checkForMail - Could not " "read mailbox") # self.log.debug("Parsing inbox content: %s", in_mail) if self._handleRequests(in_mail): with self._outLock: cmd = self._outCmd + ['bs=' + str(self._outMailLen)] (rc, out, err) = _mboxExecCmd(cmd, data=self._outgoingMail) if rc: self.log.warning("SPM_MailMonitor couldn't write " "outgoing mail, dd failed") def sendReply(self, msgID, msg): # Lock is acquired in order to make sure that # outgoingMail is not changed while used with self._outLock: msgOffset = msgID * MESSAGE_SIZE self._outgoingMail = \ self._outgoingMail[0:msgOffset] + msg.payload + \ self._outgoingMail[msgOffset + MESSAGE_SIZE:self._outMailLen] mailboxOffset = (msgID // SLOTS_PER_MAILBOX) * MAILBOX_SIZE mailbox = self._outgoingMail[mailboxOffset:mailboxOffset + MAILBOX_SIZE] cmd = self._outCmd + [ 'bs=' + str(MAILBOX_SIZE), 'seek=' + str(mailboxOffset // MAILBOX_SIZE) ] # self.log.debug("Running command: %s, for message id: %s", # str(cmd), str(msgID)) (rc, out, err) = _mboxExecCmd(cmd, data=mailbox) if rc: self.log.error("SPM_MailMonitor: sendReply - couldn't send " "reply, dd failed") def _run(self): try: while not self._stop: try: self._checkForMail() except: self.log.error("Error checking for mail", exc_info=True) self._wait_for_events() finally: self._stopped = True self.tp.joinAll() self.log.info("SPM_MailMonitor - Incoming mail monitoring thread " "stopped") # Events. def _wait_for_events(self): """ Wait until an event is received in the event block, or the monitor interval has passed. With the default monitor and event intervals, we expect to check event 3 times between mail checks. check mail |---------------------|-------------------| check event | | | | | | With this configuraion we run 3 event checks per 2 seconds, which is expected to consume less than 1% cpu. """ if not config.getboolean("mailbox", "events_enable"): time.sleep(self._monitorInterval) return now = time.monotonic() deadline = now + self._monitorInterval while now < deadline: remaining = deadline - now if remaining <= self._eventInterval: # The last interval before checking mail. time.sleep(remaining) return time.sleep(self._eventInterval) try: event = self._read_event() except ReadEventError as e: self.log.warning("Error reading event block: %s", e) else: if event != self._last_event: self.log.debug("Received event: %s", event) self._last_event = event return now = time.monotonic() def _read_event(self): """ Read event from host 0 mailbox. """ # Even if we got a short read, it will be at least 512 bytes, so # we don't need iflag=fullblock. cmd = [ constants.EXT_DD, 'if=' + str(self._inbox), 'iflag=direct', 'count=1', 'bs=' + str(MAILBOX_SIZE), ] # If read fails, we will retry on the next check. In the worst # case we will check the entire mailbox after one monitor # interval. rc, out, err = _mboxExecCmd(cmd, raw=True) if rc != 0: raise ReadEventError(err.decode()) # Should never happen, we will retry on the next check. if len(out) < 24: raise ReadEventError(f"Short read: {len(out)} < 24") return uuid.UUID(bytes=bytes(out[4:20]))
class TaskManager: log = logging.getLogger('storage.TaskManager') def __init__(self, tpSize=config.getint('irs', 'thread_pool_size'), waitTimeout=3, maxTasks=config.getint('irs', 'max_tasks')): self.tp = ThreadPool("tasks", tpSize, waitTimeout, maxTasks) self._tasks = {} self._unqueuedTasks = [] self._insertTaskLock = threading.Lock() def queue(self, task): return self._queueTask(task, task.commit) def queueRecovery(self, task): return self._queueTask(task, task.recover) def _queueTask(self, task, method): with self._insertTaskLock: if task.id in self._tasks: raise se.AddTaskError( 'Task id already in use: {0}'.format(task.id)) self.log.debug("queuing task: %s", task.id) self._tasks[task.id] = task try: if not self.tp.queueTask(task.id, method): self.log.error("unable to queue task: %s", task.dumpTask()) del self._tasks[task.id] raise se.AddTaskError() self.log.debug("task queued: %s", task.id) except Exception: self.log.exception('could not queue task %s', task.id) raise return task.id def scheduleJob(self, type, store, task, jobName, func, *args): task.setTag(type) if store is not None: task.setPersistence(store, cleanPolicy=TaskCleanType.manual) task.setManager(self) task.setRecoveryPolicy("auto") task.addJob(Job(jobName, func, *args)) self.log.debug("scheduled job %s for task %s ", jobName, task.id) def __getTask(self, taskID): Task.validateID(taskID) t = self._tasks.get(taskID, None) if t is None: raise se.UnknownTask(taskID) return t def prepareForShutdown(self): """ Prepare to shutdown. Stop all threads and asynchronous tasks """ self.log.debug("Request to stop all tasks") # Clear the task queue and terminate all pooled threads for t in self._tasks: if hasattr(t, 'stop'): t.stop() self.log.info(str(t)) self.tp.joinAll(waitForTasks=False, waitForThreads=False) def getTaskStatus(self, taskID): """ Internal return Task status for a given task. """ self.log.debug("Entry. taskID: %s", taskID) t = self.__getTask(taskID) status = t.deprecated_getStatus() self.log.debug("Return. Response: %s", status) return status def getAllTasksStatuses(self, tag=None): """ Return Task status for all tasks by type. """ self.log.debug("Entry.") subRes = {} for taskID, task in self._tasks.items(): if not tag or tag in task.getTags(): try: subRes[taskID] = self.getTaskStatus(taskID) except se.UnknownTask: # Return statuses for existing tasks only. self.log.warn("Unknown task %s. " "Maybe task was already cleared.", taskID) self.log.debug("Return: %s", subRes) return subRes def getAllTasks(self): """ Return Tasks for all public tasks. """ self.log.debug("Entry.") subRes = {} for taskID, task in self._tasks.items(): try: subRes[taskID] = task.getDetails() except se.UnknownTask: # Return info for existing tasks only. self.log.warn("Unknown task %s. Maybe task was already " "cleared.", taskID) self.log.debug("Return: %s", subRes) return subRes def unloadTasks(self, tag=None): """ Remove Tasks from managed tasks list """ self.log.debug("Entry.") for taskID, task in self._tasks.items(): if not tag or tag in task.getTags(): self._tasks.pop(taskID, None) self.log.debug("Return") def stopTask(self, taskID, force=False): """ Stop a task according to given uuid. """ self.log.debug("Entry. taskID: %s", taskID) t = self.__getTask(taskID) t.stop(force=force) self.log.debug("Return.") return True def revertTask(self, taskID): self.log.debug("Entry. taskID: %s", taskID) # TODO: Should we stop here implicitly ??? t = self.__getTask(taskID) t.rollback() self.log.debug("Return.") def clearTask(self, taskID): """ Clear a task according to given uuid. """ self.log.debug("Entry. taskID: %s", taskID) # TODO: Should we stop here implicitly ??? t = self.__getTask(taskID) t.clean() del self._tasks[taskID] self.log.debug("Return.") def getTaskInfo(self, taskID): """ Return task's data according to given uuid. """ self.log.debug("Entry. taskID: %s", taskID) t = self.__getTask(taskID) info = t.getInfo() self.log.debug("Return. Response: %s", info) return info def getAllTasksInfo(self, tag=None): """ Return Task info for all public tasks. i.e - not internal. """ self.log.debug("Entry.") subRes = {} for taskID, task in self._tasks.items(): if not tag or tag in task.getTags(): try: subRes[taskID] = self.getTaskInfo(taskID) except se.UnknownTask: # Return info for existing tasks only. self.log.warn("Unknown task %s. " "Maybe task was already cleared.", taskID) self.log.debug("Return. Response: %s", subRes) return subRes def loadDumpedTasks(self, store): if not os.path.exists(store): self.log.debug("task dump path %s does not exist.", store) return # taskID is the root part of each (root.ext) entry in the dump task dir tasksIDs = set(os.path.splitext(tid)[0] for tid in os.listdir(store)) for taskID in tasksIDs: self.log.debug("Loading dumped task %s", taskID) try: t = Task.loadTask(store, taskID) t.setPersistence(store, str(t.persistPolicy), str(t.cleanPolicy)) self._unqueuedTasks.append(t) except Exception: self.log.error("taskManager: Skipping directory: %s", taskID, exc_info=True) continue def recoverDumpedTasks(self): for task in self._unqueuedTasks[:]: self.queueRecovery(task) self._unqueuedTasks.remove(task)