def __init__(self, url, bVerbose, namespace, containerName):
    """Build the listener: reducer, grid helpers and a bound ZMQ PULL socket.

    url           -- "host:port" part of the endpoint; bound as "tcp://" + url
    bVerbose      -- forwarded unchanged to the Reducer constructor
    namespace     -- kept as self.ns; also given to Reducer and
                     ListenerGridAccess
    containerName -- kept as self.containerName; also given to Reducer
    """
    self.ns = namespace
    self.containerName = containerName

    # Helper objects. The reducer must exist before its action list is logged.
    self.r = Reducer(bVerbose, namespace, containerName)
    self.logInfo(self.dumpListActionName())
    self.grid = ListenerGridAccess(namespace)
    self.tool = ListenerTools()

    # ZMQ: one context owning one PULL socket.
    self.context = zmq.Context()
    self.sock = self.context.socket(zmq.PULL)

    # NOTE(review): the constant is divided by 1000, presumably ms -> s;
    # confirm the unit expected by Reducer.setTimeout.
    self.r.setTimeout(None, LST_MSGIN_TIMEOUT / 1000)

    endpoint = "tcp://" + url
    self.sock.bind(endpoint)
def grid_content_CONCATandRemove(self, namespace, container, listcontentSrc, contentDest):
    """Concatenate several contents into one, then remove the sources.

    Every content named in listcontentSrc is read from
    namespace/container/<name>. Contents that come back as "" are skipped
    (neither merged nor removed). The others are joined, each followed by
    a newline, and written as namespace/container/contentDest.

    The source contents are removed only when the merged content was
    written without error, so a failed write never loses data.

    namespace      -- namespace used to build the grid accessor and paths
    container      -- container holding both sources and destination
    listcontentSrc -- iterable of source content names
    contentDest    -- name of the merged content to create
    """
    grid = ListenerGridAccess(namespace)
    tool = ListenerTools()
    base = namespace + "/" + container + "/"

    chunks = []
    listToRemove = []
    for content in listcontentSrc:
        path = base + content
        data = grid.get_content(path)
        if data != "":
            self.printmsg("Read content " + content)
            chunks.append(data + "\n")
            listToRemove.append(path)

    self.printmsg("Create content " + contentDest)
    # Join once instead of growing a string inside the loop.
    stream = io.BytesIO("".join(chunks).encode("utf-8"))
    size = tool.getSizeStream(stream)

    # Elsewhere in this file put_content reports failure by returning a
    # non-empty error string; keep the sources in that case.
    error = grid.put_content(base + contentDest, stream, size)
    if error:
        self.printmsg("Create content " + contentDest + " failed: "
                      + str(error) + " - temp contents are kept")
        return

    for path in listToRemove:
        self.printmsg("Remove temp content " + path)
        grid.remove_content(path)
class Listener:
    """Pull JSON messages from a ZMQ socket and drive a Reducer with them.

    A message is a JSON object with a mandatory "HEAD" object and, for data
    messages, "DATAH" / "DATAR" fields. The "STATUS" header field selects
    the handling: "setcrawlerid" and "stopact" are commands, the empty
    string marks a data message to reduce.
    """

    # JSON format/constant
    # MANDATORY key NAME, format JSON
    JSON_KEYNAME_HEAD = "HEAD"
    JSON_KEYNAME_HEAD_NAME = "SRC_NAME"
    JSON_KEYNAME_HEAD_PID = "SRC_ID"
    JSON_KEYNAME_HEAD_STATUS = "STATUS"
    JSON_KEYNAME_HEAD_MSGID = "MSG_ID"
    JSON_KEYNAME_HEAD_CRAWLERID = "CRAWL_ID"
    JSON_KEYNAME_DATAH = "DATAH"
    JSON_KEYNAME_DATAR = "DATAR"

    # Values of the STATUS header field
    JSON_KEYNAME_HEAD_STATUS_setcrawlerid = "setcrawlerid"
    JSON_KEYNAME_HEAD_STATUS_stopact = "stopact"
    JSON_KEYNAME_HEAD_STATUS_data = ""

    def __init__(self, url, bVerbose, namespace, containerName):
        """Create the reducer and grid helpers and bind the PULL socket.

        url           -- endpoint without scheme; bound as "tcp://" + url
        bVerbose      -- forwarded to the Reducer constructor
        namespace     -- stored as self.ns and given to the helpers
        containerName -- container passed to Reducer and created by
                         Initialize()
        """
        self.ns = namespace
        self.containerName = containerName

        # init all sub-class
        self.r = Reducer(bVerbose, namespace, containerName)
        self.logInfo(self.dumpListActionName())
        self.grid = ListenerGridAccess(namespace)
        self.tool = ListenerTools()

        # init ZMQ
        self.context = zmq.Context()
        self.sock = self.context.socket(zmq.PULL)

        # NOTE(review): under Python 2 this is an integer division when
        # LST_MSGIN_TIMEOUT is an int -- confirm the unit setTimeout expects.
        self.r.setTimeout(None, LST_MSGIN_TIMEOUT / 1000)

        self.sock.bind("tcp://" + url)

    def __del__(self):
        """Close the socket, then terminate the ZMQ context.

        The socket is closed first because terminating a context blocks
        while it still owns open sockets. Attributes are looked up
        defensively since __del__ also runs when __init__ failed part-way.
        """
        sock = getattr(self, "sock", None)
        if sock is not None:
            sock.close(linger=0)
        context = getattr(self, "context", None)
        if context is not None:
            context.term()

    def Initialize(self):
        """Reset the reducer and make sure the result container exists.

        Returns True on success, False when create_container reports an
        error (the error text is logged).
        """
        self.r.clearAll()
        error = self.grid.create_container(self.containerName)
        if error != "":
            self.syslogErr(error)
            return False

        self.syslogInfo("Container [" + self.containerName + "] for storage Listener&reduce result is created or already exists")
        return True

    def dumpListActionName(self):
        """Return a printable, one-per-line list of the reducer's actions."""
        action_names = self.r.getListActionName()
        return "Reducer actions used: " + "".join(
            "\n\t" + name for name in action_names)

    def logdebug(self, msg):
        """Log msg at DEBUG level, prefixed with the service name."""
        logging.debug("%s: %s", LST_NAMESVC, msg)

    def logInfo(self, msg):
        """Log msg at INFO level, prefixed with the service name."""
        logging.info("%s: %s", LST_NAMESVC, msg)

    def syslogInfo(self, msg):
        """Log msg at INFO level to both logging and syslog."""
        self.logInfo(msg)
        syslog.syslog(syslog.LOG_INFO, LST_NAMESVC + ": " + msg)

    def syslogErr(self, msg):
        """Log msg at ERROR level to both logging and syslog."""
        logging.error("%s: %s", LST_NAMESVC, msg)
        syslog.syslog(syslog.LOG_ERR, LST_NAMESVC + ": " + msg)

    def buildContent(self, sdatetime, result):
        """Return the text stored for a reduce result: a header line
        (end date/time and namespace), a blank line, then result."""
        content = "Reduce end date/time: " + sdatetime + " on namespace " + self.ns + "\n"
        content += "\n"
        content += result
        return content

    def endprocess(self, action_name):
        """Finalize the reduce of action_name and store its result.

        The result is written as a content in LISTENER_RESULT_CONTAINER_NAME;
        if that fails it is written to a file under
        LISTENER_RESULT_DIRTMP_NAME instead. The reducer state of the action
        is cleared once a non-empty result has been handled.

        Returns False when action_name is empty, when the reducer raises, or
        when the result is empty; True otherwise (even if storing failed --
        the failure is logged).
        """
        if action_name == "":
            return False

        try:
            # finalize reduce process, then get the result string
            self.r.finalize(action_name)
            result = self.r.dumps(action_name)
            print("-------------------------------------------------------------")
            print(result)
        except Exception as e:
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            self.syslogErr("Reduce error :" + str(e))
            return False

        if result == "":
            return False

        # Current date/time, used in the content header and in its name.
        # Standard directives only: flag/width forms such as "%02d" are a
        # platform extension and give the same output where supported.
        datetimecrt = datetime.datetime.now()
        sdatetime = datetimecrt.strftime("%d/%m/%Y %H:%M:%S")
        content = self.buildContent(sdatetime, result)
        stream = io.BytesIO(content.encode("utf-8"))
        size = self.tool.getSizeStream(stream)

        # build name for new content
        sdatetime = datetimecrt.strftime("%Y%m%d%H%M%S")
        contentName = sdatetime + "-" + action_name + "-Reduce"
        path = self.ns + "/" + LISTENER_RESULT_CONTAINER_NAME + "/" + str(contentName)

        # write new content on the result container
        error = self.grid.put_content(path, stream, size)
        if error != "":
            # Fall back to a temporary file so the result is not lost.
            fileName = LISTENER_RESULT_DIRTMP_NAME + "/" + contentName
            errorF = self.tool.write_file(fileName, path, result)
            if errorF != "":
                msg = "Reduce result: " + error + " - " + errorF
            else:
                msg = "Reduce result: " + error + " - Result write on temporarily file [" + fileName + "]"
            self.syslogErr(msg)
        else:
            msg = "Reduce Result write on content [" + path + "] with success"
            self.syslogInfo(msg)

        # clear all data and statistics of this action
        self.r.clear(action_name)
        return True

    def ManageProcess(self, result):
        """Analyse one received message and execute it.

        result -- decoded JSON message (expected to be a dict).

        Returns True when a "stopact" command ended the last pending source
        of its action (the reduce is then finalized), False otherwise,
        including for malformed or unauthorized messages.
        """
        # Extract the header. A malformed message must not kill the
        # listener loop, so missing keys / wrong types are logged and the
        # message is dropped.
        try:
            m_head = result[self.JSON_KEYNAME_HEAD]
            m_head_name = m_head[self.JSON_KEYNAME_HEAD_NAME]
            m_head_pid = m_head[self.JSON_KEYNAME_HEAD_PID]
            m_head_status = m_head[self.JSON_KEYNAME_HEAD_STATUS]
            m_head_crawlerid = m_head[self.JSON_KEYNAME_HEAD_CRAWLERID]
            # MSG_ID is mandatory but not used afterwards.
            m_head[self.JSON_KEYNAME_HEAD_MSGID]
        except (KeyError, TypeError) as e:
            self.syslogErr("Malformed message, bad header: " + repr(e))
            return False

        bResult = False

        # NOTE: the "== True" / "== False" comparisons below are kept as
        # written; the Reducer's return types are not visible from here.
        if m_head_status != self.JSON_KEYNAME_HEAD_STATUS_data:
            # command to execute
            if m_head_status == self.JSON_KEYNAME_HEAD_STATUS_setcrawlerid:
                # save new id to manage authorized messages
                # (%s formatting: the id may not be a string)
                self.logInfo("Message received: command " + self.JSON_KEYNAME_HEAD_STATUS_setcrawlerid + "id=" + "%s" % (m_head_crawlerid,))
                self.r.ids_add(m_head_name, m_head_crawlerid, None)
            elif m_head_status == self.JSON_KEYNAME_HEAD_STATUS_stopact:
                # a source process that generated data to reduce has stopped
                self.logInfo("end src process pid = " + str(m_head_pid) + " was received")
                self.r.ids_rm(m_head_name, m_head_crawlerid, m_head_pid)

                # if all crawlers and sources are gone, end the reduce
                if (self.r.idcrawl_isEmpty(m_head_name) == True
                        and self.r.idsrc_isEmpty(m_head_name) == True):
                    self.logInfo("All src process was received: end reduce")
                    self.endprocess(m_head_name)
                    bResult = True
        else:
            # data message: only reduce data from an authorized crawler id
            if self.r.idcrawl_isExist(m_head_name, m_head_crawlerid) == False:
                return False

            try:
                m_datah = result[self.JSON_KEYNAME_DATAH]
                m_datar = result[self.JSON_KEYNAME_DATAR]
            except KeyError as e:
                self.syslogErr("Malformed message, missing data field: " + repr(e))
                return False

            self.logdebug('listener: recv : [' + str(result) + ']')
            sys.stdout.flush()

            # execute reduce action
            try:
                if self.r.run(m_head_name, m_datah, m_datar) == True:
                    # remember the source, to know when the action ends
                    self.r.ids_add(m_head_name, None, m_head_pid)
                else:
                    self.syslogErr("Reduce error : bad [HEAD]/[NAME]=\"" + m_head_name + "\"")
            except Exception as e:
                self.syslogErr("Reduce error :" + str(e))

        # timeout management for the other reducers
        self.ManageTimeout(m_head_name)
        return bResult

    def ManageTimeout(self, action_name_excepted):
        """End every timed-out action except action_name_excepted.

        Only actions that still have a registered crawler id are checked.
        Pass None to check all actions.
        """
        for action_name in self.r.getListActionName():
            if action_name == action_name_excepted:
                continue
            if (self.r.idcrawl_isEmpty(action_name) == False
                    and self.r.isTimeout(action_name) == True):
                self.logInfo("Listener: timeout action [" + action_name + "]")
                self.endprocess(action_name)

    def go(self, timeout):
        """Listen forever: handle each incoming message, and run the
        timeout check whenever a poll returns without one.

        timeout -- forwarded to zmq.Poller.poll (milliseconds in pyzmq).
        """
        poll = zmq.Poller()
        poll.register(self.sock, zmq.POLLIN)
        self.logInfo("Waiting for incomming message to reduce...")

        while True:
            events = dict(poll.poll(timeout))
            if events.get(self.sock) != zmq.POLLIN:
                # nothing received within the timeout
                self.ManageTimeout(None)
                continue

            # get JSON format message
            try:
                result = self.sock.recv_json()
            except Exception as e:
                # Bad payloads are logged and skipped. KeyboardInterrupt and
                # SystemExit propagate so the service can be stopped.
                self.syslogErr("error :" + str(e))
                continue

            self.ManageProcess(result)

        # Not reachable while the loop above has no exit condition.
        self.logdebug("End listening!")