def execute(self, num):
    """Run the worker loop for the calling thread.

    Repeatedly takes a message from ``self.queue``, skips it when its enter
    URL is already in ``self.messageSet``, otherwise records the URL, runs
    the spider chosen by ``self.match`` on it, and enqueues every returned
    message whose enter URL is not yet in ``self.messageSet``. The loop ends
    once every value in ``self.threadStopState`` is truthy.

    :param num: not read anywhere in this method.
    :return: None
    """

    def markStopped(flag):
        # Record, under the current thread's name, whether this worker is idle.
        self.threadStopState[getCurrentThreadName()] = flag

    markStopped(False)
    while not all(self.threadStopState.values()):
        # Nothing queued: flag this thread as stopped, wait, then re-check.
        if self.queue.empty():
            markStopped(True)
            TimeUtil.sleep(5)
            continue
        markStopped(False)
        # NOTE(review): empty() followed by get() is two separate calls; if
        # get() blocks on an empty queue and another thread drains it in
        # between, this thread could hang here -- confirm the queue type.
        task = self.queue.get()

        # De-duplicate on the enter URL.
        enterUrl = task.getEnterUrl()
        if enterUrl in self.messageSet:
            continue
        self.messageSet.add(enterUrl)

        # Pick the matching spider, run it, and enqueue unseen follow-ups.
        crawler = self.match(task)
        for followUp in crawler.execute(task):
            if followUp.getEnterUrl() in self.messageSet:
                continue
            self.queue.put(followUp)
def setStateRunning(self):
    """Store ``self.RUNNING`` as the state of the current thread."""
    newState = self.RUNNING
    threadName = getCurrentThreadName()
    self.threadStateHash[threadName] = newState
def setStateOver(self):
    """Store ``self.OVER`` as the state of the current thread."""
    newState = self.OVER
    threadName = getCurrentThreadName()
    self.threadStateHash[threadName] = newState
def getState(self) -> str:
    """Return the state stored in ``self.threadStateHash`` for the current thread."""
    threadName = getCurrentThreadName()
    return self.threadStateHash[threadName]
def setStateDone(self):
    """Store ``self.DONE`` as the state of the current thread."""
    newState = self.DONE
    threadName = getCurrentThreadName()
    self.threadStateHash[threadName] = newState
def getKeyName(self) -> str:
    """Return the key name for the current thread.

    Fills ``self.KEY_FORMAT`` with ``curRunnerId``, ``jobId`` and the
    current thread's name (as ``threadName``).
    """
    return self.KEY_FORMAT.format(
        curRunnerId=self.curRunnerId,
        jobId=self.jobId,
        threadName=getCurrentThreadName(),
    )