Example #1
0
    def execute(self, num):
        """
        Run the worker loop for the calling thread.

        Each pass takes one message from ``self.queue``, skips it when its
        enter URL is already in ``self.messageSet``, and otherwise hands it
        to the spider returned by ``self.match``. Messages produced by the
        spider are queued again unless their enter URL has been seen.

        The loop ends once every entry in ``self.threadStopState`` is truthy,
        i.e. every registered thread has reported an empty queue.

        :param num: not used by this method body.
        :return: None
        """
        # Register this thread as active before entering the loop.
        self.threadStopState[getCurrentThreadName()] = False
        while not all(self.threadStopState.values()):
            # No work available: flag this thread as idle, back off, re-check.
            if self.queue.empty():
                self.threadStopState[getCurrentThreadName()] = True
                TimeUtil.sleep(5)
                continue
            self.threadStopState[getCurrentThreadName()] = False

            # NOTE(review): if self.queue is a thread-shared queue whose get()
            # blocks, another worker may take the last item between empty()
            # and get(), leaving this thread waiting -- confirm.
            task = self.queue.get()

            # De-duplicate on the enter URL.
            if task.getEnterUrl() in self.messageSet:
                continue
            self.messageSet.add(task.getEnterUrl())

            # Find the matching spider, run it, and queue unseen follow-ups.
            crawler = self.match(task)
            for followUp in crawler.execute(task):
                if followUp.getEnterUrl() in self.messageSet:
                    continue
                self.queue.put(followUp)
Example #2
0
 def setStateRunning(self):
     """Store ``self.RUNNING`` in ``self.threadStateHash`` under the key
     returned by ``getCurrentThreadName()``."""
     runningState = self.RUNNING
     self.threadStateHash[getCurrentThreadName()] = runningState
Example #3
0
 def setStateOver(self):
     """Store ``self.OVER`` in ``self.threadStateHash`` under the key
     returned by ``getCurrentThreadName()``."""
     overState = self.OVER
     self.threadStateHash[getCurrentThreadName()] = overState
Example #4
0
 def getState(self) -> str:
     """Return the entry of ``self.threadStateHash`` stored under the key
     returned by ``getCurrentThreadName()``.

     The lookup is a plain subscript, so a missing entry is not defaulted.
     """
     stateByThread = self.threadStateHash
     return stateByThread[getCurrentThreadName()]
Example #5
0
 def setStateDone(self):
     """Store ``self.DONE`` in ``self.threadStateHash`` under the key
     returned by ``getCurrentThreadName()``."""
     doneState = self.DONE
     self.threadStateHash[getCurrentThreadName()] = doneState
Example #6
0
 def getKeyName(self) -> str:
     """Get the key name for the current thread.

     Fills ``self.KEY_FORMAT`` with the ``curRunnerId``, ``jobId`` and
     ``threadName`` fields; ``threadName`` is the value returned by
     ``getCurrentThreadName()``.
     """
     return self.KEY_FORMAT.format(
         curRunnerId=self.curRunnerId,
         jobId=self.jobId,
         threadName=getCurrentThreadName(),
     )