def register(self, channel: Channel, job: Job) -> bool:
    if not channel.addJob(job):
        self.LogManager.debug('[QueueManager:%s] job: %s already registered' % (channel.name, job.loguid()))
        return False
    self.LogManager.debug('[QueueManager:%s] job: %s registered successfully' % (channel.name, job.loguid()))
    channel.pushWaitingJob(job)
    # The method is annotated -> bool but the original fell through with None;
    # return True explicitly on the success path.
    return True
def addChannel(self, name: str) -> Channel:
    options = self.getMatchingOptions(channelName=name)
    self._channels[name] = Channel(
        name=name,
        maxConcurrentJobs=options[self.OPTION_MAX_CONCURRENT_JOBS],
        timeoutOffset=options[self.OPTION_TIMEOUT_OFFSET],
        quotasPerInterval=options[self.OPTION_QUOTAS_PER_INTERVAL]
    )
    return self._channels[name]
def loop(self, channel: Channel):
    while self._runEvent.is_set():
        time.sleep(.5)
        while channel.isFillable() and self._runEvent.is_set():
            # Throttle while the quota for the current interval is exhausted.
            quotasInterval = self.QuotasManager.getQuotasIntervalHit(channel=channel)
            if quotasInterval:
                self.QuotasManager.logInterval(channel.name, quotasInterval)
                time.sleep(1)
                continue
            job = channel.popNextWaitingJob()
            if not job:
                # No waiting job: break back to the outer loop (and its sleep)
                # instead of busy-spinning on an empty but fillable channel.
                break
            self.LogManager.debug('[QueueManager:%s] job: %s GO' % (channel.name, job.loguid()))
            channel.pushWorkingJob(job)
            # Notify the worker over MQTT that the job may start.
            self.MqttManager.publish(topic='%s/go/%s' % (channel.name, job.uid), payload=job.uid)
            job.startedAt = time.time()
            self.QuotasManager.track(channel=channel, timeToTrack=job.startedAt)
def finished(self, channel: Channel, jobUid: str):
    job = channel.getJob(uid=jobUid)
    if job:
        self.kill(job, 'finished')
def infos(self, channel: Channel):
    self.LogManager.info('[QueueManager:%s] Waiting jobs: %s, Working jobs: %s'
                         % (channel.name, channel.countWaitingJobs(), channel.countWorkingJobs()))
def aborted(self, channel: Channel, jobUid: str):
    job = channel.getJob(uid=jobUid)
    if job:
        self.kill(job, 'aborted')
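# --- Hedged sketch: the quota gate that loop() above consults, reduced to a
# standalone helper. QuotasManager and Channel internals are not shown in this
# file, so the sliding-window semantics here (at most `limit` job starts per
# `interval` seconds, mirroring quotasPerInterval) are an assumption for
# illustration only, not the project's actual implementation. ---
import time
from collections import deque


class SlidingWindowQuota:
    """Allow at most `limit` events per `interval` seconds (assumed)."""

    def __init__(self, limit: int, interval: float):
        self.limit = limit
        self.interval = interval
        self._hits = deque()  # timestamps of tracked events

    def hit(self, now: float = None) -> bool:
        """Return True when the quota is exhausted (analogue of getQuotasIntervalHit)."""
        now = now if now is not None else time.time()
        while self._hits and now - self._hits[0] > self.interval:
            self._hits.popleft()  # drop events that fell out of the window
        return len(self._hits) >= self.limit

    def track(self, when: float):
        self._hits.append(when)  # analogue of QuotasManager.track()


if __name__ == '__main__':
    quota = SlidingWindowQuota(limit=2, interval=10)
    for _ in range(3):
        if not quota.hit():
            quota.track(time.time())
    print(quota.hit())  # True: the third event is blocked inside the window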
def process(data, provider, proxy):
    """
    :param data: raw M3U playlist content
    :param provider: provider key in the configuration
    :param proxy: proxy base URL used when rewriting stream URLs
    :return:
    """
    channels = m3u_regex.findall(data)
    filters = config.config["providers"][provider].get("filters", None)
    for info, name, url in channels:
        name = str(name).strip()
        url = str(url).strip()
        logging.debug(" Work on " + name + " (" + url + ")")
        # When the name is not properly set on the row, try to get it from the tvg-name attribute
        if name is None or name == "":
            name = parse(name_regex, info)
        channel = Channel(
            id=clean_id(name),
            name=name,
            logo=parse(logo_regex, info),
            country=parse(country_regex, info),
            group=parse(group_regex, info),
            lang=parse(lang_regex, info),
            number=parse(number_regex, info),
        )
        # Project-level filter helper (shadows the builtin of the same name)
        if filter(filters, channel):
            # Validate URL
            # valid = validators.url(url) is True
            # logging.debug(" Validate " + str(url) + " : " + str(valid))
            # Prevent duplicates
            # valid = valid and url not in urlCollector
            valid = url not in urlCollector
            if valid:
                channel.url = translate_url(url, proxy)  # let me proxy!
                # Validate logo
                if channel.logo is not None and channel.logo != "":
                    if validators.url(channel.logo) is not True:
                        channel.logo = None
                if channel.id in mapping.channels:
                    c = mapping.channels[channel.id]
                    channel.id = c.get("id")  # identifier for epg mapping
                    channel.name = c.get("name")  # display name
                    channel.group = c.get("group")  # group
                    if c.get("number") is not None and c.get("number") != "":
                        channel.number = c.get("number")  # order by?
                    if c.get("logo") is not None and c.get("logo") != "":
                        if validators.url(c.get("logo")):
                            channel.logo = c.get("logo")  # valid logo when missing
                    site = WebGrab(id=c.get("id"), site=c.get("site"), site_id=c.get("site_id"),
                                   name=c.get("name"), same_as=c.get("same_as"), offset=c.get("offset"))
                    logging.info(" Added webgrab mapping " + str(site))
                    # Add channel to webgrab site mapping
                    webgrab["channels"].append(site)
                else:
                    channel.name = clean_name(name)
                logging.info(" Added channel " + str(channel.name))
                # Add channel to the playlist and to the duplicate-prevention list
                playlist["channels"].append(channel)
                urlCollector.append(url)
            else:
                logging.info(" Channel " + name + " already present.")
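# --- Hedged sketch: the M3U shape that process() above consumes. m3u_regex
# and name_regex are defined elsewhere in this repo; the patterns below are
# plausible stand-ins for illustration only, not the project's actual regexes. ---
import re

SAMPLE = '#EXTINF:-1 tvg-name="News HD" group-title="News",News HD\nhttp://example.com/stream/news.m3u8\n'

m3u_like = re.compile(r'#EXTINF:-1\s?(.*),(.*)\n(.*)\n')
name_like = re.compile(r'tvg-name="([^"]*)"')

for info, name, url in m3u_like.findall(SAMPLE):
    # Same unpacking as process(): attribute blob, display name, stream URL.
    print(name.strip())                     # News HD
    print(name_like.search(info).group(1))  # News HD (the tvg-name fallback)
    print(url.strip())                      # http://example.com/stream/news.m3u8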
import threading
import time

from model.Spiders import Spiders
from model.Channel import Channel

# Logger, mysql and processor are assumed to be module-level objects defined
# elsewhere in this project.

if __name__ == '__main__':
    while True:
        Logger.info("Main thread active, starting the crawlers...")
        # The program runs 4 threads by default.
        active_thread = threading.active_count()
        if active_thread > 4:
            Logger.info("Some threads have not finished crawling, sleeping for another hour...")
            time.sleep(3600)
            continue
        channels = []
        try:
            channels = mysql.get_channels()
        except Exception as e:
            Logger.error("Failed to query channels: %s", str(e))
        for spider_name in Spiders:
            spider = Spiders.get(spider_name)
            spider.channels = []
            # Assign each channel to the spider that declares it.
            for channel in channels:
                _channel = Channel()
                _channel.from_dict(channel)
                if spider_name == _channel.spider:
                    spider.channels.append(_channel)
            if len(spider.channels) > 0:
                threading.Thread(target=processor.__main__, args=(spider,)).start()
        Logger.info("All channels started, sleeping for 12 hours...")
        time.sleep(3600 * 12)
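# --- Hedged sketch: the from_dict pattern the main loop above relies on.
# model.Channel is not shown here, so this minimal version is an assumption
# for illustration only; the real class likely maps MySQL row columns to
# attributes in a similar way. ---
class ChannelSketch:
    def from_dict(self, data: dict) -> 'ChannelSketch':
        # Copy each column of the row dict onto the instance.
        for key, value in data.items():
            setattr(self, key, value)
        return self


if __name__ == '__main__':
    c = ChannelSketch().from_dict({'spider': 'cctv', 'name': 'CCTV-1'})
    print(c.spider, c.name)  # cctv CCTV-1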