def __init__(self, conf_file):
    """Remember the config file path and load it into a new parser.

    :param conf_file: path (or list of paths) handed to
        ``ConfigParser.read``.
    :raises Exception: whatever ``read`` raises is logged and re-raised
        unchanged.
    """
    config = ConfigParser.ConfigParser()
    self.conf_file = conf_file
    self.parser = config
    # NOTE(review): per the ConfigParser docs, read() skips files it cannot
    # open instead of raising, so a missing file would not reach the
    # except branch below -- confirm that is intended.
    try:
        config.read(conf_file)
    except Exception:
        message = "parse config file %s failed." % (conf_file)
        LOG.warn(message)
        raise
def fetch_page(self, url):
    """Download ``url`` and return the response body.

    :param url: address passed to ``urllib.urlopen``.
    :returns: the data returned by ``read()``, or ``None`` when opening or
        reading the URL raised ``IOError`` (a warning is logged).
    """
    page = None
    try:
        fd = urllib.urlopen(url)
        try:
            page = fd.read()
        finally:
            # Always release the connection, even when read() fails;
            # previously a failing read() leaked the open handle.
            fd.close()
    except IOError:
        LOG.warn("open url:%s failed." % (url))
    return page
def _aging_check(self):
    """Run the aging loop over ``self.stats``; never returns.

    Each pass looks at the ``SE_STATS_TIMER`` slot of every entry in
    ``self.stats``:

    * a counter that is already 0 marks the entry as expired;
    * any other counter is decremented by one.

    Every expired entry is then passed to ``_aging_timer_fire``, removed
    from ``self.stats`` and reported with a warning.  The loop sleeps
    ``SE_AGING_TIMER_INTERVAL`` seconds between passes.
    """
    while True:
        expired_keys = []
        for key, stat in self.stats.iteritems():
            if stat[self.SE_STATS_TIMER] == 0:
                expired_keys.append(key)
                continue
            stat[self.SE_STATS_TIMER] -= 1

        # Removal is deferred until the scan is finished because a dict
        # must not change size while it is being iterated.
        for expired_key in expired_keys:
            # Use the expired key itself.  The previous code reused the
            # stale ``key`` left over from the scan above, so it fired and
            # popped the wrong entry (and raised KeyError on the second
            # expired entry).
            self._aging_timer_fire(expired_key)
            self.stats.pop(expired_key)
            LOG.warn("search engine %s aging!" % (expired_key))

        time.sleep(self.SE_AGING_TIMER_INTERVAL)
def _search_negative_word(self, user, link):
    """Fetch ``link`` and check its page for each of the user's negative words.

    The page is fed to ``self.html_parser``.  If the parser then exposes a
    ``redirect_url``, that URL is fetched and used (both as page content and
    as the reported URL) instead of the original link.  Each entry of
    ``user['negative_word']`` is then handed to
    ``_search_one_negative_word`` together with the page and URL.

    The method returns ``None`` early, without searching, when a page
    cannot be fetched or when parsing fails (parse failures are logged).

    :param user: mapping that provides a ``'negative_word'`` iterable.
    :param link: URL of the page to inspect.
    """
    page = self.fetch_page(link)
    if not page:
        return

    try:
        self.html_parser.reset_parser()
        self.html_parser.feed(page)
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed here.
        LOG.warn("parse link:%s failed." % (link))
        return

    url = link
    redirect_url = self.html_parser.redirect_url
    if redirect_url:
        page = self.fetch_page(redirect_url)
        if not page:
            # Same guard as for the first fetch: without it a failed
            # redirect download would pass an empty page downstream.
            return
        url = redirect_url

    for negative_word in user['negative_word']:
        self._search_one_negative_word(user, page, negative_word, url)