class Productor(Thread):
    """Background thread that polls an RSS feed and stores new items.

    Each polling cycle fetches ``FEED_URL``, walks its ``<item>`` elements,
    passes every item whose id is greater than the newest stored id to
    ``self.db.add_news`` and, when that call returns a truthy value, fetches
    the linked article page and passes its fields to ``self.db.add_new``.
    """

    # Feed polled by run().
    FEED_URL = 'http://cs.hust.edu.cn/rss'
    # Pause between two polling cycles, in seconds.
    POLL_INTERVAL = 3600

    def __init__(self):
        """Create the DB handle and initialise the underlying Thread."""
        self.db = DB()
        Thread.__init__(self)

    def link_id(self, link):
        """Return the integer id embedded in the query string of *link*.

        The value is whatever follows the first three characters of the
        query string (e.g. the ``id=`` in ``...?id=123``).  Anything after
        a following ``&`` or ``#`` is ignored, so links that carry further
        parameters or a fragment are accepted as well.

        Raises:
            IndexError: *link* contains no ``?``.
            ValueError: the extracted value is not an integer.
        """
        query = link.split('?')[1]
        value = query[3:]
        # Drop trailing parameters / fragment so int() only sees the id.
        for separator in ('&', '#'):
            value = value.split(separator, 1)[0]
        return int(value)

    def _latest_id(self):
        """Return the id of the first row of ``db.news_list()``, or 1."""
        try:
            return self.db.news_list()[0]['id']
        except Exception:
            # Best effort: an empty list or a failing lookup falls back to 1,
            # as before, but SystemExit/KeyboardInterrupt are no longer eaten.
            return 1

    def _fetch_text(self, url):
        """Fetch *url* and return the body decoded as UTF-8 (errors ignored)."""
        # NOTE(review): close() assumes this is tornado's blocking HTTPClient.
        client = HTTPClient()
        try:
            response = client.fetch(url)
            return response.body.decode('utf-8', errors='ignore')
        finally:
            # Release the client's resources; one client is created per call.
            client.close()

    def _poll_once(self):
        """Run one polling cycle: fetch the feed and store unseen items."""
        maxid = self._latest_id()
        print(maxid)
        feed = self._fetch_text(self.FEED_URL)
        soup = BeautifulStoneSoup(feed)
        for item in soup.find_all('item'):
            title = item.title.text
            link = item.link.text
            desc = item.description.text
            linkid = self.link_id(link)
            if linkid <= maxid:
                # Stop at the first item that is not newer than what is
                # stored -- presumably the feed lists newest items first;
                # TODO confirm.
                break
            if self.db.add_news(linkid, title, desc, link):
                self.get_article(link)

    def run(self):
        """Poll the feed forever, sleeping ``POLL_INTERVAL`` between cycles.

        A failure inside one cycle (network error, unexpected markup, ...)
        is reported with its traceback and the loop carries on, so a single
        bad fetch does not terminate the thread.
        """
        import traceback

        while True:
            try:
                self._poll_once()
            except Exception:
                traceback.print_exc()
            time.sleep(self.POLL_INTERVAL)

    def get_article(self, link):
        """Fetch the article page at *link* and hand its fields to the DB.

        Reads the title, date and content from the ``div.neirong`` container
        and passes them, together with the link id and the link itself, to
        ``self.db.add_new``.

        Returns:
            1 after ``add_new`` has been called, 0 when the page does not
            contain the expected elements (nothing is written in that case).
        """
        page = self._fetch_text(link)
        soup = BeautifulSoup(page)
        container = soup.find('div', class_='neirong')
        if container is None:
            return 0
        title_tag = container.find('div', class_='show_title')
        date_tag = container.find('span', class_='ari10')
        content_tag = container.find('div', class_='show_cont')
        if title_tag is None or date_tag is None or content_tag is None:
            return 0
        linkid = self.link_id(link)
        # insert into the database
        self.db.add_new(linkid, title_tag.text, content_tag.text,
                        date_tag.text, link)
        return 1