コード例 #1
0
 def news_content(self, url):
     x = Spider(url)
     print "------------------------------------"
     print url, self.news_type
     news_content_page = self.news_page_object(x.get_page())
     news = {}
     news = news_content_page.news_all(news)
     news['news_url'] = url.encode('utf8')
     news['news_type'] = self.news_type
     if self.database_object.is_existed(url):
         print 'this url is existed'
         return False
     else:
         database = self.database_object(**news)
         database.save()
     return True
コード例 #2
0
 def progress(self):
     while True:
         news_next_url = self.news_base_url
         while news_next_url:
             print news_next_url
             if news_next_url is not None:
                 x = Spider(news_next_url)
                 news_list_source = self.main_page_object(x.get_page())
                 news_list = news_list_source.find_news_list(self.base_url)
                 if not self.news_content_by_list(news_list):
                     break
                 news_next_url = news_list_source.find_news_next_page(self.base_url)
                 time.sleep(5)
             else:
                 break
         print 'sleep 300s current_thread name is %s' % threading.current_thread().getName()
         time.sleep(300)