Exemple #1
0
 def get(self):
     """Pull one link out of the current page and queue it on the spider.

     The link is whatever ``self.extract`` yields for the two markers
     below (presumably the text found between them -- TODO confirm
     against ``extract``). The result is interpolated into ``HTTP``
     with ``%`` and handed to ``spider.put``.
     """
     # The closing marker ends with the anchor text "中英对照"
     # (Chinese-English side-by-side), so only that link is targeted.
     opening_marker = 'class="pn" href="'
     closing_marker = '" target=""> 中英对照'
     target = self.extract(opening_marker, closing_marker)
     spider.put(HTTP % target)
Exemple #2
0
#        tid = int(self.get_argument('tid'))
#        print tid, name
#        self.page.append((tid, self.request.url, name, html))
# 
#    @classmethod
#    def write(cls):
#        page = cls.page
#        page.sort(key=itemgetter(0), reverse=True)
#        with open(join(PREFIX, 'ecocn_org.xml'), 'w') as rss:
#            rss.write(
#                cls.template.render(
#                    rss_title='经济学人 . 中文网',
#                    rss_link='http://www.ecocn.org',
#                    li=[
#                        dict(
#                            link=link,
#                            title=title,
#                            txt=txt
#                        ) for id, link, title, txt in cls.page
#                    ]
#                )
#            )
# 
 
if __name__ == '__main__':
    # Seed the crawl with the listing page for category 1.
    seed_url = 'http://www.ecocn.org/portal.php?mod=list&catid=1'
    spider.put(seed_url)

    # 10 concurrent fetch threads; page read timeout of 30 seconds.
    thread_count, read_timeout = 10, 30
    spider.run(thread_count, read_timeout)

    # NOTE(review): a `write` classmethod appears only in commented-out
    # form earlier in this file -- confirm `forum.write` still exists.
    forum.write()