Beispiel #1
0
 def __init__(self):
     """Create the URL manager, HTML loader and HTML parser collaborators.

     The three objects are built in order inside a single ``try`` block.
     If any construction raises, the exception is handed to
     ``SpiderLogger.log`` and the handler does not re-raise it.
     """
     try:
         self.urlManager = urlManager.UrlManager()
         self.htmlLoader = htmlLoader.HtmlLoader()
         self.htmlParser = htmlParser.HtmlParser()
     except Exception as e:
         # NOTE(review): attributes assigned before the failing line stay
         # set, the remaining ones are never created -- callers may later
         # hit AttributeError. Confirm this best-effort behaviour is wanted.
         SpiderLogger.log(e)
Beispiel #2
0
 def __init__(self,root_url,category_Nums,categories_Name,path):
     """Queue the search URLs, then download, parse and export each one.

     Args:
         root_url: Prefix placed in front of the ``word=...&pn=...`` part
             of every search URL.
         category_Nums: Divided by 10 and truncated with ``int()`` to get
             the number of search URLs to queue.
         categories_Name: Text inserted as the ``word`` value of each URL.
         path: Forwarded unchanged to ``DataOutput.output_excel``.
     """
     self.searchUrlsManger = urlM.UrlManager()
     self.crawlUrlsManger = urlM.UrlManager()
     self.htmlParser = htmlP.HtmlParser()

     # Construct the search URLs: one per page index.
     page_count = int(category_Nums / 10)
     for page_index in range(page_count):
         search_url = root_url + "word=" + categories_Name + "&pn=" + str(page_index)
         self.searchUrlsManger.add_new_url(search_url)

     downloader = htmlD.HtmlDownloader()
     writer = DataOutput()

     # Process every queued search URL: fetch it, parse it, export the data.
     queued_total = self.searchUrlsManger.get_urls_num()
     for position in range(queued_total):
         page_url = self.searchUrlsManger.get_new_url()
         page_html = downloader.download(page_url)
         parsed_rows = self.htmlParser.parse(page_html, position)
         writer.output_excel(parsed_rows, path)
Beispiel #3
0
 def __init__(self):
     """Create the mail sender, HTML loader, URL manager and HTML parser.

     Each collaborator is built with its no-argument constructor and
     stored on the instance; exceptions from any of them propagate.
     """
     self.smtp = mysendmail.MyMail()
     self.htmlLoader = htmlLoader.HtmlLoader()
     self.urlManager = urlManager.UrlManager()
     self.htmlParser = htmlParser.HtmlParser()
Beispiel #4
0
 def __init__(self):
     """Create the URL manager, HTML downloader and HTML parser.

     Each collaborator is built with its no-argument constructor and
     stored on the instance as ``urls``, ``downloader`` and ``parser``.
     """
     # NOTE(review): another ``__init__`` definition follows directly after
     # this one; if both belong to the same class, the later definition
     # replaces this one -- confirm which is intended.
     self.urls = urlManager.UrlManager()
     self.downloader = htmlDown.HtmlDownloader()
     self.parser = htmlParser.HtmlParser()
 def __init__(self):
     """Create the URL manager, downloader, parser and outputer.

     Each collaborator is built with its no-argument constructor and
     stored on the instance as ``urls``, ``downloader``, ``parser`` and
     ``outputer``.
     """
     self.urls = urlManager.UrlManager()
     self.downloader = htmlDownloader.HtmlDownloader()
     self.parser = htmlParser.HtmlParser()
     self.outputer = htmlOutputer.HtmlOutputer()