def getArticle(key_word, real_url_set):
    """Fetch each URL with ``requests`` and save its extracted text to disk.

    An output directory named ``method + key_word`` (``method`` is a
    module-level name) is created under the hard-coded desktop crawler
    folder. Every URL that is processed successfully is written to
    ``<n>.txt`` in that directory, where ``n`` counts successful saves
    starting at 0. Each file contains the URL immediately followed by the
    text returned by ``fulltext`` -- no separator is written between them.

    URLs that fail at any step (download, extraction, write) are reported
    on stdout and skipped; they do not consume a file number.

    Args:
        key_word: Keyword appended to ``method`` to form the directory name.
        real_url_set: Iterable of article URLs to download.

    Returns:
        Always ``0``.

    Raises:
        OSError: If the output directory cannot be created.
    """
    path = "C:/Users/Liuyus/Desktop/大停电爬虫/" + method + key_word
    # exist_ok: re-running the crawler for the same keyword must not crash
    # just because the directory is already there.
    os.makedirs(path, exist_ok=True)
    print(real_url_set)

    count = 0
    for real_url in real_url_set:
        try:
            time.sleep(1)  # pause before every request
            response = requests.get(url=real_url, headers=headers)
            print(response.text)
            text = fulltext(response.text, language='zh')
            print(real_url)

            filename = str(count) + ".txt"
            # Explicit UTF-8 so Chinese text is written regardless of the
            # platform default codec; ``with`` closes the file even when a
            # write fails.
            with open(path + "/" + filename, "w", encoding="utf-8") as f:
                f.write(real_url)
                print(text)
                f.write(text)
        except Exception as exc:
            # Best-effort crawl: report and move on. ``Exception`` (not a
            # bare except) lets Ctrl-C / SystemExit stop the run.
            print("failed to save article from", real_url, "-", repr(exc))
            continue
        count = count + 1
    return 0
def getArticle(self, key_word, real_url_set):
    """Load each URL in ``self.driver`` and save its extracted text to disk.

    An output directory named ``method + key_word`` (``method`` is a
    module-level name) is created under the hard-coded desktop crawler
    folder. Every URL that is processed successfully is written to
    ``<n>.txt`` in that directory, where ``n`` counts successful saves
    starting at 0. Each file contains the URL immediately followed by the
    text that ``fulltext`` returns for the driver's page source -- no
    separator is written between them.

    URLs that fail at any step are reported on stdout and skipped; they do
    not consume a file number.

    Args:
        key_word: Keyword appended to ``method`` to form the directory name.
        real_url_set: Iterable of article URLs to open.

    Returns:
        Always ``0``.

    Raises:
        OSError: If the output directory cannot be created.
    """
    path = "C:/Users/Liuyus/Desktop/大停电爬虫/" + method + key_word
    # exist_ok: re-running the crawler for the same keyword must not crash
    # just because the directory is already there.
    os.makedirs(path, exist_ok=True)
    print(real_url_set)

    # Use the instance's driver everywhere; the original mixed
    # ``self.driver`` with a bare ``driver`` name.
    driver = self.driver
    count = 0
    for real_url in real_url_set:
        try:
            time.sleep(1)  # pause before every page load
            try:
                driver.get(real_url)
            except Exception:
                # Navigation failed: fall back to refreshing the page.
                driver.refresh()
            # NOTE(review): the source formatting was lost; the sleep and
            # refresh below are assumed to run for every page, not only
            # after a failed get() -- confirm.
            time.sleep(2)
            driver.refresh()
            # Set scrollTop far past the page height, presumably to trigger
            # lazily loaded content before reading the source -- TODO confirm.
            js = "var q=document.documentElement.scrollTop=100000"
            driver.execute_script(js)
            time.sleep(3)
            text = fulltext(driver.page_source, language='zh')
            print(real_url)
            print(driver.page_source)

            filename = str(count) + ".txt"
            # Explicit UTF-8 so Chinese text is written regardless of the
            # platform default codec; ``with`` closes the file even when a
            # write fails.
            with open(path + "/" + filename, "w", encoding="utf-8") as f:
                f.write(real_url)
                print(text)
                f.write(text)
        except Exception as exc:
            # Best-effort crawl: report and move on. ``Exception`` (not a
            # bare except) lets Ctrl-C / SystemExit stop the run.
            print("failed to save article from", real_url, "-", repr(exc))
            continue
        count = count + 1
    return 0