def write_new_file(self, url, title, source, i, time):
    # Render the crawled article as an estar-format page and write it to disk.
    ok = 0
    content = '''
<html>
<head>
<meta charset="utf-8">
<meta name="keywords" content="estarinfo">
<title>''' + title + '''</title>
</head>
<body>
<h1 class="title">''' + title + '''</h1>
<span class="time">''' + time + '''</span>
<span class="source">130679</span>
<div class="article">''' + source + '''</div>
</body>
</html>
'''
    page_text = url + '\n' + title + '\n130679\n\n\n\n' + content
    if self.debug:
        print('count:', self.total, ' === ', title, ' ===')
    if '' == self._dir:
        self.djckb_mkdir()
    filename = self._dir + 'iask_' + str(i) + '_' + str(len(self.d)) + '.htm-2'
    for num in range(2):
        if 1 == crawlerfun.write_file(filename, page_text, ifdisplay=0):
            fileName = '/root/Downloads/djckb/' + 'iask_' + str(i) + '_' + str(len(self.d)) + '.htm-2'
            crawlerfun.write_file(fileName, page_text, ifdisplay=0)  # save a second copy under /root/Downloads
            ok = 1
            break
        else:
            # the directory is sometimes deleted by the C program; recreate it and retry
            crawlerfun.mkdir(self._dir)
    return ok
def write_new_file(self, url, title, source, i, time, id):
    content = '''
<html>
<head>
<meta charset="utf-8">
<meta name="keywords" content="estarinfo">
<title>''' + title + '''</title>
</head>
<body>
<h1 class="title">''' + title + '''</h1>
<span class="time">''' + time + '''</span>
<span class="source">''' + str(id) + '''</span>
<div class="article">''' + source + '''</div>
</body>
</html>
'''
    page_text = url + '\n' + title + '\n' + str(id) + '\n\n\n\n' + content
    if self.debug:
        print('count:', self.i, ' --- ', title)
    if '' == self._dir:
        self.crawl_mkdir()
    filename = self._dir + 'iask_' + str(i) + '_' + str(len(self.d)) + '.htm-2'
    for num in range(2):
        if 1 == crawlerfun.write_file(filename, page_text, ifdisplay=0):
            break
        else:
            # the directory is sometimes deleted by the C program; recreate it and retry
            crawlerfun.mkdir(self._dir)
def write_new_file(self, url, title, source, i, time, id):
    # requires "import os" at module level for the mirror-copy branch below
    if self.debug:
        print('count:', self.i, ' --- ', title)
    content = '''
<html>
<head>
<meta charset="utf-8">
<meta name="keywords" content="estarinfo">
<title>''' + title + '''</title>
</head>
<body>
<h1 class="title">''' + title + '''</h1>
<span class="time">''' + time + '''</span>
<span class="source">''' + str(id) + '''</span>
<div class="article">''' + source + '''</div>
</body>
</html>
'''
    page_text = url + '\n' + title + '\n' + str(id) + '\n\n\n\n' + content
    if '' == self._dir:
        self.jlwb_mkdir()
    filename = self._dir + 'iask_' + str(i) + '_' + str(len(self.d)) + '.htm-2'
    for num in range(2):
        if 1 == crawlerfun.write_file(filename, page_text, ifdisplay=0):
            savePath = '/root/estar_save/jlwb/'
            if not os.path.exists(savePath):
                os.makedirs(savePath)
            fileName = savePath + 'iask_' + str(i) + '_' + str(len(self.d)) + '.htm-2'
            crawlerfun.write_file(fileName, page_text, ifdisplay=0)  # save a second copy under /root/estar_save
            break
        else:
            # the directory is sometimes deleted by the C program; recreate it and retry
            crawlerfun.mkdir(self._dir)
def write_new_file(self, url, title, source, i, id, time):
    # map source id to the display name written into the page
    dct = {1170771: '国家电网有限公司', 1170773: '中国南方电网供应链统一服务平台'}
    ok = 0
    content = '''
<html>
<head>
<meta charset="utf-8">
<meta name="keywords" content="estarinfo">
</head>
<body>
<h1 class="title">''' + title + '''</h1>
<span class="time">''' + time + '''</span>
<span class="source">''' + dct[id] + '''</span>
<div class="article">''' + source + '''</div>
</body>
</html>
'''
    page_text = url + '\n' + title + '\n' + str(id) + '\n\n\n\n' + content
    print(title)
    if '' == self._dir:
        self.guojiadianwang_mkdir()
    filename = self._dir + 'iask_' + str(i) + '_' + str(len(self.d)) + '.htm-2'
    for num in range(2):
        if 1 == crawlerfun.write_file(filename, page_text, ifdisplay=0):
            filename = '/root/estar_save/' + 'iask_' + str(i) + '_' + str(len(self.d)) + '.htm-2'
            crawlerfun.write_file(filename, page_text, ifdisplay=0)  # save a second copy under /root/estar_save
            ok = 1
            break
        else:
            # the directory is sometimes deleted by the C program; recreate it and retry
            crawlerfun.mkdir(self._dir)
    return ok
def write_new_file(self, current_url, current_title, page_source, i):
    ok = 0
    # compose the estar file format:
    # page_source = current_url + '\n' + current_title + '\n' + '1152937' + '\n\n\n\n' + page_source
    page_text = current_url + '\n' + current_title + '\n0\n\n\n\n' + page_source
    # naming scheme: iask_3_7830.htm-2, i.e. iask_[ip number]_[article index].htm-[thread number]
    if '' == self._dir:
        self.baijiahao_mkdir()
    # filename = self._dir + 'iask_' + str(page) + '_' + str(self.total.num) + '.htm-2'
    filename = self._dir + 'iask_' + str(ts) + '_' + str(i) + '.html'  # ts must be defined in an enclosing scope
    for num in range(2):
        if 1 == crawlerfun.write_file(filename, page_text, ifdisplay=0):
            ok = 1
            break
        else:
            # the directory is sometimes deleted by the C program; recreate it and retry
            crawlerfun.mkdir(self._dir)
    return ok
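# All five variants above compose the same on-disk layout: a plain-text header
# of url, title, and numeric source id (padded with blank lines), followed by
# an HTML page tagged with the "estarinfo" keyword. A minimal sketch of that
# shared composition follows; build_estar_page and its parameter names are
# illustrative only and do not appear in the original crawlers.
def build_estar_page(url, title, source_id, time, article_html):
    # HTML body in the same shape the write_new_file variants emit
    content = '''
<html>
<head>
<meta charset="utf-8">
<meta name="keywords" content="estarinfo">
<title>''' + title + '''</title>
</head>
<body>
<h1 class="title">''' + title + '''</h1>
<span class="time">''' + time + '''</span>
<span class="source">''' + str(source_id) + '''</span>
<div class="article">''' + article_html + '''</div>
</body>
</html>
'''
    # plain-text header expected downstream: url \n title \n source_id \n\n\n\n html
    return url + '\n' + title + '\n' + str(source_id) + '\n\n\n\n' + content

# example: page_text = build_estar_page(url, title, 130679, time, source)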