def run(self):
    """Collect the two download links for every row of the listing CSV.

    Reads ``000_short_video_all.csv`` row by row, taking column 0 as the
    item name and column 1 as the page path.  The page URL is built with
    ``self.url.format(self.baseUrl, page_url)``, the page is fetched via
    ``Config.get_content`` and the ``value`` attributes of the inputs with
    ids ``lin1k0`` and ``lin1k1`` are extracted via ``Config.xpath_content``.
    Each result is handed to ``SAVE_CSV_FILE`` for
    ``shipin_short_video_thunder.csv``.

    A failed fetch/parse is printed and the row is still saved, with empty
    strings in place of the links.  Rows with fewer than two columns are
    skipped.
    """
    # newline='' is what the csv module expects for files it reads.
    with open('000_short_video_all.csv', 'r', encoding='utf-8', newline='') as f:
        self.csv_file = csv.reader(f)
        for line in self.csv_file:
            print(line)
            if len(line) < 2:
                # Blank or truncated row: there is no page path to fetch.
                continue
            name = line[0]
            page_url = line[1]
            url = self.url.format(self.baseUrl, page_url)

            # Start every row from empty results so a failure below can
            # neither leave the names unbound nor reuse the previous row's
            # (already unwrapped) link strings.
            https = []
            thunder = []
            try:
                content = Config.get_content(url)
                https = Config.xpath_content(
                    content, '//input[@id="lin1k0"]/@value')
                thunder = Config.xpath_content(
                    content, '//input[@id="lin1k1"]/@value')
            except Exception as e:
                print('error :' + str(e))

            # Keep only the first match of each XPath query, or '' if none.
            https = https[0] if https else ''
            thunder = thunder[0] if thunder else ''
            print(name, page_url, https, thunder)
            # NOTE(review): the meaning of the trailing False is defined by
            # SAVE_CSV_FILE, which is not visible here.
            SAVE_CSV_FILE('shipin_short_video_thunder.csv',
                          [name, page_url, https, thunder], False)
            # Pause between requests.
            time.sleep(4)
def run(self):
    """Scrape listing pages 1 through 35 and dump each page's links to CSV.

    For every page number the listing URL is stored on ``self.url``, the
    page is fetched with ``Config.get_content`` and the ``href`` / ``title``
    attributes of the entries under ``tpl-img-content`` are extracted.  The
    (title, href) pairs are written to a new time-stamped file inside
    ``./abnormal_csv/``; the method then sleeps 4 seconds before the next
    page.
    """
    first_page, last_page = 1, 35
    for page_no in range(first_page, last_page + 1):
        self.url = 'https://{}/xiazai/list-%e5%8f%98%e6%80%81%e5%8f%a6%e7%b1%bb-{}.html'.format(
            self.baseUrl, page_no)
        print(self.url)

        # Fetch every link on the listing page.
        page = Config.get_content(self.url)
        a_href = Config.xpath_content(
            page, '//div[@id="tpl-img-content"]/li/a/@href')
        a_text = Config.xpath_content(
            page, '//div[@id="tpl-img-content"]/li/a/@title')
        print(a_href)
        print(a_text)

        # Save the links of this page under a time-stamped file name.
        stamp = datetime.datetime.now().strftime('%H%M%S%f')
        out_name = 'abnormal_video_' + stamp + '.csv'
        out_dir = './abnormal_csv/'
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        with open(out_dir + out_name, 'w', encoding='utf-8', newline='') as out:
            # One row per entry: title first, link second.
            csv.writer(out).writerows(zip(a_text, a_href))

        time.sleep(4)
def run(self):
    """Scrape listing pages 22 and 23 and dump each page's links to CSV.

    For every page number the listing URL is stored on ``self.url``, the
    page is fetched with ``Config.get_content`` and the ``href`` / ``title``
    attributes of the entries under ``tpl-img-content`` are extracted.  The
    (title, href) pairs are written to a new time-stamped file inside
    ``./asia_video_main_csv/``; the method then sleeps 4 seconds before the
    next page.
    """
    url_template = 'https://{}/xiazai/list-%E4%BA%9A%E6%B4%B2%E7%94%B5%E5%BD%B1-{}.html'
    entry_xpath = '//div[@id="tpl-img-content"]/li/a/'
    target_dir = './asia_video_main_csv/'

    for page_index in range(22, 23 + 1):
        self.url = url_template.format(self.baseUrl, page_index)
        print(self.url)

        # Fetch every link on the listing page.
        html = Config.get_content(self.url)
        links = Config.xpath_content(html, entry_xpath + '@href')
        titles = Config.xpath_content(html, entry_xpath + '@title')
        print(links)
        print(titles)

        # Save the links of this page under a time-stamped file name.
        timestamp = datetime.datetime.now().strftime('%H%M%S%f')
        csv_name = 'asia_video_' + timestamp + '.csv'
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        with open(target_dir + csv_name, 'w', encoding='utf-8', newline='') as handle:
            rows = csv.writer(handle)
            # One row per entry: title first, link second.
            for pair in zip(titles, links):
                rows.writerow(pair)

        time.sleep(4)
def run(self):
    """Scrape short-video listing page 108 and dump its links to CSV.

    The listing URL is built from ``self.baseUrl``, fetched with
    ``Config.get_content`` and the ``href`` / ``title`` attributes of the
    entries under the element with id ``grid`` are extracted.  The
    (title, href) pairs are written to a new time-stamped
    ``short_video_*.csv`` file in the current working directory; the method
    then sleeps 3 seconds.
    """
    for page_no in range(108, 109):
        listing_url = 'https://{}/shipin/list-%E7%9F%AD%E8%A7%86%E9%A2%91-{}.html'.format(
            self.baseUrl, page_no)
        print(listing_url)

        # Fetch every link on the short-video listing page.
        page = Config.get_content(listing_url)
        hrefs = Config.xpath_content(page, '//*[@id="grid"]/li/a/@href')
        titles = Config.xpath_content(page, '//*[@id="grid"]/li/a/@title')
        print(hrefs)
        print(titles)

        # Save the links of this page under a time-stamped file name.
        stamp = datetime.datetime.now().strftime('%H%M%S%f')
        out_name = 'short_video_' + stamp + '.csv'
        with open(out_name, 'w', encoding='utf-8', newline='') as out:
            # One row per entry: title first, link second.
            csv.writer(out).writerows(zip(titles, hrefs))

        time.sleep(3)
def run(self):
    """Scrape listing pages 31 through 39 and dump each page's links to CSV.

    For every page number the listing URL is stored on ``self.url``, the
    page is fetched with ``Config.get_content`` and the ``href`` / ``title``
    attributes of the entries under ``tpl-img-content`` are stored on
    ``self.a_href`` / ``self.a_text``.  The (title, href) pairs are written
    to a new time-stamped file in ``zifu_video_main_csv/`` next to the
    running script.

    Any exception raised while processing a page is logged through
    ``MY_ERROR_LOG`` together with the URL and whatever was extracted for
    that page, then printed; processing continues with the next page after
    a 4 second pause.
    """
    for page_no in range(31, 39 + 1):
        self.url = 'https://{}/xiazai/list-%E5%88%B6%E6%9C%8D%E4%B8%9D%E8%A2%9C-{}.html'.format(
            self.baseUrl, page_no)
        print(self.url)

        # Reset per page: the error handler below reads these attributes, so
        # they must exist and must not carry the previous page's results.
        self.a_href = []
        self.a_text = []
        try:
            # Fetch every link on the listing page.
            content = Config.get_content(self.url)
            self.a_href = Config.xpath_content(
                content, '//div[@id="tpl-img-content"]/li/a/@href')
            self.a_text = Config.xpath_content(
                content, '//div[@id="tpl-img-content"]/li/a/@title')
            print(self.a_href)
            print(self.a_text)

            # Save the links of this page under a time-stamped file name.
            fileName = datetime.datetime.now().strftime('%H%M%S%f')
            fileName = 'zifu_video_' + fileName + '.csv'
            # abspath first: dirname of a bare script name is '', which would
            # otherwise turn the target into '/zifu_video_main_csv/'.
            abs_path = os.path.dirname(os.path.abspath(sys.argv[0]))
            filePath = os.path.join(abs_path, 'zifu_video_main_csv')
            os.makedirs(filePath, exist_ok=True)
            with open(os.path.join(filePath, fileName), 'w',
                      encoding='utf-8', newline='') as f:
                # One row per entry: title first, link second.
                csv.writer(f).writerows(zip(self.a_text, self.a_href))
        except Exception as e:
            MY_ERROR_LOG(self.url)
            MY_ERROR_LOG(self.a_href)
            MY_ERROR_LOG(self.a_text)
            print('error : ', str(e))

        # Pause between requests.
        time.sleep(4)