def parse_item(self, response):
    """Parse a listing page and start a follow-up crawl for each entry.

    Every ``div`` directly under the element with id ``infinite_scroll`` is
    treated as one entry. The ``href`` values of the anchors directly inside
    that entry's ``div`` elements with class ``ABox`` are handed to
    ``self.NextBianSpider``, and the resulting object is executed through
    ``GetImgAddress.DriveEngine``.

    Args:
        response: Fetched page; only its ``text`` attribute is read.
    """
    document = etree.HTML(response.text)
    for entry in document.xpath('//div[@id="infinite_scroll"]/div'):
        # The XPath result list is forwarded unchanged (no indexing, no URL
        # joining), so it may be empty or hold several hrefs.
        hrefs = entry.xpath('.//div[@class="ABox"]/a/@href')
        follow_up = self.NextBianSpider(hrefs)
        GetImgAddress.DriveEngine(follow_up).run()
def parse_item(self, response):
    """Parse a listing page and start a follow-up crawl for each linked item.

    Each ``li`` under ``ul`` elements with class ``img`` is one item. The
    first ``href`` of its direct ``a`` child is resolved against
    ``response.url`` and passed, as a one-element list together with
    ``self.name``, to ``self.NextBianSpider``; the resulting object is run
    through ``GetImgAddress.DriveEngine``.

    Items that carry no link are skipped rather than aborting the whole page.

    Args:
        response: Fetched page; its ``text`` and ``url`` attributes are read.
    """
    tree = etree.HTML(response.text)
    for item in tree.xpath("//ul[@class='img']/li"):
        hrefs = item.xpath('./a/@href')
        if not hrefs:
            # An <li> without an <a href> would make ``hrefs[0]`` raise
            # IndexError and stop processing of every remaining item.
            continue
        detail_url = urljoin(response.url, hrefs[0])
        next_obj = self.NextBianSpider([detail_url], self.name)
        GetImgAddress.DriveEngine(next_obj).run()
def parse_item(self, response):
    """Parse a listing page and start a follow-up crawl for each linked item.

    Prints the page URL, then visits every ``a`` under
    ``div[class="list"]/ul/li``. The anchor's ``href`` is resolved against
    ``response.url`` and passed, as a one-element list together with
    ``self.name``, to ``self.NextBianSpider``; the resulting object is run
    through ``GetImgAddress.DriveEngine``.

    Anchors without an ``href`` attribute are skipped rather than aborting
    the whole page.

    Args:
        response: Fetched page; its ``text`` and ``url`` attributes are read.
    """
    print(response.url)
    tree = etree.HTML(response.text)
    for anchor in tree.xpath('//div[@class="list"]/ul/li/a'):
        hrefs = anchor.xpath('./@href')
        if not hrefs:
            # An <a> with no href would make ``hrefs[0]`` raise IndexError
            # and stop processing of every remaining anchor.
            continue
        detail_url = urljoin(response.url, hrefs[0])
        next_obj = self.NextBianSpider([detail_url], self.name)
        GetImgAddress.DriveEngine(next_obj).run()
def parse_item(self, response):
    """Parse a listing page and start a follow-up crawl for each titled item.

    Prints the page URL, then visits every ``li`` below
    ``div[class="news_bom-left"]``. For each item the first ``href`` and
    ``title`` of its direct ``a`` child are read. The title is passed through
    ``self.text_analysis`` and combined with ``self.name`` as
    ``"<name>/<title>"``; the href is resolved against ``response.url``.
    Both are handed to ``self.NextBianSpider`` and the resulting object is
    run through ``GetImgAddress.DriveEngine``.

    Items missing either the link or the title are skipped rather than
    aborting the whole page.

    Args:
        response: Fetched page; its ``text`` and ``url`` attributes are read.
    """
    print(response.url)
    tree = etree.HTML(response.text)
    for item in tree.xpath("//div[@class='news_bom-left']//li"):
        hrefs = item.xpath('./a/@href')
        titles = item.xpath('./a/@title')
        if not hrefs or not titles:
            # Indexing an empty XPath result raises IndexError; one
            # incomplete <li> must not stop the remaining items.
            continue
        file_name = self.text_analysis(titles[0])
        detail_url = urljoin(response.url, hrefs[0])
        next_obj = self.NextBianSpider(
            [detail_url], "%s/%s" % (self.name, file_name))
        GetImgAddress.DriveEngine(next_obj).run()
import inspect
import os
import sys

# Put the parent of this file's directory on the import path so that the
# ``core`` package below can be resolved.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(BASE_DIR)

from core import GetImgAddress, customize_class  # noqa: E402  (needs BASE_DIR on sys.path)

if __name__ == "__main__":
    # Walk every member of customize_class (the custom collection classes).
    for name, obj in inspect.getmembers(customize_class):
        # Keep only classes other than 'Options' whose ``display`` flag is
        # truthy. ``getattr`` with a default keeps classes that do not define
        # ``display`` (e.g. ones merely imported into the module) from
        # raising AttributeError.
        if (
            inspect.isclass(obj)
            and name != 'Options'
            and getattr(obj, 'display', False)
        ):
            engine = GetImgAddress.DriveEngine(obj())
            engine.run()