class CarListSpider(object):
    """Item pipeline that writes each processed item into ``car_list``."""

    def open_spider(self, spider):
        """Create the ``DBHelper`` used for the lifetime of the spider."""
        self.db = DBHelper()

    def close_spider(self, spider):
        """Close the database helper, handing it this pipeline object."""
        self.db.close_db(self)

    def process_item(self, item, spider):
        """Insert one item as a row of ``car_list`` and pass the item on.

        The item keys are read in the order of the ``%s`` placeholders of
        the INSERT statement; the leading ``NULL`` is supplied by the
        statement itself.

        Returns:
            The same ``item`` object that was passed in.
        """
        column_keys = (
            'brands_category',
            'qczj_fctid',
            'containbookedspec',
            'fctname',
            'newenergy',
            'newenergySeriesId',
            'pnglogo',
            'rank',
            'seriesImg',
            'seriesName',
            'seriesPriceMax',
            'seriesPriceMin',
            'seriesState',
            'seriesid',
            'seriesplace',
        )
        row = tuple(item[key] for key in column_keys)
        statement = "INSERT INTO car_list VALUES(NULL, %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        self.db.insert_db(statement, row)
        return item
def parse(self, response):
    """Yield one ``CarListItem`` per series listed in the JSON response.

    The response text is decoded as JSON. For every entry of ``fctlist``
    the matching ``car_brands_category.id`` is looked up by the entry's
    ``fctid``, and each element of that entry's ``serieslist`` is copied
    into a new item carrying that id as ``brands_category``.

    Args:
        response: Response object whose ``text`` holds the JSON payload.

    Yields:
        A fresh ``CarListItem`` for each series. Entries of ``fctlist``
        with an empty ``serieslist``, or with no matching row in
        ``car_brands_category``, yield nothing.

    Raises:
        ValueError: If a ``fctid`` in ``fctlist`` is not an integer value.
        KeyError: If an expected key is missing from the payload.
    """
    import logging

    # Keys copied verbatim from a series entry onto the item.
    passthrough_keys = (
        'containbookedspec',
        'fctname',
        'newenergy',
        'newenergySeriesId',
        'pnglogo',
        'rank',
        'seriesImg',
        'seriesName',
        'seriesPriceMax',
        'seriesPriceMin',
        'seriesState',
        'seriesid',
        'seriesplace',
    )

    data = json.loads(response.text)
    # Created lazily and shared by every lookup of this response, instead
    # of building a new helper for each series.
    # NOTE(review): the helper is not closed here because the signature of
    # DBHelper.close_db is not visible from this block -- confirm and close.
    db = None
    for row in data['fctlist']:
        series_list = row['serieslist']
        if not series_list:
            continue

        if db is None:
            db = DBHelper()
        # The id comes from a remote payload and is interpolated unquoted,
        # so force it to an integer before it reaches the SQL text.
        sql = 'select `id` from car_brands_category where `qczj_fctid`={}'.format(
            int(row['fctid']))
        fctids = db.select_db(sql)
        if not fctids:
            # Without a category id the series cannot be linked; skip this
            # entry rather than aborting the rest of the response.
            logging.getLogger(__name__).warning(
                'no car_brands_category row for qczj_fctid=%s; skipping its series',
                row['fctid'])
            continue
        category_id = fctids[0][0]  # brands_id

        for res in series_list:
            # A new item per series: reusing one mutable item would make
            # every yielded reference point at the same object.
            item = CarListItem()
            item['brands_category'] = category_id
            item['qczj_fctid'] = res['fctid']
            for key in passthrough_keys:
                item[key] = res[key]
            yield item
def start_requests(self):
    """Yield one series-list request for every row of ``car_brands``.

    Each row's ``qczj_id`` is substituted into the request URL and the
    row's ``id`` is attached to the request as ``meta['id']``; responses
    are handled by ``self.parse``.
    """
    url_template = 'https://car.m.autohome.com.cn/ajax/GetSeriesByBrandId1?r=9&brandid={}'
    helper = DBHelper()
    brand_rows = helper.select_db('select `id`, `qczj_id` from car_brands')
    for brand in brand_rows:
        # Column 0 is `id`, column 1 is `qczj_id`, per the SELECT above.
        series_url = url_template.format(brand[1])
        yield scrapy.Request(series_url, meta={'id': brand[0]}, callback=self.parse)
class MySQLPipeline(object):
    """Item pipeline that writes each processed item into ``car_brands``."""

    def open_spider(self, spider):
        """Create the ``DBHelper`` used for the lifetime of the spider."""
        self.db = DBHelper()

    def close_spider(self, spider):
        """Close the database helper, handing it this pipeline object."""
        self.db.close_db(self)

    def process_item(self, item, spider):
        """Insert one item as a row of ``car_brands`` and pass the item on.

        Returns:
            The same ``item`` object that was passed in.
        """
        text = item['text']
        qczj_id = item['qczj_id']
        img_url = item['img_url']
        statement = "INSERT INTO car_brands VALUES(NULL, %s,%s,%s)"
        self.db.insert_db(statement, (text, qczj_id, img_url))
        return item
class BrandCategorySpider(object):
    """Item pipeline that writes each processed item into ``car_brands_category``."""

    def open_spider(self, spider):
        """Create the ``DBHelper`` used for the lifetime of the spider."""
        self.db = DBHelper()

    def close_spider(self, spider):
        """Close the database helper, handing it this pipeline object."""
        self.db.close_db(self)

    def process_item(self, item, spider):
        """Insert one item as a row of ``car_brands_category`` and pass it on.

        The item keys are read in placeholder order; the leading ``NULL``
        comes from the statement itself.

        Returns:
            The same ``item`` object that was passed in.
        """
        ordered_keys = (
            'fctname',
            'qczj_fctid',
            'seriesplace',
            'seriesplacenum',
            'brandsid',
            'qczj_brandsid',
        )
        params = tuple(item[name] for name in ordered_keys)
        statement = "INSERT INTO car_brands_category VALUES(NULL, %s,%s,%s,%s,%s,%s)"
        self.db.insert_db(statement, params)
        return item
def open_spider(self, spider):
    """Create a ``DBHelper`` and keep it on the instance as ``self.db``."""
    helper = DBHelper()
    self.db = helper