def _insert_collection_data_to_db(self, collection_data: tuple, collection_num=''):
    """Store one collection with its tags and image rows, then flag its download record.

    Steps, in order:
      1. Look up a ``Tag`` row for every tag name, creating (and committing)
         the ones that do not exist yet.
      2. Add and commit a ``Collection`` linked to those tags.
      3. Add one ``Image`` row per picture; every URL is derived from the
         first picture's URL.
      4. Set ``status`` to 1 on the ``DownloadRecord`` rows matching
         ``collection_num`` and commit (this commit also persists the images).

    :param collection_data: 6-tuple ``(name, tag_names, total_num,
        img_first_url, width, height)``.
    :param collection_num: identifier stored on the collection and on every
        image row, and used to select the download records to update.
    :raises ValueError: if ``collection_data`` does not unpack into six
        values or ``total_num`` cannot be converted to ``int``.
    :raises IndexError: if ``img_first_url`` does not have the expected
        shape while at least one image has to be generated.
    """
    name, tag_names, total_num, img_first_url, width, height = collection_data

    # Resolve every tag name to a Tag row, creating the missing ones.
    tags = []
    for tag_name in tag_names:
        # One first() query replaces the former count() + first() pair.
        tag = self.session.query(Tag).filter_by(tag_name=tag_name).first()
        if tag is None:
            tag = Tag(tag_name=tag_name)
            self.session.add(tag)
            # Committed immediately (as before) so the tag is stored before
            # the collection that references it; the same object stays
            # attached to the session, so no re-query is needed.
            self.session.commit()
        tags.append(tag)

    collection = Collection(
        collection_num=collection_num,
        name=name,
        total_num=total_num,
        tags=tags,
    )
    self.session.add(collection)
    self.session.commit()

    # Build the image rows of this collection.
    image_total = int(total_num)
    images = []
    if image_total > 0:
        # All URLs share the pieces of the first image URL, so parse it once.
        # Only done when there is something to generate, so a collection
        # without images never touches img_first_url.
        # NOTE(review): assumes the URL ends in ".../<year>/<month>/<dd><c><nn>.<ext>"
        # (e.g. ".../2019/05/01a01.jpg") - confirm against the spider.
        url_parts = img_first_url.split("/")
        year = url_parts[-3]
        month = url_parts[-2]
        file_name = url_parts[-1]
        day = file_name[:2]
        series_char = file_name[2]
        extension = file_name.split('.')[-1]

        for count in range(1, image_total + 1):
            # Image numbers are zero-padded to at least two digits.
            img_url = (
                f"https://i.meizitu.net/{year}/{month}/"
                f"{day}{series_char}{count:02d}.{extension}"
            )
            images.append(
                Image(year=year, month=month, day=day,
                      width=width, height=height,
                      meizitu_url=img_url,
                      collection_num=collection_num))
    self.session.add_all(images)

    # No separate commit for the images: they are persisted together with
    # the status update by the commit below.
    download_record = self.session.query(DownloadRecord).filter_by(
        collection_num=collection_num)
    download_record.update({'status': 1})
    self.session.commit()
def process_item(self, item, spider):
    """Store a scraped item in the database and hand it on unchanged.

    The record type is chosen from ``item.get_name()``; items with any other
    name are passed through without being stored.

    :param item: object yielded by a spider; must provide ``get_name()`` and
        key access to the fields of its record type.
    :param spider: the spider that produced the item (not used here).
    :return: the same ``item``, whether or not it could be stored.

    Errors derived from ``Exception`` are printed and swallowed so that one
    bad item does not stop the pipeline.  ``KeyboardInterrupt``,
    ``SystemExit`` and other ``BaseException``s propagate: the previous
    ``return`` inside ``finally`` silently discarded them.
    """
    try:
        item_name = item.get_name()
        record = None
        if item_name == "OrangemallCategory":
            record = Category(cate_id=item['cate_id'],
                              parent_id=item['parent_id'],
                              level=item['level'],
                              name=item['name'],
                              create_time=item['create_time'],
                              is_delete=item['is_delete'])
        elif item_name == "OrangemallShop":
            record = Shop(shop_id=item['shop_id'],
                          name=item['name'],
                          original_price=item['original_price'],
                          promote_price=item['promote_price'],
                          stock=item['stock'],
                          cate_id=item['cate_id'],
                          create_date=item['create_date'],
                          sale=item['sale'],
                          sort=item['sort'],
                          is_hot=item['is_hot'],
                          is_delete=item['is_delete'])
        elif item_name == "OrangeMallProperty":
            record = Property(property_id=item['property_id'],
                              name=item['name'],
                              shop_id=item['shop_id'],
                              is_delete=item['is_delete'])
        elif item_name == "OrangeMallImage":
            record = Image(img_id=item['img_id'],
                           shop_id=item['shop_id'],
                           type=item['type'],
                           img_url=item['img_url'],
                           is_delete=item['is_delete'])

        # Unknown item names leave ``record`` unset and are only passed on.
        if record is not None:
            minst.add_records(session, record)
    except Exception as e:
        print(f"MySQLBookPipeLine:process_item has error: {e}")
    # Returned outside ``finally`` so interpreter-level exceptions are not
    # swallowed; later pipeline stages still receive the item after an
    # ordinary error.
    return item