コード例 #1
0
    def _insert_collection_data_to_db(self,
                                      collection_data: tuple,
                                      collection_num='') -> None:
        """Persist one collection together with its tags and image rows.

        The work happens in three stages:

        1. Every name in ``tag_names`` is looked up; a missing ``Tag`` is
           inserted and committed on its own.
        2. The ``Collection`` row, linked to those tags, is inserted and
           committed.
        3. One ``Image`` row per picture is staged, ``status`` is set to 1 on
           the ``DownloadRecord`` rows matching ``collection_num``, and both
           are written by a single final commit.

        Args:
            collection_data: 6-tuple ``(name, tag_names, total_num,
                img_first_url, width, height)``. ``total_num`` is converted
                with ``int()`` to obtain the number of images.
            collection_num: Value stored on the collection and on every image,
                and used to select the download record to update.
        """
        name, tag_names, total_num, img_first_url, width, height = collection_data

        # Resolve the tags. A single lookup per name is enough: when the tag
        # is missing, the freshly created object is reused as-is instead of
        # being queried back from the database.
        tags = []
        for tag_name in tag_names:
            tag = self.session.query(Tag).filter_by(tag_name=tag_name).first()
            if tag is None:
                tag = Tag(tag_name=tag_name)
                self.session.add(tag)
                self.session.commit()
            tags.append(tag)

        collection = Collection(
            collection_num=collection_num,
            name=name,
            total_num=total_num,
            tags=tags,
        )
        self.session.add(collection)
        self.session.commit()

        # Build the image rows. The URL of the first image is taken apart once
        # (it does not change between iterations), and only when there is at
        # least one image, so an empty collection never touches the URL.
        image_total = int(total_num)
        images = []
        if image_total >= 1:
            url_parts = img_first_url.split("/")
            year = url_parts[-3]
            month = url_parts[-2]
            file_name = url_parts[-1]
            # NOTE(review): presumably the file name looks like "<DD><c><NN>.<ext>"
            # (two-character day, one separator character, two-digit index) --
            # confirm against real URLs.
            day = file_name[:2]
            separator = file_name[2]
            extension = file_name.split('.')[-1]
            url_prefix = f"https://i.meizitu.net/{year}/{month}/{day}{separator}"

            images = [
                Image(year=year,
                      month=month,
                      day=day,
                      width=width,
                      height=height,
                      # The image index is zero-padded to at least two digits.
                      meizitu_url=f"{url_prefix}{count:02d}.{extension}",
                      collection_num=collection_num)
                for count in range(1, image_total + 1)
            ]

        self.session.add_all(images)

        # The staged images and the status change are committed together.
        download_record = self.session.query(DownloadRecord).filter_by(
            collection_num=collection_num)
        download_record.update({'status': 1})
        self.session.commit()
コード例 #2
0
    def process_item(self, item, spider):
        """Turn a scraped item into a database record and pass the item on.

        The record type is chosen from ``item.get_name()``; items with any
        other name are passed through without being stored. Any ``Exception``
        raised while building or storing the record is printed and otherwise
        ignored, so the item is still returned.

        The ``return`` deliberately sits after the ``try`` statement rather
        than in a ``finally`` clause: returning from ``finally`` would also
        discard exceptions that are not ``Exception`` subclasses (for example
        ``KeyboardInterrupt`` or ``SystemExit``), which must propagate.

        Args:
            item: Object yielded by a spider. It must provide ``get_name()``
                and key access to its fields.
            spider: The spider that produced the item (unused here).

        Returns:
            The same ``item`` that was passed in.
        """
        try:
            item_name = item.get_name()
            record = None

            if item_name == "OrangemallCategory":
                record = Category(cate_id=item['cate_id'],
                                  parent_id=item['parent_id'],
                                  level=item['level'],
                                  name=item['name'],
                                  create_time=item['create_time'],
                                  is_delete=item['is_delete'])

            elif item_name == "OrangemallShop":
                record = Shop(shop_id=item['shop_id'],
                              name=item['name'],
                              original_price=item['original_price'],
                              promote_price=item['promote_price'],
                              stock=item['stock'],
                              cate_id=item['cate_id'],
                              create_date=item['create_date'],
                              sale=item['sale'],
                              sort=item['sort'],
                              is_hot=item['is_hot'],
                              is_delete=item['is_delete'])

            elif item_name == "OrangeMallProperty":
                record = Property(property_id=item['property_id'],
                                  name=item['name'],
                                  shop_id=item['shop_id'],
                                  is_delete=item['is_delete'])

            elif item_name == "OrangeMallImage":
                record = Image(img_id=item['img_id'],
                               shop_id=item['shop_id'],
                               type=item['type'],
                               img_url=item['img_url'],
                               is_delete=item['is_delete'])

            if record is not None:
                # `minst` and `session` are defined outside this method.
                minst.add_records(session, record)
        except Exception as e:
            # Best effort: a failed insert must not stop the pipeline.
            print(f"MySQLBookPipeLine:process_item has error: {e}")

        return item