import json
import os
from datetime import datetime

import regex
from scrapy.exporters import BaseItemExporter
from unidecode import unidecode


class VnnewscrawlerPipeline(object):
    def __init__(self, download_dir):
        self.download_dir = download_dir
        self.exporter = BaseItemExporter()

    @classmethod
    def from_crawler(cls, crawler):
        return cls(download_dir=crawler.settings.get("DOWNLOAD_DIR", "downloads"))

    def process_item(self, item, spider):
        # Group output by spider name and a slugified, ASCII-folded category.
        subdir = os.path.join(
            self.download_dir,
            spider.name,
            regex.sub(
                r"[\s_-]+", "-", unidecode(item.get("category", "unknown"))
            ).lower(),
        )
        os.makedirs(subdir, exist_ok=True)
        # Fall back to a timestamp when the item carries no unique code.
        filename = os.path.join(
            subdir, item.get("code", datetime.now().strftime("%Y%m%d%H%M%S%f"))
        )
        with open(filename, "w", encoding="UTF-8") as fp:
            json.dump(
                dict(self.exporter._get_serialized_fields(item)),
                fp,
                indent=4,
                ensure_ascii=False,
            )
        return item
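For this pipeline to run, it has to be registered in the project settings. A minimal sketch, assuming the project module is named vnnewscrawler (the dotted path and the priority value are assumptions):

# settings.py -- module path and priority are hypothetical
ITEM_PIPELINES = {
    "vnnewscrawler.pipelines.VnnewscrawlerPipeline": 300,
}
DOWNLOAD_DIR = "downloads"  # read by from_crawler() above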
from scrapy.exporters import BaseItemExporter

# GalleryItem/ImageItem and the Gallery/Image models come from this
# project's own modules; the import paths below are assumptions.
from .items import GalleryItem, ImageItem
from .models import Gallery, Image


class BeautyCrawlerPipeline(object):
    def __init__(self):
        self.exporter = BaseItemExporter()

    def process_item(self, item, spider):
        # Dispatch on the concrete item type.
        if isinstance(item, GalleryItem):
            self.process_gallery(item)
        if isinstance(item, ImageItem):
            self.process_image(item)
        return item

    def process_gallery(self, item):
        json_item = dict(self.exporter._get_serialized_fields(item))
        Gallery(**json_item).save()

    def process_image(self, item):
        json_item = dict(self.exporter._get_serialized_fields(item))
        Image(**json_item).save()
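For context, a minimal sketch of the two item classes this pipeline dispatches on, assuming plain Scrapy items (the field names are hypothetical; the Gallery/Image classes their fields feed are persistence models whose save() suggests an ORM/ODM, which is also an assumption):

import scrapy


class GalleryItem(scrapy.Item):
    title = scrapy.Field()
    url = scrapy.Field()


class ImageItem(scrapy.Item):
    gallery_url = scrapy.Field()
    src = scrapy.Field()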
def _get_exporter(self, **kwargs):
    return BaseItemExporter(**kwargs)
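This hook simply forwards keyword arguments to BaseItemExporter. For reference, a sketch of the standard options it would pass through (the values here are illustrative):

from scrapy.exporters import BaseItemExporter

# fields_to_export, export_empty_fields and encoding are standard
# BaseItemExporter options that _get_exporter(**kwargs) forwards.
exporter = BaseItemExporter(
    fields_to_export=["title", "url"],  # restrict and order exported fields
    export_empty_fields=False,          # skip unpopulated fields
    encoding="utf-8",
)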
def __init__(self, download_dir):
    self.download_dir = download_dir
    self.exporter = BaseItemExporter()
def __init__(self):
    self.exporter = BaseItemExporter()
def __init__(self, arquivo, *args, **kwargs):
    # "arquivo" is Portuguese for "file": the open file object to export to.
    BaseItemExporter.__init__(self, *args, **kwargs)
    self.arquivo = arquivo
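This constructor alone does not make a working exporter: a BaseItemExporter subclass must also implement export_item. A minimal sketch of the rest of such a file-backed exporter, assuming a simple one-line-per-field text format (the class name and output format are assumptions):

from scrapy.exporters import BaseItemExporter


class ArquivoItemExporter(BaseItemExporter):
    def __init__(self, arquivo, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.arquivo = arquivo  # open file object to write to

    def export_item(self, item):
        # Write each serialized field as a "name: value" line.
        for name, value in self._get_serialized_fields(item):
            self.arquivo.write(f"{name}: {value}\n")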
def process_item(self, item, spider):
    exporter = BaseItemExporter()
    # Serialize every populated field in place, honoring each Field's
    # declared serializer. A Scrapy pipeline must return the item (or
    # raise DropItem), so the missing return is added here.
    for field_name in item:
        item[field_name] = exporter.serialize_field(
            item.fields[field_name], field_name, item[field_name]
        )
    return item
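serialize_field applies the serializer declared in the Field's metadata, falling back to the identity function when none is set. A minimal sketch of an item that declares one (the item and serializer names are hypothetical):

import scrapy


def to_price(value):
    # Format a raw number as a price string.
    return f"${float(value):.2f}"


class ProductItem(scrapy.Item):
    name = scrapy.Field()
    price = scrapy.Field(serializer=to_price)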