class JsonPipelineExporterMixin:
    """Mixin that streams items to a per-spider ``<name>_items.json`` file.

    Fix: the original referenced ``self.files`` without ever initialising
    it (the mixin has no ``__init__``), so ``spider_opened`` raised
    AttributeError. The registry is now created lazily.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline and hook the spider open/close signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        # Lazily create the per-spider file registry so the mixin works
        # even when the subclass __init__ never initialises it.
        if not hasattr(self, 'files'):
            self.files = {}
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        # Give subclasses a chance to massage the item first.
        item = self.pre_process_item(item)
        self.exporter.export_item(item)
        return item

    def pre_process_item(self, item):
        """Hook for subclasses; the default is the identity."""
        return item
class JsonPipeline(object):
    """Save pipeline output to ``output/<spider>_recipes.json``.

    Fixes over the original:
    * the JSON prefix is encoded to bytes before writing — the file is
      opened in binary mode, so writing ``str`` raised TypeError on py3;
    * ``close_spider``/``process_item`` now accept the ``spider`` argument
      Scrapy passes them (defaulted, so existing call sites still work).
    """

    def __init__(self, spider_name):
        self.file = open("output/{}_recipes.json".format(spider_name), 'wb')
        # Wrap the exported item array in an object carrying the scrape date.
        self.file.write(
            ('{"date_scraped": "%s", "recipes": '
             % datetime.datetime.now()).encode('utf-8')
        )
        self.exporter = JsonItemExporter(self.file, encoding='utf-8',
                                         ensure_ascii=False)
        self.exporter.start_exporting()

    @classmethod
    def from_crawler(cls, crawler):
        # NOTE(review): assumes crawler.spider is already set here — verify.
        return cls(spider_name=crawler.spider.name)

    def close_spider(self, spider=None):
        self.exporter.finish_exporting()
        self.file.write(b"}")  # close the wrapping JSON object
        self.file.close()

    def process_item(self, item, spider=None):
        self.exporter.export_item(item)
        return item
class JsonExportPipeline(object):
    """Export each spider's items to ``<name>_data.json``.

    Fix: the original used ``codecs.open(..., 'w+b', encoding='utf-8')``,
    which wraps the binary file in a text encoder, while JsonItemExporter
    already writes utf-8 encoded *bytes*. A plain binary ``open`` is used
    instead.
    """

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_data.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExportPipeline(object):
    """app.pipelines.exporter_json.JsonExportPipeline

    Streams every scraped item into ``<spider.name>_items.json``.
    """

    def __init__(self):
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        handle = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class BaseFilePipeline(object):
    """Base pipeline writing items to ``<spider>_<type>.json`` under SAVED_PATH.

    Fix: ``gettype`` was declared without ``self``, so the
    ``self.gettype()`` call in ``open_spider`` raised TypeError.
    """

    def __init__(self, saved_path):
        self.files = {}
        self.saved_path = saved_path
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler.settings.get('SAVED_PATH'))

    def open_spider(self, spider):
        tp = self.gettype()['name']
        filename = '%s_%s.json' % (spider.name, tp)
        filename = os.path.join(self.saved_path, filename)
        file_ = open(filename, 'w+b')
        self.files[spider] = file_
        self.exporter = JsonItemExporter(file_, ensure_ascii=False,
                                         encoding='utf-8')
        self.exporter.start_exporting()

    def gettype(self):
        """Subclasses must return a dict with at least a ``'name'`` key."""
        pass

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        file_ = self.files.pop(spider)
        file_.close()
class SaveItemToJson(object):
    """Write scraped items to ``<spider.name>_items.json``.

    Fix: removed the Python 2 ``print self.exporter`` statement, which is
    a SyntaxError on Python 3 and was debug residue.
    """

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file=file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExportPipeline(object):
    """Signal-driven pipeline writing items to ``<spider.name>.json``,
    logging each lifecycle step through ``_log``."""

    def __init__(self):
        _log.info('JsonExportPipeline.init....')
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        _log.info('JsonExportPipeline.from_crawler....')
        instance = cls()
        connect = crawler.signals.connect
        connect(instance.spider_opened, signals.spider_opened)
        connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        _log.info('JsonExportPipeline.spider_opened....')
        handle = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = handle
        self.exporter = JsonItemExporter(handle)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        _log.info('JsonExportPipeline.spider_closed....')
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        _log.info('JsonExportPipeline.process_item....')
        self.exporter.export_item(item)
        return item
class JsonPipeline(object):
    """Dump every item to a fixed desktop file as non-ASCII-escaped JSON."""

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signals.spider_opened)
        crawler.signals.connect(instance.spider_closed, signals.spider_closed)
        return instance

    def spider_opened(self, spider):
        out = open('/home/gaoliang/Desktop/result.json', 'w+b')
        self.files[spider] = out
        # ensure_ascii=False keeps Chinese text readable in the JSON output
        self.exporter = JsonItemExporter(out, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExporterPipeline(object):
    """Export article items to ``articleexport.json`` via Scrapy's
    built-in JSON item exporter."""

    def __init__(self):
        self.file = open('articleexport.json', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class VisionsJsonPipeline(object):
    """Export items to ``<spider.name>.json``.

    Fix: the original passed an anonymous ``open(...)`` straight into
    JsonItemExporter and never closed it; the handle is now stored and
    closed in ``close_spider``.
    """

    def __init__(self):
        self.exporter = None
        self.file = None

    def open_spider(self, spider):
        self.file = open('%s.json' % spider.name, 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
class ItcastPipeline(object):
    """Write items to ``tongcheng_pipeline.json``.

    Fix: the teardown hook was misspelled ``close_spier``, so Scrapy never
    called it and the output file was never flushed/closed.
    """

    def open_spider(self, spider):
        # Called when the spider is opened.
        self.file = open('tongcheng_pipeline.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding='utf-8')
        self.exporter.start_exporting()

    def close_spider(self, spider):
        # Called when the spider is closed.
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class WriteItemPipeline(object):
    """Serialize every item into ``trans.json``.

    JSON objects are by definition unordered, so column order must be
    imposed downstream (e.g. in Python) if it matters.
    """

    def __init__(self):
        self.file = open("trans.json", 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonWriterPipeline(object):
    """Export items as UTF-8 JSON to a generated output file."""

    def __init__(self):
        out_name = generate_file_name('json', 'output')
        self.file = open(out_name, 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class Search_Pipeline:
    """Export ``zlibItem`` results to ``../data/search_results.json``.

    Fix: ``process_item`` now always returns the item, so downstream
    pipelines still receive non-``zlibItem`` items instead of ``None``.
    """

    def __init__(self):
        self.file = open('../data/search_results.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding='utf-8',
                                         ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        if isinstance(item, zlibItem):
            self.exporter.export_item(item)
        return item
class HomeWorkMarket(object):
    """Persist job items to ``jobs.json`` as non-ASCII-escaped JSON."""

    def __init__(self):
        self.file = open("jobs.json", 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExporterPipeline(object):
    """Export articles to ``article_export.json``.

    Fix: the teardown method was named ``spider_close``, which Scrapy
    never invokes, so the file was never closed. Renamed to
    ``close_spider``; the old name is kept as an alias.
    """

    def __init__(self):
        # Open in binary mode; the exporter writes already-encoded bytes.
        self.file = open('article_export.json', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()  # step 1

    def close_spider(self, spider):
        self.exporter.finish_exporting()  # step 3
        self.file.close()

    # Backward-compatible alias for the old misnamed hook.
    spider_close = close_spider

    def process_item(self, item, spider):
        self.exporter.export_item(item)  # step 2
        return item
class KugoumusicPipeline(object):
    """Stream Kugou music items into ``items.json``."""

    def __init__(self):
        self.file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExplorerPipeline(object):
    """Persist items to ``table_img.json`` using JsonItemExporter.

    Fixes: ``process_item`` never exported the item (items passed through
    untouched and the file stayed empty), and the teardown was named
    ``spider_closed``, which is never auto-invoked without a signal
    hookup — renamed to ``close_spider`` (old name kept as alias).
    """

    def __init__(self):
        self.file = open('table_img.json', 'wb')
        self.json_explorer = JsonItemExporter(self.file, encoding='utf-8',
                                              ensure_ascii=False)
        self.json_explorer.start_exporting()

    def process_item(self, item, spider):
        self.json_explorer.export_item(item)
        return item

    def close_spider(self, spider):
        self.json_explorer.finish_exporting()
        self.file.close()

    # Backward-compatible alias for the old name.
    spider_closed = close_spider
class JsonExporterPipeline(object):
    """Export items to ``articleexporter.json``.

    Configured in settings.py (priority 2 for testing). Uses Scrapy's
    JsonItemExporter under the hood.
    """

    def __init__(self):
        self.file = open('articleexporter.json', 'wb')
        # Instantiate the exporter on the open binary file.
        self.exporter = JsonItemExporter(
            self.file, encoding='utf_8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        # Remember to return the item: later pipelines may still need it.
        self.exporter.export_item(item)
        return item
class RatingsPipeline:
    """Write rating items to ``indeed/data_files/ratings_<filename>.json``."""

    def open_spider(self, spider):
        target = Path("indeed/data_files/ratings_{}.json".format(
            spider.filename))
        self.file = open(target, "wb")
        self.exporter = JsonItemExporter(self.file, encoding="utf-8")
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class CitiesPipeline:
    """Export ``CityItem`` instances to ``cities.json``; drop everything else.

    Fix: the output file was opened inline and never closed; the handle
    is now stored and closed in ``close_spider``.
    """

    def open_spider(self, spider):
        self.city_file = open("cities.json", "wb")
        self.city = JsonItemExporter(self.city_file, encoding='utf-8',
                                     indent=2)
        self.city.start_exporting()

    def close_spider(self, spider):
        self.city.finish_exporting()
        self.city_file.close()

    def process_item(self, item, spider):
        if isinstance(item, items.CityItem):
            self.city.export_item(item)
            return item
        raise DropItem(item)
class SchoolofathensPipeline:
    """Stream philosopher items into ``philosophers.json``."""

    def __init__(self):
        self.file = open("philosophers.json", 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExporterPipeline(object):
    """Persist Lianjia items to ``LianjiaExpoter.json``."""

    def __init__(self):
        self.file = open("LianjiaExpoter.json", 'wb')
        # Attribute name 'expoter' (sic) kept: it is part of the object's
        # existing surface.
        self.expoter = JsonItemExporter(self.file, ensure_ascii=False,
                                        encoding='utf-8')
        self.expoter.start_exporting()

    def close_spider(self, spider):
        self.expoter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.expoter.export_item(item)
        return item
class JsonExporterPipleline(object):
    """Export articles to ``articleexport.json``.

    Fix: the teardown method was named ``spider_close``, which Scrapy
    never calls, so the exporter was never finished and the file never
    closed. Renamed to ``close_spider``; old name kept as alias.
    """

    def __init__(self):
        self.file = open('articleexport.json', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish exporting and close the file when the spider closes."""
        self.exporter.finish_exporting()
        self.file.close()

    # Backward-compatible alias for the old misnamed hook.
    spider_close = close_spider
class JsonExpoterPipeline(object):
    """Export items to ``jsonexplor.json`` via Scrapy's JSON exporter.

    Fix: ``close_spider`` finished the export but never closed the file,
    leaking the handle and risking unflushed data; it now closes it.
    """

    def __init__(self):
        self.file = open('jsonexplor.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding='utf-8',
                                         ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExportPipline(object):
    """Export articles to ``articleexport.json``; echoes each item to
    stdout (debug behavior preserved from the original)."""

    def __init__(self):
        self.file = open('articleexport.json', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        print(dict(item))
        return item
class JsonPipeline(object):
    """Export items to ``../data/json/<spider.name>.json``."""

    def open_spider(self, spider):
        path = "../data/json/" + spider.name + ".json"
        self.file = open(path, 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExporterPipeline(object):
    """Export articles to ``articleexport.json`` with Scrapy's JSON
    item exporter."""

    def __init__(self):
        self.file = open('articleexport.json', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        # Stop the export, then release the file handle.
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class FifaScrapyPipeline_URL(object):
    """Collect player URLs into ``../data/players_url.json``."""

    def __init__(self):
        self.file = open("../data/players_url.json", 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExporterPipeline(object):
    """Export items to ``articleexport.json``.

    Fix: ``ensure_ascii`` was passed the string ``'Flase'`` — a typo of
    ``False`` and, being a non-empty string, a *truthy* value — which
    forced ASCII-escaped output, the opposite of the intent. It is now
    the boolean ``False``.
    """

    def __init__(self):
        self.file = open('articleexport.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding='utf-8',
                                         ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExpoterPipline(object):
    """Export article items to ``article_items.json``.

    Fix: ``close_spider`` lacked the ``spider`` parameter that Scrapy
    passes, so shutdown raised TypeError and the file was never closed.
    It now accepts it (defaulted for backward compatibility).
    """

    def __init__(self):
        self.file = open("article_items.json", "wb")
        self.export = JsonItemExporter(self.file, encoding="utf-8",
                                       ensure_ascii=False)
        self.export.start_exporting()

    def close_spider(self, spider=None):
        self.export.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.export.export_item(item)
        return item
class FileExporterJson(object):
    """Export receipt items to ``./docs/receipts.json``."""

    def open_spider(self, spider):
        self.file = open("./docs/receipts.json", 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExporterPipeline(object):
    """Export articles to ``articleExporter.json``.

    Fix: the teardown method was named ``spider_close``, which Scrapy
    never invokes, so the exporter never finished and the file leaked.
    Renamed to ``close_spider``; old name kept as alias.
    """

    def __init__(self):
        self.file = open('articleExporter.json', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        """Finish the export and close the file on spider shutdown."""
        self.exporter.finish_exporting()
        self.file.close()

    # Backward-compatible alias for the old misnamed hook.
    spider_close = close_spider
class UsagovScrappingPipeline:
    """Write scraped items to ``tarea1.json``."""

    def __init__(self):
        self.file = open("tarea1.json", 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExportPipeline(object):
    """Export article items to ``article_json_export.json``."""

    def __init__(self):
        self.file = open("article_json_export.json", 'wb')
        self.export = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.export.start_exporting()

    def process_item(self, item, spider):
        self.export.export_item(item)
        return item

    def close_spider(self, spider):
        self.export.finish_exporting()
        self.file.close()
class JsonExportPipeline(object):
    """Export jobbole items to ``jobboleExport.json``."""

    def __init__(self):
        self.file = open("jobboleExport.json", "wb")
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        # The exporter accumulates all dicts into one JSON list.
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
class JsonExporterPipeline:
    """Export article items to ``artileexporter.json``.

    Fix: ``close_spider`` lacked the ``spider`` parameter Scrapy passes,
    raising TypeError at shutdown; it now accepts it (defaulted).
    """

    def __init__(self):
        self.file = open("artileexporter.json", "wb")
        self.exporter = JsonItemExporter(self.file, encoding="utf8",
                                         ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider=None):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item=item)
        return item
class JsonWriterPipeline(object):
    """Append tour items to ``tour_data.json``.

    NOTE(review): the file is opened in append mode ('ab'); a second run
    concatenates a new JSON array after the previous one — confirm that
    downstream consumers expect this.
    """

    def open_spider(self, spider):
        self.file = open('tour_data.json', 'ab')
        self.exporter = JsonItemExporter(
            self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class ImmobilienscoutPipeline(object):
    """Collect listing items into ``results.json``."""

    def __init__(self):
        self.file = open("results.json", 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class XmlItemPipline(object):
    """Write items to ``amazon.xml``.

    NOTE(review): despite the ``.xml`` filename this uses
    ``JsonItemExporter``, so the payload is JSON — confirm whether
    ``XmlItemExporter`` was intended.
    """

    def open_spider(self, spider):
        self.file = open('amazon.xml', 'wb')
        self.exporter = JsonItemExporter(
            self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
def process_item(self, item, spider):
    """Write a designer item as both a JSON file and an Excel workbook
    inside its own directory (named after the escaped designer name)."""
    dir_name = skutils.escape_filename(item['name'])
    dir_path = os.path.join(GlobalState.data_dir, dir_name)
    base_path = os.path.join(dir_path, dir_name)
    # JSON dump via a throwaway per-item exporter.
    with open('%s.json' % base_path, 'w+b') as fh:
        exporter = JsonItemExporter(fh)
        exporter.start_exporting()
        exporter.export_item(item)
        exporter.finish_exporting()
    # Companion Excel workbook.
    excelutils.write_designer_excel(item, base_path, dir_name)
    return item
class JsonWriterPipeline(BaseItemExporter):
    """Export items with CR/LF-stripped text fields to ``item.json``.

    Fixes over the original:
    * the output file was opened with ``codecs.open(..., 'wb',
      encoding='utf-8')`` — an encoding wrapper around a binary stream —
      while JsonItemExporter already writes utf-8 bytes; a plain binary
      ``open`` is used instead;
    * eight copy-pasted ``re.sub`` lines are collapsed into a loop over
      the field names.
    """

    # Text fields stripped of CR/LF before export.
    _TEXT_FIELDS = (
        'description', 'general_impression', 'subject_of_photo',
        'composition', 'use_of_camera', 'depth_of_field',
        'color_lighting', 'focus',
    )

    def __init__(self, **kwargs):
        self._configure(kwargs)
        self.files = {}
        self.encoder = json.JSONEncoder(ensure_ascii=False, **kwargs)

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('item.json', 'wb')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        if item['title']:  # and item['image_url'] :
            for field in self._TEXT_FIELDS:
                item[field] = re.sub("\r|\n", "", item[field])
            self.exporter.export_item(item)
        return item
class JsonExportPipeline(object):
    """Per-spider JSON export wired up through the old-style
    ``dispatcher`` signal API instead of ``from_crawler``."""

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        out = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = out
        self.exporter = JsonItemExporter(out)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.files.pop(spider).close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class WikicrawlerPipeline(object):
    """Export wiki items to ``items.json``.

    Fixes: ``process_item`` did not return the item (later pipelines
    received ``None``), and ``spider_closed`` was missing the ``spider``
    argument the signal handler is invoked with.
    """

    def __init__(self):
        self.item_file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.item_file)

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def spider_closed(self, spider=None):
        self.exporter.finish_exporting()
        self.item_file.close()
class SiteMapJsonExportPipeline(object):
    """Process SiteMap spider items and write them as JSON to the file
    named by ``spider.config['map_file']``."""

    @classmethod
    def from_crawler(cls, crawler):
        """Boilerplate signal hookup."""
        pipeline = cls()
        connect = crawler.signals.connect
        connect(pipeline.spider_opened, signals.spider_opened)
        connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open(spider.config['map_file'], 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class DuplicatesExportPipeline(object):
    """Route items to one of four JSON files, dropping duplicates.

    The item kind is detected structurally from the keys present; each
    kind keeps its own seen-set and its own JsonItemExporter.
    """

    def __init__(self):
        self.category_seen = set()
        self.product_seen = set()
        self.shop_seen = set()
        self.product_price_seen = set()

    def open_spider(self, spider):
        # One (file, exporter) pair per item kind.
        self.category_file = open('spider/scraped/category.json', 'wb')
        self.category_exporter = JsonItemExporter(self.category_file,
                                                  encoding="utf-8")
        self.category_exporter.start_exporting()
        self.product_file = open('spider/scraped/product.json', 'wb')
        self.product_exporter = JsonItemExporter(self.product_file,
                                                 encoding="utf-8")
        self.product_exporter.start_exporting()
        self.shop_file = open('spider/scraped/shop.json', 'wb')
        self.shop_exporter = JsonItemExporter(self.shop_file,
                                              encoding="utf-8")
        self.shop_exporter.start_exporting()
        self.product_price_file = open('spider/scraped/productprice.json', 'wb')
        self.product_price_exporter = JsonItemExporter(self.product_price_file,
                                                       encoding="utf-8")
        self.product_price_exporter.start_exporting()

    def close_spider(self, spider):
        # Finish every export and release every file handle.
        for exporter, handle in (
            (self.category_exporter, self.category_file),
            (self.product_exporter, self.product_file),
            (self.shop_exporter, self.shop_file),
            (self.product_price_exporter, self.product_price_file),
        ):
            exporter.finish_exporting()
            handle.close()

    def process_item(self, item, spider):
        keys = item.keys()
        if 'id' in keys and 'name' in keys and 'parent_category_id' in keys:
            # Category item — dedupe on id.
            if item['id'] in self.category_seen:
                raise DropItem("Duplicate category item found: %s" % item)
            self.category_seen.add(item['id'])
            self.category_exporter.export_item(item)
            return item
        if ('name' in keys and 'category_id' in keys
                and 'thumbnail_url' in keys and 'url' in keys):
            # Product item — dedupe on url.
            if item['url'] in self.product_seen:
                raise DropItem("Duplicate product item found: %s" % item)
            self.product_seen.add(item['url'])
            self.product_exporter.export_item(item)
            return item
        if 'name' in keys and 'url' in keys and 'thumbnail_url' in keys:
            # Shop item — dedupe on url.
            if item['url'] in self.shop_seen:
                raise DropItem("Duplicate shop item found: %s" % item)
            self.shop_seen.add(item['url'])
            self.shop_exporter.export_item(item)
            return item
        if ('shop_id' in keys and 'product_id' in keys
                and 'price' in keys and 'price_and_shipment' in keys):
            # Product-price item — dedupe on shop-product pair.
            pair = item['shop_id'] + '-' + item['product_id']
            if pair in self.product_price_seen:
                raise DropItem("Duplicate product price item found: %s" % item)
            self.product_price_seen.add(pair)
            self.product_price_exporter.export_item(item)
            return item
        return item
class JsonExportPipeline(object):
    """Route election items to per-type JSON files.

    Fixes over the original:
    * ``dict.itervalues()`` (Python 2 only) replaced with ``values()``;
    * the counties exporter created in ``spider_opened`` never called
      ``start_exporting`` — it does now.
    """

    def __init__(self):
        self.files = []
        self.exporters = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        if spider.__class__ == MayorsSpider:
            mayor_file = open("data/mayor_candidates.json", "w+b")
            council_file = open("data/city_counsils.json", "w+b")
            self.files.append(mayor_file)
            self.files.append(council_file)
            self.mayor_exporter = JsonItemExporter(mayor_file)
            self.council_exporter = JsonItemExporter(council_file)
            self.mayor_exporter.start_exporting()
            self.council_exporter.start_exporting()
        elif spider.__class__ == RegionCountiesSpider:
            counties_file = open("data/region_counties.json", "w+b")
            self.counties_exporter = JsonItemExporter(counties_file)
            self.counties_exporter.start_exporting()  # was missing
            self.files.append(counties_file)

    def create_exporter(self, filename):
        """Open *filename*, start an exporter on it, and track the file."""
        file = open(filename, "w+b")
        exporter = JsonItemExporter(file)
        exporter.start_exporting()
        self.files.append(file)
        return exporter

    def spider_closed(self, spider):
        if spider.__class__ == MayorsSpider:
            self.mayor_exporter.finish_exporting()
            self.council_exporter.finish_exporting()
        elif spider.__class__ == RegionCountiesSpider:
            for exporter in self.exporters.values():
                exporter.finish_exporting()
        for file in self.files:
            file.close()

    def process_item(self, item, spider):
        if item.__class__ == CityCouncil:
            self.council_exporter.export_item(item)
        elif item.__class__ == MayorCandidate:
            self.mayor_exporter.export_item(item)
            self.counties_exporter.export_item(item)
        else:
            # Lazily create one exporter per remaining item class.
            if item.__class__ == RegionCandidate:
                filename = "data/region_council_candidates.json"
            elif item.__class__ == RegionCounty:
                filename = "data/region_counties.json"
            elif item.__class__ == CityCouncilCandidate:
                filename = "data/city_council_candidates.json"
            elif item.__class__ == CityCounty:
                filename = "data/city_counties.json"
            exporter_name = item.__class__.__name__
            if exporter_name not in self.exporters:
                self.exporters[exporter_name] = self.create_exporter(filename)
            self.exporters[exporter_name].export_item(item)
        return item