class BaseFilePipeline(object):
    def __init__(self, saved_path):
        self.files = {}
        self.saved_path = saved_path
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls(crawler.settings.get('SAVED_PATH'))
        return pipeline

    def open_spider(self, spider):
        tp = self.gettype()['name']
        filename = '%s_%s.json' % (spider.name, tp)
        filename = os.path.join(self.saved_path, filename)
        file_ = open(filename, 'w+b')
        self.files[spider] = file_
        self.exporter = JsonItemExporter(file_, ensure_ascii=False, encoding='utf-8')
        self.exporter.start_exporting()

    def gettype(self):
        # abstract: subclasses return a dict with a 'name' key
        raise NotImplementedError

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        file_ = self.files.pop(spider)
        file_.close()
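The base class above leaves gettype() abstract, so each concrete pipeline supplies the type name used in the output filename. A minimal sketch of such a subclass, assuming a hypothetical 'profile' item type and the usual process_item hook:

class ProfileFilePipeline(BaseFilePipeline):
    # Hypothetical subclass; the 'profile' type name is illustrative.
    def gettype(self):
        return {'name': 'profile'}

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item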
def spider_opened(self, spider):
    if spider.__class__ == MayorsSpider:
        mayor_file = open("data/mayor_candidates.json", "w+b")
        council_file = open("data/city_counsils.json", "w+b")
        self.files.append(mayor_file)
        self.files.append(council_file)
        self.mayor_exporter = JsonItemExporter(mayor_file)
        self.council_exporter = JsonItemExporter(council_file)
        self.mayor_exporter.start_exporting()
        self.council_exporter.start_exporting()
    elif spider.__class__ == RegionCountiesSpider:
        counties_file = open("data/region_counties.json", "w+b")
        self.counties_exporter = JsonItemExporter(counties_file)
        self.files.append(counties_file)
        self.counties_exporter.start_exporting()
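The handler above only opens the files; routing items to the right exporter happens in process_item. A sketch of what that routing might look like, assuming hypothetical MayorItem and CouncilItem item classes:

def process_item(self, item, spider):
    # Hypothetical routing; the item class names are illustrative.
    if isinstance(item, MayorItem):
        self.mayor_exporter.export_item(item)
    elif isinstance(item, CouncilItem):
        self.council_exporter.export_item(item)
    else:
        self.counties_exporter.export_item(item)
    return item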
def open_spider(self, spider):
    # Create four files for storing scraped items
    self.category_file = open('spider/scraped/category.json', 'wb')
    self.category_exporter = JsonItemExporter(self.category_file, encoding="utf-8")
    self.category_exporter.start_exporting()

    self.product_file = open('spider/scraped/product.json', 'wb')
    self.product_exporter = JsonItemExporter(self.product_file, encoding="utf-8")
    self.product_exporter.start_exporting()

    self.shop_file = open('spider/scraped/shop.json', 'wb')
    self.shop_exporter = JsonItemExporter(self.shop_file, encoding="utf-8")
    self.shop_exporter.start_exporting()

    self.product_price_file = open('spider/scraped/productprice.json', 'wb')
    self.product_price_exporter = JsonItemExporter(self.product_price_file, encoding="utf-8")
    self.product_price_exporter.start_exporting()
def __init__(self, spider_name):
    self.file = open("output/{}_recipes.json".format(spider_name), 'wb')
    self.file.write(
        '{"date_scraped": "%s", "recipes": ' % datetime.datetime.now()
    )
    self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
    self.exporter.start_exporting()
def open_spider(self, spider):
    tp = self.gettype()['name']
    filename = '%s_%s.json' % (spider.name, tp)
    filename = os.path.join(self.saved_path, filename)
    file_ = open(filename, 'w+b')
    self.files[spider] = file_
    self.exporter = JsonItemExporter(file_, ensure_ascii=False, encoding='utf-8')
    self.exporter.start_exporting()
class JsonExportPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = codecs.open('%s_data.json' % spider.name, 'w+b', encoding='utf-8')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonExportPipeline(object): """ app.pipelines.exporter_json.JsonExportPipeline """ def __init__(self): self.files = {} self.exporter = None @classmethod def from_crawler(cls, crawler): pipeline = cls() crawler.signals.connect(pipeline.spider_opened, signals.spider_opened) crawler.signals.connect(pipeline.spider_closed, signals.spider_closed) return pipeline def spider_opened(self, spider): file_json = open('%s_items.json' % spider.name, 'w+b') self.files[spider] = file_json self.exporter = JsonItemExporter(file_json) self.exporter.start_exporting() def spider_closed(self, spider): self.exporter.finish_exporting() file_json = self.files.pop(spider) file_json.close() def process_item(self, item, spider): self.exporter.export_item(item) return item
class JsonPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('/home/gaoliang/Desktop/result.json', 'w+b')
        self.files[spider] = file
        # ensure_ascii=False keeps Chinese characters readable in the saved JSON
        self.exporter = JsonItemExporter(file, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class SaveItemToJson(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file=file)
        print(self.exporter)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class JsonPipeline(object): """Save Pipeline output to JSON.""" def __init__(self, spider_name): self.file = open("output/{}_recipes.json".format(spider_name), 'wb') self.file.write( '{"date_scraped": "%s", "recipes": ' % datetime.datetime.now() ) self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False) self.exporter.start_exporting() @classmethod def from_crawler(cls, crawler): return cls( spider_name=crawler.spider.name ) def close_spider(self): self.exporter.finish_exporting() self.file.write("}") self.file.close() def process_item(self, item): self.exporter.export_item(item) return item
class JsonExportPipeline(object):
    def __init__(self):
        _log.info('JsonExportPipeline.init....')
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        _log.info('JsonExportPipeline.from_crawler....')
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        _log.info('JsonExportPipeline.spider_opened....')
        file = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        _log.info('JsonExportPipeline.spider_closed....')
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        _log.info('JsonExportPipeline.process_item....')
        self.exporter.export_item(item)
        return item
class JsonPipelineExporterMixin:
    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        item = self.pre_process_item(item)
        self.exporter.export_item(item)
        return item

    def pre_process_item(self, item):
        return item
class JsonExporterPipeline(object):
    # Use the JSON exporter provided by Scrapy to write a JSON file
    def __init__(self):
        self.file = open('articleexport.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding="utf-8", ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
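A pipeline like the one above only runs if it is registered with the project. A minimal sketch of the corresponding settings.py entry, assuming the hypothetical module path myproject.pipelines:

# settings.py (module path is illustrative)
ITEM_PIPELINES = {
    'myproject.pipelines.JsonExporterPipeline': 300,
}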
def process_item(self, item, spider):
    designer_dir_name = skutils.escape_filename(item['name'])
    designer_dir_path = os.path.join(GlobalState.data_dir, designer_dir_name)
    file_path = os.path.join(designer_dir_path, designer_dir_name)

    # write json file
    with open('%s.json' % file_path, 'w+b') as f:
        exporter = JsonItemExporter(f)
        exporter.start_exporting()
        exporter.export_item(item)
        exporter.finish_exporting()

    # write excel file
    excelutils.write_designer_excel(item, file_path, designer_dir_name)

    return item
class VisionsJsonPipeline(object):
    def __init__(self):
        self.exporter = None

    def open_spider(self, spider):
        self.exporter = JsonItemExporter(open('%s.json' % spider.name, 'wb'))
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
class JsonWriterPipeline(BaseItemExporter):
    def __init__(self, **kwargs):
        self._configure(kwargs)
        self.files = {}
        self.encoder = json.JSONEncoder(ensure_ascii=False, **kwargs)

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = codecs.open('item.json', 'wb', encoding="utf-8")
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        if item['title']:  # and item['image_url']:
            item['description'] = re.sub("\r|\n", "", item['description'])
            item['general_impression'] = re.sub("\r|\n", "", item['general_impression'])
            item['subject_of_photo'] = re.sub("\r|\n", "", item['subject_of_photo'])
            item['composition'] = re.sub("\r|\n", "", item['composition'])
            item['use_of_camera'] = re.sub("\r|\n", "", item['use_of_camera'])
            item['depth_of_field'] = re.sub("\r|\n", "", item['depth_of_field'])
            item['color_lighting'] = re.sub("\r|\n", "", item['color_lighting'])
            item['focus'] = re.sub("\r|\n", "", item['focus'])
            ##line = json.dumps(dict(item)) + '\n'
            ##self.file.write(line)
            self.exporter.export_item(item)
        return item
class JsonExportPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
class WikicrawlerPipeline(object):
    def __init__(self):
        self.item_file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.item_file)

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.item_file.close()
class SiteMapJsonExportPipeline(object):
    '''Process the SiteMap spider output Items, and write them as JSON to an output file.
    The output file is taken from the Spider's config (spider.config).'''

    @classmethod
    def from_crawler(cls, crawler):
        ''' Boilerplate '''
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open(spider.config['map_file'], 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
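This pipeline assumes the spider exposes a config mapping with a map_file entry. A minimal sketch of such a spider attribute (the spider name and path are illustrative):

class SiteMapSpider(scrapy.Spider):
    # Hypothetical spider showing the config attribute the pipeline reads.
    name = 'sitemap'
    config = {'map_file': 'output/sitemap_items.json'}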
def __init__(self):
    self.file = open('../data/search_results.json', 'wb')
    self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
    self.exporter.start_exporting()
def open_spider(self, spider):
    self.exporter = JsonItemExporter(open('%s.json' % spider.name, 'wb'))
    self.exporter.start_exporting()
def open_spider(self, spider):
    f = open('items.json', 'wb')
    self.exporter = JsonItemExporter(f)
    self.exporter.start_exporting()
def __init__(self):
    # self.file = open('/output/article_exporter.json', 'wb')
    self.file = open('/output/company_exporter.json', 'wb')
    self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
def open_spider(self, spider):
    self.files[spider.registry] = open('data/' + spider.registry + '.json', 'wb')
    self.exporters[spider.registry] = JsonItemExporter(
        self.files[spider.registry], encoding='utf-8', ensure_ascii=False)
    self.exporters[spider.registry].start_exporting()
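Keyed exporters like these need a matching teardown. A sketch of the corresponding close_spider, assuming the same files and exporters dicts are attributes of the pipeline:

def close_spider(self, spider):
    # Sketch of the teardown matching the per-registry exporter above.
    self.exporters[spider.registry].finish_exporting()
    self.files[spider.registry].close()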
def _get_exporter(self, **kwargs):
    return JsonItemExporter(self.output, **kwargs)
def __init__(self):
    self.file = open(generate_file_name('json', 'output'), 'wb')
    self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    file = codecs.open('item.json', 'wb', encoding="utf-8")
    self.files[spider] = file
    self.exporter = JsonItemExporter(file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    _log.info('JsonExportPipeline.spider_opened....')
    file = open('%s.json' % spider.name, 'w+b')
    self.files[spider] = file
    self.exporter = JsonItemExporter(file)
    self.exporter.start_exporting()
def __init__(self):
    self.file = open('article2.json', 'wb')
    self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
def __init__(self):
    self.item_file = open('items.json', 'wb')
    self.exporter = JsonItemExporter(self.item_file)
def open_spider(self, spider):
    time_now = datetime.datetime.now().strftime('%m-%d-%Y')
    file_name = f"House-{time_now}.json"
    self.file = open(file_name, 'wb')
    self.exporter = JsonItemExporter(self.file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    file = open(self.out_file, 'w+b')
    self.files[spider] = file
    self.exporter = JsonItemExporter(file)
    self.exporter.start_exporting()
def __init__(self): self.file = open("trans.json", 'wb') self.exporter = JsonItemExporter( self.file ) # json is by definition an unordered collection thus you need to order cols in python later self.exporter.start_exporting()
def spider_opened(self, spider):
    file = open('%s_%s.json' % (spider.name, spider.categoryId), 'w+b')
    self.files[spider] = file
    self.exporter = JsonItemExporter(file)
    self.exporter.start_exporting()
def __init__(self): self.fp=open("tv.json","wb") self.exporter=JsonItemExporter(self.fp,encoding='utf-8',ensure_ascii=False) self.exporter.start_exporting()
def __init__(self): self.file = open("news_Crawl_from20060101_200809011.json", 'wb') self.exporter = JsonItemExporter(self.file, encoding='utf-8') self.exporter.start_exporting()
def __init__(self, file_path):
    self.file = open(file_path, 'wb')
    self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
    self.exporter.start_exporting()
def __init__(self): self.file = open("articleexport.json", "wb") self.exporter = JsonItemExporter(self.file, encoding="utf-8", ensure_ascii=False) self.exporter.start_exporting()
class TpdbApiScenePipeline:
    def __init__(self, crawler):
        if crawler.settings['ENABLE_MONGODB']:
            db = MongoClient(crawler.settings['MONGODB_URL'])
            self.db = db['scrapy']

        self.crawler = crawler

        if crawler.settings.get('path'):
            path = crawler.settings.get('path')
        else:
            path = crawler.settings.get('DEFAULT_EXPORT_PATH')

        if crawler.settings.get('file'):
            filename = crawler.settings.get('file')
            if '\\' not in filename and '/' not in filename:
                filename = Path(path, filename)
        else:
            filename = Path(path, '%s_%s.json' % (crawler.spidercls.name, time.strftime('%Y%m%d-%H%M')))

        if crawler.settings.getbool('export'):
            print(f'*** Exporting to file: {filename}')
            self.fp = open(filename, 'wb')
            self.fp.write('{"scenes":['.encode())
            if crawler.settings.getbool('oneline'):
                self.exporter = JsonLinesItemExporter(self.fp, ensure_ascii=False, encoding='utf-8')
            else:
                self.exporter = JsonItemExporter(self.fp, ensure_ascii=False, encoding='utf-8', sort_keys=True, indent=2)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    async def process_item(self, item, spider):
        if spider.debug is True:
            return item

        # So we don't re-send scenes that have already been scraped
        if self.crawler.settings['ENABLE_MONGODB']:
            if spider.force is not True:
                result = self.db.scenes.find_one({'url': item['url']})
                if result is not None:
                    return

        payload = {
            'title': item['title'],
            'description': item['description'],
            'date': item['date'],
            'image': item['image'],
            'image_blob': item['image_blob'],
            'url': item['url'],
            'performers': item['performers'],
            'tags': item['tags'],
            'external_id': str(item['id']),
            'site': item['site'],
            'trailer': item['trailer'],
            'parent': item['parent'],
            'network': item['network'],
            'force_update': self.crawler.settings.getbool('FORCE_UPDATE'),
        }

        # Post the scene to the API - requires auth with permissions
        if self.crawler.settings['TPDB_API_KEY'] and not spider.settings.get('local'):
            headers = {
                'Authorization': 'Bearer %s' % self.crawler.settings['TPDB_API_KEY'],
                'Accept': 'application/json',
                'Content-Type': 'application/json',
                'User-Agent': 'tpdb-scraper/1.0.0'
            }
            response = Http.post('https://api.metadataapi.net/scenes', json=payload, headers=headers)
            if response:
                if response.ok:
                    disp_result = 'Submitted OK'
                else:
                    disp_result = 'Submission Error: Code #%d' % response.status_code
            else:
                disp_result = 'Submission Error: No Response Code'

            url_hash = hashlib.sha1(str(item['url']).encode('utf-8')).hexdigest()
            if self.crawler.settings['MONGODB_ENABLE']:
                if not response.ok:
                    self.db.errors.replace_one({'_id': url_hash}, {
                        'url': item['url'],
                        'error': 1,
                        'when': datetime.now().isoformat(),
                        'response': response.json()
                    }, upsert=True)
                else:
                    self.db.scenes.replace_one({'_id': url_hash}, dict(item), upsert=True)
        else:
            disp_result = 'Local Run, Not Submitted'

        if spider.settings.getbool('display') and spider.settings.get('LOG_LEVEL') == 'INFO':
            if len(item['title']) >= 50:
                title_length = 5
            else:
                title_length = 55 - len(item['title'])

            if len(item['site']) >= 15:
                site_length = 5
            else:
                site_length = 20 - len(item['site'])

            if "T" in item['date']:
                disp_date = re.search(r'(.*)T\d', item['date']).group(1)
            else:
                disp_date = item['date']

            print(f"Item: {item['title'][0:50]}" + " " * title_length + f"{item['site'][0:15]}" + " " * site_length + f"\t{str(item['id'])[0:15]}\t{disp_date}\t{item['url']}\t{disp_result}")

        if spider.settings.getbool('export'):
            item2 = item.copy()
            if not spider.settings.get('showblob'):
                if 'image_blob' in item2:
                    item2.pop('image_blob', None)
            self.exporter.export_item(item2)

        return item

    def close_spider(self, spider):
        if spider.settings.getbool('export'):
            self.fp.write(']}'.encode())
            self.fp.close()
def open_spider(self, spider):
    self.file = open(result_json_path, "wb")
    self.exporter = JsonItemExporter(self.file, encoding="utf-8", ensure_ascii=False)
    self.exporter.start_exporting()
class TpdbApiPerformerPipeline:
    def __init__(self, crawler):
        if crawler.settings['ENABLE_MONGODB']:
            db = MongoClient(crawler.settings['MONGODB_URL'])
            self.db = db['scrapy']

        self.crawler = crawler

        if crawler.settings.get('path'):
            path = crawler.settings.get('path')
        else:
            path = crawler.settings.get('DEFAULT_EXPORT_PATH')

        if crawler.settings.get('file'):
            filename = crawler.settings.get('file')
            if '\\' not in filename and '/' not in filename:
                filename = Path(path, filename)
        else:
            filename = Path(path, '%s_%s-performers.json' % (crawler.spidercls.name, time.strftime('%Y%m%d-%H%M')))

        if crawler.settings.getbool('export'):
            print(f"*** Exporting to file: {filename}")
            self.fp = open(filename, 'wb')
            self.fp.write('{"scenes":['.encode())
            if crawler.settings.getbool('oneline'):
                self.exporter = JsonLinesItemExporter(self.fp, ensure_ascii=False, encoding='utf-8')
            else:
                self.exporter = JsonItemExporter(self.fp, ensure_ascii=False, encoding='utf-8', sort_keys=True, indent=2)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    async def process_item(self, item, spider):
        if self.crawler.settings['ENABLE_MONGODB']:
            if spider.force is not True:
                result = self.db.performers.find_one({'url': item['url']})
                if result is not None:
                    return

        payload = {
            'name': item['name'],
            'site': item['network'],
            'url': item['url'],
            'image': item['image'],
            'image_blob': item['image_blob'],
            'bio': item['bio'],
            'gender': item['gender'],
            'birthday': item['birthday'],
            'astrology': item['astrology'],
            'birthplace': item['birthplace'],
            'ethnicity': item['ethnicity'],
            'nationality': item['nationality'],
            'eyecolor': item['eyecolor'],
            'haircolor': item['haircolor'],
            'weight': item['weight'],
            'height': item['height'],
            'measurements': item['measurements'],
            'tattoos': item['tattoos'],
            'piercings': item['piercings'],
            'cupsize': item['cupsize'],
            'fakeboobs': item['fakeboobs'],
        }

        # Post the scene to the API - requires auth with permissions
        if self.crawler.settings['TPDB_API_KEY'] and not spider.settings.get('local'):
            headers = {
                'Authorization': 'Bearer %s' % self.crawler.settings['TPDB_API_KEY'],
                'Accept': 'application/json',
                'Content-Type': 'application/json',
                'User-Agent': 'tpdb-scraper/1.0.0'
            }
            response = Http.post('https://api.metadataapi.net/performer_sites', json=payload, headers=headers, verify=False)
            if response:
                if response.ok:
                    disp_result = 'Submitted OK'
                else:
                    disp_result = 'Submission Error: Code #' + str(response.status_code)
            else:
                disp_result = 'Submission Error: No Response Code'

            if self.crawler.settings['MONGODB_ENABLE']:
                url_hash = hashlib.sha1(str(item['url']).encode('utf-8')).hexdigest()
                if not response.ok:
                    self.db.errors.replace_one({'_id': url_hash}, {
                        'url': item['url'],
                        'error': 1,
                        'when': datetime.now().isoformat(),
                        'response': response.json()
                    }, upsert=True)
                else:
                    self.db.performers.replace_one({'_id': url_hash}, dict(item), upsert=True)
        else:
            disp_result = 'Local Run, Not Submitted'

        if spider.settings.getbool('display') and spider.settings.get('LOG_LEVEL') == 'INFO':
            name_length = 50 - len(item['name'])
            if name_length < 1:
                name_length = 1
            print(f"Performer: {item['name']}" + " " * name_length + f"{item['network']}\t{item['url']}\t{disp_result}")

        if spider.settings.getbool('export'):
            item2 = item.copy()
            if not spider.settings.get('showblob'):
                if "image_blob" in item2:
                    item2.pop('image_blob', None)
            self.exporter.export_item(item2)

        return item

    def close_spider(self, spider):
        if spider.settings.getbool('export'):
            self.fp.write(']}'.encode())
            self.fp.close()
def spider_opened(self, spider):
    file = open('%s.json' % spider.name, 'w+b')
    self.files[spider] = file
    self.exporter = JsonItemExporter(file)
    self.exporter.start_exporting()
def open_spider(self, spider):
    self.file = open('results.jl', 'wb')
    self.exp = JsonItemExporter(self.file, indent=4)
    self.exp.start_exporting()
def __init__(self):
    self.file = open('baike.json', 'wb')
    self.exporter = JsonItemExporter(self.file, encoding="utf-8", ensure_ascii=False)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    file = open('/home/gaoliang/Desktop/result.json', 'w+b')
    self.files[spider] = file
    # ensure_ascii=False keeps Chinese characters readable in the saved JSON
    self.exporter = JsonItemExporter(file, ensure_ascii=False)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    file = open('%s_products.json' % spider.name, 'w+b')
    self.files[spider] = file
    self.exporter = JsonItemExporter(file, ensure_ascii=False)
    self.exporter.start_exporting()
def __init__(self):
    self.file = open('questions_exporter.json', 'wb')
    # ensure_ascii=False so Chinese text is written readably
    self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
    self.exporter.start_exporting()
def __init__(self):
    self.file = open('book2.json', 'wb')
    self.exporter = JsonItemExporter(file=self.file, encoding='utf-8', ensure_ascii=False)
    self.exporter.start_exporting()
def __init__(self):
    self.file = open('articleexport.json', 'ab')
    self.exporter = JsonItemExporter(self.file, encoding="utf-8", ensure_ascii=False)
    self.exporter.start_exporting()
def __init__(self):
    self.fp = open('duanzi.json', 'wb')
    self.exporter = JsonItemExporter(self.fp, ensure_ascii=False, encoding='utf-8')
    self.exporter.start_exporting()  # start exporting
def __init__(self, file, **kwargs):
    JsonItemExporter.__init__(self, file, ensure_ascii=False, **kwargs)
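This constructor belongs to a JsonItemExporter subclass that forces ensure_ascii=False. One way to use such a subclass, sketched here, is to register it as the exporter for the json feed format in settings.py (the class path is illustrative):

# settings.py (class path is illustrative)
FEED_EXPORTERS = {
    'json': 'myproject.exporters.Utf8JsonItemExporter',
}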
def __init__(self): self.file = open("data_export.json", 'wb') self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False) self.exporter.start_exporting()
def __init__(self):
    self.file = open('channels.json', 'wb')
    self.exporter = JsonItemExporter(self.file)
    self.exporter.start_exporting()
def __init__(self): self.file = open("../data/json/players_urls.json", 'wb') self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False) self.exporter.start_exporting()
def spider_opened(self, spider):
    file = open('%s_items.json' % spider.name, 'w+b')
    self.files[spider] = file
    self.exporter = JsonItemExporter(file)
    self.exporter.start_exporting()
def open_spider(self, spider):
    self.file = open('Guoke.json', 'wb')
    self.exporter = JsonItemExporter(self.file, ensure_ascii=False, encoding='utf-8')
    self.exporter.start_exporting()
def open_spider(self, spider):
    self.file = open(self.filename, 'wb')
    self.exporter = JsonItemExporter(self.file)
    self.exporter.start_exporting()
def create_exporter(self, filename):
    file = open(filename, "w+b")
    exporter = JsonItemExporter(file)
    exporter.start_exporting()
    self.files.append(file)
    return exporter
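A helper like create_exporter is typically called once per output file. A sketch of how a pipeline might use it, with hypothetical file names and a matching teardown:

def open_spider(self, spider):
    # Hypothetical usage of create_exporter; file names are illustrative.
    self.files = []
    self.article_exporter = self.create_exporter('articles.json')
    self.author_exporter = self.create_exporter('authors.json')

def close_spider(self, spider):
    self.article_exporter.finish_exporting()
    self.author_exporter.finish_exporting()
    for file in self.files:
        file.close()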