def open_spider(self, spider):
    """Copy spider attributes onto the pipeline and begin a JSON export.

    Mirrors ``spider.cols`` and ``spider.start_urls`` on ``self``, opens
    ``test.json`` for binary read/write (truncating it), and starts a
    ``JsonItemExporter`` over that file.
    """
    self.cols = spider.cols
    self.start_urls = spider.start_urls

    handle = open('test.json', 'w+b')
    self.file = handle

    exporter = JsonItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open a timestamped JSON output file for ``spider`` and start exporting.

    The file is ``output/<spider.name>_<epoch seconds>_items.json``, opened in
    binary write mode. The handle is stored in ``self.fjsons`` keyed by the
    spider, and ``self.exporter`` is replaced with a started
    ``JsonItemExporter`` writing to it.
    """
    # time.time() is already seconds since the epoch. The previous
    # time.mktime(time.gmtime()) fed a UTC struct_time to a function that
    # interprets its argument as local time, so the stamp was skewed by the
    # machine's UTC offset.
    stamp = int(time.time())
    fjson = open('output/%s_%s_items.json' % (spider.name, stamp), 'wb')
    self.fjsons[spider] = fjson
    self.exporter = JsonItemExporter(fjson)
    self.exporter.start_exporting()
def initialize_exporters(self):
    """Open one JSON file per meme type and start an exporter on each.

    For every entry in ``self.meme_types`` the file named by
    ``self.get_json_filename(meme_type)`` is opened, recorded in
    ``self.files``, and wrapped in a started ``JsonItemExporter`` stored in
    ``self.exporters`` under the same key.
    """
    for meme_type in self.meme_types:
        # Binary mode: JsonItemExporter writes encoded bytes to its file, so
        # a text-mode handle fails on Python 3.
        json_file = open(self.get_json_filename(meme_type), 'wb')
        self.files[meme_type] = json_file

        exporter = JsonItemExporter(json_file)
        self.exporters[meme_type] = exporter
        exporter.start_exporting()
def open_spider(self, spider): print 'Opening spider.' self.files['question'] = codecs.open(self.files_path['question_file'], 'w', encoding='utf-8') self.files['answer'] = codecs.open(self.files_path['answer_file'], 'w', encoding='utf-8') self.files['user'] = codecs.open(self.files_path['user_file'], 'w', encoding='utf-8') self.exporters['question'] = JsonItemExporter(self.files['question']) self.exporters['answer'] = JsonItemExporter(self.files['answer']) self.exporters['user'] = JsonItemExporter(self.files['user']) for exporter in self.exporters.itervalues(): exporter.start_exporting()
def process_item(self, item, spider):
    """Export ``item`` as JSON, creating the exporter on the first call.

    While ``self.first_item`` is true, the flag is cleared, the file
    ``<spider.name>_items.json`` is opened in binary write mode, and a started
    ``JsonItemExporter`` is stored on ``self.exporter``. The item is then
    exported and returned unchanged.
    """
    if self.first_item:
        self.first_item = False
        out = open('%s_items.json' % spider.name, 'wb')
        # Question: when Scrapy's item exporter writes Chinese text to a JSON
        # file the content comes out as \u escape sequences; how can it be
        # written as readable Chinese instead?
        # http://stackoverflow.com/questions/18337407/saving-utf-8-texts-in-json-dumps-as-utf8-not-as-u-escape-sequence
        # That answer says to set JSONEncoder's ensure_ascii parameter to
        # False, so ensure_ascii=False is passed when constructing
        # scrapy.contrib.exporter.JsonItemExporter.
        exporter = JsonItemExporter(out, ensure_ascii=False)
        self.exporter = exporter
        exporter.start_exporting()

    self.exporter.export_item(item)
    return item
def process_item(self, item, spider):
    """Write a ``ContentItem`` to its own JSON file, then drop it.

    The item is handled only when ``FeedSpider.is_content_op(spider)`` is
    true and ``item`` is a ``ContentItem``; any other item is returned
    unchanged.

    The output directory is created through the spider's
    ``make_sure_path_exists`` helper, and the file path is built by the spider
    from ``item['id']`` and ``item['name']`` (spaces replaced with ``-``).
    Existing content at that path is overwritten.

    Raises:
        DropItem: always, after the content file has been written, so the
            item does not continue through the pipeline.
    """
    if not (FeedSpider.is_content_op(spider) and isinstance(item, ContentItem)):
        return item

    spider.make_sure_path_exists(spider.get_content_output_dir_path())
    file_path = spider.get_content_output_file_path(
        item['id'], item['name'].replace(' ', '-'))

    # Opening for writing already truncates an existing file, so the former
    # exists-check plus seek(0)/truncate() was redundant. Binary mode is used
    # because JsonItemExporter writes encoded bytes.
    self.file = open(file_path, 'wb')
    try:
        self.item_exporter = JsonItemExporter(self.file, indent=4)
        self.item_exporter.export_item(item)
    finally:
        # Close the handle even when the export fails, so it is not leaked.
        self.file.close()

    log.msg('ContentWriterPipeline, saved content file %s successful.' % file_path)
    raise DropItem('Save item success')
def __init__(self):
    """Fit one classifier per configured entry and prepare JSON exporters.

    For every key in ``self.status.classifiers`` a ``ClassifierFactory`` is
    built from that entry, its "both" data set is created, and a
    ``LogisticRegression(C=1e5)`` based classifier is created and fitted.
    The ``(name, classifier)`` pairs are then sorted by the value returned
    from ``estimate_accuracy(5, verbose=True)``; the first pair after sorting
    is reported as needing the most improvement. One ``JsonItemExporter`` is
    created per entry of that classifier's ``'classifications'``, each
    writing to ``<classification>.json``.

    The exporters are created but not started here.
    """
    self.status = Status()
    self.classifiers = []
    self.exporters = {}

    for classifier in self.status.classifiers.keys():
        spec = self.status.classifiers[classifier]
        CF = ClassifierFactory(spec)
        CF.create_data_set("both")
        lc = CF.create_classifier(
            LogisticRegression(C=1e5), spec['features']())
        lc.fit()
        self.classifiers.append((classifier, lc))

    # Ascending sort on the estimate_accuracy result, so index 0 holds the
    # lowest-scoring classifier.
    self.classifiers = sorted(
        self.classifiers,
        key=lambda a: a[1].estimate_accuracy(5, verbose=True))
    weakest = self.classifiers[0][0]

    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print("Classifier {0} needs the most improvement; selected for export".format(
        weakest))

    for classification in self.status.classifiers[weakest]['classifications']:
        # open() replaces the Python-2-only file() builtin.
        f = open("{0}.json".format(classification), "wb")
        self.exporters[classification] = JsonItemExporter(f)
def open_spider(self, spider):
    """Start a JSON export into ``data/<spider.name>.json``.

    The file is opened for writing (truncating any existing content) and a
    started ``JsonItemExporter`` over it is stored on ``self.exporter``.
    """
    # Binary mode: JsonItemExporter writes encoded bytes to its file, so a
    # text-mode handle fails on Python 3.
    out = open('data/%s.json' % spider.name, 'wb')
    self.exporter = JsonItemExporter(out)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<spider.name>.json`` and start a JSON export for ``spider``.

    The file is opened for binary read/write (truncating it) and remembered
    in ``self.files`` under the spider. ``self.exporter`` is replaced with a
    started ``JsonItemExporter`` over that file. No ``fields_to_export``
    restriction is set here.
    """
    out = open('%s.json' % spider.name, 'w+b')
    self.files[spider] = out

    exporter = JsonItemExporter(out)
    self.exporter = exporter
    exporter.start_exporting()
def __init__(self):
    """Open ``./collected.json`` and start a UTF-8 JSON export.

    The file is opened in binary write mode and kept on ``self.file``. The
    exporter is created with ``encoding='utf-8'`` and ``ensure_ascii=False``
    and is started immediately.
    """
    sink = open("./collected.json", 'wb')
    self.file = sink

    exporter = JsonItemExporter(sink, encoding='utf-8', ensure_ascii=False)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Print the configured items path, then start a dated JSON export.

    Prints the ``JASONFILE_PATH`` project setting concatenated with
    ``items.json``. Then opens ``items<YYYY-MM-DD>.json`` (today's date) in
    binary write mode and starts a ``JsonItemExporter`` over it.
    """
    configured_prefix = get_project_settings().get('JASONFILE_PATH')
    print(configured_prefix + 'items.json')

    # NOTE(review): the path printed above is not the one opened below, which
    # is a dated file name with no JASONFILE_PATH prefix. Confirm which one
    # is intended.
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    self.file = open(''.join(('items', today, '.json')), 'wb')

    exporter = JsonItemExporter(self.file)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Start a JSON export over the file already held in ``self.file``."""
    exporter = JsonItemExporter(self.file)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<spider.name>_items.json`` and start a JSON export.

    The handle (binary read/write, truncated) is stored in ``self.files``
    keyed by the spider, and ``self.exporter`` becomes a started
    ``JsonItemExporter`` writing to it.
    """
    items_file = open('%s_items.json' % spider.name, 'w+b')
    self.files[spider] = items_file

    exporter = JsonItemExporter(items_file)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``items.json`` in binary write mode and start a JSON export.

    The handle is kept on ``self.file`` and the started exporter on
    ``self.exporter``.
    """
    sink = open('items.json', 'wb')
    self.file = sink

    exporter = JsonItemExporter(sink)
    self.exporter = exporter
    exporter.start_exporting()
def __init__(self):
    """Open ``news.json`` and start a JSON export with UTF-8 encoding.

    The binary-mode handle is stored on ``self.f`` and the started exporter
    on ``self.exporter``.
    """
    news_file = open('news.json', 'wb')
    self.f = news_file

    exporter = JsonItemExporter(news_file, encoding='utf-8')
    self.exporter = exporter
    exporter.start_exporting()
def __init__(self):
    """Open ``items.json`` for writing and start a JSON export.

    The handle is kept on ``self.file`` and the started ``JsonItemExporter``
    on ``self.exporter``.
    """
    # Binary mode: JsonItemExporter writes encoded bytes to its file, so a
    # text-mode handle fails on Python 3.
    self.file = open('items.json', 'wb')
    self.exporter = JsonItemExporter(self.file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``exports/json/<spider.name>.json`` and start a JSON export.

    The handle (binary read/write, truncated) is stored in ``self.files``
    keyed by the spider, and ``self.exporter`` becomes a started
    ``JsonItemExporter`` writing to it.
    """
    target = open('exports/json/%s.json' % spider.name, 'w+b')
    self.files[spider] = target

    exporter = JsonItemExporter(target)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<spider.name>_output.json`` and start a JSON export.

    The handle (binary read/write, truncated) is kept on ``self.file`` and
    the started exporter on ``self.exporter``.
    """
    output_file = open('%s_output.json' % spider.name, 'w+b')
    self.file = output_file

    exporter = JsonItemExporter(output_file)
    self.exporter = exporter
    exporter.start_exporting()
def _get_exporter(self, **kwargs):
    """Build a ``JsonItemExporter`` over ``self.output``.

    Any keyword arguments are passed through unchanged to the exporter's
    constructor. The exporter is returned without being started.
    """
    exporter = JsonItemExporter(self.output, **kwargs)
    return exporter
def spider_opened(self, spider):
    """Open ``<spider.name>Items.json`` in append mode and start a JSON export.

    The handle is stored in ``self.files`` keyed by the spider, and
    ``self.exporter`` becomes a started ``JsonItemExporter`` created with
    ``ensure_ascii=False``.
    """
    # Binary append: JsonItemExporter writes encoded bytes to its file, so a
    # text-mode handle fails on Python 3. Append semantics are kept as-is.
    # NOTE(review): each export writes its own JSON array, so appending to a
    # file left by an earlier run yields several arrays back to back, which
    # is not valid JSON. Confirm that append (rather than overwrite) is
    # really wanted.
    out = open('%sItems.json' % spider.name, 'a+b')
    self.files[spider] = out
    self.exporter = JsonItemExporter(out, ensure_ascii=False)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open the shared output file and start a JSON export.

    ``../website/data/complete.json`` is opened for binary read/write
    (truncating it) and stored in ``self.files`` keyed by the spider.
    ``self.exporter`` becomes a started ``JsonItemExporter`` writing to it.
    """
    complete_file = open('../website/data/complete.json', 'w+b')
    self.files[spider] = complete_file

    exporter = JsonItemExporter(complete_file)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<spider.name>_products.json`` and start an indented JSON export.

    The handle (binary read/write, truncated) is stored in ``self.files``
    keyed by the spider, and ``self.exporter`` becomes a started
    ``JsonItemExporter`` created with ``indent=4``.
    """
    products_file = open('%s_products.json' % spider.name, 'w+b')
    self.files[spider] = products_file

    # Original author's note: ensure_ascii=False should be passed here, but
    # it does not work.
    exporter = JsonItemExporter(products_file, indent=4)
    self.exporter = exporter
    exporter.start_exporting()