コード例 #1
0
ファイル: pipelines.py プロジェクト: etongle/dc
    def open_spider(self, spider):
        """Copy settings from *spider* and begin a JSON export to test.json.

        Stores the spider's ``cols`` and ``start_urls`` on the pipeline, opens
        ``test.json`` in binary read/write mode, and starts a
        ``JsonItemExporter`` on that file.
        """
        # Keep the spider attributes on the pipeline itself.
        self.cols = spider.cols
        self.start_urls = spider.start_urls

        # 'w+b' truncates any existing test.json.
        output = open('test.json', 'w+b')
        self.file = output

        exporter = JsonItemExporter(output)
        self.exporter = exporter
        exporter.start_exporting()
コード例 #2
0
 def spider_opened(self, spider):
     """Open a timestamped JSON output file for *spider* and start exporting.

     The file is ``output/<spider.name>_<epoch seconds>_items.json``. The
     handle is stored in ``self.fjsons`` keyed by the spider, and the
     exporter is stored in ``self.exporter``.
     """
     # time.time() is the actual Unix timestamp. The previous
     # time.mktime(time.gmtime()) passed a UTC struct_time to a function that
     # interprets its argument as local time, so the stamp was shifted by the
     # machine's UTC offset.
     timestamp = int(time.time())
     fjson = open('output/%s_%d_items.json' % (spider.name, timestamp), 'wb')
     self.fjsons[spider] = fjson
     self.exporter = JsonItemExporter(fjson)
     self.exporter.start_exporting()
コード例 #3
0
ファイル: pipelines.py プロジェクト: ericmoritz/meme_scraper
    def initialize_exporters(self):
        """Create and start one JSON exporter per entry in ``self.meme_types``.

        For each meme type the output path comes from
        ``self.get_json_filename``. The open file is recorded in
        ``self.files`` and the exporter in ``self.exporters``, both keyed by
        the meme type.
        """
        for meme_type in self.meme_types:
            json_filename = self.get_json_filename(meme_type)
            # Binary mode: Scrapy's item exporters write bytes, so a
            # text-mode handle fails on Python 3 and applies newline
            # translation on Windows.
            json_file = open(json_filename, 'wb')
            self.files[meme_type] = json_file

            exporter = JsonItemExporter(json_file)
            self.exporters[meme_type] = exporter
            exporter.start_exporting()
コード例 #4
0
    def open_spider(self, spider):
        print 'Opening spider.'
        self.files['question'] = codecs.open(self.files_path['question_file'],
                                             'w',
                                             encoding='utf-8')
        self.files['answer'] = codecs.open(self.files_path['answer_file'],
                                           'w',
                                           encoding='utf-8')
        self.files['user'] = codecs.open(self.files_path['user_file'],
                                         'w',
                                         encoding='utf-8')

        self.exporters['question'] = JsonItemExporter(self.files['question'])
        self.exporters['answer'] = JsonItemExporter(self.files['answer'])
        self.exporters['user'] = JsonItemExporter(self.files['user'])

        for exporter in self.exporters.itervalues():
            exporter.start_exporting()
コード例 #5
0
 def process_item(self, item, spider):
     """Export *item* to ``<spider.name>_items.json`` and pass it along.

     The output file and exporter are created lazily when the first item
     arrives (tracked by ``self.first_item``). Every item, including the
     first, is then handed to the exporter and returned unchanged.
     """
     if self.first_item:
         self.first_item = False
         output = open('%s_items.json' % spider.name, 'wb')
         # Question: when Scrapy's item exporter writes Chinese text to a JSON
         # file it comes out as \u escape sequences; how can the real
         # characters be written instead?
         # http://stackoverflow.com/questions/18337407/saving-utf-8-texts-in-json-dumps-as-utf8-not-as-u-escape-sequence
         # The answer there is to set JSONEncoder's ensure_ascii to False, so
         # ensure_ascii=False is simply passed through when constructing
         # scrapy.contrib.exporter.JsonItemExporter.
         self.exporter = JsonItemExporter(output, ensure_ascii=False)
         self.exporter.start_exporting()

     exporter = self.exporter
     exporter.export_item(item)
     return item
コード例 #6
0
 def process_item(self, item, spider):
     """Write a content item to its own JSON file, then drop it.

     Items that are not ``ContentItem`` instances, or that arrive while the
     spider is not in a content operation (``FeedSpider.is_content_op``), are
     returned untouched for later pipelines.

     Raises:
         DropItem: always, after a content item has been saved, so the item
             does not continue through the pipeline.
     """
     if not (FeedSpider.is_content_op(spider) and isinstance(item, ContentItem)):
         return item

     spider.make_sure_path_exists(spider.get_content_output_dir_path())
     file_path = spider.get_content_output_file_path(item['id'], item['name'].replace(' ', '-'))

     # Opening for writing already truncates an existing file, so no explicit
     # seek/truncate is needed. Binary mode is used because Scrapy's item
     # exporters write bytes.
     self.file = open(file_path, 'wb')
     try:
         self.item_exporter = JsonItemExporter(self.file, indent=4)
         self.item_exporter.export_item(item)
     finally:
         # Release the handle even if serialising the item fails.
         self.file.close()

     log.msg('ContentWriterPipeline, saved content file %s successful.' % file_path)
     raise DropItem('Save item success')
コード例 #7
0
    def __init__(self):
        """Train every configured classifier and open exporters for the weakest.

        A classifier is built and fitted for each entry in
        ``Status().classifiers``. The classifiers are then sorted in ascending
        order of ``estimate_accuracy(5, verbose=True)``, and the first one is
        selected. One ``<classification>.json`` file with a
        ``JsonItemExporter`` is opened per classification of that classifier
        and stored in ``self.exporters``.
        """
        self.status = Status()
        self.classifiers = []
        self.exporters = {}

        for classifier, config in self.status.classifiers.items():
            CF = ClassifierFactory(config)
            CF.create_data_set("both")
            lc = CF.create_classifier(
                LogisticRegression(C=1e5),
                config['features']())
            lc.fit()
            self.classifiers.append((classifier, lc))

        # Ascending sort: the lowest-scoring classifier ends up at index 0.
        self.classifiers = sorted(
            self.classifiers,
            key=lambda a: a[1].estimate_accuracy(5, verbose=True))
        weakest = self.classifiers[0][0]
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print("Classifier {0} needs the most improvement; selected for export".format(
            weakest))

        for classification in self.status.classifiers[weakest]['classifications']:
            # open() replaces the Python 2-only file() builtin.
            f = open("{0}.json".format(classification), "wb")
            self.exporters[classification] = JsonItemExporter(f)
コード例 #8
0
 def open_spider(self, spider):
     """Start a JSON export to ``data/<spider.name>.json``.

     The exporter is stored in ``self.exporter``. The file handle is held
     only by the exporter.
     """
     # Binary mode: Scrapy's item exporters write bytes, so a text-mode
     # handle fails on Python 3.
     output = open('data/%s.json' % spider.name, 'wb')
     self.exporter = JsonItemExporter(output)
     self.exporter.start_exporting()
コード例 #9
0
ファイル: pipelines.py プロジェクト: otwo2002/community
 def spider_opened(self, spider):
     """Open ``<spider.name>.json`` and start a JSON exporter on it.

     The file handle is recorded in ``self.files`` under the spider, and the
     exporter is stored in ``self.exporter``.
     """
     output = open('%s.json' % spider.name, 'w+b')
     self.files[spider] = output

     exporter = JsonItemExporter(output)
     self.exporter = exporter
     # To restrict the exported fields, set exporter.fields_to_export here,
     # e.g. ['weightProduct', 'priceProduct'].
     exporter.start_exporting()
コード例 #10
0
 def __init__(self):
     """Open ./collected.json and start a UTF-8, non-ASCII-escaping JSON export."""
     self.file = open("./collected.json", 'wb')

     # Options passed through to the exporter.
     exporter_options = {'encoding': 'utf-8', 'ensure_ascii': False}
     self.exporter = JsonItemExporter(self.file, **exporter_options)
     self.exporter.start_exporting()
コード例 #11
0
 def spider_opened(self, spider):
     """Open ``items<YYYY-MM-DD>.json`` for today and start a JSON exporter.

     Also prints the ``JASONFILE_PATH`` project setting followed by
     ``items.json``.
     """
     # NOTE(review): the printed path is not the file opened below; the
     # JASONFILE_PATH setting is only used for this message.
     print(get_project_settings().get('JASONFILE_PATH') + 'items.json')

     date_stamp = datetime.datetime.today().strftime('%Y-%m-%d')
     output = open('items' + date_stamp + '.json', 'wb')
     self.file = output

     exporter = JsonItemExporter(output)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #12
0
 def spider_opened(self, spider):
     """Attach a JSON exporter to the already-open ``self.file`` and start it."""
     exporter = JsonItemExporter(self.file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #13
0
ファイル: jsonexporter.py プロジェクト: RuichaoQiu/gasmanager
 def spider_opened(self, spider):
     """Begin exporting items for *spider* to ``<spider.name>_items.json``.

     The open file is stored in ``self.files`` keyed by the spider.
     """
     items_file = open('%s_items.json' % spider.name, 'w+b')
     self.files[spider] = items_file

     exporter = JsonItemExporter(items_file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #14
0
 def spider_opened(self, spider):
     """Open items.json in binary write mode and start a JSON exporter on it."""
     output = open('items.json', 'wb')
     self.file = output

     exporter = JsonItemExporter(output)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #15
0
 def __init__(self):
     """Open news.json and start a JSON exporter configured for UTF-8."""
     news_file = open('news.json', 'wb')
     self.f = news_file

     exporter = JsonItemExporter(news_file, encoding='utf-8')
     self.exporter = exporter
     exporter.start_exporting()
コード例 #16
0
 def __init__(self):
     """Open items.json and start a JSON exporter on it.

     The file handle is kept in ``self.file`` and the exporter in
     ``self.exporter``.
     """
     # Binary mode: Scrapy's item exporters write bytes, so a text-mode
     # handle fails on Python 3.
     self.file = open('items.json', 'wb')
     self.exporter = JsonItemExporter(self.file)
     self.exporter.start_exporting()
コード例 #17
0
 def spider_opened(self, spider):
     """Start a JSON export for *spider* in ``exports/json/<spider.name>.json``.

     The file handle is stored in ``self.files`` keyed by the spider.
     """
     destination = 'exports/json/%s.json' % spider.name
     handle = open(destination, 'w+b')
     self.files[spider] = handle

     exporter = JsonItemExporter(handle)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #18
0
 def spider_opened(self, spider):
     """Open ``<spider.name>_output.json`` and start a JSON exporter on it."""
     output = open('%s_output.json' % spider.name, 'w+b')
     self.file = output

     exporter = JsonItemExporter(output)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #19
0
 def _get_exporter(self, **kwargs):
     """Build a ``JsonItemExporter`` over ``self.output``.

     Any keyword arguments are forwarded to the exporter unchanged.
     """
     exporter = JsonItemExporter(self.output, **kwargs)
     return exporter
コード例 #20
0
 def spider_opened(self, spider):
     """Open ``<spider.name>Items.json`` for appending and start a JSON export.

     The file handle is stored in ``self.files`` keyed by the spider, and the
     exporter (created with ``ensure_ascii=False``) in ``self.exporter``.
     """
     # Binary append mode: Scrapy's item exporters write bytes, so the
     # previous text-mode 'a+' handle fails on Python 3.
     # NOTE(review): append mode is kept from the original. Each run adds
     # another exporter output after the existing content, so the file as a
     # whole may not be one valid JSON document. Confirm this is intended.
     output = open('%sItems.json' % spider.name, 'a+b')
     self.files[spider] = output
     self.exporter = JsonItemExporter(output, ensure_ascii=False)
     self.exporter.start_exporting()
コード例 #21
0
 def spider_opened(self, spider):
     """Open the shared website data file and start a JSON export into it.

     Every spider writes to ``../website/data/complete.json``. The handle is
     stored in ``self.files`` keyed by the spider.
     """
     output = open('../website/data/complete.json', 'w+b')
     self.files[spider] = output

     exporter = JsonItemExporter(output)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #22
0
 def spider_opened(self, spider):
     """Start an indented JSON export to ``<spider.name>_products.json``.

     The file handle is stored in ``self.files`` keyed by the spider.
     """
     products_file = open('%s_products.json' % spider.name, 'w+b')
     self.files[spider] = products_file

     # ensure_ascii=False should be passed here, but it does not work.
     exporter = JsonItemExporter(products_file, indent=4)
     self.exporter = exporter
     exporter.start_exporting()