Example 1
    def open_spider(self, spider):
        self.cols = spider.cols
        self.start_urls = spider.start_urls

        self.file = open('test.json', 'w+b')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()
Example 2

 def spider_opened(self, spider):
     fjson = open(
         'output/%s_%s_items.json' %
         (spider.name, str(int(time.mktime(time.gmtime())))), 'wb')
     self.fjsons[spider] = fjson
     self.exporter = JsonItemExporter(fjson)
     self.exporter.start_exporting()
Example 3
class SaveNewItems(object):
    def __init__(self):
        self.files = []
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    def process_item(self, item, spider):
        self.new_file_exporter.export_item(item)
        print "Save " + item["title"][0]
        return item

    def spider_opened(self, spider):
        self.new_item_file = open("storage\\%s_new_items.json" % spider.name, "w")
        self.new_file_exporter = JsonItemExporter(self.new_item_file)
        self.new_file_exporter.start_exporting()

    def spider_closed(self, spider):
        # incomingData is assumed to be collected elsewhere in this pipeline
        with open("storage\\%s_items.json" % spider.name, "w") as items_file:
            self.exporter = JsonItemExporter(items_file)
            self.exporter.start_exporting()
            for item in incomingData:
                self.exporter.export_item(item)
            self.exporter.finish_exporting()
        # the with-statement closes items_file; only the new-items file remains
        self.new_file_exporter.finish_exporting()
        self.new_item_file.close()
Example 4
class ExportJSON(ExportData):
    """
    Exporting to export/json/spider-name.json file
    """
    def spider_opened(self, spider):
        file_to_save = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = file_to_save
        self.exporter = JsonItemExporter(file_to_save)
        self.exporter.start_exporting()
Example 5
class ExportJSON(ExportData):
    """
    Exporting to export/json/spider-name.json file
    """
    def spider_opened(self, spider):
        file_to_save = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = file_to_save
        self.exporter = JsonItemExporter(file_to_save)
        self.exporter.start_exporting()
Example 6
 def process_item(self, item, spider):
     if self.first_item:
         self.first_item = False
         file = open('%s_items.json' % spider.name, 'wb')
         # How do we get scrapy's item exporter to write Chinese to the JSON
         # file instead of \uXXXX unicode escapes?
         # http://stackoverflow.com/questions/18337407/saving-utf-8-texts-in-json-dumps-as-utf8-not-as-u-escape-sequence
         # As mentioned there, set the JSONEncoder parameter ensure_ascii to False,
         # i.e. simply pass ensure_ascii=False when instantiating
         # scrapy.contrib.exporter.JsonItemExporter.
         self.exporter = JsonItemExporter(file, ensure_ascii=False)
         self.exporter.start_exporting()
     self.exporter.export_item(item)
     return item
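The comment above is the whole point of this example: ensure_ascii controls whether non-ASCII text is written verbatim or as \uXXXX escapes. A quick illustration with the stdlib json module (the values are illustrative):

# -*- coding: utf-8 -*-
import json

data = {'title': u'中文标题'}
print json.dumps(data)                      # {"title": "\u4e2d\u6587\u6807\u9898"}
print json.dumps(data, ensure_ascii=False)  # {"title": "中文标题"}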
Example 7
	def open_spider(self, spider):
		self.cols = spider.cols
		self.start_urls = spider.start_urls

		self.file = open('test.json', 'w+b')
		self.exporter = JsonItemExporter(self.file)
		self.exporter.start_exporting()
Example 8
 def __init__(self):
     # open in plain binary mode and let the exporter handle the UTF-8 encoding;
     # codecs.open with a binary mode plus an encoding is contradictory
     file = open('books2.json', 'wb')
     self.exporter = JsonItemExporter(file, encoding='utf-8', ensure_ascii=False)
     self.exporter.start_exporting()
Example 9
 def process_item(self, item, spider):
     if FeedSpider.is_content_op(spider) and isinstance(item, ContentItem):
         spider.make_sure_path_exists(spider.get_content_output_dir_path())
         file_path = spider.get_content_output_file_path(item['id'], item['name'].replace(' ', '-'))
         is_exist = os.path.exists(file_path)
         self.file = open(file_path, 'w')
         if is_exist:
             # if file already exists, clean it and write new content.
             self.file.seek(0)
             self.file.truncate()
         self.item_exporter = JsonItemExporter(self.file, indent=4)
         self.item_exporter.export_item(item)
         self.file.close()
         log.msg('ContentWriterPipeline, saved content file %s successful.' % file_path)
         raise DropItem('Save item success')
     else:
         return item
Example 10

	def spider_opened(self, spider):
		if(spider.name == 'timesnews'):
			file = open('TodaysToiScrapedItems.json', 'w+b')
		else :
			file = open('TodaysHtScrapedItems.json', 'w+b')
		self.files[spider] = file
		self.exporter = JsonItemExporter(file)
		self.exporter.start_exporting()
Example 11
    def open_spider(self, spider):
        print 'Opening spider.'
        self.files['question'] = codecs.open(self.files_path['question_file'],
                                             'w',
                                             encoding='utf-8')
        self.files['answer'] = codecs.open(self.files_path['answer_file'],
                                           'w',
                                           encoding='utf-8')
        self.files['user'] = codecs.open(self.files_path['user_file'],
                                         'w',
                                         encoding='utf-8')

        self.exporters['question'] = JsonItemExporter(self.files['question'])
        self.exporters['answer'] = JsonItemExporter(self.files['answer'])
        self.exporters['user'] = JsonItemExporter(self.files['user'])

        for exporter in self.exporters.itervalues():
            exporter.start_exporting()
Example 12
    def initialize_exporters(self):

        for meme_type in self.meme_types:

            json_filename = self.get_json_filename(meme_type)
            json_file = open(json_filename, 'w')
            self.files[meme_type] = json_file
            self.exporters[meme_type] = JsonItemExporter(json_file)
            self.exporters[meme_type].start_exporting()
Example 13
 def spider_closed(self, spider):
     # incomingData is assumed to be collected elsewhere in this pipeline
     with open("storage\\%s_items.json" % spider.name, "w") as items_file:
         self.exporter = JsonItemExporter(items_file)
         self.exporter.start_exporting()
         for item in incomingData:
             self.exporter.export_item(item)
         self.exporter.finish_exporting()
     # the with-statement closes items_file; only the new-items file remains
     self.new_file_exporter.finish_exporting()
     self.new_item_file.close()
Example 14
 def engine_started(self):
     self.json_file = open("result.json", "w")
     self.json_exporter = JsonItemExporter(
         self.json_file,
         fields_to_export=self.fields_to_export[self.spider._crawler.settings["CommandLineParameter"][0]],
     )
     self.json_exporter.start_exporting()
     log.msg(
         message="ManningPipeline, engine_started, mode=%s"
         % self.spider._crawler.settings["CommandLineParameter"][0]
     )
Example 15
 def process_item(self, item, spider):
     if self.first_item:
         self.first_item = False
         file = open('%s_items.json' % spider.name, 'wb')
         # How do we get scrapy's item exporter to write Chinese to the JSON
         # file instead of \uXXXX unicode escapes?
         # http://stackoverflow.com/questions/18337407/saving-utf-8-texts-in-json-dumps-as-utf8-not-as-u-escape-sequence
         # As mentioned there, set the JSONEncoder parameter ensure_ascii to False,
         # i.e. simply pass ensure_ascii=False when instantiating
         # scrapy.contrib.exporter.JsonItemExporter.
         self.exporter = JsonItemExporter(file, ensure_ascii=False)
         self.exporter.start_exporting()
     self.exporter.export_item(item)
     return item
Example 16
class ExportJSON(object):

    """
    Exporting to export/json/spider-name.json file
    """

    def __init__(self):
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file_to_save = open('exports/json/%s.json' % spider.name, 'w+b')
        self.files[spider] = file_to_save
        self.exporter = JsonItemExporter(file_to_save)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_to_save = self.files.pop(spider)
        file_to_save.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
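A pipeline like this only takes effect once it is registered in the project settings. A minimal sketch, assuming the class lives in a module named myproject.pipelines (the module path is an assumption, not part of the example):

# settings.py
ITEM_PIPELINES = {
    'myproject.pipelines.ExportJSON': 300,  # lower values run earlier
}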
Example 17
class MonitorPipeline(object):

	def open_spider(self, spider):
		self.cols = spider.cols
		self.start_urls = spider.start_urls

		self.file = open('test.json', 'w+b')
		self.exporter = JsonItemExporter(self.file)
		self.exporter.start_exporting()

	def close_spider(self, spider):
		self.exporter.finish_exporting()
		self.file.close()

	def process_item(self, item, spider):

		try:
			index = self.start_urls.index( item['surl'] )
			groupId = index / self.cols
			r = index % self.cols
			if r == 0:
				item['main'] = 0
			elif r == 1:
				item['main'] = 1
			elif r == 2:
				item['main'] = 2
			item['gid'] = groupId
		except:
			index = -1

		self.exporter.export_item(item)
		return item
Example 18
class YxreviewPipeline(object):
    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.checkData(item, "title")
        self.checkData(item, "summary")
        self.checkData(item, "cover_image")
        self.checkData(item, "score")

        self.exporter.export_item(item)

        return item

    def checkData(self, item, field):
        if len(item[field]) > 0:
            newText = item[field][0].encode("utf-8")
            item[field] = newText.strip()
        else:
            item[field] = ""
Example 19

class SpidercrawlerPipeline(object):
	def __init__(self):
		dispatcher.connect(self.spider_opened, signals.spider_opened)
		dispatcher.connect(self.spider_closed, signals.spider_closed)
		self.files = {}
		# the exporter is created per spider in spider_opened; constructing it
		# here passed the `file` builtin instead of an open file object
		self.exporter = None
	
	def spider_opened(self, spider):
		if(spider.name == 'timesnews'):
			file = open('TodaysToiScrapedItems.json', 'w+b')
		else :
			file = open('TodaysHtScrapedItems.json', 'w+b')
		self.files[spider] = file
		self.exporter = JsonItemExporter(file)
		self.exporter.start_exporting()
	
	def spider_closed(self, spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()
		
	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item
Example 20
class ExportJSON(object):
    """
    Exporting to export/json/spider-name.json file
    """
    def __init__(self):
        self.files = {}
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file_to_save = open(GLOBAL_PATH + 'exports/json/%s.json' % spider.name,
                            'w+b')
        self.files[spider] = file_to_save
        self.exporter = JsonItemExporter(file_to_save)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file_to_save = self.files.pop(spider)
        file_to_save.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 21
class AppsPipeline(object):
    def __init__(self, spider):
        self.file = open(
            'data/{category}-{today}.json'.format(
                today=date.today().strftime('%d-%m-%Y'),
                category=spider.category), 'wb')
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        if crawler.spider is not None:
            return cls(spider=crawler.spider)

    def spider_opened(self, spider):
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        if spider.name == 'apps':
            self.exporter.export_item(item)
        return item
Example 22
class GovbuyPipeline(object):
    def __init__(self):
        self.titles_seen = set()
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_products.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        if item['title'] in self.titles_seen:
            raise DropItem("Duplicate item found: %s" % item)
        else:
            self.titles_seen.add(item['title'])
            self.exporter.export_item(item)
            return item
Example 23
class YxreviewPipeline(object):

    @classmethod
    def from_crawler(cls, crawler):
         pipeline = cls()
         crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
         crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
         return pipeline

    def spider_opened(self, spider):
        self.file = open('items.json', 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.checkData(item, "title")
        self.checkData(item, "summary")
        self.checkData(item, "cover_image")
        self.checkData(item, "score")

        self.exporter.export_item(item)

        return item

    def checkData(self ,item, field):
        if len(item[field]) > 0:
            newText = item[field][0].encode("utf-8")
            item[field] = newText.strip()
        else:
            item[field] = ""
Example 24
class AppsPipeline(object):

    def __init__(self, spider):
        self.file = open('{category}-{today}.json'.format(
                today = date.today().strftime('%d-%m-%Y'),
                category = spider.category), 'wb')
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        return cls(spider = crawler.spider)

    def spider_opened(self, spider):
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        if spider.name == 'apps':
            self.exporter.export_item(item)
        return item
Example 25
class JsonExportPipeline(object):
    __doc__ = '''
    Stores every scraped item (from all spiders) in a single AtaBlogItems.json file,
    one item serialized to JSON at a time. JSON is a simple, flexible format, but it
    does not scale well to large amounts of data, since the whole object is held in
    memory. If you need JSON output that is both powerful and simple, consider
    JsonLinesItemExporter, or split the output into multiple chunks.
    '''

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%sItems.json' % spider.name, 'a+')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file, ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
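The docstring above recommends JsonLinesItemExporter for larger crawls. A minimal sketch of that variant, which writes one JSON object per line and therefore never builds the whole array in memory:

from scrapy.contrib.exporter import JsonLinesItemExporter  # scrapy.exporters in newer Scrapy

class JsonLinesExportPipeline(object):
    def open_spider(self, spider):
        self.file = open('%s_items.jl' % spider.name, 'wb')
        self.exporter = JsonLinesItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item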
Example 26

class JsonExportPipeline(object):
    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_products.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 27
class CrawlerPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_products.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file, indent=4) # ensure_ascii=False should go here, but it doesn't work ;P
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
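The comment above ("ensure_ascii=False should go here, but it doesn't work ;P") points at a real pitfall: with ensure_ascii=False the encoder emits unicode text, which can fail when written to a binary file unless an output encoding is configured as well. Example 37 further down shows the combination that works; applied here it would read:

self.exporter = JsonItemExporter(file, indent=4, encoding='utf-8', ensure_ascii=False)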
Example 28
class JsonExportPipeline(object):
	def __init__(self):
		self.files = {}

	@classmethod
	def from_crawler(cls, crawler):
		pipeline = cls()
		crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
		crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
		return pipeline

	def spider_opened(self, spider):
		#file = open('%_ixbt_jokes.json' % spider.name, 'w+b')
		file = open('ixbt_jokes.json', 'w+b')
		self.files[spider] = file
		self.exporter = JsonItemExporter(file)
		self.exporter.start_exporting()

	def spider_closed(self, spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()

	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item
Example 29
class MonitorPipeline(object):
    def open_spider(self, spider):
        self.cols = spider.cols
        self.start_urls = spider.start_urls

        self.file = open('test.json', 'w+b')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):

        try:
            index = self.start_urls.index(item['surl'])
            groupId = index / self.cols
            r = index % self.cols
            if r == 0:
                item['main'] = 0
            elif r == 1:
                item['main'] = 1
            elif r == 2:
                item['main'] = 2
            item['gid'] = groupId
        except:
            index = -1

        self.exporter.export_item(item)
        return item
Example 30

class DatesPipeline(object):

    def __init__(self):
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        file = open('%s_products.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file, indent=4) # ensure_ascii=False should go here, but it doesn't work ;P
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 31
class JsonExportPipeline(object):
    def __init__(self):
        log.msg('JsonExportPipeline.init....', level=log.INFO)
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        log.msg('JsonExportPipeline.from_crawler....', level=log.INFO)
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        log.msg('JsonExportPipeline.spider_opened....', level=log.INFO)
        file = open('%s.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        log.msg('JsonExportPipeline.spider_closed....', level=log.INFO)
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        log.msg('JsonExportPipeline.process_item....', level=log.INFO)
        self.exporter.export_item(item)
        return item
Example 32
class CLPipe(object):
    """A pipeline for writing results to json"""
    def __init__(self, **kwargs):
        self.files = {}
        # pop the custom arguments so they are not forwarded to object.__init__
        self.AppID = kwargs.pop('AppID', None)
        self.ApiKey = kwargs.pop('ApiKey', None)
        super(CLPipe, self).__init__(**kwargs)

    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        #open a static/dynamic file to read and write to
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()
        #reopen new static/dynamic file to parse for sending
        new = open('%s_items.json' % spider.name)
        data = json.load(new)
        #reg = re.compile(r'[\n\r\t]')
        #for i in data:
        #    log.msg(i)
        # Looping here and making one API call per item is very bad: in one day
        # that sent almost 500k requests. Send a single payload instead and
        # process it on the other end, as done below.
        connection = httplib.HTTPSConnection('api.parse.com', 443)
        connection.connect()
        connection.request('POST', '/1/functions/scrapeSaver', json.dumps({
        #    #"email":data[i]["email"], "referer":data[i]["referer"], "scrapeID":data[i]["id"]
            "data":data
        }), {
            "X-Parse-Application-Id": self.AppID,
            "X-Parse-REST-API-Key": self.ApiKey,
            "Content-Type": "application/json"
        })
        result = json.loads(connection.getresponse().read())
        print "Sending load ", result
        #done with the new file, close it
        new.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 33
class ContentWriterPipeline(object):
    def __init__(self):
        log.msg('ContentWriterPipeline.__init__()')
        self.file = None
        self.item_exporter = None
        self.count = 0

    def process_item(self, item, spider):
        if FeedSpider.is_content_op(spider) and isinstance(item, ContentItem):
            spider.make_sure_path_exists(spider.get_content_output_dir_path())
            file_path = spider.get_content_output_file_path(item['id'], item['name'].replace(' ', '-'))
            is_exist = os.path.exists(file_path)
            self.file = open(file_path, 'w')
            if is_exist:
                # if file already exists, clean it and write new content.
                self.file.seek(0)
                self.file.truncate()
            self.item_exporter = JsonItemExporter(self.file, indent=4)
            self.item_exporter.export_item(item)
            self.file.close()
            log.msg('ContentWriterPipeline, saved content file %s successful.' % file_path)
            raise DropItem('Save item success')
        else:
            return item
Example 34
    def parse(self, response):
        # Only handles top-level categories: http://www.peapod.com/processShowBrowseAisles.jhtml Only one of these
        # Then looks through each top-level category with new requests to find subcategories, which are processed by parse_category

        # Top level categories: Already in a list on the left with each link class 'mainCat'
        hxs = HtmlXPathSelector(response)
        big_categories = hxs.select('//a[@class="mainCat"]')
        big_category_objects = []

        # Extract categories one-by-one. We are now in the scope of each <a>:
        # <a href="?cnid=2098" target="_self" class="mainCat">Produce</a>
        for cat in big_categories:
            # Get category id (cnid): keep only the digits from the href that links to the category
            cnid_href = cat.select('@href').extract()[0]
            cnid = re.sub('\D', '', cnid_href)  # remove all non-digits
            name = cat.select('text()').extract()[0].rstrip() # Category name: Remove the \n at end

            new_cat = ShopCategory(name=name, cnid=cnid, parent='')

            big_category_objects.append(new_cat)
            self.all_categories.append(new_cat)

            # unicodedata.normalize('NFKD', title).encode('ascii','ignore') turn the text fields to ascii

        # Prepare requests to parse subcategories
        subcategory_requests = []
        for cat in big_category_objects:
            url = "http://www.peapod.com/processShowBrowseAisles.jhtml?cnid=" + cat['cnid']
            request = Request(url=url, callback=self.parse_category)
            request.meta['parent_cnid'] = cat['cnid']
            subcategory_requests.append(request)

        # Export big categories
        print big_category_objects, '\n\n'

        file = open("categories_top.txt", 'wb')
        exporter = JsonItemExporter(file)
        exporter.start_exporting()

        for cat in big_category_objects:
            exporter.export_item(cat)

        exporter.finish_exporting()
        file.close()

        # Move on to parsing subcategories
        print '\n\n\n\n\ndone with top level category request. moving on to next\n\n\n'
        return subcategory_requests
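The unicodedata.normalize trick mentioned in the comment above strips accents so text fields become plain ASCII; a quick illustration (the value is illustrative):

# -*- coding: utf-8 -*-
import unicodedata

title = u'Crème fraîche'
print unicodedata.normalize('NFKD', title).encode('ascii', 'ignore')  # Creme fraiche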
Example 35
class JsonItemPipeline(object):
    def open_spider(self, spider):
        self.file = open('test.json', 'w+b')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
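For context on why close_spider matters here: JsonItemExporter writes a single JSON array, so start_exporting emits the opening bracket, each export_item appends one element, and finish_exporting emits the closing bracket. Skipping finish_exporting leaves test.json as invalid JSON. The resulting file looks roughly like:

[{"name": "first"},
{"name": "second"}]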
Example 36
class JSONExportPipeline(object):
    def __init__(self):
        self.file = open('items.json', 'w')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
Example 37
class JsonPipeline(object):
    def __init__(self):
        self.file = open("./collected.json", 'wb')
        self.exporter = JsonItemExporter(self.file,
                                         encoding='utf-8',
                                         ensure_ascii=False)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item  # keep the item flowing to any later pipelines
Example 38
class JsonItemPipeline(object):

	def open_spider(self, spider):
		self.file = open('test.json', 'w+b')
		self.exporter = JsonItemExporter(self.file)
		self.exporter.start_exporting()

	def close_spider(self, spider):
		self.exporter.finish_exporting()
		self.file.close()

	def process_item(self, item, spider):
		self.exporter.export_item(item)
		return item	
Example 39
class JsonPipeline(object):
    """
    使用自带方法
    """
    def __init__(self):
        self.f = open('news.json', 'wb')
        self.exporter = JsonItemExporter(self.f, encoding='utf-8')
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.f.close()
Example 40
class DoubanSpiderPipeline(object):
    def __init__(self):
        # open in plain binary mode and let the exporter handle the UTF-8 encoding;
        # codecs.open with a binary mode plus an encoding is contradictory
        self.file = open('books2.json', 'wb')
        self.exporter = JsonItemExporter(self.file, encoding='utf-8', ensure_ascii=False)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        # pass the item itself; pre-encoding it to a JSON string breaks the exporter
        self.exporter.export_item(item)
        return item
Example 41
class DoubanJsonWrite(object):
    def __init__(self):
        #dispatcher.connect(self.open_spider, signals.spider_opened)
        #dispatcher.connect(self.close_spider, signals.spider_closed)
        self.itemsfile = open('items.jl', 'w')

    def open_spider(self, spider):
        self.exporter = JsonItemExporter(self.itemsfile)
        self.exporter.start_exporting()
    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.itemsfile.close()


    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 42
class Pipeline(object):
    """Pipeline for exporting scraped items into JSON format."""
 
    def __init__(self):
        self._file = open('wholefoods.json', 'wb')
        self._exporter = JsonItemExporter(self._file)

    def open_spider(self, spider):
        self._exporter.start_exporting()

    def close_spider(self, spider):
        self._exporter.finish_exporting()
        self._file.close()

    def process_item(self, item, spider):
        self._exporter.export_item(item)
        return item
Example 43
    def __init__(self):

        self.status = Status()
        self.classifiers = []
        self.exporters = {}
        for classifier in self.status.classifiers.keys():
            CF = ClassifierFactory(self.status.classifiers[classifier])
            CF.create_data_set("both")
            lc = CF.create_classifier(
                LogisticRegression(C=1e5),
                self.status.classifiers[classifier]['features']())
            lc.fit()
            self.classifiers.append((classifier, lc))

        self.classifiers = sorted(
            self.classifiers,
            key=lambda a: a[1].estimate_accuracy(5, verbose=True))
        print "Classifier {0} needs the most improvement; selected for export".format(
            self.classifiers[0][0])
        for classification in self.status.classifiers[self.classifiers[0]
                                                      [0]]['classifications']:
            f = file("{0}.json".format(classification), "wb")
            self.exporters[classification] = JsonItemExporter(f)
Example 44
class VisionsJsonPipeline(object):
    """
    Prints category and product data to a JSON file (data/category.json or
    data/product.json)
    """
    def __init__(self):
        self.exporter = None
        self.file = None

    def open_spider(self, spider):
        self.file = open('data/%s.json' % spider.name, 'w')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item

    def close_spider(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
Example 45
class DceChicangSave(object):
    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        print(get_project_settings().get('JASONFILE_PATH') + 'items.json')
        self.file = open('items' + datetime.datetime.today().strftime('%Y-%m-%d') + '.json', 'wb')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()
        #         print(get_project_settings().get('JASONFILE_PATH') +'items.json')
        #         datestampStr=datetime.datetime.today().strftime('%Y-%m-%d')
        #         if os.path.exists(get_project_settings().get('JASONFILE_PATH')+'items_end'+datestampStr+'.json'):
        #             os.remove(get_project_settings().get('JASONFILE_PATH')+'items_end'+datestampStr+'.json')
        #         print(get_project_settings().get('JASONFILE_PATH')+ 'items_end.json')
        #         os.rename(get_project_settings().get('JASONFILE_PATH')+'items.json',get_project_settings().get('JASONFILE_PATH')+ 'items_end'+datestampStr+'.json')
        #
        # Read the FTP configuration
        settings = get_project_settings()
        ftp_host = settings.get('FTP_HOST')
        ftp_username = settings.get('FTP_USER')
        ftp_password = settings.get('FTP_PASSWORD')
        ftp_path = settings.get('FTP_PATH')

        filenametosave = 'items' + datetime.datetime.today().strftime('%Y-%m-%d') + '.json'

        # uploadfile returns 1 on success, 0 on failure
        # result = uploadfile(ftp_host, ftp_username, ftp_password, ftp_path, filenametosave, os.getcwd())
        # print result

    def process_item(self, item, spider):
        if isinstance(item, DceChicangItem):
            self.exporter.export_item(item)
        return item
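uploadfile is referenced in the commented-out code above but not shown. A minimal sketch of such a helper using the stdlib ftplib, following the call signature and the 1-success/0-failure convention assumed above (this is a guess at the missing helper, not the project's actual code):

import os
from ftplib import FTP

def uploadfile(host, username, password, remote_path, filename, local_dir):
    # upload local_dir/filename to remote_path on the FTP server
    try:
        ftp = FTP(host)
        ftp.login(username, password)
        ftp.cwd(remote_path)
        with open(os.path.join(local_dir, filename), 'rb') as f:
            ftp.storbinary('STOR ' + filename, f)
        ftp.quit()
        return 1
    except Exception:
        return 0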
Example 46
class JsonWriterPipeline(object):
    @classmethod
    def from_crawler(cls, crawler):
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        self.file = open('%s_output.json' % spider.name, 'w+b')
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 47
class JsonExportPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        file = open('web/fefelinks.json', 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 48
class JsonExportPipeline(object):
    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        file = open('%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 49
class JSONExportPipeline(object):

    directory = '/home/ubuntu/dealscrape/output/'

    def __init__(self):
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        file = open(self.directory + '%s_items.json' % spider.name, 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        # items are assumed to have been accumulated on spider.crawled_items;
        # everything is written in one pass when the spider closes
        file = self.files.pop(spider)
        for k, v in spider.crawled_items.items():
            self.exporter.export_item(v)
        self.exporter.finish_exporting()
        file.close()
Example 50
class GamecrawlerPipeline(object):
	def __init__(self):
		dispatcher.connect(self.spider_opened, signals.spider_opened)
		dispatcher.connect(self.spider_closed, signals.spider_closed)
		self.files = {}
		
	def spider_opened(self, spider):
		file = open('gameCrawlerItems.json', 'w+b')
		self.files[spider] = file
		self.exporter = JsonItemExporter(file)
		self.exporter.start_exporting()
		
	def spider_closed(self, spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()
		
	def process_item(self, item, spider):
		## filter out boxscore (tsn) and htmlreport (nhl) links
		if item['link']:
			boxscoreLinks = []
			for link in item['link']:
				# tsn boxscore
				if "/nhl/scores/boxscore" in link:
					boxscoreLinks.append(link)
				# nhl html game report
				if "/scores/htmlreports" in link:
					boxscoreLinks.append(link)
			item['link'] = boxscoreLinks

		# check if actually a date (Mar 13 '14) with regex
		if item['date']:
			dates = []
			for date in item['date']:
				#check if actually date(Mar 13 '14) OR (Monday, March 13, 2014) with regex
				if (re.match(DATE_PATTERN_STRING, date) is not None):
					dates.append(date)
			item['date'] = dates
		self.exporter.export_item(item)
		return item
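DATE_PATTERN_STRING is defined elsewhere in the project. For the two formats named in the comments (Mar 13 '14, and Monday, March 13, 2014), a plausible pattern would be the following; this is an assumption for illustration, not the project's actual regex:

DATE_PATTERN_STRING = r"(?:[A-Z][a-z]{2} \d{1,2} '\d{2}|[A-Z][a-z]+, [A-Z][a-z]+ \d{1,2}, \d{4})"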
Example 51
class JsonExportPipeline(object):

    def __init__(self):
        log.start()
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.fjsons = {}

    def spider_opened(self, spider):
        fjson = open('output/%s_%s_items.json' % (spider.name, str(int(time.mktime(time.gmtime())))), 'wb')
        self.fjsons[spider] = fjson
        self.exporter = JsonItemExporter(fjson)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        fjson = self.fjsons.pop(spider)
        fjson.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 52
class StatsPipeline(object):

	def __init__(self):
		self.files = {}
		dispatcher.connect(self.spider_opened, signals.spider_opened)
		dispatcher.connect(self.spider_closed, signals.spider_closed)
	def spider_opened(self,spider):
		file = open('stats.json','wb')
		self.files[spider] = file
		self.exporter = JsonItemExporter(file)
		self.exporter.start_exporting()
	def spider_closed(self,spider):
		self.exporter.finish_exporting()
		file = self.files.pop(spider)
		file.close()
	def process_item(self, item, spider):
		# remove a leading "* " from the name if present
		if item['name']:
			if "* " in item['name']:
				item['name'] = item['name'].replace("* ", "", 1)
		self.exporter.export_item(item)
		return item
Example 53

class JsonExportPipeline(object):
    def __init__(self):
        log.start()
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.fjsons = {}

    def spider_opened(self, spider):
        fjson = open(
            'output/%s_%s_items.json' %
            (spider.name, str(int(time.mktime(time.gmtime())))), 'wb')
        self.fjsons[spider] = fjson
        self.exporter = JsonItemExporter(fjson)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        fjson = self.fjsons.pop(spider)
        fjson.close()

    def process_item(self, item, spider):
        self.exporter.export_item(item)
        return item
Example 54
class ManningPipeline(object):
    def __init__(self):
        self.fields_to_export = {
            "list": ["title", "url"],
            "all": ["isbn", "title", "url", "year", "authors", "image_url", "ebook_price"],
            "parse": ["isbn", "title", "url", "year", "authors", "image_url", "ebook_price"],
        }
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        dispatcher.connect(self.engine_started, signals.engine_started)

        log.msg(message="ManningPipeline, __init__", _level=log.INFO)

    def spider_opened(self, spider):
        self.spider = spider

    def engine_started(self):
        self.json_file = open("result.json", "w")
        self.json_exporter = JsonItemExporter(
            self.json_file,
            fields_to_export=self.fields_to_export[self.spider._crawler.settings["CommandLineParameter"][0]],
        )
        self.json_exporter.start_exporting()
        log.msg(
            message="ManningPipeline, engine_started, mode=%s"
            % self.spider._crawler.settings["CommandLineParameter"][0]
        )

    def process_item(self, item, spider):
        log.msg(message="ManningPipeline, process_item", _level=log.INFO)
        self.json_exporter.export_item(item)
        return item

    def spider_closed(self, spider):
        self.json_exporter.finish_exporting()
        self.json_file.close()
        log.msg(message="ManningPipeline, spider_closed", _level=log.INFO)
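fields_to_export, used above to switch field sets per run mode, is a standard item-exporter option: only the listed fields are written, in the given order. A minimal sketch:

exporter = JsonItemExporter(f, fields_to_export=['title', 'url'])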
Example 55
class CategoryPipeline(object):
    def __init__(self, spider):
        if spider.name == 'categories':
            self.file = open('categories.json', 'wb')
            dispatcher.connect(self.spider_opened, signals.spider_opened)
            dispatcher.connect(self.spider_closed, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        if crawler.spider is not None:
            return cls(spider=crawler.spider)

    def spider_opened(self, spider):
        self.exporter = JsonItemExporter(self.file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        self.exporter.finish_exporting()
        self.file.close()

    def process_item(self, item, spider):
        if spider.name == 'categories':
            self.exporter.export_item(item)
        return item
Example 56
 def spider_opened(self, spider):
     """Open Spider."""
     file = open('../website/data/complete.json', 'w+b')
     self.files[spider] = file
     self.exporter = JsonItemExporter(file)
     self.exporter.start_exporting()
Example 57
class CrawlerPipeline(object):
    """Pipeline to alter scraped items."""
    def __init__(self):
        """Initialise Pipeline."""
        dispatcher.connect(self.spider_opened, signals.spider_opened)
        dispatcher.connect(self.spider_closed, signals.spider_closed)
        self.files = {}

    def spider_opened(self, spider):
        """Open Spider."""
        file = open('../website/data/complete.json', 'w+b')
        self.files[spider] = file
        self.exporter = JsonItemExporter(file)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Close Spider."""
        self.exporter.finish_exporting()
        file = self.files.pop(spider)
        file.close()

    def process_item(self, item, spider):
        """Process Items."""
        # Process Field 'number'
        if item['number']:
            # Add leading 0 to 12a and 12b
            if item['number'][0] in ('12a', '12b'):
                item['number'] = '0' + item['number'][0]
            # if item['number'][0] == '12a':
            #     item['number'] = '012.1'
            # elif item['number'][0] == '12b':
            #     item['number'] = '012.2'
            else:
                # Add leading 0 up to total length of 3
                item['number'] = item['number'][0].zfill(3)
        else:
            # Drop empty item
            raise DropItem(item['number'])

        # Process Field 'pubdate'
        if item['pubdate']:
            alterToCET = datetime.datetime.strptime(item['pubdate'][0],
                                                    '%a, %d %b %Y %H:%M:%S')
            alterToCET = alterToCET + datetime.timedelta(hours=1)
            # alterToCET = alterToCET.strftime('%a, %d %b %Y %H:%M:%S')
            alterToCET = alterToCET.strftime('%Y-%m-%d %H:%M:%S.000000')
            item['pubdate'] = alterToCET

        # Process Field 'pubday'
        if item['pubday']:
            # Changing day format
            if any('Mon' in s for s in item['pubday']):
                item['pubday'] = 'Montag'
            elif any('Tue' in s for s in item['pubday']):
                item['pubday'] = 'Dienstag'
            elif any('Wed' in s for s in item['pubday']):
                item['pubday'] = 'Mittwoch'
            elif any('Thu' in s for s in item['pubday']):
                item['pubday'] = 'Donnerstag'
            elif any('Fri' in s for s in item['pubday']):
                item['pubday'] = 'Freitag'
            elif any('Sat' in s for s in item['pubday']):
                item['pubday'] = 'Samstag'
            elif any('Sun' in s for s in item['pubday']):
                item['pubday'] = 'Sonntag'
            else:
                item['pubday'] = 'WRONG DAY FORMAT'

        # Process Field 'pubtime'
        if item['pubtime']:
            # Change from GMT to CET
            alterTimeToCET = datetime.datetime.strptime(
                item['pubtime'][0], '%H:%M:%S')
            alterTimeToCET = alterTimeToCET + datetime.timedelta(hours=1)
            alterTimeToCET = alterTimeToCET.strftime('%H:%M:%S')
            item['pubtime'] = alterTimeToCET
            # Save pubtime as seconds since midnight,
            # e.g. '14:03:20' -> 14*3600 + 3*60 + 20 = 50600
            tpub = item['pubtime']  # item['pubtime'] is a plain 'H:M:S' string by now
            item['pubtime_integer'] = sum(
                int(x) * 60**i
                for i, x in enumerate(reversed(tpub.split(":"))))

        # Alter empty durations with the real values (known episode lengths)
        DURATIONS = {
            '000': '01:46:01', '001': '01:39:34', '002': '01:55:14',
            '003': '02:02:28', '004': '02:17:18', '005': '02:08:03',
            '006': '02:29:09', '007': '02:42:58', '008': '02:21:36',
            '009': '02:07:42', '010': '02:13:07', '011': '02:19:37',
            '012a': '00:59:06', '012b': '00:41:33', '013': '02:43:32',
            '014': '01:59:41', '015': '02:30:15', '016': '02:46:53',
            '017': '02:29:01', '018': '02:44:20', '019': '02:18:56',
            '020': '02:27:10', '021': '02:51:22', '022': '02:18:16',
            '023': '02:49:37', '024': '02:37:09', '025': '02:34:52',
            '026': '02:44:25', '027': '02:37:43', '028': '02:56:38',
            '029': '03:14:28', '030': '02:19:35', '031': '02:55:49',
            '032': '03:12:45', '033': '02:17:02', '034': '02:52:31',
            '035': '02:36:32', '036': '03:40:17', '037': '03:07:41',
            '038': '02:50:01', '039': '03:01:35', '040': '03:39:16',
            '041': '01:48:45', '042': '03:37:21', '043': '02:46:26',
            '044': '02:46:05', '045': '03:08:51', '046': '02:59:17',
            '047': '02:46:31', '048': '03:14:36', '049': '03:21:24',
            '077': '03:13:03',
        }
        if item['number'] in DURATIONS:
            item['duration'][0] = DURATIONS[item['number']]

        # Process Field 'duration'
        if item['duration']:
            # Save duration as seconds
            tdur = item['duration'][0]
            item['duration_integer'] = sum(
                int(x) * 60**i
                for i, x in enumerate(reversed(tdur.split(':'))))
            # Specify duration as time format
            alterDuration = datetime.datetime.strptime(item['duration'][0],
                                                       '%H:%M:%S')
            alterDuration = alterDuration.strftime('%H:%M:%S')
            item['duration'] = alterDuration

        # Return all crawled items
        self.exporter.export_item(item)
        return item
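The pubtime_integer and duration_integer expressions above convert an 'H:M:S' string to seconds. Pulled out as a standalone helper for a quick sanity check (the name hms_to_seconds is illustrative):

def hms_to_seconds(hms):
    # '02:17:18' -> 2*3600 + 17*60 + 18 = 8238
    return sum(int(x) * 60**i for i, x in enumerate(reversed(hms.split(':'))))

assert hms_to_seconds('02:17:18') == 8238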
Example 58
 def __init__(self, file, **kwargs):
     JsonItemExporter.__init__(self, file, ensure_ascii=False, **kwargs)
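The fragment above is the whole trick: subclass JsonItemExporter so that ensure_ascii=False becomes the default. A self-contained sketch (the class name Utf8JsonItemExporter is illustrative):

from scrapy.contrib.exporter import JsonItemExporter  # scrapy.exporters in newer Scrapy

class Utf8JsonItemExporter(JsonItemExporter):
    # write non-ASCII text verbatim instead of \uXXXX escapes
    def __init__(self, file, **kwargs):
        JsonItemExporter.__init__(self, file, ensure_ascii=False, **kwargs)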
Example 59
 def spider_opened(self, spider):
     self.exporter = JsonItemExporter(self.file)
     self.exporter.start_exporting()