コード例 #1
0
    def process_item(self, item, spider):
        """Export a scraped item to its own XML file and return the item.

        Items from the ``match`` spider are written to
        ``matches/<country>/<league>/<season>/<stage>/<matchId>.xml``; items
        from the ``player`` spider are written to
        ``players/<name>_<matchId>_<fifaId>.xml``. Items coming from any
        other spider are passed through untouched.

        :param item: the scraped item; path components and exported fields
            are read from it by key.
        :param spider: the spider that produced the item; only ``name`` is
            inspected.
        :return: the same ``item``, so later pipeline stages still receive it.
        :raises OSError: if the target directory cannot be created for any
            reason other than already existing.
        """
        # Compare names by value: ``is`` tests object identity and only
        # works by accident when both strings happen to be interned.
        if spider.name == 'match':
            filename = '/'.join([
                'matches',
                item['country'],
                item['league'],
                item['season'],
                str(item['stage']),
                '%s.xml' % item['matchId'],
            ])
            file_key = item['matchId']
            fields = [
                'country', 'league', 'season', 'stage', 'matchId', 'date',
                'homeTeamId', 'awayTeamId', 'homeTeamFullName',
                'awayTeamFullName', 'homeTeamAcronym', 'awayTeamAcronym',
                'homeTeamGoal', 'awayTeamGoal', 'homePlayers',
                'awayPlayers', 'homePlayersId', 'awayPlayersId',
                'homePlayersX', 'awayPlayersX', 'homePlayersY',
                'awayPlayersY', 'goal', 'shoton', 'shotoff', 'foulcommit',
                'card', 'cross', 'corner', 'possession'
            ]
        elif spider.name == 'player':
            filename = 'players/' \
                + item['name'] + '_' + item['matchId'] + '_' \
                + item['fifaId'] + '.xml'
            file_key = item['name']
            fields = [
                'name', 'matchId', 'fifaId', 'birthday', 'height',
                'weight', 'stats'
            ]
        else:
            # Unknown spider: hand the item on instead of returning None.
            return item

        # Create the directory unconditionally and tolerate "already
        # exists"; a separate existence check would only add a race window.
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise

        with open(filename, 'w+b') as outfile:
            # NOTE: the handle stored here is closed as soon as the ``with``
            # block exits; only the reference is kept in ``self.files``.
            self.files[file_key] = outfile
            self.exporter = XmlItemExporter(outfile)
            self.exporter.fields_to_export = fields
            self.exporter.export_item(item)
        return item
コード例 #2
0
 def __init__(self):
     """Open ``myfile/csdnarticle.xml`` and start an XML export into it.

     The open file is kept in ``self.files`` under the key
     ``'csdnarticle'`` and the exporter in ``self.exporter``.
     """
     self.files = {}
     # XmlItemExporter writes bytes, so the file must be opened in binary
     # mode; a text-mode handle rejects the exporter's writes on Python 3.
     xml_file = open('myfile/%s.xml' % 'csdnarticle', 'wb')
     self.files['csdnarticle'] = xml_file
     # Instantiate an XmlItemExporter object
     self.exporter = XmlItemExporter(xml_file)
     self.exporter.start_exporting()
コード例 #3
0
ファイル: test_exporters.py プロジェクト: wsppt/scrapy
 def assertExportResult(self, item, expected_value):
     """Export *item* as XML in memory and compare it to *expected_value*.

     The item is run through a full start/export/finish cycle of an
     ``XmlItemExporter`` writing to a ``BytesIO`` buffer; the buffer
     contents are then checked with ``self.assertXmlEquivalent``.
     """
     buffer = BytesIO()
     exporter = XmlItemExporter(buffer)
     exporter.start_exporting()
     exporter.export_item(item)
     exporter.finish_exporting()
     exported = buffer.getvalue()
     self.assertXmlEquivalent(exported, expected_value)
コード例 #4
0
ファイル: file.py プロジェクト: Aitrg/md_ingestion
 def spider_opened(self, spider):
     """Open the output file for *spider* and start an XML export into it.

     The path is joined from the ``STORAGE_DIR`` setting, the ``JIRA_ID``
     setting and a file name: ``self.file_name`` when it is truthy,
     otherwise ``'<spider.name>_items.xml'``. ``self.createFolder`` is
     called with the full path before the file is opened.
     """
     # NOTE(review): STORAGE_DIR is read from the bare name ``settings``
     # while JIRA_ID comes from ``self._settings`` -- confirm both are
     # intended.
     storage_dir = settings.get('STORAGE_DIR')
     jira_id = self._settings.get('JIRA_ID')
     leaf_name = self.file_name or '%s_items.xml' % spider.name
     outpath = os.path.join(storage_dir, jira_id, leaf_name)
     self.createFolder(outpath)
     self.file = open(outpath, 'w+b')
     exporter = XmlItemExporter(self.file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #5
0
 def spider_opened(self, spider):
     """Open ``export.xml`` under ``FILES_STORE`` and start the XML export.

     The open file is stored in ``self.files`` under *spider*. The exporter
     is created with ``games`` as root element and ``game`` as item element
     and kept in ``self.exporter``.
     """
     export_path = spider.settings['FILES_STORE'] + '/%s.xml' % 'export'
     xml_file = open(export_path, 'w+b')
     self.files[spider] = xml_file
     exporter = XmlItemExporter(
         xml_file, item_element='game', root_element='games')
     self.exporter = exporter
     exporter.start_exporting()
コード例 #6
0
 def spider_opened(self, spider):
     """Open ``<spider.name>_all.xml`` and start an XML export into it.

     The open file is stored in ``self.files`` under *spider*; the exporter
     is kept in ``self.exporter`` with an explicit ``fields_to_export``
     list set before the export is started.
     """
     xml_file = open('%s_all.xml' % spider.name, 'w+b')
     self.files[spider] = xml_file
     exporter = XmlItemExporter(xml_file)
     self.exporter = exporter
     exporter.fields_to_export = [
         'title',
         'genres',
         'rating',
         'description',
         'authors',
         'published',
         'link',
     ]
     exporter.start_exporting()
コード例 #7
0
    def open_spider(self, spider):
        """Open ``self.file_name`` for binary writing and start the export.

        The handle is kept in ``self.file_handle`` and the
        ``XmlItemExporter`` built on it in ``self.exporter``.
        """
        print('Custom export opened')

        # Binary write mode: the handle is passed straight to the exporter.
        handle = open(self.file_name, 'wb')
        self.file_handle = handle

        # Build the XML exporter on the handle and begin exporting.
        exporter = XmlItemExporter(handle)
        self.exporter = exporter
        exporter.start_exporting()
コード例 #8
0
 def spider_opened(self, spider):
     """
     Open the XML output file and set up the exporter.
     :param spider: spider whose ``name`` prefixes the output file name
     :return:       None
     """
     xml_file = open('%s_products.xml' % spider.name, 'w+b')
     self.files[spider] = xml_file
     exporter = XmlItemExporter(xml_file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #9
0
 def spider_opened(self, spider):
     """Open ``<spider.name>_adverts.xml`` and start an XML export into it.

     The open file is stored in ``self.files`` under *spider*. Before the
     export is started the exporter's ``root_element`` is set to ``books``,
     its ``item_element`` to ``book``, and ``fields_to_export`` to an
     explicit field list.
     """
     xml_file = open('%s_adverts.xml' % spider.name, 'w+b')
     self.files[spider] = xml_file
     exporter = XmlItemExporter(xml_file)
     self.exporter = exporter
     exporter.root_element = 'books'
     exporter.item_element = 'book'
     exporter.fields_to_export = [
         'title', 'upc', 'category', 'data_modified',
         'price_tax', 'price_no_tax', 'tax', 'availability',
         'reviews_nr', 'description', 'product_type', 'thumbnail',
         'images',
     ]
     exporter.start_exporting()
コード例 #10
0
ファイル: pipelines.py プロジェクト: Karan-GM/webcrawler
 def process_item(self, item, spider):
     """Write *item* to its own XML file under ``data/`` and return it.

     The file name is ``data/<item['filename'][0]>.xml``. A complete
     start/export/finish cycle is run for the single item.

     :param item: the scraped item; must provide ``filename``.
     :param spider: the spider that produced the item (unused).
     :return: the same ``item``.
     """
     # presumably item['filename'] is a sequence whose first element is the
     # base name -- verify against the spider that fills it.
     filename = item['filename']
     # The context manager closes the file even when the exporter raises,
     # which the previous open()/close() pair did not guarantee.
     with open('data/{}.xml'.format(filename[0]), 'wb') as xml_file:
         exporter = XmlItemExporter(xml_file)
         exporter.start_exporting()
         exporter.fields_to_export = [
             "docno", "http_header", "title", "text", "html_source",
             "author", "depth", "url"
         ]
         exporter.export_item(item)
         exporter.finish_exporting()
     return item
コード例 #11
0
 def exporter_for_format(feed_format, f):
     """Return an item exporter writing to *f* for the named feed format.

     Recognised names are ``csv``, ``xml``, ``json``, ``jsonlines``,
     ``pickle`` and ``marshal``.

     :param feed_format: name of the export format.
     :param f: file object handed to the exporter's constructor.
     :raises ValueError: if *feed_format* is none of the recognised names.
     """
     # Early returns keep each exporter class looked up only when selected.
     if feed_format == 'csv':
         return CsvItemExporter(f)
     if feed_format == 'xml':
         return XmlItemExporter(f)
     if feed_format == 'json':
         return JsonItemExporter(f)
     if feed_format == 'jsonlines':
         return JsonLinesItemExporter(f)
     if feed_format == 'pickle':
         return PickleItemExporter(f)
     if feed_format == 'marshal':
         return MarshalItemExporter(f)
     message = 'Export format {} is not supported'.format(feed_format)
     raise ValueError(message)
コード例 #12
0
    def _exporter_for_item(self, item):
        year = item['year']
        if year not in self.year_to_exporter:

            f = open('{}.xml'.format(year), 'wb')
            # f = open('{}.py'.format(year), 'wb')
            # f = open('{}.csv'.format(year), 'wb')
            # f = open('{}.pickle'.format(year), 'wb')
            # f = open('{}.json'.format(year), 'wb')
            # f = open('{}.jl'.format(year), 'wb')
            # f = open('{}.marshal'.format(year), 'wb')

            exporter = XmlItemExporter(f)
            # exporter = BaseItemExporter(f)
            # exporter = PythonItemExporter(f)
            # exporter = CsvItemExporter(f)
            # exporter = PickleItemExporter(f)
            # exporter = PprintItemExporter(f)
            # exporter = JsonItemExporter(f)
            # exporter = JsonLinesItemExporter(f)
            # exporter = MarshalItemExporter(f)
            exporter.start_exporting()
            self.year_to_exporter[year] = exporter
        return self.year_to_exporter[year]
コード例 #13
0
 def spider_opened(self, spider):
     """Open ``<spider.name>_dump.xml`` and start a UTF-8 XML export.

     The open file is stored in ``self.files`` under *spider* and the
     exporter in ``self.exporter``.
     """
     dump_file = open('%s_dump.xml' % spider.name, 'wb')
     self.files[spider] = dump_file
     exporter = XmlItemExporter(dump_file, encoding='utf-8')
     self.exporter = exporter
     exporter.start_exporting()
コード例 #14
0
 def spider_opened(self, spider):
     """Open ``<spider.name>.xml`` for writing and start the XML export.

     The open file is stored in ``self.files`` under *spider* and the
     exporter in ``self.exporter``.
     """
     outfile = open('%s.xml' % spider.name, 'w+b')
     self.files[spider] = outfile
     # Pass the handle opened above; the name ``file`` is not defined here.
     self.exporter = XmlItemExporter(outfile)
     self.exporter.start_exporting()
コード例 #15
0
 def spider_opened(self, spider):
     """Open ``myfile/<spider.name>.xml`` and start an XML export into it.

     The open file is stored in ``self.files`` under *spider* and the
     exporter in ``self.exporter``.
     """
     xml_file = open('myfile/%s.xml' % spider.name, 'w+b')
     self.files[spider] = xml_file
     # Instantiate an XmlItemExporter object
     exporter = XmlItemExporter(xml_file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #16
0
 def __init__(self):
     """Open ``assets/movies.xml`` and start a UTF-8 XML export into it.

     The handle is kept in ``self.file`` and the exporter in
     ``self.exporter``.
     """
     self.file = open("assets/movies.xml", 'wb')
     exporter = XmlItemExporter(self.file, encoding='utf-8')
     self.exporter = exporter
     exporter.start_exporting()
コード例 #17
0
ファイル: pipelines.py プロジェクト: zxming828/scrapycrawl
 def __init__(self):
     """Open the products XML file and build an exporter on it.

     The handle is kept in ``self.file``. The exporter, stored in
     ``self.exporter``, uses ``root`` as root element and ``item`` as item
     element. ``start_exporting()`` is not called here.
     """
     products_path = (
         '/home/CORPUSERS/xp017845/zxmcrawl/caipiao/cp_products.xml')
     self.file = open(products_path, 'w+b')
     self.exporter = XmlItemExporter(
         self.file, item_element='item', root_element='root')
コード例 #18
0
ファイル: pipelines.py プロジェクト: hotea/douban-spiders
 def spider_opened(self, spider):
     """Open ``<spider.name>_urls.xml`` and start an XML export into it.

     The open file is stored in ``self.files`` under *spider* and the
     exporter in ``self.exporter``.
     """
     urls_file = open('%s_urls.xml' % spider.name, 'w+b')
     self.files[spider] = urls_file
     exporter = XmlItemExporter(urls_file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #19
0
 def __init__(self):
     """Open an XML file named after today's date and start the export.

     The file name is the string form of the current local date followed
     by ``.xml``. The handle is kept in ``self.file`` and the exporter in
     ``self.exporter``.
     """
     today = datetime.datetime.now().date()
     file_name = str(today) + '.xml'
     self.file = open(file_name, 'wb')
     exporter = XmlItemExporter(file=self.file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #20
0
ファイル: test_exporters.py プロジェクト: wsppt/scrapy
 def _get_exporter(self, **kwargs):
     """Build an ``XmlItemExporter`` on ``self.output``, forwarding *kwargs*."""
     exporter = XmlItemExporter(self.output, **kwargs)
     return exporter
コード例 #21
0
 def __init__(self):
     """Open ``qidian_dev.xls`` and build a UTF-8 XML exporter on it.

     The handle is kept in ``self.fp`` and the exporter in
     ``self.exporter``. ``start_exporting()`` is not called here.
     """
     # NOTE(review): the file carries an ``.xls`` extension although XML is
     # written to it -- confirm whether the name is intentional.
     self.fp = open('qidian_dev.xls', 'wb')
     # ``ensure_ascii`` is a JSON-exporter option; XmlItemExporter rejects
     # unknown options with TypeError, so only ``encoding`` is passed.
     self.exporter = XmlItemExporter(self.fp, encoding='utf-8')
コード例 #22
0
 def open_spider(self, spider):
     """Open ``honglingjing.xml`` and start an XML export into it.

     The handle is kept in ``self.file`` and the exporter in
     ``self.exporter``; *spider* is not used.
     """
     self.file = open('honglingjing.xml', 'wb')
     exporter = XmlItemExporter(self.file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #23
0
 def __init__(self):
     """Open ``book2.xml`` and start a UTF-8 XML export into it.

     The handle is kept in ``self.file`` and the exporter in
     ``self.exporter``.
     """
     self.file = open('book2.xml', 'wb')
     exporter = XmlItemExporter(file=self.file, encoding='utf-8')
     self.exporter = exporter
     exporter.start_exporting()
コード例 #24
0
ファイル: pipelines.py プロジェクト: Fatake/PythonHaking
	def spider_opened(self, spider):
		"""Open ``europython_items.xml`` and start an XML export into it.

		The open file is stored in ``self.files`` under *spider* and the
		exporter in ``self.exporter``.
		"""
		items_file = open('europython_items.xml', 'w+b')
		self.files[spider] = items_file
		exporter = XmlItemExporter(items_file)
		self.exporter = exporter
		exporter.start_exporting()
コード例 #25
0
 def open_spider(self, spider):
     """Open ``dianpincity.xml`` and start an XML export into it.

     The handle is kept in ``self.file`` and the exporter in
     ``self.exporter``; *spider* is not used.
     """
     self.file = open('dianpincity.xml', 'wb')
     exporter = XmlItemExporter(self.file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #26
0
 def open_spider(self, spider):
     """Open ``amazon_bestseller.xml`` and start an XML export into it.

     The handle is kept in ``self.file`` and the exporter in
     ``self.exporter``; *spider* is not used.
     """
     self.file = open('amazon_bestseller.xml', 'wb')
     exporter = XmlItemExporter(self.file)
     self.exporter = exporter
     exporter.start_exporting()
コード例 #27
0
ファイル: pipelines.py プロジェクト: sheri528/robot
 def open_spider(self, spider):
     """Open ``<spider.name>_products.xml`` and start an XML export into it.

     The open file is stored in ``self.files`` under *spider* and the
     exporter in ``self.exporter``.
     """
     products_file = open('%s_products.xml' % spider.name, 'w+b')
     self.files[spider] = products_file
     exporter = XmlItemExporter(products_file)
     self.exporter = exporter
     exporter.start_exporting()