def process_item(self, item, spider):
    """Write *item* to its own XML file, laid out according to the spider name.

    Items from the ``'match'`` spider are written to
    ``matches/<country>/<league>/<season>/<stage>/<matchId>.xml``; items from
    the ``'player'`` spider are written to
    ``players/<name>_<matchId>_<fifaId>.xml``. Missing parent directories are
    created before the file is opened.

    :param item: mapping holding the fields used for the path and the export.
    :param spider: object whose ``name`` attribute selects the export layout.
    :return: *item*, unchanged, for every spider name (items from other
        spiders are passed through without being written).
    :raises OSError: if the output directory cannot be created.
    """

    def ensure_parent_dir(path):
        # Create the directory that will contain *path* if it is missing.
        parent = os.path.dirname(path)
        if os.path.exists(parent):
            return
        try:
            os.makedirs(parent)
        except OSError as exc:
            # Guard against a race: something else created it in between.
            if exc.errno != errno.EEXIST:
                raise

    # Compare names with ``==``: ``is`` tests object identity, which only
    # happens to work for strings when the interpreter interned both sides.
    if spider.name == 'match':
        filename = ('matches/'
                    + item['country']
                    + '/' + item['league']
                    + '/' + item['season']
                    + '/' + str(item['stage'])
                    + '/%s.xml' % item['matchId'])
        ensure_parent_dir(filename)
        with open(filename, 'w+b') as outfile:
            # The handle is closed when this block exits; self.files keeps
            # a reference to it, as the original code did.
            self.files[item['matchId']] = outfile
            self.exporter = XmlItemExporter(outfile)
            self.exporter.fields_to_export = [
                'country', 'league', 'season', 'stage', 'matchId', 'date',
                'homeTeamId', 'awayTeamId',
                'homeTeamFullName', 'awayTeamFullName',
                'homeTeamAcronym', 'awayTeamAcronym',
                'homeTeamGoal', 'awayTeamGoal',
                'homePlayers', 'awayPlayers',
                'homePlayersId', 'awayPlayersId',
                'homePlayersX', 'awayPlayersX',
                'homePlayersY', 'awayPlayersY',
                'goal', 'shoton', 'shotoff', 'foulcommit',
                'card', 'cross', 'corner', 'possession',
            ]
            self.exporter.export_item(item)
        return item

    if spider.name == 'player':
        filename = ('players/'
                    + item['name'] + '_' + item['matchId']
                    + '_' + item['fifaId'] + '.xml')
        ensure_parent_dir(filename)
        with open(filename, 'w+b') as outfile:
            # Use the handle opened above; the original referenced the
            # unrelated name ``file`` here.
            self.files[item['name']] = outfile
            self.exporter = XmlItemExporter(outfile)
            self.exporter.fields_to_export = [
                'name', 'matchId', 'fifaId', 'birthday',
                'height', 'weight', 'stats',
            ]
            self.exporter.export_item(item)
        return item

    # Any other spider: hand the item on untouched instead of returning None.
    return item
def __init__(self):
    """Open ``myfile/csdnarticle.xml`` and start an XML export into it.

    The handle is stored in ``self.files`` under the key ``'csdnarticle'``
    and the exporter in ``self.exporter``.
    """
    self.files = {}
    # Binary mode: the exporter writes encoded bytes, which a text-mode
    # handle rejects on Python 3.
    file = open('myfile/%s.xml' % 'csdnarticle', 'wb')
    self.files['csdnarticle'] = file
    # Instantiate an XmlItemExporter object.
    self.exporter = XmlItemExporter(file)
    self.exporter.start_exporting()
def assertExportResult(self, item, expected_value):
    """Export *item* with a fresh XmlItemExporter and check the output.

    The item is serialised into an in-memory byte buffer; the bytes produced
    are passed to ``self.assertXmlEquivalent`` together with *expected_value*.
    """
    buffer = BytesIO()
    exporter = XmlItemExporter(buffer)
    exporter.start_exporting()
    exporter.export_item(item)
    exporter.finish_exporting()
    produced = buffer.getvalue()
    self.assertXmlEquivalent(produced, expected_value)
def spider_opened(self, spider):
    """Open the XML output file for *spider* and start exporting.

    The path is ``<STORAGE_DIR>/<JIRA_ID>/<leaf>``, where the leaf is
    ``self.file_name`` when that is truthy and ``<spider.name>_items.xml``
    otherwise. The handle is kept in ``self.file`` and the exporter in
    ``self.exporter``.
    """
    storage_dir = settings.get('STORAGE_DIR')
    jira_id = self._settings.get('JIRA_ID')
    if self.file_name:
        leaf = self.file_name
    else:
        leaf = '%s_items.xml' % spider.name
    outpath = os.path.join(storage_dir, jira_id, leaf)

    # NOTE(review): createFolder receives the full file path; presumably it
    # creates the parent directory - confirm against its definition.
    self.createFolder(outpath)

    self.file = open(outpath, 'w+b')
    self.exporter = XmlItemExporter(self.file)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<FILES_STORE>/export.xml`` and start an XML export into it.

    The directory comes from ``spider.settings['FILES_STORE']``. Items are
    exported as ``<game>`` elements under a ``<games>`` root. The handle is
    recorded in ``self.files`` keyed by *spider*.
    """
    target = spider.settings['FILES_STORE'] + '/%s.xml' % 'export'
    handle = open(target, 'w+b')
    self.files[spider] = handle
    self.exporter = XmlItemExporter(
        handle,
        item_element='game',
        root_element='games',
    )
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<spider.name>_all.xml`` and start an XML export into it.

    The export is restricted to a fixed list of fields. The handle is
    recorded in ``self.files`` keyed by *spider*.
    """
    handle = open('%s_all.xml' % spider.name, 'w+b')
    self.files[spider] = handle

    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.fields_to_export = [
        'title',
        'genres',
        'rating',
        'description',
        'authors',
        'published',
        'link',
    ]
    exporter.start_exporting()
def open_spider(self, spider):
    """Open ``self.file_name`` for binary writing and start an XML export.

    Prints a notice first, then stores the handle in ``self.file_handle`` and
    the exporter in ``self.exporter``.
    """
    print('Custom export opened')

    # Binary-write mode: the file is truncated and receives raw bytes.
    handle = open(self.file_name, 'wb')
    self.file_handle = handle

    # Create the XmlItemExporter and begin the export.
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open the XML output and start exporting.

    Opens ``<spider.name>_products.xml`` for binary read/write, records the
    handle in ``self.files`` keyed by *spider*, and starts an
    XmlItemExporter on it.

    :param spider: object whose ``name`` prefixes the output file name
    :return: None
    """
    output_name = '%s_products.xml' % spider.name
    handle = open(output_name, 'w+b')
    self.files[spider] = handle
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<spider.name>_adverts.xml`` and start an XML export into it.

    After construction the exporter is configured with root element
    ``books``, item element ``book`` and a fixed field list. The handle is
    recorded in ``self.files`` keyed by *spider*.
    """
    handle = open('%s_adverts.xml' % spider.name, 'w+b')
    self.files[spider] = handle

    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    # Element names are set on the instance before the export starts.
    exporter.root_element = 'books'
    exporter.item_element = 'book'
    exporter.fields_to_export = [
        'title',
        'upc',
        'category',
        'data_modified',
        'price_tax',
        'price_no_tax',
        'tax',
        'availability',
        'reviews_nr',
        'description',
        'product_type',
        'thumbnail',
        'images',
    ]
    exporter.start_exporting()
def process_item(self, item, spider):
    """Export *item* as a standalone XML document under ``data/``.

    The output file is ``data/<first element of item['filename']>.xml``. A
    fresh exporter is created per item, restricted to a fixed field list, and
    the export is started and finished within this call.

    :param item: mapping with a ``'filename'`` entry and the exported fields.
    :param spider: unused.
    :return: *item*, unchanged.
    """
    filename = item['filename']
    # ``with`` closes the file even if the export raises part-way through;
    # the original leaked the handle in that case.
    with open('data/{}.xml'.format(filename[0]), 'wb') as outfile:
        exporter = XmlItemExporter(outfile)
        exporter.start_exporting()
        exporter.fields_to_export = [
            "docno", "http_header", "title", "text",
            "html_source", "author", "depth", "url",
        ]
        exporter.export_item(item)
        exporter.finish_exporting()
    return item
def exporter_for_format(feed_format, f):
    """Return an item exporter for *feed_format* writing to *f*.

    Recognised formats are ``csv``, ``xml``, ``json``, ``jsonlines``,
    ``pickle`` and ``marshal``.

    :param feed_format: name of the export format.
    :param f: object passed as the only argument to the exporter class.
    :raises ValueError: if *feed_format* is not one of the recognised names.
    """
    # The exporter class is resolved only inside the matching branch, so an
    # unsupported format raises ValueError without touching any class name.
    if feed_format == 'csv':
        exporter_cls = CsvItemExporter
    elif feed_format == 'xml':
        exporter_cls = XmlItemExporter
    elif feed_format == 'json':
        exporter_cls = JsonItemExporter
    elif feed_format == 'jsonlines':
        exporter_cls = JsonLinesItemExporter
    elif feed_format == 'pickle':
        exporter_cls = PickleItemExporter
    elif feed_format == 'marshal':
        exporter_cls = MarshalItemExporter
    else:
        message = 'Export format {} is not supported'.format(feed_format)
        raise ValueError(message)
    return exporter_cls(f)
def _exporter_for_item(self, item):
    """Return the exporter for ``item['year']``, creating it on first use.

    Exporters are cached in ``self.year_to_exporter``. A year seen for the
    first time gets its own ``<year>.xml`` file, opened for binary writing,
    and an XmlItemExporter whose export is started immediately.
    """
    year = item['year']
    cache = self.year_to_exporter
    if year in cache:
        return cache[year]

    handle = open('{}.xml'.format(year), 'wb')
    new_exporter = XmlItemExporter(handle)
    new_exporter.start_exporting()
    cache[year] = new_exporter
    return cache[year]
def spider_opened(self, spider):
    """Open ``<spider.name>_dump.xml`` and start a UTF-8 XML export into it.

    The handle is recorded in ``self.files`` keyed by *spider*.
    """
    dump_name = '%s_dump.xml' % spider.name
    handle = open(dump_name, 'wb')
    self.files[spider] = handle
    exporter = XmlItemExporter(handle, encoding='utf-8')
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<spider.name>.xml`` for writing and start an XML export into it.

    The handle is recorded in ``self.files`` keyed by *spider* and the
    exporter is stored in ``self.exporter``.
    """
    outfile = open('%s.xml' % spider.name, 'w+b')
    self.files[spider] = outfile
    # Export into the handle opened above; the original passed the unrelated
    # name ``file``, which is undefined on Python 3.
    self.exporter = XmlItemExporter(outfile)
    self.exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``myfile/<spider.name>.xml`` and start an XML export into it.

    The handle is recorded in ``self.files`` keyed by *spider*.
    """
    target = 'myfile/%s.xml' % spider.name
    handle = open(target, 'w+b')
    self.files[spider] = handle
    # Instantiate an XmlItemExporter object.
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def __init__(self):
    """Open ``assets/movies.xml`` and start a UTF-8 XML export into it."""
    handle = open("assets/movies.xml", 'wb')
    self.file = handle
    exporter = XmlItemExporter(handle, encoding='utf-8')
    self.exporter = exporter
    exporter.start_exporting()
def __init__(self):
    """Open the products XML file and create an exporter for it.

    Items are configured to be written as ``<item>`` elements under a
    ``<root>`` element.
    """
    # NOTE(review): start_exporting() is not called here; presumably it is
    # invoked elsewhere before items are exported - confirm.
    handle = open(
        '/home/CORPUSERS/xp017845/zxmcrawl/caipiao/cp_products.xml', 'w+b')
    self.file = handle
    self.exporter = XmlItemExporter(
        handle,
        item_element='item',
        root_element='root',
    )
def spider_opened(self, spider):
    """Open ``<spider.name>_urls.xml`` and start an XML export into it.

    The handle is recorded in ``self.files`` keyed by *spider*.
    """
    urls_name = '%s_urls.xml' % spider.name
    handle = open(urls_name, 'w+b')
    self.files[spider] = handle
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def __init__(self):
    """Open an XML file named after today's date and start exporting.

    The file name is the string form of the current local date followed by
    ``.xml``.
    """
    today = datetime.datetime.now().date()
    target = str(today) + '.xml'
    self.file = open(target, 'wb')
    exporter = XmlItemExporter(file=self.file)
    self.exporter = exporter
    exporter.start_exporting()
def _get_exporter(self, **kwargs):
    """Build an XmlItemExporter that writes to ``self.output``.

    Keyword arguments are forwarded to the exporter's constructor unchanged.
    """
    exporter = XmlItemExporter(self.output, **kwargs)
    return exporter
def __init__(self):
    """Open ``qidian_dev.xls`` for binary writing and create a UTF-8 XML exporter.

    The handle is stored in ``self.fp`` and the exporter in ``self.exporter``.
    """
    # NOTE(review): the file carries an .xls extension although XML is
    # written to it; the name is kept because other code may rely on it.
    self.fp = open('qidian_dev.xls', 'wb')
    # ``ensure_ascii`` is a JSON-exporter option; XmlItemExporter rejects
    # unknown options with TypeError, so only ``encoding`` is passed.
    self.exporter = XmlItemExporter(self.fp, encoding='utf-8')
def open_spider(self, spider):
    """Open ``honglingjing.xml`` and start an XML export into it.

    *spider* is not used.
    """
    handle = open('honglingjing.xml', 'wb')
    self.file = handle
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def __init__(self):
    """Open ``book2.xml`` and start a UTF-8 XML export into it."""
    handle = open('book2.xml', 'wb')
    self.file = handle
    exporter = XmlItemExporter(file=handle, encoding='utf-8')
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``europython_items.xml`` and start an XML export into it.

    The handle is recorded in ``self.files`` keyed by *spider*.
    """
    handle = open('europython_items.xml', 'w+b')
    self.files[spider] = handle
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def open_spider(self, spider):
    """Open ``dianpincity.xml`` and start an XML export into it.

    *spider* is not used.
    """
    handle = open('dianpincity.xml', 'wb')
    self.file = handle
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def open_spider(self, spider):
    """Open ``amazon_bestseller.xml`` and start an XML export into it.

    *spider* is not used.
    """
    handle = open('amazon_bestseller.xml', 'wb')
    self.file = handle
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()
def open_spider(self, spider):
    """Open ``<spider.name>_products.xml`` and start an XML export into it.

    The handle is recorded in ``self.files`` keyed by *spider*.
    """
    products_name = '%s_products.xml' % spider.name
    handle = open(products_name, 'w+b')
    self.files[spider] = handle
    exporter = XmlItemExporter(handle)
    self.exporter = exporter
    exporter.start_exporting()