def assertExportResult(self, item, expected_value):
    """Export ``item`` as XML and compare the output to ``expected_value``.

    The item goes through one full start/export/finish cycle of an
    ``XmlItemExporter`` that writes into an in-memory ``BytesIO``; the
    captured bytes are then checked with ``self.assertXmlEquivalent``.
    """
    sink = BytesIO()
    exporter = XmlItemExporter(sink)

    exporter.start_exporting()
    exporter.export_item(item)
    exporter.finish_exporting()

    exported = sink.getvalue()
    self.assertXmlEquivalent(exported, expected_value)
def test_multivalued_fields(self):
    """Check that a list-valued field is written as one <value> per entry."""
    item = TestItem(name=[u'John\xa3', u'Doe'])
    # The expected literal spells the non-ASCII character of the first name
    # as the two byte escapes \xc2\xa3.
    expected_value = '<?xml version="1.0" encoding="utf-8"?>\n<items><item><name><value>John\xc2\xa3</value><value>Doe</value></name></item></items>'

    stream = StringIO()
    exporter = XmlItemExporter(stream)
    exporter.start_exporting()
    exporter.export_item(item)
    exporter.finish_exporting()

    actual = stream.getvalue()
    self.assertEqual(actual, expected_value)
def process_item(self, item, spider):
    """Write ``item`` to its own XML file and return the item unchanged.

    Every call bumps the class-level ``XmlExportPipeline.count`` and writes
    the item to ``reuters/<YYYYMMDD>/<count>_item.xml``, where the date part
    comes from ``spider.date.strftime('%Y%m%d')``.  The document uses
    ``items`` as root element and ``story`` as the per-item element.

    The target directory is not created here; ``open`` raises if it is
    missing.

    :param item: the item to export.
    :param spider: the running spider; must expose a ``date`` attribute
        with a ``strftime`` method.
    :returns: ``item``, so later pipeline stages can keep processing it.
    """
    XmlExportPipeline.count += 1
    self.outdir = spider.date.strftime('%Y%m%d')
    path = 'reuters/%s/%s_item.xml' % (self.outdir, XmlExportPipeline.count)

    # One file per item: close it as soon as the document is finished so a
    # long crawl does not leak one open handle for every exported item.
    with open(path, 'w+b') as xml_file:
        # Kept as attributes for backward compatibility with any code that
        # inspects them; note that self.file is closed once this returns.
        self.file = xml_file
        self.exporter = XmlItemExporter(xml_file,
                                        root_element='items',
                                        item_element='story')
        self.exporter.start_exporting()
        self.exporter.export_item(item)
        self.exporter.finish_exporting()
    return item
def test_nested_list_item(self):
    """Check the XML produced for a field holding a list of items.

    Each nested item is expected inside its own <value> element under the
    field that carries the list.
    """
    first_child = TestItem(name=u'foo')
    second_child = TestItem(name=u'bar')
    parent = TestItem(name=u'buz', age=[first_child, second_child])

    expected_value = (
        '<?xml version="1.0" encoding="utf-8"?>\n'
        '<items><item>'
        '<age>'
        '<value><name>foo</name></value>'
        '<value><name>bar</name></value>'
        '</age>'
        '<name>buz</name>'
        '</item></items>'
    )

    stream = StringIO()
    exporter = XmlItemExporter(stream)
    exporter.start_exporting()
    exporter.export_item(parent)
    exporter.finish_exporting()

    self.assertEqual(stream.getvalue(), expected_value)
def test_nested_item(self):
    """Check the XML produced for items nested three levels deep.

    The outer item's ``age`` field holds a second item whose own ``age``
    field holds a third; the result is compared with
    ``self.assertXmlEquivalent``.
    """
    innermost = TestItem(name=u'foo\xa3hoo', age='22')
    middle = TestItem(name=u'bar', age=innermost)
    outer = TestItem(name=u'buz', age=middle)

    expected_value = (
        '<?xml version="1.0" encoding="utf-8"?>\n'
        '<items><item>'
        '<age>'
        '<age>'
        '<age>22</age>'
        '<name>foo\xc2\xa3hoo</name>'
        '</age>'
        '<name>bar</name>'
        '</age>'
        '<name>buz</name>'
        '</item></items>'
    )

    sink = BytesIO()
    exporter = XmlItemExporter(sink)
    exporter.start_exporting()
    exporter.export_item(outer)
    exporter.finish_exporting()

    self.assertXmlEquivalent(sink.getvalue(), expected_value)
def _get_exporter(self, **kwargs):
    """Build an ``XmlItemExporter`` that writes to ``self.output``.

    Any keyword arguments are passed straight through to the exporter's
    constructor.
    """
    exporter = XmlItemExporter(self.output, **kwargs)
    return exporter
def spider_opened(self, spider):
    """Open ``postUGR_withLabel.xml`` and start an XML exporter on it.

    The file handle is recorded in ``self.files`` under ``spider`` and the
    exporter is kept on ``self.exporter``.  The file is not closed here.
    """
    xml_file = open('postUGR_withLabel.xml', 'w+b')
    self.files[spider] = xml_file

    exporter = XmlItemExporter(xml_file)
    self.exporter = exporter
    exporter.start_exporting()
def create_xml(self, spider):
    """Open the XML dump file for ``spider`` and start exporting to it.

    The path comes from ``self.get_xml_path(spider)``.  The handle is
    recorded in ``self.files`` under ``spider``; the exporter, configured
    with ``products`` as root element and ``product`` as item element, is
    kept on ``self.exporter``.  The file is not closed here.
    """
    xml_path = self.get_xml_path(spider)
    handle = open(xml_path, 'w+b')
    self.files[spider] = handle

    exporter = XmlItemExporter(
        handle,
        root_element="products",
        item_element="product",
    )
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``<spider.name>_products.xml`` and start an XML exporter on it.

    The file handle is recorded in ``self.files`` under ``spider`` and the
    exporter is kept on ``self.exporter``.  The file is not closed here.
    """
    filename = '%s_products.xml' % spider.name
    products_file = open(filename, 'w+b')
    self.files[spider] = products_file

    exporter = XmlItemExporter(products_file)
    self.exporter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Open ``bbsData.xml`` for binary writing and start an XML exporter.

    The handle is kept on ``self.file`` and the exporter on
    ``self.expoter``.  ``spider`` is not used.  The file is not closed
    here.
    """
    handle = open('bbsData.xml', 'wb')
    self.file = handle

    # NOTE(review): "expoter" looks like a misspelling of "exporter"; the
    # attribute name is kept as-is because other methods may read it.
    exporter = XmlItemExporter(handle)
    self.expoter = exporter
    exporter.start_exporting()
def spider_opened(self, spider):
    """Prepare per-spider state and start exporting to an XML file.

    An empty set is stored in ``self.duplicates`` under ``spider``, then
    ``<spider.name>_items.xml`` is opened, recorded in ``self.files`` under
    ``spider``, and wrapped in an exporter kept on ``self.exporter``.  The
    file is not closed here.
    """
    self.duplicates[spider] = set()

    filename = '%s_items.xml' % spider.name
    items_file = open(filename, 'w+b')
    self.files[spider] = items_file

    exporter = XmlItemExporter(items_file)
    self.exporter = exporter
    exporter.start_exporting()