def process_item(self, item, spider):
    """Export one item to its own XML file and return the item.

    The file is written to
    ``<output_dir>/<category>/<language>/<topic>.xml``, where ``output_dir``
    is read from ``conf`` and every other component is passed through
    ``self._get_valid_dirname``. Missing parent directories are created and
    an existing file at that path is overwritten.

    Args:
        item: Subscriptable item providing the ``category``, ``language``
            and ``topic`` fields.
        spider: The spider that produced the item (not used here).

    Returns:
        The same ``item`` that was passed in.

    Raises:
        DropItem: If any of ``category``, ``language`` or ``topic`` is
            empty/falsy.
    """
    category = item['category']
    language = item['language']
    topic = item['topic']

    # All three fields are needed to build the path. The previous check,
    # `not category and language and topic`, parsed as
    # `(not category) and language and topic` and therefore let items with
    # an empty language or topic through.
    if not (category and language and topic):
        raise DropItem("No VocabularyListItem")

    target_dir = os.path.join(
        conf['output_dir'],
        self._get_valid_dirname(category),
        self._get_valid_dirname(language),
    )
    file_path = os.path.join(
        target_dir, self._get_valid_dirname(topic) + '.xml')

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # The `with` statement closes the file; no explicit close() is needed.
    with open(file_path, 'w+b') as f:
        # Kept on the instance because the original code exposed it there.
        self.exporter = XmlVocabularyListItemExporter(
            f,
            item_element=conf['xml_item_element'],
            root_element=conf['xml_root_element'])
        self.exporter.start_exporting()
        self.exporter.export_item(item)
        self.exporter.finish_exporting()

    return item
class XmlExportPipeline(object):
    """Item pipeline that writes each item to its own XML file.

    Every processed item is exported with ``XmlVocabularyListItemExporter``
    to ``<output_dir>/<category>/<language>/<topic>.xml``.
    """

    def process_item(self, item, spider):
        """Export one item to its own XML file and return the item.

        ``output_dir`` and the XML element names are read from ``conf``.
        Missing parent directories are created and an existing file at the
        target path is overwritten.

        Args:
            item: Subscriptable item providing the ``category``,
                ``language`` and ``topic`` fields.
            spider: The spider that produced the item (not used here).

        Returns:
            The same ``item`` that was passed in.

        Raises:
            DropItem: If any of ``category``, ``language`` or ``topic`` is
                empty/falsy.
        """
        category = item['category']
        language = item['language']
        topic = item['topic']

        # All three fields are needed to build the path. The previous
        # check, `not category and language and topic`, parsed as
        # `(not category) and language and topic` and therefore let items
        # with an empty language or topic through.
        if not (category and language and topic):
            raise DropItem("No VocabularyListItem")

        target_dir = os.path.join(
            conf['output_dir'],
            self._get_valid_dirname(category),
            self._get_valid_dirname(language),
        )
        file_path = os.path.join(
            target_dir, self._get_valid_dirname(topic) + '.xml')

        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # The `with` statement closes the file; no explicit close() needed.
        with open(file_path, 'w+b') as f:
            # Kept on the instance because the original code exposed it.
            self.exporter = XmlVocabularyListItemExporter(
                f,
                item_element=conf['xml_item_element'],
                root_element=conf['xml_root_element'])
            self.exporter.start_exporting()
            self.exporter.export_item(item)
            self.exporter.finish_exporting()

        return item

    def _get_valid_dirname(self, name):
        """Return ``name`` with every ``/`` replaced by ``-``.

        This keeps a single field value from being split into several path
        components when it is used as a directory or file name.
        """
        return name.replace("/", "-")