def open_spider(self, spider):
    """Open the output file and begin the marshal export.

    Opens ``self.file_name`` in binary-write mode, wraps the handle in a
    ``MarshalItemExporter`` and calls ``start_exporting()`` on it. On
    success the handle is stored on ``self.file_handle`` and the exporter
    on ``self.exporter``.

    ``spider`` is accepted but not used by this method.
    """
    print('Custom export opened')
    # Opening file in binary-write mode
    file = open(self.file_name, 'wb')
    try:
        # Creating a MarshalItemExporter object and initiating export
        exporter = MarshalItemExporter(file)
        exporter.start_exporting()
    except Exception:
        # Exporter setup failed: release the descriptor instead of leaking
        # it, then let the original error propagate unchanged.
        file.close()
        raise
    self.file_handle = file
    self.exporter = exporter
def test_nonstring_types_item(self):
    """Check that an exported item can be read back with ``marshal.load``.

    The item comes from ``self._get_nonstring_types_item()``; its ``'time'``
    entry is dropped before exporting. The item is written with
    ``MarshalItemExporter`` to a temporary file, then loaded back and
    compared for equality with the original.
    """
    item = self._get_nonstring_types_item()
    item.pop('time')  # datetime is not marshallable
    # The context manager closes the temporary file even when the export or
    # the assertion fails, so no descriptor is left open by the test.
    with tempfile.TemporaryFile() as fp:
        ie = MarshalItemExporter(fp)
        ie.start_exporting()
        ie.export_item(item)
        ie.finish_exporting()
        # Rewind so marshal.load reads from the start of what was written.
        fp.seek(0)
        self.assertEqual(marshal.load(fp), item)
def exporter_for_format(feed_format, f):
    """Return an item exporter for ``feed_format`` that writes to ``f``.

    Recognised format names are ``'csv'``, ``'xml'``, ``'json'``,
    ``'jsonlines'``, ``'pickle'`` and ``'marshal'``.

    Raises:
        ValueError: if ``feed_format`` matches none of the names above.
    """
    # Ordered (name, factory) pairs. Each factory is a thunk so the exporter
    # class is only looked up and instantiated for the format requested.
    factories = (
        ('csv', lambda: CsvItemExporter(f)),
        ('xml', lambda: XmlItemExporter(f)),
        ('json', lambda: JsonItemExporter(f)),
        ('jsonlines', lambda: JsonLinesItemExporter(f)),
        ('pickle', lambda: PickleItemExporter(f)),
        ('marshal', lambda: MarshalItemExporter(f)),
    )
    for format_name, build in factories:
        if feed_format == format_name:
            return build()
    raise ValueError(
        'Export format {} is not supported'.format(feed_format))
class MarshalExporterPipeline(object):
    """Pipeline that exports every processed item through MarshalItemExporter.

    The output path is taken from the ``MARSHAL_PATH`` crawler setting. The
    file is opened in ``open_spider`` and released in ``close_spider``.
    """

    def __init__(self, file_name):
        # Storing output filename
        self.file_name = file_name
        # Handle and exporter stay None until open_spider succeeds, which
        # lets close_spider tell whether there is anything to release.
        self.file_handle = None
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's ``MARSHAL_PATH`` setting."""
        return cls(
            file_name=crawler.settings.get('MARSHAL_PATH'),
        )

    def open_spider(self, spider):
        """Open the output file and start the export."""
        print('Custom export opened')
        # Opening file in binary-write mode
        file = open(self.file_name, 'wb')
        try:
            # Creating a MarshalItemExporter object and initiating export
            exporter = MarshalItemExporter(file)
            exporter.start_exporting()
        except Exception:
            # Exporter setup failed: close the file rather than leak it,
            # then re-raise the original error.
            file.close()
            raise
        self.file_handle = file
        self.exporter = exporter

    def close_spider(self, spider):
        """Finish the export and close the output file.

        Safe to call when ``open_spider`` never ran or failed: missing
        exporter/handle are skipped instead of raising ``AttributeError``.
        """
        print('Custom Exporter closed')
        try:
            # Ending the export to file from the MarshalItemExporter object
            if self.exporter is not None:
                self.exporter.finish_exporting()
        finally:
            # Closing the opened output file, even if finishing the export
            # raised, so the descriptor is never left open.
            if self.file_handle is not None:
                self.file_handle.close()

    def process_item(self, item, spider):
        """Export ``item`` to the output file and return it unchanged."""
        # Passing the item to the MarshalItemExporter object for exporting
        self.exporter.export_item(item)
        return item
def _get_exporter(self, **kwargs):
    """Return a MarshalItemExporter that writes to a new temporary file.

    The temporary file is stored on ``self.output``; any keyword arguments
    are passed straight through to the exporter's constructor.
    """
    sink = tempfile.TemporaryFile()
    self.output = sink
    return MarshalItemExporter(sink, **kwargs)