Beispiel #1
0
    def open_spider(self, spider):
        """Open the output file and start the marshal export.

        Stores the open binary file on ``self.file_handle`` and the
        ``MarshalItemExporter`` wrapping it on ``self.exporter``.
        The ``spider`` argument is not used.
        """
        print('Custom export opened')

        # The output file is opened in binary-write mode and kept on the
        # instance.
        handle = open(self.file_name, 'wb')
        self.file_handle = handle

        # Wrap the handle in a MarshalItemExporter and begin exporting.
        self.exporter = exporter = MarshalItemExporter(handle)
        exporter.start_exporting()
Beispiel #2
0
 def test_nonstring_types_item(self):
     """Round-trip an item with non-string values through MarshalItemExporter.

     The item is exported to a temporary file, read back with
     ``marshal.load`` and compared to the original item.
     """
     item = self._get_nonstring_types_item()
     item.pop('time')  # datetime is not marshallable
     # The context manager closes the temporary file even when an export
     # step or the assertion fails, so no file handle is leaked.
     with tempfile.TemporaryFile() as fp:
         ie = MarshalItemExporter(fp)
         ie.start_exporting()
         ie.export_item(item)
         ie.finish_exporting()
         fp.seek(0)  # rewind so marshal.load reads from the beginning
         self.assertEqual(marshal.load(fp), item)
Beispiel #3
0
 def test_nonstring_types_item(self):
     """Round-trip an item with non-string values through MarshalItemExporter.

     The item is exported to a temporary file, read back with
     ``marshal.load`` and compared to the original item.
     """
     item = self._get_nonstring_types_item()
     item.pop('time')  # datetime is not marshallable
     # The context manager closes the temporary file even when an export
     # step or the assertion fails, so no file handle is leaked.
     with tempfile.TemporaryFile() as fp:
         ie = MarshalItemExporter(fp)
         ie.start_exporting()
         ie.export_item(item)
         ie.finish_exporting()
         fp.seek(0)  # rewind so marshal.load reads from the beginning
         self.assertEqual(marshal.load(fp), item)
Beispiel #4
0
 def exporter_for_format(feed_format, f):
     """Return an item exporter matching ``feed_format``.

     Args:
         feed_format: One of ``'csv'``, ``'xml'``, ``'json'``,
             ``'jsonlines'``, ``'pickle'`` or ``'marshal'``.
         f: Passed as the only argument to the exporter constructor
             (presumably a writable file object -- confirm with callers).

     Raises:
         ValueError: If ``feed_format`` is not one of the names above.
     """
     # Pick the exporter class first, then instantiate it in one place.
     if feed_format == 'csv':
         exporter_cls = CsvItemExporter
     elif feed_format == 'xml':
         exporter_cls = XmlItemExporter
     elif feed_format == 'json':
         exporter_cls = JsonItemExporter
     elif feed_format == 'jsonlines':
         exporter_cls = JsonLinesItemExporter
     elif feed_format == 'pickle':
         exporter_cls = PickleItemExporter
     elif feed_format == 'marshal':
         exporter_cls = MarshalItemExporter
     else:
         message = 'Export format {} is not supported'.format(feed_format)
         raise ValueError(message)
     return exporter_cls(f)
Beispiel #5
0
class MarshalExporterPipeline(object):
    """Pipeline that exports every processed item through MarshalItemExporter.

    The output path is taken from the ``MARSHAL_PATH`` setting when the
    pipeline is built with ``from_crawler``.
    """

    def __init__(self, file_name):
        """Remember the output path; the file itself is opened in open_spider."""
        # Path of the output file.
        self.file_name = file_name
        # Both attributes are populated by open_spider.
        self.file_handle = None
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the ``MARSHAL_PATH`` value in ``crawler.settings``."""
        return cls(
            file_name=crawler.settings.get('MARSHAL_PATH'),
        )

    def open_spider(self, spider):
        """Open the output file and start the export. ``spider`` is not used."""
        print('Custom export opened')

        # Opening the output file in binary-write mode
        file = open(self.file_name, 'wb')
        self.file_handle = file

        # Creating a MarshalItemExporter object and initiating the export
        self.exporter = MarshalItemExporter(file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export and close the output file. ``spider`` is not used.

        The file is closed even if ``finish_exporting`` raises; the
        exception still propagates to the caller.
        """
        print('Custom Exporter closed')

        try:
            # Ending the export through the MarshalItemExporter object
            self.exporter.finish_exporting()
        finally:
            # Always release the output file, so a failing exporter cannot
            # leak the open handle.
            self.file_handle.close()

    def process_item(self, item, spider):
        """Export ``item`` and return it unchanged. ``spider`` is not used."""
        # Passing the item to the MarshalItemExporter object for exporting
        self.exporter.export_item(item)
        return item
Beispiel #6
0
 def _get_exporter(self, **kwargs):
     """Create a MarshalItemExporter writing to a fresh temporary file.

     The temporary file is stored on ``self.output``; any keyword
     arguments are forwarded to the exporter constructor.
     """
     output = tempfile.TemporaryFile()
     self.output = output
     exporter = MarshalItemExporter(output, **kwargs)
     return exporter