コード例 #1
0
 def __init__(self):
     """Open ``class.pickle`` in binary-write mode and start a pickle export.

     The file is placed in the parent of the directory that contains this
     module (``dirname(dirname(abspath(__file__)))``).
     """
     self.file_name = "class.pickle"
     parent_dir = dirname(dirname(abspath(__file__)))
     self.file_path = join(parent_dir, self.file_name)
     # The handle is kept on the instance; nothing in this method closes it.
     self.file = open(self.file_path, 'wb')
     self.exporter = PickleItemExporter(self.file)
     self.exporter.start_exporting()
コード例 #2
0
    def open_spider(self, spider):
        """Open ``self.file_name`` for writing and start the pickle export.

        ``spider`` is accepted but not used by this method.
        """
        print('Custom export opened')

        # Binary-write mode; the handle is kept on the instance.
        self.file_handle = open(self.file_name, 'wb')

        # Wrap the handle in a PickleItemExporter and begin exporting.
        self.exporter = PickleItemExporter(self.file_handle)
        self.exporter.start_exporting()
コード例 #3
0
ファイル: test_exporters.py プロジェクト: wsppt/scrapy
 def test_nonstring_types_item(self):
     """Round-trip the item from ``_get_nonstring_types_item`` through pickle."""
     expected = self._get_nonstring_types_item()
     buffer = BytesIO()
     exporter = PickleItemExporter(buffer)
     exporter.start_exporting()
     exporter.export_item(expected)
     exporter.finish_exporting()
     # Everything the exporter wrote must unpickle back to an equal item.
     exported = pickle.loads(buffer.getvalue())
     self.assertEqual(exported, expected)
コード例 #4
0
ファイル: test_exporters.py プロジェクト: Rokicto/scrapy
 def test_nonstring_types_item(self):
     """Export one item to an in-memory buffer and compare the unpickled result."""
     source_item = self._get_nonstring_types_item()
     out = BytesIO()
     pickle_exporter = PickleItemExporter(out)
     pickle_exporter.start_exporting()
     pickle_exporter.export_item(source_item)
     pickle_exporter.finish_exporting()
     raw = out.getvalue()
     self.assertEqual(pickle.loads(raw), source_item)
コード例 #5
0
ファイル: test_exporters.py プロジェクト: wsppt/scrapy
 def test_export_multiple_items(self):
     """Export two items and check they unpickle from the stream in order."""
     items = [
         TestItem(name='hello', age='world'),
         TestItem(name='bye', age='world'),
     ]
     stream = BytesIO()
     exporter = PickleItemExporter(stream)
     exporter.start_exporting()
     for item in items:
         exporter.export_item(item)
     exporter.finish_exporting()
     # Rewind so that pickle.load starts at the first exported record.
     stream.seek(0)
     for expected in items:
         self.assertEqual(pickle.load(stream), expected)
コード例 #6
0
 def test_export_multiple_items(self):
     """Export two items and rebuild each from its unpickled record, in order."""
     originals = (
         self.item_class(name="hello", age="world"),
         self.item_class(name="bye", age="world"),
     )
     stream = BytesIO()
     exporter = PickleItemExporter(stream)
     exporter.start_exporting()
     for original in originals:
         exporter.export_item(original)
     exporter.finish_exporting()
     # Rewind so that pickle.load starts at the first exported record.
     stream.seek(0)
     for original in originals:
         # Each record is unpacked as keyword arguments for item_class.
         restored = self.item_class(**pickle.load(stream))
         self.assertEqual(restored, original)
コード例 #7
0
class PickleExporterPipeline(object):
    """Item pipeline that writes every item to a file with PickleItemExporter.

    The output path is read from the ``PICKLE_PATH`` setting in
    ``from_crawler``. The file is opened in ``open_spider`` and closed in
    ``close_spider``.
    """

    def __init__(self, file_name):
        # Path of the output file.
        self.file_name = file_name
        # Handle of the output file; stays None until open_spider runs.
        self.file_handle = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline using the crawler's ``PICKLE_PATH`` setting."""
        return cls(file_name=crawler.settings.get('PICKLE_PATH'))

    def open_spider(self, spider):
        """Open the output file and start the export."""
        print('Custom export opened')

        # Binary-write mode; the handle is kept so close_spider can close it.
        self.file_handle = open(self.file_name, 'wb')

        # Wrap the handle in a PickleItemExporter and begin exporting.
        self.exporter = PickleItemExporter(self.file_handle)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export and close the output file.

        The file is closed even when ``finish_exporting`` raises; the
        exception still propagates to the caller.
        """
        print('Custom Exporter closed')

        try:
            self.exporter.finish_exporting()
        finally:
            # Always release the handle so a failing exporter cannot leak it.
            self.file_handle.close()

    def process_item(self, item, spider):
        """Export ``item`` and return it unchanged."""
        self.exporter.export_item(item)
        return item
コード例 #8
0
class PickleExportPipeline(object):
    """Item pipeline that pickles every item into ``<spider.name>.pickle``.

    Opening and closing the output is driven by the ``spider_opened`` and
    ``spider_closed`` signals connected in ``from_crawler``.
    """

    def __init__(self):
        # Open output files, keyed by the spider they were opened for.
        self.files = dict()
        # Set in spider_opened; one exporter attribute is shared by the pipeline.
        self.exporter = None

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and hook it to the crawler's spider signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open ``<spider.name>.pickle`` and start exporting into it."""
        output = open('%s.pickle' % spider.name, 'w+b')
        self.files[spider] = output
        self.exporter = PickleItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for ``spider``.

        The file is removed from ``self.files`` and closed even when
        ``finish_exporting`` raises; the exception still propagates.
        """
        try:
            self.exporter.finish_exporting()
        finally:
            # Always release the handle so a failing exporter cannot leak it.
            self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Export ``item`` and return it unchanged."""
        self.exporter.export_item(item)
        return item
コード例 #9
0
ファイル: test_exporters.py プロジェクト: Rokicto/scrapy
 def test_export_multiple_items(self):
     """Write two items through the exporter and read them back one by one."""
     first = TestItem(name='hello', age='world')
     second = TestItem(name='bye', age='world')
     buffer = BytesIO()
     exporter = PickleItemExporter(buffer)
     exporter.start_exporting()
     for item in (first, second):
         exporter.export_item(item)
     exporter.finish_exporting()
     # Go back to the start of the buffer before loading the records.
     buffer.seek(0)
     for expected in (first, second):
         loaded = pickle.load(buffer)
         self.assertEqual(loaded, expected)
コード例 #10
0
 def exporter_for_format(feed_format, f):
     """Return an item exporter for ``feed_format`` constructed around ``f``.

     Recognised formats are ``'csv'``, ``'xml'``, ``'json'``,
     ``'jsonlines'``, ``'pickle'`` and ``'marshal'``.

     Raises:
         ValueError: if ``feed_format`` is none of the recognised formats.
     """
     if feed_format == 'csv':
         return CsvItemExporter(f)
     if feed_format == 'xml':
         return XmlItemExporter(f)
     if feed_format == 'json':
         return JsonItemExporter(f)
     if feed_format == 'jsonlines':
         return JsonLinesItemExporter(f)
     if feed_format == 'pickle':
         return PickleItemExporter(f)
     if feed_format == 'marshal':
         return MarshalItemExporter(f)
     # No branch matched: report the unsupported format to the caller.
     message = 'Export format {} is not supported'.format(feed_format)
     raise ValueError(message)
コード例 #11
0
class PickleWriterPipeline(object):
    """Item pipeline that pickles every item into a ``class.pickle`` file.

    The file is opened when the pipeline is constructed and closed in
    ``close_spider``.
    """

    def __init__(self):
        self.file_name = "class.pickle"
        # Parent of the directory that contains this module.
        parent_dir = dirname(dirname(abspath(__file__)))
        self.file_path = join(parent_dir, self.file_name)
        self.file = open(self.file_path, 'wb')
        self.exporter = PickleItemExporter(self.file)
        self.exporter.start_exporting()

    def close_spider(self, spider):
        """Finish the export and close the output file.

        The file is closed even when ``finish_exporting`` raises; the
        exception still propagates to the caller.
        """
        try:
            self.exporter.finish_exporting()
        finally:
            # Always release the handle so a failing exporter cannot leak it.
            self.file.close()

    def process_item(self, item, spider):
        """Export ``item`` and return it unchanged."""
        self.exporter.export_item(item)
        return item
コード例 #12
0
class ProxyPipeline(object):
    """Item pipeline that checks proxy items with a live request and pickles
    the ones that pass into ``data/<spider.name>_Items.p``.

    Opening and closing the output is driven by the ``spider_opened`` and
    ``spider_closed`` signals connected in ``from_crawler``.
    """

    def __init__(self):
        # Open output files, keyed by the spider they were opened for.
        self.files = {}

    @classmethod
    def from_crawler(cls, crawler):
        """Create the pipeline and hook it to the crawler's spider signals."""
        pipeline = cls()
        crawler.signals.connect(pipeline.spider_opened, signals.spider_opened)
        crawler.signals.connect(pipeline.spider_closed, signals.spider_closed)
        return pipeline

    def spider_opened(self, spider):
        """Open ``data/<spider.name>_Items.p`` and start exporting into it."""
        output = open('data/%s_Items.p' % spider.name, 'w+b')
        self.files[spider] = output
        self.exporter = PickleItemExporter(output)
        self.exporter.start_exporting()

    def spider_closed(self, spider):
        """Finish the export and close the file registered for ``spider``.

        The file is removed from ``self.files`` and closed even when
        ``finish_exporting`` raises; the exception still propagates.
        """
        try:
            self.exporter.finish_exporting()
        finally:
            # Always release the handle so a failing exporter cannot leak it.
            self.files.pop(spider).close()

    def process_item(self, item, spider):
        """Validate a proxy item, annotate it, export it and return it.

        The item is read through the keys ``'type'``, ``'ip'`` and
        ``'port'``; on success ``'speed'`` and ``'lastcheck'`` are set.

        Raises:
            DropItem: for a transparent proxy, when the request through the
                proxy fails, when the status code is outside 200-299, or
                when the response took longer than five seconds.
        """
        if item['type'] == 'transparent':
            raise DropItem("Transparent Proxy Dropped")

        # Only the request itself is guarded by the broad handler. The
        # validation below raises DropItem with its own reason, which must
        # not be rewritten into "Cannot Connect".
        try:
            proxy_url = "http://{0}:{1}".format(item['ip'], item['port'])
            # NOTE(review): `rq` is presumably the requests library - confirm.
            response = rq.get('http://www.google.com',
                              proxies={"http": proxy_url}, timeout=2)
        except Exception:
            raise DropItem("Cannot Connect")

        elapsed = response.elapsed
        spider.logger.info('Socket{0}\tElapsed{1}'.format(proxy_url, elapsed))
        if not (200 <= response.status_code < 300):
            raise DropItem("Not valid respose")
        if elapsed > timedelta(seconds=5):
            raise DropItem("Slow connection")

        item["speed"] = elapsed
        item["lastcheck"] = date.today()
        self.exporter.export_item(item)
        return item
コード例 #13
0
ファイル: test_exporters.py プロジェクト: wsppt/scrapy
 def _get_exporter(self, **kwargs):
     """Build a PickleItemExporter over ``self.output``, forwarding ``kwargs``."""
     exporter = PickleItemExporter(self.output, **kwargs)
     return exporter
コード例 #14
0
 def spider_opened(self, spider):
     """Open ``<spider.name>.pickle`` and start a pickle export into it.

     The handle is stored in ``self.files`` under ``spider``.
     """
     output_name = '%s.pickle' % spider.name
     handle = open(output_name, 'w+b')
     self.files[spider] = handle
     self.exporter = PickleItemExporter(handle)
     self.exporter.start_exporting()