Esempio n. 1
0
 def __init__(self, settings, stats):
     """Configure the HTTP cache from ``settings``.

     The values of ``HTTPCACHE_POLICY`` and ``HTTPCACHE_STORAGE`` are each
     passed to ``load_object`` and the result is called with ``settings``.
     ``stats`` is stored on the instance as-is.

     Raises:
         NotConfigured: if ``HTTPCACHE_ENABLED`` evaluates to false.
     """
     cache_enabled = settings.getbool('HTTPCACHE_ENABLED')
     if not cache_enabled:
         raise NotConfigured

     # Policy is built before storage, mirroring the settings lookup order.
     policy_factory = load_object(settings['HTTPCACHE_POLICY'])
     self.policy = policy_factory(settings)
     storage_factory = load_object(settings['HTTPCACHE_STORAGE'])
     self.storage = storage_factory(settings)

     self.ignore_missing = settings.getbool('HTTPCACHE_IGNORE_MISSING')
     self.stats = stats
Esempio n. 2
0
 def __init__(self):
     """Load the packaged ``mime.types`` data and resolve ``CLASSES``.

     ``self.classes`` ends up with the same keys as ``self.CLASSES``; each
     value is the result of calling ``load_object`` on the original value.
     """
     self.classes = {}
     self.mimetypes = MimeTypes()
     # The MIME table ships as package data inside 'dscrapy'.
     raw_mime_table = get_data('dscrapy', 'mime.types')
     self.mimetypes.readfp(StringIO(raw_mime_table))
     self.classes.update(
         (mimetype, load_object(target))
         for mimetype, target in self.CLASSES.iteritems()
     )
Esempio n. 3
0
    def __init__(self, global_settings, global_signals):
        """Instantiate one download handler per configured scheme.

        Handler entries are read from the ``DOWNLOAD_HANDLERS_BASE`` setting
        and overridden by ``DOWNLOAD_HANDLERS``. Each non-``None`` value is
        resolved with ``load_object`` and called with ``global_settings``.
        Successfully built handlers are stored in ``self._handlers``; for
        handlers that raise ``NotConfigured`` the exception message is kept
        in ``self._notconfigured`` under the same scheme.

        ``self._close`` is connected to the ``engine_stopped`` signal on
        ``global_signals``.
        """
        self._handlers = {}
        self._notconfigured = {}
        # Merge into a fresh dict. Updating the object returned by
        # ``global_settings.get`` in place would write the overrides into
        # the shared base setting itself. ``or {}`` also tolerates a
        # setting that is missing or explicitly None.
        handlers = dict(global_settings.get('DOWNLOAD_HANDLERS_BASE') or {})
        handlers.update(global_settings.get('DOWNLOAD_HANDLERS', {}) or {})
        for scheme, clspath in handlers.items():
            # Allow to disable a handler just like any other
            # component (extension, middleware, etc).
            if clspath is None:
                continue
            cls = load_object(clspath)
            try:
                dh = cls(global_settings)
            except NotConfigured as ex:
                self._notconfigured[scheme] = str(ex)
            else:
                self._handlers[scheme] = dh

        global_signals.connect(self._close, signals.engine_stopped)
Esempio n. 4
0
    def from_settings(cls, settings, signals=None, stats=None):
        """Build an instance of ``cls`` from the middlewares named in settings.

        Each entry returned by ``cls._get_mwlist_from_settings(settings)`` is
        resolved with ``load_object``. A middleware class exposing
        ``from_settings`` is built through it with ``(settings, signals,
        stats)``; otherwise the class is called with no arguments.

        A middleware raising ``NotConfigured`` is skipped; a warning is
        logged only when the exception carries arguments. The names of the
        enabled middlewares are logged at INFO level before returning
        ``cls(*middlewares)``.
        """
        middlewares = []
        for clspath in cls._get_mwlist_from_settings(settings):
            try:
                mwcls = load_object(clspath)
                if hasattr(mwcls, 'from_settings'):
                    instance = mwcls.from_settings(settings, signals, stats)
                else:
                    instance = mwcls()
            except NotConfigured as exc:
                # A bare NotConfigured (no args) disables the component silently.
                if not exc.args:
                    continue
                short_name = clspath.split('.')[-1]
                log.msg(format="Disabled %(clsname)s: %(eargs)s",
                        level=log.WARNING, clsname=short_name,
                        eargs=exc.args[0])
            else:
                middlewares.append(instance)

        enabled_names = ', '.join(mw.__class__.__name__ for mw in middlewares)
        log.msg(format="Enabled %(componentname)ss: %(enabledlist)s",
                level=log.INFO, componentname=cls.component_name,
                enabledlist=enabled_names)
        return cls(*middlewares)
Esempio n. 5
0
 def __init__(self, settings):
     """Create the persistent connection pool and the client context factory.

     The pool's ``maxPersistentPerHost`` is taken from the
     ``CONCURRENT_REQUESTS_PER_DOMAIN`` setting. The context factory class
     is resolved from ``DOWNLOADER_CLIENTCONTEXTFACTORY`` via ``load_object``
     and instantiated with no arguments.
     """
     pool = HTTPConnectionPool(reactor, persistent=True)
     pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
     # NOTE(review): presumably silences the pool factory's connection
     # logging -- confirm against the Twisted version in use.
     pool._factory.noisy = False
     self._pool = pool

     context_factory_cls = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
     self._contextFactoryClass = context_factory_cls
     self._contextFactory = context_factory_cls()
Esempio n. 6
0
from dscrapy.downloader import Downloader
from dscrapy.http import Request

class DownloaderHandler:
    """Service handler that forwards ``download`` calls to a downloader.

    Declares that it implements ``DownloaderService.Iface``.
    """
    implements(DownloaderService.Iface)

    def __init__(self, downloader):
        """Store ``downloader`` and start with an empty ``log`` dict."""
        self.downloader = downloader
        self.log = {}

    def download(self, r):
        """Print the URL of ``r`` and ask the downloader to fetch it.

        A ``Request`` is built from ``r.url`` and passed to
        ``self.downloader.fetch`` with ``None`` as the second argument.
        The result of ``fetch`` is not returned.
        """
        url = r.url
        # Single-argument parenthesised form: prints the same text whether
        # ``print`` is a statement or a function.
        print('download( %s )' % (url))
        self.downloader.fetch(Request(url), None)

if __name__ == '__main__':
    # Shared settings, signal manager and stats collector. The stats class
    # is resolved from the STATS_CLASS setting.
    global_settings = DScrapySettings()
    global_signals = SignalManager()
    stats_cls = load_object(global_settings['STATS_CLASS'])
    global_stats = stats_cls(global_settings)

    downloader = Downloader(global_settings, global_signals, global_stats)
    handler = DownloaderHandler(downloader)

    # Serve the handler over the Thrift binary protocol on 127.0.0.1:9090
    # using the Twisted reactor.
    processor = DownloaderService.Processor(handler)
    pfactory = TBinaryProtocol.TBinaryProtocolFactory()
    server_factory = TTwisted.ThriftServerFactory(processor, pfactory)
    server = reactor.listenTCP(9090, server_factory, interface="127.0.0.1")
    reactor.run()

Esempio n. 7
0
 def __init__(self, settings):
     """Resolve the HTTP client and client context factories from settings.

     ``DOWNLOADER_HTTPCLIENTFACTORY`` and ``DOWNLOADER_CLIENTCONTEXTFACTORY``
     are each passed through ``load_object``; the results are stored on the
     instance without being called.
     """
     http_client_factory = load_object(settings['DOWNLOADER_HTTPCLIENTFACTORY'])
     client_context_factory = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
     self.HTTPClientFactory = http_client_factory
     self.ClientContextFactory = client_context_factory