Beispiel #1
0
    def __init__(self, base_url, conf_urls={}, verbosity=1, output_dir=None, ascend=True, **kwargs):
        self.base_url = base_url
        self.conf_urls = conf_urls
        self.verbosity = verbosity
        self.ascend = ascend
        self.auth = kwargs.get('auth') 

        if output_dir:
            assert os.path.isdir(output_dir)
            self.output_dir = os.path.realpath(output_dir)
            LOG.info("Output will be saved to %s" % self.output_dir)
        else:
            self.output_dir = None
        
        #These two are what keep track of what to crawl and what has been.
        self.not_crawled = [(0, 'START',self.base_url)]
        self.crawled = {}

        self.c = Client(REMOTE_ADDR='127.0.0.1')

        #login, and remember the user which was logged in
        if self.auth:
            self._login(self.auth)
            self.user = self.c.session['_auth_user_id']
        
        self.plugins = []
        for plug in Plugin.__subclasses__():
            active = getattr(plug, 'active', True)
            if active:
                #TODO: Check if plugin supports writing CSV (or to a file in general?)
                self.plugins.append(plug())
Beispiel #2
0
    def __init__(self, base_url, conf_urls={}, verbosity=1, output_dir=None, ascend=True, **kwargs):
        self.base_url = base_url
        self.conf_urls = conf_urls
        self.verbosity = verbosity
        self.ascend = ascend

        auth = kwargs.get('auth')

        if output_dir:
            assert os.path.isdir(output_dir)
            self.output_dir = os.path.realpath(output_dir)
            LOG.info("Output will be saved to %s" % self.output_dir)
        else:
            self.output_dir = None

        #These two are what keep track of what to crawl and what has been.
        self.not_crawled = [(0, 'START',self.base_url)]
        self.crawled = {}

        self.c = Client(REMOTE_ADDR='127.0.0.1')

        if auth:
            printable_auth = ', '.join(
                '%s: %s' % (key, cleanse_setting(key.upper(), value))
                for key, value in auth.items())
            LOG.info('Log in with %s' % printable_auth)
            self.c.login(**auth)

        self.plugins = []
        for plug in Plugin.__subclasses__():
            active = getattr(plug, 'active', True)
            if active:
                #TODO: Check if plugin supports writing CSV (or to a file in general?)
                self.plugins.append(plug())