def __init__(self, base_url, conf_urls=None, verbosity=1, output_dir=None,
             ascend=True, **kwargs):
    """Set up crawl bookkeeping, the client, optional login and plugins.

    Args:
        base_url: Starting URL; seeded as the single entry of
            ``self.not_crawled``.
        conf_urls: Stored as ``self.conf_urls``. ``None`` (the default)
            gives every instance its own fresh, empty dict.
        verbosity: Stored as ``self.verbosity``.
        output_dir: Optional path of an existing directory. Its real path
            is stored as ``self.output_dir``; a falsy value leaves
            ``self.output_dir`` set to ``None``.
        ascend: Stored as ``self.ascend``.
        **kwargs: Only ``auth`` is read. When truthy it must be a mapping;
            it is logged (values passed through ``cleanse_setting``) and
            forwarded as keyword arguments to ``self.c.login``.

    Raises:
        AssertionError: If ``output_dir`` is truthy but not a directory.
    """
    self.base_url = base_url
    # A ``{}`` default would be one dict shared by every instance created
    # without an explicit ``conf_urls``; build a new one per instance.
    self.conf_urls = {} if conf_urls is None else conf_urls
    self.verbosity = verbosity
    self.ascend = ascend
    auth = kwargs.get('auth')

    if output_dir:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if not os.path.isdir(output_dir):
            raise AssertionError(
                'output_dir is not a directory: %r' % (output_dir,))
        self.output_dir = os.path.realpath(output_dir)
        LOG.info("Output will be saved to %s" % self.output_dir)
    else:
        self.output_dir = None

    # These two keep track of what is still to be crawled and what has been.
    # NOTE(review): the leading 0 is presumably the crawl depth -- confirm.
    self.not_crawled = [(0, 'START', self.base_url)]
    self.crawled = {}

    self.c = Client(REMOTE_ADDR='127.0.0.1')

    if auth:
        printable_auth = ', '.join(
            '%s: %s' % (key, cleanse_setting(key.upper(), value))
            for key, value in auth.items())
        LOG.info('Log in with %s' % printable_auth)
        self.c.login(**auth)

    # Instantiate every direct Plugin subclass that is not explicitly
    # switched off through a falsy ``active`` attribute.
    # TODO: Check if plugin supports writing CSV (or to a file in general?)
    self.plugins = [
        plug() for plug in Plugin.__subclasses__()
        if getattr(plug, 'active', True)
    ]
def __init__(self, base_url, conf_urls=None, verbosity=1, output_dir=None,
             ascend=True, **kwargs):
    """Set up crawl bookkeeping, the client and the active plugins.

    Args:
        base_url: Starting URL; seeded as the single entry of
            ``self.not_crawled``.
        conf_urls: Stored as ``self.conf_urls``. ``None`` (the default)
            gives every instance its own fresh, empty dict.
        verbosity: Stored as ``self.verbosity``.
        output_dir: Optional path of an existing directory. Its real path
            is stored as ``self.output_dir``; a falsy value leaves
            ``self.output_dir`` set to ``None``.
        ascend: Stored as ``self.ascend``.
        **kwargs: Accepted but not read by this method.

    Raises:
        AssertionError: If ``output_dir`` is truthy but not a directory.
    """
    self.base_url = base_url
    # A ``{}`` default would be one dict shared by every instance created
    # without an explicit ``conf_urls``; build a new one per instance.
    self.conf_urls = {} if conf_urls is None else conf_urls
    self.verbosity = verbosity
    self.ascend = ascend

    if output_dir:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if not os.path.isdir(output_dir):
            raise AssertionError(
                "output_dir is not a directory: %r" % (output_dir,))
        self.output_dir = os.path.realpath(output_dir)
        LOG.info("Output will be saved to %s" % self.output_dir)
    else:
        self.output_dir = None

    # These two keep track of what is still to be crawled and what has been.
    # NOTE(review): the leading 0 is presumably the crawl depth -- confirm.
    self.not_crawled = [(0, "START", self.base_url)]
    self.crawled = {}

    self.c = Client(REMOTE_ADDR="127.0.0.1")

    # Instantiate every direct Plugin subclass that is not explicitly
    # switched off through a falsy ``active`` attribute.
    # TODO: Check if plugin supports writing CSV (or to a file in general?)
    self.plugins = [
        plug() for plug in Plugin.__subclasses__()
        if getattr(plug, "active", True)
    ]
def __init__(self, base_url, conf_urls=None, verbosity=1, **kwargs):
    """Set up crawl bookkeeping, the client and the active plugins.

    Args:
        base_url: Starting URL; seeded as the single entry of
            ``self.not_crawled``.
        conf_urls: Stored as ``self.conf_urls``. ``None`` (the default)
            gives every instance its own fresh, empty dict.
        verbosity: Stored as ``self.verbosity``.
        **kwargs: Accepted but not read by this method.
    """
    self.base_url = base_url
    # A ``{}`` default would be one dict shared by every instance created
    # without an explicit ``conf_urls``; build a new one per instance.
    self.conf_urls = {} if conf_urls is None else conf_urls
    self.verbosity = verbosity

    # These two keep track of what is still to be crawled and what has been.
    self.not_crawled = [('START', self.base_url)]
    self.crawled = {}

    self.c = Client(REMOTE_ADDR='127.0.0.1')

    # Instantiate every direct Plugin subclass that is not explicitly
    # switched off through a falsy ``active`` attribute.
    self.plugins = [
        plug() for plug in Plugin.__subclasses__()
        if getattr(plug, 'active', True)
    ]
def __init__(self, base_url, conf_urls=None, verbosity=1, output_dir=None,
             ascend=True, **kwargs):
    """Set up crawl bookkeeping, the client, optional login and plugins.

    Args:
        base_url: Starting URL; seeded as the single entry of
            ``self.not_crawled``.
        conf_urls: Stored as ``self.conf_urls``. ``None`` (the default)
            gives every instance its own fresh, empty dict.
        verbosity: Stored as ``self.verbosity``.
        output_dir: Optional path of an existing directory. Its real path
            is stored as ``self.output_dir``; a falsy value leaves
            ``self.output_dir`` set to ``None``.
        ascend: Stored as ``self.ascend``.
        **kwargs: Only ``auth`` is read. When truthy it must be a mapping;
            it is logged (values passed through ``cleanse_setting``) and
            forwarded as keyword arguments to ``self.c.login``.

    Raises:
        AssertionError: If ``output_dir`` is truthy but not a directory.
    """
    self.base_url = base_url
    # A ``{}`` default would be one dict shared by every instance created
    # without an explicit ``conf_urls``; build a new one per instance.
    self.conf_urls = {} if conf_urls is None else conf_urls
    self.verbosity = verbosity
    self.ascend = ascend
    auth = kwargs.get('auth')

    if output_dir:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        if not os.path.isdir(output_dir):
            raise AssertionError(
                'output_dir is not a directory: %r' % (output_dir,))
        self.output_dir = os.path.realpath(output_dir)
        LOG.info("Output will be saved to %s" % self.output_dir)
    else:
        self.output_dir = None

    # These two keep track of what is still to be crawled and what has been.
    # NOTE(review): the leading 0 is presumably the crawl depth -- confirm.
    self.not_crawled = [(0, 'START', self.base_url)]
    self.crawled = {}

    self.c = Client(REMOTE_ADDR='127.0.0.1')

    if auth:
        printable_auth = ', '.join(
            '%s: %s' % (key, cleanse_setting(key.upper(), value))
            for key, value in auth.items())
        LOG.info('Log in with %s' % printable_auth)
        self.c.login(**auth)

    # Instantiate every direct Plugin subclass that is not explicitly
    # switched off through a falsy ``active`` attribute.
    # TODO: Check if plugin supports writing CSV (or to a file in general?)
    self.plugins = [
        plug() for plug in Plugin.__subclasses__()
        if getattr(plug, 'active', True)
    ]