def _load_lists(self): # Load content types for details self._tika_valid_content_types = set() if self.conf["tika"]["enabled"]: self.log("Reloading content types list for Tika details") for k, v in self.conf["tika"]["valid_content_types"].iteritems(): keywords = load_config(v) if not isinstance(keywords, list): raise ImproperlyConfigured("Keywords content types \ details list '{}' not valid".format(k)) keywords = {i.lower() for i in keywords} self._tika_valid_content_types |= keywords self.log("Content types Tika '{}' loaded".format(k)) # Load content types for blacklist self.log("Reloading content types list blacklist") self._cont_type_bl = set() for k, v in self.conf["content_types_blacklist"].iteritems(): keywords = load_config(v) if not isinstance(keywords, list): raise ImproperlyConfigured("Keywords content types blacklist \ list '{}' not valid".format(k)) keywords = {i.lower() for i in keywords} self._cont_type_bl |= keywords self.log("Content types blacklist '{}' loaded".format(k))
def _load_mails(self): """This function load mails in a priority queue. """ self.log("Loading new mails for spout") mailboxes = self.conf['mailboxes'] for k, v in mailboxes.iteritems(): if not os.path.exists(v['path_mails']): raise ImproperlyConfigured( "Mail path '{}' does NOT exist".format(v['path_mails'])) all_mails = set( glob.glob( os.path.join(v['path_mails'], '{}'.format(v['files_pattern'])))) # put new mails in queue for mail in (all_mails - self._queue_tail): self._queue_tail.add(mail) self._queue.put( MailItem(filename=mail, mail_server=v['mail_server'], mailbox=k, priority=v['priority'], trust=v['trust_string']))
def _conf_loader(self):
    """Reload the bolts configuration and select this component's section.

    The file at ``self.conf_file`` is read with ``load_config`` and stored
    in ``self._bolts_conf``; the entry keyed by ``self.component_name`` is
    then stored in ``self._conf``.

    Raises:
        ImproperlyConfigured: if ``self.conf_file`` is falsy.
    """
    if not self.conf_file:
        message = "Bolts configuration path NOT set for '{}'".format(
            self.component_name)
        raise ImproperlyConfigured(message)

    self.log("Reloading configuration for bolt")
    loaded = load_config(self.conf_file)
    self._bolts_conf = loaded
    # NOTE(review): the lookup goes through ``bolts_conf`` (no leading
    # underscore), presumably an accessor for ``_bolts_conf`` -- confirm.
    self._conf = self.bolts_conf[self.component_name]
def _check_conf(self): self._where = self.conf["post_processing"]["where"] if not self._where: raise ImproperlyConfigured( "where in '{}' is NOT configurated".format(self.spouts_conf)) self._where_failed = self.conf["post_processing"]["where.failed"] if not self._where_failed: raise ImproperlyConfigured( "where.failed in '{}' is NOT configurated".format( self.spouts_conf)) if not os.path.exists(self._where): os.makedirs(self._where) if not os.path.exists(self._where_failed): os.makedirs(self._where_failed)
def _check_conf(self): self._where = self.conf["post_processing"]["where"] if not self._where: raise ImproperlyConfigured( "where in {!r} is not configurated".format( self.component_name)) self._where_failed = self.conf["post_processing"]["where.failed"] if not self._where_failed: raise ImproperlyConfigured( "where.failed in {!r} is not configurated".format( self.component_name)) if not os.path.exists(self._where): os.makedirs(self._where) if not os.path.exists(self._where_failed): os.makedirs(self._where_failed)
def _resolve_if_path_needed(use_flag, path_string): """Determines if a path is needed and if it is valid.""" if use_flag: path = Path(path_string) if path.exists() and path.is_dir(): return path raise ImproperlyConfigured('Invalid path string provided.') return False
def _load_lists(self): # Load subjects keywords self.log("Reloading phishing subjects keywords") self._s_keys = set() for k, v in self.conf["lists"]["subjects"].iteritems(): keywords = load_config(v) if not isinstance(keywords, list): raise ImproperlyConfigured( "Keywords subjects list '{}' not valid".format(k)) self._s_keys |= set(keywords) # Load targets keywords self.log("Reloading phishing targets keywords") self._t_keys = {} for k, v in self.conf["lists"]["targets"].iteritems(): keywords = load_config(v) if not isinstance(keywords, dict): raise ImproperlyConfigured( "Keywords targets dict '{}' not valid".format(k)) self._t_keys.update(keywords)
def get_settings(command_line_args):
    """Compile all relevant settings for the application.

    Args:
        command_line_args: Mapping of parsed command line options. The
            keys read here are ``config``, ``use_html_file``,
            ``save_html``, ``save_api`` and ``disable_data_upload``.

    Returns:
        A dict combining the values read from the ini file with the
        resolved file locations (``files``) and the ``data_upload`` flag.

    Raises:
        ImproperlyConfigured: if the ini file cannot be read, a required
            section or option is missing, a numeric option is malformed,
            or a needed directory is invalid.
    """
    ini_location = command_line_args['config']
    config = configparser.ConfigParser()
    # read() skips files it cannot open and returns the ones it parsed;
    # an empty result means there is nothing to take settings from.
    if not config.read(ini_location):
        raise ImproperlyConfigured(
            'Unable to read configuration file {!r}'.format(ini_location))

    # Add the settings from the ini file
    settings = {}
    try:
        settings['crawl_delay'] = config.getfloat('settings', 'crawl_delay')
        settings['api_url'] = config['settings']['api_url']
        settings['api_authorization'] = 'Token {}'.format(
            config['settings']['api_token'])
        settings['abc_url'] = config['settings']['abc_url']
        settings['abc_id_start'] = config.getint('settings', 'abc_id_start')
        settings['abc_id_end'] = config.getint('settings', 'abc_id_end')
        settings['abc_id_increment'] = config.getint(
            'settings', 'abc_id_increment')
        settings['robot'] = {
            'user_agent': config['robot']['user_agent'],
            'from': config['robot']['from'],
        }
        html_location = config['locations']['html']
        api_location = config['locations']['api']
        settings['extracted_data'] = {
            'html': html_location,
            'api': api_location,
        }
        settings['sentry'] = config['sentry']['dsn']
    except (configparser.Error, KeyError, ValueError) as error:
        # getfloat()/getint() raise ValueError on malformed numbers, which
        # is not a configparser.Error, so it has to be listed explicitly.
        raise ImproperlyConfigured(error) from error

    # Resolve each optional location only when its flag asks for it
    settings['files'] = {
        'use_html': _resolve_if_path_needed(
            command_line_args['use_html_file'], html_location),
        'save_html': _resolve_if_path_needed(
            command_line_args['save_html'], html_location),
        'save_api': _resolve_if_path_needed(
            command_line_args['save_api'], api_location),
    }

    # Add the other command line arguments
    settings['data_upload'] = not command_line_args['disable_data_upload']
    return settings
def _load_whitelist(self): self.log("Reloading whitelists domains for bolt") self._whitelist = set() for k, v in self.conf['whitelists'].iteritems(): expiry = v.get('expiry') now = datetime.utcnow() if (not expiry or datetime.strptime(expiry, "%Y-%m-%dT%H:%M:%S.%fZ") >= now): domains = load_config(v['path']) if not isinstance(domains, list): raise ImproperlyConfigured( "Whitelist {} not loaded".format(k)) domains = {i.lower() for i in domains} self._whitelist |= domains self.log("Whitelist '{}' loaded".format(k))
def _load_mails(self): """This function load mails in a priority queue. """ self.log("Loading new mails for {!r}".format(self.component_name)) mailboxes = self.conf["mailboxes"] for k, v in mailboxes.iteritems(): if not os.path.exists(v["path_mails"]): raise ImproperlyConfigured( "Mail path {!r} does not exist".format(v["path_mails"])) all_mails = set( glob.glob(os.path.join(v["path_mails"], v["files_pattern"]))) # put new mails in queue for mail in (all_mails - self._queue_tail): self._queue_tail.add(mail) self._queue.put( MailItem(filename=mail, mail_server=v["mail_server"], mailbox=k, priority=v["priority"], trust=v["trust_string"]))