Ejemplo n.º 1
0
    def _load_lists(self):
        # Load content types for details
        self._tika_valid_content_types = set()
        if self.conf["tika"]["enabled"]:
            self.log("Reloading content types list for Tika details")
            for k, v in self.conf["tika"]["valid_content_types"].iteritems():
                keywords = load_config(v)
                if not isinstance(keywords, list):
                    raise ImproperlyConfigured("Keywords content types \
                        details list '{}' not valid".format(k))
                keywords = {i.lower() for i in keywords}
                self._tika_valid_content_types |= keywords
                self.log("Content types Tika '{}' loaded".format(k))

        # Load content types for blacklist
        self.log("Reloading content types list blacklist")
        self._cont_type_bl = set()
        for k, v in self.conf["content_types_blacklist"].iteritems():
            keywords = load_config(v)
            if not isinstance(keywords, list):
                raise ImproperlyConfigured("Keywords content types blacklist \
                    list '{}' not valid".format(k))
            keywords = {i.lower() for i in keywords}
            self._cont_type_bl |= keywords
            self.log("Content types blacklist '{}' loaded".format(k))
Ejemplo n.º 2
0
    def _load_mails(self):
        """This function load mails in a priority queue. """

        self.log("Loading new mails for spout")

        mailboxes = self.conf['mailboxes']
        for k, v in mailboxes.iteritems():
            if not os.path.exists(v['path_mails']):
                raise ImproperlyConfigured(
                    "Mail path '{}' does NOT exist".format(v['path_mails']))

            all_mails = set(
                glob.glob(
                    os.path.join(v['path_mails'],
                                 '{}'.format(v['files_pattern']))))

            # put new mails in queue
            for mail in (all_mails - self._queue_tail):
                self._queue_tail.add(mail)
                self._queue.put(
                    MailItem(filename=mail,
                             mail_server=v['mail_server'],
                             mailbox=k,
                             priority=v['priority'],
                             trust=v['trust_string']))
Ejemplo n.º 3
0
 def _conf_loader(self):
     """Reload the bolts configuration and pick this component's section.

     The file named by ``self.conf_file`` is read with ``load_config`` and
     stored in ``self._bolts_conf``; the entry for ``self.component_name``
     is then stored in ``self._conf``.

     Raises:
         ImproperlyConfigured: if ``self.conf_file`` is not set.
     """
     if not self.conf_file:
         message = "Bolts configuration path NOT set for '{}'".format(
             self.component_name)
         raise ImproperlyConfigured(message)

     self.log("Reloading configuration for bolt")
     loaded = load_config(self.conf_file)
     self._bolts_conf = loaded
     # Read back through ``self.bolts_conf`` exactly as before; presumably a
     # property exposing ``_bolts_conf`` -- confirm against the class.
     section = self.bolts_conf[self.component_name]
     self._conf = section
Ejemplo n.º 4
0
    def _check_conf(self):
        self._where = self.conf["post_processing"]["where"]
        if not self._where:
            raise ImproperlyConfigured(
                "where in '{}' is NOT configurated".format(self.spouts_conf))

        self._where_failed = self.conf["post_processing"]["where.failed"]
        if not self._where_failed:
            raise ImproperlyConfigured(
                "where.failed in '{}' is NOT configurated".format(
                    self.spouts_conf))

        if not os.path.exists(self._where):
            os.makedirs(self._where)

        if not os.path.exists(self._where_failed):
            os.makedirs(self._where_failed)
Ejemplo n.º 5
0
    def _check_conf(self):
        self._where = self.conf["post_processing"]["where"]
        if not self._where:
            raise ImproperlyConfigured(
                "where in {!r} is not configurated".format(
                    self.component_name))

        self._where_failed = self.conf["post_processing"]["where.failed"]
        if not self._where_failed:
            raise ImproperlyConfigured(
                "where.failed in {!r} is not configurated".format(
                    self.component_name))

        if not os.path.exists(self._where):
            os.makedirs(self._where)

        if not os.path.exists(self._where_failed):
            os.makedirs(self._where_failed)
Ejemplo n.º 6
0
def _resolve_if_path_needed(use_flag, path_string):
    """Determines if a path is needed and if it is valid."""
    if use_flag:
        path = Path(path_string)

        if path.exists() and path.is_dir():
            return path

        raise ImproperlyConfigured('Invalid path string provided.')

    return False
Ejemplo n.º 7
0
    def _load_lists(self):

        # Load subjects keywords
        self.log("Reloading phishing subjects keywords")
        self._s_keys = set()
        for k, v in self.conf["lists"]["subjects"].iteritems():
            keywords = load_config(v)
            if not isinstance(keywords, list):
                raise ImproperlyConfigured(
                    "Keywords subjects list '{}' not valid".format(k))
            self._s_keys |= set(keywords)

        # Load targets keywords
        self.log("Reloading phishing targets keywords")
        self._t_keys = {}
        for k, v in self.conf["lists"]["targets"].iteritems():
            keywords = load_config(v)
            if not isinstance(keywords, dict):
                raise ImproperlyConfigured(
                    "Keywords targets dict '{}' not valid".format(k))
            self._t_keys.update(keywords)
Ejemplo n.º 8
0
def get_settings(command_line_args):
    """Compile all relevant settings for the application.

    Values are read from the ini file named by
    ``command_line_args['config']`` and combined with the command line
    flags.

    Args:
        command_line_args: Mapping providing the keys ``config``,
            ``use_html_file``, ``save_html``, ``save_api`` and
            ``disable_data_upload``.

    Returns:
        dict: The values read from the ini file, plus ``files`` (the result
        of ``_resolve_if_path_needed`` for each file-related flag) and
        ``data_upload`` (the negation of ``disable_data_upload``).

    Raises:
        ImproperlyConfigured: if a section or option is missing from the
            ini file, or if a numeric option cannot be parsed.
    """
    ini_location = command_line_args['config']
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open, so a missing
    # ini file surfaces below as a missing-section error.
    config.read(ini_location)

    # Add the settings from the ini file
    settings = {}

    try:
        settings['crawl_delay'] = config.getfloat('settings', 'crawl_delay')
        settings['api_url'] = config['settings']['api_url']
        settings['api_authorization'] = 'Token {}'.format(
            config['settings']['api_token'])
        settings['abc_url'] = config['settings']['abc_url']
        settings['abc_id_start'] = config.getint('settings', 'abc_id_start')
        settings['abc_id_end'] = config.getint('settings', 'abc_id_end')
        settings['abc_id_increment'] = config.getint('settings',
                                                     'abc_id_increment')
        settings['robot'] = {
            'user_agent': config['robot']['user_agent'],
            'from': config['robot']['from'],
        }
        settings['extracted_data'] = {
            'html': config['locations']['html'],
            'api': config['locations']['api'],
        }
        settings['sentry'] = config['sentry']['dsn']

    except (configparser.Error, KeyError, ValueError) as error:
        # ValueError covers getfloat()/getint() on a non-numeric value, which
        # would otherwise escape as a raw conversion error.
        raise ImproperlyConfigured(error) from error

    # Check which of the storage locations are actually needed; the location
    # strings were already read (and validated as present) above.
    locations = settings['extracted_data']
    files = {}
    files['use_html'] = _resolve_if_path_needed(
        command_line_args['use_html_file'], locations['html'])
    files['save_html'] = _resolve_if_path_needed(
        command_line_args['save_html'], locations['html'])
    files['save_api'] = _resolve_if_path_needed(
        command_line_args['save_api'], locations['api'])
    settings['files'] = files

    # Add the other command line arguments
    settings['data_upload'] = not command_line_args['disable_data_upload']

    return settings
Ejemplo n.º 9
0
    def _load_whitelist(self):

        self.log("Reloading whitelists domains for bolt")
        self._whitelist = set()
        for k, v in self.conf['whitelists'].iteritems():
            expiry = v.get('expiry')
            now = datetime.utcnow()

            if (not expiry or
                    datetime.strptime(expiry, "%Y-%m-%dT%H:%M:%S.%fZ") >= now):
                domains = load_config(v['path'])

                if not isinstance(domains, list):
                    raise ImproperlyConfigured(
                        "Whitelist {} not loaded".format(k))
                domains = {i.lower() for i in domains}
                self._whitelist |= domains
                self.log("Whitelist '{}' loaded".format(k))
Ejemplo n.º 10
0
    def _load_mails(self):
        """This function load mails in a priority queue. """

        self.log("Loading new mails for {!r}".format(self.component_name))

        mailboxes = self.conf["mailboxes"]
        for k, v in mailboxes.iteritems():
            if not os.path.exists(v["path_mails"]):
                raise ImproperlyConfigured(
                    "Mail path {!r} does not exist".format(v["path_mails"]))

            all_mails = set(
                glob.glob(os.path.join(v["path_mails"], v["files_pattern"])))

            # put new mails in queue
            for mail in (all_mails - self._queue_tail):
                self._queue_tail.add(mail)
                self._queue.put(
                    MailItem(filename=mail,
                             mail_server=v["mail_server"],
                             mailbox=k,
                             priority=v["priority"],
                             trust=v["trust_string"]))