Example 1
    def test_get_emoji(self):
        """Test getting emoji code."""
        skull_emoji = emojis.get_emoji('skull_crossbone')
        skull_code = '&#x2620'

        self.assertEqual(skull_emoji, skull_code)

        locomotive_emoji = emojis.get_emoji('LOCOMOTIVE')
        locomotive_code = '&#x1F682'
        self.assertEqual(locomotive_emoji, locomotive_code)

        does_not_exist = emojis.get_emoji('er_ekki_til')
        self.assertEqual(does_not_exist, '')
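
This test pins down the lookup contract: names resolve regardless of case ('skull_crossbone' and 'LOCOMOTIVE' both hit) and unknown names return an empty string. A minimal sketch of a registry satisfying it, assuming a module-level map (the entries are illustrative, not the full table):

    # Minimal sketch; EMOJI_MAP entries are illustrative assumptions.
    EMOJI_MAP = {
        'SKULL_CROSSBONE': '&#x2620',
        'LOCOMOTIVE': '&#x1F682',
    }

    def get_emoji(name):
        """Return the HTML entity for an emoji name, or '' if unknown."""
        return EMOJI_MAP.get(name.upper(), '')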
Example 2
    def test_get_emoji(self):
        """Test getting emoji code."""
        skull_emoji = emojis.get_emoji("skull_crossbone")
        skull_code = "&#x2620"

        self.assertEqual(skull_emoji, skull_code)

        locomotive_emoji = emojis.get_emoji("LOCOMOTIVE")
        locomotive_code = "&#x1F682"
        self.assertEqual(locomotive_emoji, locomotive_code)

        does_not_exist = emojis.get_emoji("er_ekki_til")
        self.assertEqual(does_not_exist, "")
Example 3
    def test_get_chains(self):
        """Test the chain."""

        for plugin in manager.ChainPluginsManager.get_plugins(None):
            manager.ChainPluginsManager.deregister_plugin(plugin)

        manager.ChainPluginsManager.register_plugin(FakeChainPlugin)

        analyzer = FakeAnalyzer("test_index", sketch_id=1)
        analyzer.datastore.client = mock.Mock()

        plugins = getattr(analyzer, "_chain_plugins")
        self.assertEqual(len(plugins), 1)

        plugin = plugins[0]
        self.assertIsInstance(plugin, interface.BaseChainPlugin)

        analyzer_result = analyzer.run()
        expected_result = (
            "3 base events annotated with a chain UUID for 3 chains "
            "for a total of 9 events. [fake_chain] 9"
        )
        self.assertEqual(analyzer_result, expected_result)

        link_emoji = emojis.get_emoji("LINK")
        for event in plugin.ALL_EVENTS:
            attributes = event.attributes
            chains = attributes.get("chains", [])
            for event_chain in chains:
                plugin_name = event_chain.get("plugin", "")
                self.assertEqual(plugin_name, "fake_chain")

            event_emojis = event.emojis
            self.assertEqual(len(event_emojis), 1)
            self.assertEqual(event_emojis[0], link_emoji)
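
The test assumes a registered fake plugin. A hypothetical sketch of what such a plugin could look like, inferred only from the assertions above (the query, fields, and chain sizes are assumptions, not the real test fixture):

    # Hypothetical fake plugin; three base events with one chain each and
    # three events per chain give the expected summary numbers.
    class FakeChainPlugin(interface.BaseChainPlugin):
        NAME = 'fake_chain'
        SEARCH_QUERY = 'data_type:"fake:entry"'  # assumed query
        EVENT_FIELDS = ['message']
        ALL_EVENTS = []

        def process_chain(self, base_event):
            return True  # accept every base event

        def build_chain(self, base_event, chain_id):
            self.ALL_EVENTS.append(base_event)
            return 3  # assumed chain size: 3 chains x 3 events = 9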
Example 4
    def tagger(self, name, config):
        """Tag and add emojis to events.

        Args:
            name: String with the name describing what will be tagged.
            config: A dict that contains the configuration. See data/tags.yaml
                for fields and documentation of what needs to be defined.

        Returns:
            String with summary of the analyzer result.
        """
        query = config.get('query_string')
        query_dsl = config.get('query_dsl')
        create_view = config.get('create_view', False)
        view_name = config.get('view_name', name)
        tags = config.get('tags', [])
        emoji_names = config.get('emojis', [])
        emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

        event_counter = 0
        events = self.event_stream(query_string=query, query_dsl=query_dsl)

        for event in events:
            event_counter += 1
            event.add_tags(tags)
            event.add_emojis(emojis_to_add)

            # Commit the event to the datastore.
            event.commit()

        if create_view and event_counter:
            self.sketch.add_view(
                view_name, self.NAME, query_string=query, query_dsl=query_dsl)

        return '{0:d} events tagged for [{1:s}]'.format(event_counter, name)
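
For reference, a config entry as this method consumes it might look like the following dict (values are illustrative; see data/tags.yaml for the real definitions):

    # Illustrative config; only the keys read by tagger() are shown.
    config = {
        'query_string': 'data_type:"apache:access"',
        'tags': ['apache-access'],
        'emojis': ['LOCOMOTIVE'],
        'create_view': True,
        'view_name': 'Apache access events',
    }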
Example 5
    def test_get_chains(self):
        """Test the chain."""

        for plugin in manager.ChainPluginsManager.get_plugins(None):
            manager.ChainPluginsManager.deregister_plugin(plugin)

        manager.ChainPluginsManager.register_plugin(FakeChainPlugin)

        analyzer = FakeAnalyzer('test_index', sketch_id=1)
        analyzer.datastore.client = mock.Mock()

        plugins = getattr(analyzer, '_chain_plugins')
        self.assertEqual(len(plugins), 1)

        plugin = plugins[0]
        self.assertIsInstance(plugin, interface.BaseChainPlugin)

        analyzer_result = analyzer.run()
        expected_result = (
            '3 base events tagged with a chain UUID for 3 chains '
            'for a total of 9 events.')
        self.assertEqual(analyzer_result, expected_result)

        link_emoji = emojis.get_emoji('LINK')
        for event in plugin.ALL_EVENTS:
            attributes = event.attributes
            self.assertEqual(attributes.get('chain_plugins', []),
                             ['fake_chain'])

            event_emojis = event.emojis
            self.assertEqual(len(event_emojis), 1)
            self.assertEqual(event_emojis[0], link_emoji)
Example 6
    def run(self):
        """Entry point for the analyzer.
        Returns:
            String with summary of the analyzer result
        """
        # Elasticsearch query to find Chrome extensions from filestat
        query = ('(data_type:"fs:stat"'
                 'AND (filename:"Chrome" AND filename:"Extensions)')

        # Specify what returned fields you need for your analyzer.
        return_fields = ['filename', 'data_type']
        question_emoji = emojis.get_emoji('QUESTION')

        # Generator of events based on your query.
        events = self.event_stream(
            query_string=query, return_fields=return_fields)

        #  Add analyzer logic here.
        # Methods available to use for sketch analyzers:
        # sketch.get_all_indices()
        # sketch.add_view(name, query_string, query_filter={})
        # event.add_attributes({'foo': 'bar'})
        # event.add_tags(['tag_name'])
        # event.add_label('label')
        # event.add_star()
        # event.add_comment('comment')

        extension_count = 0

        for event in events:
            filename = event.source.get('filename', '')
            extension_id = re.findall(
                r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', filename)
            if not extension_id:
                continue

            event.add_emojis([question_emoji])
            event.commit()
            extension_count += 1

        # Assumed completion; the original example was truncated here.
        return '{0:d} Chrome extension events found.'.format(extension_count)
Example 7
    def run(self):
        """Entry point for the browser search analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = 'source_short:"WEBHIST"'
        return_fields = ['url']
        search_emoji = emojis.get_emoji('MAGNIFYING_GLASS')

        # Generator of events based on your query.
        events = self.event_stream(
            query_string=query, return_fields=return_fields)

        simple_counter = 0
        for event in events:
            url = event.source.get('url')

            if url is None:
                continue

            for engine, expression, method_name, parameter in self._URL_FILTERS:
                callback_method = getattr(self, method_name, None)
                if not callback_method:
                    continue

                match = expression.search(url)
                if not match:
                    continue

                if parameter:
                    search_query = callback_method(url, parameter)
                else:
                    search_query = callback_method(url)

                if not search_query:
                    continue

                simple_counter += 1
                event.add_attributes({'search_string': search_query})

                event.add_human_readable('{0:s} search query: {1:s}'.format(
                    engine, search_query), self.NAME)
                event.add_emojis([search_emoji])
                event.add_tags(['browser_search'])
                # We break at the first hit of a successful search engine.
                break

            # Commit the event to the datastore.
            event.commit()

        if simple_counter > 0:
            self.sketch.add_view(
                view_name='Browser Search', analyzer_name=self.NAME,
                query_string='tag:"browser_search"')

        return (
            'Browser Search completed with {0:d} search results '
            'extracted.').format(simple_counter)
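
Each _URL_FILTERS entry unpacks to (engine, expression, method_name, parameter), where expression is a compiled regex and method_name names an extraction callback on the analyzer. A sketch of one entry under those assumptions (the pattern and callback name are illustrative):

    import re

    # Illustrative filter entry; the real table lives on the analyzer class.
    _URL_FILTERS = [
        ('Google', re.compile(r'google\.[a-z.]+/search'),
         '_extract_search_query', 'q='),
    ]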
Example 8
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        # TODO: Once we can identify user generated events this should be
        # updated to include all user generated events instead of focusing
        # solely on browser events.
        query = 'source_short:"WEBHIST" OR source:"WEBHIST"'

        return_fields = ['timestamp', 'url', 'tag', '__ts_emojis']

        data_frame = self.event_pandas(
            query_string=query, return_fields=return_fields)

        if not data_frame.shape[0]:
            return 'No browser events discovered.'

        sleeping_emoji = emojis.get_emoji('SLEEPING_FACE')

        # This filters out all events that have a zero timestamp as well
        # as those that occur after 2038-01-01; this may need to be
        # changed in the future.
        data_frame = data_frame[
            (data_frame.timestamp > 0) & (
                data_frame.timestamp < 2145916800000000)]
        data_frame['timestamp'] = pd.to_numeric(data_frame.timestamp)
        data_frame['datetime'] = pd.to_datetime(
            data_frame.timestamp / 1e6, utc=True, unit='s')
        data_frame['hour'] = pd.to_numeric(
            data_frame.datetime.dt.strftime('%H'))

        total_count = data_frame.shape[0]
        activity_hours, threshold, aggregation = get_active_hours(data_frame)

        if not activity_hours:
            return 'Did not discover any activity hours.'

        hour_count = dict(aggregation.values.tolist())
        data_frame_outside = data_frame[~data_frame.hour.isin(activity_hours)]

        for event in utils.get_events_from_data_frame(
                data_frame_outside, self.datastore):
            event.add_tags(['outside-active-hours'])
            hour = event.source.get('hour')
            this_hour_count = hour_count.get(hour)
            event.add_attributes(
                {'activity_summary': (
                    'Number of events for this hour ({0:d}): {1:d}, with the '
                    'threshold value: {2:0.2f}').format(
                        hour, this_hour_count, threshold),
                 'hour_count': this_hour_count})
            event.add_emojis([sleeping_emoji])
            event.commit()

        return (
            'Tagged {0:d} out of {1:d} events as outside of normal '
            'active hours.').format(data_frame_outside.shape[0], total_count)
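
The helper get_active_hours is used as a black box here: it receives a frame with an 'hour' column and returns the busy hours, a count threshold, and an hour/count aggregation. A hedged sketch of that shape (the mean-based heuristic is an assumption; the real helper may differ):

    def get_active_hours(data_frame):
        """Return (active_hours, threshold, aggregation) for an 'hour' column."""
        aggregation = data_frame.groupby('hour').size().reset_index(name='count')
        # Assumed heuristic: hours at or above the mean count are "active".
        threshold = aggregation['count'].mean()
        active = aggregation[aggregation['count'] >= threshold]
        return sorted(active['hour'].tolist()), threshold, aggregation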
Example 9
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        link_emoji = emojis.get_emoji('LINK')

        number_of_base_events = 0
        counter = collections.Counter()

        # TODO: Have each plugin run in a separate task.
        # TODO: Add a time limit for each plugins run to prevent it from
        #       holding everything up.
        for chain_plugin in self._chain_plugins:
            if chain_plugin.SEARCH_QUERY_DSL:
                search_dsl = chain_plugin.SEARCH_QUERY_DSL
                search_string = None
            else:
                search_dsl = None
                search_string = chain_plugin.SEARCH_QUERY

            return_fields = chain_plugin.EVENT_FIELDS
            return_fields.extend(['chain_id_list', 'chain_plugins'])
            events = self.event_stream(
                query_string=search_string, query_dsl=search_dsl,
                return_fields=return_fields)

            for event in events:
                if not chain_plugin.process_chain(event):
                    continue
                number_of_base_events += 1
                chain_id = uuid.uuid4().hex

                number_chained_events = chain_plugin.build_chain(
                    base_event=event, chain_id=chain_id)
                counter[chain_id] = number_chained_events
                counter['total'] += number_chained_events

                chain_id_list = event.source.get('chain_id_list', [])
                chain_id_list.append(chain_id)
                chain_plugins_list = event.source.get('chain_plugins', [])
                chain_plugins_list.append(chain_plugin.NAME)
                attributes = {
                    'chain_id_list': chain_id_list,
                    'chain_plugins': chain_plugins_list}
                event.add_attributes(attributes)
                event.add_emojis([link_emoji])
                event.commit()

        number_of_chains = len(counter.keys()) - 1
        return (
            '{0:d} base events annotated with a chain UUID for {1:d} '
            'chains for a total of {2:d} events.'.format(
                number_of_base_events, number_of_chains,
                counter['total']))
Example 10
    def test_get_helper_from_unicode(self):
        """Test getting helper text from an emoji code."""
        skull_emoji = emojis.get_emoji('skull_crossbone')
        skull_helper = emojis.get_helper_from_unicode(skull_emoji)
        helper_text = 'Suspicious entry'

        self.assertEqual(skull_helper, helper_text)

        does_not_exist = emojis.get_helper_from_unicode('er_ekki_til')
        self.assertEqual(does_not_exist, '')
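
A minimal sketch of the reverse lookup this test exercises, assuming a code-to-helper map alongside the emoji table (the single entry mirrors the test, not the full table):

    # Sketch only; entries are illustrative assumptions.
    HELPER_MAP = {
        '&#x2620': 'Suspicious entry',
    }

    def get_helper_from_unicode(code):
        """Return helper text for an emoji code, or '' if unknown."""
        return HELPER_MAP.get(code, '')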
Example 11
    def run(self):
        """Entry point for the analyzer.
        Returns:
            String with summary of the analyzer result
        """
        # Add Elasticsearch query to get the events you need.
        query = ('(data_type:"syslog:line"'
                 'AND body:"Invalid user")')

        # Specify what returned fields you need for your analyzer.
        return_fields = ['message', 'data_type', 'source_short']
        stop_emoji = emojis.get_emoji('STOP')

        # Generator of events based on your query.
        events = self.event_stream(
            query_string=query, return_fields=return_fields)

        #  Add analyzer logic here.
        # Methods available to use for sketch analyzers:
        # sketch.get_all_indices()
        # sketch.add_view(name, query_string, query_filter={})
        # event.add_attributes({'foo': 'bar'})
        # event.add_tags(['tag_name'])
        # event.add_label('label')
        # event.add_star()
        # event.add_comment('comment')

        login_count = 0

        for event in events:
            message = event.source.get('message', '')
            ip_address = re.findall(
                r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', message)
            if not ip_address:
                continue
            event.add_attributes({'ip_address': ip_address[0]})

            username = re.search(
                r'Invalid user ([a-zA-Z0-9_\.+\-]{1,32}) from', message)
            if not username:
                continue
            event.add_attributes({'user': username.group(1)})

            event.add_emojis([stop_emoji])
            event.add_tags(['unknown_user'])
            login_count += 1

            # Commit the event to the datastore.
            event.commit()

        if login_count > 0:
            self.sketch.add_view(
                view_name='Potential bruteforce', analyzer_name=self.NAME,
                query_string=query)

        return (
            'Potential bruteforce analyzer completed, {0:d} login attempts '
            'from unknown users found.').format(login_count)
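
As a worked example, the two extractions above applied to a sample message behave like this:

    >>> message = 'sshd[1234]: Invalid user admin from 10.0.0.5'
    >>> re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', message)
    ['10.0.0.5']
    >>> re.search(r'Invalid user ([a-zA-Z0-9_\.+\-]{1,32}) from', message).group(1)
    'admin'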
Example 12
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = (
            '{"query": { "bool": { "should": [ '
            '{ "exists" : { "field" : "url" }}, '
            '{ "exists" : { "field" : "domain" }} ] } } }')

        return_fields = ['domain', 'url']

        events = self.event_stream(
            '', query_dsl=query, return_fields=return_fields)

        domains = {}
        domain_counter = collections.Counter()
        tld_counter = collections.Counter()

        for event in events:
            domain = event.source.get('domain')

            if not domain:
                url = event.source.get('url')
                if not url:
                    continue
                domain = utils.get_domain_from_url(url)
                event.add_attributes({'domain': domain})

            if not domain:
                continue

            domain_counter[domain] += 1
            domains.setdefault(domain, [])
            domains[domain].append(event)

            tld = '.'.join(domain.split('.')[-2:])
            tld_counter[tld] += 1

        satellite_emoji = emojis.get_emoji('SATELLITE')
        for domain, count in domain_counter.items():
            emojis_to_add = [satellite_emoji]
            text = '{0:s} seen {1:d} times'.format(domain, count)

            for event in domains.get(domain, []):
                event.add_emojis(emojis_to_add)
                event.add_human_readable(text, self.NAME, append=False)
                event.add_attributes({'domain_count': count})

                # Commit the event to the datastore.
                event.commit()

        return (
            '{0:d} domains discovered with {1:d} TLDs.').format(
                len(domains), len(tld_counter))
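
The utils.get_domain_from_url fallback above only needs to map a URL to its hostname. A hedged sketch of such a helper (the real one lives in the project's utils module and may normalize differently):

    from urllib.parse import urlparse

    def get_domain_from_url(url):
        """Return the hostname of a URL, or '' when it cannot be parsed."""
        return urlparse(url).netloc.split(':')[0].lower()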
Example 13
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        if not self.yeti_api_root or not self.yeti_api_key:
            return 'No Yeti configuration settings found, aborting.'

        self.get_intrusion_sets()
        actors_found = []
        for intrusion_set in self.intel.values():
            if not intrusion_set['indicators']:
                continue

            found = False

            for indicator in intrusion_set['indicators']:
                query = build_query_for_indicators([indicator])

                events = self.event_stream(query_string=query,
                                           return_fields=[])

                name = intrusion_set['name']
                for event in events:
                    found = True
                    event.add_emojis([emojis.get_emoji('SKULL')])
                    event.add_tags([name])
                    event.commit()
                    event.add_comment(
                        'Indicator "{0:s}" found for actor "{1:s}"'.format(
                            indicator['name'], name))

            if found:
                actors_found.append(name)
                self.sketch.add_view(
                    'Domain activity for actor {0:s}'.format(name),
                    self.NAME,
                    query_string=query)

        if actors_found:
            return '{0:d} actors were found! [{1:s}]'.format(
                len(actors_found), ', '.join(actors_found))
        return 'No indicators were found in the timeline.'
Example 14
    def matcher(self, name, config):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result.
        """
        event_field_name = config.get('event_field_name')
        bq_query = config.get('bq_query')
        bq_project = config.get('bq_project')
        tags = config.get('tags')
        emoji_names = config.get('emojis')
        emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

        es_query = ('{"query": { "bool": { "should": [ '
                    '{ "exists" : { "field" : "' + event_field_name +
                    '" }} ] } } }')
        events_stream = self.event_stream(
            query_dsl=es_query,
            return_fields=[event_field_name],
        )

        events = {}
        for event in events_stream:
            field = event.source.get(event_field_name)
            events.setdefault(field, []).append(event)

        try:
            bq_client = bigquery.Client(project=bq_project)
        except (google_auth_exceptions.DefaultCredentialsError) as exception:
            return 'Could not authenticate to BigQuery: {0!s}'.format(exception)

        num_matches = 0
        for i in range(0, len(events), self._BQ_BATCH_SIZE):
            batch = list(itertools.islice(events, i, i + self._BQ_BATCH_SIZE))
            query_job = self.bigquery_match(bq_client, bq_query,
                                            event_field_name, batch)
            for row in query_job:
                for event in events[row[0]]:
                    event.add_tags(tags)
                    event.add_emojis(emojis_to_add)
                    event.commit()
                    num_matches += 1
        return ('{0:d} events found for matcher [{1:s}]').format(
            num_matches, name)
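
bigquery_match is used as a black box that returns rows whose first column is the matched field value. A sketch of how it could bind a batch as an array parameter with the google-cloud-bigquery client (the parameter name and query shape are assumptions):

    from google.cloud import bigquery

    def bigquery_match(self, bq_client, bq_query, event_field_name, batch):
        """Run bq_query with the batched values bound as @values."""
        job_config = bigquery.QueryJobConfig(query_parameters=[
            bigquery.ArrayQueryParameter('values', 'STRING', batch)])
        # Iterating the returned job yields the result rows.
        return bq_client.query(bq_query, job_config=job_config)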
Example 15
    def mark_event(self, indicator, event, neighbors):
        """Anotate an event with data from indicators and neighbors.

        Tags with skull emoji, adds a comment to the event.
        """
        event.add_emojis([emojis.get_emoji('SKULL')])
        tags = []
        for n in neighbors:
            slug = re.sub(r'[^a-z0-9]', '-', n['name'].lower())
            slug = re.sub(r'-+', '-', slug)
            tags.append(slug)
        event.add_tags(tags)
        event.commit()

        msg = 'Indicator match: "{0:s}" ({1:s})\n'.format(
            indicator['name'], indicator['id'])
        msg += 'Related entities: {0!s}'.format([n['name'] for n in neighbors])
        event.add_comment(msg)
        event.commit()
Example 16
    def mark_event(self, indicator, event, neighbors):
        """Anotate an event with data from indicators and neighbors.

        Tags with skull emoji, adds a comment to the event.
        """
        event.add_emojis([emojis.get_emoji("SKULL")])
        tags = []
        for n in neighbors:
            slug = re.sub(r"[^a-z0-9]", "-", n["name"].lower())
            slug = re.sub(r"-+", "-", slug)
            tags.append(slug)
        event.add_tags(tags)
        event.commit()

        msg = 'Indicator match: "{0:s}" ({1:s})\n'.format(
            indicator["name"], indicator["id"])
        msg += "Related entities: {0!s}".format([n["name"] for n in neighbors])
        event.add_comment(msg)
        event.commit()
Example 17
    def _config_validation(self, config):
        """Validate that all items of a config are valid."""
        query = config.get("query_string", config.get("query_dsl"))
        self.assertIsNotNone(query)
        self.assertIsInstance(query, str)

        emojis_to_add = config.get("emojis")
        if emojis_to_add:
            self.assertIsInstance(emojis_to_add, (list, tuple))
            for emoji_name in emojis_to_add:
                emoji_code = emojis.get_emoji(emoji_name)
                self.assertNotEqual(emoji_code, "")

        tags = config.get("tags")
        if tags:
            self.assertIsInstance(tags, (list, tuple))

        create_view = config.get("create_view")
        if create_view:
            self.assertIsInstance(create_view, bool)
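
A config that passes this validation could look like the following (values illustrative):

    config = {
        'query_string': 'data_type:"syslog:line"',
        'emojis': ['SKULL'],
        'tags': ['suspicious'],
        'create_view': True,
    }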
Example 18
    def _config_validation(self, config):
        """Validate that all items of a config are valid."""
        query = config.get('query_string', config.get('query_dsl'))
        self.assertIsNotNone(query)
        self.assertIsInstance(query, str)

        attribute = config.get('attribute')
        self.assertIsNotNone(attribute)

        store_as = config.get('store_as')
        self.assertIsNotNone(store_as)

        expression = config.get('re')
        self.assertIsNotNone(expression)
        try:
            _ = re.compile(expression)
        except re.error as exception:
            self.assertIsNone(exception)

        emojis_to_add = config.get('emojis')
        if emojis_to_add:
            self.assertIsInstance(emojis_to_add, (list, tuple))
            for emoji_name in emojis_to_add:
                emoji_code = emojis.get_emoji(emoji_name)
                self.assertNotEqual(emoji_code, '')

        tags = config.get('tags')
        if tags:
            self.assertIsInstance(tags, (list, tuple))

        create_view = config.get('create_view')
        if create_view:
            self.assertIsInstance(create_view, bool)

        aggregate = config.get('aggregate')
        if aggregate:
            self.assertIsInstance(aggregate, bool)
Example 19
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = ('{"query": { "bool": { "should": [ '
                 '{ "exists" : { "field" : "url" }}, '
                 '{ "exists" : { "field" : "domain" }} ] } } }')

        return_fields = ['domain', 'url']

        events = self.event_stream('',
                                   query_dsl=query,
                                   return_fields=return_fields)

        domains = {}
        domain_counter = collections.Counter()
        tld_counter = collections.Counter()
        cdn_counter = collections.Counter()

        for event in events:
            domain = event.source.get('domain')

            if not domain:
                url = event.source.get('url')
                if not url:
                    continue
                domain = utils.get_domain_from_url(url)

            if not domain:
                continue

            domain_counter[domain] += 1
            domains.setdefault(domain, [])
            domains[domain].append(event)

            tld = '.'.join(domain.split('.')[-2:])
            tld_counter[tld] += 1

        satellite_emoji = emojis.get_emoji('SATELLITE')
        for domain, count in iter(domain_counter.items()):
            emojis_to_add = [satellite_emoji]
            tags_to_add = []
            text = '{0:s} seen {1:d} times'.format(domain, count)

            cdn_provider = utils.get_cdn_provider(domain)
            if cdn_provider:
                tags_to_add.append('known-cdn')
                cdn_counter[cdn_provider] += 1

            for event in domains.get(domain, []):
                event.add_tags(tags_to_add)
                event.add_emojis(emojis_to_add)

                event.add_human_readable(text, self.NAME, append=False)
                new_attributes = {'domain': domain, 'domain_count': count}
                if cdn_provider:
                    new_attributes['cdn_provider'] = cdn_provider
                event.add_attributes(new_attributes)

                # Commit the event to the datastore.
                event.commit()

        return ('{0:d} domains discovered ({1:d} TLDs) and {2:d} known '
                'CDN networks found.').format(len(domains), len(tld_counter),
                                              len(cdn_counter))
Example 20
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        # TODO: Once we can identify user generated events this should be
        # updated to include all user generated events instead of focusing
        # solely on browser events.
        query = 'source_short:"WEBHIST" OR source:"WEBHIST"'

        return_fields = ['datetime', 'timestamp', 'url', 'tag', '__ts_emojis']

        data_frame = self.event_pandas(query_string=query,
                                       return_fields=return_fields)

        if not data_frame.shape[0]:
            return 'No browser events discovered.'

        sleeping_emoji = emojis.get_emoji('SLEEPING_FACE')

        # This filters out all events that have a zero timestamp as well
        # as those that occur after 2038-01-01; this may need to be
        # changed in the future.
        data_frame['timestamp'] = pd.to_numeric(data_frame.timestamp)
        data_frame = data_frame[(data_frame.timestamp > 0)
                                & (data_frame.timestamp < 2145916800000000)]

        data_frame['datetime'] = pd.to_datetime(data_frame.timestamp / 1e6,
                                                utc=True,
                                                unit='s')
        data_frame['hour'] = pd.to_numeric(
            data_frame.datetime.dt.strftime('%H'))

        total_count = data_frame.shape[0]
        activity_hours, threshold, aggregation = get_active_hours(data_frame)

        if not activity_hours:
            return 'Did not discover any activity hours.'

        hour_count = dict(aggregation.values.tolist())
        data_frame_outside = data_frame[~data_frame.hour.isin(activity_hours)]

        for event in utils.get_events_from_data_frame(data_frame_outside,
                                                      self.datastore):
            event.add_tags(['outside-active-hours'])
            hour = event.source.get('hour')
            this_hour_count = hour_count.get(hour)
            event.add_attributes({
                'activity_summary':
                ('Number of events for this hour ({0:d}): {1:d}, with the '
                 'threshold value: {2:0.2f}').format(hour, this_hour_count,
                                                     threshold),
                'hour_count':
                this_hour_count
            })
            event.add_emojis([sleeping_emoji])
            event.commit()

        tagged_events, _ = data_frame_outside.shape
        if tagged_events:
            story = self.sketch.add_story('{0:s} - {1:s}'.format(
                utils.BROWSER_STORY_TITLE, self.timeline_name))
            story.add_text(utils.BROWSER_STORY_HEADER, skip_if_exists=True)

            # Find some statistics about the run time of the analyzer.
            percent = (tagged_events / total_count) * 100.0
            last_hour = activity_hours[0]
            end = 0
            for hour in activity_hours[1:]:
                if hour != last_hour + 1:
                    end = hour
                    break
                last_hour = hour

            if not end:
                first = activity_hours[0]
                last = activity_hours[-1]
            else:
                first = end
                index = activity_hours.index(end)
                last = activity_hours[index - 1]

            story.add_text(
                '## Browser Timeframe Analyzer\n\nThe browser timeframe '
                'analyzer discovered {0:d} browser events that occurred '
                'outside of the typical browsing window of this browser '
                'history ({1:s}), or around {2:0.2f}% of the {3:d} total '
                'events.\n\nThe analyzer determines the activity hours by '
                'finding the frequency of browsing events per hour, and then '
                'discovering the longest block of most active hours before '
                'proceeding with flagging all events outside of that time '
                'period. This information can be used by other analyzers '
                'or by manually looking for other activity within the '
                'inactive time period to find unusual actions.\n\n'
                'The hours considered to be active hours are the hours '
                'between {4:02d} and {5:02d} (hours in UTC) and the '
                'threshold used to determine if an hour was considered to be '
                'active was: {6:0.2f}.'.format(tagged_events,
                                               self.timeline_name, percent,
                                               total_count, first, last,
                                               threshold))

            group = self.sketch.add_aggregation_group(
                name='Browser Activity Per Hour',
                description='Created by the browser timeframe analyzer')
            group.set_layered()

            params = {
                'data':
                aggregation.to_dict(orient='records'),
                'title':
                'Browser Activity Per Hour ({0:s})'.format(self.timeline_name),
                'field':
                'hour',
                'order_field':
                'hour',
            }
            agg_obj = self.sketch.add_aggregation(
                name='Browser Activity Per Hour ({0:s})'.format(
                    self.timeline_name),
                agg_name='manual_feed',
                agg_params=params,
                chart_type='barchart',
                description='Created by the browser timeframe analyzer',
                label='informational')
            group.add_aggregation(agg_obj)

            lines = [{'hour': x, 'count': threshold} for x in range(0, 24)]
            params = {
                'data':
                lines,
                'title':
                'Browser Timeframe Threshold ({0:s})'.format(
                    self.timeline_name),
                'field':
                'hour',
                'order_field':
                'hour',
                'chart_color':
                'red',
            }
            agg_line = self.sketch.add_aggregation(
                name='Browser Activity Per Hour ({0:s})'.format(
                    self.timeline_name),
                agg_name='manual_feed',
                agg_params=params,
                chart_type='linechart',
                description='Created by the browser timeframe analyzer',
                label='informational')
            group.add_aggregation(agg_line)
            story.add_aggregation_group(group)

        return ('Tagged {0:d} out of {1:d} events as outside of normal '
                'active hours.').format(tagged_events, total_count)
Example 21
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        login_emoji = emojis.get_emoji('unlock')
        logoff_emoji = emojis.get_emoji('lock')
        screen_emoji = emojis.get_emoji('screen')
        screensaver_logon = LOGON_TYPES.get('7')
        login_counter = 0
        logoff_counter = 0

        # TODO: Add EVT lookups, ID 528 for logon and 538, 540 for logoff.
        # TODO: Add RDP EVT lookups, ID 682 for logon and 683 for logoff.
        query = (
            'data_type:"windows:evtx:record" AND (event_identifier:4624 OR '
            'event_identifier:4778 OR event_identifier:4779 OR '
            'event_identifier:4634 OR event_identifier:4647)')

        return_fields = [
            'message', 'data_type', 'strings', 'strings_parsed',
            'event_identifier']

        # Generator of events based on your query.
        events = self.event_stream(
            query_string=query, return_fields=return_fields)

        for event in events:
            strings = event.source.get('strings')
            strings_parsed = event.source.get('strings_parsed')
            identifier = event.source.get('event_identifier')
            emojis_to_add = []
            tags_to_add = []
            attribute_dict = {}

            if isinstance(identifier, six.text_type):
                try:
                    identifier = int(identifier, 10)
                except ValueError:
                    logging.warning((
                        'Unable to convert EVTX identifier to an integer, '
                        'value is {0:s}').format(identifier))
                    continue

            if identifier == 4624:
                attribute_dict = parse_evtx_logon_event(
                    strings, strings_parsed)
                if not attribute_dict:
                    continue
                emojis_to_add.append(login_emoji)
                tags_to_add.append('logon-event')
                login_counter += 1

            elif identifier in (4634, 4647):
                attribute_dict = parse_evtx_logoff_event(strings)
                if not attribute_dict:
                    continue
                emojis_to_add.append(logoff_emoji)
                tags_to_add.append('logoff-event')
                logoff_counter += 1

            # TODO: Add support for RDP events, ID 4778 (logon) and 4779
            # (logoff).
            if not attribute_dict:
                continue
            event.add_attributes(attribute_dict)

            # Want to add an emoji in case this is a screensaver unlock.
            if attribute_dict.get('logon_type', '') == screensaver_logon:
                emojis_to_add.append(screen_emoji)

            event.add_emojis(emojis_to_add)
            event.add_tags(tags_to_add)

            # Commit the event to the datastore.
            event.commit()

        # TODO: Add support for Linux syslog logon/logoff events.
        # TODO: Add support for Mac OS X logon/logoff events.

        return (
            'Total number of login events processed: {0:d} and '
            'logoff events: {1:d}').format(login_counter, logoff_counter)
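
LOGON_TYPES maps the stringified Windows logon type to a label; type 7 is the workstation-unlock (screensaver) logon checked above. A subset sketch (the full table covers the remaining documented types):

    LOGON_TYPES = {
        '2': 'Interactive',
        '3': 'Network',
        '7': 'Unlock',
        '10': 'RemoteInteractive',
    }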
Example 22
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        login_emoji = emojis.get_emoji('unlock')
        logoff_emoji = emojis.get_emoji('lock')
        screen_emoji = emojis.get_emoji('screen')
        screensaver_logon = LOGON_TYPES.get('7')
        login_counter = 0
        logoff_counter = 0

        # TODO: Add EVT lookups, ID 528 for logon and 538, 540 for logoff.
        # TODO: Add RDP EVT lookups, ID 682 for logon and 683 for logoff.
        query = (
            'data_type:"windows:evtx:record" AND (event_identifier:4624 OR '
            'event_identifier:4778 OR event_identifier:4779 OR '
            'event_identifier:4634 OR event_identifier:4647)')

        return_fields = [
            'message', 'data_type', 'strings', 'strings_parsed',
            'event_identifier'
        ]

        # Generator of events based on your query.
        events = self.event_stream(query_string=query,
                                   return_fields=return_fields)

        for event in events:
            strings = event.source.get('strings')
            strings_parsed = event.source.get('strings_parsed')
            identifier = event.source.get('event_identifier')
            emojis_to_add = []
            tags_to_add = []
            attribute_dict = {}

            if isinstance(identifier, six.text_type):
                try:
                    identifier = int(identifier, 10)
                except ValueError:
                    logger.warning(
                        ('Unable to convert EVTX identifier to an integer, '
                         'value is {0:s}').format(identifier))
                    continue

            if identifier == 4624:
                attribute_dict = parse_evtx_logon_event(
                    strings, strings_parsed)
                if not attribute_dict:
                    continue
                emojis_to_add.append(login_emoji)
                tags_to_add.append('logon-event')
                login_counter += 1

            elif identifier in (4634, 4647):
                attribute_dict = parse_evtx_logoff_event(strings)
                if not attribute_dict:
                    continue
                emojis_to_add.append(logoff_emoji)
                tags_to_add.append('logoff-event')
                logoff_counter += 1

            # TODO: Add support for RDP events, ID 4778 (logon) and 4779
            # (logoff).
            if not attribute_dict:
                continue
            event.add_attributes(attribute_dict)

            # Want to add an emoji in case this is a screensaver unlock.
            if attribute_dict.get('logon_type', '') == screensaver_logon:
                emojis_to_add.append(screen_emoji)

            event.add_emojis(emojis_to_add)
            event.add_tags(tags_to_add)

            # Commit the event to the datastore.
            event.commit()

        # TODO: Add support for Linux syslog logon/logoff events.
        # TODO: Add support for Mac OS X logon/logoff events.

        return ('Total number of login events processed: {0:d} and '
                'logoff events: {1:d}').format(login_counter, logoff_counter)
Example 23
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = ('{"query": { "bool": { "should": [ '
                 '{ "exists" : { "field" : "url" }}, '
                 '{ "exists" : { "field" : "domain" }} ] } } }')

        return_fields = ['domain', 'url', 'message', 'human_readable']

        events = self.event_stream('',
                                   query_dsl=query,
                                   return_fields=return_fields)

        domains = {}
        domain_counter = collections.Counter()
        tld_counter = collections.Counter()

        for event in events:
            domain = event.source.get('domain')

            if not domain:
                continue

            domain_counter[domain] += 1
            domains.setdefault(domain, [])
            domains[domain].append(event)

            tld = utils.get_tld_from_domain(domain)
            tld_counter[tld] += 1

        watched_domains_list = current_app.config.get(
            'DOMAIN_ANALYZER_WATCHED_DOMAINS', [])
        domain_threshold = current_app.config.get(
            'DOMAIN_ANALYZER_WATCHED_DOMAINS_THRESHOLD', 10)
        watched_domains_list.extend([
            utils.strip_www_from_domain(x)
            for x, _ in domain_counter.most_common(domain_threshold)
        ])
        watched_domains_list.extend(
            [x for x, _ in tld_counter.most_common(domain_threshold)])
        watched_domains_list.extend(self.WATCHED_DOMAINS_BASE_LIST)
        watched_domains_list_temp = set(watched_domains_list)
        watched_domains_list = []
        for domain in watched_domains_list_temp:
            if domain in self.domain_scoring_whitelist:
                continue
            if any(domain.endswith(x) for x in self.domain_scoring_whitelist):
                continue

            if '.' not in domain:
                continue
            watched_domains_list.append(domain)

        watched_domains = {}
        for domain in watched_domains_list:
            minhash = self._get_minhash_from_domain(domain)
            watched_domains[domain] = minhash

        similar_domain_counter = 0
        evil_emoji = emojis.get_emoji('SKULL_CROSSBONE')
        phishing_emoji = emojis.get_emoji('FISHING_POLE')
        for domain, _ in domain_counter.items():
            emojis_to_add = []
            tags_to_add = []
            text = None

            similar_domains = self._get_similar_domains(
                domain, watched_domains)

            if similar_domains:
                similar_domain_counter += 1
                emojis_to_add.append(evil_emoji)
                emojis_to_add.append(phishing_emoji)
                tags_to_add.append('phishy-domain')
                similar_text_list = [
                    '{0:s} [score: {1:.2f}]'.format(phishy_domain, score)
                    for phishy_domain, score in similar_domains
                ]
                text = 'Domain {0:s} is similar to {1:s}'.format(
                    domain, ', '.join(similar_text_list))
                if any(
                        domain.endswith(x)
                        for x in self.domain_scoring_whitelist):
                    tags_to_add.append('known-network')

            for event in domains.get(domain, []):
                event.add_emojis(emojis_to_add)
                event.add_tags(tags_to_add)
                if text:
                    event.add_human_readable(text, self.NAME, append=False)

                # Commit the event to the datastore.
                event.commit()

        if similar_domain_counter:
            self.sketch.add_view(view_name='Phishy Domains',
                                 analyzer_name=self.NAME,
                                 query_string='tag:"phishy-domain"')

        return ('{0:d} potentially phishy domains discovered.'
                ).format(similar_domain_counter)
Example 24
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = (
            '{"query": { "bool": { "should": [ '
            '{ "exists" : { "field" : "url" }}, '
            '{ "exists" : { "field" : "domain" }} ] } } }'
        )

        return_fields = ["domain", "url", "message", "human_readable"]

        events = self.event_stream("", query_dsl=query, return_fields=return_fields)

        domains = {}
        domain_counter = collections.Counter()
        tld_counter = collections.Counter()

        for event in events:
            domain = event.source.get("domain")

            if not domain:
                continue

            domain_counter[domain] += 1
            domains.setdefault(domain, [])
            domains[domain].append(event)

            tld = utils.get_tld_from_domain(domain)
            tld_counter[tld] += 1

        if not domain_counter:
            return "No domains discovered, so no phishy domains."

        watched_domains_list = current_app.config.get(
            "DOMAIN_ANALYZER_WATCHED_DOMAINS", []
        )
        domain_threshold = current_app.config.get(
            "DOMAIN_ANALYZER_WATCHED_DOMAINS_THRESHOLD", 10
        )
        watched_domains_list.extend(
            [
                utils.strip_www_from_domain(x)
                for x, _ in domain_counter.most_common(domain_threshold)
            ]
        )
        watched_domains_list.extend(
            [x for x, _ in tld_counter.most_common(domain_threshold)]
        )
        watched_domains_list.extend(self.WATCHED_DOMAINS_BASE_LIST)
        watched_domains_list_temp = set(watched_domains_list)
        watched_domains_list = []
        for domain in watched_domains_list_temp:
            if domain in self.domain_scoring_exclude_domains:
                continue
            if any(domain.endswith(x) for x in self.domain_scoring_exclude_domains):
                continue

            if "." not in domain:
                continue
            watched_domains_list.append(domain)

        watched_domains = {}
        for domain in watched_domains_list:
            minhash = self._get_minhash_from_domain(domain)
            watched_domains[domain] = {"hash": minhash, "depth": len(domain.split("."))}

        similar_domain_counter = 0
        allowlist_encountered = False
        evil_emoji = emojis.get_emoji("SKULL_CROSSBONE")
        phishing_emoji = emojis.get_emoji("FISHING_POLE")
        for domain, _ in iter(domain_counter.items()):
            emojis_to_add = []
            tags_to_add = []
            text = None

            similar_domains = self._get_similar_domains(domain, watched_domains)

            if similar_domains:
                similar_domain_counter += 1
                emojis_to_add.append(evil_emoji)
                emojis_to_add.append(phishing_emoji)
                tags_to_add.append("phishy-domain")
                similar_text_list = [
                    "{0:s} [score: {1:.2f}]".format(phishy_domain, score)
                    for phishy_domain, score in similar_domains
                ]
                text = "Domain {0:s} is similar to {1:s}".format(
                    domain, ", ".join(similar_text_list)
                )
                if any(domain.endswith(x) for x in self.domain_scoring_exclude_domains):
                    tags_to_add.append("known-domain")
                    allowlist_encountered = True

            for event in domains.get(domain, []):
                event.add_emojis(emojis_to_add)
                event.add_tags(tags_to_add)
                if text:
                    event.add_human_readable(text, self.NAME, append=False)

                # Commit the event to the datastore.
                event.commit()

        if similar_domain_counter:
            self.sketch.add_view(
                view_name="Phishy Domains",
                analyzer_name=self.NAME,
                query_string='tag:"phishy-domain"',
            )

            if allowlist_encountered:
                self.sketch.add_view(
                    view_name="Phishy Domains, excl. known domains",
                    analyzer_name=self.NAME,
                    query_string=('tag:"phishy-domain" AND NOT tag:"known-domain"'),
                )

        return ("{0:d} potentially phishy domains discovered.").format(
            similar_domain_counter
        )
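
The minhash helpers are used as black boxes above: one builds a hash per domain, the other scores a candidate against every watched domain. A hedged sketch with the datasketch library (shingle size, num_perm, and the threshold are assumptions):

    from datasketch import MinHash

    def _get_minhash_from_domain(self, domain):
        """Build a MinHash from the domain's character bigrams."""
        minhash = MinHash(num_perm=128)
        for shingle in {domain[i:i + 2] for i in range(len(domain) - 1)}:
            minhash.update(shingle.encode('utf8'))
        return minhash

    def _get_similar_domains(self, domain, watched_domains, threshold=0.75):
        """Return [(watched_domain, score)] pairs above the threshold."""
        candidate = self._get_minhash_from_domain(domain)
        similar = []
        for watched, entry in watched_domains.items():
            score = candidate.jaccard(entry['hash'])
            if watched != domain and score >= threshold:
                similar.append((watched, score))
        return similar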
Example 25
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = (
            '{"query": { "bool": { "should": [ '
            '{ "exists" : { "field" : "url" }}, '
            '{ "exists" : { "field" : "domain" }} ] } } }')

        return_fields = ['domain', 'url']

        events = self.event_stream(
            '', query_dsl=query, return_fields=return_fields)

        domains = {}
        domain_counter = collections.Counter()
        tld_counter = collections.Counter()
        cdn_counter = collections.Counter()

        for event in events:
            domain = event.source.get('domain')

            if not domain:
                url = event.source.get('url')
                if not url:
                    continue
                domain = utils.get_domain_from_url(url)

            if not domain:
                continue

            domain_counter[domain] += 1
            domains.setdefault(domain, [])
            domains[domain].append(event)

            tld = '.'.join(domain.split('.')[-2:])
            tld_counter[tld] += 1

        # Exit early if there are no domains in the data set to analyze.
        if not domain_counter:
            return 'No domains to analyze.'

        domain_count_array = numpy.array(list(domain_counter.values()))
        domain_20th_percentile = int(numpy.percentile(domain_count_array, 20))
        domain_85th_percentile = int(numpy.percentile(domain_count_array, 85))

        common_domains = [
            x for x, y in domain_counter.most_common()
            if y >= domain_85th_percentile]
        rare_domains = [
            x for x, y in domain_counter.most_common()
            if y <= domain_20th_percentile]

        satellite_emoji = emojis.get_emoji('SATELLITE')
        for domain, count in iter(domain_counter.items()):
            emojis_to_add = [satellite_emoji]
            tags_to_add = []

            cdn_provider = utils.get_cdn_provider(domain)
            if cdn_provider:
                tags_to_add.append('known-cdn')
                cdn_counter[cdn_provider] += 1

            if domain in common_domains:
                tags_to_add.append('common_domain')

            if domain in rare_domains:
                tags_to_add.append('rare_domain')

            for event in domains.get(domain, []):
                event.add_tags(tags_to_add)
                event.add_emojis(emojis_to_add)

                new_attributes = {'domain': domain, 'domain_count': count}
                if cdn_provider:
                    new_attributes['cdn_provider'] = cdn_provider
                event.add_attributes(new_attributes)

                # Commit the event to the datastore.
                event.commit()

        return (
            '{0:d} domains discovered ({1:d} TLDs) and {2:d} known '
            'CDN networks found.').format(
                len(domains), len(tld_counter), len(cdn_counter))
Example 26
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = (
            '{"query": { "bool": { "should": [ '
            '{ "exists" : { "field" : "url" }}, '
            '{ "exists" : { "field" : "domain" }} ] } } }')

        return_fields = ['domain', 'url']

        events = self.event_stream(
            '', query_dsl=query, return_fields=return_fields)

        domains = {}
        domain_counter = collections.Counter()
        tld_counter = collections.Counter()
        cdn_counter = collections.Counter()

        for event in events:
            domain = event.source.get('domain')

            if not domain:
                url = event.source.get('url')
                if not url:
                    continue
                domain = utils.get_domain_from_url(url)

            if not domain:
                continue

            domain_counter[domain] += 1
            domains.setdefault(domain, [])
            domains[domain].append(event)

            tld = '.'.join(domain.split('.')[-2:])
            tld_counter[tld] += 1

        satellite_emoji = emojis.get_emoji('SATELLITE')
        for domain, count in iter(domain_counter.items()):
            emojis_to_add = [satellite_emoji]
            tags_to_add = []
            text = '{0:s} seen {1:d} times'.format(domain, count)

            cdn_provider = utils.get_cdn_provider(domain)
            if cdn_provider:
                tags_to_add.append('known-cdn')
                cdn_counter[cdn_provider] += 1

            for event in domains.get(domain, []):
                event.add_tags(tags_to_add)
                event.add_emojis(emojis_to_add)

                event.add_human_readable(text, self.NAME, append=False)
                new_attributes = {'domain': domain, 'domain_count': count}
                if cdn_provider:
                    new_attributes['cdn_provider'] = cdn_provider
                event.add_attributes(new_attributes)

                # Commit the event to the datastore.
                event.commit()

        return (
            '{0:d} domains discovered ({1:d} TLDs) and {2:d} known '
            'CDN networks found.').format(
                len(domains), len(tld_counter), len(cdn_counter))
Example 27
    def run(self):
        """Entry point for the browser search analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = 'source_short:"WEBHIST" OR source:"WEBHIST"'
        return_fields = ['url', 'datetime']
        search_emoji = emojis.get_emoji('MAGNIFYING_GLASS')

        # Generator of events based on your query.
        events = self.event_stream(query_string=query,
                                   return_fields=return_fields)

        simple_counter = 0
        for event in events:
            url = event.source.get('url')

            if url is None:
                continue

            for engine, expression, method_name, parameter in self._URL_FILTERS:
                callback_method = getattr(self, method_name, None)
                if not callback_method:
                    continue

                match = expression.search(url)
                if not match:
                    continue

                if parameter:
                    search_query = callback_method(url, parameter)
                else:
                    search_query = callback_method(url)

                if not search_query:
                    continue

                simple_counter += 1
                datetime_string = event.source.get('datetime', '')
                day, _, _ = datetime_string.partition('T')
                event.add_attributes({
                    'search_string': search_query,
                    'search_engine': engine,
                    'search_day': 'D:{0:s}'.format(day)
                })

                event.add_human_readable(
                    '{0:s} search query: {1:s}'.format(engine, search_query),
                    self.NAME)
                event.add_emojis([search_emoji])
                event.add_tags(['browser-search'])
                # We break at the first hit of a successful search engine.
                break

            # Commit the event to the datastore.
            event.commit()

        if simple_counter > 0:
            view = self.sketch.add_view(
                view_name='Browser Search',
                analyzer_name=self.NAME,
                query_string='tag:"browser-search"',
                additional_fields=self._FIELDS_TO_INCLUDE)
            params = {
                'field': 'search_string',
                'limit': 20,
            }
            agg_obj = self.sketch.add_aggregation(
                name='Top 20 browser search queries',
                agg_name='field_bucket',
                agg_params=params,
                view_id=view.id,
                chart_type='hbarchart',
                description='Created by the browser search analyzer')

            params = {
                'field': 'search_day',
                'limit': 20,
            }
            agg_days = self.sketch.add_aggregation(
                name='Top 20 days of search queries',
                agg_name='field_bucket',
                agg_params=params,
                chart_type='hbarchart',
                description='Created by the browser search analyzer')

            params = {
                'query_string': 'tag:"browser-search"',
                'field': 'domain',
            }
            agg_engines = self.sketch.add_aggregation(
                name='Top Search Engines',
                agg_name='query_bucket',
                agg_params=params,
                view_id=view.id,
                chart_type='hbarchart',
                description='Created by the browser search analyzer')

            story = self.sketch.add_story(utils.BROWSER_STORY_TITLE)
            story.add_text(utils.BROWSER_STORY_HEADER, skip_if_exists=True)

            story.add_text('## Browser Search Analyzer.\n\nThe browser search '
                           'analyzer takes URLs usually reserved for browser '
                           'search queries and extracts the search string. '
                           'In this timeline the analyzer discovered {0:d} '
                           'browser searches.\n\nThis is a summary of '
                           'its findings.'.format(simple_counter))
            story.add_text(
                'The top 20 most commonly discovered searches were:')
            story.add_aggregation(agg_obj)
            story.add_text('The domains used to search:')
            story.add_aggregation(agg_engines, 'hbarchart')
            story.add_text('And the most common days of search:')
            story.add_aggregation(agg_days)
            story.add_text(
                'And an overview of all the discovered search terms:')
            story.add_view(view)

        return ('Browser Search completed with {0:d} search results '
                'extracted.').format(simple_counter)
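Example 30 iterates self._URL_FILTERS, a list of (engine, compiled expression, callback method name, parameter) tuples. The entry and callback below are hypothetical, a sketch of how a search query could be pulled out of a matching URL:

import re
from urllib.parse import parse_qs, urlparse

# Hypothetical filter entry: engine name, compiled expression, name of
# the callback method, and an optional parameter handed to the callback.
_URL_FILTERS = [
    ('Google', re.compile(r'google\.[a-z.]+/search', re.IGNORECASE),
     '_extract_search_query_from_url', 'q'),
]

def _extract_search_query_from_url(url, parameter):
    # Return the value of the given query parameter, or '' if missing.
    values = parse_qs(urlparse(url).query).get(parameter, [])
    return values[0] if values else ''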
Example 31
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = ('{"query": { "bool": { "should": [ '
                 '{ "exists" : { "field" : "url" }}, '
                 '{ "exists" : { "field" : "domain" }} ] } } }')

        return_fields = ['domain', 'url']

        events = self.event_stream('',
                                   query_dsl=query,
                                   return_fields=return_fields)

        domains = {}
        domain_counter = collections.Counter()
        tld_counter = collections.Counter()
        cdn_counter = collections.Counter()

        for event in events:
            domain = event.source.get('domain')

            if not domain:
                url = event.source.get('url')
                if not url:
                    continue
                domain = utils.get_domain_from_url(url)

            if not domain:
                continue

            domain_counter[domain] += 1
            domains.setdefault(domain, [])
            domains[domain].append(event)

            tld = '.'.join(domain.split('.')[-2:])
            tld_counter[tld] += 1

        # Exit early if there are no domains in the data set to analyze.
        if not domain_counter:
            return 'No domains to analyze.'

        domain_count_array = numpy.array(list(domain_counter.values()))
        try:
            domain_20th_percentile = int(
                numpy.percentile(domain_count_array, 20))
        except IndexError:
            logging.warning('Unable to calculate the 20th percentile.')
            domain_20th_percentile = 0

        try:
            domain_85th_percentile = int(
                numpy.percentile(domain_count_array, 85))
        except IndexError:
            logging.warning('Unable to calculate the 85th percentile.')
            highest_count_domain = domain_counter.most_common(1)
            if highest_count_domain:
                _, highest_count = highest_count_domain[0]
                domain_85th_percentile = highest_count + 10
            else:
                domain_85th_percentile = 100

        common_domains = [
            x for x, y in domain_counter.most_common()
            if y >= domain_85th_percentile
        ]
        rare_domains = [
            x for x, y in domain_counter.most_common()
            if y <= domain_20th_percentile
        ]

        satellite_emoji = emojis.get_emoji('SATELLITE')
        for domain, count in domain_counter.items():
            emojis_to_add = [satellite_emoji]
            tags_to_add = []

            cdn_provider = utils.get_cdn_provider(domain)
            if cdn_provider:
                tags_to_add.append('known-cdn')
                cdn_counter[cdn_provider] += 1

            if domain in common_domains:
                tags_to_add.append('common_domain')

            if domain in rare_domains:
                tags_to_add.append('rare_domain')

            for event in domains.get(domain, []):
                event.add_tags(tags_to_add)
                event.add_emojis(emojis_to_add)

                new_attributes = {'domain': domain, 'domain_count': count}
                if cdn_provider:
                    new_attributes['cdn_provider'] = cdn_provider
                event.add_attributes(new_attributes)

                # Commit the event to the datastore.
                event.commit()

        return ('{0:d} domains discovered ({1:d} TLDs) and {2:d} known '
                'CDN networks found.').format(len(domains), len(tld_counter),
                                              len(cdn_counter))
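The 20th and 85th percentiles of the per-domain occurrence counts drive the rare_domain and common_domain tags. A small illustration of the thresholds (counts are made up):

import numpy

counts = numpy.array([1, 1, 2, 3, 50, 400])
rare_threshold = int(numpy.percentile(counts, 20))
common_threshold = int(numpy.percentile(counts, 85))
# A domain seen <= rare_threshold times is tagged 'rare_domain';
# one seen >= common_threshold times is tagged 'common_domain'.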
Example 32
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        link_emoji = emojis.get_emoji('LINK')

        number_of_base_events = 0
        number_of_chains = 0
        counter = collections.Counter()
        events_to_update = {}

        # TODO: Have each plugin run in a separate task.
        # TODO: Add a time limit for each plugins run to prevent it from
        #       holding everything up.
        for chain_plugin in self._chain_plugins:
            if chain_plugin.SEARCH_QUERY_DSL:
                search_dsl = chain_plugin.SEARCH_QUERY_DSL
                search_string = None
            else:
                search_dsl = None
                search_string = chain_plugin.SEARCH_QUERY

            return_fields = chain_plugin.EVENT_FIELDS
            events = self.event_stream(query_string=search_string,
                                       query_dsl=search_dsl,
                                       return_fields=return_fields)

            for event in events:
                if not chain_plugin.process_chain(event):
                    continue
                chain_id = uuid.uuid4().hex

                chained_events = chain_plugin.build_chain(base_event=event,
                                                          chain_id=chain_id)
                number_chained_events = len(chained_events)
                if not number_chained_events:
                    continue

                for chained_event in chained_events:
                    chained_id = chained_event.get('event_id')
                    if chained_id not in events_to_update:
                        default = {
                            'event': chained_event.get('event'),
                            'chains': []
                        }
                        events_to_update[chained_id] = default

                    events_to_update[chained_id]['chains'].append(
                        chained_event.get('chain'))

                number_of_base_events += 1

                counter[chain_plugin.NAME] += number_chained_events
                counter['total'] += number_chained_events

                chain = {
                    'chain_id': chain_id,
                    'plugin': chain_plugin.NAME,
                    'is_base': True,
                    'leafs': number_chained_events,
                }
                if event.event_id not in events_to_update:
                    default = {'event': event, 'chains': []}
                    events_to_update[event.event_id] = default
                events_to_update[event.event_id]['chains'].append(chain)
                number_of_chains += 1

        for event_update in events_to_update.values():
            event = event_update.get('event')
            attributes = {'chains': event_update.get('chains')}
            event.add_attributes(attributes)
            event.add_emojis([link_emoji])
            event.commit()

        chain_string = ' - '.join([
            '[{0:s}] {1:d}'.format(x[0], x[1]) for x in counter.most_common()
            if x[0] != 'total'
        ])
        return ('{0:d} base events annotated with a chain UUID for {1:d} '
                'chains for a total of {2:d} events. {3:s}'.format(
                    number_of_base_events, number_of_chains, counter['total'],
                    chain_string))
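For reference, this is the shape of the chains attribute the analyzer commits back to each event; the values below are illustrative:

chains = [
    {
        'chain_id': '9f0c...',   # uuid4().hex shared by the whole chain
        'plugin': 'fake_chain',  # NAME of the chain plugin
        'is_base': True,         # only the base event carries is_base
        'leafs': 9,              # ...and the number of chained events
    },
    # Leaf events get the same chain_id with 'is_base': False and no
    # 'leafs' key (see build_chain in the next example).
]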
Example 33
class BaseChainPlugin(object):
    """A base plugin for the chain analyzer.

    This is an interface for the chain analyzer plugins.

    """

    NAME = "chain"
    DESCRIPTION = ""

    # A string value that defines the search query used to find the original
    # event that starts the chain. In order for this plugin to work
    # either the SEARCH_QUERY or SEARCH_QUERY_DSL needs to be defined.
    SEARCH_QUERY = ""

    # Defines the original event search query DSL. If this attribute
    # is defined the SEARCH_QUERY attribute is ignored.
    SEARCH_QUERY_DSL = ""

    # Defines the fields that need to be returned as part of the
    # event object.
    EVENT_FIELDS = []

    _EMOJIS = [emojis.get_emoji("LINK")]

    def __init__(self, analyzer_object):
        """Initialize the plugin."""
        super().__init__()
        self.analyzer_object = analyzer_object

    def process_chain(self, base_event):
        """Determine if the extracted event fits the criteria of the plugin.

        Args:
            base_event: an event object (instance of Event).

        Returns:
            boolean to determine whether a chain should be generated from
            the event or not. By default this returns True.
        """
        return True

    def build_chain(self, base_event, chain_id):
        """Returns a chain of events from a base event.

        Args:
            base_event: the base event of the chain, used to construct further
                queries (instance of Event).
            chain_id: a string with the chain UUID value.

        Returns:
            A list of dicts with the chain and event attached.
        """
        events = []
        for event in self.get_chained_events(base_event):
            chain = {"chain_id": chain_id, "plugin": self.NAME, "is_base": False}
            events.append(
                {
                    "event_id": event.event_id,
                    "event": event,
                    "chain": chain,
                }
            )
        return events

    @abc.abstractmethod
    def get_chained_events(self, base_event):
        """Yields an event that is chained or linked to the base event.

        Args:
            base_event: the base event of the chain, used to construct
                further queries (instance of Event).

        Yields:
            Event objects that are chained or linked to the base event.
        """
        raise NotImplementedError
Example 34
    def testEmoji(self):
        """Test that a flag emoji exists."""
        flag_emoji = emojis.get_emoji(self._TEST_ISO_CODE)
        self.assertEqual(flag_emoji, self._TEST_EMOJI)
Example 35
    def run(self):
        """Entry point for the analyzer.

        Returns:
            String with summary of the analyzer result
        """
        query = (
            '{"query": { "bool": { "should": [ '
            '{ "exists" : { "field" : "url" }}, '
            '{ "exists" : { "field" : "domain" }} ] } } }')

        return_fields = ['domain', 'url', 'message', 'human_readable']

        events = self.event_stream(
            '', query_dsl=query, return_fields=return_fields)

        domains = {}
        domain_counter = collections.Counter()
        tld_counter = collections.Counter()

        for event in events:
            domain = event.source.get('domain')

            if not domain:
                continue

            domain_counter[domain] += 1
            domains.setdefault(domain, [])
            domains[domain].append(event)

            tld = utils.get_tld_from_domain(domain)
            tld_counter[tld] += 1

        if not domain_counter:
            return 'No domains discovered, so no phishy domains.'

        watched_domains_list = list(current_app.config.get(
            'DOMAIN_ANALYZER_WATCHED_DOMAINS', []))
        domain_threshold = current_app.config.get(
            'DOMAIN_ANALYZER_WATCHED_DOMAINS_THRESHOLD', 10)
        watched_domains_list.extend([
            utils.strip_www_from_domain(x)
            for x, _ in domain_counter.most_common(domain_threshold)])
        watched_domains_list.extend([
            x for x, _ in tld_counter.most_common(domain_threshold)])
        watched_domains_list.extend(self.WATCHED_DOMAINS_BASE_LIST)
        watched_domains_list_temp = set(watched_domains_list)
        watched_domains_list = []
        for domain in watched_domains_list_temp:
            if domain in self.domain_scoring_whitelist:
                continue
            if any(domain.endswith(x) for x in self.domain_scoring_whitelist):
                continue

            if '.' not in domain:
                continue
            watched_domains_list.append(domain)

        watched_domains = {}
        for domain in watched_domains_list:
            minhash = self._get_minhash_from_domain(domain)
            watched_domains[domain] = {
                'hash': minhash,
                'depth': len(domain.split('.'))
            }

        similar_domain_counter = 0
        whitelist_encountered = False
        evil_emoji = emojis.get_emoji('SKULL_CROSSBONE')
        phishing_emoji = emojis.get_emoji('FISHING_POLE')
        for domain in domain_counter:
            emojis_to_add = []
            tags_to_add = []
            text = None

            similar_domains = self._get_similar_domains(
                domain, watched_domains)

            if similar_domains:
                similar_domain_counter += 1
                emojis_to_add.append(evil_emoji)
                emojis_to_add.append(phishing_emoji)
                tags_to_add.append('phishy-domain')
                similar_text_list = ['{0:s} [score: {1:.2f}]'.format(
                    phishy_domain,
                    score) for phishy_domain, score in similar_domains]
                text = 'Domain {0:s} is similar to {1:s}'.format(
                    domain, ', '.join(similar_text_list))
                if any(domain.endswith(
                        x) for x in self.domain_scoring_whitelist):
                    tags_to_add.append('whitelisted-domain')
                    whitelist_encountered = True

            for event in domains.get(domain, []):
                event.add_emojis(emojis_to_add)
                event.add_tags(tags_to_add)
                if text:
                    event.add_human_readable(text, self.NAME, append=False)

                # Commit the event to the datastore.
                event.commit()

        if similar_domain_counter:
            self.sketch.add_view(
                view_name='Phishy Domains', analyzer_name=self.NAME,
                query_string='tag:"phishy-domain"')

            if whitelist_encountered:
                self.sketch.add_view(
                    view_name='Phishy Domains, excl. whitelist',
                    analyzer_name=self.NAME,
                    query_string=(
                        'tag:"phishy-domain" AND NOT tag:"whitelisted-domain"'))

        return (
            '{0:d} potentially phishy domains discovered.').format(
                similar_domain_counter)
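The helpers _get_minhash_from_domain and _get_similar_domains are not shown in this snippet. One plausible sketch, assuming the datasketch library and character 2-gram shingles (the actual Timesketch implementation may differ):

from datasketch import MinHash

def _get_minhash_from_domain(domain):
    # Build a MinHash from the character 2-grams of the domain.
    minhash = MinHash(num_perm=128)
    for index in range(len(domain) - 1):
        minhash.update(domain[index:index + 2].encode('utf8'))
    return minhash

# Estimated Jaccard similarity between a domain and a lookalike:
original = _get_minhash_from_domain('mydomain.com')
lookalike = _get_minhash_from_domain('myd0main.com')
score = original.jaccard(lookalike)  # near 1.0 for near-identical names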
Example 36
    def extract_feature(self, name, config):
        """Extract features from events.

        Args:
            name: String with the name describing the feature to be extracted.
            config: A dict that contains the configuration for the feature
                extraction. See data/features.yaml for fields and further
                documentation of what needs to be defined.

        Returns:
            String with summary of the analyzer result.
        """
        query = config.get('query_string')
        query_dsl = config.get('query_dsl')
        attribute = config.get('attribute')

        if not attribute:
            logging.warning('No attribute defined.')
            return ''

        store_as = config.get('store_as')
        if not store_as:
            logging.warning('No attribute defined to store results in.')
            return ''

        tags = config.get('tags', [])

        expression_string = config.get('re')
        expression_flags = config.get('re_flags')
        if not expression_string:
            logging.warning('No regular expression defined.')
            return ''

        if expression_flags:
            flags = set()
            for flag in expression_flags:
                try:
                    flags.add(getattr(re, flag))
                except AttributeError:
                    logging.warning('Unknown regular expression flag defined.')
                    return ''
            re_flag = sum(flags)
        else:
            re_flag = 0

        try:
            expression = re.compile(expression_string, flags=re_flag)
        except re.error as exception:
            # pylint: disable=logging-format-interpolation
            logging.warning(('Regular expression failed to compile, with '
                             'error: {0!s}').format(exception))
            return ''

        emoji_names = config.get('emojis', [])
        emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

        return_fields = [attribute]

        events = self.event_stream(query_string=query,
                                   query_dsl=query_dsl,
                                   return_fields=return_fields)

        event_counter = 0
        for event in events:
            attribute_field = event.source.get(attribute)
            if isinstance(attribute_field, six.text_type):
                attribute_value = attribute_field
            elif isinstance(attribute_field, (list, tuple)):
                attribute_value = ','.join(attribute_field)
            elif isinstance(attribute_field, (int, float)):
                # Convert numbers to text so the expression can be applied.
                attribute_value = str(attribute_field)
            else:
                attribute_value = None

            if not attribute_value:
                continue

            result = expression.findall(attribute_value)
            if not result:
                continue

            event_counter += 1
            event.add_attributes({store_as: result[0]})
            event.add_emojis(emojis_to_add)
            event.add_tags(tags)

            # Commit the event to the datastore.
            event.commit()

        aggregate_results = config.get('aggregate', False)
        create_view = config.get('create_view', False)

        # If aggregation is turned on, we automatically create an aggregation.
        if aggregate_results:
            create_view = True

        if create_view and event_counter:
            view = self.sketch.add_view(name,
                                        self.NAME,
                                        query_string=query,
                                        query_dsl=query_dsl)

            if aggregate_results:
                params = {
                    'field': store_as,
                    'limit': 20,
                }
                self.sketch.add_aggregation(
                    name='Top 20 for: {0:s} [{1:s}]'.format(store_as, name),
                    agg_name='field_bucket',
                    agg_params=params,
                    description='Created by the feature extraction analyzer',
                    view_id=view.id,
                    chart_type='hbarchart')

        return 'Feature extraction [{0:s}] extracted {1:d} features.'.format(
            name, event_counter)
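The config dict handed to extract_feature mirrors an entry in data/features.yaml. An illustrative configuration (the field values, the emoji name, and the analyzer instance are made up):

config = {
    'query_string': 'data_type:"windows:evtx:record"',
    'attribute': 'message',
    'store_as': 'extracted_user',
    're': r'User:\s+([A-Za-z0-9._-]+)',
    're_flags': ['IGNORECASE'],
    'emojis': ['ID_BUTTON'],
    'tags': ['user-extracted'],
    'create_view': True,
    'aggregate': True,
}
summary = analyzer.extract_feature('user_from_message', config)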
Example 37
    def extract_feature(self, name, config):
        """Extract features from events.

        Args:
            name: String with the name describing the feature to be extracted.
            config: A dict that contains the configuration for the feature
                extraction. See ~/config/features.yaml for fields and further
                documentation of what needs to be defined.

        Returns:
            String with summary of the analyzer result.
        """
        query = config.get('query_string')
        query_dsl = config.get('query_dsl')
        attribute = config.get('attribute')

        if not attribute:
            logging.warning('No attribute defined.')
            return ''

        store_as = config.get('store_as')
        if not store_as:
            logging.warning('No attribute defined to store results in.')
            return ''

        tags = config.get('tags', [])

        expression_string = config.get('re')
        if not expression_string:
            logging.warning('No regular expression defined.')
            return ''
        try:
            expression = re.compile(expression_string)
        except re.error as exception:
            # pylint: disable=logging-format-interpolation
            logging.warning(('Regular expression failed to compile, with '
                             'error: {0!s}').format(exception))
            return ''

        emoji_names = config.get('emojis', [])
        emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

        return_fields = [attribute]

        events = self.event_stream(query_string=query,
                                   query_dsl=query_dsl,
                                   return_fields=return_fields)

        event_counter = 0
        for event in events:
            attribute_field = event.source.get(attribute)
            if isinstance(attribute_field, six.text_type):
                attribute_value = attribute_field.lower()
            elif isinstance(attribute_field, (list, tuple)):
                attribute_value = ','.join(attribute_field)
            elif isinstance(attribute_field, (int, float)):
                # Convert numbers to text so the expression can be applied.
                attribute_value = str(attribute_field)
            else:
                attribute_value = None

            if not attribute_value:
                continue

            result = expression.findall(attribute_value)
            if not result:
                continue

            event_counter += 1
            event.add_attributes({store_as: result[0]})
            event.add_emojis(emojis_to_add)
            event.add_tags(tags)

            # Commit the event to the datastore.
            event.commit()

        create_view = config.get('create_view', False)
        if create_view and event_counter:
            if query:
                query_string = query
            else:
                query_string = query_dsl
            self.sketch.add_view(name, query_string)

        # TODO: Add aggregation check when that is exposed in the UI.
        # aggregate_results = config.get('aggregate', False)

        return 'Feature extraction [{0:s}] extracted {1:d} features.'.format(
            name, event_counter)