def test_get_emoji(self):
    """Test getting emoji code."""
    # Known emoji, lower case name.
    self.assertEqual(emojis.get_emoji('skull_crossbone'), '☠')
    # The lookup accepts upper case names as well.
    self.assertEqual(emojis.get_emoji('LOCOMOTIVE'), '🚂')
    # Unknown emoji names resolve to an empty string.
    self.assertEqual(emojis.get_emoji('er_ekki_til'), '')
def test_get_emoji(self):
    """Test getting emoji code."""
    expectations = (
        ('skull_crossbone', '☠'),
        ('LOCOMOTIVE', '🚂'),
        ('er_ekki_til', ''),
    )
    # Each name should resolve to its code; unknown names resolve to ''.
    for emoji_name, expected_code in expectations:
        self.assertEqual(emojis.get_emoji(emoji_name), expected_code)
def test_get_emoji(self):
    """Test getting emoji code."""
    skull = emojis.get_emoji("skull_crossbone")
    self.assertEqual(skull, "☠")

    # Upper case names are accepted too.
    train = emojis.get_emoji("LOCOMOTIVE")
    self.assertEqual(train, "🚂")

    # A name that does not exist yields an empty string.
    missing = emojis.get_emoji("er_ekki_til")
    self.assertEqual(missing, "")
def test_get_chains(self):
    """Test the chain."""
    # Start from a registry that only contains the fake plugin.
    for registered in manager.ChainPluginsManager.get_plugins(None):
        manager.ChainPluginsManager.deregister_plugin(registered)
    manager.ChainPluginsManager.register_plugin(FakeChainPlugin)

    analyzer = FakeAnalyzer("test_index", sketch_id=1)
    analyzer.datastore.client = mock.Mock()

    plugins = getattr(analyzer, "_chain_plugins")
    self.assertEqual(len(plugins), 1)
    plugin = plugins[0]
    self.assertIsInstance(plugin, interface.BaseChainPlugin)

    self.assertEqual(
        analyzer.run(),
        "3 base events annotated with a chain UUID for 3 chains "
        "for a total of 9 events. [fake_chain] 9")

    # Every processed event should carry exactly one link emoji and a
    # chain entry pointing back at the fake plugin.
    link_emoji = emojis.get_emoji("LINK")
    for event in plugin.ALL_EVENTS:
        for event_chain in event.attributes.get("chains", []):
            self.assertEqual(event_chain.get("plugin", ""), "fake_chain")
        event_emojis = event.emojis
        self.assertEqual(len(event_emojis), 1)
        self.assertEqual(event_emojis[0], link_emoji)
def tagger(self, name, config):
    """Tag and add emojis to events.

    Args:
        name: String with the name describing what will be tagged.
        config: A dict that contains the configuration. See data/tags.yaml
            for fields and documentation of what needs to be defined.

    Returns:
        String with summary of the analyzer result.
    """
    query = config.get('query_string')
    query_dsl = config.get('query_dsl')
    create_view = config.get('create_view', False)
    view_name = config.get('view_name', name)
    tags = config.get('tags', [])

    # Resolve configured emoji names to their unicode codes up front.
    emojis_to_add = [
        emojis.get_emoji(emoji_name)
        for emoji_name in config.get('emojis', [])]

    event_counter = 0
    for event in self.event_stream(query_string=query, query_dsl=query_dsl):
        event_counter += 1
        event.add_tags(tags)
        event.add_emojis(emojis_to_add)
        # Commit the event to the datastore.
        event.commit()

    if create_view and event_counter:
        self.sketch.add_view(
            view_name, self.NAME, query_string=query, query_dsl=query_dsl)

    return '{0:d} events tagged for [{1:s}]'.format(event_counter, name)
def test_get_chains(self):
    """Test the chain."""
    # Make sure the fake plugin is the only registered chain plugin.
    for registered in manager.ChainPluginsManager.get_plugins(None):
        manager.ChainPluginsManager.deregister_plugin(registered)
    manager.ChainPluginsManager.register_plugin(FakeChainPlugin)

    analyzer = FakeAnalyzer('test_index', sketch_id=1)
    analyzer.datastore.client = mock.Mock()

    chain_plugins = getattr(analyzer, '_chain_plugins')
    self.assertEqual(len(chain_plugins), 1)
    plugin = chain_plugins[0]
    self.assertIsInstance(plugin, interface.BaseChainPlugin)

    self.assertEqual(
        analyzer.run(),
        '3 base events tagged with a chain UUID for 3 chains '
        'for a total of 9 events.')

    # Each event must record the fake plugin and carry one link emoji.
    link_emoji = emojis.get_emoji('LINK')
    for event in plugin.ALL_EVENTS:
        self.assertEqual(
            event.attributes.get('chain_plugins', []), ['fake_chain'])
        event_emojis = event.emojis
        self.assertEqual(len(event_emojis), 1)
        self.assertEqual(event_emojis[0], link_emoji)
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    # Elasticsearch query to find Chrome extensions from filestat.
    # NOTE(review): the original query was missing a space before AND and
    # a closing quote/parenthesis, making it syntactically invalid.
    query = (
        'data_type:"fs:stat" '
        'AND (filename:"Chrome" AND filename:"Extensions")')

    # Specify what returned fields you need for your analyzer.
    return_fields = ['filename', 'data_type']

    question_emoji = emojis.get_emoji('QUESTION')

    # Generator of events based on your query.
    events = self.event_stream(
        query_string=query, return_fields=return_fields)

    extension_count = 0
    for event in events:
        # NOTE(review): the original read the undefined name `message`;
        # the filename field is what the query returns.
        filename = event.source.get('filename')
        if not filename:
            continue

        # Look for a version-like component in the extension path.
        extension_id = re.findall(
            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', filename)
        # Original `if extension_id = 0 continue` was invalid syntax; the
        # intent was to skip events without a match.
        if not extension_id:
            continue

        event.add_attributes({'extension_id': extension_id[0]})
        event.add_emojis([question_emoji])
        event.commit()
        extension_count += 1

    return '{0:d} Chrome extension events annotated.'.format(
        extension_count)
def run(self):
    """Entry point for the browser search analyzer.

    Returns:
        String with summary of the analyzer result
    """
    query = 'source_short:"WEBHIST"'
    return_fields = ['url']

    search_emoji = emojis.get_emoji('MAGNIFYING_GLASS')

    # Generator of events based on your query.
    events = self.event_stream(
        query_string=query, return_fields=return_fields)

    hit_counter = 0
    for event in events:
        url = event.source.get('url')
        if url is None:
            continue

        for engine, expression, method_name, parameter in self._URL_FILTERS:
            callback = getattr(self, method_name, None)
            if not callback:
                continue

            if not expression.search(url):
                continue

            search_query = (
                callback(url, parameter) if parameter else callback(url))
            if not search_query:
                continue

            hit_counter += 1
            event.add_attributes({'search_string': search_query})
            event.add_human_readable('{0:s} search query: {1:s}'.format(
                engine, search_query), self.NAME)
            event.add_emojis([search_emoji])
            event.add_tags(['browser_search'])
            # We break at the first hit of a successful search engine.
            break

        # Commit the event to the datastore.
        event.commit()

    if hit_counter > 0:
        self.sketch.add_view(
            view_name='Browser Search', analyzer_name=self.NAME,
            query_string='tag:"browser_search"')

    return (
        'Browser Search completed with {0:d} search results '
        'extracted.').format(hit_counter)
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    # TODO: Once we can identify user generated events this should be
    # updated to include all user generated events instead of focusing
    # solely on browser events.
    query = 'source_short:"WEBHIST" OR source:"WEBHIST"'
    return_fields = ['timestamp', 'url', 'tag', '__ts_emojis']

    frame = self.event_pandas(
        query_string=query, return_fields=return_fields)
    if not frame.shape[0]:
        return 'No browser events discovered.'

    sleeping_emoji = emojis.get_emoji('SLEEPING_FACE')

    # Drop events with a zero timestamp as well as those that occur
    # after 2038-01-01; this may need to be changed in the future.
    frame = frame[
        (frame.timestamp > 0) & (frame.timestamp < 2145916800000000)]
    frame['timestamp'] = pd.to_numeric(frame.timestamp)
    frame['datetime'] = pd.to_datetime(
        frame.timestamp / 1e6, utc=True, unit='s')
    frame['hour'] = pd.to_numeric(frame.datetime.dt.strftime('%H'))

    total_count = frame.shape[0]
    activity_hours, threshold, aggregation = get_active_hours(frame)

    if not activity_hours:
        return 'Did not discover any activity hours.'

    hour_count = dict(aggregation.values.tolist())
    outside_frame = frame[~frame.hour.isin(activity_hours)]

    for event in utils.get_events_from_data_frame(
            outside_frame, self.datastore):
        event.add_tags(['outside-active-hours'])
        hour = event.source.get('hour')
        this_hour_count = hour_count.get(hour)
        event.add_attributes(
            {'activity_summary': (
                'Number of events for this hour ({0:d}): {1:d}, with the '
                'threshold value: {2:0.2f}').format(
                    hour, this_hour_count, threshold),
             'hour_count': this_hour_count})
        event.add_emojis([sleeping_emoji])
        event.commit()

    return (
        'Tagged {0:d} out of {1:d} events as outside of normal '
        'active hours.').format(outside_frame.shape[0], total_count)
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    link_emoji = emojis.get_emoji('LINK')

    number_of_base_events = 0
    counter = collections.Counter()

    # TODO: Have each plugin run in a separate task.
    # TODO: Add a time limit for each plugins run to prevent it from
    # holding everything up.
    for chain_plugin in self._chain_plugins:
        if chain_plugin.SEARCH_QUERY_DSL:
            search_dsl = chain_plugin.SEARCH_QUERY_DSL
            search_string = None
        else:
            search_dsl = None
            search_string = chain_plugin.SEARCH_QUERY

        # Copy the plugin's field list before extending it. Extending
        # EVENT_FIELDS in place mutated the plugin class attribute, so
        # 'chain_id_list'/'chain_plugins' accumulated across plugins and
        # across analyzer runs.
        return_fields = list(chain_plugin.EVENT_FIELDS)
        return_fields.extend(['chain_id_list', 'chain_plugins'])

        events = self.event_stream(
            query_string=search_string, query_dsl=search_dsl,
            return_fields=return_fields)

        for event in events:
            if not chain_plugin.process_chain(event):
                continue
            number_of_base_events += 1
            chain_id = uuid.uuid4().hex

            number_chained_events = chain_plugin.build_chain(
                base_event=event, chain_id=chain_id)
            counter[chain_id] = number_chained_events
            counter['total'] += number_chained_events

            chain_id_list = event.source.get('chain_id_list', [])
            chain_id_list.append(chain_id)
            chain_plugins_list = event.source.get('chain_plugins', [])
            chain_plugins_list.append(chain_plugin.NAME)
            attributes = {
                'chain_id_list': chain_id_list,
                'chain_plugins': chain_plugins_list}
            event.add_attributes(attributes)
            event.add_emojis([link_emoji])
            event.commit()

    # One key of the counter is the synthetic 'total' entry; clamp at
    # zero so an empty run does not report -1 chains.
    number_of_chains = max(len(counter) - 1, 0)
    return (
        '{0:d} base events annotated with a chain UUID for {1:d} '
        'chains for a total of {2:d} events.'.format(
            number_of_base_events, number_of_chains, counter['total']))
def test_get_helper_from_unicode(self):
    """Test getting helper text from an emoji code."""
    skull = emojis.get_emoji('skull_crossbone')
    # The skull emoji carries a helper text describing its meaning.
    self.assertEqual(
        emojis.get_helper_from_unicode(skull), 'Suspicious entry')
    # Unknown codes have no helper text.
    self.assertEqual(emojis.get_helper_from_unicode('er_ekki_til'), '')
def test_get_helper_from_unicode(self):
    """Test getting helper text from an emoji code."""
    code = emojis.get_emoji('skull_crossbone')
    helper = emojis.get_helper_from_unicode(code)
    self.assertEqual(helper, 'Suspicious entry')

    # A code with no registered helper yields an empty string.
    missing_helper = emojis.get_helper_from_unicode('er_ekki_til')
    self.assertEqual(missing_helper, '')
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    # Add Elasticsearch query to get the events you need.
    # NOTE(review): the original concatenation was missing a space before
    # AND, producing `..."syslog:line"AND...`.
    query = ('(data_type:"syslog:line" ' 'AND body:"Invalid user")')

    # Specify what returned fields you need for your analyzer.
    return_fields = ['message', 'data_type', 'source_short']

    stop_emoji = emojis.get_emoji('STOP')

    # Generator of events based on your query.
    events = self.event_stream(
        query_string=query, return_fields=return_fields)

    login_count = 0
    for event in events:
        message = event.source.get('message')
        if not message:
            continue

        # findall returns a (possibly empty) list, never None; skip the
        # event when no address was found and store the first one.
        ip_addresses = re.findall(
            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', message)
        if not ip_addresses:
            continue
        event.add_attributes({'ip_address': ip_addresses[0]})

        username = re.search(
            r'Invalid user ([a-zA-Z0-9_\.+\-]{1,32}) from', message)
        if username is None:
            continue
        # Store the captured user name, not the match object itself.
        event.add_attributes({'user': username.group(1)})

        event.add_emojis([stop_emoji])
        event.add_tags(['unknown_user'])
        # Commit the annotations and count the attempt; the original
        # never incremented login_count nor committed the event.
        event.commit()
        login_count += 1

    if login_count > 0:
        self.sketch.add_view(
            view_name='Potential bruteforce', analyzer_name=self.NAME,
            query_string=query)

    return (
        'Potential bruteforce analyzer completed, {0:d} login attempts '
        'from unknown users found').format(login_count)
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    # DSL query: any event carrying either a url or a domain field.
    query = (
        '{"query": { "bool": { "should": [ '
        '{ "exists" : { "field" : "url" }}, '
        '{ "exists" : { "field" : "domain" }} ] } } }')

    return_fields = ['domain', 'url']

    events = self.event_stream(
        '', query_dsl=query, return_fields=return_fields)

    domains = {}
    domain_counter = collections.Counter()
    tld_counter = collections.Counter()

    for event in events:
        domain = event.source.get('domain')

        if not domain:
            # Fall back to deriving the domain from the URL.
            url = event.source.get('url')
            if not url:
                continue
            domain = utils.get_domain_from_url(url)
            event.add_attributes({'domain': domain})

        if not domain:
            continue

        domain_counter[domain] += 1
        domains.setdefault(domain, [])
        domains[domain].append(event)

        # The "TLD" here is the last two labels of the domain.
        tld = '.'.join(domain.split('.')[-2:])
        tld_counter[tld] += 1

    satellite_emoji = emojis.get_emoji('SATELLITE')
    # Counter.iteritems() does not exist on Python 3; items() is the
    # correct, equivalent call.
    for domain, count in domain_counter.items():
        emojis_to_add = [satellite_emoji]
        text = '{0:s} seen {1:d} times'.format(domain, count)

        for event in domains.get(domain, []):
            event.add_emojis(emojis_to_add)
            event.add_human_readable(text, self.NAME, append=False)
            event.add_attributes({'domain_count': count})
            # Commit the annotations to the datastore (missing in the
            # original; the newer variant of this analyzer commits here).
            event.commit()

    return (
        '{0:d} domains discovered with {1:d} TLDs.').format(
            len(domains), len(tld_counter))
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    if not self.yeti_api_root or not self.yeti_api_key:
        return 'No Yeti configuration settings found, aborting.'

    self.get_intrusion_sets()
    actors_found = []
    # Hoisted out of the loop: the emoji code never changes.
    skull_emoji = emojis.get_emoji('SKULL')

    for intrusion_set in self.intel.values():
        if not intrusion_set['indicators']:
            continue

        found = False

        for indicator in intrusion_set['indicators']:
            query = build_query_for_indicators([indicator])

            events = self.event_stream(query_string=query, return_fields=[])

            name = intrusion_set['name']
            for event in events:
                found = True
                event.add_emojis([skull_emoji])
                event.add_tags([name])
                # Add the comment before committing; the original called
                # commit() first and the comment was never persisted.
                event.add_comment(
                    'Indicator "{0:s}" found for actor "{1:s}"'.format(
                        indicator['name'], name))
                event.commit()

        if found:
            actors_found.append(name)
            self.sketch.add_view(
                'Domain activity for actor {0:s}'.format(name), self.NAME,
                query_string=query)

    if actors_found:
        return '{0:d} actors were found! [{1:s}]'.format(
            len(actors_found), ', '.join(actors_found))
    return 'No indicators were found in the timeline.'
def matcher(self, name, config):
    """Entry point for the analyzer.

    Args:
        name: String with the name describing the matcher.
        config: Dict with the matcher configuration; the keys read here
            are event_field_name, bq_query, bq_project, tags and emojis.

    Returns:
        String with summary of the analyzer result.
    """
    event_field_name = config.get('event_field_name')
    bq_query = config.get('bq_query')
    bq_project = config.get('bq_project')
    tags = config.get('tags')
    emoji_names = config.get('emojis')
    emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

    # Only pull events that actually carry the field being matched on.
    es_query = (
        '{"query": { "bool": { "should": [ '
        '{ "exists" : { "field" : "' + event_field_name + '" }} ] } } }')
    events_stream = self.event_stream(
        query_dsl=es_query,
        return_fields=[event_field_name],
    )

    # Group events by field value so one BigQuery match row annotates
    # every event sharing that value.
    events = {}
    for event in events_stream:
        field = event.source.get(event_field_name)
        events.setdefault(field, []).append(event)

    try:
        bq_client = bigquery.Client(project=bq_project)
    except (google_auth_exceptions.DefaultCredentialsError) as exception:
        return 'Could not authenticate to BigQuery: {0!s}'.format(
            exception)

    num_matches = 0
    # Send the distinct field values to BigQuery in batches. Note that
    # islice re-walks the dict keys from the start for each batch.
    for i in range(0, len(events), self._BQ_BATCH_SIZE):
        batch = list(itertools.islice(events, i, i + self._BQ_BATCH_SIZE))
        query_job = self.bigquery_match(
            bq_client, bq_query, event_field_name, batch)
        for row in query_job:
            # row[0] is the matched field value; tag every event that
            # carried it.
            for event in events[row[0]]:
                event.add_tags(tags)
                event.add_emojis(emojis_to_add)
                event.commit()
                num_matches += 1
    return ('{0:d} events found for matcher [{1:s}]').format(
        num_matches, name)
def mark_event(self, indicator, event, neighbors):
    """Annotate an event with data from indicators and neighbors.

    Tags with skull emoji, adds a comment to the event.
    """
    event.add_emojis([emojis.get_emoji('SKULL')])

    # Turn each neighbor name into a lowercase dash-separated slug.
    tags = []
    for neighbor in neighbors:
        slug = re.sub(r'[^a-z0-9]', '-', neighbor['name'].lower())
        tags.append(re.sub(r'-+', '-', slug))
    event.add_tags(tags)
    event.commit()

    msg = 'Indicator match: "{0:s}" ({1:s})\n'.format(
        indicator['name'], indicator['id'])
    msg += 'Related entities: {0!s}'.format(
        [neighbor['name'] for neighbor in neighbors])
    event.add_comment(msg)
    event.commit()
def mark_event(self, indicator, event, neighbors):
    """Annotate an event with data from indicators and neighbors.

    Tags with skull emoji, adds a comment to the event.
    """
    # Build slug-style tags from the neighbor names first.
    tags = []
    for n in neighbors:
        slug = re.sub(r"[^a-z0-9]", "-", n["name"].lower())
        slug = re.sub(r"-+", "-", slug)
        tags.append(slug)

    event.add_emojis([emojis.get_emoji("SKULL")])
    event.add_tags(tags)
    event.commit()

    message = 'Indicator match: "{0:s}" ({1:s})\n'.format(
        indicator["name"], indicator["id"])
    message += "Related entities: {0!s}".format(
        [n["name"] for n in neighbors])
    event.add_comment(message)
    event.commit()
def _config_validation(self, config):
    """Validate that all items of a config are valid."""
    query = config.get("query_string", config.get("query_dsl"))
    self.assertIsNotNone(query)
    self.assertIsInstance(query, str)

    emoji_names = config.get("emojis")
    if emoji_names:
        self.assertIsInstance(emoji_names, (list, tuple))
        # Every configured emoji name must resolve to a real code.
        for emoji_name in emoji_names:
            self.assertNotEqual(emojis.get_emoji(emoji_name), "")

    tags = config.get("tags")
    if tags:
        self.assertIsInstance(tags, (list, tuple))

    create_view = config.get("create_view")
    if create_view:
        self.assertIsInstance(create_view, bool)
def _config_validation(self, config):
    """Validate that all items of a config are valid."""
    query = config.get('query_string', config.get('query_dsl'))
    self.assertIsNotNone(query)
    self.assertIsInstance(query, str)

    self.assertIsNotNone(config.get('attribute'))
    self.assertIsNotNone(config.get('store_as'))

    expression = config.get('re')
    self.assertIsNotNone(expression)
    try:
        _ = re.compile(expression)
    except re.error as exception:
        # A failed compile surfaces here as a non-None exception.
        self.assertIsNone(exception)

    emoji_names = config.get('emojis')
    if emoji_names:
        self.assertIsInstance(emoji_names, (list, tuple))
        for emoji_name in emoji_names:
            self.assertNotEqual(emojis.get_emoji(emoji_name), '')

    tags = config.get('tags')
    if tags:
        self.assertIsInstance(tags, (list, tuple))

    # The boolean flags are only type-checked when truthy.
    for flag_name in ('create_view', 'aggregate'):
        flag = config.get(flag_name)
        if flag:
            self.assertIsInstance(flag, bool)
def _config_validation(self, config):
    """Validate that all items of a config are valid."""
    query = config.get('query_string', config.get('query_dsl'))
    self.assertIsNotNone(query)
    self.assertIsInstance(query, str)

    attribute = config.get('attribute')
    self.assertIsNotNone(attribute)

    store_as = config.get('store_as')
    self.assertIsNotNone(store_as)

    expression = config.get('re')
    self.assertIsNotNone(expression)
    try:
        _ = re.compile(expression)
    except re.error as exception:
        # Report the compile failure via the assertion framework.
        self.assertIsNone(exception)

    emojis_to_add = config.get('emojis')
    if emojis_to_add:
        self.assertIsInstance(emojis_to_add, (list, tuple))
        for emoji_name in emojis_to_add:
            emoji_code = emojis.get_emoji(emoji_name)
            self.assertNotEqual(emoji_code, '')

    tags = config.get('tags')
    if tags:
        self.assertIsInstance(tags, (list, tuple))

    create_view = config.get('create_view')
    if create_view:
        self.assertIsInstance(create_view, bool)

    aggregate = config.get('aggregate')
    if aggregate:
        self.assertIsInstance(aggregate, bool)
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    # DSL query: any event with either a url or a domain field.
    query = ('{"query": { "bool": { "should": [ '
             '{ "exists" : { "field" : "url" }}, '
             '{ "exists" : { "field" : "domain" }} ] } } }')

    return_fields = ['domain', 'url']

    events = self.event_stream(
        '', query_dsl=query, return_fields=return_fields)

    domains = {}
    domain_counter = collections.Counter()
    tld_counter = collections.Counter()
    cdn_counter = collections.Counter()

    for event in events:
        domain = event.source.get('domain')

        if not domain:
            # Fall back to deriving the domain from the URL field.
            url = event.source.get('url')
            if not url:
                continue
            domain = utils.get_domain_from_url(url)

        if not domain:
            continue

        domain_counter[domain] += 1
        domains.setdefault(domain, [])
        domains[domain].append(event)

        # The "TLD" here is the last two labels of the domain.
        tld = '.'.join(domain.split('.')[-2:])
        tld_counter[tld] += 1

    satellite_emoji = emojis.get_emoji('SATELLITE')
    for domain, count in iter(domain_counter.items()):
        emojis_to_add = [satellite_emoji]
        tags_to_add = []
        text = '{0:s} seen {1:d} times'.format(domain, count)

        # Tag domains that belong to a known CDN provider.
        cdn_provider = utils.get_cdn_provider(domain)
        if cdn_provider:
            tags_to_add.append('known-cdn')
            cdn_counter[cdn_provider] += 1

        for event in domains.get(domain, []):
            event.add_tags(tags_to_add)
            event.add_emojis(emojis_to_add)
            event.add_human_readable(text, self.NAME, append=False)

            new_attributes = {'domain': domain, 'domain_count': count}
            if cdn_provider:
                new_attributes['cdn_provider'] = cdn_provider
            event.add_attributes(new_attributes)

            # Commit the event to the datastore.
            event.commit()

    return ('{0:d} domains discovered ({1:d} TLDs) and {2:d} known '
            'CDN networks found.').format(len(domains), len(tld_counter),
                                          len(cdn_counter))
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    # TODO: Once we can identify user generated events this should be
    # updated to include all user generated events instead of focusing
    # solely on browser events.
    query = 'source_short:"WEBHIST" OR source:"WEBHIST"'

    return_fields = ['datetime', 'timestamp', 'url', 'tag', '__ts_emojis']

    data_frame = self.event_pandas(
        query_string=query, return_fields=return_fields)

    if not data_frame.shape[0]:
        return 'No browser events discovered.'

    sleeping_emoji = emojis.get_emoji('SLEEPING_FACE')

    # This query filters out all timestamps that have a zero timestamp as
    # well as those that occur after 2038-01-01, this may need to be
    # changed in the future.
    data_frame['timestamp'] = pd.to_numeric(data_frame.timestamp)
    data_frame = data_frame[
        (data_frame.timestamp > 0) &
        (data_frame.timestamp < 2145916800000000)]

    # Timestamps are microseconds since epoch; derive an hour-of-day
    # column used for the activity aggregation.
    data_frame['datetime'] = pd.to_datetime(
        data_frame.timestamp / 1e6, utc=True, unit='s')
    data_frame['hour'] = pd.to_numeric(
        data_frame.datetime.dt.strftime('%H'))

    total_count = data_frame.shape[0]
    activity_hours, threshold, aggregation = get_active_hours(data_frame)

    if not activity_hours:
        return 'Did not discover any activity hours.'

    hour_count = dict(aggregation.values.tolist())
    data_frame_outside = data_frame[~data_frame.hour.isin(activity_hours)]

    # Tag and annotate every event that happened outside active hours.
    for event in utils.get_events_from_data_frame(
            data_frame_outside, self.datastore):
        event.add_tags(['outside-active-hours'])
        hour = event.source.get('hour')
        this_hour_count = hour_count.get(hour)
        event.add_attributes({
            'activity_summary':
                ('Number of events for this hour ({0:d}): {1:d}, with the '
                 'threshold value: {2:0.2f}').format(
                     hour, this_hour_count, threshold),
            'hour_count': this_hour_count
        })
        event.add_emojis([sleeping_emoji])
        event.commit()

    tagged_events, _ = data_frame_outside.shape
    if tagged_events:
        story = self.sketch.add_story('{0:s} - {1:s}'.format(
            utils.BROWSER_STORY_TITLE, self.timeline_name))
        story.add_text(utils.BROWSER_STORY_HEADER, skip_if_exists=True)

        # Find some statistics about the run time of the analyzer.
        percent = (tagged_events / total_count) * 100.0

        # Locate the first break in the sorted active hours; the hour at
        # the break marks where the contiguous active block wraps.
        last_hour = activity_hours[0]
        end = 0
        for hour in activity_hours[1:]:
            if hour != last_hour + 1:
                end = hour
                break
            last_hour = hour

        if not end:
            # No break found: the active hours form one contiguous run.
            first = activity_hours[0]
            last = activity_hours[-1]
        else:
            first = end
            index = activity_hours.index(end)
            last = activity_hours[index - 1]

        story.add_text(
            '## Browser Timeframe Analyzer\n\nThe browser timeframe '
            'analyzer discovered {0:d} browser events that occurred '
            'outside of the typical browsing window of this browser '
            'history ({1:s}), or around {2:0.2f}% of the {3:d} total '
            'events.\n\nThe analyzer determines the activity hours by '
            'finding the frequency of browsing events per hour, and then '
            'discovering the longest block of most active hours before '
            'proceeding with flagging all events outside of that time '
            'period. This information can be used by other analyzers '
            'or by manually looking for other activity within the '
            'inactive time period to find unusual actions.\n\n'
            'The hours considered to be active hours are the hours '
            'between {4:02d} and {5:02d} (hours in UTC) and the '
            'threshold used to determine if an hour was considered to be '
            'active was: {6:0.2f}.'.format(
                tagged_events, self.timeline_name, percent, total_count,
                first, last, threshold))

        # Layered aggregation group: an activity bar chart plus a
        # threshold line chart rendered on top of each other.
        group = self.sketch.add_aggregation_group(
            name='Browser Activity Per Hour',
            description='Created by the browser timeframe analyzer')
        group.set_layered()

        params = {
            'data': aggregation.to_dict(orient='records'),
            'title':
                'Browser Activity Per Hour ({0:s})'.format(
                    self.timeline_name),
            'field': 'hour',
            'order_field': 'hour',
        }
        agg_obj = self.sketch.add_aggregation(
            name='Browser Activity Per Hour ({0:s})'.format(
                self.timeline_name),
            agg_name='manual_feed', agg_params=params,
            chart_type='barchart',
            description='Created by the browser timeframe analyzer',
            label='informational')
        group.add_aggregation(agg_obj)

        # A flat line at the threshold value, one point per hour.
        lines = [{'hour': x, 'count': threshold} for x in range(0, 24)]
        params = {
            'data': lines,
            'title':
                'Browser Timeframe Threshold ({0:s})'.format(
                    self.timeline_name),
            'field': 'hour',
            'order_field': 'hour',
            'chart_color': 'red',
        }
        # NOTE(review): this aggregation reuses the "Browser Activity Per
        # Hour" name rather than the threshold title — confirm intended.
        agg_line = self.sketch.add_aggregation(
            name='Browser Activity Per Hour ({0:s})'.format(
                self.timeline_name),
            agg_name='manual_feed', agg_params=params,
            chart_type='linechart',
            description='Created by the browser timeframe analyzer',
            label='informational')
        group.add_aggregation(agg_line)
        story.add_aggregation_group(group)

    return ('Tagged {0:d} out of {1:d} events as outside of normal '
            'active hours.').format(tagged_events, total_count)
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    login_emoji = emojis.get_emoji('unlock')
    logoff_emoji = emojis.get_emoji('lock')
    screen_emoji = emojis.get_emoji('screen')
    # Logon type "7" denotes a screensaver/workstation unlock.
    screensaver_logon = LOGON_TYPES.get('7')
    login_counter = 0
    logoff_counter = 0

    # TODO: Add EVT lookups, ID 528 for logon and 538, 540 for logoff.
    # TODO: Add RDP EVT lookups, ID 682 for logon and 683 for logoff.
    query = (
        'data_type:"windows:evtx:record" AND (event_identifier:4624 OR '
        'event_identifier:4778 OR event_identifier:4779 OR '
        'event_identifier:4634 OR event_identifier:4647)')

    return_fields = [
        'message', 'data_type', 'strings', 'strings_parsed',
        'event_identifier']

    # Generator of events based on your query.
    events = self.event_stream(
        query_string=query, return_fields=return_fields)

    for event in events:
        strings = event.source.get('strings')
        strings_parsed = event.source.get('strings_parsed')
        identifier = event.source.get('event_identifier')
        emojis_to_add = []
        tags_to_add = []
        attribute_dict = {}

        # The identifier may be stored as text; normalize to an int so
        # the comparisons below work.
        if isinstance(identifier, six.text_type):
            try:
                identifier = int(identifier, 10)
            except ValueError:
                logging.warning((
                    'Unable to convert EVTX identifier to an integer, '
                    'value is {0:s}').format(identifier))
                continue

        if identifier == 4624:
            # 4624: successful logon.
            attribute_dict = parse_evtx_logon_event(
                strings, strings_parsed)
            if not attribute_dict:
                continue
            emojis_to_add.append(login_emoji)
            tags_to_add.append('logon-event')
            login_counter += 1
        elif identifier in (4634, 4647):
            # 4634/4647: logoff events.
            attribute_dict = parse_evtx_logoff_event(strings)
            if not attribute_dict:
                continue
            emojis_to_add.append(logoff_emoji)
            tags_to_add.append('logoff-event')
            logoff_counter += 1
        # TODO: Add support for RDP events, ID 4778 (logon) and 4779
        # (logoff).

        if not attribute_dict:
            continue
        event.add_attributes(attribute_dict)

        # Want to add an emoji in case this is a screensaver unlock.
        if attribute_dict.get('logon_type', '') == screensaver_logon:
            emojis_to_add.append(screen_emoji)

        event.add_emojis(emojis_to_add)
        event.add_tags(tags_to_add)

        # Commit the event to the datastore.
        event.commit()

    # TODO: Add support for Linux syslog logon/logoff events.
    # TODO: Add support for Mac OS X logon/logoff events.
    return (
        'Total number of login events processed: {0:d} and '
        'logoff events: {1:d}').format(login_counter, logoff_counter)
def run(self):
    """Entry point for the analyzer.

    Returns:
        String with summary of the analyzer result
    """
    login_emoji = emojis.get_emoji('unlock')
    logoff_emoji = emojis.get_emoji('lock')
    screen_emoji = emojis.get_emoji('screen')
    # Logon type "7" denotes a screensaver/workstation unlock.
    screensaver_logon = LOGON_TYPES.get('7')
    login_counter = 0
    logoff_counter = 0

    # TODO: Add EVT lookups, ID 528 for logon and 538, 540 for logoff.
    # TODO: Add RDP EVT lookups, ID 682 for logon and 683 for logoff.
    query = (
        'data_type:"windows:evtx:record" AND (event_identifier:4624 OR '
        'event_identifier:4778 OR event_identifier:4779 OR '
        'event_identifier:4634 OR event_identifier:4647)')

    return_fields = [
        'message', 'data_type', 'strings', 'strings_parsed',
        'event_identifier'
    ]

    # Generator of events based on your query.
    events = self.event_stream(query_string=query,
                               return_fields=return_fields)

    for event in events:
        strings = event.source.get('strings')
        strings_parsed = event.source.get('strings_parsed')
        identifier = event.source.get('event_identifier')
        emojis_to_add = []
        tags_to_add = []
        attribute_dict = {}

        # The identifier may be stored as text; normalize to an int so
        # the comparisons below work.
        if isinstance(identifier, six.text_type):
            try:
                identifier = int(identifier, 10)
            except ValueError:
                logger.warning(
                    ('Unable to convert EVTX identifier to an integer, '
                     'value is {0:s}').format(identifier))
                continue

        if identifier == 4624:
            # 4624: successful logon.
            attribute_dict = parse_evtx_logon_event(
                strings, strings_parsed)
            if not attribute_dict:
                continue
            emojis_to_add.append(login_emoji)
            tags_to_add.append('logon-event')
            login_counter += 1
        elif identifier in (4634, 4647):
            # 4634/4647: logoff events.
            attribute_dict = parse_evtx_logoff_event(strings)
            if not attribute_dict:
                continue
            emojis_to_add.append(logoff_emoji)
            tags_to_add.append('logoff-event')
            logoff_counter += 1
        # TODO: Add support for RDP events, ID 4778 (logon) and 4779
        # (logoff).

        if not attribute_dict:
            continue
        event.add_attributes(attribute_dict)

        # Want to add an emoji in case this is a screensaver unlock.
        if attribute_dict.get('logon_type', '') == screensaver_logon:
            emojis_to_add.append(screen_emoji)

        event.add_emojis(emojis_to_add)
        event.add_tags(tags_to_add)

        # Commit the event to the datastore.
        event.commit()

    # TODO: Add support for Linux syslog logon/logoff events.
    # TODO: Add support for Mac OS X logon/logoff events.
    return ('Total number of login events processed: {0:d} and '
            'logoff events: {1:d}').format(login_counter, logoff_counter)
def run(self):
    """Entry point for the analyzer.

    Streams events carrying a "url" or "domain" field, counts domain
    occurrences, builds a set of watched domains from configuration plus
    the most common domains and TLDs of the timeline, and annotates
    events whose domain is similar (by minhash) to a watched domain.

    Returns:
        String with summary of the analyzer result
    """
    query = ('{"query": { "bool": { "should": [ '
             '{ "exists" : { "field" : "url" }}, '
             '{ "exists" : { "field" : "domain" }} ] } } }')

    return_fields = ['domain', 'url', 'message', 'human_readable']

    events = self.event_stream(
        '', query_dsl=query, return_fields=return_fields)

    domains = {}
    domain_counter = collections.Counter()
    tld_counter = collections.Counter()

    for event in events:
        domain = event.source.get('domain')
        if not domain:
            continue
        domain_counter[domain] += 1
        domains.setdefault(domain, [])
        domains[domain].append(event)

        tld = utils.get_tld_from_domain(domain)
        tld_counter[tld] += 1

    # Seed the watch list from config and extend it with the most
    # frequent domains/TLDs seen in this timeline.
    watched_domains_list = current_app.config.get(
        'DOMAIN_ANALYZER_WATCHED_DOMAINS', [])
    domain_threshold = current_app.config.get(
        'DOMAIN_ANALYZER_WATCHED_DOMAINS_THRESHOLD', 10)
    watched_domains_list.extend([
        utils.strip_www_from_domain(x)
        for x, _ in domain_counter.most_common(domain_threshold)
    ])
    watched_domains_list.extend(
        [x for x, _ in tld_counter.most_common(domain_threshold)])
    watched_domains_list.extend(self.WATCHED_DOMAINS_BASE_LIST)

    # De-duplicate and drop whitelisted or malformed (dot-less) entries.
    watched_domains_list_temp = set(watched_domains_list)
    watched_domains_list = []
    for domain in watched_domains_list_temp:
        if domain in self.domain_scoring_whitelist:
            continue
        if any(domain.endswith(x) for x in self.domain_scoring_whitelist):
            continue
        if '.' not in domain:
            continue
        watched_domains_list.append(domain)

    watched_domains = {}
    for domain in watched_domains_list:
        minhash = self._get_minhash_from_domain(domain)
        watched_domains[domain] = minhash

    similar_domain_counter = 0
    evil_emoji = emojis.get_emoji('SKULL_CROSSBONE')
    phishing_emoji = emojis.get_emoji('FISHING_POLE')
    # BUG FIX: Counter.iteritems() is Python 2 only and raises
    # AttributeError on Python 3; use items() instead.
    for domain, _ in domain_counter.items():
        emojis_to_add = []
        tags_to_add = []
        text = None

        similar_domains = self._get_similar_domains(
            domain, watched_domains)

        if similar_domains:
            similar_domain_counter += 1
            emojis_to_add.append(evil_emoji)
            emojis_to_add.append(phishing_emoji)
            tags_to_add.append('phishy-domain')
            similar_text_list = [
                '{0:s} [score: {1:.2f}]'.format(phishy_domain, score)
                for phishy_domain, score in similar_domains
            ]
            text = 'Domain {0:s} is similar to {1:s}'.format(
                domain, ', '.join(similar_text_list))
            if any(
                    domain.endswith(x)
                    for x in self.domain_scoring_whitelist):
                tags_to_add.append('known-network')

        for event in domains.get(domain, []):
            event.add_emojis(emojis_to_add)
            event.add_tags(tags_to_add)
            if text:
                event.add_human_readable(text, self.NAME, append=False)

    if similar_domain_counter:
        self.sketch.add_view(view_name='Phishy Domains',
                             analyzer_name=self.NAME,
                             query_string='tag:"phishy-domain"')

    return ('{0:d} potentially phishy domains discovered.'
            ).format(similar_domain_counter)
def run(self):
    """Entry point for the analyzer.

    Streams events carrying a "url" or "domain" field, counts domain
    occurrences, builds a set of watched domains from configuration plus
    the most common domains and TLDs of the timeline, and annotates
    events whose domain is similar (by minhash) to a watched domain.

    Returns:
        String with summary of the analyzer result
    """
    query = (
        '{"query": { "bool": { "should": [ '
        '{ "exists" : { "field" : "url" }}, '
        '{ "exists" : { "field" : "domain" }} ] } } }'
    )

    return_fields = ["domain", "url", "message", "human_readable"]

    events = self.event_stream("", query_dsl=query, return_fields=return_fields)

    domains = {}
    domain_counter = collections.Counter()
    tld_counter = collections.Counter()

    for event in events:
        domain = event.source.get("domain")
        if not domain:
            continue
        domain_counter[domain] += 1
        domains.setdefault(domain, [])
        domains[domain].append(event)

        tld = utils.get_tld_from_domain(domain)
        tld_counter[tld] += 1

    # Nothing to do on an empty timeline.
    if not domain_counter:
        return "No domains discovered, so no phishy domains."

    # Seed the watch list from config and extend it with the most
    # frequent domains/TLDs seen in this timeline.
    watched_domains_list = current_app.config.get(
        "DOMAIN_ANALYZER_WATCHED_DOMAINS", []
    )
    domain_threshold = current_app.config.get(
        "DOMAIN_ANALYZER_WATCHED_DOMAINS_THRESHOLD", 10
    )
    watched_domains_list.extend(
        [
            utils.strip_www_from_domain(x)
            for x, _ in domain_counter.most_common(domain_threshold)
        ]
    )
    watched_domains_list.extend(
        [x for x, _ in tld_counter.most_common(domain_threshold)]
    )
    watched_domains_list.extend(self.WATCHED_DOMAINS_BASE_LIST)

    # De-duplicate and drop excluded or malformed (dot-less) entries.
    watched_domains_list_temp = set(watched_domains_list)
    watched_domains_list = []
    for domain in watched_domains_list_temp:
        if domain in self.domain_scoring_exclude_domains:
            continue
        if any(domain.endswith(x) for x in self.domain_scoring_exclude_domains):
            continue
        if "." not in domain:
            continue
        watched_domains_list.append(domain)

    watched_domains = {}
    for domain in watched_domains_list:
        minhash = self._get_minhash_from_domain(domain)
        watched_domains[domain] = {"hash": minhash, "depth": len(domain.split("."))}

    similar_domain_counter = 0
    allowlist_encountered = False
    evil_emoji = emojis.get_emoji("SKULL_CROSSBONE")
    phishing_emoji = emojis.get_emoji("FISHING_POLE")
    # Iterate keys directly: the count is unused here, so the previous
    # iter(domain_counter.items()) wrapper was redundant.
    for domain in domain_counter:
        emojis_to_add = []
        tags_to_add = []
        text = None

        similar_domains = self._get_similar_domains(domain, watched_domains)

        if similar_domains:
            similar_domain_counter += 1
            emojis_to_add.append(evil_emoji)
            emojis_to_add.append(phishing_emoji)
            tags_to_add.append("phishy-domain")
            similar_text_list = [
                "{0:s} [score: {1:.2f}]".format(phishy_domain, score)
                for phishy_domain, score in similar_domains
            ]
            text = "Domain {0:s} is similar to {1:s}".format(
                domain, ", ".join(similar_text_list)
            )
            if any(domain.endswith(x) for x in self.domain_scoring_exclude_domains):
                tags_to_add.append("known-domain")
                allowlist_encountered = True

        for event in domains.get(domain, []):
            event.add_emojis(emojis_to_add)
            event.add_tags(tags_to_add)
            if text:
                event.add_human_readable(text, self.NAME, append=False)

            # Commit the event to the datastore.
            event.commit()

    if similar_domain_counter:
        self.sketch.add_view(
            view_name="Phishy Domains",
            analyzer_name=self.NAME,
            query_string='tag:"phishy-domain"',
        )
        if allowlist_encountered:
            self.sketch.add_view(
                view_name="Phishy Domains, excl. known domains",
                analyzer_name=self.NAME,
                query_string=('tag:"phishy-domain" AND NOT tag:"known-domain"'),
            )

    return ("{0:d} potentially phishy domains discovered.").format(
        similar_domain_counter
    )
def run(self):
    """Entry point for the analyzer.

    Streams events carrying a "url" or "domain" field, counts how often
    each domain appears, tags domains as common/rare based on the 85th
    and 20th percentiles of the counts, and marks known CDN providers.

    Returns:
        String with summary of the analyzer result
    """
    query = (
        '{"query": { "bool": { "should": [ '
        '{ "exists" : { "field" : "url" }}, '
        '{ "exists" : { "field" : "domain" }} ] } } }')

    return_fields = ['domain', 'url']

    events = self.event_stream(
        '', query_dsl=query, return_fields=return_fields)

    domains = {}
    domain_counter = collections.Counter()
    tld_counter = collections.Counter()
    cdn_counter = collections.Counter()

    for event in events:
        domain = event.source.get('domain')

        # Fall back to extracting the domain from the URL field.
        if not domain:
            url = event.source.get('url')
            if not url:
                continue
            domain = utils.get_domain_from_url(url)

        if not domain:
            continue

        domain_counter[domain] += 1
        domains.setdefault(domain, [])
        domains[domain].append(event)

        # Last two labels of the domain, e.g. "example.com".
        tld = '.'.join(domain.split('.')[-2:])
        tld_counter[tld] += 1

    # Exit early if there are no domains in the data set to analyze.
    if not domain_counter:
        return 'No domains to analyze.'

    domain_count_array = numpy.array(list(domain_counter.values()))
    domain_20th_percentile = int(numpy.percentile(domain_count_array, 20))
    domain_85th_percentile = int(numpy.percentile(domain_count_array, 85))

    common_domains = [
        x for x, y in domain_counter.most_common()
        if y >= domain_85th_percentile]
    rare_domains = [
        x for x, y in domain_counter.most_common()
        if y <= domain_20th_percentile]

    satellite_emoji = emojis.get_emoji('SATELLITE')
    for domain, count in iter(domain_counter.items()):
        emojis_to_add = [satellite_emoji]
        tags_to_add = []

        cdn_provider = utils.get_cdn_provider(domain)
        if cdn_provider:
            tags_to_add.append('known-cdn')
            cdn_counter[cdn_provider] += 1

        if domain in common_domains:
            tags_to_add.append('common_domain')

        if domain in rare_domains:
            tags_to_add.append('rare_domain')

        for event in domains.get(domain, []):
            event.add_tags(tags_to_add)
            event.add_emojis(emojis_to_add)

            new_attributes = {'domain': domain, 'domain_count': count}
            if cdn_provider:
                new_attributes['cdn_provider'] = cdn_provider
            event.add_attributes(new_attributes)

            # Commit the event to the datastore.
            event.commit()

    return (
        '{0:d} domains discovered ({1:d} TLDs) and {2:d} known '
        'CDN networks found.').format(
            len(domains), len(tld_counter), len(cdn_counter))
def run(self): """Entry point for the analyzer. Returns: String with summary of the analyzer result """ query = ( '{"query": { "bool": { "should": [ ' '{ "exists" : { "field" : "url" }}, ' '{ "exists" : { "field" : "domain" }} ] } } }') return_fields = ['domain', 'url'] events = self.event_stream( '', query_dsl=query, return_fields=return_fields) domains = {} domain_counter = collections.Counter() tld_counter = collections.Counter() cdn_counter = collections.Counter() for event in events: domain = event.source.get('domain') if not domain: url = event.source.get('url') if not url: continue domain = utils.get_domain_from_url(url) if not domain: continue domain_counter[domain] += 1 domains.setdefault(domain, []) domains[domain].append(event) tld = '.'.join(domain.split('.')[-2:]) tld_counter[tld] += 1 satellite_emoji = emojis.get_emoji('SATELLITE') for domain, count in iter(domain_counter.items()): emojis_to_add = [satellite_emoji] tags_to_add = [] text = '{0:s} seen {1:d} times'.format(domain, count) cdn_provider = utils.get_cdn_provider(domain) if cdn_provider: tags_to_add.append('known-cdn') cdn_counter[cdn_provider] += 1 for event in domains.get(domain, []): event.add_tags(tags_to_add) event.add_emojis(emojis_to_add) event.add_human_readable(text, self.NAME, append=False) new_attributes = {'domain': domain, 'domain_count': count} if cdn_provider: new_attributes['cdn_provider'] = cdn_provider event.add_attributes(new_attributes) # Commit the event to the datastore. event.commit() return ( '{0:d} domains discovered ({1:d} TLDs) and {2:d} known ' 'CDN networks found.').format( len(domains), len(tld_counter), len(cdn_counter))
def run(self):
    """Entry point for the browser search analyzer.

    Streams web-history events, extracts search strings from known
    search-engine URLs via the per-engine callbacks in _URL_FILTERS,
    annotates matching events, and (when any are found) creates a saved
    view, aggregations and a story summarizing the results.

    Returns:
        String with summary of the analyzer result
    """
    query = 'source_short:"WEBHIST" OR source:"WEBHIST"'

    return_fields = ['url', 'datetime']

    search_emoji = emojis.get_emoji('MAGNIFYING_GLASS')

    # Generator of events based on your query.
    events = self.event_stream(
        query_string=query, return_fields=return_fields)

    simple_counter = 0
    for event in events:
        url = event.source.get('url')

        if url is None:
            continue

        for engine, expression, method_name, parameter in self._URL_FILTERS:
            callback_method = getattr(self, method_name, None)
            if not callback_method:
                continue

            match = expression.search(url)
            if not match:
                continue

            if parameter:
                search_query = callback_method(url, parameter)
            else:
                search_query = callback_method(url)

            if not search_query:
                continue

            simple_counter += 1
            datetime = event.source.get('datetime')
            # ISO timestamps are "YYYY-MM-DDTHH:MM:SS..."; keep the date.
            day, _, _ = datetime.partition('T')
            event.add_attributes({
                'search_string': search_query,
                'search_engine': engine,
                'search_day': 'D:{0:s}'.format(day)
            })

            event.add_human_readable(
                '{0:s} search query: {1:s}'.format(engine, search_query),
                self.NAME)
            event.add_emojis([search_emoji])
            event.add_tags(['browser-search'])
            # We break at the first hit of a successful search engine.
            break

        # Commit the event to the datastore.
        event.commit()

    if simple_counter > 0:
        view = self.sketch.add_view(
            view_name='Browser Search', analyzer_name=self.NAME,
            query_string='tag:"browser-search"',
            additional_fields=self._FIELDS_TO_INCLUDE)
        params = {
            'field': 'search_string',
            'limit': 20,
        }
        agg_obj = self.sketch.add_aggregation(
            name='Top 20 browser search queries', agg_name='field_bucket',
            agg_params=params, view_id=view.id, chart_type='hbarchart',
            description='Created by the browser search analyzer')

        params = {
            'field': 'search_day',
            'limit': 20,
        }
        agg_days = self.sketch.add_aggregation(
            name='Top 20 days of search queries', agg_name='field_bucket',
            agg_params=params, chart_type='hbarchart',
            description='Created by the browser search analyzer')

        params = {
            'query_string': 'tag:"browser-search"',
            'field': 'domain',
        }
        agg_engines = self.sketch.add_aggregation(
            name='Top Search Engines', agg_name='query_bucket',
            agg_params=params, view_id=view.id, chart_type='hbarchart',
            description='Created by the browser search analyzer')

        story = self.sketch.add_story(utils.BROWSER_STORY_TITLE)
        story.add_text(utils.BROWSER_STORY_HEADER, skip_if_exists=True)

        # Fixed user-facing typos: "resevered" -> "reserved", missing
        # space after "search string.", "it's" -> "its".
        story.add_text('## Browser Search Analyzer.\n\nThe browser search '
                       'analyzer takes URLs usually reserved for browser '
                       'search queries and extracts the search string. '
                       'In this timeline the analyzer discovered {0:d} '
                       'browser searches.\n\nThis is a summary of '
                       'its findings.'.format(simple_counter))
        story.add_text(
            'The top 20 most commonly discovered searches were:')
        story.add_aggregation(agg_obj)
        story.add_text('The domains used to search:')
        story.add_aggregation(agg_engines, 'hbarchart')
        story.add_text('And the most common days of search:')
        story.add_aggregation(agg_days)
        story.add_text(
            'And an overview of all the discovered search terms:')
        story.add_view(view)

    return ('Browser Search completed with {0:d} search results '
            'extracted.').format(simple_counter)
def run(self):
    """Entry point for the analyzer.

    Streams events carrying a "url" or "domain" field, counts how often
    each domain appears, tags domains as common/rare based on the 85th
    and 20th percentiles of the counts (with fallbacks if the percentile
    calculation fails), and marks known CDN providers.

    Returns:
        String with summary of the analyzer result
    """
    query = ('{"query": { "bool": { "should": [ '
             '{ "exists" : { "field" : "url" }}, '
             '{ "exists" : { "field" : "domain" }} ] } } }')

    return_fields = ['domain', 'url']

    events = self.event_stream(
        '', query_dsl=query, return_fields=return_fields)

    domains = {}
    domain_counter = collections.Counter()
    tld_counter = collections.Counter()
    cdn_counter = collections.Counter()

    for event in events:
        domain = event.source.get('domain')

        # Fall back to extracting the domain from the URL field.
        if not domain:
            url = event.source.get('url')
            if not url:
                continue
            domain = utils.get_domain_from_url(url)

        if not domain:
            continue

        domain_counter[domain] += 1
        domains.setdefault(domain, [])
        domains[domain].append(event)

        # Last two labels of the domain, e.g. "example.com".
        tld = '.'.join(domain.split('.')[-2:])
        tld_counter[tld] += 1

    # Exit early if there are no domains in the data set to analyze.
    if not domain_counter:
        return 'No domains to analyze.'

    domain_count_array = numpy.array(list(domain_counter.values()))
    try:
        domain_20th_percentile = int(
            numpy.percentile(domain_count_array, 20))
    except IndexError:
        logging.warning('Unable to calculate the 20th percentile.')
        # Fall back to zero so no domain is tagged as rare.
        domain_20th_percentile = 0

    try:
        domain_85th_percentile = int(
            numpy.percentile(domain_count_array, 85))
    except IndexError:
        logging.warning('Unable to calculate the 85th percentile.')
        # Fall back to a threshold above the highest count so no
        # domain is tagged as common.
        highest_count_domain = domain_counter.most_common(1)
        if highest_count_domain:
            _, highest_count = highest_count_domain[0]
            domain_85th_percentile = highest_count + 10
        else:
            domain_85th_percentile = 100

    common_domains = [
        x for x, y in domain_counter.most_common()
        if y >= domain_85th_percentile
    ]
    rare_domains = [
        x for x, y in domain_counter.most_common()
        if y <= domain_20th_percentile
    ]

    satellite_emoji = emojis.get_emoji('SATELLITE')
    for domain, count in iter(domain_counter.items()):
        emojis_to_add = [satellite_emoji]
        tags_to_add = []

        cdn_provider = utils.get_cdn_provider(domain)
        if cdn_provider:
            tags_to_add.append('known-cdn')
            cdn_counter[cdn_provider] += 1

        if domain in common_domains:
            tags_to_add.append('common_domain')

        if domain in rare_domains:
            tags_to_add.append('rare_domain')

        for event in domains.get(domain, []):
            event.add_tags(tags_to_add)
            event.add_emojis(emojis_to_add)

            new_attributes = {'domain': domain, 'domain_count': count}
            if cdn_provider:
                new_attributes['cdn_provider'] = cdn_provider
            event.add_attributes(new_attributes)

            # Commit the event to the datastore.
            event.commit()

    return ('{0:d} domains discovered ({1:d} TLDs) and {2:d} known '
            'CDN networks found.').format(len(domains), len(tld_counter),
                                          len(cdn_counter))
def run(self):
    """Entry point for the analyzer.

    Runs every registered chain plugin: each plugin finds base events
    and builds a chain of linked events from them. All involved events
    (base and chained) are annotated with a "chains" attribute carrying
    the chain UUIDs and a link emoji.

    Returns:
        String with summary of the analyzer result
    """
    link_emoji = emojis.get_emoji('LINK')

    number_of_base_events = 0
    number_of_chains = 0
    counter = collections.Counter()
    # Maps event_id -> {'event': event, 'chains': [chain dicts]} so each
    # event is committed once even if it belongs to several chains.
    events_to_update = {}

    # TODO: Have each plugin run in a separate task.
    # TODO: Add a time limit for each plugins run to prevent it from
    # holding everything up.
    for chain_plugin in self._chain_plugins:
        # A plugin defines either a DSL query or a query string; the
        # DSL takes precedence.
        if chain_plugin.SEARCH_QUERY_DSL:
            search_dsl = chain_plugin.SEARCH_QUERY_DSL
            search_string = None
        else:
            search_dsl = None
            search_string = chain_plugin.SEARCH_QUERY

        return_fields = chain_plugin.EVENT_FIELDS
        events = self.event_stream(
            query_string=search_string, query_dsl=search_dsl,
            return_fields=return_fields)

        for event in events:
            if not chain_plugin.process_chain(event):
                continue
            # Each chain gets a fresh UUID shared by all its events.
            chain_id = uuid.uuid4().hex

            chained_events = chain_plugin.build_chain(
                base_event=event, chain_id=chain_id)
            number_chained_events = len(chained_events)
            if not number_chained_events:
                continue

            # Record every chained (leaf) event for later annotation.
            for chained_event in chained_events:
                chained_id = chained_event.get('event_id')
                if chained_id not in events_to_update:
                    default = {
                        'event': chained_event.get('event'),
                        'chains': []
                    }
                    events_to_update[chained_id] = default

                events_to_update[chained_id]['chains'].append(
                    chained_event.get('chain'))

            number_of_base_events += 1
            counter[chain_plugin.NAME] += number_chained_events
            counter['total'] += number_chained_events

            # The base event gets a chain entry marked is_base with the
            # number of leaf events attached.
            chain = {
                'chain_id': chain_id,
                'plugin': chain_plugin.NAME,
                'is_base': True,
                'leafs': number_chained_events,
            }
            if event.event_id not in events_to_update:
                default = {'event': event, 'chains': []}
                events_to_update[event.event_id] = default
            events_to_update[event.event_id]['chains'].append(chain)
            number_of_chains += 1

    # Annotate and commit each involved event exactly once.
    for event_update in events_to_update.values():
        event = event_update.get('event')
        attributes = {'chains': event_update.get('chains')}
        event.add_attributes(attributes)
        event.add_emojis([link_emoji])
        event.commit()

    # Per-plugin counts, excluding the running total.
    chain_string = ' - '.join([
        '[{0:s}] {1:d}'.format(x[0], x[1])
        for x in counter.most_common() if x[0] != 'total'
    ])
    return ('{0:d} base events annotated with a chain UUID for {1:d} '
            'chains for a total of {2:d} events. {3:s}'.format(
                number_of_base_events, number_of_chains,
                counter['total'], chain_string))
class BaseChainPlugin(object): """A base plugin for the chain analyzer. This is an interface for the chain analyzer plugins. """ NAME = "chain" DESCRIPTION = "" # A string value that defines the search query used to find the original # event that starts the chain. In order for this plugin to work # either the SEARCH_QUERY or SEARCH_QUERY_DSL needs to be defined. SEARCH_QUERY = "" # Defines the original event search query DSL. If this attribute # is defined the SEARCH_QUERY attribute is ignored. SEARCH_QUERY_DSL = "" # Defines the fields that need to be returned as part of the # event object. EVENT_FIELDS = [] _EMOJIS = [emojis.get_emoji("LINK")] def __init__(self, analyzer_object): """Initialize the plugin.""" super().__init__() self.analyzer_object = analyzer_object def process_chain(self, base_event): """Determine if the extracted event fits the criteria of the plugin. Args: base_event: an event object (instance of Event). Returns: boolean to determine whether a chain should be generated from the event or not. By default this returns True. """ if base_event: return True return True def build_chain(self, base_event, chain_id): """Returns a chain of events from a base event. Args: base_event: the base event of the chain, used to construct further queries (instance of Event). chain_id: a string with the chain UUID value. Returns: A list of dicts with the chain and event attached. """ events = [] for event in self.get_chained_events(base_event): chain = {"chain_id": chain_id, "plugin": self.NAME, "is_base": False} events.append( { "event_id": event.event_id, "event": event, "chain": chain, } ) return events @abc.abstractmethod def get_chained_events(self, base_event): """Yields an event that is chained or linked to the base event.
def testEmoji(self): """Test a flag emoji exists""" flag_emoji = emojis.get_emoji(self._TEST_ISO_CODE) self.assertEqual(flag_emoji, self._TEST_EMOJI)
def run(self):
    """Entry point for the analyzer.

    Streams events carrying a "url" or "domain" field, counts domain
    occurrences, builds a set of watched domains from configuration plus
    the most common domains and TLDs of the timeline, and annotates
    events whose domain is similar (by minhash) to a watched domain.
    Creates saved views when phishy domains are discovered.

    Returns:
        String with summary of the analyzer result
    """
    query = (
        '{"query": { "bool": { "should": [ '
        '{ "exists" : { "field" : "url" }}, '
        '{ "exists" : { "field" : "domain" }} ] } } }')

    return_fields = ['domain', 'url', 'message', 'human_readable']

    events = self.event_stream(
        '', query_dsl=query, return_fields=return_fields)

    domains = {}
    domain_counter = collections.Counter()
    tld_counter = collections.Counter()

    for event in events:
        domain = event.source.get('domain')
        if not domain:
            continue
        domain_counter[domain] += 1
        domains.setdefault(domain, [])
        domains[domain].append(event)

        tld = utils.get_tld_from_domain(domain)
        tld_counter[tld] += 1

    # Nothing to do on an empty timeline.
    if not domain_counter:
        return 'No domains discovered, so no phishy domains.'

    # Seed the watch list from config and extend it with the most
    # frequent domains/TLDs seen in this timeline.
    watched_domains_list = current_app.config.get(
        'DOMAIN_ANALYZER_WATCHED_DOMAINS', [])
    domain_threshold = current_app.config.get(
        'DOMAIN_ANALYZER_WATCHED_DOMAINS_THRESHOLD', 10)
    watched_domains_list.extend([
        utils.strip_www_from_domain(x)
        for x, _ in domain_counter.most_common(domain_threshold)])
    watched_domains_list.extend([
        x for x, _ in tld_counter.most_common(domain_threshold)])
    watched_domains_list.extend(self.WATCHED_DOMAINS_BASE_LIST)

    # De-duplicate and drop whitelisted or malformed (dot-less) entries.
    watched_domains_list_temp = set(watched_domains_list)
    watched_domains_list = []
    for domain in watched_domains_list_temp:
        if domain in self.domain_scoring_whitelist:
            continue
        if any(domain.endswith(x) for x in self.domain_scoring_whitelist):
            continue
        if '.' not in domain:
            continue
        watched_domains_list.append(domain)

    watched_domains = {}
    for domain in watched_domains_list:
        minhash = self._get_minhash_from_domain(domain)
        watched_domains[domain] = {
            'hash': minhash,
            'depth': len(domain.split('.'))
        }

    similar_domain_counter = 0
    whitelist_encountered = False
    evil_emoji = emojis.get_emoji('SKULL_CROSSBONE')
    phishing_emoji = emojis.get_emoji('FISHING_POLE')
    for domain, _ in iter(domain_counter.items()):
        emojis_to_add = []
        tags_to_add = []
        text = None

        similar_domains = self._get_similar_domains(
            domain, watched_domains)

        if similar_domains:
            similar_domain_counter += 1
            emojis_to_add.append(evil_emoji)
            emojis_to_add.append(phishing_emoji)
            tags_to_add.append('phishy-domain')
            similar_text_list = ['{0:s} [score: {1:.2f}]'.format(
                phishy_domain, score)
                for phishy_domain, score in similar_domains]
            text = 'Domain {0:s} is similar to {1:s}'.format(
                domain, ', '.join(similar_text_list))
            # A similar domain that itself matches the whitelist is
            # tagged so it can be excluded from the view below.
            if any(domain.endswith(
                    x) for x in self.domain_scoring_whitelist):
                tags_to_add.append('whitelisted-domain')
                whitelist_encountered = True

        for event in domains.get(domain, []):
            event.add_emojis(emojis_to_add)
            event.add_tags(tags_to_add)
            if text:
                event.add_human_readable(text, self.NAME, append=False)

            # Commit the event to the datastore.
            event.commit()

    if similar_domain_counter:
        self.sketch.add_view(
            view_name='Phishy Domains', analyzer_name=self.NAME,
            query_string='tag:"phishy-domain"')
        if whitelist_encountered:
            self.sketch.add_view(
                view_name='Phishy Domains, excl. whitelist',
                analyzer_name=self.NAME,
                query_string=(
                    'tag:"phishy-domain" AND NOT tag:"whitelisted-domain"'))

    return (
        '{0:d} potentially phishy domains discovered.').format(
            similar_domain_counter)
def extract_feature(self, name, config):
    """Extract features from events.

    Args:
        name: String with the name describing the feature to be
            extracted.
        config: A dict that contains the configuration for the feature
            extraction. See data/features.yaml for fields and further
            documentation of what needs to be defined.

    Returns:
        String with summary of the analyzer result.
    """
    query = config.get('query_string')
    query_dsl = config.get('query_dsl')
    attribute = config.get('attribute')

    if not attribute:
        logging.warning('No attribute defined.')
        return ''

    store_as = config.get('store_as')
    if not store_as:
        logging.warning('No attribute defined to store results in.')
        return ''

    tags = config.get('tags', [])

    expression_string = config.get('re')
    expression_flags = config.get('re_flags')
    if not expression_string:
        logging.warning('No regular expression defined.')
        return ''

    if expression_flags:
        # Resolve flag names (e.g. "IGNORECASE") to re module constants
        # and combine them; sum() over the set produces the OR-ed value.
        flags = set()
        for flag in expression_flags:
            try:
                flags.add(getattr(re, flag))
            except AttributeError:
                logging.warning('Unknown regular expression flag defined.')
                return ''
        re_flag = sum(flags)
    else:
        re_flag = 0

    try:
        expression = re.compile(expression_string, flags=re_flag)
    except re.error as exception:
        # pylint: disable=logging-format-interpolation
        logging.warning((
            'Regular expression failed to compile, with '
            'error: {0!s}').format(exception))
        return ''

    emoji_names = config.get('emojis', [])
    emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

    return_fields = [attribute]

    events = self.event_stream(
        query_string=query, query_dsl=query_dsl,
        return_fields=return_fields)

    event_counter = 0
    for event in events:
        attribute_field = event.source.get(attribute)
        if isinstance(attribute_field, six.text_type):
            attribute_value = attribute_field
        elif isinstance(attribute_field, (list, tuple)):
            attribute_value = ','.join(attribute_field)
        elif isinstance(attribute_field, (int, float)):
            # BUG FIX: re.findall() requires a string or bytes-like
            # object; stringify numeric values so they don't raise a
            # TypeError below.
            attribute_value = str(attribute_field)
        else:
            attribute_value = None

        if not attribute_value:
            continue

        result = expression.findall(attribute_value)
        if not result:
            continue

        event_counter += 1
        event.add_attributes({store_as: result[0]})
        event.add_emojis(emojis_to_add)
        event.add_tags(tags)

        # Commit the event to the datastore.
        event.commit()

    aggregate_results = config.get('aggregate', False)
    create_view = config.get('create_view', False)

    # If aggregation is turned on, we automatically create an aggregation.
    if aggregate_results:
        create_view = True

    if create_view and event_counter:
        view = self.sketch.add_view(
            name, self.NAME, query_string=query, query_dsl=query_dsl)

        if aggregate_results:
            params = {
                'field': store_as,
                'limit': 20,
            }
            self.sketch.add_aggregation(
                name='Top 20 for: {0:s} [{1:s}]'.format(store_as, name),
                agg_name='field_bucket', agg_params=params,
                description='Created by the feature extraction analyzer',
                view_id=view.id, chart_type='hbarchart')

    return 'Feature extraction [{0:s}] extracted {1:d} features.'.format(
        name, event_counter)
def extract_feature(self, name, config):
    """Extract features from events.

    Args:
        name: String with the name describing the feature to be
            extracted.
        config: A dict that contains the configuration for the feature
            extraction. See ~/config/features.yaml for fields and
            further documentation of what needs to be defined.

    Returns:
        String with summary of the analyzer result.
    """
    query = config.get('query_string')
    query_dsl = config.get('query_dsl')
    attribute = config.get('attribute')

    if not attribute:
        logging.warning('No attribute defined.')
        return ''

    store_as = config.get('store_as')
    if not store_as:
        logging.warning('No attribute defined to store results in.')
        return ''

    tags = config.get('tags', [])

    expression_string = config.get('re')
    if not expression_string:
        logging.warning('No regular expression defined.')
        return ''

    try:
        expression = re.compile(expression_string)
    except re.error as exception:
        # pylint: disable=logging-format-interpolation
        logging.warning((
            'Regular expression failed to compile, with '
            'error: {0!s}').format(exception))
        return ''

    emoji_names = config.get('emojis', [])
    emojis_to_add = [emojis.get_emoji(x) for x in emoji_names]

    return_fields = [attribute]

    events = self.event_stream(
        query_string=query, query_dsl=query_dsl,
        return_fields=return_fields)

    event_counter = 0
    for event in events:
        attribute_field = event.source.get(attribute)
        if isinstance(attribute_field, six.text_type):
            # Text attributes are lower-cased before matching.
            attribute_value = attribute_field.lower()
        elif isinstance(attribute_field, (list, tuple)):
            attribute_value = ','.join(attribute_field)
        elif isinstance(attribute_field, (int, float)):
            # BUG FIX: re.findall() requires a string or bytes-like
            # object; stringify numeric values so they don't raise a
            # TypeError below.
            attribute_value = str(attribute_field)
        else:
            attribute_value = None

        if not attribute_value:
            continue

        result = expression.findall(attribute_value)
        if not result:
            continue

        event_counter += 1
        event.add_attributes({store_as: result[0]})
        event.add_emojis(emojis_to_add)
        event.add_tags(tags)

        # Commit the event to the datastore.
        event.commit()

    create_view = config.get('create_view', False)
    if create_view and event_counter:
        if query:
            query_string = query
        else:
            query_string = query_dsl
        self.sketch.add_view(name, query_string)

    # TODO: Add aggregation check when that is exposed in the UI.
    # aggregate_results = config.get('aggregate', False)

    return 'Feature extraction [{0:s}] extracted {1:d} features.'.format(
        name, event_counter)