def _CheckPythonVersionAndDisableWarnings(self):
  """Checks the Python version and disables SSL warnings.

  urllib3 will warn on each HTTPS request made by older versions of Python.
  Rather than spamming the user, we print one warning message, then disable
  warnings in urllib3.
  """
  if self._checked_for_old_python_version:
    return

  if sys.version_info[0:3] < (2, 7, 9):
    logger.warning(
        'You are running a version of Python prior to 2.7.9. Your version '
        'of Python has multiple weaknesses in its SSL implementation that '
        'can allow an attacker to read or modify SSL encrypted data. '
        'Please update. Further SSL warnings will be suppressed. See '
        'https://www.python.org/dev/peps/pep-0466/ for more information.')

    # Some distributions de-vendor urllib3 from requests, so we have to
    # check if this has occurred and disable warnings in the correct
    # package.
    urllib3_module = urllib3
    if not urllib3_module:
      if hasattr(requests, 'packages'):
        urllib3_module = getattr(requests.packages, 'urllib3')

    if urllib3_module and hasattr(urllib3_module, 'disable_warnings'):
      urllib3_module.disable_warnings()

  self._checked_for_old_python_version = True
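A minimal sketch of the module-level imports this method assumes: urllib3 is imported directly when available and left as None when absent, so the fallback through requests.packages can take over. The try/except guard is an assumption for illustration, not plaso's verbatim import block.

import sys

import requests

# If a distribution has de-vendored urllib3 from requests, the direct import
# may fail; leaving the name as None lets the method above fall back to
# requests.packages.urllib3.
try:
  import urllib3
except ImportError:
  urllib3 = None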
def ExamineEvent(self, mediator, event):
  """Evaluates whether an event contains the right data for a hash lookup.

  Args:
    mediator (AnalysisMediator): mediates interactions between analysis
        plugins and other components, such as storage and dfvfs.
    event (EventObject): event.
  """
  self._EnsureRequesterStarted()

  path_spec = event.pathspec
  event_identifiers = self._event_identifiers_by_pathspec[path_spec]

  event_identifier = event.GetIdentifier()
  event_identifiers.append(event_identifier)

  if (event.data_type not in self.DATA_TYPES or
      not self._analyzer.lookup_hash):
    return

  lookup_hash = '{0:s}_hash'.format(self._analyzer.lookup_hash)
  lookup_hash = getattr(event, lookup_hash, None)
  if not lookup_hash:
    display_name = mediator.GetDisplayNameForPathSpec(path_spec)
    logger.warning((
        'Lookup hash attribute: {0:s}_hash missing from event that '
        'originated from: {1:s}.').format(
            self._analyzer.lookup_hash, display_name))
    return

  path_specs = self._hash_pathspecs[lookup_hash]
  path_specs.append(path_spec)

  # There may be multiple path specifications that have the same hash. We
  # only want to look them up once.
  if len(path_specs) == 1:
    self.hash_queue.put(lookup_hash)
def ExamineEvent(self, mediator, event, event_data, event_data_stream):
  """Evaluates whether an event contains the right data for a hash lookup.

  Args:
    mediator (AnalysisMediator): mediates interactions between analysis
        plugins and other components, such as storage and dfvfs.
    event (EventObject): event.
    event_data (EventData): event data.
    event_data_stream (EventDataStream): event data stream.
  """
  if (event_data.data_type not in self.DATA_TYPES or
      not self._analyzer.lookup_hash):
    return

  self._EnsureRequesterStarted()

  path_specification = getattr(event_data_stream, 'path_spec', None)
  if not path_specification:
    # Note that support for event_data.pathspec is kept for backwards
    # compatibility.
    path_specification = getattr(event_data, 'pathspec', None)

  # Not all events have a path specification, such as "fs:stat".
  if not path_specification:
    return

  event_identifiers = self._event_identifiers_by_path_spec[
      path_specification]

  event_identifier = event.GetIdentifier()
  event_identifiers.append(event_identifier)

  hash_attributes_container = event_data_stream
  if not hash_attributes_container:
    hash_attributes_container = event_data

  lookup_hash = '{0:s}_hash'.format(self._analyzer.lookup_hash)
  lookup_hash = getattr(hash_attributes_container, lookup_hash, None)
  if not lookup_hash:
    display_name = mediator.GetDisplayNameForPathSpec(path_specification)
    logger.warning((
        'Lookup hash attribute: {0:s}_hash missing from event that '
        'originated from: {1:s}.').format(
            self._analyzer.lookup_hash, display_name))
    return

  path_specs = self._hash_path_specs[lookup_hash]
  path_specs.append(path_specification)

  # There may be multiple path specifications that have the same hash. We
  # only want to look them up once.
  if len(path_specs) == 1:
    self.hash_queue.put(lookup_hash)
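The indexing above expects mappings that hand back a fresh list for unseen keys; a standalone sketch of that bookkeeping, assuming collections.defaultdict(list) for the instance attributes (the attribute names come from the method, the defaultdict choice is an assumption):

import collections

# Maps a lookup hash to every path specification that produced it; only the
# first path specification for a hash triggers a queue put, which is what the
# len(path_specs) == 1 check above implements.
hash_path_specs = collections.defaultdict(list)

hash_path_specs['d41d8cd9'].append('/image/file_a')
hash_path_specs['d41d8cd9'].append('/image/file_b')

print(len(hash_path_specs['d41d8cd9']))  # 2: looked up once, tagged twice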
def ExamineEvent(
    self, analysis_mediator, event, event_data, event_data_stream):
  """Analyzes an event.

  Args:
    analysis_mediator (AnalysisMediator): mediates interactions between
        analysis plugins and other components, such as storage and dfVFS.
    event (EventObject): event.
    event_data (EventData): event data.
    event_data_stream (EventDataStream): event data stream.
  """
  if event_data.data_type not in self._SUPPORTED_EVENT_DATA_TYPES:
    return

  url = getattr(event_data, 'url', None)
  if not url:
    return

  for engine, url_expression, method_name in self._URL_FILTERS:
    callback_method = getattr(self, method_name, None)
    if not callback_method:
      logger.warning(
          'Missing callback method: {0:s} to parse search query'.format(
              method_name))
      continue

    match = url_expression.search(url)
    if not match:
      continue

    search_query = callback_method(url)
    if not search_query:
      analysis_mediator.ProduceAnalysisWarning(
          'Unable to determine search query: {0:s} in URL: {1:s}'.format(
              method_name, url), self.NAME)
      continue

    try:
      search_query = urlparse.unquote(search_query)
    except TypeError:
      search_query = None

    if not search_query:
      analysis_mediator.ProduceAnalysisWarning(
          'Unable to decode search query: {0:s} in URL: {1:s}'.format(
              method_name, url), self.NAME)
      continue

    event_tag = self._CreateEventTag(event, self._EVENT_TAG_LABELS)
    analysis_mediator.ProduceEventTag(event_tag)

    lookup_key = '{0:s}:{1:s}'.format(engine, search_query)
    self._analysis_counter[lookup_key] += 1
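Each _URL_FILTERS entry unpacked by the loop above is a three-tuple: an engine name, a compiled URL expression, and the name of the callback method that extracts the search query. A hypothetical entry for illustration; the pattern and method name are assumptions, not plaso's actual definitions:

import re

_URL_FILTERS = [
    # (engine, compiled URL expression, callback method name)
    ('Google', re.compile(r'(www\.|encrypted\.)?google\.'),
     '_ExtractGoogleSearchQuery'),
]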
def ExamineEvent(self, mediator, event):
  """Analyzes an event.

  Args:
    mediator (AnalysisMediator): mediates interactions between analysis
        plugins and other components, such as storage and dfvfs.
    event (EventObject): event to examine.
  """
  # This event requires an URL attribute.
  url = getattr(event, 'url', None)
  if not url:
    return

  # TODO: refactor this, the source should be used in formatting only.
  # Check if we are dealing with a web history event.
  source, _ = formatters_manager.FormattersManager.GetSourceStrings(event)
  if source != 'WEBHIST':
    return

  for engine, url_expression, method_name in self._URL_FILTERS:
    callback_method = getattr(self, method_name, None)
    if not callback_method:
      logger.warning('Missing method: {0:s}'.format(method_name))
      continue

    match = url_expression.search(url)
    if not match:
      continue

    search_query = callback_method(url)
    if not search_query:
      logger.warning('Missing search query for URL: {0:s}'.format(url))
      continue

    search_query = self._DecodeURL(search_query)
    if not search_query:
      continue

    event_tag = self._CreateEventTag(
        event, self._EVENT_TAG_COMMENT, self._EVENT_TAG_LABELS)
    mediator.ProduceEventTag(event_tag)

    self._counter['{0:s}:{1:s}'.format(engine, search_query)] += 1

    # Add the timeline format for each search term.
    timestamp = getattr(event, 'timestamp', 0)
    source = getattr(event, 'parser', 'N/A')
    source = getattr(event, 'plugin', source)
    self._search_term_timeline.append(
        SEARCH_OBJECT(timestamp, source, engine, search_query))
def ExamineEvent(self, mediator, event, event_data, event_data_stream):
  """Analyzes an event.

  Args:
    mediator (AnalysisMediator): mediates interactions between analysis
        plugins and other components, such as storage and dfvfs.
    event (EventObject): event.
    event_data (EventData): event data.
    event_data_stream (EventDataStream): event data stream.
  """
  if event_data.data_type not in self._SUPPORTED_EVENT_DATA_TYPES:
    return

  url = getattr(event_data, 'url', None)
  if not url:
    return

  parser_or_plugin_name = getattr(event_data, 'parser', 'N/A')

  for engine, url_expression, method_name in self._URL_FILTERS:
    callback_method = getattr(self, method_name, None)
    if not callback_method:
      logger.warning('Missing method: {0:s}'.format(method_name))
      continue

    match = url_expression.search(url)
    if not match:
      continue

    search_query = callback_method(url)
    if not search_query:
      logger.warning('Missing search query for URL: {0:s}'.format(url))
      continue

    search_query = self._DecodeURL(search_query)
    if not search_query:
      continue

    event_tag = self._CreateEventTag(event, self._EVENT_TAG_LABELS)
    mediator.ProduceEventTag(event_tag)

    self._counter['{0:s}:{1:s}'.format(engine, search_query)] += 1

    # Add the timeline format for each search term.
    search_object = SEARCH_OBJECT(
        event.timestamp, parser_or_plugin_name, engine, search_query)
    self._search_term_timeline.append(search_object)
def ExamineEvent(self, mediator, event, event_data):
  """Analyzes an event.

  Args:
    mediator (AnalysisMediator): mediates interactions between analysis
        plugins and other components, such as storage and dfvfs.
    event (EventObject): event.
    event_data (EventData): event data.
  """
  if event_data.data_type not in self._SUPPORTED_EVENT_DATA_TYPES:
    return

  url = getattr(event_data, 'url', None)
  if not url:
    return

  parser_or_plugin_name = getattr(event_data, 'parser', 'N/A')

  for engine, url_expression, method_name in self._URL_FILTERS:
    callback_method = getattr(self, method_name, None)
    if not callback_method:
      logger.warning('Missing method: {0:s}'.format(method_name))
      continue

    match = url_expression.search(url)
    if not match:
      continue

    search_query = callback_method(url)
    if not search_query:
      logger.warning('Missing search query for URL: {0:s}'.format(url))
      continue

    search_query = self._DecodeURL(search_query)
    if not search_query:
      continue

    event_tag = self._CreateEventTag(
        event, self._EVENT_TAG_COMMENT, self._EVENT_TAG_LABELS)
    mediator.ProduceEventTag(event_tag)

    self._counter['{0:s}:{1:s}'.format(engine, search_query)] += 1

    # Add the timeline format for each search term.
    search_object = SEARCH_OBJECT(
        event.timestamp, parser_or_plugin_name, engine, search_query)
    self._search_term_timeline.append(search_object)
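SEARCH_OBJECT is consumed above as a simple four-field record; a sketch assuming a collections.namedtuple whose field names mirror the positional arguments (the exact field names are assumptions):

import collections

# Field names are assumed from the constructor arguments used above.
SEARCH_OBJECT = collections.namedtuple(
    'SEARCH_OBJECT', 'time source engine search_term')

entry = SEARCH_OBJECT(
    1514764800000000, 'sqlite/chrome_27_history', 'Google', 'plaso timeline')
print(entry.engine, entry.search_term)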
def _GetChromeWebStorePage(self, extension_identifier):
  """Retrieves the page for the extension from the Chrome store website.

  Args:
    extension_identifier (str): Chrome extension identifier.

  Returns:
    str: page content or None.
  """
  web_store_url = self._WEB_STORE_URL.format(xid=extension_identifier)
  try:
    response = requests.get(web_store_url)

  except (requests.ConnectionError, requests.HTTPError) as exception:
    logger.warning((
        '[{0:s}] unable to retrieve URL: {1:s} with error: {2!s}').format(
            self.NAME, web_store_url, exception))
    return None

  return response.text
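The format() call above fills an {xid} placeholder, so _WEB_STORE_URL is presumably a class constant of roughly this shape; the exact URL is an assumption for illustration:

# Assumed shape of the class constant; only the {xid} placeholder is implied
# by the code above.
_WEB_STORE_URL = 'https://chrome.google.com/webstore/detail/{xid}?hl=en-US'

print(_WEB_STORE_URL.format(xid='gighmmpiobklfepjocnamgkkbiglidom'))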
def ExamineEvent(self, mediator, event, event_data, event_data_stream):
  """Evaluates whether an event contains the right data for a hash lookup.

  Args:
    mediator (AnalysisMediator): mediates interactions between analysis
        plugins and other components, such as storage and dfVFS.
    event (EventObject): event.
    event_data (EventData): event data.
    event_data_stream (EventDataStream): event data stream.
  """
  if (not self._analyzer.lookup_hash or not event_data_stream or
      event_data.data_type not in self.DATA_TYPES):
    return

  self._EnsureAnalyzerThreadIsStarted()

  data_stream_identifier = event_data_stream.GetIdentifier()
  if data_stream_identifier not in self._data_stream_identifiers:
    self._data_stream_identifiers.add(data_stream_identifier)

    lookup_hash = '{0:s}_hash'.format(self._analyzer.lookup_hash)
    lookup_hash = getattr(event_data_stream, lookup_hash, None)

    if not lookup_hash:
      path_specification = getattr(event_data_stream, 'path_spec', None)
      display_name = mediator.GetDisplayNameForPathSpec(path_specification)
      logger.warning((
          'Lookup hash attribute: {0:s}_hash missing from event data '
          'stream: {1:s}.').format(
              self._analyzer.lookup_hash, display_name))

    else:
      self._data_streams_by_hash[lookup_hash].add(data_stream_identifier)
      self._hash_queue.put(lookup_hash)

  event_identifier = event.GetIdentifier()
  self._event_identifiers_by_data_stream[data_stream_identifier].add(
      event_identifier)
def _DecodeURL(self, url):
  """Decodes the URL, replacing %XX escapes with corresponding characters.

  Args:
    url (str): encoded URL.

  Returns:
    str: decoded URL.
  """
  if not url:
    return ''

  decoded_url = urlparse.unquote(url)
  if isinstance(decoded_url, py2to3.BYTES_TYPE):
    try:
      decoded_url = decoded_url.decode('utf-8')
    except UnicodeDecodeError as exception:
      decoded_url = decoded_url.decode('utf-8', errors='replace')
      logger.warning(
          'Unable to decode URL: {0:s} with error: {1!s}'.format(
              url, exception))

  return decoded_url
def _DecodeURL(self, url):
  """Decodes the URL, replacing %XX escapes with corresponding characters.

  Args:
    url (str): encoded URL.

  Returns:
    str: decoded URL.
  """
  if not url:
    return ''

  decoded_url = urlparse.unquote(url)
  if isinstance(decoded_url, bytes):
    try:
      decoded_url = decoded_url.decode('utf-8')
    except UnicodeDecodeError as exception:
      decoded_url = decoded_url.decode('utf-8', errors='replace')
      logger.warning(
          'Unable to decode URL: {0:s} with error: {1!s}'.format(
              url, exception))

  return decoded_url
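A standalone illustration of the decoding step: urllib's unquote replaces the percent-escapes, and on Python 3 it already returns str, so the bytes branch above is effectively a Python 2 leftover.

from urllib import parse as urlparse

print(urlparse.unquote('plaso%20super%20timeline'))  # plaso super timeline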
def ExamineEvent(self, mediator, event):
  """Analyzes an EventObject and tags it according to rules in the tag file.

  Args:
    mediator (AnalysisMediator): mediates interactions between analysis
        plugins and other components, such as storage and dfvfs.
    event (EventObject): event to examine.
  """
  if self._tag_rules is None:
    if self._autodetect_tag_file_attempt:
      # There's nothing to tag with, and we've already tried to find a good
      # tag file, so there's nothing we can do with this event (or any
      # other).
      return

    if not self._AttemptAutoDetectTagFile(mediator):
      logger.info(
          'No tag definition file specified, and plaso was not able to '
          'autoselect a tagging file. As no definitions were specified, '
          'no events will be tagged.')
      return

  try:
    matched_labels = efilter_api.apply(self._tag_rules, vars=event)
  except efilter_errors.EfilterTypeError as exception:
    logger.warning(
        'Unable to apply efilter query with error: {0!s}'.format(exception))
    matched_labels = None

  if not matched_labels:
    return

  labels = list(efilter_api.getvalues(matched_labels))
  event_tag = self._CreateEventTag(event, self._EVENT_TAG_COMMENT, labels)
  mediator.ProduceEventTag(event_tag)

  self._number_of_event_tags += 1
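The rules applied above come from a plaso tagging definition file; a hypothetical two-rule snippet of that format, shown as a Python string, with a label line followed by indented event filter expressions (the specific rules are illustrative, not from a shipped tag file):

# A hypothetical tagging definition of the kind compiled into self._tag_rules.
TAG_FILE_EXAMPLE = """\
application_execution
  data_type is 'windows:prefetch:execution'

file_downloaded
  data_type is 'chrome:history:file_downloaded'
"""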
def _GetTitleFromChromeWebStore(self, extension_identifier):
  """Retrieves the name of the extension from the Chrome store website.

  Args:
    extension_identifier (str): Chrome extension identifier.

  Returns:
    str: name of the extension or None.
  """
  # Check if we have already looked this extension up.
  if extension_identifier in self._extensions:
    return self._extensions.get(extension_identifier)

  page_content = self._GetChromeWebStorePage(extension_identifier)
  if not page_content:
    logger.warning(
        '[{0:s}] no data returned for extension identifier: {1:s}'.format(
            self.NAME, extension_identifier))
    return None

  first_line, _, _ = page_content.partition(b'\n')
  match = self._TITLE_RE.search(first_line)

  name = None
  if match:
    title = match.group(1)
    if title.startswith(b'Chrome Web Store - '):
      name = title[19:]
    elif title.endswith(b'- Chrome Web Store'):
      name = title[:-19]

  if not name:
    self._extensions[extension_identifier] = 'UNKNOWN'
    return None

  name = name.decode('utf-8', errors='replace')
  self._extensions[extension_identifier] = name
  return name
def _GetTitleFromChromeWebStore(self, extension_identifier):
  """Retrieves the name of the extension from the Chrome store website.

  Args:
    extension_identifier (str): Chrome extension identifier.

  Returns:
    str: name of the extension or None.
  """
  # Check if we have already looked this extension up.
  if extension_identifier in self._extensions:
    return self._extensions.get(extension_identifier)

  page_content = self._GetChromeWebStorePage(extension_identifier)
  if not page_content:
    logger.warning(
        '[{0:s}] no data returned for extension identifier: {1:s}'.format(
            self.NAME, extension_identifier))
    return None

  first_line, _, _ = page_content.partition('\n')
  match = self._TITLE_RE.search(first_line)

  name = None
  if match:
    title = match.group(1)
    if title.startswith('Chrome Web Store - '):
      name = title[19:]
    elif title.endswith('- Chrome Web Store'):
      name = title[:-19]

  if not name:
    self._extensions[extension_identifier] = 'UNKNOWN'
    return None

  self._extensions[extension_identifier] = name
  return name
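The title expression above is only consumed through match.group(1), so a pattern of roughly this shape would fit; the exact regular expression is an assumption:

import re

# Assumed definition; only the single capture group is implied by the code.
_TITLE_RE = re.compile(r'<title>([^<]+)</title>')

match = _TITLE_RE.search('<title>Chrome Web Store - AdBlock</title>')
if match:
  print(match.group(1)[19:])  # AdBlock, after stripping the 19-char prefix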