def _jump(self, data):
    '''
    Obtains the jump-to-offset value of a signature, if any.

    @data - String result data.

    Returns the offset to jump to. Returns 0 if no 'jump' keyword argument is
    present, or if its argument cannot be converted to an integer.
    '''
    offset_str = self._get_keyword_arg(data, 'jump')
    if not offset_str:
        return 0

    try:
        return str2int(offset_str)
    except Exception:
        # Best-effort conversion: an unparsable jump value means "no jump".
        # Only Exception is caught so that KeyboardInterrupt / SystemExit
        # still propagate instead of being silently discarded.
        return 0
def _get_math_arg(self, data, keyword):
    '''
    Retrieves the argument for keywords that specify mathematical expressions as arguments.

    @data    - String result data, as returned by libmagic.
    @keyword - Keyword index in KEYWORDS.

    Returns the resulting calculated value: the sum of all '+'-separated terms
    of the keyword's argument, or 0 if the keyword has no argument.

    Side effect: sets self.invalid to True if any term cannot be converted to
    an integer. Such terms are skipped and the remaining terms are still summed.
    '''
    value = 0

    arg = self._get_keyword_arg(data, keyword)
    if arg:
        for string_int in arg.split('+'):
            try:
                value += str2int(string_int)
            except Exception:
                # Flag the result instead of failing. Only Exception is caught
                # so that KeyboardInterrupt / SystemExit are not swallowed.
                self.invalid = True

    return value
def parse(self, data):
    '''
    Parse a given data string for smart signature keywords. If any are found, interpret them and strip them.

    @data - String to parse, as returned by libmagic.

    Returns a dictionary of parsed values.

    Side effect: resets self.invalid to False before parsing; the final value of
    self.invalid is copied into the 'invalid' entry of the returned dictionary.
    '''
    results = {
        'offset'      : '',     # Offset where the match was found, filled in by Binwalk.single_scan.
        'description' : '',     # The libmagic data string, stripped of all keywords
        'name'        : '',     # The original name of the file, if known
        'delay'       : '',     # Extract delay description
        'extract'     : '',     # Name of the extracted file, filled in by Binwalk.single_scan.
        'jump'        : 0,      # The relative offset to resume the scan from
        'size'        : 0,      # The size of the file, if known
        'adjust'      : 0,      # The relative offset to add to the reported offset
        'year'        : 0,      # The file's creation/modification year, if reported in the signature
        'epoch'       : 0,      # The file's creation/modification epoch time, if reported in the signature
        'invalid'     : False,  # Set to True if parsed numerical values appear invalid
    }

    self.invalid = False

    # If smart signatures are disabled, or the result data is not valid (i.e., potentially malicious),
    # don't parse anything, just return the raw data as the description.
    if self.ignore_smart_signatures or not self._is_valid(data):
        results['description'] = data
    else:
        # Calculate and replace math keyword values
        data = self._replace_maths(data)

        # Parse the offset-adjust value. This is used to adjust the reported offset at which
        # a signature was located due to the fact that MagicParser.match expects all signatures
        # to be located at offset 0, which some will not be.
        results['adjust'] = self._get_math_arg(data, 'adjust')

        # Parse the file-size value. This is used to determine how many bytes should be extracted
        # when extraction is enabled. If not specified, everything to the end of the file will be
        # extracted (see Binwalk.scan).
        #
        # The conversions below are best-effort: on failure the default of 0 is kept. Only
        # Exception is caught so that KeyboardInterrupt / SystemExit are not swallowed.
        try:
            results['size'] = str2int(self._get_math_arg(data, 'filesize'))
        except Exception:
            pass

        try:
            results['year'] = str2int(self._get_keyword_arg(data, 'year'))
        except Exception:
            pass

        try:
            results['epoch'] = str2int(self._get_keyword_arg(data, 'epoch'))
        except Exception:
            pass

        results['delay'] = self._get_keyword_arg(data, 'delay')

        # Parse the string for the jump-to-offset keyword.
        # This keyword is honored, even if this string result is one of many.
        results['jump'] = self._get_math_arg(data, 'jump')

        # If this is one of many, don't do anything and leave description as a blank string.
        # Else, strip all keyword tags from the string and process additional keywords as necessary.
        if not self._one_of_many(data):
            results['name'] = self._get_keyword_arg(data, 'filename').strip('"')
            results['description'] = self._strip_tags(data)

    results['invalid'] = self.invalid

    return results
def _parse_raw_strings(self, data): ''' Process strings that aren't NULL byte terminated, but for which we know the string length. This should be called prior to any other smart parsing functions. @data - String to parse. Returns a parsed string. ''' if not self.ignore_smart_signatures and self._is_valid(data): # Get the raw string keyword arg raw_string = self._get_keyword_arg(data, 'raw-string') # Was a raw string keyword specified? if raw_string: # Get the raw string length arg raw_size = self._get_keyword_arg(data, 'raw-size') # Is the raw string length arg is a numeric value? if re.match('^-?[0-9]+$', raw_size): # Replace all instances of raw-replace in data with raw_string[:raw_size] # Also strip out everything after the raw-string keyword, including the keyword itself. # Failure to do so may (will) result in non-printable characters and this string will be # marked as invalid when it shouldn't be. data = data[:data.find(self.KEYWORDS['raw-string'])].replace(self.KEYWORDS['raw-replace'], '"' + raw_string[:str2int(raw_size)] + '"') return data
def _parse_line(self, line): ''' Parses a signature line into its four parts (offset, type, condition and description), looking for the first line of a given signature. @line - The signature line to parse. Returns a dictionary with the respective line parts populated if the line is the first of a signature. Returns a dictionary with all parts set to None if the line is not the first of a signature. ''' entry = { 'offset' : '', 'type' : '', 'condition' : '', 'description' : '', 'length' : 0 } # Quick and dirty pre-filter. We are only concerned with the first line of a # signature, which will always start with a number. Make sure the first byte of # the line is a number; if not, don't process. if line[:1] < '0' or line[:1] > '9': return None try: # Split the line into white-space separated parts. # For this to work properly, replace escaped spaces ('\ ') with '\x20'. # This means the same thing, but doesn't confuse split(). line_parts = line.replace('\\ ', '\\x20').split() entry['offset'] = line_parts[0] entry['type'] = line_parts[1] # The condition line may contain escaped sequences, so be sure to decode it properly. entry['condition'] = string_decode(line_parts[2]) entry['description'] = ' '.join(line_parts[3:]) except Exception as e: raise Exception("%s :: %s", (str(e), line)) # We've already verified that the first character in this line is a number, so this *shouldn't* # throw an exception, but let's catch it just in case... try: entry['offset'] = str2int(entry['offset']) except Exception as e: raise Exception("%s :: %s", (str(e), line)) # If this is a string, get the length of the string if 'string' in entry['type'] or entry['condition'] == self.WILDCARD: entry['length'] = len(entry['condition']) # Else, we need to jump through a few more hoops... else: # Default to little endian, unless the type field starts with 'be'. # This assumes that we're running on a little endian system... 
if entry['type'].startswith('be'): endianess = self.BIG_ENDIAN else: endianess = self.LITTLE_ENDIAN # Try to convert the condition to an integer. This does not allow # for more advanced conditions for the first line of a signature, # but needing that is rare. try: intval = str2int(entry['condition'].strip('L')) except Exception as e: raise Exception("Failed to evaluate condition for '%s' type: '%s', condition: '%s', error: %s" % (entry['description'], entry['type'], entry['condition'], str(e))) # How long is the field type? if entry['type'] == 'byte': entry['length'] = 1 elif 'short' in entry['type']: entry['length'] = 2 elif 'long' in entry['type']: entry['length'] = 4 elif 'quad' in entry['type']: entry['length'] = 8 # Convert the integer value to a string of the appropriate endianess entry['condition'] = self._to_string(intval, entry['length'], endianess) return entry