def __init__(self, byte_stream):
    """
    Prepare a fresh extraction state around ``byte_stream``.

    The stream is stored unchanged and also wrapped in a ``BitsIO``. All
    modifier settings start disabled, and every label table, flattened
    record and traversal stack starts out empty.
    """
    self.byte_stream = byte_stream
    self.bit_stream = BitsIO(byte_stream)

    # Settings: every global modifier begins switched off.
    self.reverse_all = self.invert_all = self.endianswap_all = False

    # No value has been handled yet, so there is nothing to remember.
    self.last_value = self.last_index_stack = None

    self.labels = {}

    # Flattened bookkeeping (flat_pattern is a list of characters).
    self.data_stream, self.flat_labels, self.flat_pattern = [], [], []
    self.flat_pos = 0
    self.index_stack = [0]

    # stack_data starts with the root data_structure list as its only entry.
    root = []
    self.data_structure = root
    self.stack_data = [root]

    # Each entry: tok, modtype, start, offset, num_bits
    self.mod_operations = []

    log = logarhythm.getLogger('Extractor')
    self.logger = log
    log.format = logarhythm.build_format(time=None, level=False)
def test_logarhythm():
    """
    Check how loggers obtained from ``logging`` relate to logarhythm loggers.

    The identity checks are made twice: once with the initial state, and once
    after ``disarm_logging_module`` is set to ``False`` on the logarhythm
    logger for this module.
    """
    import logging

    # Phase 1: loggers fetched before touching disarm_logging_module.
    std_default = logging.getLogger()
    std_named = logging.getLogger(__name__)
    assert std_named.name == 'tests.test_logarhythm'

    rhythm_here = logarhythm.getLogger()
    assert rhythm_here.name == 'tests.test_logarhythm'

    rhythm_root = logarhythm.root_logger
    assert rhythm_root.name == ''

    assert std_default is not rhythm_root
    assert std_default is std_named
    assert std_default is rhythm_here._logger

    # Phase 2: flip the flag, fetch the logging loggers again and re-check.
    rhythm_here.disarm_logging_module = False
    std_default = logging.getLogger()
    std_named = logging.getLogger(__name__)

    assert std_default is rhythm_root._logger
    assert std_named is rhythm_here._logger
def __init__(self, data_structure):
    """
    Prepare a fresh construction state from ``data_structure``.

    The structure is flattened once up front; an in-memory byte stream and a
    ``BitsIO`` wrapper around it are created to receive the output.
    """
    self.data_structure = data_structure

    # Only the traversal order of the data matters here, not its nesting,
    # so the object is simply flattened.
    flat_values, flat_shape = flatten(data_structure)
    self.data_stream = flat_values
    self.flat_pattern = flat_shape
    self.flat_labels = [None for _ in range(len(flat_values))]
    self.flat_pos = 0
    self.index_stack = [0]

    # Settings: every global modifier begins switched off.
    self.reverse_all = self.invert_all = self.endianswap_all = False
    self.last_value = self.last_index_stack = None

    # Output target: an in-memory byte buffer plus its bit-level wrapper.
    out_bytes = io.BytesIO()
    self.byte_stream = out_bytes
    self.bit_stream = BitsIO(out_bytes)

    self.labels = {}
    self.mod_operations = []

    log = logarhythm.getLogger('Constructor')
    self.logger = log
    log.format = logarhythm.build_format(time=None, level=False)
"""
This module provides blueprint functions for zip files.

See the zip file structure definition here:
https://en.m.wikipedia.org/wiki/Zip_(file_format)
"""
import unittest, sys, os.path, zlib
from collections import OrderedDict
import logarhythm
# Extend the import search path with the directory two levels above the
# current working directory before importing bitarchitect.
sys.path.append(os.path.abspath('../..'))
import bitarchitect

# Module logger; the format is built with time=None and level=False.
logger = logarhythm.getLogger('blueprints.zip')
logger.format = logarhythm.build_format(time=None, level=False)


def zip_file(maker):
    """
    This blueprint parses a zip file.
    This is not performance optimized, but has value in zip file inspection.

    Structure:
        0: EOCD
        1: Central Directory
        2: Files

    ``maker`` is called with a pattern string and is indexed by label name
    (``maker["cd_offset"]``); presumably a bitarchitect maker object -
    TODO confirm against the caller.

    NOTE(review): the body visible here stops after reading ``cd_offset``;
    the central directory and file parsing described above are not shown
    in this view.
    """
    maker('Ey ##turn on endian-swap for everything')

    #parse the EOCD
    eocd_record(maker)

    #get central directory offset and size (each in bytes)
    # NOTE(review): only the offset is read in the code visible here.
    cd_offset = maker["cd_offset"]
def pattern_parse(pattern):
    """
    Interprets the provided pattern into a sequence of directives and
    arguments that are provided to a maker.

    This is a generator. Each yielded item is a tuple whose first element is
    the matched token string, whose second element is the ``Directive`` enum
    value, and whose remaining elements are the arguments for that directive.

    Tuple shapes produced directly by this function:
        (tok, MODOFF, m, n, modtype)      "r5.3" / "i5.3" / "p5.3"; n is None for "r5.$"
        (tok, TAKEALL, encoding)          "B$" / "C$"
        (tok, MOD, None, modtype)         "r$" / "i$"
        (tok, VALUE or MOD, n, arg)       "u8", "s8", "x8", "X8", "b8", "B8", "C8", "r8", "i8", "e8"
        (tok, NESTOPEN) / (tok, NESTCLOSE)   "[" / "]"
        (tok, MODSET, modtype, setting)   "Ry", "In", "Et", ...
        (tok, ZEROS/ONES/NEXT, n)         "z8" / "o8" / "n8"
        (tok, SETLABEL, label)            '#"label"'
        (tok, DEFLABEL, label, value)     '!#"label" = <literal>;'
        (tok, MATCHLABEL, label)          '=#"label"'
        (tok, ASSERTION, value)           '= <literal>;'
        (tok, MARKERSTART/MARKEREND, bytes)  'm^"hex"' / 'm$"hex"'
        (tok, JUMP, num_bits, jump_type)  "js8", "jf8", "jb8", "je8"

    Instructions enclosed in "{ ... }N" (or "{ ... }$" for an unbounded
    count) are captured rather than yielded; once the outermost repetition
    closes, the capture is handed to ``_process_repetition_capture`` and
    whatever it yields is yielded from here. "##" starts a comment that runs
    to the end of the line.

    Raises:
        Exception: for an "e" token whose size is not a multiple of 8 bits,
            for a matched token that no branch recognises, and when part of
            the pattern is left over that does not match any token.
    """
    logger = logarhythm.getLogger('parse_pattern')
    logger.format = logarhythm.build_format(time=None, level=False)
    logger.debug('pattern started')

    pattern = pattern.strip()
    pos = 0

    # The "$" in "rN.$" and in "r$" / "i$" / "B$" / "C$" is a literal
    # character of the token, so it must be escaped. Unescaped it is the
    # end-of-string anchor, and the '$' tokens tested for below could never
    # be produced by this expression.
    tok_parse = re.compile(
        r'\s*([rip]\d+\.(?:\d+|\$)|[usfxXbBnpjJrizoeC]\d+|[RIE][ynt]|!#"|#["#]|=#"|[\[\]=\{\}]|[riBC]\$|m[$^]"|j[sfbe]\d+)'
    )
    label_parse = re.compile(r'([^"]+)"')
    space_equals_parse = re.compile(r'\s*=')
    expr_parse = re.compile(r'([^;]+);')
    num_parse = re.compile(r'\d+')
    num_inf_parse = re.compile(r'\d+|\$')
    comment_parse = re.compile(r'.*?$', re.S | re.M)
    hex_parse = re.compile(r'([A-F0-9a-f]+)"')

    no_arg_codes = {
        '[': Directive.NESTOPEN,
        ']': Directive.NESTCLOSE,
    }
    num_codes = {
        'z': Directive.ZEROS,
        'o': Directive.ONES,
        'n': Directive.NEXT,
    }
    modoff_codes = {
        'r': (Directive.MODOFF, ModType.REVERSE),
        'i': (Directive.MODOFF, ModType.INVERT),
        'p': (Directive.MODOFF, ModType.PULL),
    }
    setting_codes = {
        'R': (Directive.MODSET, ModType.REVERSE),
        'I': (Directive.MODSET, ModType.INVERT),
        'E': (Directive.MODSET, ModType.ENDIANSWAP),
    }
    num_and_arg_codes = {
        'u': (Directive.VALUE, Encoding.UINT),
        's': (Directive.VALUE, Encoding.SINT),
        'x': (Directive.VALUE, Encoding.LHEX),
        'X': (Directive.VALUE, Encoding.UHEX),
        'b': (Directive.VALUE, Encoding.BINS),
        'B': (Directive.VALUE, Encoding.BYTS),
        'C': (Directive.VALUE, Encoding.CHAR),
        'r': (Directive.MOD, ModType.REVERSE),
        'i': (Directive.MOD, ModType.INVERT),
        'e': (Directive.MOD, ModType.ENDIANSWAP),
    }
    negate_num_codes = set('Jp')
    setting_map = {
        'y': Setting.TRUE,
        'n': Setting.FALSE,
        't': Setting.TOGGLE,
    }
    jump_codes = {
        's': JumpType.START,
        'f': JumpType.FORWARD,
        'b': JumpType.BACKWARD,
        'e': JumpType.END,
    }

    # Stack of open "{" captures; the innermost capture is the last element.
    repetition_stack = []

    tokmatch = tok_parse.match(pattern, pos)
    if tokmatch is not None:
        pos = tokmatch.end(0)

    while tokmatch is not None:
        tok = tokmatch.group(1)
        code = tok[0]
        instruction = None

        if '.' in tok:  #MODOFF
            # The token expression guarantees exactly one '.', followed by
            # either digits or a literal '$' (meaning "no explicit n").
            m_text, n_text = tok[1:].split('.')
            m = int(m_text)
            n = None if n_text == '$' else int(n_text)
            directive, modtype = modoff_codes[code]
            instruction = (tok, directive, m, n, modtype)
        elif tok == 'B$':  #TAKEALL BYTS
            instruction = (tok, Directive.TAKEALL, Encoding.BYTS)
        elif tok == 'C$':  #TAKEALL CHAR
            instruction = (tok, Directive.TAKEALL, Encoding.CHAR)
        elif tok == 'r$':  #MOD
            instruction = (tok, Directive.MOD, None, ModType.REVERSE)
        elif tok == 'i$':  #MOD
            # 'i' is the invert modifier everywhere else in this function.
            instruction = (tok, Directive.MOD, None, ModType.INVERT)
        elif code in num_and_arg_codes:  #VALUE, MOD
            directive, arg = num_and_arg_codes[code]
            n = int(tok[1:])
            if code in negate_num_codes:
                n = -n
            if code == 'e' and n % 8 != 0:
                raise Exception(
                    '"e" tokens must have a size that is a multiple of 8 bits: %s'
                    % tok)
            instruction = (tok, directive, n, arg)
        elif code in no_arg_codes:  #NESTOPEN, NESTCLOSE
            directive = no_arg_codes[code]
            instruction = (tok, directive)
        elif code in setting_codes:  #MODSET
            directive, modtype = setting_codes[code]
            setting = setting_map[tok[1]]
            instruction = (tok, directive, modtype, setting)
        elif code in num_codes:  #ZEROS, ONES, NEXT
            directive = num_codes[code]
            n = int(tok[1:])
            instruction = (tok, directive, n)
        elif tok == '#"':  #SETLABEL
            labelmatch = label_parse.match(pattern, pos)
            tok += labelmatch.group(0)
            pos = labelmatch.end(0)
            label = labelmatch.group(1)
            instruction = (tok, Directive.SETLABEL, label)
        elif tok == '!#"':  #DEFLABEL
            labelmatch = label_parse.match(pattern, pos)
            tok += labelmatch.group(0)
            pos = labelmatch.end(0)
            label = labelmatch.group(1)

            space_equals_match = space_equals_parse.match(pattern, pos)
            tok += space_equals_match.group(0)
            pos = space_equals_match.end(0)

            expr_match = expr_parse.match(pattern, pos)
            tok += expr_match.group(0)
            pos = expr_match.end(0)
            expr = expr_match.group(1)
            # literal_eval accepts Python literals only; it does not execute code.
            value = ast.literal_eval(expr.strip())
            instruction = (tok, Directive.DEFLABEL, label, value)
        elif tok == '=#"':  #MATCHLABEL
            labelmatch = label_parse.match(pattern, pos)
            tok += labelmatch.group(0)
            pos = labelmatch.end(0)
            label = labelmatch.group(1)
            instruction = (tok, Directive.MATCHLABEL, label)
        elif tok == '=':  #ASSERTION
            expr_match = expr_parse.match(pattern, pos)
            tok += expr_match.group(0)
            pos = expr_match.end(0)
            expr = expr_match.group(1)
            value = ast.literal_eval(expr.strip())
            instruction = (tok, Directive.ASSERTION, value)
        elif tok == '{':  #REPETITION CAPTURE START
            # First element is how many times to repeat; initialized to None
            # and filled out when the capture is complete.
            new_capture = [None]
            if len(repetition_stack) > 0:
                # Nested repetition: connect the previous capture to this one.
                repetition_stack[-1].append(new_capture)
            repetition_stack.append(new_capture)  #new capture is focus now
            logger.debug('Beginning "{" repetition level %d' %
                         len(repetition_stack))
        elif tok == '}':  #REPETITION CAPTURE END
            logger.debug('Ending "}" repetition level %d' %
                         len(repetition_stack))
            repetition_capture = repetition_stack.pop(-1)

            num_inf_match = num_inf_parse.match(pattern, pos)  #collect number
            tok += num_inf_match.group(0)
            pos = num_inf_match.end(0)

            # Populate the first element with the repetition count.
            if num_inf_match.group(0) == '$':
                repetition_capture[0] = float('inf')
            else:
                repetition_capture[0] = int(num_inf_match.group(0))

            if len(repetition_stack) == 0:  #if all repetitions are done
                yield from _process_repetition_capture(repetition_capture,
                                                       logger)
        elif tok == '##':  #COMMENT
            comment_match = comment_parse.match(pattern, pos)
            tok += comment_match.group(0)
            pos = comment_match.end(0)
            logger.debug('Comment: %s' % tok)
        elif tok.startswith('m'):
            if tok[1] == '^':  #MARKERSTART
                directive = Directive.MARKERSTART
            elif tok[1] == '$':  #MARKEREND
                directive = Directive.MARKEREND
            hexmatch = hex_parse.match(pattern, pos)
            tok += hexmatch.group(0)
            pos = hexmatch.end(0)
            hex_literal = hexmatch.group(1)
            # Second argument True: accept lowercase hex digits as well.
            byte_literal = b16decode(hex_literal, True)
            instruction = (tok, directive, byte_literal)
        elif code == 'j':
            code2 = tok[1]
            num_bits = int(tok[2:])
            jump_type = jump_codes[code2]
            instruction = (tok, Directive.JUMP, num_bits, jump_type)
        else:
            raise Exception('Unknown token: %s' % tok)

        if instruction is not None:
            if len(repetition_stack) > 0:
                # Inside a "{...}" capture: store instead of yielding.
                logger.debug('store rep level %d %s' %
                             (len(repetition_stack), repr(instruction)))
                repetition_stack[-1].append(instruction)
            else:
                logger.debug('yield %s' % (repr(instruction)))
                yield instruction

        tokmatch = tok_parse.match(pattern, pos)
        if tokmatch is not None:
            pos = tokmatch.end(0)

    # Anything left over did not match the token expression.
    if pos < len(pattern):
        raise Exception('Unable to parse pattern after position %d: %s' %
                        (pos, pattern[pos:pos + 20] + '...'))
    logger.debug('pattern completed')