def __init__(self, byte_stream):
        # Source of the data: the raw byte stream plus a BitsIO wrapper around it
        self.byte_stream = byte_stream
        self.bit_stream = BitsIO(byte_stream)

        # Global modifier settings all start switched off
        self.reverse_all = self.invert_all = self.endianswap_all = False

        # Nothing has been processed yet
        self.last_value = self.last_index_stack = None

        # Label name -> value mapping, initially empty
        self.labels = dict()

        # Flat bookkeeping, all starting out empty / at position zero
        self.data_stream = list()
        self.flat_labels = list()
        self.flat_pattern = list()  # list of characters
        self.flat_pos = 0
        self.index_stack = [0]

        # Nested result; stack_data starts with the top-level list as its only entry
        self.data_structure = list()
        self.stack_data = [self.data_structure]

        # Each entry: tok, modtype, start, offset, num_bits
        self.mod_operations = list()

        # Dedicated logger for this object, with a custom format
        extractor_logger = logarhythm.getLogger('Extractor')
        extractor_logger.format = logarhythm.build_format(time=None, level=False)
        self.logger = extractor_logger
Beispiel #2
0
def test_logarhythm():
    """
    Check how logarhythm loggers relate to the standard logging module's loggers.

    First phase: logging.getLogger() and logging.getLogger(__name__) are expected to
    return one and the same object, which is also the _logger held by the logarhythm
    module logger, while logarhythm.root_logger is a different object.

    Second phase (after setting disarm_logging_module to False): logging.getLogger()
    is expected to return the _logger of logarhythm.root_logger, and
    logging.getLogger(__name__) the _logger of the logarhythm module logger.
    """
    import logging

    std_root = logging.getLogger()
    std_named = logging.getLogger(__name__)
    assert std_named.name == 'tests.test_logarhythm'

    wrapped_named = logarhythm.getLogger()
    assert wrapped_named.name == 'tests.test_logarhythm'

    wrapped_root = logarhythm.root_logger
    assert wrapped_root.name == ''

    # First phase: identity relationships before the attribute is changed
    assert std_root is not wrapped_root
    assert std_root is std_named
    assert std_root is wrapped_named._logger

    # Second phase: flip the switch, then fetch the standard loggers again
    wrapped_named.disarm_logging_module = False
    std_root = logging.getLogger()
    std_named = logging.getLogger(__name__)
    assert std_root is wrapped_root._logger
    assert std_named is wrapped_named._logger
    def __init__(self, data_structure):
        # Keep a reference to the object exactly as it was handed in
        self.data_structure = data_structure

        # Only the traversal order of the data matters here, not its nesting,
        # so work from the flattened form.
        flat_values, flat_pattern = flatten(data_structure)
        self.data_stream = flat_values
        self.flat_pattern = flat_pattern
        self.flat_labels = [None] * len(flat_values)  # one label slot per flat value
        self.flat_pos = 0
        self.index_stack = [0]

        # Global modifier settings all start switched off
        self.reverse_all = self.invert_all = self.endianswap_all = False

        # Nothing has been processed yet
        self.last_value = self.last_index_stack = None

        # Output goes to a fresh in-memory byte buffer wrapped in a BitsIO
        out_bytes = io.BytesIO()
        self.byte_stream = out_bytes
        self.bit_stream = BitsIO(out_bytes)

        self.labels = dict()
        self.mod_operations = list()

        # Dedicated logger for this object, with a custom format
        constructor_logger = logarhythm.getLogger('Constructor')
        constructor_logger.format = logarhythm.build_format(time=None, level=False)
        self.logger = constructor_logger
Beispiel #4
0
"""
This module provides blueprint functions for zip files.
See zip file structure definition here:
        https://en.m.wikipedia.org/wiki/Zip_(file_format)
"""
import unittest, sys, os.path, zlib
from collections import OrderedDict
import logarhythm
sys.path.append(os.path.abspath('../..'))
import bitarchitect

# Module-level logger shared by the zip blueprint functions.
# NOTE(review): time=None / level=False presumably leave the timestamp and level
# out of each record -- confirm against logarhythm.build_format.
logger = logarhythm.getLogger('blueprints.zip')
logger.format = logarhythm.build_format(time=None, level=False)


def zip_file(maker):
    """
    This blueprint parses a zip file. This is not performance optimized, but has value in zip file inspection.

    Structure:
        0: EOCD
        1: Central Directory
        2: Files

    The maker is used in three ways here: it is called with a pattern string, it is
    passed on to eocd_record, and it is indexed by label name ("cd_offset").

    NOTE(review): the visible body stops right after reading "cd_offset"; the central
    directory and file steps listed above do not appear here -- confirm whether this
    function has been truncated.
    """
    maker('Ey ##turn on endian-swap for everything')

    #parse the EOCD
    eocd_record(maker)

    #get the central directory offset (presumably in bytes and labelled by eocd_record -- TODO confirm)
    cd_offset = maker["cd_offset"]
def pattern_parse(pattern):
    """
    Interprets the provided pattern into a sequence of directives and arguments that are provided to a maker.

    Yields tuples where the first element is the matched token string, the second is the directive enum value, and the rest are the arguments for that directive.

    Shapes of the yielded tuples:
        (tok, Directive.MODOFF, m, n, modtype)          n is None for the "$" form
        (tok, Directive.TAKEALL, encoding)              "B$" / "C$"
        (tok, Directive.MOD, n, modtype)                n is None for "r$" / "i$"
        (tok, Directive.VALUE, n, encoding)
        (tok, Directive.NESTOPEN) / (tok, Directive.NESTCLOSE)
        (tok, Directive.MODSET, modtype, setting)
        (tok, Directive.ZEROS | ONES | NEXT, n)
        (tok, Directive.SETLABEL, label)
        (tok, Directive.DEFLABEL, label, value)
        (tok, Directive.MATCHLABEL, label)
        (tok, Directive.ASSERTION, value)
        (tok, Directive.MARKERSTART | MARKEREND, byte_literal)
        (tok, Directive.JUMP, num_bits, jump_type)

    Instructions found between "{" and "}" are not yielded directly. They are
    collected (nesting is allowed) and, once the outermost "}" and its repeat
    count ("$" meaning infinite) have been read, handed to
    _process_repetition_capture, whose output is yielded instead.

    "##" starts a comment that runs to the end of the line; it is only logged.

    Raises:
        Exception: for an unknown token, for an "e" token whose size is not a
            multiple of 8 bits, or when part of the pattern cannot be tokenized.
    """
    logger = logarhythm.getLogger('parse_pattern')
    logger.format = logarhythm.build_format(time=None, level=False)
    logger.debug('pattern started')
    pattern = pattern.strip()
    pos = 0
    # The "$" of the "rN.$" form and of the "r$" / "i$" / "B$" / "C$" tokens must be
    # escaped: an unescaped "$" is an end-of-string anchor, which would make those
    # literal tokens unmatchable (and let a bare trailing "B" or "r5." slip through).
    tok_parse = re.compile(
        '\\s*([rip]\\d+\\.(?:\\d+|\\$)|[usfxXbBnpjJrizoeC]\\d+|[RIE][ynt]|!#"|#["#]|=#"|[\\[\\]=\\{\\}]|[riBC]\\$|m[$^]"|j[sfbe]\\d+)'
    )
    label_parse = re.compile('([^"]+)"')  # label text up to the closing quote
    space_equals_parse = re.compile('\\s*=')
    expr_parse = re.compile('([^;]+);')  # expression text up to the terminating ";"
    num_parse = re.compile('\\d+')
    num_inf_parse = re.compile('\\d+|\\$')  # repeat count, or "$" for infinite
    comment_parse = re.compile('.*?$', re.S | re.M)  # rest of the current line
    hex_parse = re.compile('([A-F0-9a-f]+)\"')

    # Lookup tables: first character of a token -> directive (and its fixed argument)
    no_arg_codes = {
        '[': Directive.NESTOPEN,
        ']': Directive.NESTCLOSE,
    }
    num_codes = {
        'z': Directive.ZEROS,
        'o': Directive.ONES,
        'n': Directive.NEXT,
    }
    modoff_codes = {
        'r': (Directive.MODOFF, ModType.REVERSE),
        'i': (Directive.MODOFF, ModType.INVERT),
        'p': (Directive.MODOFF, ModType.PULL),
    }
    setting_codes = {
        'R': (Directive.MODSET, ModType.REVERSE),
        'I': (Directive.MODSET, ModType.INVERT),
        'E': (Directive.MODSET, ModType.ENDIANSWAP),
    }
    num_and_arg_codes = {
        'u': (Directive.VALUE, Encoding.UINT),
        's': (Directive.VALUE, Encoding.SINT),
        'x': (Directive.VALUE, Encoding.LHEX),
        'X': (Directive.VALUE, Encoding.UHEX),
        'b': (Directive.VALUE, Encoding.BINS),
        'B': (Directive.VALUE, Encoding.BYTS),
        'C': (Directive.VALUE, Encoding.CHAR),
        'r': (Directive.MOD, ModType.REVERSE),
        'i': (Directive.MOD, ModType.INVERT),
        'e': (Directive.MOD, ModType.ENDIANSWAP),
    }
    negate_num_codes = set('Jp')
    setting_map = {
        'y': Setting.TRUE,
        'n': Setting.FALSE,
        't': Setting.TOGGLE,
    }
    jump_codes = {
        's': JumpType.START,
        'f': JumpType.FORWARD,
        'b': JumpType.BACKWARD,
        'e': JumpType.END,
    }

    # Stack of open "{" captures; the innermost one is last
    repetition_stack = []

    tokmatch = tok_parse.match(pattern, pos)
    if tokmatch is not None:
        pos = tokmatch.end(0)

    while tokmatch is not None:
        tok = tokmatch.group(1)
        code = tok[0]

        # Stays None for tokens that produce no instruction ("{", "}", comments)
        instruction = None

        if '.' in tok:  #MODOFF
            if '$' in tok:  #MODOFF with $
                m = int(tok[1:].split('.')[0])
                n = None
                directive, modtype = modoff_codes[code]
                instruction = (tok, directive, m, n, modtype)

            else:  #MODOFF with numbers
                m, n = [int(x) for x in tok[1:].split('.')]
                directive, modtype = modoff_codes[code]
                instruction = (tok, directive, m, n, modtype)
        elif tok == 'B$':  #TAKEALL BYTS
            instruction = (tok, Directive.TAKEALL, Encoding.BYTS)
        elif tok == 'C$':  #TAKEALL CHAR
            instruction = (tok, Directive.TAKEALL, Encoding.CHAR)
        elif tok == 'r$':  #MOD
            instruction = (tok, Directive.MOD, None, ModType.REVERSE)
        elif tok == 'i$':  #MOD
            # "i" is the invert code everywhere else in this function
            instruction = (tok, Directive.MOD, None, ModType.INVERT)
        elif code in num_and_arg_codes:  #VALUE, MOD
            directive, arg = num_and_arg_codes[code]
            n = int(tok[1:])
            if code in negate_num_codes:
                n = -n
            if code == 'e':
                if n % 8 != 0:
                    raise Exception(
                        '"e" tokens must have a size that is a multiple of 8 bits: %s'
                        % tok)
            instruction = (tok, directive, n, arg)
        elif code in no_arg_codes:  #NESTOPEN, NESTCLOSE
            directive = no_arg_codes[code]
            instruction = (tok, directive)
        elif code in setting_codes:  #MODSET
            directive, modtype = setting_codes[code]
            setting = setting_map[tok[1]]
            instruction = (tok, directive, modtype, setting)
        elif code in num_codes:  #ZEROS, ONES, NEXT
            directive = num_codes[code]
            n = int(tok[1:])
            instruction = (tok, directive, n)
        elif tok == '#"':  #SETLABEL
            labelmatch = label_parse.match(pattern, pos)
            tok += labelmatch.group(0)
            pos = labelmatch.end(0)
            label = labelmatch.group(1)
            instruction = (tok, Directive.SETLABEL, label)
        elif tok == '!#"':  #DEFLABEL
            # Form: !#"label" = expr;  -- read the label, the "=", then the expression
            labelmatch = label_parse.match(pattern, pos)
            tok += labelmatch.group(0)
            pos = labelmatch.end(0)
            label = labelmatch.group(1)
            space_equals_match = space_equals_parse.match(pattern, pos)
            tok += space_equals_match.group(0)
            pos = space_equals_match.end(0)
            expr_match = expr_parse.match(pattern, pos)
            tok += expr_match.group(0)
            pos = expr_match.end(0)
            expr = expr_match.group(1)
            value = ast.literal_eval(expr.strip())
            instruction = (tok, Directive.DEFLABEL, label, value)

        elif tok == '=#"':  #MATCHLABEL
            labelmatch = label_parse.match(pattern, pos)
            tok += labelmatch.group(0)
            pos = labelmatch.end(0)
            label = labelmatch.group(1)
            instruction = (tok, Directive.MATCHLABEL, label)

        elif tok == '=':  #ASSERTION
            expr_match = expr_parse.match(pattern, pos)
            tok += expr_match.group(0)
            pos = expr_match.end(0)
            expr = expr_match.group(1)
            value = ast.literal_eval(expr.strip())
            instruction = (tok, Directive.ASSERTION, value)
        elif tok == '{':  #REPETITION CAPTURE START
            #first element is how many times to repeat; initialized to None and filled out when capture is complete
            new_capture = [None]
            if len(repetition_stack) > 0:  #if nested repetition, need to connect previous capture to this new one
                repetition_stack[-1].append(new_capture)
            repetition_stack.append(new_capture)  #new capture is focus now
            logger.debug('Beginning "{" repetition level %d' %
                         len(repetition_stack))
        elif tok == '}':  #REPETITION CAPTURE END
            logger.debug('Ending "}" repetition level %d' %
                         len(repetition_stack))
            repetition_capture = repetition_stack.pop(-1)
            num_inf_match = num_inf_parse.match(pattern, pos)  #collect number
            tok += num_inf_match.group(0)
            pos = num_inf_match.end(0)
            if num_inf_match.group(0) == '$':
                repetition_capture[0] = float('inf')
            else:
                #populate first element with repetition number
                repetition_capture[0] = int(num_inf_match.group(0))
            if len(repetition_stack) == 0:  #if all repetitions are done
                yield from _process_repetition_capture(repetition_capture,
                                                       logger)
        elif tok == '##':  #COMMENT
            comment_match = comment_parse.match(pattern, pos)
            tok += comment_match.group(0)
            pos = comment_match.end(0)
            logger.debug('Comment: %s' % tok)
        elif tok.startswith('m'):
            # tok_parse only admits "^" or "$" after "m", so directive is always bound
            if tok[1] == '^':  #MARKERSTART
                directive = Directive.MARKERSTART
            elif tok[1] == '$':  #MARKEREND
                directive = Directive.MARKEREND
            hexmatch = hex_parse.match(pattern, pos)
            tok += hexmatch.group(0)
            pos = hexmatch.end(0)
            hex_literal = hexmatch.group(1)
            byte_literal = b16decode(hex_literal, True)  # True: accept lowercase hex digits
            instruction = (tok, directive, byte_literal)
        elif code == 'j':
            code2 = tok[1]
            num_bits = int(tok[2:])
            jump_type = jump_codes[code2]
            instruction = (tok, Directive.JUMP, num_bits, jump_type)
        else:
            raise Exception('Unknown token: %s' % tok)

        if instruction is not None:
            if len(repetition_stack) > 0:
                # Inside a "{...}" capture: store for later expansion instead of yielding
                logger.debug('store rep level %d %s' %
                             (len(repetition_stack), repr(instruction)))
                repetition_stack[-1].append(instruction)
            else:
                logger.debug('yield %s' % (repr(instruction)))
                yield instruction
        tokmatch = tok_parse.match(pattern, pos)
        if tokmatch is not None:
            pos = tokmatch.end(0)
    # The loop ends at the first position where no token matches; anything left is an error
    if pos < len(pattern):
        raise Exception('Unable to parse pattern after position %d: %s' %
                        (pos, pattern[pos:pos + 20] + '...'))
    logger.debug('pattern completed')