def _build_iterator_warp(state, last_addr, instructions):
    """Build the warp node that ends a block with an ITERC/ITERN + jump pair.

    ``instructions`` is the tail of the block: its second-to-last entry must
    be an ITERC or ITERN instruction (asserted) and its last entry is the
    jump located at ``last_addr``.

    Returns ``(warp, 2)``. The jump destination becomes the warp body and the
    address right after the jump becomes the way out.
    """
    call, jump = instructions[-2], instructions[-1]
    call_addr = last_addr - 1

    assert call.opcode in (ins.ITERC.opcode, ins.ITERN.opcode)

    warp = nodes.IteratorWarp()
    base = call.A

    # Control slots live directly below the base slot:
    # base - 3 is the generator, base - 2 the state, base - 1 the control.
    warp.controls.contents = [
        _build_slot(state, call_addr, base + offset)
        for offset in (-3, -2, -1)
    ]

    # Loop variables occupy slots base .. base + B - 2 (inclusive); they are
    # resolved one address before the iterator instruction.
    for slot in range(base, base + call.B - 1):
        warp.variables.contents.append(
            _build_slot(state, call_addr - 1, slot)
        )

    destination = get_jump_destination(last_addr, jump)

    warp.way_out = state._warp_in_block(last_addr + 1)
    warp.body = state._warp_in_block(destination)

    return warp, 2
def _build_conditional_warp(state, last_addr, instructions):
    """Build a ConditionalWarp from a condition instruction and its jump.

    ``instructions[-2]`` is the condition (at ``last_addr - 1``) and
    ``instructions[-1]`` is the jump (at ``last_addr``).

    Returns ``(warp, 2)``.
    """
    condition, jump = instructions[-2], instructions[-1]
    condition_addr = last_addr - 1

    warp = nodes.ConditionalWarp()

    # ISTC/ISFC record operand A as the slot; every other opcode at or above
    # IST records operand CD. Both groups yield a unary expression.
    uses_a_operand = condition.opcode in (ins.ISTC.opcode, ins.ISFC.opcode)

    if uses_a_operand or condition.opcode >= ins.IST.opcode:
        expression = _build_unary_expression(state, condition_addr, condition)
        warp._slot = condition.A if uses_a_operand else condition.CD
    else:
        expression = _build_comparison_expression(
            state, condition_addr, condition
        )

    warp.condition = expression

    destination = get_jump_destination(last_addr, jump)

    # A condition is inverted during the preparation phase above, so the
    # jump destination is the "false" branch and the fall-through address is
    # the "true" branch.
    warp.false_target = state._warp_in_block(destination)
    warp.true_target = state._warp_in_block(last_addr + 1)

    return warp, 2
def _blockenize(state, instructions):
    """Run the instruction fix-up passes, then split instructions into blocks.

    Each created ``nodes.Block`` is appended to ``state.blocks`` and
    registered in ``state.block_starts`` under its first address. Address 0
    never belongs to a block.
    """
    # Fix inverted comparison expressions (e.g. 0 < variable):
    _fix_inverted_comparison_expressions(state, instructions)

    # Fix "repeat until true" encapsulated by another loop
    _fix_broken_repeat_until_loops(state, instructions)

    # Fix "var_1 = var_1 [comparison] var_2 and (operation) var_1 or var_1"
    # edge case
    _fix_broken_unary_expressions(state, instructions)

    # A set, because the same block end may be discovered more than once.
    block_ends = set()

    for addr in range(1, len(instructions)):
        instruction = instructions[addr]
        opcode = instruction.opcode

        if opcode not in _WARP_INSTRUCTIONS:
            continue

        if opcode not in _JUMP_WARP_INSTRUCTIONS:
            block_ends.add(addr)
            continue

        destination = get_jump_destination(addr, instruction)

        # A UCLO that targets the very next instruction ends no block.
        if opcode != ins.UCLO.opcode or destination != addr + 1:
            block_ends.add(destination - 1)
            block_ends.add(addr)

    ordered_ends = sorted(block_ends)
    ordered_ends.append(len(instructions) - 1)

    # This could happen if something jumps to the first instruction.
    # We don't need a "zero block" with the function header, so ignore it.
    if ordered_ends[0] == 0:
        del ordered_ends[0]

    next_first_address = 1

    for index, last_address in enumerate(ordered_ends):
        block = nodes.Block()
        block.index = index
        block.first_address = next_first_address
        block.last_address = last_address

        state.blocks.append(block)
        state.block_starts[block.first_address] = block

        next_first_address = last_address + 1
def _fix_broken_unary_expressions(state, instructions):
    """Remove an ISTC + following-instruction pair that trails an operation.

    Looks for an ISTC instruction whose predecessor has an opcode in the
    ADDVN..CAT range and that is preceded, through a run of such operations,
    by a JMP. When the jump destinations line up (see the checks below), the
    ISTC's A operand is copied onto the preceding operation and both the ISTC
    and the instruction after it are removed via ``_remove_instruction``.

    ``instructions`` is modified in place.
    """
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source - confirm against the original file.
    # NOTE(review): instructions are removed while this enumerate() is still
    # iterating the same list - confirm that is intended.
    enumerated_instructions = enumerate(instructions)
    for i, instruction in enumerated_instructions:
        if i > 2 and instruction.opcode == ins.ISTC.opcode \
                and ins.ADDVN.opcode <= instructions[i - 1].opcode <= ins.CAT.opcode:
            # Search for a jump that precedes the ISTC op
            leading_jump_found = False
            for j in range(1, i):
                if instructions[i - j].opcode == ins.JMP.opcode:
                    leading_jump_found = True
                    break
                # NOTE(review): range() excludes CAT here, while the outer
                # guard above includes it - confirm which bound is intended.
                elif instructions[i - j].opcode not in range(
                        ins.ADDVN.opcode, ins.CAT.opcode):
                    break
            # Make sure the preceding jump matches the destination of the ISTC op.
            # (The destination is read from the instruction right after the ISTC.)
            instruction_destination = get_jump_destination(
                i + 1, instructions[i + 1])
            if instruction_destination == i + 2 and leading_jump_found:
                # Additional jump edge case of an edge case when expression is in an else body
                if not instruction_destination == get_jump_destination(
                        i - j, instructions[i - j]):
                    # The leading jump does not target i + 2; accept it only
                    # if a JMP at i + 2 forwards to the same destination.
                    if instructions[i + 2].opcode == ins.JMP.opcode:
                        instruction_destination = get_jump_destination(
                            i + 2, instructions[i + 2])
                        if instruction_destination == get_jump_destination(
                                i - j, instructions[i - j]):
                            instructions[i - 1].A = instruction.A
                            # Remove the broken condition
                            # (higher index first, so index i stays valid)
                            _remove_instruction(state, instructions, i + 1)
                            _remove_instruction(state, instructions, i)
                else:
                    # The leading jump targets i + 2 directly.
                    instructions[i - 1].A = instruction.A
                    # Remove the broken condition
                    # (higher index first, so index i stays valid)
                    _remove_instruction(state, instructions, i + 1)
                    _remove_instruction(state, instructions, i)
def _build_conditional_warp(state, last_addr, instructions):
    """Build a ConditionalWarp, padding an empty branch with a no-op block.

    ``instructions[-2]`` is the condition (at ``last_addr - 1``) and
    ``instructions[-1]`` is the jump (at ``last_addr``).

    When the jump targets the instruction immediately after itself and the
    condition is not ISTC/ISFC, a new single-address Block is inserted into
    ``state.blocks`` in front of the original true target and becomes the
    warp's true target.

    Returns ``(warp, 2)``.
    """
    shift = 2
    condition, jump = instructions[-2], instructions[-1]
    condition_addr = last_addr - 1
    slot_in_a_opcodes = (ins.ISTC.opcode, ins.ISFC.opcode)

    warp = nodes.ConditionalWarp()

    if condition.opcode in slot_in_a_opcodes:
        expression = _build_unary_expression(state, condition_addr, condition)
        warp._slot = condition.A
    elif condition.opcode >= ins.IST.opcode:
        expression = _build_unary_expression(state, condition_addr, condition)
        warp._slot = condition.CD
    else:
        expression = _build_comparison_expression(
            state, condition_addr, condition
        )

    warp.condition = expression

    destination = get_jump_destination(last_addr, jump)
    fallthrough_addr = last_addr + 1

    # A condition is inverted during the preparation phase above
    warp.false_target = state._warp_in_block(destination)
    warp.true_target = state._warp_in_block(fallthrough_addr)

    if destination != fallthrough_addr \
            or condition.opcode in slot_in_a_opcodes:
        return warp, shift

    # This is an empty 'then' or 'else'. The simplest way to handle it is
    # to insert a Block containing just a no-op statement.
    original_target = warp.true_target

    filler = nodes.Block()
    filler.first_address = fallthrough_addr
    filler.last_address = filler.first_address
    filler.index = original_target.index
    filler.warpins_count = 1
    filler._last_body_addr = filler.last_address - shift

    filler.warp = nodes.UnconditionalWarp()
    filler.warp.type = nodes.UnconditionalWarp.T_FLOW
    filler.warp.target = original_target
    filler.warp._addr = filler.last_address - shift + 1

    state.blocks.insert(state.blocks.index(original_target), filler)
    warp.true_target = filler

    _create_no_op(state, last_addr, filler)

    return warp, shift
def _blockenize(state, instructions):
    """Split the instructions into code blocks.

    Each created ``nodes.Block`` is appended to ``state.blocks`` and
    registered in ``state.block_starts`` under its first address.
    """
    # Duplicates are possible and ok; the set sorts them out.
    end_addresses = set()

    for addr in range(1, len(instructions)):
        instruction = instructions[addr]
        opcode = instruction.opcode

        if opcode not in _WARP_INSTRUCTIONS:
            continue

        if opcode in _JUMP_WARP_INSTRUCTIONS:
            # Jump: both the current address and the address just before
            # the jump destination are the end of a block.
            destination = get_jump_destination(addr, instruction)
            ends_blocks = (
                opcode != ins.UCLO.opcode or destination != addr + 1
            )
            if ends_blocks:
                end_addresses.add(destination - 1)
                end_addresses.add(addr)
        else:
            end_addresses.add(addr)

    boundaries = sorted(end_addresses)
    boundaries.append(len(instructions) - 1)

    # This could happen if something jumps to the first instruction.
    # We don't need a "zero block" with the function header, so drop it.
    if boundaries[0] == 0:
        boundaries = boundaries[1:]

    block_start = 1

    for position, block_end in enumerate(boundaries):
        block = nodes.Block()
        block.index = position
        block.first_address = block_start
        block.last_address = block_end

        state.blocks.append(block)
        state.block_starts[block.first_address] = block

        block_start = block_end + 1
def _blockenize(state, instructions):
    """Partition ``instructions`` into consecutive blocks.

    Block ends are collected from the warp instructions, sorted, and turned
    into ``nodes.Block`` objects that are appended to ``state.blocks`` and
    indexed by first address in ``state.block_starts``. Blocks start at
    address 1.
    """
    # Duplicates are possible and ok, hence a set.
    last_addresses = set()

    for addr, instruction in enumerate(instructions):
        if addr == 0:
            # Address 0 is never inspected.
            continue

        opcode = instruction.opcode

        if opcode not in _WARP_INSTRUCTIONS:
            continue

        if opcode not in _JUMP_WARP_INSTRUCTIONS:
            last_addresses.add(addr)
            continue

        destination = get_jump_destination(addr, instruction)

        # Skip only a UCLO whose destination is the next instruction.
        if opcode == ins.UCLO.opcode and destination == addr + 1:
            continue

        last_addresses.add(destination - 1)
        last_addresses.add(addr)

    block_ends = sorted(last_addresses) + [len(instructions) - 1]

    # This could happen if something jumps to the first instruction.
    # We don't need a "zero block" with the function header, so simply
    # ignore this.
    if block_ends[0] == 0:
        block_ends.pop(0)

    previous_end = 0

    for index, block_end in enumerate(block_ends):
        block = nodes.Block()
        block.index = index
        block.first_address = previous_end + 1
        block.last_address = block_end

        state.blocks.append(block)
        state.block_starts[block.first_address] = block

        previous_end = block_end
def _build_unconditional_warp(state, addr, instruction):
    """Build an UnconditionalWarp of type T_JUMP for ``instruction``.

    A UCLO whose CD operand is 0 is not a jump; it is delegated to
    ``_build_flow_warp`` and that function's result is returned instead.

    Returns ``(warp, 1)`` for the jump case.
    """
    warp = nodes.UnconditionalWarp()
    warp.type = nodes.UnconditionalWarp.T_JUMP
    warp.is_uclo = instruction.opcode == ins.UCLO.opcode

    if warp.is_uclo and instruction.CD == 0:
        # Not a jump
        return _build_flow_warp(state, addr, instruction)

    target_addr = get_jump_destination(addr, instruction)
    warp.target = state._warp_in_block(target_addr)

    return warp, 1
def _build_numeric_loop_warp(state, addr, instruction):
    """Build a NumericLoopWarp from the loop instruction at ``addr``.

    Slots are taken relative to operand A: A+0..A+2 are the start, limit and
    step controls, A+3 is the loop index. The jump destination is the loop
    body and ``addr + 1`` is the way out.

    Returns ``(warp, 1)``.
    """
    warp = nodes.NumericLoopWarp()
    base = instruction.A

    warp.index = _build_slot(state, addr, base + 3)

    # Offsets 0, 1, 2 -> start, limit, step
    warp.controls.contents = [
        _build_slot(state, addr, base + offset) for offset in range(3)
    ]

    body_addr = get_jump_destination(addr, instruction)
    warp.body = state._warp_in_block(body_addr)
    warp.way_out = state._warp_in_block(addr + 1)

    return warp, 1
def _shift_warp_destinations(state, instructions, shift, modified_index):
    """Adjust the CD operand of warp instructions around ``modified_index``.

    Only instructions whose opcode is in ``_WARP_INSTRUCTIONS`` are touched:

    * an instruction before ``modified_index`` with a non-negative CD gets
      ``shift`` added when its destination lies past ``modified_index`` (or
      exactly on it while ``shift`` is positive);
    * an instruction at or after ``modified_index`` with a negative CD gets
      ``shift`` subtracted when its destination, computed as if the
      instruction itself had not yet moved, lies before ``modified_index``
      (or exactly on it while ``shift`` is positive).

    ``state`` is accepted but not used here.
    """
    positive_shift = shift > 0

    for index, instruction in enumerate(instructions):
        if instruction.opcode not in _WARP_INSTRUCTIONS:
            continue

        precedes_edit = index < modified_index

        if precedes_edit and instruction.CD >= 0:
            destination = get_jump_destination(index, instruction)
            crosses_edit = destination > modified_index or (
                destination == modified_index and positive_shift
            )
            if crosses_edit:
                instruction.CD += shift

        elif not precedes_edit and instruction.CD < 0:
            # Destination relative to the instruction's pre-shift position.
            destination = index + instruction.CD - shift + 1
            crosses_edit = destination < modified_index or (
                destination == modified_index and positive_shift
            )
            if crosses_edit:
                instruction.CD -= shift
def _establish_warps(state, instructions):
    """Attach a warp to every block in ``state.blocks``.

    For each block except the last, the trailing one or two instructions are
    handed to ``_build_warp``; the last block receives a ``nodes.EndWarp``.
    Each block gets ``_last_body_addr`` and each warp gets ``_addr``.

    A pair of consecutive JMPs with the same A operand, the first having
    CD == 0, may be rewritten into a fake conditional: the first JMP is
    replaced by an ISF on ``ins.SLOT_FALSE`` and the block that held the
    second JMP is merged into the current one and removed from
    ``state.blocks``.
    """
    state.blocks[0].warpins_count = 1

    # Iterate over a snapshot: blocks may be popped from state.blocks below.
    for block in state.blocks[:-1]:
        # Skip blocks that were merged into their predecessor and popped.
        # The previous check, `state.blocks.__contains__(block) is None`,
        # was always False because __contains__ returns a bool.
        if block not in state.blocks:
            continue

        state.block = block

        end_addr = block.last_address + 1
        start_addr = max(block.last_address - 1, block.first_address)

        # Catch certain double unconditional jumps caused by logical
        # primitives in expressions:
        if start_addr == (end_addr - 1) \
                and end_addr + 1 < len(instructions) \
                and instructions[start_addr].opcode == ins.JMP.opcode \
                and instructions[end_addr].opcode == ins.JMP.opcode \
                and instructions[start_addr].A == instructions[end_addr].A \
                and instructions[start_addr].CD == 0:

            end_instruction_destination = \
                end_addr + instructions[end_addr].CD + 1
            target_instruction_A = instructions[start_addr].A
            exit_instruction_found = False

            # When two consecutive jumps are found with the same A operand,
            # lookahead for the end jump.
            following_destination = -1
            for j in range(end_addr + 1, len(instructions) - 1):
                following_instruction = instructions[j]
                if following_instruction.opcode == ins.JMP.opcode \
                        and following_instruction.A == target_instruction_A:
                    following_destination = get_jump_destination(
                        j, following_instruction)
                    exit_instruction_found = True
                    break

            # If we find the exit jump and we're not skipping it
            # (if true then break else), form the original two jumps into a
            # fake conditional warp.
            if exit_instruction_found \
                    and end_instruction_destination <= following_destination:
                fixed_instruction = ins.ISF()
                fixed_instruction.CD = ins.SLOT_FALSE
                instructions[start_addr] = fixed_instruction

                # Absorb the next block (the second JMP) into this one.
                state.blocks.pop(state.blocks.index(block) + 1)
                block.last_address += 1

                start_addr = max(block.last_address - 1, block.first_address)
                end_addr = block.last_address + 1

        warp = instructions[start_addr:end_addr]

        block.warp, shift = _build_warp(state, block.last_address, warp)

        block._last_body_addr = block.last_address - shift
        block.warp._addr = block.last_address - shift + 1

    last_block = state.blocks[-1]
    last_block.warp = nodes.EndWarp()

    last_block._last_body_addr = last_block.last_address
    last_block.warp._addr = last_block.last_address
def _fix_broken_repeat_until_loops(state, instructions):
    """Insert a fake condition + back-jump for suspected "repeat until true" loops.

    For each LOOP instruction, the instruction just before the loop exit is
    inspected. When the fix applies, a fake ISF on ``ins.SLOT_TRUE`` and a
    JMP are inserted after it via ``_insert_instruction``, and the CD operand
    of certain JMPs inside the loop body is reduced by the number of inserted
    instructions.

    ``instructions`` is modified in place.
    """
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source - in particular whether the fix-up is
    # nested under the "not a JMP" check, and the placement of the
    # `leading_jump` / `prev_jump` resets. Confirm against the original file.
    # NOTE(review): instructions are inserted while this enumerate() is still
    # iterating the same list - confirm that is intended.
    enumerated_instructions = enumerate(instructions)
    for i, instruction in enumerated_instructions:
        if instruction.opcode == ins.LOOP.opcode:
            # Check for the conditional jump that restarts the loop
            loop_exit_addr = get_jump_destination(i, instruction)
            loop_condition_addr = loop_exit_addr - 1
            loop_condition_instruction = instructions[loop_condition_addr]
            if not loop_condition_instruction.opcode == ins.JMP.opcode:
                # Its destination is at or before the LOOP: leave this loop alone.
                if get_jump_destination(loop_condition_addr,
                                        loop_condition_instruction) <= i:
                    continue
                # It's not there, so this is probably a repeat-until true loop.
                # We need a fake conditional warp that is treated as 'true' by the writer
                fixed_cond_instruction = ins.ISF()
                fixed_cond_instruction.CD = ins.SLOT_TRUE
                # Resulting jump to the loop starting point
                fixed_jump_instruction = ins.JMP()
                fixed_jump_instruction.CD = i - loop_condition_addr - 1
                # Add fake conditional instructions.
                # Both go in at the same index, so the condition (inserted
                # second) ends up in front of the jump.
                insertion_index = loop_condition_addr + 1
                _insert_instruction(state, instructions, insertion_index,
                                    fixed_jump_instruction)
                _insert_instruction(state, instructions, insertion_index,
                                    fixed_cond_instruction)
                # Number of instructions inserted above
                shift = 2
                # Fix non-break destinations within the loop
                # Breaks in the empty-condition loop point towards the same exit destination
                # as non-breaks, so we'll have to search for a pattern of jumps.
                leading_jump = False
                start_index = i + 1
                for j in range(start_index, insertion_index):
                    checked_instruction = instructions[j]
                    # Look for following JMP instructions
                    if checked_instruction.opcode == ins.JMP.opcode:
                        # Leading jump indicates this is a break?
                        if not leading_jump:
                            checked_instruction_destination \
                                = get_jump_destination(j, checked_instruction)
                            # If the destination would've been moved
                            if checked_instruction.CD >= shift \
                                    and checked_instruction_destination == insertion_index + shift:
                                # Check for an inverted jump pair
                                next_index = j + 1
                                following_instruction = instructions[
                                    next_index]
                                if following_instruction.opcode == ins.JMP.opcode:
                                    following_destination \
                                        = get_jump_destination(next_index, following_instruction)
                                    # e.g. goto 277 followed directly by goto 176
                                    if following_destination < checked_instruction_destination:
                                        leading_jump = True
                                        continue
                                    # e.g. goto 277 followed directly by goto 277
                                    elif following_destination == checked_instruction_destination:
                                        leading_jump = False
                                        continue
                                # Check for else-break-end following this jump
                                following_else_break_found = False
                                # True while the previously scanned instruction was a JMP
                                prev_jump = False
                                for k in range(next_index, insertion_index):
                                    following_instruction = instructions[k]
                                    if following_instruction.opcode == ins.JMP.opcode:
                                        if not prev_jump:
                                            prev_jump = True
                                        else:
                                            following_destination \
                                                = get_jump_destination(k, following_instruction)
                                            # Don't adjust the checked jump, it's probably a break
                                            if following_instruction.CD >= shift \
                                                    and following_destination \
                                                    == checked_instruction_destination:
                                                following_else_break_found = True
                                                break
                                            prev_jump = False
                                    else:
                                        if prev_jump:
                                            last_destination \
                                                = get_jump_destination(k - 1, instructions[k - 1])
                                            # We can adjust, it's probably not a break
                                            if last_destination < checked_instruction_destination:
                                                break
                                        prev_jump = False
                                if not following_else_break_found:
                                    # Pull the destination back by the inserted count
                                    checked_instruction.CD -= shift
                        leading_jump = True
                    else:
                        leading_jump = False