def __init__(self, env, batch_size):
    """Set up DDPG actor/critic networks, their targets, optimizers and
    the replay memory for the given gym-style `env`."""
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    # Hyper-parameters.
    self.batch_size = batch_size
    self.tau = 1e-2        # soft-update rate for target networks
    self.gamma = 0.99      # discount factor
    replay_capacity = 1000000
    lr_actor = 1e-4
    lr_critic = 1e-3

    self.critic_loss_fn = nn.MSELoss()

    # Actor and its target start out with identical weights.
    self.actor = DdpgActor(obs_dim, act_dim,
                           env.action_space.high, env.action_space.low)
    self.actor_target = DdpgActor(obs_dim, act_dim,
                                  env.action_space.high, env.action_space.low)
    self.copy_networks(self.actor, self.actor_target)

    # Critic and its target start out with identical weights.
    self.critic = Critic(obs_dim, act_dim)
    self.critic_target = Critic(obs_dim, act_dim)
    self.copy_networks(self.critic, self.critic_target)

    self.memory = Memory(replay_capacity)

    self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr_actor)
    self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=lr_critic)
def __init__(self):
    """Create the shared collaborators, build every page, then show the
    default page (input monitor 'a')."""
    self.capture_view = CaptureView.instance()
    self.memory = Memory()
    self.cursor = Cursor()
    # Pages must exist before the name table and the initial page switch.
    self.setup_pages()
    self.setup_name_table()
    self.monitor, self.channel = 'a', 1
    self.set_page('input_monitor.' + self.monitor)
def __init__(self, filename, raw_type, raw_base, raw_big_endian, database):
    """Build a disassembler for `filename`, sharing state with `database`.

    When the database was loaded from disk its memory map, symbols and
    imports are reused; otherwise fresh ones are created and stored back
    into the database object.
    """
    import capstone as CAPSTONE
    self.capstone_inst = {} # capstone instruction cache
    if database.loaded:
        self.mem = database.mem
    else:
        self.mem = Memory()
        database.mem = self.mem
    self.binary = Binary(self.mem, filename, raw_type, raw_base, raw_big_endian)
    self.binary.load_section_names()
    # Architecture/mode are detected from the binary; abort early when
    # they cannot be determined.
    arch, mode = self.binary.get_arch()
    if arch is None or mode is None:
        raise ExcArch(self.binary.get_arch_string())
    # The following attributes alias mutable containers owned by the
    # database object, so updates here are visible to the database.
    self.jmptables = database.jmptables
    self.user_inline_comments = database.user_inline_comments
    self.internal_inline_comments = database.internal_inline_comments
    self.user_previous_comments = database.user_previous_comments
    self.internal_previous_comments = database.internal_previous_comments
    self.functions = database.functions
    self.func_id = database.func_id
    self.end_functions = database.end_functions
    self.xrefs = database.xrefs
    # TODO: is it a global constant or $gp can change during the execution ?
    self.mips_gp = database.mips_gp
    if database.loaded:
        self.binary.symbols = database.symbols
        self.binary.reverse_symbols = database.reverse_symbols
        self.binary.imports = database.imports
    else:
        self.binary.load_symbols()
        database.symbols = self.binary.symbols
        database.reverse_symbols = self.binary.reverse_symbols
        database.imports = self.binary.imports
    self.capstone = CAPSTONE
    self.md = CAPSTONE.Cs(arch, mode)
    self.md.detail = True  # operand details are inspected later (e.g. graph building)
    self.arch = arch
    self.mode = mode
    for s in self.binary.iter_sections():
        s.big_endian = self.mode & self.capstone.CS_MODE_BIG_ENDIAN
        # TODO: useful ?
        if not database.loaded:
            self.mem.add(s.start, s.end, MEM_UNK)
def __load_memory(self, data):
    """Restore the memory map from the serialized database dict `data`.

    Databases saved by previous versions do not contain the "mem_code"
    key; in that case the freshly created Memory is left empty.
    """
    self.mem = Memory()
    try:
        self.mem.code = data["mem_code"]
    except KeyError:
        # "mem_code" is not available in previous versions; this
        # fallback will be removed in the future.  A bare `except:`
        # here would also have hidden unrelated programming errors.
        pass
def __init__(self, env, batch_size):
    """Set up the SAC actor, twin Q networks with targets, their
    optimizers, the replay memory and the learned entropy temperature."""
    obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    # Hyper-parameters.
    self.batch_size = batch_size
    self.tau = 1e-2
    self.gamma = 0.99
    self.q_lr = 3e-4
    self.actor_lr = 3e-4
    self.alpha_lr = 3e-3
    self.update_step = 0
    self.delay_step = 2
    self.action_range = [env.action_space.low, env.action_space.high]
    self.memory = Memory(1000000)

    # Entropy temperature: alpha is derived from a learnable log value.
    self.alpha = 0.2
    self.target_entropy = -torch.prod(torch.Tensor(
        env.action_space.shape)).item()
    self.log_alpha = torch.zeros(1, requires_grad=True)
    self.alpha_optim = optim.Adam([self.log_alpha], lr=self.alpha_lr)

    self.actor = SacActor(obs_dim, act_dim)
    self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                      lr=self.actor_lr)

    # First Q network and its target, initialised with identical weights.
    self.q_net_1 = Critic(obs_dim, act_dim)
    self.q_net_1_target = Critic(obs_dim, act_dim)
    self.copy_networks(self.q_net_1, self.q_net_1_target)
    self.q_net_1_optimizer = optim.Adam(self.q_net_1.parameters(),
                                        lr=self.q_lr)

    # Second Q network and its target, initialised with identical weights.
    self.q_net_2 = Critic(obs_dim, act_dim)
    self.q_net_2_target = Critic(obs_dim, act_dim)
    self.copy_networks(self.q_net_2, self.q_net_2_target)
    self.q_net_2_optimizer = optim.Adam(self.q_net_2.parameters(),
                                        lr=self.q_lr)
def __load_memory(self, data):
    """Restore the memory map from the serialized database dict `data`.

    Version -1 databases stored the map under "mem_code"; each of those
    entries is padded with a trailing -1.  Newer databases store the map
    under "mem".  A database missing both keys leaves the Memory empty.
    """
    self.mem = Memory()
    try:
        if self.version == -1:
            self.mem.mm = data["mem_code"]
            # Old-format entries lack the final field; pad with -1.
            for ad in self.mem.mm:
                self.mem.mm[ad].append(-1)
            return
        self.mem.mm = data["mem"]
    except KeyError:
        # Key not available in previous versions; this fallback will be
        # removed in the future.  The former bare `except:` also hid
        # genuine errors (e.g. wrong value types), so it is narrowed.
        pass
class MemoryTest(unittest.TestCase):
    """Unit tests for a capacity-1 Memory buffer."""

    def setUp(self):
        # Capacity of one so the fill test can observe eviction.
        self.memory = Memory(1)

    def test_add_memory(self):
        self.memory.add_memory("string")
        self.assertEqual(self.memory.buffer, ["string"])

    def test_memory_fill(self):
        # Adding past capacity keeps only the newest entry.
        for item in ("string 1", "string 2"):
            self.memory.add_memory(item)
        self.assertEqual(self.memory.buffer, ["string 2"])

    def test_sample_memory(self):
        # Asking for more samples than stored returns what exists.
        self.memory.add_memory("string")
        self.assertEqual(self.memory.sample_memory(3), ["string"])
class Disassembler():
    """Capstone-based disassembler tied to a Binary and a shared database.

    Caches decoded instructions, walks sections to dump assembly/data,
    and builds per-function control-flow graphs.
    """

    def __init__(self, filename, raw_type, raw_base, raw_big_endian, database):
        """Load `filename`, detect arch/mode, and share symbol/memory
        state with `database` (reusing it when already loaded)."""
        import capstone as CAPSTONE
        self.capstone_inst = {} # capstone instruction cache
        self.binary = Binary(filename, raw_type, raw_base, raw_big_endian)
        arch, mode = self.binary.get_arch()
        if arch is None or mode is None:
            raise ExcArch(self.binary.get_arch_string())
        if database.loaded:
            self.binary.symbols = database.symbols
            self.binary.reverse_symbols = database.reverse_symbols
            self.mem = database.mem
        else:
            self.binary.load_symbols()
            database.symbols = self.binary.symbols
            database.reverse_symbols = self.binary.reverse_symbols
            self.mem = Memory()
            database.mem = self.mem
        # These alias mutable containers owned by the database object.
        self.jmptables = database.jmptables
        self.user_inline_comments = database.user_inline_comments
        self.internal_inline_comments = database.internal_inline_comments
        self.user_previous_comments = database.user_previous_comments
        self.internal_previous_comments = database.internal_previous_comments
        self.functions = database.functions
        self.end_functions = database.end_functions
        # TODO: is it a global constant or $gp can change during the execution ?
        self.mips_gp = database.mips_gp
        self.binary.load_section_names()
        self.capstone = CAPSTONE
        self.md = CAPSTONE.Cs(arch, mode)
        self.md.detail = True  # operand details are needed by get_graph
        self.arch = arch
        self.mode = mode
        for s in self.binary.iter_sections():
            s.big_endian = self.mode & self.capstone.CS_MODE_BIG_ENDIAN
            if not database.loaded:
                self.mem.add(s.start, s.end, MEM_UNK)

    def get_unpack_str(self, size_word):
        """Return the struct format for a word of `size_word` bytes
        (1/2/4/8) in the binary's endianness, or None otherwise."""
        if self.mode & self.capstone.CS_MODE_BIG_ENDIAN:
            endian = ">"
        else:
            endian = "<"
        if size_word == 1:
            unpack_str = endian + "B"
        elif size_word == 2:
            unpack_str = endian + "H"
        elif size_word == 4:
            unpack_str = endian + "L"
        elif size_word == 8:
            unpack_str = endian + "Q"
        else:
            return None
        return unpack_str

    def add_symbol(self, addr, name):
        """Bind `name` to `addr` (type SYM_UNK), replacing any previous
        binding of the same name. Returns `name`."""
        if name in self.binary.symbols:
            last = self.binary.symbols[name]
            # NOTE(review): symbols[name] holds [addr, type] (see below),
            # so using it directly as a reverse_symbols key looks wrong —
            # verify this branch against the Binary implementation.
            del self.binary.reverse_symbols[last]
        self.binary.symbols[name] = [addr, SYM_UNK]
        self.binary.reverse_symbols[addr] = [name, SYM_UNK]
        return name

    # TODO: create a function in SectionAbs
    def read_array(self, ad, array_max_size, size_word, s=None):
        """Read up to `array_max_size` words of `size_word` bytes starting
        at `ad` (within section `s`, looked up when None)."""
        unpack_str = self.get_unpack_str(size_word)
        N = size_word * array_max_size
        if s is None:
            s = self.binary.get_section(ad)
        array = []
        l = 0
        while l < array_max_size:
            buf = s.read(ad, N)
            if not buf:
                break
            i = 0
            while i < len(buf):
                b = buf[i:i + size_word]
                # Stop at the section end or on a truncated word.
                if ad > s.end or len(b) != size_word:
                    return array
                w = struct.unpack(unpack_str, b)[0]
                array.append(w)
                ad += size_word
                i += size_word
                l += 1
                if l >= array_max_size:
                    return array
        return array

    def load_arch_module(self):
        """Return the lib.arch submodule matching the detected arch."""
        if self.arch == self.capstone.CS_ARCH_X86:
            import lib.arch.x86 as ARCH
        elif self.arch == self.capstone.CS_ARCH_ARM:
            import lib.arch.arm as ARCH
        elif self.arch == self.capstone.CS_ARCH_MIPS:
            import lib.arch.mips as ARCH
        else:
            raise NotImplementedError
        return ARCH

    def get_addr_from_string(self, opt_addr, raw=False):
        """Resolve `opt_addr` (hex string, symbol or section name) to an
        address; defaults to main/_main. Raises ExcSymNotFound."""
        if opt_addr is None:
            if raw:
                return 0
            search = ["main", "_main"]
        else:
            search = [opt_addr]
        for s in search:
            if s.startswith("0x"):
                try:
                    a = int(opt_addr, 16)
                except:
                    raise ExcSymNotFound(search[0])
            else:
                a = self.binary.symbols.get(s, -1)
                if a == -1:
                    a = self.binary.section_names.get(s, -1)
                else:
                    a = a[0] # it contains [ad, type]
            if a != -1:
                return a
        raise ExcSymNotFound(search[0])

    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """Render disassembly starting at ctx.entry_addr for `lines`
        lines, or up to address `until` when it is != -1 (visual mode).
        Returns the arch Output object, or None when no section exists."""
        from capstone import CS_OP_IMM
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output
        ad = ctx.entry_addr
        s = self.binary.get_section(ctx.entry_addr)
        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start
        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.section_prefix = True
        o.curr_section = s
        l = 0
        while 1:
            # Print a section banner when entering a section.
            if ad == s.start:
                o._new_line()
                o._dash()
                o._section(s.name)
                o._add(" 0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()
            while ((l < lines and until == -1) or (ad != until and until != -1)) \
                    and ad <= s.end:
                if self.mem.is_code(ad):
                    # TODO optimize
                    if ad in self.functions:
                        if not o.is_last_2_line_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()
                    i = self.lazy_disasm(ad, s.start)
                    o._asm_inst(i)
                    if ad in self.end_functions:
                        for e in self.end_functions[ad]:
                            sy = self.binary.reverse_symbols[e][0]
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()
                    ad += i.size
                else:
                    # Unknown bytes are dumped as raw db lines.
                    if o.is_symbol(ad):
                        o._symbol(ad)
                        o._new_line()
                    o._address(ad)
                    o._db(s.read_byte(ad))
                    o._new_line()
                    ad += 1
                l += 1
            if (l >= lines and until == -1) or (ad == until and until != -1):
                break
            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutives !
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                ad = s.start
                if ad == until:
                    break
            o.curr_section = s
        if until in self.functions:
            o._new_line()
        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)
        o.join_lines()
        # TODO: move it in the analyzer
        if self.binary.type == T_BIN_PE:
            # TODO: if ret != 0 : database is modified
            self.binary.pe_reverse_stripped_symbols(self, o.addr_line)
        return o

    def find_addr_before(self, ad):
        """Step backwards from `ad` to find a start address roughly
        NB_LINES_TO_DISASM display lines earlier (crossing sections)."""
        l = 0
        s = self.binary.get_section(ad)
        while l < NB_LINES_TO_DISASM:
            if self.mem.is_code(ad):
                size = self.mem.code[ad][0]
                l += 1
                # NOTE(review): decrementing the line counter by the
                # instruction size looks suspicious (ad is only moved by
                # one byte below) — verify against the original layout.
                l -= size
            else:
                l += 1
            if ad == s.start:
                s = self.binary.get_prev_section(ad)
                if s is None:
                    return ad
                ad = s.end
            ad -= 1
        return ad

    def dump_data_ascii(self, ctx, lines):
        """Print data at ctx.entry_addr, grouping printable NUL-terminated
        runs as quoted strings and other bytes one per line."""
        N = 128 # read by block of 128 bytes
        addr = ctx.entry_addr
        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()
        l = 0
        ascii_str = []
        addr_str = -1
        while l < lines:
            buf = s.read(addr, N)
            if not buf:
                break
            i = 0
            while i < len(buf):
                if addr > s.end:
                    return
                # Scan forward over printable characters.
                j = i
                while j < len(buf):
                    c = buf[j]
                    if c not in BYTES_PRINTABLE_SET:
                        break
                    if addr_str == -1:
                        addr_str = addr
                    ascii_str.append(c)
                    j += 1
                # The printable run hit the end of the buffer: continue
                # accumulating with the next read.
                if c != 0 and j == len(buf):
                    addr += j - i
                    break
                if c == 0 and len(ascii_str) >= 2:
                    # NUL-terminated printable run: print as a string.
                    print_no_end(color_addr(addr_str))
                    print_no_end(color_string(
                        "\"" + "".join(map(get_char, ascii_str)) + "\""))
                    print(", 0")
                    addr += j - i
                    i = j
                else:
                    # Lone byte: print its hex value.
                    print_no_end(color_addr(addr))
                    print("0x%.2x " % buf[i])
                    addr += 1
                    i += 1
                addr_str = -1
                ascii_str = []
                l += 1
                if l >= lines:
                    return

    def dump_data(self, ctx, lines, size_word):
        """Print `lines` words of `size_word` bytes starting at
        ctx.entry_addr, annotating section names and known symbols."""
        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()
        ad = ctx.entry_addr
        for w in self.read_array(ctx.entry_addr, lines, size_word, s):
            if ad in self.binary.reverse_symbols:
                print(color_symbol(self.binary.reverse_symbols[ad][0]))
            print_no_end(color_addr(ad))
            print_no_end("0x%.2x" % w)
            # If the word looks like an address, show where it points.
            section = self.binary.get_section(w)
            if section is not None:
                print_no_end(" (")
                print_no_end(color_section(section.name))
                print_no_end(")")
                if size_word >= 4 and w in self.binary.reverse_symbols:
                    print_no_end(" ")
                    print_no_end(color_symbol(self.binary.reverse_symbols[w][0]))
            ad += size_word
            print()

    def print_calls(self, ctx):
        """Linearly disassemble the section of ctx.entry_addr and print
        only the call instructions."""
        ARCH = self.load_arch_module()
        ARCH_UTILS = ARCH.utils
        ARCH_OUTPUT = ARCH.output
        s = self.binary.get_section(ctx.entry_addr)
        s.print_header()
        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        ad = s.start
        while ad < s.end:
            i = self.lazy_disasm(ad, s.start)
            if i is None:
                ad += 1
            else:
                ad += i.size
                if ARCH_UTILS.is_call(i):
                    o._asm_inst(i)
        o.print()

    #
    # sym_filter : search a symbol, non case-sensitive
    #     if it starts with '-', it prints non-matching symbols
    #
    def print_symbols(self, print_sections, sym_filter=None, only_func=False):
        """Print (optionally filtered) symbols with their addresses and,
        when `print_sections`, the section each one lives in."""
        if sym_filter is not None:
            sym_filter = sym_filter.lower()
            if sym_filter[0] == "-":
                invert_match = True
                sym_filter = sym_filter[1:]
            else:
                invert_match = False
        total = 0
        # TODO: race condition with the analyzer
        for sy in list(self.binary.symbols):
            addr, ty = self.binary.symbols[sy]
            if only_func and ty != SYM_FUNC:
                continue
            if sym_filter is None or \
                    (invert_match and sym_filter not in sy.lower()) or \
                    (not invert_match and sym_filter in sy.lower()):
                if sy:
                    section = self.binary.get_section(addr)
                    print_no_end(color_addr(addr) + " " + sy)
                    if print_sections and section is not None:
                        print_no_end(" (" + color_section(section.name) + ")")
                    print()
                    total += 1
        print("Total:", total)

    def lazy_disasm(self, addr, stay_in_section=-1, s=None):
        """Return the capstone instruction at `addr`, using and filling
        the instruction cache by decoding blocks of 128 bytes."""
        s = self.binary.get_section(addr)
        if s is None:
            return None
        # if stay_in_section != -1 and s.start != stay_in_section:
            # return None, s
        if addr in self.capstone_inst:
            return self.capstone_inst[addr]
        # TODO: remove when it's too big ?
        if len(self.capstone_inst) > CAPSTONE_CACHE_SIZE:
            self.capstone_inst.clear()
        # Disassemble by block of N bytes
        N = 128
        d = s.read(addr, N)
        gen = self.md.disasm(d, addr)
        try:
            first = next(gen)
        except StopIteration:
            return None
        self.capstone_inst[first.address] = first
        # Cache the rest of the block until we meet an already-cached one.
        for i in gen:
            if i.address in self.capstone_inst:
                break
            self.capstone_inst[i.address] = i
        return first

    def __prefetch_inst(self, inst):
        """Return the instruction in the MIPS branch-delay slot."""
        return self.lazy_disasm(inst.address + inst.size)

    # Generate a flow graph of the given function (addr)
    def get_graph(self, entry_addr):
        """Build the control-flow graph rooted at `entry_addr`.

        Returns (graph_or_None, number_of_new_symbols)."""
        from capstone import CS_OP_IMM, CS_ARCH_MIPS
        ARCH_UTILS = self.load_arch_module().utils
        gph = Graph(self, entry_addr)
        stack = [entry_addr]
        start = time()
        prefetch = None
        addresses = set()
        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])
        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.
        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)
            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue
            if gph.exists(inst):
                continue
            addresses.add(ad)
            if ARCH_UTILS.is_ret(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                    addresses.add(prefetch.address)
                gph.new_node(inst, prefetch, None)
            elif ARCH_UTILS.is_uncond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                    addresses.add(prefetch.address)
                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]
                if op.type == CS_OP_IMM:
                    nxt = op.value.imm
                    stack.append(nxt)
                    gph.new_node(inst, prefetch, [nxt])
                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)
            elif ARCH_UTILS.is_cond_jump(inst):
                if self.arch == CS_ARCH_MIPS:
                    prefetch = self.__prefetch_inst(inst)
                    addresses.add(prefetch.address)
                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]
                if op.type == CS_OP_IMM:
                    # Fall-through successor; on MIPS it follows the
                    # delay-slot instruction.
                    if self.arch == CS_ARCH_MIPS:
                        direct_nxt = prefetch.address + prefetch.size
                    else:
                        direct_nxt = inst.address + inst.size
                    nxt_jmp = op.value.imm
                    stack.append(direct_nxt)
                    stack.append(nxt_jmp)
                    gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)
            else:
                nxt = inst.address + inst.size
                stack.append(nxt)
                gph.new_node(inst, None, [nxt])
        if len(gph.nodes) == 0:
            return None, 0
        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_symbols(self, addresses)
        else:
            nb_new_syms = 0
        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph built in %fs (%d instructions)" % (elapsed, len(gph.nodes)))
        return gph, nb_new_syms

    def add_jmptable(self, inst_addr, table_addr, entry_size, nb_entries):
        """Register the jump table at `table_addr` used by the
        instruction at `inst_addr`, and generate case comments."""
        name = self.add_symbol(table_addr, "jmptable_0x%x" % table_addr)
        table = self.read_array(table_addr, nb_entries, entry_size)
        self.jmptables[inst_addr] = Jmptable(inst_addr, table_addr, table, name)
        self.internal_inline_comments[inst_addr] = "switch statement %s" % name
        # Group case indices by target address (one target may serve
        # several cases).
        all_cases = {}
        for ad in table:
            all_cases[ad] = []
        case = 0
        for ad in table:
            all_cases[ad].append(case)
            case += 1
        for ad in all_cases:
            self.internal_previous_comments[ad] = \
                ["case %s %s" % (
                    ", ".join(map(str, all_cases[ad])),
                    name
                )]
class Mixer(object):
    """UI model for the mixer: owns the pages, the cursor and the device
    memory mirror, and routes cursor/value operations to the control
    currently under the cursor."""

    def __init__(self):
        self.capture_view = CaptureView.instance()
        self.memory = Memory()
        self.cursor = Cursor()
        self.setup_pages()
        self.setup_name_table()
        self.monitor = 'a'
        self.channel = 1
        self.set_page('input_monitor.' + self.monitor)

    def setup_pages(self):
        """Build the page-name -> page-object table (fixed pages plus one
        page per channel 1..16)."""
        self.pages = {
            "input_monitor.a": InputPage(self, 'a'),
            "input_monitor.b": InputPage(self, 'b'),
            "input_monitor.c": InputPage(self, 'c'),
            "input_monitor.d": InputPage(self, 'd'),
            "daw_monitor.a": OutputPage(self, 'a'),
            "daw_monitor.b": OutputPage(self, 'b'),
            "daw_monitor.c": OutputPage(self, 'c'),
            "daw_monitor.d": OutputPage(self, 'd'),
            "preamp": PreampPage(self),
            "compressor": CompressorPage(self),
            "line": LinePage(self),
            "reverb": ReverbPage(self),
            "patchbay": Patchbay(self),
        }
        for ch in range(0, 16):
            self.pages |= {"channel.%d" % (ch + 1): ChannelPage(self, ch + 1)}

    def setup_controls(self):
        """Cache the current page object, its header and control grid."""
        self.page = self.pages[self.page_name]
        # (fixed) removed a dead local `controls = []` that was never used.
        self.header = self.page.get_header()
        self.controls = self.page.get_controls()

    def setup_name_table(self):
        """Register every non-empty control of every page with the
        capture view."""
        for name, page in self.pages.items():
            for row in page.get_controls():
                for control in row:
                    if control is None:
                        continue
                    self.capture_view.add_name_to_table(control)

    def height(self):
        """Number of control rows on the current page."""
        return len(self.controls)

    def width(self):
        """Number of controls in the row under the cursor."""
        return len(self.controls[self.cursor.y])

    def set_page(self, page):
        """Switch to `page` and clamp the cursor into its control grid."""
        self.page_name = page
        self.setup_controls()
        self.cursor.y = min(self.cursor.y, self.height() - 1)
        self.cursor.x = min(self.cursor.x, self.width() - 1)

    def set_monitor(self, m):
        """Select monitor `m`; when a monitor page is shown, switch it to
        the new monitor (otherwise just report the current page)."""
        self.monitor = m
        if 'monitor.' in self.page_name:
            self.set_page(self.page_name[:-1] + self.monitor)
        else:
            print(self.page_name)

    def set_channel(self, ch):
        """Select channel `ch`; when a channel page is shown, switch it to
        the new channel (otherwise just report the current page)."""
        self.channel = ch
        if 'channel.' in self.page_name:
            self.set_page("channel.%d" % ch)
        else:
            print(self.page_name)

    def cursor_down(self):
        """Move down one row, doubling x when the new row is at least
        twice as wide (keeps the horizontal position proportional)."""
        w = self.width()
        if self.cursor.y + 1 < self.height():
            self.cursor.y += 1
            if self.width() >= w * 2:
                self.cursor.x *= 2

    def cursor_up(self):
        """Move up one row, halving x when the new row is at most half as
        wide."""
        w = self.width()
        if self.cursor.y > 0:
            self.cursor.y -= 1
            if self.width() <= w // 2:
                self.cursor.x //= 2

    def cursor_left(self):
        if self.cursor.x > 0:
            self.cursor.x -= 1

    def cursor_right(self):
        if self.cursor.x + 1 < self.width():
            self.cursor.x += 1

    def get_selected_control(self):
        """Control object under the cursor (may be None for a gap)."""
        row = self.cursor.y
        col = self.cursor.x
        return self.controls[row][col]

    def get_selected_addr(self):
        """Device address of the selected control, or None for a gap."""
        control = self.get_selected_control()
        if control is None:
            return None
        return Capture.get_addr(control)

    def get_memory_value(self, control):
        """Current memory value for the given control name."""
        addr = Capture.get_addr(control)
        return self.memory.get_value(addr)

    def decrement_selected(self):
        """Decrement the selected control's value; returns (addr, data),
        both None-ish when nothing is selected."""
        addr = self.get_selected_addr()
        data = self.memory.decrement(addr) if addr else None
        return addr, data

    def increment_selected(self):
        """Increment the selected control's value; returns (addr, data)."""
        addr = self.get_selected_addr()
        data = self.memory.increment(addr) if addr else None
        return addr, data

    def zero_selected(self):
        """Reset the selected control's value; returns (addr, data)."""
        addr = self.get_selected_addr()
        data = self.memory.zero(addr) if addr else None
        return addr, data

    def set_memory_value(self, name, value):
        """Write `value` to the control called `name`; returns (addr, data)."""
        addr = Capture.get_addr(name)
        data = self.memory.set_value(addr, value)
        return addr, data
import unittest
from math import inf

from lib.memory import Memory
from lib.types import Volume

# Shared Memory instance; the tests below mutate it in place.
mem = Memory()


class TestMemory(unittest.TestCase):
    """Tests for Memory get/set/erase behaviour."""

    @classmethod
    def setUpClass(cls):
        cls.mem = mem
        # Register one known control name so address lookups resolve.
        cls.mem.capture_view.add_name_to_table(
            'input_monitor.a.channel.1.volume')

    def test_memory_get_empty(self):
        # An address that was never set reads back as None.
        self.assertEqual(None, self.mem.get(0))

    def test_memory_set_get_erase(self):
        self.mem.set(0, 1)
        self.assertEqual(mem.get(0), 1)
        self.mem.set(1, 2)
        # Setting one address must not disturb another.
        self.assertEqual(mem.get(0), 1)
        self.assertEqual(mem.get(1), 2)
        self.mem.set(0, 3)
        self.assertEqual(mem.get(0), 3)
        self.assertEqual(mem.get(1), 2)
        self.mem.erase(0)
        self.mem.erase(1)
        self.assertIsNone(self.mem.get(0))
class SacAgent(object):
    """Soft Actor-Critic agent with twin Q networks, target networks and
    a learned entropy temperature."""

    # Sub-directory names used by save_model()/load_model().
    actor_store_dir = 'actor'
    q_net_1_store_dir = 'q_1'
    q_net_2_store_dir = 'q_2'

    def __init__(self, env, batch_size):
        """Build networks, targets, optimizers and the replay memory for
        the given gym-style `env`."""
        self.batch_size = batch_size
        self.tau = 1e-2         # soft-update rate for target networks
        memory_size = 1000000
        self.gamma = 0.99       # discount factor
        self.q_lr = 3e-4
        self.actor_lr = 3e-4
        self.alpha_lr = 3e-3
        self.update_step = 0    # counts __one_update calls
        self.delay_step = 2     # actor/target update period
        self.action_range = [env.action_space.low, env.action_space.high]
        self.memory = Memory(memory_size)
        # entropy temperature
        self.alpha = 0.2
        self.target_entropy = -torch.prod(torch.Tensor(
            env.action_space.shape)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True)
        self.alpha_optim = optim.Adam([self.log_alpha], lr=self.alpha_lr)
        self.actor = SacActor(env.observation_space.shape[0],
                              env.action_space.shape[0])
        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=self.actor_lr)
        # First Q network and its target, initialised identically.
        self.q_net_1 = Critic(env.observation_space.shape[0],
                              env.action_space.shape[0])
        self.q_net_1_target = Critic(env.observation_space.shape[0],
                                     env.action_space.shape[0])
        self.copy_networks(self.q_net_1, self.q_net_1_target)
        self.q_net_1_optimizer = optim.Adam(self.q_net_1.parameters(),
                                            lr=self.q_lr)
        # Second Q network and its target, initialised identically.
        self.q_net_2 = Critic(env.observation_space.shape[0],
                              env.action_space.shape[0])
        self.q_net_2_target = Critic(env.observation_space.shape[0],
                                     env.action_space.shape[0])
        self.copy_networks(self.q_net_2, self.q_net_2_target)
        self.q_net_2_optimizer = optim.Adam(self.q_net_2.parameters(),
                                            lr=self.q_lr)

    def copy_networks(self, org_net, dest_net):
        """Hard-copy the parameters of org_net into dest_net."""
        for dest_param, param in zip(dest_net.parameters(),
                                     org_net.parameters()):
            dest_param.data.copy_(param.data)

    def get_test_action(self, state):
        """Deterministic (mean) action for evaluation, rescaled to the
        environment's action range."""
        # 100% deterministic. It is not always the best option to do it
        # this way
        state = torch.FloatTensor(state).unsqueeze(0)
        mean, log_std = self.actor.forward(state)
        action = torch.tanh(mean)
        action = action.detach().squeeze(0).numpy()
        return self.rescale_action(action)

    def get_action(self, state):
        """Stochastic action sampled from the policy, rescaled to the
        environment's action range."""
        state = torch.FloatTensor(state).unsqueeze(0)
        action, log_pi = self.actor.sample(state)
        action = action.detach().squeeze(0).numpy()
        return self.rescale_action(action)

    def rescale_action(self, action):
        """Map an action from [-1, 1] to [low, high]."""
        return action * (self.action_range[1] - self.action_range[0]) / 2.0 +\
            (self.action_range[1] + self.action_range[0]) / 2.0

    def save(self, state, action, reward, new_state, cost, fail):
        """Push one transition into the replay memory."""
        self.memory.push(state, action, reward, new_state, cost, fail)

    def save_model(self, data_dir):
        """Serialize the actor and both Q networks under `data_dir`."""
        actor_dir = os.path.join(data_dir, self.actor_store_dir)
        torch.save(self.actor, actor_dir)
        q_net_1_dir = os.path.join(data_dir, self.q_net_1_store_dir)
        torch.save(self.q_net_1, q_net_1_dir)
        q_net_2_dir = os.path.join(data_dir, self.q_net_2_store_dir)
        torch.save(self.q_net_2, q_net_2_dir)

    def load_model(self, data_dir):
        """Load the actor and Q networks from `data_dir` and refresh the
        target networks from the loaded weights."""
        actor_dir = os.path.join(data_dir, self.actor_store_dir)
        self.actor = torch.load(actor_dir)
        q_net_1_dir = os.path.join(data_dir, self.q_net_1_store_dir)
        self.q_net_1 = torch.load(q_net_1_dir)
        self.copy_networks(self.q_net_1, self.q_net_1_target)
        q_net_2_dir = os.path.join(data_dir, self.q_net_2_store_dir)
        self.q_net_2 = torch.load(q_net_2_dir)
        self.copy_networks(self.q_net_2, self.q_net_2_target)

    def update(self, num=1):
        """Run `num` SAC gradient updates."""
        for _ in range(num):
            self.__one_update()

    def __one_update(self):
        """One SAC update: critics every call; actor and target networks
        every `delay_step` calls; temperature every call.  No-op until
        the memory holds at least one batch."""
        if (len(self.memory) < self.batch_size):
            return
        states, actions, rewards, next_states, costs, fails = self.memory.get_batch(
            self.batch_size)
        not_fails = (fails == 0)
        # Soft Bellman target from the twin target critics.
        next_actions, next_log_pi = self.actor.sample(next_states)
        next_q_1 = self.q_net_1_target(next_states, next_actions)
        next_q_2 = self.q_net_2_target(next_states, next_actions)
        next_q_target = torch.min(next_q_1, next_q_2) - self.alpha * next_log_pi
        expected_q = rewards - costs + not_fails * self.gamma * next_q_target
        curr_q_1 = self.q_net_1.forward(states, actions)
        curr_q_2 = self.q_net_2.forward(states, actions)
        q1_loss = F.mse_loss(curr_q_1, expected_q.detach())
        q2_loss = F.mse_loss(curr_q_2, expected_q.detach())
        self.q_net_1_optimizer.zero_grad()
        q1_loss.backward()
        self.q_net_1_optimizer.step()
        self.q_net_2_optimizer.zero_grad()
        q2_loss.backward()
        self.q_net_2_optimizer.step()
        # delayed update for policy network and target q networks
        new_actions, log_pi = self.actor.sample(states)
        if self.update_step % self.delay_step == 0:
            min_q = torch.min(self.q_net_1.forward(states, new_actions),
                              self.q_net_2.forward(states, new_actions))
            actor_loss = (self.alpha * log_pi - min_q).mean()
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()
            # target networks
            for target_param, param in zip(self.q_net_1_target.parameters(),
                                           self.q_net_1.parameters()):
                target_param.data.copy_(self.tau * param +
                                        (1 - self.tau) * target_param)
            for target_param, param in zip(self.q_net_2_target.parameters(),
                                           self.q_net_2.parameters()):
                target_param.data.copy_(self.tau * param +
                                        (1 - self.tau) * target_param)
        # update temperature
        alpha_loss = (self.log_alpha *
                      (-log_pi - self.target_entropy).detach()).mean()
        self.alpha_optim.zero_grad()
        alpha_loss.backward()
        self.alpha_optim.step()
        self.alpha = self.log_alpha.exp()
        self.update_step += 1
class Disassembler():
    """Capstone-backed disassembler bound to a loaded binary and a database.

    Wraps a `Binary`, a `Memory` map of byte classifications (code / function
    / unknown), symbol tables, cross-references and jump tables, and exposes
    text-dump helpers (`dump_asm`, `dump_data*`, `dump_xrefs`) plus a
    per-function flow-graph builder (`get_graph`).
    """

    def __init__(self, filename, raw_type, raw_base, raw_big_endian, database):
        """Open `filename`, detect its architecture and (re)populate `database`.

        If `database.loaded` is true, previously saved state (memory map,
        symbols, imports) is reused; otherwise it is computed from the binary
        and written back into `database`.

        Raises ExcArch when capstone arch/mode cannot be determined.
        """
        import capstone as CAPSTONE
        self.capstone_inst = {} # capstone instruction cache
        if database.loaded:
            self.mem = database.mem
        else:
            self.mem = Memory()
            database.mem = self.mem
        self.binary = Binary(self.mem, filename, raw_type, raw_base,
                             raw_big_endian)
        self.binary.load_section_names()
        arch, mode = self.binary.get_arch()
        if arch is None or mode is None:
            raise ExcArch(self.binary.get_arch_string())
        # Shared, database-backed state (mutating these mutates the database).
        self.jmptables = database.jmptables
        self.user_inline_comments = database.user_inline_comments
        self.internal_inline_comments = database.internal_inline_comments
        self.user_previous_comments = database.user_previous_comments
        self.internal_previous_comments = database.internal_previous_comments
        self.functions = database.functions
        self.func_id = database.func_id
        self.end_functions = database.end_functions
        self.xrefs = database.xrefs
        # TODO: is it a global constant or $gp can change during the execution ?
        self.mips_gp = database.mips_gp
        if database.loaded:
            self.binary.symbols = database.symbols
            self.binary.reverse_symbols = database.reverse_symbols
            self.binary.imports = database.imports
        else:
            self.binary.load_symbols()
            database.symbols = self.binary.symbols
            database.reverse_symbols = self.binary.reverse_symbols
            database.imports = self.binary.imports
        self.capstone = CAPSTONE
        self.md = CAPSTONE.Cs(arch, mode)
        self.md.detail = True
        self.arch = arch
        self.mode = mode
        for s in self.binary.iter_sections():
            s.big_endian = self.mode & self.capstone.CS_MODE_BIG_ENDIAN
            # TODO: useful ?
            if not database.loaded:
                self.mem.add(s.start, s.end, MEM_UNK)

    def get_unpack_str(self, size_word):
        """Return a struct format string for a word of `size_word` bytes
        (1/2/4/8) in this binary's endianness, or None for other sizes.
        """
        if self.mode & self.capstone.CS_MODE_BIG_ENDIAN:
            endian = ">"
        else:
            endian = "<"
        if size_word == 1:
            unpack_str = endian + "B"
        elif size_word == 2:
            unpack_str = endian + "H"
        elif size_word == 4:
            unpack_str = endian + "L"
        elif size_word == 8:
            unpack_str = endian + "Q"
        else:
            return None
        return unpack_str

    def add_xref(self, from_ad, to_ad):
        """Record that `from_ad` references `to_ad` (address or list of
        addresses); duplicates are not added.
        """
        if isinstance(to_ad, list):
            for x in to_ad:
                if x in self.xrefs:
                    if from_ad not in self.xrefs[x]:
                        self.xrefs[x].append(from_ad)
                else:
                    self.xrefs[x] = [from_ad]
        else:
            if to_ad in self.xrefs:
                if from_ad not in self.xrefs[to_ad]:
                    self.xrefs[to_ad].append(from_ad)
            else:
                self.xrefs[to_ad] = [from_ad]

    def add_symbol(self, ad, name):
        """Bind `name` to address `ad`, removing any previous binding of
        either the name or the address, and mark the byte as MEM_UNK if the
        address is not yet in the memory map.  Returns `name`.
        """
        if name in self.binary.symbols:
            last = self.binary.symbols[name]
            del self.binary.reverse_symbols[last]
        if ad in self.binary.reverse_symbols:
            last = self.binary.reverse_symbols[ad]
            del self.binary.symbols[last]
        self.binary.symbols[name] = ad
        self.binary.reverse_symbols[ad] = name
        if not self.mem.exists(ad):
            self.mem.add(ad, 1, MEM_UNK)
        return name

    # TODO: create a function in SectionAbs
    def read_array(self, ad, array_max_size, size_word, s=None):
        """Read up to `array_max_size` words of `size_word` bytes starting at
        `ad` (within section `s`, looked up if None).  Stops early at section
        end or short reads and returns the list of unpacked word values.
        """
        unpack_str = self.get_unpack_str(size_word)
        N = size_word * array_max_size
        if s is None:
            s = self.binary.get_section(ad)
        array = []
        l = 0
        while l < array_max_size:
            buf = s.read(ad, N)
            if not buf:
                break
            i = 0
            while i < len(buf):
                b = buf[i:i + size_word]
                if ad > s.end or len(b) != size_word:
                    return array
                w = struct.unpack(unpack_str, b)[0]
                array.append(w)
                ad += size_word
                i += size_word
                l += 1
                if l >= array_max_size:
                    return array
        return array

    def load_arch_module(self):
        """Import and return the lib.arch module matching self.arch
        (x86 / arm / mips); raises NotImplementedError otherwise.
        """
        if self.arch == self.capstone.CS_ARCH_X86:
            import lib.arch.x86 as ARCH
        elif self.arch == self.capstone.CS_ARCH_ARM:
            import lib.arch.arm as ARCH
        elif self.arch == self.capstone.CS_ARCH_MIPS:
            import lib.arch.mips as ARCH
        else:
            raise NotImplementedError
        return ARCH

    def dump_xrefs(self, ctx, ad):
        """Build and return an arch Output listing every cross-reference to
        `ad`: code xrefs are shown as "label +/- offset" plus the disassembled
        instruction, data xrefs as a bare address.
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.print_labels = False
        for x in ctx.gctx.dis.xrefs[ad]:
            s = self.binary.get_section(x)
            if self.mem.is_code(x):
                func_id = self.mem.get_func_id(x)
                if func_id != -1:
                    fad = self.func_id[func_id]
                    o._label(fad)
                    diff = x - fad
                    if diff >= 0:
                        o._add(" + %d " % diff)
                    else:
                        o._add(" - %d " % (-diff))
                o._pad_width(20)
                i = self.lazy_disasm(x, s.start)
                o._asm_inst(i)
            else:
                o._address(x)
                o._new_line()
        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)
        o.join_lines()
        return o

    def is_label(self, ad):
        """True if `ad` has a symbol name or is the target of a xref."""
        return ad in self.binary.reverse_symbols or ad in self.xrefs

    def get_symbol(self, ad):
        """Return the symbol name for `ad`, or a generated sub_/loc_/unk_
        name based on the memory type; None when the type is none of those.
        """
        s = self.binary.reverse_symbols.get(ad, None)
        if s is None:
            ty = self.mem.get_type(ad)
            if ty == MEM_FUNC:
                return "sub_%x" % ad
            if ty == MEM_CODE:
                return "loc_%x" % ad
            if ty == MEM_UNK:
                return "unk_%x" % ad
        return s

    def dump_asm(self, ctx, lines=NB_LINES_TO_DISASM, until=-1):
        """Build and return an Output with a linear disassembly listing
        starting at ctx.entry, for `lines` lines or up to address `until`
        (until != -1 only in visual mode).  Non-code bytes are emitted as
        `db` lines; PE imports are never rendered as subroutines.
        Returns None when no section contains/follows the entry.
        """
        ARCH = self.load_arch_module()
        ARCH_OUTPUT = ARCH.output
        ad = ctx.entry
        s = self.binary.get_section(ad)
        if s is None:
            # until is != -1 only from the visual mode
            # It allows to not go before the first section.
            if until != -1:
                return None
            # Get the next section, it's not mandatory that sections
            # are consecutives !
            s = self.binary.get_next_section(ad)
            if s is None:
                return None
            ad = s.start
        o = ARCH_OUTPUT.Output(ctx)
        o._new_line()
        o.section_prefix = True
        o.curr_section = s
        o.mode_dump = True
        l = 0
        while 1:
            # Section banner when entering a section at its first byte.
            if ad == s.start:
                o._new_line()
                o._dash()
                o._section(s.name)
                o._add(" 0x%x -> 0x%x" % (s.start, s.end))
                o._new_line()
                o._new_line()
            while ((l < lines and until == -1) or (ad < until and until != -1)) \
                    and ad <= s.end:
                # A PE import should not be displayed as a subroutine
                if not(self.binary.type == T_BIN_PE and ad in self.binary.imports) \
                        and self.mem.is_code(ad):
                    is_func = ad in self.functions and self.functions[ad][0] != -1
                    if is_func:
                        if not o.is_last_2_line_empty():
                            o._new_line()
                        o._dash()
                        o._user_comment("; SUBROUTINE")
                        o._new_line()
                        o._dash()
                    i = self.lazy_disasm(ad, s.start)
                    if not is_func and ad in self.xrefs and \
                            not o.is_last_2_line_empty():
                        o._new_line()
                    o._asm_inst(i)
                    if ad in self.end_functions:
                        for fad in self.end_functions[ad]:
                            sy = self.get_symbol(fad)
                            o._user_comment("; end function %s" % sy)
                            o._new_line()
                        o._new_line()
                    ad += i.size
                else:
                    # Raw byte: emit a `db` line.
                    o._label_and_address(ad)
                    o.set_line(ad)
                    o._db(s.read_byte(ad))
                    o._new_line()
                    ad += 1
                l += 1
            if (l >= lines and until == -1) or (ad >= until and until != -1):
                break
            s = self.binary.get_section(ad)
            if s is None:
                # Get the next section, it's not mandatory that sections
                # are consecutives !
                s = self.binary.get_next_section(ad)
                if s is None:
                    break
                ad = s.start
                if until != -1 and ad >= until:
                    break
            o.curr_section = s
        if until in self.functions:
            o._new_line()
        # remove the last empty line
        o.lines.pop(-1)
        o.token_lines.pop(-1)
        o.join_lines()
        return o

    def find_addr_before(self, ad):
        """Walk backwards from `ad` (crossing section boundaries) for roughly
        NB_LINES_TO_DISASM display lines and return the reached address.
        """
        l = 0
        s = self.binary.get_section(ad)
        while l < NB_LINES_TO_DISASM:
            if self.mem.is_code(ad):
                size = self.mem.mm[ad][0]
                l += 1
                # NOTE(review): `l -= size` decreases the line counter by the
                # instruction size, which can drive `l` negative — looks
                # suspicious; confirm the original line-counting intent.
                l -= size
            else:
                l += 1
            if ad == s.start:
                s = self.binary.get_prev_section(ad)
                if s is None:
                    return ad
                ad = s.end
            ad -= 1
        return ad

    def dump_data_ascii(self, ctx, lines):
        """Print `lines` display lines of data at ctx.entry, grouping runs of
        printable bytes terminated by NUL into quoted strings and printing
        other bytes individually in hex.
        """
        N = 128 # read by block of 128 bytes
        ad = ctx.entry
        s = self.binary.get_section(ad)
        print(hex(ad))
        s.print_header()
        l = 0
        ascii_str = []      # printable bytes accumulated for the current run
        ad_str = -1         # address where the current run started
        while l < lines:
            buf = s.read(ad, N)
            if not buf:
                break
            i = 0
            while i < len(buf):
                if ad > s.end:
                    return
                j = i
                # Extend the run while bytes are printable.
                while j < len(buf):
                    c = buf[j]
                    if c not in BYTES_PRINTABLE_SET:
                        break
                    if ad_str == -1:
                        ad_str = ad
                    ascii_str.append(c)
                    j += 1
                # Run hit the end of the buffer: read the next chunk first.
                if c != 0 and j == len(buf):
                    ad += j - i
                    break
                if c == 0 and len(ascii_str) >= 2:
                    # NUL-terminated printable run: print as a string literal.
                    if self.is_label(ad_str):
                        print(color_symbol(self.get_symbol(ad_str)))
                    print_no_end(color_addr(ad_str))
                    print_no_end(
                        color_string("\"" +
                                     "".join(map(get_char, ascii_str)) + "\""))
                    print(", 0")
                    ad += j - i
                    i = j
                else:
                    # Lone byte: print in hex.
                    if self.is_label(ad):
                        print(color_symbol(self.get_symbol(ad)))
                    print_no_end(color_addr(ad))
                    print("0x%.2x " % buf[i])
                    ad += 1
                    i += 1
                ad_str = -1
                ascii_str = []
                l += 1
                if l >= lines:
                    return

    def dump_data(self, ctx, lines, size_word):
        """Print `lines` words of `size_word` bytes starting at ctx.entry,
        annotating each value with its containing section and symbol.
        """
        ad = ctx.entry
        s = self.binary.get_section(ad)
        s.print_header()
        for w in self.read_array(ad, lines, size_word, s):
            if self.is_label(ad):
                print(color_symbol(self.get_symbol(ad)))
            print_no_end(color_addr(ad))
            print_no_end("0x%.2x" % w)
            section = self.binary.get_section(w)
            if section is not None:
                print_no_end(" (")
                print_no_end(color_section(section.name))
                print_no_end(")")
                if size_word >= 4 and self.is_label(w):
                    print_no_end(" ")
                    print_no_end(color_symbol(self.get_symbol(w)))
            ad += size_word
            print()

    def print_functions(self):
        """Print every known function address with its symbol, then a total."""
        total = 0
        # TODO: race condition with the analyzer ?
        for ad in list(self.functions):
            print(color_addr(ad) + " " + self.get_symbol(ad))
            total += 1
        print("Total:", total)

    #
    # sym_filter : search a symbol, non case-sensitive
    #     if it starts with '-', it prints non-matching symbols
    #
    def print_symbols(self, print_sections, sym_filter=None):
        """Print symbols (optionally filtered, optionally with their section
        names), then a total count.
        """
        if sym_filter is not None:
            sym_filter = sym_filter.lower()
            if sym_filter[0] == "-":
                invert_match = True
                sym_filter = sym_filter[1:]
            else:
                invert_match = False
        total = 0
        # TODO: race condition with the analyzer ?
        for sy in list(self.binary.symbols):
            ad = self.binary.symbols[sy]
            if sym_filter is None or \
                    (invert_match and sym_filter not in sy.lower()) or \
                    (not invert_match and sym_filter in sy.lower()):
                if sy:
                    section = self.binary.get_section(ad)
                    print_no_end(color_addr(ad) + " " + sy)
                    if print_sections and section is not None:
                        print_no_end(" (" + color_section(section.name) + ")")
                    print()
                    total += 1
        print("Total:", total)

    def lazy_disasm(self, ad, stay_in_section=-1, s=None):
        """Return the capstone instruction at `ad`, using (and filling) the
        instruction cache; decoding reads ahead in 128-byte blocks.  Returns
        None when `ad` is in no section or cannot be decoded.

        NOTE(review): the `s` parameter is immediately overwritten and
        `stay_in_section` is only used by commented-out code.
        """
        s = self.binary.get_section(ad)
        if s is None:
            return None
        # if stay_in_section != -1 and s.start != stay_in_section:
            # return None, s
        if ad in self.capstone_inst:
            return self.capstone_inst[ad]
        # TODO: remove when it's too big ?
        if len(self.capstone_inst) > CAPSTONE_CACHE_SIZE:
            self.capstone_inst.clear()
        # Disassemble by block of N bytes
        N = 128
        d = s.read(ad, N)
        gen = self.md.disasm(d, ad)
        try:
            first = next(gen)
        except StopIteration:
            return None
        self.capstone_inst[first.address] = first
        # Cache the rest of the block until we meet an already-cached address.
        for i in gen:
            if i.address in self.capstone_inst:
                break
            self.capstone_inst[i.address] = i
        return first

    def __add_prefetch(self, addr_set, inst):
        """On MIPS, decode the delay-slot instruction following `inst`, add
        its address to `addr_set` and return it; None on other archs.
        (self.CS_ARCH_MIPS is set by get_graph before this is called.)
        """
        if self.arch == self.CS_ARCH_MIPS:
            prefetch = self.lazy_disasm(inst.address + inst.size)
            addr_set.add(prefetch.address)
            return prefetch
        return None

    def is_noreturn(self, ad):
        """True if the function at `ad` carries the NORETURN flag."""
        return self.functions[ad][1] & FUNC_FLAG_NORETURN

    # Generate a flow graph of the given function (addr)
    def get_graph(self, entry):
        """Build the control-flow graph rooted at `entry` by a DFS over
        decoded instructions.  Returns (Graph, nb_new_symbols) or (None, 0)
        when no node could be created.
        """
        from capstone import CS_OP_IMM, CS_ARCH_MIPS
        self.CS_ARCH_MIPS = CS_ARCH_MIPS
        ARCH_UTILS = self.load_arch_module().utils
        gph = Graph(self, entry)
        stack = [entry]
        start = time()
        prefetch = None
        addresses = set()
        # WARNING: this assume that on every architectures the jump
        # address is the last operand (operands[-1])
        # Here each instruction is a node. Blocks will be created in the
        # function __simplify.
        while stack:
            ad = stack.pop()
            inst = self.lazy_disasm(ad)
            if inst is None:
                # Remove all previous instructions which have a link
                # to this instruction.
                if ad in gph.link_in:
                    for i in gph.link_in[ad]:
                        gph.link_out[i].remove(ad)
                    for i in gph.link_in[ad]:
                        if not gph.link_out[i]:
                            del gph.link_out[i]
                    del gph.link_in[ad]
                continue
            if gph.exists(inst):
                continue
            addresses.add(ad)
            if ARCH_UTILS.is_ret(inst):
                prefetch = self.__add_prefetch(addresses, inst)
                gph.new_node(inst, prefetch, None)
            elif ARCH_UTILS.is_uncond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)
                gph.uncond_jumps_set.add(ad)
                op = inst.operands[-1]
                if op.type == CS_OP_IMM:
                    nxt = op.value.imm
                    if nxt in self.functions:
                        # Tail jump into another function: stop here.
                        gph.new_node(inst, prefetch, None)
                    else:
                        stack.append(nxt)
                        gph.new_node(inst, prefetch, [nxt])
                else:
                    if inst.address in self.jmptables:
                        table = self.jmptables[inst.address].table
                        stack += table
                        gph.new_node(inst, prefetch, table)
                    else:
                        # Can't interpret jmp ADDR|reg
                        gph.new_node(inst, prefetch, None)
            elif ARCH_UTILS.is_cond_jump(inst):
                prefetch = self.__add_prefetch(addresses, inst)
                gph.cond_jumps_set.add(ad)
                op = inst.operands[-1]
                if op.type == CS_OP_IMM:
                    # Fall-through is after the delay slot when one exists.
                    if prefetch is None:
                        direct_nxt = inst.address + inst.size
                    else:
                        direct_nxt = prefetch.address + prefetch.size
                    nxt_jmp = op.value.imm
                    stack.append(direct_nxt)
                    if nxt_jmp in self.functions:
                        gph.new_node(inst, prefetch, [direct_nxt])
                    else:
                        stack.append(nxt_jmp)
                        gph.new_node(inst, prefetch, [direct_nxt, nxt_jmp])
                else:
                    # Can't interpret jmp ADDR|reg
                    gph.new_node(inst, prefetch, None)
            else:
                if ad != entry and ARCH_UTILS.is_call(inst):
                    op = inst.operands[0]
                    if op.type == CS_OP_IMM:
                        imm = op.value.imm
                        # Calls to noreturn functions terminate the path.
                        if imm in self.functions and self.is_noreturn(imm):
                            prefetch = self.__add_prefetch(addresses, inst)
                            gph.new_node(inst, prefetch, None)
                            continue
                nxt = inst.address + inst.size
                stack.append(nxt)
                gph.new_node(inst, None, [nxt])
        if len(gph.nodes) == 0:
            return None, 0
        if self.binary.type == T_BIN_PE:
            nb_new_syms = self.binary.pe_reverse_stripped_list(self, addresses)
        else:
            nb_new_syms = 0
        elapsed = time()
        elapsed = elapsed - start
        debug__("Graph built in %fs (%d instructions)" % (elapsed,
                                                          len(gph.nodes)))
        return gph, nb_new_syms

    def add_jmptable(self, inst_addr, table_addr, entry_size, nb_entries):
        """Register a jump table at `table_addr` used by the instruction at
        `inst_addr`: create a jmptable_ symbol, store the Jmptable, and add
        switch/case comments on the instruction and each target.
        """
        name = self.add_symbol(table_addr, "jmptable_%x" % table_addr)
        table = self.read_array(table_addr, nb_entries, entry_size)
        self.jmptables[inst_addr] = Jmptable(inst_addr, table_addr, table, name)
        self.internal_inline_comments[inst_addr] = "switch statement %s" % name
        # Group case indexes by target address (a target can serve several cases).
        all_cases = {}
        for ad in table:
            all_cases[ad] = []
        case = 0
        for ad in table:
            all_cases[ad].append(case)
            case += 1
        for ad in all_cases:
            self.internal_previous_comments[ad] = \
                ["case %s %s" % (
                    ", ".join(map(str, all_cases[ad])),
                    name
                )]
class DdpgAgent(object):
    """Deep Deterministic Policy Gradient agent.

    Holds an actor (policy) and a critic (Q-function), each with a slowly
    tracking target copy, plus a replay memory.  ``update()`` performs the
    usual DDPG step: a TD critic update, a deterministic policy-gradient
    actor update, and Polyak averaging of both target networks.
    """

    # Sub-directory/file names used by save_model()/load_model().
    actor_store_dir = 'actor'
    critic_store_dir = 'critic'

    def __init__(self, env, batch_size):
        """Build networks, target copies, optimizers and replay memory.

        env -- gym-like environment; only its observation/action space
               shapes and action bounds are read here.
        batch_size -- minibatch size drawn from the replay memory per update.
        """
        self.batch_size = batch_size
        self.tau = 1e-2                  # Polyak averaging coefficient
        memory_size = 1000000
        self.gamma = 0.99                # discount factor
        actor_learning_rate = 1e-4
        critic_learning_rate = 1e-3
        self.critic_loss_fn = nn.MSELoss()
        self.actor = DdpgActor(env.observation_space.shape[0],
                               env.action_space.shape[0],
                               env.action_space.high,
                               env.action_space.low)
        self.actor_target = DdpgActor(env.observation_space.shape[0],
                                      env.action_space.shape[0],
                                      env.action_space.high,
                                      env.action_space.low)
        self.copy_networks(self.actor, self.actor_target)
        self.critic = Critic(env.observation_space.shape[0],
                             env.action_space.shape[0])
        self.critic_target = Critic(env.observation_space.shape[0],
                                    env.action_space.shape[0])
        self.copy_networks(self.critic, self.critic_target)
        self.memory = Memory(memory_size)
        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=actor_learning_rate)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=critic_learning_rate)

    def copy_networks(self, org_net, dest_net):
        """Hard-copy every parameter of `org_net` into `dest_net`."""
        for dest_param, param in zip(dest_net.parameters(),
                                     org_net.parameters()):
            dest_param.data.copy_(param.data)

    def get_action(self, state):
        """Return an exploration action via the actor's noisy forward pass."""
        tensor_state = Variable(torch.from_numpy(state).float().unsqueeze(0))
        tensor_action = self.actor.noisy_forward(tensor_state)
        return tensor_action.detach().numpy()[0]

    def get_test_action(self, state):
        """Return the deterministic (noise-free) action for evaluation."""
        tensor_state = Variable(torch.from_numpy(state).float().unsqueeze(0))
        tensor_action = self.actor.forward(tensor_state)
        return tensor_action.detach().numpy()[0]

    def save(self, state, action, reward, new_state, cost, fail):
        """Store one transition tuple in the replay memory."""
        self.memory.push(state, action, reward, new_state, cost, fail)

    def save_model(self, data_dir):
        """Serialize actor and critic (whole modules) under `data_dir`."""
        actor_dir = os.path.join(data_dir, self.actor_store_dir)
        torch.save(self.actor, actor_dir)
        critic_dir = os.path.join(data_dir, self.critic_store_dir)
        torch.save(self.critic, critic_dir)

    def load_model(self, data_dir):
        """Load actor/critic from `data_dir`, resync targets and optimizers."""
        actor_dir = os.path.join(data_dir, self.actor_store_dir)
        self.actor = torch.load(actor_dir)
        self.copy_networks(self.actor, self.actor_target)
        critic_dir = os.path.join(data_dir, self.critic_store_dir)
        self.critic = torch.load(critic_dir)
        self.copy_networks(self.critic, self.critic_target)
        # Bug fix: the optimizers created in __init__ still referenced the
        # parameters of the networks that existed *before* loading, so
        # update() would keep training the discarded modules.  Rebind them
        # to the freshly loaded parameters, keeping the configured rates.
        actor_lr = self.actor_optimizer.param_groups[0]['lr']
        critic_lr = self.critic_optimizer.param_groups[0]['lr']
        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=actor_lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=critic_lr)

    def update(self, num=1):
        """Run `num` gradient steps, then reset the actor's noise source."""
        for _ in range(num):
            self.__one_update()
        self.actor.reset_noise()

    def __one_update(self):
        """One DDPG step: critic TD update, actor update, soft target update.

        No-op until the replay memory holds at least one full batch.
        """
        if len(self.memory) < self.batch_size:
            return
        states, actions, rewards, next_states, costs, fails = self.memory.get_batch(
            self.batch_size)
        # Critic TD target: r - c + gamma * Q'(s', pi'(s')), zeroed when the
        # transition ended in failure (no bootstrapping past terminal states).
        states_q_values = self.critic.forward(states, actions)
        next_actions = self.actor_target.forward(next_states)
        next_states_q_value = self.critic_target.forward(
            next_states, next_actions.detach())
        not_fails = (fails == 0)
        next_states_q_value = next_states_q_value * not_fails
        new_q_value = rewards - costs + (self.gamma * next_states_q_value)
        critic_loss = self.critic_loss_fn(states_q_values, new_q_value)
        # Deterministic policy gradient: maximize Q(s, pi(s)).
        actor_loss = -self.critic.forward(states,
                                          self.actor.forward(states)).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()
        # Polyak-average both target networks towards the live networks.
        for target_param, param in zip(self.actor_target.parameters(),
                                       self.actor.parameters()):
            target_param.data.copy_(param.data * self.tau +
                                    target_param.data * (1.0 - self.tau))
        for target_param, param in zip(self.critic_target.parameters(),
                                       self.critic.parameters()):
            target_param.data.copy_(param.data * self.tau +
                                    target_param.data * (1.0 - self.tau))
def setUp(self): self.memory = Memory(1)
def _fresh_trainer(old_trainer, memory_bank, champion, competitor):
    """Build a Trainer on a brand-new Game, carrying over the annealed
    exploration/shaping parameters (epsilon, returns, winners) from
    *old_trainer*.  Extracted because main() rebuilt the trainer with the
    exact same duplicated code in two places.
    """
    return Trainer(Game(GAME_LENGTH, GAME_STEP_TIME), memory_bank, champion,
                   competitor, old_trainer.epsilon, MIN_EPSILON,
                   EPSILON_DECAY, GAMMA, RETURNS_DECAY, WINNERS_GROWTH,
                   returns_parameter=old_trainer.returns_parameter,
                   winners_parameter=old_trainer.winners_parameter,
                   batch_size=BATCH_SIZE)


def main():
    """Self-play training loop.

    Repeatedly trains the champion network against a competitor, saves a
    versioned checkpoint after each session, then plays a test game: if the
    competitor wins, the champion adopts its weights; otherwise a recent
    past version is loaded as the new competitor.
    """
    memory_bank = Memory(MEMORY_SIZE)
    pong_game = Game(GAME_LENGTH, GAME_STEP_TIME)
    champion = Network(3, 7, hidden_layer_size=HIDDEN_LAYER_SIZE,
                       no_hidden_layers=NO_HIDDEN_LAYERS,
                       learning_rate=LEARNING_RATE)
    competitor = Network(3, 7, hidden_layer_size=HIDDEN_LAYER_SIZE,
                         no_hidden_layers=NO_HIDDEN_LAYERS)
    trainer = Trainer(pong_game, memory_bank, champion, competitor,
                      MAX_EPSILON, MIN_EPSILON, EPSILON_DECAY, GAMMA,
                      RETURNS_DECAY, WINNERS_GROWTH, batch_size=BATCH_SIZE)
    champion.save_network(DIRECTORY + '/version_' + str(STARTING_VERSION))
    for version in range(STARTING_VERSION,
                         STARTING_VERSION + NUMBER_OF_TRAINING_SESSIONS):
        start_time = time.time()
        for _ in range(GAMES_PER_TRAINING_SESSION):
            print('New game')
            trainer.run_game()
            # Fresh game board for the next self-play episode.
            trainer.game = Game(GAME_LENGTH, GAME_STEP_TIME)
        print("Time taken for training session: ", time.time() - start_time)
        champion.save_network(DIRECTORY + '/version_' + str(version + 1))
        # Rebuild the trainer on a fresh game before the evaluation match.
        trainer = _fresh_trainer(trainer, memory_bank, champion, competitor)
        test_score = trainer.test_game()
        if test_score < 0:
            # Competitor beat the champion: champion adopts its weights.
            print('Competitor wins, score was ' + str(test_score))
            competitor.save_network(DIRECTORY + '/competitor_save')
            champion.load_network(DIRECTORY + '/competitor_save')
        else:
            # Champion holds: draw a recent past version as new competitor.
            print('Champion continues, score was ' + str(test_score))
            new_competitor_version = random.randint(max(0, version - 5),
                                                    version)
            print('New competitor version: ' + str(new_competitor_version))
            competitor.load_network(DIRECTORY + '/version_' +
                                    str(new_competitor_version))
        print('epsilon is ' + str(trainer.epsilon))
        # Fresh trainer for the next training session.
        trainer = _fresh_trainer(trainer, memory_bank, champion, competitor)