Example #1
    def __init__(self, env, batch_size):
        self.batch_size = batch_size
        self.tau = 1e-2
        memory_size = 1000000
        self.gamma = 0.99
        actor_learning_rate = 1e-4
        critic_learning_rate = 1e-3
        self.critic_loss_fn = nn.MSELoss()

        self.actor = DdpgActor(env.observation_space.shape[0],
                               env.action_space.shape[0],
                               env.action_space.high, env.action_space.low)
        self.actor_target = DdpgActor(env.observation_space.shape[0],
                                      env.action_space.shape[0],
                                      env.action_space.high,
                                      env.action_space.low)
        self.copy_networks(self.actor, self.actor_target)

        self.critic = Critic(env.observation_space.shape[0],
                             env.action_space.shape[0])
        self.critic_target = Critic(env.observation_space.shape[0],
                                    env.action_space.shape[0])
        self.copy_networks(self.critic, self.critic_target)

        self.memory = Memory(memory_size)

        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=actor_learning_rate)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=critic_learning_rate)
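Both this example and Example #6 call a copy_networks helper that is not shown on this page. A minimal sketch of what it could look like, together with the tau-weighted soft update that DDPG-style target networks typically need, is given below; soft_update and its signature are assumptions, not part of the original code.

# Hypothetical helpers, not shown in the examples above.
import torch


def copy_networks(source, target):
    # Hard copy: the target network starts as an exact clone of the online one.
    target.load_state_dict(source.state_dict())


def soft_update(source, target, tau):
    # Polyak averaging: target <- tau * source + (1 - tau) * target.
    with torch.no_grad():
        for src, tgt in zip(source.parameters(), target.parameters()):
            tgt.data.mul_(1.0 - tau).add_(tau * src.data)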
Example #2
    def __init__(self):
        self.capture_view = CaptureView.instance()
        self.memory = Memory()
        self.cursor = Cursor()
        self.setup_pages()
        self.setup_name_table()
        self.monitor = 'a'
        self.channel = 1
        self.set_page('input_monitor.' + self.monitor)
Example #3
    def __load_memory(self, data):
        self.mem = Memory()

        try:
            self.mem.code = data["mem_code"]
        except KeyError:
            # Not available in previous versions, this try will be
            # removed in the future
            pass
Example #4
    def __init__(self, filename, raw_type, raw_base, raw_big_endian, database):
        import capstone as CAPSTONE

        self.capstone_inst = {}  # capstone instruction cache

        if database.loaded:
            self.mem = database.mem
        else:
            self.mem = Memory()
            database.mem = self.mem

        self.binary = Binary(self.mem, filename, raw_type, raw_base,
                             raw_big_endian)

        self.binary.load_section_names()
        arch, mode = self.binary.get_arch()

        if arch is None or mode is None:
            raise ExcArch(self.binary.get_arch_string())

        self.jmptables = database.jmptables
        self.user_inline_comments = database.user_inline_comments
        self.internal_inline_comments = database.internal_inline_comments
        self.user_previous_comments = database.user_previous_comments
        self.internal_previous_comments = database.internal_previous_comments
        self.functions = database.functions
        self.func_id = database.func_id
        self.end_functions = database.end_functions
        self.xrefs = database.xrefs

        # TODO: is it a global constant or $gp can change during the execution ?
        self.mips_gp = database.mips_gp

        if database.loaded:
            self.binary.symbols = database.symbols
            self.binary.reverse_symbols = database.reverse_symbols
            self.binary.imports = database.imports
        else:
            self.binary.load_symbols()
            database.symbols = self.binary.symbols
            database.reverse_symbols = self.binary.reverse_symbols
            database.imports = self.binary.imports

        self.capstone = CAPSTONE
        self.md = CAPSTONE.Cs(arch, mode)
        self.md.detail = True
        self.arch = arch
        self.mode = mode

        for s in self.binary.iter_sections():
            s.big_endian = self.mode & self.capstone.CS_MODE_BIG_ENDIAN

            # TODO: useful ?
            if not database.loaded:
                self.mem.add(s.start, s.end, MEM_UNK)
Example #5
    def __load_memory(self, data):
        self.mem = Memory()

        try:
            if self.version == -1:
                self.mem.mm = data["mem_code"]
                for ad in self.mem.mm:
                    self.mem.mm[ad].append(-1)
                return

            self.mem.mm = data["mem"]
        except KeyError:
            # Not available in previous versions, this try will be
            # removed in the future
            pass
Example #6
    def __init__(self, env, batch_size):
        self.batch_size = batch_size
        self.tau = 1e-2
        memory_size = 1000000
        self.gamma = 0.99
        self.q_lr = 3e-4
        self.actor_lr = 3e-4
        self.alpha_lr = 3e-3

        self.update_step = 0
        self.delay_step = 2

        self.action_range = [env.action_space.low, env.action_space.high]

        self.memory = Memory(memory_size)

        # entropy temperature
        self.alpha = 0.2
        self.target_entropy = -torch.prod(torch.Tensor(
            env.action_space.shape)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True)
        self.alpha_optim = optim.Adam([self.log_alpha], lr=self.alpha_lr)

        self.actor = SacActor(env.observation_space.shape[0],
                              env.action_space.shape[0])
        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=self.actor_lr)

        self.q_net_1 = Critic(env.observation_space.shape[0],
                              env.action_space.shape[0])
        self.q_net_1_target = Critic(env.observation_space.shape[0],
                                     env.action_space.shape[0])
        self.copy_networks(self.q_net_1, self.q_net_1_target)
        self.q_net_1_optimizer = optim.Adam(self.q_net_1.parameters(),
                                            lr=self.q_lr)

        self.q_net_2 = Critic(env.observation_space.shape[0],
                              env.action_space.shape[0])
        self.q_net_2_target = Critic(env.observation_space.shape[0],
                                     env.action_space.shape[0])
        self.copy_networks(self.q_net_2, self.q_net_2_target)
        self.q_net_2_optimizer = optim.Adam(self.q_net_2.parameters(),
                                            lr=self.q_lr)
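Example #6 creates an entropy temperature alpha, a target_entropy, and an optimizer over log_alpha, but the temperature update itself is outside the snippet. A hedged sketch of the usual SAC temperature step, assuming log_prob is the log-probability of the action sampled by the actor, could be:

    # Hypothetical method on the same agent class; not part of the original example.
    def update_temperature(self, log_prob):
        alpha_loss = -(self.log_alpha *
                       (log_prob + self.target_entropy).detach()).mean()
        self.alpha_optim.zero_grad()
        alpha_loss.backward()
        self.alpha_optim.step()
        self.alpha = self.log_alpha.exp().item()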
Example #7
import unittest
from math import inf
from lib.memory import Memory
from lib.types import Volume

mem = Memory()


class TestMemory(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.mem = mem
        cls.mem.capture_view.add_name_to_table(
            'input_monitor.a.channel.1.volume')

    def test_memory_get_empty(self):
        self.assertIsNone(self.mem.get(0))

    def test_memory_set_get_erase(self):
        self.mem.set(0, 1)
        self.assertEqual(self.mem.get(0), 1)
        self.mem.set(1, 2)
        self.assertEqual(self.mem.get(0), 1)
        self.assertEqual(self.mem.get(1), 2)
        self.mem.set(0, 3)
        self.assertEqual(self.mem.get(0), 3)
        self.assertEqual(self.mem.get(1), 2)

        self.mem.erase(0)
        self.mem.erase(1)
        self.assertIsNone(self.mem.get(0))
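The tests above only exercise get, set, and erase on Memory. A minimal dict-backed stand-in that satisfies these assertions (the capture_view attribute used in setUpClass is project-specific and omitted here) might be:

# Hypothetical stand-in for lib.memory.Memory; illustrative only.
class Memory:
    def __init__(self):
        self._slots = {}

    def get(self, slot):
        # Returns None for slots that were never set or have been erased.
        return self._slots.get(slot)

    def set(self, slot, value):
        self._slots[slot] = value

    def erase(self, slot):
        self._slots.pop(slot, None)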
Example #8
    def setUp(self):
        self.memory = Memory(1)
Example #9
def main():
    memory_bank = Memory(MEMORY_SIZE)
    pong_game = Game(GAME_LENGTH, GAME_STEP_TIME)

    champion = Network(3,
                       7,
                       hidden_layer_size=HIDDEN_LAYER_SIZE,
                       no_hidden_layers=NO_HIDDEN_LAYERS,
                       learning_rate=LEARNING_RATE)
    competitor = Network(3,
                         7,
                         hidden_layer_size=HIDDEN_LAYER_SIZE,
                         no_hidden_layers=NO_HIDDEN_LAYERS)

    trainer = Trainer(pong_game,
                      memory_bank,
                      champion,
                      competitor,
                      MAX_EPSILON,
                      MIN_EPSILON,
                      EPSILON_DECAY,
                      GAMMA,
                      RETURNS_DECAY,
                      WINNERS_GROWTH,
                      batch_size=BATCH_SIZE)

    champion.save_network(DIRECTORY + '/version_' + str(STARTING_VERSION))

    for version in range(STARTING_VERSION,
                         STARTING_VERSION + NUMBER_OF_TRAINING_SESSIONS):

        start_time = time.time()
        for _ in range(GAMES_PER_TRAINING_SESSION):
            print('New game')
            trainer.run_game()
            trainer.game = Game(GAME_LENGTH, GAME_STEP_TIME)

        print("Time taken for training session: ", time.time() - start_time)
        champion.save_network(DIRECTORY + '/version_' + str(version + 1))

        current_epsilon = trainer.epsilon
        current_returns_parameter = trainer.returns_parameter
        current_winners_parameter = trainer.winners_parameter
        trainer = Trainer(Game(GAME_LENGTH, GAME_STEP_TIME),
                          memory_bank,
                          champion,
                          competitor,
                          current_epsilon,
                          MIN_EPSILON,
                          EPSILON_DECAY,
                          GAMMA,
                          RETURNS_DECAY,
                          WINNERS_GROWTH,
                          returns_parameter=current_returns_parameter,
                          winners_parameter=current_winners_parameter,
                          batch_size=BATCH_SIZE)
        test_score = trainer.test_game()

        if test_score < 0:
            print('Competitor wins, score was ' + str(test_score))
            competitor.save_network(DIRECTORY + '/competitor_save')
            champion.load_network(DIRECTORY + '/competitor_save')
        else:
            print('Champion continues, score was ' + str(test_score))

        new_competitor_version = random.randint(max(0, version - 5), version)
        print('New competitor version: ' + str(new_competitor_version))

        competitor.load_network(DIRECTORY + '/version_' +
                                str(new_competitor_version))

        current_epsilon = trainer.epsilon
        print('epsilon is ' + str(current_epsilon))
        current_returns_parameter = trainer.returns_parameter
        current_winners_parameter = trainer.winners_parameter
        trainer = Trainer(Game(GAME_LENGTH, GAME_STEP_TIME),
                          memory_bank,
                          champion,
                          competitor,
                          current_epsilon,
                          MIN_EPSILON,
                          EPSILON_DECAY,
                          GAMMA,
                          RETURNS_DECAY,
                          WINNERS_GROWTH,
                          returns_parameter=current_returns_parameter,
                          winners_parameter=current_winners_parameter,
                          batch_size=BATCH_SIZE)
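main() above depends on module-level imports (time, random) and a block of constants that are not part of the snippet. A sketch of that preamble is shown below; the names match the snippet, but every value is a placeholder, not the author's configuration.

# Hypothetical preamble; values are illustrative placeholders only.
# The project's own Game, Network, Trainer and Memory imports would go here too.
import random
import time

MEMORY_SIZE = 100000
GAME_LENGTH = 1000
GAME_STEP_TIME = 0.01
HIDDEN_LAYER_SIZE = 128
NO_HIDDEN_LAYERS = 2
LEARNING_RATE = 1e-3
MAX_EPSILON = 1.0
MIN_EPSILON = 0.05
EPSILON_DECAY = 0.999
GAMMA = 0.99
RETURNS_DECAY = 0.99
WINNERS_GROWTH = 1.01
BATCH_SIZE = 64
DIRECTORY = 'saves'
STARTING_VERSION = 0
NUMBER_OF_TRAINING_SESSIONS = 10
GAMES_PER_TRAINING_SESSION = 10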