Code example #1
def train(size=(4, 4), verbose=False, batches=50, batch_size=20, density=0.5):
    # Model, CgolProblemV2, and RL come from the surrounding project (imports omitted).
    # The default density of 0.5 is an assumption; the snippet used `density` without defining it.
    model = Model(size=size)
    problem = CgolProblemV2(model,
                            init_flip=None,
                            density=density,
                            high_density=1.0 - density,
                            pop_record_size=10)
    # hidden-layer sizes derived from the problem's dimensions
    layers = [
        problem.length + problem.key_dim,
        problem.length + problem.cols + problem.rows,
        problem.length // 2,
    ]
    rl = RL(
        problem,
        shape=[None, *layers, None],
        verbose=verbose,
        mu=0.01,
        batches=batches,
        batch_size=batch_size,
        max_steps=250,
        epsilon_init=1.0,
        epsilon_decay_factor=0.9,
        epsilon_min=0.01,
        #replay_count=2,
    )
    rl.train(iterations=100)
    return rl
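
A minimal usage sketch of the train() helper above, for context: the import path and the rl.problem attribute are assumptions (not confirmed by the snippet), and the greedy rollout loop mirrors the test methods in the later examples.

from cgolai.experiments import train  # hypothetical import path; adjust to where train() lives

# Train a small agent and then roll out its greedy policy.
rl = train(size=(4, 4), verbose=True, batches=10, batch_size=10)

problem = rl.problem  # assumes the RL wrapper keeps a reference to its problem
problem.reset()
steps = 0
while not problem.is_terminal() and steps < 100:
    action, _ = rl.choose_best_action(explore=False)
    problem.do(action)
    steps += 1
print('reached a terminal state in', steps, 'steps')
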
Code example #2
    def test_rl_basic(self):
        # build and train
        full_test = False
        if not full_test:
            # shrink the run so the test finishes quickly
            self.batch_size = 5
            self.batches = 5
            self.replay_count = 5

        rl = RL(self.problem, [None, *self.inner, None],
                verbose=self.verbose_model,
                epsilon_decay_factor=self.epsilon_decay_factor,
                epsilon_init=self.epsilon_init,
                epsilon_min=self.epsilon_min,
                stochastic=self.stochastic,
                mu=self.mu,
                discount_factor=self.discount_factor,
                batches=self.batches,
                batch_size=self.batch_size,
                replay_count=self.replay_count,
                h=None,
                optim=None)
        rl.train(batches=self.batches,
                 batch_size=self.batch_size,
                 replay_count=self.replay_count,
                 iterations=self.iterations,
                 epsilon=self.epsilon_init)

        # test it: roll out the greedy policy and count steps to a terminal state
        steps = 0
        self.problem.reset()
        while not self.problem.is_terminal():
            action, q_val = rl.choose_best_action(explore=False)
            if self.verbose:
                print('action:', action, 'q_val:', q_val)
            self.problem.do(action)
            steps += 1
            if steps > 100:
                break  # safety cap if the policy never terminates
        if self.verbose and steps != 7:
            print(steps)
        if full_test:
            self.assertEqual(steps, 7)
Code example #3
    def test_cgolai_compat(self):
        verbose = False
        if verbose:
            print('start')
        model = Model(size=(3, 3))
        problem = CgolProblem(model)
        rl = RL(problem, [None, 100, 100, 100, None],
                batches=5,
                batch_size=3,
                epsilon_decay_factor=0.9999,
                epsilon_init=0.5,
                max_steps=100,
                replay_count=3)
        rl.train(iterations=100)
        if verbose:
            problem.reset()
            print(problem.model.board)
            import numpy as np
            # show the learned value of each board action as a grid
            # (the last action in problem.actions() is excluded)
            print(
                np.array([
                    rl.get_value(action) for action in problem.actions()[:-1]
                ]).reshape(problem.model.size))
            print('end')
Code example #4
    def test_rl_basic_nonn(self):
        # RL used without a network shape (the "no neural net" variant of the basic test)
        rl = RL(problem=self.problem, stochastic=True, verbose=False)
        rl.train(500)
        # roll out the greedy policy and count steps to a terminal state
        steps = 0
        self.problem.reset()
        while not self.problem.is_terminal():
            action, _ = rl.choose_best_action(explore=False)
            self.problem.do(action)
            steps += 1
            if steps > 100:
                break  # safety cap if the policy never terminates
        if self.verbose and steps != 7:
            print(steps)
        self.assertEqual(steps, 7)