Esempio n. 1
0
    def __init__(self, use_gpu, enable_controller, dim):
        """Set up the Q-network, its target copy, the optimizer and replay storage.

        Args:
            use_gpu: Device selector; the model is moved to the GPU when this
                value is ``>= 0``.
            enable_controller: Collection of selectable actions; its length
                fixes the number of Q-value outputs.
            dim: Size factor of one observation, used for the input layers
                and for the last axis of the state buffers.

        Note:
            ``self.hist_size``, ``self.time_M`` and ``self.data_size`` are read
            but not assigned here; they must already exist on the instance
            (presumably class-level attributes -- confirm against the class).
        """
        self.use_gpu = use_gpu
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller
        self.dim = dim

        print("Initializing Q-Network...")

        hidden_dim2 = 128
        hidden_dim3 = 10
        hidden_cont = 100

        # Sizes shared by several layers, computed once instead of inline.
        input_size = self.dim * self.hist_size * self.time_M
        memory_size = hidden_dim3 * hidden_cont
        wscale = np.sqrt(2)

        # Keyword order is kept as-is so the layers are constructed (and any
        # random initialisation is drawn) in the same sequence as before.
        self.model = FunctionSet(
            l4=linearL4_link.LinearL4_link(input_size,
                                           hidden_cont,
                                           wscale=wscale),
            l5=MU_l6.memory_unit_link(input_size,
                                      memory_size,
                                      wscale=wscale),
            l6=MU_l6.memory_unit_link(input_size,
                                      memory_size,
                                      wscale=wscale),
            l7=attention.Attention(hidden_cont, memory_size, hidden_dim3),
            l8=retrieval.Retrieval(hidden_dim3, memory_size, hidden_cont),
            l9=F.Bilinear(hidden_cont, hidden_cont, hidden_dim2),
            # The output layer starts from all-zero weights.
            q_value=F.Linear(hidden_dim2,
                             self.num_of_actions,
                             initialW=np.zeros(
                                 (self.num_of_actions, hidden_dim2),
                                 dtype=np.float32)))
        if self.use_gpu >= 0:
            self.model.to_gpu()

        # Independent copy of the freshly built model, used as target network.
        self.model_target = copy.deepcopy(self.model)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.0001)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s(now & 10history), a, r, s_dash, end_episode_flag]
        # modified to MQN
        # NOTE(review): the s_dash buffer is sized with hist_size only, while
        # the s buffer uses hist_size * time_M -- confirm this is intended.
        self.d = [
            np.zeros((self.data_size, self.hist_size * self.time_M, self.dim),
                     dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, self.hist_size, self.dim),
                     dtype=np.uint8),
            # Builtin ``bool`` instead of the ``np.bool`` alias, which was
            # removed in NumPy 1.24 and raises AttributeError there.
            np.zeros((self.data_size, 1), dtype=bool)
        ]
Esempio n. 2
0
 def __init__(self):
     """Register the five layers ``l1``..``l5`` with the parent class.

     ``l1`` is a Bilinear layer fed with two ``settings["n_frames"]``-sized
     arguments and 200 outputs; ``l2``..``l5`` are Linear layers sized
     200->100->100->50->``simulator.n_actions``, all using a weight scale
     of ``sqrt(2)``.
     """
     frame_count = settings["n_frames"]
     weight_scale = np.sqrt(2)

     # Layers are created in l1..l5 order, matching the keyword order below.
     bilinear_in = F.Bilinear(frame_count, frame_count, 200)
     hidden_a = F.Linear(200, 100, wscale=weight_scale)
     hidden_b = F.Linear(100, 100, wscale=weight_scale)
     hidden_c = F.Linear(100, 50, wscale=weight_scale)
     action_out = F.Linear(50, simulator.n_actions, wscale=weight_scale)

     super(Linear, self).__init__(l1=bilinear_in,
                                  l2=hidden_a,
                                  l3=hidden_b,
                                  l4=hidden_c,
                                  l5=action_out)
Esempio n. 3
0
    def setUp(self):
        """Create the Bilinear fixture, random inputs and the reference output.

        The function is built with ``True`` as its fourth positional argument
        (presumably the ``nobias`` flag -- confirm against the Bilinear
        signature), so only ``W`` is randomised and the expected output is the
        pure bilinear term.
        """
        left_size = self.in_shape[0]
        right_size = self.in_shape[1]

        bilinear = functions.Bilinear(left_size, right_size,
                                      self.out_size, True)
        self.f = bilinear
        # Overwrite W with float32 values drawn uniformly from [-1, 1).
        random_w = numpy.random.uniform(-1, 1, bilinear.W.shape)
        bilinear.W = random_w.astype(numpy.float32)
        bilinear.zero_grads()

        # Snapshot of W for computing the expected result.
        self.W = bilinear.W.copy()

        batch = self.batch_size
        self.e1 = _uniform(batch, left_size)
        self.e2 = _uniform(batch, right_size)
        self.gy = _uniform(batch, self.out_size)

        # y[i, l] = sum_{j, k} e1[i, j] * e2[i, k] * W[j, k, l]
        self.y = numpy.einsum('ij,ik,jkl->il', self.e1, self.e2, self.W)
Esempio n. 4
0
    def setUp(self):
        """Create the Bilinear fixture with W, V1, V2 and b, plus reference data.

        All four parameters of the function are replaced by ``_uniform``
        samples of matching shape, copies of them are kept on the test case,
        and the expected output ``self.y`` is computed from those copies.
        """
        param_names = ('W', 'V1', 'V2', 'b')
        left_size = self.in_shape[0]
        right_size = self.in_shape[1]

        self.f = functions.Bilinear(left_size, right_size, self.out_size)
        # Randomise the parameters in W, V1, V2, b order.
        for name in param_names:
            shape = getattr(self.f, name).shape
            setattr(self.f, name, _uniform(*shape))
        self.f.zero_grads()

        # Keep independent copies for the reference computation.
        for name in param_names:
            setattr(self, name, getattr(self.f, name).copy())

        batch = self.batch_size
        self.e1 = _uniform(batch, left_size)
        self.e2 = _uniform(batch, right_size)
        self.gy = _uniform(batch, self.out_size)

        # Expected output: bilinear term + both linear terms + bias, summed
        # left to right.
        bilinear_term = numpy.einsum('ij,ik,jkl->il',
                                     self.e1, self.e2, self.W)
        left_term = self.e1.dot(self.V1)
        right_term = self.e2.dot(self.V2)
        self.y = bilinear_term + left_term + right_term + self.b
Esempio n. 5
0
 def check_invalid(self, initialW, initial_bias, nobias):
     """Expect constructing Bilinear with these arguments to raise AssertionError.

     Args:
         initialW: Value passed as the fifth positional constructor argument.
         initial_bias: Value passed as the sixth positional argument.
         nobias: Value passed as the fourth positional argument.
     """
     with self.assertRaises(AssertionError):
         ctor_args = (self.in_shape[0], self.in_shape[1], self.out_size,
                      nobias, initialW, initial_bias)
         functions.Bilinear(*ctor_args)
Esempio n. 6
0
 def check_normal(self, initialW, initial_bias, nobias):
     """Construct Bilinear with the given arguments; the result is discarded.

     Args:
         initialW: Value passed as the fifth positional constructor argument.
         initial_bias: Value passed as the sixth positional argument.
         nobias: Value passed as the fourth positional argument.
     """
     ctor_args = (self.in_shape[0], self.in_shape[1], self.out_size,
                  nobias, initialW, initial_bias)
     functions.Bilinear(*ctor_args)