def __init__(self, use_gpu, enable_controller, dim):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim

    print("Initializing Q-Network...")

    hidden_dim1 = 64
    #hidden_dim1 = 32
    hidden_dim2 = 128
    hidden_dim3 = 10
    hidden_cont = 100

    # Q-network layers: controller (l4), key/value memory units (l5, l6),
    # attention over memory (l7), content retrieval (l8), bilinear
    # combination of context and content (l9), and the Q-value head.
    self.model = FunctionSet(
        l4=linearL4_link.LinearL4_link(
            self.dim * self.hist_size * self.time_M, hidden_cont,
            wscale=np.sqrt(2)),
        l5=MU_l6.memory_unit_link(
            self.dim * self.hist_size * self.time_M,
            hidden_dim3 * hidden_cont, wscale=np.sqrt(2)),
        l6=MU_l6.memory_unit_link(
            self.dim * self.hist_size * self.time_M,
            hidden_dim3 * hidden_cont, wscale=np.sqrt(2)),
        l7=attention.Attention(hidden_cont, hidden_dim3 * hidden_cont,
                               hidden_dim3),
        l8=retrieval.Retrieval(hidden_dim3, hidden_dim3 * hidden_cont,
                               hidden_cont),
        l9=F.Bilinear(hidden_cont, hidden_cont, hidden_dim2),
        q_value=F.Linear(hidden_dim2, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, hidden_dim2),
                                           dtype=np.float32)))
    if self.use_gpu >= 0:
        self.model.to_gpu()

    # Frozen copy used as the target network for Q-learning updates.
    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                              momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s(now & 10history), a, r, s_dash, end_episode_flag]
    # modified to MQN
    self.d = [np.zeros((self.data_size, self.hist_size * self.time_M,
                        self.dim), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim),
                       dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
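# A minimal sketch, not part of the original source, of how a transition
# could be written into the replay arrays self.d defined above. The method
# name stock_experience and the modulo indexing are assumptions modeled on
# common DQN implementations; only the array layout comes from self.d.
def stock_experience(self, time, state, action, reward, state_dash,
                     episode_end_flag):
    data_index = time % self.data_size        # overwrite oldest entry when full
    self.d[0][data_index] = state             # observation history s
    self.d[1][data_index] = action            # a
    self.d[2][data_index] = reward            # r
    self.d[3][data_index] = state_dash        # successor observation s_dash
    self.d[4][data_index] = episode_end_flag  # terminal flag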
def __init__(self):
    super(Linear, self).__init__(
        l1=F.Bilinear(settings["n_frames"], settings["n_frames"], 200),
        l2=F.Linear(200, 100, wscale=np.sqrt(2)),
        l3=F.Linear(100, 100, wscale=np.sqrt(2)),
        l4=F.Linear(100, 50, wscale=np.sqrt(2)),
        l5=F.Linear(50, simulator.n_actions, wscale=np.sqrt(2)))
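# A hedged usage sketch for the bilinear first layer above: unlike F.Linear,
# a Chainer v1 F.Bilinear parameterized function is applied to two input
# variables, matching l1's (n_frames, n_frames) size declaration. The
# concrete n_frames value below is a hypothetical stand-in for
# settings["n_frames"].
import numpy as np
import chainer.functions as F
from chainer import Variable

n_frames = 4
l1 = F.Bilinear(n_frames, n_frames, 200)
x = Variable(np.zeros((1, n_frames), dtype=np.float32))
h = l1(x, x)   # the same frame vector feeds both sides; output shape (1, 200)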
def setUp(self):
    self.f = functions.Bilinear(
        self.in_shape[0], self.in_shape[1], self.out_size, True)
    self.f.W = numpy.random.uniform(
        -1, 1, self.f.W.shape).astype(numpy.float32)
    self.f.zero_grads()

    self.W = self.f.W.copy()

    self.e1 = _uniform(self.batch_size, self.in_shape[0])
    self.e2 = _uniform(self.batch_size, self.in_shape[1])
    self.gy = _uniform(self.batch_size, self.out_size)
    self.y = numpy.einsum('ij,ik,jkl->il', self.e1, self.e2, self.W)
def setUp(self):
    self.f = functions.Bilinear(
        self.in_shape[0], self.in_shape[1], self.out_size)
    self.f.W = _uniform(*self.f.W.shape)
    self.f.V1 = _uniform(*self.f.V1.shape)
    self.f.V2 = _uniform(*self.f.V2.shape)
    self.f.b = _uniform(*self.f.b.shape)
    self.f.zero_grads()

    self.W = self.f.W.copy()
    self.V1 = self.f.V1.copy()
    self.V2 = self.f.V2.copy()
    self.b = self.f.b.copy()

    self.e1 = _uniform(self.batch_size, self.in_shape[0])
    self.e2 = _uniform(self.batch_size, self.in_shape[1])
    self.gy = _uniform(self.batch_size, self.out_size)

    # Expected output: full bilinear form plus the linear terms and bias,
    # y = einsum(e1, e2, W) + e1 V1 + e2 V2 + b.
    self.y = (numpy.einsum('ij,ik,jkl->il', self.e1, self.e2, self.W) +
              self.e1.dot(self.V1) + self.e2.dot(self.V2) + self.b)
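# A self-contained NumPy check (a sketch, not from the original tests) of
# what the expected value above computes: per sample i and output slice l,
# y[i, l] = e1[i]^T W[:, :, l] e2[i], plus the linear terms and the bias.
# All sizes below are illustrative.
import numpy

rng = numpy.random.RandomState(0)
B, J, K, L = 4, 3, 5, 2   # batch, left input, right input, output sizes
e1 = rng.uniform(-1, 1, (B, J)).astype(numpy.float32)
e2 = rng.uniform(-1, 1, (B, K)).astype(numpy.float32)
W = rng.uniform(-1, 1, (J, K, L)).astype(numpy.float32)
V1 = rng.uniform(-1, 1, (J, L)).astype(numpy.float32)
V2 = rng.uniform(-1, 1, (K, L)).astype(numpy.float32)
b = rng.uniform(-1, 1, (L,)).astype(numpy.float32)

y = (numpy.einsum('ij,ik,jkl->il', e1, e2, W)
     + e1.dot(V1) + e2.dot(V2) + b)

# The same result, computed one output slice at a time.
y_ref = numpy.empty((B, L), dtype=numpy.float32)
for l in range(L):
    y_ref[:, l] = numpy.einsum('ij,ik,jk->i', e1, e2, W[:, :, l])
y_ref += e1.dot(V1) + e2.dot(V2) + b

assert numpy.allclose(y, y_ref, atol=1e-5)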
def check_invalid(self, initialW, initial_bias, nobias):
    with self.assertRaises(AssertionError):
        functions.Bilinear(
            self.in_shape[0], self.in_shape[1], self.out_size, nobias,
            initialW, initial_bias)
def check_normal(self, initialW, initial_bias, nobias):
    functions.Bilinear(
        self.in_shape[0], self.in_shape[1], self.out_size, nobias,
        initialW, initial_bias)
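# A hedged sketch of how the two helpers above might be driven: a valid
# initialW for Bilinear has shape (in1, in2, out) and a valid initial_bias
# has shape (out,), while a mis-shaped initializer should trigger the
# AssertionError that check_invalid expects. The test method name and the
# way the bad shape is produced are illustrative assumptions.
def test_initializers(self):
    initialW = numpy.random.uniform(
        -1, 1, (self.in_shape[0], self.in_shape[1], self.out_size))
    initial_bias = numpy.random.uniform(-1, 1, (self.out_size,))
    self.check_normal(initialW, initial_bias, nobias=False)
    self.check_invalid(initialW.transpose(2, 0, 1), initial_bias,
                       nobias=False)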