def __init__(self, learning_width, learning_height, interval_w, interval_h):
    # Experience memory
    self.tmp_memory = deque()
    self.tmp_size = 1      # number of frames per state (actual memory size is +1)
    self.memory = deque()  # replay memory (queue)
    self.batch_size = 8    # batch size
    # Model setup
    self.model = Neuralnet(self.tmp_size)
    self.target_model = copy.deepcopy(self.model)
    # Optimizer setup
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model)
    # Screen size used for learning (just copies the constructor arguments)
    self.learning_width = learning_width
    self.learning_height = learning_height
    self.interval_w = interval_w
    self.interval_h = interval_h
    # Values used for epsilon-greedy exploration
    self.epsilon = 1                   # initial value
    self.epsilon_decay = 1.0 / 200000  # amount subtracted per step
    self.epsilon_min = 0.05            # floor value
    self.exploration = 70
    # Other settings
    self.step = 0        # step counter
    self.total_step = 0
    self.goal = 0
    self.gamma = 0.90    # discount factor (gamma)
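# A hedged sketch (not part of the original class) of how the epsilon-greedy
# fields initialized above are typically consumed when picking an action;
# `q_values` and `n_actions` are hypothetical placeholders.
import numpy as np

def sketch_select_action(self, q_values, n_actions):
    if np.random.rand() < self.epsilon:
        action = np.random.randint(n_actions)  # explore uniformly at random
    else:
        action = int(np.argmax(q_values))      # exploit the current Q estimate
    # anneal epsilon linearly down to its floor
    self.epsilon = max(self.epsilon_min, self.epsilon - self.epsilon_decay)
    return action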
def __init__(self, gpu, batchsize, data_dir, dataset, net, mode, epochs, save_every, size, **kwargs):
    super(Network, self).__init__(epochs, save_every)
    print "building ..."
    self.input_height = size
    self.input_width = size
    self.net = net
    self.mode = mode
    self.dataset = dataset
    self.train_data, self.test_data = self.get_dataset(data_dir, dataset)
    print 'input_channel ==> %d using %s dataset' % (self.in_channel, self.dataset)

    self.enc = GoogLeNet()
    self.dec = Decoder(self.in_size)
    self.xp = cuda.cupy
    cuda.get_device(gpu).use()
    self.enc.to_gpu()
    self.dec.to_gpu()

    self.o_dec = optimizers.RMSpropGraves()
    self.o_dec.setup(self.dec)
    self.batchsize = batchsize
def setup_optimizer(self, optimizer_name, gradient_clipping=3, weight_decay=0.00001, **kwargs):
    # set optimizer
    if optimizer_name == "Adam":
        self.opt = optimizers.Adam(**kwargs)
    elif optimizer_name == "AdaDelta":
        self.opt = optimizers.AdaDelta(**kwargs)
    elif optimizer_name == "AdaGrad":
        self.opt = optimizers.AdaGrad(**kwargs)
    elif optimizer_name == "RMSprop":
        self.opt = optimizers.RMSprop(**kwargs)
    elif optimizer_name == "RMSpropGraves":
        self.opt = optimizers.RMSpropGraves(**kwargs)
    elif optimizer_name == "SGD":
        self.opt = optimizers.SGD(**kwargs)
    elif optimizer_name == "MomentumSGD":
        self.opt = optimizers.MomentumSGD(**kwargs)
    # self.opt.use_cleargrads()
    self.opt.setup(self)
    self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
    self.opt.add_hook(optimizer.WeightDecay(weight_decay))
    self.opt_params = {
        "optimizer_name": optimizer_name,
        "gradient_clipping": gradient_clipping,
        "weight_decay": weight_decay
    }
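# Hedged usage sketch: setup_optimizer above calls self.opt.setup(self), so it
# is presumably a method of a chainer.Chain subclass. TinyNet is a hypothetical
# host class (Chainer v1-style kwargs init) used only to illustrate the call;
# none of the names below come from the original repo.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizer, optimizers

class TinyNet(chainer.Chain):
    def __init__(self):
        super(TinyNet, self).__init__(fc=L.Linear(4, 2))
    def __call__(self, x):
        return self.fc(x)
    setup_optimizer = setup_optimizer  # attach the function defined above as a method

net = TinyNet()
net.setup_optimizer("RMSpropGraves", gradient_clipping=3, weight_decay=1e-5)
net.cleargrads()
loss = F.sum(net(np.zeros((1, 4), dtype=np.float32)))
loss.backward()
net.opt.update()  # one RMSpropGraves step; clipping and weight-decay hooks apply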
def __init__(self, gpu_id, state_dimention, batchsize, historysize,
             enable_controller=[1, -1, 0], targetFlag=False):
    self.gpu_id = gpu_id
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"
    self.replay_size = batchsize
    self.data_size = historysize
    self.state_dimention = state_dimention
    self.targetFlag = targetFlag

    print "Initializing DQN..."
    # Initialization for Chainer 1.1.0 or older.
    # print "CUDA init"
    # cuda.init()

    print "Model Building"
    self.model = dnn_6.Q_DNN(self.state_dimention, 200, self.num_of_actions)
    self.model.to_gpu(self.gpu_id)
    self.model_target = copy.deepcopy(self.model)

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, 1, self.state_dimention), dtype=np.float32),
              np.zeros(self.data_size, dtype=np.int8),
              np.zeros((self.data_size, 1), dtype=np.float32),
              np.zeros((self.data_size, 1, self.state_dimention), dtype=np.float32),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def optimizer(opt_str):
    """
    Infer the optimizer from the given string.
    """
    if opt_str.lower() == 'adam':
        opt = O.Adam(amsgrad=True)
    elif opt_str.lower() == 'ada_d':
        opt = O.AdaDelta()
    elif opt_str.lower() == 'ada_g':
        opt = O.AdaGrad()
    elif opt_str.lower() == 'm_sgd':
        opt = O.MomentumSGD()
    elif opt_str.lower() == 'n_ag':
        opt = O.NesterovAG()
    elif opt_str.lower() == 'rmsp':
        opt = O.RMSprop()
    elif opt_str.lower() == 'rmsp_g':
        opt = O.RMSpropGraves()
    elif opt_str.lower() == 'sgd':
        opt = O.SGD()
    elif opt_str.lower() == 'smorms':
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0]))

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
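# Hedged usage sketch for the factory above; `model` is a hypothetical
# chainer.Chain instance, and the Adam(amsgrad=True) fallback implies
# Chainer >= 4. Note the function name shadows the chainer.optimizer module
# if that module is imported under the same name.
opt = optimizer('rmsp_g')  # -> chainer.optimizers.RMSpropGraves()
opt.setup(model)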
def which_is_best_optimizer(k=10, model=CNN()):
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.Adam(), tag='Adam')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.SGD(), tag='SGD')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSpropGraves(), tag='RMSpropGraves')
    # k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSprop(), tag='RMSprop')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.AdaDelta(), tag='AdaDelta')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.AdaGrad(), tag='AdaGrad')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.MomentumSGD(), tag='MomentumSGD')
    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.NesterovAG(), tag='NesterovAG')
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"

    print "Initializing DQN..."
    print "CUDA init"
    cuda.init()

    print "Model Building"
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 16, ksize=8, stride=4, wscale=np.sqrt(2)),
        l2=F.Convolution2D(16, 32, ksize=4, stride=2, wscale=np.sqrt(2)),
        l3=F.Linear(2592, 256),
        q_value=F.Linear(256, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 256),
                                           dtype=np.float32))).to_gpu()

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
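# Hedged sketch (not from the original class) of writing one transition into
# the preallocated history D above, using a cyclic index so the buffer wraps
# once self.data_size entries have been stored. Argument names mirror the
# D=[s, a, r, s_dash, end_episode_flag] comment and are illustrative only.
def sketch_store_transition(self, time, s, a, r, s_dash, episode_end):
    i = time % self.data_size   # cyclic write position
    self.D[0][i] = s            # state (4 stacked 84x84 frames)
    self.D[1][i] = a            # action index
    self.D[2][i] = r            # (clipped) reward
    self.D[3][i] = s_dash       # successor state
    self.D[4][i] = episode_end  # terminal flag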
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"

    print "Initializing DQN..."
    # Initialization for Chainer 1.1.0 or older.
    # print "CUDA init"
    # cuda.init()

    print "Model Building"
    self.model = FunctionSet(
        l1=convlstm_link.CONVLSTM(7056, 7056),
        l4=F.Linear(7056, 512, wscale=np.sqrt(2)),
        q_value=F.Linear(512, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 512),
                                           dtype=np.float32))).to_gpu()
    self.model_target = copy.deepcopy(self.model)

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, n_history, n_act):
    print "Initializing DQN..."
    self.step = 0  # number of steps that the DQN has been updated
    self.n_act = n_act
    self.n_history = n_history  # number of observations used to construct a single state

    print "Model Building"
    self.model = ActionValue(n_history, n_act).to_gpu()
    self.model_target = copy.deepcopy(self.model)

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.01)
    self.optimizer.setup(self.model)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    hs = self.n_history
    ims = self.img_size
    self.replay_buffer = [np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros(self.data_size, dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.float32),
                          np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, use_gpu, enable_controller, dim):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim

    print("Initializing Q-Network...")
    print("Input Dim of Q-Network : ", self.dim * self.hist_size)
    hidden_dim = 256
    self.model = FunctionSet(
        l4=F.Linear(self.dim * self.hist_size, hidden_dim, wscale=np.sqrt(2)),
        l5=F.Linear(hidden_dim, hidden_dim, wscale=np.sqrt(2)),
        q_value=F.Linear(hidden_dim, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, hidden_dim),
                                           dtype=np.float32)))
    if self.use_gpu >= 0:
        self.model.to_gpu()
    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def agent_init(self, task_spec_str):
    task_spec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_str)

    if not task_spec.valid:
        raise ValueError(
            'Task spec could not be parsed: {}'.format(task_spec_str))

    self.gamma = task_spec.getDiscountFactor()  # discount factor

    # Build the DQN
    # Arg1: input layer size
    # Arg2: number of hidden-layer nodes
    # Arg3: output layer size
    self.Q = QNet(self.bdim * self.n_frames,
                  self.bdim * self.n_frames,
                  self.dim)

    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.Q.to_gpu()
    self.xp = np if self.gpu < 0 else cuda.cupy

    self.targetQ = copy.deepcopy(self.Q)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.0)
    self.optimizer.setup(self.Q)
def optimizer(opt_str):
    """
    Infer the optimizer from the given string.
    """
    if opt_str.lower() == 'adam':
        opt = O.Adam(amsgrad=True)
    elif opt_str.lower() == 'ada_d':
        opt = O.AdaDelta()
    elif opt_str.lower() == 'ada_g':
        opt = O.AdaGrad()
    elif opt_str.lower() == 'm_sgd':
        opt = O.MomentumSGD()
    elif opt_str.lower() == 'n_ag':
        opt = O.NesterovAG()
    elif opt_str.lower() == 'rmsp':
        opt = O.RMSprop()
    elif opt_str.lower() == 'rmsp_g':
        opt = O.RMSpropGraves()
    elif opt_str.lower() == 'sgd':
        opt = O.SGD()
    elif opt_str.lower() == 'smorms':
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
def __init__(self, input_vector_length, enable_controller=[0, 1, 2]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"
    self.input_vector_length = input_vector_length

    print "Initializing DQN..."
    # Initialization for Chainer 1.1.0 or older.
    # print "CUDA init"
    # cuda.init()

    # inputs --> 5 * 14 (with 10 temporality) + 5 (of last one hour) + 5 (of last 24 hours)
    print "Model Building"
    self.model = FunctionSet(
        l1=F.Linear(input_vector_length, 500),
        l2=F.Linear(500, 250),
        l3=F.Linear(250, 80),
        q_value=F.Linear(80, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 80),
                                           dtype=np.float32))).to_gpu()

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, self.input_vector_length), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.input_vector_length), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, file=None, lr=0.01):
    self.cnn = CNN()
    self.lr = lr
    self.model = L.Classifier(self.cnn, F.mean_squared_error, F.mean_squared_error)
    self.optimizer = optimizers.RMSpropGraves()
    self.optimizer.setup(self.model)
    self.num_act = 12
    self.cur_inp = np.array([])
    self.cur_out = np.array([])
    self.cur_act = 0
    self.prv_inp = np.array([])
    self.prv_out = np.array([])
    self.prv_act = 0
    self.ins = []
    self.tcs = []
    self.gamma = 0.99
    self.epsilon = 10  # epsilon-greedy: choose a random action epsilon percent of the time
    if file:
        serializers.load_npz(file, self.cnn)
def get_opt(args):
    if args.opt_model == "SGD":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.SGD(lr=alpha0)
    if args.opt_model == "AdaGrad":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.AdaGrad(lr=alpha0)
    if args.opt_model == "AdaDelta":
        alpha0 = 0.95 if args.alpha0 == 0 else args.alpha0
        alpha1 = 1e-06 if args.alpha1 == 0 else args.alpha1
        return optimizers.AdaDelta(rho=alpha0, eps=alpha1)
    if args.opt_model == "Momentum":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.MomentumSGD(lr=alpha0, momentum=alpha1)
    if args.opt_model == "NAG":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.NesterovAG(lr=alpha0, momentum=alpha1)
    if args.opt_model == "RMS":
        return optimizers.RMSpropGraves()
    if args.opt_model == "SM":
        return optimizers.SMORMS3()
    if args.opt_model == "Adam":  # default case
        alpha0 = 0.001 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        alpha2 = 0.999 if args.alpha2 == 0 else args.alpha2
        alpha3 = 1e-08 if args.alpha3 == 0 else args.alpha3
        return optimizers.Adam(alpha=alpha0, beta1=alpha1, beta2=alpha2, eps=alpha3)
    print('no such optimization method', args.opt_model)
    sys.exit(1)
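# Hedged usage sketch: get_opt reads opt_model and the alpha0..alpha3 fields
# from an argparse-style namespace, treating 0 as "use the default".
# SimpleNamespace stands in for the real parser result here.
from types import SimpleNamespace

args = SimpleNamespace(opt_model="RMS", alpha0=0, alpha1=0, alpha2=0, alpha3=0)
opt = get_opt(args)  # -> optimizers.RMSpropGraves() with library defaults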
def __init__(self, use_gpu, enable_controller, dim):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim
    self.hidden_dim = 256
    self.predictor_error = 0

    print("Initializing Q-Network...")
    self.model = QCalculationChain(self.dim * self.hist_size, self.hidden_dim, self.num_of_actions)
    if self.use_gpu >= 0:
        self.model.to_gpu(self.use_gpu)
    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model)

    self.Predictor = DegInterestChain(self.dim, self.hist_size, self.num_of_actions, 1000)
    if self.use_gpu >= 0:
        self.Predictor.to_gpu(self.use_gpu)
    self.optimizer_pred = optimizers.Adam()
    self.optimizer_pred.setup(self.Predictor)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.float32),
              np.zeros(self.data_size, dtype=np.float32),
              np.zeros((self.data_size, 1), dtype=np.float32),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.float32),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, actions, max_steps, n_history=1):
    print("Initializing DQN...")
    self.actions = actions
    self.n_history = n_history
    self.max_steps = max_steps
    # print "n_history = ", n_history
    self.time_stamp = 0

    print("Model Building")
    self.model = ActionValue(self.n_history, self.n_act)
    self.model_target = copy.deepcopy(self.model)

    print("Initializing Optimizer")
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.01)
    self.optimizer.setup(self.model)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    hs = self.n_history
    # ims = self.img_size
    self.replay_buffer = [np.zeros((self.data_size, hs, self.max_steps, self.n_act), dtype=np.float32),
                          np.zeros(self.data_size, dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.float32),
                          np.zeros((self.data_size, hs, self.max_steps, self.n_act), dtype=np.float32),
                          np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self,
             agent: Agent,
             memory_size: int = 10**4,
             replay_size: int = 32,
             gamma: float = 0.99,
             initial_exploration: int = 10**4,
             target_update_freq: int = 10**4,
             learning_rate: float = 0.00025,
             epsilon_decay: float = 1e-6,
             minimum_epsilon: float = 0.1) -> None:
    '''
    Trainer for Renju Agent class.

    @param memory_size:
    @param replay_size:
    @param gamma: discount factor over time
    @param initial_exploration:
    @param target_update_freq: frequency of updating the target network weights
    @param learning_rate: learning rate for the TD error
    @param epsilon_decay: decay rate of epsilon in epsilon-greedy
    @param minimum_epsilon: minimum epsilon in epsilon-greedy after decay
    '''
    self._agent = agent
    self._target = RenjuQ(self._agent.get_num_history(),
                          self._agent.get_num_action(),
                          on_gpu=self._agent.get_on_gpu())
    self._memory_size = memory_size
    self._replay_size = replay_size
    self._gamma = gamma
    self._initial_exploration = initial_exploration
    self._target_update_freq = target_update_freq
    self._learning_rate = learning_rate
    self._epsilon_decay = epsilon_decay
    self._minimum_epsilon = minimum_epsilon
    self._step = 0

    # Prepare memory for replay
    num_history = self._agent.get_num_history()
    size = RenjuAgent.SIZE
    self._memory = [
        np.zeros((memory_size, num_history, size, size), dtype=np.float32),
        np.zeros(memory_size, dtype=np.uint8),
        np.zeros((memory_size, 1), dtype=np.float32),
        np.zeros((memory_size, num_history, size, size), dtype=np.float32),
        np.zeros((memory_size, 1), dtype=np.bool)
    ]
    self._memory_text = ["state", "action", "reward", "next_state", "episode_end"]

    # Prepare optimizer
    self._optimizer = optimizers.RMSpropGraves(lr=learning_rate, alpha=0.95, momentum=0.95, eps=0.01)
    self._optimizer.setup(self._agent.get_q())
    self._loss = 0
    self._qv = 0
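# Hedged sketch (illustrative names, not from the original class) of sampling
# a minibatch from the parallel-array replay memory prepared above; the slot
# order follows self._memory_text.
import numpy as np

def sketch_sample_replay(memory, filled, replay_size):
    idx = np.random.randint(0, filled, size=replay_size)  # uniform over stored entries
    state, action, reward, next_state, episode_end = (m[idx] for m in memory)
    return state, action, reward, next_state, episode_end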
def setup_optimizer(self):
    optimizer = optimizers.RMSpropGraves(lr=self.args.start_lr, alpha=0.95,
                                         momentum=0.9, eps=1e-08)
    optimizer.setup(self)
    optimizer.add_hook(
        chainer.optimizer.GradientClipping(self.args.grad_clip))
    return optimizer
def __init__(self, use_gpu, enable_controller, dim):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim

    print("Initializing Q-Network...")
    hidden_dim1 = 64
    # hidden_dim1 = 32
    hidden_dim2 = 128
    hidden_dim3 = 10
    hidden_cont = 100
    self.model = FunctionSet(
        l4=linearL4_link.LinearL4_link(self.dim * self.hist_size * self.time_M,
                                       hidden_cont, wscale=np.sqrt(2)),
        l5=MU_l6.memory_unit_link(self.dim * self.hist_size * self.time_M,
                                  hidden_dim3 * hidden_cont, wscale=np.sqrt(2)),
        l6=MU_l6.memory_unit_link(self.dim * self.hist_size * self.time_M,
                                  hidden_dim3 * hidden_cont, wscale=np.sqrt(2)),
        l7=attention.Attention(hidden_cont, hidden_dim3 * hidden_cont, hidden_dim3),
        l8=retrieval.Retrieval(hidden_dim3, hidden_dim3 * hidden_cont, hidden_cont),
        l9=F.Bilinear(hidden_cont, hidden_cont, hidden_dim2),
        q_value=F.Linear(hidden_dim2, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, hidden_dim2),
                                           dtype=np.float32)))
    if self.use_gpu >= 0:
        self.model.to_gpu()
    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s (now & 10 history), a, r, s_dash, end_episode_flag]
    # modified for MQN
    self.d = [np.zeros((self.data_size, self.hist_size * self.time_M, self.dim), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, use_gpu, enable_controller, dim):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim
    self.scene_loss = 0

    print("Initializing Q-Network...")
    hidden_dim = 256
    self.action_model = PredictActionModel(self.dim, hidden_dim, self.num_of_actions)
    self.scene_model = PredictSceneModel(self.dim)
    if self.use_gpu >= 0:
        self.action_model.to_gpu()
        self.scene_model.to_gpu()
    self.action_model_target = copy.deepcopy(self.action_model)

    self.action_optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.action_optimizer.setup(self.action_model)
    self.scene_optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.scene_optimizer.setup(self.scene_model)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, use_gpu, enable_controller, cnn_input_dim, feature_dim, agent_count, other_input_dim, model):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.cnn_input_dim = cnn_input_dim
    self.feature_dim = feature_dim
    self.agent_count = agent_count
    self.other_input_dim = other_input_dim
    self.data_size = self.timestep_per_episode
    self.loss_log_file = self.loss_log + "loss.log"
    self.loss_per_episode = 0
    self.time_of_episode = 0

    print("Initializing Q-Network...")
    if model == 'None':
        self.model = Chain(
            conv1=L.Convolution2D(3 * self.hist_size, 32, 4, stride=2),
            bn1=L.BatchNormalization(32),
            conv2=L.Convolution2D(32, 32, 4, stride=2),
            bn2=L.BatchNormalization(32),
            conv3=L.Convolution2D(32, 32, 4, stride=2),
            bn3=L.BatchNormalization(32),
            # conv4=L.Convolution2D(64, 64, 4, stride=2),
            # bn4=L.BatchNormalization(64),
            l1=L.Linear(self.feature_dim + self.other_input_dim * self.hist_size, 128),
            l2=L.Linear(128, 128),
            l3=L.Linear(128, 96),
            l4=L.Linear(96, 64),
            q_value=L.Linear(64, self.num_of_actions))
    else:
        with open(model, 'rb') as i:
            self.model = pickle.load(i)
        self.data_size = 0
    if self.use_gpu >= 0:
        self.model.to_gpu()

    self.optimizer = optimizers.RMSpropGraves()
    self.optimizer.setup(self.model)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.d = [np.zeros((self.agent_count, self.data_size, self.hist_size, 128, 128, 3), dtype=np.uint8),
              np.zeros((self.agent_count, self.data_size, self.hist_size, self.other_input_dim), dtype=np.uint8),
              np.zeros((self.agent_count, self.data_size), dtype=np.uint8),
              np.zeros((self.agent_count, self.data_size, 1), dtype=np.float32),
              np.zeros((self.agent_count, self.data_size, 1), dtype=np.bool)]
def main(epochs=257*8, lr=0.38, seq_len=120, pred_len=39, out="result", device=0):
    # CHOOSE ONE:
    # get the training dataset but keep a slice for validation
    dataset = get_dataset(182, -39 - 39)
    # get the entire dataset
    # dataset = get_dataset(182, -39)

    iter = ParallelSequentialIterator(dataset, pred_len=1, repeat=True)

    model = Model(pred_len=pred_len, dropout=0.1)
    if device >= 0:
        model.to_gpu()

    # Try some different optimizers
    # optimizer = optimizers.Adam(alpha=lr)
    # optimizer = optimizers.MomentumSGD(lr=lr, momentum=0.9)
    optimizer = optimizers.RMSpropGraves(lr=lr, alpha=0.95, momentum=0.2)
    # optimizer = optimizers.RMSprop(lr=lr, alpha=0.5)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.GradientClipping(5))  # grad_clip
    # optimizer.add_hook(chainer.optimizer.WeightDecay(1.E-7))

    updater = BPTTUpdater(iter, optimizer, seq_len=seq_len, pred_len=pred_len,
                          data_len=len(dataset), device=device)
    trainer = Trainer(updater, (epochs, 'epoch'), out=out)

    interval = 10
    # Try some learning-rate decay
    # trainer.extend(extensions.ExponentialShift('lr', 0.995))  # 0.1, lr, lr * 0.1), trigger=(10, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=(interval, "iteration"))
    trainer.extend(extensions.LogReport(trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'loss', 'lr']),
                   trigger=(interval, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=interval))

    # export snapshots to resume training
    trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
                   trigger=(257*6, "epoch"))
    trainer.extend(extensions.snapshot_object(model, "model_epoch_{.updater.epoch}"),
                   trigger=(257*2, "epoch"))

    # change to True to resume from file
    if False:
        chainer.serializers.load_npz('result/snapshot_epoch_1030', trainer)

    trainer.run()

    # save model
    from chainer import serializers
    serializers.save_npz('restaurant.model', model)
def optimize_rmsprop(self, init_img, lr=0.1, alpha=0.95, momentum=0.9, eps=1e-4,
                     iterations=2000, save=50, filename='iter', str_contrast=False):
    chainer_rms = optimizers.RMSpropGraves(lr=lr, alpha=alpha, momentum=momentum, eps=eps)
    state = {'n': xp.zeros_like(init_img.data),
             'g': xp.zeros_like(init_img.data),
             'delta': xp.zeros_like(init_img.data)}
    out_img = Variable(xp.zeros_like(init_img.data), volatile=True)
    time_start = time.time()

    for epoch in range(iterations):
        loss = self.loss_total(init_img)
        loss.backward()
        loss.unchain_backward()

        # normalize the gradient by its L1 norm
        grad_l1_norm = xp.sum(xp.absolute(init_img.grad))
        init_img.grad /= grad_l1_norm

        if gpu_flag:
            chainer_rms.update_one_gpu(init_img, state)
        else:
            chainer_rms.update_one_cpu(init_img, state)
        init_img.zerograd()

        # save image every 'save' iterations
        if save != 0 and (epoch + 1) % save == 0:
            if self.preserve_color:
                init_img_lum = separate_lum_chr(init_img)[0]
                if gpu_flag:
                    init_img_lum.to_gpu()
                out_img.copydata(init_img_lum + self.content_img_chr)
            else:
                out_img.copydata(init_img)
            save_image(out_img, filename + '_' + str(epoch + 1) + '.png', contrast=str_contrast)
            print("Image Saved at Iteration %.0f, Time Used: %.4f, Total Loss: %.4f"
                  % ((epoch + 1), (time.time() - time_start), loss.data))
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"

    print "Initializing DQN..."
    print "CUDA init"
    cuda.init()

    print "Model Building"
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
        l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
        l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
        l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
        q_value=F.Linear(512, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 512),
                                           dtype=np.float32))).to_gpu()
    self.model_target = copy.deepcopy(self.model)

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, agent, memory_size=10**4, replay_size=32, gamma=0.99,
             initial_exploration=10**4, target_update_freq=10**4,
             learning_rate=0.00025, epsilon_decay=1e-6, minimum_epsilon=0.1,
             L1_rate=None):
    self.agent = agent
    self.target = Q(self.agent.q.n_history, self.agent.q.n_action,
                    on_gpu=self.agent.q.on_gpu)
    self.memory_size = memory_size
    self.replay_size = replay_size
    self.gamma = gamma
    self.initial_exploration = initial_exploration
    self.target_update_freq = target_update_freq
    self.learning_rate = learning_rate
    self.epsilon_decay = epsilon_decay
    self.minimum_epsilon = minimum_epsilon
    self._step = 0

    # prepare for replay
    n_hist = self.agent.q.n_history
    size = self.agent.q.SIZE
    self.memory = [
        np.zeros((memory_size, n_hist, 3, size, size), dtype=np.float32),
        np.zeros(memory_size, dtype=np.uint8),
        np.zeros((memory_size, 1), dtype=np.float32),
        np.zeros((memory_size, n_hist, 3, size, size), dtype=np.float32),
        np.zeros((memory_size, 1), dtype=np.bool)
    ]
    self.memory_text = ["state", "action", "reward", "next_state", "episode_end"]

    # prepare optimizer
    self.optimizer = optimizers.RMSpropGraves(lr=learning_rate, alpha=0.95, momentum=0.95, eps=0.01)
    self.optimizer.setup(self.agent.q)
    if L1_rate is not None:
        self.optimizer.add_hook(optimizer.Lasso(L1_rate))
    self._loss = 0
    self._qv = 0
def open(self):
    global REMOTE_MODE

    # load model file
    if REMOTE_MODE == "TEXTURE":
        self.model = CNN()
    elif REMOTE_MODE == "DISTANCE":
        self.model = MLP()
    self.brain = DQN(self.model)
    try:
        serializers.load_hdf5(MODEL_FILE, self.brain)
        # serializers.save_hdf5("MLP.model", self.brain.predictor)
        print("succeeded to load model file")
    except:
        print("failed to load model file")

    # load history file
    try:
        fp = open(HISTORY_FILE, "r")
        self.history = pickle.load(fp)
        self.turn = self.history.turn + 1
        self.state = self.history.getNewestState().copy()
        self.last_state = self.history.getNewestLastState().copy()
        fp.close()
        print("succeeded to load history file. [restarting from turn " + str(self.turn) + "]")
    except:
        self.history = History(self.historySize, self.observeLength, self.imageHeight, self.imageWidth)
        self.turn = 0
        if REMOTE_MODE == "TEXTURE":
            self.state = np.zeros((self.observeLength, self.imageHeight, self.imageWidth), dtype=np.float32)
            self.last_state = np.zeros((self.observeLength, self.imageHeight, self.imageWidth), dtype=np.float32)
        elif REMOTE_MODE == "DISTANCE":
            self.state = np.zeros((DISTANCE_INPUT), dtype=np.float32)
            self.last_state = np.zeros((DISTANCE_INPUT), dtype=np.float32)
        print("failed to load history file")

    self.brain.to_gpu()
    # self.optimizer = optimizers.Adam()
    self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
    self.optimizer.setup(self.brain)

    # hold connection
    if self not in simulator_clients:
        simulator_clients.append(self)
def __init__(self, use_gpu, num_of_action_type, num_of_pad, dim):
    self.use_gpu = use_gpu
    self.num_of_action_type = num_of_action_type
    self.num_of_pad = num_of_pad
    self.num_of_actions = num_of_action_type * num_of_pad
    self.dim = dim

    print("Initializing Q-Network...\n")
    self.q_net_filename = "q_net.pickle"
    if os.path.exists(self.q_net_filename):
        print("Loading Q-Network Model...\n")
        self.model = self.load_model()
    else:
        hidden_dim = 256
        self.model = FunctionSet(
            l4=F.Linear(self.dim * self.hist_size, hidden_dim, wscale=np.sqrt(2)),
            q_value=F.Linear(hidden_dim, self.num_of_actions,
                             initialW=np.zeros((self.num_of_actions, hidden_dim),
                                               dtype=np.float32)))
    if self.use_gpu >= 0:
        self.model.to_gpu()
    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, self.num_of_pad), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self, use_gpu, enable_controller, dim, epsilon, epsilon_delta, min_eps):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim
    self.epsilon = epsilon
    self.epsilon_delta = epsilon_delta
    self.min_eps = min_eps
    self.time = 0

    app_logger.info("Initializing Q-Network...")
    hidden_dim = 256
    self.model = Chain(
        l4=L.Linear(self.dim * self.hist_size, hidden_dim,
                    initialW=initializers.Normal(0.5 / math.sqrt(self.dim * self.hist_size))),
        q_value=L.Linear(hidden_dim, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, hidden_dim),
                                           dtype=np.float32)))
    if self.use_gpu >= 0:
        self.model.to_gpu()
    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
def __init__(self,
             state_shape,
             action_num,
             image_num_per_state,
             model,
             gamma=0.99,                    # discount factor
             replay_batch_size=32,
             replay_memory_size=5 * 10**4,
             target_model_update_freq=1,
             max_step=50,
             lr=0.00025,
             clipping=False                 # if True, ignore reward intensity
             ):
    print("initializing DQN...")
    self.action_num = action_num
    self.image_num_per_state = image_num_per_state
    self.gamma = gamma
    self.replay_batch_size = replay_batch_size
    self.replay_memory_size = replay_memory_size
    self.target_model_update_freq = target_model_update_freq
    self.max_step = max_step
    self.clipping = clipping

    print("Initializing Model...")
    self.model = model
    self.model_target = copy.deepcopy(self.model)

    print("Initializing Optimizer")
    self.optimizer = optimizers.RMSpropGraves(lr=lr, alpha=0.95, momentum=0.0, eps=0.01)
    self.optimizer.setup(self.model)
    self.optimizer.add_hook(chainer.optimizer.GradientClipping(20))

    print("Initializing Replay Buffer...")
    self.dataset = dataset.DataSet(max_size=replay_memory_size, max_step=max_step,
                                   frame_shape=state_shape, frame_dtype=np.uint8)

    self.xp = model.xp
    self.state_shape = state_shape
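# Hedged sketch (not from the original class) of how the pieces prepared above
# usually interact on one training step; compute_loss is a hypothetical helper
# returning a TD-error loss Variable over a replayed minibatch.
import copy

def sketch_train_step(dqn, minibatch):
    dqn.model.cleargrads()
    loss = compute_loss(dqn.model, dqn.model_target, minibatch, dqn.gamma)
    loss.backward()
    dqn.optimizer.update()  # RMSpropGraves step; the GradientClipping(20) hook applies
    # periodically refresh the frozen target network
    if dqn.optimizer.t % dqn.target_model_update_freq == 0:
        dqn.model_target = copy.deepcopy(dqn.model)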