Example #1
    def __init__(self, learning_width, learning_height, interval_w,
                 interval_h):
        # Experience memory
        self.tmp_memory = deque()
        self.tmp_size = 1  # number of frames per state (actual memory size is this +1)
        self.memory = deque()  # memory (queue)
        self.batch_size = 8  # batch size
        # Model setup
        self.model = Neuralnet(self.tmp_size)
        self.target_model = copy.deepcopy(self.model)
        # Optimizer setup
        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.0001)
        self.optimizer.setup(self.model)
        # Screen size used for learning (just a copy of the arguments above)
        self.learning_width = learning_width
        self.learning_height = learning_height
        self.interval_w = interval_w
        self.interval_h = interval_h

        # Values used for epsilon-greedy exploration
        self.epsilon = 1  # initial value
        self.epsilon_decay = 1.0 / 200000  # amount subtracted per step
        self.epsilon_min = 0.05  # minimum value
        self.exploration = 70
        # Other settings
        self.step = 0  # step count
        self.total_step = 0
        self.goal = 0
        self.gamma = 0.90  # discount factor (gamma)
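
The RMSpropGraves settings above (lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001) are hyperparameters commonly used in DQN implementations. Below is a minimal, self-contained sketch (not part of the example above) of the same optimizer setup and a single update step in modern Chainer; TinyQNet is a hypothetical stand-in for the Neuralnet class.

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers

class TinyQNet(chainer.Chain):
    # Illustrative two-layer network, not from the project above.
    def __init__(self, n_in, n_hidden, n_out):
        super(TinyQNet, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(n_in, n_hidden)
            self.q_value = L.Linear(n_hidden, n_out)

    def __call__(self, x):
        return self.q_value(F.relu(self.l1(x)))

model = TinyQNet(4, 32, 2)
opt = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
opt.setup(model)

x = np.random.rand(8, 4).astype(np.float32)  # dummy minibatch of states
t = np.random.rand(8, 2).astype(np.float32)  # dummy target Q-values

model.cleargrads()
loss = F.mean_squared_error(model(x), t)
loss.backward()
opt.update()
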
Example #2
    def __init__(self, gpu, batchsize, data_dir, dataset, net, mode, epochs,
                 save_every, size, **kwargs):
        super(Network, self).__init__(epochs, save_every)
        print "building ..."
        self.input_height = size
        self.input_width = size
        self.net = net
        self.mode = mode
        self.dataset = dataset
        self.train_data, self.test_data = self.get_dataset(data_dir, dataset)
        print 'input_channel ==> %d using %s dataset' % (self.in_channel,
                                                         self.dataset)

        self.enc = GoogLeNet()
        self.dec = Decoder(self.in_size)

        self.xp = cuda.cupy
        cuda.get_device(gpu).use()

        self.enc.to_gpu()
        self.dec.to_gpu()

        self.o_dec = optimizers.RMSpropGraves()
        self.o_dec.setup(self.dec)

        self.batchsize = batchsize
Example #3
    def setup_optimizer(self,
                        optimizer_name,
                        gradient_clipping=3,
                        weight_decay=0.00001,
                        **kwargs):
        # set optimizer
        if optimizer_name == "Adam":
            self.opt = optimizers.Adam(**kwargs)
        elif optimizer_name == "AdaDelta":
            self.opt = optimizers.AdaDelta(**kwargs)
        elif optimizer_name == "AdaGrad":
            self.opt = optimizers.AdaGrad(**kwargs)
        elif optimizer_name == "RMSprop":
            self.opt = optimizers.RMSprop(**kwargs)
        elif optimizer_name == "RMSpropGraves":
            self.opt = optimizers.RMSpropGraves(**kwargs)
        elif optimizer_name == "SGD":
            self.opt = optimizers.SGD(**kwargs)
        elif optimizer_name == "MomentumSGD":
            self.opt = optimizers.MomentumSGD(**kwargs)

        # self.opt.use_cleargrads()
        self.opt.setup(self)
        self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
        self.opt.add_hook(optimizer.WeightDecay(weight_decay))

        self.opt_params = {
            "optimizer_name": optimizer_name,
            "gradient_clipping": gradient_clipping,
            "weight_decay": weight_decay
        }
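
The if/elif chain in setup_optimizer() can also be expressed as a lookup table. A minimal sketch of that alternative follows; make_optimizer and _OPTIMIZERS are illustrative names, not part of the project above.

from chainer import optimizers

_OPTIMIZERS = {
    "Adam": optimizers.Adam,
    "AdaDelta": optimizers.AdaDelta,
    "AdaGrad": optimizers.AdaGrad,
    "RMSprop": optimizers.RMSprop,
    "RMSpropGraves": optimizers.RMSpropGraves,
    "SGD": optimizers.SGD,
    "MomentumSGD": optimizers.MomentumSGD,
}

def make_optimizer(optimizer_name, **kwargs):
    # Raises KeyError for an unknown name instead of silently leaving
    # the optimizer unset, which is what the if/elif version would do.
    return _OPTIMIZERS[optimizer_name](**kwargs)
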
Example #4
    def __init__(self, gpu_id, state_dimention, batchsize, historysize, enable_controller=[1, -1, 0], targetFlag=False):
        self.gpu_id = gpu_id
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller  # Default setting : "Pong"
        self.replay_size = batchsize
        self.data_size = historysize
        
        self.state_dimention = state_dimention
        self.targetFlag = targetFlag
        
        print "Initializing DQN..."
        #	Initialization of Chainer 1.1.0 or older.
        #        print "CUDA init"
        #        cuda.init()

        print "Model Building"
        self.model = dnn_6.Q_DNN(self.state_dimention,200,self.num_of_actions)
        self.model.to_gpu(self.gpu_id)
        
        
        self.model_target = copy.deepcopy(self.model)

        print "Initizlizing Optimizer"
        self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
        self.optimizer.setup(self.model)

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.D = [np.zeros((self.data_size, 1, self.state_dimention), dtype=np.float32),
                  np.zeros(self.data_size, dtype=np.int8),
                  np.zeros((self.data_size, 1), dtype=np.float32),
                  np.zeros((self.data_size, 1, self.state_dimention), dtype=np.float32),
                  np.zeros((self.data_size, 1), dtype=np.bool)]
Example #5
def optimizer(opt_str):
    """
    入力文字列からオプティマイザを推定する
    """

    if(opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif(opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif(opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif(opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif(opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif(opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif(opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif(opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif(opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0])
        )

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
Example #6
def which_is_best_optimizer(k=10, model=CNN()):
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.Adam(),
                      tag='Adam')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.SGD(),
                      tag='SGD')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.RMSpropGraves(),
                      tag='RMSpropGraves')
    #    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSprop(), tag='RMSprop')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.AdaDelta(),
                      tag='AdaDelta')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.AdaGrad(),
                      tag='AdaGrad')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.MomentumSGD(),
                      tag='MomentumSGD')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.NesterovAG(),
                      tag='NesterovAG')
Example #7
    def __init__(self, enable_controller=[0, 3, 4]):
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller  # Default setting : "Pong"

        print "Initializing DQN..."
        print "CUDA init"
        cuda.init()

        print "Model Building"
        self.model = FunctionSet(
            l1=F.Convolution2D(4, 16, ksize=8, stride=4, wscale=np.sqrt(2)),
            l2=F.Convolution2D(16, 32, ksize=4, stride=2, wscale=np.sqrt(2)),
            l3=F.Linear(2592, 256),
            q_value=F.Linear(256,
                             self.num_of_actions,
                             initialW=np.zeros((self.num_of_actions, 256),
                                               dtype=np.float32))).to_gpu()

        print "Initizlizing Optimizer"
        self.optimizer = optimizers.RMSpropGraves(lr=0.0002,
                                                  alpha=0.3,
                                                  momentum=0.2)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.D = [
            np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #8
    def __init__(self, enable_controller=[0, 3, 4]):
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller  # Default setting : "Pong"

        print "Initializing DQN..."
        #	Initialization of Chainer 1.1.0 or older.
        #        print "CUDA init"
        #        cuda.init()

        print "Model Building"
        self.model = FunctionSet(
            l1=convlstm_link.CONVLSTM(7056, 7056),
            l4=F.Linear(7056, 512, wscale=np.sqrt(2)),
            q_value=F.Linear(512,
                             self.num_of_actions,
                             initialW=np.zeros((self.num_of_actions, 512),
                                               dtype=np.float32))).to_gpu()

        self.model_target = copy.deepcopy(self.model)

        print "Initizlizing Optimizer"
        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.0001)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.D = [
            np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #9
    def __init__(self, n_history, n_act):
        print "Initializing DQN..."
        self.step = 0  # number of steps that DQN is updated
        self.n_act = n_act
        self.n_history = n_history  # Number of observations used to construct a single state

        print "Model Building"
        self.model = ActionValue(n_history, n_act).to_gpu()
        self.model_target = copy.deepcopy(self.model)

        print "Initizlizing Optimizer"
        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.01)
        self.optimizer.setup(self.model)

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        hs = self.n_history
        ims = self.img_size
        self.replay_buffer = [
            np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.float32),
            np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #10
    def __init__(self, use_gpu, enable_controller, dim):
        self.use_gpu = use_gpu
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller
        self.dim = dim

        print("Initializing Q-Network...")
        print("Input Dim of Q-Network : ",self.dim*self.hist_size)

        hidden_dim = 256
        self.model = FunctionSet(
            l4=F.Linear(self.dim*self.hist_size, hidden_dim, wscale=np.sqrt(2)),
            l5=F.Linear(hidden_dim,hidden_dim,wscale=np.sqrt(2)),
            q_value=F.Linear(hidden_dim, self.num_of_actions,
                    initialW=np.zeros((self.num_of_actions, hidden_dim),
                    dtype=np.float32))
        )

        if self.use_gpu >= 0:
            self.model.to_gpu()

        self.model_target = copy.deepcopy(self.model)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.d = [np.zeros((self.data_size, self.hist_size, self.dim),
                    dtype=np.uint8),
                  np.zeros(self.data_size, dtype=np.uint8),
                  np.zeros((self.data_size, 1), dtype=np.int8),
                  np.zeros((self.data_size, self.hist_size, self.dim),
                    dtype=np.uint8),
                  np.zeros((self.data_size, 1), dtype=np.bool)]
Example #11
    def agent_init(self, task_spec_str):
        task_spec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_str)

        if not task_spec.valid:
            raise ValueError(
                'Task spec could not be parsed: {}'.format(task_spec_str))

        self.gamma = task_spec.getDiscountFactor()  # discount factor
        # Build the DQN
        # Arg1: input layer size
        # Arg2: number of hidden-layer nodes
        # Arg3: output layer size
        self.Q = QNet(self.bdim * self.n_frames, self.bdim * self.n_frames,
                      self.dim)

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.Q.to_gpu()
        self.xp = np if self.gpu < 0 else cuda.cupy

        self.targetQ = copy.deepcopy(self.Q)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.0)
        self.optimizer.setup(self.Q)
Example #12
def optimizer(opt_str):
    """
    入力文字列からオプティマイザを推定する
    """

    if (opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif (opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif (opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif (opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif (opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif (opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif (opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif (opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif (opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
Example #13
    def __init__(self, input_vector_length,enable_controller=[0, 1, 2]):
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller  # Default setting : "Pong"
        self.input_vector_length = input_vector_length

        print "Initializing DQN..."
#   Initialization for Chainer 1.1.0 or older.
#        print "CUDA init"
#        cuda.init()
        
        # inputs --> 5 * 14 (over 10 time steps) + 5 (from the last hour) + 5 (from the last 24 hours)
        print "Model Building"
        self.model = FunctionSet(
            l1=F.Linear(input_vector_length, 500),
            l2=F.Linear(500, 250),
            l3=F.Linear(250, 80),
            q_value=F.Linear(80, self.num_of_actions,
                             initialW=np.zeros((self.num_of_actions, 80),
                                               dtype=np.float32))
        ).to_gpu()

        print "Initizlizing Optimizer"
        self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.D = [np.zeros((self.data_size, self.input_vector_length), dtype=np.uint8),
                  np.zeros(self.data_size, dtype=np.uint8),
                  np.zeros((self.data_size, 1), dtype=np.int8),
                  np.zeros((self.data_size, self.input_vector_length), dtype=np.uint8),
                  np.zeros((self.data_size, 1), dtype=np.bool)]
Example #14
    def __init__(self, file=None, lr=0.01):
        self.cnn = CNN()

        self.lr = lr
        self.model = L.Classifier(self.cnn, F.mean_squared_error,
                                  F.mean_squared_error)
        self.optimizer = optimizers.RMSpropGraves()
        self.optimizer.setup(self.model)

        self.num_act = 12

        self.cur_inp = np.array([])
        self.cur_out = np.array([])
        self.cur_act = 0
        self.prv_inp = np.array([])
        self.prv_out = np.array([])
        self.prv_act = 0

        self.ins = []
        self.tcs = []

        self.gamma = 0.99
        self.epsilon = 10  # epsilon-greedy: choose a random action epsilon percent of the time

        if file:
            serializers.load_npz(file, self.cnn)
Example #15
def get_opt(args):
    if args.opt_model == "SGD":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.SGD(lr=alpha0)
    if args.opt_model == "AdaGrad":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.AdaGrad(lr=alpha0)
    if args.opt_model == "AdaDelta":
        alpha0 = 0.95 if args.alpha0 == 0 else args.alpha0
        alpha1 = 1e-06 if args.alpha1 == 0 else args.alpha1
        return optimizers.AdaDelta(rho=alpha0, eps=alpha1)
    if args.opt_model == "Momentum":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.MomentumSGD(lr=alpha0, momentum=alpha1)
    if args.opt_model == "NAG":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.NesterovAG(lr=alpha0, momentum=alpha1)
    if args.opt_model == "RMS":
        return optimizers.RMSpropGraves()
    if args.opt_model == "SM":
        return optimizers.SMORMS3()
    if args.opt_model == "Adam":  # default case
        alpha0 = 0.001 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        alpha2 = 0.999 if args.alpha2 == 0 else args.alpha2
        alpha3 = 1e-08 if args.alpha3 == 0 else args.alpha3
        return optimizers.Adam(alpha=alpha0,
                               beta1=alpha1,
                               beta2=alpha2,
                               eps=alpha3)
    print('no such optimization method', args.opt_model)
    sys.exit(1)
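
A minimal usage sketch for get_opt(); the argparse setup is an assumption about how args is built (alpha0..alpha3 default to 0, which get_opt() treats as "use the optimizer's own default").

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--opt_model', default='Adam')
parser.add_argument('--alpha0', type=float, default=0)
parser.add_argument('--alpha1', type=float, default=0)
parser.add_argument('--alpha2', type=float, default=0)
parser.add_argument('--alpha3', type=float, default=0)
args = parser.parse_args(['--opt_model', 'RMS'])

opt = get_opt(args)  # returns optimizers.RMSpropGraves() for 'RMS'
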
Example #16
    def __init__(self, use_gpu, enable_controller, dim):
        self.use_gpu = use_gpu
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller
        self.dim = dim
        self.hidden_dim = 256
        self.predictor_error = 0
        
        print("Initializing Q-Network...")      
        self.model = QCalculationChain(self.dim*self.hist_size, self.hidden_dim, self.num_of_actions)
        if self.use_gpu >= 0:
            self.model.to_gpu(self.use_gpu)
        self.model_target = copy.deepcopy(self.model)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
        self.optimizer.setup(self.model)
        
        self.Predictor = DegInterestChain(self.dim, self.hist_size, self.num_of_actions, 1000)
        if self.use_gpu >= 0:
            self.Predictor.to_gpu(self.use_gpu)
            
        self.optimizer_pred = optimizers.Adam()
        self.optimizer_pred.setup(self.Predictor)
        
        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.float32),
                  np.zeros(self.data_size, dtype=np.float32),
                  np.zeros((self.data_size, 1), dtype=np.float32),
                  np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.float32),
                  np.zeros((self.data_size, 1), dtype=np.bool)]
Example #17
    def __init__(self, actions, max_steps, n_history=1):
        print "Initializing DQN..."
        self.actions = actions
        self.n_history = n_history
        self.max_steps = max_steps
        #print "n_history = ", n_history
        self.time_stamp = 0

        print("Model Building")
        self.model = ActionValue(self.n_history, self.n_act)
        self.model_target = copy.deepcopy(self.model)

        print("Initizlizing Optimizer")
        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.01)
        self.optimizer.setup(self.model)

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        hs = self.n_history
        #ims = self.img_size
        self.replay_buffer = [
            np.zeros((self.data_size, hs, self.max_steps, self.n_act),
                     dtype=np.float32),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.float32),
            np.zeros((self.data_size, hs, self.max_steps, self.n_act),
                     dtype=np.float32),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #18
    def __init__(self,
                 agent: Agent,
                 memory_size: int = 10**4,
                 replay_size: int = 32,
                 gamma: float = 0.99,
                 initial_exploration: int = 10**4,
                 target_update_freq: int = 10**4,
                 learning_rate: float = 0.00025,
                 epsilon_decay: float = 1e-6,
                 minimum_epsilon: float = 0.1) -> None:
        '''
        Trainer for the Renju Agent class.
        @param memory_size:
        @param replay_size:
        @param gamma: discount factor over time
        @param initial_exploration:
        @param target_update_freq: how often the target network weights are updated
        @param learning_rate: learning rate for the TD error
        @param epsilon_decay: decay rate of epsilon in epsilon-greedy
        @param minimum_epsilon: minimum epsilon in epsilon-greedy after decay
        '''
        self._agent = agent
        self._target = RenjuQ(self._agent.get_num_history(),
                              self._agent.get_num_action(),
                              on_gpu=self._agent.get_on_gpu())

        self._memory_size = memory_size
        self._replay_size = replay_size
        self._gamma = gamma
        self._initial_exploration = initial_exploration
        self._target_update_freq = target_update_freq
        self._learning_rate = learning_rate
        self._epsilon_decay = epsilon_decay
        self._minimum_epsilon = minimum_epsilon
        self._step = 0

        # Prepare memory for replay
        num_history = self._agent.get_num_history()
        size = RenjuAgent.SIZE
        self._memory = [
            np.zeros((memory_size, num_history, size, size), dtype=np.float32),
            np.zeros(memory_size, dtype=np.uint8),
            np.zeros((memory_size, 1), dtype=np.float32),
            np.zeros((memory_size, num_history, size, size), dtype=np.float32),
            np.zeros((memory_size, 1), dtype=np.bool)
        ]
        self._memory_text = [
            "state", "action", "reward", "next_state", "episode_end"
        ]

        # Prepare optimize
        self._optimizer = optimizers.RMSpropGraves(lr=learning_rate,
                                                   alpha=0.95,
                                                   momentum=0.95,
                                                   eps=0.01)
        self._optimizer.setup(self._agent.get_q())
        self._loss = 0
        self._qv = 0
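
The trainer above only stores epsilon_decay and minimum_epsilon. A minimal sketch (an assumption, not code from the project) of how such values are typically applied per step in an epsilon-greedy policy:

import random

epsilon = 1.0
epsilon_decay = 1e-6
minimum_epsilon = 0.1

def choose_action(q_values, epsilon):
    # With probability epsilon pick a random action, otherwise the greedy one.
    if random.random() < epsilon:
        return random.randrange(len(q_values))
    return max(range(len(q_values)), key=lambda a: q_values[a])

# One decay tick per training step, clipped at the floor.
epsilon = max(minimum_epsilon, epsilon - epsilon_decay)
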
Example #19
    def setup_optimizer(self):
        optimizer = optimizers.RMSpropGraves(lr=self.args.start_lr,
                                             alpha=0.95,
                                             momentum=0.9,
                                             eps=1e-08)
        optimizer.setup(self)
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(self.args.grad_clip))
        return optimizer
Example #20
    def __init__(self, use_gpu, enable_controller, dim):
        self.use_gpu = use_gpu
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller
        self.dim = dim

        print("Initializing Q-Network...")

        hidden_dim1 = 64
        #hidden_dim1 = 32
        hidden_dim2 = 128
        hidden_dim3 = 10
        hidden_cont = 100

        self.model = FunctionSet(
            l4=linearL4_link.LinearL4_link(self.dim * self.hist_size *
                                           self.time_M,
                                           hidden_cont,
                                           wscale=np.sqrt(2)),
            l5=MU_l6.memory_unit_link(self.dim * self.hist_size * self.time_M,
                                      hidden_dim3 * hidden_cont,
                                      wscale=np.sqrt(2)),
            l6=MU_l6.memory_unit_link(self.dim * self.hist_size * self.time_M,
                                      hidden_dim3 * hidden_cont,
                                      wscale=np.sqrt(2)),
            l7=attention.Attention(hidden_cont, hidden_dim3 * hidden_cont,
                                   hidden_dim3),
            l8=retrieval.Retrieval(hidden_dim3, hidden_dim3 * hidden_cont,
                                   hidden_cont),
            l9=F.Bilinear(hidden_cont, hidden_cont, hidden_dim2),
            q_value=F.Linear(hidden_dim2,
                             self.num_of_actions,
                             initialW=np.zeros(
                                 (self.num_of_actions, hidden_dim2),
                                 dtype=np.float32)))
        if self.use_gpu >= 0:
            self.model.to_gpu()

        self.model_target = copy.deepcopy(self.model)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.0001)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s(now & 10history), a, r, s_dash, end_episode_flag]
        # modified to MQN
        self.d = [
            np.zeros((self.data_size, self.hist_size * self.time_M, self.dim),
                     dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, self.hist_size, self.dim),
                     dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #21
    def __init__(self, use_gpu, enable_controller, dim):
        self.use_gpu = use_gpu
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller
        self.dim = dim
        self.scene_loss = 0

        print("Initializing Q-Network...")

        hidden_dim = 256

        self.action_model = PredictActionModel(self.dim, hidden_dim,
                                               self.num_of_actions)
        self.scene_model = PredictSceneModel(self.dim)

        if self.use_gpu >= 0:
            self.action_model.to_gpu()
            self.scene_model.to_gpu()

        self.action_model_target = copy.deepcopy(self.action_model)

        self.action_optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                         alpha=0.95,
                                                         momentum=0.95,
                                                         eps=0.0001)
        self.action_optimizer.setup(self.action_model)

        self.scene_optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                        alpha=0.95,
                                                        momentum=0.95,
                                                        eps=0.0001)
        self.scene_optimizer.setup(self.scene_model)

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.d = [
            np.zeros((self.data_size, self.hist_size, self.dim),
                     dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, self.hist_size, self.dim),
                     dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #22
    def __init__(self, use_gpu, enable_controller, cnn_input_dim, feature_dim,
                 agent_count, other_input_dim, model):
        self.use_gpu = use_gpu
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller
        self.cnn_input_dim = cnn_input_dim
        self.feature_dim = feature_dim
        self.agent_count = agent_count
        self.other_input_dim = other_input_dim
        self.data_size = self.timestep_per_episode
        self.loss_log_file = self.loss_log + "loss.log"
        self.loss_per_episode = 0
        self.time_of_episode = 0

        print("Initializing Q-Network...")

        if model == 'None':
            self.model = Chain(
                conv1=L.Convolution2D(3 * self.hist_size, 32, 4, stride=2),
                bn1=L.BatchNormalization(32),
                conv2=L.Convolution2D(32, 32, 4, stride=2),
                bn2=L.BatchNormalization(32),
                conv3=L.Convolution2D(32, 32, 4, stride=2),
                bn3=L.BatchNormalization(32),
                #                 conv4=L.Convolution2D(64, 64, 4, stride=2),
                #                 bn4=L.BatchNormalization(64),
                l1=L.Linear(
                    self.feature_dim + self.other_input_dim * self.hist_size,
                    128),
                l2=L.Linear(128, 128),
                l3=L.Linear(128, 96),
                l4=L.Linear(96, 64),
                q_value=L.Linear(64, self.num_of_actions))
        else:
            with open(model, 'rb') as i:
                self.model = pickle.load(i)
                self.data_size = 0
        if self.use_gpu >= 0:
            self.model.to_gpu()

        self.optimizer = optimizers.RMSpropGraves()
        self.optimizer.setup(self.model)

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.d = [
            np.zeros((self.agent_count, self.data_size, self.hist_size, 128,
                      128, 3),
                     dtype=np.uint8),
            np.zeros((self.agent_count, self.data_size, self.hist_size,
                      self.other_input_dim),
                     dtype=np.uint8),
            np.zeros((self.agent_count, self.data_size), dtype=np.uint8),
            np.zeros((self.agent_count, self.data_size, 1), dtype=np.float32),
            np.zeros((self.agent_count, self.data_size, 1), dtype=np.bool)
        ]
Example #23
def main(epochs=257*8, lr=0.38, seq_len=120, pred_len=39, out="result", device=0):
    
    # CHOOSE ONE:
    # get the training dataset but keep a slice for validation
    dataset = get_dataset(182, -39 -39)
    # get the entire dataset
    #dataset = get_dataset(182, -39)
    
    iter = ParallelSequentialIterator(dataset, pred_len=1, repeat=True)

    model = Model(pred_len=pred_len, dropout=0.1)
    if device >= 0:
        model.to_gpu()

    # Try some different optimizers
    #optimizer = optimizers.Adam(alpha=lr)
    #optimizer = optimizers.MomentumSGD(lr=lr, momentum=0.9)
    optimizer = optimizers.RMSpropGraves(lr=lr, alpha=0.95, momentum=0.2)
    #optimizer = optimizers.RMSprop(lr=lr, alpha=0.5)

    optimizer.setup(model)

    #optimizer.add_hook(chainer.optimizer.GradientClipping(5))#grad_clip))
    #optimizer.add_hook(chainer.optimizer.WeightDecay(1.E-7))

    updater = BPTTUpdater(iter, optimizer, seq_len=seq_len, pred_len=pred_len, data_len=len(dataset), device=device)
    trainer = Trainer(updater, (epochs, 'epoch'), out=out)

    interval = 10

    # Try some learning-rate decay
    #trainer.extend(extensions.ExponentialShift('lr', 0.995)) #0.1, lr, lr * 0.1), trigger=(10, 'epoch'))

    trainer.extend(extensions.observe_lr(), trigger=(interval, "iteration"))
    
    trainer.extend(extensions.LogReport(trigger=(interval, 'iteration')))
    trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'loss', 'lr']),
            trigger=(interval, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=interval))

    # export snapshots to resume training
    trainer.extend(extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=(257*6, "epoch"))
    trainer.extend(extensions.snapshot_object(model, "model_epoch_{.updater.epoch}"), trigger=(257*2, "epoch"))

    # change to True to resume from file
    if False:
        chainer.serializers.load_npz('result/snapshot_epoch_1030', trainer)
        
    trainer.run()


    # save model
    from chainer import serializers
    serializers.save_npz('restaurant.model', model)
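
The weights written by save_npz above can be reloaded into a fresh model for inference. A minimal sketch, assuming the same Model class and constructor arguments used in main():

from chainer import serializers

model = Model(pred_len=39, dropout=0.1)
serializers.load_npz('restaurant.model', model)
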
Example #24
    def optimize_rmsprop(self,
                         init_img,
                         lr=0.1,
                         alpha=0.95,
                         momentum=0.9,
                         eps=1e-4,
                         iterations=2000,
                         save=50,
                         filename='iter',
                         str_contrast=False):
        chainer_rms = optimizers.RMSpropGraves(lr=lr,
                                               alpha=alpha,
                                               momentum=momentum,
                                               eps=eps)
        state = {
            'n': xp.zeros_like(init_img.data),
            'g': xp.zeros_like(init_img.data),
            'delta': xp.zeros_like(init_img.data)
        }
        out_img = Variable(xp.zeros_like(init_img.data), volatile=True)

        time_start = time.time()
        for epoch in range(iterations):
            loss = self.loss_total(init_img)
            loss.backward()
            loss.unchain_backward()

            # scale the gradient down by the sum of its squared entries
            # (named "l1_norm" here, but grad * grad makes this a squared L2 norm)
            grad_l1_norm = xp.sum(xp.absolute(init_img.grad * init_img.grad))
            init_img.grad /= grad_l1_norm

            if gpu_flag:
                chainer_rms.update_one_gpu(init_img, state)
            else:
                chainer_rms.update_one_cpu(init_img, state)

            init_img.zerograd()

            # save image every 'save' iteration
            if save != 0 and (epoch + 1) % save == 0:
                if self.preserve_color:
                    init_img_lum = separate_lum_chr(init_img)[0]
                    if gpu_flag:
                        init_img_lum.to_gpu()
                    out_img.copydata(init_img_lum + self.content_img_chr)
                else:
                    out_img.copydata(init_img)
                save_image(out_img,
                           filename + '_' + str(epoch + 1) + '.png',
                           contrast=str_contrast)
                print(
                    "Image Saved at Iteration %.0f, Time Used: %.4f, Total Loss: %.4f"
                    % ((epoch + 1), (time.time() - time_start), loss.data))
Example #25
    def __init__(self, enable_controller=[0, 3, 4]):
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller  # Default setting : "Pong"

        print "Initializing DQN..."
        print "CUDA init"
        cuda.init()

        print "Model Building"
        self.model = FunctionSet(
            l1=F.Convolution2D(4,
                               32,
                               ksize=8,
                               stride=4,
                               nobias=False,
                               wscale=np.sqrt(2)),
            l2=F.Convolution2D(32,
                               64,
                               ksize=4,
                               stride=2,
                               nobias=False,
                               wscale=np.sqrt(2)),
            l3=F.Convolution2D(64,
                               64,
                               ksize=3,
                               stride=1,
                               nobias=False,
                               wscale=np.sqrt(2)),
            l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
            q_value=F.Linear(512,
                             self.num_of_actions,
                             initialW=np.zeros((self.num_of_actions, 512),
                                               dtype=np.float32))).to_gpu()

        self.model_target = copy.deepcopy(self.model)

        print "Initizlizing Optimizer"
        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.0001)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.D = [
            np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #26
    def __init__(self,
                 agent,
                 memory_size=10**4,
                 replay_size=32,
                 gamma=0.99,
                 initial_exploration=10**4,
                 target_update_freq=10**4,
                 learning_rate=0.00025,
                 epsilon_decay=1e-6,
                 minimum_epsilon=0.1,
                 L1_rate=None):
        self.agent = agent
        self.target = Q(self.agent.q.n_history,
                        self.agent.q.n_action,
                        on_gpu=self.agent.q.on_gpu)

        self.memory_size = memory_size
        self.replay_size = replay_size
        self.gamma = gamma
        self.initial_exploration = initial_exploration
        self.target_update_freq = target_update_freq
        self.laerning_rate = learning_rate
        self.epslon_decay = epsilon_decay
        self.minimum_epsilon = minimum_epsilon
        self._step = 0

        # prepare for replay
        n_hist = self.agent.q.n_history
        size = self.agent.q.SIZE
        self.memory = [
            np.zeros((memory_size, n_hist, 3, size, size), dtype=np.float32),
            np.zeros(memory_size, dtype=np.uint8),
            np.zeros((memory_size, 1), dtype=np.float32),
            np.zeros((memory_size, n_hist, 3, size, size), dtype=np.float32),
            np.zeros((memory_size, 1), dtype=np.bool)
        ]
        self.memory_text = [
            "state", "action", "reward", "next_state", "episode_end"
        ]

        #prepare optimizer
        self.optimizer = optimizers.RMSpropGraves(lr=learning_rate,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.01)
        self.optimizer.setup(self.agent.q)
        if L1_rate is not None:
            self.optimizer.add_hook(optimizer.Lasso(L1_rate))
        self._loss = 0
        self._qv = 0
Example #27
  def open(self):
    global REMOTE_MODE

    # load model file
    if(REMOTE_MODE == "TEXTURE"):
      self.model = CNN()
    elif(REMOTE_MODE == "DISTANCE"):
      self.model = MLP()
    self.brain = DQN(self.model)
    try:
      serializers.load_hdf5(MODEL_FILE, self.brain)
      #serializers.save_hdf5("MLP.model", self.brain.predictor)
      print("succeed to load model file")
    except:
      print("failed to load model file")

    # load history file
    try:
      fp = open(HISTORY_FILE, "r")
      self.history = pickle.load(fp)
      self.turn = self.history.turn + 1
      self.state = self.history.getNewestState().copy()
      self.last_state = self.history.getNewestLastState().copy()
      fp.close()
      print "succeed to load history file. [restart from turn " + str(self.turn) +  "]"
    except:
      self.history = History(self.historySize, self.observeLength, self.imageHeight, self.imageWidth)
      self.turn = 0

      if(REMOTE_MODE == "TEXTURE"):
        self.state = np.zeros((self.observeLength, self.imageHeight, self.imageWidth), dtype=np.float32)
        self.last_state = np.zeros((self.observeLength, self.imageHeight, self.imageWidth), dtype=np.float32)
      elif(REMOTE_MODE == "DISTANCE"):
        self.state = np.zeros((DISTANCE_INPUT), dtype=np.float32)
        self.last_state = np.zeros((DISTANCE_INPUT), dtype=np.float32)

      print "failed to load history file"


    self.brain.to_gpu()
    #self.optimizer = optimizers.Adam()
    self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
    self.optimizer.setup(self.brain)

    # hold connection
    if(self not in simulator_clients):
      simulator_clients.append(self)
Example #28
    def __init__(self, use_gpu, num_of_action_type, num_of_pad, dim):
        self.use_gpu = use_gpu
        self.num_of_action_type = num_of_action_type
        self.num_of_pad = num_of_pad
        self.num_of_actions = num_of_action_type * num_of_pad
        self.dim = dim

        print("Initializing Q-Network...\n")

        self.q_net_filename = "q_net.pickle"
        if os.path.exists(self.q_net_filename):
            print("Loading Q-Network Model...\n")
            self.model = self.load_model()
        else:
            hidden_dim = 256
            self.model = FunctionSet(l4=F.Linear(self.dim * self.hist_size,
                                                 hidden_dim,
                                                 wscale=np.sqrt(2)),
                                     q_value=F.Linear(
                                         hidden_dim,
                                         self.num_of_actions,
                                         initialW=np.zeros(
                                             (self.num_of_actions, hidden_dim),
                                             dtype=np.float32)))

        if self.use_gpu >= 0:
            self.model.to_gpu()

        self.model_target = copy.deepcopy(self.model)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.0001)
        self.optimizer.setup(self.model.collect_parameters())

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.d = [
            np.zeros((self.data_size, self.hist_size, self.dim),
                     dtype=np.uint8),
            np.zeros((self.data_size, self.num_of_pad), dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, self.hist_size, self.dim),
                     dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #29
    def __init__(self, use_gpu, enable_controller, dim, epsilon, epsilon_delta,
                 min_eps):
        self.use_gpu = use_gpu
        self.num_of_actions = len(enable_controller)
        self.enable_controller = enable_controller
        self.dim = dim
        self.epsilon = epsilon
        self.epsilon_delta = epsilon_delta
        self.min_eps = min_eps
        self.time = 0

        app_logger.info("Initializing Q-Network...")

        hidden_dim = 256
        self.model = Chain(
            l4=L.Linear(self.dim * self.hist_size,
                        hidden_dim,
                        initialW=initializers.Normal(
                            0.5 / math.sqrt(self.dim * self.hist_size))),
            q_value=L.Linear(hidden_dim,
                             self.num_of_actions,
                             initialW=np.zeros(
                                 (self.num_of_actions, hidden_dim),
                                 dtype=np.float32)))
        if self.use_gpu >= 0:
            self.model.to_gpu()

        self.model_target = copy.deepcopy(self.model)

        self.optimizer = optimizers.RMSpropGraves(lr=0.00025,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.0001)
        self.optimizer.setup(self.model)

        # History Data :  D=[s, a, r, s_dash, end_episode_flag]
        self.d = [
            np.zeros((self.data_size, self.hist_size, self.dim),
                     dtype=np.uint8),
            np.zeros(self.data_size, dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.int8),
            np.zeros((self.data_size, self.hist_size, self.dim),
                     dtype=np.uint8),
            np.zeros((self.data_size, 1), dtype=np.bool)
        ]
Example #30
    def __init__(
            self,
            state_shape,
            action_num,
            image_num_per_state,
            model,
            gamma=0.99,  # discount factor
            replay_batch_size=32,
            replay_memory_size=5 * 10**4,
            target_model_update_freq=1,
            max_step=50,
            lr=0.00025,
            clipping=False  # if True, ignore reward intensity
    ):
        print("initializing DQN...")
        self.action_num = action_num
        self.image_num_per_state = image_num_per_state
        self.gamma = gamma
        self.replay_batch_size = replay_batch_size
        self.replay_memory_size = replay_memory_size
        self.target_model_update_freq = target_model_update_freq
        self.max_step = max_step
        self.clipping = clipping

        print("Initializing Model...")
        self.model = model
        self.model_target = copy.deepcopy(self.model)

        print("Initializing Optimizer")
        self.optimizer = optimizers.RMSpropGraves(lr=lr,
                                                  alpha=0.95,
                                                  momentum=0.0,
                                                  eps=0.01)
        self.optimizer.setup(self.model)
        self.optimizer.add_hook(chainer.optimizer.GradientClipping(20))

        print("Initializing Replay Buffer...")
        self.dataset = dataset.DataSet(max_size=replay_memory_size,
                                       max_step=max_step,
                                       frame_shape=state_shape,
                                       frame_dtype=np.uint8)

        self.xp = model.xp
        self.state_shape = state_shape