Example #1
        def __init__(self, n_in_elements, n_actions, explor_rate=0.0):
            '''
            If the reward scheme allows Q-values to be negative, F.relu is
            unsuitable for learning because it cannot output negative values.
            The activation function must be one that can also produce
            negative values, e.g. F.leaky_relu, which also behaves well with
            respect to the vanishing-gradient problem.

            n_in_elements: number of input elements
            n_actions: size of the discrete action space
            explor_rate=0.0: exploration rate (currently unused)
            '''

            super().__init__()
            with self.init_scope():
                # Shared trunk: widen, then taper back down.
                self.l4 = links.MLP(
                    n_in_elements, int(n_in_elements * 1.2),
                    (n_in_elements * 2, int(n_in_elements * 1.8),
                     int(n_in_elements * 1.5)),
                    nonlinearity=F.leaky_relu)
                # Second stage: input is l4's output size plus 4 extra elements.
                self.l5 = links.MLP(
                    int(n_in_elements * 1.2) + 4, 4,
                    (n_in_elements, int(n_in_elements * 0.8),
                     (n_in_elements * 2) // 3),
                    nonlinearity=F.leaky_relu)
                # One small head per action; ChainList registers their parameters.
                local_action_links_list = []
                for i in range(n_actions):
                    action_links = links.MLP(4, 1, (n_in_elements // 2,),
                                             nonlinearity=F.leaky_relu)
                    local_action_links_list.append(action_links)
                self.action_chain_list = chainer.ChainList(*local_action_links_list)

            self.explor_rate = explor_rate
            
            self.debug_info = None
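
The docstring's point about activations is easy to check in isolation; a minimal standalone sketch (not part of the example) comparing F.relu and F.leaky_relu on negative Q-values:

    import numpy as np
    import chainer.functions as F

    q = np.array([[-2.0, -0.5, 1.0]], dtype=np.float32)  # Q-values, some negative
    print(F.relu(q).array)        # [[0. 0. 1.]] -- negatives clipped to zero
    print(F.leaky_relu(q).array)  # [[-0.4 -0.1 1. ]] -- negatives kept (default slope 0.2)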
Example #2
        def __init__(self,
                     n_size_twn_status,
                     num_ray,
                     n_size_eb_status,
                     n_actions,
                     explor_rate=0.0):

            self.n_size_twn_status = n_size_twn_status
            self.num_ray = num_ray
            self.n_size_eb_status = n_size_eb_status
            self.num_history = 1

            self.n_clasfy_ray = 32

            super().__init__()

            with self.init_scope():
                self.ml1 = links.MLP(self.num_ray,
                                     self.n_clasfy_ray,
                                     ((self.num_ray // 3) * 2,
                                      self.num_ray // 2, self.num_ray // 3),
                                     nonlinearity=F.leaky_relu)
                # Concatenated feature size: TWN status + classified rays + EB status.
                n_concat = n_size_twn_status + self.n_clasfy_ray + n_size_eb_status
                self.l4 = L.Linear(n_concat, n_concat)  # for classification
                self.ml5 = links.MLP(n_concat,
                                     n_actions,
                                     (n_concat, n_concat, n_concat, n_concat, n_concat),
                                     nonlinearity=F.leaky_relu)

            self.explor_rate = explor_rate

            self.debug_info = None
Example #3
 def __init__(self, ndim_obs, n_actions, hidden_sizes=(50, 50, 50)):
     self.pi = policies.SoftmaxPolicy(model=links.MLP(
         ndim_obs, n_actions, hidden_sizes, nonlinearity=F.tanh))
     self.v = links.MLP(ndim_obs,
                        1,
                        hidden_sizes=hidden_sizes,
                        nonlinearity=F.tanh)
     super().__init__(self.pi, self.v)
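
The super().__init__(self.pi, self.v) call suggests a ChainList-based policy/value model in the ChainerRL A3C style; the forward pass for such a model is typically a pi_and_v method (a sketch, not shown in the excerpt):

 def pi_and_v(self, obs):
     # Evaluate the policy and value heads on the same observation batch.
     return self.pi(obs), self.v(obs)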
Example #4
 def __init__(self,
              obs_size,
              action_space,
              n_hidden_layers=2,
              n_hidden_channels=64,
              bound_mean=None,
              normalize_obs=None):
     assert bound_mean in [False, True]
     assert normalize_obs in [False, True]
     super().__init__()
     hidden_sizes = (n_hidden_channels, ) * n_hidden_layers
     self.normalize_obs = normalize_obs
     with self.init_scope():
         self.pi = policies.FCGaussianPolicyWithStateIndependentCovariance(
             obs_size,
             action_space.low.size,
             n_hidden_layers,
             n_hidden_channels,
             var_type='diagonal',
             nonlinearity=F.tanh,
             bound_mean=bound_mean,
             min_action=action_space.low,
             max_action=action_space.high,
             mean_wscale=1e-2)
         self.v = links.MLP(obs_size, 1, hidden_sizes=hidden_sizes)
         if self.normalize_obs:
             self.obs_filter = links.EmpiricalNormalization(shape=obs_size)
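
The excerpt does not show how obs_filter is applied; in ChainerRL's PPO/TRPO examples, an EmpiricalNormalization link is typically run over the observation (without updating its statistics at evaluation time) before the heads. A sketch under that assumption:

 def pi_and_v(self, obs):
     if self.normalize_obs:
         # Normalize with running mean/std; clip to keep outliers bounded.
         obs = F.clip(self.obs_filter(obs, update=False), -5.0, 5.0)
     return self.pi(obs), self.v(obs)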
Example #5
        def __init__(self, n_in_elements, n_actions, explor_rate=0.0):
            '''
            If the reward scheme allows Q-values to be negative, F.relu is
            unsuitable for learning because it cannot output negative values.
            The activation function must be one that can also produce
            negative values, e.g. F.leaky_relu, which also behaves well with
            respect to the vanishing-gradient problem.

            n_in_elements: number of input elements
            n_actions: size of the discrete action space
            explor_rate=0.0: exploration rate (currently unused)
            '''

            super().__init__()
            with self.init_scope():
                self.ml5 = links.MLP(
                    n_in_elements, n_actions,
                    (n_in_elements * 2,
                     int(n_in_elements * 1.8),
                     int(n_in_elements * 1.5),
                     int(n_in_elements * 1.2),
                     n_in_elements,
                     int(n_in_elements * 0.8),
                     (n_in_elements * 2) // 3,
                     (n_in_elements // 2) * n_actions),
                    nonlinearity=F.leaky_relu)

            self.explor_rate = explor_rate
            
            self.debug_info = None
Example #6
    def __init__(self, obs_space, action_space, out_size=1, gpu=-1):
        hidden_sizes = (64, 64)
        self.reward_net = links.MLP(obs_space + action_space,
                                    out_size,
                                    hidden_sizes=hidden_sizes)
        self.value_net = links.MLP(obs_space,
                                   out_size,
                                   hidden_sizes=hidden_sizes)
        if gpu >= 0:
            self.reward_net.to_gpu(gpu)
            self.value_net.to_gpu(gpu)

        self.reward_optimizer = chainer.optimizers.Adam()
        self.reward_optimizer.setup(self.reward_net)
        self.value_optimizer = chainer.optimizers.Adam()
        self.value_optimizer.setup(self.value_net)
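
A training step with these optimizers would follow the usual Chainer pattern of cleargrads/backward/update; a sketch, where the regression loss and its inputs (obs, target_values) are hypothetical and not part of the excerpt:

        # Hypothetical value-regression step.
        loss = F.mean_squared_error(self.value_net(obs), target_values)
        self.value_net.cleargrads()
        loss.backward()
        self.value_optimizer.update()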
Example #7
        def __init__(self,
                     n_size_twn_status,
                     num_ray,
                     n_size_eb_status,
                     n_actions,
                     explor_rate=0.0):

            self.n_size_twn_status = n_size_twn_status
            self.num_ray = num_ray
            self.n_size_eb_status = n_size_eb_status
            self.num_history = 1

            self.n_clasfy_ray = 16

            self.in_channel_1st = 1
            out_channel_1st = 16
            filter_size_1st = 5
            slide_size_1st = 1
            self.pooling_size_1st = 2

            out_channel_2nd = 64
            filter_size_2nd = 3
            slide_size_2nd = 1
            self.pooling_size_2nd = 4

            self.num_of_out_elements_1st = self.calc_num_out_elements1D(
                self.num_ray, self.in_channel_1st, out_channel_1st,
                filter_size_1st, slide_size_1st, self.pooling_size_1st)
            self.num_of_out_elements_2nd = self.calc_num_out_elements1D(
                self.num_of_out_elements_1st, out_channel_1st, out_channel_2nd,
                filter_size_2nd, slide_size_2nd, self.pooling_size_2nd)

            print('1st out: {}  2nd out: {}  n_actions: {}'.format(
                self.num_of_out_elements_1st, self.num_of_out_elements_2nd,
                n_actions))

            super().__init__()

            with self.init_scope():
                self.conv1 = L.ConvolutionND(
                    1, self.in_channel_1st, out_channel_1st,
                    filter_size_1st)  # first 1-D convolution layer (16 channels)
                self.conv2 = L.ConvolutionND(
                    1, out_channel_1st, out_channel_2nd,
                    filter_size_2nd)  # second 1-D convolution layer (64 channels)
                self.l3 = L.Linear(self.num_of_out_elements_2nd * out_channel_2nd,
                                   self.n_clasfy_ray)  # for classification
                # Concatenated feature size: TWN status + classified rays + EB status.
                n_concat = n_size_twn_status + self.n_clasfy_ray + n_size_eb_status
                self.l4 = L.Linear(n_concat, n_concat)  # for classification
                self.ml5 = links.MLP(n_concat,
                                     n_actions,
                                     (n_concat * 3, n_concat * 2,
                                      (n_concat // 2) * 3, n_concat, n_concat // 2),
                                     nonlinearity=F.leaky_relu)

            self.explor_rate = explor_rate

            self.debug_info = None
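
calc_num_out_elements1D is referenced above but not included in this excerpt. A plausible implementation, assuming an unpadded 1-D convolution followed by non-overlapping pooling (matching how conv1 and conv2 are constructed), might be:

        def calc_num_out_elements1D(self, n_in, in_channels, out_channels,
                                    filter_size, slide_size, pooling_size):
            # Spatial length after an unpadded 1-D convolution with stride
            # slide_size; channel counts do not affect this length.
            n_conv = (n_in - filter_size) // slide_size + 1
            # Length after non-overlapping pooling with window pooling_size.
            return n_conv // pooling_size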
Example #8
    def __init__(self, observation_dim, action_dim, hidden_sizes, loss_type='gan', gpu=-1):

        self.model = links.MLP(observation_dim + action_dim, 1,
                               hidden_sizes=hidden_sizes,
                               nonlinearity=F.leaky_relu)

        if gpu >= 0:
            self.model.to_gpu(gpu)

        self.optimizer = chainer.optimizers.Adam(alpha=1e-5, eps=1e-5)  # should alpha be somewhat higher?
        self.optimizer.setup(self.model)
        self.loss_type = loss_type
        self.loss = None
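
The loss_type flag implies the discriminator objective is chosen elsewhere in the training code; a minimal sketch of the 'gan' case on this model's logits (an assumption, not shown in the excerpt):

    def gan_discriminator_loss(self, expert_xs, fake_xs):
        d_real = self.model(expert_xs)  # logits for expert (obs, action) pairs
        d_fake = self.model(fake_xs)    # logits for generated pairs
        # Numerically stable binary cross-entropy on raw logits.
        return F.average(F.softplus(-d_real)) + F.average(F.softplus(d_fake))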
Example #9
        def __init__(self,
                     n_in_elements,
                     n_original_input,
                     n_actions,
                     explor_rate=0.0):
            '''
            If the reward scheme allows Q-values to be negative, F.relu is
            unsuitable for learning because it cannot output negative values.
            The activation function must be one that can also produce
            negative values, e.g. F.leaky_relu, which also behaves well with
            respect to the vanishing-gradient problem.

            n_in_elements: number of input feature elements
            n_original_input: size of the original input; used here to scale
                layer widths and the noise sizes
            n_actions: size of the discrete action space
            explor_rate=0.0: exploration rate (currently unused)
            '''

            super().__init__()

            self.num_noise_roots = np.array([
                n_original_input // 8, n_original_input // 8,
                n_original_input // 8, n_original_input // 8
            ])

            with self.init_scope():
                self.l4 = links.MLP(
                    n_in_elements + np.sum(self.num_noise_roots),
                    n_original_input // 2, (n_original_input, ),
                    nonlinearity=F.leaky_relu)
                # Collect per-action heads in a ChainList so their parameters
                # are registered with the chain (a plain Python list inside
                # init_scope would leave them untracked by the optimizer).
                local_action_links_list = []
                for i in range(n_actions):
                    action_links = links.MLP(n_original_input // 2,
                                             1, (n_original_input // 4, ),
                                             nonlinearity=F.leaky_relu)
                    local_action_links_list.append(action_links)
                self.action_links_list = chainer.ChainList(*local_action_links_list)

            self.explor_rate = explor_rate

            self.debug_info = None
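
Since l4's input width is n_in_elements plus sum(num_noise_roots), the forward pass evidently concatenates extra elements of that total size to the input. A sketch of one possible forward step under that assumption (the noise source here is hypothetical):

            noise = self.xp.random.uniform(
                -1.0, 1.0,
                (x.shape[0], int(np.sum(self.num_noise_roots)))).astype(np.float32)
            h = self.l4(F.concat((x, noise), axis=1))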
Example #10
    def __init__(self,
                 input_dim,
                 hidden_sizes=(64, 64, 64),
                 loss_type='wgangp',
                 gpu=-1):
        self.model = links.MLP(input_dim, 1, hidden_sizes=hidden_sizes)

        if gpu >= 0:
            self.model.to_gpu(gpu)

        self.optimizer = chainer.optimizers.Adam(alpha=1e-5, eps=1e-5)
        self.optimizer.setup(self.model)
        self.loss_type = loss_type
        self.loss = None
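
For loss_type='wgangp', the critic loss would typically be the Wasserstein term plus a gradient penalty; the Wasserstein part on this model is simply (a sketch with hypothetical inputs, gradient penalty omitted):

        loss = F.average(self.model(fake_xs)) - F.average(self.model(real_xs))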
Example #11
 def __init__(self, ndim_obs, n_actions, hidden_sizes=(64, 64)):
     self.pi = policies.MellowmaxPolicy(
         model=links.MLP(ndim_obs, n_actions, hidden_sizes))
     self.v = links.MLP(ndim_obs, 1, hidden_sizes=hidden_sizes)
     super().__init__(self.pi, self.v)