def __init__(self, n_in_elements, n_actions, explor_rate=0.0):
    '''
    If the reward scheme allows Q-values to become negative, F.relu is
    unsuitable for learning because it cannot produce negative outputs.
    The activation function must be one that can also take negative values,
    e.g. F.leaky_relu, which is also a good choice with respect to the
    vanishing-gradient problem.
    n_size_twn_status: state of the TWN itself
    num_ray_out: tuple describing the data obtained by processing the TWN
        sensor with the CNN layers (number of CNN output elements, number of
        CNN output channels)
    n_size_eb_status: EB information captured by the TWN sensors
    n_actions: size of the discrete action space
    explor_rate=0.0: exploration ratio (currently unused)
    '''
    super().__init__()
    with self.init_scope():
        self.l4 = links.MLP(
            n_in_elements, int(n_in_elements * 1.2),
            (n_in_elements * 2, int(n_in_elements * 1.8), int(n_in_elements * 1.5)),
            nonlinearity=F.leaky_relu)
        self.l5 = links.MLP(
            int(n_in_elements * 1.2) + 4, 4,
            (n_in_elements, int(n_in_elements * 0.8), (n_in_elements * 2) // 3),
            nonlinearity=F.leaky_relu)
        local_action_links_list = []
        for i in range(n_actions):
            action_links = links.MLP(4, 1, (n_in_elements // 2,),
                                     nonlinearity=F.leaky_relu)
            local_action_links_list.append(action_links)
        # Register the per-action heads as child links via a ChainList.
        self.action_chain_list = chainer.ChainList(*local_action_links_list)
    self.explor_rate = explor_rate
    self.debug_info = None
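
# A minimal sketch of how this chain could be evaluated as a discrete
# Q-function. Only __init__ is shown above, so the wiring is an assumption:
# here a hypothetical 4-element auxiliary vector `aux` is concatenated before
# l5, and each per-action head maps l5's 4 outputs to one Q-value
# (a chainerrl import is assumed).
def __call__(self, x, aux):
    h = self.l4(x)
    h = self.l5(F.concat((h, aux), axis=1))
    q_values = F.concat([head(h) for head in self.action_chain_list], axis=1)
    return chainerrl.action_value.DiscreteActionValue(q_values)
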
def __init__(self, n_size_twn_status, num_ray, n_size_eb_status, n_actions,
             explor_rate=0.0):
    self.n_size_twn_status = n_size_twn_status
    self.num_ray = num_ray
    self.n_size_eb_status = n_size_eb_status
    self.num_history = 1
    self.n_clasfy_ray = 32
    super().__init__()
    with self.init_scope():
        self.ml1 = links.MLP(
            self.num_ray, self.n_clasfy_ray,
            ((self.num_ray // 3) * 2, self.num_ray // 2, self.num_ray // 3),
            nonlinearity=F.leaky_relu)
        aaa = n_size_twn_status + self.n_clasfy_ray + n_size_eb_status
        self.l4 = L.Linear(aaa, aaa)  # for class classification
        self.ml5 = links.MLP(aaa, n_actions, (aaa, aaa, aaa, aaa, aaa),
                             nonlinearity=F.leaky_relu)  # leaky_relu so negative Q-values can be represented
    self.explor_rate = explor_rate
    self.debug_info = None
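
# A minimal sketch of a forward pass for this chain, assuming the ray scan,
# the TWN status and the EB status arrive as separate batched arrays; the
# concatenation order is a guess, and wrapping the output as a
# DiscreteActionValue assumes a chainerrl import.
def __call__(self, rays, twn_status, eb_status):
    h = self.ml1(rays)  # compress the ray scan into n_clasfy_ray features
    h = F.concat((twn_status, h, eb_status), axis=1)
    h = F.leaky_relu(self.l4(h))
    return chainerrl.action_value.DiscreteActionValue(self.ml5(h))
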
def __init__(self, ndim_obs, n_actions, hidden_sizes=(50, 50, 50)):
    self.pi = policies.SoftmaxPolicy(model=links.MLP(
        ndim_obs, n_actions, hidden_sizes, nonlinearity=F.tanh))
    self.v = links.MLP(ndim_obs, 1, hidden_sizes=hidden_sizes,
                       nonlinearity=F.tanh)
    super().__init__(self.pi, self.v)
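
# The ChainerRL A3C examples pair a constructor like this (a ChainList holding
# pi and v) with a pi_and_v method that returns both heads; a minimal sketch:
def pi_and_v(self, state):
    return self.pi(state), self.v(state)
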
def __init__(self, obs_size, action_space, n_hidden_layers=2,
             n_hidden_channels=64, bound_mean=None, normalize_obs=None):
    assert bound_mean in [False, True]
    assert normalize_obs in [False, True]
    super().__init__()
    hidden_sizes = (n_hidden_channels,) * n_hidden_layers
    self.normalize_obs = normalize_obs
    with self.init_scope():
        self.pi = policies.FCGaussianPolicyWithStateIndependentCovariance(
            obs_size, action_space.low.size,
            n_hidden_layers, n_hidden_channels,
            var_type='diagonal', nonlinearity=F.tanh,
            bound_mean=bound_mean,
            min_action=action_space.low, max_action=action_space.high,
            mean_wscale=1e-2)
        self.v = links.MLP(obs_size, 1, hidden_sizes=hidden_sizes)
        if self.normalize_obs:
            self.obs_filter = links.EmpiricalNormalization(shape=obs_size)
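
# The ChainerRL PPO/TRPO examples pair this kind of Gaussian policy + value
# model with a pi_and_v method that optionally normalizes observations; a
# minimal sketch assuming that pattern (the clipping range is illustrative):
def pi_and_v(self, obs):
    if self.normalize_obs:
        obs = F.clip(self.obs_filter(obs, update=False), -5.0, 5.0)
    return self.pi(obs), self.v(obs)
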
def __init__(self, n_in_elements, n_actions, explor_rate=0.0):
    '''
    If the reward scheme allows Q-values to become negative, F.relu is
    unsuitable for learning because it cannot produce negative outputs.
    The activation function must be one that can also take negative values,
    e.g. F.leaky_relu, which is also a good choice with respect to the
    vanishing-gradient problem.
    n_size_twn_status: state of the TWN itself
    num_ray_out: tuple describing the data obtained by processing the TWN
        sensor with the CNN layers (number of CNN output elements, number of
        CNN output channels)
    n_size_eb_status: EB information captured by the TWN sensors
    n_actions: size of the discrete action space
    explor_rate=0.0: exploration ratio (currently unused)
    '''
    super().__init__()
    with self.init_scope():
        self.ml5 = links.MLP(
            n_in_elements, n_actions,
            (
                n_in_elements * 2, int(n_in_elements * 1.8),
                int(n_in_elements * 1.5), int(n_in_elements * 1.2),
                n_in_elements, int(n_in_elements * 0.8),
                (n_in_elements * 2) // 3, (n_in_elements // 2) * n_actions
            ),
            nonlinearity=F.leaky_relu)
    self.explor_rate = explor_rate
    self.debug_info = None
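
# A minimal sketch of using this chain as a DQN-style Q-function: the single
# MLP maps the input directly to one Q-value per action, which is then wrapped
# as a DiscreteActionValue (assumes a chainerrl import).
def __call__(self, x):
    return chainerrl.action_value.DiscreteActionValue(self.ml5(x))
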
def __init__(self, obs_space, action_space, out_size=1, gpu=-1):
    hidden_sizes = (64, 64)
    self.reward_net = links.MLP(obs_space + action_space, out_size,
                                hidden_sizes=hidden_sizes)
    self.value_net = links.MLP(obs_space, out_size,
                               hidden_sizes=hidden_sizes)
    if gpu >= 0:
        self.reward_net.to_gpu(gpu)
        self.value_net.to_gpu(gpu)
    self.reward_optimizer = chainer.optimizers.Adam()
    self.reward_optimizer.setup(self.reward_net)
    self.value_optimizer = chainer.optimizers.Adam()
    self.value_optimizer.setup(self.value_net)
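
# The separate reward and value networks suggest an AIRL-style discriminator.
# A minimal sketch of the usual f(s, a, s') = r(s, a) + gamma * V(s') - V(s)
# term; the method name, arguments and gamma default are assumptions, not part
# of the original code.
def compute_f(self, obs, act, next_obs, gamma=0.99):
    r = self.reward_net(F.concat((obs, act), axis=1))
    return r + gamma * self.value_net(next_obs) - self.value_net(obs)
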
def __init__(self, n_size_twn_status, num_ray, n_size_eb_status, n_actions,
             explor_rate=0.0):
    self.n_size_twn_status = n_size_twn_status
    self.num_ray = num_ray
    self.n_size_eb_status = n_size_eb_status
    self.num_history = 1
    self.n_clasfy_ray = 16
    self.in_channel_1st = 1
    out_channel_1st = 16
    filter_size_1st = 5
    slide_size_1st = 1
    self.pooling_size_1st = 2
    out_channel_2nd = 64
    filter_size_2nd = 3
    slide_size_2nd = 1
    self.pooling_size_2nd = 4
    self.num_of_out_elements_1st = self.calc_num_out_elements1D(
        self.num_ray, self.in_channel_1st, out_channel_1st,
        filter_size_1st, slide_size_1st, self.pooling_size_1st)
    self.num_of_out_elements_2nd = self.calc_num_out_elements1D(
        self.num_of_out_elements_1st, out_channel_1st, out_channel_2nd,
        filter_size_2nd, slide_size_2nd, self.pooling_size_2nd)
    #self.logger.info('1st out: {} 2nd out: {} n_actions'.format(self.num_of_out_elements_1st, self.num_of_out_elements_2nd, n_actions))
    print('1st out: {} 2nd out: {} n_actions: {}'.format(
        self.num_of_out_elements_1st, self.num_of_out_elements_2nd, n_actions))
    super().__init__()
    with self.init_scope():
        self.conv1 = L.ConvolutionND(
            1, self.in_channel_1st, out_channel_1st,
            filter_size_1st)  # first convolution layer (16 channels)
        self.conv2 = L.ConvolutionND(
            1, out_channel_1st, out_channel_2nd,
            filter_size_2nd)  # second convolution layer (64 channels)
        self.l3 = L.Linear(self.num_of_out_elements_2nd * out_channel_2nd,
                           self.n_clasfy_ray)  # for class classification
        aaa = n_size_twn_status + self.n_clasfy_ray + n_size_eb_status
        self.l4 = L.Linear(aaa, aaa)  # for class classification
        self.ml5 = links.MLP(aaa, n_actions,
                             (aaa * 3, aaa * 2, (aaa // 2) * 3, aaa, aaa // 2),
                             nonlinearity=F.leaky_relu)
    self.explor_rate = explor_rate
    self.debug_info = None
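
# calc_num_out_elements1D is referenced above but not shown. A plausible
# sketch under common assumptions (no padding, convolution stride
# `slide_size`, pooling window and stride both equal to `pooling_size`, no
# cover_all rounding); the channel arguments are unused here because the
# element count along the spatial axis does not depend on them.
def calc_num_out_elements1D(self, n_in, in_channels, out_channels,
                            filter_size, slide_size, pooling_size):
    n_conv = (n_in - filter_size) // slide_size + 1  # elements after convolution
    return n_conv // pooling_size                    # elements after max pooling
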
def __init__(self, observation_dim, action_dim, hidden_sizes,
             loss_type='gan', gpu=-1):
    self.model = links.MLP(observation_dim + action_dim, 1,
                           hidden_sizes=hidden_sizes,
                           nonlinearity=F.leaky_relu)
    if gpu >= 0:
        self.model.to_gpu(gpu)
    self.optimizer = chainer.optimizers.Adam(alpha=1e-5, eps=1e-5)  # should alpha be somewhat higher?
    self.optimizer.setup(self.model)
    self.loss_type = loss_type
    self.loss = None
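
# A minimal sketch of a GAN-style discriminator update for this model
# (GAIL-like, matching loss_type='gan'), assuming expert state-action pairs
# are labelled 1 and agent pairs 0; the method name and arguments are
# assumptions, not part of the original code.
def update(self, expert_obs, expert_act, agent_obs, agent_act):
    logits_expert = self.model(F.concat((expert_obs, expert_act), axis=1))
    logits_agent = self.model(F.concat((agent_obs, agent_act), axis=1))
    loss = F.sigmoid_cross_entropy(
        logits_expert,
        np.ones((len(logits_expert.data), 1), dtype=np.int32))
    loss += F.sigmoid_cross_entropy(
        logits_agent,
        np.zeros((len(logits_agent.data), 1), dtype=np.int32))
    self.model.cleargrads()
    loss.backward()
    self.optimizer.update()
    self.loss = loss
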
def __init__(self, n_in_elements, n_original_input, n_actions, explor_rate=0.0):
    '''
    If the reward scheme allows Q-values to become negative, F.relu is
    unsuitable for learning because it cannot produce negative outputs.
    The activation function must be one that can also take negative values,
    e.g. F.leaky_relu, which is also a good choice with respect to the
    vanishing-gradient problem.
    n_size_twn_status: state of the TWN itself
    num_ray_out: tuple describing the data obtained by processing the TWN
        sensor with the CNN layers (number of CNN output elements, number of
        CNN output channels)
    n_size_eb_status: EB information captured by the TWN sensors
    n_actions: size of the discrete action space
    explor_rate=0.0: exploration ratio (currently unused)
    '''
    super().__init__()
    self.num_noise_roots = np.array([
        n_original_input // 8, n_original_input // 8,
        n_original_input // 8, n_original_input // 8
    ])
    with self.init_scope():
        self.l4 = links.MLP(
            n_in_elements + np.sum(self.num_noise_roots),
            n_original_input // 2, (n_original_input,),
            nonlinearity=F.leaky_relu)
        # Register the per-action heads as child links via a ChainList; a
        # plain Python list would not register their parameters with the chain.
        local_action_links_list = []
        for i in range(n_actions):
            action_links = links.MLP(n_original_input // 2, 1,
                                     (n_original_input // 4,),
                                     nonlinearity=F.leaky_relu)
            local_action_links_list.append(action_links)
        self.action_links_list = chainer.ChainList(*local_action_links_list)
    self.explor_rate = explor_rate
    self.debug_info = None
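
# A minimal sketch of a forward pass for this chain. Only __init__ is shown,
# so the use of the noise inputs is an assumption: here uniform noise with
# sum(num_noise_roots) elements is concatenated to the input, and each
# per-action head yields one Q-value (assumes a chainerrl import).
def __call__(self, x):
    xp = self.xp
    noise = xp.random.uniform(
        -1.0, 1.0,
        (x.shape[0], int(np.sum(self.num_noise_roots)))).astype(np.float32)
    h = self.l4(F.concat((x, noise), axis=1))
    q_values = F.concat([head(h) for head in self.action_links_list], axis=1)
    return chainerrl.action_value.DiscreteActionValue(q_values)
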
def __init__(self, input_dim, hidden_sizes=(64, 64, 64), loss_type='wgangp',
             gpu=-1):
    self.model = links.MLP(input_dim, 1, hidden_sizes=hidden_sizes)
    if gpu >= 0:
        self.model.to_gpu(gpu)
    self.optimizer = chainer.optimizers.Adam(alpha=1e-5, eps=1e-5)
    self.optimizer.setup(self.model)
    self.loss_type = loss_type
    self.loss = None
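
# loss_type='wgangp' suggests a WGAN-GP critic. A minimal sketch of the
# gradient-penalty term, computed on random interpolates between real and
# fake inputs; the method name, arguments and lambda_gp default are
# assumptions, not part of the original code.
def gradient_penalty(self, real, fake, lambda_gp=10.0):
    xp = self.model.xp
    eps = xp.random.uniform(0.0, 1.0, (real.shape[0], 1)).astype(np.float32)
    interp = chainer.Variable(eps * real + (1.0 - eps) * fake)
    out = self.model(interp)
    grad, = chainer.grad([out], [interp], enable_double_backprop=True)
    grad_norm = F.sqrt(F.sum(grad * grad, axis=1))
    return lambda_gp * F.mean((grad_norm - 1.0) ** 2)
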
def __init__(self, ndim_obs, n_actions, hidden_sizes=(64, 64)):
    self.pi = policies.MellowmaxPolicy(
        model=links.MLP(ndim_obs, n_actions, hidden_sizes))
    self.v = links.MLP(ndim_obs, 1, hidden_sizes=hidden_sizes)
    super().__init__(self.pi, self.v)
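
# A minimal usage sketch. As with the softmax model above, a pi_and_v method
# returning (self.pi(state), self.v(state)) completes the A3CModel interface.
# The class name A3CFFMellowmax, the environment sizes and the optimizer
# choice below are illustrative assumptions only.
model = A3CFFMellowmax(ndim_obs=4, n_actions=2)
opt = chainer.optimizers.Adam()
opt.setup(model)
agent = chainerrl.agents.A3C(model, opt, t_max=5, gamma=0.99, beta=1e-2)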