def __init__(self, train=False):
    super(Mynet, self).__init__()
    self.train = train
    with self.init_scope():
        self.enc1 = chainer.Sequential()
        for i in range(2):
            self.enc1.append(L.Convolution2D(None, 32, ksize=3, pad=1, stride=1, nobias=True))
            self.enc1.append(F.relu)
            self.enc1.append(L.BatchNormalization(32))
        self.enc2 = chainer.Sequential()
        for i in range(2):
            self.enc2.append(L.Convolution2D(None, 32, ksize=3, pad=1, stride=1, nobias=True))
            self.enc2.append(F.relu)
            self.enc2.append(L.BatchNormalization(32))
        self.upsample = chainer.Sequential()
        self.upsample.append(L.Deconvolution2D(None, 32, ksize=2, stride=2))
        self.upsample.append(F.relu)
        self.upsample.append(L.BatchNormalization(32))
        self.concat = chainer.Sequential()
        self.concat.append(L.Convolution2D(None, 32, ksize=1, stride=1))
        self.concat.append(F.relu)
        self.concat.append(L.BatchNormalization(32))
        self.dec1 = chainer.Sequential()
        for i in range(2):
            self.dec1.append(L.Convolution2D(None, 32, ksize=3, pad=1, stride=1, nobias=True))
            self.dec1.append(F.relu)
            self.dec1.append(L.BatchNormalization(32))
        self.out = L.Convolution2D(None, num_classes + 1, ksize=1, pad=0, stride=1, nobias=False)
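# Hedged usage sketch (assumption, not from the source): a chainer.Sequential
# built from links and functions, like the encoder stages above, is itself
# callable, and the lazy `None` input channels are resolved on the first
# forward pass.
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L

enc = chainer.Sequential(
    L.Convolution2D(None, 32, ksize=3, pad=1, stride=1, nobias=True),
    F.relu,
    L.BatchNormalization(32),
)
y = enc(np.zeros((1, 3, 32, 32), dtype=np.float32))  # -> shape (1, 32, 32, 32)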
def test_addgrads(self):
    l1 = links.Linear(2, 3)
    l2 = links.Linear(3, 2)
    l3 = links.Linear(2, 3)
    s1 = chainer.Sequential(l1, l2)
    s2 = chainer.Sequential(s1, l3)
    l1.b.grad.fill(1)
    l2.W.grad.fill(2)
    l2.b.grad.fill(3)
    l3.W.grad.fill(4)
    l3.b.grad.fill(5)
    l1.W.grad.fill(6)

    self.l1.b.grad.fill(-1)
    self.l2.W.grad.fill(-2)
    self.l2.b.grad.fill(-3)
    self.l3.W.grad.fill(-4)
    self.l3.b.grad.fill(-5)
    self.l1.W.cleargrad()

    self.s2.addgrads(s2)
    numpy.testing.assert_array_equal(self.l1.b.grad, numpy.zeros((3,)))
    numpy.testing.assert_array_equal(self.l1.W.grad, l1.W.grad)
    numpy.testing.assert_array_equal(self.l2.W.grad, numpy.zeros((2, 3)))
    numpy.testing.assert_array_equal(self.l2.b.grad, numpy.zeros((2,)))
    numpy.testing.assert_array_equal(self.l3.W.grad, numpy.zeros((3, 2)))
    numpy.testing.assert_array_equal(self.l3.b.grad, numpy.zeros((3,)))
def __init__(self, in_channels, out_channels, nf0, num_down, max_channels,
             norm=L.BatchNormalization, outermost_linear=False):
    '''
    :param in_channels: Number of input channels
    :param out_channels: Number of output channels
    :param nf0: Number of features at the highest level of the U-Net
    :param num_down: Number of downsampling stages
    :param max_channels: Maximum number of channels (channels double with every downsampling stage)
    :param norm: Which norm to use. If None, no norm is used. Default is batch norm with affine parameters.
    :param outermost_linear: Whether the output layer should be a linear layer or a nonlinear one.
    '''
    super().__init__()

    assert num_down > 0, "Need at least one downsampling layer in UNet3d."

    # Define the in block
    in_layer = [Conv3dSame(in_channels, nf0, kernel_size=3, bias=False)]
    if norm is not None:
        in_layer += [norm(nf0)]
    in_layer += [F.leaky_relu]

    # Define the center UNet block. The feature map has height and width 1 --> no batchnorm.
    unet_block = UnetSkipConnectionBlock3d(
        int(min(2 ** (num_down - 1) * nf0, max_channels)),
        int(min(2 ** (num_down - 1) * nf0, max_channels)),
        norm=None)
    for i in list(range(0, num_down - 1))[::-1]:
        unet_block = UnetSkipConnectionBlock3d(
            int(min(2 ** i * nf0, max_channels)),
            int(min(2 ** (i + 1) * nf0, max_channels)),
            submodule=unet_block,
            norm=norm)

    # Define the out layer. Each unet block concatenates its inputs with its outputs, so the
    # output layer automatically receives the output of the in_layer and the output of the
    # last unet layer.
    out_layer = [Conv3dSame(2 * nf0, out_channels, kernel_size=3, bias=outermost_linear)]
    if not outermost_linear:
        if norm is not None:
            out_layer += [norm(out_channels)]
        out_layer += [F.relu]

    with self.init_scope():
        self.in_layer = chainer.Sequential(*in_layer)
        self.unet_block = unet_block
        self.out_layer = chainer.Sequential(*out_layer)
def __init__(self, etype, idim, elayers, eunits, eprojs, subsample, dropout, in_channel=1):
    super(Encoder, self).__init__()
    typ = etype.lstrip("vgg").rstrip("p")
    if typ not in ['lstm', 'gru', 'blstm', 'bgru']:
        logging.error("Error: need to specify an appropriate encoder architecture")

    with self.init_scope():
        if etype.startswith("vgg"):
            if etype[-1] == "p":
                self.enc = chainer.Sequential(
                    VGG2L(in_channel),
                    RNNP(get_vgg2l_odim(idim, in_channel=in_channel),
                         elayers, eunits, eprojs, subsample, dropout, typ=typ))
                logging.info('Use CNN-VGG + ' + typ.upper() + 'P for encoder')
            else:
                self.enc = chainer.Sequential(
                    VGG2L(in_channel),
                    RNN(get_vgg2l_odim(idim, in_channel=in_channel),
                        elayers, eunits, eprojs, dropout, typ=typ))
                logging.info('Use CNN-VGG + ' + typ.upper() + ' for encoder')
        else:
            if etype[-1] == "p":
                self.enc = chainer.Sequential(
                    RNNP(idim, elayers, eunits, eprojs, subsample, dropout, typ=typ))
                logging.info(typ.upper() + ' with every-layer projection for encoder')
            else:
                self.enc = chainer.Sequential(
                    RNN(idim, elayers, eunits, eprojs, dropout, typ=typ))
                logging.info(typ.upper() + ' without projection for encoder')
def setUp(self):
    self.l1 = links.Linear(None, 3)
    self.l2 = links.Linear(3, 2)
    self.l3 = links.Linear(2, 3)
    # s1: l1 -> l2
    self.s1 = chainer.Sequential(self.l1)
    self.s1.append(self.l2)
    # s2: s1 (l1 -> l2) -> l3
    self.s2 = chainer.Sequential(self.s1)
    self.s2.append(self.l3)
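# Reader note (assumption, not from the source): chainer.Sequential accepts
# another Sequential as a layer, so calling the nested pipeline composes as
# s2(x) == l3(l2(l1(x))) even though len(s2) == 2.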
def get_encoder_decoder(src_vocab_size, tgt_vocab_size, N=6, model_size=512,
                        ff_size=2048, num_heads=8, dropout_ratio=0.1):
    """Convenience function that returns the full transformer model, including encoder and decoder.

    :param src_vocab_size: the number of classes for the encoder
    :param tgt_vocab_size: the number of classes for the decoder
    :param N: stack size of the encoder and the decoder
    :param model_size: the number of hidden units in the transformer
    :param ff_size: the number of hidden units in the PositionwiseFeedForward parts of the model
    :param num_heads: number of attention heads in the attention parts of the model
    :param dropout_ratio: dropout ratio for regularization
    :return: the transformer model
    """
    attention = MultiHeadedAttention(num_heads, model_size, dropout_ratio=dropout_ratio)
    feed_forward = PositionwiseFeedForward(model_size, ff_size, dropout_ratio=dropout_ratio)
    positional_encoding = PositionalEncoding(model_size, dropout_ratio=dropout_ratio)

    encoder_layer = EncoderLayer(model_size, copy.deepcopy(attention),
                                 copy.deepcopy(feed_forward), dropout_ratio=dropout_ratio)
    encoder = Encoder(encoder_layer, N)

    decoder_layer = DecoderLayer(model_size, copy.deepcopy(attention),
                                 copy.deepcopy(attention), feed_forward,
                                 dropout_ratio=dropout_ratio)
    decoder = Decoder(decoder_layer, N)

    src_embeddings = Embedding(model_size, src_vocab_size)
    tgt_embeddings = Embedding(model_size, tgt_vocab_size)
    src_embeddings = chainer.Sequential(src_embeddings, positional_encoding)
    tgt_embeddings = chainer.Sequential(tgt_embeddings, positional_encoding)

    model = EncoderDecoder(encoder, decoder, src_embeddings, tgt_embeddings)
    return model
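# Hedged usage sketch (assumption, not from the source): a deliberately small
# configuration for smoke testing; the vocabulary sizes are arbitrary.
#
#     model = get_encoder_decoder(src_vocab_size=100, tgt_vocab_size=100,
#                                 N=2, model_size=128, ff_size=256, num_heads=4)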
def get_network(model_name, **kwargs):
    if model_name == 'mv2':
        from pose.models.network_mobilenetv2 import MobilenetV2
        return MobilenetV2(**kwargs)
    elif model_name == 'resnet18':
        from pose.models.network_resnet import ResNet, AdditionalLayer
        return chainer.Sequential(ResNet(n_layers=18), AdditionalLayer(ch=512))
    elif model_name == 'resnet34':
        from pose.models.network_resnet import ResNet, AdditionalLayer
        return chainer.Sequential(ResNet(n_layers=34), AdditionalLayer(ch=512))
    else:
        raise ValueError('Invalid model name: {}'.format(model_name))
def test_init(self):
    self.assertIs(self.s1[0], self.l1)
    self.assertEqual(self.l1.name, '0')
    self.assertIs(self.s2[0], self.s1)
    self.assertEqual(self.s1.name, '0')

    with self.assertRaises(ValueError):
        chainer.Sequential(0)
def __init__(self, in_ch, out_ch, stride):
    super(BasicBlock, self).__init__()
    reduction = 0.5
    self.stride = stride
    self.in_ch = in_ch
    self.out_ch = out_ch
    if stride == 2:
        reduction = 1
    elif in_ch > out_ch:
        reduction = 0.25

    with self.init_scope():
        self.conv1 = L.Convolution2D(in_ch, int(in_ch * reduction),
                                     ksize=1, stride=stride, nobias=False)
        self.bn1 = L.BatchNormalization(int(in_ch * reduction))
        self.conv2 = L.Convolution2D(int(in_ch * reduction), int(in_ch * reduction * 0.5),
                                     ksize=1, stride=1, nobias=False)
        self.bn2 = L.BatchNormalization(int(in_ch * reduction * 0.5))
        self.conv3 = L.Convolution2D(int(in_ch * reduction * 0.5), int(in_ch * reduction),
                                     ksize=(1, 3), stride=1, pad=(0, 1), nobias=False)
        self.bn3 = L.BatchNormalization(int(in_ch * reduction))
        self.conv4 = L.Convolution2D(int(in_ch * reduction), int(in_ch * reduction),
                                     ksize=(3, 1), stride=1, pad=(1, 0), nobias=False)
        self.bn4 = L.BatchNormalization(int(in_ch * reduction))
        self.conv5 = L.Convolution2D(int(in_ch * reduction), out_ch,
                                     ksize=1, stride=1, nobias=False)
        self.bn5 = L.BatchNormalization(out_ch)

        self.shortcut = chainer.Sequential(lambda x: x)
        if stride == 2 or in_ch != out_ch:
            self.shortcut.append(L.Convolution2D(in_ch, out_ch, ksize=1,
                                                 stride=stride, nobias=False))
            self.shortcut.append(L.BatchNormalization(out_ch))
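# Reader note (assumption, not from the source): seeding the shortcut with the
# identity `lambda x: x` keeps it a no-op pass-through; the 1x1 projection and
# batch norm are appended only when the main branch changes the spatial
# resolution (stride == 2) or the channel count, so the two branches can be
# summed afterwards.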
def test_insert(self):
    funcs = [
        functions.sin,
        functions.cos,
        functions.tan,
    ]

    # Prepare the original sequential before insertion.
    orig = []
    for orig_is_link in self.orig:
        if orig_is_link:
            orig.append(links.Linear((3, 3)))
        else:
            orig.append(funcs.pop(0))

    # The subject of insertion
    if self.is_link:
        subj = links.Linear((3, 3))
    else:
        subj = funcs.pop(0)

    # Instantiate the sequential
    seq = chainer.Sequential(*orig)

    if self.expect_error:
        with pytest.raises(IndexError):
            seq.insert(self.pos, subj)
    else:
        seq.insert(self.pos, subj)
        # Insert into `orig` similarly for the following comparison
        orig.insert(self.pos, subj)
        assert len(seq) == len(self.orig) + 1
        for i in range(len(self.orig) + 1):
            assert seq[i] is orig[i]
def test_call_with_multiple_inputs(self):
    model = chainer.Sequential(
        lambda x, y: (x * 2, y * 3, x + y),
        lambda x, y, z: x + y + z
    )
    y = model(2, 3)
    self.assertEqual(y, 18)
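# Why 18 (reader note, not from the source): the first lambda maps (2, 3) to
# the tuple (2 * 2, 3 * 3, 2 + 3) == (4, 9, 5); chainer.Sequential unpacks a
# tuple result as the arguments of the next callable, so the second lambda
# computes 4 + 9 + 5 == 18.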
def test_get_item_error(slices):
    model = chainer.Sequential(
        lambda x: F.get_item(x, slices=slices))
    x = input_generator.increasing(2, 3, 4)
    with pytest.raises(ValueError):
        export(model, x)
def build_decoder(vocab_size, N=6, model_size=512, ff_size=2048, num_heads=8,
                  dropout_ratio=0.1):
    attention = MultiHeadedAttention(num_heads, model_size, dropout_ratio=dropout_ratio)
    feed_forward = PositionwiseFeedForward(model_size, ff_size, dropout_ratio=dropout_ratio)
    positional_encoding = PositionalEncoding(model_size, dropout_ratio=dropout_ratio)

    decoder_layer = DecoderLayer(model_size, copy.deepcopy(attention),
                                 copy.deepcopy(attention), feed_forward,
                                 dropout_ratio=dropout_ratio)
    decoder = Decoder(decoder_layer, N)

    embeddings = Embedding(model_size, vocab_size)
    return chainer.Sequential(embeddings, positional_encoding), decoder
def __init__(self, n_fg_class=None, pretrained_model=None):
    super(YOLOv3, self).__init__()

    param, path = utils.prepare_pretrained_model(
        {'n_fg_class': n_fg_class}, pretrained_model, self._models)
    self.n_fg_class = param['n_fg_class']
    self.use_preset('visualize')

    with self.init_scope():
        self.extractor = Darknet53Extractor()
        self.subnet = chainer.ChainList()

    for i, n in enumerate((512, 256, 128)):
        self.subnet.append(chainer.Sequential(
            Conv2DBNActiv(n * 2, 3, pad=1, activ=_leaky_relu),
            Convolution2D(
                len(self._anchors[i]) * (4 + 1 + self.n_fg_class), 1)))

    default_bbox = []
    step = []
    for k, grid in enumerate(self.extractor.grids):
        for v, u in itertools.product(range(grid), repeat=2):
            for h, w in self._anchors[k]:
                default_bbox.append((v, u, h, w))
                step.append(self.insize / grid)
    self._default_bbox = np.array(default_bbox, dtype=np.float32)
    self._step = np.array(step, dtype=np.float32)

    if path:
        chainer.serializers.load_npz(path, self, strict=False)
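# Reader note (assumption, not from the source): the nested loops above emit
# one default box per (grid cell, anchor) pair at each of the three feature
# scales, so `self._default_bbox` has sum(grid ** 2 * len(anchors)) rows and
# `self._step` stores the matching input-pixel stride for every box.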
def __init__(self, nf0, occnet_nf, frustrum_dims, accmulative_threshold=None):
    super().__init__()

    self.occnet_nf = occnet_nf
    self.frustrum_dims = frustrum_dims
    self.frustrum_depth = frustrum_dims[-1]
    self.depth_coords = None
    self.accmulative_threshold = accmulative_threshold if accmulative_threshold else 4
    print(self.accmulative_threshold)

    with self.init_scope():
        self.occlusion = chainer.Sequential(
            Conv3dSame(nf0 + 1, self.occnet_nf, kernel_size=1, bias=True),
            # L.BatchNormalization(self.occnet_nf),
            F.leaky_relu,
            Conv3dSame(self.occnet_nf, 1, kernel_size=1, bias=True),
            lambda x: x - self.accmulative_threshold,
            F.sigmoid,
        )

    depth_coords = np.arange(
        -self.frustrum_depth // 2,
        self.frustrum_depth // 2)[None, None, :, None, None] / self.frustrum_depth
    self.depth_coords = np.tile(
        depth_coords,
        (1, 1, 1, self.frustrum_dims[0], self.frustrum_dims[0])).astype("float32")
    self.register_persistent('depth_coords')
def create_model(weight='auto', activate=F.sigmoid):
    resnet = ResNet50Layers(pretrained_model=weight)
    model = chainer.Sequential(
        lambda x: resnet(x, layers=['res5'])['res5'],
        L.Linear(None, 128))
    if activate:
        model.append(activate)
    return model
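# Hedged usage sketch (assumption, not from the source): build the model
# without downloading pretrained weights and push a dummy batch through it.
#
#     model = create_model(weight=None)  # skip the pretrained ResNet download
#     feat = model(np.zeros((1, 3, 224, 224), np.float32))  # -> (1, 128)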
def _make_layers(self, num_block, width_x, out_ch, stride):
    strides = [stride] + [1] * (num_block - 1)
    layers = chainer.Sequential()
    for _stride in strides:
        layers.append(BasicBlock(int(width_x * self.in_ch), int(width_x * out_ch), _stride))
        self.in_ch = out_ch
    return layers
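# Reader note (assumption, not from the source): the stride schedule places
# the (possibly) downsampling block first, e.g. stride=2 with num_block=3
# builds blocks with strides [2, 1, 1]; updating self.in_ch inside the loop
# keeps each block's input channels aligned with its predecessor's output.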
def __init__(self, n_units: int, num_hidden_layers: int = 3,
             initialW: Union[None, ch.Initializer] = None,
             initial_bias: Union[None, ch.Initializer] = None):
    """Constructor

    Parameters
    ----------
    n_units : int
        The number of units in the hidden layers. 256 was used in the paper.
    num_hidden_layers : int
        The number of hidden layers. 3 was used in the paper.
    initialW : ch.Initializer or None
        The initial value of the weights.
    initial_bias : ch.Initializer or None
        The initial value of the biases.
    """
    super(Encoder, self).__init__()
    with self.init_scope():
        layer = ch.Sequential(
            L.Linear(n_units, initialW=initialW, initial_bias=initial_bias),
            F.sigmoid)
        self._hidden = layer.repeat(num_hidden_layers)
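# Hedged sketch (assumption, not from the source): `Link.repeat` deep-copies
# the block, so the stack above is `num_hidden_layers` independent
# (Linear -> sigmoid) blocks; the default mode='init' re-initializes the
# parameters of every copy rather than sharing them.
block = ch.Sequential(L.Linear(4, 4), F.sigmoid)
stack = block.repeat(3)
assert len(stack) == 3  # three independent copies of the two-layer block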
def __init__(self, in_channels, out_channels, kernel_size, bias=True, padding_layer=None):
    '''
    :param in_channels: Number of input channels
    :param out_channels: Number of output channels
    :param kernel_size: Scalar. Spatial dimensions of the kernel (only square kernels are supported).
    :param bias: Whether or not to use a bias.
    :param padding_layer: Which padding to use. Default is reflection padding.
    '''
    super().__init__()
    if padding_layer is not None:
        raise NotImplementedError("custom padding layers are not supported")

    ka = kernel_size // 2
    kb = ka - 1 if kernel_size % 2 == 0 else ka
    with self.init_scope():
        self.net = chainer.Sequential(
            ReflectionPad(pad_width=((ka, kb), (ka, kb), (ka, kb))),
            EqualizedConv3d(in_channels, out_channels, kernel_size,
                            stride=1, nobias=not bias))
def test_output(self, name, slices):
    name = 'get_item_' + name
    model = chainer.Sequential(
        lambda x: F.get_item(x, slices=slices))
    x = input_generator.increasing(2, 3, 4)
    self.expect(model, x, name=name, expected_num_initializers=0)
def test_sequential(self):
    model = chainer.Sequential(
        chainer.links.Linear(3),
        chainer.functions.relu,
        chainer.links.Linear(4),
    )
    self.assertEqual(names_of_links(model), {'/0', '/1'})
    self.assertIs(model._layers[1], chainer.functions.relu)

    to_factorized_noisy(model)
    self.assertEqual(
        names_of_links(model),
        {'/0', '/0/mu', '/0/sigma', '/1', '/1/mu', '/1/sigma'})
    self.assertIs(model._layers[1], chainer.functions.relu)

    model(numpy.ones((2, 3), numpy.float32))

    # Assert that the new parameters are used
    y = model(numpy.ones((2, 3), numpy.float32))
    chainer.functions.sum(y).backward()
    for p in model.params():
        self.assertIsNotNone(p.grad)
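# Reader note (assumption, not from the source): to_factorized_noisy rewrites
# each L.Linear in the Sequential into a noisy linear link with `mu` and
# `sigma` sub-links, while non-link entries such as F.relu are left untouched,
# which is what the name assertions above check.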
def _net_model(self):
    layer = chainer.Sequential(L.Linear(self.n_units), F.relu)
    model = layer.repeat(1)
    model.append(L.Linear(self.n_out))
    return L.Classifier(model, lossfun=F.sigmoid_cross_entropy,
                        accfun=F.binary_accuracy)
def __init__(self, idim,
             attention_dim=256,
             attention_heads=4,
             linear_units=2048,
             num_blocks=6,
             dropout_rate=0.1,
             positional_dropout_rate=0.1,
             attention_dropout_rate=0.0,
             input_layer="conv2d",
             pos_enc_class=PositionalEncoding,
             initialW=None,
             initial_bias=None):
    """Initialize Encoder.

    Args:
        idim (int): Input dimension.
        initialW (chainer.Initializer, optional): Initializer for the weights.
        initial_bias (chainer.Initializer, optional): Initializer for the biases.
    """
    super(Encoder, self).__init__()
    initialW = chainer.initializers.Uniform if initialW is None else initialW
    initial_bias = chainer.initializers.Uniform if initial_bias is None else initial_bias
    self.do_history_mask = False
    with self.init_scope():
        channels = 64  # based on the paper
        if input_layer == 'conv2d':
            idim = int(np.ceil(np.ceil(idim / 2) / 2)) * channels
            self.input_layer = Conv2dSubsampling(channels, idim, attention_dim,
                                                 dropout=dropout_rate,
                                                 initialW=initialW,
                                                 initial_bias=initial_bias)
        elif input_layer == 'linear':
            self.input_layer = LinearSampling(idim, attention_dim,
                                              initialW=initialW,
                                              initial_bias=initial_bias)
        elif input_layer == "embed":
            self.input_layer = chainer.Sequential(
                L.EmbedID(idim, attention_dim, ignore_label=-1),
                pos_enc_class(attention_dim, positional_dropout_rate))
            self.do_history_mask = True
        else:
            raise ValueError("unknown input_layer: " + input_layer)
        self.norm = LayerNorm(attention_dim)

    for i in range(num_blocks):
        name = 'encoders.' + str(i)
        layer = EncoderLayer(attention_dim,
                             d_units=linear_units,
                             h=attention_heads,
                             dropout=attention_dropout_rate,
                             initialW=initialW,
                             initial_bias=initial_bias)
        self.add_link(name, layer)
    self.n_layers = num_blocks
def make_model(self, env):
    n_hidden_channels = 20
    obs_size = env.observation_space.low.size
    if self.recurrent:
        v = StatelessRecurrentSequential(
            L.NStepLSTM(1, obs_size, n_hidden_channels, 0),
            L.Linear(None, 1, initialW=chainer.initializers.LeCunNormal(1e-1)),
        )
        if self.discrete:
            n_actions = env.action_space.n
            pi = StatelessRecurrentSequential(
                L.NStepLSTM(1, obs_size, n_hidden_channels, 0),
                policies.FCSoftmaxPolicy(
                    n_hidden_channels, n_actions,
                    n_hidden_layers=0,
                    nonlinearity=F.tanh,
                    last_wscale=1e-1,
                )
            )
        else:
            action_size = env.action_space.low.size
            pi = StatelessRecurrentSequential(
                L.NStepLSTM(1, obs_size, n_hidden_channels, 0),
                policies.FCGaussianPolicy(
                    n_hidden_channels, action_size,
                    n_hidden_layers=0,
                    nonlinearity=F.tanh,
                    mean_wscale=1e-1,
                )
            )
        return StatelessRecurrentBranched(pi, v)
    else:
        v = chainer.Sequential(
            L.Linear(None, n_hidden_channels),
            F.tanh,
            L.Linear(None, 1, initialW=chainer.initializers.LeCunNormal(1e-1)),
        )
        if self.discrete:
            n_actions = env.action_space.n
            pi = policies.FCSoftmaxPolicy(
                obs_size, n_actions,
                n_hidden_layers=1,
                n_hidden_channels=n_hidden_channels,
                nonlinearity=F.tanh,
                last_wscale=1e-1,
            )
        else:
            action_size = env.action_space.low.size
            pi = policies.FCGaussianPolicy(
                obs_size, action_size,
                n_hidden_layers=1,
                n_hidden_channels=n_hidden_channels,
                nonlinearity=F.tanh,
                mean_wscale=1e-1,
            )
        return A3CSeparateModel(pi=pi, v=v)
def test_output(self):
    model = chainer.Sequential(F.where)
    cond = np.array([[1, 0, 0], [0, 1, 0]], dtype=bool)
    x = input_generator.increasing(2, 3)
    y = np.zeros((2, 3), np.float32)
    self.expect(model, (cond, x, y), skip_opset_version=[7, 8])
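# Reader note (assumption, not from the source): wrapping the bare F.where in
# chainer.Sequential works because Sequential forwards the three inputs
# (cond, x, y) as positional arguments to the function.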
def _test_load_ppo(self, gpu):
    winit = chainerrl.initializers.Orthogonal(1.)
    winit_last = chainerrl.initializers.Orthogonal(1e-2)
    action_size = 3
    policy = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, action_size, initialW=winit_last),
        policies.GaussianHeadWithStateIndependentCovariance(
            action_size=action_size,
            var_type='diagonal',
            var_func=lambda x: F.exp(2 * x),  # Parameterize log std
            var_param_init=0,  # log std = 0 => std = 1
        ),
    )
    vf = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 1, initialW=winit),
    )
    model = links.Branched(policy, vf)

    opt = chainer.optimizers.Adam(3e-4, eps=1e-5)
    opt.setup(model)

    agent = agents.PPO(
        model,
        opt,
        obs_normalizer=None,
        gpu=gpu,
        update_interval=2048,
        minibatch_size=64,
        epochs=10,
        clip_eps_vf=None,
        entropy_coef=0,
        standardize_advantages=True,
        gamma=0.995,
        lambd=0.97)

    model, exists = download_model(
        "PPO", "Hopper-v2", model_type=self.pretrained_type)
    agent.load(model)
    if os.environ.get('CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED'):
        assert exists
def test_load_trpo(self):
    winit = chainerrl.initializers.Orthogonal(1.)
    winit_last = chainerrl.initializers.Orthogonal(1e-2)
    action_size = 3
    policy = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, action_size, initialW=winit_last),
        policies.GaussianHeadWithStateIndependentCovariance(
            action_size=action_size,
            var_type='diagonal',
            var_func=lambda x: F.exp(2 * x),  # Parameterize log std
            var_param_init=0,  # log std = 0 => std = 1
        ),
    )
    vf = chainer.Sequential(
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 64, initialW=winit),
        F.tanh,
        L.Linear(None, 1, initialW=winit),
    )
    vf_opt = chainer.optimizers.Adam()
    vf_opt.setup(vf)

    agent = agents.TRPO(
        policy=policy,
        vf=vf,
        vf_optimizer=vf_opt,
        update_interval=5000,
        max_kl=0.01,
        conjugate_gradient_max_iter=20,
        conjugate_gradient_damping=1e-1,
        gamma=0.995,
        lambd=0.97,
        vf_epochs=5,
        entropy_coef=0)

    model, exists = download_model(
        "TRPO", "Hopper-v2", model_type=self.pretrained_type)
    agent.load(model)
    if os.environ.get('CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED'):
        assert exists
def test_iadd(self):
    l4 = links.Linear(3, 1)
    self.s2 += chainer.Sequential(l4)
    self.assertIs(self.s2[0], self.s1)
    self.assertIs(self.s2[1], self.l3)
    self.assertIs(self.s2[2], l4)

    with self.assertRaises(ValueError):
        self.s2 += 0
def test_extend(self):
    l1 = links.Linear(3, 2)
    l2 = links.Linear(2, 3)
    s3 = chainer.Sequential(l1, l2)
    self.s2.extend(s3)
    self.assertEqual(len(self.s2), 4)
    self.assertIs(self.s2[2], s3[0])
    self.assertIs(self.s2[3], s3[1])
def make_agent(self, env, gpu):
    obs_size = env.observation_space.low.size
    action_size = env.action_space.low.size
    hidden_size = 20
    policy = chainer.Sequential(
        L.Linear(obs_size, hidden_size),
        F.relu,
        L.Linear(hidden_size, action_size,
                 initialW=chainer.initializers.LeCunNormal(1e-1)),
        F.tanh,
        chainerrl.distribution.ContinuousDeterministicDistribution,
    )
    policy_optimizer = optimizers.Adam().setup(policy)
    policy_optimizer.add_hook(chainer.optimizer_hooks.GradientClipping(1))

    def make_q_func_with_optimizer():
        q_func = chainer.Sequential(
            concat_obs_and_action,
            L.Linear(obs_size + action_size, hidden_size),
            F.relu,
            L.Linear(hidden_size, 1,
                     initialW=chainer.initializers.LeCunNormal(1e-1)),
        )
        q_func_optimizer = optimizers.Adam(1e-2).setup(q_func)
        q_func_optimizer.add_hook(
            chainer.optimizer_hooks.GradientClipping(1))
        return q_func, q_func_optimizer

    q_func1, q_func1_optimizer = make_q_func_with_optimizer()
    q_func2, q_func2_optimizer = make_q_func_with_optimizer()

    rbuf = chainerrl.replay_buffer.ReplayBuffer(10 ** 6)

    explorer = chainerrl.explorers.AdditiveGaussian(
        scale=0.3, low=env.action_space.low, high=env.action_space.high)

    def burnin_action_func():
        return np.random.uniform(
            env.action_space.low, env.action_space.high).astype(np.float32)

    agent = chainerrl.agents.TD3(
        policy=policy,
        q_func1=q_func1,
        q_func2=q_func2,
        policy_optimizer=policy_optimizer,
        q_func1_optimizer=q_func1_optimizer,
        q_func2_optimizer=q_func2_optimizer,
        replay_buffer=rbuf,
        explorer=explorer,
        gamma=0.5,
        minibatch_size=100,
        replay_start_size=100,
        burnin_action_func=burnin_action_func,
    )
    return agent
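# Hedged usage sketch (assumption, not from the source): driving the TD3 agent
# above with the classic chainerrl training API, where `env` is the gym-style
# environment passed to make_agent and `agent` is its return value.
#
#     obs = env.reset()
#     reward, done = 0.0, False
#     while not done:
#         action = agent.act_and_train(obs, reward)
#         obs, reward, done, _ = env.step(action)
#     agent.stop_episode_and_train(obs, reward, done)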