def __init__(self, dim_state, dim_action, hidden_sizes):
    """Twin Q-network: two independent (state, action) -> scalar-Q MLPs.

    Args:
        dim_state: dimensionality of the state vector.
        dim_action: dimensionality of the action vector.
        hidden_sizes: widths of the hidden fully-connected layers.
    """
    super().__init__()
    self.dim_state = dim_state
    self.dim_action = dim_action
    self.hidden_sizes = hidden_sizes

    def make_q_net():
        # One MLP head mapping concat(state, action) -> a single scalar.
        # Factored out because the original built net1 and net2 with two
        # identical copies of this loop.
        sizes = [dim_state + dim_action, *hidden_sizes]
        layers = []
        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            layers.append(FCLayer(n_in, n_out))
            layers.append(nn.ReLU())
        layers.append(FCLayer(sizes[-1], 1))
        return nn.Sequential(*layers)

    with self.scope:
        self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state])
        self.op_actions = tf.placeholder(tf.float32, shape=[None, dim_action])
        # Two independently initialized Q heads; both are built inside
        # self.scope so variable creation happens under the same scope as
        # before.
        self.net1 = make_q_net()
        self.net2 = make_q_net()
        self.op_q1, self.op_q2 = self.forward(self.op_states, self.op_actions)
def __init__(self, dim_state: int, dim_action: int, normalizers: Normalizers, *, arch: FLAGS.arch):
    """Build placeholders and a residual MLP mapping concat(state, action)
    to a dim_state-sized output, then run self.build().
    """
    super().__init__()
    # Tiny-stddev init on the first and last linear layers; the residual
    # blocks in between use their own (Fixup) initialization.
    w_init = tf.truncated_normal_initializer(mean=0.0, stddev=1e-5)
    self.dim_state = dim_state
    self.dim_action = dim_action
    self.op_states = tf.placeholder(tf.float32, shape=[None, self.dim_state], name='states')
    self.op_actions = tf.placeholder(tf.float32, shape=[None, self.dim_action], name='actions')
    stages = [
        nn.Linear(dim_state + dim_action, arch.n_units, weight_initializer=w_init),
        nn.ReLU(),
        make_blocks(FixupResBlock, arch.n_units, arch.n_blocks, arch.n_blocks),
        nn.Linear(arch.n_units, dim_state, weight_initializer=w_init),
    ]
    self.mlp = nn.Sequential(*stages)
    self.normalizers = normalizers
    self.build()
def __init__(self, dim_state: int, dim_action: int, hidden_sizes: List[int], normalizer: Normalizers, save_normalizer=False):
    """(state, action) -> logit MLP, plus a derived reward op
    -log(1 - sigmoid(logit)).
    """
    super().__init__()
    self.dim_state = dim_state
    self.dim_action = dim_action
    self.hidden_sizes = hidden_sizes
    # Wrap the normalizer in closures so the object itself does not end up
    # in self.state_dict(); actions pass through unchanged.
    self.state_process_fn = lambda s: normalizer.state(s)
    self.action_process_fn = lambda a: a
    if save_normalizer:
        self.normalizer = normalizer
    with self.scope:
        self.op_states = tf.placeholder(tf.float32, [None, dim_state], "state")
        self.op_actions = tf.placeholder(tf.float32, [None, dim_action], "action")
        sizes = [dim_state + dim_action] + list(self.hidden_sizes)
        modules = []
        for n_in, n_out in zip(sizes, sizes[1:]):
            modules.extend([FCLayer(n_in, n_out), nn.ReLU()])
        modules.append(FCLayer(sizes[-1], 1))
        self.net = nn.Sequential(*modules)
        self.op_logits = self(self.op_states, self.op_actions)
        # Small epsilon keeps the log finite when sigmoid saturates at 1.
        self.op_rewards = -tf.log(1 - tf.nn.sigmoid(self.op_logits) + 1e-6)
def forward(self, x):
    """Run the conv stack, flatten, then apply a lazily-created FC head.

    The FC head is built on the first call because its input width depends
    on the flattened conv output size, which is only known once a concrete
    input tensor has been seen.
    """
    features = self.net(x)
    features = tf.layers.flatten(features)
    if not self.initialized:
        self.conv_to_fc = nn.Sequential(
            FCLayer(nin=features.shape[-1].value, nh=512, init_scale=np.sqrt(2)),
            nn.ReLU(),
        )
        self.initialized = True
    return self.conv_to_fc(features)
def __init__(self, x, n_total_blocks):
    """Residual block with Fixup-style initialization.

    fc1 is normal-initialized with a stddev scaled down by the total block
    count, fc2 starts at zero, and scalar bias/scale parameters surround
    each linear layer (neither linear has its own bias).
    """
    super().__init__()
    fixup_std = np.sqrt(2. / x / n_total_blocks)
    # Parameters are created in exactly the order they are used in the
    # residual computation.
    self.bias1a = nn.Parameter(tf.zeros(1), name='bias1a')
    self.fc1 = nn.Linear(
        x, x, bias=False,
        weight_initializer=tf.initializers.random_normal(0, stddev=fixup_std))
    self.bias1b = nn.Parameter(tf.zeros(1), name='bias1b')
    self.relu = nn.ReLU()
    self.bias2a = nn.Parameter(tf.zeros(1), name='bias2a')
    # Zero-init on the second linear makes the block start as identity.
    self.fc2 = nn.Linear(
        x, x, bias=False,
        weight_initializer=tf.initializers.zeros())
    self.scale = nn.Parameter(tf.ones(1), name='scale')
    self.bias2b = nn.Parameter(tf.zeros(1), name='bias2b')
def __init__(self, dim_state: int, dim_action: int, hidden_sizes: List[int], state_process_fn, action_process_fn, activ_fn='none'):
    """(state, action, next_state) -> logit MLP with an optional squashing
    activation on the output, plus a derived reward op.
    """
    super().__init__()
    self.dim_state = dim_state
    self.dim_action = dim_action
    self.hidden_sizes = hidden_sizes
    # Stored as plain callables so no normalizer object is captured in
    # self.state_dict().
    self.state_process_fn = state_process_fn
    self.action_process_fn = action_process_fn
    with self.scope:
        self.op_states = tf.placeholder(tf.float32, [None, dim_state], "state")
        self.op_actions = tf.placeholder(tf.float32, [None, dim_action], "action")
        self.op_next_states = tf.placeholder(tf.float32, [None, dim_state], "next_state")
        sizes = [dim_state * 2 + dim_action] + list(self.hidden_sizes)
        modules = []
        for n_in, n_out in zip(sizes, sizes[1:]):
            modules.extend([FCLayer(n_in, n_out), nn.ReLU()])
        modules.append(FCLayer(sizes[-1], 1))
        # Dispatch table instead of an if/elif chain; 'none' means no
        # extra output activation.
        output_activation = {'none': None, 'sigmoid': nn.Sigmoid, 'tanh': nn.Tanh}
        if activ_fn not in output_activation:
            raise ValueError('%s is not supported' % activ_fn)
        if output_activation[activ_fn] is not None:
            modules.append(output_activation[activ_fn]())
        self.net = nn.Sequential(*modules)
        self.op_logits = self(self.op_states, self.op_actions, self.op_next_states)
        # Small epsilon keeps the log finite when sigmoid saturates at 1.
        self.op_rewards = -tf.log(1 - tf.nn.sigmoid(self.op_logits) + 1e-6)
def __init__(self, nin, hidden_sizes=(32, 64, 64), kernel_sizes=(8, 4, 3), strides=(4, 2, 1), init_scale=np.sqrt(2)):
    """Stack of conv + ReLU layers (defaults match the common Atari/DQN
    conv configuration).

    Args:
        nin: number of input channels.
        hidden_sizes: output channels of each conv layer.
        kernel_sizes: kernel size per layer.
        strides: stride per layer.
        init_scale: scale forwarded to each ConvLayer's initializer.
    """
    super().__init__()
    assert len(hidden_sizes) == len(kernel_sizes) == len(strides)
    layers = []
    # zip over the three per-layer configs instead of indexing by
    # range(len(...)); each layer's output channels feed the next layer.
    for nf, rf, stride in zip(hidden_sizes, kernel_sizes, strides):
        layers.append(ConvLayer(nin, nf, rf, stride, init_scale=init_scale))
        layers.append(nn.ReLU())
        nin = nf
    self.layer = nn.Sequential(*layers)
def __init__(self, dim_state, dim_action, hidden_sizes: List[int]):
    """Deterministic policy: state -> tanh-squashed action in (-1, 1)."""
    super().__init__()
    self.dim_state = dim_state
    self.dim_action = dim_action
    self.hidden_sizes = hidden_sizes
    with self.scope:
        self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state], name='states')
        sizes = [dim_state] + list(self.hidden_sizes)
        modules = []
        for n_in, n_out in zip(sizes, sizes[1:]):
            modules.extend([FCLayer(n_in, n_out), nn.ReLU()])
        # init_scale=0.01 on the output layer — presumably to keep initial
        # actions near zero; confirm against FCLayer's initializer semantics.
        modules.append(FCLayer(sizes[-1], dim_action, init_scale=0.01))
        modules.append(nn.Tanh())
        self.net = nn.Sequential(*modules)
        self.op_actions = self(self.op_states)
def __init__(self, dim_state: int, dim_action: int, hidden_sizes: List[int]):
    """Stochastic policy head.

    Builds a state -> (dim_action*2)-unit MLP and graph ops for sampled
    actions plus the log-density of externally supplied actions.
    """
    super().__init__()
    self.dim_state = dim_state
    self.dim_action = dim_action
    self.hidden_sizes = hidden_sizes
    with self.scope:
        self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state], name='states')
        # Trailing-underscore placeholder: actions supplied by the caller,
        # as opposed to self.op_actions sampled by the network below.
        self.op_actions_ = tf.placeholder(tf.float32, shape=[None, dim_action], name='actions')
        layers = []
        all_sizes = [dim_state, *self.hidden_sizes]
        for i, (in_features, out_features) in enumerate(zip(all_sizes[:-1], all_sizes[1:])):
            layers.append(FCLayer(in_features, out_features))
            layers.append(nn.ReLU())
        # dim_action*2 output units — presumably mean and log-std halves;
        # confirm against this class's forward()/__call__ implementation.
        layers.append(FCLayer(all_sizes[-1], dim_action*2))
        self.net = nn.Sequential(*layers)
        # self(...) returns: sampled actions, their log-density, the
        # distribution object `pd`, and the distribution mean / log-std.
        self.op_actions, self.op_log_density, pd, self.op_dist_mean, self.op_dist_log_std = self(self.op_states)
        self.op_actions_mean = tf.tanh(self.op_dist_mean)
        # Log-density of the given actions: invert the tanh squash (clipped
        # to the open interval so atanh stays finite)...
        pi_ = tf.atanh(clip_but_pass_gradient(self.op_actions_, -1+EPS, 1-EPS))
        log_prob_pi_ = pd.log_prob(pi_).reduce_sum(axis=1)
        # ...then apply the tanh change-of-variables correction,
        # subtracting sum(log(1 - a^2)) over action dimensions.
        log_prob_pi_ -= tf.reduce_sum(tf.log(1 - self.op_actions_ ** 2 + EPS), axis=1)
        self.op_log_density_ = log_prob_pi_