def __init__(self, dim_state: int, dim_action: int, hidden_sizes: List[int],
             normalizer: GaussianNormalizer, init_std=1.):
    super().__init__()
    self.dim_state = dim_state
    self.dim_action = dim_action
    self.hidden_sizes = hidden_sizes
    self.init_std = init_std
    self.normalizer = normalizer
    with self.scope:
        self.op_states = tf.placeholder(tf.float32, shape=[None, dim_state], name='states')
        self.op_actions_ = tf.placeholder(tf.float32, shape=[None, dim_action], name='actions')

        layers = []
        # Hidden stack: dim_state -> hidden_sizes[0] -> ... -> hidden_sizes[-1], tanh in between.
        all_sizes = [dim_state, *self.hidden_sizes]
        for in_features, out_features in zip(all_sizes[:-1], all_sizes[1:]):
            layers.append(nn.Linear(in_features, out_features, weight_initializer=normc_initializer(1)))
            layers.append(nn.Tanh())
        # Small output weights keep the initial action mean near zero.
        layers.append(nn.Linear(all_sizes[-1], dim_action, weight_initializer=normc_initializer(0.01)))
        self.net = nn.Sequential(*layers)

        self.op_log_std = nn.Parameter(
            tf.constant(np.log(self.init_std), shape=[self.dim_action], dtype=tf.float32),
            name='log_std')

    self.distribution = self(self.op_states)
    self.op_actions = self.distribution.sample()
    self.op_actions_mean = self.distribution.mean()
    self.op_actions_std = self.distribution.stddev()
    self.op_nlls_ = -self.distribution.log_prob(self.op_actions_).reduce_sum(axis=1)
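# For reference, op_nlls_ above is the diagonal-Gaussian negative log-likelihood
# of the fed-in actions. A minimal NumPy restatement of that math (a sketch of
# the formula, not the framework call):
import numpy as np

def diag_gaussian_nll(actions, mean, std):
    # -log N(a | mean, std) per dimension, summed over the action axis.
    log_prob = -0.5 * ((actions - mean) / std) ** 2 - np.log(std) - 0.5 * np.log(2 * np.pi)
    return -log_prob.sum(axis=1)

# Sanity check: at the mean with unit std, the NLL is 0.5 * log(2 * pi) per dimension.
nll = diag_gaussian_nll(np.zeros((4, 2)), np.zeros((4, 2)), np.ones((4, 2)))
assert np.allclose(nll, 2 * 0.5 * np.log(2 * np.pi))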
def __init__(self, dim_state: int, dim_action: int, normalizers: Normalizers, *, arch: FLAGS.arch):
    super().__init__()
    initializer = tf.truncated_normal_initializer(mean=0.0, stddev=1e-5)
    self.dim_state = dim_state
    self.dim_action = dim_action
    self.op_states = tf.placeholder(tf.float32, shape=[None, self.dim_state], name='states')
    self.op_actions = tf.placeholder(tf.float32, shape=[None, self.dim_action], name='actions')
    self.mlp = nn.Sequential(
        nn.Linear(dim_state + dim_action, arch.n_units, weight_initializer=initializer),
        nn.ReLU(),
        make_blocks(FixupResBlock, arch.n_units, arch.n_blocks, arch.n_blocks),
        nn.Linear(arch.n_units, dim_state, weight_initializer=initializer),
    )
    self.normalizers = normalizers
    self.build()
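# Shape-level sketch of the forward path this constructor sets up, with NumPy
# stand-ins for the layers (sizes are illustrative; whether the output is a
# next state or a state delta depends on build(), which is not shown here):
import numpy as np

batch, dim_state, dim_action, n_units = 32, 17, 6, 256
s, a = np.zeros((batch, dim_state)), np.zeros((batch, dim_action))
h = np.maximum(np.concatenate([s, a], axis=1) @ np.zeros((dim_state + dim_action, n_units)), 0.)
# ... arch.n_blocks FixupResBlocks keep the width at n_units ...
out = h @ np.zeros((n_units, dim_state))  # final Linear: n_units -> dim_state
assert out.shape == (batch, dim_state)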
def __init__(self, blocks, activation=nn.ReLU, squeeze=False, weight_initializer=None, build=True):
    super().__init__()
    self._blocks = blocks
    if build:
        self.op_inputs = tf.placeholder(tf.float32, [None, self._blocks[0]])
    with self.scope:
        kwargs = {}
        if weight_initializer is not None:
            kwargs['weight_initializer'] = weight_initializer
        layers = []
        for in_features, out_features in zip(blocks[:-1], blocks[1:]):
            # `if layers:` puts an activation between consecutive Linear layers,
            # but never after the last one.
            if layers:
                layers.append(activation())
            layers.append(nn.Linear(in_features, out_features, **kwargs))
        if squeeze:
            layers.append(nn.Squeeze(axis=1))
        self.net = nn.Sequential(*layers)
    self._squeeze = squeeze
    self._activation = activation
    if build:
        self.build()
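# Pure-Python trace of the interleaving pattern above (hypothetical sizes),
# showing that activations land between Linear layers only:
blocks = [4, 64, 64, 1]
names = []
for n_in, n_out in zip(blocks[:-1], blocks[1:]):
    if names:
        names.append('activation')
    names.append(f'Linear({n_in}, {n_out})')
assert names == ['Linear(4, 64)', 'activation', 'Linear(64, 64)', 'activation', 'Linear(64, 1)']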
def __init__(self, blocks, activation, squeeze=False, weight_initializer=None, output_activation=None):
    super().__init__()
    self._blocks = blocks
    with self.scope:
        kwargs = {}
        if weight_initializer is not None:
            kwargs['weight_initializer'] = weight_initializer
        layers = []
        for in_features, out_features in zip(blocks[:-1], blocks[1:]):
            if layers:
                layers.append(activation())
            layers.append(nn.Linear(in_features, out_features, **kwargs))
        if squeeze:
            layers.append(nn.Squeeze(axis=-1))
        if output_activation:
            layers.append(output_activation())
        self._modules = {i: module for i, module in enumerate(layers)}
    self._squeeze = squeeze
    self._activation = activation
    # Unlike the variant above, no placeholder is created here and build()
    # is not called; graph construction is deferred until later.
    self._built = False
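# Hypothetical usage, assuming this variant is named MLP and follows the same
# deferred-build convention as the classes above (build() is called separately
# once inputs exist); output_activation=nn.Tanh would bound outputs to (-1, 1):
actor_net = MLP([dim_state, 64, 64, dim_action], activation=nn.ReLU,
                output_activation=nn.Tanh)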
def __init__(self, x, n_total_blocks):
    super().__init__()
    # Fixup initialization: He std sqrt(2 / x), scaled down by sqrt(n_total_blocks).
    std = np.sqrt(2. / x / n_total_blocks)
    self.bias1a = nn.Parameter(tf.zeros(1), name='bias1a')
    self.fc1 = nn.Linear(x, x, bias=False,
                         weight_initializer=tf.initializers.random_normal(0, stddev=std))
    self.bias1b = nn.Parameter(tf.zeros(1), name='bias1b')
    self.relu = nn.ReLU()
    self.bias2a = nn.Parameter(tf.zeros(1), name='bias2a')
    # fc2 starts at zero, so the residual branch vanishes and the block is an
    # identity map at initialization.
    self.fc2 = nn.Linear(x, x, bias=False, weight_initializer=tf.initializers.zeros())
    self.scale = nn.Parameter(tf.ones(1), name='scale')
    self.bias2b = nn.Parameter(tf.zeros(1), name='bias2b')
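# The constructor above only registers parameters. A plausible forward pass for
# a Fixup residual block (a sketch following Zhang et al., 2019; the class's
# actual forward method is not shown in this snippet), in NumPy:
import numpy as np

def fixup_block_forward(x, W1, W2, b1a, b1b, b2a, b2b, scale):
    h = np.maximum((x + b1a) @ W1 + b1b, 0.)  # bias1a, fc1, bias1b, ReLU
    h = ((h + b2a) @ W2) * scale + b2b        # bias2a, fc2 (zero at init), scale, bias2b
    return x + h                              # residual add: identity map at init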
def __init__(self, n_params):
    '''self.goal_velocity: in [-1, 1] when FLAGS.task.scaler == 'tanh' '''
    super().__init__()
    with self.scope:
        layers = []
        layers.append(nn.Linear(1, n_params, bias=False, weight_initializer=normc_initializer(1.0)))
        if FLAGS.task.scaler == 'tanh':
            layers.append(nn.Tanh())
        self.net = nn.Sequential(*layers)
    # Feed a constant 1 through the net; self.net(c) has shape [1, n_params],
    # so indexing with [0] yields the [n_params]-dim goal velocity.
    c = tf.constant(1, shape=[1, 1], dtype=tf.float32)
    self.goal_velocity = self.net(c)[0]
    print(self.goal_velocity)
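# NumPy restatement of the computation above (w stands for the single Linear
# layer's 1 x n_params weight matrix; with the 'tanh' scaler, every entry of
# goal_velocity lands in (-1, 1)):
import numpy as np

w = np.random.randn(1, 3)  # hypothetical n_params = 3
goal_velocity = np.tanh(np.ones((1, 1)) @ w)[0]
assert goal_velocity.shape == (3,)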