def __init__(self, n_actions, rnn_units=256, conv_units=32):
    """Recurrent-convolutional actor-critic policy.

    Layers: four strided ELU convolutions, a flatten + dense projection,
    a 2-layer GRU stack, and separate policy / value heads.
    """
    super(CRPolicy, self).__init__()
    # Two stacked GRU cells, with a zero initial state for batch size 1.
    self.gru = MultiRNNCell(
        [GRUCell(rnn_units, kernel_initializer=orthogonal())
         for _ in range(2)])
    self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)

    # The four conv stages are identical; build them with one helper.
    def make_conv():
        return Conv2D(conv_units, 3, strides=2, activation='elu',
                      kernel_initializer=orthogonal())

    self.cv1 = make_conv()
    self.cv2 = make_conv()
    self.cv3 = make_conv()
    self.cv4 = make_conv()
    self.flatten = Flatten()
    self.fc1 = Dense(rnn_units, kernel_initializer=orthogonal())
    # Small 0.01 gain keeps initial policy logits near zero.
    self.pol = Dense(n_actions, kernel_initializer=orthogonal(0.01))
    self.val = Dense(1, kernel_initializer=orthogonal())
def __init__(self, nact, rnn_units=256):
    """Recurrent actor-critic graph over a flat observation placeholder.

    Builds dense -> GRU(x2) features, then a softmax policy head and a
    linear value head on the shared recurrent features.
    """
    cells = [GRUCell(rnn_units, kernel_initializer=orthogonal())
             for _ in range(2)]
    self.gru = MultiRNNCell(cells)
    self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)
    self.obs_ph = tf.placeholder(dtype=tf.float32)
    fc1 = dense(self.obs_ph, rnn_units, activation=elu,
                kernel_initializer=orthogonal(), name='fc1')
    # dynamic_rnn wants a time axis; treat the batch as one sequence.
    expand = tf.expand_dims(fc1, axis=0, name='expand')
    rnn_out, self.state = dynamic_rnn(self.gru, expand,
                                      initial_state=self.state)
    reshape = tf.reshape(rnn_out, shape=[-1, rnn_units], name='reshape')
    self.logits = dense(reshape, nact,
                        kernel_initializer=orthogonal(0.01), name='logits')
    self.pi = tf.nn.softmax(self.logits, name='pi')
    self.action = boltzmann(self.pi)
    # FIX: the value head previously read the policy logits (a small,
    # 0.01-gain layer), not the shared features. Feed it the RNN output
    # instead, consistent with the convolutional variant of this class.
    self.value = dense(reshape, 1, kernel_initializer=orthogonal(),
                       name='value')
def build(self, input_shape: Union[list, tuple]):
    """Create the initial recurrent state and the power-type encoding table."""
    super().build(input_shape)
    orth = init.orthogonal()
    # Zero-valued recurrent state sized for a batch of one.
    self.__create_weights__(tf.zeros(self.state_shape(1)), 'initial_state')
    # 14 x encoding-size orthogonal embedding table.
    self.__create_weights__(
        orth((14, self.__power_type_encoding_size__)),
        'power_type_encoding')
def __init__(self, n_actions, rnn_units=256):
    """Recurrent actor-critic policy: dense -> 2-layer GRU -> pi/V heads."""
    super(RPolicy, self).__init__()
    cells = [
        GRUCell(rnn_units, kernel_initializer=orthogonal())
        for _ in range(2)
    ]
    self.gru = MultiRNNCell(cells)
    self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)
    # Consistency fix: use orthogonal initialization on all layers, as the
    # GRU cells above and the sibling policy classes in this file already
    # do; the policy head gets a small 0.01 gain so that initial action
    # logits stay near zero.
    self.fc1 = Dense(rnn_units, activation='relu',
                     kernel_initializer=orthogonal())
    self.pol = Dense(n_actions, kernel_initializer=orthogonal(0.01))
    self.val = Dense(1, kernel_initializer=orthogonal())
def __init__(self, n_actions):
    """Conv/max-pool recurrent policy with separate pi and V output heads."""
    super(CRPolicy, self).__init__()
    gain = np.sqrt(2)  # standard orthogonal gain for ReLU layers
    self.gru = MultiRNNCell(
        [GRUCell(128, kernel_initializer=orthogonal(gain))
         for _ in range(2)])
    self.s0 = self.gru.zero_state(batch_size=1, dtype=tf.float32)
    # Three conv + max-pool stages.
    self.cv1 = Conv2D(32, 3, activation='relu',
                      kernel_initializer=orthogonal(gain))
    self.mp1 = MaxPool2D()
    self.cv2 = Conv2D(32, 3, activation='relu',
                      kernel_initializer=orthogonal(gain))
    self.mp2 = MaxPool2D()
    self.cv3 = Conv2D(32, 3, activation='relu',
                      kernel_initializer=orthogonal(gain))
    self.mp3 = MaxPool2D()
    self.flatten = Flatten()
    # Fully connected trunk.
    self.fc1 = Dense(128, activation='relu',
                     kernel_initializer=orthogonal(gain))
    self.fc2 = Dense(100, activation='relu',
                     kernel_initializer=orthogonal(gain))
    self.fc3 = Dense(100, activation='relu',
                     kernel_initializer=orthogonal(gain))
    # Policy head with small gain; unit-gain value head.
    self.pol = Dense(n_actions, kernel_initializer=orthogonal(0.01))
    self.val = Dense(1, kernel_initializer=orthogonal(1))
def __init__(self, h, w, c, nact, rnn_units=256, cnn_units=32):
    """Conv -> GRU actor-critic graph over [None, h, w, c] observations."""
    self.gru = MultiRNNCell(
        [GRUCell(rnn_units, kernel_initializer=orthogonal())
         for _ in range(2)])
    self.state = self.gru.zero_state(batch_size=1, dtype=tf.float32)
    self.obs_ph = tf.placeholder(dtype=tf.float32, shape=[None, h, w, c])
    # Four strided ELU convolutions, then flatten + dense projection.
    net = self.obs_ph
    for tag in ('cv1', 'cv2', 'cv3', 'cv4'):
        net = conv2d(net, cnn_units, 3, strides=2, activation=elu,
                     kernel_initializer=orthogonal(), name=tag)
    net = flatten(net, name='flatten')
    net = dense(net, rnn_units, activation=elu,
                kernel_initializer=orthogonal(), name='fc1')
    # Add a leading time axis so dynamic_rnn can consume the batch as
    # one sequence; flatten back afterwards.
    seq = tf.expand_dims(net, axis=0, name='expand')
    rnn_out, self.state = dynamic_rnn(self.gru, seq,
                                      initial_state=self.state)
    feats = tf.reshape(rnn_out, shape=[-1, rnn_units], name='reshape')
    self.logits = dense(feats, nact,
                        kernel_initializer=orthogonal(0.01), name='logits')
    self.pi = tf.nn.softmax(self.logits, name='pi')
    self.action = boltzmann(self.pi)
    self.value = tf.squeeze(
        dense(feats, 1, kernel_initializer=orthogonal(), name='value'))
def __init__(self, num_output, num_layers=2, res_block_size=2,
             activation=tf.nn.relu,
             kernel_initializer=init.orthogonal(np.sqrt(2)),
             bias_initializer=init.zeros(), name=None):
    """Record residual-stack hyper-parameters; weights are made in build()."""
    super().__init__(name)
    # Structure: number of layers and how many form one residual block.
    self.__num_layers__ = num_layers
    self.__block_size__ = res_block_size
    self.__num_output__ = num_output
    # Per-layer configuration.
    self.__activation__ = activation
    self.__kernel_initializer__ = kernel_initializer
    self.__bias_initializer__ = bias_initializer
def __init__(self, n_actions):
    """Dense recurrent policy with policy (pi) and value (V) heads."""
    super(RPolicy, self).__init__()
    gain = np.sqrt(2)  # orthogonal gain used throughout the trunk
    self.gru = MultiRNNCell(
        [GRUCell(128, kernel_initializer=orthogonal(gain))
         for _ in range(2)])
    self.s0 = self.gru.zero_state(batch_size=1, dtype=tf.float32)
    # Fully connected trunk: 128 -> 100 -> 100 ReLU units.
    self.fc1 = Dense(128, activation='relu',
                     kernel_initializer=orthogonal(gain))
    self.fc2 = Dense(100, activation='relu',
                     kernel_initializer=orthogonal(gain))
    self.fc3 = Dense(100, activation='relu',
                     kernel_initializer=orthogonal(gain))
    # Low-gain policy head; value head keeps the trunk gain.
    self.pol = Dense(n_actions, kernel_initializer=orthogonal(0.01))
    self.val = Dense(1, kernel_initializer=orthogonal(gain))
def build(self, input_shape: Union[list, tuple]):
    """Create convolution, residual-projection and gate weights.

    Input shape format is NCH; the ConvolutionEncoder kernel upgrades
    the shape to NCH1 to perform convolution.
    """
    super().build(input_shape)
    # Orthogonal init for the shortcut/projection kernels; the configured
    # initializers are used for the main conv stack.
    l_initializer = init.orthogonal()
    k_initializer = self.__kernel_initializer__
    b_initializer = self.__bias_initializer__
    # Initial projection: kernel height collapses H to
    # __projection_size__[1], mapping input channels (input_shape[1]) to
    # __projection_size__[0] output channels.
    kernel_size = (int(input_shape[-1] - self.__projection_size__[1] + 1),
                   1, input_shape[1], self.__projection_size__[0])
    self.__create_weights__(l_initializer(kernel_size),
                            'initial_projection_kernel')
    self.__create_weights__(b_initializer(self.__projection_size__[0]),
                            'initial_projection_bias')
    projection_in_c = self.__projection_size__[0]
    projection_out_k = 1
    for idx in range(self.__num_layers__):
        # Channel count grows geometrically, reaching __num_output__ at
        # the last layer.
        out_c = int(self.__num_output__ /
                    (2**(self.__num_layers__ - idx - 1)))
        kernel_size = (3, 1, kernel_size[-1], out_c)
        self.__create_weights__(k_initializer(kernel_size),
                                "conv_kernel_%d" % idx)
        self.__create_weights__(b_initializer(out_c),
                                "conv_bias_%d" % idx)
        # Accumulate the stride covered since the last residual tap.
        projection_out_k = projection_out_k * self.__strides__[idx]
        if idx % self.__block_size__ == 1:
            # End of a residual block: create a shortcut projection that
            # spans the accumulated stride, plus a per-channel gate
            # initialised to ones.
            residx = idx // self.__block_size__
            p_kernel_size = (projection_out_k, 1, projection_in_c, out_c)
            self.__create_weights__(l_initializer(p_kernel_size),
                                    "projection_kernel_%d" % residx)
            self.__create_weights__(b_initializer(out_c),
                                    "projection_bias_%d" % residx)
            initial_gate_value = tf.ones((1, out_c, 1, 1))
            self.__create_weights__(initial_gate_value,
                                    "cgate_kernel_%d" % residx)
            # Next block's shortcut starts from this block's channels.
            projection_in_c = out_c
            projection_out_k = 1
def __init__(self, num_output, initial_dim=(8, 24), ssizes=(2, 2, 2, 3),
             res_block_size=2, activation=tf.nn.relu,
             kernel_initializer=init.orthogonal(np.sqrt(2)),
             bias_initializer=init.zeros(), name=None):
    """Record encoder hyper-parameters; weight creation happens in build()."""
    super().__init__(name)
    # One conv layer per stride entry.
    self.__strides__ = ssizes
    self.__num_layers__ = len(ssizes)
    self.__projection_size__ = initial_dim
    self.__num_output__ = num_output
    self.__block_size__ = res_block_size
    self.__activation__ = activation
    self.__kernel_initializer__ = kernel_initializer
    self.__bias_initializer__ = bias_initializer
def build(self, input_shape: Union[list, tuple]):
    """Create 1x1 channel-expansion, projection and gate weights.

    Input shape format is NCH; the DepthEncoder kernel upgrades the
    shape to NCH1 to perform convolution.
    """
    super().build(input_shape)
    # Orthogonal init for residual shortcut projections.
    l_initializer = init.orthogonal()
    k_initializer = self.__kernel_initializer__
    b_initializer = self.__bias_initializer__
    # Channels grow by this amount each layer, from input_shape[1]
    # toward __num_output__.
    inc_layer_count = (self.__num_output__ - input_shape[1]) // self.__num_layers__
    kernel_size = (1, 1, 1, input_shape[1])
    projection_in_c = input_shape[1]
    for idx in range(self.__num_layers__):
        # The last layer lands exactly on __num_output__; the integer
        # division above may otherwise undershoot it.
        if idx < self.__num_layers__ - 1:
            out_c = kernel_size[-1] + inc_layer_count
        else:
            out_c = self.__num_output__
        kernel_size = (1, 1, kernel_size[-1], out_c)
        self.__create_weights__(k_initializer(kernel_size),
                                "conv_kernel_%d" % idx)
        self.__create_weights__(b_initializer(out_c),
                                "conv_bias_%d" % idx)
        if idx % self.__block_size__ == 1:
            # End of a residual block: 1x1 shortcut projection plus a
            # per-channel gate initialised to ones.
            residx = idx // self.__block_size__
            p_kernel_size = (1, 1, projection_in_c, out_c)
            self.__create_weights__(l_initializer(p_kernel_size),
                                    "projection_kernel_%d" % residx)
            self.__create_weights__(b_initializer(out_c),
                                    "projection_bias_%d" % residx)
            initial_gate_value = tf.ones((1, out_c, 1, 1))
            self.__create_weights__(initial_gate_value,
                                    "cgate_kernel_%d" % residx)
            # Next block's shortcut starts from this block's channels.
            projection_in_c = out_c
def build(self, input_shape: list):
    """Create dense, residual-projection and gate weights.

    Widths grow linearly from input_shape[1] toward __num_output__,
    landing exactly on __num_output__ at the final layer.
    """
    super().build(input_shape)
    # Orthogonal init for residual shortcut projections.
    l_initializer = init.orthogonal()
    k_initializer = self.__kernel_initializer__
    b_initializer = self.__bias_initializer__
    # Per-layer width increment (integer division may undershoot; the
    # final layer compensates below).
    inc_layer_count = (self.__num_output__ - input_shape[1]) // self.__num_layers__
    kernel_size = (1, input_shape[1])
    projection_in_c = input_shape[1]
    for idx in range(self.__num_layers__):
        if idx < self.__num_layers__ - 1:
            out_c = kernel_size[-1] + inc_layer_count
        else:
            out_c = self.__num_output__
        kernel_size = (kernel_size[-1], out_c)
        self.__create_weights__(k_initializer(kernel_size),
                                "dense_kernel_%d" % idx)
        self.__create_weights__(b_initializer(out_c),
                                "dense_bias_%d" % idx)
        if idx % self.__block_size__ == 1:
            # End of a residual block: dense shortcut projection plus a
            # per-unit gate initialised to ones.
            residx = idx // self.__block_size__
            p_kernel_size = (projection_in_c, out_c)
            self.__create_weights__(l_initializer(p_kernel_size),
                                    "projection_kernel_%d" % residx)
            self.__create_weights__(b_initializer(out_c),
                                    "projection_bias_%d" % residx)
            initial_gate_value = tf.ones((1, out_c))
            self.__create_weights__(initial_gate_value,
                                    "cgate_kernel_%d" % residx)
            # Next block's shortcut starts from this block's width.
            projection_in_c = out_c
def __init__(self, nact):
    """Minimal linear softmax policy over a flat observation placeholder."""
    self.obs_ph = tf.placeholder(dtype=tf.float32)
    self.logits = dense(self.obs_ph, nact,
                        kernel_initializer=orthogonal(0.01), name='logits')
    # NOTE(review): the value head reads the policy logits rather than
    # the raw observation — confirm this is intended.
    self.value = dense(self.logits, 1, kernel_initializer=orthogonal(),
                       name='value')
    self.pi = tf.nn.softmax(self.logits, name='pi')
    # Stochastic action sampled from the softmax distribution.
    self.action = boltzmann(self.pi)
testY[np.arange(test_Y.shape[0]), test_Y-1] = 1 # one hot matrix # experiment with small datasets trainX = trainX[:1000] trainY = trainY[:1000] n = trainX.shape[0] # Create the model x = tf.placeholder(tf.float32, [None, NUM_FEATURES]) y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES]) # Build the graph for the deep net hidden_layer = layers.dense(x, 10, activation=tf.nn.relu, kernel_initializer=init.orthogonal( np.sqrt(2)), bias_initializer=init.zeros(), kernel_regularizer=l2_regularizer(1e-6)) output_layer = layers.dense(hidden_layer, 6, kernel_initializer=init.orthogonal( ), bias_initializer=init.zeros(), kernel_regularizer=l2_regularizer(1e-6)) cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2( labels=y_, logits=output_layer) loss = tf.reduce_mean(cross_entropy) + get_regularization_loss() # Create the gradient descent optimizer with the given learning rate. optimizer = tf.train.GradientDescentOptimizer(learning_rate) train_op = optimizer.minimize(loss) correct_prediction = tf.cast( tf.equal(tf.argmax(output_layer, 1), tf.argmax(y_, 1)), tf.float32) accuracy = tf.reduce_mean(correct_prediction)
# experiment with small datasets trainX = trainX[:1000] trainY = trainY[:1000] for neuron_count in neurons: tf.reset_default_graph() with tf.Session() as sess: # Create the model lr = tf.placeholder(tf.float32, []) x = tf.placeholder(tf.float32, [None, NUM_FEATURES]) y_ = tf.placeholder(tf.float32, [None, 1]) # Build the graph for the deep net hidden_layer = layers.dense(x, neuron_count, activation=tf.nn.relu, kernel_initializer=init.orthogonal(np.sqrt(2)), bias_initializer=init.zeros(), kernel_regularizer=l2_regularizer(1e-3)) output_layer = layers.dense(hidden_layer, 1, kernel_initializer=init.orthogonal(), bias_initializer=init.zeros(), kernel_regularizer=l2_regularizer(1e-3)) loss = tf.reduce_mean(tf.square(y_ - output_layer) ) + get_regularization_loss() # Create the gradient descent optimizer with the given learning rate. optimizer = tf.train.GradientDescentOptimizer(lr) train_op = optimizer.minimize(loss) error = tf.reduce_mean(tf.square(y_ - output_layer)) fold_errors = [] for o in range(NUM_FOLDS): sess.run(tf.global_variables_initializer())
# experiment with small datasets
trainX = trainX[:1000]
# Standardize features to zero mean / unit variance.
trainX = (trainX - np.mean(trainX, axis=0)) / np.std(trainX, axis=0)
trainY = trainY[:1000]
n = trainX.shape[0]

# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, NUM_CLASSES])

# Build the graph for the deep nets: one 10-unit SELU hidden layer, then
# a linear softmax output, both L2-regularized.
hidden_layer = layers.dense(x, 10, activation=tf.nn.selu,
                            kernel_initializer=init.orthogonal(),
                            bias_initializer=init.zeros(),
                            kernel_regularizer=l2_regularizer(1e-6))
# BUG FIX: the output layer was wired to the input `x`, silently
# bypassing (and discarding) hidden_layer; connect it to hidden_layer.
output_layer = layers.dense(hidden_layer, NUM_CLASSES,
                            kernel_initializer=init.orthogonal(),
                            bias_initializer=init.zeros(),
                            kernel_regularizer=l2_regularizer(1e-6))

# Softmax cross-entropy against one-hot labels, plus the L2 penalties.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_,
                                                           logits=output_layer)
loss = tf.reduce_mean(cross_entropy) + get_regularization_loss()

# Create the gradient descent optimizer with the given learning rate.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

# Fraction of samples whose argmax prediction matches the label.
correct_prediction = tf.cast(tf.equal(tf.argmax(output_layer, 1),
                                      tf.argmax(y_, 1)), tf.float32)
accuracy = tf.reduce_mean(correct_prediction)
trainX = (trainX - np.mean(trainX, axis=0)) / np.std(trainX, axis=0) # experiment with small datasets trainX = trainX[:1000] trainY = trainY[:1000] # Create the model x = tf.placeholder(tf.float32, [None, NUM_FEATURES]) y_ = tf.placeholder(tf.float32, [None, 1]) # Build the graph for the deep net hidden_layer = layers.dense(x, 30, activation=tf.nn.relu, kernel_initializer=init.orthogonal(np.sqrt(2)), bias_initializer=init.zeros(), kernel_regularizer=l2_regularizer(1e-3)) output_layer = layers.dense(hidden_layer, 1, kernel_initializer=init.orthogonal(), bias_initializer=init.zeros(), kernel_regularizer=l2_regularizer(1e-3)) loss = tf.reduce_mean(tf.square(y_ - output_layer)) + get_regularization_loss() #Create the gradient descent optimizer with the given learning rate. optimizer = tf.train.GradientDescentOptimizer(learning_rate) train_op = optimizer.minimize(loss) error = tf.reduce_mean(tf.square(y_ - output_layer))
def __init__(self, name, session, observation_space, action_space):
    """Build a DDPG-style actor-critic graph under variable scope `name`.

    The actor maps observations to tanh actions rescaled to the action
    bounds. The critic scores (observation, action) pairs; a second copy
    of the critic ("model critic") reuses the same weights to score the
    actor's own actions, giving a state-value estimate.
    """
    self.name = name
    # Only 1-D continuous (Box) action spaces with symmetric bounds are
    # supported, since the tanh output is rescaled by action_space.high.
    assert isinstance(action_space, spaces.Box)
    assert len(action_space.shape) == 1
    assert (np.abs(action_space.low) == action_space.high
            ).all()  # we assume symmetric actions.
    self.session = session
    with tf.variable_scope(self.name):
        self.observations_input = tf.placeholder(
            observation_space.dtype,
            shape=tuple([None] + list(observation_space.shape)),
            name="observations_input")
        self.actions_input = tf.placeholder(
            action_space.dtype,
            shape=tuple([None] + list(action_space.shape)),
            name="actions_input")
        self.actions_size = action_space.shape[0]
        # (observation, action) pairs fed to the critic.
        self.combined_input = tf.concat(
            [self.observations_input, self.actions_input],
            axis=1,
            name="combined_input")
        with tf.variable_scope("actor"):
            # Two 64-unit tanh layers with sqrt(2) orthogonal gain.
            self.actor01 = layers.dense(
                inputs=self.observations_input,
                units=64,
                activation=tf.tanh,
                name="layer_one",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(
                    gain=np.sqrt(2)))
            self.actor02 = layers.dense(
                inputs=self.actor01,
                units=64,
                activation=tf.tanh,
                name="layer_two",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(
                    gain=np.sqrt(2)))
            # Small-gain head keeps initial actions near zero.
            self.actor_head = layers.dense(
                inputs=self.actor02,
                units=self.actions_input.shape[1].value,
                activation=tf.tanh,
                name="head",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(gain=0.01))
            # Rescale the [-1, 1] tanh output to the action bounds.
            self.actor_head_rescaled = self.actor_head * action_space.high
            # (observation, actor-chosen action) pairs for the model critic.
            self.model_computed_combined = tf.concat(
                [self.observations_input, self.actor_head_rescaled],
                axis=1,
                name="model_computed_combined")
        with tf.variable_scope("critic"):
            self.critic01 = layers.dense(
                inputs=self.combined_input,
                units=64,
                activation=tf.tanh,
                name="layer_one",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(
                    gain=np.sqrt(2)))
            self.critic02 = layers.dense(
                inputs=self.critic01,
                units=64,
                activation=tf.tanh,
                name="layer_two",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(
                    gain=np.sqrt(2)))
            # Small uniform init is the common choice for the final
            # critic layer.
            self.action_value_head = layers.dense(
                inputs=self.critic02,
                units=1,
                activation=None,
                name="head",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.random_uniform(
                    -3.0e-3, 3.0e-3))
            # The three layers below pass reuse=True with the same layer
            # names, so they share weights with critic01/critic02/head
            # while scoring the actor's own actions.
            self.model_critic01 = layers.dense(
                inputs=self.model_computed_combined,
                units=64,
                activation=tf.tanh,
                name="layer_one",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(
                    gain=np.sqrt(2)),
                reuse=
                True  # Use the same weights for 'critic' and for 'model critic'
            )
            self.model_critic02 = layers.dense(
                inputs=self.model_critic01,
                units=64,
                activation=tf.tanh,
                name="layer_two",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.orthogonal(
                    gain=np.sqrt(2)),
                reuse=
                True  # Use the same weights for 'critic' and for 'model critic'
            )
            self.state_value_head = layers.dense(
                inputs=self.model_critic02,
                units=1,
                activation=None,
                name="head",
                bias_initializer=initializers.zeros(),
                kernel_initializer=initializers.random_uniform(
                    -3.0e-3, 3.0e-3),
                reuse=
                True  # Use the same weights for 'critic' and for 'model critic'
            )