# DDPG critic: Q(s, a) estimator where the action is injected at the second
# layer through DoubleDense rather than concatenated with the raw state.
def build_graph(self):
    input = layer.Input(cfg.config.input)
    self.ph_action = graph.Placeholder(np.float32, (None, cfg.config.output.action_size))

    sizes = cfg.config.hidden_sizes
    assert len(sizes) > 1, 'You need to provide sizes for at least 2 layers'

    dense_1st = layer.Dense(layer.Flatten(input), sizes[0], layer.Activation.Relu)
    dense_2nd = layer.DoubleDense(dense_1st, self.ph_action, sizes[1], layer.Activation.Relu)
    layers = [input, dense_1st, dense_2nd]

    net = layer.GenericLayers(dense_2nd,
                              [dict(type=layer.Dense, size=size, activation=layer.Activation.Relu)
                               for size in sizes[2:]])
    if len(sizes[2:]) > 0:
        layers.append(net)

    # small init keeps the initial Q estimates near zero
    self.critic = layer.Dense(net, 1, init_var=3e-3)
    self.ph_state = input.ph_state
    layers.append(self.critic)
    self.weights = layer.Weights(*layers)

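# A minimal numpy sketch of what a DoubleDense-style layer computes, assuming it
# mixes two inputs (state features and action) with separate weight matrices, as
# in the original DDPG critic where the action enters at the second layer. The
# names `w_s`, `w_a`, `b` are illustrative, not the framework's internals.
import numpy as np

def double_dense(state_features, action, w_s, w_a, b):
    # relu(state_features @ W_s + action @ W_a + b)
    return np.maximum(0.0, state_features.dot(w_s) + action.dot(w_a) + b)

rng = np.random.RandomState(0)
s = rng.randn(4, 400)    # batch of first-layer features
a = rng.randn(4, 2)      # batch of actions
out = double_dense(s, a, rng.randn(400, 300), rng.randn(2, 300), np.zeros(300))
assert out.shape == (4, 300)
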
def build_graph(self, input_placeholder):
    input = layer.ConfiguredInput(dppo_config.config.input, input_placeholder=input_placeholder)
    layers = [input]

    sizes = dppo_config.config.hidden_sizes
    activation = layer.get_activation(dppo_config.config)
    fc_layers = layer.GenericLayers(layer.Flatten(input),
                                    [dict(type=layer.Dense, size=size, activation=activation)
                                     for size in sizes[:-1]])
    layers.append(fc_layers)

    last_size = fc_layers.node.shape.as_list()[-1]
    if len(sizes) > 0:
        last_size = sizes[-1]

    if dppo_config.config.use_lstm:
        lstm = layer.lstm(dppo_config.config.lstm_type, graph.Expand(fc_layers, 0),
                          n_units=last_size, n_cores=dppo_config.config.lstm_num_cores)
        head = graph.Reshape(lstm, [-1, last_size])
        layers.append(lstm)
        self.lstm_items = {"ph_lstm_state": lstm.ph_state,
                           "lstm_zero_state": lstm.zero_state,
                           "lstm_state": lstm.state,
                           "lstm_reset_timestep": lstm.reset_timestep}
    else:
        head = layer.Dense(fc_layers, last_size, activation=activation)
        layers.append(head)

    self.ph_state = input.ph_state
    self.weight = layer.Weights(*layers)
    return head.node

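# Hedged sketch of the Expand/Reshape pattern above: a rollout of T steps is fed
# to the recurrent layer as one batch-major sequence of shape [1, T, size], then
# the outputs are flattened back to [T, size] so the dense heads can treat time
# steps as batch entries. Plain numpy stand-ins, no framework calls.
import numpy as np

T, size = 5, 64
fc_out = np.random.randn(T, size)       # per-step features from the dense stack
as_sequence = fc_out[np.newaxis, ...]   # graph.Expand(fc_layers, 0) -> [1, T, size]
assert as_sequence.shape == (1, T, size)
lstm_out = as_sequence                  # stand-in for the recurrent layer's output
head_in = lstm_out.reshape(-1, size)    # graph.Reshape(lstm, [-1, size]) -> [T, size]
assert head_in.shape == (T, size)
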
def build_graph(self):
    input = layer.Input(cfg.input)
    self.perception = \
        layer.Dense(layer.Flatten(input), cfg.d,  # d=256
                    activation=layer.Activation.Relu)
    self.weights = layer.Weights(input, self.perception)
    self.ph_state = input.ph_state

def build_graph(self):
    input = layer.ConfiguredInput(cfg.config.input)
    dense = layer.GenericLayers(layer.Flatten(input),
                                [dict(type=layer.Dense, size=size, activation=layer.Activation.Relu)
                                 for size in cfg.config.hidden_sizes])
    actor = layer.DDPGActor(dense, cfg.config.output)

    self.ph_state = input.ph_state
    self.actor = actor.scaled_out
    self.weights = layer.Weights(input, dense, actor)

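# Hedged sketch of what a DDPGActor-style `scaled_out` commonly is: a tanh output
# squashed to [-1, 1] and rescaled to the environment's action bounds. The
# `action_high` name and the weight handling are illustrative assumptions.
import numpy as np

def scaled_out(dense_features, w, action_high):
    raw = np.tanh(dense_features.dot(w))  # in [-1, 1] per action dimension
    return raw * action_high              # rescaled to the configured bounds

act = scaled_out(np.random.randn(1, 300), np.random.randn(300, 2), np.array([2.0, 0.5]))
assert act.shape == (1, 2) and np.all(np.abs(act) <= np.array([2.0, 0.5]) + 1e-12)
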
def build_graph(self):
    input = layer.Input(pg_config.config.input)
    dense = layer.GenericLayers(layer.Flatten(input),
                                [dict(type=layer.Dense, size=size, activation=layer.Activation.Relu)
                                 for size in pg_config.config.hidden_sizes])
    actor = layer.Dense(dense, pg_config.config.output.action_size,
                        activation=layer.Activation.Softmax)

    self.state = input.ph_state
    self.weights = layer.Weights(input, dense, actor)
    return actor.node

# TRPO value function: predicts the return from the observation plus an extra
# timestep feature; exposes flattened weight/gradient ops for batch solvers.
def build_graph(self):
    input = layer.ConfiguredInput(trpo_config.config.input)

    # add one extra feature for timestep
    ph_step = graph.Placeholder(np.float32, shape=[None, 1])
    state = (input.ph_state, ph_step)
    concatenated = graph.Concat([layer.Flatten(input), ph_step], axis=1)

    activation = layer.Activation.get_activation(trpo_config.config.activation)
    head = layer.GenericLayers(concatenated,
                               [dict(type=layer.Dense, size=size, activation=activation)
                                for size in trpo_config.config.hidden_sizes])
    value = layer.Dense(head, 1)

    ph_ytarg_ny = graph.Placeholder(np.float32)
    mse = graph.TfNode(tf.reduce_mean(tf.square(ph_ytarg_ny.node - value.node)))

    weights = layer.Weights(input, head, value)
    sg_get_weights_flatten = graph.GetVariablesFlatten(weights)
    sg_set_weights_flatten = graph.SetVariablesFlatten(weights)

    # L2 regularization over all trainable variables
    l2 = graph.TfNode(1e-3 * tf.add_n([tf.reduce_sum(tf.square(v))
                                       for v in utils.Utils.flatten(weights.node)]))
    loss = graph.TfNode(l2.node + mse.node)

    sg_gradients = optimizer.Gradients(weights, loss=loss)
    sg_gradients_flatten = graph.GetVariablesFlatten(sg_gradients.calculate)

    self.op_value = self.Op(value, state=state)
    self.op_get_weights_flatten = self.Op(sg_get_weights_flatten)
    self.op_set_weights_flatten = self.Op(sg_set_weights_flatten,
                                          value=sg_set_weights_flatten.ph_value)
    self.op_compute_loss_and_gradient = self.Ops(loss, sg_gradients_flatten,
                                                 state=state, ytarg_ny=ph_ytarg_ny)
    self.op_losses = self.Ops(loss, mse, l2,
                              state=state, ytarg_ny=ph_ytarg_ny)

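# Why GetVariablesFlatten/SetVariablesFlatten exist: TRPO-style solvers (conjugate
# gradient, L-BFGS) operate on a single parameter vector. A hedged numpy sketch of
# the pack/unpack round-trip; the framework's ops do the analogous packing on TF
# variables.
import numpy as np

def get_flat(tensors):
    return np.concatenate([t.ravel() for t in tensors])

def set_flat(tensors, flat):
    offset = 0
    for t in tensors:
        t[...] = flat[offset:offset + t.size].reshape(t.shape)
        offset += t.size

weights = [np.ones((3, 2)), np.zeros(4)]
theta = get_flat(weights)        # pack: shape (10,)
set_flat(weights, theta * 2.0)   # unpack an updated vector in place
assert weights[0][0, 0] == 2.0 and theta.shape == (10,)
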
def build_graph(self, input_placeholder):
    conv_layer = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                      n_filters=32, filter_size=[3, 3], stride=[2, 2],
                      border=layer.Border.Same)
    input_layers = [dict(conv_layer)] * 4 if da3c_config.config.input.universe else None
    input = layer.Input(da3c_config.config.input, descs=input_layers,
                        input_placeholder=input_placeholder)

    sizes = da3c_config.config.hidden_sizes
    layers = [input]
    flattened_input = layer.Flatten(input)

    fc_layers = layer.GenericLayers(flattened_input,
                                    [dict(type=layer.Dense, size=size, activation=layer.Activation.Relu6)
                                     for size in sizes[:-1]])
    layers.append(fc_layers)

    last_size = fc_layers.node.shape.as_list()[-1]
    if len(sizes) > 0:
        last_size = sizes[-1]

    if da3c_config.config.use_lstm:
        lstm = layer.LSTM(graph.Expand(fc_layers, 0), n_units=last_size)
        head = graph.Reshape(lstm, [-1, last_size])
        layers.append(lstm)
        self.ph_lstm_state = lstm.ph_state
        self.lstm_zero_state = lstm.zero_state
        self.lstm_state = lstm.state
    else:
        head = layer.Dense(fc_layers, last_size, activation=layer.Activation.Relu6)
        layers.append(head)

    self.ph_state = input.ph_state
    self.weight = layer.Weights(*layers)
    return head.node

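# A quick check of the downsampling that the four 3x3/stride-2 'same'-padded
# convolutions above perform: each halves the spatial size (ceil division), so a
# 42x42 Universe-style frame (an assumption here, not read from the config) ends
# up 3x3 with 32 channels before flattening. Pure arithmetic sketch.
import math

size = 42                        # assumed preprocessed frame width/height
for _ in range(4):
    size = math.ceil(size / 2)   # 'same' padding with stride 2
assert size == 3                 # 42 -> 21 -> 11 -> 6 -> 3
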
def build_graph(self):
    input = layer.Input(trpo_config.config.input)
    head = layer.GenericLayers(layer.Flatten(input),
                               [dict(type=layer.Dense, size=size, activation=layer.Activation.Tanh)
                                for size in trpo_config.config.hidden_sizes])

    if trpo_config.config.output.continuous:
        output = layer.Dense(head, trpo_config.config.output.action_size)
        actor = ConcatFixedStd(output)
        actor_layers = [output, actor]
    else:
        actor = layer.Dense(head, trpo_config.config.output.action_size,
                            activation=layer.Activation.Softmax)
        actor_layers = [actor]

    self.ph_state = input.ph_state
    self.actor = actor
    self.weights = layer.Weights(*([input, head] + actor_layers))

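# Hedged sketch of what a ConcatFixedStd-style head produces for a continuous
# policy: the network outputs the Gaussian mean, and a state-independent,
# learnable spread (here exp(log_std)) is broadcast and concatenated alongside
# it. The `log_std` name and the exact packing are illustrative assumptions.
import numpy as np

def concat_fixed_std(mean, log_std):
    # mean: [batch, action_size]; log_std: [action_size], learned but state-independent
    std = np.exp(log_std)[np.newaxis, :].repeat(mean.shape[0], axis=0)
    return np.concatenate([mean, std], axis=1)   # [batch, 2 * action_size]

out = concat_fixed_std(np.zeros((3, 2)), np.array([-0.5, -0.5]))
assert out.shape == (3, 4)
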
# FuN manager: embeds the shared perception into Mspace, runs a dilated LSTM
# over it, and emits an l2-normalized goal vector plus a value estimate.
def build_graph(self):
    self.ph_perception = \
        graph.Placeholder(np.float32, shape=(None, cfg.d), name="ph_perception")

    self.Mspace = \
        layer.Dense(self.ph_perception, cfg.d,  # d=256
                    activation=layer.Activation.Relu)
    Mspace_expanded = graph.Expand(self.Mspace, 0)

    self.lstm = DilatedLSTMCell(cfg.d, num_cores=cfg.d)  # needs wrap as layer to retrieve weights

    self.ph_step_size = \
        graph.Placeholder(np.float32, shape=(1,), name="ph_m_step_size")
    self.ph_initial_lstm_state = \
        graph.Placeholder(np.float32, shape=(1, self.lstm.state_size), name="ph_m_lstm_state")

    lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(self.lstm, Mspace_expanded,
                                                      initial_state=self.ph_initial_lstm_state,
                                                      sequence_length=self.ph_step_size,
                                                      time_major=False)
    lstm_outputs = tf.reshape(lstm_outputs, [-1, cfg.d])
    sg_lstm_outputs = graph.TfNode(lstm_outputs)

    self.goal = tf.nn.l2_normalize(graph.Flatten(sg_lstm_outputs), dim=1)

    critic = layer.Dense(sg_lstm_outputs, 1)
    self.value = layer.Flatten(critic)

    self.weights = layer.Weights(self.Mspace,
                                 graph.TfNode((self.lstm.matrix, self.lstm.bias)),
                                 critic)

    self.lstm_state_out = \
        graph.VarAssign(graph.Variable(np.zeros([1, self.lstm.state_size]),
                                       dtype=np.float32, name="lstm_state_out"),
                        np.zeros([1, self.lstm.state_size]))

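# The manager's goal is l2-normalized above; in FeUdal Networks the worker is
# then rewarded by the cosine similarity between its latent-state change and
# that goal direction. A hedged numpy sketch of both operations; the names and
# shapes are illustrative.
import numpy as np

def l2_normalize(x, axis=1, eps=1e-12):
    return x / np.maximum(np.linalg.norm(x, axis=axis, keepdims=True), eps)

goal = l2_normalize(np.random.randn(1, 256))         # unit-length direction in Mspace
delta_s = np.random.randn(1, 256)                    # worker's change in latent state
cosine = (l2_normalize(delta_s) * goal).sum(axis=1)  # direction-match intrinsic signal
assert abs(np.linalg.norm(goal) - 1.0) < 1e-6 and -1.0 <= cosine[0] <= 1.0
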
def build_graph(self):
    input = layer.ConfiguredInput(da3c_config.config.input)
    sizes = da3c_config.config.hidden_sizes
    layers = [input]
    flattened_input = layer.Flatten(input)

    last_size = flattened_input.node.shape.as_list()[-1]
    if len(sizes) > 0:
        last_size = sizes[-1]

    if da3c_config.config.use_lstm:
        lstm = layer.lstm(da3c_config.config.lstm_type, graph.Expand(flattened_input, 0),
                          n_units=last_size, n_cores=da3c_config.config.lstm_num_cores)
        head = graph.Reshape(lstm, [-1, last_size])
        layers.append(lstm)
        self.ph_lstm_state = lstm.ph_state
        self.lstm_zero_state = lstm.zero_state
        self.lstm_state = lstm.state
        self.lstm_reset_timestep = lstm.reset_timestep
    else:
        activation = layer.get_activation(da3c_config.config)
        head = layer.GenericLayers(flattened_input,
                                   [dict(type=layer.Dense, size=size, activation=activation)
                                    for size in sizes])
        layers.append(head)

    actor = layer.Actor(head, da3c_config.config.output)
    critic = layer.Dense(head, 1)
    layers.extend((actor, critic))

    self.ph_state = input.ph_state
    self.actor = actor
    self.critic = graph.Flatten(critic)
    self.weights = layer.Weights(*layers)
    self.actor_weights = layer.Weights(actor)

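# Hedged sketch of the episode-level bookkeeping that ph_lstm_state /
# lstm_zero_state support: the agent carries the recurrent state across rollouts
# within an episode and rewinds to the zero state at episode boundaries. The
# `step_model` helper is a hypothetical stand-in for a session run that feeds
# ph_state and ph_lstm_state, not the framework's API.
import numpy as np

lstm_zero_state = np.zeros((1, 256))   # analogous to lstm.zero_state
lstm_state = lstm_zero_state

def step_model(state, lstm_state):
    # stand-in: returns an action and the next recurrent state
    return np.random.randint(4), lstm_state + 1

for t in range(5):
    terminal = (t == 3)
    action, lstm_state = step_model(np.zeros(4), lstm_state)
    if terminal:
        lstm_state = lstm_zero_state   # reset at the episode boundary
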
def build_graph(self):
    conv_layer = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                      n_filters=32, filter_size=[3, 3], stride=[2, 2],
                      border=layer.Border.Same)
    input_layers = [dict(conv_layer)] * 4 if da3c_config.config.input.universe else None
    input = layer.Input(da3c_config.config.input, descs=input_layers)

    sizes = da3c_config.config.hidden_sizes
    layers = [input]
    flattened_input = layer.Flatten(input)

    last_size = flattened_input.node.shape.as_list()[-1]
    if len(sizes) > 0:
        last_size = sizes[-1]

    if da3c_config.config.use_lstm:
        lstm = layer.LSTM(graph.Expand(flattened_input, 0), n_units=last_size)
        head = graph.Reshape(lstm, [-1, last_size])
        layers.append(lstm)
        self.ph_lstm_state = lstm.ph_state
        self.lstm_zero_state = lstm.zero_state
        self.lstm_state = lstm.state
    else:
        head = layer.GenericLayers(flattened_input,
                                   [dict(type=layer.Dense, size=size, activation=layer.Activation.Relu)
                                    for size in sizes])
        layers.append(head)

    actor = layer.Actor(head, da3c_config.config.output)
    critic = layer.Dense(head, 1)
    layers.extend((actor, critic))

    self.ph_state = input.ph_state
    self.actor = actor
    self.critic = graph.Flatten(critic)
    self.weights = layer.Weights(*layers)
    self.actor_weights = layer.Weights(actor)

def build_graph(self):
    input = layer.ConfiguredInput(config.input)
    hidden = layer.GenericLayers(layer.Flatten(input),
                                 [dict(type=layer.Dense, size=size, activation=layer.Activation.Tanh)
                                  for size in config.hidden_sizes])
    weights = [input, hidden]

    if config.dueling_dqn:
        if config.hidden_sizes:
            # split the last hidden layer into value and advantage streams;
            # note: the last hidden size must be even for this half/half split
            v_input, a_input = tf.split(hidden.node,
                                        [config.hidden_sizes[-1] // 2, config.hidden_sizes[-1] // 2],
                                        axis=1)
            v_input = graph.TfNode(v_input)
            a_input = graph.TfNode(a_input)
        else:
            v_input, a_input = hidden, hidden

        v_output = layer.Dense(v_input, 1)
        a_output = layer.Dense(a_input, config.output.action_size)

        # dueling aggregation: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
        output = v_output.node + a_output.node - tf.reduce_mean(a_output.node,
                                                                axis=1, keep_dims=True)
        output = graph.TfNode(output)
        weights.extend([v_output, a_output])
    else:
        output = layer.Dense(hidden, config.output.action_size)
        weights.append(output)

    self.ph_state = input.ph_state
    self.output = output
    self.weights = layer.Weights(*weights)

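# The dueling branch above implements Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
# Subtracting the mean pins the advantages to zero mean, which makes the V/A
# decomposition identifiable. A minimal numpy check of the identity:
import numpy as np

v = np.array([[1.0]])                       # V(s), shape [batch, 1]
a = np.array([[2.0, 0.0, -2.0]])            # A(s, a), shape [batch, actions]
q = v + a - a.mean(axis=1, keepdims=True)   # same formula as the graph above
assert np.allclose(q, [[3.0, 1.0, -1.0]])   # a - mean(a) has zero mean
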
# ICM: inverse model predicts the taken action from (s_t, s_t+1) features;
# forward model predicts the next features from (s_t features, action probs).
def build_graph(self):
    conv_layer = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                      n_filters=32, filter_size=[3, 3], stride=[2, 2],
                      border=layer.Border.Same)
    input = layer.Input(cfg.config.input, descs=[dict(conv_layer)] * 4)

    shape = [None] + [cfg.config.output.action_size]
    self.ph_probs = graph.Placeholder(np.float32, shape=shape, name='act_probs')
    self.ph_taken = graph.Placeholder(np.int32, shape=(None,), name='act_taken')

    flattened_input = layer.Flatten(input)
    last_size = flattened_input.node.shape.as_list()[-1]

    # pair consecutive batch entries so each row holds (s_t, s_t+1) features
    inverse_inp = graph.Reshape(input, [-1, last_size * 2])
    get_first = graph.TfNode(inverse_inp.node[:, :last_size])
    get_second = graph.TfNode(inverse_inp.node[:, last_size:])
    forward_inp = graph.Concat([get_first, self.ph_probs], axis=1)

    fc_size = cfg.config.hidden_sizes[-1]

    inv_fc1 = layer.Dense(inverse_inp, fc_size, layer.Activation.Relu)
    inv_fc2 = layer.Dense(inv_fc1, shape[-1])  # logits; softmax is applied inside the loss
    fwd_fc1 = layer.Dense(forward_inp, fc_size, layer.Activation.Relu)
    fwd_fc2 = layer.Dense(fwd_fc1, last_size)

    inv_loss = graph.SparseSoftmaxCrossEntropyWithLogits(inv_fc2, self.ph_taken).op
    fwd_loss = graph.L2loss(fwd_fc2.node - get_second.node).op

    # batch size must be even for now, since states are stacked as (s_t, s_t+1) pairs
    self.ph_state = input.ph_state
    self.rew_out = graph.TfNode(cfg.config.icm.nu * fwd_loss)
    self.loss = graph.TfNode(cfg.config.icm.beta * fwd_loss +
                             (1 - cfg.config.icm.beta) * inv_loss)

    layers = [input, inv_fc1, inv_fc2, fwd_fc1, fwd_fc2]
    self.weights = layer.Weights(*layers)

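# Hedged sketch of how the two ICM objectives above combine: the forward-model
# error doubles as the intrinsic reward (scaled by nu), and beta trades the
# forward loss against the inverse loss in the training objective, following
# Pathak et al. (2017). The numeric values here are illustrative only.
nu, beta = 0.01, 0.2
fwd_loss = 4.0   # forward-model prediction error on next-state features
inv_loss = 1.5   # cross-entropy of predicting the taken action

intrinsic_reward = nu * fwd_loss                    # self.rew_out above
icm_loss = beta * fwd_loss + (1 - beta) * inv_loss  # self.loss above
assert abs(icm_loss - 2.0) < 1e-9
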