def build_graph(self, input_placeholder):
    # Configured input (vision or dense) fed from an externally supplied placeholder.
    input = layer.ConfiguredInput(dppo_config.config.input, input_placeholder=input_placeholder)
    layers = [input]

    # Fully connected tower over the flattened input; the last configured hidden
    # size is reserved for the LSTM or the final dense head below.
    sizes = dppo_config.config.hidden_sizes
    activation = layer.get_activation(dppo_config.config)
    fc_layers = layer.GenericLayers(layer.Flatten(input),
                                    [dict(type=layer.Dense, size=size, activation=activation)
                                     for size in sizes[:-1]])
    layers.append(fc_layers)

    # Width of the head: the last configured hidden size, or the flattened
    # input width when hidden_sizes is empty.
    last_size = fc_layers.node.shape.as_list()[-1]
    if len(sizes) > 0:
        last_size = sizes[-1]

    if dppo_config.config.use_lstm:
        # Recurrent head: expand to [1, time, features] for the LSTM, then
        # flatten the time dimension back out.
        lstm = layer.lstm(dppo_config.config.lstm_type, graph.Expand(fc_layers, 0),
                          n_units=last_size, n_cores=dppo_config.config.lstm_num_cores)
        head = graph.Reshape(lstm, [-1, last_size])
        layers.append(lstm)
        self.lstm_items = {"ph_lstm_state": lstm.ph_state,
                           "lstm_zero_state": lstm.zero_state,
                           "lstm_state": lstm.state,
                           "lstm_reset_timestep": lstm.reset_timestep}
    else:
        # Feed-forward head.
        head = layer.Dense(fc_layers, last_size, activation=activation)
        layers.append(head)

    self.ph_state = input.ph_state
    self.weight = layer.Weights(*layers)
    return head.node
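# A minimal sketch of the kind of dispatch a layer.lstm factory, as used above, could
# perform: pick a concrete cell wrapper based on the configured lstm_type string. The
# 'Basic'/'Dilated' names and the DilatedLSTM wrapper are illustrative assumptions,
# not the library's actual registry.
def lstm_factory(lstm_type, x, n_units, n_cores=8):
    if lstm_type == 'Basic':
        return layer.LSTM(x, n_units=n_units)  # plain LSTM, as in the DA3C graphs below
    if lstm_type == 'Dilated':
        return layer.DilatedLSTM(x, n_units=n_units, n_cores=n_cores)  # assumed wrapper
    raise ValueError('unknown lstm_type: %r' % lstm_type)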
def build_graph(self):
    super(_WorkerNetwork, self).__init__()
    self.lstm = CustomBasicLSTMCell(cfg.d)  # d=256
    # needs wrap as layer to retrieve weights

    # Goal vector handed down from the manager.
    self.ph_goal = \
        graph.Placeholder(np.float32, shape=(None, cfg.d), name="ph_goal")
    # self.ph_goal = tf.placeholder(tf.float32, [None, cfg.d], name="ph_goal")

    perception_expanded = graph.Expand(self.perception.node, 0)

    self.ph_step_size = \
        graph.Placeholder(np.float32, shape=(1,), name="ph_w_step_size")
    # tf.placeholder(tf.float32, [1], name="ph_w_step_size")

    self.ph_initial_lstm_state = \
        graph.Placeholder(np.float32, shape=(1, self.lstm.state_size), name="ph_w_lstm_state")
    # tf.placeholder(tf.float32, [1, self.lstm.state_size], name="ph_w_lstm_state")

    lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
        self.lstm, perception_expanded,
        initial_state=self.ph_initial_lstm_state,
        sequence_length=self.ph_step_size,
        time_major=False)
    lstm_outputs = tf.reshape(lstm_outputs, [-1, cfg.d])
    sg_lstm_outputs = graph.TfNode(lstm_outputs)

    # Per-action embeddings U and goal embedding w; the policy is a softmax
    # over their product.
    U = layer.LinearLayer(sg_lstm_outputs,
                          shape=(cfg.d, cfg.action_size * cfg.k),
                          transformation=tf.matmul)
    U_embedding = tf.transpose(tf.reshape(U, [cfg.action_size, cfg.k, -1]))

    w = layer.LinearLayer(self.ph_goal, shape=(cfg.d, cfg.k),
                          transformation=tf.matmul, bias=False)
    w_reshaped = tf.reshape(w.node, [-1, 1, cfg.k])

    self.pi = layer.MatmulLayer(w_reshaped, U_embedding,
                                activation=layer.Activation.Softmax)
    self.vi = layer.LinearLayer(sg_lstm_outputs, shape=(cfg.d, 1),
                                transformation=tf.matmul)

    self.weights = layer.Weights(
        self.weights,
        graph.TfNode((self.lstm.matrix, self.lstm.bias)),
        U, w, self.vi)

    self.lstm_state_out = \
        graph.VarAssign(graph.Variable(np.zeros([1, self.lstm.state_size]),
                                       dtype=np.float32,
                                       name="lstm_state_out"),
                        np.zeros([1, self.lstm.state_size]))
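# A self-contained NumPy sketch (illustration only, not part of the model) of the
# worker head's intended shape algebra: w embeds the goal into k dimensions, U gives a
# k-dimensional embedding per action, and a batched matmul plus softmax yields the
# policy. The TF code above arranges the same product via reshape/transpose; batch, d,
# k and action_size below stand in for the cfg values.
import numpy as np

batch, d, k, action_size = 5, 256, 16, 6
lstm_out = np.random.randn(batch, d)
goal = np.random.randn(batch, d)

U = lstm_out @ np.random.randn(d, action_size * k)                  # [batch, action_size * k]
U_embedding = U.reshape(batch, action_size, k).transpose(0, 2, 1)   # [batch, k, action_size]
w = goal @ np.random.randn(d, k)                                    # [batch, k], no bias
w_reshaped = w.reshape(batch, 1, k)

logits = w_reshaped @ U_embedding                                   # [batch, 1, action_size]
pi = np.exp(logits) / np.exp(logits).sum(axis=-1, keepdims=True)    # softmax over actions
assert pi.shape == (batch, 1, action_size)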
def build_graph(self, input_placeholder):
    conv_layer = dict(type=layer.Convolution,
                      activation=layer.Activation.Elu,
                      n_filters=32,
                      filter_size=[3, 3],
                      stride=[2, 2],
                      border=layer.Border.Same)
    input_layers = [dict(conv_layer)] * 4 if da3c_config.config.input.universe else None
    input = layer.Input(da3c_config.config.input, descs=input_layers,
                        input_placeholder=input_placeholder)

    sizes = da3c_config.config.hidden_sizes
    layers = [input]
    flattened_input = layer.Flatten(input)
    fc_layers = layer.GenericLayers(flattened_input,
                                    [dict(type=layer.Dense, size=size,
                                          activation=layer.Activation.Relu6)
                                     for size in sizes[:-1]])
    layers.append(fc_layers)

    last_size = fc_layers.node.shape.as_list()[-1]
    if len(sizes) > 0:
        last_size = sizes[-1]

    if da3c_config.config.use_lstm:
        lstm = layer.LSTM(graph.Expand(fc_layers, 0), n_units=last_size)
        head = graph.Reshape(lstm, [-1, last_size])
        layers.append(lstm)
        self.ph_lstm_state = lstm.ph_state
        self.lstm_zero_state = lstm.zero_state
        self.lstm_state = lstm.state
    else:
        head = layer.Dense(fc_layers, last_size, activation=layer.Activation.Relu6)
        layers.append(head)

    self.ph_state = input.ph_state
    self.weight = layer.Weights(*layers)
    return head.node
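# Aside on the input descriptors above: dict(conv_layer) is evaluated once, so
# [dict(conv_layer)] * 4 holds four references to a single shallow copy of conv_layer.
# That is fine while the descriptions stay read-only; if each entry had to be mutated
# independently, a comprehension would be needed instead:
#     input_layers = [dict(conv_layer) for _ in range(4)]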
def build_graph(self):
    self.ph_perception = \
        graph.Placeholder(np.float32, shape=(None, cfg.d), name="ph_perception")
    # tf.placeholder(tf.float32, shape=[None, cfg.d], name="ph_perception")

    # Latent state space the manager plans in.
    self.Mspace = \
        layer.Dense(self.ph_perception, cfg.d,  # d=256
                    activation=layer.Activation.Relu)
    Mspace_expanded = graph.Expand(self.Mspace, 0)

    self.lstm = DilatedLSTMCell(cfg.d, num_cores=cfg.d)
    # needs wrap as layer to retrieve weights

    self.ph_step_size = \
        graph.Placeholder(np.float32, shape=(1,), name="ph_m_step_size")
    # tf.placeholder(tf.float32, [1], name="ph_m_step_size")

    self.ph_initial_lstm_state = \
        graph.Placeholder(np.float32, shape=(1, self.lstm.state_size), name="ph_m_lstm_state")
    # tf.placeholder(tf.float32, [1, self.lstm.state_size], name="ph_m_lstm_state")

    lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(
        self.lstm, Mspace_expanded,
        initial_state=self.ph_initial_lstm_state,
        sequence_length=self.ph_step_size,
        time_major=False)
    lstm_outputs = tf.reshape(lstm_outputs, [-1, cfg.d])
    sg_lstm_outputs = graph.TfNode(lstm_outputs)

    # Unit-length goal vector emitted for the worker.
    self.goal = tf.nn.l2_normalize(graph.Flatten(sg_lstm_outputs), dim=1)

    critic = layer.Dense(sg_lstm_outputs, 1)
    self.value = layer.Flatten(critic)

    self.weights = layer.Weights(
        self.Mspace,
        graph.TfNode((self.lstm.matrix, self.lstm.bias)),
        critic)

    self.lstm_state_out = \
        graph.VarAssign(graph.Variable(np.zeros([1, self.lstm.state_size]),
                                       dtype=np.float32,
                                       name="lstm_state_out"),
                        np.zeros([1, self.lstm.state_size]))
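# Illustrative sketch (assumptions: dense [batch, d] input, epsilon handling simplified)
# of what tf.nn.l2_normalize(..., dim=1) computes for the goal above: each row is
# rescaled to unit Euclidean norm, so only the goal's direction is passed to the worker.
import numpy as np

x = np.random.randn(4, 256)
goal = x / np.maximum(np.linalg.norm(x, axis=1, keepdims=True), 1e-12)
assert np.allclose(np.linalg.norm(goal, axis=1), 1.0)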
def build_graph(self):
    input = layer.ConfiguredInput(da3c_config.config.input)
    sizes = da3c_config.config.hidden_sizes
    layers = [input]

    flattened_input = layer.Flatten(input)
    # When hidden_sizes is empty, last_size stays at the flattened input width,
    # so the LSTM (or the actor/critic heads) sit directly on the observation.
    last_size = flattened_input.node.shape.as_list()[-1]
    if len(sizes) > 0:
        last_size = sizes[-1]

    if da3c_config.config.use_lstm:
        lstm = layer.lstm(da3c_config.config.lstm_type,
                          graph.Expand(flattened_input, 0),
                          n_units=last_size,
                          n_cores=da3c_config.config.lstm_num_cores)
        head = graph.Reshape(lstm, [-1, last_size])
        layers.append(lstm)
        self.ph_lstm_state = lstm.ph_state
        self.lstm_zero_state = lstm.zero_state
        self.lstm_state = lstm.state
        self.lstm_reset_timestep = lstm.reset_timestep
    else:
        activation = layer.get_activation(da3c_config.config)
        head = layer.GenericLayers(flattened_input,
                                   [dict(type=layer.Dense, size=size, activation=activation)
                                    for size in sizes])
        layers.append(head)

    # Shared trunk with separate policy and value heads.
    actor = layer.Actor(head, da3c_config.config.output)
    critic = layer.Dense(head, 1)
    layers.extend((actor, critic))

    self.ph_state = input.ph_state
    self.actor = actor
    self.critic = graph.Flatten(critic)
    self.weights = layer.Weights(*layers)
    self.actor_weights = layer.Weights(actor)
def build_graph(self):
    conv_layer = dict(type=layer.Convolution,
                      activation=layer.Activation.Elu,
                      n_filters=32,
                      filter_size=[3, 3],
                      stride=[2, 2],
                      border=layer.Border.Same)
    input_layers = [dict(conv_layer)] * 4 if da3c_config.config.input.universe else None
    input = layer.Input(da3c_config.config.input, descs=input_layers)

    sizes = da3c_config.config.hidden_sizes
    layers = [input]
    flattened_input = layer.Flatten(input)
    last_size = flattened_input.node.shape.as_list()[-1]
    if len(sizes) > 0:
        last_size = sizes[-1]

    if da3c_config.config.use_lstm:
        lstm = layer.LSTM(graph.Expand(flattened_input, 0), n_units=last_size)
        head = graph.Reshape(lstm, [-1, last_size])
        layers.append(lstm)
        self.ph_lstm_state = lstm.ph_state
        self.lstm_zero_state = lstm.zero_state
        self.lstm_state = lstm.state
    else:
        head = layer.GenericLayers(flattened_input,
                                   [dict(type=layer.Dense, size=size,
                                         activation=layer.Activation.Relu)
                                    for size in sizes])
        layers.append(head)

    actor = layer.Actor(head, da3c_config.config.output)
    critic = layer.Dense(head, 1)
    layers.extend((actor, critic))

    self.ph_state = input.ph_state
    self.actor = actor
    self.critic = graph.Flatten(critic)
    self.weights = layer.Weights(*layers)
    self.actor_weights = layer.Weights(actor)