def build_graph(self, input_placeholder):
    """Assemble the DPPO network: input -> dense stack -> (LSTM | dense) head.

    Exposes ``ph_state``, ``weight`` and (when an LSTM is configured)
    ``lstm_items`` on ``self``; returns the head's underlying graph node.
    """
    state_input = layer.ConfiguredInput(dppo_config.config.input,
                                        input_placeholder=input_placeholder)
    hidden_sizes = dppo_config.config.hidden_sizes
    act_fn = layer.get_activation(dppo_config.config)

    # All hidden sizes except the last are built here; the last size is
    # realized by the head (either an LSTM or a plain dense layer).
    dense_stack = layer.GenericLayers(
        layer.Flatten(state_input),
        [dict(type=layer.Dense, size=sz, activation=act_fn)
         for sz in hidden_sizes[:-1]])
    tracked = [state_input, dense_stack]

    # Head width: last configured size, falling back to the stack's own
    # output width when no hidden sizes are configured.
    head_size = dense_stack.node.shape.as_list()[-1]
    if hidden_sizes:
        head_size = hidden_sizes[-1]

    if dppo_config.config.use_lstm:
        recurrent = layer.lstm(dppo_config.config.lstm_type,
                               graph.Expand(dense_stack, 0),
                               n_units=head_size,
                               n_cores=dppo_config.config.lstm_num_cores)
        head = graph.Reshape(recurrent, [-1, head_size])
        tracked.append(recurrent)
        self.lstm_items = {"ph_lstm_state": recurrent.ph_state,
                           "lstm_zero_state": recurrent.zero_state,
                           "lstm_state": recurrent.state,
                           "lstm_reset_timestep": recurrent.reset_timestep}
    else:
        head = layer.Dense(dense_stack, head_size, activation=act_fn)
        tracked.append(head)

    self.ph_state = state_input.ph_state
    self.weight = layer.Weights(*tracked)
    return head.node
def build_graph(self, input_placeholder):
    """Assemble the DA3C feature network on top of an external placeholder.

    Universe-style inputs get four identical ELU convolutions; features are
    flattened, passed through Relu6 dense layers, and finished with either
    an LSTM or a dense head. Returns the head's graph node.
    """
    conv_desc = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                     n_filters=32, filter_size=[3, 3], stride=[2, 2],
                     border=layer.Border.Same)
    descs = [dict(conv_desc)] * 4 if da3c_config.config.input.universe else None
    state_input = layer.Input(da3c_config.config.input, descs=descs,
                              input_placeholder=input_placeholder)

    hidden_sizes = da3c_config.config.hidden_sizes
    dense_stack = layer.GenericLayers(
        layer.Flatten(state_input),
        [dict(type=layer.Dense, size=sz, activation=layer.Activation.Relu6)
         for sz in hidden_sizes[:-1]])
    tracked = [state_input, dense_stack]

    # Head width: last configured size, falling back to the stack's width.
    head_size = dense_stack.node.shape.as_list()[-1]
    if hidden_sizes:
        head_size = hidden_sizes[-1]

    if da3c_config.config.use_lstm:
        recurrent = layer.LSTM(graph.Expand(dense_stack, 0), n_units=head_size)
        head = graph.Reshape(recurrent, [-1, head_size])
        tracked.append(recurrent)
        self.ph_lstm_state = recurrent.ph_state
        self.lstm_zero_state = recurrent.zero_state
        self.lstm_state = recurrent.state
    else:
        head = layer.Dense(dense_stack, head_size,
                           activation=layer.Activation.Relu6)
        tracked.append(head)

    self.ph_state = state_input.ph_state
    self.weight = layer.Weights(*tracked)
    return head.node
def build_graph(self):
    """Assemble the DA3C actor-critic network.

    Builds shared features (an LSTM when configured, otherwise a stack of
    dense layers), then an actor head and a scalar critic head. Exposes
    the state placeholder, both heads and the weight collections on self.
    """
    state_input = layer.ConfiguredInput(da3c_config.config.input)
    hidden_sizes = da3c_config.config.hidden_sizes
    flat = layer.Flatten(state_input)
    tracked = [state_input]

    # Feature width fed into the heads: last configured hidden size, or the
    # flattened input width when no hidden sizes are configured.
    head_size = flat.node.shape.as_list()[-1]
    if hidden_sizes:
        head_size = hidden_sizes[-1]

    if da3c_config.config.use_lstm:
        recurrent = layer.lstm(da3c_config.config.lstm_type,
                               graph.Expand(flat, 0),
                               n_units=head_size,
                               n_cores=da3c_config.config.lstm_num_cores)
        head = graph.Reshape(recurrent, [-1, head_size])
        tracked.append(recurrent)
        self.ph_lstm_state = recurrent.ph_state
        self.lstm_zero_state = recurrent.zero_state
        self.lstm_state = recurrent.state
        self.lstm_reset_timestep = recurrent.reset_timestep
    else:
        act_fn = layer.get_activation(da3c_config.config)
        head = layer.GenericLayers(flat, [
            dict(type=layer.Dense, size=sz, activation=act_fn)
            for sz in hidden_sizes])
        tracked.append(head)

    actor = layer.Actor(head, da3c_config.config.output)
    critic = layer.Dense(head, 1)
    tracked.extend((actor, critic))

    self.ph_state = state_input.ph_state
    self.actor = actor
    self.critic = graph.Flatten(critic)
    self.weights = layer.Weights(*tracked)
    self.actor_weights = layer.Weights(actor)
def build_graph(self):
    """Assemble the DA3C actor-critic network with an optional conv front-end.

    Universe-style inputs get four identical ELU convolutions; shared
    features (LSTM or Relu dense stack) feed an actor head and a scalar
    critic head. Exposes placeholders, heads and weights on self.
    """
    conv_desc = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                     n_filters=32, filter_size=[3, 3], stride=[2, 2],
                     border=layer.Border.Same)
    descs = [dict(conv_desc)] * 4 if da3c_config.config.input.universe else None
    state_input = layer.Input(da3c_config.config.input, descs=descs)

    hidden_sizes = da3c_config.config.hidden_sizes
    flat = layer.Flatten(state_input)
    tracked = [state_input]

    # Feature width fed into the heads: last configured size, or the
    # flattened input width when no hidden sizes are configured.
    head_size = flat.node.shape.as_list()[-1]
    if hidden_sizes:
        head_size = hidden_sizes[-1]

    if da3c_config.config.use_lstm:
        recurrent = layer.LSTM(graph.Expand(flat, 0), n_units=head_size)
        head = graph.Reshape(recurrent, [-1, head_size])
        tracked.append(recurrent)
        self.ph_lstm_state = recurrent.ph_state
        self.lstm_zero_state = recurrent.zero_state
        self.lstm_state = recurrent.state
    else:
        head = layer.GenericLayers(flat, [
            dict(type=layer.Dense, size=sz, activation=layer.Activation.Relu)
            for sz in hidden_sizes])
        tracked.append(head)

    actor = layer.Actor(head, da3c_config.config.output)
    critic = layer.Dense(head, 1)
    tracked.extend((actor, critic))

    self.ph_state = state_input.ph_state
    self.actor = actor
    self.critic = graph.Flatten(critic)
    self.weights = layer.Weights(*tracked)
    self.actor_weights = layer.Weights(actor)
def build_graph(self):
    """Assemble the ICM (intrinsic curiosity) module.

    The input batch appears to pack state pairs along the batch axis (the
    original code notes the batch size "should be even") — the flattened
    batch is reshaped to [-1, 2 * state_size] and split into a current and
    a following state half. The inverse model predicts the taken action
    from the pair; the forward model predicts the following state's
    features from (current state, action probabilities). Exposes
    ``rew_out`` (scaled forward loss, the intrinsic reward) and the
    combined ICM ``loss`` on self.
    """
    conv_desc = dict(type=layer.Convolution, activation=layer.Activation.Elu,
                     n_filters=32, filter_size=[3, 3], stride=[2, 2],
                     border=layer.Border.Same)
    state_input = layer.Input(cfg.config.input, descs=[dict(conv_desc)] * 4)

    probs_shape = [None] + [cfg.config.output.action_size]
    self.ph_probs = graph.Placeholder(np.float32, shape=probs_shape, name='act_probs')
    self.ph_taken = graph.Placeholder(np.int32, shape=(None,), name='act_taken')

    state_size = layer.Flatten(state_input).node.shape.as_list()[-1]

    # Split each packed pair into its two state halves.
    paired = graph.Reshape(state_input, [-1, state_size * 2])
    current = graph.TfNode(paired.node[:, :state_size])
    following = graph.TfNode(paired.node[:, state_size:])
    forward_inp = graph.Concat([current, self.ph_probs], axis=1)

    fc_size = cfg.config.hidden_sizes[-1]
    inv_fc1 = layer.Dense(paired, fc_size, layer.Activation.Relu)
    inv_fc2 = layer.Dense(inv_fc1, probs_shape[-1])  # logits; softmax applied by the loss below
    fwd_fc1 = layer.Dense(forward_inp, fc_size, layer.Activation.Relu)
    fwd_fc2 = layer.Dense(fwd_fc1, state_size)

    inv_loss = graph.SparseSoftmaxCrossEntropyWithLogits(inv_fc2, self.ph_taken).op
    fwd_loss = graph.L2loss(fwd_fc2.node - following.node).op

    self.ph_state = state_input.ph_state  # batch size should be even wrt pairs for now
    self.rew_out = graph.TfNode(cfg.config.icm.nu * fwd_loss)
    # beta trades off forward (curiosity) vs. inverse (action) objectives.
    self.loss = graph.TfNode(cfg.config.icm.beta * fwd_loss
                             + (1 - cfg.config.icm.beta) * inv_loss)

    self.weights = layer.Weights(state_input, inv_fc1, inv_fc2, fwd_fc1, fwd_fc2)
def build_graph(self, x):
    """Flatten x to 2-D: keep the batch axis, collapse all remaining axes."""
    flat_dim = np.prod(x.node.shape.as_list()[1:])
    return graph.Reshape(x, (-1, flat_dim)).node