def get_mlp_backbone(state_dim, act_dim, hidden_sizes, activation,
                     vf_share_layers=False, summary=False):
    """Build a two-headed MLP model (policy latent + state value).

    Args:
        state_dim: shape handed to the Keras ``Input`` named ``obs``.
        act_dim: number of units of the ``pi_latent`` output layer.
        hidden_sizes: forwarded unchanged to ``bulid_mlp_layers``.
        activation: forwarded unchanged to ``bulid_mlp_layers``.
        vf_share_layers: if true, both heads are attached to a single
            trunk tagged ``'shared'``; otherwise each head gets its own
            trunk tagged ``'pi'`` / ``'v'``.
        summary: if true, call ``model.summary()`` before returning.

    Returns:
        A Keras ``Model`` with input ``[obs]`` and outputs
        ``[pi_latent, output_value]``.
    """
    obs = Input(shape=state_dim, name='obs')

    if vf_share_layers:
        # One trunk feeds both linear heads.
        trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'shared')
        policy_out = Dense(act_dim, activation=None, name='pi_latent')(trunk)
        value_out = Dense(1, activation=None, name='output_value')(trunk)
    else:
        # Separate trunks; the policy branch is built completely before
        # the value branch so layers are created in the same order as before.
        policy_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'pi')
        policy_out = Dense(act_dim, activation=None, name='pi_latent')(policy_trunk)
        value_trunk = bulid_mlp_layers(obs, hidden_sizes, activation, 'v')
        value_out = Dense(1, activation=None, name='output_value')(value_trunk)

    network = Model(inputs=[obs], outputs=[policy_out, value_out])
    if summary:
        network.summary()
    return network
def create_model(self, model_info):
    """Create Deep-Q CNN network.

    The network takes a ``uint8`` input of shape ``self.state_dim``, casts
    it to ``float32`` and divides by 255, then applies three ReLU
    convolutions, a 256-unit ReLU dense layer and a linear output layer with
    ``self.action_dim`` units. The model is compiled with MSE loss and an
    Adam optimizer (``self.learning_rate``, ``clipnorm=10``).

    Besides returning the model, this method sets ``self.infer_state``
    (a ``uint8`` placeholder), ``self.infer_v`` (the model applied to that
    placeholder) and ``self.actor_var``, and runs the global variable
    initializer in ``self.sess``.

    Args:
        model_info: mapping; only its ``"summary"`` key is read (via
            ``.get``) to decide whether to print the model summary.

    Returns:
        The compiled Keras ``Model``.
    """
    state = Input(shape=self.state_dim, dtype="uint8")
    # Cast to float32 and divide by 255 inside the graph.
    state1 = Lambda(lambda x: K.cast(x, dtype='float32') / 255.)(state)
    convlayer = Conv2D(32, (8, 8), strides=(4, 4), activation='relu', padding='valid')(state1)
    convlayer = Conv2D(64, (4, 4), strides=(2, 2), activation='relu', padding='valid')(convlayer)
    convlayer = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='valid')(convlayer)
    flattenlayer = Flatten()(convlayer)
    denselayer = Dense(256, activation='relu')(flattenlayer)
    value = Dense(self.action_dim, activation='linear')(denselayer)

    model = Model(inputs=state, outputs=value)
    adam = Adam(lr=self.learning_rate, clipnorm=10.)
    model.compile(loss='mse', optimizer=adam)
    if model_info.get("summary"):
        model.summary()

    # Explicit placeholder + model call used for inference through self.sess.
    self.infer_state = tf.placeholder(tf.uint8, name="infer_input",
                                      shape=(None, ) + tuple(self.state_dim))
    self.infer_v = model(self.infer_state)
    # NOTE(review): TFVariables is defined elsewhere; presumably it collects
    # the variables reachable from infer_v -- confirm against its definition.
    self.actor_var = TFVariables([self.infer_v], self.sess)

    # tf.initialize_all_variables() is deprecated; this is its documented
    # replacement and initializes the same set of global variables.
    self.sess.run(tf.global_variables_initializer())
    return model
def get_cnn_backbone(state_dim, act_dim, hidden_sizes, activation, filter_arches,
                     vf_share_layers=True, summary=False, dtype='uint8'):
    """Build a two-headed CNN model (policy latent + state value).

    Args:
        state_dim: shape handed to the Keras ``Input`` named ``obs``.
        act_dim: number of units of the ``pi_latent`` output layer.
        hidden_sizes: forwarded unchanged to ``bulid_mlp_layers``.
        activation: forwarded to ``build_conv_layers`` and
            ``bulid_mlp_layers``.
        filter_arches: forwarded unchanged to ``build_conv_layers``.
        vf_share_layers: if true, one conv + MLP trunk tagged ``'shared'``
            feeds both heads; otherwise separate ``'pi'`` and ``'v'`` trunks
            are built.
        summary: if true, call ``model.summary()`` before returning.
        dtype: ``'uint8'`` routes the input through
            ``Lambda(layer_function)``; ``'float32'`` uses the raw input.

    Returns:
        A Keras ``Model`` with input ``[obs]`` and outputs
        ``[pi_latent, output_value]``.

    Raises:
        ValueError: if ``dtype`` is neither ``'uint8'`` nor ``'float32'``.
    """
    obs = Input(shape=state_dim, name='obs')

    if dtype == 'float32':
        net_in = obs
    elif dtype == 'uint8':
        # NOTE(review): layer_function is defined elsewhere; presumably it
        # converts/scales the uint8 observation -- confirm.
        net_in = Lambda(layer_function)(obs)
    else:
        raise ValueError(
            'dtype: {} not supported automatically, please implement it yourself'
            .format(dtype))

    if vf_share_layers:
        shared_conv = build_conv_layers(net_in, filter_arches, activation, 'shared')
        shared_flat = Flatten()(shared_conv)
        pi_features = v_features = bulid_mlp_layers(
            shared_flat, hidden_sizes, activation, 'shared')
    else:
        # Each stage is built for 'pi' first and 'v' second, stage by stage,
        # which is the same layer creation order as a hand-unrolled version.
        tags = ('pi', 'v')
        convs = [build_conv_layers(net_in, filter_arches, activation, tag)
                 for tag in tags]
        flats = [Flatten()(conv) for conv in convs]
        pi_features, v_features = [
            bulid_mlp_layers(flat, hidden_sizes, activation, tag)
            for flat, tag in zip(flats, tags)
        ]

    policy_out = Dense(act_dim, activation=None, name='pi_latent')(pi_features)
    value_out = Dense(1, activation=None, name='output_value')(v_features)

    network = Model(inputs=[obs], outputs=[policy_out, value_out])
    if summary:
        network.summary()
    return network
def create_model(self, model_info):
    """Build the dense actor-critic model and hand it to ``build_graph``.

    A stack of ``HIDDEN_SIZE``-unit ReLU layers is applied to the state
    input; on top of it sit a softmax head with ``self.action_dim`` units
    (``output_actions``) and a single-unit head (``output_value``).

    Args:
        model_info: mapping; only its ``"summary"`` key is read (via
            ``.get``) to decide whether to print the model summary.

    Returns:
        The Keras ``Model`` mapping the state input to
        ``[output_actions, output_value]``.
    """
    obs = Input(shape=self.state_dim, name='state_input')
    # The next three inputs are not connected to the model built below.
    # They are still created, in this order, so the set of ops added to the
    # graph stays the same.
    adv_in = Input(shape=(1, ), name='adv')
    old_policy_in = Input(shape=(self.action_dim, ), name='old_p')
    old_value_in = Input(shape=(1, ), name='old_v')

    # First hidden layer is unconditional; range(1, NUM_LAYERS) then adds
    # the remaining NUM_LAYERS - 1 layers.
    hidden = Dense(HIDDEN_SIZE, activation='relu')(obs)
    for _ in range(1, NUM_LAYERS):
        hidden = Dense(HIDDEN_SIZE, activation='relu')(hidden)

    action_probs = Dense(self.action_dim, activation='softmax',
                         name='output_actions')(hidden)
    state_value = Dense(1, name='output_value')(hidden)

    network = Model(inputs=[obs], outputs=[action_probs, state_value])
    if model_info.get("summary"):
        network.summary()

    # NOTE(review): build_graph is defined elsewhere on this class; its
    # effect is not visible here.
    self.build_graph(tf.float32, network)
    return network
def get_cnn_backbone(state_dim, act_dim, hidden_sizes, activation, filter_arches,
                     vf_share_layers=True, summary=False):
    """Build a two-headed CNN model (policy latent + state value).

    The ``obs`` input is always routed through ``Lambda(layer_function)``
    before the convolutional stack.

    Args:
        state_dim: shape handed to the Keras ``Input`` named ``obs``.
        act_dim: number of units of the ``pi_latent`` output layer.
        hidden_sizes: forwarded unchanged to ``bulid_mlp_layers``.
        activation: forwarded to ``build_conv_layers`` and
            ``bulid_mlp_layers``.
        filter_arches: forwarded unchanged to ``build_conv_layers``.
        vf_share_layers: if true, one conv + MLP trunk tagged ``'shared'``
            feeds both heads; otherwise separate ``'pi'`` and ``'v'`` trunks
            are built.
        summary: if true, call ``model.summary()`` before returning.

    Returns:
        A Keras ``Model`` with input ``[obs]`` and outputs
        ``[pi_latent, output_value]``.
    """
    obs = Input(shape=state_dim, name='obs')
    # NOTE(review): layer_function is defined elsewhere; presumably an input
    # preprocessing step -- confirm.
    net_in = Lambda(layer_function)(obs)

    def _linear_head(units, layer_name, features):
        # Output layer without activation on top of ``features``.
        return Dense(units, activation=None, name=layer_name)(features)

    if not vf_share_layers:
        # Stage-by-stage construction, 'pi' before 'v' at every stage.
        pi_conv = build_conv_layers(net_in, filter_arches, activation, 'pi')
        v_conv = build_conv_layers(net_in, filter_arches, activation, 'v')
        pi_flat = Flatten()(pi_conv)
        v_flat = Flatten()(v_conv)
        pi_features = bulid_mlp_layers(pi_flat, hidden_sizes, activation, 'pi')
        v_features = bulid_mlp_layers(v_flat, hidden_sizes, activation, 'v')
    else:
        shared_conv = build_conv_layers(net_in, filter_arches, activation, 'shared')
        shared_flat = Flatten()(shared_conv)
        shared_features = bulid_mlp_layers(shared_flat, hidden_sizes, activation, 'shared')
        pi_features = v_features = shared_features

    policy_out = _linear_head(act_dim, 'pi_latent', pi_features)
    value_out = _linear_head(1, 'output_value', v_features)

    network = Model(inputs=[obs], outputs=[policy_out, value_out])
    if summary:
        network.summary()
    return network
def get_mlp_backbone(state_dim, act_dim, hidden_sizes, activation,
                     vf_share_layers=False, summary=False, dtype='float32'):
    """Build a two-headed MLP model (policy latent + state value).

    Args:
        state_dim: shape handed to the Keras ``Input`` named ``obs``.
        act_dim: number of units of the ``pi_latent`` output layer.
        hidden_sizes: forwarded unchanged to ``bulid_mlp_layers``.
        activation: forwarded unchanged to ``bulid_mlp_layers``.
        vf_share_layers: if true, both heads are attached to a single
            trunk tagged ``'shared'``; otherwise each head gets its own
            trunk tagged ``'pi'`` / ``'v'``.
        summary: if true, call ``model.summary()`` before returning.
        dtype: only ``'float32'`` is accepted.

    Returns:
        A Keras ``Model`` with input ``[obs]`` and outputs
        ``[pi_latent, output_value]``.

    Raises:
        ValueError: if ``dtype`` is not ``'float32'``.
    """
    # The input layer is created before the dtype check on purpose, so the
    # order of side effects is unchanged.
    obs = Input(shape=state_dim, name='obs')
    if dtype != 'float32':
        raise ValueError(
            'dtype: {} not supported automatically, please implement it yourself'
            .format(dtype))

    if vf_share_layers:
        shared_features = bulid_mlp_layers(obs, hidden_sizes, activation, 'shared')
        policy_out = Dense(act_dim, activation=None, name='pi_latent')(shared_features)
        value_out = Dense(1, activation=None, name='output_value')(shared_features)
    else:
        # Policy branch is finished before the value branch is started.
        pi_features = bulid_mlp_layers(obs, hidden_sizes, activation, 'pi')
        policy_out = Dense(act_dim, activation=None, name='pi_latent')(pi_features)
        v_features = bulid_mlp_layers(obs, hidden_sizes, activation, 'v')
        value_out = Dense(1, activation=None, name='output_value')(v_features)

    network = Model(inputs=[obs], outputs=[policy_out, value_out])
    if summary:
        network.summary()
    return network
def create_model(self, model_info):
    """Create Deep-Q network.

    The model has four inputs (user, clicked history, non-clicked history,
    candidate item). All of them go through one shared, non-trainable
    ``Embedding``. The two history inputs are reshaped to
    ``(n_history, item_dim * emb_dim)`` and each summarized by its own GRU;
    user and item embeddings are flattened. The four feature vectors are
    concatenated and passed through two 128-unit ReLU layers and a
    single-unit output layer named ``q_value`` with activation
    ``self.last_act``. The model is compiled with MSE loss and Adam
    (``self.learning_rate``).

    Besides returning the model, this method sets the placeholders
    ``self.user_input``, ``self.history_click_input``,
    ``self.history_no_click_input`` and ``self.item_input``, the tensor
    ``self.ctr_predict`` (the model applied to those placeholders) and
    ``self.actor_var``, and runs the global variable initializer in
    ``self.sess``.

    Args:
        model_info: not read by this method; the summary switch comes from
            ``self._summary``.

    Returns:
        The compiled Keras ``Model``.
    """
    click_len = self.n_history_click * self.item_dim
    no_click_len = self.n_history_no_click * self.item_dim
    step_dim = self.item_dim * self.emb_dim

    user_input = Input(shape=(self.user_dim,), name="user_input", dtype=self.input_type)
    # ``shape`` must be a tuple: ``(n)`` without the trailing comma is a bare
    # int, which not every Keras version accepts.
    history_click_input = Input(
        shape=(click_len,), name="history_click", dtype=self.input_type
    )
    history_no_click_input = Input(
        shape=(no_click_len,), name="history_no_click", dtype=self.input_type
    )
    item_input = Input(shape=(self.item_dim,), name="item_input", dtype=self.input_type)

    # Shared embedding table; frozen (trainable=False).
    shared_embedding = Embedding(
        self.vocab_size,
        self.emb_dim,
        name="Emb",
        mask_zero=True,
        embeddings_initializer=self.embedding_initializer,
        trainable=False,
    )

    gru_click = GRU(step_dim)
    gru_no_click = GRU(step_dim)

    user_feature = Flatten()(shared_embedding(user_input))
    item_feature = Flatten()(shared_embedding(item_input))

    # Regroup the embedded ids into one vector of size item_dim * emb_dim
    # per history entry, then summarize the sequence with a GRU.
    history_click_feature = Reshape(
        (self.n_history_click, step_dim)
    )(shared_embedding(history_click_input))
    history_click_feature = gru_click(history_click_feature)

    history_no_click_feature = Reshape(
        (self.n_history_no_click, step_dim)
    )(shared_embedding(history_no_click_input))
    history_no_click_feature = gru_no_click(history_no_click_feature)

    x = concatenate(
        [
            user_feature,
            history_click_feature,
            history_no_click_feature,
            item_feature,
        ]
    )
    x_dense1 = Dense(128, activation="relu")(x)
    x_dense2 = Dense(128, activation="relu")(x_dense1)
    ctr_pred = Dense(1, activation=self.last_act, name="q_value")(x_dense2)

    model = Model(
        inputs=[
            user_input,
            history_click_input,
            history_no_click_input,
            item_input,
        ],
        outputs=ctr_pred,
    )
    model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
    if self._summary:
        model.summary()

    # Explicit placeholders + model call used for inference through self.sess.
    self.user_input = tf.placeholder(
        dtype=self.input_type, name="user_input", shape=(None, self.user_dim)
    )
    self.history_click_input = tf.placeholder(
        dtype=self.input_type,
        name="history_click_input",
        shape=(None, click_len),
    )
    self.history_no_click_input = tf.placeholder(
        dtype=self.input_type,
        name="history_no_click_input",
        shape=(None, no_click_len),
    )
    self.item_input = tf.placeholder(
        dtype=self.input_type, name="item_input", shape=(None, self.item_dim)
    )

    self.ctr_predict = model(
        [
            self.user_input,
            self.history_click_input,
            self.history_no_click_input,
            self.item_input,
        ]
    )
    # NOTE(review): TFVariables is defined elsewhere; presumably it collects
    # the variables reachable from ctr_predict -- confirm against its definition.
    self.actor_var = TFVariables([self.ctr_predict], self.sess)

    # tf.initialize_all_variables() is deprecated; this is its documented
    # replacement and initializes the same set of global variables.
    self.sess.run(tf.global_variables_initializer())
    return model