def _nas_given_input_sample(self, nas_inputs, domain=None):
    # nas_inputs: (batch_size, num_steps, units)
    initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}

    seq_num = array_ops.shape(self.inputs["seq_len"])[0]
    gather_indices = tf.stack([tf.range(seq_num), self.inputs["seq_len"] - 1], 1)  # (batch_size, 2)
    gather_output = tf.gather_nd(nas_inputs, gather_indices)  # (batch_size, units)

    with tf.variable_scope("NAS"):
        probs = self._nas_classifier(gather_output, self.config["nas"], reuse=True)
    update_ops["given_input_action_probs"] = probs

    self.update_ops.update(add_domain2dict(update_ops, domain))
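# A minimal NumPy sketch (not part of the model) of what the gather above does:
# stacking [range(batch), seq_len - 1] and applying gather_nd picks, for every
# sequence in the batch, the encoder output at its last valid timestep.
def _sketch_last_step_gather():
    import numpy as np
    nas_inputs = np.arange(2 * 3 * 4).reshape(2, 3, 4)  # (batch_size=2, num_steps=3, units=4)
    seq_len = np.array([2, 3])                          # valid length per sequence
    gather_indices = np.stack([np.arange(2), seq_len - 1], axis=1)  # (2, 2)
    gather_output = nas_inputs[gather_indices[:, 0], gather_indices[:, 1]]  # (2, 4)
    assert gather_output.shape == (2, 4)  # row 0 is step 1 of seq 0, row 1 is step 2 of seq 1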
def _nas_train(self, nas_inputs, domain=None):
    # nas_inputs: (batch_size, num_steps, units)
    initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
    optimizer = TFUtils.build_optimizer(self.config["optimizer"])
    num_steps = self.config["encoder"]["num_steps"]
    nas_config = self.config["nas"]

    with tf.variable_scope("NAS"):
        inputs["reward"] = tf.placeholder(tf.float32, [None], "reward")  # (batch_size, )
        inputs["probs_mask"] = tf.placeholder(tf.float32, [None, num_steps, None], "probs_mask")
        inputs["action"] = tf.placeholder(tf.int32, [None, num_steps], "action")  # (batch_size, num_steps)
        inputs["action_start"] = tf.placeholder(tf.int32, [None], "action_start")  # (batch_size, )

        # flat_nas_inputs: (batch_size * num_steps, units)
        flat_nas_inputs = tf.reshape(nas_inputs, [-1, nas_inputs.shape.as_list()[2]])
        probs = self._nas_classifier(flat_nas_inputs, nas_config, reuse=True)  # (batch_size * num_steps, act_num)

        # zero out invalid actions and renormalize each row
        probs_mask = tf.reshape(inputs["probs_mask"], [-1, probs.shape.as_list()[1]])
        probs = tf.multiply(probs, probs_mask)  # (batch_size * num_steps, act_num)
        probs = tf.divide(probs, tf.reduce_sum(probs, axis=1, keep_dims=True))  # (batch_size * num_steps, act_num)

        # gather the probability of each taken action
        indices = tf.reshape(inputs["action"], shape=[-1])  # (batch_size * num_steps, )
        indices = tf.stack([tf.range(array_ops.shape(indices)[0]), indices], axis=1)  # (batch_size * num_steps, 2)
        probs = tf.gather_nd(probs, indices)  # (batch_size * num_steps, )
        probs = tf.reshape(probs, shape=[-1, num_steps])  # (batch_size, num_steps)
        log_probs = tf.log(probs)  # (batch_size, num_steps)

        # keep only log-probs inside the [action_start, seq_len - 1] window
        action_mask = TFUtils.matrix_mask(num_steps, inputs["action_start"], self.inputs["seq_len"] - 1)
        log_probs = tf.multiply(log_probs, action_mask)  # (batch_size, num_steps)

        # REINFORCE objective: reward-weighted log-probs, averaged over the batch
        obj = tf.multiply(log_probs, tf.expand_dims(inputs["reward"], axis=1))  # (batch_size, num_steps)
        obj = tf.reduce_sum(obj) / tf.cast(array_ops.shape(obj)[0], tf.float32)

        overheads["loss"] = -obj
        update_ops["reinforce"] = optimizer.minimize(overheads["loss"], self.overheads["global_step"])

    self.initializers.update(add_domain2dict(initializers, domain))
    self.inputs.update(add_domain2dict(inputs, domain))
    self.overheads.update(add_domain2dict(overheads, domain))
    self.update_ops.update(add_domain2dict(update_ops, domain))
    self.summaries.update(add_domain2dict(summaries, domain))
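# A minimal NumPy sketch (not part of the model) of the masking step above:
# invalid actions are zeroed out by probs_mask, each row is renormalized to
# sum to 1, and the probability of the taken action is gathered per row.
def _sketch_masked_action_probs():
    import numpy as np
    probs = np.array([[0.5, 0.3, 0.2], [0.1, 0.6, 0.3]])   # (rows=2, act_num=3)
    probs_mask = np.array([[1.0, 1.0, 0.0], [1.0, 1.0, 1.0]])
    probs = probs * probs_mask
    probs = probs / probs.sum(axis=1, keepdims=True)        # rows sum to 1 again
    action = np.array([1, 2])                               # taken action per row
    action_probs = probs[np.arange(2), action]              # gather_nd equivalent
    assert np.allclose(action_probs, [0.3 / 0.8, 0.3])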
def _nas_one_step_sample(self, cell, domain=None):
    encoder_config = self.config["encoder"]
    if encoder_config["rnn_type"] == "BiRNN":
        raise ValueError("NAS does not support BiRNN!")
    inputs, update_ops = {}, {}

    with tf.variable_scope(self.encoder_scope):
        # sample layer: first input
        if self.config.get("embedding") and self.config["embedding"]["use_embedding"]:
            inputs["one_step_token"] = tf.placeholder(tf.int32, [None], "one_step_token")  # (batch_size, )
            embedding_config = self.config["embedding"]
            embedding_initializer = self.get_initializer(embedding_config.get("initializer"))
            one_step_token, _ = TFUtils.embedding_layer(
                inputs["one_step_token"], embedding_config, embedding_initializer,
                reuse=True, scope=self.embed_scope)  # (batch_size, n_input)
        else:
            inputs["one_step_token"] = tf.placeholder(
                tf.float32, [None, self.config["input_dim"]], "one_step_token")
            one_step_token = inputs["one_step_token"]  # (batch_size, n_input)

        # sample next layer: hidden states
        inputs["init_cell_state"] = tf.placeholder(
            tf.float32,
            [encoder_config["num_layers"], 2, None, encoder_config["hidden_units"]],
            "init_cell_state")
        init_cell_state = tf.unstack(inputs["init_cell_state"], axis=0)
        init_cell_state = tuple([
            rnn.LSTMStateTuple(init_cell_state[_i][0], init_cell_state[_i][1])
            for _i in range(encoder_config["num_layers"])
        ])

        with tf.variable_scope(encoder_config["rnn_type"], reuse=True):
            cell_output, cell_state = cell(one_step_token, init_cell_state)  # cell_output: (batch_size, units)

    nas_config = self.config["nas"]
    with tf.variable_scope("NAS"):
        probs = self._nas_classifier(cell_output, nas_config, reuse=False)  # (batch_size, act_space)

    update_ops["one_step_cell_state"] = cell_state
    update_ops["one_step_action_probs"] = probs

    self.inputs.update(add_domain2dict(inputs, domain))
    self.update_ops.update(add_domain2dict(update_ops, domain))
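# A hypothetical driver loop for the one-step sampler above, assuming a live
# tf.Session `sess`, a `model` exposing these dicts, and `domain=None` so
# add_domain2dict leaves the keys unprefixed (all assumptions, not fixed here).
# Note that sess.run on the tuple of LSTMStateTuples returns nested numpy
# arrays shaped (num_layers, 2, batch_size, hidden_units), matching the
# init_cell_state placeholder, so the state can be fed straight back in:
#
#   state = np.zeros([num_layers, 2, batch_size, hidden_units])
#   token = start_token
#   for _ in range(max_steps):
#       state, probs = sess.run(
#           [model.update_ops["one_step_cell_state"],
#            model.update_ops["one_step_action_probs"]],
#           feed_dict={model.inputs["one_step_token"]: token,
#                      model.inputs["init_cell_state"]: state})
#       token = sample_action(probs)  # hypothetical sampling helper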
def seq2v_decoder_(self, encoder_outputs, task, domain=None, build_train=True):
    initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
    decoder_inputs = tf.stack(encoder_outputs, axis=0)  # (num_steps, seq_num, hidden_units)
    decoder_inputs = tf.transpose(decoder_inputs, [1, 0, 2])  # (seq_num, num_steps, hidden_units)
    encoder_config = self.config["encoder"]
    seq2v_config = self.config["seq2v"]

    with tf.variable_scope(task):
        if seq2v_config["use_all_outputs"]:
            decoder_inputs = tf.reshape(decoder_inputs, [-1, np.prod(decoder_inputs.shape.as_list()[1:])])
        else:
            seq_num = array_ops.shape(self.inputs["seq_len"])[0]
            gather_indices = tf.stack([tf.range(seq_num), self.inputs["seq_len"] - 1], 1)  # (seq_num, 2)
            gather_output = tf.gather_nd(decoder_inputs, gather_indices)  # (seq_num, hidden_units)
            if encoder_config["rnn_type"] == "BiRNN":
                # forward half at the last valid step, backward half at step 0
                hidden_units = self.config["encoder"]["hidden_units"]
                fw_output = gather_output[:, 0:hidden_units]
                bw_output = decoder_inputs[:, 0, :][:, hidden_units:]
                decoder_inputs = tf.concat([fw_output, bw_output], axis=1)
            else:
                decoder_inputs = gather_output

        out_bias = not seq2v_config.get("no_out_bias")

        # dense and output layers: (seq_num, out_dim)
        dense_initializer = self.get_initializer(seq2v_config.get("dense_initializer"), "dense")
        output_initializer = self.get_initializer(seq2v_config.get("output_initializer"), "output")
        decoder_outputs = TFUtils.classifier(
            decoder_inputs, seq2v_config["output_dim"], seq2v_config["dense"],
            dense_initializer, output_initializer, reuse=False, out_bias=out_bias, scope="Output")

        if "regression" in task:
            inputs["labels"] = tf.placeholder(tf.float32, shape=[None, seq2v_config["output_dim"]], name="labels")
        else:
            inputs["labels"] = tf.placeholder(tf.int32, shape=[None], name="labels")

        with tf.variable_scope("Overheads"):
            if task == "regression":
                labels = inputs["labels"]
                square_loss = tf.square(decoder_outputs - labels, name="square_loss")
                loss = tf.reduce_mean(square_loss)
                predictions = decoder_outputs
            else:
                labels = tf.one_hot(inputs["labels"], depth=seq2v_config["output_dim"], name="one_hot_labels")
                softmax_loss = tf.nn.softmax_cross_entropy_with_logits(
                    labels=labels, logits=decoder_outputs, name="softmax_loss")
                loss = tf.reduce_mean(softmax_loss)
                predictions = tf.argmax(decoder_outputs, axis=-1, name="predictions")
            overheads["loss"], overheads["predictions"] = loss, predictions

            if build_train:
                optimizer = TFUtils.build_optimizer(self.config["optimizer"])
                update_ops["minimize"] = optimizer.minimize(loss, global_step=self.overheads["global_step"])

    if build_train:
        TFUtils.train_loop(update_ops["minimize"], task, initializers, inputs, overheads, update_ops, summaries)
    TFUtils.test_loop(task, initializers, inputs, overheads, update_ops, summaries, stage="test")
    TFUtils.test_loop(task, initializers, inputs, overheads, update_ops, summaries, stage="validate")

    self.initializers.update(add_domain2dict(initializers, domain))
    self.inputs.update(add_domain2dict(inputs, domain))
    self.overheads.update(add_domain2dict(overheads, domain))
    self.update_ops.update(add_domain2dict(update_ops, domain))
    self.summaries.update(add_domain2dict(summaries, domain))
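# A minimal NumPy sketch (not part of the model) of the BiRNN read-out above:
# the forward half of each output is taken at the last valid step, while the
# backward half is taken at step 0, where the backward pass has already
# consumed the whole sequence; the two halves are then concatenated.
def _sketch_birnn_readout():
    import numpy as np
    hidden_units = 2
    outputs = np.arange(1 * 3 * 4).reshape(1, 3, 4).astype(float)  # (seq_num=1, num_steps=3, 2 * hidden_units)
    seq_len = np.array([2])
    last_step = outputs[np.arange(1), seq_len - 1]     # (1, 2 * hidden_units)
    fw_output = last_step[:, :hidden_units]            # forward half at the last valid step
    bw_output = outputs[:, 0, hidden_units:]           # backward half at the first step
    readout = np.concatenate([fw_output, bw_output], axis=1)
    assert readout.shape == (1, 2 * hidden_units)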
def net2wider_decoder(self, states, domain=None, build_train=True):
    # states: (states_num, num_steps, units)
    initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
    optimizer = TFUtils.build_optimizer(self.config["optimizer"])
    states_num = array_ops.shape(states)[0]
    config = self.config["net2wider"]

    with tf.variable_scope("Net2Wider"):
        # sample action
        with tf.variable_scope("Sample"):
            # input valid_action: (states_num, max_ac_num)
            inputs["valid_action"] = tf.placeholder(tf.int32, shape=[None, None], name="valid_action")
            tmp = tf.expand_dims(tf.range(states_num), axis=1) + \
                tf.zeros_like(inputs["valid_action"], dtype=tf.int32)  # (states_num, max_ac_num)
            indices = tf.stack([tmp, inputs["valid_action"]], axis=2)  # (states_num, max_ac_num, 2)
            indices = tf.reshape(indices, [-1, 2])  # (states_num * max_ac_num, 2)
            states = tf.gather_nd(states, indices)  # (states_num * max_ac_num, units)

            if config["seq2seq"]:
                pass  # TODO
            else:
                dense_initializer = self.get_initializer(config.get("dense_initializer"), "dense")
                output_initializer = self.get_initializer(config.get("output_initializer"), "output")
                logits = TFUtils.classifier(
                    states, 1, config["dense"], dense_initializer, output_initializer,
                    reuse=False, scope="classifier")  # (states_num * max_ac_num, 1)
                probs = tf.nn.sigmoid(logits)  # (states_num * max_ac_num, 1)
                # operation: sample net2wider actions, (states_num, max_ac_num)
                update_ops["action_probs"] = tf.reshape(probs, shape=[states_num, -1])

        # net2wider train
        with tf.variable_scope("REINFORCE"):
            inputs["reward"] = tf.placeholder(tf.float32, [None], "reward")  # (states_num, )
            inputs["action"] = tf.placeholder(tf.int32, [None, None], "action")  # (states_num, max_ac_num)
            inputs["action_mask"] = tf.placeholder(tf.float32, [None, None], "action_mask")
            inputs["episode_num"] = tf.placeholder(tf.float32, shape=(), name="episode_num")

            indices = tf.reshape(inputs["action"], [-1])  # (states_num * max_ac_num, )
            indices = tf.stack([tf.range(array_ops.shape(indices)[0]), indices], 1)
            ex_probs = tf.concat([1 - probs, probs], axis=1)  # (states_num * max_ac_num, 2)
            action_probs = tf.gather_nd(ex_probs, indices)  # (states_num * max_ac_num, )
            log_action_probs = tf.log(action_probs)  # (states_num * max_ac_num, )
            log_action_probs = tf.reshape(log_action_probs, shape=[states_num, -1])  # (states_num, max_ac_num)
            log_action_probs = tf.multiply(log_action_probs, inputs["action_mask"])  # (states_num, max_ac_num)

            obj = tf.multiply(log_action_probs, tf.expand_dims(inputs["reward"], axis=1))
            obj = tf.reduce_sum(obj)

            if build_train:
                # overheads: _loss
                overheads["loss"] = -obj / inputs["episode_num"]
                # operation: _reinforce
                update_ops["reinforce"] = optimizer.minimize(overheads["loss"], self.overheads["global_step"])

    self.initializers.update(add_domain2dict(initializers, domain))
    self.inputs.update(add_domain2dict(inputs, domain))
    self.overheads.update(add_domain2dict(overheads, domain))
    self.update_ops.update(add_domain2dict(update_ops, domain))
    self.summaries.update(add_domain2dict(summaries, domain))
    return obj
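# A minimal NumPy sketch (not part of the model) of how the sigmoid
# `action_probs` exposed above could be turned into binary widen/keep
# decisions; the Bernoulli sampling itself presumably happens outside this
# graph (an assumption; this file only exposes the probabilities):
def _sketch_sample_wider_actions():
    import numpy as np
    rng = np.random.RandomState(0)
    action_probs = np.array([[0.9, 0.2, 0.5]])  # (states_num=1, max_ac_num=3)
    action = (rng.uniform(size=action_probs.shape) < action_probs).astype(np.int32)
    # action[i, j] == 1 means "widen candidate j of state i"; the stacked
    # [1 - p, p] trick in the REINFORCE block then reads off p for action 1
    # and (1 - p) for action 0.
    assert action.shape == action_probs.shape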
def net2deeper_decoder(self, states, domain=None, build_train=True):
    # states: (states_num, num_steps, units)
    initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
    optimizer = TFUtils.build_optimizer(self.config["optimizer"])
    config = self.config["net2deeper"]
    output_initializer = self.get_initializer(config.get("output_initializer"), "output")

    with tf.variable_scope("Net2Deeper"):
        # sample action
        with tf.variable_scope("Sample"):
            seq_len = self.inputs["seq_len"][self.inputs["state_seg"]:]
            seq_num = array_ops.shape(seq_len)[0]
            gather_indices = tf.stack([tf.range(seq_num), seq_len - 1], 1)  # (states_num, 2)
            gather_output = tf.gather_nd(states, gather_indices)  # (states_num, units)

            # BiRNN read-out: forward half at the last valid step, backward half at step 0
            cell_units = self.config["encoder"]["hidden_units"]
            fw_output = gather_output[:, 0:cell_units]
            bw_output = states[:, 0, :][:, cell_units:]
            gather_output = tf.concat([fw_output, bw_output], axis=1)  # (states_num, units)

            place_logits = tf.layers.dense(
                gather_output, config["place_out_dim"],
                kernel_initializer=output_initializer["kernel"],
                bias_initializer=output_initializer["bias"],
                name="place_predictor")
            place_probs = TFUtils.activation_func("softmax", place_logits)
            update_ops["place_probs"] = place_probs

            param_logits = tf.layers.dense(
                gather_output, config["param_out_dim"],
                kernel_initializer=output_initializer["kernel"],
                bias_initializer=output_initializer["bias"],
                name="param_predictor")
            param_probs = TFUtils.activation_func("softmax", param_logits)
            update_ops["param_probs"] = param_probs

        # net2deeper train
        with tf.variable_scope("REINFORCE"):
            inputs["reward"] = tf.placeholder(tf.float32, [None], "reward")  # (states_num, )
            inputs["place_action"] = tf.placeholder(tf.int32, [None], "place_action")  # (states_num, )
            inputs["param_action"] = tf.placeholder(tf.int32, [None], "param_action")  # (states_num, )
            inputs["place_probs_mask"] = tf.placeholder(
                tf.float32, [None, config["place_out_dim"]], "place_probs_mask")
            inputs["param_probs_mask"] = tf.placeholder(
                tf.float32, [None, config["param_out_dim"]], "param_probs_mask")
            inputs["place_loss_mask"] = tf.placeholder(tf.float32, [None], "place_loss_mask")  # (states_num, )
            inputs["param_loss_mask"] = tf.placeholder(tf.float32, [None], "param_loss_mask")  # (states_num, )
            inputs["episode_num"] = tf.placeholder(tf.float32, shape=(), name="episode_num")

            # place head: mask invalid placements, renormalize, gather taken actions
            place_probs = tf.multiply(place_probs, inputs["place_probs_mask"])
            place_probs = tf.divide(place_probs, tf.reduce_sum(place_probs, axis=1, keep_dims=True))
            indices = tf.stack(
                [tf.range(array_ops.shape(inputs["place_action"])[0]), inputs["place_action"]], axis=1)
            place_probs = tf.gather_nd(place_probs, indices)  # (states_num, )
            log_place_probs = tf.log(place_probs)  # (states_num, )
            log_place_probs = tf.multiply(log_place_probs, inputs["reward"])
            log_place_probs = tf.multiply(log_place_probs, inputs["place_loss_mask"])
            log_place_probs = tf.reduce_sum(log_place_probs)

            # param head: same treatment
            param_probs = tf.multiply(param_probs, inputs["param_probs_mask"])
            param_probs = tf.divide(param_probs, tf.reduce_sum(param_probs, axis=1, keep_dims=True))
            indices = tf.stack(
                [tf.range(array_ops.shape(inputs["param_action"])[0]), inputs["param_action"]], axis=1)
            param_probs = tf.gather_nd(param_probs, indices)  # (states_num, )
            log_param_probs = tf.log(param_probs)
            log_param_probs = tf.multiply(log_param_probs, inputs["reward"])
            log_param_probs = tf.multiply(log_param_probs, inputs["param_loss_mask"])
            log_param_probs = tf.reduce_sum(log_param_probs)

            obj = log_place_probs + log_param_probs

            if build_train:
                # overheads: _loss
                overheads["loss"] = -obj / inputs["episode_num"]
                # operation: _reinforce
                update_ops["reinforce"] = optimizer.minimize(overheads["loss"], self.overheads["global_step"])

    self.initializers.update(add_domain2dict(initializers, domain))
    self.inputs.update(add_domain2dict(inputs, domain))
    self.overheads.update(add_domain2dict(overheads, domain))
    self.update_ops.update(add_domain2dict(update_ops, domain))
    self.summaries.update(add_domain2dict(summaries, domain))
    return obj
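# A hypothetical training call for the block above, assuming a live
# tf.Session `sess`, `domain=None` (so keys are unprefixed), and episode data
# collected elsewhere; none of these variable names are fixed by this file:
#
#   sess.run(model.update_ops["reinforce"],
#            feed_dict={model.inputs["reward"]: rewards,
#                       model.inputs["place_action"]: place_actions,
#                       model.inputs["param_action"]: param_actions,
#                       model.inputs["place_probs_mask"]: place_probs_mask,
#                       model.inputs["param_probs_mask"]: param_probs_mask,
#                       model.inputs["place_loss_mask"]: place_loss_mask,
#                       model.inputs["param_loss_mask"]: param_loss_mask,
#                       model.inputs["episode_num"]: float(num_episodes)})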