Example #1
	def _nas_given_input_sample(self, nas_inputs, domain=None):
		# nas_inputs: (batch_size, num_steps, units)
		initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
		
		seq_num = array_ops.shape(self.inputs["seq_len"])[0]
		gather_indices = tf.stack([tf.range(seq_num), self.inputs["seq_len"] - 1], 1)  # (batch_size, 2)
		gather_output = tf.gather_nd(nas_inputs, gather_indices)  # (batch_size, units)
		with tf.variable_scope("NAS"):
			probs = self._nas_classifier(gather_output, self.config["nas"], reuse=True)
			update_ops["given_input_action_probs"] = probs
		self.update_ops.update(add_domain2dict(update_ops, domain))
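
The gather_nd/stack pattern above selects the encoder output at the last valid timestep of each padded sequence. A minimal, self-contained sketch of the same trick, with made-up toy data and eager TensorFlow 2.x assumed:

    import tensorflow as tf

    outputs = tf.constant([[[1., 1.], [2., 2.], [0., 0.]],
                           [[3., 3.], [4., 4.], [5., 5.]]])     # (batch, num_steps, units)
    seq_len = tf.constant([2, 3])                               # valid length per sequence
    batch = tf.shape(seq_len)[0]
    indices = tf.stack([tf.range(batch), seq_len - 1], axis=1)  # (batch, 2): [row, last valid step]
    last_step = tf.gather_nd(outputs, indices)                  # (batch, units) -> [[2., 2.], [5., 5.]]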
Example #2
	def _nas_train(self, nas_inputs, domain=None):
		# nas_inputs: (batch_size, num_steps, units)
		initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
		optimizer = TFUtils.build_optimizer(self.config["optimizer"])
		num_steps = self.config["encoder"]["num_steps"]
		
		nas_config = self.config["nas"]
		with tf.variable_scope("NAS"):
			inputs["reward"] = tf.placeholder(tf.float32, [None], "reward")  # (batch_size, )
			inputs["probs_mask"] = tf.placeholder(tf.float32, [None, num_steps, None], "probs_mask")
			inputs["action"] = tf.placeholder(tf.int32, [None, num_steps], "action")  # (batch_size, num_steps)
			inputs["action_start"] = tf.placeholder(tf.int32, [None], "action_start")  # (batch_size, )
			
			# flat_nas_inputs: (batch_size * num_steps, units)
			flat_nas_inputs = tf.reshape(nas_inputs, [-1, nas_inputs.shape.as_list()[2]])
			probs = self._nas_classifier(flat_nas_inputs, nas_config, reuse=True)  # (batch_size * num_steps, act_num)
			probs_mask = tf.reshape(inputs["probs_mask"], [-1, probs.shape.as_list()[1]])
			probs = tf.multiply(probs, probs_mask)  # (batch_size * num_steps, act_num)
			probs = tf.divide(probs, tf.reduce_sum(probs, axis=1, keep_dims=True))  # (batch_size * num_steps, act_num)
			
			indices = tf.reshape(inputs["action"], shape=[-1])  # (batch_size * num_steps, )
			indices = tf.stack([tf.range(array_ops.shape(indices)[0]), indices], axis=1)  # (batch_size * num_steps, 2)
			probs = tf.gather_nd(probs, indices)  # (batch_size * num_steps, )
			probs = tf.reshape(probs, shape=[-1, num_steps])  # (batch_size, num_steps)
			log_probs = tf.log(probs)  # (batch_size, num_steps)
			action_mask = TFUtils.matrix_mask(num_steps, inputs["action_start"], self.inputs["seq_len"] - 1)
			log_probs = tf.multiply(log_probs, action_mask)  # (batch_size, num_steps)
			obj = tf.multiply(log_probs, tf.expand_dims(inputs["reward"], axis=1))  # (batch_size, num_steps)
			obj = tf.reduce_sum(obj) / tf.cast(array_ops.shape(obj)[0], tf.float32)
			overheads["loss"] = -obj
			update_ops["reinforce"] = optimizer.minimize(overheads["loss"], self.overheads["global_step"])
		
		self.initializers.update(add_domain2dict(initializers, domain))
		self.inputs.update(add_domain2dict(inputs, domain))
		self.overheads.update(add_domain2dict(overheads, domain))
		self.update_ops.update(add_domain2dict(update_ops, domain))
		self.summaries.update(add_domain2dict(summaries, domain))
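
The training graph above implements a REINFORCE-style objective: per-step log probabilities of the taken actions, masked to the valid action range, weighted by a per-episode reward, summed, averaged over the batch, and negated so the optimizer can minimize it. A toy numeric sketch of that computation (values are invented, TF 2.x assumed):

    import tensorflow as tf

    # probability assigned to the action actually taken at each step
    action_probs = tf.constant([[0.5, 0.25, 0.25],
                                [0.9, 0.05, 0.05]])              # (batch, num_steps)
    step_mask = tf.constant([[1., 1., 0.],
                             [1., 1., 1.]])                       # 1 = step contributes to the loss
    reward = tf.constant([1.0, -0.5])                             # (batch,)

    log_probs = tf.math.log(action_probs) * step_mask
    obj = tf.reduce_sum(log_probs * tf.expand_dims(reward, axis=1))
    loss = -obj / tf.cast(tf.shape(reward)[0], tf.float32)        # negative mean return -> minimize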
Example #3
	def _nas_one_step_sample(self, cell, domain=None):
		encoder_config = self.config["encoder"]
		if encoder_config["rnn_type"] == "BiRNN":
			raise ValueError("NAS does not support BiRNN!")
		
		inputs, update_ops = {}, {}
		with tf.variable_scope(self.encoder_scope):
			# sample layer: first input
			if self.config.get("embedding") and self.config["embedding"]["use_embedding"]:
				inputs["one_step_token"] = tf.placeholder(tf.int32, [None], "one_step_token")  # (batch_size, )
				embedding_config = self.config["embedding"]
				embedding_initializer = self.get_initializer(embedding_config.get("initializer"))
				one_step_token, _ = TFUtils.embedding_layer(inputs["one_step_token"], embedding_config,
															embedding_initializer,
															reuse=True, scope=self.embed_scope)  # (batch_size, n_input)
			else:
				inputs["one_step_token"] = tf.placeholder(tf.float32, [None, self.config["input_dim"]],
														  "one_step_token")
				one_step_token = inputs["one_step_token"]  # (batch_size, n_input)
			# sample next layer: hidden states
			inputs["init_cell_state"] = tf.placeholder(tf.float32, [encoder_config["num_layers"], 2, None,
																	encoder_config["hidden_units"]], "init_cell_state")
			init_cell_state = tf.unstack(inputs["init_cell_state"], axis=0)
			init_cell_state = tuple([rnn.LSTMStateTuple(init_cell_state[_i][0], init_cell_state[_i][1])
									 for _i in range(encoder_config["num_layers"])])
			with tf.variable_scope(encoder_config["rnn_type"], reuse=True):
				cell_output, cell_state = cell(one_step_token, init_cell_state)
		
		# cell_output: (batch_size, units)
		nas_config = self.config["nas"]
		with tf.variable_scope("NAS"):
			probs = self._nas_classifier(cell_output, nas_config, reuse=False)  # (batch_size, act_space)
			update_ops["one_step_cell_state"] = cell_state
			update_ops["one_step_action_probs"] = probs
		self.inputs.update(add_domain2dict(inputs, domain))
		self.update_ops.update(add_domain2dict(update_ops, domain))
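
The init_cell_state placeholder above packs a multi-layer LSTM state into a single tensor of shape (num_layers, 2, batch_size, hidden_units) and rebuilds the state tuple with unstack plus LSTMStateTuple. A hedged sketch of that round-trip, using the TF1-compat class since tf.contrib.rnn is not available in TF 2.x:

    import tensorflow as tf

    num_layers, batch, units = 2, 3, 4
    packed = tf.zeros([num_layers, 2, batch, units])   # (layers, c/h, batch, units)
    per_layer = tf.unstack(packed, axis=0)             # list of (2, batch, units), one per layer
    state = tuple(
        tf.compat.v1.nn.rnn_cell.LSTMStateTuple(layer[0], layer[1])  # c state, h state
        for layer in per_layer)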
Example #4
	def seq2v_decoder_(self, encoder_outputs, task, domain=None, build_train=True):
		initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
		decoder_inputs = tf.stack(encoder_outputs, axis=0)  # (num_steps, seq_num, hidden_units)
		decoder_inputs = tf.transpose(decoder_inputs, [1, 0, 2])  # (seq_num, num_steps, hidden_units)
		
		encoder_config = self.config["encoder"]
		seq2v_config = self.config["seq2v"]
		with tf.variable_scope(task):
			if seq2v_config["use_all_outputs"]:
				decoder_inputs = tf.reshape(decoder_inputs, [-1, np.prod(decoder_inputs.shape.as_list()[1:])])
			else:
				seq_num = array_ops.shape(self.inputs["seq_len"])[0]
				gather_indices = tf.stack([tf.range(seq_num), self.inputs["seq_len"] - 1], 1)  # (seq_num, 2)
				gather_output = tf.gather_nd(decoder_inputs, gather_indices)  # (seq_num, hidden_units)
				if encoder_config["rnn_type"] == "BiRNN":
					hidden_units = self.config["encoder"]["hidden_units"]
					fw_output = gather_output[:, 0:hidden_units]
					bw_output = decoder_inputs[:, 0, :][:, hidden_units:]
					decoder_inputs = tf.concat([fw_output, bw_output], axis=1)
				else:
					decoder_inputs = gather_output
			
			if seq2v_config.get("no_out_bias"):
				out_bias = False
			else:
				out_bias = True
			# dense and output layers: (seq_num, out_dim)
			dense_initializer = self.get_initializer(seq2v_config.get("dense_initializer"), "dense")
			output_initializer = self.get_initializer(seq2v_config.get("output_initializer"), "output")
			decoder_outputs = TFUtils.classifier(decoder_inputs, seq2v_config["output_dim"], seq2v_config["dense"],
												 dense_initializer, output_initializer,
												 reuse=False, out_bias=out_bias, scope="Output")
			
			if "regression" in task:
				inputs["labels"] = tf.placeholder(tf.float32, shape=[None, seq2v_config["output_dim"]], name="labels")
			else:
				inputs["labels"] = tf.placeholder(tf.int32, shape=[None], name="labels")
			
			with tf.variable_scope("Overheads"):
				if "regression" in task:
					labels = inputs["labels"]
					square_loss = tf.square(decoder_outputs - labels, name="square_loss")
					loss = tf.reduce_mean(square_loss)
					predictions = decoder_outputs
				else:
					labels = tf.one_hot(inputs["labels"], depth=seq2v_config["output_dim"], name="one_hot_labels")
					softmax_loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=decoder_outputs,
																		   name="softmax_loss")
					loss = tf.reduce_mean(softmax_loss)
					predictions = tf.argmax(decoder_outputs, axis=-1, name="predictions")
				overheads["loss"], overheads["predictions"] = loss, predictions
				if build_train:
					optimizer = TFUtils.build_optimizer(self.config["optimizer"])
					update_ops["minimize"] = optimizer.minimize(loss, global_step=self.overheads["global_step"])
			if build_train:
				TFUtils.train_loop(update_ops["minimize"], task, initializers, inputs, overheads, update_ops, summaries)
			TFUtils.test_loop(task, initializers, inputs, overheads, update_ops, summaries, stage="test")
			TFUtils.test_loop(task, initializers, inputs, overheads, update_ops, summaries, stage="validate")
		
		self.initializers.update(add_domain2dict(initializers, domain))
		self.inputs.update(add_domain2dict(inputs, domain))
		self.overheads.update(add_domain2dict(overheads, domain))
		self.update_ops.update(add_domain2dict(update_ops, domain))
		self.summaries.update(add_domain2dict(summaries, domain))
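
In the BiRNN branch above, the forward half of the output is taken at the last valid timestep and the backward half at timestep 0 (where the reversed RNN has already seen the whole sequence), and the two halves are concatenated. A small stand-alone sketch of that selection, with hypothetical shapes and TF 2.x assumed:

    import tensorflow as tf

    hidden_units = 3
    outputs = tf.random.normal([2, 5, 2 * hidden_units])        # (seq_num, num_steps, 2 * hidden_units)
    seq_len = tf.constant([4, 5])
    idx = tf.stack([tf.range(tf.shape(seq_len)[0]), seq_len - 1], axis=1)
    last = tf.gather_nd(outputs, idx)                            # (seq_num, 2 * hidden_units)
    fw = last[:, :hidden_units]                                  # forward RNN at its last valid step
    bw = outputs[:, 0, hidden_units:]                            # backward RNN at step 0
    features = tf.concat([fw, bw], axis=1)                       # (seq_num, 2 * hidden_units)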
Example #5
    def net2wider_decoder(self, states, domain=None, build_train=True):
        # states: (states_num, num_steps, units)
        initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
        optimizer = TFUtils.build_optimizer(self.config["optimizer"])
        states_num = array_ops.shape(states)[0]

        config = self.config["net2wider"]
        with tf.variable_scope("Net2Wider"):
            # sample action
            with tf.variable_scope("Sample"):
                # input valid_action: (states_num, max_ac_num)
                inputs["valid_action"] = tf.placeholder(tf.int32,
                                                        shape=[None, None],
                                                        name="valid_action")
                tmp = (tf.expand_dims(tf.range(states_num), axis=1) +
                       tf.zeros_like(inputs["valid_action"], dtype=tf.int32))  # (states_num, max_ac_num)
                indices = tf.stack([tmp, inputs["valid_action"]],
                                   axis=2)  # (states_num, max_ac_num, 2)
                indices = tf.reshape(indices,
                                     [-1, 2])  # (states_num * max_ac_num, 2)
                states = tf.gather_nd(
                    states, indices)  # (states_num * max_ac_num, units)
                if config["seq2seq"]:
                    pass  # TODO
                else:
                    dense_initializer = self.get_initializer(
                        config.get("dense_initializer"), "dense")
                    output_initializer = self.get_initializer(
                        config.get("output_initializer"), "output")
                    logits = TFUtils.classifier(
                        states,
                        1,
                        config["dense"],
                        dense_initializer,
                        output_initializer,
                        reuse=False,
                        scope="classifier")  # (states_num * max_ac_num, 1)

                    probs = tf.nn.sigmoid(
                        logits)  # (states_num * max_ac_num, 1)
                    # operation: sample net2wider actions, (states_num, max_ac_num)
                    update_ops["action_probs"] = tf.reshape(
                        probs, shape=[states_num, -1])

            # net2wider train
            with tf.variable_scope("REINFORCE"):
                inputs["reward"] = tf.placeholder(tf.float32, [None],
                                                  "reward")  # (states_num)
                inputs["action"] = tf.placeholder(
                    tf.int32, [None, None],
                    "action")  # (states_num, max_ac_num)
                inputs["action_mask"] = tf.placeholder(tf.float32,
                                                       [None, None],
                                                       "action_mask")
                inputs["episode_num"] = tf.placeholder(tf.float32,
                                                       shape=(),
                                                       name="episode_num")
                indices = tf.reshape(inputs["action"],
                                     [-1])  # (states_num * max_ac_num)
                indices = tf.stack(
                    [tf.range(array_ops.shape(indices)[0]), indices], 1)
                ex_probs = tf.concat([1 - probs, probs],
                                     axis=1)  # (states_num * max_ac_num, 2)
                action_probs = tf.gather_nd(
                    ex_probs, indices)  # (states_num * max_ac_num)
                log_action_probs = tf.log(
                    action_probs)  # (states_num * max_ac_num)
                log_action_probs = tf.reshape(
                    log_action_probs, shape=[states_num,
                                             -1])  # (states_num, max_ac_num)
                log_action_probs = tf.multiply(
                    log_action_probs,
                    inputs["action_mask"])  # (states_num, max_ac_num)
                obj = tf.multiply(log_action_probs,
                                  tf.expand_dims(inputs["reward"], axis=1))
                obj = tf.reduce_sum(obj)
                if build_train:
                    # overheads: _loss
                    overheads["loss"] = -obj / inputs["episode_num"]
                    # operation: _reinforce
                    update_ops["reinforce"] = optimizer.minimize(
                        overheads["loss"], self.overheads["global_step"])

            self.initializers.update(add_domain2dict(initializers, domain))
            self.inputs.update(add_domain2dict(inputs, domain))
            self.overheads.update(add_domain2dict(overheads, domain))
            self.update_ops.update(add_domain2dict(update_ops, domain))
            self.summaries.update(add_domain2dict(summaries, domain))

            return obj
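
The Sample/REINFORCE blocks above turn a per-unit sigmoid probability p into an explicit two-column distribution [1 - p, p] and then pick the probability of the sampled binary action with gather_nd. A toy sketch of that step (made-up values, TF 2.x assumed):

    import tensorflow as tf

    probs = tf.constant([[0.8], [0.3], [0.6]])                  # (n, 1), P(action = 1)
    actions = tf.constant([1, 0, 1])                             # sampled widen/keep decisions
    ex_probs = tf.concat([1. - probs, probs], axis=1)            # (n, 2): [P(a=0), P(a=1)]
    idx = tf.stack([tf.range(tf.shape(actions)[0]), actions], axis=1)
    action_probs = tf.gather_nd(ex_probs, idx)                   # -> [0.8, 0.7, 0.6]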
Example #6
    def net2deeper_decoder(self, states, domain=None, build_train=True):
        # states: (states_num, num_steps, units)
        initializers, inputs, overheads, update_ops, summaries = {}, {}, {}, {}, {}
        optimizer = TFUtils.build_optimizer(self.config["optimizer"])

        config = self.config["net2deeper"]
        output_initializer = self.get_initializer(
            config.get("output_initializer"), "output")
        with tf.variable_scope("Net2Deeper"):
            # sample action
            with tf.variable_scope("Sample"):
                seq_len = self.inputs["seq_len"][self.inputs["state_seg"]:]
                seq_num = array_ops.shape(seq_len)[0]
                gather_indices = tf.stack([tf.range(seq_num), seq_len - 1],
                                          1)  # (states_num, 2)
                gather_output = tf.gather_nd(
                    states, gather_indices)  # (states_num, units)

                cell_units = self.config["encoder"]["hidden_units"]
                fw_output = gather_output[:, 0:cell_units]
                bw_output = states[:, 0, :][:, cell_units:]
                gather_output = tf.concat([fw_output, bw_output],
                                          axis=1)  # (states_num, units)

                place_logits = tf.layers.dense(
                    gather_output,
                    config["place_out_dim"],
                    kernel_initializer=output_initializer["kernel"],
                    bias_initializer=output_initializer["bias"],
                    name="place_predictor")
                place_probs = TFUtils.activation_func("softmax", place_logits)
                update_ops["place_probs"] = place_probs

                param_logits = tf.layers.dense(
                    gather_output,
                    config["param_out_dim"],
                    kernel_initializer=output_initializer["kernel"],
                    bias_initializer=output_initializer["bias"],
                    name="param_predictor")
                param_probs = TFUtils.activation_func("softmax", param_logits)
                update_ops["param_probs"] = param_probs

            # net2deeper train
            with tf.variable_scope("REINFORCE"):
                inputs["reward"] = tf.placeholder(tf.float32, [None],
                                                  "reward")  # (states_num)
                inputs["place_action"] = tf.placeholder(
                    tf.int32, [None], "place_action")  # (states_num)
                inputs["param_action"] = tf.placeholder(
                    tf.int32, [None], "param_action")  # (states_num)
                inputs["place_probs_mask"] = \
                 tf.placeholder(tf.float32, [None, config["place_out_dim"]], "place_probs_mask")
                inputs["param_probs_mask"] = \
                 tf.placeholder(tf.float32, [None, config["param_out_dim"]], "param_out_dim")
                inputs["place_loss_mask"] = tf.placeholder(
                    tf.float32, [None], "place_loss_mask")  # (states_num)
                inputs["param_loss_mask"] = tf.placeholder(
                    tf.float32, [None], "param_loss_mask")  # (states_num)
                inputs["episode_num"] = tf.placeholder(tf.float32,
                                                       shape=(),
                                                       name="episode_num")

                place_probs = tf.multiply(place_probs,
                                          inputs["place_probs_mask"])
                place_probs = tf.divide(
                    place_probs,
                    tf.reduce_sum(place_probs, axis=1, keep_dims=True))
                indices = tf.stack(
                    [tf.range(array_ops.shape(inputs["place_action"])[0]),
                     inputs["place_action"]], axis=1)
                place_probs = tf.gather_nd(place_probs,
                                           indices)  # (states_num)
                log_place_probs = tf.log(place_probs)  # (states_num)
                log_place_probs = tf.multiply(log_place_probs,
                                              inputs["reward"])
                log_place_probs = tf.multiply(log_place_probs,
                                              inputs["place_loss_mask"])
                log_place_probs = tf.reduce_sum(log_place_probs)

                param_probs = tf.multiply(param_probs,
                                          inputs["param_probs_mask"])
                param_probs = tf.divide(
                    param_probs,
                    tf.reduce_sum(param_probs, axis=1, keep_dims=True))
                indices = tf.stack(
                    [tf.range(array_ops.shape(inputs["param_action"])[0]),
                     inputs["param_action"]], axis=1)
                param_probs = tf.gather_nd(param_probs,
                                           indices)  # (states_num)
                log_param_probs = tf.log(param_probs)
                log_param_probs = tf.multiply(log_param_probs,
                                              inputs["reward"])
                log_param_probs = tf.multiply(log_param_probs,
                                              inputs["param_loss_mask"])
                log_param_probs = tf.reduce_sum(log_param_probs)

                obj = (log_place_probs + log_param_probs)
                if build_train:
                    # overheads: _loss
                    overheads["loss"] = -obj / inputs["episode_num"]
                    # operation: _reinforce
                    update_ops["reinforce"] = optimizer.minimize(
                        overheads["loss"], self.overheads["global_step"])

        self.initializers.update(add_domain2dict(initializers, domain))
        self.inputs.update(add_domain2dict(inputs, domain))
        self.overheads.update(add_domain2dict(overheads, domain))
        self.update_ops.update(add_domain2dict(update_ops, domain))
        self.summaries.update(add_domain2dict(summaries, domain))

        return obj
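
Both predictors above restrict their softmax output to the currently valid actions by multiplying with a 0/1 mask and renormalizing each row. A minimal numeric sketch of that masking step (toy values, TF 2.x keepdims spelling):

    import tensorflow as tf

    probs = tf.constant([[0.5, 0.3, 0.2],
                         [0.1, 0.6, 0.3]])                       # softmax outputs
    mask = tf.constant([[1., 1., 0.],
                        [0., 1., 1.]])                           # 1 = action currently allowed
    masked = probs * mask
    renorm = masked / tf.reduce_sum(masked, axis=1, keepdims=True)
    # row 0 -> [0.625, 0.375, 0.0]; row 1 -> [0.0, 0.667, 0.333]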