Ejemplo n.º 1
0
    def __init__(self):
        """Assemble the embedders and the probability-generating networks.

        A main text embedder and a main mixing network are built first.
        Then, for every entry of ``self.buckets``, an additional text
        embedder (with ``num_steps`` set to the bucket size) and an
        additional mixing network are built, each receiving the
        corresponding main network through ``create_copy``.

        NOTE(review): ``image_dim`` is not a parameter of this method and
        ``self.train_alg`` is read but never assigned here; both have to be
        supplied from outside this method -- confirm where they come from.
        """
        # ---- Text ---------------------------------------------------------
        self.n_text = 250
        self.text_embedder = embed_token_seq.EmbedTokenSeq(self.n_text)
        text_embedding = self.text_embedder.get_output()

        # One extra text embedder per bucket; each is handed the main
        # embedder via create_copy (presumably to reuse its parameters --
        # TODO confirm against EmbedTokenSeq).
        self.buckets = [15, 30, 45]
        self.embed_token_seq_buckets = []
        for num_steps in self.buckets:
            self.embed_token_seq_buckets.append(
                embed_token_seq.EmbedTokenSeq(
                    self.n_text, num_steps=num_steps,
                    create_copy=self.text_embedder))

        # ---- Image --------------------------------------------------------
        self.image_preprocessor = image_preprocessing.ImagePreprocessing()

        self.n_image = 200
        self.image_embedder = embed_image.EmbedImage(self.n_image, image_dim)
        image_embedding = self.image_embedder.get_output()

        # ---- Previous action ----------------------------------------------
        # The embedding width is the sum of the status-flag and direction
        # widths. The literals 3 and 6 are presumably the number of status
        # flags and directions, with (2, 5) as the "no previous action"
        # pair -- TODO confirm against EmbedPreviousAction.
        self.n_status_flag_dim = 18
        self.n_direction_dim = 24
        self.n_previous_action_embedding = \
            self.n_status_flag_dim + self.n_direction_dim
        self.null_previous_action = (2, 5)
        self.previous_action_embedder = epa.EmbedPreviousAction(
            3, self.n_status_flag_dim, 6, self.n_direction_dim)
        previous_action_embedding = self.previous_action_embedder.get_output()

        # ---- Mixing network -----------------------------------------------
        # Softmax output is requested only for these three training
        # algorithms; every other algorithm gets use_softmax=False.
        train_alg = self.train_alg
        use_softmax = bool(
            train_alg == TrainingAlgorithm.SUPERVISEDMLE
            or train_alg == TrainingAlgorithm.REINFORCE
            or train_alg == TrainingAlgorithm.MIXER)

        self.mix_and_gen_prob = mix_and_gen_prob.MixAndGenerateProbabilities(
            self.n_text, self.n_image, self.n_previous_action_embedding,
            text_embedding, image_embedding, previous_action_embedding,
            5, use_softmax)

        # One mixing network per bucket, fed by that bucket's text embedder
        # and sharing the image / previous-action embeddings built above.
        self.mix_and_gen_prob_buckets = []
        for bucket_embedder in self.embed_token_seq_buckets:
            self.mix_and_gen_prob_buckets.append(
                mix_and_gen_prob.MixAndGenerateProbabilities(
                    self.n_text,
                    self.n_image,
                    self.n_previous_action_embedding,
                    bucket_embedder.get_output(),
                    image_embedding,
                    previous_action_embedding,
                    5,
                    use_softmax,
                    create_copy=self.mix_and_gen_prob))
Ejemplo n.º 2
0
    def __init__(self, n_text, image_dim, n_image,
                 n_direction_dim, n_block_dim, scope_name="state_value"):
        """Build a state-value network V(s) together with its training op.

        The text, image and previous-action embeddings are concatenated and
        passed through one ReLU hidden layer and a linear output layer that
        produces ``self.state_value``. The loss is the mean squared
        difference between ``self.state_value`` and the
        ``self.total_exp_reward`` placeholder, minimised with Adam.

        Args:
            n_text: size passed to the text embedder; also the text share
                of the concatenated state width.
            image_dim: forwarded unchanged to ``embed_image.EmbedImage``.
            n_image: size passed to the image embedder; also the image
                share of the concatenated state width.
            n_direction_dim: width of the direction part of the
                previous-action embedding.
            n_block_dim: width of the block part of the previous-action
                embedding.
            scope_name: prefix for the sub-network scope names and name of
                the ``tf.name_scope`` holding the dense-layer variables.
        """
        # Text embedding network.
        self.text_embedder = embed_token_seq.EmbedTokenSeq(
            n_text, scope_name=scope_name + "_RNN")
        text_embedding = self.text_embedder.get_output()

        # Image preprocessing and embedding networks.
        self.image_preprocessor = image_preprocessing.ImagePreprocessing()
        self.image_embedder = embed_image.EmbedImage(
            n_image, image_dim, scope_name=scope_name + "_embed_image")
        image_embedding = self.image_embedder.get_output()

        # Previous-action embedding; its width is the sum of both parts.
        # The literals 6 and 21 are presumably the direction and block
        # vocabulary sizes -- TODO confirm against EmbedPreviousAction.
        n_previous_action_embedding = n_direction_dim + n_block_dim
        self.previous_action_embedder = epa.EmbedPreviousAction(
            6, n_direction_dim, 21, n_block_dim,
            scope_name=scope_name + "_previous_action")
        previous_action_embedding = self.previous_action_embedder.get_output()

        # Concatenate along axis 1 (legacy tf.concat(axis, values) order).
        state_parts = [image_embedding, text_embedding,
                       previous_action_embedding]
        observed_state = tf.concat(1, state_parts)
        n_state_dim = n_text + n_image + n_previous_action_embedding
        hidden_units = 120

        with tf.name_scope(scope_name):
            # Dense-layer parameters: small random weights, zero biases.
            w_hid = tf.Variable(
                tf.random_normal([n_state_dim, hidden_units], stddev=0.01))
            w_out = tf.Variable(
                tf.random_normal([hidden_units, 1], stddev=0.01))
            self.weights = {'w_hid': w_hid, 'w_out': w_out}

            b_hid = tf.Variable(
                tf.constant(0.0, dtype=None, shape=[hidden_units]))
            b_out = tf.Variable(tf.constant(0.0, dtype=None, shape=[1]))
            self.biases = {'b_hid': b_hid, 'b_out': b_out}

        # Hidden ReLU layer followed by the linear value head.
        hidden_pre_activation = tf.add(
            tf.matmul(observed_state, self.weights["w_hid"]),
            self.biases["b_hid"])
        latent_vector = tf.nn.relu(hidden_pre_activation)
        self.state_value = tf.add(
            tf.matmul(latent_vector, self.weights["w_out"]),
            self.biases["b_out"])

        # Regression target and mean-squared-error loss.
        self.total_exp_reward = tf.placeholder(
            dtype=tf.float32, shape=None,
            name=scope_name + "_total_exp_reward")
        value_error = tf.sub(self.state_value, self.total_exp_reward)
        self.loss = tf.reduce_mean(tf.square(value_error))

        optimizer = tf.train.AdamOptimizer(0.001)

        # Clipping is hard-wired on; the plain minimize() path is kept for
        # parity with the original switch.
        using_grad_clip = True
        grad_clip_val = 5.0
        if using_grad_clip:
            capped_gvs = []
            for grad, var in optimizer.compute_gradients(self.loss):
                # Variables without a gradient are passed through untouched.
                if grad is not None:
                    grad = tf.clip_by_norm(grad, grad_clip_val)
                capped_gvs.append((grad, var))
            self.train_step = optimizer.apply_gradients(capped_gvs)
        else:
            self.train_step = optimizer.minimize(self.loss)
Ejemplo n.º 3
0
from model import embed_token_seq, image_preprocessing, mix_and_gen_prob

# Script: builds a text embedder, an image embedder and a mixing network,
# then opens a session to feed batches forward through the graph.
# NOTE(review): `embed_image`, `tf` and `time` are used below but their
# imports are not visible in this excerpt -- confirm they exist in the file.

### Create the computation graph
# Text side: embedder with output size 20, plus the handles read from it.
n_text_output = 20
text_embedder = embed_token_seq.EmbedTokenSeq(n_text_output)
text_embed_input = text_embedder.get_input()
text_embed_output = text_embedder.get_output()
max_steps = text_embedder.get_max_time_step()
mask = text_embedder.get_zero_mask()
batch_size = text_embedder.get_batch_size()

# NOTE: this rebinds `image_preprocessing` from the imported module to an
# ImagePreprocessing instance; the module is unreachable under that name
# from here on.
image_preprocessing = image_preprocessing.ImagePreprocessing()

# Image side: embedder with output size 250 and its input/output handles.
n_image_output = 250
image_embedder = embed_image.EmbedImage(n_image_output)
image_embed_input = image_embedder.get_images_data()
image_embed_output = image_embedder.get_output()

# Mixing network over the text and image embeddings, given n_actions = 81.
n_actions = 81
mix_text_image = mix_and_gen_prob.MixAndGenerateProbabilities(n_text_output, n_image_output, text_embed_output,
                                                              image_embed_output, n_actions)
output = mix_text_image.get_joined_probabilities()

## Do feed forwarding over a batch
# Legacy TF API: create the session and initialise every variable.
sess = tf.Session()
sess.run(tf.initialize_all_variables())

# Wall-clock start time, then 19 iterations (i = 1..19). Only the first
# statement of the loop body is visible in this excerpt.
start = time.time()
for i in range(1, 20):
    my_batch_size = 32
Ejemplo n.º 4
0
    def __init__(self, n_text, image_dim, n_image,
                 n_direction_dim, n_block_dim, scope_name="Q_network"):
        """Build the Q-value network, its bucketed variants and summaries.

        A main text embedder and a main Q-value mixing network are created,
        followed by one extra text embedder and one extra mixing network per
        entry of ``self.buckets``; each extra network receives the main one
        through ``create_copy``. Every sub-network is given the same
        ``scope_name``.

        Args:
            n_text: size passed to the text embedders and mixing networks.
            image_dim: forwarded unchanged to ``embed_image.EmbedImage``.
            n_image: size passed to the image embedder and mixing networks.
            n_direction_dim: width of the direction part of the
                previous-action embedding.
            n_block_dim: width of the block part of the previous-action
                embedding.
            scope_name: scope name handed to every sub-network.
        """
        # ---- Text ---------------------------------------------------------
        self.n_text = n_text
        self.text_embedder = embed_token_seq.EmbedTokenSeq(
            self.n_text, scope_name=scope_name)
        text_embedding = self.text_embedder.get_output()

        # One extra text embedder per bucket, with num_steps = bucket size.
        self.buckets = [15, 30, 45]
        self.embed_token_seq_buckets = []
        for num_steps in self.buckets:
            self.embed_token_seq_buckets.append(
                embed_token_seq.EmbedTokenSeq(
                    self.n_text, num_steps=num_steps,
                    create_copy=self.text_embedder, scope_name=scope_name))

        # ---- Image --------------------------------------------------------
        self.image_preprocessor = image_preprocessing.ImagePreprocessing()

        self.n_image = n_image
        self.image_embedder = embed_image.EmbedImage(
            self.n_image, image_dim, scope_name=scope_name)
        image_embedding = self.image_embedder.get_output()

        # ---- Previous action ----------------------------------------------
        # The literals 6 and 21 are presumably the direction and block
        # vocabulary sizes, with (5, 20) as the "no previous action" pair
        # -- TODO confirm against EmbedPreviousAction.
        self.n_direction_dim = n_direction_dim
        self.n_blocks_dim = n_block_dim
        self.n_previous_action_embedding = \
            self.n_direction_dim + self.n_blocks_dim
        self.null_previous_action = (5, 20)
        self.previous_action_embedder = epa.EmbedPreviousAction(
            6, self.n_direction_dim, 21, self.n_blocks_dim,
            scope_name=scope_name)
        previous_action_embedding = self.previous_action_embedder.get_output()

        # ---- Q-value mixing network ---------------------------------------
        # 81 is presumably the number of actions -- TODO confirm.
        self.mix_and_gen_q_val = mix_and_gen_q_values.MixAndGenerateQValues(
            self.n_text, self.n_image, self.n_previous_action_embedding,
            text_embedding, image_embedding, previous_action_embedding,
            81, scope_name=scope_name)

        # TODO BUG (marker carried over from the original author; the
        # suspected bug in this bucketed section is not described).
        self.mix_and_gen_q_val_buckets = []
        for bucket_embedder in self.embed_token_seq_buckets:
            self.mix_and_gen_q_val_buckets.append(
                mix_and_gen_q_values.MixAndGenerateQValues(
                    self.n_text, self.n_image,
                    self.n_previous_action_embedding,
                    bucket_embedder.get_output(), image_embedding,
                    previous_action_embedding, 81,
                    create_copy=self.mix_and_gen_q_val,
                    scope_name=scope_name))

        # ---- Inputs / outputs ---------------------------------------------
        self.target = tf.placeholder(dtype=tf.float32, shape=None)
        self.model_output = self.mix_and_gen_q_val.get_q_val()
        self.model_output_indices = tf.placeholder(dtype=tf.int32, shape=None)

        # Min / max / mean of the Q values, merged into one summary op.
        q_val_summaries = []
        for tag, reduce_fn in (("Q Val Min", tf.reduce_min),
                               ("Q Val Max", tf.reduce_max),
                               ("Q Val Mean", tf.reduce_mean)):
            q_val_summaries.append(
                tf.scalar_summary(tag, reduce_fn(self.model_output)))

        self.feed_forward_summary = tf.merge_summary(q_val_summaries)
        self.feed_iter = 0
Ejemplo n.º 5
0
    def __init__(self, image_dim, num_actions):
        """Build the probability network, its bucketed variants and summaries.

        A main text embedder and a main mixing network are created, followed
        by one extra text embedder and one extra mixing network per entry of
        ``self.buckets``; each extra network receives the main one through
        ``create_copy``. The main mixing network's block and direction
        probabilities become ``self.model_output``.

        Args:
            image_dim: forwarded unchanged to ``embed_image.EmbedImage``.
            num_actions: stored as ``self.num_actions``; not otherwise used
                in this method.
        """
        self.num_actions = num_actions

        # ---- Text ---------------------------------------------------------
        self.n_text = 250
        self.text_embedder = embed_token_seq.EmbedTokenSeq(self.n_text)
        text_embedding = self.text_embedder.get_output()

        # One extra text embedder per bucket, with num_steps = bucket size.
        self.buckets = [15, 30, 45]
        self.embed_token_seq_buckets = []
        for num_steps in self.buckets:
            self.embed_token_seq_buckets.append(
                embed_token_seq.EmbedTokenSeq(
                    self.n_text, num_steps=num_steps,
                    create_copy=self.text_embedder))

        # ---- Image --------------------------------------------------------
        self.image_preprocessor = image_preprocessing.ImagePreprocessing()

        self.n_image = 200
        self.image_embedder = embed_image.EmbedImage(self.n_image, image_dim)
        image_embedding = self.image_embedder.get_output()

        # ---- Previous action ----------------------------------------------
        # The literals 6 and 21 are presumably the direction and block
        # vocabulary sizes, with (5, 20) as the "no previous action" pair
        # -- TODO confirm against EmbedPreviousAction.
        self.n_direction_dim = 24
        self.n_blocks_dim = 32
        self.n_previous_action_embedding = \
            self.n_direction_dim + self.n_blocks_dim
        self.null_previous_action = (5, 20)
        self.previous_action_embedder = epa.EmbedPreviousAction(
            6, self.n_direction_dim, 21, self.n_blocks_dim)
        previous_action_embedding = self.previous_action_embedder.get_output()

        # ---- Mixing network (softmax output always enabled here) ----------
        use_softmax = True
        self.mix_and_gen_prob = mix_and_gen_prob.MixAndGenerateProbabilities(
            self.n_text, self.n_image, self.n_previous_action_embedding,
            text_embedding, image_embedding, previous_action_embedding,
            5, use_softmax)

        # One mixing network per bucket, fed by that bucket's text embedder.
        self.mix_and_gen_prob_buckets = []
        for bucket_embedder in self.embed_token_seq_buckets:
            self.mix_and_gen_prob_buckets.append(
                mix_and_gen_prob.MixAndGenerateProbabilities(
                    self.n_text, self.n_image,
                    self.n_previous_action_embedding,
                    bucket_embedder.get_output(), image_embedding,
                    previous_action_embedding, 5, use_softmax,
                    create_copy=self.mix_and_gen_prob))

        # ---- Inputs / outputs ---------------------------------------------
        self.target = tf.placeholder(dtype=tf.float32, shape=None)
        self.block_indices = tf.placeholder(dtype=tf.int32, shape=None)
        self.direction_indices = tf.placeholder(dtype=tf.int32, shape=None)
        block_prob, direction_prob = \
            self.mix_and_gen_prob.get_joined_probabilities()
        self.model_output = (block_prob, direction_prob)
        self.model_output_indices = (self.block_indices,
                                     self.direction_indices)

        # Min / max / mean of the direction probabilities, merged into one
        # summary op.
        direction_summaries = []
        for tag, reduce_fn in (("Direction Prob Min", tf.reduce_min),
                               ("Direction Prob Max", tf.reduce_max),
                               ("Direction Prob Mean", tf.reduce_mean)):
            direction_summaries.append(
                tf.scalar_summary(tag, reduce_fn(direction_prob)))

        self.feed_forward_summary = tf.merge_summary(direction_summaries)
        self.feed_iter = 0
Ejemplo n.º 6
0
import embed_image
import tensorflow as tf

from model import image_preprocessing

# Smoke test for EmbedImage: the same screenshot is preprocessed three times,
# the results are fed through the image embedder, and the embedding is
# printed so the rows can be compared by eye.

# NOTE: this rebinds `image_preprocessing` from the imported module to an
# ImagePreprocessing instance (kept for compatibility with the original).
image_preprocessing = image_preprocessing.ImagePreprocessing()
embedImage = embed_image.EmbedImage(20)

sess = tf.Session()
try:
    sess.run(tf.initialize_all_variables())

    # A PNG is binary data: read it in 'rb' mode, and use a context manager
    # so the file handle is closed instead of being left to the GC.
    with tf.gfile.FastGFile("../img/Screenshot.png", 'rb') as image_file:
        image_data = image_file.read()
    # Despite the name, these are the raw image bytes, repeated three times.
    file_names = [image_data] * 3

    raw_image_input = image_preprocessing.get_raw_image_input()
    final_image_output = image_preprocessing.get_final_image()

    # Run the preprocessing graph once per raw image.
    image_datas = [
        final_image_output.eval(session=sess,
                                feed_dict={raw_image_input: file_name})
        for file_name in file_names
    ]

    # Named `images_input` rather than `input` to avoid shadowing the builtin.
    images_input = embedImage.get_images_data()
    output = embedImage.get_output()

    # NOTE(review): the preprocessed images are wrapped in one more list
    # before feeding, as in the original -- confirm this matches the shape
    # EmbedImage expects.
    result = output.eval(session=sess, feed_dict={images_input: [image_datas]})

    # Single-argument parenthesised print gives the same output on Python 2
    # and is also valid Python 3.
    print(result)
    print("Verify that all rows are same")
finally:
    # Release the session even if graph evaluation raised.
    sess.close()