Example #1
    def create_dc_actor_critic(self, h_size, num_layers):
        num_streams = 1
        hidden_streams = self.create_new_obs(num_streams, h_size, num_layers)
        hidden = hidden_streams[0]

        if self.use_recurrent:
            tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
            self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32, name='prev_action')
            self.prev_action_oh = c_layers.one_hot_encoding(self.prev_action, self.a_size)
            hidden = tf.concat([hidden, self.prev_action_oh], axis=1)

            self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
            hidden, self.memory_out = self.create_recurrent_encoder(hidden, self.memory_in)
            self.memory_out = tf.identity(self.memory_out, name='recurrent_out')

        self.policy = tf.layers.dense(hidden, self.a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))

        self.all_probs = tf.nn.softmax(self.policy, name="action_probs")
        self.output = tf.multinomial(self.policy, 1)
        self.output = tf.identity(self.output, name="action")

        self.value = tf.layers.dense(hidden, 1, activation=None)
        self.value = tf.identity(self.value, name="value_estimate")
        self.entropy = -tf.reduce_sum(self.all_probs * tf.log(self.all_probs + 1e-10), axis=1)
        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(self.action_holder, self.a_size)

        self.all_old_probs = tf.placeholder(shape=[None, self.a_size], dtype=tf.float32, name='old_probabilities')

        # We reshape these tensors to [batch x 1] in order to be of the same rank as continuous control probabilities.
        self.probs = tf.expand_dims(tf.reduce_sum(self.all_probs * self.selected_actions, axis=1), 1)
        self.old_probs = tf.expand_dims(tf.reduce_sum(self.all_old_probs * self.selected_actions, axis=1), 1)
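The one-hot mask built from action_holder is what reduces the full softmax output to the probability of the action actually taken. A quick NumPy illustration of that reduction (values assumed, batch of one, a_size = 3):

import numpy as np

all_probs = np.array([[0.1, 0.7, 0.2]])      # softmax output
selected_actions = np.array([[0., 1., 0.]])  # one_hot_encoding(action=1, a_size=3)
probs = (all_probs * selected_actions).sum(axis=1, keepdims=True)  # [[0.7]]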
Example #3
        def unroll(iter, state, hidden_states):
            """
            """
            hidden_conv = tf.cond(self.update_bool,
                                  lambda: tf.gather_nd(self.hidden_conv,
                                                       self.indices+iter),
                                  lambda: self.hidden_conv)
            inf_prev_action = tf.cond(self.update_bool,
                                      lambda: tf.gather_nd(self.inf_prev_action,
                                                           self.indices+iter),
                                      lambda: self.inf_prev_action)
            inf_hidden = state.h

            if self.parameters['attention']:
                inf_hidden = attention(hidden_conv, inf_hidden)
            else:
                inf_hidden = select_dpatch(hidden_conv)


            inf_prev_action_onehot = c_layers.one_hot_encoding(inf_prev_action,
                                                               self.act_size)
            inf_hidden = tf.concat([inf_hidden, inf_prev_action_onehot], axis=1)
            inf_hidden, state = net.rnn(inf_hidden, state,
                                        self.parameters['inf_num_rec_units'],
                                        self.inf_seq_len, 'inf_rnn')
            hidden_states = hidden_states.write(iter, inf_hidden)
            iter += 1

            return [iter, state, hidden_states]
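The [iter, state, hidden_states] signature marks unroll as a tf.while_loop body. A hedged sketch of how such a body is typically driven (max_steps and initial_lstm_state are illustrative names, not from the source):

import tensorflow as tf

hidden_states = tf.TensorArray(tf.float32, size=max_steps)
_, final_state, hidden_states = tf.while_loop(
    cond=lambda it, *_: it < max_steps,
    body=unroll,
    loop_vars=[tf.constant(0), initial_lstm_state, hidden_states])
stacked = hidden_states.stack()  # [max_steps, batch, num_units]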
Example #4
        def unroll(iter, state, hidden_states):#, softmax_weights):
            """
            """
            hidden = tf.cond(self.update_bool,
                                  lambda: tf.gather_nd(self.feature_vector,
                                                       self.indices+iter),
                                  lambda: self.feature_vector)
            inf_prev_action = tf.cond(self.update_bool,
                                      lambda: tf.gather_nd(self.inf_prev_action,
                                                           self.indices+iter),
                                      lambda: self.inf_prev_action)
            inf_hidden = state.h
            if self.parameters['attention']:
                inf_hidden = attention(hidden, inf_hidden)
            elif self.parameters['automatic_dpatch']:
                inf_hidden = automatic_dpatch(hidden)
            else:
                inf_hidden = manual_dpatch(hidden)


            inf_prev_action_onehot = c_layers.one_hot_encoding(inf_prev_action,
                                                               self.act_size)
            # inf_hidden = tf.concat([inf_hidden, inf_prev_action_onehot], axis=1)
            inf_hidden, state = net.rnn(inf_hidden, state,
                                        self.parameters['inf_num_rec_units'],
                                        self.inf_seq_len, 'inf_rnn')
            hidden_states = hidden_states.write(iter, inf_hidden)
            iter += 1

            return [iter, state, hidden_states]#, softmax_weights]
Example #5
 def create_discrete_state_encoder(self, s_size, h_size, num_streams,
                                   activation, num_layers):
     """
     Builds a set of hidden state encoders from discrete state input.
     :param s_size: state input size (discrete).
     :param h_size: Hidden layer size.
     :param num_streams: Number of state streams to construct.
      :param activation: What type of activation function to use for layers.
      :param num_layers: number of hidden layers to create.
     :return: List of hidden layer tensors.
     """
     self.state_in = tf.placeholder(shape=[None, 1],
                                    dtype=tf.int32,
                                    name='state')
     state_in = tf.reshape(self.state_in, [-1])
     state_onehot = c_layers.one_hot_encoding(state_in, s_size)
     streams = []
     hidden = state_onehot
     for i in range(num_streams):
         for j in range(num_layers):
             hidden = tf.layers.dense(hidden,
                                      h_size,
                                      use_bias=False,
                                      activation=activation)
         streams.append(hidden)
     return streams
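For reference, a minimal standalone shape walk-through of this encoder (sizes assumed: s_size = 5, h_size = 32):

import tensorflow as tf
from tensorflow.contrib import layers as c_layers

state_in = tf.placeholder(tf.int32, [None, 1], name='state')
onehot = c_layers.one_hot_encoding(tf.reshape(state_in, [-1]), 5)           # [batch, 5]
hidden = tf.layers.dense(onehot, 32, use_bias=False, activation=tf.nn.elu)  # [batch, 32]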
Example #6
File: bdqn.py  Project: jtib/chi-rl-alg
 def pred(x, a: tf.int32):
     x = tf.concat((x, layers.one_hot_encoding(a, self.n_actions)), axis=1)
     x = layers.fully_connected(x, 100)
     x = layers.fully_connected(x, 50)
     x = layers.fully_connected(x, 50)
     x = layers.fully_connected(x, 100)
     x = layers.fully_connected(x, self.n_state, None)
     return x
Example #7
    def build_main_model(self):
        """
        Builds neural network model to approximate policy and value functions
        """
        if self.parameters['obs_type'] == 'image':
            self.observation = tf.placeholder(shape=[None,
                                                     self.parameters["frame_height"],
                                                     self.parameters["frame_width"],
                                                     self.parameters["num_frames"]],
                                              dtype=tf.float32, name='observation')
        else:
            self.observation = tf.placeholder(shape=[None,
                                                     self.parameters["vec_size"]],
                                              dtype=tf.float32, name='observation')

        hidden = self.observation
        # normalize input
        if self.parameters['env_type'] == 'atari':
            self.observation_norm = tf.cast(self.observation, tf.float32) / 255.
            hidden = self.observation_norm
        if self.convolutional:
            self.hidden_conv = net.cnn(hidden,  # feed the (possibly normalized) input
                                       self.parameters["num_conv_layers"],
                                       self.parameters["num_filters"],
                                       self.parameters["kernel_sizes"],
                                       self.parameters["strides"],
                                       tf.nn.relu, False, 'cnn')
            hidden = c_layers.flatten(self.hidden_conv)

        if self.fully_connected:
            hidden = net.fcn(hidden, self.parameters["num_fc_layers"],
                             self.parameters["num_fc_units"],
                             tf.nn.relu, 'fcn')

        if self.recurrent:
            self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32,
                                              name='prev_action')
            self.prev_action_onehot = c_layers.one_hot_encoding(self.prev_action,
                                                                self.act_size)
            hidden = tf.concat([hidden, self.prev_action_onehot], axis=1)

            c_in = tf.placeholder(tf.float32, [None,
                                               self.parameters['num_rec_units']],
                                  name='c_state')
            h_in = tf.placeholder(tf.float32, [None,
                                               self.parameters['num_rec_units']],
                                  name='h_state')
            self.seq_len = tf.placeholder(shape=None, dtype=tf.int32,
                                          name='sequence_length')
            self.state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
            hidden, self.state_out = net.rnn(hidden, self.state_in,
                                             self.parameters['num_rec_units'],
                                             self.seq_len,
                                             'rnn')


        self.hidden = hidden
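At episode boundaries the recurrent state above must be reset and fed explicitly. A minimal sketch, assuming num_rec_units = 128 and that the placeholders live at the top level of the graph (both assumptions, not from the source):

import numpy as np

zero_c = np.zeros((1, 128), np.float32)
zero_h = np.zeros((1, 128), np.float32)
# sess.run(fetches, feed_dict={'c_state:0': zero_c, 'h_state:0': zero_h,
#                              'sequence_length:0': 1, ...})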
Example #8
    def __init__(self, lr, brain, h_size, epsilon, beta, max_step, normalize, num_layers):
        """
        Creates Discrete Control Actor-Critic model.
        :param brain: Brain parameters object describing the observation and action spaces
        :param h_size: Hidden layer size
        """
        super(DiscreteControlModel, self).__init__()
        self._create_global_steps()
        self._create_reward_encoder()
        self.normalize = normalize

        hidden_state, hidden_visual, hidden = None, None, None
        if brain.number_observations > 0:
            height_size, width_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width']
            bw = brain.camera_resolutions[0]['blackAndWhite']
            hidden_visual = self._create_visual_encoder(height_size, width_size, bw,
                                                        h_size, 1, tf.nn.elu, num_layers)[0]
        if brain.state_space_size > 0:
            s_size = brain.state_space_size
            if brain.state_space_type == "continuous":
                hidden_state = self._create_continuous_state_encoder(s_size, h_size, 1, tf.nn.elu, num_layers)[0]
            else:
                hidden_state = self._create_discrete_state_encoder(s_size, h_size, 1, tf.nn.elu, num_layers)[0]

        if hidden_visual is None and hidden_state is None:
            raise Exception("No valid network configuration possible. "
                            "There are no states or observations in this brain")
        elif hidden_visual is not None and hidden_state is None:
            hidden = hidden_visual
        elif hidden_visual is None and hidden_state is not None:
            hidden = hidden_state
        elif hidden_visual is not None and hidden_state is not None:
            hidden = tf.concat([hidden_visual, hidden_state], axis=1)

        a_size = brain.action_space_size

        self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size')
        self.policy = tf.layers.dense(hidden, a_size, activation=None, use_bias=False,
                                      kernel_initializer=c_layers.variance_scaling_initializer(factor=0.01))
        self.probs = tf.nn.softmax(self.policy, name="action_probs")
        self.output = tf.multinomial(self.policy, 1)
        self.output_max = tf.argmax(self.probs, name='action_max', axis=1)
        self.output = tf.identity(self.output, name="action")
        self.value = tf.layers.dense(hidden, 1, activation=None, use_bias=False,
                                     kernel_initializer=c_layers.variance_scaling_initializer(factor=1.0))
        self.value = tf.identity(self.value, name="value_estimate")

        self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1)

        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(self.action_holder, a_size)
        self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32, name='old_probabilities')
        self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1)
        self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1)

        self._create_ppo_optimizer(self.responsible_probs, self.old_responsible_probs,
                                   self.value, self.entropy, beta, epsilon, lr, max_step)
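For context, the ratio of these new/old responsible probabilities drives the clipped surrogate. A sketch of what _create_ppo_optimizer presumably builds (the advantage placeholder is illustrative; this mirrors the standard PPO objective, not necessarily the exact source):

advantage = tf.placeholder(shape=[None], dtype=tf.float32)
ratio = self.responsible_probs / (self.old_responsible_probs + 1e-10)
p_opt_a = ratio * advantage
p_opt_b = tf.clip_by_value(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantage
policy_loss = -tf.reduce_mean(tf.minimum(p_opt_a, p_opt_b))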
Example #9
    def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step):
        """
        Creates Discrete Control Actor-Critic model.
        :param s_size: State-space size
        :param a_size: Action-space size
        :param h_size: Hidden layer size
        """
        self.state_in = tf.placeholder(shape=[None, s_size],
                                       dtype=tf.float32,
                                       name='state')
        self.batch_size = tf.placeholder(shape=None,
                                         dtype=tf.int32,
                                         name='batch_size')
        hidden_1 = tf.layers.dense(self.state_in,
                                   h_size,
                                   use_bias=False,
                                   activation=tf.nn.elu)
        hidden_2 = tf.layers.dense(hidden_1,
                                   h_size,
                                   use_bias=False,
                                   activation=tf.nn.elu)
        self.policy = tf.layers.dense(
            hidden_2,
            a_size,
            activation=None,
            use_bias=False,
            kernel_initializer=c_layers.variance_scaling_initializer(
                factor=0.1))
        self.probs = tf.nn.softmax(self.policy)
        self.action = tf.multinomial(self.policy, 1)
        self.output = tf.identity(self.action, name='action')
        self.value = tf.layers.dense(hidden_2,
                                     1,
                                     activation=None,
                                     use_bias=False)

        self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10),
                                      axis=1)

        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(
            self.action_holder, a_size)
        self.old_probs = tf.placeholder(shape=[None, a_size],
                                        dtype=tf.float32,
                                        name='old_probabilities')
        self.responsible_probs = tf.reduce_sum(self.probs *
                                               self.selected_actions,
                                               axis=1)
        self.old_responsible_probs = tf.reduce_sum(self.old_probs *
                                                   self.selected_actions,
                                                   axis=1)

        PPOModel.__init__(self, self.responsible_probs,
                          self.old_responsible_probs, self.value, self.entropy,
                          beta, epsilon, lr, max_step)
Example #10
 def create_discrete_state_encoder(self, s_size, h_size, activation, num_layers):
     """
     Builds a set of hidden state encoders from discrete state input.
     :param s_size: state input size (discrete).
     :param h_size: Hidden layer size.
     :param activation: What type of activation function to use for layers.
     :param num_layers: number of hidden layers to create.
     :return: List of hidden layer tensors.
     """
     vector_in = tf.reshape(self.vector_in, [-1])
     state_onehot = c_layers.one_hot_encoding(vector_in, s_size)
     hidden = state_onehot
     for j in range(num_layers):
         hidden = tf.layers.dense(hidden, h_size, use_bias=False, activation=activation)
     return hidden
Example #12
    def __init__(self, lr, s_size, a_size):
        # These lines establish the feed-forward part of the network. The agent takes a state and produces an action.
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        state_in_OH = slim.one_hot_encoding(self.state_in, s_size)
        output = slim.fully_connected(state_in_OH,a_size,\
            biases_initializer=None,activation_fn=tf.nn.sigmoid,weights_initializer=tf.ones_initializer())
        self.output = tf.reshape(output, [-1])
        self.chosen_action = tf.argmax(self.output, 0)

        # The next six lines establish the training procedure. We feed the reward and chosen action into the network
        # to compute the loss, and use it to update the network.
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder,
                                           [1])
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
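For completeness, a minimal training-loop sketch for this bandit agent (the Agent class name, the fixed state, and the reward value are illustrative assumptions):

import tensorflow as tf

agent = Agent(lr=0.001, s_size=3, a_size=4)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    state = 0  # current context/bandit id
    action = sess.run(agent.chosen_action, {agent.state_in: [state]})
    reward = 1.0  # observed from the environment for (state, action)
    sess.run(agent.update, {agent.state_in: [state],
                            agent.reward_holder: [reward],
                            agent.action_holder: [action]})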
Example #13
    def build_actor_critic(self):
        """
        Adds actor and critic heads to Tensorflow graph
        """
        if self.influence:
            hidden = tf.concat([self.hidden, self.inf_hidden], axis=1)
        else:
            hidden = self.hidden

        self.logits = tf.layers.dense(
            hidden,
            self.act_size,
            activation=None,
            use_bias=False,
            kernel_initializer=c_layers.variance_scaling_initializer(
                factor=0.01))

        self.action_probs = tf.nn.softmax(self.logits, name="action_probs")
        self.action = tf.reduce_sum(tf.multinomial(self.logits, 1), axis=1)
        self.action = tf.identity(self.action, name="action")

        self.value = tf.reduce_sum(tf.layers.dense(hidden, 1, activation=None),
                                   axis=1)
        self.value = tf.identity(self.value, name="value_estimate")

        self.entropy = -tf.reduce_sum(
            self.action_probs * tf.log(self.action_probs + 1e-10), axis=1)
        self.action_holder = tf.placeholder(shape=[None],
                                            dtype=tf.int32,
                                            name='action_holder')
        self.actions_onehot = c_layers.one_hot_encoding(
            self.action_holder, self.act_size)

        self.old_action_probs = tf.placeholder(shape=[None, self.act_size],
                                               dtype=tf.float32,
                                               name='old_probs')

        self.action_prob = tf.reduce_sum(self.action_probs *
                                         self.actions_onehot,
                                         axis=1)
        self.old_action_prob = tf.reduce_sum(self.old_action_probs *
                                             self.actions_onehot,
                                             axis=1)
Example #14
    def build_tagging_graph(self, inputs, hidden_layers, channels, num_tags,
                            use_crf, lamd, dropout_emb, dropout_hidden,
                            kernel_size, use_bn, use_wn, active_type):
        """
        Build a deep neural model for sequence tagging.
        """
        stag_ids = tf.placeholder(dtype=INT_TYPE,
                                  shape=[None, None],
                                  name='stag_ids')
        seq_lengths = tf.placeholder(dtype=INT_TYPE,
                                     shape=[None],
                                     name='seq_lengths')

        # Default is not train.
        is_train = tf.placeholder(dtype=tf.bool, shape=[], name='is_train')

        masks = tf.cast(tf.sequence_mask(seq_lengths), FLOAT_TYPE)

        # Dropout on embedding output.
        if dropout_emb:
            inputs = tf.cond(is_train,
                             lambda: tf.nn.dropout(inputs, 1 - dropout_emb),
                             lambda: inputs)

        hidden_output = inputs
        pre_channels = inputs.get_shape()[-1].value
        for i in range(hidden_layers):

            k = kernel_size
            cur_channels = channels[i]
            filter_w = tf.get_variable('filter_w_%d' % i,
                                       shape=[k, pre_channels, cur_channels],
                                       dtype=FLOAT_TYPE)
            filter_v = tf.get_variable('filter_v_%d' % i,
                                       shape=[k, pre_channels, cur_channels],
                                       dtype=FLOAT_TYPE)
            bias_b = tf.get_variable(
                'bias_b_%d' % i,
                shape=[cur_channels],
                initializer=tf.zeros_initializer(dtype=FLOAT_TYPE))
            bias_c = tf.get_variable(
                'bias_c_%d' % i,
                shape=[cur_channels],
                initializer=tf.zeros_initializer(dtype=FLOAT_TYPE))

            # Weight normalization.
            if use_wn:
                epsilon = 1e-12
                g_w = tf.get_variable('g_w_%d' % i,
                                      shape=[k, 1, cur_channels],
                                      dtype=FLOAT_TYPE)
                g_v = tf.get_variable('g_v_%d' % i,
                                      shape=[k, 1, cur_channels],
                                      dtype=FLOAT_TYPE)
                # Perform wn
                filter_w = g_w * filter_w / (tf.sqrt(
                    tf.reduce_sum(filter_w**2, 1, keep_dims=True)) + epsilon)
                filter_v = g_v * filter_v / (tf.sqrt(
                    tf.reduce_sum(filter_v**2, 1, keep_dims=True)) + epsilon)

            w = tf.nn.conv1d(hidden_output, filter_w, 1, 'SAME') + bias_b
            v = tf.nn.conv1d(hidden_output, filter_v, 1, 'SAME') + bias_c

            if use_bn:
                # Normalize each path with its own statistics.
                w = layers.batch_norm(inputs=w,
                                      decay=0.9,
                                      is_training=is_train,
                                      center=True,
                                      scale=True,
                                      scope='BatchNorm_w_%d' % i)
                v = layers.batch_norm(inputs=v,
                                      decay=0.9,
                                      is_training=is_train,
                                      center=True,
                                      scale=True,
                                      scope='BatchNorm_v_%d' % i)

            if active_type == 'glu':
                hidden_output = w * tf.nn.sigmoid(v)
            elif active_type == 'relu':
                hidden_output = tf.nn.relu(w)
            elif active_type == 'gtu':
                hidden_output = tf.tanh(w) * tf.nn.sigmoid(v)
            elif active_type == 'tanh':
                hidden_output = tf.tanh(w)
            elif active_type == 'linear':
                hidden_output = w
            elif active_type == 'bilinear':
                hidden_output = w * v

            # Mask paddings.
            hidden_output = hidden_output * tf.expand_dims(masks, -1)
            # Dropout on hidden output.
            if dropout_hidden:
                hidden_output = tf.cond(
                    is_train,
                    lambda: tf.nn.dropout(hidden_output, 1 - dropout_hidden),
                    lambda: hidden_output)

            pre_channels = cur_channels

        # Un-scaled log probabilities.
        scores = layers.fully_connected(hidden_output, num_tags, tf.identity)

        if use_crf:
            cost, transitions = crf.crf_log_likelihood(
                inputs=scores,
                tag_indices=stag_ids,
                sequence_lengths=seq_lengths)
            cost = -tf.reduce_mean(cost)
        else:
            reshaped_scores = tf.reshape(scores, [-1, num_tags])
            reshaped_stag_ids = tf.reshape(stag_ids, [-1])
            real_distribution = layers.one_hot_encoding(
                reshaped_stag_ids, num_tags)
            cost = tf.nn.softmax_cross_entropy_with_logits(
                logits=reshaped_scores, labels=real_distribution)
            cost = tf.reduce_sum(
                tf.reshape(cost, tf.shape(stag_ids)) * masks) / tf.cast(
                    tf.shape(inputs)[0], FLOAT_TYPE)

        # Calculate L2 penalty.
        l2_penalty = 0
        if lamd > 0:
            for v in tf.trainable_variables():
                if '/B:' not in v.name and '/biases:' not in v.name:
                    l2_penalty += lamd * tf.nn.l2_loss(v)
        train_cost = cost + l2_penalty

        # Summary cost.
        tf.summary.scalar('cost', cost)

        summaries = tf.summary.merge_all()

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            with tf.control_dependencies([updates]):
                cost = tf.identity(cost)

        return stag_ids, seq_lengths, is_train, cost, train_cost, scores, summaries
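The 'glu' branch above is a gated linear unit: output = w * sigmoid(v), with w and v produced by parallel convolutions. In isolation, with assumed sizes:

import tensorflow as tf

x = tf.random_normal([2, 10, 8])                # [batch, time, channels]
w = tf.layers.conv1d(x, 16, 3, padding='same')  # linear path
v = tf.layers.conv1d(x, 16, 3, padding='same')  # gate path
glu = w * tf.nn.sigmoid(v)                      # [2, 10, 16]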
Example #15
    def build_model(self):
        self.placeholders = _get_placeholders(self.spatial_dim)
        with tf.variable_scope("theta"):
            units_embedded = layers.embed_sequence(
                self.placeholders.screen_unit_type,
                vocab_size=SCREEN_FEATURES.unit_type.scale,
                embed_dim=self.unit_type_emb_dim,
                scope="unit_type_emb",
                trainable=self.trainable
            )

            # Let's not one-hot zero which is background
            player_relative_screen_one_hot = layers.one_hot_encoding(
                self.placeholders.player_relative_screen,
                num_classes=SCREEN_FEATURES.player_relative.scale
            )[:, :, :, 1:]
            player_relative_minimap_one_hot = layers.one_hot_encoding(
                self.placeholders.player_relative_minimap,
                num_classes=MINIMAP_FEATURES.player_relative.scale
            )[:, :, :, 1:]

            channel_axis = 3
            screen_numeric_all = tf.concat(
                [self.placeholders.screen_numeric, units_embedded, player_relative_screen_one_hot],
                axis=channel_axis
            )
            minimap_numeric_all = tf.concat(
                [self.placeholders.minimap_numeric, player_relative_minimap_one_hot],
                axis=channel_axis
            )

            # BUILD CONVNNs
            screen_output = self._build_convs(screen_numeric_all, "screen_network")
            minimap_output = self._build_convs(minimap_numeric_all, "minimap_network")


            # State representation (last layer before separation as described in the paper)
            self.map_output = tf.concat([screen_output, minimap_output], axis=channel_axis)

            # BUILD CONVLSTM
            self.rnn_in = tf.reshape(self.map_output, [1, -1, 32, 32, 64])
            self.cell = tf.contrib.rnn.Conv2DLSTMCell(input_shape=[32, 32, 64], # input dims; must match rnn_in channels
                                                 kernel_shape=[3, 3],  # for a 3 by 3 conv
                                                 output_channels=64)  # number of feature maps
            c_init = np.zeros((1, 32, 32, 64), np.float32)
            h_init = np.zeros((1, 32, 32, 64), np.float32)
            self.state_init = [c_init, h_init]
            step_size = tf.shape(self.map_output)[:1] # Get step_size from input dimensions
            c_in = tf.placeholder(tf.float32, [None, 32, 32, 64])
            h_in = tf.placeholder(tf.float32, [None, 32, 32, 64])
            self.state_in = (c_in, h_in)
            state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
            self.step_size = tf.placeholder(tf.float32, [1])
            (self.outputs, self.state) = tf.nn.dynamic_rnn(self.cell, self.rnn_in, initial_state=state_in, sequence_length=step_size, time_major=False,
                                                          dtype=tf.float32)
            lstm_c, lstm_h = self.state
            self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
            rnn_out = tf.reshape(self.outputs, [-1, 32, 32, 64])
            
            # 1x1 conv layer to generate our spatial policy
            self.spatial_action_logits = layers.conv2d(
                rnn_out,
                data_format="NHWC",
                num_outputs=1,
                kernel_size=1,
                stride=1,
                activation_fn=None,
                scope='spatial_action',
                trainable=self.trainable
            )

            spatial_action_probs = tf.nn.softmax(layers.flatten(self.spatial_action_logits))


            map_output_flat = tf.reshape(self.outputs, [-1, 65536])  # (32*32*64)
            # fully connected layer for Value predictions and action_id
            self.fc1 = layers.fully_connected(
                map_output_flat,
                num_outputs=256,
                activation_fn=tf.nn.relu,
                scope="fc1",
                trainable=self.trainable
            )
            # fc/action_id
            action_id_probs = layers.fully_connected(
                self.fc1,
                num_outputs=len(actions.FUNCTIONS),
                activation_fn=tf.nn.softmax,
                scope="action_id",
                trainable=self.trainable
            )
            # fc/value
            self.value_estimate = tf.squeeze(layers.fully_connected(
                self.fc1,
                num_outputs=1,
                activation_fn=None,
                scope='value',
                trainable=self.trainable
            ), axis=1)

            # disregard non-allowed actions by setting zero prob and re-normalizing to 1 ((MINE) THE MASK)
            action_id_probs *= self.placeholders.available_action_ids
            action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True)

            def logclip(x):
                return tf.log(tf.clip_by_value(x, 1e-12, 1.0))

            spatial_action_log_probs = (
                    logclip(spatial_action_probs)
                    * tf.expand_dims(self.placeholders.is_spatial_action_available, axis=1)
            )
            # non-available actions get log(1e-10) value but that's ok because it's never used
            action_id_log_probs = logclip(action_id_probs)

            self.action_id_probs = action_id_probs
            self.spatial_action_probs = spatial_action_probs
            self.action_id_log_probs = action_id_log_probs
            self.spatial_action_log_probs = spatial_action_log_probs

        selected_spatial_action_flat = ravel_index_pairs(
            self.placeholders.selected_spatial_action, self.spatial_dim
        )

        selected_log_probs = self._get_select_action_probs(selected_spatial_action_flat)

        # maximum is to avoid 0 / 0 because this is used to calculate some means
        sum_spatial_action_available = tf.maximum(
            1e-10, tf.reduce_sum(self.placeholders.is_spatial_action_available)
        )

        neg_entropy_spatial = tf.reduce_sum(
            self.spatial_action_probs * self.spatial_action_log_probs
        ) / sum_spatial_action_available
        neg_entropy_action_id = tf.reduce_mean(tf.reduce_sum(
            self.action_id_probs * self.action_id_log_probs, axis=1
        ))
        
        # Sample now actions from the corresponding dstrs defined by the policy network theta
        self.sampled_action_id = weighted_random_sample(self.action_id_probs)
        self.sampled_spatial_action = weighted_random_sample(self.spatial_action_probs)
        
        policy_loss = -tf.reduce_mean(selected_log_probs.total * self.placeholders.advantage)

        value_loss = tf.losses.mean_squared_error(
            self.placeholders.value_target, self.value_estimate)

        loss = (
            policy_loss
            + value_loss * self.loss_value_weight
            + neg_entropy_spatial * self.entropy_weight_spatial
            + neg_entropy_action_id * self.entropy_weight_action_id
        )

        self.train_op = layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            optimizer=self.optimiser,
            clip_gradients=self.max_gradient_norm,
            summaries=OPTIMIZER_SUMMARIES,
            learning_rate=None,
            name="train_op"
        )

        self._scalar_summary("value/estimate", tf.reduce_mean(self.value_estimate))
        self._scalar_summary("value/target", tf.reduce_mean(self.placeholders.value_target))
        self._scalar_summary("action/is_spatial_action_available",
                             tf.reduce_mean(self.placeholders.is_spatial_action_available))
        self._scalar_summary("action/selected_id_log_prob",
                             tf.reduce_mean(selected_log_probs.action_id))
        self._scalar_summary("loss/policy", policy_loss)
        self._scalar_summary("loss/value", value_loss)
        self._scalar_summary("loss/neg_entropy_spatial", neg_entropy_spatial)
        self._scalar_summary("loss/neg_entropy_action_id", neg_entropy_action_id)
        self._scalar_summary("loss/total", loss)
        self._scalar_summary("value/advantage", tf.reduce_mean(self.placeholders.advantage))
        self._scalar_summary("action/selected_total_log_prob",
                             tf.reduce_mean(selected_log_probs.total))
        self._scalar_summary("action/selected_spatial_log_prob",
                             tf.reduce_sum(selected_log_probs.spatial) / sum_spatial_action_available)

        self.init_op = tf.global_variables_initializer()
        self.saver = tf.train.Saver(max_to_keep=2)
        self.all_summary_op = tf.summary.merge_all(tf.GraphKeys.SUMMARIES)
        self.scalar_summary_op = tf.summary.merge(tf.get_collection(self._scalar_summary_key))
Example #16
    def build(self):
        """build
        Build the actual network, using the
        values passed over the from agent object, which
        themselves are derived from the Obs object.

        This has no concept of transfer learning.
        """

        # Maps a series of symbols to embeddings,
        # where an embedding is a mapping from discrete objects,
        # such as words, to vectors of real numbers.
        # In this case it is from the unit types.
        units_embedded = layers.embed_sequence(
            self.placeholders.screen_unit_type,
            vocab_size=SCREEN_FEATURES.unit_type.scale,
            embed_dim=self.unittype_emb_dim,
            scope="unit_type_emb",
            trainable=self.trainable,
        )

        # "One hot" encoding performs "binarization" on the input
        # meaning we end up with features we can suitably learn
        # from.
        # Basically, learning from categories isn't possible,
        # but learning from ints (i.e. 0/1/2 for 3 categories)
        # ends up with further issues, like the ML algorithm
        # picking up some pattern in the categories, when none exists.
        # Instead we want it in a binary form instead, to prevent this.
        # This is not needed for the background, since it is
        # not used, which is why we ignore channel 0 in the
        # last sub-array.
        player_relative_screen_one_hot = layers.one_hot_encoding(
            self.placeholders.player_relative_screen,
            num_classes=SCREEN_FEATURES.player_relative.scale,
        )[:, :, :, 1:]

        player_relative_minimap_one_hot = layers.one_hot_encoding(
            self.placeholders.player_relative_minimap,
            num_classes=MINIMAP_FEATURES.player_relative.scale,
        )[:, :, :, 1:]

        channel_axis = 3

        # Group together all the inputs, such that a conv
        # layer can be built upon them.
        screen_numeric_all = tf.concat(
            [
                self.placeholders.screen_numeric,
                units_embedded,
                player_relative_screen_one_hot,
            ],
            axis=channel_axis,
        )

        minimap_numeric_all = tf.concat(
            [self.placeholders.minimap_numeric, player_relative_minimap_one_hot],
            axis=channel_axis,
        )

        non_spatial_features = tf.cast(
            self.placeholders.non_spatial_features, tf.float32
        )
        log_non_spatial_features = tf.log(non_spatial_features + 1.0)

        four_d_non_spatial = reference_tiling_method(self, log_non_spatial_features)

        if DEBUG:
            # We want to print the values of the tensor
            four_d_non_spatial = tf.Print(
                four_d_non_spatial,
                [four_d_non_spatial],
                "4D non spatial tensor values: ",
                summarize=1024,  # this is the number of values TF will print from the Tensor
            )

        # Build the 2 convolutional layers based on the screen
        # and the mini-map.
        screen_conv_layer_output = self.build_conv_layers_for_input(
            screen_numeric_all, "screen_network"
        )

        minimap_conv_layer_output = self.build_conv_layers_for_input(
            minimap_numeric_all, "minimap_network"
        )

        # Group these two convolutional layers now, and the non_spatial
        # features. build a further convolutional layer on top of it.
        visual_inputs = tf.concat(
            [screen_conv_layer_output, minimap_conv_layer_output, four_d_non_spatial],
            axis=channel_axis,
        )

        spatial_actions = layers.conv2d(
            visual_inputs,
            data_format="NHWC",
            num_outputs=1,
            kernel_size=1,
            stride=1,
            activation_fn=None,
            scope="spatial_action",
            trainable=self.trainable,
        )

        if self.trainable:
            tf.summary.image(
                f"spatial_action", tf.reshape(spatial_actions, [-1, 32, 32, 1]), 3
            )

        # Take the softmax of this final convolutional layer.
        spatial_action_probs = tf.nn.softmax(layers.flatten(spatial_actions))

        # Build a full connected layer of this final convolutional layer.
        # Could possibly pass in additional variables here, alongside the
        # convolutional layer.
        map_output_flat = layers.flatten(visual_inputs)

        fully_connected_layer1 = layers.fully_connected(
            map_output_flat,
            num_outputs=256,
            activation_fn=tf.nn.relu,
            scope="fully_connected_layer1",
            trainable=self.trainable,
        )

        # Generate the probability of a given action from the
        # fully connected layer. Finally, produce a value
        # estimate for the given actions.
        action_id_probs = layers.fully_connected(
            fully_connected_layer1,
            num_outputs=len(actions.FUNCTIONS),
            activation_fn=tf.nn.softmax,
            scope="action_id",
            trainable=self.trainable,
        )

        value_estimate = tf.squeeze(
            layers.fully_connected(
                fully_connected_layer1,
                num_outputs=1,
                activation_fn=None,
                scope="value",
                trainable=self.trainable,
            ),
            axis=1,
        )

        # Disregard all the non-allowed actions by giving them a
        # probability of zero, before re-normalizing to 1.
        action_id_probs *= self.placeholders.available_action_ids
        action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True)

        spatial_action_log_probs = self.logclip(spatial_action_probs) * tf.expand_dims(
            self.placeholders.is_spatial_action_available, axis=1
        )

        action_id_log_probs = self.logclip(action_id_probs)

        self.value_estimate = value_estimate
        self.action_id_probs = action_id_probs
        self.spatial_action_probs = spatial_action_probs
        self.action_id_log_probs = action_id_log_probs
        self.spatial_action_log_probs = spatial_action_log_probs

        return self
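To make the binarization comments above concrete, here is what the player_relative one-hot produces on a toy input (class count assumed):

import tensorflow as tf
from tensorflow.contrib import layers

player_relative = tf.constant([[0, 1], [3, 2]])                    # categorical ints, [2, 2]
one_hot = layers.one_hot_encoding(player_relative, num_classes=4)  # [2, 2, 4], float32
no_background = one_hot[:, :, 1:]  # drop channel 0, the unused background class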
Example #18
def toOneHot(input, num_classes):
    return contrib_layers.one_hot_encoding(input, num_classes)
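For integer inputs, contrib_layers.one_hot_encoding(labels, num_classes) is equivalent to tf.one_hot(labels, depth=num_classes) with float32 on/off values. A quick check of the wrapper above (values assumed):

import tensorflow as tf

labels = tf.constant([0, 2, 1])
encoded = toOneHot(labels, 3)
# evaluates to [[1., 0., 0.], [0., 0., 1.], [0., 1., 0.]]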
Example #20
    def __init__(self, lr, o_size_h, o_size_w, a_size, h_size, epsilon, beta,
                 max_step):
        """
        Creates Discrete Control Actor-Critic model for use with visual observations (images).
        :param o_size_h: Observation height.
        :param o_size_w: Observation width.
        :param a_size: Action-space size.
        :param h_size: Hidden layer size.
        """
        self.observation_in = tf.placeholder(
            shape=[None, o_size_h, o_size_w, 1],
            dtype=tf.float32,
            name='observation_0')
        self.conv1 = tf.layers.conv2d(self.observation_in,
                                      32,
                                      kernel_size=[3, 3],
                                      strides=[2, 2],
                                      use_bias=False,
                                      activation=tf.nn.elu)
        self.conv2 = tf.layers.conv2d(self.conv1,
                                      64,
                                      kernel_size=[3, 3],
                                      strides=[2, 2],
                                      use_bias=False,
                                      activation=tf.nn.elu)
        self.batch_size = tf.placeholder(shape=None, dtype=tf.int32)
        hidden = tf.layers.dense(c_layers.flatten(self.conv2),
                                 h_size,
                                 use_bias=False,
                                 activation=tf.nn.elu)
        self.policy = tf.layers.dense(
            hidden,
            a_size,
            activation=None,
            use_bias=False,
            kernel_initializer=c_layers.variance_scaling_initializer(
                factor=0.1))
        self.probs = tf.nn.softmax(self.policy)
        self.action = tf.multinomial(self.policy, 1)
        self.output = tf.identity(self.action, name='action')
        self.value = tf.layers.dense(hidden,
                                     1,
                                     activation=None,
                                     use_bias=False)

        self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10),
                                      axis=1)

        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)
        self.selected_actions = c_layers.one_hot_encoding(
            self.action_holder, a_size)
        self.old_probs = tf.placeholder(shape=[None, a_size],
                                        dtype=tf.float32,
                                        name='old_probabilities')
        self.responsible_probs = tf.reduce_sum(self.probs *
                                               self.selected_actions,
                                               axis=1)
        self.old_responsible_probs = tf.reduce_sum(self.old_probs *
                                                   self.selected_actions,
                                                   axis=1)

        PPOModel.__init__(self, self.responsible_probs,
                          self.old_responsible_probs, self.value, self.entropy,
                          beta, epsilon, lr, max_step)
Example #21
File: agent.py  Project: vwxyzjn/sc2aibot
    def _build_fullyconv_network(self):
        units_embedded = layers.embed_sequence(
            self.ph_screen_unit_type,
            vocab_size=SCREEN_FEATURES.unit_type.scale,
            embed_dim=self.unit_type_emb_dim,
            scope="unit_type_emb"
        )

        # Let's not one-hot zero which is background
        player_relative_screen_one_hot = layers.one_hot_encoding(
            self.ph_player_relative_screen,
            num_classes=SCREEN_FEATURES.player_relative.scale
        )[:, :, :, 1:]
        player_relative_minimap_one_hot = layers.one_hot_encoding(
            self.ph_player_relative_minimap,
            num_classes=MINIMAP_FEATURES.player_relative.scale
        )[:, :, :, 1:]

        channel_axis = 3
        screen_numeric_all = tf.concat(
            [self.ph_screen_numeric, units_embedded, player_relative_screen_one_hot],
            axis=channel_axis
        )
        minimap_numeric_all = tf.concat(
            [self.ph_minimap_numeric, player_relative_minimap_one_hot],
            axis=channel_axis
        )
        screen_output = _build_convs(screen_numeric_all, "screen_network")
        minimap_output = _build_convs(minimap_numeric_all, "minimap_network")

        map_output = tf.concat([screen_output, minimap_output], axis=channel_axis)

        spatial_action_logits = layers.conv2d(
            map_output,
            data_format="NHWC",
            num_outputs=1,
            kernel_size=1,
            stride=1,
            activation_fn=None,
            scope='spatial_action'
        )

        spatial_action_probs = tf.nn.softmax(layers.flatten(spatial_action_logits))

        map_output_flat = layers.flatten(map_output)

        fc1 = layers.fully_connected(
            map_output_flat,
            num_outputs=256,
            activation_fn=tf.nn.relu,
            scope="fc1"
        )
        action_id_probs = layers.fully_connected(
            fc1,
            num_outputs=len(actions.FUNCTIONS),
            activation_fn=tf.nn.softmax,
            scope="action_id"
        )
        value_estimate = tf.squeeze(layers.fully_connected(
            fc1,
            num_outputs=1,
            activation_fn=None,
            scope='value'
        ), axis=1)

        # disregard non-allowed actions by setting zero prob and re-normalizing to 1
        action_id_probs *= self.ph_available_action_ids
        action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keep_dims=True)

        return spatial_action_probs, action_id_probs, value_estimate
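A quick numeric view of the available-action masking and renormalization at the end of this snippet (values assumed):

import numpy as np

action_id_probs = np.array([[0.5, 0.3, 0.2]])
available = np.array([[1., 0., 1.]])         # action 1 unavailable
masked = action_id_probs * available         # [[0.5, 0.0, 0.2]]
masked /= masked.sum(axis=1, keepdims=True)  # [[0.714..., 0.0, 0.285...]]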
Example #22
import tensorflow as tf
from tensorflow.contrib import layers
import matplotlib.pyplot as plt

conv = layers.convolution2d
pool = layers.max_pool2d
fc = layers.fully_connected
bn = layers.batch_norm

h = 50
w = 50
h_trans = 30
w_trans = 30

x = tf.placeholder(tf.float32, [None, h*w])
x_tensor = tf.reshape(x, [-1, h, w, 1])
y = tf.placeholder(tf.int32, [None])
y_one_hot = layers.one_hot_encoding(y, 10)

# localization net
"""
loc = pool(conv(x_tensor, 8, [5, 5], padding='VALID'), [2, 2])
loc = pool(conv(loc, 16, [5, 5], padding='VALID'), [2, 2])
loc = fc(layers.flatten(loc), 50)
"""

loc = pool(x_tensor, [2, 2])
loc = conv(loc, 5, [5, 5], padding='VALID')
loc = pool(loc, [2, 2])
loc = conv(loc, 10, [5, 5], padding='VALID')

"""
loc = fc(fc(x, 500), 50)
# 1.1) embed on channel -> concat on channel
embedded1 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    e = layers.embed_sequence(f,
                              vocab_size=n,
                              embed_dim=embedding_dim,
                              initializer=tf.constant_initializer(w))
    embedded1.append(e)

out11 = tf.concat(embedded1, axis=2)

# 1.2) onehot on channel -> 1x1 conv separately -> concat on channel
embedded2 = []
for f, n, w in zip(features, n_cat_by_feature, initial_emb_weights):
    one_hot = layers.one_hot_encoding(f, num_classes=n)

    conv_out = layers.conv2d(inputs=one_hot,
                             num_outputs=embedding_dim,
                             weights_initializer=tf.constant_initializer(w),
                             kernel_size=1,
                             stride=1)
    embedded2.append(conv_out)

out12 = tf.concat(embedded2, axis=2)

# 2.1) sum embeddings on channel instead of concatenating
out21 = tf.add_n(embedded1)

# 2.2) onehot on channel -> concat on channel -> 1x1 conv
one_hotted_features = tf.concat([
Example #24
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 nenv,
                 nsteps,
                 nstack,
                 reuse=False):
        nbatch = nenv * nsteps
        nh, nw, nc = ob_space.shape
        ob_shape = (nbatch, nh, nw, nc * nstack)
        X = tf.placeholder(tf.int32, ob_shape)  # obs

        with tf.variable_scope("fullyconv_model", reuse=reuse):
            x_onehot = layers.one_hot_encoding(
                # assuming we have only one channel
                X[:, :, :, 0],
                num_classes=SCREEN_FEATURES.player_relative.scale)

            #don't one hot 0-category
            x_onehot = x_onehot[:, :, :, 1:]

            h = layers.conv2d(x_onehot,
                              num_outputs=16,
                              kernel_size=5,
                              stride=1,
                              padding='SAME',
                              scope="conv1")
            h2 = layers.conv2d(h,
                               num_outputs=32,
                               kernel_size=3,
                               stride=1,
                               padding='SAME',
                               scope="conv2")
            pi = layers.flatten(
                layers.conv2d(h,
                              num_outputs=1,
                              kernel_size=1,
                              stride=1,
                              scope="spatial_action",
                              activation_fn=None))

            pi *= 3.0  # sharpen logits to make sampling a little more deterministic; not sure if a good idea

            f = layers.fully_connected(layers.flatten(h2),
                                       num_outputs=64,
                                       activation_fn=tf.nn.relu,
                                       scope="value_h_layer")

            vf = layers.fully_connected(f,
                                        num_outputs=1,
                                        activation_fn=None,
                                        scope="value_out")

        v0 = vf[:, 0]
        a0 = sample(pi)
        self.initial_state = []  # not stateful

        def step(ob, *_args, **_kwargs):
            a, v = sess.run([a0, v0], {X: ob})
            return a, v, []  # dummy state

        def value(ob, *_args, **_kwargs):
            return sess.run(v0, {X: ob})

        self.X = X
        self.pi = pi
        self.vf = vf
        self.step = step
        self.value = value
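Note that sample is not defined in this snippet. A common implementation for flat logits, in the style of baselines-like helpers (an assumption, not the source):

import tensorflow as tf

def sample(logits):
    # Gumbel-max trick: argmax over logits perturbed by Gumbel noise
    noise = tf.random_uniform(tf.shape(logits))
    return tf.argmax(logits - tf.log(-tf.log(noise)), axis=1)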
Example #25
    def build_transfer(self, previous_model):
        """build_transfer

        Build the actual network, using the
        values passed over from the agent object, which
        themselves are derived from the Obs object.
        This model is built using a previous model.
        """

        # Maps a series of symbols to embeddings,
        # where an embedding is a mapping from discrete objects,
        # such as words, to vectors of real numbers.
        # In this case it is from the unit types.
        units_embedded = layers.embed_sequence(
            self.placeholders.screen_unit_type,
            vocab_size=SCREEN_FEATURES.unit_type.scale,
            embed_dim=self.unittype_emb_dim,
            scope="unit_type_emb",
            trainable=self.trainable,
        )

        # "One hot" encoding performs "binarization" on the input
        # meaning we end up with features we can suitably learn
        # from.
        # Basically, learning from categories isn't possible,
        # but learning from ints (i.e. 0/1/2 for 3 categories)
        # ends up with further issues, like the ML algorithm
        # picking up some pattern in the categories, when none exists.
        # Instead we want it in a binary form instead, to prevent this.
        # This is not needed for the background, since it is
        # not used, which is why we ignore channel 0 in the
        # last sub-array.
        player_relative_screen_one_hot = layers.one_hot_encoding(
            self.placeholders.player_relative_screen,
            num_classes=SCREEN_FEATURES.player_relative.scale,
        )[:, :, :, 1:]

        player_relative_minimap_one_hot = layers.one_hot_encoding(
            self.placeholders.player_relative_minimap,
            num_classes=MINIMAP_FEATURES.player_relative.scale,
        )[:, :, :, 1:]

        channel_axis = 3

        # Group together all the inputs, such that a conv
        # layer can be built upon them.
        screen_numeric_all = tf.concat(
            [
                self.placeholders.screen_numeric,
                units_embedded,
                player_relative_screen_one_hot,
            ],
            axis=channel_axis,
        )

        minimap_numeric_all = tf.concat(
            [self.placeholders.minimap_numeric, player_relative_minimap_one_hot],
            axis=channel_axis,
        )

        # Build the 2 convolutional layers based on the screen
        # and the mini-map.
        screen_conv_layer_output = self.build_conv_layers_for_input(
            screen_numeric_all, "screen_network", previous_model.screen_conv_1
        )

        # And now the minimap
        minimap_conv_layer_output = self.build_conv_layers_for_input(
            minimap_numeric_all, "minimap_network", previous_model.minimap_conv_1
        )

        # Group these two convolutional outputs and build a further
        # convolutional layer on top of them.
        visual_inputs = tf.concat(
            [screen_conv_layer_output, minimap_conv_layer_output], axis=channel_axis
        )

        spatial_actions_normal = layers.conv2d(
            visual_inputs,
            data_format="NHWC",
            num_outputs=1,
            kernel_size=1,
            stride=1,
            activation_fn=None,
            scope=f"spatial_actions/model_{self.curriculum_number}",
            trainable=self.trainable,
        )

        # Build the spatial action layers for the previous models.
        previous_spatial_actions = []
        for model_number, prev_out in enumerate(previous_model.concat_2):
            spatial_actions_previous = layers.conv2d(
                prev_out,
                data_format="NHWC",
                num_outputs=1,
                kernel_size=1,
                stride=1,
                activation_fn=None,
                scope=f"spatial_actions/model_{model_number}",
                trainable=self.trainable,
            )

            previous_spatial_actions.append(spatial_actions_previous)

        previous_spatial_actions_added = self.add_all_previous(
            previous_spatial_actions, "spatial_actions"
        )
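        # add_all_previous is defined elsewhere in the project; a minimal
        # sketch of the assumed behavior (reducing the list of per-model
        # outputs to a single tensor by summation):
        #
        # def add_all_previous(self, tensors, name):
        #     return tf.add_n(tensors, name=name + "_previous_add")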

        joint_spatial_actions = tf.add(
            spatial_actions_normal,
            previous_spatial_actions_added,
            "spatial_actions_add",
        )

        if self.trainable:
            tf.summary.image(
                "spatial_action_normal",
                tf.reshape(spatial_actions_normal, [-1, 32, 32, 1]),
                3,
            )
            # Note: spatial_actions_previous here is the value from the
            # final loop iteration, so only the last previous model's
            # layer is logged.
            tf.summary.image(
                "spatial_action_previous",
                tf.reshape(spatial_actions_previous, [-1, 32, 32, 1]),
                3,
            )
            tf.summary.image(
                "joint_spatial_actions",
                tf.reshape(joint_spatial_actions, [-1, 32, 32, 1]),
                3,
            )

        # Flatten the spatial map and take the softmax, giving a
        # distribution over screen coordinates.
        spatial_action_probs = tf.nn.softmax(layers.flatten(joint_spatial_actions))

        # Build a fully connected layer on top of the flattened
        # convolutional features. Additional variables could be
        # passed in here alongside them.
        map_output_flat = layers.flatten(visual_inputs)

        fully_connected_layer_normal = layers.fully_connected(
            map_output_flat,
            num_outputs=256,
            activation_fn=None,
            scope=f"fully_connected_layer1/model_{self.curriculum_number}",
            trainable=self.trainable,
        )

        previous_fully_con_1 = []
        for model_number, prev_out in enumerate(previous_model.flatten_1):
            fully_connected_previous = layers.fully_connected(
                prev_out,
                num_outputs=256,
                activation_fn=None,
                scope=f"fully_connected_layer1/model_{model_number}",
                trainable=self.trainable,
            )

            previous_fully_con_1.append(fully_connected_previous)

        previous_fully_con_1_added = self.add_all_previous(
            previous_fully_con_1, "fully_connected_layer1"
        )

        # Combine the new and old models' values, then apply ReLU to the result.
        joint_connected_layers = tf.add(
            fully_connected_layer_normal,
            previous_fully_con_1_added,
            "fully_connected_layer_add",
        )

        relu_connected_layer = tf.nn.relu(
            joint_connected_layers, name="fully_connected_layer1_normal_relu"
        )

        # Generate the probability of a given action from the
        # fully connected layer. Finally, produce a value
        # estimate for the given actions.
        action_id_probs_new = layers.fully_connected(
            relu_connected_layer,
            num_outputs=len(actions.FUNCTIONS),
            activation_fn=None,
            scope=f"action_id/model_{self.curriculum_number}",
            trainable=self.trainable,
        )

        previous_action_ids = []
        for model_number, prev_out in enumerate(previous_model.fully_connected_layer1):
            previous_action_id_probs = layers.fully_connected(
                prev_out,
                num_outputs=len(actions.FUNCTIONS),
                activation_fn=None,
                scope=f"action_id/model_{model_number}",
                trainable=self.trainable,
            )

            previous_action_ids.append(previous_action_id_probs)

        previous_action_ids_added = self.add_all_previous(
            previous_action_ids, "action_id"
        )

        joint_action_ids = tf.add(
            action_id_probs_new, previous_action_ids_added, "id_probs_add"
        )

        # Combine the new and old models' values, then apply softmax to the result.
        action_id_probs = tf.nn.softmax(joint_action_ids)

        # Build the value estimate.
        value_estimate_new = layers.fully_connected(
            relu_connected_layer,
            num_outputs=1,
            activation_fn=None,
            scope=f"value/model_{self.curriculum_number}",
            trainable=self.trainable,
        )

        previous_value_estimates = []
        for model_number, prev_out in enumerate(previous_model.fully_connected_layer1):
            value_estimate_previous = layers.fully_connected(
                prev_out,
                num_outputs=1,
                activation_fn=None,
                scope=f"value/model_{model_number}",
                trainable=self.trainable,
            )

            previous_value_estimates.append(value_estimate_previous)

        previous_value_estimates_added = self.add_all_previous(
            previous_value_estimates, "value"
        )

        # Combine the new and old models' values, then squeeze the result.
        joint_value_estimate = tf.add(
            value_estimate_new, previous_value_estimates_added, "value_estimate_add"
        )

        value_estimate = tf.squeeze(joint_value_estimate, axis=1)

        # Disregard all the non-allowed actions by giving them a
        # probability of zero, before re-normalizing to 1.
        action_id_probs *= self.placeholders.available_action_ids
        action_id_probs /= tf.reduce_sum(action_id_probs, axis=1, keepdims=True)
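        # E.g. probs [0.5, 0.3, 0.2] with availability mask [1, 0, 1]
        # become [0.5, 0.0, 0.2], which renormalize to roughly
        # [0.714, 0.0, 0.286].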

        spatial_action_log_probs = self.logclip(spatial_action_probs) * tf.expand_dims(
            self.placeholders.is_spatial_action_available, axis=1
        )

        action_id_log_probs = self.logclip(action_id_probs)
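        # self.logclip is not shown in this snippet; it is assumed to
        # match the local helper in the next example:
        #
        # def logclip(self, x):
        #     return tf.log(tf.clip_by_value(x, 1e-12, 1.0))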

        self.value_estimate = value_estimate
        self.action_id_probs = action_id_probs
        self.spatial_action_probs = spatial_action_probs
        self.action_id_log_probs = action_id_log_probs
        self.spatial_action_log_probs = spatial_action_log_probs

        return self
Example #26
File: policy.py Project: zyqlzr/sc2aibot
    def build(self):
        units_embedded = layers.embed_sequence(
            self.placeholders.screen_unit_type,
            vocab_size=SCREEN_FEATURES.unit_type.scale,
            embed_dim=self.unittype_emb_dim,
            scope="unit_type_emb",
            trainable=self.trainable)

        print("*model* units_embedded={},input={}, dim={}".format(
            units_embedded.shape, self.placeholders.screen_unit_type.shape,
            self.unittype_emb_dim))

        # Let's not one-hot class zero, which is the background
        player_relative_screen_one_hot = layers.one_hot_encoding(
            self.placeholders.player_relative_screen,
            num_classes=SCREEN_FEATURES.player_relative.scale)[:, :, :, 1:]
        player_relative_minimap_one_hot = layers.one_hot_encoding(
            self.placeholders.player_relative_minimap,
            num_classes=MINIMAP_FEATURES.player_relative.scale)[:, :, :, 1:]

        print(
            "*model* player_relative_screen_one_hot={},input={}, num_classes={}".
            format(player_relative_screen_one_hot.shape,
                   self.placeholders.player_relative_screen.shape,
                   SCREEN_FEATURES.player_relative.scale))
        print("*model* player_relative_minimap_one_hot={},input={}".format(
            player_relative_minimap_one_hot.shape,
            self.placeholders.player_relative_minimap.shape,
            MINIMAP_FEATURES.player_relative.scale))

        channel_axis = 3
        screen_numeric_all = tf.concat([
            self.placeholders.screen_numeric, units_embedded,
            player_relative_screen_one_hot
        ],
                                       axis=channel_axis)
        print("*model* screen_numric_all={}, input=[{},{},{}]".format(
            screen_numeric_all.shape, self.placeholders.screen_numeric.shape,
            units_embedded.shape, player_relative_screen_one_hot.shape))

        minimap_numeric_all = tf.concat([
            self.placeholders.minimap_numeric, player_relative_minimap_one_hot
        ],
                                        axis=channel_axis)
        print("*model* minimap_numric_all={}, input=[{},{}]".format(
            minimap_numeric_all.shape, self.placeholders.minimap_numeric.shape,
            player_relative_minimap_one_hot.shape))

        screen_output = self._build_convs(screen_numeric_all, "screen_network")
        minimap_output = self._build_convs(minimap_numeric_all,
                                           "minimap_network")
        print("*model* conv_screen={},input={},".format(
            screen_output.shape, screen_numeric_all.shape))
        print("*model* conv_screen={},input={},".format(
            minimap_output.shape, minimap_numeric_all.shape))

        map_output = tf.concat([screen_output, minimap_output],
                               axis=channel_axis)

        spatial_action_logits = layers.conv2d(map_output,
                                              data_format="NHWC",
                                              num_outputs=1,
                                              kernel_size=1,
                                              stride=1,
                                              activation_fn=None,
                                              scope='spatial_action',
                                              trainable=self.trainable)

        spatial_action_probs = tf.nn.softmax(
            layers.flatten(spatial_action_logits))
        print(
            "*model* action_probs={}, action_logits={}, map_output={}".format(
                spatial_action_probs.shape, spatial_action_logits.shape,
                map_output.shape))

        map_output_flat = layers.flatten(map_output)

        fc1 = layers.fully_connected(map_output_flat,
                                     num_outputs=256,
                                     activation_fn=tf.nn.relu,
                                     scope="fc1",
                                     trainable=self.trainable)
        action_id_probs = layers.fully_connected(fc1,
                                                 num_outputs=len(
                                                     actions.FUNCTIONS),
                                                 activation_fn=tf.nn.softmax,
                                                 scope="action_id",
                                                 trainable=self.trainable)
        value_estimate = tf.squeeze(layers.fully_connected(
            fc1,
            num_outputs=1,
            activation_fn=None,
            scope='value',
            trainable=self.trainable),
                                    axis=1)

        print(
            "*model* action_id_probs={}, value_estimate={}, map_output_flat={}"
            .format(action_id_probs.shape, value_estimate.shape,
                    map_output_flat.shape))

        # disregard non-allowed actions by setting zero prob and re-normalizing to 1
        action_id_probs *= self.placeholders.available_action_ids
        action_id_probs /= tf.reduce_sum(action_id_probs,
                                         axis=1,
                                         keep_dims=True)

        def logclip(x):
            return tf.log(tf.clip_by_value(x, 1e-12, 1.0))

        spatial_action_log_probs = (
            logclip(spatial_action_probs) * tf.expand_dims(
                self.placeholders.is_spatial_action_available, axis=1))

        # non-available actions get log(1e-12) value but that's ok because it's never used
        action_id_log_probs = logclip(action_id_probs)

        self.value_estimate = value_estimate
        self.action_id_probs = action_id_probs
        self.spatial_action_probs = spatial_action_probs
        self.action_id_log_probs = action_id_log_probs
        self.spatial_action_log_probs = spatial_action_log_probs
        return self
Example #27
def run_module_unit_test(use_fake_data=False, test_mode="full_model"):
    #Test mode can either be "module", "stem", "classifier_auxiliary", "classifier_basic" or "full_model"
    fl = tf.app.flags.FLAGS
    BATCH_SIZE = fl.batch_size
    L2_WEIGHT = fl.l2_lambda_weight
    
    if use_fake_data:
        #load fake data. ImageNet uses 224*224*3, but put whatever you want here.
        train_X = np.random.rand(BATCH_SIZE*5, 224, 224, 3)
        train_y = np.random.randint(low=0, high=1000, size=(BATCH_SIZE*5, 1))
        NUM_LABELS = 1000
        test_X = np.random.rand(BATCH_SIZE, 224, 224, 3)
        test_y = np.random.randint(low=0, high=1000, size=(BATCH_SIZE, 1))  #was BATCH_SIZE*4, which mismatched test_X
    else:
        #TODO - toss away this NUM_LABELS when done testing
        (train_X, train_y), (test_X, test_y), NUM_LABELS = data_utils.load_dataset(fl.dataset)

    #extract a random validation set from the training set
    validation_size = np.floor(train_X.shape[0]*fl.validation_ratio).astype(int)
    shuf = np.random.permutation(train_X.shape[0])
    train_X = train_X[shuf]
    train_y = train_y[shuf]
    validation_X, validation_y = train_X[:validation_size], train_y[:validation_size]
    train_X, train_y = train_X[validation_size:], train_y[validation_size:]
    
    IMAGE_LEN = train_X.shape[1]
    IMAGE_WID = train_X.shape[2]
    IMAGE_SIZE = IMAGE_LEN
    NUM_CHANNELS = train_X.shape[3]

    g = tf.Graph()
    with g.as_default():
        tf_train_X = tf.placeholder(tf.float32, shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
        tf_train_y = tf.placeholder(tf.int32, shape=(BATCH_SIZE,1))
        tf_validation_X = tf.placeholder(tf.float32, shape=validation_X.shape)
        tf_validation_y = tf.placeholder(tf.int32, shape=(validation_y.shape[0],1))
        tf_test_X = tf.placeholder(tf.float32, shape=test_X.shape)
        tf_test_y = tf.placeholder(tf.int32, shape=(test_y.shape[0],1))
        
        if test_mode == "stem":
            expected_output_shape = [BATCH_SIZE, IMAGE_SIZE//8, IMAGE_SIZE//8, 192] #length and width are halved three times (2**3 = 8), e.g. 224 -> 28
            inception_model = InceptionStemV1(filter_sizes=[64, 64, 192],
                                            input_shape=tf_train_X.get_shape(),
                                            output_shape=expected_output_shape,
                                            scope="stem1")
        
        elif test_mode == "module":
            expected_output_shape = [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 64]
            inception_model = InceptionModuleV1(dtype = tf.float32,
                                        input_shape = tf_train_X.get_shape().as_list(),
                                        output_shape = expected_output_shape,
                                        filter_sizes = [16, 24, 32, 4, 8, 8], #indices 0,2,4,5 must add up to output_shape[-1] (16+32+8+8 = 64)
                                        scope="module1")
        
        elif test_mode == "classifier_auxiliary":
            expected_output_shape = [BATCH_SIZE, NUM_LABELS]
            inception_model = InceptionClassifierV1(dtype=tf.float32,
                                        auxiliary_weight_constant=0.3,
                                        filter_sizes=[10,1024],
                                        auxiliary_classifier=True,
                                        input_shape=tf_train_X.get_shape().as_list(), 
                                        output_shape=expected_output_shape,
                                        scope="classifier_auxiliary1")
            
        elif test_mode == "classifier_basic":
            expected_output_shape = [BATCH_SIZE, NUM_LABELS]
            inception_model = InceptionClassifierV1(dtype=tf.float32,
                                        input_shape=tf_train_X.get_shape().as_list(),
                                        output_shape=expected_output_shape,
                                        scope="classifier_basic1")
           
        elif test_mode == "full_model":
            expected_output_shape = [BATCH_SIZE, NUM_LABELS]
            inception_model = InceptionV1(dtype=tf.float32,
                                          filter_size_dict="imagenet_auto",
                                          filter_size_reduction_factor=4,
                                          auxiliary_classifier_weights=[0.3,0.3],
                                          use_mini_model=False,
                                          input_shape=tf_train_X.get_shape().as_list(), #224x224x3 imagenet images
                                          output_shape=expected_output_shape, 
                                          scope="inception1")
            
        
        inception_model.create_model()
        global_step = tf.Variable(0, trainable=False)
        
        #set up a learning rate and learning rate decay mechanism
        lr_calc = tf.train.exponential_decay(0.01, global_step, 100, 0.999, staircase=True)
        lr_min = 0.0001
        lr = tf.maximum(lr_calc, lr_min)
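        #e.g. at step 10,000 (staircase decay every 100 steps):
        #0.01 * 0.999**100 ~= 0.00905, still well above the 0.0001 floor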
        
        #set up an l2 regularization weight and its decay operation
        l2_lambda_weight = tf.Variable(fl.l2_lambda_weight, dtype=tf.float32)
        l2_lambda_decay = tf.constant(fl.l2_lambda_weight_decay, dtype=tf.float32)
        l2_lambda_decay_op = l2_lambda_weight.assign(
            l2_lambda_weight * l2_lambda_decay)
        
        #one-hot encode the labels for the training, validation and test splits
        #flat_inputs = flatten(tf_train_X, scope="flatten_pixel_channels")
        one_hot_train_outputs = one_hot_encoding(tf.squeeze(tf_train_y), NUM_LABELS, on_value=1.0, off_value=0.0)
        one_hot_validation_outputs = one_hot_encoding(tf.squeeze(tf_validation_y), NUM_LABELS, on_value=1.0, off_value=0.0)
        one_hot_test_outputs = one_hot_encoding(tf.squeeze(tf_test_y), NUM_LABELS, on_value=1.0, off_value=0.0)

        #A cheap model that tosses a fully-connected layer on to the flattened result of the 4d Tensor
        
        if test_mode in ["module", "stem"]: #not testing any classification, so we build a dummy FC layer to connect to logits
            
            flattened_incept_out_size = expected_output_shape[1]*expected_output_shape[2]*expected_output_shape[3]
            
            w_l2 = tf.get_variable("w_l2",
                           shape=(flattened_incept_out_size, one_hot_train_outputs.get_shape()[1]),
                           dtype=tf.float32,
                           initializer=xavier_initializer())
            b_l2 = tf.get_variable("b_l2",
                           shape=(one_hot_train_outputs.get_shape()[1]),
                           dtype=tf.float32,
                           initializer=tf.zeros_initializer())
            
            def model_with_linear_classifier(inp, training=True):
                inception_out = inception_model.run_model(inp, training=training)
                flat_inputs = flatten(inception_out)
                return tf.matmul(flat_inputs, w_l2) + b_l2
            
            train_out = model_with_linear_classifier(tf_train_X)
            train_predictions = tf.nn.softmax(train_out)
            validation_out = model_with_linear_classifier(tf_validation_X, training=False)
            validation_predictions = tf.nn.softmax(validation_out)
            test_out = model_with_linear_classifier(tf_test_X, training=False)
            test_predictions = tf.nn.softmax(test_out)
        
        #using a model with a classifier in it
        else:
            train_out = inception_model.run_model(tf_train_X, training=True)
            train_predictions = tf.nn.softmax(train_out)
            validation_out = inception_model.run_model(tf_validation_X, training=False)
            validation_predictions = tf.nn.softmax(validation_out)
            test_out = inception_model.run_model(tf_test_X, training=False)
            test_predictions = tf.nn.softmax(test_out)
        
        #separate the losses so we can compare them in the session
        ce_loss = loss.softmax_cross_entropy_with_laplace_smoothing(train_out, one_hot_train_outputs, laplace_pseudocount=0.00001, scale=[0.3,0.3,1.0] if test_mode=='full_model' else 1.0)
        
        #collect all the parameters in the model to do l2 regularization
        regularization_parameters = inception_model.model_parameters
        if test_mode in ["module", "stem"]:
            regularization_parameters.extend((w_l2, b_l2))
        
        #note: the decayed l2_lambda_weight variable above is not wired in
        #here; as written, the regularizer uses a fixed weight of 0.001
        reg_loss = loss.regularizer(regularization_parameters, reg_type='l2', weight_lambda=0.001)
        
        total_loss = tf.reduce_mean(ce_loss + reg_loss)
        opt = tf.train.GradientDescentOptimizer(lr).minimize(total_loss, global_step=global_step)
        
        #we also declare this in the graph and run it in the session
        init_op = tf.global_variables_initializer()
        
    with tf.Session(graph=g, config=tf.ConfigProto(log_device_placement=True)) as sess:
    
        sess.run(init_op)
        total_steps = 0
        num_epochs = 100

        for epoch in range(num_epochs):
            shuf = np.random.permutation(train_X.shape[0])
            train_X = train_X[shuf]
            train_y = train_y[shuf]
            processed=0

            while processed+BATCH_SIZE <= train_X.shape[0]:
                batch_X = train_X[processed:processed+BATCH_SIZE]
                batch_y = train_y[processed:processed+BATCH_SIZE]
                processed += BATCH_SIZE

                feed_dict = {tf_train_X:batch_X,
                            tf_train_y:batch_y}

                _, l, rl, pred, l2lw = sess.run([opt, total_loss, reg_loss, train_predictions, l2_lambda_weight], feed_dict=feed_dict)
                total_steps += 1
                
                if total_steps % fl.l2_lambda_weight_decay_steps == 0:
                    sess.run(l2_lambda_decay_op)
                
                #Validation Set
                if total_steps % fl.validation_frequency == 0:
                    feed_dict = {tf_validation_X:validation_X,
                                 tf_validation_y:validation_y}
                    pred_labels, true_labels = sess.run([validation_predictions, one_hot_validation_outputs], feed_dict=feed_dict)
                    print("Validation Top-1 accuracy is " + str(100.0*data_utils.n_accuracy(pred_labels, true_labels, 1)) + "%")
        
        #Test Set
        feed_dict = {tf_test_X:test_X,tf_test_y:test_y}
        pred_labels, true_labels = sess.run([test_predictions, one_hot_test_outputs], feed_dict=feed_dict)
        print("Test Top-1 accuracy is " + str(100.0*data_utils.n_accuracy(pred_labels, true_labels, 1)) + "%")