Example #1
 def body(past, prev, output):
     # One decoding step: run the model on the previously sampled token,
     # reusing the cached key/value states in `past`.
     next_outputs = step(hparams, prev[:, tf.newaxis], past=past)
     # Take the logits for the last position and apply temperature scaling.
     logits = next_outputs['logits'][:, -1, :] / tf.to_float(temperature)
     # Restrict the distribution with nucleus (top-p) or top-k filtering.
     if top_p > 0.0:
         logits = top_p_logits(logits, p=top_p)
     else:
         logits = top_k_logits(logits, k=top_k)
     # Sample the next token; return the updated cache, the token, and the output so far.
     samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
     return [
         tf.concat([past, next_outputs['presents']], axis=-2),
         tf.squeeze(samples, axis=[1]),
         tf.concat([output, samples], axis=1),
     ]
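The helpers top_k_logits and top_p_logits are not shown in this example. As a rough orientation, a minimal top-k filter along the same lines (assuming TF 1.x and a plain Python int k; the name top_k_filter is illustrative, not from the source) could look like this:

import tensorflow as tf

def top_k_filter(logits, k):
    # Keep only the k largest logits per row; push everything else to -1e10
    # so it receives (effectively) zero probability when sampled.
    if k == 0:
        return logits  # k == 0 conventionally means "no filtering"
    values, _ = tf.nn.top_k(logits, k=k)          # values sorted descending, shape [batch, k]
    min_values = values[:, -1, tf.newaxis]        # smallest logit that survives, per row
    return tf.where(
        logits < min_values,
        tf.ones_like(logits) * -1e10,
        logits,
    )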
Example #2
 def build(self, guidence, newNet):
     with tf.variable_scope("training_variable"):
         # Embed the input token ids.
         inputEmb = tf.nn.embedding_lookup(self.embedding, self.X)
         # Initial (c, h) LSTM states for the forward and backward cells,
         # projected from the guidance vector.
         initFw = tf.nn.rnn_cell.LSTMStateTuple(
             tf.nn.relu(
                 tf.matmul(guidence, self.weights["Fw1"]) +
                 self.biases["Fw1"]),
             tf.nn.relu(
                 tf.matmul(guidence, self.weights["Fw2"]) +
                 self.biases["Fw2"]))
         initBw = tf.nn.rnn_cell.LSTMStateTuple(
             tf.nn.relu(
                 tf.matmul(guidence, self.weights["Bw1"]) +
                 self.biases["Bw1"]),
             tf.nn.relu(
                 tf.matmul(guidence, self.weights["Bw2"]) +
                 self.biases["Bw2"]))
         rnnCellFw = tf.compat.v1.nn.rnn_cell.DropoutWrapper(
             tf.nn.rnn_cell.BasicLSTMCell(self.nHidden),
             input_keep_prob=self.pKeep,
             output_keep_prob=1.0)
         rnnCellBw = tf.compat.v1.nn.rnn_cell.DropoutWrapper(
             tf.nn.rnn_cell.BasicLSTMCell(self.nHidden),
             input_keep_prob=self.pKeep,
             output_keep_prob=1.0)
         # Run the bidirectional LSTM over the embedded sequence.
         outputs, state = tf.nn.bidirectional_dynamic_rnn(
             cell_fw=rnnCellFw,
             cell_bw=rnnCellBw,
             inputs=inputEmb,
             initial_state_fw=initFw,
             initial_state_bw=initBw,
             dtype=tf.float32)
         # outputs is a (forward, backward) pair; concatenate along the feature
         # axis, then mean-pool over time for a fixed-size sequence representation.
         outputsConcat = tf.concat(outputs, axis=2)
         self.outputs = outputsConcat
         self.RNNState = tf.reduce_mean(outputsConcat, axis=1)
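As a sanity check on the shapes, tf.nn.bidirectional_dynamic_rnn returns a (forward, backward) pair of outputs, so the concatenation above yields [batch, time, 2 * nHidden]. A small self-contained check with toy sizes (TF 1.x; all names here are illustrative):

import numpy as np
import tensorflow as tf

n_hidden, batch, time, feat = 4, 2, 5, 3
x = tf.placeholder(tf.float32, [None, time, feat])
fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
outputs, _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, x, dtype=tf.float32)
merged = tf.concat(outputs, axis=2)  # [batch, time, 2 * n_hidden]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(merged, {x: np.zeros((batch, time, feat), np.float32)})
    print(out.shape)  # (2, 5, 8)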
Example #3
def attn(x, scope, n_state, *, past, hparams):
    assert x.shape.ndims == 3  # Should be [batch, sequence, features]
    assert n_state % hparams.n_head == 0
    if past is not None:
        assert past.shape.ndims == 5  # Should be [batch, 2, heads, sequence, features],
        # where 2 is [k, v]

    def split_heads(x):
        # From [batch, sequence, features] to [batch, heads, sequence, features]
        return tf.transpose(split_states(x, hparams.n_head), [0, 2, 1, 3])

    def merge_heads(x):
        # Reverse of split_heads
        return merge_states(tf.transpose(x, [0, 2, 1, 3]))

    def mask_attn_weights(w):
        # w has shape [batch, heads, dst_sequence, src_sequence], where information flows from
        # src to dst.
        _, _, nd, ns = shape_list(w)
        b = attention_mask(nd, ns, dtype=w.dtype)
        b = tf.reshape(b, [1, 1, nd, ns])
        w = w * b - tf.cast(1e10, w.dtype) * (1 - b)
        return w

    def multihead_attn(q, k, v):
        # q, k, v have shape [batch, heads, sequence, features]
        w = tf.matmul(q, k, transpose_b=True)
        # Scale by 1/sqrt(d_k), where v.shape[-1] is the per-head feature size.
        w = w * tf.rsqrt(tf.cast(v.shape[-1], w.dtype))

        w = mask_attn_weights(w)
        w = softmax(w)
        a = tf.matmul(w, v)
        return a

    with tf.variable_scope(scope):
        c = conv1d(x, 'c_attn', n_state * 3)
        q, k, v = map(split_heads, tf.split(c, 3, axis=2))
        present = tf.stack([k, v], axis=1)
        if past is not None:
            pk, pv = tf.unstack(past, axis=1)
            k = tf.concat([pk, k], axis=-2)
            v = tf.concat([pv, v], axis=-2)
        a = multihead_attn(q, k, v)
        a = merge_heads(a)
        a = conv1d(a, 'c_proj', n_state)
        return a, present
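The attention_mask helper used by mask_attn_weights is not part of this snippet. In GPT-2-style code it builds a causal (lower-triangular) mask counted from the lower-right corner, so cached past positions remain visible to every query; a sketch of that helper:

import tensorflow as tf

def attention_mask(nd, ns, *, dtype):
    # 1's in the lower triangle, counting from the lower-right corner, so that
    # query position i (of nd) may attend to key positions j <= i + (ns - nd).
    i = tf.range(nd)[:, None]
    j = tf.range(ns)
    m = i >= j - ns + nd
    return tf.cast(m, dtype)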
Example #4
 def generator(x, m):
     # Concatenate Mask and Data
     inputs = tf.concat(values=[x, m], axis=1)
     G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
     G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
     # MinMax normalized output
     G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
     return G_prob
Example #5
 def discriminator(x, h):
     # Concatenate Data and Hint
     inputs = tf.concat(values=[x, h], axis=1)
     D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
     D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
     D_logit = tf.matmul(D_h2, D_W3) + D_b3
     D_prob = tf.nn.sigmoid(D_logit)
     return D_prob
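Examples #4 and #5 only define the two networks; below is a minimal sketch of how such a generator/discriminator pair is typically wired together in a GAIN-style imputation model. Everything here (dimensions, weight names, the hint placeholder, the loss weighting) is an illustrative assumption rather than code from the source, and it assumes the two functions and these weight variables live in the same module, since the functions read the weights as globals.

import tensorflow as tf

dim, h_dim = 8, 16  # illustrative sizes

X = tf.placeholder(tf.float32, [None, dim])  # data with missing entries zero-filled
M = tf.placeholder(tf.float32, [None, dim])  # mask: 1 = observed, 0 = missing
H = tf.placeholder(tf.float32, [None, dim])  # hint vector for the discriminator

# Weights for both MLPs; each network takes a [data, mask-or-hint] concatenation as input.
G_W1, G_b1 = tf.Variable(tf.random_normal([dim * 2, h_dim], stddev=0.1)), tf.Variable(tf.zeros([h_dim]))
G_W2, G_b2 = tf.Variable(tf.random_normal([h_dim, h_dim], stddev=0.1)), tf.Variable(tf.zeros([h_dim]))
G_W3, G_b3 = tf.Variable(tf.random_normal([h_dim, dim], stddev=0.1)), tf.Variable(tf.zeros([dim]))
D_W1, D_b1 = tf.Variable(tf.random_normal([dim * 2, h_dim], stddev=0.1)), tf.Variable(tf.zeros([h_dim]))
D_W2, D_b2 = tf.Variable(tf.random_normal([h_dim, h_dim], stddev=0.1)), tf.Variable(tf.zeros([h_dim]))
D_W3, D_b3 = tf.Variable(tf.random_normal([h_dim, dim], stddev=0.1)), tf.Variable(tf.zeros([dim]))

G_sample = generator(X, M)              # impute every entry
Hat_X = X * M + G_sample * (1 - M)      # keep observed values, fill in the missing ones
D_prob = discriminator(Hat_X, H)        # per-entry probability that the entry was observed

# Typical GAIN-style losses: cross-entropy against the mask for D, plus an
# adversarial term and a reconstruction term (the weight 10.0 is illustrative) for G.
D_loss = -tf.reduce_mean(M * tf.log(D_prob + 1e-8) + (1 - M) * tf.log(1. - D_prob + 1e-8))
G_loss = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8)) + \
         10.0 * tf.reduce_mean((M * X - M * G_sample) ** 2) / tf.reduce_mean(M)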
Example #6
 def Attention(self, g0, sequence, g1, g2, AttStr, AttStr2, length, flag):
     tmpLS = []
     tmpLS2 = []
     tmpLS3 = []
     # Make the sequence time-major ([length, batch, features]) so seq[i] is one timestep.
     if not flag:
         seq = tf.transpose(sequence, [1, 0, 2])
     else:
         seq = sequence
     for i in range(length):
         nHidden = tf.tanh(
             tf.matmul(tf.concat([seq[i], g1], axis=1), self.weights[
                 AttStr + "_1"]) + self.biases[AttStr + "_1"])
         nHidden2 = tf.tanh(
             tf.matmul(tf.concat([seq[i], g2], axis=1), self.weights[
                 AttStr + "_2"]) + self.biases[AttStr + "_2"])
         nHidden3 = tf.tanh(
             tf.matmul(tf.concat([seq[i], g0], axis=1), self.weights[
                 AttStr + "_3"]) + self.biases[AttStr + "_3"])
         tmpLS.append(
             tf.matmul(nHidden, self.weights[AttStr2 + "_1"]) +
             self.biases[AttStr2 + "_1"])
         tmpLS2.append(
             tf.matmul(nHidden2, self.weights[AttStr2 + "_2"]) +
             self.biases[AttStr2 + "_2"])
         tmpLS3.append(
             tf.matmul(nHidden3, self.weights[AttStr2 + "_3"]) +
             self.biases[AttStr2 + "_3"])
     # Stack the per-timestep scores and normalize over the time axis.
     tmpLS = tf.nn.softmax(tmpLS, axis=0)
     tmpLS2 = tf.nn.softmax(tmpLS2, axis=0)
     tmpLS3 = tf.nn.softmax(tmpLS3, axis=0)
     self.representation_score[AttStr] = (tmpLS + tmpLS2 + tmpLS3) / 3
     # Attention-weighted sum over timesteps, averaging the three score sets.
     ret = tmpLS[0] * seq[0] / 3 + tmpLS2[0] * seq[0] / 3 + tmpLS3[0] * seq[0] / 3
     for i in range(1, length):
         ret += tmpLS[i] * seq[i] / 3 + tmpLS2[i] * seq[i] / 3 + tmpLS3[i] * seq[i] / 3
     return ret
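Note that tf.nn.softmax is applied to a Python list of per-timestep scores above; TensorFlow first stacks the list into a [length, batch, 1] tensor, so normalizing over axis 0 normalizes across timesteps rather than across the batch. A tiny check of that behavior (toy values, TF 1.x):

import numpy as np
import tensorflow as tf

scores = [tf.constant([[0.1], [2.0]]),
          tf.constant([[1.5], [0.3]]),
          tf.constant([[0.2], [0.4]])]   # 3 timesteps, batch of 2
weights = tf.nn.softmax(scores, axis=0)  # list is implicitly stacked to [3, 2, 1]

with tf.Session() as sess:
    w = sess.run(weights)
    print(w.shape)            # (3, 2, 1)
    print(np.sum(w, axis=0))  # all ones: the weights sum to 1 over the time axis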
# Split the combined target tensor into Q-value and policy (action) targets.
Q_target = tf.slice(target, [0, 0], [-1, Q_output_count], name = "Q_slice_node")
action_target = tf.slice(target, [0, Q_output_count], [-1, action_output_count], name = "action_slice_node")

hidden_1 = tf.layers.dense(x, hidden_count_1, tf.nn.relu, use_bias = True, kernel_initializer = tf.glorot_normal_initializer(), name = "hidden_1")

Q_output_raw      = tf.layers.dense(hidden_1, Q_output_count,                   use_bias = True, kernel_initializer = tf.zeros_initializer, bias_initializer = tf.zeros_initializer, name = 'Q_output_node')
# Round the network outputs to 5 decimal places (scale, round, rescale).
Q_output_ = tf.multiply(Q_output_raw, 100000)
Q_output_ = tf.round(Q_output_)
Q_output = tf.div(Q_output_, 100000)

action_output_raw = tf.layers.dense(hidden_1, action_output_count, tf.nn.softmax, use_bias = True, kernel_initializer = tf.zeros_initializer, bias_initializer = tf.zeros_initializer, name = "action_output_node")
action_output_ = tf.multiply(action_output_raw, 100000)
action_output_ = tf.round(action_output_)
action_output = tf.div(action_output_, 100000)

prediction = tf.concat([Q_output, action_output], 1, name = "concat_node")
prediction_identity = tf.identity(prediction, name = "prediction_node")

Q_loss = tf.keras.losses.mean_squared_error(y_true = Q_target, y_pred = Q_output_raw)
policy_loss = tf.keras.losses.categorical_crossentropy(y_true = action_target, y_pred = action_output_raw)

total_loss = Q_loss + policy_loss
train_op = tf.train.AdamOptimizer(learning_rate = learning_rate, name = "Optimizer").minimize(total_loss, name = 'optimize_node')

init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)
train_writer = tf.summary.FileWriter(path_to_store + "/summary", sess.graph)
train_writer.close()
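A hypothetical training step for the graph above. It assumes x and target are the input and target placeholders defined elsewhere in the same script, and input_count is whatever feature width x was declared with; both names are assumptions, not taken from the source.

import numpy as np

# Dummy batch of 32 samples (hypothetical input_count feature width).
batch_x = np.zeros((32, input_count), dtype=np.float32)
batch_t = np.zeros((32, Q_output_count + action_output_count), dtype=np.float32)

_, loss_value = sess.run([train_op, total_loss], feed_dict={x: batch_x, target: batch_t})
print("total loss:", np.mean(loss_value))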
def build_model():

    size = 8  # Single size for easier debugging (for now)
    max_s = [1, 2, 2, 1]  # size of the sliding window for max pooling
    learning_rate = 0.0001

    # frames = tf.placeholder(tf.float32, [None, 256, 256, 5]) # None is the number of samples, rename the variable name later
    frames = tf.placeholder(
        tf.float32, [None, 32, 32, 4], name="frames"
    )  # features: halite_available, others_ship, cargo, self_shipyard
    # can_afford = tf.placeholder(tf.float32, [None, 3])
    turns_left = tf.placeholder(tf.float32, [None, 1], name="turnsleft")
    my_ships = tf.placeholder(tf.float32, [None, 32, 32, 1], name="myships")

    my_ships = tf.cast(my_ships, tf.float32)  # no-op: the placeholder is already float32

    moves = tf.placeholder(tf.uint8, [None, 32, 32, 1], name="moves")
    spawn = tf.placeholder(tf.float32, [None, 1], name="spawn")

    tf.add_to_collection('frames', frames)
    # tf.add_to_collection('can_afford', can_afford)
    tf.add_to_collection('turns_left', turns_left)
    tf.add_to_collection('my_ships', my_ships)
    tf.add_to_collection('moves', moves)
    tf.add_to_collection('spawn', spawn)

    # Drop the trailing channel axis so the one-hot labels are [None, 32, 32, 6],
    # matching the rank of moves_logits below.
    moves = tf.one_hot(tf.squeeze(moves, axis=-1), 6)

    # ca = tf.layers.dense(can_afford, size)
    tl = tf.layers.dense(turns_left, size)

    # ca = tf.expand_dims(ca, 1)
    # ca = tf.expand_dims(ca, 1)
    tl = tf.expand_dims(tl, 1)
    tl = tf.expand_dims(tl, 1)

    d_l1_a = tf.layers.conv2d(
        frames, size, 3, activation=tf.nn.relu, padding='same'
    )  # input is frames, filters is size, kernel size is 3 (i.e. 3x3)
    d_l1_p = tf.nn.max_pool(d_l1_a, max_s, max_s, padding='VALID')  # 16

    d_l2_a = tf.layers.conv2d(d_l1_p,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')
    d_l2_p = tf.nn.max_pool(d_l2_a, max_s, max_s, padding='VALID')  # 8

    d_l3_a = tf.layers.conv2d(d_l2_p,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')
    d_l3_p = tf.nn.max_pool(d_l3_a, max_s, max_s, padding='VALID')  # 4

    d_l4_a = tf.layers.conv2d(d_l3_p,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')
    d_l4_p = tf.nn.max_pool(d_l4_a, max_s, max_s, padding='VALID')  # 2

    d_l5_a = tf.layers.conv2d(d_l4_p,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')
    d_l5_p = tf.nn.max_pool(d_l5_a, max_s, max_s, padding='VALID')  # 1

    final_state = tf.concat([d_l5_p, tl], -1)
    latent = tf.layers.dense(final_state, size, activation=tf.nn.relu)
    # latent = tf.layers.dense(d_l5_p, size, activation=tf.nn.relu)

    u_l5_a = tf.layers.conv2d_transpose(latent,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 2
    u_l5_c = tf.concat([u_l5_a, d_l5_a], -1)
    u_l5_s = tf.layers.conv2d(u_l5_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    u_l4_a = tf.layers.conv2d_transpose(u_l5_s,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 4
    u_l4_c = tf.concat([u_l4_a, d_l4_a], -1)
    u_l4_s = tf.layers.conv2d(u_l4_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    u_l3_a = tf.layers.conv2d_transpose(u_l4_s,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 8
    u_l3_c = tf.concat([u_l3_a, d_l3_a], -1)
    u_l3_s = tf.layers.conv2d(u_l3_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    u_l2_a = tf.layers.conv2d_transpose(u_l3_s,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 16
    u_l2_c = tf.concat([u_l2_a, d_l2_a], -1)
    u_l2_s = tf.layers.conv2d(u_l2_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    u_l1_a = tf.layers.conv2d_transpose(u_l2_s,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 32
    u_l1_c = tf.concat([u_l1_a, d_l1_a], -1)
    u_l1_s = tf.layers.conv2d(u_l1_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    spawn_logits = tf.layers.dense(latent, 1, activation=None)
    spawn_logits = tf.squeeze(spawn_logits, [1, 2])  # [None, 1, 1, 1] -> [None, 1]

    moves_logits = tf.layers.conv2d(u_l1_s,
                                    6,
                                    3,
                                    activation=None,
                                    padding='same')

    tf.add_to_collection('m_logits', moves_logits)
    tf.add_to_collection('s_logits', spawn_logits)

    losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=moves,
                                                        logits=moves_logits,
                                                        dim=-1)

    losses = tf.expand_dims(losses, -1)

    masked_loss = losses * my_ships

    ships_per_frame = tf.reduce_sum(my_ships, axis=[1, 2])

    frame_loss = tf.reduce_sum(masked_loss, axis=[1, 2])

    average_frame_loss = frame_loss / (ships_per_frame + 1e-8)  # early frames may have no ships

    spawn_losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=spawn,
                                                           logits=spawn_logits)

    spawn_losses = tf.reduce_mean(spawn_losses)

    loss = tf.reduce_mean(average_frame_loss) + 0.01 * spawn_losses

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    tf.add_to_collection('loss', loss)
    tf.add_to_collection('optimizer', optimizer)

    return
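A possible way to drive build_model() from a training script, pulling the tensors back out of the collections it registers. The placeholder shapes follow the definitions above; the zero-filled batch is only a smoke test, and the two-sample batch size is arbitrary.

import numpy as np
import tensorflow as tf

build_model()

frames = tf.get_collection('frames')[0]
turns_left = tf.get_collection('turns_left')[0]
# Note: the 'my_ships' collection holds the tf.cast tensor rather than the raw
# placeholder, but TensorFlow still accepts it as a feed_dict key.
my_ships = tf.get_collection('my_ships')[0]
moves = tf.get_collection('moves')[0]
spawn = tf.get_collection('spawn')[0]
loss = tf.get_collection('loss')[0]
optimizer = tf.get_collection('optimizer')[0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        frames: np.zeros((2, 32, 32, 4), np.float32),
        turns_left: np.zeros((2, 1), np.float32),
        my_ships: np.zeros((2, 32, 32, 1), np.float32),
        moves: np.zeros((2, 32, 32, 1), np.uint8),
        spawn: np.zeros((2, 1), np.float32),
    }
    _, batch_loss = sess.run([optimizer, loss], feed)
    print("loss:", batch_loss)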