def body(past, prev, output):
    # Run one decoding step on the most recently sampled token.
    next_outputs = step(hparams, prev[:, tf.newaxis], past=past)
    # Take the logits at the last position and apply the sampling temperature.
    logits = next_outputs['logits'][:, -1, :] / tf.to_float(temperature)
    # Nucleus (top-p) filtering takes precedence over top-k when enabled.
    if top_p > 0.0:
        logits = top_p_logits(logits, p=top_p)
    else:
        logits = top_k_logits(logits, k=top_k)
    samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
    return [
        # Append this step's key/value cache to the running `past`.
        tf.concat([past, next_outputs['presents']], axis=-2),
        tf.squeeze(samples, axis=[1]),
        tf.concat([output, samples], axis=1),
    ]
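# A minimal sketch of how `body` is typically driven, following the GPT-2
# sample.py pattern (assumptions: `step`, `past_shape`, `context`, `length`,
# and `batch_size` come from the surrounding sampling code; names here are
# illustrative, not confirmed by the source).
def sample_sequence_sketch(hparams, length, context, batch_size):
    # Prime the key/value cache on all but the last context token.
    context_output = step(hparams, context[:, :-1])

    def cond(*args):
        return True  # loop length is bounded by maximum_iterations

    past, prev, output = tf.while_loop(
        cond=cond,
        body=body,
        maximum_iterations=length,
        loop_vars=[
            context_output['presents'],
            context[:, -1],
            context,
        ],
        shape_invariants=[
            tf.TensorShape(past_shape(hparams=hparams, batch_size=batch_size)),
            tf.TensorShape([batch_size]),
            tf.TensorShape([batch_size, None]),
        ],
        back_prop=False,
    )
    return output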
def build(self, guidence, newNet):
    with tf.variable_scope("training_variable"):
        inputEmb = tf.nn.embedding_lookup(self.embedding, self.X)
        # Initialize both LSTM directions from the guidance vector;
        # the c and h states each get their own projection.
        initFw = tf.nn.rnn_cell.LSTMStateTuple(
            tf.nn.relu(tf.matmul(guidence, self.weights["Fw1"]) + self.biases["Fw1"]),
            tf.nn.relu(tf.matmul(guidence, self.weights["Fw2"]) + self.biases["Fw2"]))
        initBw = tf.nn.rnn_cell.LSTMStateTuple(
            tf.nn.relu(tf.matmul(guidence, self.weights["Bw1"]) + self.biases["Bw1"]),
            tf.nn.relu(tf.matmul(guidence, self.weights["Bw2"]) + self.biases["Bw2"]))
        # Dropout is applied to the inputs only.
        rnnCellFw = tf.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.BasicLSTMCell(self.nHidden),
            input_keep_prob=self.pKeep, output_keep_prob=1.0)
        rnnCellBw = tf.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.BasicLSTMCell(self.nHidden),
            input_keep_prob=self.pKeep, output_keep_prob=1.0)
        outputs, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=rnnCellFw, cell_bw=rnnCellBw, inputs=inputEmb,
            initial_state_fw=initFw, initial_state_bw=initBw, dtype=tf.float32)
        # Concatenate forward and backward outputs along the feature axis,
        # then mean-pool over time for a fixed-size representation.
        outputsConcat = tf.concat(outputs, axis=2)
        self.outputs = outputsConcat
        self.RNNState = tf.reduce_mean(outputsConcat, axis=1)
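# A minimal sketch (e.g. in __init__) of the projection parameters build()
# assumes. `guidDim` (the guidance-vector width) is an assumed name; shapes
# follow from the matmuls above: [guidDim, nHidden] weights per state.
with tf.variable_scope("training_variable"):
    self.weights, self.biases = {}, {}
    for name in ("Fw1", "Fw2", "Bw1", "Bw2"):
        self.weights[name] = tf.get_variable(name + "_W", [guidDim, self.nHidden])
        self.biases[name] = tf.get_variable(name + "_b", [self.nHidden],
                                            initializer=tf.zeros_initializer())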
def attn(x, scope, n_state, *, past, hparams):
    assert x.shape.ndims == 3  # Should be [batch, sequence, features]
    assert n_state % hparams.n_head == 0
    if past is not None:
        # Should be [batch, 2, heads, sequence, features], where 2 is [k, v]
        assert past.shape.ndims == 5

    def split_heads(x):
        # From [batch, sequence, features] to [batch, heads, sequence, features]
        return tf.transpose(split_states(x, hparams.n_head), [0, 2, 1, 3])

    def merge_heads(x):
        # Reverse of split_heads
        return merge_states(tf.transpose(x, [0, 2, 1, 3]))

    def mask_attn_weights(w):
        # w has shape [batch, heads, dst_sequence, src_sequence], where
        # information flows from src to dst.
        _, _, nd, ns = shape_list(w)
        b = attention_mask(nd, ns, dtype=w.dtype)
        b = tf.reshape(b, [1, 1, nd, ns])
        w = w * b - tf.cast(1e10, w.dtype) * (1 - b)
        return w

    def multihead_attn(q, k, v):
        # q, k, v have shape [batch, heads, sequence, features]
        w = tf.matmul(q, k, transpose_b=True)
        # Scale by 1/sqrt(d_k).
        w = w * tf.rsqrt(tf.cast(v.shape[-1], w.dtype))
        w = mask_attn_weights(w)
        w = softmax(w)
        a = tf.matmul(w, v)
        return a

    with tf.variable_scope(scope):
        c = conv1d(x, 'c_attn', n_state * 3)
        q, k, v = map(split_heads, tf.split(c, 3, axis=2))
        present = tf.stack([k, v], axis=1)
        if past is not None:
            pk, pv = tf.unstack(past, axis=1)
            k = tf.concat([pk, k], axis=-2)
            v = tf.concat([pv, v], axis=-2)
        a = multihead_attn(q, k, v)
        a = merge_heads(a)
        a = conv1d(a, 'c_proj', n_state)
        return a, present
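# One plausible set of the helpers attn() relies on, following the GPT-2
# model.py pattern (an assumption: `conv1d` is defined alongside these in
# the same module).
def shape_list(x):
    # Static shape where known, dynamic otherwise.
    static = x.shape.as_list()
    dynamic = tf.shape(x)
    return [dynamic[i] if s is None else s for i, s in enumerate(static)]

def softmax(x, axis=-1):
    # Numerically stable softmax.
    x = x - tf.reduce_max(x, axis=axis, keepdims=True)
    ex = tf.exp(x)
    return ex / tf.reduce_sum(ex, axis=axis, keepdims=True)

def split_states(x, n):
    # Reshape the last dimension of x into [n, x.shape[-1] // n].
    *start, m = shape_list(x)
    return tf.reshape(x, start + [n, m // n])

def merge_states(x):
    # Inverse of split_states: flatten the final two dimensions.
    *start, a, b = shape_list(x)
    return tf.reshape(x, start + [a * b])

def attention_mask(nd, ns, *, dtype):
    # Causal mask: 1 where destination position i may attend to source j.
    i = tf.range(nd)[:, None]
    j = tf.range(ns)
    m = i >= j - ns + nd
    return tf.cast(m, dtype)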
def generator(x, m):
    # Concatenate Mask and Data
    inputs = tf.concat(values=[x, m], axis=1)
    G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
    G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
    # MinMax normalized output
    G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
    return G_prob
def discriminator(x, h):
    # Concatenate Data and Hint
    inputs = tf.concat(values=[x, h], axis=1)
    D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
    D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
    D_logit = tf.matmul(D_h2, D_W3) + D_b3
    D_prob = tf.nn.sigmoid(D_logit)
    return D_prob
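# A minimal sketch of the global parameters the two networks above close
# over, following the GAIN-style pattern of [data, mask/hint] inputs of
# width 2*dim. `dim` and `h_dim` are illustrative values, not from the source.
def xavier_init(size):
    in_dim = size[0]
    stddev = 1.0 / tf.sqrt(in_dim / 2.0)
    return tf.random_normal(shape=size, stddev=stddev)

dim, h_dim = 10, 10  # feature count and hidden width (assumed)

G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # input is [data, mask]
G_b1 = tf.Variable(tf.zeros([h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
G_b2 = tf.Variable(tf.zeros([h_dim]))
G_W3 = tf.Variable(xavier_init([h_dim, dim]))
G_b3 = tf.Variable(tf.zeros([dim]))

D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # input is [data, hint]
D_b1 = tf.Variable(tf.zeros([h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
D_b2 = tf.Variable(tf.zeros([h_dim]))
D_W3 = tf.Variable(xavier_init([h_dim, dim]))
D_b3 = tf.Variable(tf.zeros([dim]))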
def Attention(self, g0, sequence, g1, g2, AttStr, AttStr2, length, flag):
    tmpLS = []
    tmpLS2 = []
    tmpLS3 = []
    # Make the sequence time-major so seq[i] is the i-th timestep.
    if not flag:
        seq = tf.transpose(sequence, [1, 0, 2])
    else:
        seq = sequence
    for i in range(length):
        # Score each timestep against the three guidance vectors.
        nHidden = tf.tanh(
            tf.matmul(tf.concat([seq[i], g1], axis=1),
                      self.weights[AttStr + "_1"]) + self.biases[AttStr + "_1"])
        nHidden2 = tf.tanh(
            tf.matmul(tf.concat([seq[i], g2], axis=1),
                      self.weights[AttStr + "_2"]) + self.biases[AttStr + "_2"])
        nHidden3 = tf.tanh(
            tf.matmul(tf.concat([seq[i], g0], axis=1),
                      self.weights[AttStr + "_3"]) + self.biases[AttStr + "_3"])
        tmpLS.append(tf.matmul(nHidden, self.weights[AttStr2 + "_1"])
                     + self.biases[AttStr2 + "_1"])
        tmpLS2.append(tf.matmul(nHidden2, self.weights[AttStr2 + "_2"])
                      + self.biases[AttStr2 + "_2"])
        tmpLS3.append(tf.matmul(nHidden3, self.weights[AttStr2 + "_3"])
                      + self.biases[AttStr2 + "_3"])
    # Normalize the scores over the time axis (axis 0 after stacking).
    tmpLS = tf.nn.softmax(tmpLS, axis=0)
    tmpLS2 = tf.nn.softmax(tmpLS2, axis=0)
    tmpLS3 = tf.nn.softmax(tmpLS3, axis=0)
    self.representation_score[AttStr] = (tmpLS + tmpLS2 + tmpLS3) / 3
    # Return the average of the three attention-weighted sums of the sequence.
    ret = (tmpLS[0] * seq[0] + tmpLS2[0] * seq[0] + tmpLS3[0] * seq[0]) / 3
    for i in range(1, length):
        ret += (tmpLS[i] * seq[i] + tmpLS2[i] * seq[i] + tmpLS3[i] * seq[i]) / 3
    return ret
# Split the target tensor into its Q-value and action-distribution parts.
Q_target = tf.slice(target, [0, 0], [-1, Q_output_count], name="Q_slice_node")
action_target = tf.slice(target, [0, Q_output_count], [-1, action_output_count],
                         name="action_slice_node")

hidden_1 = tf.layers.dense(x, hidden_count_1, tf.nn.relu, use_bias=True,
                           kernel_initializer=tf.glorot_normal_initializer(),
                           name="hidden_1")
Q_output_raw = tf.layers.dense(hidden_1, Q_output_count, use_bias=True,
                               kernel_initializer=tf.zeros_initializer(),
                               bias_initializer=tf.zeros_initializer(),
                               name="Q_output_node")
# Round the served outputs to five decimal places; the raw tensors are kept
# for the losses so gradients are unaffected.
Q_output = tf.div(tf.round(tf.multiply(Q_output_raw, 100000)), 100000)
action_output_raw = tf.layers.dense(hidden_1, action_output_count, tf.nn.softmax,
                                    use_bias=True,
                                    kernel_initializer=tf.zeros_initializer(),
                                    bias_initializer=tf.zeros_initializer(),
                                    name="action_output_node")
action_output = tf.div(tf.round(tf.multiply(action_output_raw, 100000)), 100000)

prediction = tf.concat([Q_output, action_output], 1, name="concat_node")
prediction_identity = tf.identity(prediction, name="prediction_node")

# Joint objective: MSE on the Q head plus cross-entropy on the policy head.
Q_loss = tf.keras.losses.mean_squared_error(y_true=Q_target, y_pred=Q_output_raw)
policy_loss = tf.keras.losses.categorical_crossentropy(y_true=action_target,
                                                       y_pred=action_output_raw)
total_loss = Q_loss + policy_loss
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                  name="Optimizer").minimize(total_loss,
                                                             name="optimize_node")

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
train_writer = tf.summary.FileWriter(path_to_store + "/summary", sess.graph)
train_writer.close()
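# Illustrative training step for the graph above (assumptions: `x` and
# `target` are placeholders created earlier, and `state_size` plus the
# random batch arrays are hypothetical stand-ins, not from the source).
import numpy as np

batch_x = np.random.rand(32, state_size).astype(np.float32)
batch_target = np.random.rand(32, Q_output_count + action_output_count).astype(np.float32)
_, loss_value = sess.run([train_op, total_loss],
                         feed_dict={x: batch_x, target: batch_target})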
def build_model():
    size = 8  # Single size for easier debugging (for now)
    max_s = [1, 2, 2, 1]  # size of the sliding window for max pooling
    learning_rate = 0.0001

    # Input features: halite_available, others_ship, cargo, self_shipyard
    frames = tf.placeholder(tf.float32, [None, 32, 32, 4], name="frames")
    turns_left = tf.placeholder(tf.float32, [None, 1], name="turnsleft")
    my_ships = tf.placeholder(tf.float32, [None, 32, 32, 1], name="myships")
    moves = tf.placeholder(tf.uint8, [None, 32, 32, 1], name="moves")
    spawn = tf.placeholder(tf.float32, [None, 1], name="spawn")

    tf.add_to_collection('frames', frames)
    tf.add_to_collection('turns_left', turns_left)
    tf.add_to_collection('my_ships', my_ships)
    tf.add_to_collection('moves', moves)
    tf.add_to_collection('spawn', spawn)

    # One-hot the six move classes; squeeze the channel dim so the labels
    # match the [batch, 32, 32, 6] move logits produced below.
    moves = tf.squeeze(tf.one_hot(moves, 6), axis=3)

    # Project turns-left to a 1x1 spatial map so it can be concatenated
    # with the encoder bottleneck.
    tl = tf.layers.dense(turns_left, size)
    tl = tf.expand_dims(tl, 1)
    tl = tf.expand_dims(tl, 1)

    # Encoder: five conv + 2x2 max-pool stages, 32 -> 16 -> 8 -> 4 -> 2 -> 1.
    d_l1_a = tf.layers.conv2d(frames, size, 3, activation=tf.nn.relu, padding='same')
    d_l1_p = tf.nn.max_pool(d_l1_a, max_s, max_s, padding='VALID')  # 16
    d_l2_a = tf.layers.conv2d(d_l1_p, size, 3, activation=tf.nn.relu, padding='same')
    d_l2_p = tf.nn.max_pool(d_l2_a, max_s, max_s, padding='VALID')  # 8
    d_l3_a = tf.layers.conv2d(d_l2_p, size, 3, activation=tf.nn.relu, padding='same')
    d_l3_p = tf.nn.max_pool(d_l3_a, max_s, max_s, padding='VALID')  # 4
    d_l4_a = tf.layers.conv2d(d_l3_p, size, 3, activation=tf.nn.relu, padding='same')
    d_l4_p = tf.nn.max_pool(d_l4_a, max_s, max_s, padding='VALID')  # 2
    d_l5_a = tf.layers.conv2d(d_l4_p, size, 3, activation=tf.nn.relu, padding='same')
    d_l5_p = tf.nn.max_pool(d_l5_a, max_s, max_s, padding='VALID')  # 1

    final_state = tf.concat([d_l5_p, tl], -1)
    latent = tf.layers.dense(final_state, size, activation=tf.nn.relu)

    # Decoder: transposed convs with skip connections back to the encoder.
    u_l5_a = tf.layers.conv2d_transpose(latent, size, 3, 2, activation=tf.nn.relu, padding='same')  # 2
    u_l5_c = tf.concat([u_l5_a, d_l5_a], -1)
    u_l5_s = tf.layers.conv2d(u_l5_c, size, 3, activation=tf.nn.relu, padding='same')
    u_l4_a = tf.layers.conv2d_transpose(u_l5_s, size, 3, 2, activation=tf.nn.relu, padding='same')  # 4
    u_l4_c = tf.concat([u_l4_a, d_l4_a], -1)
    u_l4_s = tf.layers.conv2d(u_l4_c, size, 3, activation=tf.nn.relu, padding='same')
    u_l3_a = tf.layers.conv2d_transpose(u_l4_s, size, 3, 2, activation=tf.nn.relu, padding='same')  # 8
    u_l3_c = tf.concat([u_l3_a, d_l3_a], -1)
    u_l3_s = tf.layers.conv2d(u_l3_c, size, 3, activation=tf.nn.relu, padding='same')
    u_l2_a = tf.layers.conv2d_transpose(u_l3_s, size, 3, 2, activation=tf.nn.relu, padding='same')  # 16
    u_l2_c = tf.concat([u_l2_a, d_l2_a], -1)
    u_l2_s = tf.layers.conv2d(u_l2_c, size, 3, activation=tf.nn.relu, padding='same')
    u_l1_a = tf.layers.conv2d_transpose(u_l2_s, size, 3, 2, activation=tf.nn.relu, padding='same')  # 32
    u_l1_c = tf.concat([u_l1_a, d_l1_a], -1)
    u_l1_s = tf.layers.conv2d(u_l1_c, size, 3, activation=tf.nn.relu, padding='same')

    # Heads: a per-cell move distribution and a single spawn decision.
    spawn_logits = tf.layers.dense(latent, 1, activation=None)
    spawn_logits = tf.squeeze(spawn_logits, [1, 2])  # [batch, 1] to match `spawn`
    moves_logits = tf.layers.conv2d(u_l1_s, 6, 3, activation=None, padding='same')

    tf.add_to_collection('m_logits', moves_logits)
    tf.add_to_collection('s_logits', spawn_logits)

    # Per-cell cross-entropy, masked so only cells holding our ships contribute.
    losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=moves,
                                                        logits=moves_logits,
                                                        axis=-1)
    losses = tf.expand_dims(losses, -1)
    masked_loss = losses * my_ships
    ships_per_frame = tf.reduce_sum(my_ships, axis=[1, 2])
    frame_loss = tf.reduce_sum(masked_loss, axis=[1, 2])
    # Epsilon guards against division by zero: the first frames have no ships.
    average_frame_loss = frame_loss / (ships_per_frame + 1e-8)

    spawn_losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=spawn,
                                                           logits=spawn_logits)
    spawn_losses = tf.reduce_mean(spawn_losses)

    loss = tf.reduce_mean(average_frame_loss) + 0.01 * spawn_losses
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    tf.add_to_collection('loss', loss)
    tf.add_to_collection('optimizer', optimizer)
    return
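# Illustrative use of the collections registered by build_model() (an
# assumption: the numpy batches f_batch, m_batch, s_batch, t_batch, and
# sp_batch are hypothetical stand-ins prepared by the caller).
build_model()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    frames_t = tf.get_collection('frames')[0]
    moves_t = tf.get_collection('moves')[0]
    ships_t = tf.get_collection('my_ships')[0]
    turns_t = tf.get_collection('turns_left')[0]
    spawn_t = tf.get_collection('spawn')[0]
    loss_t = tf.get_collection('loss')[0]
    opt_t = tf.get_collection('optimizer')[0]
    _, l = sess.run([opt_t, loss_t], feed_dict={
        frames_t: f_batch, moves_t: m_batch, ships_t: s_batch,
        turns_t: t_batch, spawn_t: sp_batch})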