def _build_sampler(self, prev_c=None, prev_h=None, use_bias=False):
  """Build the sampler ops and the log_prob ops."""

  print("-" * 80)
  print("Build controller sampler")

  anchors = tf.TensorArray(
    tf.float32, size=self.num_cells + 2, clear_after_read=False)
  anchors_w_1 = tf.TensorArray(
    tf.float32, size=self.num_cells + 2, clear_after_read=False)
  arc_seq = tf.TensorArray(tf.int32, size=self.num_cells * 4)
  if prev_c is None:
    assert prev_h is None, "prev_c and prev_h must both be None"
    prev_c = [tf.zeros([1, self.lstm_size], tf.float32)
              for _ in range(self.lstm_num_layers)]
    prev_h = [tf.zeros([1, self.lstm_size], tf.float32)
              for _ in range(self.lstm_num_layers)]
  inputs = self.g_emb

  # The first two anchors correspond to the two cell inputs.
  for layer_id in range(2):
    next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
    prev_c, prev_h = next_c, next_h
    anchors = anchors.write(layer_id, tf.zeros_like(next_h[-1]))
    anchors_w_1 = anchors_w_1.write(
      layer_id, tf.matmul(next_h[-1], self.w_attn_1))

  def _condition(layer_id, *args):
    return tf.less(layer_id, self.num_cells + 2)

  def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1, arc_seq,
            entropy, log_prob):
    indices = tf.range(0, layer_id, dtype=tf.int32)
    start_id = 4 * (layer_id - 2)  # each node writes 4 entries into arc_seq
    prev_layers = []
    for i in range(2):  # index_1, index_2
      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h
      # Attention over all previous anchors to pick an input node.
      query = anchors_w_1.gather(indices)
      query = tf.reshape(query, [layer_id, self.lstm_size])
      query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
      query = tf.matmul(query, self.v_attn)
      logits = tf.reshape(query, [1, layer_id])
      if self.temperature is not None:
        logits /= self.temperature
      if self.tanh_constant is not None:
        logits = self.tanh_constant * tf.tanh(logits)
      index = tf.multinomial(logits, 1)
      index = tf.to_int32(index)
      index = tf.reshape(index, [1])
      arc_seq = arc_seq.write(start_id + 2 * i, index)
      curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=index)
      log_prob += curr_log_prob
      curr_ent = tf.stop_gradient(tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.nn.softmax(logits)))
      entropy += curr_ent
      prev_layers.append(anchors.read(tf.reduce_sum(index)))
      inputs = prev_layers[-1]

    for i in range(2):  # op_1, op_2
      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h
      logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft
      if self.temperature is not None:
        logits /= self.temperature
      if self.tanh_constant is not None:
        op_tanh = self.tanh_constant / self.op_tanh_reduce
        logits = op_tanh * tf.tanh(logits)
      if use_bias:
        logits += self.b_soft_no_learn
      op_id = tf.multinomial(logits, 1)
      op_id = tf.to_int32(op_id)
      op_id = tf.reshape(op_id, [1])
      arc_seq = arc_seq.write(start_id + 2 * i + 1, op_id)
      curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=op_id)
      log_prob += curr_log_prob
      curr_ent = tf.stop_gradient(tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.nn.softmax(logits)))
      entropy += curr_ent
      inputs = tf.nn.embedding_lookup(self.w_emb, op_id)

    # One more LSTM step to produce the anchor for the node just sampled.
    next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
    anchors = anchors.write(layer_id, next_h[-1])
    anchors_w_1 = anchors_w_1.write(
      layer_id, tf.matmul(next_h[-1], self.w_attn_1))
    inputs = self.g_emb

    return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1,
            arc_seq, entropy, log_prob)

  loop_vars = [
    tf.constant(2, dtype=tf.int32, name="layer_id"),
    inputs,
    prev_c,
    prev_h,
    anchors,
    anchors_w_1,
    arc_seq,
    tf.constant([0.0], dtype=tf.float32, name="entropy"),
    tf.constant([0.0], dtype=tf.float32, name="log_prob"),
  ]

  loop_outputs = tf.while_loop(_condition, _body, loop_vars,
                               parallel_iterations=1)

  arc_seq = loop_outputs[-3].stack()
  arc_seq = tf.reshape(arc_seq, [-1])
  entropy = tf.reduce_sum(loop_outputs[-2])
  log_prob = tf.reduce_sum(loop_outputs[-1])

  last_c = loop_outputs[-7]
  last_h = loop_outputs[-6]

  return arc_seq, entropy, log_prob, last_c, last_h
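For reference, the flat `arc_seq` returned above packs four integers per node, in write order [index_1, op_1, index_2, op_2] (offsets 4*node + 0..3). The following is a minimal decoding sketch, not part of the original code; `decode_micro_arc` is a hypothetical helper name, and the input is assumed to be the numpy array obtained by running the `arc_seq` op in a session.

# Hypothetical helper (illustration only): split a sampled micro arc into
# per-node ((input_1, op_1), (input_2, op_2)) pairs. Indices 0 and 1 refer
# to the two cell inputs; node k may pick any of the k + 2 earlier anchors.
def decode_micro_arc(arc_seq, num_cells):
  nodes = []
  for node_id in range(num_cells):
    index_1, op_1, index_2, op_2 = arc_seq[4 * node_id: 4 * node_id + 4]
    nodes.append(((index_1, op_1), (index_2, op_2)))
  return nodes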
def _build_sampler(self):
  """Build the sampler ops and the log_prob ops."""

  print("-" * 80)
  print("Build controller sampler")

  anchors = []
  anchors_w_1 = []

  arc_seq = []
  entropys = []
  log_probs = []
  skip_count = []
  skip_penaltys = []

  prev_c = [tf.zeros([1, self.lstm_size], tf.float32)
            for _ in range(self.lstm_num_layers)]
  prev_h = [tf.zeros([1, self.lstm_size], tf.float32)
            for _ in range(self.lstm_num_layers)]
  inputs = self.g_emb
  skip_targets = tf.constant([1.0 - self.skip_target, self.skip_target],
                             dtype=tf.float32)
  for layer_id in range(self.num_layers):
    if self.search_whole_channels:
      # Sample one operation (branch) id for the whole layer.
      next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      prev_c, prev_h = next_c, next_h
      logit = tf.matmul(next_h[-1], self.w_soft)
      if self.temperature is not None:
        logit /= self.temperature
      if self.tanh_constant is not None:
        logit = self.tanh_constant * tf.tanh(logit)
      if self.search_for == "macro" or self.search_for == "branch":
        branch_id = tf.multinomial(logit, 1)
        branch_id = tf.to_int32(branch_id)
        branch_id = tf.reshape(branch_id, [1])
      elif self.search_for == "connection":
        branch_id = tf.constant([0], dtype=tf.int32)
      else:
        raise ValueError("Unknown search_for {}".format(self.search_for))
      arc_seq.append(branch_id)
      log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logit, labels=branch_id)
      log_probs.append(log_prob)
      entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
      entropys.append(entropy)
      inputs = tf.nn.embedding_lookup(self.w_emb, branch_id)
    else:
      # Sample a (start, count) channel range for every branch.
      for branch_id in range(self.num_branches):
        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h
        logit = tf.matmul(next_h[-1], self.w_soft["start"][branch_id])
        if self.temperature is not None:
          logit /= self.temperature
        if self.tanh_constant is not None:
          logit = self.tanh_constant * tf.tanh(logit)
        start = tf.multinomial(logit, 1)
        start = tf.to_int32(start)
        start = tf.reshape(start, [1])
        arc_seq.append(start)
        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logit, labels=start)
        log_probs.append(log_prob)
        entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
        entropys.append(entropy)
        inputs = tf.nn.embedding_lookup(self.w_emb["start"][branch_id], start)

        next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        prev_c, prev_h = next_c, next_h
        logit = tf.matmul(next_h[-1], self.w_soft["count"][branch_id])
        if self.temperature is not None:
          logit /= self.temperature
        if self.tanh_constant is not None:
          logit = self.tanh_constant * tf.tanh(logit)
        # Mask out counts that would run past the last output filter.
        mask = tf.range(0, limit=self.out_filters - 1, delta=1,
                        dtype=tf.int32)
        mask = tf.reshape(mask, [1, self.out_filters - 1])
        mask = tf.less_equal(mask, self.out_filters - 1 - start)
        logit = tf.where(mask, x=logit,
                         y=tf.fill(tf.shape(logit), -np.inf))
        count = tf.multinomial(logit, 1)
        count = tf.to_int32(count)
        count = tf.reshape(count, [1])
        arc_seq.append(count + 1)
        log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logit, labels=count)
        log_probs.append(log_prob)
        entropy = tf.stop_gradient(log_prob * tf.exp(-log_prob))
        entropys.append(entropy)
        inputs = tf.nn.embedding_lookup(self.w_emb["count"][branch_id], count)

    next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
    prev_c, prev_h = next_c, next_h

    if layer_id > 0:
      # Sample skip connections to each previous layer via attention.
      query = tf.concat(anchors_w_1, axis=0)
      query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
      query = tf.matmul(query, self.v_attn)
      logit = tf.concat([-query, query], axis=1)
      if self.temperature is not None:
        logit /= self.temperature
      if self.tanh_constant is not None:
        logit = self.tanh_constant * tf.tanh(logit)

      skip = tf.multinomial(logit, 1)
      skip = tf.to_int32(skip)
      skip = tf.reshape(skip, [layer_id])
      arc_seq.append(skip)

      # KL penalty pushing the skip probability toward skip_target.
      skip_prob = tf.sigmoid(logit)
      kl = skip_prob * tf.log(skip_prob / skip_targets)
      kl = tf.reduce_sum(kl)
      skip_penaltys.append(kl)

      log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logit, labels=skip)
      log_probs.append(tf.reduce_sum(log_prob, keep_dims=True))

      entropy = tf.stop_gradient(
        tf.reduce_sum(log_prob * tf.exp(-log_prob), keep_dims=True))
      entropys.append(entropy)

      skip = tf.to_float(skip)
      skip = tf.reshape(skip, [1, layer_id])
      skip_count.append(tf.reduce_sum(skip))
      inputs = tf.matmul(skip, tf.concat(anchors, axis=0))
      inputs /= (1.0 + tf.reduce_sum(skip))
    else:
      inputs = self.g_emb

    anchors.append(next_h[-1])
    anchors_w_1.append(tf.matmul(next_h[-1], self.w_attn_1))

  arc_seq = tf.concat(arc_seq, axis=0)
  self.sample_arc = tf.reshape(arc_seq, [-1])

  entropys = tf.stack(entropys)
  self.sample_entropy = tf.reduce_sum(entropys)

  log_probs = tf.stack(log_probs)
  self.sample_log_prob = tf.reduce_sum(log_probs)

  skip_count = tf.stack(skip_count)
  self.skip_count = tf.reduce_sum(skip_count)

  skip_penaltys = tf.stack(skip_penaltys)
  self.skip_penaltys = tf.reduce_mean(skip_penaltys)
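In the `search_whole_channels` case, `self.sample_arc` interleaves one operation id per layer with a 0/1 skip vector of length `layer_id` for every layer after the first. The sketch below decodes that layout; it is an illustration under that assumption, not part of the original code, and `decode_macro_arc` is a hypothetical helper name.

# Hypothetical helper (illustration only, assumes search_whole_channels=True):
# split the flat sample_arc into per-layer (op id, skip-connection bits).
def decode_macro_arc(sample_arc, num_layers):
  ops, skips, pos = [], [], 0
  for layer_id in range(num_layers):
    ops.append(int(sample_arc[pos]))          # one branch/op id per layer
    pos += 1
    skips.append([int(x) for x in sample_arc[pos:pos + layer_id]])
    pos += layer_id                           # layer 0 has no skip bits
  return ops, skips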
def _build_sampler(self):
  """Build the sampler ops and the log_prob ops."""

  arc_seq = []
  sample_log_probs = []
  sample_entropy = []
  all_h = []
  all_h_w = []

  # sampler ops
  inputs = self.g_emb
  prev_c, prev_h = [], []
  for _ in range(self.lstm_num_layers):
    prev_c.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))
    prev_h.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))

  # used = tf.zeros([self.rhn_depth, 2], dtype=tf.int32)
  for layer_id in range(self.rhn_depth):
    next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
    prev_c, prev_h = next_c, next_h
    all_h.append(next_h[-1])
    all_h_w.append(tf.matmul(next_h[-1], self.attn_w_1))

    if layer_id > 0:
      # Attention over previous layers to pick which one to connect to.
      query = tf.matmul(next_h[-1], self.attn_w_2)
      query = query + tf.concat(all_h_w[:-1], axis=0)
      query = tf.tanh(query)
      logits = tf.matmul(query, self.attn_v)
      logits = tf.reshape(logits, [1, layer_id])

      if self.temperature is not None:
        logits /= self.temperature
      if self.tanh_constant is not None:
        logits = self.tanh_constant * tf.tanh(logits)
      # Penalize connections to distant layers.
      diff = tf.to_float(layer_id - tf.range(0, layer_id)) ** 2
      logits -= tf.reshape(diff, [1, layer_id]) / 6.0

      skip_index = tf.multinomial(logits, 1)
      skip_index = tf.to_int32(skip_index)
      skip_index = tf.reshape(skip_index, [1])
      arc_seq.append(skip_index)

      log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=skip_index)
      sample_log_probs.append(log_prob)

      entropy = log_prob * tf.exp(-log_prob)
      sample_entropy.append(tf.stop_gradient(entropy))

      inputs = tf.nn.embedding_lookup(
        tf.concat(all_h[:-1], axis=0), skip_index)
      inputs /= (0.1 + tf.to_float(layer_id - skip_index))
    else:
      inputs = self.g_emb

    # Sample the activation function for this layer.
    next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
    prev_c, prev_h = next_c, next_h
    logits = tf.matmul(next_h[-1], self.w_soft)
    if self.temperature is not None:
      logits /= self.temperature
    if self.tanh_constant is not None:
      logits = self.tanh_constant * tf.tanh(logits)
    func = tf.multinomial(logits, 1)
    func = tf.to_int32(func)
    func = tf.reshape(func, [1])
    arc_seq.append(func)
    log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=func)
    sample_log_probs.append(log_prob)
    entropy = log_prob * tf.exp(-log_prob)
    sample_entropy.append(tf.stop_gradient(entropy))
    inputs = tf.nn.embedding_lookup(self.w_emb, func)

  arc_seq = tf.concat(arc_seq, axis=0)
  self.sample_arc = arc_seq

  self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
  self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

  sample_entropy = tf.concat(sample_entropy, axis=0)
  self.sample_entropy = tf.reduce_sum(sample_entropy)

  self.all_h = all_h
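This recurrent-cell sampler emits a single activation id for layer 0 and a (skip_index, activation id) pair for each later layer, so `sample_arc` holds 2 * rhn_depth - 1 integers. The sketch below decodes that layout; it is an illustration only, `decode_rhn_arc` is a hypothetical helper name, and the (0, func) convention for layer 0 is an assumption, since layer 0 always reads the cell input rather than a sampled predecessor.

# Hypothetical helper (illustration only): split sample_arc into per-layer
# (previous-layer index, activation id) pairs for an rhn_depth-layer cell.
def decode_rhn_arc(sample_arc, rhn_depth):
  layers = [(0, int(sample_arc[0]))]  # layer 0 has no sampled skip_index
  pos = 1
  for _ in range(1, rhn_depth):
    skip_index, func = int(sample_arc[pos]), int(sample_arc[pos + 1])
    layers.append((skip_index, func))
    pos += 2
  return layers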