예제 #1
0
    def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1, arc_seq,
              entropy, log_prob):
      """Sample one node: two input indices, then two op ids.

      The returned tuple has the same layout as the arguments, with
      `layer_id` advanced by one, so it can serve as a loop body. `self` and
      `use_bias` are free variables resolved in the enclosing scope, which is
      not part of this excerpt.

      Four entries are written to `arc_seq`, starting at slot
      `4 * (layer_id - 2)`, in the order [index_1, op_1, index_2, op_2].

      Args:
        layer_id: index of the node being sampled; earlier nodes are
          `0 .. layer_id - 1`.
        inputs: input fed to the first LSTM step.
        prev_c: cell states handed to `stack_lstm`.
        prev_h: hidden states handed to `stack_lstm`.
        anchors: TensorArray read at each sampled index to get the next input.
        anchors_w_1: TensorArray gathered over the earlier nodes to form the
          attention keys.
        arc_seq: TensorArray receiving the sampled indices and op ids.
        entropy: running entropy sum.
        log_prob: running sum of the sampled decisions' cross-entropies.

      Returns:
        `(layer_id + 1, self.g_emb, next_c, next_h, anchors, anchors_w_1,
        arc_seq, entropy, log_prob)` with the updated arrays and sums.
      """
      earlier_ids = tf.range(0, layer_id, dtype=tf.int32)
      slot_base = 4 * (layer_id - 2)

      # index_1, index_2: attention over the nodes written so far.
      for pick in range(2):
        prev_c, prev_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        keys = tf.reshape(anchors_w_1.gather(earlier_ids),
                          [layer_id, self.lstm_size])
        scores = tf.matmul(
          tf.tanh(keys + tf.matmul(prev_h[-1], self.w_attn_2)), self.v_attn)
        logits = tf.reshape(scores, [1, layer_id])
        if self.temperature is not None:
          logits = logits / self.temperature
        if self.tanh_constant is not None:
          logits = self.tanh_constant * tf.tanh(logits)

        chosen = tf.reshape(tf.to_int32(tf.multinomial(logits, 1)), [1])
        arc_seq = arc_seq.write(slot_base + 2 * pick, chosen)

        log_prob = log_prob + tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=chosen)
        # Cross-entropy of the distribution with itself is its entropy; it is
        # tracked as a value only, so gradients are blocked.
        entropy = entropy + tf.stop_gradient(
          tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.nn.softmax(logits)))

        # The anchor of the chosen node becomes the next LSTM input.
        inputs = anchors.read(tf.reduce_sum(chosen))

      # op_1, op_2: softmax over the operation vocabulary.
      for pick in range(2):
        prev_c, prev_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
        logits = tf.matmul(prev_h[-1], self.w_soft) + self.b_soft
        if self.temperature is not None:
          logits = logits / self.temperature
        if self.tanh_constant is not None:
          # Op logits use a tanh bound reduced by `op_tanh_reduce`.
          logits = (self.tanh_constant / self.op_tanh_reduce) * tf.tanh(logits)
        if use_bias:
          logits = logits + self.b_soft_no_learn

        op_choice = tf.reshape(tf.to_int32(tf.multinomial(logits, 1)), [1])
        arc_seq = arc_seq.write(slot_base + 2 * pick + 1, op_choice)

        log_prob = log_prob + tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits, labels=op_choice)
        entropy = entropy + tf.stop_gradient(
          tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.nn.softmax(logits)))

        inputs = tf.nn.embedding_lookup(self.w_emb, op_choice)

      # One more LSTM step produces the anchor stored for this node.
      final_c, final_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
      anchors = anchors.write(layer_id, final_h[-1])
      anchors_w_1 = anchors_w_1.write(
        layer_id, tf.matmul(final_h[-1], self.w_attn_1))

      return (layer_id + 1, self.g_emb, final_c, final_h, anchors, anchors_w_1,
              arc_seq, entropy, log_prob)
예제 #2
0
    def _build_sampler(self, prev_c=None, prev_h=None, use_bias=False):
        """Build the sampler ops and the log_prob ops.

        Two initial anchors are written first (zero anchors, with attention
        keys projected from the LSTM output). A `tf.while_loop` then samples
        nodes `2 .. self.num_cells + 1`; each iteration emits two input
        indices and two op ids.

        Args:
            prev_c: optional initial LSTM cell states. When omitted together
                with `prev_h`, one `[1, self.lstm_size]` zero tensor per LSTM
                layer is used. Presumably callers pass the same structure;
                confirm against call sites.
            prev_h: optional initial LSTM hidden states; same convention as
                `prev_c`.
            use_bias: when true, `self.b_soft_no_learn` is added to the op
                logits before sampling.

        Returns:
            `(arc_seq, entropy, log_prob, last_c, last_h)`: the flattened
            sampled sequence, the summed entropy, the summed cross-entropy of
            the sampled decisions, and the LSTM states after the loop.

        Raises:
            ValueError: if exactly one of `prev_c` and `prev_h` is None.
        """

        print("-" * 80)
        print("Build controller sampler")

        # Validate explicitly instead of with `assert`, which is stripped
        # under -O and only covered one of the two mismatched cases.
        if (prev_c is None) != (prev_h is None):
            raise ValueError(
                "prev_c and prev_h must both be None or both be provided")

        anchors = tf.TensorArray(tf.float32,
                                 size=self.num_cells + 2,
                                 clear_after_read=False)
        anchors_w_1 = tf.TensorArray(tf.float32,
                                     size=self.num_cells + 2,
                                     clear_after_read=False)
        arc_seq = tf.TensorArray(tf.int32, size=self.num_cells * 4)
        if prev_c is None:
            prev_c = [
                tf.zeros([1, self.lstm_size], tf.float32)
                for _ in range(self.lstm_num_layers)
            ]
            prev_h = [
                tf.zeros([1, self.lstm_size], tf.float32)
                for _ in range(self.lstm_num_layers)
            ]
        inputs = self.g_emb

        # Slots 0 and 1: the stored anchor is all zeros, while the attention
        # key is still projected from the LSTM output.
        for layer_id in range(2):
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            anchors = anchors.write(layer_id, tf.zeros_like(next_h[-1]))
            anchors_w_1 = anchors_w_1.write(
                layer_id, tf.matmul(next_h[-1], self.w_attn_1))

        def _condition(layer_id, *args):
            """Continue while `layer_id` is below `self.num_cells + 2`."""
            return tf.less(layer_id, self.num_cells + 2)

        def _body(layer_id, inputs, prev_c, prev_h, anchors, anchors_w_1,
                  arc_seq, entropy, log_prob):
            """Sample two input indices and two op ids for node `layer_id`.

            Writes four `arc_seq` slots starting at `4 * (layer_id - 2)` in
            the order [index_1, op_1, index_2, op_2], then stores this node's
            anchor and attention key.
            """
            indices = tf.range(0, layer_id, dtype=tf.int32)
            start_id = 4 * (layer_id - 2)
            for i in range(2):  # index_1, index_2
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                # Attention over the keys of all earlier nodes.
                query = anchors_w_1.gather(indices)
                query = tf.reshape(query, [layer_id, self.lstm_size])
                query = tf.tanh(query + tf.matmul(next_h[-1], self.w_attn_2))
                query = tf.matmul(query, self.v_attn)
                logits = tf.reshape(query, [1, layer_id])
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)
                index = tf.multinomial(logits, 1)
                index = tf.to_int32(index)
                index = tf.reshape(index, [1])
                arc_seq = arc_seq.write(start_id + 2 * i, index)
                curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=index)
                log_prob += curr_log_prob
                # Cross-entropy of the distribution with itself is its
                # entropy; gradients are blocked.
                curr_ent = tf.stop_gradient(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, labels=tf.nn.softmax(logits)))
                entropy += curr_ent
                # The chosen node's anchor is the next LSTM input.
                inputs = anchors.read(tf.reduce_sum(index))

            for i in range(2):  # op_1, op_2
                next_c, next_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                prev_c, prev_h = next_c, next_h
                logits = tf.matmul(next_h[-1], self.w_soft) + self.b_soft
                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    # Op logits use a tanh bound reduced by `op_tanh_reduce`.
                    op_tanh = self.tanh_constant / self.op_tanh_reduce
                    logits = op_tanh * tf.tanh(logits)
                if use_bias:
                    logits += self.b_soft_no_learn
                op_id = tf.multinomial(logits, 1)
                op_id = tf.to_int32(op_id)
                op_id = tf.reshape(op_id, [1])
                arc_seq = arc_seq.write(start_id + 2 * i + 1, op_id)
                curr_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=op_id)
                log_prob += curr_log_prob
                curr_ent = tf.stop_gradient(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=logits, labels=tf.nn.softmax(logits)))
                entropy += curr_ent
                inputs = tf.nn.embedding_lookup(self.w_emb, op_id)

            # One more LSTM step yields the anchor stored for this node.
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            anchors = anchors.write(layer_id, next_h[-1])
            anchors_w_1 = anchors_w_1.write(
                layer_id, tf.matmul(next_h[-1], self.w_attn_1))
            inputs = self.g_emb

            return (layer_id + 1, inputs, next_c, next_h, anchors, anchors_w_1,
                    arc_seq, entropy, log_prob)

        # Order must match the `_body` signature; the negative indices used
        # on `loop_outputs` below rely on it.
        loop_vars = [
            tf.constant(2, dtype=tf.int32, name="layer_id"),
            inputs,
            prev_c,
            prev_h,
            anchors,
            anchors_w_1,
            arc_seq,
            tf.constant([0.0], dtype=tf.float32, name="entropy"),
            tf.constant([0.0], dtype=tf.float32, name="log_prob"),
        ]

        loop_outputs = tf.while_loop(_condition,
                                     _body,
                                     loop_vars,
                                     parallel_iterations=1)

        arc_seq = loop_outputs[-3].stack()
        arc_seq = tf.reshape(arc_seq, [-1])
        entropy = tf.reduce_sum(loop_outputs[-2])
        log_prob = tf.reduce_sum(loop_outputs[-1])

        # Positions -7 and -6 are the prev_c / prev_h loop variables.
        last_c = loop_outputs[-7]
        last_h = loop_outputs[-6]

        return arc_seq, entropy, log_prob, last_c, last_h
예제 #3
0
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops.

        For each of `self.num_layers` layers the controller samples either a
        single branch id (when `self.search_whole_channels` is set) or a
        (start, count) pair per branch. For every layer after the first it
        also samples a binary skip decision for each earlier layer.

        Sets:
            self.sample_arc: all sampled decisions, flattened to 1-D.
            self.sample_entropy: sum of the per-decision entropy terms.
            self.sample_log_prob: sum of the per-decision cross-entropies.
            self.skip_count: total number of sampled skip connections.
            self.skip_penaltys: mean of the per-layer KL terms between the
                skip probabilities and `self.skip_target`.

        Raises:
            ValueError: if `self.search_whole_channels` is set and
                `self.search_for` is not "macro", "branch" or "connection".
        """

        print("-" * 80)
        print("Build controller sampler")

        layer_anchors = []
        layer_anchor_keys = []
        decisions = []
        entropy_terms = []
        log_prob_terms = []
        skips_per_layer = []
        kl_per_layer = []

        def _scale(raw_logit):
            """Apply the optional temperature, then the optional tanh bound."""
            if self.temperature is not None:
                raw_logit /= self.temperature
            if self.tanh_constant is not None:
                raw_logit = self.tanh_constant * tf.tanh(raw_logit)
            return raw_logit

        def _draw(logit):
            """Sample one class id from `logit` as an int32 tensor of shape [1]."""
            return tf.reshape(tf.to_int32(tf.multinomial(logit, 1)), [1])

        def _score(logit, label):
            """Record the cross-entropy and entropy term of a single decision."""
            nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logit, labels=label)
            log_prob_terms.append(nll)
            entropy_terms.append(tf.stop_gradient(nll * tf.exp(-nll)))

        prev_c = [
            tf.zeros([1, self.lstm_size], tf.float32)
            for _ in range(self.lstm_num_layers)
        ]
        prev_h = [
            tf.zeros([1, self.lstm_size], tf.float32)
            for _ in range(self.lstm_num_layers)
        ]
        inputs = self.g_emb
        skip_targets = tf.constant([1.0 - self.skip_target, self.skip_target],
                                   dtype=tf.float32)

        for layer_id in range(self.num_layers):
            if self.search_whole_channels:
                # One categorical decision: which branch this layer uses.
                prev_c, prev_h = stack_lstm(inputs, prev_c, prev_h,
                                            self.w_lstm)
                logit = _scale(tf.matmul(prev_h[-1], self.w_soft))
                if self.search_for in ("macro", "branch"):
                    branch_id = _draw(logit)
                elif self.search_for == "connection":
                    branch_id = tf.constant([0], dtype=tf.int32)
                else:
                    raise ValueError("Unknown search_for {}".format(
                        self.search_for))
                decisions.append(branch_id)
                _score(logit, branch_id)
                inputs = tf.nn.embedding_lookup(self.w_emb, branch_id)
            else:
                # Two decisions per branch: a start value, then a count.
                for branch in range(self.num_branches):
                    prev_c, prev_h = stack_lstm(inputs, prev_c, prev_h,
                                                self.w_lstm)
                    logit = _scale(
                        tf.matmul(prev_h[-1], self.w_soft["start"][branch]))
                    start = _draw(logit)
                    decisions.append(start)
                    _score(logit, start)
                    inputs = tf.nn.embedding_lookup(
                        self.w_emb["start"][branch], start)

                    prev_c, prev_h = stack_lstm(inputs, prev_c, prev_h,
                                                self.w_lstm)
                    logit = _scale(
                        tf.matmul(prev_h[-1], self.w_soft["count"][branch]))
                    # Keep only counts at or below
                    # `out_filters - 1 - start`; the rest get -inf logits.
                    positions = tf.range(0,
                                         limit=self.out_filters - 1,
                                         delta=1,
                                         dtype=tf.int32)
                    positions = tf.reshape(positions,
                                           [1, self.out_filters - 1])
                    allowed = tf.less_equal(positions,
                                            self.out_filters - 1 - start)
                    logit = tf.where(allowed,
                                     x=logit,
                                     y=tf.fill(tf.shape(logit), -np.inf))
                    count = _draw(logit)
                    # The recorded value is one larger than the sampled id.
                    decisions.append(count + 1)
                    _score(logit, count)
                    inputs = tf.nn.embedding_lookup(
                        self.w_emb["count"][branch], count)

            prev_c, prev_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)

            if layer_id == 0:
                # No earlier layers to connect to.
                inputs = self.g_emb
            else:
                # One two-way (no-skip / skip) decision per earlier layer.
                query = tf.concat(layer_anchor_keys, axis=0)
                query = tf.tanh(query + tf.matmul(prev_h[-1], self.w_attn_2))
                query = tf.matmul(query, self.v_attn)
                logit = _scale(tf.concat([-query, query], axis=1))

                skip = tf.reshape(tf.to_int32(tf.multinomial(logit, 1)),
                                  [layer_id])
                decisions.append(skip)

                skip_prob = tf.sigmoid(logit)
                kl_per_layer.append(
                    tf.reduce_sum(
                        skip_prob * tf.log(skip_prob / skip_targets)))

                nll = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logit, labels=skip)
                log_prob_terms.append(tf.reduce_sum(nll, keep_dims=True))
                entropy_terms.append(
                    tf.stop_gradient(
                        tf.reduce_sum(nll * tf.exp(-nll), keep_dims=True)))

                # Next input: sum of the selected anchors divided by
                # (1 + number of skips).
                skip_row = tf.reshape(tf.to_float(skip), [1, layer_id])
                skips_per_layer.append(tf.reduce_sum(skip_row))
                inputs = tf.matmul(skip_row, tf.concat(layer_anchors, axis=0))
                inputs /= (1.0 + tf.reduce_sum(skip_row))

            layer_anchors.append(prev_h[-1])
            layer_anchor_keys.append(tf.matmul(prev_h[-1], self.w_attn_1))

        self.sample_arc = tf.reshape(tf.concat(decisions, axis=0), [-1])
        self.sample_entropy = tf.reduce_sum(tf.stack(entropy_terms))
        self.sample_log_prob = tf.reduce_sum(tf.stack(log_prob_terms))
        self.skip_count = tf.reduce_sum(tf.stack(skips_per_layer))
        self.skip_penaltys = tf.reduce_mean(tf.stack(kl_per_layer))
예제 #4
0
    def _build_sampler(self):
        """Build the sampler ops and the log_prob ops.

        For each of `self.rhn_depth` layers the controller samples a function
        id from `self.w_soft`. For every layer after the first it first
        samples the index of one earlier layer to connect to.

        Sets:
            self.sample_arc: concatenation of all sampled decisions.
            self.sample_log_probs: per-decision cross-entropies (negative
                log-probabilities of the sampled choices).
            self.ppl: `exp(mean(self.sample_log_probs))`.
            self.sample_entropy: sum of the per-decision entropy terms.
            self.all_h: list of the top-layer hidden states recorded at the
                start of each layer.
        """

        arc_seq = []
        sample_log_probs = []
        sample_entropy = []
        all_h = []
        all_h_w = []

        # sampler ops
        inputs = self.g_emb
        prev_c, prev_h = [], []
        # `range` rather than `xrange`: the latter does not exist on Python 3
        # unless imported from a compatibility layer.
        for _ in range(self.lstm_num_layers):
            prev_c.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))
            prev_h.append(tf.zeros([1, self.lstm_size], dtype=tf.float32))

        for layer_id in range(self.rhn_depth):
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            all_h.append(next_h[-1])
            all_h_w.append(tf.matmul(next_h[-1], self.attn_w_1))

            if layer_id > 0:
                # Attention over every earlier layer (the entry just appended
                # for the current layer is excluded via [:-1]).
                query = tf.matmul(next_h[-1], self.attn_w_2)
                query = query + tf.concat(all_h_w[:-1], axis=0)
                query = tf.tanh(query)
                logits = tf.matmul(query, self.attn_v)
                logits = tf.reshape(logits, [1, layer_id])

                if self.temperature is not None:
                    logits /= self.temperature
                if self.tanh_constant is not None:
                    logits = self.tanh_constant * tf.tanh(logits)
                # Subtract squared distance / 6 so that farther-back layers
                # receive lower logits.
                diff = tf.to_float(layer_id - tf.range(0, layer_id))**2
                logits -= tf.reshape(diff, [1, layer_id]) / 6.0

                skip_index = tf.multinomial(logits, 1)
                skip_index = tf.to_int32(skip_index)
                skip_index = tf.reshape(skip_index, [1])
                arc_seq.append(skip_index)

                log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=skip_index)
                sample_log_probs.append(log_prob)

                entropy = log_prob * tf.exp(-log_prob)
                sample_entropy.append(tf.stop_gradient(entropy))

                # Next input: the chosen layer's hidden state, scaled down by
                # its distance from the current layer (plus 0.1).
                inputs = tf.nn.embedding_lookup(tf.concat(all_h[:-1], axis=0),
                                                skip_index)
                inputs /= (0.1 + tf.to_float(layer_id - skip_index))
            else:
                inputs = self.g_emb

            # Sample the function id for this layer.
            next_c, next_h = stack_lstm(inputs, prev_c, prev_h, self.w_lstm)
            prev_c, prev_h = next_c, next_h
            logits = tf.matmul(next_h[-1], self.w_soft)
            if self.temperature is not None:
                logits /= self.temperature
            if self.tanh_constant is not None:
                logits = self.tanh_constant * tf.tanh(logits)
            func = tf.multinomial(logits, 1)
            func = tf.to_int32(func)
            func = tf.reshape(func, [1])
            arc_seq.append(func)
            log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=func)
            sample_log_probs.append(log_prob)
            entropy = log_prob * tf.exp(-log_prob)
            sample_entropy.append(tf.stop_gradient(entropy))
            inputs = tf.nn.embedding_lookup(self.w_emb, func)

        arc_seq = tf.concat(arc_seq, axis=0)
        self.sample_arc = arc_seq

        self.sample_log_probs = tf.concat(sample_log_probs, axis=0)
        self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

        sample_entropy = tf.concat(sample_entropy, axis=0)
        self.sample_entropy = tf.reduce_sum(sample_entropy)

        self.all_h = all_h