def split_char_normal(image, width=64, height=64, char_width=4):
        """
        :param image:
        :return: images
        Split the image into characters and normalize each one
        """

        images = []
        x = util.projection(image)
        bounds = []
        draw_bounds = []
        # Pad with the background level (the minimum) in front and a peak at
        # the end so every character run yields a rising and a falling edge.
        x = [min(x)] + x + [max(x)]
        for i in xrange(len(x) - 1):
            if x[i] <= x[0] < x[i + 1]:    # projection rises above background
                bounds.append(i)
            elif x[i] > x[0] >= x[i + 1]:  # projection falls back to background
                bounds.append(i - 1)
        for i in xrange(0, len(bounds), 2):
            if i + 1 < len(bounds) and bounds[i + 1] - bounds[i] >= char_width:
                image_char = image.crop((bounds[i], 0, bounds[i + 1], image.size[1]))
                y1, y2 = util.get_width(util.projection(image_char, lambda a, b: b), True)
                sig_char_image = image_char.crop((0, y1, image_char.size[0], y2))
                draw_bounds.append((bounds[i], y1, bounds[i + 1], y2))
                sig_char_image = sig_char_image.resize((width, height))
                images.append(sig_char_image)
        image = image.convert('RGB')
        draw = ImageDraw.ImageDraw(image)
        for bound in draw_bounds:
            x1, y1, x2, y2 = bound
            draw.line((x1, y1, x1, y2), fill=(255, 0, 0), width=2)
            draw.line((x1, y1, x2, y1), fill=(255, 0, 0), width=2)
            draw.line((x2, y1, x2, y2), fill=(255, 0, 0), width=2)
            draw.line((x1, y2, x2, y2), fill=(255, 0, 0), width=2)
        images.append(image)
        return images
    def featExt(image, label, width=16, height=16):
        """
        :param: image
        :return: feat

        Extract feature values
        """

        feat = []
        x, y = image.size
        bound_x = util.projection(image)
        bound_y = util.projection(image, lambda a, b: b)
        feat.append(util.get_max_min(bound_x)[1])
        feat.append(util.get_max_min(bound_y)[1])
        for i in xrange(x / width):
            for j in xrange(y / height):
                x1 = i * width
                x2 = x1 + width
                y1 = j * height
                y2 = y1 + height
                local_image = image.crop((x1, y1, x2, y2))
                feat.append(sum(util.projection(local_image)))

        util.normal(feat)
        feat.append(label)
        return feat
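# Usage sketch (not part of the original snippet): a minimal driver for the
# two helpers above, assuming a binarized single-line PIL image and this
# repository's `util` module.  The file names and labels are placeholders.
from PIL import Image

captcha = Image.open('captcha.png').convert('L')
results = split_char_normal(captcha, width=64, height=64, char_width=4)
chars, annotated = results[:-1], results[-1]  # the annotated source image comes last
annotated.save('bounds.png')
feats = [featExt(char_image, label=0, width=16, height=16) for char_image in chars]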
Example #3
def get_rel_scores(entity_emb, entity_scores, num_labels, config, dropout, num_predicted_entities):
  num_sentences = util.shape(entity_emb, 0)
  num_entities = util.shape(entity_emb, 1)
  entities_mask = tf.sequence_mask(num_predicted_entities, num_entities) #[num_sentences, num_entities]
  flat_entities_mask = tf.reshape(entities_mask, [-1]) 
  rel_mask = tf.logical_and(
      tf.expand_dims(entities_mask, 2),  # [num_sentences, max_num_entities, 1]
      tf.expand_dims(entities_mask, 1))  # [num_sentences, 1, max_num_entities]
  e1_emb_expanded = tf.expand_dims(entity_emb, 2)  # [num_sents, num_ents, 1, emb]
  e2_emb_expanded = tf.expand_dims(entity_emb, 1)  # [num_sents, 1, num_ents, emb]
  e1_emb_tiled = tf.tile(e1_emb_expanded, [1, 1, num_entities, 1])  # [num_sents, num_ents, num_ents, emb]
  e2_emb_tiled = tf.tile(e2_emb_expanded, [1, num_entities, 1, 1])  # [num_sents, num_ents, num_ents, emb]
  

  similarity_emb = e1_emb_expanded * e2_emb_expanded  # [num_sents, num_ents, num_ents, emb]

  pair_emb_list = [e1_emb_tiled, e2_emb_tiled, similarity_emb]

  pair_emb = tf.concat(pair_emb_list, 3)  # [num_sentences, num_ents, num_ents, emb]
  pair_emb_size = util.shape(pair_emb, 3)
  flat_pair_emb = tf.reshape(pair_emb, [num_sentences * num_entities * num_entities, pair_emb_size])

  flat_rel_scores = get_unary_scores(flat_pair_emb, config, dropout, num_labels - 1,
      "relation_scores")  # [num_sentences * num_ents * num_ents, num_labels-1]
  rel_scores = tf.reshape(flat_rel_scores, [num_sentences, num_entities, num_entities, num_labels - 1])
  rel_scores += tf.expand_dims(tf.expand_dims(entity_scores, 2), 3) + tf.expand_dims(
      tf.expand_dims(entity_scores, 1), 3)  # [num_sentences, ents, max_num_ents, num_labels-1]
  if config['rel_prop']:
    flat_rel_scores = tf.reshape(rel_scores, [num_sentences * num_entities * num_entities, num_labels - 1])
    with tf.variable_scope("rel_W"):
      entity_emb_size = util.shape(entity_emb, -1)
      relation_transition = util.projection(tf.nn.relu(flat_rel_scores), entity_emb_size) #f(V)A_R in Eq. 3
      e2_emb_tiled = tf.reshape(e2_emb_tiled, [num_sentences * num_entities * num_entities, entity_emb_size])
      rel_mask = tf.reshape(rel_mask, [-1])
      transformed_embeddings = tf.multiply(tf.transpose(relation_transition * e2_emb_tiled), tf.to_float(rel_mask)) #[entity_emb_size, num_sents * num_ents * num_ents]
      transformed_embeddings = tf.transpose(transformed_embeddings) # [num_sents * num_ents * num_ents, entity_emb_size]
      transformed_embeddings = tf.reshape(transformed_embeddings, [num_sentences, num_entities, num_entities, entity_emb_size]) #[num_sents, num_ents, num_ents, entity_emb_size]
      transformed_embeddings = tf.reduce_sum(transformed_embeddings, 2) #[num_sents, num_ents, entity_emb_size]
      transformed_embeddings = tf.reshape(transformed_embeddings, [num_sentences * num_entities, entity_emb_size])
      entity_emb = tf.reshape(entity_emb, [num_sentences * num_entities, entity_emb_size]) 
      with tf.variable_scope("f"):
        f = tf.sigmoid(util.projection(tf.concat([transformed_embeddings, entity_emb], 1), entity_emb_size)) # [num_sents * num_ents, entity_emb_size]
        entity_emb = f * transformed_embeddings + (1 - f) * entity_emb # [num_sents * num_ents, entity_emb_size]
      entity_emb = tf.reshape(entity_emb, [num_sentences, num_entities, entity_emb_size])
      
      
  dummy_scores = tf.zeros([num_sentences, num_entities, num_entities, 1], tf.float32)
  rel_scores = tf.concat([dummy_scores, rel_scores], 3)  # [num_sentences, max_num_ents, max_num_ents, num_labels]
  if config['rel_prop']:
    return rel_scores, entity_emb, flat_entities_mask
  else:
    return rel_scores  # [num_sentences, num_entities, num_entities, num_labels]
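# Illustration (not part of the original snippet): the `rel_prop` branch ends
# with a highway-style gate that mixes the relation-propagated messages with
# the old entity embeddings.  A numpy sketch of just that gate, using random
# stand-ins for the real tensors and for util.projection's learned weight:
import numpy as np

rng = np.random.RandomState(0)
num_ents, emb_size = 4, 8
entity_emb_np = rng.randn(num_ents, emb_size)
transformed_np = rng.randn(num_ents, emb_size)  # stand-in for the propagated messages
W = rng.randn(2 * emb_size, emb_size)           # stand-in projection weight (bias omitted)

f_gate = 1.0 / (1.0 + np.exp(-np.concatenate([transformed_np, entity_emb_np], 1).dot(W)))
entity_emb_np = f_gate * transformed_np + (1 - f_gate) * entity_emb_np  # gated update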
  def get_mention_emb(self, text_emb, text_outputs, mention_starts, mention_ends):
    mention_emb_list = []

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_emb_list.append(mention_start_emb)

    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_emb_list.append(mention_end_emb)

    mention_width = 1 + mention_ends - mention_starts # [num_mentions]
    if self.config["use_features"]:
      mention_width_index = mention_width - 1 # [num_mentions]
      mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.config["max_mention_width"], self.config["feature_size"]]), mention_width_index) # [num_mentions, emb]
      mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
      mention_emb_list.append(mention_width_emb)

    if self.config["model_heads"]:
      mention_indices = tf.expand_dims(tf.range(self.config["max_mention_width"]), 0) + tf.expand_dims(mention_starts, 1) # [num_mentions, max_mention_width]
      mention_indices = tf.minimum(util.shape(text_outputs, 0) - 1, mention_indices) # [num_mentions, max_mention_width]
      mention_text_emb = tf.gather(text_emb, mention_indices) # [num_mentions, max_mention_width, emb]
      self.head_scores = util.projection(text_outputs, 1) # [num_words, 1]
      mention_head_scores = tf.gather(self.head_scores, mention_indices) # [num_mentions, max_mention_width, 1]
      mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.config["max_mention_width"], dtype=tf.float32), 2) # [num_mentions, max_mention_width, 1]
      mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask), dim=1) # [num_mentions, max_mention_width, 1]
      mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1) # [num_mentions, emb]
      mention_emb_list.append(mention_head_emb)

    mention_emb = tf.concat(mention_emb_list, 1) # [num_mentions, emb]
    return mention_emb
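# Illustration (not part of the original snippet): the head-attention step is
# a masked softmax over each mention's words; adding log(mask) sends padded
# positions to -inf before the softmax.  A numpy sketch with random data and
# the shapes from the comments above:
import numpy as np

rng = np.random.RandomState(0)
num_mentions, max_width, emb_size = 2, 5, 4
head_scores_np = rng.randn(num_mentions, max_width, 1)
text_emb_np = rng.randn(num_mentions, max_width, emb_size)
widths = np.array([3, 5])
mask = (np.arange(max_width)[None, :] < widths[:, None])[:, :, None]  # [2, 5, 1]

scores = np.where(mask, head_scores_np, -np.inf)
scores -= scores.max(axis=1, keepdims=True)
attn = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # [num_mentions, max_width, 1]
head_emb_np = (attn * text_emb_np).sum(axis=1)                     # [num_mentions, emb]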
Example #5
  def get_mention_emb(self, text_emb, text_outputs, mention_starts, mention_ends):
    mention_emb_list = []

    mention_start_emb = tf.gather(text_outputs, mention_starts) # [num_mentions, emb]
    mention_emb_list.append(mention_start_emb)

    mention_end_emb = tf.gather(text_outputs, mention_ends) # [num_mentions, emb]
    mention_emb_list.append(mention_end_emb)

    mention_width = 1 + mention_ends - mention_starts # [num_mentions]
    if self.config["use_features"]:
      mention_width_index = mention_width - 1 # [num_mentions]
      mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.config["max_mention_width"], self.config["feature_size"]]), mention_width_index) # [num_mentions, emb]
      mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
      mention_emb_list.append(mention_width_emb)

    if self.config["model_heads"]:
      mention_indices = tf.expand_dims(tf.range(self.config["max_mention_width"]), 0) + tf.expand_dims(mention_starts, 1) # [num_mentions, max_mention_width]
      mention_indices = tf.minimum(util.shape(text_outputs, 0) - 1, mention_indices) # [num_mentions, max_mention_width]
      mention_text_emb = tf.gather(text_emb, mention_indices) # [num_mentions, max_mention_width, emb]
      self.head_scores = util.projection(text_outputs, 1) # [num_words, 1]
      mention_head_scores = tf.gather(self.head_scores, mention_indices) # [num_mentions, max_mention_width, 1]
      mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.config["max_mention_width"], dtype=tf.float32), 2) # [num_mentions, max_mention_width, 1]
      mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask), dim=1) # [num_mentions, max_mention_width, 1]
      mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1) # [num_mentions, emb]
      mention_emb_list.append(mention_head_emb)

    mention_emb = tf.concat(mention_emb_list, 1) # [num_mentions, emb]
    return mention_emb
Example #6
  def get_span_emb(self, head_emb, context_outputs, span_starts, span_ends):
    span_emb_list = []

    span_start_emb = tf.gather(context_outputs, span_starts) # [k, emb]
    span_emb_list.append(span_start_emb)

    span_end_emb = tf.gather(context_outputs, span_ends) # [k, emb]
    span_emb_list.append(span_end_emb)

    span_width = 1 + span_ends - span_starts # [k]

    if self.config["use_features"]:
      span_width_index = span_width - 1 # [k]
      span_width_emb = tf.gather(tf.get_variable("span_width_embeddings", [self.config["max_span_width"], self.config["feature_size"]]), span_width_index) # [k, emb]
      span_width_emb = tf.nn.dropout(span_width_emb, self.dropout)
      span_emb_list.append(span_width_emb)

    if self.config["model_heads"]:
      span_indices = tf.expand_dims(tf.range(self.config["max_span_width"]), 0) + tf.expand_dims(span_starts, 1) # [k, max_span_width]
      span_indices = tf.minimum(util.shape(context_outputs, 0) - 1, span_indices) # [k, max_span_width]
      span_text_emb = tf.gather(head_emb, span_indices) # [k, max_span_width, emb]
      with tf.variable_scope("head_scores"):
        self.head_scores = util.projection(context_outputs, 1) # [num_words, 1]
      span_head_scores = tf.gather(self.head_scores, span_indices) # [k, max_span_width, 1]
      span_mask = tf.expand_dims(tf.sequence_mask(span_width, self.config["max_span_width"], dtype=tf.float32), 2) # [k, max_span_width, 1]
      span_head_scores += tf.log(span_mask) # [k, max_span_width, 1]
      span_attention = tf.nn.softmax(span_head_scores, 1) # [k, max_span_width, 1]
      span_head_emb = tf.reduce_sum(span_attention * span_text_emb, 1) # [k, emb]
      span_emb_list.append(span_head_emb)

    span_emb = tf.concat(span_emb_list, 1) # [k, emb]
    return span_emb # [k, emb]
Example #7
  def lstm_contextualize(self, text_emb, text_len, text_len_mask):
    num_sentences = tf.shape(text_emb)[0]

    current_inputs = text_emb # [num_sentences, max_sentence_length, emb]

    for layer in range(self.config["contextualization_layers"]):
      with tf.variable_scope("layer_{}".format(layer)):
        with tf.variable_scope("fw_cell"):
          cell_fw = util.CustomLSTMCell(self.config["contextualization_size"], num_sentences, self.lstm_dropout)
        with tf.variable_scope("bw_cell"):
          cell_bw = util.CustomLSTMCell(self.config["contextualization_size"], num_sentences, self.lstm_dropout)
        state_fw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_fw.initial_state.c, [num_sentences, 1]), tf.tile(cell_fw.initial_state.h, [num_sentences, 1]))
        state_bw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_bw.initial_state.c, [num_sentences, 1]), tf.tile(cell_bw.initial_state.h, [num_sentences, 1]))

        (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
          cell_fw=cell_fw,
          cell_bw=cell_bw,
          inputs=current_inputs,
          sequence_length=text_len,
          initial_state_fw=state_fw,
          initial_state_bw=state_bw)

        text_outputs = tf.concat([fw_outputs, bw_outputs], 2) # [num_sentences, max_sentence_length, emb]
        text_outputs = tf.nn.dropout(text_outputs, self.lstm_dropout)
        if layer > 0:
          highway_gates = tf.sigmoid(util.projection(text_outputs, util.shape(text_outputs, 2))) # [num_sentences, max_sentence_length, emb]
          text_outputs = highway_gates * text_outputs + (1 - highway_gates) * current_inputs
        current_inputs = text_outputs

    return self.flatten_emb_by_sentence(text_outputs, text_len_mask)
Example #8
def get_span_emb(head_emb, context_outputs, span_starts, span_ends, config, dropout):
  """Compute span representation shared across tasks.
  Args:
    head_emb: Tensor of [num_words, emb]
    context_outputs: Tensor of [num_words, emb]
    span_starts: [num_spans]
    span_ends: [num_spans]
  """
  text_length = util.shape(context_outputs, 0)
  num_spans = util.shape(span_starts, 0)

  max_arg_width = config["max_arg_width"]
  num_heads = config["num_attention_heads"]

  span_start_emb = tf.gather(context_outputs, span_starts)  # [num_words, emb]
  span_end_emb = tf.gather(context_outputs, span_ends)  # [num_words, emb]

  if max_arg_width > 1:
    span_emb_list = [span_start_emb, span_end_emb]
  else:
    span_emb_list = [span_start_emb]

  # span_emb_list = [span_start_emb, span_end_emb]

  span_width = 1 + span_ends - span_starts # [num_spans]
  
  if config["use_features"] and max_arg_width > 1: #
    span_width_index = span_width - 1  # [num_spans]
    span_width_emb = tf.gather(
        tf.get_variable("span_width_embeddings", [max_arg_width, config["feature_size"]]),
        span_width_index)  # [num_spans, emb]
    span_width_emb = tf.nn.dropout(span_width_emb, dropout)
    span_emb_list.append(span_width_emb)

  head_scores = None
  span_text_emb = None
  span_indices = None
  span_indices_log_mask = None

  if config["model_heads"]: # and max_arg_width > 1
    if max_arg_width > 1:
      span_indices = tf.minimum(
          tf.expand_dims(tf.range(max_arg_width), 0) + tf.expand_dims(span_starts, 1),
          text_length - 1)  # [num_spans, max_span_width]
      span_text_emb = tf.gather(head_emb, span_indices)  # [num_spans, max_arg_width, emb]
      span_indices_log_mask = tf.log(
          tf.sequence_mask(span_width, max_arg_width, dtype=tf.float32)) # [num_spans, max_arg_width]
      with tf.variable_scope("head_scores"):
        head_scores = util.projection(context_outputs, num_heads)  # [num_words, num_heads]
      span_attention = tf.nn.softmax(
        tf.gather(head_scores, span_indices) + tf.expand_dims(span_indices_log_mask, 2),
        dim=1)  # [num_spans, max_arg_width, num_heads]
      span_head_emb = tf.reduce_sum(span_attention * span_text_emb, 1)  # [num_spans, emb]
    else:
      span_head_emb = tf.gather(head_emb, span_starts)
    span_emb_list.append(span_head_emb)

  span_emb = tf.concat(span_emb_list, 1) # [num_spans, emb]

  return span_emb, head_scores, span_text_emb, span_indices, span_indices_log_mask
Example #9
def get_fast_antecedent_scores(top_span_emb, dropout):
    with tf.variable_scope("src_projection"):
        source_top_span_emb = tf.nn.dropout(
            util.projection(top_span_emb, util.shape(top_span_emb, -1)),
            dropout)  # [k, emb]
    target_top_span_emb = tf.nn.dropout(top_span_emb, dropout)  # [k, emb]
    return tf.matmul(source_top_span_emb,
                     target_top_span_emb,
                     transpose_b=True)  # [k, k]
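# Illustration (not part of the original snippet): dropout aside, this is a
# bilinear score s(i, j) = e_i^T W e_j computed for all span pairs with one
# matmul.  A numpy sketch (W stands in for util.projection's learned weight,
# bias omitted):
import numpy as np

rng = np.random.RandomState(0)
k, emb_size = 5, 8
top_span_emb_np = rng.randn(k, emb_size)
W = rng.randn(emb_size, emb_size)

source = top_span_emb_np.dot(W)              # [k, emb]
fast_scores = source.dot(top_span_emb_np.T)  # [k, k]; entry (i, j) = e_i W e_j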
Example #10
 def get_masked_mention_word_scores(self, encoded_doc, span_starts, span_ends):
     num_words = util.shape(encoded_doc, 0) # T
     num_c = util.shape(span_starts, 0) # NC
     doc_range = tf.tile(tf.expand_dims(tf.range(0, num_words), 0), [num_c, 1]) # [K, T]
     mention_mask = tf.logical_and(doc_range >= tf.expand_dims(span_starts, 1), doc_range <= tf.expand_dims(span_ends, 1)) #[K, T]
     with tf.variable_scope("mention_word_attn"):
       word_attn = tf.squeeze(util.projection(encoded_doc, 1, initializer=tf.truncated_normal_initializer(stddev=0.02)), 1)
     mention_word_attn = tf.nn.softmax(tf.log(tf.to_float(mention_mask)) + tf.expand_dims(word_attn, 0))
     return mention_word_attn
Example #11
    def get_mention_emb(self, text_emb, text_outputs, mention_starts,
                        mention_ends):
        mention_emb_list = []

        mention_start_emb = tf.gather(text_outputs,
                                      mention_starts)  # [num_mentions, emb]
        mention_emb_list.append(mention_start_emb)

        mention_end_emb = tf.gather(text_outputs,
                                    mention_ends)  # [num_mentions, emb]
        mention_emb_list.append(mention_end_emb)

        mention_width = 1 + mention_ends - mention_starts  # [num_mentions]
        if self.config["use_features"]:
            mention_width_index = mention_width - 1  # [num_mentions]
            temp_tensor = tf.zeros([
                self.config["max_mention_width"], self.config["feature_size"]
            ])
            nn.init.xavier_uniform(temp_tensor)
            mention_width_emb = tf.gather(
                temp_tensor, mention_width_index)  # [num_mentions, emb]
            mention_width_emb = F.dropout(mention_width_emb, self.dropout)
            mention_emb_list.append(mention_width_emb)

        if self.config["model_heads"]:
            mention_indices = tf.unsqueeze(
                tf.range(self.config["max_mention_width"]), 0) + tf.unsqueeze(
                    mention_starts, 1)  # [num_mentions, max_mention_width]
            mention_indices = tf.min(
                (util.shape(text_outputs, 0) - 1),
                mention_indices)  # [num_mentions, max_mention_width]
            mention_text_emb = tf.gather(
                text_emb,
                mention_indices)  # [num_mentions, max_mention_width, emb]
            self.head_scores = util.projection(text_outputs,
                                               1)  # [num_words, 1]
            mention_head_scores = tf.gather(
                self.head_scores,
                mention_indices)  # [num_mentions, max_mention_width, 1]
            mention_mask = tf.unsqueeze(
                tf.sequence_mask(mention_width,
                                 self.config["max_mention_width"],
                                 dtype=tf.float32),
                2)  # [num_mentions, max_mention_width, 1]
            mention_attention = F.softmax(
                mention_head_scores + tf.log(mention_mask),
                dim=1)  # [num_mentions, max_mention_width, 1]
            mention_head_emb = tf.sum(mention_attention * mention_text_emb,
                                      1)  # [num_mentions, emb]
            mention_emb_list.append(mention_head_emb)

        mention_emb = tf.cat(mention_emb_list, 1)  # [num_mentions, emb]
        return mention_emb
Example #12
    def coarse_to_fine_pruning(self, top_span_emb, top_span_mention_scores, c):
        k = util.shape(top_span_emb, 0)
        top_span_range = tf.range(k)  # [k]
        antecedent_offsets = tf.expand_dims(
            top_span_range, 1) - tf.expand_dims(top_span_range, 0)  # [k, k]
        antecedents_mask = antecedent_offsets >= 1  # [k, k]
        fast_antecedent_scores = tf.expand_dims(
            top_span_mention_scores, 1) + tf.expand_dims(
                top_span_mention_scores, 0)  # [k, k]
        fast_antecedent_scores += tf.log(
            tf.to_float(antecedents_mask))  # [k, k]
        fast_antecedent_scores += self.get_fast_antecedent_scores(
            top_span_emb)  # [k, k]
        antecedent_distance_buckets = None  # set only when use_prior is enabled
        antecedent_distance_scores = None
        if self.config['use_prior']:
            antecedent_distance_buckets = self.bucket_distance(
                antecedent_offsets)  # [k, k]
            distance_scores = util.projection(
                tf.nn.dropout(
                    tf.get_variable(
                        "antecedent_distance_emb",
                        [10, self.config["feature_size"]],
                        initializer=tf.truncated_normal_initializer(
                            stddev=0.02)), self.dropout),
                1,
                initializer=tf.truncated_normal_initializer(
                    stddev=0.02))  #[10, 1]
            antecedent_distance_scores = tf.gather(
                tf.squeeze(distance_scores, 1),
                antecedent_distance_buckets)  # [k, k]
            fast_antecedent_scores += antecedent_distance_scores

        _, top_antecedents = tf.nn.top_k(fast_antecedent_scores,
                                         c,
                                         sorted=True)  # [k, c]
        top_antecedents_mask = util.batch_gather(antecedents_mask,
                                                 top_antecedents)  # [k, c]
        top_fast_antecedent_scores = util.batch_gather(
            fast_antecedent_scores, top_antecedents)  # [k, c]
        top_antecedent_offsets = util.batch_gather(antecedent_offsets,
                                                   top_antecedents)  # [k, c]

        self.top_antecedents_idx = top_antecedents
        self.top_antecedents_mask = top_antecedents_mask
        self.top_fast_antecedent_scores = top_fast_antecedent_scores
        self.top_antecedent_offsets = top_antecedent_offsets
        self.antecedent_distance_buckets = antecedent_distance_buckets
        self.antecedent_distance_scores = antecedent_distance_scores
        self.fast_antecedent_scores = fast_antecedent_scores
        return top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets
Example #13
  def lstm_contextualize(self, text_emb, text_len, text_len_mask):
    # self.a = text_emb
    # self.b = text_len
    # self.c = text_len_mask
    num_sentences = tf.shape(text_emb)[0]
    # text_emb = model.a
    # text_len = model.b
    # text_len_mask = model.c
    # num_sentences = tf.shape(text_emb)[0]
    # max_sentence_length = tf.shape(text_emb)[1]
    # session.run([num_sentences, max_sentence_length])
    # sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    # x=  session.run([sentence_indices, num_sentences  ,  max_sentence_length , text_len_mask , text_len , text_emb ])
    # flattened_sentence_indices = model.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    # flattened_text_emb = self..flatten_emb_by_sentence(text_emb, text_len_mask) # [num_words]
    # s = tf.shape(flattened_text_emb)
    # paddings = [[0, 500 - s[0]], [0, 0]]         
    # paddings = [[0, 0], [0, 4-tf.shape(t)[0]]]
    # paddings = tf.constant([[0, paddings_size-flattened_text_emb.shape[0],], [0, 0]])
    # padded_embd = tf.pad(flattened_text_emb, paddings, "CONSTANT")

    current_inputs = text_emb # [num_sentences, max_sentence_length, emb]

    for layer in range(self.config["contextualization_layers"]):
      with tf.variable_scope("layer_{}".format(layer)):
        with tf.variable_scope("fw_cell"):
          cell_fw = util.CustomLSTMCell(self.config["contextualization_size"], num_sentences, self.lstm_dropout)
        with tf.variable_scope("bw_cell"):
          cell_bw = util.CustomLSTMCell(self.config["contextualization_size"], num_sentences, self.lstm_dropout)
        state_fw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_fw.initial_state.c, [num_sentences, 1]), tf.tile(cell_fw.initial_state.h, [num_sentences, 1]))
        state_bw = tf.contrib.rnn.LSTMStateTuple(tf.tile(cell_bw.initial_state.c, [num_sentences, 1]), tf.tile(cell_bw.initial_state.h, [num_sentences, 1]))

        (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
          cell_fw=cell_fw,
          cell_bw=cell_bw,
          inputs=current_inputs,
          sequence_length=text_len,
          initial_state_fw=state_fw,
          initial_state_bw=state_bw)

        text_outputs = tf.concat([fw_outputs, bw_outputs], 2) # [num_sentences, max_sentence_length, emb]
        text_outputs = tf.nn.dropout(text_outputs, self.lstm_dropout)
        if layer > 0:
          highway_gates = tf.sigmoid(util.projection(text_outputs, util.shape(text_outputs, 2))) # [num_sentences, max_sentence_length, emb]
          text_outputs = highway_gates * text_outputs + (1 - highway_gates) * current_inputs
        current_inputs = text_outputs

    return self.flatten_emb_by_sentence(text_outputs, text_len_mask)
Example #14
 def coarse_pruning(self, top_span_emb, top_span_mention_scores, c):
     """在取出的前k个候选span,针对每个span取出前c个antecedent,其mention score得分的组成是
     1. 每个span的mention score
     2. emb_i * W * emb_j的得分
     3. 每个span只取前面的span作为antecedent
     4. span与antecedent的距离映射为向量算个分
     """
     k = util.shape(top_span_emb, 0)  # num_candidates
     top_span_range = tf.range(k)  # [num_candidates, ]
     # antecedent_offsets: [num_candidates, num_candidates] distance between every two spans, in number of spans apart
     antecedent_offsets = tf.expand_dims(
         top_span_range, 1) - tf.expand_dims(top_span_range, 0)  # [k, k]
     antecedents_mask = antecedent_offsets >= 1  # [k, k]
     fast_antecedent_scores = tf.expand_dims(top_span_mention_scores,
                                             1) + tf.expand_dims(
                                                 top_span_mention_scores, 0)
     fast_antecedent_scores += tf.log(
         tf.to_float(antecedents_mask))  # [k, k]
     fast_antecedent_scores += self.get_fast_antecedent_scores(
         top_span_emb)  # [k, k]
     if self.config['use_prior']:
         antecedent_distance_buckets = self.bucket_distance(
             antecedent_offsets)  # [k, k]
         distance_scores = util.projection(
             tf.nn.dropout(
                 tf.get_variable(
                     "antecedent_distance_emb",
                     [10, self.config["feature_size"]],
                     initializer=tf.truncated_normal_initializer(
                         stddev=0.02)), self.dropout),
             1,
             initializer=tf.truncated_normal_initializer(
                 stddev=0.02))  # [10, 1]
         antecedent_distance_scores = tf.gather(
             tf.squeeze(distance_scores, 1),
             antecedent_distance_buckets)  # [k,k]
         fast_antecedent_scores += antecedent_distance_scores
     # keep the antecedents with the highest fast_antecedent_scores; each entry is the antecedent's span index
     _, top_antecedents = tf.nn.top_k(fast_antecedent_scores,
                                      c,
                                      sorted=False)  # [k, c]
     top_antecedents_mask = util.batch_gather(
         antecedents_mask, top_antecedents)  # [k, c] mask for each pair
     top_fast_antecedent_scores = util.batch_gather(
         fast_antecedent_scores, top_antecedents)  # [k, c] score for each pair
     top_antecedent_offsets = util.batch_gather(
         antecedent_offsets, top_antecedents)  # [k, c] offset for each pair
     return top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets
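# Illustration (not part of the original snippet): the fast score sums the
# ingredients listed in the docstring, and the top-c pruning is a row-wise
# top-k over that matrix.  A numpy sketch with random stand-ins (the bilinear
# and distance terms are lumped into one matrix):
import numpy as np

rng = np.random.RandomState(0)
k, c = 6, 3
mention_scores = rng.randn(k)
pair_scores = rng.randn(k, k)  # stand-in for bilinear + distance scores
offsets = np.arange(k)[:, None] - np.arange(k)[None, :]
mask = offsets >= 1            # antecedents must precede the span

scores = mention_scores[:, None] + mention_scores[None, :] + pair_scores
scores = np.where(mask, scores, -np.inf)              # same effect as adding log(mask)
top_antecedents = np.argsort(-scores, axis=1)[:, :c]  # [k, c]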
def fit_uv_mesh(initial_mesh: dict,
                target_dataset,
                max_iterations: int = 5000,
                resolution: int = 4,
                log_interval: int = 10,
                display_interval=1000,
                display_res=512,
                out_dir=None,
                mp4save_interval=None):
    glctx = dr.RasterizeGLContext()

    r_rot = util.random_rotation_translation(0.25)

    # Smooth rotation for display.
    ang = 0.0
    a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
    dist = 2

    # Modelview and modelview + projection matrices.
    proj = util.projection(x=0.4, n=1.0, f=200.0)
    r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
    r_mvp = np.matmul(proj, r_mv).astype(np.float32)
    a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
    a_mvp = np.matmul(proj, a_mv).astype(np.float32)

    pos_idx = initial_mesh['pos_idx']
    vtx_pos = initial_mesh['vtx_pos']
    tex = np.ones((1024, 1024, 3), dtype=np.float32) / 2

    uv, uv_idx = init_uv()
    uv_idx = uv_idx[:pos_idx.shape[0]]
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(vtx_pos.astype(np.float32)).cuda()
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()
    tex = torch.from_numpy(tex.astype(np.float32)).cuda()

    # Render reference and optimized frames. Always enable mipmapping for reference.
    color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex, 1024,
                   False, 0)
    Image.fromarray((color[0].detach().cpu().numpy() * 255).astype(
        np.uint8)).save('test.png')
Example #16
    def __rotate(image, width, func=lambda a, b: b, angle=46, fact=1):
        """
        :param: image
        :return: tmp_image, tmp_width

        Rotate the image

        """

        tmp_image = image
        result_image = image
        for i in xrange(1, angle):
            tmp_image = image.rotate(i * fact)
            tmp_width = util.get_width(util.projection(tmp_image, func))
            if width >= tmp_width:
                width = tmp_width
                result_image = tmp_image
            else: break

        return result_image, width
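# Usage sketch (not part of the original snippet): a hypothetical deskew
# driver that tries counter-clockwise rotations first, then clockwise ones
# (fact=-1), keeping whichever minimizes the projected width.  Assumes this
# repository's `util` module and that __rotate is callable at module level;
# the file name is a placeholder.
from PIL import Image
import util

char_image = Image.open('char_0.png').convert('L')
start_width = util.get_width(util.projection(char_image, lambda a, b: b))
deskewed, w = __rotate(char_image, start_width, fact=1)
deskewed, w = __rotate(deskewed, w, fact=-1)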
Example #17
 def combine_passes(self, original_doc, input_ids, input_mask, overlap_doc,
                    overlap_ids, overlap_mask):
     overlap_mask, input_mask = tf.equal(overlap_mask,
                                         1), tf.equal(input_mask, 1)
     org_content_mask = tf.logical_and(
         input_mask,
         tf.logical_and(tf.not_equal(input_ids, self.cls),
                        tf.not_equal(input_ids, self.sep)))
     overlap_content_mask = tf.logical_and(
         overlap_mask,
         tf.logical_and(tf.not_equal(overlap_ids, self.cls),
                        tf.not_equal(overlap_ids, self.sep)))
     flat_org_doc = self.flatten_emb_by_sentence(original_doc,
                                                 org_content_mask)
     flat_overlap_doc = self.flatten_emb_by_sentence(
         overlap_doc, overlap_content_mask)
     with tf.variable_scope("combo"):
         f = tf.sigmoid(
             util.projection(
                 tf.concat([flat_org_doc, flat_overlap_doc], -1),
                 util.shape(flat_org_doc, -1)))  # [n, emb]
         combo = f * flat_org_doc + (1 - f) * flat_overlap_doc
     return combo, org_content_mask
def fit_earth(max_iter=20000,
              log_interval=10,
              display_interval=None,
              display_res=1024,
              enable_mip=True,
              res=512,
              ref_res=4096,
              lr_base=1e-2,
              lr_ramp=0.1,
              out_dir='.',
              log_fn=None,
              texsave_interval=None,
              texsave_fn=None,
              imgsave_interval=None,
              imgsave_fn=None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32) / 255.0
    max_mip_level = 9  # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Learned texture.
    tex_var = tf.get_variable('tex',
                              initializer=tf.constant_initializer(0.2),
                              shape=tex.shape)

    # Setup TF graph for reference rendering in high resolution.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [ref_res, ref_res])
    texc, texd = dr.interpolate(uv[tf.newaxis, ...],
                                rast_out,
                                uv_idx,
                                rast_db=rast_out_db,
                                diff_attrs='all')
    color = dr.texture(tex[np.newaxis],
                       texc,
                       texd,
                       filter_mode='linear-mipmap-linear',
                       max_mip_level=max_mip_level)
    color = color * tf.clip_by_value(rast_out[..., -1:], 0,
                                     1)  # Mask out background.

    # Reduce the reference to correct size.
    while color.shape[1] > res:
        color = util.bilinear_downsample(color)

    # TF Graph for rendered candidate.
    if enable_mip:
        # With mipmaps.
        rast_out_opt, rast_out_db_opt = dr.rasterize(pos_clip, pos_idx,
                                                     [res, res])
        texc_opt, texd_opt = dr.interpolate(uv[tf.newaxis, ...],
                                            rast_out_opt,
                                            uv_idx,
                                            rast_db=rast_out_db_opt,
                                            diff_attrs='all')
        color_opt = dr.texture(tex_var[np.newaxis],
                               texc_opt,
                               texd_opt,
                               filter_mode='linear-mipmap-linear',
                               max_mip_level=max_mip_level)
    else:
        # No mipmaps: no image-space derivatives anywhere.
        rast_out_opt, _ = dr.rasterize(pos_clip,
                                       pos_idx, [res, res],
                                       output_db=False)
        texc_opt, _ = dr.interpolate(uv[tf.newaxis, ...], rast_out_opt, uv_idx)
        color_opt = dr.texture(tex_var[np.newaxis],
                               texc_opt,
                               filter_mode='linear')
    color_opt = color_opt * tf.clip_by_value(rast_out_opt[..., -1:], 0,
                                             1)  # Mask out background.

    # Measure only relevant portions of texture when calculating texture PSNR.
    loss = tf.reduce_mean((color - color_opt)**2)
    texmask = np.zeros_like(tex)
    tr = tex.shape[1] // 4
    texmask[tr + 13:2 * tr - 13, 25:-25, :] += 1.0
    texmask[25:-25, tr + 13:2 * tr - 13, :] += 1.0
    texloss = (tf.reduce_sum(texmask * (tex - tex_var)**2) /
               np.sum(texmask))**0.5  # RMSE within masked area.

    # Training driven by image-space loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9,
                                      0.99).minimize(loss, var_list=[tex_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    texloss_avg = []
    for it in range(max_iter + 1):
        lr = lr_base * lr_ramp**(float(it) / float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5)

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Run training and measure texture-space RMSE loss.
        texloss_val, _ = util.run([texloss, train_op], {
            mtx_in: r_mvp,
            lr_in: lr
        })
        texloss_avg.append(texloss_val)

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val, texloss_avg = np.mean(np.asarray(texloss_avg)), []
            psnr = -10.0 * np.log10(texloss_val**
                                    2)  # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result images/textures.
        display_image = display_interval and (it % display_interval) == 0
        save_image = imgsave_interval and (it % imgsave_interval) == 0
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            result_image = util.run(color_opt, {mtx_in: a_mvp})[0]
        if display_image:
            util.display_image(result_image,
                               size=display_res,
                               title='%d / %d' % (it, max_iter))
        if save_image:
            util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)
        if save_texture:
            util.save_image(out_dir + '/' + (texsave_fn % it),
                            util.run(tex_var)[::-1])

    # Done.
    if log_file:
        log_file.close()
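# Illustration (not part of the original snippet): the log line above turns
# the averaged texture RMSE into PSNR via -10*log10(rmse**2), which for a
# unit-peak signal is the usual -10*log10(MSE) = -20*log10(RMSE).  A quick
# check:
import numpy as np

rmse = 0.05
print(-10.0 * np.log10(rmse ** 2))  # 26.0206
print(-20.0 * np.log10(rmse))       # 26.0206 (identical)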
def fit_mesh(initial_mesh: dict,
             target_dataset_dir: str,
             max_iterations: int = 10000,
             resolution: int = 256,
             log_interval: int = 1000,
             display_interval=None,
             display_res=512,
             out_dir=None,
             mp4save_interval=None):

    distance = 3

    target_dataset = util.ReferenceImages(target_dataset_dir, resolution,
                                          resolution)

    pos_idx = torch.from_numpy(initial_mesh['pos_idx'].astype(np.int32))
    vtx_pos = torch.from_numpy(initial_mesh['vtx_pos'].astype(np.float32))

    laplace = util.compute_laplace_matrix(vtx_pos, pos_idx).cuda()
    pos_idx = pos_idx.cuda()
    vtx_pos = vtx_pos.cuda()

    init_rot = util.rotate_z(-math.pi / 2).cuda()
    vtx_pos = transform_pos(init_rot, vtx_pos)[0][:, 0:3]
    vtx_pos.requires_grad = True

    uv, uv_idx = init_uv()
    uv_idx = uv_idx[:pos_idx.shape[0]]
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()
    vtx_uv.requires_grad = True

    #col_idx  = torch.from_numpy(initial_mesh['col_idx'].astype(np.int32)).cuda()
    #vtx_col  = initial_mesh['vtx_col'].cuda()
    tex = torch.ones((1024, 1024, 3)).float() / 2
    tex = tex.cuda()
    tex.requires_grad = True

    glctx = dr.RasterizeGLContext()

    M1 = torch.eye(len(target_dataset)).cuda()
    M1.requires_grad = True
    M2 = torch.eye(len(target_dataset)).cuda()
    M2.requires_grad = True

    #M3 = torch.zeros((3, vtx_pos.shape[0], len(target_dataset))).cuda()
    M3 = torch.zeros((3 * vtx_pos.shape[0], len(target_dataset))).cuda()
    M3.requires_grad = True

    lr_ramp = .1
    params = [{
        'params': [M1, M2, M3],
        'lr': 1e-3
    }, {
        'params': tex,
        'lr': 1e-2
    }]
    #lambdas = [lambda x: max(0.01, 10**(-x*0.0005)), lambda x: lr_ramp**(float(x)/float(max_iterations))]

    optimizer = torch.optim.Adam(params)
    #scheduler    = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambdas)

    total_steps = 0

    for i in range(max_iterations):
        for j, (img, angle) in enumerate(target_dataset):
            img = img.cuda().permute(2, 1, 0)

            frame_tensor = torch.zeros(len(target_dataset))
            frame_tensor[j] = 1
            frame_tensor = frame_tensor.cuda()
            frame_tensor.requires_grad = True

            deltas = torch.matmul(
                M3, torch.matmul(M2, torch.matmul(M1,
                                                  frame_tensor))).flatten()
            #deformed_vtxs = vtx_pos + deltas.T
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape(
                (vtx_pos.shape[0], 3))

            # create the model-view-projection matrix
            # rotate model about z axis by angle
            #rot = util.rotate_y(angle)
            rot = torch.eye(4)
            # translate by distance
            tr = util.translate(z=-distance)
            # perspective projection
            proj = util.projection(x=0.4)

            mtx = proj.matmul(tr.matmul(rot)).cuda()
            mtx.requires_grad = True

            estimate = render(glctx,
                              mtx,
                              deformed_vtxs,
                              pos_idx,
                              vtx_uv,
                              uv_idx,
                              tex,
                              resolution,
                              enable_mip=False,
                              max_mip_level=4)[0]

            # compute loss
            loss = torch.mean((estimate - img)**2)

            # compute regularizer
            reg = torch.mean((util.compute_curvature(deformed_vtxs, laplace) -
                              util.compute_curvature(vtx_pos, laplace))**2)

            # combine
            loss = 5 * loss + 0 * reg

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #scheduler.step()

            with torch.no_grad():
                # clamp texture between 0 and 1
                tex.clamp_(0, 1)

            if (display_interval and
                (i % display_interval == 0)) or (i == max_iterations - 1):
                with torch.no_grad():
                    estimate = render(
                        glctx,
                        mtx,
                        deformed_vtxs,
                        pos_idx,
                        vtx_uv,
                        uv_idx,
                        tex,
                        resolution,
                        enable_mip=True,
                        max_mip_level=4)[0].detach().cpu().numpy()
                    plt.imshow(estimate)
                    plt.show()
                    plt.imshow(img.detach().cpu().numpy())
                    plt.show()

            if log_interval and i % log_interval == 0:
                print(f"Loss: {loss}")
                print(M1.grad)

    with torch.no_grad():
        for i, (im, _) in enumerate(target_dataset):
            frame_tensor = torch.zeros(len(target_dataset))
            frame_tensor[i] = 1  # one-hot selector for frame i
            frame_tensor = frame_tensor.cuda()

            deltas = torch.matmul(
                M3, torch.matmul(M2, torch.matmul(M1,
                                                  frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape(
                (vtx_pos.shape[0], 3))

            write_obj(f"frame_{i}.obj",
                      deformed_vtxs.detach().cpu().tolist(),
                      pos_idx.detach().cpu().tolist())
    Image.fromarray((tex.detach().cpu().numpy() * 255).astype(
        np.uint8)).save('diff_render_tex.png')
    print("Outputted texture to diff_render_tex.png")
Example #20
  def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, scene_emb, genders, fpronouns):
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    if self.config["char_embedding_size"] > 0:
      char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
      flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
      flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
      aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
      context_emb_list.append(aggregated_char_emb)
      head_emb_list.append(aggregated_char_emb)

    if not self.lm_file:
      elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
      lm_embeddings = elmo_module(
          inputs={"tokens": tokens, "sequence_len": text_len},
          signature="tokens", as_dict=True)
      word_emb = lm_embeddings["word_emb"]  # [num_sentences, max_sentence_length, 512]
      lm_emb = tf.stack([tf.concat([word_emb, word_emb], -1),
                         lm_embeddings["lstm_outputs1"],
                         lm_embeddings["lstm_outputs2"]], -1)  # [num_sentences, max_sentence_length, 1024, 3]
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
      self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
      self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list, 2) # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(head_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length]

    context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask) # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]

    sentence_indices = tf.tile(tf.expand_dims(tf.range(num_sentences), 1), [1, max_sentence_length]) # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask) # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask) # [num_words]

    candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0) # [num_words, max_span_width]
    
    #debug
    prev_can_st = candidate_starts
    prev_can_ends = candidate_ends
    #debug

    candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts) # [num_words, max_span_width]
    candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width]
    candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices)) # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(candidate_mask, [-1]) # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates]
    candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates]

    # Encode each (start, end) span as a single integer so tf.setdiff1d can
    # compare candidate spans against gold spans directly.
    combined_candidate_st = candidate_starts*10000 + candidate_ends
    combined_gold_st = gold_starts*10000 + gold_ends

    _, non_top_span_list = tf.setdiff1d(combined_candidate_st, combined_gold_st) #[num_candidate - num_gold_mentions]
    whole_candidate_indices_list = tf.range(util.shape(candidate_starts,0)) # [num_candidates]
    gold_span_indices, _ = tf.setdiff1d(whole_candidate_indices_list, non_top_span_list) #[num_gold_mentions]


    candidate_sentence_indices = tf.boolean_mask(tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates]

    candidate_span_emb = self.get_span_emb(flattened_head_emb, context_outputs, candidate_starts, candidate_ends) # [num_candidates, emb]


    #Video Scene Emb
    ffnn_scene_emb = util.ffnn(scene_emb, num_hidden_layers=self.config["ffnn_depth"], hidden_size=400, output_size=128, dropout=self.dropout) # [num_words, 128]
    candidate_scene_emb = self.get_scene_emb(ffnn_scene_emb, candidate_starts) #[num_candidates, 128]

    '''
    #Comment : This part computes mention scores and prunes mentions.
    #It is not used for this task, because mention boundaries are given.

    candidate_mention_scores =  self.get_mention_scores(candidate_span_emb) # [k, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [k]

    k = tf.to_int32(tf.floor(tf.to_float(tf.shape(context_outputs)[0]) * self.config["top_span_ratio"]))
    top_span_indices = coref_ops.extract_spans(tf.expand_dims(candidate_mention_scores, 0),
                                               tf.expand_dims(candidate_starts, 0),
                                               tf.expand_dims(candidate_ends, 0),
                                               tf.expand_dims(k, 0),
                                               util.shape(context_outputs, 0),
                                               True) # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0) # [k]
    '''

    ######## Only Using Gold Span Indices #####
    k = tf.to_int32(util.shape(gold_span_indices,0))
    top_span_indices = gold_span_indices
    ############

    top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb]
    top_scene_emb = tf.gather(candidate_scene_emb, top_span_indices) # [k, emb-scene]

    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k]
    #top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k]
    top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices) # [k]
    top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k]
    top_span_genders = tf.gather(genders, top_span_ends)
    top_span_fpronouns = tf.gather(fpronouns, top_span_ends)

    # k : total number of candidates span (M in paper)
    # c : how many antecedents we check (K in paper)
    c = tf.minimum(self.config["max_top_antecedents"], k)

    if self.config["coarse_to_fine"]:
      top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c)
    else:
      #top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(top_span_emb, top_span_mention_scores, c)
      top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_prnuing_wo_mention_score(top_span_emb, c)

    dummy_scores = tf.zeros([k, 1]) # [k, 1]
    for i in range(self.config["coref_depth"]):
      with tf.variable_scope("coref_layer", reuse=(i > 0)):
        top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb]
        top_antecedent_scene_emb = tf.gather(top_scene_emb, top_antecedents) # [k, c, emb-scene]
        top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, top_scene_emb, top_antecedent_scene_emb, top_span_genders, top_span_fpronouns) # [k, c]
        top_antecedent_weights = tf.nn.softmax(tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1]
        top_antecedent_emb = tf.concat([tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1) # [k, c + 1, emb]
        attended_span_emb = tf.reduce_sum(tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb]
        with tf.variable_scope("f"):
          f = tf.sigmoid(util.projection(tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb]
          top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb # [k, emb]

    top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1) # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents) # [k, c]
    top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask))) # [k, c]
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1)) # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c]
    dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1) # [k, c + 1]

    top_antecedent_prob = tf.nn.softmax(top_antecedent_scores, 1) # [k, c + 1]
    if (self.config["use_gender_logic_rule"]):
      top_antecedent_prob_with_logic = self.project_logic_rule(top_antecedent_prob, top_span_genders, top_span_fpronouns, top_span_speaker_ids, top_antecedents, k)
      '''
      marginal_prob = tf.reduce_sum(top_antecedent_prob*tf.to_float(top_antecedent_labels),axis=1)
      gold_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
      top_antecedent_scores = top_antecedent_prob      
      '''
      origin_loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k]
      origin_loss = tf.reduce_sum(origin_loss)

      # cross_entropy : -1 * ground_truth * log(prediction)
      #teacher_loss = tf.reduce_min(tf.nn. (labels=top_antecedent_prob_with_logic, logits=top_antecedent_scores))
      teacher_loss = tf.reduce_sum(-tf.reduce_sum(top_antecedent_prob_with_logic * tf.log(top_antecedent_prob + 1e-10), reduction_indices=[1]))

      pi = tf.minimum(self.config["logic_rule_pi_zero"], 1.0 - tf.pow(self.config["logic_rule_imitation_alpha"], tf.to_float(self.global_step)+1.0)) 

      # For Validation Loss
      marginal_prob = tf.reduce_sum(top_antecedent_prob_with_logic*tf.to_float(top_antecedent_labels),axis=1)
      validation_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))
      
      #loss = teacher_loss + origin_loss
      loss = tf.where(is_training, pi*teacher_loss + (1.0-pi)*origin_loss, validation_loss)

      top_antecedent_scores = top_antecedent_prob_with_logic
    else:
      loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k]
      loss = tf.reduce_sum(loss) # []
      teacher_loss = loss
      origin_loss = loss

    return [candidate_starts, candidate_ends, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores, teacher_loss, origin_loss], loss
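# Illustration (not part of the original snippet): the distillation weight
# pi = min(pi_0, 1 - alpha**(t + 1)) grows toward logic_rule_pi_zero during
# training, shifting the loss from origin_loss to teacher_loss.  A quick
# sketch with hypothetical config values:
import numpy as np

pi_zero, alpha = 0.95, 0.9
steps = np.arange(0, 50, 10)
pi = np.minimum(pi_zero, 1.0 - alpha ** (steps + 1.0))
print(pi)  # [0.1  0.686...  0.891...  0.95  0.95] -- rises, then caps at pi_zero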
Example #21
def fit_earth(max_iter=20000,
              log_interval=10,
              display_interval=None,
              display_res=1024,
              enable_mip=True,
              res=512,
              ref_res=4096,
              lr_base=1e-2,
              lr_ramp=0.1,
              out_dir=None,
              log_fn=None,
              texsave_interval=None,
              texsave_fn=None,
              imgsave_interval=None,
              imgsave_fn=None):

    log_file = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
    else:
        imgsave_interval, texsave_interval = None, None

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32) / 255.0
    max_mip_level = 9  # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1).  Drop
    # the last column in that case.
    if pos.shape[1] == 4: pos = pos[:, 0:3]

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()

    tex = torch.from_numpy(tex.astype(np.float32)).cuda()
    tex_opt = torch.full(tex.shape, 0.2, device='cuda', requires_grad=True)
    glctx = dr.RasterizeGLContext()

    # Adam optimizer for texture with a learning rate ramp.
    optimizer = torch.optim.Adam([tex_opt], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda x: lr_ramp**(float(x) / float(max_iter)))

    # Render.
    ang = 0.0
    texloss_avg = []
    for it in range(max_iter + 1):
        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5)

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Measure texture-space RMSE loss
        with torch.no_grad():
            texmask = torch.zeros_like(tex)
            tr = tex.shape[1] // 4
            texmask[tr + 13:2 * tr - 13, 25:-25, :] += 1.0
            texmask[25:-25, tr + 13:2 * tr - 13, :] += 1.0
            # Measure only relevant portions of texture when calculating texture
            # PSNR.
            texloss = (torch.sum(texmask * (tex - tex_opt)**2) /
                       torch.sum(texmask))**0.5  # RMSE within masked area.
            texloss_avg.append(float(texloss))

        # Render reference and optimized frames. Always enable mipmapping for reference.
        color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex,
                       ref_res, True, max_mip_level)
        color_opt = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx,
                           tex_opt, res, enable_mip, max_mip_level)

        # Reduce the reference to correct size.
        while color.shape[1] > res:
            color = util.bilinear_downsample(color)

        # Compute loss and perform a training step.
        loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val = np.mean(np.asarray(texloss_avg))
            texloss_avg = []
            psnr = -10.0 * np.log10(texloss_val**
                                    2)  # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            ang = ang + 0.1

            with torch.no_grad():
                result_image = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_uv,
                                      uv_idx, tex_opt, res, enable_mip,
                                      max_mip_level)[0].cpu().numpy()

                if display_image:
                    util.display_image(result_image,
                                       size=display_res,
                                       title='%d / %d' % (it, max_iter))
                if save_image:
                    util.save_image(out_dir + '/' + (imgsave_fn % it),
                                    result_image)

        if save_texture:
            texture = tex_opt.detach().cpu().numpy()[::-1]
            util.save_image(out_dir + '/' + (texsave_fn % it), texture)

    # Done.
    if log_file:
        log_file.close()
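fit_earth shrinks the high-resolution reference with util.bilinear_downsample before the pixel loss; that helper is not shown here. A possible stand-in with the same halve-per-call contract, assuming [minibatch, height, width, channels] tensors as returned by render (the averaging filter is a simplification, not necessarily nvdiffrast's exact kernel):

import torch
import torch.nn.functional as F

def bilinear_downsample_2x(x):
    # x: [N, H, W, C] float tensor; returns [N, H//2, W//2, C].
    x = x.permute(0, 3, 1, 2)           # NHWC -> NCHW for pooling
    x = F.avg_pool2d(x, kernel_size=2)  # 2x2 box filter, stride 2
    return x.permute(0, 2, 3, 1)        # back to NHWC

color = torch.rand(1, 1024, 1024, 3)
while color.shape[1] > 512:             # same loop shape as above
    color = bilinear_downsample_2x(color)
print(color.shape)                      # torch.Size([1, 512, 512, 3])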
Example #22
  def get_predictions_and_loss(self, input_ids, input_mask, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, sentence_map):
    model = modeling.BertModel(
      config=self.bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      use_one_hot_embeddings=False,
      scope='bert')
    all_encoder_layers = model.get_all_encoder_layers()
    mention_doc = model.get_sequence_output() # [batch_size, seq_length, hidden_size]

    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)

    num_sentences = tf.shape(mention_doc)[0]
    max_sentence_length = tf.shape(mention_doc)[1]
    mention_doc = self.flatten_emb_by_sentence(mention_doc, input_mask) # [num_words, hidden_size]
    num_words = util.shape(mention_doc, 0)
    antecedent_doc = mention_doc


    flattened_sentence_indices = sentence_map
    candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1), [1, self.max_span_width]) # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(tf.range(self.max_span_width), 0) # [num_words, max_span_width]
    candidate_start_sentence_indices = tf.gather(flattened_sentence_indices, candidate_starts) # [num_words, max_span_width]
    candidate_end_sentence_indices = tf.gather(flattened_sentence_indices, tf.minimum(candidate_ends, num_words - 1)) # [num_words, max_span_width]
    candidate_mask = tf.logical_and(candidate_ends < num_words, tf.equal(candidate_start_sentence_indices, candidate_end_sentence_indices)) # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(candidate_mask, [-1]) # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]), flattened_candidate_mask) # [num_candidates]
    candidate_ends = tf.boolean_mask(tf.reshape(candidate_ends, [-1]), flattened_candidate_mask) # [num_candidates]
    candidate_sentence_indices = tf.boolean_mask(tf.reshape(candidate_start_sentence_indices, [-1]), flattened_candidate_mask) # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids) # [num_candidates]

    candidate_span_emb = self.get_span_emb(mention_doc, mention_doc, candidate_starts, candidate_ends) # [num_candidates, emb]
    candidate_mention_scores =  self.get_mention_scores(candidate_span_emb, candidate_starts, candidate_ends)
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1) # [k]

    # beam size
    k = tf.minimum(3900, tf.to_int32(tf.floor(tf.to_float(num_words) * self.config["top_span_ratio"])))
    c = tf.minimum(self.config["max_top_antecedents"], k)
    # pull from beam
    top_span_indices = coref_ops.extract_spans(tf.expand_dims(candidate_mention_scores, 0),
                                               tf.expand_dims(candidate_starts, 0),
                                               tf.expand_dims(candidate_ends, 0),
                                               tf.expand_dims(k, 0),
                                               num_words,
                                               True) # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0) # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices) # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices) # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices) # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices) # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices) # [k]
    genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]], initializer=tf.truncated_normal_initializer(stddev=0.02)),
                          genre) # [emb]
    if self.config['use_metadata']:
      speaker_ids = self.flatten_emb_by_sentence(speaker_ids, input_mask)
      top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k]
    else:
      top_span_speaker_ids = None


    dummy_scores = tf.zeros([k, 1]) # [k, 1]
    top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c)
    num_segs, seg_len = util.shape(input_ids, 0), util.shape(input_ids, 1)
    word_segments = tf.tile(tf.expand_dims(tf.range(0, num_segs), 1), [1, seg_len])
    flat_word_segments = tf.boolean_mask(tf.reshape(word_segments, [-1]), tf.reshape(input_mask, [-1]))
    mention_segments = tf.expand_dims(tf.gather(flat_word_segments, top_span_starts), 1) # [k, 1]
    antecedent_segments = tf.gather(flat_word_segments, tf.gather(top_span_starts, top_antecedents)) #[k, c]
    segment_distance = tf.clip_by_value(mention_segments - antecedent_segments, 0, self.config['max_training_sentences'] - 1) if self.config['use_segment_distance'] else None #[k, c]
    if self.config['fine_grained']:
      for i in range(self.config["coref_depth"]):
        with tf.variable_scope("coref_layer", reuse=(i > 0)):
          top_antecedent_emb = tf.gather(top_span_emb, top_antecedents) # [k, c, emb]
          top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(top_span_emb, top_antecedents, top_antecedent_emb, top_antecedent_offsets, top_span_speaker_ids, genre_emb, segment_distance) # [k, c]
          top_antecedent_weights = tf.nn.softmax(tf.concat([dummy_scores, top_antecedent_scores], 1)) # [k, c + 1]
          top_antecedent_emb = tf.concat([tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1) # [k, c + 1, emb]
          attended_span_emb = tf.reduce_sum(tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1) # [k, emb]
          with tf.variable_scope("f"):
            f = tf.sigmoid(util.projection(tf.concat([top_span_emb, attended_span_emb], 1), util.shape(top_span_emb, -1))) # [k, emb]
            top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb # [k, emb]
    else:
      top_antecedent_scores = top_fast_antecedent_scores

    top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1) # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents) # [k, c]
    top_antecedent_cluster_ids += tf.to_int32(tf.log(tf.to_float(top_antecedents_mask))) # [k, c]
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1)) # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1) # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator) # [k, c]
    dummy_labels = tf.logical_not(tf.reduce_any(pairwise_labels, 1, keepdims=True)) # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1) # [k, c + 1]
    loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels) # [k]
    loss = tf.reduce_sum(loss) # []

    return [candidate_starts, candidate_ends, candidate_mention_scores, top_span_starts, top_span_ends, top_antecedents, top_antecedent_scores], loss
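The loss in this snippet comes from self.softmax_loss, the marginal log-likelihood over all gold antecedents (including the dummy) rather than a single target. A sketch consistent with how it is called above, in the same TF1 style, assuming scores [k, c + 1] and boolean labels [k, c + 1]:

import tensorflow as tf

def softmax_loss(antecedent_scores, antecedent_labels):
  # tf.log(0) = -inf masks out the non-gold antecedents.
  gold_scores = antecedent_scores + tf.log(tf.to_float(antecedent_labels)) # [k, c + 1]
  marginalized_gold_scores = tf.reduce_logsumexp(gold_scores, [1]) # [k]
  log_norm = tf.reduce_logsumexp(antecedent_scores, [1]) # [k]
  return log_norm - marginalized_gold_scores # [k]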
Example #23
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb, lm_emb, text_len,\
         is_training, gold_starts, gold_ends, cluster_ids,swag_context_emb, swag_text_len, swag_label):
        """
        This is the major part of the architecutre, and is the placehlder. 
        We have two branches - one for SWAG, and another for the main Lee code.
        """
        self.same(is_training)
        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]
        print("normal", swag_context_emb)
        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        # genre_emb = tf.gather(tf.get_variable("genre_embeddings", [len(self.genres), self.config["feature_size"]]), genre) # [emb]
        genre_emb = None
        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [k, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [k]

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(context_outputs)[0]) *
                self.config["top_span_ratio"]))
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
            util.shape(context_outputs, 0), True)  # [1, k]
        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        # top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts) # [k]

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)

        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
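Every coref_layer pass in these snippets performs the same refinement: attend over the candidate antecedents (plus a dummy slot for the span itself), then gate between the attended embedding and the current one. The recurrence with the TF plumbing stripped away; the shapes are hypothetical small values and W stands in for the learned util.projection weights:

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

k, c, emb = 5, 3, 8
rng = np.random.default_rng(0)
span_emb = rng.normal(size=(k, emb))
antecedent_emb = rng.normal(size=(k, c, emb))
scores = rng.normal(size=(k, c))
W = rng.normal(size=(2 * emb, emb))

dummy = np.zeros((k, 1))
weights = softmax(np.concatenate([dummy, scores], axis=1))      # [k, c + 1]
cands = np.concatenate([span_emb[:, None], antecedent_emb], 1)  # [k, c + 1, emb]
attended = (weights[..., None] * cands).sum(axis=1)             # [k, emb]
f = 1.0 / (1.0 + np.exp(-np.concatenate([span_emb, attended], 1) @ W))
span_emb = f * attended + (1.0 - f) * span_emb                  # gated update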
Example #24
    def get_predictions_and_loss(self, inputs, labels, config):
        is_training = inputs["is_training"][0]
        self.dropout = 1 - (tf.to_float(is_training) * config["dropout_rate"])
        self.lexical_dropout = 1 - (tf.to_float(is_training) *
                                    config["lexical_dropout_rate"])
        self.lstm_dropout = 1 - (tf.to_float(is_training) *
                                 config["lstm_dropout_rate"])

        sentences = inputs["tokens"]
        text_len = inputs["text_len"]  # [num_sentences]
        context_word_emb = inputs[
            "context_word_emb"]  # [num_sentences, max_sentence_length, emb]
        head_word_emb = inputs[
            "head_word_emb"]  # [num_sentences, max_sentence_length, emb]
        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]
        context_emb, head_emb, self.lm_weights, self.lm_scaling = get_embeddings(
            self.data, sentences, text_len, context_word_emb, head_word_emb,
            inputs["char_idx"], inputs["lm_emb"],
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        context_outputs = lstm_contextualize(
            context_emb, text_len, config,
            self.lstm_dropout)  # [num_sentences, max_sentence_length, emb]

        # [num_sentences, max_num_candidates], ...
        candidate_starts, candidate_ends, candidate_mask = get_span_candidates(
            text_len, max_sentence_length, config["max_arg_width"])
        flat_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_sentences * max_num_candidates]
        batch_word_offset = tf.expand_dims(tf.cumsum(text_len, exclusive=True),
                                           1)  # [num_sentences, 1]
        flat_candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts + batch_word_offset, [-1]),
            flat_candidate_mask)  # [num_candidates]
        flat_candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends + batch_word_offset, [-1]),
            flat_candidate_mask)  # [num_candidates]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]
        flat_context_outputs = flatten_emb_by_sentence(
            context_outputs, text_len_mask)  # [num_doc_words]
        flat_head_emb = flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_doc_words]
        doc_len = util.shape(flat_context_outputs, 0)

        candidate_span_emb, head_scores, span_head_emb, head_indices, head_indices_log_mask = get_span_emb(
            flat_head_emb, flat_context_outputs, flat_candidate_starts,
            flat_candidate_ends, config, self.dropout
        )  # [num_candidates, emb], [num_candidates, max_span_width, emb], [num_candidates, max_span_width]
        num_candidates = util.shape(candidate_span_emb, 0)
        max_num_candidates_per_sentence = util.shape(candidate_mask, 1)
        candidate_span_ids = tf.sparse_to_dense(
            sparse_indices=tf.where(tf.equal(candidate_mask, True)),
            output_shape=tf.cast(
                tf.stack([num_sentences, max_num_candidates_per_sentence]),
                tf.int64),
            sparse_values=tf.range(num_candidates, dtype=tf.int32),
            default_value=0,
            validate_indices=True)  # [num_sentences, max_num_candidates]

        predict_dict = {
            "candidate_starts": candidate_starts,
            "candidate_ends": candidate_ends
        }

        if config["coref_depth"]:
            candidate_mention_scores = get_unary_scores(
                candidate_span_emb, config, self.dropout, 1,
                "mention_scores")  # [num_candidates]
            #if self.config["span_score_weight"] > 0:
            #  candidate_mention_scores += self.config["span_score_weight"] * flat_span_scores

            doc_ids = tf.expand_dims(inputs["doc_id"], 1)  # [num_sentences, 1]
            candidate_doc_ids = tf.boolean_mask(
                tf.reshape(
                    tf.tile(doc_ids, [1, max_num_candidates_per_sentence]),
                    [-1]), flat_candidate_mask)  # [num_candidates]

            k = tf.to_int32(
                tf.floor(tf.to_float(doc_len) * config["mention_ratio"]))
            top_mention_indices = srl_ops.extract_spans(
                tf.expand_dims(candidate_mention_scores, 0),
                tf.expand_dims(flat_candidate_starts, 0),
                tf.expand_dims(flat_candidate_ends, 0), tf.expand_dims(k, 0),
                doc_len, True, True)  # [1, k]
            top_mention_indices.set_shape([1, None])
            top_mention_indices = tf.squeeze(top_mention_indices, 0)  # [k]
            mention_starts = tf.gather(flat_candidate_starts,
                                       top_mention_indices)  # [k]
            mention_ends = tf.gather(flat_candidate_ends,
                                     top_mention_indices)  #[k]
            mention_scores = tf.gather(candidate_mention_scores,
                                       top_mention_indices)  #[k]
            mention_emb = tf.gather(candidate_span_emb,
                                    top_mention_indices)  # [k, emb]
            mention_doc_ids = tf.gather(candidate_doc_ids,
                                        top_mention_indices)  # [k]

            max_mentions_per_doc = tf.reduce_max(
                #tf.segment_sum(data=tf.ones_like(mention_doc_ids, dtype=tf.int32),
                tf.unsorted_segment_sum(
                    data=tf.ones_like(mention_doc_ids, dtype=tf.int32),
                    segment_ids=mention_doc_ids,
                    num_segments=tf.reduce_max(mention_doc_ids) + 1))  # []

            k_Print = tf.Print(
                k, [num_sentences, doc_len, k, max_mentions_per_doc],
                "Num sents, num tokens, num_mentions, max_antecedents")

            max_antecedents = tf.minimum(
                tf.minimum(config["max_antecedents"], k - 1),
                max_mentions_per_doc - 1)

            if self.config["coarse_to_fine"]:
                antecedents, antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = coarse_to_fine_pruning(
                    mention_emb, mention_scores, max_antecedents,
                    mention_doc_ids, self.dropout)
            else:
                antecedents, antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = distance_pruning(
                    mention_emb, mention_scores, max_antecedents,
                    mention_doc_ids)

            dummy_scores = tf.zeros([k, 1])  # [k, 1]

            for i in range(self.config["coref_depth"]):
                top_antecedent_emb = tf.gather(mention_emb,
                                               antecedents)  # [k, c, emb]
                top_antecedent_scores, top_antecedent_emb, _ = get_antecedent_scores(
                    mention_emb, mention_scores, antecedents, config,
                    self.dropout, top_fast_antecedent_scores,
                    top_antecedent_offsets)  # [k, max_ant]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(mention_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                mention_emb = attended_span_emb
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([mention_emb, attended_span_emb], 1),
                            util.shape(mention_emb, -1)))  # [k, emb]
                    mention_emb = f * attended_span_emb + (
                        1 - f) * mention_emb  # [k, emb]
            old_mention_emb = tf.gather(candidate_span_emb,
                                        top_mention_indices)
            top_mention_indices = tf.expand_dims(top_mention_indices, 1)
            old_mention_emb_padded = tf.scatter_nd(
                top_mention_indices, old_mention_emb,
                tf.shape(candidate_span_emb))
            new_mention_emb_padded = tf.scatter_nd(
                top_mention_indices, mention_emb, tf.shape(candidate_span_emb))
            candidate_span_emb = candidate_span_emb - old_mention_emb_padded + new_mention_emb_padded
            top_antecedent_scores = tf.concat(
                [tf.zeros([k, 1]), top_antecedent_scores], 1)  # [k, max_ant+1]
            predict_dict.update({
                "candidate_mention_starts":
                flat_candidate_starts,  # [num_candidates]
                "candidate_mention_ends":
                flat_candidate_ends,  # [num_candidates]
                "candidate_mention_scores":
                candidate_mention_scores,  # [num_candidates]
                "mention_starts": mention_starts,  # [k]
                "mention_ends": mention_ends,  # [k]
                "antecedents": antecedents,  # [k, max_ant]
                "antecedent_scores": top_antecedent_scores,  # [k, max_ant+1]
            })

        spans_log_mask = tf.log(
            tf.to_float(candidate_mask))  # [num_sentences, max_num_candidates]
        if head_scores is not None:
            predict_dict["head_scores"] = head_scores

        dummy_scores = tf.expand_dims(
            tf.zeros_like(candidate_span_ids, dtype=tf.float32), 2)

        if config["ner_weight"] + config["coref_weight"] > 0:
            gold_ner_labels, gold_coref_cluster_ids = get_span_task_labels(
                candidate_starts, candidate_ends, labels,
                max_sentence_length)  # [num_sentences, max_num_candidates]

        if config["relation_weight"] > 0:
            if config['entity_beam']:
                flat_ner_scores = get_unary_scores(
                    candidate_span_emb, config, self.dropout,
                    len(self.data.ner_labels) - 1,
                    "ner_scores")  # [num_candidates, num_labels-1]

                ner_scores = tf.gather(
                    flat_ner_scores, candidate_span_ids) + tf.expand_dims(
                        spans_log_mask,
                        2)  # [num_sentences, max_num_candidates, num_labels-1]
                ner_scores = tf.concat(
                    [dummy_scores, ner_scores],
                    2)  # [num_sentences, max_num_candidates, num_labels]
                entity_starts, entity_ends, entity_scores, num_entities, top_entity_indices = get_ner_candidates(
                    candidate_starts, candidate_ends, ner_scores,
                    candidate_mask, text_len,
                    config["entity_ratio"])  # Do we need to sort spans?
            else:
                flat_candidate_entity_scores = get_unary_scores(
                    candidate_span_emb, config, self.dropout, 1,
                    "entity_scores")  # [num_candidates,]
                candidate_entity_scores = tf.gather(
                    flat_candidate_entity_scores, candidate_span_ids
                ) + spans_log_mask  # [num_sentences, max_num_candidates]
                entity_starts, entity_ends, entity_scores, num_entities, top_entity_indices = get_batch_topk(
                    candidate_starts,
                    candidate_ends,
                    candidate_entity_scores,
                    config["entity_ratio"],
                    text_len,
                    max_sentence_length,
                    sort_spans=True,
                    enforce_non_crossing=False)  # Do we need to sort spans?
            entity_span_indices = batch_gather(
                candidate_span_ids,
                top_entity_indices)  # [num_sentences, max_num_ents]
            entity_emb = tf.gather(
                candidate_span_emb,
                entity_span_indices)  # [num_sentences, max_num_ents, emb]
            max_num_entities = util.shape(entity_scores, 1)

        if config["relation_weight"] > 0:
            if config['add_ner_emb']:
                ner_emb = tf.gather(flat_ner_scores, entity_span_indices)
                entity_emb = tf.concat([entity_emb, ner_emb], 2)
            rel_labels = get_relation_labels(
                entity_starts, entity_ends, num_entities, labels,
                max_sentence_length
            )  # [num_sentences, max_num_ents, max_num_ents]
            if config['bilinear']:
                rel_scores = get_rel_bilinear_scores(
                    entity_emb, entity_scores, len(self.data.rel_labels),
                    config, self.dropout
                )  # [num_sentences, max_num_ents, max_num_ents, num_labels]
            else:
                if config['rel_prop']:
                    for i in range(config['rel_prop']):
                        rel_scores, entity_emb, flat_entities_mask = get_rel_scores(
                            entity_emb, entity_scores,
                            len(self.data.rel_labels), config, self.dropout,
                            num_entities
                        )  # [num_sentences, max_num_ents, max_num_ents, num_labels]
                    if config['rel_prop_emb']:
                        entity_emb_size = util.shape(entity_emb, -1)
                        flat_entity_emb = tf.reshape(entity_emb, [
                            num_sentences * max_num_entities, entity_emb_size
                        ])
                        flat_entity_emb = tf.boolean_mask(
                            flat_entity_emb, flat_entities_mask)
                        entity_indices = tf.boolean_mask(
                            tf.reshape(entity_span_indices, [-1]),
                            flat_entities_mask)
                        old_entity_emb = tf.gather(candidate_span_emb,
                                                   entity_indices)
                        entity_indices = tf.expand_dims(entity_indices, 1)
                        old_entity_emb_padded = tf.scatter_nd(
                            entity_indices, old_entity_emb,
                            tf.shape(candidate_span_emb))
                        new_entity_emb_padded = tf.scatter_nd(
                            entity_indices, flat_entity_emb,
                            tf.shape(candidate_span_emb))
                        candidate_span_emb = candidate_span_emb - old_entity_emb_padded + new_entity_emb_padded

                else:
                    rel_scores = get_rel_scores(
                        entity_emb, entity_scores, len(self.data.rel_labels),
                        config, self.dropout, num_entities
                    )  # [num_sentences, max_num_ents, max_num_ents, num_labels]

        if config["relation_weight"] > 0:
            rel_loss = get_rel_softmax_loss(
                rel_scores, rel_labels, num_entities,
                config)  # [num_sentences, max_num_ents, max_num_ents]
            predict_dict.update({
                "entity_starts": entity_starts,
                "entity_ends": entity_ends,
                "entitiy_scores": entity_scores,
                "num_entities": num_entities,
                "rel_labels":
                tf.argmax(rel_scores,
                          -1),  # [num_sentences, num_ents, num_ents]
                "rel_scores": rel_scores
            })
        else:
            rel_loss = 0

        if config["ner_weight"] > 0:
            flat_ner_scores = get_unary_scores(
                candidate_span_emb, config, self.dropout,
                len(self.data.ner_labels) - 1,
                "ner_scores")  # [num_candidates, num_labels-1]

            ner_scores = tf.gather(
                flat_ner_scores, candidate_span_ids) + tf.expand_dims(
                    spans_log_mask,
                    2)  # [num_sentences, max_num_candidates, num_labels-1]
            ner_scores = tf.concat(
                [dummy_scores, ner_scores],
                2)  # [num_sentences, max_num_candidates, num_labels]

            ner_loss = get_softmax_loss(ner_scores, gold_ner_labels,
                                        candidate_mask)  # [num_sentences]
            ner_loss = tf.reduce_sum(
                ner_loss)  # / tf.to_float(num_sentences)  # []
            predict_dict["ner_scores"] = ner_scores
        else:
            ner_loss = 0

        # Get coref representations.
        if config["coref_weight"] > 0:
            candidate_mention_scores = get_unary_scores(
                candidate_span_emb, config, self.dropout, 1,
                "mention_scores")  # [num_candidates]
            doc_ids = tf.expand_dims(inputs["doc_id"], 1)  # [num_sentences, 1]
            candidate_doc_ids = tf.boolean_mask(
                tf.reshape(
                    tf.tile(doc_ids, [1, max_num_candidates_per_sentence]),
                    [-1]), flat_candidate_mask)  # [num_candidates]

            k = tf.to_int32(
                tf.floor(tf.to_float(doc_len) * config["mention_ratio"]))
            top_mention_indices = srl_ops.extract_spans(
                tf.expand_dims(candidate_mention_scores, 0),
                tf.expand_dims(flat_candidate_starts, 0),
                tf.expand_dims(flat_candidate_ends, 0), tf.expand_dims(k, 0),
                doc_len, True, True)  # [1, k]
            top_mention_indices.set_shape([1, None])
            top_mention_indices = tf.squeeze(top_mention_indices, 0)  # [k]
            mention_starts = tf.gather(flat_candidate_starts,
                                       top_mention_indices)  # [k]
            mention_ends = tf.gather(flat_candidate_ends,
                                     top_mention_indices)  #[k]
            mention_scores = tf.gather(candidate_mention_scores,
                                       top_mention_indices)  #[k]
            mention_emb = tf.gather(candidate_span_emb,
                                    top_mention_indices)  # [k, emb]
            mention_doc_ids = tf.gather(candidate_doc_ids,
                                        top_mention_indices)  # [k]

            if head_scores is not None:
                predict_dict["coref_head_scores"] = head_scores

            max_mentions_per_doc = tf.reduce_max(
                #tf.segment_sum(data=tf.ones_like(mention_doc_ids, dtype=tf.int32),
                tf.unsorted_segment_sum(
                    data=tf.ones_like(mention_doc_ids, dtype=tf.int32),
                    segment_ids=mention_doc_ids,
                    num_segments=tf.reduce_max(mention_doc_ids) + 1))  # []

            k_Print = tf.Print(
                k, [num_sentences, doc_len, k, max_mentions_per_doc],
                "Num sents, num tokens, num_mentions, max_antecedents")

            max_antecedents = tf.minimum(
                tf.minimum(config["max_antecedents"], k - 1),
                max_mentions_per_doc - 1)
            if self.config["coarse_to_fine"]:
                antecedents, antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = coarse_to_fine_pruning(
                    mention_emb, mention_scores, max_antecedents,
                    mention_doc_ids, self.dropout)
            else:
                antecedents, antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = distance_pruning(
                    mention_emb, mention_scores, max_antecedents,
                    mention_doc_ids)

            antecedent_log_mask = tf.log(
                tf.to_float(antecedents_mask))  # [k, max_ant]

            # [k, max_ant], [k, max_ant, emb], [k, max_ant, emb2]
            antecedent_scores, antecedent_emb, pair_emb = get_antecedent_scores(
                mention_emb, mention_scores, antecedents, config, self.dropout,
                top_fast_antecedent_scores,
                top_antecedent_offsets)  # [k, max_ant]
            antecedent_scores = tf.concat(
                [tf.zeros([k, 1]), antecedent_scores], 1)  # [k, max_ant+1]

        # Compute Coref loss.
        if config["coref_weight"] > 0:
            flat_cluster_ids = tf.boolean_mask(
                tf.reshape(gold_coref_cluster_ids, [-1]),
                flat_candidate_mask)  # [num_candidates]
            mention_cluster_ids = tf.gather(flat_cluster_ids,
                                            top_mention_indices)  # [k]

            antecedent_cluster_ids = tf.gather(mention_cluster_ids,
                                               antecedents)  # [k, max_ant]
            antecedent_cluster_ids += tf.to_int32(
                antecedent_log_mask)  # [k, max_ant]
            same_cluster_indicator = tf.equal(antecedent_cluster_ids,
                                              tf.expand_dims(
                                                  mention_cluster_ids,
                                                  1))  # [k, max_ant]
            non_dummy_indicator = tf.expand_dims(mention_cluster_ids > 0,
                                                 1)  # [k, 1]
            pairwise_labels = tf.logical_and(
                same_cluster_indicator, non_dummy_indicator)  # [k, max_ant]

            dummy_labels = tf.logical_not(
                tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
            antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, max_ant+1]
            coref_loss = get_coref_softmax_loss(antecedent_scores,
                                                antecedent_labels)  # [k]
            coref_loss = tf.reduce_sum(
                coref_loss)  # / tf.to_float(num_sentences)  # []
            predict_dict.update({
                "candidate_mention_starts":
                flat_candidate_starts,  # [num_candidates]
                "candidate_mention_ends":
                flat_candidate_ends,  # [num_candidates]
                "candidate_mention_scores":
                candidate_mention_scores,  # [num_candidates]
                "mention_starts": mention_starts,  # [k]
                "mention_ends": mention_ends,  # [k]
                "antecedents": antecedents,  # [k, max_ant]
                "antecedent_scores": antecedent_scores,  # [k, max_ant+1]
            })
        else:
            coref_loss = 0

        tf.summary.scalar("REL_loss", rel_loss)
        tf.summary.scalar("NER_loss", ner_loss)
        tf.summary.scalar("Coref_loss", coref_loss)
        #srl_loss_Print = tf.Print(srl_loss, [srl_loss, ner_loss, coref_loss], "Loss")
        loss = config["ner_weight"] * ner_loss + (
            config["coref_weight"] * coref_loss +
            config["relation_weight"] * rel_loss)

        return predict_dict, loss
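The rel_prop branch above writes refined entity embeddings back into candidate_span_emb by scattering the old rows out and the new rows in. A NumPy sketch of why subtract-then-add equals an indexed row write (all names here are illustrative):

import numpy as np

num_candidates, emb = 6, 4
candidate_span_emb = np.arange(num_candidates * emb, dtype=float).reshape(num_candidates, emb)
indices = np.array([1, 4])            # rows selected as top entities
new_emb = np.ones((2, emb))           # refined embeddings for those rows

def scatter_nd(idx, updates, shape):  # dense stand-in for tf.scatter_nd
    out = np.zeros(shape)
    out[idx] = updates
    return out

old_padded = scatter_nd(indices, candidate_span_emb[indices], candidate_span_emb.shape)
new_padded = scatter_nd(indices, new_emb, candidate_span_emb.shape)
updated = candidate_span_emb - old_padded + new_padded

expected = candidate_span_emb.copy()
expected[indices] = new_emb
assert np.allclose(updated, expected)  # same result as an in-place row write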
Example #25
def fit_cube(max_iter          = 5000,
             resolution        = 4, 
             discontinuous     = False,
             repeats           = 1,
             log_interval      = 10, 
             display_interval  = None,
             display_res       = 512,
             out_dir           = '.',
             log_fn            = None,
             imgsave_interval  = None,
             imgsave_fn        = None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))
        
    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Setup TF graph for reference.
    vtxw = np.concatenate([vtxp, np.ones([vtxp.shape[0], 1])], axis=1).astype(np.float32)
    pos_clip = tf.matmul(vtxw, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[resolution, resolution], output_db=False)
    color, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Optimized variables.
    vtxc_opt = tf.get_variable('vtxc', initializer=tf.zeros_initializer(), shape=vtxc.shape)
    vtxp_opt = tf.get_variable('vtxp', initializer=tf.zeros_initializer(), shape=vtxp.shape)

    # Optimization variable setters for initialization.
    vtxc_opt_in = tf.placeholder(tf.float32, vtxc.shape)
    vtxp_opt_in = tf.placeholder(tf.float32, vtxp.shape)
    opt_set = tf.group(tf.assign(vtxc_opt, vtxc_opt_in), tf.assign(vtxp_opt, vtxp_opt_in))

    # Setup TF graph for the optimized result.
    vtxw_opt = tf.concat([vtxp_opt, tf.ones([vtxp.shape[0], 1], tf.float32)], axis=1)
    pos_clip_opt = tf.matmul(vtxw_opt, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[resolution, resolution], output_db=False)
    color_opt, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss and optimizer.
    loss = tf.reduce_mean((color_opt - color)**2)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[vtxp_opt, vtxc_opt])

    # Setup TF graph for display.
    rast_out_disp, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_disp, col_idx)
    color_disp = dr.antialias(color_disp, rast_out_disp, pos_clip_opt, pos_idx)
    rast_out_disp_ref, _ = dr.rasterize(pos_clip, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp_ref, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out_disp_ref, col_idx)
    color_disp_ref = dr.antialias(color_disp_ref, rast_out_disp_ref, pos_clip, pos_idx)

    # Geometric error calculation
    geom_loss = tf.reduce_mean(tf.reduce_sum((tf.abs(vtxp_opt) - .5)**2, axis=1)**0.5)

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Repeats.
    for rep in range(repeats):

        # Optimize.
        ang = 0.0
        gl_avg = []
        util.init_uninitialized_vars()
        for it in range(max_iter + 1):
            # Initialize optimization.
            if it == 0:
                vtxp_init = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
                vtxc_init = np.random.uniform(0.0, 1.0, size=vtxc.shape)
                util.run(opt_set, {vtxc_opt_in: vtxc_init.astype(np.float32), vtxp_opt_in: vtxp_init.astype(np.float32)})

            # Learning rate ramp.
            lr = 1e-2
            lr = lr * max(0.01, 10**(-it*0.0005))

            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj  = util.projection(x=0.4)
            r_mv  = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        
            # Run training and measure geometric error.
            gl_val, _ = util.run([geom_loss, train_op], {mtx_in: r_mvp, lr_in: lr})
            gl_avg.append(gl_val)

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val, gl_avg = np.mean(np.asarray(gl_avg)), []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)

            if display_image or save_image:
                ang = ang + 0.1
                img_o = util.run(color_opt,      {mtx_in: r_mvp})[0]
                img_b = util.run(color,          {mtx_in: r_mvp})[0]
                img_d = util.run(color_disp,     {mtx_in: a_mvp})[0]
                img_r = util.run(color_disp_ref, {mtx_in: a_mvp})[0]

                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = np.concatenate([img_o, img_b, img_d, img_r], axis=1)

            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # All repeats done.
    if log_file:
        log_file.close()
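fit_cube anneals its learning rate inline with a clamped exponential ramp instead of a scheduler object. The same curve as a standalone helper, with the constants copied from the loop above:

def cube_lr(it, base=1e-2, floor=0.01, decay=0.0005):
    # Decays by one decade every 2000 iterations, clamped at floor * base.
    return base * max(floor, 10 ** (-it * decay))

for it in (0, 1000, 2000, 5000):
    print(it, cube_lr(it))  # 1e-2, ~3.2e-3, 1e-3, then clamped at 1e-4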
Example #26
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, speaker_ids,
                                 genre, is_training, gold_starts, gold_ends,
                                 cluster_ids):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        # self.a , self.b = text_len , max_sentence_length
        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [k, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [k]

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(context_outputs)[0]) *
                self.config["top_span_ratio"]))
        k = tf.minimum(500, k)
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
            util.shape(context_outputs, 0), True)  # [1, k]
        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

        orig_dim = 1270
        with tf.name_scope("transformer"):
            with tf.name_scope("embedding_transformer"):
                W = tf.Variable(tf.random_normal((orig_dim, self.new_dim)))
                b = tf.Variable(tf.random_normal((self.new_dim, )))
                temp_input = tf.nn.relu(tf.matmul(top_span_emb, W) + b)

            padding_mask_partial = tf.cast(tf.sequence_mask(
                tf.shape(temp_input)[0], maxlen=self.seq_length),
                                           dtype=tf.float32)
            multiples = [self.seq_length]
            padding_mask_partial2 = tf.tile(padding_mask_partial, multiples)
            enc_padding_mask = tf.reshape(padding_mask_partial2,
                                          [multiples[0], -1])
            # enc_padding_mask  = tf.matrix_set_diag(enc_padding_mask, tf.zeros(enc_padding_mask.shape[0:-1]), name=None)

            dec_padding_mask = tf.reshape(padding_mask_partial2,
                                          [multiples[0], -1])
            dec_padding_mask = tf.matrix_set_diag(
                dec_padding_mask,
                tf.zeros(dec_padding_mask.shape[0:-1]),
                name=None)

            look_ahead_mask = create_look_ahead_mask(
                tf.shape(padding_mask_partial)[0])
            combined_mask = tf.minimum(enc_padding_mask, look_ahead_mask)

            s = tf.shape(temp_input)
            paddings = [[0, self.seq_length - s[0]], [0, 0]]
            padded_embd = tf.pad(temp_input, paddings, "CONSTANT")

            predictions, _ = self.sample_transformer(padded_embd, padded_embd,
                                                     True, enc_padding_mask,
                                                     combined_mask,
                                                     dec_padding_mask)

            top_span_emb = tf.concat([predictions[:k], top_span_emb], 1)

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)

        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids,
                    genre_emb)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
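
# A minimal, self-contained sketch of what the "lm_aggregation" scope above
# computes: a softmax-weighted mix of the stacked LM layers, scaled by a
# learned scalar. Shapes and names here are illustrative assumptions for the
# toy call below, not part of the original example.
import numpy as np

def aggregate_lm_layers(lm_emb, layer_scores, scaling):
    """lm_emb: [num_words, emb_size, num_layers] stack of LM layers."""
    weights = np.exp(layer_scores) / np.sum(np.exp(layer_scores))  # softmax
    mixed = lm_emb @ weights  # weighted sum over the layer axis
    return scaling * mixed    # [num_words, emb_size]

# Toy usage: three 1024-d layers for 7 words, uniform initial scores.
out = aggregate_lm_layers(np.random.randn(7, 1024, 3), np.zeros(3), 1.0)
assert out.shape == (7, 1024)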
Beispiel #27
0
def fit_env_phong(max_iter          = 1000,
                  log_interval      = 10,
                  display_interval  = None,
                  display_res       = 1024,
                  res               = 1024,
                  lr_base           = 1e-2,
                  lr_ramp           = 1.0,
                  out_dir           = None,
                  log_fn            = None,
                  mp4save_interval  = None,
                  mp4save_fn        = None):

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
    else:
        mp4save_interval = None

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32)/255.0
    env = np.stack(env)[:, ::-1].copy()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Move all the stuff to GPU.
    pos_idx = torch.as_tensor(pos_idx, dtype=torch.int32, device='cuda')
    pos = torch.as_tensor(pos, dtype=torch.float32, device='cuda')
    normals = torch.as_tensor(normals, dtype=torch.float32, device='cuda')
    env = torch.as_tensor(env, dtype=torch.float32, device='cuda')

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0
    phong_rgb_t = torch.as_tensor(phong_rgb, dtype=torch.float32, device='cuda')

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = torch.ones_like(env) * .5
    env_var.requires_grad_()
    phong_var_raw = torch.as_tensor(np.random.uniform(size=[4]), dtype=torch.float32, device='cuda')
    phong_var_raw.requires_grad_()
    phong_var_mul = torch.as_tensor([1.0, 1.0, 1.0, 10.0], dtype=torch.float32, device='cuda')

    # Render.
    ang = 0.0
    imgloss_avg, phong_avg = [], []
    glctx = dr.RasterizeGLContext()
    zero_tensor = torch.as_tensor(0.0, dtype=torch.float32, device='cuda')
    one_tensor = torch.as_tensor(1.0, dtype=torch.float32, device='cuda')

    # Adam optimizer for environment map and phong with a learning rate ramp.
    optimizer = torch.optim.Adam([env_var, phong_var_raw], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter)))

    for it in range(max_iter + 1):
        phong_var = phong_var_raw * phong_var_mul

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj  = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv  = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        a_mvc = a_mvp
        r_mvp = torch.as_tensor(r_mvp, dtype=torch.float32, device='cuda')
        a_mvp = torch.as_tensor(a_mvp, dtype=torch.float32, device='cuda')

        # Solve camera positions.
        a_campos = torch.as_tensor(np.linalg.inv(a_mv)[:3, 3], dtype=torch.float32, device='cuda')
        r_campos = torch.as_tensor(np.linalg.inv(r_mv)[:3, 3], dtype=torch.float32, device='cuda')

        # Random light direction.        
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8
        lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')

        def render_refl(ldir, cpos, mvp):
            # Transform and rasterize.
            viewvec = pos[..., :3] - cpos[np.newaxis, np.newaxis, :] # View vectors at vertices.
            reflvec = viewvec - 2.0 * normals[np.newaxis, ...] * torch.sum(normals[np.newaxis, ...] * viewvec, -1, keepdim=True) # Reflection vectors at vertices.
            reflvec = reflvec / torch.sum(reflvec**2, -1, keepdim=True)**0.5 # Normalize.
            pos_clip = torch.matmul(pos, mvp.t())[np.newaxis, ...]
            rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx, [res, res])
            refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all') # Interpolated reflection vectors.

            # Phong light.
            refl = refl / (torch.sum(refl**2, -1, keepdim=True) + 1e-8)**0.5  # Normalize.
            ldotr = torch.sum(-ldir * refl, -1, keepdim=True) # L dot R.

            # Return
            return refl, refld, ldotr, (rast_out[..., -1:] == 0)

        # Render the reflections.
        refl, refld, ldotr, mask = render_refl(lightdir, r_campos, r_mvp)

        # Reference color. No need for AA because we are not learning geometry.
        color = dr.texture(env[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color = color + phong_rgb_t * torch.max(zero_tensor, ldotr) ** phong_exp # Phong.
        color = torch.where(mask, one_tensor, color) # White background.

        # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
        color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3] # Phong.
        color_opt = torch.where(mask, one_tensor, color_opt) # White background.

        # Compute loss and train.
        loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Collect losses.
        imgloss_avg.append(loss.detach().cpu().numpy())
        phong_avg.append(phong_var.detach().cpu().numpy())

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.        
        display_image = display_interval and (it % display_interval == 0)
        save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

        if display_image or save_mp4:
            lightdir = np.asarray([.8, -1., .5, 0.0])
            lightdir = np.matmul(a_mvc, lightdir)[:3]
            lightdir /= np.linalg.norm(lightdir)
            lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')
            refl, refld, ldotr, mask = render_refl(lightdir, a_campos, a_mvp)
            color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
            color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3]
            color_opt = torch.where(mask, one_tensor, color_opt)
            result_image = color_opt.detach()[0].cpu().numpy()
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_mp4:
                writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
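
# A minimal sketch of the learning-rate ramp used in fit_env_phong: LambdaLR
# multiplies lr_base by lr_ramp**(it / max_iter), an exponential decay from
# lr_base toward lr_base * lr_ramp over the run. The toy parameter and the
# concrete numbers below are assumptions for illustration only.
import torch

param = torch.zeros(1, requires_grad=True)
opt = torch.optim.Adam([param], lr=1e-2)             # lr_base = 1e-2
sched = torch.optim.lr_scheduler.LambdaLR(
    opt, lr_lambda=lambda it: 0.1 ** (it / 1000.0))  # lr_ramp = 0.1, max_iter = 1000

for it in range(3):
    opt.zero_grad()
    (param ** 2).sum().backward()
    opt.step()
    sched.step()
    print(it, sched.get_last_lr())                   # decays toward 1e-3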
Beispiel #28
0
def fit_pose(max_iter=10000,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             lr_base=0.01,
             lr_falloff=1.0,
             nr_base=1.0,
             nr_falloff=1e-4,
             grad_phase_start=0.5,
             resolution=256,
             out_dir=None,
             log_fn=None,
             mp4save_interval=None,
             mp4save_fn=None):

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}',
                                        mode='I',
                                        fps=30,
                                        codec='libx264',
                                        bitrate='16M')
    else:
        mp4save_interval = None

    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/cube_p.npz') as f:
        pos_idx, pos, col_idx, col = f.values()
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1).  Drop
    # the last column in that case.
    if pos.shape[1] == 4: pos = pos[:, 0:3]

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
    vtx_col = torch.from_numpy(col.astype(np.float32)).cuda()

    glctx = dr.RasterizeGLContext()

    for rep in range(repeats):
        pose_target = torch.tensor(q_rnd(), device='cuda')
        pose_init = q_rnd()
        pose_opt = torch.tensor(pose_init / np.sum(pose_init**2)**0.5,
                                dtype=torch.float32,
                                device='cuda',
                                requires_grad=True)

        loss_best = np.inf
        pose_best = pose_opt.detach().clone()

        # Modelview + projection matrix.
        mvp = torch.tensor(np.matmul(util.projection(x=0.4),
                                     util.translate(0, 0,
                                                    -3.5)).astype(np.float32),
                           device='cuda')

        # Adam optimizer for texture with a learning rate ramp.
        optimizer = torch.optim.Adam([pose_opt],
                                     betas=(0.9, 0.999),
                                     lr=lr_base)
        # Render.
        for it in range(max_iter + 1):
            # Set learning rate.
            itf = 1.0 * it / max_iter
            nr = nr_base * nr_falloff**itf
            lr = lr_base * lr_falloff**itf
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

            # Noise input.
            if itf >= grad_phase_start:
                noise = q_unit()
            else:
                noise = q_scale(q_rnd(), nr)
                noise = q_mul(noise, q_rnd_S4())  # Orientation noise.

            # Render.
            color = render(glctx, torch.matmul(mvp, q_to_mtx(pose_target)),
                           vtx_pos, pos_idx, vtx_col, col_idx, resolution)
            pose_total_opt = q_mul_torch(pose_opt, noise)
            mtx_total_opt = torch.matmul(mvp, q_to_mtx(pose_total_opt))
            color_opt = render(glctx, mtx_total_opt, vtx_pos, pos_idx, vtx_col,
                               col_idx, resolution)

            # Image-space loss.
            diff = (color_opt - color)**2  # L2 norm.
            diff = torch.tanh(5.0 * torch.max(diff, dim=-1)[0])
            loss = torch.mean(diff)

            # Measure image-space loss and update best found pose.
            loss_val = float(loss)
            if (loss_val < loss_best) and (loss_val > 0.0):
                pose_best = pose_total_opt.detach().clone()
                loss_best = loss_val
                if itf < grad_phase_start:
                    with torch.no_grad():
                        pose_opt[:] = pose_best

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                err = q_angle_deg(pose_opt, pose_target)
                ebest = q_angle_deg(pose_best, pose_target)
                s = "rep=%d,iter=%d,err=%f,err_best=%f,loss=%f,loss_best=%f,lr=%f,nr=%f" % (
                    rep, it, err, ebest, loss_val, loss_best, lr, nr)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Run gradient training step.
            if itf >= grad_phase_start:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            with torch.no_grad():
                pose_opt /= torch.sum(pose_opt**2)**0.5

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                img_ref = color[0].detach().cpu().numpy()
                img_opt = color_opt[0].detach().cpu().numpy()
                img_best = render(glctx, torch.matmul(mvp,
                                                      q_to_mtx(pose_best)),
                                  vtx_pos, pos_idx, vtx_col, col_idx,
                                  resolution)[0].detach().cpu().numpy()
                result_image = np.concatenate([img_ref, img_best, img_opt],
                                              axis=1)

                if display_image:
                    util.display_image(result_image,
                                       size=display_res,
                                       title='(%d) %d / %d' %
                                       (rep, it, max_iter))
                if save_mp4:
                    writer.append_data(
                        np.clip(np.rint(result_image * 255.0), 0,
                                255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
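
# A minimal sketch of the pose parameterization in fit_pose: the pose is a
# quaternion that Adam updates freely and that is then projected back onto the
# unit sphere after every step so it stays a valid rotation. The loss below is
# a stand-in for the image-space loss, purely for illustration.
import torch

pose_opt = torch.tensor([0.9, 0.1, 0.3, 0.4], requires_grad=True)
opt = torch.optim.Adam([pose_opt], lr=0.01)

loss = (pose_opt ** 2).sum()  # stand-in loss
opt.zero_grad()
loss.backward()
opt.step()
with torch.no_grad():         # renormalize to unit length
    pose_opt /= torch.sum(pose_opt ** 2) ** 0.5
print(float(torch.sum(pose_opt ** 2)))  # ~1.0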
  def get_predictions_and_loss(self, inputs):
    tokens, context_word_emb, lm_emb, char_index, text_len, is_training, gold_labels = inputs
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(tokens)[0]
    max_sentence_length = tf.shape(tokens)[1]

    context_emb_list = []
    context_emb_list.append(context_word_emb)
    char_emb = tf.gather(tf.get_variable("char_embeddings", [len(self.char_dict), self.config["char_embedding_size"]]), char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
    flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length, util.shape(char_emb, 2), util.shape(char_emb, 3)]) # [num_sentences * max_sentence_length, max_word_length, emb]
    flattened_aggregated_char_emb = util.cnn(flattened_char_emb, self.config["filter_widths"], self.config["filter_size"]) # [num_sentences * max_sentence_length, emb]
    aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [num_sentences, max_sentence_length, util.shape(flattened_aggregated_char_emb, 1)]) # [num_sentences, max_sentence_length, emb]
    context_emb_list.append(aggregated_char_emb)


    if self.lm_file is not None:  # Only add these layers if we're using contextualized embeddings
      lm_emb_size = util.shape(lm_emb, 2)
      lm_num_layers = util.shape(lm_emb, 3)
      with tf.variable_scope("lm_aggregation"):
        self.lm_weights = tf.nn.softmax(tf.get_variable("lm_scores", [lm_num_layers], initializer=tf.constant_initializer(0.0)))
        self.lm_scaling = tf.get_variable("lm_scaling", [], initializer=tf.constant_initializer(1.0))

      flattened_lm_emb = tf.reshape(lm_emb, [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
      flattened_aggregated_lm_emb = tf.matmul(flattened_lm_emb, tf.expand_dims(self.lm_weights, 1)) # [num_sentences * max_sentence_length * emb, 1]
      aggregated_lm_emb = tf.reshape(flattened_aggregated_lm_emb, [num_sentences, max_sentence_length, lm_emb_size])
      aggregated_lm_emb *= self.lm_scaling
      context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2) # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout) # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length) # [num_sentence, max_sentence_length]

    candidate_scores_mask = tf.logical_and(
        tf.expand_dims(text_len_mask, 1),
        tf.expand_dims(text_len_mask, 2))  # [num_sentences, max_sentence_length, max_sentence_length]
    sentence_ends_leq_starts = tf.tile(
        tf.expand_dims(
            tf.logical_not(
                tf.sequence_mask(tf.range(max_sentence_length),
                                 max_sentence_length)), 0),
        [num_sentences, 1, 1])  # [num_sentences, max_sentence_length, max_sentence_length]
    candidate_scores_mask = tf.logical_and(candidate_scores_mask,
                                           sentence_ends_leq_starts)

    flattened_candidate_scores_mask = tf.reshape(
        candidate_scores_mask,
        [-1])  # [num_sentences * max_sentence_length * max_sentence_length]

    context_outputs = self.lstm_contextualize(
        context_emb, text_len,
        self.lstm_dropout)  # [num_sentences, max_sentence_length, emb]

    with tf.variable_scope("candidate_starts_ffnn"):
      candidate_starts_emb = util.projection(
          context_outputs,
          self.config["ffnn_size"])  # [num_sentences, max_sentence_length, emb]
    with tf.variable_scope("candidate_ends_ffnn"):
      candidate_ends_emb = util.projection(
          context_outputs,
          self.config["ffnn_size"])  # [num_sentences, max_sentence_length, emb]

    candidate_ner_scores = util.bilinear_classifier(
        candidate_starts_emb, candidate_ends_emb, self.dropout,
        output_size=self.num_types + 1
    )  # [num_sentences, max_sentence_length, max_sentence_length, num_types + 1]
    candidate_ner_scores = tf.boolean_mask(
        tf.reshape(candidate_ner_scores, [-1, self.num_types + 1]),
        flattened_candidate_scores_mask)


    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=gold_labels, logits=candidate_ner_scores)
    loss = tf.reduce_sum(loss)


    return candidate_ner_scores, loss
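
# A minimal sketch of the candidate-span mask built in the NER example above:
# a span (start=i, end=j) within one sentence is valid iff both positions fall
# inside the sentence's real length and j >= i. Plain NumPy with toy sizes.
import numpy as np

max_len, text_len = 5, 3
in_sent = np.arange(max_len) < text_len                       # [max_len]
pair_ok = in_sent[:, None] & in_sent[None, :]                 # both ends valid
ends_geq_starts = np.arange(max_len)[:, None] <= np.arange(max_len)[None, :]
mask = pair_ok & ends_geq_starts                              # [max_len, max_len]
print(mask.astype(int))  # entry [i, j] == 1 exactly when 0 <= i <= j < text_len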
def fit_cube(max_iter=5000,
             resolution=4,
             discontinuous=False,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             out_dir=None,
             log_fn=None,
             mp4save_interval=None,
             mp4save_fn=None):

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(f'{out_dir}/{log_fn}', 'wt')
        if mp4save_interval != 0:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}',
                                        mode='I',
                                        fps=30,
                                        codec='libx264',
                                        bitrate='16M')
    else:
        mp4save_interval = None

    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], vtxp.shape[0]))

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(vtxp.astype(np.float32)).cuda()
    vtx_col = torch.from_numpy(vtxc.astype(np.float32)).cuda()

    glctx = dr.RasterizeGLContext()

    # Repeats.
    for rep in range(repeats):

        ang = 0.0
        gl_avg = []

        vtx_pos_rand = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
        vtx_col_rand = np.random.uniform(0.0, 1.0, size=vtxc.shape)
        vtx_pos_opt = torch.tensor(vtx_pos_rand,
                                   dtype=torch.float32,
                                   device='cuda',
                                   requires_grad=True)
        vtx_col_opt = torch.tensor(vtx_col_rand,
                                   dtype=torch.float32,
                                   device='cuda',
                                   requires_grad=True)

        # Adam optimizer for vertex position and color with a learning rate ramp.
        optimizer = torch.optim.Adam([vtx_pos_opt, vtx_col_opt], lr=1e-2)
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lr_lambda=lambda x: max(0.01, 10**(-x * 0.0005)))

        for it in range(max_iter + 1):
            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4)
            r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

            # Compute geometric error for logging.
            with torch.no_grad():
                geom_loss = torch.mean(
                    torch.sum((torch.abs(vtx_pos_opt) - .5)**2, dim=1)**0.5)
                gl_avg.append(float(geom_loss))

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val = np.mean(np.asarray(gl_avg))
                gl_avg = []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_col, col_idx,
                           resolution)
            color_opt = render(glctx, r_mvp, vtx_pos_opt, pos_idx, vtx_col_opt,
                               col_idx, resolution)

            # Compute loss and train.
            loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                ang = ang + 0.01

                img_b = color[0].cpu().numpy()
                img_o = color_opt[0].detach().cpu().numpy()
                img_d = render(glctx, a_mvp, vtx_pos_opt, pos_idx, vtx_col_opt,
                               col_idx, display_res)[0]
                img_r = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_col,
                               col_idx, display_res)[0]

                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = make_grid(
                    np.stack([
                        img_o, img_b,
                        img_d.detach().cpu().numpy(),
                        img_r.cpu().numpy()
                    ]))

                if display_image:
                    util.display_image(result_image,
                                       size=display_res,
                                       title='%d / %d' % (it, max_iter))
                if save_mp4:
                    writer.append_data(
                        np.clip(np.rint(result_image * 255.0), 0,
                                255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
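
# A minimal sketch of the display-time upscaling in fit_cube: the low-res
# render is enlarged by pixel replication with np.repeat on both spatial axes
# before being placed into the image grid. Toy 2x2 RGB image, assumed scale 3.
import numpy as np

img = np.arange(12, dtype=np.float32).reshape(2, 2, 3)  # [H, W, C]
scl = 3
big = np.repeat(np.repeat(img, scl, axis=0), scl, axis=1)
assert big.shape == (6, 6, 3)  # each source pixel becomes a scl x scl block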
Beispiel #31
0
    def get_predictions_and_loss(self, input_ids, input_mask, text_len,
                                 speaker_ids, genre, is_training, gold_starts,
                                 gold_ends, cluster_ids, sentence_map):
        model = modeling.BertModel(config=self.bert_config,
                                   is_training=is_training,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   use_one_hot_embeddings=False,
                                   scope='bert')
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        mention_doc = model.get_sequence_output()  # (batch_size, seq_len, hidden)
        mention_doc = self.flatten_emb_by_sentence(
            mention_doc, input_mask)  # (b, s, e) -> (b*s, e); keep only the valid tokens' embeddings
        num_words = util.shape(mention_doc, 0)  # b*s

        # candidate_span: every position may be a start; each start has max_span_width possible ends, giving (num_words, max_span_width) candidates in total
        candidate_starts = tf.tile(tf.expand_dims(tf.range(num_words), 1),
                                   [1, self.max_span_width])
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)

        # [num_words, max_span_width]; gather the sentence_id at each candidate position
        candidate_start_sentence_indices = tf.gather(sentence_map,
                                                     candidate_starts)
        candidate_end_sentence_indices = tf.gather(
            sentence_map, tf.minimum(candidate_ends, num_words - 1))
        # [num_words, max_span_width]; a valid span's start/end must stay in bounds and lie in the same sentence
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(candidate_start_sentence_indices,
                     candidate_end_sentence_indices))
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        # [num_candidates] flatten the candidate spans and mask out the invalid ones
        candidate_starts = tf.boolean_mask(tf.reshape(candidate_starts, [-1]),
                                           flattened_candidate_mask)
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates] cluster_id of each candidate span
        # [num_candidates, emb] span representations; [num_candidates] mention scores
        candidate_span_emb = self.get_span_emb(
            mention_doc, candidate_starts, candidate_ends)
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb, candidate_starts, candidate_ends)

        # beam size: the number of spans kept is at most num_words * top_span_ratio
        k = tf.minimum(
            3900,
            tf.to_int32(
                tf.floor(
                    tf.to_float(num_words) * self.config["top_span_ratio"])))
        c = tf.minimum(self.config["max_top_antecedents"],
                       k)  # coarse pruning keeps e.g. 0.4 * 500 = 200 candidates; fine pruning then keeps 50
        # pull from beam: keep the top 0.4 * num_words spans by mention score alone
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0), num_words,
            True)  # [1, k]
        top_span_indices = tf.reshape(
            top_span_indices, [-1])  # indices of the k candidates kept by mention-score pruning

        # gather the top-k spans' info, run coarse span-pair pruning, keep the top c antecedents per span
        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]

        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_pruning(
            top_span_emb, top_span_mention_scores, c)

        genre_emb = tf.gather(
            tf.get_variable(
                "genre_embeddings",
                [len(self.genres), self.config["feature_size"]],
                initializer=tf.truncated_normal_initializer(stddev=0.02)),
            genre)  # [emb]
        if self.config['use_metadata']:
            speaker_ids = self.flatten_emb_by_sentence(
                speaker_ids, input_mask)  # flatten, then mask
            top_span_speaker_ids = tf.gather(
                speaker_ids, top_span_starts)  # speaker_id at each span's start position
        else:
            top_span_speaker_ids = None

        dummy_scores = tf.zeros([k, 1])  # [k, 1]

        num_segs, seg_len = util.shape(input_ids, 0), util.shape(input_ids, 1)
        word_segments = tf.tile(tf.expand_dims(tf.range(0, num_segs), 1),
                                [1, seg_len])
        flat_word_segments = tf.boolean_mask(tf.reshape(word_segments, [-1]),
                                             tf.reshape(input_mask, [-1]))
        # mention_segments: [num_candidates]; the segment each candidate span starts in
        mention_segments = tf.expand_dims(
            tf.gather(flat_word_segments, top_span_starts), 1)  # [k, 1]
        # antecedent_segments: [k, c]; the segment each of a span's antecedents lies in
        antecedent_segments = tf.gather(flat_word_segments,
                                        tf.gather(top_span_starts,
                                                  top_antecedents))  # [k, c]
        segment_distance = None
        if self.config[
                'use_segment_distance']:  # [k, c] how many segments apart each mention and its antecedent are
            segment_distance = tf.clip_by_value(
                mention_segments - antecedent_segments, 0,
                self.config['max_training_sentences'] - 1)
        if self.config['fine_grained']:  # i.e. incorporate higher-order information
            for i in range(self.config["coref_depth"]):
                with tf.variable_scope("coref_layer", reuse=(i > 0)):
                    top_antecedent_emb = tf.gather(
                        top_span_emb, top_antecedents)  # [k, c, emb]
                    top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                        top_span_emb, top_antecedents, top_antecedent_emb,
                        top_antecedent_offsets, top_span_speaker_ids,
                        genre_emb, segment_distance
                    )  # [k, c] final score s(i, j) = sm(i) + sm(j) + sc(i, j) + sa(i, j)
                    # top_antecedent_weights: [k, c + 1] weights each mention assigns over its antecedents
                    # top_antecedent_emb: [k, c + 1, emb] embedding of each antecedent of each mention
                    # attended_span_emb: [k, emb] weighted sum of each mention's antecedent representations
                    top_antecedent_weights = tf.nn.softmax(
                        tf.concat([dummy_scores, top_antecedent_scores], 1))
                    top_antecedent_emb = tf.concat(
                        [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                        1)
                    attended_span_emb = tf.reduce_sum(
                        tf.expand_dims(top_antecedent_weights, 2) *
                        top_antecedent_emb, 1)
                    with tf.variable_scope("f"):
                        f = tf.sigmoid(
                            util.projection(
                                tf.concat([top_span_emb, attended_span_emb],
                                          1), util.shape(top_span_emb,
                                                         -1)))  # [k, emb]
                        top_span_emb = f * attended_span_emb + (
                            1 - f) * top_span_emb  # [k, emb]
        else:
            top_antecedent_scores = top_fast_antecedent_scores

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        # top_antecedent_cluster_ids [k, c]: cluster_id of each antecedent of each mention
        # same_cluster_indicator [k, c]: whether each mention and each predicted antecedent share a cluster
        # pairwise_labels [k, c]: pairwise labels; 0 for non-mentions and non-antecedents, 1 when mention and antecedent corefer
        # top_antecedent_labels [k, c + 1]: final labels; the dummy label is 1 for a mention with no antecedent
        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
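
# A minimal sketch of the antecedent labels built above: an antecedent is a
# positive example iff it shares a non-zero (gold) cluster with the mention;
# if none does, the dummy column at index 0 carries the label. Plain NumPy
# with toy cluster ids; the log-of-mask trick of the original is omitted.
import numpy as np

top_span_cluster_ids = np.array([1, 2, 0, 1])  # [k]; 0 = not a gold mention
antecedent_cluster_ids = np.array([[2, 1],     # [k, c] with c = 2
                                   [0, 0],
                                   [1, 2],
                                   [1, 0]])
same = antecedent_cluster_ids == top_span_cluster_ids[:, None]
pairwise = same & (top_span_cluster_ids[:, None] > 0)  # [k, c]
dummy = ~pairwise.any(axis=1, keepdims=True)           # [k, 1]
labels = np.concatenate([dummy, pairwise], axis=1)     # [k, c + 1]
print(labels.astype(int))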
def fit_mesh_col(
    initial_mesh: dict,
    target_dataset_dir: str,
    max_iterations: int = 10000,
    resolution: int = 256,
    log_interval: int = None,
    display_interval = None,
    display_res = 512,
    out_dir = None,
    mp4save_interval = None
    ):

    distance = 3

    target_dataset = util.ReferenceImages(target_dataset_dir, resolution, resolution)

    pos_idx = torch.from_numpy(initial_mesh['pos_idx'].astype(np.int32))
    vtx_pos = torch.from_numpy(initial_mesh['vtx_pos'].astype(np.float32))

    laplace = util.compute_laplace_matrix(vtx_pos, pos_idx).cuda()
    pos_idx = pos_idx.cuda()
    vtx_pos = vtx_pos.cuda()

    init_rot = util.rotate_z(-math.pi/2).cuda()
    vtx_pos = transform_pos(init_rot, vtx_pos)[0][:, 0:3]
    vtx_pos.requires_grad = True

    col_idx  = torch.from_numpy(initial_mesh['pos_idx'].astype(np.int32)).cuda()
    vtx_col  = torch.ones_like(vtx_pos) * 0.5
    vtx_col.requires_grad = True

    glctx = dr.RasterizeGLContext()


    M1 = torch.eye(len(target_dataset)).cuda()
    M1.requires_grad = True
    M2 = torch.eye(len(target_dataset)).cuda()
    M2.requires_grad = True

    #M3 = torch.zeros((3, vtx_pos.shape[0], len(target_dataset))).cuda()
    M3 = torch.zeros((3 * vtx_pos.shape[0], len(target_dataset))).cuda()
    M3.requires_grad = True

    lr_ramp = .1
    params = [{'params': [M1, M2, M3], 'lr': 1e-3}, {'params': vtx_col, 'lr': 1e-2}]
    # params = [{'params': vtx_col, 'lr': 1e-2}]
    #lambdas = [lambda x: max(0.01, 10**(-x*0.0005)), lambda x: lr_ramp**(float(x)/float(max_iterations))]


    optimizer    = torch.optim.Adam(params)
    #scheduler    = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambdas)

    total_steps = 0

    loss_hist, l2_hist, reg_hist = [], [], []

    for i in range(max_iterations):
        for j, (img, angle) in enumerate(target_dataset):
            img = img.cuda().permute(2,1,0)

            frame_tensor = torch.zeros(len(target_dataset))
            frame_tensor[j] = 1
            frame_tensor = frame_tensor.cuda()
            frame_tensor.requires_grad = True

            deltas = torch.matmul(M3, torch.matmul(M2, torch.matmul(M1, frame_tensor))).flatten()
            #deformed_vtxs = vtx_pos + deltas.T
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape((vtx_pos.shape[0], 3))

            # create the model-view-projection matrix
            # rotate model about z axis by angle
            rot = util.rotate_y(angle)
            #rot = torch.eye(4)
            # translate by distance
            tr = util.translate(z=-distance)
            # perspective projection
            proj = util.projection(x=0.4)

            mtx = proj.matmul(tr.matmul(rot)).cuda()
            mtx.requires_grad = True

            estimate = render(glctx, mtx, deformed_vtxs, pos_idx, col_idx, vtx_col, resolution)[0]

            # compute loss
            loss = torch.mean((estimate - img) ** 2)

            # compute regularizer
            reg = torch.mean((util.compute_curvature(deformed_vtxs, laplace) - util.compute_curvature(vtx_pos, laplace)) ** 2) + torch.mean(deltas**2)
            
            # combine
            loss = loss + 5 * reg

            loss_hist.append(loss.cpu().numpy())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #scheduler.step()

            with torch.no_grad():
                #print(f"Loss: {loss}")
                # clamp color between 0 and 1
                vtx_col.clamp_(0, 1)

            if (display_interval and (i % display_interval == 0)) or (i == max_iterations - 1):
                print(loss)
                with torch.no_grad():
                    estimate = render(glctx, mtx, deformed_vtxs, pos_idx, col_idx, vtx_col, resolution)[0].detach().cpu().numpy()
                    Image.fromarray((estimate * 255).astype(np.uint8)).save('estimate.png')
                    img = img.detach().cpu().numpy()
                    Image.fromarray((img * 255).astype(np.uint8)).save('img.png')


    with torch.no_grad():
        for i, (im, _) in enumerate(target_dataset):
            frame_tensor = torch.zeros(len(target_dataset))
            frame_tensor[i] = 1  # one-hot selector for frame i
            frame_tensor = frame_tensor.cuda()

            deltas = torch.matmul(M3, torch.matmul(M2, torch.matmul(M1, frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape((vtx_pos.shape[0], 3))
            deformed_vtxs = torch.clamp(deformed_vtxs, -1.0, 1.0)

            #write_obj(f"frame_{i}.obj", deformed_vtxs.detach().cpu().tolist(), pos_idx.detach().cpu().tolist())
            util.write_obj(f"frame_{i}.obj", deformed_vtxs.detach().cpu().tolist(), pos_idx.detach().cpu().tolist(), vtx_col.detach().cpu().tolist())

    np.savez('vtx_col.npz', vtx_col=vtx_col.cpu().detach().numpy())
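
# A minimal sketch of the per-frame deformation in fit_mesh_col: a one-hot
# frame selector is pushed through M1, M2 and M3 to produce one 3D offset per
# vertex, so each reference frame owns a learned deformation of the shared
# base mesh. Toy sizes below are assumptions for illustration.
import torch

n_verts, n_frames = 8, 4
M1 = torch.eye(n_frames)
M2 = torch.eye(n_frames)
M3 = torch.zeros(3 * n_verts, n_frames, requires_grad=True)
vtx_pos = torch.randn(n_verts, 3)

frame = torch.zeros(n_frames)
frame[2] = 1                                  # select frame 2
deltas = M3 @ (M2 @ (M1 @ frame))             # [3 * n_verts]
deformed = (vtx_pos.flatten() + deltas).reshape(n_verts, 3)
assert deformed.shape == (n_verts, 3)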
Beispiel #33
0
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, speaker_ids,
                                 genre, is_training, gold_starts, gold_ends,
                                 cluster_ids, inject_starts, inject_ends):

        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        if self._use_injected_mentions(is_training):
            candidate_starts = tf.transpose(tf.expand_dims(inject_starts, 1))
            candidate_ends = tf.transpose(tf.expand_dims(inject_ends, 1))
        else:
            candidate_starts = tf.tile(
                tf.expand_dims(tf.range(num_words), 1),
                [1, self.max_span_width])  # [num_words, max_span_width]
            candidate_ends = candidate_starts + tf.expand_dims(
                tf.range(self.max_span_width),
                0)  # [num_words, max_span_width]

        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]

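        # Supervision: get_candidate_labels assigns each candidate the cluster
        # id of a matching gold mention (0, presumably, when it matches none).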
        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [num_candidates, 1]
        candidate_mention_scores = tf.squeeze(
            candidate_mention_scores, 1)  # [num_candidates]

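        # Span pruning: keep the k highest-scoring candidates, with
        # k = top_span_ratio * num_words; extract_spans is a custom op that
        # also appears to suppress partially overlapping (crossing) spans.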
        if self._use_injected_mentions(is_training):
            k = tf.shape(candidate_starts)[0]
            top_span_indices = tf.expand_dims(tf.range(k), 0)
        else:
            k = tf.to_int32(
                tf.floor(
                    tf.to_float(tf.shape(context_outputs)[0]) *
                    self.config["top_span_ratio"]))
            top_span_indices = coref_ops.extract_spans(
                tf.expand_dims(candidate_mention_scores, 0),
                tf.expand_dims(candidate_starts, 0),
                tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
                util.shape(context_outputs, 0), True)  # [1, k]

        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

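        # Each kept span considers at most c candidate antecedents.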
        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)

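        # Higher-order refinement: for coref_depth iterations, re-score the
        # antecedents and update each span embedding with an attention-weighted
        # average over its antecedents plus itself (the dummy slot).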
        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = (
                    top_fast_antecedent_scores +
                    self.get_slow_antecedent_scores(
                        top_span_emb, top_antecedents, top_antecedent_emb,
                        top_antecedent_offsets, top_span_speaker_ids,
                        genre_emb))  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
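                # GRU-like gate: f interpolates between the current span
                # embedding and the antecedent-attended embedding.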
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]

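        # Final scores: prepend a fixed 0 score for the dummy
        # "no antecedent" option.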
        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

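        # Build gold antecedent labels for the softmax loss below.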
        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
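        # Mask trick: log(0) = -inf, and the int32 cast turns -inf into a huge
        # negative number, so masked antecedent slots can never equal a real
        # (positive) cluster id.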
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
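        # The loss appears to marginalize over all correct antecedents of each
        # span (as in Lee et al., 2017) and then sums over spans.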
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
Beispiel #34
    def get_span_emb(self, head_emb, context_outputs, span_starts, span_ends):
        span_emb_list = []

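        # Multi-span mode (a reading of this branch): span_starts/span_ends
        # arrive as [num_spans, num_subspans]; the gathered sub-span start and
        # end vectors are pooled by a 2-D CNN (util.cnn2d) into one start and
        # one end embedding per span.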
        if self.config["use_multi_span"]:
            emb_size = 2 * self.config["contextualization_size"]
            dim0 = util.shape(span_starts, 0)
            dim1 = util.shape(span_starts, 1)

            reshaped_span_starts = tf.reshape(span_starts,
                                              [dim0 * dim1])  # [a*b]
            gathered_span_starts = tf.gather(
                context_outputs, reshaped_span_starts)  # [a*b, emb]
            cnn_span_starts = tf.reshape(gathered_span_starts,
                                         [dim0, dim1, emb_size])  # [a, b, emb]

            span_starts_4dim = tf.expand_dims(cnn_span_starts, 3)
            span_start_emb = util.cnn2d(span_starts_4dim,
                                        self.config["emb_filter_widths"],
                                        ffnn_out_size=emb_size,
                                        name="start")

            reshaped_span_ends = tf.reshape(span_ends, [dim0 * dim1])
            gathered_span_ends = tf.gather(context_outputs, reshaped_span_ends)
            cnn_span_ends = tf.reshape(gathered_span_ends,
                                       [dim0, dim1, emb_size])

            span_ends_4dim = tf.expand_dims(cnn_span_ends, 3)
            span_end_emb = util.cnn2d(span_ends_4dim,
                                      self.config["emb_filter_widths"],
                                      ffnn_out_size=emb_size,
                                      name="end")

            # Multi-span candidates collapse to their first sub-span so the
            # width and head features below stay 1-D.  TODO: model_heads
            span_starts = tf.squeeze(span_starts[:, :1], 1)
            span_ends = tf.squeeze(span_ends[:, :1], 1)

        else:
            span_start_emb = tf.gather(context_outputs,
                                       span_starts)  # [k, emb]
            span_end_emb = tf.gather(context_outputs, span_ends)  # [k, emb]

        span_width = 1 + span_ends - span_starts  # [k]
        span_emb_list.append(span_start_emb)
        span_emb_list.append(span_end_emb)

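        # Learned span-width feature: width - 1 indexes the embedding table.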
        if self.config["use_features"]:
            span_width_index = span_width - 1  # [k]
            span_width_emb = tf.gather(
                tf.get_variable("span_width_embeddings", [
                    self.config["max_span_width"], self.config["feature_size"]
                ]), span_width_index)  # [k, emb]
            span_width_emb = tf.nn.dropout(span_width_emb, self.dropout)
            span_emb_list.append(span_width_emb)

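        # Attention-based head finding: every token is scored once, positions
        # past each span's end are masked with log(0) = -inf, and a softmax
        # over the remaining positions weights the head embeddings.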
        if self.config["model_heads"]:
            # [k, max_span_width]
            span_indices = tf.expand_dims(
                tf.range(self.config["max_span_width"]), 0) + tf.expand_dims(
                    span_starts, 1)
            span_indices = tf.minimum(
                util.shape(context_outputs, 0) - 1,
                span_indices)  # [k, max_span_width]
            span_text_emb = tf.gather(head_emb,
                                      span_indices)  # [k, max_span_width, emb]
            with tf.variable_scope("head_scores"):
                self.head_scores = util.projection(context_outputs,
                                                   1)  # [num_words, 1]
            span_head_scores = tf.gather(
                self.head_scores, span_indices)  # [k, max_span_width, 1]
            span_mask = tf.expand_dims(
                tf.sequence_mask(span_width,
                                 self.config["max_span_width"],
                                 dtype=tf.float32),
                2)  # [k, max_span_width, 1]
            span_head_scores += tf.log(span_mask)  # [k, max_span_width, 1]
            span_attention = tf.nn.softmax(span_head_scores,
                                           1)  # [k, max_span_width, 1]
            span_head_emb = tf.reduce_sum(span_attention * span_text_emb,
                                          1)  # [k, emb]
            span_emb_list.append(span_head_emb)

        span_emb = tf.concat(span_emb_list, 1)  # [k, emb]
        return span_emb  # [k, emb]