def split_char_normal(image, width=64, height=64, char_width=4):
    """Split an image into single characters and normalize their size.

    The values returned by ``util.projection(image)`` are scanned for
    crossings of their minimum; consecutive crossings are paired into
    horizontal bounds, each sufficiently wide pair is cropped, trimmed
    vertically and resized.

    :param image: image to segment. It is cropped, converted to RGB and drawn
        on, so it is presumably a PIL image -- confirm against callers.
    :param width: width every character crop is resized to.
    :param height: height every character crop is resized to.
    :param char_width: minimum horizontal extent a bound pair needs in order
        to be kept as a character.
    :return: list of the resized character images, followed (as the last
        element) by an RGB copy of the input with a red box drawn around
        every detected character.
    """
    images = []
    draw_bounds = []

    profile = util.projection(image)
    # Pad with the minimum in front and the maximum behind; the first entry
    # (the minimum) then acts as the baseline for edge detection.
    profile = [min(profile)] + profile + [max(profile)]
    baseline = profile[0]

    bounds = []
    for i in range(len(profile) - 1):
        if profile[i] <= baseline < profile[i + 1]:
            # Rising edge: the profile leaves the baseline.
            bounds.append(i)
        elif profile[i] > baseline >= profile[i + 1]:
            # Falling edge: the profile returns to the baseline.
            bounds.append(i - 1)

    # Consume the bounds pairwise as (left, right); an unpaired trailing
    # bound is ignored.
    for i in range(0, len(bounds) - 1, 2):
        left, right = bounds[i], bounds[i + 1]
        if right - left < char_width:
            continue
        image_char = image.crop((left, 0, right, image.size[1]))
        y1, y2 = util.get_width(
            util.projection(image_char, lambda a, b: b), True)
        sig_char_image = image_char.crop((0, y1, image_char.size[0], y2))
        draw_bounds.append((left, y1, right, y2))
        images.append(sig_char_image.resize((width, height)))

    image = image.convert('RGB')
    draw = ImageDraw.ImageDraw(image)
    red = (255, 0, 0)
    for x1, y1, x2, y2 in draw_bounds:
        # Each box needs its own y1; the previous unpacking
        # ``x1, y2, x2, y2`` discarded it and reused a stale value.
        draw.line((x1, y1, x1, y2), fill=red, width=2)
        draw.line((x1, y1, x2, y1), fill=red, width=2)
        draw.line((x2, y1, x2, y2), fill=red, width=2)
        draw.line((x1, y2, x2, y2), fill=red, width=2)
    images.append(image)
    return images
def featExt(image, label, width=16, height=16):
    """Extract a feature vector from an image.

    The vector holds, in order: element ``[1]`` of ``util.get_max_min`` for
    the default projection, the same for the projection taken with
    ``lambda a, b: b``, one summed projection per image tile, and finally
    ``label``.

    :param image: image to describe; must expose ``size`` and ``crop``
        (presumably a PIL image -- confirm against callers).
    :param label: value appended as the last element of the result.
    :param width: tile size used for the first tiling axis.
    :param height: tile size used for the second tiling axis.
    :return: the feature list, with ``label`` as its last element.
    """
    feat = []
    x, y = image.size
    bound_x = util.projection(image)
    bound_y = util.projection(image, lambda a, b: b)
    feat.append(util.get_max_min(bound_x)[1])
    feat.append(util.get_max_min(bound_y)[1])

    # Floor division keeps the tile counts integral on Python 3 as well;
    # ``range`` replaces the Python-2-only ``xrange``.
    # NOTE(review): the outer count is derived from the x extent but drives
    # the y offset (and vice versa). This is only consistent for square
    # images with width == height -- confirm whether that is intended.
    for i in range(x // width):
        for j in range(y // height):
            x1 = j * height
            x2 = x1 + height
            y1 = i * width
            y2 = y1 + width
            local_image = image.crop((x1, y1, x2, y2))
            feat.append(sum(util.projection(local_image)))

    # The return value is not used, so ``util.normal`` presumably
    # normalizes ``feat`` in place -- TODO confirm.
    util.normal(feat)
    feat.append(label)
    return feat
def get_rel_scores(entity_emb, entity_scores, num_labels, config, dropout,
                   num_predicted_entities):
    """Score every ordered entity pair of every sentence for each label.

    A pair is represented by the first embedding, the second embedding and
    their element-wise product. The pair scores from ``get_unary_scores``
    are combined with the scores of both entities, and a zero "dummy" score
    is prepended as label 0.

    When ``config['rel_prop']`` is set, the entity embeddings are also
    updated with a gated sum of relation-transformed neighbour embeddings.

    Returns:
        ``rel_scores`` alone when ``config['rel_prop']`` is falsy, otherwise
        the tuple ``(rel_scores, entity_emb, flat_entities_mask)``.
    """
    n_sents = util.shape(entity_emb, 0)
    n_ents = util.shape(entity_emb, 1)
    n_rel = num_labels - 1  # label 0 is the dummy label added at the end

    ent_mask = tf.sequence_mask(num_predicted_entities, n_ents)  # [num_sentences, num_entities]
    flat_entities_mask = tf.reshape(ent_mask, [-1])
    # A pair is valid only when both of its entities are valid.
    pair_mask = tf.logical_and(
        tf.expand_dims(ent_mask, 2),   # [num_sentences, max_num_entities, 1]
        tf.expand_dims(ent_mask, 1))   # [num_sentences, 1, max_num_entities]

    first_emb = tf.expand_dims(entity_emb, 2)   # [num_sents, num_ents, 1, emb]
    second_emb = tf.expand_dims(entity_emb, 1)  # [num_sents, 1, num_ents, emb]
    first_tiled = tf.tile(first_emb, [1, 1, n_ents, 1])    # [num_sents, num_ents, num_ents, emb]
    second_tiled = tf.tile(second_emb, [1, n_ents, 1, 1])  # [num_sents, num_ents, num_ents, emb]
    product_emb = first_emb * second_emb  # broadcast to [num_sents, num_ents, num_ents, emb]

    pair_emb = tf.concat([first_tiled, second_tiled, product_emb], 3)
    pair_emb_size = util.shape(pair_emb, 3)
    flat_pair_emb = tf.reshape(
        pair_emb, [n_sents * n_ents * n_ents, pair_emb_size])

    flat_scores = get_unary_scores(
        flat_pair_emb, config, dropout, n_rel,
        "relation_scores")  # [num_sentences * num_ents * num_ents, num_labels-1]
    rel_scores = tf.reshape(flat_scores, [n_sents, n_ents, n_ents, n_rel])

    # Add the score of the first and of the second entity to every pair.
    first_scores = tf.expand_dims(tf.expand_dims(entity_scores, 2), 3)
    second_scores = tf.expand_dims(tf.expand_dims(entity_scores, 1), 3)
    rel_scores = rel_scores + (first_scores + second_scores)  # [num_sentences, ents, max_num_ents, num_labels-1]

    propagate = config['rel_prop']
    if propagate:
        flat_scores = tf.reshape(
            rel_scores, [n_sents * n_ents * n_ents, n_rel])
        with tf.variable_scope("rel_W"):
            emb_size = util.shape(entity_emb, -1)
            # f(V)A_R in Eq. 3
            transition = util.projection(tf.nn.relu(flat_scores), emb_size)

        flat_second = tf.reshape(
            second_tiled, [n_sents * n_ents * n_ents, emb_size])
        flat_pair_mask = tf.reshape(pair_mask, [-1])

        # Transpose so the per-pair mask broadcasts over the embedding axis,
        # mask out invalid pairs, then transpose back.
        masked = tf.multiply(
            tf.transpose(transition * flat_second),
            tf.to_float(flat_pair_mask))  # [entity_emb_size, num_sents * num_ents * num_ents]
        masked = tf.transpose(masked)     # [num_sents * num_ents * num_ents, entity_emb_size]
        masked = tf.reshape(
            masked, [n_sents, n_ents, n_ents, emb_size])
        neighbour_sum = tf.reduce_sum(masked, 2)  # [num_sents, num_ents, entity_emb_size]
        neighbour_sum = tf.reshape(
            neighbour_sum, [n_sents * n_ents, emb_size])

        flat_entity_emb = tf.reshape(entity_emb, [n_sents * n_ents, emb_size])
        with tf.variable_scope("f"):
            gate = tf.sigmoid(util.projection(
                tf.concat([neighbour_sum, flat_entity_emb], 1),
                emb_size))  # [num_sents * num_ents, entity_emb_size]
        flat_entity_emb = gate * neighbour_sum + (1 - gate) * flat_entity_emb
        entity_emb = tf.reshape(flat_entity_emb, [n_sents, n_ents, emb_size])

    # Prepend a constant zero score for the dummy label.
    dummy_scores = tf.zeros([n_sents, n_ents, n_ents, 1], tf.float32)
    rel_scores = tf.concat([dummy_scores, rel_scores], 3)  # [num_sentences, max_num_ents, max_num_ents, num_labels]

    if propagate:
        return rel_scores, entity_emb, flat_entities_mask
    return rel_scores  # [num_sentences, num_entities, num_entities, num_labels]
def get_mention_emb(self, text_emb, text_outputs, mention_starts, mention_ends):
    """Build mention embeddings from their boundary and content words.

    Always concatenates the ``text_outputs`` rows at the start and end
    index of every mention. Depending on ``self.config`` it also appends a
    learned width embedding (``use_features``) and an attention-weighted
    sum of ``text_emb`` rows inside the mention (``model_heads``).

    Side effect: with ``model_heads`` enabled, ``self.head_scores`` is set
    to the per-word head score tensor.

    Returns:
        The concatenation of all parts along axis 1.
    """
    max_width = self.config["max_mention_width"]

    start_emb = tf.gather(text_outputs, mention_starts)  # [num_mentions, emb]
    end_emb = tf.gather(text_outputs, mention_ends)      # [num_mentions, emb]
    parts = [start_emb, end_emb]

    mention_width = 1 + mention_ends - mention_starts  # [num_mentions]

    if self.config["use_features"]:
        width_index = mention_width - 1  # [num_mentions], zero-based width
        width_table = tf.get_variable(
            "mention_width_embeddings",
            [max_width, self.config["feature_size"]])
        width_emb = tf.gather(width_table, width_index)  # [num_mentions, emb]
        width_emb = tf.nn.dropout(width_emb, self.dropout)
        parts.append(width_emb)

    if self.config["model_heads"]:
        # Word indices covered by each mention, padded up to max_width and
        # clamped to the last valid word index.
        offsets = tf.expand_dims(tf.range(max_width), 0)
        word_indices = offsets + tf.expand_dims(mention_starts, 1)  # [num_mentions, max_mention_width]
        word_indices = tf.minimum(
            util.shape(text_outputs, 0) - 1,
            word_indices)  # [num_mentions, max_mention_width]
        word_emb = tf.gather(text_emb, word_indices)  # [num_mentions, max_mention_width, emb]

        self.head_scores = util.projection(text_outputs, 1)  # [num_words, 1]
        scores = tf.gather(self.head_scores, word_indices)   # [num_mentions, max_mention_width, 1]

        width_mask = tf.sequence_mask(mention_width, max_width, dtype=tf.float32)
        width_mask = tf.expand_dims(width_mask, 2)  # [num_mentions, max_mention_width, 1]
        # log(0) = -inf removes padded positions from the softmax.
        attention = tf.nn.softmax(
            scores + tf.log(width_mask),
            dim=1)  # [num_mentions, max_mention_width, 1]
        head_emb = tf.reduce_sum(attention * word_emb, 1)  # [num_mentions, emb]
        parts.append(head_emb)

    return tf.concat(parts, 1)  # [num_mentions, emb]
def get_span_emb(self, head_emb, context_outputs, span_starts, span_ends):
    """Build span embeddings from boundary states, width and head attention.

    The ``context_outputs`` rows at the span start and end are always
    used. ``self.config["use_features"]`` adds a learned width embedding and
    ``self.config["model_heads"]`` adds an attention-weighted sum of the
    ``head_emb`` rows inside the span.

    Side effect: with ``model_heads`` enabled, ``self.head_scores`` is set
    to the per-word head scores (created in scope ``"head_scores"``).

    Returns:
        The concatenation of all parts along axis 1.
    """
    max_width = self.config["max_span_width"]

    pieces = []
    pieces.append(tf.gather(context_outputs, span_starts))  # [k, emb]
    pieces.append(tf.gather(context_outputs, span_ends))    # [k, emb]

    span_width = 1 + span_ends - span_starts  # [k]

    if self.config["use_features"]:
        width_table = tf.get_variable(
            "span_width_embeddings",
            [max_width, self.config["feature_size"]])
        width_emb = tf.gather(width_table, span_width - 1)  # [k, emb]
        pieces.append(tf.nn.dropout(width_emb, self.dropout))

    if self.config["model_heads"]:
        # Indices of the words inside each span, clamped to the last word.
        in_span = tf.expand_dims(tf.range(max_width), 0) \
            + tf.expand_dims(span_starts, 1)  # [k, max_span_width]
        in_span = tf.minimum(
            util.shape(context_outputs, 0) - 1, in_span)  # [k, max_span_width]
        word_emb = tf.gather(head_emb, in_span)  # [k, max_span_width, emb]

        with tf.variable_scope("head_scores"):
            self.head_scores = util.projection(context_outputs, 1)  # [num_words, 1]

        scores = tf.gather(self.head_scores, in_span)  # [k, max_span_width, 1]
        valid = tf.expand_dims(
            tf.sequence_mask(span_width, max_width, dtype=tf.float32),
            2)  # [k, max_span_width, 1]
        # log(0) = -inf removes positions past the span end from the softmax.
        scores = scores + tf.log(valid)       # [k, max_span_width, 1]
        attention = tf.nn.softmax(scores, 1)  # [k, max_span_width, 1]
        pieces.append(tf.reduce_sum(attention * word_emb, 1))  # [k, emb]

    span_emb = tf.concat(pieces, 1)  # [k, emb]
    return span_emb
def lstm_contextualize(self, text_emb, text_len, text_len_mask):
    """Run a stack of bidirectional LSTM layers and flatten the result.

    ``self.config["contextualization_layers"]`` layers are applied. From
    the second layer on, each layer's output is mixed with its input
    through a learned sigmoid gate (highway connection).

    Returns:
        ``self.flatten_emb_by_sentence`` applied to the last layer's output
        and ``text_len_mask``.
    """
    num_sentences = tf.shape(text_emb)[0]
    hidden_size = self.config["contextualization_size"]

    layer_input = text_emb  # [num_sentences, max_sentence_length, emb]
    for layer in range(self.config["contextualization_layers"]):
        with tf.variable_scope("layer_{}".format(layer)):
            with tf.variable_scope("fw_cell"):
                cell_fw = util.CustomLSTMCell(
                    hidden_size, num_sentences, self.lstm_dropout)
            with tf.variable_scope("bw_cell"):
                cell_bw = util.CustomLSTMCell(
                    hidden_size, num_sentences, self.lstm_dropout)

            # Replicate each cell's initial state once per sentence.
            batch_tile = [num_sentences, 1]
            state_fw = tf.contrib.rnn.LSTMStateTuple(
                tf.tile(cell_fw.initial_state.c, batch_tile),
                tf.tile(cell_fw.initial_state.h, batch_tile))
            state_bw = tf.contrib.rnn.LSTMStateTuple(
                tf.tile(cell_bw.initial_state.c, batch_tile),
                tf.tile(cell_bw.initial_state.h, batch_tile))

            (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=layer_input,
                sequence_length=text_len,
                initial_state_fw=state_fw,
                initial_state_bw=state_bw)

            text_outputs = tf.concat([fw_outputs, bw_outputs], 2)  # [num_sentences, max_sentence_length, emb]
            text_outputs = tf.nn.dropout(text_outputs, self.lstm_dropout)

            if layer > 0:
                # Highway gate between this layer's output and its input.
                gate = tf.sigmoid(util.projection(
                    text_outputs,
                    util.shape(text_outputs, 2)))  # [num_sentences, max_sentence_length, emb]
                text_outputs = gate * text_outputs + (1 - gate) * layer_input

            layer_input = text_outputs

    return self.flatten_emb_by_sentence(text_outputs, text_len_mask)
def get_span_emb(head_emb, context_outputs, span_starts, span_ends, config, dropout):
    """Compute the span representation shared across tasks.

    Args:
        head_emb: Tensor of [num_words, emb].
        context_outputs: Tensor of [num_words, emb].
        span_starts: [num_spans].
        span_ends: [num_spans].
        config: mapping read for "max_arg_width", "num_attention_heads",
            "use_features", "feature_size" and "model_heads".
        dropout: second argument passed to ``tf.nn.dropout`` for the width
            embedding.

    Returns:
        Tuple ``(span_emb, head_scores, span_text_emb, span_indices,
        span_indices_log_mask)``. The last four are ``None`` unless
        ``config["model_heads"]`` is set and ``max_arg_width > 1``.
    """
    text_length = util.shape(context_outputs, 0)
    num_spans = util.shape(span_starts, 0)
    max_arg_width = config["max_arg_width"]
    num_heads = config["num_attention_heads"]
    multi_word = max_arg_width > 1

    span_start_emb = tf.gather(context_outputs, span_starts)  # [num_words, emb]
    span_end_emb = tf.gather(context_outputs, span_ends)      # [num_words, emb]

    # With single-word spans only the start embedding is kept.
    span_emb_list = [span_start_emb]
    if multi_word:
        span_emb_list.append(span_end_emb)

    span_width = 1 + span_ends - span_starts  # [num_spans]

    if config["use_features"] and multi_word:
        span_width_index = span_width - 1  # [num_spans]
        width_table = tf.get_variable(
            "span_width_embeddings",
            [max_arg_width, config["feature_size"]])
        span_width_emb = tf.gather(width_table, span_width_index)  # [num_spans, emb]
        span_width_emb = tf.nn.dropout(span_width_emb, dropout)
        span_emb_list.append(span_width_emb)

    head_scores = None
    span_text_emb = None
    span_indices = None
    span_indices_log_mask = None

    if config["model_heads"]:
        if multi_word:
            # Indices of the words in each span, clamped to the last word.
            raw_indices = (tf.expand_dims(tf.range(max_arg_width), 0)
                           + tf.expand_dims(span_starts, 1))
            span_indices = tf.minimum(raw_indices, text_length - 1)  # [num_spans, max_span_width]
            span_text_emb = tf.gather(head_emb, span_indices)  # [num_spans, max_arg_width, emb]
            # log(0) = -inf removes positions past the span end from the
            # softmax below.
            span_indices_log_mask = tf.log(
                tf.sequence_mask(span_width, max_arg_width,
                                 dtype=tf.float32))  # [num_spans, max_arg_width]
            with tf.variable_scope("head_scores"):
                head_scores = util.projection(context_outputs, num_heads)  # [num_words, num_heads]
            masked_scores = (tf.gather(head_scores, span_indices)
                             + tf.expand_dims(span_indices_log_mask, 2))
            span_attention = tf.nn.softmax(masked_scores, dim=1)  # [num_spans, max_arg_width, num_heads]
            span_head_emb = tf.reduce_sum(span_attention * span_text_emb, 1)  # [num_spans, emb]
        else:
            # A single-word span is its own head.
            span_head_emb = tf.gather(head_emb, span_starts)
        span_emb_list.append(span_head_emb)

    span_emb = tf.concat(span_emb_list, 1)  # [num_spans, emb]
    return span_emb, head_scores, span_text_emb, span_indices, span_indices_log_mask
def get_fast_antecedent_scores(top_span_emb, dropout):
    """Return bilinear scores between every pair of span embeddings.

    The "source" side is a learned projection of ``top_span_emb`` (created
    in scope ``"src_projection"``); the "target" side is ``top_span_emb``
    itself. Dropout is applied to both sides separately.

    Returns:
        ``source @ target^T``, shape [k, k].
    """
    emb_size = util.shape(top_span_emb, -1)
    with tf.variable_scope("src_projection"):
        projected = util.projection(top_span_emb, emb_size)
        source = tf.nn.dropout(projected, dropout)  # [k, emb]
    target = tf.nn.dropout(top_span_emb, dropout)   # [k, emb]
    scores = tf.matmul(source, target, transpose_b=True)  # [k, k]
    return scores
def get_masked_mention_word_scores(self, encoded_doc, span_starts, span_ends):
    """Return, per span, a softmax over the words that lie inside the span.

    Every word gets one learned score (scope ``"mention_word_attn"``).
    For each span the scores of words outside ``[span_start, span_end]``
    are pushed to -inf before the softmax.

    Returns:
        Tensor of shape [num_spans, num_words].
    """
    num_words = util.shape(encoded_doc, 0)  # T
    num_c = util.shape(span_starts, 0)      # NC

    # One row of word positions 0..T-1 per span.
    positions = tf.expand_dims(tf.range(0, num_words), 0)
    doc_range = tf.tile(positions, [num_c, 1])  # [K, T]

    after_start = doc_range >= tf.expand_dims(span_starts, 1)
    before_end = doc_range <= tf.expand_dims(span_ends, 1)
    mention_mask = tf.logical_and(after_start, before_end)  # [K, T]

    with tf.variable_scope("mention_word_attn"):
        word_scores = util.projection(
            encoded_doc, 1,
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        word_attn = tf.squeeze(word_scores, 1)

    # log(0) = -inf excludes out-of-span words from the softmax.
    log_mask = tf.log(tf.to_float(mention_mask))
    mention_word_attn = tf.nn.softmax(log_mask + tf.expand_dims(word_attn, 0))
    return mention_word_attn
def get_mention_emb(self, text_emb, text_outputs, mention_starts, mention_ends):
    """Build mention embeddings from boundary, width and head information.

    NOTE(review): this body mixes TensorFlow-style calls (``tf.gather``,
    ``tf.sequence_mask``, ``tf.log``) with PyTorch-style ones
    (``tf.unsqueeze``, ``tf.cat``, ``nn.init``, ``F.softmax``) on the same
    ``tf`` name. It looks like an unfinished port -- confirm what ``tf``
    is bound to before relying on this function.

    Side effect: with ``self.config["model_heads"]`` enabled,
    ``self.head_scores`` is set.

    Returns:
        The concatenation of all parts along axis 1.
    """
    max_width = self.config["max_mention_width"]

    parts = []
    start_emb = tf.gather(text_outputs, mention_starts)  # [num_mentions, emb]
    parts.append(start_emb)
    end_emb = tf.gather(text_outputs, mention_ends)      # [num_mentions, emb]
    parts.append(end_emb)

    mention_width = 1 + mention_ends - mention_starts  # [num_mentions]

    if self.config["use_features"]:
        width_index = mention_width - 1  # [num_mentions]
        # NOTE(review): a fresh zero tensor is initialized on every call,
        # so this table does not look like a persistent parameter.
        width_table = tf.zeros([max_width, self.config["feature_size"]])
        nn.init.xavier_uniform(width_table)
        width_emb = tf.gather(width_table, width_index)  # [num_mentions, emb]
        width_emb = F.dropout(width_emb, self.dropout)
        parts.append(width_emb)

    if self.config["model_heads"]:
        # Word indices covered by each mention, clamped to the last word.
        offsets = tf.unsqueeze(tf.range(max_width), 0)
        word_indices = offsets + tf.unsqueeze(
            mention_starts, 1)  # [num_mentions, max_mention_width]
        word_indices = tf.min(
            (util.shape(text_outputs, 0) - 1),
            word_indices)  # [num_mentions, max_mention_width]
        word_emb = tf.gather(
            text_emb, word_indices)  # [num_mentions, max_mention_width, emb]

        self.head_scores = util.projection(text_outputs, 1)  # [num_words, 1]
        scores = tf.gather(
            self.head_scores,
            word_indices)  # [num_mentions, max_mention_width, 1]

        width_mask = tf.unsqueeze(
            tf.sequence_mask(mention_width, max_width, dtype=tf.float32),
            2)  # [num_mentions, max_mention_width, 1]
        # log(0) = -inf removes padded positions from the softmax.
        attention = F.softmax(
            scores + tf.log(width_mask),
            dim=1)  # [num_mentions, max_mention_width, 1]
        head_emb = tf.sum(attention * word_emb, 1)  # [num_mentions, emb]
        parts.append(head_emb)

    mention_emb = tf.cat(parts, 1)  # [num_mentions, emb]
    return mention_emb
def coarse_to_fine_pruning(self, top_span_emb, top_span_mention_scores, c):
    """Select the ``c`` best-scoring antecedents for every candidate span.

    The coarse score of a (span, antecedent) pair is the sum of both
    mention scores, ``self.get_fast_antecedent_scores(top_span_emb)`` and,
    when ``self.config['use_prior']`` is set, a learned score for the
    bucketed offset between the two spans. Pairs whose antecedent does not
    precede the span are pushed to -inf.

    Side effects: the intermediate tensors are stored on ``self``
    (``top_antecedents_idx``, ``top_antecedents_mask``,
    ``top_fast_antecedent_scores``, ``top_antecedent_offsets``,
    ``antecedent_distance_buckets``, ``antecedent_distance_scores``,
    ``fast_antecedent_scores``). The two distance attributes are ``None``
    when ``use_prior`` is disabled.

    Returns:
        ``(top_antecedents, top_antecedents_mask,
        top_fast_antecedent_scores, top_antecedent_offsets)``, each [k, c].
    """
    k = util.shape(top_span_emb, 0)
    top_span_range = tf.range(k)  # [k]
    antecedent_offsets = tf.expand_dims(
        top_span_range, 1) - tf.expand_dims(top_span_range, 0)  # [k, k]
    # Only spans with a strictly smaller index are valid antecedents.
    antecedents_mask = antecedent_offsets >= 1  # [k, k]

    fast_antecedent_scores = tf.expand_dims(
        top_span_mention_scores, 1) + tf.expand_dims(
            top_span_mention_scores, 0)  # [k, k]
    # log(0) = -inf removes invalid antecedents from the top-k selection.
    fast_antecedent_scores += tf.log(
        tf.to_float(antecedents_mask))  # [k, k]
    fast_antecedent_scores += self.get_fast_antecedent_scores(
        top_span_emb)  # [k, k]

    # Bound up front: both names are stored on ``self`` below, and used to
    # raise UnboundLocalError whenever ``use_prior`` was disabled.
    antecedent_distance_buckets = None
    antecedent_distance_scores = None
    if self.config['use_prior']:
        antecedent_distance_buckets = self.bucket_distance(
            antecedent_offsets)  # [k, k]
        distance_emb = tf.get_variable(
            "antecedent_distance_emb",
            [10, self.config["feature_size"]],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        distance_scores = util.projection(
            tf.nn.dropout(distance_emb, self.dropout),
            1,
            initializer=tf.truncated_normal_initializer(
                stddev=0.02))  # [10, 1]
        antecedent_distance_scores = tf.gather(
            tf.squeeze(distance_scores, 1),
            antecedent_distance_buckets)  # [k, k]
        fast_antecedent_scores += antecedent_distance_scores

    _, top_antecedents = tf.nn.top_k(
        fast_antecedent_scores, c, sorted=True)  # [k, c]
    top_antecedents_mask = util.batch_gather(
        antecedents_mask, top_antecedents)  # [k, c]
    top_fast_antecedent_scores = util.batch_gather(
        fast_antecedent_scores, top_antecedents)  # [k, c]
    top_antecedent_offsets = util.batch_gather(
        antecedent_offsets, top_antecedents)  # [k, c]

    # Keep the intermediates around for inspection.
    self.top_antecedents_idx = top_antecedents
    self.top_antecedents_mask = top_antecedents_mask
    self.top_fast_antecedent_scores = top_fast_antecedent_scores
    self.top_antecedent_offsets = top_antecedent_offsets
    self.antecedent_distance_buckets = antecedent_distance_buckets
    self.antecedent_distance_scores = antecedent_distance_scores
    self.fast_antecedent_scores = fast_antecedent_scores

    return (top_antecedents, top_antecedents_mask,
            top_fast_antecedent_scores, top_antecedent_offsets)
def lstm_contextualize(self, text_emb, text_len, text_len_mask):
    """Contextualize word embeddings with stacked bidirectional LSTMs.

    One BiLSTM is built per ``self.config["contextualization_layers"]``.
    Every layer after the first blends its output with its input through
    a learned sigmoid gate (highway connection).

    Returns:
        The last layer's output passed through
        ``self.flatten_emb_by_sentence`` together with ``text_len_mask``.
    """
    num_sentences = tf.shape(text_emb)[0]
    num_layers = self.config["contextualization_layers"]
    cell_size = self.config["contextualization_size"]

    current_inputs = text_emb  # [num_sentences, max_sentence_length, emb]
    for layer in range(num_layers):
        with tf.variable_scope("layer_{}".format(layer)):
            with tf.variable_scope("fw_cell"):
                cell_fw = util.CustomLSTMCell(
                    cell_size, num_sentences, self.lstm_dropout)
            with tf.variable_scope("bw_cell"):
                cell_bw = util.CustomLSTMCell(
                    cell_size, num_sentences, self.lstm_dropout)

            # Tile each cell's initial state to one row per sentence.
            fw_c = tf.tile(cell_fw.initial_state.c, [num_sentences, 1])
            fw_h = tf.tile(cell_fw.initial_state.h, [num_sentences, 1])
            state_fw = tf.contrib.rnn.LSTMStateTuple(fw_c, fw_h)
            bw_c = tf.tile(cell_bw.initial_state.c, [num_sentences, 1])
            bw_h = tf.tile(cell_bw.initial_state.h, [num_sentences, 1])
            state_bw = tf.contrib.rnn.LSTMStateTuple(bw_c, bw_h)

            (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=current_inputs,
                sequence_length=text_len,
                initial_state_fw=state_fw,
                initial_state_bw=state_bw)

            text_outputs = tf.concat([fw_outputs, bw_outputs], 2)  # [num_sentences, max_sentence_length, emb]
            text_outputs = tf.nn.dropout(text_outputs, self.lstm_dropout)

            if layer > 0:
                # Highway connection between layer output and layer input.
                out_size = util.shape(text_outputs, 2)
                highway_gates = tf.sigmoid(
                    util.projection(text_outputs, out_size))  # [num_sentences, max_sentence_length, emb]
                text_outputs = (highway_gates * text_outputs
                                + (1 - highway_gates) * current_inputs)

            current_inputs = text_outputs

    return self.flatten_emb_by_sentence(text_outputs, text_len_mask)
def coarse_pruning(self, top_span_emb, top_span_mention_scores, c):
    """Pick the top ``c`` antecedents for each of the top-k candidate spans.

    The coarse score of a (span, antecedent) pair is composed of:
      1. the mention score of each of the two spans;
      2. the bilinear score ``emb_i * W * emb_j``;
      3. a mask so that only preceding spans can be antecedents;
      4. (if ``self.config['use_prior']``) a score computed from an
         embedding of the bucketed span-to-antecedent distance.

    Returns:
        ``(top_antecedents, top_antecedents_mask,
        top_fast_antecedent_scores, top_antecedent_offsets)``, each [k, c].
    """
    k = util.shape(top_span_emb, 0)  # num_candidates
    span_ids = tf.range(k)           # [num_candidates]

    # Offset between every two spans, counted in spans: [k, k].
    antecedent_offsets = tf.expand_dims(span_ids, 1) - tf.expand_dims(span_ids, 0)
    antecedents_mask = antecedent_offsets >= 1  # [k, k]

    # 1. mention scores of span and antecedent
    scores = (tf.expand_dims(top_span_mention_scores, 1)
              + tf.expand_dims(top_span_mention_scores, 0))
    # 3. log(0) = -inf rules out spans that do not precede
    scores = scores + tf.log(tf.to_float(antecedents_mask))  # [k, k]
    # 2. bilinear embedding score
    scores = scores + self.get_fast_antecedent_scores(top_span_emb)  # [k, k]

    if self.config['use_prior']:
        # 4. learned prior over bucketed distances
        distance_buckets = self.bucket_distance(antecedent_offsets)  # [k, k]
        distance_emb = tf.get_variable(
            "antecedent_distance_emb",
            [10, self.config["feature_size"]],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        distance_scores = util.projection(
            tf.nn.dropout(distance_emb, self.dropout),
            1,
            initializer=tf.truncated_normal_initializer(
                stddev=0.02))  # [10, 1]
        scores = scores + tf.gather(
            tf.squeeze(distance_scores, 1), distance_buckets)  # [k, k]

    # Span index of the c highest-scoring antecedents for every span.
    _, top_antecedents = tf.nn.top_k(scores, c, sorted=False)  # [k, c]
    # Mask, score and offset of every selected pair.
    top_antecedents_mask = util.batch_gather(
        antecedents_mask, top_antecedents)  # [k, c]
    top_fast_antecedent_scores = util.batch_gather(
        scores, top_antecedents)  # [k, c]
    top_antecedent_offsets = util.batch_gather(
        antecedent_offsets, top_antecedents)  # [k, c]
    return (top_antecedents, top_antecedents_mask,
            top_fast_antecedent_scores, top_antecedent_offsets)
def fit_uv_mesh(initial_mesh: dict,
                target_dataset,
                max_iterations: int = 5000,
                resolution: int = 4,
                log_interval: int = 10,
                dispaly_interval=1000,
                display_res=512,
                out_dir=None,
                mp4save_interval=None):
    """Render ``initial_mesh`` once with a flat grey texture to ``test.png``.

    The mesh is rendered at 1024 pixels from a random pose
    (``util.random_rotation_translation``) using UVs from ``init_uv()``.

    :param initial_mesh: dict with ``'pos_idx'`` (triangle indices) and
        ``'vtx_pos'`` (vertex positions), given as NumPy arrays or torch
        tensors.
    :param target_dataset: currently unused.
    :param max_iterations: currently unused.
    :param resolution: currently unused.
    :param log_interval: currently unused.
    :param dispaly_interval: currently unused (misspelled name kept for
        existing keyword callers).
    :param display_res: currently unused.
    :param out_dir: currently unused.
    :param mp4save_interval: currently unused.
    :return: None; the image is written to ``test.png`` in the working
        directory.
    """

    def _to_cuda(data, np_dtype, torch_dtype):
        # Accept tensors and arrays alike. The old code called ``.cuda()``
        # and then ``.astype`` on the same object, which no single input
        # type supports.
        if isinstance(data, torch.Tensor):
            return data.to(torch_dtype).cuda()
        return torch.from_numpy(np.asarray(data).astype(np_dtype)).cuda()

    glctx = dr.RasterizeGLContext()

    r_rot = util.random_rotation_translation(0.25)
    dist = 2

    # Modelview and modelview + projection matrices.
    proj = util.projection(x=0.4, n=1.0, f=200.0)
    r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
    r_mvp = np.matmul(proj, r_mv).astype(np.float32)

    # Geometry comes straight from the mesh dict (the old code read an
    # undefined ``pos`` for the vertex positions).
    pos_idx = _to_cuda(initial_mesh['pos_idx'], np.int32, torch.int32)
    vtx_pos = _to_cuda(initial_mesh['vtx_pos'], np.float32, torch.float32)

    # Use only as many UV triangles as the mesh has faces.
    uv, uv_idx = init_uv()
    uv_idx = uv_idx[:pos_idx.shape[0]]
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()

    # Constant mid-grey texture.
    tex = np.ones((1024, 1024, 3), dtype=np.float32) / 2
    tex = torch.from_numpy(tex.astype(np.float32)).cuda()

    # Positional arguments after ``tex`` are passed through unchanged.
    color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex,
                   1024, False, 0)
    Image.fromarray((color[0].detach().cpu().numpy() * 255).astype(
        np.uint8)).save('test.png')
def __rotate(image, width, func=lambda a, b: b, angle=46, fact=1):
    """Rotate an image step by step while its measured width does not grow.

    The image is rotated by ``fact``, ``2 * fact``, ... (up to
    ``(angle - 1) * fact``). After each step
    ``util.get_width(util.projection(rotated, func))`` is compared with the
    best width so far, and the search stops at the first step that yields
    a larger width.

    :param image: image to rotate; must provide ``rotate``.
    :param width: width measured for the unrotated image.
    :param func: second argument forwarded to ``util.projection``.
    :param angle: exclusive upper bound of the step counter.
    :param fact: rotation per step; a negative value reverses the direction.
    :return: ``(result_image, width)`` -- the best rotation found (the input
        image if no step improved on ``width``) and its width.
    """
    result_image = image
    # ``range`` works on Python 2 and 3; ``xrange`` is Python-2-only.
    for step in range(1, angle):
        candidate = image.rotate(step * fact)
        candidate_width = util.get_width(util.projection(candidate, func))
        if width < candidate_width:
            # Width started growing again: keep the previous best.
            break
        width = candidate_width
        result_image = candidate
    return result_image, width
def combine_passes(self, original_doc, input_ids, input_mask, overlap_doc,
                   overlap_ids, overlap_mask):
    """Merge two encodings of the same document with a learned gate.

    For both passes the positions that are masked in and are neither
    ``self.cls`` nor ``self.sep`` are kept and flattened with
    ``self.flatten_emb_by_sentence``. The two flattened encodings are then
    blended element-wise by a sigmoid gate (scope ``"combo"``).

    Returns:
        ``(combo, org_content_mask)`` -- the blended encoding and the
        boolean content mask of the original pass.
    """
    overlap_mask = tf.equal(overlap_mask, 1)
    input_mask = tf.equal(input_mask, 1)

    # Content positions of the original pass: masked in, not CLS/SEP.
    org_not_special = tf.logical_and(
        tf.not_equal(input_ids, self.cls),
        tf.not_equal(input_ids, self.sep))
    org_content_mask = tf.logical_and(input_mask, org_not_special)

    # Same for the overlapping pass.
    overlap_not_special = tf.logical_and(
        tf.not_equal(overlap_ids, self.cls),
        tf.not_equal(overlap_ids, self.sep))
    overlap_content_mask = tf.logical_and(overlap_mask, overlap_not_special)

    flat_org_doc = self.flatten_emb_by_sentence(original_doc, org_content_mask)
    flat_overlap_doc = self.flatten_emb_by_sentence(
        overlap_doc, overlap_content_mask)

    with tf.variable_scope("combo"):
        both = tf.concat([flat_org_doc, flat_overlap_doc], -1)
        gate = tf.sigmoid(
            util.projection(both, util.shape(flat_org_doc, -1)))  # [n, emb]
        combo = gate * flat_org_doc + (1 - gate) * flat_overlap_doc
    return combo, org_content_mask
def fit_earth(max_iter=20000,
              log_interval=10,
              display_interval=None,
              display_res=1024,
              enable_mip=True,
              res=512,
              ref_res=4096,
              lr_base=1e-2,
              lr_ramp=0.1,
              out_dir='.',
              log_fn=None,
              texsave_interval=None,
              texsave_fn=None,
              imgsave_interval=None,
              imgsave_fn=None):
    """Learn the earth texture by matching renderings to a reference.

    A reference image is rendered at ``ref_res`` with the ground-truth
    texture and downsampled to ``res``. A candidate image is rendered at
    ``res`` with a learned texture, and the squared image difference is
    minimized with Adam. The logged value is the texture-space RMSE inside
    a mask.

    Args:
        max_iter: last iteration index; ``max_iter + 1`` steps are run.
        log_interval: print (and optionally log) every this many steps.
        display_interval: show the current rendering every this many steps.
        display_res: size passed to ``util.display_image``.
        enable_mip: render the candidate with mipmapped texture lookups.
        res: resolution of the candidate rendering.
        ref_res: resolution of the reference rendering before downsampling.
        lr_base: learning rate at iteration 0.
        lr_ramp: factor the learning rate has been scaled by at the end
            (exponential schedule).
        out_dir: output directory, created when truthy.
        log_fn: log file name inside ``out_dir``; no file when falsy.
        texsave_interval: save the learned texture every this many steps.
        texsave_fn: ``%``-format pattern (iteration) for texture files.
        imgsave_interval: save the rendering every this many steps.
        imgsave_fn: ``%``-format pattern (iteration) for image files.
    """
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32) / 255.0
    max_mip_level = 9  # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Learned texture.
    tex_var = tf.get_variable('tex',
                              initializer=tf.constant_initializer(0.2),
                              shape=tex.shape)

    # Setup TF graph for reference rendering in high resolution.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx,
                                         [ref_res, ref_res])
    texc, texd = dr.interpolate(uv[tf.newaxis, ...], rast_out, uv_idx,
                                rast_db=rast_out_db, diff_attrs='all')
    color = dr.texture(tex[np.newaxis], texc, texd,
                       filter_mode='linear-mipmap-linear',
                       max_mip_level=max_mip_level)
    color = color * tf.clip_by_value(rast_out[..., -1:], 0, 1)  # Mask out background.

    # Reduce the reference to correct size.
    while color.shape[1] > res:
        color = util.bilinear_downsample(color)

    # TF Graph for rendered candidate.
    if enable_mip:
        # With mipmaps.
        rast_out_opt, rast_out_db_opt = dr.rasterize(pos_clip, pos_idx,
                                                     [res, res])
        texc_opt, texd_opt = dr.interpolate(uv[tf.newaxis, ...],
                                            rast_out_opt, uv_idx,
                                            rast_db=rast_out_db_opt,
                                            diff_attrs='all')
        color_opt = dr.texture(tex_var[np.newaxis], texc_opt, texd_opt,
                               filter_mode='linear-mipmap-linear',
                               max_mip_level=max_mip_level)
    else:
        # No mipmaps: no image-space derivatives anywhere.
        rast_out_opt, _ = dr.rasterize(pos_clip, pos_idx, [res, res],
                                       output_db=False)
        texc_opt, _ = dr.interpolate(uv[tf.newaxis, ...], rast_out_opt,
                                     uv_idx)
        color_opt = dr.texture(tex_var[np.newaxis], texc_opt,
                               filter_mode='linear')
    color_opt = color_opt * tf.clip_by_value(rast_out_opt[..., -1:], 0, 1)  # Mask out background.

    # Measure only relevant portions of texture when calculating texture PSNR.
    loss = tf.reduce_mean((color - color_opt)**2)
    texmask = np.zeros_like(tex)
    tr = tex.shape[1] // 4
    texmask[tr + 13:2 * tr - 13, 25:-25, :] += 1.0
    texmask[25:-25, tr + 13:2 * tr - 13, :] += 1.0
    texloss = (tf.reduce_sum(texmask * (tex - tex_var)**2) /
               np.sum(texmask))**0.5  # RMSE within masked area.

    # Training driven by image-space loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.99).minimize(
        loss, var_list=[tex_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # try/finally closes the log file even when training raises or is
    # interrupted; previously the handle leaked in that case.
    try:
        # Render.
        ang = 0.0
        util.init_uninitialized_vars()
        texloss_avg = []
        for it in range(max_iter + 1):
            # Exponential decay from lr_base to lr_base * lr_ramp.
            lr = lr_base * lr_ramp**(float(it) / float(max_iter))

            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            ang = ang + 0.01
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
            dist = np.random.uniform(0.0, 48.5)

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4, n=1.0, f=200.0)
            r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

            # Run training and measure texture-space RMSE loss.
            texloss_val, _ = util.run([texloss, train_op], {
                mtx_in: r_mvp,
                lr_in: lr
            })
            texloss_avg.append(texloss_val)

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                # Report the mean since the last report, then reset.
                texloss_val, texloss_avg = np.mean(
                    np.asarray(texloss_avg)), []
                psnr = -10.0 * np.log10(texloss_val**2)  # PSNR based on average RMSE.
                s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
                print(s)
                if log_file:
                    log_file.write(s + '\n')

            # Show/save result images/textures.
            display_image = display_interval and (it % display_interval) == 0
            save_image = imgsave_interval and (it % imgsave_interval) == 0
            save_texture = texsave_interval and (it % texsave_interval) == 0

            if display_image or save_image:
                result_image = util.run(color_opt, {mtx_in: a_mvp})[0]
            if display_image:
                util.display_image(result_image, size=display_res,
                                   title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it),
                                result_image)
            if save_texture:
                # The texture rows are reversed before saving.
                util.save_image(out_dir + '/' + (texsave_fn % it),
                                util.run(tex_var)[::-1])
    finally:
        # Done.
        if log_file:
            log_file.close()
def fit_mesh(initial_mesh: dict,
             target_dataset_dir: str,
             max_iterations: int = 10000,
             resolution: int = 256,
             log_interval: int = 1000,
             display_interval=None,
             display_res=512,
             out_dir=None,
             mp4save_interval=None):
    """Fit per-frame vertex offsets and a texture to reference images.

    Frame ``j`` uses the offsets ``M3 @ M2 @ M1 @ one_hot(j)``, added to
    the flattened base vertices. ``M1``, ``M2``, ``M3`` and the texture
    are optimized with Adam on the mean squared difference between the
    rendering and the reference image.

    On completion one ``frame_<i>.obj`` per reference frame and the
    texture ``diff_render_tex.png`` are written to the working directory.

    NOTE(review): SOURCE had lost its indentation. The display and log
    blocks are placed once per outer iteration (after all frames); confirm
    against the original layout.

    :param initial_mesh: dict of NumPy arrays ``'pos_idx'`` and
        ``'vtx_pos'``.
    :param target_dataset_dir: directory given to ``util.ReferenceImages``.
    :param max_iterations: number of passes over the reference frames.
    :param resolution: render and reference resolution.
    :param log_interval: print the loss every this many iterations.
    :param display_interval: show estimate and target every this many
        iterations (always shown on the last one).
    :param display_res: currently unused.
    :param out_dir: currently unused.
    :param mp4save_interval: currently unused.
    """
    distance = 3
    target_dataset = util.ReferenceImages(target_dataset_dir, resolution,
                                          resolution)
    num_frames = len(target_dataset)

    pos_idx = torch.from_numpy(initial_mesh['pos_idx'].astype(np.int32))
    vtx_pos = torch.from_numpy(initial_mesh['vtx_pos'].astype(np.float32))
    # The Laplace matrix is built from the CPU tensors, then moved to GPU.
    laplace = util.compute_laplace_matrix(vtx_pos, pos_idx).cuda()
    pos_idx = pos_idx.cuda()
    vtx_pos = vtx_pos.cuda()

    # Pre-rotate the mesh about z and keep the first three coordinates.
    init_rot = util.rotate_z(-math.pi / 2).cuda()
    vtx_pos = transform_pos(init_rot, vtx_pos)[0][:, 0:3]
    vtx_pos.requires_grad = True

    # Use only as many UV triangles as the mesh has faces.
    uv, uv_idx = init_uv()
    uv_idx = uv_idx[:pos_idx.shape[0]]
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()
    vtx_uv.requires_grad = True

    # Learned texture, initialized to mid-grey.
    tex = torch.ones((1024, 1024, 3)).float() / 2
    tex = tex.cuda()
    tex.requires_grad = True

    glctx = dr.RasterizeGLContext()

    # Factorized per-frame offsets: M3 @ M2 @ M1 @ one_hot(frame).
    M1 = torch.eye(num_frames).cuda()
    M1.requires_grad = True
    M2 = torch.eye(num_frames).cuda()
    M2.requires_grad = True
    M3 = torch.zeros((3 * vtx_pos.shape[0], num_frames)).cuda()
    M3.requires_grad = True

    params = [{
        'params': [M1, M2, M3],
        'lr': 1e-3
    }, {
        'params': tex,
        'lr': 1e-2
    }]
    optimizer = torch.optim.Adam(params)

    for i in range(max_iterations):
        for j, (img, angle) in enumerate(target_dataset):
            img = img.cuda().permute(2, 1, 0)

            # One-hot selector for the current frame.
            frame_tensor = torch.zeros(num_frames)
            frame_tensor[j] = 1
            frame_tensor = frame_tensor.cuda()
            frame_tensor.requires_grad = True

            deltas = torch.matmul(
                M3, torch.matmul(M2, torch.matmul(M1,
                                                  frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape(
                (vtx_pos.shape[0], 3))

            # Model-view-projection matrix. The per-frame ``angle`` is not
            # applied; the rotation is the identity.
            rot = torch.eye(4)
            tr = util.translate(z=-distance)
            proj = util.projection(x=0.4)
            mtx = proj.matmul(tr.matmul(rot)).cuda()
            mtx.requires_grad = True

            estimate = render(glctx,
                              mtx,
                              deformed_vtxs,
                              pos_idx,
                              vtx_uv,
                              uv_idx,
                              tex,
                              resolution,
                              enable_mip=False,
                              max_mip_level=4)[0]

            # Image-space loss.
            loss = torch.mean((estimate - img)**2)
            # Curvature regularizer; computed but weighted by zero below.
            reg = torch.mean(
                (util.compute_curvature(deformed_vtxs, laplace) -
                 util.compute_curvature(vtx_pos, laplace))**2)
            loss = 5 * loss + 0 * reg

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                # Clamp texture between 0 and 1.
                tex.clamp_(0, 1)

        if (display_interval and
                (i % display_interval == 0)) or (i == max_iterations - 1):
            with torch.no_grad():
                estimate = render(glctx,
                                  mtx,
                                  deformed_vtxs,
                                  pos_idx,
                                  vtx_uv,
                                  uv_idx,
                                  tex,
                                  resolution,
                                  enable_mip=True,
                                  max_mip_level=4)[0].detach().cpu().numpy()
                plt.imshow(estimate)
                plt.show()
                plt.imshow(img.detach().cpu().numpy())
                plt.show()

        if log_interval and i % log_interval == 0:
            print(f"Loss: {loss}")
            print(M1.grad)

    # Export one deformed mesh per reference frame.
    with torch.no_grad():
        for frame_idx, (im, _) in enumerate(target_dataset):
            frame_tensor = torch.zeros(num_frames)
            # Select this frame. The old code indexed with the stale
            # training index ``j``, so every export got the last frame's
            # deformation.
            frame_tensor[frame_idx] = 1
            frame_tensor = frame_tensor.cuda()
            deltas = torch.matmul(
                M3, torch.matmul(M2, torch.matmul(M1,
                                                  frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape(
                (vtx_pos.shape[0], 3))
            write_obj(f"frame_{frame_idx}.obj",
                      deformed_vtxs.detach().cpu().tolist(),
                      pos_idx.detach().cpu().tolist())

    Image.fromarray((tex.detach().cpu().numpy() * 255).astype(
        np.uint8)).save('diff_render_tex.png')
    print("Outputted texture to diff_render_tex.png")
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                             lm_emb, char_index, text_len, speaker_ids, genre,
                             is_training, gold_starts, gold_ends, cluster_ids,
                             scene_emb, genders, fpronouns):
    """Build the coreference graph over gold mention spans; return predictions and loss.

    Mention scoring and pruning are not performed here: the spans that take
    part in antecedent ranking are exactly the candidate spans whose
    (start, end) pair matches a gold mention.

    Args:
        tokens: Passed to the ELMo hub module as "tokens"; only read when
            ``self.lm_file`` is falsy.
        context_word_emb: Word embeddings for the contextualizer; axes 0 and 1
            are read as num_sentences and max_sentence_length.
        head_word_emb: Word embeddings used for span heads.
        lm_emb: Language-model embeddings; axis 2 is read as the embedding
            size and axis 3 as the number of layers. Overwritten with ELMo
            output when ``self.lm_file`` is falsy.
        char_index: Indices gathered from the "char_embeddings" variable when
            ``config["char_embedding_size"] > 0``.
        text_len: Per-sentence lengths, used to build the sentence mask.
        speaker_ids: Gathered at the start index of every selected span.
        genre: Index into the "genre_embeddings" variable.
        is_training: Passed to ``get_dropout`` and used as the condition of
            ``tf.where`` when the gender logic rule is enabled.
        gold_starts: Start indices of the gold mentions.
        gold_ends: End indices of the gold mentions.
        cluster_ids: Gold cluster ids, passed to ``get_candidate_labels``.
        scene_emb: Input of the scene feed-forward network.
        genders: Gathered at the end index of every selected span.
        fpronouns: Gathered at the end index of every selected span.

    Returns:
        A pair ``(predictions, loss)`` where ``predictions`` is the list
        ``[candidate_starts, candidate_ends, top_span_starts, top_span_ends,
        top_antecedents, top_antecedent_scores, teacher_loss, origin_loss]``.
        With ``config["use_gender_logic_rule"]`` set, ``top_antecedent_scores``
        holds the rule-projected probabilities; otherwise it holds the raw
        scores and ``teacher_loss``/``origin_loss`` both equal ``loss``.
    """
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(
        self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(
        self.config["lstm_dropout_rate"], is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    # Optional character-CNN features, shared by context and head embeddings.
    if self.config["char_embedding_size"] > 0:
        char_emb = tf.gather(
            tf.get_variable(
                "char_embeddings",
                [len(self.char_dict), self.config["char_embedding_size"]]),
            char_index)  # [num_sentences, max_sentence_length, max_word_length, emb]
        flattened_char_emb = tf.reshape(
            char_emb,
            [num_sentences * max_sentence_length,
             util.shape(char_emb, 2),
             util.shape(char_emb, 3)])  # [num_sentences * max_sentence_length, max_word_length, emb]
        flattened_aggregated_char_emb = util.cnn(
            flattened_char_emb,
            self.config["filter_widths"],
            self.config["filter_size"])  # [num_sentences * max_sentence_length, emb]
        aggregated_char_emb = tf.reshape(
            flattened_aggregated_char_emb,
            [num_sentences, max_sentence_length,
             util.shape(flattened_aggregated_char_emb, 1)])  # [num_sentences, max_sentence_length, emb]
        context_emb_list.append(aggregated_char_emb)
        head_emb_list.append(aggregated_char_emb)

    # Without a cached LM file the ELMo layers are computed in-graph and
    # replace the lm_emb argument.
    if not self.lm_file:
        elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
        lm_embeddings = elmo_module(
            inputs={"tokens": tokens, "sequence_len": text_len},
            signature="tokens", as_dict=True)
        word_emb = lm_embeddings["word_emb"]  # [num_sentences, max_sentence_length, 512]
        lm_emb = tf.stack(
            [tf.concat([word_emb, word_emb], -1),
             lm_embeddings["lstm_outputs1"],
             lm_embeddings["lstm_outputs2"]],
            -1)  # [num_sentences, max_sentence_length, 1024, 3]
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
        self.lm_weights = tf.nn.softmax(
            tf.get_variable("lm_scores", [lm_num_layers],
                            initializer=tf.constant_initializer(0.0)))
        self.lm_scaling = tf.get_variable(
            "lm_scaling", [], initializer=tf.constant_initializer(1.0))
    # Softmax-weighted sum over the layer axis, then a learned scalar scale.
    flattened_lm_emb = tf.reshape(
        lm_emb,
        [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(
        flattened_lm_emb,
        tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(
        flattened_aggregated_lm_emb,
        [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

    context_outputs = self.lstm_contextualize(context_emb, text_len, text_len_mask)  # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    genre_emb = tf.gather(
        tf.get_variable("genre_embeddings",
                        [len(self.genres), self.config["feature_size"]]),
        genre)  # [emb]

    sentence_indices = tf.tile(
        tf.expand_dims(tf.range(num_sentences), 1),
        [1, max_sentence_length])  # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(sentence_indices, text_len_mask)  # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(head_emb, text_len_mask)  # [num_words]

    # Enumerate every (start, start + width) pair, then keep only the spans
    # that end inside the document and do not cross a sentence boundary.
    candidate_starts = tf.tile(
        tf.expand_dims(tf.range(num_words), 1),
        [1, self.max_span_width])  # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(
        tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
    candidate_start_sentence_indices = tf.gather(
        flattened_sentence_indices, candidate_starts)  # [num_words, max_span_width]
    candidate_end_sentence_indices = tf.gather(
        flattened_sentence_indices,
        tf.minimum(candidate_ends, num_words - 1))  # [num_words, max_span_width]
    candidate_mask = tf.logical_and(
        candidate_ends < num_words,
        tf.equal(candidate_start_sentence_indices,
                 candidate_end_sentence_indices))  # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(candidate_mask, [-1])  # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(
        tf.reshape(candidate_starts, [-1]), flattened_candidate_mask)  # [num_candidates]
    candidate_ends = tf.boolean_mask(
        tf.reshape(candidate_ends, [-1]), flattened_candidate_mask)  # [num_candidates]

    # Encode each span as one integer so set difference can match candidates
    # against gold mentions.
    # NOTE(review): the 10000 multiplier assumes span widths and offsets stay
    # well below 10000 -- confirm for very long inputs.
    combined_candidate_st = candidate_starts * 10000 + candidate_ends
    combined_gold_st = gold_starts * 10000 + gold_ends

    # setdiff1d yields the positions of candidates that are NOT gold; removing
    # those positions from the full index range leaves the gold positions.
    _, non_top_span_list = tf.setdiff1d(
        combined_candidate_st, combined_gold_st)  # [num_candidate - num_gold_mentions]
    whole_candidate_indices_list = tf.range(
        util.shape(candidate_starts, 0))  # [num_candidates]
    gold_span_indices, _ = tf.setdiff1d(
        whole_candidate_indices_list, non_top_span_list)  # [num_gold_mentions]

    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends,
        cluster_ids)  # [num_candidates]

    candidate_span_emb = self.get_span_emb(
        flattened_head_emb, context_outputs, candidate_starts,
        candidate_ends)  # [num_candidates, emb]

    # Video scene embedding. The last dimension is output_size=128; the
    # original shape note here said [num_words, 100].
    ffnn_scene_emb = util.ffnn(
        scene_emb,
        num_hidden_layers=self.config["ffnn_depth"],
        hidden_size=400,
        output_size=128,
        dropout=self.dropout)
    candidate_scene_emb = self.get_scene_emb(ffnn_scene_emb, candidate_starts)

    # Mention boundaries are given for this task, so no mention scores are
    # computed and no span pruning happens: the gold spans are the top spans.
    k = tf.to_int32(util.shape(gold_span_indices, 0))
    top_span_indices = gold_span_indices

    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices)  # [k, emb]
    top_scene_emb = tf.gather(candidate_scene_emb, top_span_indices)  # [k, emb-scene]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices)  # [k]
    top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]
    top_span_genders = tf.gather(genders, top_span_ends)
    top_span_fpronouns = tf.gather(fpronouns, top_span_ends)

    # k : total number of candidate spans (M in paper)
    # c : how many antecedents we check (K in paper)
    c = tf.minimum(self.config["max_top_antecedents"], k)

    if self.config["coarse_to_fine"]:
        # No mention scores exist in this gold-span setup; the original code
        # referenced an undefined top_span_mention_scores here and raised
        # NameError. A zero vector supplies no mention-score signal.
        # NOTE(review): assumes coarse_to_fine_pruning takes a float [k]
        # vector of mention scores -- confirm against its definition.
        top_span_mention_scores = tf.zeros([k])  # [k]
        (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
         top_antecedent_offsets) = self.coarse_to_fine_pruning(
             top_span_emb, top_span_mention_scores, c)
    else:
        (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
         top_antecedent_offsets) = self.distance_prnuing_wo_mention_score(
             top_span_emb, c)

    dummy_scores = tf.zeros([k, 1])  # [k, 1]
    for i in range(self.config["coref_depth"]):
        with tf.variable_scope("coref_layer", reuse=(i > 0)):
            top_antecedent_emb = tf.gather(top_span_emb, top_antecedents)  # [k, c, emb]
            top_antecedent_scene_emb = tf.gather(top_scene_emb, top_antecedents)  # [k, c, emb-scene]
            top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                top_span_emb, top_antecedents, top_antecedent_emb,
                top_antecedent_offsets, top_span_speaker_ids, genre_emb,
                top_scene_emb, top_antecedent_scene_emb, top_span_genders,
                top_span_fpronouns)  # [k, c]
            top_antecedent_weights = tf.nn.softmax(
                tf.concat([dummy_scores, top_antecedent_scores], 1))  # [k, c + 1]
            top_antecedent_emb = tf.concat(
                [tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1)  # [k, c + 1, emb]
            attended_span_emb = tf.reduce_sum(
                tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb,
                1)  # [k, emb]
            with tf.variable_scope("f"):
                # Gate between the current span embedding and the
                # antecedent-attended one.
                f = tf.sigmoid(util.projection(
                    tf.concat([top_span_emb, attended_span_emb], 1),
                    util.shape(top_span_emb, -1)))  # [k, emb]
                top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb  # [k, emb]

    top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents)  # [k, c]
    # log(0) = -inf for masked-out antecedents, so their cluster id can never
    # equal a real one after the integer cast.
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
    same_cluster_indicator = tf.equal(
        top_antecedent_cluster_ids,
        tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator)  # [k, c]
    dummy_labels = tf.logical_not(
        tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1]

    top_antecedent_prob = tf.nn.softmax(top_antecedent_scores, 1)  # [k, c + 1]

    if self.config["use_gender_logic_rule"]:
        top_antecedent_prob_with_logic = self.project_logic_rule(
            top_antecedent_prob, top_span_genders, top_span_fpronouns,
            top_span_speaker_ids, top_antecedents, k)

        origin_loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]
        origin_loss = tf.reduce_sum(origin_loss)

        # Cross entropy against the rule-projected distribution:
        # -sum(teacher * log(student)).
        teacher_loss = tf.reduce_sum(
            -tf.reduce_sum(
                top_antecedent_prob_with_logic * tf.log(top_antecedent_prob + 1e-10),
                axis=1))

        # Imitation weight: 1 - alpha^(step + 1), capped at pi_zero.
        pi = tf.minimum(
            self.config["logic_rule_pi_zero"],
            1.0 - tf.pow(self.config["logic_rule_imitation_alpha"],
                         tf.to_float(self.global_step) + 1.0))

        # Validation loss: negative log marginal probability of the gold
        # antecedents under the rule-projected distribution.
        marginal_prob = tf.reduce_sum(
            top_antecedent_prob_with_logic * tf.to_float(top_antecedent_labels),
            axis=1)
        validation_loss = -1 * tf.reduce_sum(tf.log(marginal_prob))

        loss = tf.where(is_training,
                        pi * teacher_loss + (1.0 - pi) * origin_loss,
                        validation_loss)
        top_antecedent_scores = top_antecedent_prob_with_logic
    else:
        loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []
        teacher_loss = loss
        origin_loss = loss

    return [candidate_starts, candidate_ends, top_span_starts, top_span_ends,
            top_antecedents, top_antecedent_scores, teacher_loss,
            origin_loss], loss
def fit_earth(max_iter=20000, log_interval=10, display_interval=None,
              display_res=1024, enable_mip=True, res=512, ref_res=4096,
              lr_base=1e-2, lr_ramp=0.1, out_dir=None, log_fn=None,
              texsave_interval=None, texsave_fn=None, imgsave_interval=None,
              imgsave_fn=None):
    """Fit a texture to the earth mesh by matching rendered images.

    A texture initialised to 0.2 everywhere is optimised with Adam so that
    renders using it match renders using the reference texture loaded from
    ``earth.npz``, under random rotations/translations.

    Args:
        max_iter: The loop runs ``max_iter + 1`` iterations; also the horizon
            of the learning-rate ramp.
        log_interval: Print (and optionally write) the averaged texture RMSE
            and PSNR every this many iterations; falsy disables logging.
        display_interval: Show a preview every this many iterations; falsy
            disables display.
        display_res: Size passed to ``util.display_image``.
        enable_mip: Mipmapping flag for the optimised render (the reference
            render always enables it).
        res: Resolution of the optimised render.
        ref_res: Resolution of the reference render, which is downsampled
            until its height is no larger than ``res``.
        lr_base: Initial Adam learning rate.
        lr_ramp: The learning rate is multiplied by
            ``lr_ramp ** (step / max_iter)``.
        out_dir: Output directory; when falsy, image and texture saving are
            disabled and no log file is written.
        log_fn: Log file name inside ``out_dir``.
        texsave_interval: Save the texture every this many iterations.
        texsave_fn: ``%``-format file name for saved textures.
        imgsave_interval: Save a preview render every this many iterations.
        imgsave_fn: ``%``-format file name for saved preview images.
    """
    log_file = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
    else:
        imgsave_interval, texsave_interval = None, None

    # try/finally so the log file is closed even if loading, rendering or
    # optimisation raises.
    try:
        # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
        # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
        datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
        with np.load(f'{datadir}/earth.npz') as f:
            pos_idx, pos, uv_idx, uv, tex = f.values()
        tex = tex.astype(np.float32) / 255.0
        max_mip_level = 9  # Texture is a 4x3 atlas of 512x512 maps.
        print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

        # Some input geometry contains vertex positions in (N, 4) (with
        # v[:,3]==1). Drop the last column in that case.
        if pos.shape[1] == 4:
            pos = pos[:, 0:3]

        # Create position/triangle index tensors
        pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
        vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
        uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
        vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()

        tex = torch.from_numpy(tex.astype(np.float32)).cuda()
        tex_opt = torch.full(tex.shape, 0.2, device='cuda', requires_grad=True)
        glctx = dr.RasterizeGLContext()

        # The mask depends only on the reference texture's shape, so build it
        # once rather than on every iteration. It selects the portions of the
        # texture that count towards the texture-space RMSE/PSNR.
        with torch.no_grad():
            texmask = torch.zeros_like(tex)
            tr = tex.shape[1] // 4
            texmask[tr + 13:2 * tr - 13, 25:-25, :] += 1.0
            texmask[25:-25, tr + 13:2 * tr - 13, :] += 1.0
            texmask_sum = torch.sum(texmask)

        # Adam optimizer for texture with a learning rate ramp.
        optimizer = torch.optim.Adam([tex_opt], lr=lr_base)
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=lambda x: lr_ramp**(float(x) / float(max_iter)))

        # Render.
        ang = 0.0
        texloss_avg = []
        for it in range(max_iter + 1):
            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
            dist = np.random.uniform(0.0, 48.5)

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4, n=1.0, f=200.0)
            r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

            # Measure texture-space RMSE loss within the masked area.
            with torch.no_grad():
                texloss = (torch.sum(texmask * (tex - tex_opt)**2) / texmask_sum)**0.5
                texloss_avg.append(float(texloss))

            # Render reference and optimized frames. Always enable mipmapping
            # for reference.
            color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx,
                           tex, ref_res, True, max_mip_level)
            color_opt = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx,
                               tex_opt, res, enable_mip, max_mip_level)

            # Reduce the reference to correct size.
            while color.shape[1] > res:
                color = util.bilinear_downsample(color)

            # Compute loss and perform a training step.
            loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                texloss_val = np.mean(np.asarray(texloss_avg))
                texloss_avg = []
                psnr = -10.0 * np.log10(texloss_val**2)  # PSNR based on average RMSE.
                s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
                print(s)
                if log_file:
                    log_file.write(s + '\n')

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)
            save_texture = texsave_interval and (it % texsave_interval == 0)

            if display_image or save_image:
                ang = ang + 0.1
                with torch.no_grad():
                    result_image = render(
                        glctx, a_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx,
                        tex_opt, res, enable_mip, max_mip_level)[0].cpu().numpy()
                if display_image:
                    util.display_image(result_image, size=display_res,
                                       title='%d / %d' % (it, max_iter))
                if save_image:
                    util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

            if save_texture:
                # tex_opt requires grad, so detach before converting to NumPy;
                # this keeps the conversion valid outside a no_grad context.
                texture = tex_opt.detach().cpu().numpy()[::-1]
                util.save_image(out_dir + '/' + (texsave_fn % it), texture)
    finally:
        # Done.
        if log_file:
            log_file.close()
def get_predictions_and_loss(self, input_ids, input_mask, text_len,
                             speaker_ids, genre, is_training, gold_starts,
                             gold_ends, cluster_ids, sentence_map):
    """Build the BERT-based coreference graph; return predictions and loss.

    Steps: encode the segments with BERT, enumerate same-sentence candidate
    spans up to ``self.max_span_width`` words, keep the best-scoring ``k``
    spans, pick ``c`` antecedents per span with coarse-to-fine pruning,
    optionally refine the span embeddings for ``coref_depth`` rounds, and
    compute the summed softmax loss over gold antecedents.

    Args:
        input_ids: Token ids fed to BERT; axes 0 and 1 are read as the number
            of segments and the segment length.
        input_mask: Mask fed to BERT and used to flatten per-segment tensors
            into per-word tensors.
        text_len: Not read by this function.
        speaker_ids: Flattened with ``input_mask`` and gathered at span starts
            when ``config['use_metadata']`` is set.
        genre: Index into the "genre_embeddings" variable.
        is_training: Forwarded to BERT and to ``get_dropout``.
        gold_starts: Start indices of the gold mentions.
        gold_ends: End indices of the gold mentions.
        cluster_ids: Gold cluster ids, passed to ``get_candidate_labels``.
        sentence_map: Sentence index of every word, used to reject spans that
            cross a sentence boundary.

    Returns:
        ``([candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores], loss)``.
    """
    bert_model = modeling.BertModel(
        config=self.bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        use_one_hot_embeddings=False,
        scope='bert')
    # Fetched as in the original; not read further down.
    all_encoder_layers = bert_model.get_all_encoder_layers()
    mention_doc = bert_model.get_sequence_output()  # [batch_size, seq_length, hidden_size]

    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)

    # These two sizes are computed but not read further down.
    num_sentences = tf.shape(mention_doc)[0]
    max_sentence_length = tf.shape(mention_doc)[1]

    mention_doc = self.flatten_emb_by_sentence(mention_doc, input_mask)  # [num_words, hidden_size]
    num_words = util.shape(mention_doc, 0)
    antecedent_doc = mention_doc  # alias, not read further down

    # --- Candidate span enumeration -------------------------------------
    word_sentence_ids = sentence_map
    start_grid = tf.tile(
        tf.expand_dims(tf.range(num_words), 1),
        [1, self.max_span_width])  # [num_words, max_span_width]
    end_grid = start_grid + tf.expand_dims(
        tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
    start_sentence_grid = tf.gather(word_sentence_ids, start_grid)  # [num_words, max_span_width]
    end_sentence_grid = tf.gather(
        word_sentence_ids,
        tf.minimum(end_grid, num_words - 1))  # [num_words, max_span_width]

    # A span is valid when it ends inside the document and starts and ends
    # in the same sentence.
    ends_in_document = end_grid < num_words
    within_one_sentence = tf.equal(start_sentence_grid, end_sentence_grid)
    span_is_valid = tf.logical_and(ends_in_document, within_one_sentence)  # [num_words, max_span_width]
    valid_flat = tf.reshape(span_is_valid, [-1])  # [num_words * max_span_width]

    candidate_starts = tf.boolean_mask(tf.reshape(start_grid, [-1]), valid_flat)  # [num_candidates]
    candidate_ends = tf.boolean_mask(tf.reshape(end_grid, [-1]), valid_flat)  # [num_candidates]
    # Computed as in the original; not read further down.
    candidate_sentence_indices = tf.boolean_mask(
        tf.reshape(start_sentence_grid, [-1]), valid_flat)  # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends,
        cluster_ids)  # [num_candidates]

    candidate_span_emb = self.get_span_emb(
        mention_doc, mention_doc, candidate_starts,
        candidate_ends)  # [num_candidates, emb]
    unsqueezed_mention_scores = self.get_mention_scores(
        candidate_span_emb, candidate_starts, candidate_ends)
    candidate_mention_scores = tf.squeeze(unsqueezed_mention_scores, 1)  # [k]

    # --- Beam: keep at most min(3900, floor(num_words * ratio)) spans ----
    ratio_budget = tf.to_int32(
        tf.floor(tf.to_float(num_words) * self.config["top_span_ratio"]))
    k = tf.minimum(3900, ratio_budget)
    c = tf.minimum(self.config["max_top_antecedents"], k)

    top_span_indices = coref_ops.extract_spans(
        tf.expand_dims(candidate_mention_scores, 0),
        tf.expand_dims(candidate_starts, 0),
        tf.expand_dims(candidate_ends, 0),
        tf.expand_dims(k, 0),
        num_words,
        True)  # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices)  # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices)  # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices)  # [k]

    genre_table = tf.get_variable(
        "genre_embeddings",
        [len(self.genres), self.config["feature_size"]],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    genre_emb = tf.gather(genre_table, genre)  # [emb]

    # Speaker ids are only looked up when metadata features are enabled.
    top_span_speaker_ids = None
    if self.config['use_metadata']:
        speaker_ids = self.flatten_emb_by_sentence(speaker_ids, input_mask)
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

    dummy_scores = tf.zeros([k, 1])  # [k, 1]
    pruned = self.coarse_to_fine_pruning(top_span_emb, top_span_mention_scores, c)
    (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
     top_antecedent_offsets) = pruned

    # --- Segment distance between a mention and each antecedent ---------
    num_segs = util.shape(input_ids, 0)
    seg_len = util.shape(input_ids, 1)
    word_segments = tf.tile(
        tf.expand_dims(tf.range(0, num_segs), 1), [1, seg_len])
    flat_word_segments = tf.boolean_mask(
        tf.reshape(word_segments, [-1]), tf.reshape(input_mask, [-1]))
    mention_segments = tf.expand_dims(
        tf.gather(flat_word_segments, top_span_starts), 1)  # [k, 1]
    antecedent_segments = tf.gather(
        flat_word_segments,
        tf.gather(top_span_starts, top_antecedents))  # [k, c]
    if self.config['use_segment_distance']:
        segment_distance = tf.clip_by_value(
            mention_segments - antecedent_segments,
            0,
            self.config['max_training_sentences'] - 1)  # [k, c]
    else:
        segment_distance = None

    # --- Antecedent scoring, with optional iterative refinement ---------
    if not self.config['fine_grained']:
        top_antecedent_scores = top_fast_antecedent_scores
    else:
        for layer in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(layer > 0)):
                top_antecedent_emb = tf.gather(top_span_emb, top_antecedents)  # [k, c, emb]
                slow_scores = self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids, genre_emb,
                    segment_distance)
                top_antecedent_scores = top_fast_antecedent_scores + slow_scores  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores], 1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb,
                    1)  # [k, emb]
                with tf.variable_scope("f"):
                    # Gate between the attended and the current embedding.
                    gate_input = tf.concat([top_span_emb, attended_span_emb], 1)
                    f = tf.sigmoid(util.projection(
                        gate_input, util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb  # [k, emb]

    top_antecedent_scores = tf.concat([dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

    # --- Gold antecedent labels and loss ---------------------------------
    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents)  # [k, c]
    # log(0) = -inf on masked-out antecedents, so they never match a real
    # cluster id after the integer cast.
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
    same_cluster_indicator = tf.equal(
        top_antecedent_cluster_ids,
        tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator)  # [k, c]
    dummy_labels = tf.logical_not(
        tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1]

    per_span_loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(per_span_loss)  # []

    predictions = [
        candidate_starts,
        candidate_ends,
        candidate_mention_scores,
        top_span_starts,
        top_span_ends,
        top_antecedents,
        top_antecedent_scores,
    ]
    return predictions, loss
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                             lm_emb, text_len, is_training, gold_starts,
                             gold_ends, cluster_ids, swag_context_emb,
                             swag_text_len, swag_label):
    """Build the coreference branch of the model; return predictions and loss.

    This is the main part of the architecture. Only the coreference branch
    is built here; of the SWAG inputs, ``swag_context_emb`` is merely
    printed and the other two are not read.

    Args:
        tokens: Not read by this function.
        context_word_emb: Word embeddings for the contextualizer; axes 0 and 1
            are read as num_sentences and max_sentence_length.
        head_word_emb: Word embeddings used for span heads.
        lm_emb: Language-model embeddings; axis 2 is read as the embedding
            size and axis 3 as the number of layers.
        text_len: Per-sentence lengths, used to build the sentence mask.
        is_training: Forwarded to ``self.same``.
        gold_starts: Start indices of the gold mentions.
        gold_ends: End indices of the gold mentions.
        cluster_ids: Gold cluster ids, passed to ``get_candidate_labels``.
        swag_context_emb: Only printed for debugging.
        swag_text_len: Not read by this function.
        swag_label: Not read by this function.

    Returns:
        ``([candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores], loss)``.
    """
    # NOTE(review): presumably sets the dropout attributes read below
    # (self.lexical_dropout) -- confirm against self.same.
    self.same(is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]
    print("normal", swag_context_emb)

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    # --- Weighted sum of the language-model layers -----------------------
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
        lm_scores = tf.get_variable(
            "lm_scores", [lm_num_layers],
            initializer=tf.constant_initializer(0.0))
        self.lm_weights = tf.nn.softmax(lm_scores)
        self.lm_scaling = tf.get_variable(
            "lm_scaling", [], initializer=tf.constant_initializer(1.0))
    lm_rows = num_sentences * max_sentence_length * lm_emb_size
    flattened_lm_emb = tf.reshape(lm_emb, [lm_rows, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(
        flattened_lm_emb,
        tf.expand_dims(self.lm_weights, 1))  # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(
        flattened_aggregated_lm_emb,
        [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list, 2)  # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(head_emb, self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(
        text_len, maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

    context_outputs = self.lstm_contextualize(
        context_emb, text_len, text_len_mask)  # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    # Genre features are disabled in this variant.
    genre_emb = None

    sentence_indices = tf.tile(
        tf.expand_dims(tf.range(num_sentences), 1),
        [1, max_sentence_length])  # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(
        sentence_indices, text_len_mask)  # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(
        head_emb, text_len_mask)  # [num_words]

    # --- Candidate span enumeration -------------------------------------
    start_grid = tf.tile(
        tf.expand_dims(tf.range(num_words), 1),
        [1, self.max_span_width])  # [num_words, max_span_width]
    end_grid = start_grid + tf.expand_dims(
        tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
    start_sentence_grid = tf.gather(
        flattened_sentence_indices, start_grid)  # [num_words, max_span_width]
    end_sentence_grid = tf.gather(
        flattened_sentence_indices,
        tf.minimum(end_grid, num_words - 1))  # [num_words, max_span_width]

    # Keep spans that end inside the document and stay within one sentence.
    ends_in_document = end_grid < num_words
    within_one_sentence = tf.equal(start_sentence_grid, end_sentence_grid)
    span_is_valid = tf.logical_and(ends_in_document, within_one_sentence)  # [num_words, max_span_width]
    valid_flat = tf.reshape(span_is_valid, [-1])  # [num_words * max_span_width]

    candidate_starts = tf.boolean_mask(
        tf.reshape(start_grid, [-1]), valid_flat)  # [num_candidates]
    candidate_ends = tf.boolean_mask(
        tf.reshape(end_grid, [-1]), valid_flat)  # [num_candidates]
    candidate_sentence_indices = tf.boolean_mask(
        tf.reshape(start_sentence_grid, [-1]), valid_flat)  # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends,
        cluster_ids)  # [num_candidates]

    candidate_span_emb = self.get_span_emb(
        flattened_head_emb, context_outputs, candidate_starts,
        candidate_ends)  # [num_candidates, emb]
    unsqueezed_mention_scores = self.get_mention_scores(candidate_span_emb)  # [k, 1]
    candidate_mention_scores = tf.squeeze(unsqueezed_mention_scores, 1)  # [k]

    # --- Keep floor(num_words * top_span_ratio) best-scoring spans -------
    word_count = tf.to_float(tf.shape(context_outputs)[0])
    k = tf.to_int32(tf.floor(word_count * self.config["top_span_ratio"]))
    top_span_indices = coref_ops.extract_spans(
        tf.expand_dims(candidate_mention_scores, 0),
        tf.expand_dims(candidate_starts, 0),
        tf.expand_dims(candidate_ends, 0),
        tf.expand_dims(k, 0),
        util.shape(context_outputs, 0),
        True)  # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices)  # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices)  # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices)  # [k]
    # Gathered as in the original; not read further down.
    top_span_sentence_indices = tf.gather(candidate_sentence_indices, top_span_indices)  # [k]

    c = tf.minimum(self.config["max_top_antecedents"], k)

    # --- Antecedent pruning: both strategies share one call signature ---
    if self.config["coarse_to_fine"]:
        prune_antecedents = self.coarse_to_fine_pruning
    else:
        prune_antecedents = self.distance_pruning
    (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
     top_antecedent_offsets) = prune_antecedents(
         top_span_emb, top_span_mention_scores, c)

    # --- Iterative refinement of span embeddings -------------------------
    dummy_scores = tf.zeros([k, 1])  # [k, 1]
    for layer in range(self.config["coref_depth"]):
        with tf.variable_scope("coref_layer", reuse=(layer > 0)):
            top_antecedent_emb = tf.gather(top_span_emb, top_antecedents)  # [k, c, emb]
            slow_scores = self.get_slow_antecedent_scores(
                top_span_emb, top_antecedents, top_antecedent_emb,
                top_antecedent_offsets)
            top_antecedent_scores = top_fast_antecedent_scores + slow_scores  # [k, c]
            top_antecedent_weights = tf.nn.softmax(
                tf.concat([dummy_scores, top_antecedent_scores], 1))  # [k, c + 1]
            top_antecedent_emb = tf.concat(
                [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                1)  # [k, c + 1, emb]
            attended_span_emb = tf.reduce_sum(
                tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb,
                1)  # [k, emb]
            with tf.variable_scope("f"):
                # Gate between the attended and the current embedding.
                gate_input = tf.concat([top_span_emb, attended_span_emb], 1)
                f = tf.sigmoid(util.projection(
                    gate_input, util.shape(top_span_emb, -1)))  # [k, emb]
                top_span_emb = f * attended_span_emb + (1 - f) * top_span_emb  # [k, emb]

    top_antecedent_scores = tf.concat(
        [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

    # --- Gold antecedent labels and loss ---------------------------------
    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents)  # [k, c]
    # log(0) = -inf on masked-out antecedents, so they never match a real
    # cluster id after the integer cast.
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
    same_cluster_indicator = tf.equal(
        top_antecedent_cluster_ids,
        tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator)  # [k, c]
    dummy_labels = tf.logical_not(
        tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1]

    per_span_loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(per_span_loss)  # []

    predictions = [
        candidate_starts,
        candidate_ends,
        candidate_mention_scores,
        top_span_starts,
        top_span_ends,
        top_antecedents,
        top_antecedent_scores,
    ]
    return predictions, loss
def get_predictions_and_loss(self, inputs, labels, config):
    """Build the joint span-task graph (NER, relations, coreference) and its loss.

    The method embeds and contextualizes the sentences, enumerates candidate
    spans, optionally refines the candidate span embeddings (coreference
    propagation when ``config["coref_depth"]`` is truthy, relation
    propagation when ``config["rel_prop"]`` / ``config["rel_prop_emb"]`` are
    set) and finally scores every task whose weight is positive.

    Args:
        inputs: Mapping of input tensors. Keys read here: "is_training",
            "tokens", "text_len", "context_word_emb", "head_word_emb",
            "char_idx", "lm_emb" and "doc_id".
        labels: Gold annotations; forwarded unchanged to
            ``get_span_task_labels`` and ``get_relation_labels``.
        config: Mapping of hyper-parameters. Keys read here: the three
            ``*dropout_rate`` entries, "max_arg_width", "coref_depth",
            "mention_ratio", "max_antecedents", "ner_weight", "coref_weight",
            "relation_weight", "entity_beam", "entity_ratio", "add_ner_emb",
            "bilinear", "rel_prop" and "rel_prop_emb".

    Returns:
        A ``(predict_dict, loss)`` pair. ``predict_dict`` maps output names to
        tensors (the set of keys depends on which tasks are enabled) and
        ``loss`` is ``ner_weight * ner_loss + coref_weight * coref_loss +
        relation_weight * rel_loss``.

    Raises:
        ValueError: If relations are enabled with ``config['add_ner_emb']``
            but without ``config['entity_beam']``; the NER scores that
            ``add_ner_emb`` concatenates are only computed on the
            ``entity_beam`` path.
    """
    is_training = inputs["is_training"][0]
    # Keep-probabilities: 1.0 at inference time, (1 - rate) while training.
    self.dropout = 1 - (tf.to_float(is_training) * config["dropout_rate"])
    self.lexical_dropout = 1 - (
        tf.to_float(is_training) * config["lexical_dropout_rate"])
    self.lstm_dropout = 1 - (
        tf.to_float(is_training) * config["lstm_dropout_rate"])

    sentences = inputs["tokens"]
    text_len = inputs["text_len"]  # [num_sentences]
    context_word_emb = inputs[
        "context_word_emb"]  # [num_sentences, max_sentence_length, emb]
    head_word_emb = inputs[
        "head_word_emb"]  # [num_sentences, max_sentence_length, emb]
    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb, head_emb, self.lm_weights, self.lm_scaling = get_embeddings(
        self.data, sentences, text_len, context_word_emb, head_word_emb,
        inputs["char_idx"], inputs["lm_emb"],
        self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

    context_outputs = lstm_contextualize(
        context_emb, text_len, config,
        self.lstm_dropout)  # [num_sentences, max_sentence_length, emb]

    # [num_sentences, max_num_candidates], ...
    candidate_starts, candidate_ends, candidate_mask = get_span_candidates(
        text_len, max_sentence_length, config["max_arg_width"])
    flat_candidate_mask = tf.reshape(
        candidate_mask, [-1])  # [num_sentences * max_num_candidates]
    # Exclusive cumulative sum of sentence lengths: offset of each sentence's
    # first word inside the flattened (document-level) word sequence.
    batch_word_offset = tf.expand_dims(
        tf.cumsum(text_len, exclusive=True), 1)  # [num_sentences, 1]
    flat_candidate_starts = tf.boolean_mask(
        tf.reshape(candidate_starts + batch_word_offset, [-1]),
        flat_candidate_mask)  # [num_candidates]
    flat_candidate_ends = tf.boolean_mask(
        tf.reshape(candidate_ends + batch_word_offset, [-1]),
        flat_candidate_mask)  # [num_candidates]

    text_len_mask = tf.sequence_mask(
        text_len,
        maxlen=max_sentence_length)  # [num_sentences, max_sentence_length]
    flat_context_outputs = flatten_emb_by_sentence(
        context_outputs, text_len_mask)  # [num_doc_words]
    flat_head_emb = flatten_emb_by_sentence(
        head_emb, text_len_mask)  # [num_doc_words]
    doc_len = util.shape(flat_context_outputs, 0)

    (candidate_span_emb, head_scores, span_head_emb, head_indices,
     head_indices_log_mask) = get_span_emb(
         flat_head_emb, flat_context_outputs, flat_candidate_starts,
         flat_candidate_ends, config, self.dropout
     )  # [num_candidates, emb], [num_candidates, max_span_width, emb], [num_candidates, max_span_width]

    num_candidates = util.shape(candidate_span_emb, 0)
    max_num_candidates_per_sentence = util.shape(candidate_mask, 1)
    # Dense lookup table from (sentence, candidate slot) to the row of that
    # candidate in the flat candidate tensors; masked-out slots map to row 0.
    candidate_span_ids = tf.sparse_to_dense(
        sparse_indices=tf.where(tf.equal(candidate_mask, True)),
        output_shape=tf.cast(
            tf.stack([num_sentences, max_num_candidates_per_sentence]),
            tf.int64),
        sparse_values=tf.range(num_candidates, dtype=tf.int32),
        default_value=0,
        validate_indices=True)  # [num_sentences, max_num_candidates]

    predict_dict = {
        "candidate_starts": candidate_starts,
        "candidate_ends": candidate_ends
    }

    # ------------------------------------------------------------------
    # Optional coreference propagation: refine the embeddings of the top
    # mentions by attending over their antecedents, then write the refined
    # rows back into candidate_span_emb.
    # ------------------------------------------------------------------
    if config["coref_depth"]:
        candidate_mention_scores = get_unary_scores(
            candidate_span_emb, config, self.dropout, 1,
            "mention_scores")  # [num_candidates]

        doc_ids = tf.expand_dims(inputs["doc_id"], 1)  # [num_sentences, 1]
        candidate_doc_ids = tf.boolean_mask(
            tf.reshape(
                tf.tile(doc_ids, [1, max_num_candidates_per_sentence]), [-1]),
            flat_candidate_mask)  # [num_candidates]

        k = tf.to_int32(
            tf.floor(tf.to_float(doc_len) * config["mention_ratio"]))
        top_mention_indices = srl_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(flat_candidate_starts, 0),
            tf.expand_dims(flat_candidate_ends, 0),
            tf.expand_dims(k, 0), doc_len, True, True)  # [1, k]
        top_mention_indices.set_shape([1, None])
        top_mention_indices = tf.squeeze(top_mention_indices, 0)  # [k]
        mention_starts = tf.gather(flat_candidate_starts,
                                   top_mention_indices)  # [k]
        mention_ends = tf.gather(flat_candidate_ends,
                                 top_mention_indices)  # [k]
        mention_scores = tf.gather(candidate_mention_scores,
                                   top_mention_indices)  # [k]
        mention_emb = tf.gather(candidate_span_emb,
                                top_mention_indices)  # [k, emb]
        mention_doc_ids = tf.gather(candidate_doc_ids,
                                    top_mention_indices)  # [k]

        # Size of the largest per-document group of selected mentions.
        max_mentions_per_doc = tf.reduce_max(
            tf.unsorted_segment_sum(
                data=tf.ones_like(mention_doc_ids, dtype=tf.int32),
                segment_ids=mention_doc_ids,
                num_segments=tf.reduce_max(mention_doc_ids) + 1))  # []

        # Debug op; it is not referenced again in this function.
        k_Print = tf.Print(
            k, [num_sentences, doc_len, k, max_mentions_per_doc],
            "Num sents, num tokens, num_mentions, max_antecedents")

        max_antecedents = tf.minimum(
            tf.minimum(config["max_antecedents"], k - 1),
            max_mentions_per_doc - 1)

        # NOTE(review): this reads self.config while the rest of the method
        # reads the `config` argument -- confirm both refer to the same
        # settings.
        if self.config["coarse_to_fine"]:
            (antecedents, antecedents_mask, top_fast_antecedent_scores,
             top_antecedent_offsets) = coarse_to_fine_pruning(
                 mention_emb, mention_scores, max_antecedents,
                 mention_doc_ids, self.dropout)
        else:
            (antecedents, antecedents_mask, top_fast_antecedent_scores,
             top_antecedent_offsets) = distance_pruning(
                 mention_emb, mention_scores, max_antecedents,
                 mention_doc_ids)

        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            top_antecedent_emb = tf.gather(mention_emb,
                                           antecedents)  # [k, c, emb]
            top_antecedent_scores, top_antecedent_emb, _ = get_antecedent_scores(
                mention_emb, mention_scores, antecedents, config,
                self.dropout, top_fast_antecedent_scores,
                top_antecedent_offsets)  # [k, max_ant]
            top_antecedent_weights = tf.nn.softmax(
                tf.concat([dummy_scores, top_antecedent_scores],
                          1))  # [k, c + 1]
            top_antecedent_emb = tf.concat(
                [tf.expand_dims(mention_emb, 1), top_antecedent_emb],
                1)  # [k, c + 1, emb]
            attended_span_emb = tf.reduce_sum(
                tf.expand_dims(top_antecedent_weights, 2) *
                top_antecedent_emb, 1)  # [k, emb]
            # NOTE(review): because mention_emb is overwritten with
            # attended_span_emb right here, the gated mix below evaluates to
            # f * a + (1 - f) * a == a, i.e. the gate has no effect on the
            # value. Left as-is to preserve the trained behaviour -- confirm
            # whether the overwrite is intentional.
            mention_emb = attended_span_emb
            with tf.variable_scope("f"):
                f = tf.sigmoid(
                    util.projection(
                        tf.concat([mention_emb, attended_span_emb], 1),
                        util.shape(mention_emb, -1)))  # [k, emb]
                mention_emb = f * attended_span_emb + (
                    1 - f) * mention_emb  # [k, emb]

        # Replace the rows of the selected mentions in candidate_span_emb by
        # their refined embeddings: subtract the old rows, add the new ones.
        old_mention_emb = tf.gather(candidate_span_emb, top_mention_indices)
        top_mention_indices = tf.expand_dims(top_mention_indices, 1)
        old_mention_emb_padded = tf.scatter_nd(
            top_mention_indices, old_mention_emb,
            tf.shape(candidate_span_emb))
        new_mention_emb_padded = tf.scatter_nd(
            top_mention_indices, mention_emb, tf.shape(candidate_span_emb))
        candidate_span_emb = (candidate_span_emb - old_mention_emb_padded +
                              new_mention_emb_padded)

        top_antecedent_scores = tf.concat(
            [tf.zeros([k, 1]), top_antecedent_scores], 1)  # [k, max_ant+1]
        predict_dict.update({
            "candidate_mention_starts": flat_candidate_starts,  # [num_candidates]
            "candidate_mention_ends": flat_candidate_ends,  # [num_candidates]
            "candidate_mention_scores": candidate_mention_scores,  # [num_candidates]
            "mention_starts": mention_starts,  # [k]
            "mention_ends": mention_ends,  # [k]
            "antecedents": antecedents,  # [k, max_ant]
            "antecedent_scores": top_antecedent_scores,  # [k, max_ant+1]
        })

    # log(0) = -inf for masked-out candidate slots, log(1) = 0 for real ones.
    spans_log_mask = tf.log(
        tf.to_float(candidate_mask))  # [num_sentences, max_num_candidates]
    if head_scores is not None:
        predict_dict["head_scores"] = head_scores

    # Constant zero score for the "null" label, prepended to the NER scores.
    dummy_scores = tf.expand_dims(
        tf.zeros_like(candidate_span_ids, dtype=tf.float32), 2)

    if config["ner_weight"] + config["coref_weight"] > 0:
        gold_ner_labels, gold_coref_cluster_ids = get_span_task_labels(
            candidate_starts, candidate_ends, labels,
            max_sentence_length)  # [num_sentences, max_num_candidates]

    # ------------------------------------------------------------------
    # Relations: pick entity candidates, then score entity pairs.
    # ------------------------------------------------------------------
    if config["relation_weight"] > 0:
        if config['entity_beam']:
            flat_ner_scores = get_unary_scores(
                candidate_span_emb, config, self.dropout,
                len(self.data.ner_labels) - 1,
                "ner_scores")  # [num_candidates, num_labels-1]
            ner_scores = tf.gather(
                flat_ner_scores, candidate_span_ids) + tf.expand_dims(
                    spans_log_mask,
                    2)  # [num_sentences, max_num_candidates, num_labels-1]
            ner_scores = tf.concat(
                [dummy_scores, ner_scores],
                2)  # [num_sentences, max_num_candidates, num_labels]
            (entity_starts, entity_ends, entity_scores, num_entities,
             top_entity_indices) = get_ner_candidates(
                 candidate_starts, candidate_ends, ner_scores,
                 candidate_mask, text_len,
                 config["entity_ratio"])  # Do we need to sort spans?
        else:
            flat_candidate_entity_scores = get_unary_scores(
                candidate_span_emb, config, self.dropout, 1,
                "entity_scores")  # [num_candidates,]
            candidate_entity_scores = tf.gather(
                flat_candidate_entity_scores, candidate_span_ids
            ) + spans_log_mask  # [num_sentences, max_num_candidates]
            (entity_starts, entity_ends, entity_scores, num_entities,
             top_entity_indices) = get_batch_topk(
                 candidate_starts,
                 candidate_ends,
                 candidate_entity_scores,
                 config["entity_ratio"],
                 text_len,
                 max_sentence_length,
                 sort_spans=True,
                 enforce_non_crossing=False)  # Do we need to sort spans?

        entity_span_indices = batch_gather(
            candidate_span_ids,
            top_entity_indices)  # [num_sentences, max_num_ents]
        entity_emb = tf.gather(
            candidate_span_emb,
            entity_span_indices)  # [num_sentences, max_num_ents, emb]
        max_num_entities = util.shape(entity_scores, 1)

    if config["relation_weight"] > 0:
        if config['add_ner_emb']:
            # flat_ner_scores only exists on the entity_beam path above; fail
            # with an explicit message instead of an UnboundLocalError.
            if not config['entity_beam']:
                raise ValueError(
                    "config['add_ner_emb'] requires config['entity_beam'] "
                    "to be enabled")
            ner_emb = tf.gather(flat_ner_scores, entity_span_indices)
            entity_emb = tf.concat([entity_emb, ner_emb], 2)

        rel_labels = get_relation_labels(
            entity_starts, entity_ends, num_entities, labels,
            max_sentence_length
        )  # [num_sentences, max_num_ents, max_num_ents]

        if config['bilinear']:
            rel_scores = get_rel_bilinear_scores(
                entity_emb, entity_scores, len(self.data.rel_labels), config,
                self.dropout
            )  # [num_sentences, max_num_ents, max_num_ents, num_labels]
        else:
            if config['rel_prop']:
                # config['rel_prop'] doubles as the number of propagation
                # iterations.
                for i in range(config['rel_prop']):
                    rel_scores, entity_emb, flat_entities_mask = get_rel_scores(
                        entity_emb, entity_scores, len(self.data.rel_labels),
                        config, self.dropout, num_entities
                    )  # [num_sentences, max_num_ents, max_num_ents, num_labels]
                if config['rel_prop_emb']:
                    # Write the propagated entity embeddings back into
                    # candidate_span_emb (subtract old rows, add new rows).
                    entity_emb_size = util.shape(entity_emb, -1)
                    flat_entity_emb = tf.reshape(entity_emb, [
                        num_sentences * max_num_entities, entity_emb_size
                    ])
                    flat_entity_emb = tf.boolean_mask(flat_entity_emb,
                                                      flat_entities_mask)
                    entity_indices = tf.boolean_mask(
                        tf.reshape(entity_span_indices, [-1]),
                        flat_entities_mask)
                    old_entity_emb = tf.gather(candidate_span_emb,
                                               entity_indices)
                    entity_indices = tf.expand_dims(entity_indices, 1)
                    old_entity_emb_padded = tf.scatter_nd(
                        entity_indices, old_entity_emb,
                        tf.shape(candidate_span_emb))
                    new_entity_emb_padded = tf.scatter_nd(
                        entity_indices, flat_entity_emb,
                        tf.shape(candidate_span_emb))
                    candidate_span_emb = (candidate_span_emb -
                                          old_entity_emb_padded +
                                          new_entity_emb_padded)
            else:
                rel_scores = get_rel_scores(
                    entity_emb, entity_scores, len(self.data.rel_labels),
                    config, self.dropout, num_entities
                )  # [num_sentences, max_num_ents, max_num_ents, num_labels]

    if config["relation_weight"] > 0:
        rel_loss = get_rel_softmax_loss(
            rel_scores, rel_labels, num_entities,
            config)  # [num_sentences, max_num_ents, max_num_ents]
        predict_dict.update({
            "entity_starts": entity_starts,
            "entity_ends": entity_ends,
            # NOTE(review): the key is misspelled ("entitiy"); kept verbatim
            # because consumers of predict_dict may look it up by this name.
            "entitiy_scores": entity_scores,
            "num_entities": num_entities,
            "rel_labels": tf.argmax(rel_scores, -1),  # [num_sentences, num_ents, num_ents]
            "rel_scores": rel_scores
        })
    else:
        rel_loss = 0

    # ------------------------------------------------------------------
    # NER.
    # ------------------------------------------------------------------
    if config["ner_weight"] > 0:
        flat_ner_scores = get_unary_scores(
            candidate_span_emb, config, self.dropout,
            len(self.data.ner_labels) - 1,
            "ner_scores")  # [num_candidates, num_labels-1]
        ner_scores = tf.gather(
            flat_ner_scores, candidate_span_ids) + tf.expand_dims(
                spans_log_mask,
                2)  # [num_sentences, max_num_candidates, num_labels-1]
        ner_scores = tf.concat(
            [dummy_scores, ner_scores],
            2)  # [num_sentences, max_num_candidates, num_labels]
        ner_loss = get_softmax_loss(ner_scores, gold_ner_labels,
                                    candidate_mask)  # [num_sentences]
        ner_loss = tf.reduce_sum(
            ner_loss)  # / tf.to_float(num_sentences)  # []
        predict_dict["ner_scores"] = ner_scores
    else:
        ner_loss = 0

    # ------------------------------------------------------------------
    # Get coref representations.
    # ------------------------------------------------------------------
    if config["coref_weight"] > 0:
        candidate_mention_scores = get_unary_scores(
            candidate_span_emb, config, self.dropout, 1,
            "mention_scores")  # [num_candidates]

        doc_ids = tf.expand_dims(inputs["doc_id"], 1)  # [num_sentences, 1]
        candidate_doc_ids = tf.boolean_mask(
            tf.reshape(
                tf.tile(doc_ids, [1, max_num_candidates_per_sentence]), [-1]),
            flat_candidate_mask)  # [num_candidates]

        k = tf.to_int32(
            tf.floor(tf.to_float(doc_len) * config["mention_ratio"]))
        top_mention_indices = srl_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(flat_candidate_starts, 0),
            tf.expand_dims(flat_candidate_ends, 0),
            tf.expand_dims(k, 0), doc_len, True, True)  # [1, k]
        top_mention_indices.set_shape([1, None])
        top_mention_indices = tf.squeeze(top_mention_indices, 0)  # [k]
        mention_starts = tf.gather(flat_candidate_starts,
                                   top_mention_indices)  # [k]
        mention_ends = tf.gather(flat_candidate_ends,
                                 top_mention_indices)  # [k]
        mention_scores = tf.gather(candidate_mention_scores,
                                   top_mention_indices)  # [k]
        mention_emb = tf.gather(candidate_span_emb,
                                top_mention_indices)  # [k, emb]
        mention_doc_ids = tf.gather(candidate_doc_ids,
                                    top_mention_indices)  # [k]

        if head_scores is not None:
            predict_dict["coref_head_scores"] = head_scores

        max_mentions_per_doc = tf.reduce_max(
            tf.unsorted_segment_sum(
                data=tf.ones_like(mention_doc_ids, dtype=tf.int32),
                segment_ids=mention_doc_ids,
                num_segments=tf.reduce_max(mention_doc_ids) + 1))  # []

        # Debug op; it is not referenced again in this function.
        k_Print = tf.Print(
            k, [num_sentences, doc_len, k, max_mentions_per_doc],
            "Num sents, num tokens, num_mentions, max_antecedents")

        max_antecedents = tf.minimum(
            tf.minimum(config["max_antecedents"], k - 1),
            max_mentions_per_doc - 1)

        if self.config["coarse_to_fine"]:
            (antecedents, antecedents_mask, top_fast_antecedent_scores,
             top_antecedent_offsets) = coarse_to_fine_pruning(
                 mention_emb, mention_scores, max_antecedents,
                 mention_doc_ids, self.dropout)
        else:
            (antecedents, antecedents_mask, top_fast_antecedent_scores,
             top_antecedent_offsets) = distance_pruning(
                 mention_emb, mention_scores, max_antecedents,
                 mention_doc_ids)

        antecedent_log_mask = tf.log(
            tf.to_float(antecedents_mask))  # [k, max_ant]

        # [k, max_ant], [k, max_ant, emb], [k, max_ant, emb2]
        antecedent_scores, antecedent_emb, pair_emb = get_antecedent_scores(
            mention_emb, mention_scores, antecedents, config, self.dropout,
            top_fast_antecedent_scores,
            top_antecedent_offsets)  # [k, max_ant]
        antecedent_scores = tf.concat(
            [tf.zeros([k, 1]), antecedent_scores], 1)  # [k, max_ant+1]

    # ------------------------------------------------------------------
    # Compute Coref loss.
    # ------------------------------------------------------------------
    if config["coref_weight"] > 0:
        flat_cluster_ids = tf.boolean_mask(
            tf.reshape(gold_coref_cluster_ids, [-1]),
            flat_candidate_mask)  # [num_candidates]
        mention_cluster_ids = tf.gather(flat_cluster_ids,
                                        top_mention_indices)  # [k]

        antecedent_cluster_ids = tf.gather(mention_cluster_ids,
                                           antecedents)  # [k, max_ant]
        # Adds the int-cast log mask so masked antecedent slots no longer
        # carry a cluster id that could match the mention's own id.
        antecedent_cluster_ids += tf.to_int32(
            antecedent_log_mask)  # [k, max_ant]
        same_cluster_indicator = tf.equal(
            antecedent_cluster_ids,
            tf.expand_dims(mention_cluster_ids, 1))  # [k, max_ant]
        non_dummy_indicator = tf.expand_dims(mention_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(
            same_cluster_indicator, non_dummy_indicator)  # [k, max_ant]
        # `keepdims` replaces the deprecated `keep_dims` spelling and matches
        # the other coreference models in this file.
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                      1)  # [k, max_ant+1]

        coref_loss = get_coref_softmax_loss(antecedent_scores,
                                            antecedent_labels)  # [k]
        coref_loss = tf.reduce_sum(
            coref_loss)  # / tf.to_float(num_sentences)  # []
        predict_dict.update({
            "candidate_mention_starts": flat_candidate_starts,  # [num_candidates]
            "candidate_mention_ends": flat_candidate_ends,  # [num_candidates]
            "candidate_mention_scores": candidate_mention_scores,  # [num_candidates]
            "mention_starts": mention_starts,  # [k]
            "mention_ends": mention_ends,  # [k]
            "antecedents": antecedents,  # [k, max_ant]
            "antecedent_scores": antecedent_scores,  # [k, max_ant+1]
        })
    else:
        coref_loss = 0

    tf.summary.scalar("REL_loss", rel_loss)
    tf.summary.scalar("NER_loss", ner_loss)
    tf.summary.scalar("Coref_loss", coref_loss)

    loss = config["ner_weight"] * ner_loss + (
        config["coref_weight"] * coref_loss +
        config["relation_weight"] * rel_loss)

    return predict_dict, loss
def fit_cube(max_iter=5000,
             resolution=4,
             discontinuous=False,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             out_dir='.',
             log_fn=None,
             imgsave_interval=None,
             imgsave_fn=None):
    """Fit vertex positions and colors of a cube mesh to rendered references.

    Builds a TensorFlow graph that rasterizes a reference cube and an
    optimized copy under the same random transform, minimizes the mean
    squared image difference with Adam, and periodically logs the geometric
    error and shows/saves comparison images.

    Args:
        max_iter: Index of the last optimization iteration; each repeat runs
            ``max_iter + 1`` iterations.
        resolution: Side length (pixels) of the images used for the loss.
        discontinuous: Load ``cube_d.npz`` when true, ``cube_c.npz``
            otherwise.
        repeats: Number of independent optimization runs.
        log_interval: Log every this many iterations; falsy disables logging.
        display_interval: Display an image every this many iterations; falsy
            disables display.
        display_res: Side length (pixels) of the display renderings.
        out_dir: Output directory; created when truthy.
        log_fn: Log file name inside ``out_dir``; no log file when falsy.
        imgsave_interval: Save an image every this many iterations; falsy
            disables saving.
        imgsave_fn: ``%``-format pattern for image file names; it is
            formatted with the iteration number.

    Returns:
        None.
    """
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Setup TF graph for reference.
    # Append a constant 1 column to the positions (homogeneous coordinates).
    vtxw = np.concatenate([vtxp, np.ones([vtxp.shape[0], 1])], axis=1).astype(np.float32)
    pos_clip = tf.matmul(vtxw, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[resolution, resolution], output_db=False)
    color, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Optimized variables.
    vtxc_opt = tf.get_variable('vtxc', initializer=tf.zeros_initializer(), shape=vtxc.shape)
    vtxp_opt = tf.get_variable('vtxp', initializer=tf.zeros_initializer(), shape=vtxp.shape)

    # Optimization variable setters for initialization.
    vtxc_opt_in = tf.placeholder(tf.float32, vtxc.shape)
    vtxp_opt_in = tf.placeholder(tf.float32, vtxp.shape)
    opt_set = tf.group(tf.assign(vtxc_opt, vtxc_opt_in), tf.assign(vtxp_opt, vtxp_opt_in))

    # Setup TF graph for what we optimize result.
    vtxw_opt = tf.concat([vtxp_opt, tf.ones([vtxp.shape[0], 1], tf.float32)], axis=1)
    pos_clip_opt = tf.matmul(vtxw_opt, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[resolution, resolution], output_db=False)
    color_opt, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss and optimizer.
    loss = tf.reduce_mean((color_opt - color)**2)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[vtxp_opt, vtxc_opt])

    # Setup TF graph for display.
    rast_out_disp, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_disp, col_idx)
    color_disp = dr.antialias(color_disp, rast_out_disp, pos_clip_opt, pos_idx)
    rast_out_disp_ref, _ = dr.rasterize(pos_clip, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp_ref, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out_disp_ref, col_idx)
    color_disp_ref = dr.antialias(color_disp_ref, rast_out_disp_ref, pos_clip, pos_idx)

    # Geometric error calculation: per vertex, the Euclidean norm of
    # (|coordinate| - 0.5), averaged over all vertices.
    geom_loss = tf.reduce_mean(tf.reduce_sum((tf.abs(vtxp_opt) - .5)**2, axis=1)**0.5)

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # The try/finally guarantees the log file is closed even when an
    # iteration raises (e.g. a failed session run or image save).
    try:
        # Repeats.
        for rep in range(repeats):

            # Optimize.
            ang = 0.0
            gl_avg = []
            util.init_uninitialized_vars()
            for it in range(max_iter + 1):
                # Initialize optimization.
                if it == 0:
                    # Start from the reference positions perturbed by uniform
                    # noise and from uniformly random colors.
                    vtxp_init = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
                    vtxc_init = np.random.uniform(0.0, 1.0, size=vtxc.shape)
                    util.run(opt_set, {vtxc_opt_in: vtxc_init.astype(np.float32), vtxp_opt_in: vtxp_init.astype(np.float32)})

                # Learning rate ramp: exponential decay, floored at 1% of
                # the base rate.
                lr = 1e-2
                lr = lr * max(0.01, 10**(-it*0.0005))

                # Random rotation/translation matrix for optimization.
                r_rot = util.random_rotation_translation(0.25)

                # Smooth rotation for display.
                a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

                # Modelview and modelview + projection matrices.
                proj = util.projection(x=0.4)
                r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
                r_mvp = np.matmul(proj, r_mv).astype(np.float32)
                a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
                a_mvp = np.matmul(proj, a_mv).astype(np.float32)

                # Run training and measure geometric error.
                gl_val, _ = util.run([geom_loss, train_op], {mtx_in: r_mvp, lr_in: lr})
                gl_avg.append(gl_val)

                # Print/save log.
                if log_interval and (it % log_interval == 0):
                    # Report the mean error since the last log line and
                    # reset the accumulator.
                    gl_val, gl_avg = np.mean(np.asarray(gl_avg)), []
                    s = ("rep=%d," % rep) if repeats > 1 else ""
                    s += "iter=%d,err=%f" % (it, gl_val)
                    print(s)
                    if log_file:
                        log_file.write(s + "\n")

                # Show/save image.
                display_image = display_interval and (it % display_interval == 0)
                save_image = imgsave_interval and (it % imgsave_interval == 0)

                if display_image or save_image:
                    ang = ang + 0.1
                    img_o = util.run(color_opt, {mtx_in: r_mvp})[0]
                    img_b = util.run(color, {mtx_in: r_mvp})[0]
                    img_d = util.run(color_disp, {mtx_in: a_mvp})[0]
                    img_r = util.run(color_disp_ref, {mtx_in: a_mvp})[0]

                    # Upscale the low-resolution loss images by pixel
                    # repetition so they tile next to the display renders.
                    scl = display_res // img_o.shape[0]
                    img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                    img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                    result_image = np.concatenate([img_o, img_b, img_d, img_r], axis=1)

                    if display_image:
                        util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
                    if save_image:
                        util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)
    finally:
        # All repeats done (or aborted).
        if log_file:
            log_file.close()
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                             lm_emb, char_index, text_len, speaker_ids, genre,
                             is_training, gold_starts, gold_ends, cluster_ids):
    """Build the coreference graph (with a transformer over top spans) and its loss.

    Embeds the document (word, optional character-CNN and language-model
    embeddings), contextualizes it, enumerates and scores candidate spans,
    keeps the top-scoring spans, augments their embeddings with the output of
    ``self.sample_transformer``, and scores antecedents over
    ``self.config["coref_depth"]`` refinement rounds.

    Args:
        tokens: Token input for the ELMo hub module; only used when
            ``self.lm_file`` is falsy.
        context_word_emb: Word embeddings for the context encoder
            # [num_sentences, max_sentence_length, emb]
        head_word_emb: Word embeddings for span heads
            # [num_sentences, max_sentence_length, emb]
        lm_emb: Language-model embeddings; replaced by freshly computed ELMo
            outputs when ``self.lm_file`` is falsy.
        char_index: Character ids, looked up in the "char_embeddings" table.
        text_len: Sentence lengths.
        speaker_ids: Per-word speaker ids, gathered at the span starts.
        genre: Index into the "genre_embeddings" table.
        is_training: Passed to ``self.get_dropout``.
        gold_starts: Gold mention starts, passed to
            ``self.get_candidate_labels``.
        gold_ends: Gold mention ends, passed to ``self.get_candidate_labels``.
        cluster_ids: Gold cluster ids, passed to
            ``self.get_candidate_labels``.

    Returns:
        A pair ``(outputs, loss)`` where ``outputs`` is the list
        ``[candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores]`` and ``loss`` is the summed softmax loss.
    """
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(
        self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                         is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    # Optional character-level CNN embeddings.
    if self.config["char_embedding_size"] > 0:
        char_emb = tf.gather(
            tf.get_variable(
                "char_embeddings",
                [len(self.char_dict), self.config["char_embedding_size"]]),
            char_index
        )  # [num_sentences, max_sentence_length, max_word_length, emb]
        flattened_char_emb = tf.reshape(char_emb, [
            num_sentences * max_sentence_length,
            util.shape(char_emb, 2),
            util.shape(char_emb, 3)
        ])  # [num_sentences * max_sentence_length, max_word_length, emb]
        flattened_aggregated_char_emb = util.cnn(
            flattened_char_emb, self.config["filter_widths"],
            self.config["filter_size"]
        )  # [num_sentences * max_sentence_length, emb]
        aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
            num_sentences, max_sentence_length,
            util.shape(flattened_aggregated_char_emb, 1)
        ])  # [num_sentences, max_sentence_length, emb]
        context_emb_list.append(aggregated_char_emb)
        head_emb_list.append(aggregated_char_emb)

    # Without a cached LM file, compute ELMo embeddings in-graph; this
    # overrides the `lm_emb` argument.
    if not self.lm_file:
        elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
        lm_embeddings = elmo_module(inputs={
            "tokens": tokens,
            "sequence_len": text_len
        },
                                    signature="tokens",
                                    as_dict=True)
        word_emb = lm_embeddings[
            "word_emb"]  # [num_sentences, max_sentence_length, 512]
        lm_emb = tf.stack([
            tf.concat([word_emb, word_emb], -1),
            lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
        ], -1)  # [num_sentences, max_sentence_length, 1024, 3]

    # Learned softmax-weighted mix of the LM layers, times a learned scale.
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    with tf.variable_scope("lm_aggregation"):
        self.lm_weights = tf.nn.softmax(
            tf.get_variable("lm_scores", [lm_num_layers],
                            initializer=tf.constant_initializer(0.0)))
        self.lm_scaling = tf.get_variable(
            "lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(
        lm_emb,
        [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(
        flattened_lm_emb, tf.expand_dims(
            self.lm_weights,
            1))  # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(
        flattened_aggregated_lm_emb,
        [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list,
                            2)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list,
                         2)  # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(
        context_emb,
        self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(
        head_emb,
        self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(
        text_len,
        maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

    context_outputs = self.lstm_contextualize(
        context_emb, text_len, text_len_mask)  # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    genre_emb = tf.gather(
        tf.get_variable("genre_embeddings",
                        [len(self.genres), self.config["feature_size"]]),
        genre)  # [emb]

    sentence_indices = tf.tile(
        tf.expand_dims(tf.range(num_sentences), 1),
        [1, max_sentence_length])  # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(
        sentence_indices, text_len_mask)  # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(
        head_emb, text_len_mask)  # [num_words]

    # Enumerate every (start, start + width) pair, then keep only spans that
    # end inside the document and do not cross a sentence boundary.
    candidate_starts = tf.tile(
        tf.expand_dims(tf.range(num_words), 1),
        [1, self.max_span_width])  # [num_words, max_span_width]
    candidate_ends = candidate_starts + tf.expand_dims(
        tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
    candidate_start_sentence_indices = tf.gather(
        flattened_sentence_indices,
        candidate_starts)  # [num_words, max_span_width]
    candidate_end_sentence_indices = tf.gather(
        flattened_sentence_indices,
        tf.minimum(candidate_ends,
                   num_words - 1))  # [num_words, max_span_width]
    candidate_mask = tf.logical_and(
        candidate_ends < num_words,
        tf.equal(
            candidate_start_sentence_indices,
            candidate_end_sentence_indices))  # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(
        candidate_mask, [-1])  # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(
        tf.reshape(candidate_starts, [-1]),
        flattened_candidate_mask)  # [num_candidates]
    candidate_ends = tf.boolean_mask(
        tf.reshape(candidate_ends, [-1]),
        flattened_candidate_mask)  # [num_candidates]
    candidate_sentence_indices = tf.boolean_mask(
        tf.reshape(candidate_start_sentence_indices, [-1]),
        flattened_candidate_mask)  # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends,
        cluster_ids)  # [num_candidates]

    candidate_span_emb = self.get_span_emb(
        flattened_head_emb, context_outputs, candidate_starts,
        candidate_ends)  # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(
        candidate_span_emb)  # [k, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [k]

    k = tf.to_int32(
        tf.floor(
            tf.to_float(tf.shape(context_outputs)[0]) *
            self.config["top_span_ratio"]))
    # Cap the number of kept spans.
    # NOTE(review): the padding below computes self.seq_length - (number of
    # kept spans), which would be negative if more spans than
    # self.seq_length were kept -- confirm that 500 <= self.seq_length.
    k = tf.minimum(500, k)
    top_span_indices = coref_ops.extract_spans(
        tf.expand_dims(candidate_mention_scores, 0),
        tf.expand_dims(candidate_starts, 0),
        tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
        util.shape(context_outputs, 0), True)  # [1, k]
    top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb,
                             top_span_indices)  # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                     top_span_indices)  # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores,
                                        top_span_indices)  # [k]
    top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                          top_span_indices)  # [k]
    top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

    # NOTE(review): hard-coded input width of the projection below; it must
    # equal the last dimension of top_span_emb for the matmul to be valid.
    orig_dim = 1270

    # NOTE(review): the original indentation of this section was lost, so
    # the extent of the two name scopes below is a reconstruction. name_scope
    # also prefixes tf.Variable names, so confirm against existing
    # checkpoints before relying on it.
    with tf.name_scope("transformer"):
        with tf.name_scope("embedding_transformer"):
            # Project span embeddings to the transformer width.
            W = tf.Variable(tf.random_normal((orig_dim, self.new_dim)))
            b = tf.Variable(tf.random_normal((self.new_dim, )))
            temp_input = tf.nn.relu(tf.matmul(top_span_emb, W) + b)

        # 1.0 for the first (number of spans) positions, 0.0 for the padding
        # up to self.seq_length; tiled into a square mask.
        padding_mask_partial = tf.cast(tf.sequence_mask(
            tf.shape(temp_input)[0], maxlen=self.seq_length),
                                       dtype=tf.float32)
        multiples = [self.seq_length]
        padding_mask_partial2 = tf.tile(padding_mask_partial, multiples)
        enc_padding_mask = tf.reshape(padding_mask_partial2,
                                      [multiples[0], -1])
        dec_padding_mask = tf.reshape(padding_mask_partial2,
                                      [multiples[0], -1])
        # Zero the diagonal of the decoder padding mask.
        dec_padding_mask = tf.matrix_set_diag(
            dec_padding_mask,
            tf.zeros(dec_padding_mask.shape[0:-1]),
            name=None)
        look_ahead_mask = create_look_ahead_mask(
            tf.shape(padding_mask_partial)[0])
        combined_mask = tf.minimum(enc_padding_mask, look_ahead_mask)

        # Zero-pad the span rows up to self.seq_length.
        s = tf.shape(temp_input)
        paddings = [[0, self.seq_length - s[0]], [0, 0]]
        padded_embd = tf.pad(temp_input, paddings, "CONSTANT")
        predictions, _ = self.sample_transformer(padded_embd, padded_embd,
                                                 True, enc_padding_mask,
                                                 combined_mask,
                                                 dec_padding_mask)
        # Keep the first k transformer rows and prepend them feature-wise to
        # the original span embeddings.
        top_span_emb = tf.concat([predictions[:k], top_span_emb], 1)

    c = tf.minimum(self.config["max_top_antecedents"], k)

    if self.config["coarse_to_fine"]:
        (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
         top_antecedent_offsets) = self.coarse_to_fine_pruning(
             top_span_emb, top_span_mention_scores, c)
    else:
        (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
         top_antecedent_offsets) = self.distance_pruning(
             top_span_emb, top_span_mention_scores, c)

    # Zero score for the "no antecedent" option (defined once; the original
    # assigned the same tensor twice).
    dummy_scores = tf.zeros([k, 1])  # [k, 1]
    for i in range(self.config["coref_depth"]):
        # Variables are created on the first round and reused afterwards.
        with tf.variable_scope("coref_layer", reuse=(i > 0)):
            top_antecedent_emb = tf.gather(top_span_emb,
                                           top_antecedents)  # [k, c, emb]
            top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                top_span_emb, top_antecedents, top_antecedent_emb,
                top_antecedent_offsets, top_span_speaker_ids,
                genre_emb)  # [k, c]
            top_antecedent_weights = tf.nn.softmax(
                tf.concat([dummy_scores, top_antecedent_scores],
                          1))  # [k, c + 1]
            top_antecedent_emb = tf.concat(
                [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                1)  # [k, c + 1, emb]
            attended_span_emb = tf.reduce_sum(
                tf.expand_dims(top_antecedent_weights, 2) *
                top_antecedent_emb, 1)  # [k, emb]
            with tf.variable_scope("f"):
                # Gate between the attended and the current span embedding.
                f = tf.sigmoid(
                    util.projection(
                        tf.concat([top_span_emb, attended_span_emb], 1),
                        util.shape(top_span_emb, -1)))  # [k, emb]
                top_span_emb = f * attended_span_emb + (
                    1 - f) * top_span_emb  # [k, emb]

    top_antecedent_scores = tf.concat(
        [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                           top_antecedents)  # [k, c]
    # Adds the int-cast log mask so masked antecedent slots no longer carry
    # a cluster id that could match the span's own id.
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
    same_cluster_indicator = tf.equal(
        top_antecedent_cluster_ids,
        tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                         1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator,
                                     non_dummy_indicator)  # [k, c]
    dummy_labels = tf.logical_not(
        tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                      1)  # [k, c + 1]
    loss = self.softmax_loss(top_antecedent_scores,
                             top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(loss)  # []

    return [
        candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores
    ], loss
def fit_env_phong(max_iter=1000,
                  log_interval=10,
                  display_interval=None,
                  display_res=1024,
                  res=1024,
                  lr_base=1e-2,
                  lr_ramp=1.0,
                  out_dir=None,
                  log_fn=None,
                  mp4save_interval=None,
                  mp4save_fn=None):
    """Fit an environment map and Phong parameters to a rendered reference.

    Loads a mesh and a reference environment map from ``data/envphong.npz``
    and optimizes a learned environment map plus four Phong values (RGB color
    and exponent) with Adam, so that renderings under random poses and random
    light directions match the reference under an L2 pixel loss.

    Args:
        max_iter: Index of the last iteration; the loop runs
            ``max_iter + 1`` times.
        log_interval: Print (and, if a log file is open, write) averaged
            errors every this many iterations. Falsy disables logging.
        display_interval: Show the current result every this many iterations
            via ``util.display_image``. Falsy disables display.
        display_res: ``size`` argument forwarded to ``util.display_image``.
        res: Rasterization resolution (``res`` x ``res``).
        lr_base: Base Adam learning rate.
        lr_ramp: Learning-rate multiplier; the schedule is
            ``lr_ramp ** (step / max_iter)``.
        out_dir: Output directory, created if missing. Without it nothing is
            written to disk.
        log_fn: Log file name inside ``out_dir``.
        mp4save_interval: Append a video frame every this many iterations.
        mp4save_fn: Video file name inside ``out_dir``. A video is only
            written when both this and ``mp4save_interval`` are set.
    """
    log_file = None
    writer = None
    try:
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
            if log_fn:
                log_file = open(out_dir + '/' + log_fn, 'wt')
            # Require both an interval and a file name. Testing only
            # `mp4save_interval != 0` opened a writer on "<out_dir>/None"
            # whenever the defaults were left untouched.
            if mp4save_interval and mp4save_fn:
                writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
        if writer is None:
            # No writer means no frame may ever be appended.
            mp4save_interval = None

        # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
        datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
        with np.load(f'{datadir}/envphong.npz') as f:
            pos_idx, pos, normals, env = f.values()
        env = env.astype(np.float32)/255.0
        env = np.stack(env)[:, ::-1].copy()
        print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

        # Move all the stuff to GPU.
        pos_idx = torch.as_tensor(pos_idx, dtype=torch.int32, device='cuda')
        pos = torch.as_tensor(pos, dtype=torch.float32, device='cuda')
        normals = torch.as_tensor(normals, dtype=torch.float32, device='cuda')
        env = torch.as_tensor(env, dtype=torch.float32, device='cuda')

        # Target Phong parameters.
        phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
        phong_exp = 25.0
        phong_rgb_t = torch.as_tensor(phong_rgb, dtype=torch.float32, device='cuda')

        # Learned variables: environment maps, phong color, phong exponent.
        env_var = torch.ones_like(env) * .5
        env_var.requires_grad_()
        phong_var_raw = torch.as_tensor(np.random.uniform(size=[4]), dtype=torch.float32, device='cuda')
        phong_var_raw.requires_grad_()
        # The exponent (last entry) is scaled by 10 relative to the raw variable.
        phong_var_mul = torch.as_tensor([1.0, 1.0, 1.0, 10.0], dtype=torch.float32, device='cuda')

        # Render.
        ang = 0.0
        imgloss_avg, phong_avg = [], []
        glctx = dr.RasterizeGLContext()
        zero_tensor = torch.as_tensor(0.0, dtype=torch.float32, device='cuda')
        one_tensor = torch.as_tensor(1.0, dtype=torch.float32, device='cuda')

        # Adam optimizer for environment map and phong with a learning rate ramp.
        optimizer = torch.optim.Adam([env_var, phong_var_raw], lr=lr_base)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter)))

        # Defined once: it only captures loop-invariant state (mesh, GL
        # context, resolution); everything per-iteration is a parameter.
        def render_refl(ldir, cpos, mvp):
            # Transform and rasterize.
            viewvec = pos[..., :3] - cpos[np.newaxis, np.newaxis, :]  # View vectors at vertices.
            reflvec = viewvec - 2.0 * normals[np.newaxis, ...] * torch.sum(normals[np.newaxis, ...] * viewvec, -1, keepdim=True)  # Reflection vectors at vertices.
            reflvec = reflvec / torch.sum(reflvec**2, -1, keepdim=True)**0.5  # Normalize.

            pos_clip = torch.matmul(pos, mvp.t())[np.newaxis, ...]
            rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx, [res, res])
            refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all')  # Interpolated reflection vectors.

            # Phong light.
            refl = refl / (torch.sum(refl**2, -1, keepdim=True) + 1e-8)**0.5  # Normalize.
            ldotr = torch.sum(-ldir * refl, -1, keepdim=True)  # L dot R.

            # Last return value is the background mask (last rasterizer channel == 0).
            return refl, refld, ldotr, (rast_out[..., -1:] == 0)

        for it in range(max_iter + 1):
            phong_var = phong_var_raw * phong_var_mul

            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            ang = ang + 0.01
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4, n=1.0, f=200.0)
            r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)
            a_mvc = a_mvp  # Keep the NumPy matrix; a_mvp becomes a tensor below.
            r_mvp = torch.as_tensor(r_mvp, dtype=torch.float32, device='cuda')
            a_mvp = torch.as_tensor(a_mvp, dtype=torch.float32, device='cuda')

            # Solve camera positions.
            a_campos = torch.as_tensor(np.linalg.inv(a_mv)[:3, 3], dtype=torch.float32, device='cuda')
            r_campos = torch.as_tensor(np.linalg.inv(r_mv)[:3, 3], dtype=torch.float32, device='cuda')

            # Random light direction.
            lightdir = np.random.normal(size=[3])
            lightdir /= np.linalg.norm(lightdir) + 1e-8
            lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')

            # Render the reflections.
            refl, refld, ldotr, mask = render_refl(lightdir, r_campos, r_mvp)

            # Reference color. No need for AA because we are not learning geometry.
            color = dr.texture(env[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
            color = color + phong_rgb_t * torch.max(zero_tensor, ldotr) ** phong_exp  # Phong.
            color = torch.where(mask, one_tensor, color)  # White background.

            # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
            color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
            color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3]  # Phong.
            color_opt = torch.where(mask, one_tensor, color_opt)  # White background.

            # Compute loss and train.
            loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Collect losses.
            imgloss_avg.append(loss.detach().cpu().numpy())
            phong_avg.append(phong_var.detach().cpu().numpy())

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
                phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
                phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
                phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
                s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
                print(s)
                if log_file:
                    log_file.write(s + '\n')

            # Show/save result image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                # Fixed light direction, transformed by the display matrix.
                lightdir = np.asarray([.8, -1., .5, 0.0])
                lightdir = np.matmul(a_mvc, lightdir)[:3]
                lightdir /= np.linalg.norm(lightdir)
                lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')
                refl, refld, ldotr, mask = render_refl(lightdir, a_campos, a_mvp)
                color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
                color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3]
                color_opt = torch.where(mask, one_tensor, color_opt)
                result_image = color_opt.detach()[0].cpu().numpy()
                if display_image:
                    util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
                if save_mp4:
                    writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))
    finally:
        # Done (or failed): release the video writer and log file either way.
        if writer is not None:
            writer.close()
        if log_file:
            log_file.close()
def fit_pose(max_iter=10000,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             lr_base=0.01,
             lr_falloff=1.0,
             nr_base=1.0,
             nr_falloff=1e-4,
             grad_phase_start=0.5,
             resolution=256,
             out_dir=None,
             log_fn=None,
             mp4save_interval=None,
             mp4save_fn=None):
    """Recover a target orientation quaternion from rendered images.

    Loads the mesh from ``data/cube_p.npz``. For each repeat a target pose
    and an initial pose are drawn with ``q_rnd()``. While
    ``it / max_iter < grad_phase_start`` the current pose is perturbed with
    noise and the best pose seen so far is kept; afterwards the pose is
    refined with Adam gradient steps on an image-space loss.

    Args:
        max_iter: Index of the last iteration; each repeat runs
            ``max_iter + 1`` iterations.
        repeats: Number of independent target/initial pose pairs to fit.
        log_interval: Print (and optionally write) statistics every this many
            iterations. Falsy disables logging.
        display_interval: Show the result every this many iterations via
            ``util.display_image``. Falsy disables display.
        display_res: ``size`` argument forwarded to ``util.display_image``.
        lr_base: Learning rate at iteration 0.
        lr_falloff: The learning rate is ``lr_base * lr_falloff ** (it / max_iter)``.
        nr_base: Noise scale at iteration 0.
        nr_falloff: The noise scale is ``nr_base * nr_falloff ** (it / max_iter)``.
        grad_phase_start: Value of ``it / max_iter`` from which gradient steps
            replace the noise-based search.
        resolution: Resolution argument forwarded to ``render``.
        out_dir: Output directory, created if missing. Without it nothing is
            written to disk.
        log_fn: Log file name inside ``out_dir``.
        mp4save_interval: Append a video frame every this many iterations.
        mp4save_fn: Video file name inside ``out_dir``. A video is only
            written when both this and ``mp4save_interval`` are set.
    """
    log_file = None
    writer = None
    try:
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
            if log_fn:
                log_file = open(out_dir + '/' + log_fn, 'wt')
            # Require both an interval and a file name. Testing only
            # `mp4save_interval != 0` opened a writer on "<out_dir>/None"
            # whenever the defaults were left untouched.
            if mp4save_interval and mp4save_fn:
                writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
        if writer is None:
            # No writer means no frame may ever be appended.
            mp4save_interval = None

        datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
        with np.load(f'{datadir}/cube_p.npz') as f:
            pos_idx, pos, col_idx, col = f.values()
        print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

        # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1). Drop
        # the last column in that case.
        if pos.shape[1] == 4:
            pos = pos[:, 0:3]

        # Create position/triangle index tensors
        pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
        vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
        col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
        vtx_col = torch.from_numpy(col.astype(np.float32)).cuda()

        glctx = dr.RasterizeGLContext()

        for rep in range(repeats):
            pose_target = torch.tensor(q_rnd(), device='cuda')
            pose_init = q_rnd()
            # Start from the normalized initial quaternion.
            pose_opt = torch.tensor(pose_init / np.sum(pose_init**2)**0.5, dtype=torch.float32, device='cuda', requires_grad=True)

            loss_best = np.inf
            pose_best = pose_opt.detach().clone()

            # Modelview + projection matrix.
            mvp = torch.tensor(np.matmul(util.projection(x=0.4), util.translate(0, 0, -3.5)).astype(np.float32), device='cuda')

            # Adam optimizer for the pose; the learning rate is set by hand each iteration.
            optimizer = torch.optim.Adam([pose_opt], betas=(0.9, 0.999), lr=lr_base)

            # Render.
            for it in range(max_iter + 1):
                # Set learning rate.
                itf = 1.0 * it / max_iter
                nr = nr_base * nr_falloff**itf
                lr = lr_base * lr_falloff**itf
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                # Noise input.
                if itf >= grad_phase_start:
                    noise = q_unit()
                else:
                    noise = q_scale(q_rnd(), nr)
                    noise = q_mul(noise, q_rnd_S4())  # Orientation noise.

                # Render.
                color = render(glctx, torch.matmul(mvp, q_to_mtx(pose_target)), vtx_pos, pos_idx, vtx_col, col_idx, resolution)
                pose_total_opt = q_mul_torch(pose_opt, noise)
                mtx_total_opt = torch.matmul(mvp, q_to_mtx(pose_total_opt))
                color_opt = render(glctx, mtx_total_opt, vtx_pos, pos_idx, vtx_col, col_idx, resolution)

                # Image-space loss.
                diff = (color_opt - color)**2  # L2 norm.
                diff = torch.tanh(5.0 * torch.max(diff, dim=-1)[0])
                loss = torch.mean(diff)

                # Measure image-space loss and update best found pose.
                loss_val = float(loss)
                if (loss_val < loss_best) and (loss_val > 0.0):
                    pose_best = pose_total_opt.detach().clone()
                    loss_best = loss_val
                    # During the noise phase, jump straight to the improved pose.
                    if itf < grad_phase_start:
                        with torch.no_grad():
                            pose_opt[:] = pose_best

                # Print/save log.
                if log_interval and (it % log_interval == 0):
                    err = q_angle_deg(pose_opt, pose_target)
                    ebest = q_angle_deg(pose_best, pose_target)
                    s = "rep=%d,iter=%d,err=%f,err_best=%f,loss=%f,loss_best=%f,lr=%f,nr=%f" % (rep, it, err, ebest, loss_val, loss_best, lr, nr)
                    print(s)
                    if log_file:
                        log_file.write(s + "\n")

                # Run gradient training step.
                if itf >= grad_phase_start:
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # Keep the optimized quaternion at unit length.
                with torch.no_grad():
                    pose_opt /= torch.sum(pose_opt**2)**0.5

                # Show/save image.
                display_image = display_interval and (it % display_interval == 0)
                save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

                if display_image or save_mp4:
                    img_ref = color[0].detach().cpu().numpy()
                    img_opt = color_opt[0].detach().cpu().numpy()
                    img_best = render(glctx, torch.matmul(mvp, q_to_mtx(pose_best)), vtx_pos, pos_idx, vtx_col, col_idx, resolution)[0].detach().cpu().numpy()
                    # Side by side: reference | best pose so far | current pose.
                    result_image = np.concatenate([img_ref, img_best, img_opt], axis=1)

                    if display_image:
                        util.display_image(result_image, size=display_res, title='(%d) %d / %d' % (rep, it, max_iter))
                    if save_mp4:
                        writer.append_data(np.clip(np.rint(result_image * 255.0), 0, 255).astype(np.uint8))
    finally:
        # Done (or failed): release the video writer and log file either way.
        if writer is not None:
            writer.close()
        if log_file:
            log_file.close()
def get_predictions_and_loss(self, inputs):
    """Build per-pair NER logits and the summed cross-entropy loss.

    ``inputs`` unpacks into (tokens, context_word_emb, lm_emb, char_index,
    text_len, is_training, gold_labels). Word embeddings, character-CNN
    features and (when ``self.lm_file`` is set) layer-weighted LM embeddings
    are concatenated, run through ``self.lstm_contextualize``, projected into
    separate start and end representations and scored for every position
    pair by ``util.bilinear_classifier``. Only pairs (i, j) that lie inside
    the sentence length and satisfy j >= i are kept.

    Returns:
        A pair ``(candidate_ner_scores, loss)``: the logits of the kept pairs
        with ``self.num_types + 1`` columns, and the scalar sum of the sparse
        softmax cross-entropy against ``gold_labels``.
    """
    (tokens, context_word_emb, lm_emb, char_index, text_len, is_training,
     gold_labels) = inputs

    cfg = self.config
    self.dropout = self.get_dropout(cfg["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(cfg["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(cfg["lstm_dropout_rate"], is_training)

    n_sents = tf.shape(tokens)[0]
    max_len = tf.shape(tokens)[1]

    # Pieces of the token representation, concatenated on the last axis below.
    embeddings = [context_word_emb]

    # Character features: embed, flatten to one row per token, run the CNN,
    # then restore the [sentence, token] layout.
    char_table = tf.get_variable(
        "char_embeddings", [len(self.char_dict), cfg["char_embedding_size"]])
    char_emb = tf.gather(char_table, char_index)  # [n_sents, max_len, max_word_length, emb]
    flat_char_emb = tf.reshape(
        char_emb,
        [n_sents * max_len, util.shape(char_emb, 2), util.shape(char_emb, 3)])
    flat_char_feats = util.cnn(flat_char_emb, cfg["filter_widths"], cfg["filter_size"])
    char_feats = tf.reshape(
        flat_char_feats, [n_sents, max_len, util.shape(flat_char_feats, 1)])
    embeddings.append(char_feats)

    if self.lm_file is not None:
        # Contextualized embeddings: softmax-weighted sum over the layer
        # axis, followed by a learned scalar scale.
        lm_dim = util.shape(lm_emb, 2)
        lm_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flat_lm = tf.reshape(lm_emb, [n_sents * max_len * lm_dim, lm_layers])
        mixed_lm = tf.matmul(flat_lm, tf.expand_dims(self.lm_weights, 1))  # [n_sents * max_len * emb, 1]
        lm_feats = tf.reshape(mixed_lm, [n_sents, max_len, lm_dim]) * self.lm_scaling
        embeddings.append(lm_feats)

    context_emb = tf.concat(embeddings, 2)  # [n_sents, max_len, emb]
    context_emb = tf.nn.dropout(context_emb, self.lexical_dropout)

    # Pair mask: both positions within the sentence length ...
    text_len_mask = tf.sequence_mask(text_len, maxlen=max_len)  # [n_sents, max_len]
    pair_in_length = tf.logical_and(
        tf.expand_dims(text_len_mask, [1]), tf.expand_dims(text_len_mask, [2]))
    # ... and column index >= row index (upper triangle, diagonal included).
    below_diagonal = tf.sequence_mask(tf.range(max_len), max_len)  # [i, j] is True when j < i
    upper_triangle = tf.expand_dims(tf.logical_not(below_diagonal), 0)
    upper_triangle = tf.tile(upper_triangle, [n_sents, 1, 1])
    pair_mask = tf.logical_and(pair_in_length, upper_triangle)
    flat_pair_mask = tf.reshape(pair_mask, [-1])  # [n_sents * max_len * max_len]

    context_outputs = self.lstm_contextualize(context_emb, text_len, self.lstm_dropout)

    # Separate projections for the two arguments of the bilinear classifier.
    with tf.variable_scope("candidate_starts_ffnn"):
        start_repr = util.projection(context_outputs, cfg["ffnn_size"])
    with tf.variable_scope("candidate_ends_ffnn"):
        end_repr = util.projection(context_outputs, cfg["ffnn_size"])

    pair_scores = util.bilinear_classifier(
        start_repr, end_repr, self.dropout, output_size=self.num_types + 1)
    candidate_ner_scores = tf.boolean_mask(
        tf.reshape(pair_scores, [-1, self.num_types + 1]), flat_pair_mask)

    per_pair_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=gold_labels, logits=candidate_ner_scores)
    loss = tf.reduce_sum(per_pair_loss)

    return candidate_ner_scores, loss
def fit_cube(max_iter=5000,
             resolution=4,
             discontinuous=False,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             out_dir=None,
             log_fn=None,
             mp4save_interval=None,
             mp4save_fn=None):
    """Fit vertex positions and colors of a cube to rendered reference images.

    Loads ``data/cube_d.npz`` or ``data/cube_c.npz`` (selected by
    ``discontinuous``), perturbs the vertex positions, randomizes the vertex
    colors and optimizes both with Adam so that renderings under random
    rotations/translations match the reference mesh under an L2 pixel loss.

    Args:
        max_iter: Index of the last iteration; each repeat runs
            ``max_iter + 1`` iterations.
        resolution: Resolution argument forwarded to ``render`` during
            optimization.
        discontinuous: Load ``cube_d.npz`` when true, ``cube_c.npz`` otherwise.
        repeats: Number of independent optimization runs.
        log_interval: Print (and optionally write) the averaged geometric
            error every this many iterations. Falsy disables logging.
        display_interval: Show the result every this many iterations via
            ``util.display_image``. Falsy disables display.
        display_res: Resolution of the display renderings, and ``size``
            argument forwarded to ``util.display_image``.
        out_dir: Output directory, created if missing. Without it nothing is
            written to disk.
        log_fn: Log file name inside ``out_dir``.
        mp4save_interval: Append a video frame every this many iterations.
        mp4save_fn: Video file name inside ``out_dir``. A video is only
            written when both this and ``mp4save_interval`` are set.
    """
    log_file = None
    writer = None
    try:
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
            if log_fn:
                log_file = open(f'{out_dir}/{log_fn}', 'wt')
            # Require both an interval and a file name. Testing only
            # `mp4save_interval != 0` opened a writer on "<out_dir>/None"
            # whenever the defaults were left untouched.
            if mp4save_interval and mp4save_fn:
                writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
        if writer is None:
            # No writer means no frame may ever be appended.
            mp4save_interval = None

        datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
        fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
        with np.load(f'{datadir}/{fn}') as f:
            pos_idx, vtxp, col_idx, vtxc = f.values()
        print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))

        # Create position/triangle index tensors
        pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
        col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
        vtx_pos = torch.from_numpy(vtxp.astype(np.float32)).cuda()
        vtx_col = torch.from_numpy(vtxc.astype(np.float32)).cuda()

        glctx = dr.RasterizeGLContext()

        # Repeats.
        for rep in range(repeats):
            ang = 0.0
            gl_avg = []

            # Start from perturbed positions and random colors.
            vtx_pos_rand = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
            vtx_col_rand = np.random.uniform(0.0, 1.0, size=vtxc.shape)
            vtx_pos_opt = torch.tensor(vtx_pos_rand, dtype=torch.float32, device='cuda', requires_grad=True)
            vtx_col_opt = torch.tensor(vtx_col_rand, dtype=torch.float32, device='cuda', requires_grad=True)

            # Adam optimizer for vertex position and color with a learning rate ramp.
            optimizer = torch.optim.Adam([vtx_pos_opt, vtx_col_opt], lr=1e-2)
            scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: max(0.01, 10**(-x * 0.0005)))

            for it in range(max_iter + 1):
                # Random rotation/translation matrix for optimization.
                r_rot = util.random_rotation_translation(0.25)

                # Smooth rotation for display.
                a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

                # Modelview and modelview + projection matrices.
                proj = util.projection(x=0.4)
                r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
                r_mvp = np.matmul(proj, r_mv).astype(np.float32)
                a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
                a_mvp = np.matmul(proj, a_mv).astype(np.float32)

                # Compute geometric error for logging.
                with torch.no_grad():
                    geom_loss = torch.mean(torch.sum((torch.abs(vtx_pos_opt) - .5)**2, dim=1)**0.5)
                    gl_avg.append(float(geom_loss))

                # Print/save log.
                if log_interval and (it % log_interval == 0):
                    gl_val = np.mean(np.asarray(gl_avg))
                    gl_avg = []
                    s = ("rep=%d," % rep) if repeats > 1 else ""
                    s += "iter=%d,err=%f" % (it, gl_val)
                    print(s)
                    if log_file:
                        log_file.write(s + "\n")

                color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_col, col_idx, resolution)
                color_opt = render(glctx, r_mvp, vtx_pos_opt, pos_idx, vtx_col_opt, col_idx, resolution)

                # Compute loss and train.
                loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()

                # Show/save image.
                display_image = display_interval and (it % display_interval == 0)
                save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

                if display_image or save_mp4:
                    # The display rotation only advances on frames actually shown/saved.
                    ang = ang + 0.01

                    img_b = color[0].cpu().numpy()
                    img_o = color_opt[0].detach().cpu().numpy()
                    img_d = render(glctx, a_mvp, vtx_pos_opt, pos_idx, vtx_col_opt, col_idx, display_res)[0]
                    img_r = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_col, col_idx, display_res)[0]

                    # Upscale the low-resolution training images by pixel repetition.
                    scl = display_res // img_o.shape[0]
                    img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                    img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                    result_image = make_grid(np.stack([img_o, img_b, img_d.detach().cpu().numpy(), img_r.cpu().numpy()]))

                    if display_image:
                        util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
                    if save_mp4:
                        writer.append_data(np.clip(np.rint(result_image * 255.0), 0, 255).astype(np.uint8))
    finally:
        # Done (or failed): release the video writer and log file either way.
        if writer is not None:
            writer.close()
        if log_file:
            log_file.close()
def get_predictions_and_loss(self, input_ids, input_mask, text_len,
                             speaker_ids, genre, is_training, gold_starts,
                             gold_ends, cluster_ids, sentence_map):
    """Build the coreference graph: span pruning, antecedent scoring, loss.

    Token embeddings come from a BERT model and are flattened to the valid
    tokens. All spans up to ``self.max_span_width`` that stay inside one
    sentence are enumerated and scored; the top ``k`` are kept, each gets up
    to ``c`` antecedents from ``self.coarse_pruning``, and (when
    ``config['fine_grained']`` is set) antecedent scores are refined over
    ``config['coref_depth']`` gated attention rounds. ``text_len`` is not
    used by this method.

    Returns:
        A pair ``(outputs, loss)`` where ``outputs`` is the list
        [candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores] and ``loss`` is the result of
        ``self.softmax_loss`` (one value per kept span, not reduced here).
    """
    bert = modeling.BertModel(config=self.bert_config,
                              is_training=is_training,
                              input_ids=input_ids,
                              input_mask=input_mask,
                              use_one_hot_embeddings=False,
                              scope='bert')
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)

    token_emb = bert.get_sequence_output()  # (batch_size, seq_len, hidden)
    # (b, s, e) -> (b*s, e): keep only the embeddings of valid tokens.
    mention_doc = self.flatten_emb_by_sentence(token_emb, input_mask)
    num_words = util.shape(mention_doc, 0)

    # Candidate spans: every position is a possible start and each start has
    # max_span_width possible ends -> [num_words, max_span_width] grids.
    start_grid = tf.tile(tf.expand_dims(tf.range(num_words), 1),
                         [1, self.max_span_width])
    end_grid = start_grid + tf.expand_dims(tf.range(self.max_span_width), 0)

    # Sentence id at each grid position (ends are clamped for the lookup only).
    start_sentence = tf.gather(sentence_map, start_grid)
    end_sentence = tf.gather(sentence_map, tf.minimum(end_grid, num_words - 1))

    # A span is valid when its end is in range and both ends share a sentence.
    end_in_range = end_grid < num_words
    same_sentence = tf.equal(start_sentence, end_sentence)
    flat_valid = tf.reshape(tf.logical_and(end_in_range, same_sentence),
                            [-1])  # [num_words * max_span_width]

    # Flatten and drop invalid spans -> [num_candidates].
    candidate_starts = tf.boolean_mask(tf.reshape(start_grid, [-1]), flat_valid)
    candidate_ends = tf.boolean_mask(tf.reshape(end_grid, [-1]), flat_valid)

    # Cluster id of every candidate span.
    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends, cluster_ids)

    # NOTE(review): the method name below is unusual (it reads like
    # `get_span_emb` fused with `max_top_antecedents`); confirm that it
    # exists on this class before relying on this code path.
    candidate_span_emb = self.get_span_embmax_top_antecedents(
        mention_doc, candidate_starts, candidate_ends)  # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(
        candidate_span_emb, candidate_starts, candidate_ends)  # [num_candidates]

    # Beam size: at most 3900 and at most num_words * top_span_ratio spans.
    span_budget = tf.floor(tf.to_float(num_words) * self.config["top_span_ratio"])
    k = tf.minimum(3900, tf.to_int32(span_budget))
    # Antecedents per span.
    c = tf.minimum(self.config["max_top_antecedents"], k)

    # Select the k spans using the mention scores.
    top_span_indices = coref_ops.extract_spans(
        tf.expand_dims(candidate_mention_scores, 0),
        tf.expand_dims(candidate_starts, 0),
        tf.expand_dims(candidate_ends, 0),
        tf.expand_dims(k, 0), num_words, True)  # [1, k]
    top_span_indices = tf.reshape(top_span_indices, [-1])  # [k]

    # Gather everything that describes the kept spans.
    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb, top_span_indices)  # [k, emb]
    top_span_mention_scores = tf.gather(candidate_mention_scores, top_span_indices)  # [k]

    # Coarse pairwise stage: up to c antecedents per kept span.
    (top_antecedents, top_antecedents_mask, top_fast_antecedent_scores,
     top_antecedent_offsets) = self.coarse_pruning(
         top_span_emb, top_span_mention_scores, c)

    genre_table = tf.get_variable(
        "genre_embeddings",
        [len(self.genres), self.config["feature_size"]],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    genre_emb = tf.gather(genre_table, genre)  # [emb]

    top_span_speaker_ids = None
    if self.config['use_metadata']:
        # Flatten speaker ids like the embeddings; a span takes the speaker
        # id found at its start position.
        flat_speaker_ids = self.flatten_emb_by_sentence(speaker_ids, input_mask)
        top_span_speaker_ids = tf.gather(flat_speaker_ids, top_span_starts)

    dummy_scores = tf.zeros([k, 1])  # [k, 1]

    # Segment (input row) index of every valid token.
    num_segs, seg_len = util.shape(input_ids, 0), util.shape(input_ids, 1)
    segment_grid = tf.tile(tf.expand_dims(tf.range(0, num_segs), 1), [1, seg_len])
    flat_word_segments = tf.boolean_mask(tf.reshape(segment_grid, [-1]),
                                         tf.reshape(input_mask, [-1]))
    # Segment of each kept span, [k, 1], and of each of its antecedents, [k, c].
    mention_segments = tf.expand_dims(
        tf.gather(flat_word_segments, top_span_starts), 1)
    antecedent_segments = tf.gather(
        flat_word_segments, tf.gather(top_span_starts, top_antecedents))

    if self.config['use_segment_distance']:
        # [k, c]: number of segments between a mention and its antecedent.
        segment_distance = tf.clip_by_value(
            mention_segments - antecedent_segments, 0,
            self.config['max_training_sentences'] - 1)
    else:
        segment_distance = None

    if self.config['fine_grained']:
        # Higher-order refinement: each round re-scores the antecedents and
        # mixes an attention summary of them back into the span embedding.
        for depth in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(depth > 0)):
                top_antecedent_emb = tf.gather(top_span_emb, top_antecedents)  # [k, c, emb]
                slow_scores = self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids, genre_emb,
                    segment_distance)
                top_antecedent_scores = top_fast_antecedent_scores + slow_scores  # [k, c]
                # Attention weights over (dummy, antecedents): [k, c + 1].
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores], 1))
                # The span itself fills the dummy slot: [k, c + 1, emb].
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb], 1)
                # Weighted sum over the antecedent axis: [k, emb].
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) * top_antecedent_emb, 1)
                with tf.variable_scope("f"):
                    gate = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = gate * attended_span_emb + (1 - gate) * top_span_emb  # [k, emb]
    else:
        top_antecedent_scores = top_fast_antecedent_scores

    top_antecedent_scores = tf.concat(
        [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

    # Labels: an antecedent is correct when it shares the span's non-zero
    # cluster id; the dummy slot is correct when no antecedent is.
    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids, top_antecedents)  # [k, c]
    # log(0) on masked-out antecedents moves their ids away from any real cluster id.
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
    same_cluster_indicator = tf.equal(
        top_antecedent_cluster_ids, tf.expand_dims(top_span_cluster_ids, 1))  # [k, c]
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0, 1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator, non_dummy_indicator)  # [k, c]
    dummy_labels = tf.logical_not(
        tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1]

    loss = self.softmax_loss(top_antecedent_scores, top_antecedent_labels)  # [k]

    outputs = [
        candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores
    ]
    return outputs, loss
def fit_mesh_col(
    initial_mesh: dict,
    target_dataset_dir: str,
    max_iterations: int = 10000,
    resolution: int = 256,
    log_interval: int = None,
    display_interval = None,
    display_res = 512,
    out_dir = None,
    mp4save_interval = None
):
    """Fit per-frame vertex offsets and vertex colors to reference images.

    Each reference frame ``j`` is encoded as a one-hot vector and mapped
    through three learned matrices (``M3 @ M2 @ M1 @ one_hot``) to a flat
    offset that is added to the rotated initial vertex positions. The offset
    matrices and the vertex colors are optimized with Adam on an L2 image
    loss plus a regularizer (curvature change and squared offset magnitude).

    Outputs are written to the current working directory: ``estimate.png``
    and ``img.png`` on display iterations, one ``frame_<i>.obj`` per
    reference frame at the end, and ``vtx_col.npz`` with the final colors.

    Args:
        initial_mesh: Mapping with NumPy arrays under ``'pos_idx'`` (triangle
            indices) and ``'vtx_pos'`` (vertex positions).
        target_dataset_dir: Directory passed to ``util.ReferenceImages``.
        max_iterations: Number of passes over the reference frames.
        resolution: Width and height of the reference images and renderings.
        log_interval: Currently unused.
        display_interval: Print the loss and save debug images every this
            many passes; the last pass always does so.
        display_res: Currently unused.
        out_dir: Currently unused.
        mp4save_interval: Currently unused.
    """
    distance = 3
    target_dataset = util.ReferenceImages(target_dataset_dir, resolution, resolution)
    num_frames = len(target_dataset)

    pos_idx = torch.from_numpy(initial_mesh['pos_idx'].astype(np.int32))
    vtx_pos = torch.from_numpy(initial_mesh['vtx_pos'].astype(np.float32))
    # The Laplace matrix is computed before the tensors are moved to the GPU.
    laplace = util.compute_laplace_matrix(vtx_pos, pos_idx).cuda()
    pos_idx = pos_idx.cuda()
    vtx_pos = vtx_pos.cuda()

    init_rot = util.rotate_z(-math.pi/2).cuda()
    vtx_pos = transform_pos(init_rot, vtx_pos)[0][:, 0:3]
    vtx_pos.requires_grad = True

    # Colors are indexed with the same triangle indices as the positions.
    col_idx = torch.from_numpy(initial_mesh['pos_idx'].astype(np.int32)).cuda()
    vtx_col = torch.ones_like(vtx_pos) * 0.5
    vtx_col.requires_grad = True

    glctx = dr.RasterizeGLContext()

    # Learned maps from a one-hot frame code to a flat (3 * num_vertices) offset.
    M1 = torch.eye(num_frames).cuda()
    M1.requires_grad = True
    M2 = torch.eye(num_frames).cuda()
    M2.requires_grad = True
    M3 = torch.zeros((3 * vtx_pos.shape[0], num_frames)).cuda()
    M3.requires_grad = True

    params = [{'params': [M1, M2, M3], 'lr': 1e-3}, {'params': vtx_col, 'lr': 1e-2}]
    optimizer = torch.optim.Adam(params)

    loss_hist = []
    for i in range(max_iterations):
        for j, (img, angle) in enumerate(target_dataset):
            img = img.cuda().permute(2, 1, 0)

            frame_tensor = torch.zeros(num_frames)
            frame_tensor[j] = 1
            frame_tensor = frame_tensor.cuda()
            frame_tensor.requires_grad = True

            deltas = torch.matmul(M3, torch.matmul(M2, torch.matmul(M1, frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape((vtx_pos.shape[0], 3))

            # create the model-view-projection matrix
            # rotate model by the frame's angle
            rot = util.rotate_y(angle)
            # translate by distance
            tr = util.translate(z=-distance)
            # perspective projection
            proj = util.projection(x=0.4)
            mtx = proj.matmul(tr.matmul(rot)).cuda()
            mtx.requires_grad = True

            estimate = render(glctx, mtx, deformed_vtxs, pos_idx, col_idx, vtx_col, resolution)[0]

            # compute loss
            loss = torch.mean((estimate - img) ** 2)
            # compute regularizer
            reg = torch.mean((util.compute_curvature(deformed_vtxs, laplace) - util.compute_curvature(vtx_pos, laplace)) ** 2) + torch.mean(deltas**2)
            # combine
            loss = loss + 5 * reg
            # detach() is required: numpy() raises on a tensor that requires grad.
            loss_hist.append(loss.detach().cpu().numpy())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                # clamp color between 0 and 1
                vtx_col.clamp_(0, 1)

        if (display_interval and (i % display_interval == 0)) or (i == max_iterations - 1):
            # Debug output for the last frame processed in this pass.
            print(loss)
            with torch.no_grad():
                estimate = render(glctx, mtx, deformed_vtxs, pos_idx, col_idx, vtx_col, resolution)[0].detach().cpu().numpy()
                Image.fromarray((estimate * 255).astype(np.uint8)).save('estimate.png')
                img = img.detach().cpu().numpy()
                Image.fromarray((img * 255).astype(np.uint8)).save('img.png')

    # Export one deformed mesh per reference frame.
    with torch.no_grad():
        for frame_idx, _ in enumerate(target_dataset):
            frame_tensor = torch.zeros(num_frames)
            # Use this frame's own index; the stale training index `j` made
            # every exported mesh identical to the last frame's.
            frame_tensor[frame_idx] = 1
            frame_tensor = frame_tensor.cuda()

            deltas = torch.matmul(M3, torch.matmul(M2, torch.matmul(M1, frame_tensor))).flatten()
            deformed_vtxs = (vtx_pos.flatten() + deltas).reshape((vtx_pos.shape[0], 3))
            deformed_vtxs = torch.clamp(deformed_vtxs, -1.0, 1.0)

            util.write_obj(f"frame_{frame_idx}.obj", deformed_vtxs.detach().cpu().tolist(), pos_idx.detach().cpu().tolist(), vtx_col.detach().cpu().tolist())

    np.savez('vtx_col.npz', vtx_col=vtx_col.cpu().detach().numpy())
def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                             lm_emb, char_index, text_len, speaker_ids, genre,
                             is_training, gold_starts, gold_ends, cluster_ids,
                             inject_starts, inject_ends):
    """Build the graph that scores candidate spans and their antecedents.

    The method assembles word representations (context/head embeddings,
    optional character CNN features and a learned mix of LM layers), runs
    ``self.lstm_contextualize``, enumerates or injects candidate spans,
    selects top spans, prunes antecedents and refines span embeddings for
    ``coref_depth`` rounds before computing the loss.

    Side effects: sets ``self.dropout``, ``self.lexical_dropout``,
    ``self.lstm_dropout``, ``self.lm_weights`` and ``self.lm_scaling``.

    Returns:
        A pair ``(predictions, loss)`` where ``predictions`` is the list
        ``[candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores]`` and ``loss`` is the sum of
        ``self.softmax_loss`` over the top spans.
    """
    self.dropout = self.get_dropout(self.config["dropout_rate"], is_training)
    self.lexical_dropout = self.get_dropout(
        self.config["lexical_dropout_rate"], is_training)
    self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                         is_training)

    num_sentences = tf.shape(context_word_emb)[0]
    max_sentence_length = tf.shape(context_word_emb)[1]

    context_emb_list = [context_word_emb]
    head_emb_list = [head_word_emb]

    # Optional character-level features, appended to both embedding lists.
    if self.config["char_embedding_size"] > 0:
        char_emb = tf.gather(
            tf.get_variable(
                "char_embeddings",
                [len(self.char_dict), self.config["char_embedding_size"]]),
            char_index
        )  # [num_sentences, max_sentence_length, max_word_length, emb]
        flattened_char_emb = tf.reshape(char_emb, [
            num_sentences * max_sentence_length,
            util.shape(char_emb, 2),
            util.shape(char_emb, 3)
        ])  # [num_sentences * max_sentence_length, max_word_length, emb]
        flattened_aggregated_char_emb = util.cnn(
            flattened_char_emb, self.config["filter_widths"],
            self.config["filter_size"]
        )  # [num_sentences * max_sentence_length, emb]
        aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
            num_sentences, max_sentence_length,
            util.shape(flattened_aggregated_char_emb, 1)
        ])  # [num_sentences, max_sentence_length, emb]
        context_emb_list.append(aggregated_char_emb)
        head_emb_list.append(aggregated_char_emb)

    # Without a cached LM file, the `lm_emb` argument is replaced by the
    # output of the ELMo hub module computed in-graph.
    if not self.lm_file:
        elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
        lm_embeddings = elmo_module(inputs={
            "tokens": tokens,
            "sequence_len": text_len
        }, signature="tokens", as_dict=True)
        word_emb = lm_embeddings[
            "word_emb"]  # [num_sentences, max_sentence_length, 512]
        # word_emb is concatenated with itself so it can be stacked with the
        # two LSTM output layers along a new last axis.
        lm_emb = tf.stack([
            tf.concat([word_emb, word_emb], -1),
            lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
        ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
    lm_emb_size = util.shape(lm_emb, 2)
    lm_num_layers = util.shape(lm_emb, 3)
    # Softmax-normalized layer weights plus a scalar scale, both learned.
    with tf.variable_scope("lm_aggregation"):
        self.lm_weights = tf.nn.softmax(
            tf.get_variable("lm_scores", [lm_num_layers],
                            initializer=tf.constant_initializer(0.0)))
        self.lm_scaling = tf.get_variable(
            "lm_scaling", [], initializer=tf.constant_initializer(1.0))
    flattened_lm_emb = tf.reshape(
        lm_emb,
        [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
    flattened_aggregated_lm_emb = tf.matmul(
        flattened_lm_emb, tf.expand_dims(
            self.lm_weights,
            1))  # [num_sentences * max_sentence_length * emb, 1]
    aggregated_lm_emb = tf.reshape(
        flattened_aggregated_lm_emb,
        [num_sentences, max_sentence_length, lm_emb_size])
    aggregated_lm_emb *= self.lm_scaling
    # The LM features go into the context embeddings only, not the head ones.
    context_emb_list.append(aggregated_lm_emb)

    context_emb = tf.concat(context_emb_list,
                            2)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.concat(head_emb_list,
                         2)  # [num_sentences, max_sentence_length, emb]
    context_emb = tf.nn.dropout(
        context_emb,
        self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
    head_emb = tf.nn.dropout(
        head_emb,
        self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

    text_len_mask = tf.sequence_mask(
        text_len,
        maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

    context_outputs = self.lstm_contextualize(
        context_emb, text_len, text_len_mask)  # [num_words, emb]
    num_words = util.shape(context_outputs, 0)

    genre_emb = tf.gather(
        tf.get_variable("genre_embeddings",
                        [len(self.genres), self.config["feature_size"]]),
        genre)  # [emb]

    # Sentence index of every word, used to reject spans crossing sentences.
    sentence_indices = tf.tile(
        tf.expand_dims(tf.range(num_sentences), 1),
        [1, max_sentence_length])  # [num_sentences, max_sentence_length]
    flattened_sentence_indices = self.flatten_emb_by_sentence(
        sentence_indices, text_len_mask)  # [num_words]
    flattened_head_emb = self.flatten_emb_by_sentence(
        head_emb, text_len_mask)  # [num_words]

    # Candidates are either the injected spans or every span of up to
    # self.max_span_width words starting at each word.
    if self._use_injected_mentions(is_training):
        candidate_starts = tf.transpose(tf.expand_dims(inject_starts, 1))
        candidate_ends = tf.transpose(tf.expand_dims(inject_ends, 1))
    else:
        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
    candidate_start_sentence_indices = tf.gather(
        flattened_sentence_indices,
        candidate_starts)  # [num_words, max_span_width]
    # Ends are clipped to the last word so the gather stays in range; such
    # out-of-range candidates are removed by the mask below.
    candidate_end_sentence_indices = tf.gather(
        flattened_sentence_indices,
        tf.minimum(candidate_ends,
                   num_words - 1))  # [num_words, max_span_width]
    # Keep spans that end inside the document and within a single sentence.
    candidate_mask = tf.logical_and(
        candidate_ends < num_words,
        tf.equal(
            candidate_start_sentence_indices,
            candidate_end_sentence_indices))  # [num_words, max_span_width]
    flattened_candidate_mask = tf.reshape(
        candidate_mask, [-1])  # [num_words * max_span_width]
    candidate_starts = tf.boolean_mask(
        tf.reshape(candidate_starts, [-1]),
        flattened_candidate_mask)  # [num_candidates]
    candidate_ends = tf.boolean_mask(
        tf.reshape(candidate_ends, [-1]),
        flattened_candidate_mask)  # [num_candidates]
    candidate_sentence_indices = tf.boolean_mask(
        tf.reshape(candidate_start_sentence_indices, [-1]),
        flattened_candidate_mask)  # [num_candidates]

    candidate_cluster_ids = self.get_candidate_labels(
        candidate_starts, candidate_ends, gold_starts, gold_ends,
        cluster_ids)  # [num_candidates]

    candidate_span_emb = self.get_span_emb(
        flattened_head_emb, context_outputs, candidate_starts,
        candidate_ends)  # [num_candidates, emb]
    candidate_mention_scores = self.get_mention_scores(
        candidate_span_emb)  # [k, 1]
    candidate_mention_scores = tf.squeeze(candidate_mention_scores, 1)  # [k]

    # Injected mentions are all kept; otherwise the number of kept spans is
    # top_span_ratio times the number of words and selection is delegated to
    # the coref_ops.extract_spans op.
    if self._use_injected_mentions(is_training):
        k = tf.shape(candidate_starts)[0]
        top_span_indices = tf.expand_dims(tf.range(k), 0)
    else:
        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(context_outputs)[0]) *
                self.config["top_span_ratio"]))
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
            util.shape(context_outputs, 0), True)  # [1, k]
        top_span_indices.set_shape([1, None])
    top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

    top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
    top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
    top_span_emb = tf.gather(candidate_span_emb,
                             top_span_indices)  # [k, emb]
    top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                     top_span_indices)  # [k]
    top_span_mention_scores = tf.gather(candidate_mention_scores,
                                        top_span_indices)  # [k]
    top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                          top_span_indices)  # [k]
    top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

    # Number of antecedents considered per span, capped by the span count.
    c = tf.minimum(self.config["max_top_antecedents"], k)

    if self.config["coarse_to_fine"]:
        top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
            top_span_emb, top_span_mention_scores, c)
    else:
        top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
            top_span_emb, top_span_mention_scores, c)
    # Score of the "no antecedent" option, fixed at zero.
    dummy_scores = tf.zeros([k, 1])  # [k, 1]
    # NOTE: top_antecedent_scores is only assigned inside this loop, so
    # coref_depth must be at least 1 for the code after the loop to work.
    for i in range(self.config["coref_depth"]):
        # Variables are shared across refinement rounds.
        with tf.variable_scope("coref_layer", reuse=(i > 0)):
            top_antecedent_emb = tf.gather(top_span_emb,
                                           top_antecedents)  # [k, c, emb]
            top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                top_span_emb, top_antecedents, top_antecedent_emb,
                top_antecedent_offsets, top_span_speaker_ids,
                genre_emb)  # [k, c]
            top_antecedent_weights = tf.nn.softmax(
                tf.concat([dummy_scores, top_antecedent_scores],
                          1))  # [k, c + 1]
            # The span itself takes the slot that lines up with the dummy
            # score in the attention weights.
            top_antecedent_emb = tf.concat(
                [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                1)  # [k, c + 1, emb]
            attended_span_emb = tf.reduce_sum(
                tf.expand_dims(top_antecedent_weights, 2) *
                top_antecedent_emb, 1)  # [k, emb]
            # Sigmoid gate interpolating between the attended and the
            # current span embedding.
            with tf.variable_scope("f"):
                f = tf.sigmoid(
                    util.projection(
                        tf.concat([top_span_emb, attended_span_emb], 1),
                        util.shape(top_span_emb, -1)))  # [k, emb]
                top_span_emb = f * attended_span_emb + (
                    1 - f) * top_span_emb  # [k, emb]

    top_antecedent_scores = tf.concat(
        [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

    top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                           top_antecedents)  # [k, c]
    # log(1) = 0 leaves valid entries untouched; log(0) is added for masked
    # entries, presumably so they can never match a real cluster id
    # (TODO confirm the int32 cast result on the target platform).
    top_antecedent_cluster_ids += tf.to_int32(
        tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
    same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                      tf.expand_dims(
                                          top_span_cluster_ids, 1))  # [k, c]
    # Spans with cluster id <= 0 are treated as having no gold antecedent.
    non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                         1)  # [k, 1]
    pairwise_labels = tf.logical_and(same_cluster_indicator,
                                     non_dummy_indicator)  # [k, c]
    # The dummy label is true exactly when no antecedent label is true.
    dummy_labels = tf.logical_not(
        tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
    top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                      1)  # [k, c + 1]
    loss = self.softmax_loss(top_antecedent_scores,
                             top_antecedent_labels)  # [k]
    loss = tf.reduce_sum(loss)  # []

    return [
        candidate_starts, candidate_ends, candidate_mention_scores,
        top_span_starts, top_span_ends, top_antecedents,
        top_antecedent_scores
    ], loss
def get_span_emb(self, head_emb, context_outputs, span_starts, span_ends):
    """Assemble the embedding of each span from its configured parts.

    The result concatenates, along axis 1: the start and end boundary
    representations, an optional span-width embedding
    (``config["use_features"]``) and an optional attention-weighted sum of
    head embeddings over the span (``config["model_heads"]``).

    With ``config["use_multi_span"]`` the 2-D ``span_starts`` / ``span_ends``
    are gathered from ``context_outputs`` and passed through ``util.cnn2d``;
    afterwards only their first column is kept for the width and head
    computations. Otherwise the boundaries are plain gathers.

    Side effect: sets ``self.head_scores`` when ``model_heads`` is enabled.
    """
    cfg = self.config

    if cfg["use_multi_span"]:
        boundary_size = 2 * cfg["contextualization_size"]
        num_rows = util.shape(span_starts, 0)
        num_cols = util.shape(span_starts, 1)

        def boundary_cnn(indices, scope_name):
            # Gather the contextual vectors for a grid of indices and run
            # the 2-D CNN over them.
            flat_indices = tf.reshape(indices, [num_rows * num_cols])  # [a*b]
            flat_vectors = tf.gather(context_outputs,
                                     flat_indices)  # [a*b, emb]
            grid = tf.reshape(
                flat_vectors,
                [num_rows, num_cols, boundary_size])  # [a, b, emb]
            grid_4d = tf.expand_dims(grid, 3)
            return util.cnn2d(grid_4d,
                              cfg["emb_filter_widths"],
                              ffnn_out_size=boundary_size,
                              name=scope_name)

        span_start_emb = boundary_cnn(span_starts, "start")
        span_end_emb = boundary_cnn(span_ends, "end")

        # Only the first column is used from here on.  todo model_heads
        span_starts = tf.squeeze(span_starts[:, :1], 1)
        span_ends = tf.squeeze(span_ends[:, :1], 1)
    else:
        span_start_emb = tf.gather(context_outputs, span_starts)  # [k, emb]
        span_end_emb = tf.gather(context_outputs, span_ends)  # [k, emb]

    span_width = 1 + span_ends - span_starts  # [k]
    parts = [span_start_emb, span_end_emb]

    if cfg["use_features"]:
        # Widths start at 1, the embedding table is 0-based.
        width_bucket = span_width - 1  # [k]
        width_table = tf.get_variable(
            "span_width_embeddings",
            [cfg["max_span_width"], cfg["feature_size"]])
        width_emb = tf.gather(width_table, width_bucket)  # [k, emb]
        width_emb = tf.nn.dropout(width_emb, self.dropout)
        parts.append(width_emb)

    if cfg["model_heads"]:
        offsets = tf.expand_dims(tf.range(cfg["max_span_width"]), 0)
        first_words = tf.expand_dims(span_starts, 1)
        word_indices = offsets + first_words  # [k, max_span_width]
        # Clip to the last word so the gathers below stay in range.
        last_word = util.shape(context_outputs, 0) - 1
        word_indices = tf.minimum(last_word,
                                  word_indices)  # [k, max_span_width]
        word_emb = tf.gather(head_emb,
                             word_indices)  # [k, max_span_width, emb]
        with tf.variable_scope("head_scores"):
            self.head_scores = util.projection(context_outputs,
                                               1)  # [num_words, 1]
        word_scores = tf.gather(self.head_scores,
                                word_indices)  # [k, max_span_width, 1]
        in_span = tf.sequence_mask(span_width,
                                   cfg["max_span_width"],
                                   dtype=tf.float32)
        in_span = tf.expand_dims(in_span, 2)  # [k, max_span_width, 1]
        # log(0) for positions past the span end removes them from the softmax.
        word_scores = word_scores + tf.log(in_span)  # [k, max_span_width, 1]
        attention = tf.nn.softmax(word_scores, 1)  # [k, max_span_width, 1]
        head_summary = tf.reduce_sum(attention * word_emb, 1)  # [k, emb]
        parts.append(head_summary)

    return tf.concat(parts, 1)  # [k, emb]