def collect_attention_features(attention_meta: AttentionFeaturesMeta, graph, rank=2):
    dropout_rate = graph["inputs"]["dropout_rate"]

    cont_3d, cat_3d, placeholders_3d = collect_basic_features(attention_meta.get_token_features_meta(), rank)
    cont_2d, cat_2d, placeholders_2d = collect_basic_features(attention_meta.get_relation_features_meta(), rank - 1)

    graph["inputs"].update({**placeholders_3d, **placeholders_2d})

    return concat_tensors_list([dropout(cont_3d, dropout_rate), cat_3d]), \
        concat_tensors_list([dropout(cont_2d, dropout_rate), cat_2d])


def create_attention_mechanism(
        context_encoding, seq_len, attention_3d_features, attention_2d_features, dropout_placeholder, props):

    attention_input = tf.nn.dropout(context_encoding, dropout_placeholder)
    if attention_3d_features is not None:
        attention_input = tf.concat([attention_input, attention_3d_features], axis=2)

    attention_aggregation_dense_size = props.get("dense_size", -1)
    if attention_aggregation_dense_size > 0:
        aggregation_tensor = apply_dense_layer_to_nd_tensor(
            context_encoding, attention_aggregation_dense_size, "att_aggr")
    else:
        aggregation_tensor = context_encoding

    if attention_2d_features is not None:
        queries = tf.tile(tf.expand_dims(attention_2d_features, 1), [1, tf.shape(context_encoding)[1], 1])
    else:
        queries = None

    attention_type = props.get("type", "bahdanau")
    if attention_type == "bahdanau":
        keys = concat_tensors_list([attention_input, queries])
        return BahdanauAttention.from_props(props)((keys, aggregation_tensor, seq_len))
    elif attention_type == "luong":
        if queries is None:
            raise Exception("No attention 2d features provided for Luong attention")
        return LuongAttention.from_props(props)((attention_input, queries, aggregation_tensor, seq_len))
    else:
        raise Exception("Unknown attention type")


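# Illustrative sketch (not part of the original API surface): the props dict passed to
# create_attention_mechanism only needs the keys read above; the full dict is also forwarded to
# BahdanauAttention/LuongAttention via from_props. Values below are hypothetical examples.
#
#   attention_props = {
#       "type": "bahdanau",   # or "luong" (requires attention_2d_features)
#       "dense_size": 128     # optional; <= 0 disables the aggregation dense layer
#   }
#   attention = create_attention_mechanism(
#       context_encoding, seq_len, attention_3d_features, attention_2d_features,
#       dropout_rate, attention_props)

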
def _init_char_features(features_meta: CharsFeaturesMeta, placeholders, input_dims):
    char_features = features_meta.get_char_features()
    embedded_features = []

    for feature in char_features:
        placeholder = tf.placeholder(
            tf.int32, shape=[None] * (input_dims + 1), name=feature['name'] + '_placeholder')
        placeholders[feature['name']] = placeholder
        embedded_features.append(create_embedding_lookup(placeholder, feature))

    return concat_tensors_list(embedded_features)


def collect_word_embeddings(features_meta: WordEmbeddingsMeta, input_dims=2):
    placeholders = {}
    word_embeddings = []

    for feature in features_meta.get_precomputed_features():
        word_placeholder = tf.placeholder(
            tf.int32, shape=[None] * input_dims, name=feature['name'] + '_placeholder')
        placeholders[feature['name']] = word_placeholder

        embedding_matrix = tf.Variable(feature['vectors'], dtype=tf.float32, trainable=feature['trainable'])
        word_embeddings.append(tf.nn.embedding_lookup(embedding_matrix, word_placeholder))

    return concat_tensors_list(word_embeddings), placeholders


def create_context_encoding(task_meta: RelExtTaskGraphMeta, graph, shared_encoding, word_embeddings, rank=2):
    seq_len = graph["inputs"]["seq_len"]
    dropout_rate = graph["inputs"]["dropout_rate"]

    cont, cat, placeholders = collect_basic_features(task_meta.metas.encoder, rank)
    if task_meta.props.get("add_we", False):
        cont = concat_tensors_list([cont, word_embeddings])
    if task_meta.props.get("add_shared", False):
        cont = concat_tensors_list([cont, shared_encoding])

    encoder_features = concat_tensors_list([dropout(cont, dropout_rate), cat])
    graph["inputs"].update(placeholders)

    fw, bw, features_encoding = None, None, None
    if encoder_features is not None:
        logger.info("Size of specific encoder features in {} task = {}".format(
            task_meta.task_name, encoder_features.shape[rank].value))

        fw, bw, features_encoding = create_context_encoder(
            encoder_features, seq_len,
            skip_connection=task_meta.props.get("specific_encoder_skip_connection", False),
            encoding_type=task_meta.props.get("specific_encoder_type", "lstm"),
            encoding_size=task_meta.props.get("specific_encoder_size", 0),
            kernel_size=task_meta.props.get("specific_encoder_kernel_size", None),
            layers_num=task_meta.props.get("specific_encoder_layers_num", None),
            max_len=task_meta.props.get("specific_encoder_transformer_max_len", None),
            dropout=1 - dropout_rate)

    if task_meta.props.get("concat_we", False):
        features_encoding = concat_tensors_list([features_encoding, word_embeddings])
        fw, bw = None, None
    if task_meta.props.get("concat_shared", False):
        features_encoding = concat_tensors_list([features_encoding, shared_encoding])
        fw, bw = None, None

    return fw, bw, features_encoding


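# Illustrative sketch of the task-level props read by create_context_encoding; the key names come
# from the code above, the values are hypothetical. "add_*" keys concatenate extra inputs to the
# task-specific encoder features, while "concat_*" keys concatenate them to the resulting encoding
# (and drop the separate fw/bw outputs).
#
#   task_props = {
#       "add_we": False,
#       "add_shared": True,
#       "specific_encoder_type": "lstm",
#       "specific_encoder_size": 128,
#       "specific_encoder_skip_connection": False,
#       "concat_we": False,
#       "concat_shared": False
#   }

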
def collect_token_features(features_meta: TokenFeaturesMeta, dropout, input_dims=2, **kwargs):
    word_embeddings, we_placeholders = collect_word_embeddings(features_meta.we_meta, input_dims)
    basic_cont_features, basic_cat_features, bf_placeholders = \
        collect_basic_features(features_meta.basic_meta, input_dims)
    char_features, cf_placeholders = \
        collect_char_features(features_meta.char_meta, dropout, input_dims,
                              kernel_sizes=kwargs.get("kernel_sizes", []),
                              kernel_num_features=kwargs.get("kernel_num_features", []))

    token_cont_features = [word_embeddings, basic_cont_features, char_features]
    placeholders = {**we_placeholders, **bf_placeholders, **cf_placeholders}

    return concat_tensors_list(token_cont_features), basic_cat_features, placeholders


def create_spans_aggregation(context_encoding, indices, attention_3d_features, attention_2d_features):
    fw, bw, _ = context_encoding
    if fw is None or bw is None:
        raise Exception("Spans aggregation requires encoding with forward and backward pass")
    if attention_3d_features is not None:
        logger.info("Attention 3d features provided in spans aggregation will be ignored")

    fw_at_ends = IndexedAggregation()((fw, indices[:, :, 1] - 1))
    bw_at_starts = IndexedAggregation()((bw, indices[:, :, 0]))

    concatted = tf.reshape(
        tf.concat((fw_at_ends, bw_at_starts), axis=-1),
        [-1, indices.shape[1] * (fw.shape[2] + bw.shape[2])])

    return concat_tensors_list([concatted, attention_2d_features])


def create_pooling_mechanism(
        context_encoding, seq_len, attention_3d_features, attention_2d_features, dropout_placeholder, props):

    _, _, context_encoding = context_encoding

    if attention_2d_features is not None:
        attention_2d_features = tf.tile(
            tf.expand_dims(attention_2d_features, 1), [1, tf.shape(context_encoding)[1], 1])

    pooling_input = concat_tensors_list([context_encoding, attention_3d_features, attention_2d_features])

    if "dense_size" in props:
        pooling_input = tf.nn.dropout(pooling_input, dropout_placeholder)
        pooling_input = apply_dense_layer_to_nd_tensor(pooling_input, props["dense_size"], "pooling_dense")

    if props["type"] == "max":
        aggregation = MaxPoolingAggregation()
    elif props["type"] == "mean":
        aggregation = MeanPoolingAggregation()
    else:
        raise Exception(f"{props['type']} pooling is not supported")

    return aggregation((pooling_input, seq_len))


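# Illustrative sketch (hypothetical values): create_pooling_mechanism expects context_encoding as
# the (fw, bw, encoding) triple, "type" to be "max" or "mean", and an optional "dense_size" that
# applies dropout plus a dense projection to the pooling input before aggregation.
#
#   pooling_props = {"type": "max", "dense_size": 256}
#   pooled = create_pooling_mechanism(
#       context_encoding, seq_len, attention_3d_features, attention_2d_features,
#       dropout_rate, pooling_props)

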
def aggregate_encoding(
        context_encoding, seq_len, indices, attention_3d_features, attention_2d_features,
        task_name, dp_rate, props):

    if attention_3d_features is None and attention_2d_features is None:
        logger.info("No attention features in props in {} task".format(task_name))
    if attention_3d_features is not None:
        logger.info("Size of attention 3D features in {} task = {}".format(
            task_name, attention_3d_features.shape[2].value))
    if attention_2d_features is not None:
        logger.info("Size of attention 2D features in {} task = {}".format(
            task_name, attention_2d_features.shape[1].value))

    strategies = {
        "attention": lambda prop: create_attention_mechanism(
            context_encoding[2], seq_len, attention_3d_features, attention_2d_features, dp_rate, prop),
        "max_pooling": lambda prop: create_pooling_mechanism(
            context_encoding, seq_len, attention_3d_features, attention_2d_features, dp_rate,
            {**prop, "type": "max"}),
        "mean_pooling": lambda prop: create_pooling_mechanism(
            context_encoding, seq_len, attention_3d_features, attention_2d_features, dp_rate,
            {**prop, "type": "mean"}),
        "take_spans": lambda prop: create_spans_aggregation(
            context_encoding, indices, attention_3d_features, attention_2d_features),
        "last_hiddens": lambda prop: create_last_hiddens_aggregation(
            context_encoding, seq_len, attention_3d_features, attention_2d_features)
    }

    if "aggregation" not in props:
        return None

    aggregations = [strategies[name](prop)
                    for name, prop in sorted(props["aggregation"].items(), key=itemgetter(0))]

    return concat_tensors_list(aggregations)


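# Illustrative sketch (hypothetical values): props["aggregation"] maps strategy names to their
# per-strategy props; strategies are applied in alphabetical order of their names and the results
# are concatenated. If the "aggregation" key is absent, no aggregation is built.
#
#   props = {
#       "aggregation": {
#           "attention": {"type": "bahdanau", "dense_size": 128},
#           "max_pooling": {}
#       }
#   }

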
def collect_classifier_features(features_meta: BasicFeaturesMeta, graph):
    cont, cat, placeholders = collect_basic_features(features_meta, 1)
    graph["inputs"].update(placeholders)
    return concat_tensors_list([dropout(cont, graph["inputs"]["dropout_rate"]), cat])


def build_graphs_with_shared_encoder(props: dict, shared_features_meta: TokenFeaturesMeta,
                                     task_specific_metas: List[TaskGraphMeta], rank: int = 2):
    dropout_rate = tf.placeholder_with_default(1.0, [], 'dropout')
    learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
    seq_len = tf.placeholder(tf.int32, shape=[None] * (rank - 1), name='seq_len')
    optimizer = get_optimizer(props, learning_rate)
    freeze_shared_ce = tf.placeholder_with_default(False, [], 'freeze_shared_ce')

    shared_inputs = {
        'dropout_rate': dropout_rate,
        'learning_rate': learning_rate,
        "seq_len": seq_len,
        'freeze_shared_ce': freeze_shared_ce
    }

    # TODO: the "seq_len" key could store a list of input tensor lengths across each dimension.
    # This change would affect lots of other code, so it should be done as a separate task.
    if rank > 2:
        shared_inputs["chain_len"] = tf.placeholder(tf.int32, shape=[None] * (rank - 2), name='chain_len')
    if rank > 3:
        raise Exception(f"can't build graph with input rank {rank}")

    word_embeddings, we_placeholders = collect_word_embeddings(
        shared_features_meta.we_meta, input_dims=rank)
    cont, cat, basic_placeholders = collect_basic_features(
        shared_features_meta.basic_meta, input_dims=rank)
    char_features, char_placeholders = collect_char_features(
        shared_features_meta.char_meta, dropout_rate, input_dims=rank,
        kernel_sizes=props.get("char_kernel_sizes", []),
        kernel_num_features=props.get("char_kernel_num_features", []))

    cont = concat_tensors_list([word_embeddings, cont, char_features])
    shared_features = concat_tensors_list([dropout(cont, dropout_rate), cat])

    if shared_features is None:
        raise Exception('No shared token features given.')

    shared_inputs.update({**we_placeholders, **basic_placeholders, **char_placeholders})

    logger.info("Size of shared context encoder features = {}".format(shared_features.shape[-1]))

    shared_features = create_filtering_layer(shared_features, **props)

    _, _, shared_context_encoding = create_context_encoder(
        shared_features, seq_len,
        encoding_type=props.get("encoding_type", "lstm"),
        encoding_size=props.get("encoding_size", 0),
        skip_connection=props.get("skip_connection", False),
        kernel_size=props.get("encoder_kernel_size", None),
        layers_num=props.get("encoder_layers_num", None),
        max_len=props.get("transformer_max_len", None),
        dropout=1 - dropout_rate)

    shared_context_encoding = tf.cond(
        freeze_shared_ce,
        true_fn=lambda: tf.stop_gradient(shared_context_encoding),
        false_fn=lambda: shared_context_encoding)

    logger.info("Size of shared context encoding = {}".format(shared_context_encoding.shape[-1].value))

    task_graphs = []
    for task_meta in task_specific_metas:
        with tf.variable_scope(task_meta.task_name):
            task_graph = task_meta.factory(shared_context_encoding, word_embeddings, shared_inputs, optimizer)
            task_graphs.append(task_graph)

    return task_graphs


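# Illustrative sketch of the shared-encoder props read above (key names from the code, values
# hypothetical). Note that the full props dict is also passed to get_optimizer and, as keyword
# arguments, to create_filtering_layer.
#
#   shared_props = {
#       "encoding_type": "lstm",
#       "encoding_size": 256,
#       "skip_connection": False,
#       "char_kernel_sizes": [2, 3],
#       "char_kernel_num_features": [20, 20]
#   }
#   task_graphs = build_graphs_with_shared_encoder(
#       shared_props, shared_features_meta, task_metas, rank=2)

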
def build_coref_graph(props: dict, entity_encoder_meta: TokenFeaturesMeta, task_meta: list):
    dropout_rate = tf.placeholder_with_default(1.0, [], 'dropout')
    learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
    gold_idx = tf.placeholder(tf.int32, shape=[None], name='gold_labels')
    seq_len = tf.placeholder(tf.int32, shape=[None], name='seq_len')
    entity_seq_len = tf.placeholder(tf.int32, shape=[None, None], name='entity_seq_len')
    optimizer = get_optimizer(props, learning_rate)
    freeze_shared_ce = tf.placeholder_with_default(False, [], 'freeze_shared_ce')

    # XXX: find a way to remove this
    task_meta = task_meta[0]

    graph = {
        "inputs": {
            'labels': gold_idx,
            'dropout_rate': dropout_rate,
            'learning_rate': learning_rate,
            "seq_len": seq_len,
            "entity_seq_len": entity_seq_len,
            'freeze_shared_ce': freeze_shared_ce
        }
    }

    entity_cont, entity_cat, inputs = collect_token_features(
        entity_encoder_meta, dropout_rate,
        kernel_sizes=props.get("char_kernel_sizes", []),
        kernel_num_features=props.get("char_kernel_num_features", []),
        input_dims=3)

    entity_encoder_features = concat_tensors_list([dropout(entity_cont, dropout_rate), entity_cat])
    logger.info("Size of entity encoder features = {}".format(entity_encoder_features.shape[-1]))
    graph["inputs"].update(inputs)

    encoder_cont, encoder_cat, inputs = collect_basic_features(task_meta.metas.encoder, 2)
    graph["inputs"].update(inputs)

    entity_hidden = _create_entity_encoding(entity_encoder_features, entity_seq_len, props)
    entity_hidden = concat_tensors_list([
        dropout(concat_tensors_list([encoder_cont, entity_hidden]), dropout_rate),
        encoder_cat])
    logger.info("Size of entity encoding = {}".format(entity_hidden.shape[-1].value))

    _, _, context_encoding_state = create_birnn_layer(
        entity_hidden, seq_len, props['encoding_type'], props['encoding_size'], "context_encoding")

    hidden = tf.concat((context_encoding_state[0].h, context_encoding_state[1].h), axis=-1)
    hidden = tf.nn.dropout(hidden, dropout_rate)

    classifier_features = collect_classifier_features(task_meta.metas.classifier, graph)
    if classifier_features is not None:
        logger.info("Size of classifier features = {}".format(classifier_features.shape[1].value))
        hidden = tf.concat([hidden, classifier_features], axis=-1)

    logger.info("Size of hidden vector = {}".format(hidden.shape[-1].value))

    classification_dense_size = task_meta.props.get('classification_dense_size', 0)
    if classification_dense_size > 0:
        hidden = tf.layers.dense(hidden, classification_dense_size, tf.nn.sigmoid)

    loss, label, scores = get_loss(
        hidden, graph["inputs"]["labels"], task_meta.feature_extractor.get_labels_size(), task_meta.props)
    train_op = get_train_op(loss, optimizer, task_meta.props)

    graph.update({
        'losses': [loss],
        'train_ops': [train_op],
        'outputs': {
            "predictions": label,
            "scores": scores
        }
    })

    return graph


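# Illustrative sketch (hypothetical values): build_coref_graph reads 'encoding_type' and
# 'encoding_size' from the top-level props (also forwarded to _create_entity_encoding and
# get_optimizer), while per-task options such as 'classification_dense_size' come from
# task_meta.props.
#
#   coref_props = {
#       "encoding_type": "lstm",
#       "encoding_size": 128,
#       "char_kernel_sizes": [3],
#       "char_kernel_num_features": [30]
#   }
#   coref_graph = build_coref_graph(coref_props, entity_encoder_meta, [coref_task_meta])

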
def collect_basic_features(features_meta: BasicFeaturesMeta, input_dims=2):
    placeholders = {}

    embedded_features, one_hot_features = _collect_categorical_features(features_meta, placeholders, input_dims)
    embedded_features += _collect_vectorized_features(features_meta, placeholders, input_dims)

    return concat_tensors_list(embedded_features), concat_tensors_list(one_hot_features), placeholders