Example #1
def collect_attention_features(attention_meta: AttentionFeaturesMeta, graph, rank=2):
    dropout_rate = graph["inputs"]["dropout_rate"]
    cont_3d, cat_3d, placeholders_3d = collect_basic_features(attention_meta.get_token_features_meta(), rank)
    cont_2d, cat_2d, placeholders_2d = collect_basic_features(attention_meta.get_relation_features_meta(), rank-1)
    graph["inputs"].update({**placeholders_3d, **placeholders_2d})
    return concat_tensors_list([dropout(cont_3d, dropout_rate), cat_3d]),\
        concat_tensors_list([dropout(cont_2d, dropout_rate), cat_2d])
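These examples rely throughout on two small helpers, concat_tensors_list and dropout, which are not shown here. Judging from the call sites (several pass tensors that may be None and check the result for None), they appear to tolerate missing inputs. A minimal sketch of that assumed behavior for TensorFlow 1.x:

import tensorflow as tf

def concat_tensors_list(tensors, axis=-1):
    # Assumed behavior: drop None entries, concatenate the rest along the last
    # axis, and return None when nothing is left to concatenate.
    tensors = [t for t in tensors if t is not None]
    if not tensors:
        return None
    if len(tensors) == 1:
        return tensors[0]
    return tf.concat(tensors, axis=axis)

def dropout(tensor, keep_prob):
    # Assumed behavior: pass None through untouched; otherwise apply TF1-style
    # dropout, where the "dropout_rate" placeholder holds the keep probability
    # (its default elsewhere in these examples is 1.0, i.e. no dropout).
    return None if tensor is None else tf.nn.dropout(tensor, keep_prob)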
Example #2
def create_attention_mechanism(
        context_encoding, seq_len, attention_3d_features, attention_2d_features, dropout_placeholder, props):

    attention_input = tf.nn.dropout(context_encoding, dropout_placeholder)
    if attention_3d_features is not None:
        attention_input = tf.concat([attention_input, attention_3d_features], axis=2)

    attention_aggregation_dense_size = props.get("dense_size", -1)
    if attention_aggregation_dense_size > 0:
        aggregation_tensor = apply_dense_layer_to_nd_tensor(
            context_encoding, attention_aggregation_dense_size, "att_aggr")
    else:
        aggregation_tensor = context_encoding

    if attention_2d_features is not None:
        queries = tf.tile(tf.expand_dims(attention_2d_features, 1), [1, tf.shape(context_encoding)[1], 1])
    else:
        queries = None

    attention_type = props.get("type", "bahdanau")
    if attention_type == "bahdanau":
        keys = concat_tensors_list([attention_input, queries])
        return BahdanauAttention.from_props(props)((keys, aggregation_tensor, seq_len))
    elif attention_type == "luong":
        if queries is None:
            raise Exception("No attention 2d features provided for Luong attention")
        return LuongAttention.from_props(props)((attention_input, queries, aggregation_tensor, seq_len))
    else:
        raise Exception("Unknown attention type")
Example #3
def _init_char_features(features_meta: CharsFeaturesMeta, placeholders, input_dims):
    char_features = features_meta.get_char_features()
    embedded_features = []
    for feature in char_features:
        placeholder = tf.placeholder(tf.int32, shape=[None]*(input_dims+1), name=feature['name'] + '_placeholder')
        placeholders[feature['name']] = placeholder
        embedded_features.append(create_embedding_lookup(placeholder, feature))
    return concat_tensors_list(embedded_features)
Example #4
def collect_word_embeddings(features_meta: WordEmbeddingsMeta, input_dims=2):
    placeholders = {}
    word_embeddings = []
    for feature in features_meta.get_precomputed_features():
        word_placeholder = tf.placeholder(tf.int32, shape=[None] * input_dims, name=feature['name'] + '_placeholder')
        placeholders[feature['name']] = word_placeholder
        embedding_matrix = tf.Variable(feature['vectors'], dtype=tf.float32, trainable=feature['trainable'])
        word_embeddings.append(tf.nn.embedding_lookup(embedding_matrix, word_placeholder))

    return concat_tensors_list(word_embeddings), placeholders
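A quick usage sketch, assuming a stub metadata object that implements only the accessor used above; the feature name and vector shape are invented for illustration:

import numpy as np

class _StubWordEmbeddingsMeta:
    # Hypothetical stand-in for WordEmbeddingsMeta; each feature dict carries the
    # 'name', 'vectors' and 'trainable' keys the function reads.
    def get_precomputed_features(self):
        return [{"name": "pretrained_we",
                 "vectors": np.zeros((10000, 100), dtype=np.float32),
                 "trainable": False}]

embeddings, placeholders = collect_word_embeddings(_StubWordEmbeddingsMeta(), input_dims=2)
# embeddings: [batch, time, 100] float tensor; placeholders: {"pretrained_we": int32 [None, None] placeholder}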
Example #5
def create_context_encoding(task_meta: RelExtTaskGraphMeta, graph, shared_encoding, word_embeddings, rank=2):
    seq_len = graph["inputs"]["seq_len"]
    dropout_rate = graph["inputs"]["dropout_rate"]

    cont, cat, placeholders = collect_basic_features(task_meta.metas.encoder, rank)

    if task_meta.props.get("add_we", False):
        cont = concat_tensors_list([cont, word_embeddings])

    if task_meta.props.get("add_shared", False):
        cont = concat_tensors_list([cont, shared_encoding])

    encoder_features = concat_tensors_list([dropout(cont, dropout_rate), cat])
    graph["inputs"].update(placeholders)

    fw, bw, features_encoding = None, None, None
    if encoder_features is not None:
        graph["inputs"].update(placeholders)

        logger.info("Size of specific encoder features in {} task = {}".format(
            task_meta.task_name, encoder_features.shape[rank].value))

        fw, bw, features_encoding = create_context_encoder(
            encoder_features, seq_len,
            skip_connection=task_meta.props.get("specific_encoder_skip_connection", False),
            encoding_type=task_meta.props.get("specific_encoder_type", "lstm"),
            encoding_size=task_meta.props.get("specific_encoder_size", 0),
            kernel_size=task_meta.props.get("specific_encoder_kernel_size", None),
            layers_num=task_meta.props.get("specific_encoder_layers_num", None),
            max_len=task_meta.props.get("specific_encoder_transformer_max_len", None),
            dropout=1 - dropout_rate)

    if task_meta.props.get("concat_we", False):
        features_encoding = concat_tensors_list([features_encoding, word_embeddings])
        fw, bw = None, None

    if task_meta.props.get("concat_shared", False):
        features_encoding = concat_tensors_list([features_encoding, shared_encoding])
        fw, bw = None, None

    return fw, bw, features_encoding
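create_context_encoder itself is not shown here. Since its callers unpack a (fw, bw, encoding) triple, the default "lstm" branch plausibly behaves like the following rough TensorFlow 1.x stand-in (not the project's implementation):

import tensorflow as tf

def birnn_encoder_sketch(inputs, seq_len, hidden_size, name="context_encoder_sketch"):
    # Single bidirectional LSTM over [batch, time, features] inputs; returns the
    # forward outputs, the backward outputs and their concatenation.
    with tf.variable_scope(name):
        fw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size)
        bw_cell = tf.nn.rnn_cell.LSTMCell(hidden_size)
        (fw, bw), _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, inputs, sequence_length=seq_len, dtype=tf.float32)
        return fw, bw, tf.concat([fw, bw], axis=-1)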
Example #6
def collect_token_features(features_meta: TokenFeaturesMeta, dropout, input_dims=2, **kwargs):
    word_embeddings, we_placeholders = collect_word_embeddings(features_meta.we_meta, input_dims)
    basic_cont_features, basic_cat_features, bf_placeholders =\
        collect_basic_features(features_meta.basic_meta, input_dims)
    char_features, cf_placeholders =\
        collect_char_features(features_meta.char_meta, dropout, input_dims,
                              kernel_sizes=kwargs.get("kernel_sizes", []),
                              kernel_num_features=kwargs.get("kernel_num_features", []))

    token_cont_features = [word_embeddings, basic_cont_features, char_features]
    placeholders = {**we_placeholders, **bf_placeholders, **cf_placeholders}
    return concat_tensors_list(token_cont_features), basic_cat_features, placeholders
Example #7
def create_spans_aggregation(context_encoding, indices, attention_3d_features, attention_2d_features):
    fw, bw, _ = context_encoding
    if fw is None or bw is None:
        raise Exception("Spans aggregation requires encoding with forward and backward pass")

    if attention_3d_features is not None:
        logger.info("Attention 3d features provided in spans aggregation will be ignored")

    # Take the forward state at each span's last token and the backward state at its
    # first token; spans arrive as [start, end) index pairs along the last axis of `indices`.
    fw_at_ends = IndexedAggregation()((fw, indices[:, :, 1] - 1))
    bw_at_starts = IndexedAggregation()((bw, indices[:, :, 0]))

    concatted = tf.reshape(
        tf.concat((fw_at_ends, bw_at_starts), axis=-1), [-1, indices.shape[1] * (fw.shape[2] + bw.shape[2])])

    return concat_tensors_list([concatted, attention_2d_features])
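IndexedAggregation is not defined in these examples; from the call sites it takes a (sequence, positions) pair and gathers one hidden state per position for every batch element. A hedged reconstruction of that behavior:

import tensorflow as tf

def indexed_aggregation_sketch(sequence, positions):
    # Assumed behavior of IndexedAggregation:
    # sequence: [batch, time, hidden], positions: [batch, spans] int32 -> [batch, spans, hidden]
    batch_size = tf.shape(sequence)[0]
    spans_num = tf.shape(positions)[1]
    batch_idx = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, spans_num])  # [batch, spans]
    return tf.gather_nd(sequence, tf.stack([batch_idx, positions], axis=-1))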
Example #8
def create_pooling_mechanism(
        context_encoding, seq_len, attention_3d_features, attention_2d_features, dropout_placeholder, props):

    _, _, context_encoding = context_encoding

    if attention_2d_features is not None:
        attention_2d_features = tf.tile(tf.expand_dims(attention_2d_features, 1), [1, tf.shape(context_encoding)[1], 1])

    pooling_input = concat_tensors_list([context_encoding, attention_3d_features, attention_2d_features])

    if "dense_size" in props:
        pooling_input = tf.nn.dropout(pooling_input, dropout_placeholder)
        pooling_input = apply_dense_layer_to_nd_tensor(pooling_input, props["dense_size"], "pooling_dense")

    if props["type"] == "max":
        aggregation = MaxPoolingAggregation()
    elif props["type"] == "mean":
        aggregation = MeanPoolingAggregation()
    else:
        raise Exception(f"{props['type']} pooling is not supported")

    return aggregation((pooling_input, seq_len))
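MaxPoolingAggregation and MeanPoolingAggregation are likewise project classes. Minimal sequence-masked pooling sketches consistent with how they are called (an (inputs, seq_len) pair in, a [batch, features] tensor out), assuming TensorFlow 1.x:

import tensorflow as tf

def masked_max_pooling_sketch(inputs, seq_len):
    # Max over time, ignoring padded positions by pushing them to a large
    # negative value so they never win the max.
    mask = tf.expand_dims(tf.sequence_mask(seq_len, maxlen=tf.shape(inputs)[1], dtype=tf.float32), -1)
    return tf.reduce_max(inputs + (1.0 - mask) * -1e9, axis=1)

def masked_mean_pooling_sketch(inputs, seq_len):
    # Mean over the valid timesteps only.
    mask = tf.expand_dims(tf.sequence_mask(seq_len, maxlen=tf.shape(inputs)[1], dtype=tf.float32), -1)
    return tf.reduce_sum(inputs * mask, axis=1) / tf.maximum(tf.reduce_sum(mask, axis=1), 1.0)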
Example #9
def aggregate_encoding(
        context_encoding, seq_len, indices, attention_3d_features, attention_2d_features, task_name, dp_rate, props):
    if attention_3d_features is None and attention_2d_features is None:
        logger.info("No attention features in props in {} task".format(task_name))

    if attention_3d_features is not None:
        logger.info("Size of attention 3D features in {} task = {}".format(
            task_name, attention_3d_features.shape[2].value))

    if attention_2d_features is not None:
        logger.info("Size of attention 2D features in {} task = {}".format(
            task_name, attention_2d_features.shape[1].value))

    strategies = {

        "attention": lambda prop: create_attention_mechanism(
            context_encoding[2], seq_len, attention_3d_features, attention_2d_features, dp_rate, prop),

        "max_pooling": lambda prop: create_pooling_mechanism(
            context_encoding, seq_len, attention_3d_features, attention_2d_features, dp_rate, {**prop, "type": "max"}),

        "mean_pooling": lambda prop: create_pooling_mechanism(
            context_encoding, seq_len, attention_3d_features, attention_2d_features, dp_rate, {**prop, "type": "mean"}),

        "take_spans": lambda prop: create_spans_aggregation(
            context_encoding, indices, attention_3d_features, attention_2d_features),

        "last_hiddens": lambda prop: create_last_hiddens_aggregation(
            context_encoding, seq_len, attention_3d_features, attention_2d_features)
    }

    if "aggregation" not in props:
        return None

    aggregations = [strategies[name](prop) for name, prop in sorted(props["aggregation"].items(), key=itemgetter(0))]
    return concat_tensors_list(aggregations)
Example #10
def collect_classifier_features(features_meta: BasicFeaturesMeta, graph):
    cont, cat, placeholders = collect_basic_features(features_meta, 1)
    graph["inputs"].update(placeholders)
    return concat_tensors_list([dropout(cont, graph["inputs"]["dropout_rate"]), cat])
Example #11
def build_graphs_with_shared_encoder(props: dict,
                                     shared_features_meta: TokenFeaturesMeta,
                                     task_specific_metas: List[TaskGraphMeta],
                                     rank: int = 2):
    dropout_rate = tf.placeholder_with_default(1.0, [], 'dropout')
    learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
    seq_len = tf.placeholder(tf.int32,
                             shape=[None] * (rank - 1),
                             name='seq_len')
    optimizer = get_optimizer(props, learning_rate)

    freeze_shared_ce = tf.placeholder_with_default(False, [],
                                                   'freeze_shared_ce')

    shared_inputs = {
        'dropout_rate': dropout_rate,
        'learning_rate': learning_rate,
        "seq_len": seq_len,
        'freeze_shared_ce': freeze_shared_ce
    }

    # TODO: we could use the "seq_len" key to store a list of input tensor lengths across each dimension.
    # This change would affect a lot of other code, so it should be done as a separate task.
    if rank > 2:
        shared_inputs["chain_len"] = tf.placeholder(tf.int32,
                                                    shape=[None] * (rank - 2),
                                                    name='chain_len')
    if rank > 3:
        raise Exception(f"can't build graph with input rank {rank}")

    word_embeddings, we_placeholders = collect_word_embeddings(
        shared_features_meta.we_meta, input_dims=rank)
    cont, cat, basic_placeholders = collect_basic_features(
        shared_features_meta.basic_meta, input_dims=rank)
    char_features, char_placeholders = collect_char_features(
        shared_features_meta.char_meta,
        dropout_rate,
        input_dims=rank,
        kernel_sizes=props.get("char_kernel_sizes", []),
        kernel_num_features=props.get("char_kernel_num_features", []))

    cont = concat_tensors_list([word_embeddings, cont, char_features])
    shared_features = concat_tensors_list([dropout(cont, dropout_rate), cat])

    if shared_features is None:
        raise Exception('No shared token features given.')

    shared_inputs.update({
        **we_placeholders,
        **basic_placeholders,
        **char_placeholders
    })

    logger.info("Size of shared context encoder features = {}".format(
        shared_features.shape[-1]))

    shared_features = create_filtering_layer(shared_features, **props)
    _, _, shared_context_encoding = create_context_encoder(
        shared_features,
        seq_len,
        encoding_type=props.get("encoding_type", "lstm"),
        encoding_size=props.get("encoding_size", 0),
        skip_connection=props.get("skip_connection", False),
        kernel_size=props.get("encoder_kernel_size", None),
        layers_num=props.get("encoder_layers_num", None),
        max_len=props.get("transformer_max_len", None),
        dropout=1 - dropout_rate)

    shared_context_encoding = tf.cond(
        freeze_shared_ce,
        true_fn=lambda: tf.stop_gradient(shared_context_encoding),
        false_fn=lambda: shared_context_encoding)

    logger.info("Size of shared context encoding = {}".format(
        shared_context_encoding.shape[-1].value))

    task_graphs = []
    for task_meta in task_specific_metas:
        with tf.variable_scope(task_meta.task_name):
            task_graph = task_meta.factory(shared_context_encoding,
                                           word_embeddings, shared_inputs,
                                           optimizer)

        task_graphs.append(task_graph)

    return task_graphs
Example #12
def build_coref_graph(props: dict, entity_encoder_meta: TokenFeaturesMeta, task_meta: list):
    dropout_rate = tf.placeholder_with_default(1.0, [], 'dropout')
    learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
    gold_idx = tf.placeholder(tf.int32, shape=[None], name='gold_labels')
    seq_len = tf.placeholder(tf.int32, shape=[None], name='seq_len')
    entity_seq_len = tf.placeholder(tf.int32, shape=[None, None], name='entity_seq_len')
    optimizer = get_optimizer(props, learning_rate)

    freeze_shared_ce = tf.placeholder_with_default(False, [], 'freeze_shared_ce')  # XXX find a way to remove this

    task_meta = task_meta[0]

    graph = {
        "inputs": {
            'labels': gold_idx,
            'dropout_rate': dropout_rate,
            'learning_rate': learning_rate,
            "seq_len": seq_len,
            "entity_seq_len": entity_seq_len,
            'freeze_shared_ce': freeze_shared_ce
        }
    }

    entity_cont, entity_cat, inputs = collect_token_features(
        entity_encoder_meta, dropout_rate, kernel_sizes=props.get("char_kernel_sizes", []),
        kernel_num_features=props.get("char_kernel_num_features", []), input_dims=3)
    entity_encoder_features = concat_tensors_list([dropout(entity_cont, dropout_rate), entity_cat])

    logger.info("Size of entity encoder features = {}".format(entity_encoder_features.shape[-1]))

    graph["inputs"].update(inputs)

    encoder_cont, encoder_cat, inputs = collect_basic_features(task_meta.metas.encoder, 2)
    graph["inputs"].update(inputs)

    entity_hidden = _create_entity_encoding(entity_encoder_features, entity_seq_len, props)

    entity_hidden = concat_tensors_list([
        dropout(concat_tensors_list([encoder_cont, entity_hidden]), dropout_rate),
        encoder_cat])

    logger.info("Size of entity encoding = {}".format(entity_hidden.shape[-1].value))

    _, _, context_encoding_state = create_birnn_layer(
        entity_hidden, seq_len, props['encoding_type'], props['encoding_size'], "context_encoding")

    hidden = tf.concat((context_encoding_state[0].h, context_encoding_state[1].h), axis=-1)
    hidden = tf.nn.dropout(hidden, dropout_rate)

    classifier_features = collect_classifier_features(task_meta.metas.classifier, graph)
    if classifier_features is not None:
        logger.info("Size of classifier features  = {}".format(classifier_features.shape[1].value))
        hidden = tf.concat([hidden, classifier_features], axis=-1)

    logger.info("Size of hidden vector = {}".format(hidden.shape[-1].value))

    classification_dense_size = task_meta.props.get('classification_dense_size', 0)
    if classification_dense_size > 0:
        hidden = tf.layers.dense(hidden, classification_dense_size, tf.nn.sigmoid)

    loss, label, scores = get_loss(hidden, graph["inputs"]["labels"], task_meta.feature_extractor.get_labels_size(),
                                   task_meta.props)
    train_op = get_train_op(loss, optimizer, task_meta.props)

    graph.update({
        'losses': [loss],
        'train_ops': [train_op],
        'outputs': {
            "predictions": label,
            "scores": scores
        }
    })

    return graph
Example #13
def collect_basic_features(features_meta: BasicFeaturesMeta, input_dims=2):
    placeholders = {}
    embedded_features, one_hot_features = _collect_categorical_features(features_meta, placeholders, input_dims)
    embedded_features += _collect_vectorized_features(features_meta, placeholders, input_dims)
    return concat_tensors_list(embedded_features), concat_tensors_list(one_hot_features), placeholders
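_collect_categorical_features and _collect_vectorized_features are not shown. A heavily hypothetical sketch of the categorical half, consistent with the (embedded, one-hot, placeholders) split returned above; the accessor name and feature keys are assumptions, not the project's API:

import tensorflow as tf

def _collect_categorical_features_sketch(features_meta, placeholders, input_dims):
    # For each categorical feature: register an int32 placeholder, then either look
    # it up in a trainable embedding table or expand it to a one-hot vector.
    embedded, one_hot = [], []
    for feature in features_meta.get_categorical_features():        # assumed accessor
        placeholder = tf.placeholder(tf.int32, shape=[None] * input_dims, name=feature["name"] + "_placeholder")
        placeholders[feature["name"]] = placeholder
        if feature.get("embedding_size"):                            # assumed key
            table = tf.get_variable(feature["name"] + "_embeddings",
                                    shape=[feature["size"], feature["embedding_size"]])  # assumed keys
            embedded.append(tf.nn.embedding_lookup(table, placeholder))
        else:
            one_hot.append(tf.one_hot(placeholder, feature["size"]))
    return embedded, one_hot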