# Example 1
def build_model_generator(conf, vocabulary, pretrained):
    """Build the siamese CNN used to score mention/candidate pairs.

    Two integer token-id sequences (mention and candidate) share a frozen
    pretrained embedding, dropout, and a Conv1D + global-max-pool encoder.
    The pooled encodings feed a learned similarity layer, and everything is
    concatenated into a small dense head ending in a sigmoid score.

    Args:
        conf: configparser-like object; reads [embedding] length and
            [cnn] dropout/filters/kernel_size plus optimizer/loss settings.
        vocabulary: mapping whose size defines the embedding input dim.
        pretrained: 2-D weight matrix for the embedding layer
            (rows = vocabulary size, cols = embedding dim).

    Returns:
        (model, entity_model, concept_model): the compiled full scoring
        model plus the two encoder sub-models sharing its weights.
    """
    seq_len = conf.getint('embedding', 'length')

    mention_input = Input(shape=(seq_len, ), dtype='int32',
                          name='inp_mentions')
    candidate_input = Input(shape=(seq_len, ), dtype='int32',
                            name='inp_candidates')

    # Frozen embedding initialised from the pretrained matrix.
    embed = Embedding(len(vocabulary),
                      pretrained.shape[1],
                      mask_zero=False,
                      trainable=False,
                      weights=[pretrained],
                      name='embedding_layer')
    dropout = layers.Dropout(conf.getfloat('cnn', 'dropout'), name='drop')

    mention_embedded = dropout(embed(mention_input))
    candidate_embedded = dropout(embed(candidate_input))

    # One Conv1D instance applied to both branches => shared weights.
    shared_conv = Conv1D(filters=conf.getint('cnn', 'filters'),
                         kernel_size=conf.getint('cnn', 'kernel_size'),
                         activation='relu')
    mention_pooled = GlobalMaxPooling1D()(shared_conv(mention_embedded))
    candidate_pooled = GlobalMaxPooling1D()(shared_conv(candidate_embedded))

    # Encoder sub-models reuse the same layers as the full model.
    entity_model = Model(inputs=mention_input, outputs=mention_pooled)
    concept_model = Model(inputs=candidate_input, outputs=candidate_pooled)

    # Learned (parameterised) similarity between the two encodings.
    v_sem = semantic_similarity_layer(name='v_sem')(
        [mention_pooled, candidate_pooled])

    joined = Concatenate()([mention_pooled, candidate_pooled, v_sem])
    hidden = Dense(64, activation='relu', name='hidden_layer')(joined)
    score = Dense(1, activation='sigmoid', name='prediction_layer')(hidden)

    model = Model(inputs=[mention_input, candidate_input], outputs=score)
    model.compile(optimizer=cnn.return_optimizer(conf),
                  loss=cnn.return_loss(conf))

    return model, entity_model, concept_model
def predict(config,
            concept,
            positives,
            vocab,
            entity_model,
            concept_model,
            original_model,
            val_data,
            result=None):
    """Encode mentions and candidate concepts, score all pairs, evaluate.

    Runs the two encoder sub-models to pre-compute encodings, then rebuilds
    the top of ``original_model`` (similarity + dense head) over those fixed
    encodings so scoring is cheap, and finally evaluates the predictions.

    Args:
        config: configparser-like object; reads [embedding] length.
        concept: candidate-concept collection; ``concept.vectorize`` holds
            token-id sequences for its names.
        positives: gold mention data; its length sets the generator steps.
        vocab: token -> id mapping passed through to ``examples``.
        entity_model: encoder sub-model for mentions.
        concept_model: encoder sub-model for concept names.
        original_model: trained full model whose top-layer weights are reused.
        val_data: evaluation data with ``mentions`` and ``y`` attributes.
        result: optional container for detailed results; when provided,
            ``evaluate_w_results`` is used instead of plain evaluation.

    Returns:
        The evaluation parameter produced by the chosen evaluate function.
    """
    entity_examples = examples(config, concept, positives, vocab, neg_count=0)

    concept_examples = pad_sequences(concept.vectorize,
                                     maxlen=config.getint(
                                         'embedding', 'length'))

    entity_encodings = entity_model.predict_generator(entity_examples,
                                                      steps=len(positives))
    concept_encodings = concept_model.predict(concept_examples)

    from sample import sped_up_format_x
    convoluted_input = sped_up_format_x(entity_encodings, concept_encodings)

    # Pull the trained top layers so their weights can be reused below.
    layer_names = ['v_sem', 'hidden_layer', 'prediction_layer']
    v_sem = original_model.get_layer(layer_names[0])
    d1 = original_model.get_layer(layer_names[1])
    d2 = original_model.get_layer(layer_names[2])

    # Fresh Input tensors over the pre-computed encodings (renamed from the
    # ndarrays above to avoid shadowing them).
    entity_inp = Input(shape=(convoluted_input[0].shape[1], ),
                       dtype='float32',
                       name='entity_encodings')
    concept_inp = Input(shape=(convoluted_input[1].shape[1], ),
                        dtype='float32',
                        name='concept_encodings')
    sem = cnn.semantic_similarity_layer(weights=v_sem.get_weights())(
        [entity_inp, concept_inp])
    join_layer = Concatenate()([entity_inp, concept_inp, sem])
    hidden_layer = Dense(d1.units,
                         activation=d1.activation,
                         weights=d1.get_weights())(join_layer)
    prediction_layer = Dense(d2.units,
                             activation=d2.activation,
                             weights=d2.get_weights())(hidden_layer)

    model = Model(inputs=[entity_inp, concept_inp],
                  outputs=prediction_layer)
    test_y = model.predict(convoluted_input)

    # BUGFIX: was `if not result:`, which sent an *empty* (falsy) result
    # container down the plain-evaluation branch; only absence (None)
    # should do that, per the default value.
    if result is None:
        import callback
        evaluation_parameter = callback.evaluate(val_data.mentions, test_y,
                                                 val_data.y)
    else:
        evaluation_parameter = evaluate_w_results(val_data.mentions, test_y,
                                                  val_data.y, concept, result)
    return evaluation_parameter