def predict(config,
            concept,
            positives,
            vocab,
            entity_model,
            concept_model,
            original_model,
            val_data,
            result=None):
    entity_examples = examples_evaluation(config, concept, positives, vocab)

    # Token-index every name of the concept, then pad to the configured length
    c_token_indices = [[
        vocab.get(t.lower(), 1) for t in nltk.word_tokenize(name)
    ] for name in concept.names]
    concept_examples = pad_sequences(c_token_indices,
                                     padding='post',
                                     maxlen=config.getint(
                                         'embedding', 'length'))

    # Encode mentions and concept names with the trained encoders
    entity_encodings = entity_model.predict_generator(entity_examples,
                                                      steps=len(positives))
    concept_encodings = concept_model.predict(concept_examples)

    ###################
    # Pair every mention encoding with every candidate concept encoding
    from sample import sped_up_format_x
    convoluted_input = sped_up_format_x(entity_encodings, concept_encodings)

    # Note: the names below shadow the numpy arrays above; from here on they
    # refer to the symbolic inputs of the fast prediction model.
    entity_encodings = Input(shape=(convoluted_input[0].shape[1], ),
                             dtype='float32',
                             name='entity_encodings')
    concept_encodings = Input(shape=(convoluted_input[1].shape[1], ),
                              dtype='float32',
                              name='concept_encodings')

    # sysargv5 is a module-level flag (read from the command line elsewhere)
    # that selects the scoring head; the helpers rebuild it from the trained
    # layers, much as Example #3 below does inline.
    if sysargv5 == 'sem_matrix':
        model = _predict_shared_encoder(original_model, entity_encodings,
                                        concept_encodings)
    elif sysargv5 == 'cosine_sim':
        model = _predict_shared_encoder_dot(original_model, entity_encodings,
                                            concept_encodings)
    else:
        raise ValueError('Unknown scoring head: {0}'.format(sysargv5))

    test_y = model.predict(convoluted_input)

    if result is None:
        evaluation_parameter = callback.evaluate(val_data.mentions, test_y,
                                                 val_data.y)
    else:
        evaluation_parameter = evaluate_w_results(val_data.mentions, test_y,
                                                  val_data.y, concept, result)

    return evaluation_parameter
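
# sped_up_format_x is imported from sample but not shown on this page. A
# minimal sketch of what it presumably does, based on how its output is used
# above (every mention encoding paired with every candidate encoding; the
# actual implementation in sample.py may differ):
import numpy as np


def sped_up_format_x_sketch(mentions, candidates):
    # mentions: (n_mentions, dim), candidates: (n_candidates, dim)
    n_mentions, n_candidates = len(mentions), len(candidates)
    # Repeat each mention once per candidate: m0,m0,...,m1,m1,...
    x_mentions = np.repeat(mentions, n_candidates, axis=0)
    # Tile the full candidate list once per mention: c0,c1,...,c0,c1,...
    x_candidates = np.tile(candidates, (n_mentions, 1))
    # Two aligned arrays of shape (n_mentions * n_candidates, dim)
    return [x_mentions, x_candidates]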
Example #2
def forward_pass_speedup_shared_encoder_dot_xDense(model, corpus_padded,
                                                   concept_padded, pretrained):
    '''
    Model to speed up forward pass, used in callback for evaluation
    '''
    # Re-run only the convolutional encoders, reusing the trained layers
    model_mention = _forward_pass_speedup_conv(model, [
        'inp_mentions', 'embedding_layer', 'drop', 'conv1d',
        'global_max_pooling1d_1'
    ])
    mentions = model_mention.predict(corpus_padded)  # (787, 50)
    model_candidate = _forward_pass_speedup_conv(model, [
        'inp_candidates', 'embedding_layer', 'drop', 'conv1d',
        'global_max_pooling1d_2'
    ])
    candidates = model_candidate.predict(concept_padded)  # (67782, 50)
    logger.info('Formatting pooled mentions and candidates...')
    from sample import sped_up_format_x
    convoluted_input = sped_up_format_x(mentions, candidates)
    model_sem = _forward_pass_speedup_dot_xDense(model, convoluted_input)
    return convoluted_input, model_sem
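
# _forward_pass_speedup_conv is defined elsewhere in this module. A minimal
# sketch, assuming it chains the named layers of the trained model into a
# standalone encoder that reuses the trained weights (the actual helper may
# differ):
from keras.models import Model


def _forward_pass_speedup_conv_sketch(model, layer_names):
    # The first name is the input layer; the rest are applied in order
    inp = model.get_layer(layer_names[0]).input
    x = inp
    for name in layer_names[1:]:
        x = model.get_layer(name)(x)
    # predict() on this sub-model runs only the encoder, not the full
    # scoring network, which is what makes the forward pass fast
    return Model(inputs=inp, outputs=x)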
Example #3
def predict(config, concept, positives, vocab, entity_model, concept_model, original_model, val_data, result=None):
    entity_examples = examples_evaluation(config, concept, positives, vocab)

    # Token-index every name of the concept, then pad to the configured length
    c_token_indices = [[vocab.get(t.lower(), 1) for t in nltk.word_tokenize(name)] for name in concept.names]
    concept_examples = pad_sequences(c_token_indices, padding='post', maxlen=config.getint('embedding', 'length'))
    
    entity_encodings = entity_model.predict_generator(entity_examples, steps=len(positives))    
    concept_encodings = concept_model.predict(concept_examples)

    ###################
    # Pair every mention encoding with every candidate concept encoding
    from sample import sped_up_format_x
    convoluted_input = sped_up_format_x(entity_encodings, concept_encodings)

    # Pull the trained scoring layers out of the original model
    layer_names = ['v_sem', 'hidden_layer', 'prediction_layer']
    v_sem = original_model.get_layer(layer_names[0])
    d2 = original_model.get_layer(layer_names[2])
    # sysargv2 is a module-level flag (read from the command line elsewhere);
    # 'full' selects the variant with a hidden layer.
    if sysargv2 == 'full':
        d1 = original_model.get_layer(layer_names[1])

    # Note: the names below shadow the numpy arrays above; from here on they
    # refer to the symbolic inputs of the fast prediction model.
    entity_encodings = Input(shape=(convoluted_input[0].shape[1],), dtype='float32', name='entity_encodings')
    concept_encodings = Input(shape=(convoluted_input[1].shape[1],), dtype='float32', name='concept_encodings')
    sem = cnn.semantic_similarity_layer(weights=v_sem.get_weights())([entity_encodings, concept_encodings])
    
    if sysargv2 == 'full':
        # Full head: concatenate both encodings with the similarity score
        concatenate_list = [entity_encodings, concept_encodings, sem]
        join_layer = Concatenate()(concatenate_list)
        hidden_layer = Dense(d1.units, activation=d1.activation, weights=d1.get_weights())(join_layer)
        prediction_layer = Dense(d2.units, activation=d2.activation, weights=d2.get_weights())(hidden_layer)
    else:
        # Shallow variant: predict directly from the similarity score
        prediction_layer = Dense(d2.units, activation=d2.activation, weights=d2.get_weights())(sem)

    model = Model(inputs=[entity_encodings, concept_encodings], outputs=prediction_layer)
    test_y = model.predict(convoluted_input)

    if result is None:
        evaluation_parameter = callback.evaluate(val_data.mentions, test_y, val_data.y)
    else:
        evaluation_parameter = evaluate_w_results(val_data.mentions, test_y, val_data.y, concept, result)

    return evaluation_parameter
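
# cnn.semantic_similarity_layer is a custom layer from the cnn module whose
# trained weights (v_sem) are reloaded above. A minimal sketch of a bilinear
# similarity layer of that general kind -- an assumption for illustration;
# the actual layer in cnn.py may compute something different:
from keras import backend as K
from keras.layers import Layer


class SemanticSimilaritySketch(Layer):
    """Scores a (mention, candidate) pair as x^T M y with a learned M."""

    def build(self, input_shape):
        dim_x, dim_y = input_shape[0][-1], input_shape[1][-1]
        self.M = self.add_weight(name='sem_matrix',
                                 shape=(dim_x, dim_y),
                                 initializer='glorot_uniform',
                                 trainable=True)
        super(SemanticSimilaritySketch, self).build(input_shape)

    def call(self, inputs):
        x, y = inputs
        # (batch, dim_x) @ (dim_x, dim_y) -> (batch, dim_y); a row-wise dot
        # product with y then yields one similarity score per pair
        return K.sum(K.dot(x, self.M) * y, axis=-1, keepdims=True)

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], 1)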
Example #4
def predict(config,
            concept,
            positives,
            vocab,
            entity_model,
            concept_model,
            original_model,
            val_data,
            result=None):
    entity_examples = examples(config, concept, positives, vocab, neg_count=0)

    # Token-index every name of the concept, then pad to the configured length
    c_token_indices = [[
        vocab.get(t.lower(), 1) for t in nltk.word_tokenize(name)
    ] for name in concept.names]
    concept_examples = pad_sequences(c_token_indices,
                                     maxlen=config.getint(
                                         'embedding', 'length'))

    entity_encodings = entity_model.predict_generator(entity_examples,
                                                      steps=len(positives))
    concept_encodings = concept_model.predict(concept_examples)

    ###################
    # Pair every mention encoding with every candidate concept encoding
    from sample import sped_up_format_x
    convoluted_input = sped_up_format_x(entity_encodings, concept_encodings)

    # Pull the trained scoring layers out of the original model
    layer_names = ['v_sem', 'hidden_layer', 'prediction_layer']
    v_sem = original_model.get_layer(layer_names[0])
    d1 = original_model.get_layer(layer_names[1])
    d2 = original_model.get_layer(layer_names[2])

    # Note: the names below shadow the numpy arrays above; from here on they
    # refer to the symbolic inputs of the fast prediction model.
    entity_encodings = Input(shape=(convoluted_input[0].shape[1], ),
                             dtype='float32',
                             name='entity_encodings')
    concept_encodings = Input(shape=(convoluted_input[1].shape[1], ),
                              dtype='float32',
                              name='concept_encodings')
    sem = cnn.semantic_similarity_layer(weights=v_sem.get_weights())(
        [entity_encodings, concept_encodings])
    #cos_sim = layers.dot([entity_encodings, concept_encodings], axes=-1, normalize=True, name='cos_sim')
    concatenate_list = [entity_encodings, concept_encodings, sem]
    join_layer = Concatenate()(concatenate_list)
    hidden_layer = Dense(d1.units,
                         activation=d1.activation,
                         weights=d1.get_weights())(join_layer)
    prediction_layer = Dense(d2.units,
                             activation=d2.activation,
                             weights=d2.get_weights())(hidden_layer)

    model = Model(inputs=[entity_encodings, concept_encodings],
                  outputs=prediction_layer)
    test_y = model.predict(convoluted_input)
    if result is None:
        evaluation_parameter = callback.evaluate(val_data.mentions, test_y,
                                                 val_data.y)
    else:
        evaluation_parameter = evaluate_w_results(val_data.mentions, test_y,
                                                  val_data.y, concept, result)
    ###################
    # Alternative: rank candidates by raw cosine similarity between the numpy
    # encodings instead of the trained scoring head (runnable sketch after
    # this function):
    # sims = cosine_similarity(entity_encodings, concept_encodings)
    # best_hits = np.argmax(sims, axis=-1)
    # predictions = [concept.ids[i] for i in best_hits]
    # return predictions
    return evaluation_parameter
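
# A runnable version of the cosine-similarity alternative kept in the
# comments above, assuming concept.ids is aligned with concept.names (as the
# original comments imply); the helper name is hypothetical:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def predict_by_cosine(entity_encodings, concept_encodings, concept):
    # (n_mentions, n_candidates) similarity matrix over the numpy encodings
    sims = cosine_similarity(entity_encodings, concept_encodings)
    # For each mention, pick the id of its most similar concept name
    best_hits = np.argmax(sims, axis=-1)
    return [concept.ids[i] for i in best_hits]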