def build_model_generator(conf, vocabulary, pretrained):
    """Build the CNN ranking model over mention/candidate pairs.

    Returns (model, entity_model, concept_model): the full trainable model plus
    the two sub-models that expose the pooled mention and candidate encodings.
    """
    # Token-index inputs for the mention and the candidate concept name.
    inp_mentions = Input(shape=(conf.getint('embedding', 'length'),),
                         dtype='int32', name='inp_mentions')
    inp_candidates = Input(shape=(conf.getint('embedding', 'length'),),
                           dtype='int32', name='inp_candidates')

    # Frozen embedding layer initialized with the pretrained vectors; shared by both inputs.
    embedding_layer = Embedding(len(vocabulary), pretrained.shape[1],
                                mask_zero=False, trainable=False,
                                weights=[pretrained], name='embedding_layer')
    drop = layers.Dropout(conf.getfloat('cnn', 'dropout'), name='drop')
    encoded_mentions = drop(embedding_layer(inp_mentions))
    encoded_candidates = drop(embedding_layer(inp_candidates))

    # A single Conv1D instance, so the mention and candidate encoders share weights.
    shared_conv = Conv1D(filters=conf.getint('cnn', 'filters'),
                         kernel_size=conf.getint('cnn', 'kernel_size'),
                         activation='relu')
    conv_mentions = shared_conv(encoded_mentions)
    conv_candidates = shared_conv(encoded_candidates)
    pooled_mentions = GlobalMaxPooling1D()(conv_mentions)
    pooled_candidates = GlobalMaxPooling1D()(conv_candidates)

    # Sub-models used at prediction time to precompute the encodings.
    entity_model = Model(inputs=inp_mentions, outputs=pooled_mentions)
    concept_model = Model(inputs=inp_candidates, outputs=pooled_candidates)

    # cos_sim = layers.dot([pooled_mentions, pooled_candidates], axes=-1, normalize=True, name='cos_sim')
    v_sem = semantic_similarity_layer(name='v_sem')([pooled_mentions, pooled_candidates])

    # Concatenate both encodings with the learned similarity score.
    concatenate_list = [pooled_mentions, pooled_candidates, v_sem]
    join_layer = Concatenate()(concatenate_list)
    hidden_layer = Dense(64, activation='relu', name='hidden_layer')(join_layer)
    prediction_layer = Dense(1, activation='sigmoid', name='prediction_layer')(hidden_layer)

    # list of input layers
    input_list = [inp_mentions, inp_candidates]
    model = Model(inputs=input_list, outputs=prediction_layer)
    model.compile(optimizer=cnn.return_optimizer(conf), loss=cnn.return_loss(conf))
    return model, entity_model, concept_model
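
# Minimal, self-contained sketch (not used by the pipeline) of the weight-sharing
# pattern above: a single Embedding/Conv1D instance applied to two inputs, so the
# mention and candidate branches use the same encoder. Layer sizes and names here
# are illustrative assumptions, not values taken from the config.
def _shared_encoder_sketch(seq_len=10, vocab_size=100, emb_dim=50, n_filters=32):
    import numpy as np
    from keras.layers import Input, Embedding, Conv1D, GlobalMaxPooling1D
    from keras.models import Model

    inp_a = Input(shape=(seq_len,), dtype='int32')
    inp_b = Input(shape=(seq_len,), dtype='int32')
    emb = Embedding(vocab_size, emb_dim)            # one instance -> shared weights
    conv = Conv1D(n_filters, 3, activation='relu')  # one instance -> shared weights
    pool = GlobalMaxPooling1D()
    enc_a = pool(conv(emb(inp_a)))
    enc_b = pool(conv(emb(inp_b)))

    encoder_a = Model(inp_a, enc_a)
    encoder_b = Model(inp_b, enc_b)

    # Identical inputs give identical encodings because the layers are shared.
    x = np.random.randint(0, vocab_size, size=(2, seq_len))
    assert np.allclose(encoder_a.predict(x), encoder_b.predict(x))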

def predict(config, concept, positives, vocab, entity_model, concept_model,
            original_model, val_data, result=None):
    """Score every mention/concept pair from precomputed encodings and evaluate.

    The convolutional encoders are run once per mention and once per concept
    name; the top of the trained model (similarity, hidden and prediction
    layers) is then rebuilt on top of those cached encodings.
    """
    entity_examples = examples(config, concept, positives, vocab, neg_count=0)
    # c_token_indices = [[vocab.get(t.lower(), 1) for t in nltk.word_tokenize(neg)] for neg in concept.names]
    concept_examples = pad_sequences(concept.vectorize,
                                     maxlen=config.getint('embedding', 'length'))

    # Encode all mentions and all concept names once with the sub-models.
    entity_encodings = entity_model.predict_generator(entity_examples, steps=len(positives))
    concept_encodings = concept_model.predict(concept_examples)

    # Pair up every mention encoding with every concept encoding.
    from sample import sped_up_format_x
    convoluted_input = sped_up_format_x(entity_encodings, concept_encodings)

    # Rebuild the top of the trained model, reusing its learned weights.
    layer_names = ['v_sem', 'hidden_layer', 'prediction_layer']
    v_sem = original_model.get_layer(layer_names[0])
    d1 = original_model.get_layer(layer_names[1])
    d2 = original_model.get_layer(layer_names[2])

    inp_entity = Input(shape=(convoluted_input[0].shape[1],), dtype='float32', name='entity_encodings')
    inp_concept = Input(shape=(convoluted_input[1].shape[1],), dtype='float32', name='concept_encodings')
    sem = cnn.semantic_similarity_layer(weights=v_sem.get_weights())([inp_entity, inp_concept])
    concatenate_list = [inp_entity, inp_concept, sem]
    join_layer = Concatenate()(concatenate_list)
    hidden_layer = Dense(d1.units, activation=d1.activation, weights=d1.get_weights())(join_layer)
    prediction_layer = Dense(d2.units, activation=d2.activation, weights=d2.get_weights())(hidden_layer)
    model = Model(inputs=[inp_entity, inp_concept], outputs=prediction_layer)

    test_y = model.predict(convoluted_input)

    if result is None:
        import callback
        evaluation_parameter = callback.evaluate(val_data.mentions, test_y, val_data.y)
    else:
        evaluation_parameter = evaluate_w_results(val_data.mentions, test_y, val_data.y, concept, result)

    # Alternative: rank concepts by cosine similarity of the raw encodings.
    # sims = cosine_similarity(entity_encodings, concept_encodings)
    # best_hits = np.argmax(sims, axis=-1)
    # predictions = [concept.ids[i] for i in best_hits]
    # return predictions
    return evaluation_parameter
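
# Sketch of the cosine-similarity shortcut referenced in the commented-out block
# above: rank all concept names for each mention encoding and take the argmax.
# The function and parameter names here are illustrative; in this codebase the
# id lookup corresponds to concept.ids[i].
def _nearest_concept_sketch(entity_encodings, concept_encodings, concept_ids):
    import numpy as np
    from sklearn.metrics.pairwise import cosine_similarity

    sims = cosine_similarity(entity_encodings, concept_encodings)  # (n_mentions, n_names)
    best_hits = np.argmax(sims, axis=-1)
    return [concept_ids[i] for i in best_hits]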