from operator import itemgetter

from keras.models import load_model


def testing1(best_model_path):
    # making the training data
    text1, text2, class1, train_pair = make_new_train_data()
    # making the test data
    test_pair = make_test_data()
    # building the tokenizer and embedding matrix
    tokenizer, embedding_matrix = word_embed_meta_data(
        text1 + text2, siamese_config['EMBEDDING_DIM'])
    embedding_meta_data = {
        'tokenizer': tokenizer,
        'embedding_matrix': embedding_matrix
    }

    class Configuration(object):
        """Container for model hyperparameters."""

    CONFIG = Configuration()
    # setting the configuration for the model
    CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM']
    CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']
    CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']
    CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']
    CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']
    CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']
    CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']
    CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']

    # building the siamese network
    siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                            CONFIG.number_lstm_units,
                            CONFIG.number_dense_units,
                            CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                            CONFIG.activation_function,
                            CONFIG.validation_split_ratio)

    # updating the pretrained model and saving the new checkpoint
    best_model_path = siamese.update_model(best_model_path, train_pair,
                                           class1, embedding_meta_data)

    # loading the best updated model
    model = load_model(best_model_path)

    # converting the test pairs into the input format the model expects
    test_data_x1, test_data_x2, leaks_test = create_test_data(
        tokenizer, test_pair, siamese_config['MAX_SEQUENCE_LENGTH'])

    # storing the predictions for the test data in the preds variable
    preds = list(
        model.predict([test_data_x1, test_data_x2, leaks_test],
                      verbose=1).ravel())

    # storing the results in the following form:
    #   known1, unknown, result1
    #   known2, unknown, result2
    #   known3, unknown, result3
    results = [(x, y, z) for (x, y), z in zip(test_pair, preds)]
    results.sort(key=itemgetter(2), reverse=True)
    return results, preds
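
# A minimal usage sketch, not part of the original source: the checkpoint
# path below is hypothetical, and make_new_train_data, make_test_data,
# word_embed_meta_data, create_test_data, SiameseBiLSTM, and siamese_config
# are assumed to be defined as above. Since results is sorted by score in
# descending order, the first entries are the most similar pairs.
if __name__ == '__main__':
    results, preds = testing1('./checkpoints/best_model.h5')
    for known, unknown, score in results[:5]:
        print(known, unknown, score)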