def Update_Train(language, question, answer):
    # NOTE: `question` and `answer` are accepted but unused in this function.
    # 0 selects the Persian corpus, 1 the English one; the two original
    # branches were identical except for the directory name, so they are
    # collapsed into one path here.
    lang_dir = {0: 'Persian', 1: 'English'}.get(language)
    if lang_dir is None:
        return

    question_pre = load_data(pre_path_files + lang_dir + '/question_pre')
    answering_pre = load_data(pre_path_files + lang_dir + '/answering_pre')
    is_similar = load_data(pre_path_files + lang_dir + '/is_similar')
    tokenizer = load_data(pre_path_files + lang_dir + '/tokenizer')
    embedding_matrix = load_data(pre_path_files + lang_dir + '/embedding_matrix')

    embedding_meta_data = {
        'tokenizer': tokenizer,
        'embedding_matrix': embedding_matrix
    }
    sentences_pair = [(x1, x2) for x1, x2 in zip(question_pre, answering_pre)]

    siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                            CONFIG.number_lstm_units, CONFIG.number_dense_units,
                            CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                            CONFIG.activation_function, CONFIG.validation_split_ratio)
    best_model_path = siamese.train_model(sentences_pair, is_similar, embedding_meta_data,
                                          model_save_directory=pre_path_files + lang_dir + '/')
    return best_model_path
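# The functions in this section assume a `load_data` loader and a
# `pre_path_files` root defined elsewhere. A minimal sketch of both, assuming
# the artifacts are ordinary pickle files (the serialization format and the
# root path shown here are assumptions, not the project's actual code):
import pickle

pre_path_files = './data/'  # hypothetical artifact root

def load_data(path):
    """Load one pickled artifact (corpus list, tokenizer, or embedding matrix)."""
    with open(path, 'rb') as f:
        return pickle.load(f)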
def Train(language='Persian'):
    question_pre = load_data(pre_path_files + language + '/question_pre')
    answering_pre = load_data(pre_path_files + language + '/answering_pre')
    is_similar = load_data(pre_path_files + language + '/is_similar')
    tokenizer = load_data(pre_path_files + language + '/Word2Vec/tokenizer')
    embedding_matrix = load_data(pre_path_files + language + '/Word2Vec/embedding_matrix')

    embedding_meta_data = {
        'tokenizer': tokenizer,
        'embedding_matrix': embedding_matrix
    }
    sentences_pair = [(x1, x2) for x1, x2 in zip(question_pre, answering_pre)]

    siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                            CONFIG.number_lstm_units, CONFIG.number_dense_units,
                            CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                            CONFIG.activation_function, CONFIG.validation_split_ratio)
    siamese.train_model(sentences_pair, is_similar, embedding_meta_data,
                        model_save_directory=pre_path_files + language + '/Model/')
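# Hedged usage sketch for the two entry points above, assuming the
# per-language artifact directories already exist under `pre_path_files`:
Train('English')             # reads <root>/English/..., saves under <root>/English/Model/
Update_Train(0, None, None)  # 0 = Persian; `question`/`answer` are unused above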
def train_model(train_x, train_y, embedding_meta_data, filename):
    class Configuration(object):
        """Container for the Siamese network hyper-parameters."""

    CONFIG = Configuration()
    CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM']
    CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']
    CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']
    CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']
    CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']
    CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']
    CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']
    CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']

    siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                            CONFIG.number_lstm_units, CONFIG.number_dense_units,
                            CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                            CONFIG.activation_function, CONFIG.validation_split_ratio)
    best_model_path = siamese.train_model(train_x, train_y, embedding_meta_data, filename,
                                          model_save_directory='../data/model/siamese-lstm/')
    return best_model_path
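# Hedged usage sketch for train_model above. The toy sentences, labels, and
# the `filename` value are illustrative only; `embedding_meta_data` is assumed
# to have been built as in the earlier functions (tokenizer + embedding matrix):
sentences1 = ['how do magnets work', 'what is gravity']
sentences2 = ['explain how magnets work', 'who invented the telephone']
train_x = [(s1, s2) for s1, s2 in zip(sentences1, sentences2)]
train_y = [1, 0]  # 1 = similar pair, 0 = dissimilar pair

best_path = train_model(train_x, train_y, embedding_meta_data, filename='siamese-run-1')
print('best checkpoint:', best_path)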
def train_model(sentences1, sentences2, is_similar, embedding_meta_data):
    # BUG FIX: `embedding_meta_data` was referenced below but never defined in
    # this scope in the original; it is now passed in as a parameter.
    sentences_pair = [(x1, x2) for x1, x2 in zip(sentences1, sentences2)]

    class Configuration(object):
        """Container for the Siamese network hyper-parameters."""

    CONFIG = Configuration()
    CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM']              # 100
    CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']  # 20
    CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']            # 100
    CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']            # 0.2
    CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']    # 100
    CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']  # relu
    CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']          # 0.2
    CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']  # 0.15

    siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                            CONFIG.number_lstm_units, CONFIG.number_dense_units,
                            CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                            CONFIG.activation_function, CONFIG.validation_split_ratio)
    best_model_path = siamese.train_model(sentences_pair, is_similar, embedding_meta_data,
                                          model_save_directory='./')
    return best_model_path
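# The inline comments above pin down the expected hyper-parameter values. A
# `siamese_config` dict consistent with them (the key names come from the
# code; collecting them in one module-level dict is how the functions consume
# them):
siamese_config = {
    'EMBEDDING_DIM': 100,         # 100-dimensional word vectors
    'MAX_SEQUENCE_LENGTH': 20,
    'NUMBER_LSTM': 100,
    'RATE_DROP_LSTM': 0.2,
    'NUMBER_DENSE_UNITS': 100,
    'ACTIVATION_FUNCTION': 'relu',
    'RATE_DROP_DENSE': 0.2,
    'VALIDATION_SPLIT': 0.15,
}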
"""Dump stuff here""" CONFIG = Configuration() CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM'] CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH'] CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM'] CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM'] CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS'] CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION'] CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE'] CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT'] siamese = SiameseBiLSTM(CONFIG.embedding_dim , CONFIG.max_sequence_length, CONFIG.number_lstm_units , CONFIG.number_dense_units, CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense, CONFIG.activation_function, CONFIG.validation_split_ratio) best_model_path = siamese.train_model(sentences_pair, is_similar, embedding_meta_data, model_save_directory='./') ######################## ###### Testing ######### ######################## def test(test_pair): model = load_model(best_model_path) test_sentence_pairs = test_pair bias1=random.uniform(0.28123,0.3156) bias2=random.uniform(0.07682,0.09354) test_data_x1, test_data_x2, leaks_test = create_test_data(tokenizer,test_sentence_pairs, siamese_config['MAX_SEQUENCE_LENGTH']) preds = list(model.predict([test_data_x1, test_data_x2, leaks_test], verbose=1).ravel()) results = [(x, y, z) for (x, y), z in zip(test_sentence_pairs, preds)]
CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']
CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']
CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']
CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']
CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']
CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']
CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']

siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                        CONFIG.number_lstm_units, CONFIG.number_dense_units,
                        CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                        CONFIG.activation_function, CONFIG.validation_split_ratio)
best_model_path = siamese.train_model(sentences_pair, is_similar,
                                      model_save_directory='./')

#######################
##### Testing #########
#######################
# model = load_model(
#     r'/Users/caowenli/Desktop/ml_pj/dl/time_series_similarity/checkpoints/1593418708/lstm_128_64_0.17_0.30.h5')
model = load_model(best_model_path)
train_data_x1, train_data_x2 = create_test_data(sentences_pair)
# BUG FIX: the original passed `train_data_x1` twice to predict(), so the
# model compared every sequence against itself instead of against its pair.
train_preds = list(model.predict([train_data_x1, train_data_x2],
                                 verbose=1).ravel())
res = []
for i in train_preds:
    if i < 0.5:
        # The fragment ended here in the original; thresholding the similarity
        # score at 0.5 into binary labels is the evident intent.
        res.append(0)
    else:
        res.append(1)
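# Equivalent vectorized form of the thresholding loop above, shown as an
# alternative (numpy assumed available, as is usual alongside Keras):
import numpy as np
res = (np.asarray(train_preds) >= 0.5).astype(int).tolist()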
CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']
CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']
CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']
CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']
CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']
CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']

print('go to siamese')
siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                        CONFIG.number_lstm_units, CONFIG.number_dense_units,
                        CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                        CONFIG.activation_function, CONFIG.validation_split_ratio)
best_model_path = siamese.train_model(
    sentences_pair, is_similar, embedding_meta_data,
    model_save_directory='/home/mtp-2/Desktop/siamese paper/implementation')

from operator import itemgetter
from keras.models import load_model

model = load_model(best_model_path)
test_sentence_pairs = [
    ('What can make Physics easy to learn? I am going to learn physis. I love it.',
     'How can you make physics easy to learn? Physics is my love and i will learn it.'),
    ('How many times a day do a clocks hands overlap? This clock is very lovely.',
     'What does it mean that every time I look at the clock the numbers are the same? Clock looks beautiful.')
]
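# The fragment imports `itemgetter` but ends before using it. Following the
# scoring pattern of the `test` helper earlier in this section, a hedged
# completion (assumes `tokenizer` is in scope, as in the other snippets):
test_data_x1, test_data_x2, leaks_test = create_test_data(
    tokenizer, test_sentence_pairs, siamese_config['MAX_SEQUENCE_LENGTH'])
preds = list(model.predict([test_data_x1, test_data_x2, leaks_test],
                           verbose=1).ravel())
results = [(x, y, z) for (x, y), z in zip(test_sentence_pairs, preds)]
# Rank pairs by predicted similarity, highest first; this is the natural use
# of the itemgetter import above.
results.sort(key=itemgetter(2), reverse=True)
print(results)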