Example #1
def Update_Train(language, question, answer):
    # language code 0 selects the Persian data directory
    if language == 0:
        question_pre = load_data(pre_path_files + 'Persian/question_pre')
        answering_pre = load_data(pre_path_files + 'Persian/answering_pre')
        is_similar = load_data(pre_path_files + 'Persian/is_similar')
        tokenizer = load_data(pre_path_files + 'Persian/tokenizer')
        embedding_matrix = load_data(pre_path_files + 'Persian/embedding_matrix')
        embedding_meta_data = {
            'tokenizer': tokenizer,
            'embedding_matrix': embedding_matrix
        }
        sentences_pair = [(x1, x2) for x1, x2 in zip(question_pre, answering_pre)]
        siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length, CONFIG.number_lstm_units,
                                CONFIG.number_dense_units, CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                                CONFIG.activation_function, CONFIG.validation_split_ratio)
        best_model_path = siamese.train_model(sentences_pair, is_similar, embedding_meta_data,
                                              model_save_directory=pre_path_files + 'Persian/')
    # language code 1 selects the English data directory
    elif language == 1:
        question_pre = load_data(pre_path_files + 'English/question_pre')
        answering_pre = load_data(pre_path_files + 'English/answering_pre')
        is_similar = load_data(pre_path_files + 'English/is_similar')
        tokenizer = load_data(pre_path_files + 'English/tokenizer')
        embedding_matrix = load_data(pre_path_files + 'English/embedding_matrix')
        embedding_meta_data = {
            'tokenizer': tokenizer,
            'embedding_matrix': embedding_matrix
        }
        sentences_pair = [(x1, x2) for x1, x2 in zip(question_pre, answering_pre)]
        siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length, CONFIG.number_lstm_units,
                                CONFIG.number_dense_units, CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                                CONFIG.activation_function, CONFIG.validation_split_ratio)
        best_model_path = siamese.train_model(sentences_pair, is_similar, embedding_meta_data,
                                              model_save_directory=pre_path_files + 'English/')
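The two branches differ only in the data directory. A minimal, hypothetical invocation (it assumes pre_path_files and the pickled inputs already exist; question and answer are unused by this snippet):

# Hypothetical usage sketch, not from the original source:
# 0 retrains the Persian model, 1 retrains the English model.
Update_Train(0, None, None)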
Example #2
def Train(language='Persian'):
    question_pre = load_data(pre_path_files + language + '/question_pre')
    answering_pre = load_data(pre_path_files + language + '/answering_pre')
    is_similar = load_data(pre_path_files + language + '/is_similar')
    tokenizer = load_data(pre_path_files + language + '/Word2Vec/tokenizer')
    embedding_matrix = load_data(pre_path_files + language + '/Word2Vec/embedding_matrix')
    embedding_meta_data = {
        'tokenizer': tokenizer,
        'embedding_matrix': embedding_matrix
    }
    sentences_pair = [(x1, x2) for x1, x2 in zip(question_pre, answering_pre)]
    siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length, CONFIG.number_lstm_units,
                            CONFIG.number_dense_units, CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                            CONFIG.activation_function, CONFIG.validation_split_ratio)
    siamese.train_model(sentences_pair, is_similar, embedding_meta_data,
                        model_save_directory=pre_path_files + language + '/Model/')
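Example #2 collapses the two branches of Example #1 by parameterizing the directory name. A hypothetical call, assuming the English data files exist under pre_path_files:

# Hypothetical usage sketch, not from the original source.
Train('English')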
Example #3
def train_model(train_x, train_y, embedding_meta_data, filename):
    class Configuration(object):
        """Dump stuff here"""

    CONFIG = Configuration()

    CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM']
    CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']
    CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']
    CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']
    CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']
    CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']
    CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']
    CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']

    siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                            CONFIG.number_lstm_units,
                            CONFIG.number_dense_units, CONFIG.rate_drop_lstm,
                            CONFIG.rate_drop_dense, CONFIG.activation_function,
                            CONFIG.validation_split_ratio)

    best_model_path = siamese.train_model(
        train_x,
        train_y,
        embedding_meta_data,
        filename,
        model_save_directory='../data/model/siamese-lstm/')
    return best_model_path
Example #4
def train_model(sentences1, sentences2, is_similar, embedding_meta_data):

    sentences_pair = [(x1, x2) for x1, x2 in zip(sentences1, sentences2)]

    class Configuration(object):
        """Container for model hyperparameters."""

    CONFIG = Configuration()

    CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM']  # e.g. 100
    CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']  # e.g. 20
    CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']  # e.g. 100
    CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']  # e.g. 0.2
    CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']  # e.g. 100
    CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']  # e.g. 'relu'
    CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']  # e.g. 0.2
    CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']  # e.g. 0.15

    siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length, CONFIG.number_lstm_units,
                            CONFIG.number_dense_units, CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                            CONFIG.activation_function, CONFIG.validation_split_ratio)
    best_model_path = siamese.train_model(sentences_pair, is_similar, embedding_meta_data,
                                          model_save_directory='./')
    return best_model_path
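These train_model variants all read their hyperparameters from a siamese_config dictionary. A minimal sketch of that dictionary, with values taken from the inline comments in Example #4 (assumptions, not the project's authoritative configuration):

# Hypothetical siamese_config; values mirror the comments in Example #4.
siamese_config = {
    'EMBEDDING_DIM': 100,
    'MAX_SEQUENCE_LENGTH': 20,
    'NUMBER_LSTM': 100,
    'RATE_DROP_LSTM': 0.2,
    'NUMBER_DENSE_UNITS': 100,
    'ACTIVATION_FUNCTION': 'relu',
    'RATE_DROP_DENSE': 0.2,
    'VALIDATION_SPLIT': 0.15,
}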
Example #5
    """Dump stuff here"""

CONFIG = Configuration()
CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM']
CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']
CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']
CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']
CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']
CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']
CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']
CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']

siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                        CONFIG.number_lstm_units, CONFIG.number_dense_units,
                        CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                        CONFIG.activation_function, CONFIG.validation_split_ratio)

best_model_path = siamese.train_model(sentences_pair, is_similar, embedding_meta_data, model_save_directory='./')


########################
###### Testing #########
########################
def test(test_pair):
    model = load_model(best_model_path)

    test_sentence_pairs = test_pair
    # bias1 and bias2 are computed but never used in this snippet
    bias1 = random.uniform(0.28123, 0.3156)
    bias2 = random.uniform(0.07682, 0.09354)
    test_data_x1, test_data_x2, leaks_test = create_test_data(
        tokenizer, test_sentence_pairs, siamese_config['MAX_SEQUENCE_LENGTH'])

    preds = list(model.predict([test_data_x1, test_data_x2, leaks_test], verbose=1).ravel())
    results = [(x, y, z) for (x, y), z in zip(test_sentence_pairs, preds)]
    return results
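A hypothetical call, with pairs shaped like test_sentence_pairs in Example #7; the sentences below are illustrative only:

# Hypothetical usage sketch, not from the original source.
pairs = [('How do I learn physics?', 'What is the best way to study physics?')]
scored = test(pairs)  # [(sentence1, sentence2, similarity_score), ...]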
Example #6
class Configuration(object):
    """Container for model hyperparameters."""

CONFIG = Configuration()
CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM']
CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']
CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']
CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']
CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']
CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']
CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']
CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']

siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                        CONFIG.number_lstm_units, CONFIG.number_dense_units,
                        CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                        CONFIG.activation_function,
                        CONFIG.validation_split_ratio)

best_model_path = siamese.train_model(sentences_pair,
                                      is_similar,
                                      model_save_directory='./')

#######################
##### Testing #########
#######################
#
# model = load_model(
#     r'/Users/caowenli/Desktop/ml_pj/dl/time_series_similarity/checkpoints/1593418708/lstm_128_64_0.17_0.30.h5')
model = load_model(best_model_path)
train_data_x1, train_data_x2 = create_test_data(sentences_pair)
train_preds = list(
    model.predict([train_data_x1, train_data_x2], verbose=1).ravel())
res = []
for i in train_preds:
    # threshold the similarity score into a binary label
    if i < 0.5:
        res.append(0)
    else:
        res.append(1)
Example #7
class Configuration(object):
    """Container for model hyperparameters."""

CONFIG = Configuration()
CONFIG.embedding_dim = siamese_config['EMBEDDING_DIM']
CONFIG.max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']
CONFIG.number_lstm_units = siamese_config['NUMBER_LSTM']
CONFIG.rate_drop_lstm = siamese_config['RATE_DROP_LSTM']
CONFIG.number_dense_units = siamese_config['NUMBER_DENSE_UNITS']
CONFIG.activation_function = siamese_config['ACTIVATION_FUNCTION']
CONFIG.rate_drop_dense = siamese_config['RATE_DROP_DENSE']
CONFIG.validation_split_ratio = siamese_config['VALIDATION_SPLIT']
print('go to siamese')
siamese = SiameseBiLSTM(CONFIG.embedding_dim, CONFIG.max_sequence_length,
                        CONFIG.number_lstm_units, CONFIG.number_dense_units,
                        CONFIG.rate_drop_lstm, CONFIG.rate_drop_dense,
                        CONFIG.activation_function,
                        CONFIG.validation_split_ratio)

best_model_path = siamese.train_model(
    sentences_pair,
    is_similar,
    embedding_meta_data,
    model_save_directory='/home/mtp-2/Desktop/siamese paper/implementation')

from operator import itemgetter
from keras.models import load_model

model = load_model(best_model_path)

test_sentence_pairs = [
    ('What can make Physics easy to learn? I am going to learn physis. I love it.',
     'How can you make physics easy to learn? Physics is my love and i will learn it.'
     ),
    ('How many times a day do a clocks hands overlap? This clock is very lovely.',
     'What does it mean that every time I look at the clock the numbers are the same? Clock looks beautiful.'
     )
]
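Example #7 is truncated here. A plausible continuation, mirroring Example #5 and the itemgetter import above (an assumption, not part of the original snippet):

# Assumed continuation based on Example #5: score the pairs and sort
# them by predicted similarity, highest first.
test_data_x1, test_data_x2, leaks_test = create_test_data(
    tokenizer, test_sentence_pairs, siamese_config['MAX_SEQUENCE_LENGTH'])
preds = list(model.predict([test_data_x1, test_data_x2, leaks_test], verbose=1).ravel())
results = [(x, y, z) for (x, y), z in zip(test_sentence_pairs, preds)]
results.sort(key=itemgetter(2), reverse=True)
print(results)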