def testing1(best_model_path):
    """Update the siamese model and score every test pair with it.

    The tokenizer and embedding matrix are built from the two training
    sentence lists, a ``SiameseBiLSTM`` is constructed from the values in
    ``siamese_config``, and its ``update_model`` is called with the training
    pairs and their classes. The model is then loaded from the path that
    call returns and used to predict on the test pairs.

    Args:
        best_model_path: Passed unchanged to ``SiameseBiLSTM.update_model``
            (presumably the path of an existing trained model -- confirm
            against that method).

    Returns:
        A ``(results, preds)`` tuple. ``preds`` is the flattened prediction
        output as a list, in the order of the test pairs. ``results`` is a
        list of ``(first, second, prediction)`` tuples, one per test pair,
        sorted by prediction in descending order.
    """
    first_sentences, second_sentences, classes, train_pairs = (
        make_new_train_data())
    test_pairs = make_test_data()

    tokenizer, embedding_matrix = word_embed_meta_data(
        first_sentences + second_sentences, siamese_config['EMBEDDING_DIM'])
    embedding_meta_data = dict(tokenizer=tokenizer,
                               embedding_matrix=embedding_matrix)

    # Pull every hyper-parameter out of the shared config before building
    # the network; the constructor below takes them positionally.
    embedding_dim = siamese_config['EMBEDDING_DIM']
    max_sequence_length = siamese_config['MAX_SEQUENCE_LENGTH']
    lstm_units = siamese_config['NUMBER_LSTM']
    lstm_drop_rate = siamese_config['RATE_DROP_LSTM']
    dense_units = siamese_config['NUMBER_DENSE_UNITS']
    activation = siamese_config['ACTIVATION_FUNCTION']
    dense_drop_rate = siamese_config['RATE_DROP_DENSE']
    validation_split = siamese_config['VALIDATION_SPLIT']

    network = SiameseBiLSTM(embedding_dim, max_sequence_length, lstm_units,
                            dense_units, lstm_drop_rate, dense_drop_rate,
                            activation, validation_split)

    updated_model_path = network.update_model(
        best_model_path, train_pairs, classes, embedding_meta_data)

    from operator import itemgetter
    from keras.models import load_model

    model = load_model(updated_model_path)

    test_data_x1, test_data_x2, leaks_test = create_test_data(
        tokenizer, test_pairs, siamese_config['MAX_SEQUENCE_LENGTH'])

    raw_output = model.predict([test_data_x1, test_data_x2, leaks_test],
                               verbose=1)
    preds = list(raw_output.ravel())

    # Attach each prediction to its pair, highest prediction first.
    results = sorted(
        ((first, second, score)
         for (first, second), score in zip(test_pairs, preds)),
        key=itemgetter(2), reverse=True)
    return results, preds
# Example #2
# 0
def testing1(best_model_path):
    """Run the update-then-predict pipeline for the siamese network.

    Steps, in order: build the training and test data, derive the tokenizer
    and embedding matrix from the training texts, construct a
    ``SiameseBiLSTM`` from ``siamese_config``, call ``update_model`` on it,
    load the model from the path that call returns, and predict on the
    test pairs.

    Args:
        best_model_path: Forwarded as-is to ``SiameseBiLSTM.update_model``
            (presumably the location of a pretrained model -- verify
            against that method).

    Returns:
        ``(results, preds)`` where ``preds`` is the flattened prediction
        output as a list in test-pair order, and ``results`` is a list of
        3-tuples ``(pair[0], pair[1], prediction)`` sorted from the highest
        prediction to the lowest.
    """
    text1, text2, class1, train_pair = make_new_train_data()
    test_pair = make_test_data()

    tokenizer, embedding_matrix = word_embed_meta_data(
        text1 + text2, siamese_config['EMBEDDING_DIM'])
    embedding_meta_data = {
        'tokenizer': tokenizer,
        'embedding_matrix': embedding_matrix,
    }

    # Snapshot the model settings from the shared configuration.
    setting_names = (
        'EMBEDDING_DIM',
        'MAX_SEQUENCE_LENGTH',
        'NUMBER_LSTM',
        'RATE_DROP_LSTM',
        'NUMBER_DENSE_UNITS',
        'ACTIVATION_FUNCTION',
        'RATE_DROP_DENSE',
        'VALIDATION_SPLIT',
    )
    settings = {name: siamese_config[name] for name in setting_names}

    # The constructor is called positionally, so the argument order here
    # matters and differs from the lookup order above.
    siamese = SiameseBiLSTM(
        settings['EMBEDDING_DIM'],
        settings['MAX_SEQUENCE_LENGTH'],
        settings['NUMBER_LSTM'],
        settings['NUMBER_DENSE_UNITS'],
        settings['RATE_DROP_LSTM'],
        settings['RATE_DROP_DENSE'],
        settings['ACTIVATION_FUNCTION'],
        settings['VALIDATION_SPLIT'],
    )

    # update_model returns the path the refreshed model is loaded from.
    refreshed_path = siamese.update_model(
        best_model_path, train_pair, class1, embedding_meta_data)
    model = load_model(refreshed_path)

    test_data_x1, test_data_x2, leaks_test = create_test_data(
        tokenizer, test_pair, siamese_config['MAX_SEQUENCE_LENGTH'])

    model_inputs = [test_data_x1, test_data_x2, leaks_test]
    preds = list(model.predict(model_inputs, verbose=1).ravel())

    # One row per test pair:
    #   (first item, second item, prediction)
    # The original note labels the two items "known" and "unknown".
    results = [(known, unknown, score)
               for (known, unknown), score in zip(test_pair, preds)]
    results.sort(key=itemgetter(2), reverse=True)
    return results, preds