Code Example #1
File: baseline.py  Project: JacobLoe/QA_project
with open(path + 'encoder_model.json', 'r') as encoder_json_file:
    loaded_model_json = encoder_json_file.read()
    encoder_model = model_from_json(loaded_model_json)
encoder_model.load_weights(path + 'encoder_model.h5')

with open(path + 'decoder_model.json', 'r') as decoder_json_file:
    loaded_model_json = decoder_json_file.read()
    decoder_model = model_from_json(loaded_model_json)
decoder_model.load_weights(path + 'decoder_model.h5')

# answer the questions: run inference on the dataset one slice at a time
qid_to_answer_dict = {}
for slice_size in range(math.ceil(len(train_new['context']) / size)):
    print('inference on part %s of the dataset' % slice_size)
    # prepare the encoder/decoder inputs for the current slice
    input_data = ppd.process_data([
        train_new['context'][size * slice_size:size * (slice_size + 1)],
        train_new['question'][size * slice_size:size * (slice_size + 1)],
        train_new['answer'][size * slice_size:size * (slice_size + 1)]
    ], data_info)
    slice_length = len(train_new['context'][size * slice_size:size * (slice_size + 1)])
    for seq_index in tqdm(range(slice_length)):
        # decode the answer for one context/question pair
        context_input = input_data['encoder_input']['context_encoder_input'][seq_index:seq_index + 1]
        question_input = input_data['encoder_input']['question_encoder_input'][seq_index:seq_index + 1]
        decoded_sentence = ppd.decode_sequence(
            context_input, question_input,
            data_info['answer_token_to_int'], data_info['answer_int_to_token'],
            encoder_model, decoder_model)
        # map the decoded answer back to its question id in the full dataset
        qid_to_answer_dict[train_new['qid'][seq_index + slice_size * size]] = decoded_sentence
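
The loop above fills qid_to_answer_dict with one decoded answer per question id. A minimal sketch of how those predictions could be written out for the SQuAD evaluation script, which expects a single JSON object mapping question ids to answer strings (the output filename is an assumption):

import json

# dump the qid -> answer mapping as JSON; the filename is hypothetical
with open(path + 'predictions.json', 'w') as pred_file:
    json.dump(qid_to_answer_dict, pred_file)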
Code Example #2
File: main.py  Project: wer1123/titanic_analysis
import pandas as pd
import numpy as np

from model_tree import model_data
from preprocess_data import process_data
from read_data import read_data
from view_data import view_data

# show all columns when printing DataFrames
pd.set_option('display.max_columns', None)
# show all rows as well (left disabled here)
# pd.set_option('display.max_rows', None)

if __name__ == '__main__':
    filepath_train_data = 'train.csv'
    filepath_test_data = 'test.csv'
    # read the training and test data
    df_train_data = read_data(filepath_train_data)
    df_test_data = read_data(filepath_test_data)
    # merge the training set and the test set
    # (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
    df_data = pd.concat([df_train_data, df_test_data], sort=False)
    # df_data.reset_index(inplace=True)
    # data preprocessing
    df_data = process_data(df_data)
    print("=" * 50)
    # exploratory data visualization: rows with a Survived label are the
    # training set, rows without one are the test set
    df_train_data = df_data[df_data['Survived'].notnull()]
    df_test_data = df_data[df_data['Survived'].isnull()]
    view_data(df_data, df_train_data)
    # train the model and output the predictions
    clf = model_data(df_data)
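
read_data, process_data, view_data, and model_data live in separate modules of the project; a minimal sketch of what the read_data helper might look like, assuming it is just a thin wrapper around pandas.read_csv (this implementation is an assumption, not the project's code):

import pandas as pd

def read_data(filepath):
    # hypothetical implementation: load the CSV into a DataFrame and report
    # its shape so the caller can sanity-check the load
    df = pd.read_csv(filepath)
    print('loaded %s: %d rows, %d columns' % (filepath, df.shape[0], df.shape[1]))
    return df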
Code Example #3
@app.route('/search/results', methods=['GET', 'POST'])
def search_request():
    # print(request.form["input"])
    search_term = request.form.get("input")
    # search_term = flask.request.args.get('name')
    Q = cosine_similarity(books_data=books_data,
                          DF=DF,
                          tf_idf=tf_idf,
                          total_vocab=total_vocab,
                          total_vocab_size=total_vocab_size,
                          k=10,
                          query=search_term)
    print(Q)
    return render_template('results.html', res=Q)


# def index():
#     return render_template('index.html', variable = Q)

if __name__ == "__main__":
    load_data = False
    # build the search index (document frequencies and tf-idf weights) from
    # the book corpus before starting the web server
    if not load_data:
        books_data = load_file()
        N = books_data.shape[0]
        processed_bookname, processed_text = process_data(books_data)
        DF, total_vocab_size, total_vocab = build_DF(N, processed_text,
                                                     processed_bookname)
        # note: this rebinds the name tf_idf from the builder function to the
        # weights it returns, which is what search_request() passes on
        tf_idf, df = tf_idf(N, processed_text, processed_bookname)
    # Q = cosine_similarity(books_data = books_data,DF = DF, tf_idf = tf_idf,total_vocab = total_vocab, total_vocab_size = total_vocab_size, k = 10, query = "The evening of the day on which Mr Gibson had been to see the squire")
    app.run(debug=True)
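
The route above assumes a Flask app and an index page whose form posts an "input" field to /search/results; a minimal sketch of that missing setup, assuming index.html contains such a form (the index route is an assumption, mirroring the commented-out stub above):

from flask import Flask, request, render_template

app = Flask(__name__)

@app.route('/')
def index():
    # hypothetical index route: renders a page with a search form whose
    # "input" field is posted to /search/results and read by search_request()
    return render_template('index.html')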
Code Example #4
def train_slices(data, data_info, embedding, hidden_size, embedding_dim,
                 batch_size, epochs, slice_size, size, path):
    # check whether the output path exists; create it if it does not
    if not os.path.isdir(path):
        os.makedirs(path)
    print('training on part %s of the dataset' % slice_size)
    # save which part of the SQuAD dataset we are currently working on, so
    # training can be resumed from there if it aborts
    with open(path + '/slice_size.txt', 'w') as file:
        file.write(str(slice_size))
    # prepare the input data for the defined slice of the dataset
    input_data = ppd.process_data([
        data[0][size * slice_size:size * (slice_size + 1)],
        data[1][size * slice_size:size * (slice_size + 1)],
        data[2][size * slice_size:size * (slice_size + 1)]
    ], data_info)
    ######################################################################################
    # create the models based on the given parameters
    models = mrm.models(embedding, data_info, hidden_size, embedding_dim)

    # if there are models available from a previous run, load them into the built models
    if os.path.isfile(path + 'train_model.h5'):
        print('load models from previous run')
        models['train_model'].load_weights(path + 'train_model.h5')
        models['encoder_model'].load_weights(path + 'encoder_model.h5')
        models['decoder_model'].load_weights(path + 'decoder_model.h5')

    # train the model on the input data, with the given batch size and for the given epochs
    print('training model')
    models['train_model'].fit(
        [
            input_data['encoder_input']['context_encoder_input'],
            input_data['encoder_input']['question_encoder_input'],
            input_data['decoder_input']['answer_decoder_input']
        ],
        input_data['decoder_input']['answer_decoder_target'],
        batch_size=batch_size,
        epochs=epochs)
    #####################################################################################
    # save the weights of the models to create a checkpoint
    print('save models')
    models['train_model'].save_weights(path + 'train_model.h5')
    models['encoder_model'].save_weights(path + 'encoder_model.h5')
    models['decoder_model'].save_weights(path + 'decoder_model.h5')

    # save the architecture of the models
    train_model_json = models['train_model'].to_json()
    with open(path + 'train_model.json', 'w') as train_json_file:
        train_json_file.write(train_model_json)

    encoder_model_json = models['encoder_model'].to_json()
    with open(path + 'encoder_model.json', 'w') as encoder_json_file:
        encoder_json_file.write(encoder_model_json)

    decoder_model_json = models['decoder_model'].to_json()
    with open(path + 'decoder_model.json', 'w') as decoder_json_file:
        decoder_json_file.write(decoder_model_json)
    # delete the models and the input data to free memory
    del models
    del input_data
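
train_slices processes one slice of the dataset per call; a minimal sketch of a driver loop that walks the whole dataset slice by slice, using the same ceil(len/size) slice count as the inference loop in Code Example #1 (the variable names below are assumptions):

import math

# hypothetical driver: data is the [contexts, questions, answers] list and
# size is the number of samples per slice, as in the function above
num_slices = math.ceil(len(data[0]) / size)
for slice_index in range(num_slices):
    train_slices(data, data_info, embedding, hidden_size, embedding_dim,
                 batch_size, epochs, slice_index, size, path)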