encoder_model.load_weights(path + 'encoder_model.h5')

# load the decoder architecture and weights from the checkpoint files
with open(path + 'decoder_model.json', 'r') as decoder_json_file:
    loaded_model_json = decoder_json_file.read()
decoder_model = model_from_json(loaded_model_json)
decoder_model.load_weights(path + 'decoder_model.h5')

# answer questions: run inference slice by slice and map every question id to its decoded answer
qid_to_answer_dict = {}
for slice_size in range(math.ceil(len(train_new['context']) / size)):
    print('inference on part %s of the dataset' % slice_size)

    # prepare the encoder/decoder inputs for the current slice of the dataset
    input_data = ppd.process_data([
        train_new['context'][size * slice_size:size * (slice_size + 1)],
        train_new['question'][size * slice_size:size * (slice_size + 1)],
        train_new['answer'][size * slice_size:size * (slice_size + 1)]
    ], data_info)

    # decode one sequence at a time and store the predicted answer under its question id
    for seq_index in tqdm(
            range(len(train_new['context'][size * slice_size:size * (slice_size + 1)]))):
        decoded_sentence = ppd.decode_sequence(
            input_data['encoder_input']['context_encoder_input'][seq_index:seq_index + 1],
            input_data['encoder_input']['question_encoder_input'][seq_index:seq_index + 1],
            data_info['answer_token_to_int'],
            data_info['answer_int_to_token'],
            encoder_model, decoder_model)
        qid_to_answer_dict[train_new['qid'][seq_index + (slice_size * size)]] = decoded_sentence
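# --- illustrative follow-up, not part of the original script ---
# A minimal sketch of how the collected predictions could be written to disk in the
# SQuAD-style "qid -> answer string" JSON format; the file name 'predictions.json'
# is an assumption, not something defined by the original code.
import json

with open(path + 'predictions.json', 'w') as prediction_file:
    json.dump(qid_to_answer_dict, prediction_file)
print('wrote %d predictions' % len(qid_to_answer_dict))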
import pandas as pd
import numpy as np

from model_tree import model_data
from preprocess_data import process_data
from read_data import read_data
from view_data import view_data

# show all columns
pd.set_option('display.max_columns', None)
# show all rows
# pd.set_option('display.max_rows', None)

if __name__ == '__main__':
    filepath_train_data = 'train.csv'
    filepath_test_data = 'test.csv'

    # read the training data and the test data
    df_train_data = read_data(filepath_train_data)
    df_test_data = read_data(filepath_test_data)

    # merge the training set and the test set
    df_data = pd.concat([df_train_data, df_test_data], sort=False)
    # df_data.reset_index(inplace=True)

    # data preprocessing
    df_data = process_data(df_data)
    print("=" * 50)

    # exploratory data visualization
    df_train_data = df_data[df_data['Survived'].notnull()]
    df_test_data = df_data[df_data['Survived'].isnull()]
    view_data(df_data, df_train_data)

    # train the model and output the prediction results
    clf = model_data(df_data)
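# --- illustrative note, not part of the original script ---
# DataFrame.append was removed in pandas 2.0; pd.concat is the drop-in replacement used above.
# The two calls below stack the same rows (df_test_data after df_train_data, original indices
# kept because ignore_index defaults to False):
#
#   df_data = df_train_data.append(df_test_data, sort=False)        # pandas < 2.0 only
#   df_data = pd.concat([df_train_data, df_test_data], sort=False)  # works on current pandas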
@app.route('/search/results', methods=['GET', 'POST'])
def search_request():
    # print(request.form["input"])
    search_term = request.form.get("input")
    # search_term = flask.request.args.get('name')

    # rank the top-k books against the query using cosine similarity over the tf-idf vectors
    Q = cosine_similarity(books_data=books_data,
                          DF=DF,
                          tf_idf=tf_idf,
                          total_vocab=total_vocab,
                          total_vocab_size=total_vocab_size,
                          k=10,
                          query=search_term)
    print(Q)
    return render_template('results.html', res=Q)


# def index():
#     return render_template('index.html', variable = Q)

if __name__ == "__main__":
    load_data = False
    if not load_data:
        # load the corpus and build the tf-idf index once, before the server starts
        books_data = load_file()
        N = books_data.shape[0]
        processed_bookname, processed_text = process_data(books_data)
        DF, total_vocab_size, total_vocab = build_DF(N, processed_text, processed_bookname)
        tf_idf, df = tf_idf(N, processed_text, processed_bookname)  # rebinds the name tf_idf to the returned matrix
        # Q = cosine_similarity(books_data=books_data, DF=DF, tf_idf=tf_idf, total_vocab=total_vocab,
        #                       total_vocab_size=total_vocab_size, k=10,
        #                       query="The evening of the day on which Mr Gibson had been to see the squire")

    app.run(debug=True)
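# --- illustrative sketch, not part of the original app ---
# cosine_similarity above is imported from elsewhere and its implementation is not shown here.
# The function below is a hypothetical, self-contained version of the same idea: score every
# document by the cosine between its tf-idf vector and the query's tf-idf vector and return
# the indices of the k best matches. It assumes dense numpy vectors; all names are illustrative.
import numpy as np

def rank_by_cosine(doc_vectors, query_vector, k=10):
    # doc_vectors: (num_docs, vocab_size) tf-idf matrix; query_vector: (vocab_size,) tf-idf vector
    norms = np.linalg.norm(doc_vectors, axis=1) * np.linalg.norm(query_vector)
    norms = np.where(norms == 0, 1.0, norms)   # guard against all-zero vectors
    scores = (doc_vectors @ query_vector) / norms
    return np.argsort(scores)[::-1][:k]        # indices of the k highest-scoring documents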
def train_slices(data, data_info, embedding, hidden_size, embedding_dim,
                 batch_size, epochs, slice_size, size, path):
    # check whether the path is available; if not, create it
    if not os.path.isdir(path):
        os.makedirs(path)

    print('training on part %s of the dataset' % slice_size)

    # save which part of the SQuAD dataset we are currently working with,
    # so training can resume from there if it aborts
    with open(path + '/slice_size.txt', 'w') as file:
        file.write(str(slice_size))

    # prepare the input data for the defined slice of the dataset
    input_data = ppd.process_data([
        data[0][size * slice_size:size * (slice_size + 1)],
        data[1][size * slice_size:size * (slice_size + 1)],
        data[2][size * slice_size:size * (slice_size + 1)]
    ], data_info)

    ######################################################################################
    # create the models based on the given parameters
    models = mrm.models(embedding, data_info, hidden_size, embedding_dim)

    # if there are models available from a previous run, load their weights into the built models
    if os.path.isfile(path + 'train_model.h5'):
        print('load models from previous run')
        models['train_model'].load_weights(path + 'train_model.h5')
        models['encoder_model'].load_weights(path + 'encoder_model.h5')
        models['decoder_model'].load_weights(path + 'decoder_model.h5')

    # train the model on the input data with the given batch size and number of epochs
    print('training model')
    models['train_model'].fit(
        [
            input_data['encoder_input']['context_encoder_input'],
            input_data['encoder_input']['question_encoder_input'],
            input_data['decoder_input']['answer_decoder_input']
        ],
        input_data['decoder_input']['answer_decoder_target'],
        batch_size=batch_size,
        epochs=epochs)

    #####################################################################################
    # save the weights of the models to create a checkpoint
    print('save models')
    models['train_model'].save_weights(path + 'train_model.h5')
    models['encoder_model'].save_weights(path + 'encoder_model.h5')
    models['decoder_model'].save_weights(path + 'decoder_model.h5')

    # save the architecture of the models
    with open(path + 'train_model.json', 'w') as train_json_file:
        train_json_file.write(models['train_model'].to_json())
    with open(path + 'encoder_model.json', 'w') as encoder_json_file:
        encoder_json_file.write(models['encoder_model'].to_json())
    with open(path + 'decoder_model.json', 'w') as decoder_json_file:
        decoder_json_file.write(models['decoder_model'].to_json())

    # delete the models and input data to free memory
    del models
    del input_data
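# --- illustrative driver, not part of the original module ---
# A minimal sketch of how train_slices could be called over the whole dataset and how the
# slice_size.txt checkpoint written above could be used to resume after an aborted run.
# The function name train_all_slices, the default hyperparameter values, and the checkpoint
# directory are assumptions; data, data_info and embedding come from the project's preprocessing.
import math
import os

def train_all_slices(data, data_info, embedding, hidden_size=256, embedding_dim=100,
                     batch_size=64, epochs=1, size=1000, path='checkpoints/'):
    start_slice = 0
    # resume from the last recorded slice if a previous run was interrupted
    if os.path.isfile(path + '/slice_size.txt'):
        with open(path + '/slice_size.txt', 'r') as file:
            start_slice = int(file.read())

    num_slices = math.ceil(len(data[0]) / size)
    for slice_size in range(start_slice, num_slices):
        train_slices(data, data_info, embedding, hidden_size, embedding_dim,
                     batch_size, epochs, slice_size, size, path)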