def load_data(data_path="../data/testa_seg.pkl", word2id_path='../data/word2id.obj'):
    # Load the word-to-id vocabulary and the segmented test set.
    with open(word2id_path, 'rb') as f:
        word2id = pickle.load(f)
    with open(data_path, 'rb') as f:
        raw_data = pickle.load(f)
    # Convert the tokenized samples to id sequences.
    transformed_data = transform_data_to_id(raw_data, word2id)
    # Keep the third field of each raw sample alongside the id-encoded sample.
    data = [x + [y[2]] for x, y in zip(transformed_data, raw_data)]
    # Sort by length of the second field (the passage) so batches hold similarly sized samples.
    data = sorted(data, key=lambda x: len(x[1]))
    print('test data size {:d}'.format(len(data)))
    return data
def load_data(w2id_path="data/word2id.obj", seg_path="data/test_seg.pkl"):
    print("data loading...")
    with open(w2id_path, 'rb') as f:
        word2id = pickle.load(f)
    # Only the first 100 samples are kept here (a small slice for quick runs).
    with open(seg_path, 'rb') as f:
        raw_data = pickle.load(f)[:100]
    transformed_data = transform_data_to_id(raw_data, word2id)
    data = [x + [y[2]] for x, y in zip(transformed_data, raw_data)]
    data = sorted(data, key=lambda x: len(x[1]))
    print('test data size {:d}'.format(len(data)))
    return data
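# transform_data_to_id is imported from elsewhere in this repo and is not shown
# here. The function below is only a hypothetical sketch of what it is assumed
# to do: map every token of the query, the passage, and the candidate answers
# to its vocabulary id. The field order and the '<unk>' fallback are
# assumptions, not the repo's actual implementation.
def transform_data_to_id_sketch(raw_data, word2id):
    unk = word2id.get('<unk>', 0)  # assumed reserved id for unknown tokens

    def to_ids(tokens):
        return [word2id.get(tok, unk) for tok in tokens]

    transformed = []
    for sample in raw_data:
        query, passage, candidates = sample[0], sample[1], sample[2]
        transformed.append([to_ids(query),
                            to_ids(passage),
                            [to_ids(c) for c in candidates]])
    return transformed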
def get_pkl(md_list, dt_name, opts, get_argmax=True):
    # raw_data = seg_data(args.data)
    with open("data/word2id.obj", 'rb') as f:
        word2id = pickle.load(f)
    # TODO: change the data to be predicted here
    with open("data/" + dt_name + "_seg.pkl", 'rb') as f:
        raw_data = pickle.load(f)
    transformed_data = transform_data_to_id(raw_data, word2id)
    data = [x + [y[2]] for x, y in zip(transformed_data, raw_data)]
    data = sorted(data, key=lambda x: len(x[1]))
    print('test data size {:d}'.format(len(data)))
    # Run inference once per model checkpoint in md_list.
    for model_name in md_list:
        print("{} in [{}]".format(model_name, " ".join(md_list)))
        model_path = "net/" + model_name + ".pt"
        with open(model_path, 'rb') as f:
            model = torch.load(f)
        if torch.cuda.is_available():
            model.cuda()
        inference(model, data, model_name, dt_name, opts, get_argmax)
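# The batching code below calls a padding helper that is defined elsewhere in
# the repo. The function here is only a hypothetical sketch, assuming it
# truncates or right-pads every id sequence to max_len with 0 and also returns
# the clipped lengths (the second return value is discarded at the call sites
# below).
def padding_sketch(sequences, max_len):
    lengths = [min(len(seq), max_len) for seq in sequences]
    padded = [list(seq[:max_len]) + [0] * (max_len - min(len(seq), max_len))
              for seq in sequences]
    return padded, lengths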
    padded_list.append(ans)
    padded_list = pad_answer(padded_list)
    return padded_list


# Load the trained model, the vocabulary, and the segmented test set.
with open(args.model, 'rb') as f:
    model = torch.load(f)
if args.cuda:
    model.cuda()
with open(args.word_path, 'rb') as f:
    word2id = pickle.load(f)
# raw_data = seg_data(args.data)
with open("data/testa_seg.pkl", 'rb') as f:
    raw_data = pickle.load(f)
transformed_data = transform_data_to_id(raw_data, word2id)
data = [x + [y[2]] for x, y in zip(transformed_data, raw_data)]
data = sorted(data, key=lambda x: len(x[1]))
print('test data size {:d}'.format(len(data)))


def inference():
    model.eval()
    predictions = []
    id_prediction = {}
    with torch.no_grad():
        for i in range(0, len(data), args.batch_size):
            print("{} in {}".format(i, len(data)))
            one = data[i:i + args.batch_size]
            # Pad queries to 50 tokens and passages to 300 tokens per batch.
            query, _ = padding([x[0] for x in one], max_len=50)
            passage, _ = padding([x[1] for x in one], max_len=300)