def write_represents_to_pkl(path, output_path, name='train', model_path=None):
    """Encode a data split with the text-match model and pickle the vectors.

    Restores the alphabets and pre-tokenised training data from ``path``
    (a pickle written at training time), loads the saved model weights,
    computes sentence representations, and dumps only the representation
    list to ``output_path``.

    Args:
        path: pickle file holding alphabets, ``train_texts`` and
            ``train_ids`` — the load order below must mirror the dump
            order used when the file was written.
        output_path: destination ``.pkl`` file for the representations.
        name: data-split label passed through to ``get_represents``.
        model_path: path to the saved model weights.  Defaults to the
            module-level ``model_dir`` for backward compatibility.
            NOTE(review): that global is only assigned inside the
            ``__main__`` block, so callers importing this module should
            pass ``model_path`` explicitly.
    """
    if model_path is None:
        model_path = model_dir
    data = Data(ori_texts=[], labels=[], if_train=False)
    with open(path, 'rb') as rbf:
        data.char_alphabet.instance2index = pickle.load(rbf)
        data.word_alphabet.instance2index = pickle.load(rbf)
        data.label_alphabet.instance2index = pickle.load(rbf)
        data.char_alphabet_size = pickle.load(rbf)
        data.word_alphabet_size = pickle.load(rbf)
        data.label_alphabet_size = pickle.load(rbf)
        data.label_alphabet.instances = pickle.load(rbf)
        data.train_texts = pickle.load(rbf)
        data.train_ids = pickle.load(rbf)
    data.fix_alphabet()
    model = TextMatchModel(data)
    model.load_state_dict(
        torch.load(model_path, map_location=model.configs['map_location']))
    model.eval()  # inference only: disable dropout / batch-norm updates
    model.to(model.configs['device'])
    train_texts, train_represents, train_label_ids = get_represents(
        data, model, name, model.configs)
    # Only the representation vectors are persisted; the texts and label
    # ids can be re-derived from the training pickle if needed.
    with open(output_path, 'wb') as wbf:
        pickle.dump(train_represents, wbf)
def write_represents_to_txt(path, output_path, name='train', model_path=None,
                            no_train_file=None):
    """Encode a data split with the text-match model and dump text files.

    Restores alphabets and training data from ``path``, loads the model,
    merges in the "no-train" corpus, computes representations, and writes
    ``train_texts.txt``, ``train_represents.txt`` and
    ``train_label_ids.txt`` under the ``output_path`` directory.

    Args:
        path: pickle file holding alphabets, ``train_texts`` and
            ``train_ids`` — load order must mirror the dump order.
        output_path: existing directory that receives the three .txt files.
        name: data-split label passed through to ``get_represents``.
        model_path: saved model weights; defaults to the module-level
            ``model_dir`` for backward compatibility (that global is only
            set in the ``__main__`` block — pass it explicitly when
            importing this module).
        no_train_file: path handed to ``data.read_no_train``; defaults to
            the module-level ``no_train_path``.
    """
    if model_path is None:
        model_path = model_dir
    if no_train_file is None:
        no_train_file = no_train_path
    data = Data(ori_texts=[], labels=[], if_train=False)
    with open(path, 'rb') as rbf:
        data.char_alphabet.instance2index = pickle.load(rbf)
        data.word_alphabet.instance2index = pickle.load(rbf)
        data.label_alphabet.instance2index = pickle.load(rbf)
        data.char_alphabet_size = pickle.load(rbf)
        data.word_alphabet_size = pickle.load(rbf)
        data.label_alphabet_size = pickle.load(rbf)
        data.label_alphabet.instances = pickle.load(rbf)
        data.train_texts = pickle.load(rbf)
        data.train_ids = pickle.load(rbf)
    data.fix_alphabet()
    model = TextMatchModel(data)
    model.load_state_dict(
        torch.load(model_path, map_location=model.configs['map_location']))
    model.eval()  # inference only
    model.to(model.configs['device'])
    data.no_train_texts, data.no_train_ids = data.read_no_train(no_train_file)
    train_texts, train_represents, train_label_ids = get_represents(
        data, model, name, model.configs)
    # The texts file is written only once (it does not change between
    # runs), while representations and label ids are always refreshed.
    texts_file = os.path.join(output_path, 'train_texts.txt')
    if not os.path.exists(texts_file):
        with open(texts_file, 'w') as wf:
            wf.writelines('%s\n' % item for item in train_texts)
    with open(os.path.join(output_path, 'train_represents.txt'), 'w') as wf:
        wf.writelines('%s\n' % item for item in train_represents)
    with open(os.path.join(output_path, 'train_label_ids.txt'), 'w') as wf:
        wf.writelines('%s\n' % item for item in train_label_ids)
if __name__ == '__main__':
    # Demo: scene matching.
    dset_path = os.path.join(ROOT_PATH, 'models/text_match_v1/data/alphabet.dset')
    model_dir = os.path.join(ROOT_PATH, 'saved_models/text_match_v1/text_match_v1.model')
    data = Data(ori_texts=[], labels=[], if_train=False)
    with open(dset_path, 'rb') as rbf:
        # NOTE: the load order below must mirror the dump order used when
        # alphabet.dset was written (unlike the functions above, this demo
        # does not read train_ids).
        data.char_alphabet.instance2index = pickle.load(rbf)
        data.word_alphabet.instance2index = pickle.load(rbf)
        data.label_alphabet.instance2index = pickle.load(rbf)
        data.char_alphabet_size = pickle.load(rbf)
        data.word_alphabet_size = pickle.load(rbf)
        data.label_alphabet_size = pickle.load(rbf)
        data.label_alphabet.instances = pickle.load(rbf)
        data.train_texts = pickle.load(rbf)
    data.fix_alphabet()
    model = TextMatchModel(data)
    model.load_state_dict(
        torch.load(model_dir, map_location=model.configs['map_location']))
    model.eval()  # inference only: disable dropout / batch-norm updates
    model.to(model.configs['device'])
    # Prepare the test corpus for the scenes.
    scene_name = []
    text_list, label_list = [], []
    scenes = [{'name': '回家'}, {'name': '休息'}]
    for scene in scenes:
        scene_name.append(scene['name'])
        text_list.append(scene['name'])  # add scene name
    # NOTE(review): label_list is still empty here and text_list/model are
    # not yet consumed — the demo presumably continues past this chunk;
    # verify against the full file.