def __init__(self, train=True):
    """Load the corpus and turn it into fixed-length index tensors.

    The data file is read as sentences separated by a blank line. Each line
    of a sentence is whitespace-separated; its first field is taken as the
    character and its last field as the tag. Every sentence is truncated or
    right-padded to ``opt.max_length``.

    Args:
        train: When true, read ``opt.train_data_path``; otherwise read
            ``opt.test_data_path``.

    Side effects:
        Sets ``self.X_data`` and ``self.Y_data`` to ``LongTensor`` objects
        holding the encoded characters and tags respectively.
    """
    super(RmrbDataset, self).__init__()

    # Vocabulary and tag dictionaries; only the padding ids are needed here.
    # Context managers make sure the file handles are closed again.
    with open(opt.vocab_file_path, 'r', encoding='utf-8') as vocab_file:
        vocab_dic = json.load(vocab_file)
    with open(opt.tag_file_path, 'r', encoding='utf-8') as tag_file:
        tag_dic = json.load(tag_file)
    pad_word = vocab_dic['_pad']
    pad_tag = tag_dic['O']

    data_path = opt.train_data_path if train else opt.test_data_path
    with open(data_path, 'r', encoding='utf-8') as fin:
        data = fin.read().strip()  # drop leading/trailing newlines

    self.X_data, self.Y_data = [], []
    for s in data.split('\n\n'):  # one chunk per sentence
        x, y = [], []
        items = s.split('\n')
        seq_len = len(items)  # sentence length in characters

        # Split every line into its character and its tag.
        for item in items:
            features = item.split()
            x.append(features[0])
            y.append(features[-1])
        x = word2idx(x)
        y = tag2idx(y)

        # Truncate long sentences, pad short ones.
        if seq_len > opt.max_length:
            x = x[:opt.max_length]
            y = y[:opt.max_length]
        else:
            missing = opt.max_length - seq_len
            x.extend([pad_word] * missing)
            y.extend([pad_tag] * missing)

        self.X_data.append(x)
        self.Y_data.append(y)

    self.X_data = t.LongTensor(self.X_data)
    self.Y_data = t.LongTensor(self.Y_data)
def __init__(self, *, env, model, nsteps, gamma, lam):
    """Set up the rollout state and reset the environment.

    Args:
        env: Environment object. ``env.num_envs`` is read and
            ``env.reset()`` must return an ``(observations, instructions)``
            pair.
        model: Policy model; stored as ``self.model`` and its
            ``initial_state`` becomes the initial ``self.states``.
        nsteps: Stored as ``self.nsteps``.
        gamma: Stored as ``self.gamma``.
        lam: Stored as ``self.lam``.
    """
    self.mv_maker = utils.movie_maker(path=vedio_path)
    self.env = env
    self.model = model
    nenv = env.num_envs

    # env.reset() supplies the initial observations and raw instructions,
    # so no placeholder buffers are allocated beforehand.
    self.obs, raw_insts = env.reset()
    # Encode each instruction: ids -> string -> word indices.
    self.insts = [utils.word2idx(utils.id2str(inst)) for inst in raw_insts]

    self.gamma = gamma
    self.lam = lam
    self.nsteps = nsteps
    self.states = model.initial_state
    self.dones = [False for _ in range(nenv)]
def predict(sentence, print_entity=False):
    """Run the BiLSTM-CRF model on ``sentence`` and return its tags.

    A fresh model is built on every call; weights are loaded from
    ``opt.load_model_path`` when that option is set.

    Args:
        sentence: The input to tag; it is indexed position by position and
            passed to ``word2idx``.
        print_entity: When true, print the list of extracted entities
            followed by ``'\\n'``.

    Returns:
        The result of ``idx2tag`` applied to the predicted tag indices.
    """
    # Model
    model = BiLSTM_CRF(
        opt.vocab_size,
        opt.emb_dim,
        opt.emb_dim // 2,
        opt.tag_num,
        dropout=opt.dropout,
    )
    if opt.load_model_path:
        model.load(opt.load_model_path)
    model.eval()

    # Data: encode the sentence and add a batch dimension of one.
    batch = t.LongTensor(word2idx(sentence)).unsqueeze(dim=0)
    tag_idx = model(batch).squeeze(dim=0).numpy().tolist()

    # Entity extraction: tag index 1 opens an entity and every directly
    # following tag index 2 extends it.
    length = min(opt.max_length, len(sentence))
    entity_list = []
    start = 0
    while start < length:
        if tag_idx[start] != 1:
            start += 1
            continue
        entity = sentence[start]
        stop = start + 1
        while stop < length and tag_idx[stop] == 2:
            entity += sentence[stop]
            stop += 1
        entity_list.append(entity)
        start = stop

    if print_entity:
        print(entity_list)
        print('\n')

    return idx2tag(tag_idx)
# =====================
# Empty accumulators; the leading 0-length axis is the sample axis.
images_all = np.zeros((0, img_dim[0], img_dim[1], img_dim[2]))
insts_all = np.zeros((0, inst_dim))
actions_all = np.zeros((0))
rewards_all = np.zeros((0, ))

# Get demonstration data
# =====================
print("#" * 50)
print('Collecting data from teacher (fake AI) ... ')
action = 0
# Samples are gathered in plain lists and merged once after the loop;
# concatenating inside the loop would re-copy the whole dataset each step.
demo_images, demo_insts, demo_actions = [], [], []
for i in range(steps):
    if i == 0:
        ob, state = env.reset()
    else:
        ob, state, reward, done, _ = env.step(action)
    # state[0:3] encodes the instruction, state[3] the teacher's action.
    inst = utils.word2idx(utils.id2str(state[0:3]))
    action = state[3]
    demo_images.append(ob)
    demo_insts.append(inst)
    demo_actions.append(action)

if demo_images:  # nothing to merge when steps == 0
    images_all = np.concatenate([images_all, demo_images], axis=0)
    insts_all = np.concatenate([insts_all, demo_insts], axis=0)
    actions_all = np.concatenate([actions_all, demo_actions], axis=0)

# Pretrain model using data for demonstration
# =====================
model.load_model()

# Aggregate and retrain
def run(self):
    """Collect ``self.nsteps`` environment steps and compute GAE targets.

    For every step the model is queried, the environment is advanced, the
    returned instructions are re-encoded, and the first environment's frame
    is handed to the movie maker (added while the episode is running,
    exported once it is done). Advantages are then accumulated backwards
    with discount ``self.gamma`` and GAE factor ``self.lam``.

    Returns:
        A tuple ``(obs, insts, returns, dones, actions, values,
        neglogpacs, states, epinfos)``. The first seven entries are the
        collected batches passed through ``sf01``; ``states`` is the model
        state from before the rollout and ``epinfos`` is the list of
        ``'episode'`` entries found in the step infos.
    """
    mb_obs, mb_rewards, mb_actions = [], [], []
    mb_values, mb_dones, mb_neglogpacs = [], [], []
    mb_insts = []
    mb_states = self.states
    epinfos = []

    for _ in range(self.nsteps):
        actions, values, self.states, neglogpacs = self.model.step(
            self.obs, self.insts, self.states, self.dones)
        mb_obs.append(self.obs.copy())
        mb_actions.append(actions)
        mb_values.append(values)
        mb_neglogpacs.append(neglogpacs)
        mb_dones.append(self.dones)
        mb_insts.append(self.insts)

        # Keep the instructions that were active for this step; they are
        # needed when the finished episode is exported below.
        prev_insts = self.insts
        (self.obs, self.insts), rewards, self.dones, infos = self.env.step(actions)
        # Encode each instruction: ids -> string -> word indices.
        self.insts = [utils.word2idx(utils.id2str(inst)) for inst in self.insts]

        # Only the first environment is recorded.
        if not self.dones[0]:
            self.mv_maker.add_new_image(self.obs[0])
        else:
            self.mv_maker.export_ani(self.obs[0], prev_insts[0], rewards[0])

        for info in infos:
            maybeepinfo = info.get('episode')
            if maybeepinfo:
                epinfos.append(maybeepinfo)
        mb_rewards.append(rewards)

    # Batch of steps -> batch of rollouts.
    mb_obs = np.asarray(mb_obs, dtype=self.obs.dtype)
    mb_insts = np.asarray(mb_insts, dtype=np.int32)
    mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
    mb_actions = np.asarray(mb_actions)
    mb_values = np.asarray(mb_values, dtype=np.float32)
    mb_neglogpacs = np.asarray(mb_neglogpacs, dtype=np.float32)
    # The builtin ``bool`` is used because the ``np.bool`` alias was removed
    # from NumPy.
    mb_dones = np.asarray(mb_dones, dtype=bool)
    last_values = self.model.value(self.obs, self.insts, self.states, self.dones)

    # Discount / bootstrap off the value function.
    mb_returns = np.zeros_like(mb_rewards)
    mb_advs = np.zeros_like(mb_rewards)
    lastgaelam = 0
    for step in reversed(range(self.nsteps)):
        if step == self.nsteps - 1:
            # The last step bootstraps from the state after the rollout.
            nextnonterminal = 1.0 - self.dones
            nextvalues = last_values
        else:
            nextnonterminal = 1.0 - mb_dones[step + 1]
            nextvalues = mb_values[step + 1]
        delta = (mb_rewards[step]
                 + self.gamma * nextvalues * nextnonterminal
                 - mb_values[step])
        lastgaelam = delta + self.gamma * self.lam * nextnonterminal * lastgaelam
        mb_advs[step] = lastgaelam
    mb_returns = mb_advs + mb_values

    return (*map(sf01, (mb_obs, mb_insts, mb_returns, mb_dones,
                        mb_actions, mb_values, mb_neglogpacs)),
            mb_states, epinfos)
# DataSet
# =====================
# Empty accumulators; the leading 0-length axis is the sample axis.
images_all = np.zeros((0, img_dim[0], img_dim[1], img_dim[2]))
insts_all = np.zeros((0, inst_dim))
actions_all = np.zeros((0))
rewards_all = np.zeros((0, ))

# Get demonstration data
# =====================
print("#" * 50)
print('Collecting data from teacher (fake AI) ... ')
collect_num_envs = 16
collect_data_env = utils.multi_env(app_path, num_envs=collect_num_envs)
obs, states = collect_data_env.reset()
# state[0:3] encodes the instruction, state[3] the teacher's action.
insts = [utils.word2idx(utils.id2str(state[0:3])) for state in states]
actions = [state[3] for state in states]

# Per-step batches are gathered in lists and merged once after the loop;
# concatenating inside the loop would re-copy the whole dataset each step.
image_batches, inst_batches, action_batches = [], [], []
for _ in range(300):
    (obs, states), reward, done, _ = collect_data_env.step(actions)
    insts = [utils.word2idx(utils.id2str(state[0:3])) for state in states]
    actions = [state[3] for state in states]
    print(actions)
    image_batches.append(obs)
    inst_batches.append(insts)
    action_batches.append(actions)

images_all = np.concatenate([images_all, *image_batches], axis=0)
insts_all = np.concatenate([insts_all, *inst_batches], axis=0)
actions_all = np.concatenate([actions_all, *action_batches], axis=0)

# Count DATA
# =====================
actions_all_2 = actions_all.tolist()