def __init__(self):
    """Set up the action table, the state space and the reward settings.

    Two CSV files are read, with paths taken from ``defineClass.params()``:
    ``path_class_name`` (its ``clsname`` column supplies the action names)
    and ``path_reward_da`` (loaded with its first column as the index).
    """
    super().__init__()
    self.params = defineClass.params()

    # Actions: every entry of the 'clsname' column, in file order.
    self.action_df = pd.read_csv(self.params.get('path_class_name'))
    self.actions = self.action_df['clsname'].values.tolist()
    # Lookup from integer index to action name.
    self.actionIndex = dict(enumerate(self.actions))

    # States: every combination of previous system dialogue act, noun
    # presence and impression level, joined with '_' (e.g. 'ct_Nx_l').
    self.states_sys_da = ['ct', 'io', 're', 'qs']
    self.states_noun_presence = ['Nx', 'No']
    self.states_impression = ['l', 'n', 'h']
    self.states = [
        '_'.join(combo)
        for combo in itertools.product(
            self.states_sys_da,
            self.states_noun_presence,
            self.states_impression,
        )
    ]
    # Lookup from state name to integer index.
    self.stateIndex = {name: idx for idx, name in enumerate(self.states)}

    # Thresholds and window length; "UI" presumably stands for user
    # impression -- TODO confirm against the code that consumes them.
    self.thres_low_UI = 3
    self.thres_high_UI = 5
    self.persist_UI_exchgs = 3

    # Reward table, indexed by the first CSV column.
    self.reward_da_df = pd.read_csv(
        self.params.get('path_reward_da'), index_col=0)

    # Per-dialogue utterance logs start empty.
    self.sys_utterance_log = []
    self.user_utterance_log = []

    self.weight_specific_theme = 0.6
def getUtteranceClassTheme(utterance):
    """Return the class label(s) and theme recorded for a system utterance.

    Args:
        utterance: The system utterance text to look up.

    Returns:
        A ``(cls_info, theme_info)`` tuple. ``cls_info`` is every matching
        ``cls`` value from the utterance-by-class CSV, converted to strings
        and joined with ``'-'`` (an empty string when nothing matches).
        ``theme_info`` is the ``theme`` of the first matching row in the
        theme CSV. For an utterance containing ``'***'`` the result is
        ``('-', '-')``.

    Raises:
        IndexError: If the utterance has no row in the theme CSV.
    """
    # Utterances containing '***' have no class/theme entry; answer before
    # touching the parameter file or the CSVs so this path does no I/O.
    if '***' in utterance:
        return '-', '-'

    params = defineClass.params()
    cls_table = pd.read_csv(params.get('path_utterance_by_class'))
    theme_table = pd.read_csv(params.get('path_theme_info'))

    # An utterance may be listed under several classes; keep them all.
    cls_matches = cls_table[cls_table['agent_utterance'] == utterance]
    cls_info = '-'.join(cls_matches['cls'].values.astype('str'))

    theme_matches = theme_table[theme_table['agent_utterance'] == utterance]
    theme_info = theme_matches['theme'].values[0]
    return cls_info, theme_info
else: print('############\n{}\n############'.format(str(options))) # seed np.random.seed(options.seed) ######## python q_learning.py -A [ACT] --model [MODEL] ############## Qtable_name = '{}/{}_Q'.format(options.model, options.model) Qfreq_name = '{}/{}_Qfreq'.format(options.model, options.model) hm_name = '{}/{}_hm.png'.format(options.model, options.model) reward_name = '{}/{}_reward.png'.format(options.model, options.model) reward_list_name = '{}/{}_reward.npy'.format(options.model, options.model) log_name = '{}/{}_log.csv'.format(options.model, options.model) # params params = defineClass.params() # Qを学習 if options.action == 'train': # dir作成 if not os.path.exists(options.model): os.mkdir(options.model) else: print('model "{}" already exists.'.format(options.model)) if_del = input('### overwrite if you push enter. ###') env = DialogueEnv() agent = QlearningAgent(epsilon=options.epsilon) agent.learn(env, episode_count=options.n_episode, learning_rate=options.alpha,
def utterance_selection(user_impression, user_utterance, history_utterance,
                        theme, candidate_action_num=1):
    """Choose the next system utterance and its action class.

    On the first exchange (``user_utterance is None``) a fixed
    theme-announcement utterance with action ``'change_theme'`` is returned.
    Otherwise the current features (``user_impression``, noun count and
    length of ``user_utterance``) are standardised together with the
    per-class feature averages, the ``candidate_action_num`` nearest classes
    by Euclidean distance are taken, one of them is drawn at random, and an
    unused utterance of that class is sampled with theme-specific rows
    weighted more heavily than ``'default'`` rows.

    Args:
        user_impression: Current impression value; compared against the
            ``prevUI3`` feature derived from the per-class averages.
        user_utterance: The user's last utterance, or ``None`` on the first
            exchange.
        history_utterance: Object exposing ``history_sysutte`` (utterances
            already used) and ``add_sysutte(utterance, action)``.
        theme: Current dialogue theme; candidates are limited to this theme
            plus ``'default'``.
        candidate_action_num: How many of the nearest classes to draw from.

    Returns:
        A ``(next_sysutte, next_action)`` tuple. The pair is also recorded
        through ``history_utterance.add_sysutte``.

    Raises:
        ValueError: If every utterance for ``theme`` and ``'default'`` has
            already been used.
    """
    # First exchange: announce the theme. Handled before any file is read,
    # because this branch needs none of the CSV data.
    if user_utterance is None:
        next_sysutte = ' *** これから{}の話をしましょう***'.format(theme)
        next_sysutte_action = 'change_theme'
        history_utterance.add_sysutte(next_sysutte, next_sysutte_action)
        return next_sysutte, next_sysutte_action

    params = defineClass.params()
    act_df = pd.read_csv(params.get('path_utterance_by_class'))
    theme_df = pd.read_csv(params.get('path_theme_info'))
    df = pd.merge(act_df[['agent_utterance', 'cls']],
                  theme_df[['agent_utterance', 'theme']],
                  on='agent_utterance')

    def weightSpecificTheme(cand, w=0.6):
        """Draw one row; theme-specific rows get weight w, 'default' 1 - w."""
        weights = [1 - w if t == 'default' else w for t in cand['theme'].values]
        total = np.sum(weights)  # computed once, not once per element
        probs = [x / total for x in weights]
        cand = cand.reset_index(drop=True)
        select_index = np.random.choice(cand.index.values, p=probs)
        return cand.loc[select_index]

    def _drop_used(cand):
        """Keep the three output columns and remove already-used utterances."""
        cand = cand.reset_index(drop=True)
        cand = cand[['agent_utterance', 'theme', 'cls']]
        used = history_utterance.history_sysutte
        keep = np.array([u not in used for u in cand['agent_utterance']],
                        dtype=bool)
        return cand.loc[keep]

    # --- Euclidean distance to each class's representative features ---
    feadf = pd.read_csv(params.get('path_feature_average_by_class'))
    feadf['prevUI3'] = (feadf['UI3average'].values
                        - feadf['UI3average_diff'].values)
    fea_ave = feadf[['prevUI3', 'num_noun_before', 'u_utte_len_before']].values

    # Features of the current exchange.
    num_noun_cnt, u_utte_len_cnt = getNounandLen(user_utterance)
    fea_cnt = [[user_impression, num_noun_cnt, u_utte_len_cnt]]

    # Standardise both with statistics fitted on the class averages.
    ss = preprocessing.StandardScaler()
    ss.fit(fea_ave)
    ave_ss = ss.transform(fea_ave)
    cnt_ss = ss.transform(fea_cnt)

    # NOTE(review): row i of the feature table is paired with the i-th
    # sorted class label -- assumes the CSV is ordered the same way; confirm.
    action = np.sort(list(set(df['cls'].values)))
    distance = [np.linalg.norm(cnt_ss - ave_ss[i]) for i in range(len(action))]

    # Nearest classes first; draw one of the closest candidate_action_num.
    ranked = sorted(dict(zip(action, distance)).items(), key=lambda kv: kv[1])
    candidate_action = [cls for cls, _ in ranked[:candidate_action_num]]
    n_next_action = np.random.choice(candidate_action)

    # --- Utterance selection ---
    theme_mask = (df['theme'] == theme) | (df['theme'] == 'default')
    candidates = _drop_used(df[(df['cls'] == n_next_action) & theme_mask])

    # Nothing left for the chosen class: ignore the class and fall back to
    # any unused utterance of the theme.
    if len(candidates) == 0:
        candidates = _drop_used(df[theme_mask])

    if len(candidates) == 0:
        raise ValueError(
            'no unused utterance left for theme {!r}'.format(theme))

    selected = weightSpecificTheme(candidates)
    next_sysutte, _next_theme, next_action = selected.values
    history_utterance.add_sysutte(next_sysutte, next_action)
    return next_sysutte, next_action