def predict_dialogue(self, user_utt, sys_utt, da, user_start_time):
    """Estimate the user's impression from dialogue-level features.

    Builds a feature row from utterance lengths, their difference, the
    dialogue act and the user's utterance start time, then runs the
    pre-trained "dialogue" model via pf.predUnknown.
    """
    sys_chars = len(sys_utt)
    user_chars = len(user_utt)
    length_gap = sys_chars - user_chars
    features = pf.makeDiaDF(user_start_time, sys_chars, user_chars, length_gap, da)
    # First (only) row of the feature frame feeds the predictor.
    return pf.predUnknown(features.iloc[0, :].values, "dialogue")
def predict_text(self, user_utt):
    """Estimate the user's impression from text features of the utterance.

    Extracts bag-of-words features, projects them with a pre-fitted PCA
    model loaded from disk, and runs the pre-trained "text" model.
    """
    df = etf.makeFea(user_utt)
    # Fix: close the model file deterministically; the original
    # pickle.load(open(...)) leaked the file handle.
    with open("../refData/RL_files/modelUIpred/pca.model", "rb") as f:
        pca = pickle.load(f)
    df = pf.PCAonlyBOW(df, pca=pca)
    # Drop the leading id column and trailing label column, then cast
    # to float32 as the model expects.
    X_test = df.iloc[0, 1:-1].values.astype(np.float32)
    t_pred = pf.predUnknown(X_test, "text")
    return t_pred
# Linearly rescales the impression score.
def changePredValueRange(fusion_pred):
    """Map a fused prediction from the narrow band [4.4, 4.6] onto [3, 5].

    The raw fusion model outputs cluster tightly around 4.5, so this
    linear stretch spreads them over a usable range; the result is then
    clamped to the 7-point impression scale [1, 7].
    """
    src_lo, src_hi = 4.4, 4.6
    dst_lo, dst_hi = 3, 5
    scaled = ((dst_hi - dst_lo) / (src_hi - src_lo)) * (fusion_pred - src_lo) + dst_lo
    # Clamp onto the 7-point scale.
    if scaled < 1:
        return 1
    if scaled > 7:
        return 7
    return scaled


if __name__ == '__main__':
    # This file is meant to only hold the functions above; the code
    # below is a stand-alone demo of the fusion prediction.
    print('it is the code has main function.')
    # Adjust the range of the estimated value.
    import predUI as pf
    mono_preds = [4.0, 4.0, 0.1, 0.1]  # dialogue, voice, text, face
    frame = pd.DataFrame(data=[mono_preds])
    user_impression = pf.predUnknown(frame.iloc[0, :].values, 'fusion')
def predict_face(self, start, end):
    """Estimate the user's impression from facial features.

    Extracts features for the [start, end] interval from the user's
    per-session face CSV and runs the pre-trained "face" model.
    """
    csv_path = "{}/{}_face.csv".format(self.data_path, self.user)
    face_features = eff.predictionFace(start, end, csv_path)
    return pf.predUnknown(face_features, "face")
def predict_voice(self, file_name):
    """Estimate the user's impression from acoustic features.

    Reads the openSMILE feature CSV, selects the prediction-time voice
    features, and runs the pre-trained "voice" model on the first row.
    """
    raw = pd.read_csv(file_name)
    selected = pf.selectVoiceFea(raw, case="pred")
    return pf.predUnknown(selected.iloc[0, :].values, "voice")
def process(self):
    """Run one full system-turn / user-turn exchange.

    Selects and speaks a system utterance, records and recognizes the
    user's reply, estimates the user's impression from four modalities
    (dialogue, voice, text, face), fuses them, updates the RL state,
    and returns True when the configured number of turns is reached.
    """
    # Decide whether to switch topic based on the current impression.
    chg_theme, theme = self.themeHis.decideNextTheme(self.user_impression)
    # Generate the system utterance (softmax selection over Q-values).
    sys_utt, da = self.env.utterance_selection_softmax(\
        chg_theme, theme, self.agent.Q[self.state])  # utterance selection
    self.env.sys_utterance_log.append(sys_utt)
    # Per-turn recording path; zero-padded turn index keeps files sorted.
    file_name = "../../user_data/{0}/voice/{0}_{1}_voice"\
        .format(self.user, str(self.current_turn).zfill(3))
    self.record_log(sys_utt)
    print("system:{}".format(sys_utt))
    # Timestamps are relative to self.base_time (session start — presumably;
    # NOTE(review): confirm against the class __init__).
    sys_start_time = time.time() - self.base_time
    self.mmd.say(sys_utt)
    self.record_log("*")
    self.record.start(file_name)
    self.asr.start()
    # Poll until the ASR signals the end of the system turn.
    while True:
        if self.asr.m_turn.is_sys_turn_end == True:
            break
        else:
            time.sleep(0.1)
    self.record.stop()
    user_turn_end_time = time.time() - self.base_time
    user_utt = self.asr.read_result()
    self.env.user_utterance_log.append(user_utt)
    user_utt_start_time = self.asr.get_utt_start_time()
    # Wait until the recorded wav file has been saved to disk.
    while True:
        if self.record.save_complete == True:
            time.sleep(0.1)
            break
        else:
            time.sleep(0.1)
    # Extract acoustic features from the saved recording.
    self.opensmile.run(file_name)
    print("user:{}".format(user_utt))
    # Predict impression from each modality independently.
    d_pred = self.predict_dialogue(user_utt, sys_utt, da, user_utt_start_time)
    v_pred = self.predict_voice("{}.csv".format(file_name))
    t_pred = self.predict_text(user_utt)
    f_pred = self.predict_face(sys_start_time, user_turn_end_time)
    self.print_debug("dialogue predict:{}".format(d_pred))
    self.print_debug("voice predict:{}".format(v_pred))
    self.print_debug("text predict:{}".format(t_pred))
    self.print_debug("face predict:{}".format(f_pred))
    # Fuse the four mono-modal predictions with the "fusion" model.
    df = pd.DataFrame(data=[[d_pred, v_pred, t_pred, f_pred]])
    X_test = df.iloc[0, :].values
    self.user_impression = pf.predUnknown(X_test, "fusion")
    # Rescale the fused score onto the usable impression range.
    self.user_impression = pf.changePredValueRange(self.user_impression)
    self.print_debug("user_impression:{}".format(self.user_impression))
    # Advance the RL state from the observed exchange.
    n_state = self.env.get_next_state(self.user_impression, sys_utt, user_utt)
    self.state = n_state
    # End-of-session check.
    self.current_turn += 1
    if self.current_turn == self.turn_num:
        is_end = True
    else:
        is_end = False
    return is_end