class RL(object):
    """Train a SARSA agent (``irl.sarsa``) against ``Env`` while driving the robot.

    Robot actions are issued through the module-level ``do_action`` helper,
    and a gRPC server is kept running for the lifetime of the training run.
    """

    def __init__(self, args):
        """Build the environment and the agent, then start the gRPC server.

        Args:
            args: Options object. Reads ``ip``, ``port``, ``epi_steps``,
                ``emo_model`` and ``restore``; when ``restore`` is truthy,
                ``q_file`` and ``r_file`` are read as well.
        """
        self.env = Env(args.ip, args.port, args.epi_steps, args.emo_model)
        self.action_dict = action_dict()
        self.ai = irl.sarsa(actions=range(self.env.action_dim),
                            epsilon=1.0, alpha=0.3, gamma=0.9)
        # Accumulated reward of every finished episode, in order.
        self.reward = []
        if args.restore:
            self._restore(args.q_file, args.r_file)

        self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        rohobon_message_pb2_grpc.add_RoBoHoNMessageServicer_to_server(
            Servicer(), self.server)
        self.server.add_insecure_port('[::]:50051')
        self.server.start()

    def train(self, total_epi):
        """Run ``total_epi`` episodes, then save the results and stop the server.

        Each episode plays the introduction ('a100'), then repeatedly stands
        up ('a200'), performs the chosen action, steps the environment and
        updates the agent, until the environment reports a terminal step.
        The episode ends with 'a320' (take a photo) and 'a101' (bye bye).

        Args:
            total_epi: Number of episodes to run.
        """
        # Index -> action name; built once because the dict is not modified
        # while training (list() keeps this indexable on Python 3 as well).
        action_names = list(self.action_dict.keys())

        epi_num = 1
        while epi_num <= total_epi:
            epi_reward = 0.0
            # Actions not yet performed in this episode. Rebuilt per episode:
            # a pool shared across episodes only ever shrinks and eventually
            # makes the selection loop spin forever.
            action_index = list(range(self.env.action_dim))

            # Do "Introduction"
            do_action(self, 'a100')
            s = self.env.GetInitState()
            action_num = self._choose_unused(s, action_index)
            action = action_names[action_num]

            while True:
                # Stand up before every action (posture is not tracked here).
                do_action(self, 'a200')

                # Do the action
                print(action_num)  # TODO: test
                do_action(self, action)
                if action_num in action_index:
                    action_index.remove(action_num)

                r, s2, t = self.env.Step(s)

                # Scale epsilon by a factor that drops by 0.0005 for every
                # recorded episode; never let it go below zero.
                self.ai.epsilon = max(
                    (10000 - len(self.reward) * 5) / 10000.0 * self.ai.epsilon,
                    0.0)

                a2 = self._choose_unused(s2, action_index)
                # The action that produced (r, s2) is the one just executed.
                self.ai.learn(s, action_num, r, 0.0, s2, a2)

                epi_reward += r
                s = s2
                action_num = a2
                action = action_names[action_num]

                if t:
                    # Do "take a photo"
                    do_action(self, 'a320')
                    # Do "Bye Bye"
                    do_action(self, 'a101')
                    self.reward.append(epi_reward)
                    epi_num += 1
                    break

        raw_input('Please disconnect RoBoHoN and press enter to continue: ')
        self._save_reward_q()
        self.server.stop(0)

    def _choose_unused(self, state, available):
        """Query the agent until it names an action that is still in ``available``.

        Args:
            state: State passed to ``self.ai.chooseAction``.
            available: List of action indices that may still be chosen.

        Returns:
            The chosen action index. When ``available`` is empty the agent's
            unconstrained choice is returned, because no answer could ever
            satisfy the filter and the loop would never terminate.
        """
        action_num = self.ai.chooseAction(state)
        if not available:
            return action_num
        # NOTE(review): this still relies on chooseAction eventually varying
        # its answer; confirm against the irl.sarsa implementation.
        while action_num not in available:
            action_num = self.ai.chooseAction(state)
        return action_num

    def _restore(self, q_file, r_file):
        """Load a saved Q table and reward history.

        Args:
            q_file: Path of the pickled Q table, stored into ``self.ai.q``.
            r_file: Path of the pickled reward list, stored into
                ``self.reward``.
        """
        import pickle
        # SECURITY: pickle executes arbitrary code while loading; only restore
        # files written by this program.
        with open(q_file, 'rb') as q_handle:
            self.ai.q = pickle.load(q_handle)
        with open(r_file, 'rb') as r_handle:
            self.reward = pickle.load(r_handle)

    def _save_reward_q(self):
        """Pickle the reward history to './reward' and the Q table to './q'."""
        import pickle
        with open('./reward', 'wb') as r_handle:
            pickle.dump(self.reward, r_handle)
        with open('./q', 'wb') as q_handle:
            pickle.dump(self.ai.q, q_handle)
class Demo(object):
    """Scripted demo that picks robot actions with fixed rules instead of learning.

    Sub-action ids are published through the module-level ``action_id``
    global (presumably read by the gRPC ``Servicer`` - confirm), and a gRPC
    server is kept running for the lifetime of the demo.
    """

    def __init__(self, args):
        """Build the environment and start the gRPC server.

        Args:
            args: Options object. Reads ``ip``, ``port``, ``epi_steps`` and
                ``emo_model``.
        """
        self.env = Env(args.ip, args.port, args.epi_steps, args.emo_model)
        self.action_dict = action_dict()

        self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        rohobon_message_pb2_grpc.add_RoBoHoNMessageServicer_to_server(
            Servicer(), self.server)
        self.server.add_insecure_port('[::]:50051')
        self.server.start()

    def ruleBasedAction(self, state):
        """Pick a random action index from the group selected by ``state``.

        Args:
            state: Current environment state.

        Returns:
            A dialogue index when ``self.env.goodState(state)`` is true, a
            riddle index when ``state`` is 0 or 32, otherwise a joke or dance
            index.
        """
        dialogue = [2, 4, 5]
        joke = [0, 1, 3, 6]
        riddle = [10, 11, 12, 13, 14]
        dance = [7, 8, 9]

        if self.env.goodState(state):
            candidates = dialogue
        elif state == 0 or state == 32:
            candidates = riddle
        else:
            candidates = joke + dance
        return random.choice(candidates)

    def train(self, total_epi):
        """Run ``total_epi`` scripted episodes, then stop the server.

        Each episode plays the introduction, then repeatedly performs a
        rule-chosen action and steps the environment until it reports a
        terminal step, and finishes with "take a photo" and "bye bye".

        Args:
            total_epi: Number of episodes to run.
        """
        # Default posture: sit down
        sit = True
        # Index -> action name (list() keeps this indexable on Python 3 too).
        action_names = list(self.action_dict.keys())

        epi_num = 1
        while epi_num <= total_epi:
            # TODO: Randomly choose one of the jokes, dialogues, and dances

            # Do "Introduction"
            self._run_sub_actions('a100', 6)

            s = self.env.GetInitState()
            action_num = self.ruleBasedAction(s)
            action = action_names[action_num]
            # Action indices already performed in this episode.
            done_action = [action_num]

            while True:
                # If needed, stand up
                if action in ['a300', 'a310', 'a311'] and sit:
                    self._run_sub_actions('a200', 3)
                    sit = False

                # Do the action
                self._run_sub_actions(action, self.action_dict[action])

                _reward, s, t = self.env.Step(s)

                if t:
                    # Do "Take a photo"
                    self._run_sub_actions('a320', 7)
                    # Do "Bye Bye"
                    self._run_sub_actions('a101', 4)
                    epi_num += 1
                    break

                # Prefer an action not yet performed; give up after 5 retries
                # and accept a repeat.
                action_num = self.ruleBasedAction(s)
                fail = 0
                while action_num in done_action and fail < 5:
                    fail += 1
                    action_num = self.ruleBasedAction(s)
                # Record every new action, including one found after a retry,
                # so it is not offered again later in the episode.
                if action_num not in done_action:
                    done_action.append(action_num)
                action = action_names[action_num]

        raw_input('Please disconnect RoBoHoN and press enter to continue: ')
        self.server.stop(0)

    def _run_sub_actions(self, action, sub_action_num):
        """Publish the sub-action ids of ``action`` one after another.

        For step numbers 1..``sub_action_num`` the module-level ``action_id``
        is set to ``'m' + action + step``, followed by a 1.5 s pause and an
        operator prompt.

        Args:
            action: Action name such as 'a100'.
            sub_action_num: Number of sub-actions the action consists of.
        """
        # Declared once, before any assignment: repeating the declaration
        # after an assignment is a SyntaxError on Python 3.
        global action_id
        for step in range(1, sub_action_num + 1):
            action_id = 'm' + str(action) + str(step)
            time.sleep(1.5)
            # NOTE(review): the prompt is issued after every sub-action here;
            # confirm it was not meant to run once after the whole loop.
            raw_input('Please press enter to continue: ')