コード例 #1
0
class RL(object):
    """SARSA training loop that drives the robot through interaction episodes.

    Each episode plays an introduction ('a100'), then repeatedly performs an
    action chosen by the SARSA agent, observes ``(reward, next_state, done)``
    from the environment and updates the agent.  No action is performed twice
    within the same episode.
    """

    def __init__(self, args):
        """Create the environment, the SARSA agent and start the gRPC server.

        Args:
            args: Parsed options.  Reads ``ip``, ``port``, ``epi_steps``,
                ``emo_model`` and ``restore``; when ``restore`` is truthy it
                also reads ``q_file`` and ``r_file``.
        """
        self.env = Env(args.ip, args.port, args.epi_steps, args.emo_model)
        self.action_dict = action_dict()
        self.ai = irl.sarsa(actions=range(self.env.action_dim),
                            epsilon=1.0,
                            alpha=0.3,
                            gamma=0.9)

        # Total reward of every finished episode, in order.
        self.reward = []
        if args.restore:
            self._restore(args.q_file, args.r_file)

        self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        rohobon_message_pb2_grpc.add_RoBoHoNMessageServicer_to_server(
            Servicer(), self.server)
        self.server.add_insecure_port('[::]:50051')
        self.server.start()

    def _choose_available_action(self, state, available):
        """Return an action index proposed by the agent that is in *available*.

        The agent is asked again until it proposes an action that has not been
        used yet.  When *available* is empty no proposal could ever be
        accepted, so the agent's unrestricted choice is returned instead of
        looping forever.

        NOTE(review): if ``chooseAction`` becomes deterministic (epsilon has
        decayed to 0) and keeps proposing an already-used action, this loop
        still cannot terminate -- confirm against the ``irl.sarsa``
        implementation.
        """
        action_num = self.ai.chooseAction(state)
        while available and action_num not in available:
            action_num = self.ai.chooseAction(state)
        return action_num

    def train(self, total_epi):
        """Run *total_epi* episodes, then save the rewards and the Q table.

        Args:
            total_epi: Number of episodes to run.
        """
        # Index -> action name lookup; list() keeps this working where
        # dict.keys() is a view that cannot be indexed.
        action_names = list(self.action_dict.keys())

        epi_num = 1
        while epi_num <= total_epi:
            epi_reward = 0.0

            # Actions still allowed in this episode.  Rebuilt for every
            # episode: a single list shared across episodes would run empty
            # and leave the agent with nothing valid to choose.
            action_index = list(range(self.env.action_dim))

            #Do "Introduction"
            do_action(self, 'a100')
            s = self.env.GetInitState()

            action_num = self._choose_available_action(s, action_index)
            action = action_names[action_num]

            while True:
                #Stand up
                # NOTE(review): issued before every action, not only while
                # the robot is sitting -- confirm that this is intended.
                do_action(self, 'a200')

                #Do the action
                print(action_num)  #TODO: test
                do_action(self, action)

                # Forbid repeating this action later in the episode.  The
                # membership check covers the fallback case in which every
                # action has already been used.
                if action_num in action_index:
                    action_index.remove(action_num)

                r, s2, t = self.env.Step(s)

                # Shrink epsilon a little on every step; the factor gets
                # smaller as more episodes are recorded in self.reward.
                self.ai.epsilon = max(
                    (10000 - len(self.reward) * 5) / 10000.0 * self.ai.epsilon,
                    0.0)
                a2 = self._choose_available_action(s2, action_index)
                # Update the Q table with the action that was just executed.
                # NOTE(review): the meaning of the constant 0.0 argument
                # depends on irl.sarsa.learn -- confirm.
                self.ai.learn(s, action_num, r, 0.0, s2, a2)

                epi_reward += r
                s = s2
                action_num = a2
                action = action_names[action_num]

                if t:
                    #Do "take a photo"
                    do_action(self, 'a320')

                    #Do "Bye Bye"
                    do_action(self, 'a101')

                    self.reward.append(epi_reward)
                    epi_num += 1
                    break

        raw_input('Please disconnect RoBoHoN and press enter to continue: ')

        self._save_reward_q()
        self.server.stop(0)

    def _restore(self, q_file, r_file):
        """Load the Q table from *q_file* and the reward history from *r_file*.

        SECURITY: pickle can execute arbitrary code while loading; only
        restore files that this program wrote itself.
        """
        import pickle
        with open(q_file, 'rb') as q_in:
            self.ai.q = pickle.load(q_in)
        with open(r_file, 'rb') as r_in:
            self.reward = pickle.load(r_in)

    def _save_reward_q(self):
        """Pickle the reward history to ``./reward`` and the Q table to ``./q``."""
        import pickle
        with open('./reward', 'wb') as r_out:
            pickle.dump(self.reward, r_out)
        with open('./q', 'wb') as q_out:
            pickle.dump(self.ai.q, q_out)
コード例 #2
0
class Demo(object):
    """Rule-based (non-learning) demo of the robot interaction loop.

    Each motion is played as a numbered series of sub-actions.  The id of the
    current sub-action is published through the module-level ``action_id``
    variable and the operator presses enter to advance.
    """

    # Action indices grouped by category.
    _DIALOGUE = [2, 4, 5]
    _JOKE = [0, 1, 3, 6]
    _RIDDLE = [10, 11, 12, 13, 14]
    _DANCE = [7, 8, 9]

    # Motions that are only preceded by a stand-up while the robot is sitting.
    _STANDING_ACTIONS = ('a300', 'a310', 'a311')

    # How many times an already performed action is re-drawn before giving up.
    _MAX_REPICKS = 5

    def __init__(self, args):
        """Create the environment and start the gRPC server.

        Args:
            args: Parsed options.  Reads ``ip``, ``port``, ``epi_steps`` and
                ``emo_model``.
        """
        self.env = Env(args.ip, args.port, args.epi_steps, args.emo_model)
        self.action_dict = action_dict()

        self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
        rohobon_message_pb2_grpc.add_RoBoHoNMessageServicer_to_server(
            Servicer(), self.server)
        self.server.add_insecure_port('[::]:50051')
        self.server.start()

    def ruleBasedAction(self, state):
        """Pick a random action index from the category that fits *state*.

        A dialogue is chosen when ``self.env.goodState(state)`` is truthy, a
        riddle when the state is 0 or 32, and a joke or a dance otherwise.

        Returns:
            The chosen action index.
        """
        if self.env.goodState(state):
            candidates = self._DIALOGUE
        elif state == 0 or state == 32:
            candidates = self._RIDDLE
        else:
            candidates = self._JOKE + self._DANCE

        return random.choice(candidates)

    def _play_motion(self, motion, sub_action_num):
        """Play *motion* as *sub_action_num* operator-paced sub-actions.

        For each step the global ``action_id`` is set to
        ``'m' + motion + <step number starting at 1>``, followed by a 1.5 s
        pause and a prompt that waits for enter.
        """
        # Declared once, before any assignment: repeating the declaration
        # after an assignment is a SyntaxError on Python 3.
        # NOTE(review): presumably read by the gRPC Servicer -- confirm.
        global action_id
        for step in range(1, sub_action_num + 1):
            action_id = 'm' + str(motion) + str(step)
            time.sleep(1.5)
            raw_input('Please press enter to continue: ')

    def _next_action(self, state, done_action):
        """Choose the next action, preferring one not in *done_action*.

        The rule-based choice is re-drawn up to ``_MAX_REPICKS`` times while
        it lands on an action that was already performed.  A fresh choice is
        appended to *done_action* whether or not re-draws were needed, so it
        cannot be repeated later.  If every draw was a repeat, the repeat is
        returned.
        """
        action_num = self.ruleBasedAction(state)
        fail = 0
        while action_num in done_action and fail < self._MAX_REPICKS:
            fail += 1
            action_num = self.ruleBasedAction(state)
        if action_num not in done_action:
            done_action.append(action_num)
        return action_num

    def train(self, total_epi):
        """Run *total_epi* demo episodes, then stop the gRPC server.

        Args:
            total_epi: Number of episodes to run.
        """
        #Default posture: sit down
        sit = True

        # Index -> action name lookup; list() keeps this working where
        # dict.keys() is a view that cannot be indexed.
        action_names = list(self.action_dict.keys())

        epi_num = 1
        while epi_num <= total_epi:
            #TODO: Randomly choose one of the jokes, dialogues, and dances
            #      up front and play them as a fixed series.

            #Do "Introduction"
            self._play_motion('a100', 6)

            s = self.env.GetInitState()

            action_num = self.ruleBasedAction(s)
            action = action_names[action_num]
            # Actions already performed in this episode.
            done_action = [action_num]

            while True:
                #If needed, stand up
                if action in self._STANDING_ACTIONS and sit:
                    self._play_motion('a200', 3)
                    sit = False

                #Do the action
                self._play_motion(action, self.action_dict[action])

                # The reward is not used by the rule-based demo.
                _reward, s2, t = self.env.Step(s)

                s = s2

                if t:
                    #Do "Take a photo"
                    self._play_motion('a320', 7)

                    #Do "Bye Bye"
                    self._play_motion('a101', 4)

                    epi_num += 1
                    break

                action_num = self._next_action(s, done_action)
                action = action_names[action_num]

        raw_input('Please disconnect RoBoHoN and press enter to continue: ')

        self.server.stop(0)