Example #1
def _setup_graph(self):
    with tf.device('/cpu:0'):
        with tf.variable_scope(tf.get_variable_scope(), reuse=None):
            self.sess = self.trainer.sess
            self.async_predictor = MultiThreadAsyncPredictor(
                self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value'], self.predictor_threads),
                batch_size=self.predict_batch_size)
            self.async_predictor.run()
Example #2
def _setup_graph(self):
    self.sess = self.trainer.sess
    self.async_predictor = MultiThreadAsyncPredictor(
        self.trainer.get_predict_funcs(['state'],
                                       ['logitsT', 'pred_value'],
                                       PREDICTOR_THREAD),
        batch_size=15)
    self.async_predictor.run()
Example #3
def _setup_graph(self):
    self.async_predictor = MultiThreadAsyncPredictor(
        self.trainer.get_predictors(
            ['state'],
            ['policy', 'value'],
            # ['Pred/policy', 'Pred/value'],
            PREDICTOR_THREAD),
        batch_size=PREDICT_BATCH_SIZE)
Example #4
class MySimulatorMaster(SimulatorMaster, Callback):
    def __init__(self, pipe_c2s, pipe_s2c, model):
        super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
        self.M = model
        self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)

    def _setup_graph(self):
        self.sess = self.trainer.sess
        self.async_predictor = MultiThreadAsyncPredictor(
            self.trainer.get_predict_funcs(['state'],
                                           ['logitsT', 'pred_value'],
                                           PREDICTOR_THREAD),
            batch_size=15)
        self.async_predictor.run()

    def _on_state(self, state, ident):
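        # Queue an asynchronous prediction for this state; the callback samples an action
        # from the predicted policy distribution and sends it back through send_queue.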
        def cb(outputs):
            distrib, value = outputs.result()
            assert np.all(np.isfinite(distrib)), distrib
            action = np.random.choice(len(distrib), p=distrib)
            client = self.clients[ident]
            client.memory.append(
                TransitionExperience(state, action, None, value=value))
            self.send_queue.put([ident, dumps(action)])

        self.async_predictor.put_task([state], cb)

    def _on_episode_over(self, ident):
        self._parse_memory(0, ident, True)

    def _on_datapoint(self, ident):
        client = self.clients[ident]
        if len(client.memory) == LOCAL_TIME_MAX + 1:
            R = client.memory[-1].value
            self._parse_memory(R, ident, False)

    def _parse_memory(self, init_r, ident, isOver):
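        # Walk the client's memory in reverse, accumulating the clipped n-step discounted
        # return R for each transition, and push (state, action, R) onto the training queue.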
        client = self.clients[ident]
        mem = client.memory
        if not isOver:
            last = mem[-1]
            mem = mem[:-1]

        mem.reverse()
        R = float(init_r)
        for idx, k in enumerate(mem):
            R = np.clip(k.reward, -1, 1) + GAMMA * R
            # print "Clipping: {}".format(R)
            self.queue.put([k.state, k.action, R])

        if not isOver:
            client.memory = [last]
        else:
            client.memory = []
Example #5
def _setup_graph(self):
    self.sess = self.trainer.sess
    self.async_predictor = MultiThreadAsyncPredictor(
            self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value'],
            PREDICTOR_THREAD), batch_size=15)
    # else:
    #     self.async_predictor = MultiThreadAsyncPredictor(
    #         self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value', FEATURE],
    #                                        PREDICTOR_THREAD), batch_size=15)
    if FEATURE:
        logger.info("Initialize density network")
        cfg = PredictConfig(
                session_init=NewSession(),
                model=Model(),
                input_var_names=['state'],
                output_var_names=[FEATURE])
        self.offline_predictor = get_predict_func(cfg)
    self.async_predictor.run()
Example #6
class MySimulatorMaster(SimulatorMaster, Callback):
    def __init__(self,
                 worker_id,
                 neptune_client,
                 pipe_c2s,
                 pipe_s2c,
                 model,
                 dummy,
                 predictor_threads,
                 predict_batch_size=16,
                 do_train=True):
        # predictor_threads is previous PREDICTOR_THREAD
        super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c,
                                                args.simulator_procs,
                                                os.getpid())
        self.M = model
        self.do_train = do_train

        # the second queue is here!
        self.queue = queue.Queue(maxsize=args.my_sim_master_queue)
        self.dummy = dummy
        self.predictor_threads = predictor_threads

        self.last_queue_put = start_timer()
        self.queue_put_times = []
        self.predict_batch_size = predict_batch_size
        self.counter = 0

        self.worker_id = worker_id
        self.neptune_client = neptune_client
        self.stats = defaultdict(StatCounter)
        self.games = StatCounter()

    def _setup_graph(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope(tf.get_variable_scope(), reuse=None):
                self.sess = self.trainer.sess
                self.async_predictor = MultiThreadAsyncPredictor(
                    self.trainer.get_predict_funcs(['state'],
                                                   ['logitsT', 'pred_value'],
                                                   self.predictor_threads),
                    batch_size=self.predict_batch_size)
                self.async_predictor.run()

    def _on_state(self, state, ident):
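        # In dummy mode a fixed action and value are recorded immediately; otherwise the
        # state is queued for async prediction and the callback replies with
        # (action, global_step, True), or (0, 0, False) if the isAlive flag in the outputs is False.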
        ident, ts = ident
        client = self.clients[ident]

        if self.dummy:
            action = 0
            value = 0.0
            client.memory.append(
                TransitionExperience(state, action, None, value=value))
            self.send_queue.put([ident, dumps(action)])
        else:

            def cb(outputs):
                # distrib, value, global_step, isAlive  = outputs.result()
                o = outputs.result()
                if o[-1]:
                    distrib = o[0]
                    value = o[1]
                    global_step = o[2]
                    assert np.all(np.isfinite(distrib)), distrib
                    action = np.random.choice(len(distrib), p=distrib)
                    client = self.clients[ident]
                    client.memory.append(
                        TransitionExperience(state,
                                             action,
                                             None,
                                             value=value,
                                             ts=ts))
                else:
                    self.send_queue.put([ident, dumps((0, 0, False))])
                    return

                #print"Q-debug: MySimulatorMaster send_queue before put, size: ", self.send_queue.qsize(), '/', self.send_queue.maxsize
                self.send_queue.put(
                    [ident, dumps((action, global_step, True))])

            self.async_predictor.put_task([state], cb)

    def _on_episode_over(self, ident):
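        # Record the finished game's score, report the running average to neptune every
        # 10 games, then flush the client's memory into training samples.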
        ident, ts = ident

        client = self.clients[ident]
        # send game score to neptune
        self.games.feed(self.stats[ident].sum)
        self.stats[ident].reset()

        if self.games.count == 10:
            self.neptune_client.send(
                (self.worker_id, ('online', self.games.average)))
            self.games.reset()

        self._parse_memory(0, ident, True, ts)

    def _on_datapoint(self, ident):
        ident, ts = ident
        client = self.clients[ident]

        self.stats[ident].feed(client.memory[-1].reward)

        if len(client.memory) == LOCAL_TIME_MAX + 1:
            R = client.memory[-1].value
            self._parse_memory(R, ident, False, ts)

    def _parse_memory(self, init_r, ident, isOver, ts):
        client = self.clients[ident]
        mem = client.memory
        if not isOver:
            last = mem[-1]
            mem = mem[:-1]

        mem.reverse()
        R = float(init_r)
        for idx, k in enumerate(mem):
            R = np.clip(k.reward, -1, 1) + GAMMA * R
            point_ts = k.ts
            self.log_queue_put()
            if self.do_train:
                self.queue.put(
                    [k.state, k.action, R, point_ts, init_r, isOver])

        if not isOver:
            client.memory = [last]
        else:
            client.memory = []

    def log_queue_put(self):
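        # Track the elapsed time between training-queue puts and log the put rate (puts/s).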
        self.counter += 1
        elapsed_last_put = elapsed_time_ms(self.last_queue_put)
        self.queue_put_times.append(elapsed_last_put)
        k = 1000
        if self.counter % 1 == 0:
            logger.debug("queue_put_times elapsed {elapsed}".format(
                elapsed=elapsed_last_put))
            logger.debug("queue_put_times {puts_s} puts/s".format(
                puts_s=1000.0 / np.mean(self.queue_put_times[-k:])))
        self.last_queue_put = start_timer()
Example #7
def _setup_graph(self):
    self.sess = self.trainer.sess
    self.async_predictor = MultiThreadAsyncPredictor(
            self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value'], self.predictor_threads),
        batch_size=self.predict_batch_size)
    self.async_predictor.run()
Example #8
class MySimulatorMaster(SimulatorMaster, Callback):
    def __init__(self, pipe_c2s, pipe_s2c, model, dummy, predictor_threads, predict_batch_size=16, do_train=True):
        # predictor_threads is previous PREDICTOR_THREAD
        super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
        self.M = model
        self.do_train = do_train
        
        # the second queue is here!
        self.queue = queue.Queue(maxsize=args.my_sim_master_queue)
        self.dummy = dummy
        self.predictor_threads = predictor_threads

        self.last_queue_put = 0
        self.queue_put_times = []
        self.predict_batch_size = predict_batch_size
        self.counter = 0

    def _setup_graph(self):
        self.sess = self.trainer.sess
        self.async_predictor = MultiThreadAsyncPredictor(
                self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value'], self.predictor_threads),
            batch_size=self.predict_batch_size)
        self.async_predictor.run()

    def _on_state(self, state, ident):
        ident, ts = ident
        client = self.clients[ident]


        if self.dummy:
            action = 0
            value = 0.0
            client.memory.append(TransitionExperience(state, action, None, value=value))
            self.send_queue.put([ident, dumps(action)])
        else:
            def cb(outputs):
                distrib, value = outputs.result()
                #distrib, value, ts = outputs.result()
                
                #print '_on_state cb', distrib
                assert np.all(np.isfinite(distrib)), distrib
                action = np.random.choice(len(distrib), p=distrib)
                client = self.clients[ident]
                client.memory.append(TransitionExperience(state, action, None, value=value))
                #print("Q-debug: MySimulatorMaster send_queue before put, size: ", self.send_queue.qsize(), '/', self.send_queue.maxsize)
                ts = 0.0
                self.send_queue.put([ident, dumps((action, ts))])
            self.async_predictor.put_task([state], cb)

    def _on_episode_over(self, ident):
        ident, ts = ident
        self._parse_memory(0, ident, True, ts)

    def _on_datapoint(self, ident):
        ident, ts = ident
        client = self.clients[ident]
        if len(client.memory) == LOCAL_TIME_MAX + 1:
            R = client.memory[-1].value
            self._parse_memory(R, ident, False, ts)

    def _parse_memory(self, init_r, ident, isOver, ts):
        client = self.clients[ident]
        mem = client.memory
        if not isOver:
            last = mem[-1]
            mem = mem[:-1]

        mem.reverse()
        R = float(init_r)
        for idx, k in enumerate(mem):
            #print '### reward: ', k.reward
            R = np.clip(k.reward, -1, 1) + GAMMA * R
            #print("Q-debug id=39dksc: MySimulatorMaster self.queue before put, size: ", self.queue.qsize(), '/', self.queue.maxsize)
            logger.debug("Q-debug id=39dksc: MySimulatorMaster self.queue before put, size: {qsize} / {maxsize}".format(
                qsize=self.queue.qsize(),
                maxsize=self.queue.maxsize))
            self.log_queue_put()
            if self.do_train:
                self.queue.put([k.state, k.action, R, ts])

        if not isOver:
            client.memory = [last]
        else:
            client.memory = []

    def log_queue_put(self):
        self.counter += 1
        elapsed_last_put = 0
        self.queue_put_times.append(elapsed_last_put)
        k = 1000
        if self.counter % 1 == 0:
            logger.debug("queue_put_times elapsed {elapsed}".format(elapsed=elapsed_last_put))
            logger.debug("queue_put_times {puts_s} puts/s".format(puts_s=1000.0 / np.mean(self.queue_put_times[-k:])))
        self.last_queue_put = 0
Example #9
def _setup_graph(self):
    self.async_predictor = MultiThreadAsyncPredictor(
        self.trainer.get_predictors(['state'],
                                    ['policy', 'value'],
                                    # ['Pred/policy', 'Pred/value'],
                                    PREDICTOR_THREAD), batch_size=PREDICT_BATCH_SIZE)
Example #10
class MySimulatorMaster(SimulatorMaster, Callback):
    def __init__(self, pipe_c2s, pipe_s2c, model):
        super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
        self.M = model
        self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)
        from tensorpack.utils.utils import get_rng
        self._rng = get_rng(self)

    def _setup_graph(self):
        self.async_predictor = MultiThreadAsyncPredictor(
            self.trainer.get_predictors(['state'],
                                        ['policy', 'value'],
                                        # ['Pred/policy', 'Pred/value'],
                                        PREDICTOR_THREAD), batch_size=PREDICT_BATCH_SIZE)

    def _before_train(self):
        self.async_predictor.start()

    def _on_state(self, state, ident):
        client = self.clients[ident]
        if not hasattr(client, '_cidx'):
            # client._explore = self._rng.rand()
            cidx = int(ident.decode('utf-8').replace(u'simulator-', ''))
            client._cidx = cidx
        #     if cidx % 4 == 0: client._explore = 0.

        def cb(outputs):
            try:
                policy, value = outputs.result()
            except CancelledError:
                logger.info("Client {} cancelled.".format(ident))
                return
            assert np.all(np.isfinite(policy)), policy
            action = policy
            # action = np.clip(action, -1., 1.)
            # Whether we get a reasonably good reward early on determines how fast training converges, so some priors are added here
            # Like a new driver on the road: keep the steering conservative, apply a little throttle, and don't brake
            # if client._cidx < SIMULATOR_PROC:
            #     if self.epoch_num <= 1:
            #         if self.local_step % 10 == 0:
            #             action[1] = self._rng.rand() * 0.5 + 0.5
            #     if action[1] < 0: action[1] = 0.
            #     if self.epoch_num <= 2:
            #         action[1] = np.clip(action[1], 0, 1.)
            #         if self.local_step % 3 == 0:
            #             action[0] *= self._rng.choice([-1., 1.])
            #             # action[0] *= (self._rng.rand() * 0.2 + 0.2) * self._rng.choice([-1., 1.])
            #         else:
            #             action[0] = np.clip(action[0], -0.2, 0.2)
            # if self._rng.rand() < client._explore:
            #     action[0] = self._rng.rand() - 0.5

            client.memory.append(TransitionExperience(
                state, action=None, reward=None, value=value))
            self.send_queue.put([ident, dumps((action, value))])
        self.async_predictor.put_task([state], cb)

    def _on_episode_over(self, ident):
        self._parse_memory(0, ident, True)

    def _on_datapoint(self, ident):
        client = self.clients[ident]
        if len(client.memory) == LOCAL_TIME_MAX + 1:
            R = client.memory[-1].value
            self._parse_memory(R, ident, False)

    def _parse_memory(self, init_r, ident, isOver):
        client = self.clients[ident]
        mem = client.memory
        if not isOver:
            last = mem[-1]
            mem = mem[:-1]

        def discount(x, gamma):
            from scipy.signal import lfilter
            return lfilter(
                [1], [1, -gamma], x[::-1], axis=0)[::-1]
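        # discount() computes the discounted cumulative sum via an IIR filter over the
        # reversed sequence; advantages use the one-step TD form r_t + GAMMA * V(s_{t+1}) - V(s_t).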
        rewards_plus = np.asarray([m.reward for m in mem] + [float(init_r)])
        discounted_rewards = discount(rewards_plus, GAMMA)[:-1]
        values_plus = np.asarray([m.value for m in mem] + [float(init_r)])
        rewards = np.asarray([m.reward for m in mem])
        advantages = rewards + GAMMA * values_plus[1:] - values_plus[:-1]

        for idx, k in enumerate(mem):
            self.queue.put([k.state, k.action, discounted_rewards[idx], advantages[idx]])
        # mem.reverse()
        # R = float(init_r)
        # for idx, k in enumerate(mem):
        #     R = k.reward + GAMMA * R
        #     # R = np.clip(k.reward, -1, 1) + GAMMA * R
        #     self.queue.put([k.state, k.action, R])

        if not isOver:
            client.memory = [last]
        else:
            client.memory = []
Example #11
class MySimulatorMaster(SimulatorMaster, Callback):
    def __init__(self, pipe_c2s, pipe_s2c, model):
        super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
        self.M = model
        self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)
        from tensorpack.utils.utils import get_rng
        self._rng = get_rng(self)

    def _setup_graph(self):
        self.async_predictor = MultiThreadAsyncPredictor(
            self.trainer.get_predictors(
                ['state'],
                ['policy', 'value'],
                # ['Pred/policy', 'Pred/value'],
                PREDICTOR_THREAD),
            batch_size=PREDICT_BATCH_SIZE)

    def _before_train(self):
        self.async_predictor.start()

    def _on_state(self, state, ident):
        client = self.clients[ident]
        if not hasattr(client, '_cidx'):
            # client._explore = self._rng.rand()
            cidx = int(ident.decode('utf-8').replace(u'simulator-', ''))
            client._cidx = cidx
        #     if cidx % 4 == 0: client._explore = 0.

        def cb(outputs):
            try:
                policy, value = outputs.result()
            except CancelledError:
                logger.info("Client {} cancelled.".format(ident))
                return
            assert np.all(np.isfinite(policy)), policy
            action = policy
            # action = np.clip(action, -1., 1.)
            # Whether we get a reasonably good reward early on determines how fast training converges, so some priors are added here
            # Like a new driver on the road: keep the steering conservative, apply a little throttle, and don't brake
            # if client._cidx < SIMULATOR_PROC:
            #     if self.epoch_num <= 1:
            #         if self.local_step % 10 == 0:
            #             action[1] = self._rng.rand() * 0.5 + 0.5
            #     if action[1] < 0: action[1] = 0.
            #     if self.epoch_num <= 2:
            #         action[1] = np.clip(action[1], 0, 1.)
            #         if self.local_step % 3 == 0:
            #             action[0] *= self._rng.choice([-1., 1.])
            #             # action[0] *= (self._rng.rand() * 0.2 + 0.2) * self._rng.choice([-1., 1.])
            #         else:
            #             action[0] = np.clip(action[0], -0.2, 0.2)
            # if self._rng.rand() < client._explore:
            #     action[0] = self._rng.rand() - 0.5

            client.memory.append(
                TransitionExperience(state,
                                     action=None,
                                     reward=None,
                                     value=value))
            self.send_queue.put([ident, dumps((action, value))])

        self.async_predictor.put_task([state], cb)

    def _on_episode_over(self, ident):
        self._parse_memory(0, ident, True)

    def _on_datapoint(self, ident):
        client = self.clients[ident]
        if len(client.memory) == LOCAL_TIME_MAX + 1:
            R = client.memory[-1].value
            self._parse_memory(R, ident, False)

    def _parse_memory(self, init_r, ident, isOver):
        client = self.clients[ident]
        mem = client.memory
        if not isOver:
            last = mem[-1]
            mem = mem[:-1]

        def discount(x, gamma):
            from scipy.signal import lfilter
            return lfilter([1], [1, -gamma], x[::-1], axis=0)[::-1]

        rewards_plus = np.asarray([m.reward for m in mem] + [float(init_r)])
        discounted_rewards = discount(rewards_plus, GAMMA)[:-1]
        values_plus = np.asarray([m.value for m in mem] + [float(init_r)])
        rewards = np.asarray([m.reward for m in mem])
        advantages = rewards + GAMMA * values_plus[1:] - values_plus[:-1]

        for idx, k in enumerate(mem):
            self.queue.put(
                [k.state, k.action, discounted_rewards[idx], advantages[idx]])
        # mem.reverse()
        # R = float(init_r)
        # for idx, k in enumerate(mem):
        #     R = k.reward + GAMMA * R
        #     # R = np.clip(k.reward, -1, 1) + GAMMA * R
        #     self.queue.put([k.state, k.action, R])

        if not isOver:
            client.memory = [last]
        else:
            client.memory = []
Example #12
class MySimulatorMaster(SimulatorMaster, Callback):
    def __init__(self, pipe_c2s, pipe_s2c, model):
        super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
        self.M = model
        self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)

    def _setup_graph(self):
        self.sess = self.trainer.sess
        self.async_predictor = MultiThreadAsyncPredictor(
            self.trainer.get_predict_funcs(['state'],
                                           ['logitsT', 'pred_value'],
                                           PREDICTOR_THREAD),
            batch_size=15)
        # else:
        #     self.async_predictor = MultiThreadAsyncPredictor(
        #         self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value', FEATURE],
        #                                        PREDICTOR_THREAD), batch_size=15)
        if FEATURE:
            logger.info("Initialize density network")
            cfg = PredictConfig(session_init=NewSession(),
                                model=Model(),
                                input_var_names=['state'],
                                output_var_names=[FEATURE])
            self.offline_predictor = get_predict_func(cfg)
        self.async_predictor.run()

    def _trigger_epoch(self):
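        # When FEATURE is set, rebuild the offline density-network predictor from the
        # current session at the end of every epoch.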
        if FEATURE:
            if self.epoch_num % 1 == 0:
                logger.info("update density network at epoch %d." %
                            (self.epoch_num))
                cfg = PredictConfig(session_init=JustCurrentSession(),
                                    model=Model(),
                                    input_var_names=['state'],
                                    output_var_names=[FEATURE])
                self.offline_predictor = get_predict_func(cfg)

    def _on_state(self, state, ident):
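        # The async predictor returns the policy distribution and value; when FEATURE is
        # set, the offline predictor also computes the feature that is sent back with the action.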
        def cb(outputs):
            #if not FEATURE:
            distrib, value = outputs.result()
            #else:
            #    distrib, value, feature = outputs.result()
            assert np.all(np.isfinite(distrib)), distrib
            action = np.random.choice(len(distrib), p=distrib)
            client = self.clients[ident]
            client.memory.append(
                TransitionExperience(state, action, None, value=value))
            if not FEATURE:
                self.send_queue.put([ident, dumps(action)])
            else:
                feature = self.offline_predictor([[state]])[0][0]
                self.send_queue.put([ident, dumps([action, feature])])

        self.async_predictor.put_task([state], cb)

    def _on_episode_over(self, ident):
        self._parse_memory(0, ident, True)

    def _on_datapoint(self, ident):
        client = self.clients[ident]
        if len(client.memory) == LOCAL_TIME_MAX + 1:
            R = client.memory[-1].value
            self._parse_memory(R, ident, False)

    def _parse_memory(self, init_r, ident, isOver):
        client = self.clients[ident]
        mem = client.memory
        if not isOver:
            last = mem[-1]
            mem = mem[:-1]

        mem.reverse()
        R = float(init_r)
        for idx, k in enumerate(mem):
            R = np.clip(k.reward, -1, 1) + GAMMA * R
            self.queue.put([k.state, k.action, R])

        if not isOver:
            client.memory = [last]
        else:
            client.memory = []