Example #1
def _get_optimizer(self, name):
    from tensorpack.tfutils import optimizer
    from tensorpack.tfutils.gradproc import SummaryGradient, MapGradient
    import tensorpack.tfutils.symbolic_functions as symbf
    # tf, logger and the INIT_LEARNING_RATE_* constants come from the
    # enclosing module; actor and critic get separate learning rates,
    # each exposed as a summarized scalar variable.
    init_lr = INIT_LEARNING_RATE_A if name == 'actor' else INIT_LEARNING_RATE_C
    lr = symbf.get_scalar_var('learning_rate/' + name, init_lr, summary=True)
    opt = tf.train.AdamOptimizer(lr)
    logger.info("create opt {}".format(name))
    # Clip gradients by average norm within each network's own variable
    # scope; the critic uses a tighter clip than the actor.
    if name == 'critic':
        gradprocs = [
            MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.05),
                        regex='^critic/.*')
        ]
    elif name == 'actor':
        gradprocs = [
            MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1),
                        regex='^actor/.*')
        ]
    else:
        raise ValueError('unknown optimizer name: {}'.format(name))
    gradprocs.append(SummaryGradient())  # add gradient summaries to TensorBoard
    return optimizer.apply_grad_processors(opt, gradprocs)
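The regex-scoped MapGradient processors above clip each network's gradients by average norm without touching the other network's variables. A minimal plain-TF1 sketch of the same idea without tensorpack (the helper clip_scope_gradients is ours, not from the source):

import re
import tensorflow as tf

def clip_scope_gradients(opt, loss, scope_regex, clip_norm):
    # compute_gradients yields (gradient, variable) pairs; clip only the
    # gradients whose variable name matches the scope regex
    processed = []
    for grad, var in opt.compute_gradients(loss):
        if grad is not None and re.match(scope_regex, var.op.name):
            grad = tf.clip_by_average_norm(grad, clip_norm)
        processed.append((grad, var))
    return opt.apply_gradients(processed)

# e.g. train_op = clip_scope_gradients(
#     tf.train.AdamOptimizer(1e-4), critic_loss, r'^critic/.*', 0.05)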
Example #2
def do_rollout(env, action_function):
    """ Builds a path by running through an environment using a provided function to select actions. """
    obs, rewards, actions, human_obs, distances = [], [], [], [], []
    max_timesteps_per_episode = 10000
    raw_ob = env.reset()
    logger.info('get obs {}'.format(raw_ob.shape))
    # The environment packs the travelled distance into the last element
    # of the observation vector; split it off from the observation proper.
    ob = raw_ob[:-1]
    distance = raw_ob[-1]
    # Primary environment loop
    for i in range(max_timesteps_per_episode):
        action = action_function(env, ob)
        obs.append(ob)
        # this custom env.step takes a 4-tuple and returns the executed
        # action along with the new observation, reward, and done flag
        raw_ob, action, reward, done = env.step((action, 0., [0., 0.], [0., 0.]))
        ob = raw_ob[:-1]
        distance = raw_ob[-1]
        # logger.info('agent {} running at distance {}'.format(env._agentIdent,distance))
        actions.append(action)
        rgb = env._cur_screen

        # logger.info("[{:04d}: step".format(env._agentIdent))
        rewards.append(reward)
        human_obs.append(rgb)
        distances.append(distance)
        if done:
            break
    # Build path dictionary
    path = {
        "obs": np.array(obs),
        "original_rewards": np.array(rewards),
        "actions": np.array(actions),
        "human_obs": np.array(human_obs),
        "distances": np.array(distances)
    }
    return path
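do_rollout only relies on reset()/step() following the custom 4-tuple protocol above plus a _cur_screen attribute, so it can be exercised against a stub. A hedged usage sketch (ToyEnv and random_action are illustrative stand-ins, assuming numpy as np and a logger are in scope as in the example):

class ToyEnv(object):
    def reset(self):
        self._t = 0
        self._cur_screen = np.zeros((64, 64, 3), dtype=np.uint8)
        # last element of the observation vector is the distance
        return np.zeros(11, dtype=np.float32)

    def step(self, action_tuple):
        self._t += 1
        raw_ob = np.random.randn(11).astype(np.float32)
        raw_ob[-1] = float(self._t)       # distance grows each step
        return raw_ob, action_tuple[0], 0.0, self._t >= 5

def random_action(env, ob):
    return np.random.uniform(-1., 1., size=2)

path = do_rollout(ToyEnv(), random_action)
print(path["distances"])                  # -> [1. 2. 3. 4. 5.]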
Example #3
def write_segment_to_video(segment, fname):
    # logger.info("write_segment_to_video = {}".format(fname))

    os.makedirs(osp.dirname(fname), exist_ok=True)
    frames = segment["human_obs"]
    logger.info("frames = {}".format(frames.shape))
    # for i in range(int(env.fps * 0.2)):
    #     frames.append(frames[-1])
    export_video(frames, fname, fps=24)
Example #4
    def convert_segment_to_media_url(self, comparison_uuid, side, segment):
        tmp_media_dir = '/tmp/rl_teacher_media'
        logger.info('processing : {}'.format(comparison_uuid))
        media_id = "%s-%s.mp4" % (comparison_uuid, side)
        local_path = osp.join(tmp_media_dir, media_id)
        self._upload_workers.apply_async(_write_and_upload_video, (local_path, segment))
        local_ip = get_ip()
        # the URL assumes a file server on port 5000 (see the sketch below)
        media_url = "http://%s:5000/%s" % (local_ip, media_id)
        return media_url
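The media_url above implies that files written to /tmp/rl_teacher_media are served over HTTP on port 5000. The source does not show that server; a minimal Flask sketch of what it could look like:

from flask import Flask, send_from_directory

app = Flask(__name__)
MEDIA_DIR = '/tmp/rl_teacher_media'

@app.route('/<path:media_id>')
def serve_media(media_id):
    # serve the uploaded .mp4 segments by file name
    return send_from_directory(MEDIA_DIR, media_id)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)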
Example #5
    def make_observaton(self, raw_obs):
        if self.vision is False:
            names = ['focus',
                     'speedX', 'speedY', 'speedZ', 'angle', 'damage',
                     'opponents',
                     'rpm',
                     'track',
                     'trackPos',
                     'wheelSpinVel',
                     'distFromStart']
            from drlutils.utils import logger
            logger.info('distance from start {}'.format(raw_obs['distFromStart']))
            logger.info('distRaced {}'.format(raw_obs['distRaced']))
            Observation = col.namedtuple('Observation', names)
            return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200.,
                               speedX=np.array(raw_obs['speedX'], dtype=np.float32)/300.0,
                               speedY=np.array(raw_obs['speedY'], dtype=np.float32)/300.0,
                               speedZ=np.array(raw_obs['speedZ'], dtype=np.float32)/300.0,
                               angle=np.array(raw_obs['angle'], dtype=np.float32)/3.1416,
                               damage=np.array(raw_obs['damage'], dtype=np.float32),
                               opponents=np.array(raw_obs['opponents'], dtype=np.float32)/200.,
                               rpm=np.array(raw_obs['rpm'], dtype=np.float32)/10000,
                               track=np.array(raw_obs['track'], dtype=np.float32)/200.,
                               trackPos=np.array(raw_obs['trackPos'], dtype=np.float32)/1.,
                               wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32),
                               distFromStart=np.array(raw_obs['distFromStart'],dtype=np.float32))
        else:
            assert 0, 'vision mode is not supported here'
            names = ['focus',
                     'speedX', 'speedY', 'speedZ', 'angle',
                     'opponents',
                     'rpm',
                     'track',
                     'trackPos',
                     'wheelSpinVel',
                     'img']
            Observation = col.namedtuple('Observation', names)

            # Get the RGB frame from the observation ('img', the last name)
            image_rgb = self.obs_vision_to_image_rgb(raw_obs['img'])

            return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200.,
                               speedX=np.array(raw_obs['speedX'], dtype=np.float32)/self.default_speed,
                               speedY=np.array(raw_obs['speedY'], dtype=np.float32)/self.default_speed,
                               speedZ=np.array(raw_obs['speedZ'], dtype=np.float32)/self.default_speed,
                               angle=np.array(raw_obs['angle'], dtype=np.float32)/3.1416,
                               opponents=np.array(raw_obs['opponents'], dtype=np.float32)/200.,
                               rpm=np.array(raw_obs['rpm'], dtype=np.float32),
                               track=np.array(raw_obs['track'], dtype=np.float32)/200.,
                               trackPos=np.array(raw_obs['trackPos'], dtype=np.float32)/1.,
                               wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32),
                               img=image_rgb)
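Each sensor above is divided by a rough physical range (200 for the rangefinders, 300 for speed, pi for the angle) so every field reaches the network at roughly unit scale. A compact sketch of the same pattern (normalize_obs and SCALES are ours, not gym_torcs API):

import collections as col
import numpy as np

SCALES = {'focus': 200., 'speedX': 300., 'angle': 3.1416, 'track': 200.}

def normalize_obs(raw_obs):
    # one namedtuple field per sensor, each divided by its range
    Observation = col.namedtuple('Observation', sorted(SCALES))
    fields = {k: np.asarray(raw_obs[k], dtype=np.float32) / s
              for k, s in SCALES.items()}
    return Observation(**fields)

obs = normalize_obs({'focus': [100.] * 5, 'speedX': 150.,
                     'angle': 1.5708, 'track': [50.] * 19})
print(obs.speedX)   # -> 0.5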
Example #6
            data_io,
            # PeriodicTrigger(Evaluator(
            #     EVAL_EPISODE, ['state'], ['policy'], get_player),
            #     every_k_epochs=3),
        ] + evaluators,
        session_creator=sesscreate.NewSessionCreator(
            config=get_default_sess_config(0.5)),
        steps_per_epoch=STEPS_PER_EPOCH,
        max_epoch=1000,
    )
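The fragment above is the tail of a tensorpack TrainConfig(...) call. For context, a hedged reconstruction of the full call using tensorpack's documented fields (model, data_io, evaluators, and STEPS_PER_EPOCH come from the surrounding script and are assumed here):

from tensorpack import TrainConfig
from tensorpack.tfutils import sesscreate
from tensorpack.tfutils.common import get_default_sess_config

config = TrainConfig(
    model=model,                        # the ModelDesc defined elsewhere
    callbacks=[data_io] + evaluators,   # data pump plus periodic evaluators
    session_creator=sesscreate.NewSessionCreator(
        config=get_default_sess_config(0.5)),  # cap GPU memory fraction at 0.5
    steps_per_epoch=STEPS_PER_EPOCH,
    max_epoch=1000,
)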


if __name__ == '__main__':
    if args['train']:
        from drlutils.utils import logger
        logger.info("Begin train task")
        from ad_cur.autodrive.agent.pool import AgentPool, AgentPoolFake
        clsPool = AgentPoolFake if args['--fake_agent'] else AgentPool
        from drlutils.evaluator.evaluator import EvaluatorBase

        class Evaluator(EvaluatorBase):
            # thin placeholder subclass; override _init() for custom setup
            def _init(self):
                super(Evaluator, self)._init()

        if args['--gpu']:
            os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
                sorted(list(set(args['--gpu'].split(',')))))
        # os.system('killall -9 torcs-bin')

        dirname = '/tmp/torcs/trainlog'
    from tensorpack.utils import logger  # re-binds logger (drlutils' above) to tensorpack's