Example 1
def main():
    args = get_args()
    config, checkpoint = get_config_and_checkpoint(args)

    set_random_seeds(args, config)
    eval_log_dir = args.save_dir + "_eval"
    try:
        os.makedirs(args.save_dir)
        os.makedirs(eval_log_dir)
    except OSError:
        pass

    now = datetime.datetime.now()
    experiment_name = args.experiment_name + '_' + now.strftime(
        "%Y-%m-%d_%H-%M-%S")

    # Logger that writes to STDOUT and a file in the save_dir.
    # It must be set up here, before the error handler below uses it.
    logger = setup_carla_logger(args.save_dir, experiment_name)

    # Create checkpoint directories
    save_dir_model = os.path.join(args.save_dir, 'model', experiment_name)
    save_dir_config = os.path.join(args.save_dir, 'config', experiment_name)
    try:
        os.makedirs(save_dir_model)
        os.makedirs(save_dir_config)
    except OSError as e:
        logger.error(e)
        exit()

    if args.config:
        shutil.copy2(args.config, save_dir_config)

    # Tensorboard Logging
    writer = SummaryWriter(
        os.path.join(args.save_dir, 'tensorboard', experiment_name))

    device = torch.device("cuda:0" if args.cuda else "cpu")
    norm_reward = not config.no_reward_norm
    norm_obs = not config.no_obs_norm

    assert not (config.num_virtual_goals > 0) or (
        config.reward_class
        == 'SparseReward'), "Can't use HER with dense reward"
    obs_converter = CarlaObservationConverter(
        h=84, w=84, rel_coord_system=config.rel_coord_system)
    action_converter = CarlaActionsConverter(config.action_type)
    envs = make_vec_envs(obs_converter,
                         action_converter,
                         args.starting_port,
                         config.seed,
                         config.num_processes,
                         config.gamma,
                         device,
                         config.reward_class,
                         num_frame_stack=1,
                         subset=config.experiments_subset,
                         norm_reward=norm_reward,
                         norm_obs=norm_obs,
                         apply_her=config.num_virtual_goals > 0,
                         video_every=args.video_interval,
                         video_dir=os.path.join(args.save_dir, 'video',
                                                experiment_name))

    if config.agent == 'forward':
        agent = agents.ForwardCarla()

    elif config.agent == 'a2c':
        agent = agents.A2CCarla(obs_converter,
                                action_converter,
                                config.value_loss_coef,
                                config.entropy_coef,
                                lr=config.lr,
                                eps=config.eps,
                                alpha=config.alpha,
                                max_grad_norm=config.max_grad_norm)

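    # ACKTR reuses the A2C agent; acktr=True switches it to the K-FAC
    # natural-gradient optimizer.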
    elif config.agent == 'acktr':
        agent = agents.A2CCarla(obs_converter,
                                action_converter,
                                config.value_loss_coef,
                                config.entropy_coef,
                                lr=config.lr,
                                eps=config.eps,
                                alpha=config.alpha,
                                max_grad_norm=config.max_grad_norm,
                                acktr=True)

    elif config.agent == 'ppo':
        agent = agents.PPOCarla(obs_converter,
                                action_converter,
                                config.clip_param,
                                config.ppo_epoch,
                                config.num_mini_batch,
                                config.value_loss_coef,
                                config.entropy_coef,
                                lr=config.lr,
                                eps=config.eps,
                                max_grad_norm=config.max_grad_norm)

    else:
        raise ValueError('Unknown agent type: {}'.format(config.agent))

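    # Resume model and optimizer state if a checkpoint was supplied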
    if checkpoint is not None:
        load_modules(agent.optimizer, agent.model, checkpoint)

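    # The literal 20 is the recurrent hidden state size; it matches the
    # hidden state allocated for evaluation below.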
    rollouts = RolloutStorage(config.num_steps, config.num_processes,
                              envs.observation_space, envs.action_space, 20,
                              config.num_virtual_goals,
                              config.rel_coord_system, obs_converter)

    obs = envs.reset()
    # Copy the first observation into the rollout buffer, key by key
    obs = obs_to_dict(obs)
    rollouts.obs = obs_to_dict(rollouts.obs)
    for k in rollouts.obs:
        rollouts.obs[k][rollouts.step + 1].copy_(obs[k])
    rollouts.obs = dict_to_obs(rollouts.obs)
    rollouts.to(device)

    start = time.time()

    total_steps = 0
    total_episodes = 0
    total_reward = 0

    episode_reward = torch.zeros(config.num_processes)

    for j in range(config.num_updates):

        for step in range(config.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = agent.act(
                    rollouts.get_obs(step),
                    rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, info = envs.step(action)

            # For logging purposes
            carla_rewards = torch.tensor([i['carla-reward'] for i in info],
                                         dtype=torch.float)
            episode_reward += carla_rewards
            total_reward += carla_rewards.sum().item()
            total_steps += config.num_processes

            if done.any():
                total_episodes += done.sum()
                torch_done = torch.tensor(done.astype(int)).bool()
                mean_episode_reward = episode_reward[torch_done].mean().item()
                logger.info('{} episode(s) finished with reward {}'.format(
                    done.sum(), mean_episode_reward))
                writer.add_scalar('train/mean_ep_reward_vs_steps',
                                  mean_episode_reward, total_steps)
                writer.add_scalar('train/mean_ep_reward_vs_episodes',
                                  mean_episode_reward, total_episodes)
                episode_reward[torch_done] = 0

            # If done then clean the history of observations.
            masks = torch.FloatTensor(1.0 - done.astype(np.float32))

            rollouts.insert(obs, recurrent_hidden_states,
                            action, action_log_prob, value, reward,
                            masks.unsqueeze(-1))

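        # Hindsight Experience Replay: relabel the collected rollout with
        # virtual goals before computing returns.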
        if config.num_virtual_goals > 0:
            rollouts.apply_her(config.num_virtual_goals,
                               device,
                               beta=config.beta)

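        # Bootstrap the returns with the value estimate of the last
        # observation (GAE is applied when config.use_gae is set).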
        with torch.no_grad():
            next_value = agent.get_value(
                rollouts.get_obs(-1),  # Get last observation
                rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, config.use_gae, config.gamma,
                                 config.tau)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        if j % args.save_interval == 0 and args.save_dir != "" and config.agent != 'forward':
            save_path = os.path.join(save_dir_model, str(j) + '.pth.tar')
            save_modules(agent.optimizer, agent.model, args, config, save_path)

        total_num_steps = (j + 1) * config.num_processes * config.num_steps

        if j % args.log_interval == 0:

            # Logging to the stdout/our logs
            end = time.time()
            logger.info('------------------------------------')
            logger.info('Episodes {}, Updates {}, num timesteps {}, FPS {}'\
                .format(total_episodes, j + 1, total_num_steps, total_num_steps / (end - start)))
            logger.info('------------------------------------')

            # Logging to tensorboard
            writer.add_scalar('train/cum_reward_vs_steps', total_reward,
                              total_steps)
            writer.add_scalar('train/cum_reward_vs_updates', total_reward,
                              j + 1)

            if config.agent in ['a2c', 'acktr', 'ppo']:
                writer.add_scalar('debug/value_loss_vs_steps', value_loss,
                                  total_steps)
                writer.add_scalar('debug/value_loss_vs_updates', value_loss,
                                  j + 1)
                writer.add_scalar('debug/action_loss_vs_steps', action_loss,
                                  total_steps)
                writer.add_scalar('debug/action_loss_vs_updates', action_loss,
                                  j + 1)
                writer.add_scalar('debug/dist_entropy_vs_steps', dist_entropy,
                                  total_steps)
                writer.add_scalar('debug/dist_entropy_vs_updates',
                                  dist_entropy, j + 1)

            # Sample the last reward
            writer.add_scalar('debug/sampled_normalized_reward_vs_steps',
                              reward.mean(), total_steps)
            writer.add_scalar('debug/sampled_normalized_reward_vs_updates',
                              reward.mean(), j + 1)
            writer.add_scalar('debug/sampled_carla_reward_vs_steps',
                              carla_rewards.mean(), total_steps)
            writer.add_scalar('debug/sampled_carla_reward_vs_updates',
                              carla_rewards.mean(), j + 1)

        if (args.eval_interval is not None and j % args.eval_interval == 0):
            # NOTE: the original snippet's arguments did not match the
            # make_vec_envs signature used for training above; this is a
            # best-effort alignment (eval seed and settings are assumptions).
            eval_envs = make_vec_envs(obs_converter, action_converter,
                                      args.starting_port,
                                      config.seed + config.num_processes,
                                      config.num_processes, config.gamma,
                                      device, config.reward_class,
                                      num_frame_stack=1,
                                      subset=config.experiments_subset,
                                      norm_reward=norm_reward,
                                      norm_obs=norm_obs, apply_her=False,
                                      video_every=args.video_interval,
                                      video_dir=eval_log_dir)

            vec_norm = get_vec_normalize(eval_envs)
            if vec_norm is not None:
                vec_norm.ob_rms = get_vec_normalize(envs).ob_rms

            eval_episode_rewards = []

            obs = eval_envs.reset()
            eval_recurrent_hidden_states = torch.zeros(config.num_processes,
                                                       20,
                                                       device=device)
            eval_masks = torch.zeros(config.num_processes, 1, device=device)

            while len(eval_episode_rewards) < 10:
                with torch.no_grad():
                    _, action, _, eval_recurrent_hidden_states = agent.act(
                        obs,
                        eval_recurrent_hidden_states,
                        eval_masks,
                        deterministic=True)

                # Observe reward and next obs
                obs, reward, done, infos = eval_envs.step(action)

                eval_masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                                for done_ in done])
                for info in infos:
                    if 'episode' in info.keys():
                        eval_episode_rewards.append(info['episode']['r'])

            eval_envs.close()

            logger.info(
                " Evaluation using {} episodes: mean reward {:.5f}\n".format(
                    len(eval_episode_rewards), np.mean(eval_episode_rewards)))
Example 2
class CarlaEnv(object):
    '''
        An OpenAI Gym Environment for CARLA.
    '''
    def __init__(self,
                 obs_converter,
                 action_converter,
                 env_id,
                 random_seed=0,
                 exp_suite_name='TrainingSuite',
                 reward_class_name='CarlaReward',
                 host='127.0.0.1',
                 port=2000,
                 city_name='Town01',
                 subset=None,
                 video_every=100,
                 video_dir='./video/',
                 distance_for_success=2.0,
                 benchmark=False,
                 constraint_turn=False):

        self.logger = get_carla_logger()
        self.logger.info('Environment {} running in port {}'.format(
            env_id, port))
        self.host, self.port = host, port
        self.id = env_id
        self._obs_converter = obs_converter
        self.observation_space = obs_converter.get_observation_space()
        self._action_converter = action_converter
        self.action_space = self._action_converter.get_action_space()
        if benchmark:
            self._experiment_suite = getattr(experiment_suites_benchmark,
                                             exp_suite_name)(city_name)
        else:
            self._experiment_suite = getattr(experiment_suites,
                                             exp_suite_name)(city_name, subset)
        self._reward = getattr(rewards, reward_class_name)()
        self._experiments = self._experiment_suite.get_experiments()
        self.subset = subset
        self._make_carla_client(host, port)
        self._distance_for_success = distance_for_success
        self._planner = Planner(city_name)
        self.done = False
        self.last_obs = None
        self.last_distance_to_goal = None
        self.last_direction = None
        self.last_measurements = None
        np.random.seed(random_seed)
        self.video_every = video_every
        self.video_dir = video_dir
        self.video_writer = None
        self._success = False
        self._failure_timeout = False
        self._failure_collision = False
        self.benchmark = benchmark
        self.benchmark_index = [0, 0, 0]
        try:
            if not os.path.isdir(self.video_dir):
                os.makedirs(self.video_dir)
        except OSError:
            pass
        self.steps = 0
        self.num_episodes = 1

        self.converter = CarlaObservationConverter()
        self.constraint_turn = constraint_turn

    def step(self, action):

        if self.done:
            raise ValueError(
                'self.done should always be False when calling step')

        while True:

            try:
                # Send control
                control = self._action_converter.action_to_control(
                    action, self.last_measurements)
                self._client.send_control(control)

                # Gather the observations (including measurements, sensor and directions)
                measurements, sensor_data = self._client.read_data()
                self.last_measurements = measurements
                current_timestamp = measurements.game_timestamp
                distance_to_goal = self._get_distance_to_goal(
                    measurements, self._target)
                self.last_distance_to_goal = distance_to_goal
                directions = self._get_directions(
                    measurements.player_measurements.transform, self._target)
                self.last_direction = directions
                obs = self._obs_converter.convert(measurements, sensor_data,
                                                  directions, self._target,
                                                  self.id)

                if self.video_writer is not None and self.steps % 2 == 0:
                    self._raster_frame(sensor_data, measurements, directions,
                                       obs)

                self.last_obs = obs

            except CameraException:
                self.logger.debug('Camera Exception in step()')
                obs = self.last_obs
                distance_to_goal = self.last_distance_to_goal
                current_timestamp = self.last_measurements.game_timestamp
                # Fall back to the last known values so the terminal-state
                # and reward computations below remain defined.
                measurements = self.last_measurements
                directions = self.last_direction

            except TCPConnectionError as e:
                self.logger.debug(
                    'TCPConnectionError inside step(): {}'.format(e))
                self.done = True
                return self.last_obs, 0.0, True, {
                    'carla-reward': 0.0,
                    'carla-reward-raw': 0.0,
                    'constraint_turn_violated': False
                }

            break

        # Check if terminal state
        timeout = (current_timestamp -
                   self._initial_timestamp) > (self._time_out * 1000)
        collision, _ = self._is_collision(measurements)
        success = distance_to_goal < self._distance_for_success
        if timeout:
            self.logger.debug('Timeout')
            self._failure_timeout = True
        if collision:
            self.logger.debug('Collision')
            self._failure_collision = True
        if success:
            self.logger.debug('Success')
            self._success = True
        self.done = timeout or collision or success

        # Get the reward
        env_state = {
            'timeout': timeout,
            'collision': collision,
            'success': success
        }
        reward = self._reward.get_reward(measurements, self._target,
                                         self.last_direction, control,
                                         env_state)
        raw_reward = reward

        constraint_turn_violated = False
        direction_str = self.converter.direction_to_string(directions)
        if direction_str == 'TURN_LEFT' and control.steer > 0:
            constraint_turn_violated = True
        if direction_str == 'TURN_RIGHT' and control.steer < 0:
            constraint_turn_violated = True

        if self.constraint_turn and constraint_turn_violated:
            reward -= 1

        # Additional information
        info = {
            'carla-reward': reward,
            'carla-reward-raw': raw_reward,
            'constraint_turn_violated': constraint_turn_violated
        }

        self.steps += 1

        return obs, reward, self.done, info

    def reset(self):

        # Loop forever due to TCPConnectionErrors
        while True:
            try:
                self._reward.reset_reward()
                self.done = False
                if self.video_writer is not None:
                    try:
                        self.video_writer.close()
                    except Exception as e:
                        self.logger.debug(
                            'Error when closing video writer in reset')
                        self.logger.error(e)
                    self.video_writer = None
                if self.benchmark:
                    end_indicator = self._new_episode_benchmark()
                    if end_indicator is False:
                        return False
                else:
                    self._new_episode()
                # Hack: Try sleeping so that the server is ready. Reduces the number of TCPErrors
                time.sleep(4)
                # measurements, sensor_data = self._client.read_data()
                self._client.send_control(VehicleControl())
                measurements, sensor_data = self._client.read_data()
                self._initial_timestamp = measurements.game_timestamp
                self.last_measurements = measurements
                self.last_distance_to_goal = self._get_distance_to_goal(
                    measurements, self._target)
                directions = self._get_directions(
                    measurements.player_measurements.transform, self._target)
                self.last_direction = directions
                obs = self._obs_converter.convert(measurements, sensor_data,
                                                  directions, self._target,
                                                  self.id)
                self.last_obs = obs
                self.done = False
                self._success = False
                self._failure_timeout = False
                self._failure_collision = False
                return obs

            except CameraException:
                self.logger.debug('Camera Exception in reset()')
                continue

            except TCPConnectionError as e:
                self.logger.debug('TCPConnectionError in reset()')
                self.logger.error(e)
                # Disconnect and reconnect
                self.disconnect()
                time.sleep(5)
                self._make_carla_client(self.host, self.port)

    def disconnect(self):

        if self.video_writer is not None:
            try:
                self.video_writer.close()
            except Exception as e:
                self.logger.debug(
                    'Error when closing video writer in disconnect')
                self.logger.error(e)
            self.video_writer = None

        self._client.disconnect()

    def _raster_frame(self, sensor_data, measurements, directions, obs):

        frame = sensor_data['CameraRGB'].data.copy()
        cv2.putText(frame,
                    text='Episode number: {:,}'.format(self.num_episodes - 1),
                    org=(50, 50),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.0,
                    color=[0, 0, 0],
                    thickness=2)
        cv2.putText(frame,
                    text='Environment steps: {:,}'.format(self.steps),
                    org=(50, 80),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.0,
                    color=[0, 0, 0],
                    thickness=2)

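        # High-level command codes returned by the CARLA planner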
        REACH_GOAL = 0.0
        GO_STRAIGHT = 5.0
        TURN_RIGHT = 4.0
        TURN_LEFT = 3.0
        LANE_FOLLOW = 2.0
        if np.isclose(directions, REACH_GOAL):
            dir_str = 'REACH GOAL'
        elif np.isclose(directions, GO_STRAIGHT):
            dir_str = 'GO STRAIGHT'
        elif np.isclose(directions, TURN_RIGHT):
            dir_str = 'TURN RIGHT'
        elif np.isclose(directions, TURN_LEFT):
            dir_str = 'TURN LEFT'
        elif np.isclose(directions, LANE_FOLLOW):
            dir_str = 'LANE FOLLOW'
        else:
            raise ValueError(directions)
        cv2.putText(frame,
                    text='Direction: {}'.format(dir_str),
                    org=(50, 110),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.0,
                    color=[0, 0, 0],
                    thickness=2)
        cv2.putText(frame,
                    text='Speed: {:.02f}'.format(
                        measurements.player_measurements.forward_speed * 3.6),
                    org=(50, 140),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.0,
                    color=[0, 0, 0],
                    thickness=2)
        cv2.putText(frame,
                    text='rel_x: {:.02f}, rel_y: {:.02f}'.format(
                        obs['v'][-2].item(), obs['v'][-1].item()),
                    org=(50, 170),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.0,
                    color=[0, 0, 0],
                    thickness=2)
        self.video_writer.writeFrame(frame)

    def _get_distance_to_goal(self, measurements, target):

        current_x = measurements.player_measurements.transform.location.x
        current_y = measurements.player_measurements.transform.location.y
        distance_to_goal = np.linalg.norm(
            np.array([current_x, current_y]) -
            np.array([target.location.x, target.location.y]))
        return distance_to_goal

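    # Sample a random experiment and start/end pose for the next training
    # episode and load it into the CARLA server.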
    def _new_episode(self):
        experiment_idx = np.random.randint(0, len(self._experiments))
        experiment = self._experiments[experiment_idx]
        exp_settings = experiment.conditions
        exp_settings.set(QualityLevel='Low')
        positions = self._client.load_settings(exp_settings).player_start_spots
        idx_pose = np.random.randint(0, len(experiment.poses))
        pose = experiment.poses[idx_pose]
        self.logger.info('Env {} gets experiment {} with pose {}'.format(
            self.id, experiment_idx, idx_pose))
        start_index = pose[0]
        end_index = pose[1]
        self._client.start_episode(start_index)
        self._time_out = self._experiment_suite.calculate_time_out(
            self._get_shortest_path(positions[start_index],
                                    positions[end_index]))
        self._target = positions[end_index]
        self._episode_name = str(experiment.Conditions.WeatherId) + '_' \
                            + str(experiment.task) + '_' + str(start_index) \
                            + '_' + str(end_index)

        if ((self.num_episodes % self.video_every) == 0) and (self.id == 0):
            video_path = os.path.join(
                self.video_dir, '{:08d}_'.format(self.num_episodes) +
                self._episode_name + '.mp4')
            self.logger.info('Writing video at {}'.format(video_path))
            self.video_writer = skvideo.io.FFmpegWriter(
                video_path, inputdict={'-r': '30'}, outputdict={'-r': '30'})
        else:
            self.video_writer = None

        self.num_episodes += 1

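    # Step deterministically through (experiment, pose, repetition) triples
    # for benchmarking; returns False once every experiment is exhausted.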
    def _new_episode_benchmark(self):
        experiment_idx_past = self.benchmark_index[0]
        pose_idx_past = self.benchmark_index[1]
        repetition_idx_past = self.benchmark_index[2]

        experiment_past = self._experiments[experiment_idx_past]
        poses_past = experiment_past.poses[0:]
        repetition_past = experiment_past.repetitions

        if repetition_idx_past == repetition_past:
            if pose_idx_past == len(poses_past) - 1:
                if experiment_idx_past == len(self._experiments) - 1:
                    return False
                else:
                    experiment = self._experiments[experiment_idx_past + 1]
                    pose = experiment.poses[0:][0]
                    self.benchmark_index = [experiment_idx_past + 1, 0, 1]
            else:
                experiment = experiment_past
                pose = poses_past[pose_idx_past + 1]
                self.benchmark_index = [
                    experiment_idx_past, pose_idx_past + 1, 1
                ]
        else:
            experiment = experiment_past
            pose = poses_past[pose_idx_past]
            self.benchmark_index = [
                experiment_idx_past, pose_idx_past, repetition_idx_past + 1
            ]
        exp_settings = experiment.Conditions
        exp_settings.set(QualityLevel='Low')
        positions = self._client.load_settings(exp_settings).player_start_spots
        start_index = pose[0]
        end_index = pose[1]
        self._client.start_episode(start_index)
        self._time_out = self._experiment_suite.calculate_time_out(
            self._get_shortest_path(positions[start_index],
                                    positions[end_index]))
        self._target = positions[end_index]
        self._episode_name = str(experiment.Conditions.WeatherId) + '_' \
                            + str(experiment.task) + '_' + str(start_index) \
                            + '_' + str(end_index)
        if ((self.num_episodes % self.video_every) == 0) and (self.id == 0):
            video_path = os.path.join(
                self.video_dir, '{:08d}_'.format(self.num_episodes) +
                self._episode_name + '.mp4')
            self.logger.info('Writing video at {}'.format(video_path))
            self.video_writer = skvideo.io.FFmpegWriter(
                video_path, inputdict={'-r': '30'}, outputdict={'-r': '30'})
        else:
            self.video_writer = None

        self.num_episodes += 1

    def _get_directions(self, current_point, end_point):

        directions = self._planner.get_next_command(
            (current_point.location.x, current_point.location.y, 0.22),
            (current_point.orientation.x, current_point.orientation.y,
             current_point.orientation.z),
            (end_point.location.x, end_point.location.y, 0.22),
            (end_point.orientation.x, end_point.orientation.y,
             end_point.orientation.z))
        return directions

    def _get_shortest_path(self, start_point, end_point):

        return self._planner.get_shortest_path_distance(
            [start_point.location.x, start_point.location.y, 0.22],
            [start_point.orientation.x, start_point.orientation.y, 0.22],
            [end_point.location.x, end_point.location.y, end_point.location.z],
            [
                end_point.orientation.x, end_point.orientation.y,
                end_point.orientation.z
            ])

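    # A step counts as a collision if any collision value is non-zero, or if
    # the car is more than 1% off-road or 90% into the opposite lane.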
    @staticmethod
    def _is_collision(measurements):

        c = 0
        c += measurements.player_measurements.collision_vehicles
        c += measurements.player_measurements.collision_pedestrians
        c += measurements.player_measurements.collision_other

        sidewalk_intersection = measurements.player_measurements.intersection_offroad

        otherlane_intersection = measurements.player_measurements.intersection_otherlane

        return ((c > 1e-9) or (sidewalk_intersection > 0.01)
                or (otherlane_intersection > 0.9)), c

    def _make_carla_client(self, host, port):

        # Try to connect up to 20 times (it usually connects on the first
        # attempt when everything is set up correctly)
        for _ in range(20):
            try:
                self.logger.info(
                    "Trying to make client on port {}".format(port))
                self._client = CarlaClient(host, port, timeout=100)
                self._client.connect()
                self._client.load_settings(CarlaSettings(QualityLevel='Low'))
                self._client.start_episode(0)
                self.logger.info(
                    "Successfully made client on port {}".format(port))
                break
            except TCPConnectionError as error:
                self.logger.debug('Got TCPConnectionError..sleeping for 1')
                self.logger.error(error)
                time.sleep(1)
Example 3
    argparser.add_argument('--cuda',
                           action='store_true',
                           default=False,
                           help='Run on the GPU (defaults to CPU when omitted)')
    argparser.add_argument('--save-dir',
                           default='./outputs',
                           help='Directory to save model, logs and videos')
    argparser.add_argument('--video-interval', type=int, default=1)
    argparser.add_argument('--save-interval', type=int, default=5)
    args = argparser.parse_args()
    log_level = logging.INFO
    logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

    logging.info('listening to server %s:%s', args.host, args.port)

    config, checkpoint = get_config_and_checkpoint(args)
    obs_converter = CarlaObservationConverter(h=84, w=84)
    action_converter = CarlaActionsConverter(config.action_type)

    device = torch.device("cpu")
    # device = torch.device("cuda:0" if args.cuda else "cpu")
    norm_reward = not config.no_reward_norm

    # We instantiate an experiment suite. Basically a set of experiments
    # that are going to be evaluated on this benchmark.
    if args.corl_2017:
        experiment_suite = CoRL2017(args.city_name)
        experiment_name = 'CoRL2017'
    else:
        print(
            ' WARNING: running the basic driving benchmark, to run for CoRL 2017'
            ' experiment suites, you should run'
            ' this script with the --corl-2017 option')
Example 4
class CIRLReward():
    '''
        Reward function from https://arxiv.org/abs/1807.03776. 
    '''
    def __init__(self):
        self.converter = CarlaObservationConverter()

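    # Steering penalty (r_s in the paper): penalize steering against the
    # commanded turn, or large steering when commanded to go straight.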
    def _r_s(self, control, direction):
        if direction == 'TURN_RIGHT' and control.steer < 0:
            return -15
        if direction == 'TURN_LEFT' and control.steer > 0:
            return -15
        if direction == 'GO_STRAIGHT' and np.abs(control.steer) > 0.2:
            return -20
        return 0

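    # Speed reward (r_v): reward speed (km/h) up to a direction-dependent
    # cap; on turns, speeds above 20 km/h are penalized as 20 - v.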
    def _r_v(self, velocity, direction):
        if direction == 'LANE_FOLLOW':
            return min(25.0, velocity)
        if direction == 'GO_STRAIGHT':
            return min(35.0, velocity)
        if velocity <= 20:
            return velocity
        if velocity > 20:
            return 20 - velocity

        assert False

    def get_reward(self, measurements, target, direction, control, env_state):

        reward = 0
        direction = self.converter.direction_to_string(direction)

        if direction == 'REACH_GOAL':
            # TODO: What is right to return here?
            return 0

        # Speed (km/h)
        v = measurements.player_measurements.forward_speed * 3.6
        reward += self._r_v(v, direction)

        reward += self._r_s(control, direction)

        # Collisions (r_d in the paper)
        if (measurements.player_measurements.collision_vehicles > 1e-6 or
                measurements.player_measurements.collision_pedestrians > 1e-6):
            reward += -100
        if measurements.player_measurements.collision_other:
            reward += -50

        # Intersection with sidewalk (r_r)
        s = measurements.player_measurements.intersection_offroad
        if s > 1e-6:
            reward += -100

        # Intersection with opposite lane (r_o)
        o = measurements.player_measurements.intersection_otherlane
        if o > 1e-6:
            reward += -100

        return reward

    def reset_reward(self):
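        # The CIRL reward is stateless, so there is nothing to reset.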
        return
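
# Usage sketch (illustrative, not part of the original source). CarlaEnv looks
# the reward class up by name and calls it every step, as in Example 2:
#
#     self._reward = getattr(rewards, 'CIRLReward')()   # in __init__
#     self._reward.reset_reward()                       # in reset()
#     reward = self._reward.get_reward(measurements, target, direction,
#                                      control, env_state)  # in step()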