Example #1
def gen_env(policy, env_config_='configs/env.config'):

    # # configure paths
    # make_new_dir = True
    # if os.path.exists(output_dir):
    #     shutil.rmtree(output_dir)

    # if make_new_dir:
    #     os.makedirs(output_dir)
    #     shutil.copy(env_config_, output_dir)
    #     shutil.copy(policy_config_, output_dir)
    # log_file = os.path.join(output_dir, 'output.log')
    # rl_weight_file = os.path.join(output_dir, 'rl_model.pth')

    # # configure logging
    # # mode = 'a' if resume else 'w'
    # # file_handler = logging.FileHandler(log_file, mode=mode)
    # # stdout_handler = logging.StreamHandler(sys.stdout)
    # # level = logging.INFO if not debug else logging.DEBUG
    # # logging.basicConfig(level=level, handlers=[stdout_handler, file_handler],
    # #                     format='%(asctime)s, %(levelname)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S")

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_)
    env = CrowdSim_PPO()
    env.configure(env_config)
    robot = Robot(env_config, 'robot')
    env.set_robot(robot)

    # reinforcement learning
    robot.set_policy(policy)
    #robot.print_info()
    return env
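A minimal usage sketch for gen_env(), assuming the CrowdNav-style policy_factory and config paths used in the later examples (the import path is an assumption, not part of this snippet):

import configparser
from crowd_nav.policy.policy_factory import policy_factory  # assumed import path

policy_config = configparser.RawConfigParser()
policy_config.read('configs/policy.config')
policy = policy_factory['sarl']()
policy.configure(policy_config)

env = gen_env(policy, env_config_='configs/env.config')
ob = env.reset('test', 0)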
Example #2
    def configure(self, config):
        self.config = config
        self.time_limit = config.env.time_limit
        self.time_step = config.env.time_step
        self.robot_sensor_range = config.env.robot_sensor_range
        self.success_reward = config.reward.success_reward
        self.collision_penalty = config.reward.collision_penalty

        self.case_capacity = {
            'train': np.iinfo(np.uint32).max - 2000,
            'val': 1000,
            'test': 1000
        }
        self.case_size = {
            'train': config.env.train_size,
            'val': config.env.val_size,
            'test': config.env.test_size
        }
        self.train_val_scenario = config.sim.train_val_scenario
        self.test_scenario = config.sim.test_scenario
        self.square_width = config.sim.square_width
        self.circle_radius = config.sim.circle_radius

        self.case_counter = {'train': 0, 'test': 0, 'val': 0}

        logging.info('Training simulation: {}, test simulation: {}'.format(
            self.train_val_scenario, self.test_scenario))
        logging.info('Square width: {}, circle radius: {}'.format(
            self.square_width, self.circle_radius))

        self.human_num = config.sim.human_num
        self.agents = [Robot(config, 'robot') for _ in range(self.human_num)]
        for agent in self.agents:
            agent.time_step = self.time_step
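configure() above expects a nested attribute-style config (config.env.*, config.reward.*, config.sim.*) rather than a configparser object. A minimal stand-in sketch using SimpleNamespace, with placeholder values only; the real project supplies its own config classes:

from types import SimpleNamespace

# Placeholder values; attribute names follow the accesses in configure() above.
config = SimpleNamespace(
    env=SimpleNamespace(time_limit=25, time_step=0.25, robot_sensor_range=5.0,
                        train_size=10000, val_size=100, test_size=500),
    reward=SimpleNamespace(success_reward=1.0, collision_penalty=-0.25),
    sim=SimpleNamespace(train_val_scenario='circle_crossing',
                        test_scenario='circle_crossing',
                        square_width=10.0, circle_radius=4.0, human_num=5),
)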
Example #3
    def _make_env(self, silent=False):
        # Create env
        config_dir = resource_filename('crowd_nav', 'config')
        config_file = os.path.join(config_dir, 'test_soadrl_static.config')
        config_file = os.path.expanduser(config_file)
        config = configparser.RawConfigParser()
        config.read(config_file)

        env = gym.make('CrowdSim-v0')
        env.configure(config, silent=silent)
        robot = Robot(config, 'humans')
        env.set_robot(robot)

        policy = SDOADRLDummyPolicy()
        policy.configure(config)
        if self.LEGACY_MODE:
            sess = tf.Session()
            policy = SDOADRL()
            policy.configure(sess, 'global', config)
            policy.set_phase('test')
            policy.load_model(os.path.expanduser('~/soadrl/Final_models/angular_map_full_FOV/rl_model'))

        env.robot.set_policy(policy)
        if not silent:
            env.robot.print_info()

        self.soadrl_sim = env
Example #4
    def __init__(self, env, env_config, policy):

        #
        self.env = env
        self.env_config = env_config
        # configure robot
        self.robot = Robot(env_config, 'robot')
        self.robot.set_policy(policy)

        self.env.set_robot(self.robot)  # pass robot parameters into env
        # env.reset() initializes parameters from the .config file
        # (e.g. time_step, success_reward) needed by other instances
        self.ob = env.reset('test', 1)
        self.policy = policy
        self.policy.set_env(env)
        # for subscribers
        self.pose = PoseStamped()
        self.vel = Vector3()
        self.psi = 0.0

        # for publishers
        self.global_goal = PoseStamped()
        self.goal = PoseStamped()
        self.desired_position = PoseStamped()
        self.desired_action = np.zeros((2, ))

        # # publishers
        self.pub_twist = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
        self.pub_pose_marker = rospy.Publisher('', Marker, queue_size=1)
        # self.pub_agent_markers = rospy.Publisher('~agent_markers',MarkerArray,queue_size=1)
        # self.pub_path_marker = rospy.Publisher('~path_marker',Marker,queue_size=1)
        # self.pub_goal_path_marker = rospy.Publisher('~goal_path_marker',Marker,queue_size=1)
        # # sub
        self.sub_pose = rospy.Subscriber('/odom', Odometry, self.cbPose)
        self.sub_global_goal = rospy.Subscriber('/goal', PoseStamped,
                                                self.cbGlobalGoal)
        # self.sub_subgoal = rospy.Subscriber('~subgoal',PoseStamped, self.cbSubGoal)

        # subgoals
        self.sub_goal = Vector3()

        # self.sub_clusters = rospy.Subscriber('~clusters',Clusters, self.cbClusters)

        # control timer
        # self.control_timer = rospy.Timer(rospy.Duration(0.01),self.cbControl)
        self.nn_timer = rospy.Timer(rospy.Duration(0.3),
                                    self.cbComputeActionCrowdNav)
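A sketch of how a node built around this __init__ might be launched; the class name CrowdNavNode and the preparation of env, env_config and policy are assumptions, not shown in the snippet:

import rospy

if __name__ == '__main__':
    rospy.init_node('crowdnav_node')
    # env, env_config and policy prepared as in the other examples (gen_env / policy_factory)
    node = CrowdNavNode(env, env_config, policy)  # hypothetical class name
    rospy.spin()  # the subscribers and the rospy.Timer above drive the control loop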
Example #5
def initialize_robot():
    model_dir = "crowd_nav/data/output-lab-base-cases/"
    phase = "test"
    model_weights = model_dir + "rl_model.pth"
    policy_config_file = model_dir + "policy.config"
    env_config_file = model_dir + "env.config"
    cuda = input("Set device as Cuda? (y/n)")
    if torch.cuda.is_available() and cuda == 'y':
        device = torch.device("cuda:0")
        print("================================")
        print("=== Device:", device, "===")
        print("================================")
    else:
        device = torch.device("cpu")
        print("====================")
        print("=== Device:", device, "===")
        print("====================")

    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)

    policy = policy_factory["sarl"]()
    policy.configure(policy_config)

    if policy.trainable:
        print "SETTING MODEL WEIGHTS"
        policy.get_model().load_state_dict(torch.load(model_weights))

    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)

    env = gym.make('CrowdSim-v0')
    env.configure(env_config)

    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)

    env.set_robot(robot)

    policy.set_phase(phase)
    policy.set_device(device)
    policy.set_env(env)
    # TODO: NEED TO SET POLICY TIME_STEP

    return robot
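A hedged sketch for the TODO above: propagate the simulation time step to the policy before returning. This assumes env.config carries an [env] time_step option and that the policy exposes a time_step attribute (CrowdNav conventions, not confirmed by the snippet):

    # inside initialize_robot(), before `return robot` (sketch, not from the original)
    policy.time_step = env_config.getfloat('env', 'time_step')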
Example #6
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='cadrl')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=True, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=2)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=False)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--human_policy', type=str, default='socialforce')
    parser.add_argument('--trained_env', type=str, default='socialforce')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir,
                                       os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir,
                                          os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(
                    os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir,
                                             'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    # device = 'cpu'
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error(
                'Trainable policy must be specified with a model weights directory'
            )
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim_mixed-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'

    PPL = env.human_num

    robot = Robot(env_config, 'robot')
    # print(robot.px)
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # because invisible case breaks the reciprocal assumption
            # adding some safety space improves ORCA performance. Tune this value based on your need.
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    ppl_local = []
    robot_states = []
    robot_vel = []
    policy.set_env(env)
    robot.print_info()
    non_attentive_humans = []

    for case in range(args.test_case):
        print(case)
        rewards = []

        # if args.visualize:

        ob = env.reset(test_case=case)

        # ob = env.reset(args.phase, case)
        done = False
        last_pos = np.array(robot.get_position())
        # non_attentive_humans = random.sample(env.humans, int(math.ceil(env.human_num/10)))
        non_attentive_humans = []
        non_attentive_humans = set(non_attentive_humans)

        while not done:

            # print(env.global_time)
            # count = env.global_time
            # if count != 0:
            #     #     old_non_attentive_humans = []
            #     # else:
            #     old_non_attentive_humans = non_attentive_humans
            # # only record the first time the human reaches the goal

            # if count % 4 == 0:
            #     print("true")
            #
            #     non_attentive_humans = Human.get_random_humans()
            #     old_non_attentive_humans = non_attentive_humans
            # # else:
            # non_attentive_humans = old_non_attentive_humans

            action = robot.act(ob)
            ob, reward, done, info, ppl_count, robot_pose, robot_velocity, dmin = env.step(
                action, non_attentive_humans)
            # ob, reward, done, info, ppl_count, robot_pose, robot_velocity, dmin = env.step(action)

            rewards.append(reward)

            ppl_local.append(ppl_count)
            robot_states.append(robot_pose)
            robot_vel.append(robot_velocity)

            current_pos = np.array(robot.get_position())
            logging.debug(
                'Speed: %.2f',
                np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos

        gamma = 0.9
        cumulative_reward = sum([
            pow(gamma, t * robot.time_step * robot.v_pref) * reward
            for t, reward in enumerate(rewards)
        ])
        # if args.traj:
        #     env.render('traj', args.video_file)
        # else:
        #     env.render('video', args.video_file)

        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)

        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()

            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))

        # logging.info('PPl counter ', ppl_local)
        main = "Results/"
        human_policy = args.human_policy
        trained_env = args.trained_env

        if human_policy == 'socialforce':
            maindir = 'SocialForce/'
            if not os.path.exists(main + maindir):
                os.mkdir(main + maindir)
        else:
            maindir = 'ORCA/'
            if not os.path.exists(main + maindir):
                os.mkdir(main + maindir)

        robot_policy = args.policy
        trained_env = args.trained_env

        if robot_policy == 'igp_dist':
            method_dir = 'igp_dist/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)

        if robot_policy == 'ssp':
            method_dir = 'ssp/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)
        if (robot_policy == 'model_predictive_rl' and trained_env == 'orca'):
            method_dir = 'model_predictive_rl/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)
        if (robot_policy == 'model_predictive_rl'
                and trained_env == 'socialforce'):
            method_dir = 'model_predictive_rl_social/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)

        if (robot_policy == 'rgl' and trained_env == 'orca'):
            method_dir = 'rgl/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)
        if (robot_policy == 'rgl' and trained_env == 'socialforce'):
            method_dir = 'rgl_social/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)

        if (robot_policy == 'sarl' and trained_env == 'orca'):
            method_dir = 'sarl/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)
        if (robot_policy == 'sarl' and trained_env == 'socialforce'):
            method_dir = 'sarl_social/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)

        if (robot_policy == 'sarl' and trained_env == 'igpdist_sfm'):
            method_dir = 'sarl_igp_sfm/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)

        if (robot_policy == 'sarl' and trained_env == 'igpdist_orca'):
            method_dir = 'sarl_igp_orca/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)

        if (robot_policy == 'cadrl' and trained_env == 'orca'):
            method_dir = 'cadrl/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)
        if (robot_policy == 'cadrl' and trained_env == 'socialforce'):
            method_dir = 'cadrl_social/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)
        if (robot_policy == 'cadrl' and trained_env == 'orca_new'):
            method_dir = 'cadrl_new/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)
        if (robot_policy == 'cadrl' and trained_env == 'socialforce_new'):
            method_dir = 'cadrl_social_new/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)

        if robot_policy == 'ssp2':
            method_dir = 'ssp2/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)
        # elif robot_policy == 'cadrl':
        #     method_dir = 'cadrl/'
        #     if not os.path.exists(maindir+method_dir):
        #         os.mkdir(maindir+method_dir)
        # elif robot_policy == 'sarl':
        #     method_dir = 'sarl/'
        #     if not os.path.exists(maindir+method_dir):
        #         os.mkdir(maindir+method_dir)
        # elif robot_policy == 'lstm_rl':
        #     method_dir = 'lstm_rl/'
        #     if not os.path.exists(maindir+method_dir):
        #         os.mkdir(maindir+method_dir)
        elif robot_policy == 'orca':
            method_dir = 'orca/'
            if not os.path.exists(main + maindir + method_dir):
                os.mkdir(main + maindir + method_dir)

        robot_data = pd.DataFrame()
        robot_data['robot_x'] = np.array(robot_states)[:, 0]
        robot_data['robot_y'] = np.array(robot_states)[:, 1]
        robot_data['local_ppl_cnt'] = np.array(ppl_local)
        # robot_data['dmin'] = np.array(dmin)

        out_name = f'robot_data{case}.csv'

        if not os.path.exists(main + maindir + method_dir + f'{PPL}/'):
            os.mkdir(main + maindir + method_dir + f'{PPL}/')
        # outdir = f'{PPL}/robot_data_{PPL}/'
        if not os.path.exists(main + maindir + method_dir +
                              f'{PPL}/robot_data_{PPL}/'):
            os.mkdir(main + maindir + method_dir + f'{PPL}/robot_data_{PPL}/')

        fullname = os.path.join(
            main + maindir + method_dir + f'{PPL}/robot_data_{PPL}/', out_name)

        robot_data.to_csv(fullname, index=True)

        if not os.path.exists(main + maindir + method_dir +
                              f'{PPL}/time_{PPL}'):
            os.mkdir(main + maindir + method_dir + f'{PPL}/time_{PPL}')
        Time_data = pd.DataFrame()
        Time_data['time (s)'] = [env.global_time]
        Time_data['mean_local'] = np.mean(ppl_local)
        Time_data['std_local'] = np.std(ppl_local)
        Time_data['collision_flag'] = info
        Time_data['dmin'] = dmin
        Time_data['reward'] = cumulative_reward
        Time_data.to_csv(
            main + maindir + method_dir +
            f'{PPL}/time_{PPL}/robot_time_data_seconds_{PPL}_{case}.csv')

        if not os.path.exists(main + maindir + method_dir +
                              f'{PPL}/localdensity_{PPL}'):
            os.mkdir(main + maindir + method_dir + f'{PPL}/localdensity_{PPL}')
        LD = pd.DataFrame()
        LD['local_ppl_cnt'] = np.array(ppl_local)
        LD['vx'] = np.array(robot_vel)[:, 0]
        LD['vy'] = np.array(robot_vel)[:, 1]
        LD.to_csv(main + maindir + method_dir +
                  f'{PPL}/localdensity_{PPL}/localdensity_{PPL}_{case}.csv')
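The chain of nearly identical if-blocks above can be collapsed into a lookup plus os.makedirs(..., exist_ok=True). A sketch of one possible variant that keeps the same (policy, trained_env) pairs and directory names; it is not part of the original code:

# Sketch only: same pairs and directory names as the if-blocks above.
METHOD_DIRS = {
    ('model_predictive_rl', 'orca'): 'model_predictive_rl/',
    ('model_predictive_rl', 'socialforce'): 'model_predictive_rl_social/',
    ('rgl', 'orca'): 'rgl/',
    ('rgl', 'socialforce'): 'rgl_social/',
    ('sarl', 'orca'): 'sarl/',
    ('sarl', 'socialforce'): 'sarl_social/',
    ('sarl', 'igpdist_sfm'): 'sarl_igp_sfm/',
    ('sarl', 'igpdist_orca'): 'sarl_igp_orca/',
    ('cadrl', 'orca'): 'cadrl/',
    ('cadrl', 'socialforce'): 'cadrl_social/',
    ('cadrl', 'orca_new'): 'cadrl_new/',
    ('cadrl', 'socialforce_new'): 'cadrl_social_new/',
}

def resolve_method_dir(robot_policy, trained_env):
    # igp_dist, ssp, ssp2 and orca ignore trained_env in the original code
    if robot_policy in ('igp_dist', 'ssp', 'ssp2', 'orca'):
        return robot_policy + '/'
    return METHOD_DIRS[(robot_policy, trained_env)]

method_dir = resolve_method_dir(robot_policy, trained_env)
os.makedirs(main + maindir + method_dir, exist_ok=True)  # creates parents as needed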
Example #7
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--config',
                        type=str,
                        default='configs/icra_benchmark/mp_separate.py')
    parser.add_argument('--env_config',
                        type=str,
                        default='data/ORCA/mprl/eth_env.config')
    # parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/icra_benchmark/policy.config')
    parser.add_argument('--policy', type=str, default='model_predictive_rl')
    parser.add_argument('--model_dir', type=str, default='data/ORCA/mprl')
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--rl', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--human_num', type=int, default=None)
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--rect', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--traj_new', default=False, action='store_true')
    parser.add_argument('--final_file', type=str, default='final_metrics')
    parser.add_argument('--plot', default=False, action='store_true')
    parser.add_argument('--trained_env', type=str, default='orca')
    parser.add_argument('--resumed', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    parser.add_argument('--plot_test_scenarios_hist',
                        default=True,
                        action='store_true')
    parser.add_argument('-d', '--planning_depth', type=int, default=None)
    parser.add_argument('-w', '--planning_width', type=int, default=None)
    parser.add_argument('--sparse_search', default=False, action='store_true')

    args = parser.parse_args()

    # if model_dir:
    #     args.model_dir = model_dir
    # if args.il:
    #     model_weights = os.path.join(args.model_dir, 'il_model.pth')
    # else:
    #     if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
    #         model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
    #     else:
    #         model_weights = os.path.join(args.model_dir, 'rl_model.pth')

    if args.model_dir is not None:
        if args.config is not None:
            config_file = args.config
            env_config_file = os.path.join(args.model_dir,
                                           os.path.basename(args.env_config))
        else:
            config_file = os.path.join(args.model_dir,
                                       'configs/icra_benchmark/config.py')
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
            logging.info('Loaded IL weights')
        elif args.rl:
            if os.path.exists(
                    os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir,
                                             'resumed_rl_model.pth')
            else:
                print(os.listdir(args.model_dir))
                model_weights = os.path.join(
                    args.model_dir,
                    sorted(os.listdir(args.model_dir))[-1])
            logging.info('Loaded RL weights')
        else:
            model_weights = os.path.join(args.model_dir, 'best_val.pth')
            logging.info('Loaded RL weights with best VAL')
    else:
        config_file = args.config
        env_config_file = args.env_config
        # policy_config_file = args.policy_config

    spec = importlib.util.spec_from_file_location('config', config_file)
    if spec is None:
        parser.error('Config file not found.')
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)


    # configure logging and device
    level = logging.DEBUG if args.debug else logging.INFO

    logging.basicConfig(level=level,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy_config = config.PolicyConfig(args.debug)
    policy = policy_factory[policy_config.name]()
    if args.planning_depth is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_depth = args.planning_depth
    if args.planning_width is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_width = args.planning_width
    if args.sparse_search:
        policy_config.model_predictive_rl.sparse_search = True

    policy.configure(policy_config)
    # if policy.trainable:
    #     if args.model_dir is None:
    #         parser.error('Trainable policy must be specified with a model weights directory')
    #     policy.load_model(model_weights)

    # # configure policy
    # policy = policy_factory[args.policy]()
    # policy_config = configparser.RawConfigParser()
    # policy_config.read(policy_config_file)
    # policy.configure(policy_config)
    # if policy.trainable:
    #     if args.model_dir is None:
    #         parser.error('Trainable policy must be specified with a model weights directory')
    #     policy.get_model().load_state_dict(torch.load(model_weights))

    step_number = start_index  # index for given data
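    # NOTE: start_index above, and data_dir, robot_vel, num_steps, ignore_collisions,
    # time_limit, plot_folder, metrics_folder and remove_ped below, are not defined in
    # this function; they are presumably module-level settings defined elsewhere in the script.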

    # configure environment
    # env_config = config.EnvConfig(args.debug)
    # if args.human_num is not None:
    #     env_config.sim.human_num = args.human_num

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    if args.human_num is not None:
        env_config.set('sim', 'human_num', str(args.human_num))
    env = gym.make('CrowdSim_eth-v0')
    if data_dir:
        env.set_data_path(data_dir)

    # pdb.set_trace()
    env.configure(env_config)
    # env.initialize_eth_data()
    env.set_ped_traj_length(100)  # 100
    env.set_step_number(step_number)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    if args.rect:
        env.test_sim = 'rectangle_crossing'

    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot, v_pref=robot_vel)
    robot_goal_index = start_index + num_steps
    if ignore_collisions:
        env.set_ignore_collisions()
    env.set_time_limit(time_limit)
    env.set_plot_folder(plot_folder)
    env.set_metrics_folder(metrics_folder)
    last_removed_index, removed_traj = env.set_remove_ped(
        remove_ped,
        start_index=start_index,
        goal_index=robot_goal_index,
        set_robot_vel=True
    )  # sets ped to (0.0, 0.0) position and sets robot start
    env.set_human_num(remove_ped)
    print(f"Number of Humans: {env.human_num}")
    # pdb.set_trace()
    # and goal positions similar to a given
    # ped
    # env.set_robot_states(start=[4.3729, 4.5884], goal=[11.9214, 5.3149])
    # env.set_robot_states(ped=10)
    explorer = Explorer(env, robot, device, gamma=0.9)

    # calc remove ped distance
    # distance_list.append(env.calc_total_distance_travelled_by_ped(remove_ped))
    # if env.calc_total_distance_travelled_by_ped(remove_ped) == 0.0:
    #     ignore_peds.append(remove_ped)
    # return

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()
    generated_traj = []
    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        # pdb.set_trace()
        done = False
        last_pos = np.array(robot.get_position())
        generated_traj.append(last_pos)
        import time

        non_attentive_humans = []
        non_attentive_humans = set(non_attentive_humans)

        while not done:
            try:
                if env.use_eth_data:
                    if not env.set_step_number(step_number):
                        done = True
            except ValueError as e:
                logging.warning(e)
                break
            time_start = time.time()
            action = robot.act(ob)
            time_end = time.time()
            env.add_time(time_start, time_end)
            env.set_human_num(remove_ped, step_number)
            print(f"Number of Humans: {env.human_num}")
            # logging.info("time taken to select an action: {}".format(toc-tic))
            ob, _, done, info, ppl_count, robot_pose, robot_velocity, dmin = env.step(
                action, non_attentive_humans)
            current_pos = np.array(robot.get_position())
            logging.info(
                'Speed: %.2f',
                np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
            generated_traj.append(last_pos)
            step_number += 1
            robot_goal_index += 1
            if robot_goal_index < last_removed_index:
                robot = env.set_new_robot_goal(robot, remove_ped,
                                               robot_goal_index)
            # else:
            #     done = True
        # generated_traj = np.vstack(generated_traj)
        # with open(f'{remove_ped}_{start_index}_{num_steps}_trajectory.pkl', 'wb') as file:
        #     pickle.dump((removed_traj, generated_traj), file)
        if args.traj:
            env.render('traj', args.video_file)
        elif args.traj_new:
            env.render(mode='traj_new',
                       plots=args.plot,
                       output_file=args.video_file,
                       final_file=args.final_file)
        else:
            env.render('video', args.video_file)

        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase],
                                args.phase,
                                print_failure=True)
Example #8
def main():

    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/policy.config')
    parser.add_argument('--train_config',
                        type=str,
                        default='configs/train.config')
    parser.add_argument('--output_dir', type=str, default='data')
    parser.add_argument('--resume', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    parser.add_argument('--gpu_num', default="0", type=str)
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num

    ########## CONFIGURE PATHS ##########

    make_new_dir = True
    outputs = os.listdir(args.output_dir)

    # Find the 'output' files in output directory
    output_nums = [
        name[name.find("output") + 6:] for name in outputs if 'output' in name
        and os.path.isdir(os.path.join(args.output_dir, name))
    ]
    num = 0
    if output_nums != []:
        num = max([int(num) if num != '' else 0 for num in output_nums])
        if num == 0:
            num = ''
        key = input('Continue from last output ? (y/n)')
        if key == 'y' and not args.resume:
            args.output_dir = os.path.join(args.output_dir,
                                           "output" + str(num))
            args.env_config = os.path.join(args.output_dir,
                                           os.path.basename(args.env_config))
            args.policy_config = os.path.join(
                args.output_dir, os.path.basename(args.policy_config))
            args.train_config = os.path.join(
                args.output_dir, os.path.basename(args.train_config))
        else:
            key = input(
                'Overwrite last output directory instead of new ? (y/n)')
            if key == 'y' and not args.resume:
                args.output_dir = os.path.join(args.output_dir,
                                               'output' + str(num))
                shutil.rmtree(args.output_dir)
                os.makedirs(args.output_dir)
            else:
                num = num + 1 if num else 1
                args.output_dir = os.path.join(args.output_dir,
                                               'output' + str(num))
                os.makedirs(args.output_dir)
            shutil.copy(args.env_config, args.output_dir)
            shutil.copy(args.policy_config, args.output_dir)
            shutil.copy(args.train_config, args.output_dir)
    else:
        args.output_dir = os.path.join(args.output_dir, 'output')
        os.makedirs(args.output_dir)
        shutil.copy(args.env_config, args.output_dir)
        shutil.copy(args.policy_config, args.output_dir)
        shutil.copy(args.train_config, args.output_dir)

    log_file = os.path.join(args.output_dir, 'output.log')
    il_weight_file = os.path.join(args.output_dir, 'il_model.pth')
    rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth')

    ########## CONFIGURE LOGGING ##########

    mode = 'a' if args.resume else 'w'
    logger = logging.getLogger('train_sgan')
    logger.propagate = False
    level = logging.INFO if not args.debug else logging.DEBUG
    logger.setLevel(level)
    file_handler = logging.FileHandler(log_file, mode=mode)
    file_handler.setLevel(level)
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(level)
    formatter = logging.Formatter('%(asctime)s, %(levelname)s: %(message)s')
    stdout_handler.setFormatter(formatter)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(stdout_handler)

    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")

    ########## CONFIGURE POLICY ##########

    policy = TLSGAN(device, logger)
    if args.policy_config is None:
        parser.error(
            'Policy config has to be specified for a trainable network')
    policy_config = configparser.RawConfigParser()
    policy_config.read(args.policy_config)

    policy.configure(policy_config)
    policy.set_device(device)

    ########## CONFIGURE ENVIRONMENT ##########

    env_config = configparser.RawConfigParser()
    env_config.read(args.env_config)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    robot = Robot(env_config, 'robot')
    env.set_robot(robot)

    ########## READ RL PARAMETERS ##########
    if args.train_config is None:
        parser.error(
            'Train config has to be specified for a trainable network')
    train_config = configparser.RawConfigParser()
    train_config.read(args.train_config)
    rl_learning_rate = train_config.getfloat('train', 'rl_learning_rate')
    train_batches = train_config.getint('train', 'train_batches')
    train_episodes = train_config.getint('train', 'train_episodes')
    sample_episodes = train_config.getint('train', 'sample_episodes')
    target_update_interval = train_config.getint('train',
                                                 'target_update_interval')
    evaluation_interval = train_config.getint('train', 'evaluation_interval')
    capacity = train_config.getint('train', 'capacity')
    epsilon_start = train_config.getfloat('train', 'epsilon_start')
    epsilon_end = train_config.getfloat('train', 'epsilon_end')
    epsilon_decay = train_config.getfloat('train', 'epsilon_decay')
    checkpoint_interval = train_config.getint('train', 'checkpoint_interval')

    ########## CONFIGURE MEMORY, TRAINER, EXPLORER ##########
    train_memory = ReplayMemory(capacity)
    val_memory = ReplayMemory(capacity)
    model = policy.get_model()
    batch_size = train_config.getint('trainer', 'batch_size')
    trainer = Trainer(model, train_memory, val_memory, device, batch_size,
                      args.output_dir, logger)
    explorer = Explorer(env, robot, policy.policy_learning, device,
                        train_memory, val_memory, policy.gamma, logger, policy,
                        policy.obs_len)

    logger.info('Using device: %s', device)

    ########## IMITATION LEARNING ##########
    if args.resume:
        if not os.path.exists(rl_weight_file):
            logger.error('RL weights do not exist')
        model.load_state_dict(torch.load(rl_weight_file, map_location=device))
        rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth')
        logger.info(
            'Load reinforcement learning trained weights. Resume training')
    elif os.path.exists(il_weight_file):
        model.load_state_dict(torch.load(il_weight_file, map_location=device),
                              strict=False)

        logger.info('Load imitation learning trained weights.')
        if robot.visible:
            safety_space = 0
        else:
            safety_space = train_config.getfloat('imitation_learning',
                                                 'safety_space')
        il_policy = train_config.get('imitation_learning', 'il_policy')
        il_policy = policy_factory[il_policy]()
        il_policy.multiagent_training = policy.multiagent_training
        il_policy.safety_space = safety_space
        robot.set_test_policy(il_policy)
        robot.set_policy(policy)
        policy.set_env(env)
        robot.policy.set_epsilon(epsilon_end)

        # run five rounds of 100 imitation-learning episodes
        # (the original repeats the identical call five times)
        for _ in range(5):
            explorer.run_k_episodes(100, 0, 'val',
                                    update_memory=True,
                                    imitation_learning=True,
                                    with_render=False)

        logger.info('Experience training set size: %d/%d', len(train_memory),
                    train_memory.capacity)
    else:
        train_episodes = train_config.getint('imitation_learning',
                                             'train_episodes')
        validation_episodes = train_config.getint('imitation_learning',
                                                  'validation_episodes')
        logger.info(
            'Starting imitation learning on %d training episodes and %d validation episodes',
            train_episodes, validation_episodes)

        il_policy = train_config.get('imitation_learning', 'il_policy')
        il_epochs = train_config.getint('imitation_learning', 'il_epochs')
        il_learning_rate = train_config.getfloat('imitation_learning',
                                                 'il_learning_rate')
        trainer.set_learning_rate(il_learning_rate)
        if robot.visible:
            safety_space = 0
        else:
            safety_space = train_config.getfloat('imitation_learning',
                                                 'safety_space')
        il_policy = policy_factory[il_policy]()
        il_policy.multiagent_training = policy.multiagent_training
        il_policy.safety_space = safety_space
        robot.set_test_policy(il_policy)
        robot.set_policy(il_policy)
        explorer.run_k_episodes(train_episodes,
                                validation_episodes,
                                'train',
                                update_memory=True,
                                imitation_learning=True,
                                with_render=False)
        trainer.optimize_epoch_il(il_epochs, robot, with_validation=True)
        torch.save(model.state_dict(), il_weight_file)
        logger.info('Finish imitation learning. Weights saved.')
        logger.info('Experience train_set size: %d/%d', len(train_memory),
                    train_memory.capacity)
        logger.info('Experience validation_set size: %d/%d', len(val_memory),
                    val_memory.capacity)

    ########## REINFORCEMENT LEARNING ##########
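    # NOTE: the raise on the next line makes the RL stage below unreachable;
    # it would need to be removed for the training loop that follows to run.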
    raise NotImplementedError

    policy.set_env(env)
    robot.set_policy(policy)

    robot.print_info()
    trainer.set_learning_rate(rl_learning_rate)

    explorer.update_target_model(model)
    robot.policy.set_epsilon(epsilon_end)

    rl_memory = ReplayMemory(capacity)
    explorer.train_memory = rl_memory
    trainer.train_memory = rl_memory

    episode = 0
    while episode < train_episodes:
        if args.resume:
            epsilon = epsilon_end
        else:
            if episode < epsilon_decay:
                epsilon = epsilon_start + (
                    epsilon_end - epsilon_start) / epsilon_decay * episode
            else:
                epsilon = epsilon_end
        robot.policy.set_epsilon(epsilon)

        # evaluate the model
        if episode % evaluation_interval == 0:
            explorer.run_k_episodes(env.case_size['val'],
                                    0,
                                    'val',
                                    episode=episode,
                                    with_render=False)

        # sample k episodes into memory and optimize over the generated memory
        explorer.run_k_episodes(sample_episodes,
                                0,
                                'train',
                                update_memory=True,
                                episode=episode)
        trainer.optimize_batch_rl(train_batches)
        rl_memory.clear()

        episode += 1

        if episode % target_update_interval == 0:
            explorer.update_target_model(model)

        if episode != 0 and episode % checkpoint_interval == 0:
            torch.save(model.state_dict(), rl_weight_file)

    # final test
    explorer.run_k_episodes(env.case_size['test'], 0, 'test', episode=episode)
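The epsilon schedule in the RL loop above decays linearly from epsilon_start to epsilon_end over epsilon_decay episodes. A small sketch of the same schedule as a standalone helper (same variable names; not part of the original):

def epsilon_at(episode, epsilon_start, epsilon_end, epsilon_decay, resume=False):
    # Linear decay to epsilon_end, then constant; mirrors the in-loop logic above.
    if resume or episode >= epsilon_decay:
        return epsilon_end
    return epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode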
Example #9
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env_wall.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='ssp')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=True, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=2)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--wall', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=True)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--human_policy',  type=str, default='orca')
    parser.add_argument('--trained_env',  type=str, default='socialforce')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    # device = 'cpu'
    logging.info('Using device: %s', device)


    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim_wall-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.wall:
        env.test_sim = 'wall_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'


    PPL = env.human_num




    robot = Robot(env_config, 'robot')
    # print(robot.px)
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # because invisible case breaks the reciprocal assumption
            # adding some safety space improves ORCA performance. Tune this value based on your need.
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    ppl_local = []
    frame_lst = []
    agentx = []
    agenty = []
    ID_lst = []
    frame_num = 0
    robot_states = []
    agent_states = pd.DataFrame(columns=["Frame", "ID", "px", "py"])
    agent_vel = []
    policy.set_env(env)
    robot.print_info()
    for case in range(args.test_case):

        # if args.visualize:

        ob = env.reset(test_case=case)

        # ob = env.reset(args.phase, case)

        done = False
        last_pos = np.array(robot.get_position())

        while not done:

            action = robot.act(ob)
            ob, _, done, info, ppl_count, robot_pose, robot_velocity, dmin, humans = env.step(action)
            for j, human in enumerate(humans):
                frame_lst.append(frame_num)
                ID_lst.append(j+1)
                agentx.append(human.px)
                agenty.append(human.py)

                # agent_states.append((frame_num, j+1, human.px, human.py))
            # collision_count = 0
            # if info == "Collision":
            # # print(info)
            #     collision_count = 1

            ppl_local.append(ppl_count)
            # agent_states.append((frame_num, 0, robot_pose))
            # robot_vel.append(robot_velocity)
            frame_lst.append(frame_num)
            ID_lst.append(0)
            agentx.append(robot_pose[0])
            agenty.append(robot_pose[1])
            # agent_states["Frame"] = frame_num
            # agent_states["ID"] = 0
            # agent_states["px"] = robot_pose[0]
            # agent_states["py"] = robot_pose[1]

            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos

            frame_num += 1

        if args.traj:
            env.render('traj', args.video_file)
        else:
            env.render('video', args.video_file)

        logging.info('It takes %.2f seconds to finish. Final status is %s', env.global_time, info)

        agent_states.head(20)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()

            logging.info('Average time for humans to reach goal: %.2f', sum(human_times) / len(human_times))

        # logging.info('PPl counter ', ppl_local)


        agent_states['Frame'] = frame_lst
        agent_states['ID'] = ID_lst
        agent_states['px'] = agentx
        agent_states['py'] = agenty
        # agent_data['local_ppl_cnt'] = np.array(ppl_local)
        # robot_data['dmin'] = np.array(dmin)

        out_name = f'agent_data{case}.csv'

        # if not os.path.exists(main+maindir + method_dir + f'{PPL}/'):
        #     os.mkdir(main+maindir + method_dir + f'{PPL}/')
        # # outdir = f'{PPL}/robot_data_{PPL}/'
        # if not os.path.exists(main+maindir + method_dir + f'{PPL}/robot_data_{PPL}/'):
        #     os.mkdir(main+maindir + method_dir + f'{PPL}/robot_data_{PPL}/')
        #
        # fullname = os.path.join(main+maindir + method_dir + f'{PPL}/robot_data_{PPL}/', out_name)

        agent_states.to_csv(out_name, index=True)
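An alternative sketch for the bookkeeping above: collect (frame, id, px, py) tuples in a single list during the episode and build the DataFrame in one call, instead of four parallel lists (same columns, same data; not part of the original):

rows = []  # filled inside the step loop, e.g.:
#   rows.append((frame_num, j + 1, human.px, human.py))            # each human
#   rows.append((frame_num, 0, robot_pose[0], robot_pose[1]))      # the robot
agent_states = pd.DataFrame(rows, columns=["Frame", "ID", "px", "py"])
agent_states.to_csv(f'agent_data{case}.csv', index=True)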
Example #10
def main(args):
    set_random_seeds(args.randomseed)
    # configure paths
    make_new_dir = True
    if os.path.exists(args.output_dir):
        if args.overwrite:
            shutil.rmtree(args.output_dir)
        else:
            key = input(
                'Output directory already exists! Overwrite the folder? (y/n)')
            if key == 'y' and not args.resume:
                shutil.rmtree(args.output_dir)
            else:
                make_new_dir = False
    if make_new_dir:
        os.makedirs(args.output_dir)
        shutil.copy(args.config, os.path.join(args.output_dir, 'config.py'))

        # # insert the arguments from command line to the config file
        # with open(os.path.join(args.output_dir, 'config.py'), 'r') as fo:
        #     config_text = fo.read()
        # search_pairs = {r"gcn.X_dim = \d*": "gcn.X_dim = {}".format(args.X_dim),
        #                 r"gcn.num_layer = \d": "gcn.num_layer = {}".format(args.layers),
        #                 r"gcn.similarity_function = '\w*'": "gcn.similarity_function = '{}'".format(args.sim_func),
        #                 r"gcn.layerwise_graph = \w*": "gcn.layerwise_graph = {}".format(args.layerwise_graph),
        #                 r"gcn.skip_connection = \w*": "gcn.skip_connection = {}".format(args.skip_connection)}
        #
        # for find, replace in search_pairs.items():
        #     config_text = re.sub(find, replace, config_text)
        #
        # with open(os.path.join(args.output_dir, 'config.py'), 'w') as fo:
        #     fo.write(config_text)

    args.config = os.path.join(args.output_dir, 'config.py')
    log_file = os.path.join(args.output_dir, 'output.log')
    il_weight_file = os.path.join(args.output_dir, 'il_model.pth')
    rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth')

    spec = importlib.util.spec_from_file_location('config', args.config)
    if spec is None:
        parser.error('Config file not found.')
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    # configure logging
    mode = 'a' if args.resume else 'w'
    file_handler = logging.FileHandler(log_file, mode=mode)
    stdout_handler = logging.StreamHandler(sys.stdout)
    level = logging.INFO if not args.debug else logging.DEBUG
    logging.basicConfig(level=level,
                        handlers=[stdout_handler, file_handler],
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    repo = git.Repo(search_parent_directories=True)
    logging.info('Current git head hash code: {}'.format(
        repo.head.object.hexsha))
    logging.info('Current config content is: {}'.format(config))
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)
    writer = SummaryWriter(log_dir=args.output_dir)

    # configure policy
    policy_config = config.PolicyConfig()
    policy = policy_factory[policy_config.name]()
    if not policy.trainable:
        parser.error('Policy has to be trainable')
    policy.configure(policy_config)
    policy.set_device(device)

    # configure environment
    env_config = config.EnvConfig(args.debug)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    robot = Robot(env_config, 'robot')
    robot.time_step = env.time_step
    env.set_robot(robot)

    # read training parameters
    train_config = config.TrainConfig(args.debug)
    rl_learning_rate = train_config.train.rl_learning_rate
    train_batches = train_config.train.train_batches
    train_episodes = train_config.train.train_episodes
    sample_episodes = train_config.train.sample_episodes
    target_update_interval = train_config.train.target_update_interval
    evaluation_interval = train_config.train.evaluation_interval
    capacity = train_config.train.capacity
    epsilon_start = train_config.train.epsilon_start
    epsilon_end = train_config.train.epsilon_end
    epsilon_decay = train_config.train.epsilon_decay
    checkpoint_interval = train_config.train.checkpoint_interval

    # configure trainer and explorer
    memory = ReplayMemory(capacity)
    model = policy.get_model()
    batch_size = train_config.trainer.batch_size
    optimizer = train_config.trainer.optimizer
    if policy_config.name == 'model_predictive_rl':
        trainer = MPRLTrainer(
            model,
            policy.state_predictor,
            memory,
            device,
            policy,
            writer,
            batch_size,
            optimizer,
            env.human_num,
            reduce_sp_update_frequency=train_config.train.reduce_sp_update_frequency,
            freeze_state_predictor=train_config.train.freeze_state_predictor,
            detach_state_predictor=train_config.train.detach_state_predictor,
            share_graph_model=policy_config.model_predictive_rl.share_graph_model)
    else:
        trainer = VNRLTrainer(model, memory, device, policy, batch_size,
                              optimizer, writer)
    explorer = Explorer(env,
                        robot,
                        device,
                        writer,
                        memory,
                        policy.gamma,
                        target_policy=policy)

    # imitation learning
    if args.resume:
        if not os.path.exists(rl_weight_file):
            logging.error('RL weight file does not exist')
        model.load_state_dict(torch.load(rl_weight_file))
        rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth')
        logging.info(
            'Loaded reinforcement learning weights; resuming training.')
    elif os.path.exists(il_weight_file):
        model.load_state_dict(torch.load(il_weight_file))
        logging.info('Loaded imitation learning weights.')
    else:
        il_episodes = train_config.imitation_learning.il_episodes
        il_policy = train_config.imitation_learning.il_policy
        il_epochs = train_config.imitation_learning.il_epochs
        il_learning_rate = train_config.imitation_learning.il_learning_rate
        trainer.set_learning_rate(il_learning_rate)
        if robot.visible:
            safety_space = 0
        else:
            safety_space = train_config.imitation_learning.safety_space
        il_policy = policy_factory[il_policy]()
        il_policy.multiagent_training = policy.multiagent_training
        il_policy.safety_space = safety_space
        robot.set_policy(il_policy)
        explorer.run_k_episodes(il_episodes,
                                'train',
                                update_memory=True,
                                imitation_learning=True)
        trainer.optimize_epoch(il_epochs)
        policy.save_model(il_weight_file)
        logging.info('Finish imitation learning. Weights saved.')
        logging.info('Experience set size: %d/%d', len(memory),
                     memory.capacity)

    trainer.update_target_model(model)

    # reinforcement learning
    policy.set_env(env)
    robot.set_policy(policy)
    robot.print_info()
    trainer.set_learning_rate(rl_learning_rate)
    # fill the memory pool with some RL experience
    if args.resume:
        robot.policy.set_epsilon(epsilon_end)
        explorer.run_k_episodes(100, 'train', update_memory=True, episode=0)
        logging.info('Experience set size: %d/%d', len(memory),
                     memory.capacity)
    episode = 0
    best_val_reward = -1
    best_val_model = None
    # evaluate the model after imitation learning

    if episode % evaluation_interval == 0:
        logging.info(
            'Evaluate the model immediately after imitation learning on the validation cases'
        )
        explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode)
        explorer.log('val', episode // evaluation_interval)

        if args.test_after_every_eval:
            explorer.run_k_episodes(env.case_size['test'],
                                    'test',
                                    episode=episode,
                                    print_failure=True)
            explorer.log('test', episode // evaluation_interval)

    episode = 0
    while episode < train_episodes:
        if args.resume:
            epsilon = epsilon_end
        else:
            if episode < epsilon_decay:
                epsilon = epsilon_start + (
                    epsilon_end - epsilon_start) / epsilon_decay * episode
            else:
                epsilon = epsilon_end
        robot.policy.set_epsilon(epsilon)

        # sample k episodes into memory and optimize over the generated memory
        explorer.run_k_episodes(sample_episodes,
                                'train',
                                update_memory=True,
                                episode=episode)
        explorer.log('train', episode)

        trainer.optimize_batch(train_batches, episode)
        episode += 1

        if episode % target_update_interval == 0:
            trainer.update_target_model(model)
        # evaluate the model
        if episode % evaluation_interval == 0:
            _, _, _, reward, _ = explorer.run_k_episodes(env.case_size['val'],
                                                         'val',
                                                         episode=episode)
            explorer.log('val', episode // evaluation_interval)

            if episode % checkpoint_interval == 0 and reward > best_val_reward:
                best_val_reward = reward
                best_val_model = copy.deepcopy(policy.get_state_dict())
            # test after every evaluation to check how the generalization performance evolves
            if args.test_after_every_eval:
                explorer.run_k_episodes(env.case_size['test'],
                                        'test',
                                        episode=episode,
                                        print_failure=True)
                explorer.log('test', episode // evaluation_interval)

        if episode != 0 and episode % checkpoint_interval == 0:
            current_checkpoint = episode // checkpoint_interval - 1
            save_every_checkpoint_rl_weight_file = os.path.splitext(
                rl_weight_file)[0] + '_' + str(current_checkpoint) + '.pth'
            policy.save_model(save_every_checkpoint_rl_weight_file)

    # test with the best val model
    if best_val_model is not None:
        policy.load_state_dict(best_val_model)
        torch.save(best_val_model, os.path.join(args.output_dir,
                                                'best_val.pth'))
        logging.info('Save the best val model with the reward: {}'.format(
            best_val_reward))
    explorer.run_k_episodes(env.case_size['test'],
                            'test',
                            episode=episode,
                            print_failure=True)
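The exploration schedule in the training loop above decays epsilon linearly from epsilon_start to epsilon_end over the first epsilon_decay episodes and then holds it constant. A standalone sketch of the same schedule (the numeric values in the usage lines are only illustrative, not the config defaults):

def epsilon_schedule(episode, epsilon_start, epsilon_end, epsilon_decay):
    # linear interpolation for the first `epsilon_decay` episodes, constant afterwards
    if episode < epsilon_decay:
        return epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
    return epsilon_end

# illustrative values: starts at 0.5, reaches 0.1 after 4000 episodes and stays there
assert epsilon_schedule(0, 0.5, 0.1, 4000) == 0.5
assert epsilon_schedule(4000, 0.5, 0.1, 4000) == 0.1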
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--env_config',
                        type=str,
                        default='../configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='../configs/policy.config')
    parser.add_argument('--imitate_config',
                        type=str,
                        default='../configs/demonstrate.config')
    parser.add_argument('--num_frames', type=int, default=1)
    parser.add_argument('--output_dir',
                        type=str,
                        default='../data/output/temp')
    parser.add_argument('--memory_dir',
                        type=str,
                        default='../data/memory/temp')
    parser.add_argument('--human_type',
                        type=str,
                        default='normal',
                        choices=['aggressive', 'normal', 'cautious'])
    parser.add_argument('--expert_file',
                        type=str,
                        default='../data/expert/rl_model.pth')
    parser.add_argument('--freeze', default=False, action='store_true')
    parser.add_argument('--resume', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    args = parser.parse_args()

    # configure paths
    make_new_dir = True
    if os.path.exists(args.output_dir):
        key = input(
            'Output directory already exists! Overwrite the folder? (y/n)')
        if key == 'y' and not args.resume:
            shutil.rmtree(args.output_dir)
        else:
            make_new_dir = False
            args.env_config = os.path.join(args.output_dir,
                                           os.path.basename(args.env_config))
            args.policy_config = os.path.join(
                args.output_dir, os.path.basename(args.policy_config))
            args.imitate_config = os.path.join(
                args.output_dir, os.path.basename(args.imitate_config))
    if make_new_dir:
        os.makedirs(args.output_dir)
        shutil.copy(args.env_config, args.output_dir)
        shutil.copy(args.policy_config, args.output_dir)
        shutil.copy(args.imitate_config, args.output_dir)
    log_file = os.path.join(args.output_dir, 'output.log')

    # configure logging
    mode = 'a' if args.resume else 'w'
    file_handler = logging.FileHandler(log_file, mode=mode)
    stdout_handler = logging.StreamHandler(sys.stdout)
    level = logging.INFO if not args.debug else logging.DEBUG
    logging.basicConfig(level=level,
                        handlers=[stdout_handler, file_handler],
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    repo = git.Repo(search_parent_directories=True)
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info(' =========== Demonstrate %s ============ ', args.policy)
    logging.info('Current git head hash code: %s', (repo.head.object.hexsha))
    logging.info('Using device: %s', device)

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(args.env_config)
    #env = gym.make('CrowdSim-v0')
    env = CrowdSim()
    env.configure(env_config)
    env_mods = None
    if args.human_type == 'aggressive':
        env_mods = {'safety_space': 0.03, 'time_horizon': 1}
    elif args.human_type == 'cautious':
        env_mods = {'safety_space': 0.3, 'time_horizon': 8}
    modify_env_params(env, mods=env_mods)
    robot = Robot(env_config, 'robot')
    env.set_robot(robot)

    # multi-frame env
    if args.num_frames > 1:
        logging.info("stack %d frames", args.num_frames)
        env = FrameStack(env, args.num_frames)

    # configure policy
    policy = policy_factory[args.policy]()
    if not policy.trainable:
        parser.error('Policy has to be trainable')
    if args.policy_config is None:
        parser.error(
            'Policy config has to be specified for a trainable network')
    policy_config = configparser.RawConfigParser()
    policy_config.read(args.policy_config)

    # read training parameters
    if args.imitate_config is None:
        parser.error(
            'Train config has to be specified for a trainable network')
    imitate_config = configparser.RawConfigParser()
    imitate_config.read(args.imitate_config)

    # configure demonstration and explorer
    memory = ReplayMemory(
        10000000)  # sufficiently large to store expert demonstration

    if not os.path.exists(args.memory_dir):
        os.makedirs(args.memory_dir)
    if robot.visible:
        demonstration_file = os.path.join(args.memory_dir,
                                          'data_imit_visible.pt')
    else:
        demonstration_file = os.path.join(args.memory_dir,
                                          'data_imit_invisible.pt')

    il_episodes = imitate_config.getint('demonstration', 'il_episodes')
    expert_policy = imitate_config.get('demonstration', 'il_policy')

    expert_policy = policy_factory[expert_policy]()
    expert_policy.configure(policy_config)
    expert_policy.set_device(device)
    expert_policy.set_env(env)
    expert_policy.multiagent_training = policy.multiagent_training

    counter = imitate_config.getint('demonstration', 'counter_start')
    env.set_counter(counter)

    # same expert safety space regardless of robot visibility
    expert_policy.safety_space = imitate_config.getfloat(
        'demonstration', 'safety_space')
    robot.set_policy(expert_policy)

    noise_explore = imitate_config.getfloat('demonstration', 'noise_explore')

    gamma = policy_config.getfloat('rl', 'gamma')
    explorer = Explorer(env,
                        robot,
                        device,
                        args.num_frames,
                        memory,
                        gamma=gamma,
                        target_policy=policy)

    if expert_policy.trainable:
        if args.expert_file is None:
            logging.warning(
                'Trainable policy is NOT specified with a model weights directory'
            )
        else:
            try:
                expert_policy.get_model().load_state_dict(
                    torch.load(args.expert_file))
                logging.info('Loaded policy from %s', args.expert_file)
            except Exception as e:
                logging.warning(e)
                expert_policy.get_model().load_state_dict(torch.load(
                    args.expert_file),
                                                          strict=False)
                logging.info('Loaded policy partially from %s',
                             args.expert_file)
        robot.policy.set_epsilon(-1.0)

    # data collection
    robot.policy.multiagent_training = True
    human_obsv = explorer.run_k_episodes(il_episodes,
                                         'train',
                                         update_memory=True,
                                         imitation_learning=True,
                                         noise_explore=noise_explore)

    torch.save(memory.memory, demonstration_file)
    logging.info('Save memory to %s', demonstration_file)

    obs_file = os.path.join(args.memory_dir, 'data_traj.pt')
    torch.save(human_obsv, obs_file)
    logging.info('Saved %d episodes of observations to %s', len(human_obsv), obs_file)
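A minimal sketch of reloading the demonstration data saved above (the paths match the script's defaults for a visible robot; ReplayMemory is assumed to keep its transitions in a plain list attribute called memory, which is what the torch.save call above relies on, and the import path is an assumption):

import torch

from crowd_nav.utils.memory import ReplayMemory  # assumed import path

demonstration_file = '../data/memory/temp/data_imit_visible.pt'
obs_file = '../data/memory/temp/data_traj.pt'

memory = ReplayMemory(10000000)
memory.memory = torch.load(demonstration_file)  # restore the saved transitions
human_obsv = torch.load(obs_file)
print('loaded %d transitions and %d observed episodes' % (len(memory.memory), len(human_obsv)))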
Beispiel #12
0
def main(args):
    # configure logging and device
    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=level, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    if args.model_dir is not None:
        if args.config is not None:
            config_file = args.config
        else:
            config_file = os.path.join(args.model_dir, 'config.py')
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
            logging.info('Loaded IL weights')
        elif args.rl:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                print(os.listdir(args.model_dir))
                model_weights = os.path.join(args.model_dir, sorted(os.listdir(args.model_dir))[-1])
            logging.info('Loaded RL weights')
        else:
            model_weights = os.path.join(args.model_dir, 'best_val.pth')
            logging.info('Loaded RL weights with best VAL')

    else:
        config_file = args.config

    spec = importlib.util.spec_from_file_location('config', config_file)
    if spec is None:
        parser.error('Config file not found.')
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    # configure policy
    policy_config = config.PolicyConfig(args.debug)
    policy = policy_factory[policy_config.name]()
    if args.planning_depth is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_depth = args.planning_depth
    if args.planning_width is not None:
        policy_config.model_predictive_rl.do_action_clip = True
        policy_config.model_predictive_rl.planning_width = args.planning_width
    if args.sparse_search:
        policy_config.model_predictive_rl.sparse_search = True

    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.load_model(model_weights)

    # configure environment
    env_config = config.EnvConfig(args.debug)

    if args.human_num is not None:
        env_config.sim.human_num = args.human_num
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)

    if args.square:
        env.test_scenario = 'square_crossing'
    if args.circle:
        env.test_scenario = 'circle_crossing'
    if args.test_scenario is not None:
        env.test_scenario = args.test_scenario

    robot = Robot(env_config, 'robot')
    env.set_robot(robot)
    robot.time_step = env.time_step
    robot.set_policy(policy)
    explorer = Explorer(env, robot, device, None, gamma=0.9)

    train_config = config.TrainConfig(args.debug)
    epsilon_end = train_config.train.epsilon_end
    if not isinstance(robot.policy, ORCA):
        robot.policy.set_epsilon(epsilon_end)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        # same safety space regardless of robot visibility
        robot.policy.safety_space = args.safety_space
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:
        rewards = []
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, reward, done, info = env.step(action)
            rewards.append(reward)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        gamma = 0.9
        cumulative_reward = sum([pow(gamma, t * robot.time_step * robot.v_pref) * reward
                                 for t, reward in enumerate(rewards)])

        if args.traj:
            env.render('traj', args.video_file)
        else:
            if args.video_dir is not None:
                if policy_config.name == 'gcn':
                    args.video_file = os.path.join(args.video_dir, policy_config.name + '_' + policy_config.gcn.similarity_function)
                else:
                    args.video_file = os.path.join(args.video_dir, policy_config.name)
                args.video_file = args.video_file + '_' + args.phase + '_' + str(args.test_case) + '.mp4'
            env.render('video', args.video_file)
        logging.info('It takes %.2f seconds to finish. Final status is %s, cumulative_reward is %f', env.global_time, info, cumulative_reward)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f', sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
        if args.plot_test_scenarios_hist:
            test_angle_seeds = np.array(env.test_scene_seeds)
            b = [i * 0.01 for i in range(101)]
            n, bins, patches = plt.hist(test_angle_seeds, b, facecolor='g')
            plt.savefig(os.path.join(args.model_dir, 'test_scene_hist.png'))
            plt.close()
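The cumulative_reward above weights the reward of step t by gamma raised to the elapsed simulation time scaled by the robot's preferred speed, i.e. gamma ** (t * time_step * v_pref). A minimal standalone sketch of the same computation:

def discounted_return(rewards, gamma, time_step, v_pref):
    # identical weighting to the visualization branch above
    return sum(gamma ** (t * time_step * v_pref) * reward
               for t, reward in enumerate(rewards))

# e.g. with gamma=0.9 and an illustrative time_step=0.25, v_pref=1.0,
# the second step is weighted by 0.9 ** 0.25
print(discounted_return([1.0, 1.0], gamma=0.9, time_step=0.25, v_pref=1.0))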
Beispiel #13
0
policy = select_policy[policy_name]     #{SARL(),CADRL(),LstmRL()}
policy_config = configparser.RawConfigParser()
policy_config.read(policy_config_file)
policy.configure(policy_config)
policy.get_model().load_state_dict(torch.load(model_weights))
policy.set_device(device)
policy.set_phase(phase)

# configure environment / obstacles
env_config = configparser.RawConfigParser()
env_config.read(env_config_file)
env = gym.make('CrowdSim-v0')   #env is inherited from CrowdSim class in crowd_sim.py
env.configure(env_config)

# configure robot
robot = Robot(env_config, 'robot')
robot.set_policy(policy)

env.set_robot(robot)    #pass robot parameters into env
ob = env.reset(phase, test_case)     # initialize some parameters (e.g. time_step, success_reward) from the .config file for other instances
policy.set_env(env)     #pass the env info into policy

# ************************************ Input ************************************
# robot: position, goal, radius, x_velocity, y_velocity, theta
# position
robot_x = 0.1
robot_y = 0.1
# goal
goal_x = 1
goal_y = 1
# velocity
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='data/output/env.config')
    parser.add_argument('--policy_config', type=str, default='data/output/policy.config')
    parser.add_argument('--policy', type=str, default='scr')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--plot_file', type=str, default=None)
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error('Trainable policy must be specified with a model weights directory')
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env = gym.make('CrowdSim-v0')
    env.configure(args.env_config)

    robot = Robot()
    robot.configure(args.env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)

    humans = [Human() for _ in range(env.human_num)]
    for human in humans:
        human.configure(args.env_config, 'humans')
    env.set_humans(humans)

    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'

    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        # safety space is zero regardless of robot visibility
        robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    ob = env.reset(args.phase, args.test_case)
    done = False
    last_pos = np.array(robot.get_position())

    observation_subscribers = []

    if args.plot_file:
        plotter = Plotter(args.plot_file)
        observation_subscribers.append(plotter)
    if args.video_file:
        video = Video(args.video_file)
        observation_subscribers.append(video)
    t = 0
    while not done:
        action = robot.act(ob)
        ob, _, done, info = env.step(action)

        notify(observation_subscribers, env.state)
        if args.visualize:
            env.render()
            plt.pause(.001)

        current_pos = np.array(robot.get_position())
        logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
        last_pos = current_pos
        t += 1

    if args.plot_file:
        plotter.save()
    if args.video_file:
        video.make([robot.policy.draw_attention, robot.policy.draw_observation])

    logging.info('It takes %.2f seconds to finish. Final status is %s', env.global_time, info)
    if robot.visible and info == 'reach goal':
        human_times = env.get_human_times()
        logging.info('Average time for humans to reach goal: %.2f', sum(human_times) / len(human_times))
Beispiel #15
0
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config',
                        type=str,
                        default='data/output/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='data/output/policy.config')
    parser.add_argument('--policy', type=str, default='scr')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--plot_file', type=str, default=None)
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir,
                                       os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir,
                                          os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(
                    os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir,
                                             'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error(
                'Trainable policy must be specified with a model weights directory'
            )
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env = gym.make('CrowdSim-v0')
    env.configure(args.env_config)

    robot = Robot()
    robot.configure(args.env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)

    humans = [Human() for _ in range(env.human_num)]
    for human in humans:
        human.configure(args.env_config, 'humans')
    env.set_humans(humans)

    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'

    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        # safety space is zero regardless of robot visibility
        robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    explorer.run_k_episodes(env.case_size[args.phase],
                            args.phase,
                            print_failure=True)
Beispiel #16
0
    def configure(self, config):
        self.config = config

        self.time_limit = config.env.time_limit
        self.time_step = config.env.time_step
        self.randomize_attributes = config.env.randomize_attributes

        self.success_reward = config.reward.success_reward
        self.collision_penalty = config.reward.collision_penalty
        self.discomfort_dist = config.reward.discomfort_dist_back
        self.discomfort_dist_front = config.reward.discomfort_dist_front
        self.discomfort_penalty_factor = config.reward.discomfort_penalty_factor

        if self.config.humans.policy == 'orca' or self.config.humans.policy == 'social_force':
            self.case_capacity = {
                'train': np.iinfo(np.uint32).max - 2000,
                'val': 1000,
                'test': 1000
            }
            self.case_size = {
                'train': np.iinfo(np.uint32).max - 2000,
                'val': self.config.env.val_size,
                'test': self.config.env.test_size
            }
            self.circle_radius = config.sim.circle_radius
            self.human_num = config.sim.human_num
            self.group_human = config.sim.group_human

        else:
            raise NotImplementedError
        self.case_counter = {'train': 0, 'test': 0, 'val': 0}

        logging.info('human number: {}'.format(self.human_num))
        if self.randomize_attributes:
            logging.info("Randomizing humans' radius and preferred speed")
        else:
            logging.info("Not randomizing humans' radius and preferred speed")

        logging.info('Circle radius: {}'.format(self.circle_radius))

        self.robot_fov = np.pi * config.robot.FOV
        self.human_fov = np.pi * config.humans.FOV
        logging.info('robot FOV %f', self.robot_fov)
        logging.info('humans FOV %f', self.human_fov)

        # set dummy human and dummy robot
        # dummy humans, used if any human is not in view of other agents
        self.dummy_human = Human(self.config, 'humans')
        # if a human is not in view, it is replaced by this far-away dummy state
        self.dummy_human.set(7, 7, 7, 7, 0, 0, 0)  # px, py, gx, gy, vx, vy, theta
        self.dummy_human.time_step = config.env.time_step

        self.dummy_robot = Robot(self.config, 'robot')
        self.dummy_robot.set(7, 7, 7, 7, 0, 0, 0)
        self.dummy_robot.time_step = config.env.time_step
        self.dummy_robot.kinematics = 'holonomic'
        self.dummy_robot.policy = ORCA(config)

        self.r = self.config.humans.radius

        # configure noise in state
        self.add_noise = config.noise.add_noise
        if self.add_noise:
            self.noise_type = config.noise.type
            self.noise_magnitude = config.noise.magnitude

        # configure randomized goal changing of humans midway through episode
        self.random_goal_changing = config.humans.random_goal_changing
        if self.random_goal_changing:
            self.goal_change_chance = config.humans.goal_change_chance

        # configure randomized goal changing of humans after reaching their respective goals
        self.end_goal_changing = config.humans.end_goal_changing
        if self.end_goal_changing:
            self.end_goal_change_chance = config.humans.end_goal_change_chance

        # configure randomized radii changing when reaching goals
        self.random_radii = config.humans.random_radii

        # configure randomized v_pref changing when reaching goals
        self.random_v_pref = config.humans.random_v_pref

        # configure randomized goal changing of humans after reaching their respective goals
        self.random_unobservability = config.humans.random_unobservability
        if self.random_unobservability:
            self.unobservable_chance = config.humans.unobservable_chance

        self.last_human_states = np.zeros((self.human_num, 5))

        # configure randomized policy changing of humans every episode
        self.random_policy_changing = config.humans.random_policy_changing

        # set the robot for this env
        rob_RL = Robot(config, 'robot')
        self.set_robot(rob_RL)

        return
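The FOV values above are stored as angles in radians (the config value is a multiple of pi). A hypothetical, self-contained illustration of the kind of visibility test such a field enables; this is not the repository's get_human_in_fov, just a sketch of the idea:

import numpy as np

def in_fov(px, py, theta, fov, ox, oy):
    # angle from the looking agent at (px, py) heading `theta` to the other agent at (ox, oy)
    angle = np.arctan2(oy - py, ox - px)
    # wrap the heading offset into [-pi, pi] and compare against half the field of view
    offset = np.abs(np.arctan2(np.sin(angle - theta), np.cos(angle - theta)))
    return offset <= fov / 2

# e.g. a robot at the origin facing +x with a pi-radian FOV sees agents in the right half plane
assert in_fov(0, 0, 0, np.pi, 1, 0.5)
assert not in_fov(0, 0, 0, np.pi, -1, 0.0)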
Beispiel #17
0
class CrowdSim(gym.Env):
    def __init__(self):
        """
        Movement simulation for n+1 agents.
        An agent can be either a human or the robot.
        Humans are controlled by an unknown, fixed policy;
        the robot is controlled by a known, learnable policy.
        """
        self.time_limit = None
        self.time_step = None
        self.robot = None  # a Robot instance representing the robot
        self.humans = None  # a list of Human instances, representing all humans in the environment
        self.global_time = None

        # reward function
        self.success_reward = None
        self.collision_penalty = None
        self.discomfort_dist = None
        self.discomfort_dist_front = None
        self.discomfort_penalty_factor = None
        # simulation configuration
        self.config = None
        self.case_capacity = None
        self.case_size = None
        self.case_counter = None
        self.randomize_attributes = None

        self.circle_radius = None
        self.human_num = None

        self.action_space = None
        self.observation_space = None

        # limit FOV
        self.robot_fov = None
        self.human_fov = None

        self.dummy_human = None
        self.dummy_robot = None

        #seed
        self.thisSeed = None  # the seed will be set when the env is created

        #nenv
        self.nenv = None  # the number of parallel envs will be set when the env is created.
        # Because the human crossing cases are controlled by a random seed, we calculate a unique
        # random seed for each parallel env.

        self.phase = None  # set the phase to be train, val or test
        self.test_case = None  # the test case ID, which will be used to calculate a seed to generate a human crossing case

        # for render
        self.render_axis = None

        self.humans = []

        self.potential = None

    def configure(self, config):
        self.config = config

        self.time_limit = config.env.time_limit
        self.time_step = config.env.time_step
        self.randomize_attributes = config.env.randomize_attributes

        self.success_reward = config.reward.success_reward
        self.collision_penalty = config.reward.collision_penalty
        self.discomfort_dist = config.reward.discomfort_dist_back
        self.discomfort_dist_front = config.reward.discomfort_dist_front
        self.discomfort_penalty_factor = config.reward.discomfort_penalty_factor

        if self.config.humans.policy == 'orca' or self.config.humans.policy == 'social_force':
            self.case_capacity = {
                'train': np.iinfo(np.uint32).max - 2000,
                'val': 1000,
                'test': 1000
            }
            self.case_size = {
                'train': np.iinfo(np.uint32).max - 2000,
                'val': self.config.env.val_size,
                'test': self.config.env.test_size
            }
            self.circle_radius = config.sim.circle_radius
            self.human_num = config.sim.human_num
            self.group_human = config.sim.group_human

        else:
            raise NotImplementedError
        self.case_counter = {'train': 0, 'test': 0, 'val': 0}

        logging.info('human number: {}'.format(self.human_num))
        if self.randomize_attributes:
            logging.info("Randomizing humans' radius and preferred speed")
        else:
            logging.info("Not randomizing humans' radius and preferred speed")

        logging.info('Circle radius: {}'.format(self.circle_radius))

        self.robot_fov = np.pi * config.robot.FOV
        self.human_fov = np.pi * config.humans.FOV
        logging.info('robot FOV %f', self.robot_fov)
        logging.info('humans FOV %f', self.human_fov)

        # set dummy human and dummy robot
        # dummy humans, used if any human is not in view of other agents
        self.dummy_human = Human(self.config, 'humans')
        # if a human is not in view, it is replaced by this far-away dummy state
        self.dummy_human.set(7, 7, 7, 7, 0, 0, 0)  # px, py, gx, gy, vx, vy, theta
        self.dummy_human.time_step = config.env.time_step

        self.dummy_robot = Robot(self.config, 'robot')
        self.dummy_robot.set(7, 7, 7, 7, 0, 0, 0)
        self.dummy_robot.time_step = config.env.time_step
        self.dummy_robot.kinematics = 'holonomic'
        self.dummy_robot.policy = ORCA(config)

        self.r = self.config.humans.radius

        # configure noise in state
        self.add_noise = config.noise.add_noise
        if self.add_noise:
            self.noise_type = config.noise.type
            self.noise_magnitude = config.noise.magnitude

        # configure randomized goal changing of humans midway through episode
        self.random_goal_changing = config.humans.random_goal_changing
        if self.random_goal_changing:
            self.goal_change_chance = config.humans.goal_change_chance

        # configure randomized goal changing of humans after reaching their respective goals
        self.end_goal_changing = config.humans.end_goal_changing
        if self.end_goal_changing:
            self.end_goal_change_chance = config.humans.end_goal_change_chance

        # configure randomized radii changing when reaching goals
        self.random_radii = config.humans.random_radii

        # configure randomized v_pref changing when reaching goals
        self.random_v_pref = config.humans.random_v_pref

        # configure randomized goal changing of humans after reaching their respective goals
        self.random_unobservability = config.humans.random_unobservability
        if self.random_unobservability:
            self.unobservable_chance = config.humans.unobservable_chance

        self.last_human_states = np.zeros((self.human_num, 5))

        # configure randomized policy changing of humans every episode
        self.random_policy_changing = config.humans.random_policy_changing

        # set the robot for this env
        rob_RL = Robot(config, 'robot')
        self.set_robot(rob_RL)

        return

    def set_robot(self, robot):
        raise NotImplementedError

    def generate_random_human_position(self, human_num):
        """
        Generate human position: generate start position on a circle, goal position is at the opposite side
        :param human_num:
        :return:
        """
        # initial min separation distance to avoid danger penalty at beginning
        for i in range(human_num):
            self.humans.append(self.generate_circle_crossing_human())

    # return a static human in env
    # position: (px, py) for fixed position, or None for random position
    def generate_circle_static_obstacle(self, position=None):
        # generate a human with radius = 0.3, v_pref = 1, visible = True, and policy = orca
        human = Human(self.config, 'humans')
        # For fixed position
        if position:
            px, py = position
        # For random position
        else:
            while True:
                angle = np.random.random() * np.pi * 2
                # add some noise to simulate all the possible cases robot could meet with human
                v_pref = 1.0 if human.v_pref == 0 else human.v_pref
                px_noise = (np.random.random() - 0.5) * v_pref
                py_noise = (np.random.random() - 0.5) * v_pref
                px = self.circle_radius * np.cos(angle) + px_noise
                py = self.circle_radius * np.sin(angle) + py_noise
                collide = False
                for agent in [self.robot] + self.humans:
                    min_dist = human.radius + agent.radius + self.discomfort_dist
                    if norm((px - agent.px, py - agent.py)) < min_dist or \
                                    norm((px - agent.gx, py - agent.gy)) < min_dist:
                        collide = True
                        break
                if not collide:
                    break

        # make it a static obstacle
        # px, py, gx, gy, vx, vy, theta
        human.set(px, py, px, py, 0, 0, 0, v_pref=0)
        return human

    def generate_circle_crossing_human(self):
        human = Human(self.config, 'humans')
        if self.randomize_attributes:
            human.sample_random_attributes()

        while True:
            angle = np.random.random() * np.pi * 2
            # add some noise to simulate all the possible cases robot could meet with human
            v_pref = 1.0 if human.v_pref == 0 else human.v_pref
            px_noise = (np.random.random() - 0.5) * v_pref
            py_noise = (np.random.random() - 0.5) * v_pref
            px = self.circle_radius * np.cos(angle) + px_noise
            py = self.circle_radius * np.sin(angle) + py_noise
            collide = False

            if self.group_human:
                collide = self.check_collision_group((px, py), human.radius)

            else:
                for i, agent in enumerate([self.robot] + self.humans):
                    # keep human at least 3 meters away from robot
                    if self.robot.kinematics == 'unicycle' and i == 0:
                        min_dist = self.circle_radius / 2  # Todo: if circle_radius <= 4, it will get stuck here
                    else:
                        min_dist = human.radius + agent.radius + self.discomfort_dist
                    if norm((px - agent.px, py - agent.py)) < min_dist or \
                            norm((px - agent.gx, py - agent.gy)) < min_dist:
                        collide = True
                        break
            if not collide:
                break

        # px = np.random.uniform(-6, 6)
        # py = np.random.uniform(-3, 3.5)
        # human.set(px, py, px, py, 0, 0, 0)
        human.set(px, py, -px, -py, 0, 0, 0)
        return human

    # add noise according to env.config to state
    def apply_noise(self, ob):
        if isinstance(ob[0], ObservableState):
            for i in range(len(ob)):
                if self.noise_type == 'uniform':
                    noise = np.random.uniform(-self.noise_magnitude,
                                              self.noise_magnitude, 5)
                elif self.noise_type == 'gaussian':
                    noise = np.random.normal(size=5)
                else:
                    print('noise type not defined')
                    noise = [0] * 5
                ob[i].px = ob[i].px + noise[0]
                ob[i].py = ob[i].py + noise[1]
                ob[i].vx = ob[i].vx + noise[2]
                ob[i].vy = ob[i].vy + noise[3]
                ob[i].radius = ob[i].radius + noise[4]
            return ob
        else:
            if self.noise_type == 'uniform':
                noise = np.random.uniform(-self.noise_magnitude,
                                          self.noise_magnitude, len(ob))
            elif self.noise_type == 'gaussian':
                noise = np.random.normal(size=len(ob))
            else:
                print('noise type not defined')
                noise = [0] * len(ob)

            return ob + noise

    # update the robot belief of human states
    # if a human is visible, its state is updated to its current ground truth state
    # else we assume it keeps going in a straight line with last observed velocity
    def update_last_human_states(self, human_visibility, reset):
        """
        update the self.last_human_states array
        human_visibility: list of booleans returned by get_human_in_fov (e.g. [T, F, F, T, F])
        reset: True if this function is called by reset, False if called by step
        :return:
        """
        # keep the order of 5 humans at each timestep
        for i in range(self.human_num):
            if human_visibility[i]:
                humanS = np.array(self.humans[i].get_observable_state_list())
                self.last_human_states[i, :] = humanS

            else:
                if reset:
                    humanS = np.array([15., 15., 0., 0., 0.3])
                    self.last_human_states[i, :] = humanS

                else:
                    px, py, vx, vy, r = self.last_human_states[i, :]
                    # Plan A: linear approximation of human's next position
                    px = px + vx * self.time_step
                    py = py + vy * self.time_step
                    self.last_human_states[i, :] = np.array(
                        [px, py, vx, vy, r])

                    # Plan B: assume the human doesn't move, use last observation
                    # self.last_human_states[i, :] = np.array([px, py, 0., 0., r])

    # return the ground truth locations of all humans
    def get_true_human_states(self):
        true_human_states = np.zeros((self.human_num, 2))
        for i in range(self.human_num):
            humanS = np.array(self.humans[i].get_observable_state_list())
            true_human_states[i, :] = humanS[:2]
        return true_human_states

    def randomize_human_policies(self):
        """
        Randomize the moving humans' policies to be either orca or social force
        """
        for human in self.humans:
            if not human.isObstacle:
                new_policy = random.choice(['orca', 'social_force'])
                new_policy = policy_factory[new_policy]()
                human.set_policy(new_policy)

    # Generates group of circum_num humans in a circle formation at a random viable location
    def generate_circle_group_obstacle(self, circum_num):
        group_circumference = self.config.humans.radius * 2 * circum_num
        # print("group circum: ", group_circumference)
        group_radius = group_circumference / (2 * np.pi)
        # print("group radius: ", group_radius)
        while True:
            rand_cen_x = np.random.uniform(-3, 3)
            rand_cen_y = np.random.uniform(-3, 3)
            success = True
            for i, group in enumerate(self.circle_groups):
                # print(i)
                dist_between_groups = np.sqrt((rand_cen_x - group[1])**2 +
                                              (rand_cen_y - group[2])**2)
                sum_radius = group_radius + group[0] + 2 * self.config.humans.radius
                if dist_between_groups < sum_radius:
                    success = False
                    break
            if success:
                # print("------------\nsuccessfully found valid x: ", rand_cen_x, " y: ", rand_cen_y)
                break
        self.circle_groups.append((group_radius, rand_cen_x, rand_cen_y))

        # print("current groups:")
        # for i in self.circle_groups:
        #     print(i)

        arc = 2 * np.pi / circum_num
        for i in range(circum_num):
            angle = arc * i
            curr_x = rand_cen_x + group_radius * np.cos(angle)
            curr_y = rand_cen_y + group_radius * np.sin(angle)
            point = (curr_x, curr_y)
            # print("adding circle point: ", point)
            curr_human = self.generate_circle_static_obstacle(point)
            curr_human.isObstacle = True
            self.humans.append(curr_human)

        return

    # given an agent's xy location and radius, check whether it collides with all other humans
    # including the circle group and moving humans
    # return True if collision, False if no collision
    def check_collision_group(self, pos, radius):
        # check circle groups
        for r, x, y in self.circle_groups:
            if np.linalg.norm(
                [pos[0] - x, pos[1] - y]
            ) <= r + radius + 2 * 0.5:  # use 0.5 because it's the max radius of human
                return True

        # check moving humans
        for human in self.humans:
            if human.isObstacle:
                pass
            else:
                if np.linalg.norm([pos[0] - human.px, pos[1] - human.py
                                   ]) <= human.radius + radius:
                    return True
        return False

    # check collision between robot goal position and circle groups
    def check_collision_group_goal(self, pos, radius):
        # print('check goal', len(self.circle_groups))
        collision = False
        # check circle groups
        for r, x, y in self.circle_groups:
            # print(np.linalg.norm([pos[0] - x, pos[1] - y]), r + radius + 4 * 0.5)
            if np.linalg.norm(
                [pos[0] - x, pos[1] - y]
            ) <= r + radius + 4 * 0.5:  # use 0.5 because it's the max radius of human
                collision = True
        return collision

    # set robot initial state and generate all humans for reset function
    # for crowd nav: human_num == self.human_num
    # for leader follower: human_num = self.human_num - 1
    def generate_robot_humans(self, phase, human_num=None):
        if human_num is None:
            human_num = self.human_num
        # for Group environment
        if self.group_human:
            # set the robot in a dummy far away location to avoid collision with humans
            self.robot.set(10, 10, 10, 10, 0, 0, np.pi / 2)

            # generate humans
            self.circle_groups = []
            humans_left = human_num

            while humans_left > 0:
                # print("****************\nhumans left: ", humans_left)
                if humans_left <= 4:
                    self.generate_random_human_position(human_num=humans_left)
                    humans_left = 0
                else:
                    if humans_left < 10:
                        max_rand = humans_left
                    else:
                        max_rand = 10
                    # print("randint from 4 to ", max_rand)
                    circum_num = np.random.randint(4, max_rand)
                    # print("circum num: ", circum_num)
                    self.generate_circle_group_obstacle(circum_num)
                    humans_left -= circum_num

            # randomize starting position and goal position while keeping the distance of goal to be > 6
            # set the robot on a circle with radius 5.5 randomly
            rand_angle = np.random.uniform(0, np.pi * 2)
            # print('rand angle:', rand_angle)
            increment_angle = 0.0
            while True:
                px_r = np.cos(rand_angle + increment_angle) * 5.5
                py_r = np.sin(rand_angle + increment_angle) * 5.5
                # check whether the initial px and py collides with any human
                collision = self.check_collision_group((px_r, py_r),
                                                       self.robot.radius)
                # if the robot start position does not fall inside any human group, it is okay; otherwise keep sampling
                if not collision:
                    #print('initial pos angle:', rand_angle+increment_angle)
                    break
                increment_angle = increment_angle + 0.2

            increment_angle = increment_angle + np.pi  # start at opposite side of the circle
            while True:
                gx = np.cos(rand_angle + increment_angle) * 5.5
                gy = np.sin(rand_angle + increment_angle) * 5.5
                # check whether the goal is inside the human groups
                # check whether the initial px and py collides with any human
                collision = self.check_collision_group_goal((gx, gy),
                                                            self.robot.radius)
                # if the robot goal does not fall into any human groups, the goal is okay, otherwise keep generating the goal
                if not collision:
                    # print('goal pos angle:', rand_angle + increment_angle)
                    break
                increment_angle = increment_angle + 0.2

            self.robot.set(px_r, py_r, gx, gy, 0, 0, np.pi / 2)

        # for FoV environment
        else:
            if self.robot.kinematics == 'unicycle':
                angle = np.random.uniform(0, np.pi * 2)
                px = self.circle_radius * np.cos(angle)
                py = self.circle_radius * np.sin(angle)
                while True:
                    gx, gy = np.random.uniform(-self.circle_radius,
                                               self.circle_radius, 2)
                    if np.linalg.norm([px - gx, py - gy]) >= 6:  # 1 was 6
                        break
                self.robot.set(px, py, gx, gy, 0, 0,
                               np.random.uniform(
                                   0, 2 * np.pi))  # randomize init orientation

            # randomize starting position and goal position
            else:
                while True:
                    px, py, gx, gy = np.random.uniform(-self.circle_radius,
                                                       self.circle_radius, 4)
                    if np.linalg.norm([px - gx, py - gy]) >= 6:
                        break
                self.robot.set(px, py, gx, gy, 0, 0, np.pi / 2)

            # generate humans
            self.generate_random_human_position(human_num=human_num)

    def reset(self, phase='train', test_case=None):
        """
        Set px, py, gx, gy, vx, vy, theta for robot and humans
        :return:
        """

        if self.phase is not None:
            phase = self.phase
        if self.test_case is not None:
            test_case = self.test_case

        if self.robot is None:
            raise AttributeError('robot has to be set!')
        assert phase in ['train', 'val', 'test']
        if test_case is not None:
            self.case_counter[
                phase] = test_case  # test case is passed in to calculate specific seed to generate case
        self.global_time = 0

        self.humans = []
        # train, val, and test phase should start with different seed.
        # case capacity: the maximum number for train(max possible int -2000), val(1000), and test(1000)
        # val start from seed=0, test start from seed=case_capacity['val']=1000
        # train start from self.case_capacity['val'] + self.case_capacity['test']=2000
        counter_offset = {
            'train': self.case_capacity['val'] + self.case_capacity['test'],
            'val': 0,
            'test': self.case_capacity['val']
        }

        np.random.seed(counter_offset[phase] + self.case_counter[phase] +
                       self.thisSeed)
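        # with thisSeed = 0 this yields seeds starting at 0 for val, 1000 for test and
        # 2000 for train; thisSeed (presumably a per-process offset) shifts the whole
        # range so parallel environments do not replay identical cases.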

        self.generate_robot_humans(phase)

        # If configured to randomize human policies, do so
        if self.random_policy_changing:
            self.randomize_human_policies()

        for agent in [self.robot] + self.humans:
            agent.time_step = self.time_step
            agent.policy.time_step = self.time_step

        # case size is used to make sure that the case_counter is always between 0 and case_size[phase]
        self.case_counter[phase] = (self.case_counter[phase] +
                                    int(1 * self.nenv)) % self.case_size[phase]

        # get current observation
        ob = self.generate_ob(reset=True)

        # initialize potential
        self.potential = -abs(
            np.linalg.norm(
                np.array([self.robot.px, self.robot.py]) -
                np.array([self.robot.gx, self.robot.gy])))

        return ob

    # Update the humans' end goals in the environment
    # Produces valid end goals for each human
    def update_human_goals_randomly(self):
        # Update humans' goals randomly
        for human in self.humans:
            if human.isObstacle or human.v_pref == 0:
                continue
            if np.random.random() <= self.goal_change_chance:
                if not self.group_human:  # to improve the runtime
                    humans_copy = []
                    for h in self.humans:
                        if h != human:
                            humans_copy.append(h)

                # Produce valid goal for human in case of circle setting
                while True:
                    angle = np.random.random() * np.pi * 2
                    # add some noise to simulate all the possible cases robot could meet with human
                    v_pref = 1.0 if human.v_pref == 0 else human.v_pref
                    gx_noise = (np.random.random() - 0.5) * v_pref
                    gy_noise = (np.random.random() - 0.5) * v_pref
                    gx = self.circle_radius * np.cos(angle) + gx_noise
                    gy = self.circle_radius * np.sin(angle) + gy_noise
                    collide = False

                    if self.group_human:
                        collide = self.check_collision_group((gx, gy),
                                                             human.radius)
                    else:
                        for agent in [self.robot] + humans_copy:
                            min_dist = human.radius + agent.radius + self.discomfort_dist
                            if norm((gx - agent.px, gy - agent.py)) < min_dist or \
                                    norm((gx - agent.gx, gy - agent.gy)) < min_dist:
                                collide = True
                                break
                    if not collide:
                        break

                # Give human new goal
                human.gx = gx
                human.gy = gy
        return

    # Update the specified human's end goals in the environment randomly
    def update_human_goal(self, human):

        # Update human's goals randomly
        if np.random.random() <= self.end_goal_change_chance:
            if not self.group_human:
                humans_copy = []
                for h in self.humans:
                    if h != human:
                        humans_copy.append(h)

            # Update human's radius now that it's reached goal
            if self.random_radii:
                human.radius += np.random.uniform(-0.1, 0.1)

            # Update human's v_pref now that it's reached goal
            if self.random_v_pref:
                human.v_pref += np.random.uniform(-0.1, 0.1)

            while True:
                angle = np.random.random() * np.pi * 2
                # add some noise to simulate all the possible cases robot could meet with human
                v_pref = 1.0 if human.v_pref == 0 else human.v_pref
                gx_noise = (np.random.random() - 0.5) * v_pref
                gy_noise = (np.random.random() - 0.5) * v_pref
                gx = self.circle_radius * np.cos(angle) + gx_noise
                gy = self.circle_radius * np.sin(angle) + gy_noise
                collide = False
                if self.group_human:
                    collide = self.check_collision_group((gx, gy),
                                                         human.radius)
                else:
                    for agent in [self.robot] + humans_copy:
                        min_dist = human.radius + agent.radius + self.discomfort_dist
                        if norm((gx - agent.px, gy - agent.py)) < min_dist or \
                                norm((gx - agent.gx, gy - agent.gy)) < min_dist:
                            collide = True
                            break
                if not collide:
                    break

            # Give human new goal
            human.gx = gx
            human.gy = gy
        return

    # Calculate whether agent2 is in agent1's FOV
    # Note: visibility is not symmetric; agent2 being visible to agent1 does not imply the reverse
    # arguments:
    # state1, state2: can be agent instance OR state instance
    # robot1: is True if state1 is robot, else is False
    # return value:
    # return True if state2 is visible to state1, else return False
    def detect_visible(self, state1, state2, robot1=False, custom_fov=None):
        if self.robot.kinematics == 'holonomic':
            real_theta = np.arctan2(state1.vy, state1.vx)
        else:
            real_theta = state1.theta
        # angle of center line of FOV of agent1
        v_fov = [np.cos(real_theta), np.sin(real_theta)]

        # angle between agent1 and agent2
        v_12 = [state2.px - state1.px, state2.py - state1.py]
        # angle between center of FOV and agent 2

        v_fov = v_fov / np.linalg.norm(v_fov)
        v_12 = v_12 / np.linalg.norm(v_12)

        offset = np.arccos(np.clip(np.dot(v_fov, v_12), a_min=-1, a_max=1))
        if custom_fov:
            fov = custom_fov
        else:
            if robot1:
                fov = self.robot_fov
            else:
                fov = self.human_fov

        return np.abs(offset) <= fov / 2

    # for robot:
    # return the humans visible to the robot, the number of visible humans,
    # and a boolean visibility mask with one entry per human
    def get_num_human_in_fov(self):
        human_ids = []
        humans_in_view = []
        num_humans_in_view = 0

        for i in range(self.human_num):
            visible = self.detect_visible(self.robot,
                                          self.humans[i],
                                          robot1=True)
            if visible:
                humans_in_view.append(self.humans[i])
                num_humans_in_view = num_humans_in_view + 1
                human_ids.append(True)
            else:
                human_ids.append(False)

        return humans_in_view, num_humans_in_view, human_ids

    # convert an np array with length = 34 to a JointState object
    def array_to_jointstate(self, obs_list):
        fullstate = FullState(obs_list[0], obs_list[1], obs_list[2],
                              obs_list[3], obs_list[4], obs_list[5],
                              obs_list[6], obs_list[7], obs_list[8])

        observable_states = []
        for k in range(self.human_num):
            idx = 9 + k * 5
            observable_states.append(
                ObservableState(obs_list[idx], obs_list[idx + 1],
                                obs_list[idx + 2], obs_list[idx + 3],
                                obs_list[idx + 4]))
        state = JointState(fullstate, observable_states)
        return state

    def last_human_states_obj(self):
        '''
        convert self.last_human_states to a list of observable state objects for old algorithms to use
        '''
        humans = []
        for i in range(self.human_num):
            h = ObservableState(*self.last_human_states[i])
            humans.append(h)
        return humans

    # find R(s, a)
    def calc_reward(self, action):
        # collision detection
        dmin = float('inf')

        danger_dists = []
        collision = False

        for i, human in enumerate(self.humans):
            dx = human.px - self.robot.px
            dy = human.py - self.robot.py
            closest_dist = (dx**2 +
                            dy**2)**(1 / 2) - human.radius - self.robot.radius

            if closest_dist < self.discomfort_dist:
                danger_dists.append(closest_dist)
            if closest_dist < 0:
                collision = True
                # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist))
                break
            elif closest_dist < dmin:
                dmin = closest_dist

        # check if reaching the goal
        reaching_goal = norm(
            np.array(self.robot.get_position()) -
            np.array(self.robot.get_goal_position())) < self.robot.radius

        if self.global_time >= self.time_limit - 1:
            reward = 0
            done = True
            episode_info = Timeout()
        elif collision:
            reward = self.collision_penalty
            done = True
            episode_info = Collision()
        elif reaching_goal:
            reward = self.success_reward
            done = True
            episode_info = ReachGoal()

        elif dmin < self.discomfort_dist:
            # only penalize agent for getting too close if it's visible
            # adjust the reward based on FPS
            # print(dmin)
            reward = (dmin - self.discomfort_dist
                      ) * self.discomfort_penalty_factor * self.time_step
            done = False
            episode_info = Danger(dmin)

        else:
            # potential reward
            potential_cur = np.linalg.norm(
                np.array([self.robot.px, self.robot.py]) -
                np.array(self.robot.get_goal_position()))
            reward = 2 * (-abs(potential_cur) - self.potential)
            self.potential = -abs(potential_cur)

            done = False
            episode_info = Nothing()

        # if the robot is near collision/arrival, it should be able to turn a large angle
        if self.robot.kinematics == 'unicycle':
            # add a rotational penalty
            # if action.r is w, factor = -0.02 if w in [-1.5, 1.5], factor = -0.045 if w in [-1, 1];
            # if action.r is delta theta, factor = -2 if r in [-0.15, 0.15], factor = -4.5 if r in [-0.1, 0.1]
            r_spin = -5 * action.r**2

            # add a penalty for going backwards
            if action.v < 0:
                r_back = -2 * abs(action.v)
            else:
                r_back = 0.

            reward = reward + r_spin + r_back

        return reward, done, episode_info
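
    # A note on the shaping term above: self.potential stores -|d_prev|, the negated
    # distance to the goal at the previous step, so the potential-based branch evaluates to
    # reward = 2 * (-|d_cur| - (-|d_prev|)) = 2 * (|d_prev| - |d_cur|).
    # For example, moving from 5.0 m to 4.8 m away from the goal yields 2 * 0.2 = 0.4.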

    # compute the observation
    def generate_ob(self, reset):
        visible_human_states, num_visible_humans, human_visibility = self.get_num_human_in_fov(
        )
        self.update_last_human_states(human_visibility, reset=reset)
        if self.robot.policy.name in ['lstm_ppo', 'srnn']:
            ob = [num_visible_humans]
            # append robot's state
            robotS = np.array(self.robot.get_full_state_list())
            ob.extend(list(robotS))

            ob.extend(list(np.ravel(self.last_human_states)))
            ob = np.array(ob)

        else:  # for orca and sf
            ob = self.last_human_states_obj()

        if self.add_noise:
            ob = self.apply_noise(ob)

        return ob

    def step(self, action, update=True):
        """
        Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info)
        """

        # clip the action to obey robot's constraint
        action = self.robot.policy.clip_action(action, self.robot.v_pref)

        # step all humans
        human_actions = []  # a list of all humans' actions
        for i, human in enumerate(self.humans):
            # observation for humans is always coordinates
            ob = []
            for other_human in self.humans:
                if other_human != human:
                    # Chance for one human to be blind to some other humans
                    if self.random_unobservability and i == 0:
                        if np.random.random(
                        ) <= self.unobservable_chance or not self.detect_visible(
                                human, other_human):
                            ob.append(self.dummy_human.get_observable_state())
                        else:
                            ob.append(other_human.get_observable_state())
                    # Else detectable humans are always observable to each other
                    elif self.detect_visible(human, other_human):
                        ob.append(other_human.get_observable_state())
                    else:
                        ob.append(self.dummy_human.get_observable_state())

            if self.robot.visible:
                # Chance for one human to be blind to robot
                if self.random_unobservability and i == 0:
                    if np.random.random(
                    ) <= self.unobservable_chance or not self.detect_visible(
                            human, self.robot):
                        ob += [self.dummy_robot.get_observable_state()]
                    else:
                        ob += [self.robot.get_observable_state()]
                # Else human will always see visible robots
                elif self.detect_visible(human, self.robot):
                    ob += [self.robot.get_observable_state()]
                else:
                    ob += [self.dummy_robot.get_observable_state()]

            human_actions.append(human.act(ob))

        # compute reward and episode info
        reward, done, episode_info = self.calc_reward(action)
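        # note: the reward is computed from the agents' current (pre-step) positions;
        # the robot and humans are only advanced below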

        # apply action and update all agents
        self.robot.step(action)
        for i, human_action in enumerate(human_actions):
            self.humans[i].step(human_action)
        self.global_time += self.time_step  # max episode length=time_limit/time_step

        # compute the observation after all agents have been advanced
        ob = self.generate_ob(reset=False)

        if self.robot.policy.name in ['srnn']:
            info = {'info': episode_info}
        else:  # for orca and sf
            info = episode_info

        # Update all humans' goals randomly midway through episode
        if self.random_goal_changing:
            if self.global_time % 5 == 0:
                self.update_human_goals_randomly()

        # Update a specific human's goal once it has reached its original goal
        if self.end_goal_changing:
            for human in self.humans:
                if not human.isObstacle and human.v_pref != 0 and norm(
                    (human.gx - human.px, human.gy - human.py)) < human.radius:
                    self.update_human_goal(human)

        return ob, reward, done, info

    def render(self, mode='human'):
        import matplotlib.pyplot as plt
        import matplotlib.lines as mlines
        from matplotlib import patches

        plt.rcParams['animation.ffmpeg_path'] = '/usr/bin/ffmpeg'

        robot_color = 'yellow'
        goal_color = 'red'
        arrow_color = 'red'
        arrow_style = patches.ArrowStyle("->", head_length=4, head_width=2)

        def calcFOVLineEndPoint(ang, point, extendFactor):
            # choose the extendFactor big enough
            # so that the endPoints of the FOVLine is out of xlim and ylim of the figure
            FOVLineRot = np.array([[np.cos(ang), -np.sin(ang), 0],
                                   [np.sin(ang), np.cos(ang), 0], [0, 0, 1]])
            point.extend([1])
            # apply rotation matrix
            newPoint = np.matmul(FOVLineRot, np.reshape(point, [3, 1]))
            # increase the distance between the line start point and the end point
            newPoint = [
                extendFactor * newPoint[0, 0], extendFactor * newPoint[1, 0], 1
            ]
            return newPoint

        ax = self.render_axis
        artists = []

        # add goal
        goal = mlines.Line2D([self.robot.gx], [self.robot.gy],
                             color=goal_color,
                             marker='*',
                             linestyle='None',
                             markersize=15,
                             label='Goal')
        ax.add_artist(goal)
        artists.append(goal)

        # add robot
        robotX, robotY = self.robot.get_position()

        robot = plt.Circle((robotX, robotY),
                           self.robot.radius,
                           fill=True,
                           color=robot_color)
        ax.add_artist(robot)
        artists.append(robot)

        plt.legend([robot, goal], ['Robot', 'Goal'], fontsize=16)

        # compute orientation in each step and add arrow to show the direction
        radius = self.robot.radius
        arrowStartEnd = []

        robot_theta = self.robot.theta if self.robot.kinematics == 'unicycle' else np.arctan2(
            self.robot.vy, self.robot.vx)

        arrowStartEnd.append(
            ((robotX, robotY), (robotX + radius * np.cos(robot_theta),
                                robotY + radius * np.sin(robot_theta))))

        for i, human in enumerate(self.humans):
            theta = np.arctan2(human.vy, human.vx)
            arrowStartEnd.append(
                ((human.px, human.py), (human.px + radius * np.cos(theta),
                                        human.py + radius * np.sin(theta))))

        arrows = [
            patches.FancyArrowPatch(*arrow,
                                    color=arrow_color,
                                    arrowstyle=arrow_style)
            for arrow in arrowStartEnd
        ]
        for arrow in arrows:
            ax.add_artist(arrow)
            artists.append(arrow)

        # draw FOV for the robot
        # add robot FOV
        FOVAng = self.robot_fov / 2
        FOVLine1 = mlines.Line2D([0, 0], [0, 0], linestyle='--')
        FOVLine2 = mlines.Line2D([0, 0], [0, 0], linestyle='--')

        startPointX = robotX
        startPointY = robotY
        endPointX = robotX + radius * np.cos(robot_theta)
        endPointY = robotY + radius * np.sin(robot_theta)

        # transform the vector back to world frame origin, apply rotation matrix, and get end point of FOVLine
        # the start point of the FOVLine is the center of the robot
        FOVEndPoint1 = calcFOVLineEndPoint(
            FOVAng, [endPointX - startPointX, endPointY - startPointY],
            20. / self.robot.radius)
        FOVLine1.set_xdata(
            np.array([startPointX, startPointX + FOVEndPoint1[0]]))
        FOVLine1.set_ydata(
            np.array([startPointY, startPointY + FOVEndPoint1[1]]))
        FOVEndPoint2 = calcFOVLineEndPoint(
            -FOVAng, [endPointX - startPointX, endPointY - startPointY],
            20. / self.robot.radius)
        FOVLine2.set_xdata(
            np.array([startPointX, startPointX + FOVEndPoint2[0]]))
        FOVLine2.set_ydata(
            np.array([startPointY, startPointY + FOVEndPoint2[1]]))

        ax.add_artist(FOVLine1)
        ax.add_artist(FOVLine2)
        artists.append(FOVLine1)
        artists.append(FOVLine2)

        # add humans and change the color of them based on visibility
        human_circles = [
            plt.Circle(human.get_position(), human.radius, fill=False)
            for human in self.humans
        ]

        for i in range(len(self.humans)):
            ax.add_artist(human_circles[i])
            artists.append(human_circles[i])

            # green: visible; red: invisible
            if self.detect_visible(self.robot, self.humans[i], robot1=True):
                human_circles[i].set_color(c='g')
            else:
                human_circles[i].set_color(c='r')

        plt.pause(0.1)
        for item in artists:
            item.remove()  # there should be a better way to do this
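
The visibility test in detect_visible above reduces to comparing the angle between
agent1's heading and the bearing towards agent2 against half of the field of view.
The following is a minimal standalone sketch of that geometry (plain NumPy; the
helper name in_fov is hypothetical and not part of the environment class):

import numpy as np

def in_fov(px1, py1, theta1, px2, py2, fov):
    """Return True if (px2, py2) lies inside agent1's field of view of width fov (radians)."""
    heading = np.array([np.cos(theta1), np.sin(theta1)])
    bearing = np.array([px2 - px1, py2 - py1])
    bearing = bearing / np.linalg.norm(bearing)
    offset = np.arccos(np.clip(np.dot(heading, bearing), -1.0, 1.0))
    return abs(offset) <= fov / 2

# a robot at the origin facing +x with a 180-degree FOV sees a human at (1, 1) ...
print(in_fov(0.0, 0.0, 0.0, 1.0, 1.0, np.pi))   # True
# ... but not one directly behind it
print(in_fov(0.0, 0.0, 0.0, -1.0, 0.0, np.pi))  # False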
Beispiel #18
0
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=True, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')

    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')

    parser.add_argument("--test_policy_flag", type=str, default="1")
    parser.add_argument("--optimizer", type=str, default="SGD")
    parser.add_argument("--multi_process", type=str, default="average")
    parser.add_argument("--human_num", type=int, default=5)

    # parameters related to the environment reward
    parser.add_argument("--agent_timestep", type=float, default=0.4)
    parser.add_argument("--human_timestep", type=float, default=0.5)
    parser.add_argument("--reward_increment", type=float, default=4.0)
    parser.add_argument("--position_variance", type=float, default=4.0)
    parser.add_argument("--direction_variance", type=float, default=4.0)

    # visible or not
    parser.add_argument("--visible", default=False, action="store_true")

    # act step cnt
    parser.add_argument("--act_steps", type=int, default=1)
    parser.add_argument("--act_fixed", default=False, action="store_true")

    args = parser.parse_args()

    human_num = args.human_num
    agent_timestep = args.agent_timestep
    human_timestep = args.human_timestep
    reward_increment = args.reward_increment
    position_variance = args.position_variance
    direction_variance = args.direction_variance

    agent_visible = args.visible
    print(agent_timestep, " ", human_timestep, " ", reward_increment, " ",
          position_variance, " ", direction_variance, " ", agent_visible)

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir,
                                       os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir,
                                          os.path.basename(args.policy_config))
        if args.il:
            print("model: il_model.pth")
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(
                    os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                print("model: resumed_rl_model.pth")
                model_weights = os.path.join(args.model_dir,
                                             'resumed_rl_model.pth')
            else:
                model_path_ = "rl_model_5400.pth"
                print("model: ", model_path_)
                model_weights = os.path.join(args.model_dir, model_path_)
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    # device = torch.device("cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)

    # RawConfigParser in Python 3 only accepts string option values
    policy_config.set("actenvcarl", "test_policy_flag", args.test_policy_flag)
    policy_config.set("actenvcarl", "multi_process", args.multi_process)
    policy_config.set("actenvcarl", "act_steps", str(args.act_steps))
    policy_config.set("actenvcarl", "act_fixed", str(args.act_fixed))

    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error(
                'Trainable policy must be specified with a model weights directory'
            )
        # policy.get_model().load_state_dict(torch.load(model_weights, map_location={'cuda:2':'cuda:0'}))
        policy.get_model().load_state_dict(torch.load(model_weights))
        # policy.get_model().load_state_dict(torch.jit.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)

    # option values must be strings for RawConfigParser
    env_config.set("sim", "human_num", str(human_num))
    env_config.set("reward", "agent_timestep", str(agent_timestep))
    env_config.set("reward", "human_timestep", str(human_timestep))
    env_config.set("reward", "reward_increment", str(reward_increment))
    env_config.set("reward", "position_variance", str(position_variance))
    env_config.set("reward", "direction_variance", str(direction_variance))
    # env_config.set("robot", "visible", agent_visible)

    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.visible = agent_visible

    print("robot visible: ", robot.visible)

    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        # the safety space is zero here regardless of the robot's visibility
        robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()
    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug(
                'Speed: %.2f',
                np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        if args.traj:
            env.render('traj', args.video_file)
        else:
            env.render('video', args.video_file)

        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        logging.info("run k episodes")

        # explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
        explorer.run_k_episodes(50, args.phase, print_failure=True)
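
Beispiel #18 above overrides individual options of the .config files from the command
line via RawConfigParser.set. A minimal self-contained sketch of that pattern (the
section and option names mirror the ones used above; in Python 3 option values must
be passed as strings, hence the str() conversions):

import configparser

env_config = configparser.RawConfigParser()
env_config.add_section('sim')       # only needed when the section is not read from a file
env_config.set('sim', 'human_num', str(5))
env_config.add_section('reward')
env_config.set('reward', 'agent_timestep', str(0.4))

print(env_config.getint('sim', 'human_num'))            # 5
print(env_config.getfloat('reward', 'agent_timestep'))  # 0.4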
Beispiel #19
0
def main(args):
    set_random_seeds(args.randomseed)
    # configure paths
    make_new_dir = True
    if os.path.exists(args.output_dir):
        # the existing output directory is removed whether or not --overwrite is set
        shutil.rmtree(args.output_dir)
    if make_new_dir:
        os.makedirs(args.output_dir)
        shutil.copy(args.config, os.path.join(args.output_dir, 'config.py'))

        # # insert the arguments from command line to the config file
        # with open(os.path.join(args.output_dir, 'config.py'), 'r') as fo:
        #     config_text = fo.read()
        # search_pairs = {r"gcn.X_dim = \d*": "gcn.X_dim = {}".format(args.X_dim),
        #                 r"gcn.num_layer = \d": "gcn.num_layer = {}".format(args.layers),
        #                 r"gcn.similarity_function = '\w*'": "gcn.similarity_function = '{}'".format(args.sim_func),
        #                 r"gcn.layerwise_graph = \w*": "gcn.layerwise_graph = {}".format(args.layerwise_graph),
        #                 r"gcn.skip_connection = \w*": "gcn.skip_connection = {}".format(args.skip_connection)}
        #
        # for find, replace in search_pairs.items():
        #     config_text = re.sub(find, replace, config_text)
        #
        # with open(os.path.join(args.output_dir, 'config.py'), 'w') as fo:
        #     fo.write(config_text)

    args.config = os.path.join(args.output_dir, 'config.py')
    log_file = os.path.join(args.output_dir, 'output.log')
    in_weight_file = os.path.join(args.output_dir, 'in_model.pth')
    il_weight_file = os.path.join(args.output_dir, 'il_model.pth')
    rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth')

    spec = importlib.util.spec_from_file_location('config', args.config)
    if spec is None:
        parser.error('Config file not found.')
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)

    # configure logging
    mode = 'a' if args.resume else 'w'
    file_handler = logging.FileHandler(log_file, mode=mode)
    stdout_handler = logging.StreamHandler(sys.stdout)
    level = logging.INFO if not args.debug else logging.DEBUG
    logging.basicConfig(level=level,
                        handlers=[stdout_handler, file_handler],
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    repo = git.Repo(search_parent_directories=True)
    logging.info('Current git head hash code: {}'.format(
        repo.head.object.hexsha))
    logging.info('Current random seed: {}'.format(args.randomseed))
    logging.info('Current safe_weight: {}'.format(args.safe_weight))
    logging.info('Current goal_weight: {}'.format(args.goal_weight))
    logging.info('Current re_collision: {}'.format(args.re_collision))
    logging.info('Current re_arrival: {}'.format(args.re_arrival))
    logging.info('Current config content is: {}'.format(config))
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)
    writer = SummaryWriter(log_dir=args.output_dir)

    # configure policy
    policy_config = config.PolicyConfig()
    policy = policy_factory[policy_config.name]()
    if not policy.trainable:
        parser.error('Policy has to be trainable')
    policy.configure(policy_config, device)
    policy.set_device(device)

    # configure environment
    env_config = config.EnvConfig(args.debug)
    env_config.reward.collision_penalty = args.re_collision
    env_config.reward.success_reward = args.re_arrival
    env_config.reward.goal_factor = args.goal_weight
    env_config.reward.discomfort_penalty_factor = args.safe_weight
    env_config.sim.human_num = args.human_num

    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    robot = Robot(env_config, 'robot')
    robot.time_step = env.time_step
    env.set_robot(robot)

    reward_estimator = Reward_Estimator()
    reward_estimator.configure(env_config)
    policy.reward_estimator = reward_estimator
    # for continuous action
    action_dim = env.action_space.shape[0]
    max_action = env.action_space.high
    min_action = env.action_space.low

    # read training parameters
    train_config = config.TrainConfig(args.debug)
    rl_learning_rate = train_config.train.rl_learning_rate
    train_batches = train_config.train.train_batches
    train_episodes = train_config.train.train_episodes
    sample_episodes = train_config.train.sample_episodes
    target_update_interval = train_config.train.target_update_interval
    evaluation_interval = train_config.train.evaluation_interval
    capacity = train_config.train.capacity
    epsilon_start = train_config.train.epsilon_start
    epsilon_end = train_config.train.epsilon_end
    epsilon_decay = train_config.train.epsilon_decay
    checkpoint_interval = train_config.train.checkpoint_interval

    # configure trainer and explorer
    memory = ReplayMemory(capacity)
    model = policy.get_model()
    batch_size = train_config.trainer.batch_size
    optimizer = train_config.trainer.optimizer
    print(policy_config.name)
    if policy_config.name == 'model_predictive_rl':
        trainer = MPRLTrainer(
            model,
            policy.state_predictor,
            memory,
            device,
            policy,
            writer,
            batch_size,
            optimizer,
            env.human_num,
            reduce_sp_update_frequency=train_config.train.
            reduce_sp_update_frequency,
            freeze_state_predictor=train_config.train.freeze_state_predictor,
            detach_state_predictor=train_config.train.detach_state_predictor,
            share_graph_model=policy_config.model_predictive_rl.
            share_graph_model)
    elif policy_config.name == 'tree_search_rl':
        trainer = TSRLTrainer(
            model,
            policy.state_predictor,
            memory,
            device,
            policy,
            writer,
            batch_size,
            optimizer,
            env.human_num,
            reduce_sp_update_frequency=train_config.train.
            reduce_sp_update_frequency,
            freeze_state_predictor=train_config.train.freeze_state_predictor,
            detach_state_predictor=train_config.train.detach_state_predictor,
            share_graph_model=policy_config.model_predictive_rl.
            share_graph_model)

    elif policy_config.name == 'gat_predictive_rl':
        trainer = MPRLTrainer(
            model,
            policy.state_predictor,
            memory,
            device,
            policy,
            writer,
            batch_size,
            optimizer,
            env.human_num,
            reduce_sp_update_frequency=train_config.train.
            reduce_sp_update_frequency,
            freeze_state_predictor=train_config.train.freeze_state_predictor,
            detach_state_predictor=train_config.train.detach_state_predictor,
            share_graph_model=policy_config.model_predictive_rl.
            share_graph_model)
    elif policy_config.name == 'td3_rl':
        policy.set_action(action_dim, max_action, min_action)
        trainer = TD3RLTrainer(
            policy.actor,
            policy.critic,
            policy.state_predictor,
            memory,
            device,
            policy,
            writer,
            batch_size,
            optimizer,
            env.human_num,
            reduce_sp_update_frequency=train_config.train.
            reduce_sp_update_frequency,
            freeze_state_predictor=train_config.train.freeze_state_predictor,
            detach_state_predictor=train_config.train.detach_state_predictor,
            share_graph_model=policy_config.model_predictive_rl.
            share_graph_model)
    else:
        trainer = VNRLTrainer(model, memory, device, policy, batch_size,
                              optimizer, writer)
    explorer = Explorer(env,
                        robot,
                        device,
                        writer,
                        memory,
                        policy.gamma,
                        target_policy=policy)
    policy.save_model(in_weight_file)
    # imitation learning
    # if args.resume:
    #     if not os.path.exists(rl_weight_file):
    #         logging.error('RL weights does not exist')
    #     policy.load_state_dict(torch.load(rl_weight_file))
    #     model = policy.get_model()
    #     rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth')
    #     logging.info('Load reinforcement learning trained weights. Resume training')
    # elif os.path.exists(il_weight_file):
    #     policy.load_state_dict(torch.load(rl_weight_file))
    #     model = policy.get_model()
    #     logging.info('Load imitation learning trained weights.')
    # else:
    #     il_episodes = train_config.imitation_learning.il_episodes
    #     il_policy = train_config.imitation_learning.il_policy
    #     il_epochs = train_config.imitation_learning.il_epochs
    #     il_learning_rate = train_config.imitation_learning.il_learning_rate
    #     trainer.set_learning_rate(il_learning_rate)
    #     if robot.visible:
    #         safety_space = 0
    #     else:
    #         safety_space = train_config.imitation_learning.safety_space
    #     il_policy = policy_factory[il_policy]()
    #     il_policy.set_common_parameters(policy_config)
    #     il_policy.multiagent_training = policy.multiagent_training
    #     il_policy.safety_space = safety_space
    #     robot.set_policy(il_policy)
    #     explorer.run_k_episodes(il_episodes, 'train', update_memory=True, imitation_learning=True)
    #     trainer.optimize_epoch(il_epochs)
    #     policy.save_model(il_weight_file)
    #     logging.info('Finish imitation learning. Weights saved.')
    #     logging.info('Experience set size: %d/%d', len(memory), memory.capacity)

    trainer.update_target_model(model)

    # reinforcement learning
    policy.set_env(env)
    robot.set_policy(policy)
    robot.print_info()
    trainer.set_rl_learning_rate(rl_learning_rate)
    # fill the memory pool with some RL experience
    if args.resume:
        robot.policy.set_epsilon(epsilon_end)
        explorer.run_k_episodes(100, 'train', update_memory=True, episode=0)
        logging.info('Experience set size: %d/%d', len(memory),
                     memory.capacity)
    episode = 0
    best_val_reward = -1
    best_val_return = -1
    best_val_model = None
    # evaluate the model after imitation learning

    if episode % evaluation_interval == 0:
        logging.info(
            'Evaluate the model instantly after imitation learning on the validation cases'
        )
        explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode)
        explorer.log('val', episode // evaluation_interval)

        if args.test_after_every_eval:
            explorer.run_k_episodes(env.case_size['test'],
                                    'test',
                                    episode=episode,
                                    print_failure=True)
            explorer.log('test', episode // evaluation_interval)

    episode = 0
    reward_rec = []
    return_rec = []
    discom_tim_rec = []
    nav_time_rec = []
    total_time_rec = []
    reward_in_last_interval = 0
    return_in_last_interval = 0
    nav_time__in_last_interval = 0
    discom_time_in_last_interval = 0
    total_time_in_last_interval = 0
    eps_count = 0
    fw = open(os.path.join(args.output_dir, 'data.txt'), 'w')
    print("%f %f %f %f %f" % (0, 0, 0, 0, 0), file=fw)
    while episode < train_episodes:
        if args.resume:
            epsilon = epsilon_end
        else:
            if episode < epsilon_decay:
                epsilon = epsilon_start + (
                    epsilon_end - epsilon_start) / epsilon_decay * episode
            else:
                epsilon = epsilon_end
        robot.policy.set_epsilon(epsilon)

        # sample k episodes into memory and optimize over the generated memory
        _, _, nav_time, sum_reward, ave_return, discom_time, total_time = \
            explorer.run_k_episodes(sample_episodes, 'train', update_memory=True, episode=episode)
        eps_count = eps_count + 1
        reward_in_last_interval = reward_in_last_interval + sum_reward
        return_in_last_interval = return_in_last_interval + ave_return
        nav_time__in_last_interval = nav_time__in_last_interval + nav_time
        discom_time_in_last_interval = discom_time_in_last_interval + discom_time
        total_time_in_last_interval = total_time_in_last_interval + total_time
        interval = 100
        if eps_count % interval == 0:
            reward_rec.append(reward_in_last_interval / 100.0)
            return_rec.append(return_in_last_interval / 100.0)
            discom_tim_rec.append(discom_time_in_last_interval / 100.0)
            nav_time_rec.append(nav_time__in_last_interval / 100.0)
            total_time_rec.append(total_time_in_last_interval / 100.0)
            logging.info(
                'Train in episode %d reward in last 100 episodes %f %f %f %f %f',
                eps_count, reward_rec[-1], return_rec[-1], discom_tim_rec[-1],
                nav_time_rec[-1], total_time_rec[-1])
            print("%f %f %f %f %f" %
                  (reward_rec[-1], return_rec[-1], discom_tim_rec[-1],
                   nav_time_rec[-1], total_time_rec[-1]),
                  file=fw)
            reward_in_last_interval = 0
            return_in_last_interval = 0
            nav_time__in_last_interval = 0
            discom_time_in_last_interval = 0
            total_time_in_last_interval = 0
            min_reward = (np.min(reward_rec) // 5.0) * 5.0
            max_reward = (np.max(reward_rec) // 5.0 + 1) * 5.0
            pos = np.array(range(1, len(reward_rec) + 1)) * interval
            plt.plot(pos,
                     reward_rec,
                     color='r',
                     marker='.',
                     linestyle='dashed')
            plt.axis([0, eps_count, min_reward, max_reward])
            savefig(args.output_dir + "/reward_record.jpg")
        explorer.log('train', episode)

        trainer.optimize_batch(train_batches, episode)
        episode += 1

        if episode % target_update_interval == 0:
            trainer.update_target_model(model)
        # evaluate the model
        if episode % evaluation_interval == 0:
            _, _, _, reward, average_return, _, _ = explorer.run_k_episodes(
                env.case_size['val'], 'val', episode=episode)
            explorer.log('val', episode // evaluation_interval)

            if episode % checkpoint_interval == 0 and average_return > best_val_return:
                best_val_return = average_return
                best_val_model = copy.deepcopy(policy.get_state_dict())
            # test after every evaluation to check how the generalization performance evolves
            if args.test_after_every_eval:
                explorer.run_k_episodes(env.case_size['test'],
                                        'test',
                                        episode=episode,
                                        print_failure=True)
                explorer.log('test', episode // evaluation_interval)

        if episode != 0 and episode % checkpoint_interval == 0:
            current_checkpoint = episode // checkpoint_interval - 1
            save_every_checkpoint_rl_weight_file = rl_weight_file.split(
                '.')[0] + '_' + str(current_checkpoint) + '.pth'
            policy.save_model(save_every_checkpoint_rl_weight_file)

    # # test with the best val model
    fw.close()
    if best_val_model is not None:
        policy.load_state_dict(best_val_model)
        torch.save(best_val_model, os.path.join(args.output_dir,
                                                'best_val.pth'))
        logging.info('Save the best val model with the return: {}'.format(
            best_val_return))
    explorer.run_k_episodes(env.case_size['test'],
                            'test',
                            episode=episode,
                            print_failure=True)
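
The training loop above anneals the exploration rate linearly: epsilon falls from
epsilon_start to epsilon_end over the first epsilon_decay episodes and stays constant
afterwards. A small sketch with illustrative values (the real values come from the
train config):

def epsilon_at(episode, epsilon_start=0.5, epsilon_end=0.1, epsilon_decay=4000):
    """Linear epsilon schedule used in the training loops above (example values)."""
    if episode < epsilon_decay:
        return epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
    return epsilon_end

print(epsilon_at(0))      # 0.5
print(epsilon_at(2000))   # ~0.3
print(epsilon_at(10000))  # 0.1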
Beispiel #20
0
def main():
    parser = argparse.ArgumentParser("Parse configuration file")
    parser.add_argument("--env_config", type=str, default="configs/env.config")
    parser.add_argument("--policy", type=str, default="cadrl")
    parser.add_argument("--policy_config",
                        type=str,
                        default="configs/policy.config")
    # parser.add_argument('--train_config', type=str, default='configs/train-test.config')
    parser.add_argument("--train_config",
                        type=str,
                        default="configs/train.config")
    parser.add_argument("--output_dir",
                        type=str,
                        default="data2/actenvcarl_alltf")
    parser.add_argument("--weights", type=str)
    parser.add_argument("--resume", default=False, action="store_true")
    parser.add_argument("--gpu", default=True, action="store_true")
    parser.add_argument("--debug", default=True, action="store_true")
    parser.add_argument("--phase", type=str, default="train")
    parser.add_argument("--test_policy_flag", type=str, default="1")
    parser.add_argument("--optimizer", type=str, default="SGD")
    parser.add_argument("--multi_process", type=str, default="average")

    # reward parameters
    parser.add_argument("--agent_timestep", type=float, default=0.4)
    parser.add_argument("--human_timestep", type=float, default=0.5)
    parser.add_argument("--reward_increment", type=float, default=4.0)
    parser.add_argument("--position_variance", type=float, default=4.0)
    parser.add_argument("--direction_variance", type=float, default=4.0)

    # visible or not
    parser.add_argument("--visible", default=False, action="store_true")

    # act step cnt
    parser.add_argument("--act_steps", type=int, default=1)
    parser.add_argument("--act_fixed", default=False, action="store_true")

    args = parser.parse_args()

    agent_timestep = args.agent_timestep
    human_timestep = args.human_timestep
    reward_increment = args.reward_increment
    position_variance = args.position_variance
    direction_variance = args.direction_variance

    # agent_visible = args.visible

    optimizer_type = args.optimizer

    # configure paths
    make_new_dir = True
    if os.path.exists(args.output_dir):
        key = input(
            "Output directory already exists! Overwrite the folder? (y/n)")
        if key == "y" and not args.resume:
            shutil.rmtree(args.output_dir)
        else:
            make_new_dir = False
            args.env_config = os.path.join(args.output_dir,
                                           os.path.basename(args.env_config))
            args.policy_config = os.path.join(
                args.output_dir, os.path.basename(args.policy_config))
            args.train_config = os.path.join(
                args.output_dir, os.path.basename(args.train_config))
            shutil.copy("utils/trainer.py", args.output_dir)
            shutil.copy("policy/" + args.policy + ".py", args.output_dir)

    if make_new_dir:
        os.makedirs(args.output_dir)
        shutil.copy(args.env_config, args.output_dir)
        shutil.copy(args.policy_config, args.output_dir)
        shutil.copy(args.train_config, args.output_dir)
        shutil.copy("utils/trainer.py", args.output_dir)
        shutil.copy("policy/" + args.policy + ".py", args.output_dir)
    log_file = os.path.join(args.output_dir, "output.log")
    il_weight_file = os.path.join(args.output_dir, "il_model.pth")
    rl_weight_file = os.path.join(args.output_dir, "rl_model.pth")

    # configure logging
    mode = "a" if args.resume else "w"
    file_handler = logging.FileHandler(log_file, mode=mode)
    stdout_handler = logging.StreamHandler(sys.stdout)
    level = logging.INFO if not args.debug else logging.DEBUG
    logging.basicConfig(
        level=level,
        handlers=[stdout_handler, file_handler],
        format="%(asctime)s, %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    # repo = git.Repo(search_parent_directories=True)
    # logging.info('Current git head hash code: %s'.format(repo.head.object.hexsha))
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info("Using device: %s", device)

    # configure policy
    policy = policy_factory[args.policy]()
    # policy.set_phase(args.phase)
    print("policy type: ", type(policy))

    if not policy.trainable:
        parser.error("Policy has to be trainable")
    if args.policy_config is None:
        parser.error(
            "Policy config has to be specified for a trainable network")
    policy_config = configparser.RawConfigParser()
    policy_config.read(args.policy_config)

    policy_config.set("actenvcarl", "test_policy_flag", args.test_policy_flag)
    policy_config.set("actenvcarl", "multi_process", args.multi_process)
    policy_config.set("actenvcarl", "act_steps", args.act_steps)
    policy_config.set("actenvcarl", "act_fixed", args.act_fixed)

    policy.configure(policy_config)
    policy.set_device(device)

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(args.env_config)
    # option values must be strings for RawConfigParser
    env_config.set("reward", "agent_timestep", str(agent_timestep))
    env_config.set("reward", "human_timestep", str(human_timestep))
    env_config.set("reward", "reward_increment", str(reward_increment))
    env_config.set("reward", "position_variance", str(position_variance))
    env_config.set("reward", "direction_variance", str(direction_variance))
    env = gym.make("CrowdSim-v0")
    env.configure(env_config)

    # only used to fetch part of the environment configuration, e.g. the robot radius and speed
    robot = Robot(env_config, "robot")
    env.set_robot(robot)

    # read training parameters
    if args.train_config is None:
        parser.error(
            "Train config has to be specified for a trainable network")

    train_config = configparser.RawConfigParser()
    train_config.read(args.train_config)
    rl_learning_rate = train_config.getfloat("train", "rl_learning_rate")
    train_batches = train_config.getint("train", "train_batches")
    train_episodes = train_config.getint("train", "train_episodes")
    sample_episodes = train_config.getint("train", "sample_episodes")
    target_update_interval = train_config.getint("train",
                                                 "target_update_interval")
    evaluation_interval = train_config.getint("train", "evaluation_interval")
    capacity = train_config.getint("train", "capacity")
    epsilon_start = train_config.getfloat("train", "epsilon_start")
    epsilon_end = train_config.getfloat("train", "epsilon_end")
    epsilon_decay = train_config.getfloat("train", "epsilon_decay")
    checkpoint_interval = train_config.getint("train", "checkpoint_interval")

    # configure trainer and explorer
    memory = ReplayMemory(capacity)

    model = policy.get_model()

    batch_size = train_config.getint("trainer", "batch_size")

    trainer = Trainer(model, memory, device, batch_size)

    explorer = Explorer(env,
                        robot,
                        device,
                        memory,
                        policy.gamma,
                        target_policy=policy)

    # imitation learning
    if args.resume:
        if not os.path.exists(rl_weight_file):
            logging.error("RL weights does not exist")
        model.load_state_dict(torch.load(rl_weight_file))
        rl_weight_file = os.path.join(args.output_dir, "resumed_rl_model.pth")
        logging.info(
            "Load reinforcement learning trained weights. Resume training")
    elif os.path.exists(il_weight_file):
        model.load_state_dict(torch.load(il_weight_file))
        logging.info("Load imitation learning trained weights.")
    else:
        il_episodes = train_config.getint("imitation_learning", "il_episodes")
        il_policy = train_config.get("imitation_learning", "il_policy")
        il_epochs = train_config.getint("imitation_learning", "il_epochs")
        il_learning_rate = train_config.getfloat("imitation_learning",
                                                 "il_learning_rate")
        trainer.set_learning_rate(il_learning_rate, optimizer_type)
        if robot.visible:
            safety_space = 0
        else:
            safety_space = train_config.getfloat("imitation_learning",
                                                 "safety_space")
        il_policy = policy_factory[il_policy]()
        il_policy.multiagent_training = policy.multiagent_training
        il_policy.safety_space = safety_space

        print("il_policy: ", type(il_policy))
        robot.set_policy(il_policy)
        explorer.run_k_episodes(200,
                                "train",
                                update_memory=True,
                                imitation_learning=True)

        trainer.optimize_epoch(il_epochs)

        torch.save(model.state_dict(), il_weight_file)
        logging.info("Finish imitation learning. Weights saved.")
        logging.info("Experience set size: %d/%d", len(memory),
                     memory.capacity)

    explorer.update_target_model(model)

    # reinforcement learning
    policy.set_env(env)
    robot.set_policy(policy)
    robot.print_info()
    trainer.set_learning_rate(rl_learning_rate, optimizer_type)
    # fill the memory pool with some RL experience
    if args.resume:
        robot.policy.set_epsilon(epsilon_end)
        explorer.run_k_episodes(100, "train", update_memory=True, episode=0)
        logging.info("Experience set size: %d/%d", len(memory),
                     memory.capacity)
    episode = 0
    while episode < train_episodes:
        if args.resume:
            epsilon = epsilon_end
        else:
            if episode < epsilon_decay:
                epsilon = epsilon_start + (
                    epsilon_end - epsilon_start) / epsilon_decay * episode
            else:
                epsilon = epsilon_end
        robot.policy.set_epsilon(epsilon)

        # evaluate the model
        if episode % evaluation_interval == 0:
            explorer.run_k_episodes(env.case_size["val"],
                                    "val",
                                    episode=episode)

        # sample k episodes into memory and optimize over the generated memory
        explorer.run_k_episodes(sample_episodes,
                                "train",
                                update_memory=True,
                                episode=episode)
        trainer.optimize_batch(train_batches)
        episode += 1

        if episode % target_update_interval == 0:
            explorer.update_target_model(model)

        if episode != 0 and episode % checkpoint_interval == 0:
            # rl_weight_file_name = 'rl_model_' + str(episode)  + '.pth'
            rl_weight_file_name = "rl_model_{:d}.pth".format(episode)
            rl_weight_file = os.path.join(args.output_dir, rl_weight_file_name)
            torch.save(model.state_dict(),
                       rl_weight_file,
                       _use_new_zipfile_serialization=False)

    # final test
    explorer.run_k_episodes(env.case_size["test"], "test", episode=episode)
def test_model(m_p, model_type, visible=False, n_episodes=5000, model=None, env_type='orca', traj_path=None, traj_length=5, env_mods=None,
               layout='circle_crossing', return_exp=False, t_fex=None, pred_head=None, notebook=True, suffix=None):
    policy_p = 'configs/policy.config'
    if not visible:
        env_p = 'configs/env.config'
    else:
        env_p = 'configs/env_visible.config'
    policy_type = model_type
    
    # configure policy
    policy = policy_factory[policy_type]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_p)
    policy.configure(policy_config)

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_p)
    if env_type == 'orca':
        env = gym.make('CrowdSim-v0')
    else:
        assert env_type == 'socialforce'
        env = CrowdSim_SF()
    env.configure(env_config)
    modify_env_params(env, layout=layout, mods=env_mods)

    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    policy.set_env(env)

    policy.set_phase('val')
    policy.set_device('cpu')

    prediction_head = None
    if traj_path is not None:
        #prediction_head = ProjHead(feat_dim=32, hidden_dim=16*max(int(traj_length/2), 1), head_dim=2*traj_length)
        prediction_head = torch.load(traj_path).to('cpu')
    if pred_head is not None:
        prediction_head = pred_head

    if model is None:
        policy.model.load_state_dict(torch.load(m_p))
    else:
        policy.model = model

    if t_fex is not None:
        policy.model.trajectory_fext = t_fex

    if model_type == 'sail':
        explorer = ExplorerDs(env, robot, 'cpu', 1, gamma=0.9)
    else:
        explorer = ExplorerDs(env, robot, 'cpu', 5, gamma=0.9)
    explorer.robot = robot

    res, exp = explorer.run_k_episodes(n_episodes, 'test', progressbar=True,
                                       output_info=True, notebook=notebook, print_info=True,
                                       traj_head=prediction_head, return_states=True, suffix=suffix)
    if return_exp:
        return res, exp
    return res
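
# Hypothetical usage sketch (the checkpoint path, policy name and episode count
# below are assumptions, not from the original source):
#
#     res = test_model('data/output/rl_model.pth', 'sarl', n_episodes=100,
#                      env_type='orca', layout='circle_crossing', notebook=False)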
Example #22
class NN_tb3():
    def __init__(self, env, env_config, policy):

        #
        self.env = env
        self.env_config = env_config
        # configure robot
        self.robot = Robot(env_config, 'robot')
        self.robot.set_policy(policy)

        self.env.set_robot(self.robot)  # pass robot parameters into env
        # initialize some parameters from the .config file (e.g. time_step, success_reward) for other instances
        self.ob = env.reset('test', 1)
        self.policy = policy
        self.policy.set_env(env)
        # for subscribers
        self.pose = PoseStamped()
        self.vel = Vector3()
        self.psi = 0.0

        # for publishers
        self.global_goal = PoseStamped()
        self.goal = PoseStamped()
        self.desired_position = PoseStamped()
        self.desired_action = np.zeros((2, ))

        # # publishers
        self.pub_twist = rospy.Publisher('/cmd_vel', Twist, queue_size=1)
        self.pub_pose_marker = rospy.Publisher('', Marker, queue_size=1)
        # self.pub_agent_markers = rospy.Publisher('~agent_markers',MarkerArray,queue_size=1)
        # self.pub_path_marker = rospy.Publisher('~path_marker',Marker,queue_size=1)
        # self.pub_goal_path_marker = rospy.Publisher('~goal_path_marker',Marker,queue_size=1)
        # # sub
        self.sub_pose = rospy.Subscriber('/odom', Odometry, self.cbPose)
        self.sub_global_goal = rospy.Subscriber('/goal', PoseStamped,
                                                self.cbGlobalGoal)
        # self.sub_subgoal = rospy.Subscriber('~subgoal',PoseStamped, self.cbSubGoal)

        # subgoals
        self.sub_goal = Vector3()

        # self.sub_clusters = rospy.Subscriber('~clusters',Clusters, self.cbClusters)

        # control timer
        # self.control_timer = rospy.Timer(rospy.Duration(0.01),self.cbControl)
        self.nn_timer = rospy.Timer(rospy.Duration(0.3),
                                    self.cbComputeActionCrowdNav)

    def cbGlobalGoal(self, msg):
        self.stop_moving_flag = True
        self.new_global_goal_received = True
        self.global_goal = msg
        self.goal.pose.position.x = msg.pose.position.x
        self.goal.pose.position.y = msg.pose.position.y
        self.goal.header = msg.header

        # reset subgoals
        print("new goal: " +
              str([self.goal.pose.position.x, self.goal.pose.position.y]))

    def cbSubGoal(self, msg):
        self.sub_goal.x = msg.pose.position.x
        self.sub_goal.y = msg.pose.position.y
        # print "new subgoal: "+str(self.sub_goal)

    def cbPose(self, msg):
        self.cbVel(msg)
        q = msg.pose.pose.orientation
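        # yaw from quaternion: psi = atan2(2*(qw*qz + qx*qy), 1 - 2*(qy^2 + qz^2))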
        self.psi = np.arctan2(2.0 * (q.w * q.z + q.x * q.y), 1 - 2 *
                              (q.y * q.y + q.z * q.z))  # bounded by [-pi, pi]
        self.pose = msg.pose
        # self.visualize_pose(msg.pose.pose.position,msg.pose.pose.orientation)

    def cbVel(self, msg):
        self.vel = msg.twist.twist.linear

    def cbClusters(self, msg):
        other_agents = []

        xs = []
        ys = []
        radii = []
        labels = []
        num_clusters = len(msg.labels)

        for i in range(num_clusters):
            index = msg.labels[i]
            x = msg.mean_points[i].x
            y = msg.mean_points[i].y
            v_x = msg.velocities[i].x
            v_y = msg.velocities[i].y
            radius = self.obst_rad

            xs.append(x)
            ys.append(y)
            radii.append(radius)
            labels.append(index)
            # self.visualize_other_agent(x,y,radius,msg.labels[i])
            # helper fields
            heading_angle = np.arctan2(v_y, v_x)
            pref_speed = np.linalg.norm(np.array([v_x, v_y]))
            goal_x = x + 5.0
            goal_y = y + 5.0

            if pref_speed < 0.2:
                pref_speed = 0
                v_x = 0
                v_y = 0
            other_agents.append(
                agent.Agent(x, y, goal_x, goal_y, radius, pref_speed,
                            heading_angle, index))
        self.visualize_other_agents(xs, ys, radii, labels)
        self.other_agents_state = other_agents

    def stop_moving(self):
        twist = Twist()
        self.pub_twist.publish(twist)

    def update_action(self, action):
        # print 'update action'
        self.desired_action = action
        self.desired_position.pose.position.x = self.pose.pose.position.x + 1 * action[0] * np.cos(action[1])
        self.desired_position.pose.position.y = self.pose.pose.position.y + 1 * action[0] * np.sin(action[1])

    def cbControl(self, event):

        if self.goal.header.stamp == rospy.Time(0) or \
                (self.stop_moving_flag and not self.new_global_goal_received):
            self.stop_moving()
            return
        elif self.operation_mode.mode == self.operation_mode.NN:
            desired_yaw = self.desired_action[1]
            yaw_error = desired_yaw - self.psi
            if abs(yaw_error) > np.pi:
                yaw_error -= np.sign(yaw_error) * 2 * np.pi

            gain = 1.3  # canon: 2
            vw = gain * yaw_error

            use_d_min = True
            if False:  # canon: True
                # use_d_min = True
                # print "vmax:", self.find_vmax(self.d_min,yaw_error)
                vx = min(self.desired_action[0],
                         self.find_vmax(self.d_min, yaw_error))
            else:
                vx = self.desired_action[0]

            twist = Twist()
            twist.angular.z = vw
            twist.linear.x = vx
            self.pub_twist.publish(twist)
            self.visualize_action(use_d_min)
            return

        elif self.operation_mode.mode == self.operation_mode.SPIN_IN_PLACE:
            print('Spinning in place.')
            self.stop_moving_flag = False
            angle_to_goal = np.arctan2(self.global_goal.pose.position.y - self.pose.pose.position.y, \
                self.global_goal.pose.position.x - self.pose.pose.position.x)
            global_yaw_error = self.psi - angle_to_goal
            if abs(global_yaw_error) > 0.5:
                vx = 0.0
                vw = 1.0
                twist = Twist()
                twist.angular.z = vw
                twist.linear.x = vx
                self.pub_twist.publish(twist)
                # print twist
            else:
                print('Done spinning in place')
                self.operation_mode.mode = self.operation_mode.NN
                # self.new_global_goal_received = False
            return
        else:
            self.stop_moving()
            return

    def cbComputeActionCrowdNav(self, event):
        robot_x = self.pose.pose.position.x
        robot_y = self.pose.pose.position.y
        # goal
        goal_x = self.global_goal.pose.position.x
        goal_y = self.global_goal.pose.position.y
        # velocity
        robot_vx = self.vel.x
        robot_vy = self.vel.y
        # orientation
        theta = self.psi
        robot_radius = 0.3

        # set robot info
        self.robot.set(robot_x, robot_y, goal_x, goal_y, robot_vx, robot_vy,
                       theta, robot_radius)

        # obstacle: position, velocity, radius
        # position
        # obstacle_x = [0.1,0.2,0.3,0.4,0.5]
        # obstacle_y = [0.1,0.2,0.3,0.4,0.5]
        # # velocity
        # obstacle_vx = [0.1,0.2,0.3,0.4,0.5]
        # obstacle_vy = [0.1,0.2,0.3,0.4,0.5]

        obstacle_x = [-6.0, -6.0, -6.0, -6.0, -6.0]
        obstacle_y = [-6.0, -6.0, -6.0, -6.0, -6.0]
        # velocity
        obstacle_vx = [0.0, 0.0, 0.0, 0.0, 0.0]
        obstacle_vy = [0.0, 0.0, 0.0, 0.0, 0.0]
        obstacle_radius = 0.3

        # initialize obstacle instances and set values
        for i in range(self.env_config.getint('sim', 'human_num')):
            self.env.humans[i].set(obstacle_x[i], obstacle_y[i], goal_x,
                                   goal_y, obstacle_vx[i], obstacle_vy[i],
                                   theta, obstacle_radius)
            self.ob[i] = self.env.humans[i].get_observable_state()

        # ************************************ Output ************************************
        # get action info
        action = self.robot.act(self.ob)
        position = self.robot.get_observable_state()
        print('\n---------\nrobot position (X,Y):', position.position)
        print(action)
        print(theta)

        # self.update_action(action)

    def update_subgoal(self, subgoal):
        self.goal.pose.position.x = subgoal[0]
        self.goal.pose.position.y = subgoal[1]

    def visualize_pose(self, pos, orientation):
        # Yellow Box for Vehicle
        marker = Marker()
        marker.header.stamp = rospy.Time.now()
        marker.header.frame_id = 'map'
        marker.ns = 'agent'
        marker.id = 0
        marker.type = marker.CUBE
        marker.action = marker.ADD
        marker.pose.position = pos
        marker.pose.orientation = orientation
        marker.scale = Vector3(x=0.7, y=0.42, z=1)
        marker.color = ColorRGBA(r=1.0, g=1.0, a=1.0)
        marker.lifetime = rospy.Duration(1.0)
        self.pub_pose_marker.publish(marker)

        # Red track for trajectory over time
        marker = Marker()
        marker.header.stamp = rospy.Time.now()
        marker.header.frame_id = 'map'
        marker.ns = 'agent'
        marker.id = self.num_poses
        marker.type = marker.CUBE
        marker.action = marker.ADD
        marker.pose.position = pos
        marker.pose.orientation = orientation
        marker.scale = Vector3(x=0.2, y=0.2, z=0.2)
        marker.color = ColorRGBA(r=1.0, a=1.0)
        marker.lifetime = rospy.Duration(10.0)
        self.pub_pose_marker.publish(marker)

    def on_shutdown(self):
        rospy.loginfo("[%s] Shutting down.")
        self.stop_moving()
        rospy.loginfo("Stopped %s's velocity.")
Example #23
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--output_file', type=str, default=None)
    parser.add_argument('--vis_type', type=str, default='traj')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir,
                                       os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir,
                                          os.path.basename(args.policy_config))
        if args.il:
            policy_model_weight_file = os.path.join(args.model_dir,
                                                    'il_policy_model.pth')
            statistics_model_weight_file = os.path.join(
                args.model_dir, 'il_statistics_model.pth')
            forward_dynamics_model_weight_file = os.path.join(
                args.model_dir, 'il_forward_dynamics_model.pth')
        else:
            policy_model_weight_file = os.path.join(args.model_dir,
                                                    'rl_policy_model.pth')
            statistics_model_weight_file = os.path.join(
                args.model_dir, 'rl_statistics_model.pth')
            forward_dynamics_model_weight_file = os.path.join(
                args.model_dir, 'rl_forward_dynamics_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error(
                'Trainable policy must be specified with a model weights directory'
            )
        policy.get_policy_model().load_state_dict(
            torch.load(policy_model_weight_file, map_location=device))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = EmpowermentExplorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()
    if args.visualize:
        if args.vis_type in ['traj', 'video', 'snapshots']:
            visualize_episode(env, robot, args)

        elif args.vis_type == 'distribution':
            env.discomfort_dist = 0.5
            n_tests = 1
            n_reached_goal = 0
            n_too_close = 0
            min_dist = []
            hum_travel_dist = []
            hum_travel_time = []
            times = []

            for test_num in range(n_tests):
                ob = env.reset(args.phase, test_num)
                done = False
                last_pos = np.array(robot.get_position())
                while not done:
                    action = robot.act(ob)
                    action = ActionXY(action[0], action[1])
                    ob, _, done, info = env.step(action)
                    current_pos = np.array(robot.get_position())
                    logging.debug(
                        'Speed: %.2f',
                        np.linalg.norm(current_pos - last_pos) /
                        robot.time_step)
                    last_pos = current_pos

                    if isinstance(info, Danger):
                        n_too_close += 1
                        min_dist.append(info.min_dist)

                if isinstance(info, ReachGoal):
                    n_reached_goal += 1
                    times.append(env.global_time)

                    (human_times, human_distances) = env.get_human_times()
                    hum_travel_dist.append(human_distances)
                    hum_travel_time.append(human_times)

                logging.info(
                    'Test %s takes %.2f seconds to finish. Final status is %s.',
                    test_num, env.global_time, info)
            distribution_seperation_distance(min_dist)
            distribution_human_path_lengths(hum_travel_dist)

    else:
        explorer.run_k_episodes(env.case_size[args.phase],
                                args.phase,
                                print_failure=True)
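
# Hypothetical invocation (the script name, policy name and model directory are
# assumptions, not from the original source); the 'distribution' branch above
# collects separation-distance and human-path statistics instead of rendering:
#
#     python test.py --policy sarl --model_dir data/output --visualize --vis_type distribution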
Example #24
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--test_policy', type=str, default='mctsrnn')
    parser.add_argument('--output_dir', type=str, default=None)
    parser.add_argument('--save_fig', default=False, action='store_true')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--pred_model_dir', type=str, default=None)
    parser.add_argument('--use_mcts_sef2', default=False, action='store_true')
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--mixed', default=True, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--interactive', default=False, action='store_true')
    parser.add_argument('--compare_scenarios',
                        default=False,
                        action='store_true')
    parser.add_argument('--pattern', type=int, default=0)
    args = parser.parse_args()

    # Need to create a robot policy within the simulated environment, even though we do not use it for testing when MCTS or PF is used
    if args.policy == 'mctsrnn':
        args.policy = 'orca'
        args.test_policy = 'mctsrnn'
    elif args.policy == 'mctscv':
        args.policy = 'orca'
        args.test_policy = 'mctscv'
    elif args.policy == 'pf':
        args.policy = 'orca'
        args.test_policy = 'pf'
    elif args.policy == 'sarl':
        args.test_policy = 'sarl'
    elif args.policy == 'orca':
        args.test_policy = 'orca'
    else:
        sys.exit(
            "--policy must be one of 'mctsrnn', 'mctscv', 'pf', 'sarl', 'orca'"
        )

    if args.model_dir is not None:
        #env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config))
        policy_config_file = args.model_dir + '/policy.config'
        env_config_file = args.env_config
        #policy_config_file = args.policy_config
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(
                    os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir,
                                             'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        #policy_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)

    policy.configure(policy_config)

    if policy.trainable:
        if args.model_dir is None:
            parser.error(
                'Trainable policy must be specified with a model weights directory'
            )
        policy.get_model().load_state_dict(torch.load(model_weights))
    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)

    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    if args.mixed:
        env.test_sim = 'mixed'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)
    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0.2
        else:
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.interactive:
        #for testing how orca responds to action inputs
        print('phase', 'interactive')
        root = Tk()
        app = App(root, env, args.pattern)
        root.mainloop()

    elif args.visualize:
        print('phase', args.phase)
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)

            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug(
                'Speed: %.2f',
                np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        if args.traj:
            env.render('traj', args.video_file)
        else:
            env.render('video', args.video_file)

        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase],
                                args.phase,
                                print_failure=True,
                                test_policy=args.test_policy,
                                output_dir=args.output_dir,
                                save_fig=args.save_fig,
                                model_dir=args.pred_model_dir,
                                use_mcts_sef2=args.use_mcts_sef2,
                                comparisons=args.compare_scenarios)
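
# Hypothetical invocation (the script name and model directory are assumptions,
# not from the original source); --policy is remapped internally as shown above,
# e.g. 'mctsrnn' keeps ORCA as the simulated robot policy and passes 'mctsrnn'
# on as the test policy:
#
#     python test.py --policy mctsrnn --model_dir data/output --visualize --traj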
Example #25
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=True, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=0)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--plot', default=False, action='store_true')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir,
                                       os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir,
                                          os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(
                    os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir,
                                             'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error(
                'Trainable policy must be specified with a model weights directory'
            )
        policy.get_model().load_state_dict(
            torch.load(model_weights, map_location=torch.device('cpu')))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # because invisible case breaks the reciprocal assumption
            # adding some safety space improves ORCA performance. Tune this value based on your need.
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:

        obs = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        policy_time = 0.0
        numFrame = 0

        dist = 20.0
        obs_flg = 0

        sim_time = False
        while sim_time == False:
            samples, robot_state, sim_time = pc2obs.pc2obs(
                voxel_size=voxel_size)
        t1 = float(sim_time)
        while (dist > 0.8):
            #t1 = time.time()
            env.humans = []
            samples, robot_state, sim_time = pc2obs.pc2obs(
                voxel_size=voxel_size)
            if isinstance(samples, bool):
                print("Not connected")
                continue
            dist = math.sqrt((GOAL_X - robot_state[0])**2 +
                             (GOAL_Y - robot_state[1])**2)
            if obs_flg == 0 and dist < 10:
                os.system('sh ./init.sh')
                obs_flg = 1
            print("dist: {}".format(dist))
            # rotate and shift obs position
            t2 = time.time()
            yaw = robot_state[2]
            rot_matrix = np.array([[math.cos(yaw),
                                    math.sin(yaw)],
                                   [-math.sin(yaw),
                                    math.cos(yaw)]])
            #samples = np.array([sample + robot_state[0:2] for sample in samples[:,0:2]])

            if len(samples) == 1:
                samples = np.array(
                    [np.dot(rot_matrix, samples[0][0:2]) + robot_state[0:2]])
                print(samples)
            elif len(samples) > 1:
                samples = np.array([
                    np.dot(rot_matrix, sample) + robot_state[0:2]
                    for sample in samples[:, 0:2]
                ])

            obs_position_list = samples
            obs = []

            for ob in obs_position_list:
                human = Human(env.config, 'humans')
                # px, py, gx, gy, vx, vy, theta
                human.set(ob[0], ob[1], ob[0], ob[1], 0, 0, 0)
                env.humans.append(human)
                # px, py, vx, vy, radius
                obs.append(ObservableState(ob[0], ob[1], 0, 0, voxel_size / 2))
            if len(obs_position_list) == 0:
                human = Human(env.config, 'humans')
                # SARL, CADRL
                human.set(0, -10, 0, -10, 0, 0, 0)
                # LSTM
                #human.set(robot_state[0]+10, robot_state[1]+10, robot_state[0]+10, robot_state[1]+10 , 0, 0, 0)
                env.humans.append(human)
                # SARL, CADRL
                obs.append(ObservableState(0, -10, 0, 0, voxel_size / 2))
                # LSTM
                #obs.append(ObservableState(robot_state[0]+10, robot_state[1]+10, 0, 0, voxel_size/2))
            robot.set_position(robot_state)
            robot.set_velocity([math.sin(yaw), math.cos(yaw)])
            #print(obs)
            action = robot.act(obs)
            obs, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            current_vel = np.array(robot.get_velocity())
            #print("Velocity: {}, {}".format(current_vel[0], current_vel[1]))
            logging.debug(
                'Speed: %.2f',
                np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
            policy_time += time.time() - t1
            numFrame += 1
            #print(t2-t1, time.time() - t2)

            diff_angle = (-yaw + math.atan2(current_vel[0], current_vel[1]))
            if diff_angle > math.pi:
                diff_angle = diff_angle - 2 * math.pi
            elif diff_angle < -math.pi:
                diff_angle = diff_angle + 2 * math.pi
            #print("diff_angle: {}, {}, {}".format(diff_angle, yaw ,-math.atan2(current_vel[0], current_vel[1])))
            if diff_angle < -0.7:
                direc = 2  # turn left
            elif diff_angle > 0.7:
                direc = 3  # turn right
            else:
                direc = 1  # go straight
            # GoEasy(direc)
            vx = SPEED * math.sqrt(current_vel[0]**2 + current_vel[1]**2)
            if diff_angle > 0:
                v_ang = ANGULAR_SPEED * min(diff_angle / (math.pi / 2), 1)
            else:
                v_ang = ANGULAR_SPEED * max(diff_angle / (math.pi / 2), -1)
            print(vx, -v_ang)
            easyGo.mvCurve(vx, -v_ang)
            if args.plot:
                plt.scatter(current_pos[0], current_pos[1], label='robot')
                plt.arrow(current_pos[0],
                          current_pos[1],
                          current_vel[0],
                          current_vel[1],
                          width=0.05,
                          fc='g',
                          ec='g')
                plt.arrow(current_pos[0],
                          current_pos[1],
                          math.sin(yaw),
                          math.cos(yaw),
                          width=0.05,
                          fc='r',
                          ec='r')
                if len(samples) == 1:
                    plt.scatter(samples[0][0],
                                samples[0][1],
                                label='obstacles')
                elif len(samples) > 1:
                    plt.scatter(samples[:, 0],
                                samples[:, 1],
                                label='obstacles')
                plt.xlim(-6.5, 6.5)
                plt.ylim(-9, 4)
                plt.legend()
                plt.title("gazebo test")
                plt.xlabel("x (m)")
                plt.ylabel("y (m)")
                plt.pause(0.001)
                plt.cla()
                plt.clf()
            print("NAV TIME {}".format(float(sim_time) - t1))
            time.sleep(0.5)
        print("NAV TIME {}".format(float(sim_time) - t1))
        easyGo.stop()
        plt.close()
        print("Average took {} sec per iteration, {} Frame".format(
            policy_time / numFrame, numFrame))
    else:
        explorer.run_k_episodes(env.case_size[args.phase],
                                args.phase,
                                print_failure=True)
Example #26
    policy.configure(policy_config)
    policy.set_device(device)

    # getting trained weights
    weights_path = 'data/output/rl_model.pth'
    policy.model.load_state_dict(torch.load(weights_path))

    # domain/env config (initially same as used in training)
    env_config = configparser.RawConfigParser()
    env_config.read('configs/env.config')
    if args.policy == 'orca':
        env = CrowdSim()
    else:
        env = CrowdSim_SF()
    env.configure(env_config)
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)

    # explorer, memory is not used as we will only use the explorer in test mode
    memory = ReplayMemory(100000)
    explorer = Explorer(env,
                        robot,
                        device,
                        memory,
                        policy.gamma,
                        target_policy=policy)

    policy.set_env(env)
    robot.set_policy(policy)
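
    # A plausible continuation (an assumption, not in the original fragment): run the
    # loaded policy on the test split, mirroring the other examples above.
    #
    #     policy.set_phase('test')
    #     explorer.run_k_episodes(env.case_size['test'], 'test')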
Example #27
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_file', type=str, default=None)
    parser.add_argument('--result_dir', type=str, default='result')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--num_frames', type=int, default=1)
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--safety_space', type=float, default=0.15)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--traj', default=False, action='store_true')
    args = parser.parse_args()

    policy_config_file = args.policy_config

    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    # configure logging and device
    logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info(' =========== TEST %s ============ ', args.policy)
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_file is None:
            logging.warning('Trainable policy is NOT specified with a model weights directory')
        else:
            try:
                policy.get_model().load_state_dict(torch.load(args.model_file))
                logging.info('Loaded policy from %s', args.model_file)
            except Exception as e:
                logging.warning(e)
                policy.get_model().load_state_dict(torch.load(args.model_file), strict=False)
                logging.info('Loaded policy partially from %s', args.model_file)

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(args.env_config)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)

    # multi-frame env
    if args.num_frames > 1:
        logging.info("stack %d frames", args.num_frames)
        env = FrameStack(env, args.num_frames)

    gamma = policy_config.getfloat('rl', 'gamma')
    explorer = Explorer(env, robot, device, args.num_frames, gamma=gamma)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            # robot.policy.safety_space = 0
            robot.policy.safety_space = args.safety_space
        else:
            # because invisible case breaks the reciprocal assumption
            # adding some safety space improves ORCA performance. Tune this value based on your need.
            robot.policy.safety_space = args.safety_space
        logging.warning('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()
    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            # retrieve frame stack
            if 'RNN' not in robot.policy.name:
                ob = latest_frame(ob, args.num_frames)
            action = robot.act(ob)
            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug('Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos

        output_file = os.path.join(args.result_dir, 'traj_' + str(args.test_case))
        print("output_file", output_file)
        if args.traj:
            env.render('traj', output_file + '.png')
        else:
            env.render('video', output_file + '.mp4')

        logging.info('It takes %.2f seconds to finish. Final status is %s', env.get_global_time(), info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f', sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=False)
Example #28
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=False, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=None)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir,
                                       os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir,
                                          os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(
                    os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir,
                                             'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error(
                'Trainable policy must be specified with a model weights directory'
            )
        policy.get_model().load_state_dict(torch.load(model_weights))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # because invisible case breaks the reciprocal assumption
            # adding some safety space improves ORCA performance. Tune this value based on your need.
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()
    if args.visualize:
        ob = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        while not done:
            action = robot.act(ob)
            ob, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            logging.debug(
                'Speed: %.2f',
                np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
        if args.traj:
            env.render('traj', args.video_file)
        else:
            print('video')
            env.render('video', args.video_file)

        logging.info('It takes %.2f seconds to finish. Final status is %s',
                     env.global_time, info)
        if robot.visible and info == 'reach goal':
            human_times = env.get_human_times()
            logging.info('Average time for humans to reach goal: %.2f',
                         sum(human_times) / len(human_times))
    else:
        explorer.run_k_episodes(env.case_size[args.phase],
                                args.phase,
                                print_failure=True)
Example #29
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy', type=str, default='cadrl')
    parser.add_argument('--policy_config', type=str, default='configs/policy.config')
    parser.add_argument('--train_config', type=str, default='configs/train.config')
    parser.add_argument('--output_dir', type=str, default='data/output')
    parser.add_argument('--weights', type=str)
    parser.add_argument('--resume', default=True, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--debug', default=False, action='store_true')
    args = parser.parse_args()

    # configure paths
    make_new_dir = True
    if os.path.exists(args.output_dir):
        key = input('Output directory already exists! Overwrite the folder? (y/n)')
        if key == 'y' and not args.resume:
            shutil.rmtree(args.output_dir)
        else:
            make_new_dir = False
            args.env_config = os.path.join(args.output_dir, os.path.basename(args.env_config))
            args.policy_config = os.path.join(args.output_dir, os.path.basename(args.policy_config))
            args.train_config = os.path.join(args.output_dir, os.path.basename(args.train_config))
    if make_new_dir:
        os.makedirs(args.output_dir)
        shutil.copy(args.env_config, args.output_dir)
        shutil.copy(args.policy_config, args.output_dir)
        shutil.copy(args.train_config, args.output_dir)
    log_file = os.path.join(args.output_dir, 'output.log')
    il_weight_file = os.path.join(args.output_dir, 'il_model.pth')
    rl_weight_file = os.path.join(args.output_dir, 'rl_model.pth')

    # configure logging
    mode = 'a' if args.resume else 'w'
    file_handler = logging.FileHandler(log_file, mode=mode)
    stdout_handler = logging.StreamHandler(sys.stdout)
    level = logging.INFO if not args.debug else logging.DEBUG
    logging.basicConfig(level=level, handlers=[stdout_handler, file_handler],
                        format='%(asctime)s, %(levelname)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S")
    repo = git.Repo(search_parent_directories=True)
    logging.info('Current git head hash code: %s', repo.head.object.hexsha)
    device = torch.device("cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    if not policy.trainable:
        parser.error('Policy has to be trainable')
    if args.policy_config is None:
        parser.error('Policy config has to be specified for a trainable network')
    policy_config = configparser.RawConfigParser()
    policy_config.read(args.policy_config)
    policy.configure(policy_config)
    policy.set_device(device)

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(args.env_config)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    robot = Robot(env_config, 'robot')
    env.set_robot(robot)

    # read training parameters
    if args.train_config is None:
        parser.error('Train config has to be specified for a trainable network')
    train_config = configparser.RawConfigParser()
    train_config.read(args.train_config)
    rl_learning_rate = train_config.getfloat('train', 'rl_learning_rate')
    train_batches = train_config.getint('train', 'train_batches')
    train_episodes = train_config.getint('train', 'train_episodes')
    sample_episodes = train_config.getint('train', 'sample_episodes')
    target_update_interval = train_config.getint('train', 'target_update_interval')
    evaluation_interval = train_config.getint('train', 'evaluation_interval')
    capacity = train_config.getint('train', 'capacity')
    epsilon_start = train_config.getfloat('train', 'epsilon_start')
    epsilon_end = train_config.getfloat('train', 'epsilon_end')
    epsilon_decay = train_config.getfloat('train', 'epsilon_decay')
    checkpoint_interval = train_config.getint('train', 'checkpoint_interval')

    # configure trainer and explorer
    memory = ReplayMemory(capacity)
    model = policy.get_model()
    batch_size = train_config.getint('trainer', 'batch_size')
    trainer = Trainer(model, memory, device, batch_size)
    explorer = Explorer(env, robot, device, memory, policy.gamma, target_policy=policy)

    # imitation learning
    if args.resume:
        if not os.path.exists(rl_weight_file):
            logging.error('RL weight file does not exist')
        model.load_state_dict(torch.load(rl_weight_file))
        rl_weight_file = os.path.join(args.output_dir, 'resumed_rl_model.pth')
        logging.info('Load reinforcement learning trained weights. Resume training')
    elif os.path.exists(il_weight_file):
        model.load_state_dict(torch.load(il_weight_file))
        logging.info('Load imitation learning trained weights.')
    else:
        il_episodes = train_config.getint('imitation_learning', 'il_episodes')
        il_policy = train_config.get('imitation_learning', 'il_policy')
        il_epochs = train_config.getint('imitation_learning', 'il_epochs')
        il_learning_rate = train_config.getfloat('imitation_learning', 'il_learning_rate')
        trainer.set_learning_rate(il_learning_rate)
        if robot.visible:
            safety_space = 0
        else:
            safety_space = train_config.getfloat('imitation_learning', 'safety_space')
        il_policy = policy_factory[il_policy]()
        il_policy.multiagent_training = policy.multiagent_training
        il_policy.safety_space = safety_space
        robot.set_policy(il_policy)
        explorer.run_k_episodes(il_episodes, 'train', update_memory=True, imitation_learning=True)
        trainer.optimize_epoch(il_epochs)
        torch.save(model.state_dict(), il_weight_file)
        logging.info('Finish imitation learning. Weights saved.')
        logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
    explorer.update_target_model(model)

    # reinforcement learning
    policy.set_env(env)
    robot.set_policy(policy)
    robot.print_info()
    trainer.set_learning_rate(rl_learning_rate)
    # fill the memory pool with some RL experience
    if args.resume:
        robot.policy.set_epsilon(epsilon_end)
        explorer.run_k_episodes(100, 'train', update_memory=True, episode=0)
        logging.info('Experience set size: %d/%d', len(memory), memory.capacity)
    episode = 0
    while episode < train_episodes:
        if args.resume:
            epsilon = epsilon_end
        else:
            if episode < epsilon_decay:
                epsilon = epsilon_start + (epsilon_end - epsilon_start) / epsilon_decay * episode
            else:
                epsilon = epsilon_end
        robot.policy.set_epsilon(epsilon)

        # evaluate the model
        if episode % evaluation_interval == 0:
            explorer.run_k_episodes(env.case_size['val'], 'val', episode=episode)

        # sample k episodes into memory and optimize over the generated memory
        explorer.run_k_episodes(sample_episodes, 'train', update_memory=True, episode=episode)
        trainer.optimize_batch(train_batches)
        episode += 1

        if episode % target_update_interval == 0:
            explorer.update_target_model(model)

        if episode != 0 and episode % checkpoint_interval == 0:
            torch.save(model.state_dict(), rl_weight_file)

    # final test
    explorer.run_k_episodes(env.case_size['test'], 'test', episode=episode)
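
# Hypothetical invocation (the script name is an assumption, not from the original
# source); note that --resume defaults to True above, so this run would try to load
# data/output/rl_model.pth and resume reinforcement learning:
#
#     python train.py --policy cadrl --output_dir data/output --gpu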
Example #30
class NN_tb3():
    def __init__(self, env, env_config, policy):
        # 
        self.env = env
        self.env_config = env_config
        # configure robot
        self.robot = Robot(env_config, 'robot')
        self.robot.set_policy(policy)
        self.env.set_robot(self.robot)    # pass robot parameters into env
        self.ob = env.reset('test', 1)    # initialize some parameters from the .config file (e.g. time_step, success_reward) for other instances
        self.policy = policy
        self.policy.set_env(env) 
        # for action
        self.angle2Action = 0
        self.distance = 0
        # for subscribers
        self.pose = PoseStamped()
        self.vel = Vector3()
        self.psi = 0.0
        # for publishers
        self.global_goal = PoseStamped()
        self.goal = PoseStamped()
        self.desired_position = PoseStamped()
        self.desired_action = np.zeros((2,))
        # # publishers
        self.pub_twist = rospy.Publisher('/robot_0/cmd_vel',Twist,queue_size=1) 
        # self.pub_pose_marker = rospy.Publisher('',Marker,queue_size=1)
        # self.pub_agent_markers = rospy.Publisher('~agent_markers',MarkerArray,queue_size=1)
        # self.pub_path_marker = rospy.Publisher('/action',Marker,queue_size=1)
        # self.pub_goal_path_marker = rospy.Publisher('~goal_path_marker',Marker,queue_size=1)
        # # sub
        self.sub_pose = rospy.Subscriber('/robot_0/odom',Odometry,self.cbPose)
        self.sub_global_goal = rospy.Subscriber('/robot_0/move_base_simple/goal',PoseStamped, self.cbGlobalGoal)
        self.sub_subgoal = rospy.Subscriber('/robot_0/move_base_simple/goal',PoseStamped, self.cbSubGoal)
        self.sub_obstacle = rospy.Subscriber('/robot_0/move_base/TebLocalPlannerROS/obstacles', ObstacleArrayMsg, self.get_obstacles)
        # subgoals
        self.sub_goal = Vector3()
        # self.sub_clusters = rospy.Subscriber('~clusters',Clusters, self.cbClusters)
        # control timer
        self.control_timer = rospy.Timer(rospy.Duration(0.2),self.cbControl)
        self.nn_timer = rospy.Timer(rospy.Duration(0.01),self.cbComputeActionCrowdNav)
    def update_angle2Action(self):
        # action vector from the current pose to the desired position
        v_a = np.array([self.desired_position.pose.position.x - self.pose.pose.position.x,
                        self.desired_position.pose.position.y - self.pose.pose.position.y])
        # unit vector along the current heading
        e_dir = np.array([math.cos(self.psi), math.sin(self.psi)])
        # signed angle between v_a and e_dir via atan2(det, dot)
        self.angle2Action = math.atan2(np.linalg.det([v_a, e_dir]), np.dot(v_a, e_dir))
    def cbGlobalGoal(self,msg):
        self.stop_moving_flag = True
        self.new_global_goal_received = True
        self.global_goal = msg
        self.goal.pose.position.x = msg.pose.position.x
        self.goal.pose.position.y = msg.pose.position.y
        self.goal.header = msg.header
        # reset subgoals
        print("new goal: "+str([self.goal.pose.position.x,self.goal.pose.position.y])) 
    def cbSubGoal(self,msg):
        # update subGoal
        self.sub_goal.x = msg.pose.position.x
        self.sub_goal.y = msg.pose.position.y
    def goalReached(self):
        # check if the robot is within 0.3 m of the global goal
        return self.distance <= 0.3
    def cbPose(self, msg):
        # update robot vel (vx,vy)
        self.cbVel(msg)
        # get pose angle
        q = msg.pose.pose.orientation
        self.psi = np.arctan2(2.0*(q.w*q.z + q.x*q.y), 1-2*(q.y*q.y+q.z*q.z)) # bounded by [-pi, pi]
        self.pose = msg.pose
        # self.visualize_path()
        v_p = msg.pose.pose.position
        v_g = self.sub_goal
        v_pg = np.array([v_g.x-v_p.x,v_g.y-v_p.y])
        self.distance = np.linalg.norm(v_pg)
        # self.visualize_pose(msg.pose.pose.position,msg.pose.pose.orientation)
    def cbVel(self, msg):
        self.vel = msg.twist.twist.linear
    def cbClusters(self, msg):
        # callback for the (currently commented-out) '~clusters' subscriber;
        # it relies on self.obst_rad and self.visualize_other_agents being defined elsewhere
        other_agents = []
        xs, ys, radii, labels = [], [], [], []
        num_clusters = len(msg.labels)
        for i in range(num_clusters):
            index = msg.labels[i]
            x = msg.mean_points[i].x
            y = msg.mean_points[i].y
            v_x = msg.velocities[i].x
            v_y = msg.velocities[i].y
            radius = self.obst_rad
            xs.append(x)
            ys.append(y)
            radii.append(radius)
            labels.append(index)
            # self.visualize_other_agent(x,y,radius,msg.labels[i])
            # helper fields
            heading_angle = np.arctan2(v_y, v_x)
            pref_speed = np.linalg.norm(np.array([v_x, v_y]))
            goal_x = x + 5.0
            goal_y = y + 5.0
            if pref_speed < 0.2:
                pref_speed = 0
                v_x = 0
                v_y = 0
            other_agents.append(agent.Agent(x, y, goal_x, goal_y, radius, pref_speed, heading_angle, index))
        self.visualize_other_agents(xs, ys, radii, labels)
        self.other_agents_state = other_agents
    def stop_moving(self):
        twist = Twist()
        self.pub_twist.publish(twist)
    def update_action(self, action):
        # print 'update action'
        self.desired_action = action
        # self.desired_position.pose.position.x = self.pose.pose.position.x + 1*action[0]*np.cos(action[1])
        # self.desired_position.pose.position.y = self.pose.pose.position.y + 1*action[0]*np.sin(action[1])
        self.desired_position.pose.position.x = self.pose.pose.position.x + (action[0])
        self.desired_position.pose.position.y = self.pose.pose.position.y + (action[1])
        # print(action[0])
    def cbControl(self, event):
        twist = Twist()
        if not self.goalReached():
            # rotate towards the desired action direction when misaligned by more than 0.1 rad
            if self.angle2Action > 0.1:
                twist.angular.z = -0.3
                print("spinning in place +")
            elif self.angle2Action < -0.1:
                twist.angular.z = 0.3
                print("spinning in place -")
            # forward speed is always commanded (the original `else:` was left commented out)
            vel = np.array([self.desired_action[0], self.desired_action[1]])
            twist.linear.x = 0.1 * np.linalg.norm(vel)
        self.pub_twist.publish(twist)
    def cbComputeActionCrowdNav(self, event):
        robot_x = self.pose.pose.position.x
        robot_y = self.pose.pose.position.y
        # goal
        goal_x = self.sub_goal.x
        goal_y = self.sub_goal.y
        # velocity
        robot_vx = self.vel.x
        robot_vy = self.vel.y
        # orientation
        theta = self.psi
        robot_radius = 0.3
        # set robot info
        self.robot.set(robot_x, robot_y, goal_x, goal_y, robot_vx, robot_vy, theta, robot_radius)
        # obstacle: position, velocity, radius
        # position
        # obstacle_x = [0.1,0.2,0.3,0.4,0.5]
        # obstacle_y = [0.1,0.2,0.3,0.4,0.5]
        # # velocity
        # obstacle_vx = [0.1,0.2,0.3,0.4,0.5]
        # obstacle_vy = [0.1,0.2,0.3,0.4,0.5]
        # obstacle_x = [-6.0,-6.0,-6.0,-6.0,-6.0]
        # obstacle_y = [-6.0,-6.0,-6.0,-6.0,-6.0]
        # velocity
        # obstacle_vx = [0.0,0.0,0.0,0.0,0.0]
        # obstacle_vy = [0.0,0.0,0.0,0.0,0.0]
        obstacle_radius = 0.3
        # initialize obstacle instances and set their values
        # (assumes self.obstacles has already been populated by the get_obstacles callback)
        for i in range(self.env_config.getint('sim', 'human_num')):
            obstacle_x = self.obstacles[i].polygon.points[0].x
            obstacle_y = self.obstacles[i].polygon.points[0].y
            obstacle_vx = self.obstacles[i].velocities.twist.linear.x
            obstacle_vy = self.obstacles[i].velocities.twist.linear.y
            self.env.humans[i].set(obstacle_x, obstacle_y, goal_x, goal_y, obstacle_vx, obstacle_vy, theta, obstacle_radius)
            self.ob[i] = self.env.humans[i].get_observable_state()
        # ************************************ Output ************************************
        # get action info
        action = self.robot.act(self.ob)
        # print('\n---------\nrobot position (X,Y):', position.position)
        # print(action)
        # print(theta)
        self.update_action(action)
        self.update_angle2Action()
    def get_obstacles(self, msg):
        # cache the latest obstacle list published by the TEB local planner
        self.obstacles = list(msg.obstacles)
    def update_subgoal(self,subgoal):
        self.goal.pose.position.x = subgoal[0]
        self.goal.pose.position.y = subgoal[1]
    # def visualize_path(self):
    #     marker = Marker()
    #     marker.header.stamp = rospy.Time.now()
    #     marker.header.frame_id = 'map'
    #     marker.ns = 'path_arrow'
    #     marker.id = 0
    #     marker.type = marker.ARROW
    #     marker.action = marker.ADD
    #     marker.points.append(self.pose.pose.position)
    #     marker.points.append(self.desired_position.pose.position)
    #     marker.scale = Vector3(x=0.1,y=0.2,z=0.2)
    #     marker.color = ColorRGBA(b=1.0,a=1.0)
    #     marker.lifetime = rospy.Duration(1)
    #     self.pub_path_marker.publish(marker)
    #     # # Display BLUE DOT at NN desired position
    #     # marker = Marker()
    #     # marker.header.stamp = rospy.Time.now()
    #     # marker.header.frame_id = 'map'
    #     # marker.ns = 'path_trail'
    #     # marker.id = self.num_poses
    #     # marker.type = marker.CUBE
    #     # marker.action = marker.ADD
    #     # marker.pose.position = copy.deepcopy(self.pose.pose.position)
    #     # marker.scale = Vector3(x=0.2,y=0.2,z=0.2)
    #     # marker.color = ColorRGBA(g=0.0,r=0,b=1.0,a=0.3)
    #     # marker.lifetime = rospy.Duration(60)
    #     # self.pub_path_marker.publish(marker)
    def on_shutdown(self):
        rospy.loginfo("[%s] Shutting down.", rospy.get_name())
        self.stop_moving()
        rospy.loginfo("Stopped %s's velocity.", rospy.get_name())