Ejemplo n.º 1
0
    agent = SocialForcesController(speed_div, orient_div, orient_quant)

# 'Default' agent: no controller object — external control is switched off so
# the environment replays the pedestrian recorded in the annotation video.
if args.agent_type == 'Default':

    env.external_control = False
    agent = None
    #the person from the video
    pass

#*************************************************
#load reward network if present

if args.reward_path is not None:
    from irlmethods.deep_maxent import RewardNet

    # Infer the reward-net input size from one extracted feature vector.
    # NOTE(review): assumes feat_ext was initialised earlier in this script.
    state_size = feat_ext.extract_features(env.reset()).shape[0]
    reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
    reward_net.load(args.reward_path)

#*************************************************
#play


def reward_analysis():
    '''
    A function to analysis the rewards against actions for a given policy.
    A helpful visualization/ debugging tool
    '''
    for i in range(args.num_trajs):

        #reset the world
Ejemplo n.º 2
0
def main(args):
    """Evaluate saved policies (or raw trajectories) and pickle the metrics.

    For every policy file found under ``args.parent_policy_folder``
    (layout: ``<parent>/<seed_dir>/*.pt``) this rolls out trajectories in the
    drone grid-world, applies a battery of metrics plus a drift analysis, and
    dumps one result file per policy under ``./results/``.  When
    ``args.feat_extractor == "Raw_state"`` the metrics are instead computed
    directly from the trajectory files in ``args.trajectory_folder``.
    """
    output = {}

    # parameters shared by the feature extractors (environment pixel units)
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    if args.feat_extractor != "Raw_state":
        # gather every saved policy file: <parent>/<seed folder>/*.pt
        saved_policies = []
        assert os.path.isdir(
            args.parent_policy_folder), "Folder does not exist!"
        parent_path = pathlib.Path(args.parent_policy_folder)
        for seed_folder in parent_path.glob("./*"):
            for policy in seed_folder.glob("./*.pt"):
                saved_policies.append(str(policy))

    output["eval parameters"] = vars(args)

    # initialize environment
    from envs.gridworld_drone import GridWorldDrone

    consider_heading = True
    np.random.seed(0)
    env = GridWorldDrone(
        display=False,
        is_onehot=False,
        seed=0,
        obstacles=None,
        show_trail=True,
        is_random=False,
        subject=None,
        annotation_file=args.annotation_file,
        tick_speed=60,
        obs_width=obs_width,  # was a hard-coded 10; same value, kept in sync
        step_size=step_size,
        agent_width=agent_width,
        external_control=True,
        replace_subject=args.dont_replace_subject,
        show_comparison=True,
        consider_heading=consider_heading,
        show_orientation=True,
        rows=576,
        cols=720,
        width=grid_size,
    )

    # initialize the feature extractor
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureRisk_speedv2, )
    from featureExtractor.drone_feature_extractor import (
        VasquezF1,
        VasquezF2,
        VasquezF3,
    )

    from featureExtractor.drone_feature_extractor import (
        Fahad,
        GoalConditionedFahad,
    )

    # feat_ext_args doubles as the "recognised extractor name" flag; it stays
    # None only when args.feat_extractor matches no branch below.
    feat_ext_args = None
    feat_ext = None

    if args.feat_extractor == "DroneFeatureRisk_speedv2":

        feat_ext_args = {
            "agent_width": agent_width,
            "obs_width": obs_width,
            "step_size": step_size,
            "grid_size": grid_size,
            "thresh1": 18,
            "thresh2": 30,
        }

        feat_ext = DroneFeatureRisk_speedv2(**feat_ext_args)

    if args.feat_extractor == "VasquezF1":
        feat_ext_args = {
            "density_radius": 6 * agent_width,
            "lower_speed_threshold": 18,
            "upper_speed_threshold": 30,
        }

        feat_ext = VasquezF1(
            feat_ext_args["density_radius"],
            feat_ext_args["lower_speed_threshold"],
            feat_ext_args["upper_speed_threshold"],
        )

    if args.feat_extractor == "VasquezF2":
        feat_ext_args = {
            "density_radius": 6 * agent_width,
            "lower_speed_threshold": 18,
            "upper_speed_threshold": 30,
        }

        feat_ext = VasquezF2(
            feat_ext_args["density_radius"],
            feat_ext_args["lower_speed_threshold"],
            feat_ext_args["upper_speed_threshold"],
        )

    if args.feat_extractor == "VasquezF3":
        feat_ext_args = {
            "agent_width": agent_width,
        }

        feat_ext = VasquezF3(feat_ext_args["agent_width"])

    if args.feat_extractor == "Fahad":
        feat_ext_args = {
            "inner_ring_rad": 36,
            "outer_ring_rad": 60,
            "lower_speed_threshold": 0.5,
            "upper_speed_threshold": 1.0,
        }

        # read from the recorded args instead of duplicating the literals,
        # so the pickled parameters always match the constructed extractor
        feat_ext = Fahad(
            feat_ext_args["inner_ring_rad"],
            feat_ext_args["outer_ring_rad"],
            feat_ext_args["lower_speed_threshold"],
            feat_ext_args["upper_speed_threshold"],
        )

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext_args = {
            "inner_ring_rad": 36,
            "outer_ring_rad": 60,
            "lower_speed_threshold": 0.5,
            "upper_speed_threshold": 1.0,
        }

        feat_ext = GoalConditionedFahad(
            feat_ext_args["inner_ring_rad"],
            feat_ext_args["outer_ring_rad"],
            feat_ext_args["lower_speed_threshold"],
            feat_ext_args["upper_speed_threshold"],
        )

    # no features if dealing with raw trajectories
    if args.feat_extractor == "Raw_state":
        feat_ext_args = {}
        feat_ext = None

    if feat_ext_args is None:
        # previously this fell through to a NameError on feat_ext_args;
        # fail fast with a clear message instead
        raise ValueError(
            "Unknown feature extractor: {}".format(args.feat_extractor))

    output["feature_extractor_params"] = feat_ext_args
    output["feature_extractor"] = feat_ext

    if args.feat_extractor != "Raw_state":
        # initialize policy
        # for getting metrics from policy files
        for filename in saved_policies:

            policy_path = filename
            # build an output name from the last three path components
            # (concatenated with no separator, extension stripped)
            # NOTE(review): assumes '/'-separated paths — POSIX only
            output_file = filename.split("/")[-3:]
            output_filename = ""
            for data in output_file:
                output_filename += data
            output_filename = output_filename.split(".")[0]

            # size the policy input from one extracted feature vector
            sample_state = env.reset()
            state_size = feat_ext.extract_features(sample_state).shape[0]
            policy = Policy(state_size, env.action_space.n, [256])
            policy.load(policy_path)
            policy.to(DEVICE)

            # metric parameters
            metric_applicator = metric_utils.MetricApplicator()
            metric_applicator.add_metric(metrics.compute_trajectory_smoothness)
            metric_applicator.add_metric(
                metrics.compute_distance_displacement_ratio)
            metric_applicator.add_metric(metrics.proxemic_intrusions, [3])
            metric_applicator.add_metric(metrics.anisotropic_intrusions, [20])
            metric_applicator.add_metric(metrics.count_collisions, [10])
            metric_applicator.add_metric(metrics.goal_reached, [10, 10])
            metric_applicator.add_metric(metrics.pedestrian_hit, [10])
            metric_applicator.add_metric(metrics.trajectory_length)
            metric_applicator.add_metric(
                metrics.distance_to_nearest_pedestrian_over_time)

            # collect trajectories and apply metrics (one run per pedestrian)
            num_peds = len(env.pedestrian_dict.keys())
            output["metrics"] = metric_applicator.get_metrics()
            output["metric_results"] = {}

            metric_results = metric_utils.collect_trajectories_and_metrics(
                env,
                feat_ext,
                policy,
                num_peds,
                args.max_ep_length,
                metric_applicator,
                disregard_collisions=args.disregard_collisions,
            )

            output["metric_results"] = metric_results

            # drift calculation: one row per pedestrian, one column per
            # requested reset timestep
            drift_matrix = np.zeros(
                (len(env.pedestrian_dict.keys()), len(args.drift_timesteps)))
            for drift_idx, drift_timestep in enumerate(args.drift_timesteps):
                ped_drifts = agent_drift_analysis(
                    policy,
                    "Policy_network",
                    env,
                    list([
                        int(ped_key) for ped_key in env.pedestrian_dict.keys()
                    ]),
                    feat_extractor=feat_ext,
                    pos_reset=drift_timestep,
                )

                assert len(ped_drifts) == len((env.pedestrian_dict.keys()))

                drift_matrix[:, drift_idx] = ped_drifts

            output["metric_results"]["drifts"] = drift_matrix

            pathlib.Path("./results/").mkdir(exist_ok=True)

            # one timestamped result file per policy
            with open(
                    "./results/" + output_filename + "_" +
                    datetime.now().strftime("%Y-%m-%d-%H:%M"),
                    "wb",
            ) as f:
                pickle.dump(output, f)
    else:
        # when raw trajectories are directly provided.
        # metric parameters
        metric_applicator = metric_utils.MetricApplicator()
        metric_applicator.add_metric(metrics.compute_trajectory_smoothness,
                                     [10])
        metric_applicator.add_metric(
            metrics.compute_distance_displacement_ratio, [10])
        metric_applicator.add_metric(metrics.proxemic_intrusions, [3])
        metric_applicator.add_metric(metrics.anisotropic_intrusions, [20])
        metric_applicator.add_metric(metrics.count_collisions, [10])
        metric_applicator.add_metric(metrics.goal_reached, [10, 10])
        metric_applicator.add_metric(metrics.pedestrian_hit, [10])
        metric_applicator.add_metric(metrics.trajectory_length)
        metric_applicator.add_metric(
            metrics.distance_to_nearest_pedestrian_over_time)

        metric_results = metric_utils.collect_metrics_from_trajectory(
            args.trajectory_folder, metric_applicator)

        output["metric_results"] = metric_results

        pathlib.Path("./results/").mkdir(exist_ok=True)

        output_filename = args.trajectory_folder.strip().split("/")[-1]
        with open(
                "./results/" + output_filename + "_" +
                datetime.now().strftime("%Y-%m-%d-%H:%M"),
                "wb",
        ) as f:
            pickle.dump(output, f)
Ejemplo n.º 3
0
def extract_trajectory(annotation_file,
                       folder_to_save,
                       feature_extractor=None,
                       display=False,
                       extract_action=False,
                       show_states=False, subject=None,
                       trajectory_length_limit=None):
    """
    Replay each pedestrian of an annotation file in the drone grid-world and
    save the resulting state (and optionally action) trajectories to disk.

    annotation_file : path to the processed annotation file to replay.
    folder_to_save : output directory (created if missing).
    feature_extractor : optional; when given, states are converted to feature
        tensors and saved with torch.save, otherwise raw states are saved.
    display : render the environment while replaying.
    extract_action : also infer the expert actions and save them.
    show_states : slow-motion rendering for debugging (forces display on).
    subject : optional iterable of subject ids; defaults to every subject
        found in the annotation file.
    trajectory_length_limit : when set, each trajectory is chopped into
        segments of at most this many steps, saved separately.
    """
    if not os.path.exists(folder_to_save):
        os.makedirs(folder_to_save)

    # an action is only extracted every lag_val steps; the skipped frames
    # are padded with the default (no-change) action
    lag_val = 8

    tick_speed = 60
    subject_list = extract_subjects_from_file(annotation_file)
    print(subject_list)
    disp = display
    total_path_len = 0

    if show_states:
        tick_speed = 5
        disp = True

    #initialize world
    world = GridWorldDrone(display=disp, is_onehot=False,
                           seed=10, obstacles=None,
                           show_trail=False,
                           is_random=False,
                           show_orientation=True,
                           annotation_file=annotation_file,
                           subject=None,
                           external_control=False,
                           replace_subject=True,
                           tick_speed=tick_speed,
                           rows=576, cols=720,
                           width=10)

    # middle speed * middle orientation index == "keep current motion"
    default_action = int(len(world.speed_array)/2)*int(len(world.orientation_array))+int(len(world.orientation_array)/2)
    default_action = torch.tensor(default_action)

    if subject is not None:
        subject_list = subject
    for sub in subject_list:
        print('Starting for subject :', sub)
        trajectory_info = []

        if extract_action:
            action_info = []
        step_counter_segment = 0

        segment_counter = 1
        world.subject = sub
        old_state = world.reset()
        cur_lag = 0
        print('Path length :', world.final_frame - world.current_frame)
        path_len = world.final_frame - world.current_frame
        cur_subject_final_frame = world.final_frame
        total_path_len += world.final_frame - world.current_frame
        print('Total trajectory information :\nStarting frame: {},final frame: {}'.format(world.current_frame, cur_subject_final_frame))
        print('Total path length :', path_len)
        if trajectory_length_limit is not None:

            traj_seg_length = min(trajectory_length_limit, path_len)
            #change the goal position to the end of the current segment
            world.goal_state = copy.deepcopy(world.return_position(world.cur_ped, world.current_frame + traj_seg_length)['position'])
            world.state['goal_state'] = copy.deepcopy(world.goal_state)

        print('Segment 1: Start frame :', world.current_frame)
        while world.current_frame < cur_subject_final_frame:
            state, _, _, _ = world.step()
            step_counter_segment += 1

            if extract_action:

                if cur_lag == lag_val:

                    action = extract_expert_action(state, old_state,
                                                   world.orient_quantization,
                                                   len(world.orientation_array),
                                                   world.speed_quantization,
                                                   len(world.speed_array))
                    old_state = copy.deepcopy(state)
                    action = torch.tensor(action)
                    action_info.append(action)
                    # pad the skipped frames with the default action
                    for i in range(cur_lag):
                        action_info.append(default_action)
                    cur_lag = 0

                else:
                    cur_lag += 1
            if feature_extractor is not None:
                state = feature_extractor.extract_features(state)
                state = torch.tensor(state)
            trajectory_info.append(copy.deepcopy(state))
            if trajectory_length_limit is not None:

                if step_counter_segment % traj_seg_length == 0:
                    # segment complete: save it and re-target the goal
                    print('Segment {} final frame : {}'.format(segment_counter, world.current_frame))
                    path_len = cur_subject_final_frame - world.current_frame
                    traj_seg_length = min(trajectory_length_limit, path_len)
                    print('Length of next path :', traj_seg_length)

                    #change the goal position
                    world.goal_state = copy.deepcopy(world.return_position(world.cur_ped, world.current_frame + traj_seg_length)['position'])
                    world.state['goal_state'] = copy.deepcopy(world.goal_state)
                    print('Trajectory length : ', len(trajectory_info))

                    if feature_extractor is not None:
                        state_tensors = torch.stack(trajectory_info)
                        torch.save(state_tensors,
                                   os.path.join(folder_to_save,
                                                'traj_of_sub_{}_segment{}.states'.format(str(sub),
                                                                                         str(segment_counter))))
                    else:
                        # BUGFIX: was written to the current working directory;
                        # save next to the other outputs in folder_to_save
                        with open(os.path.join(folder_to_save,
                                               'traj_of_sub_{}_segment{}.states'.format(str(sub),
                                                                                        str(segment_counter))), 'w') as fout:
                            json.dump(trajectory_info, fout)
                    if extract_action:

                        # BUGFIX: was `acton_tensors`, which raised NameError
                        # on the torch.save line below
                        action_tensors = torch.stack(action_info)
                        torch.save(action_tensors,
                                   os.path.join(folder_to_save,
                                                'action_of_sub_{}_segment{}.actions'.format(str(sub),
                                                                                            str(segment_counter))))
                        # BUGFIX: reset like trajectory_info so later segments
                        # do not re-save earlier segments' actions
                        action_info = []
                    segment_counter += 1
                    step_counter_segment = 0
                    trajectory_info = []
                    print('Segment {}: Start frame : {}'.format(segment_counter,
                                                                world.current_frame))

        # add the last bunch of actions (frames since the last extraction)
        # BUGFIX: guarded — action_info only exists when extract_action is set
        if extract_action:
            for i in range(cur_lag):
                action_info.append(default_action)

        if trajectory_length_limit is None:

            if feature_extractor is not None:
                state_tensors = torch.stack(trajectory_info)
                torch.save(state_tensors, os.path.join(folder_to_save, 'traj_of_sub_{}_segment{}.states'.format(str(sub), str(segment_counter))))

                if extract_action:
                    action_tensors = torch.stack(action_info)
                    torch.save(action_tensors,
                               os.path.join(folder_to_save,
                                            'action_of_sub_{}_segment{}.actions'.format(str(sub),
                                                                                        str(segment_counter))))
            else:
                # raw states are saved with numpy (not JSON-serializable in
                # general)
                np.save(os.path.join(folder_to_save, 'traj_of_sub_{}_segment{}.states'.format(str(sub),
                                                                                              str(segment_counter))), trajectory_info)

                if extract_action:

                    action_tensors = torch.stack(action_info)
                    torch.save(action_tensors,
                               os.path.join(folder_to_save,
                                            'action_of_sub_{}_segment{}.actions'.format(str(sub),
                                                                                        str(segment_counter))))

    print('The average path length :', total_path_len/len(subject_list))
Ejemplo n.º 4
0
    def play_regression_policy(self,
                    num_runs,
                    max_episode_length,
                    feat_extractor):
        '''
        Loads up an environment and checks the performance of the agent.
        '''
        #initialize variables needed for the run 

        agent_width = 10
        obs_width = 10
        step_size = 2
        grid_size = 10
        
        #load up the environment
        annotation_file = "../envs/expert_datasets/university_students\
/annotation/processed/frame_skip_1/students003_processed_corrected.txt"
        env = GridWorldDrone(
                            display=True,
                            is_onehot=False,
                            seed=0,
                            obstacles=None,
                            show_trail=False,
                            is_random=False,
                            annotation_file=annotation_file,
                            subject=None,
                            tick_speed=60,
                            obs_width=10,
                            step_size=step_size,
                            agent_width=agent_width,
                            replace_subject=True,
                            segment_size=None,
                            external_control=True,
                            step_reward=0.001,
                            show_comparison=True,
                            consider_heading=True,
                            show_orientation=True,
                            continuous_action=False,
                            # rows=200, cols=200, width=grid_size)
                            rows=576,
                            cols=720,
                            width=grid_size,
                        )
        #initialize the feature extractor

        feat_ext = None
        if feat_extractor == "DroneFeatureRisk_speedv2":

            feat_ext = DroneFeatureRisk_speedv2(
                agent_width=agent_width,
                obs_width=obs_width,
                step_size=step_size,
                grid_size=grid_size,
                show_agent_persp=False,
                return_tensor=False,
                thresh1=18,
                thresh2=30,
            )

        #play the environment 

        for i in range(num_runs):
 
            state = env.reset()
            state_features = feat_ext.extract_features(state)
            state_features = torch.from_numpy(state_features).type(torch.FloatTensor).to(self.device)
            done = False
            t = 0
            while t < max_episode_length:

                action = self.policy.eval_action(state_features)

                state, _, done, _ = env.step(action)
                state_features = feat_ext.extract_features(state)
                state_features = torch.from_numpy(state_features).type(torch.FloatTensor).to(self.device)
                t+=1
                if done:
                    break
Ejemplo n.º 5
0
def main():
    """Train or replay an RL policy on the drone grid-world.

    Parses CLI args, builds the requested feature extractor, environment and
    RL method (ActorCritic / SAC / discrete QSAC), then either trains
    (optionally against a loaded reward network), replays saved policies
    (--play), or records user-controlled trajectories (--play_user).
    """
    #####for the logger
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S")
    ###################

    args = parser.parse_args()

    seed_all(args.seed)

    if args.on_server:

        matplotlib.use("Agg")
        # pygame without monitor
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    from matplotlib import pyplot as plt

    mp.set_start_method("spawn")

    from rlmethods.b_actor_critic import ActorCritic
    from rlmethods.soft_ac import SoftActorCritic, QSoftActorCritic
    from rlmethods.rlutils import ReplayBuffer

    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureSAM1,
        DroneFeatureOccup,
        DroneFeatureRisk,
        DroneFeatureRisk_v2,
        VasquezF1,
        VasquezF2,
        VasquezF3,
        Fahad,
        GoalConditionedFahad,
    )
    from featureExtractor.gridworld_featureExtractor import (
        FrontBackSide,
        LocalGlobal,
        OneHot,
        SocialNav,
        FrontBackSideSimple,
    )
    from featureExtractor.drone_feature_extractor import (
        DroneFeatureRisk_speed,
        DroneFeatureRisk_speedv2,
    )
    # (removed a redundant duplicate import of VasquezF1 — already imported
    # above)

    save_folder = None

    if not args.dont_save and not args.play:

        if not args.save_folder:
            print("Provide save folder.")
            exit()

        # encode the network architectures into the folder name
        policy_net_dims = "-policy_net-"
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += "-"

        reward_net_dims = "-reward_net-"
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += "-"

        save_folder = (
            "./results/"
            + args.save_folder
            + st
            + args.feat_extractor
            + "-seed-"
            + str(args.seed)
            + policy_net_dims
            + reward_net_dims
            + "-total-ep-"
            + str(args.total_episodes)
            + "-max-ep-len-"
            + str(args.max_ep_length)
        )

        # record the exact code version alongside the experiment arguments
        experiment_logger = Logger(save_folder, "experiment_info.txt")
        experiment_logger.log_header("Arguments for the experiment :")
        repo = git.Repo(search_parent_directories=True)
        experiment_logger.log_info({'From branch : ' : repo.active_branch.name})
        experiment_logger.log_info({'Commit number : ' : repo.head.object.hexsha})
        experiment_logger.log_info(vars(args))

    # parameters shared by the feature extractors (environment pixel units)
    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None
    # initialize the feature extractor to be used
    if args.feat_extractor == "Onehot":
        feat_ext = OneHot(grid_rows=10, grid_cols=10)
    if args.feat_extractor == "SocialNav":
        feat_ext = SocialNav(fieldList=["agent_state", "goal_state"])
    if args.feat_extractor == "FrontBackSideSimple":
        feat_ext = FrontBackSideSimple(
            thresh1=1,
            thresh2=2,
            thresh3=3,
            thresh4=4,
            step_size=step_size,
            agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == "LocalGlobal":
        feat_ext = LocalGlobal(
            window_size=11,
            grid_size=grid_size,
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == "DroneFeatureSAM1":

        feat_ext = DroneFeatureSAM1(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureOccup":

        feat_ext = DroneFeatureOccup(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            window_size=window_size,
        )

    if args.feat_extractor == "DroneFeatureRisk":

        feat_ext = DroneFeatureRisk(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_v2":

        feat_ext = DroneFeatureRisk_v2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            thresh1=15,
            thresh2=30,
        )

    if args.feat_extractor == "DroneFeatureRisk_speed":

        feat_ext = DroneFeatureRisk_speed(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            return_tensor=False,
            thresh1=10,
            thresh2=15,
        )

    if args.feat_extractor == "DroneFeatureRisk_speedv2":

        feat_ext = DroneFeatureRisk_speedv2(
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            grid_size=grid_size,
            show_agent_persp=False,
            return_tensor=False,
            thresh1=18,
            thresh2=30,
        )

    if args.feat_extractor == "VasquezF1":
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF2":
        # BUGFIX: this branch previously constructed VasquezF1 (copy-paste)
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == "VasquezF3":
        feat_ext = VasquezF3(agent_width)

    if args.feat_extractor == "Fahad":
        feat_ext = Fahad(36, 60, 0.5, 1.0)

    if args.feat_extractor == "GoalConditionedFahad":
        feat_ext = GoalConditionedFahad(36, 60, 0.5, 1.0)

    if feat_ext is None:
        print("Please enter proper feature extractor!")
        exit()
    # log feature extractor info

    if not args.dont_save and not args.play:

        experiment_logger.log_header("Parameters of the feature extractor :")
        experiment_logger.log_info(feat_ext.__dict__)

    # initialize the environment
    if args.replace_subject:
        replace_subject = True
    else:
        replace_subject = False

    env = GridWorldDrone(
        display=args.render,
        is_onehot=False,
        seed=args.seed,
        obstacles=None,
        show_trail=False,
        is_random=True,
        annotation_file=args.annotation_file,
        subject=args.subject,
        tick_speed=60,
        obs_width=10,
        step_size=step_size,
        agent_width=agent_width,
        replace_subject=replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        step_reward=0.001,
        show_comparison=True,
        consider_heading=True,
        show_orientation=True,
        # rows=200, cols=200, width=grid_size)
        rows=576,
        cols=720,
        width=grid_size,
    )

    # env = gym.make('Acrobot-v1')
    # log environment info
    if not args.dont_save and not args.play:

        experiment_logger.log_header("Environment details :")
        experiment_logger.log_info(env.__dict__)

    # initialize RL

    if args.rl_method == "ActorCritic":
        model = ActorCritic(
            env,
            feat_extractor=feat_ext,
            gamma=1,
            log_interval=100,
            max_episode_length=args.max_ep_length,
            hidden_dims=args.policy_net_hidden_dims,
            save_folder=save_folder,
            lr=args.lr,
            entropy_coeff=args.entropy_coeff,
            max_episodes=args.total_episodes,
        )

    if args.rl_method == "SAC":

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        model = SoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            buffer_sample_size=args.replay_buffer_sample_size,
            entropy_tuning=True,
            play_interval=args.play_interval,
            entropy_target=args.entropy_target,
            gamma=args.gamma,
            learning_rate=args.lr,
        )

    if args.rl_method == "discrete_QSAC":

        replay_buffer = ReplayBuffer(args.replay_buffer_size)

        model = QSoftActorCritic(
            env,
            replay_buffer,
            feat_ext,
            buffer_sample_size=args.replay_buffer_sample_size,
            entropy_tuning=True,
            play_interval=args.play_interval,
            entropy_target=args.entropy_target,
            gamma=args.gamma,
            learning_rate=args.lr,
        )
    # log RL info
    if not args.dont_save and not args.play:

        experiment_logger.log_header("Details of the RL method :")
        experiment_logger.log_info(model.__dict__)

    if args.policy_path is not None:

        from debugtools import numericalSort

        # accept either a single .pt file or a directory of them
        policy_file_list = []
        reward_across_models = []
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, "*.pt"))
            policy_file_list = sorted(policy_names, key=numericalSort)

        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        # no playing of any kind, so training

        if args.reward_path is None:

            if args.policy_path:
                model.policy.load(args.policy_path)

            if args.rl_method == "SAC" or args.rl_method == "discrete_QSAC":
                model.train(args.total_episodes, args.max_ep_length)

            else:
                model.train()

        else:
            # train against a previously learned reward network
            from irlmethods.deep_maxent import RewardNet

            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train(reward_net=reward_net)

        if not args.dont_save:
            model.policy.save(save_folder + "/policy-models/")

    if args.play:
        # env.tickSpeed = 15
        from debugtools import compile_results

        xaxis = []
        counter = 1
        plt.figure(0)
        avg_reward_list = []
        frac_good_run_list = []
        print(policy_file_list)
        for policy_file in policy_file_list:

            print("Playing for policy :", policy_file)
            model.policy.load(policy_file)
            # save generated trajectories two levels above the policy file
            policy_folder = policy_file.strip().split("/")[0:-2]
            save_folder = ""
            for p in policy_folder:
                save_folder = save_folder + p + "/"

            print("The final save folder ", save_folder)
            # env.tickSpeed = 10
            assert args.policy_path is not None, "pass a policy to play from!"
            if args.exp_trajectory_path is not None:
                from irlmethods.irlUtils import calculate_expert_svf

                expert_svf = calculate_expert_svf(
                    args.exp_trajectory_path,
                    max_time_steps=args.max_ep_length,
                    feature_extractor=feat_ext,
                    gamma=1,
                )
            if args.exp_trajectory_path is None:

                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render
                    )
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        store_raw=args.store_raw_states,
                        path=save_folder + "/agent_generated_trajectories/",
                    )
            else:

                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render, expert_svf=expert_svf
                    )
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        path=save_folder + "/agent_generated_trajectories/",
                        expert_svf=expert_svf,
                    )

            avg_reward, good_run_frac = compile_results(
                rewards, state_info, sub_info
            )

            avg_reward_list.append(avg_reward)
            frac_good_run_list.append(good_run_frac)
            plt.plot(avg_reward_list, c="r")
            plt.plot(frac_good_run_list, c="g")
            plt.draw()
        plt.show()

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(
            args.num_trajs, args.render, path="./user_generated_trajectories/"
        )
Ejemplo n.º 6
0
def main():
    """Evaluate a trained Q-network policy against every pedestrian in the
    annotation file and pickle the resulting metric table.

    Reads CLI arguments from the module-level parser, builds the drone
    gridworld, restores the policy from --policy-path, applies the suite of
    trajectory metrics to one rollout per pedestrian, and writes the whole
    `output` dict (parameters, extractor, metric results) into ./results/.
    """
    results = {}

    # geometry parameters shared by the feature extractors
    step_len = 2
    agent_w = 10
    obstacle_w = 10
    cell_size = 10

    cli_args = parser.parse_args()
    results["eval parameters"] = vars(cli_args)

    # build the environment (deferred import keeps pygame out of module load)
    from envs.gridworld_drone import GridWorldDrone

    heading = True
    np.random.seed(0)
    world = GridWorldDrone(
        display=False,
        is_onehot=False,
        seed=0,
        obstacles=None,
        show_trail=True,
        is_random=False,
        subject=None,
        annotation_file=cli_args.annotation_file,
        tick_speed=60,
        obs_width=obstacle_w,
        step_size=step_len,
        agent_width=agent_w,
        external_control=True,
        replace_subject=cli_args.dont_replace_subject,
        show_comparison=True,
        consider_heading=heading,
        show_orientation=True,
        rows=576,
        cols=720,
        width=cell_size,
    )

    extractor = fe_utils.load_feature_extractor(cli_args.feat_extractor)
    results["feature_extractor"] = extractor

    # restore the trained Q-network policy
    first_state = world.reset()
    feature_len = extractor.extract_features(first_state).shape[0]
    q_net = QNetwork(feature_len, world.action_space.n, 512)
    q_net.load(cli_args.policy_path)
    q_net.to(DEVICE)

    # register the trajectory metrics to be applied to every rollout
    applicator = metric_utils.MetricApplicator()
    applicator.add_metric(metrics.compute_trajectory_smoothness)
    applicator.add_metric(metrics.compute_distance_displacement_ratio)
    applicator.add_metric(metrics.proxemic_intrusions, [3])
    applicator.add_metric(metrics.anisotropic_intrusions, [20])
    applicator.add_metric(metrics.count_collisions, [5])
    applicator.add_metric(metrics.goal_reached, [10, 0.5])
    applicator.add_metric(metrics.trajectory_length)

    # one trajectory per pedestrian in the annotation file
    pedestrian_total = len(world.pedestrian_dict.keys())
    results["metrics"] = applicator.get_metrics()
    results["metric_results"] = {}

    raw_results = metric_utils.collect_trajectories_and_metrics(
        world,
        extractor,
        q_net,
        pedestrian_total,
        cli_args.max_ep_length,
        applicator,
        disregard_collisions=True,
    )

    # rows = pedestrians, columns = metrics; unwrap the single-element cells
    frame = pd.DataFrame(raw_results).T
    frame = frame.applymap(lambda cell: cell[0])
    results["metric_results"] = frame

    pathlib.Path('./results/').mkdir(exist_ok=True)

    out_name = ("./results/" + cli_args.output_name + "_" +
                datetime.now().strftime("%Y-%m-%d-%H:%M"))
    with open(out_name, "wb") as dump_file:
        pickle.dump(results, dump_file)
Ejemplo n.º 7
0
def main():
    """Visualize per-spot rewards for a trained policy/reward-network pair.

    Reads CLI arguments from the module-level parser, builds the requested
    feature extractor and the drone gridworld, loads the policy and reward
    networks from --policy-path / --reward-path, and renders reward values
    along sampled trajectories via visualize_reward_per_spot.
    """
    args = parser.parse_args()

    # geometry parameters shared by the feature extractors
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    #set up the feature extractor
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speedv2
    from featureExtractor.drone_feature_extractor import VasquezF1, VasquezF2, VasquezF3

    feat_ext = None
    if args.feat_extractor == 'DroneFeatureRisk_speedv2':

        feat_ext = DroneFeatureRisk_speedv2(agent_width=agent_width,
                                            obs_width=obs_width,
                                            step_size=step_size,
                                            grid_size=grid_size,
                                            thresh1=18,
                                            thresh2=30)

    if args.feat_extractor == 'VasquezF1':
        feat_ext = VasquezF1(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == 'VasquezF2':
        # BUG FIX: this branch previously instantiated VasquezF1, so asking
        # for 'VasquezF2' silently produced the wrong features (VasquezF2 was
        # imported but never used).
        # Assumes VasquezF2 takes the same positional arguments as VasquezF1
        # -- TODO confirm against its definition.
        feat_ext = VasquezF2(agent_width * 6, 0.5, 1.0)

    if args.feat_extractor == 'VasquezF3':
        feat_ext = VasquezF3(agent_width)

    if feat_ext is None:
        # fail fast with a clear message instead of an AttributeError later
        # (consistent with the other drivers in this file)
        print('Please enter proper feature extractor!')
        exit()

    #set up the environment
    from envs.gridworld_drone import GridWorldDrone

    env = GridWorldDrone(
        display=True,
        is_onehot=False,
        obstacles=None,
        show_trail=False,
        is_random=True,
        annotation_file=args.annotation_file,
        tick_speed=60,
        obs_width=10,
        step_size=step_size,
        agent_width=agent_width,
        replace_subject=False,
        consider_heading=True,
        show_orientation=True,
        rows=576,
        cols=720,
        width=grid_size,
    )

    #set up the policy network
    from rlmethods.b_actor_critic import Policy
    state_size = feat_ext.extract_features(env.reset()).shape[0]
    policy_net = Policy(state_size, env.action_space.n,
                        args.policy_net_hidden_dims)
    policy_net.load(args.policy_path)
    print(next(policy_net.parameters()).is_cuda)

    #set up the reward network
    from irlmethods.deep_maxent import RewardNet

    state_size = feat_ext.extract_features(env.reset()).shape[0]
    reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
    reward_net.load(args.reward_path)
    print(next(reward_net.parameters()).is_cuda)

    #run stuff
    '''
    screenshot, reward_map = generate_reward_map(env, feat_ext, 
                        reward_net, 
                        render=args.render,
                        sample_rate=args.sample_rate, 
                        frame_id=args.frame_id)

    plot_map(reward_map, frame_img=screenshot)
    '''

    visualize_reward_per_spot(env,
                              feat_ext,
                              reward_net,
                              policy_net,
                              num_traj=20,
                              div=36,
                              render=True)
Ejemplo n.º 8
0
def main():
    """Train a continuous-action soft actor-critic agent in the drone world.

    Relies on module-level names (`args`, `agent_width`, `obs_width`,
    `step_size`, `grid_size`, and the imported classes): builds the feature
    extractor, replay buffer and environment, optionally resumes from a
    checkpoint and/or uses a learned reward network, trains SAC, and saves
    the final policy.
    """
    # tensorboard writer, tagged with the entropy temperature
    summary_writer = SummaryWriter(comment="_alpha_" + str(args.log_alpha))

    # experience replay storage
    buffer = ReplayBuffer(args.replay_buffer_size)

    # risk/speed feature extractor (geometry comes from module globals)
    extractor = DroneFeatureRisk_speed(
        agent_width=agent_width,
        obs_width=obs_width,
        step_size=step_size,
        grid_size=grid_size,
        thresh1=18,
        thresh2=30,
    )

    # optionally resume training state from a checkpoint
    if args.checkpoint_path:
        run_checkpointer = Checkpointer.load_checkpointer(args.checkpoint_path)
    else:
        run_checkpointer = None

    # continuous-action pedestrian environment
    env = GridWorldDrone(
        display=args.render,
        is_random=True,
        rows=576,
        cols=720,
        agent_width=agent_width,
        step_size=step_size,
        obs_width=obs_width,
        width=grid_size,
        annotation_file=args.annotation_file,
        external_control=True,
        continuous_action=True,
        consider_heading=True,
        is_onehot=False,
    )

    # optional learned reward network (IRL); None means environment reward
    feature_len = extractor.extract_features(env.reset()).shape[0]
    learned_reward = None
    if args.reward_path is not None:
        learned_reward = RewardNet(feature_len, args.reward_net_hidden_dims)
        learned_reward.load(args.reward_path)

    # set up and run the RL trainer
    trainer = SoftActorCritic(
        env,
        buffer,
        extractor,
        buffer_sample_size=args.replay_buffer_sample_size,
        tbx_writer=summary_writer,
        tau=0.005,
        log_alpha=args.log_alpha,
        entropy_tuning=True,
        entropy_target=args.entropy_target,
        render=args.render,
        play_interval=args.play_interval,
        checkpointer=run_checkpointer,
    )
    trainer.train(
        args.rl_episodes, args.max_episode_length, reward_network=learned_reward
    )

    trainer.policy.save("./cont_world_policies")
Ejemplo n.º 9
0
def main():
    """Replay saved agents in the drone gridworld and compute per-trajectory
    metrics.

    For every agent in the hard-coded agent_file_list, runs one episode per
    pedestrian in the annotation file, then computes smoothness, the
    distance/displacement ratio and intrusion counts for each trajectory,
    dropping into pdb after each one for interactive inspection.
    """
    #**************************************************
    #parameters for the feature extractors
    thresh1 = 10
    thresh2 = 15

    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 3

    #**************************************************
    #for bookkeeping purposes

    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')

    args = parser.parse_args()

    #checks if all the parameters are in order
    check_parameters(args)

    if args.on_server:

        matplotlib.use('Agg')
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

    #*************************************************
    #initialize environment
    from envs.gridworld_drone import GridWorldDrone

    consider_heading = True
    np.random.seed(0)
    env = GridWorldDrone(display=args.render,
                         is_onehot=False,
                         seed=0,
                         obstacles=None,
                         show_trail=True,
                         is_random=False,
                         subject=None,
                         annotation_file=args.annotation_file,
                         tick_speed=60,
                         obs_width=10,
                         step_size=step_size,
                         agent_width=agent_width,
                         external_control=True,
                         replace_subject=args.run_exact,
                         show_comparison=True,
                         consider_heading=consider_heading,
                         show_orientation=True,
                         rows=576,
                         cols=720,
                         width=grid_size)

    print('Environment initalized successfully.')

    #*************************************************
    #initialize the feature extractor
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk, DroneFeatureRisk_v2
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speed, DroneFeatureRisk_speedv2

    if args.feat_extractor == 'DroneFeatureRisk':

        feat_ext = DroneFeatureRisk(agent_width=agent_width,
                                    obs_width=obs_width,
                                    step_size=step_size,
                                    grid_size=grid_size,
                                    show_agent_persp=True,
                                    thresh1=thresh1,
                                    thresh2=thresh2)

    if args.feat_extractor == 'DroneFeatureRisk_v2':

        feat_ext = DroneFeatureRisk_v2(agent_width=agent_width,
                                       obs_width=obs_width,
                                       step_size=step_size,
                                       grid_size=grid_size,
                                       show_agent_persp=False,
                                       thresh1=thresh1,
                                       thresh2=thresh2)

    if args.feat_extractor == 'DroneFeatureRisk_speed':

        feat_ext = DroneFeatureRisk_speed(agent_width=agent_width,
                                          obs_width=obs_width,
                                          step_size=step_size,
                                          grid_size=grid_size,
                                          show_agent_persp=True,
                                          thresh1=thresh1,
                                          thresh2=thresh2)

    if args.feat_extractor == 'DroneFeatureRisk_speedv2':

        feat_ext = DroneFeatureRisk_speedv2(agent_width=agent_width,
                                            obs_width=obs_width,
                                            step_size=step_size,
                                            grid_size=grid_size,
                                            thresh1=18,
                                            thresh2=30)

    #*************************************************
    #initialize the agents
    agent_list = []  #list containing the paths to the agents
    agent_type_list = []  #list containing the type of the agents

    #for potential field agent
    attr_mag = 3
    rep_mag = 2

    #agent = PFController()
    ######################
    #for social forces agent

    ######################

    #for network based agents
    agent_file_list = [
        '/home/abhisek/Study/Robotics/deepirl/experiments/results/Beluga/IRL Runs/Variable-speed-hit-full-run-suppressed-local-updated-features2019-12-14_16:38:00-policy_net-256--reward_net-256--reg-0.001-seed-9-lr-0.0005/saved-models/28.pt'
    ]
    agent_file_list.append(
        '/home/abhisek/Study/Robotics/deepirl/experiments/results/Quadra/RL Runs/Possible_strawman2019-12-16 12:22:05DroneFeatureRisk_speedv2-seed-789-policy_net-256--reward_net-128--total-ep-8000-max-ep-len-500/policy-models/0.pt'
    )

    #initialize agents based on the agent files
    for agent_file in agent_file_list:

        agent_temp = Policy(feat_ext.state_rep_size,
                            env.action_space.n,
                            hidden_dims=args.policy_net_hidden_dims)

        agent_temp.load(agent_file)
        agent_list.append(agent_temp)
        agent_type_list.append('Policy_network')

    #####################

    for i in range(len(agent_list)):

        # NOTE(review): env.cur_ped / env.last_pedestrian semantics come from
        # the environment; after the first agent exhausts the pedestrians this
        # condition may be False immediately for later agents -- verify.
        while env.cur_ped != env.last_pedestrian:

            state = env.reset()
            done = False
            t = 0
            traj = [copy.deepcopy(state)]
            # BUG FIX: the original looped `while not done or t < max_ep_length`
            # and never incremented t, so the episode-length cap never applied
            # (only the inner break on `done` terminated the loop). Use `and`
            # and count steps so episodes are bounded by --max-ep-length.
            while not done and t < args.max_ep_length:

                # BUG FIX: the original compared against 'Policy_Network'
                # (wrong case) with `!=`, which accidentally always extracted
                # features; policy-network agents are exactly the ones that
                # need extracted features.
                if agent_type_list[i] == 'Policy_network':

                    feat = feat_ext.extract_features(state)
                    feat = torch.from_numpy(feat).type(
                        torch.FloatTensor).to(DEVICE)

                action = agent_list[i].eval_action(feat)
                state, _, done, _ = env.step(action)
                traj.append(copy.deepcopy(state))
                t += 1

                if done:
                    break

            total_smoothness, avg_smoothness = compute_trajectory_smoothness(
                traj)
            ratio = compute_distance_displacement_ratio(traj)

            proxemic_intrusions(traj, 10)
            anisotropic_intrusions(traj, 30)
            # NOTE(review): deliberate breakpoint to inspect per-trajectory
            # metrics interactively; remove for unattended runs.
            pdb.set_trace()
Ejemplo n.º 10
0
def main():
    """Train or play back a SAC agent in the drone gridworld.

    Reads CLI arguments from the module-level parser. Depending on the flags:
      * default: trains SAC (optionally under a learned reward network loaded
        from --reward-path) and saves the resulting policy;
      * --play: loads one or more saved policies from --policy-path and
        generates trajectories, plotting average reward and the fraction of
        good runs per policy;
      * --play-user: lets a human drive the agent and records trajectories.
    """

    #####for the logger
    # timestamp string baked into the save-folder name for this run
    ts = time.time()
    st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')
    ###################

    args = parser.parse_args()

    if args.on_server:

        matplotlib.use('Agg')
        # pygame without monitor
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

    # deferred imports: matplotlib backend and SDL driver must be configured
    # before these modules initialize
    from matplotlib import pyplot as plt
    mp.set_start_method('spawn')

    from rlmethods.scott_SAC.SAC import SAC
    from envs.gridworld_drone import GridWorldDrone
    from featureExtractor.drone_feature_extractor import DroneFeatureSAM1, DroneFeatureOccup, DroneFeatureRisk, DroneFeatureRisk_v2
    from featureExtractor.gridworld_featureExtractor import FrontBackSide, LocalGlobal, OneHot, SocialNav, FrontBackSideSimple
    from featureExtractor.drone_feature_extractor import DroneFeatureRisk_speed

    save_folder = None

    # build a save-folder name that encodes the run's hyperparameters
    if not args.dont_save and not args.play:

        if not args.save_folder:
            print('Provide save folder.')
            exit()

        policy_net_dims = '-policy_net-'
        for dim in args.policy_net_hidden_dims:
            policy_net_dims += str(dim)
            policy_net_dims += '-'

        reward_net_dims = '-reward_net-'
        for dim in args.reward_net_hidden_dims:
            reward_net_dims += str(dim)
            reward_net_dims += '-'

        save_folder = './results/'+ args.save_folder +st + args.feat_extractor + \
                      '-seed-'+str(args.seed) + policy_net_dims + reward_net_dims + \
                      '-total-ep-'+str(args.total_episodes)+'-max-ep-len-'+ str(args.max_ep_length)

        experiment_logger = Logger(save_folder, 'experiment_info.txt')
        experiment_logger.log_header('Arguments for the experiment :')
        experiment_logger.log_info(vars(args))

    # geometry parameters shared by the feature extractors
    window_size = 9
    step_size = 2
    agent_width = 10
    obs_width = 10
    grid_size = 10

    feat_ext = None
    #initialize the feature extractor to be used
    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)
    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav(fieldList=['agent_state', 'goal_state'])
    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(
            thresh1=1,
            thresh2=2,
            thresh3=3,
            thresh4=4,
            step_size=step_size,
            agent_width=agent_width,
            obs_width=obs_width,
        )

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(
            window_size=11,
            grid_size=grid_size,
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
        )

    if args.feat_extractor == 'DroneFeatureSAM1':

        feat_ext = DroneFeatureSAM1(agent_width=agent_width,
                                    obs_width=obs_width,
                                    step_size=step_size,
                                    grid_size=grid_size,
                                    thresh1=15,
                                    thresh2=30)

    if args.feat_extractor == 'DroneFeatureOccup':

        feat_ext = DroneFeatureOccup(agent_width=agent_width,
                                     obs_width=obs_width,
                                     step_size=step_size,
                                     grid_size=grid_size,
                                     window_size=window_size)

    if args.feat_extractor == 'DroneFeatureRisk':

        feat_ext = DroneFeatureRisk(agent_width=agent_width,
                                    obs_width=obs_width,
                                    step_size=step_size,
                                    grid_size=grid_size,
                                    show_agent_persp=True,
                                    thresh1=15,
                                    thresh2=30)

    if args.feat_extractor == 'DroneFeatureRisk_v2':

        feat_ext = DroneFeatureRisk_v2(agent_width=agent_width,
                                       obs_width=obs_width,
                                       step_size=step_size,
                                       grid_size=grid_size,
                                       show_agent_persp=True,
                                       thresh1=15,
                                       thresh2=30)

    if args.feat_extractor == 'DroneFeatureRisk_speed':

        feat_ext = DroneFeatureRisk_speed(agent_width=agent_width,
                                          obs_width=obs_width,
                                          step_size=step_size,
                                          grid_size=grid_size,
                                          show_agent_persp=False,
                                          thresh1=10,
                                          thresh2=15)

    # fail fast if the requested extractor name matched no branch above
    if feat_ext is None:
        print('Please enter proper feature extractor!')
        exit()
    #log feature extractor info

    if not args.dont_save and not args.play:

        experiment_logger.log_header('Parameters of the feature extractor :')
        experiment_logger.log_info(feat_ext.__dict__)

    #initialize the environment
    if args.replace_subject:
        replace_subject = True
    else:
        replace_subject = False

    env = GridWorldDrone(
        display=args.render,
        is_onehot=False,
        seed=args.seed,
        obstacles=None,
        show_trail=False,
        is_random=True,
        annotation_file=args.annotation_file,
        subject=args.subject,
        tick_speed=60,
        obs_width=10,
        step_size=step_size,
        agent_width=agent_width,
        replace_subject=replace_subject,
        segment_size=args.segment_size,
        external_control=True,
        step_reward=0.001,
        show_comparison=True,
        consider_heading=True,
        show_orientation=True,

        #rows=200, cols=300, width=grid_size)
        rows=576,
        cols=720,
        width=grid_size)

    #log environment info
    if not args.dont_save and not args.play:

        experiment_logger.log_header('Environment details :')
        experiment_logger.log_info(env.__dict__)

    #initialize RL
    model = SAC(env,
                feat_extractor=feat_ext,
                log_interval=100,
                max_ep_length=args.max_ep_length,
                hidden_dims=args.policy_net_hidden_dims,
                save_folder=save_folder,
                max_episodes=args.total_episodes)

    #log RL info
    if not args.dont_save and not args.play:

        experiment_logger.log_header('Details of the RL method :')
        experiment_logger.log_info(model.__dict__)

    # collect the policy checkpoint file(s) to play back later:
    # a single .pt file, or every .pt in a directory, numerically sorted
    if args.policy_path is not None:

        from debugtools import numericalSort
        policy_file_list = []
        reward_across_models = []
        if os.path.isfile(args.policy_path):
            policy_file_list.append(args.policy_path)
        if os.path.isdir(args.policy_path):
            policy_names = glob.glob(os.path.join(args.policy_path, '*.pt'))
            policy_file_list = sorted(policy_names, key=numericalSort)

        xaxis = np.arange(len(policy_file_list))

    if not args.play and not args.play_user:
        #no playing of any kind, so training

        if args.reward_path is None:
            # train on the environment's native reward, optionally warm-started
            if args.policy_path:
                model.policy.load(args.policy_path)
            model.train()
        else:
            # train under a learned (IRL) reward network instead
            from irlmethods.deep_maxent import RewardNet
            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size, args.reward_net_hidden_dims)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train(reward_net=reward_net)

        if not args.dont_save:
            model.policy.save(save_folder + '/policy-models/')

    if args.play:
        #env.tickSpeed = 15
        # NOTE(review): if --play is given without --policy-path,
        # policy_file_list is never defined and this loop raises NameError
        # before the assert below can fire -- verify intended usage.
        from debugtools import compile_results
        xaxis = []
        counter = 1
        plt.figure(0)
        avg_reward_list = []
        frac_good_run_list = []
        for policy_file in policy_file_list:

            print('Playing for policy :', policy_file)
            model.policy.load(policy_file)
            # derive the run's root folder from the checkpoint path
            policy_folder = policy_file.strip().split('/')[0:-2]
            save_folder = ''
            for p in policy_folder:
                save_folder = save_folder + p + '/'

            print('The final save folder ', save_folder)
            #env.tickSpeed = 10
            assert args.policy_path is not None, 'pass a policy to play from!'
            if args.exp_trajectory_path is not None:
                # expert state-visitation frequencies for comparison plots
                from irlmethods.irlUtils import calculate_expert_svf
                expert_svf = calculate_expert_svf(
                    args.exp_trajectory_path,
                    max_time_steps=args.max_ep_length,
                    feature_extractor=feat_ext,
                    gamma=1)
            #reward_across_models.append(model.generate_trajectory(args.num_trajs, args.render))
            if args.exp_trajectory_path is None:

                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render)
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        path=save_folder + '/agent_generated_trajectories/')
            else:

                if args.dont_save:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs, args.render, expert_svf=expert_svf)
                else:
                    rewards, state_info, sub_info = model.generate_trajectory(
                        args.num_trajs,
                        args.render,
                        path=save_folder + '/agent_generated_trajectories/',
                        expert_svf=expert_svf)

            avg_reward, good_run_frac = compile_results(
                rewards, state_info, sub_info)
            #pdb.set_trace()
            avg_reward_list.append(avg_reward)
            frac_good_run_list.append(good_run_frac)
            plt.plot(avg_reward_list, c='r')
            plt.plot(frac_good_run_list, c='g')
            plt.draw()
        plt.show()

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(args.num_trajs,
                                       args.render,
                                       path='./user_generated_trajectories/')
Ejemplo n.º 11
0
def main():
    """Train or play an actor-critic agent in the drone gridworld.

    Builds the feature extractor selected via the CLI, the pedestrian
    environment, and an ActorCritic model; then either trains with
    multiprocessing (optionally under a learned reward network) or replays /
    records trajectories depending on --play / --play-user.
    """
    args = parser.parse_args()
    mp.set_start_method('spawn')

    from envs.gridworld_drone import GridWorldDrone

    # geometry parameters shared by the feature extractors
    agent_width = 10
    step_size = 2
    obs_width = 10
    grid_size = 10

    # BUG FIX: the original assigned the extractor to `feat_ext` but later
    # referenced an undefined `featExtract` (a leftover from commented-out
    # code), raising NameError; a single name is now used throughout, with a
    # fail-fast guard when no extractor branch matched.
    feat_ext = None
    if args.feat_extractor == 'Onehot':
        feat_ext = OneHot(grid_rows=10, grid_cols=10)
    if args.feat_extractor == 'SocialNav':
        feat_ext = SocialNav(fieldList=['agent_state', 'goal_state'])
    if args.feat_extractor == 'FrontBackSideSimple':
        feat_ext = FrontBackSideSimple(
            thresh1=1,
            thresh2=2,
            thresh3=3,
            thresh4=4,
            step_size=step_size,
            agent_width=agent_width,
            obs_width=obs_width,
            fieldList=['agent_state', 'goal_state', 'obstacles'])

    if args.feat_extractor == 'LocalGlobal':
        feat_ext = LocalGlobal(
            window_size=3,
            grid_size=grid_size,
            agent_width=agent_width,
            obs_width=obs_width,
            step_size=step_size,
            fieldList=['agent_state', 'goal_state', 'obstacles'])

    if feat_ext is None:
        print('Please enter proper feature extractor!')
        exit()

    env = GridWorldDrone(display=args.render,
                         is_onehot=False,
                         seed=999,
                         obstacles=None,
                         show_trail=False,
                         is_random=False,
                         annotation_file=args.annotation_file,
                         subject=None,
                         tick_speed=90,
                         obs_width=10,
                         step_size=step_size,
                         agent_width=agent_width,
                         show_comparison=True,
                         rows=576,
                         cols=720,
                         width=grid_size)

    model = ActorCritic(env,
                        feat_extractor=feat_ext,
                        gamma=0.99,
                        log_interval=50,
                        max_ep_length=500,
                        max_episodes=2000)

    if args.policy_path is not None:
        model.policy.load(args.policy_path)

    if not args.play and not args.play_user:
        if args.reward_path is None:
            # train on the environment's native reward
            model.train_mp(n_jobs=4)
        else:
            # train under a learned (IRL) reward network instead
            from irlmethods.deep_maxent import RewardNet
            state_size = feat_ext.extract_features(env.reset()).shape[0]
            reward_net = RewardNet(state_size)
            reward_net.load(args.reward_path)
            print(next(reward_net.parameters()).is_cuda)
            model.train_mp(reward_net=reward_net, n_jobs=4)

        if not args.dont_save:
            model.policy.save('./saved-models/')

    if args.play:
        #env.tickSpeed = 15
        assert args.policy_path is not None, 'pass a policy to play from!'

        model.generate_trajectory(
            args.num_trajs, './trajs/ac_loc_glob_rectified_win_3_static_map3/')

    if args.play_user:
        env.tickSpeed = 200

        model.generate_trajectory_user(args.num_trajs,
                                       './trajs/ac_gridworld_user/')