Example 1
time_travelled, distance_travelled, road_completed, action_sequences, state_sequences = [], [], [], [], []

writer = utils.create_tensorboard_writer(opt)

n_test = len(splits['test_indx'])
outcomes = []
for j in range(n_test):
    movie_dir = path.join(opt.save_dir, 'videos_simulator', plan_file,
                          f'ep{j + 1}')
    print(f'[new episode, will save to: {movie_dir}]')
    if opt.save_grad_vid:
        grad_movie_dir = path.join(opt.save_dir, 'grad_videos_simulator',
                                   plan_file, f'ep{j + 1}')
        print(f'[gradient videos will be saved to: {grad_movie_dir}]')
    car_path = dataloader.ids[splits['test_indx'][j]]
    timeslot, car_id = utils.parse_car_path(car_path)
    inputs = env.reset(time_slot=timeslot,
                       vehicle_id=car_id)  # if None => picked at random
    forward_model.reset_action_buffer(opt.npred)
    done, mu, std = False, None, None
    images, states, costs, actions, mu_list, std_list, grad_list = [], [], [], [], [], [], []
    cntr = 0
    # inputs, cost, done, info = env.step(numpy.zeros((2,)))
    input_state_t0 = inputs['state'].contiguous()[-1]
    action_sequences.append([])
    state_sequences.append([])
    has_collided = False
    off_screen = False
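    # Roll the episode forward until it terminates (collision, driving off screen,
    # or reaching the end of the road segment).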
    while not done:
        input_images = inputs['context'].contiguous()
        input_states = inputs['state'].contiguous()
Example 2
def main():
    opt = parse_args()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    random.seed(opt.seed)
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    data_path = 'traffic-data/state-action-cost/data_i80_v0'

    dataloader = DataLoader(None, opt, 'i80')
    (
        forward_model,
        value_function,
        policy_network_il,
        policy_network_mper,
        data_stats
    ) = load_models(opt, data_path, device)
    splits = torch.load(path.join(data_path, 'splits.pth'))

    if opt.u_reg > 0.0:
        forward_model.train()
        forward_model.opt.u_hinge = opt.u_hinge
        if hasattr(forward_model, 'value_function'):
            forward_model.value_function.train()
        planning.estimate_uncertainty_stats(
            forward_model, dataloader, n_batches=50, npred=opt.npred)

    gym.envs.registration.register(
        id='I-80-v1',
        entry_point='map_i80_ctrl:ControlledI80',
        kwargs=dict(
            fps=10,
            nb_states=opt.ncond,
            display=False,
            delta_t=0.1,
            store_simulator_video=opt.save_sim_video,
            show_frame_count=False,
        )
    )

    print('Building the environment (loading data, if any)')
    env_names = {
        'i80': 'I-80-v1',
    }
    env = gym.make(env_names[opt.map])

    plan_file = build_plan_file_name(opt)
    print(f'[saving to {path.join(opt.save_dir, plan_file)}]')

    # different performance metrics
    time_travelled, distance_travelled, road_completed = [], [], []
    collided, offscreen = [], []
    # values saved for later inspection
    action_sequences, state_sequences, cost_sequences = [], [], []
    image_sequences = []

    writer = utils.create_tensorboard_writer(opt)

    n_test = len(splits['test_indx'])

    set_start_method('spawn')
    pool = Pool(opt.num_processes)
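    # The 'spawn' start method is used (rather than fork) so that the worker
    # processes created by the pool can safely initialise and use CUDA.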

    async_results = []

    time_started = time.time()
    total_images = 0

    for j in range(n_test):
        car_path = dataloader.ids[splits['test_indx'][j]]
        timeslot, car_id = utils.parse_car_path(car_path)
        # Size of the controlled car for this episode, passed to the planner.
        car_sizes = torch.tensor(
            dataloader.car_sizes[sorted(dataloader.car_sizes.keys())[timeslot]][car_id]
        )[None, :]
        async_results.append(
            pool.apply_async(
                process_one_episode, (
                    opt,
                    env,
                    car_path,
                    forward_model,
                    policy_network_il,
                    data_stats,
                    plan_file,
                    j,
                    car_sizes
                )
            )
        )

    for j in range(n_test):
        simulation_result = async_results[j].get()

        time_travelled.append(simulation_result.time_travelled)
        distance_travelled.append(simulation_result.distance_travelled)
        road_completed.append(simulation_result.road_completed)
        action_sequences.append(torch.from_numpy(
            simulation_result.action_sequence))
        state_sequences.append(torch.from_numpy(
            simulation_result.state_sequence))
#         image_sequences.append(torch.from_numpy(
#             simulation_result.image_sequence))
        cost_sequences.append(simulation_result.cost_sequence)
        total_images += time_travelled[-1]
        
        collided.append(simulation_result.has_collided)
        offscreen.append(simulation_result.off_screen)

        log_string = ' | '.join((
            f'ep: {j + 1:3d}/{n_test}',
            f'time: {time_travelled[-1]}',
            f'distance: {distance_travelled[-1]:.0f}',
            f'success: {road_completed[-1]:d}',
            f'mean time: {torch.Tensor(time_travelled).mean():.0f}',
            f'mean distance: {torch.Tensor(distance_travelled).mean():.0f}',
            f'mean success: {torch.Tensor(road_completed).mean():.3f}',
        ))
        print(log_string)
        utils.log(path.join(opt.save_dir, f'{plan_file}.log'), log_string)

        if writer is not None:
            # writer.add_video(
            #     f'Video/success={simulation_result.road_completed:d}_{j}',
            #     simulation_result.images.unsqueeze(0),
            #     j
            # )
            writer.add_scalar('ByEpisode/Success',
                              simulation_result.road_completed, j)
            writer.add_scalar('ByEpisode/Collision',
                              simulation_result.has_collided, j)
            writer.add_scalar('ByEpisode/OffScreen',
                              simulation_result.off_screen, j)
            writer.add_scalar('ByEpisode/Distance',
                              simulation_result.distance_travelled, j)

    pool.close()
    pool.join()

    diff_time = time.time() - time_started
    print('average simulated frames per wall-clock second:', total_images / diff_time)

    torch.save(
        {'road_completed': road_completed, 'collided': collided, 'offscreen': offscreen},
        path.join(opt.save_dir, f'{plan_file}.others')
    )
    torch.save(action_sequences, path.join(
        opt.save_dir, f'{plan_file}.actions'))
    torch.save(state_sequences, path.join(opt.save_dir, f'{plan_file}.states'))
#     torch.save(image_sequences, path.join(opt.save_dir, f'{plan_file}.images'))
    torch.save(cost_sequences, path.join(opt.save_dir, f'{plan_file}.costs'))
  
    if writer is not None:
        writer.close()
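
# --- Hedged sketch, not part of the original script: reloading the artifacts that
# main() saves above for offline inspection. The helper name is hypothetical; the
# file suffixes mirror the torch.save calls in main().
def load_evaluation_results(save_dir, plan_file):
    outcomes = torch.load(path.join(save_dir, f'{plan_file}.others'))
    actions = torch.load(path.join(save_dir, f'{plan_file}.actions'))
    states = torch.load(path.join(save_dir, f'{plan_file}.states'))
    costs = torch.load(path.join(save_dir, f'{plan_file}.costs'))
    n_episodes = len(outcomes['road_completed'])
    success_rate = sum(outcomes['road_completed']) / max(n_episodes, 1)
    print(f'{n_episodes} episodes, success rate {success_rate:.3f}')
    return outcomes, actions, states, costs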
Example 3
def process_one_episode(opt, env, car_path, plan_file, index, car_sizes):
    movie_dir = path.join(opt.save_dir, 'videos_simulator', plan_file,
                          f'ep{index + 1}')

    timeslot, car_id = utils.parse_car_path(car_path)
    # if None => picked at random
    inputs = env.reset(time_slot=timeslot, vehicle_id=car_id)
    done, mu, std = False, None, None
    images, states, costs, actions, mu_list, std_list, grad_list = [], [], [], [], [], [], []
    cntr = 0
    # inputs, cost, done, info = env.step(numpy.zeros((2,)))
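    # Keep the first ego state; distance travelled is measured against it once the
    # episode ends (final position minus initial position along the road).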
    input_state_t0 = inputs['state'].contiguous()[-1]
    cost_sequence, action_sequence, state_sequence = [], [], []
    has_collided = False
    off_screen = False

    # Override the controlled car's speed through a private attribute of the simulator.
    env.controlled_car["locked"]._speed = 400.0

    it_limit = 60  # Avoid excess disk usage due to a control-flow bug
    while not done and cntr < it_limit:
        print("___________________________________________________________")
        print(f"cntr = {cntr}")

        input_images = inputs['context'].contiguous()
        input_states = inputs['state'].contiguous()

        a = [0.0, 0.0]  # No acceleration/steering

        action_sequence.append(a)
        state_sequence.append(input_states)
        cntr += 1
        cost_test = 0

        inputs, cost, done, info = env.step(a)
        if not opt.ignore_crash and info.collisions_per_frame > 0:
            has_collided = True
            # print(f'[collision after {cntr} frames, ending]')
            done = True
        off_screen = info.off_screen

        images.append(input_images[-1])
        states.append(input_states[-1])
        costs.append([cost['pixel_proximity_cost'], cost['lane_cost']])
        cost_sequence.append(cost)

        actions.append(a)
        mu_list.append(mu)
        std_list.append(std)

        print(f"len(info.frames) = {len(info.frames)}")

    print("___________________________________________________________")
    print(f"done = {done}, it_limit = {it_limit}, cntr = {cntr}")

    input_state_tfinal = inputs['state'][-1]

    if mu is not None:
        mu_list = numpy.stack(mu_list)
        std_list = numpy.stack(std_list)
    else:
        mu_list, std_list = None, None

    images = torch.stack(images)
    states = torch.stack(states)
    costs = torch.tensor(costs)
    actions = torch.tensor(actions)

    if len(images) > 3:
        images_3_channels = (images[:, :3] + images[:, 3:]).clamp(max=255)
        utils.save_movie(path.join(movie_dir, 'ego'),
                         images_3_channels.float() / 255.0,
                         states,
                         costs,
                         actions=actions,
                         mu=mu_list,
                         std=std_list,
                         pytorch=True)

        if opt.save_sim_video:
            print(f"len(info.frames) = {len(info.frames)}")
            sim_path = path.join(movie_dir, 'sim')
            print(f'[saving simulator movie to {sim_path}]')
            if not path.exists(sim_path):
                os.mkdir(sim_path)
            for n, img in enumerate(info.frames):
                imwrite(path.join(sim_path, f'im{n:05d}.png'), img)

    returned = eval_policy.SimulationResult()
    returned.time_travelled = len(images)
    returned.distance_travelled = input_state_tfinal[0] - input_state_t0[0]
    returned.road_completed = 1 if cost['arrived_to_dst'] else 0
    returned.off_screen = off_screen
    returned.has_collided = has_collided
    returned.action_sequence = numpy.stack(action_sequence)
    returned.state_sequence = numpy.stack(state_sequence)
    returned.cost_sequence = numpy.stack(cost_sequence)

    return returned
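
# --- Hedged sketch, not part of the source: a minimal SimulationResult container.
# The episode functions above only set plain attributes on the returned object, so a
# simple attribute bag with these fields would suffice; the real definition lives in
# eval_policy.
class SimulationResult:
    def __init__(self):
        self.time_travelled = 0          # number of simulated frames
        self.distance_travelled = 0.0    # displacement of the ego car
        self.road_completed = 0          # 1 if the car arrived at its destination
        self.has_collided = False
        self.off_screen = False
        self.action_sequence = None      # numpy array of executed actions
        self.state_sequence = None       # numpy array of observed states
        self.cost_sequence = None        # per-step cost dictionaries
        self.image_sequence = None       # optional, only kept when image logging is enabled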
Example 4
def process_one_episode(opt,
                        env,
                        car_path,
                        forward_model,
                        policy_network_il,
                        data_stats,
                        plan_file,
                        index,
                        car_sizes):
    movie_dir = path.join(
        opt.save_dir, 'videos_simulator', plan_file, f'ep{index + 1}')
    if opt.save_grad_vid:
        grad_movie_dir = path.join(
            opt.save_dir, 'grad_videos_simulator', plan_file, f'ep{index + 1}')
        print(f'[gradient videos will be saved to: {grad_movie_dir}]')
    timeslot, car_id = utils.parse_car_path(car_path)
    # if None => picked at random
    inputs = env.reset(time_slot=timeslot, vehicle_id=car_id)
#     print(torch.mean(inputs['context']), torch.mean(inputs['state']))
    forward_model.reset_action_buffer(opt.npred)
    done, mu, std = False, None, None
    images, states, costs, actions, mu_list, std_list, grad_list = [], [], [], [], [], [], []
    cntr = 0
    # inputs, cost, done, info = env.step(numpy.zeros((2,)))
    input_state_t0 = inputs['state'].contiguous()[-1]
    cost_sequence, action_sequence, state_sequence, image_sequence = [], [], [], []
    has_collided = False
    off_screen = False
    while not done:
        input_images = inputs['context'].contiguous()
        input_states = inputs['state'].contiguous()
#         print(input_images.shape, input_states.shape)
        if opt.save_grad_vid:
            grad_list.append(planning.get_grad_vid(
                forward_model, input_images, input_states,
                car_sizes,
                device='cuda' if torch.cuda.is_available() else 'cpu'
            ))
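        # Pick this step's action according to opt.method: a fixed zero action
        # ('no-action'), gradient-based planning through the forward model ('bprop'),
        # a learned policy ('policy-IL', 'policy-MPER', 'policy-MPUR'), or a policy
        # warm start refined by back-propagation ('bprop+policy-IL').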
        if opt.method == 'no-action':
            a = numpy.zeros((1, 2))
        elif opt.method == 'bprop':
            # TODO: car size is provided by the dataloader!! This lines below should be removed!
            # TODO: Namely, dataloader.car_sizes[timeslot][car_id]
            a = planning.plan_actions_backprop(
                forward_model,
                input_images[:, :3, :, :].contiguous(),
                input_states,
                car_sizes,
                npred=opt.npred,
                n_futures=opt.n_rollouts,
                normalize=True,
                bprop_niter=opt.bprop_niter,
                bprop_lrt=opt.bprop_lrt,
                u_reg=opt.u_reg,
                use_action_buffer=(opt.bprop_buffer == 1),
                n_models=opt.n_dropout_models,
                save_opt_stats=(opt.bprop_save_opt_stats == 1),
                nexec=opt.nexec,
                lambda_l=opt.lambda_l,
                lambda_o=opt.lambda_o
            )
        elif opt.method == 'policy-IL':
            _, _, _, a = policy_network_il(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.squeeze().cpu().view(1, 2).numpy()
        elif opt.method == 'policy-MPER':
            a, entropy, mu, std = forward_model.policy_net(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.cpu().view(1, 2).numpy()
        elif opt.method == 'policy-MPUR':
            a, entropy, mu, std = forward_model.policy_net(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.cpu().view(1, 2).numpy()    
#         elif opt.method == 'bprop+policy-MPUR':
#             a, entropy, mu, std = forward_model.policy_net(
#                 input_images,
#                 input_states,
#                 sample=True,
#                 normalize_inputs=True,
#                 normalize_outputs=True
#             )
# #             a = a[0]
#             a = forward_model.plan_actions_backprop(
#                 input_images,
#                 input_states,
#                 npred=opt.npred,
#                 n_futures=opt.n_rollouts,
#                 normalize=True,
#                 bprop_niter=opt.bprop_niter,
#                 bprop_lrt=opt.bprop_lrt,
#                 actions=a,
#                 u_reg=opt.u_reg,
#                 nexec=opt.nexec
#             )
#             a = a.cpu().view(1, 2).numpy()
        elif opt.method == 'bprop+policy-IL':
            _, _, _, a = policy_network_il(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=False
            )
            a = a[0]
            a = forward_model.plan_actions_backprop(
                input_images,
                input_states,
                npred=opt.npred,
                n_futures=opt.n_rollouts,
                normalize=True,
                bprop_niter=opt.bprop_niter,
                bprop_lrt=opt.bprop_lrt,
                actions=a,
                u_reg=opt.u_reg,
                nexec=opt.nexec
            )

        action_sequence.append(a)
        state_sequence.append(input_states)
        # image_sequence.append(input_images)
        cntr += 1
        cost_test = 0
        t = 0
        T = opt.npred if opt.nexec == -1 else opt.nexec
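        # Apply up to T planned actions: step the real simulator below, or (in the
        # else branch) roll them through the forward model when opt.use_forward_model
        # is set.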
        if not opt.use_forward_model:
            while (t < T) and not done:
                inputs, cost, done, info = env.step(a[t])
                if info.collisions_per_frame > 0:
                    has_collided = True
                    # print(f'[collision after {cntr} frames, ending]')
                    done = True
                off_screen = info.off_screen

                images.append(input_images[-1])
                states.append(input_states[-1])
                costs.append([cost['pixel_proximity_cost'], cost['lane_cost']])
                cost_sequence.append(cost)
                if opt.mfile == 'no-action':
                    actions.append(a[t])
                    mu_list.append(mu)
                    std_list.append(std)
                else:
                    actions.append(
                        ((torch.tensor(a[t]) - data_stats['a_mean'])
                            / data_stats['a_std'])
                    )
                    if mu is not None:
                        mu_list.append(mu.data.cpu().numpy())
                        std_list.append(std.data.cpu().numpy())
                t += 1
        else:
            # Query the forward model directly instead of stepping the simulator.
            ego_images = input_images.contiguous()[:, :3]
            context_images = input_images.contiguous()[:, 3:4].repeat(1, 3, 1, 1)
            # The "target" tuple below is only a shape placeholder (random tensors).
            targets = (torch.rand(ego_images.unsqueeze(0).shape),
                       torch.rand(input_states.unsqueeze(0).shape),
                       torch.rand(context_images.unsqueeze(0).shape))
            pred, actions = planning.train_policy_net_mpur(
                forward_model,
                (ego_images, input_states, context_images),
                targets,
                car_sizes,
                n_models=10, lrt_z=0, n_updates_z=0,
                infer_z=False, no_cuda=False,
                return_per_instance_values=False
            )
            print(pred['state_img'].shape, pred['proximity'].shape, pred['lane'].shape)
            images.append(pred['state_img'])
            costs.append([pred['proximity'], pred['lane']])
            
    input_state_tfinal = inputs['state'][-1]

    if mu is not None:
        mu_list = numpy.stack(mu_list)
        std_list = numpy.stack(std_list)
    else:
        mu_list, std_list = None, None

    images = torch.stack(images)
    states = torch.stack(states)
    costs = torch.tensor(costs)
    actions = torch.stack(actions)
    if opt.save_grad_vid:
        grads = torch.cat(grad_list)

    if len(images) > 3:
        images_3_channels = (images[:, :3] + images[:, 3:]).clamp(max=255)
        utils.save_movie(path.join(movie_dir, 'ego'),
                         images_3_channels.float() / 255.0,
                         states,
                         costs,
                         actions=actions,
                         mu=mu_list,
                         std=std_list,
                         pytorch=True)
        if opt.save_grad_vid:
            utils.save_movie(
                grad_movie_dir,
                grads,
                None,
                None,
                None,
                None,
                None,
                pytorch=True
            )
        if opt.save_sim_video:
            sim_path = path.join(movie_dir, 'sim')
            print(f'[saving simulator movie to {sim_path}]')
            os.makedirs(sim_path, exist_ok=True)
            for n, img in enumerate(info.frames):
                imwrite(path.join(sim_path, f'im{n:05d}.png'), img)

    returned = SimulationResult()
    returned.time_travelled = len(images)
    returned.distance_travelled = input_state_tfinal[0] - input_state_t0[0]
    returned.road_completed = 1 if cost['arrived_to_dst'] else 0
    returned.off_screen = off_screen
    returned.has_collided = has_collided
    returned.action_sequence = numpy.stack(action_sequence)
    returned.state_sequence = numpy.stack(state_sequence)
    returned.cost_sequence = numpy.stack(cost_sequence)
    print(car_sizes)
    # returned.image_sequence = numpy.stack(image_sequence)

    return returned
Example 5
def main():
    opt = parse_args()
    device = utils.get_device()

    random.seed(opt.seed)
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    data_path = 'traffic-data/state-action-cost/data_i80_v0'

    dataloader = DataLoader(None, opt, 'i80')

    splits = torch.load(path.join(data_path, 'splits.pth'))

    gym.envs.registration.register(
        id='I-80-v1',
        entry_point='map_i80_ctrl:ControlledI80',
        kwargs=dict(
            fps=10,
            nb_states=opt.ncond,
            display=False,
            delta_t=0.1,
            store_simulator_video=opt.save_sim_video,
            show_frame_count=False,
        ))

    print('Building the environment (loading data, if any)')
    env_names = {
        'i80': 'I-80-v1',
    }
    env = gym.make(env_names[opt.map])

    plan_file = eval_policy.build_plan_file_name(opt)
    print(f'[saving to {path.join(opt.save_dir, plan_file)}]')

    # different performance metrics
    time_travelled, distance_travelled, road_completed = [], [], []
    # values saved for later inspection
    action_sequences, state_sequences, cost_sequences = [], [], []

    #writer = utils.create_tensorboard_writer(opt)

    n_test = len(splits['test_indx'])
    n_test = min(1, n_test)  # Ignore others

    time_started = time.time()
    total_images = 0

    for j in range(n_test):
        car_path = dataloader.ids[splits['test_indx'][j]]
        timeslot, car_id = utils.parse_car_path(car_path)
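        # Size of the controlled car for this episode, passed to the planner.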
        car_sizes = torch.tensor(dataloader.car_sizes[sorted(
            list(dataloader.car_sizes.keys()))[timeslot]][car_id])[None, :]
        simulation_result = process_one_episode(opt, env, car_path, plan_file,
                                                j, car_sizes)

        time_travelled.append(simulation_result.time_travelled)
        distance_travelled.append(simulation_result.distance_travelled)
        road_completed.append(simulation_result.road_completed)
        action_sequences.append(
            torch.from_numpy(simulation_result.action_sequence))
        state_sequences.append(
            torch.from_numpy(simulation_result.state_sequence))
        cost_sequences.append(simulation_result.cost_sequence)
        total_images += time_travelled[-1]

        log_string = ' | '.join((
            f'ep: {j + 1:3d}/{n_test}',
            f'time: {time_travelled[-1]}',
            f'distance: {distance_travelled[-1]:.0f}',
            f'success: {road_completed[-1]:d}',
            f'mean time: {torch.Tensor(time_travelled).mean():.0f}',
            f'mean distance: {torch.Tensor(distance_travelled).mean():.0f}',
            f'mean success: {torch.Tensor(road_completed).mean():.3f}',
        ))
        print(log_string)
        utils.log(path.join(opt.save_dir, f'{plan_file}.log'), log_string)

        if False:  #writer is not None:
            # writer.add_video(
            #     f'Video/success={simulation_result.road_completed:d}_{j}',
            #     simulation_result.images.unsqueeze(0),
            #     j
            # )
            writer.add_scalar('ByEpisode/Success',
                              simulation_result.road_completed, j)
            writer.add_scalar('ByEpisode/Collision',
                              simulation_result.has_collided, j)
            writer.add_scalar('ByEpisode/OffScreen',
                              simulation_result.off_screen, j)
            writer.add_scalar('ByEpisode/Distance',
                              simulation_result.distance_travelled, j)

    diff_time = time.time() - time_started
    print('average simulated frames per wall-clock second:', total_images / diff_time)

    torch.save(action_sequences, path.join(opt.save_dir,
                                           f'{plan_file}.actions'))
    torch.save(state_sequences, path.join(opt.save_dir, f'{plan_file}.states'))
    torch.save(cost_sequences, path.join(opt.save_dir, f'{plan_file}.costs'))

    if False:  #writer is not None:
        writer.close()
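
# Hypothetical entry point, not shown in this excerpt: run the evaluation when the
# script is executed directly.
if __name__ == '__main__':
    main()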