time_travelled, distance_travelled, road_completed, action_sequences, state_sequences = [], [], [], [], [] writer = utils.create_tensorboard_writer(opt) n_test = len(splits['test_indx']) outcomes = [] for j in range(n_test): movie_dir = path.join(opt.save_dir, 'videos_simulator', plan_file, f'ep{j + 1}') print(f'[new episode, will save to: {movie_dir}]') if opt.save_grad_vid: grad_movie_dir = path.join(opt.save_dir, 'grad_videos_simulator', plan_file, f'ep{j + 1}') print(f'[gradient videos will be saved to: {grad_movie_dir}]') car_path = dataloader.ids[splits['test_indx'][j]] timeslot, car_id = utils.parse_car_path(car_path) inputs = env.reset(time_slot=timeslot, vehicle_id=car_id) # if None => picked at random forward_model.reset_action_buffer(opt.npred) done, mu, std = False, None, None images, states, costs, actions, mu_list, std_list, grad_list = [], [], [], [], [], [], [] cntr = 0 # inputs, cost, done, info = env.step(numpy.zeros((2,))) input_state_t0 = inputs['state'].contiguous()[-1] action_sequences.append([]) state_sequences.append([]) has_collided = False off_screen = False while not done: input_images = inputs['context'].contiguous() input_states = inputs['state'].contiguous()
def main():
    """Evaluate a driving policy on the I-80 test split, one episode per worker.

    Loads the models and dataset splits, registers and builds the simulator
    environment, then fans the test episodes out to a multiprocessing pool
    (one ``process_one_episode`` call per episode).  Results are collected
    back in submission order, per-episode metrics are logged (console, log
    file and, when available, TensorBoard), and the recorded action/state/
    cost sequences are saved to disk next to the plan file.
    """
    opt = parse_args()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Seed every RNG in use so evaluation runs are reproducible.
    random.seed(opt.seed)
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    data_path = 'traffic-data/state-action-cost/data_i80_v0'
    dataloader = DataLoader(None, opt, 'i80')
    (
        forward_model,
        value_function,
        policy_network_il,
        policy_network_mper,
        data_stats
    ) = load_models(opt, data_path, device)
    splits = torch.load(path.join(data_path, 'splits.pth'))

    # With uncertainty regularisation enabled, the forward model is put in
    # train mode before estimating uncertainty statistics (presumably to keep
    # dropout active — NOTE(review): confirm against planning module).
    if opt.u_reg > 0.0:
        forward_model.train()
        forward_model.opt.u_hinge = opt.u_hinge
        if hasattr(forward_model, 'value_function'):
            forward_model.value_function.train()
        planning.estimate_uncertainty_stats(
            forward_model, dataloader, n_batches=50, npred=opt.npred)

    gym.envs.registration.register(
        id='I-80-v1',
        entry_point='map_i80_ctrl:ControlledI80',
        kwargs=dict(
            fps=10,
            nb_states=opt.ncond,
            display=False,
            delta_t=0.1,
            store_simulator_video=opt.save_sim_video,
            show_frame_count=False,
        )
    )

    print('Building the environment (loading data, if any)')
    env_names = {
        'i80': 'I-80-v1',
    }
    env = gym.make(env_names[opt.map])

    plan_file = build_plan_file_name(opt)
    print(f'[saving to {path.join(opt.save_dir, plan_file)}]')

    # different performance metrics
    time_travelled, distance_travelled, road_completed = [], [], []
    collided, offscreen = [], []
    # values saved for later inspection
    action_sequences, state_sequences, cost_sequences = [], [], []
    image_sequences = []

    writer = utils.create_tensorboard_writer(opt)  # may return None

    n_test = len(splits['test_indx'])

    # Use the 'spawn' start method for the worker pool.
    set_start_method('spawn')
    pool = Pool(opt.num_processes)

    async_results = []

    time_started = time.time()
    total_images = 0

    # Phase 1: submit every test episode to the pool.
    for j in range(n_test):
        car_path = dataloader.ids[splits['test_indx'][j]]
        timeslot, car_id = utils.parse_car_path(car_path)
        # car_sizes keys are sorted so `timeslot` indexes a stable ordering;
        # the [None, :] adds a leading batch dimension.
        car_sizes = torch.tensor(
            dataloader.car_sizes[sorted(list(dataloader.car_sizes.keys()))[
                timeslot]][car_id]
        )[None, :]
        async_results.append(
            pool.apply_async(
                process_one_episode, (
                    opt, env, car_path, forward_model,
                    policy_network_il, data_stats, plan_file,
                    j, car_sizes
                )
            )
        )

    # Phase 2: collect results in submission order and log running metrics.
    for j in range(n_test):
        simulation_result = async_results[j].get()
        time_travelled.append(simulation_result.time_travelled)
        distance_travelled.append(simulation_result.distance_travelled)
        road_completed.append(simulation_result.road_completed)
        action_sequences.append(torch.from_numpy(
            simulation_result.action_sequence))
        state_sequences.append(torch.from_numpy(
            simulation_result.state_sequence))
        cost_sequences.append(simulation_result.cost_sequence)
        # time_travelled counts recorded frames, so this accumulates the
        # total number of images processed (used for the throughput print).
        total_images += time_travelled[-1]
        collided.append(simulation_result.has_collided)
        offscreen.append(simulation_result.off_screen)
        log_string = ' | '.join((
            f'ep: {j + 1:3d}/{n_test}',
            f'time: {time_travelled[-1]}',
            f'distance: {distance_travelled[-1]:.0f}',
            f'success: {road_completed[-1]:d}',
            f'mean time: {torch.Tensor(time_travelled).mean():.0f}',
            f'mean distance: {torch.Tensor(distance_travelled).mean():.0f}',
            f'mean success: {torch.Tensor(road_completed).mean():.3f}',
        ))
        print(log_string)
        utils.log(path.join(opt.save_dir, f'{plan_file}.log'), log_string)

        if writer is not None:
            writer.add_scalar('ByEpisode/Success',
                              simulation_result.road_completed, j)
            writer.add_scalar('ByEpisode/Collision',
                              simulation_result.has_collided, j)
            writer.add_scalar('ByEpisode/OffScreen',
                              simulation_result.off_screen, j)
            writer.add_scalar('ByEpisode/Distance',
                              simulation_result.distance_travelled, j)

    pool.close()
    pool.join()

    diff_time = time.time() - time_started
    print('avg time travelled per second is', total_images / diff_time)

    # Persist outcomes and raw sequences for offline analysis.
    torch.save({"road_completed": road_completed,
                "collided": collided,
                "offscreen": offscreen},
               path.join(opt.save_dir, f'{plan_file}.others'))
    torch.save(action_sequences, path.join(
        opt.save_dir, f'{plan_file}.actions'))
    torch.save(state_sequences, path.join(opt.save_dir, f'{plan_file}.states'))
    torch.save(cost_sequences, path.join(opt.save_dir, f'{plan_file}.costs'))

    if writer is not None:
        writer.close()
def process_one_episode(opt, env, car_path, plan_file, index, car_sizes):
    """Roll out one evaluation episode with a constant zero action.

    Resets the controlled car to the episode identified by ``car_path`` and
    steps the environment with action ``[0.0, 0.0]`` until the environment
    reports ``done``, a collision occurs (unless ``opt.ignore_crash``), or
    ``it_limit`` steps have elapsed.  If more than three frames were
    recorded, an "ego" movie (and optionally the raw simulator frames) is
    written under ``opt.save_dir``.

    Args:
        opt: parsed options; reads ``save_dir``, ``ignore_crash`` and
            ``save_sim_video`` here.
        env: the I-80 gym environment.
        car_path: dataset path identifying the episode's time slot / car id.
        plan_file: name stem used for the output directories.
        index: zero-based episode index (used only for output naming).
        car_sizes: per-car size tensor — unused in this variant; kept for
            signature compatibility with the model-driven variant.

    Returns:
        ``eval_policy.SimulationResult`` carrying time/distance/success
        metrics plus the recorded action, state and cost sequences.
    """
    movie_dir = path.join(opt.save_dir, 'videos_simulator', plan_file,
                          f'ep{index + 1}')
    timeslot, car_id = utils.parse_car_path(car_path)
    # if None => picked at random
    inputs = env.reset(time_slot=timeslot, vehicle_id=car_id)
    done, mu, std = False, None, None
    # NOTE: grad_list/cost_test from the original were never used — removed.
    images, states, costs, actions, mu_list, std_list = [], [], [], [], [], []
    cntr = 0
    input_state_t0 = inputs['state'].contiguous()[-1]
    cost_sequence, action_sequence, state_sequence = [], [], []
    has_collided = False
    off_screen = False
    # NOTE(review): reaches into a private attribute to pin the controlled
    # car's speed; looks like a debugging aid — confirm before relying on it.
    env.controlled_car["locked"]._speed = 400.0
    it_limit = 60  # Avoid excess disk usage due to control flow bug
    while not done and cntr < it_limit:
        print("___________________________________________________________")
        print(f"cntr = {cntr}")
        input_images = inputs['context'].contiguous()
        input_states = inputs['state'].contiguous()
        a = [0.0, 0.0]  # No acceleration/steering
        action_sequence.append(a)
        state_sequence.append(input_states)
        cntr += 1
        inputs, cost, done, info = env.step(a)
        if not opt.ignore_crash and info.collisions_per_frame > 0:
            has_collided = True
            done = True
        off_screen = info.off_screen
        images.append(input_images[-1])
        states.append(input_states[-1])
        costs.append([cost['pixel_proximity_cost'], cost['lane_cost']])
        cost_sequence.append(cost)
        actions.append(a)
        mu_list.append(mu)
        std_list.append(std)
        print(f"len(info.frames) = {len(info.frames)}")
    print("___________________________________________________________")
    print(f"done = {done}, it_limit = {it_limit}, cntr = {cntr}")
    input_state_tfinal = inputs['state'][-1]
    # mu/std are never produced by this zero-action rollout, so the
    # placeholder lists collapse back to None.
    if mu is not None:
        mu_list = numpy.stack(mu_list)
        std_list = numpy.stack(std_list)
    else:
        mu_list, std_list = None, None
    images = torch.stack(images)
    states = torch.stack(states)
    costs = torch.tensor(costs)
    actions = torch.tensor(actions)
    if len(images) > 3:
        # Assumes channels 0:3 and 3: of each context frame sum into a
        # displayable RGB image — TODO(review): confirm channel layout.
        images_3_channels = (images[:, :3] + images[:, 3:]).clamp(max=255)
        utils.save_movie(path.join(movie_dir, 'ego'),
                         images_3_channels.float() / 255.0, states, costs,
                         actions=actions, mu=mu_list, std=std_list,
                         pytorch=True)
        if opt.save_sim_video:
            print(f"len(info.frames) = {len(info.frames)}")
            sim_path = path.join(movie_dir, 'sim')
            print(f'[saving simulator movie to {sim_path}]')
            # makedirs(exist_ok=True) replaces the exists()+mkdir pair:
            # race-free and also creates any missing parent directories.
            os.makedirs(sim_path, exist_ok=True)
            for n, img in enumerate(info.frames):
                imwrite(path.join(sim_path, f'im{n:05d}.png'), img)
    returned = eval_policy.SimulationResult()
    returned.time_travelled = len(images)
    returned.distance_travelled = input_state_tfinal[0] - input_state_t0[0]
    returned.road_completed = 1 if cost['arrived_to_dst'] else 0
    returned.off_screen = off_screen
    returned.has_collided = has_collided
    returned.action_sequence = numpy.stack(action_sequence)
    returned.state_sequence = numpy.stack(state_sequence)
    returned.cost_sequence = numpy.stack(cost_sequence)
    return returned
def process_one_episode(opt, env, car_path, forward_model, policy_network_il,
                        data_stats, plan_file, index, car_sizes):
    """Roll out one evaluation episode using the planner named by ``opt.method``.

    Supported methods: 'no-action', 'bprop', 'policy-IL', 'policy-MPER',
    'policy-MPUR' and 'bprop+policy-IL'.  Each loop iteration plans a
    (sequence of) action(s) from the current context images/states, then
    either executes up to ``T`` of them in the environment
    (``opt.use_forward_model`` false) or queries the forward model for
    predicted frames/costs.  Ego movies, gradient movies and simulator
    frames are optionally written under ``opt.save_dir``.

    Returns:
        ``SimulationResult`` carrying time/distance/success metrics plus
        the recorded action, state and cost sequences.
    """
    movie_dir = path.join(
        opt.save_dir, 'videos_simulator', plan_file, f'ep{index + 1}')
    if opt.save_grad_vid:
        grad_movie_dir = path.join(
            opt.save_dir, 'grad_videos_simulator', plan_file, f'ep{index + 1}')
        print(f'[gradient videos will be saved to: {grad_movie_dir}]')
    timeslot, car_id = utils.parse_car_path(car_path)
    # if None => picked at random
    inputs = env.reset(time_slot=timeslot, vehicle_id=car_id)
    forward_model.reset_action_buffer(opt.npred)
    done, mu, std = False, None, None
    images, states, costs, actions, mu_list, std_list, grad_list = \
        [], [], [], [], [], [], []
    cntr = 0
    input_state_t0 = inputs['state'].contiguous()[-1]
    cost_sequence, action_sequence, state_sequence = [], [], []
    has_collided = False
    off_screen = False
    while not done:
        input_images = inputs['context'].contiguous()
        input_states = inputs['state'].contiguous()
        if opt.save_grad_vid:
            grad_list.append(planning.get_grad_vid(
                forward_model,
                input_images,
                input_states,
                car_sizes,
                # BUG FIX: the original tested `torch.cuda.is_available`
                # (the function object, always truthy) instead of calling
                # it, so 'cuda' was selected even without a GPU.
                device='cuda' if torch.cuda.is_available() else 'cpu'
            ))
        if opt.method == 'no-action':
            a = numpy.zeros((1, 2))
        elif opt.method == 'bprop':
            # TODO: car size is provided by the dataloader!! The lines below
            # should be removed! Namely, dataloader.car_sizes[timeslot][car_id]
            a = planning.plan_actions_backprop(
                forward_model,
                input_images[:, :3, :, :].contiguous(),
                input_states,
                car_sizes,
                npred=opt.npred,
                n_futures=opt.n_rollouts,
                normalize=True,
                bprop_niter=opt.bprop_niter,
                bprop_lrt=opt.bprop_lrt,
                u_reg=opt.u_reg,
                use_action_buffer=(opt.bprop_buffer == 1),
                n_models=opt.n_dropout_models,
                save_opt_stats=(opt.bprop_save_opt_stats == 1),
                nexec=opt.nexec,
                lambda_l=opt.lambda_l,
                lambda_o=opt.lambda_o
            )
        elif opt.method == 'policy-IL':
            _, _, _, a = policy_network_il(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.squeeze().cpu().view(1, 2).numpy()
        elif opt.method == 'policy-MPER':
            a, entropy, mu, std = forward_model.policy_net(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.cpu().view(1, 2).numpy()
        elif opt.method == 'policy-MPUR':
            a, entropy, mu, std = forward_model.policy_net(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.cpu().view(1, 2).numpy()
        elif opt.method == 'bprop+policy-IL':
            # Seed the backprop planner with the IL policy's (unnormalised)
            # action proposal.
            _, _, _, a = policy_network_il(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=False
            )
            a = a[0]
            a = forward_model.plan_actions_backprop(
                input_images,
                input_states,
                npred=opt.npred,
                n_futures=opt.n_rollouts,
                normalize=True,
                bprop_niter=opt.bprop_niter,
                bprop_lrt=opt.bprop_lrt,
                actions=a,
                u_reg=opt.u_reg,
                nexec=opt.nexec
            )
        action_sequence.append(a)
        state_sequence.append(input_states)
        cntr += 1
        # Execute up to T planned actions before re-planning.
        t = 0
        T = opt.npred if opt.nexec == -1 else opt.nexec
        if not opt.use_forward_model:
            while (t < T) and not done:
                inputs, cost, done, info = env.step(a[t])
                if info.collisions_per_frame > 0:
                    has_collided = True
                    # print(f'[collision after {cntr} frames, ending]')
                    done = True
                off_screen = info.off_screen
                images.append(input_images[-1])
                states.append(input_states[-1])
                costs.append([cost['pixel_proximity_cost'],
                              cost['lane_cost']])
                cost_sequence.append(cost)
                if opt.mfile == 'no-action':
                    actions.append(a[t])
                    mu_list.append(mu)
                    std_list.append(std)
                else:
                    # Normalise the executed action with the dataset stats.
                    actions.append(
                        (torch.tensor(a[t]) - data_stats['a_mean'])
                        / data_stats['a_std']
                    )
                    if mu is not None:
                        mu_list.append(mu.data.cpu().numpy())
                        std_list.append(std.data.cpu().numpy())
                t += 1
        else:
            # NOTE(review): nothing in this branch updates `inputs` or sets
            # `done`, so the outer loop cannot terminate and `cost` stays
            # unbound for the final report — confirm intended control flow.
            print(input_images.shape, input_states.shape, type(input_images),
                  input_images.contiguous()[:, 3:4]
                  .repeat(1, 3, 1, 1).unsqueeze(0))
            pred, actions = planning.train_policy_net_mpur(
                forward_model,
                (input_images.contiguous()[:, :3],
                 input_states,
                 input_images.contiguous()[:, 3:4].repeat(1, 3, 1, 1)),
                (torch.rand(
                    input_images.contiguous()[:, :3].unsqueeze(0).shape),
                 torch.rand(input_states.unsqueeze(0).shape),
                 torch.rand(input_images.contiguous()[:, 3:4]
                            .repeat(1, 3, 1, 1).unsqueeze(0).shape)),
                car_sizes,
                n_models=10,
                lrt_z=0,
                n_updates_z=0,
                infer_z=False,
                no_cuda=False,
                return_per_instance_values=False
            )
            # BUG FIX: original printed pred['state_img.shape'] — a key that
            # does not match the 'state_img' key used just below.
            print(pred['state_img'].shape, pred['proximity'].shape,
                  pred['lane'].shape)
            images.append(pred['state_img'])
            # BUG FIX: list.append takes exactly one argument; the original
            # passed two (TypeError).  Store them as a pair, mirroring the
            # [pixel_proximity_cost, lane_cost] layout above.
            costs.append([pred['proximity'], pred['lane']])
    input_state_tfinal = inputs['state'][-1]
    if mu is not None:
        mu_list = numpy.stack(mu_list)
        std_list = numpy.stack(std_list)
    else:
        mu_list, std_list = None, None
    images = torch.stack(images)
    states = torch.stack(states)
    costs = torch.tensor(costs)
    actions = torch.stack(actions)
    if opt.save_grad_vid:
        grads = torch.cat(grad_list)
    if len(images) > 3:
        # Assumes channels 0:3 and 3: of each context frame sum into a
        # displayable RGB image — TODO(review): confirm channel layout.
        images_3_channels = (images[:, :3] + images[:, 3:]).clamp(max=255)
        utils.save_movie(path.join(movie_dir, 'ego'),
                         images_3_channels.float() / 255.0, states, costs,
                         actions=actions, mu=mu_list, std=std_list,
                         pytorch=True)
        if opt.save_grad_vid:
            utils.save_movie(
                grad_movie_dir, grads, None, None, None, None, None,
                pytorch=True
            )
        if opt.save_sim_video:
            sim_path = path.join(movie_dir, 'sim')
            print(f'[saving simulator movie to {sim_path}]')
            # BUG FIX: os.mkdir raised FileExistsError on a re-run and
            # FileNotFoundError when the parent was missing; makedirs with
            # exist_ok=True handles both.
            os.makedirs(sim_path, exist_ok=True)
            for n, img in enumerate(info.frames):
                imwrite(path.join(sim_path, f'im{n:05d}.png'), img)
    returned = SimulationResult()
    returned.time_travelled = len(images)
    returned.distance_travelled = input_state_tfinal[0] - input_state_t0[0]
    returned.road_completed = 1 if cost['arrived_to_dst'] else 0
    returned.off_screen = off_screen
    returned.has_collided = has_collided
    returned.action_sequence = numpy.stack(action_sequence)
    returned.state_sequence = numpy.stack(state_sequence)
    returned.cost_sequence = numpy.stack(cost_sequence)
    print(car_sizes)
    return returned
def main():
    """Sequentially evaluate ``process_one_episode`` on at most one test episode.

    Single-process counterpart of the pooled evaluation driver: seeds the
    RNGs, builds the I-80 environment, runs one episode of the test split
    (``n_test`` is clamped to 1), logs per-episode metrics, and saves the
    recorded action/state/cost sequences next to the plan file.
    """
    opt = parse_args()
    device = utils.get_device()  # NOTE(review): assigned but never used below
    # Seed every RNG in use so evaluation runs are reproducible.
    random.seed(opt.seed)
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    data_path = 'traffic-data/state-action-cost/data_i80_v0'
    dataloader = DataLoader(None, opt, 'i80')
    splits = torch.load(path.join(data_path, 'splits.pth'))
    gym.envs.registration.register(
        id='I-80-v1',
        entry_point='map_i80_ctrl:ControlledI80',
        kwargs=dict(
            fps=10,
            nb_states=opt.ncond,
            display=False,
            delta_t=0.1,
            store_simulator_video=opt.save_sim_video,
            show_frame_count=False,
        ))
    print('Building the environment (loading data, if any)')
    env_names = {
        'i80': 'I-80-v1',
    }
    env = gym.make(env_names[opt.map])
    plan_file = eval_policy.build_plan_file_name(opt)
    print(f'[saving to {path.join(opt.save_dir, plan_file)}]')
    # different performance metrics
    time_travelled, distance_travelled, road_completed = [], [], []
    # values saved for later inspection
    action_sequences, state_sequences, cost_sequences = [], [], []
    # writer = utils.create_tensorboard_writer(opt)
    n_test = len(splits['test_indx'])
    n_test = min(1, n_test)  # Ignore others
    time_started = time.time()
    total_images = 0
    for j in range(n_test):
        car_path = dataloader.ids[splits['test_indx'][j]]
        timeslot, car_id = utils.parse_car_path(car_path)
        # car_sizes keys are sorted so `timeslot` indexes a stable ordering;
        # the [None, :] adds a leading batch dimension.
        car_sizes = torch.tensor(dataloader.car_sizes[sorted(
            list(dataloader.car_sizes.keys()))[timeslot]][car_id])[None, :]
        simulation_result = process_one_episode(opt, env, car_path,
                                                plan_file, j, car_sizes)
        time_travelled.append(simulation_result.time_travelled)
        distance_travelled.append(simulation_result.distance_travelled)
        road_completed.append(simulation_result.road_completed)
        action_sequences.append(
            torch.from_numpy(simulation_result.action_sequence))
        state_sequences.append(
            torch.from_numpy(simulation_result.state_sequence))
        cost_sequences.append(simulation_result.cost_sequence)
        # time_travelled counts recorded frames, so this accumulates the
        # total number of images processed (used for the throughput print).
        total_images += time_travelled[-1]
        log_string = ' | '.join((
            f'ep: {j + 1:3d}/{n_test}',
            f'time: {time_travelled[-1]}',
            f'distance: {distance_travelled[-1]:.0f}',
            f'success: {road_completed[-1]:d}',
            f'mean time: {torch.Tensor(time_travelled).mean():.0f}',
            f'mean distance: {torch.Tensor(distance_travelled).mean():.0f}',
            f'mean success: {torch.Tensor(road_completed).mean():.3f}',
        ))
        print(log_string)
        utils.log(path.join(opt.save_dir, f'{plan_file}.log'), log_string)
        # TensorBoard logging is disabled: `writer` is never created above,
        # so this branch must stay dead or it would raise NameError.
        if False:  # writer is not None:
            writer.add_scalar('ByEpisode/Success',
                              simulation_result.road_completed, j)
            writer.add_scalar('ByEpisode/Collision',
                              simulation_result.has_collided, j)
            writer.add_scalar('ByEpisode/OffScreen',
                              simulation_result.off_screen, j)
            writer.add_scalar('ByEpisode/Distance',
                              simulation_result.distance_travelled, j)
    diff_time = time.time() - time_started
    print('avg time travelled per second is', total_images / diff_time)
    # Persist raw sequences for offline analysis.
    torch.save(action_sequences, path.join(opt.save_dir,
                                           f'{plan_file}.actions'))
    torch.save(state_sequences, path.join(opt.save_dir, f'{plan_file}.states'))
    torch.save(cost_sequences, path.join(opt.save_dir, f'{plan_file}.costs'))
    # Dead for the same reason as above: no writer was created.
    if False:  # writer is not None:
        writer.close()