コード例 #1
0
        pred, loss_p = model(inputs[: -1], actions, targets, z_dropout=opt.z_dropout)
        loss_p = loss_p[0]
        loss_i, loss_s = compute_loss(targets, pred)
        loss = loss_i + loss_s + opt.beta*loss_p

        total_loss_i += loss_i.item()
        total_loss_s += loss_s.item()
        total_loss_p += loss_p.item()
        del inputs, actions, targets

    total_loss_i /= nbatches
    total_loss_s /= nbatches
    total_loss_p /= nbatches
    return total_loss_i, total_loss_s, total_loss_p

# Tensorboard writer; the loop below treats None as "logging disabled".
writer = utils.create_tensorboard_writer(opt)

# Scalar tags, listed in the order in which the loss results are indexed
# (positions 0, 1, 2).
train_tags = (
    'Loss/train_state_img',
    'Loss/train_state_vct',
    'Loss/train_relative_entropy',
)
valid_tags = (
    'Loss/validation_state_img',
    'Loss/validation_state_vct',
    'Loss/validation_relative_entropy',
)

print('[training]')
for i in range(200):
    # Timestamp taken at the start of the epoch (not read within this excerpt).
    t0 = time.time()
    train_losses = train(opt.epoch_size, opt.npred)
    # Validation runs over half as many batches as a training epoch.
    valid_losses = test(int(opt.epoch_size / 2))

    if writer is not None:
        # Log the training scalars first, then the validation ones, all
        # against the epoch index.
        for tags, losses in ((train_tags, train_losses),
                             (valid_tags, valid_losses)):
            for k, tag in enumerate(tags):
                writer.add_scalar(tag, losses[k], i)
コード例 #2
0
ファイル: eval_policy.py プロジェクト: kawshik8/pytorch-PPUU
def main():
    """Evaluate a policy on every test-split episode and save the results.

    Parses the options, seeds the random generators, loads the models and the
    data splits, registers and builds the gym environment, then runs
    ``process_one_episode`` once per entry of ``splits['test_indx']`` in a
    pool of ``opt.num_processes`` workers. Per-episode and running metrics
    are printed, appended to ``<plan_file>.log`` and, when a tensorboard
    writer is available, logged as scalars. Finally the collected outcome
    flags and the action/state/cost sequences are written with ``torch.save``
    under ``opt.save_dir``.

    The worker pool and the tensorboard writer are released even when
    submitting or collecting an episode fails.

    Raises:
        KeyError: if ``opt.map`` is not a key of the environment-name table.
    """
    opt = parse_args()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    random.seed(opt.seed)
    numpy.random.seed(opt.seed)
    torch.manual_seed(opt.seed)

    data_path = 'traffic-data/state-action-cost/data_i80_v0'

    dataloader = DataLoader(None, opt, 'i80')
    # Only the forward model, the IL policy network and the data statistics
    # are used below; the other two returned objects are not needed here.
    (
        forward_model,
        _value_function,
        policy_network_il,
        _policy_network_mper,
        data_stats
    ) = load_models(opt, data_path, device)
    splits = torch.load(path.join(data_path, 'splits.pth'))

    if opt.u_reg > 0.0:
        # With uncertainty regularisation enabled, switch the forward model
        # (and its value function, if it has one) to train mode before
        # estimating the uncertainty statistics.
        forward_model.train()
        forward_model.opt.u_hinge = opt.u_hinge
        if hasattr(forward_model, 'value_function'):
            forward_model.value_function.train()
        planning.estimate_uncertainty_stats(
            forward_model, dataloader, n_batches=50, npred=opt.npred)

    gym.envs.registration.register(
        id='I-80-v1',
        entry_point='map_i80_ctrl:ControlledI80',
        kwargs=dict(
            fps=10,
            nb_states=opt.ncond,
            display=False,
            delta_t=0.1,
            store_simulator_video=opt.save_sim_video,
            show_frame_count=False,
        )
    )

    print('Building the environment (loading data, if any)')
    env_names = {
        'i80': 'I-80-v1',
    }
    env = gym.make(env_names[opt.map])

    plan_file = build_plan_file_name(opt)
    print(f'[saving to {path.join(opt.save_dir, plan_file)}]')

    writer = utils.create_tensorboard_writer(opt)
    try:
        set_start_method('spawn')
        pool = Pool(opt.num_processes)
        time_started = time.time()
        try:
            async_results = _submit_episodes(
                pool, opt, env, dataloader, splits['test_indx'],
                forward_model, policy_network_il, data_stats, plan_file)
            episodes = _collect_episodes(async_results, opt, plan_file, writer)
        except BaseException:
            # Do not leave worker processes running behind a failed run.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

        diff_time = time.time() - time_started
        print('avg time travelled per second is',
              episodes['total_images'] / diff_time)

        # Image sequences are neither collected nor saved.
        torch.save(
            {
                "road_completed": episodes['road_completed'],
                "collided": episodes['collided'],
                "offscreen": episodes['offscreen'],
            },
            path.join(opt.save_dir, f'{plan_file}.others'))
        torch.save(episodes['action_sequences'], path.join(
            opt.save_dir, f'{plan_file}.actions'))
        torch.save(episodes['state_sequences'],
                   path.join(opt.save_dir, f'{plan_file}.states'))
        torch.save(episodes['cost_sequences'],
                   path.join(opt.save_dir, f'{plan_file}.costs'))
    finally:
        if writer is not None:
            writer.close()


def _submit_episodes(pool, opt, env, dataloader, test_indices, forward_model,
                     policy_network_il, data_stats, plan_file):
    """Queue one ``process_one_episode`` task per test index on ``pool``.

    Returns the list of async result handles, in test-index order.
    """
    # Loop-invariant: the sorted keys of dataloader.car_sizes, which the
    # parsed timeslot number indexes into.
    timeslot_keys = sorted(dataloader.car_sizes.keys())

    async_results = []
    for j in range(len(test_indices)):
        car_path = dataloader.ids[test_indices[j]]
        timeslot, car_id = utils.parse_car_path(car_path)
        # [None, :] prepends a leading dimension of size 1.
        car_sizes = torch.tensor(
            dataloader.car_sizes[timeslot_keys[timeslot]][car_id]
        )[None, :]
        async_results.append(
            pool.apply_async(
                process_one_episode, (
                    opt,
                    env,
                    car_path,
                    forward_model,
                    policy_network_il,
                    data_stats,
                    plan_file,
                    j,
                    car_sizes
                )
            )
        )
    return async_results


def _collect_episodes(async_results, opt, plan_file, writer):
    """Wait for every episode in order, log its metrics and gather the results.

    Returns a dict holding the per-episode lists (``time_travelled``,
    ``distance_travelled``, ``road_completed``, ``collided``, ``offscreen``,
    ``action_sequences``, ``state_sequences``, ``cost_sequences``) and
    ``total_images``, the sum of all ``time_travelled`` values.
    """
    n_test = len(async_results)

    # different performance metrics
    time_travelled, distance_travelled, road_completed = [], [], []
    collided, offscreen = [], []
    # values saved for later inspection
    action_sequences, state_sequences, cost_sequences = [], [], []
    total_images = 0

    for j, async_result in enumerate(async_results):
        # Blocks until episode j is done. NOTE(review): with the standard
        # multiprocessing pool, get() re-raises an exception raised in the
        # worker -- confirm which Pool implementation the file imports.
        simulation_result = async_result.get()

        time_travelled.append(simulation_result.time_travelled)
        distance_travelled.append(simulation_result.distance_travelled)
        road_completed.append(simulation_result.road_completed)
        action_sequences.append(torch.from_numpy(
            simulation_result.action_sequence))
        state_sequences.append(torch.from_numpy(
            simulation_result.state_sequence))
        cost_sequences.append(simulation_result.cost_sequence)
        total_images += time_travelled[-1]

        collided.append(simulation_result.has_collided)
        offscreen.append(simulation_result.off_screen)

        # Current episode values followed by running means over all
        # episodes collected so far.
        log_string = ' | '.join((
            f'ep: {j + 1:3d}/{n_test}',
            f'time: {time_travelled[-1]}',
            f'distance: {distance_travelled[-1]:.0f}',
            f'success: {road_completed[-1]:d}',
            f'mean time: {torch.Tensor(time_travelled).mean():.0f}',
            f'mean distance: {torch.Tensor(distance_travelled).mean():.0f}',
            f'mean success: {torch.Tensor(road_completed).mean():.3f}',
        ))
        print(log_string)
        utils.log(path.join(opt.save_dir, f'{plan_file}.log'), log_string)

        if writer is not None:
            writer.add_scalar('ByEpisode/Success',
                              simulation_result.road_completed, j)
            writer.add_scalar('ByEpisode/Collision',
                              simulation_result.has_collided, j)
            writer.add_scalar('ByEpisode/OffScreen',
                              simulation_result.off_screen, j)
            writer.add_scalar('ByEpisode/Distance',
                              simulation_result.distance_travelled, j)

    return {
        'time_travelled': time_travelled,
        'distance_travelled': distance_travelled,
        'road_completed': road_completed,
        'collided': collided,
        'offscreen': offscreen,
        'action_sequences': action_sequences,
        'state_sequences': state_sequences,
        'cost_sequences': cost_sequences,
        'total_images': total_images,
    }