Example #1
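A training/validation pass over nbatches batches, apparently from an MPUR policy-training script: each batch is rolled out with `planning.train_policy_net_mpur`, the policy loss is the weighted sum of the proximity, uncertainty, lane, action and offroad costs, and gradient steps are taken only when `what == 'train'`.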
def start(what, nbatches, npred):
    train = (what == 'train')
    model.train()
    model.policy_net.train()
    n_updates, grad_norm = 0, 0
    total_losses = dict(
        proximity=0,
        uncertainty=0,
        lane=0,
        offroad=0,
        action=0,
        policy=0,
    )
    for j in range(nbatches):
        inputs, actions, targets, ids, car_sizes = dataloader.get_batch_fm(
            what, npred)
        pred, actions = planning.train_policy_net_mpur(
            model,
            inputs,
            targets,
            car_sizes,
            n_models=10,
            lrt_z=opt.lrt_z,
            n_updates_z=opt.z_updates,
            infer_z=opt.infer_z)
        pred['policy'] = pred['proximity'] + \
                         opt.u_reg * pred['uncertainty'] + \
                         opt.lambda_l * pred['lane'] + \
                         opt.lambda_a * pred['action'] + \
                         opt.lambda_o * pred['offroad']

        if not math.isnan(pred['policy'].item()):
            if train:
                optimizer.zero_grad()
                pred['policy'].backward()  # back-propagation through time!
                grad_norm += utils.grad_norm(model.policy_net).item()
                torch.nn.utils.clip_grad_norm_(model.policy_net.parameters(),
                                               opt.grad_clip)
                optimizer.step()
            for loss in total_losses:
                total_losses[loss] += pred[loss].item()
            n_updates += 1
        else:
            print('warning, NaN')  # Oh no... Something got quite f****d up!
            ipdb.set_trace()

        if j == 0 and opt.save_movies and train:
            # save videos of normal and adversarial scenarios
            for b in range(opt.batch_size):
                state_img = pred['state_img'][b]
                state_vct = pred['state_vct'][b]
                utils.save_movie(opt.model_file + f'.mov/sampled/mov{b}',
                                 state_img, state_vct, None, actions[b])

        del inputs, actions, targets, pred

    for loss in total_losses:
        total_losses[loss] /= n_updates
    if train: print(f'[avg grad norm: {grad_norm / n_updates:.4f}]')
    return total_losses
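
For context, a driver loop around `start` might look like the sketch below (hypothetical: the option names `opt.n_epochs` and `opt.epoch_size`, and the 'valid' split, are assumptions rather than something shown on this page):

# Hypothetical driver loop around start(); option names and the 'valid'
# split are assumed, not taken from the original source.
for epoch in range(opt.n_epochs):
    train_losses = start('train', opt.epoch_size, opt.npred)
    with torch.no_grad():
        valid_losses = start('valid', opt.epoch_size // 2, opt.npred)
    print(f"[epoch {epoch}] train policy loss: {train_losses['policy']:.4f}, "
          f"valid policy loss: {valid_losses['policy']:.4f}")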
Example #2
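A fragment from an evaluation script: the tail of an episode loop that stacks the policy's mu/std outputs, encodes the episode outcome (0 by default, 1 on collision, 2 when off-screen), and saves the ego movie plus an optional gradient movie before logging the video to the writer.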
        mu_list = numpy.stack(mu_list)
        std_list = numpy.stack(std_list)
    else:
        mu_list, std_list = None, None

    outcome = 0
    if has_collided:
        outcome = 1
    if off_screen:
        outcome = 2

    if len(images) > 3:
        utils.save_movie(path.join(movie_dir, 'ego'),
                         images.float() / 255.0,
                         states,
                         costs,
                         actions=actions,
                         mu=mu_list,
                         std=std_list,
                         pytorch=True)
        if opt.save_grad_vid:
            utils.save_movie(grad_movie_dir,
                             grads,
                             None,
                             None,
                             None,
                             None,
                             None,
                             pytorch=True)
        outcomes.append(outcome)
        if writer is not None:
            writer.add_video(f'Video/success={road_completed[-1]:d}_{j}',
Example #3
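An earlier variant of Example #1 that tracks each cost in its own scalar accumulator and additionally saves adversarial rollouts when `pred_adv` is available.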
def train(nbatches, npred):
    model.train()
    model.policy_net.train()
    total_loss_c, total_loss_u, total_loss_l, total_loss_a, n_updates, grad_norm = 0, 0, 0, 0, 0, 0
    total_loss_policy = 0
    for j in range(nbatches):
        optimizer.zero_grad()
        inputs, actions, targets, ids, car_sizes = dataloader.get_batch_fm(
            'train', npred)
        inputs = utils.make_variables(inputs)
        targets = utils.make_variables(targets)
        pred, actions, pred_adv = planning.train_policy_net_mpur(
            model,
            inputs,
            targets,
            car_sizes,
            n_models=10,
            lrt_z=opt.lrt_z,
            n_updates_z=opt.z_updates,
            infer_z=(opt.infer_z == 1))
        loss_c = pred[2]  # proximity cost
        loss_l = pred[3]  # lane cost
        loss_u = pred[4]  # uncertainty cost
        loss_a = actions.norm(2, 2).pow(2).mean()  # action regularisation
        loss_policy = loss_c + opt.u_reg * loss_u + opt.lambda_l * loss_l + opt.lambda_a * loss_a

        if not math.isnan(loss_policy.item()):
            loss_policy.backward()  # back-propagation through time!
            grad_norm += utils.grad_norm(model.policy_net).item()
            torch.nn.utils.clip_grad_norm_(model.policy_net.parameters(),
                                           opt.grad_clip)
            optimizer.step()
            total_loss_c += loss_c.item()  # proximity cost
            total_loss_u += loss_u.item()  # uncertainty (reg.)
            total_loss_a += loss_a.item()  # action (reg.)
            total_loss_l += loss_l.item()  # lane cost
            total_loss_policy += loss_policy.item()  # overall total cost
            n_updates += 1
        else:
            print('warning, NaN')  # Oh no... Something got quite f****d up!
            pdb.set_trace()

        if j == 0 and opt.save_movies:
            # save videos of normal and adversarial scenarios
            for b in range(opt.batch_size):
                utils.save_movie(opt.model_file + f'.mov/sampled/mov{b}',
                                 pred[0][b], pred[1][b], None, actions[b])
                if pred_adv[0] is not None:
                    utils.save_movie(
                        opt.model_file + f'.mov/adversarial/mov{b}',
                        pred_adv[0][b], pred_adv[1][b], None, actions[b])

        del inputs, actions, targets, pred

    total_loss_c /= n_updates
    total_loss_u /= n_updates
    total_loss_a /= n_updates
    total_loss_l /= n_updates
    total_loss_policy /= n_updates
    print(f'[avg grad norm: {grad_norm / n_updates:.4f}]')
    return total_loss_c, total_loss_l, total_loss_u, total_loss_a, total_loss_policy
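
The weighted sum here matches `pred['policy']` in Example #1, with the offroad term left out: loss_policy = proximity + u_reg * uncertainty + lambda_l * lane + lambda_a * action.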
Example #4
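Module-level evaluation code: for each test batch, ground-truth videos are saved for the first 10 x batch_size samples, then `opt.n_samples` sampled predictions per batch are drawn from the forward model and the predicted and true states are recorded.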
    return loss_i, loss_s, loss_c


dataloader.random.seed(12345)

for i in range(opt.n_batches):
    with torch.no_grad():
        torch.cuda.empty_cache()
        inputs, actions, targets, _, _ = dataloader.get_batch_fm('test', opt.npred)

        # save ground truth for the first 10 x batch_size samples
        if i < 10 and opt.save_video:
            for b in range(opt.batch_size):
                dirname_movie = f'{dirname}/videos/x{i * opt.batch_size + b:d}/y/'
                print(f'[saving ground truth video: {dirname_movie}]')
                utils.save_movie(dirname_movie, targets[0][b], targets[1][b], targets[2][b])

        for s in range(opt.n_samples):
            print(f'[batch {i}, sample {s}]', end="\r")

            if opt.zeroact == 1:
                actions.data.zero_()

            pred, _ = model(inputs, actions, targets, sampling=opt.sampling)  # return as many predictions as actions
            pred_states[i, :, s].copy_(pred[1])
            true_states[i].copy_(targets[1])

            if i < 10 and s < 20 and opt.save_video:
                for b in range(opt.batch_size):
                    dirname_movie = f'{dirname}/videos/sampled_z/true_actions/x{i * opt.batch_size + b:d}/z{s:d}/'
                    print(f'[saving video: {dirname_movie}]', end="\r")
Example #5
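Runs one simulator episode under the planning method selected by `opt.method` (`no-action`, `bprop`, `policy-IL`, `policy-MPER`, `policy-MPUR`, or `bprop+policy-IL`), collecting images, states, costs and actions along the way and returning a `SimulationResult`.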
def process_one_episode(opt,
                        env,
                        car_path,
                        forward_model,
                        policy_network_il,
                        data_stats,
                        plan_file,
                        index,
                        car_sizes):
    movie_dir = path.join(
        opt.save_dir, 'videos_simulator', plan_file, f'ep{index + 1}')
    if opt.save_grad_vid:
        grad_movie_dir = path.join(
            opt.save_dir, 'grad_videos_simulator', plan_file, f'ep{index + 1}')
        print(f'[gradient videos will be saved to: {grad_movie_dir}]')
    timeslot, car_id = utils.parse_car_path(car_path)
    # if None => picked at random
    inputs = env.reset(time_slot=timeslot, vehicle_id=car_id)
#     print(torch.mean(inputs['context']), torch.mean(inputs['state']))
    forward_model.reset_action_buffer(opt.npred)
    done, mu, std = False, None, None
    images, states, costs, actions, mu_list, std_list, grad_list = [], [], [], [], [], [], []
    cntr = 0
    # inputs, cost, done, info = env.step(numpy.zeros((2,)))
    input_state_t0 = inputs['state'].contiguous()[-1]
    cost_sequence, action_sequence, state_sequence, image_sequence = [], [], [], []
    has_collided = False
    off_screen = False
    while not done:
        input_images = inputs['context'].contiguous()
        input_states = inputs['state'].contiguous()
#         print(input_images.shape, input_states.shape)
        if opt.save_grad_vid:
            grad_list.append(planning.get_grad_vid(
                forward_model, input_images, input_states,
                car_sizes,
                device='cuda' if torch.cuda.is_available() else 'cpu'
            ))
        if opt.method == 'no-action':
            a = numpy.zeros((1, 2))
        elif opt.method == 'bprop':
            # TODO: car size is provided by the dataloader!! The lines below should be removed!
            # TODO: Namely, dataloader.car_sizes[timeslot][car_id]
            a = planning.plan_actions_backprop(
                forward_model,
                input_images[:, :3, :, :].contiguous(),
                input_states,
                car_sizes,
                npred=opt.npred,
                n_futures=opt.n_rollouts,
                normalize=True,
                bprop_niter=opt.bprop_niter,
                bprop_lrt=opt.bprop_lrt,
                u_reg=opt.u_reg,
                use_action_buffer=(opt.bprop_buffer == 1),
                n_models=opt.n_dropout_models,
                save_opt_stats=(opt.bprop_save_opt_stats == 1),
                nexec=opt.nexec,
                lambda_l=opt.lambda_l,
                lambda_o=opt.lambda_o
            )
        elif opt.method == 'policy-IL':
            _, _, _, a = policy_network_il(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.squeeze().cpu().view(1, 2).numpy()
        elif opt.method == 'policy-MPER':
            a, entropy, mu, std = forward_model.policy_net(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.cpu().view(1, 2).numpy()
        elif opt.method == 'policy-MPUR':
            a, entropy, mu, std = forward_model.policy_net(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=True
            )
            a = a.cpu().view(1, 2).numpy()
        elif opt.method == 'bprop+policy-IL':
            _, _, _, a = policy_network_il(
                input_images,
                input_states,
                sample=True,
                normalize_inputs=True,
                normalize_outputs=False
            )
            a = a[0]
            a = forward_model.plan_actions_backprop(
                input_images,
                input_states,
                npred=opt.npred,
                n_futures=opt.n_rollouts,
                normalize=True,
                bprop_niter=opt.bprop_niter,
                bprop_lrt=opt.bprop_lrt,
                actions=a,
                u_reg=opt.u_reg,
                nexec=opt.nexec
            )

        action_sequence.append(a)
        state_sequence.append(input_states)
        # image_sequence.append(input_images)
        cntr += 1
        cost_test = 0
        t = 0
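        # nexec == -1 apparently means "execute the entire npred-step plan"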
        T = opt.npred if opt.nexec == -1 else opt.nexec
        if not opt.use_forward_model:
            while (t < T) and not done:
                inputs, cost, done, info = env.step(a[t])
                if info.collisions_per_frame > 0:
                    has_collided = True
                    # print(f'[collision after {cntr} frames, ending]')
                    done = True
                off_screen = info.off_screen

                images.append(input_images[-1])
                states.append(input_states[-1])
                costs.append([cost['pixel_proximity_cost'], cost['lane_cost']])
                cost_sequence.append(cost)
                if opt.mfile == 'no-action':
                    actions.append(a[t])
                    mu_list.append(mu)
                    std_list.append(std)
                else:
                    actions.append(
                        ((torch.tensor(a[t]) - data_stats['a_mean'])
                            / data_stats['a_std'])
                    )
                    if mu is not None:
                        mu_list.append(mu.data.cpu().numpy())
                        std_list.append(std.data.cpu().numpy())
                t += 1
        else:
            context = input_images.contiguous()[:, :3]
            ego = input_images.contiguous()[:, 3:4].repeat(1, 3, 1, 1)
            # dummy targets: train_policy_net_mpur expects (image, state, ego)
            # target tensors, but only the predictions are used here
            dummy_targets = (torch.rand(context.unsqueeze(0).shape),
                             torch.rand(input_states.unsqueeze(0).shape),
                             torch.rand(ego.unsqueeze(0).shape))
            pred, actions = planning.train_policy_net_mpur(
                forward_model, (context, input_states, ego), dummy_targets,
                car_sizes, n_models=10, lrt_z=0, n_updates_z=0, infer_z=False,
                no_cuda=False, return_per_instance_values=False)
            images.append(pred['state_img'])
            costs.append([pred['proximity'], pred['lane']])
    input_state_tfinal = inputs['state'][-1]

    if mu is not None:
        mu_list = numpy.stack(mu_list)
        std_list = numpy.stack(std_list)
    else:
        mu_list, std_list = None, None

    images = torch.stack(images)
    states = torch.stack(states)
    costs = torch.tensor(costs)
    actions = torch.stack(actions)
    if opt.save_grad_vid:
        grads = torch.cat(grad_list)

    if len(images) > 3:
        images_3_channels = (images[:, :3] + images[:, 3:]).clamp(max=255)
        utils.save_movie(path.join(movie_dir, 'ego'),
                         images_3_channels.float() / 255.0,
                         states,
                         costs,
                         actions=actions,
                         mu=mu_list,
                         std=std_list,
                         pytorch=True)
        if opt.save_grad_vid:
            utils.save_movie(
                grad_movie_dir,
                grads,
                None,
                None,
                None,
                None,
                None,
                pytorch=True
            )
        if opt.save_sim_video:
            sim_path = path.join(movie_dir, 'sim')
            print(f'[saving simulator movie to {sim_path}]')
            os.mkdir(sim_path)
            for n, img in enumerate(info.frames):
                imwrite(path.join(sim_path, f'im{n:05d}.png'), img)

    returned = SimulationResult()
    returned.time_travelled = len(images)
    returned.distance_travelled = input_state_tfinal[0] - input_state_t0[0]
    returned.road_completed = 1 if cost['arrived_to_dst'] else 0
    returned.off_screen = off_screen
    returned.has_collided = has_collided
    returned.action_sequence = numpy.stack(action_sequence)
    returned.state_sequence = numpy.stack(state_sequence)
    returned.cost_sequence = numpy.stack(cost_sequence)
    print(car_sizes)
    # returned.image_sequence = numpy.stack(image_sequence)

    return returned

mean_time_to_collision = torch.Tensor(times_to_collision).mean()
median_time_to_collision = torch.Tensor(times_to_collision).median()
utils.log(opt.save_dir + '/' + plan_file + '.log',
          'mean: {}'.format(mean_time_to_collision.item()))
utils.log(opt.save_dir + '/' + plan_file + '.log',
          'median: {}'.format(median_time_to_collision.item()))
torch.save(torch.Tensor(times_to_collision),
           opt.save_dir + '/' + plan_file + '.pth')
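
The executed actions above are logged in normalised form; the helpers below restate that convention as a round-trip pair (illustrative only, these functions are not part of the source):

import torch

def normalise_action(a, data_stats):
    # same expression as the actions.append(...) call in the episode loop
    return (torch.tensor(a) - data_stats['a_mean']) / data_stats['a_std']

def denormalise_action(a_norm, data_stats):
    # inverse mapping, e.g. to recover a raw action for env.step()
    return a_norm * data_stats['a_std'] + data_stats['a_mean']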
Example #7
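An extended version of Example #1's `start` that also handles evaluation and fine-tuning splits: in `eval` mode it collects per-instance losses and a per-episode cost progression (pickled at the end) instead of scalar running sums.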
def start(what,
          nbatches,
          npred,
          split='train',
          return_per_instance_values=False,
          threshold=0):
    train = (what == 'train')
    evaluate = (what == 'eval')
    finetune_train = (split == 'finetune_train')
    finetune_sim = (split == 'finetune_sim')

    model.train()
    model.policy_net.train()
    n_updates, grad_norm = 0, 0

    if return_per_instance_values:
        total_losses = dict(
            proximity=[],
            uncertainty=[],
            lane=[],
            offroad=[],
            action=[],
            policy=[],
            episode_timestep_pairs=[],
        )

    else:
        total_losses = dict(
            proximity=0,
            uncertainty=0,
            lane=0,
            offroad=0,
            action=0,
            policy=0,
        )

    if evaluate:
        episode_cost_progression = {}

    iterable = range(nbatches)
    if evaluate or finetune_train:
        total_instances = dataloader.get_total_instances(split, what)
        print(f"total_instances in {split}: {total_instances}")
        iterable = range(0, total_instances, opt.batch_size)

    for j in iterable:
        if not evaluate:
            inputs, actions, targets, ids, car_sizes = dataloader.get_batch_fm(
                split,
                npred,
                cuda=(torch.cuda.is_available() and not opt.no_cuda),
                all_batches=finetune_train)
        else:
            e_index, inputs, actions, targets, ids, car_sizes = dataloader.get_batch_fm(
                split,
                npred,
                return_episode_index=True,
                cuda=(torch.cuda.is_available() and not opt.no_cuda),
                all_batches=finetune_train,
                randomize=(finetune_train or finetune_sim))
            if -1 in e_index[:, 0]:
                print("breaking now")
                break

        pred, actions = planning.train_policy_net_mpur(
            model,
            inputs,
            targets,
            car_sizes,
            n_models=10,
            lrt_z=opt.lrt_z,
            n_updates_z=opt.z_updates,
            infer_z=opt.infer_z,
            no_cuda=opt.no_cuda,
            return_per_instance_values=evaluate)
        pred['policy'] = pred['proximity'] + \
                         opt.u_reg * pred['uncertainty'] + \
                         opt.lambda_l * pred['lane'] + \
                         opt.lambda_a * pred['action'] + \
                         opt.lambda_o * pred['offroad']

        if not math.isnan(torch.mean(pred['policy']).item()):
            if train:
                optimizer.zero_grad()
                pred['policy'].backward()  # back-propagation through time!
                grad_norm += utils.grad_norm(model.policy_net).item()
                torch.nn.utils.clip_grad_norm_(model.policy_net.parameters(),
                                               opt.grad_clip)
                optimizer.step()
            if not evaluate:
                for loss in total_losses:
                    total_losses[loss] += pred[loss].item()
            n_updates += 1
            if evaluate:
                for b_i in range(len(e_index)):
                    instance_loss = torch.mean(pred['policy'][b_i]).item()
                    if e_index[b_i][0] not in episode_cost_progression:
                        episode_cost_progression[e_index[b_i][0]] = {
                            key: []
                            for key in total_losses if "episode" not in key
                        }
                    if instance_loss >= threshold:
                        episode_index, timestep = e_index[b_i]

                        for loss in total_losses:
                            if loss != 'episode_timestep_pairs':
                                total_losses[loss].append(
                                    torch.mean(pred[loss][b_i]).detach().cpu())
                                episode_cost_progression[
                                    e_index[b_i][0]][loss].append(
                                        torch.mean(
                                            pred[loss][b_i]).detach().cpu())
                            else:
                                total_losses[loss].append(
                                    [episode_index, timestep, instance_loss])

        else:
            print('warning, NaN')  # Oh no... Something got quite f****d up!
            ipdb.set_trace()

        if j == 0 and opt.save_movies and train:
            # save videos of normal and adversarial scenarios
            for b in range(opt.batch_size):
                state_img = pred['state_img'][b]
                state_vct = pred['state_vct'][b]
                utils.save_movie(opt.model_file + f'.mov/sampled/mov{b}',
                                 state_img, state_vct, None, actions[b])

        del inputs, actions, targets, pred

        if n_updates == nbatches and not evaluate:
            break

    if not evaluate:
        for loss in total_losses:
            total_losses[loss] /= n_updates

    if train or finetune_train or finetune_sim:
        print(f'[avg grad norm: {grad_norm / n_updates:.4f}]')

    if evaluate:
        pickle.dump(
            episode_cost_progression,
            open("policy_loss_stats/episode_cost_progression.pkl", 'wb+'))
    return total_losses
Example #8
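A stripped-down episode runner, apparently for debugging: the controlled car's speed is pinned, a zero action is applied at every step, and an iteration cap (`it_limit`) bounds disk usage.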
def process_one_episode(opt, env, car_path, plan_file, index, car_sizes):
    movie_dir = path.join(opt.save_dir, 'videos_simulator', plan_file,
                          f'ep{index + 1}')

    timeslot, car_id = utils.parse_car_path(car_path)
    # if None => picked at random
    inputs = env.reset(time_slot=timeslot, vehicle_id=car_id)
    done, mu, std = False, None, None
    images, states, costs, actions, mu_list, std_list, grad_list = [], [], [], [], [], [], []
    cntr = 0
    # inputs, cost, done, info = env.step(numpy.zeros((2,)))
    input_state_t0 = inputs['state'].contiguous()[-1]
    cost_sequence, action_sequence, state_sequence = [], [], []
    has_collided = False
    off_screen = False

    env.controlled_car["locked"]._speed = 400.0

    it_limit = 60  # avoid excess disk usage due to a control-flow bug
    while not done and cntr < it_limit:
        print("___________________________________________________________")
        print(f"cntr = {cntr}")

        input_images = inputs['context'].contiguous()
        input_states = inputs['state'].contiguous()

        a = [0.0, 0.0]  # No acceleration/steering

        action_sequence.append(a)
        state_sequence.append(input_states)
        cntr += 1
        cost_test = 0

        inputs, cost, done, info = env.step(a)
        if not opt.ignore_crash and info.collisions_per_frame > 0:
            has_collided = True
            # print(f'[collision after {cntr} frames, ending]')
            done = True
        off_screen = info.off_screen

        images.append(input_images[-1])
        states.append(input_states[-1])
        costs.append([cost['pixel_proximity_cost'], cost['lane_cost']])
        cost_sequence.append(cost)

        actions.append(a)
        mu_list.append(mu)
        std_list.append(std)

        print(f"len(info.frames) = {len(info.frames)}")

    print("___________________________________________________________")
    print(f"done = {done}, it_limit = {it_limit}, cntr = {cntr}")

    input_state_tfinal = inputs['state'][-1]

    if mu is not None:
        mu_list = numpy.stack(mu_list)
        std_list = numpy.stack(std_list)
    else:
        mu_list, std_list = None, None

    images = torch.stack(images)
    states = torch.stack(states)
    costs = torch.tensor(costs)
    actions = torch.tensor(actions)

    if len(images) > 3:
        images_3_channels = (images[:, :3] + images[:, 3:]).clamp(max=255)
        utils.save_movie(path.join(movie_dir, 'ego'),
                         images_3_channels.float() / 255.0,
                         states,
                         costs,
                         actions=actions,
                         mu=mu_list,
                         std=std_list,
                         pytorch=True)

        if opt.save_sim_video:
            print(f"len(info.frames) = {len(info.frames)}")
            sim_path = path.join(movie_dir, 'sim')
            print(f'[saving simulator movie to {sim_path}]')
            if not path.exists(sim_path):
                os.mkdir(sim_path)
            for n, img in enumerate(info.frames):
                imwrite(path.join(sim_path, f'im{n:05d}.png'), img)

    returned = eval_policy.SimulationResult()
    returned.time_travelled = len(images)
    returned.distance_travelled = input_state_tfinal[0] - input_state_t0[0]
    returned.road_completed = 1 if cost['arrived_to_dst'] else 0
    returned.off_screen = off_screen
    returned.has_collided = has_collided
    returned.action_sequence = numpy.stack(action_sequence)
    returned.state_sequence = numpy.stack(state_sequence)
    returned.cost_sequence = numpy.stack(cost_sequence)

    return returned
Example #9
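The same evaluation loop as Example #4, in an older, Variable-based style (`utils.make_variables`, `utils.Variable`) and without the `torch.no_grad()` guard.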
    return loss_i, loss_s, loss_c


dataloader.random.seed(12345)

for i in range(opt.n_batches):
    torch.cuda.empty_cache()
    inputs_, actions_, targets_, _, _ = dataloader.get_batch_fm(
        'test', opt.npred)

    if i < 10 and opt.save_video:
        for b in range(opt.batch_size):
            dirname_movie = '{}/videos/x{:d}/y/'.format(
                dirname, i * opt.batch_size + b)
            print('[saving ground truth video: {}]'.format(dirname_movie))
            utils.save_movie(dirname_movie, targets_[0][b], targets_[1][b],
                             targets_[2][b])

    for s in range(opt.n_samples):
        print('[batch {}, sample {}]'.format(i, s), end="\r")
        inputs = utils.make_variables(inputs_)
        targets = utils.make_variables(targets_)
        actions = utils.Variable(actions_)

        if opt.zeroact == 1:
            actions.data.zero_()

        pred_, _ = model(inputs, actions, targets, sampling=opt.sampling)
        '''
        loss_i_s, loss_s_s, loss_c_s = compute_loss(targets, pred_, r=False)
        loss_i[i, :, s] += loss_i_s.mean(2).mean(2).mean(2).data.cpu()
        loss_s[i, :, s] += loss_s_s.mean(2).data.cpu()