class CoILAgent(Agent):
    def __init__(self, checkpoint):

        #experiment_name='None', driver_conf=None, memory_fraction=0.18,
        #image_cut=[115, 510]):

        # use_planner=False,graph_file=None,map_file=None,augment_left_right=False,image_cut = [170,518]):

        Agent.__init__(self)
        # This should likely come from global
        #config_gpu = tf.ConfigProto()
        #config_gpu.gpu_options.visible_device_list = '0'

        #config_gpu.gpu_options.per_process_gpu_memory_fraction = memory_fraction
        #self._sess = tf.Session(config=config_gpu)

        # THIS DOES NOT WORK FOR FUSED PLUS LSTM
        #if self._config.number_frames_sequenced > self._config.number_frames_fused:
        #    self._config_train.batch_size = self._config.number_frames_sequenced
        #else:
        #    self._config_train.batch_size = self._config.number_frames_fused

        #self._train_manager = load_system(self._config_train)
        #self._config.train_segmentation = False
        self.model = CoILModel(g_conf.MODEL_NAME)

        self.model.load_state_dict(checkpoint['state_dict'])

        self.model.cuda()

        #self.model.load_network(checkpoint)

        #self._sess.run(tf.global_variables_initializer())

        #self._control_function = getattr(machine_output_functions,
        #                                 self._train_manager._config.control_mode)
        # More elegant way to merge with autopilot
        #self._agent = Autopilot(ConfigAutopilot(driver_conf.city_name))

        #self._image_cut = driver_conf.image_cut
        #self._auto_pilot = driver_conf.use_planner

        #self._recording = False
        #self._start_time = 0

    def run_step(self, measurements, sensor_data, directions, target):

        # pos = (rewards.player_x,rewards.player_y,22)
        # ori =(rewards.ori_x,rewards.ori_y,rewards.ori_z)
        # pos,point = self.planner.get_defined_point(pos,ori,(target[0],target[1],22),(1.0,0.02,-0.001),self._select_goal)
        # direction = convert_to_car_coord(point[0],point[1],pos[0],pos[1],ori[0],ori[1])
        # image_filename_format = '_images/episode_{:0>3d}/{:s}/image_{:0>5d}.png'

        # sys.stdout = open(str("direction" + ".out", "a", buffering=1))
        #control_agent = self._agent.run_step(measurements, None, target)
        print(" RUnning STEP ")
        speed = torch.cuda.FloatTensor(
            [measurements.player_measurements.forward_speed]).unsqueeze(0)
        print("Speed shape ", speed)
        directions_tensor = torch.cuda.LongTensor([2])
        print("dir", directions_tensor)
        model_outputs = self.model.forward_branch(
            self._process_sensors(sensor_data), speed, directions_tensor)

        print(model_outputs)

        steer, throttle, brake = self._process_model_outputs(
            model_outputs[0], measurements.player_measurements.forward_speed)
        #control = self.compute_action(,
        #                              ,
        #                              directions)
        control = carla_protocol.Control()
        control.steer = steer
        control.throttle = throttle
        control.brake = brake
        # if self._auto_pilot:
        #    control.steer = control_agent.steer
        # TODO: adapt the client side agent for the new version. ( PROBLEM )
        #control.throttle = control_agent.throttle
        #control.brake = control_agent.brake

        # TODO: maybe change to a more meaningfull message ??
        return control

    def _process_sensors(self, sensors):

        iteration = 0
        for name, size in g_conf.SENSORS.items():

            sensor = sensors[name].data[
                g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            # if  sensors[name].type == 'SemanticSegmentation':
            #
            #
            # 	# TODO: the camera name has to be sincronized with what is in the experiment...
            # 	sensor = join_classes(sensor)
            #
            # 	sensor = sensor[:, :, np.newaxis]
            #
            # 	image_transform = transforms.Compose([transforms.ToTensor(),
            # 					   transforms.Resize((size[1], size[2]), interpolation=Image.NEAREST),
            # 					   iag.ToGPU(), iag.Multiply((1 / (number_of_seg_classes - 1)))])
            # else:

            plt.figure(1)
            plt.subplot(1, 2, 1)
            plt.imshow(sensor)

            print("Sensor size:", sensor.shape)  #300 x 800 x 3
            sensor = np.transpose(sensor, (2, 0, 1))
            # image = resize(image,[self._image_size2,self._image_size1])
            print("begin transform Sensor size:", sensor.shape)  #300 x 800 x 3
            print("orginal sensor", sensor[0][0][:10])
            image_transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((size[1], size[2])),
                transforms.ToTensor(),
                iag.ToGPU()
            ])

            # print ("Sensor Olala")
            # sensor = np.transpose(sensor, (2, 0, 1))
            # print (sensor.shape)

            # sensor = np.swapaxes(sensor, 0, 1) #800 x 300 x 3
            # print ("Sensor Previous SHape")
            # print (sensor.shape)
            # sensor = np.transpose(sensor, (2, 1, 0)) #3 x 300 x 800
            # print ("Sensor Previous SHape PT2")
            # print (sensor.shape)
            # if iteration == 0:
            image_input = image_transform(sensor)
            print("After transform", image_input.size())

            # print("After making to numpy", img_np.shape)
            # image_input = np.transpose(img_np, (2, 0, 1))
            # print("1st transform", image_input.shape)

            img_np = image_input.cpu().numpy()
            print("img np pix", img_np[0][0][:10])
            img_np = np.uint8(np.transpose(img_np, (1, 2, 0)))
            print("2nd transform", img_np.shape)
            plt.subplot(1, 2, 2)
            plt.imshow(img_np)
            print("Before div by 255", image_input[0][0][:10])

            image_input = image_input / 255.0
            print("After div by 255", image_input[0][0][:10])
            # else:
            # 	image_input = torch.cat((image_input, sensor), 0)

            # iteration += 1
            # print("New shape", image_input.size())
            # img_np = image_input.cpu()
            # img_np = img_np.numpy()*255
            # print("Newew shape", img_np.shape)
            # img_np = np.uint8(np.transpose(img_np, (1,2,0)))
            # # img_np = np.uint8((image_input[0].cpu()).numpy() * 255)
            # # print (img_np.shape)
            # plt.subplot(1, 3, 3)
            # plt.imshow(img_np)
            plt.show()

        print(image_input.shape)
        image_input = image_input.unsqueeze(0)
        print(image_input.shape)

        return image_input

    def _process_model_outputs(self, outputs, speed):
        """
		 A bit of heuristics in the control, to eventually make car faster, for instance.
		Returns:

		"""
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        # if brake < 0.2:
        # 	brake = 0.0
        #
        # if throttle > brake:
        # 	brake = 0.0
        # else:
        # 	throttle = throttle * 2
        # if speed > 35.0 and brake == 0.0:
        # 	throttle = 0.0

        return steer, throttle, brake

    """

	def compute_action(self, sensors, speed, direction):

		capture_time = time.time()


		sensor_pack = []



		for i in range(len(sensors)):

			sensor = sensors[i]

			sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], :]

			if g_conf.param.SENSORS.keys()[i] == 'rgb':

				sensor = scipy.misc.imresize(sensor, [self._config.sensors_size[i][0],
													  self._config.sensors_size[i][1]])


			elif g_conf.param.SENSORS.keys()[i] == 'labels':

				sensor = scipy.misc.imresize(sensor, [self._config.sensors_size[i][0],
													  self._config.sensors_size[i][1]],
											 interp='nearest')

				sensor = join_classes(sensor) * int(255 / (number_of_seg_classes - 1))

				sensor = sensor[:, :, np.newaxis]

			sensor_pack.append(sensor)

		if len(sensor_pack) > 1:

			image_input = np.concatenate((sensor_pack[0], sensor_pack[1]), axis=2)

		else:
			image_input = sensor_pack[0]

		image_input = image_input.astype(np.float32)
		image_input = np.multiply(image_input, 1.0 / 255.0)


		image_input = sensors[0]

		image_input = image_input.astype(np.float32)
		image_input = np.multiply(image_input, 1.0 / 255.0)
		# TODO: This will of course depend on the model , if it is based on sequences there are
		# TODO: different requirements
		#tensor = self.model(image_input)
		outputs = self.model.forward_branch(image_input, speed, direction)



		return control  # ,machine_output_functions.get_intermediate_rep(image_input,speed,self._config,self._sess,self._train_manager)

	"""
    """
Example #2
0
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output):
    latest = None
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        # The validation dataset is always fully loaded, so we fix a very high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process('validation', dataset_name)

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        # Define the dataset.
        full_dataset = [
            os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
        ]
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_names=[dataset_name])

        # The data loader is the multi threaded module from pytorch that release a number of
        # workers to get all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        # Create model.
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        # The window used to keep track of the validation loss
        l1_window = []
        # If we have evaluated a checkpoint, get the validation losses of all the previously
        # evaluated checkpoints (validation loss is used for early stopping)
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:  # When latest is noe
            l1_window = coil_logger.recover_loss_window(dataset_name, None)

        model.cuda()

        best_mse = 1000
        best_error = 1000
        best_mse_iter = 0
        best_error_iter = 0

        # Loop to validate all checkpoints as they are saved during training
        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
                with torch.no_grad():
                    # Get and load latest checkpoint
                    latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

                    checkpoint = torch.load(
                        os.path.join('_logs', exp_batch, exp_alias,
                                     'checkpoints',
                                     str(latest) + '.pth'))
                    checkpoint_iteration = checkpoint['iteration']
                    print("Validation loaded ", checkpoint_iteration)

                    model.load_state_dict(checkpoint['state_dict'])
                    model.eval()

                    accumulated_mse = 0
                    accumulated_error = 0
                    iteration_on_checkpoint = 0
                    if g_conf.USE_REPRESENTATION_LOSS:
                        accumulated_perception_rep_mse = 0
                        accumulated_speed_rep_mse = 0
                        accumulated_intentions_rep_mse = 0
                        accumulated_rep_mse = 0
                        accumulated_perception_rep_error = 0
                        accumulated_speed_rep_error = 0
                        accumulated_intentions_rep_error = 0
                        accumulated_rep_error = 0

                    # Validation loop
                    for data in data_loader:

                        # Compute the forward pass on a batch from  the validation dataset
                        controls = data['directions']

                        # Run model forward and get outputs
                        # First case corresponds to squeeze network, second case corresponds to driving model without
                        # mimicking losses, last case corresponds to mimic network
                        if "seg" in g_conf.SENSORS.keys():
                            output = model.forward_branch(
                                data,
                                dataset.extract_inputs(data).cuda(), controls,
                                dataset.extract_intentions(data).cuda())
                        elif not g_conf.USE_REPRESENTATION_LOSS:
                            output = model.forward_branch(
                                data,
                                dataset.extract_inputs(data).cuda(), controls)
                        else:
                            output, intermediate_reps = model.forward_branch(
                                data,
                                dataset.extract_inputs(data).cuda(), controls)

                        write_regular_output(checkpoint_iteration, output)

                        # Compute control loss on current validation batch and accumulate it
                        targets_to_use = dataset.extract_targets(data)

                        mse = torch.mean(
                            (output - targets_to_use.cuda())**2).data.tolist()
                        mean_error = torch.mean(
                            torch.abs(output -
                                      targets_to_use.cuda())).data.tolist()

                        accumulated_error += mean_error
                        accumulated_mse += mse

                        error = torch.abs(output - targets_to_use.cuda())

                        # Compute mimicking losses on current validation batch and accumulate it
                        if g_conf.USE_REPRESENTATION_LOSS:
                            expert_reps = dataset.extract_representations(data)
                            # First L1 losses (seg mask, speed, intention mimicking losses)
                            if g_conf.USE_PERCEPTION_REP_LOSS:
                                perception_rep_loss = torch.sum(
                                    torch.abs(intermediate_reps[0] -
                                              expert_reps[0].cuda())
                                ).data.tolist() / (3 * output.shape[0])
                            else:
                                perception_rep_loss = 0
                            if g_conf.USE_SPEED_REP_LOSS:
                                speed_rep_loss = torch.sum(
                                    torch.abs(intermediate_reps[1] -
                                              expert_reps[1].cuda())
                                ).data.tolist() / (3 * output.shape[0])
                            else:
                                speed_rep_loss = 0
                            if g_conf.USE_INTENTION_REP_LOSS:
                                intentions_rep_loss = torch.sum(
                                    torch.abs(intermediate_reps[2] -
                                              expert_reps[2].cuda())
                                ).data.tolist() / (3 * output.shape[0])
                            else:
                                intentions_rep_loss = 0
                            rep_error = g_conf.REP_LOSS_WEIGHT * (
                                perception_rep_loss + speed_rep_loss +
                                intentions_rep_loss)
                            accumulated_perception_rep_error += perception_rep_loss
                            accumulated_speed_rep_error += speed_rep_loss
                            accumulated_intentions_rep_error += intentions_rep_loss
                            accumulated_rep_error += rep_error

                            # L2 losses now
                            if g_conf.USE_PERCEPTION_REP_LOSS:
                                perception_rep_loss = torch.sum(
                                    (intermediate_reps[0] -
                                     expert_reps[0].cuda())**
                                    2).data.tolist() / (3 * output.shape[0])
                            else:
                                perception_rep_loss = 0
                            if g_conf.USE_SPEED_REP_LOSS:
                                speed_rep_loss = torch.sum(
                                    (intermediate_reps[1] -
                                     expert_reps[1].cuda())**
                                    2).data.tolist() / (3 * output.shape[0])
                            else:
                                speed_rep_loss = 0
                            if g_conf.USE_INTENTION_REP_LOSS:
                                intentions_rep_loss = torch.sum(
                                    (intermediate_reps[2] -
                                     expert_reps[2].cuda())**
                                    2).data.tolist() / (3 * output.shape[0])
                            else:
                                intentions_rep_loss = 0
                            rep_mse = g_conf.REP_LOSS_WEIGHT * (
                                perception_rep_loss + speed_rep_loss +
                                intentions_rep_loss)
                            accumulated_perception_rep_mse += perception_rep_loss
                            accumulated_speed_rep_mse += speed_rep_loss
                            accumulated_intentions_rep_mse += intentions_rep_loss
                            accumulated_rep_mse += rep_mse

                        # Log a random position
                        position = random.randint(
                            0,
                            len(output.data.tolist()) - 1)

                        # Logging
                        if g_conf.USE_REPRESENTATION_LOSS:
                            total_mse = mse + rep_mse
                            total_error = mean_error + rep_error
                            coil_logger.add_message(
                                'Iterating', {
                                    'Checkpoint':
                                    latest,
                                    'Iteration':
                                    (str(iteration_on_checkpoint * 120) + '/' +
                                     str(len(dataset))),
                                    'MeanError':
                                    mean_error,
                                    'MSE':
                                    mse,
                                    'RepMeanError':
                                    rep_error,
                                    'RepMSE':
                                    rep_mse,
                                    'MeanTotalError':
                                    total_error,
                                    'TotalMSE':
                                    total_mse,
                                    'Output':
                                    output[position].data.tolist(),
                                    'GroundTruth':
                                    targets_to_use[position].data.tolist(),
                                    'Error':
                                    error[position].data.tolist(),
                                    'Inputs':
                                    dataset.extract_inputs(
                                        data)[position].data.tolist()
                                }, latest)
                        else:
                            coil_logger.add_message(
                                'Iterating', {
                                    'Checkpoint':
                                    latest,
                                    'Iteration':
                                    (str(iteration_on_checkpoint * 120) + '/' +
                                     str(len(dataset))),
                                    'MeanError':
                                    mean_error,
                                    'MSE':
                                    mse,
                                    'Output':
                                    output[position].data.tolist(),
                                    'GroundTruth':
                                    targets_to_use[position].data.tolist(),
                                    'Error':
                                    error[position].data.tolist(),
                                    'Inputs':
                                    dataset.extract_inputs(
                                        data)[position].data.tolist()
                                }, latest)
                        iteration_on_checkpoint += 1

                        if g_conf.USE_REPRESENTATION_LOSS:
                            print("Iteration %d  on Checkpoint %d : Error %f" %
                                  (iteration_on_checkpoint,
                                   checkpoint_iteration, total_error))
                        else:
                            print("Iteration %d  on Checkpoint %d : Error %f" %
                                  (iteration_on_checkpoint,
                                   checkpoint_iteration, mean_error))
                    """
                        ########
                        Finish a round of validation, write results, wait for the next
                        ########
                    """
                    # Compute average L1 and L2 losses over whole round of validation and log them
                    checkpoint_average_mse = accumulated_mse / (
                        len(data_loader))
                    checkpoint_average_error = accumulated_error / (
                        len(data_loader))
                    coil_logger.add_scalar('L2 Loss', checkpoint_average_mse,
                                           latest, True)
                    coil_logger.add_scalar('Loss', checkpoint_average_error,
                                           latest, True)

                    if g_conf.USE_REPRESENTATION_LOSS:
                        checkpoint_average_perception_rep_mse = accumulated_perception_rep_mse / (
                            len(data_loader))
                        checkpoint_average_speed_rep_mse = accumulated_speed_rep_mse / (
                            len(data_loader))
                        checkpoint_average_intentions_rep_mse = accumulated_intentions_rep_mse / (
                            len(data_loader))
                        checkpoint_average_rep_mse = accumulated_rep_mse / (
                            len(data_loader))
                        checkpoint_average_total_mse = checkpoint_average_mse + checkpoint_average_rep_mse

                        checkpoint_average_perception_rep_error = accumulated_perception_rep_error / (
                            len(data_loader))
                        checkpoint_average_speed_rep_error = accumulated_speed_rep_error / (
                            len(data_loader))
                        checkpoint_average_intentions_rep_error = accumulated_intentions_rep_error / (
                            len(data_loader))
                        checkpoint_average_rep_error = accumulated_rep_error / (
                            len(data_loader))
                        checkpoint_average_total_error = checkpoint_average_error + checkpoint_average_rep_mse

                        # Log L1/L2 loss terms
                        coil_logger.add_scalar(
                            'Perception Rep Loss',
                            checkpoint_average_perception_rep_mse, latest,
                            True)
                        coil_logger.add_scalar(
                            'Speed Rep Loss', checkpoint_average_speed_rep_mse,
                            latest, True)
                        coil_logger.add_scalar(
                            'Intentions Rep Loss',
                            checkpoint_average_intentions_rep_mse, latest,
                            True)
                        coil_logger.add_scalar('Overall Rep Loss',
                                               checkpoint_average_rep_mse,
                                               latest, True)
                        coil_logger.add_scalar('Total L2 Loss',
                                               checkpoint_average_total_mse,
                                               latest, True)

                        coil_logger.add_scalar(
                            'Perception Rep Error',
                            checkpoint_average_perception_rep_error, latest,
                            True)
                        coil_logger.add_scalar(
                            'Speed Rep Error',
                            checkpoint_average_speed_rep_error, latest, True)
                        coil_logger.add_scalar(
                            'Intentions Rep Error',
                            checkpoint_average_intentions_rep_error, latest,
                            True)
                        coil_logger.add_scalar('Total Rep Error',
                                               checkpoint_average_rep_error,
                                               latest, True)
                        coil_logger.add_scalar('Total Loss',
                                               checkpoint_average_total_error,
                                               latest, True)
                    else:
                        checkpoint_average_total_mse = checkpoint_average_mse
                        checkpoint_average_total_error = checkpoint_average_error

                    if checkpoint_average_total_mse < best_mse:
                        best_mse = checkpoint_average_total_mse
                        best_mse_iter = latest

                    if checkpoint_average_total_error < best_error:
                        best_error = checkpoint_average_total_error
                        best_error_iter = latest

                    # Print for logging / to terminal validation results
                    if g_conf.USE_REPRESENTATION_LOSS:
                        coil_logger.add_message(
                            'Iterating', {
                                'Summary': {
                                    'Control Error': checkpoint_average_error,
                                    'Control Loss': checkpoint_average_mse,
                                    'Rep Error': checkpoint_average_rep_error,
                                    'Rep Loss': checkpoint_average_rep_mse,
                                    'Error': checkpoint_average_total_error,
                                    'Loss': checkpoint_average_total_mse,
                                    'BestError': best_error,
                                    'BestMSE': best_mse,
                                    'BestMSECheckpoint': best_mse_iter,
                                    'BestErrorCheckpoint': best_error_iter
                                },
                                'Checkpoint': latest
                            }, latest)
                    else:
                        coil_logger.add_message(
                            'Iterating', {
                                'Summary': {
                                    'Error': checkpoint_average_error,
                                    'Loss': checkpoint_average_mse,
                                    'BestError': best_error,
                                    'BestMSE': best_mse,
                                    'BestMSECheckpoint': best_mse_iter,
                                    'BestErrorCheckpoint': best_error_iter
                                },
                                'Checkpoint': latest
                            }, latest)

                    # Save validation loss history (validation loss is used for early stopping)
                    l1_window.append(checkpoint_average_total_error)
                    coil_logger.write_on_error_csv(
                        dataset_name, checkpoint_average_total_error)

                    # Early stopping
                    if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                        if dlib.count_steps_without_decrease(l1_window) > 3 and \
                                dlib.count_steps_without_decrease_robust(l1_window) > 3:
                            coil_logger.write_stop(dataset_name, latest)
                            break

            else:

                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)

                coil_logger.add_message('Loading',
                                        {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
class CoILAgent(Agent):
    def __init__(self, checkpoint, architecture_name):

        #experiment_name='None', driver_conf=None, memory_fraction=0.18,
        #image_cut=[115, 510]):

        # use_planner=False,graph_file=None,map_file=None,augment_left_right=False,image_cut = [170,518]):

        Agent.__init__(self)
        # This should likely come from global
        #config_gpu = tf.ConfigProto()
        #config_gpu.gpu_options.visible_device_list = '0'

        #config_gpu.gpu_options.per_process_gpu_memory_fraction = memory_fraction
        #self._sess = tf.Session(config=config_gpu)

        # THIS DOES NOT WORK FOR FUSED PLUS LSTM
        #if self._config.number_frames_sequenced > self._config.number_frames_fused:
        #    self._config_train.batch_size = self._config.number_frames_sequenced
        #else:
        #    self._config_train.batch_size = self._config.number_frames_fused

        #self._train_manager = load_system(self._config_train)
        #self._config.train_segmentation = False
        self.architecture_name = architecture_name

        if architecture_name == 'coil_unit':
            self.model_task, self.model_gen = CoILModel('coil_unit')
            self.model_task, self.model_gen = self.model_task.cuda(
            ), self.model_gen.cuda()
        elif architecture_name == 'unit_task_only':
            self.model_task, self.model_gen = CoILModel('unit_task_only')
            self.model_task, self.model_gen = self.model_task.cuda(
            ), self.model_gen.cuda()
        else:
            self.model = CoILModel(architecture_name)
            self.model.cuda()

        if architecture_name == 'wgangp_lsd':
            # print(ckpt, checkpoint['best_loss_iter_F'])
            self.model.load_state_dict(checkpoint['stateF_dict'])
            self.model.eval()
        elif architecture_name == 'coil_unit':
            self.model_task.load_state_dict(checkpoint['task'])
            self.model_gen.load_state_dict(checkpoint['b'])
            self.model_task.eval()
            self.model_gen.eval()
        elif architecture_name == 'coil_icra':
            self.model.load_state_dict(checkpoint['state_dict'])
            self.model.eval()
        elif architecture_name == 'unit_task_only':
            self.model_task.load_state_dict(checkpoint['task_state_dict'])
            self.model_gen.load_state_dict(checkpoint['enc_state_dict'])
            self.model_task.eval()
            self.model_gen.eval()

        #self.model.load_network(checkpoint)

        #self._sess.run(tf.global_variables_initializer())

        #self._control_function = getattr(machine_output_functions,
        #                                 self._train_manager._config.control_mode)
        # More elegant way to merge with autopilot
        #self._agent = Autopilot(ConfigAutopilot(driver_conf.city_name))

        #self._image_cut = driver_conf.image_cut
        #self._auto_pilot = driver_conf.use_planner

        #self._recording = False
        #self._start_time = 0

    def run_step(self, measurements, sensor_data, directions, target):

        #control_agent = self._agent.run_step(measurements, None, target)
        print(" RUnning STEP ")
        speed = torch.cuda.FloatTensor(
            [measurements.player_measurements.forward_speed]).unsqueeze(0)
        print("Speed is", speed)
        print("Speed shape ", speed)
        directions_tensor = torch.cuda.LongTensor([directions])

        # model_outputs = self.model.forward_branch(self._process_sensors(sensor_data), speed,
        # 										  directions_tensor)
        if self.architecture_name == 'wgangp_lsd':
            embed, model_outputs = self.model(
                self._process_sensors(sensor_data), speed)

        elif self.architecture_name == 'coil_unit':
            embed, n_b = self.model_gen.encode(
                self._process_sensors(sensor_data))
            model_outputs = self.model_task(embed, speed)

        elif self.architecture_name == 'unit_task_only':
            embed, n_b = self.model_gen.encode(
                self._process_sensors(sensor_data))
            model_outputs = self.model_task(embed, speed)

        elif self.architecture_name == 'coil_icra':
            model_outputs = self.model.forward_branch(
                self._process_sensors(sensor_data), speed, directions_tensor)

        print(model_outputs)

        if self.architecture_name == 'coil_icra':
            steer, throttle, brake = self._process_model_outputs(
                model_outputs[0],
                measurements.player_measurements.forward_speed)
        else:
            steer, throttle, brake = self._process_model_outputs(
                model_outputs[0][0],
                measurements.player_measurements.forward_speed)

        control = carla_protocol.Control()
        control.steer = steer
        control.throttle = throttle
        control.brake = brake
        # if self._auto_pilot:
        #    control.steer = control_agent.steer
        # TODO: adapt the client side agent for the new version. ( PROBLEM )
        #control.throttle = control_agent.throttle
        #control.brake = control_agent.brake

        # TODO: maybe change to a more meaningfull message ??
        return control

    def _process_sensors(self, sensors):

        iteration = 0
        for name, size in g_conf.SENSORS.items():

            sensor = sensors[name].data[140:260, ...]  #300*800*3

            image_input = transform.resize(sensor, (128, 128))

            # transforms.Normalize([ 0.5315,  0.5521,  0.5205], [ 0.1960,  0.1810,  0.2217])

            image_input = np.transpose(image_input, (2, 0, 1))
            image_input = torch.from_numpy(image_input).type(
                torch.FloatTensor).cuda()
            image_input = image_input  #normalization
            print("torch size", image_input.size())

            img_np = np.uint8(
                np.transpose(image_input.cpu().numpy() * 255, (1, 2, 0)))

            # plt.figure(1)
            # plt.subplot(1, 2, 1)
            # plt.imshow(sensor)
            #
            # plt.subplot(1,2,2)
            # plt.imshow(img_np)
            # #
            # plt.show()

            iteration += 1

        # print (image_input.shape)
        image_input = image_input.unsqueeze(0)
        print(image_input.shape)

        return image_input

    def _process_model_outputs(self, outputs, speed):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """

        print("OUTPUTS", outputs)
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        # if steer > 0.5:
        # 	throttle *= (1 - steer + 0.3)
        # 	steer += 0.3
        # 	if steer > 1:
        # 		steer = 1
        # if steer < -0.5:
        # 	throttle *= (1 + steer + 0.3)
        # 	steer -= 0.3
        # 	if steer < -1:
        # 		steer = -1

        # if brake < 0.2:
        # 	brake = 0.0
        #
        if throttle > brake:
            brake = 0.0
        # else:
        # 	throttle = throttle * 2
        # if speed > 35.0 and brake == 0.0:
        # 	throttle = 0.0

        return steer, throttle, brake
Example #4
0
class MPSCAgent(AutonomousAgent):
	def setup(self, path_to_config_file):

		yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(path_to_config_file)

		# Take the checkpoint name and load it
		checkpoint = torch.load(os.path.join('/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
											  '_logs',
											 yaml_conf.split('/')[-2], yaml_conf.split('/')[-1].split('.')[-2]
											 , 'checkpoints', str(checkpoint_number) + '.pth'))

		# merge the specific agent config with global config _g_conf
		merge_with_yaml(os.path.join('/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
									 yaml_conf))

		self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
		# TODO: retrain the model with MPSC
		self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
		self.first_iter = True
		logging.info("Setup Model")
		# Load the model and prepare set it for evaluation
		self._model.load_state_dict(checkpoint['state_dict'])
		self._model.cuda()
		self._model.eval()
		self.latest_image = None
		self.latest_image_tensor = None
		# We add more time to the curve commands
		self._expand_command_front = 5
		self._expand_command_back = 3
		# check map waypoint format => carla_data_provider & http://carla.org/2018/11/16/release-0.9.1/
		# e.g. from map.get_waypoint Waypoint(Transform(Location(x=338.763, y=226.453, z=0), Rotation(pitch=360, yaw=270.035, roll=0)))
		self.track = Track.ALL_SENSORS_HDMAP_WAYPOINTS # specify available track info, see autonomous_agent.py

	def sensors(self):
		# currently give the full suite of available sensors
		# check the config/installation of the sensors => https://carla.readthedocs.io/en/latest/cameras_and_sensors/
		sensors = [{'type': 'sensor.camera.rgb', 'x': 0.7, 'y': 0.0, 'z': 1.60, 'roll':0.0, 'pitch':0.0, 'yaw': 0.0,
					'width': 800, 'height': 600, 'fov':100, 'id': 'Center'},
				   {'type': 'sensor.camera.rgb', 'x': 0.7, 'y': -0.4, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
					'yaw': -45.0, 'width': 800, 'height': 600, 'fov': 100, 'id': 'Left'},
				   {'type': 'sensor.camera.rgb', 'x': 0.7, 'y': 0.4, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0, 'yaw': 45.0,
					'width': 800, 'height': 600, 'fov': 100, 'id': 'Right'},
				   {'type': 'sensor.lidar.ray_cast', 'x': 0.7, 'y': -0.4, 'z': 1.60, 'roll': 0.0, 'pitch': 0.0,
					'yaw': -45.0, 'id': 'LIDAR'},
				   {'type': 'sensor.other.gnss', 'x': 0.7, 'y': -0.4, 'z': 1.60, 'id': 'GPS'},
				   {'type': 'sensor.can_bus', 'reading_frequency': 25, 'id': 'can_bus'},
				   {'type': 'sensor.hd_map', 'reading_frequency': 1, 'id': 'hdmap'},
				  ]
		return sensors

	def run_step(self, input_data, timestamp):
		# the core method
		# TODO
		# 1. request current localization 
		# input_data is obtained from sensors. => autonomous_agent.py def __call__(self)
		for key, value in input_data.items():
			print("input_data ", key, value)


# 		  
# ======[Agent] Wallclock_time = 2019-07-08 14:26:54.522155 / Sim_time = 1.4500000216066837
# input_data key  GPS (3755, array([49.00202793,  8.00463308,  1.58916414]))
# input_data key  can_bus (43, {'moi': 1.0, 'center_of_mass': {'x': 60.0, 'y': 0.0, 'z': -60.0}, 'linear_velocity': array([[<carla.libcarla.Vector3D object at 0x7fb4fa0e2348>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e2450>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e2608>],
#        [<carla.libcarla.Vector3D object at 0x7fb4fa0e22f0>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e2870>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e26b8>],
#        [<carla.libcarla.Vector3D object at 0x7fb4fa0e2500>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e2818>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0ddfa8>]], dtype=object), 'speed': -1.6444947256841175e-06, 'lateral_speed': array([[<carla.libcarla.Vector3D object at 0x7fb4fa0e4ad8>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e49d0>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e23a0>],
#        [<carla.libcarla.Vector3D object at 0x7fb4fa0e48c8>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e4ce8>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e23f8>],
#        [<carla.libcarla.Vector3D object at 0x7fb4fa0e4d40>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e4c90>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0e28c8>]], dtype=object), 'transform': <carla.libcarla.Transform object at 0x7fb4fa0de3f0>, 'damping_rate_zero_throttle_clutch_disengaged': 0.3499999940395355, 'max_rpm': 6000.0, 'clutch_strength': 10.0, 'drag_coefficient': 0.30000001192092896, 'linear_acceleration': array([[<carla.libcarla.Vector3D object at 0x7fb4fa0dd0e0>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0ddf50>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0d58c8>],
#        [<carla.libcarla.Vector3D object at 0x7fb4fa0dd088>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0dd138>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0d5088>],
#        [<carla.libcarla.Vector3D object at 0x7fb4fa0dd1e8>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0f6d98>,
#         <carla.libcarla.Vector3D object at 0x7fb4fa0d5920>]], dtype=object), 'damping_rate_full_throttle': 0.15000000596046448, 'use_gear_autobox': True, 'torque_curve': [{'x': 0.0, 'y': 400.0}, {'x': 1890.7607421875, 'y': 500.0}, {'x': 5729.57763671875, 'y': 400.0}], 'dimensions': {'width': 0.9279687404632568, 'height': 0.6399999856948853, 'length': 2.4543750286102295}, 'steering_curve': [{'x': 0.0, 'y': 1.0}, {'x': 20.0, 'y': 0.8999999761581421}, {'x': 60.0, 'y': 0.800000011920929}, {'x': 120.0, 'y': 0.699999988079071}], 'mass': 1850.0, 'wheels': [{'tire_friction': 3.5, 'steer_angle': 70.0, 'damping_rate': 0.25, 'disable_steering': False}, {'tire_friction': 3.5, 'steer_angle': 70.0, 'damping_rate': 0.25, 'disable_steering': False}, {'tire_friction': 3.5, 'steer_angle': 0.0, 'damping_rate': 0.25, 'disable_steering': False}, {'tire_friction': 3.5, 'steer_angle': 0.0, 'damping_rate': 0.25, 'disable_steering': False}]})
# input_data key  rgb (3753, array([[[135, 118, 110, 255],
#         [135, 118, 110, 255],
#         [136, 119, 110, 255],
#         ...,
#  		  [[114, 108, 105, 255],
#         [110, 105, 102, 255],
#         [112, 106, 104, 255],
#         ...,
#         [118, 112, 109, 255],
#         [118, 112, 109, 255],
#         [121, 115, 113, 255]]], dtype=uint8))
# Direction  RoadOption.LANEFOLLOW
# ego_trans
# Transform(Location(x=338.763, y=226.453, z=-0.0109183), Rotation(pitch=0.000136604, yaw=-89.9654, roll=-0.000274658))
# 1.9995784804148584/0.0

		localization = input_data['GPS']
		directions = self._get_current_direction(input_data['GPS'][1])
		logging.debug("Directions {}".format(directions))


		# 2. get recommended action from the NN controller (copy from CoILBaseline)
		# Take the forward speed and normalize it for it to go from 0-1
		norm_speed = input_data['can_bus'][1]['speed'] / g_conf.SPEED_FACTOR
		norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
		directions_tensor = torch.cuda.LongTensor([directions])
		# End-to-end part, feed in images from rgb sensor, then parse network output as controller
		# Compute the forward pass processing the sensors got from CARLA.
		model_outputs = self._model.forward_branch(self._process_sensors(input_data['rgb'][1]),
												   norm_speed,
												   directions_tensor)
		steer, throttle, brake = self._process_model_outputs(model_outputs[0])

		# 3. use inner-loop to simulate/approximate vehicle model
		# save the NN output as vehicle control
		sim_control = carla.VehicleControl()
		sim_control.steer = float(steer)
		sim_control.throttle = float(throttle)
		sim_control.brake = float(brake)
		logging.debug("inner loop for sim_control", sim_control)
		# TODO
		# copy a "parallel world" and create a "virtual agent" that has the same state with ego_vehicle
		sim_world = self.world # TODO: check how to copy the world, roads info are necessary, the rest optional
		sim_ego = sim_world.create_ego_vehicle(current_ego_states)

		sim_world.agent_instance = getattr(sim_world.module_agent, sim_world.module_agent.__name__)(args.config)
        correct_sensors, error_message = sim_world.valid_sensors_configuration(sim_world.sim_agent, sim_world.track)
		
		# pass the sim_control to virtual agent and run T timesteps
		sim_ego.apply_control(sim_control)
		# use current model to predict the following state-action series
		MPSC_controls = [] # TODO: check where u should init it
		for i in range(T):
			sim_ego.run_step() # TODO def run_step, update for sim_ego
			sim_ego.update()
			# 4. use MPSC to check safety at each future timestep
			safe = MPSC.check_safety(sim_ego.state, safety_boundary)
			
			if not safe:
				# if not safe, obtain MPSC control output
				logging.debug("use MPSC controller")
				control = MPSC_control
				MPSC_controls.append(MPSC_control) #  collect all "safe" o/p
				# 7. execute MPSC control and add it to new dataset
				break
			else:
				if i < T-1:
					continue
				else: # final step
					# if safe within all T timesteps, proceed to  use NN control output
					logging.debug("use NN controller")
					control = sim_control
		# 8. retrain the network and/or do policy aggregation
		if len(MPSC_controls):
			self.model.train(self.model, MPSC_controls)

		logging.debug("Control output ", control)
		# There is the posibility to replace some of the predictions with oracle predictions.
		self.first_iter = False
		return control
Example #5
0
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output=True, yaml_file=None):
    latest = None
    # try:
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias+'.yaml')
    if yaml_file is not None:
      path_to_yaml_file = os.path.join(yaml_file, exp_alias+'.yaml')
    merge_with_yaml(path_to_yaml_file)
    # The validation dataset is always fully loaded, so we fix a very high number of hours
    # g_conf.NUMBER_OF_HOURS = 10000 # removed to simplify code
    
    """
    # commented out to simplify the code
    set_type_of_process('validation', dataset_name)

    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')

    if suppress_output:
        sys.stdout = open(os.path.join('_output_logs',
                                       exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
        sys.stderr = open(os.path.join('_output_logs',
                          exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
    """

    # Define the dataset. This structure is has the __get_item__ redefined in a way
    # that you can access the HDFILES positions from the root directory as a in a vector.
    
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
    augmenter = Augmenter(None)
    # Definition of the dataset to be used. Preload name is just the validation data name
    print ('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset, transform=augmenter,
                          preload_name=dataset_name)

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=g_conf.BATCH_SIZE,
                                              shuffle=False,
                                              num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
                                              pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

    """ removing this segment to simplify code
    # The window used to keep track of the trainings
    l1_window = []
    latest = get_latest_evaluated_checkpoint()
    if latest is not None:  # When latest is noe
        l1_window = coil_logger.recover_loss_window(dataset_name, None)
    """
    
    model.cuda()

    best_mse = 1000
    best_error = 1000
    best_mse_iter = 0
    best_error_iter = 0

    # modified validation code from here to run a single model
    # checkpoint = torch.load(os.path.join(g_conf.VALIDATION_CHECKPOINT_PATH
    #                        , 'checkpoints', g_conf.VALIDATION_CHECKPOINT_ITERATION + '.pth'))
    checkpoint = torch.load(args.checkpoint)
    checkpoint_iteration = checkpoint['iteration']
    print("model loaded ", checkpoint_iteration)

    model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    accumulated_mse = 0
    accumulated_error = 0
    iteration_on_checkpoint = 0

    # considering steer, throttle & brake so 3x3 matrix
    normalized_covariate_shift = torch.zeros(3,3)

    print ('data_loader size: ', len(data_loader))
    total_output = []
    path_names = []
    for data in data_loader:

        # Compute the forward pass on a batch from  the validation dataset
        path_names += data[1]
        controls = data[0]['directions']
        # get prefinal branch activations, only the last layers have dropout
        output = model.get_prefinal_layer(torch.squeeze(data[0]['rgb']).cuda(),
                                      dataset.extract_inputs(data[0]).cuda(),
                                      controls)
        total_output += output.detach().cpu().tolist()

        iteration_on_checkpoint += 1
        if iteration_on_checkpoint % 50 == 0:
            print ('iter: ', iteration_on_checkpoint)

    print (len(total_output), len(path_names))
    i = 0
    st = time.time()
    for act, name in zip(total_output, path_names):
        episode_num = name.split('/')[-2]
        frame_num = name.split('/')[-1].split('_')[-1].split('.')[0]
        if not os.path.isdir(os.path.join(args.save_path, args.dataset_name, episode_num)):
            os.mkdir(os.path.join(args.save_path, args.dataset_name, episode_num))
        file_name = 'Activation_'+frame_num
        i += 1
        if i%1000 == 0:
            print ('iteration: ', i)
        # save activations for each image, to be used for computing the uncertainity later
        torch.save(act, os.path.join(args.save_path, args.dataset_name, episode_num, file_name))
    print ('time taken: ', time.time()-st)
Example #6
0
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case

        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join('_output_logs', exp_alias + '_' +
                              g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a",
                              buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                              exp_alias + '_err_'+g_conf.PROCESS_NAME + '_'
                                           + str(os.getpid()) + ".out"),
                              "a", buffering=1)
        

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                                                  g_conf.PRELOAD_MODEL_ALIAS,
                                                 'checkpoints',
                                                 str(g_conf.PRELOAD_MODEL_CHECKPOINT)+'.pth'))


        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias,
                                    'checkpoints', str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0


        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read a dataset. The coil dataset generator
        # can be found
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS)
                                               + 'hours_' + g_conf.TRAIN_DATASET_NAME)
        print ("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration, number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print ("Before the loss")

        criterion = Loss(g_conf.LOSS_FUNCTION)

        # Loss time series window
        for data in data_loader:

            # Basically in this mode of execution, we validate every X Steps, if it goes up 3 times,
            # add a stop on the _logs folder that is going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            iteration += 1
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # get the control commands from float_data, size = [120,1]

            capture_time = time.time()
            controls = data['directions']
            # The output(branches) is a list of 5 branches results, each branch is with size [120,3]
            model.zero_grad()
            branches = model(torch.squeeze(data['rgb'].cuda()),
                             dataset.extract_inputs(data).cuda())
            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(state, os.path.join('_logs', exp_batch, exp_alias
                                               , 'checkpoints', str(iteration) + '.pth'))

            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            accumulated_time += time.time() - capture_time

            coil_logger.add_message('Iterating',
                                    {'Iteration': iteration,
                                     'Loss': loss.data.tolist(),
                                     'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                                     'BestLoss': best_loss, 'BestLossIteration': best_loss_iter,
                                     'Output': output[position].data.tolist(),
                                     'GroundTruth': dataset.extract_targets(data)[
                                         position].data.tolist(),
                                     'Error': error[position].data.tolist(),
                                     'Inputs': dataset.extract_inputs(data)[
                                         position].data.tolist()},
                                    iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #7
0
def execute(gpu,
            exp_batch='nocrash',
            exp_alias='resnet34imnet10S1',
            suppress_output=True,
            yaml_file=None):
    latest = None
    # try:
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias + '.yaml')
    if yaml_file is not None:
        path_to_yaml_file = os.path.join(yaml_file, exp_alias + '.yaml')
    merge_with_yaml(path_to_yaml_file)
    # The validation dataset is always fully loaded, so we fix a very high number of hours
    # g_conf.NUMBER_OF_HOURS = 10000 # removed to simplify code
    """
    # commenting this segment to simplify code, uncomment if necessary
    set_type_of_process('validation', dataset_name)

    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')

    if suppress_output:
        sys.stdout = open(os.path.join('_output_logs',
                                       exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
        sys.stderr = open(os.path.join('_output_logs',
                          exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
    """

    # Define the dataset. This structure is has the __get_item__ redefined in a way
    # that you can access the HDFILES positions from the root directory as a in a vector.

    full_dataset = os.path.join(
        os.environ["COIL_DATASET_PATH"], g_conf.DART_COVMAT_DATA
    )  # dataset used for computing dart covariance matrix

    augmenter = Augmenter(None)

    # Definition of the dataset to be used. Preload name is just the validation data name
    print('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset,
                          transform=augmenter,
                          preload_name=g_conf.DART_COVMAT_DATA
                          )  # specify DART_COVMAT_DATA in the config file

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=g_conf.BATCH_SIZE,
        shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
        pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    """ removing this segment to simplify code
    # The window used to keep track of the trainings
    l1_window = []
    latest = get_latest_evaluated_checkpoint()
    if latest is not None:  # When latest is noe
        l1_window = coil_logger.recover_loss_window(g_conf.DART_COVMAT_DATA, None)
    """

    model.cuda()

    best_mse = 1000
    best_error = 1000
    best_mse_iter = 0
    best_error_iter = 0

    # modified validation code from here to run a single model checkpoint
    # used for computing the covariance matrix with the DART model checkpoint
    checkpoint = torch.load(
        g_conf.DART_MODEL_CHECKPOINT
    )  # specify DART_MODEL_CHECKPOINT in the config file
    checkpoint_iteration = checkpoint['iteration']
    print("Validation loaded ", checkpoint_iteration)
    model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    accumulated_mse = 0
    accumulated_error = 0
    iteration_on_checkpoint = 0

    # considering steer, throttle & brake so 3x3 matrix
    normalized_covariate_shift = torch.zeros(3, 3)

    print('data_loader size: ', len(data_loader))
    for data in data_loader:

        # Compute the forward pass on a batch from the validation dataset
        controls = data['directions']
        output = model.forward_branch(
            torch.squeeze(data['rgb']).cuda(),
            dataset.extract_inputs(data).cuda(), controls)
        """ removing this segment to simplify code
        # It could be either waypoints or direct control
        if 'waypoint1_angle' in g_conf.TARGETS:
            write_waypoints_output(checkpoint_iteration, output)
        else:
            write_regular_output(checkpoint_iteration, output)
        """

        mse = torch.mean(
            (output - dataset.extract_targets(data).cuda())**2).data.tolist()
        mean_error = torch.mean(
            torch.abs(output -
                      dataset.extract_targets(data).cuda())).data.tolist()

        accumulated_error += mean_error
        accumulated_mse += mse
        error = torch.abs(output -
                          dataset.extract_targets(data).cuda()).data.cpu()

        ### covariate shift segment starts
        error = error.unsqueeze(dim=2)
        error_transpose = torch.transpose(error, 1, 2)
        # compute covariate shift
        covariate_shift = torch.matmul(error, error_transpose)
        # expand traj length tensor to Bx3x3 (considering steer, throttle & brake)
        traj_lengths = torch.stack([
            torch.stack([data['current_traj_length'].squeeze(dim=1)] * 3,
                        dim=1)
        ] * 3,
                                   dim=2)
        covariate_shift = covariate_shift / traj_lengths
        covariate_shift = torch.sum(covariate_shift, dim=0)
        # print ('current covariate shift: ', covariate_shift.shape)

        normalized_covariate_shift += covariate_shift
        ### covariate shift segment ends

        total_episodes = data['episode_count'][-1].data
        iteration_on_checkpoint += 1
        if iteration_on_checkpoint % 50 == 0:
            print('iteration: ', iteration_on_checkpoint)

    print('total episodes: ', total_episodes)
    normalized_covariate_shift = normalized_covariate_shift / total_episodes
    print('normalized covariate shift: ', normalized_covariate_shift.shape,
          normalized_covariate_shift)

    # save the matrix to restart directly from the mat file
    # np.save(os.path.join(g_conf.COVARIANCE_MATRIX_PATH, 'covariance_matrix_%s.npy'%g_conf.DART_COVMATH_DATA), normalized_covariate_shift)
    return normalized_covariate_shift.numpy()
    '''
Example #8
0
class CoILBaseline(AutonomousAgent):
    def setup(self, path_to_config_file):

        yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(
            path_to_config_file)

        # Take the checkpoint name and load it
        checkpoint = torch.load(
            os.path.join(
                '/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                '_logs',
                yaml_conf.split('/')[-2],
                yaml_conf.split('/')[-1].split('.')[-2], 'checkpoints',
                str(checkpoint_number) + '.pth'))

        # do the merge here
        merge_with_yaml(
            os.path.join(
                '/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                yaml_conf))

        self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()
        self.latest_image = None
        self.latest_image_tensor = None

        # We add more time to the curve commands
        self._expand_command_front = 5
        self._expand_command_back = 3

    def sensors(self):

        sensors = [{
            'type': 'sensor.camera.rgb',
            'x': 2.0,
            'y': 0.0,
            'z': 1.40,
            'roll': 0.0,
            'pitch': 0.0,
            'yaw': 0.0,
            'width': 800,
            'height': 600,
            'fov': 100,
            'id': 'rgb'
        }, {
            'type': 'sensor.can_bus',
            'reading_frequency': 25,
            'id': 'can_bus'
        }, {
            'type': 'sensor.other.gnss',
            'x': 0.7,
            'y': -0.4,
            'z': 1.60,
            'id': 'GPS'
        }]

        return sensors

    def run_step(self, input_data):

        # measurements, sensor_data, directions, target
        #print("=====================>")
        #or key, val in input_data.items():
        #    shape = val[1].shape
        #    #print("[{} -- {:06d}] with shape {}".format(key, val[0], shape))
        #print("<=====================")
        #print ("speed: ", input_data['speed'])
        #print ("gps: ", input_data['GPS'])

        directions = self._get_current_direction(input_data['GPS'][1])

        #print ("Directions : ", directions)
        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = input_data['can_bus'][1]['speed'] / g_conf.SPEED_FACTOR
        # norm_speed = 0.2
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])
        # Compute the forward pass processing the sensors got from CARLA.
        model_outputs = self._model.forward_branch(
            self._process_sensors(input_data['rgb'][1]), norm_speed,
            directions_tensor)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        #print ("outputs: ", steer,throttle,brake)
        control = carla.VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        # There is the posibility to replace some of the predictions with oracle predictions.
        self.first_iter = False
        return control

    def set_global_plan(self, topological_plan):
        # We expand the commands before the curves in order to give more time
        # for the agent to respond.
        print(" Set the plan ")
        #topological_plan = self._expand_commands(topological_plan)
        self._global_plan = topological_plan

        #print (" plan after ")
        #print (self._global_plan)

    def get_attentions(self, layers=None):
        """
        Returns
            The activations obtained from the first layers of the latest iteration.

        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError(
                'No step was ran yet. '
                'No image to compute the activations, Try Running ')
        all_layers = self._model.get_perception_layers(
            self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [88, 200]))
        return attentions

    def _process_sensors(self, sensor):

        iteration = 0

        sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]

        sensor = scipy.misc.imresize(
            sensor, (g_conf.SENSORS['rgb'][1], g_conf.SENSORS['rgb'][2]))

        self.latest_image = sensor

        sensor = np.swapaxes(sensor, 0, 1)

        sensor = np.transpose(sensor, (2, 1, 0))

        sensor = torch.from_numpy(sensor / 255.0).type(
            torch.FloatTensor).cuda()

        if iteration == 0:
            image_input = sensor

        iteration += 1

        image_input = image_input.unsqueeze(0)

        self.latest_image_tensor = image_input

        return image_input

    def _get_current_direction(self, vehicle_position):

        # for the current position and orientation try to get the closest one from the waypoints
        closest_id = 0
        min_distance = 100000
        for index in range(len(self._global_plan)):

            waypoint = self._global_plan[index][0]

            computed_distance = distance_vehicle(waypoint, vehicle_position)
            if computed_distance < min_distance:
                min_distance = computed_distance
                closest_id = index

        #print ("Closest waypoint ", closest_id, "dist ", min_distance)
        direction = self._global_plan[closest_id][1]

        if direction == RoadOption.LEFT:
            direction = 3.0
        elif direction == RoadOption.RIGHT:
            direction = 4.0
        elif direction == RoadOption.STRAIGHT:
            direction = 5.0
        else:
            direction = 2.0

        return direction

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0

        if throttle > brake:
            brake = 0.0

        return steer, throttle, brake

    def _expand_commands(self, topological_plan):
        """ The idea is to make the intersection indications to last longer"""

        # O(2*N) algorithm , probably it is possible to do in O(N) with queues.

        # Get the index where curves start and end
        curves_start_end = []
        inside = False
        start = -1
        current_curve = RoadOption.LANEFOLLOW
        for index in range(len(topological_plan)):

            command = topological_plan[index][1]
            print(command)
            if command != RoadOption.LANEFOLLOW and not inside:
                inside = True
                start = index
                current_curve = command

            if command == RoadOption.LANEFOLLOW and inside:
                inside = False
                # End now is the index.
                curves_start_end.append([start, index, current_curve])
                if start == -1:
                    raise ValueError("End of curve without start")

                start = -1

        for start_end_index_command in curves_start_end:
            start_index = start_end_index_command[0]
            end_index = start_end_index_command[1]
            command = start_end_index_command[2]

            # Add the backwards curves ( Before the begginning)
            for index in range(1, self._expand_command_front + 1):
                changed_index = start_index - index
                if changed_index > 0:
                    topological_plan[changed_index] = (
                        topological_plan[changed_index][0], command)

            # add the onnes after the end
            for index in range(0, self._expand_command_back):
                changed_index = end_index + index
                if changed_index < len(topological_plan):
                    topological_plan[changed_index] = (
                        topological_plan[changed_index][0], command)

        return topological_plan
Example #9
0
def execute(gpu, exp_batch, exp_alias, dataset_name, validation_set=False):
    latest = None
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    g_conf.immutable(False)
    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    # If using validation dataset, fix a very high number of hours
    if validation_set:
        g_conf.NUMBER_OF_HOURS = 10000
    g_conf.immutable(True)

    # Define the dataset.
    full_dataset = [
        os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
    ]
    augmenter = Augmenter(None)
    if validation_set:
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_names=[dataset_name])
    else:
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_names=[
                                  str(g_conf.NUMBER_OF_HOURS) + 'hours_' +
                                  dataset_name
                              ],
                              train_dataset=True)

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=g_conf.BATCH_SIZE,
        shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
        pin_memory=True)

    # Define model
    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    """ 
        ######
        Run a single driving benchmark specified by the checkpoint were validation is stale
        ######
    """

    if g_conf.FINISH_ON_VALIDATION_STALE is not None:

        while validation_stale_point(
                g_conf.FINISH_ON_VALIDATION_STALE) is None:
            time.sleep(0.1)

        validation_state_iteration = validation_stale_point(
            g_conf.FINISH_ON_VALIDATION_STALE)

        checkpoint = torch.load(
            os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                         str(validation_state_iteration) + '.pth'))
        print("Validation loaded ", validation_state_iteration)
    else:
        """
        #####
        Main Loop , Run a benchmark for each specified checkpoint on the "Test Configuration"
        #####
        """
        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
            # Get the correct checkpoint
            # We check it for some task name, all of then are ready at the same time
            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE,
                                        control_filename + '_' + task_list[0]):

                latest = get_next_checkpoint(
                    g_conf.TEST_SCHEDULE,
                    control_filename + '_' + task_list[0])

                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(latest) + '.pth'))
                print("Validation loaded ", latest)
            else:
                time.sleep(0.1)

    # Load the model and prepare set it for evaluation
    model.load_state_dict(checkpoint['state_dict'])
    model.cuda()
    model.eval()

    first_iter = True
    for data in data_loader:

        # Compute the forward pass on a batch from the dataset and get the intermediate
        # representations of the squeeze network
        if "seg" in g_conf.SENSORS.keys():
            perception_rep, speed_rep, intentions_rep = \
                model.get_intermediate_representations(data,
                                                       dataset.extract_inputs(data).cuda(),
                                                       dataset.extract_intentions(data).cuda())
            perception_rep = perception_rep.data.cpu()
            speed_rep = speed_rep.data.cpu()
            intentions_rep = intentions_rep.data.cpu()
        if first_iter:
            perception_rep_all = perception_rep
            speed_rep_all = speed_rep
            intentions_rep_all = intentions_rep
        else:
            perception_rep_all = torch.cat(
                [perception_rep_all, perception_rep], 0)
            speed_rep_all = torch.cat([speed_rep_all, speed_rep], 0)
            intentions_rep_all = torch.cat(
                [intentions_rep_all, intentions_rep], 0)
        first_iter = False

    # Save intermediate representations
    perception_rep_all = perception_rep_all.tolist()
    speed_rep_all = speed_rep_all.tolist()
    intentions_rep_all = intentions_rep_all.tolist()
    np.save(
        os.path.join(
            '_preloads', exp_batch + '_' + exp_alias + '_' + dataset_name +
            '_representations'),
        [perception_rep_all, speed_rep_all, intentions_rep_all])
Example #10
0
class CoILAgent(object):
    def __init__(self, checkpoint, town_name, carla_version='0.84'):

        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version
        self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        # Set ERFnet for segmentation
        self.model_erf = ERFNet(20)
        self.model_erf = torch.nn.DataParallel(self.model_erf)
        self.model_erf.cuda()

        print("LOAD ERFNet - validate")

        def load_my_state_dict(
            model, state_dict
        ):  #custom function to load model when not all dict elements
            own_state = model.state_dict()
            for name, param in state_dict.items():
                if name not in own_state:
                    continue
            own_state[name].copy_(param)
            return model

        self.model_erf = load_my_state_dict(
            self.model_erf,
            torch.load(os.path.join('trained_models/erfnet_pretrained.pth')))
        self.model_erf.eval()
        print("ERFNet and weights LOADED successfully")

        self.latest_image = None
        self.latest_image_tensor = None

        if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
            self.control_agent = CommandFollower(town_name)

    def run_step(self, measurements, sensor_data, directions, target):
        """
            Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.

        """

        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])
        # Compute the forward pass processing the sensors got from CARLA.
        rgbs = self._process_sensors(sensor_data)
        with torch.no_grad():
            outputs = self.model_erf(rgbs)
        labels = outputs.max(1)[1].byte().cpu().data

        seg_road = (labels == 0)
        seg_not_road = (labels != 0)
        seg = torch.stack((seg_road, seg_not_road), 1).float()

        model_outputs = self._model.forward_branch(seg.cuda(), norm_speed,
                                                   directions_tensor)

        #        model_outputs = self._model.forward_branch(self._process_sensors(sensor_data), norm_speed,
        #                                                   directions_tensor)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])
        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        # There is the posibility to replace some of the predictions with oracle predictions.
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, target)

        if self.first_iter:
            coil_logger.add_message('Iterating', {
                "Checkpoint": self.checkpoint['iteration'],
                'Agent': str(steer)
            }, self.checkpoint['iteration'])
        self.first_iter = False

        return control

    def get_attentions(self, layers=None):
        """

        Returns
            The activations obtained from the first layers of the latest iteration.

        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError(
                'No step was ran yet. '
                'No image to compute the activations, Try Running ')
        all_layers = self._model.get_perception_layers(
            self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [88, 200]))
        return attentions

    def _process_sensors(self, sensors):

        iteration = 0
        for name, size in g_conf.SENSORS.items():

            if self._carla_version == '0.9':
                sensor = sensors[name][g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1],
                                       ...]
            else:
                sensor = sensors[name].data[
                    g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]

            sensor = scipy.misc.imresize(sensor, (size[1], size[2]))

            self.latest_image = sensor

            sensor = np.swapaxes(sensor, 0, 1)

            sensor = np.transpose(sensor, (2, 1, 0))

            sensor = torch.from_numpy(sensor / 255.0).type(
                torch.FloatTensor).cuda()

            if iteration == 0:
                image_input = sensor
            else:
                image_input = torch.cat((image_input, sensor), 0)

            iteration += 1

        image_input = image_input.unsqueeze(0)

        self.latest_image_tensor = image_input

        return image_input

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0

        if throttle > brake:
            brake = 0.0

        return steer, throttle, brake

    def _process_model_outputs_wp(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        wpa1, wpa2, throttle, brake = outputs[3], outputs[4], outputs[
            1], outputs[2]
        if brake < 0.2:
            brake = 0.0

        if throttle > brake:
            brake = 0.0

        steer = 0.7 * wpa2

        if steer > 0:
            steer = min(steer, 1)
        else:
            steer = max(steer, -1)

        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, target):
        # For the oracle, the current version of sensor data is not really relevant.
        control, _, _, _, _ = self.control_agent.run_step(
            measurements, [], [], target)

        return control.steer, control.throttle, control.brake
class CoILAgent(object):
    def __init__(self, checkpoint, town_name, carla_version='0.84'):

        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version
        self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        self.latest_image = None
        self.latest_image_tensor = None

        if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
            self.control_agent = CommandFollower(town_name)

    def run_step(self, measurements, sensor_data, directions, target):
        """
            Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.

        """

        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])
        # Compute the forward pass processing the sensors got from CARLA.
        model_outputs = self._model.forward_branch(
            self._process_sensors(sensor_data), norm_speed, directions_tensor)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])
        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        # There is the posibility to replace some of the predictions with oracle predictions.
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, target)

        if self.first_iter:
            coil_logger.add_message('Iterating', {
                "Checkpoint": self.checkpoint['iteration'],
                'Agent': str(steer)
            }, self.checkpoint['iteration'])
        self.first_iter = False

        return control

    def get_attentions(self, layers=None):
        """

        Returns
            The activations obtained from the first layers of the latest iteration.

        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError(
                'No step was ran yet. '
                'No image to compute the activations, Try Running ')
        all_layers = self._model.get_perception_layers(
            self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [150, 200]))
        return attentions

    def _process_sensors(self, sensors):

        colors = [[0, 0, 0], [70, 70, 70], [190, 153, 153], [250, 170, 160],
                  [220, 20, 60], [153, 153, 153], [157, 234, 50],
                  [128, 64, 128], [244, 35, 232], [107, 142, 35], [0, 0, 142],
                  [102, 102, 156], [220, 220, 0]]

        def label_to_color_0(e):
            return colors[e][0]

        def label_to_color_1(e):
            return colors[e][1]

        def label_to_color_2(e):
            return colors[e][2]

        iteration = 0
        for name, size in g_conf.SENSORS.items():

            sensor = sensors[name].data

            if (sensor.shape == (600, 800)):
                labels = sensor
                tmp = np.zeros((600, 800, 3))
                f0 = np.vectorize(label_to_color_0)
                f1 = np.vectorize(label_to_color_1)
                f2 = np.vectorize(label_to_color_2)
                tmp[:, :, 0] = f0(sensor)
                tmp[:, :, 1] = f1(sensor)
                tmp[:, :, 2] = f2(sensor)
                sensor = tmp

            sensor = scipy.misc.imresize(sensor, (size[1], size[2]))

            ##### Save sensor

            self.latest_image = sensor

            sensor = np.swapaxes(sensor, 0, 1)

            sensor = np.transpose(sensor, (2, 1, 0))

            sensor = torch.from_numpy(sensor / 255.0).type(
                torch.FloatTensor).cuda()

            if iteration == 0:
                image_input = sensor

            else:
                image_input = torch.cat((image_input, sensor), 0)

            iteration += 1

        image_input = image_input.unsqueeze(0)

        self.latest_image_tensor = image_input

        return image_input

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0

        if throttle > brake:
            brake = 0.0

        return steer, throttle, brake

    def _process_model_outputs_wp(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        wpa1, wpa2, throttle, brake = outputs[3], outputs[4], outputs[
            1], outputs[2]
        if brake < 0.2:
            brake = 0.0

        if throttle > brake:
            brake = 0.0

        steer = 0.7 * wpa2

        if steer > 0:
            steer = min(steer, 1)
        else:
            steer = max(steer, -1)

        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, target):
        # For the oracle, the current version of sensor data is not really relevant.
        control, _, _, _, _ = self.control_agent.run_step(
            measurements, [], [], target)

        return control.steer, control.throttle, control.brake
Example #12
0
class CoILAgent(Agent):
    def __init__(self, checkpoint):

        Agent.__init__(self)

        self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
        self.model = CoILModel(g_conf.MODEL_NAME)

        self.model.load_state_dict(checkpoint['state_dict'])

        self.model.cuda()

    def run_step(self, measurements, sensor_data, directions, target):

        #control_agent = self._agent.run_step(measurements, None, target)

        speed = torch.cuda.FloatTensor(
            [measurements.player_measurements.forward_speed]).unsqueeze(0)
        print("Speed shape ", speed)
        directions_tensor = torch.cuda.LongTensor([directions])
        model_outputs = self.model.forward_branch(
            self._process_sensors(sensor_data), speed, directions_tensor)

        print(model_outputs)

        steer, throttle, brake = self._process_model_outputs(
            model_outputs[0], measurements.player_measurements.forward_speed)

        #control = self.compute_action(,
        #                              ,
        #                              directions)
        control = carla_protocol.Control()
        control.steer = steer
        control.throttle = throttle
        control.brake = brake
        # if self._auto_pilot:
        #    control.steer = control_agent.steer
        # TODO: adapt the client side agent for the new version. ( PROBLEM )
        #control.throttle = control_agent.throttle
        #control.brake = control_agent.brake

        # TODO: maybe change to a more meaningfull message ??
        return control

    def _process_sensors(self, sensors):

        iteration = 0
        for name, size in g_conf.SENSORS.items():

            sensor = sensors[name].data[
                g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            if sensors[name].type == 'SemanticSegmentation':

                # TODO: the camera name has to be sincronized with what is in the experiment...
                sensor = join_classes(sensor)

                sensor = sensor[:, :, np.newaxis]

                image_transform = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Resize((size[1], size[2]),
                                      interpolation=Image.NEAREST),
                    iag.ToGPU(),
                    iag.Multiply((1 / (number_of_seg_classes - 1)))
                ])
            else:

                image_transform = transforms.Compose([
                    transforms.ToPILImage(),
                    transforms.Resize((size[1], size[2])),
                    transforms.ToTensor(),
                    transforms.Normalize((0, 0, 0), (255, 255, 255)),
                    iag.ToGPU()
                ])

            sensor = np.swapaxes(sensor, 0, 1)

            sensor = np.flip(sensor.transpose((2, 0, 1)), axis=0)

            if iteration == 0:
                image_input = image_transform(sensor)
            else:
                image_input = torch.cat((image_input, sensor), 0)

            iteration += 1

        image_input = image_input.unsqueeze(0)

        return image_input

    def _process_model_outputs(self, outputs, speed):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.2:
            brake = 0.0

        if throttle > brake:
            brake = 0.0
        else:
            throttle = throttle * 2
        if speed > 35.0 and brake == 0.0:
            throttle = 0.0

        return steer, throttle, brake

    """

    def compute_action(self, sensors, speed, direction):

        capture_time = time.time()


        sensor_pack = []



        for i in range(len(sensors)):

            sensor = sensors[i]

            sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], :]

            if g_conf.param.SENSORS.keys()[i] == 'rgb':

                sensor = scipy.misc.imresize(sensor, [self._config.sensors_size[i][0],
                                                      self._config.sensors_size[i][1]])


            elif g_conf.param.SENSORS.keys()[i] == 'labels':

                sensor = scipy.misc.imresize(sensor, [self._config.sensors_size[i][0],
                                                      self._config.sensors_size[i][1]],
                                             interp='nearest')

                sensor = join_classes(sensor) * int(255 / (number_of_seg_classes - 1))

                sensor = sensor[:, :, np.newaxis]

            sensor_pack.append(sensor)

        if len(sensor_pack) > 1:

            image_input = np.concatenate((sensor_pack[0], sensor_pack[1]), axis=2)

        else:
            image_input = sensor_pack[0]

        image_input = image_input.astype(np.float32)
        image_input = np.multiply(image_input, 1.0 / 255.0)
        

        image_input = sensors[0]

        image_input = image_input.astype(np.float32)
        image_input = np.multiply(image_input, 1.0 / 255.0)
        # TODO: This will of course depend on the model , if it is based on sequences there are
        # TODO: different requirements
        #tensor = self.model(image_input)
        outputs = self.model.forward_branch(image_input, speed, direction)



        return control  # ,machine_output_functions.get_intermediate_rep(image_input,speed,self._config,self._sess,self._train_manager)

    """
    """
Example #13
0
def execute(gpu, exp_batch, exp_alias, dataset_name):
    # We set the visible cuda devices

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('validation', dataset_name)

    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')

    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)

    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not ?
        return

    #Define the dataset. This structure is has the __get_item__ redefined in a way
    #that you can access the HDFILES positions from the root directory as a in a vector.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)

    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    # Creates the sampler, this part is responsible for managing the keys. It divides
    # all keys depending on the measurements and produces a set of keys for each bach.

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    # TODO: batch size an number of workers go to some configuration file
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=120,
                                              shuffle=False,
                                              num_workers=12,
                                              pin_memory=True)

    # TODO: here there is clearly a posibility to make a cool "conditioning" system.
    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()

    criterion = Loss()

    latest = get_latest_evaluated_checkpoint()
    if latest is None:  # When nothing was tested, get latest returns none, we fix that.
        latest = 0

    print(dataset.meta_data)
    best_loss = 1000
    best_error = 1000
    best_loss_iter = 0
    best_error_iter = 0

    while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

        if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

            latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(latest) + '.pth'))
            checkpoint_iteration = checkpoint['iteration']
            print("Validation loaded ", checkpoint_iteration)

            accumulated_loss = 0
            accumulated_error = 0
            iteration_on_checkpoint = 0
            for data in data_loader:

                input_data, float_data = data
                control_position = np.where(
                    dataset.meta_data[:, 0] == 'control')[0][0]
                speed_position = np.where(
                    dataset.meta_data[:, 0] == 'speed_module')[0][0]
                print(torch.squeeze(input_data['rgb']).shape)

                print(control_position)
                print(speed_position)
                # Obs : Maybe we could also check for other branches ??

                output = model.forward_branch(
                    torch.squeeze(input_data['rgb']).cuda(),
                    float_data[:, speed_position, :].cuda(),
                    float_data[:, control_position, :].cuda())

                for i in range(input_data['rgb'].shape[0]):

                    coil_logger.write_on_csv(
                        checkpoint_iteration,
                        [output[i][0], output[i][1], output[i][2]])

                # TODO: Change this a functional standard using the loss functions.

                loss = torch.mean(
                    (output - dataset.extract_targets(float_data).cuda()
                     )**2).data.tolist()
                mean_error = torch.mean(
                    torch.abs(output -
                              dataset.extract_targets(float_data).cuda())
                ).data.tolist()
                accumulated_error += mean_error
                accumulated_loss += loss
                error = torch.abs(output -
                                  dataset.extract_targets(float_data).cuda())

                # Log a random position
                position = random.randint(0, len(float_data) - 1)
                #print (output[position].data.tolist())
                coil_logger.add_message(
                    'Iterating', {
                        'Checkpoint':
                        latest,
                        'Iteration': (str(iteration_on_checkpoint * 120) +
                                      '/' + str(len(dataset))),
                        'MeanError':
                        mean_error,
                        'Loss':
                        loss,
                        'Output':
                        output[position].data.tolist(),
                        'GroundTruth':
                        dataset.extract_targets(
                            float_data)[position].data.tolist(),
                        'Error':
                        error[position].data.tolist(),
                        'Inputs':
                        dataset.extract_inputs(float_data)
                        [position].data.tolist()
                    }, latest)
                iteration_on_checkpoint += 1

            checkpoint_average_loss = accumulated_loss / len(dataset)
            checkpoint_average_error = accumulated_error / len(dataset)
            coil_logger.add_scalar('Loss', checkpoint_average_loss, latest)
            coil_logger.add_scalar('Error', checkpoint_average_error, latest)

            if checkpoint_average_loss < best_loss:
                best_loss = checkpoint_average_loss
                best_loss_iter = latest

            if checkpoint_average_error < best_loss:
                best_error = checkpoint_average_error
                best_error_iter = latest

            coil_logger.add_message(
                'Iterating', {
                    'Summary': {
                        'Error': checkpoint_average_error,
                        'Loss': checkpoint_average_loss,
                        'BestError': best_error,
                        'BestLoss': best_loss,
                        'BestLossCheckpoint': best_loss_iter,
                        'BestErrorCheckpoint': best_error_iter
                    },
                    'Checkpoint': latest
                })

        else:
            time.sleep(1)
            print("Waiting for the next Validation")
class AffordancesAgent(object):
    def __init__(self, path_to_config_file):
        # params for now it is not used but we might want to use this to set

        self.setup(path_to_config_file)
        self.save_attentions = False

    def setup(self, path_to_config_file):
        self._agent = None
        self.route_assigned = False
        self.count = 0

        exp_dir = os.path.join(
            '/', os.path.join(*path_to_config_file.split('/')[:-1]))

        yaml_conf, checkpoint_number, agent_name, encoder_params = checkpoint_parse_configuration_file(
            path_to_config_file)

        if encoder_params == "None":
            encoder_params = None

        g_conf.immutable(False)
        merge_with_yaml(
            os.path.join('/',
                         os.path.join(*path_to_config_file.split('/')[:-4]),
                         yaml_conf), encoder_params)

        if g_conf.MODEL_TYPE in ['one-step-affordances']:
            # one step training, no need to retrain FC layers, we just get the output of encoder model as prediciton
            self._model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                       g_conf.ENCODER_MODEL_CONFIGURATION)
            self.checkpoint = torch.load(
                os.path.join(exp_dir, 'checkpoints',
                             str(checkpoint_number) + '.pth'))
            print("Affordances Model ",
                  str(checkpoint_number) + '.pth', "loaded from ",
                  os.path.join(exp_dir, 'checkpoints'))
            self._model.load_state_dict(self.checkpoint['state_dict'])
            self._model.cuda()
            self._model.eval()

        elif g_conf.MODEL_TYPE in ['separate-affordances']:
            if encoder_params is not None:
                self.encoder_model = EncoderModel(
                    g_conf.ENCODER_MODEL_TYPE,
                    g_conf.ENCODER_MODEL_CONFIGURATION)
                self.encoder_model.cuda()
                # Here we load the pre-trained encoder (not fine-tunned)
                if g_conf.FREEZE_ENCODER:
                    encoder_checkpoint = torch.load(
                        os.path.join(
                            os.path.join(
                                '/',
                                os.path.join(
                                    *path_to_config_file.split('/')[:-4])),
                            '_logs', encoder_params['encoder_folder'],
                            encoder_params['encoder_exp'], 'checkpoints',
                            str(encoder_params['encoder_checkpoint']) +
                            '.pth'))
                    print(
                        "Encoder model ",
                        str(encoder_params['encoder_checkpoint']),
                        "loaded from ",
                        os.path.join('_logs', encoder_params['encoder_folder'],
                                     encoder_params['encoder_exp'],
                                     'checkpoints'))
                    self.encoder_model.load_state_dict(
                        encoder_checkpoint['state_dict'])
                    self.encoder_model.eval()
                    for param_ in self.encoder_model.parameters():
                        param_.requires_grad = False

                else:
                    encoder_checkpoint = torch.load(
                        os.path.join(exp_dir, 'checkpoints',
                                     str(checkpoint_number) + '_encoder.pth'))
                    print("FINE TUNNED encoder model ",
                          str(checkpoint_number) + '_encoder.pth',
                          "loaded from ", os.path.join(exp_dir, 'checkpoints'))
                    self.encoder_model.load_state_dict(
                        encoder_checkpoint['state_dict'])
                    self.encoder_model.eval()
                    for param_ in self.encoder_model.parameters():
                        param_.requires_grad = False
            else:
                raise RuntimeError(
                    'encoder_params can not be None in MODEL_TYPE --> separate-affordances'
                )

            self._model = CoILModel(g_conf.MODEL_TYPE,
                                    g_conf.MODEL_CONFIGURATION,
                                    g_conf.ENCODER_MODEL_CONFIGURATION)
            self.checkpoint = torch.load(
                os.path.join(exp_dir, 'checkpoints',
                             str(checkpoint_number) + '.pth'))
            print("Affordances Model ",
                  str(checkpoint_number) + '.pth', "loaded from ",
                  os.path.join(exp_dir, 'checkpoints'))
            self._model.load_state_dict(self.checkpoint['state_dict'])
            self._model.cuda()
            self._model.eval()

    def get_sensors_dict(self):
        """
        The agent sets the sensors that it is going to use. That has to be
        set into the environment for it to produce this data.
        """
        sensors_dict = [{
            'type': 'sensor.camera.rgb',
            'x': 2.0,
            'y': 0.0,
            'z': 1.40,
            'roll': 0.0,
            'pitch': -15.0,
            'yaw': 0.0,
            'width': 800,
            'height': 600,
            'fov': 100,
            'id': 'rgb_central'
        }]

        return sensors_dict

    # TODO we set the sensors here directly.
    def sensors(self):
        return self._sensors_dict

    def get_state(self, exp_list, target_speed=20.0):
        """
            Based on the exp object it makes all the affordances.
        :param exp:
        :return:
        """
        exp = exp_list[0]
        self._vehicle = exp._ego_actor

        if self._agent is None:
            self._agent = True
            self._state = AgentState.NAVIGATING
            args_lateral_dict = {
                'K_P': 1,
                'K_D': 0.02,
                'K_I': 0,
                'dt': 1.0 / 20.0
            }
            self._local_planner = LocalPlanner(self._vehicle,
                                               opt_dict={
                                                   'target_speed':
                                                   target_speed,
                                                   'lateral_control_dict':
                                                   args_lateral_dict
                                               })
            self._hop_resolution = 2.0
            self._path_seperation_hop = 2
            self._path_seperation_threshold = 0.5
            self._grp = None

        if not self.route_assigned:
            plan = []
            for transform, road_option in exp._route:
                wp = exp._ego_actor.get_world().get_map().get_waypoint(
                    transform.location)
                plan.append((wp, road_option))

            self._local_planner.set_global_plan(plan)
            self.route_assigned = True

        input_data = exp._sensor_interface.get_data()
        input_data = self._process_sensors(
            input_data['rgb_central'][1])  #torch.Size([1, 3, 88, 200]

        if g_conf.MODEL_TYPE in ['one-step-affordances']:
            c_output, r_output, layers = self._model.forward_outputs(
                input_data.cuda(),
                torch.cuda.FloatTensor(
                    [exp._forward_speed / g_conf.SPEED_FACTOR]).unsqueeze(0),
                torch.cuda.FloatTensor(encode_directions(
                    exp._directions)).unsqueeze(0))
        elif g_conf.MODEL_TYPE in ['separate-affordances']:
            if g_conf.ENCODER_MODEL_TYPE in [
                    'action_prediction', 'stdim', 'ETEDIM', 'FIMBC',
                    'one-step-affordances'
            ]:
                e, layers = self.encoder_model.forward_encoder(
                    input_data.cuda(),
                    torch.cuda.FloatTensor([
                        exp._forward_speed / g_conf.SPEED_FACTOR
                    ]).unsqueeze(0),
                    torch.cuda.FloatTensor(encode_directions(
                        exp._directions)).unsqueeze(0))
                c_output, r_output = self._model.forward_test(e)
            elif g_conf.ENCODER_MODEL_TYPE in [
                    'ETE', 'ETE_inverse_model', 'forward', 'ETE_stdim'
            ]:
                e, layers = self.encoder_model.forward_encoder(
                    input_data.cuda(),
                    torch.cuda.FloatTensor([
                        exp._forward_speed / g_conf.SPEED_FACTOR
                    ]).unsqueeze(0),
                    torch.cuda.FloatTensor(encode_directions(
                        exp._directions)).unsqueeze(0))
                c_output, r_output = self._model.forward_test(e)

        if self.save_attentions:
            exp_params = exp._exp_params
            attentions_full_path = os.path.join(
                os.environ["SRL_DATASET_PATH"], exp_params['package_name'],
                exp_params['env_name'],
                str(exp_params['env_number']) + '_' + exp._agent_name,
                str(exp_params['exp_number']))
            save_attentions(input_data.cuda(),
                            layers,
                            self.count,
                            attentions_full_path,
                            save_input=False,
                            big_size=False)

        self.count += 1

        affordances = {}

        output_relative_angle = torch.squeeze(
            r_output[0]).cpu().detach().numpy() * 1.0

        is_pedestrian_hazard = False
        if c_output[0][0, 0] < c_output[0][0, 1]:
            is_pedestrian_hazard = True

        is_red_tl_hazard = False
        if c_output[1][0, 0] < c_output[1][0, 1]:
            is_red_tl_hazard = True

        is_vehicle_hazard = False
        if (c_output[2][0, 0] < c_output[2][0, 1]):
            is_vehicle_hazard = True

        affordances.update({'is_pedestrian_hazard': is_pedestrian_hazard})
        affordances.update({'is_red_tl_hazard': is_red_tl_hazard})
        affordances.update({'is_vehicle_hazard': is_vehicle_hazard})
        affordances.update({'relative_angle': output_relative_angle})
        # Now we consider all target speed to be 20.0
        affordances.update({'target_speed': target_speed})

        #affordances.update({'GT_is_pedestrian_hazard': })
        #affordances.update({'GT_is_red_tl_hazard': })
        #affordances.update({'GT_is_vehicle_hazard': })
        gt_relative_angle = compute_relative_angle(
            self._vehicle, self._local_planner.get_target_waypoint())
        affordances.update({'GT_relative_angle': gt_relative_angle})
        affordances.update({
            'ERROR_relative_angle':
            output_relative_angle - gt_relative_angle
        })

        return affordances

    def make_reward(self, exp):
        # Just basically return None since the reward is not used for a non

        return None

    def step(self, affordances):
        hazard_detected = False
        is_vehicle_hazard = affordances['is_vehicle_hazard']
        is_red_tl_hazard = affordances['is_red_tl_hazard']
        is_pedestrian_hazard = affordances['is_pedestrian_hazard']
        relative_angle = affordances['relative_angle']
        target_speed = affordances['target_speed']
        # once we meet a speed limit sign, the target speed changes

        #if target_speed != self._local_planner._target_speed:
        #    self._local_planner.set_speed(target_speed)
        #forward_speed = affordances['forward_speed']

        if is_vehicle_hazard:
            self._state = AgentState.BLOCKED_BY_VEHICLE
            hazard_detected = True

        if is_red_tl_hazard:
            self._state = AgentState.BLOCKED_RED_LIGHT
            hazard_detected = True

        if is_pedestrian_hazard:
            self._state = AgentState.BLOCKED_BY_PEDESTRIAN
            hazard_detected = True

        if hazard_detected:
            control = self.emergency_stop()

        else:
            self._state = AgentState.NAVIGATING
            control = self._local_planner.run_step(relative_angle,
                                                   target_speed)

        logging.debug("Output %f %f %f " %
                      (control.steer, control.throttle, control.brake))

        return control

    def reinforce(self, rewards):
        """
        This agent cannot learn so there is no reinforce
        """
        pass

    def reset(self):
        print(" Correctly reseted the agent")
        self.route_assigned = False
        self._agent = None
        self.count = 0

    def emergency_stop(self):
        """
        Send an emergency stop command to the vehicle
        :return:
        """
        control = carla.VehicleControl()
        control.steer = 0.0
        control.throttle = 0.0
        control.brake = 1.0
        control.hand_brake = False

        return control

    def _process_sensors(self, sensor):
        sensor = sensor[:, :, 0:3]  # BGRA->BRG drop alpha channel
        sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], :, :]  # crop
        sensor = scipy.misc.imresize(sensor,
                                     (g_conf.SENSORS['rgb_central'][1],
                                      g_conf.SENSORS['rgb_central'][2]))
        self.latest_image = sensor

        sensor = np.swapaxes(sensor, 0, 1)
        sensor = np.transpose(sensor, (2, 1, 0))
        sensor = torch.from_numpy(sensor / 255.0).type(
            torch.FloatTensor).cuda()
        image_input = sensor.unsqueeze(0)
        self.latest_image_tensor = image_input

        return image_input
Example #15
0
class CoILAgent(object):

    def __init__(self, checkpoint, town_name, carla_version='0.84'):

        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version
        self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
        # Create model
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        # If we are evaluating squeeze model (so we are using ground truth seg mask), 
        # also run the autopilot to get its stop intentions
        if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE or "seg" in g_conf.SENSORS.keys():
            self.control_agent = CommandFollower(town_name)

    def run_step(self, measurements, sensor_data, directions, target):
        """
            Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.

        """
        # Get speed and high-level turning command
        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # If we're evaluating squeeze network (so we are using ground truth seg mask)
        if "seg" in g_conf.SENSORS.keys():
            # Run the autopilot agent to get stop intentions
            _, state = self.control_agent.run_step(measurements, [], [], target)
            inputs_vec = []
            for input_name in g_conf.INTENTIONS:
                inputs_vec.append(float(state[input_name]))
            intentions = torch.cuda.FloatTensor(inputs_vec).unsqueeze(0)
            # Run squeeze network
            model_outputs = self._model.forward_branch(self._process_sensors(sensor_data), norm_speed,
                                                       directions_tensor, intentions, benchmark=True)
        else:
            # Run driving model
            model_outputs = self._model.forward_branch(self._process_sensors(sensor_data), norm_speed,
                                                       directions_tensor, benchmark=True)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])
        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        # There is the posibility to replace some of the predictions with oracle predictions.
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, target)

        if self.first_iter:
            coil_logger.add_message('Iterating', {"Checkpoint": self.checkpoint['iteration'],
                                                  'Agent': str(steer)},
                                    self.checkpoint['iteration'])
        self.first_iter = False
        
        return control

    def _process_sensors(self, sensors):

        iteration = 0
        sensor_dict = {}
        for name, size in g_conf.SENSORS.items():

            if self._carla_version == '0.9':
                sensor = sensors[name][g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            else:
                sensor = sensors[name].data[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]

            # Process RGB image or CARLA seg mask
            if name == 'rgb':
                # Resize image, convert it to [0, 1] BGR image
                sensor = scipy.misc.imresize(sensor, (size[1], size[2]))
                sensor = sensor[:, :, ::-1]
                sensor = np.swapaxes(sensor, 0, 1)
                sensor = np.transpose(sensor, (2, 1, 0))
                sensor = torch.from_numpy(sensor / 255.0).type(torch.FloatTensor)
            elif name == 'seg':
                seg = scipy.misc.imresize(sensor, (size[1], size[2]), 'nearest')
                # Re-map classes, mapping irrelevant classes to a "nuisance" class
                class_map = \
                    {0: 0, # None
                     1: 0, # Buildings -> None
                     2: 0, # Fences -> None
                     3: 0, # Other -> None
                     4: 1, # Pedestrians kept
                     5: 0, # Poles -> None
                     6: 2, # RoadLines kept
                     7: 3, # Roads kept
                     8: 2, # Sidewalks mapped to roadlines (both are boundaries of road)
                     9 : 0, # Vegetation -> None
                     10: 4, # Vehicles kept
                     11: 0, # Walls -> None
                     12: 5} # TrafficSigns kept (for traffic lights)
                new_seg = np.zeros((seg.shape[0], seg.shape[1]))
                # Remap classes
                for key, value in class_map.items():
                    new_seg[np.where(seg == key)] = value 
                # One hot encode seg mask, for now hardcode max of class map values + 1
                new_seg = np.eye(6)[new_seg.astype(np.int32)]
                new_seg = new_seg.transpose(2, 0, 1)
                new_seg = new_seg.astype(np.float)
                sensor = torch.from_numpy(new_seg).type(torch.FloatTensor)

            sensor = sensor.unsqueeze(0)
            sensor_dict[name] = sensor

        return sensor_dict

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0

        if throttle > brake:
            brake = 0.0


        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, target):
        # For the oracle, the current version of sensor data is not really relevant.
        control, _ = self.control_agent.run_step(measurements, [], [], target)

        return control.steer, control.throttle, control.brake
Example #16
0
class CoILAgent(object):

    def __init__(self, checkpoint, town_name, carla_version='0.84'):

        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version 
        self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        # Load the model and prepare set it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        # this entire segment is for loading models for ensemble evaluation - take care for the paths and checkpoints
        '''
        self.weights = [0.25, 0.25, 0.25, 0.25] # simple ensemble
        self.model_ids = ['660000', '670000', '1070000', '2640000'] # model checkpoints
        self.models_dir = '/is/sg2/aprakash/Projects/carla_autonomous_driving/code/coiltraine/_logs/ensemble'
        self._ensemble_model_list = []
        for i in range(len(self.model_ids)):
            curr_checkpoint = torch.load(self.models_dir+'/resnet34imnet10S1/checkpoints/'+self.model_ids[i]+'.pth')
            self._ensemble_model_list.append(CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION))
            self._ensemble_model_list[i].load_state_dict(curr_checkpoint['state_dict'])
            self._ensemble_model_list[i].cuda().eval()
        '''
        self.latest_image = None
        self.latest_image_tensor = None

        # for image corruptions
        self.corruption_number = None
        self.severity = None
        
        if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE: # for evaluating expert
            self.control_agent = CommandFollower(town_name)

    def run_step(self, measurements, sensor_data, directions, target, **kwargs):
        """
            Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.

        """
        # only required if using corruptions module
        # self.corruption_number = kwargs.get('corruption_number', None)
        # self.severity = kwargs.get('severity', None)

        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])
        # Compute the forward pass processing the sensors got from CARLA.
        model_outputs = self._model.forward_branch(self._process_sensors(sensor_data), norm_speed,
                                                 directions_tensor)
        # run forward pass using felipe model
        # model_outputs_felipe = self._model_felipe.forward_branch(self._process_sensors(sensor_data), norm_speed,
        #                                           directions_tensor)
        
        # model_outputs[0] = torch.FloatTensor([(model_outputs[0][i].item()+model_outputs_felipe[0][i].item())/2.0 for i in range(3)]).cuda()
        steer, throttle, brake = self._process_model_outputs(model_outputs[0])
        # steer_f, throttle_f, brake_f = self._process_model_outputs(model_outputs_felipe[0])

        # ensemble
        '''
        steer_c = []
        throttle_c = []
        brake_c = []
        for i in range(len(self.model_ids)):
            mo = self._ensemble_model_list[i].forward_branch(self._process_sensors(sensor_data), norm_speed,
                                                  directions_tensor)
            s, t, b = self._process_model_outputs(mo[0])
            steer_c.append(s)
            throttle_c.append(t)
            brake_c.append(b)
        '''
        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        # single model
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)

        # ensemble
        # control.steer = float(np.average(steer_c, weights=self.weights))
        # control.throttle = float(np.average(throttle_c, weights=self.weights))
        # control.brake = float(np.average(brake_c, weights=self.weights))

        # There is the posibility to replace some of the predictions with oracle predictions.
        if g_conf.USE_ORACLE:
            control.steer, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, sensor_data, target)

        if self.first_iter:
            coil_logger.add_message('Iterating', {"Checkpoint": self.checkpoint['iteration'],
                                                  'Agent': str(control.steer)},
                                    self.checkpoint['iteration'])
        self.first_iter = False

        return control

    # define run step for carla 9
    def run_step_carla9(self, observations):
        norm_speed = np.linalg.norm(observations['velocity'])/g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([int(observations['command'])])
        # print ('rgb: ', observations['big_cam'].shape)
        # print ('velocity: ', observations['velocity'])
        # print ('norm velocity: ', np.linalg.norm(observations['velocity']))
        # print ('norm_speed: ', norm_speed.shape, norm_speed.item())
        # print ('directions_tensor: ', directions_tensor.shape, directions_tensor.item())

        model_outputs = self._model.forward_branch(self._process_sensors(observations), norm_speed,
                                                  directions_tensor)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        # single model
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        
        return control

    def get_attentions(self, layers=None):
        """

        Returns
            The activations obtained from the first layers of the latest iteration.

        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError('No step was ran yet. '
                             'No image to compute the activations, Try Running ')
        all_layers = self._model.get_perception_layers(self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [88, 200]))
            # attentions.append(np.array(Image.fromarray(sensor).resize((200, 88))))
        return attentions

    def _process_sensors(self, sensors):

        iteration = 0
        for name, size in g_conf.SENSORS.items():

            if self._carla_version == '0.9':
                sensor = sensors[name][g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            else:
                sensor = sensors[name].data[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]

            sensor = scipy.misc.imresize(sensor, (size[1], size[2])) # depreciated
            # sensor = np.array(Image.fromarray(sensor).resize((size[2], size[1]))) # for running corruptions
            '''
            # corrupt the image here
            # print ('out of corruption: ', self.corruption_number, self.severity)
            if self.corruption_number is not None and self.severity is not None:
                # print ('in corruption: ', self.corruption_number, self.severity)
                sensor = corrupt(sensor, corruption_number=self.corruption_number, 
                                        severity=self.severity+1)
            '''
            self.latest_image = sensor

            sensor = np.swapaxes(sensor, 0, 1)

            sensor = np.transpose(sensor, (2, 1, 0))

            sensor = torch.from_numpy(sensor / 255.0).type(torch.FloatTensor).cuda()

            if iteration == 0:
                image_input = sensor
            else:
                image_input = torch.cat((image_input, sensor), 0)

            iteration += 1
    
        image_input = image_input.unsqueeze(0)

        self.latest_image_tensor = image_input

        return image_input

    def _process_model_outputs(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        steer, throttle, brake = outputs[0].item(), outputs[1].item(), outputs[2].item()
        # print ('steer: ', steer, 'throttle: ', throttle, 'brake: ', brake)
        
        # these heuristics are a part of the original benchmark, evaluation doesn't run properly without these
        if brake < 0.05:
            brake = 0.0

        if throttle > brake:
            brake = 0.0
        
        # print ('steer after heuristic: ', steer, 'throttle after heuristic: ', throttle, 'brake after heuristic: ', brake)
        return steer, throttle, brake


    def _process_model_outputs_wp(self, outputs):
        """
         A bit of heuristics in the control, to eventually make car faster, for instance.
        Returns:

        """
        wpa1, wpa2, throttle, brake = outputs[3], outputs[4], outputs[1], outputs[2]
        if brake < 0.2:
            brake = 0.0

        if throttle > brake:
            brake = 0.0

        steer = 0.7 * wpa2

        if steer > 0:
            steer = min(steer, 1)
        else:
            steer = max(steer, -1)

        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, sensor_data, target):
        # For the oracle, the current version of sensor data is not really relevant.
        control, _ = self.control_agent.run_step(measurements, sensor_data, [], target)

        return control.steer, control.throttle, control.brake
Example #17
0
def execute(gpu, exp_batch, exp_alias):
    # We set the visible cuda devices

    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        set_type_of_process('train')

        coil_logger.add_message('Loading', {'GPU': gpu})

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        sys.stdout = open(os.path.join(
            '_output_logs',
            g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                          "a",
                          buffering=1)

        if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                                g_conf.PROCESS_NAME)[0] == "Finished":
            # TODO: print some cool summary or not ?
            return

        #Define the dataset. This structure is has the __get_item__ redefined in a way
        #that you can access the HDFILES positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        #augmenter_cpu = iag.AugmenterCPU(g_conf.AUGMENTATION_SUITE_CPU)

        dataset = CoILDataset(full_dataset,
                              transform=transforms.Compose(
                                  [transforms.ToTensor()]))

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each bach.
        sampler = BatchSequenceSampler(
            splitter.control_steer_split(dataset.measurements,
                                         dataset.meta_data), g_conf.BATCH_SIZE,
            g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)

        # The data loader is the multi threaded module from pytorch that release a number of
        # workers to get all the data.
        # TODO: batch size an number of workers go to some configuration file
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_sampler=sampler,
                                                  shuffle=False,
                                                  num_workers=12,
                                                  pin_memory=True)
        # By instanciating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = iag.Augmenter(g_conf.AUGMENTATION_SUITE)

        # TODO: here there is clearly a posibility to make a cool "conditioning" system.

        model = CoILModel(g_conf.MODEL_NAME)
        model.cuda()
        exit()
        print(model)

        criterion = Loss()

        # TODO: DATASET SIZE SEEMS WEIRD
        optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file != None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            accumulated_time = checkpoint['total_time']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            accumulated_time = 0  # We accumulate iteration time and keep the average speed
            best_loss_iter = 0

        # TODO: The checkpoint will continue, so it should erase everything up to the iteration

        print(dataset.meta_data)

        print(model)
        capture_time = time.time()
        for data in data_loader:

            input_data, float_data = data

            #TODO, ADD ITERATION SCHEDULE
            input_rgb_data = augmenter(0, input_data['rgb'])
            #coil_logger.add_images(input_rgb_data)

            # get the control commands from float_data, size = [120,1]

            controls = float_data[:, dataset.controls_position(), :]
            print(" CONTROLS  ", controls.shape)
            # The output(branches) is a list of 5 branches results, each branch is with size [120,3]

            model.zero_grad()
            print('INPUTS', dataset.extract_inputs(float_data).shape)
            branches = model(input_rgb_data,
                             dataset.extract_inputs(float_data).cuda())

            #print ("len ",len(branches))

            #targets = torch.cat([steer_gt, gas_gt, brake_gt], 1)
            print("Extracted targets ",
                  dataset.extract_targets(float_data).shape[0])
            loss = criterion.MSELoss(
                branches,
                dataset.extract_targets(float_data).cuda(), controls.cuda(),
                dataset.extract_inputs(float_data).cuda())

            # TODO: All these logging things could go out to clean up the main
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(float_data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output -
                              dataset.extract_targets(float_data).cuda())

            # TODO: For now we are computing the error for just the correct branch, it could be multi- branch,

            coil_logger.add_scalar('Loss', loss.data, iteration)

            loss.backward()
            optimizer.step()

            accumulated_time += time.time() - capture_time
            capture_time = time.time()

            # TODO: Get only the  float_data that are actually generating output
            # TODO: itearation is repeating , and that is dumb
            coil_logger.add_message(
                'Iterating', {
                    'Iteration':
                    iteration,
                    'Loss':
                    loss.data.tolist(),
                    'Images/s':
                    (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss':
                    best_loss,
                    'BestLossIteration':
                    best_loss_iter,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(
                        float_data)[position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(float_data)[position].data.tolist()
                }, iteration)

            # TODO: For now we are computing the error for just the correct branch, it could be multi-branch,

            # TODO: save also the optimizer state dictionary
            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'best_loss_iter': best_loss_iter
                }
                # TODO : maybe already summarize the best model ???
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))

            iteration += 1

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except:
        traceback.print_exc()

        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #18
0
class CoILAgent(Agent):

	def __init__(self, checkpoint):



		#experiment_name='None', driver_conf=None, memory_fraction=0.18,
		#image_cut=[115, 510]):

		# use_planner=False,graph_file=None,map_file=None,augment_left_right=False,image_cut = [170,518]):

		Agent.__init__(self)
		# This should likely come from global
		#config_gpu = tf.ConfigProto()
		#config_gpu.gpu_options.visible_device_list = '0'

		#config_gpu.gpu_options.per_process_gpu_memory_fraction = memory_fraction
		#self._sess = tf.Session(config=config_gpu)

		# THIS DOES NOT WORK FOR FUSED PLUS LSTM
		#if self._config.number_frames_sequenced > self._config.number_frames_fused:
		#    self._config_train.batch_size = self._config.number_frames_sequenced
		#else:
		#    self._config_train.batch_size = self._config.number_frames_fused

		#self._train_manager = load_system(self._config_train)
		#self._config.train_segmentation = False
		self.model = CoILModel(g_conf.MODEL_NAME)

		self.model.load_state_dict(checkpoint['state_dict'])

		self.model.cuda()

		self.model.eval()


		#self.model.load_network(checkpoint)

		#self._sess.run(tf.global_variables_initializer())

		#self._control_function = getattr(machine_output_functions,
		#                                 self._train_manager._config.control_mode)
		# More elegant way to merge with autopilot
		#self._agent = Autopilot(ConfigAutopilot(driver_conf.city_name))

		#self._image_cut = driver_conf.image_cut
		#self._auto_pilot = driver_conf.use_planner

		#self._recording = False
		#self._start_time = 0


	def run_step(self, measurements, sensor_data, directions, target):


		self.model.eval()
		#control_agent = self._agent.run_step(measurements, None, target)
		print (" RUnning STEP ")
		speed = torch.cuda.FloatTensor([measurements.player_measurements.forward_speed]).unsqueeze(0)
		print("Speed is", speed)
		print ("Speed shape ", speed)
		directions_tensor = torch.cuda.LongTensor([directions])
		model_outputs = self.model.forward_branch(self._process_sensors(sensor_data), speed,
												  directions_tensor)

		print (model_outputs)

		steer, throttle, brake = self._process_model_outputs(model_outputs[0],
										 measurements.player_measurements.forward_speed)



		#control = self.compute_action(,
		#                              ,
		#                              directions)
		control = carla_protocol.Control()
		control.steer = steer
		control.throttle = throttle
		control.brake = brake
		# if self._auto_pilot:
		#    control.steer = control_agent.steer
		# TODO: adapt the client side agent for the new version. ( PROBLEM )
		#control.throttle = control_agent.throttle
		#control.brake = control_agent.brake

		# TODO: maybe change to a more meaningfull message ??
		return control


	def _process_sensors(self, sensors):


		iteration = 0
		for name, size in g_conf.SENSORS.items():

			sensor = sensors[name].data[175:375, ...] #300*800*3

			image_input = transform.resize(sensor, (size[1], size[2]))

			# transforms.Normalize([ 0.5315,  0.5521,  0.5205], [ 0.1960,  0.1810,  0.2217])

			print ("Image pixL ", image_input[:10][0][0])
			image_input = np.transpose(image_input, (2, 0, 1))
			image_input = torch.from_numpy(image_input).type(torch.FloatTensor).cuda()
			print ("torch size", image_input.size())

			img_np = np.uint8(np.transpose(image_input.cpu().numpy() * 255, (1 , 2, 0)))

			# plt.figure(1)
			# plt.subplot(1, 2, 1)
			# plt.imshow(sensor)

			# plt.subplot(1,2,2)
			# plt.imshow(img_np)
			# #
			# plt.show()

			# if  sensors[name].type == 'SemanticSegmentation':
			#     # TODO: the camera name has to be sincronized with what is in the experiment...
			#     sensor = join_classes(sensor)
			#
			#     sensor = sensor[:, :, np.newaxis]
			#
			#     image_transform = transforms.Compose([transforms.ToTensor(),
			#                        transforms.Resize((size[1], size[2]), interpolation=Image.NEAREST),
			#                        iag.ToGPU(), iag.Multiply((1 / (number_of_seg_classes - 1)))])
			# else:
			#
			#     image_transform = transforms.Compose([transforms.ToPILImage(),
			#                        transforms.Resize((size[1], size[2])),
			#                        transforms.ToTensor(), transforms.Normalize((0, 0 ,0), (255, 255, 255)),
			#                        iag.ToGPU()])
			# sensor = np.swapaxes(sensor, 0, 1)
			# print ("Sensor Previous SHape")
			# print (sensor.shape)
			# sensor = np.flip(sensor.transpose((2, 0, 1)), axis=0)
			# print ("Sensor Previous SHape PT2")
			# print (sensor.shape)
			# if iteration == 0:
			#     image_input = image_transform(sensor)
			# else:
			#     image_input = torch.cat((image_input, sensor), 0)

			iteration += 1

		# print (image_input.shape)
		image_input  = image_input.unsqueeze(0)
		print (image_input.shape)

		return image_input


	def _process_model_outputs(self,outputs, speed):
		"""
		 A bit of heuristics in the control, to eventually make car faster, for instance.
		Returns:

		"""
		steer, throttle, brake = outputs[0], outputs[1], outputs[2]
		# if steer > 0.5:
		# 	throttle *= (1 - steer + 0.3)
		# 	steer += 0.3
		# 	if steer > 1:
		# 		steer = 1
		# if steer < -0.5:
		# 	throttle *= (1 + steer + 0.3)
		# 	steer -= 0.3
		# 	if steer < -1:
		# 		steer = -1

		# if brake < 0.2:
		# 	brake = 0.0
		#
		if throttle > brake:
			brake = 0.0
		# else:
		# 	throttle = throttle * 2
		# if speed > 35.0 and brake == 0.0:
		# 	throttle = 0.0

		print ("Steer", steer, "Throttle", throttle)


		return steer, throttle, brake


	"""

	def compute_action(self, sensors, speed, direction):

		capture_time = time.time()


		sensor_pack = []



		for i in range(len(sensors)):

			sensor = sensors[i]

			sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], :]

			if g_conf.param.SENSORS.keys()[i] == 'rgb':

				sensor = scipy.misc.imresize(sensor, [self._config.sensors_size[i][0],
													  self._config.sensors_size[i][1]])


			elif g_conf.param.SENSORS.keys()[i] == 'labels':

				sensor = scipy.misc.imresize(sensor, [self._config.sensors_size[i][0],
													  self._config.sensors_size[i][1]],
											 interp='nearest')

				sensor = join_classes(sensor) * int(255 / (number_of_seg_classes - 1))

				sensor = sensor[:, :, np.newaxis]

			sensor_pack.append(sensor)

		if len(sensor_pack) > 1:

			image_input = np.concatenate((sensor_pack[0], sensor_pack[1]), axis=2)

		else:
			image_input = sensor_pack[0]

		image_input = image_input.astype(np.float32)
		image_input = np.multiply(image_input, 1.0 / 255.0)


		image_input = sensors[0]

		image_input = image_input.astype(np.float32)
		image_input = np.multiply(image_input, 1.0 / 255.0)
		# TODO: This will of course depend on the model , if it is based on sequences there are
		# TODO: different requirements
		#tensor = self.model(image_input)
		outputs = self.model.forward_branch(image_input, speed, direction)



		return control  # ,machine_output_functions.get_intermediate_rep(image_input,speed,self._config,self._sess,self._train_manager)

	"""
	"""
Example #19
0
def execute(gpu, exp_batch, exp_alias, dataset_name, architecture,
            suppress_output):

    try:
        # We set the visible cuda devices
        torch.manual_seed(2)
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # Validation available for:
        # coil_unit (UNIT + task combined)
        # coil_icra (Also used for finetuned models)
        # wgangp_lsd (Our architecture)

        architecture_name = architecture
        # At this point the log file with the correct naming is created.
        if architecture_name == 'coil_unit':
            pass
        elif architecture_name == 'wgangp_lsd':
            merge_with_yaml(
                os.path.join('/home/rohitrishabh/CoilWGAN/configs', exp_batch,
                             exp_alias + '.yaml'))
            set_type_of_process('validation', dataset_name)
        elif architecture_name == 'coil_icra':
            merge_with_yaml(
                os.path.join(
                    '/home/adas/CleanedCode/CoIL_Codes/coil_20-06/configs',
                    exp_batch, exp_alias + '.yaml'))
            set_type_of_process('validation', dataset_name)

            if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                                    g_conf.PROCESS_NAME)[0] == "Finished":
                # TODO: print some cool summary or not ?
                return

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            sys.stdout = open(os.path.join(
                '_output_logs',
                g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        #Define the dataset. This structure is has the __get_item__ redefined in a way
        #that you can access the HDFILES positions from the root directory as a in a vector.
        if dataset_name != []:
            full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                        dataset_name)
        else:
            full_dataset = os.environ["COIL_DATASET_PATH"]

        augmenter = Augmenter(None)

        dataset = CoILDataset(full_dataset, transform=augmenter)

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each bach.

        # The data loader is the multi threaded module from pytorch that release a number of
        # workers to get all the data.
        # TODO: batch size an number of workers go to some configuration file
        batchsize = 30
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=batchsize,
                                                  shuffle=False,
                                                  num_workers=1,
                                                  pin_memory=True)

        # TODO: here there is clearly a posibility to make a cool "conditioning" system.

        if architecture_name == 'coil_unit':
            model_task, model_gen = CoILModel('coil_unit')
            model_task, model_gen = model_task.cuda(), model_gen.cuda()
        else:
            model = CoILModel(architecture_name)
            model.cuda()

        latest = 0

        # print (dataset.meta_data)
        best_loss = 1000
        best_error = 1000
        best_loss_mini = 1000
        best_loss_iter = 0
        best_error_iter = 0
        batch_size = 30
        best_loss_ckpt = ''

        if architecture_name == 'coil_unit':
            ckpts = glob.glob('/home/rohitrishabh/UNIT_DA/outputs/' +
                              exp_alias + '/checkpoints/gen*.pt')
        else:
            ckpts = glob.glob(
                os.path.join(
                    '/home/adas/CleanedCode/CoIL_Codes/coil_20-06/_logs',
                    exp_batch, exp_alias) + '/*.pth')

        if architecture_name == 'coil_unit':
            model_task.eval()
            model_gen.eval()
        else:
            model.eval()
        ckpts = sorted(ckpts)
        # TODO: refactor on the getting on the checkpoint organization needed
        for ckpt in ckpts:

            # if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

            # latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
            # ckpt = os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
            #                         , 'checkpoints', str(latest) + '.pth')
            checkpoint = torch.load(ckpt)
            print("Validation loaded ", ckpt)
            if architecture_name == 'wgangp_lsd':
                print(ckpt, checkpoint['best_loss_iter_F'])
                model.load_state_dict(checkpoint['stateF_dict'])
                model.eval()
            elif architecture_name == 'coil_unit':
                model_task.load_state_dict(checkpoint['task'])
                model_gen.load_state_dict(checkpoint['b'])
                model_task.eval()
                model_gen.eval()
            elif architecture_name == 'coil_icra':
                model.load_state_dict(checkpoint['state_dict'])
                model.eval()

            accumulated_loss = 0
            accumulated_error = 0
            iteration_on_checkpoint = 0
            datacount = 0
            for data in data_loader:

                input_data, float_data = data

                controls = float_data[:, dataset.controls_position(), :]

                camera_angle = float_data[:, 26, :]
                camera_angle = camera_angle.cuda()
                steer = float_data[:, 0, :]
                steer = steer.cuda()
                speed = float_data[:, 10, :]
                speed = speed.cuda()

                time_use = 1.0
                car_length = 3.0
                extra_factor = 2.5
                threshold = 1.0

                pos = camera_angle > 0.0
                pos = pos.type(torch.FloatTensor)
                neg = camera_angle <= 0.0
                neg = neg.type(torch.FloatTensor)
                pos = pos.cuda()
                neg = neg.cuda()

                rad_camera_angle = math.pi * (torch.abs(camera_angle)) / 180.0
                val = extra_factor * (torch.atan(
                    (rad_camera_angle * car_length) /
                    (time_use * speed + 0.05))) / 3.1415
                steer -= pos * torch.min(val, torch.Tensor([0.6]).cuda())
                steer += neg * torch.min(val, torch.Tensor([0.6]).cuda())

                steer = steer.cpu()
                float_data[:, 0, :] = steer
                float_data[:, 0, :][float_data[:, 0, :] > 1.0] = 1.0
                float_data[:, 0, :][float_data[:, 0, :] < -1.0] = -1.0

                datacount += 1
                control_position = 24
                speed_position = 10

                if architecture_name == 'wgangp_lsd':
                    embed, output = model(
                        torch.squeeze(input_data['rgb']).cuda(),
                        float_data[:, speed_position, :].cuda())

                    loss = torch.sum(
                        (output[0] -
                         dataset.extract_targets(float_data).cuda()
                         )**2).data.tolist()
                    mean_error = torch.sum(
                        torch.abs(output[0] -
                                  dataset.extract_targets(float_data).cuda())
                    ).data.tolist()

                elif architecture_name == 'coil_unit':
                    embed, n_b = model_gen.encode(
                        torch.squeeze(input_data['rgb']).cuda())
                    output = model_task(
                        embed,
                        Variable(float_data[:, speed_position, :]).cuda())

                    loss = torch.sum(
                        (output[0].data -
                         dataset.extract_targets(float_data).cuda())**2)
                    mean_error = torch.sum(
                        torch.abs(output[0].data -
                                  dataset.extract_targets(float_data).cuda()))

                elif architecture_name == 'coil_icra':
                    output = model.forward_branch(
                        torch.squeeze(input_data['rgb']).cuda(),
                        float_data[:, speed_position, :].cuda(),
                        float_data[:, control_position, :].cuda())

                    loss = torch.sum(
                        (output - dataset.extract_targets(float_data).cuda()
                         )**2).data.tolist()
                    mean_error = torch.sum(
                        torch.abs(output -
                                  dataset.extract_targets(float_data).cuda())
                    ).data.tolist()

                if loss < best_loss_mini:
                    best_loss_mini = loss

                accumulated_error += mean_error
                accumulated_loss += loss
                # error = torch.abs(output[0] - dataset.extract_targets(float_data).cuda())

                # Log a random position
                position = random.randint(0, len(float_data) - 1)
                iteration_on_checkpoint += 1

            print(datacount, len(data_loader), accumulated_loss)
            checkpoint_average_loss = accumulated_loss / float(
                datacount * batchsize)
            checkpoint_average_error = accumulated_error / float(
                datacount * batchsize)

            if checkpoint_average_loss < best_loss:
                best_loss = checkpoint_average_loss
                best_loss_iter = latest
                best_loss_ckpt = ckpt

            if checkpoint_average_error < best_error:
                best_error = checkpoint_average_error
                best_error_iter = latest

            print("current loss", checkpoint_average_loss)
            print("best_loss", best_loss)

            coil_logger.add_message(
                'Iterating', {
                    'Summary': {
                        'Error': checkpoint_average_error,
                        'Loss': checkpoint_average_loss,
                        'BestError': best_error,
                        'BestLoss': best_loss,
                        'BestLossCheckpoint': best_loss_iter,
                        'BestErrorCheckpoint': best_error_iter
                    },
                    'Checkpoint': latest
                }, latest)
            latest += 2000

        coil_logger.add_message('Finished', {})
        print("Best Validation Loss ckpt:", best_loss_ckpt)

        # TODO: DO ALL THE AMAZING LOGGING HERE, as a way to very the status in paralell.
        # THIS SHOULD BE AN INTERELY PARALLEL PROCESS

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except:
        traceback.print_exc()

        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #20
0
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case

        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join('_output_logs', exp_alias + '_' +
                              g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a",
                              buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                              exp_alias + '_err_'+g_conf.PROCESS_NAME + '_'
                                           + str(os.getpid()) + ".out"),
                              "a", buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                                                  g_conf.PRELOAD_MODEL_ALIAS,
                                                 'checkpoints',
                                                 str(g_conf.PRELOAD_MODEL_CHECKPOINT)+'.pth'))


        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias,
                                    'checkpoints', str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0


        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read a dataset. The coil dataset generator
        # can be found
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS)
                                               + 'hours_' + g_conf.TRAIN_DATASET_NAME)
        print ("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration, number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)
        

        # Set ERFnet for segmentation
        model_erf = ERFNet(20)
        model_erf = torch.nn.DataParallel(model_erf)
        model_erf = model_erf.cuda()        
        
        print("LOAD ERFNet")
        def load_my_state_dict(model, state_dict):  #custom function to load model when not all dict elements
            own_state = model.state_dict()
            for name, param in state_dict.items():
                if name not in own_state:
                    continue
                own_state[name].copy_(param)
            return model
        
        model_erf = load_my_state_dict(model_erf, torch.load(os.path.join('trained_models/erfnet_pretrained.pth')))
        model_erf.eval()
        print ("ERFNet and weights LOADED successfully")

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []
       

        print ("Before the loss")

        criterion = Loss(g_conf.LOSS_FUNCTION)

        # Loss time series window
        for data in data_loader:

            # Basically in this mode of execution, we validate every X Steps, if it goes up 3 times,
            # add a stop on the _logs folder that is going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            iteration += 1
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # get the control commands from float_data, size = [120,1]

            capture_time = time.time()
            controls = data['directions']
            # The output(branches) is a list of 5 branches results, each branch is with size [120,3]
            model.zero_grad()

            # print("Segmentation")
            # use ERFNet to convert RGB to Segmentation
            rgbs = data['rgb']
            filenames = data['rgb_name']

            # # seg one by one
            # seg_road = []
            # seg_not_road = []
            # i = 0
            # for inputs in rgbs:
            #     inputs = inputs.unsqueeze(0)
            #     # print("inputs ",inputs.shape)
            #     with torch.no_grad():
            #         outputs = model_erf(inputs)

            #     label = outputs[0].max(0)[1].byte().cpu().data

            #     road = (label == 0)
            #     not_road = (label != 0)
            #     seg_road.append(road)
            #     seg_not_road.append(not_road)   

            #     # # print("label ",label.shape)
            #     # label_color = Colorize()(label.unsqueeze(0))
            #     # filename = filenames[i]                
            #     # filenameSave = "./save_color/" + filename.split("CoILTrain/")[1]
            #     # os.makedirs(os.path.dirname(filenameSave), exist_ok=True)
                   
            #     # label_save = ToPILImage()(label_color)           
            #     # label_save.save(filenameSave) 
            #     # # print (i, filenameSave)
            #     # i += 1                 

            # seg_road = torch.stack(seg_road)
            # seg_not_road = torch.stack(seg_not_road)
            # seg = torch.stack([seg_road,seg_not_road]).transpose(0,1).float()
            # # print(seg.shape)
            
            # seg batch
            with torch.no_grad():
                outputs = model_erf(rgbs)
            # print("outputs.shape ",outputs.shape)
            labels = outputs.max(1)[1].byte().cpu().data
            # print("labels.shape",labels.shape)
            # print(np.unique(labels[0])) 

            seg_road = (labels==0)
            seg_not_road = (labels!=0)
            seg = torch.stack((seg_road,seg_not_road),1).float()

            # save 1st batch's segmentation results
            if iteration == 1:
                for i in range(120):
                    label = seg[i,0,:,:]
                    label_color = Colorize()(label.unsqueeze(0))               
                    filenameSave = "./save_color/batch_road_mask/%d.png"%(i)
                    os.makedirs(os.path.dirname(filenameSave), exist_ok=True)                   
                    label_save = ToPILImage()(label_color)           
                    label_save.save(filenameSave)

                    label = labels[i,:,:]
                    label_color = Colorize()(label.unsqueeze(0))               
                    filenameSave = "./save_color/batch_road/%d.png"%(i)
                    os.makedirs(os.path.dirname(filenameSave), exist_ok=True)                   
                    label_save = ToPILImage()(label_color)           
                    label_save.save(filenameSave)


            branches = model(torch.squeeze(seg).cuda(),
                             dataset.extract_inputs(data).cuda())
#             branches = model(torch.squeeze(rgbs.cuda()),
#                              dataset.extract_input(data).cuda())

            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(state, os.path.join('_logs', exp_batch, exp_alias
                                               , 'checkpoints', str(iteration) + '.pth'))

            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            accumulated_time += time.time() - capture_time

            coil_logger.add_message('Iterating',
                                    {'Iteration': iteration,
                                     'Loss': loss.data.tolist(),
                                     'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                                     'BestLoss': best_loss, 'BestLossIteration': best_loss_iter,
                                     'Output': output[position].data.tolist(),
                                     'GroundTruth': dataset.extract_targets(data)[
                                         position].data.tolist(),
                                     'Error': error[position].data.tolist(),
                                     'Inputs': dataset.extract_inputs(data)[
                                         position].data.tolist()},
                                    iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #21
0
def execute(gpu, exp_batch, exp_alias):
    # We set the visible cuda devices

    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        set_type_of_process('train')

        coil_logger.add_message('Loading', {'GPU': gpu})

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        sys.stdout = open(os.path.join(
            '_output_logs',
            g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                          "a",
                          buffering=1)

        if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                                g_conf.PROCESS_NAME)[0] == "Finished":
            # TODO: print some cool summary or not ?
            return

        #Define the dataset. This structure is has the __get_item__ redefined in a way
        #that you can access the HDFILES positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        #augmenter_cpu = iag.AugmenterCPU(g_conf.AUGMENTATION_SUITE_CPU)

        dataset = CoILDataset(full_dataset,
                              transform=transforms.Compose(
                                  [transforms.ToTensor()]))

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each bach.
        sampler = BatchSequenceSampler(
            splitter.control_steer_split(dataset.measurements,
                                         dataset.meta_data), g_conf.BATCH_SIZE,
            g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)

        # The data loader is the multi threaded module from pytorch that release a number of
        # workers to get all the data.
        # TODO: batch size an number of workers go to some configuration file
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_sampler=sampler,
                                                  shuffle=False,
                                                  num_workers=12,
                                                  pin_memory=False)
        # By instanciating the augmenter we get a callable that augment images and transform them
        # into tensors.
        st = lambda aug: iag.Sometimes(aug, 0.4)
        oc = lambda aug: iag.Sometimes(aug, 0.3)
        rl = lambda aug: iag.Sometimes(aug, 0.09)
        augmenter = iag.Augmenter([iag.ToGPU()] + [
            rl(iag.GaussianBlur(
                (0, 1.5))),  # blur images with a sigma between 0 and 1.5
            rl(
                iag.AdditiveGaussianNoise(
                    loc=0, scale=(0.0, 0.05),
                    per_channel=0.5)),  # add gaussian noise to images
            oc(iag.Dropout((0.0, 0.10), per_channel=0.5)
               ),  # randomly remove up to X% of the pixels
            oc(
                iag.CoarseDropout(
                    (0.0, 0.10), size_percent=(0.08, 0.2), per_channel=0.5)
            ),  # randomly remove up to X% of the pixels
            oc(iag.Add((-40, 40), per_channel=0.5)
               ),  # change brightness of images (by -X to Y of original value)
            st(iag.Multiply((0.10, 2), per_channel=0.2)
               ),  # change brightness of images (X-Y% of original value)
            rl(iag.ContrastNormalization((
                0.5, 1.5), per_channel=0.5)),  # improve or worsen the contrast
            rl(iag.Grayscale((0.0, 1))),  # put grayscale
        ]  # do all of the above in random order
                                  )
        # augmenter = iag.Augmenter(g_conf.AUGMENTATION_SUITE)
        # TODO: here there is clearly a posibility to make a cool "conditioning" system.

        model = CoILModel(g_conf.MODEL_NAME)
        model.cuda()
        print(model)

        criterion = Loss()

        # TODO: DATASET SIZE SEEMS WEIRD
        optimizer = optim.Adam(model.parameters(), lr=0.0002)

        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file != None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            accumulated_time = checkpoint['total_time']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            accumulated_time = 0  # We accumulate iteration time and keep the average speed
            best_loss_iter = 0

        # TODO: The checkpoint will continue, so it should erase everything up to the iteration

        best_loss_save = 10000.0
        best_loss_save_iter = 0
        curr_loss_save = 0.0

        print(dataset.meta_data)

        print(model)
        capture_time = time.time()
        model.train()
        for data in data_loader:

            input_data, float_data = data

            #TODO, ADD ITERATION SCHEDULE
            input_rgb_data = augmenter(0, input_data['rgb'])
            augment_for_controls = 1
            adjustlr = 1

            if augment_for_controls:  #and self._config.targets_names[j] == "Steer":
                camera_angle = float_data[:, 26, :]
                camera_angle = camera_angle.cuda(
                )  #self._config.variable_names.index('Angle'),i]
                print("Camera angle", camera_angle[0])
                steer = float_data[:, 0, :]
                # print("Original", steer[0])
                steer = steer.cuda()
                speed = float_data[:, 10, :]
                speed = speed.cuda()
                # print (steer)

                time_use = 1.0
                car_length = 3.0
                extra_factor = 2.5
                threshold = 1.0

                pos = camera_angle > 0.0
                pos = pos.type(torch.FloatTensor)
                neg = camera_angle <= 0.0
                neg = neg.type(torch.FloatTensor)
                pos = pos.cuda()
                neg = neg.cuda()

                rad_camera_angle = math.pi * (torch.abs(camera_angle)) / 180.0
                val = extra_factor * (torch.atan(
                    (rad_camera_angle * car_length) /
                    (time_use * speed + 0.05))) / 3.1415
                # print(val)
                steer -= pos * torch.min(val, torch.tensor([0.6]).cuda())

                steer += neg * torch.min(val, torch.tensor([0.6]).cuda())

                print("val", val[0])
                print("speed", speed[0])

                steer = steer.cpu()
                float_data[:, 0, :] = steer

                float_data[:, 0, :][float_data[:, 0, :] > 1.0] = 1.0
                float_data[:, 0, :][float_data[:, 0, :] < -1.0] = -1.0
            #coil_logger.add_images(input_rgb_data)

            # get the control commands from float_data, size = [120,1]

            controls = float_data[:, dataset.controls_position(), :]
            # print(" CONTROLS  ", controls.shape)
            # The output(branches) is a list of 5 branches results, each branch is with size [120,3]

            model.zero_grad()
            # print ( 'INPUTS', dataset.extract_inputs(float_data).shape )
            branches = model(input_rgb_data,
                             dataset.extract_inputs(float_data).cuda())

            #print ("len ",len(branches))

            #targets = torch.cat([steer_gt, gas_gt, brake_gt], 1)
            # print ("Extracted targets ", dataset.extract_targets(float_data).shape[0])
            loss = criterion.MSELoss(
                branches,
                dataset.extract_targets(float_data).cuda(), controls.cuda(),
                dataset.extract_inputs(float_data).cuda())

            # TODO: All these logging things could go out to clean up the main
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            curr_loss_save += loss.data

            # Log a random position
            position = random.randint(0, len(float_data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output -
                              dataset.extract_targets(float_data).cuda())

            # TODO: For now we are computing the error for just the correct branch, it could be multi- branch,

            coil_logger.add_scalar('Loss', loss.data, iteration)

            loss.backward()
            optimizer.step()

            accumulated_time += time.time() - capture_time
            capture_time = time.time()

            # TODO: Get only the  float_data that are actually generating output
            # TODO: itearation is repeating , and that is dumb
            coil_logger.add_message(
                'Iterating', {
                    'Iteration':
                    iteration,
                    'Loss':
                    loss.data.tolist(),
                    'Images/s':
                    (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss':
                    best_loss,
                    'BestLossIteration':
                    best_loss_iter,
                    'BestLossSave':
                    best_loss_save,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(
                        float_data)[position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(float_data)[position].data.tolist()
                }, iteration)

            # TODO: For now we are computing the error for just the correct branch, it could be multi-branch,

            # TODO: save also the optimizer state dictionary
            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'best_loss_iter': best_loss_iter
                }
                # TODO : maybe already summarize the best model ???
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))
            print("before best save")
            if iteration % 5 == 0 and iteration > 4:
                curr_loss_save /= 5000.0
                if curr_loss_save < best_loss_save:

                    best_loss_save = curr_loss_save
                    curr_loss_save = 0
                    state = {
                        'iteration': iteration,
                        'state_dict': model.state_dict(),
                        'best_loss': best_loss_save,
                        'total_time': accumulated_time,
                        'best_loss_iter': best_loss_save_iter
                    }
                    # TODO : maybe already summarize the best model ???
                    torch.save(
                        state,
                        os.path.join('_logs', exp_batch, exp_alias,
                                     'best_loss_save' + '.pth'))
            print("after best save")
            if iteration == best_loss_iter:

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'best_loss_iter': best_loss_iter
                }
                # TODO : maybe already summarize the best model ???
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias,
                                 'best_loss' + '.pth'))

            iteration += 1

            if adjustlr and iteration % 1000:
                adjust_learning_rate(optimizer, iteration)

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except:
        traceback.print_exc()

        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #22
0
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output=True, yaml_file=None):
    latest = None
    # try:
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias+'.yaml')
    if yaml_file is not None:
      path_to_yaml_file = os.path.join(yaml_file, exp_alias+'.yaml')
    merge_with_yaml(path_to_yaml_file)
    # The validation dataset is always fully loaded, so we fix a very high number of hours
    # g_conf.NUMBER_OF_HOURS = 10000 # removed to simplify code
    
    """
    # check again if this segment is required or not
    set_type_of_process('validation', dataset_name)

    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')

    if suppress_output:
        sys.stdout = open(os.path.join('_output_logs',
                                       exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
        sys.stderr = open(os.path.join('_output_logs',
                          exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
    """

    # Define the dataset. This structure is has the __get_item__ redefined in a way
    # that you can access the HDFILES positions from the root directory as a in a vector.
    
    dataset_name = dataset_name.split('_')[-1] # since preload file has '<X>hours_' as prefix whereas dataset folder does not
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name) # original code
    augmenter = Augmenter(None)

    print ('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset, transform=augmenter, preload_name=args.dataset_name)

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=g_conf.BATCH_SIZE,
                                              shuffle=False,
                                              num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
                                              pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

    """ removing this segment to simplify code
    # The window used to keep track of the trainings
    l1_window = []
    latest = get_latest_evaluated_checkpoint()
    if latest is not None:  # When latest is noe
        l1_window = coil_logger.recover_loss_window(dataset_name, None)
    """
    
    model.cuda()

    best_mse = 1000
    best_error = 1000
    best_mse_iter = 0
    best_error_iter = 0

    # modified validation code from here to run a single model
    checkpoint = torch.load(args.checkpoint)
    checkpoint_iteration = checkpoint['iteration']
    print("model loaded ", checkpoint_iteration)

    model.load_state_dict(checkpoint['state_dict'])

    model.eval()
    accumulated_mse = 0
    accumulated_error = 0
    iteration_on_checkpoint = 0

    print ('data_loader size: ', len(data_loader))
    total_error = []
    for data in data_loader:

        # Compute the forward pass on a batch from the loaded dataset
        controls = data['directions']
        branches = model(torch.squeeze(data['rgb'].cuda()),
                             dataset.extract_inputs(data).cuda())
        output = model.extract_branch(torch.stack(branches[0:4]), controls)
        error = torch.abs(output - dataset.extract_targets(data).cuda())
        total_error += error.detach().cpu().tolist()
        
        iteration_on_checkpoint += 1
        if iteration_on_checkpoint % 50 == 0:
            print ('iteration: ', iteration_on_checkpoint)

    total_error = np.array(total_error)
    print (len(total_error), total_error.shape)

    np.save(os.path.join(args.save_path, args.dataset_name, 'computed_error.npy'), total_error)
    '''
Example #23
0
def execute(gpu,
            exp_batch,
            exp_alias,
            suppress_output=True,
            number_of_workers=12):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Seed RNGs
        torch.manual_seed(g_conf.MAGICAL_SEED)
        random.seed(g_conf.MAGICAL_SEED)

        # Put the output to a separate file if it is the case

        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset.
        # Can specify a list of training datasets or just a single training dataset
        if len(g_conf.TRAIN_DATASET_NAMES) == 0:
            train_dataset_list = [g_conf.TRAIN_DATASET_NAME]
        else:
            train_dataset_list = g_conf.TRAIN_DATASET_NAMES
        full_dataset = [
            os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
            for dataset_name in train_dataset_list
        ]

        # By instantiating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read a dataset. The coil dataset generator
        # can be found
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_names=[
                                  str(g_conf.NUMBER_OF_HOURS) + 'hours_' +
                                  dataset_name
                                  for dataset_name in train_dataset_list
                              ],
                              train_dataset=True)
        print("Loaded dataset")

        # Create dataloader, model, and optimizer
        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        # If we have a previous checkpoint, load model, optimizer, and record of previous
        # train loss values (used for the learning rate schedule)
        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print("Before the loss")

        # Define control loss function
        criterion = Loss(g_conf.LOSS_FUNCTION)

        if iteration == 0 and is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'total_time': accumulated_time,
                'optimizer': optimizer.state_dict(),
                'best_loss_iter': best_loss_iter
            }
            torch.save(
                state,
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(iteration) + '.pth'))
        # Training loop
        for data in data_loader:

            # Basically in this mode of execution, we validate every X Steps, if it goes up 3 times,
            # add a stop on the _logs folder that is going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            iteration += 1

            # Adjust learning rate based on training loss
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            capture_time = time.time()
            model.zero_grad()

            controls = data['directions']

            # Run model forward and get outputs
            # First case corresponds to training squeeze network, second case corresponds to training driving model without
            # mimicking losses, last case corresponds to training mimic network
            if "seg" in g_conf.SENSORS.keys():
                branches = model(data,
                                 dataset.extract_inputs(data).cuda(),
                                 dataset.extract_intentions(data).cuda())
            elif not g_conf.USE_REPRESENTATION_LOSS:
                branches = model(data, dataset.extract_inputs(data).cuda())
            else:
                branches, intermediate_reps = model(
                    data,
                    dataset.extract_inputs(data).cuda())

            # Compute control loss
            targets_to_use = dataset.extract_targets(data)
            loss_function_params = {
                'branches': branches,
                'targets': targets_to_use.cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)

            # Compute mimicking loss
            if g_conf.USE_REPRESENTATION_LOSS:
                expert_reps = dataset.extract_representations(data)
                # Seg mask mimicking loss
                if g_conf.USE_PERCEPTION_REP_LOSS:
                    perception_rep_loss_elementwise = (
                        intermediate_reps[0] - expert_reps[0].cuda())**2
                    perception_rep_loss = g_conf.PERCEPTION_REP_WEIGHT * torch.sum(
                        perception_rep_loss_elementwise) / branches[0].shape[0]
                else:
                    perception_rep_loss = torch.tensor(0.).cuda()
                # Speed mimicking loss
                if g_conf.USE_SPEED_REP_LOSS:
                    speed_rep_loss_elementwise = (intermediate_reps[1] -
                                                  expert_reps[1].cuda())**2
                    speed_rep_loss = g_conf.SPEED_REP_WEIGHT * torch.sum(
                        speed_rep_loss_elementwise) / branches[0].shape[0]
                else:
                    speed_rep_loss = torch.tensor(0.).cuda()
                # Stop intentions mimicking loss
                if g_conf.USE_INTENTION_REP_LOSS:
                    intentions_rep_loss_elementwise = (
                        intermediate_reps[2] - expert_reps[2].cuda())**2
                    intentions_rep_loss = g_conf.INTENTIONS_REP_WEIGHT * torch.sum(
                        intentions_rep_loss_elementwise) / branches[0].shape[0]
                else:
                    intentions_rep_loss = torch.tensor(0.).cuda()
                rep_loss = g_conf.REP_LOSS_WEIGHT * (
                    perception_rep_loss + speed_rep_loss + intentions_rep_loss)
                overall_loss = loss + rep_loss
            else:
                overall_loss = loss
            overall_loss.backward()
            optimizer.step()
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))
            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            if g_conf.USE_REPRESENTATION_LOSS:
                coil_logger.add_scalar('Perception Rep Loss',
                                       perception_rep_loss.data, iteration)
                coil_logger.add_scalar('Speed Rep Loss', speed_rep_loss.data,
                                       iteration)
                coil_logger.add_scalar('Intentions Rep Loss',
                                       intentions_rep_loss.data, iteration)
                coil_logger.add_scalar('Overall Rep Loss', rep_loss.data,
                                       iteration)
                coil_logger.add_scalar('Total Loss', overall_loss.data,
                                       iteration)
            if 'rgb' in data:
                coil_logger.add_image('Image', torch.squeeze(data['rgb']),
                                      iteration)
            if overall_loss.data < best_loss:
                best_loss = overall_loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - targets_to_use.cuda())

            accumulated_time += time.time() - capture_time

            # Log to terminal and log file
            if g_conf.USE_REPRESENTATION_LOSS:
                coil_logger.add_message(
                    'Iterating', {
                        'Iteration':
                        iteration,
                        'Loss':
                        overall_loss.data.tolist(),
                        'Control Loss':
                        loss.data.tolist(),
                        'Rep Loss':
                        rep_loss.data.tolist(),
                        'Images/s':
                        (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                        'BestLoss':
                        best_loss,
                        'BestLossIteration':
                        best_loss_iter,
                        'Output':
                        output[position].data.tolist(),
                        'GroundTruth':
                        targets_to_use[position].data.tolist(),
                        'Error':
                        error[position].data.tolist(),
                        'Inputs':
                        dataset.extract_inputs(data)[position].data.tolist()
                    }, iteration)
            else:
                coil_logger.add_message(
                    'Iterating', {
                        'Iteration':
                        iteration,
                        'Loss':
                        loss.data.tolist(),
                        'Images/s':
                        (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                        'BestLoss':
                        best_loss,
                        'BestLossIteration':
                        best_loss_iter,
                        'Output':
                        output[position].data.tolist(),
                        'GroundTruth':
                        targets_to_use[position].data.tolist(),
                        'Error':
                        error[position].data.tolist(),
                        'Inputs':
                        dataset.extract_inputs(data)[position].data.tolist()
                    }, iteration)
            # Save training loss history (useful for restoring training runs since learning rate is adjusted
            # based on training loss)
            loss_window.append(overall_loss.data.tolist())
            coil_logger.write_on_error_csv('train', overall_loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, overall_loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #24
0
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output):
    latest = None
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpu)

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias+'.yaml'))

        # The validation dataset is always fully loaded, so we fix a very high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process('validation', dataset_name)

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            sys.stdout = open(os.path.join('_output_logs',
                                           exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                                           + str(os.getpid()) + ".out"),
                              "a", buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                              exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                                           + str(os.getpid()) + ".out"),
                              "a", buffering=1)


        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the HDFILES positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(full_dataset, transform=augmenter, preload_name=dataset_name)

        # The data loader is the multi threaded module from pytorch that release a number of
        # workers to get all the data.
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=g_conf.BATCH_SIZE,
                                                  shuffle=False,
                                                  num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
                                                  pin_memory=True)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        # The window used to keep track of the trainings
        l1_window = []
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:  # When latest is noe
            l1_window = coil_logger.recover_loss_window(dataset_name, None)

        model.cuda()

        best_mse = 1000
        best_error = 1000
        best_mse_iter = 0
        best_error_iter = 0

        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

                latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

                checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias
                                        , 'checkpoints', str(latest) + '.pth'))
                checkpoint_iteration = checkpoint['iteration']
                print("Validation loaded ", checkpoint_iteration)

                model.load_state_dict(checkpoint['state_dict'])

                model.eval()

                if torch.cuda.device_count() > 1:
                  model = torch.nn.DataParallel(model)
                  
                accumulated_mse = 0
                accumulated_error = 0
                iteration_on_checkpoint = 0
                for data in data_loader:

                    # Compute the forward pass on a batch from  the validation dataset
                    controls = data['directions']
                    if torch.cuda.device_count() > 1:
                      output = model.module.forward_branch(torch.squeeze(data['rgb']).cuda(),
                                                  dataset.extract_inputs(data).cuda(),
                                                  controls)
                    else:
                      output = model.forward_branch(torch.squeeze(data['rgb']).cuda(),
                                                  dataset.extract_inputs(data).cuda(),
                                                  controls)
                    # It could be either waypoints or direct control
                    if 'waypoint1_angle' in g_conf.TARGETS:
                        write_waypoints_output(checkpoint_iteration, output)
                    else:
                        write_regular_output(checkpoint_iteration, output)

                    mse = torch.mean((output -
                                      dataset.extract_targets(data).cuda())**2).data.tolist()
                    mean_error = torch.mean(
                                    torch.abs(output -
                                              dataset.extract_targets(data).cuda())).data.tolist()

                    accumulated_error += mean_error
                    accumulated_mse += mse
                    error = torch.abs(output - dataset.extract_targets(data).cuda())

                    # Log a random position
                    position = random.randint(0, len(output.data.tolist())-1)

                    coil_logger.add_message('Iterating',
                         {'Checkpoint': latest,
                          'Iteration': (str(iteration_on_checkpoint*120)+'/'+str(len(dataset))),
                          'MeanError': mean_error,
                          'MSE': mse,
                          'Output': output[position].data.tolist(),
                          'GroundTruth': dataset.extract_targets(data)[position].data.tolist(),
                          'Error': error[position].data.tolist(),
                          'Inputs': dataset.extract_inputs(data)[position].data.tolist()},
                          latest)
                    iteration_on_checkpoint += 1
                    print("Iteration %d  on Checkpoint %d : Error %f" % (iteration_on_checkpoint,
                                                                checkpoint_iteration, mean_error))

                """
                    ########
                    Finish a round of validation, write results, wait for the next
                    ########
                """

                checkpoint_average_mse = accumulated_mse/(len(data_loader))
                checkpoint_average_error = accumulated_error/(len(data_loader))
                coil_logger.add_scalar('Loss', checkpoint_average_mse, latest, True)
                coil_logger.add_scalar('Error', checkpoint_average_error, latest, True)

                if checkpoint_average_mse < best_mse:
                    best_mse = checkpoint_average_mse
                    best_mse_iter = latest

                if checkpoint_average_error < best_error:
                    best_error = checkpoint_average_error
                    best_error_iter = latest

                coil_logger.add_message('Iterating',
                     {'Summary':
                         {
                          'Error': checkpoint_average_error,
                          'Loss': checkpoint_average_mse,
                          'BestError': best_error,
                          'BestMSE': best_mse,
                          'BestMSECheckpoint': best_mse_iter,
                          'BestErrorCheckpoint': best_error_iter
                         },

                      'Checkpoint': latest},
                                        latest)

                l1_window.append(checkpoint_average_error)
                coil_logger.write_on_error_csv(dataset_name, checkpoint_average_error)

                # If we are using the finish when validation stops, we check the current
                if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                    if dlib.count_steps_without_decrease(l1_window) > 3 and \
                            dlib.count_steps_without_decrease_robust(l1_window) > 3:
                        coil_logger.write_stop(dataset_name, latest)
                        break

            else:

                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)
                # print ('checkpoint: ', latest)

                coil_logger.add_message('Loading', {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
Example #25
0
def execute(gpu,
            exp_batch,
            exp_alias,
            dataset_name,
            suppress_output=True,
            yaml_file=None):
    latest = None
    # try:
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    path_to_yaml_file = os.path.join('configs', exp_batch, exp_alias + '.yaml')
    if yaml_file is not None:
        path_to_yaml_file = os.path.join(yaml_file, exp_alias + '.yaml')
    merge_with_yaml(path_to_yaml_file)
    # The validation dataset is always fully loaded, so we fix a very high number of hours
    # g_conf.NUMBER_OF_HOURS = 10000 # removed to simplify code
    """
    # commenting out this segment to simplify code
    set_type_of_process('validation', dataset_name)

    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')

    if suppress_output:
        sys.stdout = open(os.path.join('_output_logs',
                                       exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
        sys.stderr = open(os.path.join('_output_logs',
                          exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                                       + str(os.getpid()) + ".out"),
                          "a", buffering=1)
    """

    # Define the dataset. This structure is has the __get_item__ redefined in a way
    # that you can access the HDFILES positions from the root directory as a in a vector.

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                dataset_name)  # original code
    augmenter = Augmenter(None)
    # Definition of the dataset to be used. Preload name is just the validation data name
    print('full dataset path: ', full_dataset)
    dataset = CoILDataset(full_dataset,
                          transform=augmenter,
                          preload_name=dataset_name)

    # Creates the sampler, this part is responsible for managing the keys. It divides
    # all keys depending on the measurements and produces a set of keys for each bach.

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=g_conf.BATCH_SIZE,
        shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
        pin_memory=True)

    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    """ removing this segment to simplify code
    # The window used to keep track of the trainings
    l1_window = []
    latest = get_latest_evaluated_checkpoint()
    if latest is not None:  # When latest is noe
        l1_window = coil_logger.recover_loss_window(dataset_name, None)
    """

    model.cuda()

    best_mse = 1000
    best_error = 1000
    best_mse_iter = 0
    best_error_iter = 0

    # modified validation code from here to run a single model
    # checkpoint = torch.load(os.path.join(g_conf.VALIDATION_CHECKPOINT_PATH
    #                        , 'checkpoints', g_conf.VALIDATION_CHECKPOINT_ITERATION + '.pth'))
    checkpoint = torch.load(args.checkpoint)
    checkpoint_iteration = checkpoint['iteration']
    print("model loaded ", checkpoint_iteration)

    model.load_state_dict(checkpoint['state_dict'])

    model.train()
    accumulated_mse = 0
    accumulated_error = 0
    iteration_on_checkpoint = 0

    print('data_loader size: ', len(data_loader))
    total_var = []
    for data in data_loader:
        # dataloader directly loads the saved activations
        # Compute the forward pass on a batch from  the validation dataset
        controls = data['directions']
        curr_var = []
        for i in range(100):
            output = model.branches(data['activation'].cuda())
            output_vec = model.extract_branch(torch.stack(output), controls)
            curr_var.append(output_vec.detach().cpu().numpy())

        curr_var = np.array(curr_var)
        compute_var = np.var(curr_var, axis=0)
        total_var += compute_var.tolist()

        iteration_on_checkpoint += 1
        if iteration_on_checkpoint % 50 == 0:
            print('iteration: ', iteration_on_checkpoint)

    total_var = np.array(total_var)
    print(len(total_var), total_var.shape)

    # save the computed variance array, this would be used for uncertainty based sampling in 'tools/filter_dagger_data_var.py'
    np.save(
        os.path.join(args.save_path, args.dataset_name, 'computed_var.npy'),
        total_var)
Example #26
0
def execute(gpu,
            exp_batch,
            exp_alias,
            suppress_output=True,
            number_of_workers=12):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: gpus ids for training
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpu)
        g_conf.VARIABLE_WEIGHT = {}

        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        set_type_of_process('train')

        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case
        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
            print('iteration: ', iteration, 'best_loss: ', best_loss)
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augment images and transform them into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read the dataset
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
        print("Loaded dataset")

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each bach.
        # define the sampling strategy for mini-batch, different samplers can be found in 'splitter.py'
        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)

        # Instatiate the network architecture
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()

        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE
                               )  # adabound and adamio can also be used here

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:
            # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        # freeze the perception module weights if required
        # for m in model.perception.parameters():
        #     m.requires_grad = False

        # total trainable parameters
        model_parameters = filter(lambda p: p.requires_grad,
                                  model.parameters())
        total_params = sum([np.prod(p.size()) for p in model_parameters])
        print('trainable parameters: ', total_params)

        # multi-gpu
        print('number of gpus: ', torch.cuda.device_count())
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)

        criterion = Loss(g_conf.LOSS_FUNCTION)

        print('Start Training')

        st = time.time()
        for data in data_loader:

            # use this for early stopping if the validation loss is not coming down
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            iteration += 1

            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # additional learning rate scheduler - cyclic cosine annealing (https://arxiv.org/pdf/1704.00109.pdf)
            # adjust_learning_rate_cosine_annealing(optimizer, loss_window, iteration)

            capture_time = time.time()
            controls = data['directions']
            model.zero_grad()
            branches = model(torch.squeeze(data['rgb'].cuda()),
                             dataset.extract_inputs(data).cuda())
            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):
                if torch.cuda.device_count() > 1:
                    state_dict_save = model.module.state_dict()
                else:
                    state_dict_save = model.state_dict()

                state = {
                    'iteration': iteration,
                    'state_dict': state_dict_save,
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))
            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']),
                                  iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(data) - 1)

            if torch.cuda.device_count() > 1:
                output = model.module.extract_branch(
                    torch.stack(branches[0:4]), controls)
            else:
                output = model.extract_branch(torch.stack(branches[0:4]),
                                              controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            accumulated_time += time.time() - capture_time

            coil_logger.add_message(
                'Iterating', {
                    'Iteration':
                    iteration,
                    'Loss':
                    loss.data.tolist(),
                    'Images/s':
                    (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss':
                    best_loss,
                    'BestLossIteration':
                    best_loss_iter,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(data)[position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(data)[position].data.tolist()
                }, iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, loss.data))
            st = time.time()

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Example #27
0
def execute(gpu, exp_batch, exp_alias, suppress_output=True):
    # We set the visible cuda devices

    # TODO: probable race condition, the train has to be started before.
    try:
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        set_type_of_process('train')

        coil_logger.add_message('Loading', {'GPU': gpu})

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        # Put the output to a separate file
        if suppress_output:
            sys.stdout = open(os.path.join(
                '_output_logs',
                g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']

        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # TODO: The checkpoint will continue, so it should erase everything up to the iteration on tensorboard
        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the HD_FILES positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # augmenter_cpu = iag.AugmenterCPU(g_conf.AUGMENTATION_SUITE_CPU)

        # By instanciating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        dataset = CoILDataset(full_dataset, transform=augmenter)

        data_loader = select_balancing_strategy(dataset, iteration)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()

        if checkpoint_file is not None:
            model.load_state_dict(checkpoint['state_dict'])

        print(model)

        criterion = Loss(g_conf.LOSS_FUNCTION)

        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        print(dataset.meta_data)

        print(model)
        if checkpoint_file is not None:
            accumulated_time = checkpoint['total_time']
        else:
            accumulated_time = 0  # We accumulate iteration time and keep the average speed

        #TODO: test experiment continuation. Is the data sampler going to continue were it started.. ?
        capture_time = time.time()
        for data in data_loader:

            input_data, float_data = data

            # get the control commands from float_data, size = [120,1]

            controls = float_data[:, dataset.controls_position(), :]

            # The output(branches) is a list of 5 branches results, each branch is with size [120,3]

            model.zero_grad()

            branches = model(torch.squeeze(input_data['rgb'].cuda()),
                             dataset.extract_inputs(float_data).cuda())

            loss = criterion(branches,
                             dataset.extract_targets(float_data).cuda(),
                             controls.cuda(),
                             dataset.extract_inputs(float_data).cuda(),
                             branch_weights=g_conf.BRANCH_LOSS_WEIGHT,
                             variable_weights=g_conf.VARIABLE_WEIGHT)

            # TODO: All these logging things could go out to clean up the main
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(float_data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output -
                              dataset.extract_targets(float_data).cuda())

            # TODO: For now we are computing the error for just the correct branch, it could be multi- branch,

            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(input_data['rgb']),
                                  iteration)

            loss.backward()
            optimizer.step()

            accumulated_time += time.time() - capture_time
            capture_time = time.time()

            # TODO: Get only the  float_data that are actually generating output
            # TODO: itearation is repeating , and that is dumb
            coil_logger.add_message(
                'Iterating', {
                    'Iteration':
                    iteration,
                    'Loss':
                    loss.data.tolist(),
                    'Images/s':
                    (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss':
                    best_loss,
                    'BestLossIteration':
                    best_loss_iter,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(
                        float_data)[position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(float_data)[position].data.tolist()
                }, iteration)

            # TODO: For now we are computing the error for just the correct branch, it could be multi-branch,

            # TODO: save also the optimizer state dictionary
            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'best_loss_iter': best_loss_iter
                }
                # TODO : maybe already summarize the best model ???
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))

            iteration += 1
            print(iteration)

            if iteration % 1000 == 0:
                adjust_learning_rate(optimizer, iteration)

            del data

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except:
        traceback.print_exc()

        coil_logger.add_message('Error', {'Message': 'Something Happened'})