Ejemplo n.º 1
0
def start_carla_simulator(gpu, town_name, docker):
    """
        Start a CARLA simulator, either by running a docker image or by running the binary
        directly. For that, the CARLA_PATH environment variable should be specified.
    Args:
        gpu: the gpu number to run carla
        town_name: The town name
        docker: the docker name, if used. If not used docker should be None.

    Returns:

    """

    port = find_free_port()

    sp = subprocess.Popen(['docker', 'run', '--rm', '-d', '-p',
                           str(port)+'-'+str(port+2)+':'+str(port)+'-'+str(port+2),
                           '--runtime=nvidia', '-e', 'NVIDIA_VISIBLE_DEVICES='+str(gpu), docker,
                           '/bin/bash', 'CarlaUE4.sh', '/Game/Maps/' + town_name, '-windowed',
                           '-benchmark', '-fps=10', '-world-port=' + str(port)], shell=False,
                           stdout=subprocess.PIPE)
    (out, err) = sp.communicate()

    print("Going to communicate")

    coil_logger.add_message('Loading', {'CARLA':  '/CarlaUE4/Binaries/Linux/CarlaUE4' 
                            '-windowed'+ '-benchmark'+ '-fps=10'+ '-world-port='+ str(port)})

    return sp, port, out
Ejemplo n.º 2
0
def partition_keys_by_percentiles(steerings, keys, percentiles):
    iter_index = 0
    quad_pos = 0
    splited_keys = []
    quad_vec = [percentiles[0]]
    for i in range(1, len(percentiles)):
        quad_vec.append(quad_vec[-1] + percentiles[i])

    for i in range(0, len(steerings)):

        if i >= quad_vec[quad_pos] * len(steerings) - 1:
            # We split

            splited_keys.append(keys[iter_index:i])
            if keys[iter_index:i] == []:
                raise RuntimeError("Reach into an empty bin.")
            iter_index = i
            quad_pos += 1
            # THe value of steering splitted
            # The number of keys for this split
            # print ([steerings[i], len(splited_keys)])
            coil_logger.add_message(
                'Loading',
                {'SplitPoints': [steerings[i], len(splited_keys)]})

    return splited_keys
Ejemplo n.º 3
0
def control_steer_split(measurements, meta_data):

    steerings = measurements[0, :]

    # TODO: read meta data and turn into a coool dictionary ?
    #print(np.where(dataset.meta_data[:, 0] == 'control'))
    labels = measurements[np.where(meta_data[:, 0] == 'control'), :]

    print(np.unique(labels))

    keys = range(0, len(steerings))
    print(labels)
    print(keys)
    splitted_labels = label_split(labels[0][0], keys, g_conf.LABELS_DIVISION)

    # Another level of splitting
    splitted_steer_labels = []

    for keys in splitted_labels:
        splitter_steer = float_split(steerings, keys, g_conf.STEERING_DIVISION)

        splitted_steer_labels.append(splitter_steer)

    coil_logger.add_message('Loading', {'KeysDivision': splitted_steer_labels})

    return splitted_steer_labels
Ejemplo n.º 4
0
def set_type_of_process(process_type, param=None):
    """
    This function is used to set which is the type of the current process, test, train or val
    and also the details of each since there could be many vals and tests for a single
    experiment.

    NOTE: AFTER CALLING THIS FUNCTION, THE CONFIGURATION CLOSES

    Args:
        type:

    Returns:

    """

    if _g_conf.PROCESS_NAME == "default":
        raise RuntimeError(
            " You should merge with some exp file before setting the type")

    if process_type == 'train':
        _g_conf.PROCESS_NAME = process_type
    elif process_type == "validation":
        _g_conf.PROCESS_NAME = process_type + '_' + param
    if process_type == "drive":  # FOR drive param is city name.
        _g_conf.CITY_NAME = param.split('_')[-1]
        _g_conf.PROCESS_NAME = process_type + '_' + param

    #else:  # FOr the test case we join with the name of the experimental suite.

    create_log(_g_conf.EXPERIMENT_BATCH_NAME, _g_conf.EXPERIMENT_NAME,
               _g_conf.PROCESS_NAME, _g_conf.LOG_SCALAR_WRITING_FREQUENCY,
               _g_conf.LOG_IMAGE_WRITING_FREQUENCY)

    if process_type == "train":
        if not os.path.exists(
                os.path.join('_logs', _g_conf.EXPERIMENT_BATCH_NAME,
                             _g_conf.EXPERIMENT_NAME, 'checkpoints')):
            os.mkdir(
                os.path.join('_logs', _g_conf.EXPERIMENT_BATCH_NAME,
                             _g_conf.EXPERIMENT_NAME, 'checkpoints'))

    if process_type == "validation" or process_type == 'drive':
        if not os.path.exists(
                os.path.join('_logs', _g_conf.EXPERIMENT_BATCH_NAME,
                             _g_conf.EXPERIMENT_NAME,
                             _g_conf.PROCESS_NAME + '_csv')):
            os.mkdir(
                os.path.join('_logs', _g_conf.EXPERIMENT_BATCH_NAME,
                             _g_conf.EXPERIMENT_NAME,
                             _g_conf.PROCESS_NAME + '_csv'))

    # TODO: check if there is some integrity.

    add_message(
        'Loading', {
            'ProcessName': _g_conf.EXPERIMENT_GENERATED_NAME,
            'FullConfiguration': _g_conf.TRAIN_DATASET_NAME + 'dict'
        })

    _g_conf.immutable(True)
Ejemplo n.º 5
0
def set_type_of_process(process_type, param=None):
    """
    This function is used to set which is the type of the current process, test, train or val
    and also the details of each since there could be many vals and tests for a single
    experiment.

    NOTE: AFTER CALLING THIS FUNCTION, THE CONFIGURATION CLOSES

    Args:
        type:

    Returns:

    """

    if _g_conf.PROCESS_NAME == "default":
        raise RuntimeError(" You should merge with some exp file before setting the type")

    if process_type == 'train':
        _g_conf.PROCESS_NAME = process_type
    elif process_type == "validation":
        _g_conf.PROCESS_NAME = process_type + '_' + param
    if process_type == "drive":  # FOR drive param is city name.
        _g_conf.CITY_NAME = param
        _g_conf.PROCESS_NAME = process_type + '_' + _g_conf.CITY_NAME + '_' + _g_conf.EXPERIMENTAL_SUITE_NAME

    #else:  # FOr the test case we join with the name of the experimental suite.

    create_log(_g_conf.EXPERIMENT_BATCH_NAME,
               _g_conf.EXPERIMENT_NAME,
               _g_conf.PROCESS_NAME)

    if process_type == "train":
        if not os.path.exists(os.path.join('/datatmp/Experiments/rohitgan/_logs', _g_conf.EXPERIMENT_BATCH_NAME,
                                        _g_conf.EXPERIMENT_NAME,
                                        'checkpoints') ):
                os.mkdir(os.path.join('/datatmp/Experiments/rohitgan/_logs', _g_conf.EXPERIMENT_BATCH_NAME,
                                            _g_conf.EXPERIMENT_NAME,
                                            'checkpoints'))




    if process_type == "validation" or process_type == 'drive':
        if not os.path.exists(os.path.join('_logs', _g_conf.EXPERIMENT_BATCH_NAME,
                                           _g_conf.EXPERIMENT_NAME,
                                           _g_conf.PROCESS_NAME + '_csv')):
            os.mkdir(os.path.join('_logs', _g_conf.EXPERIMENT_BATCH_NAME,
                                          _g_conf.EXPERIMENT_NAME,
                                           _g_conf.PROCESS_NAME + '_csv'))



    # We assure ourselves that the configuration file added does not kill things
    _check_integrity()

    add_message('Loading', {'ProcessName': generate_name(),
                            'FullConfiguration': generate_param_dict()})

    _g_conf.immutable(True)
Ejemplo n.º 6
0
def control_steer_split(float_data, meta_data):

    steerings = float_data[np.where(meta_data[:, 0] == 'steer'), :][0][
        0]  # TODO: WHY EVERY WHERE MAKE THIS TO BE USED ??

    print("steer shape", steerings.shape)

    # TODO: read meta data and turn into a coool dictionary ?
    #print(np.where(dataset.meta_data[:, 0] == 'control'))
    #TODO ELIMINATE ALL NAMES CALLED LABEL OR MEASUREMENTS , MORE GENERIC FLOAT DATA AND SENSOR DATA IS BETTER
    labels = float_data[np.where(meta_data[:, 0] == 'control'), :][0][0]

    print("labels shape ", labels.shape)
    keys = range(0, len(steerings) - g_conf.NUMBER_IMAGES_SEQUENCE)

    splitted_labels = label_split(labels, keys, g_conf.LABELS_DIVISION)

    # Another level of splitting
    splitted_steer_labels = []

    for keys in splitted_labels:
        splitter_steer = float_split(steerings, keys, g_conf.STEERING_DIVISION)

        splitted_steer_labels.append(splitter_steer)

    coil_logger.add_message('Loading', {'KeysDivision': splitted_steer_labels})

    return splitted_steer_labels
Ejemplo n.º 7
0
 def __call__(self, tensor, output_tensor):
     coil_logger.add_message(
         'Loss', {
             "Iteration": 765,
             "LossValue":
             [0.232, 0.232, 0.332, 0.2322, 0.232, 0.232, 0.232]
         })
     return tensor
Ejemplo n.º 8
0
    def run_step(self, measurements, sensor_data, directions, target):
        """
            Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.

        """
        # Get speed and high-level turning command
        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # If we're evaluating squeeze network (so we are using ground truth seg mask)
        if "seg" in g_conf.SENSORS.keys():
            # Run the autopilot agent to get stop intentions
            _, state = self.control_agent.run_step(measurements, [], [], target)
            inputs_vec = []
            for input_name in g_conf.INTENTIONS:
                inputs_vec.append(float(state[input_name]))
            intentions = torch.cuda.FloatTensor(inputs_vec).unsqueeze(0)
            # Run squeeze network
            model_outputs = self._model.forward_branch(self._process_sensors(sensor_data), norm_speed,
                                                       directions_tensor, intentions, benchmark=True)
        else:
            # Run driving model
            model_outputs = self._model.forward_branch(self._process_sensors(sensor_data), norm_speed,
                                                       directions_tensor, benchmark=True)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])
        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        # There is the posibility to replace some of the predictions with oracle predictions.
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, target)

        if self.first_iter:
            coil_logger.add_message('Iterating', {"Checkpoint": self.checkpoint['iteration'],
                                                  'Agent': str(steer)},
                                    self.checkpoint['iteration'])
        self.first_iter = False
        
        return control
Ejemplo n.º 9
0
    def run_step(self, measurements, sensor_data, directions, target):
        """
            Run a step on the benchmark simulation
        Args:
            measurements: All the float measurements from CARLA ( Just speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.

        Returns:
            Controls for the vehicle on the CARLA simulator.

        """

        # Take the forward speed and normalize it for it to go from 0-1
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])
        # Compute the forward pass processing the sensors got from CARLA.
        rgbs = self._process_sensors(sensor_data)
        with torch.no_grad():
            outputs = self.model_erf(rgbs)
        labels = outputs.max(1)[1].byte().cpu().data

        seg_road = (labels == 0)
        seg_not_road = (labels != 0)
        seg = torch.stack((seg_road, seg_not_road), 1).float()

        model_outputs = self._model.forward_branch(seg.cuda(), norm_speed,
                                                   directions_tensor)

        #        model_outputs = self._model.forward_branch(self._process_sensors(sensor_data), norm_speed,
        #                                                   directions_tensor)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])
        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        # There is the posibility to replace some of the predictions with oracle predictions.
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(
                measurements, target)

        if self.first_iter:
            coil_logger.add_message('Iterating', {
                "Checkpoint": self.checkpoint['iteration'],
                'Agent': str(steer)
            }, self.checkpoint['iteration'])
        self.first_iter = False

        return control
Ejemplo n.º 10
0
    def load_network(self, checkpoint):
        """
        Load a network for a given model definition .

        Args:
            checkpoint: The checkpoint that the user wants to add .



        """
        coil_logger.add_message(
            'Loading', {"Model": {"Loaded checkpoint: " + str(checkpoint)}})
Ejemplo n.º 11
0
def start_carla_simulator(gpu, town_name, no_screen):

    # Set the outfiles for the process
    carla_out_file = os.path.join(
        '_output_logs',
        'CARLA_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out")
    carla_out_file_err = os.path.join(
        '_output_logs',
        'CARLA_err_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out")

    # TODO: Add parameters
    mode = 'VGL'
    port = find_free_port()
    carla_path = os.environ['CARLA_PATH']

    if no_screen and mode == 'SDL':
        print(" EXECUTING NO SCREEN! ")
        os.environ['SDL_VIDEODRIVER'] = 'offscreen'

    if mode == 'SDL':
        os.environ['SDL_HINT_CUDA_DEVICE'] = str(gpu)

        sp = subprocess.Popen([
            carla_path + '/CarlaUE4/Binaries/Linux/CarlaUE4',
            '/Game/Maps/' + town_name, '-windowed', '-benchmark', '-fps=10',
            '-world-port=' + str(port)
        ],
                              shell=False,
                              stdout=open(carla_out_file, 'w'),
                              stderr=open(carla_out_file_err, 'w'))
    elif mode == 'VGL':
        os.environ['DISPLAY'] = ":5"
        sp = subprocess.Popen([
            'vglrun', '-d', ':7.' + str(gpu), carla_path +
            '/CarlaUE4/Binaries/Linux/CarlaUE4', '/Game/Maps/' + town_name,
            '-windowed', '-benchmark', '-fps=10', '-world-port=' + str(port)
        ],
                              shell=False,
                              stdout=open(carla_out_file, 'w'),
                              stderr=open(carla_out_file_err, 'w'))
    else:
        raise ValueError("Invalid Mode !")

    coil_logger.add_message(
        'Loading', {
            'CARLA':
            carla_path + '/CarlaUE4/Binaries/Linux/CarlaUE4'
            '-windowed' + '-benchmark' + '-fps=10' + '-world-port=' + str(port)
        })

    return sp, port
Ejemplo n.º 12
0
    def test_global_logger_train(self):
        # TODO: THERE WILL BE A NAME GENERATOR
        g_conf.param.NAME = 'experiment_1'
        g_conf.merge_with_yaml('configs/eccv/experiment_1.yaml')
        # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
        g_conf.set_type_of_process('train')

        coil_logger.add_message(
            'Loading',
            {"Keys_Division": [1, 123, 1, 1, 2, 12, 3, 12, 31, 2, 1, 1]})

        coil_logger.add_message('Loading', {
            "Models_loaded": ' VUALA ',
            "Checkpoint": "988765"
        })

        for i in range(0, 10):

            coil_logger.add_message('Reading', {
                "Iteration": i,
                "ReadKeys": [1, 123, 5, 1, 34, 1, 23]
            })
            coil_logger.add_message('Network', {
                "Iteration": i,
                "Output-": ["output"]
            })
Ejemplo n.º 13
0
    def test_check_status_running_iter(self):
        g_conf = GlobalConfig()
        g_conf.param.NAME = 'experiment_running_iter'
        # TODO: this merge is weird.
        g_conf.merge_with_yaml(
            'configs/monitor_test/experiment_running_iter.yaml')
        # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
        g_conf.set_type_of_process('train')

        coil_logger.add_message(
            'Loading',
            {"Keys_Division": [1, 123, 1, 1, 2, 12, 3, 12, 31, 2, 1, 1]})

        coil_logger.add_message('Loading', {
            "Models_loaded": ' VUALA ',
            "Checkpoint": "988765"
        })

        for i in range(0, 10):

            coil_logger.add_message('Reading', {
                "Iteration": i,
                "ReadKeys": [1, 123, 5, 1, 34, 1, 23]
            })
            coil_logger.add_message('Model', {
                "Iteration": i,
                "Output": ["output"]
            })

        # TODO: Check how the alias will work.
        status = monitorer.get_status('monitor_test',
                                      'experiment_running_iter',
                                      g_conf.param.PROCESS_NAME)

        self.assertEqual(status[0], "Iterating")
Ejemplo n.º 14
0
def start_carla_simulator(gpu, town_name, docker):
    """
        Start a CARLA simulator, either by running a docker image or by running the binary
        directly. For the latter, the CARLA_PATH environment variable should be specified.
    Args:
        gpu: the gpu number to run carla
        town_name: The town name
        docker: the docker name, if used. If not used docker should be None.

    Returns:

    """

    # Temporary config file
    port = find_free_port()
    my_env = os.environ.copy()

    # TODO: add quality level here
    # sp = subprocess.Popen(
    #     ['docker', 'run', '--rm', '-d', '-p', f'{port}-{port+2}:{port}-{port+2}', '--runtime=nvidia', '-e',
    #      f'NVIDIA_VISIBLE_DEVICES={gpu[0]}', docker, '/bin/bash', 'CarlaUE4.sh', f'/Game/Maps/{town_name}', '-windowed',
    #      '-benchmark', '-fps=20', f'-world-port={port}', '-RenderOffScreen', '--carla-rpc-port=3654',
    #      '--carla-streaming-port=0', '-nosound'],
    #     shell=False, stdout=subprocess.PIPE, env=my_env)
    sp = subprocess.Popen([
        'docker', 'run', '--rm', '-d', '-p',
        f'{port}-{port+2}:{port}-{port+2}', '--runtime=nvidia', '-e', '--gpus',
        f'"device={gpu[0]}"', docker, '/bin/bash', 'CarlaUE4.sh',
        f'/Game/Maps/{town_name}', '-windowed', '-fps=20',
        f'-world-port={port}'
    ],
                          shell=False,
                          stdout=subprocess.PIPE,
                          env=my_env)
    (out, err) = sp.communicate()

    print("Going to communicate")
    coil_logger.add_message(
        'Loading', {
            'CARLA':
            f'/CarlaUE4/Binaries/Linux/CarlaUE4-windowed-benchmark-fps=20-world-port={port}'
        })

    return sp, port, out
Ejemplo n.º 15
0
def start_carla_simulator(gpu, exp_batch, exp_alias, city_name):

    port = find_free_port()
    carla_path = os.environ['CARLA_PATH']

    #os.environ['SDL_VIDEODRIVER'] = 'offscreen'
    #os.environ['SDL_HINT_CUDA_DEVICE'] = str(gpu)

    #subprocess.call()

    sp = subprocess.Popen([carla_path + '/CarlaUE4/Binaries/Linux/CarlaUE4', '/Game/Maps/' + city_name
                           , '-windowed', '-benchmark', '-fps=10', '-world-port='+str(port)], shell=False,
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE)


    coil_logger.add_message('Loading', {'CARLA': carla_path + '/CarlaUE4/Binaries/Linux/CarlaUE4' + '/Game/Maps/' + city_name
                           + '-windowed'+ '-benchmark'+ '-fps=10'+ '-world-port='+ str(port)})

    return sp, port
Ejemplo n.º 16
0
    def test_check_status_error(self):

        g_conf.immutable(False)
        # TODO: THe error ? How do nicely merge with the other parts ??
        g_conf.NAME = 'experiment_running_error'
        # TODO: this merge is weird.
        merge_with_yaml('configs/monitor_test/experiment_running_error.yaml')
        # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
        set_type_of_process('train')

        coil_logger.add_message(
            'Loading',
            {"Keys_Division": [1, 123, 1, 1, 2, 12, 3, 12, 31, 2, 1, 1]})

        coil_logger.add_message('Loading', {
            "Models_loaded": ' VUALA ',
            "Checkpoint": "988765"
        })

        for i in range(0, 10):

            coil_logger.add_message('Iterating', {
                "Iteration": i,
                "ReadKeys": [1, 123, 5, 1, 34, 1, 23]
            })
            coil_logger.add_message('Iterating', {
                "Iteration": i,
                "Output": ["output"]
            })

        coil_logger.add_message('Error', {
            "Iteration": 10,
            "Message": " Some data integrity problems ! "
        })

        # TODO: Check how the alias will work.

        status = monitorer.get_status('monitor_test',
                                      'experiment_running_error.yaml',
                                      g_conf.PROCESS_NAME)

        self.assertEqual(status[0], "Error")
        print(status[1])
Ejemplo n.º 17
0
    def forward(self, x):
        # get only the speeds from measurement labels

        # TODO: TRACK NANS OUTPUTS
        # TODO: Maybe change the name

        # TODO: Control the frequency of postion log
        coil_logger.add_message('Model', {
            'Perception': {
                            "Output": [1.0, 12.3, 124.29]
                           },
            "Iteration": 765

            }
        )

        """ conv1 + batch normalization + dropout + relu """
        x = self.layers(x)

        x = x.view(-1, self.num_flat_features(x))


        return x
Ejemplo n.º 18
0
    def test_check_status_running_loading(self):

        g_conf.immutable(False)
        g_conf.NAME = 'experiment_running_loading'
        # TODO: this merge is weird.
        merge_with_yaml('configs/monitor_test/experiment_running_loading.yaml')
        # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
        set_type_of_process('train')

        coil_logger.add_message(
            'Loading',
            {"Keys_Division": [1, 123, 1, 1, 2, 12, 3, 12, 31, 2, 1, 1]})

        coil_logger.add_message('Loading', {
            "Models_loaded": ' VUALA ',
            "Checkpoint": "988765"
        })

        # TODO: Check how the alias will work.
        status = monitorer.get_status('monitor_test',
                                      'experiment_running_loading.yaml',
                                      g_conf.PROCESS_NAME)

        self.assertEqual(status[0], "Loading")
Ejemplo n.º 19
0
    def test_check_status_finished(self):

        g_conf.immutable(False)
        g_conf.NAME = 'experiment_finished'
        # TODO: this merge is weird.
        merge_with_yaml('configs/monitor_test/experiment_finished.yaml')

        g_conf.NUMBER_ITERATIONS = 20
        # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
        set_type_of_process('train')

        # We set the number of iterations as

        coil_logger.add_message(
            'Loading',
            {"Keys_Division": [1, 123, 1, 1, 2, 12, 3, 12, 31, 2, 1, 1]})

        coil_logger.add_message('Loading', {
            "Models_loaded": ' VUALA ',
            "Checkpoint": "988765"
        })

        for i in range(0, 21):

            coil_logger.add_message('Iterating', {
                "Iteration": i,
                "ReadKeys": [1, 123, 5, 1, 34, 1, 23]
            }, i)
            coil_logger.add_message('Iterating', {
                "Iteration": i,
                "Output": ["output"]
            }, i)

        # TODO: Check how the alias will work.

        status = monitorer.get_status('monitor_test',
                                      'experiment_finished.yaml',
                                      g_conf.PROCESS_NAME)

        self.assertEqual(status[0], "Finished")
Ejemplo n.º 20
0
def execute(gpu,
            exp_batch,
            exp_alias,
            state_dict,
            suppress_output=True,
            number_of_workers=12):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': gpu})

        # Put the output to a separate file if it is the case

        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 10000.0
            best_loss_iter = 0

        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # Instantiate the class used to read a dataset. The coil dataset generator
        # can be found
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=str(g_conf.NUMBER_OF_HOURS) +
                              'hours_' + g_conf.TRAIN_DATASET_NAME)
        print("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)
        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        model.cuda()

        if state_dict != '':
            seg_model = ERFNet_Fast(2)
            seg_model = load_my_state_dict(seg_model, torch.load(state_dict))
            seg_model.cuda()

        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print("Before the loss")

        criterion = Loss(g_conf.LOSS_FUNCTION)
        color_transforms = Colorizes(2)
        board = Dashboard(8097)

        # Loss time series window
        for data in data_loader:

            # Basically in this mode of execution, we validate every X Steps, if it goes up 3 times,
            # add a stop on the _logs folder that is going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            iteration += 1
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            # get the control commands from float_data, size = [120,1]

            capture_time = time.time()
            controls = data['directions']
            # The output(branches) is a list of 5 branches results, each branch is with size [120,3]
            model.zero_grad()
            if state_dict != '':
                with torch.no_grad():
                    repre = seg_model(torch.squeeze(data['rgb'].cuda()),
                                      only_encode=False)
                    inputs = repre
                    imgs = color_transforms(inputs)
                inputs = inputs.float().cuda()
            else:
                inputs = torch.squeeze(data['rgb'].cuda())

            # vis
            board.image(
                torch.squeeze(data['rgb'])[0].cpu().data,
                '(train) input iter: ' + str(iteration))
            board.image(imgs[0].cpu().data,
                        '(train) output iter: ' + str(iteration))

            branches = model(inputs, dataset.extract_inputs(data).cuda())
            loss_function_params = {
                'branches': branches,
                'targets': dataset.extract_targets(data).cuda(),
                'controls': controls.cuda(),
                'inputs': dataset.extract_inputs(data).cuda(),
                'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                'variable_weights': g_conf.VARIABLE_WEIGHT
            }
            loss, _ = criterion(loss_function_params)
            loss.backward()
            optimizer.step()
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(iteration) + '.pth'))
            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']),
                                  iteration)
            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            # Log a random position
            position = random.randint(0, len(data) - 1)

            output = model.extract_branch(torch.stack(branches[0:4]), controls)
            error = torch.abs(output - dataset.extract_targets(data).cuda())

            accumulated_time += time.time() - capture_time

            coil_logger.add_message(
                'Iterating', {
                    'Iteration':
                    iteration,
                    'Loss':
                    loss.data.tolist(),
                    'Images/s':
                    (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                    'BestLoss':
                    best_loss,
                    'BestLossIteration':
                    best_loss_iter,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(data)[position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(data)[position].data.tolist()
                }, iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)
            print("Iteration: %d  Loss: %f" % (iteration, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Ejemplo n.º 21
0
def execute(gpu,
            exp_batch,
            exp_alias,
            suppress_output=True,
            number_of_workers=12,
            encoder_params=None):
    """
        The main training function. This functions loads the latest checkpoint
        for a given, exp_batch (folder) and exp_alias (experiment configuration).
        With this checkpoint it starts from the beginning or continue some training.
    Args:
        gpu: The GPU number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading

    Returns:
        None

    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(
            os.path.join('configs', exp_batch, exp_alias + '.yaml'),
            encoder_params)
        set_type_of_process('train')
        # Set the process into loading status.
        coil_logger.add_message('Loading',
                                {'GPU': os.environ["CUDA_VISIBLE_DEVICES"]})

        seed_everything(seed=g_conf.MAGICAL_SEED)

        # Put the output to a separate file if it is the case

        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        if coil_logger.check_finish('train'):
            coil_logger.add_message('Finished', {})
            return

        # Preload option
        print(" GOING TO LOAD")
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            print(" LOADING A PRELOAD")
            checkpoint = torch.load(
                os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                             g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints',
                             str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        else:

            # Get the latest checkpoint to be loaded
            # returns none if there are no checkpoints saved for this model
            checkpoint_file = get_latest_saved_checkpoint()
            if checkpoint_file is not None:
                print('loading previous checkpoint ', checkpoint_file)
                checkpoint = torch.load(
                    os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                 g_conf.EXPERIMENT_NAME, 'checkpoints',
                                 str(get_latest_saved_checkpoint())))
                iteration = checkpoint['iteration']
                best_loss = checkpoint['best_loss']
                best_loss_iter = checkpoint['best_loss_iter']
            else:
                iteration = 0
                best_loss = 100000000.0
                best_loss_iter = 0

        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the positions from the root directory as a in a vector.
        #full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        # We can save preload dataset depends on the json file name, then no need to load dataset for each time with the same dataset
        if len(g_conf.EXPERIENCE_FILE) == 1:
            json_file_name = str(
                g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2]
        else:
            json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split(
                '/')[-1].split('.')[-2] + '_' + str(
                    g_conf.EXPERIENCE_FILE[1]).split('/')[-1].split('.')[-2]
        dataset = CoILDataset(transform=augmenter,
                              preload_name=g_conf.PROCESS_NAME + '_' +
                              json_file_name + '_' + g_conf.DATA_USED)

        #dataset = CoILDataset(transform=augmenter, preload_name=str(g_conf.NUMBER_OF_HOURS)+ 'hours_' + g_conf.TRAIN_DATASET_NAME)
        print("Loaded Training dataset")

        data_loader = select_balancing_strategy(dataset, iteration,
                                                number_of_workers)
        if g_conf.MODEL_TYPE in ['separate-affordances']:
            model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION,
                              g_conf.ENCODER_MODEL_CONFIGURATION)

        model.cuda()
        optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE)

        print(model)

        # we use the pre-trained encoder model to extract bottleneck Z and train the E-t-E model

        if g_conf.MODEL_TYPE in ['separate-affordances']:
            encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                         g_conf.ENCODER_MODEL_CONFIGURATION)
            encoder_model.cuda()
            encoder_model.eval()
            # To freeze the pre-trained encoder model
            if g_conf.FREEZE_ENCODER:
                for param_ in encoder_model.parameters():
                    param_.requires_grad = False
            if encoder_params is not None:
                encoder_checkpoint = torch.load(
                    os.path.join(
                        '_logs', encoder_params['encoder_folder'],
                        encoder_params['encoder_exp'], 'checkpoints',
                        str(encoder_params['encoder_checkpoint']) + '.pth'))
                print(
                    "Encoder model ",
                    str(encoder_params['encoder_checkpoint']), "loaded from ",
                    os.path.join('_logs', encoder_params['encoder_folder'],
                                 encoder_params['encoder_exp'], 'checkpoints'))
                encoder_model.load_state_dict(encoder_checkpoint['state_dict'])
                if g_conf.FREEZE_ENCODER:
                    encoder_model.eval()
                    # To freeze the pre-trained encoder model
                    for param_ in encoder_model.parameters():
                        param_.requires_grad = False
                else:
                    optimizer = optim.Adam(list(model.parameters()) +
                                           list(encoder_model.parameters()),
                                           lr=g_conf.LEARNING_RATE)

            for name_encoder, param_encoder in encoder_model.named_parameters(
            ):
                if param_encoder.requires_grad:
                    print('  Unfrozen layers', name_encoder)
                else:
                    print('  Frozen layers', name_encoder)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        for name, param in model.named_parameters():
            if param.requires_grad:
                print('  Unfrozen layers', name)
            else:
                print('  Frozen layers', name)

        print("Before the loss")

        # Loss time series window
        for data in data_loader:

            # Basically in this mode of execution, we validate every X Steps, if it goes up 3 times,
            # add a stop on the _logs folder that is going to be read by this process
            if g_conf.FINISH_ON_VALIDATION_STALE is not None and \
                    check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE):
                break
            """
                ####################################
                    Main optimization loop
                ####################################
            """

            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            model.zero_grad()
            if not g_conf.FREEZE_ENCODER:
                encoder_model.zero_grad()

            if g_conf.LABELS_SUPERVISED:
                inputs_data = torch.cat(
                    (data['rgb'], torch.zeros(g_conf.BATCH_SIZE, 1, 88, 200)),
                    dim=1).cuda()
            else:
                inputs_data = torch.squeeze(data['rgb'].cuda())

            if g_conf.MODEL_TYPE in ['separate-affordances']:
                #TODO: for this two encoder models training, we haven't put speed as input to train yet

                if g_conf.ENCODER_MODEL_TYPE in [
                        'action_prediction', 'stdim', 'forward',
                        'one-step-affordances'
                ]:

                    e, inter = encoder_model.forward_encoder(
                        inputs_data,
                        dataset.extract_inputs(data).cuda(),
                        # We also add measurements and commands
                        torch.squeeze(dataset.extract_commands(data).cuda()))

                elif g_conf.ENCODER_MODEL_TYPE in ['ETE']:
                    e, inter = encoder_model.forward_encoder(
                        inputs_data,
                        dataset.extract_inputs(data).cuda(),
                        torch.squeeze(dataset.extract_commands(data).cuda()))

                loss_function_params = {
                    'classification_gt':
                    dataset.extract_affordances_targets(
                        data, 'classification').cuda(),
                    # harzard stop, red_light....
                    'class_weights':
                    g_conf.AFFORDANCES_CLASS_WEIGHT,
                    'regression_gt':
                    dataset.extract_affordances_targets(data,
                                                        'regression').cuda(),
                    'variable_weights':
                    g_conf.AFFORDANCES_VARIABLE_WEIGHT
                }
                loss = model(e, loss_function_params)
                loss.backward()
                optimizer.step()

            else:
                raise RuntimeError(
                    'Not implement yet, this branch is only work for g_conf.MODEL_TYPE in [separate-affordances]'
                )
            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):

                state = {
                    'iteration': iteration,
                    'state_dict': model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(
                    state,
                    os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                 g_conf.EXPERIMENT_NAME, 'checkpoints',
                                 str(iteration) + '.pth'))

                if not g_conf.FREEZE_ENCODER:
                    encoder_state = {
                        'iteration': iteration,
                        'state_dict': encoder_model.state_dict(),
                        'best_loss': best_loss,
                        'total_time': accumulated_time,
                        'optimizer': optimizer.state_dict(),
                        'best_loss_iter': best_loss_iter
                    }
                    torch.save(
                        encoder_state,
                        os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME,
                                     g_conf.EXPERIMENT_NAME, 'checkpoints',
                                     str(iteration) + '_encoder.pth'))

            iteration += 1
            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """
            coil_logger.add_scalar('Loss', loss.data, iteration)
            coil_logger.add_image('Image', torch.squeeze(data['rgb']),
                                  iteration)

            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            if iteration % 100 == 0:
                print('Train Iteration: {} [{}/{} ({:.0f}%)] \t Loss: {:.6f}'.
                      format(iteration, iteration, g_conf.NUMBER_ITERATIONS,
                             100. * iteration / g_conf.NUMBER_ITERATIONS,
                             loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Ejemplo n.º 22
0
def execute(gpu, exp_batch, exp_alias, dataset_name):
    # We set the visible cuda devices

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('validation', dataset_name)

    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')

    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)

    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not ?
        return

    #Define the dataset. This structure is has the __get_item__ redefined in a way
    #that you can access the HDFILES positions from the root directory as a in a vector.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)

    print(full_dataset)
    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    # Creates the sampler, this part is responsible for managing the keys. It divides
    # all keys depending on the measurements and produces a set of keys for each bach.

    # The data loader is the multi threaded module from pytorch that release a number of
    # workers to get all the data.
    # TODO: batch size an number of workers go to some configuration file
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=120,
                                              shuffle=False,
                                              num_workers=12,
                                              pin_memory=True)

    # TODO: here there is clearly a posibility to make a cool "conditioning" system.
    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()
    model.eval()

    criterion = Loss()

    latest = get_latest_evaluated_checkpoint()
    if latest is None:  # When nothing was tested, get latest returns none, we fix that.
        latest = 0

    latest = 200000

    best_loss = 1000.0
    best_error = 1000.0
    best_loss_iter = 0
    best_error_iter = 0
    print(dataset.meta_data[0][0])
    for k in dataset.meta_data:
        k[0] = str(k[0], 'utf-8')

    print(dataset.meta_data[0][0])
    cpts = glob.glob(
        '/home-local/rohitrishabh/coil_20-06/_logs/eccv/experiment_1/checkpoints/*.pth'
    )
    # while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
    for ckpt in cpts:

        # if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

        # latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
        latest = int(ckpt[-10:-4])

        # checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias
        #                         , 'checkpoints', str(latest) + '.pth'))
        checkpoint = torch.load(ckpt)
        checkpoint_iteration = checkpoint['iteration']
        print("Validation loaded ", checkpoint_iteration)

        accumulated_loss = 0.0
        accumulated_error = 0.0
        iteration_on_checkpoint = 0
        for data in data_loader:

            input_data, float_data = data
            control_position = np.where(
                dataset.meta_data[:, 0] == 'control')[0][0]
            speed_position = np.where(
                dataset.meta_data[:, 0] == 'speed_module')[0][0]
            # print (torch.squeeze(input_data['rgb']).shape)
            # print (control_position)
            # print (speed_position)
            # Obs : Maybe we could also check for other branches ??

            output = model.forward_branch(
                torch.squeeze(input_data['rgb']).cuda(),
                float_data[:, speed_position, :].cuda(),
                float_data[:, control_position, :].cuda())

            for i in range(input_data['rgb'].shape[0]):

                coil_logger.write_on_csv(
                    checkpoint_iteration,
                    [output[i][0], output[i][1], output[i][2]])

            # TODO: Change this a functional standard using the loss functions.

            loss = torch.mean(
                (output -
                 dataset.extract_targets(float_data).cuda())**2).data.tolist()
            mean_error = torch.mean(
                torch.abs(
                    output -
                    dataset.extract_targets(float_data).cuda())).data.tolist()
            accumulated_error += mean_error
            accumulated_loss += loss
            error = torch.abs(output -
                              dataset.extract_targets(float_data).cuda())

            # Log a random position
            position = random.randint(0, len(float_data) - 1)
            #print (output[position].data.tolist())
            coil_logger.add_message(
                'Iterating in Validation', {
                    'Checkpoint':
                    latest,
                    'Iteration': (str(iteration_on_checkpoint * 120) + '/' +
                                  str(len(dataset))),
                    'MeanError':
                    mean_error,
                    'Loss':
                    loss,
                    'Output':
                    output[position].data.tolist(),
                    'GroundTruth':
                    dataset.extract_targets(float_data)
                    [position].data.tolist(),
                    'Error':
                    error[position].data.tolist(),
                    'Inputs':
                    dataset.extract_inputs(float_data)[position].data.tolist()
                }, latest)
            iteration_on_checkpoint += 1

        checkpoint_average_loss = accumulated_loss / len(dataset)
        checkpoint_average_error = accumulated_error / len(dataset)
        coil_logger.add_scalar('Loss', checkpoint_average_loss, latest)
        coil_logger.add_scalar('Error', checkpoint_average_error, latest)
        print('Loss: ', checkpoint_average_loss, "----Error: ",
              checkpoint_average_error)

        if checkpoint_average_loss < best_loss:
            best_loss = checkpoint_average_loss
            best_loss_iter = latest

            state = {
                'state_dict': model.state_dict(),
                'best_loss': best_loss,
                'best_loss_iter': best_loss_iter
            }
            # TODO : maybe already summarize the best model ???
            torch.save(
                state,
                os.path.join('_logs', exp_batch, exp_alias,
                             'best_model_l2' + '.pth'))

        if checkpoint_average_error < best_error:
            best_error = checkpoint_average_error
            best_error_iter = latest

            state = {
                'state_dict': model.state_dict(),
                'best_error': best_error,
                'best_error_iter': best_error_iter
            }
            # TODO : maybe already summarize the best model ???
            torch.save(
                state,
                os.path.join('_logs', exp_batch, exp_alias,
                             'best_model_l1' + '.pth'))

        print('Best Loss: ', best_loss, "Checkpoint", best_loss_iter)
        print('Best Error: ', best_error, "Checkpoint", best_error_iter)

        coil_logger.add_message(
            'Iterating in Validation', {
                'Summary': {
                    'Error': checkpoint_average_error,
                    'Loss': checkpoint_average_loss,
                    'BestError': best_error,
                    'BestLoss': best_loss,
                    'BestLossCheckpoint': best_loss_iter,
                    'BestErrorCheckpoint': best_error_iter
                },
                'Checkpoint': latest
            })
Ejemplo n.º 23
0
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output):
    latest = None
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        # The validation dataset is always fully loaded, so we fix a very high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process('validation', dataset_name)

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' +
                str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME +
                '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the HDFILES positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    dataset_name)
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=dataset_name)

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each bach.

        # The data loader is the multi threaded module from pytorch that release a number of
        # workers to get all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

        # Set ERFnet for segmentation
        model_erf = ERFNet(20)
        model_erf = torch.nn.DataParallel(model_erf)
        model_erf = model_erf.cuda()

        print("LOAD ERFNet - validate")

        def load_my_state_dict(
            model, state_dict
        ):  #custom function to load model when not all dict elements
            own_state = model.state_dict()
            for name, param in state_dict.items():
                if name not in own_state:
                    continue
                own_state[name].copy_(param)
            return model

        model_erf = load_my_state_dict(
            model_erf,
            torch.load(os.path.join('trained_models/erfnet_pretrained.pth')))
        model_erf.eval()
        print("ERFNet and weights LOADED successfully")

        # The window used to keep track of the trainings
        l1_window = []
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:  # When latest is noe
            l1_window = coil_logger.recover_loss_window(dataset_name, None)

        model.cuda()

        best_mse = 1000
        best_error = 1000
        best_mse_iter = 0
        best_error_iter = 0

        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

                latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(latest) + '.pth'))
                checkpoint_iteration = checkpoint['iteration']
                print("Validation loaded ", checkpoint_iteration)

                model.load_state_dict(checkpoint['state_dict'])

                model.eval()
                accumulated_mse = 0
                accumulated_error = 0
                iteration_on_checkpoint = 0
                for data in data_loader:

                    # Compute the forward pass on a batch from  the validation dataset
                    controls = data['directions']

                    # Seg batch
                    rgbs = data['rgb']
                    with torch.no_grad():
                        outputs = model_erf(rgbs)
                    labels = outputs.max(1)[1].byte().cpu().data

                    seg_road = (labels == 0)
                    seg_not_road = (labels != 0)
                    seg = torch.stack((seg_road, seg_not_road), 1).float()

                    output = model.forward_branch(
                        torch.squeeze(seg).cuda(),
                        dataset.extract_inputs(data).cuda(), controls)

                    #                    output = model.foward_branch(torch.squeeze(rgbs).cuda(),
                    #                                                 dataset.extract_inputs(data).cuda(),controls)
                    # It could be either waypoints or direct control
                    if 'waypoint1_angle' in g_conf.TARGETS:
                        write_waypoints_output(checkpoint_iteration, output)
                    else:
                        write_regular_output(checkpoint_iteration, output)

                    mse = torch.mean(
                        (output - dataset.extract_targets(data).cuda()
                         )**2).data.tolist()
                    mean_error = torch.mean(
                        torch.abs(output -
                                  dataset.extract_targets(data).cuda())
                    ).data.tolist()

                    accumulated_error += mean_error
                    accumulated_mse += mse
                    error = torch.abs(output -
                                      dataset.extract_targets(data).cuda())

                    # Log a random position
                    position = random.randint(0, len(output.data.tolist()) - 1)

                    coil_logger.add_message(
                        'Iterating', {
                            'Checkpoint':
                            latest,
                            'Iteration': (str(iteration_on_checkpoint * 120) +
                                          '/' + str(len(dataset))),
                            'MeanError':
                            mean_error,
                            'MSE':
                            mse,
                            'Output':
                            output[position].data.tolist(),
                            'GroundTruth':
                            dataset.extract_targets(
                                data)[position].data.tolist(),
                            'Error':
                            error[position].data.tolist(),
                            'Inputs':
                            dataset.extract_inputs(data)
                            [position].data.tolist()
                        }, latest)
                    iteration_on_checkpoint += 1
                    print("Iteration %d  on Checkpoint %d : Error %f" %
                          (iteration_on_checkpoint, checkpoint_iteration,
                           mean_error))
                """
                    ########
                    Finish a round of validation, write results, wait for the next
                    ########
                """

                checkpoint_average_mse = accumulated_mse / (len(data_loader))
                checkpoint_average_error = accumulated_error / (
                    len(data_loader))
                coil_logger.add_scalar('Loss', checkpoint_average_mse, latest,
                                       True)
                coil_logger.add_scalar('Error', checkpoint_average_error,
                                       latest, True)

                if checkpoint_average_mse < best_mse:
                    best_mse = checkpoint_average_mse
                    best_mse_iter = latest

                if checkpoint_average_error < best_error:
                    best_error = checkpoint_average_error
                    best_error_iter = latest

                coil_logger.add_message(
                    'Iterating', {
                        'Summary': {
                            'Error': checkpoint_average_error,
                            'Loss': checkpoint_average_mse,
                            'BestError': best_error,
                            'BestMSE': best_mse,
                            'BestMSECheckpoint': best_mse_iter,
                            'BestErrorCheckpoint': best_error_iter
                        },
                        'Checkpoint': latest
                    }, latest)

                l1_window.append(checkpoint_average_error)
                coil_logger.write_on_error_csv(dataset_name,
                                               checkpoint_average_error)

                # If we are using the finish when validation stops, we check the current
                if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                    if dlib.count_steps_without_decrease(l1_window) > 3 and \
                            dlib.count_steps_without_decrease_robust(l1_window) > 3:
                        coil_logger.write_stop(dataset_name, latest)
                        break

            else:

                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)

                coil_logger.add_message('Loading',
                                        {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
Ejemplo n.º 24
0
def execute(gpu, exp_batch, exp_alias):

    from time import gmtime, strftime

    manualSeed = g_conf.SEED
    torch.cuda.manual_seed(manualSeed)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.TRAIN_DATASET_NAME)
    real_dataset = g_conf.TARGET_DOMAIN_PATH
    #main data loader
    dataset = CoILDataset(full_dataset,
                          real_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    sampler = BatchSequenceSampler(
        splitter.control_steer_split(dataset.measurements,
                                     dataset.meta_data), g_conf.BATCH_SIZE,
        g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_sampler=sampler,
                                              shuffle=False,
                                              num_workers=6,
                                              pin_memory=True)

    st = lambda aug: iag.Sometimes(aug, 0.4)
    oc = lambda aug: iag.Sometimes(aug, 0.3)
    rl = lambda aug: iag.Sometimes(aug, 0.09)
    augmenter = iag.Augmenter([iag.ToGPU()] + [
        rl(iag.GaussianBlur(
            (0, 1.5))),  # blur images with a sigma between 0 and 1.5
        rl(iag.AdditiveGaussianNoise(loc=0, scale=(
            0.0, 0.05), per_channel=0.5)),  # add gaussian noise to images
        oc(iag.Dropout((0.0, 0.10), per_channel=0.5)
           ),  # randomly remove up to X% of the pixels
        oc(
            iag.CoarseDropout(
                (0.0, 0.10), size_percent=(0.08, 0.2),
                per_channel=0.5)),  # randomly remove up to X% of the pixels
        oc(iag.Add((-40, 40), per_channel=0.5)
           ),  # change brightness of images (by -X to Y of original value)
        st(iag.Multiply((0.10, 2), per_channel=0.2)
           ),  # change brightness of images (X-Y% of original value)
        rl(iag.ContrastNormalization(
            (0.5, 1.5), per_channel=0.5)),  # improve or worsen the contrast
        rl(iag.Grayscale((0.0, 1))),  # put grayscale
    ]  # do all of the above in random order
                              )

    l1weight = g_conf.L1_WEIGHT
    task_adv_weight = g_conf.TASK_ADV_WEIGHT
    image_size = tuple([88, 200])

    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    print("GPU", gpu)
    print("Configurations of ", exp_alias)
    print("GANMODEL_NAME", g_conf.GANMODEL_NAME)
    print("LOSS_FUNCTION", g_conf.LOSS_FUNCTION)
    print("LR_G, LR_D, LR", g_conf.LR_G, g_conf.LR_D, g_conf.LEARNING_RATE)
    print("SKIP", g_conf.SKIP)
    print("TYPE", g_conf.TYPE)
    print("L1 WEIGHT", g_conf.L1_WEIGHT)
    print("TASK ADV WEIGHT", g_conf.TASK_ADV_WEIGHT)
    print("LAB SMOOTH", g_conf.LABSMOOTH)

    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        netD = ganmodels._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels._netG(loss=g_conf.LOSS_FUNCTION,
                               skip=g_conf.SKIP).cuda()
    elif g_conf.GANMODEL_NAME == 'LSDcontrol_nopatch':
        netD = ganmodels_nopatch._netD(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_nopatch._netG(loss=g_conf.LOSS_FUNCTION).cuda()
    elif g_conf.GANMODEL_NAME == 'LSDcontrol_nopatch_smaller':
        netD = ganmodels_nopatch_smaller._netD(
            loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_nopatch_smaller._netG(
            loss=g_conf.LOSS_FUNCTION).cuda()

    elif g_conf.GANMODEL_NAME == 'LSDcontrol_task':
        netD_task = ganmodels_task._netD_task(loss=g_conf.LOSS_FUNCTION).cuda()
        netD_img = ganmodels_task._netD_img(loss=g_conf.LOSS_FUNCTION).cuda()
        netG = ganmodels_task._netG(loss=g_conf.LOSS_FUNCTION).cuda()
        netF = ganmodels_task._netF(loss=g_conf.LOSS_FUNCTION).cuda()

        if g_conf.PRETRAINED == 'RECON':
            netF_statedict = torch.load('netF_GAN_Pretrained.wts')
            netF.load_state_dict(netF_statedict)

        elif g_conf.PRETRAINED == 'IL':
            print("Loading IL")
            model_IL = torch.load('best_loss_20-06_EpicClearWeather.pth')
            model_IL_state_dict = model_IL['state_dict']

            netF_state_dict = netF.state_dict()

            print(len(netF_state_dict.keys()), len(model_IL_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netF_state_dict.keys(), model_IL_state_dict.keys())):
                newkey, oldkey = keys
                # if newkey.split('.')[0] == "branch" and oldkey.split('.')[0] == "branches":
                #     print("No Transfer of ",  newkey, " to ", oldkey)
                # else:
                print("Transferring ", newkey, " to ", oldkey)
                netF_state_dict[newkey] = model_IL_state_dict[oldkey]

            netF.load_state_dict(netF_state_dict)
            print("IL Model Loaded!")

    elif g_conf.GANMODEL_NAME == 'LSDcontrol_task_2d':
        netD_bin = ganmodels_task._netD_task().cuda()
        netD_img = ganmodels_task._netD_img().cuda()
        netG = ganmodels_task._netG().cuda()
        netF = ganmodels_task._netF().cuda()

        if g_conf.PRETRAINED == 'IL':
            print("Loading IL")
            model_IL = torch.load(g_conf.IL_AGENT_PATH)
            model_IL_state_dict = model_IL['state_dict']

            netF_state_dict = netF.state_dict()

            print(len(netF_state_dict.keys()), len(model_IL_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netF_state_dict.keys(), model_IL_state_dict.keys())):
                newkey, oldkey = keys
                print("Transferring ", newkey, " to ", oldkey)
                netF_state_dict[newkey] = model_IL_state_dict[oldkey]

            netF.load_state_dict(netF_state_dict)
            print("IL Model Loaded!")

            ####
            if g_conf.IF_AUG:
                print("Loading Aug Decoder")
                model_dec = torch.load(g_conf.DECODER_RECON_PATH)
            else:
                print("Loading Decoder")
                model_dec = torch.load(g_conf.DECODER_RECON_PATH)
            model_dec_state_dict = model_dec['stateG_dict']

            netG_state_dict = netG.state_dict()

            print(len(netG_state_dict.keys()),
                  len(model_dec_state_dict.keys()))
            for i, keys in enumerate(
                    zip(netG_state_dict.keys(), model_dec_state_dict.keys())):
                newkey, oldkey = keys
                print("Transferring ", newkey, " to ", oldkey)
                netG_state_dict[newkey] = model_dec_state_dict[oldkey]

            netG.load_state_dict(netG_state_dict)
            print("Decoder Model Loaded!")

    init_weights(netD_bin)
    init_weights(netD_img)
    # init_weights(netG)

    print(netD_bin)
    print(netF)

    optimD_bin = torch.optim.Adam(netD_bin.parameters(),
                                  lr=g_conf.LR_D,
                                  betas=(0.5, 0.999))
    optimD_img = torch.optim.Adam(netD_img.parameters(),
                                  lr=g_conf.LR_D,
                                  betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(),
                              lr=g_conf.LR_D,
                              betas=(0.5, 0.999))
    if g_conf.TYPE == 'task':
        optimF = torch.optim.Adam(netF.parameters(), lr=g_conf.LEARNING_RATE)
        Task_Loss = TaskLoss()

    if g_conf.GANMODEL_NAME == 'LSDcontrol_task_2d':
        print("Using cross entropy!")
        Loss = torch.nn.CrossEntropyLoss().cuda()

    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter_F = 0
    best_loss_iter_G = 0
    best_lossF = 1000000.0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0
    gen_iterations = 0
    n_critic = g_conf.N_CRITIC

    lossF = Variable(torch.Tensor([100.0]))
    lossG_adv = Variable(torch.Tensor([100.0]))
    lossG_smooth = Variable(torch.Tensor([100.0]))
    lossG = Variable(torch.Tensor([100.0]))

    netD_bin.train()
    netD_img.train()
    netG.train()
    netF.train()
    capture_time = time.time()

    if not os.path.exists('./imgs_' + exp_alias):
        os.mkdir('./imgs_' + exp_alias)

    #TODO check how C network is optimized in LSDSEG
    #TODO put family for losses
    #IMPORTANT WHILE RUNNING THIS, CONV.PY MUST HAVE BATCHNORMS

    fake_img_pool_src = ImagePool(50)
    fake_img_pool_tgt = ImagePool(50)

    for data in data_loader:

        set_requires_grad(netD_bin, True)
        set_requires_grad(netD_img, True)
        set_requires_grad(netG, True)
        set_requires_grad(netF, True)

        # print("ITERATION:", iteration)

        val = 0.0
        input_data, float_data, tgt_imgs = data

        if g_conf.IF_AUG:
            inputs = augmenter(0, input_data['rgb'])
            tgt_imgs = augmenter(0, tgt_imgs)
        else:
            inputs = input_data['rgb'].cuda()
            tgt_imgs = tgt_imgs.cuda()

        inputs = inputs.squeeze(1)
        inputs = inputs - val  #subtracted by 0.5
        tgt_imgs = tgt_imgs - val  #subtracted by 0.5

        controls = float_data[:, dataset.controls_position(), :]

        src_embed_inputs, src_branches = netF(
            inputs,
            dataset.extract_inputs(float_data).cuda())
        tgt_embed_inputs = netF(tgt_imgs, None)

        src_img_fake = netG(src_embed_inputs)
        tgt_img_fake = netG(tgt_embed_inputs)

        if iteration % 100 == 0:
            imgs_to_save = torch.cat(
                (inputs[:1] + val, src_img_fake[:1] + val, tgt_imgs[:1] + val,
                 tgt_img_fake[:1] + val), 0).cpu().data
            coil_logger.add_image("Images", imgs_to_save, iteration)
            imgs_to_save = imgs_to_save.clamp(0.0, 1.0)
            vutils.save_image(imgs_to_save,
                              './imgs_' + exp_alias + '/' + str(iteration) +
                              '_real_and_fake.png',
                              normalize=False)

        ##--------------------Discriminator part!!!!!!!!!!-------------------##
        set_requires_grad(netD_bin, True)
        set_requires_grad(netD_img, False)
        set_requires_grad(netG, False)
        set_requires_grad(netF, False)
        optimD_bin.zero_grad()

        outputsD_real_src_bin = netD_bin(src_embed_inputs)
        outputsD_real_tgt_bin = netD_bin(tgt_embed_inputs)

        gradient_penalty = calc_gradient_penalty(netD_bin, src_embed_inputs,
                                                 tgt_embed_inputs)
        lossD_bin = torch.mean(outputsD_real_tgt_bin -
                               outputsD_real_src_bin) + gradient_penalty
        lossD_bin.backward(retain_graph=True)
        optimD_bin.step()

        coil_logger.add_scalar('Total LossD Bin', lossD_bin.data, iteration)

        #### Discriminator img update ####
        set_requires_grad(netD_bin, False)
        set_requires_grad(netD_img, True)
        set_requires_grad(netG, False)
        set_requires_grad(netF, False)

        optimD_img.zero_grad()
        outputsD_fake_src_img = netD_img(src_img_fake.detach())
        outputsD_fake_tgt_img = netD_img(tgt_img_fake.detach())

        outputsD_real_src_img = netD_img(inputs)
        outputsD_real_tgt_img = netD_img(tgt_imgs)

        gradient_penalty_src = calc_gradient_penalty(netD_img, inputs,
                                                     src_img_fake)
        lossD_img_src = torch.mean(
            outputsD_fake_src_img -
            outputsD_real_src_img) + gradient_penalty_src

        gradient_penalty_tgt = calc_gradient_penalty(netD_img, tgt_imgs,
                                                     tgt_img_fake)
        lossD_img_tgt = torch.mean(
            outputsD_fake_tgt_img -
            outputsD_real_tgt_img) + gradient_penalty_tgt

        lossD_img = (lossD_img_src + lossD_img_tgt) * 0.5
        lossD_img.backward(retain_graph=True)
        optimD_img.step()

        coil_logger.add_scalar('Total LossD img', lossD_img.data, iteration)

        if ((iteration + 1) % n_critic) == 0:

            #####Generator updates#######
            set_requires_grad(netD_bin, False)
            set_requires_grad(netD_img, False)
            set_requires_grad(netG, True)
            set_requires_grad(netF, False)

            outputsD_fake_src_img = netD_img(src_img_fake)
            outputsD_real_tgt_img = netD_img(tgt_imgs)
            outputsD_fake_tgt_img = netD_img(tgt_img_fake)

            lossG_src_smooth = L1_loss(src_img_fake, inputs)
            lossG_tgt_smooth = L1_loss(tgt_img_fake, tgt_imgs)
            lossG_smooth = (lossG_src_smooth + lossG_tgt_smooth) * 0.5

            lossG_adv = 0.5 * (-1.0 * outputsD_fake_src_img.mean() -
                               1.0 * outputsD_fake_tgt_img.mean())
            lossG = (lossG_smooth + 0.0 * lossG_adv)
            lossG.backward(retain_graph=True)
            optimG.step()

            coil_logger.add_scalar('Total LossG', lossG.data, iteration)

            #####Task network updates##########################
            set_requires_grad(netD_bin, False)
            set_requires_grad(netD_img, False)
            set_requires_grad(netG, False)
            set_requires_grad(netF, True)
            optimF.zero_grad()

            src_embed_inputs, src_branches = netF(
                inputs,
                dataset.extract_inputs(float_data).cuda())
            tgt_embed_inputs = netF(tgt_imgs, None)
            src_img_fake = netG(src_embed_inputs)
            tgt_img_fake = netG(tgt_embed_inputs)

            outputsD_fake_src_img = netD_img(src_img_fake)
            outputsD_real_tgt_img = netD_img(tgt_imgs)

            lossF_task = Task_Loss.MSELoss(
                src_branches,
                dataset.extract_targets(float_data).cuda(), controls.cuda(),
                dataset.extract_inputs(float_data).cuda())

            lossF_adv_bin = netD_bin(src_embed_inputs).mean() - netD_bin(
                tgt_embed_inputs).mean()
            lossF_adv_img = outputsD_fake_src_img.mean(
            ) - outputsD_real_tgt_img.mean()
            lossF_adv = 0.5 * (lossF_adv_bin + 0.1 * lossF_adv_img)
            lossF = (lossF_task + task_adv_weight * lossF_adv)

            coil_logger.add_scalar('Total Task Loss', lossF.data, iteration)
            coil_logger.add_scalar('Adv Task Loss', lossF_adv.data, iteration)
            coil_logger.add_scalar('Only Task Loss', lossF_task.data,
                                   iteration)
            lossF.backward(retain_graph=True)
            optimF.step()

            if lossF.data < best_lossF:
                best_lossF = lossF.data.tolist()
                best_loss_iter_F = iteration

        #optimization for one iter done!

        position = random.randint(0, len(float_data) - 1)

        accumulated_time += time.time() - capture_time
        capture_time = time.time()

        if is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'stateD_bin_dict': netD_bin.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'total_time': accumulated_time,
                'best_loss_iter_F': best_loss_iter_F
            }
            torch.save(
                state,
                os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch,
                             exp_alias, 'checkpoints',
                             str(iteration) + '.pth'))

        if iteration == best_loss_iter_F and iteration > 10000:

            state = {
                'iteration': iteration,
                'stateD_bin_dict': netD_bin.state_dict(),
                'stateF_dict': netF.state_dict(),
                'best_lossD': best_lossD,
                'best_lossF': best_lossF,
                'total_time': accumulated_time,
                'best_loss_iter_F': best_loss_iter_F
            }
            torch.save(
                state,
                os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch,
                             exp_alias, 'best_modelF' + '.pth'))

        iteration += 1
Ejemplo n.º 25
0
    def run_step(self, input_data):

        # Get the current directions for following the route

        directions = self._get_current_direction(self._vehicle_pos)
        logging.debug(" Current direction %f ", directions)

        # Take the forward speed and normalize it for it to go from 0-1
        network_input = input_data['can_bus'][1]['speed'] / g_conf.SPEED_FACTOR
        network_input = torch.cuda.FloatTensor([network_input]).unsqueeze(0)

        # TODO remove ifs
        #if 'scenario' in g_conf.MEASUREMENTS_INPUTS:
        #    network_input = torch.cat((torch.cuda.FloatTensor([input_data['scenario']]),
        #                               network_input), 1)

        # Compute the forward pass processing the sensors got from CARLA.
        # TODO we start with an if but we can build a class hierarquical !

        if g_conf.MODEL_TYPE in [
                'coil-icra', 'coil-icra-KLD', 'separate-supervised'
        ]:
            directions_tensor = torch.cuda.LongTensor([directions])
            #print("       Directions ", int(directions))
            if False:
                save_path = os.path.join('temp', 'ete_baseline')
                if not os.path.exists(save_path):
                    os.mkdir(save_path)
                save_image(
                    input_data['sensor_input'],
                    os.path.join(
                        save_path,
                        'run_input_' + str(self.count).zfill(5) + ".png"))
                self.count += 1
            model_outputs = self._model.forward_branch(
                input_data['sensor_input'], network_input, directions_tensor)

        elif g_conf.MODEL_TYPE in ['coil-icra-VAE']:
            directions_tensor = torch.cuda.LongTensor([directions])
            if g_conf.ENCODER_MODEL_TYPE in ['VAE']:
                if g_conf.LABELS_SUPERVISED:
                    input = torch.cat(
                        (input_data['sensor_input'], torch.zeros(
                            1, 1, 88, 200).cuda()),
                        dim=1)
                    recon_x, mu, _, z = self.encoder_model(input)
                else:
                    recon_x, mu, _, z = self.encoder_model(
                        input_data['sensor_input'])

            elif g_conf.ENCODER_MODEL_TYPE in ['Affordances']:
                mu, _ = self.encoder_model(input_data['sensor_input'])

            if False:
                save_path = os.path.join('temp', 'affordances_upperbound')
                if not os.path.exists(save_path):
                    os.mkdir(save_path)
                if g_conf.LABELS_SUPERVISED:
                    save_image(
                        input_data['sensor_input'],
                        os.path.join(
                            save_path,
                            'run_input_' + str(self.count).zfill(5) + ".png"))
                    split = torch.split(torch.squeeze(recon_x, dim=1), [3, 1],
                                        dim=1)
                    save_image(
                        split[0],
                        os.path.join(
                            save_path, 'run_recon_rgb_' +
                            str(self.count).zfill(5) + ".png"))
                    save_image(
                        split[1],
                        os.path.join(
                            save_path, 'run_recon_labels_' +
                            str(self.count).zfill(5) + ".png"))
                else:
                    save_image(
                        input_data['sensor_input'],
                        os.path.join(
                            save_path,
                            'run_input_' + str(self.count).zfill(5) + ".png"))
                    #save_image(recon_x, os.path.join(save_path, 'run_recon_' + str(self.count).zfill(5) + ".png"))
                self.count += 1

            model_outputs = self._model.forward_branch(mu, network_input,
                                                       directions_tensor)

            #print(' frame', self.count)
            #print(' direction', directions_tensor)
            #print(' branch output', model_outputs)

        elif g_conf.MODEL_TYPE in [
                'separate-supervised-NoSpeed', 'coil-icra-NoSpeed'
        ]:
            directions_tensor = torch.cuda.LongTensor([directions])
            if False:
                save_path = os.path.join('temp', 'ETE_resnet34_6')
                if not os.path.exists(save_path):
                    os.mkdir(save_path)
                save_image(
                    input_data['sensor_input'],
                    os.path.join(
                        save_path,
                        'run_input_' + str(self.count).zfill(5) + ".png"))
                self.count += 1
            model_outputs = self._model.forward_branch(
                input_data['sensor_input'], directions_tensor)

        else:
            directions_tensor = torch.cuda.FloatTensor(
                encode_directions(directions))
            model_outputs = self._model.forward(
                self._process_sensors(input_data['rgb'][1]), network_input,
                directions_tensor)[0]

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])
        control = carla.VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)
        logging.debug("Output %f %f %f " %
                      (control.steer, control.throttle, control.brake))

        if self.first_iter:
            coil_logger.add_message('Iterating', {
                "Checkpoint": self.checkpoint['iteration'],
                'Agent': str(steer)
            }, self.checkpoint['iteration'])
        # There is the posibility to replace some of the predictions with oracle predictions.
        self.first_iter = False

        #print(['steer: ', control.steer, 'throttle: ', control.throttle, 'brake: ', control.brake])

        return control
Ejemplo n.º 26
0
def execute(gpu, exp_batch, exp_alias):

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.TRAIN_DATASET_NAME)
    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]))

    sampler = BatchSequenceSampler(
        splitter.control_steer_split(dataset.measurements,
                                     dataset.meta_data), g_conf.BATCH_SIZE,
        g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_sampler=sampler,
                                              shuffle=False,
                                              num_workers=6,
                                              pin_memory=True)

    l1weight = 1.0
    image_size = tuple([88, 200])
    testmode = 1

    # print("helllooooo", g_conf.MODEL_NAME)
    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        # netD = ganmodels._netD().cuda()
        netG = ganmodels._netG(skip=g_conf.SKIP).cuda()
    # else:
    #     netD = ganmodels._oldnetD().cuda()
    #     netG = ganmodels._oldnetG().cuda()

    # init_weights(netD)
    init_weights(netG)
    # print(netD)
    print(netG)

    # optimD = torch.optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(), lr=0.0002, betas=(0.7, 0.999))
    MSE_loss = torch.nn.MSELoss().cuda()
    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter = 0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0

    netG.eval()
    # netD.eval()
    capture_time = time.time()
    for data in data_loader:

        val = 0.5
        input_data, float_data = data
        inputs = input_data['rgb'].cuda()
        inputs = inputs.squeeze(1)
        inputs_in = inputs - val

        #forward pass
        # print(inputs[0][0][0][0], inputs_in[0][0][0][0])
        fake_inputs = netG(inputs_in)  #subtracted by 0.5
        fake_inputs_in = fake_inputs
        # print(fake_inputs[0][0][0][0], fake_inputs_in[0][0][0][0])
        if iteration % 200 == 0:
            imgs_to_save = torch.cat((inputs_in[:2] + val, fake_inputs_in[:2]),
                                     0).cpu().data
            vutils.save_image(imgs_to_save,
                              './noganimgs/' + str(iteration) +
                              'noganreal_samples.png',
                              normalize=True)
            coil_logger.add_image("Images", imgs_to_save, iteration)

        optimG.zero_grad()

        print("~~~~~~~~~__________")
        print(inputs_in[0][0][0][0])
        print(fake_inputs[0][0][0][0])
        lossG_mse = MSE_loss(fake_inputs, inputs)
        print(lossG_mse)
        lossG_mse /= len(inputs_in)

        print("~~~~~~~~~__________--------------")

        lossG_mse.backward()  #retain_graph=True needed?
        optimG.step()

        coil_logger.add_scalar('MSE LossG', lossG_mse.data / len(inputs_in),
                               iteration)

        #optimization for one iter done!

        position = random.randint(0, len(float_data) - 1)

        # if lossD.data < best_lossD:
        #     best_lossD = lossD.data.tolist()

        if lossG_mse.data < best_lossG:
            best_lossG = lossG_mse.data.tolist()
            best_loss_iter = iteration

        accumulated_time += time.time() - capture_time
        capture_time = time.time()
        # print("LossD", lossD.data.tolist(), "LossG", lossG.data.tolist(), "BestLossD", best_lossD, "BestLossG", best_lossG, "Iteration", iteration, "Best Loss Iteration", best_loss_iter)

        # coil_logger.add_message('Iterating',
        #                         {'Iteration': iteration,
        #                             'LossD': lossD.data.tolist(),
        #                             'LossG': lossG.data.tolist(),
        #                             'Images/s': (iteration*g_conf.BATCH_SIZE)/accumulated_time,
        #                             'BestLossD': best_lossD, 'BestLossIteration': best_loss_iter,
        #                             'BestLossG': best_lossG, 'BestLossIteration': best_loss_iter,
        #                             'GroundTruth': dataset.extract_targets(float_data)[position].data.tolist(),
        #                             'Inputs': dataset.extract_inputs(float_data)[position].data.tolist()},
        #                         iteration)
        # if is_ready_to_save(iteration):
        #
        #     state = {
        #         'iteration': iteration,
        #         'stateD_dict': netD.state_dict(),
        #         'stateG_dict': netG.state_dict(),
        #         'best_lossD': best_lossD,
        #         'best_lossG': best_lossG,
        #         'total_time': accumulated_time,
        #         'best_loss_iter': best_loss_iter
        #
        #     }
        #     torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
        #                                    , 'checkpoints', str(iteration) + '.pth'))
        # if iteration == best_loss_iter:
        #
        #     state = {
        #         'iteration': iteration,
        #         'stateD_dict': netD.state_dict(),
        #         'stateG_dict': netG.state_dict(),
        #         'best_lossD': best_lossD,
        #         'best_lossG': best_lossG,
        #         'total_time': accumulated_time,
        #         'best_loss_iter': best_loss_iter
        #
        #     }
        #     torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
        #                                    , 'best_modelG' + '.pth'))
        #
        iteration += 1
Ejemplo n.º 27
0
def execute(gpu, exp_batch, exp_alias, validation_dataset, suppress_output):
    latest = None
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     f'{exp_alias}.yaml'))
        # The validation dataset is always fully loaded, so we fix a very high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process(process_type='validation',
                            param=validation_dataset)

        # Save the output to a file if so desired
        if suppress_output:
            save_output(exp_alias)

        # Define the dataset. This structure has the __get_item__ redefined in a way
        # that you can access the HDFILES positions from the root directory as a in a vector.
        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    validation_dataset)
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=validation_dataset,
                              process_type='validation')

        # Creates the sampler, this part is responsible for managing the keys. It divides
        # all keys depending on the measurements and produces a set of keys for each bach.

        # The data loader is the multi threaded module from pytorch that release a number of
        # workers to get all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION,
                          g_conf.SENSORS).cuda()
        # The window used to keep track of the trainings
        l1_window = []
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:  # When latest is noe
            l1_window = coil_logger.recover_loss_window(
                validation_dataset, None)

        # Keep track of the best loss and the iteration where it happens
        best_loss = 1000
        best_loss_iter = 0

        print(20 * '#')
        print('Starting validation!')
        print(20 * '#')

        # Check if the maximum checkpoint for validating has been reached
        while not maximum_checkpoint_reached(latest):
            # Wait until the next checkpoint is ready (assuming this is run whilst training the model)
            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
                # Get next checkpoint for validation according to the test schedule and load it
                latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 f'{latest}.pth'))
                checkpoint_iteration = checkpoint['iteration']

                model.load_state_dict(checkpoint['state_dict'])
                model.eval()  # Turn off dropout and batchnorm (if any)
                print(f"Validation loaded, checkpoint {checkpoint_iteration}")

                # Main metric will be the used loss for training the network
                criterion = Loss(g_conf.LOSS_FUNCTION)
                checkpoint_average_loss = 0

                # Counter
                iteration_on_checkpoint = 0

                with torch.no_grad():  # save some computation/memory
                    for data in data_loader:
                        # Compute the forward pass on a batch from the validation dataset
                        controls = data['directions'].cuda()
                        img = torch.squeeze(data['rgb']).cuda()
                        speed = dataset.extract_inputs(
                            data).cuda()  # this might not always be speed

                        # For auxiliary metrics
                        output = model.forward_branch(img, speed, controls)

                        # For the loss function
                        branches = model(img, speed)
                        loss_function_params = {
                            'branches': branches,
                            'targets': dataset.extract_targets(data).cuda(),
                            'controls': controls,
                            'inputs': speed,
                            'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                            'variable_weights': g_conf.VARIABLE_WEIGHT
                        }
                        # It could be either waypoints or direct control
                        if 'waypoint1_angle' in g_conf.TARGETS:
                            write_waypoints_output(checkpoint_iteration,
                                                   output)
                        else:
                            write_regular_output(checkpoint_iteration, output)

                        loss, _ = criterion(loss_function_params)
                        loss = loss.data.tolist()

                        # Log a random position
                        position = random.randint(
                            0,
                            len(output.data.tolist()) - 1)

                        coil_logger.add_message(
                            'Iterating', {
                                'Checkpoint':
                                latest,
                                'Iteration':
                                f'{iteration_on_checkpoint * g_conf.BATCH_SIZE}/{len(dataset)}',
                                f'Validation Loss ({g_conf.LOSS_FUNCTION})':
                                loss,
                                'Output':
                                output[position].data.tolist(),
                                'GroundTruth':
                                dataset.extract_targets(
                                    data)[position].data.tolist(),
                                'Inputs':
                                dataset.extract_inputs(data)
                                [position].data.tolist()
                            }, latest)

                        # We get the average with a growing list of values
                        # Thanks to John D. Cook: http://www.johndcook.com/blog/standard_deviation/
                        iteration_on_checkpoint += 1
                        checkpoint_average_loss += (
                            loss -
                            checkpoint_average_loss) / iteration_on_checkpoint

                        print(
                            f"\rProgress: {100 * iteration_on_checkpoint * g_conf.BATCH_SIZE / len(dataset):3.4f}% - "
                            f"Average Loss ({g_conf.LOSS_FUNCTION}): {checkpoint_average_loss:.16f}",
                            end='')
                """
                    ########
                    Finish a round of validation, write results, wait for the next
                    ########
                """

                coil_logger.add_scalar(
                    f'Validation Loss ({g_conf.LOSS_FUNCTION})',
                    checkpoint_average_loss, latest, True)

                # Let's visualize the distribution of the loss
                coil_logger.add_histogram(
                    f'Validation Checkpoint Loss ({g_conf.LOSS_FUNCTION})',
                    checkpoint_average_loss, latest)

                if checkpoint_average_loss < best_loss:
                    best_loss = checkpoint_average_loss
                    best_loss_iter = latest

                coil_logger.add_message(
                    'Iterating', {
                        'Summary': {
                            'Loss': checkpoint_average_loss,
                            'BestLoss': best_loss,
                            'BestLossCheckpoint': best_loss_iter
                        },
                        'Checkpoint': latest
                    }, latest)

                l1_window.append(checkpoint_average_loss)
                coil_logger.write_on_error_csv(validation_dataset,
                                               checkpoint_average_loss, latest)

                # If we are using the finish when validation stops, we check the current checkpoint
                if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                    if dlib.count_steps_without_decrease(l1_window) > 3 and \
                            dlib.count_steps_without_decrease_robust(l1_window) > 3:
                        coil_logger.write_stop(validation_dataset, latest)
                        break

            else:
                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)

                coil_logger.add_message('Loading',
                                        {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        print('\n' + 20 * '#')
        print('Finished validation!')
        print(20 * '#')
        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
Ejemplo n.º 28
0
    def pre_load_hdf5_files(self, path_for_files):
        """
        Function to load all hdfiles from a certain folder
        TODO: Add partially loading of the data
        Returns
            TODO: IMPROVE
            A list with the read sensor data ( h5py)

            All the measurement data

        """

        # Take the names of all measurements from the dataset
        meas_names = list(g_conf.MEASUREMENTS.keys())
        # take the names for all sensors
        sensors_names = list(g_conf.SENSORS.keys())

        # From the determined path take all the possible file names.
        # TODO: Add more flexibility for the file base names ??
        folder_file_names = [
            os.path.join(path_for_files, f)
            for f in glob.glob1(path_for_files, "data_*.h5")
        ]

        # Concatenate all the sensor names and measurements names
        # TODO: This structure is very ugly.
        meas_data_cat = [list([]) for _ in range(len(meas_names))]
        sensors_data_cat = [list([]) for _ in range(len(sensors_names))]

        # We open one dataset to get the metadata for targets
        # that is important to be able to reference variables in a more legible way
        dataset = h5py.File(folder_file_names[0], "r")
        metadata_targets = np.array(dataset['metadata_' + meas_names[0]])

        lastidx = 0
        count = 0
        # TODO: More logs to be added ??
        coil_logger.add_message(
            'Loading', {
                'FilesLoaded': folder_file_names,
                'NumberOfImages': len(folder_file_names)
            })

        for file_name in folder_file_names:
            try:
                dataset = h5py.File(file_name, "r")

                for i in range(len(sensors_names)):
                    x = dataset[sensors_names[i]]
                    old_shape = x.shape[0]

                    #  Concatenate all the datasets for a given sensor.

                    sensors_data_cat[i].append(
                        (lastidx, lastidx + x.shape[0], x))

                for i in range(len(meas_names)):
                    dset_to_append = dataset[meas_names[i]]
                    meas_data_cat[i].append(dset_to_append[:])

                lastidx += old_shape
                dataset.flush()
                count += 1

            except IOError:

                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_exc()
                traceback.print_tb(exc_traceback, limit=1, file=sys.stdout)
                traceback.print_exception(exc_type,
                                          exc_value,
                                          exc_traceback,
                                          limit=2,
                                          file=sys.stdout)
                print("failed to open", file_name)

        # TODO: ADD THE STEERING MULTIPLE CAMERA AUGMENTATION

        # For the number of datasets names that are going to be used for measurements cat all.
        for i in range(len(meas_names)):
            meas_data_cat[i] = np.concatenate(meas_data_cat[i], axis=0)
            meas_data_cat[i] = meas_data_cat[i].transpose((1, 0))

        return sensors_data_cat, meas_data_cat[0], metadata_targets
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12):
    """
        The main encoder training function.
    Args:
        gpu: The GPU id number
        exp_batch: the folder with the experiments
        exp_alias: the alias, experiment name
        suppress_output: if the output are going to be saved on a file
        number_of_workers: the number of threads used for data loading
    Returns:
        None
    """
    try:
        # We set the visible cuda devices to select the GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        g_conf.VARIABLE_WEIGHT = {}
        # At this point the log file with the correct naming is created.
        # You merge the yaml file with the global configuration structure.
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
        set_type_of_process('train_encoder')
        # Set the process into loading status.
        coil_logger.add_message('Loading', {'GPU': os.environ["CUDA_VISIBLE_DEVICES"]})

        # we set a seed for this exp
        seed_everything(seed=g_conf.MAGICAL_SEED)

        # Put the output to a separate file if it is the case
        if suppress_output:
            if not os.path.exists('_output_logs'):
                os.mkdir('_output_logs')
            sys.stdout = open(os.path.join('_output_logs', exp_alias + '_' +
                                           g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a",
                              buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                                           exp_alias + '_err_' + g_conf.PROCESS_NAME + '_'
                                           + str(os.getpid()) + ".out"),
                              "a", buffering=1)

        # Preload option
        if g_conf.PRELOAD_MODEL_ALIAS is not None:
            checkpoint = torch.load(os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH,
                                                 g_conf.PRELOAD_MODEL_ALIAS,
                                                 'checkpoints',
                                                 str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth'))

        # Get the latest checkpoint to be loaded
        # returns none if there are no checkpoints saved for this model
        checkpoint_file = get_latest_saved_checkpoint()
        if checkpoint_file is not None:
            checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias,
                                                 'checkpoints', str(get_latest_saved_checkpoint())))
            iteration = checkpoint['iteration']
            best_loss = checkpoint['best_loss']
            best_loss_iter = checkpoint['best_loss_iter']
        else:
            iteration = 0
            best_loss = 1000000000.0
            best_loss_iter = 0

        # Define the dataset. This structure is has the __get_item__ redefined in a way
        # that you can access the positions from the root directory as a in a vector.
        # full_dataset = os.path.join(os.environ["SRL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)

        # By instantiating the augmenter we get a callable that augment images and transform them
        # into tensors.
        augmenter = Augmenter(g_conf.AUGMENTATION)

        if len(g_conf.EXPERIENCE_FILE) == 1:
            json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2]
        else:
            json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2] + '_' + str(g_conf.EXPERIENCE_FILE[1]).split('/')[-1].split('.')[-2]

        dataset = CoILDataset(transform=augmenter,
                              preload_name=g_conf.PROCESS_NAME + '_' + json_file_name + '_' + g_conf.DATA_USED)

        print ("Loaded dataset")

        data_loader = select_balancing_strategy(dataset, iteration, number_of_workers)

        encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION)
        encoder_model.cuda()
        encoder_model.train()

        print(encoder_model)

        optimizer = optim.Adam(encoder_model.parameters(), lr=g_conf.LEARNING_RATE)

        if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None:
            encoder_model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            accumulated_time = checkpoint['total_time']
            loss_window = coil_logger.recover_loss_window('train', iteration)
        else:  # We accumulate iteration time and keep the average speed
            accumulated_time = 0
            loss_window = []

        print ("Before the loss")

        if g_conf.ENCODER_MODEL_TYPE in ['ETE']:
            criterion = Loss(g_conf.LOSS_FUNCTION)

        # Loss time series window
        for data in data_loader:
            if iteration % 1000 == 0:
                adjust_learning_rate_auto(optimizer, loss_window)

            capture_time = time.time()
            encoder_model.zero_grad()

            """
                ####################################
                    ENCODER_MODEL_TYPE can be: one-step-affordances, ETE, stdim, action_prediction
                    
                ####################################
              - one-step-affordances: input RGB images, compute affordances loss.
              - ETE: input RGB images and speed, compute action loss (steering, throttle, brake)
              - stdim: input two consecutive RGB images, compute the feature loss
              - action_prediction: input two consecutive RGB images, compute action classification loss
              - forward: input two consecutive RGB images, compute action loss + feature loss
              
            """

            if g_conf.ENCODER_MODEL_TYPE in ['one-step-affordances']:
                loss_function_params = {
                    'classification_gt': dataset.extract_affordances_targets(data, 'classification').cuda(),
                # harzard stop, red_light....
                    'class_weights': g_conf.AFFORDANCES_CLASS_WEIGHT,
                    'regression_gt': dataset.extract_affordances_targets(data, 'regression').cuda(),
                    'variable_weights': g_conf.AFFORDANCES_VARIABLE_WEIGHT
                }
                # we input RGB images, speed and command to train affordances
                loss = encoder_model(torch.squeeze(data['rgb'].cuda()),
                                     dataset.extract_inputs(data).cuda(),
                                     torch.squeeze(dataset.extract_commands(data).cuda()),
                                     loss_function_params)

                if iteration == 0:
                    state = {
                        'iteration': iteration,
                        'state_dict': encoder_model.state_dict(),
                        'best_loss': best_loss,
                        'total_time': accumulated_time,
                        'optimizer': optimizer.state_dict(),
                        'best_loss_iter': best_loss_iter
                    }
                    torch.save(state, os.path.join('_logs', exp_batch, exp_alias
                                                   , 'checkpoints', 'inital.pth'))

                loss.backward()
                optimizer.step()

            elif g_conf.ENCODER_MODEL_TYPE in ['forward']:
                # We sample another batch to avoid the superposition

                inputs_data = [data['rgb'][0].cuda(), data['rgb'][1].cuda()]
                loss, loss_other, loss_ete = encoder_model(inputs_data,
                                           dataset.extract_inputs(data),
                                           # We also add measurements and commands
                                           dataset.extract_commands(data),
                                           dataset.extract_targets(data)[0].cuda()
                                           )
                loss.backward()
                optimizer.step()


            elif g_conf.ENCODER_MODEL_TYPE in ['ETE']:
                branches = encoder_model(torch.squeeze(data['rgb'].cuda()),
                                         dataset.extract_inputs(data).cuda(),
                                         torch.squeeze(dataset.extract_commands(data).cuda()))

                loss_function_params = {
                    'branches': branches,
                    'targets': dataset.extract_targets(data).cuda(),  # steer, throttle, brake
                    'inputs': dataset.extract_inputs(data).cuda(),  # speed
                    'branch_weights': g_conf.BRANCH_LOSS_WEIGHT,
                    'variable_weights': g_conf.VARIABLE_WEIGHT
                }

                loss, _ = criterion(loss_function_params)
                loss.backward()
                optimizer.step()

            elif g_conf.ENCODER_MODEL_TYPE in ['stdim']:
                inputs_data = [data['rgb'][0].cuda(), data['rgb'][1].cuda()]
                loss, _, _ = encoder_model(inputs_data,
                                           dataset.extract_inputs(data),
                                           # We also add measurements and commands
                                           dataset.extract_commands(data)
                                           )
                loss.backward()
                optimizer.step()

            elif g_conf.ENCODER_MODEL_TYPE in ['action_prediction']:
                inputs_data = [data['rgb'][0].cuda(), data['rgb'][1].cuda()]
                loss, _, _ = encoder_model(inputs_data,
                                           dataset.extract_inputs(data),
                                           # We also add measurements and commands
                                           dataset.extract_commands(data),
                                           dataset.extract_targets(data)[0].cuda()
                                           )
                loss.backward()
                optimizer.step()

            else:
                raise ValueError("The encoder model type is not know")

            """
                ####################################
                    Saving the model if necessary
                ####################################
            """

            if is_ready_to_save(iteration):
                state = {
                    'iteration': iteration,
                    'state_dict': encoder_model.state_dict(),
                    'best_loss': best_loss,
                    'total_time': accumulated_time,
                    'optimizer': optimizer.state_dict(),
                    'best_loss_iter': best_loss_iter
                }
                torch.save(state, os.path.join('_logs', exp_batch, exp_alias
                                               , 'checkpoints', str(iteration) + '.pth'))

            iteration += 1

            """
                ################################################
                    Adding tensorboard logs.
                    Making calculations for logging purposes.
                    These logs are monitored by the printer module.
                #################################################
            """

            if g_conf.ENCODER_MODEL_TYPE in ['stdim', 'action_prediction', 'forward']:
                coil_logger.add_scalar('Loss', loss.data, iteration)
                coil_logger.add_image('f_t', torch.squeeze(data['rgb'][0]), iteration)
                coil_logger.add_image('f_ti', torch.squeeze(data['rgb'][1]), iteration)

            elif g_conf.ENCODER_MODEL_TYPE in ['one-step-affordances', 'ETE']:
                coil_logger.add_scalar('Loss', loss.data, iteration)
                coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration)

            if loss.data < best_loss:
                best_loss = loss.data.tolist()
                best_loss_iter = iteration

            accumulated_time += time.time() - capture_time
            coil_logger.add_message('Iterating',
                                    {'Iteration': iteration,
                                     'Loss': loss.data.tolist(),
                                     'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time,
                                     'BestLoss': best_loss, 'BestLossIteration': best_loss_iter},
                                    iteration)
            loss_window.append(loss.data.tolist())
            coil_logger.write_on_error_csv('train', loss.data)

            if iteration % 100 == 0:
                print('Train Iteration: {} [{}/{} ({:.0f}%)] \t Loss: {:.6f}'.format(
                    iteration, iteration, g_conf.NUMBER_ITERATIONS,
                    100. * iteration / g_conf.NUMBER_ITERATIONS, loss.data))

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except RuntimeError as e:

        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
Ejemplo n.º 30
0
def execute(gpu, exp_batch, exp_alias):

    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    coil_logger.add_message('Loading', {'GPU': gpu})
    if not os.path.exists('_output_logs'):
        os.mkdir('_output_logs')
    sys.stdout = open(os.path.join('_output_logs',
                      g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1)
    if monitorer.get_status(exp_batch, exp_alias + '.yaml', g_conf.PROCESS_NAME)[0] == "Finished":
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME)
    dataset = CoILDataset(full_dataset, transform=transforms.Compose([transforms.ToPILImage(), transforms.Resize(128, 128), transforms.ToTensor(), transforms.Normalize([ 0.5,  0.5,  0.5], [ 1.0, 1.0, 1.0])]))

    sampler = BatchSequenceSampler(splitter.control_steer_split(dataset.measurements, dataset.meta_data),
                          g_conf.BATCH_SIZE, g_conf.NUMBER_IMAGES_SEQUENCE, g_conf.SEQUENCE_STRIDE)
    data_loader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler,
                                              shuffle=False, num_workers=6, pin_memory=True)

    transform = transforms.Compose([transforms.Resize((88, 200))])
    l1weight = 1.0
    image_size = tuple([88, 200])
    testmode = 1

    # print("helllooooo", g_conf.MODEL_NAME)
    if g_conf.GANMODEL_NAME == 'LSDcontrol':
        netD = ganmodels._netD().cuda()
        netG = ganmodels._netG(skip=g_conf.SKIP).cuda()
    # else:
    #     netD = ganmodels._oldnetD().cuda()
    #     netG = ganmodels._oldnetG().cuda()

    init_weights(netD)
    init_weights(netG)
    print(netD)
    print(netG)

    optimD = torch.optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
    optimG = torch.optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
    MSE_loss = torch.nn.MSELoss().cuda()
    L1_loss = torch.nn.L1Loss().cuda()

    iteration = 0
    best_loss_iter = 0
    best_lossD = 1000000.0
    best_lossG = 1000000.0
    accumulated_time = 0

    netG.eval()
    netD.eval()
    capture_time = time.time()
    for data in data_loader:

        input_data, float_data = data
        inputs = input_data['rgb'].cuda()
        inputs = inputs.squeeze(1)
        print ("Inputs", i)

        #forward pass
        fake_inputs = netG(inputs)
        if iteration % 1000 == 0:
            coil_logger.add_image("Images", torch.cat((inputs[:3], fake_inputs[:3]), 0), iteration)

        ##--------------------Discriminator part!!!!!!!!!!-----------------------
        set_requires_grad(netD, True)
        optimD.zero_grad()

        ##fake
        outputsD_fake_forD = netD(fake_inputs.detach())

        labsize = outputsD_fake_forD.size()
        #Create labels of patchgan style with label smoothing
        labels_fake = torch.zeros(labsize[0], labsize[1], labsize[2], labsize[3]) #Fake labels
        label_fake_noise = torch.rand(labels_fake.size()) * 0.5 - 0.25 #Label smoothing
        labels_fake = labels_fake + label_fake_noise
        labels_fake = Variable(labels_fake).cuda()

        lossD_fake = MSE_loss(outputsD_fake_forD, labels_fake)

        ##real
        outputsD_real = netD(inputs)

        labsize = outputsD_real.size()
        #Create labels of patchgan style with label smoothing
        labels_real = torch.ones(labsize[0], labsize[1], labsize[2], labsize[3]) #Real labels
        label_real_noise = torch.rand(labels_real.size()) * 0.5 - 0.25 #Label smoothing
        labels_real = labels_real + label_real_noise
        labels_real = Variable(labels_real).cuda()

        lossD_real = MSE_loss(outputsD_real, labels_real)

        #Discriminator updates

        lossD = (lossD_real + lossD_fake) * 0.5
        lossD /= len(inputs)
        lossD.backward() #retain_graph=True needed?
        optimD.step()


        coil_logger.add_scalar('Total LossD', lossD.data, iteration)
        coil_logger.add_scalar('Real LossD', lossD_real.data / len(inputs), iteration)
        coil_logger.add_scalar('Fake LossD', lossD_fake.data / len(inputs), iteration)

        ##--------------------Generator part!!!!!!!!!!-----------------------

        #TODO change decoder architecture
        #TODO check norms of gradients later
        #TODO add auxiliary regression loss for steering

        set_requires_grad(netD, False)
        optimG.zero_grad()
        outputsD_fake_forG = netD(fake_inputs)
        #Generator updates

        lossG_adv = MSE_loss(outputsD_fake_forG, labels_real)
        lossG_smooth = L1_loss(fake_inputs, inputs)
        lossG = lossG_adv + l1weight * lossG_smooth
        lossG /= len(inputs)

        lossG.backward() #retain_graph=True needed?
        optimG.step()

        coil_logger.add_scalar('Total LossG', lossG.data, iteration)
        coil_logger.add_scalar('Adv LossG', lossG_adv.data / len(inputs), iteration)
        coil_logger.add_scalar('Smooth LossG', lossG_smooth.data / len(inputs), iteration)

        #optimization for one iter done!

        position = random.randint(0, len(float_data)-1)

        if lossD.data < best_lossD:
            best_lossD = lossD.data.tolist()

        if lossG.data < best_lossG:
            best_lossG = lossG.data.tolist()
            best_loss_iter = iteration

        accumulated_time += time.time() - capture_time
        capture_time = time.time()
        print("LossD", lossD.data.tolist(), "LossG", lossG.data.tolist(), "BestLossD", best_lossD, "BestLossG", best_lossG, "Iteration", iteration, "Best Loss Iteration", best_loss_iter)

        coil_logger.add_message('Iterating',
                                {'Iteration': iteration,
                                    'LossD': lossD.data.tolist(),
                                    'LossG': lossG.data.tolist(),
                                    'Images/s': (iteration*g_conf.BATCH_SIZE)/accumulated_time,
                                    'BestLossD': best_lossD, 'BestLossIteration': best_loss_iter,
                                    'BestLossG': best_lossG, 'BestLossIteration': best_loss_iter,
                                    'GroundTruth': dataset.extract_targets(float_data)[position].data.tolist(),
                                    'Inputs': dataset.extract_inputs(float_data)[position].data.tolist()},
                                iteration)
        if is_ready_to_save(iteration):

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter': best_loss_iter

            }
            torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
                                           , 'checkpoints', str(iteration) + '.pth'))
        if iteration == best_loss_iter:

            state = {
                'iteration': iteration,
                'stateD_dict': netD.state_dict(),
                'stateG_dict': netG.state_dict(),
                'best_lossD': best_lossD,
                'best_lossG': best_lossG,
                'total_time': accumulated_time,
                'best_loss_iter': best_loss_iter

            }
            torch.save(state, os.path.join('/datatmp/Experiments/rohitgan/_logs', exp_batch, exp_alias
                                           , 'best_modelG' + '.pth'))

        iteration += 1