Ejemplo n.º 1
0
def execute(gpu, exp_batch, exp_alias, drive_conditions, params):
    """
    Main loop function. Executes driving benchmarks the specified iterations.

    Args:
        gpu: string assigned to CUDA_VISIBLE_DEVICES and forwarded to
            driving_benchmark.
        exp_batch: experiment batch name; used as a folder under 'configs'
            and '_logs'.
        exp_alias: experiment name; 'configs/<exp_batch>/<exp_alias>.yaml'
            is the configuration that gets merged.
        drive_conditions: string of the form '<SuiteClassName>_<town_name>';
            it is split on '_' into exactly two parts.
        params: dictionary of options. 'suppress_output' is read here and the
            whole dictionary is forwarded to driving_benchmark.

    Returns:
        None. Failures are printed and reported through coil_logger instead
        of being raised to the caller.
    """

    try:
        print("Running ", __file__, " On GPU ", gpu, "of experiment name ", exp_alias)
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu
        # exist_ok avoids the check-then-create race when several processes
        # start at the same time and all try to create the folder.
        os.makedirs('_output_logs', exist_ok=True)

        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))

        exp_set_name, town_name = drive_conditions.split('_')

        # The suite class lives in drive/suites/<snake_case_name>_suite.py and
        # is looked up by its CamelCase name.
        suite_module = __import__(
            'drive.suites.' + camelcase_to_snakecase(exp_set_name) + '_suite',
            fromlist=[exp_set_name])
        suite_class = getattr(suite_module, exp_set_name)
        experiment_set = suite_class()

        set_type_of_process('drive', drive_conditions)

        if params['suppress_output']:
            # Line-buffered redirection of both streams, kept open for the
            # lifetime of the process.
            pid_suffix = g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"
            sys.stdout = open(os.path.join('_output_logs', pid_suffix),
                              "a", buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                                           exp_alias + '_err_' + pid_suffix),
                              "a", buffering=1)

        coil_logger.add_message('Loading', {'Poses': experiment_set.build_experiments()[0].poses})
        control_filename = 'control_output_auto' if g_conf.USE_ORACLE else 'control_output'

        #####
        # Preparing the output files that will contain the driving summary
        #####
        experiment_list = experiment_set.build_experiments()
        # Get all the uniquely named tasks
        task_list = unique([experiment.task_name for experiment in experiment_list])
        # Checkpoint bookkeeping is keyed on the first task only; the code
        # below treats all tasks as becoming ready at the same time.
        checkpoint_key = control_filename + '_' + task_list[0]

        latest = get_latest_evaluated_checkpoint(checkpoint_key)

        if latest is None:  # When nothing was tested, get latest returns none, we fix that.
            latest = 0
            file_base = os.path.join('_logs', exp_batch, exp_alias,
                                     g_conf.PROCESS_NAME + '_csv', control_filename)

            # Write the header of the summary file of every task.
            for task_name in task_list:
                write_header_control_summary(file_base, task_name)

        if g_conf.FINISH_ON_VALIDATION_STALE is not None:
            ######
            # Run a single driving benchmark specified by the checkpoint
            # where validation is stale
            ######
            while validation_stale_point(g_conf.FINISH_ON_VALIDATION_STALE) is None:
                time.sleep(0.1)

            validation_state_iteration = validation_stale_point(g_conf.FINISH_ON_VALIDATION_STALE)
            driving_benchmark(validation_state_iteration, gpu, town_name, experiment_set, exp_batch,
                              exp_alias, params, control_filename, task_list)

        else:
            #####
            # Main Loop, run a benchmark for each specified checkpoint on
            # the "Test Configuration"
            #####
            while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
                if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE, checkpoint_key):

                    latest = get_next_checkpoint(g_conf.TEST_SCHEDULE, checkpoint_key)

                    driving_benchmark(latest, gpu, town_name, experiment_set, exp_batch,
                                      exp_alias, params, control_filename, task_list)

                else:
                    # Poll until the next checkpoint is written.
                    time.sleep(0.1)

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except:
        # Deliberate catch-all at the process boundary: any failure is
        # recorded in the experiment log instead of propagating.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something happened'})
Ejemplo n.º 2
0
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output):
    """
    Validate every scheduled checkpoint of an experiment on one dataset.

    For each checkpoint in g_conf.TEST_SCHEDULE the model weights are loaded,
    the whole dataset is run through the model, and the average MSE and mean
    absolute error are logged through coil_logger.

    Args:
        gpu: string assigned to CUDA_VISIBLE_DEVICES.
        exp_batch: experiment batch name; folder under 'configs' and '_logs'.
        exp_alias: experiment name; 'configs/<exp_batch>/<exp_alias>.yaml'
            is the configuration that gets merged.
        dataset_name: folder name under $COIL_DATASET_PATH; also used as the
            preload name and the process-type suffix.
        suppress_output: when truthy, stdout and stderr are redirected to
            files under '_output_logs'.

    Returns:
        None. Failures are reported through coil_logger; on failure the CSV
        output for the checkpoint being processed is erased.
    """
    latest = None
    try:
        # We set the visible cuda devices
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # At this point the log file with the correct naming is created.
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))
        # The validation dataset is always fully loaded, so we fix a very high number of hours
        g_conf.NUMBER_OF_HOURS = 10000
        set_type_of_process('validation', dataset_name)

        # exist_ok avoids the check-then-create race between processes that
        # start at the same time.
        os.makedirs('_output_logs', exist_ok=True)

        if suppress_output:
            pid_suffix = g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"
            sys.stdout = open(os.path.join('_output_logs',
                                           exp_alias + '_' + pid_suffix),
                              "a", buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                                           exp_alias + '_err_' + pid_suffix),
                              "a", buffering=1)

        full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                    dataset_name)
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. Preload name is just the validation data name
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_name=dataset_name)

        # Validation iterates the dataset in order, hence shuffle=False.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        # Per-checkpoint mean errors, used by the stale-validation test below.
        l1_window = []
        latest = get_latest_evaluated_checkpoint()
        if latest is not None:  # Resuming: recover the errors already logged.
            l1_window = coil_logger.recover_loss_window(dataset_name, None)

        model.cuda()

        best_mse = 1000
        best_error = 1000
        best_mse_iter = 0
        best_error_iter = 0

        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

                latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                 str(latest) + '.pth'))
                checkpoint_iteration = checkpoint['iteration']
                print("Validation loaded ", checkpoint_iteration)

                model.load_state_dict(checkpoint['state_dict'])

                model.eval()
                accumulated_mse = 0
                accumulated_error = 0
                iteration_on_checkpoint = 0
                for data in data_loader:

                    # Compute the forward pass on a batch from the validation dataset
                    controls = data['directions']
                    # NOTE(review): extract_inputs / extract_targets are
                    # called once per batch and reused; this assumes they are
                    # pure accessors over `data` - confirm.
                    inputs = dataset.extract_inputs(data)
                    output = model.forward_branch(
                        torch.squeeze(data['rgb']).cuda(),
                        inputs.cuda(), controls)
                    # It could be either waypoints or direct control
                    if 'waypoint1_angle' in g_conf.TARGETS:
                        write_waypoints_output(checkpoint_iteration, output)
                    else:
                        write_regular_output(checkpoint_iteration, output)

                    targets = dataset.extract_targets(data)
                    difference = output - targets.cuda()
                    error = torch.abs(difference)

                    mse = torch.mean(difference ** 2).data.tolist()
                    mean_error = torch.mean(error).data.tolist()

                    accumulated_error += mean_error
                    accumulated_mse += mse

                    # Log a random position
                    position = random.randint(0, len(output) - 1)

                    coil_logger.add_message(
                        'Iterating', {
                            'Checkpoint': latest,
                            # Samples seen so far: batches done times the
                            # batch size the loader was actually built with.
                            'Iteration': (str(iteration_on_checkpoint *
                                              g_conf.BATCH_SIZE) +
                                          '/' + str(len(dataset))),
                            'MeanError': mean_error,
                            'MSE': mse,
                            'Output': output[position].data.tolist(),
                            'GroundTruth': targets[position].data.tolist(),
                            'Error': error[position].data.tolist(),
                            'Inputs': inputs[position].data.tolist()
                        }, latest)
                    iteration_on_checkpoint += 1
                    print("Iteration %d  on Checkpoint %d : Error %f" %
                          (iteration_on_checkpoint, checkpoint_iteration,
                           mean_error))

                ########
                # Finish a round of validation, write results, wait for the next
                ########
                number_of_batches = len(data_loader)
                checkpoint_average_mse = accumulated_mse / number_of_batches
                checkpoint_average_error = accumulated_error / number_of_batches
                coil_logger.add_scalar('Loss', checkpoint_average_mse, latest,
                                       True)
                coil_logger.add_scalar('Error', checkpoint_average_error,
                                       latest, True)

                if checkpoint_average_mse < best_mse:
                    best_mse = checkpoint_average_mse
                    best_mse_iter = latest

                if checkpoint_average_error < best_error:
                    best_error = checkpoint_average_error
                    best_error_iter = latest

                coil_logger.add_message(
                    'Iterating', {
                        'Summary': {
                            'Error': checkpoint_average_error,
                            'Loss': checkpoint_average_mse,
                            'BestError': best_error,
                            'BestMSE': best_mse,
                            'BestMSECheckpoint': best_mse_iter,
                            'BestErrorCheckpoint': best_error_iter
                        },
                        'Checkpoint': latest
                    }, latest)

                l1_window.append(checkpoint_average_error)
                coil_logger.write_on_error_csv(dataset_name,
                                               checkpoint_average_error)

                # If we are using the finish when validation stops, we check
                # whether the error has stopped decreasing and stop early.
                if g_conf.FINISH_ON_VALIDATION_STALE is not None:
                    if dlib.count_steps_without_decrease(l1_window) > 3 and \
                            dlib.count_steps_without_decrease_robust(l1_window) > 3:
                        coil_logger.write_stop(dataset_name, latest)
                        break

            else:

                latest = get_latest_evaluated_checkpoint()
                time.sleep(1)

                coil_logger.add_message('Loading',
                                        {'Message': 'Waiting Checkpoint'})
                print("Waiting for the next Validation")

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        # Deliberate catch-all at the process boundary.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was unfinished due to some process stop.
        if latest is not None:
            coil_logger.erase_csv(latest)
Ejemplo n.º 3
0
def execute(gpu, exp_batch, exp_alias):
    """
    Run the model over the configured dataset for every scheduled checkpoint.

    For each checkpoint in g_conf.TEST_SCHEDULE the first three output values
    of every sample are written through coil_logger.write_on_csv.

    Args:
        gpu: string assigned to CUDA_VISIBLE_DEVICES.
        exp_batch: experiment batch name; folder of the yaml file and of the
            '_logs' checkpoints.
        exp_alias: experiment name; '<exp_batch>/<exp_alias>.yaml' is merged.

    Returns:
        None. Returns early when the monitorer reports the process as
        "Finished".
    """
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join(exp_batch, exp_alias + '.yaml'))
    set_type_of_process('validation')

    # Line-buffered redirection, kept open for the lifetime of the process.
    sys.stdout = open(str(os.getpid()) + ".out", "a", buffering=1)

    if monitorer.get_status(exp_batch, exp_alias,
                            g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not ?
        return

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                g_conf.DATASET_NAME)

    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()]))

    # TODO: batch size an number of workers go to some configuration file
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=120,
                                              shuffle=False,
                                              num_workers=12,
                                              pin_memory=True)

    # TODO: here there is clearly a posibility to make a cool "conditioning" system.
    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()

    # TODO: The checkpoint will continue, so the logs should restart ??? OR continue were it was

    latest = get_latest_evaluated_checkpoint()
    if latest is None:  # When nothing was tested, get latest returns none, we fix that.
        latest = 0

    print(dataset.meta_data)

    # Rows of the label tensor holding the control command and the speed.
    # Looked up once instead of once per batch.
    # NOTE(review): assumes dataset.meta_data does not change while iterating.
    control_position = np.where(dataset.meta_data[:, 0] == 'control')[0][0]
    speed_position = np.where(dataset.meta_data[:, 0] == 'speed_module')[0][0]

    while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

        if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

            latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

            checkpoint = torch.load(
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(latest) + '.pth'))
            checkpoint_iteration = checkpoint['iteration']
            print("Validation loaded ", checkpoint_iteration)
            # NOTE(review): only checkpoint['iteration'] is used; the loaded
            # weights are never applied to `model` in this function. Confirm
            # whether a load_state_dict call is missing.

            for data in data_loader:

                input_data, labels = data
                print(torch.squeeze(input_data['rgb']).shape)

                print(control_position)
                print(speed_position)
                # Obs : Maybe we could also check for other branches ??
                output = model.forward_branch(
                    torch.squeeze(input_data['rgb']).cuda(),
                    labels[:, speed_position, :].cuda(),
                    labels[:, control_position, :].cuda())
                # TODO: clean this squeeze and dimension things

                # One CSV row per sample in the batch.
                for i in range(input_data['rgb'].shape[0]):
                    coil_logger.write_on_csv(
                        checkpoint_iteration,
                        [output[i][0], output[i][1], output[i][2]])
        else:
            time.sleep(1)
            print("Waiting for the next Validation")
Ejemplo n.º 4
0
def execute(gpu, exp_batch, exp_alias, city_name='Town01', memory_use=0.2, host='127.0.0.1'):
    """
    Start a CARLA simulator and benchmark every scheduled checkpoint on it.

    Args:
        gpu: string assigned to CUDA_VISIBLE_DEVICES and forwarded to
            start_carla_simulator.
        exp_batch: experiment batch name; folder under 'configs' and '_logs'.
        exp_alias: experiment name; 'configs/<exp_batch>/<exp_alias>.yaml'
            is the configuration that gets merged.
        city_name: town forwarded to the simulator and to the benchmark.
        memory_use: not used by this function.
        host: address of the CARLA server to connect to.

    Returns:
        None. Errors during the benchmark are logged through coil_logger and
        the simulator process is always killed before returning.
    """
    # TODO Eliminate drive config.

    print("Running ", __file__, " On GPU ", gpu, "of experiment name ", exp_alias)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # exist_ok avoids the check-then-create race between processes that
    # start at the same time.
    os.makedirs('_output_logs', exist_ok=True)

    # NOTE(review): g_conf.PROCESS_NAME is read here before merge_with_yaml
    # and set_type_of_process run below, so the file name may carry a default
    # process name - confirm whether that is intended.
    sys.stdout = open(os.path.join('_output_logs',
                      g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1)

    carla_process, port = start_carla_simulator(gpu, exp_batch, exp_alias, city_name)

    # From here on the simulator is running; the finally clause guarantees it
    # is killed even if the setup below raises.
    try:
        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias+'.yaml'))
        set_type_of_process('drive', city_name)

        logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)

        # TODO we have some external class that control this weather thing.
        experiment_suite = TestSuite()

        coil_logger.add_message('Loading', {'Poses': experiment_suite._poses()})

        try:
            coil_logger.add_message('Loading', {'CARLAClient': host+':'+str(port)})
            with make_carla_client(host, port):

                # Now actually run the driving_benchmark
                latest = 0
                # While the checkpoint is not there
                while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

                    # Get the correct checkpoint
                    if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

                        latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
                        checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias,
                                                             'checkpoints', str(latest) + '.pth'))

                        coil_agent = CoILAgent(checkpoint)
                        # Same (phase, message) form as the other add_message
                        # calls in this function.
                        coil_logger.add_message('Iterating', {"Checkpoint": latest})
                        # TODO: Change alias to actual experiment name.
                        run_driving_benchmark(coil_agent, experiment_suite, city_name,
                                              exp_batch + '_' + exp_alias + '_' + str(latest),
                                              False, host, port)

                        # TODO: When you add the message you need to check if the experiment continues properly

                        # TODO: WRITE AN EFICIENT PARAMETRIZED OUTPUT SUMMARY FOR TEST.

                    else:
                        # Poll until the next checkpoint is written.
                        time.sleep(0.1)

        except TCPConnectionError as error:
            logging.error(error)
            time.sleep(1)
        except KeyboardInterrupt:
            coil_logger.add_message('Error', {'Message': 'Killed By User'})
        except:
            # Deliberate catch-all at the process boundary.
            traceback.print_exc()
            coil_logger.add_message('Error', {'Message': 'Something Happened'})
    finally:
        carla_process.kill()
Ejemplo n.º 5
0
def execute(gpu,
            exp_batch,
            exp_alias,
            drive_conditions,
            memory_use=0.2,
            host='127.0.0.1',
            suppress_output=True,
            no_screen=False):
    """
    Benchmark every scheduled checkpoint in CARLA and append a CSV summary.

    Args:
        gpu: string assigned to CUDA_VISIBLE_DEVICES and forwarded to
            start_carla_simulator.
        exp_batch: experiment batch name; folder under 'configs' and '_logs'.
        exp_alias: experiment name; 'configs/<exp_batch>/<exp_alias>.yaml'
            is the configuration that gets merged.
        drive_conditions: string of the form '<SuiteName>_<town_name>'; the
            suite must be one of ECCVTrainingSuite, ECCVGeneralizationSuite,
            TestT1 or TestT2.
        memory_use: not used by this function.
        host: address of the CARLA server.
        suppress_output: when truthy, stdout is redirected to a file under
            '_output_logs'.
        no_screen: forwarded to start_carla_simulator.

    Returns:
        None. Failures are printed and reported through coil_logger; the
        simulator process, if it was started, is killed before returning.
    """
    # Columns of the summary CSV after the leading 'step' column; they are
    # also the keys read from the averaged benchmark dictionary.
    metric_names = ('episodes_completion', 'intersection_offroad',
                    'intersection_otherlane', 'collision_pedestrians',
                    'collision_vehicles', 'episodes_fully_completed',
                    'driven_kilometers')

    # Stays None until the simulator is launched, so the cleanup below is
    # safe when something fails earlier (e.g. an unknown suite name).
    carla_process = None

    try:

        print("Running ", __file__, " On GPU ", gpu, "of experiment name ",
              exp_alias)
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        # exist_ok avoids the check-then-create race between processes that
        # start at the same time.
        os.makedirs('_output_logs', exist_ok=True)

        merge_with_yaml(os.path.join('configs', exp_batch,
                                     exp_alias + '.yaml'))

        print("drive cond", drive_conditions)
        exp_set_name, town_name = drive_conditions.split('_')

        if g_conf.USE_ORACLE:
            control_filename = 'control_output_auto.csv'
        else:
            control_filename = 'control_output.csv'
        # File name without the '.csv' extension.
        control_basename = control_filename[:-4]

        if exp_set_name == 'ECCVTrainingSuite':
            experiment_set = ECCVTrainingSuite()
        elif exp_set_name == 'ECCVGeneralizationSuite':
            experiment_set = ECCVGeneralizationSuite()
        elif exp_set_name == 'TestT1':
            experiment_set = TestT1()
        elif exp_set_name == 'TestT2':
            experiment_set = TestT2()
        else:
            raise ValueError(" Exp Set name is not correspondent to a city")
        set_type_of_process('drive', drive_conditions)

        if suppress_output:
            sys.stdout = open(os.path.join(
                '_output_logs',
                g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        carla_process, port = start_carla_simulator(gpu, town_name, no_screen)

        coil_logger.add_message(
            'Loading', {'Poses': experiment_set.build_experiments()[0].poses})

        coil_logger.add_message('Loading',
                                {'CARLAClient': host + ':' + str(port)})

        csv_path = os.path.join('_logs', exp_batch, exp_alias,
                                g_conf.PROCESS_NAME + '_csv', control_filename)

        latest = get_latest_evaluated_checkpoint()
        if latest is None:  # When nothing was tested, get latest returns none, we fix that.
            latest = 0

            # Fresh run: (re)create the summary file with its header row.
            with open(csv_path, 'w') as csv_outfile:
                csv_outfile.write(','.join(('step',) + metric_names) + '\n')

        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

            try:
                # Get the correct checkpoint
                if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

                    latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
                    checkpoint = torch.load(
                        os.path.join('_logs', exp_batch, exp_alias,
                                     'checkpoints',
                                     str(latest) + '.pth'))

                    coil_agent = CoILAgent(checkpoint, town_name)

                    coil_logger.add_message('Iterating',
                                            {"Checkpoint": latest}, latest)

                    run_driving_benchmark(
                        coil_agent, experiment_set, town_name,
                        exp_batch + '_' + exp_alias + '_' + str(latest) +
                        '_drive_' + control_basename, True, host, port)

                    # Rebuild the name of the results folder from the
                    # process name, which is expected to have at least three
                    # '_'-separated parts.
                    process_parts = g_conf.PROCESS_NAME.split('_')
                    path = '_'.join([exp_batch, exp_alias, str(latest),
                                     process_parts[0], control_basename,
                                     process_parts[1], process_parts[2]])

                    print(path)
                    print("Finished")
                    benchmark_json_path = os.path.join(get_latest_path(path),
                                                       'metrics.json')
                    with open(benchmark_json_path, 'r') as f:
                        benchmark_dict = json.load(f)

                    averaged_dict = compute_average_std(
                        [benchmark_dict], experiment_set.weathers,
                        len(experiment_set.build_experiments()))
                    print(averaged_dict)

                    # Append one summary row for this checkpoint.
                    with open(csv_path, 'a') as csv_outfile:
                        csv_outfile.write(
                            "%d,%f,%f,%f,%f,%f,%f,%f\n" %
                            ((latest,) + tuple(averaged_dict[name]
                                               for name in metric_names)))

                    # TODO: When you add the message you need to check if the experiment continues properly

                    # TODO: WRITE AN EFICIENT PARAMETRIZED OUTPUT SUMMARY FOR TEST.

                else:
                    # Poll until the next checkpoint is written.
                    time.sleep(0.1)

            # NOTE(review): the exit(1) calls below raise SystemExit, which
            # the outer bare `except` catches, so the function still returns
            # normally after logging a second error. Kept for compatibility.
            except TCPConnectionError as error:
                logging.error(error)
                time.sleep(1)
                carla_process.kill()
                coil_logger.add_message('Error',
                                        {'Message': 'TCP serious Error'})
                exit(1)
            except KeyboardInterrupt:
                carla_process.kill()
                coil_logger.add_message('Error', {'Message': 'Killed By User'})
                exit(1)
            except:
                traceback.print_exc()
                carla_process.kill()
                coil_logger.add_message('Error',
                                        {'Message': 'Something Happened'})
                exit(1)

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except:
        # Deliberate catch-all at the process boundary.
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something happened'})

    finally:
        # Only kill the simulator if it was actually started.
        if carla_process is not None:
            carla_process.kill()
Ejemplo n.º 6
0
def execute(gpu,
            exp_batch,
            exp_alias,
            city_name='Town01',
            memory_use=0.2,
            host='127.0.0.1'):
    """
    Start a CARLA simulator and benchmark every scheduled checkpoint on it.

    After each benchmark the resulting 'metrics.json' is read and its content
    is logged through coil_logger.

    Args:
        gpu: string assigned to CUDA_VISIBLE_DEVICES and forwarded to
            start_carla_simulator.
        exp_batch: experiment batch name; folder of the yaml file and of the
            '_logs' checkpoints.
        exp_alias: experiment name; '<exp_batch>/<exp_alias>.yaml' is merged.
        city_name: town forwarded to the simulator and to the benchmark.
        memory_use: not used by this function.
        host: address of the CARLA server to connect to.

    Returns:
        None. The simulator process is always killed before returning.
    """
    # TODO Eliminate drive config.

    print("Running ", __file__, " On GPU ", gpu, "of experiment name ",
          exp_alias)
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # Line-buffered redirection, kept open for the lifetime of the process.
    sys.stdout = open(str(os.getpid()) + ".out", "a", buffering=1)

    carla_process, port = start_carla_simulator(gpu, exp_batch, exp_alias,
                                                city_name)

    # From here on the simulator is running; the finally clause guarantees it
    # is killed even if the setup below raises.
    try:
        merge_with_yaml(os.path.join(exp_batch, exp_alias + '.yaml'))
        set_type_of_process('test')

        # TODO we have some external class that control this weather thing.
        experiment_suite = TestSuite()

        # Name of the benchmark run; also the folder under
        # '_benchmark_results' that the metrics are read back from.
        benchmark_name = exp_batch + '_' + exp_alias + 'iteration'

        # The simulator is killed on any failure and nothing restarts it, so
        # the benchmark is attempted once; retrying could never reconnect.
        try:
            with make_carla_client(host, port):

                # Now actually run the driving_benchmark
                latest = 0
                # While the checkpoint is not there
                while not maximun_checkpoint_reach(latest,
                                                   g_conf.TEST_SCHEDULE):

                    # Get the correct checkpoint
                    if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):

                        latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
                        checkpoint = torch.load(
                            os.path.join('_logs', exp_batch, exp_alias,
                                         'checkpoints',
                                         str(latest) + '.pth'))

                        coil_agent = CoILAgent(checkpoint)
                        run_driving_benchmark(
                            coil_agent, experiment_suite, city_name,
                            benchmark_name, False, host, port)

                        # Read the resulting dictionary
                        with open(
                                os.path.join('_benchmark_results',
                                             benchmark_name,
                                             'metrics.json'), 'r') as f:
                            summary_dict = json.loads(f.read())

                        # TODO: When you add the message you need to check if the experiment continues properly
                        # NOTE(review): add_message receives a single dict
                        # here; confirm this matches the coil_logger API.
                        coil_logger.add_message(
                            {'Running': {
                                "DBSummary": summary_dict
                            }})

                    else:
                        # Poll until the next checkpoint is written.
                        time.sleep(0.1)

        except TCPConnectionError as error:
            logging.error(error)
            time.sleep(1)
        except KeyboardInterrupt:
            # Stop quietly; cleanup happens in the finally clause.
            pass
        except:
            # Deliberate catch-all at the process boundary.
            traceback.print_exc()
    finally:
        carla_process.kill()
Ejemplo n.º 7
0
def execute(gpu, exp_batch, exp_alias, dataset_name):
    """
    Validation loop. Waits for checkpoints listed in g_conf.TEST_SCHEDULE and, for
    each one that becomes ready, runs the model over the whole validation dataset,
    logging per-batch messages and per-checkpoint average loss / error.

    Args:
        gpu: value written to the CUDA_VISIBLE_DEVICES environment variable.
        exp_batch: experiment batch name; used as folder under 'configs' and '_logs'.
        exp_alias: experiment name; '<exp_alias>.yaml' is the configuration merged.
        dataset_name: dataset folder name, relative to $COIL_DATASET_PATH.

    Returns:
        None. Returns early when the monitorer reports this process as "Finished".
    """
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    set_type_of_process('validation', dataset_name)

    # exist_ok avoids a crash when another process creates the folder concurrently.
    os.makedirs('_output_logs', exist_ok=True)

    # From here on, everything printed goes to a per-process log file. The file is
    # intentionally left open for the lifetime of the process.
    sys.stdout = open(os.path.join(
        '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                      "a",
                      buffering=1)

    if monitorer.get_status(exp_batch, exp_alias + '.yaml',
                            g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not ?
        return

    # Define the dataset. This structure has __getitem__ redefined in a way
    # that you can access the HDF files positions from the root directory as in a vector.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)

    dataset = CoILDataset(full_dataset,
                          transform=transforms.Compose([transforms.ToTensor()]))

    # The data loader is the multi threaded module from pytorch that releases a number of
    # workers to get all the data.
    # TODO: batch size and number of workers go to some configuration file
    batch_size = 120
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=12,
                                              pin_memory=True)

    # TODO: here there is clearly a possibility to make a cool "conditioning" system.
    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()

    criterion = Loss()

    latest = get_latest_evaluated_checkpoint()
    if latest is None:  # When nothing was tested, get latest returns none, we fix that.
        latest = 0

    print(dataset.meta_data)

    # Rows of the float data holding the control command and the speed. These depend
    # only on the dataset meta data, so they are computed once instead of per batch.
    control_position = np.where(dataset.meta_data[:, 0] == 'control')[0][0]
    speed_position = np.where(dataset.meta_data[:, 0] == 'speed_module')[0][0]

    # Initial upper bounds for the best values seen so far.
    best_loss = 1000
    best_error = 1000
    best_loss_iter = 0
    best_error_iter = 0

    while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):

        if not is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
            time.sleep(1)
            print("Waiting for the next Validation")
            continue

        latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)

        checkpoint = torch.load(
            os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                         str(latest) + '.pth'))
        # NOTE(review): only the 'iteration' entry of the checkpoint is read here; the
        # model weights are not restored from it in this function - confirm whether a
        # model.load_state_dict(...) call is expected at this point.
        checkpoint_iteration = checkpoint['iteration']
        print("Validation loaded ", checkpoint_iteration)

        accumulated_loss = 0
        accumulated_error = 0
        iteration_on_checkpoint = 0
        for data in data_loader:

            input_data, float_data = data
            rgb_images = torch.squeeze(input_data['rgb'])
            print(rgb_images.shape)

            print(control_position)
            print(speed_position)
            # Obs : Maybe we could also check for other branches ??

            output = model.forward_branch(
                rgb_images.cuda(),
                float_data[:, speed_position, :].cuda(),
                float_data[:, control_position, :].cuda())

            samples_on_batch = input_data['rgb'].shape[0]
            for i in range(samples_on_batch):

                coil_logger.write_on_csv(
                    checkpoint_iteration,
                    [output[i][0], output[i][1], output[i][2]])

            # TODO: Change this a functional standard using the loss functions.

            # Targets are extracted and moved to the GPU once per batch.
            targets = dataset.extract_targets(float_data)
            difference = output - targets.cuda()
            error = torch.abs(difference)

            loss = torch.mean(difference**2).data.tolist()
            mean_error = torch.mean(error).data.tolist()

            # Batch means are weighted by the number of samples on the batch so that
            # dividing by len(dataset) below yields a true per-sample average
            # (the last batch may be smaller than batch_size).
            accumulated_error += mean_error * samples_on_batch
            accumulated_loss += loss * samples_on_batch

            # Log a random position
            position = random.randint(0, len(float_data) - 1)
            coil_logger.add_message(
                'Iterating', {
                    'Checkpoint': latest,
                    'Iteration': (str(iteration_on_checkpoint * batch_size) +
                                  '/' + str(len(dataset))),
                    'MeanError': mean_error,
                    'Loss': loss,
                    'Output': output[position].data.tolist(),
                    'GroundTruth': targets[position].data.tolist(),
                    'Error': error[position].data.tolist(),
                    'Inputs': dataset.extract_inputs(float_data)
                    [position].data.tolist()
                }, latest)
            iteration_on_checkpoint += 1

        checkpoint_average_loss = accumulated_loss / len(dataset)
        checkpoint_average_error = accumulated_error / len(dataset)
        coil_logger.add_scalar('Loss', checkpoint_average_loss, latest)
        coil_logger.add_scalar('Error', checkpoint_average_error, latest)

        if checkpoint_average_loss < best_loss:
            best_loss = checkpoint_average_loss
            best_loss_iter = latest

        # The error is compared against the best error (not the best loss).
        if checkpoint_average_error < best_error:
            best_error = checkpoint_average_error
            best_error_iter = latest

        coil_logger.add_message(
            'Iterating', {
                'Summary': {
                    'Error': checkpoint_average_error,
                    'Loss': checkpoint_average_loss,
                    'BestError': best_error,
                    'BestLoss': best_loss,
                    'BestLossCheckpoint': best_loss_iter,
                    'BestErrorCheckpoint': best_error_iter
                },
                'Checkpoint': latest
            })