Esempio n. 1
0
def archive_analysis(input_path, output_path, lower_bound, upper_bound):
    """
    Records all solutions from the archives found in input_path where any decision space value is lower than the
    lower_bound or higher than the upper_bound. Archives of dimensions > 5, which don't include decision space values
    are skipped. The output is saved in one file per problem and consists of lines with the following format:
    [evaluation_number] [objective space values] [decision space values]

    Every offending input line is recorded exactly once, no matter how many of its decision space values are out of
    bounds. Output files are opened in append mode and passed to remove_empty_file after they are closed.

    :param input_path: location searched for '.adat' files (handed to get_file_name_list)
    :param output_path: folder receiving the '<suite>_fXX_iXX_dXX_analysis.txt' files
    :param lower_bound: lower bound of the decision space, anything accepted by float()
    :param upper_bound: upper bound of the decision space, anything accepted by float()
    :raises PreprocessingException: if no '.adat' file is found in input_path
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    lb = float(lower_bound)
    ub = float(upper_bound)

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, dimension) = parse_archive_file_name(input_file)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            # Without this the loop body would run with undefined (or stale) suite/function/dimension values
            continue

        if dimension > 5:
            continue

        print(input_file)
        # Columns: 0 = evaluation number, 1-2 = objective values, 3.. = decision space values
        column_start = 3
        column_end = 3 + dimension

        f_out = None
        f_name = ""

        try:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if line[0] == '%' and 'instance' in line:
                        # A new instance starts: finish the previous output file and open the next one
                        if f_out and not f_out.closed:
                            f_out.close()
                            remove_empty_file(f_name)
                        instance = int(get_key_value(line[1:], 'instance').strip(' \t\n\r'))
                        f_name = os.path.join(output_path,
                                              '{}_f{:02d}_i{:02d}_d{:02d}_analysis.txt'.format(suite_name,
                                                                                               function,
                                                                                               instance,
                                                                                               dimension))
                        f_out = open(f_name, 'a')
                        continue

                    columns = line.split()
                    if line[0] == '%' or len(columns) < 4:
                        continue
                    if f_out is None:
                        # Data preceding the first instance header cannot be attributed to any problem
                        continue

                    if any((float(number) > ub) or (float(number) < lb)
                           for number in columns[column_start:column_end]):
                        f_out.write("{}\n".format('\t'.join(columns[:column_end])))
        finally:
            # Also reached when parsing fails midway, so the output handle never leaks
            if f_out and not f_out.closed:
                f_out.close()
                remove_empty_file(f_name)
Esempio n. 2
0
def archive_split(input_paths, output_path, functions, instances, dimensions):
    """Iterates through all files in input_paths and splits those that contain multiple instances to one file per
       instance. The check for multiple instances is done only through file names.

       Only files whose function and dimension are in functions and dimensions, and whose name carries no instance
       number, are processed. Within such a file, an output file
       '<suite>_fXX_iXX_dXX_nondominated.adat' is written (mode 'w') to output_path for every instance contained in
       instances; sections of other instances are ignored and no file is created for them. Comment lines ('%') are
       written to the file of the instance they belong to; empty lines are dropped.

       :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            # A truthy instance means the file name already denotes a single instance: nothing to split
            if (function not in functions) or instance or (dimension not in dimensions):
                continue

        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        print(input_file)
        # f_out is not None exactly while the section of a requested instance is being copied
        f_out = None
        # Comment lines are held back so that they end up in the file of their own instance
        buffered_lines = ''

        try:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if not line.strip():
                        # Ignore empty lines
                        continue

                    if line[0] == '%':
                        if 'instance' in line:
                            if f_out is not None:
                                # Trailing comments of the finished instance belong to its own file
                                f_out.write(buffered_lines)
                                f_out.close()
                                f_out = None
                            buffered_lines = ''
                            instance = int(get_key_value(line[1:], 'instance'))
                            if instance in instances:
                                output_file = os.path.join(
                                    output_path,
                                    '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'.format(suite_name,
                                                                                          function,
                                                                                          instance,
                                                                                          dimension))
                                f_out = open(output_file, 'w')
                        if f_out is not None:
                            buffered_lines += line

                    elif f_out is not None:
                        if buffered_lines:
                            f_out.write(buffered_lines)
                            buffered_lines = ''
                        f_out.write(line)

            if f_out is not None and buffered_lines:
                # Comments following the last solution of the last requested instance
                f_out.write(buffered_lines)
        finally:
            if f_out is not None:
                f_out.close()
Esempio n. 3
0
def extract_extremes(input_paths, output_file, functions, instances, dimensions):
    """
    Extracts the extreme points from the archives contained in input_paths and outputs them to the output_file in
    the following format:
    [problem_name] [extreme_point_1] [extreme_point_2]

    Assumes the two extreme points are contained in the first two non-empty data lines of every instance archive
    (columns 1 and 2 of each line are taken as the point). Comment lines ('%') and empty lines are ignored.
    Performs no kind of sorting or filtering of the problems, therefore if multiple copies of one problem are present
    in the input, multiple lines for one problem will be also present in the output.

    The output_file is opened in append mode and flushed after every input file.

    :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    # Check whether input paths exist
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    # Read the input files one by one and save the result in the output_file
    with open(output_file, 'a') as f_out:
        for input_file in input_files:
            try:
                (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
                if (function not in functions) or (instance not in instances) or (dimension not in dimensions):
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)

            with open(input_file, 'r') as f_in:
                extreme1 = None
                # Number of data lines consumed for the current instance (0, 1 or 2)
                count = 0
                for line in f_in:
                    if line[0] == '%':
                        if 'instance' in line:
                            # A new instance section starts, look for its two extremes again
                            instance = int(get_key_value(line[1:], 'instance').strip(' \t\n\r'))
                            count = 0
                        continue

                    # Lines read from a file keep their newline, so emptiness must be tested after stripping;
                    # otherwise a blank line would be consumed as an (empty) extreme point
                    if count > 1 or not line.strip():
                        continue

                    if count == 0:
                        extreme1 = line.split()[1:3]
                        count = 1
                    else:
                        extreme2 = line.split()[1:3]
                        count = 2
                        try:
                            string = '{}_f{:02d}_i{:02d}_d{:02d}\t'.format(suite_name, function, instance, dimension)
                            string = string + '\t'.join(extreme1) + '\t' + '\t'.join(extreme2) + '\n'
                            f_out.write(string)
                        except ValueError:
                            print('Skipping instance {} in file {}'.format(instance, input_file))

            f_out.flush()
Esempio n. 4
0
def archive_analysis(input_paths, output_path, lower_bound, upper_bound, functions, instances, dimensions):
    """Records all solutions from the archives found in input_paths where any decision space value is lower than the
       lower_bound or higher than the upper_bound. Archives of dimensions > 5, which don't include decision space
       values are skipped. The output consists of lines with the following format:
       [evaluation_number] [objective space values] [decision space values]
       Assumes one file contains one archive.

       Every offending input line is recorded exactly once, no matter how many of its decision space values are out
       of bounds. Results are appended to '<suite>_fXX_iXX_dXX_analysis.txt' in output_path; the file is passed to
       remove_empty_file once it has been closed.

       :param lower_bound: lower bound of the decision space, anything accepted by float()
       :param upper_bound: upper bound of the decision space, anything accepted by float()
       :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    # Accept bounds given as strings (e.g. straight from the command line) as well as numbers
    lb = float(lower_bound)
    ub = float(upper_bound)

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            if (function not in functions) or (dimension not in dimensions) or (dimension > 5):
                continue
            if not instance:
                # Raised only to reuse the reporting of the handler below
                raise PreprocessingWarning('Analysis does not work on files with multiple archives, use archive_split')
            if instance not in instances:
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        print(input_file)

        # Columns: 0 = evaluation number, 1-2 = objective values, 3.. = decision space values
        column_start = 3
        column_end = 3 + dimension
        output_file = os.path.join(output_path, '{}_f{:02d}_i{:02d}_d{:02d}_analysis.txt'.format(suite_name,
                                                                                                 function,
                                                                                                 instance,
                                                                                                 dimension))

        # Both handles are closed by the context manager, also when a malformed number raises
        with open(output_file, 'a') as f_out, open(input_file, 'r') as f_in:
            for line in f_in:
                columns = line.split()
                if line[0] == '%' or len(columns) < 4:
                    continue
                if any((float(number) > ub) or (float(number) < lb)
                       for number in columns[column_start:column_end]):
                    f_out.write('{}\n'.format('\t'.join(columns[:column_end])))

        remove_empty_file(output_file)
def archive_thinning(input_path, output_path, thinning_precision, currently_nondominated, functions, instances,
                     dimensions):
    """Performs thinning of all the archives in the input path and stores the thinned archives in the output path.
       Assumes one file contains one archive.

       For each archive, all input solutions are rounded according to the thinning precision (in the normalized
       objective space) and added to the thinned archive. If currently_nondominated is True, all solutions that
       are currently nondominated within the thinned archive are output. The two extreme solutions are not output.
       If currently_nondominated is False, only the solutions that are contained in the final archive are output.
       In this case, the two extreme solutions are also output.

       The output file path is the input file path with input_path replaced by output_path. The log level is set to
       'warning' for the duration of the call and restored afterwards, also when an error occurs.

       :raises PreprocessingException: if no '.adat' file is found in input_path
    """
    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    old_level = log_level('warning')
    try:
        for input_file in input_files:
            try:
                (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
                if (function not in functions) or (dimension not in dimensions):
                    continue
                if not instance:
                    # Raised only to reuse the reporting of the handler below
                    raise PreprocessingWarning(
                        'Thinning does not work on files with multiple archives, use archive_split')
                if instance not in instances:
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)

            output_file = input_file.replace(input_path, output_path)
            create_path(os.path.dirname(output_file))
            all_solutions, thinned_solutions = _thin_archive_file(input_file, output_file,
                                                                  (suite_name, function, instance, dimension),
                                                                  thinning_precision, currently_nondominated)

            # An archive without any solution line must not abort the whole run with a division by zero;
            # 100.0 keeps the ratio a true division on Python 2 as well
            percentage = 100.0 * thinned_solutions / all_solutions if all_solutions else 0.0
            print('original: {} thinned: {} ({:.2f}%)'.format(all_solutions, thinned_solutions, percentage))
    finally:
        log_level(old_level)


def _thin_archive_file(input_file, output_file, problem, thinning_precision, currently_nondominated):
    """Thins the single archive in input_file into output_file.

       problem is the (suite_name, function, instance, dimension) tuple used to construct the Archive.
       Returns the pair (number of solution lines read, number of solution lines written).
    """
    all_solutions = 0
    thinned_solutions = 0

    with open(output_file, 'w') as f_out:
        thinned_archive = Archive(*problem)

        # The first two solutions handed out by a fresh archive are used as its extreme points
        extreme1_text = thinned_archive.get_next_solution_text()
        extreme2_text = thinned_archive.get_next_solution_text()
        extreme1 = [float(x) for x in extreme1_text.split()[1:3]]
        extreme2 = [float(x) for x in extreme2_text.split()[1:3]]
        ideal = [min(x, y) for x, y in zip(extreme1, extreme2)]
        nadir = [max(x, y) for x, y in zip(extreme1, extreme2)]
        normalization = [x - y for x, y in zip(nadir, ideal)]

        with open(input_file, 'r') as f_in:
            for line in f_in:

                if line[0] == '%':
                    # Comment lines are copied verbatim
                    f_out.write(line)
                    continue

                columns = line.split()
                if len(columns) < 3:
                    continue

                # Every solution line counts, whether or not it ends up in the thinned archive
                all_solutions += 1

                if columns[0] == '0':
                    # The line contains an extreme solution, do nothing
                    continue

                # The line contains a 'regular' solution
                try:
                    # Round the normalized objective values to multiples of the thinning precision
                    f_original = [float(x) for x in columns[1:3]]
                    f_normalized = [(f_original[i] - ideal[i]) / normalization[i] for i in range(2)]
                    f_normalized = [round(f_normalized[i] / thinning_precision) for i in range(2)]
                    # NOTE(review): the rounded value is shifted by ideal but not rescaled by normalization;
                    # confirm this is the scale Archive.add_solution expects
                    f_normalized = [ideal[i] + f_normalized[i] * thinning_precision for i in range(2)]
                    updated = thinned_archive.add_solution(f_normalized[0], f_normalized[1], line)
                except IndexError:
                    print('Problem in file {}, line {}, skipping line'.format(input_file, line))
                    continue

                if currently_nondominated and (updated == 1):
                    thinned_solutions += 1
                    f_out.write(line)

        if not currently_nondominated:
            if thinned_archive.number_of_solutions == 2:
                # Output the two extreme solutions if they are the only two in the archive
                f_out.write(extreme1_text)
                f_out.write(extreme2_text)
                thinned_solutions = 2

            # Output whatever the final thinned archive still hands out
            while True:
                text = thinned_archive.get_next_solution_text()
                if text is None:
                    break
                thinned_solutions += 1
                f_out.write(text)

    return all_solutions, thinned_solutions
def log_reconstruct(input_path, output_path, algorithm_name, algorithm_info,
                    functions, instances, dimensions):
    """Reconstructs the .info, .dat and .tdat files produced by the logger from the .adat files in the input_path.

       Takes into account only the given functions, instances and dimensions. If any .info, .dat and .tdat files of
       the same names already exist in the output_path, the new data is appended to them.

       Every solution line of a requested instance is fed to the problem's logger. When the `% evaluations = ` line
       of that instance reports more evaluations than the last solution line, the last objective vector is fed once
       more with that evaluation number. Sections of instances that are not requested are ignored entirely.

       :raises PreprocessingWarning: if a requested instance starts while the previous requested instance of the
           same file has not provided its `% evaluations = ` line
    """
    suite_name = 'bbob-biobj'

    print('Reading archive information...')
    archive_info = ArchiveInfo(input_path, functions, instances, dimensions,
                               False)

    function_string = archive_info.get_function_string()
    instance_string = archive_info.get_instance_string()
    dimension_string = archive_info.get_dimension_string()
    file_name_set = archive_info.get_file_name_set()

    print('Initializing the suite and observer...')
    suite_instance = 'instances: {}'.format(instance_string)
    suite_options = 'dimensions: {} function_indices: {}'.format(
        dimension_string, function_string)
    suite = Suite(suite_name, suite_instance, suite_options)
    observer_options = 'result_folder: {} algorithm_name: {} algorithm_info: "{}" log_nondominated: read'. \
        format(output_path, algorithm_name, algorithm_info)
    observer = Observer(suite_name, observer_options)

    print('Reconstructing...')
    for input_file in file_name_set:

        (_suite_name, function, _instance,
         dimension) = parse_archive_file_name(input_file)

        with open(input_file, 'r') as f_in:
            print(input_file)

            problem = None
            objective_vector = None
            evaluation_found = False
            instance = None
            count_not_updated = 0
            evaluation = 0

            for line in f_in:

                if len(line.split()) < 3:
                    continue

                is_comment = line[0] == '%'

                if is_comment and 'instance' in line:
                    instance = int(get_key_value(line[1:], 'instance'))
                    if instance in instances:
                        if problem is not None:
                            if not evaluation_found:
                                # Release the previous problem before aborting
                                problem.free()
                                raise PreprocessingWarning(
                                    'Missing the line `% evaluations = ` in the previous '
                                    'problem. This problem is file = {}, instance = {}'
                                    .format(input_file, instance))
                            if count_not_updated > 0:
                                print(
                                    '{} solutions did not update the archive'.
                                    format(count_not_updated))
                            problem.free()
                        problem = suite.get_problem_by_function_dimension_instance(
                            function, dimension, instance, observer)
                        # Start the new problem with clean per-problem state, so that neither the counter nor
                        # the last objective vector of the previous problem carries over
                        evaluation_found = False
                        count_not_updated = 0
                        objective_vector = None
                        evaluation = 0

                elif instance not in instances:
                    # Nothing from the section of an unrequested instance may reach the current problem,
                    # including its `% evaluations = ` line
                    continue

                elif not is_comment:
                    try:
                        split = line.split()
                        evaluation = int(split[0])
                        objective_vector = np.array(split[1:3])
                        updated = problem.logger_biobj_feed_solution(
                            evaluation, objective_vector)
                        if updated == 0:
                            count_not_updated += 1
                    except ValueError as error:
                        print('Problem in file {}, line {}, skipping line\n{}'.
                              format(input_file, line, error))
                        continue

                elif 'evaluations' in line:
                    old_evaluation = evaluation
                    evaluation = int(get_key_value(line[1:], 'evaluations'))
                    evaluation_found = True
                    if (evaluation > old_evaluation) and problem is not None and objective_vector is not None:
                        # Feed the last solution again with the final evaluation number
                        problem.logger_biobj_feed_solution(
                            evaluation, objective_vector)

            if problem is not None:
                if not evaluation_found:
                    print(
                        'Missing the line `% evaluations = ` in this or the previous problem. This is file = {}, '
                        'instance = {}'.format(input_file, instance))
                if count_not_updated > 0:
                    print('{} solutions did not update the archive'.format(
                        count_not_updated))
                problem.free()
def log_reconstruct(input_path, output_path, algorithm_name, algorithm_info, functions, instances, dimensions):
    """Reconstructs the .info, .dat and .tdat files produced by the logger from the .adat files in the input_path.

       Takes into account only the given functions, instances and dimensions. If any .info, .dat and .tdat files of
       the same names already exist in the output_path, the new data is appended to them.

       The suite is created under the name 'bbob-biobj-ext', the observer under 'bbob-biobj'. Every solution line of
       a requested instance is fed to the problem's logger. When the `% evaluations = ` line of that instance reports
       more evaluations than the last solution line, the last objective vector is fed once more with that evaluation
       number. Sections of instances that are not requested are ignored entirely.

       :raises PreprocessingWarning: if a requested instance starts while the previous requested instance of the
           same file has not provided its `% evaluations = ` line
    """
    ext_suite_name = 'bbob-biobj-ext'
    suite_name = 'bbob-biobj'

    print('Reading archive information...')
    archive_info = ArchiveInfo(input_path, functions, instances, dimensions, False)

    function_string = archive_info.get_function_string()
    instance_string = archive_info.get_instance_string()
    dimension_string = archive_info.get_dimension_string()
    file_name_set = archive_info.get_file_name_set()

    print('Initializing the suite and observer...')
    suite_instance = 'instances: {}'.format(instance_string)
    suite_options = 'dimensions: {} function_indices: {}'.format(dimension_string, function_string)
    suite = Suite(ext_suite_name, suite_instance, suite_options)
    observer_options = 'result_folder: {} algorithm_name: {} algorithm_info: "{}" log_nondominated: read'. \
        format(output_path, algorithm_name, algorithm_info)
    observer = Observer(suite_name, observer_options)

    print('Reconstructing...')
    for input_file in file_name_set:

        (_suite_name, function, _instance, dimension) = parse_archive_file_name(input_file)

        with open(input_file, 'r') as f_in:
            print(input_file)

            problem = None
            objective_vector = None
            evaluation_found = False
            instance = None
            count_not_updated = 0
            evaluation = 0

            for line in f_in:

                if len(line.split()) < 3:
                    continue

                is_comment = line[0] == '%'

                if is_comment and 'instance' in line:
                    instance = int(get_key_value(line[1:], 'instance'))
                    if instance in instances:
                        if problem is not None:
                            if not evaluation_found:
                                # Release the previous problem before aborting
                                problem.free()
                                raise PreprocessingWarning('Missing the line `% evaluations = ` in the previous '
                                                           'problem. This problem is file = {}, instance = {}'
                                                           .format(input_file, instance))
                            if count_not_updated > 0:
                                print('{} solutions did not update the archive'.format(count_not_updated))
                            problem.free()
                        problem = suite.get_problem_by_function_dimension_instance(function, dimension, instance,
                                                                                   observer)
                        # Start the new problem with clean per-problem state, so that neither the counter nor
                        # the last objective vector of the previous problem carries over
                        evaluation_found = False
                        count_not_updated = 0
                        objective_vector = None
                        evaluation = 0

                elif instance not in instances:
                    # Nothing from the section of an unrequested instance may reach the current problem,
                    # including its `% evaluations = ` line
                    continue

                elif not is_comment:
                    try:
                        split = line.split()
                        evaluation = int(split[0])
                        objective_vector = np.array(split[1:3])
                        updated = problem.logger_biobj_feed_solution(evaluation, objective_vector)
                        if updated == 0:
                            count_not_updated += 1
                    except ValueError as error:
                        print('Problem in file {}, line {}, skipping line\n{}'.format(input_file, line, error))
                        continue

                elif 'evaluations' in line:
                    old_evaluation = evaluation
                    evaluation = int(get_key_value(line[1:], 'evaluations'))
                    evaluation_found = True
                    if (evaluation > old_evaluation) and problem is not None and objective_vector is not None:
                        # Feed the last solution again with the final evaluation number
                        problem.logger_biobj_feed_solution(evaluation, objective_vector)

            if problem is not None:
                if not evaluation_found:
                    print('Missing the line `% evaluations = ` in this or the previous problem. This is file = {}, '
                          'instance = {}' .format(input_file, instance))
                if count_not_updated > 0:
                    print('{} solutions did not update the archive'.format(count_not_updated))
                problem.free()
def archive_difference(first_path, second_path, differences, functions, instances, dimensions):
    """Outputs the differences between the matching archive files found in the first and second path.

       Files are matched by base name. Only single-archive files whose function, instance and dimension are among
       the given functions, instances and dimensions take part in the comparison. For every matching pair, the file
       name followed by the unified diff of the two files (labelled 'f1' and 'f2') is appended to the file named by
       differences.

       :raises PreprocessingException: if no '.adat' file is found in first_path or in second_path
    """
    # Check whether first paths exist
    first_files = get_file_name_list(first_path, ".adat")
    if len(first_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(first_path))
    first_files = _select_single_archive_files(first_files, functions, instances, dimensions)

    # Check whether second paths exist
    second_files = get_file_name_list(second_path, ".adat")
    if len(second_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(second_path))
    second_files = _select_single_archive_files(second_files, functions, instances, dimensions)

    # Map base names to the paths actually found, so that files in sub-folders are opened correctly;
    # the first file found wins if a base name occurs more than once
    second_by_name = {}
    for second_file in second_files:
        second_by_name.setdefault(os.path.basename(second_file), second_file)

    with open(differences, 'a') as f_out:
        for first_file in first_files:
            file_name = os.path.basename(first_file)
            second_file = second_by_name.get(file_name)
            if second_file is None:
                continue
            with open(first_file, 'r') as f1, open(second_file, 'r') as f2:
                # Find and output the differences
                diff = difflib.unified_diff(f1.readlines(), f2.readlines(), fromfile='f1', tofile='f2')
                f_out.write('{}\n'.format(file_name))
                print(file_name)
                f_out.writelines(diff)


def _select_single_archive_files(file_names, functions, instances, dimensions):
    """Returns (and prints) the files that hold a single archive of a requested function, instance and dimension."""
    selected = []
    for file_name in file_names:
        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(file_name)
            if (function not in functions) or (dimension not in dimensions):
                continue
            if not instance:
                # Raised only to reuse the reporting of the handler below
                raise PreprocessingWarning('Checking for differences does not work on files with multiple archives, '
                                           'use archive_split')
            if instance not in instances:
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(file_name, warning))
            continue
        print(file_name)
        selected.append(file_name)
    return selected
Esempio n. 9
0
def archive_split(input_paths, output_path, functions, instances, dimensions):
    """Splits every multi-instance archive file found in input_paths into one file per instance.

       Whether a file holds multiple instances is decided from its name alone (no instance number in the name).
       Only the requested functions, instances and dimensions are considered. Each requested instance is written to
       '<suite>_fXX_iXX_dXX_nondominated.adat' in output_path; empty lines are dropped.
    """

    # Check whether input paths exist
    archive_files = get_file_name_list(input_paths, ".adat")
    if len(archive_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    name_template = '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for archive_file in archive_files:

        try:
            suite_name, function, instance, dimension = parse_archive_file_name(archive_file)
            wanted = (function in functions) and (not instance) and (dimension in dimensions)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(archive_file, warning))
            continue

        if not wanted:
            continue

        print(archive_file)
        writer = None
        # Instance currently being copied; None while no requested instance is active
        instance = None

        with open(archive_file, 'r') as reader:

            # Comment lines waiting to be written to the active instance's file
            pending = ''

            for line in reader:
                if not line.strip():
                    # Ignore empty lines
                    continue

                is_comment = line[0] == '%'

                if is_comment and 'instance' in line:
                    # Finish the file of the previous instance, including its trailing comments
                    if writer and not writer.closed:
                        if pending:
                            writer.write(pending)
                            pending = ''
                        writer.close()
                    instance = int(get_key_value(line[1:], 'instance'))
                    if instance in instances:
                        target = os.path.join(output_path,
                                              name_template.format(suite_name, function, instance, dimension))
                        writer = open(target, 'w')
                    else:
                        instance = None

                if not instance:
                    # Outside the section of a requested instance
                    continue

                if is_comment:
                    pending += line
                else:
                    if pending:
                        writer.write(pending)
                        pending = ''
                    writer.write(line)

        if writer and not writer.closed:
            if pending:
                writer.write(pending)
            writer.close()
Esempio n. 10
0
def extract_extremes(input_paths, output_file, functions, instances,
                     dimensions):
    """
    Extracts the extreme points from the archives contained in input_paths and outputs them to the output_file in
    the following format:
    [problem_name] [extreme_point_1] [extreme_point_2]

    Assumes the two extreme points are contained in the first two non-empty data lines of every instance archive
    (columns 1 and 2 of each line are taken as the point). Comment lines ('%') and empty lines are ignored.
    Performs no kind of sorting or filtering of the problems, therefore if multiple copies of one problem are present
    in the input, multiple lines for one problem will be also present in the output.

    The output_file is opened in append mode and flushed after every input file.

    :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    # Check whether input paths exist
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_paths))

    # Read the input files one by one and save the result in the output_file
    with open(output_file, 'a') as f_out:
        for input_file in input_files:
            try:
                (suite_name, function, instance,
                 dimension) = parse_archive_file_name(input_file)
                if (function not in functions) or (
                        instance not in instances) or (dimension
                                                       not in dimensions):
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)

            with open(input_file, 'r') as f_in:
                extreme1 = None
                # Number of data lines consumed for the current instance (0, 1 or 2)
                count = 0
                for line in f_in:
                    if not line.strip():
                        # Lines read from a file keep their newline, so a blank line has to be detected after
                        # stripping; otherwise it would be consumed as an (empty) extreme point
                        continue

                    if line[0] == '%':
                        if 'instance' in line:
                            # A new instance section starts, look for its two extremes again
                            instance = int(
                                get_key_value(line[1:],
                                              'instance').strip(' \t\n\r'))
                            count = 0
                        continue

                    if count > 1:
                        # Both extremes of this instance have already been handled
                        continue

                    if count == 0:
                        extreme1 = line.split()[1:3]
                        count = 1
                        continue

                    extreme2 = line.split()[1:3]
                    count = 2
                    try:
                        string = '{}_f{:02d}_i{:02d}_d{:02d}\t'.format(
                            suite_name, function, instance, dimension)
                        string = string + '\t'.join(
                            extreme1) + '\t' + '\t'.join(extreme2) + '\n'
                        f_out.write(string)
                    except ValueError:
                        print('Skipping instance {} in file {}'.format(
                            instance, input_file))

            f_out.flush()
Esempio n. 11
0
def _single_archive_files(path, functions, instances, dimensions):
    """Return the .adat files found in path that should take part in the comparison.

    A file is kept only if its name parses, it holds a single archive (its name carries an instance)
    and its function, instance and dimension are all among the requested ones. The names of the kept
    files are printed; files whose name cannot be handled are reported and dropped.

    Raises PreprocessingException if no .adat file is found in path.
    """
    file_names = get_file_name_list(path, ".adat")
    if len(file_names) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(path))

    selected = []
    for file_name in file_names:
        try:
            (suite_name, function, instance,
             dimension) = parse_archive_file_name(file_name)
            if not instance:
                raise PreprocessingWarning(
                    'Checking for differences does not work on files with multiple archives, '
                    'use archive_split')
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(file_name, warning))
            continue

        # Files outside the requested functions/instances/dimensions are silently left out, so that
        # they are not compared later on
        if (function not in functions) or (instance not in instances) or (
                dimension not in dimensions):
            continue

        print(file_name)
        selected.append(file_name)

    return selected


def archive_difference(first_path, second_path, differences, functions,
                       instances, dimensions):
    """Outputs the differences between the matching archive files found in the first and second path.

    Two files match if they have the same base name. For every matching pair, the base name followed
    by the unified diff of the two files is appended to the file named by differences.

    Takes into account only the given functions, instances and dimensions.
    Raises PreprocessingException if either path contains no .adat files.
    """
    first_files = _single_archive_files(first_path, functions, instances,
                                        dimensions)
    second_files = _single_archive_files(second_path, functions, instances,
                                         dimensions)

    # Index the second files by base name once (the first occurrence wins) and remember the path
    # under which each file was actually found
    second_by_name = {}
    for second_file in second_files:
        second_by_name.setdefault(os.path.basename(second_file), second_file)

    with open(differences, 'a') as f_out:
        for first_file in first_files:
            file_name = os.path.basename(first_file)
            second_file = second_by_name.get(file_name)
            if second_file is None:
                # No counterpart in the second path
                continue

            with open(first_file, 'r') as f1, open(second_file, 'r') as f2:
                # Find and output the differences
                diff = difflib.unified_diff(f1.readlines(),
                                            f2.readlines(),
                                            fromfile='f1',
                                            tofile='f2')
                f_out.write('{}\n'.format(file_name))
                print(file_name)
                f_out.writelines(diff)
def check_file_complete(input_paths, functions, instances, dimensions, max_diff=1000):
    """Checks the .adat files created by the bbob-biobj logger to see if they have been properly written. Outputs the
       difference between the last evaluation from the .adat file and the one noted in the .info file if they are
       greater than max_diff.

       Takes into account only the given functions, instances and dimensions.
       Raises PreprocessingException if no .info or no .adat files are found or if an .info file cannot be read.
    """

    def inspect_line(input_file, line_string, evaluations):
        """Check that the line_string contains at least three numbers and that they are correctly written. Outputs a
           message if the difference between the evaluations and the first number in the line_string is greater than
           the max_diff given to check_file_complete.
        """
        items = line_string.split()
        if len(items) < 3:
            print("File {}, line {} too short".format(input_file, line_string))

        last_evaluation = None
        for position, item in enumerate(items):
            try:
                if position == 0:
                    # The first number is the evaluation count and must be an integer
                    last_evaluation = int(item)
                else:
                    float(item)
            except ValueError:
                print('File {}, line {}, number {} incorrect'.format(input_file, line_string, item))

        if last_evaluation is None:
            # Without a valid evaluation number there is nothing to compare
            return

        difference = evaluations - last_evaluation
        if difference > max_diff:
            print('Mismatch in evaluations in file {}\n'
                  '.info  = {}\n'
                  '.adat  = {}\n'
                  ' diff  = {}\n'.format(input_file, evaluations, items[0], difference))

    def inspect_instance(input_file, key, last_line):
        """Inspect the last data line of the instance identified by key = (function, instance, dimension).
           Reports instances without data lines or without .info data instead of failing on them.
        """
        if last_line is None:
            print('File {}, instance {} contains no data lines'.format(input_file, key[1]))
        elif key not in info_dict:
            print('File {}, no .info data for function {}, instance {}, dimension {}'.format(input_file, *key))
        else:
            inspect_line(input_file, last_line, info_dict[key])

    # Check whether .info and .adat files exist in the input paths
    info_files = get_file_name_list(input_paths, ".info")
    if len(info_files) == 0:
        raise PreprocessingException('Folder {} does not contain .info files'.format(input_paths))

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException('Folder {} does not contain .adat files'.format(input_paths))

    info_dict = {}
    print('Reading .info files...')
    for input_file in info_files:
        # Store the data from the .info files
        try:
            info_data_list = parse_info_file(input_file)
        except ValueError as error:
            raise PreprocessingException('Cannot read file {}\n{}'.format(input_file, error))

        for info_data_item in info_data_list:
            (function, instance, dimension, evaluations) = info_data_item
            if (function not in functions) or (instance not in instances) or (dimension not in dimensions):
                continue
            info_dict[(function, instance, dimension)] = evaluations

    print('Reading .adat files...')
    for input_file in adat_files:
        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            if (function not in functions) or (instance and instance not in instances) or \
                    (dimension not in dimensions):
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        with open(input_file, 'r') as f:

            instance_found = False
            last_line = None

            for line in f:
                if not line.strip():
                    # Ignore empty lines
                    continue

                if line[0] == '%':
                    if 'instance' not in line:
                        # Ignore lines with other comments
                        continue
                    # A new instance starts here: check the one that has just ended (if it was selected)
                    if instance_found:
                        inspect_instance(input_file, (function, instance, dimension), last_line)
                    instance = int(get_key_value(line[1:], 'instance'))
                    instance_found = (instance in instances)
                    # Do not carry the last line of the previous instance over to this one
                    last_line = None

                elif instance_found:
                    last_line = line

            if instance_found:
                inspect_instance(input_file, (function, instance, dimension), last_line)
def evaluations_append(input_paths, functions, instances, dimensions, fast=False):
    """Appends the comment `% evaluations = NUMBER` to the end of every instance in the .adat files created by the
       bbob-biobj logger.

       If fast is True, it assumes the file contains only one instance (the instance is read from the file contents,
       not the file name) and appends the comment only once - at the end of the file. No check whether this should be
       done is performed - the user should know when it is safe to choose this option.

       The NUMBER is retrieved from the corresponding .info file. If the number is missing for any instance of a
       file, the problem is reported and the file is left unchanged.
       Takes into account only the given functions, instances and dimensions.
       Raises PreprocessingException if no .info or no .adat files are found or if an .info file cannot be read.
    """

    def is_instance_comment(line):
        """Return True if the line is a comment that mentions an instance."""
        return line.startswith('%') and 'instance' in line

    # Check whether .info and .adat files exist in the input paths
    info_files = get_file_name_list(input_paths, ".info")
    if len(info_files) == 0:
        raise PreprocessingException('Folder {} does not contain .info files'.format(input_paths))

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException('Folder {} does not contain .adat files'.format(input_paths))

    info_dict = {}
    for input_file in info_files:
        try:
            info_data_list = parse_info_file(input_file)
        except ValueError as error:
            raise PreprocessingException('Cannot read file {}\n{}'.format(input_file, error))

        for info_data_item in info_data_list:
            (function, instance, dimension, evaluations) = info_data_item
            if (function not in functions) or (instance not in instances) or (dimension not in dimensions):
                continue
            info_dict[(function, instance, dimension)] = evaluations

    for input_file in adat_files:
        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            if (function not in functions) or (instance and instance not in instances) or \
                    (dimension not in dimensions):
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        try:
            if instance or fast:
                # Assumes only one instance is contained in the file
                with open(input_file, 'r') as f:
                    for line in f:
                        if is_instance_comment(line):
                            instance = int(get_key_value(line[1:], 'instance'))
                            break
                # Look the number up before the file is opened for writing
                evaluations = info_dict[(function, instance, dimension)]
                with open(input_file, 'a') as f:
                    f.write('% evaluations = {}'.format(evaluations))

            else:
                # First pass (read-only): collect the instances in file order and look up all their numbers.
                # A missing number raises KeyError here, before the file is modified in any way.
                with open(input_file, 'r') as f:
                    file_instances = [int(get_key_value(line[1:], 'instance'))
                                      for line in f if is_instance_comment(line)]
                if not file_instances:
                    # No instance comment in the file, fall back to the instance from the file name
                    file_instances = [instance]
                evaluations = [info_dict[(function, file_instance, dimension)]
                               for file_instance in file_instances]

                # Second pass: take care of the non-last instances in the file. The comment written in front of
                # an instance comment closes the previous instance and therefore carries its number.
                seen_instances = 0
                try:
                    for line in fileinput.input(input_file, inplace=True):
                        if is_instance_comment(line):
                            if seen_instances > 0:
                                sys.stdout.write('% evaluations = {}\n'.format(evaluations[seen_instances - 1]))
                            seen_instances += 1
                        sys.stdout.write(line)
                finally:
                    # Always restore sys.stdout, even if the rewrite fails
                    fileinput.close()

                # Take care of the last instance in the file
                with open(input_file, 'a') as f:
                    f.write('% evaluations = {}'.format(evaluations[-1]))

        except KeyError as error:
            print('Encountered problem in file {}\n{}'.format(input_file, error))
            continue
Esempio n. 14
0
def _thin_archive_file(input_file, f_out, thinned_archive, thinning_precision,
                       currently_nondominated):
    """Thin the single archive stored in input_file and write the result to the open file f_out.

    Returns the pair (all_solutions, thinned_solutions), i.e. the number of solutions read from the
    input file and the number of solutions written to f_out.
    """
    thinned_solutions = 0
    all_solutions = 0

    # The first two solutions returned by the archive are used as its extreme solutions; they define
    # the ideal and nadir points used for normalization
    extreme1_text = thinned_archive.get_next_solution_text()
    extreme2_text = thinned_archive.get_next_solution_text()
    extreme1 = [float(x) for x in extreme1_text.split()[1:3]]
    extreme2 = [float(x) for x in extreme2_text.split()[1:3]]
    ideal = [min(x, y) for x, y in zip(extreme1, extreme2)]
    nadir = [max(x, y) for x, y in zip(extreme1, extreme2)]
    normalization = [x - y for x, y in zip(nadir, ideal)]

    with open(input_file, 'r') as f_in:
        for line in f_in:
            items = line.split()

            if line[0] == '%':
                # Comments are copied to the output unchanged
                f_out.write(line)
                continue

            if len(items) < 3:
                continue

            if items[0] == '0':
                # The line contains an extreme solution, do nothing
                all_solutions += 1
                continue

            # The line contains a 'regular' solution
            try:
                # Round the normalized objective values to a multiple of the thinning precision
                f_original = [float(x) for x in items[1:3]]
                f_rounded = []
                for i in range(2):
                    normalized = (f_original[i] - ideal[i]) / normalization[i]
                    steps = round(normalized / thinning_precision)
                    f_rounded.append(ideal[i] + steps * thinning_precision)
                updated = thinned_archive.add_solution(f_rounded[0],
                                                       f_rounded[1], line)
            except (IndexError, ValueError):
                # Too few or malformed numbers in the line
                print('Problem in file {}, line {}, skipping line'.format(
                    input_file, line))
                continue
            finally:
                # Skipped lines are counted as well
                all_solutions += 1

            if currently_nondominated and (updated == 1):
                thinned_solutions += 1
                f_out.write(line)

    if not currently_nondominated:
        if thinned_archive.number_of_solutions == 2:
            # Output the two extreme solutions if they are the only two in the archive
            f_out.write(extreme1_text)
            f_out.write(extreme2_text)
            thinned_solutions = 2

        # Output the solutions contained in the final archive
        while True:
            text = thinned_archive.get_next_solution_text()
            if text is None:
                break
            thinned_solutions += 1
            f_out.write(text)

    return all_solutions, thinned_solutions


def archive_thinning(input_path, output_path, thinning_precision,
                     currently_nondominated, functions, instances, dimensions):
    """Performs thinning of all the archives in the input path and stores the thinned archives in the output path.
       Assumes one file contains one archive.

       For each archive, all input solutions are rounded according to the thinning precision (in the normalized
       objective space) and added to the thinned archive. If currently_nondominated is True, all solutions that
       are currently nondominated within the thinned archive are output. The two extreme solutions are not output.
       If currently_nondominated is False, only the solutions that are contained in the final archive are output.
       In this case, the two extreme solutions are also output.

       Takes into account only the given functions, instances and dimensions.
       Raises PreprocessingException if no .adat files are found in the input path.
    """
    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_path))

    old_level = log_level('warning')
    try:
        for input_file in input_files:
            try:
                (suite_name, function, instance,
                 dimension) = parse_archive_file_name(input_file)
                if not instance:
                    raise PreprocessingWarning(
                        'Thinning does not work on files with multiple archives, use archive_split'
                    )
                if (function not in functions) or (
                        instance not in instances) or (dimension
                                                       not in dimensions):
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)

            output_file = input_file.replace(input_path, output_path)
            create_path(os.path.dirname(output_file))

            # The context manager closes the output file even if thinning fails half way
            with open(output_file, 'w') as f_out:
                thinned_archive = Archive(suite_name, function, instance,
                                          dimension)
                all_solutions, thinned_solutions = _thin_archive_file(
                    input_file, f_out, thinned_archive, thinning_precision,
                    currently_nondominated)

            # A file without any solutions must not break the summary
            if all_solutions:
                percentage = 100 * thinned_solutions / all_solutions
            else:
                percentage = 0.0
            print('original: {} thinned: {} ({:.2f}%)'.format(
                all_solutions, thinned_solutions, percentage))
    finally:
        # Restore the previous log level even if an archive could not be processed
        log_level(old_level)
Esempio n. 15
0
def check_file_complete(input_paths,
                        functions,
                        instances,
                        dimensions,
                        max_diff=1000):
    """Checks the .adat files created by the bbob-biobj logger to see if they have been properly written. Outputs the
       difference between the last evaluation from the .adat file and the one noted in the .info file if they are
       greater than max_diff.

       Takes into account only the given functions, instances and dimensions.
       Raises PreprocessingException if no .info or no .adat files are found or if an .info file cannot be read.
    """
    def inspect_line(input_file, line_string, evaluations):
        """Check that the line_string contains at least three numbers and that they are correctly written. Outputs a
           message if the difference between the evaluations and the first number in the line_string is greater than
           the max_diff given to check_file_complete.
        """
        items = line_string.split()
        if len(items) < 3:
            print("File {}, line {} too short".format(input_file, line_string))

        last_evaluation = None
        for position, item in enumerate(items):
            try:
                if position == 0:
                    # The first number is the evaluation count and must be an integer
                    last_evaluation = int(item)
                else:
                    float(item)
            except ValueError:
                print('File {}, line {}, number {} incorrect'.format(
                    input_file, line_string, item))

        if last_evaluation is None:
            # Without a valid evaluation number there is nothing to compare
            return

        difference = evaluations - last_evaluation
        if difference > max_diff:
            print('Mismatch in evaluations in file {}\n'
                  '.info  = {}\n'
                  '.adat  = {}\n'
                  ' diff  = {}\n'.format(input_file, evaluations, items[0],
                                         difference))

    def inspect_instance(input_file, key, last_line):
        """Inspect the last data line of the instance identified by key = (function, instance, dimension).
           Reports instances without data lines or without .info data instead of failing on them.
        """
        if last_line is None:
            print('File {}, instance {} contains no data lines'.format(
                input_file, key[1]))
        elif key not in info_dict:
            print('File {}, no .info data for function {}, instance {}, dimension {}'.format(
                input_file, *key))
        else:
            inspect_line(input_file, last_line, info_dict[key])

    # Check whether .info and .adat files exist in the input paths
    info_files = get_file_name_list(input_paths, ".info")
    if len(info_files) == 0:
        raise PreprocessingException(
            'Folder {} does not contain .info files'.format(input_paths))

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException(
            'Folder {} does not contain .adat files'.format(input_paths))

    info_dict = {}
    print('Reading .info files...')
    for input_file in info_files:
        # Store the data from the .info files
        try:
            info_data_list = parse_info_file(input_file)
        except ValueError as error:
            raise PreprocessingException('Cannot read file {}\n{}'.format(
                input_file, error))

        for info_data_item in info_data_list:
            (function, instance, dimension, evaluations) = info_data_item
            if (function not in functions) or (instance not in instances) or (
                    dimension not in dimensions):
                continue
            info_dict[(function, instance, dimension)] = evaluations

    print('Reading .adat files...')
    for input_file in adat_files:
        try:
            (suite_name, function, instance,
             dimension) = parse_archive_file_name(input_file)
            if (function not in functions) or (instance and instance not in instances) or \
                    (dimension not in dimensions):
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        with open(input_file, 'r') as f:

            instance_found = False
            last_line = None

            for line in f:
                if not line.strip():
                    # Ignore empty lines
                    continue

                if line[0] == '%':
                    if 'instance' not in line:
                        # Ignore lines with other comments
                        continue
                    # A new instance starts here: check the one that has just ended (if it was selected)
                    if instance_found:
                        inspect_instance(input_file,
                                         (function, instance, dimension),
                                         last_line)
                    instance = int(get_key_value(line[1:], 'instance'))
                    instance_found = (instance in instances)
                    # Do not carry the last line of the previous instance over to this one
                    last_line = None

                elif instance_found:
                    last_line = line

            if instance_found:
                inspect_instance(input_file, (function, instance, dimension),
                                 last_line)
Esempio n. 16
0
def evaluations_append(input_paths,
                       functions,
                       instances,
                       dimensions,
                       fast=False):
    """Appends the comment `% evaluations = NUMBER` to the end of every instance in the .adat files created by the
       bbob-biobj logger.

       If fast is True, it assumes the file contains only one instance (the instance is read from the file contents,
       not the file name) and appends the comment only once - at the end of the file. No check whether this should be
       done is performed - the user should know when it is safe to choose this option.

       The NUMBER is retrieved from the corresponding .info file. If the number is missing for any instance of a
       file, the problem is reported and the file is left unchanged.
       Takes into account only the given functions, instances and dimensions.
       Raises PreprocessingException if no .info or no .adat files are found or if an .info file cannot be read.
    """
    def is_instance_comment(line):
        """Return True if the line is a comment that mentions an instance."""
        return line.startswith('%') and 'instance' in line

    # Check whether .info and .adat files exist in the input paths
    info_files = get_file_name_list(input_paths, ".info")
    if len(info_files) == 0:
        raise PreprocessingException(
            'Folder {} does not contain .info files'.format(input_paths))

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException(
            'Folder {} does not contain .adat files'.format(input_paths))

    info_dict = {}
    for input_file in info_files:
        try:
            info_data_list = parse_info_file(input_file)
        except ValueError as error:
            raise PreprocessingException('Cannot read file {}\n{}'.format(
                input_file, error))

        for info_data_item in info_data_list:
            (function, instance, dimension, evaluations) = info_data_item
            if (function not in functions) or (instance not in instances) or (
                    dimension not in dimensions):
                continue
            info_dict[(function, instance, dimension)] = evaluations

    for input_file in adat_files:
        try:
            (suite_name, function, instance,
             dimension) = parse_archive_file_name(input_file)
            if (function not in functions) or (instance and instance not in instances) or \
                    (dimension not in dimensions):
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        try:
            if instance or fast:
                # Assumes only one instance is contained in the file
                with open(input_file, 'r') as f:
                    for line in f:
                        if is_instance_comment(line):
                            instance = int(get_key_value(line[1:], 'instance'))
                            break
                # Look the number up before the file is opened for writing
                evaluations = info_dict[(function, instance, dimension)]
                with open(input_file, 'a') as f:
                    f.write('% evaluations = {}'.format(evaluations))

            else:
                # First pass (read-only): collect the instances in file order and look up all their numbers.
                # A missing number raises KeyError here, before the file is modified in any way.
                with open(input_file, 'r') as f:
                    file_instances = [
                        int(get_key_value(line[1:], 'instance'))
                        for line in f if is_instance_comment(line)
                    ]
                if not file_instances:
                    # No instance comment in the file, fall back to the instance from the file name
                    file_instances = [instance]
                evaluations = [
                    info_dict[(function, file_instance, dimension)]
                    for file_instance in file_instances
                ]

                # Second pass: take care of the non-last instances in the file. The comment written in front of
                # an instance comment closes the previous instance and therefore carries its number.
                seen_instances = 0
                try:
                    for line in fileinput.input(input_file, inplace=True):
                        if is_instance_comment(line):
                            if seen_instances > 0:
                                sys.stdout.write('% evaluations = {}\n'.format(
                                    evaluations[seen_instances - 1]))
                            seen_instances += 1
                        sys.stdout.write(line)
                finally:
                    # Always restore sys.stdout, even if the rewrite fails
                    fileinput.close()

                # Take care of the last instance in the file
                with open(input_file, 'a') as f:
                    f.write('% evaluations = {}'.format(evaluations[-1]))

        except KeyError as error:
            print('Encountered problem in file {}\n{}'.format(
                input_file, error))
            continue