コード例 #1
0
def archive_analysis(input_path, output_path, lower_bound, upper_bound):
    """
    Records all solutions from the archives found in input_path where any decision space value is lower than the
    lower_bound or higher than the upper_bound. Archives of dimensions > 5, which don't include decision space values
    are skipped. The output is saved in one file per problem instance and consists of lines with the following format:
    [evaluation_number] [objective space values] [decision space values]
    Every offending solution is written exactly once, no matter how many of its coordinates are out of bounds.

    :param input_path: location handed to get_file_name_list to collect the '.adat' archive files
    :param output_path: folder receiving the '*_analysis.txt' files (opened in append mode, empty ones are removed)
    :param lower_bound: lower bound on the decision space values (anything accepted by float())
    :param upper_bound: upper bound on the decision space values (anything accepted by float())
    :raises PreprocessingException: if no '.adat' file is found in input_path
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    lb = float(lower_bound)
    ub = float(upper_bound)

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, dimension) = parse_archive_file_name(input_file)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            # Without this the file would be processed with undefined (or stale) suite/function/dimension values
            continue

        if dimension > 5:
            continue

        print(input_file)
        column_start = 3
        column_end = 3 + dimension

        f_out = None
        f_name = ""

        try:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if line[0] == '%' and 'instance' in line:
                        # A new instance starts: finish the previous output file and open the next one
                        if f_out and not f_out.closed:
                            f_out.close()
                            remove_empty_file(f_name)
                        instance = int(get_key_value(line[1:], 'instance').strip(' \t\n\r'))
                        f_name = os.path.join(output_path, '{}_f{:02d}_i{:02d}_d{:02d}_analysis.txt'.format(suite_name,
                                                                                                            function,
                                                                                                            instance,
                                                                                                            dimension))
                        f_out = open(f_name, 'a')
                        continue

                    columns = line.split()
                    if line[0] == '%' or len(columns) < 4:
                        continue

                    # Convert all decision values first so that malformed numbers are still reported as errors
                    values = [float(number) for number in columns[column_start:column_end]]
                    if any((value > ub) or (value < lb) for value in values):
                        f_out.write("{}\n".format('\t'.join(columns[:column_end])))
        finally:
            # Runs on errors too, so that the output handle of the last instance is never leaked
            if f_out and not f_out.closed:
                f_out.close()
                remove_empty_file(f_name)
コード例 #2
0
ファイル: archive_split.py プロジェクト: ShaulSalomon/coco
def archive_split(input_paths, output_path, functions, instances, dimensions):
    """Iterates through all files in input_paths and splits those that contain multiple instances to one file per
       instance. The check for multiple instances is done only through file names.

       Output files are created (and overwritten) only for the instances listed in instances. Comment lines that
       belong to an instance that is not requested are discarded, while comment lines that precede the first
       instance header are written to the first output file that is created.

    :param input_paths: location(s) handed to get_file_name_list to collect the '.adat' archive files
    :param output_path: folder receiving the '[suite]_f[F]_i[I]_d[D]_nondominated.adat' files
    :param functions: function numbers to be included
    :param instances: instance numbers to be included
    :param dimensions: dimensions to be included
    :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            # A file name that already carries an instance number denotes a single-instance file: nothing to split
            if (function not in functions) or instance or (dimension not in dimensions):
                continue

        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        print(input_file)
        f_out = None            # output file of the instance being copied, None while no requested instance is active
        buffered_lines = ''     # comment lines not yet written to any output file
        skip_instance = False   # True while reading the section of an instance that was not requested

        try:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if not line.strip():
                        # Ignore empty lines
                        continue

                    if line[0] == '%':
                        if 'instance' in line:
                            if f_out is not None:
                                # Trailing comments belong to the instance that is being closed
                                f_out.write(buffered_lines)
                                buffered_lines = ''
                                f_out.close()
                                f_out = None
                            instance = int(get_key_value(line[1:], 'instance'))
                            skip_instance = instance not in instances
                            if not skip_instance:
                                # Only requested instances get a file, so other existing files are never truncated
                                output_file = os.path.join(output_path,
                                                           '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'.format(
                                                               suite_name, function, instance, dimension))
                                f_out = open(output_file, 'w')
                        if not skip_instance:
                            buffered_lines += line

                    elif f_out is not None:
                        if len(buffered_lines) > 0:
                            f_out.write(buffered_lines)
                            buffered_lines = ''
                        f_out.write(line)

            if f_out is not None and len(buffered_lines) > 0:
                # The last requested instance may consist of comment lines only
                f_out.write(buffered_lines)
        finally:
            if f_out is not None:
                f_out.close()
コード例 #3
0
def summary_analysis(input_path, output_file, lower_bound, upper_bound, functions, instances, dimensions):
    """
    Summarizes the analysis files ('.txt') found in input_path, whose lines have the following format:
    [evaluation_number] [objective space values] [decision space values]
    For every selected file two lines are appended to output_file:
    [problem_name] [lowest_value1] ... [lowest_valueD]
    [problem_name] [highest_value1] ... [highest_valueD]
    The lowest values are initialized with lower_bound and the highest values with upper_bound, therefore a
    coordinate that never went beyond a bound is reported with the value of that bound.

    :param input_path: location handed to get_file_name_list to collect the analysis files
    :param output_file: file the summary lines are appended to
    :param lower_bound: starting value for the lowest decision space values (anything accepted by float())
    :param upper_bound: starting value for the highest decision space values (anything accepted by float())
    :param functions: function numbers to be included
    :param instances: instance numbers to be included
    :param dimensions: dimensions to be included
    :raises PreprocessingException: if no '.txt' file is found in input_path
    """

    analysis_files = get_file_name_list(input_path, ".txt")
    if len(analysis_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    with open(output_file, 'a') as summary:
        for analysis_file in analysis_files:

            try:
                suite_name, function, instance, dimension = parse_problem_instance_file_name(analysis_file)
                selected = (function in functions) and (instance in instances) and (dimension in dimensions)
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(analysis_file, warning))
                continue
            if not selected:
                continue

            print(analysis_file)

            # The decision space values follow the evaluation number and the two objective values
            decision_columns = slice(3, 3 + dimension)
            lowest = [float(lower_bound)] * dimension
            highest = [float(upper_bound)] * dimension

            with open(analysis_file, 'r') as f_in:
                for line in f_in:
                    for idx, number in enumerate(line.split()[decision_columns]):
                        value = float(number)
                        highest[idx] = max(highest[idx], value)
                        lowest[idx] = min(lowest[idx], value)

            problem_name = '{}_f{:02d}_i{:02d}_d{:02d}'.format(suite_name, function, instance, dimension)
            for extremes in (lowest, highest):
                summary.write(problem_name + ''.join('\t{:.8E}'.format(number) for number in extremes) + '\n')
コード例 #4
0
ファイル: archive_analysis.py プロジェクト: ShaulSalomon/coco
def summary_analysis(input_path, output_file, lower_bound, upper_bound, functions, instances, dimensions):
    """
    Builds a per-problem summary of the analysis files ('.txt') stored in input_path. Each line of an analysis file
    is expected to look like:
    [evaluation_number] [objective space values] [decision space values]
    Two lines are appended to output_file for every file that passes the function/instance/dimension filter:
    [problem_name] [lowest_value1] ... [lowest_valueD]
    [problem_name] [highest_value1] ... [highest_valueD]
    Tracking starts from lower_bound and upper_bound respectively, so coordinates that stayed within a bound are
    reported with the bound itself.

    :param input_path: location handed to get_file_name_list to collect the analysis files
    :param output_file: file the summary lines are appended to
    :param lower_bound: starting value for the lowest decision space values (anything accepted by float())
    :param upper_bound: starting value for the highest decision space values (anything accepted by float())
    :param functions: function numbers to be included
    :param instances: instance numbers to be included
    :param dimensions: dimensions to be included
    :raises PreprocessingException: if no '.txt' file is found in input_path
    """

    txt_files = get_file_name_list(input_path, ".txt")
    if len(txt_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    with open(output_file, 'a') as report:
        for txt_file in txt_files:

            try:
                suite_name, function, instance, dimension = parse_problem_instance_file_name(txt_file)
                keep = (function in functions) and (instance in instances) and (dimension in dimensions)
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(txt_file, warning))
                continue
            if not keep:
                continue

            print(txt_file)

            # Columns 0-2 hold the evaluation number and the objective values
            first_column = 3
            last_column = first_column + dimension
            minima = [float(lower_bound)] * dimension
            maxima = [float(upper_bound)] * dimension

            with open(txt_file, 'r') as f_in:
                for row in f_in:
                    fields = row.split()[first_column:last_column]
                    for position, field in enumerate(fields):
                        value = float(field)
                        if value > maxima[position]:
                            maxima[position] = value
                        if value < minima[position]:
                            minima[position] = value

            label = '{}_f{:02d}_i{:02d}_d{:02d}'.format(suite_name, function, instance, dimension)
            low_line = label + ''.join(['\t{:.8E}'.format(value) for value in minima]) + '\n'
            high_line = label + ''.join(['\t{:.8E}'.format(value) for value in maxima]) + '\n'
            report.write(low_line)
            report.write(high_line)
コード例 #5
0
ファイル: extract_extremes.py プロジェクト: ShaulSalomon/coco
def extract_extremes(input_paths, output_file, functions, instances, dimensions):
    """
    Collects the extreme points of the archives found in input_paths and appends them to output_file, one line per
    instance, in the following format:
    [problem_name] [extreme_point_1] [extreme_point_2]

    The two extreme points are taken from the second and third column of the first two non-comment lines of every
    instance archive (an instance starts with a '%' line that contains 'instance').
    No sorting or filtering of the problems is performed, therefore multiple copies of one problem in the input
    result in multiple lines for that problem in the output.

    :param input_paths: location(s) handed to get_file_name_list to collect the '.adat' archive files
    :param output_file: file the extreme points are appended to
    :param functions: function numbers to be included
    :param instances: instance numbers to be included
    :param dimensions: dimensions to be included
    :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    archive_files = get_file_name_list(input_paths, ".adat")
    if len(archive_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    with open(output_file, 'a') as f_out:
        for archive_file in archive_files:
            try:
                suite_name, function, instance, dimension = parse_archive_file_name(archive_file)
                selected = (function in functions) and (instance in instances) and (dimension in dimensions)
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(archive_file, warning))
                continue
            if not selected:
                continue

            print(archive_file)

            with open(archive_file, 'r') as f_in:
                # Extreme points collected so far for the current instance (at most two)
                extremes = []
                for line in f_in:
                    is_comment = line[0] == '%'

                    if is_comment and 'instance' in line:
                        instance = int(get_key_value(line[1:], 'instance').strip(' \t\n\r'))
                        extremes = []
                        continue

                    if is_comment or len(extremes) > 1:
                        continue

                    extremes.append(line.split()[1:3])
                    if len(extremes) < 2:
                        continue

                    # Both extreme points of this instance are known, output them
                    try:
                        problem_name = '{}_f{:02d}_i{:02d}_d{:02d}\t'.format(suite_name, function, instance,
                                                                             dimension)
                        f_out.write(problem_name + '\t'.join(extremes[0]) + '\t' + '\t'.join(extremes[1]) + '\n')
                    except ValueError:
                        print('Skipping instance {} in file {}'.format(instance, archive_file))

            f_out.flush()
コード例 #6
0
def archive_analysis(input_paths, output_path, lower_bound, upper_bound, functions, instances, dimensions):
    """Records all solutions from the archives found in input_paths where any decision space value is lower than the
       lower_bound or higher than the upper_bound. Archives of dimensions > 5, which don't include decision space values
       are skipped. The output consists of lines with the following format:
       [evaluation_number] [objective space values] [decision space values]
       Every offending solution is written exactly once, no matter how many of its coordinates are out of bounds.
       Assumes one file contains one archive.

    :param input_paths: location(s) handed to get_file_name_list to collect the '.adat' archive files
    :param output_path: folder receiving the '*_analysis.txt' files (opened in append mode, empty ones are removed)
    :param lower_bound: lower bound on the decision space values (anything accepted by float())
    :param upper_bound: upper bound on the decision space values (anything accepted by float())
    :param functions: function numbers to be included
    :param instances: instance numbers to be included
    :param dimensions: dimensions to be included
    :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    # Check whether input path exists
    input_files = get_file_name_list(input_paths, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_paths))

    # Bounds may arrive as strings (e.g. from the command line); compare numbers with numbers
    lb = float(lower_bound)
    ub = float(upper_bound)

    # Read the input files one by one and save the result in the output_path
    create_path(output_path)
    for input_file in input_files:

        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
            if (function not in functions) or (dimension not in dimensions) or (dimension > 5):
                continue
            if not instance:
                raise PreprocessingWarning('Analysis does not work on files with multiple archives, use archive_split')
            if instance not in instances:
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue

        print(input_file)

        column_start = 3
        column_end = 3 + dimension
        output_file = os.path.join(output_path, '{}_f{:02d}_i{:02d}_d{:02d}_analysis.txt'.format(suite_name,
                                                                                                 function,
                                                                                                 instance,
                                                                                                 dimension))

        # Both handles are managed by 'with' so that neither is leaked when a line cannot be parsed
        with open(output_file, 'a') as f_out:
            with open(input_file, 'r') as f_in:
                for line in f_in:
                    columns = line.split()
                    if line[0] == '%' or len(columns) < 4:
                        continue

                    # Convert all decision values first so that malformed numbers are still reported as errors
                    values = [float(number) for number in columns[column_start:column_end]]
                    if any((value > ub) or (value < lb) for value in values):
                        f_out.write('{}\n'.format('\t'.join(columns[:column_end])))

        remove_empty_file(output_file)
コード例 #7
0
ファイル: archive_reformat.py プロジェクト: ShaulSalomon/coco
def reformat_archives(input_path, output_path, functions, instances, dimensions):
    """
    Converts archives from the old file format to the '.adat' format of the 'bbob-biobj' suite.

    Input files are named
    f[f1]-[f2]_i[i1]-[i2]_[d]D.txt
    where f1 and f2 are the function numbers of the first and second objective, i1 and i2 are the instance numbers of
    the two functions and d is the dimension (one among 2, 3, 5, 10 and 20). Lines starting with '#' are comments,
    every other line corresponds to one solution. In files with d <= 5 a solution is given by its decision vector
    followed by its objective vector, while files with d > 5 contain only the objective vector of each solution.

    Output files are named
    [suite_name]_f[F]_i[I]_d[D]_nondominated.adat
    where F is the function number in the suite, I is the instance number and D is the dimension. For every input
    file the line '% instance = I' and a line containing only '%' are written, followed by one line per solution:
    num obj1 obj2 dec1 ... decn
    where num is always 1 (marking a solution read from the old file format), obj1 and obj2 are the objective values
    and dec1, ... are the decision values (present only when d <= 5).

    Note this implementation is concerned only with the 'bbob-biobj' suite.

    :param input_path: path to the folder with input archives
    :param output_path: path to the folder where output archives are stored to, if any files already exist there, they
    get appended to
    :param functions: list of function numbers to be included in the reformatting
    :param instances: list of instance numbers to be included in the reformatting
    :param dimensions: list of dimensions to be included in the reformatting
    :raises PreprocessingException: if no '.txt' file is found in input_path
    """
    suite_name = 'bbob-biobj'
    print('Reformatting archive files for the {} suite...'.format(suite_name))

    old_archives = get_file_name_list(input_path, ".txt")
    if len(old_archives) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    # Make sure the output folder exists
    create_path(output_path)

    for old_archive in old_archives:

        try:
            function, instance, dimension = parse_old_arhive_file_name(old_archive)
            selected = (function in functions) and (instance in instances) and (dimension in dimensions)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(old_archive, warning))
            continue
        if not selected:
            continue

        new_name = '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'.format(suite_name, function, instance, dimension)
        new_archive = os.path.join(output_path, new_name)

        with open(old_archive, 'r') as f_in, open(new_archive, 'a') as f_out:
            print(old_archive)
            f_out.write('% instance = {}\n%\n'.format(instance))

            for line in f_in:
                if line[0] == '#':
                    continue

                fields = line.split()
                if dimension > 5:
                    # Only the two objective values are available
                    f_out.write('1 \t{}\n'.format(' \t'.join(fields[0:2])))
                else:
                    # Old format lists the decision values first, the new one starts with the objectives
                    objectives = ' \t'.join(fields[dimension:dimension + 2])
                    decisions = ' \t'.join(fields[0:dimension])
                    f_out.write('1 \t{} \t{}\n'.format(objectives, decisions))

    print('Done!')
コード例 #8
0
ファイル: archive_reformat.py プロジェクト: ysakanaka/coco
def reformat_archives(input_path, output_path, functions, instances,
                      dimensions):
    """
    Rewrites the old-format archives from input_path as '.adat' archives of the 'bbob-biobj' suite in output_path.

    The names of the input files have the following notation:
    f[f1]-[f2]_i[i1]-[i2]_[d]D.txt
    where f1 and f2 are function numbers used for the first and second objective, i1 and i2 are instance numbers of the
    two functions and d is the dimension (one among 2, 3, 5, 10 and 20). Lines that start with '#' are skipped, each
    remaining line corresponds to one solution. For d <= 5 the line holds the decision values followed by the
    objective values, for d > 5 it holds only the objective values.

    The names of the output files have the following notation:
    [suite_name]_f[F]_i[I]_d[D]_nondominated.adat
    where F is the function number in the suite, I is the instance number and D is the dimension. Each processed input
    file contributes the lines '% instance = I' and '%', followed by its solutions in the format:
    num obj1 obj2 dec1 ... decn
    where num is always 1 (solution read from the old file format), obj1 and obj2 are the objective values and
    dec1, ... are the decision values (written only for d <= 5).

    :param input_path: path to the folder with input archives
    :param output_path: path to the folder where output archives are stored to, if any files already exist there, they
    get appended to
    :param functions: list of function numbers to be included in the reformatting
    :param instances: list of instance numbers to be included in the reformatting
    :param dimensions: list of dimensions to be included in the reformatting
    :raises PreprocessingException: if no '.txt' file is found in input_path
    """
    suite_name = 'bbob-biobj'
    print('Reformatting archive files for the {} suite...'.format(suite_name))

    source_files = get_file_name_list(input_path, ".txt")
    if len(source_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_path))

    # The output folder is created on demand
    create_path(output_path)

    target_pattern = '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'

    for source_file in source_files:

        try:
            function, instance, dimension = parse_old_arhive_file_name(
                source_file)
            wanted = (function in functions) and (
                instance in instances) and (dimension in dimensions)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(source_file, warning))
            continue
        if not wanted:
            continue

        target_file = os.path.join(
            output_path,
            target_pattern.format(suite_name, function, instance, dimension))

        with open(source_file, 'r') as f_in, open(target_file, 'a') as f_out:
            print(source_file)
            f_out.write('% instance = {}\n%\n'.format(instance))

            for line in f_in:
                if line[0] == '#':
                    continue

                tokens = line.split()
                if dimension <= 5:
                    # Objectives are moved in front of the decision values
                    parts = [
                        ' \t'.join(tokens[dimension:dimension + 2]),
                        ' \t'.join(tokens[0:dimension]),
                    ]
                    f_out.write('1 \t{} \t{}\n'.format(*parts))
                else:
                    f_out.write('1 \t{}\n'.format(' \t'.join(tokens[0:2])))

    print('Done!')
コード例 #9
0
def _select_archive_files(path, functions, instances, dimensions):
    """Returns the single-instance '.adat' files found in path that match the given functions, instances and
    dimensions. Accepted files are printed; files whose name cannot be handled are reported and left out.
    """
    file_names = get_file_name_list(path, ".adat")
    if len(file_names) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(path))

    selected = []
    for file_name in file_names:
        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(file_name)
            if (function not in functions) or (dimension not in dimensions):
                continue
            if not instance:
                raise PreprocessingWarning('Checking for differences does not work on files with multiple archives, '
                                           'use archive_split')
            if instance not in instances:
                continue
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(file_name, warning))
            continue
        print(file_name)
        selected.append(file_name)
    return selected


def archive_difference(first_path, second_path, differences, functions, instances, dimensions):
    """Outputs the differences between the matching archive files found in the first and second path.

    Two files match when they have the same base name. Only files that pass the function/instance/dimension filter
    are compared. For every matching pair the file name is appended to the differences file, followed by the
    unified diff of the two files (nothing follows the name when the files are equal).

    :param first_path: location of the first set of '.adat' archive files
    :param second_path: location of the second set of '.adat' archive files
    :param differences: file the differences are appended to
    :param functions: function numbers to be included
    :param instances: instance numbers to be included
    :param dimensions: dimensions to be included
    :raises PreprocessingException: if no '.adat' file is found in first_path or in second_path
    """
    first_files = _select_archive_files(first_path, functions, instances, dimensions)
    second_files = _select_archive_files(second_path, functions, instances, dimensions)

    # Index the second set once by base name; the stored value is the path that was actually found, so files
    # located in sub-folders of second_path can be opened as well
    second_by_name = {}
    for second_file in second_files:
        second_by_name.setdefault(os.path.basename(second_file), second_file)

    with open(differences, 'a') as f_out:
        for first_file in first_files:
            file_name = os.path.basename(first_file)
            second_file = second_by_name.get(file_name)
            if second_file is None:
                continue

            with open(first_file, 'r') as f1:
                with open(second_file, 'r') as f2:
                    # Find and output the differences
                    diff = difflib.unified_diff(f1.readlines(), f2.readlines(), fromfile='f1', tofile='f2')
                    f_out.write('{}\n'.format(file_name))
                    print(file_name)
                    f_out.writelines(diff)
コード例 #10
0
def archive_split(input_paths, output_path, functions, instances, dimensions):
    """Splits the archive files from input_paths that contain multiple instances into one file per instance.
       Whether a file contains multiple instances is decided only from its name: files whose name already carries
       an instance number are left alone.

       A new output file is started at every '%' line that contains 'instance', provided that the instance is
       among the requested ones; lines of other instances are dropped. Empty lines are never copied.

    :param input_paths: location(s) handed to get_file_name_list to collect the '.adat' archive files
    :param output_path: folder receiving the '[suite]_f[F]_i[I]_d[D]_nondominated.adat' files
    :param functions: function numbers to be included
    :param instances: instance numbers to be included
    :param dimensions: dimensions to be included
    :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    archive_files = get_file_name_list(input_paths, ".adat")
    if len(archive_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_paths))

    create_path(output_path)
    name_pattern = '{}_f{:02d}_i{:02d}_d{:02d}_nondominated.adat'

    for archive_file in archive_files:

        try:
            suite_name, function, instance, dimension = parse_archive_file_name(archive_file)
            splittable = (function in functions) and (not instance) and (dimension in dimensions)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(archive_file, warning))
            continue
        if not splittable:
            continue

        print(archive_file)
        target = None       # output file of the most recently accepted instance
        current = None      # number of the instance being copied, None while lines are being dropped
        pending = []        # comment lines waiting to be written to the target

        with open(archive_file, 'r') as source:
            for line in source:
                if not line.strip():
                    # Ignore empty lines
                    continue

                if not line.startswith('%'):
                    # Data line: copied only while a requested instance is active
                    if current:
                        if pending:
                            target.write(''.join(pending))
                            pending = []
                        target.write(line)
                    continue

                if 'instance' in line:
                    # Finish the previous instance before switching to the next one
                    if target and not target.closed:
                        if pending:
                            target.write(''.join(pending))
                            pending = []
                        target.close()
                    current = int(get_key_value(line[1:], 'instance'))
                    if current in instances:
                        target_name = name_pattern.format(suite_name, function, current, dimension)
                        target = open(os.path.join(output_path, target_name), 'w')
                    else:
                        current = None

                if current:
                    pending.append(line)

        if target and not target.closed:
            if pending:
                target.write(''.join(pending))
            target.close()
コード例 #11
0
ファイル: extract_extremes.py プロジェクト: ysakanaka/coco
def extract_extremes(input_paths, output_file, functions, instances,
                     dimensions):
    """
    Reads the archives contained in input_paths and appends the extreme points of every instance to the output_file
    in the following format:
    [problem_name] [extreme_point_1] [extreme_point_2]

    An instance starts with a '%' line that contains 'instance'. Its extreme points are read from the second and
    third column of the first two lines that are not comments; all further lines of the instance are ignored.
    The problems are neither sorted nor filtered for duplicates, so multiple copies of one problem in the input lead
    to multiple lines for that problem in the output.

    :param input_paths: location(s) handed to get_file_name_list to collect the '.adat' archive files
    :param output_file: file the extreme points are appended to
    :param functions: function numbers to be included
    :param instances: instance numbers to be included
    :param dimensions: dimensions to be included
    :raises PreprocessingException: if no '.adat' file is found in input_paths
    """

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_paths))

    with open(output_file, 'a') as f_out:
        for adat_file in adat_files:
            try:
                suite_name, function, instance, dimension = parse_archive_file_name(adat_file)
                keep = (function in functions) and (instance in instances) and (dimension in dimensions)
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(adat_file, warning))
                continue
            if not keep:
                continue

            print(adat_file)

            with open(adat_file, 'r') as f_in:
                first_point = None
                points_seen = 0
                for line in f_in:
                    comment = line[0] == '%'

                    if comment and 'instance' in line:
                        # Start of a new instance
                        key_value = get_key_value(line[1:], 'instance')
                        instance = int(key_value.strip(' \t\n\r'))
                        points_seen = 0
                        continue

                    if comment or points_seen > 1:
                        continue

                    point = line.split()[1:3]
                    points_seen += 1
                    if points_seen == 1:
                        first_point = point
                        continue

                    # Second extreme point found, the instance is complete
                    try:
                        fields = ['{}_f{:02d}_i{:02d}_d{:02d}'.format(
                            suite_name, function, instance, dimension)]
                        fields.append('\t'.join(first_point))
                        fields.append('\t'.join(point))
                        f_out.write('\t'.join(fields) + '\n')
                    except ValueError:
                        print('Skipping instance {} in file {}'.format(
                            instance, adat_file))

            f_out.flush()
コード例 #12
0
def archive_difference(first_path, second_path, differences, functions,
                       instances, dimensions):
    """Outputs the differences between the matching archive files found in the first and second path.

       Two files match when they share the same base name. For every matching pair, the file name followed by
       the unified diff of the two files is appended to the differences file. Only files of the given functions,
       instances and dimensions are compared. Files for which no instance can be parsed from the file name are
       skipped with a message, because checking for differences does not work on files with multiple archives.

       Raises PreprocessingException if either path contains no .adat files.
    """

    def comparable_files(path):
        """Returns the .adat files found in path, with '' in place of every file that must not be compared."""
        files = get_file_name_list(path, ".adat")
        if len(files) == 0:
            raise PreprocessingException(
                'Folder {} does not exist or is empty'.format(path))

        for i, file_name in enumerate(files):
            try:
                (suite_name, function, instance,
                 dimension) = parse_archive_file_name(file_name)
                if not instance:
                    raise PreprocessingWarning(
                        'Checking for differences does not work on files with multiple archives, '
                        'use archive_split')
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(file_name, warning))
                files[i] = ''
                continue

            if (function not in functions) or (instance not in instances) or (
                    dimension not in dimensions):
                # Blank the entry, otherwise the file would still be compared below
                files[i] = ''
                continue
            print(file_name)
        return files

    # Both folders are checked (in this order) before anything is written
    first_files = comparable_files(first_path)
    second_files = comparable_files(second_path)

    # Index the second files by base name once instead of rebuilding the list of base names for each first file.
    # The path that was actually found is kept, so the matching file is opened from where it really is.
    second_by_name = {}
    for second_file in second_files:
        if second_file != '':
            second_by_name.setdefault(os.path.basename(second_file),
                                      second_file)

    with open(differences, 'a') as f_out:
        for first_file in first_files:
            if first_file == '':
                continue
            file_name = os.path.basename(first_file)
            second_file = second_by_name.get(file_name)
            if second_file is None:
                # No file with the same name in the second path
                continue

            with open(first_file, 'r') as f1, open(second_file, 'r') as f2:
                # Find and output the differences
                diff = difflib.unified_diff(f1.readlines(),
                                            f2.readlines(),
                                            fromfile='f1',
                                            tofile='f2')
                f_out.write('{}\n'.format(file_name))
                print(file_name)
                f_out.writelines(diff)
コード例 #13
0
def evaluations_append(input_paths,
                       functions,
                       instances,
                       dimensions,
                       fast=False):
    """Appends the comment `% evaluations = NUMBER` to the end of every instance in the .adat files created by the
       bbob-biobj logger.

       If fast is True, it assumes the file contains only one instance (the instance is read from the file contents,
       not the file name) and appends the comment only once - at the end of the file. No check whether this should be
       done is performed - the user should know when it is safe to choose this option.

       The NUMBER is retrieved from the corresponding .info file.
       Takes into account only the given functions, instances and dimensions.

       All the numbers needed for a file are looked up before the file is modified. If any of them is missing, a
       message is printed and the file is left untouched.

       Raises PreprocessingException if no .info or .adat files are found or if an .info file cannot be read.
    """

    # Check whether .info and .adat files exist in the input paths
    info_files = get_file_name_list(input_paths, ".info")
    if len(info_files) == 0:
        raise PreprocessingException(
            'Folder {} does not contain .info files'.format(input_paths))

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException(
            'Folder {} does not contain .adat files'.format(input_paths))

    # Map (function, instance, dimension) to the number of evaluations
    info_dict = {}
    for input_file in info_files:
        try:
            info_data_list = parse_info_file(input_file)
        except ValueError as error:
            raise PreprocessingException('Cannot read file {}\n{}'.format(
                input_file, error))

        for info_data_item in info_data_list:
            (function, instance, dimension, evaluations) = info_data_item
            if (function not in functions) or (instance not in instances) or (
                    dimension not in dimensions):
                continue
            info_dict[(function, instance, dimension)] = evaluations

    for input_file in adat_files:
        try:
            (suite_name, function, instance,
             dimension) = parse_archive_file_name(input_file)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue
        if (function not in functions) or (instance and instance not in instances) or \
                (dimension not in dimensions):
            continue

        # With an instance in the file name (or in fast mode) only one instance is assumed to be in the file
        single_instance = bool(instance or fast)

        # First pass (read-only): collect the instances in the order in which they appear in the file
        file_instances = []
        with open(input_file, 'r') as f:
            for line in f:
                if (line[0] == '%') and ('instance' in line):
                    file_instances.append(
                        int(get_key_value(line[1:], 'instance')))
                    if single_instance:
                        break
        if not file_instances:
            # No instance header in the file, fall back to the instance parsed from the file name
            file_instances = [instance]

        # Resolve all numbers before touching the file, a failure during the in-place rewrite would lose data
        try:
            evaluations_list = [
                info_dict[(function, file_instance, dimension)]
                for file_instance in file_instances
            ]
        except KeyError as error:
            print('Encountered problem in file {}\n{}'.format(
                input_file, error))
            continue

        if not single_instance:
            # Take care of the non-last instances in the file: the header of the next instance marks the end of
            # the previous one, so the number of the previous instance is written right before it
            headers_seen = 0
            try:
                for line in fileinput.input(input_file, inplace=True):
                    if (line[0] == '%') and ('instance' in line):
                        if headers_seen > 0:
                            sys.stdout.write('% evaluations = {}\n'.format(
                                evaluations_list[headers_seen - 1]))
                        headers_seen += 1
                    sys.stdout.write(line)
            finally:
                fileinput.close()

        # Take care of the last (or only) instance in the file
        with open(input_file, 'a') as f:
            f.write('% evaluations = {}'.format(evaluations_list[-1]))
コード例 #14
0
def evaluations_append(input_paths, functions, instances, dimensions, fast=False):
    """Appends the comment `% evaluations = NUMBER` to the end of every instance in the .adat files created by the
       bbob-biobj logger.

       If fast is True, it assumes the file contains only one instance (the instance is read from the file contents,
       not the file name) and appends the comment only once - at the end of the file. No check whether this should be
       done is performed - the user should know when it is safe to choose this option.

       The NUMBER is retrieved from the corresponding .info file.
       Takes into account only the given functions, instances and dimensions.

       All the numbers needed for a file are looked up before the file is modified. If any of them is missing, a
       message is printed and the file is left untouched.

       Raises PreprocessingException if no .info or .adat files are found or if an .info file cannot be read.
    """

    # Check whether .info and .adat files exist in the input paths
    info_files = get_file_name_list(input_paths, ".info")
    if len(info_files) == 0:
        raise PreprocessingException('Folder {} does not contain .info files'.format(input_paths))

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException('Folder {} does not contain .adat files'.format(input_paths))

    # Map (function, instance, dimension) to the number of evaluations
    info_dict = {}
    for input_file in info_files:
        try:
            info_data_list = parse_info_file(input_file)
        except ValueError as error:
            raise PreprocessingException('Cannot read file {}\n{}'.format(input_file, error))

        for info_data_item in info_data_list:
            (function, instance, dimension, evaluations) = info_data_item
            if (function not in functions) or (instance not in instances) or (dimension not in dimensions):
                continue
            info_dict[(function, instance, dimension)] = evaluations

    for input_file in adat_files:
        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue
        if (function not in functions) or (instance and instance not in instances) or \
                (dimension not in dimensions):
            continue

        # With an instance in the file name (or in fast mode) only one instance is assumed to be in the file
        single_instance = bool(instance or fast)

        # First pass (read-only): collect the instances in the order in which they appear in the file
        file_instances = []
        with open(input_file, 'r') as f:
            for line in f:
                if (line[0] == '%') and ('instance' in line):
                    file_instances.append(int(get_key_value(line[1:], 'instance')))
                    if single_instance:
                        break
        if not file_instances:
            # No instance header in the file, fall back to the instance parsed from the file name
            file_instances = [instance]

        # Resolve all numbers before touching the file, a failure during the in-place rewrite would lose data
        try:
            evaluations_list = [info_dict[(function, file_instance, dimension)] for file_instance in file_instances]
        except KeyError as error:
            print('Encountered problem in file {}\n{}'.format(input_file, error))
            continue

        if not single_instance:
            # Take care of the non-last instances in the file: the header of the next instance marks the end of
            # the previous one, so the number of the previous instance is written right before it
            headers_seen = 0
            try:
                for line in fileinput.input(input_file, inplace=True):
                    if (line[0] == '%') and ('instance' in line):
                        if headers_seen > 0:
                            sys.stdout.write('% evaluations = {}\n'.format(evaluations_list[headers_seen - 1]))
                        headers_seen += 1
                    sys.stdout.write(line)
            finally:
                fileinput.close()

        # Take care of the last (or only) instance in the file
        with open(input_file, 'a') as f:
            f.write('% evaluations = {}'.format(evaluations_list[-1]))
コード例 #15
0
def archive_thinning(input_path, output_path, thinning_precision,
                     currently_nondominated, functions, instances, dimensions):
    """Performs thinning of all the archives in the input path and stores the thinned archives in the output path.
       Assumes one file contains one archive.

       For each archive, all input solutions are rounded according to the thinning precision (in the normalized
       objective space) and added to the thinned archive. If currently_nondominated is True, all solutions that
       are currently nondominated within the thinned archive are output. The two extreme solutions are not output.
       If currently_nondominated is False, only the solutions that are contained in the final archive are output.
       In this case, the two extreme solutions are also output.

       Takes into account only the given functions, instances and dimensions. The log level is set to 'warning'
       while the archives are processed and restored afterwards (also if an error occurs).

       Raises PreprocessingException if the input path contains no .adat files.
    """

    def thin_file(input_file, suite_name, function, instance, dimension):
        """Thins the archive stored in input_file and prints the number of original and thinned solutions."""
        output_file = input_file.replace(input_path, output_path)
        create_path(os.path.dirname(output_file))
        thinned_solutions = 0
        all_solutions = 0

        # The context manager closes the output file even if the processing fails
        with open(output_file, 'w') as f_out:
            thinned_archive = Archive(suite_name, function, instance,
                                      dimension)

            # The first two solution texts of the fresh archive are used as the extreme solutions, their
            # objective values (columns 1 and 2) define the normalization
            extreme1_text = thinned_archive.get_next_solution_text()
            extreme2_text = thinned_archive.get_next_solution_text()
            extreme1 = [float(x) for x in extreme1_text.split()[1:3]]
            extreme2 = [float(x) for x in extreme2_text.split()[1:3]]
            ideal = [min(x, y) for x, y in zip(extreme1, extreme2)]
            nadir = [max(x, y) for x, y in zip(extreme1, extreme2)]
            normalization = [x - y for x, y in zip(nadir, ideal)]

            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if line[0] == '%':
                        # Comments are copied to the output unchanged
                        f_out.write(line)
                        continue

                    columns = line.split()
                    if len(columns) < 3:
                        continue

                    # Extreme solutions (evaluation number 0) and regular solutions both count as input
                    all_solutions += 1
                    if columns[0] == '0':
                        # The line contains an extreme solution, do nothing
                        continue

                    # The line contains a 'regular' solution
                    try:
                        # Round the solution in the normalized objective space and add it to the archive
                        f_original = [float(x) for x in columns[1:3]]
                        f_normalized = [
                            (f_original[i] - ideal[i]) / normalization[i]
                            for i in range(2)
                        ]
                        f_normalized = [
                            round(f_normalized[i] / thinning_precision)
                            for i in range(2)
                        ]
                        f_normalized = [
                            ideal[i] + f_normalized[i] * thinning_precision
                            for i in range(2)
                        ]
                        updated = thinned_archive.add_solution(
                            f_normalized[0], f_normalized[1], line)
                    except (IndexError, ValueError):
                        # ValueError covers objective values that are not valid numbers
                        print('Problem in file {}, line {}, skipping line'.
                              format(input_file, line))
                        continue

                    if currently_nondominated and (updated == 1):
                        thinned_solutions += 1
                        f_out.write(line)

            if not currently_nondominated:
                if thinned_archive.number_of_solutions == 2:
                    # Output the two extreme solutions if they are the only two in the archive
                    f_out.write(extreme1_text)
                    f_out.write(extreme2_text)
                    thinned_solutions = 2

                # Output the remaining solutions of the final archive
                while True:
                    text = thinned_archive.get_next_solution_text()
                    if text is None:
                        break
                    thinned_solutions += 1
                    f_out.write(text)

        # Files without any solution would otherwise cause a division by zero
        if all_solutions:
            percentage = 100 * thinned_solutions / all_solutions
        else:
            percentage = 0.0
        print('original: {} thinned: {} ({:.2f}%)'.format(
            all_solutions, thinned_solutions, percentage))

    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException(
            'Folder {} does not exist or is empty'.format(input_path))

    old_level = log_level('warning')
    try:
        for input_file in input_files:
            try:
                (suite_name, function, instance,
                 dimension) = parse_archive_file_name(input_file)
                # Filter by function and dimension first, so that no warning is printed for files that
                # would be ignored anyway
                if (function not in functions) or (dimension
                                                   not in dimensions):
                    continue
                if not instance:
                    raise PreprocessingWarning(
                        'Thinning does not work on files with multiple archives, use archive_split'
                    )
                if instance not in instances:
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)
            thin_file(input_file, suite_name, function, instance, dimension)
    finally:
        log_level(old_level)
コード例 #16
0
def check_file_complete(input_paths,
                        functions,
                        instances,
                        dimensions,
                        max_diff=1000):
    """Checks the .adat files created by the bbob-biobj logger to see if they have been properly written. Outputs the
       difference between the last evaluation from the .adat file and the one noted in the .info file if it is
       greater than max_diff.

       Takes into account only the given functions, instances and dimensions.

       Raises PreprocessingException if no .info or .adat files are found or if an .info file cannot be read.
       A KeyError is raised if the .info files contain no entry for a selected instance found in an .adat file.
    """
    def inspect_line(input_file, instance, line_string, evaluations,
                     allowed_diff):
        """Check that the line_string (the last data line of the instance) contains at least three numbers and
           that they are correctly written. Outputs a message if the difference between the evaluations and the
           first number in the line_string is greater than allowed_diff.
        """
        if line_string is None:
            # The instance header was not followed by any data line
            print('File {}, instance {} contains no data lines'.format(
                input_file, instance))
            return

        items = line_string.split()
        if len(items) < 3:
            print("File {}, line {} too short".format(input_file, line_string))
        for item in items:
            try:
                float(item)
            except ValueError:
                print('File {}, line {}, number {} incorrect'.format(
                    input_file, line_string, item))

        if not items:
            return
        try:
            last_evaluation = int(items[0])
        except ValueError:
            # Without a valid evaluation number the difference cannot be computed
            print('File {}, line {}, evaluation number {} is not an integer'.
                  format(input_file, line_string, items[0]))
            return

        if evaluations - last_evaluation > allowed_diff:
            print('Mismatch in evaluations in file {}\n'
                  '.info  = {}\n'
                  '.adat  = {}\n'
                  ' diff  = {}\n'.format(input_file, evaluations, items[0],
                                         evaluations - last_evaluation))

    # Check whether .info and .adat files exist in the input paths
    info_files = get_file_name_list(input_paths, ".info")
    if len(info_files) == 0:
        raise PreprocessingException(
            'Folder {} does not contain .info files'.format(input_paths))

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException(
            'Folder {} does not contain .adat files'.format(input_paths))

    # Map (function, instance, dimension) to the number of evaluations
    info_dict = {}
    print('Reading .info files...')
    for input_file in info_files:
        # Store the data from the .info files
        try:
            info_data_list = parse_info_file(input_file)
        except ValueError as error:
            raise PreprocessingException('Cannot read file {}\n{}'.format(
                input_file, error))

        for info_data_item in info_data_list:
            (function, instance, dimension, evaluations) = info_data_item
            if (function not in functions) or (instance not in instances) or (
                    dimension not in dimensions):
                continue
            info_dict[(function, instance, dimension)] = evaluations

    print('Reading .adat files...')
    for input_file in adat_files:
        try:
            (suite_name, function, instance,
             dimension) = parse_archive_file_name(input_file)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue
        if (function not in functions) or (instance and instance not in instances) or \
                (dimension not in dimensions):
            continue

        with open(input_file, 'r') as f:

            # instance_found tells whether the instance currently being read is one of the selected ones,
            # last_line holds its most recent data line
            instance_found = False
            last_line = None

            for line in f:
                if not line.strip():
                    # Ignore empty lines
                    continue

                if line[0] == '%':
                    if 'instance' not in line:
                        # Ignore lines with comments
                        continue

                    # A new instance starts here, so the previous one (if selected) is complete
                    if instance_found:
                        inspect_line(
                            input_file, instance, last_line,
                            info_dict[(function, instance, dimension)],
                            max_diff)
                    instance = int(get_key_value(line[1:], 'instance'))
                    instance_found = (instance in instances)
                    # Do not carry the last line over to the next instance
                    last_line = None

                elif instance_found:
                    last_line = line

            if instance_found:
                inspect_line(input_file, instance, last_line,
                             info_dict[(function, instance, dimension)],
                             max_diff)
コード例 #17
0
def archive_thinning(input_path, output_path, thinning_precision, currently_nondominated, functions, instances,
                     dimensions):
    """Performs thinning of all the archives in the input path and stores the thinned archives in the output path.
       Assumes one file contains one archive.

       For each archive, all input solutions are rounded according to the thinning precision (in the normalized
       objective space) and added to the thinned archive. If currently_nondominated is True, all solutions that
       are currently nondominated within the thinned archive are output. The two extreme solutions are not output.
       If currently_nondominated is False, only the solutions that are contained in the final archive are output.
       In this case, the two extreme solutions are also output.

       Takes into account only the given functions, instances and dimensions. The log level is set to 'warning'
       while the archives are processed and restored afterwards (also if an error occurs).

       Raises PreprocessingException if the input path contains no .adat files.
    """

    def thin_file(input_file, suite_name, function, instance, dimension):
        """Thins the archive stored in input_file and prints the number of original and thinned solutions."""
        output_file = input_file.replace(input_path, output_path)
        create_path(os.path.dirname(output_file))
        thinned_solutions = 0
        all_solutions = 0

        # The context manager closes the output file even if the processing fails
        with open(output_file, 'w') as f_out:
            thinned_archive = Archive(suite_name, function, instance, dimension)

            # The first two solution texts of the fresh archive are used as the extreme solutions, their
            # objective values (columns 1 and 2) define the normalization
            extreme1_text = thinned_archive.get_next_solution_text()
            extreme2_text = thinned_archive.get_next_solution_text()
            extreme1 = [float(x) for x in extreme1_text.split()[1:3]]
            extreme2 = [float(x) for x in extreme2_text.split()[1:3]]
            ideal = [min(x, y) for x, y in zip(extreme1, extreme2)]
            nadir = [max(x, y) for x, y in zip(extreme1, extreme2)]
            normalization = [x - y for x, y in zip(nadir, ideal)]

            with open(input_file, 'r') as f_in:
                for line in f_in:
                    if line[0] == '%':
                        # Comments are copied to the output unchanged
                        f_out.write(line)
                        continue

                    columns = line.split()
                    if len(columns) < 3:
                        continue

                    # Extreme solutions (evaluation number 0) and regular solutions both count as input
                    all_solutions += 1
                    if columns[0] == '0':
                        # The line contains an extreme solution, do nothing
                        continue

                    # The line contains a 'regular' solution
                    try:
                        # Round the solution in the normalized objective space and add it to the archive
                        f_original = [float(x) for x in columns[1:3]]
                        f_normalized = [(f_original[i] - ideal[i]) / normalization[i] for i in range(2)]
                        f_normalized = [round(f_normalized[i] / thinning_precision) for i in range(2)]
                        f_normalized = [ideal[i] + f_normalized[i] * thinning_precision for i in range(2)]
                        updated = thinned_archive.add_solution(f_normalized[0], f_normalized[1], line)
                    except (IndexError, ValueError):
                        # ValueError covers objective values that are not valid numbers
                        print('Problem in file {}, line {}, skipping line'.format(input_file, line))
                        continue

                    if currently_nondominated and (updated == 1):
                        thinned_solutions += 1
                        f_out.write(line)

            if not currently_nondominated:
                if thinned_archive.number_of_solutions == 2:
                    # Output the two extreme solutions if they are the only two in the archive
                    f_out.write(extreme1_text)
                    f_out.write(extreme2_text)
                    thinned_solutions = 2

                # Output the remaining solutions of the final archive
                while True:
                    text = thinned_archive.get_next_solution_text()
                    if text is None:
                        break
                    thinned_solutions += 1
                    f_out.write(text)

        # Files without any solution would otherwise cause a division by zero
        if all_solutions:
            percentage = 100 * thinned_solutions / all_solutions
        else:
            percentage = 0.0
        print('original: {} thinned: {} ({:.2f}%)'.format(all_solutions, thinned_solutions, percentage))

    # Check whether input path exists
    input_files = get_file_name_list(input_path, ".adat")
    if len(input_files) == 0:
        raise PreprocessingException('Folder {} does not exist or is empty'.format(input_path))

    old_level = log_level('warning')
    try:
        for input_file in input_files:
            try:
                (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
                if (function not in functions) or (dimension not in dimensions):
                    continue
                if not instance:
                    raise PreprocessingWarning(
                        'Thinning does not work on files with multiple archives, use archive_split')
                if instance not in instances:
                    continue
            except PreprocessingWarning as warning:
                print('Skipping file {}\n{}'.format(input_file, warning))
                continue

            print(input_file)
            thin_file(input_file, suite_name, function, instance, dimension)
    finally:
        log_level(old_level)
コード例 #18
0
def check_file_complete(input_paths, functions, instances, dimensions, max_diff=1000):
    """Checks the .adat files created by the bbob-biobj logger to see if they have been properly written. Outputs the
       difference between the last evaluation from the .adat file and the one noted in the .info file if it is
       greater than max_diff.

       Takes into account only the given functions, instances and dimensions.

       Raises PreprocessingException if no .info or .adat files are found or if an .info file cannot be read.
       A KeyError is raised if the .info files contain no entry for a selected instance found in an .adat file.
    """

    def inspect_line(input_file, instance, line_string, evaluations, allowed_diff):
        """Check that the line_string (the last data line of the instance) contains at least three numbers and
           that they are correctly written. Outputs a message if the difference between the evaluations and the
           first number in the line_string is greater than allowed_diff.
        """
        if line_string is None:
            # The instance header was not followed by any data line
            print('File {}, instance {} contains no data lines'.format(input_file, instance))
            return

        items = line_string.split()
        if len(items) < 3:
            print("File {}, line {} too short".format(input_file, line_string))
        for item in items:
            try:
                float(item)
            except ValueError:
                print('File {}, line {}, number {} incorrect'.format(input_file, line_string, item))

        if not items:
            return
        try:
            last_evaluation = int(items[0])
        except ValueError:
            # Without a valid evaluation number the difference cannot be computed
            print('File {}, line {}, evaluation number {} is not an integer'.format(input_file, line_string,
                                                                                    items[0]))
            return

        if evaluations - last_evaluation > allowed_diff:
            print('Mismatch in evaluations in file {}\n'
                  '.info  = {}\n'
                  '.adat  = {}\n'
                  ' diff  = {}\n'.format(input_file, evaluations, items[0], evaluations - last_evaluation))

    # Check whether .info and .adat files exist in the input paths
    info_files = get_file_name_list(input_paths, ".info")
    if len(info_files) == 0:
        raise PreprocessingException('Folder {} does not contain .info files'.format(input_paths))

    adat_files = get_file_name_list(input_paths, ".adat")
    if len(adat_files) == 0:
        raise PreprocessingException('Folder {} does not contain .adat files'.format(input_paths))

    # Map (function, instance, dimension) to the number of evaluations
    info_dict = {}
    print('Reading .info files...')
    for input_file in info_files:
        # Store the data from the .info files
        try:
            info_data_list = parse_info_file(input_file)
        except ValueError as error:
            raise PreprocessingException('Cannot read file {}\n{}'.format(input_file, error))

        for info_data_item in info_data_list:
            (function, instance, dimension, evaluations) = info_data_item
            if (function not in functions) or (instance not in instances) or (dimension not in dimensions):
                continue
            info_dict[(function, instance, dimension)] = evaluations

    print('Reading .adat files...')
    for input_file in adat_files:
        try:
            (suite_name, function, instance, dimension) = parse_archive_file_name(input_file)
        except PreprocessingWarning as warning:
            print('Skipping file {}\n{}'.format(input_file, warning))
            continue
        if (function not in functions) or (instance and instance not in instances) or \
                (dimension not in dimensions):
            continue

        with open(input_file, 'r') as f:

            # instance_found tells whether the instance currently being read is one of the selected ones,
            # last_line holds its most recent data line
            instance_found = False
            last_line = None

            for line in f:
                if not line.strip():
                    # Ignore empty lines
                    continue

                if line[0] == '%':
                    if 'instance' not in line:
                        # Ignore lines with comments
                        continue

                    # A new instance starts here, so the previous one (if selected) is complete
                    if instance_found:
                        inspect_line(input_file, instance, last_line, info_dict[(function, instance, dimension)],
                                     max_diff)
                    instance = int(get_key_value(line[1:], 'instance'))
                    instance_found = (instance in instances)
                    # Do not carry the last line over to the next instance
                    last_line = None

                elif instance_found:
                    last_line = line

            if instance_found:
                inspect_line(input_file, instance, last_line, info_dict[(function, instance, dimension)], max_diff)