def format_file(f, full_path, directory, ext):
    global difference_files
    # read the current contents of the file
    # use a separate handle so the file name 'f' stays intact for get_formatted_text below
    infile = open_utf8(full_path, 'r')
    old_lines = infile.read().split('\n')
    infile.close()

    new_text = get_formatted_text(f, full_path, directory, ext)
    if check_only:
        new_lines = new_text.split('\n')
        old_lines = [x for x in old_lines if '...' not in x]
        new_lines = [x for x in new_lines if '...' not in x]
        diff_result = difflib.unified_diff(old_lines, new_lines)
        total_diff = ""
        for diff_line in diff_result:
            total_diff += diff_line + "\n"
        total_diff = total_diff.strip()

        if len(total_diff) > 0:
            print("----------------------------------------")
            print("----------------------------------------")
            print("Found differences in file " + full_path)
            print("----------------------------------------")
            print("----------------------------------------")
            print(total_diff)
            difference_files.append(full_path)
    else:
        tmpfile = full_path + ".tmp"
        outfile = open_utf8(tmpfile, 'w+')
        outfile.write(new_text)
        outfile.close()
        os.rename(tmpfile, full_path)

def copy_if_different(src, dest):
    if os.path.isfile(dest):
        # dest exists, check if the files are different
        with open_utf8(src, 'r') as f:
            source_text = f.read()
        with open_utf8(dest, 'r') as f:
            dest_text = f.read()
        if source_text == dest_text:
            # print("Skipping copy of " + src + ", identical copy already exists at " + dest)
            return
    # print("Copying " + src + " to " + dest)
    shutil.copyfile(src, dest)

def get_file_contents(fpath, add_line_numbers=False):
    with open_utf8(fpath, 'r') as f:
        result = f.read()
    if add_line_numbers:
        return '#line 1 "%s"\n' % (fpath, ) + result
    else:
        return result

def generate_amalgamation(source_file, header_file):
    # construct duckdb.hpp from these headers
    generate_duckdb_hpp(header_file)

    # now construct duckdb.cpp
    print("------------------------")
    print("-- Writing " + source_file + " --")
    print("------------------------")

    # scan all the .cpp files
    with open_utf8(temp_source, 'w+') as sfile:
        header_file_name = header_file.split(os.sep)[-1]
        sfile.write('#include "' + header_file_name + '"\n\n')
        sfile.write("#ifndef DUCKDB_AMALGAMATION\n#error header mismatch\n#endif\n\n")
        sfile.write("#if (!defined(DEBUG) && !defined NDEBUG)\n#define NDEBUG\n#endif\n\n")

        for compile_dir in compile_directories:
            sfile.write(write_dir(compile_dir))

        sfile.write('\n\n/*\n')
        license_idx = 0
        for license in licenses:
            sfile.write("\n\n\n### THIRD PARTY LICENSE #%s ###\n\n" % str(license_idx + 1))
            sfile.write(write_file(license))
            license_idx += 1
        sfile.write('\n\n*/\n')

    copy_if_different(temp_header, header_file)
    copy_if_different(temp_source, source_file)
    try:
        os.remove(temp_header)
        os.remove(temp_source)
    except:
        pass

def gather_file(current_file, source_files, header_files):
    global linenumbers
    global written_files
    if not need_to_write_file(current_file, False):
        return ""
    written_files[current_file] = True

    # first read this file
    with open_utf8(current_file, 'r') as f:
        text = f.read()

    (statements, includes) = get_includes(current_file, text)
    # find the linenr of the final #include statement we parsed
    if len(statements) > 0:
        index = text.find(statements[-1])
        linenr = len(text[:index].split('\n'))

    # now write all the dependencies of this header first
    for i in range(len(includes)):
        include_text = write_file(includes[i])
        if linenumbers and i == len(includes) - 1:
            # for the last include statement, we also include a #line directive
            include_text += '\n#line %d "%s"\n' % (linenr, current_file)
        if includes[i].endswith('.cpp') or includes[i].endswith('.cc') or includes[i].endswith('.c'):
            # source file inclusions are inlined into the main text
            text = text.replace(statements[i], include_text)
        else:
            text = text.replace(statements[i], '')
            header_files.append(include_text)

    # add the initial line here
    if linenumbers:
        text = '\n#line 1 "%s"\n' % (current_file,) + text
    source_files.append(cleanup_file(text))

def write_file(current_file, ignore_excluded=False):
    global linenumbers
    global written_files
    if not need_to_write_file(current_file, ignore_excluded):
        return ""
    written_files[current_file] = True

    # first read this file
    with open_utf8(current_file, 'r') as f:
        text = f.read()

    if current_file.startswith("third_party") and not current_file.endswith("LICENSE"):
        lic_idx = find_license(current_file)
        text = "\n\n// LICENSE_CHANGE_BEGIN\n// The following code up to LICENSE_CHANGE_END is subject to THIRD PARTY LICENSE #%s\n// See the end of this file for a list\n\n" % str(lic_idx + 1) + text + "\n\n// LICENSE_CHANGE_END\n"

    (statements, includes) = get_includes(current_file, text)
    # find the linenr of the final #include statement we parsed
    if len(statements) > 0:
        index = text.find(statements[-1])
        linenr = len(text[:index].split('\n'))

    # now write all the dependencies of this header first
    for i in range(len(includes)):
        include_text = write_file(includes[i])
        if linenumbers and i == len(includes) - 1:
            # for the last include statement, we also include a #line directive
            include_text += '\n#line %d "%s"\n' % (linenr, current_file)
        text = text.replace(statements[i], include_text)

    # add the initial line here
    if linenumbers:
        text = '\n#line 1 "%s"\n' % (current_file,) + text
    # print(current_file)
    # now read the header and write it
    return cleanup_file(text)

def format_tpch_queries(target_dir, tpch_in, comment):
    with open_utf8(tpch_in, 'r') as f:
        text = f.read()
    for i in range(1, 23):
        qnr = '%02d' % (i, )
        target_file = os.path.join(target_dir, 'q' + qnr + '.benchmark')
        new_text = '''# name: %s
# description: Run query %02d from the TPC-H benchmark (%s)
# group: [sf1]

template %s
QUERY_NUMBER=%d
QUERY_NUMBER_PADDED=%02d''' % (target_file, i, comment, tpch_in, i, i)
        with open_utf8(target_file, 'w+') as f:
            f.write(new_text)

def analyze_include_file(fpath, already_included_files, prev_include=""):
    if fpath in already_included_files:
        return
    if fpath in amalgamation.always_excluded:
        return
    if fpath not in cached_includes:
        # print(fpath)
        with open_utf8(fpath, 'r') as f:
            text = f.read()
        (statements, includes) = amalgamation.get_includes(fpath, text)
        cached_includes[fpath] = includes
    else:
        includes = cached_includes[fpath]

    if fpath in include_counts:
        include_counts[fpath] += 1
    else:
        include_counts[fpath] = 1

    if fpath not in include_chains:
        include_chains[fpath] = {}
    if prev_include not in include_chains[fpath]:
        include_chains[fpath][prev_include] = 0
    include_chains[fpath][prev_include] += 1

    already_included_files.append(fpath)
    if fpath.endswith('.h') or fpath.endswith('.hpp'):
        prev_include = fpath
    for include in includes:
        analyze_include_file(include, already_included_files, prev_include)

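# The counters filled in by analyze_include_file can be summarized after the analysis pass.
# A minimal sketch, assuming the include_counts/include_chains globals above; the helper name
# print_include_report is hypothetical and not part of the original script:
def print_include_report(top_n=10):
    # most frequently included files first
    for fpath in sorted(include_counts, key=include_counts.get, reverse=True)[:top_n]:
        print("%4d  %s" % (include_counts[fpath], fpath))
        # show which headers pulled this file in, and how often
        for parent, count in sorted(include_chains[fpath].items(), key=lambda kv: -kv[1]):
            print("        %4d via %s" % (count, parent if parent else "<top level>"))
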
def generate_parquet_amalgamation(source_file, header_file):
    # construct duckdb.hpp from these headers
    generate_parquet_hpp(header_file)

    print("------------------------")
    print("-- Writing " + source_file + " --")
    print("------------------------")

    # scan all the .cpp files
    with open_utf8(temp_source, 'w+') as sfile:
        header_file_name = header_file.split(os.sep)[-1]
        sfile.write('''#include "duckdb.hpp"

#ifdef DUCKDB_AMALGAMATION
#ifndef DUCKDB_AMALGAMATION_EXTENDED
#error Parquet amalgamation requires extended DuckDB amalgamation (--extended)
#endif
#endif

''')
        sfile.write('#include "' + header_file_name + '"\n\n')

        for compile_dir in compile_directories:
            sfile.write(amalgamation.write_dir(compile_dir))

    amalgamation.copy_if_different(temp_header, header_file)
    amalgamation.copy_if_different(temp_source, source_file)
    try:
        os.remove(temp_header)
        os.remove(temp_source)
    except:
        pass

def generate_unity_build(entries, unity_name, linenumbers):
    ub_file = os.path.join(target_dir, f'ub_{unity_name}.cpp')
    with open_utf8(ub_file, 'w+') as f:
        for entry in entries:
            if linenumbers:
                f.write('#line 0 "{}"\n'.format(convert_backslashes(entry)))
            f.write('#include "{}"\n\n'.format(convert_backslashes(entry)))
    return ub_file

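# Hedged usage sketch for generate_unity_build above. The file names are illustrative and
# target_dir is assumed to be set by the surrounding script; example_unity_build itself is
# not part of the original code. It bundles a group of sources into one unity-build
# translation unit so the build compiles a single generated file instead of each source.
def example_unity_build():
    sources = ['src/common/types.cpp', 'src/common/vector.cpp']
    ub_file = generate_unity_build(sources, 'common', linenumbers=True)
    # ub_file now contains, for every entry:
    #   #line 0 "src/common/types.cpp"
    #   #include "src/common/types.cpp"
    return ub_file
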
def generate_unity_build(entries, idx, linenumbers):
    ub_file = os.path.join(target_dir, 'amalgamation-{}.cpp'.format(str(idx)))
    with open_utf8(ub_file, 'w+') as f:
        for entry in entries:
            if linenumbers:
                f.write('#line 0 "{}"\n'.format(convert_backslashes(entry)))
            f.write('#include "{}"\n\n'.format(convert_backslashes(entry)))
    return ub_file

def rewrite(file_in, file_out):
    # print(file_in)
    a_file = open_utf8(file_in, "r")
    out = open_utf8(file_out, "a")
    for line in a_file:
        if '#pragma once' in line:
            continue
        found = False
        for header in headers:
            if header in line:
                found = True
                break
        if found:
            out.write("// %s" % line)
        else:
            out.write(line)
    out.write("\n")
    out.close()

def generate_parquet_hpp(header_file):
    print("-----------------------")
    print("-- Writing " + header_file + " --")
    print("-----------------------")
    with open_utf8(temp_header, 'w+') as hfile:
        hfile.write("/*\n")
        hfile.write(amalgamation.write_file("LICENSE"))
        hfile.write("*/\n\n")
        hfile.write("#pragma once\n")
        for fpath in amalgamation.main_header_files:
            hfile.write(amalgamation.write_file(fpath))

def generate_duckdb_hpp(header_file):
    print("-----------------------")
    print("-- Writing " + header_file + " --")
    print("-----------------------")
    with open_utf8(temp_header, 'w+') as hfile:
        hfile.write("/*\n")
        hfile.write(write_file("LICENSE"))
        hfile.write("*/\n\n")
        hfile.write("#pragma once\n")
        hfile.write("#define DUCKDB_AMALGAMATION 1\n")
        hfile.write("#define DUCKDB_SOURCE_ID \"%s\"\n" % git_commit_hash())
        hfile.write("#define DUCKDB_VERSION \"%s\"\n" % git_dev_version())
        for fpath in main_header_files:
            hfile.write(write_file(fpath))

def create_tpcds_header(tpch_dir):
    result = """/* THIS FILE WAS AUTOMATICALLY GENERATED BY generate_csv_header.py */
#pragma once

const int TPCDS_QUERIES_COUNT = 99;
const int TPCDS_TABLE_COUNT = 24;

"""
    # write the queries
    result += write_dir(tpcds_queries, "TPCDS_QUERIES")
    result += write_dir(tpcds_answers_sf001, "TPCDS_ANSWERS_SF0_01")
    result += write_dir(tpcds_answers_sf1, "TPCDS_ANSWERS_SF1")

    with open_utf8(tpcds_header, 'w+') as f:
        f.write(result)

def generate_amalgamation(source_file, header_file):
    # construct duckdb.hpp from these headers
    generate_duckdb_hpp(header_file)

    # now construct duckdb.cpp
    print("------------------------")
    print("-- Writing " + source_file + " --")
    print("------------------------")

    # scan all the .cpp files
    with open_utf8(temp_source, 'w+') as sfile:
        header_file_name = header_file.split(os.sep)[-1]
        sfile.write('#include "' + header_file_name + '"\n\n')
        sfile.write("#ifndef DUCKDB_AMALGAMATION\n#error header mismatch\n#endif\n\n")

        for compile_dir in compile_directories:
            sfile.write(write_dir(compile_dir))

        # for windows we write file_system.cpp last
        # this is because it includes windows.h which contains a lot of #define statements that mess up the other code
        sfile.write(write_file(file_system_cpp, True))

        sfile.write('\n\n/*\n')
        license_idx = 0
        for license in licenses:
            sfile.write("\n\n\n### THIRD PARTY LICENSE #%s ###\n\n" % str(license_idx + 1))
            sfile.write(write_file(license))
            license_idx += 1
        sfile.write('\n\n*/\n')

    copy_if_different(temp_header, header_file)
    copy_if_different(temp_source, source_file)
    try:
        os.remove(temp_header)
        os.remove(temp_source)
    except:
        pass

def build_package(target_dir, linenumbers=False):
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(scripts_dir)
    import amalgamation
    sys.path.append(os.path.join(scripts_dir, '..', 'extension', 'parquet'))
    import parquet_amalgamation

    prev_wd = os.getcwd()
    os.chdir(os.path.join(scripts_dir, '..'))

    # obtain the list of source files from the amalgamation
    source_list = amalgamation.list_sources()
    include_list = amalgamation.list_include_dirs()
    include_files = amalgamation.list_includes()

    def copy_file(src, target_dir):
        # get the path
        full_path = src.split(os.path.sep)
        current_path = target_dir
        for i in range(len(full_path) - 1):
            current_path = os.path.join(current_path, full_path[i])
            if not os.path.isdir(current_path):
                os.mkdir(current_path)
        target_name = full_path[-1]
        target_file = os.path.join(current_path, target_name)
        amalgamation.copy_if_different(src, target_file)

    # now do the same for the parquet extension
    parquet_include_directories = parquet_amalgamation.include_directories

    include_files += amalgamation.list_includes_files(parquet_include_directories)

    include_list += parquet_include_directories
    source_list += parquet_amalgamation.source_files

    for src in source_list:
        copy_file(src, target_dir)

    for inc in include_files:
        copy_file(inc, target_dir)

    # handle pragma_version.cpp: paste #define DUCKDB_SOURCE_ID there
    # read the source id
    proc = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            cwd=os.path.join(scripts_dir, '..'))
    githash = proc.stdout.read().strip().decode('utf8')
    # open the file and read the current contents
    fpath = os.path.join(target_dir, 'src', 'function', 'table', 'version', 'pragma_version.cpp')
    with open_utf8(fpath, 'r') as f:
        text = f.read()
    # now add the DUCKDB_SOURCE_ID define, if it is not there already
    found = False
    lines = text.split('\n')
    for i in range(len(lines)):
        if '#define DUCKDB_SOURCE_ID ' in lines[i]:
            lines[i] = '#define DUCKDB_SOURCE_ID "{}"'.format(githash)
            found = True
            break
    if not found:
        text = '#ifndef DUCKDB_SOURCE_ID\n#define DUCKDB_SOURCE_ID "{}"\n#endif\n'.format(githash) + text
    else:
        # rebuild the text from the (possibly modified) lines
        text = '\n'.join(lines)
    with open_utf8(fpath, 'w+') as f:
        f.write(text)

    def file_is_excluded(fname):
        for entry in excluded_objects:
            if entry in fname:
                return True
        return False

    def generate_unity_build(entries, idx, linenumbers):
        ub_file = os.path.join(target_dir, 'amalgamation-{}.cpp'.format(str(idx)))
        with open_utf8(ub_file, 'w+') as f:
            for entry in entries:
                if linenumbers:
                    f.write('#line 0 "{}"\n'.format(convert_backslashes(entry)))
                f.write('#include "{}"\n\n'.format(convert_backslashes(entry)))
        return ub_file

    def generate_unity_builds(source_list, nsplits, linenumbers):
        source_list.sort()
        files_per_split = len(source_list) / nsplits
        new_source_files = []
        current_files = []
        idx = 1
        for entry in source_list:
            if not entry.startswith('src'):
                new_source_files.append(os.path.join('duckdb', entry))
                continue

            current_files.append(entry)
            if len(current_files) > files_per_split:
                new_source_files.append(generate_unity_build(current_files, idx, linenumbers))
                current_files = []
                idx += 1
        if len(current_files) > 0:
            new_source_files.append(generate_unity_build(current_files, idx, linenumbers))
            current_files = []
            idx += 1
        return new_source_files

    original_sources = source_list
    source_list = generate_unity_builds(source_list, 8, linenumbers)

    os.chdir(prev_wd)
    return ([convert_backslashes(x) for x in source_list if not file_is_excluded(x)],
            [convert_backslashes(x) for x in include_list],
            [convert_backslashes(x) for x in original_sources])

def generate_amalgamation(source_file, header_file):
    def copy_if_different(src, dest):
        if os.path.isfile(dest):
            # dest exists, check if the files are different
            with open_utf8(src, 'r') as f:
                source_text = f.read()
            with open_utf8(dest, 'r') as f:
                dest_text = f.read()
            if source_text == dest_text:
                print("Skipping copy of " + src + ", identical copy already exists at " + dest)
                return
        print("Copying " + src + " to " + dest)
        shutil.copyfile(src, dest)

    # the header is unchanged
    copy_if_different('extension/parquet/include/parquet-extension.hpp', header_file)

    # now concat all the source/header files while removing known files
    out = open_utf8(temp_source, "w")
    out.write("// Parquet reader amalgamation\n\n#include \"%s\"\n" % os.path.basename(header_file))
    out.close()

    def myglob(path, pattern):
        wd = os.getcwd()
        os.chdir(path)
        files = glob.glob(pattern)
        os.chdir(wd)
        return [f.replace('\\', '/') for f in files]

    headers = ["parquet-extension.hpp"] + myglob("third_party/parquet", "*.h") + \
        myglob("third_party", "thrift/thrift/*.h") + \
        myglob("third_party", "thrift/thrift/**/*.h") + \
        ['protocol/TCompactProtocol.tcc'] + \
        myglob("third_party/snappy", "*.h") + \
        myglob("third_party/miniz", "*.hpp")

    def rewrite(file_in, file_out):
        # print(file_in)
        a_file = open_utf8(file_in, "r")
        out = open_utf8(file_out, "a")
        for line in a_file:
            if '#pragma once' in line:
                continue
            found = False
            for header in headers:
                if header in line:
                    found = True
                    break
            if found:
                out.write("// %s" % line)
            else:
                out.write(line)
        out.write("\n")
        out.close()

    # inline all the headers first
    def rewrite_prefix(prefix, files):
        for f in files:
            rewrite("%s/%s" % (prefix, f), temp_source)

    # the local and overall order of these rewrites matters.
    rewrite_prefix('third_party/thrift/thrift', [
        'transport/PlatformSocket.h', 'config.h', 'thrift-config.h', 'Thrift.h', 'TLogging.h',
        'transport/TTransportException.h', 'transport/TTransport.h', 'protocol/TProtocolException.h',
        'protocol/TProtocol.h', 'protocol/TVirtualProtocol.h', 'protocol/TCompactProtocol.h',
        'protocol/TCompactProtocol.tcc', 'transport/TVirtualTransport.h', 'transport/TBufferTransports.h',
        'TBase.h', 'TToString.h', 'protocol/TProtocol.cpp', 'transport/TTransportException.cpp',
        'transport/TBufferTransports.cpp'
    ])

    rewrite_prefix('third_party/parquet', [
        'windows_compatibility.h', 'parquet_types.h', 'parquet_constants.h', 'parquet_types.cpp',
        'parquet_constants.cpp'
    ])

    rewrite_prefix('third_party/snappy', ['snappy-stubs-public.h', 'snappy.h'])
    rewrite_prefix('third_party/miniz', ['miniz.hpp'])
    # miniz.cpp is already in duckdb.cpp
    rewrite('third_party/utf8proc/include/utf8proc_wrapper.hpp', temp_source)

    # 'main'
    rewrite('extension/parquet/parquet-extension.cpp', temp_source)

    # snappy last because tons of #defines
    rewrite_prefix('third_party/snappy', [
        'snappy-stubs-internal.h', 'snappy-internal.h', 'snappy-sinksource.h', 'snappy-stubs-internal.cc',
        'snappy-sinksource.cc', 'snappy.cc'
    ])

    copy_if_different(temp_source, source_file)

GENERATED_HEADER = 'include/tpce_generated.hpp'
GENERATED_SOURCE = 'tpce_generated.cpp'

TPCE_DIR = os.path.join('third_party', 'tpce-tool')
GENERATED_HEADER = os.path.join(TPCE_DIR, GENERATED_HEADER)
GENERATED_SOURCE = os.path.join(TPCE_DIR, GENERATED_SOURCE)

current_table = None
tables = {}

print(GENERATED_HEADER)
print(GENERATED_SOURCE)

header = open_utf8(GENERATED_HEADER, 'w+')
source = open_utf8(GENERATED_SOURCE, 'w+')

for fp in [header, source]:
    fp.write("""
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////
// THIS FILE IS GENERATED BY gentpcecode.py, DO NOT EDIT MANUALLY //
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////

""")

header.write("""
#include "duckdb/catalog/catalog.hpp"
#include "duckdb/main/appender.hpp"
def get_formatted_text(f, full_path, directory, ext):
    if not can_format_file(full_path):
        print("Eek, cannot format file " + full_path + " but attempted to format anyway")
        exit(1)
    if f == 'list.hpp':
        # fill in list file
        file_list = [
            os.path.join(dp, f) for dp, dn, filenames in os.walk(directory) for f in filenames
            if os.path.splitext(f)[1] == '.hpp' and not f.endswith("list.hpp")
        ]
        file_list = [x.replace('src/include/', '') for x in file_list]
        file_list.sort()
        result = ""
        for x in file_list:
            result += '#include "%s"\n' % (x)
        return result

    if ext == ".hpp" and directory.startswith("src/include"):
        f = open_utf8(full_path, 'r')
        lines = f.readlines()
        f.close()

        # format header in files
        header_middle = "// " + os.path.relpath(full_path, base_dir) + "\n"
        text = header_top + header_middle + header_bottom
        is_old_header = True
        for line in lines:
            if not (line.startswith("//") or line.startswith("\n")) and is_old_header:
                is_old_header = False
            if not is_old_header:
                text += line

    if ext == '.test' or ext == '.test_slow':
        f = open_utf8(full_path, 'r')
        lines = f.readlines()
        f.close()

        found_name = False
        found_group = False
        group_name = full_path.split('/')[-2]
        new_path_line = '# name: ' + full_path + '\n'
        new_group_line = '# group: [' + group_name + ']' + '\n'
        found_diff = False
        for i in range(0, len(lines)):
            line = lines[i]
            if line.startswith('# name: ') or line.startswith('#name: '):
                if found_name:
                    print("Error formatting file " + full_path + ", multiple lines starting with # name found")
                    exit(1)
                found_name = True
                if lines[i] != new_path_line:
                    lines[i] = new_path_line
            if line.startswith('# group: ') or line.startswith('#group: '):
                if found_group:
                    print("Error formatting file " + full_path + ", multiple lines starting with # group found")
                    exit(1)
                found_group = True
                if lines[i] != new_group_line:
                    lines[i] = new_group_line
        if not found_group:
            lines = [new_group_line] + lines
        if not found_name:
            lines = [new_path_line] + lines
        return ''.join(lines)

    proc_command = format_commands[ext].split(' ') + [full_path]
    proc = subprocess.Popen(proc_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    new_text = proc.stdout.read().decode('utf8')
    stderr = proc.stderr.read().decode('utf8')
    if len(stderr) > 0:
        print(os.getcwd())
        print("Failed to format file " + full_path)
        print(' '.join(proc_command))
        print(stderr)
        exit(1)
    return new_text

def get_formatted_text(f, full_path, directory, ext):
    if not can_format_file(full_path):
        print("Eek, cannot format file " + full_path + " but attempted to format anyway")
        exit(1)
    if f == 'list.hpp':
        # fill in list file
        file_list = [
            os.path.join(dp, f) for dp, dn, filenames in os.walk(directory) for f in filenames
            if os.path.splitext(f)[1] == '.hpp' and not f.endswith("list.hpp")
        ]
        file_list = [x.replace('src/include/', '') for x in file_list]
        file_list.sort()
        result = ""
        for x in file_list:
            result += '#include "%s"\n' % (x)
        return result

    if ext == ".hpp" and directory.startswith("src/include"):
        with open_utf8(full_path, 'r') as f:
            lines = f.readlines()

        # format header in files
        header_middle = "// " + os.path.relpath(full_path, base_dir) + "\n"
        text = header_top + header_middle + header_bottom
        is_old_header = True
        for line in lines:
            if not (line.startswith("//") or line.startswith("\n")) and is_old_header:
                is_old_header = False
            if not is_old_header:
                text += line

    if ext == '.test' or ext == '.test_slow' or ext == '.test_coverage' or ext == '.benchmark':
        f = open_utf8(full_path, 'r')
        lines = f.readlines()
        f.close()

        found_name = False
        found_group = False
        group_name = full_path.split('/')[-2]
        new_path_line = '# name: ' + full_path + '\n'
        new_group_line = '# group: [' + group_name + ']' + '\n'
        found_diff = False

        # Find description.
        found_description = False
        for line in lines:
            if line.lower().startswith('# description:') or line.lower().startswith('#description:'):
                if found_description:
                    print("Error formatting file " + full_path + ", multiple lines starting with # description found")
                    exit(1)
                found_description = True
                new_description_line = '# description: ' + line.split(':', 1)[1].strip() + '\n'

        # Filter old meta.
        meta = ['#name:', '# name:', '#description:', '# description:', '#group:', '# group:']
        lines = [line for line in lines if not any(line.lower().startswith(m) for m in meta)]

        # Clean up empty leading lines.
        while lines and not lines[0].strip():
            lines.pop(0)

        # Ensure header is prepended.
        header = [new_path_line]
        if found_description:
            header.append(new_description_line)
        header.append(new_group_line)
        header.append('\n')

        return ''.join(header + lines)

    proc_command = format_commands[ext].split(' ') + [full_path]
    proc = subprocess.Popen(proc_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    new_text = proc.stdout.read().decode('utf8')
    stderr = proc.stderr.read().decode('utf8')
    if len(stderr) > 0:
        print(os.getcwd())
        print("Failed to format file " + full_path)
        print(' '.join(proc_command))
        print(stderr)
        exit(1)
    return new_text.replace('\r', '')

def replace_in_file(fname, regex, replace):
    with open_utf8(fname, 'r') as f:
        contents = f.read()
    contents = re.sub(regex, replace, contents)
    with open_utf8(fname, 'w+') as f:
        f.write(contents)

with open(input, 'r') as f:
    text = f.read()

new_text = '{ "result"' + text.split('{ "result"')[tree_index + 1]

input += '.tmp'
with open(input, 'w+') as f:
    f.write(new_text)

duckdb_query_graph.generate(input, output)

with open(output, 'r') as f:
    text = f.read()

# inline javascript files
javascript_base = os.path.join('tools', 'pythonpkg', 'duckdb_query_graph')
with open(os.path.join(javascript_base, 'raphael.js'), 'r') as f:
    raphael = f.read()
with open(os.path.join(javascript_base, 'treant.js'), 'r') as f:
    treant = f.read()

text = text.replace('<script src="../../raphael.js"></script>', '<script>' + raphael + '</script>')
text = text.replace('<script src="../../treant.js"></script>', '<script>' + treant + '</script>')

with open_utf8(output, 'w+') as f:
    f.write(text)

if open_output:
    os.system('open "' + output.replace('"', '\\"') + '"')

def build_package(target_dir, extensions, linenumbers=False):
    if not os.path.isdir(target_dir):
        os.mkdir(target_dir)

    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(scripts_dir)
    import amalgamation

    prev_wd = os.getcwd()
    os.chdir(os.path.join(scripts_dir, '..'))

    # obtain the list of source files from the amalgamation
    source_list = amalgamation.list_sources()
    include_list = amalgamation.list_include_dirs()
    include_files = amalgamation.list_includes()

    def copy_file(src, target_dir):
        # get the path
        full_path = src.split(os.path.sep)
        current_path = target_dir
        for i in range(len(full_path) - 1):
            current_path = os.path.join(current_path, full_path[i])
            if not os.path.isdir(current_path):
                os.mkdir(current_path)
        target_name = full_path[-1]
        target_file = os.path.join(current_path, target_name)
        amalgamation.copy_if_different(src, target_file)

    # include the main extension helper
    include_files += [os.path.join('extension', 'extension_helper.hpp')]
    # include the separate extensions
    for ext in extensions:
        ext_path = os.path.join(scripts_dir, '..', 'extension', ext)
        include_package(ext, ext_path, include_files, include_list, source_list)

    for src in source_list:
        copy_file(src, target_dir)

    for inc in include_files:
        copy_file(inc, target_dir)

    # handle pragma_version.cpp: paste #define DUCKDB_SOURCE_ID and DUCKDB_VERSION there
    curdir = os.getcwd()
    os.chdir(os.path.join(scripts_dir, '..'))
    githash = git_commit_hash()
    dev_version = git_dev_version()
    os.chdir(curdir)
    # open the file and read the current contents
    fpath = os.path.join(target_dir, 'src', 'function', 'table', 'version', 'pragma_version.cpp')
    with open_utf8(fpath, 'r') as f:
        text = f.read()
    # now add the DUCKDB_SOURCE_ID define, if it is not there already
    found_hash = False
    found_dev = False
    lines = text.split('\n')
    for i in range(len(lines)):
        if '#define DUCKDB_SOURCE_ID ' in lines[i]:
            lines[i] = '#define DUCKDB_SOURCE_ID "{}"'.format(githash)
            found_hash = True
            break
        if '#define DUCKDB_VERSION ' in lines[i]:
            lines[i] = '#define DUCKDB_VERSION "{}"'.format(dev_version)
            found_dev = True
            break
    if not found_hash:
        lines = [
            '#ifndef DUCKDB_SOURCE_ID', '#define DUCKDB_SOURCE_ID "{}"'.format(githash), '#endif'
        ] + lines
    if not found_dev:
        lines = [
            '#ifndef DUCKDB_VERSION', '#define DUCKDB_VERSION "{}"'.format(dev_version), '#endif'
        ] + lines
    text = '\n'.join(lines)
    with open_utf8(fpath, 'w+') as f:
        f.write(text)

    def file_is_excluded(fname):
        for entry in excluded_objects:
            if entry in fname:
                return True
        return False

    def generate_unity_build(entries, idx, linenumbers):
        ub_file = os.path.join(target_dir, 'amalgamation-{}.cpp'.format(str(idx)))
        with open_utf8(ub_file, 'w+') as f:
            for entry in entries:
                if linenumbers:
                    f.write('#line 0 "{}"\n'.format(convert_backslashes(entry)))
                f.write('#include "{}"\n\n'.format(convert_backslashes(entry)))
        return ub_file

    def generate_unity_builds(source_list, nsplits, linenumbers):
        source_list.sort()
        files_per_split = len(source_list) / nsplits
        new_source_files = []
        current_files = []
        idx = 1
        for entry in source_list:
            if not entry.startswith('src'):
                new_source_files.append(os.path.join('duckdb', entry))
                continue

            current_files.append(entry)
            if len(current_files) > files_per_split:
                new_source_files.append(generate_unity_build(current_files, idx, linenumbers))
                current_files = []
                idx += 1
        if len(current_files) > 0:
            new_source_files.append(generate_unity_build(current_files, idx, linenumbers))
            current_files = []
            idx += 1
        return new_source_files

    original_sources = source_list
    source_list = generate_unity_builds(source_list, 8, linenumbers)

    os.chdir(prev_wd)
    return ([convert_backslashes(x) for x in source_list if not file_is_excluded(x)],
            [convert_backslashes(x) for x in include_list],
            [convert_backslashes(x) for x in original_sources])

def generate_amalgamation_splits(source_file, header_file, nsplits):
    # construct duckdb.hpp from these headers
    generate_duckdb_hpp(header_file)

    # gather all files to read and write
    source_files = []
    header_files = []
    for compile_dir in compile_directories:
        if compile_dir != src_dir:
            continue
        gather_files(compile_dir, source_files, header_files)

    # for windows we write file_system.cpp last
    # this is because it includes windows.h which contains a lot of #define statements that mess up the other code
    source_files.append(write_file(os.path.join('src', 'common', 'file_system.cpp'), True))

    # write duckdb-internal.hpp
    if '.hpp' in header_file:
        internal_header_file = header_file.replace('.hpp', '-internal.hpp')
    elif '.h' in header_file:
        internal_header_file = header_file.replace('.h', '-internal.h')
    else:
        raise Exception("Unknown extension of header file")

    temp_internal_header = internal_header_file + '.tmp'
    with open_utf8(temp_internal_header, 'w+') as f:
        write_license(f)
        for hfile in header_files:
            f.write(hfile)

    # count the total amount of bytes in the source files
    total_bytes = 0
    for sfile in source_files:
        total_bytes += len(sfile)

    # now write the individual splits
    # we approximate the splitting up by making every file have roughly the same amount of bytes
    split_bytes = total_bytes / nsplits
    current_bytes = 0
    partitions = []
    partition_names = []
    current_partition = []
    current_partition_idx = 1
    for sfile in source_files:
        current_partition.append(sfile)
        current_bytes += len(sfile)
        if current_bytes >= split_bytes:
            partition_names.append(str(current_partition_idx))
            partitions.append(current_partition)
            current_partition = []
            current_bytes = 0
            current_partition_idx += 1
    if len(current_partition) > 0:
        partition_names.append(str(current_partition_idx))
        partitions.append(current_partition)
        current_partition = []
        current_bytes = 0

    # generate partitions from the third party libraries
    for compile_dir in compile_directories:
        if compile_dir != src_dir:
            partition_names.append(compile_dir.split(os.sep)[-1])
            partitions.append(write_dir(compile_dir))

    header_file_name = header_file.split(os.sep)[-1]
    internal_header_file_name = internal_header_file.split(os.sep)[-1]

    partition_fnames = []
    current_partition = 0
    for partition in partitions:
        partition_name = source_file.replace('.cpp', '-%s.cpp' % (partition_names[current_partition], ))
        temp_partition_name = partition_name + '.tmp'
        partition_fnames.append([partition_name, temp_partition_name])
        with open_utf8(temp_partition_name, 'w+') as f:
            write_license(f)
            f.write('#include "%s"\n#include "%s"' % (header_file_name, internal_header_file_name))
            f.write('''
#ifndef DUCKDB_AMALGAMATION
#error header mismatch
#endif
''')
            for sfile in partition:
                f.write(sfile)
        current_partition += 1

    copy_if_different(temp_header, header_file)
    copy_if_different(temp_internal_header, internal_header_file)
    try:
        os.remove(temp_header)
        os.remove(temp_internal_header)
    except:
        pass
    for p in partition_fnames:
        copy_if_different(p[1], p[0])
        try:
            os.remove(p[1])
        except:
            pass

def format_file(f, full_path, directory, ext, sort_includes):
    if not os.path.isfile(full_path):
        return
    if f == 'list.hpp':
        # fill in list file
        list = [
            os.path.join(dp, f) for dp, dn, filenames in os.walk(directory) for f in filenames
            if os.path.splitext(f)[1] == '.hpp' and not f.endswith("list.hpp")
        ]
        list = [x.replace('src/include/', '') for x in list]
        list.sort()
        with open_utf8(full_path, "w") as file:
            for x in list:
                file.write('#include "%s"\n' % (x))
    elif ext == ".hpp" and directory.startswith("src/include"):
        # format header in files
        header_middle = "// " + os.path.relpath(full_path, base_dir) + "\n"
        file = open_utf8(full_path, "r")
        lines = file.readlines()
        file.close()

        file = open_utf8(full_path, "w")
        file.write(header_top + header_middle + header_bottom)
        is_old_header = True
        for line in lines:
            if not (line.startswith("//") or line.startswith("\n")) and is_old_header:
                is_old_header = False
            if not is_old_header:
                file.write(line)
        file.close()
    elif ext == ".txt" and f != 'CMakeLists.txt':
        return
    elif ext == '.test' or ext == '.test_slow':
        try:
            with open_utf8(full_path, "r") as file_:
                lines = file_.readlines()
        except:
            return
        found_name = False
        found_group = False
        group_name = full_path.split('/')[-2]
        new_path_line = '# name: ' + full_path + '\n'
        new_group_line = '# group: [' + group_name + ']' + '\n'
        found_diff = False
        for i in range(0, len(lines)):
            line = lines[i]
            if line.startswith('# name: ') or line.startswith('#name: '):
                if found_name:
                    print("Error formatting file " + full_path + ", multiple lines starting with # name found")
                    exit(1)
                found_name = True
                if lines[i] != new_path_line:
                    found_diff = True
                    lines[i] = new_path_line
            if line.startswith('# group: ') or line.startswith('#group: '):
                if found_group:
                    print("Error formatting file " + full_path + ", multiple lines starting with # group found")
                    exit(1)
                found_group = True
                if lines[i] != new_group_line:
                    found_diff = True
                    lines[i] = new_group_line
        if not found_group:
            lines = [new_group_line] + lines
            found_diff = True
        if not found_name:
            lines = [new_path_line] + lines
            found_diff = True
        if found_diff:
            print(full_path)
            print(new_path_line)
            print(new_group_line)
            with open_utf8(full_path, "w+") as file_:
                file_.write(''.join(lines))
        return
    format_command = format_commands[ext]
    cmd = format_command.replace("${FILE}", full_path).replace("${SORT_INCLUDES}", "1" if sort_includes else "0")
    print(cmd)
    os.system(cmd)
    # remove empty lines at beginning and end of file
    with open_utf8(full_path, 'r') as fp:
        text = fp.read()
    text = text.strip() + "\n"
    with open_utf8(full_path, 'w+') as fp:
        fp.write(text)

namespace duckdb_libpgquery {
#define PG_KEYWORD(a,b,c) {a,b,c},

const PGScanKeyword ScanKeywords[] = {
"""

for tpl in kwlist:
    kwtext += 'PG_KEYWORD("%s", %s, %s)\n' % (strip_p(tpl[0]).lower(), tpl[0], tpl[1])

kwtext += """
};

const int NumScanKeywords = lengthof(ScanKeywords);

} // namespace duckdb_libpgquery
"""

with open_utf8(kwlist_header, 'w+') as f:
    f.write(kwtext)

# generate the final main.y.tmp file
# first read the template file
with open_utf8(template_file, 'r') as f:
    text = f.read()

# now perform a series of replacements in the file to construct the final yacc file


def get_file_contents(fpath, add_line_numbers=False):
    with open_utf8(fpath, 'r') as f:
        result = f.read()
    if add_line_numbers:
        return '#line 1 "%s"\n' % (fpath, ) + result

flex_file_path = os.path.join(pg_path, 'scan.l')
target_file = os.path.join(pg_path, 'src_backend_parser_scan.cpp')

proc = subprocess.Popen([flex_bin, '--nounistd', '-o', target_file, flex_file_path],
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
stdout = proc.stdout.read().decode('utf8')
stderr = proc.stderr.read().decode('utf8')
if proc.returncode != None or len(stderr) > 0:
    print("Flex failed")
    print("stdout: ", stdout)
    print("stderr: ", stderr)
    exit(1)

with open_utf8(target_file, 'r') as f:
    text = f.read()

# add the libpg_query namespace
text = text.replace(
    '''
#ifndef FLEXINT_H
#define FLEXINT_H
''', '''
#ifndef FLEXINT_H
#define FLEXINT_H
namespace duckdb_libpgquery {
''')

text = text.replace('register ', '')

text = text + "\n} /* duckdb_libpgquery */\n"

def read_list_from_file(fname):
    with open_utf8(fname, 'r') as f:
        return f.read().split('\n')

try_remove_file(gen_storage_target)
try_remove_file(gen_storage_target + '.wal')


def run_command_in_shell(cmd):
    print(cmd)
    res = subprocess.run([shell_proc, '--batch', '-init', '/dev/null', gen_storage_target],
                         capture_output=True,
                         input=bytearray(cmd, 'utf8'))
    stdout = res.stdout.decode('utf8').strip()
    stderr = res.stderr.decode('utf8').strip()
    if res.returncode != 0:
        print("Failed to create database file!")
        print("----STDOUT----")
        print(stdout)
        print("----STDERR----")
        print(stderr)


with open_utf8(gen_storage_script, 'r') as f:
    cmd = f.read()

run_command_in_shell(cmd)

# FIXME: force a checkpoint
run_command_in_shell('select * from integral_values')
run_command_in_shell('select * from integral_values')

try_remove_file(gen_storage_target + '.wal')