Beispiel #1
0
def write(dictionary, args, output_file_path):
    """
    Write ``dictionary`` in Bookeen format.

    Two files are produced inside a temporary working directory:

    * ``<base>.dict``: a ZIP archive containing the ``c_*`` chunk files,
      which hold the UTF-8 encoded definitions back to back;
    * ``<base>.dict.idx``: a SQLite index (a copy of ``EMPTY_FILE_PATH``)
      with one ``T_DictIndex`` row per headword (and per synonym, unless
      ``args.ignore_synonyms`` is set) giving offset, size and chunk number
      of the definition.

    ``<base>`` is the basename of ``output_file_path`` with a trailing
    ``.zip`` removed. Depending on ``args.bookeen_install_file`` the two
    files are either zipped into ``<base>.install`` or copied as they are
    into the directory of ``output_file_path``.

    The original working directory is always restored, the index connection
    is always closed and the temporary directory is removed (unless
    ``args.keep`` is set), even when an error occurs.

    :param dictionary: object exposing ``entries`` and
                       ``entries_index_sorted``; each entry provides
                       ``headword``, ``definition`` and ``get_synonyms()``
    :param args: parsed options; the attributes read are
                 ``bookeen_collation_function``, ``bookeen_install_file``,
                 ``ignore_synonyms``, ``language_from``, ``language_to``,
                 ``license``, ``copyright``, ``title``, ``description``,
                 ``year``, ``keep`` and ``debug``
    :param output_file_path: requested output path
    :returns: ``[install_path]`` when ``args.bookeen_install_file`` is set,
              otherwise ``[idx_path, dict_path]``
    """
    # result to be returned
    result = None

    # resolve every user-supplied path BEFORE changing the working directory,
    # otherwise relative paths would be resolved against the temp directory
    output_file_path_absolute = os.path.abspath(output_file_path)
    parent_output_directory = os.path.split(output_file_path_absolute)[0]
    bookeen_collation_function_path = None
    if args.bookeen_collation_function is not None:
        bookeen_collation_function_path = os.path.abspath(args.bookeen_collation_function)

    # get the basename and derive the (relative) names of the output files
    base = os.path.basename(output_file_path)
    if base.endswith(".zip"):
        base = base[:-4]
    idx_file_path = base + u".dict.idx"
    dict_file_path = base + u".dict"

    # create tmp directory and work inside it
    cwd = os.getcwd()
    tmp_path = create_temp_directory()
    print_debug("Working in temp dir '%s'" % (tmp_path), args.debug)
    os.chdir(tmp_path)
    try:
        # copy empty.idx into tmp_path
        copy_file(EMPTY_FILE_PATH, idx_file_path)

        # open index
        sql_connection = sqlite3.connect(idx_file_path)
        try:
            # install collation in the index
            collation_function = collate_function_default
            if bookeen_collation_function_path is not None:
                try:
                    collation_function = imp.load_source("", bookeen_collation_function_path).collate_function
                    print_debug("Using collation function from '%s'" % (bookeen_collation_function_path), args.debug)
                except Exception:
                    # best effort: fall back to the default collation, but
                    # let KeyboardInterrupt/SystemExit propagate
                    print_error("Unable to load collation function from '%s'. Using the default collation function instead." % (bookeen_collation_function_path))
            sql_connection.create_collation("IcuNoCase", collation_function)
            sql_connection.text_factory = str

            # get a cursor and delete any data from the index file
            sql_cursor = sql_connection.cursor()
            sql_cursor.execute("delete from T_DictIndex")

            # write c_* files
            # a chunk is closed as soon as its size exceeds CHUNK_SIZE, so
            # every chunk but the last one is larger than CHUNK_SIZE;
            # the last chunk is empty if the final entry triggered a rollover
            print_debug("Writing c_* files...", args.debug)
            insert_statement = "insert into T_DictIndex values (?,?,?,?,?)"
            files_to_compress = []
            current_offset = 0
            chunk_index = 1
            chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)
            files_to_compress.append(chunk_file_path)
            chunk_file_obj = io.open(chunk_file_path, "wb")
            try:
                for entry_index in dictionary.entries_index_sorted:
                    entry = dictionary.entries[entry_index]
                    definition_bytes = entry.definition.encode("utf-8")
                    definition_size = len(definition_bytes)
                    chunk_file_obj.write(definition_bytes)
                    # the headword and all its synonyms point to the same definition
                    keys = [entry.headword]
                    if not args.ignore_synonyms:
                        keys.extend(synonym[0] for synonym in entry.get_synonyms())
                    for key in keys:
                        sql_cursor.execute(insert_statement, (0, key, current_offset, definition_size, chunk_index))
                    # update offset
                    current_offset += definition_size
                    # if we exceeded CHUNK_SIZE, open the next c_* file
                    if current_offset > CHUNK_SIZE:
                        chunk_file_obj.close()
                        chunk_index += 1
                        chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)
                        files_to_compress.append(chunk_file_path)
                        chunk_file_obj = io.open(chunk_file_path, "wb")
                        current_offset = 0
            finally:
                # closing an already closed file is a no-op
                chunk_file_obj.close()
            print_debug("Writing c_* files... done", args.debug)

            # compress
            print_debug("Compressing c_* files...", args.debug)
            with zipfile.ZipFile(dict_file_path, "w", zipfile.ZIP_DEFLATED) as file_zip_obj:
                for file_to_compress in files_to_compress:
                    file_zip_obj.write(os.path.basename(file_to_compress))
            print_debug("Compressing c_* files... done", args.debug)

            # update index metadata
            print_debug("Updating index metadata...", args.debug)
            header = HEADER % (args.language_from)
            dict_info = [
                ("F_xhtmlHeader", header),
                ("F_LangFrom", args.language_from),
                ("F_LangTo", args.language_to),
                ("F_Licence", args.license),
                ("F_Copyright", args.copyright),
                ("F_Title", args.title),
                ("F_Description", args.description),
                ("F_Year", args.year),
                # the meaning of the following is unknown
                ("F_Alphabet", "Z"),
                ("F_CollationLevel", "1"),
            ]
            for column, value in dict_info:
                sql_cursor.execute("update T_DictInfo set %s=?" % column, (value,))
            dict_version = [
                ("F_DictType", "stardict"),
                ("F_Version", "11"),
            ]
            for column, value in dict_version:
                sql_cursor.execute("update T_DictVersion set %s=?" % column, (value,))
            print_debug("Updating index metadata... done", args.debug)

            # compact and close
            # VACUUM cannot run inside a transaction and Python >= 3.6 no
            # longer commits implicitly before it, so commit explicitly
            sql_connection.commit()
            sql_cursor.execute("vacuum")
            sql_cursor.close()
        finally:
            sql_connection.close()

        # create .install file or copy .dict.idx and .dict into requested output directory
        if args.bookeen_install_file:
            print_debug("Creating .install file...", args.debug)
            file_zip_path = os.path.join(parent_output_directory, base + u".install")
            with zipfile.ZipFile(file_zip_path, "w", zipfile.ZIP_DEFLATED) as file_zip_obj:
                for file_to_compress in [dict_file_path, idx_file_path]:
                    file_zip_obj.write(os.path.basename(file_to_compress))
            result = [file_zip_path]
            print_debug("Creating .install file... done", args.debug)
        else:
            print_debug("Copying .dict.idx and .dict files...", args.debug)
            dict_file_path_final = os.path.join(parent_output_directory, os.path.basename(dict_file_path))
            idx_file_path_final = os.path.join(parent_output_directory, os.path.basename(idx_file_path))
            copy_file(dict_file_path, dict_file_path_final)
            copy_file(idx_file_path, idx_file_path_final)
            result = [idx_file_path_final, dict_file_path_final]
            print_debug("Copying .dict.idx and .dict files... done", args.debug)
    finally:
        # always leave the tmp directory, then delete it unless asked to keep it
        os.chdir(cwd)
        if args.keep:
            print_info("Not deleting temp dir '%s'" % (tmp_path))
        else:
            delete_directory(tmp_path)
            print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug)

    return result
Beispiel #2
0
def write(dictionary, args, output_file_path):
    """
    Write ``dictionary`` as a MOBI (Kindle) dictionary.

    The dictionary is sorted by headword, split into groups with
    ``dictionary.group()`` and handed to a ``DictionaryEbook`` object, which
    writes the raw ebook files. Unless ``args.mobi_no_kindlegen`` is set,
    kindlegen is then run on ``OEBPS/content.opf`` inside the temporary
    directory of the ebook object and the resulting ``content.mobi`` is
    copied to ``output_file_path``.

    :param dictionary: object providing ``sort()`` and ``group()``
    :param args: parsed options; the attributes read are
                 ``sort_ignore_case``, ``group_by_prefix_function``,
                 ``group_by_prefix_length``,
                 ``group_by_prefix_merge_min_size``,
                 ``group_by_prefix_merge_across_first``,
                 ``mobi_no_kindlegen``, ``kindlegen_path``, ``keep`` and
                 ``debug``
    :param output_file_path: requested output path
    :returns: ``[tmp_path]`` when kindlegen is skipped, otherwise
              ``[output_file_path]``
    """
    # result to be returned
    result = None

    # get absolute path
    output_file_path_absolute = os.path.abspath(output_file_path)

    # sort by headword, optionally ignoring case
    dictionary.sort(by_headword=True, ignore_case=args.sort_ignore_case)

    # create groups
    special_group, group_keys, group_dict = dictionary.group(
        prefix_function_path=args.group_by_prefix_function,
        prefix_length=int(args.group_by_prefix_length),
        merge_min_size=int(args.group_by_prefix_merge_min_size),
        merge_across_first=args.group_by_prefix_merge_across_first
    )
    # work on a copy, so that the list returned by group() is not mutated
    all_group_keys = list(group_keys)
    if special_group is not None:
        all_group_keys.append(u"SPECIAL")

    # create mobi object
    mobi = DictionaryEbook(ebook_format=DictionaryEbook.MOBI, args=args)

    # add groups
    for key in all_group_keys:
        if key == u"SPECIAL":
            group_entries = special_group
        else:
            group_entries = group_dict[key]
        mobi.add_group(key, group_entries)

    # create output file
    print_debug("Writing to file '%s'..." % (output_file_path_absolute), args.debug)
    mobi.write(output_file_path_absolute, compress=False)
    result = [output_file_path]
    print_debug("Writing to file '%s'... done" % (output_file_path_absolute), args.debug)

    # run kindlegen
    tmp_path = mobi.get_tmp_path()
    if args.mobi_no_kindlegen:
        # NOTE(review): unless args.keep is set, tmp_path is deleted below,
        # so the returned path may no longer exist -- confirm with callers
        print_info("Not running kindlegen, the raw files are located in '%s'" % tmp_path)
        result = [tmp_path]
    else:
        # bind the executable path before the try block, so that the error
        # handler below can always refer to it
        kindlegen_path = KINDLEGEN
        if args.kindlegen_path is not None:
            kindlegen_path = args.kindlegen_path
        try:
            print_debug("Creating .mobi file with kindlegen...", args.debug)
            opf_file_path_absolute = os.path.join(tmp_path, "OEBPS", "content.opf")
            mobi_file_path_relative = u"content.mobi"
            mobi_file_path_absolute = os.path.join(tmp_path, "OEBPS", mobi_file_path_relative)
            if args.kindlegen_path is None:
                print_info("  Running '%s' from $PATH" % KINDLEGEN)
            else:
                print_info("  Running '%s' from '%s'" % (KINDLEGEN, kindlegen_path))
            proc = subprocess.Popen(
                [kindlegen_path, opf_file_path_absolute, "-o", mobi_file_path_relative],
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE
            )
            output = proc.communicate()
            if args.debug:
                # the output is only logged: never fail on undecodable bytes
                output_unicode = (output[0]).decode("utf-8", "replace")
                print_debug(output_unicode, args.debug)
            copy_file(mobi_file_path_absolute, output_file_path_absolute)
            result = [output_file_path]
            print_debug("Creating .mobi file with kindlegen... done", args.debug)
        except OSError:
            # NOTE(review): result still holds [output_file_path] here;
            # confirm whether callers expect that when kindlegen failed
            print_error("  Unable to run '%s' as '%s'" % (KINDLEGEN, kindlegen_path))
            print_error("  Please make sure '%s':" % KINDLEGEN)
            print_error("    1. is available on your $PATH or")
            print_error("    2. specify its path with --kindlegen-path")

    # delete tmp directory
    if args.keep:
        print_info("Not deleting temp dir '%s'" % (tmp_path))
    else:
        mobi.delete()
        print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug)

    return result
Beispiel #3
0
def write(dictionary, args, output_file_path):
    """
    Write ``dictionary`` as a MOBI (Kindle) dictionary.

    The dictionary is sorted by headword, split into groups with
    ``dictionary.group()`` and handed to a ``DictionaryEbook`` object, which
    writes the raw ebook files. Unless ``args.mobi_no_kindlegen`` is set,
    kindlegen is then run on ``OEBPS/content.opf`` inside the temporary
    directory of the ebook object and the resulting ``content.mobi`` is
    copied to ``output_file_path``.

    :param dictionary: object providing ``sort()`` and ``group()``
    :param args: parsed options; the attributes read are
                 ``sort_ignore_case``, ``group_by_prefix_function``,
                 ``group_by_prefix_length``,
                 ``group_by_prefix_merge_min_size``,
                 ``group_by_prefix_merge_across_first``,
                 ``mobi_no_kindlegen``, ``kindlegen_path``, ``keep`` and
                 ``debug``
    :param output_file_path: requested output path
    :returns: ``[tmp_path]`` when kindlegen is skipped, otherwise
              ``[output_file_path]``
    """
    # result to be returned
    result = None

    # get absolute path
    output_file_path_absolute = os.path.abspath(output_file_path)

    # sort by headword, optionally ignoring case
    dictionary.sort(by_headword=True, ignore_case=args.sort_ignore_case)

    # create groups
    special_group, group_keys, group_dict = dictionary.group(
        prefix_function_path=args.group_by_prefix_function,
        prefix_length=int(args.group_by_prefix_length),
        merge_min_size=int(args.group_by_prefix_merge_min_size),
        merge_across_first=args.group_by_prefix_merge_across_first)
    # work on a copy, so that the list returned by group() is not mutated
    all_group_keys = list(group_keys)
    if special_group is not None:
        all_group_keys.append(u"SPECIAL")

    # create mobi object
    mobi = DictionaryEbook(ebook_format=DictionaryEbook.MOBI, args=args)

    # add groups
    for key in all_group_keys:
        if key == u"SPECIAL":
            group_entries = special_group
        else:
            group_entries = group_dict[key]
        mobi.add_group(key, group_entries)

    # create output file
    print_debug("Writing to file '%s'..." % (output_file_path_absolute),
                args.debug)
    mobi.write(output_file_path_absolute, compress=False)
    result = [output_file_path]
    print_debug("Writing to file '%s'... done" % (output_file_path_absolute),
                args.debug)

    # run kindlegen
    tmp_path = mobi.get_tmp_path()
    if args.mobi_no_kindlegen:
        # NOTE(review): unless args.keep is set, tmp_path is deleted below,
        # so the returned path may no longer exist -- confirm with callers
        print_info("Not running kindlegen, the raw files are located in '%s'" %
                   tmp_path)
        result = [tmp_path]
    else:
        # bind the executable path before the try block, so that the error
        # handler below can always refer to it
        kindlegen_path = KINDLEGEN
        if args.kindlegen_path is not None:
            kindlegen_path = args.kindlegen_path
        try:
            print_debug("Creating .mobi file with kindlegen...", args.debug)
            opf_file_path_absolute = os.path.join(tmp_path, "OEBPS",
                                                  "content.opf")
            mobi_file_path_relative = u"content.mobi"
            mobi_file_path_absolute = os.path.join(tmp_path, "OEBPS",
                                                   mobi_file_path_relative)
            if args.kindlegen_path is None:
                print_info("  Running '%s' from $PATH" % KINDLEGEN)
            else:
                print_info("  Running '%s' from '%s'" %
                           (KINDLEGEN, kindlegen_path))
            command = [
                kindlegen_path, opf_file_path_absolute, "-o",
                mobi_file_path_relative
            ]
            proc = subprocess.Popen(command,
                                    stdout=subprocess.PIPE,
                                    stdin=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            output = proc.communicate()
            if args.debug:
                # the output is only logged: never fail on undecodable bytes
                output_unicode = (output[0]).decode("utf-8", "replace")
                print_debug(output_unicode, args.debug)
            copy_file(mobi_file_path_absolute, output_file_path_absolute)
            result = [output_file_path]
            print_debug("Creating .mobi file with kindlegen... done",
                        args.debug)
        except OSError:
            # NOTE(review): result still holds [output_file_path] here;
            # confirm whether callers expect that when kindlegen failed
            print_error("  Unable to run '%s' as '%s'" %
                        (KINDLEGEN, kindlegen_path))
            print_error("  Please make sure '%s':" % KINDLEGEN)
            print_error("    1. is available on your $PATH or")
            print_error("    2. specify its path with --kindlegen-path")

    # delete tmp directory
    if args.keep:
        print_info("Not deleting temp dir '%s'" % (tmp_path))
    else:
        mobi.delete()
        print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug)

    return result
Beispiel #4
0
def write(dictionary, args, output_file_path):
    """
    Write ``dictionary`` in Bookeen format.

    Two files are produced inside a temporary working directory:

    * ``<base>.dict``: a ZIP archive containing the ``c_*`` chunk files,
      which hold the UTF-8 encoded definitions back to back;
    * ``<base>.dict.idx``: a SQLite index (a copy of ``EMPTY_FILE_PATH``)
      with one ``T_DictIndex`` row per headword (and per synonym, unless
      ``args.ignore_synonyms`` is set) giving offset, size and chunk number
      of the definition.

    ``<base>`` is the basename of ``output_file_path`` with a trailing
    ``.zip`` removed. Depending on ``args.bookeen_install_file`` the two
    files are either zipped into ``<base>.install`` or copied as they are
    into the directory of ``output_file_path``.

    The original working directory is always restored, the index connection
    is always closed and the temporary directory is removed (unless
    ``args.keep`` is set), even when an error occurs.

    :param dictionary: object exposing ``entries`` and
                       ``entries_index_sorted``; each entry provides
                       ``headword``, ``definition`` and ``get_synonyms()``
    :param args: parsed options; the attributes read are
                 ``bookeen_collation_function``, ``bookeen_install_file``,
                 ``ignore_synonyms``, ``language_from``, ``language_to``,
                 ``license``, ``copyright``, ``title``, ``description``,
                 ``year``, ``keep`` and ``debug``
    :param output_file_path: requested output path
    :returns: ``[install_path]`` when ``args.bookeen_install_file`` is set,
              otherwise ``[idx_path, dict_path]``
    """
    # result to be returned
    result = None

    # resolve every user-supplied path BEFORE changing the working directory,
    # otherwise relative paths would be resolved against the temp directory
    output_file_path_absolute = os.path.abspath(output_file_path)
    parent_output_directory = os.path.split(output_file_path_absolute)[0]
    bookeen_collation_function_path = None
    if args.bookeen_collation_function is not None:
        bookeen_collation_function_path = os.path.abspath(args.bookeen_collation_function)

    # get the basename and derive the (relative) names of the output files
    base = os.path.basename(output_file_path)
    if base.endswith(".zip"):
        base = base[:-4]
    idx_file_path = base + u".dict.idx"
    dict_file_path = base + u".dict"

    # create tmp directory and work inside it
    cwd = os.getcwd()
    tmp_path = create_temp_directory()
    print_debug("Working in temp dir '%s'" % (tmp_path), args.debug)
    os.chdir(tmp_path)
    try:
        # copy empty.idx into tmp_path
        copy_file(EMPTY_FILE_PATH, idx_file_path)

        # open index
        sql_connection = sqlite3.connect(idx_file_path)
        try:
            # install collation in the index
            collation_function = collate_function_default
            if bookeen_collation_function_path is not None:
                try:
                    collation_function = imp.load_source("", bookeen_collation_function_path).collate_function
                    print_debug("Using collation function from '%s'" % (bookeen_collation_function_path), args.debug)
                except Exception:
                    # best effort: fall back to the default collation, but
                    # let KeyboardInterrupt/SystemExit propagate
                    print_error("Unable to load collation function from '%s'. Using the default collation function instead." % (bookeen_collation_function_path))
            sql_connection.create_collation("IcuNoCase", collation_function)
            sql_connection.text_factory = str

            # get a cursor and delete any data from the index file
            sql_cursor = sql_connection.cursor()
            sql_cursor.execute("delete from T_DictIndex")

            # write c_* files
            # a chunk is closed as soon as its size exceeds CHUNK_SIZE, so
            # every chunk but the last one is larger than CHUNK_SIZE;
            # the last chunk is empty if the final entry triggered a rollover
            print_debug("Writing c_* files...", args.debug)
            insert_statement = "insert into T_DictIndex values (?,?,?,?,?)"
            files_to_compress = []
            current_offset = 0
            chunk_index = 1
            chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)
            files_to_compress.append(chunk_file_path)
            chunk_file_obj = open(chunk_file_path, "wb")
            try:
                for entry_index in dictionary.entries_index_sorted:
                    entry = dictionary.entries[entry_index]
                    definition_bytes = entry.definition.encode("utf-8")
                    definition_size = len(definition_bytes)
                    chunk_file_obj.write(definition_bytes)
                    # the headword and all its synonyms point to the same definition
                    keys = [entry.headword]
                    if not args.ignore_synonyms:
                        keys.extend(synonym[0] for synonym in entry.get_synonyms())
                    for key in keys:
                        sql_cursor.execute(insert_statement, (0, key, current_offset, definition_size, chunk_index))
                    # update offset
                    current_offset += definition_size
                    # if we exceeded CHUNK_SIZE, open the next c_* file
                    if current_offset > CHUNK_SIZE:
                        chunk_file_obj.close()
                        chunk_index += 1
                        chunk_file_path = "%s%d" % (CHUNK_FILE_PREFIX, chunk_index)
                        files_to_compress.append(chunk_file_path)
                        chunk_file_obj = open(chunk_file_path, "wb")
                        current_offset = 0
            finally:
                # closing an already closed file is a no-op
                chunk_file_obj.close()
            print_debug("Writing c_* files... done", args.debug)

            # compress
            print_debug("Compressing c_* files...", args.debug)
            with zipfile.ZipFile(dict_file_path, "w", zipfile.ZIP_DEFLATED) as file_zip_obj:
                for file_to_compress in files_to_compress:
                    file_zip_obj.write(os.path.basename(file_to_compress))
            print_debug("Compressing c_* files... done", args.debug)

            # update index metadata
            print_debug("Updating index metadata...", args.debug)
            header = HEADER % (args.language_from)
            dict_info = [
                ("F_xhtmlHeader", header),
                ("F_LangFrom", args.language_from),
                ("F_LangTo", args.language_to),
                ("F_Licence", args.license),
                ("F_Copyright", args.copyright),
                ("F_Title", args.title),
                ("F_Description", args.description),
                ("F_Year", args.year),
                # the meaning of the following is unknown
                ("F_Alphabet", "Z"),
                ("F_CollationLevel", "1"),
            ]
            for column, value in dict_info:
                sql_cursor.execute("update T_DictInfo set %s=?" % column, (value,))
            dict_version = [
                ("F_DictType", "stardict"),
                ("F_Version", "11"),
            ]
            for column, value in dict_version:
                sql_cursor.execute("update T_DictVersion set %s=?" % column, (value,))
            print_debug("Updating index metadata... done", args.debug)

            # compact and close
            # VACUUM cannot run inside a transaction and Python >= 3.6 no
            # longer commits implicitly before it, so commit explicitly
            sql_connection.commit()
            sql_cursor.execute("vacuum")
            sql_cursor.close()
        finally:
            sql_connection.close()

        # create .install file or copy .dict.idx and .dict into requested output directory
        if args.bookeen_install_file:
            print_debug("Creating .install file...", args.debug)
            file_zip_path = os.path.join(parent_output_directory, base + u".install")
            with zipfile.ZipFile(file_zip_path, "w", zipfile.ZIP_DEFLATED) as file_zip_obj:
                for file_to_compress in [dict_file_path, idx_file_path]:
                    file_zip_obj.write(os.path.basename(file_to_compress))
            result = [file_zip_path]
            print_debug("Creating .install file... done", args.debug)
        else:
            print_debug("Copying .dict.idx and .dict files...", args.debug)
            dict_file_path_final = os.path.join(parent_output_directory, os.path.basename(dict_file_path))
            idx_file_path_final = os.path.join(parent_output_directory, os.path.basename(idx_file_path))
            copy_file(dict_file_path, dict_file_path_final)
            copy_file(idx_file_path, idx_file_path_final)
            result = [idx_file_path_final, dict_file_path_final]
            print_debug("Copying .dict.idx and .dict files... done", args.debug)
    finally:
        # always leave the tmp directory, then delete it unless asked to keep it
        os.chdir(cwd)
        if args.keep:
            print_info("Not deleting temp dir '%s'" % (tmp_path))
        else:
            delete_directory(tmp_path)
            print_debug("Deleted temp dir '%s'" % (tmp_path), args.debug)

    return result