コード例 #1
0
ファイル: rst.py プロジェクト: suizokukan/logotheras
    def writeRSTInto(self,
                     sourcepath,
                     destpath,
                     srclanguage,
                     title,
                     maintoctree_maxdepth):
        """
                RST.writeRSTInto

                Write the RST files of the whole dictionary into <destpath> :
                the template files, the audio files (if any), "index.rst" and
                one file per article (see RST.writeArticle).

                The "rst::share out RST files" option selects the layout :
                o None                    : index.rst lists every article.
                o a string starting
                  with 'a'                : the articles are grouped by initial(s);
                                            the length of the option's string is
                                            given as <length> to getArticlesGroupedBy();
                                            index.rst lists one "gro_NNNNNN" file
                                            per group.
                o anything else           : the option is converted into an int and
                                            given as <length> to getArticlesGroupedBy()
                                            (method "articles' number"); index.rst
                                            lists one "gro_NNNNNN" file per group.

                sourcepath              :       source path (e.g. '~/projects/phokaia/')
                destpath                :       destination path; must be an existing
                                                directory.
                srclanguage             :       SourceLanguage object
                title                   :       (str) title written at the top of index.rst
                maintoctree_maxdepth    :       (int) value of the ":maxdepth:" option
                                                of every toctree written by this method.

                NO RETURN VALUE : if <destpath> is missing or isn't a directory, an
                error is reported through self.errors and nothing is written.
        """
        self.errors.debug("(RST.writeRSTInto()) sourcepath={0}; destpath={1}; " \
                          "srclanguage={2}; title={3}; " \
                          "maintoctree_maxdepth={4}".format(sourcepath,
                                                            destpath,
                                                            srclanguage,
                                                            title,
                                                            maintoctree_maxdepth))

        self.errors.debug("(RST.writeRSTInto()) logotherasdata entries : "+\
                          str(tuple(self.logotherasdata.keys())))

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # local helpers :
        def write_heading(outfile, heading):
            """
                Write into <outfile> the technical RST header, <heading> as a
                "===="-framed title and the first lines of a toctree directive.
            """
            heading_width = string_width(heading)

            outfile.write(RST.header)

            outfile.write(NEWLINE)
            outfile.write("="*heading_width + NEWLINE)
            outfile.write(heading + NEWLINE)
            outfile.write("="*heading_width + NEWLINE)
            outfile.write(NEWLINE)

            outfile.write(".. toctree::" + NEWLINE)
            outfile.write("   :maxdepth: {0}{1}".format(maintoctree_maxdepth,
                                                         NEWLINE))
            outfile.write(NEWLINE)

        def write_group(indexfile, datafile_number, datafile_title, articles, num_article):
            """
                Write the file "gro_NNNNNN.rst" gathering <articles>, add it to
                <indexfile> and return the number of the next article.
            """
            # name of this new file ?
            datafile_name = "gro_{0:0>6}".format(datafile_number)
            complete_datafile_name = os.path.join(destpath,
                                                  datafile_name + ".rst")

            # adding this file to index.rst :
            indexfile.write("   " + datafile_name + NEWLINE)

            with open(complete_datafile_name, "w", encoding="utf-8") as datafile:
                self.errors.info("writing data into a new file : " + datafile.name)

                write_heading(datafile, datafile_title)

                for article_name in articles:
                    self.writeArticle(articledata=self.logotherasdata[article_name],
                                      destpath=destpath,
                                      upperfile=datafile,
                                      num_article=num_article)
                    num_article += 1

            return num_article

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Is <destpath> valid ?
        if not os.path.exists(destpath):
            msg = "(ERR011) (RST.writeRSTInto) Missing directory : '{0}'"
            self.errors.error(msg.format(destpath))

            return

        if not os.path.isdir(destpath):
            msg = "(ERR010) (RST.writeRSTInto) '{0}' is not a directory"
            self.errors.error(msg.format(destpath))

            return

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # adding the default files :
        template_directory = logotheras.options.OPTIONS["rst::template directory"]
        if template_directory is None:
            # no directory given by the options : we use the templates of the package.
            template_directory = os.path.join("logotheras",
                                              "rst",
                                              "templates")
        self.addTemplateFilesIntoRSTDir(sourcepath=template_directory,
                                        destpath=destpath)

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # let's add the audio files from sourcepath/source/audio :
        audio_sourcepath = os.path.join(sourcepath, "source", "audio")  # e.g. phokaia/source/audio
        audio_destpath = os.path.join(destpath, "build", "audio")       # e.g. tmp/phokaia/build/audio
        # if the source path doesn't exist, no audio file.
        if os.path.exists(audio_sourcepath):
            # NOTE(review): by default shutil.copytree() raises an exception if
            # <audio_destpath> already exists; to be confirmed that <destpath>
            # is always a fresh directory.
            shutil.copytree(src=audio_sourcepath,
                            dst=audio_destpath)

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # writing the data :
        #
        #   The files are explicitly written in UTF-8 (default encoding of the
        # Sphinx source files) : titles and articles may contain non-ASCII
        # characters that the locale's encoding would be unable to encode.
        share_out = logotheras.options.OPTIONS["rst::share out RST files"]
        complete_indexfile_name = os.path.join(destpath, "index.rst")

        if share_out is None:

            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # no intermediate file : index.rst lists every article.
            with open(complete_indexfile_name, "w", encoding="utf-8") as indexfile:

                write_heading(indexfile, title)

                all_articles = self.logotherasdata.getArticles(in_alphabetical_order=True)
                for num_article, articledata in enumerate(all_articles, start=1):
                    self.writeArticle(articledata=articledata,
                                      destpath=destpath,
                                      upperfile=indexfile,
                                      num_article=num_article)

        elif share_out.startswith('a'):

            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # several outputfiles : all the words begin with the same letters
            length = len(share_out)

            with open(complete_indexfile_name, "w", encoding="utf-8") as indexfile:

                write_heading(indexfile, title)

                num_article = 1
                groups = self.logotherasdata.getArticlesGroupedBy(method="initial",
                                                                  length=length,
                                                                  srclanguage=srclanguage)

                for datafile_number, (articles, initial) in enumerate(groups):

                    # <initial> -> <_initial>
                    #
                    #   We modify <initial> by removing any diacritic or putting the
                    # initial(s) in upper case :
                    _initial = srclanguage.set_to_its_most_visual_form(initial)

                    # title : either "(W) word" either "(W) word1 ... word2"
                    if len(articles) == 0:
                        datafile_title = "(no article to be written)"
                    elif len(articles) == 1:
                        datafile_title = str(_initial) + \
                                         " (" + articles[0] + ")"
                    else:
                        datafile_title = str(_initial) + \
                                         " ("+ articles[0] + " … " + articles[-1] + ")"

                    num_article = write_group(indexfile,
                                              datafile_number,
                                              datafile_title,
                                              articles,
                                              num_article)

        else:
            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # several outputfiles : each outputfile contains the same number of articles
            length = int(share_out)

            with open(complete_indexfile_name, "w", encoding="utf-8") as indexfile:

                write_heading(indexfile, title)

                num_article = 1
                groups = self.logotherasdata.getArticlesGroupedBy(method="articles' number",
                                                                  length=length,
                                                                  srclanguage=srclanguage)

                for datafile_number, articles in enumerate(groups):

                    # title : either "word" either "word1 ... word2"
                    #
                    #   An empty group gets the same placeholder title as in the
                    # "initial" layout instead of failing on articles[0].
                    if len(articles) == 0:
                        datafile_title = "(no article to be written)"
                    elif len(articles) == 1:
                        datafile_title = articles[0]
                    else:
                        datafile_title = articles[0] + " … " + articles[-1]

                    # the width of the title is computed by write_heading() from
                    # <datafile_title> itself.
                    num_article = write_group(indexfile,
                                              datafile_number,
                                              datafile_title,
                                              articles,
                                              num_article)
コード例 #2
0
ファイル: rst.py プロジェクト: suizokukan/logotheras
    def writeArticle( self,
                      articledata,
                      destpath,
                      upperfile,
                      num_article):
        """
                RST.writeArticle

                Add the article <articledata> into <destpath> : the article is
                written in its own file, "art_<MD5 of the article's name>.rst",
                and this file is added as a new toctree entry to <upperfile>.

                If the header of the article describes a morphological grid whose
                name is known (see MORPHOGRIDS), an InflectionGrid object is filled
                with :
                  (1) the forms computed by the Inflectiones library, if the option
                      "rst::compute morphogrids' content" is set;
                  (2) the forms read in the entries of the article, overwriting
                      the forms computed in (1).
                The grid is then inserted in the article as a raw HTML block and
                every form of the grid is added to <self.hermaia>.

                The errors are reported through self.errors; no exception is
                raised by this method itself.

                PARAMETERS :
                o articledata     : ArticleData object
                o destpath        : (str) directory where the article's file is written
                o upperfile       : (file object opened for writing) index.rst or the
                                    file of a group of articles
                o num_article     : (int) number of the article; not used by this method.

                NO RETURN VALUE
        """
        debug_msg = "(RST.writeArticle) article's name='{0}'; fullname='{1}'; " \
                    "destpath='{2}'; upperfile='{3}' "
        self.errors.debug(debug_msg.format(articledata.headerdata.artiname,
                                           articledata.headerdata.fullname,
                                           destpath,
                                           upperfile))

        #.......................................................................
        # normal case : we write the article

        # Buffer where the lines to be written are stored :
        buff = []

        # This function uses MD5 to compute the hash of the article's name.
        #
        #   e.g., articlefile_name = "art_778b6ba126e285e"
        #
        #   Previously, hash() built-in function was used, but this function
        # doesn't give the same results EVEN ON THE SAME PLATFORM.
        #
        md5_hash = hashlib.md5(articledata.headerdata.artiname.encode()).hexdigest()
        articlefile_name = "art_"+md5_hash
        # e.g., complete_articlefile_name = "path/art_778b6ba126e285e.rst"
        complete_articlefile_name = os.path.join( destpath,
                                                  articlefile_name + ".rst" )

        # new article in the upperfile, index.rst or what so ever :
        upperfile.write("   {0}{1}".format(articlefile_name,
                                           NEWLINE))

        # do we need to build a morphological grid ?
        build_morphogrid = False        # do the function need to build a morphological grid ?
        morpho_pos = 0                  # (int) line in <buff> of the "morphologie" entry
        morphogrid_name = ""            # (str)
        infgrid = None                  # InflectionGrid object

        #  we initialize <morphogrid_data> to get all the informations
        #  required to fill such a grid.
        #
        # see HeaderData.get_morphogridline_data() to get more informations about
        # the format of <morphogrid_data> which is basically a dict={str:str}.
        morphogrid_data = articledata.headerdata.get_morphogridline_data()

        if morphogrid_data is not None:
            # yes, we have to build a morphological grid :
            morphogrid_name = morphogrid_data['name']
            morphogrid_flection = morphogrid_data['flection']

            if morphogrid_name not in MORPHOGRIDS:
                build_morphogrid = False
                self.errors.error("(ERR016) " \
                                  "unknown morphogrid's name '{0}'".format(morphogrid_name))
            else:
                build_morphogrid = True

                #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
                # let's build <infgrid> :
                infgrid = InflectionGrid( *MORPHOGRIDS[morphogrid_name].combinations,
                                    keywords_order = MORPHOGRIDS[morphogrid_name].keywords_order,
                                    default_values = MORPHOGRIDS[morphogrid_name].default_values)

                #. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
                # filling <infgrid> with computed values thanks to the Inflectiones library :
                if logotheras.options.OPTIONS["rst::compute morphogrids' content"]:

                    if morphogrid_flection not in INFLECTIONES:
                        # error : unknown inflection
                        #
                        # <build_morphogrid> remains True : the (still empty) grid
                        # may be filled by the forms read in the entries.
                        msg = "(ERR015) " \
                              "unknown inflection '{0}'; " \
                              "known inflections : {1}"
                        self.errors.error(msg.format(morphogrid_flection,
                                                     INFLECTIONES.keys()))
                    else:
                        info_msg = "calling INFLECTIONES['{0}'] for artiname='{1}'."
                        self.errors.info(info_msg.format(morphogrid_flection,
                                                         articledata.headerdata.artiname))

                        # we fill the dict <characters_output> from the options :
                        #
                        #       every option beginning with "rst::CO::" is copied
                        #       into <characters_output> but the first characters
                        #       (i.e. 'rst::CO::') are cut :
                        #               "rst::CO::xxx" -> "xxx"
                        #
                        characters_output = {
                            key[len("rst::CO::"):]: value
                            for key, value in logotheras.options.OPTIONS.items()
                            if key.startswith("rst::CO::")}

                        # calling the Inflectiones library :
                        (infgrid,
                         error_msg) = \
                            INFLECTIONES[morphogrid_flection].inflect_from_logotheras_entry(
                                morphogrid_data = morphogrid_data,
                                fullname = articledata.headerdata.fullname,
                                infgrid_source = infgrid,
                                ortho_separator = \
                                   logotheras.options.OPTIONS["rst::inflectiones::ortho separator"],
                                phone_separator = \
                                   logotheras.options.OPTIONS["rst::inflectiones::phone separator"],
                                characters_output = characters_output,
                                formatstring = \
                                    logotheras.options.OPTIONS["rst::inflectiones::format string"])

                        # the call to INFLECTIONES[].inflect_from_logotheras_entry() returned
                        # an empty result : an error occurred.
                        if infgrid is None:
                            build_morphogrid = False

                            msg = "(ERR014) INFLECTIONES['{0}'] " \
                                  "for '{1}' was unable to return a result. " \
                                  "Error message = '{2}'."
                            self.errors.error(msg.format(morphogrid_flection,
                                                         articledata.headerdata.fullname,
                                                         error_msg))

        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # technical header without any data :
        buff.extend(RST.header.split("\n"))

        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # true header : article's names
        buff.append("")
        article_names = articledata.headerdata.artiname

        if articledata.headerdata.fullname:
            article_names += " ({0})".format(articledata.headerdata.fullname)
        if articledata.headerdata.sortingname and \
           logotheras.options.OPTIONS["textdbdict::display sortingname"]:
            article_names += " [{0}]".format(articledata.headerdata.sortingname)
        if articledata.headerdata.articlescategory:
            article_names += " : {0}".format(articledata.headerdata.articlescategory)

        article_names_width = string_width(article_names)
        buff.append("=" * article_names_width)
        buff.append(article_names)
        buff.append("=" * article_names_width)
        buff.append("")

        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # add the audio link if such a link has been defined in the header :
        if "audiorecord" in articledata.headerdata.informations:
            # path of the audio file, relative to <destpath>/build/ :
            audiofile = os.path.join("audio",
                                     "article",
                                     "header",
                                     articledata.headerdata.informations["audiorecord"])
            audiofile_fullpath = os.path.join(destpath,
                                              "build",
                                              audiofile)

            if not os.path.exists(audiofile_fullpath):
                # the audio file is missing : an error but no link.
                error_msg = "(ERR013) " \
                            "(RST.writeArticle) article's name='{0}'; " \
                            "fullname='{1}';" \
                            " audio file name as it appears in the header : '{2}'; " \
                            " full audio file name : '{3}' ."

                self.errors.error( error_msg.format(articledata.headerdata.artiname,
                                              articledata.headerdata.fullname,
                                              articledata.headerdata.informations["audiorecord"],
                                              audiofile_fullpath ))

            else:
                buff.append("→ `(enregistrement audio) <{0}>`_".format(audiofile))

        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # entries' loop :
        for entryname in articledata.bodydata:

            entrydata = articledata.bodydata[entryname]

            debug_msg = "RST.writeArticle:: entryname={0} entrydata.entry_to_be_duplicated={1}"
            self.errors.debug(debug_msg.format(entryname,
                                               entrydata.entry_to_be_duplicated,))

            #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
            # adding entrydata.title to <buff> :
            title = self.getHLevelRST(entrydata.hlevel) + " "

            if entrydata.title is None:

                # no title :
                title = ""
            else:
                # we add the selection detected by self.getHLevelRST() to <title> :
                title += entrydata.title

            title_width = string_width(title)
            buff.append("")
            buff.append("-" * title_width)
            buff.append(title)
            buff.append("-" * title_width)
            buff.append("")

            #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
            # adding entrydata.text to <buff> :
            for line in entrydata.text:
                buff.append(self.str2rststr(context="standard",
                                            src=line,
                                            errorcontext="line in an entrydata object="+str(entrydata)))

            #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
            # adding the extracts in entrydata to <buff> :
            buff.extend( self.fillBuffWithExtracts( entrydata ) )

            #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
            # initializing <morpho_pos> : position just after the title, the text and
            # the extracts of the "morphologie" entry, i.e. where the grid will be inserted.
            if build_morphogrid and entrydata.title == 'morphologie':
                morpho_pos = len(buff)

            #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
            # filling <infgrid> with the values read in the dictionary :
            #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
            # if build_morphogrid is False, nothing to do.
            # if entrydata.entry_to_be_duplicated is None we have something like that :
            #
            #       (2.1) indicatif
            #       (2.1.1) indicatif présent
            #       (2.1.1.3) indicatif.présent.3S                  <----
            #       (2.1.1.3.1) ▣ἐστι(ν)▣ : indicatif.présent.3S
            #
            # "indicatif.présent.3S" is ok but there's no symbol before and after a morphological
            # form, so there's nothing to do.
            if build_morphogrid and \
               entrydata.entry_to_be_duplicated:

                # we translate <entrydata.title> :
                #
                #  E.g. "(2.1.1) indicatif.présent"     (French keywords)
                #       becomes :
                #       "(2.1.1) indicative.present"    (English keywords required by the
                #                                       Infgrid library)
                #
                entrydata_title = self.translateGrammaticalKeyWords(entrydata.title)

                # we search the data in <entrydata_title> :
                search_result = infgrid.searchSectionsInAString( srcstring = entrydata_title )

                # we expect only one result, i.e. only one substring matching an InflectionGrid
                # description as in "active.indicative.imperfect.1S : j'aimais".
                if len(search_result) != 1:
                    error_msg = "(ERR012) (RST.writeArticle) " \
                                "article's name='{0}'; fullname='{1}';" \
                                " searching a substring in '{2}' > '{3}' : " \
                                "no substring linked to an InflectionGrid adress " \
                                "or more than one substring found. " \
                                "Either a keyword in '{2}' is unknown or not translated " \
                                "either a non default keyword is missing in '{3}'. " \
                                "search_result={4}; " \
                                "infgrid.keywords_order={5}; " \
                                "infgrid.default_values={6}; " \
                                "infgrid.gridsv.keys()={7}"

                    self.errors.error( error_msg.format(articledata.headerdata.artiname,
                                                  articledata.headerdata.fullname,
                                                  entrydata.title,
                                                  entrydata_title,
                                                  search_result,
                                                  infgrid.keywords_order,
                                                  infgrid.default_values,
                                                  sorted(infgrid.gridsv.keys())))

                else:
                    # only the third item of the unique result is used here;
                    # it's given to infgrid.fillData() as the adress to be filled.
                    resdict = search_result[0][2]

                    # <_value> = formated version of <entrydata.entry_to_be_duplicated>
                    fmtstring = logotheras.options.OPTIONS["rst::grammatical form::format string"]
                    _value = fmtstring.replace("ORTHO",
                                               entrydata.entry_to_be_duplicated)

                    # filling <infgrid> :
                    infgrid.fillData(adress = resdict,
                                     value = _value,
                                     overwrite_the_data = True)

        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # adding the morphological grid to <buff> and its forms to <self.hermaia> :
        if infgrid is not None:
            res_template = infgrid.applyToATemplate(
                template = MORPHOGRIDS[morphogrid_name].strtemplate,
                unknown_value = "(...)")

            # we fill some fields :
            # the generic title :
            res_template = res_template.replace("__CAPTIONTITLE__", "")
            # some classes :
            res_template = res_template.replace("__CLASS__", "morphotable")
            res_template = res_template.replace("__TABLECLASSTITLE__", "morphotable-title")
            res_template = res_template.replace("__TABLEBORDER__", "1")

            # the grid is embedded in the RST file as a raw HTML block :
            res_morpho = ["",
                          ".. raw:: html",
                          "",
                          "    <embed>",
                          ""]
            res_morpho.extend("        " + line for line in res_template.split("\n"))
            res_morpho.append("")
            res_morpho.append("    </embed>")

            # adding the morphology grid just before the line number #<morpho_pos> :
            #
            # NOTE(review): if no entry is entitled 'morphologie', <morpho_pos> is
            # still equal to 0 and the grid is inserted before the technical header;
            # to be confirmed that this is the expected behaviour.
            buff[morpho_pos:morpho_pos] = res_morpho

            # filling the <hermaia> object :
            for stradress in infgrid.gridsv:

                # <infgrid.gridsv[stradress]> follows the
                # "rst::grammatical form::format string" format,
                # by example "<b>ORTHO</b>".
                #
                # So we drop every HTML tags (see @DOC0002)
                if infgrid.gridsv[stradress] is not None:
                    form = re.sub('<[^<]+?>',
                                  '',
                                  infgrid.gridsv[stradress])

                    self.hermaia.add_to_database( form = form,
                                                  sourceword = articledata.headerdata.artiname,
                                                  inflection = morphogrid_data['flection'],
                                                  context = stradress )

        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # new article written in a new file :
        #
        #   The file is explicitly written in UTF-8 (default encoding of the
        # Sphinx source files) : <buff> may contain non-ASCII characters ("→",
        # the words of the dictionary) that the locale's encoding would be
        # unable to encode.
        with open( complete_articlefile_name, "w", encoding="utf-8" ) as articlefile:
            articlefile.writelines( line + NEWLINE for line in buff )