Example #1
0
    def writeAddendum(self, destpath, _license):
        """
                TextDBDict.writeAddendum

                Create the file named ADDENDUM_FILENAME : it starts with
                <_license>, followed by the TextDBDict representation of
                every article whose name contains the NOTALEXICAL_ARTINAME
                symbol. The other articles are skipped.

                destpath        : (str) target path; the file is written into
                                        its SOURCEDIRECTORY_DEFAULTNAME
                                        subdirectory (not created here).
                _license        : (str) text to be written at the beginning
                                        of the addendum file
        """
        self.errors.info("writing data into the addendum file... : " + ADDENDUM_FILENAME)

        addendum_path = os.path.join(destpath,
                                     SOURCEDIRECTORY_DEFAULTNAME,
                                     ADDENDUM_FILENAME)

        with open(addendum_path, 'w') as addendumfile:

            # header : the license, written piece by piece in iteration order.
            addendumfile.writelines(_license)

            for articledata in self.logotherasdata.getArticles():

                artiname = articledata.headerdata.artiname

                # only the articles carrying the special symbol belong here :
                if NOTALEXICAL_ARTINAME not in artiname:
                    continue

                article = Article(errors=self.errors,
                                  logotherasdata=self.logotherasdata)
                article.setData(self.logotherasdata[artiname])

                addendumfile.write(article.getTextDBDictRepr())
Example #2
0
    def writeTextDBDictInto(self,
                            destpath,
                            srclanguage):
        """
                TextDBDict.writeTextDBDictInto

                Write the articles of self.logotherasdata as TextDBDict
                file(s) into the SOURCEDIRECTORY_DEFAULTNAME subdirectory of
                <destpath>. This subdirectory is deleted if it already exists,
                then created again. An information file is also written
                through TextDBDictInfoFile.

                The option "textdbdict::share out TextDBDict files" selects
                the layout :
                    o None
                        everything goes into "unique_file.textdbdict".
                    o a string starting with 'a'
                        one file per group returned by getArticlesGroupedBy()
                        with method="initial", the length being the number
                        of characters of the option's value.
                    o anything else
                        the value is converted with int() and used as length
                        with method="articles' number".
                In the last two cases, writeAddendum() is called afterwards.

                Every data file begins with the license text, each line being
                prefixed by "#COMMENT# ".

                If <destpath> doesn't exist or isn't a directory, an error is
                reported through self.errors and nothing is written.

                destpath        :       (str)
                srclanguage     :       Language object, passed as it is to
                                        getArticlesGroupedBy().

                No value is returned.
        """
        self.errors.debug("TextDBDict.writeTextDBDictInto; destpath=" + str(destpath) )

        options = logotheras.options.OPTIONS

        # maximal length for the name of the files :
        max_length = options["textdbdict::maximal number of characters in a filename"]

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Is <destpath> valid ?
        if not os.path.exists(destpath):
            msg = "(ERR021) (TextDBDict.writeTextDBDictInto) Missing directory : '{0}'"
            self.errors.error(msg.format(destpath))

            return

        if not os.path.isdir(destpath):
            msg = "(ERR020) (TextDBDict.writeTextDBDictInto) '{0}' is not a directory"
            self.errors.error(msg.format(destpath))

            return

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # creation of the source directory :
        source_dir = os.path.join(destpath,
                                  SOURCEDIRECTORY_DEFAULTNAME)
        if os.path.exists(source_dir):
            shutil.rmtree(source_dir)

        os.makedirs( source_dir )

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # license :
        # adding "#COMMENT#" before each line :
        sep_license = NEWLINE + "#COMMENT# "
        _license = sep_license.join( logotheras.options.LICENSE_TEXT.split("\n") )
        # we add "#COMMENT#" before the first line since .join() doesn't.
        _license = "#COMMENT# " + _license

        # information file :
        infofile = TextDBDictInfoFile(logotherasdata = self.logotherasdata,
                                      errors = self.errors)

        infofile.writeFromData(destpath = destpath)

        # data file(s) :
        share_out = options["textdbdict::share out TextDBDict files"]

        if share_out is None:
            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # everything is packed into one file :

            datafile_name = "unique_file.textdbdict"
            datafile_name = datafile_name[:max_length]

            with open( os.path.join(source_dir, datafile_name), "w" ) as datafile:

                # logged once per file, like in the several-files layouts.
                self.errors.info("writing data into the unique file : " + datafile.name)

                datafile.write(_license)

                for articledata in self.logotherasdata.getArticles():

                    article = Article( errors = self.errors,
                                       logotherasdata = self.logotherasdata )
                    article.setData( articledata )

                    datafile.write( article.getTextDBDictRepr() )

            return

        if share_out.startswith('a'):
            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # several outputfiles : all the words begin with the same letters
            #
            # this grouping yields (articles, initial) : only the names of
            # the articles are needed to write the file.
            groups = (articles for (articles, _initial)
                      in self.logotherasdata.getArticlesGroupedBy(
                          method="initial",
                          length=len(share_out),
                          srclanguage=srclanguage))

        else:
            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # several outputfiles : each outputfile contain the same number of articles
            groups = self.logotherasdata.getArticlesGroupedBy(
                method="articles' number",
                length=int(share_out),
                srclanguage=srclanguage)

        for (datafile_number, articles) in enumerate(groups):
            self._writeGroupedDataFile(source_dir = source_dir,
                                       datafile_number = datafile_number,
                                       articles = articles,
                                       max_length = max_length,
                                       _license = _license)

        # let's add the articles marked with NOTALEXICAL_ARTINAME in a
        # special file.
        self.writeAddendum(destpath, _license)

    def _writeGroupedDataFile(self,
                              source_dir,
                              datafile_number,
                              articles,
                              max_length,
                              _license):
        """
                TextDBDict._writeGroupedDataFile

                Write one data file of a several-files layout : the license
                text, then the TextDBDict representation of each article
                named in <articles>.

                source_dir      :       (str) directory receiving the file
                datafile_number :       (int) index of the file; used to build
                                        its name.
                articles        :       sequence of articles' names (keys of
                                        self.logotherasdata); the first and
                                        the last ones are used in the detailed
                                        file name.
                max_length      :       (int) maximal number of characters
                                        kept in the file name (the directory
                                        is not taken in account).
                _license        :       (str) text written at the beginning
                                        of the file.
        """
        options = logotheras.options.OPTIONS

        # name of this new file ?
        if not options["textdbdict::" \
                       "write TextDBDict files with detailed names"]:
            # short name :
            datafile_name = "{0:0>6}.textdbdict".format(datafile_number)

        else:
            # long name :
            first_article = articles[0].strip()
            last_article = articles[-1].strip()

            datafile_name = options["textdbdict::detailed filename"].format(
                datafile_number,
                first_article,
                last_article)

        # the limit concerns the NAME of the file : it has to be applied
        # before the directory is added, otherwise a long <source_dir> would
        # eat the name itself (and several files could share the same path).
        datafile_name = datafile_name[:max_length]

        with open(os.path.join( source_dir, datafile_name ), 'w') as datafile:
            self.errors.info("writing data into a new file : " + datafile.name)

            datafile.write(_license)

            for article_name in articles:

                article = Article( errors = self.errors,
                                   logotherasdata = self.logotherasdata )
                article.setData( self.logotherasdata[article_name] )

                datafile.write( article.getTextDBDictRepr() )