Esempio n. 1
0
    def readInfoFile(self,
                     informationsdata,
                     filesrc,
                     srclanguage):
        """
                TextDBDict.readInfoFile

                Read the informations stored in the informations' file <filesrc>
                by delegating the work to a TextDBDictInfoFile object.

                informationsdata        : InformationsData object
                filesrc                 : (file descriptor)
                srclanguage             : passed unchanged to
                                          TextDBDictInfoFile.readFromData

                Return the value returned by TextDBDictInfoFile.readFromData,
                i.e. (bool)success
        """
        # the reader shares the data and the error handler of this object :
        infofile_reader = TextDBDictInfoFile(logotherasdata = self.logotherasdata,
                                             errors = self.errors)

        return infofile_reader.readFromData(informationsdata = informationsdata,
                                            reading_position = self.reading_position,
                                            filesrc = filesrc,
                                            srclanguage = srclanguage)
Esempio n. 2
0
    def writeTextDBDictInto(self,
                            destpath,
                            srclanguage):
        """
                TextDBDict.writeTextDBDictInto

                Write the informations' file and the data file(s) describing
                self.logotherasdata into the directory <destpath>.

                The data files are written in the sub-directory
                SOURCEDIRECTORY_DEFAULTNAME of <destpath>; if this sub-directory
                already exists, it is removed and created again. Each data file
                begins with the license text, each line of the license being
                prefixed with "#COMMENT# ".

                The way the articles are shared out depends on the option
                "textdbdict::share out TextDBDict files" :
                    o  None
                        everything is written in one file.
                    o  a string beginning with 'a'
                        the articles are grouped by "initial"; the length of
                        the string is given to getArticlesGroupedBy().
                    o  anything else
                        the value is converted into an integer and the articles
                        are grouped by "articles' number".
                In the last two cases, self.writeAddendum() is called after the
                data files have been written.

                destpath        :       (str)
                srclanguage     :       Language object

                No value is returned. If <destpath> doesn't exist or is not a
                directory, an error is logged through self.errors and nothing is
                written.
        """
        self.errors.debug("TextDBDict.writeTextDBDictInto; destpath=" + str(destpath) )

        options = logotheras.options.OPTIONS

        # maximal length for the name of the files :
        max_length = options["textdbdict::maximal number of characters in a filename"]

        # how the articles have to be shared out between the data files :
        share_out = options["textdbdict::share out TextDBDict files"]

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Is <destpath> valid ?
        if not os.path.exists(destpath):
            msg = "(ERR021) (TextDBDict.writeTextDBDictInto) Missing directory : '{0}'"
            self.errors.error(msg.format(destpath))
            return

        if not os.path.isdir(destpath):
            msg = "(ERR020) (TextDBDict.writeTextDBDictInto) '{0}' is not a directory"
            self.errors.error(msg.format(destpath))
            return

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # creation of the source directory (a previous one is discarded) :
        source_dir = os.path.join(destpath,
                                  SOURCEDIRECTORY_DEFAULTNAME)
        if os.path.exists(source_dir):
            shutil.rmtree(source_dir)

        os.makedirs( source_dir )

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # license :
        # adding "#COMMENT#" before each line; the first prefix is added by
        # hand since .join() only inserts the separator between the lines.
        sep_license = NEWLINE + "#COMMENT# "
        _license = "#COMMENT# " + \
                   sep_license.join( logotheras.options.LICENSE_TEXT.split("\n") )

        # information file :
        infofile = TextDBDictInfoFile(logotherasdata = self.logotherasdata,
                                      errors = self.errors)

        infofile.writeFromData(destpath = destpath)

        # data file(s) :
        if share_out is None:
            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # everything is packed into one file :

            datafile_name = "unique_file.textdbdict"[:max_length]

            # NOTE(review): the file is opened with the default encoding of
            # the platform; confirm that the reading code expects the same.
            with open( os.path.join(source_dir, datafile_name), "w" ) as datafile:

                # logged once per file, not once per article :
                self.errors.info("writing data into the unique file : " + datafile.name)

                datafile.write(_license)

                for articledata in self.logotherasdata.getArticles():

                    article = Article( errors = self.errors,
                                       logotherasdata = self.logotherasdata )
                    article.setData( articledata )

                    datafile.write( article.getTextDBDictRepr() )

            return

        if share_out.startswith('a'):
            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # several outputfiles : all the words begin with the same letters.
            # Each item is (articles, initial); only the articles are needed.
            groups = (articles for (articles, _initial)
                      in self.logotherasdata.getArticlesGroupedBy(
                          method="initial",
                          length=len(share_out),
                          srclanguage=srclanguage))

        else:
            #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
            # several outputfiles : each outputfile contain the same number of articles
            groups = self.logotherasdata.getArticlesGroupedBy(
                method="articles' number",
                length=int(share_out),
                srclanguage=srclanguage)

        for (datafile_number, articles) in enumerate(groups):
            self._writeArticlesGroupFile(source_dir = source_dir,
                                         datafile_number = datafile_number,
                                         articles = articles,
                                         license_text = _license,
                                         max_length = max_length)

        # let's add the articles marked with NOTALEXICAL_ARTINAME in a
        # special file.
        self.writeAddendum(destpath, _license)

    def _writeArticlesGroupFile(self,
                                source_dir,
                                datafile_number,
                                articles,
                                license_text,
                                max_length):
        """
                TextDBDict._writeArticlesGroupFile

                Write one group of articles into one data file of <source_dir>.

                The name of the file is either a short name built from
                <datafile_number> or, if the option
                "textdbdict::write TextDBDict files with detailed names" is set,
                the result of the option "textdbdict::detailed filename"
                formatted with the number, the first and the last article's
                name.

                source_dir      :       (str) directory where the file is written
                datafile_number :       (int) index of the group
                articles        :       sequence of articles' names (str), used
                                        as keys of self.logotherasdata
                license_text    :       (str) written at the beginning of the file
                max_length      :       (int) maximal number of characters kept
                                        in the name of the file
        """
        options = logotheras.options.OPTIONS

        # name of this new file ?
        if not options["textdbdict::write TextDBDict files with detailed names"]:
            # short name :
            datafile_name = "{0:0>6}.textdbdict".format(datafile_number)

        else:
            # long name :
            first_article = articles[0].strip()
            last_article = articles[-1].strip()

            datafile_name = options["textdbdict::detailed filename"].format(
                datafile_number,
                first_article,
                last_article)

        # The limit applies to the name of the file, not to the whole path :
        # the name is truncated BEFORE being joined to the directory, otherwise
        # a long <source_dir> would be cut and the file written elsewhere.
        datafile_name = datafile_name[:max_length]

        # NOTE(review): the file is opened with the default encoding of the
        # platform; confirm that the reading code expects the same.
        with open(os.path.join( source_dir, datafile_name ), 'w') as datafile:
            self.errors.info("writing data into a new file : " + datafile.name)

            datafile.write(license_text)

            for article_name in articles:

                article = Article( errors = self.errors,
                                   logotherasdata = self.logotherasdata )
                article.setData( self.logotherasdata[article_name] )

                datafile.write( article.getTextDBDictRepr() )