def writeAddendum(self, destpath, _license):
    """
        TextDBDict.writeAddendum

        Write into the file named ADDENDUM_FILENAME the articles whose name
        contains the NOTALEXICAL_ARTINAME symbol. The file is (re)created in
        the directory <destpath>/SOURCEDIRECTORY_DEFAULTNAME; this method does
        not create that directory.

        destpath        : (str) target path
        _license        : (str) text to be written at the beginning of the
                          addendum file
    """
    self.errors.info("writing data into the addendum file... : " + ADDENDUM_FILENAME)

    source_dir = os.path.join(destpath, SOURCEDIRECTORY_DEFAULTNAME)

    with open(os.path.join(source_dir, ADDENDUM_FILENAME), 'w') as addendumfile:

        # the license text goes first. writelines() takes a string as well as
        # any iterable of strings, and avoids one Python-level write() call
        # per character.
        addendumfile.writelines(_license)

        for articledata in self.logotherasdata.getArticles():
            artiname = articledata.headerdata.artiname

            # only the articles marked with NOTALEXICAL_ARTINAME belong here :
            if NOTALEXICAL_ARTINAME not in artiname:
                continue

            article = Article(errors=self.errors,
                              logotherasdata=self.logotherasdata)
            article.setData(self.logotherasdata[artiname])
            addendumfile.write(article.getTextDBDictRepr())
def writeTextDBDictInto(self, destpath, srclanguage):
    """
        TextDBDict.writeTextDBDictInto

        Write the articles stored in self.logotherasdata as TextDBDict file(s)
        in the directory <destpath>/SOURCEDIRECTORY_DEFAULTNAME. This
        directory is deleted first if it already exists, then created again.
        Every data file begins with the license text, each line of it being
        prefixed by "#COMMENT# ".

        The option "textdbdict::share out TextDBDict files" selects the layout :
            o None                          : one file, "unique_file.textdbdict"
            o a string starting with 'a'    : several files, the articles being
                                              grouped by initial; the length of
                                              the string is given as <length>
                                              to getArticlesGroupedBy()
            o anything else                 : several files; the value is
                                              converted with int() and given as
                                              <length> to getArticlesGroupedBy()
                                              (method "articles' number")

        If several files are written, writeAddendum() is called afterwards.

        An error is reported through self.errors and nothing is written if
        <destpath> doesn't exist or is not a directory.

        destpath        : (str) an existing directory
        srclanguage     : Language object, given to getArticlesGroupedBy()
    """
    self.errors.debug("TextDBDict.writeTextDBDictInto; destpath=" + str(destpath) )

    options = logotheras.options.OPTIONS

    # maximal length for the name of the files :
    max_length = options["textdbdict::maximal number of characters in a filename"]

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Is <destpath> valid ?
    if not os.path.exists(destpath):
        msg = "(ERR021) (TextDBDict.writeTextDBDictInto) Missing directory : '{0}'"
        self.errors.error(msg.format(destpath))
        return

    if not os.path.isdir(destpath):
        msg = "(ERR020) (TextDBDict.writeTextDBDictInto) '{0}' is not a directory"
        self.errors.error(msg.format(destpath))
        return

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # creation of the source directory (any previous content is discarded) :
    source_dir = os.path.join(destpath, SOURCEDIRECTORY_DEFAULTNAME)
    if os.path.exists(source_dir):
        shutil.rmtree(source_dir)
    os.makedirs(source_dir)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # license :
    # adding "#COMMENT#" before each line; the prefix of the first line is
    # added by hand since .join() only inserts the separator between lines.
    sep_license = NEWLINE + "#COMMENT# "
    _license = "#COMMENT# " + sep_license.join(logotheras.options.LICENSE_TEXT.split("\n"))

    # information file :
    infofile = TextDBDictInfoFile(logotherasdata=self.logotherasdata,
                                  errors=self.errors)
    infofile.writeFromData(destpath=destpath)

    # data file(s) :
    share_out = options["textdbdict::share out TextDBDict files"]

    if share_out is None:
        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # everything is packed into one file :
        datafile_name = "unique_file.textdbdict"[:max_length]

        with open(os.path.join(source_dir, datafile_name), "w") as datafile:
            # logged once for the file, as for the other layouts :
            self.errors.info("writing data into the unique file : " + datafile.name)

            datafile.write(_license)

            for articledata in self.logotherasdata.getArticles():
                article = Article(errors=self.errors,
                                  logotherasdata=self.logotherasdata)
                article.setData(articledata)
                datafile.write(article.getTextDBDictRepr())

        return

    #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
    # several output files : only the way the articles are grouped differs.
    if share_out.startswith('a'):
        # all the words of a file begin with the same letters. Here
        # getArticlesGroupedBy() yields (articles, initial) pairs; only the
        # articles' names are needed.
        groups = (articles
                  for (articles, _initial)
                  in self.logotherasdata.getArticlesGroupedBy(method="initial",
                                                              length=len(share_out),
                                                              srclanguage=srclanguage))
    else:
        # each output file contains the same number of articles :
        groups = self.logotherasdata.getArticlesGroupedBy(method="articles' number",
                                                          length=int(share_out),
                                                          srclanguage=srclanguage)

    for (datafile_number, articles) in enumerate(groups):

        # name of this new file ?
        if not options["textdbdict::write TextDBDict files with detailed names"]:
            # short name :
            datafile_name = "{0:0>6}.textdbdict".format(datafile_number)
        else:
            # long name, built from the first and the last article :
            datafile_name = options["textdbdict::detailed filename"].format(
                datafile_number,
                articles[0].strip(),
                articles[-1].strip())

        # <max_length> limits the name of the file, not the whole path : the
        # name is cut before being joined to the directory, otherwise the
        # directory part itself could be truncated.
        datafile_name = os.path.join(source_dir, datafile_name[:max_length])

        with open(datafile_name, 'w') as datafile:
            self.errors.info("writing data into a new file : " + datafile.name)

            datafile.write(_license)

            for article_name in articles:
                article = Article(errors=self.errors,
                                  logotherasdata=self.logotherasdata)
                article.setData(self.logotherasdata[article_name])
                datafile.write(article.getTextDBDictRepr())

    # let's add the articles marked with NOTALEXICAL_ARTINAME in a
    # special file.
    self.writeAddendum(destpath, _license)