def readInfoFile(self, informationsdata, filesrc, srclanguage):
    """
        TextDBDict.readInfoFile

        Fill <informationsdata> from the already opened information
        file <filesrc>. The work is delegated to a TextDBDictInfoFile
        object built with this object's logotherasdata and errors.

        informationsdata    : InformationsData object
        filesrc             : (file descriptor)
        srclanguage         : passed unchanged to
                              TextDBDictInfoFile.readFromData

        Return (bool)success, i.e. whatever readFromData() returned.
    """
    reader = TextDBDictInfoFile(logotherasdata=self.logotherasdata,
                                errors=self.errors)

    return reader.readFromData(informationsdata=informationsdata,
                               reading_position=self.reading_position,
                               filesrc=filesrc,
                               srclanguage=srclanguage)
def writeTextDBDictInto(self, destpath, srclanguage):
    """
        TextDBDict.writeTextDBDictInto

        Write the information file and the data file(s) of the dictionary
        below <destpath>. The data files are stored in the sub-directory
        SOURCEDIRECTORY_DEFAULTNAME, which is deleted and re-created on
        each call.

        The option "textdbdict::share out TextDBDict files" selects the
        layout :
            o None                     : one unique data file
            o a string beginning by 'a': one file per group of articles
                                         sharing the same initial(s); the
                                         length of the string is the number
                                         of characters to be compared.
            o anything else            : converted to an int, the number of
                                         articles per file.

        destpath        : (str) an existing directory
        srclanguage     : Language object

        Return None. If <destpath> is missing or is not a directory, an
        error is reported through self.errors and nothing is written.
    """
    self.errors.debug("TextDBDict.writeTextDBDictInto; destpath=" + str(destpath))

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Is <destpath> valid ? Checked before anything else so that an invalid
    # destination never touches the options nor the file system.
    if not os.path.exists(destpath):
        msg = "(ERR021) (TextDBDict.writeTextDBDictInto) Missing directory : '{0}'"
        self.errors.error(msg.format(destpath))
        return

    if not os.path.isdir(destpath):
        msg = "(ERR020) (TextDBDict.writeTextDBDictInto) '{0}' is not a directory"
        self.errors.error(msg.format(destpath))
        return

    options = logotheras.options.OPTIONS

    # maximal length for the name of the files :
    max_length = options["textdbdict::maximal number of characters in a filename"]

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # creation of the source directory :
    source_dir = os.path.join(destpath, SOURCEDIRECTORY_DEFAULTNAME)
    if os.path.exists(source_dir):
        shutil.rmtree(source_dir)
    os.makedirs(source_dir)

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # license :
    # "#COMMENT# " is added before each line; the first one is prefixed
    # by hand since .join() only inserts the separator between the items.
    sep_license = NEWLINE + "#COMMENT# "
    _license = "#COMMENT# " + sep_license.join(logotheras.options.LICENSE_TEXT.split("\n"))

    # information file :
    infofile = TextDBDictInfoFile(logotherasdata=self.logotherasdata,
                                  errors=self.errors)
    infofile.writeFromData(destpath=destpath)

    def write_article(datafile, articledata):
        """Write the TextDBDict representation of <articledata> into <datafile>."""
        article = Article(errors=self.errors,
                          logotherasdata=self.logotherasdata)
        article.setData(articledata)
        datafile.write(article.getTextDBDictRepr())

    # data file(s) :
    share_out = options["textdbdict::share out TextDBDict files"]

    if share_out is None:
        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # everything is packed into one file :
        datafile_name = "unique_file.textdbdict"[:max_length]

        with open(os.path.join(source_dir, datafile_name), "w") as datafile:
            # logged once per file, not once per article.
            self.errors.info("writing data into the unique file : " + datafile.name)

            datafile.write(_license)
            for articledata in self.logotherasdata.getArticles():
                write_article(datafile, articledata)

        # no addendum is written in this mode.
        return

    if share_out.startswith('a'):
        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # several outputfiles : all the words begin with the same letters.
        # This grouping yields (articles, initial) pairs; only the articles
        # are needed here.
        grouped = self.logotherasdata.getArticlesGroupedBy(method="initial",
                                                           length=len(share_out),
                                                           srclanguage=srclanguage)
        groups = (articles for (articles, _initial) in grouped)
    else:
        #~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~.~
        # several outputfiles : each outputfile contains the same number of
        # articles.
        groups = self.logotherasdata.getArticlesGroupedBy(method="articles' number",
                                                          length=int(share_out),
                                                          srclanguage=srclanguage)

    detailed_names = options["textdbdict::write TextDBDict files with detailed names"]

    for (datafile_number, articles) in enumerate(groups):

        # name of this new file ?
        if not detailed_names:
            # short name :
            datafile_name = "{0:0>6}.textdbdict".format(datafile_number)
        else:
            # long name : built from the file number and from the names of
            # the first and of the last article stored in the file.
            datafile_name = options["textdbdict::detailed filename"].format(
                datafile_number,
                articles[0].strip(),
                articles[-1].strip())

        # the length limit applies to the name of the file, not to the whole
        # path : truncating the joined path could cut into the directory
        # part and send the file to a wrong place.
        datafile_path = os.path.join(source_dir, datafile_name[:max_length])

        with open(datafile_path, 'w') as datafile:
            self.errors.info("writing data into a new file : " + datafile.name)

            datafile.write(_license)
            for article_name in articles:
                write_article(datafile, self.logotherasdata[article_name])

    # let's add the articles marked with NOTALEXICAL_ARTINAME in a
    # special file.
    self.writeAddendum(destpath, _license)