def _prepare(self):
    """Prepare the corpus directory hierarchy.

    Creates the project directory if it does not exist yet. Then, for every
    target language in ``self.config.targets``, creates the corpus directory
    of the (source, target) pair if needed and empties both corpus files by
    opening them in write mode.

    A target whose corpus directory cannot be created, or whose corpus files
    cannot be opened for writing, is removed from ``self.config.targets`` and
    the error is logged, so one bad target does not abort the preparation of
    the remaining ones.
    """
    log_stderr("Preparing corpus directory hierarchy ...")

    # Prepare the project directory. A failure here is not recoverable for
    # any target, so it is deliberately allowed to propagate.
    projPath = self.config.getProjectDir()
    if not os.path.exists(projPath):
        os.mkdir(projPath)
        log_stderr("Creating project directory.")

    srclang = self.config.src
    # Iterate over a copy: failing targets are removed from the live list.
    for targetlang in list(self.config.targets):
        log_stderr("")
        log_stderr(localePairForm(srclang, targetlang))

        corpusDirPath = self.config.getCorpusDir(srclang, targetlang)
        try:
            if not os.path.exists(corpusDirPath):
                os.mkdir(corpusDirPath)
                log_stderr(
                    "Creating corpus directory '{0}'.".format(corpusDirPath))

            log_stderr("Cleaning the corpus files ...")
            srcCorpusFile = self.config.getCorpusFile(
                srclang, targetlang, srclang)
            targetCorpusFile = self.config.getCorpusFile(
                srclang, targetlang, targetlang)

            # Opening in 'w' mode truncates (or creates) the file; the
            # context managers close the handles even if the second open
            # fails.
            with open(srcCorpusFile, 'w'):
                with open(targetCorpusFile, 'w'):
                    pass

            log_stderr("Cleaned: {0}".format(srcCorpusFile))
            log_stderr("Cleaned: {0}".format(targetCorpusFile))
        except (IOError, OSError) as e:
            # os.mkdir raises OSError and open raises IOError on Python 2;
            # on Python 3 both names refer to the same class.
            self.config.targets.remove(targetlang)
            log_stderr(str(e))
def _prepare(self):
    """Prepare the corpus directory hierarchy.

    Creates the project directory if it does not exist yet. Then, for every
    target language in ``self.config.targets``, creates the corpus directory
    of the (source, target) pair if needed and empties both corpus files by
    opening them in write mode.

    A target whose corpus directory cannot be created, or whose corpus files
    cannot be opened for writing, is removed from ``self.config.targets`` and
    the error is logged, so one bad target does not abort the preparation of
    the remaining ones.
    """
    log_stderr("Preparing corpus directory hierarchy ...")

    # Prepare the project directory. A failure here is not recoverable for
    # any target, so it is deliberately allowed to propagate.
    projPath = self.config.getProjectDir()
    if not os.path.exists(projPath):
        os.mkdir(projPath)
        log_stderr("Creating project directory.")

    srclang = self.config.src
    # Iterate over a copy: failing targets are removed from the live list.
    for targetlang in list(self.config.targets):
        log_stderr("")
        log_stderr(localePairForm(srclang, targetlang))

        corpusDirPath = self.config.getCorpusDir(srclang, targetlang)
        try:
            if not os.path.exists(corpusDirPath):
                os.mkdir(corpusDirPath)
                log_stderr(
                    "Creating corpus directory '{0}'.".format(corpusDirPath))

            log_stderr("Cleaning the corpus files ...")
            srcCorpusFile = self.config.getCorpusFile(
                srclang, targetlang, srclang)
            targetCorpusFile = self.config.getCorpusFile(
                srclang, targetlang, targetlang)

            # Opening in 'w' mode truncates (or creates) the file; the
            # context managers close the handles even if the second open
            # fails.
            with open(srcCorpusFile, 'w'):
                with open(targetCorpusFile, 'w'):
                    pass

            log_stderr("Cleaned: {0}".format(srcCorpusFile))
            log_stderr("Cleaned: {0}".format(targetCorpusFile))
        except (IOError, OSError) as e:
            # os.mkdir raises OSError and open raises IOError on Python 2;
            # on Python 3 both names refer to the same class.
            self.config.targets.remove(targetlang)
            log_stderr(str(e))
def getCorpusDir(self, src, target):
    """Return the corpus directory path for the (src, target) pair.

    The path is the project directory joined with the name produced by
    ``localePairForm(src, target)``.
    """
    projectDir = self.getProjectDir()
    pairName = localePairForm(src, target)
    return os.path.join(projectDir, pairName)
def getPCFilePath(self, src, target):
    """Return the path of the process config file for (src, target).

    The file lives under ``self.userpath`` and is named after
    ``localePairForm(src, target)`` with an ``.xml`` extension.
    """
    baseDir = self.userpath
    fileName = localePairForm(src, target) + ".xml"
    return os.path.join(baseDir, fileName)