def __init__(self, path):
    """Build the config object from the configuration file at *path*.

    All data members are first reset to ``None``.  The file is then read
    with ``_readConfig`` and checked with ``_validateConfig``.  A
    ``ConfigException`` raised during either step is logged and re-raised
    to the caller.
    """
    # --- project level settings ---
    self.project = None            # :string        project name
    self.exportdir = None          # :path          export directory

    # --- user level settings ---
    self.username = None           # :string        user name
    self.userpath = None           # :path          user directory

    # --- corpus description ---
    self.rawfiles = None           # :list of path  raw file list
    self.src = None                # :string        source lang
    self.targets = None            # :list of string  target langs

    # --- Stanford Chinese Word Segmenter ---
    self.stanford_execpath = None  # :path          segmenter path
    self.stanford_standard = None  # :string        segmenter standard

    try:
        log_start("Config")
        log_stderr("Config file: '{0}'".format(path))
        self._readConfig(path)
        self._validateConfig()
        log_done("Config")
    except ConfigException as err:
        # Report the failure here, then let the caller decide what to do.
        log_error(err.message)
        log_fail("Config")
        raise
def generateCorpus(self):
    """Split every raw file of the config and merge the ones that succeed.

    After ``self._prepare()`` has run, each path in
    ``self.config.rawfiles`` is processed in turn: the file pool is filled
    for it, the splitter is run on it, and the pool is closed and cleaned.
    A ``SplitException`` raised while handling one file is logged as a
    warning and that file is skipped; the remaining files are still
    processed.  The files that were split successfully are handed to
    ``self.mergeCorpus``.  If none succeeded, an error is logged and
    nothing is merged.

    Raises:
        SplitException: if ``self.config.targets`` is empty once
            ``self._prepare()`` has returned.
    """
    log_start("Split")
    self._prepare()
    if not self.config.targets:
        raise SplitException("Prepare the directory failed.")

    filelist = []
    for afile in self.config.rawfiles:
        label = "Split {0}".format(afile)
        try:
            log_start(label)
            try:
                self.fillPool(afile)
                self.splitter.split(afile)
            finally:
                # Always release the pool, even when filling or splitting
                # failed; otherwise the files opened for this raw file
                # stay open and leak into the next iteration.
                # NOTE(review): assumes closeFiles()/clean() are safe on a
                # partially filled pool -- confirm against the pool class.
                self.filepool.closeFiles()
                self.filepool.clean()
            filelist.append(afile)
            log_done(label)
        except SplitException as e:
            log_warning(e.message)
            # TODO: del the files when failed.
            log_fail(label)

    if not filelist:
        log_error("No corpus file generated.")
        log_fail("Split")
    else:
        self.mergeCorpus(filelist)
        log_done("Split")
def main():
    """The main function of the convert module.

    Parse the command line, create the config from the XML file that
    describes the conversion, then run the conversion to create and filter
    the corpus files according to that config.

    The function always leaves through ``sys.exit``:

    * ``errno.EINVAL`` -- no config file was given with ``-f``/``--file``,
      or the given path is not an existing regular file;
    * ``-1`` -- building the config or running the conversion failed;
    * ``0`` -- the conversion finished.
    """
    progname = sys.argv[0]
    usage = """%prog -f command.xml"""
    parser = OptionParser(
        usage,
        version="%prog v0.1 (c) 2010 by Leo Jiang <*****@*****.**>")
    parser.add_option("-f", "--file", dest="filename", metavar="FILE",
                      type="string", help="read the command from file.")
    options, _ = parser.parse_args()  # positional arguments are not used

    log_stderr("convert.py v0.1 (c) 2010 by Leo Jiang <*****@*****.**>")

    if options.filename is None:
        log_stderr("Usage: {0} -f command.xml".format(progname))
        log_stderr(os.strerror(errno.EINVAL) + " : config file not specified.")
        sys.exit(errno.EINVAL)

    path = os.path.abspath(options.filename)
    if not os.path.isfile(path):
        log_error(os.strerror(errno.EINVAL)
                  + " : file '{0}' not existed.".format(path))
        log_fail("Convert")
        sys.exit(errno.EINVAL)

    try:
        config = ConversionConfig(path)
        conversion = Conversion(config)
        conversion.run()
    except ConfigException:
        log_fail("Convert: ConfigException")
        sys.exit(-1)
    except Exception as e:
        print("failed.")
        # ``message`` is empty for exceptions built from several arguments
        # (e.g. IOError(errno, strerror)) and may be missing altogether,
        # so fall back to str(e) instead of logging an empty line.
        log_fail(getattr(e, "message", None) or str(e))
        log_fail("Convert: unknown exception.")
        sys.exit(-1)

    log_done("Convert")
    sys.exit(0)
def main():
    """The main function of the convert module.

    Parse the command line, create the config from the XML file that
    describes the conversion, then run the conversion to create and filter
    the corpus files according to that config.

    The function always leaves through ``sys.exit``:

    * ``errno.EINVAL`` -- no config file was given with ``-f``/``--file``,
      or the given path is not an existing regular file;
    * ``-1`` -- building the config or running the conversion failed;
    * ``0`` -- the conversion finished.
    """
    progname = sys.argv[0]
    usage = """%prog -f command.xml"""
    parser = OptionParser(
        usage,
        version="%prog v0.1 (c) 2010 by Leo Jiang <*****@*****.**>")
    parser.add_option("-f", "--file", dest="filename", metavar="FILE",
                      type="string", help="read the command from file.")
    options, _ = parser.parse_args()  # positional arguments are not used

    log_stderr("convert.py v0.1 (c) 2010 by Leo Jiang <*****@*****.**>")

    if options.filename is None:
        log_stderr("Usage: {0} -f command.xml".format(progname))
        log_stderr(os.strerror(errno.EINVAL) + " : config file not specified.")
        sys.exit(errno.EINVAL)

    path = os.path.abspath(options.filename)
    if not os.path.isfile(path):
        log_error(os.strerror(errno.EINVAL)
                  + " : file '{0}' not existed.".format(path))
        log_fail("Convert")
        sys.exit(errno.EINVAL)

    try:
        config = ConversionConfig(path)
        conversion = Conversion(config)
        conversion.run()
    except ConfigException:
        log_fail("Convert: ConfigException")
        sys.exit(-1)
    except Exception as e:
        print("failed.")
        # ``message`` is empty for exceptions built from several arguments
        # (e.g. IOError(errno, strerror)) and may be missing altogether,
        # so fall back to str(e) instead of logging an empty line.
        log_fail(getattr(e, "message", None) or str(e))
        log_fail("Convert: unknown exception.")
        sys.exit(-1)

    log_done("Convert")
    sys.exit(0)