Beispiel #1
0
    def __init__(self, path):
        """Build the config object from the configuration file at *path*.

        Every data member is first initialised to None, then the file is
        read with self._readConfig(path) and checked with
        self._validateConfig(). Progress is reported under the "Config"
        label.

        Raises:
            ConfigException: logged here as an error and a failure, then
                re-raised unchanged to the caller.
        """
        stage = "Config"

        # Data members, with the kind of value each one is meant to hold.
        self.project = None             # project name (string)
        self.exportdir = None           # export directory (path)
        self.username = None            # user name (string)
        self.userpath = None            # user directory (path)
        self.rawfiles = None            # raw files (list of path)
        self.src = None                 # source language (string)
        self.targets = None             # target languages (list of string)
        # Stanford Chinese Word Segmenter settings.
        self.stanford_execpath = None   # segmenter path (path)
        self.stanford_standard = None   # segmenter standard (string)

        try:
            log_start(stage)
            log_stderr("Config file: '{0}'".format(path))
            self._readConfig(path)
            self._validateConfig()
            log_done(stage)
        except ConfigException as err:
            log_error(err.message)
            log_fail(stage)
            # Bare raise so the caller sees the original exception.
            raise
Beispiel #2
0
    def generateCorpus(self):
        """Split every raw file named in the config, then merge the results.

        After self._prepare() runs, each entry of self.config.rawfiles is
        processed in turn: the file pool is filled, the splitter is run and
        the pool is closed and cleaned. A file that raises SplitException is
        logged as a warning/failure and skipped; the remaining files are
        still processed. If at least one file succeeded, the successful
        files are handed to self.mergeCorpus(); otherwise an error is logged
        and nothing is merged.

        Raises:
            SplitException: if self.config.targets is empty (or None) after
                self._prepare() has run.
        """
        log_start("Split")
        self._prepare()
        # Truthiness check: also reports a None target list as a
        # SplitException instead of failing with TypeError in len().
        if not self.config.targets:
            raise SplitException("Prepare the directory failed.")

        filelist = []
        for afile in self.config.rawfiles:
            label = "Split {0}".format(afile)
            try:
                log_start(label)
                try:
                    self.fillPool(afile)
                    self.splitter.split(afile)
                finally:
                    # Always release the pool, even when filling or splitting
                    # failed; otherwise the files stay open and the next
                    # raw file starts with this file's pool still in place.
                    self.filepool.closeFiles()
                    self.filepool.clean()
                filelist.append(afile)
                log_done(label)
            except SplitException as e:
                # Also covers a SplitException raised by the cleanup above.
                log_warning(e.message)
                # TODO: del the files when failed.
                log_fail(label)

        if not filelist:
            log_error("No corpus file generated.")
            log_fail("Split")
        else:
            self.mergeCorpus(filelist)
            log_done("Split")
Beispiel #3
0
def main():
    """Command-line entry point of the convert module.

    Parses the command line, builds a ConversionConfig from the XML file
    given with -f/--file and runs a Conversion with it.

    The function always ends through sys.exit():
      * errno.EINVAL if no file was given or the path is not a regular file,
      * -1 if building the config or running the conversion raised,
      * 0 on success.
    """

    progname = sys.argv[0]
    usage = """%prog -f command.xml"""

    parser = OptionParser(
        usage, version="%prog v0.1 (c) 2010 by Leo Jiang <*****@*****.**>")
    parser.add_option("-f",
                      "--file",
                      dest="filename",
                      metavar="FILE",
                      type="string",
                      help="read the command from file.")
    (options, args) = parser.parse_args()

    log_stderr("convert.py v0.1 (c) 2010 by Leo Jiang <*****@*****.**>")

    if options.filename is None:
        log_stderr("Usage: {0} -f command.xml".format(progname))
        log_stderr(os.strerror(errno.EINVAL) + " : config file not specified.")
        sys.exit(errno.EINVAL)

    path = os.path.abspath(options.filename)
    if not os.path.isfile(path):
        log_error(
            os.strerror(errno.EINVAL) +
            " : file '{0}' not existed.".format(path))
        log_fail("Convert")
        sys.exit(errno.EINVAL)

    try:
        config = ConversionConfig(path)
        conversion = Conversion(config)
        conversion.run()
    except ConfigException:
        log_fail("Convert: ConfigException")
        sys.exit(-1)
    except Exception as e:
        # Parenthesised form prints the same text under Python 2 and 3.
        print("failed.")
        # `message` is empty for exceptions built with several arguments
        # (e.g. IOError) and does not exist on Python 3, so fall back to
        # str(e) to make sure the real cause is logged.
        log_fail(getattr(e, "message", None) or str(e))
        log_fail("Convert: unknown exception.")
        sys.exit(-1)

    log_done("Convert")
    sys.exit(0)
Beispiel #4
0
def main():
    """Command-line entry point of the convert module.

    Parses the command line, builds a ConversionConfig from the XML file
    given with -f/--file and runs a Conversion with it.

    The function always ends through sys.exit():
      * errno.EINVAL if no file was given or the path is not a regular file,
      * -1 if building the config or running the conversion raised,
      * 0 on success.
    """

    progname = sys.argv[0]
    usage = """%prog -f command.xml"""

    parser = OptionParser(usage, version="%prog v0.1 (c) 2010 by Leo Jiang <*****@*****.**>")
    parser.add_option("-f", "--file", dest="filename", metavar="FILE", type="string",
                      help="read the command from file.")
    (options, args) = parser.parse_args()

    log_stderr("convert.py v0.1 (c) 2010 by Leo Jiang <*****@*****.**>")

    if options.filename is None:
        log_stderr("Usage: {0} -f command.xml".format(progname))
        log_stderr(os.strerror(errno.EINVAL) + " : config file not specified.")
        sys.exit(errno.EINVAL)

    path = os.path.abspath(options.filename)
    if not os.path.isfile(path):
        log_error(os.strerror(errno.EINVAL) + " : file '{0}' not existed.".format(path))
        log_fail("Convert")
        sys.exit(errno.EINVAL)

    try:
        config = ConversionConfig(path)
        conversion = Conversion(config)
        conversion.run()
    except ConfigException:
        log_fail("Convert: ConfigException")
        sys.exit(-1)
    except Exception as e:
        # Parenthesised form prints the same text under Python 2 and 3.
        print("failed.")
        # `message` is empty for exceptions built with several arguments
        # (e.g. IOError) and does not exist on Python 3, so fall back to
        # str(e) to make sure the real cause is logged.
        log_fail(getattr(e, "message", None) or str(e))
        log_fail("Convert: unknown exception.")
        sys.exit(-1)

    log_done("Convert")
    sys.exit(0)