Python setupLoggingの例、asrt.common.LoggingSetup.setupLogging Pythonの例

コード例 #1

0

ファイルを表示

ファイル: run_data_preparation.py プロジェクト: idiap/asrt

    inputFile = args.inputFile[0]
    outputDir = args.outputDir[0]
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    #Flags
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage  = bool( args.filter2ndStage )
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    keepNewWords = bool(not args.trim)

    setupLogging(logging.INFO, outputDir + "/task_log.txt")

    #Api setup
    api = DataPreparationAPI(inputFile, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    api.setSegmentWithNLTK(not rawSeg)
    api.setKeepNewWords(keepNewWords)

    if language == 0:
        api.trainClassifier()

コード例 #2

0

ファイルを表示

ファイル: run_apply_regex.py プロジェクト: d-unknown-processor/asrt

        count += 1
        if count % 50000 == 0:
            print "Processed %d values" % count

        #Read next line
        l = fd.readline()

    io.closeFile(fd)

    strContent = u"\n".join(linesList)
    io.writeFileContent(outputFile, strContent)

################
# main
#
if __name__ == "__main__" :
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i", "--input", help="input file", nargs=1, dest="inputFile", required=True)
    parser.add_argument("-o", "--output", help="output file", nargs=1, dest="outputFile", required=True)
    parser.add_argument("-r", "--regex", help="regular expression file", nargs=1, dest="regexFile", required=True)
    
    args = parser.parse_args()

    inputFile = os.path.abspath(args.inputFile[0])
    outputFile = os.path.abspath(args.outputFile[0])
    regexFile = os.path.abspath(args.regexFile[0])

    setupLogging(logging.INFO)

    applyRegexes(inputFile, outputFile, regexFile)

コード例 #3

0

ファイルを表示

# along with asrt. If not, see <http://opensource.org/licenses/>.

__author__ = "Alexandre Nanchen"
__version__ = "Revision: 1.0 "
__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest, re, string, logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging

setupLogging(logging.INFO, "./output.log")


class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        for t, gt in testList:
            f.strText = t
            callback()
            self.assertEquals(gt.encode('utf-8'), f.strText.encode('utf-8'))

    ############
    #Tests
    #
    def testNormalizeUtf8(self):

コード例 #4

0

ファイルを表示

#
if __name__ == "__main__":
    #Setup parser
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-t", "--target", help="target directory containing the data.olist and data.omap", 
                         nargs=1, dest="targetDir", required=True)
    parser.add_argument("-o", "--output", help="output directory", nargs=1, dest="outputDir", required=True)
    parser.add_argument("-r", "--regex", help="regex file", nargs=1, dest="regexFile", required=True)
    parser.add_argument("-f", "--filter", help="filter sentences", dest="filter",action="store_true")
    parser.add_argument("-d", "--debug", help="enable debug output", action="store_true")
    parser.add_argument("-n", "--rmpunctuation", help="remove punctuation", action="store_true")
    parser.add_argument("-p", "--vbpunctuation", help="verbalize punctuation", action="store_true")
    parser.add_argument("-s", "--rawseg", help="do not segment sentences with NLTK", dest="rawseg",action="store_true")
    parser.add_argument("-m", "--lm", help="prepare for lm modeling", dest="lm",action="store_true")

    #Parse arguments
    args = parser.parse_args()
    targetDir = args.targetDir[0]
    outputDir = args.outputDir[0]
    regexFile = args.regexFile[0]

    segmentWithNLTK = "True" if not args.rawseg else "False"

    setupLogging(logging.INFO, outputDir + "/task_log.txt")

    task = ImportDocumentTask(TaskInfo(STRPARAMETERS % (regexFile, str(args.debug), 
                                                        args.rmpunctuation, args.vbpunctuation,
                                                        segmentWithNLTK, args.filter, args.lm), 
                                       outputDir, targetDir))
    task.execute()

コード例 #5

0

ファイルを表示

ファイル: run_data_preparation_individual_files.py プロジェクト: idiap/asrt

    inputList = args.inputList[0]
    outputDir = args.outputDir[0]
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    #Flags
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage  = bool( args.filter2ndStage )
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    keepNewWords = bool(not args.trim)

    setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt")

    #Api setup
    api = DataPreparationAPI(None, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    api.setSegmentWithNLTK(not rawSeg)
    api.setKeepNewWords(keepNewWords)

    if language == 0:
        api.trainClassifier()

コード例 #6

0

ファイルを表示

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i",
                        "--input",
                        help="input file",
                        nargs=1,
                        dest="inputFile",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        help="output file",
                        nargs=1,
                        dest="outputFile",
                        required=True)
    parser.add_argument("-r",
                        "--regex",
                        help="regular expression file",
                        nargs=1,
                        dest="regexFile",
                        required=True)

    args = parser.parse_args()

    inputFile = os.path.abspath(args.inputFile[0])
    outputFile = os.path.abspath(args.outputFile[0])
    regexFile = os.path.abspath(args.regexFile[0])

    setupLogging(logging.INFO)

    applyRegexes(inputFile, outputFile, regexFile)

コード例 #7

0

ファイルを表示

ファイル: FormulaLMPreparationUnitTest.py プロジェクト: idiap/asrt

# along with asrt. If not, see <http://opensource.org/licenses/>.

__author__ = "Alexandre Nanchen"
__version__ = "Revision: 1.0 "
__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest, re, string, logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging

setupLogging(logging.INFO, "./output.log")

class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        for t, gt in testList:
            f.strText = t
            callback()
            self.assertEquals(gt.encode('utf-8'), f.strText.encode('utf-8'))

    ############
    #Tests
    #
    def testNormalizeUtf8(self):
        languages = ['0', '1', '2']

コード例 #8

0

ファイルを表示

__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest
import re
import string
import logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging
from asrt.config.AsrtConfig import TEMPDIRUNITTEST

setupLogging(logging.INFO, TEMPDIRUNITTEST + "/output.log")


class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        for t, gt in testList:
            f.strText = t
            callback()
            self.assertEqual(gt.encode('utf-8'), f.strText.encode('utf-8'))

    ############
    # Tests
    #
    def testNormalizeUtf8(self):

コード例 #9

0

ファイルを表示

ファイル: run_data_preparation_individual_files.py プロジェクト: colincwilson/asrt

    inputList = args.inputList[0]
    outputDir = args.outputDir[0]
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    # Flags
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage = bool(args.filter2ndStage)
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    expandNumberInWords = bool(not args.trim)

    setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt")

    # Api setup
    api = DataPreparationAPI(None, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    api.setSegmentWithNLTK(not rawSeg)
    api.setExpandNumberInWords(expandNumberInWords)

    if language == 0:
        api.trainClassifier()