Example #1
    inputFile = args.inputFile[0]
    outputDir = args.outputDir[0]
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    #Flags
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage = bool(args.filter2ndStage)
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    keepNewWords = bool(not args.trim)

    setupLogging(logging.INFO, outputDir + "/task_log.txt")

    #Api setup
    api = DataPreparationAPI(inputFile, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    api.setSegmentWithNLTK(not rawSeg)
    api.setKeepNewWords(keepNewWords)

    if language == 0:
        api.trainClassifier()
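
The excerpt above starts after argument parsing. Purely as a hedged reconstruction (the dest names are taken from the assignments above; the option letters, help strings and long-option spellings are assumptions), the omitted argparse setup would look roughly like this:

    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i", "--input", help="input file", nargs=1, dest="inputFile", required=True)
    parser.add_argument("-o", "--output", help="output directory", nargs=1, dest="outputDir", required=True)
    parser.add_argument("-l", "--language", help="language id", nargs=1, dest="language", required=True)
    parser.add_argument("-r", "--regex", help="regular expression file", nargs=1, dest="regexFile", required=True)
    parser.add_argument("-d", "--debug", help="enable debug output", dest="debug", action="store_true")
    parser.add_argument("-f", "--filter", help="filter sentences", dest="filter", action="store_true")
    parser.add_argument("--filter2ndStage", help="second stage sentence filtering", dest="filter2ndStage", action="store_true")
    parser.add_argument("-n", "--rmpunct", help="remove punctuation", dest="rmpunct", action="store_true")
    parser.add_argument("-p", "--vbpunct", help="verbalize punctuation", dest="vbpunct", action="store_true")
    parser.add_argument("-s", "--rawseg", help="do not segment sentences with NLTK", dest="rawseg", action="store_true")
    parser.add_argument("-m", "--lm", help="prepare for lm modeling", dest="lm", action="store_true")
    parser.add_argument("-t", "--trim", help="do not keep new words", dest="trim", action="store_true")
    args = parser.parse_args()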
Example #2
        count += 1
        if count % 50000 == 0:
            print "Processed %d values" % count

        #Read next line
        l = fd.readline()

    io.closeFile(fd)

    strContent = u"\n".join(linesList)
    io.writeFileContent(outputFile, strContent)

################
# main
#
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i", "--input", help="input file", nargs=1, dest="inputFile", required=True)
    parser.add_argument("-o", "--output", help="output file", nargs=1, dest="outputFile", required=True)
    parser.add_argument("-r", "--regex", help="regular expression file", nargs=1, dest="regexFile", required=True)
    
    args = parser.parse_args()

    inputFile = os.path.abspath(args.inputFile[0])
    outputFile = os.path.abspath(args.outputFile[0])
    regexFile = os.path.abspath(args.regexFile[0])

    setupLogging(logging.INFO)

    applyRegexes(inputFile, outputFile, regexFile)
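
The example above shows only the entry point and a line-by-line reading loop; the applyRegexes implementation itself lies outside the excerpt. The following is a minimal standard-library sketch of such a function, not the asrt implementation, assuming the regex file holds one tab-separated pattern/replacement pair per line:

import codecs
import re

def applyRegexesSketch(inputFile, outputFile, regexFile):
    # Load (pattern, replacement) pairs; the tab-separated file format is an assumption.
    rules = []
    with codecs.open(regexFile, "r", "utf-8") as fd:
        for line in fd:
            line = line.rstrip(u"\n")
            if not line:
                continue
            pattern, replacement = line.split(u"\t", 1)
            rules.append((re.compile(pattern), replacement))

    # Apply every rule to every line, mirroring the reading loop shown above.
    linesList = []
    with codecs.open(inputFile, "r", "utf-8") as fd:
        for line in fd:
            line = line.rstrip(u"\n")
            for regex, replacement in rules:
                line = regex.sub(replacement, line)
            linesList.append(line)

    with codecs.open(outputFile, "w", "utf-8") as fd:
        fd.write(u"\n".join(linesList))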
    
Example #3
# along with asrt. If not, see <http://opensource.org/licenses/>.

__author__ = "Alexandre Nanchen"
__version__ = "Revision: 1.0 "
__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest, re, string, logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging

setupLogging(logging.INFO, "./output.log")


class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        for t, gt in testList:
            f.strText = t
            callback()
            self.assertEqual(gt.encode('utf-8'), f.strText.encode('utf-8'))

    ############
    #Tests
    #
    def testNormalizeUtf8(self):
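
The excerpt breaks off at the testNormalizeUtf8 signature. Purely to illustrate how the verifyEqual helper above is driven (the stand-in object, the whitespace-collapsing callback and the test data are all assumptions, not the real test body), a test built on it could look like this:

    def testVerifyEqualSketch(self):
        # A stand-in object is used instead of LMPreparationFormula so the sketch
        # stays self-contained; real tests pass a formula instance and one of its
        # normalization methods as the callback.
        class Holder(object):
            strText = u""

        f = Holder()

        def collapseSpaces():
            # verifyEqual assigns each input to f.strText and then calls the
            # callback, which is expected to normalize f.strText in place.
            f.strText = re.sub(u" +", u" ", f.strText)

        self.verifyEqual([(u"hello   world", u"hello world")], f, collapseSpaces)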
Example #4
#
if __name__ == "__main__":
    #Setup parser
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-t", "--target", help="target directory containing the data.olist and data.omap", 
                         nargs=1, dest="targetDir", required=True)
    parser.add_argument("-o", "--output", help="output directory", nargs=1, dest="outputDir", required=True)
    parser.add_argument("-r", "--regex", help="regex file", nargs=1, dest="regexFile", required=True)
    parser.add_argument("-f", "--filter", help="filter sentences", dest="filter",action="store_true")
    parser.add_argument("-d", "--debug", help="enable debug output", action="store_true")
    parser.add_argument("-n", "--rmpunctuation", help="remove punctuation", action="store_true")
    parser.add_argument("-p", "--vbpunctuation", help="verbalize punctuation", action="store_true")
    parser.add_argument("-s", "--rawseg", help="do not segment sentences with NLTK", dest="rawseg",action="store_true")
    parser.add_argument("-m", "--lm", help="prepare for lm modeling", dest="lm",action="store_true")

    #Parse arguments
    args = parser.parse_args()
    targetDir = args.targetDir[0]
    outputDir = args.outputDir[0]
    regexFile = args.regexFile[0]

    segmentWithNLTK = "True" if not args.rawseg else "False"

    setupLogging(logging.INFO, outputDir + "/task_log.txt")

    task = ImportDocumentTask(TaskInfo(STRPARAMETERS % (regexFile, str(args.debug), 
                                                        args.rmpunctuation, args.vbpunctuation,
                                                        segmentWithNLTK, args.filter, args.lm), 
                                       outputDir, targetDir))
    task.execute()
Example #5
    inputList = args.inputList[0]
    outputDir = args.outputDir[0]
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    #Flags
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage = bool(args.filter2ndStage)
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    keepNewWords = bool(not args.trim)

    setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt")

    #Api setup
    api = DataPreparationAPI(None, outputDir)
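    # Note: unlike Example #1, the API is constructed with None instead of an input
    # file here; the document list read from inputList is presumably handed to the
    # API further down in the original script, outside this excerpt.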
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    api.setSegmentWithNLTK(not rawSeg)
    api.setKeepNewWords(keepNewWords)

    if language == 0:
        api.trainClassifier()
Example #6
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument("-i",
                        "--input",
                        help="input file",
                        nargs=1,
                        dest="inputFile",
                        required=True)
    parser.add_argument("-o",
                        "--output",
                        help="output file",
                        nargs=1,
                        dest="outputFile",
                        required=True)
    parser.add_argument("-r",
                        "--regex",
                        help="regular expression file",
                        nargs=1,
                        dest="regexFile",
                        required=True)

    args = parser.parse_args()

    inputFile = os.path.abspath(args.inputFile[0])
    outputFile = os.path.abspath(args.outputFile[0])
    regexFile = os.path.abspath(args.regexFile[0])

    setupLogging(logging.INFO)

    applyRegexes(inputFile, outputFile, regexFile)
Example #7
# along with asrt. If not, see <http://opensource.org/licenses/>.

__author__ = "Alexandre Nanchen"
__version__ = "Revision: 1.0 "
__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest, re, string, logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging

setupLogging(logging.INFO, "./output.log")

class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        for t, gt in testList:
            f.strText = t
            callback()
            self.assertEqual(gt.encode('utf-8'), f.strText.encode('utf-8'))

    ############
    #Tests
    #
    def testNormalizeUtf8(self):
        languages = ['0', '1', '2']
Example #8
__date__ = "Date: 2015/09"
__copyright__ = "Copyright (c) 2015 Idiap Research Institute"
__license__ = "BSD 3-Clause"

import unittest
import re
import string
import logging

from asrt.common.formula.FormulaLMPreparation import LMPreparationFormula
from asrt.common.AsrtConstants import UTF8MAP, SPACEPATTERN, DOTCOMMAEXCLUDE, PUNCTUATIONEXCLUDE
from asrt.common.AsrtConstants import ABBREVIATIONS
from asrt.common.LoggingSetup import setupLogging
from asrt.config.AsrtConfig import TEMPDIRUNITTEST

setupLogging(logging.INFO, TEMPDIRUNITTEST + "/output.log")


class TestFormulaLMPreparation(unittest.TestCase):
    allPunctList = DOTCOMMAEXCLUDE + PUNCTUATIONEXCLUDE

    def verifyEqual(self, testList, f, callback):
        for t, gt in testList:
            f.strText = t
            callback()
            self.assertEqual(gt.encode('utf-8'), f.strText.encode('utf-8'))

    ############
    # Tests
    #
    def testNormalizeUtf8(self):
Example #9
    inputList = args.inputList[0]
    outputDir = args.outputDir[0]
    language = int(args.language[0])
    regexFile = args.regexFile[0]

    # Flags
    debug = bool(args.debug)
    filterSentences = bool(args.filter)
    filterSentences2ndStage = bool(args.filter2ndStage)
    removePunctuation = bool(args.rmpunct)
    verbalizePunctuation = bool(args.vbpunct)
    rawSeg = bool(args.rawseg)
    lmModeling = bool(args.lm)
    expandNumberInWords = bool(not args.trim)

    setupLogging(logging.INFO, outputDir + "/data_preparation_log.txt")

    # Api setup
    api = DataPreparationAPI(None, outputDir)
    api.setRegexFile(regexFile)
    api.setFilterSentences(filterSentences)
    api.setFilterSentences2ndStage(filterSentences2ndStage)
    api.setLMModeling(lmModeling)
    api.setRemovePunctuation(removePunctuation)
    api.setVerbalizePunctuation(verbalizePunctuation)
    api.setSegmentWithNLTK(not rawSeg)
    api.setExpandNumberInWords(expandNumberInWords)

    if language == 0:
        api.trainClassifier()
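    # A language id of 0 appears to mean "unknown language": in that case a
    # classifier is trained so the API can detect the language of each sentence
    # during preparation. This is a reading of the excerpt, not documented behaviour.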