Python readDictionary Examples

Programming Language: Python

Namespace/Package Name: src.tagger.Utility.Utils

Method/Function: readDictionary

Examples at hotexamples.com: 3

Python readDictionary - 3 examples found. These are the top-rated real-world Python examples of src.tagger.Utility.Utils.readDictionary, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: VnRDRPOSTagger.py Project: hvhvhh/tomtatvanban

def VnTraining(args = sys.argv[2:]):
    """
    Train RDRPOSTagger for Vietnamese POS tagging.

    args is a sequence of four strings:
        args[0] -- path of the lexicon file handed to readDictionary
        args[1] -- working directory (a "/" is appended to it)
        args[2] -- file name, inside the working directory, of the training
                   corpus (presumably the correctly tagged one, per the
                   original variable name `correctTrain`)
        args[3] -- file name under which the learnt rules are written

    The corpus is first stripped to raw text (<corpus>.RAW) and tagged with
    the initial tagger (<corpus>.INIT).  Then, for every
    (improveThreshold, matchThreshold) pair in the module-level `thresholds`,
    a PosTaggingRDRTree is built and written to
    <working dir>/T<improve>-<match>/<learnt rules file>.

    Raises IndexError when fewer than four arguments are supplied.
    """
    pathToDict = args[0]
    # os.path.join() with a single argument returns it unchanged, so plain
    # concatenation is what the original expression amounted to.
    dirPath = args[1] + "/"
    correctTrain = args[2]
    learntRules = args[3]

    correctPath = dirPath + correctTrain
    rawPath = correctPath + ".RAW"
    initPath = correctPath + ".INIT"

    print( '\nTraining RDRPOSTagger for Vietnamese POS Tagging...')
    print( "Initial tagging...")

    getRawTextFromFile(correctPath, rawPath)
    DICT = readDictionary(pathToDict)
    VnInitTagger4Corpus(DICT, rawPath, initPath)

    print ("Done Initialization!")

    print ('Building SCRDR-based POS tagging tree of rules...')

    for (improveThreshold, matchThreshold) in thresholds:
        timeStart = time.time()
        outputDir = dirPath + "T%d-%d/" % (improveThreshold, matchThreshold)
        # A previous run may have left this directory behind; os.mkdir would
        # raise here and abort training after the initial tagging was done.
        os.makedirs(outputDir, exist_ok=True)

        rdrTree = PosTaggingRDRTree(improveThreshold, matchThreshold)
        rdrTree.buildTreeFromCorpus(initPath, correctPath)

        print ("Write the tree to file...")
        rdrTree.writeToFileWithoutSeenCases(outputDir + learntRules)

        print ("\nTraining time for threshold %d-%d: %f seconds\n" % (improveThreshold, matchThreshold, time.time() - timeStart))

    print ('\nCompleted!')

Example #2

Show file

File: VnRDRPOSTagger.py Project: hvhvhh/tomtatvanban

def runVnRDRPOSTagger(args = sys.argv[1:]):
    """
    Command-line dispatcher for the Vietnamese RDRPOSTagger.

    args[0] selects the mode (matched case-insensitively as a substring):
        contains "train" -- run VnTraining with the remaining arguments
        contains "tag"   -- build a VnRDRTree from the rules file args[1],
                            read the lexicon args[2] and tag the raw corpus
                            args[3]
    With no arguments, or an unrecognised mode, the usage instructions are
    printed instead.

    Raises IndexError when the "tag" mode is given fewer than four arguments.
    """
    if not args:
        printInstructions()
        return

    mode = args[0].lower()
    if "train" in mode:
        # Forward the caller's arguments explicitly.  Calling VnTraining()
        # bare made it fall back to sys.argv[2:], silently ignoring any
        # `args` passed to this function.  For the default command-line
        # case args[1:] is that same sys.argv[2:].
        VnTraining(args[1:])
    elif "tag" in mode:
        r = VnRDRTree()
        r.constructTreeFromRulesFile(args[1])
        DICT = readDictionary(args[2])
        r.tagRawCorpus(DICT, args[3])
    else:
        printInstructions()

Example #3

Show file

File: VnInitialTagger.py Project: hvhvhh/tomtatvanban

# -*- coding: utf-8 -*-
import re
# from Utility.Utils import readDictionary, isAbbre, isVnProperNoun,  isVnUpperChar
from src.tagger.Utility.Utils import readDictionary, isAbbre, isVnProperNoun,  isVnUpperChar 
# VNUNKNWORDS = readDictionary("../jSCRDRtagger/addDicts/VNOTHERS.DICT")
# VNNAMES = readDictionary("../jSCRDRtagger/addDicts/VNNAMES.DICT")

# Auxiliary lexicons, loaded once when this module is imported.
# The paths are relative, so they presumably resolve against the process's
# current working directory -- confirm how readDictionary opens them.
# VnInitTagger4Sentence looks words up in VNUNKNWORDS to obtain a tag and
# uses VNNAMES only for membership tests (matching words are tagged "Np").
VNUNKNWORDS = readDictionary("./resource/VNOTHERS.DICT")
VNNAMES = readDictionary("./resource/VNNAMES.DICT")


def VnInitTagger4Sentence(VNFREQ, sentence):
    """
    Initial tagger for Vietnamese sentence.
    VNUNKNWORDS and VNNAMES were not utilized in the version as described in our CICLing 2011 paper
    """
    words = sentence.strip().split()
    taggedSen = ''
    for word in words:
        if word in VNFREQ:
            taggedSen += word + "/" + VNFREQ[word] + " "
        elif word in VNUNKNWORDS:
            taggedSen += word + "/" + VNUNKNWORDS[word] + " "
        elif word in VNNAMES:
            taggedSen += word + "/Np "      
        else:         
            if (re.search(r"[0-9]+", word) != None):
                taggedSen += word + "/M "
            elif(len(word) == 1 and isVnUpperChar(word[0])):
                taggedSen += word + "/Y "
            else: