Python DACTORY Examples

Programming Language: Python

Namespace/Package Name: DictionaryFactory

Class/Type: DACTORY

Examples at hotexamples.com: 7

Python DACTORY - 7 examples found. These are the top-rated real-world Python examples of DictionaryFactory.DACTORY, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

getNaiveDictionary(6)

get_char_dictionary(1)

Example #1

Show file

File: Mmseg.py Project: raven1989/NaturalLanguageProcessing

 def forward_matching(text):
   """Yield the lengths of the prefixes of ``text`` that count as terms.

   Candidate lengths run from 1 up to the smaller of ``len(text)`` and the
   value the naive dictionary returns for the ``'maxTermSize'`` query.
   Length 1 is always yielded; any longer length is yielded only when the
   UTF-8 encoded prefix of that length gets a truthy answer from the
   naive dictionary.
   """
   size = 1
   limit = min(len(text), DACTORY.getNaiveDictionary().query('maxTermSize'))
   while size <= limit:
     if size == 1:
       # Single characters are accepted unconditionally so that they also
       # appear in the results; per the original author's note, the
       # dictionary does not contain single characters.
       matched = True
     else:
       matched = DACTORY.getNaiveDictionary().query(text[:size].encode('utf-8'))
     if matched:
       yield size
     size += 1

Example #2

Show file

File: Mmseg.py Project: raven1989/NaturalLanguageProcessing

def ln_frequency_for_sigle_chars(unicode_text, triplets):
  """Sum the natural logs of the frequencies of single-character segments.

  ``triplets`` is iterated as a sequence of end offsets into
  ``unicode_text``: each segment starts where the previous one ended (the
  first starts at 0). Only segments exactly one character wide are
  considered. Each such character is UTF-8 encoded and looked up in the
  character dictionary; characters with a falsy lookup result are ignored.

  Args:
    unicode_text: the text the offsets refer to.
    triplets: iterable of increasing end offsets, one per segment.

  Returns:
    The sum of ``math.log(int(freq))`` over the single characters that have
    a frequency, or ``0.0`` when there are none. The previous
    implementation used ``reduce`` without an initial value and raised
    ``TypeError`` in that case.
  """
  # Collect the segments that are exactly one character wide.
  sigle_chars = []
  start = 0
  for end in triplets:
    if end - start == 1:
      sigle_chars.append(unicode_text[start:end])
    start = end

  ln_freq = []
  for char in sigle_chars:
    freq = DACTORY.get_char_dictionary().query(char.encode('utf-8'))
    if freq:
      ln_freq.append(math.log(int(freq)))

  # An explicit float start value makes the empty case well defined (0.0)
  # instead of an error.
  return sum(ln_freq, 0.0)

Example #3

Show file

def maximumMatching(text):
    """Segment ``text`` by greedy forward maximum matching.

    ``text`` is decoded from UTF-8 and scanned left to right. At each
    position the longest candidate (capped by the dictionary's
    ``'maxTermSize'`` value and by the remaining text) is tried first and
    shortened one character at a time until the naive dictionary gives a
    truthy answer for it or only one character is left.

    Returns the result of ``map`` over the found terms, each re-encoded as
    UTF-8 (a list under Python 2).
    """
    chars = text.decode('utf-8')
    pieces = []
    total = len(chars)
    lexicon = DACTORY.getNaiveDictionary()
    longest = lexicon.query('maxTermSize')
    pos = 0
    while pos < total:
        stop = pos + min(total - pos, longest)
        # Shrink the window from the right until it is a known term or a
        # single character.
        while stop - pos > 1:
            if lexicon.query(chars[pos:stop].encode('utf-8')):
                break
            stop -= 1
        pieces.append(chars[pos:stop])
        pos = stop
    return map(lambda piece: piece.encode('utf-8'), pieces)

Example #4

Show file

File: ReverseMaximumMatching.py Project: raven1989/NaturalLanguageProcessing

def reverseMaximumMatching(text):
    """Segment ``text`` by greedy reverse (right-to-left) maximum matching.

    ``text`` is decoded from UTF-8 and scanned from its end. At each step
    the longest candidate ending at the current position (capped by the
    dictionary's ``'maxTermSize'`` value and by the text still unconsumed)
    is tried first and shortened from the left one character at a time
    until the naive dictionary gives a truthy answer for it or only one
    character is left.

    Returns the result of ``map`` over the terms in reading order, each
    re-encoded as UTF-8 (a list under Python 2).
    """
    chars = text.decode('utf-8')
    pieces = []
    total = len(chars)
    lexicon = DACTORY.getNaiveDictionary()
    right = total
    longest = lexicon.query('maxTermSize')
    while right > 0:
        left = right - min(right, longest)
        # Shrink the window from the left until it is a known term or a
        # single character.
        while right - left > 1:
            if lexicon.query(chars[left:right].encode('utf-8')):
                break
            left += 1
        pieces.append(chars[left:right])
        right = left
    # Terms were collected back to front; restore reading order.
    pieces.reverse()
    return map(lambda piece: piece.encode('utf-8'), pieces)

Example #5

Show file

File: Mmseg.py Project: raven1989/NaturalLanguageProcessing

      i = l
  sigle_chars = [x for x in sigle_char(unicode_text, triplets)]
  # print 'sigle chars:', '|'.join(sigle_chars)
  ln_freq = []
  for char in sigle_chars:
    freq = DACTORY.get_char_dictionary().query(char.encode('utf-8'))
    if freq:
      # print freq
      ln_freq.append(math.log(int(freq)))
  # print ln_freq
  return reduce(lambda x,y:x+y,ln_freq)
  


if __name__ == '__main__':
  # Ad-hoc demo (Python 2 print statements): query a few entries in the
  # naive dictionary and print whatever each query returns.
  print DACTORY.getNaiveDictionary().query('长江大桥')
  print DACTORY.getNaiveDictionary().query('科学')
  # Surprisingly the dictionary even contains a "word" like '和服务'; it has
  # to be removed by hand in order to test rule four.
  print DACTORY.getNaiveDictionary().query('和服务')
  # Remove the entry straight from the dictionary's dic_ mapping, then query
  # it again to show the result after removal.
  DACTORY.getNaiveDictionary().dic_.pop('和服务')
  print DACTORY.getNaiveDictionary().query('和服务')
  print DACTORY.getNaiveDictionary().query('施和')
  # Disabled experiments kept by the original author:
  # print DACTORY.getNaiveDictionary().query('色')
  # print len('武汉市'.decode('utf-8'))
  # print '|'.join(siple_mmseg('武汉市长江大桥'))
  # print get_longest_triplets_for_1st_char('武汉市长江大桥'.decode('utf-8'))
  # print get_longest_triplets_for_1st_char('武汉市'.decode('utf-8'))
  # print get_longest_triplets_for_1st_char('研究生命科学'.decode('utf-8'))
  # print get_longest_triplets_for_1st_char('科学'.decode('utf-8'))
  # terms = siple_mmseg('研究生命科学')
  # Print the result of complex_mmseg (defined elsewhere) joined with '|'.
  print '|'.join(complex_mmseg('研究生命科学'))

Example #6

Show file

import sys, traceback
sys.path.append('../0_Dictionary')
from DictionaryFactory import DACTORY


def maximumMatching(text):
    """Split ``text`` into terms using forward maximum matching.

    The UTF-8 input is decoded, then consumed from the left: the widest
    window allowed by the dictionary's ``'maxTermSize'`` value (and by the
    remaining length) is narrowed from its right edge until the naive
    dictionary answers truthily for it, or it is one character wide.

    Returns ``map`` applied over the terms, encoding each back to UTF-8
    (a list under Python 2).
    """
    decoded = text.decode('utf-8')
    found = []
    length = len(decoded)
    dictionary = DACTORY.getNaiveDictionary()
    begin = 0
    cap = dictionary.query('maxTermSize')
    while begin < length:
        remaining = length - begin
        finish = begin + min(remaining, cap)
        # Narrow the candidate until it is a dictionary term or one char.
        while finish - begin > 1:
            candidate = decoded[begin:finish].encode('utf-8')
            if dictionary.query(candidate):
                break
            finish -= 1
        found.append(decoded[begin:finish])
        begin = finish
    return map(lambda term: term.encode('utf-8'), found)


if __name__ == '__main__':
    # Ad-hoc demo (Python 2 print statements): print the dictionary's
    # answers for two sample terms.
    print DACTORY.getNaiveDictionary().query('中国人')
    print DACTORY.getNaiveDictionary().query('武汉市')
    # Character count of the sample term after decoding from UTF-8.
    print len('武汉市'.decode('utf-8'))
    # Segment a sample sentence and print the terms separated by '|'.
    terms = maximumMatching('武汉市长江大桥')
    print '|'.join(terms)

Example #7

Show file

File: ReverseMaximumMatching.py Project: raven1989/NaturalLanguageProcessing

sys.path.append('../0_Dictionary')
from DictionaryFactory import DACTORY


def reverseMaximumMatching(text):
    """Split ``text`` into terms using reverse maximum matching.

    The UTF-8 input is decoded, then consumed from the right: the widest
    window allowed by the dictionary's ``'maxTermSize'`` value (and by the
    unconsumed length) is narrowed from its left edge until the naive
    dictionary answers truthily for it, or it is one character wide.

    Returns ``map`` applied over the terms in reading order, encoding each
    back to UTF-8 (a list under Python 2).
    """
    decoded = text.decode('utf-8')
    found = []
    length = len(decoded)
    dictionary = DACTORY.getNaiveDictionary()
    finish = length
    cap = dictionary.query('maxTermSize')
    while finish > 0:
        begin = finish - min(finish, cap)
        # Narrow the candidate until it is a dictionary term or one char.
        while finish - begin > 1:
            candidate = decoded[begin:finish].encode('utf-8')
            if dictionary.query(candidate):
                break
            begin += 1
        found.append(decoded[begin:finish])
        finish = begin
    # Terms were gathered right to left; put them back in reading order.
    found.reverse()
    return map(lambda term: term.encode('utf-8'), found)


if __name__ == '__main__':
    # Ad-hoc demo (Python 2 print statements).
    # print DACTORY.getNaiveDictionary().query('中国人')
    # Print the dictionary's answer for a single-character query.
    print DACTORY.getNaiveDictionary().query('人')
    # print len('武汉市'.decode('utf-8'))
    # Segment a sample sentence and print the terms separated by '|'.
    terms = reverseMaximumMatching('武汉市长江大桥')
    print '|'.join(terms)