예제 #1
0
파일: utility.py 프로젝트: edvald/IcePy
def letter_frequency():
    """ Tally how often each letter occurs in the word forms of the BÍN
    corpus.

    Every entry yielded by read_bin() is visited and each item of its
    `ordmynd` attribute is counted.

    Returns a defaultdict(int) mapping letter => number of occurrences.
    """
    counts = defaultdict(int)
    for entry in read_bin():
        word_form = entry.ordmynd
        for char in word_form:
            counts[char] += 1
    return counts
예제 #2
0
파일: utility.py 프로젝트: sverrirab/IcePy
def bin_debug():
    """ Debugging aid: print each BÍN entry whose word form starts with a
    different letter than its lemma.

    Entries are obtained from read_bin(silent=True); every mismatching entry
    is converted with unicode() and printed as UTF-8 encoded text.
    """
    for entry in read_bin(silent=True):
        first_of_form = entry.ordmynd[0]
        first_of_lemma = entry.lemma[0]
        if first_of_form == first_of_lemma:
            # Initial letters agree, nothing to report for this entry.
            continue
        print(unicode(entry).encode('utf-8'))
예제 #3
0
파일: utility.py 프로젝트: sverrirab/IcePy
def letter_frequency():
    """ Build a letter => count table for the BÍN corpus.

    Walks all entries produced by read_bin() and increments the counter of
    every letter found in each entry's `ordmynd` (word form).

    Returns the table as a defaultdict(int).
    """
    frequency = defaultdict(int)
    for record in read_bin():
        for symbol in record.ordmynd:
            # Reading a missing key yields 0, so first sightings start at 1.
            frequency[symbol] = frequency[symbol] + 1
    return frequency
예제 #4
0
파일: utility.py 프로젝트: edvald/IcePy
def bin_debug():
    """ List the BÍN entries where word form and lemma disagree on their
    first letter.

    Intended purely as a debugging helper: each such entry is printed on its
    own line, encoded as UTF-8.
    """
    for record in read_bin(silent=True):
        initial_differs = record.ordmynd[0] != record.lemma[0]
        if not initial_differs:
            continue
        line = unicode(record).encode('utf-8')
        print(line)
예제 #5
0
파일: utility.py 프로젝트: edvald/IcePy
def tag_frequency():
    """ Count tag occurrences across the BÍN corpus.

    For every entry from read_bin() the tag is obtained by calling
    translate_tag() with the CATEGORY_MAP value for the entry's `flokkur`
    plus its `flokkur`, `hluti` and `greining` attributes (per the original
    description this yields the IceNLP form of the tag).

    Returns a defaultdict(int) keyed by the tuple
    (tag, flokkur, hluti, greining) with the number of entries seen for each.
    """
    counts = defaultdict(int)
    for entry in read_bin():
        category = entry.flokkur
        mapped = CATEGORY_MAP[category]
        tag = translate_tag(mapped, category, entry.hluti, entry.greining)
        counts[(tag, category, entry.hluti, entry.greining)] += 1
    return counts
예제 #6
0
파일: utility.py 프로젝트: sverrirab/IcePy
def tag_frequency():
    """ Produce a frequency table of the tags found in the BÍN corpus.

    Each entry read via read_bin() has its tag computed by translate_tag()
    (described in the original as the IceNLP format), using CATEGORY_MAP to
    look up the entry's `flokkur`.

    Returns a defaultdict(int) whose keys are
    (tag, flokkur, hluti, greining) tuples and whose values are counts.
    """
    tally = defaultdict(int)
    for record in read_bin():
        translated = translate_tag(CATEGORY_MAP[record.flokkur],
                                   record.flokkur,
                                   record.hluti,
                                   record.greining)
        key = (translated, record.flokkur, record.hluti, record.greining)
        tally[key] = tally[key] + 1
    return tally
예제 #7
0
파일: utility.py 프로젝트: sverrirab/IcePy
def write_wordlist(fout, encoding='utf8'):
    """ Dump every word form of the BÍN database to fout, one per line.

    fout should be a file or file-like object; only its write() method is
    used. Each word form is encoded with `encoding` before being written.
    """
    for entry in read_bin():
        encoded = entry.ordmynd.encode(encoding)
        fout.write(encoded + '\n')
예제 #8
0
파일: utility.py 프로젝트: sverrirab/IcePy
def bin_read():
    """ Print every entry of the BÍN corpus to the screen, one per line,
    encoded as UTF-8.
    """
    for entry in read_bin(silent=True):
        text = unicode(entry)
        print(text.encode('utf-8'))
예제 #9
0
파일: utility.py 프로젝트: edvald/IcePy
def write_wordlist(fout, encoding='utf8'):
    """ Turn the BÍN database into a plain list of word forms.

    Writes one newline-terminated word form per entry to fout (a file or
    file-like object), encoding each with the given `encoding`.
    """
    for record in read_bin():
        line = record.ordmynd.encode(encoding) + '\n'
        fout.write(line)
예제 #10
0
파일: utility.py 프로젝트: edvald/IcePy
def bin_read():
    """ Echo the whole BÍN corpus to the screen.

    Each entry from read_bin(silent=True) is converted with unicode() and
    printed as UTF-8 encoded text.
    """
    for record in read_bin(silent=True):
        encoded = unicode(record).encode('utf-8')
        print(encoded)