コード例 #1
0
def test_parse_csv():
    """Check the first row that ``parse_csv`` produces for ``iris.csv``."""
    iris_text = DataFile('iris.csv').read()
    first_row = parse_csv(iris_text)[0]
    expected_row = [5.1, 3.5, 1.4, 0.2, 'setosa']
    assert first_row == expected_row
コード例 #2
0
ファイル: search.py プロジェクト: corey-kipp/cs171
 def __init__(self, board=None):
     """Set up the finder, optionally with an initial board.

     The word list is stored on ``BoggleFinder`` itself and is only built
     when that class attribute is still ``None``. ``self.found`` starts
     out as an empty dict, and ``set_board`` is called only when a truthy
     ``board`` is supplied.
     """
     if BoggleFinder.wordlist is None:
         wordlist_file = DataFile("EN-text/wordlist.txt")
         BoggleFinder.wordlist = Wordlist(wordlist_file)
     self.found = dict()
     if not board:
         return
     self.set_board(board)
コード例 #3
0
def test_text_models():
    """Exercise the unigram, bigram and trigram text models.

    The first half builds the three models from the words of
    ``EN-text/flatland.txt`` and checks their most frequent entries, a few
    probabilities and the trigram conditional distribution. The second half
    builds tiny models from one-, two- and three-word strings and checks
    the exact contents of their ``dictionary``.
    """
    flatland = DataFile("EN-text/flatland.txt").read()
    wordseq = words(flatland)
    unigram = UnigramTextModel(wordseq)
    bigram = NgramTextModel(2, wordseq)
    trigram = NgramTextModel(3, wordseq)

    # The most frequent entries in each model
    assert unigram.top(10) == [(2081, 'the'), (1479, 'of'), (1021, 'and'),
                               (1008, 'to'), (850, 'a'), (722, 'i'),
                               (640, 'in'), (478, 'that'), (399, 'is'),
                               (348, 'you')]

    assert bigram.top(10) == [(368, ('of', 'the')), (152, ('to', 'the')),
                              (152, ('in', 'the')), (86, ('of', 'a')),
                              (80, ('it', 'is')), (71, ('by', 'the')),
                              (68, ('for', 'the')), (68, ('and', 'the')),
                              (62, ('on', 'the')), (60, ('to', 'be'))]

    assert trigram.top(10) == [(30, ('a', 'straight', 'line')),
                               (19, ('of', 'three', 'dimensions')),
                               (16, ('the', 'sense', 'of')),
                               (13, ('by', 'the', 'sense')),
                               (13, ('as', 'well', 'as')),
                               (12, ('of', 'the', 'circles')),
                               (12, ('of', 'sight', 'recognition')),
                               (11, ('the', 'number', 'of')),
                               (11, ('that', 'i', 'had')),
                               (11, ('so', 'as', 'to'))]

    # Probabilities of individual entries
    assert isclose(unigram['the'], 0.0611, rel_tol=0.001)

    assert isclose(bigram['of', 'the'], 0.0108, rel_tol=0.01)

    # NOTE(review): if isclose is math.isclose, a rel_tol-only comparison
    # against 0.0 passes only on exact equality -- confirm that is intended.
    assert isclose(trigram['', '', 'but'], 0.0, rel_tol=0.001)
    assert isclose(trigram['so', 'as', 'to'], 0.000323, rel_tol=0.001)

    # Conditional distributions are keyed by the preceding words
    assert bigram.cond_prob.get(('went', )) is None

    assert trigram.cond_prob['in', 'order'].dictionary == {'to': 6}

    # A one-word text gives a unigram dictionary keyed by the plain string
    test_string = 'unigram'
    wordseq = words(test_string)

    tiny_unigram = UnigramTextModel(wordseq)

    assert tiny_unigram.dictionary == {'unigram': 1}

    # A two-word text: the first bigram is padded with an empty string
    test_string = 'bigram text'
    wordseq = words(test_string)

    tiny_bigram = NgramTextModel(2, wordseq)

    # Dict equality ignores insertion order, so one comparison is enough.
    assert tiny_bigram.dictionary == {
        ('', 'bigram'): 1,
        ('bigram', 'text'): 1,
    }

    # A three-word text: the first two trigrams are padded with empty strings
    test_string = 'test trigram text'
    wordseq = words(test_string)

    tiny_trigram = NgramTextModel(3, wordseq)

    assert ('', '', 'test') in tiny_trigram.dictionary
    assert ('', 'test', 'trigram') in tiny_trigram.dictionary
    assert ('test', 'trigram', 'text') in tiny_trigram.dictionary
    assert len(tiny_trigram.dictionary) == 3
コード例 #4
0
def test_rot13_decoding():
    """Check that a rot13-encoded greeting decodes back to the original."""
    training_text = DataFile("EN-text/flatland.txt").read()
    decoder = ShiftDecoder(training_text)
    ciphertext = rot13('Hello, world!')
    decoded = decoder.decode(ciphertext)

    assert decoded == 'Hello, world!'
コード例 #5
0
def test_shift_decoding():
    """Check that a shifted sentence decodes to its expected plaintext."""
    training_text = DataFile("EN-text/flatland.txt").read()
    decoder = ShiftDecoder(training_text)
    ciphertext = 'Kyzj zj r jvtivk dvjjrxv.'
    expected_plaintext = 'This is a secret message.'

    assert decoder.decode(ciphertext) == expected_plaintext
コード例 #6
0
def _set_lang(lang):
    """Merge the contents of the ``<lang>.lang`` resource into ``Text._data``.

    The resource name is ``lang`` joined to ``'lang'`` with a dot; it is
    passed through ``ensure_res`` before being opened with ``DataFile``.
    """
    resource_name = '.'.join([lang, 'lang'])
    resource = ensure_res(resource_name)
    # NOTE(review): the None and json arguments presumably select the open
    # mode and the parser -- confirm against DataFile's signature.
    with DataFile(resource, None, json) as entries:
        Text._data.update(entries)