Beispiel #1
0
def test_build_tree(capsys):
    """Compare ``ex11.build_tree`` results with the accepted renderings.

    Every case builds a tree, renders it with ``repr_tree`` and requires the
    rendering to equal the contents of at least one of the listed fixture
    files. Afterwards the captured stdout and stderr must both be empty.
    """
    # A list of (tree, fixture paths) pairs instead of a dict keyed by tree:
    # the tree objects then need not be hashable, and two cases can never
    # collapse into a single dict entry.
    cases = [
        # sanity
        (
            ex11.build_tree(ex11.parse_data(r'data/tiny_data.txt'), [
                'cough', 'irritability', 'headache'
            ]),
            [f'data/build_tree_sanity{i}.txt' for i in range(1, 5)],
        ),
        # empty symptoms
        (
            ex11.build_tree(ex11.parse_data(r'data/tiny_data.txt'), []),
            [f'data/build_tree_empty_symptoms{i}.txt' for i in range(1, 7)],
        ),
        # empty records
        (
            ex11.build_tree([], ['cough', 'irritability', 'headache']),
            ['data/build_tree_empty_records.txt'],
        ),
        # empty both
        (
            ex11.build_tree([], []),
            ['data/build_tree_empty_both.txt'],
        ),
        # double symptoms
        (
            ex11.build_tree(ex11.parse_data(r'data/tiny_data.txt'), [
                'cough', 'headache', 'headache'
            ]),
            [f'data/build_tree_double_symptoms{i}.txt' for i in range(1, 9)],
        ),
    ]

    for tree, fn in cases:
        actual = repr_tree(tree)
        expected = []
        for _fn in fn:
            # Context manager so every fixture file is closed right away.
            with open(_fn) as fixture:
                expected.append(fixture.read())
        assert actual in expected, \
            f"test name: {fn}\n" \
            f"expected: {expected}\n" \
            f"actual: {actual}"

    out, err = capsys.readouterr()
    assert not out and not err, f"Don't print. out: '{out}', err: '{err}'"
Beispiel #2
0
def test_optimal_tree3():
    """Check the four grandchild nodes of an ``optimal_tree`` built with 2.

    Records come from ``test_optimal_tree3.txt``; the candidate symptoms are
    fever, cough and headache.
    """
    candidate_symptoms = ["fever", "cough", "headache"]
    root = optimal_tree(parse_data(r"test_optimal_tree3.txt"),
                        candidate_symptoms, 2)

    yes_side = root.positive_child
    assert yes_side.positive_child.data == "influenza"
    assert yes_side.negative_child.data == "meningitis"

    no_side = root.negative_child
    assert no_side.positive_child.data == "cold"
    assert no_side.negative_child.data == "healthy"
Beispiel #3
0
def test_optimal_tree(capsys):
    """Check ``ex11.optimal_tree`` success rates on every data file.

    For each data file, ``successes[i]`` is the success rate expected from a
    ``Diagnoser`` wrapping ``ex11.optimal_tree(records, symptoms, i)`` (``i``
    is reported as "num of symptoms" in the failure message). The list for
    ``big_data.txt`` stops at index 3, so no larger ``i`` is computed for it.

    When the module-level ``CHECK_OPTIMAL_TREE`` flag is truthy, the rendered
    tree must additionally equal one of four accepted fixture files named
    ``<data file>.expected<i>``, ``.2expected<i>``, ``.3expected<i>`` and
    ``.4expected<i>``.

    Finally the captured stdout and stderr must both be empty.
    """
    symptoms = [
        'congestion', 'cough', 'fatigue', 'fever', 'headache', 'irritability',
        'muscle_ache', 'nausea', 'rigidity', 'sore_throat'
    ]
    fixture_suffixes = ('.expected', '.2expected', '.3expected', '.4expected')

    def read_text(path):
        # Read a fixture file and close it immediately (no leaked handles).
        with open(path) as handle:
            return handle.read()

    for fn, successes in {
            r'data/tiny_data.txt':
        [1 / 6, 2 / 6, 4 / 6, 5 / 6, 1, 1, 1, 1, 1, 1, 1],
            r'data/small_data.txt': [
                10 / 60, 20 / 60, 36 / 60, 49 / 60, 52 / 60, 53 / 60, 55 / 60,
                56 / 60, 56 / 60, 56 / 60, 56 / 60
            ],
            r'data/medium_data.txt': [
                100 / 600, 195 / 600, 346 / 600, 445 / 600, 491 / 600,
                517 / 600, 534 / 600, 540 / 600, 543 / 600, 544 / 600,
                545 / 600
            ],
            r'data/big_data.txt':
        [1000 / 6000, 1951 / 6000, 3359 / 6000, 4403 / 6000],
    }.items():
        records = ex11.parse_data(fn)
        # Driving the loop from the expected table keeps the tested values of
        # i and the expectations in sync, with no per-file special case.
        for i, expected in enumerate(successes):
            tree = ex11.optimal_tree(records, symptoms, i)
            if CHECK_OPTIMAL_TREE:
                actual = repr_tree(tree)
                accepted = tuple(
                    read_text(fn + suffix + str(i))
                    for suffix in fixture_suffixes
                )
                assert actual in accepted, \
                    "Maybe your answer is correct, check it carefully. " \
                    "If it's OK, remove this assert."
            cur_diagnoser = ex11.Diagnoser(tree)
            actual = cur_diagnoser.calculate_success_rate(records)
            assert actual == expected, \
                f"tree: {repr_tree(cur_diagnoser.root)}\n" \
                f"records from: {fn}\n" \
                f"num of symptoms: {i}\n" \
                f"expected: {expected}\n" \
                f"actual: {actual}"

    out, err = capsys.readouterr()
    assert not out and not err, f"Don't print. out: '{out}', err: '{err}'"
Beispiel #4
0
def test_build_tree4():
    """Check the grandchild nodes of ``build_tree`` on ``medium_data2.txt``.

    The tree is built over the symptoms fever and cough; each entry below
    pairs the path from the root with the ``data`` expected at its end.
    """
    root = build_tree(parse_data(r"medium_data2.txt"), ["fever", "cough"])

    expected_by_path = (
        (("positive_child", "positive_child"), "influenza"),
        (("positive_child", "negative_child"), "strep"),
        (("negative_child", "positive_child"), "cold"),
        (("negative_child", "negative_child"), "healthy"),
    )
    for path, illness in expected_by_path:
        node = root
        for step in path:
            node = getattr(node, step)
        assert node.data == illness
Beispiel #5
0
def test_build_tree3():
    """Check the grandchild nodes of ``build_tree`` on ``medium_data1.txt``.

    The tree is built over the symptoms fever and cough.
    """
    symptom_order = ["fever", "cough"]
    root = build_tree(parse_data(r"medium_data1.txt"), symptom_order)

    # Positive branch of the root.
    with_first = root.positive_child
    assert with_first.positive_child.data == "influenza"
    assert with_first.negative_child.data == "meningitis"

    # Negative branch of the root.
    without_first = root.negative_child
    assert without_first.positive_child.data == "cold"
    assert without_first.negative_child.data == "healthy"
Beispiel #6
0
def test_build_tree2():
    """Check the grandchild nodes of ``build_tree`` on ``small_data1.txt``.

    The tree is built over the symptoms headache and fever.
    """
    root = build_tree(parse_data(r"small_data1.txt"), ["headache", "fever"])

    def data_at(first_step, second_step):
        # Follow two child links from the root and return that node's data.
        return getattr(getattr(root, first_step), second_step).data

    assert data_at("positive_child", "positive_child") == "influenza"
    assert data_at("positive_child", "negative_child") == "cold"
    assert data_at("negative_child", "positive_child") == "strep"
    assert data_at("negative_child", "negative_child") == "healthy"
Beispiel #7
0
def test_build_tree1():
    """Check the grandchild nodes of ``build_tree`` on ``tiny_data2.txt``.

    The tree is built over the symptoms headache and fever.
    """
    root = build_tree(parse_data(r"tiny_data2.txt"), ["headache", "fever"])

    # For each child of the root: (data under its positive child,
    # data under its negative child).
    expected_per_branch = (
        ("positive_child", ("meningitis", "influenza")),
        ("negative_child", ("cold", "healthy")),
    )
    for branch_name, (if_positive, if_negative) in expected_per_branch:
        branch = getattr(root, branch_name)
        assert branch.positive_child.data == if_positive
        assert branch.negative_child.data == if_negative
Beispiel #8
0
def test_calculate_success_rate(capsys):
    """Check ``calculate_success_rate`` of four diagnosers on every data file.

    Each data file is paired with the rates expected from, in order,
    ``simple_diagnoser``, ``big_diagnoser``, ``medium_diagnoser`` and
    ``one_leaf_diagnoser``. Afterwards the captured stdout and stderr must
    both be empty.
    """
    rates_by_file = (
        (r'data/tiny_data.txt', [2 / 6, 5 / 6, 5 / 6, 1 / 6]),
        (r'data/small_data.txt', [20 / 60, 44 / 60, 47 / 60, 10 / 60]),
        (r'data/medium_data.txt',
         [213 / 600, 442 / 600, 534 / 600, 100 / 600]),
        (r'data/big_data.txt',
         [2278 / 6000, 4403 / 6000, 5298 / 6000, 1000 / 6000]),
    )
    diagnosers = (
        simple_diagnoser,
        big_diagnoser,
        medium_diagnoser,
        one_leaf_diagnoser,
    )

    for fn, rates in rates_by_file:
        records = ex11.parse_data(fn)
        # Rates are listed in the same order as the diagnosers above.
        for cur_diagnoser, expected in zip(diagnosers, rates):
            actual = cur_diagnoser.calculate_success_rate(records)
            assert actual == expected, \
                f"tree: {repr_tree(cur_diagnoser.root)}\n" \
                f"records from: {fn}\n" \
                f"expected: {expected}\n" \
                f"actual: {actual}"

    captured = capsys.readouterr()
    out, err = captured
    assert not out and not err, f"Don't print. out: '{out}', err: '{err}'"
Beispiel #9
0
def test_optimal_tree2():
    """Check the root of an ``optimal_tree`` built with 1.

    Records come from ``test_optimal_tree2.txt``; the root's ``data`` must be
    either fever or headache.
    """
    candidate_symptoms = ["cough", "fever", "headache"]
    root = optimal_tree(parse_data(r"test_optimal_tree2.txt"),
                        candidate_symptoms, 1)

    acceptable_roots = ("fever", "headache")
    assert root.data in acceptable_roots
Beispiel #10
0
def test_optimal_tree1():
    """Check the root and its positive child of an ``optimal_tree`` built with 2.

    Records come from ``test_optimal_tree1.txt``; both nodes must hold either
    cough or fever as their ``data``.
    """
    candidate_symptoms = ["cough", "fever", "headache"]
    root = optimal_tree(parse_data(r"test_optimal_tree1.txt"),
                        candidate_symptoms, 2)

    acceptable = ("cough", "fever")
    assert root.data in acceptable
    assert root.positive_child.data in acceptable