Esempio n. 1
0
def task_5(medical=get_medical_terms()):
    """Task 5: Exchange evaluations.

    Loads ``etc/groups.json``, picks the ``'Group 14'`` entry and, for every
    case code in sorted order, keeps the result ids that exist in
    ``Therapy.ALL``.  Each surviving chapter is passed to ``_precision`` as a
    ``(chapter, None, None, medical_terms)`` tuple, where ``medical_terms`` is
    the overlap between the chapter's vector keys and ``medical``.

    Note that the default for ``medical`` is evaluated once, when the
    function is defined, not on every call.
    """
    def medical_terms_in(chapter):
        # Vector keys of the chapter that are also medical terms.
        return set(chapter.vector.keys()) & medical

    with open('etc/groups.json', 'r') as handle:
        all_groups = json.load(handle)
    group = all_groups['Group 14']

    # Keys of a JSON object are unique, so sorting the keys gives the same
    # order as sorting the items by their first element.
    for code in sorted(group):
        rows = []
        for chapter_id in group[code]:
            if chapter_id in Therapy.ALL:
                chapter = Therapy.ALL[chapter_id]
                rows.append((chapter, None, None, medical_terms_in(chapter)))
        _precision(rows)
Esempio n. 2
0
def _task_6_eval(case, medical=get_medical_terms()):
    """Task 6 evaluation.

    Runs ``task_6a`` and ``task_6b`` for ``case`` (each called with 1000 as
    second argument), turns every ``(score, chapter)`` pair into a
    ``(chapter, score, None, medical_terms)`` tuple and prints the value
    returned by ``_kendall_tau`` for the two resulting lists.
    """
    def annotate(scored_chapters):
        # Attach the chapter's medical terms to every (score, chapter) pair.
        rows = []
        for score, chapter in scored_chapters:
            overlap = set(chapter.vector.keys()) & medical
            rows.append((chapter, score, None, overlap))
        return rows

    # Disabled alternatives, kept for reference:
    #   result_3 = [(c,) for s, c in task_3(case, 1000)]
    #   _precision(ranking_a)
    #   _precision(ranking_b)
    ranking_a = annotate(task_6a(case, 1000))
    ranking_b = annotate(task_6b(case, 1000))

    label = case.code
    tau = _kendall_tau(ranking_a, ranking_b, 1000)
    print("[%s]: Kendal Tau: %.3f" % (label, tau))
Esempio n. 3
0
def calculate_case_statistics():
    """Print per-case statistics for all patient cases.

    After a header line, one row is printed for every entry of
    ``PatientCase.ALL`` (in sorted order) with: the case code, the number of
    newline-separated lines in the case text, the number of whitespace
    separated tokens that are stopwords, the size of the case vector and the
    number of vector keys that are medical terms.  Columns are joined with
    ``' & '`` and each row ends with a LaTeX row break; a blank line closes
    the output.
    """
    stopwords = data.get_stopwords()
    medical_terms = data.get_medical_terms()

    print("Case | Lines | Stopwords | Terms | Medical terms")
    for code, case in sorted(PatientCase.ALL.items()):
        line_count = len(case.text.split('\n'))
        stopword_count = sum(1 for token in case.text.split()
                             if token in stopwords)
        term_count = len(case.vector)
        medical_count = sum(1 for term in case.vector.keys()
                            if term in medical_terms)

        columns = (
            code,
            str(line_count),
            str(stopword_count),
            str(term_count),
            str(medical_count),
        )
        print(' & '.join(columns) + r' \\')
    print()
Esempio n. 4
0
def task_4(case, medical=get_medical_terms()):
    """Task 4: Evaluate results from task 3.

    Runs ``_task_4_search`` for ``case`` with the ``medical`` term collection
    and hands the returned results to ``_precision``.  The default for
    ``medical`` is evaluated once, when the function is defined.
    """
    # Disabled experiments, kept for reference (variants B, C and D build
    # additional vector attributes before searching):
    #   if not hasattr(PatientCase.ALL["1"], 'vector2'):
    #       from index import create_vectors, _tf_raw_freq, _idf_prob
    #       create_vectors(idf=_idf_prob, attr='vector2')  # B
    #       #create_vectors(tf=_tf_raw_freq, attr='vector2')  # C
    #       #create_vectors(tf=_tf_raw_freq, idf=_idf_prob, attr='vector3')  # D
    #   ranked2 = _task_4_search(case, medical, 'vector3')
    #   print("[%s]: Kendal Tau: %.3f" % (case.code, _kendall_tau(ranked, ranked2, 1000)))
    #   _task_4_print_terms(ranked)
    ranked = _task_4_search(case, medical)
    _precision(ranked)
Esempio n. 5
0
def _task_4_print_terms(results, medical=get_medical_terms(), latex=False):
    """Print the first ten task 4 results together with their terms.

    A header line is always printed, followed by one row per result.

    Args:
        results: Sequence of ``(obj, score, terms, relevant)`` tuples; only
            the first ten entries are printed.  ``obj`` must expose a
            ``code`` attribute and ``terms`` must be an iterable of strings.
        medical: Collection of medical terms.  Only consulted in LaTeX mode,
            where it decides which terms are set in bold.  The default is
            evaluated once, when the function is defined.
        latex: When true, print tab-indented LaTeX table rows in which
            medical terms are wrapped in bold markup.  When false (the
            default) print plain pipe-separated rows.
    """
    print("Rank | Chapter | Score | Relevant | Terms")
    for rank, (obj, score, terms, rel) in enumerate(results[:10], start=1):
        verdict = 'Yes' if rel else 'No'
        if latex:
            # The bold-marked list is only needed for the LaTeX rows, so it
            # is built here instead of for every row unconditionally.
            shown = ['\\textbf{%s}' % t if t in medical else t for t in terms]
            print('\t%i & %s & %.4f & %s & %s \\\\' % (
                rank, obj.code, score, verdict, ', '.join(shown)))
        else:
            print('%i | %s | %.4f | %s | %s' % (
                rank, obj.code, score, verdict, ', '.join(terms)))
Esempio n. 6
0
def main(script):
    """Run all the functions in this module.

    Populates the data objects via ``data.main()`` and then, in this order,
    generates the stopword table, the medical term table and the cases
    table, followed by the chapter and case statistics.

    Args:
        script: Not referenced by the statements below; presumably the
            script name from the command line -- TODO confirm with caller.
    """
    data.main()  # Populate all objects

    # Generate a LaTeX table with all stopwords
    # (6 is presumably the column count -- TODO confirm against
    # _generate_columned_table)
    _generate_columned_table(sorted(data.get_stopwords()),
                             6, 'stopwords', 'Norwegian stopwords')

    # Generate a LaTeX table with all medical terms
    # (3 is presumably the column count -- TODO confirm)
    _generate_columned_table(sorted(data.get_medical_terms()),
                             3, 'medicalterms', 'Medical terms')

    # The remaining reports rely on the objects populated by data.main().
    generate_cases_table()
    calculate_chapter_statistics()
    calculate_case_statistics()