def task_5(medical=get_medical_terms()):
    """Task 5: Exchange evaluations.

    Reads ``etc/groups.json``, takes the entry stored under ``'Group 14'``
    (a mapping of case code to a list of chapter ids) and, for every case in
    ascending key order, hands the known chapters to ``_precision``.

    Ids that are not present in ``Therapy.ALL`` are silently skipped.

    Args:
        medical: Set of medical terms; a chapter's relevant terms are the
            keys of its ``vector`` that also occur in this set.
    """
    def _medical_terms_in(chapter):
        # Terms of the chapter vector that are also medical terms.
        return set(chapter.vector.keys()) & medical

    with open('etc/groups.json', 'r') as handle:
        exchanged = json.load(handle)['Group 14']

    for _case_code, chapter_ids in sorted(exchanged.items(), key=itemgetter(0)):
        rows = []
        for chapter_id in chapter_ids:
            if chapter_id not in Therapy.ALL:
                continue  # unknown id in the exchanged results: ignore it
            chapter = Therapy.ALL[chapter_id]
            # Same 4-tuple layout the other tasks pass to _precision; the
            # two middle slots are not available for exchanged results.
            rows.append((chapter, None, None, _medical_terms_in(chapter)))
        _precision(rows)
def _task_6_eval(case, medical=get_medical_terms()):
    """Task 6 evaluation.

    Runs ``task_6a`` and ``task_6b`` for ``case`` (both with a limit of
    1000), converts their ``(score, chapter)`` pairs into
    ``(chapter, score, None, relevant_terms)`` rows and prints the value
    returned by ``_kendall_tau`` for the two row lists.

    Args:
        case: Patient case to evaluate; its ``code`` is used in the output.
        medical: Set of medical terms used to pick the relevant terms of
            each chapter vector.
    """
    limit = 1000

    def _as_rows(ranking):
        # (score, chapter) pairs -> rows carrying the chapter's medical terms.
        return [
            (chapter, score, None, set(chapter.vector.keys()) & medical)
            for score, chapter in ranking
        ]

    rows_6a = _as_rows(task_6a(case, limit))
    rows_6b = _as_rows(task_6b(case, limit))

    case_code = case.code
    tau = _kendall_tau(rows_6a, rows_6b, limit)
    print("[%s]: Kendal Tau: %.3f" % (case_code, tau))
def calculate_case_statistics():
    """Print one statistics row per patient case.

    After a header line, every entry of ``PatientCase.ALL`` (in sorted
    order) is printed as a ``' & '``-separated row terminated by ``' \\\\'``,
    containing: the case code, the number of newline-separated lines in the
    case text, the number of whitespace-separated tokens that are stopwords,
    the size of the case vector, and the number of vector keys that are
    medical terms. A blank line is printed at the end.
    """
    stopwords = data.get_stopwords()
    medical_terms = data.get_medical_terms()

    print("Case | Lines | Stopwords | Terms | Medical terms")
    for code, case in sorted(PatientCase.ALL.items()):
        line_count = len(case.text.split('\n'))
        stopword_count = sum(
            1 for token in case.text.split() if token in stopwords)
        term_count = len(case.vector)
        medical_count = sum(
            1 for term in case.vector.keys() if term in medical_terms)

        columns = (
            code,
            str(line_count),
            str(stopword_count),
            str(term_count),
            str(medical_count),
        )
        print(' & '.join(columns) + r' \\')
    print()
def task_4(case, medical=get_medical_terms()):
    """Task 4: Evaluate results from task 3.

    Runs ``_task_4_search`` for ``case`` and passes its result straight to
    ``_precision``.

    Args:
        case: Patient case to evaluate.
        medical: Medical terms forwarded to ``_task_4_search``.
    """
    # NOTE: earlier experiments (previously kept here as commented-out code)
    # re-indexed with index.create_vectors using other tf/idf functions into
    # the 'vector2'/'vector3' attributes, compared the resulting rankings
    # with _kendall_tau, and dumped the matched terms with
    # _task_4_print_terms. Only the precision evaluation is active.
    _precision(_task_4_search(case, medical))
def _task_4_print_terms(results, medical=get_medical_terms()):
    """Print the first ten task 4 results as a plain-text table.

    Each row shows the 1-based rank, the chapter code, the score with four
    decimals, ``Yes``/``No`` for relevance and the comma-separated terms.

    Args:
        results: Sequence of ``(chapter, score, terms, relevant)`` tuples.
            ``chapter`` must have a ``code`` attribute, ``terms`` must be an
            iterable of strings and ``relevant`` is interpreted by its
            truthiness. Only ``results[:10]`` is printed.
        medical: Not used by the plain-text output. It was only needed to
            bold-mark medical terms for a LaTeX row format that is no longer
            emitted; the parameter is kept so existing calls keep working.
    """
    print("Rank | Chapter | Score | Relevant | Terms")
    for rank, (chapter, score, terms, relevant) in enumerate(results[:10],
                                                             start=1):
        verdict = 'Yes' if relevant else 'No'
        print('%i | %s | %.4f | %s | %s' % (
            rank, chapter.code, score, verdict, ', '.join(terms)))
def main(script):
    """Run all the functions in this module.

    Args:
        script: Not used by this function.
    """
    # Populate all objects before anything is reported.
    data.main()

    # LaTeX table with all stopwords.
    stopwords = sorted(data.get_stopwords())
    _generate_columned_table(stopwords, 6, 'stopwords', 'Norwegian stopwords')

    # LaTeX table with all medical terms.
    medical_terms = sorted(data.get_medical_terms())
    _generate_columned_table(medical_terms, 3, 'medicalterms', 'Medical terms')

    # Remaining reports, in their original order.
    for report in (generate_cases_table,
                   calculate_chapter_statistics,
                   calculate_case_statistics):
        report()