def evaluation(u=None, d=None):
    """Render the knowledge-area evaluation page for one department.

    When either abbreviation is missing the landing page is rendered
    instead. Otherwise the department is looked up by university
    abbreviation ``u`` and department abbreviation ``d`` (404 when no row
    matches) and, for each of its courses, the predicted knowledge areas
    are compared against the ground-truth ones.

    The ``knowledge_areas`` dict passed to the template has four entries,
    each keyed by course id:

    * ``'predicted'`` / ``'truth'`` -- knowledge area labels per course.
    * ``'jaccard'`` -- ``jaccard(predicted, truth)`` converted to float.
    * ``'percent'`` -- fraction of the truth labels that were predicted.

    The two metrics only contain courses whose truth list is non-empty.
    """
    if u is None or d is None:
        return render_template("evaluate_landing.html")

    department = (
        app.db.query(Department)
        .join(University)
        .filter(University.abbreviation == u)
        .filter(Department.abbreviation == d)
        .first()
    )
    if department is None:
        abort(404)  # department not found

    courses = department.courses

    # Retrieve the predicted and ground truth knowledge area labels for
    # each course.
    try:
        knowledge_areas = {
            'predicted': {
                course.id: predicted_knowledge_areas(
                    course, result_set=g.result_set_raw)
                for course in courses
            },
            'truth': {
                course.id: ground_truth_knowledge_areas(course)
                for course in courses
            },
        }
    except RuntimeError:
        # Fall back to empty knowledge area lists if an error is
        # encountered, so the page still renders.
        knowledge_areas = {
            'predicted': {course.id: [] for course in courses},
            'truth': {course.id: [] for course in courses},
        }

    # Jaccard coefficient and percentage correct of the prediction/truth
    # sets serve as the 'correctness' metrics. Courses without any ground
    # truth labels are left out (this also avoids dividing by zero).
    jaccard_scores = {}
    percent_correct = {}
    for course in courses:
        truth = knowledge_areas['truth'][course.id]
        if not truth:
            continue
        predicted = knowledge_areas['predicted'][course.id]
        jaccard_scores[course.id] = float(jaccard(predicted, truth))
        # Convert before dividing so the ratio is always a true quotient.
        # The denominator is the raw truth list length, as before.
        percent_correct[course.id] = (
            float(len(set(predicted) & set(truth))) / len(truth)
        )
    knowledge_areas['jaccard'] = jaccard_scores
    knowledge_areas['percent'] = percent_correct

    return render_template(
        "evaluate_department.html",
        department=department,
        knowledge_areas=knowledge_areas,
    )
def evaluation(u=None, d=None):
    """Render the knowledge-area evaluation page for one department.

    When either abbreviation is missing the landing page is rendered
    instead. Otherwise the department is looked up by university
    abbreviation ``u`` and department abbreviation ``d`` (404 when no row
    matches) and, for each of its courses, the predicted knowledge areas
    are compared against the ground-truth ones.

    The ``knowledge_areas`` dict passed to the template has four entries,
    each keyed by course id:

    * ``'predicted'`` / ``'truth'`` -- knowledge area labels per course.
    * ``'jaccard'`` -- ``jaccard(predicted, truth)`` converted to float.
    * ``'percent'`` -- fraction of the truth labels that were predicted.

    The two metrics only contain courses whose truth list is non-empty.
    """
    if u is None or d is None:
        return render_template("evaluate_landing.html")

    department = (
        app.db.query(Department)
        .join(University)
        .filter(University.abbreviation == u)
        .filter(Department.abbreviation == d)
        .first()
    )
    if department is None:
        abort(404)  # department not found

    courses = department.courses

    # Retrieve the predicted and ground truth knowledge area labels for
    # each course.
    try:
        knowledge_areas = {
            'predicted': {
                course.id: predicted_knowledge_areas(
                    course, result_set=g.result_set_raw)
                for course in courses
            },
            'truth': {
                course.id: ground_truth_knowledge_areas(course)
                for course in courses
            },
        }
    except RuntimeError:
        # Fall back to empty knowledge area lists if an error is
        # encountered, so the page still renders.
        knowledge_areas = {
            'predicted': {course.id: [] for course in courses},
            'truth': {course.id: [] for course in courses},
        }

    # Jaccard coefficient and percentage correct of the prediction/truth
    # sets serve as the 'correctness' metrics. Courses without any ground
    # truth labels are left out (this also avoids dividing by zero).
    jaccard_scores = {}
    percent_correct = {}
    for course in courses:
        truth = knowledge_areas['truth'][course.id]
        if not truth:
            continue
        predicted = knowledge_areas['predicted'][course.id]
        jaccard_scores[course.id] = float(jaccard(predicted, truth))
        # Convert before dividing so the ratio is always a true quotient.
        # The denominator is the raw truth list length, as before.
        percent_correct[course.id] = (
            float(len(set(predicted) & set(truth))) / len(truth)
        )
    knowledge_areas['jaccard'] = jaccard_scores
    knowledge_areas['percent'] = percent_correct

    return render_template(
        "evaluate_department.html",
        department=department,
        knowledge_areas=knowledge_areas,
    )
def compare_departments(daid=None, dbid=None):
    """Render a topic comparison between two departments.

    ``daid`` and ``dbid`` are department ids. When either is ``None`` the
    landing page is rendered with the full department list. Otherwise both
    departments are loaded; a 404 is raised if either is missing or if
    ``g.result_set_raw`` is ``None`` (no topics to infer).

    The comparison page receives each department's exclusive topics, the
    topics they share, per-department course counts, the topic vectors as
    '1'/'0' strings, and a dict of similarity metrics (jaccard, cosine,
    euclidean).
    """
    if None in (daid, dbid):
        return render_template(
            "compare_departments_landing.html",
            departments=app.db.query(Department).all(),
        )

    # Look up the two requested departments.
    department_a = app.db.query(Department).get(daid)
    department_b = app.db.query(Department).get(dbid)

    # Unknown department or no result set: nothing to compare, so 404.
    department_missing = department_a is None or department_b is None
    if department_missing or g.result_set_raw is None:
        abort(404)
    raw_results = g.result_set_raw

    # Topic sets, one per department.
    topics_a = set(topic_list(department_a, raw_results))
    topics_b = set(topic_list(department_b, raw_results))

    # Topic vectors, plus their '1'/'0' text form for display.
    a_vector = topic_vector(department_a, raw_results)
    b_vector = topic_vector(department_b, raw_results)
    a_bits = a_vector.unpack(one=b'1', zero=b'0').decode('utf-8')
    b_bits = b_vector.unpack(one=b'1', zero=b'0').decode('utf-8')

    # Similarity metrics; evaluated in the order jaccard, cosine, euclidean.
    similarity = {
        'jaccard': {
            'name': 'Jaccard Index',
            'range': '[0, 1]',
            'description': 'Comparative set cardinality.',
            'value': jaccard(topics_a, topics_b),
        },
        'cosine': {
            'name': 'Cosine Similarity',
            'range': '[-1, 1]',
            'description': 'Geometric cosine distance.',
            'value': cosine_similarity(a_vector, b_vector),
        },
        'euclidean': {
            'name': 'Euclidean Distance',
            'description': 'Geometric vector distance.',
            'value': euclidean_distance(a_vector, b_vector),
        },
    }

    # Split the topics into shared ones and those exclusive to each side.
    shared_topics = topics_a & topics_b
    only_a = topics_a.difference(shared_topics)
    only_b = topics_b.difference(shared_topics)

    # Number of courses in each department.
    num_courses_a, num_courses_b = [
        app.db.query(Course).join(Department)
        .filter(Department.id == department_id).count()
        for department_id in (daid, dbid)
    ]

    # Global list of departments for switching over.
    all_departments = app.db.query(Department).all()

    return render_template(
        "compare_departments.html",
        da=department_a,
        db=department_b,
        da_topics=only_a,
        db_topics=only_b,
        num_courses_a=num_courses_a,
        num_courses_b=num_courses_b,
        common_topics=shared_topics,
        departments=all_departments,
        similarity_metrics=similarity,
        da_vector=a_bits,
        db_vector=b_bits,
    )
course.id: predicted_knowledge_areas(course, rs) for course in gmu_cs.courses }, 'truth': { course.id: ground_truth_knowledge_areas(course) for course in gmu_cs.courses }, } for rs in result_sets ] print("Done.") print("Calculate jaccard and percent metrics...") for ka_dict in knowledge_areas: ka_dict['jaccard'] = { course.id: jaccard( ka_dict['predicted'][course.id], ka_dict['truth'][course.id] ) for course in gmu_cs.courses if None not in [ ka_dict['predicted'][course.id], ka_dict['truth'][course.id] ] } ka_dict['percent'] = { course.id: len(set(ka_dict['predicted'][course.id])\ .intersection(set(ka_dict['truth'][course.id])))\ / len(ka_dict['truth'][course.id]) for course in gmu_cs.courses if ka_dict['truth'][course.id] } print("Done.")
def compare(depA, depB, rs):
    """Return ``jaccard`` of the topic lists of ``depA`` and ``depB``.

    Both topic lists are obtained from ``topic_list`` using the same
    result set ``rs``.
    """
    return jaccard(topic_list(depA, rs), topic_list(depB, rs))
def compare_departments(daid=None, dbid=None):
    """Render a topic comparison between two departments.

    ``daid`` and ``dbid`` are department ids. When either is ``None`` the
    landing page is rendered with the full department list. Otherwise both
    departments are loaded; a 404 is raised if either is missing or if
    ``g.result_set_raw`` is ``None`` (no topics to infer).

    The comparison page receives each department's exclusive topics, the
    topics they share, per-department course counts, the topic vectors as
    '1'/'0' strings, and a dict of similarity metrics (jaccard, cosine,
    euclidean).
    """
    if None in (daid, dbid):
        return render_template(
            "compare_departments_landing.html",
            departments=app.db.query(Department).all(),
        )

    # Look up the two requested departments.
    department_a = app.db.query(Department).get(daid)
    department_b = app.db.query(Department).get(dbid)

    # Unknown department or no result set: nothing to compare, so 404.
    department_missing = department_a is None or department_b is None
    if department_missing or g.result_set_raw is None:
        abort(404)
    raw_results = g.result_set_raw

    # Topic sets, one per department.
    topics_a = set(topic_list(department_a, raw_results))
    topics_b = set(topic_list(department_b, raw_results))

    # Topic vectors, plus their '1'/'0' text form for display.
    a_vector = topic_vector(department_a, raw_results)
    b_vector = topic_vector(department_b, raw_results)
    a_bits = a_vector.unpack(one=b'1', zero=b'0').decode('utf-8')
    b_bits = b_vector.unpack(one=b'1', zero=b'0').decode('utf-8')

    # Similarity metrics; evaluated in the order jaccard, cosine, euclidean.
    similarity = {
        'jaccard': {
            'name': 'Jaccard Index',
            'range': '[0, 1]',
            'description': 'Comparative set cardinality.',
            'value': jaccard(topics_a, topics_b),
        },
        'cosine': {
            'name': 'Cosine Similarity',
            'range': '[-1, 1]',
            'description': 'Geometric cosine distance.',
            'value': cosine_similarity(a_vector, b_vector),
        },
        'euclidean': {
            'name': 'Euclidean Distance',
            'description': 'Geometric vector distance.',
            'value': euclidean_distance(a_vector, b_vector),
        },
    }

    # Split the topics into shared ones and those exclusive to each side.
    shared_topics = topics_a & topics_b
    only_a = topics_a.difference(shared_topics)
    only_b = topics_b.difference(shared_topics)

    # Number of courses in each department.
    num_courses_a, num_courses_b = [
        app.db.query(Course).join(Department)
        .filter(Department.id == department_id).count()
        for department_id in (daid, dbid)
    ]

    # Global list of departments for switching over.
    all_departments = app.db.query(Department).all()

    return render_template(
        "compare_departments.html",
        da=department_a,
        db=department_b,
        da_topics=only_a,
        db_topics=only_b,
        num_courses_a=num_courses_a,
        num_courses_b=num_courses_b,
        common_topics=shared_topics,
        departments=all_departments,
        similarity_metrics=similarity,
        da_vector=a_bits,
        db_vector=b_bits,
    )