def test_fleiss_kappa_irr():
    """Check fleiss_kappa against reference values from R's irr package."""
    # Reference results produced in R:
    #> r = kappam.fleiss(diagnoses)
    #> cat_items(r, pref="fleiss.")
    fleiss = Holder()
    fleiss.method = "Fleiss' Kappa for m Raters"
    fleiss.irr_name = 'Kappa'
    fleiss.value = 0.4302445
    fleiss.stat_name = 'z'
    fleiss.statistic = 17.65183
    fleiss.p_value = 0

    table, _ = aggregate_raters(diagnoses)
    kappa = fleiss_kappa(table)
    assert_almost_equal(kappa, fleiss.value, decimal=7)
def get_iaa(question_num, queries=None, users=None, collection=None, level='fine'):
    """Return inter-annotator agreement (Fleiss' kappa) for one question."""
    annotations = get_annotations(
        question_num,
        queries=queries,
        users=users,
        level=level,
        collection=collection,
    )
    # n_cat = Annotation.get_num_categories(question_num)
    table, _ = aggregate_raters(annotations, n_cat=None)
    return fleiss_kappa(table)
def test_aggregate_raters():
    """Column sums of the aggregated table equal per-category rating counts."""
    table, _ = aggregate_raters(diagnoses)
    expected_colsum = np.array([26, 26, 30, 55, 43])
    assert_equal(table.sum(0), expected_colsum)
def get_kappa_marking(data, c, result_file, results):
    """
    Fleiss' Kappa for marking task - Task 1.1

    Parameters
    ----------
    data : pandas.DataFrame of MTurk assignments (one row per rater/assignment)
    c : sqlite3 cursor over the course database (post2/comment2/thread tables)
    result_file : open writable file; one kappa per thread is appended
    results : numpy array; the mean kappa over threads is appended to it

    Returns
    -------
    numpy array: ``results`` with the batch's mean kappa appended.

    NOTE(review): relies on a module-level global ``course`` for the SQL
    filters — confirm it is set before calling.
    """
    data = data.loc[data['AssignmentStatus'] != 'Rejected']  # keep only approved raters
    titles = data['Input.threadtitle'].unique()  # list of threads in the batch
    fks = []
    if 'Answer.noreply' not in data.columns:
        data['Answer.noreply'] = 0
    for title in titles:
        df = data.loc[data['Input.threadtitle'] == title]
        # Keep only the per-post answer columns plus the no-reply flag.
        marked_posts = [
            col for col in df.columns
            if re.search(r'Answer\.\d$', col) or 'Answer.noreply' in col
        ]
        df = df.loc[:, marked_posts].fillna(0)
        df = df.replace('unclear', 99)
        df = df.replace('none', 99)
        length = 0
        # --- total number of posts + comments in this thread ---------------
        # Parameterized LIKE patterns replace the original string-concatenated
        # SQL: this is injection-safe and also fixes the quoting problem the
        # original code noted ("thread names with quotes don't work").
        # A '%x%' pattern matches the same rows as the old '%%x%%'.
        try:
            c.execute(
                'select thread_id from post2 inner join thread '
                'on post2.thread_id = thread.id '
                'where original=1 and post2.courseid like ? '
                'and thread.title like ?',
                ('%' + course + '%', '%' + title + '%'))
            thread_id = c.fetchone()
            c.execute(
                'select count(1) from post2 '
                'where thread_id like ? and courseid like ?',
                ('%' + str(thread_id[0]) + '%', '%' + course + '%'))
            post2 = c.fetchone()
            c.execute(
                'select count(1) from comment2 '
                'where thread_id like ? and courseid like ?',
                ('%' + str(thread_id[0]) + '%', '%' + course + '%'))
            comment2 = c.fetchone()
            length = post2[0] + comment2[0]
        except Exception:
            # Thread not found in the DB (e.g. fetchone() returned None):
            # skip it, as the original best-effort logic did — but no longer
            # with a bare except that would also hide KeyboardInterrupt.
            continue
        # --- Fleiss' kappa for this thread ---------------------------------
        print(df)
        df1 = pd.DataFrame()
        print('length of thread: ', title, length)
        for i in range(1, length + 1):
            col = 'Answer.' + str(i)
            # Membership test instead of try/except-pass per column.
            if col in df.columns:
                df1[col] = df[col]
        df1['Answer.noreply'] = df['Answer.noreply']
        # df1 is (raters x posts); aggregate_raters converts it to
        # (posts x categories) with counts as entries.
        aggregate = aggregate_raters(df1.T)
        fk = fleiss_kappa(aggregate[0])
        kappa = fk.calc_fleiss_kappa()  # compute once, not three times
        fks.append(kappa)
        print(title + " -- " + str(kappa))
        result_file.write(str(kappa) + "\n")
    results = np.append(results, np.mean(fks))
    # print("\nAverage Kappa:" + str(np.mean(fks)))
    print("Std Dev:" + str(np.std(fks)))
    return results
def get_kappa_categorization(data, c, result_file, results):
    """
    Fleiss Kappa for categorisation tasks - Task 2.1 and Task 2.2

    Parameters
    ----------
    data : pandas.DataFrame of MTurk assignments (one row per rater/assignment)
    c : sqlite3 cursor over the course database (post2/comment2/thread tables)
    result_file : open writable file; one kappa per thread is appended
    results : numpy array; the mean kappa over threads is appended to it

    Returns
    -------
    numpy array: ``results`` with the batch's mean kappa appended.

    NOTE(review): relies on a module-level global ``course`` for the SQL
    filters — confirm it is set before calling.
    """
    data = data.loc[data['AssignmentStatus'] != 'Rejected']
    titles = data['Input.threadtitle'].unique()
    fks = []
    # Map category labels to integers so they can be fed to the kappa code.
    cat_to_num_2 = {  # Categories for Task 2
        "summary": 1,
        "justify": 2,
        "feedback": 3,
        "clarification": 4,
        "extension": 5,
        "juxtaposition": 6,
        "refinement": 7,
        "critique": 8,
        "agreement": 9,
        "disagreement": 10,
        "answer": 11,
        "appreciation": 12,
        "completion": 13,
        "paraphrase": 14,
        "other": 15,
    }
    if 'Answer.noreply' not in data.columns:
        # Add Answer.noreply if it does not exist in the dataframe
        data['Answer.noreply'] = ""
    marked_posts = [col for col in data.columns if 'Answer.' in col]
    for post in marked_posts:
        # Unmapped labels (and blanks) become 0.
        data[post] = data[post].map(cat_to_num_2).fillna(0).astype(int)
    for title in titles:
        df = data.loc[data['Input.threadtitle'] == title]
        marked_posts = [col for col in df.columns if 'Answer.' in col]
        df = df.loc[:, marked_posts].fillna(0)
        # --- total number of posts + comments in this thread ---------------
        # Parameterized queries replace the original string-concatenated SQL
        # (injection-safe, and robust to quotes in thread titles).
        try:
            c.execute(
                'select thread_id from post2 inner join thread '
                'on post2.thread_id = thread.id '
                'where original=1 and post2.courseid like ? '
                'and thread.title like ?',
                ('%' + course + '%', '%' + title + '%'))
            thread_id = c.fetchone()
            c.execute(
                'select count(1) from post2 '
                'where thread_id like ? and courseid like ?',
                ('%' + str(thread_id[0]) + '%', '%' + course + '%'))
            post2 = c.fetchone()
            c.execute(
                'select count(1) from comment2 '
                'where thread_id like ? and courseid like ?',
                ('%' + str(thread_id[0]) + '%', '%' + course + '%'))
            comment2 = c.fetchone()
            length = post2[0] + comment2[0]
        except Exception:
            # Thread not found: skip, as the original best-effort logic did.
            continue
        # --- Fleiss' kappa for this thread ---------------------------------
        df1 = pd.DataFrame()
        for i in range(length):
            src = 'Answer.' + str(i + 1) + '_discourse_type'
            # Membership test instead of try/except-pass per column.
            if src in df.columns:
                df1['Answer.' + str(i + 1)] = df[src]
        df1['Answer.noreply'] = df['Answer.noreply']
        # df1 is (raters x posts); aggregate_raters converts it to
        # (posts x categories) with counts as entries.
        aggregate = aggregate_raters(df1.T)
        fk = fleiss_kappa(aggregate[0])
        kappa = fk.calc_fleiss_kappa()  # compute once, not twice
        fks.append(kappa)
        result_file.write(str(kappa) + "\n")
        # print(title + " -- " + str(kappa))
    # print("\nAverage Kappa:" + str(np.mean(fks)))
    results = np.append(results, np.mean(fks))
    return results
def get_kappa_marking(df, c, result_file, results):
    """
    Lenient Fleiss' Kappa for marking task - Task 1.1

    "Lenient" here means kappa is computed only over the post column(s) that
    received the maximum number of markings in each thread.

    Parameters
    ----------
    df : pandas.DataFrame of MTurk assignments (one row per rater/assignment)
    c : sqlite3 cursor over the course database (post2/comment2/thread tables)
    result_file : open writable file; one kappa per thread is appended
    results : numpy array; the mean kappa over threads is appended to it

    Returns
    -------
    numpy array: ``results`` with the batch's mean kappa appended.

    NOTE(review): relies on a module-level global ``course`` for the SQL
    filters — confirm it is set before calling.
    """
    # Keep only the mturk assignments that were accepted.
    df = df.loc[df['AssignmentStatus'] != 'Rejected']
    threads = df['Input.threadtitle'].unique()  # unique threads in the batch
    fks = []  # one kappa per thread in the given batch
    if 'Answer.noreply' not in df.columns:
        df['Answer.noreply'] = ""
    for thread in threads:
        # --- total number of posts + comments in this thread ---------------
        # Parameterized queries replace the original string-concatenated SQL
        # (injection-safe, and robust to quotes in thread titles).
        try:
            c.execute(
                'select thread_id from post2 inner join thread '
                'on post2.thread_id = thread.id '
                'where original=1 and post2.courseid like ? '
                'and thread.title like ?',
                ('%' + course + '%', '%' + thread + '%'))
            thread_id = c.fetchone()
            c.execute(
                'select count(1) from post2 '
                'where thread_id like ? and courseid like ?',
                ('%' + str(thread_id[0]) + '%', '%' + course + '%'))
            post2 = c.fetchone()
            c.execute(
                'select count(1) from comment2 '
                'where thread_id like ? and courseid like ?',
                ('%' + str(thread_id[0]) + '%', '%' + course + '%'))
            comment2 = c.fetchone()
            length = post2[0] + comment2[0]
        except Exception:
            # Thread not found: skip, as the original best-effort logic did.
            continue
        # --- select post(s) with the maximum number of markings ------------
        filter_col = [col for col in df if col.startswith('Answer')]
        counts = df.loc[df['Input.threadtitle'] == thread, filter_col].count(axis=0)
        post_max_agreement = np.argwhere(counts == np.max(counts)).flatten().tolist()
        post_max_agreement = counts.iloc[post_max_agreement].index.values
        # print(post_max_agreement)
        df = df.replace('unclear', 99)
        df = df.replace('none', 99)
        # --- Fleiss' kappa for this thread ---------------------------------
        # df1 = df.loc[df['Input.threadtitle'] == thread]
        df1 = pd.DataFrame()
        for i in range(length):
            col = 'Answer.' + str(i + 1)
            if col in df.columns:
                df1[col] = 0
        df1['Answer.noreply'] = 0
        df1[post_max_agreement] = df.loc[df['Input.threadtitle'] == thread,
                                         post_max_agreement]
        # print(df1.fillna(0).astype(int))
        # df1 is (raters x posts); aggregate_raters converts it to
        # (posts x categories) with counts as entries.
        aggregate = aggregate_raters(df1.fillna(0).astype(int).T)
        fk = fleiss_kappa(aggregate[0])
        kappa = fk.calc_fleiss_kappa()  # compute once, not twice
        fks.append(kappa)
        # print(thread + "(" + str(length) + ")" + " -- " + str(kappa) + "\n")
        result_file.write(str(kappa) + "\n")
    print("\nAverage Kappa:" + str(np.mean(fks)))
    results = np.append(results, np.mean(fks))
    return results
def get_kappa_categorization(df, c, result_file, results):
    """
    Fleiss Kappa for categorisation tasks - Task 2.1 and Task 2.2

    Lenient variant: kappa is computed only over the post column(s) that
    received the maximum number of markings in each thread.

    Parameters
    ----------
    df : pandas.DataFrame of MTurk assignments (one row per rater/assignment)
    c : sqlite3 cursor over the course database (post2/comment2/thread tables)
    result_file : open writable file; one kappa per thread is appended
    results : numpy array; the mean kappa over threads is appended to it

    Returns
    -------
    numpy array: ``results`` with the batch's mean kappa appended.

    NOTE(review): relies on a module-level global ``course`` for the SQL
    filters — confirm it is set before calling.
    """
    df = df.loc[df['AssignmentStatus'] != 'Rejected']
    threads = df['Input.threadtitle'].unique()
    fks = []
    # Map category labels to numbers for input into kappa.
    # BUG FIX: the original dict literal listed "none" twice ("none": 5 then
    # "none": 6); Python silently keeps the later entry, so "none" has always
    # mapped to 6. The dead "none": 5 entry is removed to make that explicit.
    cat_to_num_2 = {
        # Categories for Task 2.1
        "resolves": 1,
        "elaborates": 2,
        "requests": 3,
        "social": 4,
        # Categories for Task 2.2
        # elaborates
        "clarifies": 1,
        "extension": 2,
        "juxtaposition": 3,
        "refinement": 4,
        "critique": 5,
        # resolves
        "agreement": 1,
        "disagreement": 2,
        "generic": 3,
        "appreciation": 4,
        "completion": 5,
        "none": 6,
        "nota": 6,
    }
    if 'Answer.noreply' not in df.columns:
        # Add Answer.noreply if it does not exist in the dataframe
        df['Answer.noreply'] = ""
    marked_posts = [col for col in df.columns if 'Answer.' in col]
    for post in marked_posts:
        df[post] = df[post].map(cat_to_num_2)  # substitute labels with numbers
    for thread in threads:
        # --- total number of posts + comments in this thread ---------------
        # Parameterized queries replace the original string-concatenated SQL
        # (which was injection-prone and, in one query, even lacked the space
        # after LIKE).
        try:
            c.execute(
                'select thread_id from post2 inner join thread '
                'on post2.thread_id = thread.id '
                'where original=1 and post2.courseid like ? '
                'and thread.title like ?',
                ('%' + course + '%', '%' + thread + '%'))
            thread_id = c.fetchone()
            c.execute(
                'select count(1) from post2 '
                'where thread_id like ? and courseid like ?',
                ('%' + str(thread_id[0]) + '%', '%' + course + '%'))
            post2 = c.fetchone()
            c.execute(
                'select count(1) from comment2 '
                'where thread_id like ? and courseid like ?',
                ('%' + str(thread_id[0]) + '%', '%' + course + '%'))
            comment2 = c.fetchone()
            length = post2[0] + comment2[0]
        except Exception:
            # Thread not found: skip, as the original best-effort logic did.
            continue
        # --- select post(s) with the maximum number of markings ------------
        counts = df.loc[df['Input.threadtitle'] == thread, marked_posts].count(axis=0)
        post_max_agreement = np.argwhere(counts == np.max(counts)).flatten().tolist()
        post_max_agreement = counts.iloc[post_max_agreement].index.values
        # print(post_max_agreement)
        # --- Fleiss' kappa for this thread ---------------------------------
        df1 = pd.DataFrame()
        for i in range(length):
            col = 'Answer.' + str(i + 1) + '_discourse_type'
            if col in df.columns:
                df1[col] = 0
        df1['Answer.noreply'] = 0
        df1[post_max_agreement] = df.loc[df['Input.threadtitle'] == thread,
                                         post_max_agreement]
        # print(df1)
        # df1 is (raters x posts); aggregate_raters converts it to
        # (posts x categories) with counts as entries.
        aggregate = aggregate_raters(df1.fillna(0).astype(int).T)
        fk = fleiss_kappa(aggregate[0])
        kappa = fk.calc_fleiss_kappa()  # compute once, not twice
        fks.append(kappa)
        result_file.write(str(kappa) + "\n")
        # print(thread + "(" + str(length) + ")" + " -- " + str(kappa))
    print("\nAverage Kappa:" + str(np.mean(fks)) + "\n")
    results = np.append(results, np.mean(fks))
    return results