def core(tsx, tsy=None, method='simple_kappa'):
    '''
    Compute Cohen's kappa agreement statistics for two categorical series.

    input
    --------
    tsx: categorical data (first rating); its ``name`` is used in the row label
    tsy: categorical data (second rating); required even though the signature
        defaults it to None
    method: method chosen from a drop-down menu; 'simple_kappa' requests the
        unweighted statistic, any other value requests linear weights

    output
    --------
    (frame, msg): ``frame`` is a one-row DataFrame indexed by '名称' with the
        kappa value, z value, two-sided p value, 95% confidence bounds
        (rounded to 5 decimals), ASE (the 'std_kappa0' entry, rounded to
        5 decimals) and the kind reported by cohens_kappa; ``msg`` is an
        empty dict.

    raises
    --------
    TypeError: if ``tsy`` is not supplied.
    '''
    if tsy is None:
        # Both ratings are needed for the cross-tabulation and for the row
        # label below; fail early with an explicit message.
        raise TypeError("core() requires both tsx and tsy")

    msg = {}
    table = pd.crosstab(tsx, tsy)
    if method == 'simple_kappa':
        res = cohens_kappa(table, return_results=True)
    else:
        res = cohens_kappa(table, wt='linear', return_results=True)
    columns = {
        '名称': '%s & %s' % (tsx.name, tsy.name),
        'Kappa值': res.get('kappa'),
        'Z值': res.get('z_value'),
        'P值': res.get('pvalue_two_sided'),
        '95%CI(下限)': round(res.get('kappa_low'), 5),
        '95%CI(上限)': round(res.get('kappa_upp'), 5),
        'ASE': round(res.get('std_kappa0'), 5),
        '类型': res.get('kind')
    }
    return pd.DataFrame([columns]).set_index('名称'), msg
def test_cohenskappa_weights():
    """Check that different weight options give matching kappa and variance."""
    np.random.seed(9743678)
    table = np.random.randint(0, 10, size=(5, 5)) + 5 * np.eye(5)

    def check_match(first, second):
        # Point estimate first, then its variance, both to 14 decimals.
        assert_almost_equal(first.kappa, second.kappa, decimal=14)
        assert_almost_equal(first.var_kappa, second.var_kappa, decimal=14)

    # example aggregation, 2 groups of levels
    grouping = np.array([[1, 1, 1, 0, 0], [0, 0, 0, 1, 1]])
    table_agg = np.dot(np.dot(grouping, table), grouping.T)
    on_full_table = cohens_kappa(table, weights=np.arange(5) > 2, wt='linear')
    on_aggregated = cohens_kappa(table_agg, weights=np.arange(2), wt='linear')
    check_match(on_full_table, on_aggregated)

    # equivalence toeplitz with linear for special cases
    linear = cohens_kappa(table, weights=2 * np.arange(5), wt='linear')
    toeplitz = cohens_kappa(table, weights=2 * np.arange(5), wt='toeplitz')
    toeplitz_row = cohens_kappa(table, weights=linear.weights[0],
                                wt='toeplitz')
    # 2-Dim weights
    full_matrix = cohens_kappa(table, weights=linear.weights)
    for candidate in (toeplitz, toeplitz_row, full_matrix):
        check_match(linear, candidate)

    # equivalence toeplitz with quadratic for special cases
    squared_toeplitz = cohens_kappa(table, weights=5 * np.arange(5)**2,
                                    wt='toeplitz')
    quadratic = cohens_kappa(table, weights=5 * np.arange(5), wt='quadratic')
    check_match(squared_toeplitz, quadratic)
def test_cohenskappa_weights():
    """Equivalent weight specifications must agree on kappa and var_kappa."""
    compared_attrs = ('kappa', 'var_kappa')

    def same(left, right):
        # Attributes are compared in a fixed order, each to 14 decimals.
        for attr in compared_attrs:
            assert_almost_equal(getattr(left, attr), getattr(right, attr),
                                decimal=14)

    np.random.seed(9743678)
    table = np.random.randint(0, 10, size=(5, 5)) + 5 * np.eye(5)

    # example aggregation, 2 groups of levels
    mat = np.array([[1, 1, 1, 0, 0], [0, 0, 0, 1, 1]])
    table_agg = np.dot(np.dot(mat, table), mat.T)
    same(cohens_kappa(table, weights=np.arange(5) > 2, wt='linear'),
         cohens_kappa(table_agg, weights=np.arange(2), wt='linear'))

    # equivalence toeplitz with linear for special cases
    reference = cohens_kappa(table, weights=2 * np.arange(5), wt='linear')
    alternatives = [
        cohens_kappa(table, weights=2 * np.arange(5), wt='toeplitz'),
        cohens_kappa(table, weights=reference.weights[0], wt='toeplitz'),
        # 2-Dim weights
        cohens_kappa(table, weights=reference.weights),
    ]
    for alternative in alternatives:
        same(reference, alternative)

    # equivalence toeplitz with quadratic for special cases
    same(cohens_kappa(table, weights=5 * np.arange(5)**2, wt='toeplitz'),
         cohens_kappa(table, weights=5 * np.arange(5), wt='quadratic'))
def _cohen(a, b): if a.shape[0] == 1 and b.shape[0] == 1: return 1 cm = confusion_matrix(a, b) if cm.sum(axis=1).min() == 0: return 0 return cohens_kappa(cm).kappa
def cohens_kappa(project):
    """
    Takes in the irr log data and calculates cohen's kappa
    NOTE: this should only be used if the num_users_irr = 2
    https://onlinecourses.science.psu.edu/stat509/node/162/
    https://en.wikipedia.org/wiki/Cohen%27s_kappa

    Returns a tuple ``(kappa, p_o)`` where ``kappa`` is the value returned by
    ``raters.cohens_kappa(..., return_results=False)`` on the rater-by-rater
    count table and ``p_o`` is ``agree / num_data``.

    Raises ValueError when no datum has at least two log entries, or when
    fewer than two distinct label values were seen.
    """
    # NOTE(review): this set is not restricted to `project`; the per-datum
    # query below applies the project filter — confirm this is intended.
    irr_data = set(IRRLog.objects.values_list("data", flat=True))

    agree = 0

    # initialize the dictionary: one zero count for every ordered pair of
    # labels, with "skip" added as an extra label
    rater1_rater2_dict = {}
    label_list = list(
        Label.objects.filter(project=project).values_list("name", flat=True))
    label_list.append("skip")
    for label1 in label_list:
        rater1_rater2_dict[label1] = {}
        for label2 in label_list:
            rater1_rater2_dict[label1][label2] = 0

    num_data = 0
    labels_seen = set()
    for d in irr_data:
        d_log = IRRLog.objects.filter(data=d, data__project=project)
        labels = list(set(d_log.values_list("label", flat=True)))
        # labels are accumulated before the count check, so data skipped
        # below still contribute to labels_seen
        labels_seen = labels_seen | set(labels)
        # get the percent agreement between the users = (num agree)/size_data
        if d_log.count() < 2:
            # don't use this datum, it isn't processed yet
            continue
        num_data += 1
        if len(labels) == 1:
            # a single distinct value that is None is not counted as agreement
            if labels[0] is not None:
                agree += 1
        # a missing label is tallied under "skip"
        # NOTE(review): d_log[0] / d_log[1] are taken in whatever order the
        # queryset yields; no explicit ordering is applied here.
        if d_log[0].label is None:
            label1 = "skip"
        else:
            label1 = d_log[0].label.name

        if d_log[1].label is None:
            label2 = "skip"
        else:
            label2 = d_log[1].label.name

        rater1_rater2_dict[label1][label2] += 1
    if num_data == 0:
        # there is no irr data, so just return bad values
        raise ValueError("No irr data")

    if len(labels_seen) < 2:
        raise ValueError("Need at least two labels represented")

    kappa = raters.cohens_kappa(np.asarray(pd.DataFrame(rater1_rater2_dict)),
                                return_results=False)

    p_o = agree / num_data
    return kappa, p_o
def core(tsx, tsy, weights=None, method='简单kappa'):
    '''
    Compute simple or weighted Cohen's kappa for two categorical series.

    input
    --------
    tsx: categorical data; should have the same unique values as tsy
    tsy: categorical data
    weights: weighting term (optional); ignored for '简单kappa'
    method: {"简单kappa", "加权kappa(线性cohens)", "加权kappa(二次cohens)" },
        method chosen from a drop-down menu (simple, linearly weighted,
        quadratically weighted)

    output
    --------
    One-row DataFrame indexed by '名称' with the kappa value, z value,
    two-sided p value, 95% confidence bounds (rounded to 5 decimals), ASE
    (the 'std_kappa0' entry, rounded to 5 decimals) and the reported kind.

    raises
    --------
    ValueError: if ``method`` is not one of the three supported names.
    '''
    # Menu entry -> ``wt`` option handed to cohens_kappa.
    wt_by_method = {
        '简单kappa': None,
        '加权kappa(线性cohens)': 'linear',
        '加权kappa(二次cohens)': 'quadratic',
    }
    if method not in wt_by_method:
        # Previously an unknown method fell through every branch and failed
        # later on the unbound result variable.
        raise ValueError('unknown method: %r' % (method,))

    table = pd.crosstab(tsx, tsy)
    # Keep only the levels present in both series, in the order of the
    # cross-tabulation's row index, so rows and columns line up and the
    # level order (which weighted kappa depends on) is deterministic.
    shared = [level for level in table.index if level in table.columns]
    table = table.loc[shared, shared]

    if method == '简单kappa':
        res = cohens_kappa(table, weights=None, return_results=True)
    else:
        res = cohens_kappa(table,
                           wt=wt_by_method[method],
                           weights=weights,
                           return_results=True)
    columns = {
        '名称': '%s & %s' % (tsx.name, tsy.name),
        'Kappa值': res.get('kappa'),
        'Z值': res.get('z_value'),
        'P值': res.get('pvalue_two_sided'),
        '95%CI(下限)': round(res.get('kappa_low'), 5),
        '95%CI(上限)': round(res.get('kappa_upp'), 5),
        'ASE': round(res.get('std_kappa0'), 5),
        '类型': res.get('kind')
    }
    return pd.DataFrame([columns]).set_index('名称')
def task_cohen_kappa(dataOne, dataTwo):
    """
    Return the text form of the Cohen's kappa result with all spaces removed.

    The result is computed from the confusion matrix of ``dataOne`` against
    ``dataTwo``.

    http://statsmodels.sourceforge.net/devel/generated/statsmodels.stats.inter_rater.cohens_kappa.html
    """
    matrix = confusion_matrix(dataOne, dataTwo)
    result_text = str(cohens_kappa(matrix))
    return result_text.replace(' ', '')
def fun(fpr, tpr):
    """Return Cohen's kappa of the 2x2 table implied by ``fpr`` and ``tpr``.

    ``N`` and ``P`` are taken from the enclosing scope and scale the first
    and second column of the table respectively.
    """
    from statsmodels.stats.inter_rater import cohens_kappa

    first_row = [(1 - fpr) * N, (1 - tpr) * P]
    second_row = [fpr * N, tpr * P]
    contingency = [first_row, second_row]

    with warnings.catch_warnings():
        # Degenerate cases are not nicely handled by statsmodels.
        # https://github.com/statsmodels/statsmodels/issues/5530
        warnings.filterwarnings("ignore",
                                message="invalid value encountered",
                                category=RuntimeWarning)
        result = cohens_kappa(contingency)
        return result["kappa"]
def kappa(y_true, y_pred, type='cohens'):
    """Compute a kappa score from two binary label arrays.

    Parameters
    ----------
    y_true, y_pred : arrays supporting ``&`` and ``~`` element-wise
        (assumes boolean arrays; on integer arrays ``~`` is a bitwise
        complement — TODO confirm callers pass booleans).
    type : 'cohens' or 'fleiss'
        Which statsmodels routine receives the 2x2 count table.

    Returns
    -------
    (score, result) where ``result`` is the 2x2 array
    ``[[yy, yn], [ny, nn]]`` (first letter: y_true, second: y_pred).

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported names.
    """
    if type not in ('cohens', 'fleiss'):
        # Previously an unknown type fell through both branches and failed
        # on the unbound ``score`` at the return statement.
        raise ValueError("type must be 'cohens' or 'fleiss', got %r" % (type,))

    import statsmodels.stats.inter_rater as irater

    yy = (y_true & y_pred).sum()
    yn = (y_true & (~y_pred)).sum()
    nn = ((~y_true) & (~y_pred)).sum()
    ny = ((~y_true) & (y_pred)).sum()

    result = np.array([[yy, yn], [ny, nn]])

    if type == 'cohens':
        stat = irater.cohens_kappa(result)
        score = stat['kappa']
    else:
        score = irater.fleiss_kappa(result)
    return score, result
def _cohens_kappa(annos1, annos2):
    """Return Cohen's kappa between two sets of sample annotations.

    Both collections must cover the same set of ``sample_id`` values
    (checked with an assertion). Items are paired after sorting each
    collection by ``sample_id``; a falsy ``annotation`` is counted under
    the empty-string category.
    """
    assert set(s.sample_id for s in annos1) == set(s.sample_id for s in annos2)

    # Materialise the categories: they are enumerated and also measured, so
    # a lazy iterator from ``distinct`` would be exhausted before its size
    # could be taken.
    categories = list(distinct(sv.annotation or '' for sv in chain(annos1, annos2)))
    category_index = {c: i for i, c in enumerate(categories)}

    table = np.zeros((len(categories), len(categories)))
    annos1 = sorted(annos1, key=attrgetter('sample_id'))
    annos2 = sorted(annos2, key=attrgetter('sample_id'))
    for sv1, sv2 in zip(annos1, annos2):
        # rows follow the first annotator, columns the second
        table[category_index[sv1.annotation or ''],
              category_index[sv2.annotation or '']] += 1

    return cohens_kappa(table, return_results=False)
def _cohens_kappa(annos1, annos2):
    """Return Cohen's kappa between two sets of sample annotations.

    Both collections must cover the same ``sample_id`` values (asserted).
    Returns NaN when only one distinct annotation occurs.
    """
    ids_first = set(s.sample_id for s in annos1)
    ids_second = set(s.sample_id for s in annos2)
    assert ids_first == ids_second

    categories = ldistinct(sv.annotation for sv in chain(annos1, annos2))
    # If there is only one label then it can't be measured
    if len(categories) == 1:
        return float('nan')

    position = {}
    for idx, category in enumerate(categories):
        position[category] = idx

    size = len(categories)
    table = np.zeros((size, size))
    by_sample = attrgetter('sample_id')
    paired = zip(sorted(annos1, key=by_sample), sorted(annos2, key=by_sample))
    for first, second in paired:
        row = position[first.annotation]
        col = position[second.annotation]
        table[row, col] += 1

    return cohens_kappa(table, return_results=False)
def setup_class(cls):
    """Store the weighted-kappa result for ``table10`` and its reference values."""
    #temporary: res instance is at last position
    cls.res = cohens_kappa(table10, weights=[0, 1, 2])
    # reference numbers, in the same order as the printed summary below:
    # kappa, ASE, lower and upper 95% limits
    res10w_sas = [0.4701, 0.1457, 0.1845, 0.7558]
    res10w_sash0 = [0.1426, 3.2971, 0.0005, 0.0010]  #for test H0:kappa=0
    cls.res2 = res10w_sas + res10w_sash0  #concatenate
    # NOTE(review): the expected summary text is layout-sensitive; the literal
    # is kept exactly as it appears in this file — confirm its line breaks
    # and column spacing against the printed result.
    cls.res_string = '''\ Weighted Kappa Coefficient -------------------------------- Kappa 0.4701 ASE 0.1457 95% Lower Conf Limit 0.1845 95% Upper Conf Limit 0.7558 Test of H0: Weighted Kappa = 0 ASE under H0 0.1426 Z 3.2971 One-sided Pr > Z 0.0005 Two-sided Pr > |Z| 0.0010''' + '\n'
def __init__(self):
    """Store the unweighted-kappa result for ``table10`` and its reference values."""
    #temporary: res instance is at last position
    self.res = cohens_kappa(table10)
    # reference numbers, in the same order as the printed summary below:
    # kappa, ASE, lower and upper 95% limits
    res10_sas = [0.4842, 0.1380, 0.2137, 0.7547]
    res10_sash0 = [0.1484, 3.2626, 0.0006, 0.0011]  #for test H0:kappa=0
    self.res2 = res10_sas + res10_sash0  #concatenate
    # NOTE(review): the expected summary text is layout-sensitive; the literal
    # is kept exactly as it appears in this file — confirm its line breaks
    # and column spacing against the printed result.
    self.res_string = '''\ Simple Kappa Coefficient -------------------------------- Kappa 0.4842 ASE 0.1380 95% Lower Conf Limit 0.2137 95% Upper Conf Limit 0.7547 Test of H0: Simple Kappa = 0 ASE under H0 0.1484 Z 3.2626 One-sided Pr > Z 0.0006 Two-sided Pr > |Z| 0.0011''' + '\n'
def __init__(self):
    """Store the weighted-kappa result for ``table10`` and its reference values."""
    #temporary: res instance is at last position
    self.res = cohens_kappa(table10, weights=[0, 1, 2])
    # reference numbers, in the same order as the printed summary below:
    # kappa, ASE, lower and upper 95% limits
    res10w_sas = [0.4701, 0.1457, 0.1845, 0.7558]
    res10w_sash0 = [0.1426, 3.2971, 0.0005, 0.0010]  #for test H0:kappa=0
    self.res2 = res10w_sas + res10w_sash0  #concatenate
    # NOTE(review): the expected summary text is layout-sensitive; the literal
    # is kept exactly as it appears in this file — confirm its line breaks
    # and column spacing against the printed result.
    self.res_string = '''\ Weighted Kappa Coefficient -------------------------------- Kappa 0.4701 ASE 0.1457 95% Lower Conf Limit 0.1845 95% Upper Conf Limit 0.7558 Test of H0: Weighted Kappa = 0 ASE under H0 0.1426 Z 3.2971 One-sided Pr > Z 0.0005 Two-sided Pr > |Z| 0.0010''' + '\n'
def setup_class(cls):
    """Store the unweighted-kappa result for ``table10`` and its reference values."""
    #temporary: res instance is at last position
    cls.res = cohens_kappa(table10)
    # reference numbers, in the same order as the printed summary below:
    # kappa, ASE, lower and upper 95% limits
    res10_sas = [0.4842, 0.1380, 0.2137, 0.7547]
    res10_sash0 = [0.1484, 3.2626, 0.0006, 0.0011]  #for test H0:kappa=0
    cls.res2 = res10_sas + res10_sash0  #concatenate
    # NOTE(review): the expected summary text is layout-sensitive; the literal
    # is kept exactly as it appears in this file — confirm its line breaks
    # and column spacing against the printed result.
    cls.res_string = '''\ Simple Kappa Coefficient -------------------------------- Kappa 0.4842 ASE 0.1380 95% Lower Conf Limit 0.2137 95% Upper Conf Limit 0.7547 Test of H0: Simple Kappa = 0 ASE under H0 0.1484 Z 3.2626 One-sided Pr > Z 0.0006 Two-sided Pr > |Z| 0.0011''' + '\n'
def _first_nonzero_per_row(frame):
    """Return, per row of ``frame``, the position of its first non-zero cell (-1 if none)."""
    return np.array([
        i[0][0] if len(i[0]) > 0 else -1
        for i in frame.apply(np.nonzero, axis=1).values
    ]).astype(int)


def kappa(f1, f2, pathologists, cols_to_parse, outname, ratings):
    """Compute pairwise Cohen's kappa between pathologists and save it as JSON.

    For every unordered pair of names in ``pathologists``, the first
    rater's sheet is read from ``f1`` and the second rater's sheet from
    ``f2``. Each row is reduced to the position of its first non-zero cell
    and the pair's ratings are cross-tabulated over ``ratings``.

    Parameters
    ----------
    f1, f2 : Excel sources passed to ``pd.read_excel``.
    pathologists : iterable of sheet names, one per rater.
    cols_to_parse : column selection forwarded as ``parse_cols``.
    outname : stem of the JSON file written to ``../data/<outname>.json``.
    ratings : rating values that index both axes of each table.

    Returns
    -------
    dict mapping ``'<first>-<second>'`` to that pair's contingency table
    (nested lists; rows follow the second rater, columns the first).

    Side effects
    ------------
    Writes the per-pair kappa values to ``../data/<outname>.json``.
    """
    contingency_tables = {}
    lvsi = {}
    for (pathologist_one, pathologist_two) in itertools.combinations(pathologists, 2):
        KEY = '%s-%s' % (pathologist_one, pathologist_two)
        df_one = pd.read_excel(f1,
                               pathologist_one,
                               parse_cols=cols_to_parse,
                               convert_float=True)
        df_two = pd.read_excel(f2,
                               pathologist_two,
                               parse_cols=cols_to_parse,
                               convert_float=True)

        # -1 indicates an invalid value in case the rater forgot to fill the
        # form out
        patho_one_ratings = _first_nonzero_per_row(df_one)
        patho_two_ratings = _first_nonzero_per_row(df_two)

        table = [[
            np.logical_and(patho_one_ratings == rating_one,
                           patho_two_ratings == rating_two).sum()
            for rating_one in ratings
        ] for rating_two in ratings]

        contingency_tables[KEY] = table
        lvsi[KEY] = cohens_kappa(table).kappa

    # Text mode, because json.dump writes str; the context manager closes the
    # handle that was previously left open.
    with open('../data/%s.json' % outname, 'w') as outfile:
        json.dump(lvsi, outfile)
    return contingency_tables
4 0 3 9 2 0 0.440 5 2 2 8 1 1 0.330 6 7 7 0 0 0 0.462 7 3 2 6 3 0 0.242 8 2 5 3 2 2 0.176 9 6 5 2 1 0 0.286 10 0 2 2 3 7 0.286'''.split(), float).reshape(10, -1) Total = np.asarray("20 28 39 21 32".split('\t'), int) Pj = np.asarray("0.143 0.200 0.279 0.150 0.229".split('\t'), float) kappa_wp = 0.210 table1 = table0[:, 1:-1] print fleiss_kappa(table1) table4 = np.array([[20, 5], [10, 15]]) print 'res', cohens_kappa(table4), 0.4 #wikipedia table5 = np.array([[45, 15], [25, 15]]) print 'res', cohens_kappa(table5), 0.1304 #wikipedia table6 = np.array([[25, 35], [5, 35]]) print 'res', cohens_kappa(table6), 0.2593 #wikipedia print 'res', cohens_kappa(table6, weights=np.arange(2)), 0.2593 #wikipedia t7 = np.array([[16, 18, 28], [10, 27, 13], [28, 20, 24]]) print cohens_kappa(t7, weights=[0, 1, 2]) table8 = np.array([[25, 35], [5, 35]]) print 'res', cohens_kappa(table8) #SAS example from http://www.john-uebersax.com/stat/saskappa.htm '''
gold_goals += [ (manual_output_dir + file, len(" ".join(list(gold_standard_lemmatized)).split(" "))) ] # if mwe counts as multiple words # gold_goals += [(manual_output_dir + file, len(list(gold_standard)))]# if mwe count as one word with codecs.open(os.path.join(manual_output_dir, file + ".tolerated.txt"), "w", encoding="utf-8") as outfile: outfile.write("\n".join(list(tolerated_lemmatized))) tokens_one_count += len(tokens_one) tokens_two_count += len(tokens_two) # with codecs.open(os.path.join(algorithm_output, file), 'r', encoding='utf-8', errors='replace') as in_file: # for line in in_file: # tokens_extr = [] # tokens_extr += " ".split(line).lower() # tokens_extr = set(tokens_extr) with codecs.open(os.path.join("goal_goals.txt"), "w", encoding="utf-8") as outfile: outfile.write("\n".join([elem[0] + " " + str(elem[1]) for elem in gold_goals])) print("contigency_tables: \n", contigency_tables) print(cohens_kappa(contigency_tables)) print("Annotator 1 keyword_count1: ", tokens_one_count) print("Annotator 2 keyword_count2: ", tokens_two_count) # test kappa, should be 1 # print(cohens_kappa(np.array([[10,0],[0,10]])))
6 7 7 0 0 0 0.462 7 3 2 6 3 0 0.242 8 2 5 3 2 2 0.176 9 6 5 2 1 0 0.286 10 0 2 2 3 7 0.286'''.split(), float).reshape(10,-1) Total = np.asarray("20 28 39 21 32".split('\t'), int) Pj = np.asarray("0.143 0.200 0.279 0.150 0.229".split('\t'), float) kappa_wp = 0.210 table1 = table0[:, 1:-1] print(fleiss_kappa(table1)) table4 = np.array([[20,5], [10, 15]]) print('res', cohens_kappa(table4), 0.4) #wikipedia table5 = np.array([[45, 15], [25, 15]]) print('res', cohens_kappa(table5), 0.1304) #wikipedia table6 = np.array([[25, 35], [5, 35]]) print('res', cohens_kappa(table6), 0.2593) #wikipedia print('res', cohens_kappa(table6, weights=np.arange(2)), 0.2593) #wikipedia t7 = np.array([[16, 18, 28], [10, 27, 13], [28, 20, 24]]) print(cohens_kappa(t7, weights=[0, 1, 2])) table8 = np.array([[25, 35], [5, 35]]) print('res', cohens_kappa(table8))
# Build Random Forest model (using optimal hyperparameters, see
# Scripts/PythonScripts/Hyperparameters.py) and classify the validation data;
# do this for classifier1 and classifier2
clf_classifier1 = RandomForestClassifier(n_estimators=46,
                                         max_depth=13,
                                         min_samples_split=2,
                                         min_samples_leaf=1,
                                         random_state=12)
clf_classifier1.fit(X_training_classifier1, Y_training_classifier1)
Y_prediction_classifier1 = clf_classifier1.predict(X_validation_classifier1)

clf_classifier2 = RandomForestClassifier(n_estimators=46,
                                         max_depth=13,
                                         min_samples_split=2,
                                         min_samples_leaf=1,
                                         random_state=12)
clf_classifier2.fit(X_training_classifier2, Y_training_classifier2)
Y_prediction_classifier2 = clf_classifier2.predict(X_validation_classifier2)

# Accuracy assessment (confusion matrix, kappa, overall accuracy), again do
# this for classifier1 and classifier2. The kappa result is computed once per
# confusion matrix and both the estimate and its variance are read from it.
CM_classifier1 = confusion_matrix(Y_validation, Y_prediction_classifier1)
kappa_result_classifier1 = cohens_kappa(CM_classifier1)
kappa_classifier1 = kappa_result_classifier1.kappa
kappa_var_classifier1 = kappa_result_classifier1.var_kappa

CM_classifier2 = confusion_matrix(Y_validation, Y_prediction_classifier2)
kappa_result_classifier2 = cohens_kappa(CM_classifier2)
kappa_classifier2 = kappa_result_classifier2.kappa
kappa_var_classifier2 = kappa_result_classifier2.var_kappa

# Compute Kappa_hat: absolute difference of the two kappas divided by the
# square root of the sum of their variances
KHAT = np.abs(kappa_classifier1 - kappa_classifier2) / np.sqrt(
    kappa_var_classifier1 + kappa_var_classifier2)
# Pairwise Cohen's kappa for every pair of coders, computed on the items that
# both coders of the pair labelled.
kappas = []
for coder_pair in itertools.combinations(coders, 2):
    tempdf = labels[labels['screenname'].isin(coder_pair)]
    label_counts = tempdf[['externalId',
                           'screenname']].groupby('externalId').count()
    # an item overlaps when it has exactly two label rows within this pair
    overlapping_ids = label_counts[label_counts['screenname'] == 2].index
    if len(overlapping_ids) == 0:
        # Nothing to compare. Keep a plain float here: the value used to be
        # subscripted with ['kappa'] afterwards, which fails on a float.
        kappa = np.nan
    else:
        eval_df = tempdf.pivot(index='externalId',
                               columns='screenname',
                               values='labelText')
        eval_df = eval_df.loc[overlapping_ids]
        cm = confusion_matrix(eval_df[[coder_pair[0]]],
                              eval_df[[coder_pair[1]]])
        kappa = cohens_kappa(cm)['kappa']
    kappas.append((*coder_pair, kappa, len(overlapping_ids)))

kappas = pd.DataFrame(
    kappas, columns=['coder_1', 'coder_2', 'kappa', 'overlapping_elements'])
kappas.set_index(['coder_1', 'coder_2'], drop=True, inplace=True)
print_summary("Cohen's Kappa for all coder pairs", kappas)

# Average, per coder, the kappa of every pair that coder takes part in.
avg_kappas = []
kappas.reset_index(drop=False, inplace=True)
for coder in coders:
    val = kappas[(kappas['coder_1'] == coder) |
                 (kappas['coder_2'] == coder)]['kappa'].mean()
    avg_kappas.append((coder, val))
avg_kappas = pd.DataFrame(avg_kappas, columns=['coder', 'avg_kappa'])
avg_kappas.set_index('coder', inplace=True, verify_integrity=True)
print_summary("Average Cohen's Kappa by coder", avg_kappas)
df_pathologist_two = pd.read_excel('stains.xls',pathologist_two,parse_cols=cols_with_grades,convert_float=False) patho_one_ratings = np.array([i[0][0] if len(i[0]) > 0 else -1 for i in df_pathologist_one.apply(np.nonzero,axis=1).values]).astype(int) patho_two_ratings = np.array([i[0][0] if len(i[0]) > 0 else -1 for i in df_pathologist_two.apply(np.nonzero,axis=1).values]).astype(int) for rating_one,rating_two in zip(patho_one_ratings,patho_two_ratings): #print rating_one,rating_two if type(rating_one) == type(list): rating_one = rating_one[0] if type(rating_two) == type(list): rating_two = rating_two[0] #print i contingency_table[j,rating_one,rating_two] += 1 kappas['%s-%s'%(pathologist_one,pathologist_two)] = cohens_kappa(contingency_table[j,:,:].squeeze()).kappa print np.median(contingency_table,axis=0) print 0.5*(np.percentile(contingency_table,75,axis=0) - np.percentile(contingency_table,25,axis=0)) json.dump(kappas,open('kappa-by-grade-no-ihc.json','wb')) ap(np.median(kappas.values())) print 0.5*(np.percentile(kappas.values(),75)-np.percentile(kappas.values(),25)) ''' df_one = pd.read_excel('stains.xls',pathologist,parse_cols=cols_with_grades, convert_float=False) df_two = pd.read_excel('no-stain.xls',pathologist,parse_cols=cols_with_grades, convert_float=False) patho_one_ratings = np.array([i[0][0] if len(i[0]) > 0 else -1 for i in df_one.apply(np.nonzero,axis=1).values]).astype(int) patho_two_ratings = np.array([i[0][0] if len(i[0]) > 0 else -1 for i in df_two.apply(np.nonzero,axis=1).values]).astype(int) #Really inefficient implementation, but too many exceptions to vectorize:
# Intra-rater reliability: for each pathologist, compare the ratings read
# from stains.xls with those read from no-stain.xls and store Cohen's kappa.
# (Python 2 code: print statement syntax.)
# NOTE(review): block nesting below is reconstructed from the statement
# order — confirm that the rating_two loop is nested inside the rating_one
# loop as shown.
pathologists = open('../data/rater-names','rb').read().splitlines()
lvsi = {}
for pathologist in pathologists:
    df_one = pd.read_excel('../data/stains.xls',pathologist,parse_cols=cols_with_grades, convert_float=False)
    df_two = pd.read_excel('../data/no-stain.xls',pathologist,parse_cols=cols_with_grades, convert_float=False)

    # per row: position of the first non-zero cell, or -1 when the row has none
    patho_one_ratings = np.array([i[0][0] if len(i[0]) > 0 else -1 for i in df_one.apply(np.nonzero,axis=1).values]).astype(int)
    patho_two_ratings = np.array([i[0][0] if len(i[0]) > 0 else -1 for i in df_two.apply(np.nonzero,axis=1).values]).astype(int)

    #Really inefficient implementation, but too many exceptions to vectorize:
    contingency_table = np.zeros((3,3))
    for rating_one in patho_one_ratings:
        # NOTE(review): type(list) is `type`, so this comparison does not
        # test whether the rating is a list — confirm intent.
        if type(rating_one) == type(list):
            rating_one = rating_one[0]
        for rating_two in patho_two_ratings:
            if type(rating_two) == type(list):
                rating_two = rating_two[0]
            print '\t %d'%rating_two
            # NOTE(review): a rating of -1 indexes the last row/column here.
            contingency_table[rating_one,rating_two] += 1

    lvsi[pathologist] = cohens_kappa(contingency_table).kappa
json.dump(lvsi,open('../data/intra-rater-reliability.json','wb'))
ap(np.median(lvsi.values()))
# half the interquartile range of the per-pathologist kappas
print 0.5*(np.percentile(lvsi.values(),75)-np.percentile(lvsi.values(),25))
def test_option(self):
    """The value returned with ``return_results=False`` must match the reference kappa."""
    computed = cohens_kappa(table10, weights=[0, 1, 2], return_results=False)
    reference = self.res2[0]
    assert_almost_equal(computed, reference, decimal=4)
temp2 = SquareTable.from_data(tmp)
temp2.summary()
# The marginal distribution probabilities can be obtained
row, col = temp2.marginal_probabilities
# Test on the square contingency table
temp2.symmetry()
# The program offers no estimator for the difference of proportions and its
# confidence interval. It apparently has to be computed by hand, or
# statsmodels needs to be explored in more depth.
# (2) kappa
data = pd.read_csv(r"D:/书籍资料整理/属性数据分析/癌症与诊断.csv")
temp = np.array([[22, 2, 2, 0], [5, 7, 14, 0], [0, 2, 36, 0], [0, 1, 17, 10]])
# Matches the value given in the book.
cohens_kappa(temp)
# 2. Models
# (1) Paired marginal logistic.
# This model is very odd: it does not belong to any of the commonly used
# models, so the logit function has to be fitted by hand.
# Since there are only two points, calling the logit function directly gives
# an approximate result without adding a likelihood step.
# -0.78236221 - -0.88589346 =0.10353125
# GEE can be used for marginal models, but goodness-of-fit testing with GEE
# needs additional assumptions.
# (2) The conditional model is called ConditionalLogit, but it only applies to
# nominal, binomial multi-category and poisson responses;
# support for ordinal responses is insufficient.
data = pd.read_csv(r"D:/书籍资料整理/属性数据分析/环保.csv")
tmp = pd.DataFrame()
zhi = 0
# print(data)
def predict(filename, clf, selected_feature):
    """Fit ``clf`` on the training split of a .mat file and score the test split.

    filename: path of the .mat file; it is read for the keys "x_tr", "y_tr",
        "x_te", "y_te" and "te_location".
    clf: estimator exposing ``fit`` and ``predict``.
    selected_feature: comma-separated feature indices to keep, or "" to keep
        every feature.

    Returns a list: overall accuracy (percent), average per-label accuracy
    (percent), kappa times 100, the per-label accuracy dict, the locations
    of the true labels grouped by label, the locations of the predicted
    labels grouped by label, the distinct true labels, and the path of the
    .mat file the results were saved to.

    Python 2 code: it uses print statements and sorts the result of ``map``
    in place.
    """
    # read data
    all_data = sio.loadmat(filename)
    x_tr = all_data["x_tr"].tolist()
    y_tr = all_data["y_tr"][0].tolist()
    x_te = all_data["x_te"].tolist()
    y_te = all_data["y_te"][0].tolist()
    te_location = all_data["te_location"].tolist()
    if selected_feature != "":
        # keep only the requested feature columns, in the requested order
        sf = selected_feature.split(",")
        sf = map(lambda x: int(x), sf)
        for i in range(len(x_tr)):
            new_feature=[]
            for j in sf:
                new_feature.append(x_tr[i][j])
            x_tr[i]=new_feature
        for i in range(len(x_te)):
            new_feature=[]
            for j in sf:
                new_feature.append(x_te[i][j])
            x_te[i]=new_feature
    # fit and predict
    clf.fit(np.array(x_tr),np.array(y_tr))
    print "here"
    testing_result = clf.predict(x_te).tolist()
    print "predict done"
    # overall accuracy
    OA = float(sum([1 for i in range(len(y_te)) if testing_result[i] == y_te[i]])) / len(y_te)
    # count of each label,for average accuracy
    label_count = {}
    for label in y_te:
        if label in label_count:
            label_count[label] = label_count[label] + 1
        else:
            label_count[label] = 1
    # correct classification in each label
    label_correct_count = {}
    # kappa matrix for calculate kappa statistics
    kappa_matrix = {}
    for label in label_count:
        label_correct_count[label] = 0
        kappa_matrix[label] = {}
        for alabel in label_count:
            kappa_matrix[label][alabel] = 0
    for i in range(len(y_te)):
        if y_te[i] == int(testing_result[i]):
            # record correct classification
            label_correct_count[y_te[i]] = label_correct_count[y_te[i]] + 1
        # NOTE(review): the counts are keyed by the labels found in y_te, so
        # a predicted label that never occurs in y_te is not a valid key.
        kappa_matrix[y_te[i]][int(testing_result[i])] = kappa_matrix[
            y_te[i]][int(testing_result[i])] + 1
    # accuracy of each label
    label_accuracy = {}
    for label in label_count:
        label_accuracy[label] = float(label_correct_count[
            label]) / label_count[label]
    # average accuracy
    AA = 0
    for label in label_accuracy:
        AA = AA + label_accuracy[label]
    AA = AA / len(label_accuracy)
    # kappa statistics: copy the nested dict into a square list of lists
    kappa_matrix_list = [[0 for j in label_count] for i in label_count]
    i = 0
    for label in kappa_matrix:
        j = 0
        for alabel in kappa_matrix[label]:
            kappa_matrix_list[i][j] = kappa_matrix[label][alabel]
            j = j + 1
        i = i + 1
    kappa = cohens_kappa(np.array(kappa_matrix_list)).kappa
    # label
    uniq_ele = list(set(y_te))
    # get true data' location and label
    y_location = map(lambda x, y: [x, y], te_location, y_te)
    # sorted by label
    y_location.sort(key=lambda x: x[1])
    # get location of different label in independent list
    y_te_location = []
    for i in uniq_ele:
        y_te_location.append(
            map(lambda x: x[0], filter(lambda x: x[1] == i, y_location)))
    # get testing result' location and label
    y_location = map(lambda x, y: [x, y], te_location, testing_result)
    # print y_location
    y_location.sort(key=lambda x: x[1])
    t_te_location = []
    for i in uniq_ele:
        t_te_location.append(
            map(lambda x: x[0], filter(lambda x: x[1] == i, y_location)))
    # for i in tr_location:
    #     if i ==[]:
    #         print i
    # f = open("tempresult", "w")
    # f.write(str(label_accuracy))
    # f.close()
    filename = "result/result.mat"
    # append random digits before the extension until the name is unused
    while os.path.exists(filename):
        filename = filename[:-4] + str(random.randint(1, 9)) + filename[-4:]
    # print filename
    sio.savemat(
        filename, {"testing_result": testing_result, "true_result": y_te, "location": te_location})
    return [OA * 100, AA * 100, kappa * 100, label_accuracy, y_te_location, t_te_location, uniq_ele, filename]
parse_cols=cols_with_lvsi, convert_float=False) histology_ratings = np.array([ i[0][0] if len(i[0]) > 0 else -1 for i in histology_grade.apply(np.nonzero, axis=1).values ]).astype(int) lvsi_ratings = np.array([ i[0][0] if len(i[0]) > 0 else -1 for i in lvsi.apply(np.nonzero, axis=1).values ]).astype(int) for histolog_rating, lvsi_rating in zip(histology_ratings, lvsi_ratings): #print rating_one,rating_two if type(histolog_rating) == type(list): histology_rating = histolog_rating[0] if type(lvsi_rating) == type(list): lvsi_rating = lvsi_rating[0] #print i contingency_table[j, histolog_rating, lvsi_rating] += 1 kappas[pathologist] = cohens_kappa( contingency_table[j, :, :].squeeze()).kappa print np.median(contingency_table, axis=0) print 0.5 * (np.percentile(contingency_table, 75, axis=0) - np.percentile(contingency_table, 25, axis=0)) json.dump(kappas, open('../data/lvsi-by-grade.json', 'wb')) ap(np.median(kappas.values())) print 0.5 * (np.percentile(kappas.values(), 75) - np.percentile(kappas.values(), 25))
def evaluate(model, dev_loader, device, f1_weights, return_pred=False):
    """ Function to evaluate the current model weights
    @param model (nn.Module): the labeler module
    @param dev_loader (torch.utils.data.DataLoader): dataloader for dev set
    @param device (torch.device): device on which data should be
    @param f1_weights (dictionary): dictionary mapping conditions to f1
                                    task weights
    @param return_pred (bool): whether to return predictions or not

    @returns res_dict (dictionary): dictionary with keys 'blank', 'mention',
                                    'negation', 'uncertain', 'positive',
                                    'weighted' and 'kappa', with values being
                                    lists with one scalar per condition.
                                    If return_pred is true then a tuple is
                                    returned with the aforementioned
                                    dictionary as the first item, a list of
                                    predictions as the second item, and a
                                    list of ground truth as the third item
    """
    was_training = model.training
    model.eval()
    y_pred = [[] for _ in range(len(CONDITIONS))]
    y_true = [[] for _ in range(len(CONDITIONS))]

    try:
        with torch.no_grad():
            for i, data in enumerate(dev_loader, 0):
                batch = data['imp']  #(batch_size, max_len)
                batch = batch.to(device)
                label = data['label']  #(batch_size, 14)
                label = label.permute(1, 0).to(device)
                src_len = data['len']
                attn_mask = generate_attention_masks(batch, src_len, device)

                out = model(batch, attn_mask)

                for j in range(len(out)):
                    out[j] = out[j].to('cpu')  #move to cpu for sklearn
                    curr_y_pred = out[j].argmax(dim=1)  #shape is (batch_size)
                    y_pred[j].append(curr_y_pred)
                    y_true[j].append(label[j].to('cpu'))

                if (i + 1) % 200 == 0:
                    print('Evaluation batch no: ', i + 1)
    finally:
        # Restore the caller's mode even if a batch raises, so a failed
        # evaluation does not leave the model in eval mode.
        if was_training:
            model.train()

    # one flat tensor per condition
    for j in range(len(y_true)):
        y_true[j] = torch.cat(y_true[j], dim=0)
        y_pred[j] = torch.cat(y_pred[j], dim=0)

    # each metric helper receives its own deep copy of the label lists
    mention_f1 = compute_mention_f1(copy.deepcopy(y_true),
                                    copy.deepcopy(y_pred))
    negation_f1 = compute_negation_f1(copy.deepcopy(y_true),
                                      copy.deepcopy(y_pred))
    uncertain_f1 = compute_uncertain_f1(copy.deepcopy(y_true),
                                        copy.deepcopy(y_pred))
    positive_f1 = compute_positive_f1(copy.deepcopy(y_true),
                                      copy.deepcopy(y_pred))
    blank_f1 = compute_blank_f1(copy.deepcopy(y_true), copy.deepcopy(y_pred))

    weighted = []
    kappas = []
    for j in range(len(y_pred)):
        cond = CONDITIONS[j]
        avg = weighted_avg([negation_f1[j], uncertain_f1[j], positive_f1[j]],
                           f1_weights[cond])
        weighted.append(avg)

        mat = confusion_matrix(y_true[j], y_pred[j])
        kappas.append(cohens_kappa(mat, return_results=False))

    res_dict = {
        'mention': mention_f1,
        'blank': blank_f1,
        'negation': negation_f1,
        'uncertain': uncertain_f1,
        'positive': positive_f1,
        'weighted': weighted,
        'kappa': kappas
    }

    if return_pred:
        return res_dict, y_pred, y_true
    else:
        return res_dict
def _cohen(a, b): if a.shape[0] == 1 and b.shape[0] == 1: return 1 return cohens_kappa(confusion_matrix(a, b)).kappa
def test_cohens_kappa_irr():
    """Compare cohens_kappa on the anxiety data with stored R ``kappa2`` results.

    Each ``Holder`` carries the reference kappa, z statistic and p value for
    one weighting; every entry of ``all_cases`` pairs a reference with the
    ``weights`` / ``wt`` arguments expected to reproduce it.
    """
    # NOTE(review): these two holders are rebound to fresh Holder() instances
    # further down, so the values assigned here are not used by the checks.
    ck_w3 = Holder()
    ck_w4 = Holder()

    #>r = kappa2(anxiety[,1:2], c(0,0,0,1,1,1))
    #> cat_items(r, pref="ck_w3.")
    ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,0,1,1,1)"
    ck_w3.irr_name = 'Kappa'
    ck_w3.value = 0.1891892
    ck_w3.stat_name = 'z'
    ck_w3.statistic = 0.5079002
    ck_w3.p_value = 0.6115233

    #> r = kappa2(anxiety[,1:2], c(0,0,1,1,2,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2,2)"
    ck_w4.irr_name = 'Kappa'
    ck_w4.value = 0.2820513
    ck_w4.stat_name = 'z'
    ck_w4.statistic = 1.257410
    ck_w4.p_value = 0.2086053

    ck_w1 = Holder()
    ck_w2 = Holder()
    ck_w3 = Holder()
    ck_w4 = Holder()

    #> r = kappa2(anxiety[,2:3])
    #> cat_items(r, pref="ck_w1.")
    ck_w1.method = "Cohen's Kappa for 2 Raters (Weights: unweighted)"
    ck_w1.irr_name = 'Kappa'
    ck_w1.value = -0.006289308
    ck_w1.stat_name = 'z'
    ck_w1.statistic = -0.0604067
    ck_w1.p_value = 0.9518317

    #> r = kappa2(anxiety[,2:3], "equal")
    #> cat_items(r, pref="ck_w2.")
    ck_w2.method = "Cohen's Kappa for 2 Raters (Weights: equal)"
    ck_w2.irr_name = 'Kappa'
    ck_w2.value = 0.1459075
    ck_w2.stat_name = 'z'
    ck_w2.statistic = 1.282472
    ck_w2.p_value = 0.1996772

    #> r = kappa2(anxiety[,2:3], "squared")
    #> cat_items(r, pref="ck_w3.")
    ck_w3.method = "Cohen's Kappa for 2 Raters (Weights: squared)"
    ck_w3.irr_name = 'Kappa'
    ck_w3.value = 0.2520325
    ck_w3.stat_name = 'z'
    ck_w3.statistic = 1.437451
    ck_w3.p_value = 0.1505898

    #> r = kappa2(anxiety[,2:3], c(0,0,1,1,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4.method = "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2)"
    ck_w4.irr_name = 'Kappa'
    ck_w4.value = 0.2391304
    ck_w4.stat_name = 'z'
    ck_w4.statistic = 1.223734
    ck_w4.p_value = 0.2210526

    # (reference holder, weights argument, wt argument)
    all_cases = [(ck_w1, None, None), (ck_w2, None, 'linear'),
                 (ck_w2, np.arange(5), None),
                 (ck_w2, np.arange(5), 'toeplitz'),
                 (ck_w3, None, 'quadratic'),
                 (ck_w3, np.arange(5)**2, 'toeplitz'),
                 (ck_w3, 4 * np.arange(5)**2, 'toeplitz'),
                 (ck_w4, [0, 0, 1, 1, 2], 'toeplitz')]

    #Note R:irr drops the missing category level 4 and uses the reduced matrix
    r = np.histogramdd(anxiety[:, 1:],
                       ([1, 2, 3, 4, 6, 7], [1, 2, 3, 4, 6, 7]))

    for res2, w, wt in all_cases:
        # the failing case is identified by its weights / wt arguments
        msg = repr(w) + repr(wt)
        res1 = cohens_kappa(r[0], weights=w, wt=wt)
        assert_almost_equal(res1.kappa, res2.value, decimal=6, err_msg=msg)
        assert_almost_equal(res1.z_value,
                            res2.statistic,
                            decimal=5,
                            err_msg=msg)
        assert_almost_equal(res1.pvalue_two_sided,
                            res2.p_value,
                            decimal=6,
                            err_msg=msg)
# Keep the rows where at least one of the two censoring columns is 'no',
# then drop the rows whose tak_toxidrome is "n/a".
# (Python 2 code: print statement syntax.)
omitting_censored = df.loc[(df['tak-censored'] == 'no') |
                           (df['barbuto-censored'] == "no")]
omitting_censored = omitting_censored.loc[
    omitting_censored['tak_toxidrome'] != "n/a"]
#At least one person rated it? Perhaps there are better ways
#print len(df) #217
#print len(omitting_censored) #191
# NOTE(review): no path is given and the return value is not used, so this
# call does not write a file — confirm whether it is still needed.
omitting_censored.to_csv(index=False)
crosstab = pd.crosstab(omitting_censored['tak_toxidrome'],
                       omitting_censored['barbuto_toxidrome'])
crosstab.to_csv(os.path.join(args.input, "fellow-pivot-table.csv"))
# drop the third row (by position) of the cross-tabulation before computing kappa
crosstab = crosstab.drop(crosstab.index[2])
print crosstab
# The bare string below is a pasted result summary; it has no runtime effect.
''' Simple Kappa Coefficient -------------------------------- Kappa 0.8145 ASE 0.0314 95% Lower Conf Limit 0.7530 95% Upper Conf Limit 0.8760 Test of H0: Simple Kappa = 0 ASE under H0 0.0332 Z 24.5027 One-sided Pr > Z 0.0000 Two-sided Pr > |Z| 0.0000 '''
print cohens_kappa(crosstab)
def _select_columns(rows, columns):
    """Return ``rows`` restricted to the given column indices, in that order."""
    return [[row[j] for j in columns] for row in rows]


def _locations_by_label(locations, labels, wanted_labels):
    """Return, for each wanted label, the locations carrying that label.

    Locations keep their original relative order inside each group.
    """
    return [[loc for loc, lab in zip(locations, labels) if lab == wanted]
            for wanted in wanted_labels]


def predict(filename, clf, selected_feature):
    """Train ``clf`` on a .mat data set, classify the test part and score it.

    Parameters
    ----------
    filename : str
        Path of a MATLAB file read with ``sio.loadmat``.  The keys ``x_tr``,
        ``y_tr``, ``x_te``, ``y_te`` and ``te_location`` are read; for the
        two label arrays only the first row is used.
    clf : object
        Classifier providing ``fit(X, y)`` and ``predict(X)``; the prediction
        must offer ``tolist()``.
    selected_feature : str
        Comma-separated feature column indices to keep, or ``""`` to keep
        all features.

    Returns
    -------
    list
        ``[OA * 100, AA * 100, kappa * 100, label_accuracy, y_te_location,
        t_te_location, uniq_ele, filename]`` where ``OA`` is the overall
        accuracy, ``AA`` the mean of the per-label accuracies, ``kappa``
        Cohen's kappa of the confusion matrix, ``label_accuracy`` a dict
        label -> accuracy, the two ``*_location`` entries the test locations
        grouped by true / predicted label (one list per entry of
        ``uniq_ele``), and ``filename`` the path of the written result file.

    Notes
    -----
    A ``result/result*.mat`` file containing the predictions, the true
    labels and the locations is written as a side effect.
    """
    # read data
    all_data = sio.loadmat(filename)
    x_tr = all_data["x_tr"].tolist()
    y_tr = all_data["y_tr"][0].tolist()
    x_te = all_data["x_te"].tolist()
    y_te = all_data["y_te"][0].tolist()
    te_location = all_data["te_location"].tolist()

    if selected_feature != "":
        # A real list (not a lazy ``map``) so the indices can be reused for
        # every row on Python 3 as well.
        columns = [int(index) for index in selected_feature.split(",")]
        x_tr = _select_columns(x_tr, columns)
        x_te = _select_columns(x_te, columns)

    # fit and predict
    clf.fit(np.array(x_tr), np.array(y_tr))
    print("here")
    testing_result = clf.predict(x_te).tolist()
    print("predict done")

    # overall accuracy
    hits = sum(1 for predicted, actual in zip(testing_result, y_te)
               if predicted == actual)
    OA = float(hits) / len(y_te)

    # count of each label, for average accuracy
    label_count = {}
    for label in y_te:
        label_count[label] = label_count.get(label, 0) + 1

    # One fixed label order is shared by rows and columns of the confusion
    # matrix.  Predicted labels that never occur in y_te are included so
    # they get a row/column instead of raising KeyError.
    predicted_labels = [int(value) for value in testing_result]
    labels = sorted(set(y_te) | set(predicted_labels))
    position = {label: k for k, label in enumerate(labels)}
    confusion = np.zeros((len(labels), len(labels)), dtype=int)

    # correct classification in each label
    label_correct_count = dict.fromkeys(label_count, 0)
    for actual, predicted in zip(y_te, predicted_labels):
        if actual == predicted:
            label_correct_count[actual] += 1
        # Every sample is counted (rows: true label, columns: prediction);
        # counting only the correct ones would give a purely diagonal table.
        confusion[position[actual], position[predicted]] += 1

    # accuracy of each label
    label_accuracy = {}
    for label, count in label_count.items():
        label_accuracy[label] = float(label_correct_count[label]) / count

    # average accuracy
    AA = sum(label_accuracy.values()) / len(label_accuracy)

    # kappa statistics
    kappa = cohens_kappa(confusion).kappa

    # distinct true labels; the grouped location lists follow this order
    uniq_ele = list(set(y_te))
    # locations grouped by true label and by predicted label
    y_te_location = _locations_by_label(te_location, y_te, uniq_ele)
    t_te_location = _locations_by_label(te_location, testing_result, uniq_ele)

    # Append random digits to the base name until the path is unused.
    filename = "result/result.mat"
    while os.path.exists(filename):
        filename = filename[:-4] + str(random.randint(1, 9)) + filename[-4:]

    sio.savemat(
        filename, {
            "testing_result": testing_result,
            "true_result": y_te,
            "location": te_location
        })

    return [
        OA * 100, AA * 100, kappa * 100, label_accuracy, y_te_location,
        t_te_location, uniq_ele, filename
    ]
# Rating of each row = index of its first non-zero column; rows without any
# non-zero entry are coded as -1.
patho_one_ratings = np.array([
    i[0][0] if len(i[0]) > 0 else -1
    for i in df_one.apply(np.nonzero, axis=1).values
]).astype(int)
patho_two_ratings = np.array([
    i[0][0] if len(i[0]) > 0 else -1
    for i in df_two.apply(np.nonzero, axis=1).values
]).astype(int)

# Rater-by-rater contingency table: one count per jointly rated row.
# Both rating arrays are integer arrays, so their elements are scalars and
# need no list unwrapping (the former ``type(x) == type(list)`` test
# compared against ``type`` itself and could never be true).
# NOTE(review): assumes row k of df_one and row k of df_two describe the
# same specimen, which is what Cohen's kappa needs -- confirm.
# NOTE(review): a rating of -1 indexes the LAST row/column of the 3x3 table
# (numpy negative indexing); confirm that this is the intended bucket for
# "no rating".
contingency_table = np.zeros((3, 3))
for rating_one, rating_two in zip(patho_one_ratings, patho_two_ratings):
    print('\t %d' % rating_two)
    contingency_table[rating_one, rating_two] += 1

lvsi['%s-%s' % (pathologist_one,
                pathologist_two)] = cohens_kappa(contingency_table).kappa

# Text mode works for json.dump on Python 2 and 3; the context manager
# guarantees the file is flushed and closed.
with open('../data/lvsi-stains-grades.json', 'w') as json_file:
    json.dump(lvsi, json_file)

# Materialise the values once: numpy needs a sequence, not a dict view.
kappa_values = list(lvsi.values())
ap(np.median(kappa_values))
# Half of the interquartile range of the kappa values.
print(0.5 * (np.percentile(kappa_values, 75) -
             np.percentile(kappa_values, 25)))
def test_cohens_kappa_irr():
    """Compare ``cohens_kappa`` with reference results from R's ``irr::kappa2``.

    The R call that produced each reference is kept in a comment next to the
    values.  For every (reference, weights, wt) combination the kappa
    estimate, the z statistic and the two-sided p-value are compared.
    """

    def reference(method, value, statistic, p_value):
        # Bundle one set of R ``kappa2`` results into a Holder.
        ref = Holder()
        ref.method = method
        ref.irr_name = 'Kappa'
        ref.value = value
        ref.stat_name = 'z'
        ref.statistic = statistic
        ref.p_value = p_value
        return ref

    # References for anxiety[,1:2].  They are not part of ``all_cases``
    # below, so no assertion uses them; kept as recorded R output only.
    #>r = kappa2(anxiety[,1:2], c(0,0,0,1,1,1))
    #> cat_items(r, pref="ck_w3.")
    ck12_w3 = reference(
        "Cohen's Kappa for 2 Raters (Weights: 0,0,0,1,1,1)",
        0.1891892, 0.5079002, 0.6115233)

    #> r = kappa2(anxiety[,1:2], c(0,0,1,1,2,2))
    #> cat_items(r, pref="ck_w4.")
    ck12_w4 = reference(
        "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2,2)",
        0.2820513, 1.257410, 0.2086053)

    # References for anxiety[,2:3]; these are the ones actually checked.
    #> r = kappa2(anxiety[,2:3])
    #> cat_items(r, pref="ck_w1.")
    ck_w1 = reference(
        "Cohen's Kappa for 2 Raters (Weights: unweighted)",
        -0.006289308, -0.0604067, 0.9518317)

    #> r = kappa2(anxiety[,2:3], "equal")
    #> cat_items(r, pref="ck_w2.")
    ck_w2 = reference(
        "Cohen's Kappa for 2 Raters (Weights: equal)",
        0.1459075, 1.282472, 0.1996772)

    #> r = kappa2(anxiety[,2:3], "squared")
    #> cat_items(r, pref="ck_w3.")
    ck_w3 = reference(
        "Cohen's Kappa for 2 Raters (Weights: squared)",
        0.2520325, 1.437451, 0.1505898)

    #> r = kappa2(anxiety[,2:3], c(0,0,1,1,2))
    #> cat_items(r, pref="ck_w4.")
    ck_w4 = reference(
        "Cohen's Kappa for 2 Raters (Weights: 0,0,1,1,2)",
        0.2391304, 1.223734, 0.2210526)

    # (expected result, ``weights`` argument, ``wt`` argument)
    all_cases = [
        (ck_w1, None, None),
        (ck_w2, None, 'linear'),
        (ck_w2, np.arange(5), None),
        (ck_w2, np.arange(5), 'toeplitz'),
        (ck_w3, None, 'quadratic'),
        (ck_w3, np.arange(5)**2, 'toeplitz'),
        (ck_w3, 4 * np.arange(5)**2, 'toeplitz'),
        (ck_w4, [0, 0, 1, 1, 2], 'toeplitz'),
    ]

    # Note R:irr drops the missing category level 4 and uses the reduced
    # matrix
    bin_edges = [1, 2, 3, 4, 6, 7]
    table = np.histogramdd(anxiety[:, 1:], (bin_edges, list(bin_edges)))[0]

    for expected, weights, wt in all_cases:
        label = repr(weights) + repr(wt)
        result = cohens_kappa(table, weights=weights, wt=wt)
        comparisons = (
            (result.kappa, expected.value, 6),
            (result.z_value, expected.statistic, 5),
            (result.pvalue_two_sided, expected.p_value, 6),
        )
        for actual, desired, places in comparisons:
            assert_almost_equal(actual, desired, decimal=places,
                                err_msg=label)
# Validation features (two columns) and the matching labels.
X_validation = S1_validation[['VH_intensity', 'GLCM_mean']]
Y_validation = S1_validation['IceStage']  # Labels

# Build Random Forest model (using optimal hyperparameters, see
# Scripts/PythonScripts/Hyperparameters.py) and classify the validation data.
# random_state is fixed so repeated runs give the same forest.
clf = RandomForestClassifier(n_estimators=46,
                             max_depth=13,
                             min_samples_split=2,
                             min_samples_leaf=1,
                             random_state=12)
clf.fit(X_training, Y_training)
Y_prediction = clf.predict(X_validation)

# Accuracy assessment (confusion matrix, kappa, overall accuracy).
CM = confusion_matrix(Y_validation, Y_prediction)
# Evaluate Cohen's kappa once and read both the estimate and its variance
# from the same result object instead of recomputing it per attribute.
kappa_result = cohens_kappa(CM)
kappa = kappa_result.kappa
kappa_var = kappa_result.var_kappa
overall_accuracy = metrics.accuracy_score(Y_validation, Y_prediction)

# Visualize the confusion matrix
sns.set()
plt.figure(dpi=400)
plot_confusion_matrix(CM,
                      classes=['Sheet ice', 'Ice jam', 'Open water'],
                      normalize=True,
                      title='S1-Int - Normalized confusion matrix')
# Explicit, inverted y-limits for the three rows.
# NOTE(review): presumably keeps the top/bottom rows from being cut off --
# confirm against the matplotlib version in use.
plt.ylim([2.5, -.5])
plt.grid(False)
plt.show()
sns.set()