def get_QWkappa(input): head,body = fio.ReadMatrix(input, True) metric = Metric() data = {} for i,row in enumerate(body): for coder, label in enumerate(row): if label == 'a': label = '0' label = int(label) if head[coder] not in data: data[ head[coder] ] = [] data[ head[coder] ].append(label) print 'annototor 1', '\t','annototor 2', '\t', 'accuracy', '\t', 'kappa', '\t', 'QWkappa' print head[0], '\t', head[1], '\t', metric.accuracy(data[head[0]], data[head[1]]), '\t', metric.kappa(data[head[0]], data[head[1]]), '\t', metric.QWkappa(data[head[0]], data[head[1]]) print head[0], '\t', head[2], '\t', metric.accuracy(data[head[0]], data[head[2]]), '\t', metric.kappa(data[head[0]], data[head[2]]), '\t', metric.QWkappa(data[head[0]], data[head[2]]) print head[1], '\t', head[2], '\t', metric.accuracy(data[head[1]], data[head[2]]), '\t', metric.kappa(data[head[1]], data[head[2]]), '\t', metric.QWkappa(data[head[1]], data[head[2]]) print '', '\t', 'Average', '\t', np.mean([metric.accuracy(data[head[0]], data[head[1]]), metric.accuracy(data[head[0]], data[head[2]]), metric.accuracy(data[head[1]], data[head[2]])]), '\t',\ np.mean([metric.kappa(data[head[0]], data[head[1]]), metric.kappa(data[head[0]], data[head[2]]), metric.kappa(data[head[1]], data[head[2]])]), '\t',\ np.mean([metric.QWkappa(data[head[0]], data[head[1]]), metric.QWkappa(data[head[0]], data[head[2]]), metric.QWkappa(data[head[1]], data[head[2]])]) print metric.confusion_matrix(data[head[0]], data[head[1]]) return 0
def get_metrics_H1(datadir):
    """Score the H1 label files and write the results to ``H1.txt``.

    Each listed file is read with ``load_label(datadir + name)``; the first
    two returned values (labels and predictions) are scored with
    ``Metric.accuracy``, ``Metric.kappa`` and ``Metric.QWkappa``.  One row
    ``[file name, accuracy, kappa, QWkappa]`` per file is written, without
    a header, to ``datadir + 'H1.txt'`` through ``fio.WriteMatrix``.  The
    output path is also printed.

    :param datadir: directory prefix; it is concatenated with the file
        names as-is, no path separator is inserted.
    """
    label_files = (
        'quality_WC_Unigram.label',
        'quality_WC_Unigam_NonZero.label',
        'quality_WC_Unigam_Content.label',
        'quality_WC_Unigam_OrgAssign.label',
        'quality_WC_Unigam_Speciteller.label',
        'quality_WC_Unigam_Title.label',
        'quality_DT.label',
        'quality_New.label',
        # 'quality_New-Title.label',  (disabled)
        'quality_firstnode.label',
    )
    scorer = Metric()

    def score(name):
        # Third value returned by load_label is not used here.
        gold, predicted, _ = load_label(datadir + name)
        return [
            name,
            scorer.accuracy(gold, predicted),
            scorer.kappa(gold, predicted),
            scorer.QWkappa(gold, predicted),
        ]

    table = [score(name) for name in label_files]

    report_path = datadir + 'H1.txt'
    print(report_path)
    fio.WriteMatrix(report_path, table, header=None)
def get_metrics_H2c(datadir):
    """Score the cross-course test label files and write ``H2c.txt``.

    First ``fix_firstnode`` is called for the
    ``quality_CrossCourse_Rubric_firstnode`` test labels, with the
    ``_test.label`` file, the ``_test_0.txt`` file and the
    ``..._firstnode_fixed_test.label`` target path (what the helper does
    with them is not visible here).

    Then every ``<feature>_test.label`` file is printed, loaded with
    ``load_label`` and scored; one row
    ``[file name, accuracy, kappa, QWkappa]`` per file is written, without
    a header, to ``datadir + 'H2c.txt'``.

    :param datadir: directory prefix; concatenated with file names as-is.
    """
    scorer = Metric()

    raw_stem = datadir + 'quality_CrossCourse_Rubric_firstnode'
    fixed_stem = datadir + 'quality_CrossCourse_Rubric_firstnode_fixed'
    fix_firstnode(
        raw_stem + '_test.label',
        raw_stem + '_test_0.txt',
        fixed_stem + '_test.label',
    )

    feature_names = (
        'quality_CrossCourse_WC_Unigram',
        'quality_CrossCourse_Rubric',
        'quality_CrossCourse_Rubric_firstnode_fixed',
        'quality_CrossCourse_DT',
    )

    table = []
    for stem in feature_names:
        name = stem + '_test.label'
        print(name)

        gold, predicted, _ = load_label(datadir + name)
        table.append([
            name,
            scorer.accuracy(gold, predicted),
            scorer.kappa(gold, predicted),
            scorer.QWkappa(gold, predicted),
        ])

    report_path = datadir + 'H2c.txt'
    print(report_path)
    fio.WriteMatrix(report_path, table, header=None)
def get_metrics_H2b(datadir):
    """Score the cross-topic test label files and write ``H2b.txt``.

    For lectures 1 through 8, ``fix_firstnode`` is first called on the
    ``quality_CrossTopic_Rubric_firstnode_<n>`` files (``_test.label``,
    ``_test_0.txt``) with the matching ``..._firstnode_fixed_<n>_test.label``
    target path.

    Afterwards, for every feature prefix and every lecture, the file
    ``<prefix><n>_test.label`` is printed, loaded with ``load_label`` and
    scored.  One row ``[file name, accuracy, kappa, QWkappa]`` per file is
    written, without a header, to ``datadir + 'H2b.txt'``.

    :param datadir: directory prefix; concatenated with file names as-is.
    """
    scorer = Metric()
    lectures = range(1, 9)

    raw_prefix = datadir + 'quality_CrossTopic_Rubric_firstnode_'
    fixed_prefix = datadir + 'quality_CrossTopic_Rubric_firstnode_fixed_'
    for number in lectures:
        tag = str(number)
        fix_firstnode(
            raw_prefix + tag + '_test.label',
            raw_prefix + tag + '_test_0.txt',
            fixed_prefix + tag + '_test.label',
        )

    feature_prefixes = (
        'quality_CrossTopic_WC_Unigram_',
        'quality_CrossTopic_Rubric_',
        'quality_CrossTopic_Rubric_firstnode_fixed_',
        'quality_CrossTopic_DT_',
    )

    table = []
    for prefix in feature_prefixes:
        for number in lectures:
            name = prefix + str(number) + '_test.label'
            print(name)

            gold, predicted, _ = load_label(datadir + name)
            table.append([
                name,
                scorer.accuracy(gold, predicted),
                scorer.kappa(gold, predicted),
                scorer.QWkappa(gold, predicted),
            ])

    report_path = datadir + 'H2b.txt'
    print(report_path)
    fio.WriteMatrix(report_path, table, header=None)
def evaluate(self):
    """Score the model's predictions on ``self.test_file``.

    The feature sets come from ``self._get_featuresets(self.test_file)``;
    element 0 of each entry is passed to ``self._model.classify_many`` and
    element 1 is used as the reference label.  Labels and predictions are
    both converted with ``int`` before scoring.

    :returns: tuple ``(accuracy, kappa, QWkappa)`` as computed by
        ``Metric``.
    """
    examples = self._get_featuresets(self.test_file)

    # Two separate passes, reference labels first, then the features.
    gold = [int(example[1]) for example in examples]
    features = [example[0] for example in examples]

    predicted = [int(guess) for guess in self._model.classify_many(features)]

    scorer = Metric()
    accuracy = scorer.accuracy(gold, predicted)
    kappa = scorer.kappa(gold, predicted)
    qw_kappa = scorer.QWkappa(gold, predicted)
    return accuracy, kappa, qw_kappa
def get_metrics_NewCourse(datadir):
    """Score the new-course result files and write ``H_NewCourse.txt``.

    Each of ``DT.txt``, ``DT_NoneZero.txt``, ``Rubric.txt`` and
    ``Rubric_NoneZero.txt`` is loaded with ``load_label(datadir + name)``
    and scored with ``Metric.accuracy``, ``Metric.kappa`` and
    ``Metric.QWkappa``.  One row ``[file name, accuracy, kappa, QWkappa]``
    per file is written, without a header, to
    ``datadir + 'H_NewCourse.txt'``; that path is also printed.

    :param datadir: directory prefix; concatenated with file names as-is.
    """
    result_files = (
        'DT.txt',
        'DT_NoneZero.txt',
        'Rubric.txt',
        'Rubric_NoneZero.txt',
    )
    scorer = Metric()

    table = []
    for name in result_files:
        gold, predicted, _ = load_label(datadir + name)
        table.append([
            name,
            scorer.accuracy(gold, predicted),
            scorer.kappa(gold, predicted),
            scorer.QWkappa(gold, predicted),
        ])

    report_path = datadir + 'H_NewCourse.txt'
    print(report_path)
    fio.WriteMatrix(report_path, table, header=None)
def get_metrics_H1_CV(datadir):
    """Score the per-fold H1 test label files and write ``H1_cv.txt``.

    For folds 0 through 9, ``fix_firstnode`` is first called on the
    ``quality_rubric_firstnode_<fold>`` files (``_test.label``,
    ``_test_0.txt``) with the matching
    ``quality_rubric_firstnode_fixed_<fold>_test.label`` target path.

    Afterwards, for every feature prefix and every fold, the file
    ``<prefix><fold>_test.label`` is printed, loaded with ``load_label``
    and scored.  One row ``[file name, accuracy, kappa, QWkappa]`` per file
    is written, without a header, to ``datadir + 'H1_cv.txt'``.

    :param datadir: directory prefix; concatenated with file names as-is.
    """
    scorer = Metric()
    folds = range(0, 10)

    # fix firstnode
    raw_prefix = datadir + 'quality_rubric_firstnode_'
    fixed_prefix = datadir + 'quality_rubric_firstnode_fixed_'
    for fold in folds:
        tag = str(fold)
        fix_firstnode(
            raw_prefix + tag + '_test.label',
            raw_prefix + tag + '_test_0.txt',
            fixed_prefix + tag + '_test.label',
        )

    feature_prefixes = (
        'quality_WC_Unigram_',
        'quality_WC_Unigam_NonZero_',
        'quality_WC_Unigam_Content_',
        'quality_WC_Unigam_OrgAssign_',
        'quality_WC_Unigam_Speciteller_',
        'quality_WC_Unigam_Title_',
        'quality_rubric_',
        'quality_rubric_firstnode_fixed_',
        'quality_DT_',
    )

    table = []
    for prefix in feature_prefixes:
        for fold in folds:
            name = prefix + str(fold) + '_test.label'
            print(name)

            gold, predicted, _ = load_label(datadir + name)
            table.append([
                name,
                scorer.accuracy(gold, predicted),
                scorer.kappa(gold, predicted),
                scorer.QWkappa(gold, predicted),
            ])

    report_path = datadir + 'H1_cv.txt'
    print(report_path)
    fio.WriteMatrix(report_path, table, header=None)
def get_metrics_H0(datadir):
    """Score the H0 label files and write ``H0.txt`` plus confusion matrices.

    For each of ``quality_rubric``, ``quality_binary_model``,
    ``quality_New`` and ``quality_firstnode`` the file ``<feature>.label``
    is printed and loaded with ``load_label``.  Its confusion matrix
    (``Metric.confusion_matrix``) is written to ``<file>.cm`` and a row
    ``[file name, accuracy, kappa, QWkappa]`` is collected.  The rows are
    written, without a header, to ``datadir + 'H0.txt'``; that path is also
    printed.

    :param datadir: directory prefix; concatenated with file names as-is.
    """
    scorer = Metric()
    feature_names = (
        'quality_rubric',
        'quality_binary_model',
        'quality_New',
        'quality_firstnode',
    )

    table = []
    for stem in feature_names:
        name = stem + '.label'
        print(name)

        gold, predicted, _ = load_label(datadir + name)

        # The confusion matrix goes into its own file next to the labels.
        matrix_path = datadir + name + '.cm'
        fio.WriteMatrix(matrix_path, scorer.confusion_matrix(gold, predicted), None)

        # NOTE: the cv_accuracy / cv_kappa / cv_QWkappa columns are
        # disabled (commented out) in this function.
        table.append([
            name,
            scorer.accuracy(gold, predicted),
            scorer.kappa(gold, predicted),
            scorer.QWkappa(gold, predicted),
        ])

    report_path = datadir + 'H0.txt'
    print(report_path)
    fio.WriteMatrix(report_path, table, header=None)