Beispiel #1
0
def get_QWkappa(input):
    head,body = fio.ReadMatrix(input, True)
    
    metric = Metric()
    
    data = {}
    
    for i,row in enumerate(body):
        for coder, label in enumerate(row):
            if label == 'a': label = '0'
            label = int(label)
            
            if head[coder] not in data:
                data[ head[coder] ] = []
            data[ head[coder] ].append(label)
    
    print 'annototor 1', '\t','annototor 2', '\t', 'accuracy', '\t', 'kappa', '\t', 'QWkappa'
    print head[0], '\t', head[1], '\t', metric.accuracy(data[head[0]], data[head[1]]), '\t', metric.kappa(data[head[0]], data[head[1]]), '\t', metric.QWkappa(data[head[0]], data[head[1]])
    print head[0], '\t', head[2], '\t', metric.accuracy(data[head[0]], data[head[2]]), '\t', metric.kappa(data[head[0]], data[head[2]]), '\t', metric.QWkappa(data[head[0]], data[head[2]])
    print head[1], '\t', head[2], '\t', metric.accuracy(data[head[1]], data[head[2]]), '\t', metric.kappa(data[head[1]], data[head[2]]), '\t', metric.QWkappa(data[head[1]], data[head[2]])
    print '', '\t', 'Average', '\t', np.mean([metric.accuracy(data[head[0]], data[head[1]]), metric.accuracy(data[head[0]], data[head[2]]), metric.accuracy(data[head[1]], data[head[2]])]), '\t',\
        np.mean([metric.kappa(data[head[0]], data[head[1]]), metric.kappa(data[head[0]], data[head[2]]), metric.kappa(data[head[1]], data[head[2]])]), '\t',\
        np.mean([metric.QWkappa(data[head[0]], data[head[1]]), metric.QWkappa(data[head[0]], data[head[2]]), metric.QWkappa(data[head[1]], data[head[2]])])
    
    print metric.confusion_matrix(data[head[0]], data[head[1]])
    
    return 0
def get_metrics_H1(datadir):
    """Score the H1 label files found in *datadir* and save a results table.

    Each listed label file is read with ``load_label`` (labels, predictions
    and a third value that is ignored) and scored with accuracy, kappa and
    QWkappa.  One row per file -- the file name followed by the three scores --
    is written to ``datadir + 'H1.txt'`` through ``fio.WriteMatrix``.  The
    output path is printed before writing.

    ``datadir`` is joined to the file names by plain string concatenation, so
    it presumably has to end with a path separator.
    """
    label_files = [
        'quality_WC_Unigram.label',
        'quality_WC_Unigam_NonZero.label',
        'quality_WC_Unigam_Content.label',
        'quality_WC_Unigam_OrgAssign.label',
        'quality_WC_Unigam_Speciteller.label',
        'quality_WC_Unigam_Title.label',
        'quality_DT.label',
        'quality_New.label',
        # 'quality_New-Title.label' is currently left out.
        'quality_firstnode.label',
    ]

    scorer = Metric()

    table = []
    for label_file in label_files:
        gold, predicted, _ = load_label(datadir + label_file)
        table.append([
            label_file,
            scorer.accuracy(gold, predicted),
            scorer.kappa(gold, predicted),
            scorer.QWkappa(gold, predicted),
        ])

    result_path = datadir + 'H1.txt'
    print(result_path)
    fio.WriteMatrix(result_path, table, header=None)
def get_metrics_H2c(datadir):
    """Score the cross-course (H2c) test label files and save a results table.

    First ``fix_firstnode`` is called for the rubric first-node feature with
    the raw ``_test.label`` file, the matching ``_test_0.txt`` file and the
    path of the ``..._fixed_test.label`` file; that last path is one of the
    files scored afterwards.  Then every listed feature's ``_test.label``
    file is read with ``load_label`` and scored with accuracy, kappa and
    QWkappa.  The rows (file name plus the three scores) are written to
    ``datadir + 'H2c.txt'``.

    Each label file name and the output path are printed as they are handled.
    """
    scorer = Metric()

    raw_name = 'quality_CrossCourse_Rubric_firstnode'
    fixed_name = 'quality_CrossCourse_Rubric_firstnode_fixed'
    fix_firstnode(
        datadir + raw_name + '_test.label',
        datadir + raw_name + '_test_0.txt',
        datadir + fixed_name + '_test.label'
    )

    feature_names = (
        'quality_CrossCourse_WC_Unigram',
        'quality_CrossCourse_Rubric',
        'quality_CrossCourse_Rubric_firstnode_fixed',
        'quality_CrossCourse_DT',
    )

    table = []
    for feature_name in feature_names:
        label_file = feature_name + '_test.label'
        print(label_file)

        gold, predicted, _ = load_label(datadir + label_file)
        table.append([
            label_file,
            scorer.accuracy(gold, predicted),
            scorer.kappa(gold, predicted),
            scorer.QWkappa(gold, predicted),
        ])

    result_path = datadir + 'H2c.txt'
    print(result_path)
    fio.WriteMatrix(result_path, table, header=None)
def get_metrics_H2b(datadir):
    """Score the cross-topic (H2b) test label files and save a results table.

    Lectures 1 through 8 are processed.  For each lecture ``fix_firstnode``
    is first called with the rubric first-node ``_test.label`` file, its
    ``_test_0.txt`` companion and the path of the ``..._fixed_`` label file,
    which is among the files scored afterwards.  Then, feature by feature and
    lecture by lecture, the ``_test.label`` file is read with ``load_label``
    and scored with accuracy, kappa and QWkappa.  The rows (file name plus the
    three scores) are written to ``datadir + 'H2b.txt'``.

    Each label file name and the output path are printed as they are handled.
    """
    scorer = Metric()

    lecture_ids = range(1, 9)

    raw_prefix = 'quality_CrossTopic_Rubric_firstnode_'
    fixed_prefix = 'quality_CrossTopic_Rubric_firstnode_fixed_'
    for lecture_id in lecture_ids:
        tag = str(lecture_id)
        fix_firstnode(
            datadir + raw_prefix + tag + '_test.label',
            datadir + raw_prefix + tag + '_test_0.txt',
            datadir + fixed_prefix + tag + '_test.label'
        )

    feature_prefixes = (
        'quality_CrossTopic_WC_Unigram_',
        'quality_CrossTopic_Rubric_',
        'quality_CrossTopic_Rubric_firstnode_fixed_',
        'quality_CrossTopic_DT_',
    )

    table = []
    for prefix in feature_prefixes:
        for lecture_id in lecture_ids:
            label_file = prefix + str(lecture_id) + '_test.label'
            print(label_file)

            gold, predicted, _ = load_label(datadir + label_file)
            table.append([
                label_file,
                scorer.accuracy(gold, predicted),
                scorer.kappa(gold, predicted),
                scorer.QWkappa(gold, predicted),
            ])

    result_path = datadir + 'H2b.txt'
    print(result_path)
    fio.WriteMatrix(result_path, table, header=None)
 def evaluate(self):
     """Score the model's predictions on ``self.test_file``.

     The feature sets come from ``self._get_featuresets(self.test_file)``;
     each item carries the features at index 0 and the label at index 1.
     Labels and the outputs of ``self._model.classify_many`` are both
     converted to ``int`` before scoring.

     Returns:
         A tuple ``(accuracy, kappa, QWkappa)`` computed by ``Metric``.
     """
     examples = self._get_featuresets(self.test_file)

     gold = [int(example[1]) for example in examples]
     features = [example[0] for example in examples]
     predicted = [int(guess) for guess in self._model.classify_many(features)]

     scorer = Metric()

     accuracy = scorer.accuracy(gold, predicted)
     kappa = scorer.kappa(gold, predicted)
     qwkappa = scorer.QWkappa(gold, predicted)
     return accuracy, kappa, qwkappa
def get_metrics_NewCourse(datadir):
    """Score the new-course result files in *datadir* and save a table.

    Each listed file is read with ``load_label`` (labels, predictions and a
    third value that is ignored) and scored with accuracy, kappa and QWkappa.
    One row per file -- the file name followed by the three scores -- is
    written to ``datadir + 'H_NewCourse.txt'`` through ``fio.WriteMatrix``.
    The output path is printed before writing.
    """
    result_files = [
        'DT.txt',
        'DT_NoneZero.txt',
        'Rubric.txt',
        'Rubric_NoneZero.txt',
    ]

    scorer = Metric()

    table = []
    for result_file in result_files:
        gold, predicted, _ = load_label(datadir + result_file)
        table.append([
            result_file,
            scorer.accuracy(gold, predicted),
            scorer.kappa(gold, predicted),
            scorer.QWkappa(gold, predicted),
        ])

    result_path = datadir + 'H_NewCourse.txt'
    print(result_path)
    fio.WriteMatrix(result_path, table, header=None)
def get_metrics_H1_CV(datadir):
    """Score the per-fold (folds 0-9) H1 test label files and save a table.

    For every fold ``fix_firstnode`` is first called with the rubric
    first-node ``_test.label`` file, its ``_test_0.txt`` companion and the
    path of the ``..._fixed_`` label file, which is among the files scored
    afterwards.  Then, feature by feature and fold by fold, the
    ``_test.label`` file is read with ``load_label`` and scored with accuracy,
    kappa and QWkappa.  The rows (file name plus the three scores) are written
    to ``datadir + 'H1_cv.txt'``.

    Each label file name and the output path are printed as they are handled.
    """
    scorer = Metric()

    fold_ids = range(0, 10)

    # Fix the first-node label files for every fold before scoring.
    raw_prefix = 'quality_rubric_firstnode_'
    fixed_prefix = 'quality_rubric_firstnode_fixed_'
    for fold_id in fold_ids:
        tag = str(fold_id)
        fix_firstnode(
            datadir + raw_prefix + tag + '_test.label',
            datadir + raw_prefix + tag + '_test_0.txt',
            datadir + fixed_prefix + tag + '_test.label'
        )

    feature_prefixes = (
        'quality_WC_Unigram_',
        'quality_WC_Unigam_NonZero_',
        'quality_WC_Unigam_Content_',
        'quality_WC_Unigam_OrgAssign_',
        'quality_WC_Unigam_Speciteller_',
        'quality_WC_Unigam_Title_',
        'quality_rubric_',
        'quality_rubric_firstnode_fixed_',
        'quality_DT_',
    )

    table = []
    for prefix in feature_prefixes:
        for fold_id in fold_ids:
            label_file = prefix + str(fold_id) + '_test.label'
            print(label_file)

            gold, predicted, _ = load_label(datadir + label_file)
            table.append([
                label_file,
                scorer.accuracy(gold, predicted),
                scorer.kappa(gold, predicted),
                scorer.QWkappa(gold, predicted),
            ])

    result_path = datadir + 'H1_cv.txt'
    print(result_path)
    fio.WriteMatrix(result_path, table, header=None)
def get_metrics_H0(datadir):
    """Score the H0 label files, saving confusion matrices and a score table.

    For each listed feature the ``.label`` file is read with ``load_label``.
    Its confusion matrix is written next to it with an added ``.cm`` suffix,
    and a row holding the file name, accuracy, kappa and QWkappa is collected.
    All rows are finally written to ``datadir + 'H0.txt'``.

    Each label file name and the output path are printed as they are handled.
    """
    scorer = Metric()

    feature_names = (
        'quality_rubric',
        'quality_binary_model',
        'quality_New',
        'quality_firstnode',
    )

    table = []
    for feature_name in feature_names:
        label_file = feature_name + '.label'
        print(label_file)

        gold, predicted, _ = load_label(datadir + label_file)

        confusion = scorer.confusion_matrix(gold, predicted)
        fio.WriteMatrix(datadir + label_file + '.cm', confusion, None)

        # The cv_accuracy / cv_kappa / cv_QWkappa columns are currently disabled.
        table.append([
            label_file,
            scorer.accuracy(gold, predicted),
            scorer.kappa(gold, predicted),
            scorer.QWkappa(gold, predicted),
        ])

    result_path = datadir + 'H0.txt'
    print(result_path)
    fio.WriteMatrix(result_path, table, header=None)