def testQWK(self):
    """Check quadratic_weighted_kappa against known reference values.

    The first group of cases must match the expected value exactly
    (assertEqual); the second group is compared with assertAlmostEqual's
    default tolerance.  Each case is (expected, first_ratings,
    second_ratings), passed to quadratic_weighted_kappa in that order.
    """
    exact_cases = (
        # Identical rating sequences.
        (1, [0, 1, 2, 3], [0, 1, 2, 3]),
        # Ratings at opposite ends of the scale for every item.
        (-1, [3, 0, 3, 0], [0, 3, 0, 3]),
        # Both sequences are constant, but at different values.
        (0, [2, 2, 2, 2], [1, 1, 1, 1]),
        (0, [3, 3, 3, 3], [0, 0, 0, 0]),
    )
    for expected, first_ratings, second_ratings in exact_cases:
        self.assertEqual(
            expected, quadratic_weighted_kappa(first_ratings, second_ratings))

    approximate_cases = (
        (-0.15384615, [2, 2, 2, 2, 0, 2], [2, 2, 2, 2, 2, 1]),
        (-0.2, [2, 2, 2, 2, 1, 2], [2, 2, 2, 2, 2, 1]),
        (0.76923076, [2, 2, 2, 2, 2, 0], [2, 2, 2, 2, 2, 1]),
        (0.875, [0, 0, 1, 1, 2, 2, 3, 3], [0, 0, 1, 1, 2, 2, 2, 2]),
        (0.916666666, [0, 0, 1, 1, 2, 2, 3, 3], [0, 0, 1, 1, 3, 3, 3, 3]),
    )
    for expected, first_ratings, second_ratings in approximate_cases:
        self.assertAlmostEqual(
            expected, quadratic_weighted_kappa(first_ratings, second_ratings))
def KappaForPredictor(predictor, signals, questions, extra_filter=util.FTrue, out_of_bag=True):
    """Return the quadratic weighted kappa of a predictor on selected examples.

    An example index is selected when its entry in ``G.question`` is in
    ``questions`` and ``extra_filter(index)`` is truthy.

    Args:
        predictor: object exposing ``predict_out_of_bag(index, features)``
            and ``predict(index, features)``.
        signals: iterable of signals, each indexable by example index; the
            features passed for an example are
            ``[sig[index] for sig in signals]``.
        questions: container tested with ``in`` against the entries of
            ``G.question``.
        extra_filter: callable taking an example index; examples for which
            it returns a falsy value are skipped.
        out_of_bag: when true use ``predictor.predict_out_of_bag``,
            otherwise ``predictor.predict``.

    Returns:
        ``kappa.quadratic_weighted_kappa(actual, predicted)`` where
        ``actual`` comes from ``G.score`` and ``predicted`` holds the
        predictions rounded to the nearest integer.
    """
    predict = predictor.predict_out_of_bag if out_of_bag else predictor.predict

    # Select the examples once.  Building `predicted` and `actual` from the
    # same index list guarantees that they stay aligned even if extra_filter
    # is stateful, and calls the filter only once per example.
    selected = [index for index, question in enumerate(G.question)
                if question in questions and extra_filter(index)]

    predicted = [int(round(predict(index, [sig[index] for sig in signals])))
                 for index in selected]
    actual = [G.score[index] for index in selected]
    return kappa.quadratic_weighted_kappa(actual, predicted)
def RenderPredictorProgress(av_p, boost_params, filter_train, list_of_signals, q, steps):
    """Render train/test kappa and loss curves over the first `steps` steps.

    Examples accepted by `filter_train` form the train split and all other
    examples form the test split.  For each step in ``range(steps)`` the
    predictions from ``av_p.PredictionAtSteps`` are scored with
    ``kappa.quadratic_weighted_kappa`` (after ``util.IntRound``) and with
    ``av_p.Loss``; the four resulting series are handed to ``Render`` with
    ``boost_params`` formatted as the title.

    Args:
        av_p: predictor exposing ``PredictionAtSteps`` and ``Loss``.
        boost_params: value formatted with ``'%s'`` into the plot title.
        filter_train: callable taking an example id; truthy selects the
            example for the train split.
        list_of_signals: forwarded to ``signal.ListOfSignals``.
        q: question passed to ``signal.ListOfSignals`` and
            ``G.score.ValuesForQuestion``.
        steps: number of steps to evaluate.
    """
    def filter_test(example_id):
        # The test split is the complement of the train split.
        return not filter_train(example_id)

    train_signals = signal.ListOfSignals(list_of_signals, q, extra_filter=filter_train)
    test_signals = signal.ListOfSignals(list_of_signals, q, extra_filter=filter_test)
    train_scores = G.score.ValuesForQuestion(q, extra_filter=filter_train)
    test_scores = G.score.ValuesForQuestion(q, extra_filter=filter_test)

    train_predictions = av_p.PredictionAtSteps(train_signals)
    test_predictions = av_p.PredictionAtSteps(test_signals)

    train_kappas = []
    test_kappas = []
    train_losses = []
    test_losses = []
    for step in range(steps):
        step_train = train_predictions[step]
        step_test = test_predictions[step]

        train_kappas.append(
            kappa.quadratic_weighted_kappa(util.IntRound(step_train), train_scores))
        test_kappas.append(
            kappa.quadratic_weighted_kappa(util.IntRound(step_test), test_scores))
        train_losses.append(
            av_p.Loss(numpy.array(step_train), numpy.array(train_scores)))
        test_losses.append(
            av_p.Loss(numpy.array(step_test), numpy.array(test_scores)))

    # The test-kappa legend entry also reports the best value reached.
    test_kappa_label = 'kappa_test: %.2f' % max(test_kappas)
    series = [
        (train_kappas, 'blue', 'kappa_train'),
        (test_kappas, 'purple', test_kappa_label),
        (train_losses, 'red', 'loss_train'),
        (test_losses, 'orange', 'loss_test'),
    ]
    Render(series, title='%s' % boost_params)
def EvalPerQuestion(raw_scores, extra_filter=FTrue, only_questions=None):
    """Compute the quadratic weighted kappa of `raw_scores` for each question.

    The golden scores are ``G.average_score`` when
    ``FLAGS.use_average_score`` is set and ``G.score`` otherwise.

    Args:
        raw_scores: predicted scores; must have the same length as the
            golden scores.
        extra_filter: filter forwarded to ``FilterAndScaleScores`` and to
            ``golden.ValuesForQuestion``.
        only_questions: iterable of question indices to evaluate, each in
            ``[0, signal.NUM_QUESTIONS)``.  ``None`` (the default) evaluates
            every question.  An explicitly supplied value must be non-empty.

    Returns:
        A list of length ``signal.NUM_QUESTIONS``.  Entry ``q`` is the kappa
        for question ``q`` if it was evaluated and ``signal.UNKNOWN``
        otherwise.

    Raises:
        AssertionError: if `only_questions` is empty or contains an index
            out of range, or if `raw_scores` and the golden scores differ in
            length (also inside the loop if the filtered lengths differ).
    """
    golden = G.average_score if FLAGS.use_average_score else G.score
    if only_questions is None:
        # Derive the default from NUM_QUESTIONS instead of a hard-coded 10 so
        # it always agrees with the size of `vals` and the range check below.
        only_questions = range(signal.NUM_QUESTIONS)
    else:
        # Materialize so a one-shot iterator is not exhausted by the
        # validation below before the evaluation loop runs.
        only_questions = list(only_questions)
        assert only_questions
        assert all(0 <= q < signal.NUM_QUESTIONS for q in only_questions), only_questions
    assert len(golden) == len(raw_scores), '%d != %d' % (len(golden), len(raw_scores))

    vals = [signal.UNKNOWN] * signal.NUM_QUESTIONS
    for q in only_questions:
        scaled_scores = FilterAndScaleScores(raw_scores, extra_filter, q)
        filtered_golden_scores = golden.ValuesForQuestion(q, extra_filter=extra_filter)
        assert len(scaled_scores) == len(filtered_golden_scores)
        vals[q] = kappa.quadratic_weighted_kappa(scaled_scores, filtered_golden_scores)
    return vals