Esempio n. 1
0
    def load_predictions(self, questions):
        """Cache the classifier's class probabilities for each question.

        For every question a feature row is built from the result of
        ``self.get_answer`` followed by the result of
        ``solver_utils.get_feature_vector``. All rows are sent through a
        single ``self.classifier.predict_proba`` call and each returned row
        is stored in ``self.soft_cache`` under ``question.id``.

        Args:
            questions: Iterable of question objects exposing an ``id``
                attribute. One-shot iterables (generators) are accepted.

        Returns:
            None. Results are written to ``self.soft_cache``.

        Raises:
            SystemExit: If ``self.classifier`` is falsy, i.e. no classifier
                has been trained yet.
        """
        if not self.classifier:
            # Handing the message to sys.exit() writes it to stderr and gives
            # the process a non-zero exit status, so the failure is not
            # reported as a successful run.
            sys.exit("Classifier hasn't been trained yet")

        # Materialise once: the questions are walked twice below (features,
        # then cache fill), which would silently skip the second pass for a
        # generator.
        questions = list(questions)
        if not questions:
            # Nothing to predict; scikit-learn's input validation raises on
            # an empty sample matrix, so skip the call entirely.
            return

        # NOTE(review): `+` is assumed to concatenate two list-like vectors
        # (scores first, question features second) - confirm against
        # get_answer / get_feature_vector.
        X = [
            self.get_answer(question)
            + solver_utils.get_feature_vector(question)
            for question in questions
        ]
        soft_predictions = self.classifier.predict_proba(X)

        # predict_proba yields one row per input row, in input order.
        for question, probabilities in zip(questions, soft_predictions):
            self.soft_cache[question.id] = probabilities
Esempio n. 2
0
    def train_classifier(self, train_questions, prefix):
        """Fit the calibrated classifier stored in ``self.classifier``.

        ``self.load_answer_cache(train_questions, prefix)`` is called first.
        Each training question then contributes one sample: the vector from
        ``self.get_answer`` followed by the one from
        ``solver_utils.get_feature_vector``. Its label is 1 when the letter
        of the highest-scoring option equals ``question.correct_answer`` and
        0 otherwise.

        The model is a 300-tree ``RandomForestClassifier`` wrapped in a
        ``CalibratedClassifierCV`` (sigmoid method, 10 folds).

        Args:
            train_questions: Question objects to train on.
            prefix: Forwarded unchanged to ``self.load_answer_cache``.
        """
        self.load_answer_cache(train_questions, prefix)

        option_letters = ["A", "B", "C", "D"]
        samples = []
        targets = []
        for item in train_questions:
            scores = self.get_answer(item)
            # Letter of the option with the highest score.
            chosen_letter = option_letters[np.argmax(scores)]
            is_correct = int(chosen_letter == item.correct_answer)
            extra_features = solver_utils.get_feature_vector(item)
            samples.append(scores + extra_features)
            targets.append(is_correct)

        forest = RandomForestClassifier(n_estimators=300, n_jobs=-1)
        model = CalibratedClassifierCV(forest, method='sigmoid', cv=10)
        model.fit(samples, targets)
        # Only publish the model once fitting has succeeded.
        self.classifier = model