def predict(self, X):
    """Score one test batch.

    The batch is run through the lectures pipeline, lecture rows are
    removed, and -- when any rows remain -- the main pipeline, the context
    bookkeeping and the model are applied in that order.

    Args:
        X: Raw batch; it must provide a ``row_id`` column once lectures
            have been removed.

    Returns:
        A ``(X, predictions)`` tuple. ``X`` is the batch after the main
        pipeline (or the empty, lecture-free batch when nothing was left to
        score). ``predictions`` holds the columns ``row_id`` and
        ``answered_correctly``.
    """
    batch_no = self.test_batch
    # NOTE(review): ``% 1`` is always 0 for integer counters, so every batch
    # is logged; the modulus looks like a tunable logging interval.
    if batch_no % 1 == 0:
        logging.info('Running test batch {}'.format(batch_no))

    rows = RiiidModel.remove_lectures(self.lectures_pipeline.transform(X))

    if len(rows) == 0:
        # Nothing to score: hand back an empty frame with the expected columns.
        scored = pd.DataFrame(columns=['row_id', 'answered_correctly'])
    else:
        # Capture the row ids before the pipeline replaces the frame.
        scored = rows[['row_id']].copy()
        rows = self.pipeline.transform(rows)

        # The context calls are order-sensitive: priors are applied, the
        # model inputs are built, and only then is the context rolled and
        # refreshed with the independent features.
        self._update_context_with_priors(rows)
        model_inputs = self._create_prediction_data(rows)
        self._roll_context_on_priors(rows)
        self._update_context(rows, self.independent_features)

        # Keep the last entry along the second and third axes of the output
        # (presumably the most recent step of the sequence -- TODO confirm).
        raw_output = self.model.predict(model_inputs)
        scored['answered_correctly'] = raw_output[:, -1, -1]

    self.test_batch += 1
    return rows, scored
def fit_transform(self, X):
    """Fit both pipelines on the training data and return the transformed rows.

    Steps, in order: initialise the fit state, fit/apply the lectures
    pipeline, drop lecture rows, build the cross-validation object from the
    remaining rows, fit/apply the main pipeline, and create the context
    from the result.

    Args:
        X: Training data handed to ``_init_fit`` and the lectures pipeline.

    Returns:
        The output of the main pipeline's ``fit_transform`` on the
        lecture-free rows.
    """
    logging.info('- Fit')
    self._init_fit(X)

    # Stage 1: lectures pipeline, then drop the lecture rows.
    lectures_step = LecturesTransformer(self.lectures)
    self.lectures_pipeline = make_pipeline(lectures_step)
    questions_only = RiiidModel.remove_lectures(
        self.lectures_pipeline.fit_transform(X)
    )

    # Stage 2: the CV object is built from the lecture-free rows and passed
    # to the score encoder.
    folds = self._build_cv(questions_only)
    score_encoder = ScoreEncoder(
        'content_id',
        cv=folds,
        smoothing_min=5,
        smoothing_value=1,
        noise=0.005,
    )
    questions_step = QuestionsTransformer(
        self.questions,
        time_bins=self.time_bins,
        lag_bins=self.lag_bins,
    )
    self.pipeline = make_pipeline(score_encoder, questions_step)

    features = self.pipeline.fit_transform(questions_only)
    self._create_context(features)
    return features
def update(self, test):
    """Feed the answers of the previous batch back into the model state.

    The first row of ``test`` carries, as strings, the responses and
    correctness flags of the batch seen by the previous ``update`` call.
    They are attached to that stored batch, which is then used to update
    the main pipeline and the context. Finally the current batch is stored
    for the next call.

    Args:
        test: Current batch; it must contain the columns
            ``prior_group_responses`` and ``prior_group_answers_correct``
            with a Python-literal string in the first row.

    Returns:
        ``test`` without the two ``prior_group_*`` columns.

    Raises:
        ValueError: If a ``prior_group_*`` value is not a plain Python
            literal, or (raised by pandas) if the decoded list does not
            match the length of the stored batch.
    """
    # Local import: the file's import block is outside this view.
    import ast

    # SECURITY: these strings come from the incoming data. ``eval`` would
    # execute any expression found there; ``literal_eval`` only accepts
    # plain literals such as "[0, 1, 3]" and rejects everything else.
    prior_user_answer = ast.literal_eval(test['prior_group_responses'].values[0])
    prior_answered_correctly = ast.literal_eval(
        test['prior_group_answers_correct'].values[0]
    )
    test = test.drop(columns=['prior_group_answers_correct', 'prior_group_responses'])

    if self.previous_test is not None:
        # Label the stored batch with the answers that just arrived.
        self.previous_test['user_answer'] = prior_user_answer
        self.previous_test['answered_correctly'] = prior_answered_correctly

        # The lectures pipeline is deliberately not updated here (the
        # original author marked that step as "not required").
        X = RiiidModel.remove_lectures(self.previous_test)
        if len(X) > 0:
            y = X['answered_correctly']
            X = update_pipeline(self.pipeline, X, y)
            self._update_context(X, self.dependent_features)

    # Store a copy so that labelling it on the next call does not mutate
    # the frame returned to the caller.
    self.previous_test = test.copy()
    return test