def main():
    """Train the classifier on the training set and save it to disk.

    Steps, in order:
      1. Load the training DataFrame via ``data_io.get_train_df()``.
      2. Run every ``BodyMarkdown`` value through ``PagedownToHtml.convert``
         (presumably Markdown -> HTML, going by the converter's name).
      3. Fit the pipeline returned by ``get_pipeline()`` on the full frame,
         using the ``OpenStatus`` column as the target.
      4. Save the fitted pipeline as ``model.pickle`` and read it back.
    """
    markdown = PagedownToHtml()

    print("Reading in the training data")
    train = data_io.get_train_df()

    # Replace the whole column in one assignment.  The previous per-row
    # ``train["BodyMarkdown"][i] = ...`` form is chained indexing: pandas
    # may write into a temporary copy instead of the frame (and never
    # updates the frame when Copy-on-Write is enabled).
    train["BodyMarkdown"] = train["BodyMarkdown"].map(markdown.convert)

    print("Extracting features and training")
    classifier = get_pipeline()
    classifier.fit(train, train["OpenStatus"])

    print("Saving the classifier")
    data_io.save_model(classifier, "model.pickle")

    # NOTE(review): the result of this reload was never used; the call is
    # kept on the assumption that it is a read-back sanity check of the
    # file just written -- confirm, or remove if not needed.
    data_io.load_model("model.pickle")
def main():
    """Score the saved classifier against the private leaderboard solution.

    Steps, in order:
      1. Load the test DataFrame via ``data_io.get_test_df()``.
      2. Run every ``BodyMarkdown`` value through ``PagedownToHtml.convert``
         (presumably Markdown -> HTML, going by the converter's name).
      3. Load the fitted model from ``model.pickle`` and call
         ``predict_proba`` on the test frame.
      4. Print the AUC of the predicted probabilities against the ``open``
         column of the private leaderboard solution frame.
    """
    markdown = PagedownToHtml()

    print("Reading the private leaderboard file")
    test = data_io.get_test_df()

    # Replace the whole column in one assignment.  The previous per-row
    # ``test["BodyMarkdown"][i] = ...`` form is chained indexing: pandas
    # may write into a temporary copy instead of the frame (and never
    # updates the frame when Copy-on-Write is enabled).
    test["BodyMarkdown"] = test["BodyMarkdown"].map(markdown.convert)

    print("Loading the trained model")
    classifier = data_io.load_model("model.pickle")

    print("Making predictions")
    probs = classifier.predict_proba(test)

    solution = data_io.get_private_leaderboard_solution_df()
    # Column 1 of the probability matrix is used as the score for the
    # "open" label -- presumably the second class of the classifier;
    # TODO confirm the class ordering matches solution["open"].
    print("Open AUC: %0.6f" % metrics.auc(solution["open"], probs[:, 1]))