def main():
    """Score the saved model against the private leaderboard solution.

    Reads the test frame from ``data_io``, converts every entry of its
    "BodyMarkdown" column with ``PagedownToHtml.convert``, loads the model
    stored as "model.pickle", asks it for class probabilities and prints the
    value of ``metrics.auc`` computed from the solution's "open" column and
    the second probability column.
    """
    markdown = PagedownToHtml()

    print("Reading the private leaderboard file")
    test = data_io.get_test_df()
    # Assign the whole column at once. The previous per-row form,
    # test["BodyMarkdown"][i] = ..., is chained indexed assignment: pandas may
    # apply it to a temporary copy, and with Copy-on-Write enabled it never
    # reaches the frame, so the model would see unconverted text.
    test["BodyMarkdown"] = test["BodyMarkdown"].map(markdown.convert)

    print("Loading the trained model")
    classifier = data_io.load_model("model.pickle")

    print("Making predictions")
    probs = classifier.predict_proba(test)

    solution = data_io.get_private_leaderboard_solution_df()
    # NOTE(review): if `metrics` is sklearn.metrics, auc(x, y) integrates a
    # curve from its points and is not a ROC AUC score; confirm which
    # `metrics` module is imported here.
    print("Open AUC: %0.6f" % metrics.auc(solution["open"], probs[:, 1]))
def main():
    """Predict on the cleaned test set and write the submission.

    Loads the saved model through ``data_io``, cleans the test frame with
    ``FeatureConverter.clean_data``, separates the 'Id' column from the
    remaining columns, predicts integer labels for those columns and hands
    labels, ids and ['Id', 'Cover_Type'] to ``data_io.write_submission``.
    """
    # NOTE(review): this message says "test data" although the call below
    # loads the model; the text is kept unchanged.
    print("Loading the test data")
    model = data_io.load_model()

    print("Load test data. And Clean..")
    frame = FeatureConverter().clean_data(data_io.get_test_df())

    # pop() returns the 'Id' column and removes it from the frame in place,
    # leaving only the columns that are fed to the model.
    row_ids = frame.pop('Id')
    feature_matrix = frame.values

    print("Making predictions")
    labels = model.predict(feature_matrix).astype(int)

    print("Writing predictions to file")
    data_io.write_submission(labels, row_ids, ['Id', 'Cover_Type'])
def main():
    """Run the saved model over the cleaned test data and save the results.

    Steps, in order: load the model via ``data_io.load_model``; read the test
    frame and pass it through ``FeatureConverter.clean_data``; set the 'Id'
    column aside and drop it from the frame; predict on the remaining values
    and cast the result to int; call ``data_io.write_submission`` with the
    predictions, the ids and ['Id', 'Cover_Type'].
    """
    # NOTE(review): the wording below mentions test data, but this step loads
    # the model; message text is left as it was.
    print("Loading the test data")
    estimator = data_io.load_model()

    print("Load test data. And Clean..")
    raw_frame = data_io.get_test_df()
    cleaned = FeatureConverter().clean_data(raw_frame)

    # Keep the identifiers for the output, then remove them so they are not
    # passed to the model as a feature.
    id_column = cleaned['Id']
    cleaned.drop(['Id'], axis=1, inplace=True)

    print("Making predictions")
    raw_output = estimator.predict(cleaned.values)
    as_integers = raw_output.astype(int)

    print("Writing predictions to file")
    data_io.write_submission(as_integers, id_column, ['Id', 'Cover_Type'])
def main():
    """Train the salary model on the merged training data and predict the test set.

    Reads the training feature and salary frames from ``data_io``, left-merges
    them, writes the merged frame to a CSV file, fits the pipeline returned by
    ``get_pipeline`` on the selected feature columns against the 'salary'
    column, saves the fitted model, predicts on the same feature columns of
    the test frame and passes the predictions, reshaped to a single column,
    to ``write_submission``.
    """
    print("Reading in the raw data of features and salaries for merging")
    # train_f: training feature data; train_s: training salary data with 0 items deleted
    train_f = data_io.get_train_f_df()
    train_s = data_io.get_train_s_df()

    # Merge the two frames, similar to a SQL left join. No `on=` is given, so
    # pandas joins on the columns the frames share (intended key per the
    # original comment: jobId).
    # NOTE(review): a left join keeps feature rows that have no salary match,
    # leaving their salary as NaN; confirm the pipeline tolerates that.
    data = pd.merge(train_f, train_s, how='left')
    data.to_csv("D:/job/indeed_data_science_exercise/RFC1/train9merge2.csv", sep=',', encoding='utf-8')

    # Separate the data into the feature columns and the target column
    # (salary only); 'companyId' is excluded from the features.
    characters = ["jobType", "degree", "major", "industry", "yearsExperience", "milesFromMetropolis"]
    x = data[characters]
    y = data[['salary']]

    print("Extracting features and training model")
    classifier = get_pipeline()
    # Fit on the x / y built above. The previous call passed xtr / ytr, which
    # are never defined in this function, so training raised NameError.
    classifier.fit(x, y)

    print("Saving the classifier")
    data_io.save_model(classifier)

    print("Load testing data")
    testin = data_io.get_test_df()
    test = testin[characters]

    print("Making predictions")
    predictions = classifier.predict(test)
    # Reshape to one prediction per row in a single column before writing.
    predictions = predictions.reshape(len(predictions), 1)

    print("Writing predictions to file")
    write_submission(predictions)