def main():
    """Print the AUC of the saved model on the private leaderboard data.

    Reads the test frame through ``data_io.get_test_df``, converts every
    ``BodyMarkdown`` entry with ``PagedownToHtml.convert``, loads the model
    via ``data_io.load_model("model.pickle")`` and prints the AUC of the
    second ``predict_proba`` column against the ``open`` column of the
    private leaderboard solution.
    """
    markdown = PagedownToHtml()

    print("Reading the private leaderboard file")
    test = data_io.get_test_df()
    # Replace the column in one assignment. The former per-row chained
    # assignment (test[col][i] = ...) can end up writing to a temporary copy,
    # and pandas does not guarantee that it updates the frame.
    test["BodyMarkdown"] = [
        markdown.convert(body) for body in test["BodyMarkdown"]
    ]

    print("Loading the trained model")
    classifier = data_io.load_model("model.pickle")

    print("Making predictions")
    probs = classifier.predict_proba(test)

    solution = data_io.get_private_leaderboard_solution_df()
    # Column 1 of the probability matrix is scored against the "open" labels.
    print("Open AUC: %0.6f" % metrics.auc(solution["open"], probs[:, 1]))
Example #2
0
def main():
    """Predict ``Cover_Type`` for the test set and write the submission file.

    Loads the saved model with ``data_io.load_model``, cleans the test frame
    with ``FeatureConverter.clean_data``, predicts on every column except
    ``Id`` and hands the integer predictions together with the ids to
    ``data_io.write_submission``.
    """
    # This step loads the model; the previous message wrongly announced that
    # the test data was being loaded.
    print("Loading the trained model")
    classifier = data_io.load_model()

    print("Load test data. And Clean..")
    test = data_io.get_test_df()
    test = FeatureConverter().clean_data(test)

    # Keep the ids for the submission and leave them out of the matrix that
    # is passed to the model.
    passenger_ids = test['Id']
    features = test.drop(['Id'], axis=1).values

    print("Making predictions")
    predictions = classifier.predict(features).astype(int)

    print("Writing predictions to file")
    data_io.write_submission(predictions, passenger_ids, ['Id', 'Cover_Type'])
Example #3
0
def main():
    """Run the saved classifier on the cleaned test data and save the result.

    The model comes from ``data_io.load_model``; the test frame is read with
    ``data_io.get_test_df`` and passed through ``FeatureConverter.clean_data``.
    The ``Id`` column is set aside, the remaining columns are fed to
    ``classifier.predict``, and the predictions (cast to ``int``) are written
    with ``data_io.write_submission`` under the headers ``Id`` and
    ``Cover_Type``.
    """
    # The message used to read "Loading the test data" although this step
    # loads the model.
    print("Loading the trained model")
    classifier = data_io.load_model()

    print("Load test data. And Clean..")
    cleaned = FeatureConverter().clean_data(data_io.get_test_df())

    # Ids go to the submission only; the model sees the other columns.
    row_ids = cleaned['Id']
    feature_matrix = cleaned.drop(['Id'], axis=1).values

    print("Making predictions")
    predictions = classifier.predict(feature_matrix).astype(int)

    print("Writing predictions to file")
    data_io.write_submission(predictions, row_ids, ['Id', 'Cover_Type'])
Example #4
0
def main():
    """Train the salary model, save it, and write predictions for the test set.

    Steps: read the feature and salary training frames, left-merge them and
    dump the merged frame to CSV, fit the pipeline returned by
    ``get_pipeline`` on the selected feature columns against ``salary``,
    save the fitted model with ``data_io.save_model``, then predict on the
    same feature columns of the test frame and pass the predictions (as a
    single-column array) to ``write_submission``.
    """
    print("Reading in the raw data of features and salaries for merging")
    # train_f: training feature data; train_s: training salary data with 0 items deleted
    train_f = data_io.get_train_f_df()
    train_s = data_io.get_train_s_df()

    # Merge features with salaries, similar to a SQL left join. No `on=` is
    # passed, so pandas joins on the columns both frames have in common.
    # NOTE(review): presumably that is jobId only -- confirm. A left join
    # also keeps feature rows without a salary (NaN target); verify that the
    # pipeline tolerates this.
    data = pd.merge(train_f, train_s, how='left')
    data.to_csv("D:/job/indeed_data_science_exercise/RFC1/train9merge2.csv", sep=',', encoding='utf-8')

    # Separate the data into the feature columns and the target column
    # (salary only). 'companyId' is excluded.
    characters = ["jobType", "degree", "major", "industry", "yearsExperience", "milesFromMetropolis"]
    x = data[characters]
    y = data[['salary']]

    print("Extracting features and training model")
    classifier = get_pipeline()
    # Fit on the frames built above. The previous call used xtr/ytr, which
    # are never defined in this function, while x and y went unused.
    classifier.fit(x, y)

    print("Saving the classifier")
    data_io.save_model(classifier)

    print("Load testing data")
    testin = data_io.get_test_df()
    test = testin[characters]

    print("Making predictions")
    predictions = classifier.predict(test)
    # Reshape to one prediction per row in a single column.
    predictions = predictions.reshape(len(predictions), 1)

    print("Writing predictions to file")
    write_submission(predictions)