def gpu_load_performance_csv(performance_path, **kwargs):
    """
    Create the "perf" table from the performance file.

    The column names and dtypes listed below are handed to
    ``pyblazing.create_table`` together with ``delimiter="|"`` and
    ``skip_rows=1``. The elapsed time is reported through ``Chronometer``.

    Parameters
    ----------
    performance_path
        Path of the performance file. It is printed, passed to
        ``get_type_schema`` and forwarded as ``path``.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table``
    """
    timer = Chronometer.makeStarted()

    # Single source of truth: (column name, dtype) pairs, in the order the
    # names are passed to create_table.
    schema = [
        ("loan_id", "int64"),
        ("monthly_reporting_period", "date"),
        ("servicer", "category"),
        ("interest_rate", "float64"),
        ("current_actual_upb", "float64"),
        ("loan_age", "float64"),
        ("remaining_months_to_legal_maturity", "float64"),
        ("adj_remaining_months_to_maturity", "float64"),
        ("maturity_date", "date"),
        ("msa", "float64"),
        ("current_loan_delinquency_status", "int32"),
        ("mod_flag", "category"),
        ("zero_balance_code", "category"),
        ("zero_balance_effective_date", "date"),
        ("last_paid_installment_date", "date"),
        ("foreclosed_after", "date"),
        ("disposition_date", "date"),
        ("foreclosure_costs", "float64"),
        ("prop_preservation_and_repair_costs", "float64"),
        ("asset_recovery_costs", "float64"),
        ("misc_holding_expenses", "float64"),
        ("holding_taxes", "float64"),
        ("net_sale_proceeds", "float64"),
        ("credit_enhancement_proceeds", "float64"),
        ("repurchase_make_whole_proceeds", "float64"),
        ("other_foreclosure_proceeds", "float64"),
        ("non_interest_bearing_upb", "float64"),
        ("principal_forgiveness_upb", "float64"),
        ("repurchase_make_whole_proceeds_flag", "category"),
        ("foreclosure_principal_write_off_amount", "float64"),
        ("servicing_activity_indicator", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_dtypes = OrderedDict(schema)

    print(performance_path)

    perf_table = pyblazing.create_table(
        table_name="perf",
        type=get_type_schema(performance_path),
        path=performance_path,
        delimiter="|",
        names=column_names,
        dtypes=get_dtype_values(column_dtypes),
        skip_rows=1,
    )

    Chronometer.show(timer, "Read Performance CSV")
    return perf_table
def gpu_load_acquisition_csv(acquisition_path, **kwargs):
    """
    Create the "acq" table from the acquisition file.

    The column names and dtypes listed below are handed to
    ``pyblazing.create_table`` together with ``delimiter="|"`` and
    ``skip_rows=1``. The elapsed time is reported through ``Chronometer``.

    Parameters
    ----------
    acquisition_path
        Path of the acquisition file. It is printed, passed to
        ``get_type_schema`` and forwarded as ``path``.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table``
    """
    timer = Chronometer.makeStarted()

    # Single source of truth: (column name, dtype) pairs, in the order the
    # names are passed to create_table.
    schema = [
        ("loan_id", "int64"),
        ("orig_channel", "category"),
        ("seller_name", "category"),
        ("orig_interest_rate", "float64"),
        ("orig_upb", "int64"),
        ("orig_loan_term", "int64"),
        ("orig_date", "date"),
        ("first_pay_date", "date"),
        ("orig_ltv", "float64"),
        ("orig_cltv", "float64"),
        ("num_borrowers", "float64"),
        ("dti", "float64"),
        ("borrower_credit_score", "float64"),
        ("first_home_buyer", "category"),
        ("loan_purpose", "category"),
        ("property_type", "category"),
        ("num_units", "int64"),
        ("occupancy_status", "category"),
        ("property_state", "category"),
        ("zip", "int64"),
        ("mortgage_insurance_percent", "float64"),
        ("product_type", "category"),
        ("coborrow_credit_score", "float64"),
        ("mortgage_insurance_type", "float64"),
        ("relocation_mortgage_indicator", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_dtypes = OrderedDict(schema)

    print(acquisition_path)

    acq_table = pyblazing.create_table(
        table_name="acq",
        type=get_type_schema(acquisition_path),
        path=acquisition_path,
        delimiter="|",
        names=column_names,
        dtypes=get_dtype_values(column_dtypes),
        skip_rows=1,
    )

    Chronometer.show(timer, "Read Acquisition CSV")
    return acq_table
final_cpu_df_label = pd.concat( [final_cpu_df_label, cpu_df_label]) final_cpu_df_data = pd.concat([final_cpu_df_data, cpu_df_data]) xgb_convert_end_time = time.time() all_xgb_convert_times.append(xgb_convert_end_time - xgb_convert_start_time) data_train, data_test, label_train, label_test = train_test_split( final_cpu_df_data, final_cpu_df_label, test_size=0.20, random_state=42) xgdf_train = xgb.DMatrix(data_train, label_train) xgdf_test = xgb.DMatrix(data_test, label_test) chronometerTrain1 = Chronometer.makeStarted() startTime = time.time() bst = xgb.train(dxgb_gpu_params, xgdf_train, num_boost_round=dxgb_gpu_params["nround"]) Chronometer.show(chronometerTrain1, "Train 1") chronometerPredict1 = Chronometer.makeStarted() preds = bst.predict(xgdf_test) Chronometer.show(chronometerPredict1, "Predict 1") labels = xgdf_test.get_label() print("prediction error=%f" % (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds))))