Esempio n. 1
0
def gpu_load_performance_csv(performance_path, **kwargs):
    """ Loads performance data

    Returns
    -------
    GPU DataFrame
    """
    chronometer = Chronometer.makeStarted()

    cols = [
        "loan_id",
        "monthly_reporting_period",
        "servicer",
        "interest_rate",
        "current_actual_upb",
        "loan_age",
        "remaining_months_to_legal_maturity",
        "adj_remaining_months_to_maturity",
        "maturity_date",
        "msa",
        "current_loan_delinquency_status",
        "mod_flag",
        "zero_balance_code",
        "zero_balance_effective_date",
        "last_paid_installment_date",
        "foreclosed_after",
        "disposition_date",
        "foreclosure_costs",
        "prop_preservation_and_repair_costs",
        "asset_recovery_costs",
        "misc_holding_expenses",
        "holding_taxes",
        "net_sale_proceeds",
        "credit_enhancement_proceeds",
        "repurchase_make_whole_proceeds",
        "other_foreclosure_proceeds",
        "non_interest_bearing_upb",
        "principal_forgiveness_upb",
        "repurchase_make_whole_proceeds_flag",
        "foreclosure_principal_write_off_amount",
        "servicing_activity_indicator",
    ]

    dtypes = OrderedDict([
        ("loan_id", "int64"),
        ("monthly_reporting_period", "date"),
        ("servicer", "category"),
        ("interest_rate", "float64"),
        ("current_actual_upb", "float64"),
        ("loan_age", "float64"),
        ("remaining_months_to_legal_maturity", "float64"),
        ("adj_remaining_months_to_maturity", "float64"),
        ("maturity_date", "date"),
        ("msa", "float64"),
        ("current_loan_delinquency_status", "int32"),
        ("mod_flag", "category"),
        ("zero_balance_code", "category"),
        ("zero_balance_effective_date", "date"),
        ("last_paid_installment_date", "date"),
        ("foreclosed_after", "date"),
        ("disposition_date", "date"),
        ("foreclosure_costs", "float64"),
        ("prop_preservation_and_repair_costs", "float64"),
        ("asset_recovery_costs", "float64"),
        ("misc_holding_expenses", "float64"),
        ("holding_taxes", "float64"),
        ("net_sale_proceeds", "float64"),
        ("credit_enhancement_proceeds", "float64"),
        ("repurchase_make_whole_proceeds", "float64"),
        ("other_foreclosure_proceeds", "float64"),
        ("non_interest_bearing_upb", "float64"),
        ("principal_forgiveness_upb", "float64"),
        ("repurchase_make_whole_proceeds_flag", "category"),
        ("foreclosure_principal_write_off_amount", "float64"),
        ("servicing_activity_indicator", "category"),
    ])
    print(performance_path)
    performance_table = pyblazing.create_table(
        table_name="perf",
        type=get_type_schema(performance_path),
        path=performance_path,
        delimiter="|",
        names=cols,
        dtypes=get_dtype_values(dtypes),
        skip_rows=1,
    )
    Chronometer.show(chronometer, "Read Performance CSV")
    return performance_table
Esempio n. 2
0
def gpu_load_acquisition_csv(acquisition_path, **kwargs):
    """ Loads acquisition data

    Returns
    -------
    GPU DataFrame
    """
    chronometer = Chronometer.makeStarted()

    cols = [
        "loan_id",
        "orig_channel",
        "seller_name",
        "orig_interest_rate",
        "orig_upb",
        "orig_loan_term",
        "orig_date",
        "first_pay_date",
        "orig_ltv",
        "orig_cltv",
        "num_borrowers",
        "dti",
        "borrower_credit_score",
        "first_home_buyer",
        "loan_purpose",
        "property_type",
        "num_units",
        "occupancy_status",
        "property_state",
        "zip",
        "mortgage_insurance_percent",
        "product_type",
        "coborrow_credit_score",
        "mortgage_insurance_type",
        "relocation_mortgage_indicator",
    ]

    dtypes = OrderedDict([
        ("loan_id", "int64"),
        ("orig_channel", "category"),
        ("seller_name", "category"),
        ("orig_interest_rate", "float64"),
        ("orig_upb", "int64"),
        ("orig_loan_term", "int64"),
        ("orig_date", "date"),
        ("first_pay_date", "date"),
        ("orig_ltv", "float64"),
        ("orig_cltv", "float64"),
        ("num_borrowers", "float64"),
        ("dti", "float64"),
        ("borrower_credit_score", "float64"),
        ("first_home_buyer", "category"),
        ("loan_purpose", "category"),
        ("property_type", "category"),
        ("num_units", "int64"),
        ("occupancy_status", "category"),
        ("property_state", "category"),
        ("zip", "int64"),
        ("mortgage_insurance_percent", "float64"),
        ("product_type", "category"),
        ("coborrow_credit_score", "float64"),
        ("mortgage_insurance_type", "float64"),
        ("relocation_mortgage_indicator", "category"),
    ])

    print(acquisition_path)

    acquisition_table = pyblazing.create_table(
        table_name="acq",
        type=get_type_schema(acquisition_path),
        path=acquisition_path,
        delimiter="|",
        names=cols,
        dtypes=get_dtype_values(dtypes),
        skip_rows=1,
    )
    Chronometer.show(chronometer, "Read Acquisition CSV")
    return acquisition_table
Esempio n. 3
0
                final_cpu_df_label = pd.concat(
                    [final_cpu_df_label, cpu_df_label])
                final_cpu_df_data = pd.concat([final_cpu_df_data, cpu_df_data])

            xgb_convert_end_time = time.time()

            all_xgb_convert_times.append(xgb_convert_end_time -
                                         xgb_convert_start_time)

data_train, data_test, label_train, label_test = train_test_split(
    final_cpu_df_data, final_cpu_df_label, test_size=0.20, random_state=42)

xgdf_train = xgb.DMatrix(data_train, label_train)
xgdf_test = xgb.DMatrix(data_test, label_test)

chronometerTrain1 = Chronometer.makeStarted()
startTime = time.time()
bst = xgb.train(dxgb_gpu_params,
                xgdf_train,
                num_boost_round=dxgb_gpu_params["nround"])
Chronometer.show(chronometerTrain1, "Train 1")

chronometerPredict1 = Chronometer.makeStarted()
preds = bst.predict(xgdf_test)
Chronometer.show(chronometerPredict1, "Predict 1")

labels = xgdf_test.get_label()
print("prediction error=%f" %
      (sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) /
       float(len(preds))))