def gpu_load_names(col_names_path, **kwargs):
    """Register the seller-name mapping file as the ``names`` table.

    Parameters
    ----------
    col_names_path : str
        Prefix to which the literal ``"names.load"`` is appended by plain
        string concatenation (no path separator is inserted).
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described as a
    GPU DataFrame by the original documentation).
    """
    timer = Chronometer.makeStarted()

    # Single source of truth: column order and dtype per column.
    schema = [
        ("seller_name", "category"),
        ("new_seller_name", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_types = OrderedDict(schema)

    names_file = col_names_path + "names.load"
    print(names_file)

    # TODO: dtypes=get_dtype_values(dtypes)
    # The ordered mapping itself is passed as `dtypes` for now.
    table = pyblazing.create_table(
        table_name="names",
        type=get_type_schema(names_file),
        path=names_file,
        delimiter="|",
        names=column_names,
        dtypes=column_types,
        skip_rows=1,
    )

    Chronometer.show(timer, "Read Names CSV")
    return table
def gpu_load_names(**kwargs):
    """Register the seller-name mapping file as the ``names`` table.

    NOTE(review): ``col_names_path`` is not a parameter here; it is looked
    up from an enclosing scope (presumably a module-level global) at call
    time. Confirm that it is defined before this function is invoked.

    Parameters
    ----------
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described as a
    GPU DataFrame by the original documentation).
    """
    timer = Chronometer.makeStarted()

    # Single source of truth: column order and dtype per column.
    schema = [
        ("seller_name", "category"),
        ("new_seller_name", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_types = OrderedDict(schema)

    table = pyblazing.create_table(
        table_name="names",
        type=get_type_schema(col_names_path),
        path=col_names_path,
        delimiter="|",
        names=column_names,
        dtypes=get_dtype_values(column_types),
        skip_rows=1,
    )

    Chronometer.show(timer, "Read Names CSV")
    return table
def gpu_load_acquisition_csv(acquisition_path, **kwargs):
    """Register the acquisition file as the ``acq`` table.

    Parameters
    ----------
    acquisition_path : str
        Location of the pipe-delimited acquisition file; it is printed,
        handed to ``get_type_schema`` and passed on as the table path.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described as a
    GPU DataFrame by the original documentation).
    """
    timer = Chronometer.makeStarted()

    # Single source of truth: column order and dtype per column.
    schema = [
        ("loan_id", "int64"),
        ("orig_channel", "category"),
        ("seller_name", "category"),
        ("orig_interest_rate", "float64"),
        ("orig_upb", "int64"),
        ("orig_loan_term", "int64"),
        ("orig_date", "date"),
        ("first_pay_date", "date"),
        ("orig_ltv", "float64"),
        ("orig_cltv", "float64"),
        ("num_borrowers", "float64"),
        ("dti", "float64"),
        ("borrower_credit_score", "float64"),
        ("first_home_buyer", "category"),
        ("loan_purpose", "category"),
        ("property_type", "category"),
        ("num_units", "int64"),
        ("occupancy_status", "category"),
        ("property_state", "category"),
        ("zip", "int64"),
        ("mortgage_insurance_percent", "float64"),
        ("product_type", "category"),
        ("coborrow_credit_score", "float64"),
        ("mortgage_insurance_type", "float64"),
        ("relocation_mortgage_indicator", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_types = OrderedDict(schema)

    print(acquisition_path)

    table = pyblazing.create_table(
        table_name="acq",
        type=get_type_schema(acquisition_path),
        path=acquisition_path,
        delimiter="|",
        names=column_names,
        dtypes=get_dtype_values(column_types),
        skip_rows=1,
    )

    Chronometer.show(timer, "Read Acquisition CSV")
    return table
def gpu_load_acquisition_csv(acquisition_path, **kwargs):
    """Register the acquisition file as the ``acq`` table.

    Parameters
    ----------
    acquisition_path : str
        Location of the pipe-delimited acquisition file; it is printed,
        handed to ``get_type_schema`` and passed on as the table path.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described as a
    GPU DataFrame by the original documentation).
    """
    timer = Chronometer.makeStarted()

    # Single source of truth: column order and dtype per column.
    schema = [
        ("loan_id", "int64"),
        ("orig_channel", "category"),
        ("seller_name", "category"),
        ("orig_interest_rate", "float64"),
        ("orig_upb", "int64"),
        ("orig_loan_term", "int64"),
        ("orig_date", "date"),
        ("first_pay_date", "date"),
        ("orig_ltv", "float64"),
        ("orig_cltv", "float64"),
        ("num_borrowers", "float64"),
        ("dti", "float64"),
        ("borrower_credit_score", "float64"),
        ("first_home_buyer", "category"),
        ("loan_purpose", "category"),
        ("property_type", "category"),
        ("num_units", "int64"),
        ("occupancy_status", "category"),
        ("property_state", "category"),
        ("zip", "int64"),
        ("mortgage_insurance_percent", "float64"),
        ("product_type", "category"),
        ("coborrow_credit_score", "float64"),
        ("mortgage_insurance_type", "float64"),
        ("relocation_mortgage_indicator", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_types = OrderedDict(schema)

    print(acquisition_path)

    # TODO: dtypes=get_dtype_values(dtypes)
    # The ordered mapping itself is passed as `dtypes` for now.
    table = pyblazing.create_table(
        table_name="acq",
        type=get_type_schema(acquisition_path),
        path=acquisition_path,
        delimiter="|",
        names=column_names,
        dtypes=column_types,
        skip_rows=1,
    )

    Chronometer.show(timer, "Read Acquisition CSV")
    return table
def gpu_load_performance_csv(performance_path, **kwargs):
    """Register the performance file as the ``perf`` table.

    Parameters
    ----------
    performance_path : str
        Location of the pipe-delimited performance file; it is printed,
        handed to ``get_type_schema`` and passed on as the table path.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described as a
    GPU DataFrame by the original documentation).
    """
    timer = Chronometer.makeStarted()

    # Single source of truth: column order and dtype per column.
    schema = [
        ("loan_id", "int64"),
        ("monthly_reporting_period", "date"),
        ("servicer", "category"),
        ("interest_rate", "float64"),
        ("current_actual_upb", "float64"),
        ("loan_age", "float64"),
        ("remaining_months_to_legal_maturity", "float64"),
        ("adj_remaining_months_to_maturity", "float64"),
        ("maturity_date", "date"),
        ("msa", "float64"),
        ("current_loan_delinquency_status", "int32"),
        ("mod_flag", "category"),
        ("zero_balance_code", "category"),
        ("zero_balance_effective_date", "date"),
        ("last_paid_installment_date", "date"),
        ("foreclosed_after", "date"),
        ("disposition_date", "date"),
        ("foreclosure_costs", "float64"),
        ("prop_preservation_and_repair_costs", "float64"),
        ("asset_recovery_costs", "float64"),
        ("misc_holding_expenses", "float64"),
        ("holding_taxes", "float64"),
        ("net_sale_proceeds", "float64"),
        ("credit_enhancement_proceeds", "float64"),
        ("repurchase_make_whole_proceeds", "float64"),
        ("other_foreclosure_proceeds", "float64"),
        ("non_interest_bearing_upb", "float64"),
        ("principal_forgiveness_upb", "float64"),
        ("repurchase_make_whole_proceeds_flag", "category"),
        ("foreclosure_principal_write_off_amount", "float64"),
        ("servicing_activity_indicator", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_types = OrderedDict(schema)

    print(performance_path)

    # TODO: dtypes=get_dtype_values(dtypes)
    # The ordered mapping itself is passed as `dtypes` for now.
    table = pyblazing.create_table(
        table_name="perf",
        type=get_type_schema(performance_path),
        path=performance_path,
        delimiter="|",
        names=column_names,
        dtypes=column_types,
        skip_rows=1,
    )

    Chronometer.show(timer, "Read Performance CSV")
    return table