def load_data(self, sample, x_axi_attr_index, y_axi_attr_index):
    """Run the preprocessing pipeline and simplify the training split.

    The spreadsheet "default of credit card clients.xls" is loaded through
    ``Preprocess``, reduced with ``dimension_decrease()``, passed through
    ``Postprocess.improve_data()``, and the first and third values of that
    result are handed to ``self.data_simplification``.

    Args:
        sample: Forwarded unchanged to ``self.data_simplification``.
        x_axi_attr_index: Forwarded unchanged to ``self.data_simplification``.
        y_axi_attr_index: Forwarded unchanged to ``self.data_simplification``.

    Returns:
        Whatever ``self.data_simplification`` returns.
    """
    # Imports are resolved at call time rather than at module import time.
    from Preprocessing import Preprocess
    from Postprocessing import Postprocess

    source = Preprocess("default of credit card clients.xls")

    # The raw split is unpacked but not used afterwards; load() is still
    # called before dimension_decrease(), as in the original sequence.
    _raw_train_x, _raw_test_x, _raw_train_y, _raw_test_y = source.load()

    (
        reduced_train_x,
        reduced_test_x,
        reduced_train_y,
        reduced_test_y,
    ) = source.dimension_decrease()

    refiner = Postprocess(
        reduced_train_x,
        reduced_test_x,
        reduced_train_y,
        reduced_test_y,
    )

    # Only the first and third results are consumed below.
    train_x, _test_x, train_y, _test_y = refiner.improve_data()

    return self.data_simplification(
        train_x,
        train_y,
        sample,
        x_axi_attr_index,
        y_axi_attr_index,
    )
def __init__(self):
    """Load the dataset, derive every data view, and set up helpers.

    Three families of attributes are stored, each as a train/test split of
    X and Y: ``raw_*`` from ``Preprocess.load()``, ``low_dim_*`` from
    ``Preprocess.dimension_decrease()``, and ``discretized_*`` from
    ``Postprocess.improve_data()``. Afterwards ``self.buildclf()`` and
    ``self.buildprocessor()`` are invoked and the file "execution_Log" is
    opened in append mode.
    """
    # Empty containers are created before the build* calls at the end.
    self.classifier = []
    self.processor = []
    self.result = []

    loader = Preprocess("default of credit card clients.xls")

    (
        self.raw_X_train,
        self.raw_X_test,
        self.raw_Y_train,
        self.raw_Y_test,
    ) = loader.load()

    (
        self.low_dim_X_train,
        self.low_dim_X_test,
        self.low_dim_Y_train,
        self.low_dim_Y_test,
    ) = loader.dimension_decrease()

    # The post-processor receives the reduced split directly.
    self.discretizer = Postprocess(
        self.low_dim_X_train,
        self.low_dim_X_test,
        self.low_dim_Y_train,
        self.low_dim_Y_test,
    )

    (
        self.discretized_X_train,
        self.discretized_X_test,
        self.discretized_Y_train,
        self.discretized_Y_test,
    ) = self.discretizer.improve_data()

    self.buildclf()
    self.buildprocessor()

    # The handle is kept on the instance; this method does not close it.
    self.logfile = open("execution_Log", "a")
def trainmodel(self):
    """Fit ``self.c`` on the full post-processed dataset and print its error.

    The train and test parts returned by ``Postprocess.improve_data()`` are
    concatenated, ``self.c`` is fitted on the result, and predictions are
    made on that same data. Two lines are printed: the number of mislabeled
    points, and ``1 - mislabeled / total`` as a percentage under the label
    "Precision". Nothing is returned.
    """
    loader = Preprocess("default of credit card clients.xls")
    # load() is called for its effect on the loader; its result is unused.
    loader.load()

    (
        reduced_train_x,
        reduced_test_x,
        reduced_train_y,
        reduced_test_y,
    ) = loader.dimension_decrease()

    refiner = Postprocess(
        reduced_train_x,
        reduced_test_x,
        reduced_train_y,
        reduced_test_y,
    )
    train_x, test_x, train_y, test_y = refiner.improve_data()

    # Train and test parts are merged: the model sees every sample.
    features = np.concatenate((train_x, test_x))
    labels = np.concatenate((train_y, test_y))

    self.c.fit(features, labels)
    predicted = self.c.predict(features)

    wrong = (labels != predicted).sum()
    total = features.shape[0]

    summary = "Mislabeled points (%s Classification) out of a total %d points : %d" % (
        "SVC",
        total,
        wrong,
    )
    print(summary)

    # Fraction of points whose predicted label matches the given label.
    score = 1 - wrong / total
    print("Precision of %s is %4.2f%%" % ("SVC", score * 100))
# group owed amount into different intervals if -100000 <= temp_owe < 0: self.x_test[row, 6] = -1 elif -500000 <= temp_owe < -100000: self.x_test[row, 6] = -2 elif temp_owe < -500000: self.x_test[row, 6] = -3 elif self.x_test[row, 6] == 0: continue elif 1 <= temp_owe < 100001: self.x_test[row, 6] = 1 elif 10000 <= temp_owe < 500001: self.x_test[row, 6] = 2 else: self.x_test[row, 6] = 3 def improve_data(self): self.set_age() self.set_amount() return self.x_train, self.x_test, self.y_train, self.y_test if __name__ == '__main__': a = Preprocess("default of credit card clients.xls") rx1, rx2, ry1, ry2 = a.load() x1, x2, y1, y2 = a.dimension_decrease() b = Postprocess(x1, x2, y1, y2) xd1, xd2, yd1, yd2 = b.improve_data()