# ---- Training data: load, featurize, split, scale, reduce ----
# NOTE(review): the original also called pd.read_csv(csv_filename) here, but the
# result was overwritten by image_info.get_all_info() before any use — the
# redundant read (wasted I/O) has been removed. DisplayImage presumably loads
# the CSV itself from csv_filename — TODO confirm.
image_info = DisplayImage(csv_filename)
digit_train_set = image_info.get_all_info()

# Separate training info into samples and target:
# column 0 is the label, column 1 the image data.
samples_v1 = digit_train_set[:, 1]
target = digit_train_set[:, 0]
target = target.astype(int)

# Flatten each image to a 1-D feature vector, then augment with
# circle-detection features computed from the original image arrays.
samples_v2 = [np.reshape(v, (-1)) for v in samples_v1]
samples_v3 = image_info.circle_info_arr(samples_v2, samples_v1)

# Hold out 20% for evaluation; fixed random_state for reproducibility.
x_train, x_test_before, y_train, y_test = train_test_split(
    samples_v3, target, test_size=0.2, random_state=10)

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same
# transform to the held-out split to avoid leakage.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test_before)

from sklearn.decomposition import PCA

# A float n_components keeps enough components to explain 80% of variance.
model_pca = PCA(0.80)
x_train = model_pca.fit_transform(x_train)
x_test = model_pca.transform(x_test)
# ---- Test data: load, featurize, and scale alongside the training set ----
digit_test_set = pd.read_csv(test_csv_filename)
test_samples_v1 = digit_test_set.values

# Rebuild the training features (label in column 0, image in column 1).
train_samples_v1 = digit_train_set[:, 1]
target = digit_train_set[:, 0]
target = target.astype(int)

# Flatten each training image, then augment with circle features from
# the original image arrays.
train_samples_v2 = [np.reshape(img, (-1)) for img in train_samples_v1]
train_samples_v3 = image_info.circle_info_arr(train_samples_v2, train_samples_v1)

# Test rows arrive flat from the CSV: reshape each to a 28-wide,
# single-channel uint8 image so circle detection can run on it.
# presumably 28x28 MNIST-style digits — verify against the data source.
test_samples_v2 = [np.reshape(row, (28, -1, 1)).astype(np.uint8)
                   for row in test_samples_v1]
test_samples_v3 = image_info.circle_info_arr(test_samples_v1, test_samples_v2)

from sklearn.preprocessing import StandardScaler

# Fit scaling on the training features only, then reuse the fitted
# scaler for the test features.
scaler = StandardScaler()
x_train = scaler.fit_transform(train_samples_v3)
x_test = scaler.transform(test_samples_v3)