# Data-set selection for the digit-recognizer experiment.  Exactly one
# ``test_csv`` and one ``train_csv`` assignment is active; the commented
# alternatives are the other available sample sizes.
resources = '../../../resources/digit-recognizer'
# test_csv = 'test.csv'
test_csv = 'jason_test_1000.csv'
# train_csv = 'train.csv'
# train_csv = 'jason_train_10000.csv'
train_csv = 'jason_train_5000.csv'
# train_csv = 'jason_train_4000.csv'
# train_csv = 'jason_train_2000.csv'
# train_csv = 'jason_train_1000.csv'

# ``resources`` has no trailing slash, so the directory and file name must be
# joined with an explicit separator.  Plain ``resources + train_csv`` yields
# '.../digit-recognizerjason_train_5000.csv', which is not inside the folder.
train_csv_filename = '%s/%s' % (resources, train_csv)
test_csv_filename = '%s/%s' % (resources, test_csv)

# Split the training file 80/20 with a fixed seed.
image_mod = DisplayImage()
x_train, x_test, y_train, y_test = image_mod.train_test_set(
    train_file=train_csv_filename, train_size=.8, random_state=10)
# x_train, x_test, y_train, y_test = DisplayImage().train_test_set(train_file=train_csv_filename,
#                                                                  train_size=1.0,
#                                                                  test_file=test_csv_filename)

# Fit the scaler on the training split only and reuse the fitted scaler for
# the test split.
model_scaler = StandardScaler()
x_train_v2 = model_scaler.fit_transform(x_train)
x_test_v2 = model_scaler.transform(x_test)
# NOTE(review): presumably Keras' to_categorical (one-hot labels) -- confirm
# against the import at the top of the file.
y_train_v2 = to_categorical(y_train)

# learning_rates = [.0001, 0.01, 1]
# for lr in learning_rates:
# Create the model: model
from sklearn.metrics import accuracy_score from image_test_space import DisplayImage from sklearn.linear_model import LogisticRegression resources = '../../../resources/digit-recognizer' # train_csv = 'train.csv' # train_csv = 'jason_train_10000.csv' train_csv = 'jason_train_5000.csv' # train_csv = 'jason_train_4000.csv' # train_csv = 'jason_train_2000.csv' # train_csv = 'jason_train_1000.csv' csv_filename = '%s/%s' % (resources, train_csv) # read training info digit_train_set = pd.read_csv(csv_filename) image_info = DisplayImage(csv_filename) digit_train_set = image_info.get_all_info() # separate training info into samples and target samples_v1 = digit_train_set[:, 1] target = digit_train_set[:, 0] target = target.astype(int) # print(type(target[0])) # print(target) # exit(0) samples_v2 = list(map(lambda v: np.reshape(v, (-1)), samples_v1)) samples_v3 = image_info.circle_info_arr(samples_v2, samples_v1) x_train, x_test_before, y_train, y_test = train_test_split(samples_v3,
# When False the training-data preparation below is skipped.
new_model = False

# Gather images to review
large_resources = '../../../../image-data-train-test-large-data/Coccidia/img/'
base_train = "0"
# glob returns matches in arbitrary order, so sort them: otherwise the fixed
# random_state below does not guarantee the same train/test split on every
# machine or run.
# NOTE(review): if an existing saved model was trained on an unsorted listing,
# its held-out set may differ from the split produced here -- retrain or
# confirm before comparing scores.
images_to_review = sorted(
    glob.glob(large_resources + base_train + "*" + ".jpg"))
# Only the image paths are split, so no dummy second array is needed.
train_images, test_images = train_test_split(
    images_to_review, test_size=0.2, random_state=10)

screen_size = 32
img_size = 512
zeros_mask = np.zeros((img_size, img_size, 1), dtype=np.uint8)
img_mod = DisplayImage(img_size=img_size, screen_size=screen_size)
model_save_name = "model_save_v2_%sx.h5" % base_train

# NOTE(review): indentation was reconstructed from whitespace-collapsed text.
# The statements below are placed inside the branch because each one depends
# on x_train / y_train, which are only assigned here.
if new_model:
    # Get training data
    x_values, y_values = img_mod.get_training_values(train_images)
    x_train = np.array(x_values)
    y_train = y_values

    # Cleanup data for CNN: float32 copy divided by 255
    # (presumably 8-bit pixel values scaled to [0, 1] -- confirm).
    x_train_v3 = x_train.astype(np.float32)
    x_train_v3 /= 255
    y_train_array = np.array(y_train)
    print("Total samples:\t\t" + str(y_train_array.shape[0]))
from sklearn.metrics import accuracy_score
from image_test_space import DisplayImage
from sklearn.linear_model import LogisticRegression

# Data-set selection: exactly one ``train_csv`` assignment is active; the
# commented alternatives are the other available sample sizes.
resources = '../../../../resources/digit-recognizer'
# train_csv = 'train.csv'
train_csv = 'jason_train_10000.csv'
# train_csv = 'jason_train_5000.csv'
# train_csv = 'jason_train_4000.csv'
# train_csv = 'jason_train_2000.csv'
# train_csv = 'jason_train_1000.csv'
csv_filename = '%s/%s' % (resources, train_csv)

# read training info
# The data comes from DisplayImage.get_all_info(); the former
# ``pd.read_csv(csv_filename)`` result was overwritten before being used, so
# that extra parse of the whole file has been removed.
image_info = DisplayImage(csv_filename)
digit_train_set = image_info.get_all_info()

# separate training info into samples and target
samples_v1 = digit_train_set[:, 1]
target = digit_train_set[:, 0].astype(int)

# Reshape every sample to 28x28x1 and stack them into one uint8 array.
samples_v2 = np.array([np.reshape(v, (28, 28, 1)) for v in samples_v1])
samples_v2 = samples_v2.astype(np.uint8)
import numpy as np
import cv2 as cv
import operator
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from image_test_space import DisplayImage

csv_filename = '../../resources/digit-recognizer/jason_train_2000.csv'
# csv_filename = '../../resources/digit-recognizer/train.csv'

# The data comes from DisplayImage.get_all_info(); the former
# ``pd.read_csv(csv_filename)`` result was overwritten before being used, so
# that extra parse of the whole file has been removed.
image_info = DisplayImage(csv_filename)
digit_train_set = image_info.get_all_info()

# Pre-process column 1 (the images) in three passes.
# 1) Dilate with a 1-row x 3-column kernel; the kernel is loop-invariant, so
#    it is built once instead of once per image.
dilate_kernel = np.ones((1, 3), dtype=np.uint8)
digit_train_set[:, 1] = [
    cv.dilate(v, dilate_kernel) for v in digit_train_set[:, 1]]
# 2) THRESH_TOZERO with threshold 100; [1] selects the image from the
#    (retval, image) pair returned by cv.threshold.
digit_train_set[:, 1] = [
    cv.threshold(v, 100, 255, cv.THRESH_TOZERO)[1]
    for v in digit_train_set[:, 1]]
# 3) Project-specific rotation helper.
digit_train_set[:, 1] = [
    image_info.rotate_to_upright(v) for v in digit_train_set[:, 1]]

# Column 1 holds the samples, column 0 the integer labels.
x = digit_train_set[:, 1]
y = digit_train_set[:, 0]
y = y.astype('int')
# Flatten every image to a 1-D vector.
x = [np.reshape(v, (-1)) for v in x]
k_scores = {}
# digit_train_set = pd.read_csv('../../resources/digit-recognizer/train.csv') digit_train_set = pd.read_csv( '../../resources/digit-recognizer/jason_train_4000.csv') independent_columns = digit_train_set.columns[1:] dependent_column = digit_train_set.columns[0:1] x = digit_train_set.loc[:, independent_columns].values y = digit_train_set.loc[:, dependent_column].values x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=91) display_image = DisplayImage(data_set=x_test) scores = {} # creating loop for neighbor_itr for later # 3 is best, though 1-10 are very similar at about 93.5% for neighbor_itr in range(3, 4): knn = KNeighborsClassifier(n_neighbors=neighbor_itr) knn.fit(x_train, y_train.ravel()) pred = knn.predict(x_test) y_test_raveled = y_test.ravel() score = accuracy_score(y_test_raveled, pred) tf_result = y_test_raveled == pred print(str(neighbor_itr) + ":" + str(score)) for idx, tf in enumerate(tf_result): if not tf: print("Actual:" + str(y_test_raveled[idx]) + "::Guess:" +
# Training files
# Exactly one ``train_csv`` assignment is active; the commented alternatives
# are the other available sample sizes.
train_csv = 'train.csv'
# train_csv = 'jason_train_10000.csv'
# train_csv = 'jason_train_5000.csv'
# train_csv = 'jason_train_4000.csv'
# train_csv = 'jason_train_2000.csv'
# train_csv = 'jason_train_1000.csv'
train_csv_filename = '%s/%s' % (resources, train_csv)
test_csv_filename = '%s/%s' % (resources, test_csv)

# read training info
# The data comes from DisplayImage.get_all_info(); the former
# ``pd.read_csv(train_csv_filename)`` result was overwritten before being
# used, so that extra parse of the whole file has been removed.
image_info = DisplayImage(train_csv_filename)
digit_train_set = image_info.get_all_info()

# read testing info
digit_test_set = pd.read_csv(test_csv_filename)
test_samples_v1 = digit_test_set.values

# separate training info into samples and target
train_samples_v1 = digit_train_set[:, 1]
target = digit_train_set[:, 0].astype(int)
from image_test_space import DisplayImage

# Sanity checks for DisplayImage.in_corner on a 512px image with a 16px
# screen size.
screen_size = 16
img_size = 512
img_mod = DisplayImage(img_size=img_size, screen_size=screen_size)

# Each entry pairs the four positional arguments passed to in_corner() with
# the truthiness the call must have.
# NOTE(review): the arguments look like two (x, y) box corners -- confirm
# the order against DisplayImage.in_corner.
corner_cases = [
    ((256, 256, 256 + 16, 256 + 16), False),
    ((0, 0, 16, 16), True),
    ((0, 512 - 16, 16, 512), True),
    ((0, 256, 16, 256 + 16), False),
]
for box, expected in corner_cases:
    assert bool(img_mod.in_corner(*box)) == expected