def create_mnist_dataset():
    """Build a digit data set from the raw MNIST data.

    Samples labelled ``0`` are dropped, every 20th remaining sample
    (starting at index 3) is kept, the images are passed through
    ``blobify`` and features are extracted from each resulting image with
    ``extract_efd_features``. The blobified images are additionally
    written, best effort, as JPEG files to ``~/sudokuextract``.

    :return: ``(images, labels, X, y)``. ``images`` are the images returned
        by ``blobify``, ``X`` their feature vectors and ``y`` the labels
        filtered with the mask returned by ``blobify``. ``labels`` is the
        subsampled label array *before* that filtering, so it may be
        longer than ``images``.
    """
    images, labels = get_mnist_raw_data()

    # Remove all samples labelled zero.
    mask = labels != 0
    print("Pre-zero removal: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    images = list(itertools.compress(images, mask))
    labels = labels[mask]

    # Subsample: keep every 20th sample, starting with the fourth one.
    images = images[3::20]
    labels = labels[3::20]
    print("Pre-blobify: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(labels))]))

    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    # Keep only the labels whose image survived blobify.
    y = y[mask]
    print("Post-blobify: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(y))]))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    try:
        os.makedirs(os.path.expanduser('~/sudokuextract'))
    except OSError:
        # Usually the directory already exists; any other problem shows up
        # (and is reported) when the files are written below.
        pass

    try:
        # Pair the images with ``y`` and not with ``labels``: ``images`` has
        # been filtered by blobify, so only ``y`` is still aligned with it.
        for i, (img, lbl) in enumerate(zip(images, y)):
            img = Image.fromarray(img, 'L')
            target = os.path.expanduser(
                '~/sudokuextract/{1}_{0:04d}.jpg'.format(i + 1, lbl))
            # JPEG data is binary, so the file must be opened in 'wb' mode.
            with open(target, 'wb') as f:
                img.save(f)
    except Exception as e:
        # Dumping the images is a debugging aid only; never fail because of it.
        print(e)

    return images, labels, X, y
def classify_efd_features(image, classifier):
    """Classify ``image`` using the features from ``extract_efd_features``.

    :param image: Image handed to ``get_centered_blob``.
    :param classifier: Object whose ``predict`` method is called with a
        single-row, two-dimensional feature array.
    :return: ``(prediction, blob)`` where ``blob`` is the result of
        ``get_centered_blob``. If that result is ``None``, ``(-1, image)``
        is returned with the input image untouched.
    """
    blob = get_centered_blob(image)
    if blob is None:
        # No blob to classify: signal it with -1 and hand the input back.
        return -1, image

    features = extract_efd_features(blob)
    # predict() is given one row holding all features of this sample.
    single_row = features.reshape((1, len(features)))
    predictions = classifier.predict(single_row)
    return predictions[0], blob
def create_mnist_dataset():
    """Build a digit data set from the raw MNIST data.

    Samples labelled ``0`` are dropped, every 20th remaining sample
    (starting at index 3) is kept, the images are passed through
    ``blobify`` and features are extracted from each resulting image with
    ``extract_efd_features``. The blobified images are additionally
    written, best effort, as JPEG files to ``~/sudokuextract``.

    :return: ``(images, labels, X, y)``. ``images`` are the images returned
        by ``blobify``, ``X`` their feature vectors and ``y`` the labels
        filtered with the mask returned by ``blobify``. ``labels`` is the
        subsampled label array *before* that filtering, so it may be
        longer than ``images``.
    """
    images, labels = get_mnist_raw_data()

    # Remove all samples labelled zero.
    mask = labels != 0
    print("Pre-zero removal: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    images = list(itertools.compress(images, mask))
    labels = labels[mask]

    # Subsample: keep every 20th sample, starting with the fourth one.
    images = images[3::20]
    labels = labels[3::20]
    print("Pre-blobify: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(labels))]))

    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    # Keep only the labels whose image survived blobify.
    y = y[mask]
    print("Post-blobify: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(y))]))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    try:
        os.makedirs(os.path.expanduser('~/sudokuextract'))
    except OSError:
        # Usually the directory already exists; any other problem shows up
        # (and is reported) when the files are written below.
        pass

    try:
        # Pair the images with ``y`` and not with ``labels``: ``images`` has
        # been filtered by blobify, so only ``y`` is still aligned with it.
        for i, (img, lbl) in enumerate(zip(images, y)):
            img = Image.fromarray(img, 'L')
            target = os.path.expanduser(
                '~/sudokuextract/{1}_{0:04d}.jpg'.format(i + 1, lbl))
            # JPEG data is binary, so the file must be opened in 'wb' mode.
            with open(target, 'wb') as f:
                img.save(f)
    except Exception as e:
        # Dumping the images is a debugging aid only; never fail because of it.
        print(e)

    return images, labels, X, y
def create_data_set_from_images(path_to_data_dir, force=False):
    """Build a digit data set from images with reference text files.

    Every ``.jpg``/``.png``/``.bmp`` file directly inside
    ``path_to_data_dir`` that has a sibling ``<name>.txt`` file is used.
    The text file is read as one line per row, one digit character per
    cell. Each image is processed twice with ``_extraction_iterator_map``,
    once with default arguments and once with
    ``use_local_thresholding=True``; the cells of the first accepted
    candidate of each pass are added to the data set.

    The collected cell images are written, best effort, as JPEG files to
    ``~/sudokuextract`` before they are passed through ``blobify`` and
    ``extract_efd_features``.

    :param path_to_data_dir: Directory holding the images and text files;
        ``~`` is expanded.
    :param force: If true, accept the first candidate of each pass without
        showing it with matplotlib and asking for confirmation.
    :return: ``(images, labels, X, y)``. ``images`` are the images returned
        by ``blobify``, ``X`` their feature vectors and ``y`` the labels
        filtered with the mask returned by ``blobify``. ``labels`` is the
        complete, unfiltered label list. ``None`` is returned when
        matplotlib cannot be imported.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        print("This method requires matplotlib installed...")
        return

    images = []
    labels = []
    path_to_data_dir = os.path.abspath(os.path.expanduser(path_to_data_dir))
    # Only the files directly inside the directory are considered.
    _, _, files = next(os.walk(path_to_data_dir))
    for f in files:
        file_name, file_ext = os.path.splitext(f)
        reference_name = "{0}.txt".format(file_name)
        if (file_ext not in ('.jpg', '.png', '.bmp') or
                reference_name not in files):
            continue

        # The current file is an image and it has a corresponding text
        # file as reference. Use it as data.
        print("Handling {0}...".format(f))
        image = Image.open(os.path.join(path_to_data_dir, f))
        with open(os.path.join(path_to_data_dir, reference_name),
                  'rt') as reference:
            parsed_img = reference.read().strip().split('\n')

        # First pass: extraction with default arguments.
        _collect_confirmed_cells(
            _extraction_iterator_map(np.array(image.convert('L'))),
            parsed_img, images, labels, force, plt,
            "Is this OK (Y/n/a)? ")
        # Second pass: same image, extracted with local thresholding.
        _collect_confirmed_cells(
            _extraction_iterator_map(np.array(image.convert('L')),
                                     use_local_thresholding=True),
            parsed_img, images, labels, force, plt,
            "Is this OK (Y/n/a)? \n")

    try:
        os.makedirs(os.path.expanduser('~/sudokuextract'))
    except OSError:
        # Usually the directory already exists; any other problem shows up
        # (and is reported) when the files are written below.
        pass

    try:
        for i, (img, lbl) in enumerate(zip(images, labels)):
            img = Image.fromarray(img, 'L')
            target = os.path.expanduser(
                '~/sudokuextract/{1}_{0:04d}.jpg'.format(i + 1, lbl))
            # JPEG data is binary, so the file must be opened in 'wb' mode.
            with open(target, 'wb') as out:
                img.save(out)
    except Exception as e:
        # Dumping the images is a debugging aid only; never fail because of it.
        print(e)

    print("Pre-blobify: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    # Keep only the labels whose image survived blobify.
    y = y[mask]
    print("Post-blobify: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(y))]))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    return images, labels, X, y


def _collect_confirmed_cells(candidates, parsed_img, images, labels, force,
                             plt, prompt):
    """Append the cells of the first accepted candidate to the data set.

    ``candidates`` yields ``(sudoku, subimage)`` pairs. Unless ``force`` is
    true, each candidate is shown in a 9x9 matplotlib grid titled with the
    reference digits and the user is asked ``prompt``: ``a`` stops this
    pass without adding anything, ``n`` moves on to the next candidate and
    any other answer accepts the candidate. ``images`` and ``labels`` are
    extended in place.
    """
    try:
        ask = raw_input  # Python 2, or a compatibility alias in this module.
    except NameError:
        ask = input  # Python 3: ``input`` replaces ``raw_input``.

    for sudoku, _subimage in candidates:
        if not force:
            for k, row in enumerate(sudoku):
                for kk, cell in enumerate(row):
                    ax = plt.subplot2grid((9, 9), (k, kk))
                    ax.imshow(cell, plt.cm.gray)
                    ax.set_title(str(parsed_img[k][kk]))
                    ax.axis('off')
            plt.show()
            answer = ask(prompt).lower()
            if answer == 'a':
                break
            if answer == 'n':
                continue

        # Accepted (or forced): store a copy of every cell with its digit.
        for k, row in enumerate(sudoku):
            for kk, cell in enumerate(row):
                images.append(cell.copy())
                labels.append(int(parsed_img[k][kk]))
        # One accepted candidate per pass is enough.
        break
def create_data_set_from_images(path_to_data_dir, force=False):
    """Build a digit data set from images with reference text files.

    Every ``.jpg``/``.png``/``.bmp`` file directly inside
    ``path_to_data_dir`` that has a sibling ``<name>.txt`` file is used.
    The text file is read as one line per row, one digit character per
    cell. Each image is processed twice with ``_extraction_iterator_map``,
    once with default arguments and once with
    ``use_local_thresholding=True``; the cells of the first accepted
    candidate of each pass are added to the data set.

    The collected cell images are written, best effort, as JPEG files to
    ``~/sudokuextract`` before they are passed through ``blobify`` and
    ``extract_efd_features``.

    :param path_to_data_dir: Directory holding the images and text files;
        ``~`` is expanded.
    :param force: If true, accept the first candidate of each pass without
        showing it with matplotlib and asking for confirmation.
    :return: ``(images, labels, X, y)``. ``images`` are the images returned
        by ``blobify``, ``X`` their feature vectors and ``y`` the labels
        filtered with the mask returned by ``blobify``. ``labels`` is the
        complete, unfiltered label list. ``None`` is returned when
        matplotlib cannot be imported.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        print("This method requires matplotlib installed...")
        return

    images = []
    labels = []
    path_to_data_dir = os.path.abspath(os.path.expanduser(path_to_data_dir))
    # Only the files directly inside the directory are considered.
    _, _, files = next(os.walk(path_to_data_dir))
    for f in files:
        file_name, file_ext = os.path.splitext(f)
        reference_name = "{0}.txt".format(file_name)
        if (file_ext not in ('.jpg', '.png', '.bmp') or
                reference_name not in files):
            continue

        # The current file is an image and it has a corresponding text
        # file as reference. Use it as data.
        print("Handling {0}...".format(f))
        image = Image.open(os.path.join(path_to_data_dir, f))
        with open(os.path.join(path_to_data_dir, reference_name),
                  'rt') as reference:
            parsed_img = reference.read().strip().split('\n')

        # First pass: extraction with default arguments.
        _collect_confirmed_cells(
            _extraction_iterator_map(np.array(image.convert('L'))),
            parsed_img, images, labels, force, plt,
            "Is this OK (Y/n/a)? ")
        # Second pass: same image, extracted with local thresholding.
        _collect_confirmed_cells(
            _extraction_iterator_map(np.array(image.convert('L')),
                                     use_local_thresholding=True),
            parsed_img, images, labels, force, plt,
            "Is this OK (Y/n/a)? \n")

    try:
        os.makedirs(os.path.expanduser('~/sudokuextract'))
    except OSError:
        # Usually the directory already exists; any other problem shows up
        # (and is reported) when the files are written below.
        pass

    try:
        for i, (img, lbl) in enumerate(zip(images, labels)):
            img = Image.fromarray(img, 'L')
            target = os.path.expanduser(
                '~/sudokuextract/{1}_{0:04d}.jpg'.format(i + 1, lbl))
            # JPEG data is binary, so the file must be opened in 'wb' mode.
            with open(target, 'wb') as out:
                img.save(out)
    except Exception as e:
        # Dumping the images is a debugging aid only; never fail because of it.
        print(e)

    print("Pre-blobify: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    # Keep only the labels whose image survived blobify.
    y = y[mask]
    print("Post-blobify: Label / N : {0}".format(
        [(v, c) for v, c in zip(_range(10), np.bincount(y))]))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    return images, labels, X, y


def _collect_confirmed_cells(candidates, parsed_img, images, labels, force,
                             plt, prompt):
    """Append the cells of the first accepted candidate to the data set.

    ``candidates`` yields ``(sudoku, subimage)`` pairs. Unless ``force`` is
    true, each candidate is shown in a 9x9 matplotlib grid titled with the
    reference digits and the user is asked ``prompt``: ``a`` stops this
    pass without adding anything, ``n`` moves on to the next candidate and
    any other answer accepts the candidate. ``images`` and ``labels`` are
    extended in place.
    """
    try:
        ask = raw_input  # Python 2, or a compatibility alias in this module.
    except NameError:
        ask = input  # Python 3: ``input`` replaces ``raw_input``.

    for sudoku, _subimage in candidates:
        if not force:
            for k, row in enumerate(sudoku):
                for kk, cell in enumerate(row):
                    ax = plt.subplot2grid((9, 9), (k, kk))
                    ax.imshow(cell, plt.cm.gray)
                    ax.set_title(str(parsed_img[k][kk]))
                    ax.axis('off')
            plt.show()
            answer = ask(prompt).lower()
            if answer == 'a':
                break
            if answer == 'n':
                continue

        # Accepted (or forced): store a copy of every cell with its digit.
        for k, row in enumerate(sudoku):
            for kk, cell in enumerate(row):
                images.append(cell.copy())
                labels.append(int(parsed_img[k][kk]))
        # One accepted candidate per pass is enough.
        break