def getDataNonMalik(imageComplete):
    """Shuffle the (image, label) pairs and build raw-pixel features.

    The list ``imageComplete`` is shuffled in place. For every pair, the
    image (element 0) is flattened into a 1-D pixel vector and the first
    entry of the label (element 1) is taken as the class label.

    Returns:
        A 3-tuple ``(shuffledData, shuffledLabels, imageComplete)`` where
        the first two are lists aligned with the shuffled order and the
        third is the same (now shuffled) list that was passed in.
    """
    # Randomize the sample order before extracting features.
    random.shuffle(imageComplete)

    # The feature is simply the flattened array of pixels.
    shuffledData = [pair[0].flatten() for pair in imageComplete]
    shuffledLabels = [pair[1][0] for pair in imageComplete]

    return shuffledData, shuffledLabels, imageComplete
def getDataNonMalik(imageComplete):
    """Shuffle the (image, label) pairs and build raw-pixel features.

    The list ``imageComplete`` is shuffled in place. For every pair, the
    image (element 0) is flattened into a 1-D pixel vector and the first
    entry of the label (element 1) is taken as the class label.

    Returns:
        A 3-tuple ``(shuffledData, shuffledLabels, imageComplete)`` where
        the first two are lists aligned with the shuffled order and the
        third is the same (now shuffled) list that was passed in.
    """
    # Randomize the sample order before extracting features.
    random.shuffle(imageComplete)

    # The feature is simply the flattened array of pixels.
    shuffledData = [pair[0].flatten() for pair in imageComplete]
    shuffledLabels = [pair[1][0] for pair in imageComplete]

    return shuffledData, shuffledLabels, imageComplete
def getDataMALIK(gauss_bool, imageData):
    """Extract block-wise orientation-histogram features for every image.

    Processing steps:
      1. Ink normalization: each ``imageData[i]`` is divided in place by
         its norm (skipped when the norm is zero).
      2. The images are paired with the module-level ``imageLabels`` and
         the pairs are shuffled.
      3. If ``getDataPickle()`` returns a non-empty label list, that
         result is returned immediately.
      4. Otherwise, for each image the gradient orientation is computed
         and 9-bin histograms over (-pi, pi) are taken on 4x4 and 7x7
         blocks (an aligned grid plus a shifted grid); each scale is
         L2-normalized and the two are concatenated into one feature
         vector. The results are pickled under ``./Results/``.

    Args:
        gauss_bool: when truthy, gradients come from first-order Gaussian
            derivative filters (sigma 1); otherwise from a ``[-1, 0, 1]``
            convolution kernel.
        imageData: indexable collection of 2-D image arrays; modified in
            place by the ink normalization.

    Returns:
        ``(shuffledData, shuffledLabels)`` when ``getDataPickle()``
        supplied labels, otherwise
        ``(shuffledData, shuffledLabels, imageComplete)``.
        NOTE(review): the two paths return tuples of different length;
        kept as-is so existing callers are not broken -- confirm which
        arity callers actually expect.
    """
    #############
    # Ink Normalization
    #############
    for idx in range(len(imageData)):
        aux_norm = np.linalg.norm(imageData[idx])
        if aux_norm != 0:
            imageData[idx] /= aux_norm

    # Materialize the pairs: shuffling, len() and indexing need a real list.
    imageComplete = list(zip(imageData, imageLabels))

    if DEBUG:
        print(50 * '-')
        print(("Shapes of image data and labels: ", imageData.shape,
               imageLabels.shape, len(imageComplete)))
        # The message says 10000 but the sample shown is index 20000;
        # guarded so that small datasets do not raise IndexError.
        if len(imageComplete) > 20000:
            print("Image/Digit 10000:\n" + str(imageComplete[20000]))

    # SHUFFLE THE IMAGES
    random.shuffle(imageComplete)

    # getDataPickle is defined elsewhere; presumably it loads previously
    # pickled features and yields empty containers when none exist.
    shuffledData, shuffledLabels = getDataPickle()
    if len(shuffledLabels) > 0:
        return shuffledData, shuffledLabels

    n_bins = 9
    hist_range = (-np.pi, np.pi)
    n_images = len(imageComplete)

    for ind, sample in enumerate(imageComplete):
        # Index with the current loop position: the original code used the
        # stale index left over from the normalization loop, so every
        # feature vector was computed from the same image.
        image = sample[0]
        label = sample[1]

        if ind % 100 == 0:
            print('feature extraction :' + str(ind * 100. / n_images) + ' % over')

        if gauss_bool:
            grad_x = filters.gaussian_filter1d(image, 1, order=1, axis=0)
            grad_y = filters.gaussian_filter1d(image, 1, order=1, axis=1)
        else:
            grad_filter = np.array([[-1, 0, 1]])
            grad_x = signal.convolve2d(image, grad_filter, 'same')
            grad_y = signal.convolve2d(image, np.transpose(grad_filter), 'same')
        ori = np.array(np.arctan2(grad_y, grad_x))

        # blockshaped is defined elsewhere; presumably it splits a 2-D
        # array into non-overlapping (rows x cols) blocks.
        # NOTE(review): zip stops at the shorter sequence, so if the shifted
        # grid yields fewer blocks than the aligned grid, the trailing
        # aligned blocks are ignored. Kept to preserve the feature length.
        ori_4_hist = []
        for elem1, elem2 in zip(blockshaped(ori, 4, 4),
                                blockshaped(ori[2:-2, 2:-2], 4, 4)):
            ori_4_hist.append(np.histogram(elem1.flatten(), n_bins, hist_range)[0])
            ori_4_hist.append(np.histogram(elem2.flatten(), n_bins, hist_range)[0])

        # The 7x7 histograms go into their own list (the original appended
        # them to ori_4_hist, leaving ori_7_hist empty).
        ori_7_hist = []
        for elem1, elem2 in zip(blockshaped(ori, 7, 7),
                                blockshaped(ori[3:-4, 3:-4], 7, 7)):
            ori_7_hist.append(np.histogram(elem1.flatten(), n_bins, hist_range)[0])
            ori_7_hist.append(np.histogram(elem2.flatten(), n_bins, hist_range)[0])

        # Normalize each scale independently before concatenating.
        ori_4_hist = np.float64(ori_4_hist) / np.linalg.norm(ori_4_hist)
        ori_7_hist = np.float64(ori_7_hist) / np.linalg.norm(ori_7_hist)

        shuffledData.append(np.append(ori_4_hist, ori_7_hist))
        shuffledLabels.append(label[0])

    # Context managers make sure the pickle files are flushed and closed.
    with open("./Results/shuffledData.p", 'wb') as data_file:
        pickle.dump(shuffledData, data_file)
    with open("./Results/shuffledLabels.p", 'wb') as labels_file:
        pickle.dump(shuffledLabels, labels_file)

    return shuffledData, shuffledLabels, imageComplete
def getDataMALIK(gauss_bool, imageData):
    """Extract block-wise orientation-histogram features for every image.

    Processing steps:
      1. Ink normalization: each ``imageData[i]`` is divided in place by
         its norm (skipped when the norm is zero).
      2. The images are paired with the module-level ``imageLabels`` and
         the pairs are shuffled.
      3. If ``getDataPickle()`` returns a non-empty label list, that
         result is returned immediately.
      4. Otherwise, for each image the gradient orientation is computed
         and 9-bin histograms over (-pi, pi) are taken on 4x4 and 7x7
         blocks (an aligned grid plus a shifted grid); each scale is
         L2-normalized and the two are concatenated into one feature
         vector. The results are pickled under ``./Results/``.

    Args:
        gauss_bool: when truthy, gradients come from first-order Gaussian
            derivative filters (sigma 1); otherwise from a ``[-1, 0, 1]``
            convolution kernel.
        imageData: indexable collection of 2-D image arrays; modified in
            place by the ink normalization.

    Returns:
        ``(shuffledData, shuffledLabels)`` when ``getDataPickle()``
        supplied labels, otherwise
        ``(shuffledData, shuffledLabels, imageComplete)``.
        NOTE(review): the two paths return tuples of different length;
        kept as-is so existing callers are not broken -- confirm which
        arity callers actually expect.
    """
    #############
    # Ink Normalization
    #############
    for idx in range(len(imageData)):
        aux_norm = np.linalg.norm(imageData[idx])
        if aux_norm != 0:
            imageData[idx] /= aux_norm

    # Materialize the pairs: shuffling, len() and indexing need a real list.
    imageComplete = list(zip(imageData, imageLabels))

    if DEBUG:
        print(50 * '-')
        print(("Shapes of image data and labels: ", imageData.shape,
               imageLabels.shape, len(imageComplete)))
        # The message says 10000 but the sample shown is index 20000;
        # guarded so that small datasets do not raise IndexError.
        if len(imageComplete) > 20000:
            print("Image/Digit 10000:\n" + str(imageComplete[20000]))

    # SHUFFLE THE IMAGES
    random.shuffle(imageComplete)

    # getDataPickle is defined elsewhere; presumably it loads previously
    # pickled features and yields empty containers when none exist.
    shuffledData, shuffledLabels = getDataPickle()
    if len(shuffledLabels) > 0:
        return shuffledData, shuffledLabels

    n_bins = 9
    hist_range = (-np.pi, np.pi)
    n_images = len(imageComplete)

    for ind, sample in enumerate(imageComplete):
        # Index with the current loop position: the original code used the
        # stale index left over from the normalization loop, so every
        # feature vector was computed from the same image.
        image = sample[0]
        label = sample[1]

        if ind % 100 == 0:
            print('feature extraction :' + str(ind * 100. / n_images) + ' % over')

        if gauss_bool:
            grad_x = filters.gaussian_filter1d(image, 1, order=1, axis=0)
            grad_y = filters.gaussian_filter1d(image, 1, order=1, axis=1)
        else:
            grad_filter = np.array([[-1, 0, 1]])
            grad_x = signal.convolve2d(image, grad_filter, 'same')
            grad_y = signal.convolve2d(image, np.transpose(grad_filter), 'same')
        ori = np.array(np.arctan2(grad_y, grad_x))

        # blockshaped is defined elsewhere; presumably it splits a 2-D
        # array into non-overlapping (rows x cols) blocks.
        # NOTE(review): zip stops at the shorter sequence, so if the shifted
        # grid yields fewer blocks than the aligned grid, the trailing
        # aligned blocks are ignored. Kept to preserve the feature length.
        ori_4_hist = []
        for elem1, elem2 in zip(blockshaped(ori, 4, 4),
                                blockshaped(ori[2:-2, 2:-2], 4, 4)):
            ori_4_hist.append(np.histogram(elem1.flatten(), n_bins, hist_range)[0])
            ori_4_hist.append(np.histogram(elem2.flatten(), n_bins, hist_range)[0])

        # The 7x7 histograms go into their own list (the original appended
        # them to ori_4_hist, leaving ori_7_hist empty).
        ori_7_hist = []
        for elem1, elem2 in zip(blockshaped(ori, 7, 7),
                                blockshaped(ori[3:-4, 3:-4], 7, 7)):
            ori_7_hist.append(np.histogram(elem1.flatten(), n_bins, hist_range)[0])
            ori_7_hist.append(np.histogram(elem2.flatten(), n_bins, hist_range)[0])

        # Normalize each scale independently before concatenating.
        ori_4_hist = np.float64(ori_4_hist) / np.linalg.norm(ori_4_hist)
        ori_7_hist = np.float64(ori_7_hist) / np.linalg.norm(ori_7_hist)

        shuffledData.append(np.append(ori_4_hist, ori_7_hist))
        shuffledLabels.append(label[0])

    # Context managers make sure the pickle files are flushed and closed.
    with open("./Results/shuffledData.p", 'wb') as data_file:
        pickle.dump(shuffledData, data_file)
    with open("./Results/shuffledLabels.p", 'wb') as labels_file:
        pickle.dump(shuffledLabels, labels_file)

    return shuffledData, shuffledLabels, imageComplete