def main_pix2pix():
    """Write (resized image | resized fragment) pairs for pix2pix training.

    Every path matched by ``dir_in + '/*'`` is loaded; files that fail to
    load and arrays that are not 3-dimensional are skipped.  For each
    remaining image, up to ``n_fragments`` fragments are requested from
    ``fragment``; each one is resized to ``_pix2pix_outsize``, concatenated
    after the resized full image along axis 1, and written to the next
    numbered ``.jpg`` under ``_pix2pix_dir``.
    """

    def out_pix2pix_name():
        """Return the next numbered output path and advance the counter."""
        global _pix2pix_counter
        outname = f'{_pix2pix_dir}/{_pix2pix_counter}.jpg'
        _pix2pix_counter += 1
        return outname

    for f_img in glob.glob(dir_in + '/*'):
        try:
            img = dip.imread(f_img)
        except Exception:
            # Best effort: unreadable files are skipped.  ``Exception`` (not a
            # bare ``except``) so Ctrl-C still interrupts the batch.
            continue
        print(f_img, img.shape)
        if len(img.shape) != 3:
            # Single-channel images are not used for pairs.
            continue

        # Resize only once we know the image will actually be used.
        img_out = dip.resize(img, _pix2pix_outsize, interpolation=INTER_AREA)
        gray = np.mean(img, axis=-1)
        grad = dip.transforms.edge_detect(gray)
        _, m_min, m_max, n_min, n_max = space_fill(grad)

        # Fragment size is a quarter of each image dimension.
        frag_size = img.shape[0] // 4, img.shape[1] // 4
        for _n in range(n_fragments):
            frag, _ = fragment(img, m_min, m_max, n_min, n_max,
                               frag_size=frag_size)
            if frag is None:
                # No fragment could be produced; move on to the next image.
                break
            frag = dip.resize(frag, _pix2pix_outsize, interpolation=INTER_AREA)
            both = np.concatenate([img_out, frag], axis=1)
            dip.im_write(both, out_pix2pix_name())
def main(outsize=default_outsize):
    """Export the images of non-fragment, terracotta-like records.

    Records in ``all_info`` are skipped when their title or description
    contains ``'fragment'`` or their categories contain ``'Fragments'``.
    Of the rest, a record is exported when ``'terra'`` appears in its title,
    medium or description, when it has no ``'Medium'`` key at all, or when
    ``'Terracotta'`` is among its categories.

    Args:
        outsize: Passed as ``dsize`` to ``dip.resize``.  When falsy, the
            source file is copied unchanged instead of being resized.
    """
    print(len(all_info), 'vases')
    for img_id, img_info in all_info.items():
        if 'fragment' in img_info['Title'].lower() or \
                'fragment' in img_info['description'].lower() or \
                'Fragments' in img_info['categories']:
            continue

        # Just get terracotta.  (To keep everything that is not a fragment,
        # drop this filter.)  'Medium' is only indexed once we know the key
        # exists, thanks to short-circuit evaluation.
        is_terracotta = (
            'terra' in img_info['Title'].lower()
            or 'Medium' not in img_info
            or 'terra' in img_info['Medium'].lower()
            or 'terra' in img_info['description'].lower()
            or 'Terracotta' in img_info['categories']
        )
        if not is_terracotta:
            continue

        if not outsize:
            shutil.copyfile(id_to_img_name(img_id), id_to_out_name(img_id))
            continue

        try:
            img = dip.imread(id_to_img_name(img_id))
        except Exception:
            # Best effort: skip images that cannot be loaded, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
        print(img.shape)
        new_img = dip.resize(img, dsize=outsize, interpolation=INTER_AREA)
        dip.im_write(new_img, id_to_out_name(img_id))
def main():
    """Export resized fragment images and pickle their source links.

    A record of ``all_info`` counts as a fragment when its title or
    description contains ``'fragment'`` or its categories contain
    ``'Fragments'``.  For every fragment the ``'src'`` link is stored in a
    dict keyed by image id (even if the image later fails to load), and the
    image is resized to the module-level ``outsize`` and written out.  The
    link dict is finally pickled to ``data/raw/frag_links.pkl``.
    Non-fragment records are ignored.
    """
    print(len(all_info), 'vases')
    link_dict = dict()
    for img_id, img_info in tqdm(all_info.items()):
        is_fragment = (
            'fragment' in img_info['Title'].lower()
            or 'fragment' in img_info['description'].lower()
            or 'Fragments' in img_info['categories']
        )
        if not is_fragment:
            continue

        link_dict[img_id] = img_info['src']
        try:
            img = dip.imread(id_to_img_name(img_id))
        except Exception:
            # Best effort: skip images that cannot be loaded, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
        print(img.shape)
        new_img = dip.resize(img, dsize=outsize, interpolation=INTER_AREA)
        dip.im_write(new_img, id_to_out_name(img_id))

    with open('data/raw/frag_links.pkl', 'wb') as f:
        pickle.dump(link_dict, f)
def encodeVlad(kmeans, im_path, xi, yi, scale, stride, blkRadii, numClusters):
    """Encode one image as a concatenation of per-cluster residual sums.

    The image at ``im_path`` is read (and resized to 200x200 when its shape
    differs), its features are computed with ``computeGradientDMD`` and each
    feature column is assigned to a cluster by ``kmeans.predict``.  For each
    of the first ``numClusters`` clusters, the sum over its member columns of
    ``center - feature`` is written into that cluster's slice of the result.

    Returns:
        1-D array of length ``len(kmeans.cluster_centers_) * xi.shape[1] * 5``.
    """
    img = dip.im_read(im_path)
    if np.shape(img) != (200, 200):
        # Bring every image to the same size before computing features.
        img = dip.resize(img, (200, 200))

    features = computeGradientDMD(img, xi, yi, scale, stride, blkRadii)
    classes = kmeans.predict(features.T)

    # Length of one cluster's slice in the output descriptor.
    block = np.shape(xi)[1] * 5
    descr = np.zeros((len(kmeans.cluster_centers_) * block, ))

    for k in range(numClusters):
        # Column vector so it broadcasts against the member feature columns.
        center_col = np.reshape(kmeans.cluster_centers_[k], (-1, 1))
        members = np.where(classes == k)[0]
        residuals = center_col - features[:, members]
        descr[k * block:(k + 1) * block] = np.sum(residuals, axis=1)

    return descr
def trainEncoder(images, numDescr, numClusters, xi, yi, scale, stride, blkRadii):
    """Fit a KMeans model on feature columns sampled from ``images``.

    ``ceil(numDescr / len(images))`` feature columns are drawn without
    replacement from each image's ``computeGradientDMD`` output and stacked
    side by side; KMeans is then fitted on the transposed stack.

    Returns:
        The fitted ``KMeans`` object (cluster centers and assignments).
    """
    numImages = len(images)
    perImage = int(np.ceil(numDescr / numImages))
    descrs = np.zeros((np.shape(xi)[1] * 5, perImage * numImages))

    for idx, path in enumerate(images):
        print("Training on : " + path)  # Prints name of image
        im = dip.im_read(path)
        if np.shape(im) != (200, 200):
            # Bring every image to the same size before computing features.
            im = dip.resize(im, (200, 200))
        features = computeGradientDMD(im, xi, yi, scale, stride, blkRadii)

        picked = np.random.choice(np.shape(features)[1],
                                  size=perImage,
                                  replace=False)
        # (An earlier, disabled variant truncated the last image's sample to
        # the remaining number of columns.)
        start = idx * perImage
        descrs[:, start:start + perImage] = features[:, picked]

    print("Finished descriptors")

    # Kmeans learning to find cluster centers.
    # NOTE(review): recent scikit-learn releases renamed algorithm 'full' to
    # 'lloyd' - confirm against the version this project pins.
    model = KMeans(n_clusters=numClusters,
                   random_state=0,
                   max_iter=1,
                   algorithm='full')
    # fit() returns the estimator itself: clusters and assignments.
    return model.fit(descrs.T)
def imageToBoard(path):
    """Read a photo of a 15x15 tile board and return the recognised grid.

    Pipeline: load and rotate the image, find the board as the largest blob
    of detected line segments, warp it to a square, refine the four border
    lines, then cut out each of the 225 cells and OCR the tile in it.

    Args:
        path: Path of the image file, passed to ``dip.im_read``.

    Returns:
        A list of 15 rows, each a list of 15 strings: the OCR result for the
        cell, ``'_'`` when the cell looks like a blank tile, or ``'?'`` when
        nothing was recognised.
    """

    def drawSegments(canvas, segments):
        """Draw every detected segment onto ``canvas`` (value 255, width 3)."""
        for segment in segments:
            for (x1, y1, x2, y2) in segment:
                # The detector returns float endpoints; current OpenCV
                # bindings only accept integer points for drawing calls.
                cv2.line(canvas, (int(x1), int(y1)), (int(x2), int(y2)), 255, 3)

    # Read the image and extract the grayscale and hue channels
    image = dip.im_read(path)
    image = np.rot90(image[:, :, 0:3], 3)
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    hue = hsv[:, :, 0]
    image = dip.rgb2gray(image)

    # Scale down the image so that the smallest dimension is 1000 pixels
    x, y = image.shape
    if min(x, y) > 1000:
        scale = 1000.0 / min(x, y)
        newSize = (int(scale * y), int(scale * x))
        image = dip.resize(image, newSize)
        hue = dip.resize(hue, newSize)

    # Detect the straight lines and draw them (dilated) on a blank canvas
    image2 = np.zeros(image.shape, dtype=np.uint8)
    lsd = cv2.createLineSegmentDetector(0)
    lines = lsd.detect(image)[0]
    drawSegments(image2, lines)

    # Find the largest blob in the image to find the board
    Labeled, numobj = ndImage.label(image2)
    lastSum = 0
    displayImage = None
    for item in range(1, numobj + 1):
        newImage = (Labeled == item)
        newSum = newImage.sum()
        if newSum > lastSum:
            displayImage = newImage
            lastSum = newSum

    # Find the four corners of the image.
    # The corners are defined as the maxima of the four functions:
    # (x + y), (X - x + y), (x + Y - y), and (X - x + Y - y)
    # This assumes the image is taken roughly square with the image
    # boundaries, but it can vary somewhat
    cornerBR = (0, 0)
    sumBR = 0
    cornerTR = (0, 0)
    sumTR = 0
    cornerBL = (0, 0)
    sumBL = 0
    cornerTL = (0, 0)
    sumTL = 0
    imagey, imagex = displayImage.shape
    for x in range(0, imagex):
        for y in range(0, imagey):
            if displayImage[y][x] != 0:
                temp = x + y
                if temp > sumBR:
                    sumBR = temp
                    cornerBR = (x, y)
                temp = x + imagey - y
                if temp > sumTR:
                    sumTR = temp
                    cornerTR = (x, y)
                temp = imagex - x + imagey - y
                if temp > sumTL:
                    sumTL = temp
                    cornerTL = (x, y)
                temp = imagex - x + y
                if temp > sumBL:
                    sumBL = temp
                    cornerBL = (x, y)

    # Estimate the transformation that would put the board corners on the
    # image corners
    dest = np.array([cornerTL, cornerBL, cornerBR, cornerTR])
    scale = 1000  # 15 * 15 * 20
    src = np.array([[0, 0], [0, scale], [scale, scale], [scale, 0]])
    tform3 = tf.ProjectiveTransform()
    tform3.estimate(src, dest)
    warped = tf.warp(image, tform3, output_shape=[scale, scale])
    hue = tf.warp(hue, tform3, output_shape=[scale, scale])
    # Trim a 7-pixel margin on every side.
    warped = warped[7:-7, 7:-7]
    hue = hue[7:-7, 7:-7]

    # Do line detection again to try to find the best grid lines,
    # particularly on the board borders
    warped = dip.float_to_im(warped)
    gridLines = np.zeros(warped.shape, dtype=np.uint8)
    lsd = cv2.createLineSegmentDetector(_refine=cv2.LSD_REFINE_ADV)
    lines = lsd.detect(warped)[0]
    drawSegments(gridLines, lines)

    # Determine the actual rows/cols that start with pixels.
    # dimX receives shape[0] and dimY shape[1]; the warped image is square
    # (same output size and crop on both axes), so the two are equal.
    dimX, dimY = warped.shape
    topRow = 0
    botRow = dimY - 1
    leftCol = 0
    rightCol = dimX - 1
    while np.amax(gridLines[:, leftCol]) == 0:
        leftCol += 1
    while np.amax(gridLines[:, rightCol]) == 0:
        rightCol -= 1
    while np.amax(gridLines[topRow]) == 0:
        topRow += 1
    while np.amax(gridLines[botRow]) == 0:
        botRow -= 1

    # Each border line is stored as its coordinate at the two image edges:
    # (y at left, y at right) for top/bottom, (x at top, x at bottom) for
    # left/right.
    lineTop = (topRow, topRow)
    lineBot = (botRow, botRow)
    lineLeft = (leftCol, leftCol)
    lineRight = (rightCol, rightCol)
    bestScoreTop = 0
    bestScoreBot = 0
    bestScoreLeft = 0
    bestScoreRight = 0

    # Within a small range from the border, determine the lines that best
    # describe the image borders.  They are scored by which one has the most
    # overlap with the canvas with the image lines
    canvas = np.zeros(warped.shape, dtype=np.uint8)
    thickness = 13
    for i in range(6, 50):
        for j in range(6, 50):
            # Top Row
            x1 = 0
            y1 = topRow + i
            x2 = dimX - 1
            y2 = topRow + j
            canvas.fill(0)
            cv2.line(canvas, (x1, y1), (x2, y2), 255, thickness)
            score = np.count_nonzero(np.logical_and(canvas > 0, gridLines))
            if score > bestScoreTop:
                lineTop = (y1, y2)
                bestScoreTop = score

            # Bottom Row
            x1 = 0
            y1 = botRow - i
            x2 = dimX - 1
            y2 = botRow - j
            canvas.fill(0)
            cv2.line(canvas, (x1, y1), (x2, y2), 255, thickness)
            score = np.count_nonzero(np.logical_and(canvas > 0, gridLines))
            if score > bestScoreBot:
                lineBot = (y1, y2)
                bestScoreBot = score

            # Left Column
            x1 = leftCol + i
            y1 = 0
            x2 = leftCol + j
            y2 = dimY - 1
            canvas.fill(0)
            cv2.line(canvas, (x1, y1), (x2, y2), 255, thickness)
            score = np.count_nonzero(np.logical_and(canvas > 0, gridLines))
            if score > bestScoreLeft:
                lineLeft = (x1, x2)
                bestScoreLeft = score

            # Right Column
            x1 = rightCol - i
            y1 = 0
            x2 = rightCol - j
            y2 = dimY - 1
            canvas.fill(0)
            cv2.line(canvas, (x1, y1), (x2, y2), 255, thickness)
            score = np.count_nonzero(np.logical_and(canvas > 0, gridLines))
            if score > bestScoreRight:
                lineRight = (x1, x2)
                bestScoreRight = score

    # Spacing between consecutive grid lines, measured separately at each
    # end of the border lines (15 cells per side).
    xDiff0 = (lineBot[0] - lineTop[0]) / 15.0
    xDiff1 = (lineBot[1] - lineTop[1]) / 15.0
    yDiff0 = (lineRight[0] - lineLeft[0]) / 15.0
    yDiff1 = (lineRight[1] - lineLeft[1]) / 15.0

    def rowLineY(k, t):
        """y of horizontal grid line ``k`` at fraction ``t`` from left to right."""
        return ((lineTop[0] + k * xDiff0) * (1.0 - t)
                + (lineTop[1] + k * xDiff1) * t)

    def colLineX(k, t):
        """x of vertical grid line ``k`` at fraction ``t`` from top to bottom."""
        return ((lineLeft[0] + k * yDiff0) * (1.0 - t)
                + (lineLeft[1] + k * yDiff1) * t)

    # Each cell is warped to a scale x scale square surrounded by pad pixels.
    scale = 80
    pad = 10
    total = scale + 2 * pad
    center = total // 2
    dest = np.array([[pad, pad], [pad, scale + pad],
                     [scale + pad, scale + pad], [scale + pad, pad]])

    # Now go through each of the 225 (15 * 15) cells
    grid = []
    for i in range(0, 15):
        grid.append([])
        top, bottom = i / 15.0, (i + 1) / 15.0
        for j in range(0, 15):
            left, right = j / 15.0, (j + 1) / 15.0

            # Calculate the four corners of the current grid square.
            # A horizontal line's y varies along x, so it is interpolated
            # with the column fraction; a vertical line's x varies along y,
            # so it is interpolated with the row fraction.
            tl_y = int(rowLineY(i, left))
            tr_y = int(rowLineY(i, right))
            bl_y = int(rowLineY(i + 1, left))
            br_y = int(rowLineY(i + 1, right))

            tl_x = int(colLineX(j, top))
            bl_x = int(colLineX(j, bottom))
            tr_x = int(colLineX(j + 1, top))
            br_x = int(colLineX(j + 1, bottom))

            # Warp the image so that the grid square becomes the center of
            # the image with some padding on all sides
            src = np.array([[tl_x, tl_y], [bl_x, bl_y],
                            [br_x, br_y], [tr_x, tr_y]])
            tform = tf.ProjectiveTransform()
            tform.estimate(dest, src)
            output = tf.warp(warped, tform, output_shape=[total, total])
            outputHue = tf.warp(hue, tform, output_shape=[total, total])

            # Output hue doesn't use any of the extra padding because it
            # wants the values from the middle of the tile
            outputHue = outputHue[2 * pad:-2 * pad, 2 * pad:-2 * pad]

            # Perform a simple image threshold to determine any text on the
            # tile
            outputBinary = np.logical_not(output < 0.55)
            Labeled, numobj = ndImage.label(outputBinary)

            # Keep the blob whose nearest pixel is closest to the middle of
            # the image (and closer than the initial distance), provided the
            # pixel count outside it doesn't indicate a false tile or a
            # blank tile.
            closestBlob = None
            distance = 20
            for item in range(1, numobj + 1):
                blob = (Labeled != item)
                tot = np.sum(blob)
                if not (9000 < tot and tot < 9950):
                    continue
                rows, cols = np.nonzero(Labeled == item)
                nearest = np.sqrt((rows - center) ** 2
                                  + (cols - center) ** 2).min()
                if nearest < distance:
                    distance = nearest
                    closestBlob = blob

            text = "?"

            # If a blob was detected
            if closestBlob is not None:
                closestBlob = closestBlob.astype(np.uint8) * 255

                # Perform OCR
                text = pytesseract.image_to_string(closestBlob, config='--oem 0 -c tessedit_char_whitelist=ABCDEFGHIJLKMNOPQRSTUVWXYZ|01l --psm 10')

                # Just a precaution to fix any ambiguity with 0s and Os
                text = text.replace("0", "O")

                # Correct the I tile, as a straight line doesn't easily
                # count with vanilla Tesseract.
                # NOTE(review): if the installed pytesseract returns trailing
                # whitespace, these exact comparisons will not match - confirm.
                if text in ['', '|', 'l', '1']:
                    text = "I"

            # If no letter detected and the median hue & grayscale values
            # indicate a blank tile
            med = np.median(outputHue)
            if text == "?" and (med > 0.6 or med < 0.01) and np.median(output) < 0.3:
                text = '_'

            grid[-1].append(text)

    return grid
".png")
        elif directory == dir_yes:
            out.save("../grayscale_data/yes/" + filename_without_extension + ".png")

###########################
#     RESIZING IMAGES     #
###########################
# Read every file in the two grayscale folders, resize it with cubic
# interpolation to the fixed size given to dip.resize, and write it as a
# PNG with the same base name into the matching resized_data folder.

dir_no = "../grayscale_data/no/"
dir_yes = "../grayscale_data/yes/"
dirs = [dir_no, dir_yes]

for directory in dirs:
    for filename in os.listdir(directory):
        im = dip.im_read(directory + filename)
        out = dip.resize(im, (350, 300), interpolation=cv2.INTER_CUBIC)
        # Drop the original extension; the output is always written as .png.
        filename_without_extension = os.path.splitext(filename)[0]
        # The destination folder mirrors the source class folder (no / yes).
        if directory == dir_no:
            dip.im_write(out, "../resized_data/no/" + filename_without_extension + ".png", quality=95)  # 95 is the best possible image quality
        elif directory == dir_yes:
            dip.im_write(out, "../resized_data/yes/" + filename_without_extension + ".png", quality=95)  # 95 is the best possible image quality

###########################
#    VARYING CONTRAST     #
###########################
def resize_image(self, im, dims):
    """Return ``im`` resized to ``dims`` by delegating to ``dip.resize``."""
    resized = dip.resize(im, dims)
    return resized
from tqdm import tqdm
import dippykit as dip
from cv2 import INTER_AREA, INTER_CUBIC

# Target size handed to dip.resize: (2 * side, side), where side is the
# first command-line argument.
try:
    outsize = (int(sys.argv[1]) * 2, int(sys.argv[1]))
except (IndexError, ValueError):
    # Missing or non-numeric argument.
    print('provide a number for the side of the square image to scale to')
    sys.exit(1)

_pix2pix_indir = 'data/processed/pix2pix_vase_fragments/train/'
_pix2pix_outdir = f'data/processed/pix2pix_vase_fragments_{sys.argv[1]}/train/'


def out_pix2pix(fname):
    """Return the output ``.jpg`` path for image base name ``fname``."""
    return f'{_pix2pix_outdir}/{fname}.jpg'


if os.path.exists(_pix2pix_outdir):
    y_n = input(f'Folder {_pix2pix_outdir} exists, overwrite?')
    # Only an answer that starts with y/Y counts as consent; a substring
    # test would also accept answers such as "nay".
    if not y_n.strip().lower().startswith('y'):
        sys.exit(0)
os.makedirs(_pix2pix_outdir, exist_ok=True)

for f_img in tqdm(glob.glob(_pix2pix_indir + '/*.jpg')):
    try:
        img = dip.imread(f_img)
    except Exception:
        # Best effort: skip unreadable files, but let Ctrl-C through.
        continue
    # Strip the extension: out_pix2pix appends '.jpg' itself, so keeping it
    # would produce 'name.jpg.jpg'.
    img_name = os.path.splitext(os.path.basename(f_img))[0]
    img = dip.resize(img, outsize, interpolation=INTER_CUBIC)
    dip.im_write(img, out_pix2pix(img_name))