import os

import cv2
import numpy as np


class captchaDataset:
    def __init__(self, image_abs_path):
        # Constructor inferred from the usage below: build the dataset
        # directly from the image directory.
        self._init_dataset(image_abs_path)

    def __len__(self):
        return self.dataset.shape[0]

    def __getitem__(self, index):
        return self.dataset[index], self.label[index]

    def _init_dataset(self, image_abs_path):
        for (_, _, filelist) in os.walk(image_abs_path):
            # Read each image in color; the first five characters of the
            # filename encode the captcha label.
            self.dataset = np.array(
                [[cv2.imread(os.path.join(image_abs_path, filename), 1)]
                 for filename in filelist])
            self.label = np.array(
                [to_argmax(filename[:5]) for filename in filelist])


if __name__ == "__main__":
    IMAGE_PATH = "./split/test"
    dataset = captchaDataset(IMAGE_PATH)
    # For verifying whether all data has been loaded.
    print("Length of dataset:", len(dataset))
    (images, labels) = dataset[0:2]
    print(images.shape)
    for image, label in zip(images, labels):
        visualize(image.cpu(), to_argmax(argmax_to_string(label.cpu())))
        # Or show it this way:
        # img = np.array(image[0].cpu())
        # cv2.imshow("The first image", img)
        # cv2.waitKey(0)
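# The dataset above relies on label helpers that are not shown here. Below
# is a minimal sketch of what to_argmax and argmax_to_string might look
# like, assuming each five-character captcha label is encoded as
# per-character class indices; the CHARSET is an assumption, not
# necessarily the project's actual alphabet.
import string

import numpy as np

CHARSET = string.digits + string.ascii_lowercase


def to_argmax(text):
    # Map each character of the label to its index in CHARSET.
    return np.array([CHARSET.index(ch) for ch in text])


def argmax_to_string(indices):
    # Inverse mapping: class indices back to the label string.
    return "".join(CHARSET[int(i)] for i in indices)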
matchID = 0
for slide in range(0, NUM_SLIDES):
    # NOTE: the frame loop also runs to NUM_SLIDES, which assumes the
    # number of sampled frames equals the number of slides.
    for frame in range(0, NUM_SLIDES):
        slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
        framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
        matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                                 np.asarray(frameDescs, np.float32), k=2)
        matchPairs = []
        distances = []
        good = []
        slideKeypoints = []
        frameKeypoints = []
        # Lowe's ratio test: keep a match only when it is clearly closer
        # than the second-best candidate.
        for m, n in matches:
            if m.distance < 0.75 * n.distance:
                slideKeypoints.append(slidekpts[m.queryIdx])
                frameKeypoints.append(framekpts[m.trainIdx])
                distances.append(m.distance)
                matchID += 1
        # Score each (slide, frame) pair by the sum of its ten largest
        # surviving match distances, optionally normalized by the total
        # number of candidate matches.
        distances.sort(reverse=True)
        matchMetric[(slide, frame)] = sum(distances[0:10]) / len(matches)
        matchMetricNotNormalized[(slide, frame)] = sum(distances[0:10])
        matchSlideFrameDict[(slide, frame)] = matchPairs

visualize(matchMetric, NUM_SLIDES,
          "./new_results/normalized_distanceReversed.jpg")
visualize(matchMetricNotNormalized, NUM_SLIDES,
          "./new_results/distanceReversed.jpg")
predictARGMIN(matchMetric)
slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                         np.asarray(frameDescs, np.float32), k=2)
matchPairs = []
distances = []
good = []
slideKeypoints = []
frameKeypoints = []
for m, n in matches:
    if m.distance < 0.75 * n.distance:
        # Debug output for checking the matched indices against the
        # keypoint list sizes.
        print("start")
        print(len(slidekpts))
        print(len(framekpts))
        print(m.trainIdx)
        print(m.queryIdx)
        slideKeypoints.append(slidekpts[m.queryIdx])
        frameKeypoints.append(framekpts[m.trainIdx])
        distances.append(m.distance)
        matchID += 1
distances.sort(reverse=True)
print(distances)
matchMetric[(slide, frame)] = sum(distances[0:10])
matchSlideFrameDict[(slide, frame)] = matchPairs
visualize(matchMetric, 28, "./distanceReversed.jpg")
def match_and_draw(match, r_threshold):
    m = match(desc1, desc2, r_threshold)
    matched_p1 = np.array([kp1[i].pt for i, j in m])
    matched_p2 = np.array([kp2[j].pt for i, j in m])
    H, status = cv2.findHomography(matched_p1, matched_p2, cv2.RANSAC, 5.0)
    print('%d / %d inliers/matched' % (np.sum(status), len(status)))
    # print(matched_p1)
    # print("---------")
    # print(matched_p2)

    # Size of the output mosaic.
    size = (img1.shape[1] * 2, img1.shape[0] * 2)

    # Initial preparation: shift the images away from the corners of the
    # resulting mosaic.
    print('center of images')
    c_y, c_x = (np.asarray(img1.shape[:2]) / 2.).tolist()
    # c_y, c_x = (np.asarray(img2.shape[:2]) / 2.).tolist()

    # Translate the matched points so the image center is the origin.
    for i in range(matched_p1.shape[0]):
        matched_p1[i] -= np.array([c_x, c_y])
        matched_p2[i] -= np.array([c_x, c_y])
    # print(matched_p1)
    # print("---------")
    # print(matched_p2)

    # Initial parameters.
    theta_1 = np.array([0, 1., 1, 0, 0, 0, 0])
    theta_2 = np.array([0, 1., 1, 0, 0, 0, 0])
    src = np.array(matched_p1[3:6], np.float32)
    dst = np.array(matched_p2[3:6], np.float32)
    warp_affine = cv2.getAffineTransform(src, dst)
    print("warp_affine ", warp_affine)
    theta_1[0] = -np.cos(warp_affine[0][0])
    theta_1[5] = -warp_affine[0][2]
    theta_1[6] = -warp_affine[1][2]

    # Parameters for Levenberg-Marquardt.
    lam = 10.0
    penalty = 10e2
    threshold = 0.000001

    # Run Levenberg-Marquardt.
    params = lma.levenberg_marquardt(matched_p1, matched_p2, theta_1, theta_2,
                                     lam, penalty, threshold, img1, img2)
    t_1 = params[:params.size // 2]
    t_2 = params[params.size // 2:]
    print("T_1", t_1)
    print("T_2", t_2)

    print('points after transform')
    for i in range(matched_p1.shape[0]):
        err_1 = util.transformPoint(matched_p1[i], t_1)
        err_2 = util.transformPoint(matched_p2[i], t_2)
        print(err_1, err_2, " --error-- ", np.abs(err_1 - err_2))
    print('---------------------------------------------------')

    result = util.stitch_for_visualization(img1, img2, t_1, t_2,
                                           c_x, c_y, size)
    util.visualize(img1, img2, matched_p1, matched_p2, t_1, t_2)
    cv2.imwrite('output.jpg', result)
    return None
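# The RANSAC status mask returned by cv2.findHomography above is only
# printed. A possible refinement (a sketch, not what this code does) would
# be to keep only the RANSAC inliers before running the optimizer:
#
#     inliers = status.ravel() == 1
#     matched_p1 = matched_p1[inliers]
#     matched_p2 = matched_p2[inliers]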
            frameKeypoints.append(framekpts[m.trainIdx].pt)
            good.append(m)
        '''
        if (frame == 5 and slide == 5):
            print(slideImgPath + slideNames[slide])
            print(vidFramePath + videoFrameNames[frame])
            slideImg = cv2.imread(slideImgPath + slideNames[slide], 0)
            vidImg = cv2.imread(vidFramePath + videoFrameNames[frame], 0)
            drawMatches(slidekpts, framekpts, slideImg, vidImg, good)
        if (frame == 6 and slide == 5):
            print(slideImgPath + slideNames[slide])
            print(vidFramePath + videoFrameNames[frame])
            slideImg = cv2.imread(slideImgPath + slideNames[slide], 0)
            vidImg = cv2.imread(vidFramePath + videoFrameNames[frame], 0)
            drawMatches(slidekpts, framekpts, slideImg, vidImg, good)
        '''
        matchMetric[(slide, frame)] = computeRDistanceDifference(
            slideKeypoints, frameKeypoints, slideDimensions, vidDimensions)
        matchMetricNotNormalized[(slide, frame)] = computeRDistanceDifference(
            slideKeypoints, frameKeypoints, slideDimensions, vidDimensions,
            False)

mat = visualize(matchMetric, NUM_SLIDES,
                "./normalized_differenceCentroidDifferences.jpg")
visualize(matchMetricNotNormalized, NUM_SLIDES,
          "./differenceCentroidDifferences.jpg")
predictARGMIN(mat)
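# computeRDistanceDifference is not shown in these excerpts. Judging by the
# output names ("differenceCentroidDifferences"), it compares keypoint
# spreads around their centroids; the sketch below is one guess at such a
# metric, with the normalization-by-diagonal purely an assumption.
import numpy as np


def centroid_distance_difference(slide_pts, frame_pts, slide_dim, vid_dim,
                                 normalized=True):
    slide_pts = np.asarray(slide_pts, dtype=np.float64)
    frame_pts = np.asarray(frame_pts, dtype=np.float64)
    if len(slide_pts) == 0:
        # No surviving matches: treat the pair as maximally dissimilar.
        return np.inf
    # Mean distance of each point set from its own centroid.
    r_slide = np.linalg.norm(slide_pts - slide_pts.mean(axis=0), axis=1).mean()
    r_frame = np.linalg.norm(frame_pts - frame_pts.mean(axis=0), axis=1).mean()
    if normalized:
        # Normalize by each image's diagonal so the two spreads are
        # comparable across resolutions.
        r_slide /= np.hypot(*slide_dim)
        r_frame /= np.hypot(*vid_dim)
    return abs(r_slide - r_frame)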
def compute_match_save(SLIDES_PATH, SLIDE_START, SLIDE_END,
                       FRAME_SAMPLE_SAVE_FOLDER,
                       SLIDE_KEYPTS_DESCS_PKL_SAVE_PATH, VID_START, VID_END,
                       VID_KEYPTS_DESCS_PKL_SAVE_PATH,
                       FINAL_FIGURES_RESULTS_SAVE_PATH):
    NUM_SLIDES = SLIDE_END - SLIDE_START
    NUM_FRAMES = VID_END - VID_START

    ## getting filenames
    videoFrameNames = [
        filename for filename in os.listdir(FRAME_SAMPLE_SAVE_FOLDER)
        if filename.endswith(".png")
    ]
    videoFrameNames = slidesInRange(videoFrameNames, VID_START, VID_END)
    slideNames = [
        filename for filename in os.listdir(SLIDES_PATH)
        if filename.endswith(".png")
    ]
    slideNames = slidesInRange(slideNames, SLIDE_START, SLIDE_END)

    sampleImgSlides = cv2.imread(slideImgPath + slideNames[0], 0)
    sampleVidSlides = cv2.imread(vidFramePath + videoFrameNames[0], 0)

    ## slide and video dimensions
    slideDimensions = (sampleImgSlides.shape[0], sampleImgSlides.shape[1])
    vidDimensions = (sampleVidSlides.shape[0], sampleVidSlides.shape[1])

    ## load the pickled keypoint/descriptor dictionaries
    slideImgKeypts = pickle.load(open(SLIDE_KEYPTS_DESCS_PKL_SAVE_PATH, "rb"))
    vidImgKeypts = pickle.load(open(VID_KEYPTS_DESCS_PKL_SAVE_PATH, "rb"))

    slideImgKeyptsDesc = dict()
    vidImgKeyptsDesc = dict()

    ## rebuild keypoints and descriptors for each slide and frame image
    for sn in slideNames:
        path = slideImgPath + sn
        slideImgKeyptsDesc[sn] = convertPickledToKPDesc(slideImgKeypts[path])
    for vidName in videoFrameNames:
        path = vidFramePath + vidName
        vidImgKeyptsDesc[vidName] = convertPickledToKPDesc(vidImgKeypts[path])

    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)
    ## matches is a list of DMatch objects
    flann = cv2.FlannBasedMatcher(index_params, search_params)

    matchSlideFrameDict = dict()
    matchMetric = dict()
    matchMetricNotNormalized = dict()

    ## cross matching
    matchID = 0
    for slide in range(0, NUM_SLIDES):
        for frame in range(0, NUM_FRAMES):
            print("Slide, Frame", (slide, frame), slideNames[slide],
                  videoFrameNames[frame])
            slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
            framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
            matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                                     np.asarray(frameDescs, np.float32), k=2)
            matchPairs = []
            distances = []
            slideKeypoints = []
            frameKeypoints = []
            good = []
            for m, n in matches:
                if m.distance < 0.75 * n.distance:
                    slideKeypoints.append(slidekpts[m.queryIdx].pt)
                    frameKeypoints.append(framekpts[m.trainIdx].pt)
                    good.append(m)
            '''
            slideImg = cv2.imread(slideImgPath + slideNames[slide], 0)
            vidImg = cv2.imread(vidFramePath + videoFrameNames[frame], 0)
            saveMatches(slidekpts, framekpts, slideImg, vidImg, good,
                        savePath + str(slide) + '-' + str(frame) +
                        '-match.jpg')
            '''
            matchMetric[(slide, frame)] = computeSumDiffKeypoints(
                slideKeypoints, frameKeypoints, slideDimensions,
                vidDimensions)
            matchMetricNotNormalized[(slide, frame)] = computeSumDiffKeypoints(
                slideKeypoints, frameKeypoints, slideDimensions,
                vidDimensions, False)

    ## save visualizations
    mat = visualize(
        matchMetric, NUM_SLIDES, NUM_FRAMES,
        FINAL_FIGURES_RESULTS_SAVE_PATH + "normalized_sumDistanceKeypoints.jpg")
    visualize(matchMetricNotNormalized, NUM_SLIDES, NUM_FRAMES,
              FINAL_FIGURES_RESULTS_SAVE_PATH + "sumDistanceKeypoints.jpg")

    ## save matrix as txt
    np.savetxt(FINAL_FIGURES_RESULTS_SAVE_PATH +
               "slides_by_vidframes_metric.txt", mat, fmt='%d')

    ## for each frame (column), predict the slide with the smallest metric
    col_best_prediction = {}
    for col in range(NUM_FRAMES):
        slides = list(mat[:, col])
        best = slides.index(min(slides))
        col_best_prediction[col] = best
    print(json.dumps(col_best_prediction, sort_keys=True))
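# convertPickledToKPDesc is also not shown. cv2.KeyPoint objects are not
# picklable, so a common pattern is to pickle plain tuples
# (pt, size, angle, response, octave, class_id) alongside each descriptor
# and rebuild the keypoints on load; the tuple layout below is an
# assumption about this project's pickle format.
import cv2
import numpy as np


def convertPickledToKPDesc(pickled):
    keypoints = []
    descriptors = []
    for point, desc in pickled:
        # Rebuild the cv2.KeyPoint from its pickled attribute tuple.
        kp = cv2.KeyPoint(point[0][0], point[0][1], point[1], point[2],
                          point[3], point[4], point[5])
        keypoints.append(kp)
        descriptors.append(desc)
    return keypoints, np.array(descriptors)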
def match_and_draw(match, r_threshold):
    m = match(desc1, desc2, r_threshold)
    matched_p1 = np.array([kp1[i].pt for i, j in m])
    matched_p2 = np.array([kp2[j].pt for i, j in m])
    H, status = cv2.findHomography(matched_p1, matched_p2, cv2.RANSAC, 5.0)
    print('%d / %d inliers/matched' % (np.sum(status), len(status)))
    # print(matched_p1)
    # print("---------")
    # print(matched_p2)

    # Size of the output mosaic.
    size = (img1.shape[1] * 2, img1.shape[0] * 2)

    # Initial preparation: shift the images away from the corners of the
    # resulting mosaic.
    print('center of images')
    c_y, c_x = (np.asarray(img1.shape[:2]) / 2.).tolist()
    # c_y, c_x = (np.asarray(img2.shape[:2]) / 2.).tolist()

    # Translate the matched points so the image center is the origin.
    for i in range(matched_p1.shape[0]):
        matched_p1[i] -= np.array([c_x, c_y])
        matched_p2[i] -= np.array([c_x, c_y])
    # print(matched_p1)
    # print("---------")
    # print(matched_p2)

    # Parameters for gradient descent.
    iterations = 1000
    gamma = 0.000002
    gamma_transl = 0.05
    # gamma = 10e-10
    lambd = 10e4

    # Initial parameters.
    theta_1 = np.array([0, 1., 1, 0, 0, 0, 0])
    theta_2 = np.array([0, 1., 1, 0, 0, 0, 0])
    src = np.array(matched_p1[0:3], np.float32)
    dst = np.array(matched_p2[0:3], np.float32)
    warp_affine = cv2.getAffineTransform(src, dst)
    print("warp_affine ", warp_affine)
    theta_1[0] = -np.cos(warp_affine[0][0])
    theta_1[5] = -warp_affine[0][2]
    theta_1[6] = -warp_affine[1][2]
    print("warp_affine.ravel()", warp_affine.ravel())
    # theta_1 = np.concatenate((warp_affine.ravel(), [0, 0, 1]))
    print("theta_1", theta_1)

    # Run gradient descent.
    t_1, t_2 = gradientDescent(iterations, matched_p1, matched_p2, theta_1,
                               theta_2, gamma, lambd, gamma_transl, img1,
                               img2, size, c_x, c_y)

    print('points after transform')
    for i in range(matched_p1.shape[0]):
        err_1 = util.transformPoint(matched_p1[i], t_1)
        err_2 = util.transformPoint(matched_p2[i], t_2)
        print(err_1, err_2, " --error-- ", np.abs(err_1 - err_2))
    print('---------------------------------------------------')

    result = util.stitch_for_visualization(img1, img2, t_1, t_2,
                                           c_x, c_y, size)
    util.visualize(img1, img2, matched_p1, matched_p2, t_1, t_2)
    cv2.imwrite('output.jpg', result)
    return None
matchID = 0
for slide in range(0, NUM_SLIDES):
    # NOTE: the frame loop also runs to NUM_SLIDES, which assumes the
    # number of sampled frames equals the number of slides.
    for frame in range(0, NUM_SLIDES):
        slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
        framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
        matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                                 np.asarray(frameDescs, np.float32), k=2)
        matchPairs = []
        distances = []
        good = []
        slideKeypoints = []
        frameKeypoints = []
        for m, n in matches:
            if m.distance < 0.75 * n.distance:
                slideKeypoints.append(slidekpts[m.queryIdx])
                frameKeypoints.append(framekpts[m.trainIdx])
                good.append(m)
                distances.append(m.distance)
                matchID += 1
        distances.sort(reverse=True)
        print(distances)
        # Score each (slide, frame) pair by the number of matches that
        # survive the ratio test.
        matchMetric[(slide, frame)] = len(good)
        matchSlideFrameDict[(slide, frame)] = matchPairs

visualize(matchMetric, NUM_SLIDES, "./numFriends.jpg")
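# The ratio-test loop above recurs in every matching variant; a small
# helper (a sketch with hypothetical names) could factor it out, including
# a guard for descriptors where knnMatch returns fewer than two neighbours:
def ratio_test(matches, ratio=0.75):
    # Lowe's ratio test: keep the best match only when it is clearly
    # closer than the second-best candidate.
    return [pair[0] for pair in matches
            if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance]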
def test_visualize(self):
    # Smoke test: clearing the current figure and rendering the last
    # loaded sample should not raise.
    plt.clf()
    utilities.visualize(utilities.load_data()[-1])
for frame in range(0, 28):
    slidekpts, slideDescs = slideImgKeyptsDesc[slideNames[slide]]
    framekpts, frameDescs = vidImgKeyptsDesc[videoFrameNames[frame]]
    matches = flann.knnMatch(np.asarray(slideDescs, np.float32),
                             np.asarray(frameDescs, np.float32), k=2)
    matchPairs = []
    distances = []
    good = []
    slideKeypoints = []
    frameKeypoints = []
    for m, n in matches:
        if m.distance < 0.75 * n.distance:
            slideKeypoints.append(slidekpts[m.queryIdx].pt)
            frameKeypoints.append(framekpts[m.trainIdx].pt)
    '''
    if (frame == 23 and slide == 18):
        print(slideImgPath + slideNames[slide])
        print(vidFramePath + videoFrameNames[frame])
        slideImg = cv2.imread(slideImgPath + slideNames[slide], 0)
        vidImg = cv2.imread(vidFramePath + videoFrameNames[frame], 0)
        drawMatches(slidekpts, framekpts, slideImg, vidImg, good)
    '''
    matchMetric[(slide, frame)] = computeRDistanceDifference(
        slideKeypoints, frameKeypoints, slideDimensions, vidDimensions)

visualize(matchMetric, 28, "./distanceCentroid.jpg")