def estimate_translation(R, T, max_size=1024):
    """ estimate translation
        T(x,y) = R(x - dx, y - dy)
        motion vector (dx, dy)
    """
    Rcrop = util.crop_image(R, (max_size, max_size))
    Tcrop = util.crop_image(T, (max_size, max_size))

    rgb2gray = lambda x: (0.2125 * x[:, :, 0] +
                          0.7154 * x[:, :, 1] +
                          0.0721 * x[:, :, 2])

    if len(R.shape) == 3 and R.shape[2] == 3:
        Rg = rgb2gray(Rcrop)
        Tg = rgb2gray(Tcrop)
    elif len(R.shape) == 2:
        Rg = R
        Tg = T
    else:
        raise RuntimeError("Invalid image size!")

    # FFT
    Fr = np.fft.fft2(Rg)
    Ft = np.fft.fft2(Tg)
    Fc = Fr * np.conj(Ft)
    Rc = Fc / np.abs(Fc)
    r = np.fft.ifft2(Rc)

    # get the peak
    max_r = np.max(r)
    max_index = np.argmax(r)
    shift = list(np.unravel_index(max_index, r.shape))
    for i in range(2):
        if r.shape[i] / 2 <= shift[i]:
            shift[i] -= r.shape[i]
    return shift
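# A minimal smoke test sketch for estimate_translation, assuming the module's own
# imports (numpy as np, util) are available and that util.crop_image accepts a
# 256x256 greyscale frame (the cropped copies are unused on the 2-D path, but the
# call still has to succeed). The offset below is illustrative only; the recovered
# vector should match the applied shift up to the sign convention of the
# correlation peak.
import numpy as np

rng = np.random.default_rng(0)
R = rng.random((256, 256))                      # reference frame
T = np.roll(R, shift=(7, -12), axis=(0, 1))     # frame shifted relative to R

print(estimate_translation(R, T))               # e.g. [-7, 12] or [7, -12]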
def precomputeValues(fishDict,
                     imgKey,
                     spotsJSONKey='spotsJson',
                     maskImgKey='mask',
                     precomputedPickle='precompAA'):
    """Precomputes values necessary for comparison of the fish to another fish

    Args:
        fishDict (dict[str,str]): The dictionary describing all components of the fish image
        imgKey (str): The key of the fish image
        spotsJSONKey (str, optional): The key in the dictionary indicating the filepath to the
            json file containing the spot information for the fish. Defaults to 'spotsJson'.
        maskImgKey (str, optional): The key in the dictionary indicating the filepath to the
            mask file containing the boolean mask for the fish. Defaults to 'mask'.
        precomputedPickle (str, optional): The key in the dictionary indicating the filepath to
            the precomputed values of the fish. Defaults to 'precompAA'.

    Returns:
        dict: The precomputed values (invariants, asterisms, kd-tree, spots, ...) that were
            pickled to the cache directory, or None if the fish has too few spots.
    """
    spots = None
    if fishDict[spotsJSONKey] is not None:
        with open(fishDict[spotsJSONKey], 'r') as f:
            spots = np.asarray(json.load(f))
        if len(spots) > 5:
            spots = spots[:, :2]  # Remove size from spots
            mask = crop_image(cv2.imread(fishDict[maskImgKey], 0))
            R = get_normalise_direction_matrix(mask)
            tmpPoints = np.copy(np.asarray(spots))
            tmpPoints = np.dot(R[:, (0, 1)],
                               np.array([tmpPoints[:, 0], tmpPoints[:, 1]]))
            tmpPoints = np.array(list(zip(tmpPoints[0], tmpPoints[1])))
            warpedTargetMask = crop_image(
                cv2.warpAffine(mask, R,
                               (mask.shape[1] + 500, mask.shape[0] + 500)))
            spotsProcessed = tmpPoints / np.max(warpedTargetMask.nonzero())

            invariants, asterisms = astroalign._generate_invariants(
                spotsProcessed[:astroalign.MAX_CONTROL_POINTS])
            kdTree = cKDTree(invariants)

            fishDict[precomputedPickle] = os.path.join(get_cache_dir(),
                                                       imgKey + ".aa.pickle")
            precomputedObject = {
                "max_control_points": astroalign.MAX_CONTROL_POINTS,
                "invariants": invariants,
                "asterisms": asterisms,
                "kdtree": kdTree,
                "spots": spots,
                "spots_standardised": spotsProcessed,
                "nn": astroalign.NUM_NEAREST_NEIGHBORS,
                "version": 1
            }
            with open(os.path.join(get_cache_dir(), imgKey + ".aa.pickle"),
                      'wb') as f:
                pickle.dump(precomputedObject, f)
            return precomputedObject
    return None
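# A hedged usage sketch for precomputeValues. The dictionary keys and file paths
# below are hypothetical and only illustrate the record structure the function
# reads; the actual layout comes from the project's data pipeline.
fish = {
    "spotsJson": "data/fish_001.spots.json",  # list of (x, y, size) spot entries
    "mask": "data/fish_001.mask.png",         # boolean fish mask stored as an 8-bit image
}

precomp = precomputeValues(fish, "fish_001")
if precomp is None:
    print("Fewer than six spots; nothing was cached.")
else:
    # The function also writes the cache path back into the record.
    print("Cached to", fish["precompAA"], "with", len(precomp["spots"]), "spots")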
def get_image(self,
              crop=False,
              output_size=None,
              greyscale=False,
              flip_red_blue=False):
    if self.request_render:
        self._render()
    if output_size is None:
        output_size = self.size
    string_image = pygame.image.tostring(self.window, 'RGB')
    image = pygame.image.fromstring(string_image, (self.size, self.size),
                                    'RGB')
    if greyscale:
        array = np.transpose(pygame.surfarray.array3d(image)[:, :, 0])
    else:
        array = np.transpose(
            pygame.surfarray.array3d(image)[:, :, (
                2, 1, 0) if flip_red_blue else slice(None)], (1, 0, 2))
    if crop:
        array = crop_image(array)
    if output_size != self.size:
        array = cv2.resize(array,
                           dsize=(output_size, output_size),
                           interpolation=cv2.INTER_CUBIC)
    return array
def crop_workflow_image(workflow, seq_num, img_type):
    # TODO: We have to update the checksum!
    page = get_next(p for p in workflow.pages if p.sequence_num == seq_num)
    if not page:
        raise ApiException("Could not find page with sequence number {0}"
                           .format(seq_num), 404)
    if img_type != 'raw':
        raise ApiException("Can only crop raw images.", 400)
    left = int(request.args.get('left', 0))
    top = int(request.args.get('top', 0))
    width = int(request.args.get('width', 0)) or None
    height = int(request.args.get('height', 0)) or None
    crop_image(unicode(page.raw_image), left, top, width, height)
    cache_key = "{0}.{1}.{2}".format(workflow.id, 'raw', page.raw_image.name)
    cache.delete(cache_key)
    return 'OK'
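# Illustration of the `int(...) or None` idiom used for width/height above: a
# missing or zero-valued query parameter collapses to None, which the crop helper
# can then treat as "use the full remaining extent" (an assumption about the
# helper, not part of this endpoint). parse_dim is a hypothetical stand-in.
def parse_dim(raw):
    return int(raw or 0) or None

print(parse_dim(None))   # None  (parameter absent)
print(parse_dim("0"))    # None  (explicit zero)
print(parse_dim("640"))  # 640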
async def predict_with_camera_feed(self, robot: anki_vector.Robot) -> None:
    """Use the camera feed from Vector to detect sign language hand signs by
    applying a trained convolutional neural network to images received from
    the camera feed.

    .. code-block:: python

        recognizer = SignLanguageRecognizer()
        recognizer.load_model("/path/to/model_config_filename",
                              "/path/to/model_weights_filename")
        with anki_vector.Robot(show_viewer=True) as robot:
            print("------ predicting hand signs, press ctrl+c to exit early ------")
            try:
                robot.conn.run_coroutine(recognizer.predict_with_camera_feed(robot))
            except KeyboardInterrupt:
                print("------ predicting done ------")
    """
    with self.graph.as_default():
        while True:
            await asyncio.sleep(2)
            # Get the latest image from the robot's camera feed
            camera_image = robot.camera.latest_image.raw_image

            # - Image pre-processing -
            # Convert the image into black and white using Pillow
            black_white_image = camera_image.convert("L")
            # Crop the image to reduce the complexity of the network
            cropped_image = util.crop_image(black_white_image,
                                            util.NetworkConstants.IMAGE_WIDTH,
                                            util.NetworkConstants.IMAGE_HEIGHT)

            # Convert image to an array with shape (image_width, image_height, 1)
            image = img_to_array(cropped_image)
            # Normalize the image data
            image = image.astype("float") / 255.0
            # Expand array shape to add an axis to denote the number of images fed as input
            image = np.expand_dims(image, axis=0)

            prediction = self.model.predict(image)[0]
            prediction = enumerate(prediction)
            prediction = sorted(prediction, key=lambda x: x[1], reverse=True)[0]
            label = prediction[0]
            if label == (util.NetworkConstants.NUM_CLASSES - 1):
                label = "No Sign Displayed"
            else:
                label = chr(label + 97)
            prediction = (label, prediction[1] * 100)
            print(f"Prediction: {prediction[0]} Confidence: {prediction[1]:.2f}%")
            if prediction[0] != "No Sign Displayed":
                # If a valid prediction is available, use Vector's text-to-speech
                # system to say the recognized alphabet out loud
                await robot.behavior.say_text(prediction[0])
def main(args):
    input_path = args.image_path
    dir_path, file_name = ntpath.split(input_path)
    original_image = cv2.imread(input_path)
    height, width, depth = original_image.shape
    detector = MTCNN()
    landmarks = detector.detect_faces(original_image)
    # Assume the profile picture contains a single face; otherwise pick the
    # detection with the highest confidence.
    old_landmark = landmarks[0]
    _, _, nose, theta = get_coordinates(old_landmark)
    rotated_image = ndimage.rotate(original_image, theta + 180.0)
    new_height, new_width, _ = rotated_image.shape
    delta_width = abs(new_width - width) // 2
    delta_height = abs(new_height - height) // 2
    cropped_image = crop_image(delta_width, new_width - delta_width,
                               delta_height, new_height - delta_height,
                               rotated_image)
    cv2.imwrite(os.path.join(dir_path, 'rotated_' + file_name), cropped_image)
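# A minimal entry-point sketch for main(); only the image_path attribute used
# above is needed. The positional argument name mirrors args.image_path, but the
# exact CLI spelling is an assumption.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Rotate, align and crop a face image")
    parser.add_argument('image_path', help="Path to the input profile image")
    main(parser.parse_args())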
def tonemap(E, l_remap=(0, 1), saturation=1., numtiles=(4, 4)):
    """ render HDR for viewing
    exposure estimate -> log2 -> CLAHE -> remap to l_remap -> gamma correction -> HDR

    @param E: exposure (N x M x 3)
    @param l_remap: remap intensity to l_remap in the image adjust step
    @param saturation: saturation of the color.
    @param numtiles: number of contextual tiles in the CLAHE step
    return contrast reduced image
    """
    if E.shape[0] % numtiles[0] != 0 or E.shape[1] % numtiles[1] != 0:
        E = util.crop_image(E, (E.shape[0] // numtiles[0] * numtiles[0],
                                E.shape[1] // numtiles[1] * numtiles[1]))
    l2E, has_nonzero = lognormal(E)
    if has_nonzero:
        I = tone_operator(l2E, l_remap, saturation, numtiles)
    else:
        I = l2E

    # clip
    I[I < 0] = 0
    I[1 < I] = 1
    return np.uint8(I * 255.)
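# A hedged usage sketch for tonemap. The synthetic exposure map is illustrative
# only and assumes the module-level helpers used above (lognormal, tone_operator,
# util.crop_image) are importable; dimensions divisible by the tile grid avoid
# the cropping branch.
import numpy as np

E = np.random.rand(400, 600, 3) * 1e4      # hypothetical HDR exposure estimate
ldr = tonemap(E, l_remap=(0, 1), saturation=1., numtiles=(4, 4))
print(ldr.dtype, ldr.shape)                # uint8, (400, 600, 3)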
#############################################################################################################################################
###########################           Resize Depth images to match input size           #################################
###     Crop the depth images using the crop size in mm and store in cropped directory
#####################################################################################################
cropped = 'cropped/'  ### directory for the cropped images
names = util.load_names(dataset, phase)
centers = util.load_centers(dataset, phase).astype(float)
cube_size = 150  # crop size in mm
for idx, name in enumerate(names):
    img = util.load_image(dataset, os.path.join(root_dir, name))
    center = centers[idx]
    crop = crop_image(img, center, dataset)
    crop -= center[2]
    crop = np.maximum(crop, -cube_size)
    crop = np.minimum(crop, cube_size)
    crop /= cube_size
    ### depth values are stored temporarily in [0, 255] in order to enable viewing/validating
    crop += 1
    crop *= 255
    crop /= 2
    cv2.imwrite(os.path.join(root_dir, cropped, name), crop)
    if idx % 500 == 0:
        print('{}/{}'.format(idx + 1, len(names)))
#####################################################################################################
viewer.set_mesh(mesh, center_and_scale=True)
image = viewer.get_image(crop=False, output_size=viewer.size, greyscale=False)
cv2.imwrite(image_filename, image)

if "wgan-results" in sys.argv:
    from util import crop_image

    COUNT = 5
    plot = ImageGrid(COUNT, create_viewer=False)
    for i in range(COUNT):
        image = plt.imread('screenshots/wgan/{:d}.png'.format(i))
        plot.set_image(crop_image(image, background=1), i)
    plot.save('plots/wgan-results.pdf')

if 'sdf_net_reconstruction' in sys.argv:
    from rendering.raymarching import render_image_for_index
    from PIL import Image
    from util import crop_image

    sdf_net, latent_codes = load_sdf_net(return_latent_codes=True)

    COUNT = 5
    MESH_FILENAME = 'screenshots/sdf_meshes/{:d}.png'

    indices = random.sample(range(latent_codes.shape[0]), COUNT)
    print(indices)
def get_data(self, idx):
    img_idx, r_idx = idx
    annolist = getattr(self.mat['RELEASE'], 'annolist')[img_idx]
    if r_idx != -1:
        annorect = getattr(annolist, 'annorect')[r_idx]
    else:
        annorect = getattr(annolist, 'annorect')
    image_name = self.image_path + getattr(getattr(annolist, 'image'), 'name')
    image = skimage.img_as_float(skimage.io.imread(image_name))

    scale = annorect.scale
    rotate = 0
    if self.task == 'train':
        scale *= 1.25
        if self.augmentation:
            scale *= 2**(random.gauss(0, 1) * MPII.SCALE_FACTOR)
            rotate = random.gauss(
                0, 1) * MPII.ROTATE_FACTOR if random.random() <= 0.4 else 0

    hitbox = 200 * scale
    objpos = getattr(annorect, 'objpos')
    center = Vector2(getattr(objpos, 'x'), getattr(objpos, 'y'))

    ret_image = crop_image(image, center, scale, rotate, 256)
    if self.task and self.augmentation:
        ret_image[:, :, 0] *= random.uniform(0.6, 1.4)
        ret_image[:, :, 1] *= random.uniform(0.6, 1.4)
        ret_image[:, :, 2] *= random.uniform(0.6, 1.4)
        ret_image = np.clip(ret_image, 0, 1)
    assert ret_image.shape == (256, 256, 3)

    ret_heatmap = np.zeros(shape=(64, 64, self.joint_num), dtype=np.float32)
    ret_keypoint = np.zeros(shape=(self.joint_num, 2))

    keypoints = annorect.annopoints.point
    for key_idx in range(keypoints.shape[0]):
        joint_id = keypoints[key_idx].id
        in_rgb = Vector2(keypoints[key_idx].x,
                         keypoints[key_idx].y)  # input RGB coordinate.
        in_heatmap = (in_rgb - center) * 64 / hitbox

        if rotate != 0:
            cos = math.cos(rotate * math.pi / 180)
            sin = math.sin(rotate * math.pi / 180)
            in_heatmap = Vector2(sin * in_heatmap.y + cos * in_heatmap.x,
                                 cos * in_heatmap.y - sin * in_heatmap.x)

        keypoint = in_heatmap + Vector2(32, 32)
        if min(keypoint) < 0 or max(keypoint) >= 64:
            continue

        ret_heatmap[:, :, joint_id] = generate_heatmap(
            64, keypoint.y, keypoint.x)  # cropped RGB coordinate.
        ret_keypoint[joint_id, :] = [keypoint.y, keypoint.x]

    act = getattr(self.mat['RELEASE'], 'act')[img_idx]
    ret_activity = act.act_id
    ret_threshold = 25.6

    return ret_image, ret_heatmap, ret_keypoint, ret_activity, ret_threshold
#############################################################################################################################################
############################           Resize RGB images to match input size           #################################
###     Crop the RGB images using the crop size in mm and store in cropped directory
#####################################################################################################
cropped = 'cropped/'
names = util.load_names(dataset, phase)
centers = util.load_centers(dataset, phase).astype(float)
centers = np.reshape(centers, (-1, 3))
for idx, name in enumerate(names):
    img = util.load_image(dataset, os.path.join(root_dir, name))
    crop = crop_image(img, centers[idx], dataset)
    name = name.replace('.jpeg', '.png')
    cv2.imwrite(os.path.join(root_dir, cropped, name), crop)
    if idx % 500 == 0:
        print('{}/{}'.format(idx + 1, len(names)))
#####################################################################################################
############################           Draw pose on cropped RGB samples from normalized labels           #################################
###     Plot the normalized labels on a sample of RGB images to validate cropping and label normalization
###     this segment is only for validation
######################################################################################################################
lbls = util.load_labels(dataset, phase)  ### load test/train data
names = util.load_names(dataset, phase)
cords_3d = util.pixel2world(cords_d, 'fpad')
# from world coordinates to rgb image coordinates; gives us an array containing the
# mapping of each depth image pixel to its corresponding rgb image pixel (if it exists)
cords_c, skel_camcoords = util.world2pixel(cords_3d, 'fpac')
cords_3d = np.reshape(cords_3d, (480, 640, -1))
img_rgbd = np.zeros((img.shape[0], img.shape[1], 4))
cords_c = np.reshape(cords_c, (480, 640, 3))

# using cython functions to drastically increase the speed of image pixel looping
# loops over all RGB-D image pixels and assigns RGB value using cimg and cords_c
img_rgbd = image_loops.color_map(img, cimg, img_rgbd, cords_c)
img_rgbd = np.asarray(img_rgbd)

# use center to crop the image to required input size then normalize depth and RGB values
center = centers[idx]
crop = util.crop_image(img_rgbd, center, 'fpad')

# norm depth values
crop[:, :, 3] -= center[2]
crop[:, :, 3] = np.maximum(crop[:, :, 3], -cube_size)
crop[:, :, 3] = np.minimum(crop[:, :, 3], cube_size)
crop[:, :, 3] /= cube_size

# norm RGB values
crop[:, :, :3] *= 2
crop[:, :, :3] /= 255
crop[:, :, :3] -= 1

# swap axes; channels should be the first axis
imgs[idx] = np.swapaxes(crop, 0, 2)
lbls[idx] = np.asarray(labels[idx].split(), dtype=np.float32)

# print progress
if idx % 500 == 0:
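# A small worked check of the normalisation above, under the same assumptions as
# the snippet (depth already centred on the hand, cube_size = 150 mm, 8-bit RGB).
# Both channels end up in [-1, 1].
import numpy as np

depth = np.array([-400.0, 0.0, 90.0])          # mm relative to the crop centre
print(np.clip(depth, -150, 150) / 150)          # [-1.   0.   0.6]

rgb = np.array([0.0, 127.5, 255.0])
print(rgb * 2 / 255 - 1)                        # [-1.   0.   1. ]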
def render_image(sdf_net, latent_code, resolution=800, threshold=0.0005,
                 sdf_offset=0, iterations=1000, ssaa=2, radius=1.0,
                 crop=False, color=(0.8, 0.1, 0.1), vertical_cutoff=None):
    camera_forward = camera_position / np.linalg.norm(camera_position) * -1
    camera_distance = np.linalg.norm(camera_position).item()
    up = np.array([0, 1, 0])
    camera_right = np.cross(camera_forward, up)
    camera_right /= np.linalg.norm(camera_right)
    camera_up = np.cross(camera_forward, camera_right)
    camera_up /= np.linalg.norm(camera_up)

    screenspace_points = np.meshgrid(
        np.linspace(-1, 1, resolution * ssaa),
        np.linspace(-1, 1, resolution * ssaa),
    )
    screenspace_points = np.stack(screenspace_points)
    screenspace_points = screenspace_points.reshape(2, -1).transpose()

    points = np.tile(camera_position, (screenspace_points.shape[0], 1))
    points = points.astype(np.float32)

    focal_distance = 1.0 / math.tan(math.asin(radius / camera_distance))
    ray_directions = screenspace_points[:, 0] * camera_right[:, np.newaxis] \
        + screenspace_points[:, 1] * camera_up[:, np.newaxis] \
        + focal_distance * camera_forward[:, np.newaxis]
    ray_directions = ray_directions.transpose().astype(np.float32)
    ray_directions /= np.linalg.norm(ray_directions, axis=1)[:, np.newaxis]

    b = np.einsum('ij,ij->i', points, ray_directions) * 2
    c = np.dot(camera_position, camera_position) - radius * radius
    distance_to_sphere = (-b - np.sqrt(np.power(b, 2) - 4 * c)) / 2
    indices = np.argwhere(np.isfinite(distance_to_sphere)).reshape(-1)
    points[indices] += ray_directions[indices] * distance_to_sphere[indices, np.newaxis]

    points = torch.tensor(points, device=device, dtype=torch.float32)
    ray_directions_t = torch.tensor(ray_directions, device=device, dtype=torch.float32)

    indices = torch.tensor(indices, device=device, dtype=torch.int64)
    model_mask = torch.zeros(points.shape[0], dtype=torch.uint8)

    for i in tqdm(range(iterations)):
        test_points = points[indices, :]
        sdf = sdf_net.evaluate_in_batches(
            test_points, latent_code, return_cpu_tensor=False) + sdf_offset
        torch.clamp_(sdf, -0.02, 0.02)
        points[indices, :] += ray_directions_t[indices, :] * sdf.unsqueeze(1)

        hits = (sdf > 0) & (sdf < threshold)
        model_mask[indices[hits]] = 1
        indices = indices[~hits]

        misses = torch.norm(points[indices, :], dim=1) > radius
        indices = indices[~misses]

        if indices.shape[0] < 2:
            break
    model_mask[indices] = 1

    if vertical_cutoff is not None:
        model_mask[points[:, 1] > vertical_cutoff] = 0
        model_mask[points[:, 1] < -vertical_cutoff] = 0

    normal = get_normals(sdf_net, points[model_mask], latent_code).cpu().numpy()

    model_mask = model_mask.cpu().numpy().astype(bool)
    points = points.cpu().numpy()
    model_points = points[model_mask]

    seen_by_light = 1.0 - get_shadows(sdf_net, model_points, light_position,
                                      latent_code, radius=radius,
                                      sdf_offset=sdf_offset)
    light_direction = light_position[np.newaxis, :] - model_points
    light_direction /= np.linalg.norm(light_direction, axis=1)[:, np.newaxis]
    diffuse = np.einsum('ij,ij->i', light_direction, normal)
    diffuse = np.clip(diffuse, 0, 1) * seen_by_light

    reflect = light_direction - np.einsum('ij,ij->i', light_direction, normal)[:, np.newaxis] * normal * 2
    reflect /= np.linalg.norm(reflect, axis=1)[:, np.newaxis]
    specular = np.einsum('ij,ij->i', reflect, ray_directions[model_mask, :])
    specular = np.clip(specular, 0.0, 1.0)
    specular = np.power(specular, 20) * seen_by_light

    rim_light = -np.einsum('ij,ij->i', normal, ray_directions[model_mask, :])
    rim_light = 1.0 - np.clip(rim_light, 0, 1)
    rim_light = np.power(rim_light, 4) * 0.3

    color = np.array(color)[np.newaxis, :] * \
        (diffuse * 0.5 + 0.5)[:, np.newaxis]
    color += (specular * 0.3 + rim_light)[:, np.newaxis]
    color = np.clip(color, 0, 1)

    ground_points = ray_directions[:, 1] < 0
    ground_points[model_mask] = 0
    ground_points = np.argwhere(ground_points).reshape(-1)
    ground_plane = np.min(model_points[:, 1]).item()
    points[ground_points, :] -= ray_directions[ground_points, :] * (
        (points[ground_points, 1] - ground_plane) /
        ray_directions[ground_points, 1])[:, np.newaxis]
    ground_points = ground_points[
        np.linalg.norm(points[ground_points, ::2], axis=1) < 3]

    ground_shadows = get_shadows(sdf_net, points[ground_points, :],
                                 light_position, latent_code,
                                 sdf_offset=sdf_offset)

    pixels = np.ones((points.shape[0], 3))
    pixels[model_mask] = color
    pixels[ground_points] -= ((1.0 - 0.65) * ground_shadows)[:, np.newaxis]
    pixels = pixels.reshape((resolution * ssaa, resolution * ssaa, 3))

    if crop:
        from util import crop_image
        pixels = crop_image(pixels, background=1)

    image = Image.fromarray(np.uint8(pixels * 255), 'RGB')

    if ssaa != 1:
        image = image.resize((resolution, resolution), Image.ANTIALIAS)

    return image
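# A hedged usage sketch for render_image. The latent-code index and output path
# are hypothetical; the function also relies on module-level globals
# (camera_position, light_position, device) defined elsewhere in the rendering
# module, and load_sdf_net is the loader used in the sys.argv blocks above.
sdf_net, latent_codes = load_sdf_net(return_latent_codes=True)
latent_code = latent_codes[0]

image = render_image(sdf_net, latent_code, resolution=400, ssaa=1, crop=True)
image.save('plots/sdf_render_example.png')  # hypothetical output path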
def findClosestMatch(modelImageName,
                     modelDict,
                     imagesToComareDicts,
                     spotsJsonKey='spotsJson',
                     maskImgKey='mask',
                     verbose=False,
                     progress=False):
    """Finds the best matches for the model image in the images to compare dictionaries

    Args:
        modelImageName (str): The name of the image being compared
        modelDict (dict): The dictionary containing data about the image
        imagesToComareDicts (Dict[Dict]): The list of other image dictionaries to compare to.
        spotsJsonKey (str, optional): The key in the dictionary indicating the filepath to the
            json file containing the spot information for the fish. Defaults to 'spotsJson'.
        maskImgKey (str, optional): The key in the dictionary indicating the filepath to the
            mask file containing the boolean mask for the fish. Defaults to 'mask'.
        verbose (bool, optional): Whether to print verbose information to stdout. Defaults to False.
        progress (bool, optional): Whether to print progress information to stdout. Defaults to False.

    Returns:
        List[Tuple[float, float, str]]: List of tuples of (score, maskCoverage, image name)
            for each image the model image was compared to.
    """
    modelPrecompValues = open_cached(modelDict,
                                     modelImageName,
                                     spotsJsonKey=spotsJsonKey,
                                     maskImgKey=maskImgKey)
    if modelPrecompValues is None:
        if verbose:
            print("Not enough points")
        return []

    centerTranslationMatrix = np.float32([[1, 0, 250], [0, 1, 250], [0, 0, 1]])
    targetSpots = cv2.imread(modelDict["spots"], 0)
    targetMask = crop_image(cv2.imread(modelDict[maskImgKey], 0))
    targetMaskShifted = cv2.warpPerspective(
        targetMask, centerTranslationMatrix,
        (targetMask.shape[1] + 500, targetMask.shape[0] + 500))

    if verbose:
        print(modelImageName)
        visualize(modelPrecompValues["spots_standardised"],
                  modelPrecompValues["spots_standardised"],
                  annotateOrder=True,
                  invertYAxis=False,
                  figsize=(10, 10))

    ranking = []
    for imageKey in imagesToComareDicts:
        if progress or verbose:
            print(imageKey)
        comparatorImageDict = imagesToComareDicts[imageKey]
        dataPrecompValues = open_cached(comparatorImageDict,
                                        imageKey,
                                        spotsJsonKey=spotsJsonKey,
                                        maskImgKey=maskImgKey)
        if dataPrecompValues is None:
            ranking.append((-1, 0, imageKey))
            continue
        if verbose:
            visualize(dataPrecompValues["spots_standardised"],
                      dataPrecompValues["spots_standardised"],
                      annotateOrder=True,
                      invertYAxis=False,
                      figsize=(10, 10))

        T, num_points, score, s_idx, t_idx = aamatch(
            dataPrecompValues["spots_standardised"],
            modelPrecompValues["spots_standardised"],
            dataPrecompValues["invariants"], dataPrecompValues["asterisms"],
            dataPrecompValues["kdtree"], modelPrecompValues["invariants"],
            modelPrecompValues["asterisms"], modelPrecompValues["kdtree"])

        maskCoverage = 0
        spotsCoverage = 0
        if T is not None:
            if verbose:
                transformedCoords = []
                for coord in dataPrecompValues["spots_standardised"]:
                    coordNew = np.array([coord[0], coord[1], 1])
                    coordNew = T.params @ coordNew
                    transformedCoords.append(coordNew[:2])
                maxValue = max(np.max(dataPrecompValues["spots_standardised"]),
                               np.max(dataPrecompValues["spots_standardised"]),
                               np.max(np.asarray(transformedCoords)))
                visualize(dataPrecompValues["spots_standardised"],
                          dataPrecompValues["spots_standardised"],
                          annotateOrder=True,
                          invertYAxis=False,
                          figsize=(10, 10))
                visualize(modelPrecompValues["spots_standardised"],
                          modelPrecompValues["spots_standardised"],
                          annotateOrder=True,
                          invertYAxis=False,
                          figsize=(10, 10))
                visualize(modelPrecompValues["spots_standardised"],
                          np.asarray(transformedCoords),
                          annotateOrder=False,
                          invertYAxis=False,
                          figsize=(10, 10))

            model = skimage.transform.AffineTransform()
            model.estimate(dataPrecompValues["spots"][s_idx],
                           modelPrecompValues["spots"][t_idx])

            dataMask = crop_image(
                cv2.imread(comparatorImageDict[maskImgKey], 0))
            warped = cv2.warpPerspective(
                dataMask, centerTranslationMatrix @ model.params,
                (targetMaskShifted.shape[1], targetMaskShifted.shape[0]))

            pMask, rMask = get_average_precision_recall(
                np.array([warped]), np.array([targetMaskShifted]),
                verbose=verbose)
            maskCoverage = 2 * (pMask * rMask) / (pMask + rMask)

        ranking.append((score * maskCoverage, maskCoverage, imageKey))
    return ranking
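# A hedged usage sketch: rank one fish against a small catalogue. The names and
# dictionaries are hypothetical and assume the same record layout used by
# precomputeValues above; the query additionally carries a "spots" image path,
# which this function reads directly.
catalogue = {
    "fish_002": {"spotsJson": "data/fish_002.spots.json", "mask": "data/fish_002.mask.png"},
    "fish_003": {"spotsJson": "data/fish_003.spots.json", "mask": "data/fish_003.mask.png"},
}
query = {"spotsJson": "data/fish_001.spots.json",
         "mask": "data/fish_001.mask.png",
         "spots": "data/fish_001.spots.png"}

ranking = findClosestMatch("fish_001", query, catalogue, progress=True)
# Higher combined score first; entries with too few cached spots carry a score of -1.
for score, mask_coverage, name in sorted(ranking, reverse=True):
    print(f"{name}: score={score:.3f} mask coverage={mask_coverage:.3f}")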
def data_capture(camera: anki_vector.camera.CameraComponent, stats: dict, root_folder: str) -> None:
    """Build an image dataset using the camera feed from Vector.

    This method uses an image from the camera and generates a multiplier number of images
    by rotating the original image. The keystroke used to initiate the image capture and
    processing is used to label the image.
    """
    try:
        # TODO: curses works well with Mac OS and Linux, explore msvcrt for Windows
        terminal = curses.initscr()
        curses.cbreak()
        curses.noecho()
        terminal.nodelay(True)

        # The number of images to generate using the image captured as a seed
        image_multiplier = 10
        # The maximum amount of rotation by which to rotate the original image to generate more images
        min_rotation = -10
        max_rotation = 10

        print("------ capturing hand signs dataset, press ctrl+c to exit ------")
        while True:
            key = terminal.getch()
            if (ord("a") <= key <= ord("z")) or (key == ord(" ")):
                # Represents background images, filenames are switched to be prefixed
                # with "background" instead of " "
                if key == ord(" "):
                    key = "background"
                else:
                    key = chr(key)

                # Pull image from camera
                original_image = camera.latest_image.raw_image
                if original_image:
                    # Convert image to black and white
                    black_white_image = original_image.convert("L")
                    rotation_axes = [1, 1, 0]

                    # Generate more images with random rotation
                    for rotation in random.sample(
                            range(min_rotation, max_rotation), image_multiplier):
                        # Randomly define which axis to rotate the image by
                        random.shuffle(rotation_axes)
                        x_axis_rotation_enabled, y_axis_rotation_enabled = rotation_axes[:2]
                        rotated_image_array = ndimage.rotate(
                            black_white_image,
                            rotation,
                            axes=(x_axis_rotation_enabled, y_axis_rotation_enabled),
                            reshape=False)
                        # Convert to a 200*200 image
                        rotated_image = Image.fromarray(rotated_image_array)
                        cropped_image = util.crop_image(
                            rotated_image, util.NetworkConstants.IMAGE_WIDTH,
                            util.NetworkConstants.IMAGE_HEIGHT)

                        # Save the image
                        image_filename = key + "_" + str(stats.get(key, 0)) + ".png"
                        stats[key] = stats.get(key, 0) + 1
                        cropped_image.save(os.path.join(root_folder, image_filename))

                # Character
                print(f"Recorded images for {key}\n\r")
    except (CancelledError, KeyboardInterrupt):
        pass
    finally:
        curses.nocbreak()
        curses.echo()
        curses.endwin()