Example #1
def estimate_translation(R, T, max_size=1024):
    """
        estimate translation
        T(x,y) = R(x - dx, y - dy)
        motion vector (dx, dy)
    """
    Rcrop = util.crop_image(R, (max_size, max_size))
    Tcrop = util.crop_image(T, (max_size, max_size))
    rgb2gray = lambda x: (0.2125 * x[:, :, 0] + 0.7154 * x[:, :, 1] + 0.0721 *
                          x[:, :, 2])
    if len(R.shape) == 3 and R.shape[2] == 3:
        Rg = rgb2gray(Rcrop)
        Tg = rgb2gray(Tcrop)
    elif len(R.shape) == 2:
        Rg = R
        Tg = T
    else:
        raise RuntimeError("Invalid image size!")

    # FFT
    Fr = np.fft.fft2(Rg)
    Ft = np.fft.fft2(Tg)
    Fc = Fr * np.conj(Ft)
    Rc = Fc / np.abs(Fc)
    r = np.fft.ifft2(Rc)
    # locate the correlation peak (use the magnitude; the inverse FFT is complex-valued)
    max_index = np.argmax(np.abs(r))
    shift = list(np.unravel_index(max_index, r.shape))
    for i in range(2):
        if r.shape[i] / 2 <= shift[i]:
            shift[i] -= r.shape[i]
    return shift
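
The function above is standard phase correlation: the translation between two images appears as a peak in the inverse FFT of the normalized cross-power spectrum. A self-contained sketch of the same core on synthetic data, assuming only NumPy and skipping the util.crop_image step:

import numpy as np

# Self-contained sketch of the phase-correlation core used above.
rng = np.random.default_rng(0)
R = rng.random((128, 128))                    # reference image (grayscale)
T = np.roll(R, shift=(7, -3), axis=(0, 1))    # circularly shifted copy

Fc = np.fft.fft2(R) * np.conj(np.fft.fft2(T))
r = np.fft.ifft2(Fc / np.abs(Fc))             # correlation surface

shift = list(np.unravel_index(np.argmax(np.abs(r)), r.shape))
for i in range(2):
    if shift[i] >= r.shape[i] // 2:           # undo FFT wrap-around
        shift[i] -= r.shape[i]
print(shift)  # [-7, 3]: with this conjugation the peak encodes minus the applied roll
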
Example #2
def precomputeValues(fishDict,
                     imgKey,
                     spotsJSONKey='spotsJson',
                     maskImgKey='mask',
                     precomputedPickle='precompAA'):
    """Precomputes values necessary for comparison of the fish to another fish

    Args:
        fishDict (dict[str,str]): The dictionary describing all components of the fish image
        imgKey (str): The key of the fish image
        spotsJSONKey (str, optional): The key in the dictionary indicating the filepath to the json file containing the spot information for the fish. Defaults to 'spotsJson'.
        maskImgKey (str, optional): The key in the dictionary indicating the filepath to the mask file containing the boolean mask for the fish. Defaults to 'mask'.
        precomputedPickle (str, optional): The key in the dictionary indicating the filepath to the precomputed values of the fish. Defaults to 'precompAA'.

    Returns:
        dict or None: The precomputed comparison data (invariants, asterisms, kd-tree and spot coordinates), or None if too few spots are available.
    """
    spots = None
    if fishDict[spotsJSONKey] is not None:
        with open(fishDict[spotsJSONKey], 'r') as f:
            spots = np.asarray(json.load(f))
        if len(spots) > 5:
            spots = spots[:, :2]  # Remove size from spots
            mask = crop_image(cv2.imread(fishDict[maskImgKey], 0))
            R = get_normalise_direction_matrix(mask)
            tmpPoints = np.copy(np.asarray(spots))
            tmpPoints = np.dot(R[:, (0, 1)],
                               np.array([tmpPoints[:, 0], tmpPoints[:, 1]]))
            tmpPoints = np.array(list(zip(tmpPoints[0], tmpPoints[1])))
            warpedTargetMask = crop_image(
                cv2.warpAffine(mask, R,
                               (mask.shape[1] + 500, mask.shape[0] + 500)))
            spotsProcessed = tmpPoints / np.max(warpedTargetMask.nonzero())

            invariants, asterisms = astroalign._generate_invariants(
                spotsProcessed[:astroalign.MAX_CONTROL_POINTS])
            kdTree = cKDTree(invariants)
            fishDict[precomputedPickle] = os.path.join(get_cache_dir(),
                                                       imgKey + ".aa.pickle")
            precomputedObject = {
                "max_control_points": astroalign.MAX_CONTROL_POINTS,
                "invariants": invariants,
                "asterisms": asterisms,
                "kdtree": kdTree,
                "spots": spots,
                "spots_standardised": spotsProcessed,
                "nn": astroalign.NUM_NEAREST_NEIGHBORS,
                "version": 1
            }
            with open(fishDict[precomputedPickle], 'wb') as f:
                pickle.dump(precomputedObject, f)
            return precomputedObject
    return None
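
The cached object mirrors astroalign's internal matching machinery: triangle invariants plus a kd-tree over them. A minimal sketch of the same precomputation on synthetic spot coordinates, relying on the same private astroalign helpers the example uses:

import numpy as np
import astroalign
from scipy.spatial import cKDTree

# Synthetic stand-in for a fish's (x, y) spot coordinates.
spots = np.random.default_rng(0).uniform(0, 500, size=(30, 2))

invariants, asterisms = astroalign._generate_invariants(
    spots[:astroalign.MAX_CONTROL_POINTS])
tree = cKDTree(invariants)

# Look up the asterisms whose triangle invariants are closest to the first one.
_, neighbours = tree.query(invariants[0], k=astroalign.NUM_NEAREST_NEIGHBORS)
print(asterisms[neighbours])
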
Example #3
    def get_image(self,
                  crop=False,
                  output_size=None,
                  greyscale=False,
                  flip_red_blue=False):
        if self.request_render:
            self._render()
        if output_size is None:
            output_size = self.size

        string_image = pygame.image.tostring(self.window, 'RGB')
        image = pygame.image.fromstring(string_image, (self.size, self.size),
                                        'RGB')
        if greyscale:
            array = np.transpose(pygame.surfarray.array3d(image)[:, :, 0])
        else:
            array = np.transpose(
                pygame.surfarray.array3d(image)[:, :, (
                    2, 1, 0) if flip_red_blue else slice(None)], (1, 0, 2))

        if crop:
            array = crop_image(array)

        if output_size != self.size:
            array = cv2.resize(array,
                               dsize=(output_size, output_size),
                               interpolation=cv2.INTER_CUBIC)

        return array
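
The transposes in get_image exist because pygame stores surface arrays in (width, height, channel) order; converting to (row, column, channel) is what cv2.resize and most NumPy code expect. A quick shape check:

import numpy as np
import pygame
import pygame.surfarray

# pygame's surfarray is (width, height, channel); transpose to (row, column, channel).
surface = pygame.Surface((320, 240))                 # width=320, height=240
array = pygame.surfarray.array3d(surface)
print(array.shape)                                   # (320, 240, 3)
print(np.transpose(array, (1, 0, 2)).shape)          # (240, 320, 3)
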
Example #4
def crop_workflow_image(workflow, seq_num, img_type):
    # TODO: We have to update the checksum!
    page = get_next(p for p in workflow.pages if p.sequence_num == seq_num)
    if not page:
        raise ApiException("Could not find page with sequence number {0}"
                           .format(seq_num), 404)
    if img_type != 'raw':
        raise ApiException("Can only crop raw images.", 400)
    left = int(request.args.get('left', 0))
    top = int(request.args.get('top', 0))
    width = int(request.args.get('width', 0)) or None
    height = int(request.args.get('height', 0)) or None
    crop_image(unicode(page.raw_image), left, top, width, height)
    cache_key = "{0}.{1}.{2}".format(workflow.id, 'raw', page.raw_image.name)
    cache.delete(cache_key)
    return 'OK'
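
The crop_image helper called here is not shown. A hypothetical Pillow-based sketch with the same (path, left, top, width, height) signature, not the project's actual implementation, might look like this:

from PIL import Image

# Hypothetical sketch of a path-based crop helper; width/height of None mean
# "crop to the right/bottom edge".
def crop_image(path, left, top, width=None, height=None):
    img = Image.open(path)
    right = left + width if width is not None else img.width
    bottom = top + height if height is not None else img.height
    cropped = img.crop((left, top, right, bottom))
    img.close()
    cropped.save(path)
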
Example #6
    async def predict_with_camera_feed(self, robot: anki_vector.Robot) -> None:
        """Use the camera feed from Vector to detect sign language hand signs by applying a trained
        convolutional neural network on to images received from the camera feed.

        .. code-block:: python

            recognizer = SignLanguageRecognizer()
            recognizer.load_model("/path/to/model_config_filename",
                                "/path/to/model_weights_filename")
            with anki_vector.Robot(show_viewer=True) as robot:
                print("------ predicting hand signs, press ctrl+c to exit early ------")
                try:
                    robot.conn.run_coroutine(recognizer.predict_with_camera_feed(robot))
                except KeyboardInterrupt:
                    print("------ predicting done ------")
        """
        with self.graph.as_default():
            while True:
                await asyncio.sleep(2)

                # Get the latest image from the robot's camera feed
                camera_image = robot.camera.latest_image.raw_image
                
                # - Image pre-processing -
                # Convert the image into black and white using Pillow
                black_white_image = camera_image.convert("L")
                # Crop the image to reduce the complexity of the network
                cropped_image = util.crop_image(black_white_image, util.NetworkConstants.IMAGE_WIDTH, util.NetworkConstants.IMAGE_HEIGHT)
                # Convert image to an array with shape (image_width, image_height, 1)
                image = img_to_array(cropped_image)
                # Normalize the image data
                image = image.astype("float") / 255.0
                # Expand array shape to add an axis to denote the number of images fed as input
                image = np.expand_dims(image, axis=0)

                prediction = self.model.predict(image)[0]
                prediction = enumerate(prediction)
                prediction = sorted(prediction, key=lambda x: x[1], reverse=True)[0]
                label = prediction[0]
                if label == (util.NetworkConstants.NUM_CLASSES - 1):
                    label = "No Sign Displayed"
                else:
                    label = chr(label + 97)
                prediction = (label, prediction[1] * 100)
                print(f"Prediction: {prediction[0]} Confidence: {prediction[1]:.2f}%")
                if prediction[0] != "No Sign Displayed":
                    # If valid prediction is available, use Vector's text-to-speech system to say the
                    # recognized alphabet out loud
                    await robot.behavior.say_text(prediction[0])
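
The pre-processing steps can be illustrated on a standalone frame. This sketch assumes a 200x200 network input (the example reads the real size from util.NetworkConstants) and Keras' img_to_array:

import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing.image import img_to_array

# Stand-in camera frame; util.crop_image is replaced by a plain resize here.
frame = Image.fromarray(np.zeros((480, 640), dtype=np.uint8))   # mode "L"
cropped = frame.resize((200, 200))

image = img_to_array(cropped)              # shape (200, 200, 1)
image = image.astype("float") / 255.0      # normalize to [0, 1]
image = np.expand_dims(image, axis=0)      # shape (1, 200, 200, 1) for model.predict
print(image.shape)
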
Example #7
def main(args):
    input_path = args.image_path
    dir_path, file_name = ntpath.split(input_path)
    original_image = cv2.imread(input_path)
    height, width, depth = original_image.shape
    detector = MTCNN()
    landmarks = detector.detect_faces(original_image)
    old_landmark = landmarks[0]  # assumes a single face in the profile image; otherwise pick the detection with the highest confidence
    _, _, nose, theta = get_coordinates(old_landmark)
    rotated_image = ndimage.rotate(original_image, theta + 180.0)
    new_height, new_width, _ = rotated_image.shape
    delta_width = abs(new_width - width) // 2
    delta_height = abs(new_height - height) // 2
    cropped_image = crop_image(delta_width, new_width - delta_width,
                               delta_height, new_height - delta_height,
                               rotated_image)
    cv2.imwrite(os.path.join(dir_path, 'rotated_' + file_name), cropped_image)
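get_coordinates is defined elsewhere in that project. Based on the dictionaries returned by MTCNN's detect_faces, a hypothetical version might derive the eye positions and roll angle like this:

import math

# Hypothetical sketch only; the project's real get_coordinates is not shown here.
def get_coordinates(detection):
    left_eye = detection["keypoints"]["left_eye"]     # (x, y)
    right_eye = detection["keypoints"]["right_eye"]
    nose = detection["keypoints"]["nose"]
    theta = math.degrees(math.atan2(right_eye[1] - left_eye[1],
                                    right_eye[0] - left_eye[0]))
    return left_eye, right_eye, nose, theta
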
Example #8
def tonemap(E, l_remap=(0, 1), saturation=1., numtiles=(4, 4)):
    """
        render HDR for viewing
        exposure estimate -> log2 -> CLAHE -> remap to l_remap -> gamma correction -> HDR
        @param E: exposure (N x M x 3)
        @param l_remap: remap intensity to l_remap in the image adjust step
        @param saturation: saturation of the color.
        @param numtiles: number of contextual tiles in the CLAHE step
        return contrast reduced image
    """
    if E.shape[0] % numtiles[0] != 0 or E.shape[1] % numtiles[1] != 0:
        E = util.crop_image(E, (E.shape[0] // numtiles[0] * numtiles[0],
                                E.shape[1] // numtiles[1] * numtiles[1]))
    l2E, has_nonzero = lognormal(E)
    if has_nonzero:
        I = tone_operator(l2E, l_remap, saturation, numtiles)
    else:
        I = l2E
    # clip
    I[I < 0] = 0
    I[1 < I] = 1
    return np.uint8(I * 255.)
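
The lognormal and tone_operator helpers are defined elsewhere; the CLAHE step named in the docstring can be illustrated on its own with OpenCV (an illustration only, not the code used above):

import cv2
import numpy as np

# CLAHE on a synthetic 8-bit luminance image with the same 4x4 tile layout.
luminance = np.random.default_rng(0).integers(0, 256, (256, 256), dtype=np.uint8)
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
equalized = clahe.apply(luminance)
print(equalized.shape, equalized.dtype)   # (256, 256) uint8
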
Example #9
#############################################################################################################################################

########################### Resize Depth images to match input size #################################
### Crop the depth images using the crop size in mm and store in cropped directory
#####################################################################################################
cropped = 'cropped/'  ### directory for the cropped images

names = util.load_names(dataset, phase)
centers = util.load_centers(dataset, phase).astype(float)
cube_size = 150  # crop size in mm

for idx, name in enumerate(names):
    img = util.load_image(dataset, os.path.join(root_dir, name))
    center = centers[idx]

    crop = crop_image(img, center, dataset)

    crop -= center[2]
    crop = np.maximum(crop, -cube_size)
    crop = np.minimum(crop, cube_size)
    crop /= cube_size
    ### depth values are stored temporarily in [0,255] in order to enable viewing/validating
    crop += 1
    crop *= 255
    crop /= 2

    cv2.imwrite(os.path.join(root_dir, cropped, name), crop)

    if idx % 500 == 0:
        print('{}/{}'.format(idx + 1, len(names)))
#####################################################################################################
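
The arithmetic after the clamp maps depth offsets in [-cube_size, +cube_size] linearly onto [0, 255] for viewing:

import numpy as np

cube_size = 150
depth_offsets = np.array([-150.0, 0.0, 150.0])     # clamped offsets from the crop centre
scaled = (depth_offsets / cube_size + 1) * 255 / 2
print(scaled)                                      # [  0.   127.5  255. ]
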
Example #10
        viewer.set_mesh(mesh, center_and_scale=True)
        image = viewer.get_image(crop=False,
                                 output_size=viewer.size,
                                 greyscale=False)
        cv2.imwrite(image_filename, image)

if "wgan-results" in sys.argv:
    from util import crop_image

    COUNT = 5

    plot = ImageGrid(COUNT, create_viewer=False)

    for i in range(COUNT):
        image = plt.imread('screenshots/wgan/{:d}.png'.format(i))
        plot.set_image(crop_image(image, background=1), i)

    plot.save('plots/wgan-results.pdf')

if 'sdf_net_reconstruction' in sys.argv:
    from rendering.raymarching import render_image_for_index
    from PIL import Image
    from util import crop_image
    sdf_net, latent_codes = load_sdf_net(return_latent_codes=True)

    COUNT = 5
    MESH_FILENAME = 'screenshots/sdf_meshes/{:d}.png'

    indices = random.sample(range(latent_codes.shape[0]), COUNT)
    print(indices)
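
crop_image(image, background=1) trims the white border around a rendering. The helper itself is not shown; a hypothetical bounding-box version could look like this:

import numpy as np

# Hypothetical sketch, not the project's util.crop_image: keep the bounding box
# of every pixel that differs from the background value.
def crop_to_content(image, background=1):
    mask = np.any(image != background, axis=-1) if image.ndim == 3 else image != background
    rows = np.where(mask.any(axis=1))[0]
    cols = np.where(mask.any(axis=0))[0]
    if rows.size == 0 or cols.size == 0:
        return image
    return image[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]
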
Example #11
    def get_data(self, idx):
        img_idx, r_idx = idx
        annolist = getattr(self.mat['RELEASE'], 'annolist')[img_idx]
        if r_idx != -1:
            annorect = getattr(annolist, 'annorect')[r_idx]
        else:
            annorect = getattr(annolist, 'annorect')

        image_name = self.image_path + getattr(getattr(annolist, 'image'),
                                               'name')
        image = skimage.img_as_float(skimage.io.imread(image_name))

        scale = annorect.scale
        rotate = 0
        if self.task == 'train':
            scale *= 1.25
            if self.augmentation:
                scale *= 2**(random.gauss(0, 1) * MPII.SCALE_FACTOR)
                rotate = random.gauss(
                    0, 1) * MPII.ROTATE_FACTOR if random.random() <= 0.4 else 0

        hitbox = 200 * scale
        objpos = getattr(annorect, 'objpos')
        center = Vector2(getattr(objpos, 'x'), getattr(objpos, 'y'))
        ret_image = crop_image(image, center, scale, rotate, 256)

        if self.task and self.augmentation:
            ret_image[:, :, 0] *= random.uniform(0.6, 1.4)
            ret_image[:, :, 1] *= random.uniform(0.6, 1.4)
            ret_image[:, :, 2] *= random.uniform(0.6, 1.4)
            ret_image = np.clip(ret_image, 0, 1)

        assert ret_image.shape == (256, 256, 3)

        ret_heatmap = np.zeros(shape=(64, 64, self.joint_num),
                               dtype=np.float32)

        ret_keypoint = np.zeros(shape=(self.joint_num, 2))

        keypoints = annorect.annopoints.point

        for key_idx in range(keypoints.shape[0]):
            joint_id = keypoints[key_idx].id
            in_rgb = Vector2(keypoints[key_idx].x,
                             keypoints[key_idx].y)  # input RGB coordinate.
            in_heatmap = (in_rgb - center) * 64 / hitbox

            if rotate != 0:
                cos = math.cos(rotate * math.pi / 180)
                sin = math.sin(rotate * math.pi / 180)
                in_heatmap = Vector2(sin * in_heatmap.y + cos * in_heatmap.x,
                                     cos * in_heatmap.y - sin * in_heatmap.x)

            keypoint = in_heatmap + Vector2(32, 32)

            if min(keypoint) < 0 or max(keypoint) >= 64:
                continue

            ret_heatmap[:, :, joint_id] = generate_heatmap(
                64, keypoint.y, keypoint.x)  # cropped RGB coordinate.
            ret_keypoint[joint_id, :] = [keypoint.y, keypoint.x]

        act = getattr(self.mat['RELEASE'], 'act')[img_idx]
        ret_activity = act.act_id
        ret_threshold = 25.6
        return ret_image, ret_heatmap, ret_keypoint, ret_activity, ret_threshold
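
generate_heatmap is not part of this snippet; a common choice, sketched here as a hypothetical stand-in, is a small 2D Gaussian centred on the keypoint in 64x64 heatmap coordinates:

import numpy as np

# Hypothetical stand-in for generate_heatmap; sigma is an arbitrary choice here.
def gaussian_heatmap(size, center_y, center_x, sigma=1.0):
    ys, xs = np.mgrid[0:size, 0:size]
    return np.exp(-((xs - center_x) ** 2 + (ys - center_y) ** 2) / (2 * sigma ** 2))

heatmap = gaussian_heatmap(64, 20, 35)
print(heatmap.shape, heatmap.max())   # (64, 64) 1.0
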
Example #12
#############################################################################################################################################




############################ Resize RGB images to match input size #################################
### Crop the RGB images using the crop size in mm and store in cropped directory
#####################################################################################################
cropped = 'cropped/'
names = util.load_names(dataset,phase)
centers = util.load_centers(dataset,phase).astype(float)
centers = np.reshape(centers, (-1,3))

for idx, name in enumerate(names):
    img = util.load_image(dataset, os.path.join(root_dir, name))
    crop = crop_image(img, centers[idx], dataset)

    name = name.replace('.jpeg','.png')
    cv2.imwrite(os.path.join(root_dir,cropped,name), crop)
    if idx % 500 == 0:
        print('{}/{}'.format(idx + 1, len(names)))
#####################################################################################################



############################ Draw pose on cropped RGB samples from normalized labels #################################
### Plot the normalized labels on a sample of RGB images to validate cropping and label normalization
### this segment is only for validation
######################################################################################################################
lbls = util.load_labels(dataset,phase) ### load test/train data
names = util.load_names(dataset,phase)
Example #13
    cords_3d = util.pixel2world(cords_d, 'fpad')
    # from world coordinates to rgb image coordinates, gives us an array containing the mapping of each depth image pixel to its corresponding rgb image pixel (if it exists)
    cords_c, skel_camcoords = util.world2pixel(cords_3d, 'fpac')
    cords_3d = np.reshape(cords_3d, (480, 640, -1))

    img_rgbd = np.zeros((img.shape[0], img.shape[1], 4))
    cords_c = np.reshape(cords_c, (480, 640, 3))

    # using cython functions to drastically increase the speed of image pixel looping
    # loops over all RGB-D image pixels and assigns RGB value using cimg and cords_c
    img_rgbd = image_loops.color_map(img, cimg, img_rgbd, cords_c)
    img_rgbd = np.asarray(img_rgbd)

    # use center to crop the image to required input size then normalize depth and RGB values
    center = centers[idx]
    crop = util.crop_image(img_rgbd, center, 'fpad')
    # norm depth values
    crop[:, :, 3] -= center[2]
    crop[:, :, 3] = np.maximum(crop[:, :, 3], -cube_size)
    crop[:, :, 3] = np.minimum(crop[:, :, 3], cube_size)
    crop[:, :, 3] /= cube_size
    # norm RGB values
    crop[:, :, :3] *= 2
    crop[:, :, :3] /= 255
    crop[:, :, :3] -= 1

    # swap axes; channels should be the first axis
    imgs[idx] = np.swapaxes(crop, 0, 2)
    lbls[idx] = np.asarray(labels[idx].split(), dtype=np.float32)
    # print progress
    if idx % 500 == 0:
Example #14
def render_image(sdf_net,
                 latent_code,
                 resolution=800,
                 threshold=0.0005,
                 sdf_offset=0,
                 iterations=1000,
                 ssaa=2,
                 radius=1.0,
                 crop=False,
                 color=(0.8, 0.1, 0.1),
                 vertical_cutoff=None):
    camera_forward = camera_position / np.linalg.norm(camera_position) * -1
    camera_distance = np.linalg.norm(camera_position).item()
    up = np.array([0, 1, 0])
    camera_right = np.cross(camera_forward, up)
    camera_right /= np.linalg.norm(camera_right)
    camera_up = np.cross(camera_forward, camera_right)
    camera_up /= np.linalg.norm(camera_up)

    screenspace_points = np.meshgrid(
        np.linspace(-1, 1, resolution * ssaa),
        np.linspace(-1, 1, resolution * ssaa),
    )
    screenspace_points = np.stack(screenspace_points)
    screenspace_points = screenspace_points.reshape(2, -1).transpose()

    points = np.tile(camera_position, (screenspace_points.shape[0], 1))
    points = points.astype(np.float32)

    focal_distance = 1.0 / math.tan(math.asin(radius / camera_distance))
    ray_directions = screenspace_points[:, 0] * camera_right[:, np.newaxis] \
        + screenspace_points[:, 1] * camera_up[:, np.newaxis] \
        + focal_distance * camera_forward[:, np.newaxis]
    ray_directions = ray_directions.transpose().astype(np.float32)
    ray_directions /= np.linalg.norm(ray_directions, axis=1)[:, np.newaxis]

    b = np.einsum('ij,ij->i', points, ray_directions) * 2
    c = np.dot(camera_position, camera_position) - radius * radius
    distance_to_sphere = (-b - np.sqrt(np.power(b, 2) - 4 * c)) / 2
    indices = np.argwhere(np.isfinite(distance_to_sphere)).reshape(-1)

    points[indices] += ray_directions[indices] * distance_to_sphere[indices,
                                                                    np.newaxis]

    points = torch.tensor(points, device=device, dtype=torch.float32)
    ray_directions_t = torch.tensor(ray_directions,
                                    device=device,
                                    dtype=torch.float32)

    indices = torch.tensor(indices, device=device, dtype=torch.int64)
    model_mask = torch.zeros(points.shape[0], dtype=torch.uint8)

    for i in tqdm(range(iterations)):
        test_points = points[indices, :]
        sdf = sdf_net.evaluate_in_batches(
            test_points, latent_code, return_cpu_tensor=False) + sdf_offset
        torch.clamp_(sdf, -0.02, 0.02)
        points[indices, :] += ray_directions_t[indices, :] * sdf.unsqueeze(1)

        hits = (sdf > 0) & (sdf < threshold)
        model_mask[indices[hits]] = 1
        indices = indices[~hits]

        misses = torch.norm(points[indices, :], dim=1) > radius
        indices = indices[~misses]

        if indices.shape[0] < 2:
            break

    model_mask[indices] = 1

    if vertical_cutoff is not None:
        model_mask[points[:, 1] > vertical_cutoff] = 0
        model_mask[points[:, 1] < -vertical_cutoff] = 0

    normal = get_normals(sdf_net, points[model_mask],
                         latent_code).cpu().numpy()

    model_mask = model_mask.cpu().numpy().astype(bool)
    points = points.cpu().numpy()
    model_points = points[model_mask]

    seen_by_light = 1.0 - get_shadows(sdf_net,
                                      model_points,
                                      light_position,
                                      latent_code,
                                      radius=radius,
                                      sdf_offset=sdf_offset)

    light_direction = light_position[np.newaxis, :] - model_points
    light_direction /= np.linalg.norm(light_direction, axis=1)[:, np.newaxis]

    diffuse = np.einsum('ij,ij->i', light_direction, normal)
    diffuse = np.clip(diffuse, 0, 1) * seen_by_light

    reflect = light_direction - np.einsum('ij,ij->i', light_direction,
                                          normal)[:, np.newaxis] * normal * 2
    reflect /= np.linalg.norm(reflect, axis=1)[:, np.newaxis]
    specular = np.einsum('ij,ij->i', reflect, ray_directions[model_mask, :])
    specular = np.clip(specular, 0.0, 1.0)
    specular = np.power(specular, 20) * seen_by_light
    rim_light = -np.einsum('ij,ij->i', normal, ray_directions[model_mask, :])
    rim_light = 1.0 - np.clip(rim_light, 0, 1)
    rim_light = np.power(rim_light, 4) * 0.3

    color = np.array(color)[np.newaxis, :] * (diffuse * 0.5 + 0.5)[:,
                                                                   np.newaxis]
    color += (specular * 0.3 + rim_light)[:, np.newaxis]

    color = np.clip(color, 0, 1)

    ground_points = ray_directions[:, 1] < 0
    ground_points[model_mask] = 0
    ground_points = np.argwhere(ground_points).reshape(-1)
    ground_plane = np.min(model_points[:, 1]).item()
    points[ground_points, :] -= ray_directions[ground_points, :] * (
        (points[ground_points, 1] - ground_plane) /
        ray_directions[ground_points, 1])[:, np.newaxis]
    ground_points = ground_points[
        np.linalg.norm(points[ground_points, ::2], axis=1) < 3]

    ground_shadows = get_shadows(sdf_net,
                                 points[ground_points, :],
                                 light_position,
                                 latent_code,
                                 sdf_offset=sdf_offset)

    pixels = np.ones((points.shape[0], 3))
    pixels[model_mask] = color
    pixels[ground_points] -= ((1.0 - 0.65) * ground_shadows)[:, np.newaxis]
    pixels = pixels.reshape((resolution * ssaa, resolution * ssaa, 3))

    if crop:
        from util import crop_image
        pixels = crop_image(pixels, background=1)

    image = Image.fromarray(np.uint8(pixels * 255), 'RGB')

    if ssaa != 1:
        image = image.resize((resolution, resolution), Image.LANCZOS)

    return image
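
The distance_to_sphere computation near the top is the standard ray/sphere intersection: with unit direction d and origin p, |p + t*d|^2 = r^2 gives t^2 + (2 p.d) t + (|p|^2 - r^2) = 0, and the smaller root is the entry point. A one-ray check:

import numpy as np

p = np.array([0.0, 0.0, 2.5])     # camera position
d = np.array([0.0, 0.0, -1.0])    # unit ray direction towards the origin
radius = 1.0

b = 2.0 * np.dot(p, d)
c = np.dot(p, p) - radius * radius
t = (-b - np.sqrt(b * b - 4.0 * c)) / 2.0
print(t, p + t * d)               # 1.5 [0. 0. 1.] -- the near intersection
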
Example #15
def findClosestMatch(modelImageName,
                     modelDict,
                     imagesToComareDicts,
                     spotsJsonKey='spotsJson',
                     maskImgKey='mask',
                     verbose=False,
                     progress=False):
    """Finds the best matches for the model image in the images to compare dictionaries

    Args:
        modelImageName (str): The name of the image being compared
        modelDict (dict): The dictionary containing data about the image
        imagesToComareDicts (Dict[str, Dict]): The dictionaries of the other images to compare against, keyed by image name.
        spotsJsonKey (str, optional): The key in the dictionary indicating the filepath to the json file containing the spot information for the fish. Defaults to 'spotsJson'.
        maskImgKey (str, optional): The key in the dictionary indicating the filepath to the mask file containing the boolean mask for the fish. Defaults to 'mask'.
        verbose (bool, optional): Whether to print verbose information to stdout. Defaults to False.
        progress (bool, optional): Whether to print progress information to stdout. Defaults to False.

    Returns:
        List[Tuple[float, float, str]]: List of tuples of (score, maskCoverage, image name) for each image the model image was compared to.
    """
    modelPrecompValues = open_cached(modelDict,
                                     modelImageName,
                                     spotsJsonKey=spotsJsonKey,
                                     maskImgKey=maskImgKey)
    if modelPrecompValues is None:
        if verbose:
            print("Not enough points")
        return []

    centerTranslationMatrix = np.float32([[1, 0, 250], [0, 1, 250], [0, 0, 1]])

    targetSpots = cv2.imread(modelDict["spots"], 0)
    targetMask = crop_image(cv2.imread(modelDict[maskImgKey], 0))
    targetMaskShifted = cv2.warpPerspective(
        targetMask, centerTranslationMatrix,
        (targetMask.shape[1] + 500, targetMask.shape[0] + 500))

    if verbose:
        print(modelImageName)
        visualize(modelPrecompValues["spots_standardised"],
                  modelPrecompValues["spots_standardised"],
                  annotateOrder=True,
                  invertYAxis=False,
                  figsize=(10, 10))

    ranking = []
    for imageKey in imagesToComareDicts:
        if progress or verbose:
            print(imageKey)

        comparatorImageDict = imagesToComareDicts[imageKey]

        dataPrecompValues = open_cached(comparatorImageDict,
                                        imageKey,
                                        spotsJsonKey=spotsJsonKey,
                                        maskImgKey=maskImgKey)

        if dataPrecompValues is None:
            ranking.append((-1, 0, imageKey))
            continue

        if verbose:
            visualize(dataPrecompValues["spots_standardised"],
                      dataPrecompValues["spots_standardised"],
                      annotateOrder=True,
                      invertYAxis=False,
                      figsize=(10, 10))

        T, num_points, score, s_idx, t_idx = aamatch(
            dataPrecompValues["spots_standardised"],
            modelPrecompValues["spots_standardised"],
            dataPrecompValues["invariants"], dataPrecompValues["asterisms"],
            dataPrecompValues["kdtree"], modelPrecompValues["invariants"],
            modelPrecompValues["asterisms"], modelPrecompValues["kdtree"])

        maskCoverage = 0
        spotsCoverage = 0
        if T is not None:
            if verbose:
                transformedCoords = []
                for coord in dataPrecompValues["spots_standardised"]:
                    coordNew = np.array([coord[0], coord[1], 1])
                    coordNew = T.params @ coordNew
                    transformedCoords.append(coordNew[:2])

                maxValue = max(np.max(dataPrecompValues["spots_standardised"]),
                               np.max(dataPrecompValues["spots_standardised"]),
                               np.max(np.asarray(transformedCoords)))
                visualize(dataPrecompValues["spots_standardised"],
                          dataPrecompValues["spots_standardised"],
                          annotateOrder=True,
                          invertYAxis=False,
                          figsize=(10, 10))
                visualize(modelPrecompValues["spots_standardised"],
                          modelPrecompValues["spots_standardised"],
                          annotateOrder=True,
                          invertYAxis=False,
                          figsize=(10, 10))

                visualize(modelPrecompValues["spots_standardised"],
                          np.asarray(transformedCoords),
                          annotateOrder=False,
                          invertYAxis=False,
                          figsize=(10, 10))

            model = skimage.transform.AffineTransform()
            model.estimate(dataPrecompValues["spots"][s_idx],
                           modelPrecompValues["spots"][t_idx])
            dataMask = crop_image(
                cv2.imread(comparatorImageDict[maskImgKey], 0))

            warped = cv2.warpPerspective(
                dataMask, centerTranslationMatrix @ model.params,
                (targetMaskShifted.shape[1], targetMaskShifted.shape[0]))

            pMask, rMask = get_average_precision_recall(
                np.array([warped]),
                np.array([targetMaskShifted]),
                verbose=verbose)
            maskCoverage = 2 * (pMask * rMask) / (pMask + rMask)

        ranking.append((score * maskCoverage, maskCoverage, imageKey))
    return ranking
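
maskCoverage is the F1 (Dice) score of the warped comparator mask against the target mask. A tiny sketch of the formula, with a guard against an empty denominator that the code above omits:

def f1_score(precision, recall):
    denominator = precision + recall
    return 2 * precision * recall / denominator if denominator else 0.0

print(f1_score(0.8, 0.5))   # 0.6153846153846154
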
Example #16
def data_capture(camera: anki_vector.camera.CameraComponent, stats: dict,
                 root_folder: str) -> None:
    """Build an image dataset using the camera feed from Vector.

    This method uses an image from the camera and generates a multiplier number of images by
    rotating the original image. The keystroke used to initiate the image capture and processing
    is used to label the image.
    """

    try:
        # TODO: curses works well with Mac OS and Linux, explore msvcrt for Windows
        terminal = curses.initscr()
        curses.cbreak()
        curses.noecho()
        terminal.nodelay(True)

        # The number of images to generate using the image captured as a seed
        image_multiplier = 10
        # The rotation range, in degrees, used to generate additional images from the original
        min_rotation = -10
        max_rotation = 10

        print(
            "------ capturing hand signs dataset, press ctrl+c to exit ------")
        while True:
            key = terminal.getch()
            if (ord("a") <= key <= ord("z")) or (key == ord(" ")):

                # Background images: the filename prefix becomes "background" instead of " "
                if key == ord(" "):
                    key = "background"
                else:
                    key = chr(key)

                # Pull image from camera
                original_image = camera.latest_image.raw_image
                if original_image:
                    # Convert image to black and white
                    black_white_image = original_image.convert("L")
                    rotation_axes = [1, 1, 0]

                    # Generate more images with random rotation
                    for rotation in random.sample(
                            range(min_rotation, max_rotation),
                            image_multiplier):
                        # Randomly define which axis to rotate the image by
                        random.shuffle(rotation_axes)
                        x_axis_rotation_enabled, y_axis_rotation_enabled = rotation_axes[:
                                                                                         2]
                        rotated_image_array = ndimage.rotate(
                            black_white_image,
                            rotation,
                            axes=(x_axis_rotation_enabled,
                                  y_axis_rotation_enabled),
                            reshape=False)

                        # Convert to a 200*200 image
                        rotated_image = Image.fromarray(rotated_image_array)
                        cropped_image = util.crop_image(
                            rotated_image, util.NetworkConstants.IMAGE_WIDTH,
                            util.NetworkConstants.IMAGE_HEIGHT)

                        # Save the image
                        image_filename = key + "_" + str(stats.get(key,
                                                                   0)) + ".png"
                        stats[key] = stats.get(key, 0) + 1
                        cropped_image.save(
                            os.path.join(root_folder, image_filename))

                    # Character
                    print(f"Recorded images for {key}\n\r")
    except (CancelledError, KeyboardInterrupt):
        pass
    finally:
        curses.nocbreak()
        curses.echo()
        curses.endwin()