Code Example #1
class WebCamBis(object):
    """
    The WebCamBis class's goal is to take a picture and send it back to CameraManager.
    It is a duplicate of the WebCam class and will be replaced by a Kinect class in the future.
    """
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])

        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        # avoid an OpenCV conflict with the WebCam class
        time.sleep(2)

        cap = cv2.VideoCapture(0)
        _, image = cap.read()
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image = cv2.resize(image, (640, 480), interpolation=cv2.INTER_AREA)
        pprint(image.shape)
        body["picture"] = {
            "data": encode(image),
            "shape": image.shape,
            "from": self.__class__.__name__,
        }
        self.queue_manager.publish("CameraManager", body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
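The camera services above serialise the captured frame with an encode() helper, and the vision services later restore it with decode(); neither helper is shown in these excerpts. A minimal sketch of what such a pair could look like, assuming the frame is shipped as base64-encoded raw bytes together with its shape (the real aye-saac helpers may differ):

import base64

import numpy as np


def encode(image: np.ndarray) -> str:
    # Serialise the raw pixel buffer as base64 so it survives the message queue.
    return base64.b64encode(image.tobytes()).decode("ascii")


def decode(data: str, shape, dtype=np.uint8) -> np.ndarray:
    # Rebuild the ndarray from the base64 payload and the shape stored alongside it.
    buffer = base64.b64decode(data)
    return np.frombuffer(buffer, dtype=dtype).reshape(shape)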
Code Example #2
File: main.py  Project: jessi678/aye-saac
class PositionDetection(object):
    """
    The PositionDetection class's purpose is to provide the global position of the detected objects.
    """

    def __init__(self):
        self.queue_manager = QueueManager([self.__class__.__name__, "Interpreter"])

        logger.info(f"{self.__class__.__name__} ready")

    def get_pos_str(self, obj, max_pos=(1, 1)):
        step = (max_pos[0] / 3, max_pos[1] / 3)
        center = (
            (obj["bbox"][0] + obj["bbox"][2]) / 2,
            (obj["bbox"][1] + obj["bbox"][3]) / 2,
        )
        if center[1] > 2 * step[1]:
            return " on the left"
        elif center[1] > step[1]:
            return " in the center"
        return " on the right"

    def callback(self, body, **_):
        pprint(body)

        for i, obj in enumerate(body["objects"]):
            body["objects"][i]["lateral_position"] = self.get_pos_str(obj)

        body["path_done"].append(self.__class__.__name__)
        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__, self.callback)
Code Example #3
class WebCam(object):
    """
    The WebCam class's goal is to take a picture and send it back to CameraManager.
    """
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])

        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        cap = cv2.VideoCapture(0)
        _, image = cap.read()
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        image = cv2.resize(image, (640, 480), interpolation=cv2.INTER_AREA)
        body["picture"] = {
            "data": encode(image),
            "shape": image.shape,
            "from": self.__class__.__name__,
        }
        self.queue_manager.publish("CameraManager", body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
Code Example #4
class OCR(object):
    """
    The OCR class's purpose is to detect all possible text in the picture.
    """
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])
        self.pipeline = keras_ocr.pipeline.Pipeline()

    def callback(self, body, **_):
        image = [
            decode(body["pictures"][0]["data"], body["pictures"][0]["shape"],
                   np.uint8)
        ]
        predictions = self.pipeline.recognize(image)[0]
        """fig, axs = plt.subplots(nrows=len(image), figsize=(20, 20))
        keras_ocr.tools.drawAnnotations(image=image[0], predictions=predictions, ax=axs)
        plt.show()"""
        pprint(predictions)
        text = bb_to_text(predictions)

        body["texts"] = text
        body["path_done"].append(self.__class__.__name__)
        del body["pictures"]
        pprint(body)
        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

        logger.info(f"{self.__class__.__name__} ready")

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
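bb_to_text() is not shown in these excerpts. keras-ocr's pipeline.recognize() returns, per image, a list of (word, box) tuples where box is a 4x2 array of corner points, so a plausible sketch of the helper, assuming it simply sorts the words into rough reading order (the project's real grouping logic may be more involved):

def bb_to_text(predictions):
    # predictions: list of (word, box) pairs, box being a 4x2 array of corner points.
    # Sort words top-to-bottom, then left-to-right, using each box's top-left corner.
    ordered = sorted(predictions, key=lambda p: (p[1][:, 1].min(), p[1][:, 0].min()))
    # Downstream code joins body["texts"] as a list of word lists, so return one list of words.
    return [[word for word, _ in ordered]]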
Code Example #5
    def __init__(self):
        self.queue_manager = QueueManager([
            self.__class__.__name__,
            "Interpreter",
            "ColourDetection",
            "PositionDetection",
        ])

        self.models = [
            {
                "name": "coco",
                "model_path": config.directory.data.joinpath("coco_resnet"),
                "category_index": coco_category_index,
            },
            {
                "name": "epic-kitchens",
                "model_path": config.directory.data.joinpath("epic_kitchens"),
                "category_index": epic_kitchens_category_index,
            },
        ]

        for model in self.models:
            tf_model = tf.saved_model.load(str(model["model_path"]))
            model["model"] = tf_model.signatures["serving_default"]

        logger.info(f"{self.__class__.__name__} ready")
Code Example #6
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "NaturalLanguageGenerator"]
        )
        self.memory = {}

        logger.info(f"{self.__class__.__name__} ready")
Code Example #7
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "ExternalInterface"])
        self.answers = {}
        self.description_types = [
            "DESCRIPTION_NOTHING", "DESCRIPTION_ANSWER_S",
            "DESCRIPTION_ANSWER_P", "DESCRIPTION_UNKNOWN"
        ]
        self.build_generator()

        logger.info(f"{self.__class__.__name__} ready")
Code Example #8
    def __init__(self):
        self.queue_manager = QueueManager([
            self.__class__.__name__, "WebCam", "WebCamBis", "ObjectDetection",
            "OCR"
        ])
        self.camera_names = ["WebCam"]
        self.pictures = []
        self.waiting_cameras = 0
        self.save_body = None

        logger.info(f"{self.__class__.__name__} ready")
Code Example #9
File: main.py  Project: jessi678/aye-saac
    def __init__(self):
        self.queue_manager = QueueManager([self.__class__.__name__, "Manager"])
        self.previous_query = None

        model_path = str(config.directory.data.joinpath("rasa", "nlu"))

        dirs = [f for f in listdir(model_path) if isdir(join(model_path, f))]
        dirs.sort(reverse=True)
        model = join(model_path, dirs[0])
        self.interpreter = Interpreter.load(model)

        logger.info(f"{self.__class__.__name__} ready")
Code Example #10
class Kinect(object):
    """
    The Kinect class's goal is to take a picture and send it back to CameraManager.
    """
    def __init__(self, mode=COLOR_MODE):
        self.kinect = PyKinectRuntime.PyKinectRuntime(mode)
        if mode & DEPTH_MODE:
            self.kinect_frame_size = (
                self.kinect.depth_frame_desc.Height,
                self.kinect.depth_frame_desc.Width,
            )
        if mode & COLOR_MODE:
            self.kinect_frame_size = (
                self.kinect.color_frame_desc.Height,
                self.kinect.color_frame_desc.Width,
                -1,
            )
        self.transform = cv2.COLOR_GRAY2RGB if mode & DEPTH_MODE else cv2.COLOR_RGBA2RGB
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])

        logger.info(f"{self.__class__.__name__} ready")

    def get_colored_frame(self, size=None):
        frame = self.kinect.get_last_color_frame()
        frame = frame.reshape(self.kinect_frame_size).astype(np.uint8)
        frame = cv2.cvtColor(frame, self.transform)
        if size:
            return cv2.resize(frame, size)
        return frame

    def callback(self, body, **_):
        cap = cv2.VideoCapture(0)
        _, image_np = cap.read()

        # if mode & DEPTH_MODE:
        #     frame = _kinect.get_last_depth_frame()
        #     frameD = _kinect._depth_frame_data
        #     draw = True

        # if mode & COLOR_MODE and _kinect.has_new_color_frame():
        frame = self.get_colored_frame()

        body["picture"] = {
            "data": encode(frame),
            "shape": frame.shape,
            "from": self.__class__.__name__,
        }
        self.queue_manager.publish("CameraManager", body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
Code Example #11
class Manager(object):
    """
    The Manager class's purpose is to create a path through the dialogue manager depending on the goal of the query.
    """
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager", "Interpreter"])
        # TODO: Missing intent for lateral position
        self.intents_to_path = {
            "read_text": [["CameraManager", "OCR", "Interpreter"]],
            "detect_colour": [[
                "CameraManager", "ObjectDetection", "ColourDetection",
                "Interpreter"
            ]],
            "identify": [
                ["CameraManager", "OCR", "Interpreter"],
                ["CameraManager", "ObjectDetection", "Interpreter"],
            ],
            "recognise": [["CameraManager", "ObjectDetection", "Interpreter"]],
            "locate": [[
                "CameraManager", "ObjectDetection", "PositionDetection",
                "Interpreter"
            ]],
            "count": [["CameraManager", "ObjectDetection", "Interpreter"]],
            "confidence": [["Interpreter"]],
        }

        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        intern_token = token_hex(8)
        intent = body["intents"]["intent_ranking"][0]["name"]
        body["intern_token"] = intern_token
        body["wait_package"] = (len(self.intents_to_path[intent])
                                if intent in self.intents_to_path else 0)
        body["path_done"].append(self.__class__.__name__)

        intents_path = (copy.deepcopy(self.intents_to_path[intent])
                        if intent in self.intents_to_path else [])

        for path in intents_path:
            pprint(path)

            body_ = copy.deepcopy(body)
            body_["vision_path"] = path
            next_service = body_["vision_path"].pop(0)
            self.queue_manager.publish(next_service, body_)
            if "run_as_webservice" not in body:
                time.sleep(1)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
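Every downstream service follows the same pattern: append itself to path_done, pop the next hop off vision_path, and publish the body there. A small broker-free illustration of how a path drains as the message travels:

body = {
    "vision_path": ["CameraManager", "ObjectDetection", "Interpreter"],
    "path_done": ["NaturalLanguageUnderstanding", "Manager"],
}

# Each hop pops its successor and forwards the body to it.
while body["vision_path"]:
    next_service = body["vision_path"].pop(0)
    print(f"publish to {next_service}, remaining path: {body['vision_path']}")
    body["path_done"].append(next_service)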
Code Example #12
    def __init__(self):
        self.queue_manager = QueueManager([
            self.__class__.__name__,
            "Interpreter",
            "ColourDetection",
            "PositionDetection",
        ])
        self.category_index = coco_category_index

        self.model_path = config.directory.data.joinpath("resnet")
        model = tf.saved_model.load(str(self.model_path))
        self.model = model.signatures["serving_default"]

        logger.info(f"{self.__class__.__name__} ready")
Code Example #13
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])

        data_file = config.directory.data.joinpath("colour", "lab.txt")

        colour_list = pd.read_csv(data_file,
                                  skiprows=28,
                                  header=None,
                                  names=["l", "a", "b", "name"])
        colour_list = colour_list.values.tolist()[1:]

        self.colour_list_names = [x[3] for x in colour_list]
        self.colour_list_values = np.asarray(
            [np.asarray(x[:3], dtype=np.float32) for x in colour_list])

        logger.info(f"{self.__class__.__name__} ready")
Code Example #14
class NaturalLanguageUnderstanding(object):
    """
    The NaturalLanguageUnderstanding class's purpose is to determine the objective of
    the query.
    """
    def __init__(self):
        self.queue_manager = QueueManager([self.__class__.__name__, "Manager"])
        self.previous_query = None

        model_path = str(config.directory.data.joinpath("rasa", "nlu"))

        dirs = [f for f in listdir(model_path) if isdir(join(model_path, f))]
        dirs.sort(reverse=True)
        model = join(model_path, dirs[0])
        logger.info(f"Model loading: " + model)
        self.interpreter = Interpreter.load(model)

        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        body["asking"] = body["query"].split()
        intents = self.interpreter.parse(body["query"])
        try:
            if (intents["intent"]["name"] == "same_intent"
                    and self.previous_query != None):
                intents["intent"]["name"] = self.previous_query["intent"][
                    "name"]
            if (intents["intent"]["name"] != "recognise"
                    and intents["intent"]["name"] != "identify"
                    and check_followup(body["query"]) == True):
                intents["entities"].extend(self.previous_query["entities"])
        except IndexError as error:
            logger.error(error)
        except Exception as exception:
            logger.warning(exception)
        self.previous_query = intents
        body["intents"] = intents
        body["path_done"].append(self.__class__.__name__)
        logger.info(body)

        self.queue_manager.publish("Manager", body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
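The callback indexes directly into the Rasa NLU parse result, which for the Interpreter API used here is a plain dict. A hedged sketch of the shape that the services above rely on (illustrative values only):

# Shape of interpreter.parse(query) that the pipeline reads from body["intents"]:
intents = {
    "intent": {"name": "locate", "confidence": 0.93},
    "intent_ranking": [{"name": "locate", "confidence": 0.93}],
    "entities": [{"entity": "object", "value": "cup", "start": 13, "end": 16}],
}

# The Manager reads body["intents"]["intent_ranking"][0]["name"] to pick a vision path,
# and the NaturalLanguageGenerator reads body["intents"]["entities"].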
Code Example #15
    def __init__(self, mode=COLOR_MODE):
        self.kinect = PyKinectRuntime.PyKinectRuntime(mode)
        if mode & DEPTH_MODE:
            self.kinect_frame_size = (
                self.kinect.depth_frame_desc.Height,
                self.kinect.depth_frame_desc.Width,
            )
        if mode & COLOR_MODE:
            self.kinect_frame_size = (
                self.kinect.color_frame_desc.Height,
                self.kinect.color_frame_desc.Width,
                -1,
            )
        self.transform = cv2.COLOR_GRAY2RGB if mode & DEPTH_MODE else cv2.COLOR_RGBA2RGB
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])

        logger.info(f"{self.__class__.__name__} ready")
Code Example #16
File: main.py  Project: jessi678/aye-saac
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])
        # TODO: Missing intent for lateral position
        self.intents_to_path = {
            "read_text": [["CameraManager", "OCR", "Interpreter"]],
            "detect_colour": [[
                "CameraManager", "ObjectDetection", "ColourDetection",
                "Interpreter"
            ]],
            "identify": [
                ["CameraManager", "OCR", "Interpreter"],
                ["CameraManager", "ObjectDetection", "Interpreter"],
            ],
            "recognise": [["CameraManager", "ObjectDetection", "Interpreter"]],
            "locate": [["CameraManager", "ObjectDetection", "Interpreter"]],
        }

        logger.info(f"{self.__class__.__name__} ready")
Code Example #17
def submit():
    service_if_audio = "AutomaticSpeechRecognition"
    service_if_text = "NaturalLanguageUnderstanding"

    # Parse user request
    user_request = UserRequest(
        service_if_audio=service_if_audio, service_if_text=service_if_text
    )

    # Create queue for Ayesaac and send it
    ayesaac_queue_manager = QueueManager([user_request.first_service])
    ayesaac_queue_manager.publish(user_request.first_service, user_request.body)

    status_url = url_for("submit_status", task_id=user_request.uid)

    return (
        status_url,
        202,
        {"Location": status_url},
    )
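submit() answers with HTTP 202 and a Location header pointing at the status endpoint, so callers are expected to poll until the pipeline has produced a response. A hypothetical client sketch (the base URL, route, and payload fields are assumptions, not shown in these excerpts):

import time

import requests

# Hypothetical route and payload; the real web app defines these elsewhere.
base_url = "http://localhost:5000"
resp = requests.post(base_url + "/submit", json={"query": "what can you see?"})
assert resp.status_code == 202
status_url = resp.headers["Location"]  # may be a relative path from url_for

# Poll the status endpoint until the answer is ready.
while True:
    status = requests.get(base_url + status_url)
    if status.status_code != 202:
        print(status.json())
        break
    time.sleep(1)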
Code Example #18
class Interpreter(object):
    """
    The Interpreter class's purpose is a simple comparison between what the vision part
    found and what the user asked for
    (which objects were found and which were not).
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "NaturalLanguageGenerator"]
        )
        self.memory = {}

        logger.info(f"{self.__class__.__name__} ready")

    def filter_objects(self, body):
        return body["objects"]

    def filter_texts(self, body):
        return body["texts"]

    def callback(self, body, **_):
        data = None
        key = ""

        if "objects" in body:
            key = "objects"
            data = self.filter_objects(body)
            body["objects"] = data
        elif "texts" in body:
            key = "texts"
            data = self.filter_texts(body)
            body["texts"] = data

        if body["wait_package"] == 1:
            body["path_done"].append(self.__class__.__name__)
            del body["vision_path"]
            pprint(body)
            # TODO: uncomment if you wanna test the NLG, it could be text, objects,
            # objects + colour, objects + lateral position
            self.queue_manager.publish("NaturalLanguageGenerator", body)
        else:
            if body["intern_token"] not in self.memory:
                self.memory[body["intern_token"]] = {key: data}
            elif (
                body["intern_token"] in self.memory
                and body["wait_package"] < len(self.memory[body["intern_token"]]) - 1
            ):
                self.memory[body["intern_token"]][key] = data
            else:
                for key in self.memory[body["intern_token"]]:
                    body[key] = self.memory[body["intern_token"]][key]
                del self.memory[body["intern_token"]][key]
                pprint(body)
                # TODO: uncomment if you wanna test the NLG
                self.queue_manager.publish("NaturalLanguageGenerator", body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__, self.callback)
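For intents that fan out into more than one vision path (e.g. "identify" sends both an OCR branch and an ObjectDetection branch), wait_package is greater than 1 and the Interpreter parks the first branch's result under the shared intern_token until another branch arrives. A rough illustration of the two-branch merge, assuming wait_package == 2 (token and values are made up):

# Branch 1 (OCR) arrives first: its texts are parked under the shared token.
memory = {}
memory["ab12cd34ef56ab78"] = {"texts": [["semi", "skimmed", "milk"]]}

# Branch 2 (ObjectDetection) arrives later; the parked data is merged into its body
# before publishing to the NaturalLanguageGenerator.
body = {"intern_token": "ab12cd34ef56ab78", "objects": [{"name": "bottle"}]}
for key, data in memory[body["intern_token"]].items():
    body[key] = data
print(body)  # now holds both "objects" and "texts"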
Code Example #19
class PositionDetection(object):
    """
    The PositionDetection class's purpose is to provide the global position of the detected objects.
    
    Methods within use bounding boxes to determine positions relative to hands/anchors.
    
    A little reminder about bounding boxes returned from TensorFlow...
    The bounding box coordinates are (top, left, bottom and right) and are normalised
    to the image width/height (between 0 and 1).
    
    Example coordinates:
    * top    = 0.2
    * bottom = 0.4
    * left   = 0.3
    * right  = 0.6
    
    The bounding box will look like this:
    
                   left (0.3)  right (0.6)
                      |          |
    top (0.2)    -----|----------|----
                      |          |
                      |          |
                      |          |
    bottom (0.4) -----|----------|----
                      |          |
                    
    """
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])

        logger.info(f"{self.__class__.__name__} ready")

    def get_lateral_position(self, obj, max_pos=(1, 1)):
        step = (max_pos[0] / 3, max_pos[1] / 3)
        center = (
            (obj["bbox"][0] + obj["bbox"][2]) / 2,
            (obj["bbox"][1] + obj["bbox"][3]) / 2,
        )
        if center[1] > 2 * step[1]:
            return " on the right"
        elif center[1] > step[1]:
            return " in the center"
        return " on the left"

    def get_hand_position(self, obj, objects):
        '''Method identifies position relative to hands or people using bounding boxes'''

        # Set default to ""
        position_str = ""

        # Set the hand classes
        hand_classes = ["hand", "person"]

        # If the object is a hand itself, it does not need to be positioned
        if obj["name"] in hand_classes:
            return position_str

        # Determine if there are hands in the image: use "hand" or "person" class name
        hand_objects = copy.deepcopy([
            o for o in objects
            if o["from"] == obj["from"] and o["name"] in hand_classes
        ])
        if len(hand_objects) == 0:
            return position_str

        # Not going to be able to use hands to position if there are more than two in this image
        if len(hand_objects) > 2:
            return position_str

        # Now we have hands in the image, find the positioning
        # Get the bounding box normalised coords
        top_obj, left_obj, bottom_obj, right_obj = tuple(obj["bbox"])

        # get the central coords
        x_obj = (left_obj + right_obj) / 2
        y_obj = (top_obj + bottom_obj) / 2

        # If there are two objects, see if the object is positioned between the hands
        if (len(hand_objects) == 2 and hand_objects[0]["name"] == "hand"
                and hand_objects[1]["name"] == "hand"):

            if self.__obj_is_between_hands__(x_obj, y_obj, hand_objects):
                position_str = " is between hands"

        elif len(hand_objects) == 1:

            # Get the bounding box normalised coords
            top_hand, left_hand, bottom_hand, right_hand = tuple(
                hand_objects[0]["bbox"])

            # Get the midpoints
            x_hand = (left_hand + right_hand) / 2
            y_hand = (top_hand + bottom_hand) / 2

            if self.__obj_is_right_of_hand__(x_obj, x_hand):
                position_str = " is to the right of a " + hand_objects[0][
                    "name"]

            if self.__obj_is_left_of_hand__(x_obj, x_hand):
                position_str = " is to the left of a " + hand_objects[0]["name"]

        return position_str

    def __obj_is_between_hands__(self, x_obj, y_obj, hand_objects):
        '''Method to determine if the object is between the hands'''
        is_between = False

        # Get the bounding boxes of the hands
        top_hand1, left_hand1, bottom_hand1, right_hand1 = tuple(
            hand_objects[0]["bbox"])
        top_hand2, left_hand2, bottom_hand2, right_hand2 = tuple(
            hand_objects[1]["bbox"])

        # get the central coords
        x_hand1 = (left_hand1 + right_hand1) / 2
        x_hand2 = (left_hand2 + right_hand2) / 2
        y_hand1 = (top_hand1 + bottom_hand1) / 2
        y_hand2 = (top_hand2 + bottom_hand2) / 2

        # determine if object is between hands
        if (x_hand1 < x_obj < x_hand2) or (x_hand1 > x_obj > x_hand2):
            is_between = True

        elif (y_hand1 < y_obj < y_hand2) or (y_hand1 > y_obj > y_hand2):
            is_between = True

        return is_between

    def __obj_is_right_of_hand__(self, x_obj, x_hand):
        '''Method to determine if the object is to the right of the hand'''
        if (x_obj > x_hand):
            return True
        else:
            return False

    def __obj_is_left_of_hand__(self, x_obj, x_hand):
        '''Method to determine if the object is to the left of the hand'''
        if (x_hand > x_obj):
            return True
        else:
            return False

    def get_anchored_position(self, obj, objects):
        '''Method identifies position relative to anchors, e.g. "next to fridge"'''

        # Set default to [] because anchored position may not be possible
        position_str_list = []

        # List the anchors in the image
        anchors = copy.deepcopy([
            o for o in objects
            if o["from"] == obj["from"] and o["name"] in ANCHORS.keys()
        ])

        # Remove self from anchors
        if obj["name"] in [anchor["name"] for anchor in anchors]:
            anchors.remove(obj)

        # Determine if there are anchors in the image, if not return None
        if len(anchors) == 0:
            return position_str_list

        # Append the relationship info to anchors
        for anchor in anchors:
            anchor["relationships"] = ANCHORS[anchor["name"]]["relationships"]

        # Now we have anchors in the image, find the positioning relationship between the object and the anchor
        for anchor in anchors:

            # Get the bounding box normalised coords
            top_obj, left_obj, bottom_obj, right_obj = tuple(obj["bbox"])
            top_anchor, left_anchor, bottom_anchor, right_anchor = tuple(
                anchor["bbox"])

            if "in" in anchor["relationships"]:
                if self.__obj_is_in_anchor__(left_obj, left_anchor, right_obj,
                                             right_anchor, bottom_obj,
                                             bottom_anchor, top_obj,
                                             top_anchor):
                    position_str_list.append(" it's in the " + anchor["name"])
                    continue

            if "on" in anchor["relationships"]:
                if self.__obj_is_on_anchor__(left_obj, left_anchor, right_obj,
                                             right_anchor, bottom_obj,
                                             bottom_anchor):
                    position_str_list.append(" it's on the " + anchor["name"])
                    continue

            if "next to" in anchor["relationships"]:
                if self.__obj_is_left_of_anchor__(left_obj, left_anchor,
                                                  right_obj, right_anchor):
                    position_str_list.append(" it's left of the " +
                                             anchor["name"])
                    continue

                elif self.__obj_is_right_of_anchor__(left_obj, left_anchor,
                                                     right_obj, right_anchor):
                    position_str_list.append(" it's right of the " +
                                             anchor["name"])
                    continue

            if "below" in anchor["relationships"]:
                if self.__obj_is_below_anchor__(left_obj, left_anchor,
                                                right_obj, right_anchor,
                                                top_obj, top_anchor):
                    position_str_list.append(" it's below the " +
                                             anchor["name"])
                    continue

            # Default position if the above conditions cannot be met
            position_str_list.append(" it's near the " + anchor["name"])

        return position_str_list

    def __obj_is_in_anchor__(self, left_obj, left_anchor, right_obj,
                             right_anchor, bottom_obj, bottom_anchor, top_obj,
                             top_anchor):
        '''Method to determine whether obj is in anchor.'''
        is_on = False

        if (left_obj > left_anchor and right_obj < right_anchor
                and bottom_obj < bottom_anchor and top_obj > top_anchor):
            is_on = True

        return is_on

    def __obj_is_on_anchor__(self, left_obj, left_anchor, right_obj,
                             right_anchor, bottom_obj, bottom_anchor):
        '''Method to determine whether obj is on anchor.'''
        is_on = False

        if (left_obj > left_anchor and right_obj < right_anchor
                and bottom_obj < bottom_anchor):
            is_on = True

        return is_on

    def __obj_is_left_of_anchor__(self, left_obj, left_anchor, right_obj,
                                  right_anchor):
        '''Method to determine whether obj is left of anchor.'''
        is_left_of = False

        if (left_obj < left_anchor):
            is_left_of = True

        return is_left_of

    def __obj_is_right_of_anchor__(self, left_obj, left_anchor, right_obj,
                                   right_anchor):
        '''Method to determine whether obj is right of anchor.'''
        is_right_of = False

        if (right_obj > right_anchor):
            is_right_of = True

        return is_right_of

    def __obj_is_below_anchor__(self, left_obj, left_anchor, right_obj,
                                right_anchor, top_obj, top_anchor):
        '''Method to determine whether obj is below anchor.'''
        is_below = False

        if (left_obj > left_anchor and right_obj < right_anchor
                and top_obj > top_anchor):
            is_below = True

        return is_below

    def callback(self, body, **_):
        pprint(body)

        for i, obj in enumerate(body["objects"]):
            body["objects"][i]["lateral_position"] = self.get_lateral_position(obj)
            body["objects"][i]["anchored_position"] = self.get_anchored_position(
                obj, body["objects"])
            body["objects"][i]["hand_position"] = self.get_hand_position(
                obj, body["objects"])
            print("lateral_position: " + str(body["objects"][i]["lateral_position"]))
            print("anchored_position: " + str(body["objects"][i]["anchored_position"]))
            print("hand_position: " + str(body["objects"][i]["hand_position"]))

        body["path_done"].append(self.__class__.__name__)
        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
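A quick worked example of the thirds rule in get_lateral_position, using the normalised (top, left, bottom, right) bounding-box order described in the docstring:

obj = {"bbox": [0.2, 0.3, 0.4, 0.6]}  # top, left, bottom, right (normalised)

step = (1 / 3, 1 / 3)
center = ((obj["bbox"][0] + obj["bbox"][2]) / 2, (obj["bbox"][1] + obj["bbox"][3]) / 2)
# center == (0.30, 0.45): the horizontal centre falls in the middle band,
# so get_lateral_position would return " in the center".
print(step[1] < center[1] <= 2 * step[1])  # True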
Code Example #20
File: main.py  Project: jessi678/aye-saac
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])
        self.pipeline = keras_ocr.pipeline.Pipeline()
Code Example #21
class NaturalLanguageGenerator(object):
    """
    The NaturalLanguageGenerator class's purpose is to translate the results obtained into a nicely formatted sentence.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "ExternalInterface"]
        )
        self.answers = {}
        self.description_types = [
            "DESCRIPTION_NOTHING",
            "DESCRIPTION_ANSWER_S",
            "DESCRIPTION_ANSWER_P",
        ]
        self.build_generator()

        logger.info(f"{self.__class__.__name__} ready")

    def build_generator(self):
        folder_path = config.directory.data.joinpath("sentence_templates")
        for _, _, files in os.walk(folder_path):
            for name in files:
                with open(str(folder_path / name)) as f:
                    self.answers[name] = [line.strip() for line in f]

    def get_det(self, word):
        return str(word[1]) + " " if word[1] > 1 else "a "

    def generate_text(self, words, context, obj_cnt):
        answer = choice(self.answers[context])
        if type(words) == str:
            return answer.replace("*", words, 1)
        elif len(words) > 1:
            tmp = (
                ", ".join([self.get_det(w) + w[0] for w in words[:-1]])
                + " and "
                + self.get_det(words[-1])
                + words[-1][0]
            )
            return answer.replace("*", tmp, 1)
        elif len(words):
            return answer.replace(
                "*",
                ((str(words[0][1]) + " ") if words[0][1] > 1 else "") + words[0][0],
                1,
            )
        return answer

    def identify(self, body):
        pprint("identify")

        objects = []
        for o in body["objects"]:
            if o["name"] != "person":
                objects.append(
                    o["name"]
                    + (o["lateral_position"] if o.get("lateral_position") else "")
                )
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)
        context = self.description_types[obj_cnt if obj_cnt < 2 else 2]
        return objects, context, obj_cnt

    def recognise(self, body):
        pprint("recognise")

        objects = []
        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if o["name"] == p["value"]:
                    objects.append(
                        o["name"]
                        + (o["lateral_position"] if o.get("lateral_position") else "")
                    )
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)
        context = (
            ("POSITIVE" if obj_cnt > 0 else "NEGATIVE")
            + "_ANSWER_"
            + ("P" if obj_cnt > 1 else "S")
        )
        if not obj_cnt:
            objects = [(p["value"], 1) for p in body["intents"]["entities"]]
            obj_cnt = sum(n for _, n in objects)
        return objects, context, obj_cnt

    def REAL_read_text(self, body):
        pprint("read_text")

        objects = " ".join(" ".join(t) for t in body["texts"])
        print(objects)
        obj_cnt = 1 if len(objects) > 0 else 0
        context = "READ_TEXT_" + ("POSITIVE" if obj_cnt > 0 else "NEGATIVE")
        return objects, context, obj_cnt

    '''
    TEMPORARILY HIJACKING THE read_text FUNCTION ABOVE
    THIS SHOULD BE CHANGED BACK TO extract_label ONCE
    SUCH AN INTENT HAS BEEN TRAINED WITH THE NLU!!!
    '''
    def read_text(self, body):
    # def extract_label(self, body):
        objects = " ".join(" ".join(t) for t in body["texts"])
        print(objects)

        objects = extract_label(objects)

        obj_cnt = 1 if len(objects) > 0 else 0
        context = "READ_TEXT_" + ("POSITIVE" if obj_cnt > 0 else "NEGATIVE")
        return objects, context, obj_cnt

    def detect_colour(self, body):
        pprint("detect_colour")

        obj_cnt = 0
        objects = None
        context = None

        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if o["name"] == p["value"]:
                    objects = (p["value"], o["colour"])
                    break
                else:
                    objects = (p["value"], None)
        if objects:
            obj_cnt = 1 if objects[1] else 0
            objects = objects[obj_cnt]
            context = "COLOR_DETECTION" if obj_cnt else "COLOR_DETECTION_N"
        return objects, context, obj_cnt

    def locate(self, body):
        pprint("locate")

        objects = []
        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if o["name"] == p["value"]:
                    if (
                        not o.get("lateral_position")
                        and o.get("bbox")
                        and len(o["bbox"]) >= 4
                    ):
                        bbox = o["bbox"]
                        yStart = bbox[0]
                        xStart = bbox[1]
                        yEnd = bbox[2]
                        xEnd = bbox[3]
                        xCenter = (xEnd + xStart) / 2
                        yCenter = (yEnd + yStart) / 2
                        pprint("xCenter")
                        pprint(xCenter)
                        if xCenter < 0.382:
                            o["lateral_position"] = " on the left"
                        elif xCenter >= 0.382 and xCenter <= 0.618:
                            o["lateral_position"] = " in front"
                        elif xCenter > 0.618:
                            o["lateral_position"] = " on the right"
                    objects.append(
                        o["name"]
                        + (o["lateral_position"] if o.get("lateral_position") else "")
                    )
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)
        context = self.description_types[obj_cnt if obj_cnt < 2 else 2]
        return objects, context, obj_cnt

    def default(self, body):
        pprint("default")

        # Creates list of object detected in the scene
        objects = [
            o["name"] + (o["lateral_position"] if o.get("lateral_position") else "")
            for o in body["objects"]
        ]
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)
        context = self.description_types[obj_cnt if obj_cnt < 2 else 2]
        return objects, context, obj_cnt

    def callback(self, body, **_):
        pprint(body)

        method = getattr(self, body["intents"]["intent"]["name"], self.default)
        pprint("----- METHOD CALLED -----")
        objects, context, obj_cnt = method(body)

        print(objects)
        print(context)

        if objects != None and context != None:
            response = self.generate_text(objects, context, obj_cnt)
        else:
            response = "I didn't understand the question, could you repeat please."

        body["response"] = response
        pprint(body["response"])
        body["path_done"].append(self.__class__.__name__)

        self.queue_manager.publish("ExternalInterface", body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__, self.callback)
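generate_text picks a random template line for the given context and substitutes the first "*" with the object phrase built with get_det. A minimal sketch of that interaction; the template line is an assumption, since the real data/sentence_templates files are not shown here:

from random import choice

# Hypothetical template; each file in data/sentence_templates holds one variant per line.
answers = {"DESCRIPTION_ANSWER_P": ["I can see * in front of you."]}

def get_det(word):
    return f"{word[1]} " if word[1] > 1 else "a "

words = [("cup", 2), ("bottle", 1)]
phrase = (", ".join(get_det(w) + w[0] for w in words[:-1])
          + " and " + get_det(words[-1]) + words[-1][0])

print(choice(answers["DESCRIPTION_ANSWER_P"]).replace("*", phrase, 1))
# -> "I can see 2 cup and a bottle in front of you."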
Code Example #22
class CameraManager(object):
    """
    The CameraManager class's goal is to organise the collection of pictures from
    different camera sources.
    """
    def __init__(self):
        self.queue_manager = QueueManager([
            self.__class__.__name__, "WebCam", "WebCamBis", "ObjectDetection",
            "OCR"
        ])
        self.camera_names = ["WebCam"]
        self.pictures = []
        self.waiting_cameras = 0
        self.save_body = None

        logger.info(f"{self.__class__.__name__} ready")

    def from_cameras(self, body):
        logger.info("Receiving picture from: ", body["picture"]["from"])
        logger.info(body)
        self.pictures.append(body["picture"])
        self.waiting_cameras -= 1

    def request_pictures_from_all_concern_cameras(self):
        logger.info("Request pictures !")
        self.waiting_cameras = len(self.camera_names)
        for camera_name in self.camera_names:
            self.queue_manager.publish(camera_name, {"nb_picture": 1})

    def callback(self, body, **_):
        logger.info("Callback triggered")
        if "run_as_webservice" in body:
            # skip running cameras, we have an image!
            logger.info(
                "Camera management: don't use cameras as running in webservice mode."
            )
            assert "pictures" in body
            next_service = body["vision_path"].pop(0)
            body["path_done"].append(self.__class__.__name__)
            self.queue_manager.publish(next_service, body)

        elif self.waiting_cameras:
            self.from_cameras(body)
            if not self.waiting_cameras:
                self.save_body["pictures"] = copy.deepcopy(self.pictures)
                self.save_body["path_done"].append(self.__class__.__name__)
                logger.info("Send pictures !")

                # for path in self.save_body['vision_path']:
                # body_ = self.save_body
                # body_['vision_path'] = path
                next_service = self.save_body["vision_path"].pop(0)
                self.queue_manager.publish(next_service, self.save_body)
                self.pictures = []
                self.save_body = None
        else:
            self.request_pictures_from_all_concern_cameras()
            self.save_body = body

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
Code Example #23
class ColourDetection(object):
    """
    The class ColourDetection purpose is to detect every main colour from objects in
    the given pictures.
    """
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])

        data_file = config.directory.data.joinpath("colour", "lab.txt")

        colour_list = pd.read_csv(data_file,
                                  skiprows=28,
                                  header=None,
                                  names=["l", "a", "b", "name"])
        colour_list = colour_list.values.tolist()[1:]

        self.colour_list_names = [x[3] for x in colour_list]
        self.colour_list_values = np.asarray(
            [np.asarray(x[:3], dtype=np.float32) for x in colour_list])

        logger.info(f"{self.__class__.__name__} ready")

    @staticmethod
    def convert_rgb_to_lab(image: np.ndarray) -> np.ndarray:
        return rgb2lab(image)

    @staticmethod
    def flatten_image(image: np.ndarray) -> np.ndarray:
        dimensions = np.shape(image)

        return np.reshape(image,
                          (dimensions[0] * dimensions[1], dimensions[2]))

    @staticmethod
    def remove_non_unique_pixels(image: np.ndarray) -> np.ndarray:
        return np.unique(image, axis=0)

    @staticmethod
    def create_labelled_image(lab_image) -> np.ndarray:
        return slic(
            lab_image,
            n_segments=200,
            compactness=10,
            sigma=0.1,
            convert2lab=False,
            enforce_connectivity=True,
        )

    @staticmethod
    def create_regions(lab_image, labelled_image):
        region_segments = regionprops(labelled_image)
        image_dimensions = np.shape(labelled_image)

        for region in region_segments:
            region.is_boundary = ColourDetection.is_region_on_boundary(
                region, image_dimensions)
            region.average_colour = ColourDetection.get_region_average_colour(
                region.label, labelled_image, lab_image)

        return region_segments

    @staticmethod
    def is_region_on_boundary(region, image_dimensions):
        if (region.bbox[0] == 0 or region.bbox[1] == 0
                or region.bbox[2] == image_dimensions[0]
                or region.bbox[3] == image_dimensions[1]):
            return True
        return False

    @staticmethod
    def get_pixels_from_label_id(label_id, labelled_image, image):
        label_mask = np.invert(np.isin(labelled_image, label_id))
        label_mask = np.dstack((label_mask, label_mask, label_mask))
        image_mask = np.ma.array(image, mask=label_mask)
        return image_mask

    @staticmethod
    def get_region_average_colour(label_id, labelled_image, image):
        masked_image = ColourDetection.get_pixels_from_label_id(
            label_id, labelled_image, image)
        flattened_masked_image = ColourDetection.flatten_image(masked_image)
        average_colour = np.zeros(3, dtype=np.float32)

        for channel in range(np.shape(image)[2]):
            average_colour[channel] = np.mean(flattened_masked_image[:, channel])

        return average_colour

    @staticmethod
    def get_all_region_colours(region_list):
        return [region.average_colour for region in region_list]

    def detect_colours(self, crop_image):
        lab_image = self.convert_rgb_to_lab(crop_image)
        labelled_image = self.create_labelled_image(lab_image)
        region_list = self.create_regions(lab_image, labelled_image)
        colours = self.get_all_region_colours(region_list)

        colours_found = {}
        for colour in colours:
            d = ((self.colour_list_values - colour)**2).sum(axis=1)
            if not self.colour_list_names[d.argmin()] in colours_found:
                colours_found[self.colour_list_names[d.argmin()]] = 0
            colours_found[self.colour_list_names[d.argmin()]] += 1
        sorted_colours = max(colours_found.items(), key=operator.itemgetter(1))
        pprint(colours_found)
        return sorted_colours[0]

    def callback(self, body, **_):
        body["path_done"].append(self.__class__.__name__)

        for picture in body["pictures"]:
            image = decode(picture["data"], picture["shape"], np.uint8)
            for i, obj in enumerate(body["objects"]):
                top = int(picture["shape"][0] * obj["bbox"][0])
                bottom = int(picture["shape"][0] * obj["bbox"][2])
                left = int(picture["shape"][1] * obj["bbox"][1])
                right = int(picture["shape"][1] * obj["bbox"][3])
                crop_img = image[top:bottom, left:right]
                colour_name = self.detect_colours(crop_img)
                body["objects"][i]["colour"] = colour_name
        del body["pictures"]
        pprint(body)
        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
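The nearest-colour lookup inside detect_colours is a plain squared-Euclidean nearest neighbour in Lab space against the rows loaded from lab.txt. A standalone sketch of that step with illustrative reference rows:

import numpy as np

# Illustrative Lab rows; the real reference list comes from data/colour/lab.txt.
colour_list_names = ["red", "green", "blue"]
colour_list_values = np.array(
    [[53.2, 80.1, 67.2], [87.7, -86.2, 83.2], [32.3, 79.2, -107.9]], dtype=np.float32)

region_colour = np.array([50.0, 70.0, 60.0], dtype=np.float32)

# Squared Euclidean distance to every reference colour, exactly as the service does.
d = ((colour_list_values - region_colour) ** 2).sum(axis=1)
print(colour_list_names[d.argmin()])  # -> "red"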
Code Example #24
class ObjectDetection(object):
    """
    The ObjectDetection class's purpose is to detect every object in the given pictures.
    """
    def __init__(self):
        self.queue_manager = QueueManager([
            self.__class__.__name__,
            "Interpreter",
            "ColourDetection",
            "PositionDetection",
        ])
        self.category_index = coco_category_index

        self.model_path = config.directory.data.joinpath("resnet")
        model = tf.saved_model.load(str(self.model_path))
        self.model = model.signatures["serving_default"]

        logger.info(f"{self.__class__.__name__} ready")

    def run_inference_for_single_image(self, image):
        input_tensor = tf.convert_to_tensor(image)
        input_tensor = input_tensor[tf.newaxis, ...]
        output_dict = self.model(input_tensor)

        num_detections = int(output_dict.pop("num_detections"))
        output_dict = {
            key: value[0, :num_detections].numpy()
            for key, value in output_dict.items()
        }
        output_dict["num_detections"] = num_detections
        output_dict["detection_classes"] = output_dict[
            "detection_classes"].astype(np.int32)
        return output_dict

    def callback(self, body, **_):
        objects = []
        for picture in body["pictures"]:
            image = decode(picture["data"], picture["shape"], np.uint8)
            output = self.run_inference_for_single_image(image)
            for i in range(output["num_detections"]):
                if float(output["detection_scores"][i]) >= 0.5:
                    objects.append({
                        "name": self.category_index[output["detection_classes"][i]]["name"],
                        "confidence": float(output["detection_scores"][i]),
                        "bbox": output["detection_boxes"][i].tolist(),
                        "from": picture["from"],
                    })
        pprint(objects)
        body["objects"] = objects
        body["path_done"].append(self.__class__.__name__)

        if "ColourDetection" not in body["vision_path"]:
            del body["pictures"]

        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
Code Example #25
class NaturalLanguageGenerator(object):
    """
    The NaturalLanguageGenerator class's purpose is to translate the results obtained into a nicely formatted sentence.
    """
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "ExternalInterface"])
        self.answers = {}
        self.description_types = [
            "DESCRIPTION_NOTHING", "DESCRIPTION_ANSWER_S",
            "DESCRIPTION_ANSWER_P", "DESCRIPTION_UNKNOWN"
        ]
        self.build_generator()

        logger.info(f"{self.__class__.__name__} ready")

    def build_generator(self):
        folder_path = config.directory.data.joinpath("sentence_templates")
        for _, _, files in os.walk(folder_path):
            for name in files:
                with open(str(folder_path / name)) as f:
                    self.answers[name] = [line.strip() for line in f]

    def get_det(self, word, context):
        if context == "CONFIDENCE_SOMETHING":
            return ""
        if (word[1] > 1):
            return str(word[1]) + " "
        elif word[1] == 1:
            return "a "
        else:
            return "no "

    def compare_name_value(self, name, value):
        if name == value:
            return True
        elif name == value[:-1] and value[-1] == 's':
            return True
        return False

    def generate_text(self, words, context, obj_cnt):
        answer = choice(self.answers[context])
        if type(words) == str:
            return answer.replace("*", words, 1)
        elif len(words) > 1:
            tmp = (", ".join(
                [self.get_det(w, context) + w[0] for w in words[:-1]]) +
                   " and " + self.get_det(words[-1], context) + words[-1][0])
            return answer.replace("*", tmp, 1)
        elif len(words):
            return answer.replace(
                "*",
                self.get_det(words[0], context) + words[0][0],
                1,
            )
        return answer

    def identify(self, body):
        pprint("identify")

        objects = []
        for o in body["objects"]:
            if o["name"] != "person":
                objects.append(o["name"] + (o["lateral_position"] if o.
                                            get("lateral_position") else ""))
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)
        context = self.description_types[obj_cnt if obj_cnt < 2 else 2]
        return objects, context, obj_cnt

    def recognise(self, body):
        pprint("recognise")

        objects = []
        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if self.compare_name_value(o["name"], p["value"]):
                    objects.append(p["value"] +
                                   (o["lateral_position"] if o.
                                    get("lateral_position") else ""))
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)
        context = (("POSITIVE" if obj_cnt > 0 else "NEGATIVE") + "_ANSWER_" +
                   ("P" if obj_cnt > 1 else "S"))
        if not obj_cnt:
            objects = [(p["value"], 1) for p in body["intents"]["entities"]]
            obj_cnt = sum(n for _, n in objects)
        return objects, context, obj_cnt

    def read_text(self, body):
        pprint("read_text")

        objects = " ".join(" ".join(t) for t in body["texts"])
        print(objects)
        obj_cnt = 1 if len(objects) > 0 else 0
        context = "READ_TEXT_" + ("POSITIVE" if obj_cnt > 0 else "NEGATIVE")
        return objects, context, obj_cnt

    def detect_colour(self, body):
        pprint("detect_colour")

        obj_cnt = 0
        objects = None
        context = None

        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if self.compare_name_value(o["name"], p["value"]):
                    objects = (p["value"], o["colour"])
                    break
                else:
                    objects = (p["value"], None)
        if objects:
            obj_cnt = 1 if objects[1] else 0
            objects = objects[obj_cnt]
            context = "COLOR_DETECTION" if obj_cnt else "COLOR_DETECTION_N"
        return objects, context, obj_cnt

    def count(self, body):
        pprint("count")
        obj_cnt = 0
        objects = []
        context = ""

        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if self.compare_name_value(o["name"], p["value"]):
                    objects.append(p["value"])
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)
        for p in body["intents"]["entities"]:
            elements = [x for x in objects if x[0] == p["value"]]
            if not len(elements):
                objects.append((p["value"], 0))
        context = "DESCRIPTION_COUNT"
        return objects, context, obj_cnt

    def confidence(self, body):
        pprint("confidence")
        obj_cnt = 0
        objects = []

        can_answer = len(body["responses"]) > 0
        previous_question = None

        if can_answer:
            previous_question = body["responses"][-1]

        if can_answer and (not previous_question["intents"]["intent"]["name"]
                           in ["identify", "recognise", "locate", "count"]):
            can_answer = False

        if can_answer:
            entities = previous_question["intents"]["entities"]
            if len(entities) == 0:
                entities = [{"value": o["name"]}
                            for o in previous_question["objects"]]
            for e in entities:
                percentage = 0
                nb_object = 0
                for o in previous_question["objects"]:
                    if self.compare_name_value(o["name"], e["value"]):
                        percentage += o["confidence"]
                        nb_object += 1
                if nb_object > 0:
                    percentage /= nb_object
                    objects.append(
                        (str(round(percentage * 100)) + "% that there is " +
                         str(nb_object) + " " + e["value"], nb_object))
                else:
                    objects.append(
                        ("more than 50% that there is no " + e["value"], 0))

        obj_cnt = sum(n for _, n in objects)
        context = "CONFIDENCE_SOMETHING" if can_answer else "CONFIDENCE_NOTHING"
        return objects, context, obj_cnt

    def locate(self, body):
        pprint("locate")

        objects = []
        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if self.compare_name_value(o["name"], p["value"]):
                    pos_str = ""
                    if (len(o.get("anchored_position")) > 0):
                        pos_list = o.get("anchored_position")
                        for pos in pos_list:
                            if pos_list.index(pos) != (len(pos_list) - 1):
                                pos_str += ", " + pos
                            else:
                                pos_str += " and" + pos
                    elif (o.get("hand_position") != ""):
                        pos_str = o.get("hand_position")
                    else:
                        pos_str = o.get("lateral_position")
                    objects.append(p["value"] + pos_str)
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)

        context_index = 0
        if len(objects) == 1:
            context_index = 1
        elif len(objects) > 1:
            context_index = 2
        elif len(body["objects"]) > 0:
            context_index = 3
        context = self.description_types[context_index]

        return objects, context, obj_cnt

    def default(self, body):
        pprint("default")

        # Creates list of object detected in the scene
        objects = [
            o["name"] +
            (o["lateral_position"] if o.get("lateral_position") else "")
            for o in body["objects"]
        ]
        objects = list(set([(o, objects.count(o)) for o in objects]))
        obj_cnt = sum(n for _, n in objects)
        context = self.description_types[obj_cnt if obj_cnt < 2 else 2]
        return objects, context, obj_cnt

    def callback(self, body, **_):
        pprint(body)

        method = getattr(self, body["intents"]["intent"]["name"], self.default)
        pprint("----- METHOD CALLED -----")
        objects, context, obj_cnt = method(body)

        print(objects)
        print(context)

        if objects != None and context != None:
            response = self.generate_text(objects, context, obj_cnt)
        else:
            response = "I didn't understand the question, could you repeat please."

        body["response"] = response
        pprint(body["response"])
        body["path_done"].append(self.__class__.__name__)

        self.queue_manager.publish("ExternalInterface", body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
Code Example #26
File: main.py  Project: jessi678/aye-saac
class OCR(object):
    """
    The OCR class's purpose is to detect all possible text in the picture.
    """
    # The OCR backend is chosen once, at class-definition (import) time, from
    # the config file below; the matching __init__/callback pair is then bound
    # to the class.
    default_ocr_model, supported_ocr_models = None, []
    import json
    with open("./group-6-config.json") as f:
        data = json.load(f)
        default_ocr_model = data["default-ocr-model"]
        supported_ocr_models = data["supported-ocr-models"]
        print("Using OCR model: " + default_ocr_model)

    if default_ocr_model == "keras-ocr":

        def __init__(self):
            self.queue_manager = QueueManager(
                [self.__class__.__name__, "Interpreter"])
            self.pipeline = keras_ocr.pipeline.Pipeline()

        def callback(self, body, **_):
            image = [
                decode(body["pictures"][0]["data"],
                       body["pictures"][0]["shape"], np.uint8)
            ]
            predictions = self.pipeline.recognize(image)[0]

            # Uncomment to visualise the OCR annotations on the image
            # fig, axs = plt.subplots(nrows=len(image), figsize=(20, 20))
            # keras_ocr.tools.drawAnnotations(image=image[0], predictions=predictions, ax=axs)
            # plt.show()

            pprint(predictions)
            text = bb_to_text(predictions)

            body["texts"] = text
            body["path_done"].append(self.__class__.__name__)
            del body["pictures"]
            pprint(body)
            next_service = body["vision_path"].pop(0)
            self.queue_manager.publish(next_service, body)

            logger.info(f"{self.__class__.__name__} ready")

        def run(self):
            self.queue_manager.start_consuming(self.__class__.__name__,
                                               self.callback)

    elif default_ocr_model == "tesseract":

        def __init__(self):
            self.queue_manager = QueueManager(
                [self.__class__.__name__, "Interpreter"])

        def callback(self, body, **_):

            # path to the tesseract binary (environment-specific)
            pytesseract.pytesseract.tesseract_cmd = r'../usr/bin/tesseract'

            image = [
                decode(body["pictures"][0]["data"],
                       body["pictures"][0]["shape"], np.uint8)
            ]

            text = pytesseract.image_to_string(image[0])

            body["texts"] = text
            body["path_done"].append(self.__class__.__name__)
            del body["pictures"]
            pprint(body)
            next_service = body["vision_path"].pop(0)
            self.queue_manager.publish(next_service, body)

            logger.info(f"{self.__class__.__name__} ready")

        def run(self):
            self.queue_manager.start_consuming(self.__class__.__name__,
                                               self.callback)
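
The OCR service above selects its backend when the module is imported, by reading ./group-6-config.json and branching on "default-ocr-model". The snippet below is a minimal, self-contained sketch of that config round-trip; the file name and the two keys come from the class above, while the concrete values are assumptions for illustration.

# Write and re-read a minimal config of the shape the OCR class expects
# (values are illustrative assumptions).
import json

example_config = {
    "default-ocr-model": "keras-ocr",  # or "tesseract"
    "supported-ocr-models": ["keras-ocr", "tesseract"],
}

with open("./group-6-config.json", "w") as f:
    json.dump(example_config, f, indent=2)

with open("./group-6-config.json") as f:
    data = json.load(f)
    assert data["default-ocr-model"] in data["supported-ocr-models"]
    print("Using OCR model: " + data["default-ocr-model"])

Because the choice is made at class-definition time, switching between keras-ocr and tesseract means editing the config and restarting the service rather than changing it per request.
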
Code example #27
0
File: main.py Project: jessi678/aye-saac
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])
Code example #28
0
    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])

        logger.info(f"{self.__class__.__name__} ready")
Code example #29
0
class ObjectDetection(object):
    """
    The class ObjectDetection purpose is to detect every object in the given pictures.
    """

    # define constants
    # confidence threshold for retaining object detection
    CONFIDENCE_THRESHOLD = 0.5
    # IoU threshold for determining whether detections are overlapping
    IOU_THRESHOLD = 0.5
    # list of model preferences for selecting detection
    MODEL_PREFS = ["coco", "epic-kitchens"]

    def __init__(self):
        self.queue_manager = QueueManager([
            self.__class__.__name__,
            "Interpreter",
            "ColourDetection",
            "PositionDetection",
        ])

        self.models = [
            {
                "name": "coco",
                "model_path": config.directory.data.joinpath("coco_resnet"),
                "category_index": coco_category_index,
            },
            {
                "name": "epic-kitchens",
                "model_path": config.directory.data.joinpath("epic_kitchens"),
                "category_index": epic_kitchens_category_index,
            },
        ]

        for model in self.models:
            tf_model = tf.saved_model.load(str(model["model_path"]))
            model["model"] = tf_model.signatures["serving_default"]

        logger.info(f"{self.__class__.__name__} ready")

    def run_inference_for_single_image(self, image, model):
        input_tensor = tf.convert_to_tensor(image)
        input_tensor = input_tensor[tf.newaxis, ...]
        output_dict = model(input_tensor)

        num_detections = int(output_dict.pop("num_detections"))
        output_dict = {
            key: value[0, :num_detections].numpy()
            for key, value in output_dict.items()
        }
        output_dict["num_detections"] = num_detections
        output_dict["detection_classes"] = output_dict[
            "detection_classes"].astype(np.int32)
        return output_dict

    def filter_objects(self, objects, img_height, img_width):
        """Filter heavily overlapping (duplicate) detections from the output."""
        retained_objects = []
        for obj in objects:
            retain = True
            # duplicates are detections with a very high IoU; overlaps are
            # resolved below by model preference (class names are not compared)
            for other_obj in objects:
                # ignore self
                if obj is other_obj:
                    continue
                else:
                    # calculate the IoU
                    iou = calculate_iou(obj["bbox"], other_obj["bbox"],
                                        img_height, img_width)
                    # check if IoU is greater than threshold
                    if iou >= ObjectDetection.IOU_THRESHOLD:
                        # we have a duplicate, don't retain the object if the model preference is lower
                        if ObjectDetection.MODEL_PREFS.index(
                                obj["model"]
                        ) > ObjectDetection.MODEL_PREFS.index(
                                other_obj["model"]):
                            retain = False
                            break

            # append the object if it's okay
            if retain:
                retained_objects.append(obj)

        return retained_objects

    def callback(self, body, **_):
        all_objects = []
        for picture in body["pictures"]:
            objects = []
            image = decode(picture["data"], picture["shape"], np.uint8)
            img_height = picture["shape"][0]
            img_width = picture["shape"][1]

            # iterate through the models, performing object detection
            for model in self.models:
                output = self.run_inference_for_single_image(
                    image, model["model"])
                for i in range(output["num_detections"]):
                    score = float(output["detection_scores"][i])
                    if score >= ObjectDetection.CONFIDENCE_THRESHOLD:
                        bbox = output["detection_boxes"][i].tolist()
                        objects.append({
                            "name": model["category_index"][
                                output["detection_classes"][i]]["name"],
                            "confidence": score,
                            "bbox": bbox,
                            "from": picture["from"],
                            "model": model["name"],
                            "img_height": img_height,
                            "img_width": img_width,
                        })

            bboxes = [obj["bbox"] for obj in objects]
            class_names = [obj["name"] for obj in objects]
            scores = [obj["confidence"] for obj in objects]
            models = [obj["model"] for obj in objects]

            # draw the bounding boxes
            # (outputs image to docker/volumes/aye-saac_output_data/_data/bbox.[uid].png)
            draw_bounding_boxes(image,
                                bboxes,
                                class_names,
                                scores,
                                models,
                                filename="bbox.{u}.png".format(u=body["uid"]))

            # need to filter the results to remove massively overlapping object detections
            # (this can arise when different models identify the same object for example)
            objects = self.filter_objects(objects, img_height, img_width)

            bboxes = [obj["bbox"] for obj in objects]
            class_names = [obj["name"] for obj in objects]
            scores = [obj["confidence"] for obj in objects]
            models = [obj["model"] for obj in objects]

            # draw the bounding boxes
            # (outputs image to docker/volumes/aye-saac_output_data/_data/bbox_filtered.[uid].png)
            draw_bounding_boxes(
                image,
                bboxes,
                class_names,
                scores,
                models,
                filename="bbox_filtered.{u}.png".format(u=body["uid"]))

            # append the objects to all_objects
            all_objects.extend(objects)

        # pprint(objects)
        body["objects"] = all_objects
        body["path_done"].append(self.__class__.__name__)

        if "ColourDetection" not in body["vision_path"]:
            del body["pictures"]

        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
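
The filter_objects method above depends on a calculate_iou(bbox_a, bbox_b, img_height, img_width) helper imported from elsewhere in the repository. The function below is a plausible, standalone sketch of such a helper, assuming TensorFlow-style normalised boxes [ymin, xmin, ymax, xmax] as produced by detection_boxes; the repository's actual implementation may differ.

# Sketch of an IoU helper for normalised [ymin, xmin, ymax, xmax] boxes
# (assumption: this mirrors, but is not necessarily, the repo's calculate_iou).
def calculate_iou(bbox_a, bbox_b, img_height, img_width):
    # convert normalised coordinates to pixel coordinates
    ya1, xa1, ya2, xa2 = (bbox_a[0] * img_height, bbox_a[1] * img_width,
                          bbox_a[2] * img_height, bbox_a[3] * img_width)
    yb1, xb1, yb2, xb2 = (bbox_b[0] * img_height, bbox_b[1] * img_width,
                          bbox_b[2] * img_height, bbox_b[3] * img_width)

    # intersection rectangle (zero if the boxes do not overlap)
    inter_w = max(0.0, min(xa2, xb2) - max(xa1, xb1))
    inter_h = max(0.0, min(ya2, yb2) - max(ya1, yb1))
    intersection = inter_w * inter_h

    # union = sum of both areas minus the shared intersection
    area_a = (xa2 - xa1) * (ya2 - ya1)
    area_b = (xb2 - xb1) * (yb2 - yb1)
    union = area_a + area_b - intersection

    return intersection / union if union > 0 else 0.0


if __name__ == "__main__":
    # two heavily overlapping detections -> IoU well above the 0.5 threshold
    print(calculate_iou([0.1, 0.1, 0.5, 0.5], [0.12, 0.12, 0.5, 0.5], 480, 640))
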