class OCR(object):
    """
    The class OCR purpose is to detect all the possible text in the picture.

    The service consumes messages from the queue named after the class, runs
    the keras-ocr pipeline on the first picture of the message and forwards
    the result to the next service listed in ``body["vision_path"]``.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])
        self.pipeline = keras_ocr.pipeline.Pipeline()
        # Announce readiness once the pipeline is loaded, like the other
        # services do, instead of after every processed message.
        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        """
        Run text recognition on ``body["pictures"][0]`` and forward the message.

        The message is modified in place: ``body["texts"]`` receives the
        output of ``bb_to_text``, the class name is appended to
        ``body["path_done"]`` and ``body["pictures"]`` is removed before the
        message is published to the next entry of ``body["vision_path"]``.
        """
        picture = body["pictures"][0]
        image = [decode(picture["data"], picture["shape"], np.uint8)]
        predictions = self.pipeline.recognize(image)[0]

        # Debug helper, disabled: draws the predictions on the picture.
        # fig, axs = plt.subplots(nrows=len(image), figsize=(20, 20))
        # keras_ocr.tools.drawAnnotations(image=image[0],
        #                                 predictions=predictions, ax=axs)
        # plt.show()
        pprint(predictions)

        body["texts"] = bb_to_text(predictions)
        body["path_done"].append(self.__class__.__name__)
        del body["pictures"]
        pprint(body)

        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class PositionDetection(object):
    """
    The class PositionDetection purpose provide the global position of the
    detected objects.
    """

    def __init__(self):
        queue_names = [self.__class__.__name__, "Interpreter"]
        self.queue_manager = QueueManager(queue_names)
        logger.info(f"{self.__class__.__name__} ready")

    def get_pos_str(self, obj, max_pos=(1, 1)):
        """
        Return the lateral position label of one detected object.

        The frame is cut into three equal bands along the second axis of
        ``max_pos``; the label depends on the band that contains the middle
        of ``obj["bbox"]`` (computed from entries 1 and 3 of the box).
        """
        band = (max_pos[0] / 3, max_pos[1] / 3)
        box = obj["bbox"]
        middle = ((box[0] + box[2]) / 2, (box[1] + box[3]) / 2)

        # Guard clauses, highest band first.
        if middle[1] > 2 * band[1]:
            return " on the left"
        if middle[1] > band[1]:
            return " in the center"
        return " on the right"

    def callback(self, body, **_):
        """Label every object of the message and forward it to the next service."""
        pprint(body)
        for detected in body["objects"]:
            detected["lateral_position"] = self.get_pos_str(detected)

        body["path_done"].append(self.__class__.__name__)
        upcoming = body["vision_path"].pop(0)
        self.queue_manager.publish(upcoming, body)

    def run(self):
        """Start consuming the queue named after the class."""
        own_queue = self.__class__.__name__
        self.queue_manager.start_consuming(own_queue, self.callback)
class WebCam(object):
    """
    The class WebCam goal is to take a picture and send it back to
    CameraManager.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])
        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        """
        Grab one frame from camera 0 and publish it to CameraManager.

        The frame is stored under ``body["picture"]`` with its encoded data,
        its shape and the name of this class as source.

        Raises:
            RuntimeError: if the camera does not deliver a frame.
        """
        cap = cv2.VideoCapture(0)
        try:
            grabbed, image = cap.read()
        finally:
            # Always free the device, otherwise it stays locked between calls.
            cap.release()
        if not grabbed or image is None:
            raise RuntimeError(
                f"{self.__class__.__name__} could not read a frame from camera 0")

        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is ``dst``.
        image = cv2.resize(image, (640, 480), interpolation=cv2.INTER_AREA)
        body["picture"] = {
            "data": encode(image),
            "shape": image.shape,
            "from": self.__class__.__name__,
        }
        self.queue_manager.publish("CameraManager", body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class WebCamBis(object):
    """
    The class WebCamBis goal is to take a picture and send it back to
    CameraManager.

    It's a duplicate of the WebCam class; it will be replaced by a Kinect
    class in the future.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])
        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        """
        Grab one frame from camera 0 and publish it to CameraManager.

        The frame is stored under ``body["picture"]`` with its encoded data,
        its shape and the name of this class as source.

        Raises:
            RuntimeError: if the camera does not deliver a frame.
        """
        # avoid conflict over opencv between WebCam class
        time.sleep(2)
        cap = cv2.VideoCapture(0)
        try:
            grabbed, image = cap.read()
        finally:
            # Always free the device, otherwise it stays locked between calls.
            cap.release()
        if not grabbed or image is None:
            raise RuntimeError(
                f"{self.__class__.__name__} could not read a frame from camera 0")

        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is ``dst``.
        image = cv2.resize(image, (640, 480), interpolation=cv2.INTER_AREA)
        pprint(image.shape)
        body["picture"] = {
            "data": encode(image),
            "shape": image.shape,
            "from": self.__class__.__name__,
        }
        self.queue_manager.publish("CameraManager", body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class Interpreter(object):
    """
    The Interpreter class purpose is a simple comparison with what the vision
    part find and what the user asked for. (Which object was found and not
    found)

    When a request was split into several vision paths
    (``body["wait_package"] > 1``) the partial results are kept in
    ``self.memory``, keyed by ``body["intern_token"]``, until the last one
    arrives; the merged message is then sent to NaturalLanguageGenerator.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "NaturalLanguageGenerator"]
        )
        # intern_token -> {"objects": ..., "texts": ...} for pending requests
        self.memory = {}
        logger.info(f"{self.__class__.__name__} ready")

    def filter_objects(self, body):
        """Return the objects of the message (currently unfiltered)."""
        return body["objects"]

    def filter_texts(self, body):
        """Return the texts of the message (currently unfiltered)."""
        return body["texts"]

    def callback(self, body, **_):
        """
        Filter the vision result of ``body`` and forward it when complete.

        A message with ``wait_package == 1`` is published immediately.
        Otherwise the result is stored until ``wait_package`` results were
        received for the same ``intern_token``; the stored results are then
        merged into the last message, which is published.
        """
        data = None
        key = ""
        if "objects" in body:
            key = "objects"
            data = self.filter_objects(body)
            body["objects"] = data
        elif "texts" in body:
            key = "texts"
            data = self.filter_texts(body)
            body["texts"] = data

        if body["wait_package"] == 1:
            body["path_done"].append(self.__class__.__name__)
            del body["vision_path"]
            pprint(body)
            # TODO: uncomment if you wanna test the NLG, it could be text,
            # objects, objects + colour, objects + lateral position
            self.queue_manager.publish("NaturalLanguageGenerator", body)
            return

        token = body["intern_token"]
        pending = self.memory.get(token)
        if pending is None:
            # First partial result for this request.
            self.memory[token] = {key: data}
        elif len(pending) + 1 < body["wait_package"]:
            # More results are still expected after this one.
            # NOTE(review): assumes every vision path contributes a distinct
            # key ("objects" / "texts") - confirm if paths ever share one.
            pending[key] = data
        else:
            # Last result: merge what was stored and drop the whole entry.
            # (Deleting keys while iterating the stored dict raised
            # RuntimeError and left the token in memory forever.)
            body.update(self.memory.pop(token))
            pprint(body)
            # TODO: uncomment if you wanna test the NLG
            self.queue_manager.publish("NaturalLanguageGenerator", body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class Kinect(object):
    """
    The class Kinect goal is to take a picture and send it back to
    CameraManager.
    """

    def __init__(self, mode=COLOR_MODE):
        """
        Open the Kinect runtime for ``mode`` and record the frame geometry.

        ``mode`` is tested bitwise against ``DEPTH_MODE`` and ``COLOR_MODE``;
        when both are set the colour frame size is the one kept.
        """
        self.kinect = PyKinectRuntime.PyKinectRuntime(mode)
        if mode & DEPTH_MODE:
            self.kinect_frame_size = (
                self.kinect.depth_frame_desc.Height,
                self.kinect.depth_frame_desc.Width,
            )
        if mode & COLOR_MODE:
            self.kinect_frame_size = (
                self.kinect.color_frame_desc.Height,
                self.kinect.color_frame_desc.Width,
                -1,
            )
        # Colour conversion applied to every frame before it is sent.
        self.transform = (cv2.COLOR_GRAY2RGB
                          if mode & DEPTH_MODE else cv2.COLOR_RGBA2RGB)
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager"])
        logger.info(f"{self.__class__.__name__} ready")

    def get_colored_frame(self, size=None):
        """
        Return the last colour frame as an RGB ``uint8`` array.

        Args:
            size: optional size passed to ``cv2.resize``; when falsy the
                frame is returned at its native size.
        """
        frame = self.kinect.get_last_color_frame()
        frame = frame.reshape(self.kinect_frame_size).astype(np.uint8)
        frame = cv2.cvtColor(frame, self.transform)
        if size:
            return cv2.resize(frame, size)
        return frame

    def callback(self, body, **_):
        """Attach the last Kinect colour frame to ``body`` and publish it."""
        # The frame comes from the Kinect runtime only; the webcam capture
        # that used to be opened here was never read from nor released.
        # if mode & DEPTH_MODE:
        #     frame = _kinect.get_last_depth_frame()
        #     frameD = _kinect._depth_frame_data
        #     draw = True
        # if mode & COLOR_MODE and _kinect.has_new_color_frame():
        frame = self.get_colored_frame()
        body["picture"] = {
            "data": encode(frame),
            "shape": frame.shape,
            "from": self.__class__.__name__,
        }
        self.queue_manager.publish("CameraManager", body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class Manager(object):
    """
    The class Manager purpose is to create a path inside the dialogue manager
    depending on the goal of the query.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "CameraManager", "Interpreter"])

        # Paths shared by several intents.
        # TODO: Missing intent for lateral position
        self.intents_to_path = {
            "read_text": [
                ["CameraManager", "OCR", "Interpreter"],
            ],
            "detect_colour": [
                ["CameraManager", "ObjectDetection", "ColourDetection",
                 "Interpreter"],
            ],
            "identify": [
                ["CameraManager", "OCR", "Interpreter"],
                ["CameraManager", "ObjectDetection", "Interpreter"],
            ],
            "recognise": [
                ["CameraManager", "ObjectDetection", "Interpreter"],
            ],
            "locate": [
                ["CameraManager", "ObjectDetection", "PositionDetection",
                 "Interpreter"],
            ],
            "count": [
                ["CameraManager", "ObjectDetection", "Interpreter"],
            ],
            "confidence": [
                ["Interpreter"],
            ],
        }
        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        """
        Dispatch the query to every service path registered for its intent.

        The top-ranked intent selects the paths. ``body`` receives a fresh
        ``intern_token`` and the number of paths as ``wait_package``; one
        deep copy of it is published per path, to the first service of that
        path, with the remaining services stored as ``vision_path``. An
        intent without registered paths publishes nothing.
        """
        token = token_hex(8)
        intent = body["intents"]["intent_ranking"][0]["name"]
        registered = self.intents_to_path.get(intent, [])

        body["intern_token"] = token
        body["wait_package"] = len(registered)
        body["path_done"].append(self.__class__.__name__)

        # Work on copies so the registered paths are never consumed.
        for route in copy.deepcopy(registered):
            pprint(route)
            message = copy.deepcopy(body)
            message["vision_path"] = route
            first_stop = route.pop(0)
            self.queue_manager.publish(first_stop, message)
            if "run_as_webservice" not in body:
                time.sleep(1)

    def run(self):
        """Start consuming the queue named after the class."""
        own_queue = self.__class__.__name__
        self.queue_manager.start_consuming(own_queue, self.callback)
class NaturalLanguageUnderstanding(object):
    """
    The class NaturalLanguageUnderstanding purpose is to sense the objectives
    of the query.
    """

    def __init__(self):
        self.queue_manager = QueueManager([self.__class__.__name__, "Manager"])
        # Parse result of the previous query, used to resolve follow-ups.
        self.previous_query = None

        model_path = str(config.directory.data.joinpath("rasa", "nlu"))
        dirs = [f for f in listdir(model_path) if isdir(join(model_path, f))]
        # Reverse lexicographic order: the first entry is the model loaded.
        dirs.sort(reverse=True)
        model = join(model_path, dirs[0])
        logger.info("Model loading: " + model)
        self.interpreter = Interpreter.load(model)
        logger.info(f"{self.__class__.__name__} ready")

    def callback(self, body, **_):
        """
        Parse ``body["query"]`` and forward the message to Manager.

        A ``same_intent`` result is replaced by the intent of the previous
        query. For intents other than ``recognise`` / ``identify``, a query
        flagged by ``check_followup`` also inherits the entities of the
        previous query. Both steps are skipped when there is no previous
        query.
        """
        body["asking"] = body["query"].split()
        intents = self.interpreter.parse(body["query"])
        try:
            if (intents["intent"]["name"] == "same_intent"
                    and self.previous_query is not None):
                intents["intent"]["name"] = self.previous_query["intent"][
                    "name"]
            # The ``previous_query`` test comes last so check_followup is
            # still evaluated as before; without the test the first query
            # ended up in the generic ``except`` below with a TypeError.
            if (intents["intent"]["name"] not in ("recognise", "identify")
                    and check_followup(body["query"])
                    and self.previous_query is not None):
                intents["entities"].extend(self.previous_query["entities"])
        except IndexError as error:
            logger.error(error)
        except Exception as exception:
            # Best effort: a failed follow-up resolution must not drop the
            # query, so the error is only reported.
            logger.warning(exception)

        self.previous_query = intents
        body["intents"] = intents
        body["path_done"].append(self.__class__.__name__)
        logger.info(body)
        self.queue_manager.publish("Manager", body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
def submit():
    """
    Queue the incoming user request and answer with the status location.

    The request is handed to the first service chosen by ``UserRequest``
    (speech recognition for audio, language understanding for text).

    Returns:
        A ``(body, status, headers)`` tuple: the status URL, the code 202 and
        a ``Location`` header pointing at the same URL.
    """
    # Parse user request
    user_request = UserRequest(
        service_if_audio="AutomaticSpeechRecognition",
        service_if_text="NaturalLanguageUnderstanding",
    )

    # Create queue for Ayesaac and send it
    entry_point = [user_request.first_service]
    ayesaac_queue_manager = QueueManager(entry_point)
    ayesaac_queue_manager.publish(user_request.first_service,
                                  user_request.body)

    status_url = url_for("submit_status", task_id=user_request.uid)
    headers = {"Location": status_url}
    return status_url, 202, headers
class CameraManager(object):
    """
    The class CameraManager goal is to organise the collect of pictures from
    different camera sources.

    A request is either forwarded directly (webservice mode, the pictures are
    already in the message) or parked in ``save_body`` while every camera of
    ``camera_names`` is asked for a picture; the parked request is forwarded
    once all pictures came back.
    """

    def __init__(self):
        self.queue_manager = QueueManager([
            self.__class__.__name__, "WebCam", "WebCamBis", "ObjectDetection",
            "OCR"
        ])
        self.camera_names = ["WebCam"]
        self.pictures = []        # pictures received for the parked request
        self.waiting_cameras = 0  # number of camera answers still expected
        self.save_body = None     # request parked while cameras answer
        logger.info(f"{self.__class__.__name__} ready")

    def from_cameras(self, body):
        """Store the picture sent back by a camera."""
        # The source used to be passed as a stray logging argument without a
        # placeholder, so it never reached the log output.
        logger.info(f"Receiving picture from: {body['picture']['from']}")
        logger.info(body)
        self.pictures.append(body["picture"])
        self.waiting_cameras -= 1

    def request_pictures_from_all_concern_cameras(self):
        """Ask every camera of ``camera_names`` for one picture."""
        logger.info("Request pictures !")
        self.waiting_cameras = len(self.camera_names)
        for camera_name in self.camera_names:
            self.queue_manager.publish(camera_name, {"nb_picture": 1})

    def callback(self, body, **_):
        """
        Handle a new request or a camera answer.

        * ``run_as_webservice`` in the message: forward it unchanged to the
          next service of its ``vision_path``.
        * cameras are being awaited: treat the message as a camera answer
          and, once the last one arrived, forward the parked request with
          all collected pictures.
        * otherwise: park the message and request pictures.
        """
        logger.info("Callback triggered")
        if "run_as_webservice" in body:
            # skip running cameras, we have an image!
            logger.info(
                "Camera management: don't use cameras as running in webservice mode."
            )
            assert "pictures" in body
            next_service = body["vision_path"].pop(0)
            body["path_done"].append(self.__class__.__name__)
            self.queue_manager.publish(next_service, body)
        elif self.waiting_cameras:
            self.from_cameras(body)
            if not self.waiting_cameras:
                self.save_body["pictures"] = copy.deepcopy(self.pictures)
                self.save_body["path_done"].append(self.__class__.__name__)
                logger.info("Send pictures !")
                next_service = self.save_body["vision_path"].pop(0)
                self.queue_manager.publish(next_service, self.save_body)
                # Reset the state for the next request.
                self.pictures = []
                self.save_body = None
        else:
            self.request_pictures_from_all_concern_cameras()
            self.save_body = body

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class NaturalLanguageGenerator(object):
    """
    The class NaturalLanguageGenerator purpose is to translate the results
    obtained to a nicely formatted sentence.

    Every supported intent has a handler method named after it. A handler
    receives the message ``body`` and returns ``(objects, context, obj_cnt)``:
    the words to insert in the sentence, the name of the sentence-template
    set to pick from and the number of objects concerned.
    """

    # Intents answered by a dedicated handler; any other intent name falls
    # back to ``default`` instead of being resolved to an arbitrary attribute.
    INTENT_HANDLERS = (
        "identify",
        "recognise",
        "read_text",
        "detect_colour",
        "count",
        "confidence",
        "locate",
    )

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "ExternalInterface"])
        # template-set name (file name) -> list of sentence templates
        self.answers = {}
        self.description_types = [
            "DESCRIPTION_NOTHING", "DESCRIPTION_ANSWER_S",
            "DESCRIPTION_ANSWER_P", "DESCRIPTION_UNKNOWN"
        ]
        self.build_generator()
        logger.info(f"{self.__class__.__name__} ready")

    def build_generator(self):
        """Load every sentence-template file into ``self.answers``."""
        folder_path = config.directory.data.joinpath("sentence_templates")
        for root, _, files in os.walk(folder_path):
            for name in files:
                # Join with the walked directory so files found in
                # sub-folders are opened where they actually are.
                with open(os.path.join(root, name)) as f:
                    self.answers[name] = [line.strip() for line in f]

    def get_det(self, word, context):
        """Return the determiner for ``word``, a ``(name, count)`` pair."""
        if context == "CONFIDENCE_SOMETHING":
            return ""
        if word[1] > 1:
            return str(word[1]) + " "
        if word[1] == 1:
            return "a "
        return "no "

    def compare_name_value(self, name, value):
        """Tell whether ``value`` is ``name`` or ``name`` followed by an 's'."""
        return name == value or (value[-1:] == "s" and name == value[:-1])

    @staticmethod
    def _tally(names):
        """Return ``(name, occurrences)`` pairs in order of first appearance."""
        counts = {}
        for name in names:
            counts[name] = counts.get(name, 0) + 1
        return list(counts.items())

    @staticmethod
    def _join_positions(positions):
        """Chain anchored positions: ', ' before each one, ' and' before the last."""
        last = len(positions) - 1
        return "".join(
            (", " if index != last else " and") + position
            for index, position in enumerate(positions))

    def generate_text(self, words, context, obj_cnt):
        """
        Fill a random template of ``context`` with ``words``.

        ``words`` is either a ready-made string or a list of
        ``(name, count)`` pairs; it replaces the first ``*`` of the template.
        An empty list returns the template untouched.
        """
        answer = choice(self.answers[context])
        if isinstance(words, str):
            return answer.replace("*", words, 1)
        if len(words) > 1:
            tmp = (", ".join(
                [self.get_det(w, context) + w[0] for w in words[:-1]]) +
                   " and " + self.get_det(words[-1], context) + words[-1][0])
            return answer.replace("*", tmp, 1)
        if len(words):
            return answer.replace(
                "*",
                self.get_det(words[0], context) + words[0][0],
                1,
            )
        return answer

    def identify(self, body):
        """List every detected object except persons."""
        pprint("identify")
        names = [
            o["name"] + (o.get("lateral_position") or "")
            for o in body["objects"] if o["name"] != "person"
        ]
        objects = self._tally(names)
        obj_cnt = sum(n for _, n in objects)
        context = self.description_types[obj_cnt if obj_cnt < 2 else 2]
        return objects, context, obj_cnt

    def recognise(self, body):
        """Tell which of the requested entities were detected."""
        pprint("recognise")
        entities = body["intents"]["entities"]
        names = [
            p["value"] + (o.get("lateral_position") or "")
            for o in body["objects"] for p in entities
            if self.compare_name_value(o["name"], p["value"])
        ]
        objects = self._tally(names)
        obj_cnt = sum(n for _, n in objects)
        context = (("POSITIVE" if obj_cnt > 0 else "NEGATIVE") + "_ANSWER_" +
                   ("P" if obj_cnt > 1 else "S"))
        if not obj_cnt:
            # Nothing found: the negative answer names what was asked for.
            objects = [(p["value"], 1) for p in entities]
            obj_cnt = sum(n for _, n in objects)
        return objects, context, obj_cnt

    def read_text(self, body):
        """Join the recognised texts into one string."""
        pprint("read_text")
        objects = " ".join(" ".join(t) for t in body["texts"])
        print(objects)
        obj_cnt = 1 if len(objects) > 0 else 0
        context = "READ_TEXT_" + ("POSITIVE" if obj_cnt > 0 else "NEGATIVE")
        return objects, context, obj_cnt

    def detect_colour(self, body):
        """
        Return the colour of the first detected object matching an entity.

        Without a match the answer names the last requested entity with the
        ``COLOR_DETECTION_N`` context. ``(None, None, 0)`` is returned when
        there is no detected object or no entity at all.
        """
        pprint("detect_colour")
        entities = body["intents"]["entities"]
        match = None
        for o in body["objects"]:
            for p in entities:
                if self.compare_name_value(o["name"], p["value"]):
                    match = (p["value"], o["colour"])
                    break
            if match is not None:
                # Stop at the first hit: scanning on let later, unrelated
                # objects overwrite the colour that was just found.
                break

        if match is None:
            if not body["objects"] or not entities:
                return None, None, 0
            match = (entities[-1]["value"], None)

        obj_cnt = 1 if match[1] else 0
        # Colour when one was found, otherwise the name of the entity.
        objects = match[obj_cnt]
        context = "COLOR_DETECTION" if obj_cnt else "COLOR_DETECTION_N"
        return objects, context, obj_cnt

    def count(self, body):
        """Count the detected objects for every requested entity."""
        pprint("count")
        entities = body["intents"]["entities"]
        names = [
            p["value"] for o in body["objects"] for p in entities
            if self.compare_name_value(o["name"], p["value"])
        ]
        objects = self._tally(names)
        obj_cnt = sum(n for _, n in objects)
        # Entities that were asked for but not detected are reported with 0.
        reported = {name for name, _ in objects}
        for p in entities:
            if p["value"] not in reported:
                objects.append((p["value"], 0))
                reported.add(p["value"])
        context = "DESCRIPTION_COUNT"
        return objects, context, obj_cnt

    def confidence(self, body):
        """
        Report the detection confidence for the previous question.

        Only answered when the last entry of ``body["responses"]`` was an
        ``identify``, ``recognise``, ``locate`` or ``count`` question.
        """
        pprint("confidence")
        objects = []
        responses = body["responses"]
        previous_question = responses[-1] if responses else None
        can_answer = (
            previous_question is not None
            and previous_question["intents"]["intent"]["name"] in
            ["identify", "recognise", "locate", "count"])

        if can_answer:
            entities = previous_question["intents"]["entities"]
            if len(entities) == 0:
                # No entity asked for: report on everything that was seen.
                entities = [{
                    "value": o["name"]
                } for o in previous_question["objects"]]
            for e in entities:
                scores = [
                    o["confidence"] for o in previous_question["objects"]
                    if self.compare_name_value(o["name"], e["value"])
                ]
                nb_object = len(scores)
                if nb_object > 0:
                    percentage = sum(scores) / nb_object
                    objects.append(
                        (str(round(percentage * 100)) + "% that there is " +
                         str(nb_object) + " " + e["value"], nb_object))
                else:
                    objects.append(
                        ("more than 50% that there is no " + e["value"], 0))

        obj_cnt = sum(n for _, n in objects)
        context = ("CONFIDENCE_SOMETHING"
                   if can_answer else "CONFIDENCE_NOTHING")
        return objects, context, obj_cnt

    def locate(self, body):
        """
        Describe where the requested entities are.

        The position text of an object is, by priority, its anchored
        positions, its hand position, then its lateral position. Missing or
        empty position fields are skipped.
        """
        pprint("locate")
        names = []
        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if not self.compare_name_value(o["name"], p["value"]):
                    continue
                # ``or`` defaults: the position keys are optional, and
                # ``len(None)`` / ``str + None`` used to crash the handler.
                anchored = o.get("anchored_position") or []
                hand = o.get("hand_position") or ""
                if anchored:
                    pos_str = self._join_positions(anchored)
                elif hand:
                    pos_str = hand
                else:
                    pos_str = o.get("lateral_position") or ""
                names.append(p["value"] + pos_str)

        objects = self._tally(names)
        obj_cnt = sum(n for _, n in objects)
        context_index = 0
        if len(objects) == 1:
            context_index = 1
        elif len(objects) > 1:
            context_index = 2
        elif len(body["objects"]) > 0:
            # Objects were seen, but none of the requested ones.
            context_index = 3
        context = self.description_types[context_index]
        return objects, context, obj_cnt

    def default(self, body):
        """List every detected object; used for intents without a handler."""
        pprint("default")
        # Creates list of object detected in the scene
        names = [
            o["name"] + (o.get("lateral_position") or "")
            for o in body["objects"]
        ]
        objects = self._tally(names)
        obj_cnt = sum(n for _, n in objects)
        context = self.description_types[obj_cnt if obj_cnt < 2 else 2]
        return objects, context, obj_cnt

    def callback(self, body, **_):
        """Build the answer for ``body`` and publish it to ExternalInterface."""
        pprint(body)
        intent_name = body["intents"]["intent"]["name"]
        if intent_name in self.INTENT_HANDLERS:
            method = getattr(self, intent_name)
        else:
            method = self.default
        pprint("----- METHOD CALLED -----")
        objects, context, obj_cnt = method(body)
        print(objects)
        print(context)
        if objects is not None and context is not None:
            response = self.generate_text(objects, context, obj_cnt)
        else:
            response = "I didn't understand the question, could you repeat please."
        body["response"] = response
        pprint(body["response"])
        body["path_done"].append(self.__class__.__name__)
        self.queue_manager.publish("ExternalInterface", body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class ObjectDetection(object):
    """
    The class ObjectDetection purpose is to detect every object in the given
    pictures.
    """

    # define constants
    # confidence threshold for retaining object detection
    CONFIDENCE_THRESHOLD = 0.5
    # IoU threshold for determining whether detections are overlapping
    IOU_THRESHOLD = 0.5
    # list of model preferences for selecting detection
    MODEL_PREFS = ["coco", "epic-kitchens"]

    def __init__(self):
        self.queue_manager = QueueManager([
            self.__class__.__name__,
            "Interpreter",
            "ColourDetection",
            "PositionDetection",
        ])

        coco = {
            "name": "coco",
            "model_path": config.directory.data.joinpath("coco_resnet"),
            "category_index": coco_category_index,
        }
        epic_kitchens = {
            "name": "epic-kitchens",
            "model_path": config.directory.data.joinpath("epic_kitchens"),
            "category_index": epic_kitchens_category_index,
        }
        self.models = [coco, epic_kitchens]

        # Attach the callable serving signature to every model description.
        for description in self.models:
            loaded = tf.saved_model.load(str(description["model_path"]))
            description["model"] = loaded.signatures["serving_default"]
        logger.info(f"{self.__class__.__name__} ready")

    def run_inference_for_single_image(self, image, model):
        """
        Run ``model`` on one image and return its detections as numpy arrays.

        A leading axis is added to the image before the call; every output is
        cut down to its first ``num_detections`` entries and the detected
        classes are cast to ``int32``.
        """
        batch = tf.convert_to_tensor(image)[tf.newaxis, ...]
        raw_output = model(batch)
        num_detections = int(raw_output.pop("num_detections"))

        output_dict = {}
        for key, value in raw_output.items():
            output_dict[key] = value[0, :num_detections].numpy()
        output_dict["num_detections"] = num_detections
        output_dict["detection_classes"] = output_dict[
            "detection_classes"].astype(np.int32)
        return output_dict

    def filter_objects(self, objects, img_height, img_width):
        '''Method to filter duplicate detections from the output'''
        preferences = ObjectDetection.MODEL_PREFS

        def is_superseded(candidate):
            # A detection is dropped when another, different detection
            # overlaps it enough and comes from a model listed earlier in
            # the preferences.
            return any(
                not (candidate == rival)
                and calculate_iou(candidate["bbox"], rival["bbox"],
                                  img_height, img_width)
                >= ObjectDetection.IOU_THRESHOLD
                and preferences.index(candidate["model"])
                > preferences.index(rival["model"])
                for rival in objects)

        return [found for found in objects if not is_superseded(found)]

    def _draw(self, image, objects, filename):
        """Draw the boxes of ``objects`` on ``image`` into ``filename``."""
        draw_bounding_boxes(
            image,
            [found["bbox"] for found in objects],
            [found["name"] for found in objects],
            [found["confidence"] for found in objects],
            [found["model"] for found in objects],
            filename=filename)

    def callback(self, body, **_):
        """
        Detect the objects of every picture and forward the message.

        Every model runs on every picture; detections scoring at least
        ``CONFIDENCE_THRESHOLD`` are kept, drawn, filtered for overlaps and
        drawn again. The result is stored in ``body["objects"]``; the
        pictures are dropped unless ColourDetection is still on the path.
        """
        all_objects = []
        for picture in body["pictures"]:
            image = decode(picture["data"], picture["shape"], np.uint8)
            img_height, img_width = picture["shape"][0], picture["shape"][1]

            # iterate through the models, performing object detection
            detections = []
            for description in self.models:
                output = self.run_inference_for_single_image(
                    image, description["model"])
                for index in range(output["num_detections"]):
                    score = float(output["detection_scores"][index])
                    if score < ObjectDetection.CONFIDENCE_THRESHOLD:
                        continue
                    bbox = output["detection_boxes"][index].tolist()
                    class_id = output["detection_classes"][index]
                    detections.append({
                        "name": description["category_index"][class_id]["name"],
                        "confidence": float(output["detection_scores"][index]),
                        "bbox": bbox,
                        "from": picture["from"],
                        "model": description["name"],
                        "img_height": img_height,
                        "img_width": img_width,
                    })

            # draw the bounding boxes
            # (outputs image to docker/volumes/aye-saac_output_data/_data/bbox.[uid].png)
            self._draw(image, detections,
                       "bbox.{u}.png".format(u=body["uid"]))

            # need to filter the results to remove massively overlapping
            # object detections (this can arise when different models
            # identify the same object for example)
            detections = self.filter_objects(detections, img_height, img_width)

            # draw the bounding boxes
            # (outputs image to docker/volumes/aye-saac_output_data/_data/bbox_filtered.[uid].png)
            self._draw(image, detections,
                       "bbox_filtered.{u}.png".format(u=body["uid"]))

            # append the objects to all_objects
            all_objects.extend(detections)

        # pprint(objects)
        body["objects"] = all_objects
        body["path_done"].append(self.__class__.__name__)
        if "ColourDetection" not in body["vision_path"]:
            del body["pictures"]
        upcoming = body["vision_path"].pop(0)
        self.queue_manager.publish(upcoming, body)

    def run(self):
        """Start consuming the queue named after the class."""
        own_queue = self.__class__.__name__
        self.queue_manager.start_consuming(own_queue, self.callback)
class OCR(object):
    """
    The class OCR purpose is to detect all the possible text in the picture.

    The OCR engine ("keras-ocr" or "tesseract") is selected by the
    ``default-ocr-model`` entry of ``./group-6-config.json``, which is read
    when the class body is evaluated.
    """

    default_ocr_model, supported_ocr_models = None, []

    import json

    with open("./group-6-config.json") as f:
        data = json.load(f)
    default_ocr_model = data["default-ocr-model"]
    supported_ocr_models = data["supported-ocr-models"]
    print("Using OCR model: " + default_ocr_model)

    # Engines this class knows how to drive.
    _ENGINES = ("keras-ocr", "tesseract")

    def __init__(self):
        """
        Raises:
            ValueError: if the configured model is not one of ``_ENGINES``.
        """
        # The methods used to be defined conditionally in the class body, so
        # an unknown model produced a class without callback()/run().
        if self.default_ocr_model not in self._ENGINES:
            raise ValueError(
                "Unsupported OCR model: " + repr(self.default_ocr_model))
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])
        if self.default_ocr_model == "keras-ocr":
            self.pipeline = keras_ocr.pipeline.Pipeline()
        # Announce readiness once, at start-up, instead of after every
        # processed message.
        logger.info(f"{self.__class__.__name__} ready")

    def _extract_text(self, image):
        """Return the text found in ``image`` using the configured engine."""
        if self.default_ocr_model == "keras-ocr":
            predictions = self.pipeline.recognize([image])[0]
            # Debug helper, disabled: draws the predictions on the picture.
            # fig, axs = plt.subplots(nrows=1, figsize=(20, 20))
            # keras_ocr.tools.drawAnnotations(image=image,
            #                                 predictions=predictions, ax=axs)
            # plt.show()
            pprint(predictions)
            return bb_to_text(predictions)

        pytesseract.pytesseract.tesseract_cmd = r'../usr/bin/tesseract'
        raw_text = pytesseract.image_to_string(image)
        # NaturalLanguageGenerator.read_text joins every entry of
        # body["texts"] with spaces, so a raw string would come out one
        # character at a time; hand over one list of words per line instead.
        return [line.split() for line in raw_text.splitlines() if line.strip()]

    def callback(self, body, **_):
        """
        Run text recognition on ``body["pictures"][0]`` and forward the message.

        The message is modified in place: ``body["texts"]`` receives the
        recognised text, the class name is appended to ``body["path_done"]``
        and ``body["pictures"]`` is removed before the message is published
        to the next entry of ``body["vision_path"]``.
        """
        picture = body["pictures"][0]
        image = decode(picture["data"], picture["shape"], np.uint8)
        body["texts"] = self._extract_text(image)
        body["path_done"].append(self.__class__.__name__)
        del body["pictures"]
        pprint(body)
        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class ColourDetection(object):
    """
    The class ColourDetection purpose is to detect every main colour from
    objects in the given pictures.

    Each object crop is converted to Lab, split into SLIC regions, and the
    average colour of every region is matched to the closest entry of the
    reference list loaded from ``colour/lab.txt``; the most frequent name
    wins.
    """

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])
        data_file = config.directory.data.joinpath("colour", "lab.txt")
        colour_list = pd.read_csv(data_file,
                                  skiprows=28,
                                  header=None,
                                  names=["l", "a", "b", "name"])
        colour_list = colour_list.values.tolist()[1:]
        # Parallel sequences: name i goes with Lab value i.
        self.colour_list_names = [x[3] for x in colour_list]
        self.colour_list_values = np.asarray(
            [np.asarray(x[:3], dtype=np.float32) for x in colour_list])
        logger.info(f"{self.__class__.__name__} ready")

    @staticmethod
    def convert_rgb_to_lab(image: np.ndarray) -> np.ndarray:
        """Convert ``image`` with ``rgb2lab``."""
        return rgb2lab(image)

    @staticmethod
    def flatten_image(image: np.ndarray) -> np.ndarray:
        """Merge the first two axes of ``image`` into one."""
        dimensions = np.shape(image)
        return np.reshape(image,
                          (dimensions[0] * dimensions[1], dimensions[2]))

    @staticmethod
    def remove_non_unique_pixels(image: np.ndarray) -> np.ndarray:
        """Return the unique rows of ``image``."""
        return np.unique(image, axis=0)

    @staticmethod
    def create_labelled_image(lab_image) -> np.ndarray:
        """Segment ``lab_image`` with SLIC and return the label image."""
        return slic(
            lab_image,
            n_segments=200,
            compactness=10,
            sigma=0.1,
            convert2lab=False,
            enforce_connectivity=True,
        )

    @staticmethod
    def create_regions(lab_image, labelled_image):
        """
        Return the regions of ``labelled_image``, each annotated with
        ``is_boundary`` and ``average_colour``.
        """
        region_segments = regionprops(labelled_image)
        image_dimensions = np.shape(labelled_image)
        for region in region_segments:
            region.is_boundary = ColourDetection.is_region_on_boundary(
                region, image_dimensions)
            region.average_colour = ColourDetection.get_region_average_colour(
                region.label, labelled_image, lab_image)
        return region_segments

    @staticmethod
    def is_region_on_boundary(region, image_dimensions):
        """
        Tell whether the bounding box of ``region`` touches an image edge.

        ``region.bbox`` is read as ``(min_row, min_col, max_row, max_col)``
        and ``image_dimensions`` as ``(rows, cols)``.
        """
        min_row, min_col, max_row, max_col = region.bbox[:4]
        # The last test used to compare the whole bbox tuple with the column
        # count, which is never equal, so the right edge was never detected.
        return (min_row == 0 or min_col == 0
                or max_row == image_dimensions[0]
                or max_col == image_dimensions[1])

    @staticmethod
    def get_pixels_from_label_id(label_id, labelled_image, image):
        """Return ``image`` as a masked array hiding pixels outside ``label_id``."""
        label_mask = np.invert(np.isin(labelled_image, label_id))
        label_mask = np.dstack((label_mask, label_mask, label_mask))
        image_mask = np.ma.array(image, mask=label_mask)
        return image_mask

    @staticmethod
    def get_region_average_colour(label_id, labelled_image, image):
        """Return the per-channel mean of the pixels labelled ``label_id``."""
        masked_image = ColourDetection.get_pixels_from_label_id(
            label_id, labelled_image, image)
        flattened_masked_image = ColourDetection.flatten_image(masked_image)
        average_colour = np.zeros(3, dtype=np.float32)
        for channel in range(np.shape(image)[2]):
            average_colour[channel] = np.mean(flattened_masked_image[:,
                                                                     channel])
        return average_colour

    @staticmethod
    def get_all_region_colours(region_list):
        """Return the ``average_colour`` of every region."""
        return [region.average_colour for region in region_list]

    def detect_colours(self, crop_image):
        """Return the name of the dominant colour of ``crop_image``."""
        lab_image = self.convert_rgb_to_lab(crop_image)
        labelled_image = self.create_labelled_image(lab_image)
        region_list = self.create_regions(lab_image, labelled_image)
        colours = self.get_all_region_colours(region_list)

        colours_found = {}
        for colour in colours:
            # Squared distance to every reference colour; closest one wins.
            d = ((self.colour_list_values - colour)**2).sum(axis=1)
            name = self.colour_list_names[d.argmin()]
            colours_found[name] = colours_found.get(name, 0) + 1

        sorted_colours = max(colours_found.items(), key=operator.itemgetter(1))
        pprint(colours_found)
        return sorted_colours[0]

    def callback(self, body, **_):
        """
        Add a ``colour`` to every object of the message and forward it.

        Every object is cropped from every picture using its ``bbox``, read
        as fractions of the picture height and width.
        """
        body["path_done"].append(self.__class__.__name__)
        for picture in body["pictures"]:
            image = decode(picture["data"], picture["shape"], np.uint8)
            height, width = picture["shape"][0], picture["shape"][1]
            for obj in body["objects"]:
                crop_img = image[
                    int(height * obj["bbox"][0]):int(height * obj["bbox"][2]),
                    int(width * obj["bbox"][1]):int(width * obj["bbox"][3]),
                ]
                obj["colour"] = self.detect_colours(crop_img)
        del body["pictures"]
        pprint(body)
        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        """Start consuming the queue named after the class."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)
class ObjectDetection(object):
    """
    The class ObjectDetection purpose is to detect every object in the given
    pictures.
    """

    def __init__(self):
        queue_names = [
            self.__class__.__name__,
            "Interpreter",
            "ColourDetection",
            "PositionDetection",
        ]
        self.queue_manager = QueueManager(queue_names)
        self.category_index = coco_category_index
        self.model_path = config.directory.data.joinpath("resnet")
        saved_model = tf.saved_model.load(str(self.model_path))
        self.model = saved_model.signatures["serving_default"]
        logger.info(f"{self.__class__.__name__} ready")

    def run_inference_for_single_image(self, image):
        """
        Run the model on one image and return its detections as numpy arrays.

        A leading axis is added to the image before the call; every output is
        cut down to its first ``num_detections`` entries and the detected
        classes are cast to ``int32``.
        """
        batch = tf.convert_to_tensor(image)[tf.newaxis, ...]
        raw_output = self.model(batch)
        num_detections = int(raw_output.pop("num_detections"))

        output_dict = {}
        for key, value in raw_output.items():
            output_dict[key] = value[0, :num_detections].numpy()
        output_dict["num_detections"] = num_detections
        output_dict["detection_classes"] = output_dict[
            "detection_classes"].astype(np.int32)
        return output_dict

    def callback(self, body, **_):
        """
        Detect the objects of every picture and forward the message.

        Detections scoring at least 0.5 are stored in ``body["objects"]``;
        the pictures are dropped unless ColourDetection is still on the path.
        """
        found = []
        for picture in body["pictures"]:
            image = decode(picture["data"], picture["shape"], np.uint8)
            output = self.run_inference_for_single_image(image)
            for index in range(output["num_detections"]):
                if not float(output["detection_scores"][index]) >= 0.5:
                    continue
                class_id = output["detection_classes"][index]
                found.append({
                    "name": self.category_index[class_id]["name"],
                    "confidence": float(output["detection_scores"][index]),
                    "bbox": output["detection_boxes"][index].tolist(),
                    "from": picture["from"],
                })
        pprint(found)

        body["objects"] = found
        body["path_done"].append(self.__class__.__name__)
        if "ColourDetection" not in body["vision_path"]:
            del body["pictures"]
        upcoming = body["vision_path"].pop(0)
        self.queue_manager.publish(upcoming, body)

    def run(self):
        """Start consuming the queue named after the class."""
        own_queue = self.__class__.__name__
        self.queue_manager.start_consuming(own_queue, self.callback)
class NaturalLanguageGenerator(object):
    """
    The class NaturalLanguageGenerator purpose is to translate the results
    obtained to a nicely formatted sentence.

    Every incoming message names an intent. The handler method of that intent
    turns the message into ``(objects, context, obj_cnt)``: ``context`` is the
    key of a list of sentence templates in ``self.answers`` and ``objects`` is
    what replaces the ``*`` placeholder of the template that gets picked.
    """

    # Methods an incoming intent name is allowed to select. Anything else
    # (including non-handler attributes such as "run" or "answers") falls back
    # to ``default``. Register new intent handlers here.
    INTENT_HANDLERS = frozenset({
        "identify",
        "recognise",
        "read_text",
        "detect_colour",
        "locate",
        "default",
    })

    # Horizontal bounds (normalised bbox centre) used by ``locate`` to split
    # the picture into left / in front / right.
    LEFT_LIMIT = 0.382
    RIGHT_LIMIT = 0.618

    def __init__(self):
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "ExternalInterface"]
        )
        self.answers = {}
        self.description_types = [
            "DESCRIPTION_NOTHING",
            "DESCRIPTION_ANSWER_S",
            "DESCRIPTION_ANSWER_P",
        ]
        self.build_generator()
        logger.info(f"{self.__class__.__name__} ready")

    def build_generator(self):
        """
        Load the sentence templates into ``self.answers``.

        Every file found under ``<data>/sentence_templates`` becomes one entry:
        the key is the file name, the value the list of its stripped lines.
        """
        folder_path = config.directory.data.joinpath("sentence_templates")
        for root, _, files in os.walk(folder_path):
            for name in files:
                # os.walk descends into sub-directories, so the file has to be
                # opened relative to the directory it was found in.
                with open(os.path.join(root, name)) as f:
                    self.answers[name] = [line.strip() for line in f]

    def get_det(self, word):
        """Return the determiner for a ``(label, count)`` pair: "<count> " or "a "."""
        return str(word[1]) + " " if word[1] > 1 else "a "

    def generate_text(self, words, context, obj_cnt):
        """
        Pick a random template of ``context`` and fill its first ``*``.

        :param words: either a plain string, or a list of ``(label, count)``
            pairs that is rendered as an enumeration
        :param context: key of the template list in ``self.answers``
        :param obj_cnt: unused here, kept for the callers' signature
        :return: the generated sentence (the raw template when ``words`` is an
            empty list)
        """
        answer = choice(self.answers[context])
        if isinstance(words, str):
            return answer.replace("*", words, 1)
        if len(words) > 1:
            head = ", ".join(self.get_det(w) + w[0] for w in words[:-1])
            tail = self.get_det(words[-1]) + words[-1][0]
            return answer.replace("*", head + " and " + tail, 1)
        if words:
            # A single entry gets its count only when there are several of it.
            count = words[0][1]
            prefix = (str(count) + " ") if count > 1 else ""
            return answer.replace("*", prefix + words[0][0], 1)
        return answer

    @staticmethod
    def _label(obj):
        """Return the object's name followed by its lateral position, if any."""
        position = obj.get("lateral_position")
        return obj["name"] + (position if position else "")

    @staticmethod
    def _tally(labels):
        """Return ``[(label, occurrences), ...]`` in first-seen order."""
        counts = {}
        for label in labels:
            counts[label] = counts.get(label, 0) + 1
        return list(counts.items())

    def _describe(self, labels):
        """Build the ``(objects, context, obj_cnt)`` answer of a description."""
        obj_cnt = len(labels)
        context = self.description_types[obj_cnt if obj_cnt < 2 else 2]
        return self._tally(labels), context, obj_cnt

    def identify(self, body):
        """Describe every detected object except the ones named "person"."""
        pprint("identify")
        labels = [
            self._label(o) for o in body["objects"] if o["name"] != "person"
        ]
        return self._describe(labels)

    def recognise(self, body):
        """
        Tell whether the requested entities were detected.

        When nothing matches, the requested entity values themselves are
        returned (each with a count of 1) together with the negative context.
        """
        pprint("recognise")
        labels = []
        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if o["name"] == p["value"]:
                    labels.append(self._label(o))
        objects = self._tally(labels)
        obj_cnt = len(labels)
        context = (
            ("POSITIVE" if obj_cnt > 0 else "NEGATIVE")
            + "_ANSWER_"
            + ("P" if obj_cnt > 1 else "S")
        )
        if not obj_cnt:
            objects = [(p["value"], 1) for p in body["intents"]["entities"]]
            obj_cnt = sum(n for _, n in objects)
        return objects, context, obj_cnt

    @staticmethod
    def _joined_text(body):
        """Join the items of each ``body["texts"]`` entry, then the entries, with spaces."""
        return " ".join(" ".join(t) for t in body["texts"])

    def REAL_read_text(self, body):
        """Return all the text read in the picture as one string."""
        pprint("read_text")
        objects = self._joined_text(body)
        print(objects)
        obj_cnt = 1 if len(objects) > 0 else 0
        context = "READ_TEXT_" + ("POSITIVE" if obj_cnt > 0 else "NEGATIVE")
        return objects, context, obj_cnt

    # TEMPORARILY HIJACKING THE read_text FUNCTION ABOVE
    # THIS SHOULD BE CHANGED BACK TO extract_label ONCE SUCH AN INTENT HAS
    # BEEN TRAINED WITH THE NLU!!!
    def read_text(self, body):
        """Return the label that ``extract_label`` finds in the text that was read."""
        # def extract_label(self, body):
        objects = self._joined_text(body)
        print(objects)
        objects = extract_label(objects)
        obj_cnt = 1 if len(objects) > 0 else 0
        context = "READ_TEXT_" + ("POSITIVE" if obj_cnt > 0 else "NEGATIVE")
        return objects, context, obj_cnt

    def detect_colour(self, body):
        """
        Return the colour of the first detected object matching an entity.

        :return: ``(colour, "COLOR_DETECTION", 1)`` on success;
            ``(entity value, "COLOR_DETECTION_N", 0)`` when no matching object
            carries a colour; ``(None, None, 0)`` when there is no detected
            object or no entity to look for
        """
        pprint("detect_colour")
        detections = body["objects"]
        if not detections:
            return None, None, 0
        entities = body["intents"]["entities"]
        if not entities:
            return None, None, 0

        # Name reported when no colour can be given.
        missing = entities[-1]["value"]
        for o in detections:
            for p in entities:
                if o["name"] != p["value"]:
                    continue
                colour = o.get("colour")
                if colour:
                    # Stop at the first hit so that later, unrelated objects
                    # cannot overwrite the colour that was found.
                    return colour, "COLOR_DETECTION", 1
                # The object is there but its colour is unknown.
                missing = p["value"]
        return missing, "COLOR_DETECTION_N", 0

    def locate(self, body):
        """
        Describe where the requested entities are in the picture.

        A matching object without ``lateral_position`` but with a bbox of at
        least four values gets one computed from the horizontal centre of the
        bbox; the value is stored on the object itself.
        """
        pprint("locate")
        labels = []
        for o in body["objects"]:
            for p in body["intents"]["entities"]:
                if o["name"] != p["value"]:
                    continue
                if (
                    not o.get("lateral_position")
                    and o.get("bbox")
                    and len(o["bbox"]) >= 4
                ):
                    bbox = o["bbox"]
                    # bbox[1] and bbox[3] are the horizontal start and end.
                    x_center = (bbox[3] + bbox[1]) / 2
                    pprint("xCenter")
                    pprint(x_center)
                    if x_center < self.LEFT_LIMIT:
                        o["lateral_position"] = " on the left"
                    elif x_center <= self.RIGHT_LIMIT:
                        o["lateral_position"] = " in front"
                    elif x_center > self.RIGHT_LIMIT:
                        o["lateral_position"] = " on the right"
                labels.append(self._label(o))
        return self._describe(labels)

    def default(self, body):
        """Describe every detected object; used when no handler fits the intent."""
        pprint("default")
        # Creates list of object detected in the scene
        labels = [self._label(o) for o in body["objects"]]
        return self._describe(labels)

    def callback(self, body, **_):
        """
        Answer one message: run the intent handler, build the sentence and
        publish the message to "ExternalInterface".

        :param body: message dict; reads ``intents`` (and whatever the handler
            needs), writes ``response`` and appends to ``path_done``
        """
        pprint(body)
        intent_name = body["intents"]["intent"]["name"]
        # Only registered handlers may be selected by the incoming intent name.
        if isinstance(intent_name, str) and intent_name in self.INTENT_HANDLERS:
            method = getattr(self, intent_name)
        else:
            method = self.default
        pprint("----- METHOD CALLED -----")
        objects, context, obj_cnt = method(body)
        print(objects)
        print(context)
        if objects is not None and context is not None:
            response = self.generate_text(objects, context, obj_cnt)
        else:
            response = "I didn't understand the question, could you repeat please."
        body["response"] = response
        pprint(body["response"])
        body["path_done"].append(self.__class__.__name__)
        self.queue_manager.publish("ExternalInterface", body)

    def run(self):
        """Start consuming this service's queue with :meth:`callback`."""
        self.queue_manager.start_consuming(self.__class__.__name__, self.callback)
class PositionDetection(object):
    """
    Service that annotates every detected object with position information.

    Three annotations are produced from the bounding boxes: a lateral position
    in the picture, a position relative to anchors and a position relative to
    hands/people.

    Reminder about the bounding boxes handled here: the coordinates are read in
    the order (top, left, bottom, right) and are normalised to the image
    width/height (between 0 and 1).

    Example coordinates:
        * top = 0.2
        * bottom = 0.4
        * left = 0.3
        * right = 0.6

    The bounding box will look like this:

                         left (0.3)  right (0.6)
                              |          |
        top (0.2)        -----|----------|----
                              |          |
                              |          |
        bottom (0.4)     -----|----------|----
                              |          |
    """

    def __init__(self):
        """Declare the queues used by the service."""
        self.queue_manager = QueueManager(
            [self.__class__.__name__, "Interpreter"])
        logger.info(f"{self.__class__.__name__} ready")

    def get_lateral_position(self, obj, max_pos=(1, 1)):
        """
        Return in which horizontal third of the picture the object's centre is.

        :param obj: detected object with a ``bbox`` entry
        :param max_pos: maximum coordinates; index 1 is the horizontal one
        :return: " on the left", " in the center" or " on the right"
        """
        box = obj["bbox"]
        y_third, x_third = max_pos[0] / 3, max_pos[1] / 3
        # Only the horizontal centre drives the answer.
        y_mid = (box[0] + box[2]) / 2
        x_mid = (box[1] + box[3]) / 2

        if x_mid > 2 * x_third:
            return " on the right"
        if x_mid > x_third:
            return " in the center"
        return " on the left"

    def get_hand_position(self, obj, objects):
        '''Return the object's position relative to the hands/people of its picture.

        The result is an empty string when the object is itself a hand or a
        person, or when its picture holds none or more than two of them.
        '''
        hand_like = ["hand", "person"]

        # A hand (or person) is never positioned against itself.
        if obj["name"] in hand_like:
            return ""

        # Hands/people coming from the same picture as the object.
        references = copy.deepcopy([
            other for other in objects
            if other["from"] == obj["from"] and other["name"] in hand_like
        ])
        if len(references) == 0 or len(references) > 2:
            return ""

        # Centre of the object, from its normalised bounding box.
        obj_top, obj_left, obj_bottom, obj_right = tuple(obj["bbox"])
        obj_x = (obj_left + obj_right) / 2
        obj_y = (obj_top + obj_bottom) / 2

        if len(references) == 2:
            # "Between" only makes sense with two actual hands.
            both_are_hands = (references[0]["name"] == "hand"
                              and references[1]["name"] == "hand")
            if both_are_hands and self.__obj_is_between_hands__(
                    obj_x, obj_y, references):
                return " is between hands"
            return ""

        # Exactly one hand/person: say on which side of it the object is.
        reference = references[0]
        ref_top, ref_left, ref_bottom, ref_right = tuple(reference["bbox"])
        ref_x = (ref_left + ref_right) / 2
        ref_y = (ref_top + ref_bottom) / 2

        relation = ""
        if self.__obj_is_right_of_hand__(obj_x, ref_x):
            relation = " is to the right of a " + reference["name"]
        if self.__obj_is_left_of_hand__(obj_x, ref_x):
            relation = " is to the left of a " + reference["name"]
        return relation

    def __obj_is_between_hands__(self, x_obj, y_obj, hand_objects):
        '''Tell whether the point lies between the two hand centres, on x or on y.'''
        first_top, first_left, first_bottom, first_right = tuple(
            hand_objects[0]["bbox"])
        second_top, second_left, second_bottom, second_right = tuple(
            hand_objects[1]["bbox"])

        first_x = (first_left + first_right) / 2
        second_x = (second_left + second_right) / 2
        first_y = (first_top + first_bottom) / 2
        second_y = (second_top + second_bottom) / 2

        return bool(
            (first_x < x_obj < second_x) or (first_x > x_obj > second_x)
            or (first_y < y_obj < second_y) or (first_y > y_obj > second_y))

    def __obj_is_right_of_hand__(self, x_obj, x_hand):
        '''Tell whether the object's centre is right of the hand's centre.'''
        return bool(x_obj > x_hand)

    def __obj_is_left_of_hand__(self, x_obj, x_hand):
        '''Tell whether the object's centre is left of the hand's centre.'''
        return bool(x_hand > x_obj)

    def get_anchored_position(self, obj, objects):
        '''Return the object's positions relative to anchors, e.g. "next to fridge".

        One sentence fragment is produced per anchor found in the object's
        picture; the list is empty when there is no anchor to relate to.
        '''
        # Anchors are the objects of the same picture whose name is in ANCHORS.
        anchors = copy.deepcopy([
            other for other in objects
            if other["from"] == obj["from"] and other["name"] in ANCHORS.keys()
        ])

        # An anchor is not positioned against itself.
        anchor_names = [anchor["name"] for anchor in anchors]
        if obj["name"] in anchor_names:
            anchors.remove(obj)

        fragments = []
        if len(anchors) == 0:
            return fragments

        top_obj, left_obj, bottom_obj, right_obj = tuple(obj["bbox"])
        for anchor in anchors:
            relationships = ANCHORS[anchor["name"]]["relationships"]
            top_anchor, left_anchor, bottom_anchor, right_anchor = tuple(
                anchor["bbox"])

            # The first relationship that is both allowed and satisfied wins.
            if "in" in relationships and self.__obj_is_in_anchor__(
                    left_obj, left_anchor, right_obj, right_anchor,
                    bottom_obj, bottom_anchor, top_obj, top_anchor):
                prefix = " it's in the "
            elif "on" in relationships and self.__obj_is_on_anchor__(
                    left_obj, left_anchor, right_obj, right_anchor,
                    bottom_obj, bottom_anchor):
                prefix = " it's on the "
            elif "next to" in relationships and self.__obj_is_left_of_anchor__(
                    left_obj, left_anchor, right_obj, right_anchor):
                prefix = " it's left of the "
            elif "next to" in relationships and self.__obj_is_right_of_anchor__(
                    left_obj, left_anchor, right_obj, right_anchor):
                prefix = " it's right of the "
            elif "below" in relationships and self.__obj_is_below_anchor__(
                    left_obj, left_anchor, right_obj, right_anchor,
                    top_obj, top_anchor):
                prefix = " it's below the "
            else:
                # Default position if the above conditions cannot be met
                prefix = " it's near the "
            fragments.append(prefix + anchor["name"])
        return fragments

    def __obj_is_in_anchor__(self, left_obj, left_anchor, right_obj,
                             right_anchor, bottom_obj, bottom_anchor, top_obj,
                             top_anchor):
        '''Tell whether the object's box lies strictly inside the anchor's box.'''
        return bool(left_obj > left_anchor and right_obj < right_anchor
                    and bottom_obj < bottom_anchor and top_obj > top_anchor)

    def __obj_is_on_anchor__(self, left_obj, left_anchor, right_obj,
                             right_anchor, bottom_obj, bottom_anchor):
        '''Tell whether the object is within the anchor's sides and bottom edge.'''
        return bool(left_obj > left_anchor and right_obj < right_anchor
                    and bottom_obj < bottom_anchor)

    def __obj_is_left_of_anchor__(self, left_obj, left_anchor, right_obj,
                                  right_anchor):
        '''Tell whether the object's left edge is left of the anchor's left edge.'''
        return bool(left_obj < left_anchor)

    def __obj_is_right_of_anchor__(self, left_obj, left_anchor, right_obj,
                                   right_anchor):
        '''Tell whether the object's right edge is right of the anchor's right edge.'''
        return bool(right_obj > right_anchor)

    def __obj_is_below_anchor__(self, left_obj, left_anchor, right_obj,
                                right_anchor, top_obj, top_anchor):
        '''Tell whether the object is within the anchor's sides and under its top edge.'''
        return bool(left_obj > left_anchor and right_obj < right_anchor
                    and top_obj > top_anchor)

    def callback(self, body, **_):
        """
        Annotate every object of ``body`` and forward the message.

        :param body: message dict; reads ``objects`` and ``vision_path``,
            appends to ``path_done`` and adds the three position entries to
            each object
        """
        pprint(body)
        all_objects = body["objects"]
        for entry in all_objects:
            entry["lateral_position"] = self.get_lateral_position(entry)
            entry["anchored_position"] = self.get_anchored_position(
                entry, all_objects)
            entry["hand_position"] = self.get_hand_position(entry, all_objects)

            print("lateral_position: " + str(entry["lateral_position"]))
            print("anchored_position: " + str(entry["anchored_position"]))
            print("hand_position: " + str(entry["hand_position"]))

        body["path_done"].append(self.__class__.__name__)
        next_service = body["vision_path"].pop(0)
        self.queue_manager.publish(next_service, body)

    def run(self):
        """Start consuming this service's queue with :meth:`callback`."""
        self.queue_manager.start_consuming(self.__class__.__name__,
                                           self.callback)