def plaintextlines_service():
    """Return the text of an image grouped into lines with scaled boxes.

    Reads ``imageURI``, ``width`` and ``height`` from the request query
    string. Word boxes are scaled from the ``width``/``height`` recorded in
    the text index to the requested ``width``/``height``.

    Returns:
        A JSON response ``{"lines": [{"text": ..., "xywh": ...}, ...]}``, or
        a JSON error with status 415 when the text index has no words or its
        words carry no ``line_number``.
    """
    image_uri = request.args.get('imageURI')
    width = request.args.get('width')
    height = request.args.get('height')
    s3 = aws.get_s3_resource()
    text_data = get_text_index(s3, image_uri)
    word_index = text_data.get("word_index")
    o_width = text_data.get("width")
    o_height = text_data.get("height")
    scale_w = float(width) / float(o_width)
    scale_h = float(height) / float(o_height)

    # A missing or empty word index has no line data either; report it the
    # same way instead of failing on word_index[0].
    if not word_index or 'line_number' not in word_index[0]:
        return jsonify({"error": "plaintext lines not available"}), 415

    lines = []
    line = ""
    line_boxes = []
    current_line = None
    for word in word_index:
        # A change of line number closes the line collected so far.
        if line_boxes and word['line_number'] != current_line:
            lines.append({"text": line, "xywh": box_join(line_boxes)})
            line = ""
            line_boxes = []
        current_line = word['line_number']
        # The trailing space after each word is kept as emitted before.
        line += word['text'] + " "
        line_boxes.append(get_box(word, scale_w, scale_h))

    # Flush the final line: no further line-number change follows it, so
    # without this the last line of every page would be dropped.
    if line_boxes:
        lines.append({"text": line, "xywh": box_join(line_boxes)})

    return jsonify({"lines": lines})
def run(self):
    """Set up AWS clients and queues, then process messages until stopped.

    Each non-None message returned by ``get_messages_from_queue`` is passed
    to ``process_message`` and then deleted, whether or not processing
    succeeded. The loop ends when ``/tmp/stop.txt`` exists.

    Raises:
        SystemExit: when the stop file ``/tmp/stop.txt`` is present.
        Exception: any error raised outside message processing is logged
            and re-raised.
    """
    self.set_logging()
    self.sqs = aws.get_sqs_resource()
    self.transcoder = aws.get_transcoder_client()
    self.s3 = aws.get_s3_resource()
    self.notification_queue = self.get_notification_queue()
    self.response_queue = self.get_response_queue()
    self.preset_id_map = aws.get_preset_map(self.transcoder, inverse=True)
    self.inverse_policy_map = self.get_inverse_policy_map()

    # No outer retry loop: the handler below always re-raises, so a
    # surrounding ``while True`` could never start a second iteration.
    try:
        while True:
            if os.path.exists('/tmp/stop.txt'):
                sys.exit()
            for message in self.get_messages_from_queue():
                if message is None:
                    continue
                try:
                    self.process_message(message)
                except Exception:
                    # Only ordinary errors are absorbed here; KeyboardInterrupt
                    # and SystemExit propagate so the worker can be stopped.
                    logging.exception("Error processing message")
                finally:
                    # The message is removed even when processing failed.
                    message.delete()
    except Exception:
        logging.exception("Error getting messages")
        # Bare raise keeps the original traceback intact.
        raise
def source_metadata():
    """Return the source metadata for the ``imageURI`` query parameter.

    Responds with an empty body and status 404 when ``get_source_metadata``
    returns ``None``.
    """
    uri = request.args.get('imageURI')
    s3_resource = aws.get_s3_resource()
    found = get_source_metadata(s3_resource, uri)
    if found is not None:
        return found
    return "", 404
def get_confidence(image):
    """Build a dict with the image URI and, if present, its confidence.

    The ``confidence`` key is included only when the text index for
    ``image`` carries a non-None ``confidence`` value.
    """
    text_index = get_text_index(aws.get_s3_resource(), image)
    score = text_index.get('confidence')
    if score is None:
        return {"imageURI": image}
    return {"imageURI": image, "confidence": score}
def init(self):
    """Configure logging and attach AWS resources, queues and the Iris client.

    Sets ``sqs``, ``s3``, ``ingest_queue``, ``error_queue``, ``text_queue``
    and ``iris`` on the instance.
    """
    self.set_logging()

    self.sqs = aws.get_sqs_resource()
    self.s3 = aws.get_s3_resource()

    def queue_named(name):
        # All three queues are looked up through the same SQS resource.
        return aws.get_queue_by_name(self.sqs, name)

    self.ingest_queue = queue_named(settings.INGEST_QUEUE)
    self.error_queue = queue_named(settings.ERROR_QUEUE)
    self.text_queue = queue_named(settings.TEXT_QUEUE)

    self.iris = iris_client.IrisClient()
def plaintext_service():
    """Return the words of an image joined into one space-separated string.

    The JSON response maps the requested ``imageURI`` to the joined text of
    every entry in the text index's ``word_index``.
    """
    uri = request.args.get('imageURI')
    s3_resource = aws.get_s3_resource()
    words = get_text_index(s3_resource, uri).get("word_index")
    joined = " ".join(entry['text'] for entry in words)
    return jsonify({uri: joined})
def get_error_queue():
    """Look up the queue named by ``settings.ERROR_QUEUE`` via the global ``sqs``."""
    queue = aws.get_queue_by_name(sqs, settings.ERROR_QUEUE)
    return queue


def get_pipeline():
    """Look up the pipeline named by ``settings.PIPELINE`` via the global ``transcoder``."""
    found = aws.get_pipeline_by_name(transcoder, settings.PIPELINE)
    return found


def get_random_prefix():
    """Return ``"x/NNNN/"`` where NNNN is a zero-padded random int in 0..1000."""
    number = random.randint(0, 1000)
    return "x/{:04d}/".format(number)


if __name__ == "__main__":
    # Verbose logging only when the DEBUG setting is on.
    logzero.loglevel(logging.DEBUG if settings.DEBUG else logging.INFO)

    requested_to_quit = False

    # Module-level handles; the lookup helpers above read sqs and transcoder.
    sqs = aws.get_sqs_resource()
    transcoder = aws.get_transcoder_client()
    s3 = aws.get_s3_resource()

    input_queue = get_input_queue()
    error_queue = get_error_queue()
    pipeline = get_pipeline()
    preset_id_map = aws.get_preset_map(transcoder)

    main()
def get_coords(image):
    """Return scaled boxes for the word positions requested in ``image``.

    Example request::

        {"positions": [[25, 31], 100, 110], "height": 768, "width": 1024,
         "imageURI": "http://aplaceforstuff.co.uk/x/"}

    Each entry of ``positions`` is either an int (a single word) or a
    sequence of positions (a phrase). Boxes are scaled from the text index's
    ``width``/``height`` to the request's ``width``/``height``, falling back
    to the text index's ``canvas_width``/``canvas_height`` when the request
    omits them. A word with no entry in ``start_index`` yields a null box
    placeholder; for a phrase the placeholder replaces the whole phrase.

    Returns:
        ``{"image_uri": ..., "phrases": [...]}``; only ``{"image_uri": ...}``
        when no text index is found; or ``{"imageURI": ..., "message": ...}``
        when the text index has no width or height.

    Raises:
        Exception: when the text index has no ``word_index``.
    """
    s3_resource = aws.get_s3_resource()
    image_uri = image.get("imageURI")
    result = {"image_uri": image_uri}
    positions = image.get("positions")
    single_box = image.get("single-box") is True

    text_data = get_text_index(s3_resource, image_uri)
    if text_data is None:
        return result

    source_width = text_data.get("width")
    source_height = text_data.get("height")
    canvas_width = text_data.get("canvas_width")
    canvas_height = text_data.get("canvas_height")
    if source_width is None or source_height is None:
        return {"imageURI": image_uri, "message": "no coordinates available"}

    words = text_data.get("word_index")
    starts = text_data.get("start_index")

    # Dimensions to project coordinates into: the request's own when given,
    # otherwise the canvas dimensions stored with the text index.
    target_width = image.get("width") if 'width' in image else canvas_width
    target_height = image.get("height") if 'height' in image else canvas_height

    scale_w = float(target_width) / float(source_width)
    scale_h = float(target_height) / float(source_height)

    if words is None:
        raise Exception  # TODO : handle better!

    phrases = []
    for entry in positions:
        if isinstance(entry, int):
            # Single word.
            idx = starts.get(str(entry))
            if idx is None:
                # No data for this word: emit a null box for it.
                entry_boxes = [get_null_box_object(1)]
            else:
                entry_boxes = box_join(get_box(words[idx], scale_w, scale_h),
                                       single_box=single_box)
        else:
            # Phrase: collect a box per word, unless any word is missing.
            collected = []
            for position in entry:
                idx = starts.get(str(position))
                if idx is None:
                    # One missing word turns the whole phrase into a null box.
                    entry_boxes = [get_null_box_object(len(entry))]
                    break
                collected.append(get_box(words[idx], scale_w, scale_h))
            else:
                entry_boxes = box_join(collected, single_box=single_box)
        phrases.append(entry_boxes)

    result['phrases'] = phrases
    return result
def get_coords(image):
    """Return scaled boxes for the word positions requested in ``image``.

    Example request::

        {"positions": [[25, 31], 100, 110], "height": 768, "width": 1024,
         "imageURI": "http://aplaceforstuff.co.uk/x/"}

    Each entry of ``positions`` is either an int (a single word) or a
    sequence of positions (a phrase). Boxes are scaled from the text index's
    ``width``/``height`` to the request's ``width``/``height``. A word with
    no entry in ``start_index`` yields a null box placeholder; for a phrase
    the placeholder replaces the whole phrase.

    Returns:
        ``{"image_uri": ..., "phrases": [...]}``; only ``{"image_uri": ...}``
        when no text index is found; or ``{"imageURI": ..., "message": ...}``
        when the text index has no width or height.

    Raises:
        Exception: when the text index has no ``word_index``.
    """
    boxes = []
    s3 = aws.get_s3_resource()
    image_uri = image.get("imageURI")
    positions = image.get("positions")

    text_data = get_text_index(s3, image_uri)
    if text_data is None:
        # No text index for this image: nothing to look up.
        return {"image_uri": image_uri}

    o_width = text_data.get("width")
    o_height = text_data.get("height")
    if o_width is None or o_height is None:
        return {
            "imageURI": image_uri,
            "message": "no coordinates available"
        }

    word_index = text_data.get("word_index")
    start_index = text_data.get("start_index")
    width = image.get("width")
    height = image.get("height")
    scale_w = float(width) / float(o_width)
    scale_h = float(height) / float(o_height)

    if word_index is None:
        raise Exception("no word index available for %s" % image_uri)

    for phrase_or_position in positions:
        if isinstance(phrase_or_position, int):
            # Single word.
            idx = start_index.get(str(phrase_or_position))
            if idx is None:
                # Previously fell through to word_index[None] and raised
                # TypeError; emit a null box for the word instead.
                p_boxes = [get_null_box_object(1)]
            else:
                p_boxes = box_join(get_box(word_index[idx], scale_w, scale_h))
        else:
            # Phrase: collect a box per word, unless any word is missing.
            phrase_boxes = []
            for position in phrase_or_position:
                idx = start_index.get(str(position))
                if idx is None:
                    # One missing word turns the whole phrase into a null box.
                    p_boxes = [get_null_box_object(len(phrase_or_position))]
                    break
                phrase_boxes.append(get_box(word_index[idx], scale_w, scale_h))
            else:
                p_boxes = box_join(phrase_boxes)
        boxes.append(p_boxes)

    return {
        "image_uri": image_uri,
        "phrases": boxes
    }