Example #1
0
def plaintextlines_service():
    """
    Return the text of an image grouped into lines, each with a scaled box.

    Query parameters read from the request:
        imageURI: passed to get_text_index to look up the text data.
        width, height: target dimensions. Word boxes are scaled by
            width / stored width and height / stored height.

    Returns a JSON body {"lines": [{"text": ..., "xywh": ...}, ...]}.
    When there is no text data, no words, or the words carry no
    'line_number', returns {"error": "plaintext lines not available"}
    with status 415.
    """
    image_uri = request.args.get('imageURI')
    width = request.args.get('width')
    height = request.args.get('height')

    s3 = aws.get_s3_resource()
    text_data = get_text_index(s3, image_uri)
    word_index = text_data.get("word_index") if text_data else None

    # Check availability before touching word_index[0] or the dimensions, so
    # a missing/empty index yields the 415 response rather than an exception.
    if not word_index or 'line_number' not in word_index[0]:
        return jsonify({"error": "plaintext lines not available"}), 415

    scale_w = float(width) / float(text_data.get("width"))
    scale_h = float(height) / float(text_data.get("height"))

    lines = []
    words = []
    line_boxes = []
    current_line = word_index[0]['line_number']
    for word in word_index:
        if word['line_number'] != current_line:
            # A new line number starts: emit the line collected so far.
            # The trailing space matches the text format emitted previously.
            lines.append({"text": " ".join(words) + " ",
                          "xywh": box_join(line_boxes)})
            words = []
            line_boxes = []
            current_line = word['line_number']
        words.append(word['text'])
        line_boxes.append(get_box(word, scale_w, scale_h))

    # Emit the final line; without this the last line of text is lost.
    lines.append({"text": " ".join(words) + " ",
                  "xywh": box_join(line_boxes)})
    return jsonify({"lines": lines})
Example #2
0
    def run(self):
        """
        Set up AWS handles and queues, then poll for messages until stopped.

        Each message returned by get_messages_from_queue is passed to
        process_message and then deleted, whether or not processing raised.
        The loop ends when '/tmp/stop.txt' exists (via sys.exit) or when an
        exception escapes the polling loop, in which case it is logged and
        re-raised.
        """
        self.set_logging()

        self.sqs = aws.get_sqs_resource()
        self.transcoder = aws.get_transcoder_client()
        self.s3 = aws.get_s3_resource()

        self.notification_queue = self.get_notification_queue()
        self.response_queue = self.get_response_queue()

        self.preset_id_map = aws.get_preset_map(self.transcoder, inverse=True)
        self.inverse_policy_map = self.get_inverse_policy_map()

        try:
            while True:
                # Presence of this file is the external stop signal.
                if os.path.exists('/tmp/stop.txt'):
                    sys.exit()
                for message in self.get_messages_from_queue():
                    if message is None:
                        continue
                    try:
                        self.process_message(message)
                    except Exception:
                        # Only ordinary errors are swallowed; SystemExit and
                        # KeyboardInterrupt must be able to stop the worker.
                        logging.exception("Error processing message")
                    finally:
                        # The message is removed even if processing failed.
                        message.delete()
        except Exception:
            logging.exception("Error getting messages")
            # Bare raise keeps the original traceback intact.
            raise
Example #3
0
def source_metadata():
    """
    Return the source metadata for the image named by the 'imageURI'
    query parameter, or an empty body with status 404 when
    get_source_metadata returns None.
    """
    uri = request.args.get('imageURI')
    found = get_source_metadata(aws.get_s3_resource(), uri)
    return found if found is not None else ("", 404)
Example #4
0
def get_confidence(image):
    """
    Build a {"imageURI": image} result, adding 'confidence' when known.

    The 'confidence' key is only present when the text data returned by
    get_text_index contains a non-None 'confidence' value. If no text data
    is returned at all, the result carries just the image URI.
    """
    result = {"imageURI": image}

    s3 = aws.get_s3_resource()
    text_data = get_text_index(s3, image)
    if text_data is None:
        # get_coords treats a None text index as "nothing stored"; do the
        # same here instead of failing on text_data.get.
        return result

    confidence = text_data.get('confidence')
    if confidence is not None:
        result['confidence'] = confidence
    return result
Example #5
0
    def init(self):
        """
        Configure logging and attach the AWS resources, the ingest, error
        and text queues (named in settings), and an IrisClient to self.
        """
        self.set_logging()

        sqs_resource = aws.get_sqs_resource()
        self.sqs = sqs_resource
        self.s3 = aws.get_s3_resource()

        # Each queue is resolved by the name configured in settings.
        self.ingest_queue = aws.get_queue_by_name(sqs_resource,
                                                  settings.INGEST_QUEUE)
        self.error_queue = aws.get_queue_by_name(sqs_resource,
                                                 settings.ERROR_QUEUE)
        self.text_queue = aws.get_queue_by_name(sqs_resource,
                                                settings.TEXT_QUEUE)

        self.iris = iris_client.IrisClient()
Example #6
0
def plaintext_service():
    """
    Return JSON mapping the requested 'imageURI' to the image's words
    joined by single spaces, in word-index order.
    """
    uri = request.args.get('imageURI')

    text_data = get_text_index(aws.get_s3_resource(), uri)
    words = text_data.get("word_index")

    joined = " ".join(entry['text'] for entry in words)
    return jsonify({uri: joined})
Example #7
0
def get_error_queue():
    """Look up the queue named settings.ERROR_QUEUE using the module-level sqs."""
    queue = aws.get_queue_by_name(sqs, settings.ERROR_QUEUE)
    return queue


def get_pipeline():
    """Look up the pipeline named settings.PIPELINE using the module-level transcoder."""
    found = aws.get_pipeline_by_name(transcoder, settings.PIPELINE)
    return found


def get_random_prefix():
    """
    Return a prefix of the form "x/NNNN/", where NNNN is a random integer
    from 0 to 1000 inclusive, zero-padded to four digits.
    """
    number = random.randint(0, 1000)
    return "x/{:04d}/".format(number)


if __name__ == "__main__":

    # Verbose logging only when DEBUG is set in settings.
    logzero.loglevel(logging.DEBUG if settings.DEBUG else logging.INFO)

    requested_to_quit = False

    # Module-level handles. get_error_queue and get_pipeline read sqs and
    # transcoder as globals, so these must be assigned before those calls.
    sqs = aws.get_sqs_resource()
    transcoder = aws.get_transcoder_client()
    s3 = aws.get_s3_resource()

    input_queue = get_input_queue()
    error_queue = get_error_queue()
    pipeline = get_pipeline()
    preset_id_map = aws.get_preset_map(transcoder)

    main()
Example #8
0
def get_coords(image):
    """
    Produce scaled boxes for the word positions / phrases in a request.

    Example request:

    {"positions": [[25, 31], 100, 110], "height": 768, "width": 1024, "imageURI": "http://aplaceforstuff.co.uk/x/"}

    Each entry in "positions" is either a single int (one word) or a
    sequence of positions (a phrase). Coordinates are projected into the
    request's "width"/"height" when those keys are present, otherwise into
    the stored canvas_width/canvas_height.

    Returns {"image_uri": ...} alone when no text data is found,
    {"imageURI": ..., "message": "no coordinates available"} when the text
    data has no width or height, and otherwise {"image_uri": ...,
    "phrases": [...]} with one entry per requested position or phrase.
    Raises Exception when the text data has no word index.
    """
    s3 = aws.get_s3_resource()
    image_uri = image.get("imageURI")
    requested = image.get("positions")
    single_box = image.get("single-box") is True
    output = {"image_uri": image_uri}

    text_data = get_text_index(s3, image_uri)
    if text_data is None:
        return output

    o_width = text_data.get("width")
    o_height = text_data.get("height")
    canvas_width = text_data.get("canvas_width")
    canvas_height = text_data.get("canvas_height")
    if o_width is None or o_height is None:
        return {"imageURI": image_uri, "message": "no coordinates available"}

    word_index = text_data.get("word_index")
    start_index = text_data.get("start_index")

    # Target size: taken from the request if given, else the stored canvas.
    width = image.get("width") if 'width' in image else canvas_width
    height = image.get("height") if 'height' in image else canvas_height

    scale_w = float(width) / float(o_width)
    scale_h = float(height) / float(o_height)

    if word_index is None:
        raise Exception
        # TODO : handle better!

    phrases = []
    for entry in requested:
        if isinstance(entry, int):
            # Single word.
            idx = start_index.get(str(entry))
            if idx is None:
                # No data for this word: emit a null box for it.
                phrases.append([get_null_box_object(1)])
            else:
                word_box = get_box(word_index[idx], scale_w, scale_h)
                phrases.append(box_join(word_box, single_box=single_box))
            continue

        # Phrase: collect one box per word, unless any word is unknown.
        word_boxes = []
        for position in entry:
            idx = start_index.get(str(position))
            if idx is None:
                # One missing word makes the whole phrase a null box.
                joined = [get_null_box_object(len(entry))]
                break
            word_boxes.append(get_box(word_index[idx], scale_w, scale_h))
        else:
            joined = box_join(word_boxes, single_box=single_box)
        phrases.append(joined)

    output['phrases'] = phrases
    return output
Example #9
0
def get_coords(image):

    """
    Produce scaled boxes for the word positions / phrases in a request.

    Example request:

    {"positions": [[25, 31], 100, 110], "height": 768, "width": 1024, "imageURI": "http://aplaceforstuff.co.uk/x/"}

    Each entry in "positions" is either a single int (one word) or a
    sequence of positions (a phrase). Boxes are scaled from the stored
    width/height to the request's "width"/"height".

    Returns {"image_uri": ...} alone when no text data is found,
    {"imageURI": ..., "message": "no coordinates available"} when the text
    data has no width or height, and otherwise {"image_uri": ...,
    "phrases": [...]} with one entry per requested position or phrase.
    A word or phrase with no entry in the start index is reported as a
    null box rather than failing the whole request.
    Raises Exception when the text data has no word index.
    """

    boxes = []
    s3 = aws.get_s3_resource()
    image_uri = image.get("imageURI")
    positions = image.get("positions")
    text_data = get_text_index(s3, image_uri)
    if text_data is None:
        # Nothing stored for this image; there is nothing to look up.
        return {"image_uri": image_uri}

    o_width = text_data.get("width")
    o_height = text_data.get("height")
    if o_width is None or o_height is None:
        output = {
            "imageURI": image_uri,
            "message": "no coordinates available"
        }
        return output

    word_index = text_data.get("word_index")
    start_index = text_data.get("start_index")

    width = image.get("width")
    height = image.get("height")

    scale_w = float(width) / float(o_width)
    scale_h = float(height) / float(o_height)

    if word_index is None:
        raise Exception("text data for %s has no word index" % image_uri)

    for phrase_or_position in positions:
        if isinstance(phrase_or_position, int):
            # single word
            idx = start_index.get(str(phrase_or_position))
            if idx is None:
                # No data for this word: indexing word_index with None would
                # fail, so report a null box for it instead.
                p_boxes = [get_null_box_object(1)]
            else:
                p_boxes = box_join(get_box(word_index[idx], scale_w, scale_h))
        else:
            # phrase
            phrase_boxes = []
            p_boxes = None
            for position in phrase_or_position:
                idx = start_index.get(str(position))
                if idx is None:
                    # One unknown word makes the whole phrase a null box.
                    p_boxes = [get_null_box_object(len(phrase_or_position))]
                    break
                phrase_boxes.append(get_box(word_index[idx], scale_w, scale_h))
            if p_boxes is None:
                p_boxes = box_join(phrase_boxes)
        boxes.append(p_boxes)

    output = {
        "image_uri": image_uri,
        "phrases": boxes
    }
    return output