コード例 #1
0
    def if_azure(self):
        """Score Azure Computer Vision OCR output against the ground truth.

        Posts ``self.content`` as an octet stream to the ``vision/v2.1/ocr``
        route of the endpoint named by the ``COMPUTER_VISION_ENDPOINT``
        environment variable, authenticating with
        ``COMPUTER_VISION_SUBSCRIPTION_KEY``. The text of every returned word
        is concatenated, passed through ``extract_clean_str``, stored on
        ``self.detected_str`` and compared with the ground truth via
        ``jaccard_similarity``.

        Returns:
            A ``(status, score)`` tuple:

            * ``(xml_status, 0)`` when ``compute_ground_truth`` reports a
              non-zero status;
            * ``(-4, 0)`` when reading the configuration, the HTTP request,
              response parsing or scoring raises an ``Exception``;
            * ``(0, score)`` on success.
        """
        ground_truth, xml_status = self.compute_ground_truth()
        if xml_status != 0:
            return xml_status, 0

        try:
            subscription_key = os.environ['COMPUTER_VISION_SUBSCRIPTION_KEY']
            endpoint = os.environ['COMPUTER_VISION_ENDPOINT']
            # Tolerate an endpoint configured with or without a trailing
            # slash; plain concatenation produced a broken URL without one.
            ocr_url = endpoint.rstrip('/') + "/vision/v2.1/ocr"
            headers = {
                'Ocp-Apim-Subscription-Key': subscription_key,
                'Content-Type': 'application/octet-stream',
            }
            params = {'detectOrientation': 'true'}
            response = requests.post(ocr_url,
                                     headers=headers,
                                     params=params,
                                     data=self.content)
            response.raise_for_status()

            # The response nests words as regions -> lines -> words; keep the
            # words in that order and join them without a separator.
            words = [
                word['text']
                for region in response.json()["regions"]
                for line in region["lines"]
                for word in line["words"]
            ]
            detected_str = extract_clean_str(''.join(words))
            self.detected_str = detected_str
            score = jaccard_similarity(detected_str, ground_truth)
            return 0, score
        except Exception:
            # Any ordinary failure maps to status -4. Catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            return -4, 0
コード例 #2
0
 def if_gc(self):
     """Score Google Cloud Vision text detection against the ground truth.

     Sends ``self.content`` to ``ImageAnnotatorClient.text_detection``,
     cleans the ``full_text_annotation.text`` of the response with
     ``extract_clean_str``, stores the result on ``self.detected_str`` and
     compares it with the ground truth via ``jaccard_similarity``.

     Returns:
         A ``(status, score)`` tuple:

         * ``(xml_status, 0)`` when ``compute_ground_truth`` reports a
           non-zero status;
         * ``(-4, 0)`` when creating the client, the API call or scoring
           raises an ``Exception``;
         * ``(0, score)`` on success.
     """
     ground_truth, xml_status = self.compute_ground_truth()
     if xml_status != 0:
         return xml_status, 0

     try:
         vision_client = vision.ImageAnnotatorClient()
         # NOTE(review): ``vision.types`` is not available in
         # google-cloud-vision >= 2.0 (``vision.Image`` replaces it) --
         # confirm the pinned library version.
         image = vision.types.Image(content=self.content)
         text_detection_response = vision_client.text_detection(image=image)
         annotation = text_detection_response.full_text_annotation
         detected_str = extract_clean_str(annotation.text)
         self.detected_str = detected_str
         score = jaccard_similarity(detected_str, ground_truth)
         return 0, score
     except Exception:
         # Any ordinary failure maps to status -4. Catching Exception
         # (not a bare except) lets KeyboardInterrupt/SystemExit through.
         return -4, 0
コード例 #3
0
    def if_aws(self):
        """Score Amazon Rekognition text detection against the ground truth.

        Sends ``self.content`` as the image bytes to ``detect_text`` on a
        Rekognition client for region ``us-east-1``. The ``DetectedText`` of
        every detection whose ``Type`` is ``'WORD'`` is concatenated, cleaned
        with ``extract_clean_str``, stored on ``self.detected_str`` and
        compared with the ground truth via ``jaccard_similarity``.

        Returns:
            A ``(status, score)`` tuple:

            * ``(xml_status, 0)`` when ``compute_ground_truth`` reports a
              non-zero status;
            * ``(-4, 0)`` when building the request, the API call or scoring
              raises an ``Exception``;
            * ``(0, score)`` on success.
        """
        ground_truth, xml_status = self.compute_ground_truth()
        if xml_status != 0:
            return xml_status, 0

        try:
            imgobj = {'Bytes': self.content}
            client = boto3.client('rekognition', region_name='us-east-1')
            response = client.detect_text(Image=imgobj)
            # Keep only WORD entries; presumably this avoids counting text
            # that Rekognition also reports at LINE level twice.
            words = [
                detection['DetectedText']
                for detection in response['TextDetections']
                if detection['Type'] == 'WORD'
            ]
            detected_str = extract_clean_str(''.join(words))
            self.detected_str = detected_str
            score = jaccard_similarity(detected_str, ground_truth)
            return 0, score
        except Exception:
            # Any ordinary failure maps to status -4. Catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            return -4, 0