def if_azure(self):
    """Score Azure Computer Vision OCR output against the ground truth.

    Posts ``self.content`` as an octet stream to the Azure
    ``vision/v2.1/ocr`` endpoint, concatenates the text of every word in
    the response, cleans it with ``extract_clean_str`` and stores the
    result on ``self.detected_str``.

    The subscription key and endpoint are read from the
    ``COMPUTER_VISION_SUBSCRIPTION_KEY`` and ``COMPUTER_VISION_ENDPOINT``
    environment variables.

    Returns:
        A ``(status, score)`` tuple. ``status`` is ``0`` on success, the
        non-zero status reported by ``self.compute_ground_truth()`` when
        that step fails, or ``-4`` when the OCR call or the processing of
        its response raises. ``score`` is the ``jaccard_similarity`` of
        the detected text and the ground truth on success, otherwise ``0``.
    """
    ground_truth, xml_status = self.compute_ground_truth()
    if xml_status != 0:
        return xml_status, 0

    try:
        subscription_key = os.environ['COMPUTER_VISION_SUBSCRIPTION_KEY']
        endpoint = os.environ['COMPUTER_VISION_ENDPOINT']
        # Accept endpoints configured with or without a trailing slash so
        # the path is never glued directly onto the host name.
        ocr_url = endpoint.rstrip('/') + '/vision/v2.1/ocr'
        headers = {
            'Ocp-Apim-Subscription-Key': subscription_key,
            'Content-Type': 'application/octet-stream',
        }
        params = {'detectOrientation': 'true'}
        # NOTE(review): no timeout is passed here, so a stalled connection
        # blocks indefinitely; consider adding one.
        response = requests.post(ocr_url,
                                 headers=headers,
                                 params=params,
                                 data=self.content)
        response.raise_for_status()

        # The response nests words as regions -> lines -> words.
        words = [
            word['text']
            for region in response.json()['regions']
            for line in region['lines']
            for word in line['words']
        ]
        detected_str = extract_clean_str(''.join(words))
        self.detected_str = detected_str
        score = jaccard_similarity(detected_str, ground_truth)
        return 0, score
    except Exception:
        # Best effort: any ordinary failure (missing configuration, HTTP
        # error, unexpected payload) is reported as status -4.
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return -4, 0
def if_gc(self):
    """Score Google Cloud Vision text detection against the ground truth.

    Wraps ``self.content`` in a ``vision.types.Image``, runs
    ``text_detection`` on it, cleans the full-text annotation with
    ``extract_clean_str`` and stores the result on ``self.detected_str``.

    Returns:
        A ``(status, score)`` tuple. ``status`` is ``0`` on success, the
        non-zero status reported by ``self.compute_ground_truth()`` when
        that step fails, or ``-4`` when the detection call or the
        processing of its response raises. ``score`` is the
        ``jaccard_similarity`` of the detected text and the ground truth
        on success, otherwise ``0``.
    """
    ground_truth, xml_status = self.compute_ground_truth()
    if xml_status != 0:
        return xml_status, 0

    try:
        vision_client = vision.ImageAnnotatorClient()
        image = vision.types.Image(content=self.content)
        annotation = vision_client.text_detection(image=image)
        detected_str = extract_clean_str(
            annotation.full_text_annotation.text)
        self.detected_str = detected_str
        score = jaccard_similarity(detected_str, ground_truth)
        return 0, score
    except Exception:
        # Best effort: any ordinary failure is reported as status -4.
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return -4, 0
def if_aws(self):
    """Score AWS Rekognition text detection against the ground truth.

    Sends ``self.content`` as the ``Bytes`` of the image to Rekognition
    ``detect_text`` (client created for region ``us-east-1``),
    concatenates the ``DetectedText`` of every detection whose ``Type``
    is ``WORD``, cleans it with ``extract_clean_str`` and stores the
    result on ``self.detected_str``.

    Returns:
        A ``(status, score)`` tuple. ``status`` is ``0`` on success, the
        non-zero status reported by ``self.compute_ground_truth()`` when
        that step fails, or ``-4`` when the detection call or the
        processing of its response raises. ``score`` is the
        ``jaccard_similarity`` of the detected text and the ground truth
        on success, otherwise ``0``.
    """
    ground_truth, xml_status = self.compute_ground_truth()
    if xml_status != 0:
        return xml_status, 0

    try:
        imgobj = {'Bytes': self.content}
        client = boto3.client('rekognition', region_name='us-east-1')
        response = client.detect_text(Image=imgobj)
        # Only WORD detections are joined; other detection types are
        # skipped by the filter below.
        detected_str = extract_clean_str(''.join(
            detection['DetectedText']
            for detection in response['TextDetections']
            if detection['Type'] == 'WORD'))
        self.detected_str = detected_str
        score = jaccard_similarity(detected_str, ground_truth)
        return 0, score
    except Exception:
        # Best effort: any ordinary failure is reported as status -4.
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return -4, 0