def build_from_api_prediction(self, api_prediction, page_n=0):
    """
    Fill the passport attributes from a raw API prediction.

    :param api_prediction: Raw prediction from HTTP response
    :param page_n: Page number for multi pages pdf input
    :return: (void) set the object attributes with api prediction values
    """
    def as_field(key):
        # Plain field read from the prediction under `key`
        return Field(api_prediction[key], page_n=page_n)

    def as_date(key):
        # Date field read from the prediction under `key`
        return Date(api_prediction[key], "value", page_n=page_n)

    self.country = as_field("country")
    self.id_number = as_field("id_number")
    self.birth_date = as_date("birth_date")
    self.expiry_date = as_date("expiry_date")
    self.issuance_date = as_date("issuance_date")
    self.birth_place = as_field("birth_place")
    self.gender = as_field("gender")
    self.surname = as_field("surname")
    self.mrz1 = as_field("mrz1")
    self.mrz2 = as_field("mrz2")

    names = []
    for name_prediction in api_prediction["given_names"]:
        names.append(Field(name_prediction, page_n=page_n))
    self.given_names = names

    # mrz and full_name are not read from the prediction: they start as
    # empty placeholders with a probability of 0.
    self.mrz = Field({"value": None, "probability": 0.}, page_n=page_n)
    self.full_name = Field({"value": None, "probability": 0.}, page_n=page_n)
def __reconstruct_total_excl_from_tcc_and_taxes(self):
    """
    Set self.total_excl with Amount object
    The total_excl Amount value is the difference between total_incl and sum of taxes
    The total_excl Amount probability is the product of self.taxes probabilities
    multiplied by total_incl probability

    Nothing is done when there are no taxes, when total_incl has no value,
    or when total_excl already holds a value.
    """
    if not len(self.taxes) or self.total_incl.value is None:
        return

    # Do not overwrite a total_excl that is already known (for example one
    # supplied when the object was created from scratch).
    current = getattr(self, "total_excl", None)
    if current is not None and current.value is not None:
        return

    total_excl = {
        "value": self.total_incl.value - Field.array_sum(self.taxes),
        "probability": Field.array_probability(self.taxes) * self.total_incl.probability
    }
    self.total_excl = Amount(total_excl, value_key="value", reconstructed=True)
def test_equality():
    """A Field equals itself and differs from a Field built with another value."""
    first = Field({'value': "test", 'probability': 0.1})
    second = Field({'value': "other", 'probability': 0.1})

    assert first == first
    assert first != second
def __init__(
        self,
        api_prediction=None,
        input_file=None,
        license_plates=None,
        page_n=0
):
    """
    Build a CarPlate either from an API prediction or from explicit values.

    :param api_prediction: Raw prediction from HTTP response
    :param input_file: Input object
    :param license_plates: List of license plates values for creating CarPlate object from scratch
    :param page_n: Page number for multi pages pdf input
    """
    self.type = "CarPlate"
    self.license_plates = []

    if api_prediction is not None:
        # The API prediction takes precedence over explicit values
        self.build_from_api_prediction(api_prediction, page_n=page_n)
    elif license_plates is not None:
        self.license_plates = [
            Field({"value": plate}, value_key="value", page_n=page_n)
            for plate in license_plates
        ]

    # Invoke Document constructor
    super(CarPlate, self).__init__(input_file)

    # Run checks, then reconstruct extra fields
    self._checklist()
    self._reconstruct()
def compute_accuracy(passport, ground_truth):
    """
    Compare a Passport against its ground truth, field by field.

    Scalar fields are compared with ``==`` (ground truth on the left-hand
    side); given names are compared with Field.compare_arrays.

    :param passport: Passport object to compare
    :param ground_truth: Ground truth Passport object
    :return: Accuracy metrics
    """
    expected, actual = ground_truth, passport

    metrics = {}
    metrics["__acc__country"] = expected.country == actual.country
    metrics["__acc__id_number"] = expected.id_number == actual.id_number
    metrics["__acc__birth_date"] = expected.birth_date == actual.birth_date
    metrics["__acc__expiry_date"] = expected.expiry_date == actual.expiry_date
    metrics["__acc__issuance_date"] = expected.issuance_date == actual.issuance_date
    metrics["__acc__gender"] = expected.gender == actual.gender
    metrics["__acc__surname"] = expected.surname == actual.surname
    metrics["__acc__mrz1"] = expected.mrz1 == actual.mrz1
    metrics["__acc__mrz2"] = expected.mrz2 == actual.mrz2
    metrics["__acc__given_names"] = Field.compare_arrays(
        actual.given_names, expected.given_names)
    metrics["__acc__mrz"] = expected.mrz == actual.mrz
    metrics["__acc__full_name"] = expected.full_name == actual.full_name
    return metrics
def test_constructor_na():
    """A Field built from the "N/A" value is expected to expose a value of None."""
    na_field = Field({'value': "N/A", 'probability': 0.1})

    assert na_field.value is None
def test_constructor_no_segmentation():
    """Without a 'segmentation' entry, the Field's bbox is expected to be empty."""
    plain_field = Field({'value': "test", 'probability': 0.1})

    assert len(plain_field.bbox) == 0
def __reconstruct_mrz(self):
    """
    Set self.mrz with Field object
    The mrz Field value is the concatenation of mrz1 and mrz2
    The mrz Field probability is the product of mrz1 and mrz2 probabilities

    Nothing is done when mrz1 or mrz2 has no value, or when self.mrz
    already has one.
    """
    if self.mrz1.value is None or self.mrz2.value is None:
        return
    if self.mrz.value is not None:
        return

    mrz = {
        "value": self.mrz1.value + self.mrz2.value,
        # Pass the Field objects themselves rather than bare floats: the
        # other callers of array_probability hand it objects carrying a
        # `.probability` attribute (Field / tax lists).
        # NOTE(review): assumes array_probability reads `.probability`
        # from each element - confirm against its implementation.
        "probability": Field.array_probability([self.mrz1, self.mrz2])
    }
    self.mrz = Field(mrz, reconstructed=True)
def build_from_api_prediction(self, api_prediction, page_n=0):
    """
    Fill self.license_plates from a raw API prediction.

    :param api_prediction: Raw prediction from HTTP response
    :param page_n: Page number for multi pages pdf input
    :return: (void) set the object attributes with api prediction values
    """
    plates = []
    for plate_prediction in api_prediction["license_plates"]:
        plates.append(Field(plate_prediction, page_n=page_n))
    self.license_plates = plates
def build_from_api_prediction(self, api_prediction, page_n=0):
    """
    Fill the receipt attributes from a raw API prediction.

    :param api_prediction: Raw prediction from HTTP response
    :param page_n: Page number for multi pages pdf input
    :return: (void) set the object attributes with api prediction values
    """
    # Keyword arguments shared by every field read through its "value" key
    valued = dict(value_key="value", page_n=page_n)

    self.locale = Locale(api_prediction["locale"], page_n=page_n)
    self.total_incl = Amount(api_prediction["total_incl"], **valued)
    self.date = Date(api_prediction["date"], **valued)
    self.category = Field(api_prediction["category"], page_n=page_n)
    # The merchant name is read from the "supplier" entry of the prediction
    self.merchant_name = Field(api_prediction["supplier"], **valued)
    self.time = Field(api_prediction["time"], **valued)

    tax_list = []
    for tax_prediction in api_prediction["taxes"]:
        tax_list.append(
            Tax(tax_prediction, rate_key="rate", code_key="code", **valued))
    self.taxes = tax_list

    self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)

    # total_tax and total_excl are not read from the prediction: they
    # start as empty placeholders with a probability of 0.
    self.total_tax = Amount({"value": None, "probability": 0.}, **valued)
    self.total_excl = Amount({"value": None, "probability": 0.}, **valued)
def build_from_api_prediction(self, api_prediction, page_n=0):
    """
    Fill the invoice attributes from a raw API prediction.

    :param api_prediction: Raw prediction from HTTP response
    :param page_n: Page number for multi pages pdf input
    :return: (void) set the object attributes with api prediction values
    """
    # Keyword arguments shared by every field read through its "value" key
    valued = dict(value_key="value", page_n=page_n)

    registrations = []
    for company_reg in api_prediction["company_registration"]:
        registrations.append(
            Field(company_reg, extra_fields={"type"}, page_n=page_n))
    self.company_number = registrations

    self.invoice_date = Date(api_prediction["date"], **valued)
    self.due_date = Date(api_prediction["due_date"], **valued)
    self.invoice_number = Field(api_prediction["invoice_number"], page_n=page_n)
    self.locale = Locale(api_prediction["locale"], value_key="language", page_n=page_n)
    self.orientation = Orientation(api_prediction["orientation"], page_n=page_n)
    self.supplier = Field(api_prediction["supplier"], page_n=page_n)

    tax_list = []
    for tax_prediction in api_prediction["taxes"]:
        tax_list.append(Tax(tax_prediction, **valued))
    self.taxes = tax_list

    payments = []
    for payment_detail in api_prediction["payment_details"]:
        payments.append(PaymentDetails(payment_detail, page_n=page_n))
    self.payment_details = payments

    self.total_incl = Amount(api_prediction["total_incl"], **valued)
    self.total_excl = Amount(api_prediction["total_excl"], **valued)

    # total_tax is not read from the prediction: it starts as an empty
    # placeholder with a probability of 0.
    self.total_tax = Amount({"value": None, "probability": 0.}, **valued)
def __reconstruct_full_name(self):
    """
    Set self.full_name with Field object
    The full_name Field value is the concatenation of first given name and last name
    The full_name Field probability is the product of first given name and last name probabilities

    Nothing is done when the surname or the first given name has no value,
    when there is no given name at all, or when self.full_name already has
    a value.
    """
    if self.surname.value is None or len(self.given_names) == 0:
        return

    first_name = self.given_names[0]
    if first_name.value is None or self.full_name.value is not None:
        return

    full_name = {
        "value": first_name.value + " " + self.surname.value,
        # Pass the Field objects themselves rather than bare floats: the
        # other callers of array_probability hand it objects carrying a
        # `.probability` attribute (Field / tax lists).
        # NOTE(review): assumes array_probability reads `.probability`
        # from each element - confirm against its implementation.
        "probability": Field.array_probability([self.surname, first_name])
    }
    self.full_name = Field(full_name, reconstructed=True)
def compute_accuracy(license_plate, ground_truth):
    """
    Compare the license plates of a CarPlate against its ground truth.

    :param license_plate: CarPlate object to compare
    :param ground_truth: Ground truth CarPlate object
    :return: Accuracy metrics
    """
    plates_score = Field.compare_arrays(
        license_plate.license_plates,
        ground_truth.license_plates,
    )
    return {"__acc__license_plates": plates_score}
def __reconstruct_total_tax(self):
    """
    Set self.total_tax with Amount object
    The total_tax Amount value is the sum of all self.taxes value
    The total_tax Amount probability is the product of self.taxes probabilities

    Only runs when there is at least one tax and total_tax has no value yet;
    the result is kept only if the summed value is strictly positive.
    """
    if not len(self.taxes) or self.total_tax.value is not None:
        return

    # Taxes without a value count as 0 in the sum
    taxes_sum = sum(0 if tax.value is None else tax.value for tax in self.taxes)
    candidate = {
        "value": taxes_sum,
        "probability": Field.array_probability(self.taxes)
    }

    if candidate["value"] > 0:
        self.total_tax = Amount(candidate, value_key="value", reconstructed=True)
def build_from_api_prediction(self, api_prediction, input_file, page_n=0):
    """
    Fill the attributes by delegating to Invoice or Receipt.

    A prediction containing an "invoice_number" key is parsed as an Invoice,
    anything else as a Receipt. Attributes the chosen document type does not
    provide are set to empty placeholder Fields (value None, probability 0.)
    carrying the same page number as the rest of the document.

    :param api_prediction: Raw prediction from HTTP response
    :param input_file: Input object
    :param page_n: Page number for multi pages pdf input
    :return: (void) set the object attributes with api prediction values
    """
    if "invoice_number" in api_prediction:
        invoice = Invoice(api_prediction, input_file, page_n=page_n)
        self.locale = invoice.locale
        self.total_incl = invoice.total_incl
        self.total_excl = invoice.total_excl
        self.date = invoice.invoice_date
        self.invoice_number = invoice.invoice_number
        self.due_date = invoice.due_date
        self.taxes = invoice.taxes
        self.merchant_name = invoice.supplier
        self.payment_details = invoice.payment_details
        self.company_number = invoice.company_number
        self.orientation = invoice.orientation
        self.total_tax = invoice.total_tax
        # No time on the invoice side: empty placeholder
        self.time = Field({"value": None, "probability": 0.}, page_n=page_n)
    else:
        receipt = Receipt(api_prediction, input_file, page_n=page_n)
        self.orientation = receipt.orientation
        self.date = receipt.date
        # The receipt date is used for due_date as well
        self.due_date = receipt.date
        self.taxes = receipt.taxes
        self.locale = receipt.locale
        self.total_incl = receipt.total_incl
        self.total_excl = receipt.total_excl
        self.merchant_name = receipt.merchant_name
        self.time = receipt.time
        self.total_tax = receipt.total_tax
        # Invoice-only attributes: empty placeholders
        self.invoice_number = Field({"value": None, "probability": 0.}, page_n=page_n)
        # NOTE(review): in the invoice branch these two appear to come from
        # lists built by the invoice parser, while here they are single
        # Fields - confirm which shape consumers expect.
        self.payment_details = Field({"value": None, "probability": 0.}, page_n=page_n)
        self.company_number = Field({"value": None, "probability": 0.}, page_n=page_n)
def compute_precision(license_plate, ground_truth):
    """
    Compute the license plates precision of a CarPlate against its ground truth.

    The metric is None when the CarPlate holds no license plate at all.

    :param license_plate: CarPlate object to compare
    :param ground_truth: Ground truth CarPlate object
    :return: Precisions metrics
    """
    if len(license_plate.license_plates) == 0:
        # Nothing was predicted
        return {"__pre__license_plates": None}

    return {
        "__pre__license_plates": Field.compare_arrays(
            license_plate.license_plates,
            ground_truth.license_plates,
        )
    }
def compute_precision(passport, ground_truth):
    """
    Compute per-field precision metrics of a Passport against its ground truth.

    Scalar fields go through Benchmark.scalar_precision_score; given names
    go through Field.compare_arrays, or are reported as None when the
    passport holds no given name.

    :param passport: Passport object to compare
    :param ground_truth: Ground truth Passport object
    :return: Precision metrics
    """
    score = Benchmark.scalar_precision_score
    actual, expected = passport, ground_truth

    precisions = {}
    precisions["__pre__country"] = score(actual.country, expected.country)
    precisions["__pre__id_number"] = score(actual.id_number, expected.id_number)
    precisions["__pre__birth_date"] = score(actual.birth_date, expected.birth_date)
    precisions["__pre__expiry_date"] = score(actual.expiry_date, expected.expiry_date)
    precisions["__pre__issuance_date"] = score(actual.issuance_date, expected.issuance_date)
    precisions["__pre__gender"] = score(actual.gender, expected.gender)
    precisions["__pre__surname"] = score(actual.surname, expected.surname)
    precisions["__pre__mrz1"] = score(actual.mrz1, expected.mrz1)
    precisions["__pre__mrz2"] = score(actual.mrz2, expected.mrz2)
    precisions["__pre__mrz"] = score(actual.mrz, expected.mrz)
    precisions["__pre__full_name"] = score(actual.full_name, expected.full_name)

    if len(actual.given_names) == 0:
        precisions["__pre__given_names"] = None
    else:
        precisions["__pre__given_names"] = Field.compare_arrays(
            actual.given_names, expected.given_names)

    return precisions
def test_constructor():
    """Value, probability and a non-empty bbox are expected from a full prediction dict."""
    corners = [
        [0.016, 0.707],
        [0.414, 0.707],
        [0.414, 0.831],
        [0.016, 0.831]
    ]
    prediction = {
        'value': "test",
        'probability': 0.1,
        'segmentation': {"bounding_box": corners}
    }

    built = Field(prediction)

    assert built.value == "test"
    assert built.probability == 0.1
    assert len(built.bbox) > 0
def __reconstruct_total_excl_from_tcc_and_taxes(self):
    """
    Set self.total_excl with Amount object
    The total_excl Amount value is the difference between total_incl and sum of taxes
    The total_excl Amount probability is the product of self.taxes probabilities
    multiplied by total_incl probability

    Nothing is done when total_incl has no value, when there are no taxes,
    or when total_excl already has a value.
    """
    # Reconstruction needs total_incl and at least one tax, and never
    # replaces an existing total_excl
    if self.total_incl.value is None:
        return
    if len(self.taxes) == 0:
        return
    if self.total_excl.value is not None:
        return

    # Taxes without a value count as 0 in the sum
    taxes_sum = sum(0 if tax.value is None else tax.value for tax in self.taxes)
    rebuilt = {
        "value": self.total_incl.value - taxes_sum,
        "probability": Field.array_probability(self.taxes) * self.total_incl.probability
    }
    self.total_excl = Amount(rebuilt, value_key="value", reconstructed=True)
def __init__(self,
             api_prediction=None,
             input_file=None,
             locale=None,
             total_incl=None,
             total_excl=None,
             invoice_date=None,
             invoice_number=None,
             due_date=None,
             taxes=None,
             supplier=None,
             payment_details=None,
             company_number=None,
             vat_number=None,
             orientation=None,
             total_tax=None,
             page_n=0):
    """
    Build an Invoice either from an API prediction or from explicit values.

    :param api_prediction: Raw prediction from HTTP response
    :param input_file: Input object
    :param locale: locale value for creating Invoice object from scratch
    :param total_incl: total_incl value for creating Invoice object from scratch
    :param total_excl: total_excl value for creating Invoice object from scratch
    :param invoice_date: invoice_date value for creating Invoice object from scratch
    :param invoice_number: invoice_number value for creating Invoice object from scratch
    :param due_date: due_date value for creating Invoice object from scratch
    :param taxes: taxes value for creating Invoice object from scratch,
        an iterable of (value, rate) pairs
    :param supplier: supplier value for creating Invoice object from scratch
    :param payment_details: payment_details value for creating Invoice object from scratch
    :param company_number: company_number value for creating Invoice object from scratch
    :param vat_number: accepted but not used by this constructor
    :param orientation: orientation value for creating Invoice object from scratch
    :param total_tax: total_tax value for creating Invoice object from scratch
    :param page_n: Page number for multi pages pdf input
    """
    self.type = "Invoice"

    # Defaults, overwritten by one of the two branches below
    self.locale = self.total_incl = self.total_excl = None
    self.invoice_date = self.invoice_number = self.due_date = None
    self.taxes = []
    self.supplier = self.payment_details = self.company_number = None
    self.orientation = self.total_tax = None

    if api_prediction is not None:
        self.build_from_api_prediction(api_prediction, page_n=page_n)
    else:
        # Keyword arguments shared by every field created from scratch
        scratch = dict(value_key="value", page_n=page_n)

        self.locale = Locale({"value": locale}, **scratch)
        self.total_incl = Amount({"value": total_incl}, **scratch)
        # The invoice date is exposed both as `date` and as `invoice_date`
        self.date = Date({"value": invoice_date}, **scratch)
        self.invoice_date = Date({"value": invoice_date}, **scratch)
        self.due_date = Date({"value": due_date}, **scratch)
        self.supplier = Field({"value": supplier}, **scratch)
        if taxes is not None:
            tax_list = []
            for pair in taxes:
                tax_list.append(
                    Tax({"value": pair[0], "rate": pair[1]},
                        rate_key="rate", **scratch))
            self.taxes = tax_list
        self.orientation = Orientation({"value": orientation}, **scratch)
        self.total_tax = Amount({"value": total_tax}, **scratch)
        self.total_excl = Amount({"value": total_excl}, **scratch)
        self.invoice_number = Field({"value": invoice_number}, **scratch)
        self.payment_details = Field({"value": payment_details}, **scratch)
        self.company_number = Field({"value": company_number}, **scratch)

    # Invoke Document constructor
    super(Invoice, self).__init__(input_file)

    # Run checks, then reconstruct extra fields
    self._checklist()
    self._reconstruct()
def test_no_probability():
    """A prediction without a 'probability' key is expected to give a probability of 0."""
    without_probability = Field({'value': "N/A"})

    assert without_probability.probability == 0.
def test_array_probability():
    """
    array_probability is expected to be the product of the probabilities,
    and 0. when one of them is None.
    """
    both_set = [
        Field({"value": None, "probability": probability})
        for probability in (0.1, 0.8)
    ]
    assert Field.array_probability(both_set) == 0.8*0.1

    one_missing = [
        Field({"value": None, "probability": probability})
        for probability in (0.1, None)
    ]
    assert Field.array_probability(one_missing) == 0.
def test_array_sum():
    """
    array_sum is expected to add up the values, and to be 0. when one of
    the values is None.
    """
    all_valued = [
        Field({"value": 1, "probability": 0.1}),
        Field({"value": 2, "probability": 0.8}),
    ]
    assert Field.array_sum(all_valued) == 3

    one_missing = [
        Field({"value": None, "probability": 0.1}),
        Field({"value": 4, "probability": 0.8}),
    ]
    assert Field.array_sum(one_missing) == 0.
def __init__(self,
             api_prediction=None,
             input_file=None,
             country=None,
             id_number=None,
             birth_date=None,
             expiry_date=None,
             issuance_date=None,
             birth_place=None,
             gender=None,
             surname=None,
             mrz1=None,
             mrz2=None,
             given_names=None,
             mrz=None,
             full_name=None,
             page_n=0):
    """
    Build a Passport either from an API prediction or from explicit values.

    :param api_prediction: Raw prediction from HTTP response
    :param input_file: Input object
    :param country: country value for creating Passport object from scratch
    :param id_number: id_number value for creating Passport object from scratch
    :param birth_date: birth_date value for creating Passport object from scratch
    :param expiry_date: expiry_date value for creating Passport object from scratch
    :param issuance_date: issuance_date value for creating Passport object from scratch
    :param birth_place: birth_place value for creating Passport object from scratch
    :param gender: gender value for creating Passport object from scratch
    :param surname: surname value for creating Passport object from scratch
    :param mrz1: mrz1 value for creating Passport object from scratch
    :param mrz2: mrz2 value for creating Passport object from scratch
    :param given_names: given_names value for creating Passport object from scratch
    :param mrz: mrz value for creating Passport object from scratch
    :param full_name: full_name value for creating Passport object from scratch
    :param page_n: Page number for multi pages pdf input
    """
    # Raw data
    self.type = "Passport"
    self.country = None
    self.id_number = None
    self.birth_date = None
    self.expiry_date = None
    self.issuance_date = None
    self.birth_place = None
    self.gender = None
    self.surname = None
    self.mrz1 = None
    self.mrz2 = None
    self.given_names = []
    self.mrz = None
    self.full_name = None

    if api_prediction is not None:
        # Forward page_n so fields built from the prediction carry the
        # page number given by the caller instead of the default 0
        self.build_from_api_prediction(api_prediction, page_n=page_n)
    else:
        self.country = Field({"value": country}, value_key="value", page_n=page_n)
        self.id_number = Field({"value": id_number}, value_key="value", page_n=page_n)
        self.birth_date = Date({"value": birth_date}, value_key="value", page_n=page_n)
        self.expiry_date = Date({"value": expiry_date}, value_key="value", page_n=page_n)
        self.issuance_date = Date({"value": issuance_date}, value_key="value", page_n=page_n)
        self.birth_place = Field({"value": birth_place}, value_key="value", page_n=page_n)
        self.gender = Field({"value": gender}, value_key="value", page_n=page_n)
        self.surname = Field({"value": surname}, value_key="value", page_n=page_n)
        self.mrz1 = Field({"value": mrz1}, value_key="value", page_n=page_n)
        self.mrz2 = Field({"value": mrz2}, value_key="value", page_n=page_n)
        if given_names is not None:
            self.given_names = [
                Field({"value": given_name}, value_key="value", page_n=page_n)
                for given_name in given_names
            ]
        self.mrz = Field({"value": mrz}, value_key="value", page_n=page_n)
        self.full_name = Field({"value": full_name}, value_key="value", page_n=page_n)

    # Invoke Document constructor
    super(Passport, self).__init__(input_file)

    # Run checks
    self._checklist()

    # Reconstruct extra fields
    self._reconstruct()
def __init__(
        self,
        api_prediction=None,
        input_file=None,
        locale=None,
        total_incl=None,
        date=None,
        category=None,
        merchant_name=None,
        time=None,
        taxes=None,
        orientation=None,
        total_tax=None,
        total_excl=None,
        page_n=0
):
    """
    Build a Receipt either from an API prediction or from explicit values.

    :param api_prediction: Raw prediction from HTTP response
    :param input_file: Input object
    :param locale: locale value for creating Receipt object from scratch
    :param total_incl: total_incl value for creating Receipt object from scratch
    :param date: date value for creating Receipt object from scratch
    :param category: category value for creating Receipt object from scratch
    :param merchant_name: merchant_name value for creating Receipt object from scratch
    :param time: time value for creating Receipt object from scratch
    :param taxes: taxes value for creating Receipt object from scratch,
        an iterable of (value, rate) pairs
    :param orientation: orientation value for creating Receipt object from scratch
    :param total_tax: total_tax value for creating Receipt object from scratch
    :param total_excl: total_excl value for creating Receipt object from scratch
    :param page_n: Page number for multi pages pdf input
    """
    self.type = "Receipt"

    # Defaults, overwritten by one of the two branches below
    self.locale = self.total_incl = self.date = None
    self.category = self.merchant_name = self.time = None
    self.taxes = []
    self.orientation = self.total_tax = self.total_excl = None

    if api_prediction is not None:
        self.build_from_api_prediction(api_prediction, page_n=page_n)
    else:
        # Keyword arguments shared by every field created from scratch
        scratch = dict(value_key="value", page_n=page_n)

        self.locale = Locale({"value": locale}, **scratch)
        self.total_incl = Amount({"value": total_incl}, **scratch)
        self.date = Date({"value": date}, **scratch)
        self.category = Field({"value": category}, **scratch)
        self.merchant_name = Field({"value": merchant_name}, **scratch)
        self.time = Field({"value": time}, **scratch)
        if taxes is not None:
            tax_list = []
            for pair in taxes:
                tax_list.append(
                    Tax({"value": pair[0], "rate": pair[1]},
                        rate_key="rate", **scratch))
            self.taxes = tax_list
        self.orientation = Orientation({"value": orientation}, **scratch)
        self.total_tax = Amount({"value": total_tax}, **scratch)
        self.total_excl = Amount({"value": total_excl}, **scratch)

    # Invoke Document constructor
    super(Receipt, self).__init__(input_file)

    # Run checks, then reconstruct extra fields
    self._checklist()
    self._reconstruct()