def parse(cls, value: str) -> Upc:
    """Turn a UPC-A or UPC-E string into a :class:`Upc` object.

    Surrounding whitespace is ignored. A 12-character value is delegated to
    ``cls._parse_upc_a`` and a 6-, 7-, or 8-character value is delegated to
    ``cls._parse_upc_e``.

    Args:
        value: The value to parse.

    Returns:
        UPC data structure with the successfully extracted data.
        The checksum is guaranteed to be valid if an UPC object is returned.

    Raises:
        ParseError: If the value has an unsupported length or contains
            anything other than decimal digits.
    """
    value = value.strip()
    num_chars = len(value)

    if num_chars not in (6, 7, 8, 12):
        raise ParseError(
            f"Failed to parse {value!r} as UPC: "
            f"Expected 6, 7, 8, or 12 digits, got {num_chars}."
        )

    if not value.isdecimal():
        raise ParseError(
            f"Failed to parse {value!r} as UPC: Expected a numerical value."
        )

    # The length check above leaves only 12 (UPC-A) or 6/7/8 (UPC-E).
    handler = cls._parse_upc_a if num_chars == 12 else cls._parse_upc_e
    return handler(value)
def _parse_upc_e(cls, value: str) -> Upc:
    """Parse a 6-, 7-, or 8-digit UPC-E value.

    The three accepted lengths differ in which parts are explicit:

    - 6 digits: number system 0 is implied and the check digit is computed.
    - 7 digits: the number system digit is explicit, the check digit is
      computed.
    - 8 digits: both the number system digit and the check digit are explicit.

    Args:
        value: The digits to parse. The caller is expected to have stripped
            and validated the value already.

    Returns:
        A ``cls`` instance with ``format`` set to ``UpcFormat.UPC_E``.

    Raises:
        ParseError: If the number system digit is not 0 or 1, or if the check
            digit does not match the one calculated from the UPC-A expansion.
    """
    size = len(value)
    assert size in (6, 7, 8)

    # Normalize to a 7-digit payload: number system digit + 6 data digits.
    if size == 6:
        payload = f"0{value}"
    elif size in (7, 8):
        payload = value[:7]
    else:
        raise Exception(  # pragma: no cover
            "Unhandled UPC-E length. This is a bug."
        )
    number_system_digit = int(payload[0])

    if size == 8:
        check_digit = int(value[-1])
    else:
        # No check digit given: expand with a placeholder check digit, drop
        # the placeholder again, and calculate the real one.
        expanded = _upc_e_to_upc_a_expansion(f"{payload}0")[:-1]
        check_digit = numeric_check_digit(expanded)

    # Control that the number system digit is correct.
    if number_system_digit not in (0, 1):
        raise ParseError(
            f"Invalid UPC-E number system for {value!r}: "
            f"Expected 0 or 1, got {number_system_digit!r}."
        )

    # Control that check digit is correct.
    upc_a_payload = _upc_e_to_upc_a_expansion(f"{payload}{check_digit}")[:-1]
    calculated_check_digit = numeric_check_digit(upc_a_payload)
    if calculated_check_digit != check_digit:
        raise ParseError(
            f"Invalid UPC-E check digit for {value!r}: "
            f"Expected {calculated_check_digit!r}, got {check_digit!r}."
        )

    return cls(
        value=value,
        format=UpcFormat.UPC_E,
        payload=payload,
        number_system_digit=number_system_digit,
        check_digit=check_digit,
    )
def extract(cls, value: str) -> GS1ApplicationIdentifier:
    """Extract the GS1 Application Identifier (AI) from the given value.

    The known AIs are tried in the order they appear in
    ``_GS1_APPLICATION_IDENTIFIERS``; the first one that ``value`` starts
    with is returned.

    Args:
        value: The string to extract an AI from.

    Returns:
        Metadata about the extracted AI.

    Raises:
        ParseError: If the parsing fails.

    Example:
        >>> from biip.gs1 import GS1ApplicationIdentifier
        >>> GS1ApplicationIdentifier.extract("010703206980498815210526100329")
        GS1ApplicationIdentifier(ai='01', description='Global Trade Item
        Number (GTIN)', data_title='GTIN', fnc1_required=False,
        format='N2+N14')
    """
    candidates = (
        known_ai
        for known_ai in _GS1_APPLICATION_IDENTIFIERS
        if value.startswith(known_ai.ai)
    )
    found = next(candidates, None)
    if found is None:
        raise ParseError(
            f"Failed to get GS1 Application Identifier from {value!r}."
        )
    return found
def extract(cls, value: str) -> Optional[GS1Prefix]:
    """Extract the GS1 Prefix from the given value.

    Every range in ``_GS1_PREFIX_RANGES`` is tried in order. For each one,
    the leading ``length`` characters of ``value`` are compared against the
    range's ``min_value``/``max_value`` bounds.

    Args:
        value: The string to extract a GS1 Prefix from.

    Returns:
        Metadata about the extracted prefix, or `None` if the prefix is
        unknown.

    Raises:
        ParseError: If the parsing fails.
    """
    for candidate_range in _GS1_PREFIX_RANGES:
        prefix = value[: candidate_range.length]
        if prefix.isdecimal():
            lower, upper = candidate_range.min_value, candidate_range.max_value
            if lower <= int(prefix) <= upper:
                return cls(value=prefix, usage=candidate_range.usage)

    # `prefix` is left over from the last range tried. Per the original
    # author's note this is the shortest prefix possible (which presumes the
    # ranges are ordered longest first), and it should be numeric even if the
    # prefix assignment is unknown.
    if prefix.isdecimal():
        return None
    raise ParseError(f"Failed to get GS1 Prefix from {value!r}.")
def extract(cls, value: str) -> SymbologyIdentifier:
    """Extract the Symbology Identifier from the given value.

    A Symbology Identifier is read as the ``]`` flag character, one code
    character, and one or more modifier characters. For
    ``Symbology.SYSTEM_EXPANSION`` the first modifier is a digit giving the
    number of modifier characters that follow it.

    Args:
        value: The string to extract a Symbology Identifier from.

    Returns:
        Metadata about the extracted Symbology Identifier.

    Raises:
        ParseError: If the parsing fails, including when the value is too
            short to contain a code character or a modifier length digit.
    """
    if not value.startswith("]"):
        raise ParseError(
            f"Failed to get Symbology Identifier from {value!r}. "
            "No initial ']' flag character found."
        )

    # A bare "]" has no code character. Report it as a parse failure instead
    # of letting the index lookup below raise IndexError.
    if len(value) < 2:
        raise ParseError(
            f"Failed to get Symbology Identifier from {value!r}. "
            "No code character found after the ']' flag character."
        )

    try:
        symbology = Symbology(value[1])
    except ValueError as exc:
        raise ParseError(
            f"Failed to get Symbology Identifier from {value!r}. "
            f"{value[1]!r} is not a recognized code character."
        ) from exc

    if symbology == Symbology.SYSTEM_EXPANSION:
        # The length digit itself counts as a modifier character, hence +1.
        try:
            modifiers_length = int(value[2]) + 1
        except (IndexError, ValueError) as exc:
            raise ParseError(
                f"Failed to get Symbology Identifier from {value!r}. "
                "Expected a digit giving the number of modifier characters "
                "after the code character."
            ) from exc
    else:
        modifiers_length = 1

    modifiers = value[2 : 2 + modifiers_length]

    # Keep only the part of the input that makes up the identifier.
    value = f"]{symbology.value}{modifiers}"

    # Not every symbology/modifier combination is a GS1 symbology.
    gs1_symbology: Optional[GS1Symbology]
    try:
        gs1_symbology = GS1Symbology(f"{symbology.value}{modifiers}")
    except ValueError:
        gs1_symbology = None

    return cls(
        value=value,
        symbology=symbology,
        modifiers=modifiers,
        gs1_symbology=gs1_symbology,
    )
def _set_date(self: GS1ElementString) -> None:
    """Populate ``self.date`` when the element's AI is one of the date AIs.

    Only AIs 11, 12, 13, 15, 16, and 17 are handled; for any other AI the
    method does nothing.

    Raises:
        ParseError: If ``_parse_date`` rejects the element's value with a
            ``ValueError``.
    """
    date_ais = ("11", "12", "13", "15", "16", "17")
    if self.ai.ai in date_ais:
        try:
            self.date = _parse_date(self.value)
        except ValueError:
            raise ParseError(
                f"Failed to parse GS1 AI {self.ai} date from {self.value!r}."
            )
def parse(cls: Type[Sscc], value: str) -> Sscc:
    """Parse the given value into a :class:`Sscc` object.

    Surrounding whitespace is ignored. The value must be exactly 18 decimal
    digits: an extension digit, the GS1 prefix and remaining payload, and a
    trailing check digit.

    Args:
        value: The value to parse.

    Returns:
        SSCC data structure with the successfully extracted data.
        The checksum is guaranteed to be valid if an SSCC object is returned.

    Raises:
        ParseError: If the parsing fails.
    """
    value = value.strip()

    if len(value) != 18:
        raise ParseError(
            f"Failed to parse {value!r} as SSCC: "
            f"Expected 18 digits, got {len(value)}."
        )

    # isdecimal(), not isnumeric(): the latter also accepts characters such
    # as superscripts and vulgar fractions, which int() below cannot convert.
    if not value.isdecimal():
        raise ParseError(
            f"Failed to parse {value!r} as SSCC: Expected a numerical value."
        )

    # The GS1 prefix starts right after the leading extension digit.
    value_without_extension_digit = value[1:]
    prefix = GS1Prefix.extract(value_without_extension_digit)

    extension_digit = int(value[0])
    payload = value[:-1]
    check_digit = int(value[-1])

    calculated_check_digit = numeric_check_digit(payload)
    if check_digit != calculated_check_digit:
        raise ParseError(
            f"Invalid SSCC check digit for {value!r}: "
            f"Expected {calculated_check_digit!r}, got {check_digit!r}."
        )

    return cls(
        value=value,
        prefix=prefix,
        extension_digit=extension_digit,
        payload=payload,
        check_digit=check_digit,
    )
def extract(
    cls,
    value: str,
    *,
    rcn_region: Optional[RcnRegion] = None,
    separator_chars: Iterable[str] = DEFAULT_SEPARATOR_CHARS,
) -> GS1ElementString:
    """Extract the first GS1 Element String from the given value.

    Args:
        value: The string to extract an Element String from. May contain
            more than one Element String.
        rcn_region: The geographical region whose rules should be used to
            interpret Restricted Circulation Numbers (RCN).
            Needed to extract e.g. variable weight/price from GTIN.
        separator_chars: Characters used in place of the FNC1 symbol.
            Defaults to `<GS>` (ASCII value 29).
            If variable-length fields are not terminated with a separator
            character, the parser might greedily consume later fields.

    Returns:
        A data class with the Element String's parts and data extracted from
        it.

    Raises:
        ValueError: If any of the ``separator_chars`` isn't exactly
            1 character long.
        ParseError: If the parsing fails.
    """
    # Materialize once: the separators are iterated several times below, and
    # a one-shot iterator would be exhausted by the validation alone.
    separator_chars = tuple(separator_chars)

    if any(len(char) != 1 for char in separator_chars):
        raise ValueError(
            "All separator characters must be exactly 1 character long, "
            f"got {list(separator_chars)!r}."
        )

    ai = GS1ApplicationIdentifier.extract(value)

    # Cut the input at the first occurrence of any separator so the pattern
    # below cannot match past the end of this Element String.
    for separator_char in separator_chars:
        value = value.split(separator_char, maxsplit=1)[0]

    # Drop a trailing "$" anchor so the pattern may match just the start of
    # a value that contains further Element Strings.
    pattern = ai.pattern[:-1] if ai.pattern.endswith("$") else ai.pattern
    matches = re.match(pattern, value)
    if not matches:
        raise ParseError(
            f"Failed to match {value!r} with GS1 AI {ai} pattern '{ai.pattern}'."
        )

    pattern_groups = list(matches.groups())
    value = "".join(pattern_groups)

    element = cls(ai=ai, value=value, pattern_groups=pattern_groups)
    element._set_gln()
    element._set_gtin(rcn_region=rcn_region)
    element._set_sscc()
    element._set_date()
    element._set_decimal()

    return element
def verify_check_digit(self, rcn: Rcn) -> None:
    """Validate the check digit that protects the variable measure value.

    Nothing is checked when ``self.check_digit_slice`` is ``None``.

    Args:
        rcn: The RCN whose ``value`` holds both the variable measure digits
            (at ``self.value_slice``) and the check digit (at
            ``self.check_digit_slice``).

    Raises:
        ParseError: If the check digit found in the RCN differs from the one
            calculated by ``checksums.price_check_digit``.
    """
    digit_slice = self.check_digit_slice
    if digit_slice is None:
        return None

    value = rcn.value[self.value_slice]
    found = int(rcn.value[digit_slice])
    expected = checksums.price_check_digit(value)

    if found != expected:
        raise ParseError(
            f"Invalid check digit for variable measure value {value!r} "
            f"in RCN {rcn.value!r}: "
            f"Expected {expected!r}, got {found!r}."
        )
def parse(
    cls,
    value: str,
    *,
    rcn_region: Optional[RcnRegion] = None,
    separator_chars: Iterable[str] = DEFAULT_SEPARATOR_CHARS,
) -> GS1Message:
    """Parse a string from a barcode scan as a GS1 message with AIs.

    Element Strings are extracted one at a time from the front of the value
    until nothing is left.

    Args:
        value: The string to parse.
        rcn_region: The geographical region whose rules should be used to
            interpret Restricted Circulation Numbers (RCN).
            Needed to extract e.g. variable weight/price from GTIN.
        separator_chars: Characters used in place of the FNC1 symbol.
            Defaults to `<GS>` (ASCII value 29).
            If variable-length fields in the middle of the message are
            not terminated with a separator character, the parser might
            greedily consume the rest of the message.

    Returns:
        A message object with one or more element strings.

    Raises:
        ParseError: If a fixed-length field ends with a separator character.
            Errors raised by ``GS1ElementString.extract`` also propagate
            unchanged.
    """
    # Materialize once: the separators are used on every pass of the loop
    # below, and a one-shot iterator would be empty after the first use.
    separator_chars = tuple(separator_chars)

    value = value.strip()
    element_strings = []
    rest = value[:]

    while rest:
        element_string = GS1ElementString.extract(
            rest, rcn_region=rcn_region, separator_chars=separator_chars
        )
        element_strings.append(element_string)

        rest = rest[len(element_string) :]

        if rest.startswith(separator_chars):
            if element_string.ai.fnc1_required:
                # The separator terminated a variable-length field; skip it.
                rest = rest[1:]
            else:
                separator_char = rest[0]
                raise ParseError(
                    f"Element String {element_string.as_hri()!r} has fixed length "
                    "and should not end with a separator character. "
                    f"Separator character {separator_char!r} found in {value!r}."
                )

    return cls(value=value, element_strings=element_strings)
def _parse_using_british_price_rules(self: Rcn) -> None:
    """Set ``self.price`` from a payload that starts with "20".

    Payloads with any other two-character start are left untouched. The last
    four payload digits are the price data and the digit before them is its
    check digit.

    References:
        https://www.gs1uk.org/how-to-barcode-variable-measure-items

    Raises:
        ParseError: If the price check digit does not match the one
            calculated by ``checksums.price_check_digit``.
    """
    if self.payload[:2] != "20":
        return

    found_digit = int(self.payload[-5])
    value = self.payload[-4:]

    expected_digit = checksums.price_check_digit(value)
    if found_digit != expected_digit:
        raise ParseError(
            f"Invalid price check digit for price data {value!r} "
            f"in RCN {self.value!r}: "
            f"Expected {expected_digit!r}, got {found_digit!r}."
        )

    # The four digits are divided by 100 to get the stored price.
    self.price = Decimal(value) / 100
def _parse_upc_a(cls, value: str) -> Upc:
    """Parse a 12-digit UPC-A value.

    The first digit is the number system digit, the last digit is the check
    digit, and everything but the last digit is the payload.

    Args:
        value: Exactly 12 digits. The caller is expected to have stripped
            and validated the value already.

    Returns:
        A ``cls`` instance with ``format`` set to ``UpcFormat.UPC_A``.

    Raises:
        ParseError: If the check digit does not match the one calculated
            from the payload.
    """
    assert len(value) == 12

    payload = value[:-1]
    number_system_digit, check_digit = int(value[0]), int(value[-1])

    expected = numeric_check_digit(payload)
    if expected != check_digit:
        raise ParseError(
            f"Invalid UPC-A check digit for {value!r}: "
            f"Expected {expected!r}, got {check_digit!r}."
        )

    return cls(
        value=value,
        format=UpcFormat.UPC_A,
        payload=payload,
        number_system_digit=number_system_digit,
        check_digit=check_digit,
    )
def extract(cls: Type[GS1Prefix], value: str) -> GS1Prefix:
    """Extract the GS1 Prefix from the given value.

    Every range in ``_GS1_PREFIX_RANGES`` is tried in order. For each one,
    the leading ``length`` characters of ``value`` are compared against the
    range's ``min_value``/``max_value`` bounds.

    Args:
        value: The string to extract a GS1 Prefix from.

    Returns:
        Metadata about the extracted prefix.

    Raises:
        ParseError: If the parsing fails.
    """
    for prefix_range in _GS1_PREFIX_RANGES:
        prefix = value[: prefix_range.length]

        # isdecimal(), not isnumeric(): the latter also accepts characters
        # such as superscripts and vulgar fractions, which int() rejects.
        if not prefix.isdecimal():
            continue

        number = int(prefix)
        if prefix_range.min_value <= number <= prefix_range.max_value:
            return cls(value=prefix, usage=prefix_range.usage)

    raise ParseError(f"Failed to get GS1 Prefix from {value!r}.")
def parse(
    value: str,
    *,
    rcn_region: Optional[RcnRegion] = None,
    separator_chars: Iterable[str] = DEFAULT_SEPARATOR_CHARS,
) -> ParseResult:
    """Identify data format and parse data.

    The current strategy is:

    1. If Symbology Identifier prefix indicates a GTIN or GS1 Message,
       attempt to parse and validate as that.
    2. Else, if not Symbology Identifier, attempt to parse with all parsers.

    Args:
        value: The data to classify and parse.
        rcn_region: The geographical region whose rules should be used to
            interpret Restricted Circulation Numbers (RCN).
            Needed to extract e.g. variable weight/price from GTIN.
        separator_chars: Characters used in place of the FNC1 symbol.
            Defaults to `<GS>` (ASCII value 29).
            If variable-length fields in the middle of the message are
            not terminated with a separator character, the parser might
            greedily consume the rest of the message.

    Returns:
        A data class depending upon what type of data is parsed.

    Raises:
        ParseError: If parsing of the data fails.
    """
    value = value.strip()
    config = ParseConfig(
        rcn_region=rcn_region,
        separator_chars=separator_chars,
    )
    result = ParseResult(value=value)

    # Extract Symbology Identifier and strip it off the value to be parsed.
    if value.startswith("]"):
        result.symbology_identifier = SymbologyIdentifier.extract(value)
        value = value[len(result.symbology_identifier) :]

    # Select parsers based on the Symbology Identifier, if there is one.
    queue: ParseQueue = []
    if result.symbology_identifier is not None:
        gs1_symbology = result.symbology_identifier.gs1_symbology
        if gs1_symbology in GS1Symbology.with_gtin():
            queue.append((_parse_gtin, value))
        if gs1_symbology in GS1Symbology.with_ai_element_strings():
            queue.append((_parse_gs1_message, value))

    if not queue:
        # If we're not able to select a subset based on Symbology Identifiers,
        # run all parsers on the full value.
        queue = [
            (parser, value)
            for parser in (_parse_gs1_message, _parse_gtin, _parse_sscc, _parse_upc)
        ]

    # Work through queue of parsers and the values to run them on. Any parser
    # may add additional work to the queue. Only the first result for a field
    # is kept.
    while queue:
        parse_func, val = queue.pop(0)
        parse_func(val, config=config, queue=queue, result=result)

    if not result._has_result():
        raise ParseError(f"Failed to parse {value!r}:\n{result._get_errors_list()}")
    return result
def parse(
    cls: Type[Gtin], value: str, *, rcn_region: Optional[RcnRegion] = None
) -> Gtin:
    """Parse the given value into a :class:`Gtin` object.

    Both GTIN-8, GTIN-12, GTIN-13, and GTIN-14 are supported.

    Args:
        value: The value to parse.
        rcn_region: The geographical region whose rules should be used to
            interpret Restricted Circulation Numbers (RCN).
            Needed to extract e.g. variable weight/price from GTIN.

    Returns:
        GTIN data structure with the successfully extracted data.
        The checksum is guaranteed to be valid if a GTIN object is returned.
        The object is an ``Rcn`` when the prefix usage mentions
        "Restricted Circulation Number".

    Raises:
        ParseError: If the parsing fails.
    """
    # Imported here rather than at module level, as in the original code.
    from biip.gtin import Rcn

    value = value.strip()

    if len(value) not in (8, 12, 13, 14):
        raise ParseError(
            f"Failed to parse {value!r} as GTIN: "
            f"Expected 8, 12, 13, or 14 digits, got {len(value)}."
        )

    # isdecimal(), not isnumeric(): the latter also accepts characters such
    # as superscripts and vulgar fractions, which int() below cannot convert.
    if not value.isdecimal():
        raise ParseError(
            f"Failed to parse {value!r} as GTIN: Expected a numerical value."
        )

    stripped_value = _strip_leading_zeros(value)
    assert len(stripped_value) in (8, 12, 13, 14)

    num_significant_digits = len(stripped_value)
    gtin_format = GtinFormat(num_significant_digits)

    payload = stripped_value[:-1]
    check_digit = int(stripped_value[-1])

    packaging_level: Optional[int] = None
    if gtin_format == GtinFormat.GTIN_14:
        # The first GTIN-14 digit is the packaging level; the prefix follows.
        packaging_level = int(stripped_value[0])
        value_without_packaging_level = stripped_value[1:]
        prefix = GS1Prefix.extract(value_without_packaging_level)
    elif gtin_format == GtinFormat.GTIN_12:
        # Add a zero to convert U.P.C. Company Prefix to GS1 Company Prefix
        prefix = GS1Prefix.extract(stripped_value.zfill(13))
    elif gtin_format == GtinFormat.GTIN_8:
        prefix = GS1Prefix.extract(stripped_value.zfill(12))
    else:
        prefix = GS1Prefix.extract(stripped_value)

    calculated_check_digit = numeric_check_digit(payload)
    if check_digit != calculated_check_digit:
        raise ParseError(
            f"Invalid GTIN check digit for {value!r}: "
            f"Expected {calculated_check_digit!r}, got {check_digit!r}."
        )

    # Tolerate a missing prefix: a GS1Prefix.extract variant that returns
    # None for unknown prefixes must not crash the usage lookup.
    gtin_type: Type[Union[Gtin, Rcn]]
    if prefix is not None and "Restricted Circulation Number" in prefix.usage:
        gtin_type = Rcn
    else:
        gtin_type = Gtin

    gtin = gtin_type(
        value=value,
        format=gtin_format,
        prefix=prefix,
        payload=payload,
        check_digit=check_digit,
        packaging_level=packaging_level,
    )

    if isinstance(gtin, Rcn) and rcn_region is not None:
        gtin._parse_with_regional_rules(rcn_region)

    return gtin
def parse(
    value: str,
    *,
    rcn_region: Optional[RcnRegion] = None,
    separator_chars: Iterable[str] = DEFAULT_SEPARATOR_CHARS,
) -> ParseResult:
    """Identify data format and parse data.

    The current strategy is:

    1. If Symbology Identifier prefix indicates a GTIN or GS1 Message,
       attempt to parse and validate as that.
    2. Else, if not Symbology Identifier, attempt to parse with all parsers.

    Args:
        value: The data to classify and parse.
        rcn_region: The geographical region whose rules should be used to
            interpret Restricted Circulation Numbers (RCN).
            Needed to extract e.g. variable weight/price from GTIN.
        separator_chars: Characters used in place of the FNC1 symbol.
            Defaults to `<GS>` (ASCII value 29).
            If variable-length fields in the middle of the message are
            not terminated with a separator character, the parser might
            greedily consume the rest of the message.

    Returns:
        A data class depending upon what type of data is parsed.

    Raises:
        ParseError: If parsing of the data fails.
    """
    value = value.strip()
    result = ParseResult(value=value)

    # Extract Symbology Identifier and strip it off the value to be parsed.
    if value.startswith("]"):
        result.symbology_identifier = SymbologyIdentifier.extract(value)
        value = value[len(result.symbology_identifier) :]

    # Select parsers based on the Symbology Identifier, if there is one.
    parsers: List[ParserType] = []
    if result.symbology_identifier is not None:
        gs1_symbology = result.symbology_identifier.gs1_symbology
        if gs1_symbology in GS1Symbology.with_gtin():
            parsers.append(Gtin)
        if gs1_symbology in GS1Symbology.with_ai_element_strings():
            parsers.append(GS1Message)

    if not parsers:
        # If we're not able to select a subset based on Symbology Identifiers,
        # run all parsers in the default order.
        parsers = [Gtin, Sscc, GS1Message]

    # Run all parsers in order. A failure is recorded on the result instead
    # of aborting, so the remaining parsers still get a chance.
    for parser in parsers:
        if parser == Gtin:
            try:
                result.gtin = Gtin.parse(value, rcn_region=rcn_region)
            except ParseError as exc:
                result.gtin_error = str(exc)
        elif parser == Sscc:
            try:
                result.sscc = Sscc.parse(value)
            except ParseError as exc:
                result.sscc_error = str(exc)
        elif parser == GS1Message:
            try:
                result.gs1_message = GS1Message.parse(
                    value,
                    rcn_region=rcn_region,
                    separator_chars=separator_chars,
                )
            except ParseError as exc:
                result.gs1_message_error = str(exc)
                continue

            sscc_element = result.gs1_message.get(ai="00")
            if sscc_element is not None:
                # GS1 Message contains an SSCC.
                result.sscc = sscc_element.sscc
                # Clear error from parsing full value as SSCC.
                result.sscc_error = None

            gtin_element = result.gs1_message.get(ai="01")
            if gtin_element is not None:
                # GS1 Message contains a GTIN.
                result.gtin = gtin_element.gtin
                # Clear error from parsing full value as GTIN.
                result.gtin_error = None

    if not result._has_result():
        raise ParseError(
            f"Failed to parse {value!r}:\n{result._get_errors_list()}"
        )
    return result