def read_template(line_stream):
    """
    Read SAFNWC message template into a :class:`.Template`

    SAFNWC template lines look as follows (fields sit in fixed-width columns):
    ``1 001033 0 0 8 Code table Identification of originating/generating centre``

    Blank lines and lines starting with ``#`` or ``/*`` are skipped. Lines
    starting with ``NUM`` are whitespace-separated ``<name> <integer>``
    pairs; ``NUM_ORIGINATING_CENTRE``, ``NUM_BUFR_MAIN_TABLE`` and
    ``NUM_BUFR_LOCAL_TABLES`` are required to build the template name.

    :param line_stream: Lines of SAFNWC template file
    :return: the template as Template
    :rtype: Template
    :raises ValueError: if the template contains a descriptor outside
        range [0,3] or a numeric column cannot be parsed
    :raises KeyError: if one of the ``NUM_*`` entries needed for the
        template name is missing
    """
    descriptors = []
    metadata = {}
    for line in line_stream:
        if not line.strip():
            # Blank lines carry no information; without this guard they
            # would fail in int('') below.
            continue
        if line.startswith(('#', '/*')):
            # Ignore comments, does not support multiline comments properly
            continue
        if line.startswith('NUM'):
            # split() without an argument tolerates repeated spaces, tabs
            # and the trailing newline.
            name, num = line.split()
            metadata[name] = int(num)
            continue

        # Fixed-width columns, widths 8, 6, 9, 10, 14, 18 and the rest:
        # number, descriptor, scale, reference, bits, unit, significance
        int(line[:8])  # value unused; parsed so malformed lines still raise
        raw_descriptor = line[8:14]
        descriptor_code = fxy2int(raw_descriptor)
        scale = int(line[14:23])
        reference = int(line[23:33])
        bits = int(line[33:47])
        unit = line[47:65].strip()[:24]
        significance = line[65:].strip()[:64]

        descr_class = raw_descriptor[0]
        if descr_class == '0':
            descriptors.append(
                ElementDescriptor(descriptor_code, bits, scale, reference,
                                  significance, unit))
        elif descr_class == '1':
            _, x, y = fxy(raw_descriptor)
            descriptors.append(
                ReplicationDescriptor(descriptor_code, 0, x, y, significance))
        elif descr_class == '2':
            _, x, y = fxy(raw_descriptor)
            descriptors.append(
                OperatorDescriptor(descriptor_code, 0, x, y, significance))
        elif descr_class == '3':
            # Ignore sequence descriptors, they are followed by constituent
            # elements in the SAFNWC template format
            continue
        else:
            raise ValueError(
                "Encountered unknown descriptor class: %s" % descr_class)

    name = "B0000000000%(NUM_ORIGINATING_CENTRE)03d%(NUM_BUFR_MAIN_TABLE)03d%(NUM_BUFR_LOCAL_TABLES)03d.TXT" % metadata
    return Template(name, descriptors)
def read_template(line_stream):
    """
    Read SAFNWC message template into a :class:`.Template`

    SAFNWC template lines look as follows (fields sit in fixed-width columns):
    ``1 001033 0 0 8 Code table Identification of originating/generating centre``

    Blank lines and lines starting with ``#`` or ``/*`` are skipped. Lines
    starting with ``NUM`` are whitespace-separated ``<name> <integer>``
    pairs; ``NUM_ORIGINATING_CENTRE``, ``NUM_BUFR_MAIN_TABLE`` and
    ``NUM_BUFR_LOCAL_TABLES`` are required to build the template name.

    :param line_stream: Lines of SAFNWC template file
    :return: the template as Template
    :rtype: Template
    :raises ValueError: if the template contains a descriptor outside
        range [0,3] or a numeric column cannot be parsed
    :raises KeyError: if one of the ``NUM_*`` entries needed for the
        template name is missing
    """
    descriptors = []
    metadata = {}
    for line in line_stream:
        if not line.strip():
            # Blank lines carry no information; without this guard they
            # would fail in int("") below.
            continue
        if line.startswith(("#", "/*")):
            # Ignore comments, does not support multiline comments properly
            continue
        if line.startswith("NUM"):
            # split() without an argument tolerates repeated spaces, tabs
            # and the trailing newline.
            name, num = line.split()
            metadata[name] = int(num)
            continue

        # Fixed-width columns, widths 8, 6, 9, 10, 14, 18 and the rest:
        # number, descriptor, scale, reference, bits, unit, significance
        int(line[:8])  # value unused; parsed so malformed lines still raise
        raw_descriptor = line[8:14]
        descriptor_code = fxy2int(raw_descriptor)
        scale = int(line[14:23])
        reference = int(line[23:33])
        bits = int(line[33:47])
        unit = line[47:65].strip()[:24]
        significance = line[65:].strip()[:64]

        descr_class = raw_descriptor[0]
        if descr_class == "0":
            descriptors.append(
                ElementDescriptor(descriptor_code, bits, scale, reference,
                                  significance, unit))
        elif descr_class == "1":
            _, x, y = fxy(raw_descriptor)
            descriptors.append(
                ReplicationDescriptor(descriptor_code, 0, x, y, significance))
        elif descr_class == "2":
            _, x, y = fxy(raw_descriptor)
            descriptors.append(
                OperatorDescriptor(descriptor_code, 0, x, y, significance))
        elif descr_class == "3":
            # Ignore sequence descriptors, they are followed by constituent
            # elements in the SAFNWC template format
            continue
        else:
            raise ValueError(
                "Encountered unknown descriptor class: %s" % descr_class)

    name = "B0000000000%(NUM_ORIGINATING_CENTRE)03d%(NUM_BUFR_MAIN_TABLE)03d%(NUM_BUFR_LOCAL_TABLES)03d.TXT" % metadata
    return Template(name, descriptors)
def read_tables(b_line_stream, d_line_stream=None):
    """
    Read BUFR table(s) in from libbufr text file(s).

    The return value is a descriptor table that combines the tables read.
    B-table lines without a bit width are skipped. When a D-table is given,
    each of its sequences is stored as a :class:`LazySequenceDescriptor`
    that refers back to the returned table.

    :param b_line_stream: Iterable of lines, contents of the B-table file
    :param d_line_stream: Iterable of lines, contents of the D-table file
    :return: Mapping from FXY integers to descriptors
    :rtype: DescriptorTable
    :raises ValueError: if the B-table contains a class 3 (sequence)
        descriptor or a descriptor with illegal class (outside range [0,3])
    :raises ValueError: if a D-table sequence declares a different number of
        elements than it lists, or if a continuation line appears before
        the first sequence header
    """
    descriptors = {}
    for line in b_line_stream:
        # Format from btable.F:146 in libbufr version 000400
        parts = slices(line, [1, 6, 1, 64, 1, 24, 1, 3, 1, 12, 1, 3])
        if not parts[11]:
            # Geo::BUFR skips lines without bit width definition,
            # libbufr defaults bit width to 0
            # choosing to skip here
            continue

        raw_descriptor = parts[1]
        descriptor_code = fxy2int(raw_descriptor)
        significance = parts[3].strip()
        unit = parts[5].strip()
        scale = int(parts[7])
        reference = int(parts[9])
        bits = int(parts[11])

        descr_class = raw_descriptor[0]
        if descr_class == '0':
            descriptors[descriptor_code] = ElementDescriptor(
                descriptor_code, bits, scale, reference, significance, unit)
        elif descr_class == '1':
            _, x, y = fxy(raw_descriptor)
            descriptors[descriptor_code] = ReplicationDescriptor(
                descriptor_code, 0, x, y, significance)
        elif descr_class == '2':
            _, x, y = fxy(raw_descriptor)
            descriptors[descriptor_code] = OperatorDescriptor(
                descriptor_code, 0, x, y, significance)
        elif descr_class == '3':
            # Report the offending descriptor; the class digit alone is
            # always '3' here and tells the reader nothing.
            raise ValueError(
                "B-table file should not contain descriptors of class 3: %s"
                % raw_descriptor)
        else:
            raise ValueError(
                "Encountered unknown descriptor class: %s" % descr_class)

    def group_d_lines(ls):
        """Yield lists of lines, one list per D-table sequence.

        A line starting with ``' 3'`` opens a new sequence; every other
        non-blank line is a continuation of the current one.
        """
        buf = None
        for line in ls:
            if not line.strip():
                # Blank lines would otherwise be counted as elements
                continue
            if line.startswith(' 3'):
                if buf:
                    yield buf
                buf = [line]
            elif buf is None:
                raise ValueError(
                    "D-table continuation line before any sequence header: %r"
                    % line)
            else:
                buf.append(line)
        # Nothing to yield when the stream held no sequence at all
        if buf:
            yield buf

    # descriptors is not copied, just referenced
    table = DescriptorTable(descriptors)

    if d_line_stream:
        for lines in group_d_lines(d_line_stream):
            # Format inferred
            parts = slices(lines[0], [1, 6, 1, 2, 1, 6])
            raw_d_descriptor = parts[1]
            d_descriptor_code = fxy2int(raw_d_descriptor)
            n_elements = int(parts[3])
            actual_elements = len(lines)
            if n_elements != actual_elements:
                raise ValueError("Expected %d elements, found %d" %
                                 (n_elements, actual_elements))
            # The header line carries the first constituent itself, so it is
            # parsed together with the continuation lines.
            constituent_codes = []
            for line in lines:
                l_parts = slices(line, [1, 6, 1, 2, 1, 6])
                constituent_codes.append(fxy2int(l_parts[5]))
            descriptors[d_descriptor_code] = LazySequenceDescriptor(
                d_descriptor_code, constituent_codes, '', table)
    return table
def decode_compressed(bits, descriptors, n_subsets, operators, descriptor_overlay):
    """
    Decode compressed data for all subsets in one pass over the descriptors.

    For each element a base value is read, followed by a 6-bit width and
    then one increment (or, for CCITTIA5 text, one string) per subset.

    :param bits: Bit stream to decode from
    :param descriptors: Descriptor iterator
    :param n_subsets: Number of subsets to decode
    :param dict operators: Operators in effect, indexed by opcode
    :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
    :return: List of ``n_subsets`` lists of decoded values
    :raises ValueError: if subsets disagree on a changed reference value or
        a delayed replication count has an unexpected descriptor
    :raises NotImplementedError: on unsupported operators or descriptor types
    """
    # Replication consumes follow-up descriptors from the same iterator, so
    # make sure a plain list is consumed as well.
    descriptors = iter(descriptors)
    subsets = [[] for _ in range(n_subsets)]
    for descriptor in descriptors:
        descriptor = descriptor_overlay.get(descriptor.code, descriptor)

        if isinstance(descriptor, ElementDescriptor):
            op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
            if op_crf is not None:
                # The element slot holds a new reference value, not data
                dummy_descriptors = iter([
                    ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0, 0,
                                      "ASSOCIATED FIELD", "NUMERIC")
                ])
                _subsets = decode_compressed(bits, dummy_descriptors,
                                             n_subsets, {}, {})
                raw_vals = [subset[0].raw_value for subset in _subsets]
                if len(set(raw_vals)) != 1:
                    # Interpolate explicitly; passing raw_vals as a second
                    # exception argument left the %s unformatted.
                    raise ValueError(
                        "Encountered different reference values for different subsets: %s"
                        % (raw_vals,))
                ref_value = raw_vals[0]
                # Top bit is the sign, remaining bits the magnitude
                top_bit_mask = 1 << (op_crf.bits() - 1)
                if ref_value & top_bit_mask:
                    ref_value = -(ref_value & ~top_bit_mask)
                overlay_descriptor = ElementDescriptor(
                    descriptor.code, descriptor.length, descriptor.scale,
                    ref_value, descriptor.significance, descriptor.unit)
                descriptor_overlay[descriptor.code] = overlay_descriptor
                continue

            op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
            if op_aaf is not None and descriptor.code != fxy2int("031021"):
                # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                dummy_descriptors = iter([
                    ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0,
                                      "ASSOCIATED FIELD", "NUMERIC")
                ])
                vals = decode_compressed(bits, dummy_descriptors, n_subsets,
                                         {}, {})
                for i, ss in enumerate(vals):
                    subsets[i].extend(ss)

            read_length = _calculate_read_length(descriptor, operators)
            is_text = descriptor.unit == 'CCITTIA5'
            if is_text:
                ref_value = Bits._readhex(bits, read_length, bits.pos)
            else:
                ref_value = Bits._readuint(bits, read_length, bits.pos)
            bits.pos += read_length

            # Width of the per-subset entries: bits for numbers, characters
            # for text. Zero means every subset equals the base value.
            n_bits = Bits._readuint(bits, 6, bits.pos)
            bits.pos += 6

            for i in range(n_subsets):
                if is_text:
                    n_chars = n_bits
                    if n_chars:
                        raw_value = Bits._readhex(bits, n_chars * 8, bits.pos)
                        bits.pos += n_chars * 8
                        value = _decode_raw_value(raw_value, descriptor,
                                                  operators)
                    else:
                        value = _decode_raw_value(ref_value, descriptor,
                                                  operators)
                elif n_bits:
                    increment = Bits._readuint(bits, n_bits, bits.pos)
                    bits.pos += n_bits
                    if increment == (1 << n_bits) - 1:
                        # Missing value, all-ones
                        value = _decode_raw_value(
                            (1 << descriptor.length) - 1, descriptor,
                            operators)
                    else:
                        value = _decode_raw_value(ref_value + increment,
                                                  descriptor, operators)
                else:
                    value = _decode_raw_value(ref_value, descriptor,
                                              operators)
                subsets[i].append(value)

        elif isinstance(descriptor, ReplicationDescriptor):
            aggregations = [[] for _ in range(n_subsets)]
            if descriptor.count:
                bval = None
                count = descriptor.count
            else:
                # Delayed replication: the count is the next element in the
                # data; the first subset's value is used for all subsets.
                bval = decode_compressed(bits,
                                         itertools.islice(descriptors, 1),
                                         n_subsets, {}, {})[0][0]
                count = bval.value
            n_fields = descriptor.fields
            field_descriptors = list(itertools.islice(descriptors, n_fields))

            if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                # Regular replication, X elements repeated Y or <element value> times in the file
                for _ in range(count):
                    replication = decode_compressed(
                        bits, iter(field_descriptors), n_subsets, operators,
                        descriptor_overlay)
                    for subset_idx in range(n_subsets):
                        aggregations[subset_idx].append(
                            replication[subset_idx])
            elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                # Repeated replication, X elements present once in the file, output <element value> times
                replication = decode_compressed(
                    bits, iter(field_descriptors), n_subsets, operators,
                    descriptor_overlay)
                for _ in range(count):
                    for subset_idx in range(n_subsets):
                        aggregations[subset_idx].append(
                            replication[subset_idx])
            else:
                # Wrapped in a 1-tuple so %-formatting also works if the
                # value object is itself a tuple.
                raise ValueError(
                    "Unexpected delayed replication element %s" % (bval,))

            for subset_idx in range(n_subsets):
                subsets[subset_idx].append(aggregations[subset_idx])

        elif isinstance(descriptor, OperatorDescriptor):
            op = descriptor.operator
            if op.opcode in (1, 2, 3, 4, 7):
                if op.neutral():
                    # Cancelling an operator that is not in effect is a
                    # no-op rather than a KeyError.
                    operators.pop(op.opcode, None)
                else:
                    op.check_conflict(operators)
                    operators[op.opcode] = op
            else:
                raise NotImplementedError(
                    "Can only decode operators 201-204 and 207 for compressed BUFR data at the moment, please file an issue on GitHub, found operator: 2%02d"
                    % op.opcode)

        elif isinstance(descriptor, SequenceDescriptor):
            comp = decode_compressed(bits, iter(descriptor.descriptors),
                                     n_subsets, operators,
                                     descriptor_overlay)
            for i, subset in enumerate(comp):
                subsets[i].extend(subset)

        else:
            raise NotImplementedError(
                "Unknown descriptor type: %s" % (descriptor,))
    return subsets
def decode(bits, descriptors, operators, descriptor_overlay):
    """
    Decode one uncompressed subset by walking the descriptors in order.

    :param bits: Bit stream to decode from
    :param descriptors: Descriptor iterator
    :param dict operators: Operators in effect, indexed by opcode
    :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
    :return: List of decoded values; each replication contributes one
        nested list with one entry per repetition
    :raises ValueError: if a delayed replication count has an unexpected
        descriptor or SIGNIFY_LOCAL_DESCRIPTOR is the last descriptor
    :raises NotImplementedError: on unknown immediate operators or
        descriptor types
    """
    # Replication and SIGNIFY_LOCAL_DESCRIPTOR consume follow-up descriptors
    # from the same iterator, so make sure a plain list is consumed as well.
    descriptors = iter(descriptors)
    values = []
    for descriptor in descriptors:
        descriptor = descriptor_overlay.get(descriptor.code, descriptor)

        if isinstance(descriptor, ElementDescriptor):
            op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
            if op_crf is not None:
                # The element slot holds a new reference value, not data
                ref_value = Bits._readuint(bits, op_crf.bits(), bits.pos)
                bits.pos += op_crf.bits()
                # Top bit is the sign, remaining bits the magnitude
                top_bit_mask = 1 << (op_crf.bits() - 1)
                if ref_value & top_bit_mask:
                    ref_value = -(ref_value & ~top_bit_mask)
                overlay_descriptor = ElementDescriptor(
                    descriptor.code, descriptor.length, descriptor.scale,
                    ref_value, descriptor.significance, descriptor.unit)
                descriptor_overlay[descriptor.code] = overlay_descriptor
                continue

            op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
            if op_aaf is not None and descriptor.code != fxy2int("031021"):
                # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                associated_value = Bits._readuint(bits, op_aaf.bits(),
                                                  bits.pos)
                bits.pos += op_aaf.bits()
                # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                dummy_descriptor = ElementDescriptor(
                    fxy2int("999999"), op_aaf.bits(), 0, 0,
                    "ASSOCIATED FIELD", "NUMERIC")
                values.append(
                    BufrValue(associated_value, associated_value,
                              dummy_descriptor))

            read_length = _calculate_read_length(descriptor, operators)
            if descriptor.unit == 'CCITTIA5':
                raw_value = Bits._readhex(bits, read_length, bits.pos)
            else:
                raw_value = Bits._readuint(bits, read_length, bits.pos)
            bits.pos += read_length
            values.append(_decode_raw_value(raw_value, descriptor, operators))

        elif isinstance(descriptor, ReplicationDescriptor):
            aggregation = []
            if descriptor.count:
                bval = None
                count = descriptor.count
            else:
                # Delayed replication: the count is the next element in the data
                bval = decode(bits, itertools.islice(descriptors, 1), {},
                              {})[0]
                count = bval.value
            n_fields = descriptor.fields
            field_descriptors = list(itertools.islice(descriptors, n_fields))

            if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                # Regular replication, X elements repeated Y or <element value> times in the file
                for _ in range(count):
                    aggregation.append(
                        decode(bits, iter(field_descriptors), operators,
                               descriptor_overlay))
            elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                # Repeated replication, X elements present once in the file, output <element value> times
                repeated_values = decode(bits, iter(field_descriptors),
                                         operators, descriptor_overlay)
                for _ in range(count):
                    # The same list object is appended each time
                    aggregation.append(repeated_values)
            else:
                # Wrapped in a 1-tuple so %-formatting also works if the
                # value object is itself a tuple.
                raise ValueError(
                    "Unexpected delayed replication element %s" % (bval,))
            values.append(aggregation)

        elif isinstance(descriptor, OperatorDescriptor):
            op = descriptor.operator
            if op.immediate:
                if op.opcode == OpCode.SIGNIFY_CHARACTER:
                    raw_value = Bits._readhex(bits, op.bits(), bits.pos)
                    bits.pos += op.bits()
                    char_descriptor = ElementDescriptor(
                        fxy2int(op.code), op.bits(), 0, 0,
                        "CHARACTER INFORMATION", "CCITTIA5")
                    value = _decode_raw_value(raw_value, char_descriptor, {})
                    values.append(value)
                elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                    # The operator gives the data width of the descriptor
                    # that follows it. This branch used to subscript an
                    # islice and call list.add, so it could never succeed;
                    # it now decodes exactly that one descriptor with the
                    # overridden width.
                    base_descriptor = next(descriptors, None)
                    if base_descriptor is None:
                        raise ValueError(
                            "Operator %s is not followed by a descriptor"
                            % str(descriptor))
                    mod_descriptor = ElementDescriptor(
                        base_descriptor.code, op.bits(),
                        base_descriptor.scale, base_descriptor.ref,
                        base_descriptor.significance, base_descriptor.unit)
                    # NOTE(review): the decoded value object is stored, as
                    # in every other branch; the broken original reached
                    # for its ``.value`` attribute -- confirm with consumers.
                    values.extend(
                        decode(bits, iter([mod_descriptor]), {}, {}))
                else:
                    raise NotImplementedError(
                        "Unknown immediate operator: %s" % str(descriptor))
            elif op.neutral():
                # Cancelling an operator that is not in effect is a no-op
                # rather than a KeyError.
                operators.pop(op.opcode, None)
            else:
                op.check_conflict(operators)
                operators[op.opcode] = op

        elif isinstance(descriptor, SequenceDescriptor):
            seq = decode(bits, iter(descriptor.descriptors), operators,
                         descriptor_overlay)
            values.extend(seq)

        else:
            raise NotImplementedError(
                "Unknown descriptor type: %s" % (descriptor,))
    return values
def decode_section4(stream, descriptors, n_subsets=1, compressed=False):
    """
    Decode Section 4, the data section, of a BUFR message into a :class:`.Section4` object.

    The section starts with a 3-octet length and one further octet whose
    value is not used; the remaining ``length - 4`` octets are the data.

    :param ReadableStream stream: BUFR message, starting at section 4
    :param descriptors: List of descriptors specifying message structure
    :param int n_subsets: Number of data subsets, from section 3
    :param bool compressed: Whether message data is compressed or not, from section 3
    :return: The decoded section, holding one subset per data subset
    :raises NotImplementedError: if the message contains an unknown
        immediate operator, an operator other than 201-204 and 207 in
        compressed data, or an unknown descriptor type
    :raises ValueError: if a delayed replication count has an unexpected
        descriptor, or compressed subsets disagree on a changed reference
        value
    """
    REPLICATION_DESCRIPTORS = set(
        [fxy2int("031000"), fxy2int("031001"), fxy2int("031002")])
    REPETITION_DESCRIPTORS = set([fxy2int("031011"), fxy2int("031012")])

    from bitstring import ConstBitStream, Bits

    length = stream.readint(3)
    stream.readint(1)  # value unused; read to advance past the 4th octet
    data = stream.readbytes(length - 4)
    bits = ConstBitStream(bytes=data)

    def decode(bits, descriptors, operators, descriptor_overlay):
        """
        Decode one uncompressed subset by walking the descriptors in order.

        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        :return: List of decoded values; each replication contributes one
            nested list with one entry per repetition
        """
        # Replication and SIGNIFY_LOCAL_DESCRIPTOR consume follow-up
        # descriptors from the same iterator.
        descriptors = iter(descriptors)
        values = []
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)

            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    # The element slot holds a new reference value, not data
                    ref_value = Bits._readuint(bits, op_crf.bits(), bits.pos)
                    bits.pos += op_crf.bits()
                    # Top bit is the sign, remaining bits the magnitude
                    top_bit_mask = 1 << (op_crf.bits() - 1)
                    if ref_value & top_bit_mask:
                        ref_value = -(ref_value & ~top_bit_mask)
                    overlay_descriptor = ElementDescriptor(
                        descriptor.code, descriptor.length, descriptor.scale,
                        ref_value, descriptor.significance, descriptor.unit)
                    descriptor_overlay[descriptor.code] = overlay_descriptor
                    continue

                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    associated_value = Bits._readuint(bits, op_aaf.bits(),
                                                      bits.pos)
                    bits.pos += op_aaf.bits()
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptor = ElementDescriptor(
                        fxy2int("999999"), op_aaf.bits(), 0, 0,
                        "ASSOCIATED FIELD", "NUMERIC")
                    values.append(
                        BufrValue(associated_value, associated_value,
                                  dummy_descriptor))

                read_length = _calculate_read_length(descriptor, operators)
                if descriptor.unit == 'CCITTIA5':
                    raw_value = Bits._readhex(bits, read_length, bits.pos)
                else:
                    raw_value = Bits._readuint(bits, read_length, bits.pos)
                bits.pos += read_length
                values.append(
                    _decode_raw_value(raw_value, descriptor, operators))

            elif isinstance(descriptor, ReplicationDescriptor):
                aggregation = []
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    # Delayed replication: the count is the next element in the data
                    bval = decode(bits, itertools.islice(descriptors, 1), {},
                                  {})[0]
                    count = bval.value
                n_fields = descriptor.fields
                field_descriptors = list(
                    itertools.islice(descriptors, n_fields))

                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        aggregation.append(
                            decode(bits, iter(field_descriptors), operators,
                                   descriptor_overlay))
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    repeated_values = decode(bits, iter(field_descriptors),
                                             operators, descriptor_overlay)
                    for _ in range(count):
                        # The same list object is appended each time
                        aggregation.append(repeated_values)
                else:
                    # Wrapped in a 1-tuple so %-formatting also works if
                    # the value object is itself a tuple.
                    raise ValueError(
                        "Unexpected delayed replication element %s" % (bval,))
                values.append(aggregation)

            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.immediate:
                    if op.opcode == OpCode.SIGNIFY_CHARACTER:
                        raw_value = Bits._readhex(bits, op.bits(), bits.pos)
                        bits.pos += op.bits()
                        char_descriptor = ElementDescriptor(
                            fxy2int(op.code), op.bits(), 0, 0,
                            "CHARACTER INFORMATION", "CCITTIA5")
                        value = _decode_raw_value(raw_value, char_descriptor,
                                                  {})
                        values.append(value)
                    elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                        # The operator gives the data width of the
                        # descriptor that follows it. This branch used to
                        # subscript an islice and call list.add, so it
                        # could never succeed; it now decodes exactly that
                        # one descriptor with the overridden width.
                        base_descriptor = next(descriptors, None)
                        if base_descriptor is None:
                            raise ValueError(
                                "Operator %s is not followed by a descriptor"
                                % str(descriptor))
                        mod_descriptor = ElementDescriptor(
                            base_descriptor.code, op.bits(),
                            base_descriptor.scale, base_descriptor.ref,
                            base_descriptor.significance,
                            base_descriptor.unit)
                        # NOTE(review): the decoded value object is stored,
                        # as in every other branch; the broken original
                        # reached for its ``.value`` attribute -- confirm.
                        values.extend(
                            decode(bits, iter([mod_descriptor]), {}, {}))
                    else:
                        raise NotImplementedError(
                            "Unknown immediate operator: %s" % str(descriptor))
                elif op.neutral():
                    # Cancelling an operator that is not in effect is a
                    # no-op rather than a KeyError.
                    operators.pop(op.opcode, None)
                else:
                    op.check_conflict(operators)
                    operators[op.opcode] = op

            elif isinstance(descriptor, SequenceDescriptor):
                seq = decode(bits, iter(descriptor.descriptors), operators,
                             descriptor_overlay)
                values.extend(seq)

            else:
                raise NotImplementedError(
                    "Unknown descriptor type: %s" % (descriptor,))
        return values

    def decode_compressed(bits, descriptors, n_subsets, operators, descriptor_overlay):
        """
        Decode compressed data for all subsets in one pass over the descriptors.

        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param n_subsets: Number of subsets to decode
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        :return: List of ``n_subsets`` lists of decoded values
        """
        # Replication consumes follow-up descriptors from the same iterator
        descriptors = iter(descriptors)
        subsets = [[] for _ in range(n_subsets)]
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)

            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    # The element slot holds a new reference value, not data
                    dummy_descriptors = iter([
                        ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0,
                                          0, "ASSOCIATED FIELD", "NUMERIC")
                    ])
                    _subsets = decode_compressed(bits, dummy_descriptors,
                                                 n_subsets, {}, {})
                    raw_vals = [subset[0].raw_value for subset in _subsets]
                    if len(set(raw_vals)) != 1:
                        # Interpolate explicitly; passing raw_vals as a
                        # second exception argument left the %s unformatted.
                        raise ValueError(
                            "Encountered different reference values for different subsets: %s"
                            % (raw_vals,))
                    ref_value = raw_vals[0]
                    # Top bit is the sign, remaining bits the magnitude
                    top_bit_mask = 1 << (op_crf.bits() - 1)
                    if ref_value & top_bit_mask:
                        ref_value = -(ref_value & ~top_bit_mask)
                    overlay_descriptor = ElementDescriptor(
                        descriptor.code, descriptor.length, descriptor.scale,
                        ref_value, descriptor.significance, descriptor.unit)
                    descriptor_overlay[descriptor.code] = overlay_descriptor
                    continue

                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptors = iter([
                        ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0,
                                          0, "ASSOCIATED FIELD", "NUMERIC")
                    ])
                    vals = decode_compressed(bits, dummy_descriptors,
                                             n_subsets, {}, {})
                    for i, ss in enumerate(vals):
                        subsets[i].extend(ss)

                read_length = _calculate_read_length(descriptor, operators)
                is_text = descriptor.unit == 'CCITTIA5'
                if is_text:
                    ref_value = Bits._readhex(bits, read_length, bits.pos)
                else:
                    ref_value = Bits._readuint(bits, read_length, bits.pos)
                bits.pos += read_length

                # Width of the per-subset entries: bits for numbers,
                # characters for text. Zero means every subset equals the
                # base value.
                n_bits = Bits._readuint(bits, 6, bits.pos)
                bits.pos += 6

                for i in range(n_subsets):
                    if is_text:
                        n_chars = n_bits
                        if n_chars:
                            raw_value = Bits._readhex(bits, n_chars * 8,
                                                      bits.pos)
                            bits.pos += n_chars * 8
                            value = _decode_raw_value(raw_value, descriptor,
                                                      operators)
                        else:
                            value = _decode_raw_value(ref_value, descriptor,
                                                      operators)
                    elif n_bits:
                        increment = Bits._readuint(bits, n_bits, bits.pos)
                        bits.pos += n_bits
                        if increment == (1 << n_bits) - 1:
                            # Missing value, all-ones
                            value = _decode_raw_value(
                                (1 << descriptor.length) - 1, descriptor,
                                operators)
                        else:
                            value = _decode_raw_value(ref_value + increment,
                                                      descriptor, operators)
                    else:
                        value = _decode_raw_value(ref_value, descriptor,
                                                  operators)
                    subsets[i].append(value)

            elif isinstance(descriptor, ReplicationDescriptor):
                aggregations = [[] for _ in range(n_subsets)]
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    # Delayed replication: the count is the next element in
                    # the data; the first subset's value is used for all.
                    bval = decode_compressed(
                        bits, itertools.islice(descriptors, 1), n_subsets, {},
                        {})[0][0]
                    count = bval.value
                n_fields = descriptor.fields
                field_descriptors = list(
                    itertools.islice(descriptors, n_fields))

                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        replication = decode_compressed(
                            bits, iter(field_descriptors), n_subsets,
                            operators, descriptor_overlay)
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(
                                replication[subset_idx])
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    replication = decode_compressed(
                        bits, iter(field_descriptors), n_subsets, operators,
                        descriptor_overlay)
                    for _ in range(count):
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(
                                replication[subset_idx])
                else:
                    raise ValueError(
                        "Unexpected delayed replication element %s" % (bval,))

                for subset_idx in range(n_subsets):
                    subsets[subset_idx].append(aggregations[subset_idx])

            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.opcode in (1, 2, 3, 4, 7):
                    if op.neutral():
                        # Cancelling an operator that is not in effect is a
                        # no-op rather than a KeyError.
                        operators.pop(op.opcode, None)
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op
                else:
                    raise NotImplementedError(
                        "Can only decode operators 201-204 and 207 for compressed BUFR data at the moment, please file an issue on GitHub, found operator: 2%02d"
                        % op.opcode)

            elif isinstance(descriptor, SequenceDescriptor):
                comp = decode_compressed(bits, iter(descriptor.descriptors),
                                         n_subsets, operators,
                                         descriptor_overlay)
                for i, subset in enumerate(comp):
                    subsets[i].extend(subset)

            else:
                raise NotImplementedError(
                    "Unknown descriptor type: %s" % (descriptor,))
        return subsets

    if compressed:
        subsets = [
            BufrSubset(x)
            for x in decode_compressed(bits, iter(descriptors), n_subsets, {},
                                       {})
        ]
    else:
        # Each subset starts with no operators in effect and no overlays
        subsets = [
            BufrSubset(decode(bits, iter(descriptors), {}, {}))
            for _ in range(n_subsets)
        ]
    return Section4(length, subsets)
def decode_compressed(bits, descriptors, n_subsets, operators, descriptor_overlay):
    """
    Decode compressed data for all subsets in one pass over the descriptors.

    For each element a base value is read, followed by a 6-bit width and
    then one increment (or, for CCITTIA5 text, one string) per subset.

    :param bits: Bit stream to decode from
    :param descriptors: Descriptor iterator
    :param n_subsets: Number of subsets to decode
    :param dict operators: Operators in effect, indexed by opcode
    :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
    :return: List of ``n_subsets`` lists of decoded values
    :raises ValueError: if subsets disagree on a changed reference value or
        a delayed replication count has an unexpected descriptor
    :raises NotImplementedError: on unsupported operators or descriptor types
    """
    # Replication consumes follow-up descriptors from the same iterator, so
    # make sure a plain list is consumed as well.
    descriptors = iter(descriptors)
    subsets = [[] for _ in range(n_subsets)]
    for descriptor in descriptors:
        descriptor = descriptor_overlay.get(descriptor.code, descriptor)

        if isinstance(descriptor, ElementDescriptor):
            op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
            if op_crf is not None:
                # The element slot holds a new reference value, not data
                dummy_descriptors = iter([
                    ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0, 0,
                                      "ASSOCIATED FIELD", "NUMERIC")
                ])
                _subsets = decode_compressed(bits, dummy_descriptors,
                                             n_subsets, {}, {})
                raw_vals = [subset[0].raw_value for subset in _subsets]
                if len(set(raw_vals)) != 1:
                    # Interpolate explicitly; passing raw_vals as a second
                    # exception argument left the %s unformatted.
                    raise ValueError(
                        "Encountered different reference values for different subsets: %s"
                        % (raw_vals,))
                ref_value = raw_vals[0]
                # Top bit is the sign, remaining bits the magnitude
                top_bit_mask = 1 << (op_crf.bits() - 1)
                if ref_value & top_bit_mask:
                    ref_value = -(ref_value & ~top_bit_mask)
                overlay_descriptor = ElementDescriptor(
                    descriptor.code, descriptor.length, descriptor.scale,
                    ref_value, descriptor.significance, descriptor.unit)
                descriptor_overlay[descriptor.code] = overlay_descriptor
                continue

            op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
            if op_aaf is not None and descriptor.code != fxy2int("031021"):
                # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                dummy_descriptors = iter([
                    ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0,
                                      "ASSOCIATED FIELD", "NUMERIC")
                ])
                vals = decode_compressed(bits, dummy_descriptors, n_subsets,
                                         {}, {})
                for i, ss in enumerate(vals):
                    subsets[i].extend(ss)

            read_length = _calculate_read_length(descriptor, operators)
            is_text = descriptor.unit == 'CCITTIA5'
            if is_text:
                ref_value = Bits._readhex(bits, read_length, bits.pos)
            else:
                ref_value = Bits._readuint(bits, read_length, bits.pos)
            bits.pos += read_length

            # Width of the per-subset entries: bits for numbers, characters
            # for text. Zero means every subset equals the base value.
            n_bits = Bits._readuint(bits, 6, bits.pos)
            bits.pos += 6

            for i in range(n_subsets):
                if is_text:
                    n_chars = n_bits
                    if n_chars:
                        raw_value = Bits._readhex(bits, n_chars * 8, bits.pos)
                        bits.pos += n_chars * 8
                        value = _decode_raw_value(raw_value, descriptor,
                                                  operators)
                    else:
                        value = _decode_raw_value(ref_value, descriptor,
                                                  operators)
                elif n_bits:
                    increment = Bits._readuint(bits, n_bits, bits.pos)
                    bits.pos += n_bits
                    if increment == (1 << n_bits) - 1:
                        # Missing value, all-ones
                        value = _decode_raw_value(
                            (1 << descriptor.length) - 1, descriptor,
                            operators)
                    else:
                        value = _decode_raw_value(ref_value + increment,
                                                  descriptor, operators)
                else:
                    value = _decode_raw_value(ref_value, descriptor,
                                              operators)
                subsets[i].append(value)

        elif isinstance(descriptor, ReplicationDescriptor):
            aggregations = [[] for _ in range(n_subsets)]
            if descriptor.count:
                bval = None
                count = descriptor.count
            else:
                # Delayed replication: the count is the next element in the
                # data; the first subset's value is used for all subsets.
                bval = decode_compressed(bits,
                                         itertools.islice(descriptors, 1),
                                         n_subsets, {}, {})[0][0]
                count = bval.value
            n_fields = descriptor.fields
            field_descriptors = list(itertools.islice(descriptors, n_fields))

            if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                # Regular replication, X elements repeated Y or <element value> times in the file
                for _ in range(count):
                    replication = decode_compressed(
                        bits, iter(field_descriptors), n_subsets, operators,
                        descriptor_overlay)
                    for subset_idx in range(n_subsets):
                        aggregations[subset_idx].append(
                            replication[subset_idx])
            elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                # Repeated replication, X elements present once in the file, output <element value> times
                replication = decode_compressed(
                    bits, iter(field_descriptors), n_subsets, operators,
                    descriptor_overlay)
                for _ in range(count):
                    for subset_idx in range(n_subsets):
                        aggregations[subset_idx].append(
                            replication[subset_idx])
            else:
                # Wrapped in a 1-tuple so %-formatting also works if the
                # value object is itself a tuple.
                raise ValueError(
                    "Unexpected delayed replication element %s" % (bval,))

            for subset_idx in range(n_subsets):
                subsets[subset_idx].append(aggregations[subset_idx])

        elif isinstance(descriptor, OperatorDescriptor):
            op = descriptor.operator
            if op.opcode in (1, 2, 3, 4, 7):
                if op.neutral():
                    # Cancelling an operator that is not in effect is a
                    # no-op rather than a KeyError.
                    operators.pop(op.opcode, None)
                else:
                    op.check_conflict(operators)
                    operators[op.opcode] = op
            else:
                raise NotImplementedError(
                    "Can only decode operators 201-204 and 207 for compressed BUFR data at the moment, please file an issue on GitHub, found operator: 2%02d"
                    % op.opcode)

        elif isinstance(descriptor, SequenceDescriptor):
            comp = decode_compressed(bits, iter(descriptor.descriptors),
                                     n_subsets, operators,
                                     descriptor_overlay)
            for i, subset in enumerate(comp):
                subsets[i].extend(subset)

        else:
            raise NotImplementedError(
                "Unknown descriptor type: %s" % (descriptor,))
    return subsets
def decode(bits, descriptors, operators, descriptor_overlay):
    """
    Decode one uncompressed subset by walking the descriptors in order.

    :param bits: Bit stream to decode from
    :param descriptors: Descriptor iterator
    :param dict operators: Operators in effect, indexed by opcode
    :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
    :return: List of decoded values; each replication contributes one
        nested list with one entry per repetition
    :raises ValueError: if a delayed replication count has an unexpected
        descriptor or SIGNIFY_LOCAL_DESCRIPTOR is the last descriptor
    :raises NotImplementedError: on unknown immediate operators or
        descriptor types
    """
    # Replication and SIGNIFY_LOCAL_DESCRIPTOR consume follow-up descriptors
    # from the same iterator, so make sure a plain list is consumed as well.
    descriptors = iter(descriptors)
    values = []
    for descriptor in descriptors:
        descriptor = descriptor_overlay.get(descriptor.code, descriptor)

        if isinstance(descriptor, ElementDescriptor):
            op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
            if op_crf is not None:
                # The element slot holds a new reference value, not data
                ref_value = Bits._readuint(bits, op_crf.bits(), bits.pos)
                bits.pos += op_crf.bits()
                # Top bit is the sign, remaining bits the magnitude
                top_bit_mask = 1 << (op_crf.bits() - 1)
                if ref_value & top_bit_mask:
                    ref_value = -(ref_value & ~top_bit_mask)
                overlay_descriptor = ElementDescriptor(
                    descriptor.code, descriptor.length, descriptor.scale,
                    ref_value, descriptor.significance, descriptor.unit)
                descriptor_overlay[descriptor.code] = overlay_descriptor
                continue

            op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
            if op_aaf is not None and descriptor.code != fxy2int("031021"):
                # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                associated_value = Bits._readuint(bits, op_aaf.bits(),
                                                  bits.pos)
                bits.pos += op_aaf.bits()
                # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                dummy_descriptor = ElementDescriptor(
                    fxy2int("999999"), op_aaf.bits(), 0, 0,
                    "ASSOCIATED FIELD", "NUMERIC")
                values.append(
                    BufrValue(associated_value, associated_value,
                              dummy_descriptor))

            read_length = _calculate_read_length(descriptor, operators)
            if descriptor.unit == 'CCITTIA5':
                raw_value = Bits._readhex(bits, read_length, bits.pos)
            else:
                raw_value = Bits._readuint(bits, read_length, bits.pos)
            bits.pos += read_length
            values.append(_decode_raw_value(raw_value, descriptor, operators))

        elif isinstance(descriptor, ReplicationDescriptor):
            aggregation = []
            if descriptor.count:
                bval = None
                count = descriptor.count
            else:
                # Delayed replication: the count is the next element in the data
                bval = decode(bits, itertools.islice(descriptors, 1), {},
                              {})[0]
                count = bval.value
            n_fields = descriptor.fields
            field_descriptors = list(itertools.islice(descriptors, n_fields))

            if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                # Regular replication, X elements repeated Y or <element value> times in the file
                for _ in range(count):
                    aggregation.append(
                        decode(bits, iter(field_descriptors), operators,
                               descriptor_overlay))
            elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                # Repeated replication, X elements present once in the file, output <element value> times
                repeated_values = decode(bits, iter(field_descriptors),
                                         operators, descriptor_overlay)
                for _ in range(count):
                    # The same list object is appended each time
                    aggregation.append(repeated_values)
            else:
                # Wrapped in a 1-tuple so %-formatting also works if the
                # value object is itself a tuple.
                raise ValueError(
                    "Unexpected delayed replication element %s" % (bval,))
            values.append(aggregation)

        elif isinstance(descriptor, OperatorDescriptor):
            op = descriptor.operator
            if op.immediate:
                if op.opcode == OpCode.SIGNIFY_CHARACTER:
                    raw_value = Bits._readhex(bits, op.bits(), bits.pos)
                    bits.pos += op.bits()
                    char_descriptor = ElementDescriptor(
                        fxy2int(op.code), op.bits(), 0, 0,
                        "CHARACTER INFORMATION", "CCITTIA5")
                    value = _decode_raw_value(raw_value, char_descriptor, {})
                    values.append(value)
                elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                    # The operator gives the data width of the descriptor
                    # that follows it. This branch used to subscript an
                    # islice and call list.add, so it could never succeed;
                    # it now decodes exactly that one descriptor with the
                    # overridden width.
                    base_descriptor = next(descriptors, None)
                    if base_descriptor is None:
                        raise ValueError(
                            "Operator %s is not followed by a descriptor"
                            % str(descriptor))
                    mod_descriptor = ElementDescriptor(
                        base_descriptor.code, op.bits(),
                        base_descriptor.scale, base_descriptor.ref,
                        base_descriptor.significance, base_descriptor.unit)
                    # NOTE(review): the decoded value object is stored, as
                    # in every other branch; the broken original reached
                    # for its ``.value`` attribute -- confirm with consumers.
                    values.extend(
                        decode(bits, iter([mod_descriptor]), {}, {}))
                else:
                    raise NotImplementedError(
                        "Unknown immediate operator: %s" % str(descriptor))
            elif op.neutral():
                # Cancelling an operator that is not in effect is a no-op
                # rather than a KeyError.
                operators.pop(op.opcode, None)
            else:
                op.check_conflict(operators)
                operators[op.opcode] = op

        elif isinstance(descriptor, SequenceDescriptor):
            seq = decode(bits, iter(descriptor.descriptors), operators,
                         descriptor_overlay)
            values.extend(seq)

        else:
            raise NotImplementedError(
                "Unknown descriptor type: %s" % (descriptor,))
    return values
def decode_section4(stream, descriptors, n_subsets=1, compressed=False):
    """
    Decode Section 4, the data section, of a BUFR message into a :class:`.Section4` object.

    The section starts with a 3-byte length followed by one further header
    byte; the remaining ``length - 4`` bytes are the data bits, decoded
    according to ``descriptors``.

    :param ReadableStream stream: BUFR message, starting at section 4
    :param descriptors: List of descriptors specifying message structure
    :param int n_subsets: Number of data subsets, from section 3
    :param bool compressed: Whether message data is compressed or not, from section 3
    :return: Section length and one :class:`.BufrSubset` per data subset
    :rtype: Section4
    :raises ValueError: if a delayed replication factor is neither a replication nor a repetition descriptor
    :raises ValueError: if compressed subsets carry different changed reference values
    :raises ValueError: if SIGNIFY_LOCAL_DESCRIPTOR is the last descriptor of uncompressed data
    :raises NotImplementedError: if the message contains an unknown descriptor type or immediate operator
    :raises NotImplementedError: if compressed data contains an operator other than 201-204 or 207
    """
    REPLICATION_DESCRIPTORS = {fxy2int("031000"), fxy2int("031001"), fxy2int("031002")}
    REPETITION_DESCRIPTORS = {fxy2int("031011"), fxy2int("031012")}

    from bitstring import ConstBitStream, Bits

    length = stream.readint(3)
    # Fourth header byte: the value is not used, but it must be read to advance the stream
    pad = stream.readint(1)
    data = stream.readbytes(length - 4)
    bits = ConstBitStream(bytes=data)

    def _read_uint(bits, n_bits):
        """Read an unsigned integer of ``n_bits`` bits at the current position and advance past it."""
        value = Bits._readuint(bits, n_bits, bits.pos)
        bits.pos += n_bits
        return value

    def _read_hex(bits, n_bits):
        """Read ``n_bits`` bits as a hex string at the current position and advance past them."""
        value = Bits._readhex(bits, n_bits, bits.pos)
        bits.pos += n_bits
        return value

    def _signed_reference(raw, n_bits):
        """Interpret ``raw`` as sign-and-magnitude: the top of ``n_bits`` bits is the sign."""
        top_bit_mask = 1 << (n_bits - 1)
        if raw & top_bit_mask:
            return -(raw & ~top_bit_mask)
        return raw

    def decode(bits, descriptors, operators, descriptor_overlay):
        """
        Decode one uncompressed data subset.

        ``operators`` and ``descriptor_overlay`` are modified in place.

        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        :return: Decoded values in descriptor order; each replication contributes one nested list
        """
        # iter() of an iterator is the iterator itself; this only matters if a
        # list is passed, which is then consumed correctly by islice()/next().
        descriptors = iter(descriptors)
        values = []
        for descriptor in descriptors:
            # A descriptor whose reference value was changed earlier shadows the original
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)

            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    # While this operator is in effect, the stream holds a new
                    # reference value for the element instead of a data value.
                    crf_bits = op_crf.bits()
                    ref_value = _signed_reference(_read_uint(bits, crf_bits), crf_bits)
                    descriptor_overlay[descriptor.code] = ElementDescriptor(
                        descriptor.code, descriptor.length, descriptor.scale,
                        ref_value, descriptor.significance, descriptor.unit)
                    continue

                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    aaf_bits = op_aaf.bits()
                    associated_value = _read_uint(bits, aaf_bits)
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptor = ElementDescriptor(fxy2int("999999"), aaf_bits, 0, 0, "ASSOCIATED FIELD", "NUMERIC")
                    values.append(BufrValue(associated_value, associated_value, dummy_descriptor))

                read_length = _calculate_read_length(descriptor, operators)
                if descriptor.unit == 'CCITTIA5':
                    raw_value = _read_hex(bits, read_length)
                else:
                    raw_value = _read_uint(bits, read_length)
                values.append(_decode_raw_value(raw_value, descriptor, operators))

            elif isinstance(descriptor, ReplicationDescriptor):
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    # Delayed replication: the next descriptor holds the count.
                    # It is decoded without any operators or overlays applied.
                    bval = decode(bits, itertools.islice(descriptors, 1), {}, {})[0]
                    count = bval.value
                field_descriptors = list(itertools.islice(descriptors, descriptor.fields))

                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    aggregation = [
                        decode(bits, iter(field_descriptors), operators, descriptor_overlay)
                        for _ in range(count)
                    ]
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    repeated_values = decode(bits, iter(field_descriptors), operators, descriptor_overlay)
                    aggregation = [repeated_values for _ in range(count)]
                else:
                    # 1-tuple so that a tuple-like value is formatted as a single argument
                    raise ValueError("Unexpected delayed replication element %s" % (bval,))
                values.append(aggregation)

            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.immediate:
                    if op.opcode == OpCode.SIGNIFY_CHARACTER:
                        raw_value = _read_hex(bits, op.bits())
                        char_descriptor = ElementDescriptor(fxy2int(op.code), op.bits(), 0, 0, "CHARACTER INFORMATION", "CCITTIA5")
                        values.append(_decode_raw_value(raw_value, char_descriptor, {}))
                    elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                        # The operator gives the data width of the descriptor that
                        # immediately follows it, so take exactly that descriptor.
                        base_descriptor = next(descriptors, None)
                        if base_descriptor is None:
                            raise ValueError("Operator %s is not followed by a descriptor" % str(descriptor))
                        mod_descriptor = ElementDescriptor(
                            base_descriptor.code, op.bits(), base_descriptor.scale,
                            base_descriptor.ref, base_descriptor.significance, base_descriptor.unit)
                        # Decode only the width-adjusted descriptor and keep the
                        # whole decoded value, like every other branch does.
                        values.append(decode(bits, iter([mod_descriptor]), {}, {})[0])
                    else:
                        raise NotImplementedError("Unknown immediate operator: %s" % str(descriptor))
                else:
                    if op.neutral():
                        # Cancelling an operator that is not in effect is a no-op
                        operators.pop(op.opcode, None)
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op

            elif isinstance(descriptor, SequenceDescriptor):
                # Sequences are expanded in place, not nested
                values.extend(decode(bits, iter(descriptor.descriptors), operators, descriptor_overlay))

            else:
                raise NotImplementedError("Unknown descriptor type: %s" % (descriptor,))
        return values

    def decode_compressed(bits, descriptors, n_subsets, operators, descriptor_overlay):
        """
        Decode all subsets of compressed data in one pass.

        Each element is stored once as a base value plus a 6-bit width,
        followed by one entry of that width per subset.

        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param n_subsets: Number of subsets to decode
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        :return: One list of decoded values per subset
        """
        descriptors = iter(descriptors)
        subsets = [[] for _ in range(n_subsets)]
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)

            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    # The new reference value is itself stored compressed; read
                    # it through a dummy descriptor of the operator's width.
                    dummy_descriptors = iter([ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0, 0, "ASSOCIATED FIELD", "NUMERIC")])
                    ref_subsets = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                    raw_vals = [subset[0].raw_value for subset in ref_subsets]
                    # A single overlay descriptor is shared by all subsets, so
                    # they all have to agree on the reference value.
                    if len(set(raw_vals)) != 1:
                        raise ValueError("Encountered different reference values for different subsets: %s" % (raw_vals,))
                    ref_value = _signed_reference(raw_vals[0], op_crf.bits())
                    descriptor_overlay[descriptor.code] = ElementDescriptor(
                        descriptor.code, descriptor.length, descriptor.scale,
                        ref_value, descriptor.significance, descriptor.unit)
                    continue

                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptors = iter([ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0, "ASSOCIATED FIELD", "NUMERIC")])
                    vals = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                    for i, ss in enumerate(vals):
                        subsets[i].extend(ss)

                read_length = _calculate_read_length(descriptor, operators)
                is_text = descriptor.unit == 'CCITTIA5'
                # Base value shared by all subsets
                if is_text:
                    ref_value = _read_hex(bits, read_length)
                else:
                    ref_value = _read_uint(bits, read_length)
                # Width of the per-subset entries: characters for text, bits otherwise
                n_bits = _read_uint(bits, 6)

                for i in range(n_subsets):
                    if not n_bits:
                        # Zero width: every subset has the base value
                        value = _decode_raw_value(ref_value, descriptor, operators)
                    elif is_text:
                        raw_value = _read_hex(bits, n_bits * 8)
                        value = _decode_raw_value(raw_value, descriptor, operators)
                    else:
                        increment = _read_uint(bits, n_bits)
                        if increment == (1 << n_bits) - 1:
                            # Missing value, all-ones
                            value = _decode_raw_value((1 << descriptor.length) - 1, descriptor, operators)
                        else:
                            value = _decode_raw_value(ref_value + increment, descriptor, operators)
                    subsets[i].append(value)

            elif isinstance(descriptor, ReplicationDescriptor):
                aggregations = [[] for _ in range(n_subsets)]
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    # Delayed replication: the count is taken from the first subset
                    bval = decode_compressed(bits, itertools.islice(descriptors, 1), n_subsets, {}, {})[0][0]
                    count = bval.value
                field_descriptors = list(itertools.islice(descriptors, descriptor.fields))

                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        replication = decode_compressed(bits, iter(field_descriptors), n_subsets, operators, descriptor_overlay)
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(replication[subset_idx])
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    replication = decode_compressed(bits, iter(field_descriptors), n_subsets, operators, descriptor_overlay)
                    for _ in range(count):
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(replication[subset_idx])
                else:
                    # 1-tuple so that a tuple-like value is formatted as a single argument
                    raise ValueError("Unexpected delayed replication element %s" % (bval,))

                for subset_idx in range(n_subsets):
                    subsets[subset_idx].append(aggregations[subset_idx])

            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.opcode in (1, 2, 3, 4, 7):
                    if op.neutral():
                        # Cancelling an operator that is not in effect is a no-op
                        operators.pop(op.opcode, None)
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op
                else:
                    raise NotImplementedError("Can only decode operators 201-204 and 207 for compressed BUFR data at the moment, please file an issue on GitHub, found operator: 2%02d" % op.opcode)

            elif isinstance(descriptor, SequenceDescriptor):
                comp = decode_compressed(bits, iter(descriptor.descriptors), n_subsets, operators, descriptor_overlay)
                for i, subset in enumerate(comp):
                    subsets[i].extend(subset)

            else:
                raise NotImplementedError("Unknown descriptor type: %s" % (descriptor,))
        return subsets

    if compressed:
        # All subsets are interleaved in the stream and decoded together
        subsets = [BufrSubset(x) for x in decode_compressed(bits, iter(descriptors), n_subsets, {}, {})]
    else:
        # Subsets follow each other; each starts with fresh operators and overlays
        subsets = [BufrSubset(decode(bits, iter(descriptors), {}, {})) for _ in range(n_subsets)]
    return Section4(length, subsets)