def decode_subset(json_data):
    """
    Rebuild a subset of decoded values from its JSON representation.

    Elements that are dicts are decoded from their ``"val"`` entry using the
    descriptor looked up through their ``"desc"`` entry. Every other element
    is treated as a nested sequence and converted recursively.

    :param json_data: Iterable of dicts and/or nested iterables
    :return: List mirroring the nesting of ``json_data``
    """
    def _convert(item):
        # Anything that is not a dict is a nested group of elements.
        if not isinstance(item, dict):
            return decode_subset(item)
        item_descriptor = flat_descriptors[item["desc"]]
        return _decode_raw_value(item["val"], item_descriptor)

    return [_convert(item) for item in json_data]
def decode_compressed(bits, descriptors, n_subsets, operators, descriptor_overlay):
    """
    Decode ``n_subsets`` subsets from a compressed data section.

    Every element is stored as one base value shared by all subsets, a 6-bit
    width, and then one increment of that width per subset (for ``CCITTIA5``
    elements the width counts characters instead of bits). ``descriptors`` is
    consumed as it is walked, and ``operators`` and ``descriptor_overlay``
    are updated in place.

    :param bits: Bit stream to decode from
    :param descriptors: Descriptor iterator
    :param n_subsets: Number of subsets to decode
    :param dict operators: Operators in effect, indexed by opcode
    :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
    :return: List with one list of decoded values per subset
    :raises ValueError: If new reference values differ between subsets, or a
        delayed replication count comes from an unexpected element
    :raises NotImplementedError: For operators other than 201-204 and 207 and
        for unknown descriptor types
    """
    subsets = [[] for _ in range(n_subsets)]
    for descriptor in descriptors:
        # A previously changed reference value replaces the incoming descriptor.
        descriptor = descriptor_overlay.get(descriptor.code, descriptor)

        if isinstance(descriptor, ElementDescriptor):
            op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
            if op_crf is not None:
                # While this operator is in effect the data holds a new
                # reference value for the element, not the element itself.
                dummy_descriptors = iter([
                    ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0, 0,
                                      "ASSOCIATED FIELD", "NUMERIC")
                ])
                _subsets = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                raw_vals = [subset[0].raw_value for subset in _subsets]
                if len(set(raw_vals)) != 1:
                    # Interpolate explicitly; passing raw_vals as a second
                    # exception argument would leave the "%s" unformatted.
                    raise ValueError(
                        "Encountered different reference values for different subsets: %s"
                        % (raw_vals,))
                ref_value = raw_vals[0]
                # Sign-magnitude encoding: the top bit flags a negative value.
                top_bit_mask = 1 << (op_crf.bits() - 1)
                if ref_value & top_bit_mask:
                    ref_value = -(ref_value & ~top_bit_mask)
                overlay_descriptor = ElementDescriptor(
                    descriptor.code, descriptor.length, descriptor.scale,
                    ref_value, descriptor.significance, descriptor.unit)
                descriptor_overlay[descriptor.code] = overlay_descriptor
                continue

            op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
            if op_aaf is not None and descriptor.code != fxy2int("031021"):
                # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                dummy_descriptors = iter([
                    ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0,
                                      "ASSOCIATED FIELD", "NUMERIC")
                ])
                vals = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                for i, ss in enumerate(vals):
                    subsets[i].extend(ss)

            # Base value shared by all subsets.
            read_length = _calculate_read_length(descriptor, operators)
            if descriptor.unit == 'CCITTIA5':
                ref_value = Bits._readhex(bits, read_length, bits.pos)
            else:
                ref_value = Bits._readuint(bits, read_length, bits.pos)
            bits.pos += read_length

            # Width of the per-subset entries; zero means every subset
            # carries the base value unchanged.
            n_bits = Bits._readuint(bits, 6, bits.pos)
            bits.pos += 6

            for i in range(n_subsets):
                if descriptor.unit == 'CCITTIA5':
                    # For character data the width is a character count.
                    n_chars = n_bits
                    if n_chars:
                        raw_value = Bits._readhex(bits, n_chars * 8, bits.pos)
                        bits.pos += n_chars * 8
                        value = _decode_raw_value(raw_value, descriptor, operators)
                    else:
                        value = _decode_raw_value(ref_value, descriptor, operators)
                else:
                    if n_bits:
                        increment = Bits._readuint(bits, n_bits, bits.pos)
                        bits.pos += n_bits
                        if increment ^ ((1 << n_bits) - 1) == 0:
                            # Missing value, all-ones
                            value = _decode_raw_value(
                                (1 << descriptor.length) - 1, descriptor, operators)
                        else:
                            value = _decode_raw_value(
                                ref_value + increment, descriptor, operators)
                    else:
                        value = _decode_raw_value(ref_value, descriptor, operators)
                subsets[i].append(value)

        elif isinstance(descriptor, ReplicationDescriptor):
            aggregations = [[] for _ in range(n_subsets)]
            if descriptor.count:
                bval = None
                count = descriptor.count
            else:
                # Delayed replication: the count is decoded from the next
                # descriptor, using the first subset's value.
                bval = decode_compressed(
                    bits, itertools.islice(descriptors, 1), n_subsets, {}, {})[0][0]
                count = bval.value
            n_fields = descriptor.fields
            field_descriptors = list(itertools.islice(descriptors, n_fields))

            if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                # Regular replication, X elements repeated Y or <element value> times in the file
                for _ in range(count):
                    replication = decode_compressed(
                        bits, iter(field_descriptors), n_subsets,
                        operators, descriptor_overlay)
                    for subset_idx in range(n_subsets):
                        aggregations[subset_idx].append(replication[subset_idx])
            elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                # Repeated replication, X elements present once in the file, output <element value> times
                replication = decode_compressed(
                    bits, iter(field_descriptors), n_subsets,
                    operators, descriptor_overlay)
                for _ in range(count):
                    for subset_idx in range(n_subsets):
                        aggregations[subset_idx].append(replication[subset_idx])
            else:
                raise ValueError(
                    "Unexpected delayed replication element %s" % bval)

            for subset_idx in range(n_subsets):
                subsets[subset_idx].append(aggregations[subset_idx])

        elif isinstance(descriptor, OperatorDescriptor):
            op = descriptor.operator
            if op.opcode in (1, 2, 3, 4, 7):
                if op.neutral():
                    # A neutral operand cancels the operator currently in effect.
                    del operators[op.opcode]
                else:
                    op.check_conflict(operators)
                    operators[op.opcode] = op
            else:
                raise NotImplementedError(
                    "Can only decode operators 201-204 and 207 for compressed BUFR data "
                    "at the moment, please file an issue on GitHub, found operator: 2%02d"
                    % op.opcode)

        elif isinstance(descriptor, SequenceDescriptor):
            # Sequences are expanded in place, not nested.
            comp = decode_compressed(
                bits, iter(descriptor.descriptors), n_subsets,
                operators, descriptor_overlay)
            for i, subset in enumerate(comp):
                subsets[i].extend(subset)

        else:
            raise NotImplementedError("Unknown descriptor type: %s" % descriptor)
    return subsets
def decode(bits, descriptors, operators, descriptor_overlay):
    """
    Decode the values described by ``descriptors`` from an uncompressed
    bit stream.

    ``descriptors`` is consumed as it is walked, and ``operators`` and
    ``descriptor_overlay`` are updated in place.

    :param bits: Bit stream to decode from
    :param descriptors: Descriptor iterator
    :param dict operators: Operators in effect, indexed by opcode
    :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
    :return: List of decoded values; each replication contributes one nested
        list holding one list per repetition
    :raises ValueError: If a delayed replication count comes from an
        unexpected element, or a SIGNIFY_LOCAL_DESCRIPTOR operator is the
        last descriptor
    :raises NotImplementedError: For unknown immediate operators and unknown
        descriptor types
    """
    values = []
    for descriptor in descriptors:
        # A previously changed reference value replaces the incoming descriptor.
        descriptor = descriptor_overlay.get(descriptor.code, descriptor)

        if isinstance(descriptor, ElementDescriptor):
            op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
            if op_crf is not None:
                # While this operator is in effect the data holds a new
                # reference value for the element, not the element itself.
                ref_value = Bits._readuint(bits, op_crf.bits(), bits.pos)
                bits.pos += op_crf.bits()
                # Sign-magnitude encoding: the top bit flags a negative value.
                top_bit_mask = 1 << (op_crf.bits() - 1)
                if ref_value & top_bit_mask:
                    ref_value = -(ref_value & ~top_bit_mask)
                overlay_descriptor = ElementDescriptor(
                    descriptor.code, descriptor.length, descriptor.scale,
                    ref_value, descriptor.significance, descriptor.unit)
                descriptor_overlay[descriptor.code] = overlay_descriptor
                continue

            op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
            if op_aaf is not None and descriptor.code != fxy2int("031021"):
                # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                associated_value = Bits._readuint(bits, op_aaf.bits(), bits.pos)
                bits.pos += op_aaf.bits()
                # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                dummy_descriptor = ElementDescriptor(
                    fxy2int("999999"), op_aaf.bits(), 0, 0,
                    "ASSOCIATED FIELD", "NUMERIC")
                values.append(
                    BufrValue(associated_value, associated_value, dummy_descriptor))

            read_length = _calculate_read_length(descriptor, operators)
            if descriptor.unit == 'CCITTIA5':
                raw_value = Bits._readhex(bits, read_length, bits.pos)
            else:
                raw_value = Bits._readuint(bits, read_length, bits.pos)
            bits.pos += read_length
            values.append(_decode_raw_value(raw_value, descriptor, operators))

        elif isinstance(descriptor, ReplicationDescriptor):
            aggregation = []
            if descriptor.count:
                bval = None
                count = descriptor.count
            else:
                # Delayed replication: the count is decoded from the next descriptor.
                bval = decode(bits, itertools.islice(descriptors, 1), {}, {})[0]
                count = bval.value
            n_fields = descriptor.fields
            field_descriptors = list(itertools.islice(descriptors, n_fields))

            if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                # Regular replication, X elements repeated Y or <element value> times in the file
                for _ in range(count):
                    aggregation.append(
                        decode(bits, iter(field_descriptors), operators, descriptor_overlay))
            elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                # Repeated replication, X elements present once in the file, output <element value> times
                repeated_values = decode(
                    bits, iter(field_descriptors), operators, descriptor_overlay)
                for _ in range(count):
                    aggregation.append(repeated_values)
            else:
                raise ValueError(
                    "Unexpected delayed replication element %s" % bval)
            values.append(aggregation)

        elif isinstance(descriptor, OperatorDescriptor):
            op = descriptor.operator
            if op.immediate:
                if op.opcode == OpCode.SIGNIFY_CHARACTER:
                    raw_value = Bits._readhex(bits, op.bits(), bits.pos)
                    bits.pos += op.bits()
                    char_descriptor = ElementDescriptor(
                        fxy2int(op.code), op.bits(), 0, 0,
                        "CHARACTER INFORMATION", "CCITTIA5")
                    value = _decode_raw_value(raw_value, char_descriptor, {})
                    values.append(value)
                elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                    # The operator supplies the data width of the descriptor
                    # right after it, so take exactly that one descriptor off
                    # the shared iterator (an islice object cannot be indexed).
                    base_descriptor = next(descriptors, None)
                    if base_descriptor is None:
                        raise ValueError(
                            "Operator %s is not followed by a descriptor"
                            % str(descriptor))
                    mod_descriptor = ElementDescriptor(
                        base_descriptor.code, op.bits(), base_descriptor.scale,
                        base_descriptor.ref, base_descriptor.significance,
                        base_descriptor.unit)
                    # Decode only the width-adjusted descriptor; passing the
                    # shared iterator here would swallow all remaining ones.
                    # NOTE(review): the whole decoded value is appended so the
                    # entry has the same type as every other element of the
                    # result - confirm callers do not expect a bare ``.value``.
                    values.append(decode(bits, iter([mod_descriptor]), {}, {})[0])
                else:
                    raise NotImplementedError(
                        "Unknown immediate operator: %s" % str(descriptor))
            else:
                if op.neutral():
                    # A neutral operand cancels the operator currently in effect.
                    del operators[op.opcode]
                else:
                    op.check_conflict(operators)
                    operators[op.opcode] = op

        elif isinstance(descriptor, SequenceDescriptor):
            # Sequences are expanded in place, not nested.
            seq = decode(bits, iter(descriptor.descriptors), operators, descriptor_overlay)
            values.extend(seq)

        else:
            raise NotImplementedError("Unknown descriptor type: %s" % descriptor)
    return values
def decode_compressed(bits, descriptors, n_subsets, operators, descriptor_overlay):
    """
    Decode ``n_subsets`` subsets from a compressed data section.

    Every element is stored as one base value shared by all subsets, a 6-bit
    width, and then one increment of that width per subset (for ``CCITTIA5``
    elements the width counts characters instead of bits). ``descriptors`` is
    consumed as it is walked, and ``operators`` and ``descriptor_overlay``
    are updated in place.

    :param bits: Bit stream to decode from
    :param descriptors: Descriptor iterator
    :param n_subsets: Number of subsets to decode
    :param dict operators: Operators in effect, indexed by opcode
    :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
    :return: List with one list of decoded values per subset
    :raises ValueError: If new reference values differ between subsets, or a
        delayed replication count comes from an unexpected element
    :raises NotImplementedError: For operators other than 201-204 and 207 and
        for unknown descriptor types
    """
    subsets = [[] for _ in range(n_subsets)]
    for descriptor in descriptors:
        # A previously changed reference value replaces the incoming descriptor.
        descriptor = descriptor_overlay.get(descriptor.code, descriptor)

        if isinstance(descriptor, ElementDescriptor):
            op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
            if op_crf is not None:
                # While this operator is in effect the data holds a new
                # reference value for the element, not the element itself.
                dummy_descriptors = iter([
                    ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0, 0,
                                      "ASSOCIATED FIELD", "NUMERIC")
                ])
                _subsets = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                raw_vals = [subset[0].raw_value for subset in _subsets]
                if len(set(raw_vals)) != 1:
                    # Interpolate explicitly; passing raw_vals as a second
                    # exception argument would leave the "%s" unformatted.
                    raise ValueError(
                        "Encountered different reference values for different subsets: %s"
                        % (raw_vals,))
                ref_value = raw_vals[0]
                # Sign-magnitude encoding: the top bit flags a negative value.
                top_bit_mask = 1 << (op_crf.bits() - 1)
                if ref_value & top_bit_mask:
                    ref_value = -(ref_value & ~top_bit_mask)
                overlay_descriptor = ElementDescriptor(
                    descriptor.code, descriptor.length, descriptor.scale,
                    ref_value, descriptor.significance, descriptor.unit)
                descriptor_overlay[descriptor.code] = overlay_descriptor
                continue

            op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
            if op_aaf is not None and descriptor.code != fxy2int("031021"):
                # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                dummy_descriptors = iter([
                    ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0,
                                      "ASSOCIATED FIELD", "NUMERIC")
                ])
                vals = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                for i, ss in enumerate(vals):
                    subsets[i].extend(ss)

            # Base value shared by all subsets.
            read_length = _calculate_read_length(descriptor, operators)
            if descriptor.unit == 'CCITTIA5':
                ref_value = Bits._readhex(bits, read_length, bits.pos)
            else:
                ref_value = Bits._readuint(bits, read_length, bits.pos)
            bits.pos += read_length

            # Width of the per-subset entries; zero means every subset
            # carries the base value unchanged.
            n_bits = Bits._readuint(bits, 6, bits.pos)
            bits.pos += 6

            for i in range(n_subsets):
                if descriptor.unit == 'CCITTIA5':
                    # For character data the width is a character count.
                    n_chars = n_bits
                    if n_chars:
                        raw_value = Bits._readhex(bits, n_chars * 8, bits.pos)
                        bits.pos += n_chars * 8
                        value = _decode_raw_value(raw_value, descriptor, operators)
                    else:
                        value = _decode_raw_value(ref_value, descriptor, operators)
                else:
                    if n_bits:
                        increment = Bits._readuint(bits, n_bits, bits.pos)
                        bits.pos += n_bits
                        if increment ^ ((1 << n_bits) - 1) == 0:
                            # Missing value, all-ones
                            value = _decode_raw_value(
                                (1 << descriptor.length) - 1, descriptor, operators)
                        else:
                            value = _decode_raw_value(
                                ref_value + increment, descriptor, operators)
                    else:
                        value = _decode_raw_value(ref_value, descriptor, operators)
                subsets[i].append(value)

        elif isinstance(descriptor, ReplicationDescriptor):
            aggregations = [[] for _ in range(n_subsets)]
            if descriptor.count:
                bval = None
                count = descriptor.count
            else:
                # Delayed replication: the count is decoded from the next
                # descriptor, using the first subset's value.
                bval = decode_compressed(
                    bits, itertools.islice(descriptors, 1), n_subsets, {}, {})[0][0]
                count = bval.value
            n_fields = descriptor.fields
            field_descriptors = list(itertools.islice(descriptors, n_fields))

            if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                # Regular replication, X elements repeated Y or <element value> times in the file
                for _ in range(count):
                    replication = decode_compressed(
                        bits, iter(field_descriptors), n_subsets,
                        operators, descriptor_overlay)
                    for subset_idx in range(n_subsets):
                        aggregations[subset_idx].append(replication[subset_idx])
            elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                # Repeated replication, X elements present once in the file, output <element value> times
                replication = decode_compressed(
                    bits, iter(field_descriptors), n_subsets,
                    operators, descriptor_overlay)
                for _ in range(count):
                    for subset_idx in range(n_subsets):
                        aggregations[subset_idx].append(replication[subset_idx])
            else:
                raise ValueError(
                    "Unexpected delayed replication element %s" % bval)

            for subset_idx in range(n_subsets):
                subsets[subset_idx].append(aggregations[subset_idx])

        elif isinstance(descriptor, OperatorDescriptor):
            op = descriptor.operator
            if op.opcode in (1, 2, 3, 4, 7):
                if op.neutral():
                    # A neutral operand cancels the operator currently in effect.
                    del operators[op.opcode]
                else:
                    op.check_conflict(operators)
                    operators[op.opcode] = op
            else:
                raise NotImplementedError(
                    "Can only decode operators 201-204 and 207 for compressed BUFR data "
                    "at the moment, please file an issue on GitHub, found operator: 2%02d"
                    % op.opcode)

        elif isinstance(descriptor, SequenceDescriptor):
            # Sequences are expanded in place, not nested.
            comp = decode_compressed(
                bits, iter(descriptor.descriptors), n_subsets,
                operators, descriptor_overlay)
            for i, subset in enumerate(comp):
                subsets[i].extend(subset)

        else:
            raise NotImplementedError("Unknown descriptor type: %s" % descriptor)
    return subsets
def decode(bits, descriptors, operators, descriptor_overlay):
    """
    Decode the values described by ``descriptors`` from an uncompressed
    bit stream.

    ``descriptors`` is consumed as it is walked, and ``operators`` and
    ``descriptor_overlay`` are updated in place.

    :param bits: Bit stream to decode from
    :param descriptors: Descriptor iterator
    :param dict operators: Operators in effect, indexed by opcode
    :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
    :return: List of decoded values; each replication contributes one nested
        list holding one list per repetition
    :raises ValueError: If a delayed replication count comes from an
        unexpected element, or a SIGNIFY_LOCAL_DESCRIPTOR operator is the
        last descriptor
    :raises NotImplementedError: For unknown immediate operators and unknown
        descriptor types
    """
    values = []
    for descriptor in descriptors:
        # A previously changed reference value replaces the incoming descriptor.
        descriptor = descriptor_overlay.get(descriptor.code, descriptor)

        if isinstance(descriptor, ElementDescriptor):
            op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
            if op_crf is not None:
                # While this operator is in effect the data holds a new
                # reference value for the element, not the element itself.
                ref_value = Bits._readuint(bits, op_crf.bits(), bits.pos)
                bits.pos += op_crf.bits()
                # Sign-magnitude encoding: the top bit flags a negative value.
                top_bit_mask = 1 << (op_crf.bits() - 1)
                if ref_value & top_bit_mask:
                    ref_value = -(ref_value & ~top_bit_mask)
                overlay_descriptor = ElementDescriptor(
                    descriptor.code, descriptor.length, descriptor.scale,
                    ref_value, descriptor.significance, descriptor.unit)
                descriptor_overlay[descriptor.code] = overlay_descriptor
                continue

            op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
            if op_aaf is not None and descriptor.code != fxy2int("031021"):
                # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                associated_value = Bits._readuint(bits, op_aaf.bits(), bits.pos)
                bits.pos += op_aaf.bits()
                # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                dummy_descriptor = ElementDescriptor(
                    fxy2int("999999"), op_aaf.bits(), 0, 0,
                    "ASSOCIATED FIELD", "NUMERIC")
                values.append(
                    BufrValue(associated_value, associated_value, dummy_descriptor))

            read_length = _calculate_read_length(descriptor, operators)
            if descriptor.unit == 'CCITTIA5':
                raw_value = Bits._readhex(bits, read_length, bits.pos)
            else:
                raw_value = Bits._readuint(bits, read_length, bits.pos)
            bits.pos += read_length
            values.append(_decode_raw_value(raw_value, descriptor, operators))

        elif isinstance(descriptor, ReplicationDescriptor):
            aggregation = []
            if descriptor.count:
                bval = None
                count = descriptor.count
            else:
                # Delayed replication: the count is decoded from the next descriptor.
                bval = decode(bits, itertools.islice(descriptors, 1), {}, {})[0]
                count = bval.value
            n_fields = descriptor.fields
            field_descriptors = list(itertools.islice(descriptors, n_fields))

            if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                # Regular replication, X elements repeated Y or <element value> times in the file
                for _ in range(count):
                    aggregation.append(
                        decode(bits, iter(field_descriptors), operators, descriptor_overlay))
            elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                # Repeated replication, X elements present once in the file, output <element value> times
                repeated_values = decode(
                    bits, iter(field_descriptors), operators, descriptor_overlay)
                for _ in range(count):
                    aggregation.append(repeated_values)
            else:
                raise ValueError(
                    "Unexpected delayed replication element %s" % bval)
            values.append(aggregation)

        elif isinstance(descriptor, OperatorDescriptor):
            op = descriptor.operator
            if op.immediate:
                if op.opcode == OpCode.SIGNIFY_CHARACTER:
                    raw_value = Bits._readhex(bits, op.bits(), bits.pos)
                    bits.pos += op.bits()
                    char_descriptor = ElementDescriptor(
                        fxy2int(op.code), op.bits(), 0, 0,
                        "CHARACTER INFORMATION", "CCITTIA5")
                    value = _decode_raw_value(raw_value, char_descriptor, {})
                    values.append(value)
                elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                    # The operator supplies the data width of the descriptor
                    # right after it, so take exactly that one descriptor off
                    # the shared iterator (an islice object cannot be indexed).
                    base_descriptor = next(descriptors, None)
                    if base_descriptor is None:
                        raise ValueError(
                            "Operator %s is not followed by a descriptor"
                            % str(descriptor))
                    mod_descriptor = ElementDescriptor(
                        base_descriptor.code, op.bits(), base_descriptor.scale,
                        base_descriptor.ref, base_descriptor.significance,
                        base_descriptor.unit)
                    # Decode only the width-adjusted descriptor; passing the
                    # shared iterator here would swallow all remaining ones.
                    # NOTE(review): the whole decoded value is appended so the
                    # entry has the same type as every other element of the
                    # result - confirm callers do not expect a bare ``.value``.
                    values.append(decode(bits, iter([mod_descriptor]), {}, {})[0])
                else:
                    raise NotImplementedError(
                        "Unknown immediate operator: %s" % str(descriptor))
            else:
                if op.neutral():
                    # A neutral operand cancels the operator currently in effect.
                    del operators[op.opcode]
                else:
                    op.check_conflict(operators)
                    operators[op.opcode] = op

        elif isinstance(descriptor, SequenceDescriptor):
            # Sequences are expanded in place, not nested.
            seq = decode(bits, iter(descriptor.descriptors), operators, descriptor_overlay)
            values.extend(seq)

        else:
            raise NotImplementedError("Unknown descriptor type: %s" % descriptor)
    return values