Example #1
0
    def decode_compressed(bits, descriptors, n_subsets, operators,
                          descriptor_overlay):
        """
        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param n_subsets: Number of subsets to decode
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        """
        subsets = [[] for x in range(n_subsets)]
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)

            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    dummy_descriptors = iter([
                        ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0,
                                          0, "ASSOCIATED FIELD", "NUMERIC")
                    ])
                    _subsets = decode_compressed(bits, dummy_descriptors,
                                                 n_subsets, {}, {})
                    raw_vals = [subset[0].raw_value for subset in _subsets]

                    if len(set(raw_vals)) != 1:
                        raise ValueError(
                            "Encountered different reference values for different subsets: %s",
                            raw_vals)

                    ref_value = raw_vals[0]
                    top_bit_mask = (1 << op_crf.bits() - 1)
                    if ref_value & top_bit_mask:
                        ref_value = -(ref_value & ~top_bit_mask)

                    overlay_descriptor = ElementDescriptor(
                        descriptor.code, descriptor.length, descriptor.scale,
                        ref_value, descriptor.significance, descriptor.unit)
                    descriptor_overlay[descriptor.code] = overlay_descriptor
                    continue

                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptors = iter([
                        ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0,
                                          0, "ASSOCIATED FIELD", "NUMERIC")
                    ])
                    vals = decode_compressed(bits, dummy_descriptors,
                                             n_subsets, {}, {})
                    for i, ss in enumerate(vals):
                        subsets[i].extend(ss)

                read_length = _calculate_read_length(descriptor, operators)
                if descriptor.unit == 'CCITTIA5':
                    ref_value = Bits._readhex(bits, read_length, bits.pos)
                else:
                    ref_value = Bits._readuint(bits, read_length, bits.pos)
                bits.pos += read_length

                n_bits = Bits._readuint(bits, 6, bits.pos)
                bits.pos += 6

                for i in range(n_subsets):
                    if descriptor.unit == 'CCITTIA5':
                        n_chars = n_bits
                        if n_chars:
                            raw_value = Bits._readhex(bits, n_chars * 8,
                                                      bits.pos)
                            bits.pos += n_chars * 8
                            value = _decode_raw_value(raw_value, descriptor,
                                                      operators)
                        else:
                            value = _decode_raw_value(ref_value, descriptor,
                                                      operators)
                    else:
                        if n_bits:
                            increment = Bits._readuint(bits, n_bits, bits.pos)
                            bits.pos += n_bits
                            if increment ^ ((1 << n_bits) -
                                            1) == 0:  # Missing value, all-ones
                                value = _decode_raw_value(
                                    (1 << descriptor.length) - 1, descriptor,
                                    operators)
                            else:
                                value = _decode_raw_value(
                                    ref_value + increment, descriptor,
                                    operators)
                        else:
                            value = _decode_raw_value(ref_value, descriptor,
                                                      operators)
                    subsets[i].append(value)
            elif isinstance(descriptor, ReplicationDescriptor):
                aggregations = [[] for x in range(n_subsets)]
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    bval = decode_compressed(bits,
                                             itertools.islice(descriptors, 1),
                                             n_subsets, {}, {})[0][0]
                    count = bval.value
                n_fields = descriptor.fields
                field_descriptors = list(
                    itertools.islice(descriptors, n_fields))

                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        replication = decode_compressed(
                            bits, iter(field_descriptors), n_subsets,
                            operators, descriptor_overlay)
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(
                                replication[subset_idx])
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    replication = decode_compressed(bits,
                                                    iter(field_descriptors),
                                                    n_subsets, operators,
                                                    descriptor_overlay)
                    for _ in range(count):
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(
                                replication[subset_idx])
                else:
                    raise ValueError(
                        "Unexpected delayed replication element %s" % bval)

                for subset_idx in range(n_subsets):
                    subsets[subset_idx].append(aggregations[subset_idx])
            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.opcode in (1, 2, 3, 4, 7):
                    if op.neutral():
                        del operators[op.opcode]
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op
                else:
                    raise NotImplementedError(
                        "Can only decode operators 201-204 and 207 for compressed BUFR data at the moment, please file an issue on GitHub, found operator: 2%02d"
                        % op.opcode)
            elif isinstance(descriptor, SequenceDescriptor):
                comp = decode_compressed(bits, iter(descriptor.descriptors),
                                         n_subsets, operators,
                                         descriptor_overlay)
                for i, subset in enumerate(comp):
                    subsets[i].extend(subset)
            else:
                raise NotImplementedError("Unknown descriptor type: %s" %
                                          descriptor)
        return subsets
Example #2
0
    def decode_compressed(bits, descriptors, n_subsets, operators, descriptor_overlay):
        """
        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param n_subsets: Number of subsets to decode
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        """
        subsets = [[] for x in range(n_subsets)]
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)

            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    dummy_descriptors = iter([ElementDescriptor(fxy2int("999999"), op_crf.bits(), 0, 0, "ASSOCIATED FIELD", "NUMERIC")])
                    _subsets = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                    raw_vals = [subset[0].raw_value for subset in _subsets]

                    if len(set(raw_vals)) != 1:
                        raise ValueError("Encountered different reference values for different subsets: %s", raw_vals)

                    ref_value = raw_vals[0]
                    top_bit_mask = (1 << op_crf.bits()-1)
                    if ref_value & top_bit_mask:
                        ref_value = -(ref_value & ~top_bit_mask)

                    overlay_descriptor = ElementDescriptor(descriptor.code, descriptor.length, descriptor.scale, ref_value, descriptor.significance, descriptor.unit)
                    descriptor_overlay[descriptor.code] = overlay_descriptor
                    continue

                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptors = iter([ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0, "ASSOCIATED FIELD", "NUMERIC")])
                    vals = decode_compressed(bits, dummy_descriptors, n_subsets, {}, {})
                    for i,ss in enumerate(vals):
                        subsets[i].extend(ss)

                read_length = _calculate_read_length(descriptor, operators)
                if descriptor.unit == 'CCITTIA5':
                    ref_value = Bits._readhex(bits, read_length, bits.pos)
                else:
                    ref_value = Bits._readuint(bits, read_length, bits.pos)
                bits.pos += read_length

                n_bits = Bits._readuint(bits, 6, bits.pos)
                bits.pos += 6
                
                for i in range(n_subsets):
                    if descriptor.unit == 'CCITTIA5':
                        n_chars = n_bits
                        if n_chars:
                            raw_value = Bits._readhex(bits, n_chars*8, bits.pos)
                            bits.pos += n_chars*8
                            value = _decode_raw_value(raw_value, descriptor, operators)
                        else:
                            value = _decode_raw_value(ref_value, descriptor, operators)
                    else:
                        if n_bits:
                            increment = Bits._readuint(bits, n_bits, bits.pos)
                            bits.pos += n_bits
                            if increment ^ ((1 << n_bits)-1) == 0: # Missing value, all-ones
                                value = _decode_raw_value((1 << descriptor.length)-1, descriptor, operators)
                            else:
                                value = _decode_raw_value(ref_value + increment, descriptor, operators)
                        else:
                            value = _decode_raw_value(ref_value, descriptor, operators)
                    subsets[i].append(value)
            elif isinstance(descriptor, ReplicationDescriptor):
                aggregations = [[] for x in range(n_subsets)]
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    bval = decode_compressed(bits, itertools.islice(descriptors, 1), n_subsets, {}, {})[0][0]
                    count = bval.value
                n_fields = descriptor.fields
                field_descriptors = list(itertools.islice(descriptors, n_fields))

                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        replication = decode_compressed(bits, iter(field_descriptors), n_subsets, operators, descriptor_overlay)
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(replication[subset_idx])
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    replication = decode_compressed(bits, iter(field_descriptors), n_subsets, operators, descriptor_overlay)
                    for _ in range(count):
                        for subset_idx in range(n_subsets):
                            aggregations[subset_idx].append(replication[subset_idx])
                else:
                    raise ValueError("Unexpected delayed replication element %s" %bval)

                for subset_idx in range(n_subsets):
                    subsets[subset_idx].append(aggregations[subset_idx])
            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.opcode in (1,2,3,4,7):
                    if op.neutral():
                        del operators[op.opcode]
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op
                else:
                    raise NotImplementedError("Can only decode operators 201-204 and 207 for compressed BUFR data at the moment, please file an issue on GitHub, found operator: 2%02d" %op.opcode)
            elif isinstance(descriptor, SequenceDescriptor):
                comp = decode_compressed(bits, iter(descriptor.descriptors), n_subsets, operators, descriptor_overlay)
                for i,subset in enumerate(comp):
                    subsets[i].extend(subset)
            else:
                raise NotImplementedError("Unknown descriptor type: %s" % descriptor)
        return subsets
Example #3
0
    def decode(bits, descriptors, operators, descriptor_overlay):
        """
        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        """
        values = []
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)
            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    ref_value = Bits._readuint(bits, op_crf.bits(), bits.pos)
                    bits.pos += op_crf.bits()
                    top_bit_mask = (1 << op_crf.bits() - 1)
                    if ref_value & top_bit_mask:
                        ref_value = -(ref_value & ~top_bit_mask)
                    overlay_descriptor = ElementDescriptor(
                        descriptor.code, descriptor.length, descriptor.scale,
                        ref_value, descriptor.significance, descriptor.unit)
                    descriptor_overlay[descriptor.code] = overlay_descriptor
                    continue

                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    associated_value = Bits._readuint(bits, op_aaf.bits(),
                                                      bits.pos)
                    bits.pos += op_aaf.bits()
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptor = ElementDescriptor(
                        fxy2int("999999"), op_aaf.bits(), 0, 0,
                        "ASSOCIATED FIELD", "NUMERIC")
                    values.append(
                        BufrValue(associated_value, associated_value,
                                  dummy_descriptor))

                read_length = _calculate_read_length(descriptor, operators)
                if descriptor.unit == 'CCITTIA5':
                    raw_value = Bits._readhex(bits, read_length, bits.pos)
                else:
                    raw_value = Bits._readuint(bits, read_length, bits.pos)
                bits.pos += read_length
                values.append(
                    _decode_raw_value(raw_value, descriptor, operators))
            elif isinstance(descriptor, ReplicationDescriptor):
                aggregation = []
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    bval = decode(bits, itertools.islice(descriptors, 1), {},
                                  {})[0]
                    count = bval.value
                n_fields = descriptor.fields
                field_descriptors = list(
                    itertools.islice(descriptors, n_fields))
                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        aggregation.append(
                            decode(bits, iter(field_descriptors), operators,
                                   descriptor_overlay))
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    repeated_values = decode(bits, iter(field_descriptors),
                                             operators, descriptor_overlay)
                    for _ in range(count):
                        aggregation.append(repeated_values)
                else:
                    raise ValueError(
                        "Unexpected delayed replication element %s" % bval)
                values.append(aggregation)
            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.immediate:
                    if op.opcode == OpCode.SIGNIFY_CHARACTER:
                        raw_value = Bits._readhex(bits, op.bits(), bits.pos)
                        bits.pos += op.bits()
                        char_descriptor = ElementDescriptor(
                            fxy2int(op.code), op.bits(), 0, 0,
                            "CHARACTER INFORMATION", "CCITTIA5")
                        value = _decode_raw_value(raw_value, char_descriptor,
                                                  {})
                        values.append(value)
                    elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                        base_descriptor = itertools.islice(descriptors, 1)[0]
                        mod_descriptor = ElementDescriptor(
                            base_descriptor.code, op.bits(),
                            base_descriptor.scale, base_descriptor.ref,
                            base_descriptor.significance, base_descriptor.unit)
                        values.add(decode(bits, descriptors, {}, {})[0].value)

                    else:
                        raise NotImplementedError(
                            "Unknown immediate operator: %s" % str(descriptor))
                else:
                    if op.neutral():
                        del operators[op.opcode]
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op
            elif isinstance(descriptor, SequenceDescriptor):
                seq = decode(bits, iter(descriptor.descriptors), operators,
                             descriptor_overlay)
                values.extend(seq)
            else:
                raise NotImplementedError("Unknown descriptor type: %s" %
                                          descriptor)
        return values
Example #4
0
    def decode(bits, descriptors, operators, descriptor_overlay):
        """
        :param bits: Bit stream to decode from
        :param descriptors: Descriptor iterator
        :param dict operators: Operators in effect, indexed by opcode
        :param dict descriptor_overlay: Overlay descriptors affected by CHANGE_REFERENCE_VALUES operator
        """
        values = []
        for descriptor in descriptors:
            descriptor = descriptor_overlay.get(descriptor.code, descriptor)
            if isinstance(descriptor, ElementDescriptor):
                op_crf = operators.get(OpCode.CHANGE_REFERENCE_VALUES, None)
                if op_crf is not None:
                    ref_value = Bits._readuint(bits, op_crf.bits(), bits.pos)
                    bits.pos += op_crf.bits()
                    top_bit_mask = (1 << op_crf.bits()-1)
                    if ref_value & top_bit_mask:
                        ref_value = -(ref_value & ~top_bit_mask)
                    overlay_descriptor = ElementDescriptor(descriptor.code, descriptor.length, descriptor.scale, ref_value, descriptor.significance, descriptor.unit)
                    descriptor_overlay[descriptor.code] = overlay_descriptor
                    continue
                
                op_aaf = operators.get(OpCode.ADD_ASSOCIATED_FIELD, None)
                if op_aaf is not None and descriptor.code != fxy2int("031021"):
                    # Don't apply to ASSOCIATED FIELD SIGNIFICANCE
                    associated_value = Bits._readuint(bits, op_aaf.bits(), bits.pos)
                    bits.pos += op_aaf.bits()
                    # Use dummy descriptor 999999 for associated field, like Geo::BUFR and libbufr
                    dummy_descriptor = ElementDescriptor(fxy2int("999999"), op_aaf.bits(), 0, 0, "ASSOCIATED FIELD", "NUMERIC")
                    values.append(BufrValue(associated_value, associated_value, dummy_descriptor))

                read_length = _calculate_read_length(descriptor, operators)
                if descriptor.unit == 'CCITTIA5':
                    raw_value = Bits._readhex(bits, read_length, bits.pos)
                else:
                    raw_value = Bits._readuint(bits, read_length, bits.pos)
                bits.pos += read_length
                values.append(_decode_raw_value(raw_value, descriptor, operators))
            elif isinstance(descriptor, ReplicationDescriptor):
                aggregation = []
                if descriptor.count:
                    bval = None
                    count = descriptor.count
                else:
                    bval = decode(bits, itertools.islice(descriptors, 1), {}, {})[0]
                    count = bval.value
                n_fields = descriptor.fields
                field_descriptors = list(itertools.islice(descriptors, n_fields))
                if bval is None or bval.descriptor.code in REPLICATION_DESCRIPTORS:
                    # Regular replication, X elements repeated Y or <element value> times in the file
                    for _ in range(count):
                        aggregation.append(decode(bits, iter(field_descriptors), operators, descriptor_overlay))
                elif bval.descriptor.code in REPETITION_DESCRIPTORS:
                    # Repeated replication, X elements present once in the file, output <element value> times
                    repeated_values = decode(bits, iter(field_descriptors), operators, descriptor_overlay)
                    for _ in range(count):
                        aggregation.append(repeated_values)
                else:
                    raise ValueError("Unexpected delayed replication element %s" %bval)
                values.append(aggregation)
            elif isinstance(descriptor, OperatorDescriptor):
                op = descriptor.operator
                if op.immediate:
                    if op.opcode == OpCode.SIGNIFY_CHARACTER:
                        raw_value = Bits._readhex(bits, op.bits(), bits.pos)
                        bits.pos += op.bits()
                        char_descriptor = ElementDescriptor(fxy2int(op.code), op.bits(), 0, 0, "CHARACTER INFORMATION", "CCITTIA5")
                        value = _decode_raw_value(raw_value, char_descriptor, {})
                        values.append(value)
                    elif op.opcode == OpCode.SIGNIFY_LOCAL_DESCRIPTOR:
                        base_descriptor = itertools.islice(descriptors, 1)[0]
                        mod_descriptor = ElementDescriptor(base_descriptor.code, op.bits(), base_descriptor.scale, base_descriptor.ref, base_descriptor.significance, base_descriptor.unit)
                        values.add(decode(bits, descriptors, {}, {})[0].value)
                        
                    else:
                        raise NotImplementedError("Unknown immediate operator: %s" % str(descriptor))
                else:
                    if op.neutral():
                        del operators[op.opcode]
                    else:
                        op.check_conflict(operators)
                        operators[op.opcode] = op
            elif isinstance(descriptor, SequenceDescriptor):
                seq = decode(bits, iter(descriptor.descriptors), operators, descriptor_overlay)
                values.extend(seq)
            else:
                raise NotImplementedError("Unknown descriptor type: %s" % descriptor)
        return values