Exemple #1
0
 def read_8bitfloat(self, mantissabits=5, zeroexp=2):
     """Read a single byte and decode it as a compact floating point value.

     mantissabits -- number of bits used for the mantissa (the remaining
         bits are used for the exponent).
     zeroexp -- zero point for the exponent.

     Both arguments are handed unchanged to byte_to_float.
     """
     raw = self.read_byte()
     return byte_to_float(raw, mantissabits, zeroexp)
Exemple #2
0
    def test_posboost_postings(self):
        """Round-trip random (position, boost) postings through PositionBoosts.

        Builds three postings with increasing document numbers, each holding
        one to three increasing positions, then checks what comes back when
        the postings are read as "position_boosts", "positions" and
        "frequency".
        """
        expected = []
        doc_id = 0
        for _doc in xrange(0, 3):
            doc_id += randint(1, 10)
            pairs = []
            where = 0
            for _hit in xrange(0, randint(1, 3)):
                where += randint(1, 10)
                # The random boost goes through float_to_byte/byte_to_float
                # before it is stored (presumably so the expected value is
                # one the byte encoding reproduces exactly).
                raw_boost = random() * 2
                pairs.append((where, byte_to_float(float_to_byte(raw_boost))))
            expected.append((doc_id, pairs))

        def check(wanted, as_name):
            # A fresh format object is created for every round trip.
            found = self.roundtrip(expected, PositionBoosts(None), as_name)
            self.assertEqual(wanted, found)

        check(expected, "position_boosts")

        # Same postings with the boosts dropped.
        positions_only = [(d, [p for p, _b in entries])
                          for d, entries in expected]
        check(positions_only, "positions")

        # Same postings reduced to the number of positions per document.
        counts = [(d, len(entries)) for d, entries in expected]
        check(counts, "frequency")
def test_charboost_postings():
    """Round-trip random character/boost postings through CharacterBoosts.

    Generates 20 postings with increasing document numbers, each holding
    one to ten (position, startchar, endchar, boost) entries, and checks
    what comes back when they are read as "character_boosts", "characters",
    "position_boosts", "positions" and "frequency".
    """
    expected = []
    doc_id = 0
    for _doc in xrange(0, 20):
        doc_id += randint(1, 10)
        entries = []
        where = 0
        last_end = 0
        for _hit in xrange(0, randint(1, 10)):
            where += randint(1, 10)
            # Each span starts after the end of the previous one.
            first = last_end + randint(3, 10)
            last_end = first + randint(3, 10)
            weight = byte_to_float(float_to_byte(random() * 2))
            entries.append((where, first, last_end, weight))
        expected.append((doc_id, entries))

    def check(wanted, as_name):
        # A fresh format object is created for every round trip.
        assert_equal(wanted, roundtrip(expected, CharacterBoosts(), as_name))

    check(expected, "character_boosts")

    # Boosts dropped.
    spans = [(d, [(p, s, e) for p, s, e, _w in items])
             for d, items in expected]
    check(spans, "characters")

    # Character offsets dropped.
    boosted = [(d, [(p, w) for p, _s, _e, w in items])
               for d, items in expected]
    check(boosted, "position_boosts")

    # Positions only.
    positions_only = [(d, [p for p, _s, _e, _w in items])
                      for d, items in expected]
    check(positions_only, "positions")

    # Number of positions per document, derived from the positions list.
    counts = [(d, len(plist)) for d, plist in positions_only]
    check(counts, "frequency")
 def read_8bitfloat(self, mantissabits=5, zeroexp=2):
     """Decode the next byte as a byte-sized floating point value.

     mantissabits -- how many bits are used for the mantissa; the rest are
         used for the exponent.
     zeroexp -- the zero point for the exponent.

     The byte comes from self.read_byte() and the decoding is delegated to
     byte_to_float with these two arguments passed through.
     """
     encoded = self.read_byte()
     return byte_to_float(encoded, mantissabits, zeroexp)
Exemple #5
0
def test_charboost_postings():
    """Check that CharacterBoosts postings survive a round trip.

    Twenty postings are generated with increasing document numbers; each
    one carries between one and ten (position, startchar, endchar, boost)
    tuples. The postings are then read back under five names and compared
    with the matching projection of the generated data.
    """
    generated = []
    doc_number = 0
    for _doc in xrange(0, 20):
        doc_number += randint(1, 10)
        occurrences = []
        position = 0
        span_end = 0
        for _occ in xrange(0, randint(1, 10)):
            position += randint(1, 10)
            # Spans never overlap: each starts past the previous end.
            span_start = span_end + randint(3, 10)
            span_end = span_start + randint(3, 10)
            scaled = random() * 2
            boost = byte_to_float(float_to_byte(scaled))
            occurrences.append((position, span_start, span_end, boost))
        generated.append((doc_number, occurrences))

    def verify(wanted, as_name):
        # Every comparison uses a newly constructed format object.
        got = roundtrip(generated, CharacterBoosts(), as_name)
        assert_equal(wanted, got)

    verify(generated, "character_boosts")

    without_boosts = [(num, [(p, s, e) for p, s, e, _b in occs])
                      for num, occs in generated]
    verify(without_boosts, "characters")

    without_chars = [(num, [(p, b) for p, _s, _e, b in occs])
                     for num, occs in generated]
    verify(without_chars, "position_boosts")

    just_positions = [(num, [p for p, _s, _e, _b in occs])
                      for num, occs in generated]
    verify(just_positions, "positions")

    # Frequencies are counted from the positions-only projection.
    frequencies = [(num, len(plist)) for num, plist in just_positions]
    verify(frequencies, "frequency")
 def test_docboost_postings(self):
     """Round-trip random (freq, boost) postings through DocBoosts.

     Builds 20 postings with increasing document numbers and checks that
     reading them back as "docboosts" yields the same list.
     """
     expected = []
     doc_id = 0
     for _doc in xrange(0, 20):
         doc_id += randint(1, 10)
         count = randint(1, 1000)
         # The random boost goes through float_to_byte/byte_to_float before
         # it is stored as the expected value.
         raw_boost = random() * 2
         weight = byte_to_float(float_to_byte(raw_boost))
         expected.append((doc_id, (count, weight)))

     found = self.roundtrip(expected, DocBoosts(None), "docboosts")
     self.assertEqual(expected, found)
Exemple #7
0
    def test_docboost_postings(self):
        """Check that DocBoosts postings survive a round trip.

        Twenty (docnum, (freq, boost)) postings are generated with
        increasing document numbers, written and read back as "docboosts",
        and compared with the generated list.
        """
        generated = []
        doc_number = 0
        for _doc in xrange(0, 20):
            doc_number += randint(1, 10)
            frequency = randint(1, 1000)
            scaled = random() * 2
            doc_boost = byte_to_float(float_to_byte(scaled))
            generated.append((doc_number, (frequency, doc_boost)))

        read_back = self.roundtrip(generated, DocBoosts(None), "docboosts")
        self.assertEqual(generated, read_back)
Exemple #8
0
    def decode_position_boosts(self, valuestring):
        """Decode an encoded posting value into (position, boost) pairs.

        The value is read as: an unsigned 32-bit network-order count ("!I")
        taken from the first _INT_SIZE bytes, then _FLOAT_SIZE bytes that
        are skipped (the summed boost), then for each occurrence a varint
        position delta followed by one byte decoded with byte_to_float.

        Returns a list of (position, boost) tuples holding absolute
        positions.
        """
        stream = StringIO(valuestring)
        next_bytes = stream.read
        (count,) = unpack("!I", next_bytes(_INT_SIZE))

        # The summed boost is not needed here; step over it (whence=1 seeks
        # relative to the current offset).
        stream.seek(_FLOAT_SIZE, 1)

        pairs = []
        current = 0
        for _ in xrange(count):
            # Positions are stored as deltas from the previous position.
            current += read_varint(next_bytes)
            pairs.append((current, byte_to_float(next_bytes(1))))
        return pairs
 def decode_position_boosts(self, valuestring):
     """Turn an encoded posting value into a list of (position, boost).

     Layout as consumed here: the first _INT_SIZE bytes hold the number of
     occurrences as an unsigned 32-bit network-order integer ("!I"); the
     next _FLOAT_SIZE bytes (the summed boost) are skipped; each
     occurrence is a varint position delta plus one boost byte that is
     decoded with byte_to_float.
     """
     buf = StringIO(valuestring)
     take = buf.read
     (occurrences,) = unpack("!I", take(_INT_SIZE))

     # Jump past the summed boost; whence=1 is relative to the current
     # offset.
     buf.seek(_FLOAT_SIZE, 1)

     decoded = []
     running = 0
     for _ in xrange(occurrences):
         # Each stored position is an offset from the previous one.
         running += read_varint(take)
         weight = byte_to_float(take(1))
         decoded.append((running, weight))
     return decoded
Exemple #10
0
    def decode_character_boosts(self, valuestring):
        """Decode a posting value into (position, startchar, endchar, boost).

        The value is read as: an unsigned 32-bit network-order count ("!I")
        taken from the first _INT_SIZE bytes, then _FLOAT_SIZE bytes that
        are skipped (the summed boost), then for each occurrence three
        varints (position delta, start offset relative to the previous end,
        span length) followed by one byte decoded with byte_to_float.

        Returns a list of tuples holding absolute positions and character
        offsets.
        """
        stream = StringIO(valuestring)
        next_bytes = stream.read
        (count,) = unpack("!I", next_bytes(_INT_SIZE))

        # The summed boost is not needed here; step over it (whence=1 seeks
        # relative to the current offset).
        stream.seek(_FLOAT_SIZE, 1)

        entries = []
        current = 0
        span_end = 0
        for _ in xrange(count):
            current += read_varint(next_bytes)
            # The start is stored relative to the previous span's end, and
            # the end relative to this span's start.
            span_start = span_end + read_varint(next_bytes)
            span_end = span_start + read_varint(next_bytes)
            weight = byte_to_float(next_bytes(1))
            entries.append((current, span_start, span_end, weight))
        return entries
Exemple #11
0
 def decode_character_boosts(self, valuestring):
     """Turn an encoded posting value into position/character/boost tuples.

     Layout as consumed here: the first _INT_SIZE bytes hold the number of
     occurrences as an unsigned 32-bit network-order integer ("!I"); the
     next _FLOAT_SIZE bytes (the summed boost) are skipped; each
     occurrence is three varints and one boost byte.

     Returns a list of (position, startchar, endchar, boost) tuples.
     """
     buf = StringIO(valuestring)
     take = buf.read
     (occurrences,) = unpack("!I", take(_INT_SIZE))

     # Jump past the summed boost; whence=1 is relative to the current
     # offset.
     buf.seek(_FLOAT_SIZE, 1)

     decoded = []
     running_pos = 0
     prev_end = 0
     for _ in xrange(occurrences):
         # All three varints are deltas: position from the previous
         # position, start from the previous end, end from this start.
         running_pos += read_varint(take)
         begin = prev_end + read_varint(take)
         prev_end = begin + read_varint(take)
         decoded.append((running_pos, begin, prev_end,
                         byte_to_float(take(1))))
     return decoded
def test_posboost_postings():
    """Check that PositionBoosts postings survive a round trip.

    Three postings are generated with increasing document numbers, each
    with one to three (position, boost) pairs. They are read back as
    "position_boosts", "positions" and "frequency" and compared with the
    matching projection of the generated data.
    """
    generated = []
    doc_number = 0
    for _doc in xrange(0, 3):
        doc_number += randint(1, 10)
        occurrences = []
        position = 0
        for _occ in xrange(0, randint(1, 3)):
            position += randint(1, 10)
            scaled = random() * 2
            occurrences.append((position,
                                byte_to_float(float_to_byte(scaled))))
        generated.append((doc_number, occurrences))

    def verify(wanted, as_name):
        # Every comparison uses a newly constructed format object.
        got = roundtrip(generated, PositionBoosts(), as_name)
        assert_equal(wanted, got)

    verify(generated, "position_boosts")

    just_positions = [(num, [p for p, _b in occs])
                      for num, occs in generated]
    verify(just_positions, "positions")

    frequencies = [(num, len(occs)) for num, occs in generated]
    verify(frequencies, "frequency")
Exemple #13
0
 def decode_weight(self, valuestring):
     """Return the weight encoded in valuestring.

     The weight is the frequency (an unsigned 32-bit network-order integer
     in the first _INT_SIZE bytes) multiplied by the document boost (the
     last byte, decoded with byte_to_float) and by self.field_boost.
     """
     (count,) = unpack("!I", valuestring[:_INT_SIZE])
     boost = byte_to_float(valuestring[-1])
     boosted_count = count * boost
     return boosted_count * self.field_boost
Exemple #14
0
 def decode_docboosts(self, valuestring):
     """Return the (frequency, document boost) pair stored in valuestring.

     The frequency is the unsigned 32-bit network-order integer in the
     first _INT_SIZE bytes; the boost is the last byte decoded with
     byte_to_float.
     """
     (count,) = unpack("!I", valuestring[:_INT_SIZE])
     return (count, byte_to_float(valuestring[-1]))
Exemple #15
0
 def decode_weight(self, valuestring):
     """Compute the posting weight from an encoded value.

     Reads the frequency from the leading _INT_SIZE bytes ("!I": unsigned
     32-bit, network order) and the document boost from the final byte
     (via byte_to_float), then scales their product by self.field_boost.
     """
     header = valuestring[:_INT_SIZE]
     (frequency,) = unpack("!I", header)
     doc_boost = byte_to_float(valuestring[-1])
     return frequency * doc_boost * self.field_boost
Exemple #16
0
 def decode_docboosts(self, valuestring):
     """Split an encoded value into its frequency and document boost.

     Returns a (freq, docboost) tuple: freq is unpacked from the leading
     _INT_SIZE bytes ("!I": unsigned 32-bit, network order) and docboost
     is the final byte decoded with byte_to_float.
     """
     header = valuestring[:_INT_SIZE]
     (frequency,) = unpack("!I", header)
     doc_boost = byte_to_float(valuestring[-1])
     return (frequency, doc_boost)