def read_8bitfloat(self, mantissabits=5, zeroexp=2):
    """Read one byte and decode it as a compact floating point value.

    :param mantissabits: how many bits of the byte are used for the
        mantissa; the remaining bits are used for the exponent.
    :param zeroexp: the zero point for the exponent.
    :returns: the result of ``byte_to_float`` applied to the byte obtained
        from ``self.read_byte()``.
    """
    raw = self.read_byte()
    return byte_to_float(raw, mantissabits, zeroexp)
def test_posboost_postings(self):
    """Round-trip random (position, boost) postings through PositionBoosts.

    Builds three postings with increasing document numbers, each holding
    one to three (position, boost) pairs with increasing positions, then
    checks what ``self.roundtrip`` returns when asked for
    "position_boosts", "positions" and "frequency".
    """
    expected = []
    doc_id = 0
    for _ in xrange(0, 3):
        doc_id += randint(1, 10)
        entries = []
        position = 0
        how_many = randint(1, 3)
        for __ in xrange(0, how_many):
            position += randint(1, 10)
            # Run the random boost through float_to_byte/byte_to_float up
            # front -- presumably so the expected value already matches
            # what comes back after storage (verify against the format).
            weight = byte_to_float(float_to_byte(random() * 2))
            entries.append((position, weight))
        expected.append((doc_id, entries))

    def check(want, astype):
        # A fresh PositionBoosts(None) is used for every comparison.
        got = self.roundtrip(expected, PositionBoosts(None), astype)
        self.assertEqual(want, got)

    check(expected, "position_boosts")

    # Same postings with the boosts dropped.
    positions_only = [(d, [p for p, _w in items]) for d, items in expected]
    check(positions_only, "positions")

    # Same postings reduced to the number of positions per document.
    freqs = [(d, len(items)) for d, items in expected]
    check(freqs, "frequency")
def test_charboost_postings():
    """Round-trip random character/boost postings through CharacterBoosts.

    Builds twenty postings with increasing document numbers, each holding
    one to ten (position, startchar, endchar, boost) tuples, then checks
    what ``roundtrip`` returns when asked for "character_boosts",
    "characters", "position_boosts", "positions" and "frequency".
    """
    expected = []
    doc_id = 0
    for _ in xrange(0, 20):
        doc_id += randint(1, 10)
        entries = []
        position = 0
        last_end = 0
        how_many = randint(1, 10)
        for __ in xrange(0, how_many):
            position += randint(1, 10)
            # Each span starts after the previous span's end.
            start = last_end + randint(3, 10)
            last_end = start + randint(3, 10)
            # Run the random boost through float_to_byte/byte_to_float up
            # front -- presumably so the expected value already matches
            # what comes back after storage (verify against the format).
            weight = byte_to_float(float_to_byte(random() * 2))
            entries.append((position, start, last_end, weight))
        expected.append((doc_id, entries))

    def check(want, astype):
        # A fresh CharacterBoosts() is used for every comparison.
        assert_equal(want, roundtrip(expected, CharacterBoosts(), astype))

    check(expected, "character_boosts")

    # Positions and character spans, boosts dropped.
    chars_only = [(d, [(p, s, e) for p, s, e, _w in items])
                  for d, items in expected]
    check(chars_only, "characters")

    # Positions and boosts, character spans dropped.
    pos_boosts = [(d, [(p, w) for p, _s, _e, w in items])
                  for d, items in expected]
    check(pos_boosts, "position_boosts")

    # Positions only.
    positions_only = [(d, [p for p, _s, _e, _w in items])
                      for d, items in expected]
    check(positions_only, "positions")

    # Number of positions per document, counted from the positions-only view.
    freqs = [(d, len(plist)) for d, plist in positions_only]
    check(freqs, "frequency")
def read_8bitfloat(self, mantissabits=5, zeroexp=2):
    """Read a floating point value stored in a single byte.

    The byte comes from ``self.read_byte()`` and is decoded by
    ``byte_to_float``.

    :param mantissabits: the number of bits to use for the mantissa (the
        rest are used for the exponent).
    :param zeroexp: the zero point for the exponent.
    """
    encoded = self.read_byte()
    return byte_to_float(encoded, mantissabits, zeroexp)
def test_docboost_postings(self):
    """Round-trip random (freq, boost) postings through DocBoosts.

    Builds twenty postings with increasing document numbers, each carrying
    a (frequency, boost) pair, and checks that ``self.roundtrip`` returns
    the same list when asked for "docboosts".
    """
    expected = []
    doc_id = 0
    for _ in xrange(0, 20):
        doc_id += randint(1, 10)
        count = randint(1, 1000)
        # Run the random boost through float_to_byte/byte_to_float up
        # front -- presumably so the expected value already matches what
        # comes back after storage (verify against the format).
        weight = byte_to_float(float_to_byte(random() * 2))
        expected.append((doc_id, (count, weight)))

    actual = self.roundtrip(expected, DocBoosts(None), "docboosts")
    self.assertEqual(expected, actual)
def test_docboost_postings(self):
    """Check that (freq, boost) postings survive a DocBoosts round trip.

    Twenty postings are generated with strictly increasing document
    numbers; the list returned by ``self.roundtrip`` for "docboosts" must
    equal the generated list.
    """
    generated = []
    current_doc = 0
    for _ in xrange(0, 20):
        current_doc += randint(1, 10)
        frequency = randint(1, 1000)
        # Run the random boost through float_to_byte/byte_to_float up
        # front -- presumably so the expected value already matches what
        # comes back after storage (verify against the format).
        doc_boost = byte_to_float(float_to_byte(random() * 2))
        generated.append((current_doc, (frequency, doc_boost)))

    returned = self.roundtrip(generated, DocBoosts(None), "docboosts")
    self.assertEqual(generated, returned)
def decode_position_boosts(self, valuestring):
    """Decode ``valuestring`` into a list of (position, boost) tuples.

    Layout as consumed here: a "!I"-packed entry count (``_INT_SIZE``
    bytes), then ``_FLOAT_SIZE`` bytes holding the summed boost (skipped),
    then for each entry a varint position delta followed by one byte that
    ``byte_to_float`` turns into the boost.

    :param valuestring: the encoded posting value.
    :returns: a list of (position, boost) tuples, with positions rebuilt
        by accumulating the stored deltas.
    """
    stream = StringIO(valuestring)
    pull = stream.read

    count = unpack("!I", pull(_INT_SIZE))[0]
    # Skip summed boost (whence=1 seeks relative to the current offset).
    stream.seek(_FLOAT_SIZE, 1)

    decoded = []
    running_pos = 0
    for _ in xrange(count):
        # Positions are stored as increments over the previous position.
        running_pos += read_varint(pull)
        decoded.append((running_pos, byte_to_float(pull(1))))
    return decoded
def decode_character_boosts(self, valuestring):
    """Decode ``valuestring`` into (position, startchar, endchar, boost).

    Layout as consumed here: a "!I"-packed entry count (``_INT_SIZE``
    bytes), then ``_FLOAT_SIZE`` bytes holding the summed boost (skipped),
    then for each entry three varints (position delta, start offset
    relative to the previous end, span length) followed by one byte that
    ``byte_to_float`` turns into the boost.

    :param valuestring: the encoded posting value.
    :returns: a list of (position, startchar, endchar, boost) tuples.
    """
    stream = StringIO(valuestring)
    pull = stream.read

    count = unpack("!I", pull(_INT_SIZE))[0]
    # Skip summed boost (whence=1 seeks relative to the current offset).
    stream.seek(_FLOAT_SIZE, 1)

    decoded = []
    running_pos = 0
    prev_end = 0
    for _ in xrange(count):
        # Positions are stored as increments over the previous position.
        running_pos += read_varint(pull)
        # The start is stored relative to the previous entry's end, and
        # the end relative to this entry's start.
        begin = prev_end + read_varint(pull)
        prev_end = begin + read_varint(pull)
        weight = byte_to_float(pull(1))
        decoded.append((running_pos, begin, prev_end, weight))
    return decoded
def test_posboost_postings():
    """Round-trip random (position, boost) postings through PositionBoosts.

    Builds three postings with increasing document numbers, each holding
    one to three (position, boost) pairs with increasing positions, then
    checks what ``roundtrip`` returns when asked for "position_boosts",
    "positions" and "frequency".
    """
    expected = []
    doc_id = 0
    for _ in xrange(0, 3):
        doc_id += randint(1, 10)
        entries = []
        position = 0
        how_many = randint(1, 3)
        for __ in xrange(0, how_many):
            position += randint(1, 10)
            # Run the random boost through float_to_byte/byte_to_float up
            # front -- presumably so the expected value already matches
            # what comes back after storage (verify against the format).
            weight = byte_to_float(float_to_byte(random() * 2))
            entries.append((position, weight))
        expected.append((doc_id, entries))

    def check(want, astype):
        # A fresh PositionBoosts() is used for every comparison.
        assert_equal(want, roundtrip(expected, PositionBoosts(), astype))

    check(expected, "position_boosts")

    # Same postings with the boosts dropped.
    positions_only = [(d, [p for p, _w in items]) for d, items in expected]
    check(positions_only, "positions")

    # Same postings reduced to the number of positions per document.
    freqs = [(d, len(items)) for d, items in expected]
    check(freqs, "frequency")
def decode_weight(self, valuestring):
    """Compute the weight encoded in ``valuestring``.

    The first ``_INT_SIZE`` bytes are unpacked with "!I" as the frequency
    and the last byte is decoded with ``byte_to_float`` as the document
    boost.

    :param valuestring: the encoded posting value.
    :returns: frequency * document boost * ``self.field_boost``.
    """
    (count,) = unpack("!I", valuestring[:_INT_SIZE])
    boost = byte_to_float(valuestring[-1])
    return count * boost * self.field_boost
def decode_docboosts(self, valuestring):
    """Decode ``valuestring`` into a (frequency, document boost) tuple.

    The first ``_INT_SIZE`` bytes are unpacked with "!I" as the frequency
    and the last byte is decoded with ``byte_to_float`` as the boost.

    :param valuestring: the encoded posting value.
    :returns: a ``(freq, docboost)`` tuple.
    """
    (count,) = unpack("!I", valuestring[:_INT_SIZE])
    boost = byte_to_float(valuestring[-1])
    return (count, boost)