def choose_random_context(self, token, rng=random):
    """Pick a random stored context that starts with ``token``.

    The id of ``token`` is used to build a count-key prefix for the first
    configured order (``self.orders[0]``); one of the store keys sharing
    that prefix is then drawn with ``rng.choice``.

    :param token: token to look up; must be a ``unicode`` object.
    :param rng: object exposing ``choice(seq)``; defaults to the ``random``
        module.
    :returns: a list made of ``token`` followed by the tokens decoded from
        the chosen key, or ``None`` when no key matches the prefix.
    :raises TypeError: if ``token`` is not Unicode.
    """
    if not isinstance(token, types.UnicodeType):
        raise TypeError("token must be Unicode")

    token_id = self.tokens.get_id(token)
    key_prefix = self._tokens_count_key((token_id,), self.orders[0])
    candidates = list(self.store.prefix_keys(key_prefix, strip_prefix=True))

    if not candidates:
        return None

    chosen = rng.choice(candidates)

    # FIXME: this is a terrible way to split the token ids
    # (the key is decoded into integers and each one is re-encoded so it can
    # be handed to get_token individually).
    encoded_ids = [varint.encode_one(value) for value in varint.decode(chosen)]

    return [token] + [self.tokens.get_token(encoded) for encoded in encoded_ids]
def choose_random_context(self, token, rng=random):
    """Return a randomly chosen context beginning with ``token``.

    Builds the count-key prefix for ``token``'s id at ``self.orders[0]``,
    collects every store key under that prefix (prefix stripped), and
    selects one of them via ``rng.choice``.

    :param token: token to start the context with; must be ``unicode``.
    :param rng: provider of ``choice(seq)``; the ``random`` module by default.
    :returns: ``[token, ...]`` where the remaining entries are the tokens
        decoded from the selected key, or ``None`` if the prefix has no keys.
    :raises TypeError: if ``token`` is not Unicode.
    """
    if not isinstance(token, types.UnicodeType):
        raise TypeError("token must be Unicode")

    token_id = self.tokens.get_id(token)
    lookup_prefix = self._tokens_count_key((token_id,), self.orders[0])
    matching_keys = list(
        self.store.prefix_keys(lookup_prefix, strip_prefix=True))

    if not matching_keys:
        return None

    picked = rng.choice(matching_keys)

    # FIXME: this is a terrible way to split the token ids
    # (decode the whole key to integers, then re-encode each one separately).
    following_ids = [varint.encode_one(number)
                     for number in varint.decode(picked)]
    following_tokens = [self.tokens.get_token(encoded_id)
                        for encoded_id in following_ids]

    return [token] + following_tokens
def decode(self, buffer):
    """Decode a delta/bitmask-compressed sequence of integers from ``buffer``.

    Until ``buffer.eof()`` is true, one varint is read per step:

    * its low bit says whether a 32-bit mask follows;
    * the remaining bits (``>> 1``) are a delta added to the previous base
      value to obtain the next base value, which is emitted.

    When a mask follows, it is assembled from four successive
    ``buffer.get()`` results, lowest-order first, and every set bit ``i``
    emits ``base + i + 1``.  Only the base value (not the mask-derived
    values) is carried forward as the reference for the next delta.

    :param buffer: reader exposing ``eof()`` and ``get()`` and accepted by
        ``varint.decode``.
    :returns: list of decoded integers in emission order (empty if the
        buffer is already at EOF).
    """
    values = []  # renamed from ``list`` to stop shadowing the builtin
    prev = 0
    while not buffer.eof():
        encoded_value = varint.decode(buffer)
        have_subset = encoded_value & 0x1
        value = (encoded_value >> 1) + prev
        prev = value
        values.append(value)
        if have_subset:
            # Same read order as the original expression: shifts 0, 8, 16, 24.
            subset = 0
            for shift in (0, 8, 16, 24):
                subset |= buffer.get() << shift
            # ``range`` works on both Python 2 and 3 (``xrange`` does not).
            values.extend(
                value + i + 1 for i in range(32) if subset & (1 << i))
    return values
def decrypt(bytes_input, decode=""):
    """Decode a tagged binary stream into a dict keyed by field.

    Repeatedly reads a varint tag and then a value whose layout is picked
    from the tag's low bits: a fixed-width float (4 bytes) or double
    (8 bytes, returned as ``Decimal``), a length-delimited payload (decoded
    recursively with this same function), or a varint.  A field that occurs
    more than once is collected into a list.

    Decoding is best-effort: if the data cannot be parsed (``EOFError`` or
    ``InvalidPBError``), the complete original input is returned instead of
    a dict.

    :param bytes_input: data to decode, as ``bytes`` or a ``BufferedReader``.
    :param decode: codec name; when non-empty, the raw fallback data is
        decoded to a string with this codec before being returned.
    :returns: a dict mapping field to decoded value(s), or the raw input
        (bytes, or a string when ``decode`` is given) if parsing failed.
    """
    stream = as_stream(bytes_input)
    decoded = {}
    field = 1
    while True:
        try:
            flag = varint.decode(stream)
            if flag & WireType.DOUBLE:
                # Fixed-width number: FLOAT when all FLOAT bits are set in
                # the tag, DOUBLE otherwise.
                if (flag & WireType.FLOAT) == WireType.FLOAT:
                    wire_type = WireType.FLOAT
                else:
                    wire_type = WireType.DOUBLE
                field = _test_field(flag, wire_type, field)
                if wire_type == WireType.FLOAT:
                    length = 4
                    fmt = "f"
                else:
                    length = 8
                    fmt = "d"
                packed_bytes = stream.read(length)
                if len(packed_bytes) != length:
                    raise InvalidPBError("not a float")
                try:
                    value = struct.unpack(fmt, packed_bytes)[0]
                    if fmt == "d":
                        value = Decimal(value)
                except Exception:
                    # Was a bare ``except:``; no longer swallows
                    # KeyboardInterrupt / SystemExit.
                    raise InvalidPBError("not a float")
            elif flag & WireType.LENGTHDELIMITED:
                wire_type = WireType.LENGTHDELIMITED
                next_field = _test_field(flag, wire_type, field)
                length = varint.decode(stream)
                encoded = stream.read(length)
                if len(encoded) != length:
                    raise InvalidPBError()
                # Nested payload: yields a dict, or the raw bytes/string
                # when it does not parse.
                value = decrypt(encoded, decode)
                if field == next_field:
                    # Repeated struct.
                    # TODO: first check whether the structure matches the
                    # previous one; if it does not, do not treat it as a
                    # repeated struct.
                    # Only promote to a list when the field already holds a
                    # value; the initial ``field = 1`` can equal the very
                    # first tag before anything has been stored, and an
                    # unguarded lookup would raise an uncaught KeyError.
                    if field in decoded and not isinstance(decoded[field], list):
                        decoded[field] = [decoded[field]]
                else:
                    field = next_field
            elif not flag & 0x7:
                wire_type = WireType.VARINT
                field = _test_field(flag, wire_type, field)
                value = varint.decode(stream)
            else:
                raise InvalidPBError()

            if field not in decoded:
                decoded[field] = value
            else:
                decoded[field].append(value)
            _trace_log(str(field) + " " + str(wire_type) + " " + str(value))
        except (EOFError, InvalidPBError):
            # Read failure: return the original data unchanged.
            try:
                rv = stream.getvalue()
            except Exception:
                # Stream has no usable getvalue(); rewind and read it all.
                stream.seek(0)
                rv = stream.read()
            if decode:
                rv = rv.decode(decode)
            return rv

        # Has the whole input been consumed?
        if isinstance(stream, BufferedReader) and stream.peek(1) == b"":
            break
        elif isinstance(stream, BytesIO):
            c_pos = stream.tell()
            if not c_pos or c_pos == len(bytes_input):
                break
    return decoded
def test02_replace_by_64bit_commit_ids(self):
    """Rewrite the commit ids stored in ``test.db`` by adding ``v64bit_increment``.

    First verifies that the database holds two branches and opens each
    branch's ``-pages`` / ``-maxpage`` sub-databases.  Then, in a single
    write transaction, for every branch it:

    * checks the branch names (``b1`` and ``b2``),
    * adds ``v64bit_increment`` to ``.last_commit``,
    * adds ``v64bit_increment`` to ``.source_commit`` when it is non-zero,
    * re-keys every entry of the branch's pages sub-database whose commit
      id is still below ``v64bit_increment``.

    The environment is closed even when an assertion or database call
    fails, so a failing run does not leave ``test.db`` open.
    """
    env = lmdb.open('test.db', subdir=False, max_dbs=1024)
    try:
        pages_db = [None]  # the first element (0) stores None
        maxpg_db = [None]

        with env.begin(buffers=True) as txn:
            value = txn.get('last_branch_id')
            num_branches = varint.decode(value)[0]
            self.assertEqual(num_branches, 2)

            for branch_id in range(1, num_branches + 1):
                pages_db.append(env.open_db('b' + str(branch_id) + '-pages'))
                maxpg_db.append(env.open_db('b' + str(branch_id) + '-maxpage'))
                # Index in each list must match the branch id.
                self.assertEqual(len(pages_db) - 1, branch_id)
                self.assertEqual(len(maxpg_db) - 1, branch_id)

        with env.begin(write=True, buffers=True) as txn:
            value = txn.get('b1.name')
            self.assertEqual(bytes(value).decode("utf-8"), "master\x00")
            value = txn.get('b2.name')
            self.assertEqual(bytes(value).decode("utf-8"), "test\x00")

            for branch_id in range(1, num_branches + 1):
                prefix = 'b' + str(branch_id)

                key = prefix + '.last_commit'
                value = txn.get(key)
                last_commit = varint.decode(value)[0]
                last_commit += v64bit_increment
                value = varint.encode(last_commit)
                txn.put(key, value)

                key = prefix + '.source_commit'
                value = txn.get(key)
                source_commit = varint.decode(value)[0]
                # A source commit of 0 is left untouched.
                if source_commit > 0:
                    source_commit += v64bit_increment
                    value = varint.encode(source_commit)
                    txn.put(key, value)

                # iterate all the keys from the sub-db
                dbx = pages_db[branch_id]
                for key, value in txn.cursor(db=dbx):
                    # Key layout: varint page number followed by varint
                    # commit id; decode() returns (value, bytes consumed).
                    res = varint.decode(key)
                    pgno = res[0]
                    size1 = res[1]
                    commit = varint.decode(key[size1:])[0]
                    # Entries already rewritten (including the ones inserted
                    # below during this same scan) are skipped.
                    if commit < v64bit_increment:
                        commit += v64bit_increment
                        key2 = varint.encode(pgno) + varint.encode(commit)
                        txn.put(key2, value, db=dbx)
                        txn.delete(key, db=dbx)
    finally:
        env.close()