Exemple #1
0
    def choose_random_context(self, token, rng=random):
        """Return a randomly chosen stored context that starts with ``token``.

        The token is mapped to its id, a key prefix is built from that id
        and the first entry of ``self.orders``, and one of the store keys
        found under that prefix is selected with ``rng.choice``.

        :param token: Unicode token the context must begin with
        :param rng: object providing ``choice(seq)``; defaults to the
            ``random`` module
        :returns: ``[token]`` followed by the tokens decoded from the chosen
            key, or ``None`` when no key exists under the prefix
        :raises TypeError: if ``token`` is not a Unicode string
        """
        if not isinstance(token, types.UnicodeType):
            raise TypeError("token must be Unicode")

        first_id = self.tokens.get_id(token)
        key_prefix = self._tokens_count_key((first_id, ), self.orders[0])
        candidates = list(self.store.prefix_keys(key_prefix,
                                                 strip_prefix=True))

        if not candidates:
            return None

        chosen = rng.choice(candidates)

        # FIXME: this is a terrible way to split the token ids
        # Each number decoded from the key suffix is re-encoded on its own
        # before being passed to the token lookup.
        encoded_ids = [varint.encode_one(number)
                       for number in varint.decode(chosen)]

        return [token] + [self.tokens.get_token(encoded)
                          for encoded in encoded_ids]
Exemple #2
0
    def choose_random_context(self, token, rng=random):
        """Pick, at random, one stored context whose first token is ``token``.

        Store keys are searched under the prefix built by
        ``_tokens_count_key`` from the token's id and ``self.orders[0]``;
        the remainder of the selected key is decoded back into tokens.

        :param token: Unicode token that starts the context
        :param rng: provider of ``choice(seq)``; the ``random`` module by
            default
        :returns: a list made of ``token`` plus the tokens decoded from the
            selected key; ``None`` if the prefix matches no key
        :raises TypeError: if ``token`` is not a Unicode string
        """
        if not isinstance(token, types.UnicodeType):
            raise TypeError("token must be Unicode")

        head_id = self.tokens.get_id(token)
        lookup_prefix = self._tokens_count_key((head_id,), self.orders[0])
        suffixes = list(
            self.store.prefix_keys(lookup_prefix, strip_prefix=True))

        if len(suffixes) == 0:
            return None

        picked = rng.choice(suffixes)

        # FIXME: this is a terrible way to split the token ids
        tail_ids = []
        for raw_id in varint.decode(picked):
            tail_ids.append(varint.encode_one(raw_id))

        tail_tokens = []
        for tail_id in tail_ids:
            tail_tokens.append(self.tokens.get_token(tail_id))

        return [token] + tail_tokens
Exemple #3
0
	def decode(self, buffer):
		"""Decode every value stored in ``buffer`` and return them as a list.

		Each entry is a varint whose lowest bit is a flag and whose
		remaining bits are the difference from the previously decoded
		varint value. When the flag is set, four more bytes are read from
		the buffer and combined, least significant byte first, into a
		32-bit mask; bit ``i`` of the mask adds ``value + i + 1``.

		:param buffer: object exposing ``eof()`` and ``get()``; it is also
			handed to ``varint.decode``
		:returns: list of decoded values in the order they were produced
		"""
		values = []
		previous = 0
		while not buffer.eof():
			packed = varint.decode(buffer)

			# Everything above the flag bit is the delta from the last value.
			current = (packed >> 1) + previous
			previous = current
			values.append(current)

			if not packed & 0x1:
				continue

			# Assemble the mask from four bytes, lowest byte first.
			mask = 0
			for shift in (0, 8, 16, 24):
				mask |= buffer.get() << shift

			values.extend(
				current + bit + 1
				for bit in xrange(32)
				if mask & (1 << bit)
			)

		return values
Exemple #4
0
def decrypt(bytes_input, decode=""):
    """Decode a protobuf-style byte stream without a schema, best effort.

    Tags are read one varint at a time and dispatched on their wire-type
    bits: fixed-width floats are unpacked with ``struct``, length-delimited
    payloads are decoded recursively, and varints are read directly.
    A field seen more than once has its values collected in a list.

    :param bytes_input: the ``bytes`` or ``BufferedReader`` to decode
    :param decode: if non-empty, the codec used to turn the raw content
        into a string when the input cannot be parsed
    :returns: a dict keyed by the field value returned from
        ``_test_field``; or, when reading fails with ``EOFError`` or
        ``InvalidPBError``, the raw content of the stream (decoded with
        ``decode`` when given)
    """

    stream = as_stream(bytes_input)
    decoded = {}
    field = 1
    while True:
        try:
            flag = varint.decode(stream)
            if flag & WireType.DOUBLE:
                wire_type = WireType.FLOAT if flag & WireType.FLOAT == WireType.FLOAT else WireType.DOUBLE
                field = _test_field(flag, wire_type, field)
                if wire_type == WireType.FLOAT:
                    length = 4
                    fmt = "f"
                else:
                    length = 8
                    fmt = "d"
                packed_bytes = stream.read(length)
                if len(packed_bytes) != length:
                    raise InvalidPBError("not a float")
                try:
                    value = struct.unpack(fmt, packed_bytes)[0]
                    if fmt == "d":
                        value = Decimal(value)
                except (struct.error, TypeError, ValueError):
                    # Only conversion failures mean "not a float"; a bare
                    # except here would also swallow KeyboardInterrupt.
                    raise InvalidPBError("not a float")
            elif flag & WireType.LENGTHDELIMITED:
                wire_type = WireType.LENGTHDELIMITED
                field = _test_field(flag, wire_type, field)
                length = varint.decode(stream)
                encoded = stream.read(length)
                if len(encoded) != length:
                    raise InvalidPBError()
                # The payload may itself be a message; if it is not, the
                # recursive call hands back the raw payload.
                value = decrypt(encoded, decode)
            elif not flag & 0x7:
                wire_type = WireType.VARINT
                field = _test_field(flag, wire_type, field)
                value = varint.decode(stream)
            else:
                raise InvalidPBError()
            if field not in decoded:
                decoded[field] = value
            else:
                # Repeated field: promote a single stored value to a list
                # before appending. The previous code indexed
                # decoded[field] before it was known to exist and called
                # .append() on values that were not lists.
                # TODO: first check whether the structure matches the
                # previous one; if it does not, it should not be treated
                # as a repeated struct.
                if not isinstance(decoded[field], list):
                    decoded[field] = [decoded[field]]
                decoded[field].append(value)
            _trace_log(str(field) + " " + str(wire_type) + " " + str(value))
        except (EOFError, InvalidPBError):
            # Reading failed: return the original content unchanged.
            try:
                rv = stream.getvalue()
            except AttributeError:
                # Streams without getvalue() are rewound and read in full.
                stream.seek(0)
                rv = stream.read()
            if decode:
                rv = rv.decode(decode)
            return rv
        # Stop once the stream has been fully consumed.
        if isinstance(stream, BufferedReader) and stream.peek(1) == b"":
            break
        elif isinstance(stream, BytesIO):
            c_pos = stream.tell()
            if not c_pos or c_pos == len(bytes_input):
                break
    return decoded
Exemple #5
0
    def test02_replace_by_64bit_commit_ids(self):
        """Shift the commit ids stored in ``test.db`` by ``v64bit_increment``.

        Steps:
        1. Read ``last_branch_id``, assert there are two branches and open
           the ``-pages`` and ``-maxpage`` sub-databases of each branch.
        2. Assert the stored names of branches 1 and 2.
        3. For every branch, add ``v64bit_increment`` to ``last_commit``
           and, when it is greater than zero, to ``source_commit``.
        4. For every key of the branch's pages sub-database (a page number
           varint followed by a commit varint), re-insert the value under
           a key carrying the shifted commit id and delete the old key.

        The environment is closed even when an assertion or a database
        call fails.
        """

        env = lmdb.open('test.db', subdir=False, max_dbs=1024)

        try:
            pages_db = [None]  # the first element (0) stores None
            maxpg_db = [None]

            with env.begin(buffers=True) as txn:

                value = txn.get('last_branch_id')
                # varint.decode returns (value, encoded size); keep the value
                num_branches = varint.decode(value)[0]
                self.assertEqual(num_branches, 2)

                for branch_id in range(1, num_branches + 1):
                    pages_db.append(env.open_db('b' + str(branch_id) + '-pages'))
                    maxpg_db.append(env.open_db('b' + str(branch_id) + '-maxpage'))
                    self.assertEqual(len(pages_db) - 1, branch_id)
                    self.assertEqual(len(maxpg_db) - 1, branch_id)

            with env.begin(write=True, buffers=True) as txn:

                value = txn.get('b1.name')
                self.assertEqual(bytes(value).decode("utf-8"), "master\x00")

                value = txn.get('b2.name')
                self.assertEqual(bytes(value).decode("utf-8"), "test\x00")

                for branch_id in range(1, num_branches + 1):
                    prefix = 'b' + str(branch_id)

                    key = prefix + '.last_commit'
                    value = txn.get(key)
                    last_commit = varint.decode(value)[0]
                    last_commit += v64bit_increment
                    value = varint.encode(last_commit)
                    txn.put(key, value)

                    key = prefix + '.source_commit'
                    value = txn.get(key)
                    source_commit = varint.decode(value)[0]
                    # a source commit of zero is left untouched
                    if source_commit > 0:
                        source_commit += v64bit_increment
                        value = varint.encode(source_commit)
                        txn.put(key, value)

                    # iterate all the keys from the sub-db
                    dbx = pages_db[branch_id]
                    for key, value in txn.cursor(db=dbx):
                        res = varint.decode(key)
                        pgno = res[0]
                        size1 = res[1]
                        # the commit id starts right after the page number
                        commit = varint.decode(key[size1:])[0]
                        # only keys that have not been shifted yet are rewritten
                        if commit < v64bit_increment:
                            commit += v64bit_increment
                            key2 = varint.encode(pgno) + varint.encode(commit)
                            txn.put(key2, value, db=dbx)
                            txn.delete(key, db=dbx)
        finally:
            env.close()