Beispiel #1
0
def svd_inv_sqrt(C, mineig=0, hashtable=None):
    """Return the inverse of ``C`` and the square root of that inverse.

    Despite the name, the result is built from an eigendecomposition
    (``eigh``) of ``C`` rather than from an SVD.  ``mineig`` is accepted
    but not used by this implementation.

    When ``hashtable`` is supplied, results are memoised under the xxh64
    digest of ``C`` and the table is trimmed to ``max_table_size`` entries,
    oldest first.

    Returns:
        tuple: ``(Cinv, Cinv_sqrt)``.
    """
    use_cache = hashtable is not None

    # Cache lookup: reuse a previously computed answer when one exists.
    key = None
    if use_cache:
        # Arrays that are not C-contiguous cannot be hashed directly,
        # so hash a C-ordered copy instead.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        key = xxhash.xxh64_digest(C)
        if key in hashtable:
            return hashtable[key]

    # Eigendecomposition is used because a Cholesky factorisation proved
    # too unstable for this problem.
    eigvals, eigvecs = eigh(C)
    inv_root_diag = s.diag(1 / s.sqrt(eigvals))
    scaled_vecs = eigvecs @ inv_root_diag
    Cinv = scaled_vecs @ scaled_vecs.T
    Cinv_sqrt = scaled_vecs @ eigvecs.T
    result = (Cinv, Cinv_sqrt)

    # Store the answer and evict the oldest entries (FIFO) until the table
    # is back within its size bound.
    if use_cache:
        hashtable[key] = result
        while len(hashtable) > max_table_size:
            hashtable.popitem(last=False)

    return result
Beispiel #2
0
    def pack(self, data):
        (   "pack("
                "data:object"
            ") -> str" """

        Marshal ``data`` and wrap it in two encryption layers, returning the
        result as a base64 text token.

        The marshalled payload (prefixed with ``self.magic_bytes``) is first
        encrypted with ARC4, then the salted result is encrypted with AES in
        CBC mode.  The token is also stored in ``self.cache``.

        Raises ValueError when the token is longer than ``self.max_token``.
        """)
        payload = self.magic_bytes + marshal.dumps(data)

        # Inner layer: ARC4 keyed by the xxh64 digest of a fresh 8-byte salt
        # concatenated with the RC4 key; the salt is prepended to the output.
        rc4_salt = Crypto.Random.get_random_bytes(8)
        rc4_key = xxhash.xxh64_digest(rc4_salt + self.rc4_key)
        rc4 = Crypto.Cipher.ARC4.new(rc4_key)
        inner = rc4_salt + rc4.encrypt(payload)

        # Outer layer: AES-CBC keyed by the MD5 digest of a fresh 16-byte
        # salt concatenated with the AES key.  No IV is passed in; it is read
        # back from the cipher object and shipped alongside the salt.
        aes_salt = Crypto.Random.get_random_bytes(16)
        aes_key = hashlib.md5(aes_salt + self.aes_key).digest()
        aes = Crypto.Cipher.AES.new(aes_key, Crypto.Cipher.AES.MODE_CBC)
        padded = Crypto.Util.Padding.pad(inner, Crypto.Cipher.AES.block_size)
        encrypted = aes.encrypt(padded)
        outer = aes_salt + aes.iv + encrypted

        token = base64.b64encode(outer).decode('utf-8')
        if len(token) > self.max_token:
            raise ValueError('the generated token is too large')

        # Remember the original data so a later lookup of this token can be
        # answered from the cache.
        self.cache[token] = Holder(data)
        return token
Beispiel #3
0
def svd_inv_sqrt(C, mineig=0, hashtable=None):
    """Invert ``C`` through its SVD and also return the inverse's square root.

    Singular values smaller than ``mineig`` are treated as zero in the
    inverse, which lets the routine cope with near-singular matrices.

    Args:
        C: matrix to invert.
        mineig: singular values strictly below this threshold contribute
            nothing to the inverse.
        hashtable: optional ordered mapping used as a result cache, keyed by
            the xxh64 digest of ``C``.  It is trimmed to ``max_table_size``
            entries in FIFO order.

    Returns:
        tuple: ``(Cinv, Cinv_sqrt)``.
    """

    # If we have a hash table, look for the precalculated solution
    h = None
    if hashtable is not None:
        # Arrays that are not C-contiguous (e.g. Fortran ordering) cannot be
        # hashed directly, so hash a C-ordered copy instead.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        h = xxhash.xxh64_digest(C)
        if h in hashtable:
            return hashtable[h]

    U, V, D = svd(C)
    ignore = s.where(V < mineig)[0]
    Vi = 1.0 / V
    # Drop the contribution of the singular values below the threshold.
    Vi[ignore] = 0
    Visqrt = s.sqrt(Vi)
    Cinv = (D.T).dot(s.diag(Vi)).dot(U.T)
    Cinv_sqrt = (D.T).dot(s.diag(Visqrt)).dot(U.T)

    # If there is a hash table, cache our solution.  Bound the total cache
    # size by removing any extra items in FIFO order.
    if hashtable is not None:
        hashtable[h] = (Cinv, Cinv_sqrt)
        while len(hashtable) > max_table_size:
            hashtable.popitem(last=False)

    return Cinv, Cinv_sqrt
Beispiel #4
0
 def get_indices_and_finger(self, string_item):
     """Return the two candidate bucket indices and the fingerprint of an item.

     The fingerprint is the item's xxh64 digest read as a big-endian
     integer.  The first index is that integer reduced modulo the filter
     capacity; the second is the first index XOR-ed with the index derived
     from hashing the digest again, also reduced modulo the capacity.
     """
     digest = xxh64_digest(string_item)
     fingerprint = int.from_bytes(digest, byteorder="big")
     primary = fingerprint % self._filter_capacity
     partner = self.obtain_index_from_hash(digest)
     alternate = (primary ^ partner) % self._filter_capacity
     return primary, alternate, fingerprint
Beispiel #5
0
def svd_inv_sqrt(C: np.array,
                 hashtable: OrderedDict = None,
                 max_hash_size: int = None) -> (np.array, np.array):
    """Matrix inversion, based on decomposition.  Built to be stable, and positive.

    The inverse is assembled from an eigendecomposition of ``C``.  If that
    decomposition yields negative or NaN eigenvalues, a small positive value
    is added to the diagonal and the decomposition is retried, with the
    value growing tenfold on each of up to three attempts.  Eigenvalues that
    are exactly zero are not regularised.

    Args:
        C: matrix to invert
        hashtable: if used, the hashtable to store/retrieve results in/from
        max_hash_size: maximum size of hashtable; results are only stored
            when both ``hashtable`` and ``max_hash_size`` are given

    Return:
        (np.array, np.array): inverse of C and square root of the inverse of C

    Raises:
        ValueError: if the eigenvalues are still negative or NaN after the
            last regularisation attempt.

    """

    def _invalid(eigvals):
        # True when the spectrum cannot be inverted and square-rooted.
        return np.any(eigvals < 0) or np.any(np.isnan(eigvals))

    # If we have a hash table, look for the precalculated solution
    h = None
    if hashtable is not None:
        # If arrays are in Fortran ordering, they are not hashable.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        h = xxhash.xxh64_digest(C)
        if h in hashtable:
            return hashtable[h]

    D, P = scipy.linalg.eigh(C)
    inv_eps = 0.0
    for count in range(3):
        if not _invalid(D):
            break
        # Escalating, strictly positive diagonal loading: 1e-6, 1e-5, 1e-4.
        inv_eps = 1e-6 * 10 ** count
        D, P = scipy.linalg.eigh(C +
                                 np.diag(np.ones(C.shape[0]) * inv_eps))
    else:
        # All attempts were used; give up only if the last one failed too.
        if _invalid(D):
            raise ValueError(
                'Matrix inversion contains negative values, '
                'even after adding {} to the diagonal.'.format(inv_eps))

    Ds = np.diag(1 / np.sqrt(D))
    L = P @ Ds
    Cinv_sqrt = L @ P.T
    Cinv = L @ L.T

    # If there is a hash table, cache our solution.  Bound the total cache
    # size by removing any extra items in FIFO order.
    if (hashtable is not None) and (max_hash_size is not None):
        hashtable[h] = (Cinv, Cinv_sqrt)
        while len(hashtable) > max_hash_size:
            hashtable.popitem(last=False)

    return Cinv, Cinv_sqrt
Beispiel #6
0
def get_or_create(tr, allocator, value):
    """Return the uid stored for ``value``, allocating one if none exists.

    The value is tuple-packed and identified by the xxh64 digest of that
    packed form.  When no uid is stored under the digest, a new uid is
    taken from ``allocator`` and three entries are written in the
    transaction: hash -> uid, uid -> hash and uid -> packed value.
    """
    packed_value = fdb.tuple.pack((value,))
    digest = xxhash.xxh64_digest(packed_value)
    hash_key = fdb.tuple.pack((MAGIC, HASH_TO_UID, digest))

    # NOTE(review): the equality comparison with None is kept on purpose;
    # `tr.get` presumably returns a lazy value wrapper for which an identity
    # check would not behave the same way -- confirm before changing.
    known_uid = tr.get(hash_key)
    if known_uid != None:
        return known_uid

    # Nothing stored yet: allocate a uid and record all three mappings.
    new_uid = allocator.allocate(tr)
    tr.set(hash_key, new_uid)
    uid_to_hash_key = fdb.tuple.pack((MAGIC, UID_TO_HASH, new_uid))
    tr.set(uid_to_hash_key, digest)
    uid_to_value_key = fdb.tuple.pack((MAGIC, UID_TO_VALUE, new_uid))
    tr.set(uid_to_value_key, packed_value)
    return new_uid
    def seed(self, value=None) -> None:
        """Re-initialize the random generator with a new seed. Resets
        the sequence to its first value.

        The generator's state is only updated once the seed has been
        accepted, so a rejected seed leaves the previous seed in effect.

        Caution:
            This method cannot be called from within :meth:`cascade`,
            and will raise a :class:`RuntimeError` if attempted.

        Args:
            value (int, str, bytes, bytearray): The value to seed with.
                ``None`` is treated as ``0``. If this is a sequence type,
                the sequence is first hashed with seed 0.

        Raises:
            ValueError: If the seed value is not a supported type.
            OverflowError: If the seed is a negative int, which cannot be
                converted to unsigned bytes.
        """
        if value is None:
            value = 0

        if isinstance(value, int):
            # Convert int to at least 8 bytes, then hash them with
            # seed 0 to find the sequence's hash input.
            num_bytes = max(8, (value.bit_length() + 7) // 8)
            hash_input = xxhash.xxh64_digest(
                value.to_bytes(num_bytes, 'big'), seed=0)
        elif isinstance(value, (str, bytes, bytearray)):
            # Strings are hashed via their encoded bytes; the seed that is
            # remembered below is still the caller's original object.
            raw = value.encode() if isinstance(value, str) else value
            # Hash the input with seed 0.
            hash_input = xxhash.xxh64_digest(raw, seed=0)
        else:
            raise ValueError('Seed must be an int, str, bytes, or bytearray.')

        # Commit both fields together, after validation and hashing have
        # succeeded, so they can never disagree with each other.
        self._seed = value
        self._hash_input = hash_input

        self.reset()
Beispiel #8
0
    def unpack(self, token):
        (   "unpack("
                "token:str"
            ") -> object" """

        Decode an encrypted token string and return the data it carries.

        Tokens already present in ``self.cache`` are answered from the cache.
        Otherwise the token is base64-decoded, the outer AES-CBC layer and
        the inner ARC4 layer are removed, the magic-bytes prefix is checked
        and the remaining bytes are unmarshalled.

        Raises ValueError when the token is longer than ``self.max_token``
        and VerificationError when the token layout or the magic-bytes
        prefix is wrong.
        """)
        if len(token) > self.max_token:
            raise ValueError('token too large')

        # Fast path: this token has been seen before.
        cached = self.cache.get(token)
        if cached is not None:
            return cached.data

        aes_block = Crypto.Cipher.AES.block_size
        raw = base64.b64decode(token)

        # Outer layout: 16-byte salt | one-block IV | ciphertext.
        iv_end = 16 + aes_block
        aes_salt = raw[:16]
        iv = raw[16:iv_end]
        body = raw[iv_end:]
        layout_ok = (
            len(aes_salt) == 16
            and len(iv) == aes_block
            and len(body) % aes_block == 0
        )
        if not layout_ok:
            raise VerificationError('Invalid token')

        aes_key = hashlib.md5(aes_salt + self.aes_key).digest()
        aes = Crypto.Cipher.AES.new(aes_key, Crypto.Cipher.AES.MODE_CBC, iv)
        inner = Crypto.Util.Padding.unpad(
            aes.decrypt(body),
            Crypto.Cipher.AES.block_size,
        )

        # Inner layout: 8-byte salt | ARC4 ciphertext.  The stream cipher's
        # encrypt call is used here to undo the encryption.
        rc4_salt = inner[:8]
        rc4_body = inner[8:]
        rc4_key = xxhash.xxh64_digest(rc4_salt + self.rc4_key)
        rc4 = Crypto.Cipher.ARC4.new(rc4_key)
        plain = rc4.encrypt(rc4_body)

        prefix_len = self.magic_bytes_len
        if plain[:prefix_len] != self.magic_bytes:
            raise VerificationError('Invalid token')

        # NOTE(review): marshal is not designed to be safe against crafted
        # input, and the magic-bytes prefix is the only integrity check
        # visible here -- confirm tokens cannot be forged by untrusted
        # parties.
        data = marshal.loads(plain[prefix_len:])
        self.cache[token] = Holder(data)
        return data
Beispiel #9
0
def svd_inv_sqrt(C, mineig=0, hashtable=None):
    """Fast stable inverse using SVD. This can handle near-singular matrices.
    Also return the square root.

    Args:
        C: matrix to invert.
        mineig: singular values strictly below this threshold contribute
            nothing to the inverse.
        hashtable: optional mapping used as a result cache, keyed by the
            xxh64 digest of ``C``.  This variant never evicts entries.

    Returns:
        tuple: ``(Cinv, Cinv_sqrt)``.
    """

    # Look for a previously computed solution.
    h = None
    if hashtable is not None:
        # Arrays that are not C-contiguous (e.g. Fortran ordering) cannot be
        # hashed directly, so hash a C-ordered copy instead.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        h = xxhash.xxh64_digest(C)
        if h in hashtable:
            return hashtable[h]

    U, V, D = svd(C)
    ignore = s.where(V < mineig)[0]
    Vi = 1.0 / V
    # Drop the contribution of the singular values below the threshold.
    Vi[ignore] = 0
    Visqrt = s.sqrt(Vi)
    Cinv = (D.T).dot(s.diag(Vi)).dot(U.T)
    Cinv_sqrt = (D.T).dot(s.diag(Visqrt)).dot(U.T)

    # Remember the solution for later calls with the same matrix.
    if hashtable is not None:
        hashtable[h] = (Cinv, Cinv_sqrt)
    return Cinv, Cinv_sqrt
Beispiel #10
0
def svd_inv_sqrt(C, mineig=0, hashtable=None):
    """Stable inverse of ``C`` plus the square root of that inverse.

    Despite the name, the inverse is assembled from an eigendecomposition
    of ``C``.  If that decomposition yields negative or NaN eigenvalues, a
    small positive value is added to the diagonal and the decomposition is
    retried, with the value growing tenfold on each of up to three attempts.
    Eigenvalues that are exactly zero are not regularised.

    Args:
        C: matrix to invert.
        mineig: accepted for interface compatibility; not used here.
        hashtable: optional ordered mapping used as a result cache, keyed by
            the xxh64 digest of ``C`` and trimmed to ``max_table_size``
            entries in FIFO order.

    Returns:
        tuple: ``(Cinv, Cinv_sqrt)``.

    Raises:
        ValueError: if the eigenvalues are still negative or NaN after the
            last regularisation attempt.
    """

    def _invalid(eigvals):
        # True when the spectrum cannot be inverted and square-rooted.
        return np.any(eigvals < 0) or np.any(np.isnan(eigvals))

    # If we have a hash table, look for the precalculated solution
    h = None
    if hashtable is not None:
        # If arrays are in Fortran ordering, they are not hashable.
        if not C.flags['C_CONTIGUOUS']:
            C = C.copy(order='C')
        h = xxhash.xxh64_digest(C)
        if h in hashtable:
            return hashtable[h]

    D, P = scipy.linalg.eigh(C)
    inv_eps = 0.0
    for count in range(3):
        if not _invalid(D):
            break
        # Escalating, strictly positive diagonal loading: 1e-6, 1e-5, 1e-4.
        inv_eps = 1e-6 * 10 ** count
        D, P = scipy.linalg.eigh(C +
                                 np.diag(np.ones(C.shape[0]) * inv_eps))
    else:
        # All attempts were used; give up only if the last one failed too.
        if _invalid(D):
            raise ValueError(
                'Matrix inversion contains negative values, '
                'even after adding {} to the diagonal.'.format(inv_eps))

    Ds = np.diag(1 / np.sqrt(D))
    L = P @ Ds
    Cinv_sqrt = L @ P.T
    Cinv = L @ L.T

    # If there is a hash table, cache our solution.  Bound the total cache
    # size by removing any extra items in FIFO order.
    if hashtable is not None:
        hashtable[h] = (Cinv, Cinv_sqrt)
        while len(hashtable) > max_table_size:
            hashtable.popitem(last=False)

    return Cinv, Cinv_sqrt
Beispiel #11
0
    def test_xxh64_overflow(self):
        """Seeds that differ by a multiple of 2**64 must behave identically.

        For each pair of seeds the two hasher objects must report the same
        seed and digests, and the one-shot helper functions must agree with
        the hasher object for both seeds of the pair.
        """
        s = 'I want an unsigned 64-bit seed!'
        seed_pairs = (
            (0, 2**64),
            (1, 2**64 + 1),
            (2**65 - 1, 2**66 - 1),
        )

        for low, high in seed_pairs:
            a = xxhash.xxh64(s, seed=low)
            b = xxhash.xxh64(s, seed=high)

            # Object API: both hashers are indistinguishable.
            self.assertEqual(a.seed, b.seed)
            self.assertEqual(a.intdigest(), b.intdigest())
            self.assertEqual(a.hexdigest(), b.hexdigest())
            self.assertEqual(a.digest(), b.digest())

            # One-shot API: each helper matches the hasher for either seed.
            checks = (
                (a.intdigest, xxhash.xxh64_intdigest),
                (a.digest, xxhash.xxh64_digest),
                (a.hexdigest, xxhash.xxh64_hexdigest),
            )
            for from_object, one_shot in checks:
                for seed in (low, high):
                    self.assertEqual(from_object(), one_shot(s, seed=seed))
Beispiel #12
0
    def from_file(cls: Type[DB], path: Union[str, PathLike], create_new=False) -> DB:
        """Build a database from the JSON file at ``path``.

        The file suffix selects how the file is read: ``.gz`` files are read
        through gzip, ``.zst`` files are zstd-decompressed (verifying the
        frame's content checksum when the frame declares one), and anything
        else is read as-is.  The bytes are parsed with orjson and handed to
        ``cls.from_dict``.

        When the file does not exist and ``create_new`` is true, a warning is
        logged and an empty ``cls()`` is returned instead.

        Raises:
            DatabaseException: if the zstd content checksum does not match.
        """
        path = Path(path)

        file_missing = not path.exists()
        if file_missing and create_new:
            logging.getLogger(__name__).warning(
                "Database file does not exist. Starting with blank database."
            )
            return cls()

        suffix = path.suffix
        if suffix == ".gz":
            with gzip.open(path, "rb") as handle:
                payload = handle.read()
        elif suffix == ".zst":
            with open(path, "rb") as handle:
                compressed = handle.read()
                frame_has_checksum = zstd.get_frame_parameters(
                    compressed
                ).has_checksum
                # The last four bytes of the file are taken as the stored
                # checksum.
                stored = compressed[-4:]
                payload = zstd.decompress(compressed)
                del compressed
                # The stored checksum is compared against the last four bytes
                # of the xxh64 digest, byte-reversed.
                digest = xxhash.xxh64_digest(payload)
                if frame_has_checksum and stored != digest[-4:][::-1]:
                    raise DatabaseException(
                        f"zstd content checksum verification failed: "
                        f"{stored.hex()} != {digest.hex()}"
                    )
        else:
            with open(path, "rb") as handle:
                payload = handle.read()

        parsed = orjson.loads(payload)
        # Release the raw bytes before building the object graph.
        del payload
        return cls.from_dict(parsed)
Beispiel #13
0
    def test_xxh64_update(self):
        """Incremental updates must match hashing the whole prefix at once.

        The check is run first with the default seed and then with a random
        seed, comparing against both the hasher object and the one-shot
        ``xxh64_digest`` helper after every update.
        """
        # Default seed.
        x = xxhash.xxh64()
        fed = ''
        for piece in 'abc':
            x.update(piece)
            fed += piece
            self.assertEqual(xxhash.xxh64(fed).digest(), x.digest())
            self.assertEqual(xxhash.xxh64_digest(fed), x.digest())

        # Random seed, passed positionally to the reference calls.
        seed = random.randint(0, 2**64)
        x = xxhash.xxh64(seed=seed)
        fed = ''
        for piece in 'abc':
            x.update(piece)
            fed += piece
            self.assertEqual(xxhash.xxh64(fed, seed).digest(), x.digest())
            self.assertEqual(xxhash.xxh64_digest(fed, seed), x.digest())
Beispiel #14
0
 def obtain_index_from_hash(self, string_item):
     """Map an item to a bucket index.

     The item's xxh64 digest is read as a big-endian integer and reduced
     modulo the filter capacity.
     """
     digest = xxh64_digest(string_item)
     as_integer = int.from_bytes(digest, byteorder="big")
     return as_integer % self._filter_capacity
Beispiel #15
0
def make_key_u(name):
    """Build a bytes key from a text name.

    The key is the first eight bytes of the encoded name followed by the
    xxh64 digest of the name.
    """
    prefix = name.encode()[:8]
    digest = xxh64_digest(name)
    return prefix + digest
Beispiel #16
0
def make_key(name):
    """Build a key from the first eight items of ``name`` plus its xxh64 digest.

    NOTE(review): the slice is concatenated directly with the digest, so
    ``name`` is presumably a bytes-like value -- confirm against callers.
    """
    prefix = name[:8]
    digest = xxh64_digest(name)
    return prefix + digest