コード例 #1
0
ファイル: test_siphashc.py プロジェクト: WeblateOrg/siphashc
    def test_hash(self):
        """Check two known key/message pairs against their expected digests."""
        cases = (
            ("sixteencharstrng", "i need a hash of this", 10796923698683394048),
            ("0123456789ABCDEF", "a", 12398370950267227270),
        )
        for key, message, expected in cases:
            self.assertEqual(expected, siphash(key, message))
コード例 #2
0
ファイル: group_by.py プロジェクト: mabel-dev/mabel
    def _map(self, collect_columns):
        """
        Yield (GroupID, CollectedColumn, Value) tuples for the records being grouped.

        The GroupID is a siphash of the grouped columns' values joined into a single
        string; the values themselves are not needed downstream, only a way to tell
        that two records share the same values. The first time a GroupID is seen,
        the record's grouped column values are remembered in ``self._group_keys``.

        For every column being collected, one tuple is emitted per record; ``"*"``
        is emitted with the placeholder value ``"*"``, and columns whose value is
        ``None`` are skipped.

        This mirrors the MAP step of a MapReduce algorithm: the data is normalised
        into a uniform shape that could be processed in parallel.

        Raises TooManyGroups once 4999999 distinct groups have been recorded.
        """
        if collect_columns == self._columns == {"*"}:
            # COUNT(*) short-cut: a single synthetic group, one tuple per record
            self._group_keys["*"] = [("*", "*")]
            for _ in self._dictset:
                yield ("*", "*", "*")
            return

        for row in self._dictset:
            # NOTE(review): values are joined with no separator, so ("ab", "c") and
            # ("a", "bc") produce the same key text - confirm this is acceptable.
            try:
                key_text = "".join([str(row[name]) for name in self._columns])
            except KeyError:
                # at least one grouped column is absent; treat missing as empty
                key_text = "".join(
                    [f"{row.get(name, '')}" for name in self._columns]
                )
            group_key: cython.uint64_t = siphash(HASH_SEED, key_text)

            if group_key not in self._group_keys:
                self._group_keys[group_key] = [
                    (name, row.get(name)) for name in self._columns
                ]
                if len(self._group_keys) >= 4999999:
                    raise TooManyGroups(
                        f"Groups are not selective enough and too many Groups have been found (stopped at {len(self._group_keys)})."
                    )

            for name in collect_columns:
                if name == "*":
                    yield (group_key, name, "*")
                    continue
                value = row.get(name)
                if value is not None:  # nulls are not emitted
                    yield (group_key, name, value)
コード例 #3
0
ファイル: unpack.py プロジェクト: vbauerster/trifles
def unpack(stream):
    """Read length-prefixed packets from *stream* and report each one on stderr.

    Packet layout, as read here: packet size (uint32 LE), checksum (uint64 LE),
    filename length (uint16 LE), filename, uncompressed size (uint32 LE), then
    the compressed payload that fills the rest of the packet. The payload is
    decompressed with ``lz4.uncompress`` and its siphash (all-zero 16-character
    key) is compared with the stored checksum.

    Reading stops silently when the next packet-size field cannot be read in
    full, and stops after writing ``short read`` to stderr when any later field
    is truncated. Nothing is returned.
    """

    def read_exact(count):
        # Exactly `count` bytes, or None (after logging) when the stream runs dry.
        chunk = stream.read(count)
        if len(chunk) != count:
            sys.stderr.write('short read')
            return None
        return chunk

    while True:
        size_field = stream.read(4)
        if len(size_field) != 4:
            # end of stream at a packet boundary: not reported
            return
        pktSize = struct.unpack('<I', size_field)[0]

        header = read_exact(8 + 2)
        if header is None:
            return
        checksum, lenfname = struct.unpack('<QH', header)

        fname = read_exact(lenfname)
        if fname is None:
            return

        fsize_field = read_exact(4)
        if fsize_field is None:
            return
        fsize = struct.unpack('<I', fsize_field)[0]

        # whatever remains of the packet after the fixed fields and the name
        compressedSize = pktSize - (4 + 8 + 2 + 4) - lenfname
        payload = read_exact(compressedSize)
        if payload is None:
            return

        plain = lz4.uncompress(payload)
        got = siphashc.siphash('\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', plain)
        if got != checksum:
            sys.stderr.write('%s: checksum fail: got %d, want %d\n' %
                             (fname, got, checksum))
        else:
            sys.stderr.write('%s: %d -> %d\n' % (fname, compressedSize, fsize))
コード例 #4
0
ファイル: cursor.py プロジェクト: mabel-dev/mabel
    def load_cursor(self, cursor):
        """Restore the reader state from a previously saved cursor.

        *cursor* may be ``None`` (nothing happens), a JSON string (parsed with
        ``orjson``) or an already-parsed mapping. It must carry the keys
        ``location``, ``map`` and ``partition``; otherwise InvalidCursor is
        raised.

        Sets ``self.location``, sets ``self.partition`` when exactly one readable
        blob hashes to the cursor's partition value, and rebuilds
        ``self.read_blobs`` from the hex-encoded bitmap in ``map``.
        """
        from bitarray import bitarray

        if cursor is None:
            return

        if isinstance(cursor, str):
            cursor = orjson.loads(cursor)

        present = cursor.keys()
        if not all(field in present
                   for field in ("location", "map", "partition")):
            raise InvalidCursor(f"Cursor is malformed or corrupted {cursor}")

        self.location = cursor["location"]

        # the cursor stores the hash of the partition's blob name, so hash each
        # candidate and keep the ones that match
        wanted = cursor["partition"]
        matches = [
            candidate for candidate in self.readable_blobs
            if siphash("%" * 16, candidate) == wanted
        ]
        if len(matches) == 1:
            self.partition = matches[0]

        # one bit per readable blob, in order; a set bit marks the blob as read
        blob_map = bitarray()
        blob_map.frombytes(bytes.fromhex(cursor["map"]))
        self.read_blobs = [
            blob for position, blob in enumerate(self.readable_blobs)
            if blob_map[position]
        ]
コード例 #5
0
ファイル: unpack.py プロジェクト: 2opremio/trifles
def unpack(stream):
    """Walk the packets in *stream*, verifying each payload's checksum.

    For every packet the fields are read in order: total packet size
    (uint32 LE), checksum (uint64 LE), filename length (uint16 LE), the
    filename, the uncompressed size (uint32 LE) and finally the compressed
    payload. The payload is passed through ``lz4.uncompress`` and hashed with
    siphash under an all-zero 16-character key; the outcome is written to
    stderr.

    A missing or partial packet-size field ends the loop quietly; a truncated
    later field ends it after ``short read`` is written to stderr. The function
    returns nothing.
    """

    def take(nbytes):
        # Read `nbytes` from the stream; log and yield None if fewer arrive.
        piece = stream.read(nbytes)
        if len(piece) == nbytes:
            return piece
        sys.stderr.write('short read')
        return None

    while True:
        raw_size = stream.read(4)
        if len(raw_size) != 4:
            return
        (pktSize,) = struct.unpack('<I', raw_size)

        fixed = take(8+2)
        if fixed is None:
            return
        checksum, lenfname = struct.unpack('<QH', fixed)

        fname = take(lenfname)
        if fname is None:
            return

        raw_fsize = take(4)
        if raw_fsize is None:
            return
        (fsize,) = struct.unpack('<I', raw_fsize)

        # payload length = packet size minus the fixed-width fields and the name
        compressedSize = pktSize - 4 - 8 - 2 - 4 - lenfname
        body = take(compressedSize)
        if body is None:
            return

        body = lz4.uncompress(body)
        got = siphashc.siphash('\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', body)
        if got == checksum:
            report = '%s: %d -> %d\n' % (fname, compressedSize, fsize)
        else:
            report = '%s: checksum fail: got %d, want %d\n' % (fname, got, checksum)
        sys.stderr.write(report)
コード例 #6
0
ファイル: test_siphashc.py プロジェクト: carlopires/siphashc3
 def test_reference_vectors(self):
     """Compare siphash output with 64 expected digests.

     Entry ``i`` of ``vectors`` is the expected digest of the message made of
     the characters ``chr(0) .. chr(i - 1)`` (empty for ``i == 0``), hashed
     under the key ``chr(0) .. chr(15)``.
     """
     vectors = [
       0x726fdb47dd0e0e31, 0x74f839c593dc67fd, 0x0d6c8009d9a94f5a,
       0x85676696d7fb7e2d, 0xcf2794e0277187b7, 0x18765564cd99a68d,
       0xcbc9466e58fee3ce, 0xab0200f58b01d137, 0x93f5f5799a932462,
       0x9e0082df0ba9e4b0, 0x7a5dbbc594ddb9f3, 0xf4b32f46226bada7,
       0x751e8fbc860ee5fb, 0x14ea5627c0843d90, 0xf723ca908e7af2ee,
       0xa129ca6149be45e5, 0x3f2acc7f57c29bdb, 0x699ae9f52cbe4794,
       0x4bc1b3f0968dd39c, 0xbb6dc91da77961bd, 0xbed65cf21aa2ee98,
       0xd0f2cbb02e3b67c7, 0x93536795e3a33e88, 0xa80c038ccd5ccec8,
       0xb8ad50c6f649af94, 0xbce192de8a85b8ea, 0x17d835b85bbb15f3,
       0x2f2e6163076bcfad, 0xde4daaaca71dc9a5, 0xa6a2506687956571,
       0xad87a3535c49ef28, 0x32d892fad841c342, 0x7127512f72f27cce,
       0xa7f32346f95978e3, 0x12e0b01abb051238, 0x15e034d40fa197ae,
       0x314dffbe0815a3b4, 0x027990f029623981, 0xcadcd4e59ef40c4d,
       0x9abfd8766a33735c, 0x0e3ea96b5304a7d0, 0xad0c42d6fc585992,
       0x187306c89bc215a9, 0xd4a60abcf3792b95, 0xf935451de4f21df2,
       0xa9538f0419755787, 0xdb9acddff56ca510, 0xd06c98cd5c0975eb,
       0xe612a3cb9ecba951, 0xc766e62cfcadaf96, 0xee64435a9752fe72,
       0xa192d576b245165a, 0x0a8787bf8ecb74b2, 0x81b3e73d20b49b6f,
       0x7fa8220ba3b2ecea, 0x245731c13ca42499, 0xb78dbfaf3a8d83bd,
       0xea1ad565322a1a0b, 0x60e61c23a3795013, 0x6606d7e446282b93,
       0x6ca4ecb15c5f91e1, 0x9f626da15c9625f3, 0xe51b38608ef25f57,
       0x958a324ceb064572]
     k = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f'
     for length in range(64):
         # message of `length` characters: chr(0), chr(1), ..., chr(length - 1)
         m = ''.join(map(chr, range(length)))
         self.assertEqual(siphash(k, m), vectors[length])
コード例 #7
0
ファイル: cursor.py プロジェクト: mabel-dev/mabel
 def next_blob(self, previous_blob=None):
     """Return the blob to read next, or ``None`` when nothing is left.

     When *previous_blob* is given it is recorded as read and the current
     partition/location are reset. If a partition is still set and the
     location is positive, that partition is resumed: either directly (when it
     is a readable blob name) or by finding the single readable blob whose
     siphash equals it. Otherwise the first readable blob that has not been
     read becomes the current partition.

     Raises ValueError when a stored partition cannot be resolved to exactly
     one readable blob.
     """
     if previous_blob:
         self.read_blobs.append(previous_blob)
         self.partition, self.location = "", -1

     if self.partition and self.location > 0:
         # resume part-way through a blob
         if self.partition in self.readable_blobs:
             return self.partition
         candidates = [
             name for name in self.readable_blobs
             if siphash("%" * 16, name) == self.partition
         ]
         if len(candidates) == 1:
             return candidates[0]
         raise ValueError(
             f"Unable to determine current partition ({self.partition})"
         )

     # start the first blob that has not been read yet
     for name in self.readable_blobs:
         if name not in self.read_blobs:
             self.partition = name
             self.location = -1
             return self.partition
     return None
コード例 #8
0
ファイル: base.py プロジェクト: roptat/weblate
 def get_cache_key(self, unit, pos):
     """Build the cache key for this check on *unit* at position *pos*.

     The key combines the check id, the unit's primary key, a siphash of the
     unit's formatted flags and the position.
     """
     key_fields = (
         self.check_id,
         unit.pk,
         siphash("Weblate   Checks", unit.all_flags.format()),
         pos,
     )
     return "check:{}:{}:{}:{}".format(*key_fields)
コード例 #9
0
ファイル: hash.py プロジェクト: janimik/weblate-1
def calculate_hash(source, context):
    """Calculate the checksum identifying a translation.

    The UTF-8 bytes of *source* (skipped when it is ``None``) followed by the
    UTF-8 bytes of *context* are hashed with siphash.
    """
    prefix = b'' if source is None else source.encode('utf-8')
    payload = prefix + context.encode('utf-8')
    # Shift the unsigned 64-bit result down into the signed 64-bit range
    unsigned_digest = siphash('Weblate Sip Hash', payload)
    return unsigned_digest - (1 << 63)
コード例 #10
0
ファイル: test_siphashc.py プロジェクト: carlopires/siphashc3
 def test_errors(self):
     """Keys that are too short, too long or empty must raise ValueError."""
     bad_keys = (
         'not long enough',
         'toooooooooooooooooooooooo long',
         '',
     )
     for bad_key in bad_keys:
         with self.assertRaises(ValueError):
             siphash(bad_key, 'a')
コード例 #11
0
ファイル: index.py プロジェクト: mabel-dev/mabel
 def add(self, position, record):
     """Index *record*'s value(s) for this index's column at *position*.

     A list value contributes one entry per element; any other value
     contributes a single entry. Each entry is a ``(key, position)`` tuple,
     where ``key`` is the hex form of the value's siphash reduced modulo
     ``MAX_INDEX``. The new entries are appended to ``self.temporary_index``
     and also returned.
     """
     new_entries = []
     values = record.get(self.column_name)
     # NOTE(review): falsy values (0, "", False, empty list) are not indexed;
     # confirm this is intended.
     if values:
         if not isinstance(values, list):
             values = [values]
         # list items are indexed separately
         new_entries = [
             (format(siphash(SEED, f"{value}") % MAX_INDEX, "x"), position)
             for value in values
         ]
     self.temporary_index += new_entries
     return new_entries
コード例 #12
0
ファイル: index.py プロジェクト: mabel-dev/mabel
 def search(self, search_term) -> Iterable:
     """
     Look up one or more values in the index.

     *search_term* may be a single value or a list, set or tuple of values.
     Returns a list of the index entries stored under each matching key
     (documented by the original author as row numbers); the list is empty
     when nothing matches. Matches for later terms are placed ahead of
     matches for earlier terms.
     """
     if isinstance(search_term, (list, set, tuple)):
         terms = search_term
     else:
         terms = [search_term]
     found: list = []
     for term in terms:
         bucket = format(siphash(SEED, f"{term}") % MAX_INDEX, "x")
         if bucket in self._index:  # type:ignore
             # prepend this term's matches to what has been found so far
             found = [*self._index[bucket], *found]  # type:ignore
     return found
コード例 #13
0
ファイル: test_siphashc.py プロジェクト: WeblateOrg/siphashc
 def test_errors(self):
     """Keys that are too short, too long or empty must raise ValueError."""
     bad_keys = (
         "not long enough",
         "toooooooooooooooooooooooo long",
         "",
     )
     for bad_key in bad_keys:
         with self.assertRaises(ValueError):
             siphash(bad_key, "a")
コード例 #14
0
ファイル: cursor.py プロジェクト: mabel-dev/mabel
    def __getitem__(self, item):
        """Expose the cursor's fields by name.

        ``"map"`` gives a hex string of a bitmap with one bit per readable blob
        (set when the blob has been read), ``"partition"`` gives the siphash of
        the current partition, ``"location"`` gives the current location. Any
        other key yields ``None``.
        """
        from bitarray import bitarray

        if item == "map":
            bits = "".join(
                "1" if blob in self.read_blobs else "0"
                for blob in self.readable_blobs
            )
            return bitarray(bits).tobytes().hex()
        if item == "partition":
            return siphash("%" * 16, self.partition)
        return self.location if item == "location" else None
コード例 #15
0
ファイル: pack.py プロジェクト: vbauerster/trifles
def pack(fname):
    """Compress the file *fname* and write it to stdout as one packet.

    The packet layout is: packet size (uint32 LE), siphash checksum of the
    uncompressed data (uint64 LE), filename length (uint16 LE), filename,
    uncompressed size (uint32 LE), then the lz4-compressed data. A one-line
    summary is written to stderr, and the module-level counters ``total`` and
    ``compressed`` are increased by the uncompressed and compressed sizes.
    """
    global total, compressed

    # Binary mode so the checksum and compressor see the file's exact bytes;
    # the context manager closes the handle even when the read fails.
    with open(fname, 'rb') as f:
        data = f.read()
    checksum = siphashc.siphash('\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', data)
    fsize = len(data)
    data = lz4.compress(data)

    # size of packet(4), checksum(8), fnamelen(2)+fname, uncompressed size(4), compressed data
    payload_len = len(data)
    pktlen = 4 + 8 + 2 + len(fname) + 4 + payload_len

    total += fsize
    compressed += payload_len

    sys.stderr.write("%s: %d -> %d\n" % (fname, fsize, payload_len))
    sys.stdout.write(struct.pack('<IQH%dsI%ds' % (len(fname), payload_len),
                                 pktlen, checksum, len(fname), fname, fsize, data))
    sys.stdout.flush()
コード例 #16
0
ファイル: base_inner_reader.py プロジェクト: mabel-dev/mabel
    def read_blob(self, blob: str) -> IOBase:
        """
        Return the contents of *blob* as an in-memory byte stream, going
        through the memcached read-through cache when one is configured.
        """
        cache = memcached_server()
        if not cache:
            # no cache configured: read straight from storage
            return io.BytesIO(self.get_blob_bytes(blob))

        from siphashc import siphash

        # the cache is keyed on a hash of the blob name
        cache_key = str(siphash("RevengeOfTheBlob", blob))

        payload = cache.get(cache_key)
        if payload is None:
            # cache miss: fetch from storage and populate the cache
            payload = self.get_blob_bytes(blob)
            cache.set(cache_key, payload)

        return io.BytesIO(payload)
コード例 #17
0
ファイル: inline_functions.py プロジェクト: mabel-dev/mabel
    "ADDDAYS": add_days,
    "DAYSDIFF": diff_days,
    # STRINGS
    "UCASE": lambda x: str(x).upper(),
    "UPPER": lambda x: str(x).upper(),
    "LCASE": lambda x: str(x).lower(),
    "LOWER": lambda x: str(x).lower(),
    "TRIM": lambda x: str(x).strip(),
    "LEN": len,
    "STRING": to_string,
    "LEFT": lambda x, y: str(x)[: int(y)],
    "RIGHT": lambda x, y: str(x)[-int(y) :],
    "MID": lambda x, y, z: str(x)[int(y) :][: int(z)],
    "CONCAT": concat,
    # NUMBERS
    "ROUND": round,
    "TRUNC": parse_number(float, truncate),
    "INTEGER": parse_number(float, int),
    "DOUBLE": parse_number(float, float),
    # BOOLEAN
    "BOOLEAN": lambda x: str(x).upper() != "FALSE",
    "ISNONE": lambda x: x is None,
    # HASHING & ENCODING
    "HASH": lambda x: format(siphash("INCOMPREHENSIBLE", str(x)), "X"),
    "MD5": get_md5,
    "RANDOM": get_random,  # return a random number 0-99
    # OTHER
    "BETWEEN": lambda val, low, high: low < val < high,
    "SORT": lambda x: sorted(x),
}
コード例 #18
0
ファイル: test_siphashc.py プロジェクト: WeblateOrg/siphashc
 def test_reference_vectors(self):
     """Compare siphash output with 64 expected digests.

     Entry ``i`` of ``vectors`` is the expected digest of the message made of
     the characters ``chr(0) .. chr(i - 1)`` (empty for ``i == 0``), hashed
     under the key ``chr(0) .. chr(15)``.
     """
     vectors = [
         0x726FDB47DD0E0E31,
         0x74F839C593DC67FD,
         0x0D6C8009D9A94F5A,
         0x85676696D7FB7E2D,
         0xCF2794E0277187B7,
         0x18765564CD99A68D,
         0xCBC9466E58FEE3CE,
         0xAB0200F58B01D137,
         0x93F5F5799A932462,
         0x9E0082DF0BA9E4B0,
         0x7A5DBBC594DDB9F3,
         0xF4B32F46226BADA7,
         0x751E8FBC860EE5FB,
         0x14EA5627C0843D90,
         0xF723CA908E7AF2EE,
         0xA129CA6149BE45E5,
         0x3F2ACC7F57C29BDB,
         0x699AE9F52CBE4794,
         0x4BC1B3F0968DD39C,
         0xBB6DC91DA77961BD,
         0xBED65CF21AA2EE98,
         0xD0F2CBB02E3B67C7,
         0x93536795E3A33E88,
         0xA80C038CCD5CCEC8,
         0xB8AD50C6F649AF94,
         0xBCE192DE8A85B8EA,
         0x17D835B85BBB15F3,
         0x2F2E6163076BCFAD,
         0xDE4DAAACA71DC9A5,
         0xA6A2506687956571,
         0xAD87A3535C49EF28,
         0x32D892FAD841C342,
         0x7127512F72F27CCE,
         0xA7F32346F95978E3,
         0x12E0B01ABB051238,
         0x15E034D40FA197AE,
         0x314DFFBE0815A3B4,
         0x027990F029623981,
         0xCADCD4E59EF40C4D,
         0x9ABFD8766A33735C,
         0x0E3EA96B5304A7D0,
         0xAD0C42D6FC585992,
         0x187306C89BC215A9,
         0xD4A60ABCF3792B95,
         0xF935451DE4F21DF2,
         0xA9538F0419755787,
         0xDB9ACDDFF56CA510,
         0xD06C98CD5C0975EB,
         0xE612A3CB9ECBA951,
         0xC766E62CFCADAF96,
         0xEE64435A9752FE72,
         0xA192D576B245165A,
         0x0A8787BF8ECB74B2,
         0x81B3E73D20B49B6F,
         0x7FA8220BA3B2ECEA,
         0x245731C13CA42499,
         0xB78DBFAF3A8D83BD,
         0xEA1AD565322A1A0B,
         0x60E61C23A3795013,
         0x6606D7E446282B93,
         0x6CA4ECB15C5F91E1,
         0x9F626DA15C9625F3,
         0xE51B38608EF25F57,
         0x958A324CEB064572,
     ]
     k = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
     for length in range(64):
         # message of `length` characters: chr(0), chr(1), ..., chr(length - 1)
         message = "".join(map(chr, range(length)))
         self.assertEqual(siphash(k, message), vectors[length])
コード例 #19
0
def get_hash(string, queue_count=const.AMQ.NUM_QUEUES):
    """Hash *string* and reduce the result modulo *queue_count*."""
    # Keep only the low 31 bits of the 64-bit hash; the original author's note
    # attributes this restriction to PHP's integer handling.
    low_31_bits = 0x7FFFFFFF
    digest = siphashc.siphash(const.AMQ.HASH, string)
    return (digest & low_31_bits) % queue_count
コード例 #20
0
ファイル: gpg.py プロジェクト: xiaozwu2018/weblate
def gpg_cache_key(suffix: str) -> str:
    """Return a cache key made of a hash of the configured GPG identity and *suffix*."""
    identity_hash = siphash("Weblate GPG hash", settings.WEBLATE_GPG_IDENTITY)
    return "gpg:{}:{}".format(identity_hash, suffix)
コード例 #21
0
def hash_text(name):
    """Hash text for use in an HTML id.

    The encoded bytes of *name* are hashed with siphash and the result is
    passed through ``hash_to_checksum``.
    """
    digest = siphash("Weblate URL hash", name.encode())
    return hash_to_checksum(digest)
コード例 #22
0
ファイル: test_siphashc.py プロジェクト: carlopires/siphashc3
    def test_hash(self):
        """Check two known key/message pairs against the expected digests."""
        cases = (
            ('sixteencharstrng', 'i need a hash of this', expected_hash1),
            ('0123456789ABCDEF', 'a', expected_hash2),
        )
        for key, message, expected in cases:
            self.assertEqual(expected, siphash(key, message))
コード例 #23
0
ファイル: hash.py プロジェクト: xiaozwu2018/weblate
def raw_hash(*parts: str):
    """Calculate the checksum identifying a translation.

    All *parts* are concatenated in order and the result is hashed with siphash.
    """
    return siphash("Weblate Sip Hash", "".join(parts))
コード例 #24
0
 def sip(val):
     """Return the siphash of *val* under the fixed key ``TheApolloMission``."""
     fixed_key = "TheApolloMission"
     return siphash(fixed_key, val)