Example #1
# BodyDecoder, Config, Core and HTTPRequestError come from the surrounding
# project; the imports below cover the rest of what this snippet uses.
import binascii
import pickle

import mmh3
from twisted.internet import defer
from twisted.web.http_headers import Headers

def generate_backoff_key(uri, extra_headers):
    return "backoff:" + binascii.b2a_base64(mmh3.hash_bytes(
        "/".join([uri, _get_arguments_key(extra_headers)])
    )).strip().decode("ascii")

def _get_arguments_key(kwargs):
    return "|".join(":".join([key, value]) for key, value in kwargs.items())

def backoff_http_request(
    uri,
    decoderclass=BodyDecoder,
    extra_headers=None,
    backoff_key=None,
    non200_timeout_seconds=60,  # assumed default; left undefined in the original
):
    extra_headers = extra_headers or {}

    if not backoff_key:
        backoff_key = generate_backoff_key(uri, extra_headers)

    request_url = str(uri)

    def handle_error(error, *args):
        error.raiseException()

    def handle_http_response(response):
        if response.code == 200:
            finished_d = defer.Deferred()
            response.deliverBody(decoderclass(finished_d))
            return finished_d
        else:
            # Cache the failed response so later callers can back off.
            pickled_non200_response = pickle.dumps(response)
            Config.redis_client.set(
                backoff_key,
                pickled_non200_response,
                expire=non200_timeout_seconds,
                only_if_not_exists=True,
            )
            raise HTTPRequestError(
                response.code,
                "got code %s talking to a remote http server: %s"
                % (str(response.code), response.phrase),
            )

    http_request_d = Core.http_agent.request(
        'GET',
        request_url,
        Headers(extra_headers),  # twisted's Headers expects {name: [values]}
        None,
    )

    http_request_d.addCallback(handle_http_response)
    http_request_d.addErrback(handle_error, request_url)
    # http_request_d.addErrback(log.err, uri)
    return http_request_d
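A quick sketch of what the key looks like (the URI and header are invented, and the encoded digest shown is illustrative, not a real MurmurHash3 value):

key = generate_backoff_key("http://example.com/api", {"Accept": "text/json"})
# -> something like 'backoff:3q2+796tvu/erb7v3q2+7w=='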
Example #2
def get_word_offset(buckets_file, buckets_cnt, buckets_offsets, word):
    hash_bytes = mmh3.hash_bytes(word.encode('utf-8'))
    # Bucket selection must match save_dictionary(); note that Python 3
    # randomizes hash() per process unless PYTHONHASHSEED is fixed.
    bucket_id = abs(hash(hash_bytes)) % buckets_cnt
    bucket_offset = buckets_offsets[bucket_id]
    buckets_file.seek(bucket_offset)
    bucket_len = buckets_file.read(4)
    if bucket_len == b'':
        return None
    int_decoder = struct.Struct('I')
    bucket_len = int_decoder.unpack(bucket_len)[0]
    data = buckets_file.read(__hash_entry_size * bucket_len)

    # Binary search for the first entry whose digest is >= hash_bytes.
    l = -1
    r = bucket_len
    while l < r - 1:
        m = (l + r) // 2
        entry_hash_bytes = data[m * __hash_entry_size:m * __hash_entry_size + __hash_bytes_len]
        if entry_hash_bytes < hash_bytes:
            l = m
        else:
            r = m

    if r == bucket_len:
        return None
    found_hash_bytes = data[r * __hash_entry_size:r * __hash_entry_size + __hash_bytes_len]
    if hash_bytes == found_hash_bytes:
        return int_decoder.unpack(data[r * __hash_entry_size + __hash_bytes_len:(r + 1) * __hash_entry_size])[0]
    return None
Example #3
def compute_entity_id(entity_key: EntityKeyProto) -> str:
    """
    Compute Entity id given Feast Entity Key for online stores.
    Remember that Entity here refers to `EntityKeyProto` which is used in some online stores to encode the keys.
    It has nothing to do with the Entity concept we have in Feast.
    """
    return mmh3.hash_bytes(serialize_entity_key(entity_key)).hex()
Example #4
import base64
import io
from typing import Any

import dill
import mmh3

def hash_object(o: Any) -> str:
    """Returns a 16-character hash code of arbitrary Python objects."""
    with io.BytesIO() as buffer:
        dill.dump(o, buffer)
        digest = mmh3.hash_bytes(buffer.getvalue(), x64arch=True)
    return base64.b32encode(digest).decode("UTF-8")[:16].lower()
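A usage sketch: equal objects should yield the same code for a given dill and mmh3 version (the value shown is illustrative, not a real digest):

print(hash_object({"a": 1, "b": [1, 2, 3]}))  # e.g. 'mzxw6ytboi2tgnbq'
print(hash_object({"a": 1, "b": [1, 2, 3]}))  # same code again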
Example #5
def save_dictionary(file_name, dictionary):
    words_cnt = len(dictionary)
    buckets_cnt = (words_cnt + __entries_per_bucket - 1) // __entries_per_bucket
    buckets = [[] for _ in range(buckets_cnt)]
    for word, offset in dictionary.items():
        hash_bytes = mmh3.hash_bytes(word.encode('utf-8'))
        bucket_id = abs(hash(hash_bytes)) % buckets_cnt
        buckets[bucket_id].append((hash_bytes, offset))

    bucket_offsets = dict()
    int_encoder = struct.Struct('I')
    with open(file_name, 'wb') as out:
        for bucket_id in range(buckets_cnt):
            bucket = buckets[bucket_id]
            # Keep entries sorted by digest so lookups can binary-search.
            bucket.sort(key=lambda b: b[0])
            byte_array = bytearray()
            byte_array.extend(int_encoder.pack(len(bucket)))
            for (hash_bytes, offset) in bucket:
                assert __hash_bytes_len == len(hash_bytes)
                byte_array.extend(hash_bytes)
                byte_array.extend(int_encoder.pack(offset))

            bucket_offsets[bucket_id] = out.tell()
            out.write(byte_array)
    return buckets_cnt, bucket_offsets
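Examples #2 and #5 are the two halves of one on-disk hash table. A minimal round-trip sketch, assuming module-level constants like the ones below (the originals are defined elsewhere in the source project); bucket selection uses Python's hash() of the digest, so under Python 3 the file must be written and read with the same PYTHONHASHSEED:

import struct
import mmh3

__hash_bytes_len = 16                     # mmh3.hash_bytes() returns 16 bytes
__hash_entry_size = __hash_bytes_len + 4  # digest plus a struct('I') offset
__entries_per_bucket = 64                 # assumed bucket sizing constant

buckets_cnt, bucket_offsets = save_dictionary("buckets.bin", {"alpha": 0, "beta": 120})
with open("buckets.bin", "rb") as f:
    print(get_word_offset(f, buckets_cnt, bucket_offsets, "beta"))  # -> 120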
Example #6
def test_64bit():
    if sys.maxsize < (1 << 32):  # Skip this test under 32-bit environments
        return
    a = np.zeros(2**32, dtype=np.int8)
    assert mmh3.hash(a) == -1988950868
    assert mmh3.hash64(a) == (-6319308327427928234, -8156928649350215884)
    assert mmh3.hash128(a) == 189813591698865711411311444615608766294
    assert mmh3.hash_bytes(a) == b'V\x8f}\xad\x8eNM\xa84\x07FU\x9c\xc4\xcc\x8e'
Example #7
def fingerprint(self, item):
    '''
    Takes a string and returns its fingerprint: the first
    fingerprint_size bytes of its MurmurHash3 digest.
    '''
    item_hash = mmh3.hash_bytes(item)
    return item_hash[:self.fingerprint_size]
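The same truncation outside the class, assuming a two-byte fingerprint:

import mmh3
fingerprint_size = 2
fingerprint = mmh3.hash_bytes("192.168.1.1")[:fingerprint_size]  # first 2 of 16 digest bytes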
Example #8
File: gcp.py Project: smarthi/feast
def compute_datastore_entity_id(entity_key: EntityKeyProto) -> str:
    """
    Compute Datastore Entity id given Feast Entity Key.

    Remember that Datastore Entity is a concept from the Datastore data model, that has nothing to
    do with the Entity concept we have in Feast.
    """
    return mmh3.hash_bytes(serialize_entity_key(entity_key)).hex()
Example #9
 def index(self, item):
     '''
     Calculate the (first) index of an item in the filter.
     '''
     item_hash = mmh3.hash_bytes(item)
     # Because of this modular computation, it will be tricky to increase
     # the capacity of the filter directly
     return int(codecs.encode(item_hash, 'hex'), 16) % self.capacity
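The hex round trip above is equivalent to reading the digest as a big-endian integer, so under Python 3 the same index can be computed without codecs (this is exactly what Example #48 below does):

import mmh3
capacity = 1024
item_hash = mmh3.hash_bytes('192.168.1.1')
index = int.from_bytes(item_hash, byteorder='big') % capacity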
Example #10
def seq_sim_hash(tokenized_string: Sequence[AnyStr],
                 token_weight: Callable[[AnyStr], float] = lambda token: 1
                 ) -> Tuple[AnyStr, AnyStr]:
    """Returns two 16-byte hashes."""
    length = len(tokenized_string)
    if length == 0:
        return ZERO_HASH, ZERO_HASH
    elif length == 1:
        hash_bytes = mmh3.hash_bytes(tokenized_string[0])
        return hash_bytes, hash_bytes
    elif length == 3:
        # Special case: with the common approach the middle element's hash
        # would be ignored: the first element's hash has weight 2 and the
        # middle element's has weight 1, so the first element always
        # dominates. As an exception, weight all elements equally.
        vector1 = [0] * 16 * 8
        vector2 = [0] * 16 * 8
        hash_bytes0 = mmh3.hash_bytes(tokenized_string[0])
        token_weight0 = token_weight(tokenized_string[0])
        hash_bytes1 = mmh3.hash_bytes(tokenized_string[1])
        token_weight1 = token_weight(tokenized_string[1])
        hash_bytes2 = mmh3.hash_bytes(tokenized_string[2])
        token_weight2 = token_weight(tokenized_string[2])
        add_hash_to_vector(hash_bytes0, vector1, token_weight0)
        add_hash_to_vector(hash_bytes1, vector1, token_weight1)
        add_hash_to_vector(hash_bytes1, vector2, token_weight1)
        add_hash_to_vector(hash_bytes2, vector2, token_weight2)
        return binarize_vector_to_hash(vector1), binarize_vector_to_hash(vector2)
    else:
        vector1 = [0] * 16 * 8
        vector2 = [0] * 16 * 8
        for i in range(length):
            token = tokenized_string[i]
            hash_bytes = mmh3.hash_bytes(token)
            weight = token_weight(token)
            count_after = length - 1 - i
            if count_after > 0:
                add_hash_to_vector(hash_bytes, vector1, count_after * weight)
            count_before = i
            if count_before > 0:
                add_hash_to_vector(hash_bytes, vector2, count_before * weight)
        return binarize_vector_to_hash(vector1), binarize_vector_to_hash(vector2)
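The helpers ZERO_HASH, add_hash_to_vector and binarize_vector_to_hash are not shown on this page. A sketch of plausible implementations, assuming the standard SimHash accumulate-then-binarize scheme:

ZERO_HASH = b'\x00' * 16  # assumed: the all-zero 16-byte hash

def add_hash_to_vector(hash_bytes, vector, weight):
    # For each of the 128 digest bits, add +weight if the bit is set,
    # otherwise subtract weight.
    for byte_index, byte in enumerate(hash_bytes):
        for bit_index in range(8):
            bit = (byte >> (7 - bit_index)) & 1
            vector[byte_index * 8 + bit_index] += weight if bit else -weight

def binarize_vector_to_hash(vector):
    # Positive components become 1-bits, everything else becomes 0-bits.
    out = bytearray(16)
    for i, component in enumerate(vector):
        if component > 0:
            out[i // 8] |= 1 << (7 - (i % 8))
    return bytes(out)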
Example #11
def hash_kwargs(doc):
    """
    Create a hash from the values of a document.
    TODO maybe add the keys too?
    """
    acc = b''
    for k in sorted(doc.keys()):
        v = doc[k]
        acc += bytes_hasher[cls_finder(v)](v)
    hash_ = mmh3.hash_bytes(acc)
    return binascii.hexlify(hash_).decode()
Example #12
    def fingerprint(self, item):
        '''
        Takes an item and returns its fingerprint in bits. The fingerprint
        of an item is computed by truncating its MurmurHash3 digest to the
        fingerprint size.

        Return a bit array representation of the fingerprint.
        '''
        mmh3_hash = bitarray()
        mmh3_hash.frombytes(mmh3.hash_bytes(item))
        # Only get up to the size of the fingerprint
        return mmh3_hash[:self.fingerprint_size]
Example #13
def write_urls(batch_size):
    t = time.time()
    tp = turnip.Turnip()
    batch = []
    cnt = 0
    for url in sys.stdin:
        cnt = cnt + 1
        batch.append((mmh3.hash_bytes(url), url))
        if len(batch) >= batch_size:
            tp.write_batch(batch)
            batch = []
    tp.write_batch(batch)
    print('write', time.time() - t, cnt / (time.time() - t))
Example #14
    def obtain_index_from_hash(self, string_item):

        hash_value = mmh3.hash_bytes(string_item)

        # Python 3 idiom: convert the digest bytes to an integer index
        index = int.from_bytes(hash_value, byteorder="big")

        # modulo the obtained index by the filter capacity
        # this helps to restrict indices to 0 - filter_capacity
        index = index % self.filter_capacity

        return index
Example #15
    def add_query_profiler_data(self, query_without_params: str,
                                params: Union[list, str, None], target_db: str,
                                query_execution_time_in_micros: int,
                                db_row_count: Optional[int]) -> None:
        """ This function adds to the bucket in the last index of the list, if the profiler is on """

        if not self._query_profiler_enabled:
            return

        start_time = time()
        if self._current_query_profiler_level.normalize_sql and params:
            sql_normalized = re.sub(RE_NORMALIZE_REPEATED_PARAMS_PERCENT, '%s',
                                    query_without_params)
        else:
            sql_normalized = query_without_params

        app_stack_trace, django_stack_trace = find_stack_trace(
            app_module_names_to_exclude=settings.DJANGO_QUERY_PROFILER_APP_MODULES_TO_EXCLUDE,
            django_module_names_to_include=(django_base_model.__name__,),
            max_depth=self._current_query_profiler_level.stack_trace_depth)

        # New query_signature & query_signature_statistics instances
        query_signature = QuerySignature(
            query_without_params=sql_normalized,
            app_stack_trace=app_stack_trace,
            django_stack_trace=django_stack_trace,
            target_db=target_db)
        query_signature_statistics = QuerySignatureStatistics(
            frequency=1,  # number of sql calls is 1 when we enter this block
            query_execution_time_in_micros=query_execution_time_in_micros,
            db_row_count=db_row_count)

        query_params_db_key = (query_without_params, params or '', target_db)
        query_params_db_key_hash = hexlify(
            mmh3.hash_bytes(str(query_params_db_key)))

        new_query_profiled_data = QueryProfiledData(
            query_signature_to_query_signature_statistics={
                query_signature: query_signature_statistics},
            _query_params_db_hash_counter=Counter({query_params_db_key_hash: 1}),
            time_spent_profiling_in_micros=int((time() - start_time) * 1000 * 1000))

        # Add to the existing data and set it back
        existing_query_profiled_data: QueryProfiledData = self._query_profiled_data_list[-1]
        combined_query_profiled_data: QueryProfiledData = (
            existing_query_profiled_data + new_query_profiled_data)
        self._query_profiled_data_list[-1] = combined_query_profiled_data
Example #16
def hash_to_64(value: str, count: int = 1) -> List[int]:
    """
    Gets a list of numbers between 0 and 63 for use in a filter vector.
    :param value: string to be hashed
    :param count: must be less than 6
    :return:
    """
    digest = mmh3.hash_bytes(value)
    results = []
    for i in range(count):
        chunk = digest[i * 2:i * 2 + 2]        # two digest bytes per index
        short = struct.unpack("h", chunk)[0]   # signed 16-bit value
        results.append(short % 64)             # Python % is non-negative here
    return results
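A usage sketch, turning the indices into set bits of a 64-bit filter word:

indices = hash_to_64("example.com", count=3)
vector = 0
for i in indices:
    vector |= 1 << i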
Example #18
def find_duplicates(db, warc, options):
    for record in ArchiveIterator(warc):
        id_ = get_record_id(record)
        try:
            text = get_record_text_content(record)
        except ValueError as e:
            logging.error(e)
            continue
        text_hash = mmh3.hash_bytes(text)
        seen = db.get(text_hash, None)
        byte_id = id_.encode('utf-8')
        if seen is None:
            db[text_hash] = byte_id
        elif seen == byte_id:
            pass  # same record
        else:
            seen = seen.decode('utf-8')
            print(f'{id_}\t{seen}')
Example #19
def hashfileobject(f, sample_threshhold=SAMPLE_THRESHOLD, sample_size=SAMPLE_SIZE, hexdigest=False):
    # Get the file size from the file object.
    f.seek(0, os.SEEK_END)
    size = f.tell()
    f.seek(0, os.SEEK_SET)

    if size < sample_threshhold or sample_size < 1:
        data = f.read()
    else:
        # Sample three chunks: head, middle and tail of the file.
        data = f.read(sample_size)
        f.seek(size // 2)
        data += f.read(sample_size)
        f.seek(-sample_size, os.SEEK_END)
        data += f.read(sample_size)

    hash_tmp = mmh3.hash_bytes(data)
    # Reverse each 8-byte half of the digest.
    hash_ = hash_tmp[7::-1] + hash_tmp[16:7:-1]
    # Overwrite the leading digest bytes with the varint-encoded file size.
    enc_size = varint.encode(size)
    digest = enc_size + hash_[len(enc_size):]

    return binascii.hexlify(digest).decode() if hexdigest else digest
Example #20
def hashfile(filename,
             sample_threshhold=SAMPLE_THRESHOLD,
             sample_size=SAMPLE_SIZE,
             hexdigest=False):
    size = os.path.getsize(filename)

    with open(filename, 'rb') as f:
        if size < sample_threshhold or sample_size < 1:
            data = f.read()
        else:
            data = f.read(sample_size)
            f.seek(size // 2)
            data += f.read(sample_size)
            f.seek(-sample_size, os.SEEK_END)
            data += f.read(sample_size)

    hash_tmp = mmh3.hash_bytes(data)
    hash_ = hash_tmp[7::-1] + hash_tmp[16:7:-1]
    enc_size = varint.encode(size)
    digest = enc_size + hash_[len(enc_size):]

    return binascii.hexlify(digest) if hexdigest else digest
Example #21
def hashfileobject(f, sample_threshhold=SAMPLE_THRESHOLD, sample_size=SAMPLE_SIZE, hexdigest=False):
    #get file size from file object
    f.seek(0, os.SEEK_END)
    size = f.tell()
    f.seek(0, os.SEEK_SET)

    if size < sample_threshhold or sample_size < 1:
        data = f.read()
    else:
        data = f.read(sample_size)
        f.seek(size//2)
        data += f.read(sample_size)
        f.seek(-sample_size, os.SEEK_END)
        data += f.read(sample_size)

    hash_tmp = mmh3.hash_bytes(data)
    hash_ = hash_tmp[7::-1] + hash_tmp[16:7:-1]
    enc_size = varint.encode(size)
    digest = enc_size + hash_[len(enc_size):]

    f.seek(0, os.SEEK_SET)

    return binascii.hexlify(digest).decode() if hexdigest else digest
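The three variants above implement the same sampled-hash scheme (it appears to be the imohash format): large files are hashed from three sample_size chunks (head, middle, tail), the two 8-byte halves of the digest are byte-reversed, and the varint-encoded file size overwrites the leading digest bytes so files of different sizes get distinct prefixes. A usage sketch with an invented file name:

with open("large_file.bin", "rb") as f:
    print(hashfileobject(f, hexdigest=True))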
Example #22
def hash128(content):
    return mmh3.hash_bytes(content)
Example #23
 def obfuscate(self, blob):
     return str(mmh3.hash_bytes(blob))
Example #24
def generateIPV6Address():  # generate 128 random bits as byte array
    return mmh3.hash_bytes('', incrementSeed())
Example #25
def chunk_hash(data):
    return b16encode(mmh3.hash_bytes(data)).decode('ascii')
Example #26
def chunk_uuid(data):
    return b16encode(mmh3.hash_bytes(uuid1().bytes)).decode('ascii')
Example #27
 def _hashDigest(self, key, val, iv):
     return mmh3.hash_bytes(str(val) + str(iv), key)
Example #28
def hash128(content):
    return mmh3.hash_bytes(content)
Example #29
def hash_file(path):
    with open(path, "rb") as file:
        return mmh3.hash_bytes(file.read()).hex()
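Usage sketch (the path is invented); the 16-byte digest renders as 32 hex characters:

print(hash_file("cover.jpg"))  # e.g. '00112233445566778899aabbccddeeff' (illustrative)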
Example #30
    def fetch_artist_cover(self, artist_id):
        try:
            artist = get_database().query(Artist).filter_by(id=artist_id).one()
        except NoResultFound:
            return

        remotes_artist = None
        tries = 0

        # try and sleep until we get the remotes_artist.
        while remotes_artist is None and tries < 8:
            remotes_artist = remotes.get_artist(artist)

            tries += 1

            if remotes_artist is None:
                # back off quadratically between retries
                time.sleep(tries ** 2)

        lastfm_artist = None

        if remotes_artist is not None:
            lastfm_artist = remotes_artist["lastfm"]

        if lastfm_artist is None or lastfm_artist["cover"] is None:
            google_images = google.get_artist_images(artist)

            if google_images is not None:
                urls = google_images
            else:
                return
        else:
            urls = [lastfm_artist["cover"]]

        cover = None

        for url in urls:
            cover, resize_cover, resize_cover_large, cover_ext = self.retrieve_and_resize(url)

            # stop at the first URL that yields a usable cover
            if cover is not None:
                break

        if cover is None:
            return

        track_dirs = set()

        for track in artist.tracks:
            for path in track.paths:
                track_dirs.add(os.path.dirname(path.path))

        for track_dir in track_dirs:
            if not os.path.exists(track_dir):
                os.makedirs(track_dir)

            cover_dest = os.path.join(track_dir, ("%s%s" % (artist.slug, cover_ext)).encode("utf8"))

            if not os.path.exists(cover_dest):
                with open(cover_dest, "wb") as file:
                    file.write(cover)

            artist.cover_path = cover_dest

        import mmh3

        artist.cover = resize_cover
        artist.cover_large = resize_cover_large
        artist.cover_hash = base64.b64encode(mmh3.hash_bytes(artist.cover))

        try:
            get_database().commit()
        except StaleDataError:
            # artist was removed, ignore
            get_database().rollback()
            return

        ws.emit_all("covers.artist.update", artist.id)
Example #31
 def _normalize_key(key):
     return base64.encodebytes(mmh3.hash_bytes(key)).strip()
Example #32
 def key(self):
     b = mmh3.hash_bytes(self.name)[:self.name_hash_size]
     return b + self.name
Example #33
 def add_string( self, value ):
     """Adds a string to the record key byte array."""
     string_hash = mmh3.hash_bytes( value )
     self.buffer_value.append( bytearray( string_hash ) )
Example #34
import random
import sys
from string import ascii_letters, digits, punctuation

try:
    import mmh3
except ImportError:
    raise ImportError("Run `pip install mmh3` to install mmh3 for test")

visible = ascii_letters + digits + punctuation + " \t"


def get_random_word():
    return ''.join(
        random.choice(visible) for i in range(random.randint(1, 20))).rstrip()


with open(sys.argv[1], "w") as f:
    py_32bit_out = open("python_32.out", "w")
    # hash_bytes() returns bytes, so this output file must be binary.
    py_128bit_out = open("python_128.out", "wb")
    for i in range(1000):
        word = get_random_word()
        f.write(word + "\n")
        py_32bit_out.write(str(mmh3.hash(word, i) & 0xffffffff) + "\n")
        py_128bit_out.write(mmh3.hash_bytes(word, i) + b"\n")
    py_32bit_out.close()
    py_128bit_out.close()
Example #35
def compute_hashes(self):
    """Compute the Murmur hash of the key."""
    self.routing_hash = mmh3.hash_bytes(self.buffer_value)
    # hash_bytes() returns bytes, which cannot be XOR-shifted directly; fold
    # the first 8 bytes into a 64-bit int first (byte order assumed here).
    routing_int = int.from_bytes(self.routing_hash[:8], byteorder='little')
    self.hash_code = int(routing_int ^ (routing_int >> 32))
Example #36
def test_hash_bytes():
    assert mmh3.hash_bytes(
        "foo") == b"aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~"
Example #37
    def test_bucket(self):
        '''
        Adding and deleting items in a bucket.
        '''
        bucket = Bucket()

        # By default, a bucket has the capacity of 4
        cases = [
            {
                'item': '192.168.1.190',
                'transformer': lambda string: string,

                'action': bucket.insert,
                'expected': True,

                'full': False,
                'included': True,
            },

            {
                'item': '192.168.1.191',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.insert,
                'expected': True,

                'full': False,
                'included': True,
            },

            {
                'item': '192.168.1.192',
                'transformer': lambda string: string,

                'action': bucket.insert,
                'expected': True,

                'full': False,
                'included': True,
            },

            {
                'item': '192.168.1.193',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.insert,
                'expected': True,

                'full': True,
                'included': True,
            },

            {
                'item': '192.168.1.194',
                'transformer': lambda string: string,

                'action': bucket.insert,
                'expected': False,

                'full': True,
                'included': False,
            },

            {
                'item': '192.168.1.195',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.insert,
                'expected': False,

                'full': True,
                'included': False,
            },

            {
                'item': '192.168.1.195',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.delete,
                'expected': False,

                'full': True,
                'included': False,
            },

            {
                'item': '192.168.1.192',
                'transformer': lambda string: string,

                'action': bucket.delete,
                'expected': True,

                'full': False,
                'included': False,
            },

            {
                'item': '192.168.1.193',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.delete,
                'expected': True,

                'full': False,
                'included': False,
            },

            {
                'item': '192.168.1.193',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.insert,
                'expected': True,

                'full': False,
                'included': True,
            },

            # Add the same item again
            {
                'item': '192.168.1.193',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.insert,
                'expected': True,

                'full': True,
                'included': True,
            },

            # Remove a duplicated item
            {
                'item': '192.168.1.193',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.delete,
                'expected': True,

                'full': False,
                'included': True,
            },

            # Remove the last copy of the duplicated item
            {
                'item': '192.168.1.193',
                'transformer': lambda string: str(int(IPAddress(string))),

                'action': bucket.delete,
                'expected': True,

                'full': False,
                'included': False,
            },
        ]

        for case in cases:
            item = case['transformer'](case['item'])

            # Generate all the fingerprints
            fingerprint = bitarray()
            fingerprint.frombytes(mmh3.hash_bytes(item))

            self.assertEqual(case['action'](fingerprint), case['expected'], 'Save {0} into the bucket ok'.format(item))
            self.assertEqual(bucket.is_full(), case['full'], 'Bucket capacity is ok')

            # Make sure that all items are in the bucket
            self.assertEqual(bucket.contains(fingerprint), case['included'], 'Item {0} is in the bucket'.format(item))
            self.assertEqual(fingerprint in bucket, case['included'], 'Item {0} is in the bucket'.format(item))
Example #38
def blockHash(block):
    hashBytes = mmh3.hash_bytes(block)
    return binascii.hexlify(hashBytes)
Example #39
def test_hash_bytes():
    assert mmh3.hash_bytes(
        'foo') == b'aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~'
Example #40
def hash(data):
    return binascii.hexlify(mmh3.hash_bytes(data)).decode('ascii')
Example #41
 def test_hash_bytes(self):
     h = mmh3.hash_bytes('foo')
     assert h == b'aE\xf5\x01W\x86q\xe2\x87}\xba+\xe4\x87\xaf~'
Example #42
 def _hash_murmurhash(buf):
     """
     Produce a 16-byte hash of *buf* using MurmurHash.
     """
     return mmh3.hash_bytes(buf)
Example #43
def generateByteArray(size):
    # generate 128 random bits at a time as a byte array
    ba = mmh3.hash_bytes('', incrementSeed())
    for i in range(size // 8):
        ba = ba + mmh3.hash_bytes('', incrementSeed())
    return bytearray(ba[0:size])
Example #44
def generateIPV6Address():  # generate 128 random bits as byte array
    return mmh3.hash_bytes('', incrementSeed())
Example #45
 def obtain_fingerprint(self, string_item):
     hash_value = mmh3.hash_bytes(string_item)
     fingerprint = hash_value[:self.item_fingerprint_size]
     return fingerprint
Example #46
def generateByteArray(size):
    # generate 128 random bits at a time as a byte array
    ba = mmh3.hash_bytes('', incrementSeed())
    for i in range(size // 8):
        ba = ba + mmh3.hash_bytes('', incrementSeed())
    return bytearray(ba[0:size])
Example #47
    def get_cover(self, type, slug, size="default"):
        if type not in ["album", "artist"]:
            raise ValueError("Invalid type %s supplied" % type)

        entity = None

        if type == "album":
            entity = library_dao.get_album_by_slug(slug)

            if entity is None:
                raise ValueError("Entity not found")

            remotes.update_album(entity)

            if entity.cover_path is None or not os.path.exists(entity.cover_path):
                try:
                    cherrypy.engine.bgtask.put_unique(self.fetch_album_cover, 15, entity.id)
                except NonUniqueQueueError:
                    pass

        elif type == "artist":
            entity = library_dao.get_artist_by_slug(slug)

            if entity is None:
                raise ValueError("Entity not found")

            remotes.update_artist(entity)

            if entity.cover_path is None or not os.path.exists(entity.cover_path):
                try:
                    cherrypy.engine.bgtask.put_unique(self.fetch_artist_cover, 15, entity.id)
                except NonUniqueQueueError:
                    pass

        if entity is None:
            raise ValueError("Entity not found")

        if entity.cover_path is not None:
            if entity.cover is None:
                cover_ext = os.path.splitext(entity.cover_path)[1].decode("utf8")
                temp_cover = self._mktemp(cover_ext).encode("utf8")
                temp_cover_large = self._mktemp(cover_ext).encode("utf8")

                cover = image_service.resize(
                    entity.cover_path, temp_cover, Covers.DEFAULT_WIDTH, Covers.DEFAULT_HEIGHT, Covers.DEFAULT_GRAVITY
                )

                large_offset = self._get_image_offset(Covers.LARGE_WIDTH, Covers.LARGE_HEIGHT, Covers.LARGE_GRAVITY)

                cover_large = image_service.resize(
                    entity.cover_path,
                    temp_cover_large,
                    Covers.LARGE_WIDTH,
                    Covers.LARGE_HEIGHT,
                    Covers.LARGE_GRAVITY,
                    large_offset,
                )

                if cover and cover_large:
                    import mmh3

                    with open(temp_cover, "rb") as file:
                        entity.cover = file.read()
                        entity.cover_hash = base64.b64encode(mmh3.hash_bytes(entity.cover))

                    with open(temp_cover_large, "rb") as file:
                        entity.cover_large = file.read()

                    os.remove(temp_cover)
                    os.remove(temp_cover_large)

                    get_database().commit()

            return self.guess_mime(entity), entity.cover_large if size == "large" else entity.cover

        return None, None
Example #48
 def index_hash(self, item):
     '''Calculate the (first) index of an item in the filter.'''
     item_hash = mmh3.hash_bytes(item)
     index = int.from_bytes(item_hash, byteorder='big') % self.capacity
     return index
Example #49
 def _hash_murmurhash(buf):
     """
     Produce a 16-byte hash of *buf* using MurmurHash.
     """
     return mmh3.hash_bytes(buf)
Example #50
def blockHash(block):
    hashBytes = mmh3.hash_bytes(block)
    return binascii.hexlify(hashBytes)