def test_string_hashing(self):
    """Check mhash on ASCII, unicode and non-ASCII plain string literals.

    Each hash must stay below 2**32, and a fixed phrase must map to a
    known reference value.
    """
    from solariat_bottle.utils.hash import mhash

    upper_bound = 2**32

    # Plain ASCII input.
    ascii_text = 'Asdfghjjkl;'
    self.assertTrue(1 < mhash(ascii_text) < upper_bound)

    # Unicode literal; only the upper bound is checked for this one.
    unicode_text = u'Фываолдж'
    self.assertTrue(mhash(unicode_text) < upper_bound)

    # Non-ASCII text written as a plain (non-u) literal.
    raw_text = 'Фыва'
    self.assertTrue(1 < mhash(raw_text) < upper_bound)

    # A plain literal explicitly decoded from UTF-8 before hashing.
    decoded_text = 'Фываd'.decode('utf-8')
    self.assertTrue(1 < mhash(decoded_text) < upper_bound)

    # Regression value for a fixed phrase.
    expected_hash = 2515788540
    phrase = 'jumps over the lazy dog'
    self.assertEqual(mhash(phrase), expected_hash)
def make_request_id(self, method_name):
    """Build a string id from the API credentials and the method name.

    The four credential fields read from ``self.base_api.auth`` and
    ``method_name`` are packed into a tuple, hashed with
    ``mhash(..., n=128)``, and the hash is returned as ``str``.
    """
    credentials = self.base_api.auth
    fingerprint = (
        credentials.consumer_key,
        credentials.consumer_secret,
        credentials.access_token,
        credentials.access_token_secret,
        method_name,
    )
    return str(mhash(fingerprint, n=128))
def get_post_hash(post, index=None):
    """Encode a post as a value derived from its id.

    An integer ``post`` is masked down to its low ``POST_WIDTH`` bits.
    An object with an ``id`` attribute is hashed from the string
    ``"<id>:<index>"`` via ``mhash(..., n=POST_WIDTH)``; in that case
    ``index`` must be an integer. Any other input raises ``RuntimeError``.
    """
    if isinstance(post, (int, long)):
        low_bits_mask = (1 << POST_WIDTH) - 1
        return post & low_bits_mask

    if not hasattr(post, 'id'):
        raise RuntimeError('unsupported post type: %r' % post)

    # NOTE(review): this check is skipped when Python runs with -O.
    assert index != None and isinstance(index, (int, long))
    seed = "%s:%d" % (str(post.id), index)
    return mhash(seed, n=POST_WIDTH)
def get_topic_hash(topic):
    """Map a topic to an integer code.

    * ``None`` or ``NO_TOPIC`` -> ``0``
    * ``ALL_TOPICS`` -> all ``TOPIC_WIDTH`` low bits set
    * an integer -> that integer masked to ``TOPIC_WIDTH`` bits
    * a string -> ``mhash`` of the lower-cased string with ``n=TOPIC_WIDTH``

    Any other type raises ``RuntimeError``.
    """
    if topic is None or topic == NO_TOPIC:
        return 0

    full_mask = (1 << TOPIC_WIDTH) - 1
    if topic == ALL_TOPICS:
        return full_mask
    if isinstance(topic, (int, long)):
        return topic & full_mask
    if isinstance(topic, basestring):
        return mhash(topic.lower(), n=TOPIC_WIDTH)

    raise RuntimeError('unsupported topic type: %r' % topic)
def _prepare_post_checking_duplicates(self, klass, **kw):
    """Generate a post id, create the post, and merge channels on duplicates.

    ``lang``, ``native_id`` and ``_id`` are popped from ``kw``; everything
    left in ``kw`` is forwarded to ``klass.create``. ``kw`` must contain
    ``channels``.

    Returns:
        ``(post, False)`` both when the post is newly created and when an
        existing post with the same ``_id`` is found and its channels are
        extended.
        NOTE(review): the flag is ``False`` on both paths -- confirm with
        callers whether the duplicate path was meant to report ``True``.
    """
    from pymongo.errors import DuplicateKeyError
    from solariat_bottle.utils.hash import mhash
    from solariat_bottle.utils.id_encoder import MILLISECONDS_PER_SECOND

    # Normalise channels to ids; plain values are passed through untouched.
    actual_channels = [(c.id if isinstance(c, Channel) else c)
                       for c in kw['channels']]
    lang_data = kw.pop('lang', None)
    native_id = kw.pop('native_id', None)

    # Some events lack millisecond resolution on their timestamps, so the
    # timestamp is padded with extra resolution computed as a hash of the
    # native id.
    padding = mhash(native_id, n=20) % MILLISECONDS_PER_SECOND

    # Note: gen_id() is evaluated even when an explicit '_id' was supplied,
    # because it is passed as the default argument of pop().
    p_id = kw.pop('_id', self.gen_id(padding=padding, **kw))

    # Fall back to the generated id when no native id was provided.
    native_id = native_id if native_id else str(p_id)
    if lang_data:
        kw['_lang_code'] = lang_data.lang

    try:
        post = klass.create(self, _id=p_id, _native_id=native_id, **kw)
        return post, False
    except DuplicateKeyError:
        # On a duplicate, fetch the original and extend its channels. This
        # could probably be handled with an upsert that sets the channels to
        # actual_channels, but it is not clear why some channels would not be
        # passed in while still being necessary, so the sets are merged.
        post = self.find_one(_id=p_id)
        post.channels = list(set(post.channels) | set(actual_channels))
        return post, False
def key(self):
    """Return ``self.id``, computing it from ``self.filters`` when unset.

    When ``self.id`` is falsy it is assigned ``mhash(self.filters, n=128)``
    before being returned; otherwise the stored value is returned as is.
    """
    if self.id:
        return self.id

    self.id = mhash(self.filters, n=128)
    return self.id
def tweepy_cache_id(token, secret, method_name, args, kwargs):
    """Build a string cache id for a call made with given credentials.

    The id is ``str(mhash(parts, n=128))`` where ``parts`` is a list made
    of, in order: ``str()`` of ``token``, ``secret`` and ``method_name``;
    ``str()`` of every positional argument in ``args``; then, for each
    ``kwargs`` item in sorted order, the key followed by ``str()`` of
    its value.

    Args:
        token: first credential component.
        secret: second credential component.
        method_name: name of the method being called.
        args: iterable of positional arguments.
        kwargs: mapping of keyword arguments.

    Returns:
        The hash rendered as ``str``.
    """
    # Build a real list so it can be extended on Python 3 as well, where
    # map() returns an iterator.
    key = [str(part) for part in (token, secret, method_name)]
    key.extend(str(arg) for arg in args)

    # Sorted so the id does not depend on dict iteration order.
    for name, value in sorted(kwargs.items()):
        key.append(name)
        key.append(str(value))

    return str(mhash(key, n=128))
def hash_dict(d):
    """Hash a dict via ``mhash(..., n=128)`` over its sorted items.

    The items are sorted and frozen into a tuple so that the hashed value
    does not depend on the dict's iteration order.
    """
    ordered_items = sorted(d.items())
    return mhash(tuple(ordered_items), n=128)