def __init__(self, tokens, length=100000):

        """Calculates a Charikar simhash with appropriate bitlength.
        
        Input can be any iterable, but for strings it will automatically
        break it into words first, assuming you don't want to iterate
        over the individual characters. Returns nothing.
        
        """
        if isinstance(tokens, str):
            tokens = tokens.split()

        v = {}
        if isinstance(tokens, dict):
            for value, w in tokens.items():
                k = xxhash.xxh64(value).intdigest()
                x = v.get(k%length,0)
                if k & 1 << 63:
                    v[k%length] = x + w
                else:
                    v[k%length] = x - w
        else:
            for value in tokens:
                k = xxhash.xxh64(value).intdigest()
                x = v.get(k%length,0)
                if k & 1 << 63:
                    v[k%length] = x + 1
                else:
                    v[k%length] = x - 1
    
        self.hash = v
        self.vector = v
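
The constructor above accumulates a sparse, bucketed weight vector rather than a packed bit fingerprint. As a rough standalone sketch (the function and variable names below are made up, not part of the class above), the same bucketing can be reused to compare two token streams with a simple dot product:

import xxhash

def simhash_vector(tokens, length=100000):
    # bucket each token's 64-bit hash and add a +1/-1 weight based on the sign bit
    v = {}
    for token in tokens:
        k = xxhash.xxh64(token).intdigest()
        v[k % length] = v.get(k % length, 0) + (1 if k & (1 << 63) else -1)
    return v

def similarity(a, b):
    # dot product over the buckets present in both sparse vectors
    return sum(w * b[i] for i, w in a.items() if i in b)

print(similarity(simhash_vector("the quick brown fox".split()),
                 simhash_vector("the quick brown dog".split())))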
Example #2
 def __hash_from_argument(self, argument):
     arg_string = ""
     if hasattr(argument, 'md5hash'):
         return argument.md5hash
     if hasattr(argument, 'xxhash64'):
         return argument.xxhash64
     if type(argument) is numpy.ndarray:
         if argument.size > 181440000:
             return self.__hash_choice(argument.data)
         else:
             return xxhash.xxh64(argument.data).hexdigest()
     if type(argument) is pandas.core.frame.DataFrame:
         col_values_list = list(argument.columns.values)
         try:
             col_values_string = ''.join(col_values_list)
             arg_string = col_values_string
             if argument.values.size > 181440000:
                 return xxhash.xxh64(argument.values.data).hexdigest() + "+" + xxhash.xxh64(arg_string).hexdigest()
             else:
                 return self.__hash_choice(argument.values.data) + "+" + xxhash.xxh64(arg_string).hexdigest()
         except:
             if argument.values.size > 181440000:
                 return xxhash.xxh64(argument.values.data).hexdigest()
             else:
                 return self.__hash_choice(argument.values.data)
     if type(argument) is list or type(argument) is tuple:
         arg_string = str(len(argument))
     arg_string += str(argument)
     return self.__hash_choice(arg_string)
Example #3
    def _xxhash(self):
        """
        An xxhash.xxh64 hash of the array.

        Returns
        -------------
        xx: int, xxhash.xxh64 hash of array.
        """
        # repeat the bookkeeping to get a contiguous array inside
        # the function to avoid additional function calls
        # these functions are called millions of times so everything helps
        if self._modified_x or not hasattr(self, '_hashed_xx'):
            if self.flags['C_CONTIGUOUS']:
                hasher = xxhash.xxh64(self)
                self._hashed_xx = hasher.intdigest()
            else:
                # the case where we have sliced our nice
                # contiguous array into a non-contiguous block
                # for example (note slice *after* track operation):
                # t = util.tracked_array(np.random.random(10))[::-1]
                contiguous = np.ascontiguousarray(self)
                hasher = xxhash.xxh64(contiguous)
                self._hashed_xx = hasher.intdigest()
        self._modified_x = False
        return self._hashed_xx
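
A standalone illustration (synthetic array, not the tracked-array subclass above) of why the second branch copies first: a reversed slice is not C-contiguous and cannot be handed to the hasher directly, so np.ascontiguousarray makes a contiguous copy before hashing.

import numpy as np
import xxhash

a = np.arange(10, dtype=np.float64)
view = a[::-1]                        # reversed slice: not C-contiguous
print(view.flags['C_CONTIGUOUS'])     # False
print(xxhash.xxh64(np.ascontiguousarray(view)).intdigest())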
Example #4
def hashRequests(authTicket, payload):
    baseHash = xxhash.xxh64(
        authTicket.SerializeToString(),
        seed=0x1B845238
    ).intdigest()

    # Serialize and hash each request
    return [xxhash.xxh64(
        request.SerializeToString(),
        seed=baseHash
    ).intdigest() for request in payload]
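
The base hash seeds each per-request hash, so changing the auth ticket changes every request digest. A minimal sketch of the same chaining with made-up byte strings instead of protobuf messages:

import xxhash

auth_bytes = b"serialized-auth-ticket"
request_bytes = [b"request-1", b"request-2"]

base_hash = xxhash.xxh64(auth_bytes, seed=0x1B845238).intdigest()
print([xxhash.xxh64(r, seed=base_hash).intdigest() for r in request_bytes])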
Example #5
def string_hash(value,length=11):
    s = ''
    for i in range(0,length,11):
        s = s + xxhash.xxh64(value+str(i)).hexdigest()
    s = encode_hash(int(s,16))[:length]
    if len(s) < length:
        s = s + "A" * (length - len(s))
    return s
Example #6
def string_hash_bits(value,length_in_bits=128):
    ''' Length must be a multiple of 4'''
    hex_length = length_in_bits // 4
    s = ''
    for i in range(0,length_in_bits,64):
        s = s + xxhash.xxh64(value+str(i)).hexdigest()
    s = s[:hex_length]
    x = int(s,16)
    return x
Example #7
def xxhash64(path, block_size=4096):
    try:
        with open(path, 'rb') as rf:
            h = xxhash.xxh64()
            for chunk in iter(lambda: rf.read(block_size), b''):
                h.update(chunk)
        return h.hexdigest(), path
    except IOError:
        return None, path
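
A quick usage sketch for the helper above (it assumes the xxhash64 function just defined and an import xxhash in scope); it hashes a small temporary file and cleans up afterwards:

import os
import tempfile

with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b"hello world")

digest, path = xxhash64(tmp.name)   # (hexdigest string, path)
print(digest, path)
os.unlink(path)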
Example #8
    def hash(self):
        """Return hash of motif.

        This is a unique identifier of a motif, regardless of the id.

        Returns:
        hash : str
        """
        return xxhash.xxh64(self._pwm_to_str(3)).hexdigest()
Example #9
def subscribe(request):
    """
    Subscribe the given email to the given URL.

    TODO BEN: Include Subscription title or description in POST variables
    """
    url = request.POST['subscription_url']
    email = request.POST['email']

    user, created_user = User.objects.get_or_create(email=email)
    if created_user:
        user_verification_hash = uuid.uuid4().hex
        user_verification = Verification.objects.create(
            verified=False,
            verification_hash=user_verification_hash)
        user.verification = user_verification
        user.save()

    content, created_content = SubscribedContent.objects.get_or_create(url=url)
    if created_content:
        content_response = requests.get(url)
        content_hash = xxhash.xxh64(content_response.text).hexdigest()
        content.latest_content_hash = content_hash

    if not Subscription.objects.filter(user=user, content=content).exists():
        verification_hash = uuid.uuid4().hex
        verification_url = request.build_absolute_uri(
            reverse(
                'verify',
                kwargs={
                    'email': email,
                    'key': verification_hash}
            )
        )
        verification_item = Verification.objects.create(
            verified=False,
            verification_hash=verification_hash
        )

        Subscription.objects.create(
            user=user,
            content=content,
            verification=verification_item
        )

        email_sent = EMAIL.VERIFY_SUBSCRIPTION.send(
            email, {'verification_url': verification_url})
        if email_sent:
            message = MESSAGES.EMAIL.VERIFICATION_SENT.format(email)
        else:
            message = MESSAGES.EMAIL.ERROR_SENDING_EMAIL.format(
                email)
    else:
        message = MESSAGES.EMAIL.ALREADY_SUBSCRIBED

    messages.add_message(request, messages.INFO, message)
    return redirect(request.META['HTTP_REFERER'], {'message': message})
Example #10
    def test_XXH64_reset(self):
        x = xxhash.xxh64()
        h = x.intdigest()

        for i in range(10, 50):
            x.update(os.urandom(i))

        x.reset()

        self.assertEqual(h, x.intdigest())
Example #11
 def save(self, *args, **kwargs):
     new_hash = xxhash.xxh64(self.content_raw).hexdigest()
     mentioned_users = []
     if new_hash != self.raw_content_hash or (not self.pk):
         # To (re-)render the content if content changed or topic is newly created
         self.content_rendered, mentioned_users = render_content(self.content_raw, sender=self.user.username)
     super(Topic, self).save(*args, **kwargs)
     self.raw_content_hash = new_hash
     for to in mentioned_users:
             notify.delay(to=to.username, sender=self.user.username, topic=self.pk)
Example #12
    def cached_parse_dhcp(self, lines, cur_time=None):
        if cur_time is None:
            cur_time = dt.utcnow()
        m = xxhash.xxh64()
        m.update("".join(lines[:self.dhcp_cache_len]).encode("utf8"))
        new_hash = m.digest()
        # new_len = len(lines)

        if new_hash != self.dhcp_hash:
            self.dhcp_cache_len = 0
            self.dhcp_cache = []
            m = xxhash.xxh64()

        lines = lines[self.dhcp_cache_len:]
        self.dhcp_cache.extend(self.from_dhcp(lines, cur_time))
        m.update("".join(lines).encode("utf8"))
        self.dhcp_hash = m.digest()
        self.dhcp_cache_len += len(lines)

        return self.dhcp_cache
Example #13
def xxhash_file(srcfile, logger, block_size=2**20):
    f_name = func_name()
    logger.info(f_name+"\t\tCalculating xx-hash on : "+srcfile)
    x = xxhash.xxh64()
    # read in binary mode so the bytes hashed match the file exactly
    with open(srcfile, 'rb') as f:
        while True:
            data = f.read(block_size)
            if not data:
                break
            x.update(data)
    return x.hexdigest()
Example #14
    def generate_content_hashes(self):
        """
        Generate a dictionary which maps parl_ids to their respective hashes

        Used for speedy comparison of changes
        """
        es_response = json.loads(self.get_content())
        content_hashes = {}
        for res in es_response['result']:
            content_hashes[res['parl_id']] = xxhash.xxh64(
                json.dumps(res)).hexdigest()
        return json.dumps(content_hashes)
Example #15
 def save(self, *args, **kwargs):
     new_hash = xxhash.xxh64(self.content_raw).hexdigest()
     mentioned_users = []
     if new_hash != self.raw_content_hash or (not self.pk):
         self.content_rendered, mentioned_users = render_content(self.content_raw, sender=self.user.username)
     super(Post, self).save(*args, **kwargs)
     t = self.topic
     t.reply_count = t.get_reply_count()
     t.last_replied = t.get_last_replied()
     t.save(update_fields=['last_replied', 'reply_count'])
     for to in mentioned_users:
             notify.delay(to=to.username, sender=self.user.username, post=self.pk)
Example #16
 def memoize_wrapper(*args, **kwargs):
     hash = xxhash.xxh64(str(args) + str(kwargs)).intdigest()
     path = path_pattern.format(hash=hash)
     try:
         with open(path, 'rb') as file:
             logger.debug("Loading pickle %s", path)
             data = pickle.load(file)
     except (FileNotFoundError, EOFError):
         data = fn(*args, **kwargs)
         with open(path, 'wb') as file:
             pickle.dump(data, file)
     return data
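
The fragment above is only the inner wrapper. A self-contained sketch of the decorator factory it implies; path_pattern, fn, and the logger are assumptions here, not the original project's API:

import functools
import logging
import pickle

import xxhash

logger = logging.getLogger(__name__)

def disk_memoize(path_pattern="cache_{hash}.pkl"):
    def decorator(fn):
        @functools.wraps(fn)
        def memoize_wrapper(*args, **kwargs):
            # key the cache file on an xxh64 of the stringified arguments
            key = xxhash.xxh64(str(args) + str(kwargs)).intdigest()
            path = path_pattern.format(hash=key)
            try:
                with open(path, 'rb') as file:
                    logger.debug("Loading pickle %s", path)
                    return pickle.load(file)
            except (FileNotFoundError, EOFError):
                data = fn(*args, **kwargs)
                with open(path, 'wb') as file:
                    pickle.dump(data, file)
                return data
        return memoize_wrapper
    return decorator

@disk_memoize()
def expensive(x):
    return x ** 2

print(expensive(3))   # computed once, then served from the pickle on later calls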
Example #17
 def test_XXH64(self):
     x = xxhash.xxh64()
     x.update('a')
     self.assertEqual(xxhash.xxh64('a').digest(), x.digest())
     x.update('b')
     self.assertEqual(xxhash.xxh64('ab').digest(), x.digest())
     x.update('c')
     self.assertEqual(xxhash.xxh64('abc').digest(), x.digest())
     seed = random.randint(0, 2**32)
     x = xxhash.xxh64(seed=seed)
     x.update('a')
     self.assertEqual(xxhash.xxh64('a', seed).digest(), x.digest())
     x.update('b')
     self.assertEqual(xxhash.xxh64('ab', seed).digest(), x.digest())
     x.update('c')
     self.assertEqual(xxhash.xxh64('abc', seed).digest(), x.digest())
Example #18
    def parse(self, response):
        soup = BeautifulSoup(response.body)
        for script in soup(["script", "style"]):
            script.extract()
        text = soup.get_text()

        response.meta.update(score=KeywordScorer.score(text))
        response.meta.update(
            content_hash=xxhash.xxh64(text.encode('ascii', 'ignore')).intdigest())

        for link in self.link_extractor.extract_links(response):
            request = Request(url=link.url)
            request.meta.update(link_text=link.text)
            link_score = KeywordScorer.score(link.text)
            request.meta.update(score=link_score)
            yield request
Example #19
def _calculate(reaction, descriptorDict, verbose=False, whitelist=None):
    """Calculate descriptors for this plugin with descriptorDict already created."""
    # descriptor Value classes
    cat = DRP.models.CatRxnDescriptorValue
    perm = DRP.models.CategoricalDescriptorPermittedValue

    # reaction space descriptor
    heading = 'rxnSpaceHash1'
    if whitelist is None or heading in whitelist:
        h = xxhash.xxh64()  # generates a hash
        for reactant in reaction.compounds.order_by('abbrev'):
            h.update(reactant.abbrev)
        p = perm.objects.get_or_create(descriptor=descriptorDict[
                                       heading], value=h.hexdigest())[0]
        cat.objects.update_or_create(defaults={
                                     'value': p}, reaction=reaction, descriptor=descriptorDict['rxnSpaceHash1'])[0]
Example #20
def feature_hash_string(s, window, dim):

    start = time.time()  # time.clock() was removed in Python 3.8

    # Generate window-char Markov chains & create feature hash vector

    v = {}
    for x in range(0, dim):
        v[x] = 0
    length = len(s)
    max_num = 2.0 ** 64
    for x in range(0, length - window):
        key = xxhash.xxh64(s[x:x + window]).intdigest() % dim
        v[key] += 1

    return numpy.asarray(list(v.values()))
Example #21
 def wrapper(*args, **kwds):
     if not cache:
         return f(*args, **kwds)
     if key_func:
         key = 'django_vimeo_cache:{}'.format(key_func(*args, **kwds))
     else:
         key = 'django_vimeo_cache:' + f.__name__ + ':' +\
               str(list(args) + list(sorted(kwds.items())))
     key = xxhash.xxh64(key).hexdigest()
     value = cache.get(key)
     if value is None:
         value = f(*args, **kwds)
         cache.set(key, value, expires)
         value = cache.get(key)
         if value is None:
             raise Exception('failed to fetch cached value, try again')
     return value
Example #22
    def bv_hash(self):
        """
        Iterate over all the BinaryView (flat iteration over the hex values themselves)
        :return:(INT) Hash  of the whole file
        """

        # create file object
        br = BinaryReader(self.bv)

        # calculate file hash
        file_hash = xxhash.xxh64()
        # for some reason a BinaryReader won't read more than 1000 or so bytes
        temp_hash = br.read(1000)
        while temp_hash:
            file_hash.update(temp_hash)
            temp_hash = br.read(1000)

        return file_hash.hexdigest()
Example #23
def ducos1xxh(lastBlockHash, expectedHash, difficulty):
    # XXHASH algorithm
    # Measure starting time
    timeStart = time()
    # Loop from 0 to 100*difficulty
    for ducos1xxres in range(100 * int(difficulty) + 1):
        # Generate hash
        ducos1xx = xxhash.xxh64(str(lastBlockHash) + str(ducos1xxres),
                                seed=2811)
        ducos1xx = ducos1xx.hexdigest()
        # Check if result was found
        if ducos1xx == expectedHash:
            # Measure finish time
            timeStop = time()
            # Calculate hashrate
            timeDelta = timeStop - timeStart
            hashrate = ducos1xxres / timeDelta
            return [ducos1xxres, hashrate]
Example #24
def read(mount_point, incoming_data, **kwargs):
    outgoing_data = {}
    flock = kwargs['flock']
    offset = incoming_data['offset']
    chunk_size = incoming_data['repeats']
    f_path = ''.join([mount_point, incoming_data['target']])
    with open(f_path, 'rb') as f:
        f.seek(offset)
        flock.lockf(f.fileno(), fcntl.LOCK_SH | fcntl.LOCK_NB, chunk_size,
                    offset, 0)
        buf = f.read(chunk_size)
        flock.lockf(f.fileno(), fcntl.LOCK_UN, chunk_size, offset)
        outgoing_data['hash'] = xxhash.xxh64(buf).intdigest()
        outgoing_data['offset'] = offset
        outgoing_data['chunk_size'] = chunk_size
        outgoing_data['uuid'] = incoming_data['uuid']
        outgoing_data['tid'] = incoming_data['tid']
        # outgoing_data['buffer'] = buf[:256].decode()
        return outgoing_data
Example #25
    def extract_attribute(self, base_object: BDBasicBlock) -> int:
        # Check if value already exists
        BasicBlockHash_value = base_object.get_attribute_value(
            'BasicBlockHash')

        if BasicBlockHash_value:
            pass
        else:
            hash_value = xxhash.xxh64()
            for instruction_expression in base_object.underlying_obj:
                for instruction in instruction_expression[0]:
                    hash_value.update(instruction.text)

            base_object.add_attribute_value('BasicBlockHash',
                                            {'hash': hash_value.intdigest()})
            BasicBlockHash_value = base_object.get_attribute_value(
                'BasicBlockHash')

        return BasicBlockHash_value['hash'] if BasicBlockHash_value else None
Example #26
    def write(self, path, data, offset, fh):
        realpath = self.remotepath(path)
        cachefile = self.cachefile(realpath)
        if not os.path.exists(cachefile):
            if self.empty_file(realpath):
                self.create(path, 'wb')
            else:
                raise FuseOSError(ENOENT)

        with open(cachefile, 'rb+') as outfile:
            outfile.seek(offset, 0)
            outfile.write(data)

        self.attributes.insert(realpath, self.extract(os.lstat(cachefile)))
        task = Task(
            xxhash.xxh64(realpath).intdigest(), self._write, realpath, data,
            offset)
        self.taskpool.submit(task)
        return len(data)
Example #27
    def set_motifs(self, motifs):
        try:
            # Check if motifs is a list of Motif instances
            motifs[0].to_pwm()
            tmp = NamedTemporaryFile(mode="w", delete=False)
            for m in motifs:
                tmp.write("{}\n".format(m.to_pwm()))
            tmp.close()
            motif_file = tmp.name
        except AttributeError as e:
            motif_file = motifs

        self.motifs = motif_file
        with open(motif_file) as f:
            self.motif_ids = [m.id for m in read_motifs(f)]
        self.checksum = {}
        if self.use_cache:
            chksum = xxhash.xxh64("\n".join(sorted(self.motif_ids))).digest()
            self.checksum[self.motif_file] = chksum
Example #28
def filesget(id):
    # helper: *deep level route*

    rfile = StaticFile.get(id=id)
    guessed_type = mimetypes.guess_type(rfile.original, strict=True)[0]
    mimetype = guessed_type if guessed_type is not None else 'application/octet-stream'
    response.set_header('Accept-Ranges', 'bytes')
    response.set_header('Content-Length', str(rfile.length))
    response.set_header('Content-Type', mimetype)
    response.set_header(
        'Last-Modified',
        datetime.fromtimestamp(
            rfile.created,
            tz=pytz.timezone('GMT')).strftime('%a, %d %b %Y %H:%M:%S GMT'))
    response.set_header(
        'ETag', '"{0}"'.format(xxhash.xxh64(rfile.content).hexdigest()))

    return io.BytesIO(rfile.content)
Example #29
 def iter_archive(self, archive_dir):
     archivehashes = []
     if os.path.isdir(archive_dir):
         for path, dirs, files in walk(archive_dir):
             for filename in files:
                 filepath = joinpath(path, filename)
                 if os.path.isfile(filepath):
                     filesize = stat(filepath).st_size
                     filehash = ''
                     try:
                         with open(filepath, 'rb') as fh:
                             filehash = xxhash.xxh64(fh.read()).hexdigest()
                     except:
                         print("Didn't like this file", filepath)
                     if filehash != '':
                         archivehashes.append([{
                             'filehash': filehash,
                             'path': filepath
                         }])
     return archivehashes
Example #30
 def handle(self, *args, **kwargs):
     hashDictionary = {}
     collisionCount = 0
     for reaction in Reaction.objects.all():
         reactantString = ''
         h = xxhash.xxh64()
         for reactant in reaction.compounds:
             h.update(reactant.abbrev)
             reactantString += reactant.abbrev
         digest = h.hexdigest()
         if digest in hashDictionary:
             if hashDictionary[digest] != reactantString:
                 collisionCount += 1
         else:
             hashDictionary[digest] = reactantString
     if collisionCount > 0:
         e = EmailToAdmins('Dark Reactions Project: Hash Collision Failure',
                           'A collision between reaction space hashes has occurred. Please contact the DRP development team and file a bug report.')
         e.send()
         exit(1)
Example #31
def data(path, D):
    ''' GENERATOR: 
            Apply hash-trick to the original csv row
            and for simplicity, we one-hot-encode everything

        INPUT:
            path: path to training or testing file
            D: the max index that we can hash to

        YIELDS:
            x: a list of hashed and one-hot-encoded 'indices'
               we only need the index since all values are either 0 or 1
            y: y = 1 if we have a click, else we have y = 0
    '''

    with open(path, 'r', encoding='utf-8') as f:
        csvreader = reader(f)  # create a CSV reader
        header = next(csvreader)
        for row in csvreader:  # iterate over the available rows
            row = dict(zip(header, row))

            # ts and bid_id are used only while updating train data
            for feat in ['bid_id', 'ts']:
                if feat in row:
                    del row[feat]

            # process clicks
            y = 0.
            target = 'click'
            if target in row:
                if row[target] == '1':
                    y = 1.
                del row[target]

            # build x
            x = []
            for key in row:
                value = row[key]
                # one-hot encode everything with hash trick
                index = xxh64(key + '_' + value).intdigest() % D
                x.append(index)

            yield x, y
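
The core of the hashing trick above is the single modulo line. A toy illustration with a made-up row and dimension D, showing how each 'key_value' string maps to a stable column index without a fitted vocabulary:

from xxhash import xxh64

D = 2 ** 20
row = {'site': 'example.com', 'device': 'mobile', 'hour': '17'}
x = [xxh64(key + '_' + value).intdigest() % D for key, value in row.items()]
print(x)  # three indices in [0, D)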
Example #32
 def iter_duplicates(self, duplicates_dir):
     tobevalidated = []
     if os.path.isdir(duplicates_dir):
         for path, dirs, files in walk(duplicates_dir):
             for filename in files:
                 filepath = joinpath(path, filename)
                 if os.path.isfile(filepath):
                     filesize = stat(filepath).st_size
                     filehash = ''
                     try:
                         with open(filepath, 'rb') as fh:
                             filehash = xxhash.xxh64(fh.read()).hexdigest()
                     except:
                         print("Didn't like this file: ", filepath)
                     if filehash != '':
                         tobevalidated.append([{
                             'filehash': filehash,
                             'path': filepath
                         }])
     return tobevalidated
Example #33
def store(val, srcID):
    # if srcID not seen yet, make new entry in srcIDs
    if srcID not in srcIDs:
        srcIDs[srcID] = []
    key = xxhash.xxh64(val).intdigest() & 0xffff
    # remove expired keys in srcID's list
    srcIDs[srcID] = [k for k in srcIDs[srcID] if data[k][2] + 300 >= time.time()]
    # if over the limit, discard the store
    if len(srcIDs[srcID]) >= srcLimit:
        return
    # if key not yet stored
    if key not in data:
        data[key] = (val, srcID, time.time())
        srcIDs[srcID].append(key)
    # check for key expiration
    elif data[key][2] + 300 < time.time():
        srcIDs[data[key][1]].remove(key)
        data[key] = (val, srcID, time.time())
        srcIDs[srcID].append(key)
Example #34
    def generate_content_hashes(self, content=None):
        """
        Generate a dictionary which maps parl_ids to their respective hashes

        Used for speedy comparison of changes
        """
        if not content:
            es_response = json.loads(self.get_content())
        else:
            try:
                es_response = json.loads(content)
            except:
                es_response = json.loads(self.get_content())

        content_hashes = {}
        for res in es_response['result']:
            content_hashes[res['parl_id']] = xxhash.xxh64(
                json.dumps(res)).hexdigest()
        return json.dumps(content_hashes)
Example #35
def hash_file2(fpath, blocksize=65536, hasher='xx64'):
    r"""
    Hashes the data in a file on disk using xxHash

    xxHash is much faster than sha1, bringing computation time down from .57
    seconds to .12 seconds for a 387M file.

    my_weights_fpath_ = ub.truepath('~/tmp/my_weights.pt')


    xdata = 2 ** np.array([8, 12, 14, 16])
    ydatas = ub.ddict(list)
    for blocksize in xdata:
        print('blocksize = {!r}'.format(blocksize))
        ydatas['sha1'].append(ub.Timerit(2).call(ub.hash_file, my_weights_fpath_, hasher='sha1', blocksize=blocksize).min())
        ydatas['sha256'].append(ub.Timerit(2).call(ub.hash_file, my_weights_fpath_, hasher='sha256', blocksize=blocksize).min())
        ydatas['sha512'].append(ub.Timerit(2).call(ub.hash_file, my_weights_fpath_, hasher='sha512', blocksize=blocksize).min())
        ydatas['md5'].append(ub.Timerit(2).call(ub.hash_file, my_weights_fpath_, hasher='md5', blocksize=blocksize).min())
        ydatas['xx32'].append(ub.Timerit(2).call(hash_file2, my_weights_fpath_, hasher='xx32', blocksize=blocksize).min())
        ydatas['xx64'].append(ub.Timerit(2).call(hash_file2, my_weights_fpath_, hasher='xx64', blocksize=blocksize).min())

    import netharn as nh
    nh.util.qtensure()
    nh.util.multi_plot(xdata, ydatas)
    """
    import xxhash
    if hasher == 'xx32':
        hasher = xxhash.xxh32()
    elif hasher == 'xx64':
        hasher = xxhash.xxh64()

    with open(fpath, 'rb') as file:
        buf = file.read(blocksize)
        # hash the file contents in blocksize-sized chunks
        while len(buf) > 0:
            hasher.update(buf)
            buf = file.read(blocksize)
    # Get the hashed representation
    text = ub.util_hash._digest_hasher(hasher,
                                       hashlen=None,
                                       base=ub.util_hash.DEFAULT_ALPHABET)
    return text
Example #36
 def handle(self, *args, **kwargs):
     """Handle the command call."""
     hashDictionary = {}
     collisionCount = 0
     for reaction in Reaction.objects.all():
         reactantString = ''
         h = xxhash.xxh64()
         for reactant in reaction.compounds:
             h.update(reactant.abbrev)
             reactantString += reactant.abbrev
         digest = h.hexdigest()
         if digest in hashDictionary:
             if hashDictionary[digest] != reactantString:
                 collisionCount += 1
         else:
             hashDictionary[digest] = reactantString
     if collisionCount > 0:
         e = EmailToAdmins('Dark Reactions Project: Hash Collision Failure',
                           'A collision between reaction space hashes has occurred. Please contact the DRP development team and file a bug report.')
         e.send()
         exit(1)
Example #37
def _encValKey(v):
    '''
    Encode a value as used in a key.

    Non-negative numbers are msgpack encoded.  Negative numbers are encoded as a marker, then the
    encoded negative of that value, so that the ordering of the encodings is easily mapped to the
    ordering of the negative numbers.  Strings that are too long are hashed.  Note that this scheme prevents
    interleaving of value types: all string encodings compare larger than all negative number
    encodings, which in turn compare larger than all nonnegative encodings.
    '''
    if isinstance(v, int):
        if v >= 0:
            return s_msgpack.en(v)
        else:
            return NEGATIVE_VAL_MARKER_ENC + s_msgpack.en(-v)
    else:
        if len(v) >= LARGE_STRING_SIZE:
            return (HASH_VAL_MARKER_ENC + s_msgpack.en(xxhash.xxh64(v).intdigest()))
        else:
            return STRING_VAL_MARKER_ENC + s_msgpack.en(v)
Example #38
def hash_file(path):
    """
    Calculates hash for the file of given path

    Parameters
    ----------
    path: string
        Path to file to be hashed

    Returns
    -------
        Hash digest calculated for given file
    """
    if path is None or not os.path.isfile(path):
        return None
    fo = open(path, 'rb')
    c = fo.read()
    r = xxhash.xxh64(c).hexdigest()
    fo.close()
    return r
Example #39
        def validate(data_dst, data):
            match = False
            try:
                dst_checksum = data_dst.attrs["checksum"]
            except KeyError:
                # checksum does not exist, since it is only updated when dump was
                # completed
                logger.warning(
                    f'"{data_dst.path}" contains partial dump, rewrite')
            else:
                src_checksum = xxhash.xxh64(data.compute()).hexdigest()
                if dst_checksum == src_checksum:
                    match = True
                else:
                    # checksum mismatch, reset
                    logger.warning(
                        f'"{data_dst.path}" does not match the source')
                    del data_dst.attrs["checksum"]

            return match, (data_dst, data)
Example #40
def getHashSum(file_path):
    hashsums = {}
    result = {}
    hashsums['xxh32'] = xxhash.xxh32()
    hashsums['xxh64'] = xxhash.xxh64()
    hashsums['md5'] = hashlib.md5()

    with open(file_path, 'rb') as f:
        while True:
            chunk = f.read(64 * 1024)
            if len(chunk):
                for key in hashsums.keys():
                    hashsums[key].update(chunk)
            else:
                break

    for key, value in hashsums.items():
        result[key] = value.hexdigest()

    return result
Example #41
    def _digestAndWrite(self, myqueue, topath):
        digest = xxhash.xxh64()
        try:
            os.makedirs(os.path.dirname(topath))
        except:
            pass
        finally:
            with open(topath, 'w') as f:
                while True:
                    item = myqueue.get()

                    if isinstance(item, exitcode):
                        myqueue.put(digest.hexdigest())
                        sys.stdout.write(self.greencolor +
                                         '[COPY PASS]'.rjust(16))
                        sys.stdout.flush()
                        break

                    digest.update(item)
                    f.write(item)
Example #42
def write_content(content, **kwargs):

    h64 = xxhash.xxh64(content).hexdigest()
    filepath = 'trnews-data/' + h64[:2] + '/' + h64[2:4] + '/' + h64

    d = os.path.dirname(filepath)
    if not os.path.isdir(d):
        os.makedirs(d)
    elif os.path.exists(filepath):
        return filepath, False

    with open(filepath + ".meta", "wb") as fp:
        fp.write(yaml.safe_dump(kwargs, 
            default_flow_style = False, 
            allow_unicode=True, indent=2, encoding="utf-8"))

    with gzip.open(filepath, "wb") as fp:
        fp.write(content)

    return filepath, True
Example #43
    def constrained(pattern: Union[str, re.Pattern]) -> Type[SymbolName]:
        """Create a new SymbolName subclass using the provided string as validation RE."""
        if isinstance(pattern, re.Pattern):
            regex = pattern
            pattern = pattern.pattern
        else:
            try:
                regex = re.compile(pattern)
            except re.error as e:
                raise TypeError(
                    f"Invalid regular expression definition:  '{pattern}'."
                ) from e

        assert isinstance(pattern, str)
        xxh64 = xxhash.xxh64()
        xxh64.update(pattern.encode())
        subclass_name = f"SymbolName_{xxh64.hexdigest()[-8:]}"
        namespace = dict(regex=regex)

        return type(subclass_name, (SymbolName, ), namespace)
Example #44
def _calculate_asset_hash(asset_file, dev_mode):
    if dev_mode:
        return random.random()

    """
    1. Calculate the hash of the asset file and use it as a version number to control (maximize) the HTTP cache.
    2. The hash value is cached in memory until the python app server is restarted.
    3. Only text asset files (js and css) are processed; binary files (img, fonts) are not. ##Todo##
    """
    hash = _asset_hash_cache_.get(asset_file)

    if not hash:
        file = os.path.join(os.path.dirname(__file__), *[x for x in asset_file.split('/')])
        if os.path.isfile(file):
            with open(file, 'r', encoding='utf-8') as f:
                data = f.read()
                hash = xxhash.xxh64(data).hexdigest()
                _asset_hash_cache_[asset_file] = hash

    return hash
Example #45
def build_recursive_tree(tree, base, depth, width):
    """
    Args:
        tree: Tree
        base: Node
        depth: int
        width: int
    """
    if depth >= 0:
        depth -= 1
        for _ in range(width):
            directory = Directory(None)
            tree.create_node("{0}".format(directory.name), "{0}".format(xxhash.xxh64(directory.name).hexdigest()),
                             parent=base.identifier, data=directory)
        dirs_nodes = tree.children(base.identifier)
        for dir_node in dirs_nodes:
            newbase = tree.get_node(dir_node.identifier)
            build_recursive_tree(tree, newbase, depth, width)
    else:
        return
Example #46
    def parse(self, response):
        soup = BeautifulSoup(response.body)
        for script in soup(["script", "style"]):
            script.extract()
        text = soup.get_text()

        if text:
            response.meta.update(
                content_hash=xxhash.xxh64(text.encode('ascii', 'ignore')).intdigest())

            try:
                langid = detect(text)
            except LangDetectException:
                return

            if langid == 'en':
                tagged = filter(lambda x: x[2] >= 0.99,
                                tag_locations(MySpider.geo_names, text))
                gid_count = collections.Counter(gid for name, gid, score in tagged)

                score = scorer(
                    float(sum(gid_count.values()))/
                    float(len(text))
                )
                response.meta.update(score=score)

                for link in self.link_extractor.extract_links(response):
                    request = Request(url=link.url)
                    request.meta.update(link_text=link.text)
                    request.meta.update(score=score)
                    yield request

                date = datetime.datetime.now()
                for gid, count in gid_count.items():
                    yield LocationsItem(
                        date=date,
                        geoname_id=gid,
                        count=count
                    )
            else:
                response.meta.update(score=0)
Example #47
def _hash_image(image):
    """
    Two hash variant are possible :
    - if imgui_cv.USE_FAST_HASH is True : select 100 random pixels and hash them
    - otherwise : compute the hash of the whole image (using xxhash for performance)
    :param image:
    :return:hash
    """
    if USE_FAST_HASH:
        rng = np.random.RandomState(89)
        inds = rng.randint(low=0, high=image.size, size=100)
        b = image.flat[inds]
        result = hash(tuple(b.data))
        return result
    else:
        # cf https://stackoverflow.com/questions/16589791/most-efficient-property-to-hash-for-numpy-array
        h = xxhash.xxh64()
        h.update(image)
        result = h.intdigest()
        h.reset()
        return result
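
A tiny standalone check of the full-image branch above (synthetic array; the USE_FAST_HASH path is not shown): any pixel change alters the xxh64 digest.

import numpy as np
import xxhash

image = np.zeros((480, 640, 3), dtype=np.uint8)
before = xxhash.xxh64(image).intdigest()
image[0, 0, 0] = 255                  # mutate a single pixel
after = xxhash.xxh64(image).intdigest()
print(before != after)                # True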
Example #48
    def parse_binary_view(self):
        bv_hash = xxhash.xxh64()
        bv_hash.update(self.bv.file.filename)
        bv_object = BinaryView.BinaryViewNode(self.bv, str(bv_hash.hexdigest()), parent_uuid='0',
                                              parent_node_label='RootNode')

        # Update node list
        self.node_list.append(bv_object)

        func_index = 0
        # Iterate all functions
        for function in self.bv.functions:
            # NOTE: THE FOLLOWING CODE IS NOT THREAD SAFE~!!!!!!!!!!!!
            # Each function needs its own basic block cache
            self.basic_block_cache = dict()
            self.parse_function(function.mlil, bv_object, str(func_index))
            func_index += 1

        self.run_post_processing()

        return self.node_list
Example #49
def ducos1xxh(lastBlockHash, expectedHash, difficulty, efficiency):
    # XXHASH algorithm
    # Measure starting time
    timeStart = time()
    # Loop from 0 to 100*difficulty
    for ducos1xxres in range(100 * int(difficulty) + 1):
        # If efficiency lower than 100% sleep to use less CPU
        if ducos1xxres % 1000000 == 0 and float(100 - efficiency * 100) < 100:
            sleep(float(efficiency))
        # Generate hash
        ducos1xx = xxhash.xxh64(str(lastBlockHash) + str(ducos1xxres),
                                seed=2811)
        ducos1xx = ducos1xx.hexdigest()
        # Check if result was found
        if ducos1xx == expectedHash:
            # Measure finish time
            timeStop = time()
            # Calculate hashrate
            timeDelta = timeStop - timeStart
            hashrate = ducos1xxres / timeDelta
            return [ducos1xxres, hashrate]
Example #50
def test_hash_file(fs):
    """ Test to hash a file
    Cases
    -----
    - Not existing file (should return None)
    - Existing file (should return Digest)
    - None (should return None)
    - Pass directory (should return None)
    """
    # prepare file system
    fs.create_file('/phonyDir/testfile', contents='test')
    # Not existing file (should return None)
    assert DupFinder.fs.hash_file('notexisting.txt') is None
    # Existing file (should return Digest)
    assert DupFinder.fs.hash_file('/phonyDir/testfile') == xxhash.xxh64(
        'test').hexdigest()
    # None (should return None)
    assert DupFinder.fs.hash_file(None) is None
    # Pass directory (should return None)
    assert DupFinder.fs.hash_file('/phonyDir') is None
    assert DupFinder.fs.hash_file('/phonyDir/') is None
Example #51
    def test_key_s3_incomprehensible_range_start(self):
        """
        Check that the key is constructed as we expect
        """
        import common.tztools

        job_scope = JobScope(
            ad_account_id=gen_string_id(),
            report_type=ReportType.day_platform,
            report_variant=Entity.Campaign,
            range_start='blah-blah',
        )

        # even though range_start is provided ^ above, it's not date-like and we
        # should be ok with that and just fall back to datetime.utcnow()
        now_dt = datetime(2000, 1, 2, 3, 4, 5)
        with mock.patch.object(common.tztools, 'now', return_value=now_dt) as now_mocked, mock.patch.object(
            uuid, 'uuid4', return_value='UUID-HERE'
        ):

            storage_key = cold_storage.store({'data': 'yeah!'}, job_scope)

        assert now_mocked.called

        prefix = xxhash.xxh64(job_scope.ad_account_id).hexdigest()[:6]

        expected_key = (
            f'fb/'
            + f'{prefix}-{job_scope.ad_account_id}/'
            + f'{job_scope.report_type}/'
            + f'{now_dt.strftime("%Y")}/'
            + f'{now_dt.strftime("%m")}/'
            + f'{now_dt.strftime("%d")}/'
            + f'{now_dt.strftime("%Y-%m-%dT%H:%M:%SZ")}-'
            + f'{job_scope.job_id}-'
            + f'UUID-HERE'
            + f'.json'
        )

        assert storage_key == expected_key
Example #52
def touch_success(logger, incoming_message, dir_tree):
    logger.debug(f"Successful touch arrived incoming_message['target']")
    path = incoming_message['target'].split('/')[1:]  # folder:file
    syncdir = dir_tree.get_dir_by_name(path[0])
    dir_index = xxhash.xxh64(path[0]).hexdigest()
    if not syncdir:
        logger.debug(
            f"Directory {path[0]} already removed from active dirs list, dropping touch {path[1]}"
        )
        return
    # There might be a race where a successful mkdir message arrives after a successful touch message,
    # so we won't check here whether the dir is already synced

    f = syncdir.data.get_file_by_name(path[1])
    #  Now, when we got reply from client that file was created,
    #  we can mark it as synced
    syncdir.data.size += 1
    f.ondisk = True
    f.creation_time = datetime.datetime.strptime(incoming_message['timestamp'],
                                                 '%Y/%m/%d %H:%M:%S.%f')
    f.uuid = uuid.uuid4().hex[
        -5:]  # Unique session ID, will be modified on each file modify action
    logger.debug(f"File {path[0]}/{path[1]} was created at: {f.creation_time}")
    logger.debug(
        f"File {path[0]}/{path[1]} is synced. Directory size updated to {syncdir.data.size} bytes"
    )
    if syncdir.data.size > MAX_FILES_PER_DIR:
        try:
            logger.debug(
                f"Directory {path[0]} going to be removed from dir tree")
            dir_tree.remove_dir_by_name(path[0])
            del dir_tree.synced_nodes[dir_index]
            del dir_tree.nids[dir_index]
            logger.debug(
                f"Directory {path[0]} is reached its size limit and removed from active dirs list"
            )
        except (NodeIDAbsentError, KeyError):
            logger.debug(
                f"Directory {path[0]} already removed from active dirs list, skipping...."
            )
Example #53
    def test_xxh64_update(self):
        x = xxhash.xxh64()
        x.update('a')
        self.assertEqual(xxhash.xxh64('a').digest(), x.digest())
        self.assertEqual(xxhash.xxh64_digest('a'), x.digest())
        x.update('b')
        self.assertEqual(xxhash.xxh64('ab').digest(), x.digest())
        self.assertEqual(xxhash.xxh64_digest('ab'), x.digest())
        x.update('c')
        self.assertEqual(xxhash.xxh64('abc').digest(), x.digest())
        self.assertEqual(xxhash.xxh64_digest('abc'), x.digest())

        seed = random.randint(0, 2**64)
        x = xxhash.xxh64(seed=seed)
        x.update('a')
        self.assertEqual(xxhash.xxh64('a', seed).digest(), x.digest())
        self.assertEqual(xxhash.xxh64_digest('a', seed), x.digest())
        x.update('b')
        self.assertEqual(xxhash.xxh64('ab', seed).digest(), x.digest())
        self.assertEqual(xxhash.xxh64_digest('ab', seed), x.digest())
        x.update('c')
        self.assertEqual(xxhash.xxh64('abc', seed).digest(), x.digest())
        self.assertEqual(xxhash.xxh64_digest('abc', seed), x.digest())
Example #54
def _job_scope_to_storage_key(
        job_scope: JobScope,
        chunk_marker: Optional[int] = DEFAULT_CHUNK_NUMBER,
        custom_namespace: Optional[str] = None) -> str:
    """
    Puts together the S3 object key we need for given report data. This is
    just a helper function

    :param job_scope: The job scope (dict representation)
    :param chunk_marker: Order number of written chunk
    :param custom_namespace: Custom job namespace
    :return string: The full S3 key to use
    """
    assert isinstance(job_scope, JobScope)

    prefix = xxhash.xxh64(job_scope.ad_account_id).hexdigest()[:6]

    # datetime is a subclass of date, so we must check for date first
    if isinstance(job_scope.range_start, date):
        report_datetime = datetime.combine(job_scope.range_start,
                                           datetime.min.time())
    elif isinstance(job_scope.range_start, datetime):
        report_datetime = job_scope.range_start
    else:
        # long import line to allow mocking of call to now() in tests.
        report_datetime = common.tztools.now()

    key = (f'{custom_namespace or job_scope.namespace}/'
           f'{prefix}-{job_scope.ad_account_id}/'
           f'{job_scope.report_type}/'
           f'{report_datetime.strftime("%Y")}/'
           f'{report_datetime.strftime("%m")}/'
           f'{report_datetime.strftime("%d")}/'
           f'{report_datetime.strftime("%Y-%m-%dT%H:%M:%SZ")}-'
           f'{job_scope.job_id}-'
           f'{str(chunk_marker)+"-" if chunk_marker else ""}'
           f'{uuid.uuid4()}'
           f'.json')

    return key
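
The short prefix spreads otherwise sequential account ids across the keyspace. A minimal illustration of just the prefixing step, with a made-up account id:

import xxhash

ad_account_id = '1234567890'
prefix = xxhash.xxh64(ad_account_id).hexdigest()[:6]
print(f'fb/{prefix}-{ad_account_id}/...')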
Example #55
def import_opml(user_id, path):
    _opml = opml.parse(path)

    uncategorized = None
    for outline in _opml:
        if hasattr(outline, 'xmlUrl'):
            if uncategorized is None:  # not defined yet
                uncategorized = Category.query.filter_by(user_id=user_id, name="Uncategorized").first()
                if uncategorized is None:  # not found
                    uncategorized = Category(user_id, "Uncategorized", order_id=9999)
                    uncategorized.save()

            feed = Feed(outline.xmlUrl)
            feed.save()

            user_feed = UserFeed(user_id, uncategorized.id, feed.id, outline.text)
            user_feed.save()

        else:
            category = Category.query.filter_by(user_id=user_id, name=outline.text).first()
            if category is None:
                category = Category(user_id, outline.text)
                category.save()

            for child in outline:
                if hasattr(child, 'xmlUrl'):
                    # hash the feed URL so an existing feed can be looked up (hexdigest storage is an assumption)
                    url_hash = xxhash.xxh64(child.xmlUrl).hexdigest()
                    feed = Feed.query.filter_by(feed_url_hash=url_hash).first()
                    if feed is None:
                        feed = Feed(child.xmlUrl)
                        feed.save()

                    user_feed = UserFeed(user_id=user_id, category_id=category.id, feed_id=feed.id, feed_name=child.text)
                    user_feed.save()
                else:
                    logger.warn("Nested category is not supported yet, ignored!")
Example #56
    def hashdirectory(self,directory,map):
        hashfunc = xxhash.xxh32()
        for file in os.listdir(directory):
            if(os.path.isdir(os.path.join(directory,file))):
                #print os.path.join(directory,file)
                key = self.hashdirectory(os.path.join(directory,file),map)
                if key in map:
                    map[key] = map[key] + "?"+os.path.join(directory,file)
                else:
                    map[key] = os.path.join(directory,file)
                hashfunc.update(key)
            if(os.path.isfile(os.path.join(directory,file))):
                hf = xxhash.xxh64()
                f = open(os.path.join(directory,file),'rb').read()
                byts = bytes(f)
                #mem = memoryview(byts)
                buffersize = 1048576
                bytesize = sys.getsizeof(byts)
                self.ldb.pgb.step(bytesize/1024)
                if bytesize-buffersize>0:
                    for i in range(0,bytesize-buffersize,buffersize):
                        if bytesize-i>buffersize:
                            hf.update(byts[i:(i+buffersize)])
                        else:
                            hf.update(byts[i:])
                else:
                    hf.update(byts[0:])

                key = hf.digest()
                if key in map:
                    map[key] = map[key] + "?"+os.path.join(directory,file)
                else:
                    map[key] = os.path.join(directory,file)
                hashfunc.update(key)
        key = hashfunc.digest()
        return key
Example #57
 def _hash_xxhash(buf):
     """
     Produce an 8-byte hash of *buf* using xxHash.
     """
     return xxhash.xxh64(buf).digest()
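
For reference, the three output forms python-xxhash exposes for the same input: digest() is the 8 raw bytes used above, while hexdigest() and intdigest() are the other two encodings seen throughout these examples.

import xxhash

h = xxhash.xxh64(b"example")
print(h.digest())     # 8 raw bytes
print(h.hexdigest())  # 16 hex characters
print(h.intdigest())  # unsigned 64-bit integer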
Example #58
 def __init__(self, *args, **kwargs):
     super(Topic, self).__init__(*args, **kwargs)
     self.raw_content_hash = xxhash.xxh64(self.content_raw).hexdigest()
Example #59
def generateRequestHash(authticket, request):
    firstHash = xxhash.xxh64(authticket, seed=0x1B845238).intdigest()                      
    return xxhash.xxh64(request, seed=firstHash).intdigest()