Code example #1
File: pipelines.py  Project: coep-rankweb/spyder
	def writeWebMatrix(self, item):
		'''
		Builds the web graph in a Matrix Market format file
		'''
		u = self.r.get("%s:%s" % (self.URL2ID, hashxx(item['url'])))
		for link in set(item['link_set']):
			v = self.r.get("%s:%s" % (self.URL2ID, hashxx(link)))
			self.f_mat.write("%s\t%s\t1\n" % (u, v))
Code example #2
File: simple.py  Project: Rockyspade/pyhashxx
def hash_one_value(val):
    '''
    Use the hashxx function to initialize, add data, and compute the
    digest in one go. Add the seed parameter if you need to control
    the initial state of the digest.
    '''
    print("Hash of %s = %d" % (repr(val), hashxx(val, seed=0)))
Code example #3
        def g(key, obj):
            for pat, e in extractors.items():
                if fnmatch.fnmatch(key, pat):
                    unhashable = False
                    vals = []

                    for f in e:
                        v = f(obj)
                        if isinstance(v, bool):
                            if not v:
                                unhashable = True
                                break
                        else:
                            vals.append(v)  # reuse the computed value instead of calling f(obj) twice

                    if unhashable:
                        logger.debug('pat %s obj %s is unhashable' %
                                     (pat, key))
                        continue

                    for v in itertools.product(*vals):
                        s = str('\0'.join(v))
                        logger.debug('hash string for %s %s' % (key, s))
                        yield pyhashxx.hashxx(s), s
                    break
Code example #4
    def push(self, batch):
        """"""
        if batch is None:
            map(lambda i: self._qs[i].push(None), range(len(self._qs)))
            return

        self._lock.acquire()
        self._records += len(batch)
        self._lock.release()

        # [todo] - performance: splitting batch too small?
        rdef = batch.record_def()

        # distribute records using hash function
        partitioned_records = [
            []  # array of Record
            for i in range(len(self._qs))
        ]
        key_idx = rdef.colindex_by_colname(self._key)
        for rec in batch:
            val     = rec[key_idx]
            h       = pyhashxx.hashxx(bytes(val))  # [todo] - customize hash function?
            records = partitioned_records[h % len(self._qs)]
            records.append(rec)

        # really push distributed records into BatchQueue
        for i in range(len(self._qs)):
            self._qs[i].push(Batch(rdef, partitioned_records[i]))
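The push above depends on the hash being deterministic, so that every record with the same key value lands in the same queue. A minimal sketch of that invariant, assuming byte-string keys and a hypothetical queue count:

import pyhashxx

NUM_QUEUES = 4  # hypothetical queue count

def partition_of(key_bytes):
    # Deterministic hash -> stable partition index in [0, NUM_QUEUES).
    return pyhashxx.hashxx(key_bytes) % NUM_QUEUES

# Records sharing a key always land in the same partition:
assert partition_of(b'user-42') == partition_of(b'user-42')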
Code example #5
    def get(self):
        # Looks up every filter set anyone has ever run, in descending order of how often it was run.
        # For now... let's build all of them!
        allFilterSetsWithCounts = db.session.query(
            Filter.filter_params,
            func.count(Filter.filter_params).label('times_filtered')).group_by(
                Filter.filter_params).order_by('times_filtered DESC').all()
        results = {}

        for (filter_string, times_filtered) in allFilterSetsWithCounts:
            print('running ' + filter_string)
            print('has been run ' + str(times_filtered) + ' times.')
            filter_params = json.loads(filter_string)
            rules = filter_params['rules']

            cache_key = str(hashxx(json.dumps(filter_params)))
            from_cache = client.get(cache_key)

            results[cache_key] = {}
            results[cache_key]['rules'] = rules

            if from_cache is None:
                tasks.buildCache.delay(cache_key, rules)
                results[cache_key]['existing-cache'] = False
            else:
                results[cache_key]['existing-cache'] = True

        return results
Code example #6
def hash_strings(strings, seed, mod, encoded=False):
    if not encoded:
        strings = map(str.encode, strings)

    arr = [hashxx(s, seed=seed) for s in strings]

    return np.mod(arr, mod)
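A hypothetical call, assuming numpy is imported as np as the snippet implies; the exact bucket values depend on the hash:

import numpy as np
from pyhashxx import hashxx

buckets = hash_strings(['spam', 'eggs'], seed=42, mod=10)
# Deterministic: the same strings, seed and modulus give the same buckets.
assert list(buckets) == list(hash_strings(['spam', 'eggs'], seed=42, mod=10))
assert all(0 <= b < 10 for b in buckets)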
Code example #7
def get_acmuevent_hash(glpatt="/sys/class/tty/ttyACM?/device/uevent"):
    f = glob(glpatt)
    if len(f) == 0:
        return ""
    else:
        with open(f[0], "rb") as ue:
            return str(hashxx(ue.read()))
Code example #8
File: model.py  Project: Craven-Biostat-Lab/mihifepe
    def predict(self, target, static_data, temporal_data):
        """
        Predicts the model's output (loss, prediction) for the given target and instance.
        In general, at least one of static_data and temporal_data must be non-empty.
        In this case, the model only uses static_data.

        Args:
            target:         classification label or regression output (scalar value)
            static_data:    static data (vector)
            temporal_data:  temporal data (matrix, where the number of rows varies across instances)

        Returns:
            loss:           model's output loss
            prediction:     model's output prediction, only used for classifiers
        """
        hashval = hashxx(static_data.data.tobytes())
        self.rng.seed(hashval)
        if self.noise_type == constants.NO_NOISE:
            prediction = self.model_fn(static_data, [])
        elif self.noise_type == constants.EPSILON_IRRELEVANT:
            # Add noise - small random non-zero coefficients for irrelevant features
            noise = self.noise_multiplier * self.rng.uniform(
                -1, 1, static_data.size)
            prediction = self.model_fn(static_data, noise)
        elif self.noise_type == constants.ADDITIVE_GAUSSIAN:
            # Add noise - additive Gaussian, sampled for every instance/perturbed instance
            prediction = self.model_fn(
                static_data, self.rng.normal(0, self.noise_multiplier))
        else:
            raise NotImplementedError("Unknown noise type")
        loss = self.loss(prediction, target)
        return (loss, prediction)
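The notable trick in predict is seeding the RNG from a hash of the input bytes, which makes the per-instance noise reproducible across runs. A minimal sketch of just that mechanism, assuming NumPy's RandomState (the snippet's self.rng is not shown):

import numpy as np
from pyhashxx import hashxx

def noise_for(static_data, noise_multiplier=0.01):
    # Same input bytes -> same 32-bit seed -> identical noise on every run.
    rng = np.random.RandomState(hashxx(static_data.tobytes()))
    return noise_multiplier * rng.uniform(-1, 1, static_data.size)

a = np.arange(4, dtype=np.float64)
assert (noise_for(a) == noise_for(a)).all()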
Code example #9
    def post(self):
        try:
            parser = reqparse.RequestParser()
            parser.add_argument('filter_params', type=str)
            args = parser.parse_args()
            filter_params = json.loads(args['filter_params'])
            rules = filter_params['rules']
            if 'prediction_threshold' in filter_params:
                metaseek_power = filter_params['prediction_threshold']
                print("getting metaseek power")
            else:
                metaseek_power = 0.9
            print(metaseek_power)

            cache_key = str(hashxx(json.dumps(filter_params)))
            from_cache = client.get(cache_key)
            if from_cache:
                print "cached"

            db.session.add(Filter(args['filter_params']))
            db.session.commit()

            if from_cache is None:
                summary = summarizeDatasets(Dataset.query,
                                            rules,
                                            sampleRate=0.05,
                                            metaseek_power=metaseek_power)
                client.set(cache_key, summary)
                return summary
            else:
                return from_cache

        except Exception as e:
            return {'error': str(e)}
Code example #10
File: nofilter.py  Project: coep-rankweb/spyder
	def request_seen(self, request):
		print "filter:", request.url
		uid = self.r.get("%s:%s" % (self.URL2ID, hashxx(request.url)))
		if not uid or int(uid) > 0:
			pass
		else:
			log.msg("FILTER SEEN:%s" % request.url, level = log.CRITICAL)
			return True
Code example #11
	def makeHashes(self,inp):
		self.inp = inp
		partial = []
		self.spooky = hash64(inp) % size
		partial.append(self.spooky)
		self.hashxx = hashxx(inp) % size
		partial.append(self.hashxx)
		self.mmh = abs(mmh3.hash(inp)) % size
		partial.append(self.mmh)
		return partial
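makeHashes gathers three independent hash positions modulo size, which is exactly the ingredient a Bloom filter needs. A minimal sketch of that use, substituting seeded hashxx calls for the snippet's three different hash libraries (an assumption, not the original setup):

from pyhashxx import hashxx

class TinyBloom(object):
    def __init__(self, size=1024, seeds=(0, 1, 2)):
        self.size = size
        self.seeds = seeds
        self.bits = bytearray(size)

    def _positions(self, data):
        # One position per seed, mirroring makeHashes' "partial" list.
        return [hashxx(data, seed=s) % self.size for s in self.seeds]

    def add(self, data):
        for p in self._positions(data):
            self.bits[p] = 1

    def might_contain(self, data):
        # May report false positives, never false negatives.
        return all(self.bits[p] for p in self._positions(data))

bloom = TinyBloom()
bloom.add(b'hello')
assert bloom.might_contain(b'hello')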
Code example #12
def get_identifier(entity_name, data):
    """
    Get identifier from BrAPI object or generate one from hashed string json representation
    """
    entity_id = entity_name + 'DbId'
    data_id = data.get(entity_id)
    if not data_id:
        simplified_object = remove_falsey(
            data, predicate=lambda x: x and not isinstance(x, set))
        json_rep = json.dumps(simplified_object, sort_keys=True)
        data_id = str(hashxx(json_rep.encode()))
    data[entity_id] = str(data_id)
    return data_id
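The generated identifier is stable because json.dumps(..., sort_keys=True) canonicalizes key order before hashing. A quick illustration of that property with plain dicts (remove_falsey omitted):

import json
from pyhashxx import hashxx

def stable_id(obj):
    # Canonical JSON -> identical bytes -> identical hash.
    return str(hashxx(json.dumps(obj, sort_keys=True).encode()))

assert stable_id({'a': 1, 'b': 2}) == stable_id({'b': 2, 'a': 1})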
Code example #13
def make_key(key, key_prefix, version):
    """
    Makes a memcached-safe cache key using pyhashxx. Use as a KEY_FUNCTION

    """
    clean_key = bad_key_chars.sub('', key)
    full_key = '%s:%s:%s' % (key_prefix, version, clean_key)

    if clean_key != key or len(full_key) > MAX_LENGTH:
        hashed_key = str(hashxx(key))
        abbrev_keylen = MAX_LENGTH - len(hashed_key) - 4 - len(key_prefix) - len(str(version))
        new_key = '%s[%s]' % (clean_key[:abbrev_keylen], hashed_key)
        full_key = '%s:%s:%s' % (key_prefix, version, new_key)

    return full_key
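For context, a sketch of the module-level names make_key relies on (bad_key_chars, MAX_LENGTH) and of wiring it into Django's cache settings via KEY_FUNCTION; the regex and values below are assumptions, not taken from the original project:

import re

# Assumed definitions: memcached rejects whitespace/control characters
# and limits keys to 250 bytes.
bad_key_chars = re.compile(r'[\x00-\x20\x7f]')
MAX_LENGTH = 250

# settings.py -- point Django at the function by dotted path
# ('myapp.cache.make_key' is a hypothetical module path):
CACHES = {
    'default': {
        'BACKEND': 'django.core.cache.backends.memcached.PyMemcacheCache',
        'LOCATION': '127.0.0.1:11211',
        'KEY_FUNCTION': 'myapp.cache.make_key',
    }
}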
Code example #14
File: pipelines.py  Project: coep-rankweb/spyder
	def buildWordIndex(self, item):
		'''
		Get current url id
		For each word in current url's text,
			add the url to the set of urls which contain that word
		'''
		url_id = self.r.get("%s:%s" % (self.URL2ID, hashxx(item['url'])))
		word_id = ""
		for word in item['words']:
			if self.r.sadd(self.WORD_SET, word):
				word_id = str(self.r.incr(self.WORD_CTR, 1))
				self.r.set("%s:%s" % (self.WORD2ID, word), word_id)
			else:
				word_id = self.r.get("%s:%s" % (self.WORD2ID, word))
			self.r.sadd("%s:%s" % (self.WORD_IN, word_id), url_id)
Code example #15
    def steps_cost(self, state_to_find):

        # Compute the hash_index for state_to_find, look that many bytes into
        # the file/self.content and retrieve a single hex character. This hex
        # character is the number of steps required to solve the corresponding state.
        hash_raw = hashxx(state_to_find.encode('utf-8'))
        hash_index = int(hash_raw % self.bucketcount)

        result = int(chr(self.content[hash_index]), 16)

        # This should never be zero
        if not result:
            #log.warning("%s: state_to_find %s, hash_raw %s. hash_index %s, result is %s" % (self, state_to_find, hash_raw, hash_index, result))
            raise SolveError(
                "%s: state_to_find %s, hash_raw %s. hash_index %s, result is %s"
                % (self, state_to_find, hash_raw, hash_index, result))

        return result
Code example #16
def setUp(self):
    self.date = str(datetime.datetime.now())
    self.uuid = str(hashxx('wut.jpg')) + str(self.date)
    self.formatted_image = {'data': {
        "id": self.uuid,
        "title": "Monkey Socks",
        "type": "image/jpeg",
        "creation_date": self.date,
        "size": 52191,
        "url": "http://dumpon.us/imgs/" + self.uuid + '.jpg'
    }, 'status': 200}
    self.image_keys = ['id', 'title', 'type', 'creation_date', 'size', 'url']
    self.json_image = json.dumps(self.formatted_image)
    self.app = dumponus.app.test_client()
    self.image_model = Photo('wut.jpg', 'Monkey Socks', 'image/jpeg', 52191, "http://dumpon.us/imgs/" + self.uuid + '.jpg', creation_date=self.date)
    self.db = dumponus.db
    # keep track of ids (they are changing) and delete test data on tearDown
    self.ids = []
    self.images = []
    self.url = '/api/images'
Code example #17
File: utils.py  Project: c01db33f/smt
def string_hash(string):
    return pyhashxx.hashxx(string.encode('utf8'))
Code example #18
def client_fuzzer(fd, lfd, args=None, **kwargs):
    """
    Client AFL fuzzer. Executed by AFL, fed to STDIN.
    Communicates with the fuzzer server, reads response, changes SHM.

    :param fd:
    :param lfd:
    :param args:
    :return:
    """
    global stdin_compat
    in_afl = os.getenv('PYTHON_AFL_PERSISTENT', None)

    llog(fd, 'init1')
    sys.settrace(None)
    llog(fd, 'init2, in afl: %s' % in_afl)

    # Argument processing
    tpler = Templater(args)
    llog(fd, 'templater: %s' % tpler)

    # by default, start with 4byte input - fuzz instruction with empty data
    tpler.gen_inputs()

    # Call our fuzzer
    try:
        # s = csock()  # Pre-fork connection. needs more sophisticated reconnect if socket is broken.
        while afl.loop(3):
            sys.settrace(None)
            stdin_compat.seek(0)
            buffer = stdin_compat.read()
            buffer = tpler.transform(buffer)
            if buffer is None:
                continue

            llog(fd, 'init4, buffer: %s' % binascii.hexlify(bytes(buffer)))

            s = SockComm(server=False)
            s.connect()
            s.send(bytes([0]) + bytes(buffer))

            resp = s.read()
            llog(fd, 'Recv: %s' % binascii.hexlify(resp))
            if resp[0] != 0:
                llog(fd, 'Invalid response code: %s' % resp[0])
                continue

            sw1 = resp[1]
            sw2 = resp[2]
            timing = resp[3:5]
            data = resp[5:]
            statuscode = (sw1 << 8) + sw2

            llog(fd, 'status: %04x timing: %s' % (statuscode, timing))
            if in_afl:
                afl.trace_offset(hashxx(bytes([sw1, sw2])))
                afl.trace_offset(hashxx(timing))
                afl.trace_offset(hashxx(bytes(data)))

    except Exception as e:
        llog(fd, 'Exc: %s\n' % e)
        traceback.print_exc(file=fd)
        fd.flush()

    except KeyboardInterrupt:
        return

    finally:
        fd.close()
        os._exit(0)
Code example #19
def prefix_fuzzing(fd, lfd, args=None, **kwargs):
    """
    Original forking fuzzer with AFL without TCP binding.
    Only for demo purposes. Did not work well with libpcsc and forking.

    :param fd:
    :param args:
    :return:
    """

    global stdin_compat
    in_afl = os.getenv('PYTHON_AFL_PERSISTENT', None)

    # reader = get_reader()
    # card = connect_card(reader)

    llog(fd, 'init1')
    fwd = FileWriter(fd=lfd)
    sys.settrace(None)
    llog(fd, 'init2, in afl: %s' % in_afl)

    # Call our fuzzer
    try:
        while afl.loop(3):  # afl.init()
            sys.settrace(None)

            stdin_compat.seek(0)
            buffer = stdin_compat.read()
            buffer = form_buffer(buffer)
            if buffer is None:
                continue

            llog(fd, 'init4, buffer: %s' % binascii.hexlify(bytes(buffer)))
            ln = int(buffer[4]) if len(buffer) >= 5 else 0
            test_elem = FuzzerObject(int(buffer[0]), int(buffer[1]),
                                     int(buffer[2]), int(buffer[3]), ln,
                                     list(bytearray(buffer[5:])))

            if args.dry:
                elem = test_elem
                sw1 = 0
                sw2 = 0
                out = bytes()

            else:
                card_interactor = CardInteractor(CARD_READER_ID)
                llog(fd, 'reader: %s' % (card_interactor, ))

                elem = card_interactor.send_element(test_elem)
                sw1 = elem.out['sw1']
                sw2 = elem.out['sw2']
                out = elem.out['data']

            statuscode = (sw1 << 8) + sw2
            time_bin = int(test_elem.misc['timing'] // 10)
            if time_bin < 0:
                time_bin = 0

            serialized_element = elem.serialize()
            fwd.print_to_file("%s" % json.dumps(serialized_element))

            llog(fd, 'status: %04x timing: %s' % (statuscode, time_bin))
            if in_afl:
                afl.trace_offset(hashxx(bytes([sw1, sw2])))
                afl.trace_offset(hashxx(bytes(time_bin.to_bytes(2, 'big'))))
                afl.trace_offset(hashxx(out))
            os._exit(0)

    except Exception as e:
        llog(fd, 'Exc: %s\n' % e)
        traceback.print_exc(file=fd)
        fd.flush()

    finally:
        fd.close()
        os._exit(0)
Code example #20
def get_hash(string):
    return hashxx(string.encode('utf-8'))
Code example #21
    def test_seeds(self):
        self.assertNotEqual(hashxx(b'hello', seed=0), hashxx(b'hello', seed=1))

        self.assertEqual(hashxx(b'hello', seed=0), self.hash_value(b'hello', seed=0))
        self.assertEqual(hashxx(b'hello', seed=1), self.hash_value(b'hello', seed=1))
        self.assertEqual(hashxx(b'hello', seed=2), self.hash_value(b'hello', seed=2))
Code example #22
File: binaries.py  Project: Murodese/pynab
def generate_hash(name, group_name, posted_by, total_parts):
    """Generates a mostly-unique temporary hash for a part."""
    return pyhashxx.hashxx(name.encode('utf-8'), posted_by.encode('utf-8'),
                           group_name.encode('utf-8'), total_parts.encode('utf-8'))
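hashxx is handed several byte strings at once here; per the pyhashxx documentation, hashing multiple arguments is equivalent to hashing their concatenation. That also explains the "mostly-unique" caveat: field boundaries are not encoded, so differently split fields with the same bytes collide. A quick illustration:

import pyhashxx

# Multiple arguments hash the same as their concatenation...
assert pyhashxx.hashxx(b'abc', b'def') == pyhashxx.hashxx(b'abcdef')
# ...so differently-split fields with the same bytes collide.
assert pyhashxx.hashxx(b'ab', b'c') == pyhashxx.hashxx(b'a', b'bc')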
Code example #23
def create_id(name, lang, typ, visibility):
    time_now = dt.now().strftime('%H:%M:%S')
    return str(
        hashxx("{}{}{}{}{}".format(name[0], lang[0], typ[0], visibility[0],
                                   time_now)))[2:6]
Code example #24
def generate_hash(name, group_name, posted_by, total_parts):
    """Generates a mostly-unique temporary hash for a part."""
    return pyhashxx.hashxx(name.encode('utf-8'), posted_by.encode('utf-8'),
                           group_name.encode('utf-8'),
                           total_parts.encode('utf-8'))
Code example #25
File: pairoverlap.py  Project: c3c/CSM
from pprint import pprint
import cPickle
import glob
import sys
from itertools import *
import os.path
import pyhashxx
from UserDict import UserDict
import random
import operator
from gurobipy import *

nulhash = int(pyhashxx.hashxx('\0' * 4096)/10)

freq = {}
tables = []
fi = 0
frqc = 0

if os.path.isfile("dumps/dumps.cache"):
	print "Loading hashlist... (cached)"
	(frqc, tables) = cPickle.load(open("dumps/dumps.cache", "rb"))
else:
	print "Loading hashes and purging sole occurences"
	for file in sorted(glob.glob("dumps/*.dump")):
		tp = cPickle.load(open(file, "rb"))
		table = UserDict(tp)
		table.filename = os.path.basename(file).replace(".dump","")
		table.idx = fi
		fi += 1
		table.memory = (768,1024)[random.randint(0,1)]
Code example #26
def convert_to_cost_only(filename, bucketcount, filename_statetargets):

    state_targets = set()
    with open(filename_statetargets, 'r') as fh:
        for line in fh:
            line = line.replace("'", "").replace(",", "").strip()
            state_targets.add(line)

    filename_new = filename.replace('.txt', '.hash-cost-only.txt')

    bucket = bytearray(bucketcount)
    collisions = 0

    with open(filename, 'r') as fh:
        for (line_number, line) in enumerate(fh):
            (state, steps) = line.strip().split(':')
            steps = steps.split()

            hash_raw = hashxx(state.encode('utf-8'))
            hash_index = int(hash_raw % bucketcount)

            # Write the steps_len
            if state in state_targets:
                #log.info("found state_target %s" % state)
                steps_len = 0
            else:
                if steps[0].isdigit():
                    steps_len = int(steps[0])
                else:
                    steps_len = len(steps)

            #log.info("state: %s, hash_index %s, steps_len %s" % (state, hash_index, steps_len))

            if not bucket[hash_index]:
                bucket[hash_index] = steps_len
            else:
                collisions += 1

                if bucket[hash_index] > steps_len:
                    bucket[hash_index] = steps_len

            if line_number % 1000000 == 0:
                log.info(line_number)
            #if line_number >= 1000:
            #    break

    log.info("%d collisions" % collisions)
    log.info("begin writing %s" % filename_new)
    with open(filename_new, 'w') as fh_new:
        to_write = []

        for (index, x) in enumerate(bucket):
            if x > 15:
                to_write.append('f')
            else:
                # Convert steps_len to hex and ignore the 0x part of the string
                to_write.append(hex(x)[2])

            if index % 100000 == 0:
                fh_new.write(''.join(to_write))
                to_write = []

        if to_write:
            fh_new.write(''.join(to_write))

        fh_new.write('\n')
    log.info("end writing %s" % filename_new)
Code example #27
File: pipelines.py  Project: coep-rankweb/spyder
		'''
		Assign id to current url
		Each link's url is assigned an ID and vice versa

		This stage will only be reached if the 'if' condition in nofilter.py fails and the function returns true.
		The only way the 'if' condition fails is if the url_id of this item's url exists and is negative (=> it has been processed before).

		Thus, either the url has been assigned an id or it hasn't. If it has, negate its current id. If it hasn't, get a new id from URL_CTR, negate it and assign it to this url. Finally, update URL2ID and ID2URL correspondingly in either case.

		Ultimately,
		+ve id => assigned id but not processed
		-ve id => assigned id and processed
		no id => not assigned id and not processed
		'''
		hashed_url = hashxx(item['url'])
		url_id = self.r.get("%s:%s" % (self.URL2ID, hashed_url))

		if not url_id:
			url_id = -1 * self.r.incr(self.URL_CTR, 1)
		else:
			self.r.delete("%s:%s" % (self.ID2URL, url_id))
			url_id = -1 * int(url_id)

		self.r.sadd(self.URL_SET, url_id)
		self.r.set("%s:%d" % (self.ID2URL, url_id), item['url'])
		self.r.set("%s:%s" % (self.URL2ID, hashed_url), url_id)

		for link in item['link_set']:
			hashed_link = hashxx(link)
			if not self.r.get("%s:%s" % (self.URL2ID, hashed_link)):
Code example #28
def generate_seed(key, value):
    combination = key + value
    value = hashxx(combination.encode())
    return value
Code example #29
def test_empty_string(self):
    self.assertEqual(hashxx(b''), self.hash_value(b''))
Code example #30
def generate_hash(subject, posted_by, group_name, total_segments):
    """Generates a mostly-unique temporary hash for a part."""
    return pyhashxx.hashxx(subject.encode('utf-8'), posted_by.encode('utf-8'),
                           group_name.encode('utf-8'),
                           struct.pack('I', total_segments))
Code example #31
def make_hash_name(name):
    # Makes the hashed name to be used as the id.
    i = str(hashxx(name + str(datetime.datetime.now()))) + '.'
    return i
Code example #32
File: hashing.py  Project: truemped/streamingds
def _gen_hash_fn(self, seed, num_hash_fns):
    return lambda x: hashxx(str(x), seed=seed) % self.bits
Code example #33
File: utils.py  Project: shadown/smt
def string_hash(string):
    return pyhashxx.hashxx(string.encode('utf8'))
Code example #34
def my_hashxx(x, seed=0):
    x = str(x)
    seed = int(seed)
    return hashxx(x, seed=seed)
Code example #35
from pyhashxx import hashxx
import leveldb

db = leveldb.LevelDB('./db')
new_db = leveldb.LevelDB('./new_db')

count = 0
for record in db.RangeIter():
    count += 1
    if count % 1000 == 0:
        print(count)
    new_db.Put(str(hashxx(record[0])), record[1])

Code example #36
File: hashing.py  Project: mliesenberg/streamingds
def _gen_hash_fn(self, seed, slices):
    return lambda x: hashxx(str(x), seed=seed) % slices
Code example #37
def test_string(self):
    self.assertEqual(hashxx(b'hello'), self.hash_value(b'hello'))
Code example #38
File: myscript.py  Project: chiragsachdev/Data-Mining
def ft_hashxx(text):
	global m
	hash_value = hashxx(text.encode())
	return (hash_value % m)
Code example #39
File: parts.py  Project: Murodese/pynab
def generate_hash(subject, posted_by, group_name, total_segments):
    """Generates a mostly-unique temporary hash for a part."""
    return pyhashxx.hashxx(subject.encode('utf-8'), posted_by.encode('utf-8'),
                           group_name.encode('utf-8'), struct.pack('I', total_segments))
Code example #40
File: lsh.py  Project: skoppula/yoho-experiments
def hxx(x):
    return post_hash_fn(hashxx(x))