Example #1
	def writeWebMatrix(self, item):
		'''
		Builds the web graph as a Matrix Market format file
		'''
		u = self.r.get("%s:%s" % (self.URL2ID, hashxx(item['url'])))
		v = 0
		for link in set(item['link_set']):
			v = self.r.get("%s:%s" % (self.URL2ID, hashxx(link)))
			self.f_mat.write("%s\t%s\t1\n" % (u, v))
Example #2
def hash_one_value(val):
    '''
    Use the hashxx function to initialize, add data, and compute the
    digest in one go. Add the seed parameter if you need to control
    the initial state of the digest.
    '''
    print("Hash of %s = %d" % (repr(val), hashxx(val, seed=0)))
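
The docstring above describes the one-shot call. pyhashxx also documents an incremental Hashxx object with update() and digest(); a minimal sketch, assuming that class is available in your installed version, is:

from pyhashxx import Hashxx, hashxx

# Incremental hashing: feed data in chunks, then read the digest.
hasher = Hashxx(seed=0)
hasher.update(b'Hello ')
hasher.update(b'World!')
print(hasher.digest())

# The one-shot call over the same bytes should yield the same value.
print(hashxx(b'Hello World!', seed=0))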
Example #3
def hash_one_value(val):
    '''
    Use the hashxx function to initialize, add data, and compute the
    digest in one go. Add the seed parameter if you need to control
    the initial state of the digest.
    '''
    print("Hash of %s = %d" % (repr(val), hashxx(val, seed=0)))
        def g(key, obj):
            for pat, e in extractors.items():
                if fnmatch.fnmatch(key, pat):
                    unhashable = False
                    vals = []

                    for f in e:
                        v = f(obj)
                        if isinstance(v, bool):
                            if not v:
                                unhashable = True
                                break
                        else:
                            vals.append(v)  # reuse the value computed above instead of calling f(obj) twice

                    if unhashable:
                        logger.debug('pat %s obj %s is unhashable' %
                                     (pat, key))
                        continue

                    for v in itertools.product(*vals):
                        s = str('\0'.join(v))
                        logger.debug('hash string for %s %s' % (key, s))
                        yield pyhashxx.hashxx(s), s
                    break
    def push(self, batch):
        """Hash-partition a batch of records and push each slice onto its queue."""
        if batch is None:
            # Propagate the end-of-stream marker to every partition queue.
            # (A plain loop rather than map(): map() is lazy in Python 3, so
            # its side effects would never run.)
            for q in self._qs:
                q.push(None)
            return

        self._lock.acquire()
        self._records += len(batch)
        self._lock.release()

        # [todo] - performance: splitting batch too small?
        rdef = batch.record_def()

        # distribute records using hash function
        partitioned_records = [
            []  # array of Record
            for i in range(len(self._qs))
        ]
        key_idx = rdef.colindex_by_colname(self._key)
        for rec in batch:
            val     = rec[key_idx]
            h       = pyhashxx.hashxx(bytes(val))  # [todo] - customize hash function?
            records = partitioned_records[h % len(self._qs)]
            records.append(rec)

        # really push distributed records into BatchQueue
        for i in range(len(self._qs)):
            self._qs[i].push(Batch(rdef, partitioned_records[i]))
Example #6
    def get(self):
        # Look up every filter set anyone has ever requested, in descending
        # order of how often it was requested.
        # For now... let's build all of them!
        allFilterSetsWithCounts = db.session.query(
            Filter.filter_params,
            func.count(Filter.filter_params).label('times_filtered')).group_by(
                Filter.filter_params).order_by('times_filtered DESC').all()
        results = {}

        for (filter_string, times_filtered) in allFilterSetsWithCounts:
            print 'running ' + filter_string
            print 'has been run ' + str(times_filtered) + ' times.'
            filter_params = json.loads(filter_string)
            rules = filter_params['rules']

            cache_key = str(hashxx(json.dumps(filter_params)))
            from_cache = client.get(cache_key)

            results[cache_key] = {}
            results[cache_key]['rules'] = rules

            if from_cache is None:
                tasks.buildCache.delay(cache_key, rules)
                results[cache_key]['existing-cache'] = False
            else:
                results[cache_key]['existing-cache'] = True

        return results
Example #7
def hash_strings(strings, seed, mod, encoded=False):
    if not encoded:
        strings = map(str.encode, strings)

    arr = [hashxx(s, seed=seed) for s in strings]

    return np.mod(arr, mod)
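
The helper above assumes numpy (np) and hashxx are already imported in its module. An equivalent standalone sketch of the same bucketing idea, with made-up labels, is:

import numpy as np
from pyhashxx import hashxx

# Encode, hash with a fixed seed, reduce modulo the bucket count; the same
# seed always yields the same buckets, which is what makes this usable for
# feature hashing or sharding.
labels = ["alpha", "beta", "gamma"]
buckets = np.mod([hashxx(s.encode(), seed=0) for s in labels], 8)
print(buckets)  # three bucket indices in range(8)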
def get_acmuevent_hash(glpatt="/sys/class/tty/ttyACM?/device/uevent"):
    f = glob(glpatt)
    if len(f) == 0:
        return ""
    else:
        with open(f[0], "rb") as ue:
            return str(hashxx(ue.read()))
Example #9
    def predict(self, target, static_data, temporal_data):
        """
        Predicts the model's output (loss, prediction) for the given target and instance.
        In general, at least one of static_data and temporal_data must be non-empty.
        In this case, the model only uses static_data.

        Args:
            target:         classification label or regression output (scalar value)
            static_data:    static data (vector)
            temporal_data:  temporal data (matrix, where number of rows are variable across instances)

        Returns:
            loss:           model's output loss
            prediction:     model's output prediction, only used for classifiers
        """
        hashval = hashxx(static_data.data.tobytes())
        self.rng.seed(hashval)
        if self.noise_type == constants.NO_NOISE:
            prediction = self.model_fn(static_data, [])
        elif self.noise_type == constants.EPSILON_IRRELEVANT:
            # Add noise - small random non-zero coefficients for irrelevant features
            noise = self.noise_multiplier * self.rng.uniform(
                -1, 1, static_data.size)
            prediction = self.model_fn(static_data, noise)
        elif self.noise_type == constants.ADDITIVE_GAUSSIAN:
            # Add noise - additive Gaussian, sampled for every instance/perturbed instance
            prediction = self.model_fn(
                static_data, self.rng.normal(0, self.noise_multiplier))
        else:
            raise NotImplementedError("Unknown noise type")
        loss = self.loss(prediction, target)
        return (loss, prediction)
Example #10
    def post(self):
        try:
            parser = reqparse.RequestParser()
            parser.add_argument('filter_params', type=str)
            args = parser.parse_args()
            filter_params = json.loads(args['filter_params'])
            rules = filter_params['rules']
            if 'prediction_threshold' in filter_params.keys():
                metaseek_power = filter_params['prediction_threshold']
                print "getting metaseek power"
            else:
                metaseek_power = 0.9
            print metaseek_power

            cache_key = str(hashxx(json.dumps(filter_params)))
            from_cache = client.get(cache_key)
            if from_cache:
                print "cached"

            db.session.add(Filter(args['filter_params']))
            db.session.commit()

            if from_cache is None:
                summary = summarizeDatasets(Dataset.query,
                                            rules,
                                            sampleRate=0.05,
                                            metaseek_power=metaseek_power)
                client.set(cache_key, summary)
                return summary
            else:
                return from_cache

        except Exception as e:
            return {'error': str(e)}
Example #11
	def request_seen(self, request):
		print "filter:", request.url
		uid = self.r.get("%s:%s" % (self.URL2ID, hashxx(request.url)))
		if not uid or int(uid) > 0:
			pass
		else:
			log.msg("FILTER SEEN:%s" % request.url, level = log.CRITICAL)
			return True
Example #12
	def makeHashes(self,inp):
		self.inp = inp
		partial = []
		self.spooky = hash64(inp) % size
		partial.append(self.spooky)
		self.hashxx = hashxx(inp) % size
		partial.append(self.hashxx)
		self.mmh = abs(mmh3.hash(inp)) % size
		partial.append(self.mmh)
		return partial
def get_identifier(entity_name, data):
    """
    Get identifier from BrAPI object or generate one from hashed string json representation
    """
    entity_id = entity_name + 'DbId'
    data_id = data.get(entity_id)
    if not data_id:
        simplified_object = remove_falsey(
            data, predicate=lambda x: x and not isinstance(x, set))
        json_rep = json.dumps(simplified_object, sort_keys=True)
        data_id = str(hashxx(json_rep.encode()))
    data[entity_id] = str(data_id)
    return data_id
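
get_identifier derives a stable id by hashing a canonical (sort_keys=True) JSON serialization, so key order in the source object does not affect the result. A small self-contained check of that property, with made-up dicts:

import json
from pyhashxx import hashxx

a = json.dumps({"x": 1, "y": 2}, sort_keys=True)
b = json.dumps({"y": 2, "x": 1}, sort_keys=True)

# Same content, different key order: the canonical JSON strings are equal,
# so the derived ids are equal as well.
assert hashxx(a.encode()) == hashxx(b.encode())
print(str(hashxx(a.encode())))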
def make_key(key, key_prefix, version):
    """
    Makes a memcached-safe cache key using pyhashxx. Use as a KEY_FUNCTION.
    """
    clean_key = bad_key_chars.sub('', key)
    full_key = '%s:%s:%s' % (key_prefix, version, clean_key)

    if clean_key != key or len(full_key) > MAX_LENGTH:
        hashed_key = str(hashxx(key))
        abbrev_keylen = MAX_LENGTH - len(hashed_key) - 4 - len(key_prefix) - len(str(version))
        new_key = '%s[%s]' % (clean_key[:abbrev_keylen], hashed_key)
        full_key = '%s:%s:%s' % (key_prefix, version, new_key)

    return full_key
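
The docstring refers to Django's KEY_FUNCTION cache setting, which takes a dotted path to a callable with exactly this (key, key_prefix, version) signature. A hedged settings sketch; the backend choice and the module path myproject.cache.make_key are assumptions for illustration only:

# settings.py (sketch)
CACHES = {
    "default": {
        "BACKEND": "django.core.cache.backends.memcached.PyMemcacheCache",
        "LOCATION": "127.0.0.1:11211",
        # Dotted path to the make_key() defined above (hypothetical module path).
        "KEY_FUNCTION": "myproject.cache.make_key",
    }
}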
Example #15
	def buildWordIndex(self, item):
		'''
		Get current url id
		For each word in current url's text,
			add the url to the set of urls which contain that word
		'''
		url_id = self.r.get("%s:%s" % (self.URL2ID, hashxx(item['url'])))
		word_id = ""
		for word in item['words']:
			if self.r.sadd(self.WORD_SET, word):
				word_id = str(self.r.incr(self.WORD_CTR, 1))
				self.r.set("%s:%s" % (self.WORD2ID, word), word_id)
			else:
				word_id = self.r.get("%s:%s" % (self.WORD2ID, word))
			self.r.sadd("%s:%s" % (self.WORD_IN, word_id), url_id)
    def steps_cost(self, state_to_find):

        # Compute the hash_index for state_to_find, look that many bytes into
        # the file/self.content, and retrieve a single hex character. That hex
        # character is the number of steps required to solve the corresponding
        # state.
        hash_raw = hashxx(state_to_find.encode('utf-8'))
        hash_index = int(hash_raw % self.bucketcount)

        result = int(chr(self.content[hash_index]), 16)

        # This should never be zero
        if not result:
            #log.warning("%s: state_to_find %s, hash_raw %s. hash_index %s, result is %s" % (self, state_to_find, hash_raw, hash_index, result))
            raise SolveError(
                "%s: state_to_find %s, hash_raw %s. hash_index %s, result is %s"
                % (self, state_to_find, hash_raw, hash_index, result))

        return result
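
To make the lookup concrete, here is a tiny self-contained illustration of the same indexing scheme: a table of ASCII hex digits addressed by hashxx(state) % bucketcount, where each digit encodes a step count (the table contents below are made up):

from pyhashxx import hashxx

content = b"0123456789abcdef"   # stand-in for the pre-built cost table
bucketcount = len(content)

state = "some-cube-state"
hash_index = hashxx(state.encode("utf-8")) % bucketcount
steps = int(chr(content[hash_index]), 16)
print(steps)  # the cost stored in that bucket, 0-15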
 def setUp(self):
     self.date = str(datetime.datetime.now())
     self.uuid = str(hashxx('wut.jpg')) + str(self.date)
     self.formatted_image = {'data': {
         "id": self.uuid,
         "title": "Monkey Socks",
         "type": "image/jpeg",
         "creation_date": self.date,
         "size": 52191,
         "url": "http://dumpon.us/imgs/" + self.uuid + '.jpg'
     }, 'status': 200 }
     self.image_keys = ['id', 'title', 'type', 'creation_date', 'size', 'url']
     self.json_image = json.dumps(self.formatted_image)
     self.app = dumponus.app.test_client()
     self.image_model = Photo('wut.jpg', 'Monkey Socks', 'image/jpeg', 52191, "http://dumpon.us/imgs/" + self.uuid + '.jpg', creation_date=self.date)
     self.db = dumponus.db
     #keep track of ids (they are changing) and delete test data on tearDown
     self.ids = []
     self.images = []
     self.url = '/api/images'
Example #18
 def string_hash(string):
     return pyhashxx.hashxx(string.encode('utf8'))
Example #19
def client_fuzzer(fd, lfd, args=None, **kwargs):
    """
    Client AFL fuzzer. Executed by AFL, fed to STDIN.
    Communicates with the fuzzer server, reads response, changes SHM.

    :param fd:
    :param lfd:
    :param args:
    :return:
    """
    global stdin_compat
    in_afl = os.getenv('PYTHON_AFL_PERSISTENT', None)

    llog(fd, 'init1')
    sys.settrace(None)
    llog(fd, 'init2, in afl: %s' % in_afl)

    # Argument processing
    tpler = Templater(args)
    llog(fd, 'templater: %s' % tpler)

    # by default, start with 4byte input - fuzz instruction with empty data
    tpler.gen_inputs()

    # Call our fuzzer
    try:
        # s = csock()  # Pre-fork connection. needs more sophisticated reconnect if socket is broken.
        while afl.loop(3):
            sys.settrace(None)
            stdin_compat.seek(0)
            buffer = stdin_compat.read()
            buffer = tpler.transform(buffer)
            if buffer is None:
                continue

            llog(fd, 'init4, buffer: %s' % binascii.hexlify(bytes(buffer)))

            s = SockComm(server=False)
            s.connect()
            s.send(bytes([0]) + bytes(buffer))

            resp = s.read()
            llog(fd, 'Recv: %s' % binascii.hexlify(resp))
            if resp[0] != 0:
                llog(fd, 'Invalid response code: %s' % resp[0])
                continue

            sw1 = resp[1]
            sw2 = resp[2]
            timing = resp[3:5]
            data = resp[5:]
            statuscode = (sw1 << 8) + sw2

            llog(fd, 'status: %04x timing: %s' % (statuscode, timing))
            if in_afl:
                afl.trace_offset(hashxx(bytes([sw1, sw2])))
                afl.trace_offset(hashxx(timing))
                afl.trace_offset(hashxx(bytes(data)))

    except Exception as e:
        llog(fd, 'Exc: %s\n' % e)
        traceback.print_exc(file=fd)
        fd.flush()

    except KeyboardInterrupt:
        return

    finally:
        fd.close()
        os._exit(0)
Example #20
def prefix_fuzzing(fd, lfd, args=None, **kwargs):
    """
    Original forking fuzzer with AFL without TCP binding.
    Only for demo purposes. Did not work well with libpcsc and forking.

    :param fd:
    :param args:
    :return:
    """

    global stdin_compat
    in_afl = os.getenv('PYTHON_AFL_PERSISTENT', None)

    # reader = get_reader()
    # card = connect_card(reader)

    llog(fd, 'init1')
    fwd = FileWriter(fd=lfd)
    sys.settrace(None)
    llog(fd, 'init2, in afl: %s' % in_afl)

    # Call our fuzzer
    try:
        while afl.loop(3):  # afl.init()
            sys.settrace(None)

            stdin_compat.seek(0)
            buffer = stdin_compat.read()
            buffer = form_buffer(buffer)
            if buffer is None:
                continue

            llog(fd, 'init4, buffer: %s' % binascii.hexlify(bytes(buffer)))
            ln = int(buffer[4]) if len(buffer) >= 5 else 0
            test_elem = FuzzerObject(int(buffer[0]), int(buffer[1]),
                                     int(buffer[2]), int(buffer[3]), ln,
                                     list(bytearray(buffer[5:])))

            if args.dry:
                elem = test_elem
                sw1 = 0
                sw2 = 0
                out = bytes()

            else:
                card_interactor = CardInteractor(CARD_READER_ID)
                llog(fd, 'reader: %s' % (card_interactor, ))

                elem = card_interactor.send_element(test_elem)
                sw1 = elem.out['sw1']
                sw2 = elem.out['sw2']
                out = elem.out['data']

            statuscode = (sw1 << 8) + sw2
            time_bin = int(test_elem.misc['timing'] // 10)
            if time_bin < 0:
                time_bin = 0

            serialized_element = elem.serialize()
            fwd.print_to_file("%s" % json.dumps(serialized_element))

            llog(fd, 'status: %04x timing: %s' % (statuscode, time_bin))
            if in_afl:
                afl.trace_offset(hashxx(bytes([sw1, sw2])))
                afl.trace_offset(hashxx(bytes(time_bin.to_bytes(2, 'big'))))
                afl.trace_offset(hashxx(out))
            os._exit(0)

    except Exception as e:
        llog(fd, 'Exc: %s\n' % e)
        traceback.print_exc(file=fd)
        fd.flush()

    finally:
        fd.close()
        os._exit(0)
Example #21
def get_hash(string):
    return hashxx(string.encode('utf-8'))
Example #22
    def test_seeds(self):
        self.assertNotEqual(hashxx(b'hello', seed=0), hashxx(b'hello', seed=1))

        self.assertEqual(hashxx(b'hello', seed=0), self.hash_value(b'hello', seed=0))
        self.assertEqual(hashxx(b'hello', seed=1), self.hash_value(b'hello', seed=1))
        self.assertEqual(hashxx(b'hello', seed=2), self.hash_value(b'hello', seed=2))
Example #23
def generate_hash(name, group_name, posted_by, total_parts):
    """Generates a mostly-unique temporary hash for a part."""
    return pyhashxx.hashxx(name.encode('utf-8'), posted_by.encode('utf-8'),
                           group_name.encode('utf-8'), total_parts.encode('utf-8')
    )
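
This example relies on hashxx accepting several byte strings in a single call, which it folds into one 32-bit digest. A minimal call with made-up values:

import pyhashxx

# Multiple byte-string arguments, consumed in order.
part_hash = pyhashxx.hashxx(b'Re: example subject', b'poster@example.com',
                            b'alt.binaries.example', b'42')
print(part_hash)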
Example #24
def create_id(name, lang, typ, visibility):
    time_now = dt.now().strftime('%H:%M:%S')
    return str(
        hashxx("{}{}{}{}{}".format(name[0], lang[0], typ[0], visibility[0],
                                   time_now)))[2:6]
Example #25
def generate_hash(name, group_name, posted_by, total_parts):
    """Generates a mostly-unique temporary hash for a part."""
    return pyhashxx.hashxx(name.encode('utf-8'), posted_by.encode('utf-8'),
                           group_name.encode('utf-8'),
                           total_parts.encode('utf-8'))
Example #26
from pprint import pprint
import cPickle
import glob
import sys
from itertools import *
import os.path
import pyhashxx
from UserDict import UserDict
import random
import operator
from gurobipy import *

nulhash = int(pyhashxx.hashxx('\0' * 4096)/10)

freq = {}
tables = []
fi = 0
frqc = 0

if os.path.isfile("dumps/dumps.cache"):
	print "Loading hashlist... (cached)"
	(frqc, tables) = cPickle.load(open("dumps/dumps.cache", "rb"))
else:
	print "Loading hashes and purging sole occurences"
	for file in sorted(glob.glob("dumps/*.dump")):
		tp = cPickle.load(open(file, "rb"))
		table = UserDict(tp)
		table.filename = os.path.basename(file).replace(".dump","")
		table.idx = fi
		fi += 1
		table.memory = (768,1024)[random.randint(0,1)]
def convert_to_cost_only(filename, bucketcount, filename_statetargets):

    state_targets = set()
    with open(filename_statetargets, 'r') as fh:
        for line in fh:
            line = line.replace("'", "").replace(",", "").strip()
            state_targets.add(line)

    filename_new = filename.replace('.txt', '.hash-cost-only.txt')
    prev_state_int = None
    first_permutation_rank = None

    bucket = bytearray(bucketcount)
    collisions = 0

    with open(filename, 'r') as fh:
        for (line_number, line) in enumerate(fh):
            (state, steps) = line.strip().split(':')
            steps = steps.split()

            hash_raw = hashxx(state.encode('utf-8'))
            hash_index = int(hash_raw % bucketcount)

            # Write the steps_len
            if state in state_targets:
                #log.info("found state_target %s" % state)
                steps_len = 0
            else:
                if steps[0].isdigit():
                    steps_len = int(steps[0])
                else:
                    steps_len = len(steps)

            #log.info("state: %s, hash_index %s, steps_len %s" % (state, hash_index, steps_len))

            if not bucket[hash_index]:
                bucket[hash_index] = steps_len
            else:
                collisions += 1

                if bucket[hash_index] > steps_len:
                    bucket[hash_index] = steps_len

            if line_number % 1000000 == 0:
                log.info(line_number)
            #if line_number >= 1000:
            #    break

    log.info("%d collisions" % collisions)
    log.info("begin writing %s" % filename_new)
    with open(filename_new, 'w') as fh_new:
        to_write = []

        for (index, x) in enumerate(bucket):
            if x > 15:
                to_write.append('f')
            else:
                # Convert steps_len to hex and ignore the 0x part of the string
                to_write.append(hex(x)[2])

            if index % 100000 == 0:
                fh_new.write(''.join(to_write))
                to_write = []

        if to_write:
            fh_new.write(''.join(to_write))

        fh_new.write('\n')
    log.info("end writing %s" % filename_new)
Example #28
		'''
		Assign id to current url
		Each link's url is assigned an ID and vice versa

		This stage will only be reached if the 'if' condition in nofilter.py fails and the function returns true.
		The only way the 'if' condition fails is if the url_id of this item's url exists and is negative (=> it has been processed before).

		Thus, either the url has already been assigned an id or it hasn't. If it has, negate its current id. If it hasn't, get a new id from URL_CTR, negate it and assign it to this url. Finally, update URL2ID and ID2URL accordingly in either case.

		Ultimately,
		+ve id => assigned id but not processed
		-ve id => assigned id and processed
		no id => not assigned id and not processed
		'''
		hashed_url = hashxx(item['url'])
		url_id = self.r.get("%s:%s" % (self.URL2ID, hashed_url))

		if not url_id:
			url_id = -1 * self.r.incr(self.URL_CTR, 1)
		else:
			self.r.delete("%s:%s" % (self.ID2URL, url_id))
			url_id = -1 * int(url_id)

		self.r.sadd(self.URL_SET, url_id)
		self.r.set("%s:%d" % (self.ID2URL, url_id), item['url'])
		self.r.set("%s:%s" % (self.URL2ID, hashed_url), url_id)

		for link in item['link_set']:
			hashed_link = hashxx(link)
			if not self.r.get("%s:%s" % (self.URL2ID, hashed_link)):
Example #29
 def generate_seed(key, value):
     combination = key + value
     value = hashxx(combination.encode())
     return value
Example #30
 def test_empty_string(self):
     self.assertEqual(hashxx(b''), self.hash_value(b''))
Example #31
def generate_hash(subject, posted_by, group_name, total_segments):
    """Generates a mostly-unique temporary hash for a part."""
    return pyhashxx.hashxx(subject.encode('utf-8'), posted_by.encode('utf-8'),
                           group_name.encode('utf-8'),
                           struct.pack('I', total_segments))
def make_hash_name(name):
    #makes the hashed name to be used as the id
    i = str(hashxx(name + str(datetime.datetime.now()))) + '.'
    return i
Example #33
 def _gen_hash_fn(self, seed, num_hash_fns):
     return lambda x: hashxx(str(x), seed=seed) % self.bits
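
This bit-index generator (and the near-identical one in Example #37 below) builds a family of hash functions by varying the seed, a common Bloom-filter trick. A minimal Python 3 sketch, assuming hashxx expects bytes (hence the .encode() that the one-liner above omits):

from pyhashxx import hashxx

def make_hash_family(num_fns, bits):
    # One function per seed; binding s=seed freezes the loop variable.
    return [lambda x, s=seed: hashxx(str(x).encode(), seed=s) % bits
            for seed in range(num_fns)]

positions = [h("some-key") for h in make_hash_family(3, 1024)]
print(positions)  # three bit positions in [0, 1024)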
Example #34
 def string_hash(string):
     return pyhashxx.hashxx(string.encode('utf8'))
Example #35
def my_hashxx(x, seed=0):
    x = str(x)
    seed = int(seed)
    return hashxx(x, seed=seed)
Example #36
from pyhashxx import hashxx
import leveldb

db = leveldb.LevelDB('./db')
new_db = leveldb.LevelDB('./new_db')

count = 0
for record in db.RangeIter():
    count += 1
    if count % 1000 == 0:
        print(count)
    new_db.Put(str(hashxx(record[0])), record[1])

# count = 0

# for line in iter(log):
#   json_line = json.loads(line)

#   if 'terms' in json_line['val']:
    # count += 1
    # if count % 1000 == 0:
    #   print(count)

#     db.Put(str(json_line['key']), json.dumps(json_line['val']))


# count = 0

# for doc in db.RangeIter():
#   count += 1
#   if count % 1000 == 0:
Example #37
 def _gen_hash_fn(self, seed, slices):
     return lambda x: hashxx(str(x), seed=seed) % slices
Example #38
 def test_string(self):
     self.assertEqual(hashxx(b'hello'), self.hash_value(b'hello'))
Example #39
def ft_hashxx(text):
	global m
	hash_value = hashxx(text.encode())
	return (hash_value % m)
Example #40
def generate_hash(subject, posted_by, group_name, total_segments):
    """Generates a mostly-unique temporary hash for a part."""
    return pyhashxx.hashxx(subject.encode('utf-8'), posted_by.encode('utf-8'),
                           group_name.encode('utf-8'), struct.pack('I', total_segments)
    )
Example #41
 def hxx(x):
     return post_hash_fn(hashxx(x))