예제 #1
0
 def __init__(self, cache_nodes, db_node, write_mode):
     """Initialise the hasher, the two hash seeds and the per-node load table.

     cache_nodes, db_node and write_mode are passed straight through to the
     parent initialiser. Afterwards every node found in ``self.cache_nodes``
     gets a load counter of 0, keyed by ``node.id``.
     """
     super().__init__(cache_nodes, db_node, write_mode)
     # 32-bit FNV-1 hasher plus two integer seed values (0 and 1).
     self.hash_fn = pyhash.fnv1_32()
     self.hash_seed_a, self.hash_seed_b = 0, 1
     # NOTE(review): self.cache_nodes is presumably set by the parent
     # initialiser -- confirm against the base class.
     self.node_loads = {node.id: 0 for node in self.cache_nodes}
예제 #2
0
 def __init__(self, cache_nodes, db_node, write_mode, nrkeys, c):
     """Initialise the bookkeeping tables, the hasher and the load counters.

     cache_nodes, db_node and write_mode are passed straight through to the
     parent initialiser; nrkeys and c are stored unchanged on the instance.
     """
     super().__init__(cache_nodes, db_node, write_mode)
     self.nrkeys, self.c = nrkeys, c
     self.key_rates = {}
     self.key_node_map = {}
     # Replicated keys are kept ordered by the current rate() of their entry
     # in key_rates; the lookup happens lazily, each time the key function
     # is invoked by the SortedSet.
     self.replicated_keys = SortedSet(
         key=lambda k: self.key_rates[k].rate())
     self.hash_fn = pyhash.fnv1_32()
     # Every node in self.cache_nodes starts with a load of 0, keyed by id.
     self.node_loads = {node.id: 0 for node in self.cache_nodes}
예제 #3
0
 def __init__(self, filter_size):
     '''
     Initialize the Bloom filter.

     A bit array with filter_size entries is created and cleared, and the
     two hash functions used by the filter (FNV-1 and Murmur3, both 32-bit)
     are instantiated.

     filter_size: size of the bit vector, as entered by the user
     '''
     # The docstring and the body now share one indentation level; the
     # previous mixed indentation made this method fail to compile.
     self.filter = filter_size
     self.bit_array = bitarray(filter_size)
     # bitarray does not guarantee that a new array starts zeroed.
     self.bit_array.setall(0)
     self.hasher1 = pyhash.fnv1_32()
     self.hasher2 = pyhash.murmur3_32()
예제 #4
0
    def __init__(self, bit_vector_size):
        """Create an empty filter backed by a list of booleans.

        bit_vector_size: number of slots in the bit vector.

        Raises ValueError when bit_vector_size is zero or negative.
        """
        self.bit_vector_size = bit_vector_size

        # A filter needs at least one slot.
        if bit_vector_size <= 0:
            raise ValueError("Bit vector too small.")

        # All slots start out unset.
        self.bit_vector = [False] * bit_vector_size

        # The hash functions applied to every item, and how many there are.
        hashers = [pyhash.fnv1_32(), pyhash.murmur1_32()]
        self.hash_functions = hashers
        self.number_of_hashes = len(hashers)
예제 #5
0
and i don't care if it indicates if an item is there or if it
is not

E.g.: I run a website and want to keep track of IP
addresses that are blocked. I don't care if a blocked IP
is occasionally able to access my website, but I do care if someone not
on the blocked list is unable to access the site.

bit_vector is list of bits
"""
import pyhash

# Twenty-slot bit vector; every slot starts at 0.
bit_vector = [0 for _ in range(20)]

# Non-cryptographic hash functions (FNV-1 and Murmur3, both 32-bit).
fnv = pyhash.fnv1_32()
murmur = pyhash.murmur3_32()

# Slot indices for "Pikachu": each hash value reduced modulo the vector size.
fnv_pika = fnv("Pikachu") % 20
murmur_pika = murmur("Pikachu") % 20

# Slot indices for "Charmander".
fnv_char = fnv("Charmander") % 20
murmur_char = murmur("Charmander") % 20

# Insert both names by setting the slots their hashes point at.
bit_vector[fnv_pika] = bit_vector[murmur_pika] = 1
bit_vector[fnv_char] = bit_vector[murmur_char] = 1
def bloomIPs(clientSize):
    """Simulate a Bloom-filter IP blocklist and count its false positives.

    IPs are modelled as plain integers. The integers 0-999 are registered
    as known hackers, then 100000 client IPs drawn at random from
    [10000, 100000) are looked up in the filter.

    clientSize: number of slots in the bit vector; every hash value is
        reduced modulo this size to pick a slot.

    Returns a dict with the keys "hackerSize", "falsePositive",
    "clientSize", "doc" and "perfect".
    """
    # The filter itself: one slot per possible index, all initially 0.
    bitVector = [0] * clientSize

    # Three non-cryptographic hash functions (FNV-1, Murmur3, MetroHash),
    # always applied in this order.
    hashers = (pyhash.fnv1_32(), pyhash.murmur3_32(), pyhash.metro_128())

    # Register the known hackers: mark the slot chosen by each hash.
    hackerSize = 1000
    for hacker in range(hackerSize):
        key = str(hacker)
        for hasher in hashers:
            bitVector[hasher(key) % clientSize] = 1

    # Look up the genuine clients. Their IPs never overlap the hacker range,
    # so a client whose three slots are all set is a false positive; any
    # other client is counted as "perfect".
    perfect = 0
    falsePositive = 0
    for _ in range(100000):
        key = str(random.randrange(10000, 100000))
        slots = [bitVector[hasher(key) % clientSize] for hasher in hashers]
        if all(slot == 1 for slot in slots):
            falsePositive += 1
        else:
            perfect += 1

    doc = """
    Running our check, we wil throw warnings to some true clients thinking
    they are hackers when they are in fact not. Run it again to see how
    many true clients connect. At least we know they are safe. But as seen.
    there is a possibility of our clients, whose IPs are not even in the
    same range as the hackers to be detected as hackers."""
    return {
        "hackerSize": hackerSize,
        "falsePositive": falsePositive,
        "clientSize": clientSize,
        "doc": doc,
        "perfect": perfect
    }
        the_page = response.read()
        return the_page


def print_wget(name):
    """Print a shell snippet that fetches the request file for *name*.

    The snippet downloads nameurl(name) with curl into
    "frequency/<name>.request", but only when that file does not exist yet.
    Nothing is executed here; the snippet is only written to stdout.
    """
    target = "frequency/{}.request".format(name)
    template = """
if [ ! -f {} ]; then
    echo "fetching {}"...
    curl -s {} -o "{}"
fi
"""
    # Placeholder order: existence test, echo message, curl URL, curl output.
    script = template.format(target, nameurl(name), nameurl(name), target)
    print(script)


# Module-level FNV-1 32-bit hasher instance; get_id() uses it to hash pages.
hasher = pyhash.fnv1_32()


def get_id(page):
    """Return a hash identifying *page* by the contents of page['dados'].

    Every item of page['dados'] is converted to a string, the strings are
    sorted, and the string form of the resulting tuple is fed to the
    module-level hasher. Sorting makes the id independent of item order.
    """
    fields = sorted(str(item) for item in page['dados'])
    return hasher(str(tuple(fields)))


# Name of a JSON file.
# NOTE(review): presumably the file saveData() writes to -- confirm.
savefile = "sobrenomes.json"


def saveData():
    global pg
    global acc
예제 #8
0
import pyhash as ph

# Non-cryptographic hash functions (FNV-1 and Murmur3, both 32-bit).
fnv = ph.fnv1_32()
murmur = ph.murmur3_32()

# Twenty-slot bit vector; every slot starts at 0.
bit_vector = [0 for _ in range(20)]

# Slot indices for "Pikachu" and "Charmander": each hash value is reduced
# modulo the vector size.
fnv_pika = fnv("Pikachu") % 20
fnv_char = fnv("Charmander") % 20
murmur_pika = murmur("Pikachu") % 20
murmur_char = murmur("Charmander") % 20

print("fnv_pika\t", fnv_pika)
print("fnv_char\t", fnv_char)
print("murmur_pika\t", murmur_pika)
print("murmur_char\t", murmur_char)

# Insert both names by setting the slots their hashes point at.
bit_vector[fnv_pika] = bit_vector[murmur_pika] = 1
bit_vector[fnv_char] = bit_vector[murmur_char] = 1
print(bit_vector)

# If either slot (or both) for an item is 0, the item is not in the Bloom
# filter.
예제 #9
0
    return hash(req.string)

def python_integer(req):
    """Return Python's built-in hash of the integer key stored on *req*."""
    key = req.integer
    return hash(key)





import pyhash
#https://code.google.com/p/pyfasthash/

# Shared FNV-1 32-bit hasher, created once at import time.
h_fnv1_32 = pyhash.fnv1_32()


def fnv1_32(req):
    """Hash the string form of *req* with the FNV-1 32-bit hasher."""
    text = str(req)
    return h_fnv1_32(text)


# Shared lookup3 (big variant) hasher, created once at import time.
h_lookup3 = pyhash.lookup3_big()


def lookup3(req):
    """Hash the string form of *req* with the lookup3 hasher."""
    text = str(req)
    return h_lookup3(text)

# Shared SuperFastHash hasher, created once at import time.
h_super_fast_hash = pyhash.super_fast_hash()


def super_fast_hash(req):
    """Hash the string form of *req* with the SuperFastHash hasher."""
    text = str(req)
    return h_super_fast_hash(text)


h_murmur2_x64_64a = pyhash.murmur2_x64_64a()
def murmur2_x64_64a(req):
예제 #10
0
 def __init__(self):
     """Create a zeroed vector of DIVISOR slots and the two hashers."""
     # DIVISOR is the length of the vector built just below.
     self.DIVISOR = 20
     self.big_vector = [0 for _ in range(self.DIVISOR)]
     # FNV-1 and Murmur3 hashers.
     self.fnv, self.murmur = fnv1_32(), murmur3_32()
예제 #11
0
파일: app.py 프로젝트: RQuintin/quarchive
def get_hasher():
    """Return a newly created pyhash FNV-1 32-bit hasher."""
    hasher = pyhash.fnv1_32()
    return hasher
 def __init__(self, Nwords=100000):
     """Store Nwords (default 100000) and create an FNV-1 32-bit hasher."""
     self.Nwords = Nwords
     self.hasher = pyhash.fnv1_32()
예제 #13
0
import pyhash
# Twenty-slot bit vector; every slot starts at 0.
bit_vector = [0 for _ in range(20)]

# Non-cryptographic hash functions (FNV-1 and Murmur3, both 32-bit).
fvn = pyhash.fnv1_32()
murmur = pyhash.murmur3_32()
예제 #14
0
                        help="approximate set type")
    parser.add_argument('--outputfile', default="", help="output file")
    args = parser.parse_args()

    keys = []
    check_keys = []
    with open(args.keysfile) as f:
        for _ in range(args.nkeys):
            keys.append(f.readline())

        for _ in range(args.ntrials):
            check_keys.append(f.readline())

    hash_fns = []
    for i in range(args.k):
        hash_fns.append(approxset.HashFn(pyhash.fnv1_32(), i))

    if args.type == 'bloom':
        aset = approxset.BloomFilter(hash_fns, args.m)
    elif args.type == 'cmsketch':
        aset = approxset.CMSketch(hash_fns, args.m // args.k)

    simulator = Simulator(aset, keys, check_keys)
    (false_pos, false_neg) = simulator.run()

    print("Expected false positive rate:", "{0:.6f}".format(aset.expected_false_positives()))
    print("False positive rate:", "{0:.6f}".format(false_pos))
    print("False negative rate:", "{0:.6f}".format(false_neg))

    if len(args.outputfile) > 0:
        with open(args.outputfile, 'a') as f: