Example #1
0
 def test_xxh64(self):
     """Check xxh64 integer digests of 'a' against fixed reference values."""
     # Digest expected both without a seed and with an explicit seed of 0.
     unseeded_digest = 15154266338359012955

     hasher = xxhash.xxh64('a')
     self.assertEqual(hasher.intdigest(), unseeded_digest)

     # (seed passed positionally, expected integer digest)
     seeded_cases = (
         (0, unseeded_digest),
         (1, 16051599287423682246),
         (2**64 - 1, 6972758980737027682),
     )
     for seed_value, expected in seeded_cases:
         self.assertEqual(
             xxhash.xxh64('a', seed_value).intdigest(), expected)
Example #2
0
    def test_xxh64_reset(self):
        """reset() must bring a used hasher back to its initial digest."""
        hasher = xxhash.xxh64()
        pristine_digest = hasher.intdigest()

        # Feed random chunks of 10..49 bytes so the internal state changes.
        for chunk in (os.urandom(size) for size in range(10, 50)):
            hasher.update(chunk)
        hasher.reset()

        digest_after_reset = hasher.intdigest()
        self.assertEqual(pristine_digest, digest_after_reset)
Example #3
0
    def test_xxh64_copy(self):
        """copy() must yield a hasher that then evolves independently."""
        # The three digest accessors compared at every stage, in this order.
        views = ('digest', 'intdigest', 'hexdigest')

        source = xxhash.xxh64()
        source.update('xxhash')
        clone = source.copy()

        # Right after copying both objects report the same digests.
        for view in views:
            self.assertEqual(getattr(source, view)(), getattr(clone, view)())

        # Updating only the clone makes the two diverge ...
        clone.update('xxhash')
        for view in views:
            self.assertNotEqual(
                getattr(source, view)(), getattr(clone, view)())

        # ... and feeding the same data to the source realigns them.
        source.update('xxhash')
        for view in views:
            self.assertEqual(getattr(source, view)(), getattr(clone, view)())
Example #4
0
    def test_xxh64_update(self):
        """Incremental update() must match one-shot hashing of the prefix."""
        pieces = 'abc'

        # Default seed: compare against both the object and function APIs.
        streaming = xxhash.xxh64()
        consumed = ''
        for piece in pieces:
            streaming.update(piece)
            consumed += piece
            self.assertEqual(
                xxhash.xxh64(consumed).digest(), streaming.digest())
            self.assertEqual(
                xxhash.xxh64_digest(consumed), streaming.digest())

        # Same check again with a randomly chosen seed.
        seed = random.randint(0, 2**64)
        streaming = xxhash.xxh64(seed=seed)
        consumed = ''
        for piece in pieces:
            streaming.update(piece)
            consumed += piece
            self.assertEqual(
                xxhash.xxh64(consumed, seed).digest(), streaming.digest())
            self.assertEqual(
                xxhash.xxh64_digest(consumed, seed), streaming.digest())
Example #5
0
    def test_xxh64_overflow(self):
        """Seeds that differ only above bit 63 must behave identically."""
        s = 'I want an unsigned 64-bit seed!'

        # Each pair holds two seeds expected to give the same hasher seed
        # and therefore the same digests.
        equivalent_seeds = (
            (0, 2**64),
            (1, 2**64 + 1),
            (2**65 - 1, 2**66 - 1),
        )

        for low_seed, high_seed in equivalent_seeds:
            a = xxhash.xxh64(s, seed=low_seed)
            b = xxhash.xxh64(s, seed=high_seed)

            # Object API: both hashers agree on seed and every digest form.
            self.assertEqual(a.seed, b.seed)
            self.assertEqual(a.intdigest(), b.intdigest())
            self.assertEqual(a.hexdigest(), b.hexdigest())
            self.assertEqual(a.digest(), b.digest())

            # One-shot function API agrees with the object for both seeds.
            self.assertEqual(
                a.intdigest(), xxhash.xxh64_intdigest(s, seed=low_seed))
            self.assertEqual(
                a.intdigest(), xxhash.xxh64_intdigest(s, seed=high_seed))
            self.assertEqual(
                a.digest(), xxhash.xxh64_digest(s, seed=low_seed))
            self.assertEqual(
                a.digest(), xxhash.xxh64_digest(s, seed=high_seed))
            self.assertEqual(
                a.hexdigest(), xxhash.xxh64_hexdigest(s, seed=low_seed))
            self.assertEqual(
                a.hexdigest(), xxhash.xxh64_hexdigest(s, seed=high_seed))
Example #6
0
def _grouped_q(hll_reg2, hll_vec2, num_bits):
    """Return the accumulated ``q_st2`` term of the grouped sketch.

    For every group this adds ``4.0 / 2**reg`` and then, for each cell of
    the group and each bit position ``k`` below ``num_bits``, adds
    ``1 / 2**(reg - num_bits + 1 + k)`` when bit ``k`` of the cell is clear
    (and ``0.0`` when it is set).

    Terms are added strictly in group / cell / bit order with a plain loop
    so that the floating-point result is reproducible.
    """
    q_st2 = 0
    for reg, vec in zip(hll_reg2, hll_vec2):
        q_st2 += 4.0 / (1 << reg)
        base_shift = reg - num_bits + 1
        for cell in vec:
            for k in range(num_bits):
                q_st2 += float((~cell >> k) & 1) / (1 << (base_shift + k))
    return q_st2


def sim_run(threadName, seed, cardinalities, r_thr, hll_bits, grouped_hll,
            num_bits):
    """Run the streaming vs. grouped-streaming HLL simulation on Zipf data.

    For every target cardinality a stream is drawn from
    ``np.random.zipf(1.0001, 1)`` until it holds that many distinct items.
    The stream is then shuffled and replayed ``r_thr`` times; each replay
    hashes the items with ``xxhash.xxh64`` (seed derived from ``seed`` and
    the run number) and feeds two estimators: a plain register array and a
    grouped variant that keeps one register plus per-cell bit vectors.

    One tab-separated row per replay is written to
    ``<threadName>_<hll_bits>-<grouped_hll>_<num_bits>.txt`` with the
    columns: run, streaming estimate, final ``q_st / hll_size``, grouped
    estimate, final ``q_st2 / hll_size``, distinct items, total items,
    seconds spent in the plain update, seconds spent in the grouped update.
    Mean / standard deviation summaries are appended to
    ``<threadName>_timing.txt``.

    Args:
        threadName: Prefix for the output file names and console output.
        seed: Seed for ``np.random`` and base of the per-run hash seed.
        cardinalities: Iterable of distinct-item counts to simulate.
        r_thr: Number of replays per cardinality.
        hll_bits: Number of leading hash bits used as register index; the
            plain sketch has ``1 << hll_bits`` registers.
        grouped_hll: Number of cells sharing one grouped register.
            Presumably expected to divide ``1 << hll_bits`` evenly -
            TODO confirm with callers.
        num_bits: Width in bits of each cell's bit vector.

    Exits the interpreter with status 2 if an output file cannot be opened.
    """
    np.random.seed(seed)
    start = datetime.now()

    f2 = (threadName + "_" + str(hll_bits) + "-" + str(grouped_hll) + "_" +
          str(num_bits) + ".txt")
    f3 = threadName + "_timing.txt"
    try:
        fr2 = open(f2, 'w')
        try:
            fr3 = open(f3, 'a')
        except (IOError, OSError):
            fr2.close()
            raise
    except (IOError, OSError):
        # open() reports failures as IOError/OSError, never IndexError.
        print("Error: Filename ")
        sys.exit(2)

    hll_st_list = []
    hll_st2_list = []
    gelement_list = []
    abs_err_list = []
    abs_err2_list = []

    hll_size = 1 << hll_bits
    mask_bits = (1 << num_bits) - 1
    # Floor division keeps this an int on Python 3 as well; it is used as a
    # list length below.
    num_reg = hll_size // grouped_hll

    try:
        for car_thr in cardinalities:
            # Draw items until car_thr distinct ones were seen; duplicates
            # stay in the stream.
            gelement = {}
            hll_items = []
            while len(gelement) < car_thr:
                hll_item = str(np.random.zipf(1.0001, 1))
                gelement[hll_item] = 1
                hll_items.append(hll_item)

            for runs in range(r_thr):
                xseed = seed + (12 + runs) * runs
                random.shuffle(hll_items)
                print(threadName, car_thr, runs, hll_bits, grouped_hll,
                      num_bits)

                # ---- HLL streaming INIT
                hll_reg0 = [0] * hll_size
                q_st = float(hll_size)
                hll_st = 0
                timer0 = start - start  # zero timedelta

                # ---- Exact counting INIT
                gelement = {}
                tot_pkts = 0

                # ---- HLL grouped streaming INIT
                hll_reg2 = [num_bits] * num_reg
                hll_vec2 = [[0] * grouped_hll for _ in range(num_reg)]
                hll_st2 = 0
                timer1 = start - start  # zero timedelta

                for hll_item in hll_items:
                    tot_pkts += 1

                    # Exact (global) counting
                    gelement[hll_item] = 1

                    # ---- HLL streaming
                    hash0 = xxhash.xxh64(hll_item.encode('ascii'),
                                         seed=xseed).intdigest()
                    index0 = hash0 >> (64 - hll_bits)
                    rank0 = hll_rank(hash0, hll_bits)

                    start_0 = datetime.now()
                    if rank0 > hll_reg0[index0]:
                        # Recompute q from the registers *before* the update
                        # and credit the estimate with hll_size / q.
                        # Explicit loop: keeps the float summation order.
                        q_st = 0
                        for reg in hll_reg0:
                            q_st += 1.0 / (1 << reg)
                        hll_st += float(hll_size) / q_st

                        # reg update
                        hll_reg0[index0] = rank0
                    end_0 = datetime.now()
                    timer0 += end_0 - start_0

                    # ---- HLL grouped streaming (same hash as above)
                    hash2 = hash0
                    index2 = hash2 >> (64 - hll_bits)
                    # Floor division: reg_index is used as a list index.
                    reg_index = index2 // grouped_hll
                    vec_index = index2 % grouped_hll
                    rank2 = hll_rank(hash2, hll_bits)

                    start_1 = datetime.now()
                    group_reg = hll_reg2[reg_index]
                    group_vec = hll_vec2[reg_index]
                    if rank2 > group_reg:
                        # Probability term is taken before the sketch changes.
                        q_st2 = _grouped_q(hll_reg2, hll_vec2, num_bits)

                        # reg update: raise the register, shift every cell
                        # of the group down by the increase and set the top
                        # bit of the cell that was hit.
                        shift = rank2 - group_reg
                        hll_reg2[reg_index] = rank2
                        for j in range(grouped_hll):
                            group_vec[j] = (group_vec[j] >> shift) & mask_bits
                        group_vec[vec_index] |= (
                            1 << (num_bits - 1)) & mask_bits
                        hll_st2 += float(hll_size) / q_st2
                    elif group_reg - rank2 < num_bits:
                        # Rank falls inside the window tracked by the cell.
                        bit_pos = num_bits - 1 - group_reg + rank2
                        if ((group_vec[vec_index] >> bit_pos) & 1) == 0:
                            q_st2 = _grouped_q(hll_reg2, hll_vec2, num_bits)

                            # vec update
                            hll_st2 += float(hll_size) / q_st2
                            group_vec[vec_index] |= (
                                1 << bit_pos) & mask_bits
                    end_1 = datetime.now()
                    timer1 += end_1 - start_1

                # Same term evaluated once more at the end of the stream.
                q_st2 = _grouped_q(hll_reg2, hll_vec2, num_bits)
                hll_st1 = q_st2 / float(hll_size)

                distinct = len(gelement)
                fr2.write("\t".join([
                    str(runs),
                    str(round(hll_st, 3)),
                    str(round(q_st / float(hll_size), 6)),
                    str(round(hll_st2, 3)),
                    str(round(hll_st1, 6)),
                    str(distinct),
                    str(tot_pkts),
                    str(timer0.total_seconds()),
                    str(timer1.total_seconds()),
                ]) + "\n")
                print(threadName, runs, hll_st, q_st / float(hll_size),
                      hll_st2, hll_st1, distinct, tot_pkts,
                      timer0.total_seconds(), timer1.total_seconds())

                hll_st_list.append(hll_st)
                hll_st2_list.append(hll_st2)
                gelement_list.append(distinct)
                abs_err_list.append(abs(hll_st - distinct))
                abs_err2_list.append(abs(hll_st2 - distinct))

        fr3.write("------------------------------------------\n")
        now = datetime.now()
        fr3.write(threadName + '-command line: ' + ' '.join(sys.argv) + '\n')
        fr3.write('simulation time: ' + str(now - start) + '\n')
        fr3.write("==========================================\n")
        fr3.write(
            str(np.mean(hll_st_list)) + " " + str(np.mean(hll_st2_list)) +
            " " + str(np.mean(gelement_list)) + " - " +
            str(np.mean(abs_err_list)) + " " + str(np.mean(abs_err2_list)) +
            "\n")
        fr3.write(
            str(np.std(hll_st_list)) + " " + str(np.std(hll_st2_list)) +
            " " + str(np.std(gelement_list)) + " - " +
            str(np.std(abs_err_list)) + " " + str(np.std(abs_err2_list)) +
            "\n")
    finally:
        # Release both output files even if a run fails half-way.
        fr2.close()
        fr3.close()
Example #7
0
def _grouped_q(hll_reg2, hll_vec2, num_bits):
    """Return the accumulated ``q_st2`` term of the grouped sketch.

    For every group this adds ``4.0 / 2**reg`` and then, for each cell of
    the group and each bit position ``k`` below ``num_bits``, adds
    ``1 / 2**(reg - num_bits + 1 + k)`` when bit ``k`` of the cell is clear
    (and ``0.0`` when it is set).

    Terms are added strictly in group / cell / bit order with a plain loop
    so that the floating-point result is reproducible.
    """
    q_st2 = 0
    for reg, vec in zip(hll_reg2, hll_vec2):
        q_st2 += 4.0 / (1 << reg)
        base_shift = reg - num_bits + 1
        for cell in vec:
            for k in range(num_bits):
                q_st2 += float((~cell >> k) & 1) / (1 << (base_shift + k))
    return q_st2


def _flow_item(ip, parsing_mode):
    """Build the string that is hashed into the sketches for one IP packet.

    ``parsing_mode`` selects the key:
        2          -> destination address
        3          -> "src dst"
        4          -> "src dst sport dport"
        otherwise  -> source address
    Ports are taken from the TCP/UDP payload when the packet is longer than
    24 bytes and stay 0 otherwise (or when the payload has no port fields).
    """
    src = socket.inet_ntop(socket.AF_INET, ip.src)
    dst = socket.inet_ntop(socket.AF_INET, ip.dst)

    sport = 0
    dport = 0
    try:
        if (ip.p in (dpkt.ip.IP_PROTO_TCP, dpkt.ip.IP_PROTO_UDP)
                and len(ip) > 24):
            transport = ip.data
            sport = transport.sport
            dport = transport.dport
    except AttributeError:
        # Payload without port attributes: keep whatever was read so far.
        pass

    if parsing_mode == 2:
        return dst
    if parsing_mode == 3:
        return src + " " + dst
    if parsing_mode == 4:
        return src + " " + dst + " " + str(sport) + " " + str(dport)
    return src


def sim_run(threadName, seed, hll_bits, grouped_hll, num_bits, filenames):
    """Run the streaming vs. grouped-streaming HLL simulation on pcap files.

    Every capture in ``filenames`` is replayed 10 times.  Each IPv4 packet
    is reduced to a flow key (source, destination and ports), hashed with
    ``xxhash.xxh64`` (seed derived from ``seed``, the run number and the
    1-based file position) and fed to two estimators: a plain register
    array and a grouped variant that keeps one register plus per-cell bit
    vectors.

    One tab-separated row per replay is written to
    ``<threadName>_<hll_bits>-<grouped_hll>_<num_bits>.txt`` with the
    columns: run, streaming estimate, final ``q_st / hll_size``, grouped
    estimate, final ``q_st2 / hll_size``, distinct keys, processed packets,
    file name, seconds spent in the plain update, seconds spent in the
    grouped update.  Mean / standard deviation summaries are appended to
    ``<threadName>_timing.txt``.

    Args:
        threadName: Prefix for the output file names and console output.
        seed: Seed for ``np.random`` and base of the per-run hash seed.
        hll_bits: Number of leading hash bits used as register index; the
            plain sketch has ``1 << hll_bits`` registers.
        grouped_hll: Number of cells sharing one grouped register.
            Presumably expected to divide ``1 << hll_bits`` evenly -
            TODO confirm with callers.
        num_bits: Width in bits of each cell's bit vector.
        filenames: Iterable of pcap file paths.

    Exits the interpreter with status 2 if a file cannot be opened.
    """
    parsing_mode = 4
    start = datetime.now()
    np.random.seed(seed)

    f2 = (threadName + "_" + str(hll_bits) + "-" + str(grouped_hll) + "_" +
          str(num_bits) + ".txt")
    f3 = threadName + "_timing.txt"
    try:
        fr2 = open(f2, 'w')
        try:
            fr3 = open(f3, 'a')
        except (IOError, OSError):
            fr2.close()
            raise
    except (IOError, OSError):
        # open() reports failures as IOError/OSError, never IndexError.
        print("Error: Filename ")
        sys.exit(2)

    hll_st_list = []
    hll_st2_list = []
    gelement_list = []
    abs_err_list = []
    abs_err2_list = []

    hll_size = 1 << hll_bits
    mask_bits = (1 << num_bits) - 1
    # Floor division keeps this an int on Python 3 as well; it is used as a
    # list length below.
    num_reg = hll_size // grouped_hll

    try:
        for runis, filename in enumerate(filenames, 1):
            for runs in range(10):
                xseed = seed + (12 + runs) * (runs + runis)
                try:
                    # pcap is a binary format; dpkt needs bytes from read().
                    fr1 = open(filename, 'rb')
                except (IOError, OSError):
                    print("Error: Filename ")
                    sys.exit(2)
                print(threadName, filename, runs, hll_bits, grouped_hll,
                      num_bits)

                try:
                    pcap = dpkt.pcap.Reader(fr1)
                    is_sll = pcap.datalink() == dpkt.pcap.DLT_LINUX_SLL

                    # ---- HLL streaming INIT
                    hll_reg0 = [0] * hll_size
                    q_st = float(hll_size)
                    hll_st = 0
                    timer0 = start - start  # zero timedelta

                    # ---- Exact counting INIT
                    gelement = {}
                    tot_pkts = 0

                    # ---- HLL grouped streaming INIT
                    hll_reg2 = [num_bits] * num_reg
                    hll_vec2 = [[0] * grouped_hll for _ in range(num_reg)]
                    hll_st2 = 0
                    timer1 = start - start  # zero timedelta

                    for ts, buf in pcap:
                        try:
                            if is_sll:
                                eth = dpkt.sll.SLL(buf)
                            else:
                                eth = dpkt.ethernet.Ethernet(buf)
                        except NotImplementedError:
                            print("Not Implemented for pkt: " +
                                  str(tot_pkts))
                            # No decoded frame for this packet: skip it
                            # instead of reusing the previous one.
                            continue

                        # NOTE(review): for SLL frames dpkt appears to
                        # expose the EtherType as `ethtype`; confirm that
                        # `type` is the intended field for that link type.
                        if eth.type != dpkt.ethernet.ETH_TYPE_IP:
                            continue

                        hll_item = _flow_item(eth.data, parsing_mode)
                        tot_pkts += 1

                        # Exact (global) counting
                        gelement[hll_item] = 1

                        # ---- HLL streaming
                        hash0 = xxhash.xxh64(hll_item.encode('ascii'),
                                             seed=xseed).intdigest()
                        index0 = hash0 >> (64 - hll_bits)
                        rank0 = hll_rank(hash0, hll_bits)

                        start_0 = datetime.now()
                        if rank0 > hll_reg0[index0]:
                            # Recompute q from the registers *before* the
                            # update and credit the estimate with
                            # hll_size / q.  Explicit loop: keeps the float
                            # summation order.
                            q_st = 0
                            for reg in hll_reg0:
                                q_st += 1.0 / (1 << reg)
                            hll_st += float(hll_size) / q_st

                            # reg update
                            hll_reg0[index0] = rank0
                        end_0 = datetime.now()
                        timer0 += end_0 - start_0

                        # ---- HLL grouped streaming (same hash as above)
                        hash2 = hash0
                        index2 = hash2 >> (64 - hll_bits)
                        # Floor division: reg_index is used as a list index.
                        reg_index = index2 // grouped_hll
                        vec_index = index2 % grouped_hll
                        rank2 = hll_rank(hash2, hll_bits)

                        start_1 = datetime.now()
                        group_reg = hll_reg2[reg_index]
                        group_vec = hll_vec2[reg_index]
                        if rank2 > group_reg:
                            # Probability term is taken before the sketch
                            # changes.
                            q_st2 = _grouped_q(hll_reg2, hll_vec2, num_bits)

                            # reg update: raise the register, shift every
                            # cell of the group down by the increase and
                            # set the top bit of the cell that was hit.
                            shift = rank2 - group_reg
                            hll_reg2[reg_index] = rank2
                            for j in range(grouped_hll):
                                group_vec[j] = (
                                    group_vec[j] >> shift) & mask_bits
                            group_vec[vec_index] |= (
                                1 << (num_bits - 1)) & mask_bits
                            hll_st2 += float(hll_size) / q_st2
                        elif group_reg - rank2 < num_bits:
                            # Rank falls inside the window tracked by the
                            # cell.
                            bit_pos = num_bits - 1 - group_reg + rank2
                            if ((group_vec[vec_index] >> bit_pos) & 1) == 0:
                                q_st2 = _grouped_q(hll_reg2, hll_vec2,
                                                   num_bits)

                                # vec update
                                hll_st2 += float(hll_size) / q_st2
                                group_vec[vec_index] |= (
                                    1 << bit_pos) & mask_bits
                        end_1 = datetime.now()
                        timer1 += end_1 - start_1
                finally:
                    # One handle per replay; close it before the next one.
                    fr1.close()

                # Same term evaluated once more at the end of the stream.
                q_st2 = _grouped_q(hll_reg2, hll_vec2, num_bits)
                hll_st1 = q_st2 / float(hll_size)

                distinct = len(gelement)
                fr2.write("\t".join([
                    str(runs),
                    str(round(hll_st, 3)),
                    str(round(q_st / float(hll_size), 6)),
                    str(round(hll_st2, 3)),
                    str(round(hll_st1, 6)),
                    str(distinct),
                    str(tot_pkts),
                    filename,
                    str(timer0.total_seconds()),
                    str(timer1.total_seconds()),
                ]) + "\n")
                print(threadName, runs, hll_st, q_st / float(hll_size),
                      hll_st2, hll_st1, distinct, tot_pkts, filename,
                      timer0.total_seconds(), timer1.total_seconds())

                hll_st_list.append(hll_st)
                hll_st2_list.append(hll_st2)
                gelement_list.append(distinct)
                abs_err_list.append(abs(hll_st - distinct))
                abs_err2_list.append(abs(hll_st2 - distinct))

        fr3.write("------------------------------------------\n")
        now = datetime.now()
        fr3.write(threadName + '-command line: ' + ' '.join(sys.argv) + '\n')
        fr3.write('simulation time: ' + str(now - start) + '\n')
        fr3.write("==========================================\n")
        fr3.write(
            str(np.mean(hll_st_list)) + " " + str(np.mean(hll_st2_list)) +
            " " + str(np.mean(gelement_list)) + " - " +
            str(np.mean(abs_err_list)) + " " + str(np.mean(abs_err2_list)) +
            "\n")
        fr3.write(
            str(np.std(hll_st_list)) + " " + str(np.std(hll_st2_list)) +
            " " + str(np.std(gelement_list)) + " - " +
            str(np.std(abs_err_list)) + " " + str(np.std(abs_err2_list)) +
            "\n")
    finally:
        # Release both output files even if a run fails half-way.
        fr2.close()
        fr3.close()