Beispiel #1
0
def run(blocks=1024, width=64, k=3, groups=2, factor=8, hash_f='md5', swap=1):
    """Measure the false positive rate of an adaptive Bloom filter on random data.

    For every multiplier ``m`` from 1 to 10 (in ascending order) the
    experiment is repeated ``total_iterations`` times. Each repetition builds
    a fresh filter, asks ``generateRandomElements`` for ``factor * blocks``
    elements to store and for ``m * factor * blocks`` negatives, and then
    checks ``factor * blocks * t * m`` randomly drawn negatives against the
    filter. Every ``swap``-th false positive triggers ``abf.swaphash``.

    Progress is written to the screen logger; the summary file
    ``./data_test/result_b<blocks>_w<width>_k<k>_g<groups>_f<factor>``
    receives the parameter line, one "Starting execution" line per multiplier
    and, after it, the mean false positive rate rounded to 4 decimals.

    Args:
        blocks: Passed to the filter and hash constructors as ``words``.
        width: Passed to the filter and hash constructors as ``bits``.
        k: Passed to the filter and hash constructors as ``nhash``.
        groups: Passed to the filter and hash constructors as ``hash_groups``.
        factor: ``factor * blocks`` elements are stored in the filter.
        hash_f: ``'sha512'`` selects ``GenericHashFunctionsSHA512``,
            ``'sha512b'`` selects ``GenericHashFunctionsSHA512All``; any
            other value leaves the filter on its own default hash.
        swap: ``abf.swaphash`` is called whenever the running false positive
            count of an iteration is a multiple of this value.

    Returns:
        None.
    """
    # A/N multipliers to evaluate. A range (not a set) guarantees that the
    # positional lines written to the summary file are in ascending order.
    multipliers = range(1, 11)

    # factor * blocks elements are stored in the filter
    maxin = factor * blocks

    # Number of repetitions of each experiment used to compute the average
    total_iterations = 10

    # A*t negative checks are executed per iteration, with A = multiplier * maxin
    t = 10

    # Directory where the summary file is written
    directory = './data_test/'
    log_output = 'result_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups,
                                                 factor)

    # LogNull does not print, LogFile prints to file and LogScreen to the
    # default output. Swap the objects to change where messages go.
    log = LogNull()
    log2 = LogFile(directory + log_output, "w")
    sc = LogScreen()

    def report(message, summary=False):
        """Send a message to the screen and detail log, optionally to the summary file."""
        sc.write(message)
        log.write(message + "\n")
        if summary:
            log2.write(message + "\n")

    def ratio(part, whole):
        """Return part / whole, or 0.0 when no tests were run (whole == 0)."""
        return part / whole if whole else 0.0

    def build_filter():
        """Create an empty adaptive Bloom filter for the requested hash family."""
        geometry = dict(words=blocks, bits=width, nhash=k, hash_groups=groups)
        if hash_f == 'sha512':
            hashes = GenericHashFunctionsSHA512(**geometry)
        elif hash_f == 'sha512b':
            hashes = GenericHashFunctionsSHA512All(**geometry)
        else:
            # No explicit hash object: the filter uses its own default
            return GenericAdaptiveBloomFilter(**geometry)
        return GenericAdaptiveBloomFilter(hash_f=hashes, **geometry)

    # The try/finally guarantees the log objects are closed even if an
    # experiment aborts half-way.
    try:
        report(
            "Initializing parameters blocks=%d, width=%d, k=%d, groups=%d, factor=%d, hash_f=%s, swap=%s" % (
                blocks, width, k, groups, factor, hash_f, swap),
            summary=True)

        for i in multipliers:
            report("Starting execution of A=%d*%d*%d" % (factor, blocks, i),
                   summary=True)

            tfp = 0  # Total false positives over all iterations
            ttn = 0  # Total true negatives over all iterations
            sfpr = 0  # Sum of per-iteration false positive rates
            # The total number of checks per iteration is A*t = i*maxin*t
            total_tests = maxin * t * i

            for iteration in range(total_iterations):
                negatives = []
                ds = DataSet()
                abf = build_filter()

                # Store the positives and draw the negatives. The loop is
                # left as soon as the negatives do not overlap the stored
                # data, or once the data set holds at least maxin elements.
                while ds.length() < maxin:
                    generateRandomElements(maxin, abf, ds)
                    sc.write("length stored: %s" % ds.length())

                    # Negatives are generated excluding the stored elements
                    negatives.clear()
                    generateRandomElements(maxin * i,
                                           lis=negatives,
                                           exclude=ds.data)
                    if ds.data.isdisjoint(negatives):
                        break
                    sc.write("False positive found")

                fp = 0  # false positives in this iteration
                tn = 0  # true negatives in this iteration
                for _ in range(total_tests):
                    # Pick a random negative and query the filter with it
                    element = negatives[random.randint(0, len(negatives) - 1)]
                    if abf.check(element):
                        fp += 1
                        # Adapt the filter every "swap" false positives
                        if fp % swap == 0:
                            abf.swaphash(element)
                    else:
                        tn += 1

                iteration_fpr = ratio(fp, fp + tn)
                report("Iteration %s. FP=%d, TN=%d,FPR=%s." %
                       (iteration, fp, tn, iteration_fpr))

                tfp += fp
                ttn += tn
                sfpr += iteration_fpr

            # Totals over all iterations
            report("Completados %s. TFP=%s, TTN=%s." %
                   (total_iterations, tfp, ttn))

            # Average counts per iteration
            report("Mean: TFP=%s, TTN=%s." %
                   (tfp / total_iterations, ttn / total_iterations))

            # FPR computed from the accumulated totals
            report("TFP/(TFP+TTN) for %s*%s*%d = %s" %
                   (factor, blocks, i, ratio(tfp, tfp + ttn)))

            # FPR computed as the mean of the per-iteration rates
            mean_fpr = round(sfpr / total_iterations, 4)
            report("Mean FPR for %s*%s*%d = %s" % (factor, blocks, i, mean_fpr))
            # The summary file only gets the bare number
            log2.write("%s\n" % mean_fpr)

            log.flush()
            log2.flush()
    finally:
        log.close()
        log2.close()
Beispiel #2
0
from LogFile import LogFile
from DelimFile import DelimFile

# Demo script: build one LogFile and one DelimFile, then write two entries
# to each of them.
log = LogFile("log.txt")
myDelim = DelimFile("data.csv", ",")

# Two plain text messages go to the log object, in this order
for message in ("This is a log message", "This is another log message"):
    log.write(message)

# Two rows (each a list of four strings) go to the delimited-file object
for row in (['a', 'b', 'c', 'd'], ['1', '2', '3', '4']):
    myDelim.write(row)
Beispiel #3
0
def run(traces,
        folder,
        blocks=1024,
        width=64,
        k=3,
        groups=2,
        factor=8,
        hash_f='md5',
        swap=1):
    """Measure the false positive rate of an adaptive Bloom filter on a trace file.

    The experiment is repeated ``total_iterations`` times. Iteration ``i``
    (1-based) builds a fresh filter, stores up to ``factor * blocks`` lines
    read from ``<folder>shuf<factor>N_<blocks>B_<i>.txt`` and then checks
    every line of ``<folder><traces>`` against the filter. A line that the
    filter accepts but that was not stored counts as a false positive, and
    every ``swap``-th false positive triggers ``abf.swaphash``.

    Messages go to the screen logger and to the summary file
    ``<folder>result_b<blocks>_w<width>_k<k>_g<groups>_f<factor>``.

    Args:
        traces: Name of the trace file, relative to ``folder``.
        folder: Path prefix (concatenated as-is, so it must end with a
            path separator) for the input files and the summary file.
        blocks: Passed to the filter and hash constructors as ``words``.
        width: Passed to the filter and hash constructors as ``bits``.
        k: Passed to the filter and hash constructors as ``nhash``.
        groups: Passed to the filter and hash constructors as ``hash_groups``.
        factor: At most ``factor * blocks`` elements are stored.
        hash_f: ``'sha512'`` selects ``GenericHashFunctionsSHA512``,
            ``'sha512b'`` selects ``GenericHashFunctionsSHA512All``; any
            other value leaves the filter on its own default hash.
        swap: ``abf.swaphash`` is called whenever the running false positive
            count of an iteration is a multiple of this value.

    Returns:
        None.
    """
    # Number of repetitions; one "shuf" input file must exist per iteration
    total_iterations = 10

    # factor * blocks elements are to be stored (same for every iteration)
    maxin = factor * blocks

    log_output = 'result_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups,
                                                 factor)

    # LogNull does not print, LogFile prints to file and LogScreen to the
    # default output. Swap the objects to change where messages go.
    log = LogNull()
    log2 = LogFile(folder + log_output, "w")
    sc = LogScreen()

    def report(message):
        """Send a message to the screen, the detail log and the summary file."""
        sc.write(message)
        log.write(message + "\n")
        log2.write(message + "\n")

    def build_filter():
        """Create an empty adaptive Bloom filter for the requested hash family."""
        geometry = dict(words=blocks, bits=width, nhash=k, hash_groups=groups)
        if hash_f == 'sha512':
            hashes = GenericHashFunctionsSHA512(**geometry)
        elif hash_f == 'sha512b':
            hashes = GenericHashFunctionsSHA512All(**geometry)
        else:
            # No explicit hash object: the filter uses its own default
            return GenericAdaptiveBloomFilter(**geometry)
        return GenericAdaptiveBloomFilter(hash_f=hashes, **geometry)

    # The try/finally guarantees the log objects are closed even if an
    # input file is missing or an iteration aborts.
    try:
        report("Traces file=%s" % traces)
        report(
            "Initializing parameters blocks=%d, width=%d, k=%d, groups=%d, factor=%d, hash_f=%s, swap=%s" % (
                blocks, width, k, groups, factor, hash_f, swap))

        # Sum of per-iteration false positive rates
        fpr = 0

        for i in range(1, total_iterations + 1):
            # The file name should be similar to "/directory/shuf8N_1024B_1.txt"
            shuf_file = "%sshuf%sN_%sB_%s.txt" % (folder, factor, blocks, i)
            # Keeps the elements actually stored, to tell true from false positives
            ds = DataSet()
            abf = build_filter()

            sc.write(shuf_file)
            # Store lines until maxin is reached or the file ends
            with open(shuf_file, 'r') as data_to_store:
                for stored, entry in enumerate(data_to_store):
                    if stored >= maxin:
                        break
                    # Store into the Bloom filter
                    abf.add(entry)
                    # Store in the slow memory for all the groups of functions
                    abf.addslow(entry)
                    # Store the actual value to check for false positives
                    ds.add(entry)

            # Lets the operator verify the expected number of elements was stored
            sc.write("length stored: %s" % ds.length())

            fp = 0  # false positives
            tp = 0  # true positives
            tn = 0  # true negatives
            with open(folder + traces, 'r') as caida:
                for element in caida:
                    if not abf.check(element):
                        tn += 1
                    elif ds.test(element):
                        # Matched and was really stored
                        tp += 1
                    else:
                        # Matched but was never stored
                        fp += 1
                        # Adapt the filter every "swap" false positives
                        if fp % swap == 0:
                            abf.swaphash(element)

            # A trace with no negatives (empty, or only stored elements)
            # yields a rate of 0.0 instead of a ZeroDivisionError.
            negatives_seen = fp + tn
            iteration_fpr = fp / negatives_seen if negatives_seen else 0.0
            fpr += iteration_fpr

            report("Iteration %s. FP=%d, TP=%d, TN=%d, FPR=%s." %
                   (i, fp, tp, tn, iteration_fpr))

        # Final result: mean of the per-iteration rates
        report("FPR for  %sx%s. FPR %s." %
               (factor, blocks, round(fpr / total_iterations, 6)))
    finally:
        log.close()
        log2.close()