def run(blocks=1024, width=64, k=3, groups=2, factor=8, hash_f='md5', swap=1):
    """Run the random-element false-positive experiment on an adaptive Bloom filter.

    For every multiplier in ``mul`` the function builds a fresh filter, stores
    ``factor * blocks`` random positives, generates ``mul * factor * blocks``
    random negatives, and measures the false-positive rate over
    ``totalIterations`` independent runs, logging per-iteration and averaged
    results to screen and to a result file.

    Parameters:
        blocks:  number of words (blocks) in the filter.
        width:   bits per word.
        k:       number of hash functions.
        groups:  number of hash-function groups.
        factor:  stored elements = factor * blocks.
        hash_f:  'sha512', 'sha512b', or anything else for the default MD5.
        swap:    swap hash groups after every `swap` false positives
                 (swap=1 adapts on every false positive).
    """
    # A = mul * N multipliers for the different experiments
    mul = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
    # factor * blocks elements are to be stored
    maxin = factor * blocks
    # Number of times to execute each experiment to get an average
    totalIterations = 10
    # A*t tests (negative element checks in the filter) are executed per
    # iteration, where A = mul * maxin (stored elements)
    t = 10
    # Directory where the output logs are written
    directory = './data_test/'
    # Names of the output files
    logOutput = 'result_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups, factor)
    logOutput2 = 'resultmin_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups, factor)
    # LogNull does not print, LogFile prints to file and LogScreen to the
    # default output. Change the objects depending on which one you want to use.
    log = LogNull()
    # NOTE(review): logOutput2 is defined but log2 opens logOutput — confirm
    # whether the "resultmin" file name was intended here.
    log2 = LogFile(directory + logOutput, "w")
    sc = LogScreen()

    # Message printing the parameters used for the experiment
    info = "Initializing parameters blocks=%d, width=%d, k=%d, groups=%d, factor=%d, hash_f=%s, swap=%s" % (
        blocks, width, k, groups, factor, hash_f, swap)
    sc.write(info)
    log.write(info + "\n")
    log2.write(info + "\n")

    # For each of the A/N factors to be checked
    for i in mul:
        # Log the start of the experiment
        info = "Starting execution of A=%d*%d*%d" % (factor, blocks, i)
        sc.write(info)
        log.write(info + "\n")
        log2.write(info + "\n")

        completed = 0  # iterations finished so far
        tfp = 0        # total false positives across iterations
        ttn = 0        # total true negatives across iterations
        sfpr = 0       # accumulated per-iteration FPRs (averaged later)
        # The total number of tests is A*t = i*maxin*t
        totalTests = maxin * t * i
        # List holding the negative elements
        l = list()

        # Run for the number of iterations expected
        while completed < totalIterations:
            # Clear the negatives left over from the previous iteration
            l.clear()
            # Data set keeping the actual stored elements
            ds = DataSet()
            # Create the Bloom filter with the requested hash implementation
            if hash_f == 'sha512':
                sha = GenericHashFunctionsSHA512(words=blocks, bits=width,
                                                 nhash=k, hash_groups=groups)
                abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                                 nhash=k, hash_groups=groups,
                                                 hash_f=sha)
            elif hash_f == 'sha512b':
                sha = GenericHashFunctionsSHA512All(words=blocks, bits=width,
                                                    nhash=k, hash_groups=groups)
                abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                                 nhash=k, hash_groups=groups,
                                                 hash_f=sha)
            else:
                # Otherwise build it using the default MD5 hash
                abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                                 nhash=k, hash_groups=groups)

            # Create the maxin positives that will be stored in the filter
            # and the memory (keep calling until the set reaches maxin,
            # since random generation may produce duplicates)
            while ds.length() < maxin:
                generateRandomElements(maxin, abf, ds)
            sc.write("length stored: %s" % ds.length())

            # Create the i*maxin negatives that will be checked against the
            # filter, excluding the positives. If a collision with a stored
            # positive is detected anyway, report it and regenerate.
            # (Fix: the original broke out of the iteration loop when the
            # sets WERE disjoint, which aborted every run on the normal path.)
            while True:
                l.clear()
                generateRandomElements(maxin * i, lis=l, exclude=ds.data)
                if ds.data.isdisjoint(l):
                    break
                sc.write("False positive found")

            count = 0  # tests performed in this iteration
            fp = 0     # false positives in this iteration
            tn = 0     # true negatives in this iteration
            # Finish when all the tests were run
            while count < totalTests:
                # Select a random element among all the negatives
                element = random.choice(l)
                # By default, suppose it is a true negative
                tn += 1
                # Check if it gives a false positive
                if abf.check(element):
                    fp += 1  # add it to the false positive count
                    tn -= 1  # no longer a true negative
                    # Swap between hash groups every "swap" false positives;
                    # the modulo operator detects the multiple
                    if fp % swap == 0:
                        abf.swaphash(element)  # Bloom filter adaptation
                # A test has been completed
                count += 1

            # Print results of the current iteration
            info = "Iteration %s. FP=%d, TN=%d,FPR=%s." % (completed, fp, tn, fp / (fp + tn))
            sc.write(info)
            log.write(info + "\n")
            # Increase the number of completed iterations
            completed += 1
            # Accumulate false positives and true negatives over iterations
            tfp += fp
            ttn += tn
            # Accumulate the false positive rate (averaged by iterations later)
            sfpr += fp / (fp + tn)

        # Print the total number of iterations, false positives and true negatives
        info = "Completados %s. TFP=%s, TTN=%s." % (completed, tfp, ttn)
        sc.write(info)
        log.write(info + "\n")
        # Print the average values of false positives and true negatives
        info = "Mean: TFP=%s, TTN=%s." % (tfp / completed, ttn / completed)
        sc.write(info)
        log.write(info + "\n")
        # Mean FPR based on the total accumulated TFP and TTN numbers
        info = "TFP/(TFP+TTN) for %s*%s*%d = %s" % (factor, blocks, i, tfp / (tfp + ttn))
        sc.write(info)
        log.write(info + "\n")
        # Mean FPR dividing the accumulated sfpr by the number of iterations
        info = "Mean FPR for %s*%s*%d = %s" % (factor, blocks, i, round(sfpr / completed, 4))
        sc.write(info)
        log.write(info + "\n")
        log2.write("%s\n" % round(sfpr / completed, 4))
        log.flush()
        log2.flush()

    log.close()
    log2.close()
from LogFile import LogFile
from DelimFile import DelimFile

# Demo: append two messages to a plain-text log and two rows to a
# comma-delimited file using the project's file-writer helpers.
text_log = LogFile("log.txt")
csv_out = DelimFile("data.csv", ",")

for message in ("This is a log message", "This is another log message"):
    text_log.write(message)

csv_out.write(['a', 'b', 'c', 'd'])
csv_out.write(['1', '2', '3', '4'])
def run(traces, folder, blocks=1024, width=64, k=3, groups=2, factor=8, hash_f='md5', swap=1):
    """Run the trace-driven false-positive experiment on an adaptive Bloom filter.

    For each of ``totalIterations`` iterations it loads ``factor * blocks``
    elements from a pre-shuffled file (``<folder>shuf<factor>N_<blocks>B_<i>.txt``),
    then replays the trace file against the filter counting true positives,
    true negatives and false positives, and finally reports the averaged
    false-positive rate.

    Parameters:
        traces:  name of the traces file inside `folder`.
        folder:  directory containing the shuffled input files and the traces.
        blocks:  number of words (blocks) in the filter.
        width:   bits per word.
        k:       number of hash functions.
        groups:  number of hash-function groups.
        factor:  stored elements = factor * blocks.
        hash_f:  'sha512', 'sha512b', or anything else for the default MD5.
        swap:    swap hash groups after every `swap` false positives.
    """
    # Number of times to execute each experiment to get an average.
    # There must exist as many files with the elements to be stored
    # as iterations.
    totalIterations = 10
    # Names of the output files
    logOutput = 'result_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups, factor)
    logOutput2 = 'resultmin_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups, factor)
    # LogNull does not print, LogFile prints to file and LogScreen to the
    # default output. Change the objects depending on which one you want to use.
    log = LogNull()
    # NOTE(review): logOutput2 is defined but log2 opens logOutput — confirm
    # whether the "resultmin" file name was intended here.
    log2 = LogFile(folder + logOutput, "w")
    sc = LogScreen()

    # Message explaining the file to be read for the traces
    info = "Traces file=%s" % (traces)
    sc.write(info)
    log.write(info + "\n")
    log2.write(info + "\n")
    # Message printing the parameters used for the experiment
    info = "Initializing parameters blocks=%d, width=%d, k=%d, groups=%d, factor=%d, hash_f=%s, swap=%s" % (
        blocks, width, k, groups, factor, hash_f, swap)
    sc.write(info)
    log.write(info + "\n")
    log2.write(info + "\n")

    # False positive rate accumulation element
    fpr = 0
    # factor * blocks elements are to be stored (loop invariant, hoisted)
    maxin = factor * blocks

    # Run the iterations and get the average
    for i in range(1, totalIterations + 1):
        # The file name should be similar to "/directory/shuf8N_1024B_1.txt"
        shuf_file = "%sshuf%sN_%sB_%s.txt" % (folder, factor, blocks, i)
        # Data set that keeps the actual elements that were added to the
        # filter, to perform the false-positive check
        ds = DataSet()
        # Build the filter with the requested hash implementation
        if hash_f == 'sha512':
            sha = GenericHashFunctionsSHA512(words=blocks, bits=width,
                                             nhash=k, hash_groups=groups)
            abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                             nhash=k, hash_groups=groups,
                                             hash_f=sha)
        elif hash_f == 'sha512b':
            sha = GenericHashFunctionsSHA512All(words=blocks, bits=width,
                                                nhash=k, hash_groups=groups)
            abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                             nhash=k, hash_groups=groups,
                                             hash_f=sha)
        else:
            # Otherwise build it using the default MD5 hash
            abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                             nhash=k, hash_groups=groups)

        fp = 0  # false positives
        tp = 0  # true positives
        tn = 0  # true negatives

        # Print the file name with the storable elements that is going to be used
        sc.write(shuf_file)
        # Keep storing until factor*blocks elements are read or the file ends.
        # `with` guarantees the handle is closed even if something raises.
        stored = 0
        with open(shuf_file, 'r') as dataToStore:
            for entry in dataToStore:
                if stored >= maxin:
                    break
                stored += 1
                # Store into the Bloom filter
                abf.add(entry)
                # Store in the slow memory for all the groups of functions
                abf.addslow(entry)
                # Store the actual value to check for false positives
                ds.add(entry)

        # Message to verify if we stored the expected number of elements
        sc.write("length stored: %s" % ds.length())

        # Process every element of the traces file
        with open(folder + traces, 'r') as caida:
            for element in caida:
                # By default, consider it a true negative
                tn += 1
                # If there is a match in the filter
                if abf.check(element):
                    if not ds.test(element):
                        # Not actually stored: a false positive
                        fp += 1
                        tn -= 1  # no longer considered true negative
                        # Swap between hash groups every "swap" false
                        # positives; the modulo operator detects the multiple
                        if fp % swap == 0:
                            abf.swaphash(element)
                    else:
                        # It was found and it was actually stored: true positive
                        tp += 1
                        tn -= 1  # no longer considered true negative

        # Per-iteration FPR; guard against an empty traces file (fp+tn == 0)
        iter_fpr = fp / (fp + tn) if (fp + tn) else 0.0
        # Accumulate the false positive rate; divided by the iterations later
        fpr += iter_fpr
        # Print the result of the iteration
        info = "Iteration %s. FP=%d, TP=%d, TN=%d, FPR=%s." % (i, fp, tp, tn, iter_fpr)
        sc.write(info)
        log.write(info + "\n")
        log2.write(info + "\n")

    # Print the final averaged result
    info = "FPR for %sx%s. FPR %s." % (factor, blocks, round(fpr / totalIterations, 6))
    sc.write(info)
    log.write(info + "\n")
    log2.write(info + "\n")
    # Flush and close the logs (consistent with the random-elements variant)
    log.flush()
    log2.flush()
    log.close()
    log2.close()