def translate(filename):
    """Tokenize *filename* and return the scanner output plus symbol tables.

    Reads the file character by character, classifying each one via the
    attribute table (empty / delimiter / letter / digit), recognizing
    keywords, identifiers, numbers and "(* ... *)" comments.

    Returns a dict with keys 'out', 'STRINGS', 'DIGITS' and 'positions'.
    Raises UnexpectedSymbolException on characters that fit no category.
    """
    DIGITS = DataSet(500, 750)
    STRINGS = DataSet(750, 1000)
    scanner = Scanner(filename)
    attributesTable = getTableOfAttributes()
    buf = ''  # one-character push-back buffer for look-ahead characters
    while True:
        if buf:
            # Re-consume the pushed-back character; undo the column advance
            # that happened when it was first read.
            c = buf
            buf = ''
            scanner.decreaseCol()
        else:
            c = scanner.read(True)
        if not c:  # end of input
            break
        sym = attributesTable[ord(c)]  # hoisted: classified once per character
        if sym == SymbolType.empty:
            continue
        if sym == SymbolType.delim:
            scanner.append(ord(c), c)
            continue
        if sym == SymbolType.letter:
            # Identifier or keyword.
            res = get_string(c, attributesTable, scanner)
            word = res['str']  # renamed from `str` to stop shadowing the builtin
            buf = res['c']
            if is_keyword(word):
                scanner.append(keywords[word], word)
            else:
                scanner.append(STRINGS.add(word), word)
            continue
        if sym == SymbolType.digit:
            # Number, or an identifier that begins with digits (e.g. "1abc").
            dgstr = c
            is_digit_flag = True
            while True:
                c = scanner.read()
                if not c:
                    # BUGFIX: test end-of-input BEFORE ord(c); the original
                    # evaluated attributesTable[ord(c)] first, which raises
                    # TypeError on the empty string.
                    break
                sym = attributesTable[ord(c)]
                if sym == SymbolType.digit:
                    dgstr += c
                elif sym == SymbolType.delim or sym == SymbolType.empty:
                    break
                elif sym == SymbolType.letter:
                    # Digits followed by letters: treat the whole run as a word.
                    res = get_string(c, attributesTable, scanner)
                    dgstr += res['str']
                    buf = res['c']
                    is_digit_flag = False
                    break
                else:
                    raise UnexpectedSymbolException(
                        [scanner.line, scanner.column - 1], c)
            if is_digit_flag:
                scanner.append(DIGITS.add(dgstr), dgstr)
            elif is_keyword(dgstr):
                scanner.append(keywords[dgstr], dgstr)
            else:
                scanner.append(STRINGS.add(dgstr), dgstr)
            if c and attributesTable[ord(c)] == SymbolType.delim:
                # Push the terminating delimiter back so it is tokenized next.
                buf = c
            continue
        if c == '(':
            # Possible comment opener "(*"; comments run until "*)".
            c = scanner.read(True)
            if c == '*':
                flag = False  # BUGFIX: initialize; it was only ever assigned
                              # inside the loop below, risking NameError.
                while True:
                    c = scanner.read(True)
                    if c == '*':
                        # Swallow a run of '*' and look for the closing ')'.
                        while c == '*':
                            c = scanner.read(True)
                        if c == ')':
                            flag = True
                            break
                        # Not a closer yet -- keep scanning the comment body.
                    elif not c:
                        scanner.exception(EndOfFileException)
                if flag:
                    continue
            else:
                # '(' not followed by '*': column - 2 points back at the '('.
                raise UnexpectedSymbolException(
                    [scanner.line, scanner.column - 2], '(')
        scanner.exception(UnexpectedSymbolException, c)
    # scanner.handleEndOfReading()
    return {
        'out': scanner.out,
        'STRINGS': STRINGS.set,
        'DIGITS': DIGITS.set,
        'positions': scanner.positions
    }
# Main moderation loop: continuously scan the subreddit's hot posts,
# flag comments whose body duplicates an already-seen one, message the
# author and remove the duplicate.  Runs forever.
while True:
    for submission in subreddit.hot(limit=100):
        # Expand "load more comments" stubs so comments.list() is complete.
        submission.comments.replace_more()
        for comment in submission.comments.list():
            # NOTE(review): in PRAW comment.author is None for deleted
            # accounts -- guard so one deleted author does not crash the
            # whole loop.  TODO confirm desired handling of such comments.
            if comment.author is None:
                continue
            author_name = comment.author.name
            if saved_data.is_in("id", comment.id) is not None:
                # Comment already recorded -- nothing to do.
                print("l")
                continue
            print("new comment")
            # Same body already stored under a different id -> repost.
            # The bot's own account is exempt.
            if (saved_data.is_in("body", comment.body) is not None
                    and author_name != 'RepostDeputy'):
                author: Redditor = comment.author
                print("messaging " + author.name + " about post deletion")
                author.message(subject="Post removed",
                               message="Your post " + comment.body +
                               " is a repost")
                saved_data.add({
                    "id": comment.id,
                    "body": comment.body,
                    "flagged": "t"
                })
                comment.mod.remove()
            else:
                # First sighting of this body: record it, unflagged.
                saved_data.add({
                    "id": comment.id,
                    "body": comment.body,
                    "flagged": "f"
                })
def run(traces, folder, blocks=1024, width=64, k=3, groups=2, factor=8,
        hash_f='md5', swap=1):
    """Measure the false-positive rate of a GenericAdaptiveBloomFilter.

    For each iteration, factor*blocks elements from a shuffled input file
    are stored in the filter, then every element of the traces file is
    checked against it.  Per-iteration and averaged FPR figures are
    written to the configured log sinks.

    :param traces: name of the traces file (relative to *folder*)
    :param folder: directory holding the input files and receiving the logs
    :param blocks: number of words in the filter
    :param width: bits per word
    :param k: number of hash functions
    :param groups: number of hash-function groups
    :param factor: load factor; factor*blocks elements are stored
    :param hash_f: 'sha512', 'sha512b' or anything else for default MD5
    :param swap: swap hash groups every *swap* false positives
    """
    # Number of times to execute each experiment to get an average.
    # There must exist as many files with the elements to be stored
    # as iterations.
    totalIterations = 10

    # Names of the output files.
    logOutput = 'result_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups,
                                                factor)
    logOutput2 = 'resultmin_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups,
                                                    factor)
    # NOTE(review): logOutput2 is never used below; log2 writes to logOutput.
    # Preserved as-is -- confirm whether log2 was meant to use logOutput2.

    # LogNull discards, LogFile writes to a file, LogScreen to the default
    # output.  Swap the objects depending on which sinks you want active.
    log = LogNull()
    log2 = LogFile(folder + logOutput, "w")
    sc = LogScreen()

    def report(info):
        # Write one message to every sink (screen, null log, file log).
        sc.write(info)
        log.write(info + "\n")
        log2.write(info + "\n")

    # File to be read for the traces.
    report("Traces file=%s" % (traces))
    # Parameters used for the experiment.
    report("Initializing parameters blocks=%d, width=%d, k=%d, groups=%d, "
           "factor=%d, hash_f=%s, swap=%s"
           % (blocks, width, k, groups, factor, hash_f, swap))

    # False positive rate accumulation element.
    fpr = 0

    # Run the iterations and get the average.
    for i in range(1, totalIterations + 1):
        # The file name should be similar to "/directory/shuf8N_1024B_1.txt"
        shuf_file = "%sshuf%sN_%sB_%s.txt" % (folder, factor, blocks, i)

        # Data set keeping the actual stored elements, used as ground truth
        # for the false-positive check.
        ds = DataSet()

        # Build the filter with the requested hash family.
        if hash_f == 'sha512':
            sha = GenericHashFunctionsSHA512(words=blocks, bits=width,
                                             nhash=k, hash_groups=groups)
            abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                             nhash=k, hash_groups=groups,
                                             hash_f=sha)
        elif hash_f == 'sha512b':
            sha = GenericHashFunctionsSHA512All(words=blocks, bits=width,
                                                nhash=k, hash_groups=groups)
            abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                             nhash=k, hash_groups=groups,
                                             hash_f=sha)
        else:
            # Otherwise build it using the default MD5 hash.
            abf = GenericAdaptiveBloomFilter(words=blocks, bits=width,
                                             nhash=k, hash_groups=groups)

        fp = 0  # false positives
        tp = 0  # true positives
        tn = 0  # true negatives
        maxin = factor * blocks  # number of elements to store

        # Print the file name with the storable elements to be used.
        sc.write(shuf_file)

        stored = 0
        # BUGFIX: context managers close the files even when an exception
        # fires mid-iteration; the original only closed on the happy path.
        with open(shuf_file, 'r') as dataToStore:
            # Keep storing until factor*blocks is reached or the file ends.
            for entry in dataToStore:
                if stored >= maxin:
                    break
                stored += 1
                abf.add(entry)      # store into the Bloom filter
                abf.addslow(entry)  # slow memory, for all function groups
                ds.add(entry)       # actual value, for false-positive checks

        # Verify that we stored the expected number of elements.
        sc.write("length stored: %s" % ds.length())

        with open(folder + traces, 'r') as caida:
            # Process all trace elements.
            for element in caida:
                # By default, consider it a true negative.
                tn += 1
                if abf.check(element):
                    if not ds.test(element):
                        # Match in the filter but never stored: false positive.
                        fp += 1
                        tn -= 1
                        # Swap between functions every "swap" false positives.
                        if fp % swap == 0:
                            abf.swaphash(element)
                    else:
                        # Found and actually stored: true positive.
                        tp += 1
                        tn -= 1

        # Accumulate the FPR; divided by the iteration count at the end.
        fpr += fp / (fp + tn)
        report("Iteration %s. FP=%d, TP=%d, TN=%d, FPR=%s."
               % (i, fp, tp, tn, fp / (fp + tn)))

    # Print the final averaged result.
    report("FPR for %sx%s. FPR %s."
           % (factor, blocks, round(fpr / totalIterations, 6)))