예제 #1
0
def translate(filename):
    """Scan ``filename`` character by character and collect its tokens.

    Every character read from the scanner is classified through the table
    returned by ``getTableOfAttributes()`` (indexed by ``ord(c)``):

    * ``SymbolType.empty`` characters are skipped;
    * ``SymbolType.delim`` characters are appended with their own character
      code;
    * a ``SymbolType.letter`` starts a word read by ``get_string``; it is
      appended with its ``keywords`` code when ``is_keyword`` accepts it,
      otherwise with the id returned by ``STRINGS.add``;
    * a ``SymbolType.digit`` starts a run of digits appended with the id
      returned by ``DIGITS.add``; if a letter follows the digits directly,
      the whole run is treated as a word instead (keyword or ``STRINGS``);
    * ``(* ... *)`` is consumed without producing a token.

    Args:
        filename: Passed unchanged to ``Scanner``.

    Returns:
        A dict with the keys ``'out'`` and ``'positions'`` (taken from the
        scanner) and ``'STRINGS'`` and ``'DIGITS'`` (the ``set`` attribute of
        the two ``DataSet`` tables).

    Raises:
        UnexpectedSymbolException: When a number is followed by a character
            that is not a digit, letter, delimiter or empty symbol, or when
            ``(`` is not followed by ``*``.

    Unterminated comments and other unexpected characters are reported via
    ``scanner.exception`` (presumably raising - confirm against ``Scanner``).
    """
    # NOTE(review): the two ranges look like id ranges reserved for each
    # table - confirm against DataSet.
    DIGITS = DataSet(500, 750)
    STRINGS = DataSet(750, 1000)
    scanner = Scanner(filename)
    attributesTable = getTableOfAttributes()

    # One-character pushback: holds a character that ended the previous token
    # and still has to be classified on the next pass of the main loop.
    buf = ''

    while True:
        if buf:
            c = buf
            buf = ''
            scanner.decreaseCol()
        else:
            c = scanner.read(True)

        # A falsy read marks the end of the input.
        if not c:
            break

        kind = attributesTable[ord(c)]

        if kind == SymbolType.empty:
            continue

        if kind == SymbolType.delim:
            scanner.append(ord(c), c)
            continue

        if kind == SymbolType.letter:
            res = get_string(c, attributesTable, scanner)

            word = res['str']
            buf = res['c']

            if is_keyword(word):
                scanner.append(keywords[word], word)
            else:
                scanner.append(STRINGS.add(word), word)

            continue

        if kind == SymbolType.digit:
            dgstr = c
            is_digit_flag = True

            while True:
                c = scanner.read()

                # End of input must be tested before ord(c): ord('') raises
                # TypeError, which used to crash on a number at end of file.
                if not c:
                    break

                kind = attributesTable[ord(c)]

                if kind == SymbolType.digit:
                    dgstr += c
                elif kind == SymbolType.empty:
                    break
                elif kind == SymbolType.delim:
                    # The delimiter is a token of its own: push it back.
                    buf = c
                    break
                elif kind == SymbolType.letter:
                    # Digits immediately followed by letters form a word.
                    res = get_string(c, attributesTable, scanner)

                    dgstr += res['str']
                    buf = res['c']

                    is_digit_flag = False
                    break
                else:
                    raise UnexpectedSymbolException(
                        [scanner.line, scanner.column - 1], c)

            if is_digit_flag:
                scanner.append(DIGITS.add(dgstr), dgstr)
            elif is_keyword(dgstr):
                scanner.append(keywords[dgstr], dgstr)
            else:
                scanner.append(STRINGS.add(dgstr), dgstr)

            continue

        if c == '(':
            c = scanner.read(True)
            if c != '*':
                raise UnexpectedSymbolException(
                    [scanner.line, scanner.column - 2], '(')

            # Inside a comment: consume characters up to the closing "*)".
            while True:
                c = scanner.read(True)

                if c == '*':
                    # Collapse a run of '*' and look at what follows it.
                    while c == '*':
                        c = scanner.read(True)
                    if c == ')':
                        break
                elif not c:
                    scanner.exception(EndOfFileException)

            continue

        scanner.exception(UnexpectedSymbolException, c)

    # scanner.handleEndOfReading()

    return {
        'out': scanner.out,
        'STRINGS': STRINGS.set,
        'DIGITS': DIGITS.set,
        'positions': scanner.positions
    }
예제 #2
0
# Poll the subreddit's hot listing forever. Every comment not yet recorded in
# saved_data is stored; a new comment whose body is already stored is treated
# as a repost: its author is messaged and the comment is removed.
while True:
    for submission in subreddit.hot(limit=100):
        submission.comments.replace_more()
        # Pre-declared only to attach a type to the loop variable below.
        comment: Comment = None
        for comment in submission.comments.list():
            # The author is None once the account or comment is deleted;
            # there is nobody to message, so skip instead of crashing the
            # whole loop on `.name`.
            if comment.author is None:
                continue
            author_name = comment.author.name
            if saved_data.is_in("id", comment.id) is None:
                print("new comment")
                # The bot's own comments are never flagged as reposts.
                if saved_data.is_in(
                        "body", comment.body
                ) is not None and author_name != 'RepostDeputy':
                    author: Redditor = comment.author
                    print("messaging " + author.name + " about post deletion")
                    author.message(subject="Post removed",
                                   message="Your post " + comment.body +
                                   " is a repost")
                    saved_data.add({
                        "id": comment.id,
                        "body": comment.body,
                        "flagged": "t"
                    })
                    comment.mod.remove()
                else:
                    saved_data.add({
                        "id": comment.id,
                        "body": comment.body,
                        "flagged": "f"
                    })
            else:
                # Comment id already recorded on an earlier pass.
                print("l")
예제 #3
0
def run(traces,
        folder,
        blocks=1024,
        width=64,
        k=3,
        groups=2,
        factor=8,
        hash_f='md5',
        swap=1):
    """Measure the average false positive rate of an adaptive Bloom filter.

    The experiment is repeated 10 times. Iteration ``i`` builds a fresh
    filter, stores up to ``factor * blocks`` lines from
    ``<folder>shuf<factor>N_<blocks>B_<i>.txt`` and then queries every line of
    the traces file. Per-iteration counts and the averaged rate are written to
    the screen log and to the file ``folder + 'result_b.._w.._k.._g.._f..'``.

    Args:
        traces: Name of the traces file; opened as ``folder + traces``.
        folder: Path prefix for all files. It is concatenated as-is, so it
            must already end with a path separator.
        blocks: Passed to the filter as ``words``.
        width: Passed to the filter as ``bits``.
        k: Passed to the filter as ``nhash``.
        groups: Passed to the filter as ``hash_groups``.
        factor: ``factor * blocks`` lines are stored per iteration.
        hash_f: ``'sha512'`` or ``'sha512b'`` select an explicit hash object;
            any other value uses the filter's default hash.
        swap: ``abf.swaphash`` is called on every ``swap``-th false positive.
            Must be non-zero (it is used as a modulus).

    Returns:
        None.
    """
    # Number of times to execute each experiment to get an average.
    # There must exist as many files with the elements to be stored
    # as iterations.
    totalIterations = 10

    # Definition of the name of the output file.
    logOutput = 'result_b%s_w%s_k%s_g%s_f%s' % (blocks, width, k, groups,
                                                factor)

    # LogNull does not print, LogFile prints to file and LogScreen to the
    # default output. Change the objects depending on which one you want.
    log = LogNull()
    log2 = LogFile(folder + logOutput, "w")
    sc = LogScreen()

    # Message explaining the file to be read for the traces
    info = "Traces file=%s" % (traces)
    sc.write(info)
    log.write(info + "\n")
    log2.write(info + "\n")

    # Message printing the parameters used for the experiment
    info = "Initializing parameters blocks=%d, width=%d, k=%d, groups=%d, factor=%d, hash_f=%s, swap=%s" % (
        blocks, width, k, groups, factor, hash_f, swap)
    sc.write(info)
    log.write(info + "\n")
    log2.write(info + "\n")

    # False positive rate accumulated over the iterations
    fpr = 0

    # Sizing arguments shared by the filter and the explicit hash objects
    sizing = dict(words=blocks, bits=width, nhash=k, hash_groups=groups)

    # Run the iterations and get the average
    for i in range(1, totalIterations + 1):
        # The file name should be similar to "/directory/shuf8N_1024B_1.txt"
        shuf_file = "%sshuf%sN_%sB_%s.txt" % (folder, factor, blocks, i)
        # Data set that keeps the actual elements that were added to the
        # filter, used to tell false positives from true positives
        ds = DataSet()

        # Build the filter, passing an explicit hash object when requested
        if hash_f == 'sha512':
            abf = GenericAdaptiveBloomFilter(
                hash_f=GenericHashFunctionsSHA512(**sizing), **sizing)
        elif hash_f == 'sha512b':
            abf = GenericAdaptiveBloomFilter(
                hash_f=GenericHashFunctionsSHA512All(**sizing), **sizing)
        # Otherwise build it using the filter's default hash
        else:
            abf = GenericAdaptiveBloomFilter(**sizing)

        # False positives, true positives and true negatives of this run
        fp = 0
        tp = 0
        tn = 0
        # factor * blocks elements are to be stored
        maxin = factor * blocks

        # Print the file name with the storable elements that is going to be used
        sc.write(shuf_file)

        # Keep storing until factor*blocks is reached or the file ends.
        # The context manager closes the file even if the filter raises.
        stored = 0
        with open(shuf_file, 'r') as dataToStore:
            for entry in dataToStore:
                if stored >= maxin:
                    break
                stored += 1
                # Store into the Bloom filter
                abf.add(entry)
                # Store in the slow memory for all the groups of functions
                abf.addslow(entry)
                # Store the actual value to check for false positives
                ds.add(entry)

        # Message to verify if we stored the expected number of elements
        sc.write("length stored: %s" % ds.length())

        # Query the filter with every element of the traces file
        with open(folder + traces, 'r') as caida:
            for element in caida:
                if not abf.check(element):
                    # No match in the filter: true negative
                    tn += 1
                elif ds.test(element):
                    # Matched and it was actually stored: true positive
                    tp += 1
                else:
                    # Matched but never stored: false positive
                    fp += 1
                    # Swap between functions will occur every "swap" false
                    # positives found. Use the modulo operator to detect it
                    if fp % swap == 0:
                        abf.swaphash(element)

        # A trace without any negative (empty file, or every lookup a true
        # positive) has no false positive rate to speak of; report 0.0
        # instead of dividing by zero.
        negatives = fp + tn
        iteration_fpr = fp / negatives if negatives else 0.0

        # Accumulate the rate. It will be divided by the number of iterations
        fpr += iteration_fpr

        # Print the result of the iteration
        info = "Iteration %s. FP=%d, TP=%d, TN=%d, FPR=%s." % (i, fp, tp, tn,
                                                               iteration_fpr)
        sc.write(info)
        log.write(info + "\n")
        log2.write(info + "\n")

    # Print the final result
    info = "FPR for  %sx%s. FPR %s." % (factor, blocks,
                                        round(fpr / totalIterations, 6))
    sc.write(info)
    log.write(info + "\n")
    log2.write(info + "\n")