Exemple #1
0
import utilities
import string, os, sys

if __name__ == '__main__':
    # Command-line entry point: copy <infile> to <outfile>, passing the
    # key of every "key : value" line through utilities.rot13 and leaving
    # the value untouched.
    if len(sys.argv) < 3:
        print("Usage: python rot13.py <infile> <outfile>")
        # A missing argument is an error, so report failure to the caller.
        sys.exit(1)

    infile = sys.argv[1]
    outfile = sys.argv[2]

    # The context managers close both files even if a line fails to parse.
    with open(infile, 'r') as f, open(outfile, 'w') as fw:
        for line in f:
            # Blank lines carry no key/value pair; copy them through as-is
            # instead of failing on the unpacking below.
            if not line.strip():
                fw.write(line)
                continue

            # Split on the first separator only, so a value that itself
            # contains ' : ' is kept intact.  A non-blank line without any
            # separator still raises ValueError, as before.
            val1, val2 = line.split(' : ', 1)
            fw.write(utilities.rot13(val1) + ' : ' + val2)
Exemple #2
0
def profanityScore(text):
    """
    Return a profanity score for ``text``.

    The text is lower-cased and split on whitespace, and trailing periods
    are stripped from each token.  Tokens are passed through
    utilities.rot13 before being compared with the keys of
    'profane_words.txt' (presumably that file stores its words
    rot13-encoded -- confirm against the data file).

    An occurrence is recorded when a token
      * equals a profane word,
      * is one of the transpositions returned by getTransposedWords, or
      * contains a profane word and is not itself a dictionary word;
    and when a run of at least GROUP_SIZE consecutive tokens, glued
    together, covers enough of a profane word for checkMatchPercent to
    accept it (this catches words that were spaced out to disguise them).

    The score is the mean weight over all recorded occurrences, so it lies
    within the range of the weights in 'profane_words.txt'.  When nothing
    profane is found the function returns 0.
    """

    profane_word_weights = utilities.readPropertiesFile(
        'profane_words.txt', 'int')
    profane_words = profane_word_weights.keys()

    # Looked up by a token's first character below; presumably maps each
    # initial to the dictionary words that start with it.
    words_dict = utilities.readPropertiesFile('dict_words.txt', 'list')

    profane_words_transpose = {}
    for pw in profane_words:
        profane_words_transpose[pw] = getTransposedWords(pw)

    # Tokens consisting only of periods become empty after the strip.
    # Drop them: they contain no letters, and taking their first character
    # would raise IndexError.
    stripped = (token.rstrip('.') for token in text.lower().split())
    words = [w for w in stripped if w]

    words_count = defaultdict(int)

    for w in words:
        # A token whose initial has no dictionary entry is treated as a
        # non-dictionary word rather than raising KeyError.
        inDict = w in words_dict.get(w[0], ())

        w = utilities.rot13(w)

        # If the exact word appears in the list of profane words
        if w in profane_words:
            words_count[w] += 1

        # Check if the word is a transpose of the profane words
        for pw, transposed in profane_words_transpose.items():
            if w in transposed:
                words_count[pw] += 1

        # Check if a profane word is a substring of the word.  Dictionary
        # words are exempt so that innocent words which merely contain a
        # profane substring are not flagged.
        if not inDict:
            for pw in profane_words:
                if pw in w:
                    words_count[pw] += 1

    # Take words GROUP_SIZE at a time and see if they either form a
    # profane word, or the beginning of one.
    # If they form a profane word, update the count. If they form the
    # beginning of a profane word, keep gluing on the following tokens
    # for as long as the result is still part of that profane word, then
    # decide from the length that matched.
    #
    # The "+ 1" makes the final full window (the last GROUP_SIZE tokens)
    # part of the scan.
    for i in range(len(words) - GROUP_SIZE + 1):
        concat_word = utilities.rot13(''.join(words[i:i + GROUP_SIZE]))

        for pw in profane_words:
            if concat_word not in pw:
                continue

            if checkMatchPercent(len(concat_word), len(pw)):
                words_count[pw] += 1
                break

            # Extend a per-word copy so that the attempt made for this
            # profane word does not leak into the comparison with the
            # next one.
            matched = concat_word
            for following in words[i + GROUP_SIZE:]:
                extended = matched + utilities.rot13(following)
                if extended not in pw:
                    break
                matched = extended

            # Judge the longest run that was still part of the profane
            # word, whether the run ended on a mismatch or because the
            # text ran out.  If nothing could be appended the length is
            # unchanged and was already rejected above.
            if len(matched) > len(concat_word) and \
                    checkMatchPercent(len(matched), len(pw)):
                words_count[pw] += 1

    # Compute the score: mean weight per recorded occurrence.
    running_sum = 0
    count = 0
    for pw, occurrences in words_count.items():
        running_sum += occurrences * profane_word_weights[pw]
        count += occurrences

    if count == 0:
        return 0

    # float() keeps the division exact on interpreters where "/" between
    # two ints truncates.
    return float(running_sum) / count
Exemple #3
0
def profanityScore(text):
    """
    Compute how profane ``text`` is.

    Each detected occurrence of a profane word contributes that word's
    weight, as read from 'profane_words.txt'; the return value is the
    average weight over all detected occurrences, or 0 if there are none.
    The score therefore stays within the range of the configured weights.

    Detection works on the lower-cased, whitespace-separated tokens of
    ``text`` with trailing periods removed.  Tokens go through
    utilities.rot13 before comparison (the profane word list is
    presumably stored in that encoding -- confirm against the data file).
    A profane word is counted when a token equals it, is one of its
    transpositions (getTransposedWords), or contains it without being a
    dictionary word, and when GROUP_SIZE or more consecutive tokens joined
    together match a large enough part of it according to
    checkMatchPercent.
    """

    profane_word_weights = utilities.readPropertiesFile('profane_words.txt', 'int')
    profane_words = profane_word_weights.keys()

    # Indexed by the first character of a token further down; presumably
    # groups the dictionary words by their initial.
    words_dict = utilities.readPropertiesFile('dict_words.txt', 'list')

    profane_words_transpose = {}
    for w in profane_words:
        profane_words_transpose[w] = getTransposedWords(w)

    # Build the token list, skipping tokens that are empty once their
    # trailing periods are gone (e.g. "..."); an empty token has no first
    # character to look up and would raise IndexError below.
    words = []
    for token in text.lower().split():
        token = token.rstrip('.')
        if token:
            words.append(token)

    words_count = defaultdict(int)

    for w in words:
        # Initials without a dictionary entry mean "not a dictionary
        # word" instead of a KeyError.
        inDict = w in words_dict.get(w[0], ())

        w = utilities.rot13(w)

        # If the exact word appears in the list of profane words
        if w in profane_words:
            words_count[w] += 1

        # Check if the word is a transpose of the profane words
        for pw, variants in profane_words_transpose.items():
            if w in variants:
                words_count[pw] += 1

        # Check if profane words is a substring of the word; dictionary
        # words are never flagged by this rule.
        for pw in profane_words:
            if pw in w and not inDict:
                words_count[pw] += 1

    # Take words GROUP_SIZE at a time and see if they either form a
    # profane word, or the beginning of one.
    # If they form a profane word, update the count. If they form the
    # beginning of a profane word, then see if it actually matches one
    # and then count as one.
    #
    # range(... + 1) so that the window made of the last GROUP_SIZE
    # tokens is examined too.
    total = len(words)
    for i in range(total - GROUP_SIZE + 1):
        concat_word = utilities.rot13(''.join(words[i:i + GROUP_SIZE]))

        for pw in profane_words:
            if concat_word not in pw:
                continue

            if checkMatchPercent(len(concat_word), len(pw)):
                words_count[pw] += 1
                break

            # check further: keep appending the following tokens while
            # the result is still a substring of this profane word.  The
            # candidate is local to this profane word, so one attempt
            # cannot distort the comparison with the next profane word.
            candidate = concat_word
            j = i + GROUP_SIZE
            while j < total:
                longer = candidate + utilities.rot13(words[j])
                if longer not in pw:
                    break
                candidate = longer
                j += 1

            # Once the run cannot grow any more (mismatch or end of
            # text), see how far it reached, i.e. whether it matches a
            # significant share of the profane word and should count as
            # a disguised profanity.  If no token could be appended, the
            # length is the one already rejected above.
            if j > i + GROUP_SIZE and checkMatchPercent(len(candidate), len(pw)):
                words_count[pw] += 1

    # compute the score
    running_sum = 0
    count = 0
    for w in words_count:
        running_sum += words_count[w] * profane_word_weights[w]
        count += words_count[w]

    if count == 0:
        score = 0
    else:
        # float() avoids truncating integer division on Python 2.
        score = float(running_sum) / count

    return score