import os
import string
import sys

import utilities

# Command-line entry point: rot13-encode the key of every "key : value" line
# of <infile> and write the result to <outfile>, leaving the value untouched.
if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("Usage: python rot13.py <infile> <outfile>")
        # Exit status is deliberately left at 0, as in the original script.
        sys.exit(0)

    infile = sys.argv[1]
    outfile = sys.argv[2]

    # The context manager closes both files even when a line fails to parse
    # or a write raises; the input is opened first, so a missing input file
    # does not create or truncate the output file.
    with open(infile, 'r') as f, open(outfile, 'w') as fw:
        # Iterate the file lazily instead of loading every line up front.
        for line in f:
            # Split on the first separator only, so a value that itself
            # contains ' : ' is preserved intact. A line without any
            # separator still raises ValueError, as before.
            (val1, val2) = line.split(' : ', 1)
            # val2 keeps its trailing newline, so no newline is added here.
            fw.write(utilities.rot13(val1) + ' : ' + val2)
def profanityScore(text):
    """Score how profane ``text`` is.

    The weighted profane-word list is read from 'profane_words.txt' and the
    dictionary word lists from 'dict_words.txt' (both through
    ``utilities.readPropertiesFile``) on every call. Each word of the text is
    passed through ``utilities.rot13`` before being compared with the profane
    words -- presumably because the list is stored rot13-encoded; confirm
    against the data file.

    A profane word is counted when:
      * a word of the text equals it,
      * a word of the text is one of its ``getTransposedWords`` variants,
      * it is a substring of a word that is not a dictionary word,
      * a run of consecutive words, concatenated, spells enough of it to
        satisfy ``checkMatchPercent`` (e.g. a word typed one letter at a
        time).

    Returns the weighted average ``sum(count * weight) / sum(count)`` over
    the profane words counted, or 0 when none was detected. (The original
    docstring described the result as "between 1 and 10", which presumably
    reflects the range of the weights in the data file.)

    NOTE(review): this file contains two definitions of ``profanityScore``;
    the later one is the one bound at import time.
    """
    profane_word_weights = utilities.readPropertiesFile(
        'profane_words.txt', 'int')
    profane_words = profane_word_weights.keys()
    # Indexed by the first character of a word below.
    words_dict = utilities.readPropertiesFile('dict_words.txt', 'list')

    profane_words_transpose = {}
    for pw in profane_words:
        profane_words_transpose[pw] = getTransposedWords(pw)

    # Drop tokens that are empty once trailing dots are stripped (e.g. "..."):
    # they carry no letters and would make the first-character lookup below
    # raise IndexError.
    stripped = (token.rstrip('.') for token in text.lower().split())
    words = [word for word in stripped if word]

    words_count = defaultdict(int)

    for word in words:
        # A first character without a dictionary bucket (digit, symbol, ...)
        # simply means "not a dictionary word" rather than a KeyError.
        # assumes words_dict is a dict-like mapping -- TODO confirm
        in_dict = word in words_dict.get(word[0], ())
        encoded = utilities.rot13(word)

        # The exact word appears in the list of profane words.
        if encoded in profane_word_weights:
            words_count[encoded] += 1

        # The word is a transposition of a profane word.
        for pw, variants in profane_words_transpose.items():
            if encoded in variants:
                words_count[pw] += 1

        # A profane word is embedded in a word that is not a dictionary word.
        if not in_dict:
            for pw in profane_words:
                if pw in encoded:
                    words_count[pw] += 1

    # Take consecutive words a group at a time and see whether they form a
    # profane word or the beginning of one.
    # NOTE(review): the window below is hard-coded to three words while the
    # range uses GROUP_SIZE, so this presumably assumes GROUP_SIZE == 3; the
    # range also appears to skip the last possible window -- confirm.
    for i in range(len(words) - GROUP_SIZE):
        window = utilities.rot13(words[i] + words[i + 1] + words[i + 2])

        for pw in profane_words:
            if window not in pw:
                continue

            if checkMatchPercent(len(window), len(pw)):
                words_count[pw] += 1
                # Only the first profane word matching this window is counted.
                break

            # Not enough of the profane word is covered yet: keep appending
            # the following words while the result is still a substring of
            # it. Start from the pristine window for every profane word so
            # that one candidate's extension cannot leak into the next.
            candidate = window
            for following in words[i + 3:]:
                candidate += utilities.rot13(following)
                if candidate in pw:
                    continue
                # First mismatch: judge how far the match reached, then stop.
                # Growing the candidate any further would only inflate its
                # length with unrelated words.
                # NOTE(review): "- 1" assumes the word just appended is a
                # single character -- confirm.
                if checkMatchPercent(len(candidate) - 1, len(pw)):
                    words_count[pw] += 1
                break
            # NOTE(review): when the text ends while the candidate is still a
            # substring, nothing is counted (unchanged from the original).

    # Weighted average of the weights of everything that was counted.
    running_sum = 0
    count = 0
    for pw, occurrences in words_count.items():
        running_sum += occurrences * profane_word_weights[pw]
        count += occurrences

    if count == 0:
        return 0
    return running_sum / count
def profanityScore(text):
    """Score how profane ``text`` is.

    The weighted profane-word list is read from 'profane_words.txt' and the
    dictionary word lists from 'dict_words.txt' (both through
    ``utilities.readPropertiesFile``) on every call. Each word of the text is
    passed through ``utilities.rot13`` before being compared with the profane
    words -- presumably because the list is stored rot13-encoded; confirm
    against the data file.

    A profane word is counted when:
      * a word of the text equals it,
      * a word of the text is one of its ``getTransposedWords`` variants,
      * it is a substring of a word that is not a dictionary word,
      * a run of consecutive words, concatenated, spells enough of it to
        satisfy ``checkMatchPercent`` (e.g. a word typed one letter at a
        time).

    Returns the weighted average ``sum(count * weight) / sum(count)`` over
    the profane words counted, or 0 when none was detected. (The original
    docstring described the result as "between 1 and 10", which presumably
    reflects the range of the weights in the data file.)

    NOTE(review): this file contains two definitions of ``profanityScore``;
    this later one is the one bound at import time.
    """
    profane_word_weights = utilities.readPropertiesFile(
        'profane_words.txt', 'int')
    profane_words = profane_word_weights.keys()
    # Indexed by the first character of a word below.
    words_dict = utilities.readPropertiesFile('dict_words.txt', 'list')

    profane_words_transpose = {}
    for pw in profane_words:
        profane_words_transpose[pw] = getTransposedWords(pw)

    # Drop tokens that are empty once trailing dots are stripped (e.g. "..."):
    # they carry no letters and would make the first-character lookup below
    # raise IndexError.
    stripped = (token.rstrip('.') for token in text.lower().split())
    words = [word for word in stripped if word]

    words_count = defaultdict(int)

    for word in words:
        # A first character without a dictionary bucket (digit, symbol, ...)
        # simply means "not a dictionary word" rather than a KeyError.
        # assumes words_dict is a dict-like mapping -- TODO confirm
        in_dict = word in words_dict.get(word[0], ())
        encoded = utilities.rot13(word)

        # The exact word appears in the list of profane words.
        if encoded in profane_word_weights:
            words_count[encoded] += 1

        # The word is a transposition of a profane word.
        for pw, variants in profane_words_transpose.items():
            if encoded in variants:
                words_count[pw] += 1

        # A profane word is embedded in a word that is not a dictionary word.
        if not in_dict:
            for pw in profane_words:
                if pw in encoded:
                    words_count[pw] += 1

    # Take consecutive words a group at a time and see whether they form a
    # profane word or the beginning of one.
    # NOTE(review): the window below is hard-coded to three words while the
    # range uses GROUP_SIZE, so this presumably assumes GROUP_SIZE == 3; the
    # range also appears to skip the last possible window -- confirm.
    for i in range(len(words) - GROUP_SIZE):
        window = utilities.rot13(words[i] + words[i + 1] + words[i + 2])

        for pw in profane_words:
            if window not in pw:
                continue

            if checkMatchPercent(len(window), len(pw)):
                words_count[pw] += 1
                # Only the first profane word matching this window is counted.
                break

            # Not enough of the profane word is covered yet: keep appending
            # the following words while the result is still a substring of
            # it. Start from the pristine window for every profane word so
            # that one candidate's extension cannot leak into the next.
            candidate = window
            for following in words[i + 3:]:
                candidate += utilities.rot13(following)
                if candidate in pw:
                    continue
                # First mismatch: judge how far the match reached, then stop.
                # Growing the candidate any further would only inflate its
                # length with unrelated words.
                # NOTE(review): "- 1" assumes the word just appended is a
                # single character -- confirm.
                if checkMatchPercent(len(candidate) - 1, len(pw)):
                    words_count[pw] += 1
                break
            # NOTE(review): when the text ends while the candidate is still a
            # substring, nothing is counted (unchanged from the original).

    # Weighted average of the weights of everything that was counted.
    running_sum = 0
    count = 0
    for pw, occurrences in words_count.items():
        running_sum += occurrences * profane_word_weights[pw]
        count += occurrences

    if count == 0:
        return 0
    return running_sum / count