def test_custom_wordlist(self):
    """load_words() must fully replace the default list: real profanity is
    no longer flagged, while the custom words are."""
    custom_badwords = ["happy", "jolly", "merry"]
    profanity.load_words(custom_badwords)
    # make sure it doesn't find real profanity anymore
    # (failIf/failUnless are deprecated aliases, removed in Python 3.12)
    self.assertFalse(profanity.contains_profanity("F**k you!"))
    # make sure it finds profanity in a sentence containing custom_badwords
    self.assertTrue(profanity.contains_profanity("Have a merry day! :)"))
def verify_nice(word):
    """Validate that *word* is purely alphabetic and not profane.

    Both the raw word and its duplicate-stripped form are checked, so
    repeated letters cannot sneak a profanity past the filter.

    Raises:
        Exception: if the input contains non-letters or is a profanity.
    """
    profanity.load_words(profane_words())
    clear_word = remove_duplicates(word)
    if not word.isalpha():
        raise Exception('Input can contain only letters')
    # Generator form short-circuits on the first profane variant instead of
    # materializing an intermediate list.
    if any(profanity.contains_profanity(w) for w in (word, clear_word)):
        raise Exception('Input should not be a profanity')
async def on_message(self, message):
    """Delete any message containing a blacklisted word and warn the channel."""
    custom_badwords = ["bitch", "shit", "ass", "dumbass"]
    profanity.load_words(custom_badwords)
    # Guard clause: clean messages need no action.
    if not profanity.contains_profanity(message.content):
        return
    await message.delete()
    await message.channel.send(
        "Stop cursing this server doesn't allow it.")
def test_custom_wordlist(self):
    """load_words() must fully replace the default list: real profanity is
    no longer flagged, while the custom words are."""
    custom_badwords = ['happy', 'jolly', 'merry']
    profanity.load_words(custom_badwords)
    # make sure it doesn't find real profanity anymore
    # (failIf/failUnless are deprecated aliases, removed in Python 3.12)
    self.assertFalse(profanity.contains_profanity("F**k you!"))
    # make sure it finds profanity in a sentence containing custom_badwords
    self.assertTrue(profanity.contains_profanity("Have a merry day! :)"))
def check(k):
    """Censor string *k* against a custom word list, echo the result, and
    append it (plus a trailing space) to withoutabusewords.txt."""
    custom_words = ['f**k', 'bullshit', 'punkass', 'shit', 'pervert']
    profanity.load_words(custom_words)
    censor = profanity.censor(k)
    print(censor, end=' ')
    # with-statement guarantees the handle is closed; the original opened
    # the file and never closed it, leaking a descriptor per call.
    with open(r"withoutabusewords.txt", "a") as file:
        file.write(censor)
        file.write(" ")
def remove_profanity(name: str) -> str:
    """Blank out known bad words in *name*, then collapse runs of spaces."""
    profanity.load_words(['bitch', 'penis'])
    profanity.set_censor_characters(' ')
    cleaned = profanity.censor(name).strip()
    # Profanity was replaced with a space, so squeeze repeated spaces to one.
    cleaned = re.sub(' +', ' ', cleaned)
    return cleaned
def __init__(self, encodings, languages, checkProfanity=False, wordlist=None, listhash=None):
    """Store configuration and, when a word list is supplied, install it as
    the profanity filter's vocabulary."""
    self.encodings = encodings
    self.languages = languages
    self.profanities = checkProfanity
    # Loading wordlists (Design: should be part of class - A) -- needs a test.
    if wordlist:
        profanity.load_words(wordlist)
async def on_message(message):
    """Reply to mentions with a chatbot response (refused or censored if it
    trips the controversial-word filter) and handle the $$reload command."""
    if message.content.startswith(client.user.mention):
        InitialMsg = await client.send_message(
            message.channel, message.author.mention + " **Thinking...**")
        SlashResponse = message.content
        SlashResponse = SlashResponse.replace(client.user.mention, "")
        if profanity.contains_profanity(SlashResponse):
            response = """```ERROR: You cannot send controversial messages using this bot.```"""
        else:
            response = str(chatbot.get_response(SlashResponse))
            # Censor the generated reply if the bot itself produced a flagged word.
            if profanity.contains_profanity(response):
                response = str(profanity.censor(response))
        await client.edit_message(InitialMsg, message.author.mention + " " + response)
    if message.content.startswith('$$reload'):
        # with-statement closes the word-list file; the original leaked the handle.
        with open('controversialwords.txt') as f:
            cw = [line.rstrip('\n') for line in f]
        profanity.load_words(cw)
        await client.send_message(
            message.channel,
            message.author.mention + " **Controversial words reloaded!**")
def __get_random_title(self, url):
    """ Returns a (one) random post title """
    try:
        titles = self.__get_titles(self.__get_json(url))
        post = titles[self.__rand()]
        remaining = 20
        profanity.load_words(['reddit', 'redditor'])
        # Re-roll while the title is flagged, giving up after the retry budget.
        while profanity.contains_profanity(post):
            post = titles[self.__rand()]
            remaining -= 1
            if remaining < 1:
                break
        return post
    except Exception as ex:
        return 'There is a feed problem at the moment: ' + str(ex)
def filtered_words(wordlist):
    '''
    Input an unfiltered list of words as the first arg
    `python language_filter.py unfiltered_words.txt`
    Return a word if it is not considered offensive.
    Arguments:
        word : string
    Returns:
        Word, if it is not included in the offensive_filter
    '''
    # Load custom offensive dictionary
    with open('offensive_filter.txt', 'r') as readfile:
        bad_words = readfile.readlines()
    bad_words = [word.strip() for word in bad_words]
    profanity.load_words(bad_words)
    with open(wordlist, 'r') as readfile:
        for word in readfile:
            if not profanity.contains_profanity(word):
                # print() call form: identical output on Python 2 for a single
                # argument, and valid on Python 3 (the original `print word`
                # statement is Python-2-only syntax).
                print(word)
def filter_words(word_list):
    """
    Input an unfiltered list of words as the first arg
    Return a list of words not contained in the profanity banlist
    Arguments:
        word_list: list of words
    Returns:
        filtered list of words
    """
    # Load custom offensive dictionary
    with open("offensive_filter.txt", "r") as readfile:
        bad_words = [entry.strip() for entry in readfile.readlines()]
    profanity.load_words(bad_words)
    kept = []
    for candidate in word_list:
        if profanity.contains_profanity(candidate):
            continue
        print(candidate)
        kept.append(candidate)
    return kept
@date 01/02/17

Functions for posting and storing results.
"""
import pandas as pd
import os
from shutil import copyfile
from htmlTable import htmlTable
import time
import profanity.profanity as profanity
import webbrowser

# Load the list of bad words and add to profanity filter
with open('badWordsShort.txt', 'r') as f:
    badWords = [x.strip() for x in f.readlines()]
profanity.load_words(badWords)


def openHTML_Browser(filename):
    """ Open the HTML filename given in a new browser tab """
    # NOTE(review): assert is stripped under `python -O`; consider raising
    # ValueError for this filename check instead.
    assert filename[-4:] == 'html'
    path = os.path.abspath(filename)
    # webbrowser needs a file:// URL, not a bare filesystem path.
    url = 'file://' + path
    webbrowser.open(url)


def updateResults(filename, initial, timeScore):
    """ Filename (without extension). Open the filename.hdf and store results,
    then write the results to a HTML file. """
'/shop': 20, '/bet': 30, '/sm': 300, '/beg': 30 }

# Per-command state trackers (populated at runtime).
last = {}
cmds = []
mutedIDs = []
# Combined English + Hindi word blacklist fed to the profanity filter below.
filter_words = ['2g1c', '2 girls 1 cup', 'acrotomophilia', 'alabama hot pocket', 'alaskan pipeline', 'anal', 'anilingus', 'anus', 'apeshit', 'arsehole', 'ass', 'asshole', 'assmunch', 'auto erotic', 'autoerotic', 'babeland', 'baby batter', 'baby juice', 'ball gag', 'ball gravy', 'ball kicking', 'ball licking', 'ball sack', 'ball sucking', 'bangbros', 'bareback', 'barely legal', 'barenaked', 'bastard', 'bastardo', 'bastinado', 'bbw', 'bdsm', 'beaner', 'beaners', 'beaver cleaver', 'beaver lips', 'bestiality', 'big black', 'big breasts', 'big knockers', 'big t**s', 'bimbos', 'birdlock', 'bitch', 'bitches', 'black c**k', 'blonde action', 'blonde on blonde action', 'b*****b', 'blow job', 'blow your load', 'blue waffle', 'blumpkin', 'bollocks', 'bondage', 'boner', 'boob', 'boobs', 'booty call', 'brown showers', 'brunette action', 'bukkake', 'bulldyke', 'bullet vibe', 'bullshit', 'bung hole', 'bunghole', 'busty', 'butt', 'buttcheeks', 'butthole', 'camel toe', 'camgirl', 'camslut', 'camwhore', 'carpet muncher', 'carpetmuncher', 'chocolate rosebuds', 'circlejerk', 'cleveland steamer', 'c**t', 'clitoris', 'clover clamps', 'clusterfuck', 'c**k', 'cocks', 'coprolagnia', 'coprophilia', 'cornhole', 'coon', 'coons', 'creampie', 'cum', 'cumming', 'cunnilingus', 'c**t', 'darkie', 'date rape', 'daterape', 'deep throat', 'deepthroat', 'dendrophilia', 'dick', 'd***o', 'dingleberry', 'dingleberries', 'dirty pillows', 'dirty sanchez', 'doggie style', 'doggiestyle', 'doggy style', 'doggystyle', 'dog style', 'dolcett', 'domination', 'dominatrix', 'dommes', 'donkey punch', 'double dong', 'double penetration', 'dp action', 'dry hump', 'dvda', 'eat my ass', 'ecchi', 'e*********n', 'erotic', 'erotism', 'escort', 'eunuch', 'f****t', 'fecal', 'felch', 'f******o', 'feltch', 'female squirting', 'femdom', 'figging', 'fingerbang', 'fingering', 'fisting', 'foot fetish', 'footjob', 'frotting', 'f**k', 'f**k buttons',
'f****n', 'f*****g', 'fucktards', 'fudge packer', 'f*********r', 'futanari', 'gang bang', 'gay sex', 'genitals', 'giant c**k', 'girl on', 'girl on top', 'girls gone wild', 'goatcx', 'g****e', 'god damn', 'gokkun', 'golden shower', 'goodpoop', 'goo girl', 'goregasm', 'grope', 'group sex', 'g-spot', 'guro', 'hand job', 'handjob', 'hard core', 'hardcore', 'hentai', 'homoerotic', 'honkey', 'hooker', 'hot carl', 'hot chick', 'how to kill', 'how to murder', 'huge fat', 'humping', 'incest', 'intercourse', 'jack off', 'jail bait', 'jailbait', 'jelly donut', 'jerk off', 'jigaboo', 'jiggaboo', 'jiggerboo', 'j**z', 'juggs', 'kike', 'kinbaku', 'kinkster', 'kinky', 'knobbing', 'leather restraint', 'leather straight jacket', 'lemon party', 'lolita', 'lovemaking', 'make me come', 'male squirting', 'm********e', 'menage a trois', 'milf', 'missionary position', 'm**********r', 'mound of venus', 'mr hands', 'muff diver', 'muffdiving', 'nambla', 'nawashi', 'negro', 'neonazi', 'n***a', 'nigger', 'nig nog', 'nimphomania', 'nipple', 'nipples', 'nsfw images', 'nude', 'nudity', 'nympho', 'nymphomania', 'octopussy', 'omorashi', 'one cup two girls', 'one guy one jar', 'o****m', 'orgy', 'paedophile', 'paki', 'panties', 'panty', 'pedobear', 'pedophile', 'pegging', 'penis', 'phone sex', 'piece of shit', 'pissing', 'piss pig', 'pisspig', 'playboy', 'pleasure chest', 'pole smoker', 'ponyplay', 'poof', 'poon', 'poontang', 'punany', 'poop chute', 'poopchute', 'p**n', 'porno', 'pornography', 'prince albert piercing', 'pthc', 'pubes', 'pussy', 'queaf', 'queef', 'quim', 'raghead', 'raging boner', 'rape', 'raping', 'rapist', 'rectum', 'reverse cowgirl', 'rimjob', 'rimming', 'rosy palm', 'rosy palm and her 5 sisters', 'rusty trombone', 'sadism', 'santorum', 'scat', 'schlong', 'scissoring', 's***n', 'sex', 'sexo', 'sexy', 'shaved beaver', 'shaved pussy', 'shemale', 'shibari', 'shit', 'shitblimp', 'shitty', 'shota', 'shrimping', 'skeet', 'slanteye', 's**t', 's&m', 'smut', 'snatch', 'snowballing',
'sodomize', 'sodomy', 'spic', 'splooge', 'splooge moose', 'spooge', 'spread legs', 'spunk', 'strap on', 'strapon', 'strappado', 'strip club', 'style doggy', 'suck', 'sucks', 'suicide girls', 'sultry women', 'swastika', 'swinger', 'tainted love', 'taste my', 'tea bagging', 'threesome', 'throating', 'tied up', 'tight white', 'tit', 't**s', 'titties', 'titty', 'tongue in a', 'topless', 'tosser', 'towelhead', 'tranny', 'tribadism', 'tub girl', 'tubgirl', 'tushy', 'twat', 'twink', 'twinkie', 'two girls one cup', 'undressing', 'upskirt', 'urethra play', 'urophilia', 'v****a', 'venus mound', 'vibrator', 'violet wand', 'vorarephilia', 'voyeur', 'vulva', 'wank', 'wetback', 'wet dream', 'white power', 'wrapping men', 'wrinkled starfish', 'xxx', 'yaoi', 'yellow showers', 'yiffy', 'zoophilia', '🖕', 'aand', 'aandu', 'balatkar', 'beti chod', 'bhadva', 'bhadve', 'bhandve', 'bhootni ke', 'bhosad', 'bhosadi ke', 'boobe', 'chakke', 'chinaal', 'chinki', 'chod', 'chodu', 'chodu bhagat', 'chooche', 'choochi', 'choot', 'choot ke baal', 'chootia', 'chootiya', 'chuche', 'chuchi', 'chudai khanaa', 'chudan chudai', 'chut', 'chut ke baal', 'chut ke dhakkan', 'chut maarli', 'chutad', 'chutadd', 'chutan', 'chutia', 'chutiya', 'gaand', 'gaandfat', 'gaandmasti', 'gaandufad', 'gandu', 'gashti', 'gasti', 'ghassa', 'ghasti', 'harami', 'haramzade', 'hawas', 'hawas ke pujari', 'hijda', 'hijra', 'jhant', 'jhant chaatu', 'jhant ke baal', 'jhantu', 'kamine', 'kaminey', 'kanjar', 'kutta', 'kutta kamina', 'kutte ki aulad', 'kutte ki jat', 'kuttiya', 'loda', 'lode', 'lavde', 'lauda', 'lodu', 'lund', 'lund choos', 'lund khajoor', 'lundtopi', 'lundure', 'maa ki chut', 'maal', 'madar chod', 'mooh mein le', 'mutth', 'najayaz', 'najayaz aulaad', 'najayaz paidaish', 'paki', 'pataka', 'patakha', 'raand', 'randi', 'saala', 'saala kutta', 'saali kutti', 'saali randi', 'suar', 'suar ki aulad', 'tatte', 'tatti', 'teri maa ka bhosada', 'teri maa ka boba chusu', 'teri maa ki chut', 'tharak', 'tharki', 'madarchod',
'lawde', 'lawda',]
# Extend the blacklist from an external file, skipping blank lines.
# NOTE(review): this file handle is never closed -- consider a with-statement.
filter_words.extend([
    x.lower() for x in open(some.badfile).read().split('\n') if x != ''
])
if _profanity:
    profanity.load_words(filter_words)
if _googletrans:
    translator = Translator()


def chatCmd_loop():
    # Drain any queued chat commands, then reschedule this loop via a timer.
    global cmds
    if cmds:
        for v in cmds:
            chatCmd.cmd(v)
        cmds = []
    with bs.Context('UI'):
        bs.realTimer(100, chatCmd_loop)


chatCmd_loop()
async def on_ready():
    """Install the profanity word list once the bot has connected."""
    profanity.load_words(wordlist=word_list)
    print('ready')
def clean_tweets(self, tweets_type=None):
    """Clean a batch of previously saved tweets ("stream" or "full").

    Loads the saved JSON, strips links from each tweet's text, then excludes
    users whose tweets look spammy (near-duplicate of the previous tweet),
    contain profanity, exceed median+1-stdev hashtag counts, or match the
    spam regex. Writes the surviving tweets to CSV (and to JSON for "full"
    runs) and flips the corresponding step flag.

    :param tweets_type: None/"stream" for streamed tweets, "full" for full
        tweet data.
    :return: output filename on success, False on any failure.
    """
    # identify the type of tweet
    if tweets_type == None:
        self.tweets_type = "stream"
    else:
        self.tweets_type = tweets_type
    # if streamed tweets have not been cleaned before, then open the streamed tweets.
    if self.step_2_cleaned_streamed_tweets == 0 and self.tweets_type == "stream":
        try:
            with open(self.streamed_tweets_json, 'r') as file:
                tweets = json.load(file)
        except Exception as e:
            print(
                f"Message: Streamed tweets file not found: '{os.path.relpath(self.streamed_tweets_json)}'"
            )
            return False
    # check if streamed tweets have already been cleaned before.
    elif self.step_2_cleaned_streamed_tweets == 1 and self.tweets_type == "stream":
        print(
            f"Message: You already cleaned your tweet stream. Csv file here: '{os.path.relpath(self.cleaned_streamed_tweets_csv)}'"
        )
        return False
    # check if full tweets have already been cleaned before.
    elif self.step_4_cleaned_full_tweets == 1 and self.tweets_type == "full":
        print(
            f"Message: You already cleaned the full tweets for this stream. Truncated file here: '{os.path.relpath(self.full_tweets_trunc_clean_csv)}'"
        )
        return False
    # if full tweets data is available, then open it.
    elif self.step_3_fetched_full_tweets == 1 and self.tweets_type == "full":
        try:
            with open(self.full_tweets_trunc_json, 'r') as file:
                tweets = json.load(file)
        except Exception as e:
            print(
                f"Message: Full tweets file not found: '{os.path.relpath(self.full_tweets_trunc_json)}'"
            )
            return False
    # NOTE(review): if no branch above assigned `tweets` (e.g. tweets_type ==
    # "full" but step_3 flag is 0), the loop below raises NameError -- confirm
    # callers guarantee one branch always matches.
    # for every tweet, remove all links and get text length
    for tweet in tweets:
        text = tweet.pop(3)
        text = re.sub(r'http\S+', '', text, flags=re.MULTILINE)
        text_len = len(text)
        tweet.append(text)
        tweet.append(text_len)
    # regex pattern to filter out invalid tweets from streamed data. See Regex_README.md for details.
    regex = re.compile(
        r'[^\u0000-\u007F]{5,}|^\s*$|\W{7,}|^(\d)+|^(#\w\b)+|^\"|^\*|^[A-Z]{5, }|[A-Z]$|^How|^The\s\d+|^Photos|(\'\')+|(top\s\d+|free|buy|get|class|connect|discount|now|read|job|video|news|follow|added|review|publish|clubs|manager|study|success|limited|sex|release|help|gift|ideas|massage|schedule|services|check|join|pain|therapy|alternative|new\schallenge|product|need|learn|for\smen|for\swomen|revolution|leadership|weight\sloss|diet\splan|ebay|click|promo|certified|store|pick|sign|log-in|login|tips|meet|secret|improve|listen|(\w+)for(\w+)|trainer)|(\$|\+|\@|\?|\?$)|(\.\n\.)|^$',
        re.IGNORECASE)
    # get all users in tweet list and create list to hold excluded users
    all_users = set([tweet[2] for tweet in tweets])
    excluded_users = []
    # get median hashtag count and also a list of hashtag counts
    median, hashtag_counts = self._get_median_hashtags(tweets)
    # get stdev of hashtag count
    stdev = math.ceil(statistics.stdev(hashtag_counts))
    # if median is 0, then use mean
    if median == 0:
        median = math.ceil(statistics.mean(hashtag_counts))
    # get list of profanities and load it
    profanity_list = get_profanity_list()
    profanity.load_words(profanity_list)
    # loop through tweets and hashtag counts and build exclusion index
    for i, values in enumerate(zip(tweets, hashtag_counts)):
        tweet = values[0][3]
        user_id = values[0][2]
        hashtag_count = values[1]
        mid_68 = median + stdev
        # testing for spammy tweets
        current_tweet = tweets[i][3]
        past_tweet = tweets[i - 1][3]
        test = self._percent_same(current_tweet, past_tweet)
        # testing for profanity
        profanity_test = profanity.contains_profanity(tweet)
        # exclude authors/users of spammy tweets, or tweets that have profanity, or a hashtag_count above the median + 1 stdev
        if test >= .5 or profanity_test == True or hashtag_count > mid_68:
            excluded_users.append(user_id)
        else:
            match = regex.search(tweet)
            if match:
                excluded_users.append(user_id)
            else:
                pass
    # filter out excluded_users from all_users and return filtered tweets
    excluded_users = set(excluded_users)
    filtered_users = list(
        filter(lambda x: x not in excluded_users, all_users))
    filtered_tweets = [
        tweet for tweet in tweets if tweet[2] in filtered_users
    ]
    try:
        # print to csv
        if self.tweets_type == "stream":
            output_filename_csv = self.cleaned_streamed_tweets_csv
        else:
            output_filename_csv = self.full_tweets_trunc_clean_csv
        with open(output_filename_csv, 'w') as new_file:
            field_names = [
                'index', 'screen_name', 'user_id', 'tweet_text', 'length'
            ]
            writer = csv.writer(new_file, delimiter=',')
            writer.writerow(field_names)
            writer.writerows(filtered_tweets)
        # print to json if tweets_type is full data and not a stream
        if self.tweets_type == "full":
            output_filename_json = self.full_tweets_trunc_clean_json
            with open(output_filename_json, 'w') as new_file:
                new_file.write(json.dumps(filtered_tweets))
            self.step_4_cleaned_full_tweets = 1
            self.save_obj()
            print(
                f"Successfully cleaned tweets. See csv file for review: '{os.path.relpath(output_filename_csv)}'"
            )
            return output_filename_json
        else:
            self.step_2_cleaned_streamed_tweets = 1
            self.save_obj()
            print(
                f"Successfully cleaned tweets. See csv file for review: '{os.path.relpath(output_filename_csv)}'"
            )
            return output_filename_csv
    except Exception as e:
        print("Error: ", e)
        return False
def prepare_profanities(self, language):
    """Install the profanity list for *language* ("fr", else English)."""
    per_language = {
        "fr": ['merde', 'Putain', 'Enculer', 'Salaud'],
    }
    badwords = per_language.get(language, ['f**k', 'shit', 'c**k'])
    profanity.load_words(badwords)
print("Discord.py version: {}".format(discord.version_info))
print("-------------------")
logging.basicConfig(level=logging.INFO)

client = discord.Client()

# Train the "Slash" chatbot on the stock English corpora.
chatbot = ChatBot("Slash")
chatbot.set_trainer(ChatterBotCorpusTrainer)
chatbot.train(
    "chatterbot.corpus.english.greetings",
    "chatterbot.corpus.english.conversations"
)

# Load the controversial-word blacklist. The with-statement closes the file
# handle; the original called open() inline and never closed it.
with open('controversialwords.txt') as _cw_file:
    cw = [line.rstrip('\n') for line in _cw_file]
profanity.load_words(cw)


@client.event
async def on_message(message):
    """Reply to mentions with a chatbot response, refusing or censoring
    anything that contains words from the controversial list."""
    if message.content.startswith(client.user.mention):
        InitialMsg = await client.send_message(
            message.channel, message.author.mention + " **Thinking...**")
        SlashResponse = message.content
        SlashResponse = SlashResponse.replace(client.user.mention, "")
        if profanity.contains_profanity(SlashResponse):
            response = """```ERROR: You cannot send controversial messages using this bot.```"""
        else:
            response = str(chatbot.get_response(SlashResponse))
            # Censor the generated reply if the bot itself produced a flagged word.
            if profanity.contains_profanity(response):
                response = str(profanity.censor(response))
        await client.edit_message(InitialMsg, message.author.mention + " " + response)
async def say(self, ctx, *message):
    """Echo the given words to the channel, refusing to repeat profanity."""
    profanity.load_words(predefined)
    text = " ".join(message)
    # Lower-case for a case-insensitive check; replace the whole message on a hit.
    if profanity.contains_profanity(text.lower()):
        text = "I don't think i should be saying that."
    await ctx.channel.send(text)
swear_words = [ 'arse', 'bastard', 'bitch', 'biatch', 'bollock', 'bollok', 'boner', 'boob', 'bugger', 'bum', 'butt', 'b******g', 'clitoris', 'c**k', 'coon', 'crap', 'c**t', 'cunts', 'damn', 'dick', 'dyke', 'f*g', 'feck', 'f*****e', 'f******o', 'f******g', 'f**k', 'f u c k', 'f*****g', 'f*********r', 'fudge packer', 'flange', 'f****t', 'paki', 'knob', 'cuntflaps', 's***n', 'h**o', 'jerk', 'j**z', 'k*****d', 'k*****d', 'knob end', 'labia', 'muff', 'nigger', 'n***a', 'penis', 'piss', 'piss', 'poop', 'prick', 'pube', 'pussy', 'queer', 'scrotum', 'sex', 'shit', 's hit', 's**t', 's****a', 'spunk', 'tosser', 'retard', 'retards', 'twat', 'twats', 'v****a', 'wank', 'wanker', 'w***e' ] #load custom bad words profanity.load_words(swear_words) critical_train = [ ('Cannot upload csv', 'alert'), ('Please help me guys @myhermes having issues with your online chat room.', 'alert'), ('Payment not working', 'alert'), ('Tracking not working', 'alert'), ('claims process not working', 'alert'), ('cannot log in to myhermes account', 'alert'), ('cannot process my quotes', 'alert'), ('website down', 'critical'), ('site down', 'critical'), ('website offline', 'critical'), ('site offline', 'critical'), ('website takendown', 'critical'), ('website broken', 'critical'), ('is your website down', 'critical'), ('when your website will be back online', 'critical'), ('Is your site still broken? I am just about to move to Collect plus.', 'critical') ]
csrf = CSRF(config=CSRF_CONFIG)
app = csrf.init_app(app)

# Rate-limit every client (keyed by IP address) to 5 requests per second.
limiter = Limiter(
    app,
    key_func=get_ipaddr,
    default_limits=['5/second'],
)

# Words rejected/censored by the profanity filter for this app.
profanity.load_words([
    'trump',
    'pewdipie',
    'rowling',
    'bollocks',
    'google',
    'ch00beh',
    ';--',
    'homestuck',
])

# LRU cache holding up to 10000 entries.
nonsense = lrucache(10000)


def super_secret(a, b):
    """shhhh"""
    # Character-wise XOR of the two strings; zip() stops at the shorter input.
    return ''.join([chr(ord(aa) ^ ord(bb)) for aa, bb in zip(a, b)])


def super_safe_encrypt(a, b):
    # XOR the strings, then percent-encode everything (safe='' escapes even '/').
    return urllib.parse.quote(super_secret(a, b), safe='')
from flask import Flask, Blueprint
from flask import render_template, request, session, jsonify, redirect, Flask, url_for
from flask_socketio import *
from flask.ext.socketio import emit, join_room, leave_room
from VOH import open_db_connection, close_db_connection
import datetime
import time
from VOH import socketio
from VOH.main.database import TA
import os, subprocess
from profanity import profanity

# Install the chat profanity blacklist and mask any match with '****'.
profanity.load_words(['Shit', 'F**k', 'Dumb', 'Idiot', 'Stupid', 'Ass', 'Arse', 'Bitch', 'F****r', 'Asshole'])
profanity.set_censor_characters('****')


@socketio.on('join', namespace='/chat_session')
def join(message):
    """
    Handle a 'join' signal emitted by a Socket.IO client.

    Adds the client to the room identified by the message payload; the join
    message is broadcast to everyone in that room.
    :param message: Join message (dict with a 'room' key)
    """
    join_room(str(message['room']))
    client, db = open_db_connection()
    room = message['room']
    # Room names use '-' on the client but '_' in the chat_log collection keys.
    old_messages = list(db['chat_log'][message['room'].replace("-", "_")].find({}))
    close_db_connection(client)
    for key, value in enumerate(old_messages):
        # Drop the DB-generated '_id' field -- presumably not serializable for
        # the socket payload; TODO confirm against the code that emits these.
        del value['_id']
from flask import Flask, Blueprint
from flask import render_template, request, session, jsonify, redirect, Flask, url_for
from flask_socketio import *
from flask.ext.socketio import emit, join_room, leave_room
from VOH import open_db_connection, close_db_connection
import datetime
import time
from VOH import socketio
from VOH.main.database import TA
import os, subprocess
from profanity import profanity

# Install the chat profanity blacklist and mask any match with '****'.
profanity.load_words([
    'Shit', 'F**k', 'Dumb', 'Idiot', 'Stupid', 'Ass', 'Arse', 'Bitch',
    'F****r', 'Asshole'
])
profanity.set_censor_characters('****')


@socketio.on('join', namespace='/chat_session')
def join(message):
    """
    Handle a 'join' signal emitted by a Socket.IO client.

    Adds the client to the room identified by the message payload; the join
    message is broadcast to everyone in that room.
    :param message: Join message (dict with a 'room' key)
    """
    join_room(str(message['room']))
    client, db = open_db_connection()
    room = message['room']
    # Room names use '-' on the client but '_' in the chat_log collection keys.
    old_messages = list(db['chat_log'][message['room'].replace("-",