Example #1
def check_profanity(user_message):
    debug_log('in check_profanity, user_message: ' + user_message)
    user_message_words = re.compile(r"\W+").split(user_message)
    actual_words_in_user_message = [
        word for word in user_message_words if len(word) > 0
    ]
    if len(actual_words_in_user_message) < 1:
        return False
    else:
        debug_log('in check_profanity, user_message_words: ' +
                  str(actual_words_in_user_message))
        overall_profanity = sum(predict(actual_words_in_user_message))
        debug_log('in check_profanity, overall_profanity: ' +
                  str(overall_profanity))
        if overall_profanity > 0:
            return True
        debug_log('in check_profanity, profanity probability: ' +
                  str(predict_prob(actual_words_in_user_message)))
        # after the early return overall_profanity is always 0, so the
        # per-word probability average is what can still trigger a flag
        profanity_prob_average = sum(
            predict_prob(actual_words_in_user_message)) / len(
                actual_words_in_user_message)
        debug_log('in check_profanity, profanity probability average: ' +
                  str(profanity_prob_average))
        if profanity_prob_average > 0.5:
            return True
    return False
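The snippet above assumes re, predict, predict_prob, and a debug_log helper are already in scope. A minimal sketch of that assumed setup (the debug_log body is hypothetical, not from the original repo):

import re

from profanity_check import predict, predict_prob

def debug_log(message):
    # hypothetical stand-in for the original logger, which is not shown
    print('[debug] ' + message)

print(check_profanity('have a nice day'))  # likely False for clean text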
Example #2
    async def check(self, ctx, *, message):
        """Vérifie les insultes d'un message."""
        from profanity_check import predict_prob

        p = predict_prob([message])
        judgments = [
            "est gentil",
            "est cool",
            "est sympathique",
            "est pas fou",
            "est moyen",
            "est pas cool",
            "est insultant",
            "est vulgaire",
            "est violent",
            "est innacceptable",
            "mérite un ban",
        ]
        i = min(int(p * len(judgments)), len(judgments) - 1)
        judgment = judgments[i]
        title = f"`{int(p*100)}% vulgaire`"
        r = int(255 * p)
        g = int(255 * (1 - p))
        color = 256**2 * r + 256 * g
        color = discord.Color(color)
        description = f"Ce message {judgment}!"
        embed = discord.Embed(title=title,
                              description=description,
                              color=color)
        await ctx.send(embed=embed)
Example #3
def sent(message: str):
    binPred = predict([message])
    probPred = predict_prob([message])

    dct = {"bin": str(binPred[0]), "prob": str(probPred[0])}
    jsonData = jsonable_encoder(dct)
    return JSONResponse(jsonData)
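jsonable_encoder and JSONResponse come from FastAPI. A minimal sketch of how this handler might be mounted, assuming a FastAPI app and a hypothetical /sent route (the path and app name are assumptions, not from the original):

from fastapi import FastAPI
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from profanity_check import predict, predict_prob

app = FastAPI()
app.get('/sent')(sent)  # hypothetical registration; message becomes a query parameter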
Example #4
def sentence_profanity_prob(string):
    """ returns the probability that a string is profanity """
    try:
        result = predict_prob([ string ])
        return result.item()
    except Exception:
        return 0.0
Example #5
def profanity_score_min_max(*args):
    profanity_score_list_ = []
    for artist in set(artists_list(*args)):
        for song in artist_s_cleaned_songs_list(artist, *args):
            # read the cleaned lyrics and flatten them into a single string
            words_of_lyrics = []
            with open(dir_given(*args) + '/Cleaned_Songs/' + song, 'rb') as f:
                for line in f.readlines():
                    words_of_lyrics.extend(line.decode('utf-8').split())
            raw_text = " ".join(words_of_lyrics)

            profanity_check = predict_prob([raw_text])
            # the score is inverted: 1.0 means clean, 0.0 means profane
            profanity_score = 1 - float(profanity_check[0])
            profanity_score_list_.append(profanity_score)


#            print(profanity_score)

    min_profanity_score_list_ = round(min(profanity_score_list_), 2)
    max_profanity_score_list_ = round(max(profanity_score_list_), 2)

    return min_profanity_score_list_, max_profanity_score_list_
Example #6
def main(argv):
    # argv is expected to be sys.argv:
    #   argv[1] = minimum profanity probability, argv[2] = ignore file,
    #   argv[3:] = files to scan
    min_prob = float(argv[1])
    ret = 0

    with open(argv[2], 'r') as f:
        ignoreds = f.readlines()
    ignoreds = [v.strip() for v in ignoreds]
    print(ignoreds)

    summary = ''
    for fn in argv[3:]:
        with open(fn, 'r') as f:
            lines = f.readlines()
            ps = predict_prob(lines)
            for i in range(len(lines)):
                if ps[i] >= min_prob:
                    keysrc = "{}\n{}".format(fn, lines[i]).encode()
                    key = '{} {}'.format(
                        hashlib.sha256(keysrc).hexdigest(), fn)
                    summary += key + '\n'
                    if key not in ignoreds:
                        ret = ret + 1
                        print("\n❌ L{} of {}:\n{}".format(
                            i + 1, fn, lines[i][:-1]),
                              file=sys.stderr)
                        print("ℹ️  Add '{}' to {} to ignore this issue".format(
                            key, argv[2]),
                              file=sys.stderr)
    print('\n\n\nSummary to ignore everything using {}:\n{}'.format(
        argv[2], summary))
    return ret
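The function reads its arguments positionally from argv. A sketch of the assumed entry point and invocation shape (script name and arguments are hypothetical):

# e.g.  python profanity_lint.py 0.8 ignored.txt src/a.py src/b.py
import sys

if __name__ == '__main__':
    sys.exit(main(sys.argv))  # exit code is the number of non-ignored findings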
Example #7
def main(argv):
    # argv is expected to be sys.argv:
    #   argv[1] = minimum profanity probability, argv[2] = ignore file,
    #   argv[3:] = files to scan
    min_prob = float(argv[1])
    ret = 0

    with open(argv[2], 'r') as f:
        ignoreds = f.readlines()
    ignoreds = [v.strip() for v in ignoreds]
    print(ignoreds)

    for fn in argv[3:]:
        with open(fn, 'r') as f:
            lines = f.readlines()
            ps = predict_prob(lines)
            for i in range(len(lines)):
                if ps[i] >= min_prob:
                    keysrc = "{}\n{}".format(fn, lines[i]).encode()
                    key = '{} {}'.format(
                        hashlib.sha256(keysrc).hexdigest(), fn)
                    if key not in ignoreds:
                        ret = ret + 1
                        print("\n\n\n❌ L{} of {}:\n{}".format(
                            i + 1, fn, lines[i]),
                              file=sys.stderr)
                        print("ℹ️ Add '{}' to {} to ignore this issue".format(
                            key, argv[2]),
                              file=sys.stderr)
                    else:
                        print("\n\n\nℹ️ IGNORED: L{} of {}:\n{}".format(
                            i + 1, fn, lines[i]))
                        print(
                            "ℹ️ Remove '{}' from {} to stop ignoring this issue"
                            .format(key, argv[2]))
    return ret
Example #8
def Make_Features(df):
    df['Freq'] = df['text'].map(df['text'].value_counts())
    df['word_count'] = df['text'].swifter.apply(lambda x: len(str(x).split(" ")))
    df['char_count'] = df['text'].str.len()
    df['stpw_count'] = df['text'].swifter.apply(lambda x: len([w for w in x.split() if w in stop]))

    df['spchar_count'] = df['text'].swifter.apply(lambda x: len([c for c in x if c in special_char]))
    # prepare for sentiment analysis:
    # 1- remove the punctuation:
    df['text_modif'] = df['text'].str.replace(r'[^\w\s]', '', regex=True)
    # 2- remove stop words:
    df['text_modif'] = df['text_modif'].swifter.apply(lambda x: " ".join(w for w in x.split() if w not in stop))
    # 3- lemmatize:
    df['text_modif'] = df['text_modif'].swifter.apply(lambda x: " ".join([Word(word).lemmatize() for word in x.split()]))
    df['sentiment'] = df['text_modif'].swifter.apply(lambda x: TextBlob(x).sentiment[0])
    # finally add profanity_check
    df['profane_modf'] = predict_prob(df['text_modif'])
    #print(df[['word_count','char_count','stpw_count','spchar_count', 'profane', 'sentiment']].head())
    df['profane_pfilter'] = df['text'].swifter.apply(lambda x: nlp(x)._.is_profane)
    # spaCy NER counts: organizations, money, dates/times, people, places
    df['org']   = df['text'].swifter.apply(lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'ORG']))
    df['Money'] = df['text'].swifter.apply(lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'MONEY']))
    df['tDate'] = df['text'].swifter.apply(lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'DATE' or str(y.label_) == 'TIME']))
    df['Pers']  = df['text'].swifter.apply(lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'PERSON']))
    df['GPE']   = df['text'].swifter.apply(lambda x: len([y for y in nlp(x).ents if str(y.label_) == 'GPE']))
    print(df.describe())
    return df
Example #9
def profanity_score(song_to_be_scored, profanity_score_min,
                    profanity_score_max, *args):
    # read the cleaned lyrics and flatten them into a single string
    words_of_lyrics_of_song_to_be_scored = []
    with open(dir_given(*args) + '/Cleaned_Songs/' + song_to_be_scored,
              'rb') as f:
        for line in f.readlines():
            words_of_lyrics_of_song_to_be_scored.extend(
                line.decode('utf-8').split())
    raw_text = " ".join(words_of_lyrics_of_song_to_be_scored)

    profanity_check = predict_prob([raw_text])
    # the score is inverted: 1.0 means clean, 0.0 means profane
    profanity_score_of_song_to_be_scored = 1 - float(profanity_check[0])

    # min-max normalization of the score into [0, 1]
    profanity_score_of_song_to_be_scored_regularized = (
        profanity_score_of_song_to_be_scored - profanity_score_min) / (
            profanity_score_max - profanity_score_min)

    return round(profanity_score_of_song_to_be_scored_regularized, 2)
Example #10
 def _check_for_profanity(self, message):
     """
     Checks message content with the profanity-check model
     :param message: content to be checked
     :return: True if content is flagged, False otherwise
     """
     # predict_prob expects a list of strings, so wrap the message
     return predict_prob([message])[0] > THRESHOLD
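THRESHOLD is a module-level constant not shown in the example. A sketch of the assumed definition; the actual value used by the original bot is unknown:

from profanity_check import predict_prob

THRESHOLD = 0.5  # hypothetical value, not from the original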
Example #11
def censor(word):
    new_words_list = ''
    for item in word.split():
        # predict_prob expects a list; take the scalar probability for this word
        if predict_prob([item])[0] >= 0.3 or item in chkchk(word):
            item = item[0] + '*' * len(item[1:])
        new_words_list += item + " "

    return new_words_list
Example #12
def get_prob(message):
    '''
    Parameter (Discord Message): the Discord message the bot has received

    Returns (float): the probability that the message contains profanity
    '''
    # use .content to get the string in the Discord message, then wrap it in a list;
    # take the first value of the numpy array and convert it to a native Python float with .item()
    return predict_prob([message.content])[0].item()
Example #13
def profanityCheck(text):
    """
    Initial profanity check using profanity_check
    :param: text The text to analyse
    :param: keyNum The key number to use
    :param: lang The language of the text
    :return: The probability of the text containing profanity
    """
    return predict_prob([text])[0]
Example #14
 async def process_message(self, message):
     """Checks a message for profanity. Currently does nothing."""
     if message.author != self.bot.user:
         prof_percent = predict_prob([message.clean_content])
         if prof_percent[0] >= 0.75:
             await message.add_reaction('😡'
                                        )  # Pretty strong mute candidate
         elif prof_percent[0] >= 0.5:
             await message.add_reaction('😠')
Example #15
    def doc_calc(self, article):
        """Helper code to compute average word length of a name"""
        flesch_ease = textstat.flesch_reading_ease(article)
        flesch_grade = textstat.flesch_kincaid_grade(article)
        gunning = textstat.gunning_fog(article)
        profanity = predict_prob([article])[0]
        polarity = TextBlob(article).sentiment.polarity

        return pd.Series(
            [flesch_ease, flesch_grade, gunning, profanity, polarity])
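Because doc_calc returns a pd.Series, it expands into columns when applied over a DataFrame. A hypothetical usage, where checker is an instance of the enclosing class and the column names are assumptions:

feature_cols = ['flesch_ease', 'flesch_grade', 'gunning', 'profanity', 'polarity']
df[feature_cols] = df['article'].apply(checker.doc_calc)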
Example #16
async def read_root(query: str):
    # flag the query if either the ML model or the wordlist-based filter detects profanity
    prediction = predict_prob([query])[0]
    leetSp33k = profanity.contains_profanity(query)
    return bool(prediction > .5 or leetSp33k)
Example #17
async def rating(request):
    key = request.match_info.get("key", "")

    if key not in ALLOWED_API_KEYS:
        return web.Response(status=401)

    data = request.match_info.get("b64", "")
    # decode the base64 payload to text before scoring
    data = b64decode(data).decode("utf-8")

    return web.Response(body=str(predict_prob([data])[0]).encode("utf-8"))
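The handler pulls key and b64 from the URL, so the route presumably declares both. A sketch of the assumed aiohttp wiring (the path shape is inferred from the match_info lookups, not from the original):

from aiohttp import web

app = web.Application()
app.router.add_get('/rating/{key}/{b64}', rating)  # inferred route shape
web.run_app(app)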
Example #18
    async def on_message(self, message):
        if message.channel.id != channels["system"]:
            swearing = any(predict([message.content]))
            if swearing:
                probability = round(predict_prob([message.content])[0], 2) * 100

                if probability > 85:
                    await message.guild.get_channel(channels["system"]).send(
                        f'{message.author.mention} swore in {message.channel.mention}: "{message.content}" ({probability}%)'
                    )
Example #19
def offensiveness(sentence):
    """
    Compute and return the probability that the given sentence is offensive.
    Args:
        sentence: The sentence to check

    Returns: The probability that the given sentence is offensive as a float p (1 = offensive, 0 = nice, 0 <= p <= 1)

    """
    profane_prob = predict_prob([sentence])
    return profane_prob[0]
Example #20
    async def action(self, channel, sender, message):
        quick_chats = [
            "OMG!", "Wow!", "Okay.", "Savage!", "Thanks!", "Holy cow!"
        ]

        profanity = predict_prob([message])
        if (profanity[0] > self.profanity_threshold
                and random.random() < self.random_response_chance):
            say = Message().set_target(channel)
            say.add_field(name="", value=random.choice(quick_chats))
            await self.connector.send_message(say)
Example #21
async def on_message(message):
    if message.author == dscClient.user:
        return
    print(str(predict_prob([message.content])[0]) + " " + str(message.guild))
    if (predict_prob([message.content])[0] > .79
            or messageContainsTriggerWord(message)):
        await deleteBlacklistedMessage(message)

    if message.channel.name == "catras-diary":
        archiveChannel = dscClient.get_channel(738415449582075924)
        await archiveChannel.send(message.author.name + ": " + message.content)
        # asyncio.sleep instead of time.sleep so the event loop isn't blocked
        await asyncio.sleep(calculateDelayTime(message.content))
        try:
            await message.delete()
        except discord.errors.NotFound:
            await log(
                "Someone deleted a message before me. Sneaky. System may reboot, this is normal behavior."
            )
    elif message.content.lower().startswith(
            "lighthope") or message.content.lower().endswith("lighthope"):
        response = witClient.message(msg=message.content)
        await handle_message(response, message.channel)
Example #22
def profanityAnalysis(text_content):
    # Binary-search-style narrowing: keep whichever half has the higher
    # per-word profanity probability until no split improves on the baseline.
    start_index = 0
    text_content_arr = text_content.split()
    end_index = len(text_content_arr)

    profanity_baseline = predict_prob([text_content])[0] / (end_index - start_index)

    profanity_split_1 = predict_prob([
        ' '.join(text_content_arr[start_index:min(end_index // 2 + 1, end_index - 1)])
    ])[0] / (min(end_index // 2 + 1, end_index - 1) - start_index)
    profanity_split_2 = predict_prob([
        ' '.join(text_content_arr[max(1, end_index // 2 - 1):end_index])
    ])[0] / (end_index - max(1, end_index // 2 - 1))
    while ((end_index - start_index) > 1
           and (profanity_baseline <= profanity_split_1
                or profanity_baseline <= profanity_split_2)):
        if profanity_split_1 >= profanity_split_2:
            # keep the first half
            profanity_baseline = profanity_split_1
            end_index = min((start_index + end_index) // 2, end_index - 1)
        else:
            # keep the second half
            profanity_baseline = profanity_split_2
            start_index = max(1, (start_index + end_index) // 2)
        profanity_split_1 = predict_prob([
            ' '.join(text_content_arr[start_index:min(
                (start_index + end_index) // 2, end_index - 1)])
        ])[0] / max(1, (min(
            (start_index + end_index) // 2, end_index - 1) - start_index))
        profanity_split_2 = predict_prob([
            ' '.join(text_content_arr[max(1, (start_index + end_index) //
                                          2):end_index])
        ])[0] / max(1, (end_index - max(1, (start_index + end_index) // 2)))

    return (start_index, end_index)
Example #23
def feature_profanity(dataset_filename_pkl):
    feature_filename = 'features/' + os.path.basename(
        dataset_filename_pkl)[:-4] + '_feature_profanity.pkl'
    if not os.path.isfile(feature_filename):
        author_data = utils.load_feature(dataset_filename_pkl)
        author_profanity = {}
        for author in tqdm(author_data):
            single_text = ''.join(author_data[author])
            profanity_rate = predict_prob([single_text])
            author_profanity[author] = profanity_rate[0]

        with open(feature_filename, 'wb') as profanity_file:
            pickle.dump(author_profanity, profanity_file)
    return feature_filename
Example #24
def censoring(input_list):

    '''
    INPUT:
        input_list : (list of sentences) list of sentences to check
    OUTPUT:
        predictions : (list of booleans) True if the sentence includes offensiveness or profanity
    '''

    threshold = 0.15
    predictions = [prob > threshold for prob in predict_prob(input_list)]

    return predictions
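A quick usage sketch; the exact probabilities depend on the bundled model, so the expected output is indicative only:

from profanity_check import predict_prob

sentences = ['Have a lovely day', 'f**k you']
print(censoring(sentences))  # likely [False, True] with the 0.15 threshold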
Example #25
def censor_profanity(comment_text, profanity_threshold=0.9):
    """
  Replaces profanity using a (probably) inefficient use of the alt-profanity-filter.

  Input: comment_text (str)
  Output: comment text with profane words censored 
  """
    repl_text = [
        (x, f"{x[0]}{''.join(['*' for x in range(len(x)-1)])}")
        for x in comment_text.split(' ')
        if predict_prob([x])[0] > profanity_threshold
    ]
    comment_text_censored = comment_text
    for text in repl_text:
        comment_text_censored = comment_text_censored.replace(text[0], text[1])
    return comment_text_censored
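A usage sketch, assuming predict_prob is imported from profanity_check; which words get starred depends on the model, so the output is indicative only:

from profanity_check import predict_prob

print(censor_profanity('what the hell is this', profanity_threshold=0.5))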
Example #26
def get_profanity(txt, prob=False):
    if not isinstance(txt, str):
        raise Exception('txt has to be a string')
    if len(txt.strip()) == 0:
        return False
    blob = TextBlob(txt)
    # note: TextBlob's detect_language() calls an online service and is deprecated in newer versions
    lang = blob.detect_language()
    result = None
    # if blob.detect_language() != 'en':
    #   raise Exception('Only english text can be verified for profanity!')
    if lang == 'en':
        if not prob:
            result = bool(predict([txt])[0] == 1)
        else:
            result = predict_prob([txt])[0]
    # apparently the equality above does not return a Python bool, so we have to convert it explicitly
    return {'result': result, 'lang': lang}
Example #27
def test_accuracy():
    texts = [
        'Hello there, how are you',
        'Lorem Ipsum is simply dummy text of the printing and typesetting industry.',
        '!!!! Click this now!!! -> https://example.com',
        'f**k you',
        'f**K u',
        'GO TO hElL, you dirty scum',
    ]
    assert list(predict(texts)) == [0, 0, 0, 1, 1, 1]

    probs = predict_prob(texts)
    for i in range(len(probs)):
        if i < 3:
            assert probs[i] <= 0.5
        else:
            assert probs[i] >= 0.5
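The two APIs are related: as a sketch, predict appears to behave like thresholding the calibrated probability from predict_prob at 0.5 (an assumption consistent with the asserts above, not a documented guarantee):

from profanity_check import predict, predict_prob

texts = ['Hello there, how are you', 'GO TO hElL, you dirty scum']
assert list(predict(texts)) == [int(p > 0.5) for p in predict_prob(texts)]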
Example #28
def add_tweet():
    user = request.json['user']
    description = request.json['description']
    private = request.json['private']
    pic = request.json['pic']
    number = predict_prob([description])[0]
    if number <= 0.5:
        tweet = dict(user=user,
                     description=description,
                     private=private,
                     upvote=0,
                     date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                     pic=pic,
                     _id=str(ObjectId()))
        tweets[tweet['_id']] = tweet
        if push_to_redis:
            rjjsonsetwrapper('ttwi-' + ssm() + '-' + tweet['_id'],
                             Path.rootPath(), tweet)
        return jsonify(tweet)
Example #29
    async def on_message(message):
        # SAO Easter Egg
        punctuations = '!()-[]{};:\'"\\,<>./?@#$%^&*_~'
        # remove punctuation from the string
        msg = ""
        for char in message.content.lower():
            if char not in punctuations:
                msg = msg + char

        # profanity check
        prob = predict_prob([msg])
        if prob[0] >= 0.8:
            em = discord.Embed(title="AI Analysis Results", color=0xC54B4F)
            em.add_field(name='PROFANITY DETECTED! ', value=str(prob[0]))
            await message.channel.send(embed=em)

        if msg.startswith("system call "):
            content = msg[12:].split(" ")
            if content[0].lower() == "inspect":
                if content[1].lower() == "entire":
                    if content[2].lower() == "command":
                        if content[3].lower() == "list":
                            em = discord.Embed(title=f"🍢 SAO Command List", color=0x7400FF)
                            em.set_thumbnail(
                                url="https://cdn.discordapp.com/attachments/668816286784159763/674285661510959105/Kirito-Sao-Logo-1506655414__76221.1550241566.png")
                            em.add_field(name='Commands',
                                         value="generate xx element\ngenerate xx element xx shape\ninspect entire command list")

                            em.set_footer(text=f"{teapot.copyright()} | Code licensed under the MIT License")
                            await message.channel.send(embed=em)
            elif content[0].lower() == "generate":
                if content[-1].lower() == "element":
                    em = discord.Embed(title=f"✏ Generated {content[1].lower()} element!",
                                       color=0xFF0000)
                    await message.channel.send(embed=em)
                if content[-1].lower() == "shape":
                    if content[2].lower() == "element":
                        em = discord.Embed(
                            title=f"✏ Generated {content[-2].lower()} shaped {content[1].lower()} element!",
                            color=0xFF0000)
                        await message.channel.send(embed=em)
        await bot.process_commands(message)
Example #30
def api_offensive():
    
    filepath = 'C:/Users/dipta/OneDrive/Documents/MCS/CS 410/CourseProject/file-read-api/search.txt'
    response = []
    with open(filepath, errors='ignore') as f:
        for line in f:
            profanity = [line]
            det_val = predict(profanity)
            prob_val = predict_prob(profanity)
            prediction = int(det_val[0])
            prediction_prob = float(prob_val[0])
            data = {}
            data['tweet'] = line
            data['prediction'] = prediction
            data['prob'] = prediction_prob
            response.append(data)
    return json.dumps(response)
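Since the function returns json.dumps(response), it can serve directly as a Flask view. A sketch of hypothetical wiring (the route path and app name are assumptions, not from the original):

import json
from flask import Flask
from profanity_check import predict, predict_prob

app = Flask(__name__)
app.add_url_rule('/offensive', 'api_offensive', api_offensive)  # hypothetical route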