Beispiel #1
0
def main():
    """Run the profanity filter console utility.

    Reads the text to check from ``--text`` or from the file given with
    ``--file`` (an empty string when neither is supplied), censors it with a
    ``ProfanityFilter`` built for the comma separated ``--languages``, and
    then:

    * writes the censored text to ``--output`` when given,
    * prints the censored text when ``--show`` is set,
    * otherwise prints whether the text is clean.
    """
    parser = argparse.ArgumentParser(
        description='Profanity filter console utility')

    # --text and --file are mutually exclusive; argparse itself rejects a
    # command line that supplies both, so no manual check is needed later.
    source_group = parser.add_mutually_exclusive_group()
    source_group.add_argument('-t',
                              '--text',
                              dest='text',
                              help='Test the given text for profanity')
    source_group.add_argument('-f',
                              '--file',
                              dest='path',
                              help='Test the given file for profanity')
    parser.add_argument(
        '-l',
        '--languages',
        dest='languages',
        default='en',
        help='Test for profanity using specified languages (comma separated)')
    parser.add_argument('-o',
                        '--output',
                        dest='output_file',
                        help='Write the censored output to a file')
    parser.add_argument('--show',
                        action='store_true',
                        help='Print the censored text')

    args = parser.parse_args()

    if args.text:
        text = args.text
    elif args.path:
        with open(args.path) as f:
            text = f.read()
    else:
        text = ''

    pf = ProfanityFilter(languages=args.languages.split(','))
    censored_text = pf.censor(text)

    if args.output_file:
        with open(args.output_file, 'w') as f:
            f.write(censored_text)
        print("Censored text written to output file at: " + args.output_file)

    if args.show:
        print("Censored text:\n")
        print(censored_text)

    # The clean/not-clean verdict is only reported when no censored output
    # was requested.
    if args.show or args.output_file:
        return

    if pf.is_clean(text):
        print("This text is clean.")
    else:
        print("This text is not clean!")
Beispiel #2
0
    def list(self, request):
        """Censor a comment and report whether it is clean.

        Reads the query parameters ``comment``, ``deep_flag`` and ``lang``
        from ``request``.

        Returns:
            A response containing the censored ``comment`` and an
            ``approved`` flag taken from ``is_clean``. A 400 response with an
            ``error_message`` is returned when a parameter is missing or
            when ``deep_flag`` cannot be parsed as a boolean.
        """
        params = request.query_params
        if not all(k in params for k in ('comment', 'deep_flag', 'lang')):
            return Response({'error_message': 'All params are required'},
                            status=status.HTTP_400_BAD_REQUEST)

        comment = params['comment']
        try:
            deep_flag = util.strtobool(params['deep_flag'])
        except ValueError:
            # An unrecognised flag is a client error, not a server failure.
            return Response(
                {'error_message': 'deep_flag must be a boolean value'},
                status=status.HTTP_400_BAD_REQUEST)
        lang = params['lang']

        pf = ProfanityFilter(censor_whole_words=False,
                             deep_analysis=deep_flag,
                             languages=[lang])
        return Response({
            'comment': pf.censor(comment),
            'approved': pf.is_clean(comment)
        })
Beispiel #3
0
    def check_profanity_filter_text():
        """Print the contents of a text file with profanity censored."""
        pf = ProfanityFilter()

        # Replace the path with the real location of the text file. A bare
        # file name such as 'profanity.txt' can be used when the file sits
        # next to this program. The ``with`` block closes the file again.
        with open('/yourfilelocation/filename.txt') as text_file:
            content_of_file = text_file.read()

        # censor() is provided by ProfanityFilter and returns the text with
        # offensive words masked.
        text = pf.censor(content_of_file)

        print(text)
        # get goodell boo-meter
        if "boo" in comment_text_clean.split(
                " ") or comment_text_clean == "boo":
            comment_boo = 1
        else:
            comment_boo = 0

        # visualize and write
        print([
            comment_time, comment_team, comment_text, comment_length,
            comment_sent, comment_subr
        ])

        # filter out profanity
        try:
            comment_text_censored = pf.censor(comment_text)
        except:
            print("UNSUPPORTED CHARACTER IN PROFANITY CENSOR")
            continue

        ## keep track of chunks to cleanup after chunk size comments
        if n >= chunk_size:
            print('CHUNK SIZE HIT, STARTING NEW FILE')
            t = time.time()
            n = -1
            with open('D:/Dropbox/nfl-draft-sentiment/data/comments_temp.csv',
                      'w',
                      newline='') as csvfile:
                # initiate csv writer
                comment_writer = csv.writer(csvfile,
                                            delimiter='\t',
Beispiel #5
0
from profanity_filter import ProfanityFilter

# Build the filter once with the library defaults.
pf = ProfanityFilter()

# Ask for the file to check, then load its whole contents.
file_name = input("Enter the name of Your File")
with open(file_name, "r") as source_file:
    j = source_file.read()

# Censor the loaded text and show the result.
filtered = pf.censor(j)
print(filtered)


Beispiel #6
0
def test():
    """Return the censored form of a fixed sample sentence."""
    # Imported lazily so the package is only loaded when test() is called.
    from profanity_filter import ProfanityFilter

    sample = "That's bullshit!"
    return ProfanityFilter().censor(sample)
Beispiel #7
0
            comment_subr = "team"

        # get sentiment (compound polarity score from `vader`)
        try:
            #comment_sent = TextBlob(comment_text).sentiment.polarity
            #comment_sent = getSentiment(comment_text)
            comment_sent = vader.polarity_scores(comment_text)['compound']
        except:
            print("ERROR: SENTIMENT ANALYSIS FAILED")
            continue

        # visualize and write
        print([comment_time, comment_team, comment_text, comment_length, comment_sent, comment_subr])

        # filter out profanity
        comment_text = pf.censor(comment_text)

        try:
            with open('D:/repositories/nfl-draft-sentiment/data/comments.csv', 'a', newline='') as csvfile:
                # initiate csv writer
                comment_writer = csv.writer(csvfile, delimiter='\t',
                                        quotechar='|', quoting=csv.QUOTE_MINIMAL)
                
                try:
                    comment_writer.writerow([comment_time, comment_team, comment_text, comment_length, comment_sent, comment_subr])
                except:
                    print("UNSUPPORTED CHARACTER, REMOVING COMMENT")
        except:
            print("ERROR: CONFLICT IN USING FILE")
            
Beispiel #8
0
from profanity_filter import ProfanityFilter

# Create a filter with the library defaults.
pf = ProfanityFilter()

# Censor a sample sentence. The result is neither stored nor printed, so it
# is only visible when this runs in an interactive session.
sample_text = "That's bullshit!"
pf.censor(sample_text)
Beispiel #9
0
class Message:
    """Wrapper around a chat message mapping that sanitises and decorates it.

    The wrapped mapping is modified in place. The methods read the keys
    ``msg``, ``username``, ``time`` and ``src`` and write ``metrics``.
    """

    def __init__(self, messageIN, censorChar='•'):
        """Store the message and configure the profanity filter.

        Args:
            messageIN: Mutable mapping holding the message fields.
            censorChar: Character used to mask censored text.
        """
        self.message = messageIN
        self.filter = ProfanityFilter()
        self.filter.censor_char = censorChar

    # Handle HTML Special Chars
    # Stop XSS injects
    def makeSafe(self):
        """Format the timestamp and HTML-escape the message body."""
        # Time is converted from UNIX timestamp into readable format. The
        # value is divided by 1000 first (presumably it arrives in
        # milliseconds; confirm against the sender).
        self.message['time'] = datetime.utcfromtimestamp(
            int(self.message['time']) / 1000).strftime('%Y-%m-%d %H:%M:%S')

        # Only the first 200 characters of the body are kept.
        message = htmlspecialchars(self.message['msg'][:200])

        # Bodies with more than 10 line breaks are flattened onto one line;
        # otherwise line breaks become <br> tags.
        newline_markup = '&nbsp;' if message.count('\n') > 10 else '<br>'
        message = message.replace('\n', newline_markup)

        message = message.replace(' ', '&nbsp;')
        message = message.replace('\t', '&Tab;')

        self.message['msg'] = message

    # Gives verified users the check
    def isVerf(self):
        """Append a check mark to the username when it is listed in VERIFIED.

        ``VERIFIED`` is opened relative to the current working directory and
        is expected to hold one username per line.
        """
        with open('VERIFIED', 'r') as verified_file:
            # Line endings must be removed, otherwise every entry carries a
            # trailing newline and can never equal a username.
            verified = {line.rstrip('\r\n') for line in verified_file}

        if self.message['username'] in verified:
            self.message['username'] += ' ✔'

    # Formats links as <a> tags
    def formatLinks(self):
        """Wrap each space-separated word starting with ``http`` in an <a> tag.

        Every word is followed by a single space, so the resulting body
        always ends with a trailing space.
        """
        rendered = [
            f'<a href="{word}">{word}</a>' if word.startswith('http') else word
            for word in self.message['msg'].split(' ')
        ]
        self.message['msg'] = ''.join(part + ' ' for part in rendered)

    @staticmethod
    def _mask_illegal(text, legalchars):
        """Return ``text`` with characters not in ``legalchars`` replaced."""
        return ''.join(
            char if char in legalchars else '&diams;' for char in text)

    # Removes illegal characters not defined in legalchars
    def legalize(self, legalchars):
        """Replace characters outside ``legalchars`` with ``&diams;``.

        Applies to both the body and the username. The last character of the
        body is dropped before filtering.
        NOTE(review): presumably this removes the trailing space appended by
        formatLinks; confirm the intended call order.
        """
        self.message['msg'] = self._mask_illegal(self.message['msg'][:-1],
                                                 legalchars)
        self.message['username'] = self._mask_illegal(
            self.message['username'], legalchars)

    # Censors restricted words
    def censor(self, restrictedwords):
        """Censor profanity and the given restricted words.

        The body and username are first passed through the profanity filter.
        Then every entry of ``restrictedwords`` is replaced, ignoring case,
        by the censor character repeated once per character of the entry.

        NOTE(review): entries are handed to ``re.sub`` unescaped, so regex
        metacharacters in them are interpreted as pattern syntax.
        """
        self.message['msg'] = self.filter.censor(self.message['msg'])
        self.message['username'] = self.filter.censor(self.message['username'])

        for word in restrictedwords:
            mask = self.filter.censor_char * len(word)
            for field in ('username', 'msg'):
                self.message[field] = re.sub(word,
                                             mask,
                                             self.message[field],
                                             flags=re.IGNORECASE)

    # Searches and replaces text
    def searchReplace(self, tags):
        """Apply each ``(search, replace)`` pair to the body and username."""
        for search, replace in tags:
            for field in ('msg', 'username'):
                self.message[field] = self.message[field].replace(
                    search, replace)

    # Image handling :
    # - Encoding
    # - Saving
    # - URLFOR
    # - Transfer
    def formatImg(self):
        """Save an inline image to ``static/upl`` and point ``src`` at it.

        Does nothing when ``src`` equals ``'NOIMAGE'``. Otherwise ``src`` is
        split at commas into a header and base64 data (presumably a data
        URL; confirm against the client). The file extension is the text
        between the first ``/`` and the following ``;`` in the header.
        """
        if self.message['src'] == 'NOIMAGE':
            return

        parts = self.message['src'].split(',')
        header, data = parts[0], parts[1]
        ext = header.split('/')[1].split(';')[0]

        # The file is named after the message's 'time' value.
        filename = f'upl/{self.message["time"]}.{ext}'
        with open('static/' + filename, 'wb') as f:
            f.write(base64.decodebytes(data.encode("ascii")))

        self.message['src'] = url_for("static", filename=filename)

    # In case of admin tag
    # Kinda deprecated
    def tag(self, tagr, tagl):
        """Replace ``tagl`` with ``<`` and ``tagr`` with ``>`` in the body."""
        self.message['msg'] = self.message['msg'].replace(tagl, '<')
        self.message['msg'] = self.message['msg'].replace(tagr, '>')

    # Detoxify System
    def isToxic(self, t):
        """Score the body with ``detox.toxicScore`` and compare it to ``t``.

        Stores the returned metric under ``metrics``.

        Returns:
            A tuple ``(score > t, metric, result)``.
        """
        msgScore, metric, result = detox.toxicScore(self.message['msg'])
        self.message['metrics'] = metric
        return msgScore > t, metric, result
Beispiel #10
0
def profanity(text):
    """Return ``text`` censored with the Russian and English filters.

    The value returned here is what gets substituted into the template.
    """
    return ProfanityFilter(languages=['ru', 'en']).censor(text)
Beispiel #11
0
from profanity_filter import ProfanityFilter

pf = ProfanityFilter()

# Load all lines up front so the input file is closed before censoring starts.
with open('gp2.txt') as scripts_file:
    text = scripts_file.readlines()

# Keep every fourth line, starting with the third one.
output = text[2::4]
line_no = 0  # NOTE(review): not used anywhere in the visible code
print('censoring')

censored_lines = []
for line in output:
    print(f"line: {line}")
    censored_lines.append(pf.censor(line))
censored_output = "".join(censored_lines)

# The ``with`` block closes the output file once the text has been written.
with open('movie_scripts.txt', 'w') as outFile:
    outFile.write(censored_output)