Example #1
 def isHateSpeech(self, line):  # using the open-source Hate Sonar API
     sonar = Sonar()
     response = sonar.ping(text=line)
     if response["top_class"] != "neither":  # line is hate speech
         return 1
     else:
         return 0
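For reference, every snippet on this page indexes into the same response shape returned by sonar.ping. A minimal sketch of that dict, inferred from the fields the examples use (the confidence values are purely illustrative):

response = {
    "text": "some input line",
    "top_class": "neither",
    "classes": [
        {"class_name": "hate_speech", "confidence": 0.04},
        {"class_name": "offensive_language", "confidence": 0.11},
        {"class_name": "neither", "confidence": 0.85},
    ],
}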
Example #2
    def __init__(self, bot, mysql: MySQLWrapper):
        self.bot = bot
        self.mysql = mysql
        self.sonar = Sonar()

        self.discordForum = Forum("ECC-Discord", "ECC-Discord")
        self.discordForum.insert(self.mysql)

        self.discordAuthor = ForumAuthor("Discord", "-1")
        self.discordAuthor.insert(self.mysql)

        # self.generalThread = ForumThread("#general-chat", self.discordAuthor, self.discordForum.sqlID, hltvID = "709753463323754539")
        self.generalThread = ForumThread("#general-chat",
                                         self.discordAuthor,
                                         self.discordForum.sqlID,
                                         hltvID="456834448558653451")
        self.generalThread.insert(self.mysql)

        # self.shitpostingThread = ForumThread("#shitposting-and-media", self.discordAuthor, self.discordForum.sqlID, hltvID = "727107131416903750")
        self.shitpostingThread = ForumThread("#shitposting-and-media",
                                             self.discordAuthor,
                                             self.discordForum.sqlID,
                                             hltvID="468121733929369610")
        self.shitpostingThread.insert(self.mysql)

        self.mysql.db.commit()
Example #3
def hatedetect(hds):
    """Predict 0 for texts whose top class is hate/offensive, 1 for neither."""
    ypred = []
    sonar = Sonar()
    for k in hds:
        # ping already returns a dict; no json.dumps/json.loads round trip is needed
        res = sonar.ping(text=k)
        confidences = [c["confidence"] for c in res["classes"]]
        # argmax over the three classes: 0 hate_speech, 1 offensive_language, 2 neither
        val = confidences.index(max(confidences))
        yp = 0 if val in (0, 1) else 1
        ypred.append(yp)
    return ypred
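Because hatedetect returns a flat 0/1 list, it can be scored directly against ground-truth labels. A minimal sketch, assuming labels is a 0/1 list aligned with the texts in hds (both names are hypothetical):

from sklearn.metrics import accuracy_score

ypred = hatedetect(hds)               # 0 = hate/offensive, 1 = neither
print(accuracy_score(labels, ypred))  # fraction of matching predictions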
Example #4
def refine_jsonl_file(path,
                      votes_threshold=10,
                      hate_limit=0.4,
                      offensive_limit=0.7,
                      general_limit=0.8):
    sonar = Sonar()
    name, _ = os.path.splitext(path)
    refined_name = "refined_{name}.txt".format(name=name)

    if os.path.exists(refined_name):
        os.remove(refined_name)

    # open the input in binary (rb) mode for json_lines; both files close automatically
    with open(path, 'rb') as f, open(refined_name, mode='w') as rf:
        for item in json_lines.reader(f):
            if int(item['votes']) > votes_threshold:
                text = item['text']
                # a single sonar call per text instead of two
                classes = sonar.ping(text=text)['classes']
                hate_confidence = classes[0]['confidence']
                offensive_confidence = classes[1]['confidence']
                if not ((hate_confidence > hate_limit) or
                        (offensive_confidence > offensive_limit) or
                        (hate_confidence + offensive_confidence >
                         general_limit)):
                    try:
                        print(text, file=rf)
                    except Exception:  # skip lines that cannot be written
                        continue
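A hypothetical call, assuming a corpus.jsonl whose lines carry 'votes' and 'text' fields; surviving lines are written to refined_corpus.txt, since the output name is derived from the input path:

refine_jsonl_file("corpus.jsonl", votes_threshold=10, hate_limit=0.3)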
Example #5
def classify_texts(texts):
    """
    Classify texts with hate sonar (classifier).
    """
    # initialize parameters and containers
    texts = texts.split(".")[:-1]  # naive sentence split; drops any fragment after the last "."
    rating_map = {
        "neither": "green",
        "offensive_language": "orange",
        "hate_speech": "red"
    }
    sonar = Sonar()
    results = []
    # for each text, perform detection and format dictionary
    for i, sentence in enumerate(texts):
        sentence_res = sonar.ping(text=sentence)
        top_class = sentence_res["top_class"]
        sentence_output = {
            "index": i,
            "sentence": sentence,
            "top_class": top_class,
            "rating": rating_map[top_class]
        }
        results.append(sentence_output)

    return results
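A hypothetical call showing the expected input and output shape (the sentences are made up, and the actual classes depend on the model):

results = classify_texts("Nice weather today. What an awful crowd.")
# -> [{"index": 0, "sentence": "Nice weather today", "top_class": ..., "rating": ...},
#     {"index": 1, "sentence": " What an awful crowd", "top_class": ..., "rating": ...}]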
Example #6
def isAbusiveComment(x):
    sonar = Sonar()
    response = sonar.ping(text=x)

    hateConfidence = 0.0
    offenseConfidence = 0.0
    neitherConfidence = 0.0

    # pull each per-class confidence out of the sonar response
    for result in response['classes']:
        if result["class_name"] == "hate_speech":
            hateConfidence = result['confidence']
        elif result["class_name"] == "offensive_language":
            offenseConfidence = result['confidence']
        elif result["class_name"] == "neither":
            neitherConfidence = result['confidence']  # collected for inspection only

    # flag the comment when either abusive class clears its threshold
    rez = hateConfidence > 0.6 or offenseConfidence > 0.55

    return MessageScreenerResult(rez, hateConfidence, offenseConfidence, "No Tips, Sorry!")
Example #7
def main(argv):
  global sonar

  # setting up the database

  db = MongoClient().gab

  # initialising the hateometer

  sonar = Sonar()

  # parsing all posts

  print("hateometing the posts...")

  # loop over all english posts
  for post in db.posts.find({'post.language':'en'}, no_cursor_timeout=True):
    # run hate detection on the post body
    obj = hateometer(post['post']['body'])
    postid = post['_id']
    # store the results in mongodb
    db.posts.update_one({'_id':postid},{"$set":{'post.hateometer':obj}})

  print("hateometing the comments...")

  # do the same trick for the comments
  for comment in db.comments.find({'language':'en'}, no_cursor_timeout=True):
    obj = hateometer(comment['body'])
    commentid = comment['_id']
    db.comments.update_one({'_id':commentid},{"$set":{'hateometer':obj}})

  print("done!")
Example #8
def receive_tweet(request):
    tweet = request.GET.get('tweet')
    print(tweet)
    tweet = repr(tweet)  # quote/escape the raw text before classification
    sonar = Sonar()
    result = sonar.ping(text=tweet)
    print(result)
    return render(request, 'result_api/page2.html', result)
Example #9
 def isHateSpeech(self, line):
     '''Assign a 'hatespeech score' using sonar api '''
     indices = {"hate_speech": 0, "offensive_language": 1, "neither": 2}
     sonar = Sonar()
     response = sonar.ping(text=line)
     indexOfLanguage = indices[response["top_class"]]
     if response["top_class"] != "neither":
         return response['classes'][indexOfLanguage]['confidence']
     else:
         return 0
Example #10
def main():
    sonar = Sonar()

    textArray = readFile()

    Class = []
    hate = []
    offensive = []
    neither = []

    hate_speech_classifier(textArray, Class, hate, offensive, neither, sonar)
    displayResaults(hate, offensive, neither)
Example #11
def hateSpeech(comments):
    sonar = Sonar()
    print('Inside hatespeech')
    print('Comments len = ' + str(len(comments)))
    for comment in comments:
        x = sonar.ping(text=comment.text)
        if x['top_class'] == "hate_speech":
            comment.hateType = 'hate'
        elif x['top_class'] == "offensive_language":
            comment.hateType = 'offensive'
        else:
            comment.hateType = 'neutral'
    return comments
Example #12
    def do_GET(self):

        query_components = parse_qs(urlparse(self.path).query)
        imsi = query_components["imsi"]
        print(imsi)

        # Send headers
        self._set_headers()

        # Send message back to client
        sonar = Sonar()
        message = sonar.ping(text=str(imsi[0]))
        # Write content as utf-8 data
        self.wfile.write(bytes(json.dumps(message), "utf-8"))
        return message
Example #13
 def getOffensiveness(self, string, output_a):
     sonar = Sonar()
     response = sonar.ping(text=string)
     parts = []
     # format each class as "<class_name> <confidence as a percentage>"
     for cls in response["classes"]:
         confidence = cls["confidence"]
         # tiny confidences (which print in scientific notation) are reported as 0.00
         percent = "0.00" if confidence < 1e-4 else '{0:.2f}'.format(confidence * 100)
         parts.append("{0} {1}".format(cls["class_name"], percent))
     output_a.append(" ".join(parts))
Example #14
def ping_file(dataset_path):
    """Run ping function on each line in the chat log file
        :param dataset_path:    file path of the dataset
                                (specify directory name if the file is under a folder)
    """
    sonar = Sonar()
    with open(dataset_path, 'r', encoding="utf-8") as input_file:
        # read the file and drop the "\n" at the end of each chat message
        chat_lines = input_file.read().splitlines()

    # trim whitespace before and after each chat message
    chat_lines = [each_line.strip() for each_line in chat_lines]

    # keep only the message after the [timestamp] <username> prefix
    chat_lines = [each_line.partition("> ")[2] for each_line in chat_lines]

    return [
        sonar.ping(each_line) for each_line in tqdm(
            chat_lines, desc="Processing {} rows".format(len(chat_lines)))
    ]
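A hypothetical chat log and call, assuming each line follows the [timestamp] <username> message layout that the partition("> ") step expects:

# chat.log:
#   [12:01] <alice> hello there
#   [12:02] <bob> what a day
responses = ping_file("chat.log")  # one sonar response dict per message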
Example #15
def getTweet(api, hashTag, fileName, startDT, numTweet=1):
    hashTag = "\"#" + hashTag + "\""
    startDT = "\"" + startDT + "\""
    csvFile = open(fileName, 'w')
    csvWriter = csv.writer(csvFile)
    sonar = Sonar()
    try:
        for tweet in tweepy.Cursor(api.search,
                                   q=hashTag,
                                   count=numTweet,
                                   lang="en",
                                   since=startDT).items():
            if not tweet.retweeted:
                cleantweet = cleanTweet(tweet)
                csvWriter.writerow([
                    tweet.created_at, tweet.user.screen_name,
                    str(cleantweet),
                    # ping returns a dict, so index it rather than using attribute access
                    sonar.ping(text=str(cleantweet))["top_class"]
                ])
    except tweepy.TweepError:
        print("Unable to connect to the Twitter API.")
Example #16
data = data.replace('\r', '')  # str.replace returns a new string, so the result must be reassigned
input = data.split('\n')

splitInput = [
    input[x:x + math.ceil(len(input) / 6)]
    for x in range(0, len(input), math.ceil(len(input) / 6))
]
#print(chunks[3])

#def f(splitInput, splitInputIndex):
#print('{}: hello {} from {}'.format(
#   dt.datetime.now(), name, current_process().name))
#sys.stdout.flush()

from hatesonar import Sonar
sonar = Sonar()


def f(splitInput, splitInputIndex):
    #offensiveCount = 0
    hatefulCount = 0
    for i in splitInput[splitInputIndex]:
        #for i in input:
        sonarEval = sonar.ping(i)
        if sonarEval['top_class'] == "neither":
            continue
        if sonarEval['top_class'] == "offensive_language":
            #os.system("cat {} | jq 'select(.body == \"{}\""")' >> offensive{}".format(args['input'], i, args['input']))
            #offensiveCount += 1
            continue
        if sonarEval['top_class'] == "hate_speech":
            hatefulCount += 1
Example #17
def main():
    tweet = sys.argv[1]
    sonar = Sonar()
    print(sonar.ping(tweet)['classes'])
Example #18
def triggers(request):
    if request.method == 'POST':
        print(request.POST)
        data = dict(request.POST)
        # Driver Code
        key = 'show_details'
        one = checkKey(data, key)
        key = 'check_triggers'
        two = checkKey(data, key)
        key = 'show_wordcloud'
        three = checkKey(data, key)
        key = 'hate_speech'
        four = checkKey(data, key)
        print(one, two, three, four)
        #URL Link case
        if (one == True):
            url = data['Link'][0]
            print(url)
            article = Article(url)
            article.download()
            article.parse()
            authors = article.authors
            publishdate = article.publish_date
            #article.text
            article.nlp()
            keywords = article.keywords
            articlesummary = article.summary
            return render(
                request, 'consciousApp/triggers.html', {
                    'authors': authors,
                    'publishdate': publishdate,
                    'keywords': keywords,
                    'articlesummary': articlesummary
                })
        #Show triggers
        elif (two == True):
            text = request.POST['input_text'].lower()
            triggers = [
                "9 11", "9-11", "9/11", "ableism", "abusive", "ageism",
                "alcoholism", "animal abuse", "animal death",
                "animal violence", "bestiality", "gore", "corpse", "bully",
                "cannibal", "car accident", "child abuse", "childbirth",
                "classism", "death", "decapitation", "abuse", "drug", "heroin",
                "cocaine", "eating disorder", "anorexia", "binge eating",
                "bulimia", "fatphobia", "forced captivity", "holocaust",
                "hitler", "homophobia", "hostage", "incest", "kidnap",
                "murder", "nazi", "overdose", "pedophilia", "prostitution",
                "PTSD", "racism", "racist", "rape", "raping", "scarification",
                "self-harm", "self harm", "cutting", "sexism", "slavery",
                "slurs", "suicide", "suicidal", "swearing", "terminal illness",
                "terrorism", "torture", "transphobia", "violence", "warfare"
            ]
            tw = []
            text_file = open(
                './consciousApp/static/consciousApp/input/triggercheckdata.txt',
                'w+')
            text_file.write(str(text))
            text_file.close()
            for trigger in triggers:
                if text.find(trigger) > -1: tw.append(trigger)
            if tw == []: tw.append('No Triggers Found')
            return render(request, 'consciousApp/triggers.html', {
                'text': text,
                'triggers': tw,
                'data': data
            })
        #Show_cloud
        elif (three == True):
            text = request.POST['input_text'].lower()
            tokens = word_tokenize(text)
            textdata = nltk.Text(tokens)
            stopwords = set(STOPWORDS)
            wordcloud = WordCloud(stopwords=stopwords,
                                  max_font_size=50,
                                  max_words=100,
                                  background_color="white").generate(text)
            wordcloud.to_file(
                "./consciousApp/static/consciousApp/output/word-cloud.png")
            data = "./../../static/consciousApp/output/word-cloud.png"
            return render(request, 'consciousApp/triggers.html',
                          {'data': data})

        elif (four == True):
            sonar = Sonar()
            text = request.POST['input_text'].lower()
            classes = sonar.ping(text=text)["classes"]
            hate_speech_confidence = classes[0]["confidence"] * 100
            offensive_language_confidence = classes[1]["confidence"] * 100
            neither_confidence = classes[2]["confidence"] * 100
            print(hate_speech_confidence, offensive_language_confidence,
                  neither_confidence)
            return render(
                request, 'consciousApp/triggers.html', {
                    'hate_speech_confidence': hate_speech_confidence,
                    'offensive_language_confidence':
                    offensive_language_confidence,
                    'neither_confidence': neither_confidence
                })
    else:
        return render(request, 'consciousApp/triggers.html')
Example #19
"""
Natural language features defined on a single Comment.
"""

import logging

import nltk
from polyglot.detect import Detector
from textblob import TextBlob
from profanity_check import predict_prob as profanity
from hatesonar import Sonar
from comment import Comment, CommentFeatures
from feature_extraction.nl_sets import *

logging.basicConfig(filename='./output.log')
hatesonar = Sonar()


def compute_nl_features(c: Comment):
    c.stats = CommentFeatures()
    stats = c.stats

    stats['lang'] = comment_languge(c)
    stats['word_count'] = word_count(c)
    stats['score'] = c.score
    stats['controversial'] = c.controversial
    stats['prp_first'] = percent_first_pronouns(c)
    stats['prp_second'] = percent_second_pronouns(c)
    stats['prp_third'] = percent_third_pronouns(c)
    stats['sent'] = sentiment(c)
    stats['subj'] = subjectivity(c)
Example #20
 def check(self, txt):
     sonar = Sonar()
     res = sonar.ping(text=txt)
     return json.dumps(res)
Example #21
def get_report(webpage_html):
    sonar = Sonar()
    report = sonar.ping(text=webpage_html)
    return report
Example #22
 def __init__(self):
     self.sonar = Sonar()
Example #23
def sentiment_analysis():
    documents = read_documents()
    sonar = Sonar()
    print(sonar.ping(text=mtranslate.translate(documents[1])))
    analysis_tb = TextBlob(mtranslate.translate(documents[1]))
    print(analysis_tb.sentiment.polarity)