Example #1
import os
import operator

import nltk
import numpy as np
import pandas as pd
from empath import Empath


def executeEmpathOnISEAR(ISEAR, DATADIR):

	try:
		corpus = pd.read_csv(ISEAR, sep=',',header=None)

		if not os.path.isfile(DATADIR + "/labels_empath_on_ISEAR.txt"):

			lexicon = Empath()		#instance of empath analyser
			emotions_list = ['fear', 'joy', 'anger', 'sadness', 'disgust']
			model = "reddit"

			res = {}

			best_em = []		# will contain empath analysis results
			emotions_results = []

			for i in range(len(emotions_list)):			# creates a category for each emotion
				lexicon.create_category(emotions_list[i],[emotions_list[i]], model=model)

			for sentence in corpus[1]:
				for k in range(len(emotions_list)):			# tokenizes and analyzes the sentences
					tokens = nltk.word_tokenize(sentence)
					emotions_results = lexicon.analyze(tokens, normalize=True, categories=[emotions_list[k]])
					res = {**res, **emotions_results}		# merge all results in one dictionary

					emotions_results = []

				max_likely_emotions_empath = max(res.items(), key=operator.itemgetter(1))[0]

				if res[max_likely_emotions_empath] != 0.0:
					best_em.append(max_likely_emotions_empath)
				else:
					best_em.append('no_idea')

			best_em = np.asarray(best_em)
			np.savetxt(DATADIR + "/labels_empath_on_ISEAR.txt", best_em, fmt="%s")      #saves empath detection

		# ---------------------------------- if labels already exist: --------------------------------

		ISEAR_labels = corpus[0]

		empath_labels = pd.read_csv(DATADIR + '/labels_empath_on_ISEAR.txt', sep=',',header=None)

		detected_labels = [ISEAR_labels[i] for i in range(len(ISEAR_labels)) if empath_labels[0][i] != 'no_idea']
		matches = [ISEAR_labels[i] for i in range(len(ISEAR_labels)) if empath_labels[0][i] == ISEAR_labels[i]]

		detected_percentage = len(detected_labels)/len(ISEAR_labels)
		overall_accuracy = len(matches)/len(ISEAR_labels)
		detected_accuracy = len(matches)/len(detected_labels)

		print('detected_percentage:', detected_percentage)
		print('detected_accuracy:', detected_accuracy)
		print('overall_accuracy:', overall_accuracy)
		return 0
		
	except Exception as e:
		print(str(e))
		return 51
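
For reference, the per-sentence scoring pattern in Example #1 can be tried in isolation. A minimal sketch, assuming the empath and nltk packages are installed, the NLTK "punkt" tokenizer data has been downloaded, and the Empath category-generation backend is reachable (create_category contacts it over the network):

from empath import Empath
import nltk
import operator

lexicon = Empath()
emotions_list = ['fear', 'joy', 'anger', 'sadness', 'disgust']
for emotion in emotions_list:
    # seed each category with its own name, expanded via the "reddit" model
    lexicon.create_category(emotion, [emotion], model="reddit")

sentence = "I was terrified when the lights went out."
tokens = nltk.word_tokenize(sentence)
scores = lexicon.analyze(tokens, normalize=True, categories=emotions_list)
best = max(scores.items(), key=operator.itemgetter(1))
print(best)  # ('fear', <score>) if any category term matched; all scores are 0.0 otherwise
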
Example #2
    args = np.argsort(cosines)[::-1]
    return [(index_to_botname[x], cosines[x]) for x in args]


def edit_distance(query_str, msg_str):
    return Levenshtein.distance(query_str.lower(), msg_str.lower())


def similar_names(query, msgs):
    li = [(edit_distance(query, msg), msg) for msg in msgs]
    li.sort(key=lambda x: x[0])
    return li


lexicon = Empath()
lexicon.create_category("funny", ["funny", "lol", "hilarious", "haha", "joke"])
#lexicon.create_category("silly",["silly","ridiculous","childish"])
lexicon.create_category("stupid", ["stupid", "dumb", "pointless", "wrong"])
#lexicon.create_category("good", ["good", "great", "perfect", "wonderful", "fantastic"])
lexicon.create_category(
    "bad",
    ["bad", "wrong", "waste", "inaccurate", "stupid", "disagree", "sad"])
lexicon.create_category("useful",
                        ["good", "function", "effective", "interesting"])
lexicon.create_category("appreciated",
                        ["appreciate", "thanks", "good", "useful"])
#lexicon.create_category("interesting", ["cool", "interesting", "fascinating"])
lexicon.create_category(
    "factual", ["fact", "check", "statistics", "information", "informative"])
lexicon.create_category("shocking",
                        ["shocked", "wtf", "shit", "jesus", "christ", "yikes"])
Example #3
def textProcessing(JSON, DATA, USERNAME, APIKEY):

    global JSON_file
    global data_folder
    global sender_name

    JSON_file = JSON
    data_folder = DATA
    sender_name = USERNAME

    if not os.path.isfile(JSON_file):
        return 201

    if not os.path.isdir(data_folder):
        return 202

    if data_folder[-1:] == "/":
        return 203

    analyser = SentimentIntensityAnalyzer()  # instance of vader sentiment analyzer

    lexicon = Empath()  #instance of empath analyser
    emotions_list = ['fear', 'joy', 'anger', 'sadness', 'disgust']
    model = "reddit"

    with open(JSON_file, encoding='utf-8-sig') as json_file:
        try:

            try:
                json_data = json.load(json_file)
            except Exception as e:
                return 23

            chat = json_data["chats"]["list"]
            sentences = []

            text_emotions = []

            # collects the sender's messages longer than 6 characters into a list
            for i in range(len(chat[0]["messages"])):
                if (chat[0]["messages"][i]["from"].replace(" ", "") == sender_name
                        and len(chat[0]["messages"][i]["text"]) > 6):
                    sentences.append(chat[0]["messages"][i]["text"])

            for i in range(len(emotions_list)):  # creates a category for each emotion
                lexicon.create_category(emotions_list[i], [emotions_list[i]], model=model)

            best_em = []  # will contain empath analysis results
            emotions_results = []

            res = {}
            neg = []
            neutral = []
            pos = []

            for j in range(len(sentences)):
                if sentences[j] != '':
                    print('')
                    print(sentences[j])
                    output = detect_emotion(sentences[j], APIKEY)
                    if output == 25:
                        return output
                    text_emotions.append(output)

                    output = sentiment_analyzer_scores(sentences[j], analyser)
                    if output == 26:
                        return output

                    neg.append(output["neg"])
                    neutral.append(output["neu"])
                    pos.append(output["pos"])

                    if j != len(sentences) - 1:
                        neg.append(',')
                        neutral.append(',')
                        pos.append(',')

                    error = textBlobSentimentAnalyze(sentences[j])
                    if error != 0:
                        return error

                    for k in range(len(emotions_list)):  # tokenizes and analyzes the sentences
                        tokens = nltk.word_tokenize(sentences[j])
                        emotions_results = lexicon.analyze(tokens, normalize=True,
                                                           categories=[emotions_list[k]])
                        res = {**res, **emotions_results}  # merge all results in one dictionary

                    print(res)

                    emotions_results = []

                    max_likely_emotions_empath = max(res.items(), key=operator.itemgetter(1))[0]

                    if max(res.items(), key=operator.itemgetter(1))[1] != 0.0:
                        print('max empath:', max_likely_emotions_empath)

                    print('')

            if not os.path.isfile(data_folder + "/sentences.txt"):
                print("Creating and wrting into:'sentences.txt' ...")
                sentences = np.asarray(sentences)
                np.savetxt(data_folder + "/sentences.txt", sentences,
                           fmt="%s")  #saves sentences

            else:
                return 280

            if not os.path.isfile(data_folder + "/text_emotions.txt"):
                print("Creating and wrting into:'text_emotions.txt' ...")
                text_emotions = np.asarray(text_emotions)
                np.savetxt(data_folder + "/text_emotions.txt",
                           text_emotions,
                           fmt="%s")  #saves emotions

            else:
                return 281

            if not os.path.isfile(data_folder + "/sentiment_types.txt"):
                print("Creating and wrting into:'sentiment_types.txt' ...")
                sentiment_types = []
                sentiment_types.append(neg)
                sentiment_types.append(neutral)
                sentiment_types.append(pos)
                sentiment_types = np.asarray(sentiment_types)
                np.savetxt(data_folder + "/sentiment_types.txt",
                           sentiment_types,
                           fmt="%s")  #saves sentiment types
            else:
                return 282

            return 0
        except Exception as e:
            print(e)
            return 24
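
The helpers detect_emotion, sentiment_analyzer_scores and textBlobSentimentAnalyze are defined elsewhere in this project. Judging from the 'neg'/'neu'/'pos' keys read above and the error code 26, sentiment_analyzer_scores presumably wraps VADER; a minimal sketch of such a wrapper, assuming the vaderSentiment package (the real implementation may differ):

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def sentiment_analyzer_scores(sentence, analyser):
    # polarity_scores returns a dict with 'neg', 'neu', 'pos' and 'compound' values
    try:
        return analyser.polarity_scores(sentence)
    except Exception as e:
        print(str(e))
        return 26  # error code checked by the caller above

analyser = SentimentIntensityAnalyzer()
print(sentiment_analyzer_scores("I really love this chat bot", analyser))
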
Example #4
def command(self, name, seed_terms):
    from empath import Empath
    lexicon = Empath()
    lexicon.create_category(name, seed_terms, model="reddit")
    out = lexicon.cats[name]
    return name, out
Example #5
def count_unconnect(u):

    # hopefully this is a fairly diverse group
    lexicon = Empath()
    # print(len(u))

    lexicon.create_category("support", support, model="nytimes")
    lexicon.create_category("conflict", conflict, model="nytimes")
    lexicon.create_category("conclusion", conclusion, model="nytimes")
    lexicon.create_category("complementary", complementary, model="nytimes")
    lexicon.create_category("causal_argument",
                            causal_argument,
                            model="nytimes")
    lexicon.create_category("verbs_hedging", verbs_hedging, model="nytimes")

    #["because", "only", "before", "so", "if", "though", "then", "until", "once", "even", "since", "although", "so", "while", "having", "because", "already", "thus", "time", "unless", "now", "actually", "eventually"]
    #["though", "although", "except", "yet", "but", "even", "because", "only", "Though", "Although", "Yet", "either", "nevertheless", "whereas", "though", "fact", "however", "unlike", "Furthermore", "because", "nonetheless", "And", "However", "none", "either", "still", "Even", "despite", "if", "so", "Yet", "meaning", "indeed", "consequently"]
    #[]
    #["while", "whereas", "though", "only", "yet", "While", "thus", "even", "Thus", "Instead", "although", "instead", "Though", "Moreover", "actually", "nevertheless", "sometimes", "still", "rather"]
    #["means", "therefore", "means", "merely", "mechanism", "democratic_process", "Therefore", "simply", "free_market", "consequence", "because"]
    # cat_all = lexicon.analyze(u, categories = ["support", "conflict", "conclusion", "complementary", "causal_argument"], normalize=True)
    cat_all = lexicon.analyze(u, categories=['verbs_hedging'], normalize=True)
    #cat_all = {}
    #for arg in u:
    #   cat = lexicon.analyze(arg)
    #   if cat["children"] != 0:
    #       print(arg, cat["children"])
    return cat_all
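
The seed lists (support, conflict, conclusion, complementary, causal_argument, verbs_hedging) are module-level globals defined elsewhere. A self-contained sketch of the same pattern with a hypothetical seed list; as in the example above, passing a list of sentences to analyze scores the whole collection at once:

from empath import Empath

lexicon = Empath()
# hypothetical seeds -- the real verbs_hedging list lives elsewhere in the module
verbs_hedging = ["suggest", "indicate", "appear", "seem", "may", "might"]
lexicon.create_category("verbs_hedging", verbs_hedging, model="nytimes")

arguments = ["The data suggest a modest effect.",
             "These results may indicate a broader trend."]
print(lexicon.analyze(arguments, categories=["verbs_hedging"], normalize=True))
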
Example #6
def count_connect(u):

    cat_all = {}
    lexicon = Empath()
    lexicon.create_category("support", support, model="nytimes")
    lexicon.create_category("conflict", conflict, model="nytimes")
    lexicon.create_category("conclusion", conclusion, model="nytimes")
    lexicon.create_category("complementary", complementary, model="nytimes")
    lexicon.create_category("causal_argument",
                            causal_argument,
                            model="nytimes")
    lexicon.create_category("verbs_hedging", verbs_hedging, model="nytimes")

    heads = []
    not_heads = []

    for (arg1, arg2) in u:
        heads.append(arg1)
        not_heads.append(arg2)

    norep_heads = list(set(heads))
    norep_not_heads = list(set(not_heads))
    args_conn = list(set(heads) | set(not_heads))

    lexicon = Empath()
    #cat_heads = lexicon.analyze(norep_heads, categories = ["support", "conflict", "conclusion", "complementary", "causal_argument"], normalize=True)
    cat_heads = lexicon.analyze(norep_heads,
                                categories=['verbs_hedging'],
                                normalize=True)
    # cat_heads = {}
    # for h in norep_heads:
    #    cat_heads = lexicon.analyze(h, normalize=True)
    #    if cat_heads["fun"] != 0:
    #        print(h, cat_heads["fun"])
    # cat_not_heads = lexicon.analyze(norep_not_heads,categories = ["support", "conflict", "conclusion", "complementary", "causal_argument"], normalize=True)
    cat_not_heads = lexicon.analyze(norep_not_heads,
                                    categories=['verbs_hedging'],
                                    normalize=True)
    # cat_all = lexicon.analyze(args_conn,categories = ["support", "conflict", "conclusion", "complementary", "causal_argument"], normalize=True)
    cat_all = lexicon.analyze(args_conn,
                              categories=['verbs_hedging'],
                              normalize=True)

    return cat_heads, cat_not_heads, cat_all
Example #7
from empath import Empath
import sys
import json
from cnrelated import get_related_terms
from os import path

emp = Empath()

# If -d specified delete the categories to start fresh
if len(sys.argv) > 2:
    if sys.argv[2] == "-d":
        with open(sys.argv[1], "r") as file:
            obj = json.load(file)
        for word in obj["topics"]:
            print(word)
            emp.delete_category(word)
# Otherwise, load up the topics and create categories from them
elif len(sys.argv) == 2:
    with open(sys.argv[1], "r") as file:
        obj = json.load(file)
    for word in obj["topics"]:
        # Check to see if a category already exists to ensure we aren't overwriting them every time
        if not path.exists("venv/Lib/site-packages/empath/data/user/" + word +
                           ".empath"):
            # Get the related words from ConceptNet to use as a seed for creating the category
            seeds = get_related_terms(word)
            print(word, seeds)
            # Create the category for word
            emp.create_category(word, seeds)