def __is_no_idea(cls, df):
    """Return True when the message expresses having no idea what to do.

    Three patterns are recognised:
      1. subject is "i" and the text says "don't know / not sure what to do",
         or the user asks for advice,
      2. the first verb has no subject and the text asks for advice
         ("give me advice"),
      3. "what", "should", "i" appear in order ("what should i do").
    """
    # Drop adverbs so fillers ("really", "just") don't break idiom matching.
    df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
    noun_list = Nlp_util.make_noun_list(df)
    verb_list = Nlp_util.make_verb_list(df, type="normal")
    is_subj_himself = Nlp_util.is_first_subject_in(["i"], noun_list, verb_list)
    exist_subj_for_first_verb = Nlp_util.exist_subj_for_first_verb(
        noun_list, verb_list)
    is_idk_what_to_do = Df_util.anything_isin(
        {"do not know", "not sure", "have no idea"},
        df_ex_adverb["base_form"]) and Df_util.anything_isin(
            {"what to do", "how to do", "what to deal", "how to deal"},
            df_ex_adverb["base_form"])
    # NOTE: the original computed this exact expression twice (as
    # "is_want_advice" and "is_give_me_advice" with identical word sets);
    # compute it once and reuse it in both places.
    wants_advice = Df_util.anything_isin(
        {"want", "need", "give me"},
        df_ex_adverb["base_form"]) and Df_util.anything_isin(
            {"advice", "suggestion"}, df_ex_adverb["word"])
    what_should_i_do = Nlp_util.are_words1_words2_words3_in_order(
        df_ex_adverb, ["what"], ["should"], ["i"])
    return ((is_subj_himself and (is_idk_what_to_do or wants_advice))
            or (not exist_subj_for_first_verb and wants_advice)
            or what_should_i_do)
def __alter_repeat_for_dont_think_SV(fixed_df):
    """Paraphrase "I (don't) think S V ..." into "so probably S would(nt) V ...".

    Returns a one-element list with the rephrased sentence, or an empty
    list when parsing fails (logged, best-effort).
    """
    try:
        # TODO see if its neccesary to care about should and cant
        idx_of_think = Nlp_util.get_idx_list_of_word("think", fixed_df["base_form"])[0]
        df_after_think = fixed_df.loc[idx_of_think + 1:, :].reset_index(drop=True)
        verb_list = Nlp_util.make_verb_list(df_after_think, type="normal")
        noun_list = Nlp_util.make_noun_list(df_after_think)
        # possibly bug happen here since amount of verbs are different in cant do/dont do
        is_negative_form = Df_util.anything_isin(["not", "never"], df_after_think.loc[:, "base_form"])
        # can add possibly or likely(when its negative)
        head_words = ["so ", "so probably ", "probably ", "so maybe ", "maybe "]
        random_idx_for_heads_words = randint(0, len(head_words) - 1)
        if is_negative_form:
            # Take the subject, insert the auxiliary, then the verb's base
            # form, and keep everything after the verb unchanged.
            head_word = head_words[random_idx_for_heads_words]
            subj = noun_list["word"].iloc[0]
            auxiliary_verb = " would "
            idx_of_not = Nlp_util.get_idx_list_of_word_list(["not", "never"], df_after_think.loc[:, "base_form"])[0]
            verb_row = verb_list.loc[idx_of_not:, :].iloc[0]
            verb = verb_row.base_form + " "
            after_verb = WordFormatter.Series2Str(df_after_think.loc[verb_row.name + 1:, "word"])
            return [head_word + subj + auxiliary_verb + verb + after_verb]
        else:
            head_word = head_words[random_idx_for_heads_words]
            subj = noun_list["word"].iloc[0]
            auxiliary_verb = " wouldnt "
            verb = verb_list["base_form"].iloc[0] + " "
            after_verb = WordFormatter.Series2Str(df_after_think.loc[verb_list.index[0] + 1:, "word"])
            return [head_word + subj + auxiliary_verb + verb + after_verb]
    except Exception:  # narrowed from bare except: don't trap SystemExit/KeyboardInterrupt
        logging.exception('')
        return []
def __exists_SV_around_cc(cls, df, cc_in_message, cc):
    """Return True when complete S+V clauses exist on both sides of *cc*.

    The sentence containing the conjunction *cc* is split at *cc* (and at
    the next conjunction in the same sentence, if any); each half is then
    checked for subject+verb completeness.  Returns False on any error.
    """
    try:
        sent_with_cc = df[df.sidx == cc.sidx]
        nouns_in_sent = Nlp_util.make_noun_list(sent_with_cc)
        verbs_in_sent = Nlp_util.make_verb_list(sent_with_cc, type="normal")
        first_half_of_sent = sent_with_cc[sent_with_cc.widx < cc.widx]
        nouns_in_first_half = nouns_in_sent[(nouns_in_sent.sidx == cc.sidx) & (nouns_in_sent.widx < cc.widx)]
        verbs_in_first_half = verbs_in_sent[(verbs_in_sent.sidx == cc.sidx) & (verbs_in_sent.widx < cc.widx)]
        # A later conjunction in the same sentence bounds the second half.
        other_cc_sidx = cc_in_message[(cc_in_message.sidx == cc.sidx) & (cc_in_message.widx > cc.widx)].sidx
        exists_other_cc_in_same_sentence = any(i == cc.sidx for i in other_cc_sidx)
        if exists_other_cc_in_same_sentence:
            other_cc_widx = cc_in_message[cc_in_message.index > cc.widx].index[0]
        else:
            other_cc_widx = len(sent_with_cc)
        second_half_of_sent = sent_with_cc[(sent_with_cc.widx >= cc.widx) & (sent_with_cc.widx < other_cc_widx)]
        # A trailing "?" should not count as a word of the second clause.
        if cls.__is_finishing_with_question_mark(second_half_of_sent):
            second_half_of_sent = second_half_of_sent.iloc[:-1]
        nouns_in_second_half = cls.__get_second_half_words(nouns_in_sent, cc, other_cc_widx)
        verbs_in_second_half = cls.__get_second_half_words(verbs_in_sent, cc, other_cc_widx)
        return cls.__is_complete_sent(nouns_in_first_half, verbs_in_first_half, first_half_of_sent) and \
            cls.__is_complete_sent(nouns_in_second_half, verbs_in_second_half, second_half_of_sent)
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __alter_repeat_for_need_sent(fixed_df):
    """Build an empathetic two-line reply about the thing the user "needs"."""
    need_idx = Nlp_util.get_idx_list_of_word("need", fixed_df["base_form"])[0]
    first_noun_row = Nlp_util.get_wordsDF_of_wordlist_after_idx(
        fixed_df, Nlp_util.pos_NOUNs + Nlp_util.pos_PRPs, need_idx,
        column_name="pos").iloc[0]
    # Pull in a preceding adjective/possessive/determiner so the noun
    # phrase reads naturally ("my job" rather than "job").
    prev_pos = fixed_df.loc[first_noun_row.name - 1, "pos"]
    if prev_pos in Nlp_util.pos_ADJECTIVEs + ["PRP$", "DT"]:
        noun = WordFormatter.Series2Str(
            fixed_df.loc[first_noun_row.name - 1:first_noun_row.name, "word"])
    else:
        noun = fixed_df.loc[first_noun_row.name, "word"]
    noun_nominative = Nlp_util.convert_objective_noun_to_nominative(noun)
    options = [
        [
            "so " + noun_nominative + " is very important thing for you..",
            "and sounds its kinda hard to get it now right😢"
        ],
        [
            "so its like its not easy to get " + noun + " now but you really want..",
            "and it can frustrate you😞"
        ],
        [
            "sounds you really want " + noun + "..",
            "might be tough time for you to seek for it now😓"
        ],
    ]
    return options[randint(0, len(options) - 1)]
def __alter_repeat_for_wish(fixed_df):
    """Rephrase an "I wish S V ..." sentence into a reflective repeat plus
    a sympathetic follow-up line."""
    wish_idx = Nlp_util.get_idx_list_of_word("wish", fixed_df["base_form"])[0]
    subj_row = Nlp_util.get_wordsDF_of_wordlist_after_idx(
        fixed_df, Nlp_util.pos_NOUNs + Nlp_util.pos_PRPs, wish_idx,
        column_name="pos").iloc[0]
    verb_row = Nlp_util.get_wordsDF_of_wordlist_after_idx(
        fixed_df, Nlp_util.pos_VERBs, subj_row.name, column_name="pos").iloc[0]
    subj = subj_row.word
    verb = verb_row.word
    after_verb = WordFormatter.Series2Str(fixed_df.loc[verb_row.name + 1:, "word"])
    objective_subj = Nlp_util.convert_nominative_noun_to_objective(subj)
    if subj == "you":
        repeat_list = [
            ["you really want to " + verb + " " + after_verb],
            ["so you seriously hope to " + verb + " " + after_verb],
            ["so you are dying to " + verb + " " + after_verb],
        ]
    else:
        repeat_list = [
            ["you really want " + objective_subj + " to " + verb + " " + after_verb],
            ["you really wanna have " + objective_subj + " " + verb + " " + after_verb],
            ["you really wanna make " + objective_subj + " " + verb + " " + after_verb],
        ]
    cmp_list = [
        ["but sounds you feel bit too much to expect that now..?"],
        ["and sounds you feel like its impossible..?"],
        ["and seems like you dont think it never happen😓"],
    ]
    # Keep the two random draws in the original order (repeat, then cmp).
    pick_repeat = randint(0, len(repeat_list) - 1)
    pick_cmp = randint(0, len(cmp_list) - 1)
    return repeat_list[pick_repeat] + cmp_list[pick_cmp]
def __replace_verb(text_df):
    """Adjust each be-verb in *text_df* to agree with the changed subject.

    Mutates the "word" column in place and returns the same DataFrame.
    """
    # Hoist the noun list out of the per-row lambda: the original rebuilt
    # it for every row inside apply(), repeating invariant work.  apply()
    # computes the whole result before the assignment, so the hoisted list
    # sees exactly the same (unmodified) frame as before.
    noun_list = Nlp_util.make_noun_list(text_df)
    text_df.loc[:, "word"] = text_df.apply(
        lambda row: Nlp_util.adjust_be_verb_for_changed_subject(
            row["word"], row["pos"], row.name, noun_list),
        axis=1)
    return text_df
def __has_wish_S_V(target_df):
    """Return True when the text contains "wish" followed (in order) by a
    subject (noun/pronoun) and then a verb."""
    # Guard clause: no "wish" at all means no match.
    if not Df_util.anything_isin(["wish"], target_df["base_form"]):
        return False
    wish_idx = Nlp_util.get_idx_list_of_word("wish", target_df["base_form"])[0]
    return Nlp_util.are_words1_words2_words3_in_order(
        target_df.loc[wish_idx:, :],
        Nlp_util.pos_NOUNs + Nlp_util.pos_PRPs,
        Nlp_util.pos_VERBs,
        df_column="pos")
def __is_complaint_or_dissing(df):
    """Return True when the user complains the bot doesn't listen, or
    insults it directly."""
    nouns = Nlp_util.make_noun_list(df)
    verbs = Nlp_util.make_verb_list(df, type="normal")
    # "you ... not listen / never listen" with "you" as the first subject.
    you_is_subject = Nlp_util.is_first_subject_in(["you"], nouns, verbs)
    says_not_listening = you_is_subject and Df_util.anything_isin(
        ["not listen", "never listen"], df["base_form"])
    is_dissing = Df_util.anything_isin(["f**k you", "hate you"], df["base_form"])
    return says_not_listening or is_dissing
def __mean_no_friends(cls, df):
    """Return True when the text says the user has no friends or that
    nobody likes them."""
    patterns = [
        (["nobody", "no one"], ["like", "love"], ["me"]),
        (["friend", "they", "everybody"], ["not like", "not love", "hate"], ["me"]),
        (["i"], ["not have", "have no"], ["friend"]),
    ]
    return any(
        Nlp_util.are_words1_words2_words3_in_order(df, w1, w2, w3)
        for w1, w2, w3 in patterns)
def __is_jullie_useless(df):
    """Return True when the user says the bot (or "this") is unhelpful
    or useless.  A missing DataFrame never matches."""
    if df is None:
        return False
    says_not_helpful = Nlp_util.are_words1_words2_words3_in_order(
        df, ["you", "this"], ["be not", "be never"],
        ["helpful", "help", "helping"])
    says_useless = Nlp_util.are_words1_words2_words3_in_order(
        df, ["you"], ["be"], ['useless', 'helpless'])
    return says_not_helpful or says_useless
def __is_anxious(cls, df_by_sentence: DataFrame):
    """Return True when the sentence matches an "anxious"/"anxiety"
    word-order pattern; False otherwise or on error (logged)."""
    try:
        word1_1 = ['anxious']
        word1_2 = ['be', 'being', 'am', 'was', 'been', 'feel', 'feeling']
        word2_1 = ['anxiety']
        word2_2 = ['have', 'having', 'had', 'feel', 'feeling']
        return Nlp_util.are_words1_words2_words3_in_order(df_by_sentence, word1_1, word1_2) \
            or Nlp_util.are_words1_words2_words3_in_order(df_by_sentence, word2_1, word2_2)
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __is_anxious(cls, df_by_sentence: DataFrame):
    """Return True when the sentence matches an "anxious"/"anxiety"
    word-order pattern."""
    patterns = (
        (['anxious'], ['be', 'being', 'am', 'was', 'been', 'feel', 'feeling']),
        (['anxiety'], ['have', 'having', 'had', 'feel', 'feeling']),
    )
    return any(
        Nlp_util.are_words1_words2_words3_in_order(df_by_sentence, w1, w2)
        for w1, w2 in patterns)
def __is_jullie_useless(df):
    """Return True when the user says the bot (or "this") is unhelpful or
    useless; False for a missing DataFrame or on error (logged)."""
    try:
        if df is None:
            return False
        is_useless = Nlp_util.are_words1_words2_words3_in_order(
            df, ["you", "this"], ["be not", "be never"],
            ["helpful", "help", "helping"]) or \
            Nlp_util.are_words1_words2_words3_in_order(
                df, ["you"], ["be"], ['useless', 'helpless'])
        return is_useless
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def cut_sent_by_interjection(cls, df):
    """Split sentences at conjunctions when both sides are complete clauses.

    Repeatedly scans for separator words (",", "and", "but", ...) that are
    not sentence-initial; whenever complete S+V clauses exist on both sides
    of a separator, the sentence index is bumped from that point on and
    word indices are renumbered.  Stops when a full pass makes no split.
    Returns the (possibly re-sentenced) DataFrame; on error, logs and
    returns *df* as-is.
    """
    try:
        separators = [",", "and", "but", "or", "then", "so", "plus", "cause", "because"]
        exists_separator = True
        while exists_separator:
            # widx != 0: a separator already at a sentence head is not a split point.
            separators_in_message = df[(df.word.isin(separators)) & (df.widx != 0)]
            if separators_in_message.empty:
                exists_separator = False
            else:
                for idx, separator in separators_in_message.iterrows():
                    # "so" as an adverb / "cause" as a non-verb are not
                    # conjunctions in this context.
                    if separator.word == 'so' and separator.pos == 'RB':
                        continue
                    elif separator.word == 'cause' and separator.pos != 'VB':
                        continue
                    if cls.__exists_SV_around_cc(df, separators_in_message, separator):
                        # Start a new sentence at the separator, renumber, rescan.
                        df.loc[df[(df.sidx >= separator.sidx) & (df.index >= idx)].index, "sidx"] += 1
                        df = Nlp_util.reset_widx(df)
                        break
                else:
                    # No separator produced a split in this pass; stop.
                    exists_separator = False
        return df
    except Exception:  # narrowed from bare except
        logging.exception('')
        return df
def __like_someone(cls, df):
    """Return True when the message says the user likes someone or is
    in love."""
    df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
    # (subjects, word1, word2, exception phrases)
    patterns = [
        (["guy", "friend", "boy", "man"], ["i"], ["like"], []),
        (["i"], ["like"], ["guy", "friend", "boy", "man"], ["feel like"]),
        (["i"], ["be", "fall"], ["in love"], []),
    ]
    for subjects, word1, word2, exceptions in patterns:
        if Nlp_util.are_words1_words2_words3_in_order(
                df_ex_adverb, subjects, word1, word2,
                exception_list=exceptions):
            return True
    return False
def __create_response_for_what_to_V(df):
    """Build a two-part reply for "what to V" / "how to V" messages:
    a sympathetic restatement plus an encouraging follow-up."""
    idiom_idx = Nlp_util.get_idx_list_of_idiom_list(
        ["what to", "how to"], df["base_form"])[0]
    # Skip the two idiom words themselves ("what to" / "how to").
    df_after_what_to = df.loc[idiom_idx + 2:, :]
    words_after_what_to = WordFormatter.Df2Str(df_after_what_to)
    cmp = [
        ["it must be not easy to find how to" + words_after_what_to],
        ["now you are looking for the way to" + words_after_what_to],
        ["should be not that easy to find how to" + words_after_what_to],
    ]
    encourage = [
        [
            "but i am sure that thinking about it and speaking out it helps you🤗"
        ],
        [
            "eventho its always tough to find the right way, you try to figure it out. Thats impressing me😊"
        ],
        [
            "plz let me know any idea comes to your mind now. it might help you figuring it out☺️"
        ],
        [
            "tell me if you have any little idea. It could help you finding ur way😊"
        ],
    ]
    # Two independent draws, same order as before: cmp first, then encourage.
    chosen_cmp = cmp[randint(0, len(cmp) - 1)]
    chosen_encourage = encourage[randint(0, len(encourage) - 1)]
    return chosen_cmp + chosen_encourage
def __is_call_me_names(cls, df_by_sentence):
    """Return True when the sentence contains "call", "me", "names"
    in that order."""
    return Nlp_util.are_words1_words2_words3_in_order(
        df_by_sentence, ['call'], ['me'], ['names'])
def __is_about_money(cls, df_by_sentence: DataFrame):
    """Return True when the sentence is about the user's own money
    troubles (not about someone else having money)."""
    money_words = ['money', 'finance', 'financial', 'financially']
    if any(w in df_by_sentence.word.values for w in money_words):
        # "they/he/she have money" is about a third party's money.
        third_party_has_money = Nlp_util.are_words1_words2_words3_in_order(
            df_by_sentence, ['they', 'he', 'she'], ['have'], money_words)
        return not third_party_has_money
    return bool(Nlp_util.are_words1_words2_words3_in_order(
        df_by_sentence, ['i'], ['poor']))
def cut_sent_by_unimportant_words_at_head(cls, df):
    """Split off or strip unimportant tokens at the head of each sentence.

    For every sentence: if its first word is an "unimportant" word, a new
    sentence is started right after it; if the first word is neither
    alphabetic nor a digit, it is removed.  Loops until a full pass makes
    no change.  Returns the cleaned DataFrame; on error, logs and returns
    *df* as-is.

    NOTE(review): ``fixed_df = df`` aliases the argument, so the in-place
    ``.loc`` update also mutates the caller's DataFrame — confirm callers
    rely on (or at least tolerate) that before changing it to a copy.
    """
    try:
        fixed_df = df
        while len(fixed_df) != 0:
            is_fixed_df_modified = False
            for sidx in set(fixed_df.sidx):
                # Single-word sentences are left untouched.
                if len(fixed_df[fixed_df.sidx == sidx]) == 1:
                    continue
                head_row = fixed_df[fixed_df.sidx == sidx].iloc[0]
                head_word = head_row.word
                if head_word in UNIMPORTANT_WORDS_FOR_REPEAT.word.values:
                    # Push everything after the head word into a new sentence.
                    fixed_df.loc[fixed_df.index > head_row.name, "sidx"] += 1
                    fixed_df = Nlp_util.reset_widx(fixed_df)
                    is_fixed_df_modified = True
                    break
                elif not head_word.isalpha():
                    if not head_word.isdigit():
                        fixed_df = cls.__remove_nums_n_symbols(fixed_df, sidx)
                        is_fixed_df_modified = True
                        break
            if not is_fixed_df_modified:
                break
        return fixed_df
    except Exception:  # narrowed from bare except
        logging.exception('')
        return df
def __lack_of_confidence(cls, df):
    """Return True when the user expresses self-hate or a lack of
    self-worth."""
    df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
    patterns = [
        (["i"], ["hate", "not like"], ["myself"]),
        (["i"], ["be"], ["fat", "w***e", "s**t", "bitch", "stupid", "ugly", "burden"]),
        (["i"], ["bother"], ["people", "everyone", "friends"]),
    ]
    for w1, w2, w3 in patterns:
        # "say"/"tell" means someone else said it, not the user's own view.
        if Nlp_util.are_words1_words2_words3_in_order(
                df_ex_adverb, w1, w2, w3, exception_list=["say", "tell"]):
            return True
    return Nlp_util.are_words1_words2_words3_in_order(
        df, ["i"], ["be"], ["not good enough"])
def get_sidx_of_not_basic_svo_sent(text_df):
    """Return the sidx values of sentences that are not simple S-V clauses.

    A sentence is flagged when a verb precedes its first noun (e.g.
    "Dont judge me") or when its first noun has no verb at all (e.g.
    "the situation horrible as like he said").
    """
    delete_sidx_list = []
    for sidx in set(text_df.sidx.values):
        sent_df = text_df[text_df.sidx == sidx]
        nouns = Nlp_util.make_noun_list(sent_df)
        verbs = Nlp_util.make_verb_list(sent_df, type="normal")
        if (Nlp_util.is_any_verb_before_first_noun(nouns, verbs)
                or not Nlp_util.is_any_verb_for_first_noun(nouns, verbs)):
            delete_sidx_list.append(sidx)
    return delete_sidx_list
def __alter_repeat_for_make_S_feel_ADJ(df):
    """Paraphrase "... makes S feel ADJ" into a reflective one-line reply."""
    idx_of_make = Nlp_util.get_idx_list_of_word_list(["make"], df["base_form"])[0]
    # The word right after "make" is the object; convert it to subject form.
    subj = Nlp_util.change_object_pronoun_to_pronoun(df.loc[idx_of_make + 1, "word"])
    # Search the next three words for the adjective.
    window = df.loc[idx_of_make + 2:idx_of_make + 4, :]
    adj = window.loc[window["pos"].isin(Nlp_util.pos_ADJECTIVEs), "word"].iloc[0]
    pair = [subj, adj]
    options = [
        ["{0[0]} feel {0[1]} because of that".format(pair)],
        ["thats getting {0[0]} feel {0[1]}".format(pair)],
        ["thats the moment {0[0]} feel {0[1]}".format(pair)],
    ]
    return options[randint(0, len(options) - 1)]
def __has_what_to_do(df):
    """Return True when the user says they don't know what/how to do
    something; False otherwise or on error (logged)."""
    try:
        # Drop adverbs so e.g. "really don't know" still matches.
        df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
        return Nlp_util.are_words1_words2_words3_in_order(
            df_ex_adverb, ["i"], ["not know", "not sure"],
            ["what to", "how to"])
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __mean_no_friends(cls, df):
    """Return True when the text says nobody likes the user or that they
    have no friends; False otherwise or on error (logged)."""
    try:
        exists_nobody_likes_me = Nlp_util.are_words1_words2_words3_in_order(
            df, ["nobody", "no one"], ["like", "love"], ["me"])
        exists_friends_dont_like_me = Nlp_util.are_words1_words2_words3_in_order(
            df, ["friend", "they", "everybody"],
            ["not like", "not love", "hate"], ["me"])
        exists_have_no_friend = Nlp_util.are_words1_words2_words3_in_order(
            df, ["i"], ["not have", "have no"], ["friend"])
        # bool() keeps the original's explicit True/False return values.
        return bool(exists_nobody_likes_me or exists_friends_dont_like_me
                    or exists_have_no_friend)
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __exists_word_after_want_to(df):
    """Return True when something follows the "want to" idiom.

    NOTE(review): ``len(df.loc[idx + 2, :])`` is the number of COLUMNS of
    that row, not a word length — so this effectively tests that a row
    exists two positions after "want" (assuming the frame has > 2
    columns).  Confirm that is the intent before simplifying.
    """
    try:
        idx_of_i = Nlp_util.get_idx_list_of_idiom("want to", df.word)[0]
        length_after_want_to = len(df.loc[idx_of_i + 2, :]) if len(df) >= idx_of_i + 3 else 0
        return length_after_want_to > 2
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __exists_want_to(cls, df):
    """Return True for an "i want to ..." message whose first subject is
    "i" and which has content after the idiom."""
    df_without_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
    noun_list = Nlp_util.make_noun_list(df)
    verb_list = Nlp_util.make_verb_list(df, type="basic")
    idx_of_i_wanna = Nlp_util.get_idx_list_of_idiom(
        "i want to", df_without_adverb.base_form)
    # Guard clauses replace the original nested if/else ladder.
    if len(idx_of_i_wanna) == 0:
        return False
    if len(df.loc[idx_of_i_wanna[0] + 2:, :]) <= 1:
        return False
    if not cls.__exists_word_after_want_to(df):
        return False
    return Nlp_util.is_first_subject_in({"i"}, noun_list, verb_list)
def __has_say_plus_bad_word(df):
    """Return True when someone reportedly said something negative about
    the user ("said i am <negative>" / "said i am not <positive>")."""
    negative_keywords = KWDF[KWDF['Type'] == 'n'].keyword.tolist()
    if any(
            Nlp_util.are_words1_words2_words3_in_order(
                df, ["say", "tell"], ["i be", "i look"], [word])
            for word in negative_keywords):
        return True
    positive_keywords = KWDF[KWDF['Type'] == 'p'].keyword.tolist()
    return any(
        Nlp_util.are_words1_words2_words3_in_order(
            df, ["say", "tell"], ["i be not", "i do not look"], [word])
        for word in positive_keywords)
def __remove_nums_n_symbols(cls, fixed_df, sidx):
    """Drop the head token of sentence *sidx* and renumber its word
    indices.  Returns the cleaned DataFrame; on error, logs and returns
    it unchanged."""
    try:
        # Remove the sentence's first row (a number/symbol head token).
        fixed_df = fixed_df.drop(fixed_df[fixed_df.sidx == sidx].iloc[0].name)
        fixed_df = fixed_df.reset_index(drop=True)
        fixed_df.loc[fixed_df.sidx == sidx] = Nlp_util.reset_widx(fixed_df.loc[fixed_df.sidx == sidx])
        return fixed_df
    except Exception:  # narrowed from bare except
        logging.exception('')
        return fixed_df
def __is_about_missing(cls, df_by_sentence):
    """Return True when the sentence contains "i" followed by "miss";
    False otherwise or on error (logged)."""
    try:
        return Nlp_util.are_words1_words2_words3_in_order(
            df_by_sentence, ['i'], ['miss'])
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __has_nobody_V(df):
    """Return True when "nobody" appears and any verb follows it."""
    nobody_idxs = Nlp_util.get_idx_list_of_word("nobody", df["base_form"])
    if len(nobody_idxs) == 0:
        return False
    pos_after_nobody = df.loc[nobody_idxs[0]:, "pos"]
    return any(pos_after_nobody.isin(Nlp_util.pos_VERBs))