def __is_no_idea(cls, df):
    """Return True when the message expresses having no idea what to do.

    Three patterns are recognised:
      1. subject is "i" and the text says "don't know / not sure what to do",
         or the user asks for advice,
      2. the first verb has no subject and the text asks for advice
         ("give me advice"),
      3. "what", "should", "i" appear in order ("what should i do").
    """
    # Drop adverbs so fillers ("really", "just") don't break idiom matching.
    df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
    noun_list = Nlp_util.make_noun_list(df)
    verb_list = Nlp_util.make_verb_list(df, type="normal")
    is_subj_himself = Nlp_util.is_first_subject_in(["i"], noun_list, verb_list)
    exist_subj_for_first_verb = Nlp_util.exist_subj_for_first_verb(
        noun_list, verb_list)
    is_idk_what_to_do = Df_util.anything_isin(
        {"do not know", "not sure", "have no idea"},
        df_ex_adverb["base_form"]) and Df_util.anything_isin(
            {"what to do", "how to do", "what to deal", "how to deal"},
            df_ex_adverb["base_form"])
    # NOTE: the original computed this exact expression twice (as
    # "is_want_advice" and "is_give_me_advice" with identical word sets);
    # compute it once and reuse it in both places.
    wants_advice = Df_util.anything_isin(
        {"want", "need", "give me"},
        df_ex_adverb["base_form"]) and Df_util.anything_isin(
            {"advice", "suggestion"}, df_ex_adverb["word"])
    what_should_i_do = Nlp_util.are_words1_words2_words3_in_order(
        df_ex_adverb, ["what"], ["should"], ["i"])
    return ((is_subj_himself and (is_idk_what_to_do or wants_advice))
            or (not exist_subj_for_first_verb and wants_advice)
            or what_should_i_do)
def __alter_repeat_for_dont_think_SV(fixed_df):
    """Paraphrase "I (don't) think S V ..." into "so probably S would(nt) V ...".

    Returns a one-element list with the rephrased sentence, or an empty
    list when parsing fails (logged, best-effort).
    """
    try:
        # TODO see if its neccesary to care about should and cant
        idx_of_think = Nlp_util.get_idx_list_of_word("think", fixed_df["base_form"])[0]
        df_after_think = fixed_df.loc[idx_of_think + 1:, :].reset_index(drop=True)
        verb_list = Nlp_util.make_verb_list(df_after_think, type="normal")
        noun_list = Nlp_util.make_noun_list(df_after_think)
        # possibly bug happen here since amount of verbs are different in cant do/dont do
        is_negative_form = Df_util.anything_isin(["not", "never"], df_after_think.loc[:, "base_form"])
        # can add possibly or likely(when its negative)
        head_words = ["so ", "so probably ", "probably ", "so maybe ", "maybe "]
        random_idx_for_heads_words = randint(0, len(head_words) - 1)
        if is_negative_form:
            # Take the subject, insert the auxiliary, then the verb's base
            # form, and keep everything after the verb unchanged.
            head_word = head_words[random_idx_for_heads_words]
            subj = noun_list["word"].iloc[0]
            auxiliary_verb = " would "
            idx_of_not = Nlp_util.get_idx_list_of_word_list(["not", "never"], df_after_think.loc[:, "base_form"])[0]
            verb_row = verb_list.loc[idx_of_not:, :].iloc[0]
            verb = verb_row.base_form + " "
            after_verb = WordFormatter.Series2Str(df_after_think.loc[verb_row.name + 1:, "word"])
            return [head_word + subj + auxiliary_verb + verb + after_verb]
        else:
            head_word = head_words[random_idx_for_heads_words]
            subj = noun_list["word"].iloc[0]
            auxiliary_verb = " wouldnt "
            verb = verb_list["base_form"].iloc[0] + " "
            after_verb = WordFormatter.Series2Str(df_after_think.loc[verb_list.index[0] + 1:, "word"])
            return [head_word + subj + auxiliary_verb + verb + after_verb]
    except Exception:  # narrowed from bare except: don't trap SystemExit/KeyboardInterrupt
        logging.exception('')
        return []
def __exists_SV_around_cc(cls, df, cc_in_message, cc):
    """Return True when complete S+V clauses exist on both sides of *cc*.

    The sentence containing the conjunction *cc* is split at *cc* (and at
    the next conjunction in the same sentence, if any); each half is then
    checked for subject+verb completeness.  Returns False on any error.
    """
    try:
        sent_with_cc = df[df.sidx == cc.sidx]
        nouns_in_sent = Nlp_util.make_noun_list(sent_with_cc)
        verbs_in_sent = Nlp_util.make_verb_list(sent_with_cc, type="normal")
        first_half_of_sent = sent_with_cc[sent_with_cc.widx < cc.widx]
        nouns_in_first_half = nouns_in_sent[(nouns_in_sent.sidx == cc.sidx) & (nouns_in_sent.widx < cc.widx)]
        verbs_in_first_half = verbs_in_sent[(verbs_in_sent.sidx == cc.sidx) & (verbs_in_sent.widx < cc.widx)]
        # A later conjunction in the same sentence bounds the second half.
        other_cc_sidx = cc_in_message[(cc_in_message.sidx == cc.sidx) & (cc_in_message.widx > cc.widx)].sidx
        exists_other_cc_in_same_sentence = any(i == cc.sidx for i in other_cc_sidx)
        if exists_other_cc_in_same_sentence:
            other_cc_widx = cc_in_message[cc_in_message.index > cc.widx].index[0]
        else:
            other_cc_widx = len(sent_with_cc)
        second_half_of_sent = sent_with_cc[(sent_with_cc.widx >= cc.widx) & (sent_with_cc.widx < other_cc_widx)]
        # A trailing "?" should not count as a word of the second clause.
        if cls.__is_finishing_with_question_mark(second_half_of_sent):
            second_half_of_sent = second_half_of_sent.iloc[:-1]
        nouns_in_second_half = cls.__get_second_half_words(nouns_in_sent, cc, other_cc_widx)
        verbs_in_second_half = cls.__get_second_half_words(verbs_in_sent, cc, other_cc_widx)
        return cls.__is_complete_sent(nouns_in_first_half, verbs_in_first_half, first_half_of_sent) and \
            cls.__is_complete_sent(nouns_in_second_half, verbs_in_second_half, second_half_of_sent)
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __alter_repeat_for_need_sent(fixed_df):
    """Build an empathetic two-line reply about the thing the user "needs"."""
    need_idx = Nlp_util.get_idx_list_of_word("need", fixed_df["base_form"])[0]
    first_noun_row = Nlp_util.get_wordsDF_of_wordlist_after_idx(
        fixed_df, Nlp_util.pos_NOUNs + Nlp_util.pos_PRPs, need_idx,
        column_name="pos").iloc[0]
    # Pull in a preceding adjective/possessive/determiner so the noun
    # phrase reads naturally ("my job" rather than "job").
    prev_pos = fixed_df.loc[first_noun_row.name - 1, "pos"]
    if prev_pos in Nlp_util.pos_ADJECTIVEs + ["PRP$", "DT"]:
        noun = WordFormatter.Series2Str(
            fixed_df.loc[first_noun_row.name - 1:first_noun_row.name, "word"])
    else:
        noun = fixed_df.loc[first_noun_row.name, "word"]
    noun_nominative = Nlp_util.convert_objective_noun_to_nominative(noun)
    options = [
        [
            "so " + noun_nominative + " is very important thing for you..",
            "and sounds its kinda hard to get it now right😢"
        ],
        [
            "so its like its not easy to get " + noun + " now but you really want..",
            "and it can frustrate you😞"
        ],
        [
            "sounds you really want " + noun + "..",
            "might be tough time for you to seek for it now😓"
        ],
    ]
    return options[randint(0, len(options) - 1)]
def __alter_repeat_for_wish(fixed_df):
    """Rephrase an "I wish S V ..." sentence into a reflective repeat plus
    a sympathetic follow-up line."""
    wish_idx = Nlp_util.get_idx_list_of_word("wish", fixed_df["base_form"])[0]
    subj_row = Nlp_util.get_wordsDF_of_wordlist_after_idx(
        fixed_df, Nlp_util.pos_NOUNs + Nlp_util.pos_PRPs, wish_idx,
        column_name="pos").iloc[0]
    verb_row = Nlp_util.get_wordsDF_of_wordlist_after_idx(
        fixed_df, Nlp_util.pos_VERBs, subj_row.name, column_name="pos").iloc[0]
    subj = subj_row.word
    verb = verb_row.word
    after_verb = WordFormatter.Series2Str(fixed_df.loc[verb_row.name + 1:, "word"])
    objective_subj = Nlp_util.convert_nominative_noun_to_objective(subj)
    if subj == "you":
        repeat_list = [
            ["you really want to " + verb + " " + after_verb],
            ["so you seriously hope to " + verb + " " + after_verb],
            ["so you are dying to " + verb + " " + after_verb],
        ]
    else:
        repeat_list = [
            ["you really want " + objective_subj + " to " + verb + " " + after_verb],
            ["you really wanna have " + objective_subj + " " + verb + " " + after_verb],
            ["you really wanna make " + objective_subj + " " + verb + " " + after_verb],
        ]
    cmp_list = [
        ["but sounds you feel bit too much to expect that now..?"],
        ["and sounds you feel like its impossible..?"],
        ["and seems like you dont think it never happen😓"],
    ]
    # Keep the two random draws in the original order (repeat, then cmp).
    pick_repeat = randint(0, len(repeat_list) - 1)
    pick_cmp = randint(0, len(cmp_list) - 1)
    return repeat_list[pick_repeat] + cmp_list[pick_cmp]
def __replace_verb(text_df):
    """Adjust each be-verb in *text_df* to agree with the changed subject.

    Mutates the "word" column in place and returns the same DataFrame.
    """
    # Hoist the noun list out of the per-row lambda: the original rebuilt
    # it for every row inside apply(), repeating invariant work.  apply()
    # computes the whole result before the assignment, so the hoisted list
    # sees exactly the same (unmodified) frame as before.
    noun_list = Nlp_util.make_noun_list(text_df)
    text_df.loc[:, "word"] = text_df.apply(
        lambda row: Nlp_util.adjust_be_verb_for_changed_subject(
            row["word"], row["pos"], row.name, noun_list),
        axis=1)
    return text_df
def __has_wish_S_V(target_df):
    """Return True when the text contains "wish" followed (in order) by a
    subject (noun/pronoun) and then a verb."""
    # Guard clause: no "wish" at all means no match.
    if not Df_util.anything_isin(["wish"], target_df["base_form"]):
        return False
    wish_idx = Nlp_util.get_idx_list_of_word("wish", target_df["base_form"])[0]
    return Nlp_util.are_words1_words2_words3_in_order(
        target_df.loc[wish_idx:, :],
        Nlp_util.pos_NOUNs + Nlp_util.pos_PRPs,
        Nlp_util.pos_VERBs,
        df_column="pos")
def __is_complaint_or_dissing(df):
    """Return True when the user complains the bot doesn't listen, or
    insults it directly."""
    nouns = Nlp_util.make_noun_list(df)
    verbs = Nlp_util.make_verb_list(df, type="normal")
    # "you ... not listen / never listen" with "you" as the first subject.
    you_is_subject = Nlp_util.is_first_subject_in(["you"], nouns, verbs)
    says_not_listening = you_is_subject and Df_util.anything_isin(
        ["not listen", "never listen"], df["base_form"])
    is_dissing = Df_util.anything_isin(["f**k you", "hate you"], df["base_form"])
    return says_not_listening or is_dissing
def __mean_no_friends(cls, df):
    """Return True when the text says the user has no friends or that
    nobody likes them."""
    patterns = [
        (["nobody", "no one"], ["like", "love"], ["me"]),
        (["friend", "they", "everybody"], ["not like", "not love", "hate"], ["me"]),
        (["i"], ["not have", "have no"], ["friend"]),
    ]
    return any(
        Nlp_util.are_words1_words2_words3_in_order(df, w1, w2, w3)
        for w1, w2, w3 in patterns)
def __is_jullie_useless(df):
    """Return True when the user says the bot (or "this") is unhelpful
    or useless.  A missing DataFrame never matches."""
    if df is None:
        return False
    says_not_helpful = Nlp_util.are_words1_words2_words3_in_order(
        df, ["you", "this"], ["be not", "be never"],
        ["helpful", "help", "helping"])
    says_useless = Nlp_util.are_words1_words2_words3_in_order(
        df, ["you"], ["be"], ['useless', 'helpless'])
    return says_not_helpful or says_useless
def __is_anxious(cls, df_by_sentence: DataFrame):
    """Return True when the sentence matches an "anxious"/"anxiety"
    word-order pattern; False otherwise or on error (logged)."""
    try:
        word1_1 = ['anxious']
        word1_2 = ['be', 'being', 'am', 'was', 'been', 'feel', 'feeling']
        word2_1 = ['anxiety']
        word2_2 = ['have', 'having', 'had', 'feel', 'feeling']
        return Nlp_util.are_words1_words2_words3_in_order(df_by_sentence, word1_1, word1_2) \
            or Nlp_util.are_words1_words2_words3_in_order(df_by_sentence, word2_1, word2_2)
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __is_anxious(cls, df_by_sentence: DataFrame):
    """Return True when the sentence matches an "anxious"/"anxiety"
    word-order pattern."""
    patterns = (
        (['anxious'], ['be', 'being', 'am', 'was', 'been', 'feel', 'feeling']),
        (['anxiety'], ['have', 'having', 'had', 'feel', 'feeling']),
    )
    return any(
        Nlp_util.are_words1_words2_words3_in_order(df_by_sentence, w1, w2)
        for w1, w2 in patterns)
def __is_jullie_useless(df):
    """Return True when the user says the bot (or "this") is unhelpful or
    useless; False for a missing DataFrame or on error (logged)."""
    try:
        if df is None:
            return False
        is_useless = Nlp_util.are_words1_words2_words3_in_order(
            df, ["you", "this"], ["be not", "be never"],
            ["helpful", "help", "helping"]) or \
            Nlp_util.are_words1_words2_words3_in_order(
                df, ["you"], ["be"], ['useless', 'helpless'])
        return is_useless
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def cut_sent_by_interjection(cls, df):
    """Split sentences at conjunctions when both sides are complete clauses.

    Repeatedly scans for separator words (",", "and", "but", ...) that are
    not sentence-initial; whenever complete S+V clauses exist on both sides
    of a separator, the sentence index is bumped from that point on and
    word indices are renumbered.  Stops when a full pass makes no split.
    Returns the (possibly re-sentenced) DataFrame; on error, logs and
    returns *df* as-is.
    """
    try:
        separators = [",", "and", "but", "or", "then", "so", "plus", "cause", "because"]
        exists_separator = True
        while exists_separator:
            # widx != 0: a separator already at a sentence head is not a split point.
            separators_in_message = df[(df.word.isin(separators)) & (df.widx != 0)]
            if separators_in_message.empty:
                exists_separator = False
            else:
                for idx, separator in separators_in_message.iterrows():
                    # "so" as an adverb / "cause" as a non-verb are not
                    # conjunctions in this context.
                    if separator.word == 'so' and separator.pos == 'RB':
                        continue
                    elif separator.word == 'cause' and separator.pos != 'VB':
                        continue
                    if cls.__exists_SV_around_cc(df, separators_in_message, separator):
                        # Start a new sentence at the separator, renumber, rescan.
                        df.loc[df[(df.sidx >= separator.sidx) & (df.index >= idx)].index, "sidx"] += 1
                        df = Nlp_util.reset_widx(df)
                        break
                else:
                    # No separator produced a split in this pass; stop.
                    exists_separator = False
        return df
    except Exception:  # narrowed from bare except
        logging.exception('')
        return df
def __like_someone(cls, df):
    """Return True when the message says the user likes someone or is
    in love."""
    df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
    # (subjects, word1, word2, exception phrases)
    patterns = [
        (["guy", "friend", "boy", "man"], ["i"], ["like"], []),
        (["i"], ["like"], ["guy", "friend", "boy", "man"], ["feel like"]),
        (["i"], ["be", "fall"], ["in love"], []),
    ]
    for subjects, word1, word2, exceptions in patterns:
        if Nlp_util.are_words1_words2_words3_in_order(
                df_ex_adverb, subjects, word1, word2,
                exception_list=exceptions):
            return True
    return False
def __create_response_for_what_to_V(df):
    """Build a two-part reply for "what to V" / "how to V" messages:
    a sympathetic restatement plus an encouraging follow-up."""
    idiom_idx = Nlp_util.get_idx_list_of_idiom_list(
        ["what to", "how to"], df["base_form"])[0]
    # Skip the two idiom words themselves ("what to" / "how to").
    df_after_what_to = df.loc[idiom_idx + 2:, :]
    words_after_what_to = WordFormatter.Df2Str(df_after_what_to)
    cmp = [
        ["it must be not easy to find how to" + words_after_what_to],
        ["now you are looking for the way to" + words_after_what_to],
        ["should be not that easy to find how to" + words_after_what_to],
    ]
    encourage = [
        [
            "but i am sure that thinking about it and speaking out it helps you🤗"
        ],
        [
            "eventho its always tough to find the right way, you try to figure it out. Thats impressing me😊"
        ],
        [
            "plz let me know any idea comes to your mind now. it might help you figuring it out☺️"
        ],
        [
            "tell me if you have any little idea. It could help you finding ur way😊"
        ],
    ]
    # Two independent draws, same order as before: cmp first, then encourage.
    chosen_cmp = cmp[randint(0, len(cmp) - 1)]
    chosen_encourage = encourage[randint(0, len(encourage) - 1)]
    return chosen_cmp + chosen_encourage
def __is_call_me_names(cls, df_by_sentence):
    """Return True when the sentence contains "call", "me", "names"
    in that order."""
    return Nlp_util.are_words1_words2_words3_in_order(
        df_by_sentence, ['call'], ['me'], ['names'])
def __is_about_money(cls, df_by_sentence: DataFrame):
    """Return True when the sentence is about the user's own money
    troubles (not about someone else having money)."""
    money_words = ['money', 'finance', 'financial', 'financially']
    if any(w in df_by_sentence.word.values for w in money_words):
        # "they/he/she have money" is about a third party's money.
        third_party_has_money = Nlp_util.are_words1_words2_words3_in_order(
            df_by_sentence, ['they', 'he', 'she'], ['have'], money_words)
        return not third_party_has_money
    return bool(Nlp_util.are_words1_words2_words3_in_order(
        df_by_sentence, ['i'], ['poor']))
def cut_sent_by_unimportant_words_at_head(cls, df):
    """Split off or strip unimportant tokens at the head of each sentence.

    For every sentence: if its first word is an "unimportant" word, a new
    sentence is started right after it; if the first word is neither
    alphabetic nor a digit, it is removed.  Loops until a full pass makes
    no change.  Returns the cleaned DataFrame; on error, logs and returns
    *df* as-is.

    NOTE(review): ``fixed_df = df`` aliases the argument, so the in-place
    ``.loc`` update also mutates the caller's DataFrame — confirm callers
    rely on (or at least tolerate) that before changing it to a copy.
    """
    try:
        fixed_df = df
        while len(fixed_df) != 0:
            is_fixed_df_modified = False
            for sidx in set(fixed_df.sidx):
                # Single-word sentences are left untouched.
                if len(fixed_df[fixed_df.sidx == sidx]) == 1:
                    continue
                head_row = fixed_df[fixed_df.sidx == sidx].iloc[0]
                head_word = head_row.word
                if head_word in UNIMPORTANT_WORDS_FOR_REPEAT.word.values:
                    # Push everything after the head word into a new sentence.
                    fixed_df.loc[fixed_df.index > head_row.name, "sidx"] += 1
                    fixed_df = Nlp_util.reset_widx(fixed_df)
                    is_fixed_df_modified = True
                    break
                elif not head_word.isalpha():
                    if not head_word.isdigit():
                        fixed_df = cls.__remove_nums_n_symbols(fixed_df, sidx)
                        is_fixed_df_modified = True
                        break
            if not is_fixed_df_modified:
                break
        return fixed_df
    except Exception:  # narrowed from bare except
        logging.exception('')
        return df
def __lack_of_confidence(cls, df):
    """Return True when the user expresses self-hate or a lack of
    self-worth."""
    df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
    patterns = [
        (["i"], ["hate", "not like"], ["myself"]),
        (["i"], ["be"], ["fat", "w***e", "s**t", "bitch", "stupid", "ugly", "burden"]),
        (["i"], ["bother"], ["people", "everyone", "friends"]),
    ]
    for w1, w2, w3 in patterns:
        # "say"/"tell" means someone else said it, not the user's own view.
        if Nlp_util.are_words1_words2_words3_in_order(
                df_ex_adverb, w1, w2, w3, exception_list=["say", "tell"]):
            return True
    return Nlp_util.are_words1_words2_words3_in_order(
        df, ["i"], ["be"], ["not good enough"])
def get_sidx_of_not_basic_svo_sent(text_df):
    """Return the sidx values of sentences that are not simple S-V clauses.

    A sentence is flagged when a verb precedes its first noun (e.g.
    "Dont judge me") or when its first noun has no verb at all (e.g.
    "the situation horrible as like he said").
    """
    delete_sidx_list = []
    for sidx in set(text_df.sidx.values):
        sent_df = text_df[text_df.sidx == sidx]
        nouns = Nlp_util.make_noun_list(sent_df)
        verbs = Nlp_util.make_verb_list(sent_df, type="normal")
        if (Nlp_util.is_any_verb_before_first_noun(nouns, verbs)
                or not Nlp_util.is_any_verb_for_first_noun(nouns, verbs)):
            delete_sidx_list.append(sidx)
    return delete_sidx_list
def __alter_repeat_for_make_S_feel_ADJ(df):
    """Paraphrase "... makes S feel ADJ" into a reflective one-line reply."""
    idx_of_make = Nlp_util.get_idx_list_of_word_list(["make"], df["base_form"])[0]
    # The word right after "make" is the object; convert it to subject form.
    subj = Nlp_util.change_object_pronoun_to_pronoun(df.loc[idx_of_make + 1, "word"])
    # Search the next three words for the adjective.
    window = df.loc[idx_of_make + 2:idx_of_make + 4, :]
    adj = window.loc[window["pos"].isin(Nlp_util.pos_ADJECTIVEs), "word"].iloc[0]
    pair = [subj, adj]
    options = [
        ["{0[0]} feel {0[1]} because of that".format(pair)],
        ["thats getting {0[0]} feel {0[1]}".format(pair)],
        ["thats the moment {0[0]} feel {0[1]}".format(pair)],
    ]
    return options[randint(0, len(options) - 1)]
def __has_what_to_do(df):
    """Return True when the user says they don't know what/how to do
    something; False otherwise or on error (logged)."""
    try:
        # Drop adverbs so e.g. "really don't know" still matches.
        df_ex_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
        return Nlp_util.are_words1_words2_words3_in_order(
            df_ex_adverb, ["i"], ["not know", "not sure"],
            ["what to", "how to"])
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __mean_no_friends(cls, df):
    """Return True when the text says nobody likes the user or that they
    have no friends; False otherwise or on error (logged)."""
    try:
        exists_nobody_likes_me = Nlp_util.are_words1_words2_words3_in_order(
            df, ["nobody", "no one"], ["like", "love"], ["me"])
        exists_friends_dont_like_me = Nlp_util.are_words1_words2_words3_in_order(
            df, ["friend", "they", "everybody"],
            ["not like", "not love", "hate"], ["me"])
        exists_have_no_friend = Nlp_util.are_words1_words2_words3_in_order(
            df, ["i"], ["not have", "have no"], ["friend"])
        # bool() keeps the original's explicit True/False return values.
        return bool(exists_nobody_likes_me or exists_friends_dont_like_me
                    or exists_have_no_friend)
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __exists_word_after_want_to(df):
    """Return True when something follows the "want to" idiom.

    NOTE(review): ``len(df.loc[idx + 2, :])`` is the number of COLUMNS of
    that row, not a word length — so this effectively tests that a row
    exists two positions after "want" (assuming the frame has > 2
    columns).  Confirm that is the intent before simplifying.
    """
    try:
        idx_of_i = Nlp_util.get_idx_list_of_idiom("want to", df.word)[0]
        length_after_want_to = len(df.loc[idx_of_i + 2, :]) if len(df) >= idx_of_i + 3 else 0
        return length_after_want_to > 2
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __exists_want_to(cls, df):
    """Return True for an "i want to ..." message whose first subject is
    "i" and which has content after the idiom."""
    df_without_adverb = df[~df.pos.isin(Nlp_util.pos_ADVERBs)]
    noun_list = Nlp_util.make_noun_list(df)
    verb_list = Nlp_util.make_verb_list(df, type="basic")
    idx_of_i_wanna = Nlp_util.get_idx_list_of_idiom(
        "i want to", df_without_adverb.base_form)
    # Guard clauses replace the original nested if/else ladder.
    if len(idx_of_i_wanna) == 0:
        return False
    if len(df.loc[idx_of_i_wanna[0] + 2:, :]) <= 1:
        return False
    if not cls.__exists_word_after_want_to(df):
        return False
    return Nlp_util.is_first_subject_in({"i"}, noun_list, verb_list)
def __has_say_plus_bad_word(df):
    """Return True when someone reportedly said something negative about
    the user ("said i am <negative>" / "said i am not <positive>")."""
    negative_keywords = KWDF[KWDF['Type'] == 'n'].keyword.tolist()
    if any(
            Nlp_util.are_words1_words2_words3_in_order(
                df, ["say", "tell"], ["i be", "i look"], [word])
            for word in negative_keywords):
        return True
    positive_keywords = KWDF[KWDF['Type'] == 'p'].keyword.tolist()
    return any(
        Nlp_util.are_words1_words2_words3_in_order(
            df, ["say", "tell"], ["i be not", "i do not look"], [word])
        for word in positive_keywords)
def __remove_nums_n_symbols(cls, fixed_df, sidx):
    """Drop the head token of sentence *sidx* and renumber its word
    indices.  Returns the cleaned DataFrame; on error, logs and returns
    it unchanged."""
    try:
        # Remove the sentence's first row (a number/symbol head token).
        fixed_df = fixed_df.drop(fixed_df[fixed_df.sidx == sidx].iloc[0].name)
        fixed_df = fixed_df.reset_index(drop=True)
        fixed_df.loc[fixed_df.sidx == sidx] = Nlp_util.reset_widx(fixed_df.loc[fixed_df.sidx == sidx])
        return fixed_df
    except Exception:  # narrowed from bare except
        logging.exception('')
        return fixed_df
def __is_about_missing(cls, df_by_sentence):
    """Return True when the sentence contains "i" followed by "miss";
    False otherwise or on error (logged)."""
    try:
        return Nlp_util.are_words1_words2_words3_in_order(
            df_by_sentence, ['i'], ['miss'])
    except Exception:  # narrowed from bare except
        logging.exception('')
        return False
def __has_nobody_V(df):
    """Return True when "nobody" appears and any verb follows it."""
    nobody_idxs = Nlp_util.get_idx_list_of_word("nobody", df["base_form"])
    if len(nobody_idxs) == 0:
        return False
    pos_after_nobody = df.loc[nobody_idxs[0]:, "pos"]
    return any(pos_after_nobody.isin(Nlp_util.pos_VERBs))