Example No. 1
    def __alter_repeat_for_dont_think_SV(fixed_df):
        try:
            # TODO: see if it is necessary to also handle "should" and "can't"
            idx_of_think = Nlp_util.get_idx_list_of_word("think", fixed_df["base_form"])[0]
            df_after_think = fixed_df.loc[idx_of_think + 1:, :].reset_index(drop=True)
            verb_list = Nlp_util.make_verb_list(df_after_think, type="normal")
            noun_list = Nlp_util.make_noun_list(df_after_think)
            # a bug may occur here since the number of verbs differs between "can't do" and "don't do"
            is_negative_form = Df_util.anything_isin(["not", "never"], df_after_think.loc[:, "base_form"])
            # could also add "possibly" or "likely" (when the sentence is negative)
            head_words = ["so ", "so probably ", "probably ", "so maybe ", "maybe "]
            random_idx_for_heads_words = randint(0, len(head_words) - 1)
            if is_negative_form:
                # take the subject first, then insert the auxiliary after it, then the base form of the verb, and keep the rest of the sentence as-is
                head_word = head_words[random_idx_for_heads_words]
                subj = noun_list["word"].iloc[0]
                auxiliary_verb = " would "
                idx_of_not = Nlp_util.get_idx_list_of_word_list(["not", "never"], df_after_think.loc[:, "base_form"])[0]
                verb_row = verb_list.loc[idx_of_not:, :].iloc[0]
                verb = verb_row.base_form + " "

                after_verb = WordFormatter.Series2Str(df_after_think.loc[verb_row.name + 1:, "word"])
                return [head_word + subj + auxiliary_verb + verb + after_verb]
            else:
                head_word = head_words[random_idx_for_heads_words]
                subj = noun_list["word"].iloc[0]
                auxiliary_verb = " wouldnt "
                verb = verb_list["base_form"].iloc[0] + " "
                after_verb = WordFormatter.Series2Str(df_after_think.loc[verb_list.index[0] + 1:, "word"])
                return [head_word + subj + auxiliary_verb + verb + after_verb]
        except Exception:
            logging.exception('')
            return []
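
For reference, a minimal sketch of the token table these snippets appear to operate on: one row per token with "word", "base_form" and "pos" columns (the column names come from the code above; the sample sentence and POS tags are illustrative assumptions). It reproduces only the "slice everything after think" step, without the Nlp_util helpers.

    import pandas as pd

    # Hypothetical token table in the shape __alter_repeat_for_dont_think_SV expects:
    # one row per token, with surface form, lemma and POS tag.
    fixed_df = pd.DataFrame({
        "word":      ["i", "do", "not", "think", "he", "will", "come"],
        "base_form": ["i", "do", "not", "think", "he", "will", "come"],
        "pos":       ["PRP", "VBP", "RB", "VB", "PRP", "MD", "VB"],
    })

    # Same slicing step as in the snippet: keep only the clause after "think".
    idx_of_think = fixed_df.index[fixed_df["base_form"] == "think"][0]
    df_after_think = fixed_df.loc[idx_of_think + 1:, :].reset_index(drop=True)
    print(df_after_think["word"].tolist())  # ['he', 'will', 'come']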
Example No. 2
    def __alter_repeat_for_need_sent(fixed_df):
        idx_of_need = Nlp_util.get_idx_list_of_word("need",
                                                    fixed_df["base_form"])[0]

        row_of_first_noun = \
            Nlp_util.get_wordsDF_of_wordlist_after_idx(fixed_df, Nlp_util.pos_NOUNs + Nlp_util.pos_PRPs,
                                                       idx_of_need,
                                                       column_name="pos").iloc[0]
        if fixed_df.loc[row_of_first_noun.name - 1,
                        "pos"] in Nlp_util.pos_ADJECTIVEs + ["PRP$", "DT"]:
            noun = WordFormatter.Series2Str(
                fixed_df.loc[row_of_first_noun.name - 1:row_of_first_noun.name,
                             "word"])
        else:
            noun = fixed_df.loc[row_of_first_noun.name, "word"]

        noun_nominative = Nlp_util.convert_objective_noun_to_nominative(noun)

        options = [
            [
                "so " + noun_nominative + " is very important thing for you..",
                "and sounds its kinda hard to get it now right😢"
            ],
            [
                "so its like its not easy to get " + noun + " now but you really want..",
                "and it can frustrate you😞"
            ],
            [
                "sounds you really want " + noun + "..",
                "might be tough time for you to seek for it now😓"
            ]
        ]

        random_idx_for_options = randint(0, len(options) - 1)

        return options[random_idx_for_options]
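
The determiner check above can be illustrated without the project helpers. The sketch below is a stand-in under stated assumptions: the POS tags and the adjective/determiner tag list ("JJ", "PRP$", "DT") are guesses at what Nlp_util.pos_ADJECTIVEs covers, and the noun lookup uses plain pandas instead of Nlp_util.get_wordsDF_of_wordlist_after_idx.

    import pandas as pd

    fixed_df = pd.DataFrame({
        "word": ["i", "need", "a", "break"],
        "base_form": ["i", "need", "a", "break"],
        "pos": ["PRP", "VBP", "DT", "NN"],
    })

    idx_of_need = fixed_df.index[fixed_df["base_form"] == "need"][0]
    after = fixed_df.loc[idx_of_need + 1:]
    row_of_first_noun = after[after["pos"].isin(["NN", "NNS", "PRP"])].iloc[0]

    # Keep the preceding determiner/possessive/adjective, as the snippet does.
    if fixed_df.loc[row_of_first_noun.name - 1, "pos"] in ["JJ", "PRP$", "DT"]:
        noun = " ".join(fixed_df.loc[row_of_first_noun.name - 1:row_of_first_noun.name, "word"])
    else:
        noun = fixed_df.loc[row_of_first_noun.name, "word"]
    print(noun)  # "a break"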
Example No. 3
    def __alter_repeat_for_wish(fixed_df):
        wish_idx = Nlp_util.get_idx_list_of_word("wish", fixed_df["base_form"])[0]
        row_of_subj = Nlp_util.get_wordsDF_of_wordlist_after_idx(fixed_df, Nlp_util.pos_NOUNs+Nlp_util.pos_PRPs, wish_idx, column_name="pos").iloc[0]
        row_of_verb = Nlp_util.get_wordsDF_of_wordlist_after_idx(fixed_df, Nlp_util.pos_VERBs, row_of_subj.name, column_name="pos").iloc[0]
        subj = row_of_subj.word
        verb = row_of_verb.word
        after_verb = WordFormatter.Series2Str(fixed_df.loc[row_of_verb.name+1:, "word"])
        objective_subj = Nlp_util.convert_nominative_noun_to_objective(subj)
        if subj == "you":
            repeat_list = [
                ["you really want to "+verb+" "+after_verb],
                ["so you seriously hope to "+verb+" "+after_verb],
                ["so you are dying to "+verb+" "+after_verb]
            ]
        else:
            repeat_list = [
                ["you really want "+objective_subj+" to "+verb+" "+after_verb],
                ["you really wanna have "+objective_subj+" "+verb+" "+after_verb],
                ["you really wanna make "+objective_subj+" "+verb+" "+after_verb]
            ]

        cmp_list = [
            ["but sounds you feel bit too much to expect that now..?"],
            ["and sounds you feel like its impossible..?"],
            ["and seems like you dont think it never happen😓"]
        ]
        random_idx_for_repeat_list = randint(0, len(repeat_list) - 1)
        random_idx_for_cmp_list = randint(0, len(cmp_list) - 1)

        return repeat_list[random_idx_for_repeat_list]+cmp_list[random_idx_for_cmp_list]
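
Nlp_util.convert_nominative_noun_to_objective is project-internal and not shown in these examples; as a rough, hypothetical sketch of what such a helper presumably does (a plain pronoun mapping that falls back to the input for ordinary nouns):

    # Hypothetical illustration only; the real Nlp_util helper may differ.
    NOMINATIVE_TO_OBJECTIVE = {"i": "me", "he": "him", "she": "her",
                               "we": "us", "they": "them"}

    def convert_nominative_noun_to_objective_sketch(word):
        return NOMINATIVE_TO_OBJECTIVE.get(word.lower(), word)

    print(convert_nominative_noun_to_objective_sketch("he"))      # "him"
    print(convert_nominative_noun_to_objective_sketch("my mom"))  # unchanged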
Example No. 4
 def get_idx_list_of_idiom(idiom, series):
     tokenized_word = nltk.word_tokenize(idiom)
     ngram_list_of_the_series = Nlp_util.create_ngrams(
         WordFormatter.Series2Str(series), len(tokenized_word))
     return [
         idx for idx, ngram in enumerate(ngram_list_of_the_series)
         if ngram == tokenized_word
     ]
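
Nlp_util.create_ngrams and WordFormatter.Series2Str are not shown here either; the sketch below substitutes assumed minimal versions (token n-grams as lists, a space-joined Series) so the idiom lookup can be run end to end. It assumes the NLTK punkt tokenizer data has been downloaded.

    import nltk
    import pandas as pd

    def create_ngrams_sketch(text, n):
        # Hypothetical stand-in for Nlp_util.create_ngrams: token n-grams as lists.
        tokens = nltk.word_tokenize(text)
        return [tokens[i:i + n] for i in range(len(tokens) - n + 1)]

    series = pd.Series(["do", "not", "give", "up", "on", "it"])
    idiom_tokens = nltk.word_tokenize("give up")
    ngrams = create_ngrams_sketch(" ".join(series), len(idiom_tokens))
    print([i for i, g in enumerate(ngrams) if g == idiom_tokens])  # [2]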
Example No. 5
 def __get_sidx_of_normal_and_too_long_sent(cls, df):
     delete_sidx_list = []
     for sidx in set(df.sidx.values):
         target_df = df[df.sidx == sidx].copy().reset_index(drop=True)
         if cls.__is_special_type(target_df):
             continue
         if len(WordFormatter.Series2Str(target_df.word)) > 75:
             delete_sidx_list.append(sidx)
     return delete_sidx_list
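
A small sketch of the sidx-keyed table this filter expects (one row per token, sidx marking which sentence the token belongs to), applying the same 75-character length check per sentence; the sample data is invented and the __is_special_type branch is left out.

    import pandas as pd

    df = pd.DataFrame({
        "sidx": [0, 0, 0, 1, 1],
        "word": ["i", "am", "ok", "this", "x" * 80],
    })

    # Same per-sentence length check as above, minus the __is_special_type branch.
    delete_sidx_list = []
    for sidx in set(df.sidx.values):
        target_df = df[df.sidx == sidx]
        if len(" ".join(target_df.word)) > 75:
            delete_sidx_list.append(sidx)
    print(delete_sidx_list)  # [1]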
Example No. 6
    def anything_isin(word_list, series):
        for word in word_list:
            tokenized_word = nltk.word_tokenize(word)
            if len(tokenized_word) == 1:
                if any(series.isin([word])):
                    return True
            else:
                ngram_list_of_the_series = Nlp_util.create_ngrams(
                    WordFormatter.Series2Str(series), len(tokenized_word))
                if tokenized_word in ngram_list_of_the_series:
                    return True
        return False
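
A minimal usage sketch of the single-token path, which needs only pandas; it assumes the series holds one token per row, like the base_form column in the earlier examples.

    import pandas as pd

    base_forms = pd.Series(["i", "do", "not", "think", "so"])
    # Single-token words reduce to a plain membership test on the Series.
    print(any(base_forms.isin(["not", "never"])))  # True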