Example 1
def recumpile_sentence(sentence: Sentence) -> List[str]:
    new_tokens = []
    # TODO: determine mood classifier for sentence and add respective emoji

    for token in sentence.tokens:
        if decision(random_synonym_probability):
            token = replace_with_random_synonym(token)
        if decision(censor_profanity_probability) and profanity.contains_profanity(
            token
        ):
            token = custom_censoring(token, censor_profanity_percent)
        elif decision(random_censor_probability):
            token = custom_censoring(token, random_censor_percent)

        if re.match("musk", token, flags=re.IGNORECASE):
            add_husky = True
        else:
            add_husky = False

        # processing
        recumpiled_token = recumpile_token(token)

        # post processing
        new_tokens.append(recumpiled_token)

        if decision(add_definition_in_parenthesis_probability):
            definition = get_token_random_definition(token)
            if definition:
                new_tokens += [
                    f"[[{recumpile_token('DEFINITION')} {token.upper()}:",
                    f"{recumpile_text(definition)}]]",
                ]

        if add_husky:
            new_tokens.append(recumpile_token("husky"))

        if add_random_garbage and decision(add_random_garbage_probability):
            new_tokens.append(recumpile_token(add_random_garbage_token()))
        if add_randomly_text_face_emoji and decision(
            add_randomly_text_face_emoji_probability
        ):
            new_tokens.append(get_random_text_face_emojis())
        if add_random_simple_text_emoji and decision(
            # TODO: use textblob to determine mood of text and insert faces
            #  accordingly likely need to do this after reconstruction of the
            #  text blob and go through this sentence by sentence rather than
            #  word by word.
            add_random_simple_text_emoji_probability
        ):
            new_tokens.append(get_random_simple_text_emojis())
        if add_random_rp_action and decision(
            add_random_rp_mid_sentence_action_probability
        ):
            new_tokens.append(get_random_rp_action_sentence())
    if add_random_rp_action and decision(add_random_rp_end_sentence_action_probability):
        new_tokens.append(get_random_rp_action_sentence())

    if decision(random_lorem_ipsum_probability):
        new_tokens.append(get_random_lorem_ipsum_sentence())
    return new_tokens
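
Every example here gates its mutations on a decision(p) helper. A minimal sketch of that helper, assuming it is a plain Bernoulli draw (the actual recumpiler implementation may differ):

# Hypothetical sketch of the decision gate used throughout these examples.
import random

def decision(probability: float) -> bool:
    """Return True with the given probability (0.0 = never, 1.0 = always)."""
    return random.random() < probability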
Example 2
def utf_8_char_swaps(token: str) -> str:
    if decision(0.5):
        token = re.sub(r"ae", "æ", token)
        token = re.sub(r"AE", "Æ", token)
    if decision(0.3):
        token = re.sub(r"ea", "æ", token)
        token = re.sub(r"EA", "Æ", token)
    return token
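
A quick, hypothetical demo (assumes the decision sketch above is in scope); both swaps are coin-gated, so output varies per call:

import random

random.seed(0)  # make the coin flips reproducible for this run
print(utf_8_char_swaps("aesthetic"))  # "æsthetic" if the first flip lands, else unchanged
print(utf_8_char_swaps("seafood"))    # "sæfood" if the second flip lands, else unchanged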
Example 3
def lazy_char_subbing(token: str) -> str:
    """e.g.you -> u are -> r"""
    # TODO: better capital replacement

    # you -> u, yuu
    token = re.sub(
        "^y+(o+)?u+$",
        lambda match: f"u" if decision(0.5) else f"y{'u' * random.randint(1, 4)}",
        token,
        flags=re.IGNORECASE,
    )

    # are -> r, arrr
    token = re.sub(
        "^a+(r+)?e+$",
        lambda match: f"r" if decision(0.5) else f"a{'r' * random.randint(1, 4)}",
        token,
        flags=re.IGNORECASE,
    )

    # with -> wif
    token = re.sub(
        "^wi+th+$",
        lambda match: f"w{'i' * random.randint(1, 4)}{'f' * random.randint(1, 4)}",
        token,
        flags=re.IGNORECASE,
    )

    # what -> wat OR wut
    if decision(0.5):
        token = re.sub(
            "^wha+t$",
            lambda match: f"w{random.choice(['a', 'u']) * random.randint(1, 4)}t",
            token,
            flags=re.IGNORECASE,
        )

    # er -> ur
    token = re.sub(
        "(e+)r",
        lambda match: f"{'u' * (len(match.group(1)) + random.randint(0, 3))}r",
        token,
        flags=re.IGNORECASE,
        count=random.randint(0, 2),  # note: count=0 makes re.sub replace all matches
    )

    # easy -> ez
    token = re.sub(
        "^ea+s+y+$",
        lambda match: f"e{'z' * random.randint(1, 3)}",
        token,
        flags=re.IGNORECASE,
    )

    # to, too -> 2
    token = re.sub("to+$", "2", token, flags=re.IGNORECASE)
    return token
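
A hypothetical spot check of the rewrites above (assumes decision and random are in scope); every rule is probabilistic, so each line shows just one possible outcome:

for word in ["you", "are", "with", "what", "easy", "too"]:
    print(word, "->", lazy_char_subbing(word))
# one possible run: you -> yuu, are -> r, with -> wiif,
#                   what -> wut, easy -> ezz, too -> 2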
Example 4
def reeeer(token: str) -> str:
    if decision(REEE_probability):
        token = re.sub(
            r"([Rr])e*",
            lambda match: f"{match.group(1)}e" + "e" * random.choice(range(1, 15)),
            token,
        )
        if decision(REEE_allcaps_probability):
            token = token.upper()
    return token
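
For instance (hypothetical run; REEE_probability and REEE_allcaps_probability are module-level config values assumed to be defined):

print(reeeer("rage"))  # e.g. "reeeeeage", or "REEEEEAGE" if the all-caps flip also lands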
Example 5
def owoer(token: str) -> str:
    # TODO: owo usually becomes owoo; should suppress that.

    token = re.sub(
        r"(ou)([^o])?",
        lambda match: f"ouo{match.group(2) or ''}",
        token,
        flags=re.IGNORECASE,
    )
    token = re.sub(
        r"(ow)([^o])?",
        lambda match: f"owo{match.group(2) or ''}",
        token,
        flags=re.IGNORECASE,
    )
    token = re.sub(
        r"(ov)([^o])?",
        lambda match: f"ovo{match.group(2) or ''}",
        token,
        flags=re.IGNORECASE,
    )

    token = re.sub(r"(cor)", lambda match: f"cowor", token)

    if (
        "owo" not in token.lower()
        and "ouo" not in token.lower()
        and decision(hard_owo_replace_probability)
    ):
        owo_str = "owo" if decision(owo_vs_ouo_bias) else "ouo"
        token = re.sub(
            r"(o+)",
            lambda match: (owo_str * len(match.group(1))).replace("oo", "o"),
            token,
            flags=re.IGNORECASE,
            count=random.choice(range(0, 2)),  # note: count=0 replaces all matches
        )

    # TODO: UWU
    # juice -> juwuice
    if decision(juwuice_swap_probability):
        token = re.sub(
            r"u+(i?ce)",
            lambda match: f"uwu{match.group(1)}",
            token,
            flags=re.IGNORECASE,
        )

    if "uwu" not in token.lower() and decision(hard_uwu_replace_probability):
        uwu_str = "uwu"
        # note: count=0 makes re.sub replace every u-run, not none
        token = re.sub(
            r"u+", uwu_str, token, flags=re.IGNORECASE, count=random.choice(range(0, 2))
        )

    return token
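
A hypothetical run over a few trigger words (assumes the module's probability globals are defined); each output is one possibility among several:

for word in ["house", "power", "love", "juice"]:
    print(word, "->", owoer(word))
# one possible run: house -> houose, power -> powoer,
#                   love -> lovoe, juice -> juwuice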
Example 6
def fuckyer(token: str) -> str:
    extra_fun = ""
    y_choice_1 = ("y" if decision(0.5) else "i") * random.choice(range(1, 5))
    y_choice_2 = ("y" if decision(0.5) else "i") * random.choice(range(1, 5))
    if decision(0.5):
        extra_fun = f"w{'u' * random.choice(range(1, 5))}k{y_choice_2}"
    token = re.sub(
        r"([Ff])?uck(er|ing)?",
        lambda match:
        f"{match.group(1) or ''}{'u' * random.choice(range(1,5))}k{y_choice_1}{match.group(2) or ''}"
        + " " + extra_fun,
        token,
    )
    return token
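
One hypothetical outcome ("uck" is rewritten wherever it appears, and a bonus word is appended about half the time):

print(fuckyer("trucker"))  # e.g. "truukyer wuukyy" (exact vowel runs vary per call)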
Example 7
def get_random_lorem_ipsum_sentence() -> str:
    """Return a lorem ipsum sentence, sometimes recumpiled."""
    lorem_sentence = lorem.sentence()
    if decision(lorem_ipsum_fuck_probability):
        lorem_sentence = fix_punctuation_spacing(
            TreebankWordDetokenizer().detokenize(
                recumpile_sentence(Sentence(lorem_sentence))
            )
        )
    return lorem_sentence
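
The detokenizer here is NLTK's TreebankWordDetokenizer (a real NLTK class); standalone it rejoins a token list into running text, which is presumably why fix_punctuation_spacing only has to clean up leftovers:

from nltk.tokenize.treebank import TreebankWordDetokenizer

print(TreebankWordDetokenizer().detokenize(["lorem", "ipsum", "dolor", "!"]))
# -> "lorem ipsum dolor!"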
Example 8
def get_random_rp_action_sentence() -> str:
    more_verbs = []
    more_verbs_probability = 1
    while True:
        if decision(more_verbs_probability):
            additional_verb = get_random_action_verb()
            if decision(0.5):  # TODO: config
                additional_verb = Word(additional_verb).lemmatize()
            additional_verb = recumpile_token(additional_verb)
            additional_verb = Word(additional_verb).pluralize()
            more_verbs.append(additional_verb)
        else:
            break
        more_verbs_probability -= more_verbs_probability_decay

    noun = get_random_rp_pronoun()
    if decision(0.5):  # TODO: config
        noun = Word(noun).lemmatize()

    # TODO: add boolean for enable
    noun = recumpile_token(noun)
    noun = Word(noun).pluralize()
    return to_rp_text(f"{' and '.join(more_verbs)}{' ' if more_verbs else ''}{noun}")
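
Word is TextBlob's inflection wrapper; the lemmatize()/pluralize() calls above are its real API:

from textblob import Word

print(Word("hugs").lemmatize())    # -> "hug" (noun lemmatization is the default)
print(Word("pounce").pluralize())  # -> "pounces"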
Example 9
def recumpile_token(token: str) -> str:
    # TODO: determine mood classifier for token and add respective emoji
    if decision(split_compound_word_probability):
        tokens = split_compound_word(token)
    else:
        tokens = [token]

    # TODO: migrate fuck_token to maybe a generator?
    fucked_tokens = []
    for token in tokens:
        relevant_emoji = None
        if decision(add_text_relevant_emoji_probability):
            relevant_emoji = find_text_relevant_emoji(
                token)  # TODO: add ability to get multiple?
            if relevant_emoji and decision(
                    wrap_text_relevant_emoji_probability):
                fucked_tokens.append(relevant_emoji)

        if decision(0.1):
            token = remove_dupe_chars(token)

        if decision(lazy_char_subbing_probability):
            token = lazy_char_subbing(token)

        # TODO: this is a potential for unexpected behavior
        if decision(word_to_num_probability):
            token = word_to_num(token)
        if decision(num_to_word_probability):
            token = num_to_word(token)

        if decision(lr_to_w_swap_probability):
            token = lr_to_w_swap(token)

        # TODO: this might be too much idk
        if decision(invert_word_probability):
            token = word_inverter(token)

        if decision(upside_down_word_probability):
            token = word_upside_downer(token)
        elif decision(upside_down_word_probability):
            token = word_upside_downer_preserve_char_order(token)

        fucked_token = knotter(
            fuckyer(reeeer(rawrer(garbage(owoer(cummer(token)))))))

        if decision(add_extra_ed_probability):
            fucked_token = add_extra_ed(fucked_token)

        if decision(random_ending_y_probability):
            fucked_token = add_ending_y(fucked_token)

        # TODO: likely making fu@k into k
        # TODO: NOTE: indeed it is doing this fu@k
        #   >>>list(TextBlob("fu@k").words)
        #   ['fu', 'k']
        if add_random_plurals and decision(add_random_plurals_probability):
            fucked_token = Word(fucked_token).pluralize()

        if randomly_lemmatize and decision(randomly_lemmatize_probability):
            fucked_token = Word(fucked_token).lemmatize()

        if randomly_capitalize_word and decision(
                randomly_capitalize_word_probability):
            fucked_token = fucked_token.upper()

        if randomly_spongebob_word and decision(
                randomly_spongebob_word_probability):
            fucked_token = generate_spongebob_text(fucked_token)

        if randomly_overemphasis_punctuation and decision(
                randomly_overemphasis_punctuation_probability):
            fucked_token = over_emphasise_punctuation(
                fucked_token, randomly_overemphasis_punctuation_max_fuck)

        if decision(common_misspellings_probability):
            fucked_token = common_mispellings(fucked_token)

        if randomly_swap_char and decision(randomly_swap_char_probability):
            fucked_token = random_swap_char(fucked_token,
                                            randomly_swap_char_swap_percent)

        if randomly_insert_char and decision(randomly_insert_char_probability):
            fucked_token = random_insert_char(
                fucked_token, randomly_insert_char_insert_percent)
        if decision(utf_8_char_swaps_probability):
            fucked_token = utf_8_char_swaps(fucked_token)

        if random_leet_speak and decision(random_leet_speak_probability):
            fucked_token = token_to_leet(fucked_token)

        if decision(common_misspellings_probability):
            fucked_token = common_mispellings(fucked_token)

        # TODO: likely also breaking the spacing between punctuation, e.g. "kittly 1!"
        # TODO: `f****d` went to `DS` investigate
        # TODO: likely this is at fault
        if decision(homofiy_probability):
            fucked_token = homoify(fucked_token, homofiy_probability)

        fucked_tokens.append(fucked_token)

        if decision(add_x3_if_token_has_rawr_probability) and (
                "rawr" in fucked_token.lower()):
            fucked_tokens.append("X3" if decision(0.5) else "x3")

        if decision(adding_ending_ksksk_andioop_probability) and (
                fucked_token.lower().endswith("ksk")
                or fucked_token.lower().endswith("sks") or "ksksk"
                in fucked_token.lower() or "sksks" in fucked_token.lower()):
            for i in range(random.randint(1, 2)):
                fucked_tokens.append(recumpile_token("andioop"))
        if decision(adding_ending_ksksk_save_the_turtles_probability) and (
                fucked_token.lower().endswith("ksk")
                or fucked_token.lower().endswith("sks") or "ksksk"
                in fucked_token.lower() or "sksks" in fucked_token.lower()):
            fucked_tokens.append(recumpile_text("save the turtles!"))

        if decision(
                fucking_normies_addition) and "reee" in fucked_token.lower():
            fucked_tokens.append(recumpile_text("f*****g normies!"))

        if decision(get_rhymes_probability):
            for rhyme in get_runon_of_rhymes(token,
                                             max_runon=max_runon_rhymes):
                fucked_rhyme = recumpile_token(rhyme)
                fucked_tokens.append(fucked_rhyme)

        if relevant_emoji:
            fucked_tokens.append(relevant_emoji)

    for i, fucked_token in enumerate(fucked_tokens):
        if decision(space_gap_text_probability):
            # TODO: this modification may be better placed elsewhere
            fucked_token = space_gap_text(
                fucked_token,
                min_gap_size=space_gap_text_min_gap_size,
                max_gap_size=space_gap_text_max_gap_size,
            )
        # TODO: discord format options
        if decision(bold_text_probability):
            fucked_token = bold_text(fucked_token)
        elif decision(back_tick_text_probability):
            fucked_token = back_tick_text(fucked_token)
        fucked_tokens[i] = fucked_token

    return " ".join(fucked_tokens)
Example 10
def recumpile_sentence(sentence: Sentence) -> List[str]:
    new_tokens = []
    # TODO: determine mood classifier for sentence and add respective emoji
    sentiment_emoji = None
    if decision(0.89):
        sentiment_emoji = get_sentiment_emoji(sentence)

    for token in sentence.tokenize(TweetWordTokenizer()):
        # TODO: this is only for Discord, so we don't break tokenization
        if re.match(
                r"@everyone|@here|<:[^:\s]+:[0-9]+>|<a:[^:\s]+:[0-9]+>|<(?:@!?\d+|:[A-Za-z0-9]+:)\w+>",
                token,
        ):
            new_tokens.append(token)
            continue

        emoji = None
        alias_emoji = get_cheap_emoji_alias(token)

        # TODO: refactor into its own mutator
        if decision(0.9) and (re.match("among", token, flags=re.IGNORECASE) or
                              re.match("amogus", token, flags=re.IGNORECASE) or
                              re.match(r"su+s", token, flags=re.IGNORECASE)):
            emoji = "ඞ"

        emoticon = get_emoticon(token)

        if alias_emoji:
            if decision(0.1) or (len(str(token)) == 1 and decision(0.9)):
                new_tokens.append(alias_emoji)
                continue
            else:
                if decision(0.5):
                    new_tokens.append(alias_emoji)

        if decision(0.5):
            emoji = get_emoji_from_data(token)
        if decision(0.3):
            emoji = get_gloveword_emoji(token)
        if emoji:
            if decision(0.5):
                new_tokens.append(emoji)

        if decision(random_synonym_probability):
            token = replace_with_random_synonym(token)
        if decision(0.5) and profanity.contains_profanity(token):
            token = token.upper()
        if decision(censor_profanity_probability
                    ) and profanity.contains_profanity(token):
            if decision(0.1):
                token = custom_censoring(token, 1)
            else:
                token = custom_censoring(token, censor_profanity_percent)
        elif decision(random_censor_probability):
            token = custom_censoring(token, random_censor_percent)

        if re.match("musk", token, flags=re.IGNORECASE):
            add_husky = True
        else:
            add_husky = False

        # processing
        recumpiled_token = recumpile_token(token)

        # post processing
        new_tokens.append(recumpiled_token)

        if emoji:
            if decision(0.8):
                new_tokens.append(emoji)
        if alias_emoji:
            if decision(0.8):
                new_tokens.append(alias_emoji)
        if emoticon:
            if decision(0.8):
                new_tokens.append(emoticon)

        if add_husky:
            new_tokens.append(recumpile_token("husky"))

        if add_random_garbage and decision(add_random_garbage_probability):
            new_tokens.append(recumpile_token(add_random_garbage_token()))
        if add_randomly_text_face_emoji and decision(
                add_randomly_text_face_emoji_probability):
            new_tokens.append(get_random_text_face_emojis())
        if add_random_simple_text_emoji and decision(
                # TODO: use textblob to determine mood of text and insert faces
                #  accordingly likely need to do this after reconstruction of the
                #  text blob and go through this sentence by sentence rather than
                #  word by word.
                add_random_simple_text_emoji_probability):
            new_tokens.append(get_random_simple_text_emojis())
        if add_random_rp_action and decision(
                add_random_rp_mid_sentence_action_probability):
            new_tokens.append(get_random_rp_action_sentence())
    if add_random_rp_action and decision(
            add_random_rp_end_sentence_action_probability):
        new_tokens.append(get_random_rp_action_sentence())

    if sentiment_emoji:
        new_tokens.append(sentiment_emoji)
        if decision(0.4):
            for i in range(5):
                if decision(0.3):
                    new_tokens.append(sentiment_emoji)
                else:
                    break

    return new_tokens
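
The mention/emote guard at the top of the loop can be exercised in isolation (DISCORD_TOKEN is just a local name for the same pattern); these token shapes are standard Discord syntax:

import re

DISCORD_TOKEN = (
    r"@everyone|@here|<:[^:\s]+:[0-9]+>|<a:[^:\s]+:[0-9]+>"
    r"|<(?:@!?\d+|:[A-Za-z0-9]+:)\w+>"
)
for tok in ["@here", "<:pog:123456>", "<a:wave:42>", "<@!4242>", "hello"]:
    print(tok, bool(re.match(DISCORD_TOKEN, tok)))
# only "hello" fails the match, so only it gets mutated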
Example 11
def garbage(token: str) -> str:
    # inserting gay
    token = re.sub(r"([a-fh-zA-FH-Z])a+y+",
                   lambda match: f"{match.group(1)}gay", token)

    # hello -> hewwo
    token = re.sub(r"([Hh])e+ll+o+?", lambda match: f"{match.group(1)}ewwo",
                   token)

    # er -> ur
    if decision(0.4):
        token = re.sub(
            r"e+r+",
            lambda match: f"u{'r' * ceil(np.random.rayleigh(1.2))}",
            token,
            flags=re.IGNORECASE,
        )

    # ello -> ewwo
    if decision(0.4):
        token = re.sub(
            r"e+ll+o+?",
            lambda match: f"ew{'w' * ceil(np.random.rayleigh(1.2))}o",
            token,
            flags=re.IGNORECASE,
        )  # 2-6ish

    # cute -> koot
    token = re.sub(
        r"([Cc])u+te",
        lambda match: f"{match.group(1)}oo{'o' * random.randint(0,5)}t",
        token,
    )

    # ove -> wuv
    if decision(0.7):
        token = re.sub(r"(o+)ve",
                       lambda match: f"w{'u' * len(match.group(1))}v", token)

    # one -> wun
    if decision(0.7):
        token = re.sub(r"one", "wun", token, flags=re.IGNORECASE)

    # as -> ass, asss
    if decision(0.5):
        token = re.sub(
            r"([aA])([sS])($|[^s])",
            lambda match:
            f"{match.group(1)}{match.group(2) * random.randint(2, 3)}{match.group(3)}",
            token,
        )

    # TODO: refactor (me -> meh|me -> meow) together?
    # me -> meow
    if decision(me_2_meow_swap_probability):
        token = re.sub(
            r"^me+$",
            lambda match:
            f"m{'e' * random.randint(1,3)}{'o' * random.randint(1,3)}w",
            token,
            flags=re.IGNORECASE,
        )

    # me -> meh
    if decision(me_2_meh_swap_probability):
        token = re.sub(
            r"^me+$",
            lambda match: f"m{'e' * random.randint(1, 3)}h",
            token,
            flags=re.IGNORECASE,
        )

    # my -> mah, myah
    if decision(0.5):
        token = re.sub(
            r"^my+$",
            lambda match:
            f"m{'y' if decision(0.3) else ''}{'a' * random.randint(2, 3)}{'h' if decision(0.5) else ''}",
            token,
        )

    # ion -> shun
    if decision(0.5):
        token = re.sub(r"ion$", "shun", token)

    # .ome -> .um
    if decision(0.5):
        token = re.sub(r"([a-zA-Z])ome", lambda match: f"{match.group(1)}um",
                       token)

    # teh or da
    if decision(0.5):
        token = re.sub(r"^([Tt])he$", lambda match: f"{match.group(1)}eh",
                       token)
    else:
        token = re.sub(
            r"^([Tt])he$",
            lambda match: f"{'D' if match.group(1) == 'T' else 'd'}a",
            token,
        )

    # ing -> inn
    if decision(0.5):
        token = re.sub(
            r"ing$",
            f"in{'n' * random.randint(0,4) if decision(0.5) else 'in' * random.randint(0, 4)}",
            token,
            flags=re.IGNORECASE,
        )

    # ks -> ksksksk
    if decision(ksksk_enlargement_probability):
        token = re.sub(
            r"[kK][sS]|[sS][kK]",
            lambda match: f"{match.group(0) * random.randint(2,6)}",
            token,
            flags=re.IGNORECASE,
        )

    # uck -> ucc, uccci
    if decision(uck_to_ucc_swap_probability):
        token = re.sub(
            r"u+c+k+",
            lambda match:
            f"u{'c' * random.randint(2,6)}{'i' * random.randint(0,3)}",
            token,
            flags=re.IGNORECASE,
        )

    if decision(sub_to_subby_swap_probability):
        token = re.sub(
            r"s(u+)b",
            lambda match:
            f"s{match.group(1)}bb{('y' if decision(0.5) else 'i') * random.randint(1, 2)}",
            token,
            flags=re.IGNORECASE,
        )

    # no -> nu+ nyu+
    if decision(0.5):
        token = re.sub(
            "([nN])(o+)",
            lambda match:
            f"{match.group(1)}{'y' if decision(0.5) else ''}{'u' * (len(match.group(2)) * random.randint(1, 6))}",
            token,
            flags=re.IGNORECASE,
        )
    return token
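
Two of the rewrites above, spot-checked in isolation (these particular patterns are deterministic once their coin flips land):

import re

print(re.sub(r"([Hh])e+ll+o+?", lambda m: f"{m.group(1)}ewwo", "Hello hello"))
# -> "Hewwo hewwo"
print(re.sub(r"one", "wun", "anyone", flags=re.IGNORECASE))
# -> "anywun"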