def generate_sentances(model: markovify.Text, time_int: float = 0.5, **kwargs):
    """Print generated sentences forever, pausing between each one.

    This function never returns; stop it by interrupting the process or by
    letting an exception from ``model.make_sentence`` propagate.

    Args:
        model: Object whose ``make_sentence(**kwargs)`` returns a sentence or
            ``None`` when no sentence could be built.
        time_int: Nominal interval between sentences, in seconds. The actual
            pause is ``time_int - 0.05`` and is never negative.
        **kwargs: Forwarded unchanged to ``model.make_sentence``.
    """
    # time.sleep() rejects negative values, so clamp short intervals to zero.
    pause = max(0.0, time_int - 0.05)
    while True:
        sent = model.make_sentence(**kwargs)
        # make_sentence signals failure with None; keep asking until it succeeds.
        while sent is None:
            sent = model.make_sentence(**kwargs)
        # Each sentence is followed by an empty line.
        print(sent, end="\n\n")
        time.sleep(pause)
def make_proper_sentence(self, model: markovify.Text) -> str:
    """
    Make sentences that start with a capital letter and end with a
    punctuation mark.

    :param model: object whose ``make_sentence()`` returns a sentence or None
    :return: the sentence with its first character upper-cased and a
        trailing ".", "?" or "!" (a random one is appended when missing)
    :raises ValueError: if the model yields no sentence after repeated attempts
    """
    punctuation = (".", "?", "!")
    max_attempts = 100

    # make_sentence() returns None when it cannot build a sentence, so retry
    # a bounded number of times instead of failing on the first None.
    sentence = None
    for _ in range(max_attempts):
        sentence = model.make_sentence()
        if sentence:
            break
    else:
        raise ValueError(
            f"model produced no sentence in {max_attempts} attempts"
        )

    # Upper-case only the first character: str.capitalize() would also
    # lower-case the rest of the sentence, mangling proper nouns.
    sentence = sentence[0].upper() + sentence[1:]
    if not sentence.endswith(punctuation):
        sentence += random.choice(punctuation)
    return sentence
def generate_text(chain: markovify.Text, model):
    """Generate text whose shape depends on the kind of model requested.

    Args:
        chain: Object providing ``make_short_sentence(max_chars)`` and
            ``make_sentence()``; either may return ``None`` on failure.
        model: Name of the content type. ``"Titles"`` and ``"Usernames"``
            yield one short sentence (at most 70 / 36 characters),
            ``"Questions"`` and ``"Answers"`` yield several sentences grouped
            into newline-separated paragraphs, anything else yields a single
            sentence.

    Returns:
        The generated text. The single-sentence variants return whatever the
        chain returned, which may be ``None``.
    """
    if model == "Titles":
        return chain.make_short_sentence(70)
    if model == "Usernames":
        return chain.make_short_sentence(36)
    # Membership test: `model == "Questions" or "Answers"` is always truthy
    # and would swallow every other model name.
    if model in ("Questions", "Answers"):
        count = int(random.randint(2, 6) * random.randint(3, 6) / 5)
        paragraphs = []
        sentences = []
        for _ in range(count):
            sentence = chain.make_sentence()
            # Skip failed generations; str.join cannot handle None.
            if sentence is not None:
                sentences.append(sentence)
            # After each sentence there is a 40% chance of a paragraph break.
            if random.random() < 0.4:
                paragraphs.append(sentences)
                sentences = []
        paragraphs.append(sentences)
        # Empty paragraphs (e.g. a break after the last sentence) are dropped
        # so the result has no blank or trailing lines.
        return "\n".join(" ".join(paragraph) for paragraph in paragraphs if paragraph)
    return chain.make_sentence()
async def write_model(self, file_name: str, model: markovify.Text) -> None:
    """
    Save a markovify.Text model in JSON form.

    The value returned by ``model.to_json()`` is written with ``json.dump``
    to ``models/<file_name>``.

    :param file_name: name of the file to write
    :param model: the model to save
    :return: None
    """
    destination = f"models/{file_name}"
    async with self.lock:  # prevent concurrent access
        with open(destination, "w") as out:
            serialized = model.to_json()
            json.dump(serialized, out)
def create_adventure(filename: str,
                     title_model: markovify.Text,
                     chapter_model: markovify.Text,
                     heading_model: markovify.Text,
                     spoken_model: markovify.Text,
                     paragraph_model: markovify.Text):
    """Generate a random adventure and write it out as a LaTeX file.

    The document uses the ``dndbook`` document class and consists of a title,
    a table of contents and 5-12 chapters. Each chapter has 5-15 sections,
    and each section holds one read-aloud box followed by 3-9 paragraphs.

    Args:
        filename: Passed to ``Document.generate_tex`` as the output path.
        title_model: Generates the document title.
        chapter_model: Generates chapter titles.
        heading_model: Generates section headings.
        spoken_model: Generates the text of each read-aloud box.
        paragraph_model: Generates the body paragraphs of each section.
    """

    def sentence_from(model):
        # make_sentence() returns None when it fails; retry so that a failed
        # generation never ends up in the document.
        sentence = model.make_sentence()
        while sentence is None:
            sentence = model.make_sentence()
        return sentence

    doc = Document()
    doc.documentclass = Command(
        'documentclass',
        options="letterpaper,twocolumn,openany,nodeprecatedcode",
        arguments=["dndbook"])
    doc.preamble.append(Command('title', sentence_from(title_model)))
    doc.append(NoEscape(r'\maketitle'))
    doc.append(NoEscape(r'\tableofcontents'))

    for _ in range(random.randint(5, 12)):
        with doc.create(chapter(sentence_from(chapter_model))):
            for _ in range(random.randint(5, 15)):
                with doc.create(Section(sentence_from(heading_model))):
                    with doc.create(DndReadAloud()):
                        doc.append(sentence_from(spoken_model))
                    # Random paragraph count, like the chapter and section
                    # counts above (a bare range(3, 9) always yields six).
                    for _ in range(random.randint(3, 9)):
                        doc.append(sentence_from(paragraph_model))

    doc.generate_tex(filename)
def __init__(self, text: Union[str, List[str]] = None, fpath: Union[str, List[str]] = None, model_path: str = None, weights: Union[List[int], List[float]] = None, state_size: int = 2, do_compile: bool = False): """ Builds the actual model to generate sentences from Args: text: str or list of str, the source text(s) to read in. fpath: str or list of str, the path to the source text(s) to read in. weights: list of int, if used, assigns weight to each model (e.g., 1, 1.5, 2) state_size: int, number of words the probability of the next word depends on. default = 2 do_compile: bool, if True, will compile the model to be a bit more performant """ self.model = None if model_path is not None: # Read in pre-built model from path with open(model_path) as f: self.model = Text.from_json(json.load(f)) if fpath is not None and self.model is None: # Read in text from path if isinstance(fpath, str): fpath = [fpath] text = [] for fp in fpath: with open(fp) as f: text.append(f.read()) if self.model is None: if isinstance(text, str): self.model = Text(text, state_size=state_size) else: models = [Text(x, state_size=state_size) for x in text] self.model = markovify.combine(models, weights) if self.model is None: raise ValueError('Pass in a value for either the text for fpath argument.') if do_compile: self.model.compile(inplace=True)
def make_sentence(self, statement: str = None, init_state=None, **kwargs) -> str:
    """Answer a statement via the commander, or generate a Markov sentence.

    Args:
        statement: Optional input. When truthy, the reply comes from
            ``self.commander.get_response(statement)``; otherwise a sentence
            is generated with ``MarkovText.make_sentence``.
        init_state: Forwarded to ``MarkovText.make_sentence``.
        **kwargs: Generation options forwarded to ``MarkovText.make_sentence``.

    Returns:
        ``"NI YET"`` when the commander replies ``"FAIL"``; otherwise the
        response after ``self.grammar.flatten``.
    """
    if statement:
        response = str(self.commander.get_response(statement))
        # "FAIL" is the sentinel reply for a statement the commander
        # could not answer.
        if response == "FAIL":
            return "NI YET"
    else:
        # Unpack the options: passing `kwargs=kwargs` would hand over one
        # keyword literally named "kwargs" and drop every option.
        response = MarkovText.make_sentence(self, init_state=init_state, **kwargs)
    # NOTE(review): make_sentence may yield None when generation fails;
    # confirm that grammar.flatten tolerates that.
    return self.grammar.flatten(response)
def __init__(self, input_text, retain_original: bool = True):
    """Load the spaCy pipeline, then initialise the underlying MarkovText.

    Args:
        input_text: Source text handed to ``MarkovText.__init__``.
        retain_original: Forwarded to ``MarkovText.__init__``.
    """
    # Pipeline components switched off when loading the "en" model.
    disabled_components = ["parser", "ner", "textcat"]
    # self.nlp is set before the base initialiser runs
    # (presumably because text parsing relies on it; confirm).
    self.nlp = spacy.load("en", disable=disabled_components)
    MarkovText.__init__(
        self,
        input_text,
        retain_original=retain_original,
    )
def setup(path='./novel-model-markovify.json.gz'):
    """Load a model from a gzip-compressed JSON export.

    Args:
        path: Location of the gzip file. Defaults to the model file next to
            the working directory.

    Returns:
        The object built by ``Text.from_json`` from the decompressed content.
    """
    # gzip.open defaults to binary mode, so from_json receives bytes.
    with gzip.open(path) as fh:
        return Text.from_json(fh.read())
from markovify import Text

# Source passage for the model. Surrounding whitespace is trimmed and every
# carriage return / line feed is removed so the text is one continuous line.
txt = """ One night—it was on the twentieth of March, 1888—I was returning from a journey to a patient (for I had now returned to civil practice), when my way led me through Baker Street. As I passed the well-remembered door, which must always be associated in my mind with my wooing, and with the dark incidents of the Study in Scarlet, I was seized with a keen desire to see Holmes again, and to know how he was employing his extraordinary powers. His rooms were brilliantly lit, and, even as I looked up, I saw his tall, spare figure pass twice in a dark silhouette against the blind. He was pacing the room swiftly, eagerly, with his head sunk upon his chest and his hands clasped behind him. To me, who knew his every mood and habit, his attitude and manner told their own story. He was at work again. He had risen out of his drug-created dreams and was hot upon the scent of some new problem. I rang the bell and was shown up to the chamber which had formerly been in part my own. """
txt = txt.strip()
txt = txt.replace("\r", "")
txt = txt.replace("\n", "")

# Build a state-size-1 model that does not retain the original text and
# write its JSON export to disk.
with open("markov-source.json", "w+") as f:
    serialized = Text(txt, state_size=1, retain_original=False).to_json()
    f.write(serialized)
def __init__(
    self,
    # Data
    generator_text: Union[str, List[str]] = None,
    responder_text: List[str] = None,
    command_text: List[str] = None,
    grammar: Union[Dict[str, str], Grammar] = None,
    # Models
    chain: Union[dict, MarkovText] = None,
    phraser: Union[dict, Phraser] = None,
    word_vectors: Union[dict, KeyedVectors] = None,
    nn: Union[dict, Model] = None,
    # Chatterbot
    commander: ChatBot = None,
    **kwargs: int,
):
    """Build the Markov chain, phraser, word vectors, network, commander and grammar.

    ``chain``, ``phraser``, ``word_vectors``, ``nn`` and ``grammar`` each
    accept either a ready-made object, which is used as-is, or a dict of
    settings (or nothing) from which the component is built. Settings dicts
    are merged over the defaults listed below and are not modified.

    Args:
        generator_text: Text fed to ``self.generate_corpus`` for the chain.
        responder_text: Sentences split with ``self.word_split``; they are
            part of the chain corpus and the training corpus for the phraser
            and word vectors. ``None`` is treated as empty.
        command_text: If given, used to train the commander.
        grammar: ``Grammar`` instance, or rules dict wrapped in a ``Grammar``
            with the ``base_english`` modifiers added.
        chain: Existing ``MarkovText`` or settings
            (``state_size``=2, ``retain_original``=True).
        phraser: Existing ``Phraser`` or settings
            (``gram_size``=3, ``scoring``="default").
        word_vectors: Existing vectors or settings
            (``embedding_model``="fasttext" or "word2vec", ``window``=5,
            ``workers``=3).
        nn: Existing ``Model`` or settings (``cell_type``="LSTM" or "GRU",
            ``max_words``=100, ``sentence_vector_size``=300,
            ``activation``="tanh", ``dropout_rate``=0.2,
            ``loss``="categorical_crossentropy", ``learning_rate``=0.0005,
            ``metrics``=["accuracy"]).
        commander: Existing ``ChatBot``; a default one is created otherwise.
        **kwargs: ``word_vector_size`` (256), ``min_count`` (5) and
            ``max_vocab_size`` (40000000).
    """
    # Defaults. Caller-supplied values take precedence; the defaults only
    # fill in what is missing.
    kwargs = {
        "word_vector_size": 256,
        "min_count": 5,
        "max_vocab_size": 40000000,
        **kwargs,
    }

    self.nlp = spacy.load("en")
    corpus = list(map(self.word_split, responder_text or []))

    # Chain
    if (not chain) or isinstance(chain, dict):
        chain = {"state_size": 2, "retain_original": True, **(chain or {})}
        MarkovText.__init__(
            self,
            None,
            state_size=chain["state_size"],
            parsed_sentences=corpus + list(self.generate_corpus(generator_text)),
            retain_original=chain["retain_original"],
        )
    else:
        # NOTE(review): the MarkovText object itself is passed as `chain`;
        # confirm the base initialiser expects that rather than its
        # inner chain.
        MarkovText.__init__(
            self,
            None,
            state_size=chain.state_size,
            chain=chain,
            parsed_sentences=chain.parsed_sentences,
            retain_original=chain.retain_original,
        )

    # Keep only the part of each token before the separator.
    corpus = [[word.split(self.separator)[0] for word in sentence]
              for sentence in corpus]

    # Phraser
    if (not phraser) or isinstance(phraser, dict):
        phraser = {"gram_size": 3, "scoring": "default", **(phraser or {})}
        # Each pass trains on the output of the previous one. Only the
        # phraser from the final pass is kept on self.
        for _ in range(phraser["gram_size"]):
            self.phraser = Phraser(
                Phrases(
                    corpus,
                    min_count=kwargs["min_count"],
                    max_vocab_size=kwargs["max_vocab_size"],
                    scoring=phraser["scoring"],
                ))
            corpus = self.phraser[corpus]
    else:
        self.phraser = phraser
        corpus = self.phraser[corpus]

    # Word Vectors
    if (not word_vectors) or isinstance(word_vectors, dict):
        word_vectors = {
            "embedding_model": "fasttext",
            "window": 5,
            "workers": 3,
            **(word_vectors or {}),
        }
        embedding_cls = {
            "fasttext": FastText,
            "word2vec": Word2Vec,
        }[word_vectors["embedding_model"].lower()]
        # min_count is pinned to 1 here rather than kwargs["min_count"].
        self.word_vectors = embedding_cls(
            corpus,
            size=kwargs["word_vector_size"],
            window=word_vectors["window"],
            min_count=1,
            workers=word_vectors["workers"],
            max_vocab_size=kwargs["max_vocab_size"],
        ).wv
    else:
        self.word_vectors = word_vectors

    # LSTM RNN
    if (not nn) or isinstance(nn, dict):
        nn = {
            "cell_type": "LSTM",
            # "num_layers": 3, Perhaps later
            "max_words": 100,
            "sentence_vector_size": 300,
            "activation": "tanh",
            "dropout_rate": .2,
            "loss": "categorical_crossentropy",
            "learning_rate": .0005,
            "metrics": ["accuracy"],
            **(nn or {}),
        }
        recurrent_cell = {"LSTM": LSTM, "GRU": GRU}[nn["cell_type"]]
        sequence_shape = (nn["max_words"], kwargs["word_vector_size"])

        def encode(sequence_input):
            # Each call creates a separate bidirectional encoder.
            return Bidirectional(recurrent_cell(
                units=nn["sentence_vector_size"],
                input_shape=sequence_shape,
                activation=nn["activation"],
                dropout=nn["dropout_rate"],
                kernel_initializer=lecun_uniform(),
            ))(sequence_input)

        input_statement = Input(shape=sequence_shape, name="input_statement")
        input_response = Input(shape=sequence_shape, name="input_response")
        self.nn = Model(
            inputs=[input_statement, input_response],
            outputs=[
                Dense(kwargs["max_vocab_size"], activation="softmax")(
                    # Integer division: a layer width must be an int.
                    Dense(kwargs["max_vocab_size"] // 2, activation="relu")(
                        concatenate(
                            [encode(input_statement), encode(input_response)],
                            axis=1,
                        )))
            ],
        )
        self.nn.compile(
            loss=nn["loss"],
            optimizer=Adam(lr=nn["learning_rate"]),
            metrics=nn["metrics"],
        )
    else:
        self.nn = nn

    # Commander
    self.commander = commander or ChatBot(
        "Commander",
        preprocessors=[
            "chatterbot.preprocessors.clean_whitespace",
            "chatterbot.preprocessors.convert_to_ascii",
        ],
        trainer="chatterbot.trainers.ListTrainer",
        logic_adapters=[
            {
                "import_path": "chatterbot.logic.BestMatch"
            },
            {
                # Replies "FAIL" when no match reaches the threshold.
                "import_path": "chatterbot.logic.LowConfidenceAdapter",
                "threshold": 0.65,
                "default_response": "FAIL",
            },
        ],
    )
    if command_text:
        self.commander.train(command_text)

    # Grammar (no default rules are defined)
    if (not grammar) or isinstance(grammar, dict):
        self.grammar = Grammar(grammar or {})
        self.grammar.add_modifiers(base_english)
    else:
        self.grammar = grammar
def get_sentences(model: markovify.Text, n=50, **kwargs):
    """Make ``n`` generation attempts and return the sentences that succeeded.

    Args:
        model: Object whose ``make_sentence(**kwargs)`` returns a sentence or
            ``None``.
        n: Number of attempts. The result may hold fewer than ``n`` items.
        **kwargs: Forwarded unchanged to ``model.make_sentence``.

    Returns:
        A list of the non-``None`` results, in generation order.
    """
    attempts = (model.make_sentence(**kwargs) for _ in range(n))
    return [sentence for sentence in attempts if sentence is not None]
from flask import Flask, render_template, Markup, url_for from random import choice, seed from markovify import Text seed() lines = open("firstLines.json").read() print("lines read") file = open("guessGame.json") chainJson = file.read() file.close() print("json read") generator = Text.from_json(chainJson) print("generated") true = """ <div id="aQuestion"> <blockquote> <p><a id="number">$. </a>*</p> </blockquote> <center> <button id="trueButton" onclick="raiseButton($)">This quote is taken straight from the TV series!</button> <button onclick="lowerButton($)" id="falseButton">This quote is generated by a Markov Chain</button> </center> </div> """ false = """ <div id="aQuestion"> <blockquote>