Ejemplo n.º 1
0
def give_sentence():
    """Generate one sentence from a Markov model trained on corpus.txt.

    Performs a random walk through the model, drops the walk's final
    token (end-of-walk marker), and returns the longest
    newline-delimited segment as a space-joined string.
    """
    text = tokenize("corpus.txt")

    model = MarkovModel(text)

    walk = model.random_walk()

    # Drop the trailing token before joining into a single string.
    sentence = " ".join(walk[:-1])

    # NOTE: the original called re.sub(...) here but discarded its
    # return value (str.replace/re.sub never mutate in place), so that
    # line was a no-op and has been removed.

    # str.split always returns a list (never a bare str), so the
    # longest segment can be taken unconditionally — the original
    # `type(sentence) is list` check could never be False.
    return max(sentence.split('\n'), key=len)
Ejemplo n.º 2
0
def generate(modelType, startWord):
    """Build the model named by modelType and generate text from startWord.

    Raises NoSuchModelException when modelType is not a known model name.
    """
    factories = {
        "markov": MarkovModel,
        "lstm": LSTM,
    }
    try:
        model = factories[modelType]()
    except KeyError:
        raise NoSuchModelException()
    return model.generate(startWord)
Ejemplo n.º 3
0
def main():
    """CLI entry point: train a Markov model on a text file and print output."""
    arg_parser = argparse.ArgumentParser(description='Markov Chain Generator')
    arg_parser.add_argument("filename",
                            help="path of file containing training text")
    arg_parser.add_argument("-w", "--words", type=int, default=100,
                            help="minimum number of words in generated text")
    opts = arg_parser.parse_args()

    chain = MarkovModel(opts.filename)
    generated = chain.generate_text(opts.words)
    print(f"\n{generated}\n")
Ejemplo n.º 4
0
#!/usr/bin/env python3
import sys

from markov import MarkovModel

def err_msg():
    """Print CLI usage for this script (invoked name taken from sys.argv[0])."""
    # The original repeated sys.argv[0] three times in one very long
    # line; an indexed {0} placeholder and implicit string
    # concatenation produce byte-identical output readably.
    print("Usage: {0} <sqlite3 file> <text file>: add text file to markov chain\n"
          "       {0} <sqlite3 file> delete: delete markov db\n"
          "       {0} <sqlite3 file> get s/p: get random sentence/paragraph"
          .format(sys.argv[0]))

if __name__ == "__main__":
    # No database path supplied at all -> show usage.
    if len(sys.argv) < 2:
        err_msg()
    else:
        mm = MarkovModel(sys.argv[1])
        rest = sys.argv[2:]
        if len(rest) == 1 and rest[0] == "delete":
            mm.delete_table()
        elif len(rest) == 2 and rest[0] == "get":
            # Unknown selectors are silently ignored, as before.
            if rest[1] == "p":
                print(mm.get_random_paragraph())
            elif rest[1] == "s":
                print(mm.get_random_sentence())
        elif len(rest) == 1:
            # Any other single argument is a text file to ingest,
            # flattening line endings to spaces before adding.
            with open(rest[0]) as infile:
                for line in infile:
                    mm.add_text(line.replace("\r\n"," ").replace("\n"," "))
        else:
            err_msg()
Ejemplo n.º 5
0
# Scatter the per-state values against the observations
# (dark red = state 1, dark blue = state 2; **config carries shared
# scatter styling defined earlier in the notebook — not visible here).
plt.scatter(observations, state1_pd, color=(0.5, 0, 0), **config)
plt.scatter(observations, state2_pd, color=(0, 0, 0.5), **config)

# Apply figure-wide settings and optionally export to the report folder.
fig = matplotlib.pyplot.gcf()
fig.set_dpi(fig_dpi)
fig.set_tight_layout(True)
if fig_export:
    # NOTE(review): bare savefig presumably comes from a pylab-style
    # star import earlier in the notebook — confirm.
    savefig("report/res/pdfs-w-obs.png")
plt.show()

# %% [markdown]
# # Forward Procedure (3)

# %%
# Build the two-state model and run the forward procedure over the
# observation sequence.
model = MarkovModel(states=[state1, state2],
                    observations=observations,
                    state_transitions=state_transition)
model.populate_forward()

print(model.forward)

# Keep the forward table around, then compute P(observations) from it.
forward = model.forward
model.calculate_p_obs_forward()

# %%
# Rebuild the model and populate everything in one chained call.
model = MarkovModel(states=[state1, state2],
                    observations=observations,
                    state_transitions=state_transition).populate()

state_x = np.arange(1, 10)
Ejemplo n.º 6
0
    }


def weight_coeff_len(l: int) -> float:
    """Exponential weight for a token of length *l*.

    The length is clamped to at most 10, shifted down by 5, and floored
    at 0, so the weight ranges from e^0 - 0.9 = 0.1 (length <= 5) up to
    e^5 - 0.9 (length >= 10).
    """
    shifted = min(10, l) - 5
    exponent = shifted if shifted > 0 else 0
    return math.exp(exponent) - 0.9


def my_reducer(_state_0: MarkovState, _memory_1: MarkovMemory) -> MarkovState:
    """Advance the chain state by appending the most recent memory item.

    NOTE(review): assumes MarkovState supports tuple-style `+`
    concatenation with a one-element tuple (it appears to wrap a
    tuple) — confirm against its definition.
    """
    return MarkovState(_state_0 + (_memory_1[-1], ))


# Read the training corpus and split it into whitespace-separated tokens.
txt = open('corpus.txt', encoding='utf8').read()
corpus = txt.split()

# Build the model with three fallback layers, each keying on a
# progressively shorter suffix of the current state (3-, 2-, then
# 1-word context), all sharing the same weighter and reducer.
model = MarkovModel(my_selector)
model.create_layer('3-word', lambda _state_0: _state_0[-3:], my_weighter,
                   my_reducer)
model.create_layer('2-word', lambda _state_0: _state_0[-2:], my_weighter,
                   my_reducer)
model.create_layer('1-word', lambda _state_0: _state_0[-1:], my_weighter,
                   my_reducer)

# Initial state: a single None marks "before any word".
state_empty = MarkovState((None, ))

state_0 = deepcopy(state_empty)
prefix = []

for word in corpus:

    # text preparation
Ejemplo n.º 7
0
from twitterapi import TwitterAPI
from markov import MarkovModel

POST_SIGNATURE = ' #JohnMaddenBot'

if __name__ == '__main__':
    # Build a tweet from the corpus, leaving room for the signature
    # within the 140-character limit.
    johnMadden = MarkovModel('docs/jmcorpus.txt')
    body = johnMadden.makeSentence(140 - len(POST_SIGNATURE))
    postContent = body + POST_SIGNATURE

    # Connect to Twitter and post it.
    tweeter = TwitterAPI('tokens.txt')
    tweeter.tweet(postContent)
Ejemplo n.º 8
0
from collections import Counter
import random
import re

import images
from markov import MarkovModel

# Markov model backed by the copypasta corpus database.
PASTA_MM = MarkovModel("data/copypasta.sqlite3")
# Target distribution of paragraph lengths, in words.
MEAN_WORDS_PER_PARAGRAPH = 50
STDEV_WORDS_PER_PARAGRAPH = 20
# Stop-word list: common English words, one per line, lowercased.
with open("data/mostcommonwords.txt") as f:
    COMMON_WORDS = [l.strip().lower() for l in f]

def generate_copypasta(short=False):
    """Return a random copypasta paragraph of roughly normal word count.

    With short=True the length distribution is shifted down (mean 5,
    stdev 10) to favor brief pastas. The sampled minimum is clamped to
    at least one word.
    """
    mean = MEAN_WORDS_PER_PARAGRAPH - (45 if short else 0)
    stdev = STDEV_WORDS_PER_PARAGRAPH - (10 if short else 0)
    wordmin = discrete_normal(mu=mean, sigma=stdev, minimum=1)
    return PASTA_MM.get_random_paragraph_min(wordmin)

# Gets n most common words from text, barring most common words in English.
def get_most_common_words(text, n):
    """Return the n most frequent words in text, skipping COMMON_WORDS.

    Words are matched as runs of letters, apostrophes, asterisks and
    hyphens, lowercased; anything containing '*' or appearing in the
    common-word stop list is excluded.
    """
    lowered = (token.lower() for token in re.findall(r"[A-Za-z'\*\-]+", text))
    counts = Counter(
        word for word in lowered
        if word not in COMMON_WORDS and '*' not in word
    )
    return [word for word, _ in counts.most_common(n)]

def discrete_normal(mu, sigma, minimum=-float('inf')):
    """Sample a normal variate, round to the nearest int, clamp below.

    Returns the rounded sample unless it falls below *minimum*, in
    which case *minimum* itself is returned.
    """
    sample = round(random.gauss(mu, sigma))
    return sample if sample >= minimum else minimum
Ejemplo n.º 9
0
def butcher_word(word):
    """Mangle a word by running its characters through a Markov model.

    The character sequence is framed with START/END sentinels, a random
    walk is taken through the resulting model, and the walk minus its
    final token is glued back into a string.
    """
    framed = ["START"] + list(word) + ["END"]
    walk = MarkovModel(framed).random_walk()
    return "".join(walk[:-1])