def give_sentence():
    """Generate a random sentence from the corpus via a Markov random walk.

    Returns the longest newline-separated line of the generated text,
    which guards against multi-line output from the walk.
    """
    text = tokenize("corpus.txt")
    model = MarkovModel(text)

    # random_walk() returns a token sequence whose final token is a
    # terminator, so drop it before joining.
    walk = model.random_walk()
    sentence = " ".join(walk[:-1])

    # BUGFIX: the original called re.sub(...) and discarded its return
    # value (strings are immutable, so it was a no-op); the split-based
    # selection below is what actually strips extra lines.
    # str.split always returns a non-empty list, so max() is safe and the
    # original "else" branch was unreachable.
    return max(sentence.split('\n'), key=len)
Example #2
0
def generate(modelType, startWord):
    """Instantiate the requested model and generate text from startWord.

    Raises NoSuchModelException when modelType is not a known model name.
    """
    model_cls = {"markov": MarkovModel, "lstm": LSTM}.get(modelType)
    if model_cls is None:
        raise NoSuchModelException()
    return model_cls().generate(startWord)
def main():
    """CLI entry point: train a Markov model on a text file and print output."""
    arg_parser = argparse.ArgumentParser(description='Markov Chain Generator')
    arg_parser.add_argument("filename",
                            help="path of file containing training text")
    arg_parser.add_argument("-w", "--words", type=int, default=100,
                            help="minimum number of words in generated text")
    opts = arg_parser.parse_args()

    chain = MarkovModel(opts.filename)
    generated = chain.generate_text(opts.words)
    print(f"\n{generated}\n")
Example #4
0
def main():

    #### We can't do this for multiple songs.
    songs = glob.glob("songs/*.mp3")
    filename = generate(songs)
    beats = []
    audiofile = audio.LocalAudioFile(filename)
    beats = audiofile.analysis.beats
    print "Number of beats %s" % len(beats)

    samples = beats[::SAMPLING_STEP]
    print "Number of samples to build cluster model %s" % len(samples)
    cl = cluster.KMeansClustering(samples, distance_beats)
    clusters = cl.getclusters(K)
    print "Clustering completed"

    for c in clusters:
        c.centroid = None
    pickle.dump(clusters, open("clustering.c", "wb"))
    print "Pickled Cluster Model"

    for c in clusters:
        c.centroid = cluster.centroid(c)
    print "Reset the centroids"

    training_input = []
    for beat in beats:
        training_input.append(get_cluster_index(beat, clusters))
    print("Training markovModel")
    markov_model = MarkovModel()
    markov_model.learn_ngram_distribution(training_input, NGRAM)

    #### We should have his function as iterator.
    print "Generating bunch of music"
    output_list = markov_model.generate_a_bunch_of_text(len(training_input))
    generated_beats = audio.AudioQuantumList()
    print "Coming back to beats"
    for index in output_list:
        generated_beats.append(get_beats_back(index, clusters))

    #### We can't do this for multiple songs.
    print "Saving an Amazing Song"
    out = audio.getpieces(audiofile, generated_beats)
    out.encode("bunch_of_music.wav")
Example #5
0
#!/usr/bin/env python3
import sys

from markov import MarkovModel

def err_msg():
    """Print usage text for the three supported command-line forms."""
    prog = sys.argv[0]
    print("Usage: {0} <sqlite3 file> <text file>: add text file to markov chain\n       {0} <sqlite3 file> delete: delete markov db\n       {0} <sqlite3 file> get s/p: get random sentence/paragraph".format(prog))

if __name__ == "__main__":
    # CLI dispatcher: argv[1] is always the sqlite3 database path; the
    # remaining arguments select delete / get / ingest modes.
    if len(sys.argv) <= 1:
        err_msg()
    else:
        mm = MarkovModel(sys.argv[1])
        argc = len(sys.argv)
        if argc == 3 and sys.argv[2] == "delete":
            mm.delete_table()
        elif argc == 4 and sys.argv[2] == "get":
            # "p" → random paragraph, "s" → random sentence.
            if sys.argv[3] == "p":
                print(mm.get_random_paragraph())
            elif sys.argv[3] == "s":
                print(mm.get_random_sentence())
        elif argc == 3:
            # Ingest a text file line by line, flattening line endings.
            with open(sys.argv[2]) as infile:
                for line in infile.readlines():
                    mm.add_text(line.replace("\r\n"," ").replace("\n"," "))
        else:
            err_msg()
Example #6
0
# Overlay the two states' probability densities on the observations.
plt.scatter(observations, state1_pd, color=(0.5, 0, 0), **config)
plt.scatter(observations, state2_pd, color=(0, 0, 0.5), **config)

# Apply shared figure settings, optionally exporting to the report folder.
fig = matplotlib.pyplot.gcf()
fig.set_dpi(fig_dpi)
fig.set_tight_layout(True)
if fig_export:
    savefig("report/res/pdfs-w-obs.png")
plt.show()

# %% [markdown]
# # Forward Procedure (3)

# %%
# Build the model and run the forward procedure over the observations.
model = MarkovModel(states=[state1, state2],
                    observations=observations,
                    state_transitions=state_transition)
model.populate_forward()

print(model.forward)

# Keep the forward lattice, then derive P(observations) from it.
forward = model.forward
model.calculate_p_obs_forward()

# %%
# Fresh model; populate() presumably fills all lattices in one call —
# TODO confirm against the MarkovModel implementation.
model = MarkovModel(states=[state1, state2],
                    observations=observations,
                    state_transitions=state_transition).populate()

state_x = np.arange(1, 10)
Example #7
0
    }


def weight_coeff_len(l: int) -> float:
    """Exponential weight for a token of length *l*.

    Effective lengths are clamped to [0, 10]; any length of 5 or less maps
    to exp(0), so the minimum weight is 1 - 0.9.
    """
    shifted = min(10, l) - 5
    exponent = shifted if shifted > 0 else 0
    return math.exp(exponent) - 0.9


def my_reducer(_state_0: MarkovState, _memory_1: MarkovMemory) -> MarkovState:
    """Return a new state with the most recent memory token appended."""
    extended = _state_0 + (_memory_1[-1], )
    return MarkovState(extended)


# Load and whitespace-tokenize the training corpus.
txt = open('corpus.txt', encoding='utf8').read()
corpus = txt.split()

# Layered Markov model: 3-, 2-, and 1-word context layers share the same
# weighter/reducer; each lambda slices that layer's context window off the
# end of the running state tuple.
model = MarkovModel(my_selector)
model.create_layer('3-word', lambda _state_0: _state_0[-3:], my_weighter,
                   my_reducer)
model.create_layer('2-word', lambda _state_0: _state_0[-2:], my_weighter,
                   my_reducer)
model.create_layer('1-word', lambda _state_0: _state_0[-1:], my_weighter,
                   my_reducer)

# Sentinel start state: a single None marks "no preceding words".
state_empty = MarkovState((None, ))

state_0 = deepcopy(state_empty)
prefix = []

for word in corpus:

    # text preparation
Example #8
0
from twitterapi import TwitterAPI
from markov import MarkovModel

POST_SIGNATURE = ' #JohnMaddenBot'

if __name__ == '__main__':
    # Build the post body: a generated sentence capped so the signature
    # still fits within a 140-character tweet.
    madden_model = MarkovModel('docs/jmcorpus.txt')
    body_limit = 140 - len(POST_SIGNATURE)
    post_text = madden_model.makeSentence(body_limit) + POST_SIGNATURE

    # Authenticate against Twitter and publish.
    twitter = TwitterAPI('tokens.txt')
    twitter.tweet(post_text)
Example #9
0
def main():
    parser = argparse.ArgumentParser(
        description="Markov")
    parser.add_argument(
        "-d", "--directory",
        default=None,
        help="Music dir")
    parser.add_argument(
        "-f", "--filename",
        default=None,
        help="Song file")
    parser.add_argument(
        "-p", "--pickle",
        default=False, action="store_true",
        help="Pickle")
    parser.add_argument(
        "-k", "--clusters", type=int,
        default=50, help="Clusters")
    parser.add_argument(
        "-s", "--sample", type=int,
        default=2, help="Sampling")
    parser.add_argument(
        "-n", "--ngram", type=int,
        default=10, help="Ngram")
    parser.add_argument(
        "-l", "--length", type=int,
        default=None, help="Length")
    args = parser.parse_args()

    if args.directory is not None:
        args.filename = generate_single_song(args.directory)
    if args.filename is None:
        raise Exception("Song not defined")

    #### We can't do this for multiple songs.
    beats = []
    audiofile = audio.LocalAudioFile(args.filename)
    beats = audiofile.analysis.beats
    print "Number of beats %s" % len(beats)
    internal_filename = os.path.split(args.filename)[1]
    if not args.pickle:
        samples = beats[::args.sample]
        print "Number of samples to build cluster model %s" % len(samples)
        cl = cluster.KMeansClustering(samples, distance_beats)
        clusters = cl.getclusters(args.clusters)
        print "Clustering completed"
        for c in clusters:
            c.centroid = None
        pickle.dump(clusters, open(internal_filename[:-4] + ".pickle", "wb"))
        print "Pickled Cluster Model"
    else:
        clusters = pickle.load(open(internal_filename[:-4] + ".pickle", "rb"))
        attach_source(clusters, audiofile)

    print "Resetting the centroids"
    for c in clusters:
        c.centroid = cluster.centroid(c)
    print "Reset the centroids"

    training_input = []
    for beat in beats:
        training_input.append(get_cluster_index(beat, clusters))
    print("Training markovModel")
    markov_model = MarkovModel()
    markov_model.learn_ngram_distribution(training_input, args.ngram)

    #### We should have his function as iterator.
    print "Generating bunch of music"
    if args.length is None:
        args.length = len(training_input)
    output_list = markov_model.generate_a_bunch_of_text(args.length)
    generated_beats = audio.AudioQuantumList()

    print "Coming back to beats"
    prev_beat = None
    for index in output_list:
        curr_beat = get_beats_back(index, clusters, prev_beat)
        generated_beats.append(curr_beat)
        prev_beat = curr_beat

    #### We can't do this for multiple songs.
    print "Saving an Amazing Song"
    out = audio.getpieces(audiofile, generated_beats)
    out.encode(internal_filename[:-4] + ".wav")
Example #10
0
from collections import Counter
import random
import re

import images
from markov import MarkovModel

# Shared model backed by the copypasta corpus database.
PASTA_MM = MarkovModel("data/copypasta.sqlite3")
# Normal-distribution parameters for the per-paragraph word-count target
# used by generate_copypasta.
MEAN_WORDS_PER_PARAGRAPH = 50
STDEV_WORDS_PER_PARAGRAPH = 20
# Stopword list: most common English words, lowercased for comparison.
with open("data/mostcommonwords.txt") as f:
    COMMON_WORDS = [l.strip().lower() for l in f]

def generate_copypasta(short=False):
    """Return a random copypasta paragraph of roughly normal length.

    With short=True the length distribution is shifted down (mean -45,
    stdev -10) to produce brief outputs; the minimum is always one word.
    """
    if short:
        mu = MEAN_WORDS_PER_PARAGRAPH - 45
        sigma = STDEV_WORDS_PER_PARAGRAPH - 10
    else:
        mu = MEAN_WORDS_PER_PARAGRAPH
        sigma = STDEV_WORDS_PER_PARAGRAPH
    target = discrete_normal(mu=mu, sigma=sigma, minimum=1)
    return PASTA_MM.get_random_paragraph_min(target)

# Gets n most common words from text, barring most common words in English.
def get_most_common_words(text, n):
    """Return the n most frequent interesting words in *text*.

    Words are lowercased; English stopwords (COMMON_WORDS) and any token
    containing '*' are excluded. Ties keep first-occurrence order.
    """
    counts = Counter()
    for token in re.findall(r"[A-Za-z'\*\-]+", text):
        lowered = token.lower()
        if lowered not in COMMON_WORDS and '*' not in token:
            counts[lowered] += 1
    return [word for word, _ in counts.most_common(n)]

def discrete_normal(mu, sigma, minimum=-float('inf')):
    """Sample an integer from N(mu, sigma), clamped below at *minimum*."""
    sample = round(random.gauss(mu, sigma))
    return sample if sample >= minimum else minimum
Example #11
0
def butcher_word(word):
    """Scramble *word* via a character-level Markov random walk.

    The character list is wrapped in START/END sentinels; the walk's
    trailing sentinel token is stripped before re-joining.
    """
    chars = ["START"] + list(word) + ["END"]
    walk = MarkovModel(chars).random_walk()
    return "".join(walk[:-1])