def give_sentence():
    """Generate one sentence from the Markov model trained on corpus.txt.

    Tokenizes the corpus, performs a random walk over the model, drops the
    trailing terminator token, and returns the longest newline-separated
    line of the joined walk.

    Returns:
        str: the longest line of the generated text.
    """
    # NOTE: the original version called re.sub(...) without using its return
    # value (strings are immutable, so it was a no-op), kept an unused
    # `new_sentence` variable, and had a dead `else` branch after
    # str.split (which always returns a list). All removed; behavior is
    # unchanged.
    text = tokenize("corpus.txt")
    model = MarkovModel(text)
    walk = model.random_walk()
    # Drop the trailing terminator token before joining into a string.
    joined = " ".join(walk[:-1])
    # The walk may span newlines; return only the longest line.
    return max(joined.split('\n'), key=len)
def generate(modelType, startWord):
    """Instantiate the requested text-generation model and run it.

    Args:
        modelType: "markov" or "lstm".
        startWord: seed word handed to the model's generate method.

    Raises:
        NoSuchModelException: if modelType is not a known model name.
    """
    factories = {"markov": MarkovModel, "lstm": LSTM}
    try:
        factory = factories[modelType]
    except KeyError:
        raise NoSuchModelException()
    return factory().generate(startWord)
def main():
    """Command-line entry point: train a Markov chain on a file, print text."""
    arg_parser = argparse.ArgumentParser(description='Markov Chain Generator')
    arg_parser.add_argument("filename", help="path of file containing training text")
    arg_parser.add_argument(
        "-w", "--words", type=int, default=100,
        help="minimum number of words in generated text")
    options = arg_parser.parse_args()
    chain = MarkovModel(options.filename)
    # Blank lines around the output visually separate it from the prompt.
    print(f"\n{chain.generate_text(options.words)}\n")
def main():
    # Pipeline: load a song, cluster its beats with k-means, train an n-gram
    # Markov model over the cluster indices, generate a new index sequence,
    # and render the corresponding beats back out as a WAV file.
    # NOTE: Python 2 code (print statements).
    #### We can't do this for multiple songs.
    songs = glob.glob("songs/*.mp3")
    filename = generate(songs)
    beats = []
    audiofile = audio.LocalAudioFile(filename)
    beats = audiofile.analysis.beats
    print "Number of beats %s" % len(beats)
    # Subsample beats to keep the k-means input small.
    samples = beats[::SAMPLING_STEP]
    print "Number of samples to build cluster model %s" % len(samples)
    cl = cluster.KMeansClustering(samples, distance_beats)
    clusters = cl.getclusters(K)
    print "Clustering completed"
    # Centroids are cleared before pickling — presumably they are not
    # picklable; they are recomputed after the dump below.
    for c in clusters:
        c.centroid = None
    # NOTE(review): the file handle passed to pickle.dump is never closed.
    pickle.dump(clusters, open("clustering.c", "wb"))
    print "Pickled Cluster Model"
    for c in clusters:
        c.centroid = cluster.centroid(c)
    print "Reset the centroids"
    # Map every beat to its nearest cluster index; this sequence is the
    # Markov model's training text.
    training_input = []
    for beat in beats:
        training_input.append(get_cluster_index(beat, clusters))
    print("Training markovModel")
    markov_model = MarkovModel()
    markov_model.learn_ngram_distribution(training_input, NGRAM)
    #### We should have his function as iterator.
    print "Generating bunch of music"
    output_list = markov_model.generate_a_bunch_of_text(len(training_input))
    generated_beats = audio.AudioQuantumList()
    print "Coming back to beats"
    # Convert generated cluster indices back to concrete beats.
    for index in output_list:
        generated_beats.append(get_beats_back(index, clusters))
    #### We can't do this for multiple songs.
    print "Saving an Amazing Song"
    out = audio.getpieces(audiofile, generated_beats)
    out.encode("bunch_of_music.wav")
#!/usr/bin/env python3
"""Command-line front end for the Markov chain sqlite database.

Usage forms: add a text file to the chain, delete the database table, or
fetch a random sentence/paragraph.
"""
import sys
from markov import MarkovModel


def err_msg():
    """Print usage instructions for this script."""
    # {0} reuses the one positional argument instead of passing sys.argv[0]
    # three times; the printed text is unchanged.
    print("Usage: {0} <sqlite3 file> <text file>: add text file to markov chain\n {0} <sqlite3 file> delete: delete markov db\n {0} <sqlite3 file> get s/p: get random sentence/paragraph".format(sys.argv[0]))


if __name__ == "__main__":
    if len(sys.argv) > 1:
        mm = MarkovModel(sys.argv[1])
        if len(sys.argv) == 3 and sys.argv[2] == "delete":
            mm.delete_table()
        elif len(sys.argv) == 4 and sys.argv[2] == "get":
            if sys.argv[3] == "p":
                print(mm.get_random_paragraph())
            elif sys.argv[3] == "s":
                print(mm.get_random_sentence())
        elif len(sys.argv) == 3:
            # Any other two-argument form: treat argv[2] as a text file and
            # ingest it line by line (streaming — was f.readlines(), which
            # materialized the whole file in memory).
            with open(sys.argv[2]) as f:
                for line in f:
                    # Flatten line endings so sentences join across lines.
                    mm.add_text(line.replace("\r\n", " ").replace("\n", " "))
        else:
            err_msg()
    else:
        err_msg()
# Scatter the per-state probability densities against the observations.
# NOTE(review): state1_pd/state2_pd, config, fig_dpi, fig_export, state1,
# state2, state_transition are defined in an earlier cell outside this view.
plt.scatter(observations, state1_pd, color=(0.5, 0, 0), **config)
plt.scatter(observations, state2_pd, color=(0, 0, 0.5), **config)
fig = matplotlib.pyplot.gcf()
fig.set_dpi(fig_dpi)
fig.set_tight_layout(True)
if fig_export:
    # Export for the report only when explicitly enabled.
    savefig("report/res/pdfs-w-obs.png")
plt.show()

# %% [markdown]
# # Forward Procedure (3)

# %%
# Build the HMM and run the forward procedure, printing the trellis.
model = MarkovModel(states=[state1, state2], observations=observations, state_transitions=state_transition)
model.populate_forward()
print(model.forward)
forward = model.forward
model.calculate_p_obs_forward()

# %%
# Fresh model with all tables populated; state_x is an x-axis for later plots.
model = MarkovModel(states=[state1, state2], observations=observations, state_transitions=state_transition).populate()
state_x = np.arange(1, 10)
# Closing brace of a literal that begins before this view (truncated chunk).
}


def weight_coeff_len(l: int) -> float:
    """Exponential weight for a match of length l, clamped to the 5..10 band."""
    # l <= 5 maps to n = 0, l >= 10 maps to n = 5; weight grows as e^n.
    n = max(0, min(10, l) - 5)
    # -0.9 keeps the minimum weight just above zero (e^0 - 0.9 = 0.1).
    return math.exp(n) - 0.9


def my_reducer(_state_0: MarkovState, _memory_1: MarkovMemory) -> MarkovState:
    """Extend the state tuple with the most recent memory element."""
    return MarkovState(_state_0 + (_memory_1[-1], ))


txt = open('corpus.txt', encoding='utf8').read()
corpus = txt.split()
# Layered Markov model: 3-, 2-, and 1-word suffix layers share the same
# weighter/reducer; the selector chooses among layers.
model = MarkovModel(my_selector)
model.create_layer('3-word', lambda _state_0: _state_0[-3:], my_weighter, my_reducer)
model.create_layer('2-word', lambda _state_0: _state_0[-2:], my_weighter, my_reducer)
model.create_layer('1-word', lambda _state_0: _state_0[-1:], my_weighter, my_reducer)
# (None,) is the sentinel "no history" state.
state_empty = MarkovState((None, ))
state_0 = deepcopy(state_empty)
prefix = []
# Loop body continues beyond this view (truncated chunk).
for word in corpus:
    # text preparation
from twitterapi import TwitterAPI
from markov import MarkovModel

POST_SIGNATURE = ' #JohnMaddenBot'

if __name__ == '__main__':
    # Generate a sentence short enough that the signature still fits in a
    # 140-character tweet.
    corpus_model = MarkovModel('docs/jmcorpus.txt')
    body = corpus_model.makeSentence(140 - len(POST_SIGNATURE))
    # Post through the Twitter API using stored credentials.
    TwitterAPI('tokens.txt').tweet(body + POST_SIGNATURE)
def main():
    # Parses CLI options, clusters one song's beats with k-means (or loads a
    # pickled clustering), trains an n-gram Markov model over the cluster
    # indices, and renders a generated beat sequence back out as WAV.
    # NOTE: Python 2 code (print statements).
    parser = argparse.ArgumentParser(description="Markov")
    parser.add_argument("-d", "--directory", default=None, help="Music dir")
    parser.add_argument("-f", "--filename", default=None, help="Song file")
    parser.add_argument("-p", "--pickle", default=False, action="store_true", help="Pickle")
    parser.add_argument("-k", "--clusters", type=int, default=50, help="Clusters")
    parser.add_argument("-s", "--sample", type=int, default=2, help="Sampling")
    parser.add_argument("-n", "--ngram", type=int, default=10, help="Ngram")
    parser.add_argument("-l", "--length", type=int, default=None, help="Length")
    args = parser.parse_args()
    # A directory overrides the filename: pick one song from it.
    if args.directory is not None:
        args.filename = generate_single_song(args.directory)
    if args.filename is None:
        raise Exception("Song not defined")
    #### We can't do this for multiple songs.
    beats = []
    audiofile = audio.LocalAudioFile(args.filename)
    beats = audiofile.analysis.beats
    print "Number of beats %s" % len(beats)
    # Base name (without directory) used for the .pickle/.wav side files.
    internal_filename = os.path.split(args.filename)[1]
    if not args.pickle:
        # Subsample beats to keep the k-means input small, then cluster.
        samples = beats[::args.sample]
        print "Number of samples to build cluster model %s" % len(samples)
        cl = cluster.KMeansClustering(samples, distance_beats)
        clusters = cl.getclusters(args.clusters)
        print "Clustering completed"
        # Centroids are cleared before pickling — presumably not picklable;
        # they are recomputed below.
        for c in clusters:
            c.centroid = None
        # NOTE(review): file handles passed to pickle.dump/load are never
        # closed; [:-4] assumes a 3-character extension like ".mp3".
        pickle.dump(clusters, open(internal_filename[:-4] + ".pickle", "wb"))
        print "Pickled Cluster Model"
    else:
        clusters = pickle.load(open(internal_filename[:-4] + ".pickle", "rb"))
        attach_source(clusters, audiofile)
        print "Resetting the centroids"
    # Recompute centroids for both branches (fresh clustering dumped them as
    # None; a loaded pickle has them stripped too).
    for c in clusters:
        c.centroid = cluster.centroid(c)
    print "Reset the centroids"
    # Map every beat to its nearest cluster index; this index sequence is
    # the Markov model's training text.
    training_input = []
    for beat in beats:
        training_input.append(get_cluster_index(beat, clusters))
    print("Training markovModel")
    markov_model = MarkovModel()
    markov_model.learn_ngram_distribution(training_input, args.ngram)
    #### We should have his function as iterator.
    print "Generating bunch of music"
    # Default output length: same number of beats as the input song.
    if args.length is None:
        args.length = len(training_input)
    output_list = markov_model.generate_a_bunch_of_text(args.length)
    generated_beats = audio.AudioQuantumList()
    print "Coming back to beats"
    # Convert generated cluster indices back to concrete beats; prev_beat
    # lets get_beats_back pick a beat that follows smoothly.
    prev_beat = None
    for index in output_list:
        curr_beat = get_beats_back(index, clusters, prev_beat)
        generated_beats.append(curr_beat)
        prev_beat = curr_beat
    #### We can't do this for multiple songs.
    print "Saving an Amazing Song"
    out = audio.getpieces(audiofile, generated_beats)
    out.encode(internal_filename[:-4] + ".wav")
from collections import Counter
import random
import re

import images
from markov import MarkovModel

# Markov model backed by the copypasta sqlite database.
PASTA_MM = MarkovModel("data/copypasta.sqlite3")

MEAN_WORDS_PER_PARAGRAPH = 50
STDEV_WORDS_PER_PARAGRAPH = 20

with open("data/mostcommonwords.txt") as f:
    COMMON_WORDS = [l.strip().lower() for l in f]

# Private frozenset mirror of COMMON_WORDS for O(1) membership tests in
# get_most_common_words (list membership was O(n) per word).
_COMMON_WORDS_SET = frozenset(COMMON_WORDS)


def generate_copypasta(short=False):
    """Return a random copypasta paragraph of roughly normal length.

    Args:
        short: if True, shrink the target word count (mean 5, stdev 10)
            for a shorter pasta.
    """
    wordmin = discrete_normal(
        mu=MEAN_WORDS_PER_PARAGRAPH - (45 if short else 0),
        sigma=STDEV_WORDS_PER_PARAGRAPH - (10 if short else 0),
        minimum=1
    )
    return PASTA_MM.get_random_paragraph_min(wordmin)


# Gets n most common words from text, barring most common words in English.
def get_most_common_words(text, n):
    """Return the n most frequent lowercase words in text.

    Words in the common-English list and words containing '*' are skipped.
    """
    words = []
    for raw in re.findall(r"[A-Za-z'\*\-]+", text):
        # Lowercase once per match (was computed twice per word).
        w = raw.lower()
        if w not in _COMMON_WORDS_SET and '*' not in raw:
            words.append(w)
    return [w for w, _ in Counter(words).most_common(n)]


def discrete_normal(mu, sigma, minimum=-float('inf')):
    """Sample an integer from N(mu, sigma), clamped to at least `minimum`."""
    retval = round(random.gauss(mu, sigma))
    return minimum if minimum > retval else retval
def butcher_word(word):
    """Scramble a word via a Markov random walk over its characters.

    The character sequence is wrapped in START/END sentinel tokens before
    training; the walk's trailing token is dropped from the result.
    """
    tokens = ["START", *word, "END"]
    walk = MarkovModel(tokens).random_walk()
    return "".join(walk[:-1])