class Parser:
    """Read a MIDI file and record its note-to-note transitions.

    Every ``note_on`` message, across all tracks in file order, is reduced
    to ``note % 12`` and chained to the value that preceded it.
    """

    def __init__(self, filename):
        """Parse ``filename`` immediately and fill ``self.markov_chain``.

        Args:
            filename: path handed to ``mido.MidiFile``.
        """
        self.filename = filename
        self.markov_chain = MarkovChain()

        midi_file = mido.MidiFile(self.filename)
        # Lazily walk every message of every track, keeping only note_on
        # events and folding each note number modulo 12.
        pitch_classes = (
            msg.note % 12
            for track in midi_file.tracks
            for msg in track
            if msg.type == "note_on"
        )

        # The first transition starts from 0; the value is carried across
        # track boundaries rather than being reset per track.
        last_seen = 0
        for pitch_class in pitch_classes:
            # The third argument is always 0 in this parser.
            self.markov_chain.add(last_seen, pitch_class, 0)
            last_seen = pitch_class

    def get_chain(self):
        """Return the chain populated by the constructor."""
        return self.markov_chain
def test_add_N_1(self):
    # Feed three sequences into a default-constructed chain and check the
    # complete contents of ``model`` after each one.
    chain = MarkovChain()

    # Each step pairs the sequence to add with the full model expected
    # right after that add.
    steps = (
        (
            ('a', 'b', 'c'),
            {
                ('a',): {'b': 1},
                ('b',): {'c': 1},
                'START': {('a',): 1},
            },
        ),
        (
            ('b', 'a'),
            {
                ('a',): {'b': 1},
                ('b',): {'c': 1, 'a': 1},
                'START': {('a',): 1, ('b',): 1},
            },
        ),
        (
            ('a', 'c'),
            {
                ('a',): {'b': 1, 'c': 1},
                ('b',): {'c': 1, 'a': 1},
                'START': {('a',): 2, ('b',): 1},
            },
        ),
    )

    for sequence, expected_model in steps:
        chain.add(sequence)
        self.assertEqual(expected_model, chain.model)
class Parser:
    """Parse a MIDI file into a Markov chain of note-to-note transitions.

    ``note_on`` messages are grouped into chunks; every note of one chunk
    is linked to every note of the following chunk, together with a
    bucketed duration in milliseconds.
    """

    def __init__(self, filename, verbose=False):
        """Store the file name and parse the file straight away.

        Args:
            filename: path handed to ``mido.MidiFile``.
            verbose: when true, every MIDI message is printed while parsing.
        """
        self.filename = filename
        # Microseconds per beat; stays None until a set_tempo message
        # is read.
        self.tempo = None
        # Used to convert message delta times (ticks) into beats; filled
        # in from the file at the start of _parse.
        self.ticks_per_beat = None
        self.markov_chain = MarkovChain()
        self._parse(verbose=verbose)

    def _parse(self, verbose=False):
        """Walk every message of every track and feed the chain.

        A ``note_on`` whose delta time is 0 joins the chunk being built.
        A ``note_on`` with a nonzero delta time closes that chunk: it is
        sequenced against the previous chunk and then becomes the
        previous chunk itself.
        """
        midi_file = mido.MidiFile(self.filename)
        self.ticks_per_beat = midi_file.ticks_per_beat

        # Both chunks are carried across track boundaries. A chunk still
        # open when the last message has been read is never sequenced.
        earlier, pending = [], []
        for track in midi_file.tracks:
            for msg in track:
                if verbose:
                    print(msg)
                if msg.type == "set_tempo":
                    self.tempo = msg.tempo
                    continue
                if msg.type != "note_on":
                    continue
                if msg.time == 0:
                    pending.append(msg.note)
                    continue
                # NOTE(review): the note carried by this nonzero-delta
                # message is not added to any chunk - confirm intended.
                self._sequence(earlier, pending, msg.time)
                earlier, pending = pending, []

    def _sequence(self, previous_chunk, current_chunk, duration):
        """Link each previous note to each current note in the chain.

        Args:
            previous_chunk: notes of the chunk that came first.
            current_chunk: notes of the chunk that followed.
            duration: tick count, bucketed before it is stored.
        """
        pairs = (
            (source, target)
            for source in previous_chunk
            for target in current_chunk
        )
        for source, target in pairs:
            # The bucket is computed per pair on purpose: with no pairs
            # nothing is computed, so a missing tempo cannot raise here.
            self.markov_chain.add(
                source, target, self._bucket_duration(duration))

    def _bucket_duration(self, ticks):
        """Convert ticks to milliseconds and snap to a 250 ms bucket.

        The value is floored to a multiple of 250 and 250 is then added,
        so the result is always the bucket above the floor (an exact
        500 ms yields 750).

        Raises:
            TypeError: when the arithmetic fails on a non-numeric
                operand, e.g. ``tempo`` is still None.
        """
        try:
            beats = ticks / self.ticks_per_beat
            millis = (beats * self.tempo) / 1000
            return int(millis - (millis % 250) + 250)
        except TypeError:
            raise TypeError(
                "Could not read a tempo and ticks_per_beat from midi")

    def get_chain(self):
        """Return the chain populated during parsing."""
        return self.markov_chain
def test_add_N_2(self):
    # Same idea as the single-item-key test, but the chain is constructed
    # with 2 and the expected model is keyed by pairs of items.
    chain = MarkovChain(2)

    # Each step pairs the sequence to add with the full model expected
    # right after that add.
    steps = (
        (
            ('a', 'b', 'c', 'd', 'a', 'b', 'e'),
            {
                ('a', 'b'): {'c': 1, 'e': 1},
                ('b', 'c'): {'d': 1},
                ('c', 'd'): {'a': 1},
                ('d', 'a'): {'b': 1},
                'START': {('a', 'b'): 1},
            },
        ),
        (
            ('a', 'b', 'e', 'd'),
            {
                ('a', 'b'): {'c': 1, 'e': 2},
                ('b', 'c'): {'d': 1},
                ('c', 'd'): {'a': 1},
                ('d', 'a'): {'b': 1},
                ('b', 'e'): {'d': 1},
                'START': {('a', 'b'): 2},
            },
        ),
    )

    for sequence, expected_model in steps:
        chain.add(sequence)
        self.assertEqual(expected_model, chain.model)
def test_generate_sentence(self):
    # Train on a single three-token sequence, then ask for sentences with
    # two different arguments (presumably a length limit - confirm against
    # MarkovChain.generate_sentence).
    chain = MarkovChain()
    chain.add(('aaa', 'bbb.', 'ccc'))

    shorter = chain.generate_sentence(12)
    self.assertEqual('Aaa bbb.', shorter)

    longer = chain.generate_sentence(13)
    self.assertEqual('Aaa bbb. Ccc.', longer)
# Reuse the cached chain when one exists and no override was requested.
if not override and cache_file_exists:
    shutil.copy(cache_filename, cache_current)
    stop('Swap current markov chain to "{}"'.format(file), error=False)

# Compute MC
markov_chain = MarkovChain()
print('Parsing file "{}"...'.format(file))
with open(source_filename, 'r') as source:
    for line in source:
        # Frame the stripped line with SPACE on both sides and record
        # every adjacent pair, so word starts and ends are transitions
        # too. An empty line still records one SPACE -> SPACE pair.
        symbols = [SPACE]
        symbols.extend(line.rstrip())
        symbols.append(SPACE)
        for left, right in zip(symbols, symbols[1:]):
            markov_chain.add(left, right)
markov_chain.build()

# Store MC, then make it the current chain.
with open(cache_filename, 'wb') as cache:
    pickle.dump(markov_chain, cache)
shutil.copy(cache_filename, cache_current)

print("Parsing done.")
print('Set "{}" as current markov chain'.format(file))
class MasterpieceWriter(object):
    """Train a Markov chain on text files and generate token streams.

    Tokens are fed to the chain as overlapping pairs, so each state is
    ``(previous token, token)``. Word and word-pair frequencies and the
    tokenizer-supplied contexts are collected along the way.
    """

    def __init__(self, sentence_tokenizer, word_tokenizer):
        """Keep the tokenizers and start with empty model and statistics.

        Args:
            sentence_tokenizer: object whose ``tokenize(paragraph)``
                returns a sized collection of sentences.
            word_tokenizer: object whose ``tokenize(sentence)`` returns a
                ``(words, contexts)`` pair; ``contexts`` may be None.
        """
        self.sentence_tokenizer = sentence_tokenizer
        self.word_tokenizer = word_tokenizer
        self.markov_chain = MarkovChain()
        self.word_contexts = defaultdict(list)
        self.word_counts = Counter()
        self.word_pair_counts = Counter()

    def _paragraphs_from_file(self, file_name):
        """Yield each non-blank line of ``file_name``, stripped."""
        with open(file_name) as handle:
            for raw_line in handle:
                paragraph = raw_line.strip()
                if paragraph:
                    yield paragraph

    def _sentence_items(self, sentence):
        """Yield the ``(word, contexts)`` items for a single sentence.

        Nothing is yielded for a sentence that tokenizes to no words.
        """
        words, contexts = self.word_tokenizer.tokenize(sentence)
        if len(words) == 0:
            return
        yield SENT_BEGIN, None
        for word in words:
            yield word, None
        yield SENT_END, None
        # Contexts travel as a separate item whose word slot is None.
        if contexts is not None:
            yield None, contexts

    def _get_words_and_contexts(self, input_files):
        """Yield ``(word, contexts)`` items for every paragraph in order.

        Paragraphs and sentences are bracketed by the PARA_*/SENT_*
        marker tokens. Exactly one element of each yielded pair is None.
        """
        for file_name in input_files:
            for paragraph in self._paragraphs_from_file(file_name):
                sentences = self.sentence_tokenizer.tokenize(paragraph)
                if len(sentences) == 0:
                    continue
                yield PARA_BEGIN, None
                for sentence in sentences:
                    for item in self._sentence_items(sentence):
                        yield item
                yield PARA_END, None

    def train(self, training_files):
        """Feed every token of ``training_files`` into model and stats.

        Args:
            training_files: iterable of paths to plain-text files.
        """
        older, newer = None, None
        for word, contexts in self._get_words_and_contexts(training_files):
            if contexts is not None:
                for key in contexts:
                    self.word_contexts[key].extend(contexts[key])
            if word is None:
                # Context-only item: the token window must not move.
                continue

            # A transition needs three tokens: (older, newer) -> (newer, word).
            if older is not None:
                self.markov_chain.add((older, newer), (newer, word))

            # Statistics skip marker tokens; a pair is counted only when
            # both of its members are ordinary words.
            if word not in ALL_SPECIAL:
                self.word_counts[word] += 1
                if newer not in ALL_SPECIAL:
                    self.word_pair_counts[(newer, word)] += 1

            older, newer = newer, word

    def stats(self, top=10):
        """Return the ``top`` most common words and word pairs.

        Returns:
            dict with keys ``most_common_words`` and
            ``most_common_word_pairs``, each a ``Counter.most_common``
            list.
        """
        return {
            'most_common_words': self.word_counts.most_common(top),
            'most_common_word_pairs': self.word_pair_counts.most_common(top),
        }

    def generate_masterpiece(self, prng=None):
        """Yield tokens, starting from the paragraph and sentence markers.

        Args:
            prng: passed through unchanged to ``markov_chain.generate``.
        """
        yield PARA_BEGIN
        yield SENT_BEGIN
        seed = (PARA_BEGIN, SENT_BEGIN)
        # Each generated state is a pair; only its second member is new.
        for _, token in self.markov_chain.generate(seed, prng):
            yield token
class Parser:
    """Parse a music file into a Markov chain over note-name windows.

    Each chain state is a comma-joined window of at most ``order``
    element labels. A transition carries the element's duration in
    quarter lengths and its offset from the preceding element.
    """

    def __init__(self, filename, verbose=False, order=1):
        """Store the settings, create the chain and parse immediately.

        Args:
            filename: path handed to ``converter.parse``.
            verbose: when true, every visited element is printed.
            order: maximum number of labels kept in a state window; it is
                also assigned to ``markov_chain.order``.
        """
        self.filename = filename
        # Initialised here but not otherwise used within this class.
        self.tempo = None
        self.markov_chain = MarkovChain()
        self.order = order
        self.markov_chain.order = order
        self._parse(verbose=verbose)

    def _parse(self, verbose=False):
        """Visit every element of every part and feed the chain."""
        # The window is carried from one part to the next, not reset.
        history = []
        score = converter.parse(self.filename)
        for part in score:
            elements = sorted(
                part.recurse(), key=lambda el: float(el.offset))
            last_offset = 0.0
            for element in elements:
                if verbose:
                    print(str(element))

                if isinstance(element, note.Note):
                    label = str(element.pitch)
                elif isinstance(element, chord.Chord):
                    # Drop '>' from the chord's string form, discard its
                    # first whitespace-separated token and join the rest
                    # with '|'.
                    text = str(element).replace('>', '')
                    label = '|'.join(text.split()[1:])
                else:
                    # Anything else is recorded under an empty label.
                    label = ""

                duration = float(element.duration.quarterLength)
                offset = float(element.offset)
                gap = offset - last_offset
                last_offset = offset

                # generalized for orders greater than 0: grow the window
                # until it holds ``order`` labels, then slide it by one.
                kept = history if len(history) < self.order else history[1:]
                upcoming = kept + [label]

                # The very first element only seeds the window.
                if history:
                    self.markov_chain.add(
                        ','.join(history), ','.join(upcoming),
                        duration, gap)
                history = upcoming

    def get_chain(self):
        """Return the chain populated during parsing."""
        return self.markov_chain
class Parser:
    """Turn a MIDI file into a Markov chain of note transitions.

    Simultaneous ``note_on`` messages are collected into chunks, and each
    pair of consecutive chunks contributes one transition per
    (earlier note, later note) combination.
    """

    def __init__(self, filename, verbose=False):
        """Remember the file name, then parse it immediately.

        Args:
            filename: path handed to ``mido.MidiFile``.
            verbose: when true, each MIDI message is printed as it is read.
        """
        self.filename = filename
        # Number of microseconds per beat; None until a set_tempo
        # message has been seen.
        self.tempo = None
        # Ticks per beat, needed to turn delta times (ticks) into beats;
        # assigned from the file when parsing begins.
        self.ticks_per_beat = None
        self.markov_chain = MarkovChain()
        self._parse(verbose=verbose)

    def _parse(self, verbose=False):
        """Read all tracks, chunk the ``note_on`` messages, fill the chain.

        Zero-delta ``note_on`` messages extend the open chunk. A
        nonzero-delta ``note_on`` sequences the open chunk against the
        one before it and then starts a fresh, empty chunk.
        """
        source = mido.MidiFile(self.filename)
        self.ticks_per_beat = source.ticks_per_beat

        # Chunk state is shared by all tracks; whatever is left in the
        # open chunk after the final message is never sequenced.
        closed_chunk = []
        open_chunk = []
        for track in source.tracks:
            for event in track:
                if verbose:
                    print(event)
                kind = event.type
                if kind == "set_tempo":
                    self.tempo = event.tempo
                elif kind == "note_on" and event.time == 0:
                    open_chunk.append(event.note)
                elif kind == "note_on":
                    # NOTE(review): this event's own note is not stored
                    # in either chunk - confirm that is intended.
                    self._sequence(closed_chunk, open_chunk, event.time)
                    closed_chunk = open_chunk
                    open_chunk = []

    def _sequence(self, previous_chunk, current_chunk, duration):
        """Add one transition per (previous note, current note) pair.

        Args:
            previous_chunk: notes of the earlier chunk.
            current_chunk: notes of the later chunk.
            duration: tick count; converted to a millisecond bucket.
        """
        for origin in previous_chunk:
            for destination in current_chunk:
                # Bucketing stays inside the loop so that an empty chunk
                # never triggers the conversion (and its TypeError).
                bucket = self._bucket_duration(duration)
                self.markov_chain.add(origin, destination, bucket)

    def _bucket_duration(self, ticks):
        """Return ``ticks`` as milliseconds, snapped to a 250 ms bucket.

        The millisecond value is rounded down to a multiple of 250 and
        then increased by 250, so an exact multiple still moves up one
        bucket (500 ms becomes 750).

        Raises:
            TypeError: when an operand is not numeric, e.g. no tempo has
                been read and ``tempo`` is still None.
        """
        try:
            elapsed_beats = ticks / self.ticks_per_beat
            ms = (elapsed_beats * self.tempo) / 1000
            remainder = ms % 250
            return int(ms - remainder + 250)
        except TypeError:
            raise TypeError(
                "Could not read a tempo and ticks_per_beat from midi")

    def get_chain(self):
        """Return the chain populated during parsing."""
        return self.markov_chain