예제 #1
0
class Parser:
    """Build a Markov chain of pitch classes from the notes of a MIDI file.

    Every sounding ``note_on`` message, in every track, is reduced to its
    pitch class (``note % 12``) and recorded in the chain as a transition
    from the previously seen pitch class.
    """

    def __init__(self, filename):
        """Read ``filename`` with mido and fill ``self.markov_chain``.

        Args:
            filename: path of the MIDI file to parse.
        """
        self.filename = filename
        self.markov_chain = MarkovChain()

        midi = mido.MidiFile(self.filename)
        # The walk is seeded with pitch class 0, so the first note of the
        # file is recorded as a transition coming from 0.
        previous_note = 0
        for track in midi.tracks:
            for message in track:
                # A note_on with velocity 0 is the MIDI convention for
                # "note off"; counting it would record such notes twice
                # (once when pressed, once when released).
                if message.type != "note_on" or message.velocity == 0:
                    continue
                current_note = message.note % 12
                # The third argument is always 0 here; presumably it is the
                # duration slot of MarkovChain.add, which this parser does
                # not track -- confirm against MarkovChain.
                self.markov_chain.add(previous_note, current_note, 0)
                previous_note = current_note

    def get_chain(self):
        """Return the Markov chain built by the constructor."""
        return self.markov_chain
예제 #2
0
    def test_add_N_1(self):
        """Check the model of a default MarkovChain after successive add() calls.

        The complete model is compared after every add(), covering the
        creation of new keys, a second successor for an existing key and a
        START entry whose count reaches 2.
        """
        chain = MarkovChain()

        # First sequence: two transitions plus the START entry.
        chain.add(('a', 'b', 'c'))
        expected = {
            ('a', ): {'b': 1},
            ('b', ): {'c': 1},
            'START': {('a', ): 1},
        }
        self.assertEqual(expected, chain.model)

        # Second sequence begins with 'b': new successor, new START key.
        chain.add(('b', 'a'))
        expected = {
            ('a', ): {'b': 1},
            ('b', ): {'c': 1, 'a': 1},
            'START': {('a', ): 1, ('b', ): 1},
        }
        self.assertEqual(expected, chain.model)

        # Third sequence begins with 'a' again: its START count becomes 2.
        chain.add(('a', 'c'))
        expected = {
            ('a', ): {'b': 1, 'c': 1},
            ('b', ): {'c': 1, 'a': 1},
            'START': {('a', ): 2, ('b', ): 1},
        }
        self.assertEqual(expected, chain.model)
예제 #3
0
class Parser:
    """Parse a MIDI file with mido into a Markov chain of note transitions."""

    def __init__(self, filename, verbose=False):
        """
        Parse the midi file at `filename` and build a markov chain of the
        notes it contains.

        `verbose` makes the parser print every midi message it reads.
        """
        self.filename = filename
        # The tempo is a number representing the number of microseconds
        # per beat. It stays None until a set_tempo message is read.
        self.tempo = None
        # The delta time between each midi message is a number of ticks,
        # which we can convert to beats using ticks_per_beat.
        self.ticks_per_beat = None
        self.markov_chain = MarkovChain()
        self._parse(verbose=verbose)

    def _parse(self, verbose=False):
        """
        Read the midi file and group its note_on messages into chunks
        ("chords"): messages with a delta time of 0 extend the current
        chunk, a message with a non-zero delta time closes it. Each closed
        chunk is sequenced against the previous one and inserted into the
        markov chain.
        """
        midi = mido.MidiFile(self.filename)
        self.ticks_per_beat = midi.ticks_per_beat
        previous_chunk = []
        current_chunk = []
        for track in midi.tracks:
            for message in track:
                if verbose:
                    print(message)
                if message.type == "set_tempo":
                    self.tempo = message.tempo
                elif message.type == "note_on":
                    if message.time == 0:
                        current_chunk.append(message.note)
                    else:
                        # NOTE(review): the note carried by this message is
                        # not added to any chunk, and the chunk still open
                        # when the file ends is never sequenced -- confirm
                        # that this is intended.
                        self._sequence(previous_chunk, current_chunk,
                                       message.time)
                        previous_chunk = current_chunk
                        current_chunk = []

    def _sequence(self, previous_chunk, current_chunk, duration):
        """
        Insert a transition into the markov chain for every pair made of
        one note from `previous_chunk` and one note from `current_chunk`.

        `duration` is a tick count (the delta time of the message that
        closed the current chunk); it is bucketed before being stored.
        """
        for n1 in previous_chunk:
            for n2 in current_chunk:
                self.markov_chain.add(n1, n2, self._bucket_duration(duration))

    def _bucket_duration(self, ticks):
        """
        Convert a tick count to milliseconds and return the next multiple
        of 250 strictly above it (0 -> 250, 250 -> 500, 260 -> 500).

        Raises TypeError when no tempo or ticks_per_beat has been read
        from the midi file.
        """
        # Checked explicitly rather than with a try/except around the
        # arithmetic, so that an invalid `ticks` value is not reported as a
        # missing tempo.
        if self.tempo is None or self.ticks_per_beat is None:
            raise TypeError(
                "Could not read a tempo and ticks_per_beat from midi")
        ms = ((ticks / self.ticks_per_beat) * self.tempo) / 1000
        return int(ms - (ms % 250) + 250)

    def get_chain(self):
        """Return the markov chain built while parsing."""
        return self.markov_chain
예제 #4
0
    def test_add_N_2(self):
        """Check the model of MarkovChain(2) after successive add() calls.

        Keys are pairs of consecutive tokens; the complete model is
        compared after every add().
        """
        chain = MarkovChain(2)

        # ('a', 'b') occurs twice in this sequence, with different successors.
        chain.add(('a', 'b', 'c', 'd', 'a', 'b', 'e'))
        expected = {
            ('a', 'b'): {'c': 1, 'e': 1},
            ('b', 'c'): {'d': 1},
            ('c', 'd'): {'a': 1},
            ('d', 'a'): {'b': 1},
            'START': {('a', 'b'): 1},
        }
        self.assertEqual(expected, chain.model)

        # Second sequence: bumps ('a', 'b') -> 'e', adds ('b', 'e') and
        # raises the START count of ('a', 'b') to 2.
        chain.add(('a', 'b', 'e', 'd'))
        expected = {
            ('a', 'b'): {'c': 1, 'e': 2},
            ('b', 'c'): {'d': 1},
            ('c', 'd'): {'a': 1},
            ('d', 'a'): {'b': 1},
            ('b', 'e'): {'d': 1},
            'START': {('a', 'b'): 2},
        }
        self.assertEqual(expected, chain.model)
예제 #5
0
 def test_generate_sentence(self):
     """Check generate_sentence() output for the arguments 12 and 13.

     The argument is presumably a length limit -- confirm against
     MarkovChain.generate_sentence.
     """
     chain = MarkovChain()
     chain.add(('aaa', 'bbb.', 'ccc'))
     cases = (
         (12, 'Aaa bbb.'),
         (13, 'Aaa bbb. Ccc.'),
     )
     for argument, expected in cases:
         self.assertEqual(expected, chain.generate_sentence(argument))
예제 #6
0
# A cache already exists for this file and no rebuild was requested: publish
# it as the current chain and hand over to stop() (presumably ends the
# script -- confirm against its definition).
if not override and cache_file_exists:
    shutil.copy(cache_filename, cache_current)
    stop('Swap current markov chain to "{}"'.format(file), error=False)

# Build a character-level Markov chain from the source file.

markov_chain = MarkovChain()

print('Parsing file "{}"...'.format(file))

with open(source_filename, 'r') as source:
    for line in source:
        # Each line is framed by SPACE: SPACE -> first character, then every
        # consecutive pair of characters, then last character -> SPACE.
        left = SPACE
        for right in line.rstrip():
            markov_chain.add(left, right)
            left = right
        markov_chain.add(left, SPACE)

markov_chain.build()

# Persist the chain, then publish the cache file as the current chain.
with open(cache_filename, 'wb') as cache:
    pickle.dump(markov_chain, cache)

shutil.copy(cache_filename, cache_current)

print("Parsing done.")
print('Set "{}" as current markov chain'.format(file))
예제 #7
0
class MasterpieceWriter(object):
    """Train a Markov chain on text files and generate a token stream from it.

    Text is split into paragraphs (non-blank lines), sentences and words by
    the two tokenizers given at construction. Word and word-pair counts are
    collected during training alongside the chain.
    """

    def __init__(self, sentence_tokenizer, word_tokenizer):
        """Store the tokenizers and create empty training state.

        sentence_tokenizer.tokenize(paragraph) must return the sentences of
        a paragraph; word_tokenizer.tokenize(sentence) must return a
        (words, contexts) pair.
        """
        self.sentence_tokenizer = sentence_tokenizer
        self.word_tokenizer = word_tokenizer

        self.markov_chain = MarkovChain()
        self.word_contexts = defaultdict(list)

        self.word_counts = Counter()
        self.word_pair_counts = Counter()

    def _paragraphs_from_file(self, file_name):
        """Yield every non-blank line of the file, stripped of whitespace."""
        with open(file_name) as handle:
            for raw_line in handle:
                paragraph = raw_line.strip()
                if paragraph != "":
                    yield paragraph

    def _get_words_and_contexts(self, input_files):
        """Yield (word, contexts) pairs for all paragraphs of all files.

        Exactly one element of each pair is None: tokens (words and the
        PARA_*/SENT_* markers) come as (token, None), and the contexts of a
        sentence come as (None, contexts) right after its SENT_END marker.
        Paragraphs without sentences and sentences without words are
        skipped.
        """
        for file_name in input_files:
            for paragraph in self._paragraphs_from_file(file_name):
                sentences = self.sentence_tokenizer.tokenize(paragraph)
                if not sentences:
                    continue

                yield PARA_BEGIN, None
                for sentence in sentences:
                    words, contexts = self.word_tokenizer.tokenize(sentence)
                    if not words:
                        continue

                    yield SENT_BEGIN, None
                    for token in words:
                        yield (token, None)
                    yield SENT_END, None

                    if contexts is not None:
                        yield None, contexts

                yield PARA_END, None

    def train(self, training_files):
        """Feed the markov chain and the counters from the training files."""
        older, newer = None, None
        for token, contexts in self._get_words_and_contexts(training_files):
            if contexts is not None:
                for key in contexts:
                    self.word_contexts[key].extend(contexts[key])

            if token is None:
                continue

            # The chain links overlapping token pairs, so it can only be
            # fed once two earlier tokens are known.
            if older is not None:
                self.markov_chain.add((older, newer), (newer, token))

            # Statistics only cover real words, not the special markers.
            if token not in ALL_SPECIAL:
                self.word_counts[token] += 1
                if newer not in ALL_SPECIAL:
                    self.word_pair_counts[(newer, token)] += 1

            # Slide the two-token window.
            older, newer = newer, token

    def stats(self, top=10):
        """Return the `top` most common words and word pairs as a dict."""
        return {
            'most_common_words': self.word_counts.most_common(top),
            'most_common_word_pairs': self.word_pair_counts.most_common(top),
        }

    def generate_masterpiece(self, prng=None):
        """Yield PARA_BEGIN, SENT_BEGIN, then the tokens the chain generates.

        The chain is started from the (PARA_BEGIN, SENT_BEGIN) state and
        yields token pairs; only the second token of each pair is emitted.
        """
        yield PARA_BEGIN
        yield SENT_BEGIN
        start_state = (PARA_BEGIN, SENT_BEGIN)
        for _first, second in self.markov_chain.generate(start_state, prng):
            yield second
예제 #8
0
class Parser:
    """Parse a music file with music21 into a markov chain of note states."""

    def __init__(self, filename, verbose=False, order=1):
        """
        Parse the file at `filename` and build a markov chain of the notes
        it contains.

        `verbose` prints every element visited while parsing; `order` is
        the maximum number of consecutive elements that make up one state
        and is also stored on the markov chain.
        """
        self.filename = filename
        # Never assigned again in this class; stays None.
        self.tempo = None
        self.markov_chain = MarkovChain()

        self.order = order
        self.markov_chain.order = order

        self._parse(verbose=verbose)

    def _parse(self, verbose=False):
        """
        Walk every part of the parsed file, in order of element offset, and
        insert a transition into the markov chain from each window of
        elements to the window shifted by one element.
        """
        window = []

        score = converter.parse(self.filename)
        for part in score:
            elements = sorted(part.recurse(), key=lambda el: float(el.offset))
            last_offset = 0.0
            for element in elements:
                if verbose:
                    print(element)

                # Label of the element: the pitch for a note, '|'-joined
                # pieces of the string form for a chord, "" for anything
                # else.
                if isinstance(element, note.Note):
                    label = str(element.pitch)
                elif isinstance(element, chord.Chord):
                    # Presumably the string form looks like
                    # "<music21.chord.Chord C4 E4 G4>", so this keeps the
                    # pitch names -- confirm against music21.
                    pieces = str(element).replace('>', '').split()
                    label = '|'.join(pieces[1:])
                else:
                    label = ""

                duration = float(element.duration.quarterLength)
                current_offset = float(element.offset)
                note_offset = current_offset - last_offset
                last_offset = current_offset

                # Grow the window until it holds `order` labels, then slide
                # it by dropping the oldest label.
                kept = window if len(window) < self.order else window[1:]
                shifted = kept + [label]

                # No transition for the very first element: there is no
                # previous state yet.
                if window:
                    self.markov_chain.add(','.join(window),
                                          ','.join(shifted), duration,
                                          note_offset)

                window = shifted

    def get_chain(self):
        """Return the markov chain built while parsing."""
        return self.markov_chain
예제 #9
0
class Parser:
    """Parse a MIDI file with mido into a Markov chain of note transitions."""

    def __init__(self, filename, verbose=False):
        """
        Parse the midi file at `filename` and build a markov chain of the
        notes it contains.

        `verbose` makes the parser print every midi message it reads.
        """
        self.filename = filename
        # The tempo is a number representing the number of microseconds
        # per beat. It stays None until a set_tempo message is read.
        self.tempo = None
        # The delta time between each midi message is a number of ticks,
        # which we can convert to beats using ticks_per_beat.
        self.ticks_per_beat = None
        self.markov_chain = MarkovChain()
        self._parse(verbose=verbose)

    def _parse(self, verbose=False):
        """
        Read the midi file and group its note_on messages into chunks
        ("chords"): messages with a delta time of 0 extend the current
        chunk, a message with a non-zero delta time closes it. Each closed
        chunk is sequenced against the previous one and inserted into the
        markov chain.
        """
        midi = mido.MidiFile(self.filename)
        self.ticks_per_beat = midi.ticks_per_beat
        previous_chunk = []
        current_chunk = []
        for track in midi.tracks:
            for message in track:
                if verbose:
                    print(message)
                if message.type == "set_tempo":
                    self.tempo = message.tempo
                elif message.type == "note_on":
                    if message.time == 0:
                        current_chunk.append(message.note)
                    else:
                        # NOTE(review): the note carried by this message is
                        # not added to any chunk, and the chunk still open
                        # when the file ends is never sequenced -- confirm
                        # that this is intended.
                        self._sequence(previous_chunk,
                                       current_chunk,
                                       message.time)
                        previous_chunk = current_chunk
                        current_chunk = []

    def _sequence(self, previous_chunk, current_chunk, duration):
        """
        Insert a transition into the markov chain for every pair made of
        one note from `previous_chunk` and one note from `current_chunk`.

        `duration` is a tick count (the delta time of the message that
        closed the current chunk); it is bucketed before being stored.
        """
        for n1 in previous_chunk:
            for n2 in current_chunk:
                self.markov_chain.add(
                    n1, n2, self._bucket_duration(duration))

    def _bucket_duration(self, ticks):
        """
        Convert a tick count to milliseconds and return the next multiple
        of 250 strictly above it (0 -> 250, 250 -> 500, 260 -> 500).

        Raises TypeError when no tempo or ticks_per_beat has been read
        from the midi file.
        """
        # Checked explicitly rather than with a try/except around the
        # arithmetic, so that an invalid `ticks` value is not reported as a
        # missing tempo.
        if self.tempo is None or self.ticks_per_beat is None:
            raise TypeError(
                "Could not read a tempo and ticks_per_beat from midi")
        ms = ((ticks / self.ticks_per_beat) * self.tempo) / 1000
        return int(ms - (ms % 250) + 250)

    def get_chain(self):
        """Return the markov chain built while parsing."""
        return self.markov_chain