def train_emission_number_distribution(self, inputs):
    """
    Trains the distribution over the number of notes emitted from a
    chord class. It's not conditioned on the chord class, so the only
    training data needed is a segmented MIDI corpus.

    @type inputs: list of lists
    @param inputs: training data. The same format as is produced by
        L{jazzparser.taggers.segmidi.midi.midi_to_emission_stream}

    """
    self.add_history(
        "Training emission number probabilities using %d MIDI segments" \
        % len(inputs))

    emission_number_counts = FreqDist()
    for sequence in inputs:
        for segment in sequence:
            notes = len(segment)
            # There should very rarely be more than the max num of notes
            if notes <= self.max_notes:
                emission_number_counts.inc(notes)

    # Apply simple Laplace smoothing over the full range of permitted
    # note counts (0 to max_notes inclusive, matching the check above)
    for notes in range(self.max_notes + 1):
        emission_number_counts.inc(notes)

    # Make a prob dist out of this
    emission_number_dist = prob_dist_to_dictionary_prob_dist(
        mle_estimator(emission_number_counts, None))
    self.emission_number_dist = emission_number_dist
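
# A minimal usage sketch (hypothetical data; "model" stands for an instance
# of this class). Each sequence is a list of segments, as produced by
# L{jazzparser.taggers.segmidi.midi.midi_to_emission_stream}; only the
# number of notes in each segment is used by the training above.
#
#   corpus = [
#       [[60, 64, 67], [62, 65, 69, 72]],    # sequence 1: two segments
#       [[55, 60, 64]],                      # sequence 2: one segment
#   ]
#   model.train_emission_number_distribution(corpus)
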
def train_transition_distribution(self, inputs, grammar, contprob=0.3):
    """
    Train the transition distribution parameters in a supervised manner,
    using chord corpus input.

    This is used as an initialization step to set transition parameters
    before running EM on unannotated data.

    @type inputs: L{jazzparser.data.input.AnnotatedDbBulkInput}
    @param inputs: annotated chord training data
    @type contprob: float or string
    @param contprob: probability mass to reserve for staying on the same
        state (self transitions). Use special value 'learn' to learn the
        probabilities from the durations

    """
    self.add_history(
        "Training transition probabilities using %d annotated chord "\
        "sequences" % len(inputs))
    learn_cont = contprob == "learn"

    # Prepare the label sequences that we'll train on
    if learn_cont:
        # Repeat values with a duration > 1
        sequences = []
        for seq in inputs:
            sequence = []
            last_cat = None
            for chord, cat in zip(seq, seq.categories):
                # Put it in once for each duration
                for i in range(chord.duration):
                    sequence.append((chord, cat))
            sequences.append(sequence)
    else:
        sequences = [list(zip(sequence, sequence.categories)) for \
                        sequence in inputs]

    # Prepare a list of transformations to apply to the categories
    label_transform = {}
    # First include all the categories we want to keep as they were
    for schema in self.schemata:
        label_transform[schema] = (schema, 0)
    # Then include any transformations the grammar defines
    for pos, mapping in grammar.equiv_map.items():
        label_transform[pos] = (mapping.target.pos, mapping.root)

    # Apply the transformation to all the training data
    training_samples = []
    for chord_cats in sequences:
        seq_samples = []
        for chord, cat in chord_cats:
            # Transform the label if it has a transformation
            if cat in label_transform:
                use_cat, alter_root = label_transform[cat]
            else:
                use_cat, alter_root = cat, 0
            root = (chord.root + alter_root) % 12
            seq_samples.append((str(use_cat), root))
        training_samples.append(seq_samples)

    training_data = sum([
        [(cat0, cat1, (root1 - root0) % 12)
            for ((cat0, root0), (cat1, root1)) in \
                group_pairs(seq_samples)] \
        for seq_samples in training_samples], [])

    # Count up the observations
    schema_transition_counts = ConditionalFreqDist()
    root_transition_counts = ConditionalFreqDist()
    for (label0, label1, root_change) in training_data:
        # Only use counts for categories the model's looking for
        if label0 in self.schemata and label1 in self.schemata:
            schema_transition_counts[label0].inc(label1)
            root_transition_counts[(label0, label1)].inc(root_change)

    # Transition probability to final state (end of sequence)
    for sequence in training_samples:
        # Inc the count of going from the label the sequence ends on to
        # the final state
        schema_transition_counts[sequence[-1][0]].inc(None)

    # Use Laplace (plus one) smoothing.
    # We don't use the laplace_estimator because we want the conversion
    # to a dict prob dist to get all the labels, not just to discount
    # the ones it's seen
    for label0 in self.schemata:
        for label1 in self.schemata:
            for root_change in range(12):
                # Exclude the self-transition for now, unless we're
                # learning it
                if learn_cont or \
                        not (label0 == label1 and root_change == 0):
                    schema_transition_counts[label0].inc(label1)
                    root_transition_counts[(label0, label1)].inc(root_change)
        # We don't add a count for going to the final state: we don't
        # want to initialize it with too much weight

    # Estimate distribution from this frequency distribution
    schema_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(\
        ConditionalProbDist(schema_transition_counts, mle_estimator, None), \
        mutable=True,
        samples=self.schemata + [None])
    root_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(\
        ConditionalProbDist(root_transition_counts, mle_estimator, None), \
        mutable=True,
        samples=range(12))

    if not learn_cont:
        # Discount all probabilities to allow for self-transition probs.
        # The transition distribution is factored into a schema part and
        # a root-change part, so the reserved mass goes into each: to the
        # schema self-transition, and to a root change of 0 under that
        # self-transition (the two events excluded from smoothing above)
        discount = logprob(1.0 - contprob)
        self_prob = logprob(contprob)
        for label0 in self.schemata:
            # Give saved prob mass to the schema self-transition
            schema_trans_dist[label0].update(label0, self_prob)
            # Discount all other transitions to allow for this
            for label1 in self.schemata + [None]:
                if label0 != label1:
                    # Discount non self transitions
                    schema_trans_dist[label0].update(label1, \
                        schema_trans_dist[label0].logprob(label1) + \
                        discount)
            # Likewise reserve mass for a root change of 0 within the
            # self-transition and discount the other root changes
            root_trans_dist[(label0, label0)].update(0, self_prob)
            for root_change in range(1, 12):
                root_trans_dist[(label0, label0)].update(root_change, \
                    root_trans_dist[(label0, label0)].logprob(root_change) + \
                    discount)

    # Recreate the dict prob dists so they're not mutable any more
    schema_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(schema_trans_dist)
    root_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(root_trans_dist)

    ## Now for the initial distribution
    # Count up the observations
    initial_counts = FreqDist()
    for sequence in training_samples:
        initial_counts.inc(sequence[0][0])
    # Laplace (plus one) smoothing, currently disabled
    #for label in self.schemata:
    #    initial_counts.inc(label)

    # Estimate distribution from this frequency distribution
    initial_dist = prob_dist_to_dictionary_prob_dist(\
        mle_estimator(initial_counts, None), samples=self.schemata)

    # Replace the model's transition distributions
    self.schema_transition_dist = schema_trans_dist
    self.root_transition_dist = root_trans_dist
    self.initial_state_dist = initial_dist
    # Invalidate the cache
    self.clear_cache()
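
# A minimal usage sketch (hypothetical names): supervised initialization of
# the transition parameters from annotated chord sequences, typically run
# before EM training on unannotated data. Here "corpus" stands for an
# L{jazzparser.data.input.AnnotatedDbBulkInput} and "grammar" for a loaded
# L{jazzparser.grammar.Grammar}.
#
#   # Reserve a fixed 0.3 probability mass for self-transitions
#   model.train_transition_distribution(corpus, grammar, contprob=0.3)
#   # Or learn self-transition probabilities from the chord durations
#   model.train_transition_distribution(corpus, grammar, contprob="learn")
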
@classmethod
def initialize_chord_classes(cls, tetrad_prob, max_notes, grammar, \
        illegal_transitions=[], fixed_root_transitions={}, metric=False):
    """
    Creates a new model with the distributions initialized naively to
    favour simple chord-types, in a similar way to what R&S do in the
    paper. The transition distribution is initialized so that everything
    is equiprobable.

    @type tetrad_prob: float
    @param tetrad_prob: prob of a note in the tetrad. This prob is
        distributed over the notes of the tetrad. The remaining prob
        mass is distributed over the remaining notes. You'll want this
        to be > 0.33, so that tetrad notes are more probable than others.
    @type max_notes: int
    @param max_notes: maximum number of notes that can be generated in
        each emission. Usually best to set to something high, like 100 -
        it's just to make the distribution finite.
    @type grammar: L{jazzparser.grammar.Grammar}
    @param grammar: grammar from which to take the chord class definitions
    @type metric: bool
    @param metric: if True, creates a model with a metrical component
        (dependence on metrical position). Default False

    """
    # Only use chord classes that are used by some morph item in the lexicon
    classes = [ccls for ccls in grammar.chord_classes.values() if ccls.used]

    # Create a probability distribution for the emission distribution
    dists = {}
    # Create the distribution for each possible r-value if we're creating
    # a metrical model
    if metric:
        r_vals = range(4)
    else:
        r_vals = [0]
    # Separate emission distribution for each chord class
    for ccls in classes:
        for r in r_vals:
            probabilities = {}
            # We assign two different probabilities: in tetrad or out.
            # Don't assume the tetrad has 4 notes!
            in_tetrad_prob = tetrad_prob / len(ccls.notes)
            out_tetrad_prob = (1.0 - tetrad_prob) / (12 - len(ccls.notes))
            # Give a probability to every pitch class
            for d in range(12):
                if d in ccls.notes:
                    probabilities[d] = in_tetrad_prob
                else:
                    probabilities[d] = out_tetrad_prob
            dists[(ccls.name, r)] = DictionaryProbDist(probabilities)
    emission_dist = DictionaryConditionalProbDist(dists)

    # Take the state labels from the lexical entries in the grammar.
    # Include only tonic categories that were generated from lexical
    # expansion rules - i.e.
    # only tonic repetition categories
    schemata = grammar.midi_families.keys()

    # Check that the transition constraint specifications refer to
    # existing schemata
    for labels in illegal_transitions:
        for label in labels:
            if label not in schemata:
                raise ValueError, "%s, given in illegal transition "\
                    "specification, is not a valid schema in the grammar" \
                    % label
    for labels in fixed_root_transitions:
        for label in labels:
            if label not in schemata:
                raise ValueError, "%s, given in fixed root transition "\
                    "specification, is not a valid schema in the grammar" \
                    % label

    # Build from the grammar a mapping from lexical schemata (POSs) to
    # chord classes
    chord_class_mapping = {}
    for morph in grammar.morphs:
        if morph.pos in schemata:
            chord_class_mapping.setdefault(morph.pos, []).append(
                str(morph.chord_class.name))
    # Make sure that every label appears in the mapping
    for label in schemata:
        if label not in chord_class_mapping:
            chord_class_mapping[label] = []

    # Initialize transition distribution so every transition is equiprobable
    schema_transition_counts = ConditionalFreqDist()
    root_transition_counts = ConditionalFreqDist()
    for label0 in schemata:
        for label1 in schemata:
            # Increment the count once for each chord class associated
            # with this schema: schemata with 2 chord classes get 2
            # counts
            for cclass in chord_class_mapping[label1]:
                schema_transition_counts[label0].inc(label1)
                for root_change in range(12):
                    # Give one count to the root transition
                    # corresponding to this state transition
                    root_transition_counts[(label0, label1)].inc(root_change)
        # Give a count to finishing in this state
        schema_transition_counts[label0].inc(None)

    # Estimate distributions from these frequency distributions
    schema_trans_dist = ConditionalProbDist(schema_transition_counts, \
                                            mle_estimator, None)
    root_trans_dist = ConditionalProbDist(root_transition_counts, \
                                            mle_estimator, None)
    # Sample these to get dictionary prob dists
    schema_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(schema_trans_dist)
    root_trans_dist = cond_prob_dist_to_dictionary_cond_prob_dist(root_trans_dist)

    # Do the same with the initial states (just schemata, not roots)
    initial_state_counts = FreqDist()
    for label in schemata:
        initial_state_counts.inc(label)
    initial_state_dist = mle_estimator(initial_state_counts, None)
    initial_state_dist = prob_dist_to_dictionary_prob_dist(initial_state_dist)

    # Also initialize the notes number distribution to uniform
    emission_number_counts = FreqDist()
    for i in range(max_notes):
        emission_number_counts.inc(i)
    emission_number_dist = mle_estimator(emission_number_counts, None)
    emission_number_dist = prob_dist_to_dictionary_prob_dist(emission_number_dist)

    # Create the model
    model = cls(schema_trans_dist, \
                root_trans_dist, \
                emission_dist, \
                emission_number_dist, \
                initial_state_dist, \
                schemata, \
                chord_class_mapping, \
                classes, \
                metric=metric, \
                illegal_transitions=illegal_transitions, \
                fixed_root_transitions=fixed_root_transitions)
    model.add_history(\
        "Initialized model to chord type probabilities, using "\
        "tetrad probability %s. Metric: %s" % \
        (tetrad_prob, metric))
    return model
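
# A minimal usage sketch (hypothetical names): build a naively initialized
# model from a loaded grammar, then refine it with the supervised training
# methods above. "ModelClass" stands for whatever class defines this
# classmethod. tetrad_prob=0.6 satisfies the docstring's > 0.33 condition:
# for a 4-note tetrad, 0.6/4 per tetrad note > 0.4/8 per other note.
#
#   model = ModelClass.initialize_chord_classes(0.6, 100, grammar, \
#                                               metric=False)
#   model.train_emission_number_distribution(midi_corpus)
#   model.train_transition_distribution(chord_corpus, grammar)
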