def sequence_updates(sequence, last_model, empty_arrays, array_ids,
                     update_initial=True):
    """
    Evaluates the forward/backward probability matrices for a single
    sequence under the model that came from the previous iteration and
    returns matrices that contain the updates to be made to the
    distributions during this iteration.

    This is wrapped up in a function so it can be run in parallel for
    each sequence. Once all sequences have been evaluated, the results
    are combined and the model updated.

    @type update_initial: bool
    @param update_initial: usually you want to update all distributions,
        including the initial state distribution. If update_initial=False,
        the initial state distribution updates won't be made for this
        sequence. We want this when the sequence is actually a non-initial
        fragment of a longer sequence

    """
    try:
        trans, ems, trans_denom, ems_denom = empty_arrays
        state_ids, em_ids = array_ids

        # Compute the forwards with seq_prob=True
        fwds, seq_logprob = last_model.normal_forward_probabilities(
                                                sequence, seq_prob=True)
        # gamma contains the state occupation probability for each state at
        # each timestep
        gamma = last_model.gamma_probabilities(sequence, forward=fwds)
        # xi contains the probability of every state transition at every
        # timestep
        xi = last_model.compute_xi(sequence)

        label_dom = last_model.label_dom

        T = len(sequence)

        for time in range(T):
            for state in label_dom:
                state_i = state_ids[state]

                if time < T-1:
                    # Go through all possible pairs of states to update the
                    # transition distributions
                    for next_state in label_dom:
                        state_j = state_ids[next_state]

                        ## Transition dist update ##
                        trans[state_i][state_j] += xi[time][state_i][state_j]

                ## Emission dist update ##
                ems[state_ids[state]][em_ids[sequence[time]]] += \
                    gamma[time][state_i]

        # Calculate the denominators by summing
        trans_denom = array_sum(trans, axis=1)
        ems_denom = array_sum(ems, axis=1)

        # Wrap this all up in a tuple to return to the master
        return (trans, ems, trans_denom, ems_denom, seq_logprob)
    except KeyboardInterrupt:
        return
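The tuple returned here is consumed by a master process that sums the numerators and denominators over all sequences and divides to re-estimate the distributions. A minimal sketch of that combination step, assuming `array_sum` is numpy's `sum` imported under that name; `combine_updates` is a hypothetical helper, not the project's actual update loop:

import numpy
from numpy import sum as array_sum   # the alias these examples appear to use

def combine_updates(results):
    # Hypothetical master-side step: sum the per-sequence numerators and
    # denominators elementwise, then re-estimate each distribution row as
    # numerator / denominator.
    trans_num = sum(r[0] for r in results)
    ems_num = sum(r[1] for r in results)
    trans_den = sum(r[2] for r in results)
    ems_den = sum(r[3] for r in results)
    new_trans = trans_num / trans_den[:, None]
    new_ems = ems_num / ems_den[:, None]
    return new_trans, new_ems

# Toy check with two fake per-sequence results for a 2-state, 3-symbol model
fake = (numpy.ones((2, 2)), numpy.ones((2, 3)),
        numpy.full(2, 2.0), numpy.full(2, 3.0), -1.0)
new_trans, new_ems = combine_updates([fake, fake])
assert numpy.allclose(array_sum(new_trans, axis=1), 1.0)
assert numpy.allclose(array_sum(new_ems, axis=1), 1.0)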
def normal_backward_probabilities(self, sequence, array=False):
    """
    @see: normal_forward_probabilities (except that this doesn't return
    the logprob)

    @type array: bool
    @param array: if True, returns a numpy 2d array instead of a list of
        dicts.

    """
    T = len(sequence)
    N = len(self.label_dom)
    beta = numpy.zeros((T, N), numpy.float64)
    scale = numpy.zeros(T, numpy.float64)

    # Initialize with the probabilities of transitioning to the final state
    for i, si in enumerate(self.label_dom):
        beta[T-1, i] = self.transition_probability(None, si)
    # Normalize
    total = array_sum(beta[T-1, :])
    beta[T-1, :] /= total
    # Initialize
    scale[T-1] = total

    # Iterate backwards over the other timesteps
    for t in range(T-2, -1, -1):
        # To speed things up, calculate all the t+1 emission probabilities
        # first, instead of calculating them all for every t state
        em_probs = [self.emission_probability(sequence[t+1], sj) \
                        for sj in self.label_dom]

        for i, si in enumerate(self.label_dom):
            # Multiply each next state's prob by the transition prob
            # from this state to that and the emission prob in that next
            # state
            beta[t, i] = sum(
                (beta[t+1, j] * self.transition_probability(sj, si) * \
                    em_probs[j] \
                    for j, sj in enumerate(self.label_dom)), 0.0)

        # Normalize by dividing all values by the total probability
        total = array_sum(beta[t, :])
        beta[t, :] /= total
        scale[t] = total

    if not array:
        # Convert this into a list of dicts
        matrix = []
        for t in range(T):
            timestep = {}
            for (i, label) in enumerate(self.label_dom):
                timestep[label] = beta[t, i]
            matrix.append(timestep)
        return matrix, scale
    else:
        return beta, scale
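For reference, the same scaled backward recursion can be written against explicit numpy matrices rather than the model's probability methods. A self-contained sketch with made-up numbers (the `A`, `final`, `E` names are illustrative, not from the source):

import numpy

# A[i, j] = P(state j at t+1 | state i at t); final[i] = P(end | state i);
# E[t, j] = P(observation at t | state j). Three timesteps, two states.
A = numpy.array([[0.7, 0.3],
                 [0.4, 0.6]])
final = numpy.array([0.5, 0.5])
E = numpy.array([[0.9, 0.2],
                 [0.1, 0.8],
                 [0.5, 0.5]])

T, N = E.shape
beta = numpy.zeros((T, N))
scale = numpy.zeros(T)

beta[T-1] = final
scale[T-1] = beta[T-1].sum()
beta[T-1] /= scale[T-1]

for t in range(T-2, -1, -1):
    # beta[t, i] = sum_j A[i, j] * E[t+1, j] * beta[t+1, j]
    beta[t] = A.dot(E[t+1] * beta[t+1])
    scale[t] = beta[t].sum()
    beta[t] /= scale[t]

print(beta)   # each row is normalized to sum to 1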
def f_tt(theta, f, k0T, S, nCoeffs):
    TnTT = S[0][:, 1:nCoeffs + 1]
    tnTT = S[1][:, 1:nCoeffs + 1]
    nfreq = len(f)
    n = arange(1, nCoeffs + 1, 1)
    n = ones((nfreq, 1)) * n
    if theta == 0:
        return array_sum((2 * n + 1) * (TnTT + tnTT), axis=1) / (1j * k0T)
    elif theta == pi:
        return array_sum((2 * n + 1) * (TnTT - tnTT), axis=1) / (1j * k0T)
def calcul_f_sc(theta, f, k0, S, nCoeffs):
    n = arange(0, nCoeffs + 1, 1)
    TnLL = S[:, 0:nCoeffs + 1]
    if theta == 0:
        return array_sum((2 * n + 1) * TnLL, axis=1) / (1j * k0)
    elif theta == pi:
        return array_sum((2 * n + 1) * ((-1)**n) * TnLL, axis=1) / (1j * k0)
    nfreq = len(f)
    Pncos0 = calculPn(cos(theta), nCoeffs)
    Pncos0 = ones((nfreq, 1)) * transpose(Pncos0)
    n = ones((nfreq, 1)) * n
    fsc_theta_f = array_sum((2 * n + 1) * TnLL * Pncos0, axis=1)
    fsc_theta_f = 1 / (1j * k0) * fsc_theta_f
    return fsc_theta_f
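The two special cases follow from P_n(1) = 1 and P_n(-1) = (-1)^n, which is why the theta = 0 and theta = pi branches skip the Legendre evaluation entirely. `calculPn` itself is not shown here; a plausible reconstruction returning P_0(x)...P_nCoeffs(x) via Bonnet's recurrence (the real helper may differ, e.g. in the orientation of its output, given the `transpose` call above):

from numpy import zeros

def calculPn(x, nCoeffs):
    # Hypothetical reconstruction: values of the Legendre polynomials
    # P_0(x) ... P_nCoeffs(x), via Bonnet's recurrence
    # (k + 1) P_{k+1}(x) = (2k + 1) x P_k(x) - k P_{k-1}(x)
    Pn = zeros(nCoeffs + 1)
    Pn[0] = 1.0
    if nCoeffs >= 1:
        Pn[1] = x
    for k in range(1, nCoeffs):
        Pn[k + 1] = ((2 * k + 1) * x * Pn[k] - k * Pn[k - 1]) / (k + 1)
    return Pn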
def compute_decomposed_xi(self, sequence, forward=None, backward=None,
        emission_matrix=None, transition_matrix=None):
    from numpy import newaxis

    if forward is None:
        forward = self.normal_forward_probabilities(sequence,
                                                    decomposed=True)
    if backward is None:
        backward = self.normal_backward_probabilities(sequence,
                                                    decomposed=True)
    # T is the number of timesteps
    # N is the number of states
    T = forward.shape[0]
    C = len(self.chord_types)
    # Create the empty array to fill
    xi = numpy.zeros((T-1, 12, 12, C, 12, 12, C), numpy.float64)

    # Precompute all the emission probabilities
    if emission_matrix is None:
        emission_matrix = self.get_small_emission_matrix(sequence)
    # And transition probabilities: we'll need these many times over
    if transition_matrix is None:
        transition_matrix = self.get_small_transition_matrix(transpose=True)

    # Do it without logs - much faster
    for t in range(T-1):
        total = 0.0
        # Add axes to the forward probabilities to represent the next state
        fwd_trans = forward[t, :, :, :, newaxis, newaxis, newaxis]
        # Compute the xi values by multiplying the arrays together
        xi[t] = transition_matrix * fwd_trans * backward[t+1] * \
                    emission_matrix[t+1]
        # Normalize all the probabilities
        # Sum all the probs for the timestep and divide them all by total
        xi[t] /= array_sum(xi[t])
    return xi
def test_convergence(self):
    """
    Test that weights converge to the required value on toy data.
    """
    input_batches = [
        self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
        for i in range(20)]
    input_stream = self.ssc.queueStream(input_batches)
    models = []

    slr = StreamingLogisticRegressionWithSGD(
        stepSize=0.2, numIterations=25)
    slr.setInitialWeights([0.0])
    slr.trainOn(input_stream)
    input_stream.foreachRDD(
        lambda x: models.append(slr.latestModel().weights[0]))

    t = time()
    self.ssc.start()
    self._ssc_wait(t, 15.0, 0.01)
    t_models = array(models)
    diff = t_models[1:] - t_models[:-1]

    # Test that weights improve with a small tolerance
    self.assertTrue(all(diff >= -0.1))
    self.assertTrue(array_sum(diff > 0) > 1)
def test_convergence(self):
    """
    Test that weights converge to the required value on toy data.
    """
    input_batches = [
        self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
        for i in range(20)]
    input_stream = self.ssc.queueStream(input_batches)
    models = []

    slr = StreamingLogisticRegressionWithSGD(
        stepSize=0.2, numIterations=25)
    slr.setInitialWeights([0.0])
    slr.trainOn(input_stream)
    input_stream.foreachRDD(
        lambda x: models.append(slr.latestModel().weights[0]))

    self.ssc.start()

    def condition():
        self.assertEqual(len(models), len(input_batches))
        return True

    # We want all batches to finish for this test.
    eventually(condition, 60.0, catch_assertions=True)

    t_models = array(models)
    diff = t_models[1:] - t_models[:-1]
    # Test that weights improve with a small tolerance
    self.assertTrue(all(diff >= -0.1))
    self.assertTrue(array_sum(diff > 0) > 1)
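`eventually` here is a polling helper from the test suite's utilities: it retries a condition until it passes or a timeout expires. A simplified stand-in, not the actual Spark helper (whose behaviour differs in details such as polling interval and return value):

import time as _time

def eventually(condition, timeout=30.0, catch_assertions=False):
    # Simplified stand-in: poll `condition` until it returns True or the
    # timeout expires. With catch_assertions=True, AssertionErrors raised
    # by the condition are retried instead of propagating immediately.
    start = _time.time()
    last_exc = None
    while _time.time() - start < timeout:
        try:
            if condition():
                return
        except AssertionError as e:
            if not catch_assertions:
                raise
            last_exc = e
        _time.sleep(0.01)
    if last_exc is not None:
        raise last_exc
    raise AssertionError("condition not met within %s seconds" % timeout)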
def test_accuracy_for_single_center(self):
    """Test that parameters obtained are correct for a single center."""
    centers, batches = self.streamingKMeansDataGenerator(
        batches=5, numPoints=5, k=1, d=5, r=0.1, seed=0)
    stkm = StreamingKMeans(1)
    stkm.setInitialCenters([[0., 0., 0., 0., 0.]], [0.])
    input_stream = self.ssc.queueStream(
        [self.sc.parallelize(batch, 1) for batch in batches])
    stkm.trainOn(input_stream)

    self.ssc.start()

    def condition():
        self.assertEqual(stkm.latestModel().clusterWeights, [25.0])
        return True
    eventually(condition, catch_assertions=True)

    realCenters = array_sum(array(centers), axis=0)
    for i in range(5):
        modelCenters = stkm.latestModel().centers[0][i]
        self.assertAlmostEqual(centers[0][i], modelCenters, 1)
        self.assertAlmostEqual(realCenters[i], modelCenters, 1)
def compute_gamma(self, sequence, forward=None, backward=None):
    """
    Computes the gamma matrix used in Baum-Welch. This is the matrix of
    state occupation probabilities for each timestep. It is computed from
    the forward and backward matrices.

    These can be passed in as arguments to avoid recomputing if you need
    to reuse them, but will be computed from the model if not given. They
    are assumed to be the matrices computed by
    L{normal_forward_probabilities} and L{normal_backward_probabilities}
    (i.e. normalized, non-log probabilities).

    """
    if forward is None:
        forward = self.normal_forward_probabilities(sequence, array=True)[0]
    if backward is None:
        backward = self.normal_backward_probabilities(sequence,
                                                      array=True)[0]
    # T is the number of timesteps
    # N is the number of states
    T, N = forward.shape

    # Multiply forward and backward elementwise to get unnormalised gamma
    gamma = forward * backward
    # Sum the values in each timestep to get the normalizing denominator
    denominators = array_sum(gamma, axis=1)
    # Divide all the values in each timestep by each denominator
    gamma = (gamma.transpose() / denominators).transpose()
    return gamma
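The transpose/divide/transpose at the end is one way to divide each row of a (T, N) array by a length-T vector. A quick check that it matches the more common broadcasting spelling (toy numbers):

import numpy

gamma = numpy.array([[0.2, 0.6, 0.2],
                     [0.1, 0.1, 0.3]])                # unnormalized, (T, N)
denominators = gamma.sum(axis=1)                      # shape (T,)
a = (gamma.transpose() / denominators).transpose()    # as in compute_gamma
b = gamma / denominators[:, None]                     # broadcasting form
assert numpy.allclose(a, b)
assert numpy.allclose(a.sum(axis=1), 1.0)             # rows now sum to 1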
def compute_gamma(self, sequence, forward=None, backward=None):
    """
    Computes the gamma matrix used in Baum-Welch. This is the matrix of
    state occupation probabilities for each timestep. It is computed from
    the forward and backward matrices.

    These can be passed in as arguments to avoid recomputing if you need
    to reuse them, but will be computed from the model if not given. They
    are assumed to be the matrices computed by
    L{normal_forward_probabilities} and L{normal_backward_probabilities}
    (i.e. normalized, non-log probabilities).

    """
    if forward is None:
        forward = self.normal_forward_probabilities(sequence)
    if backward is None:
        backward = self.normal_backward_probabilities(sequence)
    # T is the number of timesteps
    # N is the number of states
    T, N = forward.shape

    gamma = zeros((T, N), float64)
    for t in range(T):
        for i in range(N):
            gamma[t][i] = forward[t][i] * backward[t][i]
        denominator = array_sum(gamma[t])
        # Normalize
        for i in range(N):
            gamma[t][i] /= denominator
    return gamma
def corr_from_index(*index):
    """Correlation of index with zero.

    Turns a correlation function in terms of index distance into one
    in terms of indices on a periodic domain.

    Parameters
    ----------
    index: tuple of int

    Returns
    -------
    float[-1, 1]
        The correlation of the given index with the origin.

    See Also
    --------
    DistanceCorrelationFunction.correlation_from_index
    """
    comp2_1 = square(index)
    # Components of distance to shifted origin
    comp2_2 = square(broadcastable_shape - index)
    # use the smaller components to get the distance to the
    # closest of the shifted origins
    comp2 = fmin(comp2_1, comp2_2)
    return corr_func(sqrt(array_sum(comp2, axis=0)))
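A self-contained toy version of the wrap-around trick, with the free variables (`broadcastable_shape`, `corr_func`) replaced by local stand-ins and the raw distance returned instead of the correlation:

import numpy
from numpy import square, fmin, sqrt
from numpy import sum as array_sum

# On a periodic 8x8 domain, index 7 is distance 1 from the origin, not 7:
# each squared component is taken to the origin or to the origin shifted
# by one period, whichever is closer.
shape = numpy.array([8, 8])      # stands in for broadcastable_shape

def periodic_distance(*index):
    index = numpy.asarray(index)
    comp2 = fmin(square(index), square(shape - index))
    return sqrt(array_sum(comp2, axis=0))

print(periodic_distance(7, 0))   # 1.0 -- wraps around the boundary
print(periodic_distance(3, 4))   # 5.0 -- the ordinary 3-4-5 distance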
def get_probabilities(histogram):
    probabilities = []
    count = array_sum(histogram[0])
    for i in range(0, len(histogram[0])):
        times = histogram[0][i]
        tempo = histogram[1][i] + 0.5
        if times != 0.0:
            probabilities.append([tempo, times / count])
    return probabilities
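The `+ 0.5` suggests the histogram uses unit-width bins, so adding half a bin maps each left edge to the bin centre. A usage sketch with `numpy.histogram` (the tempo values are made up):

import numpy

tempi = numpy.array([120, 121, 121, 122, 126, 126])
# Unit-width bins centred on integer tempi: edges 119.5, 120.5, ..., 129.5
histogram = numpy.histogram(tempi, bins=numpy.arange(119.5, 130.0))
for tempo, prob in get_probabilities(histogram):
    print(tempo, prob)
# -> 120.0 1/6, 121.0 2/6, 122.0 1/6, 126.0 2/6 (empty bins are skipped)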
def assemble_stiffness_sum(num_dof, edm, elements, u):
    """
    Assembles "summed" stiffness matrix (each row is summed, taking
    absolute value)
    """
    K_sum = zeros(num_dof)
    for elem_id, elem in enumerate(elements):
        u_loc = empty(elem.num_dof)
        gather_element_vector(edm[elem_id], u, u_loc)
        K_elem = array_sum(abs(elem.calc_linear_stiffness([], u_loc)),
                           axis=1)
        for local_dof, dof in enumerate(edm[elem_id]):
            K_sum[dof] += K_elem[local_dof]
    return K_sum
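`gather_element_vector` is referenced but not shown; presumably it copies the element's entries out of the global vector according to the element DOF map. A hypothetical sketch:

import numpy

def gather_element_vector(dof_map, u, u_loc):
    # Hypothetical sketch of the helper used above: copy the element's
    # entries out of the global vector `u` into the local vector `u_loc`,
    # following the element's DOF map.
    for local_dof, dof in enumerate(dof_map):
        u_loc[local_dof] = u[dof]

# Usage: an element with DOFs (2, 0) drawn from a 4-DOF global vector
u = numpy.array([10., 11., 12., 13.])
u_loc = numpy.empty(2)
gather_element_vector((2, 0), u, u_loc)
print(u_loc)   # [12. 10.]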
def _sequence_updates_uni(sequence, last_model, label_dom, state_ids, \
                            beat_ids, d_ids, d_func):
    """Same as L{_sequence_updates}, modified for unigram models. """
    num_beats = len(beat_ids)
    num_ds = len(d_ids)
    num_ktrans = 12

    # Local versions of the matrices store the accumulated values
    # for just this sequence (so we can normalize before adding
    # to the global matrices)
    # The numerators
    ems_local = zeros((num_beats, num_ds), float64)

    # Compute the forward and backward probabilities
    alpha, scale, seq_logprob = last_model.normal_forward_probabilities(
                                                                sequence)
    beta, scale = last_model.normal_backward_probabilities(sequence)
    # gamma contains the state occupation probability for each state at each
    # timestep
    gamma = last_model.compute_gamma(sequence, alpha, beta)
    # xi contains the probability of every state transition at every timestep
    xi = last_model.compute_xi(sequence, alpha, beta)

    T = len(sequence)

    for time in range(T):
        for state in label_dom:
            tonic, mode, chord = state
            state_i = state_ids[state]

            # We don't update the transition distribution here, because
            # it's fixed

            ## Emission dist update ##
            # Add the state occupation probability to the emission numerator
            # for every note
            for pc, beat in sequence[time]:
                beat_i = beat_ids[beat]
                d = d_func(pc, state)
                d_i = d_ids[d]
                ems_local[beat_i][d_i] += gamma[time][state_i]

    # Calculate the denominators
    ems_denom_local = array_sum(ems_local, axis=1)

    # Wrap this all up in a tuple to return to the master
    return (ems_local, ems_denom_local, seq_logprob)
def test_accuracy_for_single_center(self):
    """Test that parameters obtained are correct for a single center."""
    centers, batches = self.streamingKMeansDataGenerator(
        batches=5, numPoints=5, k=1, d=5, r=0.1, seed=0)
    stkm = StreamingKMeans(1)
    stkm.setInitialCenters([[0., 0., 0., 0., 0.]], [0.])
    input_stream = self.ssc.queueStream(
        [self.sc.parallelize(batch, 1) for batch in batches])
    stkm.trainOn(input_stream)

    t = time()
    self.ssc.start()
    self._ssc_wait(t, 10.0, 0.01)
    self.assertEqual(stkm.latestModel().clusterWeights, [25.0])
    realCenters = array_sum(array(centers), axis=0)
    for i in range(5):
        modelCenters = stkm.latestModel().centers[0][i]
        self.assertAlmostEqual(centers[0][i], modelCenters, 1)
        self.assertAlmostEqual(realCenters[i], modelCenters, 1)
def _sequence_updates(sequence, last_model, label_dom, state_ids, mode_ids, \
                        chord_ids, beat_ids, d_ids, d_func):
    """
    Evaluates the forward/backward probability matrices for a single
    sequence under the model that came from the previous iteration and
    returns matrices that contain the updates to be made to the
    distributions during this iteration.

    This is wrapped up in a function so it can be run in parallel for
    each sequence. Once all sequences have been evaluated, the results
    are combined and the model updated.

    """
    num_chords = len(chord_ids)
    num_beats = len(beat_ids)
    num_modes = len(mode_ids)
    num_ds = len(d_ids)
    num_ktrans = 12

    # Local versions of the matrices store the accumulated values
    # for just this sequence (so we can normalize before adding
    # to the global matrices)
    # The numerators
    ctrans_local = zeros((num_chords, num_chords), float64)
    ems_local = zeros((num_beats, num_ds), float64)
    ktrans_local = zeros((num_modes, num_ktrans, num_modes), float64)
    uni_chords_local = zeros(num_chords, float64)

    # Compute the forward and backward probabilities
    alpha, scale, seq_logprob = last_model.normal_forward_probabilities(
                                                                sequence)
    beta, scale = last_model.normal_backward_probabilities(sequence)
    # gamma contains the state occupation probability for each state at each
    # timestep
    gamma = last_model.compute_gamma(sequence, alpha, beta)
    # xi contains the probability of every state transition at every timestep
    xi = last_model.compute_xi(sequence, alpha, beta)

    T = len(sequence)

    for time in range(T):
        for state in label_dom:
            tonic, mode, chord = state
            state_i = state_ids[state]
            mode_i = mode_ids[mode]

            if time < T-1:
                # Go through all possible pairs of states to update the
                # transition distributions
                for next_state in label_dom:
                    ntonic, nmode, nchord = next_state
                    state_j = state_ids[next_state]
                    mode_j = mode_ids[nmode]

                    ## Key transition dist update ##
                    tonic_change = (ntonic - tonic) % 12
                    ktrans_local[mode_i][tonic_change][mode_j] += \
                        xi[time][state_i][state_j]

                    ## Chord transition dist update ##
                    chord_i, chord_j = chord_ids[chord], chord_ids[nchord]
                    if tonic == ntonic and mode == nmode:
                        # Add to chord transition dist for this chord pair
                        ctrans_local[chord_i][chord_j] += \
                            xi[time][state_i][state_j]
                    else:
                        uni_chords_local[chord_j] += \
                            xi[time][state_i][state_j]

            ## Emission dist update ##
            # Add the state occupation probability to the emission numerator
            # for every note
            for pc, beat in sequence[time]:
                beat_i = beat_ids[beat]
                d = d_func(pc, state)
                d_i = d_ids[d]
                ems_local[beat_i][d_i] += gamma[time][state_i]

    # Calculate the denominators
    ctrans_denom_local = array_sum(ctrans_local, axis=1)
    ems_denom_local = array_sum(ems_local, axis=1)
    ktrans_denom_local = array_sum(array_sum(ktrans_local, axis=2), axis=1)
    uni_chords_denom_local = array_sum(uni_chords_local)

    # Wrap this all up in a tuple to return to the master
    return (ktrans_local, ctrans_local, ems_local, \
            uni_chords_local, \
            ktrans_denom_local, ctrans_denom_local, \
            ems_denom_local, uni_chords_denom_local, \
            seq_logprob)
def sequence_updates(sequence, last_model, empty_arrays, array_ids,
                     update_initial=True):
    """
    Evaluates the forward/backward probability matrices for a single
    sequence under the model that came from the previous iteration and
    returns matrices that contain the updates to be made to the
    distributions during this iteration.

    This is wrapped up in a function so it can be run in parallel for
    each sequence. Once all sequences have been evaluated, the results
    are combined and the model updated.

    @type update_initial: bool
    @param update_initial: if update_initial=False, the initial state
        distribution updates won't be made for this sequence

    """
    try:
        (initial_keys, initial_chords, key_trans, chord_trans, ems,
            initial_keys_denom, initial_chords_denom, key_trans_denom,
            chord_trans_denom, ems_denom) = empty_arrays
        chord_ids, chord_type_ids = array_ids

        # Compute the forwards with seq_prob=True
        fwds, seq_logprob = last_model.normal_forward_probabilities(
                                                sequence, seq_prob=True)
        # gamma contains the state occupation probability for each state at
        # each timestep
        gamma = last_model.gamma_probabilities(sequence, forward=fwds)
        # xi contains the probability of every state transition at every
        # timestep
        xi = last_model.compute_xi(sequence)

        label_dom = last_model.label_dom
        # Enumerate the label dom
        state_ids = dict([(state, id) for (id, state) in
                            enumerate(label_dom)])

        T = len(sequence)

        for time in range(T):
            for state in label_dom:
                keyi, rooti, labeli = state
                state_i = state_ids[state]
                chord_i = chord_ids[((rooti - keyi) % 12, labeli)]

                if time == 0:
                    # Update initial distributions
                    initial_keys[keyi] += gamma[time][state_i]
                    initial_chords[chord_i] += gamma[time][state_i]

                if time == T-1:
                    # Last timestep
                    # Update the transition dists for transitions to final
                    # state
                    chord_trans[chord_i][-1] += gamma[time][state_i]
                else:
                    # Go through all possible pairs of states to update the
                    # transition distributions
                    for next_state in label_dom:
                        keyj, rootj, labelj = next_state
                        state_j = state_ids[next_state]
                        chord_j = chord_ids[((rootj - keyj) % 12, labelj)]
                        key_change = (keyj - keyi) % 12

                        ## Transition dist updates ##
                        key_trans[key_change] += xi[time][state_i][state_j]
                        chord_trans[chord_i][chord_j] += \
                            xi[time][state_i][state_j]

                ## Emission dist update ##
                for note in sequence[time]:
                    pc = (note - rooti) % 12
                    ems[chord_type_ids[labeli]][pc] += gamma[time][state_i]

        # Calculate the denominators by summing
        initial_keys_denom[0] = array_sum(initial_keys)
        initial_chords_denom[0] = array_sum(initial_chords)
        key_trans_denom[0] = array_sum(key_trans)
        chord_trans_denom = array_sum(chord_trans, axis=1)
        ems_denom = array_sum(ems, axis=1)

        # Wrap this all up in a tuple to return to the master
        return (initial_keys, initial_chords, key_trans, chord_trans, ems,
                initial_keys_denom, initial_chords_denom, key_trans_denom,
                chord_trans_denom, ems_denom, seq_logprob)
    except KeyboardInterrupt:
        return
def normal_forward_probabilities(self, sequence, array=False):
    """
    If you want the normalized matrix of forward probabilities, it's ok
    to use normal (non-log) probabilities and these can be computed more
    quickly, since you don't need to sum logs (which is time consuming).

    Returns the matrix, and also the vector of values that each timestep
    was divided by to normalize (i.e. total probability of each timestep
    over all states).
    Also returns the total log probability of the sequence.

    @type array: bool
    @param array: if True, returns a numpy 2d array instead of a list of
        dicts.
    @return: (matrix, normalizing vector, log prob)

    """
    T = len(sequence)
    N = len(self.label_dom)
    alpha = numpy.zeros((T, N), numpy.float64)
    scale = numpy.zeros(T, numpy.float64)

    # Prepare the first column of the matrix: probs of all states in the
    # first timestep
    for i, state in enumerate(self.label_dom):
        alpha[0, i] = self.transition_probability(state, None) * \
                        self.emission_probability(sequence[0], state)
    # Normalize by dividing all values by the total probability
    total = array_sum(alpha[0, :])
    alpha[0, :] /= total
    scale[0] = total

    # Iterate over the other timesteps
    for t in range(1, T):
        for j, sj in enumerate(self.label_dom):
            # Multiply each previous state's prob by the transition prob
            # to this state and sum them all together
            prob = sum(
                (alpha[t-1, i] * self.transition_probability(sj, si) \
                    for i, si in enumerate(self.label_dom)), 0.0)
            # Also multiply this by the emission probability
            alpha[t, j] = prob * \
                            self.emission_probability(sequence[t], sj)
        # Normalize by dividing all values by the total probability
        total = array_sum(alpha[t, :])
        alpha[t, :] /= total
        scale[t] = total

    # Multiply together the probability of each timestep to get the whole
    # probability of the sequence
    # This gets the same result as if we did:
    #  alpha = model.forward_log_probabilities(sequence, normalize=False,
    #                                          array=True)
    #  log_prob = sum_logs(alpha[T-1,:])
    log_prob = sum((logprob(total) for total in scale), 0.0)

    if not array:
        # Convert this into a list of dicts
        matrix = []
        for t in range(T):
            timestep = {}
            for (i, label) in enumerate(self.label_dom):
                timestep[label] = alpha[t, i]
            matrix.append(timestep)
        return matrix, scale, log_prob
    else:
        return alpha, scale, log_prob
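The log probability at the end relies on the standard identity for scaled forward recursions: the product of the per-timestep scaling totals equals the sequence probability. A self-contained check on a toy 2-state HMM, with explicit matrices standing in for the model's methods (`logprob`'s base may differ; natural log is used here):

import numpy

init = numpy.array([0.6, 0.4])               # P(state at t=0), made up
A = numpy.array([[0.7, 0.3], [0.4, 0.6]])    # A[i, j] = P(j at t+1 | i at t)
E = numpy.array([[0.9, 0.2], [0.1, 0.8]])    # E[t, j] = P(obs at t | state j)

alpha = init * E[0]
scale = [alpha.sum()]
alpha /= scale[0]
for t in range(1, len(E)):
    alpha = alpha.dot(A) * E[t]              # unnormalized forward step
    scale.append(alpha.sum())
    alpha /= scale[-1]                       # renormalize, remember the total

log_prob = sum(numpy.log(s) for s in scale)

# Brute force P(sequence) by summing over all four length-2 state paths
brute = sum(init[i] * E[0, i] * A[i, j] * E[1, j]
            for i in range(2) for j in range(2))
assert numpy.isclose(numpy.exp(log_prob), brute)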
def compute_xi(self, sequence, forward=None, backward=None,
        emission_matrix=None, transition_matrix=None, use_logs=False):
    """
    Computes the xi matrix used by Baum-Welch. It is the matrix of joint
    probabilities of occupation of pairs of consecutive states:
    P(i_t, j_{t+1} | O).

    As with L{compute_gamma}, forward and backward matrices can optionally
    be passed in to avoid recomputing.

    @type use_logs: bool
    @param use_logs: by default, this function does not use logs in its
        calculations. This can lead to underflow if your forward/backward
        matrices have sufficiently low values. If C{use_logs=True}, logs
        will be used internally (though the returned values are
        exponentiated again). This makes the function an order of
        magnitude slower.

    """
    if forward is None:
        forward = self.normal_forward_probabilities(sequence)
    if backward is None:
        backward = self.normal_backward_probabilities(sequence)
    # T is the number of timesteps
    # N is the number of states
    T, N = forward.shape

    # Create the empty array to fill
    xi = numpy.zeros((T-1, N, N), numpy.float64)

    # Precompute all the emission probabilities
    if emission_matrix is None:
        emission_matrix = self.get_emission_matrix(sequence)
    # And transition probabilities: we'll need these many times over
    if transition_matrix is None:
        transition_matrix = self.get_transition_matrix()

    if not use_logs:
        # Do it without logs - much faster
        for t in range(T-1):
            total = 0.0
            # Transpose the forward probabilities so that we multiply them
            # along the vertical axis
            fwd_trans = forward[t, :, numpy.newaxis]
            # Compute the xi values by multiplying the arrays together
            xi[t] = transition_matrix.T * fwd_trans * backward[t+1] * \
                        emission_matrix[t+1]
            # Normalize all the probabilities
            # Sum all the probs for the timestep and divide them all by total
            total = array_sum(xi[t])
            xi[t] /= total
    else:
        # Take logs of all the matrices we need
        emission_matrix = numpy.log2(emission_matrix)
        transition_matrix = numpy.log2(transition_matrix)
        forward = numpy.log2(forward)
        backward = numpy.log2(backward)

        for t in range(T-1):
            total = 0.0
            fwd_trans = forward[t, :, numpy.newaxis]
            xi[t] = transition_matrix.T + fwd_trans + backward[t+1] + \
                        emission_matrix[t+1]
            # This takes a (relatively) long time
            # Reduce over both axes so the normalizer is a scalar, as in
            # the non-log branch
            total = numpy.logaddexp2.reduce(xi[t], axis=None)
            xi[t] -= total
        # Exponentiate all the probabilities again
        # This also takes a while
        xi = numpy.exp2(xi)
    return xi
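The `newaxis` line computes the whole xi numerator in one broadcast. A shape-level sketch with made-up numbers, showing how the (N, 1) column of forward probabilities combines with the (N, N) transition matrix and the length-N backward and emission rows:

import numpy

N = 3
trans = numpy.full((N, N), 1.0 / N)       # toy uniform transition matrix
fwd_t = numpy.array([0.5, 0.3, 0.2])      # alpha at time t
bwd_next = numpy.array([0.2, 0.3, 0.5])   # beta at time t+1
em_next = numpy.array([0.1, 0.8, 0.1])    # emission probs at time t+1

# (N, 1) column broadcasts down the rows; the (N,) rows broadcast across
# the columns, so entry [i, j] couples state i at t with state j at t+1.
xi_t = trans.T * fwd_t[:, numpy.newaxis] * bwd_next * em_next
xi_t /= xi_t.sum()

# Marginalizing over the next state recovers a gamma-like occupation
# probability for time t.
print(xi_t.sum(axis=1))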
def _sequence_updates(sequence, last_model, label_dom, schema_ids,
        emission_cond_ids, update_initial=True, catch_interrupt=False):
    """
    Evaluates the forward/backward probability matrices for a single
    sequence under the model that came from the previous iteration and
    returns matrices that contain the updates to be made to the
    distributions during this iteration.

    This is wrapped up in a function so it can be run in parallel for
    each sequence. Once all sequences have been evaluated, the results
    are combined and the model updated.

    @type update_initial: bool
    @param update_initial: usually you want to update all distributions,
        including the initial state distribution. If update_initial=False,
        the initial state distribution updates won't be made for this
        sequence. We want this when the sequence is actually a non-initial
        fragment of a longer sequence

    @type catch_interrupt: bool
    @param catch_interrupt: catch KeyboardInterrupt exceptions and return
        None. This is useful behaviour when calling this in a process pool,
        since it allows the parent process to handle the interrupt, but
        should be set to False (default) if calling directly.

    """
    try:
        # Get the sizes we'll need for the matrix
        num_schemata = len(last_model.schemata)
        num_root_changes = 12
        num_chord_classes = len(last_model.chord_classes)
        num_emission_conds = len(emission_cond_ids)
        num_emissions = 12
        T = len(sequence)

        state_ids = dict([(state, id) for (id, state) in \
                                enumerate(last_model.label_dom)])

        # Local versions of the matrices store the accumulated values
        # for just this sequence (so we can normalize before adding
        # to the global matrices)
        # The numerators
        schema_trans = zeros((num_schemata, num_schemata + 1), float64)
        root_trans = zeros((num_schemata, num_schemata, num_root_changes),
                            float64)
        ems = zeros((num_emission_conds, num_emissions), float64)
        sinit = zeros(num_schemata, float64)

        # Compute the forward and backward probabilities
        # These are normalized, but that makes no difference to the outcome
        # of compute_gamma and compute_xi
        alpha, scale, seq_logprob = last_model.normal_forward_probabilities(
                                                        sequence, array=True)
        beta, scale = last_model.normal_backward_probabilities(sequence,
                                                        array=True)
        # gamma contains the state occupation probability for each state at
        # each timestep
        gamma = last_model.compute_gamma(sequence, forward=alpha,
                                            backward=beta)
        # xi contains the probability of every state transition at every
        # timestep
        xi = last_model.compute_xi(sequence, forward=alpha, backward=beta)

        # Update the initial state distribution if requested
        if update_initial:
            for state in label_dom:
                schema, root, chord_class = state
                schema_i = schema_ids[schema]
                # Add this contribution to the sum of the states with this
                # schema
                sinit[schema_i] += gamma[0][state_ids[state]]

        for time in range(T):
            for state in label_dom:
                schema, root, chord_class = state
                schema_i = schema_ids[schema]
                state_i = state_ids[state]

                if time < T - 1:
                    # Go through all possible pairs of states to update the
                    # transition distributions
                    for next_state in label_dom:
                        next_schema, next_root, next_chord_class = next_state
                        schema_j = schema_ids[next_schema]
                        state_j = state_ids[next_state]

                        ## Transition dist update ##
                        root_change = (next_root - root) % 12
                        schema_trans[schema_i][schema_j] += \
                            xi[time][state_i][state_j]
                        root_trans[schema_i][schema_j][root_change] += \
                            xi[time][state_i][state_j]
                else:
                    # Final state: update the probs of transitioning to end
                    schema_trans[schema_i][num_schemata] += \
                        gamma[T - 1][state_i]

                ## Emission dist update ##
                # Add the state occupation probability to the emission
                # numerator for every note
                for pc, beat in sequence[time]:
                    # Take the pitch class relative to the root
                    rel_pc = (pc - root) % 12
                    ems[emission_cond_ids[(chord_class, beat)]][rel_pc] += \
                        gamma[time][state_i]

        # Calculate the denominators
        schema_trans_denom = array_sum(schema_trans, axis=1)
        root_trans_denom = array_sum(root_trans, axis=2)
        ems_denom = array_sum(ems, axis=1)
        # This should come to 1.0
        sinit_denom = array_sum(sinit)

        # Wrap this all up in a tuple to return to the master
        return (schema_trans, root_trans, ems, sinit, \
                schema_trans_denom, root_trans_denom, ems_denom, \
                sinit_denom, seq_logprob)
    except KeyboardInterrupt:
        if catch_interrupt:
            return
        else:
            raise