Example #1
def sequence_updates(sequence, last_model, empty_arrays, array_ids, update_initial=True):
    """
    Evaluates the forward/backward probability matrices for a 
    single sequence under the model that came from the previous 
    iteration and returns matrices that contain the updates 
    to be made to the distributions during this iteration.
    
    This is wrapped up in a function so it can be run in 
    parallel for each sequence. Once all sequences have been 
    evaluated, the results are combined and the model is updated.
    
    @type update_initial: bool
    @param update_initial: usually you want to update all distributions, 
        including the initial state distribution. If update_initial=False, 
        the initial state distribution updates won't be made for this sequence. 
        We want this when the sequence is actually a non-initial fragment of 
        a longer sequence
    
    """
    try:
        trans, ems, trans_denom, ems_denom = empty_arrays
        state_ids, em_ids = array_ids
        
        # Compute the forwards with seq_prob=True
        fwds,seq_logprob = last_model.normal_forward_probabilities(sequence, seq_prob=True)
        # gamma contains the state occupation probability for each state at each 
        #  timestep
        gamma = last_model.gamma_probabilities(sequence, forward=fwds)
        # xi contains the probability of every state transition at every timestep
        xi = last_model.compute_xi(sequence)
        
        label_dom = last_model.label_dom
        T = len(sequence)
        
        for time in range(T):
            for state in label_dom:
                state_i = state_ids[state]
                
                if time < T-1:
                    # Go through all possible pairs of states to update the 
                    #  transition distributions
                    for next_state in label_dom:
                        state_j = state_ids[next_state]
                        
                        ## Transition dist update ##
                        trans[state_i][state_j] += xi[time][state_i][state_j]
                
                ## Emission dist update ##
                ems[state_ids[state]][em_ids[sequence[time]]] += \
                                                    gamma[time][state_i]
        
        # Calculate the denominators by summing
        trans_denom = array_sum(trans, axis=1)
        ems_denom = array_sum(ems, axis=1)
                
        # Wrap this all up in a tuple to return to the master
        return (trans, ems, trans_denom, ems_denom, seq_logprob)
    except KeyboardInterrupt:
        return
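Since each worker returns an independent tuple of count matrices, the master only has to sum them elementwise before renormalizing the model. Below is a minimal sketch of that combining step; the function name combine_sequence_updates is an assumption for illustration, and only the tuple layout comes from sequence_updates above.

def combine_sequence_updates(results):
    # Sum the per-sequence count matrices and log probabilities.
    # results: list of (trans, ems, trans_denom, ems_denom, seq_logprob)
    #  tuples as returned by sequence_updates; None entries (from
    #  interrupted workers) are skipped
    results = [r for r in results if r is not None]
    trans, ems, trans_denom, ems_denom, logprob = results[0]
    for (t, e, t_denom, e_denom, lp) in results[1:]:
        trans = trans + t
        ems = ems + e
        trans_denom = trans_denom + t_denom
        ems_denom = ems_denom + e_denom
        logprob += lp
    return (trans, ems, trans_denom, ems_denom, logprob)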
Example #2
 def normal_backward_probabilities(self, sequence, array=False):
     """
     @see: normal_forward_probabilities
     
     (except that this doesn't return the logprob)
     
     @type array: bool
     @param array: if True, returns a numpy 2d array instead of a list of 
         dicts.
     
     """
     T = len(sequence)
     N = len(self.label_dom)
     beta = numpy.zeros((T, N), numpy.float64)
     scale = numpy.zeros(T, numpy.float64)
     
     # Initialize with the probabilities of transitioning to the final state
     for i,si in enumerate(self.label_dom):
         beta[T-1, i] = self.transition_probability(None, si)
     # Normalize
     total = array_sum(beta[T-1, :])
     beta[T-1,:] /= total
     
     # Initialize
     scale[T-1] = total
     
     # Iterate backwards over the other timesteps
     for t in range(T-2, -1, -1):
         # To speed things up, calculate all the t+1 emission probabilities 
         #  first, instead of calculating them all for every t state
         em_probs = [
             self.emission_probability(sequence[t+1], sj) \
                 for sj in self.label_dom]
         
         for i,si in enumerate(self.label_dom):
             # Multiply each next state's prob by the transition prob 
             #  from this state to that and the emission prob in that next 
             #  state
             beta[t, i] = sum(
                 (beta[t+1, j] * self.transition_probability(sj, si) * \
                     em_probs[j] \
                         for j,sj in enumerate(self.label_dom)), 0.0)
         # Normalize by dividing all values by the total probability
         total = array_sum(beta[t,:])
         beta[t,:] /= total
         scale[t] = total
         
     if not array:
         # Convert this into a list of dicts
         matrix = []
         for t in range(T):
             timestep = {}
             for (i,label) in enumerate(self.label_dom):
                 timestep[label] = beta[t,i]
             matrix.append(timestep)
         return matrix,scale
     else:
         return beta,scale
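Because both the forward and backward matrices are normalized timestep by timestep, their elementwise product needs only one more per-row normalization to give the state occupation (gamma) matrix. A small illustrative sketch, not part of the original class:

import numpy

def gamma_from_scaled(alpha, beta):
    # alpha, beta: (T, N) scaled matrices from the normal_*_probabilities
    #  methods (array=True)
    g = alpha * beta
    # Renormalize each timestep so the occupation probabilities sum to 1
    return g / g.sum(axis=1)[:, numpy.newaxis]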
Example #3
def f_tt(theta, f, k0T, S, nCoeffs):
    TnTT = S[0][:, 1:nCoeffs + 1]
    tnTT = S[1][:, 1:nCoeffs + 1]
    nfreq = len(f)
    n = arange(1, nCoeffs + 1, 1)
    n = ones((nfreq, 1)) * n
    # Only the forward (theta == 0) and backward (theta == pi) directions 
    #  are handled; any other angle falls through and returns None
    if theta == 0:
        return array_sum((2 * n + 1) * (TnTT + tnTT), axis=1) / (1j * k0T)
    elif theta == pi:
        return array_sum((2 * n + 1) * (TnTT - tnTT), axis=1) / (1j * k0T)
Example #4
def calcul_f_sc(theta, f, k0, S, nCoeffs):
    n = arange(0, nCoeffs + 1, 1)
    TnLL = S[:, 0:nCoeffs + 1]
    if theta == 0:
        return array_sum((2 * n + 1) * TnLL, axis=1) / (1j * k0)
    elif theta == pi:
        return array_sum((2 * n + 1) * ((-1)**n) * TnLL, axis=1) / (1j * k0)
    nfreq = len(f)
    Pncos0 = calculPn(cos(theta), nCoeffs)
    Pncos0 = ones((nfreq, 1)) * transpose(Pncos0)
    n = ones((nfreq, 1)) * n
    fsc_theta_f = array_sum((2 * n + 1) * TnLL * Pncos0, axis=1)
    fsc_theta_f = 1 / (1j * k0) * fsc_theta_f

    return fsc_theta_f
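The theta == 0 and theta == pi shortcuts in both functions come from the Legendre polynomial endpoint values P_n(1) = 1 and P_n(-1) = (-1)**n, which let the Pncos0 factor be dropped or replaced by (-1)**n. A quick check of those identities using numpy's Legendre basis (the calculPn helper used above is not shown, so this is an assumed stand-in):

import numpy
from numpy.polynomial import legendre

def legendre_values(x, n_max):
    # P_0(x) .. P_n_max(x), each evaluated from a unit coefficient vector
    return numpy.array(
        [legendre.legval(x, [0] * n + [1]) for n in range(n_max + 1)])

assert numpy.allclose(legendre_values(1.0, 5), numpy.ones(6))
assert numpy.allclose(legendre_values(-1.0, 5), (-1.0) ** numpy.arange(6))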
Example #5
 def compute_decomposed_xi(self, sequence, forward=None, backward=None, 
                     emission_matrix=None, transition_matrix=None):
     from numpy import newaxis
     
     if forward is None:
         forward = self.normal_forward_probabilities(sequence, decomposed=True)
     if backward is None:
         backward = self.normal_backward_probabilities(sequence, decomposed=True)
     # T is the number of timesteps
     # N is the number of states
     T = forward.shape[0]
     C = len(self.chord_types)
     # Create the empty array to fill
     xi = numpy.zeros((T-1,12,12,C,12,12,C), numpy.float64)
     
     # Precompute all the emission probabilities
     if emission_matrix is None:
         emission_matrix = self.get_small_emission_matrix(sequence)
     # And transition probabilities: we'll need these many times over
     if transition_matrix is None:
         transition_matrix = self.get_small_transition_matrix(transpose=True)
     
     # Do it without logs - much faster
     for t in range(T-1):
         total = 0.0
         # Add axes to the forward probabilities to represent the next state
         fwd_trans = forward[t,:,:,:, newaxis,newaxis,newaxis]
         # Compute the xi values by multiplying the arrays together
         xi[t] = transition_matrix * fwd_trans * backward[t+1] * \
                     emission_matrix[t+1]
         # Normalize all the probabilities
         # Sum all the probs for the timestep and divide them all by total
         xi[t] /= array_sum(xi[t])
     
     return xi
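The newaxis trick above is what lets the (12, 12, C) forward slice broadcast against arrays indexed by both the current and the next decomposed state. A standalone broadcasting sketch with an arbitrary C:

import numpy
from numpy import newaxis

C = 4
fwd_t = numpy.random.rand(12, 12, C)              # current-state axes only
trans = numpy.random.rand(12, 12, C, 12, 12, C)   # current x next state
# Trailing singleton axes broadcast fwd_t across the next-state dimensions
product = trans * fwd_t[:, :, :, newaxis, newaxis, newaxis]
assert product.shape == (12, 12, C, 12, 12, C)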
Example #6
    def test_convergence(self):
        """
        Test that weights converge to the required value on toy data.
        """
        input_batches = [
            self.sc.parallelize(self.generateLogisticInput(
                0, 1.5, 100, 42 + i)) for i in range(20)
        ]
        input_stream = self.ssc.queueStream(input_batches)
        models = []

        slr = StreamingLogisticRegressionWithSGD(stepSize=0.2,
                                                 numIterations=25)
        slr.setInitialWeights([0.0])
        slr.trainOn(input_stream)
        input_stream.foreachRDD(
            lambda x: models.append(slr.latestModel().weights[0]))

        t = time()
        self.ssc.start()
        self._ssc_wait(t, 15.0, 0.01)
        t_models = array(models)
        diff = t_models[1:] - t_models[:-1]

        # Test that weights improve with a small tolerance
        self.assertTrue(all(diff >= -0.1))
        self.assertTrue(array_sum(diff > 0) > 1)
Example #7
    def test_convergence(self):
        """
        Test that weights converge to the required value on toy data.
        """
        input_batches = [
            self.sc.parallelize(self.generateLogisticInput(
                0, 1.5, 100, 42 + i)) for i in range(20)
        ]
        input_stream = self.ssc.queueStream(input_batches)
        models = []

        slr = StreamingLogisticRegressionWithSGD(stepSize=0.2,
                                                 numIterations=25)
        slr.setInitialWeights([0.0])
        slr.trainOn(input_stream)
        input_stream.foreachRDD(
            lambda x: models.append(slr.latestModel().weights[0]))

        self.ssc.start()

        def condition():
            self.assertEqual(len(models), len(input_batches))
            return True

        # We want all batches to finish for this test.
        eventually(condition, 60.0, catch_assertions=True)

        t_models = array(models)
        diff = t_models[1:] - t_models[:-1]
        # Test that weights improve with a small tolerance
        self.assertTrue(all(diff >= -0.1))
        self.assertTrue(array_sum(diff > 0) > 1)
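The eventually helper used here polls a condition until it holds or a timeout expires. A minimal sketch of such a helper, for illustration only (not the actual Spark testing utility):

import time

def eventually_sketch(condition, timeout=30.0, catch_assertions=False):
    # Poll condition() until it returns a truthy value or timeout expires
    start = time.time()
    last_error = None
    while time.time() - start < timeout:
        try:
            if condition():
                return
        except AssertionError as error:
            if not catch_assertions:
                raise
            last_error = error
        time.sleep(0.01)
    if last_error is not None:
        raise last_error
    raise AssertionError("Condition not met within %.1f seconds" % timeout)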
Example #8
    def test_accuracy_for_single_center(self):
        """Test that parameters obtained are correct for a single center."""
        centers, batches = self.streamingKMeansDataGenerator(batches=5,
                                                             numPoints=5,
                                                             k=1,
                                                             d=5,
                                                             r=0.1,
                                                             seed=0)
        stkm = StreamingKMeans(1)
        stkm.setInitialCenters([[0., 0., 0., 0., 0.]], [0.])
        input_stream = self.ssc.queueStream(
            [self.sc.parallelize(batch, 1) for batch in batches])
        stkm.trainOn(input_stream)

        self.ssc.start()

        def condition():
            self.assertEqual(stkm.latestModel().clusterWeights, [25.0])
            return True

        eventually(condition, catch_assertions=True)

        realCenters = array_sum(array(centers), axis=0)
        for i in range(5):
            modelCenters = stkm.latestModel().centers[0][i]
            self.assertAlmostEqual(centers[0][i], modelCenters, 1)
            self.assertAlmostEqual(realCenters[i], modelCenters, 1)
Example #9
 def compute_gamma(self, sequence, forward=None, backward=None):
     """
     Computes the gamma matrix used in Baum-Welch. This is the matrix 
     of state occupation probabilities for each timestep. It is computed 
     from the forward and backward matrices.
     
     These can be passed in as 
     arguments to avoid recomputing if you need to reuse them, but will 
     be computed from the model if not given. They are assumed to be 
     the matrices computed by L{normal_forward_probabilities} and 
     L{normal_backward_probabilities} (i.e. normalized, non-log 
     probabilities).
     
     """
     if forward is None:
         forward = self.normal_forward_probabilities(sequence, array=True)[0]
     if backward is None:
         backward = self.normal_backward_probabilities(sequence, array=True)[0]
     # T is the number of timesteps
     # N is the number of states
     T,N = forward.shape
     
     # Multiply forward and backward elementwise to get unnormalised gamma
     gamma = forward * backward
     # Sum the values in each timestep to get the normalizing denominator
     denominators = array_sum(gamma, axis=1)
     # Divide all the values in each timestep by each denominator
     gamma = (gamma.transpose() / denominators).transpose()
     
     return gamma
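The transpose-divide-transpose on the last line is one way of dividing each row by its sum; an equivalent spelling adds a trailing broadcast axis instead (illustrative only):

import numpy

gamma = numpy.random.rand(5, 3)
denominators = gamma.sum(axis=1)
# Same result as (gamma.transpose() / denominators).transpose()
normalized = gamma / denominators[:, numpy.newaxis]
assert numpy.allclose(normalized.sum(axis=1), 1.0)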
Example #10
    def test_convergence(self):
        """
        Test that weights converge to the required value on toy data.
        """
        input_batches = [
            self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
            for i in range(20)]
        input_stream = self.ssc.queueStream(input_batches)
        models = []

        slr = StreamingLogisticRegressionWithSGD(
            stepSize=0.2, numIterations=25)
        slr.setInitialWeights([0.0])
        slr.trainOn(input_stream)
        input_stream.foreachRDD(
            lambda x: models.append(slr.latestModel().weights[0]))

        t = time()
        self.ssc.start()
        self._ssc_wait(t, 15.0, 0.01)
        t_models = array(models)
        diff = t_models[1:] - t_models[:-1]

        # Test that weights improve with a small tolerance
        self.assertTrue(all(diff >= -0.1))
        self.assertTrue(array_sum(diff > 0) > 1)
Example #11
 def compute_gamma(self, sequence, forward=None, backward=None):
     """
     Computes the gamma matrix used in Baum-Welch. This is the matrix 
     of state occupation probabilities for each timestep. It is computed 
     from the forward and backward matrices.
     
     These can be passed in as 
     arguments to avoid recomputing if you need to reuse them, but will 
     be computed from the model if not given. They are assumed to be 
     the matrices computed by L{normal_forward_probabilities} and 
     L{normal_backward_probabilities} (i.e. normalized, non-log 
     probabilities).
     
     """
     if forward is None:
         forward = self.normal_forward_probabilities(sequence)
     if backward is None:
         backward = self.normal_backward_probabilities(sequence)
     # T is the number of timesteps
     # N is the number of states
     T,N = forward.shape
     
     gamma = zeros((T,N), float64)
     for t in range(T):
         for i in range(N):
             gamma[t][i] = forward[t][i]*backward[t][i]
         denominator = array_sum(gamma[t])
         # Normalize
         for i in range(N):
             gamma[t][i] /= denominator
     return gamma
Example #12
    def compute_gamma(self, sequence, forward=None, backward=None):
        """
        Computes the gamma matrix used in Baum-Welch. This is the matrix 
        of state occupation probabilities for each timestep. It is computed 
        from the forward and backward matrices.
        
        These can be passed in as 
        arguments to avoid recomputing if you need to reuse them, but will 
        be computed from the model if not given. They are assumed to be 
        the matrices computed by L{normal_forward_probabilities} and 
        L{normal_backward_probabilities} (i.e. normalized, non-log 
        probabilities).
        
        """
        if forward is None:
            forward = self.normal_forward_probabilities(sequence)
        if backward is None:
            backward = self.normal_backward_probabilities(sequence)
        # T is the number of timesteps
        # N is the number of states
        T, N = forward.shape

        gamma = zeros((T, N), float64)
        for t in range(T):
            for i in range(N):
                gamma[t][i] = forward[t][i] * backward[t][i]
            denominator = array_sum(gamma[t])
            # Normalize
            for i in range(N):
                gamma[t][i] /= denominator
        return gamma
Example #13
    def test_convergence(self):
        """
        Test that weights converge to the required value on toy data.
        """
        input_batches = [
            self.sc.parallelize(self.generateLogisticInput(0, 1.5, 100, 42 + i))
            for i in range(20)]
        input_stream = self.ssc.queueStream(input_batches)
        models = []

        slr = StreamingLogisticRegressionWithSGD(
            stepSize=0.2, numIterations=25)
        slr.setInitialWeights([0.0])
        slr.trainOn(input_stream)
        input_stream.foreachRDD(
            lambda x: models.append(slr.latestModel().weights[0]))

        self.ssc.start()

        def condition():
            self.assertEqual(len(models), len(input_batches))
            return True

        # We want all batches to finish for this test.
        self._eventually(condition, 60.0, catch_assertions=True)

        t_models = array(models)
        diff = t_models[1:] - t_models[:-1]
        # Test that weights improve with a small tolerance
        self.assertTrue(all(diff >= -0.1))
        self.assertTrue(array_sum(diff > 0) > 1)
Example #14
        def corr_from_index(*index):
            """Correlation of index with zero.

            Turns a correlation function in terms of index distance
            into one in terms of indices on a periodic domain.

            Parameters
            ----------
            index: tuple of int

            Returns
            -------
            float[-1, 1]
                The correlation of the given index with the origin.

            See Also
            --------
            DistanceCorrelationFunction.correlation_from_index
            """
            comp2_1 = square(index)
            # Components of distance to shifted origin
            comp2_2 = square(broadcastable_shape - index)
            # use the smaller components to get the distance to the
            # closest of the shifted origins
            comp2 = fmin(comp2_1, comp2_2)
            return corr_func(sqrt(array_sum(comp2, axis=0)))
Example #15
def get_probabilities(histogram):
    probabilities = []
    count = array_sum(histogram[0])

    for i in range(len(histogram[0])):
        times = histogram[0][i]
        tempo = histogram[1][i] + 0.5
        if times != 0.0:
            probabilities.append([tempo, times / count])

    return probabilities
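get_probabilities expects the (counts, bin_edges) pair produced by numpy.histogram; with unit-width bins, edge + 0.5 is the bin centre. A short usage sketch with made-up tempo data (array_sum in the function above is assumed to be numpy's sum):

import numpy

tempos = [120, 121, 120, 122, 121, 120]
histogram = numpy.histogram(tempos, bins=range(119, 124))
# Each entry pairs a bin centre with its relative frequency
print(get_probabilities(histogram))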
Example #16
def assemble_stiffness_sum(num_dof, edm, elements, u):
    """
    Assembles "summed" stiffness matrix (each row is summed, taking absolute value)
    """
    K_sum = zeros(num_dof)
    for elem_id, elem in enumerate(elements):
        u_loc = empty(elem.num_dof)
        gather_element_vector(edm[elem_id], u, u_loc)
        K_elem = array_sum(abs(elem.calc_linear_stiffness([], u_loc)), axis=1)
        for local_dof, dof in enumerate(edm[elem_id]):
            K_sum[dof] += K_elem[local_dof]
    return K_sum
Example #17
def _sequence_updates_uni(sequence, last_model, label_dom, state_ids, \
            beat_ids, d_ids, d_func):
    """Same as L{_sequence_updates}, modified for unigram models. """
    num_beats = len(beat_ids)
    num_ds = len(d_ids)
    num_ktrans = 12

    # Local versions of the matrices store the accumulated values
    #  for just this sequence (so we can normalize before adding
    #  to the global matrices)
    # The numerators
    ems_local = zeros((num_beats, num_ds), float64)

    # Compute the forward and backward probabilities
    alpha, scale, seq_logprob = last_model.normal_forward_probabilities(
        sequence)
    beta, scale = last_model.normal_backward_probabilities(sequence)
    # gamma contains the state occupation probability for each state at each
    #  timestep
    gamma = last_model.compute_gamma(sequence, alpha, beta)
    # xi contains the probability of every state transition at every timestep
    #  (computed here for symmetry with the bigram version, but unused below, 
    #  since the transition distribution is fixed for unigram models)
    xi = last_model.compute_xi(sequence, alpha, beta)

    T = len(sequence)

    for time in range(T):
        for state in label_dom:
            tonic, mode, chord = state
            state_i = state_ids[state]
            # We don't update the transition distribution here, because it's fixed

            ## Emission dist update ##
            # Add the state occupation probability to the emission numerator
            #  for every note
            for pc, beat in sequence[time]:
                beat_i = beat_ids[beat]
                d = d_func(pc, state)
                d_i = d_ids[d]

                ems_local[beat_i][d_i] += gamma[time][state_i]

    # Calculate the denominators
    ems_denom_local = array_sum(ems_local, axis=1)

    # Wrap this all up in a tuple to return to the master
    return (ems_local, ems_denom_local, seq_logprob)
Example #18
def _sequence_updates_uni(sequence, last_model, label_dom, state_ids, \
            beat_ids, d_ids, d_func):
    """Same as L{_sequence_updates}, modified for unigram models. """
    num_beats = len(beat_ids)
    num_ds = len(d_ids)
    num_ktrans = 12
    
    # Local versions of the matrices store the accumulated values 
    #  for just this sequence (so we can normalize before adding 
    #  to the global matrices)
    # The numerators
    ems_local = zeros((num_beats,num_ds), float64)
    
    # Compute the forward and backward probabilities
    alpha,scale,seq_logprob = last_model.normal_forward_probabilities(sequence)
    beta,scale = last_model.normal_backward_probabilities(sequence)
    # gamma contains the state occupation probability for each state at each 
    #  timestep
    gamma = last_model.compute_gamma(sequence, alpha, beta)
    # xi contains the probability of every state transition at every timestep
    #  (computed here for symmetry with the bigram version, but unused below, 
    #  since the transition distribution is fixed for unigram models)
    xi = last_model.compute_xi(sequence, alpha, beta)
    
    T = len(sequence)
    
    for time in range(T):
        for state in label_dom:
            tonic,mode,chord = state
            state_i = state_ids[state]
            # We don't update the transition distribution here, because it's fixed
            
            ## Emission dist update ##
            # Add the state occupation probability to the emission numerator 
            #  for every note
            for pc,beat in sequence[time]:
                beat_i = beat_ids[beat]
                d = d_func(pc, state)
                d_i = d_ids[d]
                
                ems_local[beat_i][d_i] += gamma[time][state_i]
    
    # Calculate the denominators
    ems_denom_local = array_sum(ems_local, axis=1)
            
    # Wrap this all up in a tuple to return to the master
    return (ems_local, ems_denom_local, seq_logprob)
Example #19
    def test_accuracy_for_single_center(self):
        """Test that parameters obtained are correct for a single center."""
        centers, batches = self.streamingKMeansDataGenerator(
            batches=5, numPoints=5, k=1, d=5, r=0.1, seed=0)
        stkm = StreamingKMeans(1)
        stkm.setInitialCenters([[0., 0., 0., 0., 0.]], [0.])
        input_stream = self.ssc.queueStream(
            [self.sc.parallelize(batch, 1) for batch in batches])
        stkm.trainOn(input_stream)

        t = time()
        self.ssc.start()
        self._ssc_wait(t, 10.0, 0.01)
        self.assertEqual(stkm.latestModel().clusterWeights, [25.0])
        realCenters = array_sum(array(centers), axis=0)
        for i in range(5):
            modelCenters = stkm.latestModel().centers[0][i]
            self.assertAlmostEqual(centers[0][i], modelCenters, 1)
            self.assertAlmostEqual(realCenters[i], modelCenters, 1)
Example #20
def _sequence_updates(sequence, last_model, label_dom, state_ids, mode_ids, \
                        chord_ids, beat_ids, d_ids, d_func):
    """
    Evaluates the forward/backward probability matrices for a 
    single sequence under the model that came from the previous 
    iteration and returns matrices that contain the updates 
    to be made to the distributions during this iteration.
    
    This is wrapped up in a function so it can be run in 
    parallel for each sequence. Once all sequences have been 
    evaluated, the results are combined and the model is updated.
    
    """
    num_chords = len(chord_ids)
    num_beats = len(beat_ids)
    num_modes = len(mode_ids)
    num_ds = len(d_ids)
    num_ktrans = 12

    # Local versions of the matrices store the accumulated values
    #  for just this sequence (so we can normalize before adding
    #  to the global matrices)
    # The numerators
    ctrans_local = zeros((num_chords, num_chords), float64)
    ems_local = zeros((num_beats, num_ds), float64)
    ktrans_local = zeros((num_modes, num_ktrans, num_modes), float64)
    uni_chords_local = zeros(num_chords, float64)

    # Compute the forward and backward probabilities
    alpha, scale, seq_logprob = last_model.normal_forward_probabilities(
        sequence)
    beta, scale = last_model.normal_backward_probabilities(sequence)
    # gamma contains the state occupation probability for each state at each
    #  timestep
    gamma = last_model.compute_gamma(sequence, alpha, beta)
    # xi contains the probability of every state transition at every timestep
    xi = last_model.compute_xi(sequence, alpha, beta)

    T = len(sequence)

    for time in range(T):
        for state in label_dom:
            tonic, mode, chord = state
            state_i = state_ids[state]
            mode_i = mode_ids[mode]

            if time < T - 1:
                # Go through all possible pairs of states to update the
                #  transition distributions
                for next_state in label_dom:
                    ntonic, nmode, nchord = next_state
                    state_j = state_ids[next_state]
                    mode_j = mode_ids[nmode]

                    ## Key transition dist update ##
                    tonic_change = (ntonic - tonic) % 12
                    ktrans_local[mode_i][tonic_change][mode_j] += \
                                                    xi[time][state_i][state_j]

                    ## Chord transition dist update ##
                    chord_i, chord_j = chord_ids[chord], chord_ids[nchord]
                    if tonic == ntonic and mode == nmode:
                        # Add to chord transition dist for this chord pair
                        ctrans_local[chord_i][chord_j] += xi[time][state_i][
                            state_j]
                    else:
                        uni_chords_local[chord_j] += xi[time][state_i][state_j]

            ## Emission dist update ##
            # Add the state occupation probability to the emission numerator
            #  for every note
            for pc, beat in sequence[time]:
                beat_i = beat_ids[beat]
                d = d_func(pc, state)
                d_i = d_ids[d]

                ems_local[beat_i][d_i] += gamma[time][state_i]

    # Calculate the denominators
    ctrans_denom_local = array_sum(ctrans_local, axis=1)
    ems_denom_local = array_sum(ems_local, axis=1)
    ktrans_denom_local = array_sum(array_sum(ktrans_local, axis=2), axis=1)
    uni_chords_denom_local = array_sum(uni_chords_local)

    # Wrap this all up in a tuple to return to the master
    return (ktrans_local, ctrans_local, ems_local, \
            uni_chords_local, \
            ktrans_denom_local, ctrans_denom_local, \
            ems_denom_local, uni_chords_denom_local, \
            seq_logprob)
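The (ntonic - tonic) % 12 arithmetic treats key transitions as pitch-class intervals, so changes that wrap around the octave are binned together. For example (hypothetical values):

# Moving from a tonic of 10 (B flat) to a tonic of 1 (C sharp) is the same
#  3-semitone key change as moving from 0 (C) to 3 (E flat)
assert (1 - 10) % 12 == (3 - 0) % 12 == 3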
Example #21
def sequence_updates(sequence,
                     last_model,
                     empty_arrays,
                     array_ids,
                     update_initial=True):
    """
    Evaluates the forward/backward probability matrices for a 
    single sequence under the model that came from the previous 
    iteration and returns matrices that contain the updates 
    to be made to the distributions during this iteration.
    
    This is wrapped up in a function so it can be run in 
    parallel for each sequence. Once all sequences have been 
    evaluated, the results are combined and the model is updated.
    
    @type update_initial: bool
    @param update_initial: usually you want to update all distributions, 
        including the initial state distribution. If update_initial=False, 
        the initial state distribution updates won't be made for this sequence. 
        We want this when the sequence is actually a non-initial fragment of 
        a longer sequence
    
    """
    try:
        trans, ems, trans_denom, ems_denom = empty_arrays
        state_ids, em_ids = array_ids

        # Compute the forwards with seq_prob=True
        fwds, seq_logprob = last_model.normal_forward_probabilities(
            sequence, seq_prob=True)
        # gamma contains the state occupation probability for each state at each
        #  timestep
        gamma = last_model.gamma_probabilities(sequence, forward=fwds)
        # xi contains the probability of every state transition at every timestep
        xi = last_model.compute_xi(sequence)

        label_dom = last_model.label_dom
        T = len(sequence)

        for time in range(T):
            for state in label_dom:
                state_i = state_ids[state]

                if time < T - 1:
                    # Go through all possible pairs of states to update the
                    #  transition distributions
                    for next_state in label_dom:
                        state_j = state_ids[next_state]

                        ## Transition dist update ##
                        trans[state_i][state_j] += xi[time][state_i][state_j]

                ## Emission dist update ##
                ems[state_ids[state]][em_ids[sequence[time]]] += \
                                                    gamma[time][state_i]

        # Calculate the denominators by summing
        trans_denom = array_sum(trans, axis=1)
        ems_denom = array_sum(ems, axis=1)

        # Wrap this all up in a tuple to return to the master
        return (trans, ems, trans_denom, ems_denom, seq_logprob)
    except KeyboardInterrupt:
        return
Example #22
def sequence_updates(sequence, last_model, empty_arrays, array_ids, update_initial=True):
    """
    Evaluates the forward/backward probability matrices for a 
    single sequence under the model that came from the previous 
    iteration and returns matrices that contain the updates 
    to be made to the distributions during this iteration.
    
    This is wrapped up in a function so it can be run in 
    parallel for each sequence. Once all sequences have been 
    evaluated, the results are combined and the model is updated.
    
    @type update_initial: bool
    @param update_initial: if update_initial=False, 
        the initial state distribution updates won't be made for this sequence
    
    """
    try:
        (
            initial_keys,
            initial_chords,
            key_trans,
            chord_trans,
            ems,
            initial_keys_denom,
            initial_chords_denom,
            key_trans_denom,
            chord_trans_denom,
            ems_denom,
        ) = empty_arrays
        chord_ids, chord_type_ids = array_ids

        # Compute the forwards with seq_prob=True
        fwds, seq_logprob = last_model.normal_forward_probabilities(sequence, seq_prob=True)
        # gamma contains the state occupation probability for each state at each
        #  timestep
        gamma = last_model.gamma_probabilities(sequence, forward=fwds)
        # xi contains the probability of every state transition at every timestep
        xi = last_model.compute_xi(sequence)

        label_dom = last_model.label_dom
        # Enumerate the label dom
        state_ids = dict([(state, id) for (id, state) in enumerate(label_dom)])
        T = len(sequence)

        for time in range(T):
            for state in label_dom:
                keyi, rooti, labeli = state
                state_i = state_ids[state]
                chord_i = chord_ids[((rooti - keyi) % 12, labeli)]

                if time == 0:
                    # Update initial distributions
                    initial_keys[keyi] += gamma[time][state_i]
                    initial_chords[chord_i] += gamma[time][state_i]

                if time == T - 1:
                    # Last timestep
                    # Update the transition dists for transitions to final state
                    chord_trans[chord_i][-1] += gamma[time][state_i]
                else:
                    # Go through all possible pairs of states to update the
                    #  transition distributions
                    for next_state in label_dom:
                        keyj, rootj, labelj = next_state
                        state_j = state_ids[next_state]
                        chord_j = chord_ids[((rootj - keyj) % 12, labelj)]
                        key_change = (keyj - keyi) % 12

                        ## Transition dist updates ##
                        key_trans[key_change] += xi[time][state_i][state_j]
                        chord_trans[chord_i][chord_j] += xi[time][state_i][state_j]

                ## Emission dist update ##
                for note in sequence[time]:
                    pc = (note - rooti) % 12
                    ems[chord_type_ids[labeli]][pc] += gamma[time][state_i]

        # Calculate the denominators by summing
        initial_keys_denom[0] = array_sum(initial_keys)
        initial_chords_denom[0] = array_sum(initial_chords)
        key_trans_denom[0] = array_sum(key_trans)
        chord_trans_denom = array_sum(chord_trans, axis=1)
        ems_denom = array_sum(ems, axis=1)

        # Wrap this all up in a tuple to return to the master
        return (
            initial_keys,
            initial_chords,
            key_trans,
            chord_trans,
            ems,
            initial_keys_denom,
            initial_chords_denom,
            key_trans_denom,
            chord_trans_denom,
            ems_denom,
            seq_logprob,
        )
    except KeyboardInterrupt:
        return
Example #23
 def normal_forward_probabilities(self, sequence, array=False):
     """If you want the normalized matrix of forward probabilities, it's 
     ok to use normal (non-log) probabilities and these can be computed 
     more quickly, since you don't need to sum logs (which is time 
     consuming).
     
     Returns the matrix, and also the vector of values that each timestep 
     was divided by to normalize (i.e. total probability of each timestep 
     over all states).
     Also returns the total log probability of the sequence.
     
     @type array: bool
     @param array: if True, returns a numpy 2d array instead of a list of 
         dicts.
     @return: (matrix,normalizing vector,log prob)
     
     """
     T = len(sequence)
     N = len(self.label_dom)
     alpha = numpy.zeros((T, N), numpy.float64)
     scale = numpy.zeros(T, numpy.float64)
     
     # Prepare the first column of the matrix: probs of all states in the 
     #  first timestep
     for i,state in enumerate(self.label_dom):
         alpha[0,i] = self.transition_probability(state, None) * \
                         self.emission_probability(sequence[0], state)
     # Normalize by dividing all values by the total probability
     total = array_sum(alpha[0,:])
     alpha[0,:] /= total
     scale[0] = total
     
     # Iterate over the other timesteps
     for t in range(1, T):
         for j,sj in enumerate(self.label_dom):
             # Multiply each previous state's prob by the transition prob 
             #  to this state and sum them all together
             prob = sum(
                 (alpha[t-1, i] * self.transition_probability(sj, si) \
                     for i,si in enumerate(self.label_dom)), 0.0)
             # Also multiply this by the emission probability
             alpha[t, j] = prob * \
                             self.emission_probability(sequence[t], sj)
         # Normalize by dividing all values by the total probability
         total = array_sum(alpha[t,:])
         alpha[t,:] /= total
         scale[t] = total
     
     # Multiply together the probability of each timestep to get the whole 
     # probability of the sequence
     # This gets the same result as if we did:
     #  alpha = model.forward_log_probabilities(sequence, normalize=False, array=True)
     #  log_prob = sum_logs(alpha[T-1,:])
     log_prob = sum((logprob(total) for total in scale), 0.0)
     
     if not array:
         # Convert this into a list of dicts
         matrix = []
         for t in range(T):
             timestep = {}
             for (i,label) in enumerate(self.label_dom):
                 timestep[label] = alpha[t,i]
             matrix.append(timestep)
         return matrix,scale,log_prob
     else:
         return alpha,scale,log_prob
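The log probability falls out of the scaling factors because the product of the per-timestep totals is the probability of the whole sequence. A tiny numeric check (plain log2 stands in for the logprob function used above):

from math import log2

scale = [0.5, 0.25, 0.125]
# Summing the per-timestep log factors equals the log of their product
assert sum(log2(c) for c in scale) == log2(0.5 * 0.25 * 0.125)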
Example #24
def _sequence_updates(sequence, last_model, label_dom, state_ids, mode_ids, \
                        chord_ids, beat_ids, d_ids, d_func):
    """
    Evaluates the forward/backward probability matrices for a 
    single sequence under the model that came from the previous 
    iteration and returns matrices that contain the updates 
    to be made to the distributions during this iteration.
    
    This is wrapped up in a function so it can be run in 
    parallel for each sequence. Once all sequences have been 
    evaluated, the results are combined and the model is updated.
    
    """
    num_chords = len(chord_ids)
    num_beats = len(beat_ids)
    num_modes = len(mode_ids)
    num_ds = len(d_ids)
    num_ktrans = 12
    
    # Local versions of the matrices store the accumulated values 
    #  for just this sequence (so we can normalize before adding 
    #  to the global matrices)
    # The numerators
    ctrans_local = zeros((num_chords,num_chords), float64)
    ems_local = zeros((num_beats,num_ds), float64)
    ktrans_local = zeros((num_modes,num_ktrans,num_modes), float64)
    uni_chords_local = zeros(num_chords, float64)
    
    # Compute the forward and backward probabilities
    alpha,scale,seq_logprob = last_model.normal_forward_probabilities(sequence)
    beta,scale = last_model.normal_backward_probabilities(sequence)
    # gamma contains the state occupation probability for each state at each 
    #  timestep
    gamma = last_model.compute_gamma(sequence, alpha, beta)
    # xi contains the probability of every state transition at every timestep
    xi = last_model.compute_xi(sequence, alpha, beta)
    
    T = len(sequence)
    
    for time in range(T):
        for state in label_dom:
            tonic,mode,chord = state
            state_i = state_ids[state]
            mode_i = mode_ids[mode]
            
            if time < T-1:
                # Go through all possible pairs of states to update the 
                #  transition distributions
                for next_state in label_dom:
                    ntonic,nmode,nchord = next_state
                    state_j = state_ids[next_state]
                    mode_j = mode_ids[nmode]
                    
                    ## Key transition dist update ##
                    tonic_change = (ntonic - tonic) % 12
                    ktrans_local[mode_i][tonic_change][mode_j] += \
                                                    xi[time][state_i][state_j]
                    
                    ## Chord transition dist update ##
                    chord_i, chord_j = chord_ids[chord], chord_ids[nchord]
                    if tonic == ntonic and mode == nmode:
                        # Add to chord transition dist for this chord pair
                        ctrans_local[chord_i][chord_j] += xi[time][state_i][state_j]
                    else:
                        uni_chords_local[chord_j] += xi[time][state_i][state_j]
            
            ## Emission dist update ##
            # Add the state occupation probability to the emission numerator 
            #  for every note
            for pc,beat in sequence[time]:
                beat_i = beat_ids[beat]
                d = d_func(pc, state)
                d_i = d_ids[d]
                
                ems_local[beat_i][d_i] += gamma[time][state_i]
    
    # Calculate the denominators
    ctrans_denom_local = array_sum(ctrans_local, axis=1)
    ems_denom_local = array_sum(ems_local, axis=1)
    ktrans_denom_local = array_sum(array_sum(ktrans_local, axis=2), axis=1)
    uni_chords_denom_local = array_sum(uni_chords_local)
            
    # Wrap this all up in a tuple to return to the master
    return (ktrans_local, ctrans_local, ems_local, \
            uni_chords_local, \
            ktrans_denom_local, ctrans_denom_local, \
            ems_denom_local, uni_chords_denom_local, \
            seq_logprob)
Example #25
 def compute_xi(self, sequence, forward=None, backward=None, 
                     emission_matrix=None, transition_matrix=None,
                     use_logs=False):
     """
     Computes the xi matrix used by Baum-Welch. It is the matrix of joint 
     probabilities of occupation of pairs of consecutive states: 
     P(i_t, j_{t+1} | O).
     
     As with L{compute_gamma} forward and backward matrices can optionally 
     be passed in to avoid recomputing.
     
     @type use_logs: bool
     @param use_logs: by default, this function does not use logs in its 
         calculations. This can lead to underflow if your forward/backward 
         matrices have sufficiently low values. If C{use_logs=True}, logs 
         will be used internally (though the returned values are 
         exponentiated again). This makes the function an order of magnitude 
         slower.
     
     """
     if forward is None:
         forward = self.normal_forward_probabilities(sequence)
     if backward is None:
         backward = self.normal_backward_probabilities(sequence)
     # T is the number of timesteps
     # N is the number of states
     T,N = forward.shape
     
     # Create the empty array to fill
     xi = numpy.zeros((T-1,N,N), numpy.float64)
     
     # Precompute all the emission probabilities
     if emission_matrix is None:
         emission_matrix = self.get_emission_matrix(sequence)
     # And transition probabilities: we'll need these many times over
     if transition_matrix is None:
         transition_matrix = self.get_transition_matrix()
     
     if not use_logs:
         # Do it without logs - much faster
         for t in range(T-1):
             total = 0.0
             # Transpose the forward probabilities so that we multiply them 
             #  along the vertical axis
             fwd_trans = forward[t,:, numpy.newaxis]
             # Compute the xi values by multiplying the arrays together
             xi[t] = transition_matrix.T * fwd_trans * backward[t+1] * \
                         emission_matrix[t+1]
             # Normalize all the probabilities
             # Sum all the probs for the timestep and divide them all by total
             total = array_sum(xi[t])
             xi[t] /= total
     else:
         # Take logs of all the matrices we need
         emission_matrix = numpy.log2(emission_matrix)
         transition_matrix = numpy.log2(transition_matrix)
         forward = numpy.log2(forward)
         backward = numpy.log2(backward)
         
         for t in range(T-1):
             total = 0.0
             fwd_trans = forward[t,:, numpy.newaxis]
             xi[t] = transition_matrix.T + fwd_trans + backward[t+1] + \
                         emission_matrix[t+1]
             # This takes a (relatively) long time
             total = numpy.logaddexp2.reduce(xi[t])
             xi[t] -= total
         # Exponentiate all the probabilities again
         # This also takes a while
         xi = numpy.exp2(xi)
     
     return xi
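For a single timestep, the non-log branch computes xi_t(i, j) proportional to forward[t][i] * P(j | i) * emission[t+1][j] * backward[t+1][j]. An equivalent one-timestep formulation using einsum, written as a sketch rather than the original code:

import numpy

N = 3
alpha_t = numpy.random.rand(N)    # scaled forward probs at time t
beta_t1 = numpy.random.rand(N)    # scaled backward probs at time t+1
trans = numpy.random.rand(N, N)   # trans[i, j] = P(state j | state i)
em_t1 = numpy.random.rand(N)      # emission probs at time t+1
xi_t = numpy.einsum('i,ij,j,j->ij', alpha_t, trans, em_t1, beta_t1)
xi_t /= xi_t.sum()                # normalize over the whole timestep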
Example #26
def sequence_updates(sequence,
                     last_model,
                     empty_arrays,
                     array_ids,
                     update_initial=True):
    """
    Evaluates the forward/backward probability matrices for a 
    single sequence under the model that came from the previous 
    iteration and returns matrices that contain the updates 
    to be made to the distributions during this iteration.
    
    This is wrapped up in a function so it can be run in 
    parallel for each sequence. Once all sequences have been 
    evaluated, the results are combined and the model is updated.
    
    @type update_initial: bool
    @param update_initial: if update_initial=False, 
        the initial state distribution updates won't be made for this sequence
    
    """
    try:
        (initial_keys, initial_chords, key_trans, chord_trans, ems,
         initial_keys_denom, initial_chords_denom, key_trans_denom,
         chord_trans_denom, ems_denom) = empty_arrays
        chord_ids, chord_type_ids = array_ids

        # Compute the forwards with seq_prob=True
        fwds, seq_logprob = last_model.normal_forward_probabilities(
            sequence, seq_prob=True)
        # gamma contains the state occupation probability for each state at each
        #  timestep
        gamma = last_model.gamma_probabilities(sequence, forward=fwds)
        # xi contains the probability of every state transition at every timestep
        xi = last_model.compute_xi(sequence)

        label_dom = last_model.label_dom
        # Enumerate the label dom
        state_ids = dict([(state, id) for (id, state) in enumerate(label_dom)])
        T = len(sequence)

        for time in range(T):
            for state in label_dom:
                keyi, rooti, labeli = state
                state_i = state_ids[state]
                chord_i = chord_ids[((rooti - keyi) % 12, labeli)]

                if time == 0:
                    # Update initial distributions
                    initial_keys[keyi] += gamma[time][state_i]
                    initial_chords[chord_i] += gamma[time][state_i]

                if time == T - 1:
                    # Last timestep
                    # Update the transition dists for transitions to final state
                    chord_trans[chord_i][-1] += gamma[time][state_i]
                else:
                    # Go through all possible pairs of states to update the
                    #  transition distributions
                    for next_state in label_dom:
                        keyj, rootj, labelj = next_state
                        state_j = state_ids[next_state]
                        chord_j = chord_ids[((rootj - keyj) % 12, labelj)]
                        key_change = (keyj - keyi) % 12

                        ## Transition dist updates ##
                        key_trans[key_change] += xi[time][state_i][state_j]
                        chord_trans[chord_i][chord_j] += xi[time][state_i][
                            state_j]

                ## Emission dist update ##
                for note in sequence[time]:
                    pc = (note - rooti) % 12
                    ems[chord_type_ids[labeli]][pc] += gamma[time][state_i]

        # Calculate the denominators by summing
        initial_keys_denom[0] = array_sum(initial_keys)
        initial_chords_denom[0] = array_sum(initial_chords)
        key_trans_denom[0] = array_sum(key_trans)
        chord_trans_denom = array_sum(chord_trans, axis=1)
        ems_denom = array_sum(ems, axis=1)

        # Wrap this all up in a tuple to return to the master
        return (initial_keys, initial_chords, key_trans, chord_trans, ems,
                initial_keys_denom, initial_chords_denom, key_trans_denom,
                chord_trans_denom, ems_denom, seq_logprob)
    except KeyboardInterrupt:
        return
Example #27
def _sequence_updates(sequence,
                      last_model,
                      label_dom,
                      schema_ids,
                      emission_cond_ids,
                      update_initial=True,
                      catch_interrupt=False):
    """
    Evaluates the forward/backward probability matrices for a 
    single sequence under the model that came from the previous 
    iteration and returns matrices that contain the updates 
    to be made to the distributions during this iteration.
    
    This is wrapped up in a function so it can be run in 
    parallel for each sequence. Once all sequences have been 
    evaluated, the results are combined and the model is updated.
    
    @type update_initial: bool
    @param update_initial: usually you want to update all distributions, 
        including the initial state distribution. If update_initial=False, 
        the initial state distribution updates won't be made for this sequence. 
        We want this when the sequence is actually a non-initial fragment of 
        a longer sequence
    @type catch_interrupt: bool
    @param catch_interrupt: catch KeyboardInterrupt exceptions and return 
        None. This is useful behaviour when calling this in a process pool, 
        since it allows the parent process to handle the interrupt, but should 
        be set to False (default) if calling directly.
    
    """
    try:
        # Get the sizes we'll need for the matrix
        num_schemata = len(last_model.schemata)
        num_root_changes = 12
        num_chord_classes = len(last_model.chord_classes)
        num_emission_conds = len(emission_cond_ids)
        num_emissions = 12

        T = len(sequence)

        state_ids = dict([(state,id) for (id,state) in \
                                        enumerate(last_model.label_dom)])

        # Local versions of the matrices store the accumulated values
        #  for just this sequence (so we can normalize before adding
        #  to the global matrices)
        # The numerators
        schema_trans = zeros((num_schemata, num_schemata + 1), float64)
        root_trans = zeros((num_schemata, num_schemata, num_root_changes),
                           float64)
        ems = zeros((num_emission_conds, num_emissions), float64)
        sinit = zeros(num_schemata, float64)

        # Compute the forward and backward probabilities
        # These are normalized, but that makes no difference to the outcome of
        #  compute_gamma and compute_xi
        alpha, scale, seq_logprob = last_model.normal_forward_probabilities(
            sequence, array=True)
        beta, scale = last_model.normal_backward_probabilities(sequence,
                                                               array=True)
        # gamma contains the state occupation probability for each state at each
        #  timestep
        gamma = last_model.compute_gamma(sequence,
                                         forward=alpha,
                                         backward=beta)
        # xi contains the probability of every state transition at every timestep
        xi = last_model.compute_xi(sequence, forward=alpha, backward=beta)

        # Update the initial state distribution if requested
        if update_initial:
            for state in label_dom:
                schema, root, chord_class = state
                schema_i = schema_ids[schema]
                # Add this contribution to the sum of the states with this schema
                sinit[schema_i] += gamma[0][state_ids[state]]

        for time in range(T):
            for state in label_dom:
                schema, root, chord_class = state
                schema_i = schema_ids[schema]
                state_i = state_ids[state]

                if time < T - 1:
                    # Go through all possible pairs of states to update the
                    #  transition distributions
                    for next_state in label_dom:
                        next_schema, next_root, next_chord_class = next_state
                        schema_j = schema_ids[next_schema]
                        state_j = state_ids[next_state]

                        ## Transition dist update ##
                        root_change = (next_root - root) % 12
                        schema_trans[schema_i][schema_j] += \
                                                    xi[time][state_i][state_j]
                        root_trans[schema_i][schema_j][root_change] += \
                                                    xi[time][state_i][state_j]
                else:
                    # Final state: update the probs of transitioning to end
                    schema_trans[schema_i][num_schemata] += gamma[T -
                                                                  1][state_i]

                ## Emission dist update ##
                # Add the state occupation probability to the emission numerator
                #  for every note
                for pc, beat in sequence[time]:
                    # Take the pitch class relative to the root
                    rel_pc = (pc - root) % 12
                    ems[emission_cond_ids[(chord_class,beat)]][rel_pc] += \
                                                gamma[time][state_i]

        # Calculate the denominators
        schema_trans_denom = array_sum(schema_trans, axis=1)
        root_trans_denom = array_sum(root_trans, axis=2)
        ems_denom = array_sum(ems, axis=1)
        # This should come to 1.0 when the initial updates were made, since 
        #  the gamma values at the first timestep sum to 1 over all states
        sinit_denom = array_sum(sinit)

        # Wrap this all up in a tuple to return to the master
        return (schema_trans, root_trans, ems, sinit, \
                schema_trans_denom, root_trans_denom, ems_denom, sinit_denom, \
                seq_logprob)
    except KeyboardInterrupt:
        if catch_interrupt:
            return
        else:
            raise
Example #28
def _sequence_updates(sequence, last_model, label_dom, schema_ids, 
        emission_cond_ids, update_initial=True, catch_interrupt=False):
    """
    Evaluates the forward/backward probability matrices for a 
    single sequence under the model that came from the previous 
    iteration and returns matrices that contain the updates 
    to be made to the distributions during this iteration.
    
    This is wrapped up in a function so it can be run in 
    parallel for each sequence. Once all sequences have been 
    evaluated, the results are combined and the model is updated.
    
    @type update_initial: bool
    @param update_initial: usually you want to update all distributions, 
        including the initial state distribution. If update_initial=False, 
        the initial state distribution updates won't be made for this sequence. 
        We want this when the sequence is actually a non-initial fragment of 
        a longer sequence
    @type catch_interrupt: bool
    @param catch_interrupt: catch KeyboardInterrupt exceptions and return 
        None. This is useful behaviour when calling this in a process pool, 
        since it allows the parent process to handle the interrupt, but should 
        be set to False (default) if calling directly.
    
    """
    try:
        # Get the sizes we'll need for the matrix
        num_schemata = len(last_model.schemata)
        num_root_changes = 12
        num_chord_classes = len(last_model.chord_classes)
        num_emission_conds = len(emission_cond_ids)
        num_emissions = 12
        
        T = len(sequence)
        
        state_ids = dict([(state,id) for (id,state) in \
                                        enumerate(last_model.label_dom)])
        
        # Local versions of the matrices store the accumulated values 
        #  for just this sequence (so we can normalize before adding 
        #  to the global matrices)
        # The numerators
        schema_trans = zeros((num_schemata,num_schemata+1), float64)
        root_trans = zeros((num_schemata,num_schemata,num_root_changes), float64)
        ems = zeros((num_emission_conds,num_emissions), float64)
        sinit = zeros(num_schemata, float64)
        
        # Compute the forward and backward probabilities
        # These are normalized, but that makes no difference to the outcome of 
        #  compute_gamma and compute_xi
        alpha,scale,seq_logprob = last_model.normal_forward_probabilities(sequence, array=True)
        beta,scale = last_model.normal_backward_probabilities(sequence, array=True)
        # gamma contains the state occupation probability for each state at each 
        #  timestep
        gamma = last_model.compute_gamma(sequence, forward=alpha, backward=beta)
        # xi contains the probability of every state transition at every timestep
        xi = last_model.compute_xi(sequence, forward=alpha, backward=beta)
        
        # Update the initial state distribution if requested
        if update_initial:
            for state in label_dom:
                schema, root, chord_class = state
                schema_i = schema_ids[schema]
                # Add this contribution to the sum of the states with this schema
                sinit[schema_i] += gamma[0][state_ids[state]]
        
        for time in range(T):
            for state in label_dom:
                schema, root, chord_class = state
                schema_i = schema_ids[schema]
                state_i = state_ids[state]
                
                if time < T-1:
                    # Go through all possible pairs of states to update the 
                    #  transition distributions
                    for next_state in label_dom:
                        next_schema, next_root, next_chord_class = next_state
                        schema_j = schema_ids[next_schema]
                        state_j = state_ids[next_state]
                        
                        ## Transition dist update ##
                        root_change = (next_root - root) % 12
                        schema_trans[schema_i][schema_j] += \
                                                    xi[time][state_i][state_j]
                        root_trans[schema_i][schema_j][root_change] += \
                                                    xi[time][state_i][state_j]
                else:
                    # Final state: update the probs of transitioning to end
                    schema_trans[schema_i][num_schemata] += gamma[T-1][state_i]
                
                ## Emission dist update ##
                # Add the state occupation probability to the emission numerator 
                #  for every note
                for pc,beat in sequence[time]:
                    # Take the pitch class relative to the root
                    rel_pc = (pc - root) % 12
                    ems[emission_cond_ids[(chord_class,beat)]][rel_pc] += \
                                                gamma[time][state_i]
        
        # Calculate the denominators
        schema_trans_denom = array_sum(schema_trans, axis=1)
        root_trans_denom = array_sum(root_trans, axis=2)
        ems_denom = array_sum(ems, axis=1)
        # This should come to 1.0 when the initial updates were made, since 
        #  the gamma values at the first timestep sum to 1 over all states
        sinit_denom = array_sum(sinit)
                
        # Wrap this all up in a tuple to return to the master
        return (schema_trans, root_trans, ems, sinit, \
                schema_trans_denom, root_trans_denom, ems_denom, sinit_denom, \
                seq_logprob)
    except KeyboardInterrupt:
        if catch_interrupt:
            return
        else:
            raise
Example #29
    def compute_xi(self,
                   sequence,
                   forward=None,
                   backward=None,
                   emission_matrix=None,
                   transition_matrix=None,
                   use_logs=False):
        """
        Computes the xi matrix used by Baum-Welch. It is the matrix of joint 
        probabilities of occupation of pairs of consecutive states: 
        P(i_t, j_{t+1} | O).
        
        As with L{compute_gamma} forward and backward matrices can optionally 
        be passed in to avoid recomputing.
        
        @type use_logs: bool
        @param use_logs: by default, this function does not use logs in its 
            calculations. This can lead to underflow if your forward/backward 
            matrices have sufficiently low values. If C{use_logs=True}, logs 
            will be used internally (though the returned values are 
            exponentiated again). This makes the function an order of magnitude 
            slower.
        
        """
        if forward is None:
            forward = self.normal_forward_probabilities(sequence)
        if backward is None:
            backward = self.normal_backward_probabilities(sequence)
        # T is the number of timesteps
        # N is the number of states
        T, N = forward.shape

        # Create the empty array to fill
        xi = numpy.zeros((T - 1, N, N), numpy.float64)

        # Precompute all the emission probabilities
        if emission_matrix is None:
            emission_matrix = self.get_emission_matrix(sequence)
        # And transition probabilities: we'll need these many times over
        if transition_matrix is None:
            transition_matrix = self.get_transition_matrix()

        if not use_logs:
            # Do it without logs - much faster
            for t in range(T - 1):
                total = 0.0
                # Transpose the forward probabilities so that we multiply them
                #  along the vertical axis
                fwd_trans = forward[t, :, numpy.newaxis]
                # Compute the xi values by multiplying the arrays together
                xi[t] = transition_matrix.T * fwd_trans * backward[t+1] * \
                            emission_matrix[t+1]
                # Normalize all the probabilities
                # Sum all the probs for the timestep and divide them all by total
                total = array_sum(xi[t])
                xi[t] /= total
        else:
            # Take logs of all the matrices we need
            emission_matrix = numpy.log2(emission_matrix)
            transition_matrix = numpy.log2(transition_matrix)
            forward = numpy.log2(forward)
            backward = numpy.log2(backward)

            for t in range(T - 1):
                total = 0.0
                fwd_trans = forward[t, :, numpy.newaxis]
                xi[t] = transition_matrix.T + fwd_trans + backward[t+1] + \
                            emission_matrix[t+1]
                # This takes a (relatively) long time
                total = numpy.logaddexp2.reduce(xi[t])
                xi[t] -= total
            # Exponentiate all the probabilities again
            # This also takes a while
            xi = numpy.exp2(xi)

        return xi