# Prepare ground-truth GFs (bars)
    W_gt = np.zeros((H, D2, D2))
    for i in xrange(D2):
        W_gt[i, i, :] = 10.0
        W_gt[D2 + i, :, i] = 10.0
    W_gt = W_gt.reshape((H, D))

    # Prepare model...
    model = MCA_ET(D, H, Hprime, gamma)
    gt_params = {"W": W_gt, "pi": 2.0 / H, "sigma": 1.00}

    # Generate training data
    my_N = N // comm.size
    my_data = model.generate_data(gt_params, my_N)
    dlog.append("y", my_data["y"][0:25, :])

    # Initialize model parameters (to be learned)
    params = {
        #    'W'     : W_gt,
        "W": np.abs(5 + np.random.normal(size=W_gt.shape)),
        "pi": 2 / H,
        "sigma": 5.00,
    }
    # params = model.noisify_params(params, anneal)
    params = comm.bcast(params)

    # Create and start EM annealing
    em = EM(model=model, anneal=anneal)
    em.data = my_data
    em.lparams = params
Exemple #2
0
    def M_step(self, anneal, model_params, my_suff_stat, my_data):
        """ MCA M_step

        Accumulate per-datapoint sufficient statistics from the E-step
        log-joints, reduce them over all ranks of self.comm and return the
        updated model parameters together with a log-likelihood estimate.

        model_params variables used:

            model_params['W']      Basis functions, indexed [cause, dim]
            model_params['pi']     Prior activation probability
            model_params['sigma']  Noise standard deviation

        my_suff_stat variables used:

            my_suff_stat['logpj']  Log-joints, indexed [datapoint, state];
                                   column 0 is the zero-cause state, columns
                                   1..H the single-cause states, the rest the
                                   states listed in self.state_matrix

        my_data variables used:

            my_data['y']           Datapoints
            my_data['candidates']  Candidate H's according to selection func.

        Annealing variables used:

            anneal['T']            Temperature for det. annealing AND softmax
            anneal['Ncut_factor']  0.: no truncation; 1. trunc. according to model

        Returns a dict with keys 'W', 'pi' and 'sigma' (updated values, or
        the incoming values for parameters not listed in self.to_learn) and
        'Q' (the log-likelihood estimate computed at the end).
        """
        comm = self.comm
        # NOTE(review): Hprime and gamma are bound here but not referenced
        # again in this method (the loop below reads self.gamma directly).
        H, Hprime = self.H, self.Hprime
        gamma = self.gamma
        W = model_params['W']
        pies = model_params['pi']
        sigma = model_params['sigma']

        # Read in data:
        my_y = my_data['y']
        my_cand = my_data['candidates']
        my_logpj = my_suff_stat['logpj']
        my_N, D = my_y.shape
        N = comm.allreduce(my_N)  # total number of datapoints over all ranks

        state_mtx = self.state_matrix  # shape: (no_states, Hprime)
        state_abs = self.state_abs  # shape: (no_states,)
        no_states = len(state_abs)  # NOTE(review): not used below

        # To compute et_loglike:
        my_ldenom_sum = 0.0
        ldenom_sum = 0.0

        # Precompute
        T = anneal['T']
        # rho is derived from the temperature, which is clamped from below
        # by self.rho_temp_bound before use.
        T_rho = np.maximum(T, self.rho_temp_bound)
        rho = 1. / (1. - 1. / T_rho)
        beta = 1. / T
        # NOTE(review): pre0 and pre1 are not referenced again in this method.
        pre0 = (1. - rho) / rho
        pre1 = -1. / 2. / sigma / sigma
        pil_bar = np.log(pies / (1. - pies))  # only checked by the assert below
        Wl = np.log(W)
        Wrho = np.exp(rho * Wl)  # elementwise W**rho, computed in log space
        Wsquared = W * W

        # Some asserts
        assert np.isfinite(pil_bar).all()
        assert np.isfinite(Wl).all()
        assert np.isfinite(Wrho).all()
        assert (Wrho > 1e-86).all()

        # Subtract the per-datapoint maximum before exponentiating so the
        # largest entry of every row of my_pjb is exp(0) = 1.
        my_corr = beta * ((my_logpj).max(axis=1))  # shape: (my_N,)
        my_pjb = np.exp(beta * my_logpj -
                        my_corr[:, None])  # shape: (my_N, no_states)

        # Precompute factor for pi/gamma update
        # A_pi_gamma: sum over gp = 0..gamma of the binomial terms
        #             comb(H, gp) * pi**gp * (1 - pi)**(H - gp)
        # B_pi_gamma: the same sum with every term weighted by gp
        A_pi_gamma = 0.
        B_pi_gamma = 0.
        for gp in xrange(0, self.gamma + 1):
            a = comb(H, gp, exact=1) * pies**gp * (1. - pies)**(H - gp)
            A_pi_gamma += a
            B_pi_gamma += gp * a

        # Truncate data
        if anneal['Ncut_factor'] > 0.0:
            tracing.tracepoint("M_step:truncating")
            # Log of the (annealed) per-datapoint denominator; my_corr undoes
            # the max-shift applied when my_pjb was built.
            my_denoms = np.log(my_pjb.sum(axis=1)) + my_corr
            N_use = int(N * (1 - (1 - A_pi_gamma) * anneal['Ncut_factor']))

            # Threshold is the N_use-th entry from the end of what
            # parallel.allsort returns; datapoints at or above it are kept.
            # NOTE(review): presumably allsort returns the globally sorted
            # (ascending) values -- confirm against the parallel module.
            cut_denom = parallel.allsort(my_denoms)[-N_use]
            which = np.array(my_denoms >= cut_denom)

            my_y = my_y[which]
            my_cand = my_cand[which]
            my_logpj = my_logpj[which]
            my_pjb = my_pjb[which]
            my_corr = my_corr[which]
            my_N, D = my_y.shape
            # Recount: the number actually kept is summed over all ranks.
            N_use = comm.allreduce(my_N)
        else:
            N_use = N
        dlog.append('N_use', N_use)

        # Allocate suff-stat arrays
        my_Wp = np.zeros_like(W)  # shape (H, D)
        my_Wq = np.zeros_like(W)  # shape (H, D)
        my_pi = 0.0  #
        my_sigma = 0.0  #

        # Iterate over all datapoints
        for n in xrange(my_N):
            tracing.tracepoint("M_step:iterating")
            y = my_y[n, :]  # shape (D,)
            cand = my_cand[n, :]  # shape (Hprime,)
            logpj = my_logpj[n, :]  # shape (no_states,)
            pjb = my_pjb[n, :]  # shape (no_states,)
            corr = my_corr[n]  # scalar

            this_Wp = np.zeros_like(
                W)  # numerator for W (current datapoint)   (H, D)
            this_Wq = np.zeros_like(
                W)  # denominator for W (current datapoint) (H, D)
            this_pi = 0.0  # numerator for pi update (current datapoint)
            this_sigma = 0.0  # numerator for sigma update (current datapoint)

            # Zero active hidden causes
            # this_Wp += 0.     # nothing to do
            # this_Wq += 0.     # nothing to do
            # this_pi += 0.     # nothing to do
            this_sigma += pjb[0] * (y**2).sum()

            # One active hidden cause
            this_Wp += (pjb[1:(H + 1), None] * Wsquared[:, :]) * y[None, :]
            this_Wq += (pjb[1:(H + 1), None] * Wsquared[:, :])
            this_pi += pjb[1:(H + 1)].sum()
            this_sigma += (pjb[1:(H + 1)] * ((W - y)**2).sum(axis=1)).sum()

            # Handle hidden states with more than 1 active cause
            W_ = W[cand]  # is (Hprime, D); NOTE(review): not used below
            Wl_ = Wl[cand]  # is (   "    ")
            Wrho_ = Wrho[cand]  # is (   "    ")

            Wlrhom1 = (rho - 1) * Wl_  # is (Hprime, D)
            # Log of the rho-norm combination of the candidate rows selected
            # by each state: log((state_mtx . W**rho) ** (1 / rho)).
            Wlbar = np.log(np.dot(state_mtx, Wrho_)) / rho  # is (no_states, D)
            Wbar = np.exp(Wlbar)  # is (no_states, D)
            # Same max-shifted log-weights as pjb[1 + H:], kept in log space.
            blpj = beta * logpj[1 + H:] - corr  # is (no_states,)

            # Per (candidate, dim) weight: sum over the states containing the
            # candidate of exp(blpj) * (W / Wbar)**(rho - 1), formed in log
            # space; result is (Hprime, D).
            Aid = (state_mtx[:, :, None] *
                   np.exp(blpj[:, None, None] + (1 - rho) * Wlbar[:, None, :] +
                          Wlrhom1[None, :, :])).sum(axis=0)

            assert np.isfinite(Wlbar).all()
            assert np.isfinite(Wbar).all()
            assert np.isfinite(pjb).all()
            assert np.isfinite(Aid).all()

            this_Wp[cand] += Aid * y[None, :]
            this_Wq[cand] += Aid
            this_pi += (pjb[1 + H:] * state_abs).sum()
            this_sigma += (pjb[1 + H:] * ((Wbar - y)**2).sum(axis=1)).sum()

            # Normalise this datapoint's contributions by the sum of its
            # (shifted) state weights.
            denom = pjb.sum()
            my_Wp += this_Wp / denom
            my_Wq += this_Wq / denom
            my_pi += this_pi / denom
            my_sigma += this_sigma / denom

            # Uses the un-annealed, un-shifted log-joints.
            my_ldenom_sum += np.log(np.sum(
                np.exp(logpj)))  # For loglike computation

        # Calculate updated W
        if 'W' in self.to_learn:
            tracing.tracepoint("M_step:update W")

            Wp = np.empty_like(my_Wp)
            Wq = np.empty_like(my_Wq)

            assert np.isfinite(my_Wp).all()
            assert np.isfinite(my_Wq).all()

            comm.Allreduce([my_Wp, MPI.DOUBLE], [Wp, MPI.DOUBLE])
            comm.Allreduce([my_Wq, MPI.DOUBLE], [Wq, MPI.DOUBLE])

            # Make sure we do not divide by zero: entries with a vanishing
            # denominator end up as exactly 0 in W_new.
            tiny = np.finfo(Wq.dtype).tiny
            Wp[Wq < tiny] = 0.
            Wq[Wq < tiny] = tiny

            W_new = Wp / Wq
        else:
            W_new = W

        # Calculate updated pi
        if 'pi' in self.to_learn:
            tracing.tracepoint("M_step:update pi")

            assert np.isfinite(my_pi).all()
            pi_new = A_pi_gamma / B_pi_gamma * pies * comm.allreduce(
                my_pi) / N_use
        else:
            pi_new = pies

        # Calculate updated sigma
        if 'sigma' in self.to_learn:  # TODO: XXX see LinCA XXX (merge!)
            tracing.tracepoint("M_step:update sigma")

            assert np.isfinite(my_sigma).all()
            sigma_new = np.sqrt(comm.allreduce(my_sigma) / D / N_use)
        else:
            sigma_new = sigma

        # Put all together and compute (always) et_approx_likelihood
        ldenom_sum = comm.allreduce(my_ldenom_sum)
        # NOTE(review): `D / 2` truncates for odd D under Python 2 unless
        # true division is enabled at file level -- confirm.
        lAi = (H * np.log(1. - pi_new)) - (
            (D / 2) * np.log(2 * pi)) - (D * np.log(sigma_new))

        # For practical and ET-approximation reasons the per-datapoint
        # constant lAi is simply counted once for each of the N_use datapoints
        loglike_et = (lAi * N_use) + ldenom_sum

        return {'W': W_new, 'pi': pi_new, 'sigma': sigma_new, 'Q': loglike_et}
Exemple #3
0
    def E_step(self, anneal, model_params, my_data):
        """ BSC E_step

        Compute, for every local datapoint, the log-joint of the zero-cause
        state, of the H single-cause states and of the multi-cause states
        listed in self.state_matrix (applied to the datapoint's candidates).

        my_data variables used:

            my_data['y']           Datapoints, shape (my_N, D)
            my_data['candidates']  Candidate H's according to selection func.

        my_data variables written:

            my_data['data_noise']  Noise sample that was added to the data;
                                   only written if anneal['data_noise'] > 0

        Annealing variables used:

            anneal['T']            Temperature for det. annealing
            anneal['data_noise']   Scale of the Gaussian noise added to y
            anneal['anneal_prior'] If true, the prior term is annealed too

        model_params must provide 'W', 'pi' and 'sigma'; a missing 'mu' is
        created as a zero vector and stored back into model_params.

        Returns {'logpj': F} with F of shape (my_N, 1 + H + no_states).
        """
        # Work on a copy so that the optional noise never touches my_data['y']
        my_y = my_data['y'].copy()
        my_cand = my_data['candidates']
        my_N, D = my_y.shape
        H = self.H

        SM = self.state_matrix  # shape: (no_states, Hprime)
        state_abs = self.state_abs  # shape: (no_states,)

        W = model_params['W']
        pies = model_params['pi']
        sigma = model_params['sigma']
        try:
            mu = model_params['mu']
        except KeyError:
            # No offset given: default to zero and publish it to the caller
            mu = np.zeros(D)
            model_params['mu'] = mu

        # Precompute
        beta = 1. / anneal['T']
        pre1 = -1. / 2. / sigma / sigma
        pil_bar = np.log(pies / (1. - pies))

        # Allocate return structures
        F = np.empty([my_N, 1 + H + self.no_states])
        pre_F = np.empty([my_N, 1 + H + self.no_states])

        # Joerg's data noise idea
        data_noise_scale = anneal['data_noise']
        dlog.append('Data Noise', data_noise_scale)
        if data_noise_scale > 0.:
            # The sample is stored so that later steps can apply the very
            # same perturbation to their own copy of the data.
            my_data['data_noise'] = np.random.normal(scale=data_noise_scale,
                                                     size=my_y.shape)
            my_y += my_data['data_noise']

        # Pre-fill pre_F (prior term: pil_bar times number of active causes):
        pre_F[:, 0] = 0.
        pre_F[:, 1:H + 1] = pil_bar
        pre_F[:, 1 + H:] = pil_bar * state_abs  # is (no_states,)

        # Iterate over all datapoints
        tracing.tracepoint("E_step:iterating")
        for n in xrange(my_N):
            # Use the (possibly noised) copy; reading my_data['y'] here would
            # silently discard the noise added above.
            y = my_y[n, :] - mu
            cand = my_cand[n, :]

            # Zero active hidden causes
            F[n, 0] = pre1 * (y**2).sum()

            # Hidden states with one active cause
            F[n, 1:H + 1] = pre1 * ((W - y)**2).sum(axis=1)

            # Handle hidden states with more than 1 active cause
            W_ = W[cand]  # is (Hprime x D)
            Wbar = np.dot(SM, W_)
            F[n, 1 + H:] = pre1 * ((Wbar - y)**2).sum(axis=1)

        if anneal['anneal_prior']:
            F = beta * (pre_F + F)
        else:
            F = pre_F + beta * F

        return {'logpj': F}
Exemple #4
0
    def M_step(self, anneal, model_params, my_suff_stat, my_data):
        """ MCA M_step

        Accumulate per-datapoint sufficient statistics from the E-step
        log-joints, reduce them over all ranks of self.comm and return the
        updated model parameters, a reverse-correlation estimate and a
        log-likelihood estimate.

        model_params variables used:

            model_params['W']      Basis functions, indexed [cause, dim]
            model_params['pi']     Prior activation probability
            model_params['sigma']  Noise standard deviation

        my_suff_stat variables used:

            my_suff_stat['logpj']  Log-joints, indexed [datapoint, state]

        my_data variables used:

            my_data['y']           Datapoints
            my_data['candidates']  Candidate H's according to selection func.
            my_data['y_rc']        Data to reverse-correlate against; only
                                   read if self.rev_corr is true

        Annealing variables used:

            anneal['T']            Temperature for det. annealing AND softmax
            anneal['Ncut_factor']  0.: no truncation; 1. trunc. according to model

        Returns a dict with keys 'W', 'pi', 'sigma' (updated values, or the
        incoming values for parameters not listed in self.to_learn),
        'rev_corr' (zeros of shape (H, D) when self.rev_corr is false) and
        'Q' (the log-likelihood estimate).
        """
        comm = self.comm
        H = self.H
        W = model_params['W']
        pies = model_params['pi']
        sigma = model_params['sigma']

        # Read in data:
        my_y = my_data['y']
        my_cand = my_data['candidates']
        my_logpj = my_suff_stat['logpj']
        my_N, D = my_y.shape
        N = comm.allreduce(my_N)

        state_mtx = self.state_matrix  # shape: (no_states, Hprime)
        state_abs = self.state_abs  # shape: (no_states,)

        # Disable divide/underflow warnings for the numerics below. The
        # context manager restores the previous settings even if one of the
        # asserts (or anything else) raises.
        with np.errstate(divide='ignore', under='ignore'):
            # To compute et_loglike:
            my_ldenom_sum = 0.0

            # Precompute
            T = anneal['T']
            T_rho = np.maximum(T, self.rho_T_bound)
            rho = 1. / (1. - 1. / T_rho)
            # Clamp rho into [self.rho_lbound, self.rho_ubound]
            rho = np.maximum(np.minimum(rho, self.rho_ubound), self.rho_lbound)
            beta = 1. / T
            pil_bar = np.log(pies / (1. - pies))
            Wl = accel.log(np.abs(W))
            Wrho = accel.exp(rho * Wl)  # |W|**rho
            Wrhos = np.sign(W) * Wrho  # signed |W|**rho

            # Some asserts
            assert np.isfinite(pil_bar).all()
            assert np.isfinite(Wl).all()
            assert np.isfinite(Wrho).all()
            assert (Wrho > 1e-86).all()

            # Shift by the per-datapoint maximum before exponentiating
            my_corr = beta * my_logpj.max(axis=1)  # shape: (my_N,)
            my_logpjb = beta * my_logpj - my_corr[:, None]  # (my_N, no_states)
            my_pjb = accel.exp(my_logpjb)  # shape: (my_N, no_states)

            # Precompute factor for pi update and ET cutting
            A_pi_gamma = 0.
            B_pi_gamma = 0.
            for gp in xrange(0, self.gamma + 1):
                a = comb(H, gp) * pies**gp * (1. - pies)**(H - gp)
                A_pi_gamma += a
                B_pi_gamma += gp * a

            # Truncate data
            if anneal['Ncut_factor'] > 0.0:
                tracing.tracepoint("M_step:truncating")
                my_logdenoms = accel.log(my_pjb.sum(axis=1)) + my_corr
                N_use = int(N * (1 - (1 - A_pi_gamma) * anneal['Ncut_factor']))

                cut_denom = parallel.allsort(my_logdenoms)[-N_use]
                my_sel, = np.where(my_logdenoms >= cut_denom)
                my_N, = my_sel.shape
                # Recount: the number actually kept, summed over all ranks
                N_use = comm.allreduce(my_N)
            else:
                my_sel = np.arange(my_N)
                N_use = N

            # Allocate suff-stat arrays
            my_Wp = np.zeros_like(W)  # shape (H, D)
            my_Wq = np.zeros_like(W)  # shape (H, D)
            my_pi = 0.0
            my_sigma = 0.0

            # Do reverse correlation if requested
            if self.rev_corr:
                my_y_rc = my_data['y_rc']
                D_rev_corr = my_y_rc.shape[1]
                my_rev_corr = np.zeros((H, D_rev_corr))
                my_rev_corr_count = np.zeros(H)

            # Iterate over all selected datapoints
            tracing.tracepoint("M_step:iterating...")
            dlog.append('N_use', N_use)
            for n in my_sel:
                y = my_y[n, :]  # shape (D,)
                cand = my_cand[n, :]  # shape (Hprime,)
                logpj = my_logpj[n, :]  # shape (no_states,)
                logpjb = my_logpjb[n, :]  # shape (no_states,)
                pjb = my_pjb[n, :]  # shape (no_states,)

                this_Wp = np.zeros_like(W)  # numerator for W (this datapoint)
                this_Wq = np.zeros_like(W)  # denominator for W (this datapoint)
                this_pi = 0.0  # numerator for pi update (this datapoint)
                this_sigma = 0.0  # numerator for sigma update (this datapoint)

                # Zero active hidden causes: contributes to sigma only
                this_sigma += pjb[0] * (y**2).sum()

                # One active hidden cause
                this_Wp += (pjb[1:(H + 1), None]) * y[None, :]
                this_Wq += (pjb[1:(H + 1), None])
                this_pi += pjb[1:(H + 1)].sum()
                this_sigma += (pjb[1:(H + 1)] * ((W - y)**2).sum(axis=1)).sum()

                # Handle hidden states with more than 1 active cause
                Wl_ = Wl[cand]  # is (Hprime, D)
                Wrhos_ = Wrhos[cand]  # is (   "    ")

                t0 = np.dot(state_mtx, Wrhos_)
                Wlbar = accel.log(np.abs(t0)) / rho  # is (no_states, D)
                Wbar = np.sign(t0) * accel.exp(Wlbar)  # is (no_states, D)

                # log(|Wbar| / |W|), clipped at 0 from below, enters the
                # weights with exponent -(rho - 1).
                t = Wlbar[:, None, :] - Wl_[None, :, :]
                t = np.maximum(t, 0.)
                Aid = state_mtx[:, :, None] * accel.exp(
                    logpjb[H + 1:, None, None] - (rho - 1) * t)
                Aid = Aid.sum(axis=0)  # is (Hprime, D)

                this_Wp[cand] += Aid * y[None, :]
                this_Wq[cand] += Aid
                this_pi += (pjb[1 + H:] * state_abs).sum()
                this_sigma += (pjb[1 + H:] *
                               ((Wbar - y)**2).sum(axis=1)).sum()

                # Normalise by the sum of this datapoint's state weights
                denom = pjb.sum()
                my_Wp += this_Wp / denom
                my_Wq += this_Wq / denom
                my_pi += this_pi / denom
                my_sigma += this_sigma / denom

                # For loglike computation (un-annealed log-joints)
                my_ldenom_sum += accel.log(np.sum(accel.exp(logpj)))

                # Estimate reverse correlation
                if self.rev_corr:
                    pys = pjb / denom  # normalised state weights
                    if np.isfinite(pys).all():
                        my_rev_corr += pys[1:H + 1, None] * my_y_rc[n, None, :]
                        my_rev_corr_count += pys[1:H + 1]
                        my_rev_corr[cand] += np.sum(state_mtx[:, :, None] *
                                                    pys[H + 1:, None, None] *
                                                    my_y_rc[n, None, :],
                                                    axis=0)
                        # Weight every multi-cause state by its own posterior
                        # (slice from H + 1 on), exactly as in the numerator
                        # above; indexing the single element H + 1 would apply
                        # one state's weight to all of them.
                        my_rev_corr_count[cand] += np.sum(state_mtx[:, :] *
                                                          pys[H + 1:, None],
                                                          axis=0)
                    else:
                        # Parenthesised single argument prints the same text
                        # under Python 2 and Python 3.
                        print("Not all finite rev_corr %d" % n)

            # Calculate updated W
            if 'W' in self.to_learn:
                tracing.tracepoint("M_step:update W")

                Wp = np.empty_like(my_Wp)
                Wq = np.empty_like(my_Wq)

                assert np.isfinite(my_Wp).all()
                assert np.isfinite(my_Wq).all()

                comm.Allreduce([my_Wp, MPI.DOUBLE], [Wp, MPI.DOUBLE])
                comm.Allreduce([my_Wq, MPI.DOUBLE], [Wq, MPI.DOUBLE])

                # Make sure we do not divide by zero
                tiny = self.tol
                Wq[Wq < tiny] = tiny

                # Calculate updated W
                W_new = Wp / Wq

                # Add inertia depending on Wq: entries with little evidence
                # (small Wq) stay closer to the old W; at least 20% of the
                # new estimate is always mixed in.
                alpha = 2.5
                inertia = np.maximum(1. - accel.exp(-Wq / alpha), 0.2)
                W_new = inertia * W_new + (1 - inertia) * W
            else:
                W_new = W

            # Calculate updated pi
            if 'pi' in self.to_learn:
                tracing.tracepoint("M_step:update pi")

                assert np.isfinite(my_pi).all()
                pi_new = A_pi_gamma / B_pi_gamma * pies * comm.allreduce(
                    my_pi) / N_use
            else:
                pi_new = pies

            # Calculate updated sigma
            if 'sigma' in self.to_learn:  # TODO: XXX see LinCA XXX (merge!)
                tracing.tracepoint("M_step:update sigma")

                assert np.isfinite(my_sigma).all()
                sigma_new = np.sqrt(comm.allreduce(my_sigma) / D / N_use)
            else:
                sigma_new = sigma

            # Put all together and compute (always) et_approx_likelihood
            ldenom_sum = comm.allreduce(my_ldenom_sum)
            # 0.5 * D instead of D / 2: identical under true division and
            # not truncated for odd D under Python 2 integer division.
            lAi = (H * np.log(1. - pi_new)) - (
                (0.5 * D) * np.log(2 * pi)) - (D * np.log(sigma_new))

            # For practical and ET-approximation reasons the per-datapoint
            # constant lAi is counted once for each of the N_use datapoints
            loglike_et = (lAi * N_use) + ldenom_sum

            if self.rev_corr:
                rev_corr = np.empty_like(my_rev_corr)
                rev_corr_count = np.empty_like(my_rev_corr_count)
                comm.Allreduce([my_rev_corr, MPI.DOUBLE],
                               [rev_corr, MPI.DOUBLE])
                comm.Allreduce([my_rev_corr_count, MPI.DOUBLE],
                               [rev_corr_count, MPI.DOUBLE])
                # Small offset keeps the division finite for causes that
                # never received any weight
                rev_corr /= (1e-16 + rev_corr_count[:, None])
            else:
                rev_corr = np.zeros((H, D))

            return {
                'W': W_new,
                'pi': pi_new,
                'sigma': sigma_new,
                'rev_corr': rev_corr,
                'Q': loglike_et
            }
Exemple #5
0
    def E_step(self, anneal, model_params, my_data):
        """ BSC E_step

        Compute, for every local datapoint, the log-joint of the zero-cause
        state, of the H single-cause states and of the multi-cause states
        listed in self.state_matrix (applied to the datapoint's candidates).

        my_data variables used:

            my_data['y']           Datapoints, shape (my_N, D)
            my_data['candidates']  Candidate H's according to selection func.

        my_data variables written:

            my_data['data_noise']  Noise sample that was added to the data;
                                   only written if anneal['data_noise'] > 0

        Annealing variables used:

            anneal['T']            Temperature for det. annealing
            anneal['data_noise']   Scale of the Gaussian noise added to y
            anneal['anneal_prior'] If true, the prior term is annealed too

        model_params must provide 'W', 'pi' and 'sigma'; a missing 'mu' is
        created as a zero vector and stored back into model_params.

        Returns {'logpj': F} with F of shape (my_N, 1 + H + no_states).
        """
        # Work on a copy so that the optional noise never touches my_data['y']
        my_y      = my_data['y'].copy()
        my_cand   = my_data['candidates']
        my_N, D   = my_y.shape
        H = self.H

        SM = self.state_matrix        # shape: (no_states, Hprime)
        state_abs = self.state_abs           # shape: (no_states,)

        W         = model_params['W']
        pies      = model_params['pi']
        sigma     = model_params['sigma']
        try:
            mu = model_params['mu']
        except KeyError:
            # No offset given: default to zero and publish it to the caller
            mu = np.zeros(D)
            model_params['mu'] = mu

        # Precompute
        beta     = 1./anneal['T']
        pre1     = -1./2./sigma/sigma
        pil_bar  = np.log( pies/(1.-pies) )

        # Allocate return structures
        F = np.empty( [my_N, 1+H+self.no_states] )
        pre_F = np.empty( [my_N, 1+H+ self.no_states] )

        # Joerg's data noise idea
        data_noise_scale = anneal['data_noise']
        dlog.append('Data Noise', data_noise_scale)
        if data_noise_scale > 0.:
            # The sample is stored so that later steps can apply the very
            # same perturbation to their own copy of the data.
            my_data['data_noise'] = np.random.normal(scale=data_noise_scale, size=my_y.shape)
            my_y += my_data['data_noise']

        # Pre-fill pre_F (prior term: pil_bar times number of active causes):
        pre_F[:,0] = 0.
        pre_F[:,1:H+1] = pil_bar
        pre_F[:,1+H:] = pil_bar * state_abs   # is (no_states,)

        # Iterate over all datapoints
        tracing.tracepoint("E_step:iterating")
        for n in xrange(my_N):
            # Use the (possibly noised) copy; reading my_data['y'] here would
            # silently discard the noise added above.
            y    = my_y[n,:] - mu
            cand = my_cand[n,:]

            # Zero active hidden causes
            F[n,0] = pre1 * (y**2).sum()

            # Hidden states with one active cause
            F[n,1:H+1] = pre1 * ((W-y)**2).sum(axis=1)

            # Handle hidden states with more than 1 active cause
            W_ = W[cand]                          # is (Hprime x D)
            Wbar = np.dot(SM,W_)
            F[n,1+H:] = pre1 * ((Wbar-y)**2).sum(axis=1)

        if anneal['anneal_prior']:
            F = beta * (pre_F + F)
        else:
            F = pre_F + beta * F

        return { 'logpj': F }
Exemple #6
0
    def M_step(self, anneal, model_params, my_suff_stat, my_data):
        """ BSC M_step

        Accumulate per-datapoint sufficient statistics from the E-step
        log-joints, reduce them over all ranks of self.comm and return the
        updated model parameters.

        model_params variables used:

            model_params['W']      Basis functions, shape (H, D)
            model_params['pi']     Prior activation probability
            model_params['sigma']  Noise standard deviation
            model_params['mu']     Data offset, subtracted from every y

        my_suff_stat variables used:

            my_suff_stat['logpj']  Log-joints, indexed [datapoint, state]

        my_data variables used:

            my_data['y']           Datapoints
            my_data['candidates']  Candidate H's according to selection func.
            my_data['data_noise']  Noise to add to y; only read if
                                   anneal['data_noise'] > 0

        Annealing variables used:

            anneal['data_noise']   > 0: add my_data['data_noise'] to the data
            anneal['Ncut_factor']  0.: no truncation; 1. trunc. according to model
            anneal.crit_params     Names whose current values are reported
                                   through anneal.dyn_param

        Side effects: appends 'N', 'L' and 'N_use' to dlog.

        Returns a dict with keys 'W', 'pi', 'sigma' and 'mu' (updated values,
        or the incoming values for parameters not listed in self.to_learn).
        """
        comm = self.comm
        H = self.H
        gamma = self.gamma
        W = model_params['W']
        pies = model_params['pi']
        sigma = model_params['sigma']
        mu = model_params['mu']

        # Read in data (copy, because noise may be added below):
        my_y = my_data['y'].copy()
        candidates = my_data['candidates']
        logpj_all = my_suff_stat['logpj']
        all_denoms = np.exp(logpj_all).sum(axis=1)

        my_N, D = my_y.shape
        N = comm.allreduce(my_N)

        # Joerg's data noise idea
        data_noise_scale = anneal['data_noise']
        if data_noise_scale > 0:
            my_y += my_data['data_noise']

        SM = self.state_matrix  # shape: (no_states, Hprime)

        # Precompute factor for pi update
        # A_pi_gamma: sum over gamma_p = 0..gamma of the binomial terms,
        # B_pi_gamma: the same sum with every term weighted by gamma_p.
        A_pi_gamma = 0
        B_pi_gamma = 0
        for gamma_p in range(gamma + 1):
            A_pi_gamma += comb(H, gamma_p) * (pies**gamma_p) * (
                (1 - pies)**(H - gamma_p))
            B_pi_gamma += gamma_p * comb(H, gamma_p) * (pies**gamma_p) * (
                (1 - pies)**(H - gamma_p))
        E_pi_gamma = pies * H * A_pi_gamma / B_pi_gamma

        # Truncate data
        if anneal['Ncut_factor'] > 0.0:
            tracing.tracepoint("M_step:truncating")
            N_use = int(N * (1 - (1 - A_pi_gamma) * anneal['Ncut_factor']))
            cut_denom = parallel.allsort(all_denoms)[-N_use]
            which = np.array(all_denoms >= cut_denom)
            candidates = candidates[which]
            logpj_all = logpj_all[which]
            my_y = my_y[which]
            my_N, D = my_y.shape
            # Recount: the number actually kept, summed over all ranks
            N_use = comm.allreduce(my_N)
        else:
            N_use = N
        dlog.append('N', N_use)

        # Calculate truncated Likelihood
        L = H * np.log(1 - pies) - 0.5 * D * np.log(
            2 * pi * sigma**2) - np.log(A_pi_gamma)
        Fs = np.log(np.exp(logpj_all).sum(axis=1)).sum()
        L += comm.allreduce(Fs) / N_use
        dlog.append('L', L)

        # Precompute: shift by the per-datapoint maximum before exponentiating
        corr_all = logpj_all.max(axis=1)  # shape: (my_N,)
        pjb_all = np.exp(logpj_all -
                         corr_all[:, None])  # shape: (my_N, no_states)

        # Allocate
        my_Wp = np.zeros_like(W)  # shape (H, D)
        my_Wq = np.zeros((H, H))  # shape (H, H)
        my_pi = 0.0
        my_sigma = 0.0
        my_mus = np.zeros(H)  # shape (H,)
        data_sum = my_y.sum(axis=0)  # sum over all data points for mu update

        # Iterate over all datapoints
        tracing.tracepoint("M_step:iterating")
        for n in xrange(my_N):
            y = my_y[n, :] - mu  # length D
            cand = candidates[n, :]  # length Hprime
            pjb = pjb_all[n, :]

            # Zero active hidden cause: contributes nothing to W, pi and mu

            # One active hidden cause
            this_Wp = np.outer(pjb[1:(H + 1)], y)  # (H, D)
            this_Wq = pjb[1:(H + 1)] * np.identity(H)  # (H, H)
            this_pi = pjb[1:(H + 1)].sum()
            this_mus = pjb[1:(H + 1)].copy()

            # Handle hidden states with more than 1 active cause
            this_Wp[cand] += np.dot(np.outer(y, pjb[(1 + H):]), SM).T
            this_Wq_tmp = np.zeros_like(my_Wq[cand])
            this_Wq_tmp[:, cand] = np.dot(pjb[(1 + H):] * SM.T, SM)
            this_Wq[cand] += this_Wq_tmp
            this_pi += np.inner(pjb[(1 + H):], SM.sum(axis=1))
            this_mus[cand] += np.inner(SM.T, pjb[(1 + H):])

            # Normalise by the sum of this datapoint's state weights
            denom = pjb.sum()
            my_Wp += this_Wp / denom
            my_Wq += this_Wq / denom
            my_pi += this_pi / denom
            my_mus += this_mus / denom

        # Calculate updated W
        if 'W' in self.to_learn:
            tracing.tracepoint("M_step:update W")
            Wp = np.empty_like(my_Wp)
            Wq = np.empty_like(my_Wq)
            comm.Allreduce([my_Wp, MPI.DOUBLE], [Wp, MPI.DOUBLE])
            comm.Allreduce([my_Wq, MPI.DOUBLE], [Wq, MPI.DOUBLE])
            W_new = np.dot(np.linalg.inv(Wq), Wp)
        else:
            W_new = W

        # Calculate updated pi
        if 'pi' in self.to_learn:
            tracing.tracepoint("M_step:update pi")
            pi_new = E_pi_gamma * comm.allreduce(my_pi) / H / N_use
        else:
            pi_new = pies

        # Calculate updated sigma
        if 'sigma' in self.to_learn:
            tracing.tracepoint("M_step:update sigma")
            # Loop for sigma update:
            for n in xrange(my_N):
                y = my_y[n, :] - mu  # length D
                cand = candidates[n, :]  # length Hprime
                logpj = logpj_all[n, :]  # length no_states
                corr = logpj.max()  # scalar
                pjb = np.exp(logpj - corr)

                # Zero active hidden causes
                this_sigma = pjb[0] * (y**2).sum()

                # Hidden states with one active cause
                this_sigma += (pjb[1:(H + 1)] * ((W - y)**2).sum(axis=1)).sum()

                # Handle hidden states with more than 1 active cause
                W_ = W[cand]  # is (Hprime x D)
                Wbar = np.dot(SM, W_)
                this_sigma += (pjb[(H + 1):] *
                               ((Wbar - y)**2).sum(axis=1)).sum()

                denom = pjb.sum()
                my_sigma += this_sigma / denom

            sigma_new = np.sqrt(comm.allreduce(my_sigma) / D / N_use)
        else:
            sigma_new = sigma

        # Calculate updated mu:
        if 'mu' in self.to_learn:
            tracing.tracepoint("M_step:update mu")
            mus = np.empty_like(my_mus)
            all_data_sum = np.empty_like(data_sum)
            comm.Allreduce([my_mus, MPI.DOUBLE], [mus, MPI.DOUBLE])
            comm.Allreduce([data_sum, MPI.DOUBLE], [all_data_sum, MPI.DOUBLE])
            # Both sums were reduced over all ranks, so they are normalised
            # by the global number of used datapoints (N_use), not by the
            # local count my_N. With a single rank the two are equal.
            mu_new = all_data_sum / N_use - np.inner(W_new.T / N_use, mus)
        else:
            mu_new = mu

        for param in anneal.crit_params:
            # NOTE(review): evaluates a name taken from the annealing
            # configuration in this function's scope; crit_params must come
            # from a trusted source. eval() is used instead of
            # exec('this_param = ...') because exec cannot bind a function
            # local under Python 3.
            this_param = eval(param)
            anneal.dyn_param(param, this_param)

        dlog.append('N_use', N_use)

        return {'W': W_new, 'pi': pi_new, 'sigma': sigma_new, 'mu': mu_new}
Exemple #7
0
    def M_step(self, anneal, model_params, my_suff_stat, my_data):
        """ MCA M_step

        Compute new values for W, pi and sigma from the log-joints of the
        E-step and return them together with the truncated approximation
        of the log-likelihood. Partial sums are combined over all ranks of
        self.comm.

        model_params entries used:

            model_params['W']      Current W, shape (H, D). Its logarithm is
                                   taken, so every entry has to be positive.
            model_params['pi']     Current pi
            model_params['sigma']  Current sigma

        my_suff_stat entries used:

            my_suff_stat['logpj']  Log-joints, one row per datapoint. Column 0
                                   is the state without active causes,
                                   columns 1..H the states with one active
                                   cause, the remaining columns correspond to
                                   the rows of self.state_matrix.

        my_data variables used:

            my_data['y']           Datapoints
            my_data['candidates']  Candidate H's according to selection func.

        Annealing variables used:

            anneal['T']            Temperature for det. annealing AND softmax
            anneal['Ncut_factor']  0.: no truncation; 1. trunc. according to model

        Returns a dict with the entries 'W', 'pi' and 'sigma' (parameters
        not listed in self.to_learn are passed through unchanged) and 'Q',
        the approximated log-likelihood.
        """
        comm = self.comm
        H = self.H
        W = model_params["W"]
        pies = model_params["pi"]
        sigma = model_params["sigma"]

        # Read in data:
        my_y = my_data["y"]
        my_cand = my_data["candidates"]
        my_logpj = my_suff_stat["logpj"]
        my_N, D = my_y.shape
        N = comm.allreduce(my_N)

        state_mtx = self.state_matrix  # shape: (no_states, Hprime)
        state_abs = self.state_abs  # shape: (no_states,)

        # To compute et_loglike:
        my_ldenom_sum = 0.0

        # Precompute
        T = anneal["T"]
        T_rho = np.maximum(T, self.rho_temp_bound)
        rho = 1.0 / (1.0 - 1.0 / T_rho)
        beta = 1.0 / T
        pil_bar = np.log(pies / (1.0 - pies))  # only used for the sanity check below
        Wl = np.log(W)
        Wrho = np.exp(rho * Wl)
        Wsquared = W * W

        # Some asserts
        assert np.isfinite(pil_bar).all()
        assert np.isfinite(Wl).all()
        assert np.isfinite(Wrho).all()
        assert (Wrho > 1e-86).all()

        # Annealed joints, shifted by the row maximum so that exp() cannot overflow
        my_corr = beta * ((my_logpj).max(axis=1))  # shape: (my_N,)
        my_pjb = np.exp(beta * my_logpj - my_corr[:, None])  # shape: (my_N, 1+H+no_states)

        # Precompute factor for pi/gamma update
        A_pi_gamma = 0.0
        B_pi_gamma = 0.0
        for gp in xrange(0, self.gamma + 1):
            a = comb(H, gp, exact=1) * pies ** gp * (1.0 - pies) ** (H - gp)
            A_pi_gamma += a
            B_pi_gamma += gp * a

        # Truncate data
        if anneal["Ncut_factor"] > 0.0:
            tracing.tracepoint("M_step:truncating")
            my_denoms = np.log(my_pjb.sum(axis=1)) + my_corr
            N_use = int(N * (1 - (1 - A_pi_gamma) * anneal["Ncut_factor"]))

            # Keep the datapoints whose log-denominator reaches the cut value.
            # NOTE(review): presumably parallel.allsort returns the values of
            # all ranks in ascending order -- confirm.
            cut_denom = parallel.allsort(my_denoms)[-N_use]
            which = np.array(my_denoms >= cut_denom)

            my_y = my_y[which]
            my_cand = my_cand[which]
            my_logpj = my_logpj[which]
            my_pjb = my_pjb[which]
            my_corr = my_corr[which]
            my_N, D = my_y.shape
            # Recount: the >= comparison decides how many datapoints survive
            N_use = comm.allreduce(my_N)
        else:
            N_use = N
        dlog.append("N_use", N_use)

        # Allocate suff-stat arrays
        my_Wp = np.zeros_like(W)  # shape (H, D)
        my_Wq = np.zeros_like(W)  # shape (H, D)
        my_pi = 0.0  #
        my_sigma = 0.0  #

        # Iterate over all datapoints
        for n in xrange(my_N):
            tracing.tracepoint("M_step:iterating")
            y = my_y[n, :]  # shape (D,)
            cand = my_cand[n, :]  # shape (Hprime,)
            logpj = my_logpj[n, :]  # shape (1+H+no_states,)
            pjb = my_pjb[n, :]  # shape (1+H+no_states,)
            corr = my_corr[n]  # scalar

            this_Wp = np.zeros_like(W)  # numerator for W (current datapoint)   (H, D)
            this_Wq = np.zeros_like(W)  # denominator for W (current datapoint) (H, D)
            this_pi = 0.0  # numerator for pi update (current datapoint)
            this_sigma = 0.0  # numerator for sigma update (current datapoint)

            # Zero active hidden causes
            # this_Wp += 0.     # nothing to do
            # this_Wq += 0.     # nothing to do
            # this_pi += 0.     # nothing to do
            this_sigma += pjb[0] * (y ** 2).sum()

            # One active hidden cause
            this_Wp += (pjb[1 : (H + 1), None] * Wsquared[:, :]) * y[None, :]
            this_Wq += pjb[1 : (H + 1), None] * Wsquared[:, :]
            this_pi += pjb[1 : (H + 1)].sum()
            this_sigma += (pjb[1 : (H + 1)] * ((W - y) ** 2).sum(axis=1)).sum()

            # Handle hidden states with more than 1 active cause
            Wl_ = Wl[cand]  # is (Hprime, D)
            Wrho_ = Wrho[cand]  # is (   "    ")

            Wlrhom1 = (rho - 1) * Wl_  # is (Hprime, D)
            Wlbar = np.log(np.dot(state_mtx, Wrho_)) / rho  # is (no_states, D)
            Wbar = np.exp(Wlbar)  # is (no_states, D)
            blpj = beta * logpj[1 + H :] - corr  # is (no_states,)

            # Everything is combined in log-space and exponentiated once
            Aid = (
                state_mtx[:, :, None]
                * np.exp(blpj[:, None, None] + (1 - rho) * Wlbar[:, None, :] + Wlrhom1[None, :, :])
            ).sum(axis=0)

            assert np.isfinite(Wlbar).all()
            assert np.isfinite(Wbar).all()
            assert np.isfinite(pjb).all()
            assert np.isfinite(Aid).all()

            this_Wp[cand] += Aid * y[None, :]
            this_Wq[cand] += Aid
            this_pi += (pjb[1 + H :] * state_abs).sum()
            this_sigma += (pjb[1 + H :] * ((Wbar - y) ** 2).sum(axis=1)).sum()

            # Normalise the contribution of this datapoint
            denom = pjb.sum()
            my_Wp += this_Wp / denom
            my_Wq += this_Wq / denom
            my_pi += this_pi / denom
            my_sigma += this_sigma / denom

            # NOTE(review): unshifted log-sum-exp of the raw (not annealed)
            # log-joints; it underflows to -inf for very negative logpj.
            my_ldenom_sum += np.log(np.sum(np.exp(logpj)))  # For loglike computation

        # Calculate updated W
        if "W" in self.to_learn:
            tracing.tracepoint("M_step:update W")

            Wp = np.empty_like(my_Wp)
            Wq = np.empty_like(my_Wq)

            assert np.isfinite(my_Wp).all()
            assert np.isfinite(my_Wq).all()

            comm.Allreduce([my_Wp, MPI.DOUBLE], [Wp, MPI.DOUBLE])
            comm.Allreduce([my_Wq, MPI.DOUBLE], [Wq, MPI.DOUBLE])

            # Make sure we do not divide by zero
            tiny = np.finfo(Wq.dtype).tiny
            Wp[Wq < tiny] = 0.0
            Wq[Wq < tiny] = tiny

            W_new = Wp / Wq
        else:
            W_new = W

        # Calculate updated pi
        if "pi" in self.to_learn:
            tracing.tracepoint("M_step:update pi")

            assert np.isfinite(my_pi).all()
            pi_new = A_pi_gamma / B_pi_gamma * pies * comm.allreduce(my_pi) / N_use
        else:
            pi_new = pies

        # Calculate updated sigma
        if "sigma" in self.to_learn:  # TODO: XXX see LinCA XXX (merge!)
            tracing.tracepoint("M_step:update sigma")

            assert np.isfinite(my_sigma).all()
            sigma_new = np.sqrt(comm.allreduce(my_sigma) / D / N_use)
        else:
            sigma_new = sigma

        # Put all together and compute (always) et_approx_likelihood
        ldenom_sum = comm.allreduce(my_ldenom_sum)
        # D / 2.0 instead of D / 2: the latter floors for odd D when the
        # module is run with integer division (Python 2 default).
        lAi = (H * np.log(1.0 - pi_new)) - ((D / 2.0) * np.log(2 * pi)) - (D * np.log(sigma_new))

        # For practical and et approx reasons we use: sum of restricted respons=1
        loglike_et = (lAi * N_use) + ldenom_sum

        return {"W": W_new, "pi": pi_new, "sigma": sigma_new, "Q": loglike_et}
Exemple #8
0
    def M_step(self, anneal, model_params, my_suff_stat, my_data):
        """ BSC M_step

        Compute new values for W, pi, sigma and mu from the log-joints of
        the E-step. Partial sums are combined over all ranks of self.comm.

        model_params entries used:

            model_params['W']      Current W, shape (H, D)
            model_params['pi']     Current pi
            model_params['sigma']  Current sigma
            model_params['mu']     Current offset, subtracted from every datapoint

        my_suff_stat entries used:

            my_suff_stat['logpj']  Log-joints, one row per datapoint. Column 0
                                   is the state without active causes,
                                   columns 1..H the states with one active
                                   cause, the remaining columns correspond to
                                   the rows of self.state_matrix.

        my_data variables used:

            my_data['y']           Datapoints (copied, never modified in place)
            my_data['candidates']  Candidate H's according to selection func.
            my_data['data_noise']  Added to y if anneal['data_noise'] > 0

        Annealing variables used:

            anneal['data_noise']   > 0 switches on the data noise
            anneal['Ncut_factor']  0.: no truncation; 1. trunc. according to model
            anneal.crit_params     Expressions that are evaluated in the scope
                                   of this method and handed to anneal.dyn_param

        Returns a dict with the entries 'W', 'pi', 'sigma' and 'mu';
        parameters not listed in self.to_learn are passed through unchanged.
        """

        comm      = self.comm
        H         = self.H
        gamma     = self.gamma
        W         = model_params['W']
        pies      = model_params['pi']
        sigma     = model_params['sigma']
        mu        = model_params['mu']

        # Read in data (y is copied because noise may be added in place):
        my_y       = my_data['y'].copy()
        candidates = my_data['candidates']
        logpj_all  = my_suff_stat['logpj']

        my_N, D   = my_y.shape
        N         = comm.allreduce(my_N)

        # Joerg's data noise idea
        if anneal['data_noise'] > 0:
            my_y += my_data['data_noise']

        SM = self.state_matrix        # shape: (no_states, Hprime)

        # Precompute factor for pi update
        A_pi_gamma = 0
        B_pi_gamma = 0
        for gamma_p in range(gamma+1):
            A_pi_gamma += comb(H,gamma_p) * (pies**gamma_p) * ((1-pies)**(H-gamma_p))
            B_pi_gamma += gamma_p * comb(H,gamma_p) * (pies**gamma_p) * ((1-pies)**(H-gamma_p))
        E_pi_gamma = pies * H * A_pi_gamma / B_pi_gamma

        # Truncate data
        if anneal['Ncut_factor'] > 0.0:
            tracing.tracepoint("M_step:truncating")
            all_denoms = np.exp(logpj_all).sum(axis=1)
            N_use = int(N * (1 - (1 - A_pi_gamma) * anneal['Ncut_factor']))
            # NOTE(review): presumably parallel.allsort returns the values of
            # all ranks in ascending order -- confirm.
            cut_denom = parallel.allsort(all_denoms)[-N_use]
            which   = np.array(all_denoms >= cut_denom)
            candidates = candidates[which]
            logpj_all = logpj_all[which]
            my_y    = my_y[which]
            my_N, D = my_y.shape
            # Recount: the >= comparison decides how many datapoints survive
            N_use = comm.allreduce(my_N)
        else:
            N_use = N
        dlog.append('N', N_use)

        # Calculate truncated Likelihood
        L = H * np.log(1-pies) - 0.5 * D * np.log(2*pi*sigma**2) - np.log(A_pi_gamma)
        Fs = np.log(np.exp(logpj_all).sum(axis=1)).sum()
        L += comm.allreduce(Fs)/N_use
        dlog.append('L',L)

        # Precompute: joints shifted by the row maximum so exp() cannot overflow
        corr_all  = logpj_all.max(axis=1)                 # shape: (my_N,)
        pjb_all   = np.exp(logpj_all - corr_all[:, None]) # shape: (my_N, 1+H+no_states)

        # Allocate
        my_Wp = np.zeros_like(W)   # shape (H, D)
        my_Wq = np.zeros((H,H))    # shape (H, H)
        my_pi = 0.0                #
        my_sigma = 0.0             #
        my_mus = np.zeros(H)       # shape (H,)
        data_sum = my_y.sum(axis=0)   # sum over all (local) data points for mu update

        # Iterate over all datapoints
        tracing.tracepoint("M_step:iterating")
        for n in xrange(my_N):
            y     = my_y[n,:]-mu                  # length D
            cand  = candidates[n,:] # length Hprime
            pjb   = pjb_all[n, :]

            # Zero active hidden cause: contributes nothing to W, pi and mu

            # One active hidden cause
            this_Wp = np.outer(pjb[1:(H+1)],y)          # numerator for W   (H, D)
            this_Wq = pjb[1:(H+1)] * np.identity(H)     # denominator for W (H, H)
            this_pi = pjb[1:(H+1)].sum()                # numerator for pi
            this_mus = pjb[1:(H+1)].copy()              # per-cause weights for mu

            # Handle hidden states with more than 1 active cause
            this_Wp[cand]      += np.dot(np.outer(y,pjb[(1+H):]),SM).T
            this_Wq_tmp         = np.zeros_like(my_Wq[cand])
            this_Wq_tmp[:,cand] = np.dot(pjb[(1+H):] * SM.T,SM)
            this_Wq[cand]      += this_Wq_tmp
            this_pi            += np.inner(pjb[(1+H):], SM.sum(axis=1))
            this_mus[cand]     += np.inner(SM.T,pjb[(1+H):])

            # Normalise the contribution of this datapoint
            denom = pjb.sum()
            my_Wp += this_Wp / denom
            my_Wq += this_Wq / denom
            my_pi += this_pi / denom
            my_mus += this_mus / denom

        # Calculate updated W
        if 'W' in self.to_learn:
            tracing.tracepoint("M_step:update W")
            Wp = np.empty_like(my_Wp)
            Wq = np.empty_like(my_Wq)
            comm.Allreduce( [my_Wp, MPI.DOUBLE], [Wp, MPI.DOUBLE] )
            comm.Allreduce( [my_Wq, MPI.DOUBLE], [Wq, MPI.DOUBLE] )
            W_new  = np.dot(np.linalg.inv(Wq), Wp)
        else:
            W_new = W

        # Calculate updated pi
        if 'pi' in self.to_learn:
            tracing.tracepoint("M_step:update pi")
            pi_new = E_pi_gamma * comm.allreduce(my_pi) / H / N_use
        else:
            pi_new = pies

        # Calculate updated sigma (uses the old W, not W_new)
        if 'sigma' in self.to_learn:
            tracing.tracepoint("M_step:update sigma")
            # Loop for sigma update; the shifted joints are already in pjb_all
            for n in xrange(my_N):
                y     = my_y[n,:]-mu           # length D
                cand  = candidates[n,:]     # length Hprime
                pjb   = pjb_all[n, :]

                # Zero active hidden causes
                this_sigma = pjb[0] * (y**2).sum()

                # Hidden states with one active cause
                this_sigma += (pjb[1:(H+1)] * ((W-y)**2).sum(axis=1)).sum()

                # Handle hidden states with more than 1 active cause
                Wbar = np.dot(SM,W[cand])              # is (no_states, D)
                this_sigma += (pjb[(H+1):] * ((Wbar-y)**2).sum(axis=1)).sum()

                my_sigma += this_sigma / pjb.sum()

            sigma_new = np.sqrt(comm.allreduce(my_sigma) / D / N_use)
        else:
            sigma_new = sigma

        # Calculate updated mu:
        if 'mu' in self.to_learn:
            tracing.tracepoint("M_step:update mu")
            mus = np.empty_like(my_mus)
            all_data_sum = np.empty_like(data_sum)
            comm.Allreduce( [my_mus, MPI.DOUBLE], [mus, MPI.DOUBLE] )
            comm.Allreduce( [data_sum, MPI.DOUBLE], [all_data_sum, MPI.DOUBLE] )
            # Both sums run over the datapoints of *all* ranks, so they have
            # to be normalised by the global count N_use. The rank-local my_N
            # is only correct for a single process and would make the ranks
            # disagree on mu.
            mu_new = all_data_sum/N_use - np.inner(W_new.T/N_use,mus)
        else:
            mu_new = mu

        # Report the requested quantities to the annealing schedule. The
        # entries are evaluated against the local variables of this method;
        # eval() is used because exec() cannot bind a function local on
        # Python 3. crit_params comes from the annealing configuration, never
        # feed it untrusted input.
        for param in anneal.crit_params:
            this_param = eval(param)
            anneal.dyn_param(param, this_param)

        dlog.append('N_use', N_use)

        return { 'W': W_new, 'pi': pi_new, 'sigma': sigma_new, 'mu': mu_new }
        W_gt[sample(range(H), np.int(H * neg_bars))] *= -1
    # Flatten the ground-truth fields to (H, D) and perturb them with noise
    W_gt = W_gt.reshape((H, D))
    W_gt += np.random.normal(size=(H, D), scale=0.5)

    # Prepare model...
    model = BSC_ET(D, H, Hprime, gamma, to_learn)
    mparams = {"W": W_gt, "pi": pi_gt, "sigma": sigma_gt, "mu": mu_gt}
    # Every rank continues with the same generating parameters
    mparams = comm.bcast(mparams)

    pprint("Generating Model Parameters:")
    # str() instead of np.str(): the alias is deprecated and was removed in
    # NumPy 1.24; it always referred to the builtin.
    pprint("pi = " + str(mparams["pi"]) + "; sigma = " + str(mparams["sigma"]))

    # Generate training data (each rank draws its own share)
    my_N = N // comm.size
    my_data = model.generate_data(mparams, my_N)
    dlog.append("y", my_data["y"][0:20])

    # Choose annealing schedule
    # NOTE(review): the tuples are presumably (position, value) control
    # points of the schedule -- confirm against LinearAnnealing.
    anneal = LinearAnnealing(anneal_steps)
    anneal["T"] = [(15, start_temp), (-10, end_temp)]
    anneal["Ncut_factor"] = [(0, 0.0), (2.0 / 3, 1.0)]
    anneal["anneal_prior"] = anneal_prior
    anneal["W_noise"] = [(0.0, W_noise_intensity), (0.9, W_noise_intensity), (1.0, 0.0)]
    anneal["pi_noise"] = [(0.0, pi_noise_intensity), (0.9, pi_noise_intensity), (1.0, 0.0)]
    anneal["sigma_noise"] = [(0.0, sigma_noise_intensity), (0.9, sigma_noise_intensity), (1.0, 0.0)]

    mean_W = np.zeros((H, D))
    pics_per_H = my_N // H
    for indH in xrange(H):
        mean_W_tmp = np.sum(comm.allreduce(my_data["y"][indH * pics_per_H : (indH + 1) * pics_per_H, :]), axis=0) / (
            my_N // H * comm.size
Exemple #10
0
    old_fname = sys.argv[2] + "/result.h5"
    old_h5 = openFile(old_fname, 'r')
    # try/finally: the file handle must not leak if a node cannot be read
    try:
        if comm.rank == 0:
            # Copy old results row by row into the new log; the derived
            # analysis tables listed here are not carried over.
            skipped_nodes = ('RF', 'gabor_params', 'gabor_errors',
                             'dog_params', 'dog_errors', 'dog_sigmas')
            for node in old_h5.listNodes("/"):
                name = node.name
                rows = node.shape[0]
                if name in skipped_nodes:
                    continue
                for r in xrange(rows):
                    dlog.append(name, node[r])

        # Extract current parameters (last stored row of each table)
        steps_done = old_h5.root.W.shape[0]
        lparams = {
            'mu': np.zeros((D, )),
            'W': old_h5.root.W[-1],
            'pi': old_h5.root.pi[-1],
            'sigma': old_h5.root.sigma[-1],
        }
    finally:
        old_h5.close()

    # Advance annealing schedule
    dlog.progress("Skipping %d EM iterations" % steps_done)
    for i in xrange(steps_done - 1):
        anneal.next()
    # Flatten the ground-truth fields to (H, D) and perturb them with noise
    W_gt = W_gt.reshape( (H, D) )
    W_gt += np.random.normal(size=(H, D), scale=0.5)


    # Prepare model...
    model = MMCA_ET(D, H, Hprime, gamma)
    gt_params = {
        'W'     : W_gt,
        'pi'    : 2./H,
        'sigma' : 0.10
    }

    # Generate training data (each rank draws its own share)
    my_N = N // comm.size
    my_data = model.generate_data(gt_params, my_N)
    dlog.append('y', my_data['y'][0:25,:])

    # Initialize model parameters (to be learned)
    params = {
        'W'     : np.random.normal(size=W_gt.shape),
        # 1./H, not 1/H: with integer division the latter is 0 for H > 1
        'pi'    : 1./H,
        'sigma' : 5.00
    }
    # Every rank starts from the same initial parameters
    params = comm.bcast(params)

    # Create and start EM annealing
    em = EM(model=model, anneal=anneal)
    em.data = my_data
    em.lparams = params
Exemple #12
0
    #=============== Pick up and continue previous computation ============
    dlog.progress("Picking up computation from %s" % sys.argv[2])
    tracing.tracepoint("Copy old reults")

    old_fname = sys.argv[2] + "/result.h5"
    old_h5 = openFile(old_fname, 'r')
    # try/finally: the file handle must not leak if a node cannot be read
    try:
        if comm.rank == 0:
            # Copy old results row by row into the new log; the derived
            # analysis tables listed here are not carried over.
            skipped_nodes = ('RF', 'gabor_params', 'gabor_errors',
                             'dog_params', 'dog_errors', 'dog_sigmas')
            for node in old_h5.listNodes("/"):
                name = node.name
                rows = node.shape[0]
                if name in skipped_nodes:
                    continue
                for r in xrange(rows):
                    dlog.append(name, node[r])

        # Extract current parameters (last stored row of each table)
        steps_done = old_h5.root.W.shape[0]
        lparams = {
            'mu'    : np.zeros( (D,) ),
            'W'     : old_h5.root.W[-1],
            'pi'    : old_h5.root.pi[-1],
            'sigma' : old_h5.root.sigma[-1],
        }
    finally:
        old_h5.close()

    # Advance annealing schedule
    dlog.progress("Skipping %d EM iterations" % steps_done)
    for i in xrange(steps_done-1):
        anneal.next()
Exemple #13
0
    def M_step(self, anneal, model_params, my_suff_stat, my_data):
        """ MCA M_step

        Compute new values for W, pi and sigma from the log-joints of the
        E-step and return them together with the truncated approximation
        of the log-likelihood and, if self.rev_corr is set, a reverse
        correlation estimate. Partial sums are combined over all ranks of
        self.comm. W may contain negative entries: magnitudes are handled
        in log-space and the signs are carried separately.

        model_params entries used:

            model_params['W']      Current W, shape (H, D); no entry may be
                                   zero (the log of its magnitude is taken)
            model_params['pi']     Current pi
            model_params['sigma']  Current sigma

        my_suff_stat entries used:

            my_suff_stat['logpj']  Log-joints, one row per datapoint. Column 0
                                   is the state without active causes,
                                   columns 1..H the states with one active
                                   cause, the remaining columns correspond to
                                   the rows of self.state_matrix.

        my_data variables used:

            my_data['y']           Datapoints
            my_data['candidates']  Candidate H's according to selection func.
            my_data['y_rc']        Data for reverse correlation (only read
                                   if self.rev_corr is set)

        Annealing variables used:

            anneal['T']            Temperature for det. annealing AND softmax
            anneal['Ncut_factor']  0.: no truncation; 1. trunc. according to model

        Returns a dict with the entries 'W', 'pi', 'sigma' (parameters not
        listed in self.to_learn are passed through unchanged), 'rev_corr'
        (zeros of shape (H, D) if reverse correlation is switched off) and
        'Q', the approximated log-likelihood.
        """
        comm      = self.comm
        H         = self.H
        W         = model_params['W']
        pies      = model_params['pi']
        sigma     = model_params['sigma']

        # Read in data:
        my_y      = my_data['y']
        my_cand   = my_data['candidates']
        my_logpj  = my_suff_stat['logpj']
        my_N, D   = my_y.shape
        N         = comm.allreduce(my_N)

        state_mtx = self.state_matrix        # shape: (no_states, Hprime)
        state_abs = self.state_abs           # shape: (no_states,)

        # Disable some warnings. The context manager restores the previous
        # NumPy error state even when an assertion below fails.
        with np.errstate(divide='ignore', under='ignore'):
            # To compute et_loglike:
            my_ldenom_sum = 0.0

            # Precompute
            T        = anneal['T']
            T_rho    = np.maximum(T, self.rho_T_bound)
            rho      = 1./(1.-1./T_rho)
            rho      = np.maximum(np.minimum(rho, self.rho_ubound), self.rho_lbound)
            beta     = 1./T
            pil_bar  = np.log( pies/(1.-pies) )    # only used for the sanity check below
            Wl       = accel.log(np.abs(W))
            Wrho     = accel.exp(rho * Wl)
            Wrhos    = np.sign(W) * Wrho

            # Some asserts
            assert np.isfinite(pil_bar).all()
            assert np.isfinite(Wl).all()
            assert np.isfinite(Wrho).all()
            assert (Wrho > 1e-86).all()

            # Annealed log-joints, shifted by the row maximum
            my_corr   = beta*((my_logpj).max(axis=1))            # shape: (my_N,)
            my_logpjb = beta*my_logpj - my_corr[:, None]         # shape: (my_N, 1+H+no_states)
            my_pjb    = accel.exp(my_logpjb)                     # shape: (my_N, 1+H+no_states)

            # Precompute factor for pi update and ET cutting
            A_pi_gamma = 0.; B_pi_gamma = 0.
            for gp in xrange(0, self.gamma+1):
                a = comb(H, gp) * pies**gp * (1.-pies)**(H-gp)
                A_pi_gamma += a
                B_pi_gamma += gp * a

            # Truncate data: only the selected indices are visited below
            if anneal['Ncut_factor'] > 0.0:
                tracing.tracepoint("M_step:truncating")
                my_logdenoms = accel.log(my_pjb.sum(axis=1)) + my_corr
                N_use = int(N * (1 - (1 - A_pi_gamma) * anneal['Ncut_factor']))

                # NOTE(review): presumably parallel.allsort returns the values
                # of all ranks in ascending order -- confirm.
                cut_denom = parallel.allsort(my_logdenoms)[-N_use]
                my_sel,   = np.where(my_logdenoms >= cut_denom)
                my_N,     = my_sel.shape
                # Recount: the >= comparison decides how many datapoints survive
                N_use     = comm.allreduce(my_N)
            else:
                my_N,_ = my_y.shape
                my_sel = np.arange(my_N)
                N_use  = N

            # Allocate suff-stat arrays
            my_Wp    = np.zeros_like(W)  # shape (H, D)
            my_Wq    = np.zeros_like(W)  # shape (H, D)
            my_pi    = 0.0               #
            my_sigma = 0.0               #

            # Do reverse correlation if requested
            if self.rev_corr:
                my_y_rc   = my_data['y_rc']
                D_rev_corr  = my_y_rc.shape[1]
                my_rev_corr = np.zeros( (H,D_rev_corr) )
                my_rev_corr_count = np.zeros(H)

            # Iterate over all datapoints
            tracing.tracepoint("M_step:iterating...")
            dlog.append('N_use', N_use)
            for n in my_sel:
                y      = my_y[n,:]             # shape (D,)
                cand   = my_cand[n,:]          # shape (Hprime,)
                logpj  = my_logpj[n,:]         # shape (1+H+no_states,)
                logpjb = my_logpjb[n,:]        # shape (1+H+no_states,)
                pjb    = my_pjb[n,:]           # shape (1+H+no_states,)

                this_Wp = np.zeros_like(W)    # numerator for W (current datapoint)   (H, D)
                this_Wq = np.zeros_like(W)    # denominator for W (current datapoint) (H, D)
                this_pi = 0.0                 # numerator for pi update (current datapoint)
                this_sigma = 0.0              # numerator for sigma update (current datapoint)

                # Zero active hidden causes
                # this_Wp += 0.     # nothing to do
                # this_Wq += 0.     # nothing to do
                # this_pi += 0.     # nothing to do
                this_sigma += pjb[0] * (y**2).sum()

                # One active hidden cause
                this_Wp    += (pjb[1:(H+1),None]) * y[None, :]
                this_Wq    += (pjb[1:(H+1),None])
                this_pi    +=  pjb[1:(H+1)].sum()
                this_sigma += (pjb[1:(H+1)] * ((W-y)**2).sum(axis=1)).sum()

                # Handle hidden states with more than 1 active cause
                Wl_    = Wl[cand]                                   # is (Hprime, D)
                Wrhos_ = Wrhos[cand]                                # is (   "    ")

                # Signed combination of the candidates; magnitude in log-space
                t0 = np.dot(state_mtx, Wrhos_)
                Wlbar   = accel.log(np.abs(t0)) / rho    # is (no_states, D)
                Wbar    = np.sign(t0)*accel.exp(Wlbar)   # is (no_states, D)

                # Clamp at 0 so that the exponent below never becomes positive
                # because of this term
                t = Wlbar[:, None, :]-Wl_[None, :, :]
                t = np.maximum(t, 0.)
                Aid = state_mtx[:,:, None] * accel.exp(logpjb[H+1:,None,None] - (rho-1)*t)
                Aid = Aid.sum(axis=0)

                this_Wp[cand] += Aid * y[None, :]
                this_Wq[cand] += Aid
                this_pi       += (pjb[1+H:] * state_abs).sum()
                this_sigma    += (pjb[1+H:] * ((Wbar-y)**2).sum(axis=1)).sum()

                # Normalise the contribution of this datapoint
                denom     = pjb.sum()
                my_Wp    += this_Wp / denom
                my_Wq    += this_Wq / denom
                my_pi    += this_pi / denom
                my_sigma += this_sigma / denom

                my_ldenom_sum += accel.log(np.sum(accel.exp(logpj))) #For loglike computation

                # Estimate reverse correlation
                if self.rev_corr:
                    pys = pjb / denom
                    if np.isfinite(pys).all():
                        my_rev_corr       += pys[1:H+1, None]*my_y_rc[n,None,:]
                        my_rev_corr_count += pys[1:H+1]
                        my_rev_corr[cand]       += np.sum(state_mtx[:,:,None]*pys[H+1:,None,None]*my_y_rc[n,None,:], axis=0)
                        # pys[H+1:] (slice), matching the line above: every
                        # multi-cause state is weighted by its own
                        # probability, not all of them by that of state H+1.
                        my_rev_corr_count[cand] += np.sum(state_mtx[:,:]*pys[H+1:,None], axis=0)
                    else:
                        print("Not all finite rev_corr %d" % n)

            # Calculate updated W
            if 'W' in self.to_learn:
                tracing.tracepoint("M_step:update W")

                Wp = np.empty_like(my_Wp)
                Wq = np.empty_like(my_Wq)

                assert np.isfinite(my_Wp).all()
                assert np.isfinite(my_Wq).all()

                comm.Allreduce( [my_Wp, MPI.DOUBLE], [Wp, MPI.DOUBLE] )
                comm.Allreduce( [my_Wq, MPI.DOUBLE], [Wq, MPI.DOUBLE] )

                # Make sure we do not divide by zero
                tiny = self.tol
                Wq[Wq < tiny] = tiny

                # Calculate updated W
                W_new = Wp / Wq

                # Add inertia depending on Wq: entries with little evidence
                # stay close to the old W, but at least 20% of the new value
                # is always taken over
                alpha = 2.5
                inertia = np.maximum(1. - accel.exp(-Wq / alpha), 0.2)
                W_new = inertia*W_new + (1-inertia)*W
            else:
                W_new = W

            # Calculate updated pi
            if 'pi' in self.to_learn:
                tracing.tracepoint("M_step:update pi")

                assert np.isfinite(my_pi).all()
                pi_new = A_pi_gamma / B_pi_gamma * pies * comm.allreduce(my_pi) / N_use
            else:
                pi_new = pies

            # Calculate updated sigma
            if 'sigma' in self.to_learn:               # TODO: XXX see LinCA XXX (merge!)
                tracing.tracepoint("M_step:update sigma")

                assert np.isfinite(my_sigma).all()
                sigma_new = np.sqrt(comm.allreduce(my_sigma) / D / N_use)
            else:
                sigma_new = sigma

            # Put all together and compute (always) et_approx_likelihood
            ldenom_sum = comm.allreduce(my_ldenom_sum)
            # D / 2.0 instead of D/2: the latter floors for odd D when the
            # module is run with integer division (Python 2 default).
            lAi = (H * np.log(1. - pi_new)) - ((D / 2.0) * np.log(2*pi)) -( D * np.log(sigma_new))

            # For practical and et approx reasons we use: sum of restricted respons=1
            loglike_et = (lAi * N_use) + ldenom_sum

            if self.rev_corr:
                rev_corr       = np.empty_like(my_rev_corr)
                rev_corr_count = np.empty_like(my_rev_corr_count)
                comm.Allreduce( [my_rev_corr,       MPI.DOUBLE], [rev_corr,       MPI.DOUBLE])
                comm.Allreduce( [my_rev_corr_count, MPI.DOUBLE], [rev_corr_count, MPI.DOUBLE])
                # The small constant avoids a division by zero for causes
                # that were never active
                rev_corr /= (1e-16+rev_corr_count[:,None])
            else:
                rev_corr = np.zeros( (H, D) )

            return { 'W': W_new, 'pi': pi_new, 'sigma': sigma_new , 'rev_corr': rev_corr, 'Q':loglike_et}
Exemple #14
0
    # Flatten the ground-truth fields to (H, D) and perturb them with noise
    W_gt = W_gt.reshape((H, D))
    W_gt += np.random.normal(size=(H, D), scale=0.5)

    # Prepare model...
    model = BSC_ET(D, H, Hprime, gamma, to_learn)
    mparams = {'W': W_gt, 'pi': pi_gt, 'sigma': sigma_gt, 'mu': mu_gt}
    # Every rank continues with the same generating parameters
    mparams = comm.bcast(mparams)

    pprint("Generating Model Parameters:")
    # str() instead of np.str(): the alias is deprecated and was removed in
    # NumPy 1.24; it always referred to the builtin.
    pprint("pi = " + str(mparams['pi']) + "; sigma = " +
           str(mparams['sigma']))

    # Generate training data (each rank draws its own share)
    my_N = N // comm.size
    my_data = model.generate_data(mparams, my_N)
    dlog.append('y', my_data['y'][0:20])

    # Choose annealing schedule
    # NOTE(review): the tuples are presumably (position, value) control
    # points of the schedule -- confirm against LinearAnnealing.
    anneal = LinearAnnealing(anneal_steps)
    anneal['T'] = [(15, start_temp), (-10, end_temp)]
    anneal['Ncut_factor'] = [(0, 0.), (2. / 3, 1.)]
    anneal['anneal_prior'] = anneal_prior
    anneal['W_noise'] = [(0., W_noise_intensity), (0.9, W_noise_intensity),
                         (1., 0.)]
    anneal['pi_noise'] = [(0., pi_noise_intensity), (0.9, pi_noise_intensity),
                          (1., 0.)]
    anneal['sigma_noise'] = [(0., sigma_noise_intensity),
                             (0.9, sigma_noise_intensity), (1., 0.)]

    mean_W = np.zeros((H, D))
    pics_per_H = my_N // H
Exemple #15
0
    # Prepare ground-truth GFs (bars): D2 horizontal bars with value -10 and
    # D2 vertical bars with value +10 on a D2 x D2 grid
    W_gt = np.zeros((H, D2, D2))
    for i in xrange(D2):
        W_gt[i, i, :] = -10.
        W_gt[D2 + i, :, i] = +10.
    W_gt = W_gt.reshape((H, D))
    W_gt += np.random.normal(size=(H, D), scale=0.5)

    # Prepare model...
    model = MMCA_ET(D, H, Hprime, gamma)
    gt_params = {'W': W_gt, 'pi': 2. / H, 'sigma': 0.10}

    # Generate training data (each rank draws its own share)
    my_N = N // comm.size
    my_data = model.generate_data(gt_params, my_N)
    dlog.append('y', my_data['y'][0:25, :])

    # Initialize model parameters (to be learned)
    params = {
        'W': np.random.normal(size=W_gt.shape),
        # 1. / H, not 1 / H: with integer division the latter is 0 for H > 1
        'pi': 1. / H,
        'sigma': 5.00
    }
    # Every rank starts from the same initial parameters
    params = comm.bcast(params)

    # Create and start EM annealing
    em = EM(model=model, anneal=anneal)
    em.data = my_data
    em.lparams = params