def test_mixedLogistic_train_tribolium(self):
     """Smoke-test ``mixedLogistic_train`` on the Tribolium data set.

     The start vector is the output of ``set_initial`` flattened by
     ``mapOptimizationMatrices2Vector``.  No assertion is made: the test
     passes as long as training and the final unpacking run without raising.
     """
     tribolium = prepareTriboliumData()
     xm_cols = tribolium['xm'].shape[1]

     init_beta, init_alpha = set_initial(tribolium['c'], xm_cols, tribolium['xr'].shape[1])
     start = mapOptimizationMatrices2Vector(init_beta, init_alpha)

     fit = mixedLogistic_train(
         start,
         tribolium['c'],
         tribolium['y'],
         tribolium['xm'],
         tribolium['xr'],
         tribolium['m'],
     )

     # Return value intentionally dropped; only the call itself is exercised.
     mapOptimizationVector2Matrices(fit.x, tribolium['c'], xm_cols)
    def test_mixedLogistic_QNtrain(self):
        """Smoke-test ``mixedLogistic_QNtrain`` on the Tribolium data set.

        Builds the start vector, obtains the objective from
        ``nollkForOptimization``, runs the trainer and prints ``res.x`` and
        ``res.fun``.  Nothing is asserted; the test passes when every call
        completes without raising.
        """
        # prepare data
        data = prepareTriboliumData()
        xm_cols = data['xm'].shape[1]
        # set initial
        beta0, alpha0 = set_initial(data['c'], xm_cols, data['xr'].shape[1])
        vBetaAlpha0 = mapOptimizationMatrices2Vector(beta0, alpha0)

        # Whatever nollkForOptimization returns is handed to the trainer
        # together with the start vector (presumably the negative
        # log-likelihood callable -- confirm against its definition).
        nollk = nollkForOptimization(data['c'], data['y'], data['xm'], data['xr'], data['m'])
        res = mixedLogistic_QNtrain(nollk, vBetaAlpha0)

        # Single-argument print(...) with %-formatting, as used elsewhere in
        # this file: it emits the same text whether ``print`` is the Python 2
        # statement or the print function.
        print('\n')
        print('parameter estimates =\n%s' % (res.x,))
        print('minimum fval = %s' % (res.fun,))

        # Return value dropped; this only checks the optimum can be unpacked.
        mapOptimizationVector2Matrices(res.x, data['c'], xm_cols)
 def test_minimize_negLogLikelihoodForOptimize_tribolium(self):
     """Smoke-test BFGS minimisation of ``nellkForOptimization``.

     The data are read with ``import_data`` and the design matrices are
     assembled by hand.  The start vector is passed twice: as ``x0`` and as
     the first extra argument of the objective.  Nothing is asserted; the
     test passes when the optimiser call completes without raising.
     """
     frame = import_data()

     # NOTE(review): ``.ix`` has been removed from current pandas.  It
     # resolves integer slices by label or by position depending on the
     # column index, so confirm which applies here before porting to
     # ``.loc``/``.iloc``.
     replicate_dummies = pd.get_dummies(frame.Replicate).ix[:, 2:]
     species_dummies = pd.get_dummies(frame.Species).ix[:, 1:]

     # np.matrix(series) is a single row; .T turns it into a column.
     m = np.matrix(frame.Total).T
     y = np.matrix(frame.Remaining).T

     # add a leading 1s column to each design matrix
     xm = addIntercept(replicate_dummies)
     xr = addIntercept(species_dummies)

     n_components = 3  # three replicates as three components

     init_beta, init_alpha = set_initial(n_components, xm.shape[1], xr.shape[1])
     start = mapOptimizationMatrices2Vector(init_beta, init_alpha)

     objective_args = (start, n_components, y, xm, xr, m)
     outcome = sp.optimize.minimize(
         nellkForOptimization,
         x0=start,
         args=objective_args,
         method='BFGS',
     )
    def test_mixedLogistic_EMtrain_evaluation_tribolium(self):
        """Smoke-test ``mixedLogistic_EMtrain`` on the Tribolium data set.

        Trains from the ``set_initial`` start vector, unpacks
        ``res['param']`` into ``beta`` and ``alpha`` and prints both.
        Nothing is asserted; the test passes when every call completes
        without raising.
        """
        # --- prepare data ---
        data = prepareTriboliumData()
        xm_cols = data['xm'].shape[1]
        # --- set initial ---
        beta0, alpha0 = set_initial(data['c'], xm_cols, data['xr'].shape[1])
        vBetaAlpha0 = mapOptimizationMatrices2Vector(beta0, alpha0)
        # Alternative flat start left by the author: np.repeat(1., 15)
        # --- EM ---
        res = mixedLogistic_EMtrain(vBetaAlpha0, data['c'], data['y'], data['xm'], data['xr'], data['m'])

        # The trainer's result is indexed by key; the 'param' entry is the
        # vector that gets unpacked into matrix form.
        beta, alpha = mapOptimizationVector2Matrices(res['param'], data['c'], xm_cols)

        # Single-argument print(...) behaves the same whether ``print`` is
        # the Python 2 statement or the print function.
        print('beta =')
        print(beta)
        print('alpha =')
        print(alpha)
 def test_mixedLogistic_Mstep_evaluation_tribolium(self):
     """Run one E-step followed by one M-step on the Tribolium data.

     The data are read with ``import_data`` and the design matrices are
     assembled by hand.  The M-step's parameter estimate and function value
     are printed.  Nothing is asserted; the test passes when every call
     completes without raising.
     """
     # read data
     df = import_data()
     # NOTE(review): ``.ix`` has been removed from current pandas.  It
     # resolves integer slices by label or by position depending on the
     # column index, so confirm which applies here before porting to
     # ``.loc``/``.iloc``.
     xm = pd.get_dummies(df.Replicate).ix[:, 2:]
     xr = pd.get_dummies(df.Species).ix[:, 1:]
     # np.matrix(series) is a single row; .T turns it into a column.
     m = np.matrix(df.Total).T
     y = np.matrix(df.Remaining).T
     # pre-process data
     xm = addIntercept(xm)  # add a leading 1s column
     xr = addIntercept(xr)  # add a leading 1s column
     c = 3  # three replicates as three components
     # set initial
     beta0, alpha0 = set_initial(c, xm.shape[1], xr.shape[1])
     vBetaAlpha0 = mapOptimizationMatrices2Vector(beta0, alpha0)
     # E-step: its return value is what the M-step receives as first argument
     qfn = mixedLogistic_Estep(vBetaAlpha0, c, y, xm, xr, m)
     # M-step, started from the same initial vector
     param, fval = mixedLogistic_Mstep(qfn, vBetaAlpha0)

     # Single-argument print(...) emits the same text whether ``print`` is
     # the Python 2 statement or the print function.  The doubled space
     # after 'function value =' is deliberate: the Python 2 statement
     # `print 'function value = ', fval` writes its own separator after a
     # string that already ends in a space.
     print('\n')
     print('parameter estimates = \n%s' % (param,))
     print('function value =  %s' % (fval,))
     print('\n')
c_candidates = np.arange(10) + 1  # candidates (1..10) for the number of hidden groups
                                  # It is tuned by cross-validation
nfolds = 10  # 'nfolds' cross-validation
# Fold size.  Floor division keeps this an integer even where '/' is true
# division (Python 3, or `from __future__ import division`); the value is
# used to build np.arange(...) index ranges in the loop below, which must be
# integral.  Rows past nfolds * batchSize never land in a validation fold.
batchSize = trainAndValidateXm.shape[0] // nfolds

# params = []
print('--- Start %d-fold cross-validation ---' % (nfolds))
avgValidateScores = []  # presumably one averaged score per candidate 'c' -- confirm
for c in c_candidates:
    print(" \tCurrent number of hidden groups 'c' = %d " % (c))
    validateScores = []
    for batchIndex in xrange(nfolds):
        print('\t\t Batch %d ...' % (batchIndex))
        # Initial parameters
        random.seed(25)
        b0, a0 = set_initial(c, data.dxm + 1, data.dxr + 1)
        param0 = mapOptimizationMatrices2Vector(b0, a0)

        validateIds = np.arange(batchIndex * batchSize, (batchIndex + 1) * batchSize)
        trainIds = list(set(np.arange(trainAndValidateXm.shape[0])) - set(validateIds))

        res = mixedLogistic_EMtrain(param0=param0, c=c,
                                    y=np.matrix(trainAndValidateY[trainIds]).T,
                                    xm=trainAndValidateXm[trainIds, :],
                                    xr=trainAndValidateXr[trainIds, :],
                                    m=1)

        pred = mixedLogistic_pred(res=res,
                                  xmt=trainAndValidateXm[validateIds, :],
                                  xrt=trainAndValidateXr[validateIds, :],
                                  c=c,