Exemple #1
0
# d.X
# g.partition


"""
While determining structure, kernel functions should be fixed.
"""
# Test forward selection
#
# Script-level driver: simulates a data set, then repeatedly tries each
# covariate still in the pool as a brand-new group on top of `oldGroup`.
# NOTE(review): this snippet is truncated -- the loop body stops before any
# model is trained and before `covariatesPool` is ever shrunk, so as shown the
# `while` loop would not terminate. Restore the remainder before running.
np.random.seed(25)  # fixed seed so the simulated data are reproducible
y, X = DataSimulator.SimData_Wang04(1000)  # Simulate data
data = Data(y, X)
ykernel = Kernel('gaussian', sigma=0.5)
kernel = Kernel('gaussian', sigma=0.5)

# Covariates are labelled 1..data.p.
covariatesPool = list(np.arange(data.p) + 1)
oldGroup = Group()
bestR2 = 0.
bestOKGT = None
bestCovariateIndex = None
bestGroupIndex = None

while covariatesPool:
    # Single-argument print() calls behave identically on Python 2 and 3.
    # The doubled space reproduces what `print "...: ", pool` used to emit.
    print("** Available covariates:  %s" % (covariatesPool,))
    # add a new group no matter what
    print("** Add as new group: **")
    for covariateInd in covariatesPool:
        print("\t try covariate %d ..." % covariateInd)
        currentGroup = oldGroup.addNewCovariateAsGroup(covariateInd)
        print("\t\t current group structure: %s " % (currentGroup.partition,))
        # The following OKGT needs a subset of data and the grouped covariate
        # indices being normalized, so that the training is done as if we are
        # ... (the original comment and the rest of the loop body are cut off
        # here in the source).
def _fitGroupR2(data, group, kernel, useLowRankApproximation, rank):
    """Train an OKGT for the group structure `group` and return its R2.

    `kernel` is used both as the response kernel and as the kernel of every
    group in `group` (one entry per group, `group.size` entries in total).

    :param data: data object handed unchanged to `OKGTReg`.
    :param group: candidate `Group` structure to evaluate.
    :param kernel: kernel object shared by the response and all groups.
    :param useLowRankApproximation: if true, train with
        `train_Nystroem(rank)`; otherwise with `train_Vanilla()`.
    :param rank: argument forwarded to `train_Nystroem`.
    :return: the value stored under ``'r2'`` in the training result.
    """
    xkernels = [kernel] * group.size
    parameters = Parameters(group, kernel, xkernels)
    okgt = OKGTReg(data, parameters)
    if useLowRankApproximation:
        res = okgt.train_Nystroem(rank)
    else:
        res = okgt.train_Vanilla()
    return res['r2']


def backwardSelection(data, kernel, useLowRankApproximation=True, rank=10):
    """Greedy backward search for a group structure, scored by OKGT R2.

    The search starts from `Group(covariatesPool)` built over the covariate
    labels 1..data.p (presumably one group holding every covariate -- confirm
    against `Group`). At each step every covariate still in the pool is tried
    in two ways:

    1. moved out into a new group of its own;
    2. if the current structure has more than one group, added to each of
       the groups other than the one it currently belongs to.

    Every candidate structure is trained and its R2 compared against the best
    R2 seen so far over all steps. If some candidate is strictly better, that
    structure is accepted and its covariate leaves the pool. Otherwise the
    search stops early. The loop also stops once a single covariate remains
    in the pool. Progress is reported with `print`.

    :param data: data object exposing `p` (number of covariates); passed on
        to `OKGTReg`.
    :param kernel: kernel used for the response and for every group.
    :param useLowRankApproximation: train with `train_Nystroem(rank)` when
        true, with `train_Vanilla()` otherwise.
    :param rank: argument forwarded to `train_Nystroem`.
    :return: the last accepted `Group` structure (the initial structure if no
        candidate ever achieved an R2 above 0).
    """
    covariatesPool = list(np.arange(data.p) + 1)
    oldGroup = Group(covariatesPool)
    p = oldGroup.p
    bestR2 = 0.
    bestCovariateIndex = None
    # Start from the current structure so the per-step report below is always
    # defined, even when no candidate improves on `bestR2` in the first step.
    newGroup = oldGroup

    counter = 0

    while len(covariatesPool) > 1:
        counter += 1
        print("** === Step %d === **" % counter)

        # Option 1: split each pooled covariate off into its own new group.
        print("** Create a new group: **")
        for covariateInd in covariatesPool:
            print("\t Create a new group for covariate %d ..." % covariateInd)
            _currentGroup = oldGroup.removeOneCovariate(covariateInd)
            currentGroup = _currentGroup.addNewCovariateAsGroup(covariateInd)
            print("\t\t current group structure: %s " % (currentGroup.partition,))
            # Contrary to forward selection, the data matrix doesn't
            # change.
            currentR2 = _fitGroupR2(data, currentGroup, kernel,
                                    useLowRankApproximation, rank)
            if currentR2 > bestR2:
                print("\t\t current R2 =\t %.10f \t *" % currentR2)
                bestR2 = currentR2
                newGroup = currentGroup
                bestCovariateIndex = covariateInd
            else:
                print("\t\t current R2 =\t %.10f" % currentR2)
            print("\t\t best R2 =\t\t %.10f \n" % bestR2)

        # Option 2: if there are already new groups, a chosen variable can
        # join one of them instead of creating a new group.
        print("** Add to an existing group: **")
        if oldGroup.size > 1:
            for covariateInd in covariatesPool:
                print("\t try adding covariate %d " % covariateInd)
                # Pool without `covariateInd`, which will be added into one
                # of the other groups. A shallow copy is enough for a flat
                # list of labels.
                updatedCovariatesPool = list(covariatesPool)
                updatedCovariatesPool.remove(covariateInd)
                # Group number the chosen covariate currently belongs to.
                covariateMember = oldGroup.getMembership(covariateInd)
                # All other group numbers (groups are numbered from 1).
                otherGroupInds = list(np.arange(oldGroup.size) + 1)
                otherGroupInds.remove(covariateMember)

                # NOTE(review): the meaning of the second argument (True) is
                # defined by Group.getPartitions -- confirm there.
                otherGroup = oldGroup.getPartitions(otherGroupInds, True)
                # Try adding the chosen covariate to each of the other groups.
                for groupInd in np.arange(otherGroup.size) + 1:
                    print("\t   in other group %d ..." % groupInd)
                    updatedOtherGroup = otherGroup.addNewCovariateToGroup(covariateInd, groupInd)
                    # Re-attach the remaining pooled covariates to the
                    # candidate structure.
                    currentGroup = updatedOtherGroup + updatedCovariatesPool
                    print("\t\t current group structure: %s " % (currentGroup.partition,))
                    currentR2 = _fitGroupR2(data, currentGroup, kernel,
                                            useLowRankApproximation, rank)
                    # Check if there is improvement
                    if currentR2 > bestR2:
                        print("\t\t current R2 =\t %.10f \t *" % currentR2)
                        bestR2 = currentR2
                        newGroup = currentGroup
                        bestCovariateIndex = covariateInd
                    else:
                        print("\t\t current R2 =\t %.10f" % currentR2)
                    print("\t\t best R2 =\t\t %.10f \n" % bestR2)
        else:
            print("\t ** No other groups than the pool. Pass ... ** \n")

        print("** Step %d updated group structure is: %s \n" % (counter, newGroup.partition))

        # `bestCovariateIndex` is only in the pool if it was (re)assigned
        # during this step: the previous winner has already been removed, and
        # the initial value is None. So this test means "some candidate
        # improved R2 in this step".
        if bestCovariateIndex in covariatesPool:
            covariatesPool.remove(bestCovariateIndex)
            oldGroup = newGroup
            if counter == p-1:
                print("** Finish with complete iterations. ** \n")
        else:
            print("** Finish with early termination at step %d due to no further improvement of R2. ** \n" % counter)
            break

    print("** SELECTED GROUP STRUCTURE: %s with R2 = %f ** \n" % (oldGroup.partition, bestR2))
    return oldGroup