Example #1
def rbm(data, numHid, modelType = "BB", **kwargs) :
    """
    rbm defination
    data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities
           when type is GB, should be continuous real value. data should have a format of *.npy
    numHid : number nodes of and hidden layer
    type   : rbm type, can be set as BB or GB

additional inputs (specified as name value pairs or in struct)
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness amd to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging
                before. Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis
    penalty         weight decay factor
    batchsize       The number of training instances per batch
    verbose         For printing progress
    anneal          Flag. If set true, the penalty is annealed linearly
                through epochs to 10% of its original value

    OUTPUTS:
    model.type      Type of RBM (i.e. type of its visible and hidden units)
    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer
    model.top       The activity of the top layer, to be used when training
                    DBN's
    errors          The errors in reconstruction at every epoch
       """

    arg = util.processOptions(kwargs, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.9,\
                            maxEpoch = 50, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 50, \
                            verbose = True, \
                            anneal = False)
    [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"],\
        arg["anneal"]
    ]

    # the epoch from which we start averaging the parameters
    avgStart = maxEpoch - avgLast

    # keep the original penalty around for annealing
    oldPenalty = penalty

    # numCases : number of examples
    # numDims  : the length of each example
    # each row is an example
    [numCases, numDims] = list(data.shape)

    if verbose :
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases,batchSize)

    # shuffle the data (copy first so the caller's array is not reordered)
    data = data.copy()
    np.random.shuffle(data)

    # init CUDA
#    cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1*np.random.randn(numVis,numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis,numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1,numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1,numHid)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidActP2 = cm.empty((batchSize, numHid))
    visState = cm.empty((batchSize,numVis))
    hidState = cm.empty((batchSize, numHid))

    t = 1
    weightAgv = biasVAgv = biasHAgv = None
    for epoch in range(maxEpoch) :
        error = []

        if anneal :
            # apply linear weight decay
            penalty = oldPenalty - 0.9 *epoch/maxEpoch*oldPenalty

        for batch in range(numBatch) :
            # row slice for this batch; the last batch may be smaller.
            # curSize tracks the true batch size so batchSize stays intact across epochs
            visTrue = deviceData.get_row_slice(batchSize*batch, min(batchSize*(batch+1), numCases))
            curSize = visTrue.shape[0]

            if visActP.shape[0] != curSize :
                # reallocate the temporaries to match the current batch size
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidActP2 = cm.empty((curSize, numHid))
                visState = cm.empty((curSize, numVis))
                hidState = cm.empty((curSize, numHid))

            visActP.assign(visTrue)

            # positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidState)

            if method == "SML" :
                if epoch == 1 and batch == 1 :
                    pass # SML initialization stub; not needed in practical use
            elif method == "CD" :
                pass

            # negative phase
            if modelType == "BB" :
                cm.dot(hidState, weight.T, target = visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

                visState.fill_with_rand()
                visState.less_than(visActP, target = visState)
            elif modelType == "GB" :
                cm.dot(hidState, weight.T, target = visActP)
                visActP.add_row_vec(biasV)

                # GB visibles: add unit Gaussian noise; wrap the numpy array in a
                # CUDAMatrix since cudamat's add() does not accept raw numpy arrays
                visActP.add(cm.CUDAMatrix(np.random.randn(curSize, numVis)), target = visState)

            # another positive phase
            cm.dot(visState, weight, target = hidActP2)
            hidActP2.add_row_vec(biasH)
            hidActP2.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP2, target=hidState)

            #update weight and bias
            dWeight = cm.dot(visTrue.T, hidActP)
            dWeight.subtract_dot(visState.T, hidActP2)
            dBiasV = visTrue.sum(axis = 0).subtract(visState.sum(axis = 0))
            dBiasH = hidActP.sum(axis = 0).subtract(hidActP2.sum(axis = 0))

            # weight decay: add -penalty*weight; weight.mult(penalty) would
            # rescale the weights themselves in place
            dWeight.divide(curSize).add_mult(weight, -penalty)
            dBiasV.divide(curSize)
            dBiasH.divide(curSize)

            weightInc.mult(momentum).add_mult(dWeight, eta)
            biasVInc.mult(momentum).add_mult(dBiasV, eta)
            biasHInc.mult(momentum).add_mult(dBiasH, eta)

            weight.add(weightInc)
            biasV.add(biasVInc)
            biasH.add(biasHInc)

            if epoch > avgStart and weightAgv is not None :
                # running average: avg <- (1 - 1/t)*avg + (1/t)*param
                weightAgv.mult(1.0 - 1.0/t).add_mult(weight, 1.0/t)
                biasVAgv.mult(1.0 - 1.0/t).add_mult(biasV, 1.0/t)
                biasHAgv.mult(1.0 - 1.0/t).add_mult(biasH, 1.0/t)
                t = t+1
            else :
                # keep copies, not aliases, so averaging does not overwrite the parameters
                if weightAgv is None :
                    weightAgv = cm.empty(weight.shape)
                    biasVAgv = cm.empty(biasV.shape)
                    biasHAgv = cm.empty(biasH.shape)
                weightAgv.assign(weight)
                biasVAgv.assign(biasV)
                biasHAgv.assign(biasH)

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free the batch slice
            visTrue.free_device_memory()
        if verbose :
            print "epoch %d/%d. Reconstruction error is %f " % (epoch+1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceData, weightAgv, target = top)
    top.add_row_vec(biasHAgv)
    top.apply_sigmoid()

    # copy parameters back to the host before shutting CUDA down
    weightAgv.copy_to_host()
    biasVAgv.copy_to_host()
    biasHAgv.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weightAgv.numpy_array, biasVAgv.numpy_array, \
                        biasHAgv.numpy_array, type = modelType, top = top.numpy_array)

    cm.shutdown()

    return model_
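
A minimal usage sketch for this example. It assumes the function above is importable as rbm, that a cudamat-capable GPU is available, and that the training matrix is binary with one example per row; the file name digits.npy is illustrative only.

import numpy as np

# load binary training data, one example per row
data = np.load("digits.npy").astype(np.float32)

# train a Bernoulli-Bernoulli RBM with 256 hidden units
model = rbm(data, 256, modelType="BB",
            method="CD", eta=0.05, momentum=0.9,
            maxEpoch=20, batchSize=100, verbose=True)

# model.top holds the hidden activities of the training data,
# usable as the input to the next layer when stacking a DBN
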
Example #2
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs) :
    """
    X              ... data. should be binary, or in [0,1] interpreted as
                   ... probabilities
    numhid         ... number of hidden units
    y              ... List of discrete labels

    nClass          number of classes
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness amd to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging
                before. Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis

    batchsize       The number of training instances per batch
    verbose         For printing progress

    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer

    model.weightlabel       ... The weights on labels layer
    model.biasLabel       ... The biases on labels layer

    errors          The errors in reconstruction at each epoch
       """

    arg = util.processOptions(kwargs, \
                            nClass = np.unique(y).size, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["nClass"],\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    if verbose :
        print "Processing data ..."

    # the epoch from which we start averaging the parameters
    #    avgStart = maxEpoch - avgLast

    # keep the original penalty around for annealing
    #    oldPenalty = penalty

    # numCases : number of examples
    # numDims  : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1*np.random.randn(numVis,numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1*np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1,nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis,numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1,numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1,numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1,nClass)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch) :
        error = []

        for batch in range(numBatch) :
            # row slice for this batch; the last batch may be smaller.
            # curSize tracks the true batch size so batchSize stays intact across epochs
            end = min(batchSize*(batch+1), numCases)
            visTrue = deviceData.get_row_slice(batchSize*batch, end)
            labelTrue = deviceLabel.get_row_slice(batchSize*batch, end)
            curSize = visTrue.shape[0]

            if visActP.shape[0] != curSize :
                # reallocate the temporaries to match the current batch size
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidState = cm.empty((curSize, numHid))

            visActP.assign(visTrue)

            # apply momentum to the updates (not to the parameters themselves)
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if method == "SML" :
                if epoch == 1 and batch == 1 :
                    pass # SML initialization stub; not needed in practical use
            elif method == "CD" :
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target = visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target = labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and biases (learning rate scaled by the current batch size)
            weight.add_mult(weightInc, eta/curSize)
            biasV.add_mult(biasVInc, eta/curSize)
            biasH.add_mult(biasHInc, eta/curSize)
            weightLabel.add_mult(weightLabelInc, eta/curSize)
            biasLabel.add_mult(biasLabelInc, eta/curSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose :
            print "Epoch %d/%d, reconstruction error is %f " % (epoch+1, maxEpoch, sum(error))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array, \
                        weightLabel = weightLabel.numpy_array,\
                        biasLabel = biasLabel.numpy_array, labels = uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel :
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name,model)

    return model_
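
A minimal usage sketch for rbmFit, under the same assumptions as above: X is a matrix of values in [0, 1] with one example per row, y holds one discrete label per example, and the util and m helper modules behave as the listing implies. The file names are illustrative only.

import numpy as np

X = np.load("train_images.npy").astype(np.float32)  # examples in rows, values in [0, 1]
y = list(np.load("train_labels.npy"))               # one discrete label per example

# train a classification RBM with 500 hidden units and save it to disk
model = rbmFit(X, 500, y, isSaveModel=True, name="rbm_model.npy",
               eta=0.1, momentum=0.5, maxEpoch=100, batchSize=100)
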
Example #3
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """
    X              ... data. should be binary, or in [0,1] interpreted as
                   ... probabilities
    numhid         ... number of hidden units
    y              ... List of discrete labels

    nClass          number of classes
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness amd to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging
                before. Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis

    batchsize       The number of training instances per batch
    verbose         For printing progress

    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer

    model.weightlabel       ... The weights on labels layer
    model.biasLabel       ... The biases on labels layer

    errors          The errors in reconstruction at each epoch
       """

    arg = util.processOptions(kwargs, \
                            nClass = np.unique(y).size, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["nClass"],\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    if verbose:
        print "Processing data ..."

    # the epoch from which we start averaging the parameters
    #    avgStart = maxEpoch - avgLast

    # keep the original penalty around for annealing
    #    oldPenalty = penalty

    # numCases : number of examples
    # numDims  : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1 * np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1, nClass)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # row slice for this batch; the last batch may be smaller.
            # curSize tracks the true batch size so batchSize stays intact across epochs
            end = min(batchSize * (batch + 1), numCases)
            visTrue = deviceData.get_row_slice(batchSize * batch, end)
            labelTrue = deviceLabel.get_row_slice(batchSize * batch, end)
            curSize = visTrue.shape[0]

            if visActP.shape[0] != curSize:
                # reallocate the temporaries to match the current batch size
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidState = cm.empty((curSize, numHid))

            visActP.assign(visTrue)

            # apply momentum to the updates (not to the parameters themselves)
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if method == "SML":
                if epoch == 1 and batch == 1:
                    pass  # SML initialization stub; not needed in practical use
            elif method == "CD":
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target=visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target=labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and biases (learning rate scaled by the current batch size)
            weight.add_mult(weightInc, eta / curSize)
            biasV.add_mult(biasVInc, eta / curSize)
            biasH.add_mult(biasHInc, eta / curSize)
            weightLabel.add_mult(weightLabelInc, eta / curSize)
            biasLabel.add_mult(biasLabelInc, eta / curSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print "Epoch %d/%d, reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array, \
                        weightLabel = weightLabel.numpy_array,\
                        biasLabel = biasLabel.numpy_array, labels = uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name, model)

    return model_
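
Examples #2 and #3 call util.softmax on the label reconstruction but do not show it. Judging from how it is used (it takes a CUDAMatrix of per-class scores and must return a CUDAMatrix of row-wise probabilities), a plausible implementation could round-trip through the host like this; this is a sketch consistent with the call site, not the project's actual helper.

import numpy as np
import cudamat as cm

def softmax(mat):
    # pull the scores back to the host
    mat.copy_to_host()
    a = mat.numpy_array
    # numerically stable row-wise softmax
    a = a - a.max(axis=1, keepdims=True)
    e = np.exp(a)
    p = e / e.sum(axis=1, keepdims=True)
    # push the probabilities back to the device
    return cm.CUDAMatrix(cm.reformat(p))
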
Example #4
def rbm(X, numHid, **kwargs):
    """
    rbm defination
    data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities
           when type is GB, should be continuous real value. data should have a format of *.npy
    numHid : number nodes of and hidden layer
    type   : rbm type, can be set as BB or GB

    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness amd to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging
                before. Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis

    batchsize       The number of training instances per batch
    verbose         For printing progress

    model.type      Type of RBM (i.e. type of its visible and hidden units)
    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer
    model.top       The activity of the top layer, to be used when training
                    DBN's
    errors          The errors in reconstruction at every epoch
       """
    # Note: *.transpose() allocates a new matrix on every call and wastes memory;
    # prefer the .T attribute instead

    arg = util.processOptions(kwargs, \
                            modelType = "BB", \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)

    [modelType, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["modelType"], \
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    # the epoch from which we start averaging the parameters
    #    avgStart = maxEpoch - avgLast

    # keep the original penalty around for annealing
    #    oldPenalty = penalty

    # numCases : number of examples
    # numDims  : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
    #    cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # row slice for this batch; the last batch may be smaller.
            # curSize tracks the true batch size so batchSize stays intact across epochs
            visTrue = deviceData.get_row_slice(batchSize * batch,
                                               min(batchSize * (batch + 1), numCases))
            curSize = visTrue.shape[0]

            if visActP.shape[0] != curSize:
                # reallocate the temporaries to match the current batch size
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidState = cm.empty((curSize, numHid))

            visActP.assign(visTrue)

            #apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if method == "SML":
                if epoch == 1 and batch == 1:
                    pass  # SML initialization stub; not needed in practical use
            elif method == "CD":
                pass

            # negative phase
            if modelType == "BB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

            elif modelType == "GB":
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)

                # GB visibles: add unit Gaussian noise; wrap the numpy array in a
                # CUDAMatrix since cudamat's add() does not accept raw numpy arrays
                visActP.add(cm.CUDAMatrix(np.random.randn(curSize, numVis)), target=visActP)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            # update weight and bias (learning rate scaled by the current batch size)
            weight.add_mult(weightInc, eta / curSize)
            biasV.add_mult(biasVInc, eta / curSize)
            biasH.add_mult(biasHInc, eta / curSize)

            #            if epoch > avgStart :
            #                # apply average
            #                weightAgv.subtract(weightAgv.subtract(weight).mult(1.0/t))
            #                biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0/t))
            #                biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0/t))
            #                t = t+1
            #            else :
            #                weightAgv = weight
            #                biasVAgv = biasV
            #                biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free device memory
            visTrue.free_device_memory()

        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(cm.CUDAMatrix(cm.reformat(X)), weight, target=top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, \
                        biasH.numpy_array, type = modelType, top = top.numpy_array)

    # free device memory
    deviceData.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()
    top.free_device_memory()

    cm.shutdown()

    return model_
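
Every listing leans on two small util helpers that are not shown. The sketch below is one plausible implementation consistent with how they are called: processOptions overlays user-supplied keyword arguments on a dict of defaults, and ceil is an integer ceiling division used to count batches. The real module may differ.

def processOptions(kwargs, **defaults):
    # start from the defaults, then overlay whatever the caller passed in
    arg = dict(defaults)
    arg.update(kwargs)
    return arg

def ceil(numCases, batchSize):
    # number of batches needed to cover numCases examples
    return int((numCases + batchSize - 1) // batchSize)
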
Example #5
def rbm(X, numHid, **kwargs) :
    """
    rbm defination
    data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities
           when type is GB, should be continuous real value. data should have a format of *.npy
    numHid : number nodes of and hidden layer
    type   : rbm type, can be set as BB or GB

    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness amd to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging
                before. Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis

    batchsize       The number of training instances per batch
    verbose         For printing progress

    model.type      Type of RBM (i.e. type of its visible and hidden units)
    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer
    model.top       The activity of the top layer, to be used when training
                    DBN's
    errors          The errors in reconstruction at every epoch
       """
    # Note: *.transpose() allocates a new matrix on every call and wastes memory;
    # prefer the .T attribute instead

    arg = util.processOptions(kwargs, \
                            modelType = "BB", \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)

    [modelType, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["modelType"], \
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    # the epoch from which we start averaging the parameters
    #    avgStart = maxEpoch - avgLast

    # keep the original penalty around for annealing
    #    oldPenalty = penalty

    # numCases : number of examples
    # numDims  : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    if verbose :
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
#    cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1*np.random.randn(numVis,numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis,numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1,numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1,numHid)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch) :
        error = []

        for batch in range(numBatch) :
            # row slice for this batch; the last batch may be smaller.
            # curSize tracks the true batch size so batchSize stays intact across epochs
            visTrue = deviceData.get_row_slice(batchSize*batch, min(batchSize*(batch+1), numCases))
            curSize = visTrue.shape[0]

            if visActP.shape[0] != curSize :
                # reallocate the temporaries to match the current batch size
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidState = cm.empty((curSize, numHid))

            visActP.assign(visTrue)

            #apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if method == "SML" :
                if epoch == 1 and batch == 1 :
                    pass # SML initialization stub; not needed in practical use
            elif method == "CD" :
                pass

            # negative phase
            if modelType == "BB" :
                cm.dot(hidActP, weight.T, target = visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

            elif modelType == "GB" :
                cm.dot(hidActP, weight.T, target = visActP)
                visActP.add_row_vec(biasV)

                # GB visibles: add unit Gaussian noise; wrap the numpy array in a
                # CUDAMatrix since cudamat's add() does not accept raw numpy arrays
                visActP.add(cm.CUDAMatrix(np.random.randn(curSize, numVis)), target=visActP)

            # another positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            # update weight and bias (learning rate scaled by the current batch size)
            weight.add_mult(weightInc, eta/curSize)
            biasV.add_mult(biasVInc, eta/curSize)
            biasH.add_mult(biasHInc, eta/curSize)

#            if epoch > avgStart :
#                # apply average
#                weightAgv.subtract(weightAgv.subtract(weight).mult(1.0/t))
#                biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0/t))
#                biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0/t))
#                t = t+1
#            else :
#                weightAgv = weight
#                biasVAgv = biasV
#                biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free device memory
            visTrue.free_device_memory()

        if verbose :
            print "epoch %d/%d. Reconstruction error is %f " % (epoch+1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(cm.CUDAMatrix(cm.reformat(X)), weight, target = top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, \
                        biasH.numpy_array, type = modelType, top = top.numpy_array)


    # free device memory
    deviceData.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()
    top.free_device_memory()

    cm.shutdown()

    return model_
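
For sanity-checking the GPU listings, here is a CPU reference of one CD-1 step on a Bernoulli-Bernoulli RBM in plain NumPy. It mirrors the update used above: hidden probabilities from the data, a binary hidden sample, a one-step reconstruction, and gradients scaled by the batch size. This is a verification sketch, not part of the original module.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cd1_step(v0, weight, biasV, biasH, eta):
    # positive phase: hidden probabilities, then a binary sample
    h0 = sigmoid(np.dot(v0, weight) + biasH)
    h0s = (np.random.rand(*h0.shape) < h0).astype(v0.dtype)

    # negative phase: one-step reconstruction of the visibles
    v1 = sigmoid(np.dot(h0s, weight.T) + biasV)

    # another positive phase on the reconstruction
    h1 = sigmoid(np.dot(v1, weight) + biasH)

    # gradient estimates, scaled by the batch size
    n = v0.shape[0]
    weight += eta * (np.dot(v0.T, h0) - np.dot(v1.T, h1)) / n
    biasV += eta * (v0.sum(axis=0) - v1.sum(axis=0)) / n
    biasH += eta * (h0.sum(axis=0) - h1.sum(axis=0)) / n

    # squared reconstruction error, as in the listings
    return ((v0 - v1) ** 2).sum()
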
Example #6
def rbm(data, numHid, modelType="BB", **kwargs):
    """
    rbm defination
    data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities
           when type is GB, should be continuous real value. data should have a format of *.npy
    numHid : number nodes of and hidden layer
    type   : rbm type, can be set as BB or GB

additional inputs (specified as name value pairs or in struct)
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness amd to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging
                before. Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis
    penalty         weight decay factor
    batchsize       The number of training instances per batch
    verbose         For printing progress
    anneal          Flag. If set true, the penalty is annealed linearly
                through epochs to 10% of its original value

    OUTPUTS:
    model.type      Type of RBM (i.e. type of its visible and hidden units)
    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer
    model.top       The activity of the top layer, to be used when training
                    DBN's
    errors          The errors in reconstruction at every epoch
       """

    arg = util.processOptions(kwargs, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.9,\
                            maxEpoch = 50, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 50, \
                            verbose = True, \
                            anneal = False)
    [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"],\
        arg["anneal"]
    ]

    # the epoch from which we start averaging the parameters
    avgStart = maxEpoch - avgLast

    # keep the original penalty around for annealing
    oldPenalty = penalty

    # numCases : number of examples
    # numDims  : the length of each example
    # each row is an example
    [numCases, numDims] = list(data.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data (copy first so the caller's array is not reordered)
    data = data.copy()
    np.random.shuffle(data)

    # init CUDA
    #    cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidActP2 = cm.empty((batchSize, numHid))
    visState = cm.empty((batchSize, numVis))
    hidState = cm.empty((batchSize, numHid))

    t = 1
    weightAgv = biasVAgv = biasHAgv = None
    for epoch in range(maxEpoch):
        error = []

        if anneal:
            # apply linear weight decay
            penalty = oldPenalty - 0.9 * epoch / maxEpoch * oldPenalty

        for batch in range(numBatch):
            # row slice for this batch; the last batch may be smaller.
            # curSize tracks the true batch size so batchSize stays intact across epochs
            visTrue = deviceData.get_row_slice(batchSize * batch,
                                               min(batchSize * (batch + 1), numCases))
            curSize = visTrue.shape[0]

            if visActP.shape[0] != curSize:
                # reallocate the temporaries to match the current batch size
                visActP = cm.empty((curSize, numVis))
                hidActP = cm.empty((curSize, numHid))
                hidActP2 = cm.empty((curSize, numHid))
                visState = cm.empty((curSize, numVis))
                hidState = cm.empty((curSize, numHid))

            visActP.assign(visTrue)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidState)

            if method == "SML":
                if epoch == 1 and batch == 1:
                    pass  # SML initialization stub; not needed in practical use
            elif method == "CD":
                pass

            # negative phase
            if modelType == "BB":
                cm.dot(hidState, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

                visState.fill_with_rand()
                visState.less_than(visActP, target=visState)
            elif modelType == "GB":
                cm.dot(hidState, weight.T, target=visActP)
                visActP.add_row_vec(biasV)

                # GB visibles: add unit Gaussian noise; wrap the numpy array in a
                # CUDAMatrix since cudamat's add() does not accept raw numpy arrays
                visActP.add(cm.CUDAMatrix(np.random.randn(curSize, numVis)),
                            target=visState)

            # another positive phase
            cm.dot(visState, weight, target=hidActP2)
            hidActP2.add_row_vec(biasH)
            hidActP2.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP2, target=hidState)

            #update weight and bias
            dWeight = cm.dot(visTrue.T, hidActP)
            dWeight.subtract_dot(visState.T, hidActP2)
            dBiasV = visTrue.sum(axis=0).subtract(visState.sum(axis=0))
            dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis=0))

            # weight decay: add -penalty*weight; weight.mult(penalty) would
            # rescale the weights themselves in place
            dWeight.divide(curSize).add_mult(weight, -penalty)
            dBiasV.divide(curSize)
            dBiasH.divide(curSize)

            weightInc.mult(momentum).add_mult(dWeight, eta)
            biasVInc.mult(momentum).add_mult(dBiasV, eta)
            biasHInc.mult(momentum).add_mult(dBiasH, eta)

            weight.add(weightInc)
            biasV.add(biasVInc)
            biasH.add(biasHInc)

            if epoch > avgStart and weightAgv is not None:
                # running average: avg <- (1 - 1/t)*avg + (1/t)*param
                weightAgv.mult(1.0 - 1.0 / t).add_mult(weight, 1.0 / t)
                biasVAgv.mult(1.0 - 1.0 / t).add_mult(biasV, 1.0 / t)
                biasHAgv.mult(1.0 - 1.0 / t).add_mult(biasH, 1.0 / t)
                t = t + 1
            else:
                # keep copies, not aliases, so averaging does not overwrite the parameters
                if weightAgv is None:
                    weightAgv = cm.empty(weight.shape)
                    biasVAgv = cm.empty(biasV.shape)
                    biasHAgv = cm.empty(biasH.shape)
                weightAgv.assign(weight)
                biasVAgv.assign(biasV)
                biasHAgv.assign(biasH)

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free the batch slice
            visTrue.free_device_memory()
        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceData, weightAgv, target=top)
    top.add_row_vec(biasHAgv)
    top.apply_sigmoid()

    # copy parameters back to the host before shutting CUDA down
    weightAgv.copy_to_host()
    biasVAgv.copy_to_host()
    biasHAgv.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weightAgv.numpy_array, biasVAgv.numpy_array,
                        biasHAgv.numpy_array, type=modelType, top=top.numpy_array)

    cm.shutdown()

    return model_
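
The avgLast option in Examples #1 and #6 implements the parameter-averaging trick from Kevin Swersky's MSc thesis: over the last avgLast epochs the returned parameters are a running mean of the per-batch parameters, avg_t = (1 - 1/t)*avg_(t-1) + (1/t)*w_t, which equals the plain mean of w_1, ..., w_t. A small self-contained NumPy illustration of that recurrence:

import numpy as np

class RunningAverage(object):
    # running mean of a parameter array: avg += (w - avg) / t
    def __init__(self, shape):
        self.avg = np.zeros(shape)
        self.t = 0

    def update(self, w):
        self.t += 1
        self.avg += (w - self.avg) / self.t
        return self.avg

# the running mean matches the plain mean of the observed parameters
ra = RunningAverage((2, 2))
ws = [np.random.randn(2, 2) for _ in range(5)]
for w in ws:
    ra.update(w)
assert np.allclose(ra.avg, np.mean(ws, axis=0))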