Example #1
def rbmHtoV(m, X):
    """convey data fron hidden layer to visible layer"""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasV = cm.CUDAMatrix(cm.reformat(m.biasV))

    nCase = X.shape[0]
    nVis = biasV.asarray().size
    VisActP = cm.CUDAMatrix(np.zeros((nCase, nVis)))

    if m.type == "BB":
        cm.dot(data, weight.T, target=VisActP)
        VisActP.add_row_vec(biasV)
        VisActP.apply_sigmoid()
    elif m.type == "BG":
        cm.dot(data, weight.T, target=VisActP)
        VisActP.add_row_vec(biasV)
    elif m.type == "GB":
        pass

    result = VisActP.asarray()

    #free device memory
    data.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    VisActP.free_device_memory()

    cm.shutdown()

    return result
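A minimal usage sketch for the example above. The FakeRBM holder is hypothetical; any object with weight, biasV and type attributes works, and cudamat plus a GPU must be available (rbmHtoV handles cublas_init/shutdown itself):

import numpy as np

class FakeRBM:
    """Hypothetical stand-in for the trained model object used above."""
    def __init__(self, numVis, numHid):
        self.weight = 0.1 * np.random.randn(numVis, numHid)
        self.biasV = np.zeros((1, numVis))
        self.type = "BB"

m = FakeRBM(numVis=784, numHid=128)
H = np.random.rand(10, 128)   # ten rows of hidden-layer activations
V = rbmHtoV(m, H)             # (10, 784) sigmoid visible activations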
Example #2
def infer(
    full_sim_matrix_file: str,
    repr_sim_matrix_file: str,
    output: str,
    n_components: int,
    sim: str,
    use_gpu: bool,
):
    """Load pre-computed similarity matrix."""
    secho(
        f"Loading the repr similarity matrix for the full vocabulary to {repr_sim_matrix_file}"
    )
    repr_similarity_matrix = np.load(repr_sim_matrix_file)
    secho(
        f"Loading the full similarity matrix for the full vocabulary to {full_sim_matrix_file}"
    )
    full_similarity_matrix = np.load(full_sim_matrix_file)
    optim_folder = os.path.join(output, 'optim')
    os.makedirs(optim_folder, exist_ok=True)

    optimize_projections(
        output=optim_folder,
        repr_similarity_matrix=repr_similarity_matrix,
        full_similarity_matrix=full_similarity_matrix,
        n_components=n_components,
        similarity_type=sim,
        use_gpu=use_gpu,
    )

    if use_gpu:  # only shut down after all loops have used this function
        import cudamat as cm
        cm.shutdown()

    secho(f"done. Enjoy your {make_ratvec(3)}")
Example #3
def rbmHtoV(m, X):
    """convey data from the hidden layer to the visible layer"""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasV = cm.CUDAMatrix(cm.reformat(m.biasV))

    nCase = X.shape[0]
    nVis = biasV.asarray().size
    VisActP = cm.CUDAMatrix(np.zeros((nCase, nVis)))

    if m.type == "BB" :
        cm.dot(data, weight.T, target = VisActP)
        VisActP.add_row_vec(biasV)
        VisActP.apply_sigmoid()
    elif m.type == "BG" :
        cm.dot(data, weight.T, target = VisActP)
        VisActP.add_row_vec(biasV)
    elif m.type == "GB" :
        pass

    result = VisActP.asarray()

    #free device memory
    data.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    VisActP.free_device_memory()

    cm.shutdown()

    return result
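Example #4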
def main():
    cmt.init()
    cmt.CUDAMatrix.init_random()
    if HEATUP:
        print("heating up for %g seconds..." % HEATUP, end=' ')
        sys.stdout.flush()
        heatup(HEATUP)
        print("done.")
    print("small matrix shape:", XS_SHAPE)
    print("large matrix shape:", XL_SHAPE)
    for funcname, func in filter(lambda f: f[0].startswith('bench_'),
                                 getmembers(getmodule(main), isfunction)):
        print("%-15s" % funcname[len('bench_'):], end=' ')
        sys.stdout.flush()
        for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE,
                                                             1):
            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
            time = min(timeit.repeat(\
                    setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
                    stmt="%s(*mats)" % funcname, repeat=repeat,
                    number=NUM_ITER * factor)) / (NUM_ITER * factor)
            print("%.3es (%s) " % (time, size), end=' ')
            sys.stdout.flush()
        print()
    cmt.shutdown()
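The harness above discovers benchmarks by introspection: every module-level function named bench_* is timed against matrices produced by a module-level setup(shape) factory. A hedged sketch of what such a pair might look like (illustrative bodies, not the original module's):

import cudamat as cmt

def setup(shape):
    """Allocate operand matrices of the requested shape on the GPU."""
    a = cmt.empty(shape).fill_with_rand()
    b = cmt.empty(shape).fill_with_rand()
    return a, b

def bench_add(a, b):
    a.add(b)

def bench_dot(a, b):
    cmt.dot(a.T, b)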
Example #5
def main(argv):
    global episodes
    global graph_size

    cm.init()

    env = gym.make('SpaceInvaders-v0')

    action_space = env.action_space.n
    observation_shape = env.observation_space.shape
    observation_space = observation_shape[0] / 3 * observation_shape[
        1] / 4 * observation_shape[2]
    observation_space = int(observation_space)
    total_size = graph_size + action_space + observation_space + 1  # Additional spot for reward
    graph = Graph(total_size)

    # Run until done
    for i in range(episodes):
        # Initial step
        x = np.random.normal(size=graph_size)
        action = np.zeros(action_space)
        input_val = create_input(env.reset(), action, x, 0.0)
        output = graph.predict(input_val, 0.2)
        action = output[observation_space:observation_space + action_space]

        while True:
            observation, reward, done, info = env.step(np.argmax(action))
            if done:
                print('Final reward: %f' % (reward, ))
                break

            # Update graph
            input_val = create_input(observation, action, x, reward)
            x = graph(input_val, 0.2)
            x = x[observation_space + action_space:-1]

            # env.render()

            # Select next action
            if random.random() < 0.3:
                action = np.zeros(action_space)
                action[env.action_space.sample()] = 1.0
            else:
                input_val = create_input(observation, np.zeros(action_space),
                                         x, 10000.0)
                output = graph.predict(input_val, 0.2)
                action = output[observation_space:observation_space +
                                action_space]

        graph.save('graph.npy')

    env.close()
    cm.shutdown()
Example #6
def rbmVtoH(m, X):
    """convey data from the visible layer to the hidden layer"""
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))

    nCase = X.shape[0]
    nHid = biasH.asarray().size

    hidActP = cm.CUDAMatrix(np.zeros((nCase, nHid)))

    if m.type == "BB" :
        cm.dot(data, weight, target = hidActP)
        hidActP.add_row_vec(biasH)
        hidActP.apply_sigmoid()
    elif m.type == "BG" :
        cm.dot(data, weight, target = hidActP)
        hidActP.add_row_vec(biasH)
    elif m.type == "GB" :
        pass

    result = hidActP.asarray()

    # free device memory
    data.free_device_memory()

    weight.free_device_memory()
    biasH.free_device_memory()
    hidActP.free_device_memory()

    cm.shutdown()

    return result
Example #7
def main():
    cmt.init()
    cmt.CUDAMatrix.init_random()
    if HEATUP:
        print("heating up for %g seconds..." % HEATUP, end=' ')
        sys.stdout.flush()
        heatup(HEATUP)
        print("done.")
    print("small matrix shape:", XS_SHAPE)
    print("large matrix shape:", XL_SHAPE)
    for funcname, func in filter(lambda fn_f: fn_f[0].startswith('bench_'),
            getmembers(getmodule(main), isfunction)):
        print("%-15s" % funcname[len('bench_'):], end=' ')
        sys.stdout.flush()
        for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1):
            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
            time = min(timeit.repeat(\
                    setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
                    stmt="%s(*mats)" % funcname, repeat=repeat,
                    number=NUM_ITER * factor)) / (NUM_ITER * factor)
            print("%.3es (%s) " % (time, size), end=' ')
            sys.stdout.flush()
        print()
    cmt.shutdown()
Example #8
batchsz = 1000

tic = time.time()
cm.cublas_init()
pool = scipy.io.loadmat(pool_path)
W = cm.CUDAMatrix(pool.get('W'))
data_dim = W.shape[0]
pool_dim = W.shape[1]
trainXP = np.zeros((pool_dim, trainsz))
testXP = np.zeros((pool_dim, testsz))
XP_gpu = cm.empty((pool_dim, batchsz))
for n in range(n_train_file):
    print "processing train %d" % n
    XC = np.load(train_path % (n+1))
    for i in range(filesz/batchsz):
        XC_gpu = cm.CUDAMatrix(XC[:,i*batchsz:(i+1)*batchsz])
        cm.dot(W.T, XC_gpu, target=XP_gpu)
        trainXP[:,n*filesz+i*batchsz:n*filesz+(i+1)*batchsz] = XP_gpu.asarray()

for n in range(n_test_file):
    print "processing test %d" % n
    XC = np.load(test_path % (n+1))
    for i in range(filesz/batchsz):
        XC_gpu = cm.CUDAMatrix(XC[:,i*batchsz:(i+1)*batchsz])
        cm.dot(W.T, XC_gpu, target=XP_gpu)
        testXP[:,n*filesz+i*batchsz:n*filesz+(i+1)*batchsz] = XP_gpu.asarray()
cm.shutdown()

scipy.io.savemat(out_path, {"trainXC": trainXP, "testXC": testXP})
print "time %f" % (time.time() - tic)
Example #9
def rbmPredict(m, X):
    """using trained rbm model to do prediction"""
    nClass = m.labels.size
    numCase = X.shape[0]

    # This part is executed on CPU
    # define the free energy
    #    FF = np.zeros((numCase, nClass))
    #    FFcol = np.zeros((numCase, 1))
    #    for index in range(nClass) :
    #        temp = np.zeros((numCase, nClass))
    #        temp[:, index] = 1
    #
    #        tt = np.emath.log(np.exp(np.dot(X, m.weight)+ np.dot(temp, m.weightLabel) + m.biasH)+1)
    #
    #        FFcol = temp[:,index] * m.biasLabel[0,index] + np.sum(tt,axis = 1)
    #
    #        FF[:, index] = FFcol
    #
    #    [x, y] = np.where(np.abs(FF - np.max(FF, axis=1, keepdims=True)) < 1e-5)

    #    result = np.zeros(y.shape)

    #    for index in range(y.size) :
    #        result[index] = m.labels[y[index]]

    # The following part runs on GPU
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
    weightLabel = cm.CUDAMatrix(cm.reformat(m.weightLabel))
    biasLabel = cm.CUDAMatrix(cm.reformat(m.biasLabel))

    F = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    Fcol = cm.CUDAMatrix(np.zeros((numCase, 1)))
    temp = cm.CUDAMatrix(np.zeros((numCase, nClass)))

    tt = cm.CUDAMatrix(np.zeros((numCase, biasH.asarray().size)))
    for index in range(nClass):
        temp.assign(0)

        temp.set_col_slice(index, index + 1, 1)

        tt = cm.dot(data, weight)
        tt.add_dot(temp, weightLabel)
        tt.add_row_vec(biasH)
        cm.log_1_plus_exp(tt, target=tt, exact=True)

        Fcol = cm.sum(tt, axis=1)
        Fcol.add_mult(temp.get_col_slice(index, index + 1),
                      biasLabel.numpy_array[0, index])

        F.set_col_slice(index, index + 1, Fcol)

        tt.free_device_memory()

    F.copy_to_host()
    [x, y] = np.where(
        np.abs(F.numpy_array -
               np.max(F.numpy_array, axis=1, keepdims=True)) < 1e-5)

    # free device memory
    data.free_device_memory()

    weight.free_device_memory()
    biasH.free_device_memory()
    biasLabel.free_device_memory()
    weightLabel.free_device_memory()

    F.free_device_memory()
    Fcol.free_device_memory()
    temp.free_device_memory()

    cm.shutdown()

    result = np.zeros(y.shape)

    for index in range(y.size):
        result[index] = m.labels[y[index]]

    return [result, F.numpy_array]
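The commented-out block at the top of this example is the CPU reference for the same computation. A cleaned-up NumPy version of that free-energy scoring (a standalone sketch with hypothetical parameter names mirroring the model fields above):

import numpy as np

def rbm_free_energy_predict(X, weight, weightLabel, biasH, biasLabel, labels):
    """Score each class by its free energy and pick the best one."""
    numCase, nClass = X.shape[0], labels.size
    F = np.zeros((numCase, nClass))
    hidden_in = X.dot(weight) + biasH            # shared across classes
    for c in range(nClass):
        tt = np.log1p(np.exp(hidden_in + weightLabel[c]))
        F[:, c] = biasLabel[0, c] + tt.sum(axis=1)
    return labels[np.argmax(F, axis=1)], F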
Example #10
def __del__(self):
    cm.shutdown()
Example #11
def main():
    cmt.init()
    cmt.CUDAMatrix.init_random()
    if HEATUP:
        print "heating up for %g seconds..." % HEATUP,
        sys.stdout.flush()
        heatup(HEATUP)
        print "done."
    print "small matrix shape:", XS_SHAPE
    print "large matrix shape:", XL_SHAPE
    for funcname, func in ifilter(lambda (fn, f): fn.startswith('bench_'),
                                  getmembers(getmodule(main), isfunction)):
        print "%-15s" % funcname[len('bench_'):],
        sys.stdout.flush()
        for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE,
                                                             1):
            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
            time = min(timeit.repeat(\
                    setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
                    stmt="%s(*mats)" % funcname, repeat=repeat,
                    number=NUM_ITER * factor)) / (NUM_ITER * factor)
            print "%.3es (%s) " % (time, size),
            sys.stdout.flush()
        print
    cmt.shutdown()


if __name__ == "__main__":
    main()
Example #12
        cmt.dot(cmt.empty((200, 200)), cmt.empty((200, 200)))

def main():
    cmt.init()
    cmt.CUDAMatrix.init_random()
    if HEATUP:
        print "heating up for %g seconds..." % HEATUP,
        sys.stdout.flush()
        heatup(HEATUP)
        print "done."
    print "small matrix shape:", XS_SHAPE
    print "large matrix shape:", XL_SHAPE
    for funcname, func in ifilter(lambda (fn, f): fn.startswith('bench_'),
            getmembers(getmodule(main), isfunction)):
        print "%-15s" % funcname[len('bench_'):],
        sys.stdout.flush()
        for size, shape, factor in ('small', XS_SHAPE, 10), ('large', XL_SHAPE, 1):
            repeat = NUM_REPEATS * getattr(func, 'repeats', 1)
            time = min(timeit.repeat(\
                    setup="from __main__ import setup, %s\nmats = setup(%s)" % (funcname, shape),
                    stmt="%s(*mats)" % funcname, repeat=repeat,
                    number=NUM_ITER * factor)) / (NUM_ITER * factor)
            print "%.3es (%s) " % (time, size),
            sys.stdout.flush()
        print
    cmt.shutdown()

if __name__=="__main__":
    main()

Example #13
def main(
    full_vocab_file: str,
    repr_vocab_file: str,
    output: str,
    n_components: int,
    sim: str,
    sim_alignment_matrix: str,
    n_ngram: int,
    use_gpu: bool,
    processes: int,
) -> None:
    """Compute KPCA embeddings on a given data set."""
    n = n_ngram  # meh
    output = os.path.abspath(output)
    os.makedirs(output, exist_ok=True)

    full_vocab = _preprocess_vocab_file(full_vocab_file)

    if repr_vocab_file is None:
        repr_vocab = full_vocab
    else:
        repr_vocab = _preprocess_vocab_file(repr_vocab_file)

    params_path = os.path.join(output, 'training_manifest.json')
    secho(f'Outputting training information to {params_path}')
    manifest = dict(
        sim=sim,
        n=n,
        len_full_vocab=len(full_vocab),
        len_repr_vocab=len(repr_vocab),
        kernels=kernels,
    )
    with open(params_path, 'w') as file:
        json.dump(manifest, file, sort_keys=True, indent=2)

    if use_gpu:
        import cudamat as cm
        cm.cublas_init()

    if sim == 'global-alignment':
        secho(
            f'Computing global alignment similarities with {sim_alignment_matrix}'
        )
        repr_similarity_matrix = calculate_global_alignment_similarity_matrix(
            full_vocab=repr_vocab,
            repr_vocab=repr_vocab,
            processes=processes,
            matrix=sim_alignment_matrix,
            tqdm_desc=f'{EMOJI} Computing self-similarity matrix for '
            f'repr vocab with global alignment ({sim_alignment_matrix})')
        full_similarity_matrix = calculate_global_alignment_similarity_matrix(
            full_vocab=full_vocab,
            repr_vocab=repr_vocab,
            processes=processes,
            matrix=sim_alignment_matrix,
            tqdm_desc=f'{EMOJI} Computing similarity matrix between '
            f'full/repr vocab with global alignment ({sim_alignment_matrix})')
    else:
        alphabet = set(itt.chain.from_iterable(repr_vocab))
        alphabet.add(" ")

        ngram_to_index = {
            ngram: i
            for i, ngram in enumerate(
                ["".join(t) for t in itt.product(alphabet, repeat=n)])
        }

        if sim == "ngram_intersec":
            secho(f'Computing n-gram sparse similarities with {sim}')
            repr_similarity_matrix = compute_similarity_matrix_ngram_sparse(
                full_vocab=repr_vocab,
                repr_vocab=repr_vocab,
                ngram_to_index=ngram_to_index,
                n=n,
            )
            full_similarity_matrix = compute_similarity_matrix_ngram_sparse(
                full_vocab=full_vocab,
                repr_vocab=repr_vocab,
                ngram_to_index=ngram_to_index,
                n=n,
            )
        else:  # sim == 'ngram_sim'
            secho(f'Computing n-gram similarities with {sim}')
            repr_similarity_matrix = compute_similarity_matrix_ngram_parallel(
                full_vocab=repr_vocab,
                repr_vocab=repr_vocab,
                n=n,
                ngram_to_index=ngram_to_index,
                processes=processes,  # Extra because this gets multi-processed
            )
            full_similarity_matrix = compute_similarity_matrix_ngram_parallel(
                full_vocab=full_vocab,
                repr_vocab=repr_vocab,
                n=n,
                ngram_to_index=ngram_to_index,
                processes=processes,  # Extra because this gets multi-processed
            )

    repr_similarity_matrix_path = os.path.join(output, "repr_similarity_matrix.npy")
    secho(
        f"Saving the repr similarity matrix for the full vocabulary to {repr_similarity_matrix_path}"
    )
    np.save(repr_similarity_matrix_path,
            repr_similarity_matrix,
            allow_pickle=False)

    full_similarity_matrix_path = os.path.join(output, "full_similarity_matrix.npy")
    secho(
        f"Saving the full similarity matrix for the full vocabulary to {full_similarity_matrix_path}"
    )
    np.save(full_similarity_matrix_path,
            full_similarity_matrix,
            allow_pickle=False)

    optim_folder = os.path.join(output, 'optim')
    os.makedirs(optim_folder, exist_ok=True)

    if n_components is None:
        n_components = int(0.5 + len(repr_vocab) * 2 / 3)

    optimize_projections(
        output=optim_folder,
        repr_similarity_matrix=repr_similarity_matrix,
        full_similarity_matrix=full_similarity_matrix,
        n_components=n_components,
        similarity_type=sim,
        use_gpu=use_gpu,
    )

    if use_gpu:  # only shut down after all loops have used this function
        import cudamat as cm
        cm.shutdown()

    secho(f"done. Enjoy your {make_ratvec(3)}")
Example #14
def rbm(X, numHid, **kwargs):
    """
    rbm definition
    data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities
           when type is GB, should be continuous real values. data should have a format of *.npy
    numHid : number of nodes in the hidden layer
    type   : rbm type, can be set as BB or GB

    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness and to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through the training data
    avglast         how many epochs before maxepoch to start averaging.
                Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis

    batchsize       The number of training instances per batch
    verbose         For printing progress

    model.type      Type of RBM (i.e. type of its visible and hidden units)
    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer
    model.top       The activity of the top layer, to be used when training
                    DBN's
    errors          The errors in reconstruction at every epoch
       """
    # when computing the transpose of a matrix, the *.transpose() method is much more
    # space-consuming; the .T attribute is a cheaper alternative

    arg = util.processOptions(kwargs, \
                            modelType = "BB", \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)

    [modelType, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["modelType"], \
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    # from which step, we start to compute the average
    #    avgStart = maxEpoch - avgLast

    # for weight decay use
    #    oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
    #    cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]

                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch,
                                                   batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            #apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            if cmp(modelType, "BB") == 0:
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

            elif cmp(modelType, "GB") == 0:
                cm.dot(hidActP, weight.T, target=visActP)
                visActP.add_row_vec(biasV)

                visActP.add(np.random.randn(batchSize, numVis), target=visActP)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            #update weight and bias
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)

            #            if epoch > avgStart :
            #                # apply average
            #                weightAgv.subtract(weightAgv.subtract(weight).mult(1.0/t))
            #                biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0/t))
            #                biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0/t))
            #                t = t+1
            #            else :
            #                weightAgv = weight
            #                biasVAgv = biasV
            #                biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free device memory
            visTrue.free_device_memory()

        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(cm.CUDAMatrix(cm.reformat(X)), weight, target=top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, \
                        biasH.numpy_array, type = modelType, top = top.numpy_array)

    # free device memory
    deviceData.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    return model_
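A hedged call sketch for the trainer above, with toy binary data (assumes the util and m helper modules the example imports, plus the rbmVtoH example earlier on this page):

import numpy as np

X = (np.random.rand(1000, 784) > 0.5).astype(np.float64)   # toy binary inputs
model = rbm(X, 256, modelType="BB", method="CD", eta=0.1,
            maxEpoch=20, batchSize=100, verbose=True)
hidden = rbmVtoH(model, X)   # project the data through the trained layer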
Example #15
def __del__(self):
    cm.shutdown()
Example #16
def rbm(data, numHid, modelType="BB", **kwargs):
    """
    rbm definition
    data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities
           when type is GB, should be continuous real values. data should have a format of *.npy
    numHid : number of nodes in the hidden layer
    type   : rbm type, can be set as BB or GB

additional inputs (specified as name value pairs or in struct)
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness and to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through the training data
    avglast         how many epochs before maxepoch to start averaging.
                Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis
    penalty         weight decay factor
    batchsize       The number of training instances per batch
    verbose         For printing progress
    anneal          Flag. If set true, the penalty is annealed linearly
                through epochs to 10% of its original value

    OUTPUTS:
    model.type      Type of RBM (i.e. type of its visible and hidden units)
    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer
    model.top       The activity of the top layer, to be used when training
                    DBN's
    errors          The errors in reconstruction at every epoch
       """

    arg = util.processOptions(kwargs, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.9,\
                            maxEpoch = 50, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 50, \
                            verbose = True, \
                            anneal = False)
    [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"],\
        arg["anneal"]
    ]

    # from which step, we start to compute the average
    avgStart = maxEpoch - avgLast

    # for weight decay use
    oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(data.shape)

    if verbose:
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    np.random.shuffle(data)

    # init CUDA
    #    cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidActP2 = cm.empty((batchSize, numHid))
    visState = cm.empty((batchSize, numVis))
    hidState = cm.empty((batchSize, numHid))

    t = 1
    for epoch in range(maxEpoch):
        error = []

        if anneal:
            # apply linear weight decay
            penalty = oldPenalty - 0.9 * epoch / maxEpoch * oldPenalty

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                batchSize = visTrue.shape[0]
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch,
                                                   batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidState)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            if cmp(modelType, "BB") == 0:
                cm.dot(hidState, weight.transpose(), target=visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

                visState.fill_with_rand()
                visState.less_than(visActP, target=visState)
            elif cmp(modelType, "GB") == 0:
                cm.dot(hidState, weight.transpose(), target=visActP)
                visActP.add_row_vec(biasV)

                visActP.add(np.random.randn(batchSize, numVis),
                            target=visState)

            # another positive phase
            cm.dot(visState, weight, target=hidActP2)
            hidActP2.add_row_vec(biasH)
            hidActP2.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP2, target=hidState)

            #update weight and bias
            dWeight = cm.dot(visTrue.transpose(), hidActP)
            dWeight.subtract_dot(visState.transpose(), hidActP2)
            dBiasV = visTrue.sum(axis=0).subtract(visState.sum(axis=0))
            dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis=0))

            dWeight.divide(batchSize).subtract(weight.mult(penalty))
            dBiasV.divide(batchSize)
            dBiasH.divide(batchSize)

            weightInc.mult(momentum).add_mult(dWeight, eta)
            biasVInc.mult(momentum).add_mult(dBiasV, eta)
            biasHInc.mult(momentum).add_mult(dBiasH, eta)

            weight.add(weightInc)
            biasV.add(biasVInc)
            biasH.add(biasHInc)

            if epoch > avgStart:
                # apply average
                weightAgv.subtract(weightAgv.subtract(weight).mult(1.0 / t))
                biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0 / t))
                biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0 / t))
                t = t + 1
            else:
                weightAgv = weight
                biasVAgv = biasV
                biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)
        if verbose:
            print "epoch %d/%d. Reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceData, weightAgv, target=top)
    top.add_row_vec(biasHAgv)
    top.apply_sigmoid()

    model_ = m.rbmModel(weightAgv, biasVAgv, biasHAgv, type=modelType, top=top)

    cm.shutdown()

    return model_
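Example #17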
def matrix_factorization_clustering(X_aux, k, l, norm=False, num_iters=100):
    cm.cublas_init()

    m, n = X_aux.shape
    U = cm.CUDAMatrix(np.random.rand(m, k))
    S = cm.CUDAMatrix(np.random.rand(k, l))
    V = cm.CUDAMatrix(np.random.rand(n, l))

    X = cm.CUDAMatrix(X_aux)

    # if norm:
    #     X = Normalizer().fit_transform(X)

    XV = cm.CUDAMatrix(np.random.rand(m, l))
    XVSt = cm.CUDAMatrix(np.random.rand(m, k))
    US = cm.CUDAMatrix(np.random.rand(m, l))
    USVt = cm.CUDAMatrix(np.random.rand(m, n))
    USVtXt = cm.CUDAMatrix(np.random.rand(m, m))
    USVtXtU = cm.CUDAMatrix(np.random.rand(m, k))
    U_aux = cm.CUDAMatrix(np.random.rand(m, k))

    XtUS = cm.CUDAMatrix(np.random.rand(m, l))
    VSt = cm.CUDAMatrix(np.random.rand(n, k))
    VStUt = cm.CUDAMatrix(np.random.rand(n, m))
    UtX = cm.CUDAMatrix(np.random.rand(k, n))
    VStUtXV = cm.CUDAMatrix(np.random.rand(n, l))
    V_aux = cm.CUDAMatrix(np.random.rand(n, l))

    UtXV = cm.CUDAMatrix(np.random.rand(k, l))
    UtUS = cm.CUDAMatrix(np.random.rand(k, l))
    UtUSVt = cm.CUDAMatrix(np.random.rand(k, n))
    UtUSVtV = cm.CUDAMatrix(np.random.rand(k, l))
    S_aux = cm.CUDAMatrix(np.random.rand(k, l))

    error_best = np.inf
    error = np.inf

    for i in range(num_iters):
        # compute U
        cm.dot(X, V, target=XV)
        cm.dot(XV, S.T, target=XVSt)

        if i == 0:
            cm.dot(U, S, target=US)
            cm.dot(US, V.T, target=USVt)
        cm.dot(USVt, X.T, target=USVtXt)
        cm.dot(USVtXt, U, target=USVtXtU)

        cm.divide(XVSt, USVtXtU, U_aux)
        cm.mult(U, U_aux, U)

        # compute V
        cm.dot(U, S, target=US)
        cm.dot(X.T, US, target=XtUS)
        cm.dot(V, S.T, target=VSt)
        cm.dot(VSt, U.T, target=VStUt)
        cm.dot(VStUt, XV, target=VStUtXV)

        cm.divide(XtUS, VStUtXV, target=V_aux)
        cm.mult(V, V_aux, V)

        # compute S
        cm.dot(U.T, X, target=UtX)
        cm.dot(UtX, V, target=UtXV)

        cm.dot(U.T, US, target=UtUS)
        cm.dot(UtUS, V.T, UtUSVt)
        cm.dot(UtUSVt, V, target=UtUSVtV)

        cm.divide(UtXV, UtUSVtV, target=S_aux)
        cm.mult(S, S_aux, target=S)

        error_ant = error

        cm.dot(U, S, target=US)
        cm.dot(US, V.T, target=USVt)
        # reduce to a host-side scalar so the comparisons below are well defined
        error = cm.sum(cm.pow(cm.subtract(X, USVt), 2), axis=0).asarray().sum()

        if error < error_best:
            U_best_cm = U
            S_best_cm = S
            V_best_cm = V
            error_best = error

        if np.abs(error - error_ant) <= 0.000001:
            break

        U_best = U_best_cm.asarray()
        S_best = S_best_cm.asarray()
        V_best = V_best_cm.asarray()

    Du = np.diag(np.ones(m).dot(U_best))
    Dv = np.diag(np.ones(n).dot(V_best))

    U_norm = U_best.dot(np.diag(S_best.dot(Dv).dot(np.ones(l))))
    V_norm = V_best.dot(np.diag(np.ones(k).dot(Du).dot(S_best)))

    rows_ind = np.argmax(U_best, axis=1)
    cols_ind = np.argmax(V_best, axis=1)

    cm.shutdown()

    return U_norm, S_best, V_norm, rows_ind, cols_ind, error_best
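The loop above is the multiplicative-update scheme for nonnegative matrix tri-factorization, X ≈ U S Vᵀ. A plain NumPy sketch of one update round (illustrative, with a small eps added to keep the divisions safe):

import numpy as np

def nmtf_step(X, U, S, V, eps=1e-12):
    """One multiplicative update of X ~= U @ S @ V.T, all factors nonnegative."""
    USVt = U @ S @ V.T
    U *= (X @ V @ S.T) / (USVt @ X.T @ U + eps)
    USVt = U @ S @ V.T
    V *= (X.T @ U @ S) / (V @ S.T @ U.T @ X @ V + eps)
    S *= (U.T @ X @ V) / (U.T @ U @ S @ V.T @ V + eps)
    return U, S, V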
Example #18
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """
    X              ... data. should be binary, or in [0,1] interpreted as
                   ... probabilities
    numhid         ... number of hidden units
    y              ... List of discrete labels

    nClass          number of classes
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness and to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through the training data
    avglast         how many epochs before maxepoch to start averaging.
                Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis

    batchsize       The number of training instances per batch
    verbose         For printing progress

    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer

    model.weightlabel       ... The weights on labels layer
    model.biasLabel       ... The biases on labels layer

    errors          The errors in reconstruction at each epoch
       """

    arg = util.processOptions(kwargs, \
                            nClass = np.unique(y).size, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["nClass"],\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    if verbose :
        print "Processing data ..."

    # from which step, we start to compute the average
    #    avgStart = maxEpoch - avgLast

    # for weight decay use
    #    oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1*np.random.randn(numVis,numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1*np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1,nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis,numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1,numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1,numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1,nClass)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch) :
        error = []

        for batch in range(numBatch) :
            # train each data batch
            if batchSize*(batch+1) > numCases :
                visTrue = deviceData.get_row_slice(batchSize*batch, numCases)
                labelTrue = deviceLabel.get_row_slice(batchSize*batch, numCases)
                batchSize = visTrue.shape[0]

                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else :
                visTrue = deviceData.get_row_slice(batchSize*batch, batchSize*(batch+1))
                labelTrue = deviceLabel.get_row_slice(batchSize*batch, batchSize*(batch+1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            #apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)  # the increments carry the momentum, not the weights
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if cmp(method, "SML") == 0 :
                if np.logical_and(np.equal(epoch,1), np.equal(batch,1)) :
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0 :
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target = visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target = labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and bias
            weight.add_mult(weightInc, eta/batchSize)
            biasV.add_mult(biasVInc, eta/batchSize)
            biasH.add_mult(biasHInc, eta/batchSize)
            weightLabel.add_mult(weightLabelInc, eta/batchSize)
            biasLabel.add_mult(biasLabelInc, eta/batchSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose :
            print "Epoch %d/%d, reconstruction error is %f " % (epoch+1, maxEpoch, sum(error))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array, \
                        weightLabel = weightLabel.numpy_array,\
                        biasLabel = biasLabel.numpy_array, labels = uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel :
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name,model)

    return model_
Example #19
def rbmFit(X, numHid, y, isSaveModel=False, name=None, **kwargs):
    """
    X              ... data. should be binary, or in [0,1] interpreted as
                   ... probabilities
    numhid         ... number of hidden units
    y              ... List of discrete labels

    nClass          number of classes
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness and to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through the training data
    avglast         how many epochs before maxepoch to start averaging.
                Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis

    batchsize       The number of training instances per batch
    verbose         For printing progress

    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer

    model.weightlabel       ... The weights on labels layer
    model.biasLabel       ... The biases on labels layer

    errors          The errors in reconstruction at each epoch
       """

    arg = util.processOptions(kwargs, \
                            nClass = np.unique(y).size, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)
    [nClass, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["nClass"],\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    if verbose:
        print "Processing data ..."

    # from which step, we start to compute the average
    #    avgStart = maxEpoch - avgLast

    # for weight decay use
    #    oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    numVis = numDims
    uniqueLabel = np.unique(y)
    numBatch = util.ceil(numCases, batchSize)

    y = util.matrixLabel(y)

    # shuffle data and label
    data = copy.deepcopy(X)
    [data, label] = util.shuffle(data, y)

    # init CUDA
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))
    deviceLabel = cm.CUDAMatrix(cm.reformat(label))

    # init weights
    weight = cm.CUDAMatrix(0.1 * np.random.randn(numVis, numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabel = cm.CUDAMatrix(0.1 * np.random.randn(nClass, numHid))
    biasLabel = cm.CUDAMatrix(np.zeros((1, nClass)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis, numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1, numHid)))
    weightLabelInc = cm.CUDAMatrix(np.zeros((nClass, numHid)))
    biasLabelInc = cm.CUDAMatrix(np.zeros((1, nClass)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch):
        error = []

        for batch in range(numBatch):
            # train each data batch
            if batchSize * (batch + 1) > numCases:
                visTrue = deviceData.get_row_slice(batchSize * batch, numCases)
                labelTrue = deviceLabel.get_row_slice(batchSize * batch,
                                                      numCases)
                batchSize = visTrue.shape[0]

                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else:
                visTrue = deviceData.get_row_slice(batchSize * batch,
                                                   batchSize * (batch + 1))
                labelTrue = deviceLabel.get_row_slice(batchSize * batch,
                                                      batchSize * (batch + 1))
                batchSize = visTrue.shape[0]

            visActP.assign(visTrue)

            #apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)
            weightLabelInc.mult(momentum)  # the increments carry the momentum, not the weights
            biasLabelInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)
            weightLabelInc.add_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if cmp(method, "SML") == 0:
                if np.logical_and(np.equal(epoch, 1), np.equal(batch, 1)):
                    pass  # not needed in practical use
            elif cmp(method, "CD") == 0:
                pass

            # negative phase
            cm.dot(hidActP, weight.T, target=visActP)
            visActP.add_row_vec(biasV)
            visActP.apply_sigmoid()

            cm.dot(hidActP, weightLabel.T, target=labelTrue)
            labelTrue.add_row_vec(biasLabel)
            labelTrue = util.softmax(labelTrue)

            # another positive phase
            cm.dot(visActP, weight, target=hidActP)
            hidActP.add_dot(labelTrue, weightLabel)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)
            weightLabelInc.subtract_dot(labelTrue.T, hidActP)
            biasLabelInc.add_sums(labelTrue, axis=0, mult=-1)

            # update weights and bias
            weight.add_mult(weightInc, eta / batchSize)
            biasV.add_mult(biasVInc, eta / batchSize)
            biasH.add_mult(biasHInc, eta / batchSize)
            weightLabel.add_mult(weightLabelInc, eta / batchSize)
            biasLabel.add_mult(biasLabelInc, eta / batchSize)

            # calculate reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm()**2)

            # free memory
            visTrue.free_device_memory()
            labelTrue.free_device_memory()

        if verbose:
            print "Epoch %d/%d, reconstruction error is %f " % (
                epoch + 1, maxEpoch, sum(error))

    # save rbm model
    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    weightLabel.copy_to_host()
    biasLabel.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, biasH.numpy_array, \
                        weightLabel = weightLabel.numpy_array,\
                        biasLabel = biasLabel.numpy_array, labels = uniqueLabel)

    # free device memory
    deviceData.free_device_memory()
    deviceLabel.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()
    weightLabel.free_device_memory()
    biasLabel.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()
    weightLabelInc.free_device_memory()
    biasLabelInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    if isSaveModel:
        modelList = []
        modelList.append(model_)
        model = np.array(modelList)
        np.save(name, model)

    return model_
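An end-to-end sketch tying this trainer to the rbmPredict example above (toy data; assumes the util and m helper modules the examples import):

import numpy as np

X = (np.random.rand(500, 64) > 0.5).astype(np.float64)   # toy binary inputs
y = np.random.randint(0, 3, size=500)                    # three toy classes
model = rbmFit(X, 100, y, method="CD", eta=0.1, maxEpoch=50, batchSize=100)
predictions, F = rbmPredict(model, X)
print("train accuracy:", np.mean(predictions == y))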
Example #20
def rbm(data, numHid, modelType = "BB", **kwargs) :
    """
    rbm defination
    data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities
           when type is GB, should be continuous real value. data should have a format of *.npy
    numHid : number nodes of and hidden layer
    type   : rbm type, can be set as BB or GB

additional inputs (specified as name value pairs or in struct)
    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothness and to prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through the training data
    avglast         how many epochs before maxepoch to start averaging.
                Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis
    penalty         weight decay factor
    batchsize       The number of training instances per batch
    verbose         For printing progress
    anneal          Flag. If set true, the penalty is annealed linearly
                through epochs to 10% of its original value

    OUTPUTS:
    model.type      Type of RBM (i.e. type of its visible and hidden units)
    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer
    model.top       The activity of the top layer, to be used when training
                    DBN's
    errors          The errors in reconstruction at every epoch
       """

    arg = util.processOptions(kwargs, \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.9,\
                            maxEpoch = 50, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 50, \
                            verbose = True, \
                            anneal = False)
    [method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose, anneal] = [\
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"],\
        arg["anneal"]
    ]

    # from which step, we start to compute the average
    avgStart = maxEpoch - avgLast

    # for weight decay use
    oldPenalty = penalty

    # numCases : number of examples
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(data.shape)

    if verbose :
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases,batchSize)

    # shuffle the data
    np.random.shuffle(data)

    # init CUDA
#    cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1*np.random.randn(numVis,numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis,numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1,numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1,numHid)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidActP2 = cm.empty((batchSize, numHid))
    visState = cm.empty((batchSize,numVis))
    hidState = cm.empty((batchSize, numHid))

    t = 1
    for epoch in range(maxEpoch) :
        error = []

        if anneal :
            # apply linear weight decay
            penalty = oldPenalty - 0.9 *epoch/maxEpoch*oldPenalty

        for batch in range(numBatch) :
            # train each mini-batch; the last one may be smaller than batchSize
            if batchSize*(batch+1) > numCases :
                visTrue = deviceData.get_row_slice(batchSize*batch, numCases)
                batchSize = visTrue.shape[0]
            else :
                visTrue = deviceData.get_row_slice(batchSize*batch, batchSize*(batch+1))

            visActP.assign(visTrue)

            # positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidState)

            if cmp(method, "SML") == 0 :
                if np.logical_and(np.equal(epoch,1), np.equal(batch,1)) :
                    pass # here does not need in practical use
            elif cmp(method, "CD") == 0 :
                pass

            # negative phase
            if modelType == "BB" :
                cm.dot(hidState, weight.T, target = visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

                visState.fill_with_rand()
                visState.less_than(visActP, target = visState)
            elif modelType == "GB" :
                cm.dot(hidState, weight.T, target = visActP)
                visActP.add_row_vec(biasV)

                # Gaussian visible units: wrap the noise so it lives on the GPU
                visActP.add(cm.CUDAMatrix(np.random.randn(batchSize, numVis)), target = visState)

            # another positive phase
            cm.dot(visState, weight, target = hidActP2)
            hidActP2.add_row_vec(biasH)
            hidActP2.apply_sigmoid()

            hidState.fill_with_rand()
            hidState.less_than(hidActP2, target=hidState)

            #update weight and bias
            dWeight = cm.dot(visTrue.transpose(), hidActP)
            dWeight.subtract_dot(visState.transpose(), hidActP2)
            dBiasV = visTrue.sum(axis = 0).subtract(visState.sum(axis = 0))
            dBiasH = hidActP.sum(axis=0).subtract(hidActP2.sum(axis = 0))

            dWeight.divide(batchSize)
            # weight decay via add_mult keeps `weight` itself untouched
            # (weight.mult(penalty) would have rescaled the weights in place)
            dWeight.add_mult(weight, -penalty)
            dBiasV.divide(batchSize)
            dBiasH.divide(batchSize)

            weightInc.mult(momentum).add_mult(dWeight, eta)
            biasVInc.mult(momentum).add_mult(dBiasV, eta)
            biasHInc.mult(momentum).add_mult(dBiasH, eta)

            weight.add(weightInc)
            biasV.add(biasVInc)
            biasH.add(biasHInc)

            if epoch > avgStart :
                # running average: agv <- agv + (param - agv)/t. Explicit targets
                # are needed; the original chained in-place calls aliased agv.
                weight.subtract(weightAgv, target = dWeight)
                weightAgv.add_mult(dWeight, 1.0/t)
                biasV.subtract(biasVAgv, target = dBiasV)
                biasVAgv.add_mult(dBiasV, 1.0/t)
                biasH.subtract(biasHAgv, target = dBiasH)
                biasHAgv.add_mult(dBiasH, 1.0/t)
                t = t+1
            else :
                # keep device-side copies, not references, so later parameter
                # updates do not silently change the averages
                weightAgv = cm.empty(weight.shape)
                weightAgv.assign(weight)
                biasVAgv = cm.empty(biasV.shape)
                biasVAgv.assign(biasV)
                biasHAgv = cm.empty(biasH.shape)
                biasHAgv.assign(biasH)

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)
        if verbose :
            print "epoch %d/%d. Reconstruction error is %f " % (epoch+1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(deviceData, weightAgv, target = top)
    top.add_row_vec(biasHAgv)
    top.apply_sigmoid()

    model_ = m.rbmModel(weightAgv,biasVAgv,biasHAgv,type = modelType,top = top)

    cm.shutdown()

    return model_
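For orientation, the same CD-1 step can be written compactly on the CPU. The sketch below mirrors the GPU loop above for binary-binary units; cd1_step and sigmoid are our own illustrative names, not part of this project's API.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def cd1_step(v0, W, bV, bH, eta, penalty):
    """One contrastive-divergence (CD-1) update for a binary-binary RBM."""
    h0 = sigmoid(np.dot(v0, W) + bH)                              # positive phase
    hState = (np.random.rand(*h0.shape) < h0).astype(v0.dtype)    # sample hidden states
    v1 = sigmoid(np.dot(hState, W.T) + bV)                        # negative phase
    h1 = sigmoid(np.dot(v1, W) + bH)                              # another positive phase
    n = v0.shape[0]
    dW = (np.dot(v0.T, h0) - np.dot(v1.T, h1)) / n - penalty * W
    dbV = (v0 - v1).mean(axis=0, keepdims=True)
    dbH = (h0 - h1).mean(axis=0, keepdims=True)
    return W + eta * dW, bV + eta * dbV, bH + eta * dbH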
Exemple #21
0
def shutdown():
    '''Free GPU resources'''
    cm.shutdown()
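The examples above all follow the same cudamat lifecycle: initialize CUBLAS, move data to the device, compute, copy results back, free device memory, and shut down. A minimal end-to-end session, assuming only cudamat and numpy:

import numpy as np
import cudamat as cm

cm.cublas_init()                                     # grab the GPU
a = cm.CUDAMatrix(cm.reformat(np.random.rand(4, 4)))
b = cm.dot(a, a)                                     # matrix product on the device
print(b.asarray())                                   # copy the result back to the host
b.free_device_memory()
a.free_device_memory()
cm.shutdown()                                        # free GPU resources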
Exemple #22
0
def rbmPredict(m, X):
    """using trained rbm model to do prediction"""
    nClass = m.labels.size
    numCase = X.shape[0]

    # This part is executed on CPU
    # define the free energy
    #    FF = np.zeros((numCase, nClass))
    #    FFcol = np.zeros((numCase, 1))
    #    for index in range(nClass) :
    #        temp = np.zeros((numCase, nClass))
    #        temp[:, index] = 1
    #
    #        tt = np.emath.log(np.exp(np.dot(X, m.weight)+ np.dot(temp, m.weightLabel) + m.biasH)+1)
    #
    #        FFcol = temp[:,index] * m.biasLabel[0,index] + np.sum(tt,axis = 1)
    #
    #        FF[:, index] = FFcol
    #
    #    [x, y] = np.where(np.abs(FF - np.max(FF, axis=1, keepdims=True)) < 1e-5)

    #    result = np.zeros(y.shape)

    #    for index in range(y.size) :
    #        result[index] = m.labels[y[index]]

    # The following part runs on GPU
    cm.cublas_init()

    # copy data to GPU
    data = cm.CUDAMatrix(cm.reformat(X))
    weight = cm.CUDAMatrix(cm.reformat(m.weight))
    biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
    weightLabel = cm.CUDAMatrix(cm.reformat(m.weightLabel))
    biasLabel = cm.CUDAMatrix(cm.reformat(m.biasLabel))

    F = cm.CUDAMatrix(np.zeros((numCase, nClass)))
    Fcol = cm.CUDAMatrix(np.zeros((numCase, 1)))
    temp = cm.CUDAMatrix(np.zeros((numCase, nClass)))

    tt = cm.CUDAMatrix(np.zeros((numCase, biasH.asarray().size)))
    for index in range(nClass):
        temp.assign(0)

        temp.set_col_slice(index, index + 1, 1)

        tt = cm.dot(data, weight)
        tt.add_dot(temp, weightLabel)
        tt.add_row_vec(biasH)
        cm.log_1_plus_exp(tt, target=tt, exact=True)

        Fcol = cm.sum(tt, axis=1)
        Fcol.add_mult(temp.get_col_slice(index, index + 1), biasLabel.numpy_array[0, index])

        F.set_col_slice(index, index + 1, Fcol)

        tt.free_device_memory()

    F.copy_to_host()
    [x, y] = np.where(np.abs(F.numpy_array - np.max(F.numpy_array, axis=1, keepdims=True)) < 1e-5)

    # free device memory
    data.free_device_memory()

    weight.free_device_memory()
    biasH.free_device_memory()
    biasLabel.free_device_memory()
    weightLabel.free_device_memory()

    F.free_device_memory()
    Fcol.free_device_memory()
    temp.free_device_memory()

    cm.shutdown()

    result = np.zeros(y.shape)

    for index in range(y.size):
        result[index] = m.labels[y[index]]

    return [result, F.numpy_array]
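For reference, the commented-out CPU block at the top of rbmPredict can be written as a clean host-side equivalent. This is a sketch of the same free-energy classification, assuming the rbmModel fields used above; np.logaddexp(0, x) is a numerically stable softplus, playing the role of log_1_plus_exp:

import numpy as np

def rbmPredictCPU(m, X):
    """Pick, for each row of X, the label with the largest free-energy score."""
    nClass = m.labels.size
    numCase = X.shape[0]
    F = np.zeros((numCase, nClass))
    for index in range(nClass):
        temp = np.zeros((numCase, nClass))
        temp[:, index] = 1
        # softplus of the total input to each hidden unit
        tt = np.logaddexp(0, np.dot(X, m.weight) + np.dot(temp, m.weightLabel) + m.biasH)
        F[:, index] = m.biasLabel[0, index] + np.sum(tt, axis=1)
    y = np.argmax(F, axis=1)
    return m.labels[y], F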
Exemple #23
0
def rbm(X, numHid, **kwargs) :
    """
    rbm definition
    data : when type is BB, should be binary, or in [0,1] to be interpreted as probabilities
           when type is GB, should be continuous real values. data should have the format of *.npy
    numHid : number of nodes in the hidden layer
    type   : rbm type, can be set as BB or GB

    method          CD or SML
    eta             learning rate
    momentum        momentum for smoothing and to help prevent overfitting
                    NOTE: momentum is not recommended with SML
    maxepoch        # of epochs: each is a full pass through train data
    avglast         how many epochs before maxepoch to start averaging
                the parameters over. Procedure suggested for faster convergence by
                Kevin Swersky in his MSc thesis
    penalty         weight decay factor
    batchsize       The number of training instances per batch
    verbose         For printing progress

    OUTPUTS:
    model.type      Type of RBM (i.e. type of its visible and hidden units)
    model.weight         The weights of the connections
    model.biasH         The biases of the hidden layer
    model.biasV         The biases of the visible layer
    model.top       The activity of the top layer, to be used when training
                    DBN's
    errors          The errors in reconstruction at every epoch
       """
# Note: computing a transpose via *.transpose() allocates a new matrix and wastes
# device memory; the .T attribute is a cheap transposed view and should be preferred.

    arg = util.processOptions(kwargs, \
                            modelType = "BB", \
                            method = "CD", \
                            eta = 0.1, \
                            momentum = 0.5,\
                            maxEpoch = 500, \
                            avgLast = 0, \
                            penalty = 0, \
                            batchSize = 100, \
                            verbose = True)

    [modelType, method, eta, momentum, maxEpoch, avgLast, penalty, batchSize, verbose] = [\
        arg["modelType"], \
        arg["method"],\
        arg["eta"],\
        arg["momentum"],\
        arg["maxEpoch"],\
        arg["avgLast"],\
        arg["penalty"],\
        arg["batchSize"],\
        arg["verbose"]
    ]

    # from which step, we start to compute the average
#    avgStart = maxEpoch - avgLast

    # for weight decay use
#    oldPenalty = penalty

    # numCases : number of example
    # numDims : the length of each example
    # each row is an example
    [numCases, numDims] = list(X.shape)

    if verbose :
        print "processing data"

    numVis = numDims
    numBatch = util.ceil(numCases, batchSize)

    # shuffle the data
    data = copy.deepcopy(X)
    np.random.shuffle(data)

    # init CUDA
#    cm.cuda_set_device()
    cm.cublas_init()
    cm.CUDAMatrix.init_random(100)
    deviceData = cm.CUDAMatrix(cm.reformat(data))

    # init weights
    weight = cm.CUDAMatrix(0.1*np.random.randn(numVis,numHid))
    biasV = cm.CUDAMatrix(np.zeros((1, numVis)))
    biasH = cm.CUDAMatrix(np.zeros((1, numHid)))

    # init weight update
    weightInc = cm.CUDAMatrix(np.zeros((numVis,numHid)))
    biasVInc = cm.CUDAMatrix(np.zeros((1,numVis)))
    biasHInc = cm.CUDAMatrix(np.zeros((1,numHid)))

    #init temporary storage
    visActP = cm.empty((batchSize, numVis))
    hidActP = cm.empty((batchSize, numHid))
    hidState = cm.empty((batchSize, numHid))

    for epoch in range(maxEpoch) :
        error = []

        for batch in range(numBatch) :
            # train each data batch
            if batchSize*(batch+1) > numCases :
                visTrue = deviceData.get_row_slice(batchSize*batch, numCases)
                batchSize = visTrue.shape[0]

                visActP = cm.empty((batchSize, numVis))
                hidActP = cm.empty((batchSize, numHid))
                hidState = cm.empty((batchSize, numHid))
            else :
                visTrue = deviceData.get_row_slice(batchSize*batch, batchSize*(batch+1))

            visActP.assign(visTrue)

            #apply momentum
            weightInc.mult(momentum)
            biasVInc.mult(momentum)
            biasHInc.mult(momentum)

            # positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.add_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0)
            biasHInc.add_sums(hidActP, axis=0)

            hidState.fill_with_rand()
            hidState.less_than(hidActP, target=hidActP)

            if cmp(method, "SML") == 0 :
                if np.logical_and(np.equal(epoch,1), np.equal(batch,1)) :
                    pass # here does not need in practical use
            elif cmp(method, "CD") == 0 :
                pass

            # negative phase
            if modelType == "BB" :
                cm.dot(hidActP, weight.T, target = visActP)
                visActP.add_row_vec(biasV)
                visActP.apply_sigmoid()

            elif modelType == "GB" :
                cm.dot(hidActP, weight.T, target = visActP)
                visActP.add_row_vec(biasV)

                # Gaussian visible units: wrap the noise so it lives on the GPU
                visActP.add(cm.CUDAMatrix(np.random.randn(batchSize, numVis)), target = visActP)

            # another positive phase
            cm.dot(visActP, weight, target = hidActP)
            hidActP.add_row_vec(biasH)
            hidActP.apply_sigmoid()

            weightInc.subtract_dot(visActP.T, hidActP)
            biasVInc.add_sums(visActP, axis=0, mult=-1)
            biasHInc.add_sums(hidActP, axis=0, mult=-1)

            #update weight and bias
            weight.add_mult(weightInc, eta/batchSize)
            biasV.add_mult(biasVInc, eta/batchSize)
            biasH.add_mult(biasHInc, eta/batchSize)

#            if epoch > avgStart :
#                # apply average
#                weightAgv.subtract(weightAgv.subtract(weight).mult(1.0/t))
#                biasVAgv.subtract(biasVAgv.subtract(biasV).mult(1.0/t))
#                biasHAgv.subtract(biasHAgv.subtract(biasH).mult(1.0/t))
#                t = t+1
#            else :
#                weightAgv = weight
#                biasVAgv = biasV
#                biasHAgv = biasH

            # reconstruction error
            visTrue.subtract(visActP)
            error.append(visTrue.euclid_norm() ** 2)

            # free device memory
            visTrue.free_device_memory()

        if verbose :
            print "epoch %d/%d. Reconstruction error is %f " % (epoch+1, maxEpoch, sum(error))

    # save rbm model
    top = cm.CUDAMatrix(np.zeros((numCases, numHid)))
    cm.dot(cm.CUDAMatrix(cm.reformat(X)), weight, target = top)
    top.add_row_vec(biasH)
    top.apply_sigmoid()

    weight.copy_to_host()
    biasV.copy_to_host()
    biasH.copy_to_host()
    top.copy_to_host()

    model_ = m.rbmModel(weight.numpy_array, biasV.numpy_array, \
                        biasH.numpy_array, type = modelType, top = top.numpy_array)


    # free device memory
    deviceData.free_device_memory()

    weight.free_device_memory()
    biasV.free_device_memory()
    biasH.free_device_memory()

    weightInc.free_device_memory()
    biasVInc.free_device_memory()
    biasHInc.free_device_memory()

    hidActP.free_device_memory()
    visActP.free_device_memory()
    hidState.free_device_memory()

    cm.shutdown()

    return model_
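A hedged usage sketch for this trainer, assuming the rbmModel object exposes the fields it is constructed with, and reusing rbmHtoV from the earlier example ("train.npy" is a placeholder path):

X = np.load("train.npy")              # rows are training examples; in [0, 1] for BB units
model = rbm(X, 256, modelType="BB", maxEpoch=50, batchSize=100, verbose=True)
visRecon = rbmHtoV(model, model.top)  # map hidden activities back to the visible layer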
Exemple #24
0
pickle.dump(
    lambdas,  # assumption: the head of this snippet was truncated; only the open(...) tail survived
    open(
        outputPath + "/lambdas_{}_{}_{}_{}_{}.p".format(
            eval(args.sim).__name__, len(reprVocab), kernel, hyperparam,
            n_components), "wb"))
'''
Projection to KPCA embeddings of the vocabulary
'''
with codecs.open(vocabPath, "r") as fIn:
    vocab = [normalize_word(w[:-1]) for w in fIn if len(w[:-1].split()) == 1]

termcolor.cprint("Projecting known vocabulary to KPCA embeddings\n", "blue")

alphas_lambdas_div = alphas / lambdas

if useGPU:
    X_train = projectWordsGPU(vocab, reprVocab, hyperparam, alphas_lambdas_div,
                              kernel)
    cm.shutdown()
else:
    X_train = pool.map(
        projectWordTup,
        [(word, reprVocab, hyperparam, alphas_lambdas_div, kernel)
         for word in vocab])

pickle.dump(
    X_train,
    open(
        outputPath + "/KPCA_{}_{}_{}_{}_{}.p".format(
            eval(args.sim).__name__, len(reprVocab), kernel, hyperparam,
            n_components), "wb"))
Exemple #25
0
def matrix_factorization_clustering(X_aux, k, l, norm=False, num_iters=100):
    cm.cublas_init()

    m, n = X_aux.shape
    U = cm.CUDAMatrix(np.random.rand(m, k))
    S = cm.CUDAMatrix(np.random.rand(k, l))
    V = cm.CUDAMatrix(np.random.rand(n, l))

    X = cm.CUDAMatrix(X_aux)

    # if norm:
    #     X = Normalizer().fit_transform(X)

    XV = cm.CUDAMatrix(np.random.rand(m, l))
    XVSt = cm.CUDAMatrix(np.random.rand(m, k))
    US = cm.CUDAMatrix(np.random.rand(m, l))
    USVt = cm.CUDAMatrix(np.random.rand(m, n))
    USVtXt = cm.CUDAMatrix(np.random.rand(m, m))
    USVtXtU = cm.CUDAMatrix(np.random.rand(m, k))
    U_aux = cm.CUDAMatrix(np.random.rand(m, k))

    XtUS = cm.CUDAMatrix(np.random.rand(n, l))  # X.T @ (U S) has shape (n, l)
    VSt = cm.CUDAMatrix(np.random.rand(n, k))
    VStUt = cm.CUDAMatrix(np.random.rand(n, m))
    UtX = cm.CUDAMatrix(np.random.rand(k, n))
    VStUtXV = cm.CUDAMatrix(np.random.rand(n, l))
    V_aux = cm.CUDAMatrix(np.random.rand(n, l))

    UtXV = cm.CUDAMatrix(np.random.rand(k, l))
    UtUS = cm.CUDAMatrix(np.random.rand(k, l))
    UtUSVt = cm.CUDAMatrix(np.random.rand(k, n))
    UtUSVtV = cm.CUDAMatrix(np.random.rand(k, l))
    S_aux = cm.CUDAMatrix(np.random.rand(k, l))

    diff = cm.CUDAMatrix(np.random.rand(m, n))  # scratch for the reconstruction residual

    error_best = np.inf
    error = np.inf

    for i in range(num_iters):
        # compute U
        cm.dot(X, V, target=XV)
        cm.dot(XV, S.T, target=XVSt)

        if i == 0:  # USVt is (re)computed at the end of every later iteration
            cm.dot(U, S, target=US)
            cm.dot(US, V.T, target=USVt)
        cm.dot(USVt, X.T, target=USVtXt)
        cm.dot(USVtXt, U, target=USVtXtU)

        cm.divide(XVSt, USVtXtU, target=U_aux)
        cm.mult(U, U_aux, target=U)

        # compute V
        cm.dot(U, S, target=US)
        cm.dot(X.T, US, target=XtUS)
        cm.dot(V, S.T, target=VSt)
        cm.dot(VSt, U.T, target=VStUt)
        cm.dot(VStUt, XV, target=VStUtXV)

        cm.divide(XtUS, VStUtXV, target=V_aux)
        cm.mult(V, V_aux, target=V)

        # compute S
        cm.dot(U.T, X, target=UtX)
        cm.dot(UtX, V, target=UtXV)

        cm.dot(U.T, US, target=UtUS)
        cm.dot(UtUS, V.T, target=UtUSVt)
        cm.dot(UtUSVt, V, target=UtUSVtV)

        cm.divide(UtXV, UtUSVtV, target=S_aux)
        cm.mult(S, S_aux, target=S)

        error_ant = error

        cm.dot(U, S, target=US)
        cm.dot(US, V.T, target=USVt)
        # squared Frobenius norm of the residual, reduced to a Python float so
        # the comparisons below are well defined
        X.subtract(USVt, target=diff)
        error = diff.euclid_norm() ** 2

        if error < error_best:
            # snapshot host copies: U, S and V are updated in place, so keeping
            # references here would just track the current iterate
            U_best = U.asarray().copy()
            S_best = S.asarray().copy()
            V_best = V.asarray().copy()
            error_best = error

        if np.abs(error - error_ant) <= 1e-6:
            break

    Du = np.diag(np.ones(m).dot(U_best))
    Dv = np.diag(np.ones(n).dot(V_best))

    U_norm = U_best.dot(np.diag(S_best.dot(Dv).dot(np.ones(l))))
    V_norm = V_best.dot(np.diag(np.ones(k).dot(Du).dot(S_best)))

    rows_ind = np.argmax(U_best, axis=1)
    cols_ind = np.argmax(V_best, axis=1)

    cm.shutdown()

    return U_norm, S_best, V_norm, rows_ind, cols_ind, error_best
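The loop above implements multiplicative updates for the tri-factorization X ≈ U S Vᵀ. A compact CPU restatement of one update step (a NumPy sketch with the same update rules; the eps guard against division by zero is our addition):

import numpy as np

def nmtfStep(X, U, S, V, eps=1e-12):
    """One multiplicative update of X ~ U.dot(S).dot(V.T), as in the loop above."""
    USVt = U.dot(S).dot(V.T)
    U *= X.dot(V).dot(S.T) / (USVt.dot(X.T).dot(U) + eps)           # update U
    US = U.dot(S)
    V *= X.T.dot(US) / (V.dot(S.T).dot(U.T).dot(X).dot(V) + eps)    # update V
    S *= U.T.dot(X).dot(V) / (U.T.dot(U).dot(S).dot(V.T).dot(V) + eps)  # update S
    return U, S, V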