import itertools

import numpy as np
import pandas as pd

import lom
import lom.auxiliary_functions as aux  # assumed module path for canonical_loms etc.
# generate_orm_product, generate_random_2D_data, wrappers, sampling and
# lambda_updates_numba are assumed to be provided by the LFM package and its
# test utilities.


def tensorm_reconstruct(X, L, hyperparms=[.5, 1.0], return_layer=False):

    if not X.dtype == np.int8:
        X = np.array(X, dtype=np.int8)

    if np.all([y in [0, 1] for y in np.unique(X)]):
        X = 2 * X - 1

    p_init = hyperparms[0]
    lbda_init = hyperparms[1]

    orm = lom.Machine()
    data = orm.add_matrix(X, fixed=True)
    layer = orm.add_layer(
        child=data, latent_size=L, model='OR-AND')

    orm.infer(burn_in_min=100, no_samples=50)

    X_recon = layer.output(technique='factor_mean', lazy=False)
    X_recon_plugin = layer.output(technique='factor_map', lazy=False)
    f_tensorm = (layer.z.mean(), layer.u.mean(), layer.v.mean())

    if return_layer is True:
        return layer
    else:
        return X_recon, f_tensorm, X_recon_plugin
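def demo_tensorm_reconstruct():
    # Minimal usage sketch (illustrative only): a random binary 3-way array
    # stands in for real data; any 0/1 input is mapped to the {-1, 1}
    # encoding inside tensorm_reconstruct.
    X_demo = np.array(np.random.rand(30, 20, 10) > .5, dtype=np.int8)
    X_recon, factor_means, X_recon_plugin = tensorm_reconstruct(X_demo, L=4)
    return X_recon, factor_means, X_recon_plugin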
def generate_data(N=100, D=100):

    np.random.seed(2)

    L = 5

    U = np.array(2 * (np.random.rand(D, L) > .5) - 1, dtype=np.int8)
    Z = np.array(2 * (np.random.rand(N, L) > .5) - 1, dtype=np.int8)
    X = np.array(2 * np.dot(Z == 1, U.transpose() == 1) - 1, dtype=np.int8)

    X[int(X.shape[0] / 2):, :] *= -1

    orm = lom.Machine()

    data = orm.add_matrix(val=X, sampling_indicator=False)

    layer = orm.add_layer(size=3,
                          child=data,
                          lbda_init=2.,
                          noise_model='or-link')

    layer.z.val = Z
    layer.u.val = U

    return layer
def test_orm():

    np.random.seed(3)

    N = 10
    M = 5
    L = 3

    X = 2 * np.array([
        M * [0, 0, 1, 1, 0, 0], M * [1, 1, 0, 0, 0, 0], M * [0, 0, 0, 0, 1, 1]
    ]) - 1

    X = np.concatenate(N * [X])
    N, D = X.shape

    orm = lom.Machine()

    orm.framework = 'numba'

    data = orm.add_matrix(val=X, fixed=True)  # , sampling_indicator=False)

    layer1 = orm.add_layer(latent_size=L, child=data, model='OR-AND')
    layer1.lbda.val = 2.0

    # layer1.factors[0].val = np.array(2*np.ones([N,L])-1, dtype=np.int8)
    # layer1.factors[1].val = np.array(2*np.ones([D,L])-1, dtype=np.int8)

    orm.infer(convergence_window=20,
              no_samples=20,
              convergence_eps=1e-3,
              burn_in_min=20,
              burn_in_max=1000)

    assert abs(1 / (1 + np.exp(-orm.layers[0].lbda())) - 1.) < 1e-2
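    # The assertion above checks that sigmoid(lbda) -- the model's probability
    # of emitting a data point correctly -- is close to 1 on this noise-free,
    # perfectly factorisable X.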
def test_lambda_update_or():

    model = 'OR-AND'

    U, Z, X = generate_orm_product()

    orm = lom.Machine()

    data = orm.add_matrix(val=X, fixed=True)

    layer = orm.add_layer(latent_size=3, child=data, model=model)

    layer.factors[0].val = Z
    layer.factors[1].val = U

    assert np.all(np.dot(Z == 1, U.transpose() == 1) == (data() == 1))

    lbda_update_fct = lambda_updates_numba.make_lbda_update_fct(model, 2)
    lbda_update_fct(layer.lbda)

    ND = np.prod(X.shape)

    print('\n')
    print(ND)

    assert layer.lbda() == -np.log(((ND + 2) / (ND + 1)) - 1)
def LOM_predictive(experiment, return_machine=True):
    """
    Experiment is a tuple with all relevant settings
    """

    # unpack experiment parameters
    X, X_train, train_mask, machine, L, random_idx, lbda_init, anneal = experiment

    orm = lom.Machine()
    data = orm.add_matrix(X_train, fixed=True)
    layer = orm.add_layer(latent_size=L, child=data, model=machine)
    layer.lbda.val = lbda_init

    # layer.auto_reset = True
    if anneal is True:
        orm.anneal = True
        orm.infer(burn_in_min=600, fix_lbda_iters=0,
                  convergence_window=50, burn_in_max=1000, no_samples=10)

    else:
        orm.infer(burn_in_min=100, fix_lbda_iters=50,
                  convergence_window=10, burn_in_max=150, no_samples=10)

    out = layer.output(technique='factor_mean')[train_mask] > .5
    truth = (-2 * layer.invert_data + 1) * X[train_mask] == 1

    if return_machine is False:
        return ([np.mean(out == truth), machine, layer.size])
    else:
        return ([np.mean(out == truth), machine, layer.size],
                [x.mean() for x in layer.factors])
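def demo_LOM_predictive():
    # Minimal usage sketch (illustrative only): the experiment tuple is built
    # in the order unpacked inside LOM_predictive. Held-out cells are set to
    # 0, which is assumed here to mark them as unobserved; random_idx is not
    # used inside the function and may be None.
    X = np.array(2 * (np.random.rand(50, 20) > .5) - 1, dtype=np.int8)
    heldout_mask = np.random.rand(*X.shape) < .1   # hold out ~10% of entries
    X_train = X.copy()
    X_train[heldout_mask] = 0
    experiment = (X, X_train, heldout_mask, 'OR-AND', 3, None, 2.0, False)
    return LOM_predictive(experiment, return_machine=False)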
def tensorm_reconstruct_indp(X, L, hyperparms=[0.5, 1.0]):

    if not X.dtype == np.int8:
        X = np.array(X, dtype=np.int8)

    if np.all([y in [0, 1] for y in np.unique(X)]):
        X = 2 * X - 1

    p_init = hyperparms[0]
    lbda_init = hyperparms[1]

    orm = lom.Machine()
    data = orm.add_matrix(X, sampling_indicator=False)
    layer = orm.add_tensorm_layer(
        child=data, size=L,
        lbda_init=lbda_init,
        inits=3 * [p_init],
        noise_model='tensorm-link-indp')

    # assign the correct updating functions
    for factor_matrix in data.parents:
        factor_matrix.sampling_fct = wrappers.draw_tensorm_indp_noparents_onechild_wrapper

    layer.lbda_p.sampling_fct = sampling.draw_lbda_tensorm_indp_p
    layer.lbda_m.sampling_fct = sampling.draw_lbda_tensorm_indp_m
    layer.lbda = (layer.lbda_p, layer.lbda_m)

    orm.infer(burn_in_min=1000, no_samples=50)

    X_recon = layer.output(recon_model='mc', force_computation=True)
    X_recon_plugin = layer.output(recon_model='plugin', force_computation=True)
    f_tensorm = (layer.z.mean(), layer.u.mean(), layer.v.mean())

    return X_recon, f_tensorm, X_recon_plugin
def test_ibp():

    X = generate_random_2D_data()

    orm = lom.Machine()
    data = orm.add_matrix(X, fixed=True)
    layer = orm.add_layer(latent_size=1, child=data, model='OR-AND-IBP')

    orm.infer(burn_in_min=200)

    assert np.mean((2 * layer.output() - 1) == X) > .9
def test_all_3D_LOMs():

    operators = ['AND', 'NAND', 'OR', 'NOR', 'XOR', 'NXOR']
    # operators = ['OR', 'AND']
    machines = [
        x[0] + '-' + x[1] for x in list(itertools.product(operators, repeat=2))
    ]

    for machine in aux.canonical_loms():  # machines:

        N = 50
        D = 10
        L = 3

        Z = np.array(np.random.rand(N, L) > .5, dtype=np.int8)
        U = np.array(np.random.rand(D, L) > .5, dtype=np.int8)
        V = np.array(np.random.rand(D, L) > .5, dtype=np.int8)
        # generate_data_fast is not available for all machines
        X = aux.lom_generate_data([2 * Z - 1, 2 * U - 1, 2 * V - 1],
                                  model=machine)

        orm = lom.Machine()

        data = orm.add_matrix(X, fixed=True)
        layer = orm.add_layer(latent_size=L, child=data, model=machine)
        layer.z.val = (1 - 2 * layer.invert_factors) * (2 * Z - 1)
        layer.u.val = (1 - 2 * layer.invert_factors) * (2 * U - 1)
        layer.v.val = (1 - 2 * layer.invert_factors) * (2 * V - 1)

        # we initialise with the ground truth, so set lbda high enough that
        # sampling does not immediately randomise this initialisation
        layer.lbda.val = 3.0

        orm.infer(burn_in_min=10, fix_lbda_iters=2)

        try:
            assert np.mean((2 * (layer.output(technique='factor_map') > .5) -
                            1) == data()) > .98
            assert np.mean((2 * (layer.output(technique='factor_mean') > .5) -
                            1) == data()) > .98
        except AssertionError:
            acc = np.mean((2 * (layer.output(technique='factor_mean') > .5) -
                           1) == data())
            raise ValueError(machine + ' failed with reconstruction accuracy of ' +
                             str(acc))
def test_maxmachine():

    # generate toy data
    A = 2 * np.array([[0, 0, 0, 0, 0, 1, 1]]) - 1
    B = 2 * np.array([[0, 0, 1, 1, 1, 1, 0]]) - 1
    C = 2 * np.array([[1, 1, 1, 1, 0, 0, 0]]) - 1
    X = np.concatenate(100 * [C] + 100 * [B] +
                       100 * [A])  # + 100 *[2*((A==1) + (B==1))-1])
    # X = np.concatenate(100*[C]+50*[2*np.array([[0,0,0,1,1,1,1]])-1])
    # flip each entry with 2% probability; columns 4 onwards receive extra
    # (heteroscedastic) noise
    for i in range(X.shape[0]):
        for j in range(X.shape[1]):
            if np.random.rand() > .98:
                X[i, j] = -X[i, j]
        for j in range(4, X.shape[1]):
            if np.random.rand() > .95:
                X[i, j] = -X[i, j]

    machine = 'MAX-AND'

    orm = lom.Machine()
    L = 3

    data = orm.add_matrix(X, fixed=True)
    layer = orm.add_layer(latent_size=L, child=data, model=machine)

    # initialise the factors with the ground truth and (below) set lbda high
    # enough that sampling does not immediately randomise this initialisation

    layer.u.val = np.array([[1, -1, -1], [1, -1, -1], [1, 1, -1], [1, 1, -1],
                            [-1, 1, 1], [-1, 1, 1], [-1, -1, 1]],
                           dtype=np.int8)

    layer.z.val = np.array(np.concatenate(
        [100 * [[1, -1, -1]], 100 * [[-1, 1, -1]], 100 * [[-1, -1, 1]]]),
                           dtype=np.int8)

    layer.lbda.val = np.array([.99 for x in range(L + 1)])
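    # the L + 1 values above correspond to one fidelity parameter per latent
    # dimension plus a global noise term (MaxMachine parameterisation)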

    orm.infer(burn_in_min=50,
              burn_in_max=200,
              fix_lbda_iters=20,
              no_samples=100)

    assert np.mean((layer.output(technique='mc') > .5) == (X == 1)) > .8
def generate_data(N=100, D=100, L=10):

    np.random.seed(2)

    U = np.array(2 * (np.random.rand(D, L) > .5) - 1, dtype=np.int8)
    Z = np.array(2 * (np.random.rand(N, L) > .5) - 1, dtype=np.int8)
    X = np.array(2 * np.dot(Z == 1, U.transpose() == 1) - 1, dtype=np.int8)

    X[int(X.shape[0] / 2):, :] *= -1

    orm = lom.Machine()

    data = orm.add_matrix(val=X, fixed=True)

    layer = orm.add_layer(latent_size=3, child=data, model='OR-AND')

    layer.factors[0].val = Z
    layer.factors[1].val = U

    return layer
reDataMin = reDataNum.min(axis=0)
reScDataNum = (reDataNum - reDataMin) * (dataNumMax - dataNumMin) / (
    reDataMax - reDataMin) + dataNumMin

# insert reconstructed data in missing values
re2DataNum = dataNum.copy()
for idx in randIdxNum:
    re2DataNum.loc[idx[0], idx[1]] = reScDataNum.loc[idx[0], idx[1]]

#------------------------------------------------------------------------------
#--------------------------Boolean matrix factorization------------------------
#------------------------------------------------------------------------------
# BMF with missing values; see https://github.com/TammoR/LogicalFactorisationMachines
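# (Assumption about the library: data can take values in {-1, 0, 1}, where 0
# marks an unobserved cell; that is how missing entries can be handled during
# factorisation.)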

dataBinVal = dataBin.copy()
orm = lom.Machine()
data = orm.add_matrix(dataBinVal.values, fixed=True)
layer1 = orm.add_layer(latent_size=10, child=data, model='OR-AND')
layer2 = orm.add_layer(latent_size=3, child=layer1.z, model='OR-AND')
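# layer2 factorises layer1's latent codes (layer1.z), i.e. a two-layer
# hierarchical factorisation of the binary data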

orm.infer(burn_in_min=100, burn_in_max=1000, no_samples=1000)

reDataBin = pd.DataFrame(layer1.output(technique='factor_map'),
                         columns=dataBinVal.columns)
# turn into boolean again
reDataBin[reDataBin == -1] = 0
reDataBool = reDataBin.astype(bool)

rssBMF = sum([
    abs(reDataBool.loc[idx[0], idx[1]] ^ dataBool.loc[idx[0], idx[1]])
    for idx in randIdxBool