예제 #1
0
def run_on_real_dataset(dataset_dir):
    """Run the experiment on the single `*.data` file found in `dataset_dir`.

    The file is read with `np.loadtxt` and binarized with `binarize`; the
    result replaces the `perfs` of the `DAMatrix` loaded from the same
    directory. The experiment name is the part of the file name that
    precedes its first dot.

    Raises:
        ValueError: if `dataset_dir` is not a directory, or if it does not
            contain exactly one file ending in `.data`.
    """
    if not os.path.isdir(dataset_dir):
        raise ValueError("Not a directory: {}".format(dataset_dir))

    data_files = [
        fname for fname in os.listdir(dataset_dir) if fname.endswith('.data')
    ]
    if len(data_files) != 1:
        raise ValueError("The dataset directory {} ".format(dataset_dir) +
                         "should contain one `.data` file but got " +
                         "{}.".format(data_files))

    (data_file,) = data_files
    name_expe = data_file.split('.')[0]
    data_path = os.path.join(dataset_dir, data_file)

    # Read and binarize the raw performances first, then attach them to the
    # matrix object stored in the directory.
    perfs = binarize(np.loadtxt(data_path))
    da_matrix = DAMatrix.load(dataset_dir)
    da_matrix.perfs = perfs

    # The first meta-learner returned is the random one; it is left out here.
    all_learners = get_the_meta_learners(exclude_optimal=True)
    meta_learners = all_learners[1:]

    run_expe(da_matrix,
             meta_learners=meta_learners,
             name_expe=name_expe,
             with_once_random=True,
             show_legend=True)
예제 #2
0
def run_nfl():
    """Run the 'nfl' experiment on a matrix of independent fair coin flips.

    Every entry of the 20000 x 5 performance matrix is drawn independently
    and equals 1 when a uniform sample in [0, 1) falls below 0.5.
    """
    name_expe = 'nfl'
    shape = (20000, 5)  # (n_datasets, n_algos)

    uniform = np.random.rand(*shape)
    perfs = (uniform < 0.5).astype(int)

    run_expe(DAMatrix(perfs=perfs, name=name_expe),
             get_the_meta_learners(),
             name_expe=name_expe)
예제 #3
0
def run_3a():
    """Run the '3a-repeated-columns' experiment.

    A single random binary column (20000 rows, 1 with probability 0.5) is
    duplicated four times, so every algorithm column is identical.
    """
    name_expe = '3a-repeated-columns'
    n_datasets, n_algos = 20000, 4

    base_column = (np.random.rand(n_datasets, 1) < 0.5).astype(int)
    copies = [base_column for _ in range(n_algos)]
    perfs = np.concatenate(copies, axis=1)

    run_expe(DAMatrix(perfs=perfs, name=name_expe),
             get_the_meta_learners(),
             name_expe=name_expe,
             ylim=(0.45, 1.05))
예제 #4
0
def run_3g():
    """Run the '3g' experiment: three copies of a column and of its complement.

    The base column has 20000 rows and equals 1 when a uniform sample falls
    below 0.5 + epsilon (epsilon = 0.1); the other three columns are its
    element-wise complement.
    """
    name_expe = '3g'
    n_datasets = 20000
    epsilon = 1e-1

    threshold = 0.5 + epsilon
    column = (np.random.rand(n_datasets, 1) < threshold).astype(int)
    complement = 1 - column
    perfs = np.concatenate([column] * 3 + [complement] * 3, axis=1)

    run_expe(DAMatrix(perfs=perfs, name=name_expe),
             get_the_meta_learners(exclude_greedy_plus=True),
             name_expe=name_expe,
             show_legend=False)
예제 #5
0
def run_3b():
    """Run the '3b-complementary-2-algos' experiment.

    The performance matrix has 20000 rows and four columns: two copies of a
    random binary column (1 with probability 0.5) followed by two copies of
    its element-wise complement.
    """
    n_datasets = 20000
    name_expe = '3b-complementary-2-algos'

    X1 = (np.random.rand(n_datasets, 1) < 0.5).astype(int)
    X2 = 1 - X1
    # NOTE: the matrix has 4 columns; a former unused local `n_algos = 5`
    # contradicted this and has been removed.
    perfs = np.concatenate([X1, X1, X2, X2], axis=1)

    da_matrix = DAMatrix(perfs=perfs, name=name_expe)
    meta_learners = get_the_meta_learners(exclude_greedy_plus=True)
    run_expe(da_matrix, meta_learners, name_expe=name_expe)
예제 #6
0
def test_unit_TopkRankMetaLearner():
    """Check that the first algorithm revealed is the top-scoring column.

    The columns of a fixed 2 x 3 matrix are shuffled ten times; after each
    `meta_fit`, the first index in `indices_algo_to_reveal` must map back to
    original column 1, which holds the largest value in both rows.
    """
    base_perfs = np.array([
        [1, 3, 2],
        [4, 6, 5],
    ])
    n_trials = 10
    meta_learner = TopkRankMetaLearner()

    for _ in range(n_trials):
        perm = np.random.permutation(base_perfs.shape[1])
        # Fancy indexing yields a fresh, column-shuffled copy each trial.
        shuffled = base_perfs[:, perm]

        meta_learner.meta_fit(DAMatrix(perfs=shuffled))
        first_revealed = meta_learner.indices_algo_to_reveal[0]

        # perm maps a shuffled position back to the original column index.
        assert perm[first_revealed] == 1
예제 #7
0
def run_3f_old():
    """Run the '3f' experiment on independently drawn, then filtered, columns.

    Four independent binary columns A, B, C, D are drawn with probability of
    1 equal to 0.5 + 2e, 0.5 + e, 0.5 - e and 0.5 - 2e (e = 0.1). Rows whose
    first three entries are all 0 are discarded and the first 20000
    remaining rows are kept.
    """
    name_expe = '3f'
    n_datasets = 20000
    epsilon = 1e-1

    # Twice as many rows as needed are drawn, presumably so that enough of
    # them survive the filtering below.
    n_drawn = n_datasets * 2
    thresholds = (
        0.5 + 2 * epsilon,  # A
        0.5 + epsilon,  # B
        0.5 - epsilon,  # C
        0.5 - 2 * epsilon,  # D
    )
    columns = [(np.random.rand(n_drawn, 1) < threshold).astype(int)
               for threshold in thresholds]
    perfs = np.concatenate(columns, axis=1)

    # Keep a row unless A, B and C are all zero.
    keep = perfs[:, :3].any(axis=1)
    perfs = perfs[keep][:n_datasets]
    assert len(perfs) == n_datasets

    run_expe(DAMatrix(perfs=perfs, name=name_expe),
             get_the_meta_learners(),
             name_expe=name_expe)
예제 #8
0
def test_nfldamatrix():
    """Save an `NFLDAMatrix`, reload it via `DAMatrix.load`, print both `perfs`.

    No assertion is made; the two matrices are only printed for inspection.
    """
    original = NFLDAMatrix()
    reloaded = DAMatrix.load(original.save())
    for matrix in (original, reloaded):
        print(matrix.perfs)
예제 #9
0
from mlt.data import DAMatrix

from mlt.metric import AccuracyMetric
from mlt.metric import ArgmaxMeanMetric
from mlt.metric import EmpArgmaxMetric
from mlt.metric import AverageRankMetric

import numpy as np

### Test cases ###
# Fixture shared by the metric tests below: a 2 x 2 performance matrix whose
# second column is 1 on both rows, with `best_algo` set to that column, and a
# predicted distribution putting 0.6 on it.
perfs = np.array([[0, 1]] * 2)
da_te = DAMatrix(perfs=perfs)
da_te.best_algo = 1
dist_pred = np.array([0.4, 0.6])


def test_AccuracyMetric():
    """`AccuracyMetric` must evaluate to 0.6 on the shared fixture."""
    score = AccuracyMetric()(dist_pred, da_te)
    assert score == 0.6


def test_ArgmaxMeanMetric():
    """`ArgmaxMeanMetric` must evaluate to 0.6 on the shared fixture."""
    score = ArgmaxMeanMetric()(dist_pred, da_te)
    assert score == 0.6


def test_EmpArgmaxMetric():
예제 #10
0
def get_da_matrix_3f():
    """Build a `DAMatrix` from the text file `../results/da_matrix_4f.txt`.

    The path is relative to the current working directory and is read with
    `np.loadtxt`.
    """
    # NOTE(review): the file name says "4f" while this function says "3f" --
    # confirm that this is the intended file.
    return DAMatrix(perfs=np.loadtxt('../results/da_matrix_4f.txt'))
예제 #11
0
def _indices_with_zeros(zero_positions, n_bits=4):
    """Return, in ascending order, the outcome indices with given bits at 0.

    An outcome is an `n_bits`-bit integer read most-significant bit first,
    so position 0 is the leftmost bit (algorithm A) and position
    `n_bits - 1` the rightmost one.
    """
    masks = [1 << (n_bits - 1 - position) for position in zero_positions]
    return [
        idx for idx in range(2**n_bits)
        if not any(idx & mask for mask in masks)
    ]


def get_multivariate_bernoulli_3f(epsilon=1e-1,
                                  n_datasets=20000,
                                  use_cvxopt=USE_CVXOPT):
    """Fit a joint law over four binary columns and sample a `DAMatrix`.

    The unknown `x` holds the probabilities of the 16 joint outcomes of
    (A, B, C, D), indexed by the binary number ABCD:

            ABCD
        x0: 0000
        x1: 0001
        x2: 0010
        x3: 0011
        x4: 0100
        x5: 0101
        x6: 0110
        x7: 0111
        x8: 1000
        x9: 1001
        x10: 1010
        x11: 1011
        x12: 1100
        x13: 1101
        x14: 1110
        x15: 1111

    A linear system `A x = B` encodes the wanted marginals and conditional
    probabilities (each conditional constraint P(U|V) = c is written as
    P(U, V) - c * P(V) = 0). It is solved in the least-squares sense with
    `x` constrained to [0, 1], negative entries are zeroed, and the result
    is renormalized to sum to 1. Rows are then sampled from that
    distribution. Diagnostics are printed along the way.

    Args:
        epsilon: offset `e` used in the target probabilities.
        n_datasets: number of rows to sample.
        use_cvxopt: if true, solve the quadratic program with CVXOPT's
            `solvers.qp`; otherwise use `minimize` with SLSQP.

    Returns:
        A `DAMatrix` named '3f' whose `perfs` has one row per sample and
        the columns A, B, C, D.

    Raises:
        ValueError: if the solution has no positive entry, so that it
            cannot be normalized into a probability distribution.
    """
    n_algos = 4
    n_outcomes = 2**n_algos
    e = epsilon

    # Right-hand side of the system; one entry per row of A.
    B = [
        0.5 - 2 * e,  # P(A=0)
        0.5 - e,  # P(B=0)
        0.5 + e,  # P(C=0)
        0.5 + 2 * e,  # P(D=0)
        0,  # P(B=0|A=0)        = 0.5 + 2e
        0,  # P(C=0|A=0)        = 0.5 + 2e
        0,  # P(D=0|A=0)        = 0.5 (coefficient used when building A[6])
        0,  # P(C=0|A=0,B=0)    = 0
        0,  # P(B=0|A=0,D=0)    = 0.5
        0,  # P(C=0|A=0,D=0)    = 0.5
        1,  # P(all)            = 1
    ]

    A = np.zeros(shape=(len(B), n_outcomes))

    # Outcome indices for each event "these variables are all 0"
    # (positions: A=0, B=1, C=2, D=3).
    indices_A0 = _indices_with_zeros([0], n_algos)
    indices_B0 = _indices_with_zeros([1], n_algos)
    indices_C0 = _indices_with_zeros([2], n_algos)
    indices_D0 = _indices_with_zeros([3], n_algos)
    indices_A0B0 = _indices_with_zeros([0, 1], n_algos)
    indices_A0C0 = _indices_with_zeros([0, 2], n_algos)
    indices_A0D0 = _indices_with_zeros([0, 3], n_algos)
    indices_A0B0C0 = _indices_with_zeros([0, 1, 2], n_algos)
    indices_A0C0D0 = _indices_with_zeros([0, 2, 3], n_algos)
    indices_A0B0D0 = _indices_with_zeros([0, 1, 3], n_algos)

    # Marginals.
    A[0, indices_A0] += 1  # P(A)
    A[1, indices_B0] += 1  # P(B)
    A[2, indices_C0] += 1  # P(C)
    A[3, indices_D0] += 1  # P(D)

    # Weight applied to the conditional rows; 1 leaves them unscaled.
    factor_importance = 1
    fi = factor_importance

    # P(B=0|A=0) = 0.5 + 2e
    A[4, indices_A0B0] += 1
    A[4, indices_A0] -= 0.5 + 2 * e
    A[4] *= fi**2

    # P(C=0|A=0) = 0.5 + 2e
    A[5, indices_A0C0] += 1
    A[5, indices_A0] -= 0.5 + 2 * e
    A[5] *= fi**2

    # We want Greedy to choose D at step 2.
    # The row enforces P(D=0|A=0) = 0.5 (an earlier, disabled variant used
    # 0.5 + e instead).
    A[6, indices_A0D0] += 1
    A[6, indices_A0] -= 0.5
    A[6] *= fi

    # We want Mean to be perfect at step 3.
    # P(C=0|A=0,B=0) = 0
    A[7, indices_A0B0C0] += 1
    A[7] *= fi

    # We want Greedy to be bad at step 3.
    # P(C=0|A=0,D=0) = 0.5
    A[8, indices_A0C0D0] += 1
    A[8, indices_A0D0] -= 0.5
    A[8] *= fi
    # P(B=0|A=0,D=0) = 0.5
    A[9, indices_A0B0D0] += 1
    A[9, indices_A0D0] -= 0.5
    A[9] *= fi

    # P(all) = 1
    A[10, :] += 1

    if not use_cvxopt:
        # Minimize ||A x - B||^2 subject to sum(x) = 1 and 0 <= x <= 1.
        def f(x):
            y = np.dot(A, x) - B
            return np.dot(y, y)

        cons = [
            {
                'type': 'eq',
                'fun': lambda x: x.sum() - 1
            },
            LinearConstraint(A=np.eye(n_outcomes),
                             lb=np.zeros(n_outcomes),
                             ub=np.ones(n_outcomes)),
        ]
        res = minimize(f,
                       np.zeros(n_outcomes),
                       method='SLSQP',
                       constraints=cons,
                       options={'disp': False})

        x = np.array(res['x'])

    else:
        # Use CVXOPT: minimize 1/2 x'Px + q'x subject to G x <= h.
        print(A)
        print(A.shape)
        print("np.linalg.matrix_rank(A)", np.linalg.matrix_rank(A))

        P = matrix(A.T.dot(A))
        q = matrix(-A.T.dot(B))
        # Box constraints 0 <= x <= 1 written as [I; -I] x <= [1; 0].
        G = matrix(np.concatenate([np.eye(n_outcomes), -np.eye(n_outcomes)]))
        h = matrix([1.0] * n_outcomes + [0.0] * n_outcomes)
        # No explicit equality constraint sum(x) = 1 is passed to the
        # solver; it is only present as the last row of A, and x is
        # renormalized below.
        sol = solvers.qp(P, q, G, h)
        x = np.array(sol['x']).reshape(n_outcomes)

    # Zero out negative entries, then renormalize so that x is a valid
    # probability vector.
    x = np.where(x >= 0, x, 0)
    total = x.sum()
    if not total > 0:
        raise ValueError(
            "The solver returned no positive probability mass; "
            "cannot normalize x={}".format(x))
    x = x / total

    residu = A.dot(x) - B
    print("Ax:", A.dot(x))
    print("B:", B)
    print("x.sum()", x.sum())
    print(x >= 0)
    print(x <= 1)
    print("residu.shape:", residu.shape)
    print("residu:", residu)
    print("residu norm:", residu.dot(residu))
    print("x:", x)

    # Draw all outcome indices at once and unpack each into its bits,
    # most-significant bit (A) first.
    outcomes = np.random.choice(n_outcomes, size=n_datasets, p=x)
    shifts = np.arange(n_algos - 1, -1, -1)
    perfs = (outcomes[:, np.newaxis] >> shifts) & 1

    name = '3f'
    da_matrix = DAMatrix(perfs=perfs, name=name)

    # Report the conditionals actually achieved by the fitted distribution.
    PA0 = sum(x[i] for i in indices_A0)
    PA0C0 = sum(x[i] for i in indices_A0C0)
    print("Real P(C=0|A=0)={}".format(PA0C0 / PA0))
    PA0D0 = sum(x[i] for i in indices_A0D0)
    print("Real P(D=0|A=0)={}".format(PA0D0 / PA0))

    return da_matrix