Esempio n. 1
0
def run(dataset, data_root='/Users/Fan/dev/ML/code/binary-classifiers/targets'):
    """Run the active-learning, online and baseline experiments on one dataset.

    Each of the ``n_repeat`` rounds has two phases:

    1. A fresh ``AWSOnline`` extractor is built and, for every entry of
       ``result_active.index``, ``ActiveLearning`` is run with the matching
       query budget.
    2. A second fresh extractor is built and, for every entry of
       ``result_online.index``, points are collected up to the budget and
       appended to a training set that keeps growing within the round.
       ``RBFKernelRetraining`` and ``Baseline`` are then evaluated on it.

    The three ``Result`` objects are printed once all rounds are done.

    Args:
        dataset: key into the module-level ``meta`` mapping; it is also the
            name of the sub-directory of ``data_root`` holding ``test.scale``.
        data_root: directory with one sub-directory per dataset. The default
            is the location that used to be hard-coded in this function.
    """
    n_features = len(meta[dataset]['val_name'])

    def new_extractor():
        # Both phases of a round start from an identically configured,
        # newly constructed extractor.
        return AWSOnline(meta[dataset]['model_id'], 1, 0, n_features,
                         meta[dataset]['val_name'], ftype='uniform', error=.1)

    result_online = Result('%s-%s' % (dataset, 'aws-online'), aws=True)
    result_baseline = Result('%s-%s' % (dataset, 'aws-baseline'), aws=True)
    result_active = Result('%s-%s' % (dataset, 'aws-active'), aws=True)

    test_path = '%s/%s/test.scale' % (data_root, dataset)

    for repeat in range(n_repeat):
        print('Round %d of %d' % (repeat, n_repeat - 1))

        ex = new_extractor()

        # n_features goes by keyword: newer scikit-learn releases reject it
        # as a positional argument, and the keyword form works on old ones.
        test_x, test_y = load_svmlight_file(test_path, n_features=n_features)
        test_x = test_x.todense()
        # Labels equal to 1 are kept; every other label becomes 0.
        test_y = [label if label == 1 else 0 for label in test_y]

        # Phase 1: active learning.
        for i in result_active.index:
            q_by_u = result_active.Q_by_U[i]
            budget = q_by_u * (n_features + 1)
            print('Active learning with budget %d / %d' % (q_by_u, budget))
            main = ActiveLearning(ex, (None, None), (test_x, test_y),
                                  n_features, budget, 5)

            L_unif, L_test = main.do()

            result_active.L_unif[i].append(L_unif)
            result_active.L_test[i].append(L_test)
            result_active.nquery[i].append(ex.get_n_query())

        # Phase 2: online retraining and baseline share one extractor and
        # one training set that accumulates across budgets.
        ex = new_extractor()
        train_x, train_y = [], []

        for i in result_online.index:
            q_by_u = result_online.Q_by_U[i]
            budget = q_by_u * (n_features + 1)
            print('collecting up to budget %d / %d' % (q_by_u, budget))

            ex.collect_up_to_budget(budget)
            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)

            print('retraining with %d points' % len(train_y))

            # online
            e = RBFKernelRetraining(ex.batch_predict, (train_x, train_y),
                                    (test_x, test_y), n_features)
            L_unif, L_test = e.grid_retrain_in_x()

            result_online.L_unif[i].append(L_unif)
            result_online.L_test[i].append(L_test)
            result_online.nquery[i].append(ex.get_n_query())

            # baseline
            e = Baseline(ex.batch_predict, (train_x, train_y),
                         (test_x, test_y), n_features)
            L_unif, L_test = e.do()

            result_baseline.L_unif[i].append(L_unif)
            result_baseline.L_test[i].append(L_test)
            result_baseline.nquery[i].append(ex.get_n_query())

    print(result_online)
    print(result_baseline)
    print(result_active)
Esempio n. 2
0
import sys

import numpy as np
from sklearn.datasets import load_svmlight_file
import sklearn.metrics as sm
from scipy.special import expit
from scipy.special import logit

from algorithms.awsOnline import AWSOnline

# Manual switch: set to True to run a one-off score collection against the
# hosted model and then exit, instead of continuing with the rest of the file.
online = False
if online:
    # Feature names v1 .. v10.
    val_name = ['v%d' % idx for idx in range(1, 11)]
    print(val_name)

    test = AWSOnline('ml-lkYRYeldcrH', 1, 0, 10, val_name, 'uniform', .1)
    # Alternative that was tried instead of spec = None:
    # spec = FeatureSpec('norm', (-1, 1), (-.85, -.85, -.20, -.51, -.49, -.58, -.48, -.38, -.44, -.55))
    spec = None
    test.collect_with_score(20, spec=spec)
    sys.exit(-1)


class LRSolver(object):
    """Recover linear-model coefficients from logistic scores by least squares.

    The scores are mapped through ``logit`` and the resulting linear system
    ``[X, 1] . w = logit(s)`` is solved with ``numpy.linalg.lstsq``.
    """

    def __init__(self):
        # Fitted coefficients; None until fit() has been called.
        self.w = None

    def fit(self, X, s, eps=None):
        """Fit ``self.w`` so that ``[X, 1] . w`` approximates ``logit(s)``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            s: scores in [0, 1], one per row of ``X``.
            eps: scores are clipped to ``[eps, 1 - eps]`` before the logit
                is taken. Defaults to the float machine epsilon, which
                leaves every score strictly inside (0, 1) essentially
                unchanged.

        After the call ``self.w`` holds the feature weights followed by the
        intercept as its last entry.
        """
        if eps is None:
            eps = np.finfo(float).eps

        X = np.asarray(X, dtype=float)
        # logit(0) and logit(1) are -inf / +inf and would poison the
        # least-squares solution, so saturated scores are pulled just inside
        # the open interval.
        scores = np.clip(np.asarray(s, dtype=float), eps, 1.0 - eps)

        # Design matrix: the features, plus a trailing column of ones that
        # carries the intercept.
        design = np.ones((X.shape[0], X.shape[1] + 1))
        design[:, :-1] = X
        self.w, _, _, _ = np.linalg.lstsq(design, logit(scores))
Esempio n. 3
0
__author__ = 'Fan'

import numpy as np

from algorithms.awsOnline import AWSOnline
from algorithms.OnlineBase import FeatureSpec

# Single-pass driver: draw 1000 points via collect_universe() and print how
# many fall into each label.
for i in range(1):
    # Feature names v1 .. v10.
    val_name = ['v%d' % k for k in range(1, 11)]
    print(val_name)

    test = AWSOnline('ml-lkYRYeldcrH', 1, 0, 10, val_name, 'uniform', .1)
    # X, Y = test.collect_pts(2, -1)

    spec = FeatureSpec('norm',
                       (-1, 1),
                       (-.85, -.85, -.20, -.51, -.49,
                        -.58, -.48, -.38, -.44, -.55))
    X, Y = test.collect_universe(1000, spec=spec)
    print(np.bincount(Y))
Esempio n. 4
0
import matplotlib.pyplot as plt
from algorithms.awsOnline import AWSOnline
import numpy as np
from algorithms.RBFTrainer import RBFKernelRetraining
from sklearn.datasets import load_svmlight_file

# Single-pass driver: repeatedly collect points from the hosted model and
# accumulate them, with a svmlight test set loaded alongside.
for i in range(0, 1):
    # Names of the ten model inputs, v1 .. v10.
    val_name = ['v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9', 'v10']
    n_features = 10

    ex = AWSOnline(model_id='ml-lkYRYeldcrH',
                   label_p=1,
                   label_n=0,
                   n_features=n_features,
                   val_name=val_name,
                   ftype='uniform',
                   error=.1)

    # Argument handed to every collect_pts() call below.
    step = 6
    # Accumulators that are extended from the extractor after each step.
    train_x, train_y = [], []
    val_x, val_y = [], []
    # NOTE(review): n_features is passed positionally; recent scikit-learn
    # releases take it as keyword-only - confirm against the installed version.
    test_x, test_y = load_svmlight_file('test.scale', n_features)
    test_x = test_x.todense()
    # Labels equal to 1 are kept; every other label becomes 0.
    test_y = [a if a == 1 else 0 for a in test_y]
    # NOTE(review): the handler belonging to this try is not visible in this
    # excerpt, which ends inside the loop body.
    try:
        # No exit condition: the loop only ends when something raises.
        while True:
            ex.collect_pts(step)
            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)
            val_x.extend(ex.support_pts)
            val_y.extend(ex.support_labels)
Esempio n. 5
0
__author__ = 'Fan'

import os

from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score

from algorithms.awsOnline import AWSOnline

# Feature names v1 .. v8.
val_name = ['v%d' % k for k in range(1, 9)]
print(val_name)
test = AWSOnline('ml-lzYmNFzPh2N', 1, 0, 8, val_name, 'uniform', .1)

# The scaled cod-rna test split lives under the user's home directory.
test_file = os.getenv('HOME') + '/Dropbox/Projects/SVM/dataset/cod-rna/cod-rna.test.scaled'
Xt, Yt = load_svmlight_file(test_file, n_features=8)
# Sparse matrix -> dense matrix -> plain list of rows.
Xt = Xt.todense().tolist()

import random

# Shuffle rows and labels together so they stay paired.
z = list(zip(Xt, Yt))
random.shuffle(z)
Xt, Yt = zip(*z)

# Window of shuffled samples sent to the model.
start = 0
cutoff = 10

Xt = Xt[start:start + cutoff]

# One query per selected row.
yy = [test.query(x) for x in Xt]
print(' '.join(('True:', str(Yt[start:start + cutoff]))))
print(' '.join(('Predict:', str(yy))))
Esempio n. 6
0
__author__ = 'Fan'

import numpy as np

from algorithms.awsOnline import AWSOnline
from algorithms.OnlineBase import FeatureSpec

# Single-pass driver: sample 1000 points with collect_universe() and print
# the per-label counts.
for i in range(1):
    # Feature names v1 .. v10.
    val_name = ['v%d' % n for n in range(1, 11)]
    print(val_name)

    test = AWSOnline('ml-lkYRYeldcrH', 1, 0, 10, val_name, 'uniform', .1)
    # X, Y = test.collect_pts(2, -1)

    spec = FeatureSpec(
        'norm',
        (-1, 1),
        (-.85, -.85, -.20, -.51, -.49, -.58, -.48, -.38, -.44, -.55),
    )
    X, Y = test.collect_universe(1000, spec=spec)
    print(np.bincount(Y))
Esempio n. 7
0
import matplotlib.pyplot as plt
from algorithms.awsOnline import AWSOnline
import numpy as np
from algorithms.RBFTrainer import RBFKernelRetraining

for i in range(0, 1):
    val_name = ['x1', 'x2']

    ex = AWSOnline('ml-i0GeYZaGQ3f', 1, 0, 2, val_name, 'uniform', .1)

    step = 6
    train_x, train_y = [], []
    val_x, val_y = [], []
    try:
        while True:
            ex.collect_pts(step)
            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)
            val_x.extend(ex.support_pts)
            val_y.extend(ex.support_labels)
            e = RBFKernelRetraining('circle', train_x, train_y, val_x, val_y, train_x, train_y, n_features=2)
            print ex.get_n_query(), e.grid_retrain_in_x()
    except KeyboardInterrupt:
        print 'Done'

    train_x = np.array(train_x)
    plt.figure()
    plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y)
    plt.xlim((-2, 2))
    plt.ylim((-2, 2))
    plt.show()