def run(dataset):
    """Run the active, online and baseline experiments for one dataset.

    For each of the module-level ``n_repeat`` rounds the test set is loaded,
    an active-learning pass is run for every budget listed in the active
    result, and then a fresh extractor is used to collect points
    incrementally for the online-retraining and baseline passes.  The three
    result objects are printed once all rounds are finished.

    Args:
        dataset: key into the module-level ``meta`` table; it is also
            substituted into the result names and the test-set path.
    """
    spec = meta[dataset]
    n_features = len(spec['val_name'])

    def new_extractor():
        # Both phases build their extractor with exactly these arguments.
        return AWSOnline(spec['model_id'], 1, 0, n_features,
                         spec['val_name'], ftype='uniform', error=.1)

    result_online = Result('%s-%s' % (dataset, 'aws-online'), aws=True)
    result_baseline = Result('%s-%s' % (dataset, 'aws-baseline'), aws=True)
    result_active = Result('%s-%s' % (dataset, 'aws-active'), aws=True)

    for repeat in range(n_repeat):
        # Parenthesised single-argument print: same output under Python 2.
        print('Round %d of %d' % (repeat, n_repeat - 1))

        ex = new_extractor()

        test_x, test_y = load_svmlight_file(
            '/Users/Fan/dev/ML/code/binary-classifiers/targets/%s/test.scale' % dataset,
            n_features)
        test_x = test_x.todense()
        # Any label other than 1 is mapped to 0; labels equal to 1 are kept.
        test_y = [label if label == 1 else 0 for label in test_y]
        # Training points accumulate across the online budgets of this round.
        train_x, train_y = [], []

        for i in result_active.index:
            q_by_u = result_active.Q_by_U[i]
            budget = q_by_u * (n_features + 1)
            print('Active learning with budget %d / %d' % (q_by_u, budget))

            main = ActiveLearning(ex, (None, None), (test_x, test_y),
                                  n_features, budget, 5)
            L_unif, L_test = main.do()

            result_active.L_unif[i].append(L_unif)
            result_active.L_test[i].append(L_test)
            result_active.nquery[i].append(ex.get_n_query())

        # The online/baseline phase starts from a brand-new extractor.
        ex = new_extractor()

        for i in result_online.index:
            q_by_u = result_online.Q_by_U[i]
            budget = q_by_u * (n_features + 1)
            print('collecting up to budget %d / %d' % (q_by_u, budget))

            ex.collect_up_to_budget(budget)
            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)
            print('retraining with %d points' % len(train_y))

            # online
            e = RBFKernelRetraining(ex.batch_predict, (train_x, train_y),
                                    (test_x, test_y), n_features)
            L_unif, L_test = e.grid_retrain_in_x()
            result_online.L_unif[i].append(L_unif)
            result_online.L_test[i].append(L_test)
            result_online.nquery[i].append(ex.get_n_query())

            # baseline
            e = Baseline(ex.batch_predict, (train_x, train_y),
                         (test_x, test_y), n_features)
            L_unif, L_test = e.do()
            result_baseline.L_unif[i].append(L_unif)
            result_baseline.L_test[i].append(L_test)
            result_baseline.nquery[i].append(ex.get_n_query())

    print(result_online)
    print(result_baseline)
    print(result_active)
import sys

import numpy as np
import sklearn.metrics as sm
from scipy.special import expit
from scipy.special import logit
from sklearn.datasets import load_svmlight_file

from algorithms.awsOnline import AWSOnline

# Manual switch for the live-service check below; it is off by default.
online = False

if online:
    val_name = ['v%d' % idx for idx in range(1, 11)]
    # Parenthesised single-argument print: same output under Python 2.
    print(val_name)
    test = AWSOnline('ml-lkYRYeldcrH', 1, 0, 10, val_name, 'uniform', .1)
    # Alternative feature spec, kept for reference:
    # spec = FeatureSpec('norm', (-1, 1), (-.85, -.85, -.20, -.51, -.49, -.58, -.48, -.38, -.44, -.55))
    spec = None
    test.collect_with_score(20, spec=spec)
    # The script stops here whenever the online check is enabled.
    sys.exit(-1)


class LRSolver(object):
    """Least-squares fit of linear weights against logit-transformed scores."""

    def __init__(self):
        # Weight vector; stays None until fit() has been called.
        self.w = None

    def fit(self, X, s):
        """Solve for ``self.w`` by least squares with ``logit(s)`` as target.

        A column of ones is appended to ``X``, so the last entry of the
        solution is the coefficient of that constant column.

        Args:
            X: two-dimensional array (needs ``shape``) of inputs.
            s: scores handed to ``scipy.special.logit`` to form the target.
        """
        n_rows, n_cols = X.shape[0], X.shape[1]
        design = np.ones((n_rows, n_cols + 1))
        design[:, :n_cols] = X
        solution = np.linalg.lstsq(design, logit(s))
        # lstsq returns (solution, residuals, rank, singular values).
        self.w = solution[0]
__author__ = 'Fan'

import numpy as np

from algorithms.OnlineBase import FeatureSpec
from algorithms.awsOnline import AWSOnline

# Single pass; the loop is kept so that ``i`` stays bound as in the original.
for i in range(1):
    val_name = ['v%d' % idx for idx in range(1, 11)]
    # Parenthesised single-argument print: same output under Python 2.
    print(val_name)

    test = AWSOnline('ml-lkYRYeldcrH', 1, 0, 10, val_name, 'uniform', .1)
    # Earlier variant, kept for reference:
    # X, Y = test.collect_pts(2, -1)

    spec = FeatureSpec(
        'norm',
        (-1, 1),
        (-.85, -.85, -.20, -.51, -.49, -.58, -.48, -.38, -.44, -.55),
    )
    X, Y = test.collect_universe(1000, spec=spec)

    # Count how often each label value occurs among the collected points.
    print(np.bincount(Y))
import matplotlib.pyplot as plt from algorithms.awsOnline import AWSOnline import numpy as np from algorithms.RBFTrainer import RBFKernelRetraining from sklearn.datasets import load_svmlight_file for i in range(0, 1): val_name = ['v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9', 'v10'] n_features = 10 ex = AWSOnline(model_id='ml-lkYRYeldcrH', label_p=1, label_n=0, n_features=n_features, val_name=val_name, ftype='uniform', error=.1) step = 6 train_x, train_y = [], [] val_x, val_y = [], [] test_x, test_y = load_svmlight_file('test.scale', n_features) test_x = test_x.todense() test_y = [a if a == 1 else 0 for a in test_y] try: while True: ex.collect_pts(step) train_x.extend(ex.pts_near_b) train_y.extend(ex.pts_near_b_labels) val_x.extend(ex.support_pts) val_y.extend(ex.support_labels)
__author__ = 'Fan'

import os
import random

from sklearn.datasets import load_svmlight_file
from sklearn.metrics import accuracy_score

from algorithms.awsOnline import AWSOnline

val_name = ['v%d' % idx for idx in range(1, 9)]
# Parenthesised single-argument print: same output under Python 2.
print(val_name)
test = AWSOnline('ml-lzYmNFzPh2N', 1, 0, 8, val_name, 'uniform', .1)

# The test set lives under the current user's home directory.
test_path = os.getenv('HOME') + '/Dropbox/Projects/SVM/dataset/cod-rna/cod-rna.test.scaled'
Xt, Yt = load_svmlight_file(test_path, n_features=8)
Xt = Xt.todense().tolist()

# Shuffle rows and labels together so they stay aligned.
z = zip(Xt, Yt)
random.shuffle(z)
Xt, Yt = zip(*z)

# Only a small window of the shuffled data is sent to the service.
start = 0
cutoff = 10
Xt = Xt[start:start + cutoff]

yy = []
for x in Xt:
    yy.append(test.query(x))

# Two-field formatting reproduces the output of ``print a, b``.
print('%s %s' % ('True:', Yt[start:start + cutoff]))
print('%s %s' % ('Predict:', yy))
__author__ = 'Fan'

import numpy as np

from algorithms.OnlineBase import FeatureSpec
from algorithms.awsOnline import AWSOnline

# Single pass; the loop is kept so that ``i`` stays bound as in the original.
for i in range(1):
    val_name = ['v%d' % idx for idx in range(1, 11)]
    # Parenthesised single-argument print: same output under Python 2.
    print(val_name)

    test = AWSOnline('ml-lkYRYeldcrH', 1, 0, 10, val_name, 'uniform', .1)
    # Earlier variant, kept for reference:
    # X, Y = test.collect_pts(2, -1)

    spec = FeatureSpec(
        'norm',
        (-1, 1),
        (-.85, -.85, -.20, -.51, -.49, -.58, -.48, -.38, -.44, -.55),
    )
    X, Y = test.collect_universe(1000, spec=spec)

    # Count how often each label value occurs among the collected points.
    print(np.bincount(Y))
import matplotlib.pyplot as plt
import numpy as np

from algorithms.RBFTrainer import RBFKernelRetraining
from algorithms.awsOnline import AWSOnline

# Single pass; the loop is kept so that ``i`` stays bound as in the original.
for i in range(1):
    val_name = ['x1', 'x2']
    ex = AWSOnline('ml-i0GeYZaGQ3f', 1, 0, 2, val_name, 'uniform', .1)

    step = 6
    train_x, train_y = [], []
    val_x, val_y = [], []

    # Collect and retrain until the user interrupts with Ctrl-C.
    try:
        while True:
            ex.collect_pts(step)

            train_x.extend(ex.pts_near_b)
            train_y.extend(ex.pts_near_b_labels)
            val_x.extend(ex.support_pts)
            val_y.extend(ex.support_labels)

            e = RBFKernelRetraining('circle', train_x, train_y,
                                    val_x, val_y, train_x, train_y,
                                    n_features=2)
            # Two-field formatting reproduces the output of ``print a, b``.
            print('%s %s' % (ex.get_n_query(), e.grid_retrain_in_x()))
    except KeyboardInterrupt:
        print('Done')

    # Plot the first two coordinates of every collected point, coloured by label.
    train_x = np.array(train_x)
    plt.figure()
    plt.scatter(train_x[:, 0], train_x[:, 1], c=train_y)
    plt.xlim((-2, 2))
    plt.ylim((-2, 2))
    plt.show()