def weight_analysis(verbose=0, stack_option='s'):
    """Fit ensemble weights over per-model predictions and log the results.

    Runs ``compute_weights`` for every id in ``settings.DRIVER_IDS`` on a
    4-process pool, concatenates the per-driver outputs, fits
    ``Lasso(alpha=0.0, positive=True)`` on the stacked predictions and logs
    the coefficients, each model's individual AUC and the training AUC.

    Args:
        verbose: Forwarded unchanged to ``compute_weights`` in each task tuple.
        stack_option: ``'s'`` selects ``STACK``; any other value selects
            ``MODELS``. Also forwarded to ``compute_weights``.

    Returns:
        None. All results are reported through ``logging.info``.
    """
    logging.info('starting ensemble weight analysis')
    stack = STACK if stack_option == 's' else MODELS
    drivers = settings.DRIVER_IDS
    # Rows from CUTOFF onward are excluded from the fit (with -1: the last row).
    CUTOFF = -1

    pool = multiprocessing.Pool(processes=4)
    try:
        results = pool.map(
            compute_weights,
            [(driver, verbose, stack_option) for driver in drivers]
        )
    finally:
        # Always release the worker processes, even when a task raises.
        pool.close()
        pool.join()

    # Each result's item 1 is indexed by model id and item 2 holds the labels;
    # both are concatenated across drivers in the order pool.map returned them.
    predictions = {}
    for i, _get_data, _model, _repeat in stack:
        predictions[i] = np.array(list(
            itertools.chain.from_iterable(r[1][i] for r in results)
        ))
    testY = list(itertools.chain.from_iterable(r[2] for r in results))

    # Sort by display name so the feature columns have a stable order.
    named = sorted(
        (
            ('%s.%s.%s' % (get_data.__name__, model.__name__, i), i)
            for i, get_data, model, _repeat in stack
        ),
        key=lambda pair: pair[0]
    )
    model_names = [pair[0] for pair in named]
    keys = [pair[1] for pair in named]

    # One row per sample, one column per model (in `keys` order).
    trainX = [
        [predictions[key][row_id] for key in keys]
        for row_id in range(len(testY))
    ]
    fit_X = trainX[:CUTOFF]
    fit_Y = testY[:CUTOFF]

    lasso = Lasso(alpha=0.0, positive=True)
    lasso.fit(fit_X, fit_Y)
    pred_train = lasso.predict(fit_X)

    logging.info('coefficients:')
    weights = {}
    for name, key, coef in zip(model_names, keys, lasso.coef_):
        logging.info('%s: %.3f' % (name, coef))
        weights[key] = coef

    logging.info('individual scores:')
    for name, key in zip(model_names, keys):
        logging.info('%s: %.3f' % (
            name, util.compute_auc(testY, predictions[key])
        ))

    logging.info('weights dictionary: %s' % weights)

    # Summary goes last so it is visible without scrolling back.
    logging.info('------------')
    logging.info('auc train: %s' % util.compute_auc(fit_Y, pred_train))
def train(verbose=0):
    """Run the ensemble for every driver in parallel and log the overall AUC.

    Maps ``run_ensemble`` over ``(driver_id, verbose)`` tuples built from
    ``settings.DRIVER_IDS`` on a 4-process pool, concatenates item 0
    (predictions) and item 1 (labels) of every result, and logs
    ``util.compute_auc`` of the two.

    Args:
        verbose: Forwarded unchanged to ``run_ensemble`` in each task tuple.

    Returns:
        None. The score is reported through ``logging.info``.
    """
    logging.info('running ensemble')
    drivers = settings.DRIVER_IDS

    pool = multiprocessing.Pool(processes=4)
    try:
        results = pool.map(run_ensemble, [(x, verbose) for x in drivers])
    finally:
        # Always release the worker processes, even when a task raises.
        pool.close()
        pool.join()

    predictions = np.array(list(
        itertools.chain.from_iterable(r[0] for r in results)
    ))
    testY = list(itertools.chain.from_iterable(r[1] for r in results))
    logging.info(util.compute_auc(testY, predictions))
def train(verbose=0):
    """Score the ensemble across all drivers and log the resulting AUC.

    Each driver id in ``settings.DRIVER_IDS`` is paired with ``verbose`` and
    handed to ``run_ensemble`` on a 4-process pool. Item 0 of every result is
    treated as that driver's predictions and item 1 as its labels; both are
    concatenated in result order before ``util.compute_auc`` is logged.

    Args:
        verbose: Forwarded unchanged to ``run_ensemble`` in each task tuple.

    Returns:
        None. The score is reported through ``logging.info``.
    """
    logging.info('running ensemble')
    tasks = [(driver, verbose) for driver in settings.DRIVER_IDS]

    pool = multiprocessing.Pool(processes=4)
    try:
        results = pool.map(run_ensemble, tasks)
    finally:
        # Shut the workers down on success and on failure alike.
        pool.close()
        pool.join()

    predictions = np.array(list(
        itertools.chain.from_iterable(r[0] for r in results)
    ))
    testY = list(itertools.chain.from_iterable(r[1] for r in results))
    logging.info(util.compute_auc(testY, predictions))
import logging
import multiprocessing
import itertools

import numpy as np

from model_run import run_model, test_model
import model_run
import model_def
import settings
import util

logging.root.setLevel(level=logging.INFO)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')

if __name__ == '__main__':
    # Runs Model_LR2 on the movements/accel features for the first ten
    # driver ids and logs the AUC over the concatenated results.
    logging.info('starting main.py')

    # Debug shortcut: run a single task in-process, then stop.
    #run_model((100, 203, model_def.Model_GBC, model_run.get_data_accel_v2_svd, 1)); raise Exception

    pool = multiprocessing.Pool(processes=1)
    try:
        results = pool.map(
            run_model,
            [(100, x, model_def.Model_LR2, model_run.get_data_movements_accel, 1)
             for x in settings.DRIVER_IDS[:10]]
        )
    finally:
        # Always release the worker process, even when a task raises.
        pool.close()
        pool.join()

    # First item of each result: predictions; last item: the labels.
    predictions = np.array(list(
        itertools.chain.from_iterable(r[0] for r in results)
    ))
    testY = list(itertools.chain.from_iterable(r[-1] for r in results))
    logging.info(util.compute_auc(testY, predictions))
import logging
import multiprocessing
import itertools

import numpy as np

from model_run import run_model, test_model
import model_run
import model_def
import settings
import util

logging.root.setLevel(level=logging.INFO)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')

if __name__ == '__main__':
    # Evaluates Model_LR2 with get_data_movements_accel on the first ten
    # driver ids and logs util.compute_auc over the pooled results.
    logging.info('starting main.py')

    # Debug shortcut: run a single task in-process, then stop.
    #run_model((100, 203, model_def.Model_GBC, model_run.get_data_accel_v2_svd, 1)); raise Exception

    tasks = [
        (100, driver, model_def.Model_LR2, model_run.get_data_movements_accel, 1)
        for driver in settings.DRIVER_IDS[:10]
    ]

    pool = multiprocessing.Pool(processes=1)
    try:
        results = pool.map(run_model, tasks)
    finally:
        # Shut the worker down on success and on failure alike.
        pool.close()
        pool.join()

    # First item of each result: predictions; last item: the labels.
    predictions = np.array(list(
        itertools.chain.from_iterable(r[0] for r in results)
    ))
    testY = list(itertools.chain.from_iterable(r[-1] for r in results))
    logging.info(util.compute_auc(testY, predictions))