Example #1
0
def weight_analysis(verbose=0, stack_option='s'):
  """Fit a non-negative linear blend over stacked model predictions.

  Runs compute_weights for every driver in a worker pool, concatenates the
  per-model prediction streams, fits a positive Lasso (alpha=0, i.e. plain
  non-negative least squares) of the labels on the per-model predictions,
  and logs the resulting ensemble weights plus each model's individual AUC.

  Args:
    verbose: verbosity flag forwarded to each compute_weights worker.
    stack_option: 's' selects the STACK model list; anything else MODELS.
  """
  logging.info('starting ensemble weight analysis')

  stack = STACK if stack_option == 's' else MODELS

  pool = multiprocessing.Pool(processes=4)
  drivers = settings.DRIVER_IDS#[:1000]
  CUTOFF = -1
  try:
    results = pool.map(
        compute_weights,
        [(driver_id, verbose, stack_option) for driver_id in drivers]
    )
  finally:
    # Release the worker processes -- the pool was previously never closed.
    pool.close()
    pool.join()

  # Concatenate the per-driver prediction chunks for every stacked model.
  predictions = {}
  for i, get_data, model, _ in stack:
    predictions[i] = np.array(list(itertools.chain(*[r[1][i] for r in results])))
  testY = list(itertools.chain(*[r[2] for r in results]))

  # Human-readable "<data_fn>.<model>.<id>" labels, sorted so log output is
  # stable; keys is kept in the same order so labels and columns line up.
  model_names = [
      ('%s.%s.%s' % (get_data.func_name, model.__name__, i), i)
      for i, get_data, model, repeat in stack
  ]
  model_names.sort(key=lambda x: x[0])
  keys = [x[1] for x in model_names]
  model_names = [x[0] for x in model_names]

  # alpha=0.0 with positive=True degenerates to non-negative least squares.
  lasso = Lasso(alpha=0.0, positive=True)
  # One row per sample, one column per stacked model.
  trainX = [[predictions[i][row_id] for i in keys]
            for row_id in xrange(len(testY))]

  # CUTOFF == -1 holds out only the final row; everything else is training.
  a, b = trainX[:CUTOFF], trainX[CUTOFF:]
  c, d = testY[:CUTOFF], testY[CUTOFF:]
  lasso.fit(a, c)
  pred = lasso.predict(b)
  pred_train = lasso.predict(a)
  #logging.info('auc: %s' % util.compute_auc(d, pred))

  logging.info('coefficients:')
  weights = {}
  for i, name in enumerate(model_names):
    logging.info('%s: %.3f' % (name, lasso.coef_[i]))
    weights[keys[i]] = lasso.coef_[i]

  logging.info('individual scores:')
  for i, key in enumerate(keys):
    logging.info('%s: %.3f' % (
        model_names[i],
        util.compute_auc(testY, predictions[key])
    ))

  logging.info('weights dictionary: %s' % weights)

  # and again in the end, so you don't have to scroll
  logging.info('------------')
  #logging.info('auc: %s' % util.compute_auc(d, pred))
  logging.info('auc train: %s' % util.compute_auc(c, pred_train))
Example #2
0
def train(verbose=0):
    """Run the ensemble over all drivers in parallel and log the overall AUC.

    Args:
        verbose: verbosity flag forwarded to each run_ensemble worker.
    """
    logging.info('running ensemble')
    pool = multiprocessing.Pool(processes=4)
    drivers = settings.DRIVER_IDS  #[1000:]
    try:
        results = pool.map(run_ensemble, [(x, verbose) for x in drivers])
    finally:
        # Release the worker processes -- the pool was previously never closed.
        pool.close()
        pool.join()

    # Flatten the per-driver (predictions, labels) chunks into flat sequences.
    predictions = np.array(list(itertools.chain(*[r[0] for r in results])))
    testY = list(itertools.chain(*[r[1] for r in results]))

    logging.info(util.compute_auc(testY, predictions))
def train(verbose=0):
  """Run the ensemble over all drivers in parallel and log the overall AUC.

  Args:
    verbose: verbosity flag forwarded to each run_ensemble worker.
  """
  logging.info('running ensemble')
  pool = multiprocessing.Pool(processes=4)
  drivers = settings.DRIVER_IDS #[1000:]
  try:
    results = pool.map(
        run_ensemble,
        [(driver_id, verbose) for driver_id in drivers]
    )
  finally:
    # Release the worker processes -- the pool was previously never closed.
    pool.close()
    pool.join()

  # Flatten the per-driver (predictions, labels) chunks into flat sequences.
  predictions = np.array(list(itertools.chain(*[r[0] for r in results])))
  testY = list(itertools.chain(*[r[1] for r in results]))

  logging.info(util.compute_auc(testY, predictions))
Example #4
0
import logging
import multiprocessing
import itertools

import numpy as np

from model_run import run_model, test_model
import model_run
import model_def
import settings
import util

logging.root.setLevel(level=logging.INFO)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')

if __name__ == '__main__':
    logging.info('starting main.py')

    #run_model((100, 203, model_def.Model_GBC, model_run.get_data_accel_v2_svd, 1)); raise Exception

    # Score Model_LR2 on a 10-driver sample using a single worker process.
    pool = multiprocessing.Pool(processes=1)
    try:
        results = pool.map(
            run_model,
            [(100, x, model_def.Model_LR2, model_run.get_data_movements_accel, 1)
             for x in settings.DRIVER_IDS[:10]])
    finally:
        # Release the worker process -- the pool was previously never closed.
        pool.close()
        pool.join()
    # Flatten per-driver chunks: r[0] holds predictions, r[-1] the labels.
    predictions = np.array(list(itertools.chain(*[r[0] for r in results])))
    testY = list(itertools.chain(*[r[-1] for r in results]))
    logging.info(util.compute_auc(testY, predictions))
import logging
import multiprocessing
import itertools

import numpy as np

from model_run import run_model, test_model
import model_run
import model_def
import settings
import util

logging.root.setLevel(level=logging.INFO)
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s')

if __name__ == '__main__':
  logging.info('starting main.py')

  #run_model((100, 203, model_def.Model_GBC, model_run.get_data_accel_v2_svd, 1)); raise Exception

  # Score Model_LR2 on a 10-driver sample using a single worker process.
  pool = multiprocessing.Pool(processes=1)
  try:
    results = pool.map(
        run_model,
        [(100, driver_id, model_def.Model_LR2, model_run.get_data_movements_accel, 1)
         for driver_id in settings.DRIVER_IDS[:10]]
    )
  finally:
    # Release the worker process -- the pool was previously never closed.
    pool.close()
    pool.join()
  # Flatten per-driver chunks: r[0] holds predictions, r[-1] the labels.
  predictions = np.array(list(itertools.chain(*[r[0] for r in results])))
  testY = list(itertools.chain(*[r[-1] for r in results]))
  logging.info(util.compute_auc(testY, predictions))