Beispiel #1
0
def cmdStackEvalPrepare():
    """Load the train and test rows and prepare the 'd/stack_1' evaluation.

    Both files are read through ``data.loadTrainJson`` and the results are
    handed to ``stack.StackEval.prepare``.  A progress line is printed to
    stdout before each step.
    """
    print('load train data')
    train = data.loadTrainJson('d/train.toks.csv')

    print('load test data')
    # The test file is read with the train loader as well.
    # NOTE(review): presumably because this split carries labels -- confirm.
    test = data.loadTrainJson('d/test1.toks.csv')

    print('prepare eval')
    evaluator = stack.StackEval('d/stack_1')
    evaluator.prepare(train, test)
Beispiel #2
0
def cmdStackEvalPrepare():
  """Load the train and test rows and prepare the 'd/stack_1' evaluation.

  Both files are read through ``data.loadTrainJson`` and the results are
  handed to ``stack.StackEval.prepare``.  A progress line is printed to
  stdout before each step.
  """
  print('load train data')
  train = data.loadTrainJson('d/train.toks.csv')

  print('load test data')
  # The test file is read with the train loader as well.
  # NOTE(review): presumably because this split carries labels -- confirm.
  test = data.loadTrainJson('d/test1.toks.csv')

  print('prepare eval')
  evaluator = stack.StackEval('d/stack_1')
  evaluator.prepare(train, test)
Beispiel #3
0
def cmdStackEval():
  """Evaluate the 'd/stack_1' stack with an extra-trees regressor.

  ``StackEval.eval`` is given a factory that builds an
  ``ExtraTreesRegressor`` with 4000 estimators.  It returns a pair: the
  first value is printed as the AUC, the second must expose
  ``feature_importances_``, which is printed one value per line.
  """
  print('load train data')
  trainRows = data.loadTrainJson('d/train.toks.csv')
  print('load test data')
  testRows = data.loadTrainJson('d/test1.toks.csv')
  # NOTE(review): trainRows/testRows are not referenced below; the
  # evaluation only works from 'd/stack_1'.  Confirm whether these loads
  # are still needed.

  def makeEstimator():
    # Factory called by StackEval.eval to obtain a fresh estimator.
    return ensemble.ExtraTreesRegressor(
        n_estimators=4000, compute_importances=True, n_jobs=-1)

  print('eval')
  evaluator = StackEval('d/stack_1')
  auc, estimator = evaluator.eval(makeEstimator)

  print('auc: %f' % auc)
  print('')
  print('features')
  importances = [str(value) for value in estimator.feature_importances_]
  print('\n'.join(importances))
Beispiel #4
0
def cmdMakeSubmission(n_estimators, k, n, trainFile, testFile, outFile):
  dt = datetime.now()
  print 'load train data... ',
  trainRows = data.loadTrainJson(trainFile)
  print str(datetime.now() - dt)

  dt = datetime.now()
  print 'load test data... ',
  testRows = data.loadTestJson(testFile)
  print str(datetime.now() - dt)

  dt = datetime.now()
  print 'train model'
  m = StackModel(n_estimators, k, n)
  m.train(trainRows)
  print 'train model, done in %s' % str(datetime.now() - dt)

  m.estimator.n_jobs = 1

  dt = datetime.now()
  print 'generate submission'
  with open(outFile, 'w') as f:
    f.write('Insult,Date,Comment\n')
    f.writelines(('%f,%s,%s\n' % (m.classify1(row), row.dt, row.rawText) for row in testRows))

  print 'generate submission, done in %s' % str(datetime.now() - dt)
Beispiel #5
0
def cmdMakeSubmission(n_estimators, k, n, trainFile, testFile, outFile):
    dt = datetime.now()
    print 'load train data... ',
    trainRows = data.loadTrainJson(trainFile)
    print str(datetime.now() - dt)

    dt = datetime.now()
    print 'load test data... ',
    testRows = data.loadTestJson(testFile)
    print str(datetime.now() - dt)

    dt = datetime.now()
    print 'train model'
    m = StackModel(n_estimators, k, n)
    m.train(trainRows)
    print 'train model, done in %s' % str(datetime.now() - dt)

    m.estimator.n_jobs = 1

    dt = datetime.now()
    print 'generate submission'
    with open(outFile, 'w') as f:
        f.write('Insult,Date,Comment\n')
        f.writelines(('%f,%s,%s\n' % (m.classify1(row), row.dt, row.rawText)
                      for row in testRows))

    print 'generate submission, done in %s' % str(datetime.now() - dt)
Beispiel #6
0
def cmdStackEval():
    """Evaluate the 'd/stack_1' stack with an extra-trees regressor.

    ``StackEval.eval`` is given a factory that builds an
    ``ExtraTreesRegressor`` with 4000 estimators.  It returns a pair: the
    first value is printed as the AUC, the second must expose
    ``feature_importances_``, which is printed one value per line.
    """
    print('load train data')
    trainRows = data.loadTrainJson('d/train.toks.csv')
    print('load test data')
    testRows = data.loadTrainJson('d/test1.toks.csv')
    # NOTE(review): trainRows/testRows are not referenced below; the
    # evaluation only works from 'd/stack_1'.  Confirm whether these loads
    # are still needed.

    def makeEstimator():
        # Factory called by StackEval.eval to obtain a fresh estimator.
        return ensemble.ExtraTreesRegressor(
            n_estimators=4000, compute_importances=True, n_jobs=-1)

    print('eval')
    evaluator = StackEval('d/stack_1')
    auc, estimator = evaluator.eval(makeEstimator)

    print('auc: %f' % auc)
    print('')
    print('features')
    importances = [str(value) for value in estimator.feature_importances_]
    print('\n'.join(importances))
Beispiel #7
0
def mkSub(modelF, trainFile, testFile, outFile):
    """Train a fresh model and write its submission file.

    modelF -- zero-argument callable returning an object with a
              ``train(rows)`` method.
    trainFile -- passed to ``loadTrainJson``; the result is materialised
                 into a list before training.
    testFile, outFile -- forwarded to ``makeSubmission`` with the model.

    Returns the trained model.
    """
    printNow("make submission")
    rows = list(loadTrainJson(trainFile))
    printNow("  load train rows")

    model = modelF()
    model.train(rows)
    printNow("  trained")

    makeSubmission(model, testFile, outFile)
    return model
Beispiel #8
0
def mkSub(modelF, trainFile, testFile, outFile):
  """Train a fresh model and write its submission file.

  modelF -- zero-argument callable returning an object with a
            ``train(rows)`` method.
  trainFile -- passed to ``loadTrainJson``; the result is materialised
               into a list before training.
  testFile, outFile -- forwarded to ``makeSubmission`` with the model.

  Returns the trained model.
  """
  printNow("make submission")
  rows = list(loadTrainJson(trainFile))
  printNow("  load train rows")

  model = modelF()
  model.train(rows)
  printNow("  trained")

  makeSubmission(model, testFile, outFile)
  return model