def cmdStackEvalPrepare():
    """Load the train/test token files and prepare the 'd/stack_1' evaluation.

    Both files are read with ``data.loadTrainJson`` and the resulting rows
    are passed to ``stack.StackEval.prepare``.  Progress messages are
    printed before each step.
    """
    print('load train data')
    train_rows = data.loadTrainJson('d/train.toks.csv')

    print('load test data')
    # The test file is read with the train loader as well.
    test_rows = data.loadTrainJson('d/test1.toks.csv')

    print('prepare eval')
    evaluator = stack.StackEval('d/stack_1')
    evaluator.prepare(train_rows, test_rows)
def cmdStackEval():
    """Evaluate the 'd/stack_1' stack with an ExtraTrees regressor.

    Prints the AUC returned by ``StackEval.eval``, a blank line, and then
    one feature importance per line.

    NOTE(review): a second definition of ``cmdStackEval`` appears later in
    this file; being later, it is the binding that survives module import.
    """
    def make_estimator():
        # Factory handed to StackEval.eval; a fresh regressor per call.
        return ensemble.ExtraTreesRegressor(
            n_estimators=4000,
            compute_importances=True,
            n_jobs=-1,
        )

    print('load train data')
    # NOTE(review): the loaded rows are not used by the rest of this function.
    train_rows = data.loadTrainJson('d/train.toks.csv')
    print('load test data')
    test_rows = data.loadTrainJson('d/test1.toks.csv')

    print('eval')
    evaluator = StackEval('d/stack_1')
    auc, estimator = evaluator.eval(make_estimator)

    print('auc: %f' % auc)
    print('')
    print('features')
    print('\n'.join(str(value) for value in estimator.feature_importances_))
def cmdMakeSubmission(n_estimators, k, n, trainFile, testFile, outFile): dt = datetime.now() print 'load train data... ', trainRows = data.loadTrainJson(trainFile) print str(datetime.now() - dt) dt = datetime.now() print 'load test data... ', testRows = data.loadTestJson(testFile) print str(datetime.now() - dt) dt = datetime.now() print 'train model' m = StackModel(n_estimators, k, n) m.train(trainRows) print 'train model, done in %s' % str(datetime.now() - dt) m.estimator.n_jobs = 1 dt = datetime.now() print 'generate submission' with open(outFile, 'w') as f: f.write('Insult,Date,Comment\n') f.writelines(('%f,%s,%s\n' % (m.classify1(row), row.dt, row.rawText) for row in testRows)) print 'generate submission, done in %s' % str(datetime.now() - dt)
def cmdStackEval():
    """Run the 'd/stack_1' stack evaluation and print AUC and feature importances.

    An ``ExtraTreesRegressor`` factory (4000 estimators, all cores) is
    passed to ``StackEval.eval``; the returned AUC is printed, then a blank
    line, then one feature importance per line.

    NOTE(review): an earlier definition with the same name exists in this
    file; this later one replaces it when the module is loaded.
    """
    print('load train data')
    # NOTE(review): neither row set is referenced after loading.
    trainRows = data.loadTrainJson('d/train.toks.csv')
    print('load test data')
    testRows = data.loadTrainJson('d/test1.toks.csv')

    print('eval')
    stack_eval = StackEval('d/stack_1')
    auc, fitted = stack_eval.eval(
        lambda: ensemble.ExtraTreesRegressor(
            n_estimators=4000, compute_importances=True, n_jobs=-1))

    print('auc: %f' % auc)
    print('')
    print('features')
    importance_lines = [str(weight) for weight in fitted.feature_importances_]
    print('\n'.join(importance_lines))
def mkSub(modelF, trainFile, testFile, outFile):
    """Build a model, train it on ``trainFile`` and write a submission.

    Args:
        modelF: zero-argument callable returning an object with ``train``.
        trainFile: path passed to ``loadTrainJson``; rows are materialised
            into a list before training.
        testFile: forwarded to ``makeSubmission``.
        outFile: forwarded to ``makeSubmission``.

    Returns:
        The trained model produced by ``modelF``.
    """
    printNow("make submission")

    rows = list(loadTrainJson(trainFile))
    printNow(" load train rows")

    model = modelF()
    model.train(rows)
    printNow(" trained")

    makeSubmission(model, testFile, outFile)
    return model