# random.shuffle(tmplist) # TODO: for each feature group, make one plot of all # the AUCs compared to each other # need: for each featurelist: # - a name for it # - the AUC + confidence interval for featurelist in itertools.combinations(featuregroup,pr): # for featurelist in tmplist[:10]: print "Features: \n\t%s"%'\n\t'.join(featurelist) m = getattr(am,modelname)(**thispdict) e = Experiment(model=m, feature_list=featurelist, dloader=dload, id=None,nan_handling=cfg['nan_handling'], logFolder=cfg['logFolder'], looplog=cfg['looplog'], summary_only=cfg['summary_only']) e.run_experiment() # tmp[(tuple(featurelist)] = (e.auc, e.auc_train) # tmp.loc[rowIdx] = [e.auc,e.auc_train,featurelist] # if e.auc>e.auc_train: # print "THIS NO GOOD" # print ', '.join(featurelist) # print "++++++++++++++++++++" # rowIdx+=1 # print "Best so far: " # bla = max(tmp.items(),key=lambda x: x[1][0]) # print '\tModel: ', bla[0][0]
# the AUCs compared to each other # need: for each featurelist: # - a name for it # - the AUC + confidence interval for featurelist in itertools.combinations( featuregroup, pr): # for featurelist in tmplist[:10]: print "Features: \n\t%s" % '\n\t'.join(featurelist) m = getattr(am, modelname)(**thispdict) e = Experiment(model=m, feature_list=featurelist, dloader=dload, id=None, nan_handling=cfg['nan_handling'], logFolder=cfg['logFolder'], looplog=cfg['looplog'], summary_only=cfg['summary_only']) e.run_experiment() # tmp[(tuple(featurelist)] = (e.auc, e.auc_train) # tmp.loc[rowIdx] = [e.auc,e.auc_train,featurelist] # if e.auc>e.auc_train: # print "THIS NO GOOD" # print ', '.join(featurelist) # print "++++++++++++++++++++" # rowIdx+=1 # print "Best so far: "
# go over feature groups: for each one, build the configured model, run the
# experiment pre-processing, and draw bootstrap row indices for resampling.
res = {}
for featuregroup in cfg['features']:
    model = cfg['model']
    # The model spec must be a single-entry mapping {modelname: paramdict}.
    # Checking the dict length directly also rejects an empty spec, which the
    # old `> 1` checks let through to an IndexError below.
    if len(model) != 1:
        raise IOError("A model is not specified correctly.")
    modelname = model.keys()[0]
    paramdict = model.values()[0]
    m = getattr(am, modelname)(**paramdict)
    e = Experiment(model=m, feature_list=featuregroup.values()[0],
                   dloader=dload, id=None,
                   nan_handling=cfg['nan_handling'],
                   logFolder=cfg['logFolder'], looplog=cfg['looplog'])
    e.apply_postprocessors()
    e.handle_NAs()
    # Need to make sure that we only select the columns that are the same
    # across train and test -- take the intersection, minus the target.
    test_cols = set(e.test_rows.columns.values)
    train_cols = set(e.train_rows.columns.values)
    predictor_cols = list(test_cols & train_cols)
    predictor_cols.remove(e.target_col)
    # Take out the dataframe we'll be working with.
    df = e.train_rows[predictor_cols + [e.target_col]]
    # One column of sampled-with-replacement row indices per bootstrap round.
    randIdxs = np.random.randint(df.shape[0], size=(df.shape[0], cfg['n_boot']))