def benchmark_solution():
    """Cross-validate the baseline model on the raw training features.

    Loads ``train.csv``, label-encodes its ``target`` column in place, uses
    every column except ``target`` and ``id`` as a feature, prints a banner
    and hands the model from ``get_rf()`` to ``cross_v``.

    Whatever ``cross_v`` returns is discarded; this function returns None.
    """
    train = load_data('train.csv')
    lbl_enc = preprocessing.LabelEncoder()
    train['target'] = lbl_enc.fit_transform(train['target'])
    feature_cols = [col for col in train.columns if col not in ('target', 'id')]
    X_train = train[feature_cols]
    y = train['target']
    # The test set used to be loaded and sliced here, but nothing in this
    # function consumed it, so that dead work has been dropped.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("benchmark solution")
    # Figure noted by the original author next to this call: 0.596256539386
    cross_v(get_rf(), X_train.values, y.values)
def feature_engineering_solution():
    """Cross-validate the baseline model on engineered training features.

    Loads ``train.csv``, label-encodes its ``target`` column in place, passes
    every column except ``target`` and ``id`` through
    ``feature_engineering``, prints a banner and hands the model from
    ``get_rf()`` to ``cross_v``.

    Whatever ``cross_v`` returns is discarded; this function returns None.
    """
    train = load_data('train.csv')
    le = preprocessing.LabelEncoder()
    train['target'] = le.fit_transform(train['target'])
    raw_cols = [col for col in train.columns if col not in ('target', 'id')]
    # Figures noted by the original author at this step:
    #   std 0.607958003167   mean 0.615741311533
    X_train = feature_engineering(train[raw_cols])
    y = train['target']
    # The engineered test frame and test ids used to be built here, but
    # nothing in this function consumed them, so that dead work has been
    # dropped (along with the no-op re-selection of all engineered columns).
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('feature_engineering_solution')
    # Figure noted by the original author next to this call: 0.600017926514
    cross_v(get_rf(), X_train.values, y.values)
def parameter_tuning_solution():
    """Cross-validate the tuned model, then fit it and write a submission.

    Loads ``train.csv`` and ``test.csv``, label-encodes the training
    ``target`` column in place and uses every column except ``target`` and
    ``id`` as a feature.  The model from ``get_tuned_rf()`` is passed to
    ``cross_v``; a second ``get_tuned_rf()`` model is then fitted on the
    training frame and its ``predict_proba`` output for the test frame is
    passed to ``write_submission`` together with the test ``id`` column.

    Returns None.
    """
    train = load_data('train.csv')
    test = load_data('test.csv')
    le = preprocessing.LabelEncoder()
    train['target'] = le.fit_transform(train['target'])
    feature_cols = [col for col in train.columns if col not in ('target', 'id')]
    X_train = train[feature_cols]
    X_test = test[feature_cols]
    y = train['target']
    test_ids = test['id']

    # One tag drives both the banner and the output file name so the two
    # cannot drift apart; the resulting strings are unchanged.
    run_name = 'parameter_tuning_solution800_6'
    submission_path = 'submissions/' + run_name + '.csv'

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(run_name)
    # Figure noted by the original author next to this call: 0.546637992781
    cross_v(get_tuned_rf(), X_train.values, y.values)

    clf = get_tuned_rf()
    clf.fit(X_train, y)
    preds = clf.predict_proba(X_test)
    write_submission(test_ids, preds, submission_path)
def clf_score(models,X_train,y_train):
    """Cross-validate each model and collect the results in a DataFrame.

    Args:
        models: iterable of indexable entries; ``entry[0]`` is used as the
            row label and ``entry[1]`` is the model passed to ``cross_v``.
        X_train: object whose ``.values`` is passed to ``cross_v`` as the
            second argument.
        y_train: object whose ``.values`` is passed to ``cross_v`` as the
            third argument.

    Returns:
        pandas.DataFrame built from the ``cross_v`` results, one row per
        model in input order, indexed by the model labels.  An empty
        ``models`` yields an empty DataFrame.
    """
    index = []
    score = []
    for entry in models:
        label = entry[0]
        index.append(label)
        cv = cross_v(entry[1], X_train.values, y_train.values)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(label)
        print(cv)
        score.append(cv)
    return pd.DataFrame(score, index=index)
def feature_selection_solution():
    """Cross-validate and submit the baseline model on a reduced feature set.

    Loads ``train.csv`` and ``test.csv``, label-encodes the training
    ``target`` column in place, and runs every column except ``target`` and
    ``id`` through ``feature_engineering`` for both frames.  The columns
    ``mean``, ``std``, ``nonzero``, ``feat_6``, ``feat_82`` and ``feat_84``
    are then dropped from both engineered frames.  The model from
    ``get_rf()`` is passed to ``cross_v``; a second ``get_rf()`` model is
    fitted on the training frame and its ``predict_proba`` output for the
    test frame is passed to ``write_submission`` with the test ``id`` column.

    Returns None.
    """
    train = load_data('train.csv')
    test = load_data('test.csv')
    le = preprocessing.LabelEncoder()
    train['target'] = le.fit_transform(train['target'])
    raw_cols = [col for col in train.columns if col not in ('target', 'id')]
    X_train = feature_engineering(train[raw_cols])
    X_test = feature_engineering(test[raw_cols])

    # Keep the same surviving columns, in training order, for both frames.
    dropped = ('mean', 'std', 'nonzero', 'feat_6', 'feat_82', 'feat_84')
    feature_cols = [col for col in X_train.columns if col not in dropped]
    X_train = X_train[feature_cols]
    X_test = X_test[feature_cols]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(X_train.columns)

    y = train['target']
    test_ids = test['id']
    print('feature_selection_solution')
    # Figures noted by the original author next to this call:
    #   mean 0.595288515439   std 0.593551044059   nonzero 0.597406303207
    #   no fg 6 82 84: 0.603600594376
    #   0.600058535601
    cross_v(get_rf(), X_train.values, y.values)

    clf = get_rf()
    clf.fit(X_train, y)
    preds = clf.predict_proba(X_test)
    write_submission(test_ids, preds, 'submissions/feature_selection_rf100_84_82_6_nofg.csv')
        # NOTE(review): orphaned fragment. The `def` and loop header that
        # should enclose these lines are not visible in this file, and
        # `params` is not bound anywhere in view. As written, the deeper
        # indentation after the preceding statement is an IndentationError.
        # Presumably this is the tail of a grid-search helper -- TODO confirm,
        # then restore the missing header or delete these lines.
        clf=clfs[name]['est'](**params)
        print name
        #cross_v(clf,X_train.values,y.values)
        clf.fit(X_train.values,y.values)
        
def main():
    """Run ``grid_search`` on the engineered training features.

    Loads ``train.csv``, runs every column except ``target`` and ``id``
    through ``feature_engineering``, and calls ``grid_search`` with the
    engineered frame, the ``target`` column as loaded, and the result of
    ``get_clfs()``.
    """
    data = load_data('train.csv')
    predictors = [name for name in data.columns if name not in ('target', 'id')]
    # Arguments are evaluated left to right: engineered features first,
    # then the target column, then the classifier collection.
    grid_search(
        feature_engineering(data[predictors]),
        data['target'],
        get_clfs(),
    )

#if __name__ == '__main__':
#    main()
# Ad-hoc experiment executed at import time: for every entry returned by
# get_gb(), cross-validate its estimator under each parameter combination
# of that entry's grid.
train = load_data('train.csv')
le = preprocessing.LabelEncoder()
train['target'] = le.fit_transform(train['target'])
feature_cols = [col for col in train.columns if col not in ('target', 'id')]
X_train = train[feature_cols]
y = train['target']
clfs = get_gb()
# .items() works on both Python 2 and Python 3 (iteritems is Python 2 only).
for name, spec in clfs.items():
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(name)
    param_list = list(ParameterGrid(spec['grid']))
    for params in param_list:
        # NOTE(review): assuming spec['est'] is a scikit-learn estimator
        # instance, set_params reconfigures that same object and returns it,
        # so every combination reuses one estimator -- TODO confirm.
        clf = spec['est'].set_params(**params)
        print(clf)
        print(params)
        cross_v(clf, X_train.values, y.values)