from question_query import create_questions_df
from answer_query import create_answers_df
from data_cleaning import DataCleaner
from model_tester import FindOptimalModels


if __name__ == '__main__':
    # Answers pipeline: pull rows, clean them into a feature matrix and a
    # target, then run baseline, default, grid-searched and optimized models.

    # Step 1: fetch the raw answers frame.
    # 1e6 is a float literal, so the message below renders it as 1000000.0.
    numrows = 1e6
    print("Connecting and getting ~{}".format(numrows))
    answers_df = create_answers_df(numrows)

    # Step 2: clean the frame using the answers-specific configuration.
    print("Got rows, cleaning data")
    answers_cleaner = DataCleaner(
        answers_df,
        questions=False,
        training=True,
        simple_regression=True,
        time_split=False,
        normalize=False,
    )
    features, target = answers_cleaner.get_clean()

    # Step 3: candidate estimators and the hyper-parameter grid for each.
    # NOTE(review): neither RandomForestRegressor nor
    # GradientBoostingRegressor is imported by any import line visible in
    # this file, so this assignment raises NameError unless the names are
    # supplied some other way -- confirm and add the missing import.
    default_models = [
        RandomForestRegressor,
        GradientBoostingRegressor,
    ]

    # Keys 'rf' / 'gbr' presumably select the matching estimator inside
    # FindOptimalModels -- verify against model_tester.
    param_dict = {
        'rf': {
            'n_estimators': [50, 100, 5000],
            'max_depth': [2, 3, 5],
        },
        'gbr': {
            'learning_rate': [.001, .01, .1, .2],
            'max_depth': [2, 3, 5],
            'n_estimators': [50, 100, 5000],
        },
    }

    # Step 4: baseline -> default fits -> grid search -> optimized re-run.
    print('Finding optimal models')
    finder = FindOptimalModels(
        features,
        target,
        question=False,
        time_split=False,
    )
    finder.baseline_model()
    fitted_models = finder.run_default_models(default_models)

    print("starting grid search")
    opt_params = finder.run_grid_search(fitted_models, param_dict)
    opt_results = finder.run_optimized_models()
from answer_query import create_answers_df
from data_cleaning import DataCleaner
from model_tester import FindOptimalModels

if __name__ == '__main__':
    # Questions pipeline entry point: fetch question rows, clean them, and
    # set up the candidate models and hyper-parameter grid.
    # NOTE(review): the visible part of this block stops after the
    # 'Finding optimal models' message; any model-fitting steps that follow
    # are not shown here.

    # 1e6 is a float literal, so the message below renders it as 1000000.0.
    numrows = 1e6
    print("Connecting and getting ~{}".format(numrows))
    # create_questions_df comes from the question_query import at the top of
    # the file; it is not among the imports repeated just above this block.
    q = create_questions_df(numrows)
    print("Got rows, cleaning data")
    # Same cleaner settings as the answers run except questions=True.
    q_train_dc = DataCleaner(q,
                             questions=True,
                             training=True,
                             simple_regression=True,
                             time_split=False,
                             normalize=False)
    # get_clean() is unpacked into two values; presumably features (X) and
    # target (y) -- confirm against data_cleaning.
    X, y = q_train_dc.get_clean()

    # NOTE(review): RandomForestRegressor and GradientBoostingRegressor are
    # not imported by any import line visible in this file, so this line
    # raises NameError unless the names are provided some other way.
    default_models = [RandomForestRegressor, GradientBoostingRegressor]

    # Hyper-parameter grid keyed by short model tags; presumably 'rf' and
    # 'gbr' map to the two estimators listed above -- verify in model_tester.
    param_dict = {
        'rf': {
            'n_estimators': [50, 100, 5000],
            'max_depth': [2, 3, 5]
        },
        'gbr': {
            'learning_rate': [.001, .01, .1, .2],
            'max_depth': [2, 3, 5],
            'n_estimators': [50, 100, 5000]
        }
    }
    print('Finding optimal models')