# Seed NumPy's global RNG so every run of this pipeline is reproducible.
# NOTE(review): original line read `rnp.seed(3056)` — `rnp` is undefined and
# NumPy has no top-level `seed`; `np.random.seed` is the intended call.
import numpy as np
np.random.seed(3056)
########################################

from io_helper import IOHelper
from data_helper import DataHelper
from config_helper import ConfigHelper
from metrics_helper import MetricsHelper

if __name__ == "__main__":

    # Load the training split and separate features from labels.
    raw_train = IOHelper.read_dataset("train")
    X, y = DataHelper.extract_feature_labels(raw_train)

    use_predef = ConfigHelper.use_predefined_cols

    # Column-level cleanup — each helper mutates X in place.
    DataHelper.add_nan_indication_cols(X)
    DataHelper.remove_high_nan_rate_cols(X, use_predef)
    DataHelper.remove_small_variance_cols(X, use_predef)

    # Row-level cleanup, then imputation, categorical encoding and scaling.
    # remove_high_nan_rate_rows returns the labels realigned to the kept rows.
    y = DataHelper.remove_high_nan_rate_rows(X, y)
    DataHelper.fill_missing_data(X, is_train=True)
    X = DataHelper.split_categorical_cols(X, is_train=True)
    DataHelper.scale_continuous_cols(X, is_train=True)

    # Keep only the top-ranked features (second arg is the held-out set,
    # not used at this stage).
    X = DataHelper.select_best_features(
        X, None, y, ConfigHelper.max_nb_features, is_train=True)

    # Load the test split for the subsequent (downstream) processing.
    X_test = IOHelper.read_dataset("test")
import time

from io_helper import IOHelper
from data_helper import DataHelper
from config_helper import ConfigHelper
from metrics_helper import MetricsHelper

if __name__ == "__main__":

    data = IOHelper.read_dataset("train")
    feats, labels = DataHelper.extract_feature_labels(data)

    predef = ConfigHelper.use_predefined_cols

    DataHelper.add_nan_indication_cols(feats)
    DataHelper.remove_high_nan_rate_cols(feats, predef)
    DataHelper.remove_small_variance_cols(feats, predef)

    for e in xrange(ConfigHelper.nb_executions):
        print "Execution: " + str(e + 1)

        MetricsHelper.reset_metrics()

        for f, (train_idxs,
                val_idxs) in enumerate(ConfigHelper.k_fold_cv(labels)):
            start_time = time.time()
            print "Fold: " + str(f + 1)

            DataHelper.reset_scaler()