import logging

from pandas import DataFrame
# NOTE(review): sklearn.externals.joblib was deprecated in scikit-learn 0.21
# and removed in 0.23 -- confirm the pinned scikit-learn version still ships it.
from sklearn.externals import joblib
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

import env
import util.logging
from data_scripts import pcsml_data_loader as dl
from modeling import preprocessing, score_util
from modeling.preprocessing import make_one_hot_pipeline

# Set up logging (the project helper is handed the result path) before the
# first message is emitted, then use a module-level logger for everything.
util.logging.setup_default(env.result_path)
log = logging.getLogger(__name__)

log.info("Running...")
log.info("Env:\n%s", env.dump())

# Load the data frame. To make debugging faster, a sample of the sample can be
# used instead:
# df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018().sample(2000)
df: DataFrame = dl.load_df_corn_pkl_smpl_25_20171018()
# Logged through the module logger (not the root logger) so this record carries
# the same logger name as the other messages in this script.
log.debug("data shape: %s", df.shape)

# pop() removes the target column from df, so X (the same frame) holds only
# the remaining columns.
y = df.pop('Dry_Yield')
X = df

###
# transform
###
X, label_cols = preprocessing.shape_gis_pps(X)
def test_dump():
    """Check that env.dump() writes its header and the process environment.

    A known variable (FOO=BAR) is placed in os.environ, env.dump() is asked to
    write into an in-memory text buffer, and the captured text is checked for
    the "= ENV =" header and the "FOO=BAR" entry.

    The previous state of FOO is restored afterwards so this test does not
    leak environment changes into other tests running in the same process.
    """
    previous = os.environ.get('FOO')
    os.environ['FOO'] = 'BAR'
    try:
        f = io.StringIO()
        env.dump(f)
        output = f.getvalue()
    finally:
        # Undo the mutation even when env.dump() raises.
        if previous is None:
            os.environ.pop('FOO', None)
        else:
            os.environ['FOO'] = previous

    assert "= ENV =" in output
    assert "FOO=BAR" in output