def create_submission_file(estimator, cols, weights=None, in_path=None, out_path=None, verbose=None,
                           load_from_temp=None, temp_path=None):
    """
    Fit ``estimator`` once per assignment and write the submission file.

    For every assignment listed in ``CONFIG.submission_assignments`` the
    estimator is fitted on the matching training rows and used to predict the
    matching submission rows. The final prediction written for a row is
    ``max(int(raw_prediction + MEAN + 0.5), 0)``, i.e. the raw prediction is
    added to the row's ``MEAN`` value, rounded half-up and clamped at zero.

    :param estimator: object exposing ``fit(X, y)`` and ``predict(X)``.
    :param cols: feature names; forwarded to ``get_df``, built on the
        submission frame through ``FeatureFactory`` and forwarded to
        ``get_cross_validation_parameters``.
    :param weights: forwarded unchanged to ``get_cross_validation_parameters``.
    :param in_path: forwarded to ``load_submission``.
    :param out_path: destination of the tab-separated output; defaults to
        ``submission.txt`` inside ``CONFIG.results_path``.
    :param verbose: ``None`` or ``'WARNING'``, ``'INFO'`` or ``'DEBUG'``; sets
        the level of the module logger. Any other value leaves it untouched.
    :param load_from_temp: forwarded to ``get_df``.
    :param temp_path: forwarded to ``get_df``.
    :return: the submission DataFrame with a ``prediction`` column added and
        the ``MEAN`` column removed (the same frame that is written to disk).
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from collections import deque

    # Set defaults
    if verbose is None or verbose == 'WARNING':
        logger.setLevel(logging.WARNING)
    elif verbose == 'INFO':
        logger.setLevel(logging.INFO)
    elif verbose == 'DEBUG':
        # Was logging.INFO, which made DEBUG output impossible to enable.
        logger.setLevel(logging.DEBUG)
    if out_path is None:
        out_path = os.path.join(CONFIG.results_path, "submission.txt")

    train_df, weather_df = get_df(cols, load_from_temp, temp_path)
    submission_df = load_submission(in_path)

    # Build the requested features (plus TIME and WEEKEND, which are always
    # added) on a copy so that submission_df itself keeps its original columns.
    ff = FeatureFactory(submission_df.copy(), weather_df)
    for col in set(cols).union({'TIME', 'WEEKEND'}):
        ff(col)
    sub_df = ff.X

    # One FIFO queue of predictions per assignment, in submission row order.
    predictions = {}
    for assignment in CONFIG.submission_assignments:
        logger.info('Preparing for submission: %s...', assignment)
        t_df = train_df[train_df['ASS_ASSIGNMENT'] == assignment]
        X, y, _, _ = get_cross_validation_parameters(t_df, cols, weather_df=weather_df, weights=weights)
        s_df = sub_df[sub_df['ASS_ASSIGNMENT'] == assignment]
        X_submission, _, _, _ = get_cross_validation_parameters(s_df, cols, weather_df=weather_df,
                                                                weights=weights, label='prediction')
        estimator.fit(X, y)
        predictions[assignment] = deque(estimator.predict(X_submission))

    # Hand the predictions back row by row; popleft() is O(1) whereas
    # list.pop(0) shifts the whole list on every row.
    sub_df['raw_prediction'] = sub_df['ASS_ASSIGNMENT'].apply(lambda x: predictions[x].popleft())

    # NOTE(review): this assignment aligns on the index, so it assumes ff.X
    # kept the index of submission_df - confirm in FeatureFactory.
    submission_df['prediction'] = sub_df.apply(
        lambda x: max(int(x['raw_prediction'] + x['MEAN'] + 0.5), 0), axis=1)
    submission_df.drop('MEAN', axis=1, inplace=True)
    submission_df.to_csv(out_path, sep='\t', index=None, encoding='utf-8',
                         date_format='%Y-%m-%d %H:%M:%S.000')
    return submission_df
OrthogonalMatchingPursuit, RANSACRegressor
# NOTE(review): the line above is the end of an import statement that starts
# above this chunk.
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.ensemble import AdaBoostRegressor, BaggingRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.isotonic import IsotonicRegression
from sklearn import metrics
from sklearn.feature_selection import SelectKBest, chi2
from learning.stacked_regressor import StackedRegression

# Disable pandas' chained-assignment (SettingWithCopy) warning for this run.
pd.options.mode.chained_assignment = None  # Remove warnings
logger.setLevel(logging.DEBUG)

# Data used by the experiment. The training frame load is currently disabled.
# _df = load_train_df(CONFIG.preprocessed_train_path)
_submission_df = load_submission()

# Estimator under test: exactly one assignment is left active, the others are
# alternatives kept for quick switching.
# _estimator = KNeighborsRegressor(n_neighbors=3)
# _estimator = ARDRegression()
_estimator = BayesianRidge()
# _estimator = OrthogonalMatchingPursuit()
# _estimator = ExtraTreeRegressor()

# Experiment settings; presumably consumed by code below this chunk.
# NOTE(review): newer scikit-learn releases name this scorer
# 'neg_mean_squared_error' - confirm against the installed version.
_scoring = 'mean_squared_error'
_k_fold = 3
_n_jobs = 3
_verbose = 0
_fit_params = None
_cols = ["CUM_DAYS"]
_weights = None
# 'train.csv' in the current working directory.
_temp_path = os.path.join(os.getcwd(), 'train.csv')
preprocess_meteo3(df) # Booleans for each department where average amount of rain is above 1mm and where it has frozen. print('Meteo4...') preprocess_meteo4(df) return df # logger.debug('Meteo dataframes concatenated.') # logger.debug('Summing departments...') # df = df.groupby('DATE').agg({'MIN_TEMP': lambda x: pd.Series([(x <= 2).sum()]), # 'PRECIP': lambda x: pd.Series([(x > 1).sum()])}) # df = df.rename(columns={'MIN_TEMP': 'NUMB_FROZEN_DEPT'}) # df = df.rename(columns={'PRECIP': 'NUMB_WET_DEPT'}) # df[["NUMB_FROZEN_DEPT", 'NUMB_WET_DEPT']] = df[["NUMB_FROZEN_DEPT", "NUMB_WET_DEPT"]].astype(int) # df.to_csv(CONFIG.preprocessed_meteo_path) # logger.info('Saved meteo in csv file.') if __name__ == "__main__": # parse_train_as_dict(CONFIG.raw_train_path, CONFIG.preprocessed_train_path) # parse_train_as_df(CONFIG.raw_train_path, CONFIG.preprocessed_train_path) # df1 = parse_meteo_as_df(CONFIG.raw_meteo_path1) # df2 = parse_meteo_as_df(CONFIG.raw_meteo_path2) # df = pd.concat([df1, df2]) # print(df) # run('train', verbose=1) from utils import load_submission print(complete_with_means(load_submission(CONFIG.submission_path), CONFIG.submission_path_mean)) pass