コード例 #1
0
def create_submission_file(estimator, cols, weights=None, in_path=None, out_path=None, verbose=None,
                           load_from_temp=None, temp_path=None):
    """
    Creates the submission.txt file.

    For every assignment listed in ``CONFIG.submission_assignments`` the estimator is re-fitted on
    the training rows of that assignment and then used to predict the submission rows of the same
    assignment. The raw predictions are added to the ``MEAN`` column, rounded, clamped at zero and
    written to ``out_path`` as a tab-separated file.

    :param estimator: object exposing ``fit(X, y)`` and ``predict(X)``; it is re-fitted once per
        assignment, so its state after the call reflects the last assignment only.
    :param cols: iterable of feature names; forwarded to ``get_df``, ``FeatureFactory`` and
        ``get_cross_validation_parameters``. ``'TIME'`` and ``'WEEKEND'`` are always built as well.
    :param weights: forwarded unchanged to ``get_cross_validation_parameters``.
    :param in_path: forwarded to ``load_submission``.
    :param out_path: destination file; defaults to ``submission.txt`` under ``CONFIG.results_path``.
    :param verbose: ``None``/``'WARNING'``, ``'INFO'`` or ``'DEBUG'``; selects the level of the
        module logger. Any other value leaves the logger level untouched.
    :param load_from_temp: forwarded to ``get_df``.
    :param temp_path: forwarded to ``get_df``.
    :return: the submission dataframe, with the ``prediction`` column filled in and the ``MEAN``
        column removed.
    """
    # Local import so that this function does not depend on the file's import header.
    from collections import deque

    # Set defaults
    if verbose is None or verbose == 'WARNING':
        logger.setLevel(logging.WARNING)
    elif verbose == 'INFO':
        logger.setLevel(logging.INFO)
    elif verbose == 'DEBUG':
        # Was logging.INFO, which made the DEBUG verbosity impossible to enable.
        logger.setLevel(logging.DEBUG)
    if out_path is None:
        out_path = os.path.join(CONFIG.results_path, "submission.txt")

    train_df, weather_df = get_df(cols, load_from_temp, temp_path)

    # Build the features on a copy so that submission_df itself keeps its original columns.
    submission_df = load_submission(in_path)
    ff = FeatureFactory(submission_df.copy(), weather_df)
    for col in set(cols).union({'TIME', 'WEEKEND'}):
        ff(col)
    sub_df = ff.X

    # assignment -> queue of predictions, in the row order of that assignment inside sub_df.
    predictions = {}

    for assignment in CONFIG.submission_assignments:
        logger.info('Preparing for submission: %s...', assignment)
        t_df = train_df[train_df['ASS_ASSIGNMENT'] == assignment]
        X, y, _, _ = get_cross_validation_parameters(t_df, cols, weather_df=weather_df,
                                                     weights=weights)
        s_df = sub_df[sub_df['ASS_ASSIGNMENT'] == assignment]
        X_submission, _, _, _ = get_cross_validation_parameters(s_df, cols, weather_df=weather_df,
                                                                weights=weights, label='prediction')
        estimator.fit(X, y)
        # deque gives O(1) removal from the front; list.pop(0) is O(n) per row.
        predictions[assignment] = deque(estimator.predict(X_submission))

    # Hand the predictions back row by row: each row consumes the next value queued for its
    # assignment, which restores the original row order across assignments.
    sub_df['raw_prediction'] = sub_df['ASS_ASSIGNMENT'].apply(lambda x: predictions[x].popleft())
    # Adding 0.5 before int() rounds non-negative totals half-up; negative totals are clamped to 0.
    # NOTE(review): the assignment relies on sub_df and submission_df sharing the same index - confirm.
    submission_df['prediction'] = \
        sub_df.apply(lambda x: max(int(x['raw_prediction'] + x['MEAN'] + 0.5), 0), axis=1)
    submission_df.drop('MEAN', axis=1, inplace=True)

    submission_df.to_csv(out_path, sep='\t', index=None, encoding='utf-8', date_format='%Y-%m-%d %H:%M:%S.000')
    return submission_df
コード例 #2
0
        OrthogonalMatchingPursuit, RANSACRegressor
    from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
    from sklearn.svm import SVR, LinearSVR
    from sklearn.ensemble import AdaBoostRegressor, BaggingRegressor, GradientBoostingRegressor, RandomForestRegressor
    from sklearn.isotonic import IsotonicRegression

    from sklearn import metrics
    from sklearn.feature_selection import SelectKBest, chi2

    from learning.stacked_regressor import StackedRegression
    pd.options.mode.chained_assignment = None  # Remove warnings

    logger.setLevel(logging.DEBUG)

    # _df = load_train_df(CONFIG.preprocessed_train_path)
    _submission_df = load_submission()
    # _estimator = KNeighborsRegressor(n_neighbors=3)
    # _estimator = ARDRegression()
    _estimator = BayesianRidge()
    # _estimator = OrthogonalMatchingPursuit()
    # _estimator = ExtraTreeRegressor()

    _scoring = 'mean_squared_error'
    _k_fold = 3
    _n_jobs = 3
    _verbose = 0
    _fit_params = None
    _cols = ["CUM_DAYS"]
    _weights = None
    _temp_path = os.path.join(os.getcwd(), 'train.csv')
コード例 #3
0
        preprocess_meteo3(df)

        # Booleans for each department where average amount of rain is above 1mm and where it has frozen.
        print('Meteo4...')
        preprocess_meteo4(df)

        return df

        # logger.debug('Meteo dataframes concatenated.')
        # logger.debug('Summing departments...')
        # df = df.groupby('DATE').agg({'MIN_TEMP': lambda x: pd.Series([(x <= 2).sum()]),
        #                                'PRECIP': lambda x: pd.Series([(x > 1).sum()])})
        # df = df.rename(columns={'MIN_TEMP': 'NUMB_FROZEN_DEPT'})
        # df = df.rename(columns={'PRECIP': 'NUMB_WET_DEPT'})
        # df[["NUMB_FROZEN_DEPT", 'NUMB_WET_DEPT']] = df[["NUMB_FROZEN_DEPT", "NUMB_WET_DEPT"]].astype(int)
        # df.to_csv(CONFIG.preprocessed_meteo_path)
        # logger.info('Saved meteo in csv file.')


if __name__ == "__main__":
    # Earlier ad-hoc entry points, kept for reference:
    # parse_train_as_dict(CONFIG.raw_train_path, CONFIG.preprocessed_train_path)
    # parse_train_as_df(CONFIG.raw_train_path, CONFIG.preprocessed_train_path)
    # df1 = parse_meteo_as_df(CONFIG.raw_meteo_path1)
    # df2 = parse_meteo_as_df(CONFIG.raw_meteo_path2)
    # df = pd.concat([df1, df2])
    # print(df)
    # run('train', verbose=1)
    from utils import load_submission

    # Load the submission from CONFIG.submission_path, pass it to complete_with_means together
    # with CONFIG.submission_path_mean, and print whatever that call returns.
    _submission = load_submission(CONFIG.submission_path)
    _with_means = complete_with_means(_submission, CONFIG.submission_path_mean)
    print(_with_means)