Ejemplo n.º 1
0
def ANALYSIS_all(division='men', my_data=None, metric=True):
    """Draw box plots comparing Open, Regionals and Games data.

    Each competition's analysis dataframe is loaded with
    ``get_analysis_dataframe`` and passed through ``clear_outliers`` before
    all three are handed to ``box_plots_all``.

    Args:
        division: Division name forwarded to ``get_analysis_dataframe``; its
            title-cased form is passed on to ``box_plots_all``.
        my_data: Optional mapping forwarded to ``box_plots_all``.  When
            omitted (or ``None``) a fresh empty dict is used.
        metric: Flag forwarded unchanged to ``box_plots_all``.
    """
    # A literal ``{}`` default is a single dict shared by every call, so any
    # mutation downstream would leak into later calls.  Build one per call.
    if my_data is None:
        my_data = {}

    # Same load -> clean sequence for every competition, in the original
    # order: open, regionals, games.
    open_data, regionals_data, games_data = [
        clear_outliers(
            get_analysis_dataframe(competition=competition,
                                   division=division))
        for competition in ('open', 'regionals', 'games')
    ]

    box_plots_all(open_data, regionals_data, games_data, division.title(),
                  my_data, metric)
Ejemplo n.º 2
0
def ANALYSIS_all_imputed(division='men', my_data=None, metric=True):
    """Draw box plots for all competitions using imputed Open statistics.

    The Open frame comes from ``get_imputed_dataframe``.  For Regionals and
    Games, only ``userid``, ``overallrank`` and ``overallscore`` are taken
    from that competition's own frame; every other column comes from the
    imputed Open frame, joined on ``userid``.

    Args:
        division: Division name forwarded to the dataframe loaders; its
            title-cased form, prefixed with ``"Imputed "``, is passed to
            ``box_plots_all``.
        my_data: Optional mapping forwarded to ``box_plots_all``.  When
            omitted (or ``None``) a fresh empty dict is used.
        metric: Flag forwarded unchanged to ``box_plots_all``.
    """
    # A literal ``{}`` default is a single dict shared by every call, so any
    # mutation downstream would leak into later calls.  Build one per call.
    if my_data is None:
        my_data = {}

    open_data = get_imputed_dataframe(division=division, competition='open')
    regionals_data = get_analysis_dataframe(division=division,
                                            competition='regionals')
    games_data = get_analysis_dataframe(division=division, competition='games')

    result_cols = ['overallrank', 'overallscore']
    # Open stats without the Open results; computed once and reused for both
    # merges (``pd.merge`` does not modify its inputs).
    open_stats = open_data.drop(result_cols, axis=1)

    def with_open_stats(results):
        # Inner join: only athletes present in both frames are kept.
        return pd.merge(open_stats,
                        results[['userid'] + result_cols],
                        on='userid',
                        how='inner')

    # use imputed values from open data to fill in athlete stats for
    # regionals/games data
    regionals_data = with_open_stats(regionals_data)
    games_data = with_open_stats(games_data)

    box_plots_all(open_data, regionals_data, games_data,
                  "Imputed " + division.title(), my_data, metric)
Ejemplo n.º 3
0
def ANALYSIS_games(division='men'):
    """Box-plot the outlier-filtered Games data for ``division``.

    Args:
        division: Division name forwarded to ``get_analysis_dataframe``.
    """
    raw = get_analysis_dataframe(competition='games', division=division)
    box_plots(clear_outliers(raw), 'Games')
Ejemplo n.º 4
0
def ANALYSIS_regionals(division='men'):
    """Box-plot the outlier-filtered Regionals data for ``division``.

    Args:
        division: Division name forwarded to ``get_analysis_dataframe``.
    """
    raw = get_analysis_dataframe(competition='regionals', division=division)
    box_plots(clear_outliers(raw), 'Regionals')
Ejemplo n.º 5
0
def ANALYSIS_open(division='men'):
    """Box-plot the outlier-filtered Open data for ``division``.

    Args:
        division: Division name forwarded to ``get_analysis_dataframe``.
    """
    raw = get_analysis_dataframe(competition='open', division=division)
    box_plots(clear_outliers(raw), 'Open')
Ejemplo n.º 6
0
def _get_imputed_dataframe(*args, **kwargs):
    """Load an analysis dataframe and fill in its missing values.

    Steps, in order:

    1. Load the frame with ``get_analysis_dataframe(*args, **kwargs)``, cast
       it to float and run ``clear_outliers``.
    2. For each ``(predictors, target)`` pair in ``Xys``, fit a
       ``RANSACRegressor`` on the rows where both are present and use it to
       fill the target on rows where the predictors are present but the
       target is missing.  Predictions are clipped at 0.
    3. Run ``clear_outliers`` again.
    4. Fill whatever is still missing with ``RecursiveKNN``.

    Args:
        *args, **kwargs: Forwarded unchanged to ``get_analysis_dataframe``.

    Returns:
        A ``pd.DataFrame`` with the index and columns of the frame as it
        stands after step 3, holding the output of
        ``RecursiveKNN.complete``.
    """

    def impute_rows(data, X_cols, y_cols):
        """Predict ``y_cols`` for rows that have ``X_cols`` but no ``y_cols``.

        ``data`` is a 2-D float array; ``X_cols``/``y_cols`` are lists of
        integer column positions.  Returns ``(rows_idx, y_pred)`` where
        ``rows_idx`` is the ``np.argwhere`` result selecting the rows to
        fill and ``y_pred`` holds the values for them.
        """
        X_present = ~np.isnan(data[:, X_cols]).any(axis=1)
        y_nan = np.isnan(data[:, y_cols])

        # Rows to fill: every target missing, every predictor present.
        rows_idx = np.argwhere(np.logical_and(y_nan.all(axis=1), X_present))
        y_pred = np.zeros((len(rows_idx), len(y_cols)))
        if len(rows_idx) > 0:
            print("\tImputing", len(rows_idx), "rows")
            # Training rows: no NaN in either predictors or targets.
            full_rows = np.argwhere(
                np.logical_and(X_present, ~y_nan.any(axis=1)))
            reg = RANSACRegressor()
            # ``argwhere`` yields column vectors of row positions, so
            # indexing with a list of column positions broadcasts to a
            # (rows, cols) selection.
            reg.fit(data[full_rows, X_cols], data[full_rows, y_cols])
            y_pred = reg.predict(data[rows_idx, X_cols]).clip(min=0)
        return (rows_idx, y_pred)

    def impute_update_data(data, X_cols, y_cols):
        """Return a copy of ``data`` with ``y_cols`` imputed from ``X_cols``.

        ``X_cols`` and ``y_cols`` are lists of column labels of ``data``.
        """
        print(X_cols, "predicting", y_cols)
        cols = list(data)
        X_idx = [cols.index(x) for x in X_cols]
        y_idx = [cols.index(y) for y in y_cols]
        # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values``
        # exists in every pandas version.  Copy so the array is always
        # writable and the input frame is left untouched.
        matrix = data.values.copy()
        rows_idx, y_pred = impute_rows(matrix, X_idx, y_idx)
        matrix[rows_idx, y_idx] = y_pred
        return pd.DataFrame(matrix, index=data.index, columns=data.columns)

    data = get_analysis_dataframe(*args, **kwargs)
    data = data.astype(float)
    data = clear_outliers(data)

    # (predictor columns, target columns) pairs, applied in this order; a
    # later pair can therefore use values filled in by an earlier one.
    Xys = [
        #(['Height'],['Weight']),
        #(['Weight'],['Height']),
        (['Snatch'], ['Clean and Jerk']),
        (['Clean and Jerk'], ['Snatch']),
        (['Snatch', 'Clean and Jerk'], ['Back Squat']),
        (['Snatch', 'Clean and Jerk', 'Back Squat'], ['Deadlift']),
        (['Back Squat'], ['Deadlift']),
        (['Deadlift'], ['Back Squat']),

        #(['Run 5k'],['Sprint 400m']),
        #(['Sprint 400m'],['Run 5k']),
        (['Weight', 'Snatch', 'Clean and Jerk', 'Back Squat',
          'Deadlift'], ['Max Pull-ups']),
        (['Weight', 'Back Squat', 'Deadlift'], ['Max Pull-ups']),
        (['Weight', 'Snatch', 'Clean and Jerk'], ['Max Pull-ups']),

        #(['Filthy 50'],['Fight Gone Bad']),
        #(['Fight Gone Bad'],['Filthy 50']),
        (['Max Pull-ups', 'Clean and Jerk'], ['Fran']),
        (['Clean and Jerk', 'Fran'], ['Grace']),
        (['Max Pull-ups', 'Sprint 400m', 'Run 5k'], ['Helen']),
        #(['Max Pull-ups', 'Grace'],['Fran']),
    ]
    for x, y in Xys:
        data = impute_update_data(data, x, y)

    # Second pass, now that regression-imputed values are in the frame.
    data = clear_outliers(data)

    # NOTE(review): RecursiveKNN is defined outside this block; presumably
    # ``complete`` returns an array shaped like ``data`` with the remaining
    # NaNs filled -- confirm against its implementation.
    imputer = RecursiveKNN(verbose=1,
                           n_jobs=4,
                           feature_selector=DecisionTreeRegressor)
    data = pd.DataFrame(imputer.complete(data),
                        index=data.index,
                        columns=data.columns)
    return data
Ejemplo n.º 7
0
    'Max Pull-ups': 25,
    'Fran': 5
}
# Hand-entered stats for one athlete, keyed by the same labels the analysis
# dataframes use for their columns (e.g. 'Snatch', 'Back Squat').
# NOTE(review): units are not stated here -- presumably metric (cm / kg) and
# presumably meant to be passed as ``my_data`` to the ANALYSIS_* functions;
# confirm against the callers.
pan = {
    'Age': 22,
    'Height': 158,
    'Weight': 53,
    'Back Squat': 57,
    'Clean and Jerk': 35,
    'Snatch': 28,
    'Deadlift': 70,
    'Max Pull-ups': 0
}

def _first_row_stats(division, competition):
    """Return the first row of an analysis dataframe as a plain dict.

    Missing values are dropped, then the ``overallscore``, ``userid`` and
    ``overallrank`` entries are removed, leaving only the remaining columns.
    """
    frame = get_analysis_dataframe(division=division, competition=competition)
    first_row = frame.iloc[0].dropna()
    stats = first_row.drop(['overallscore', 'userid', 'overallrank'])
    return stats.to_dict()


# Stats of the first athlete in each frame.
# NOTE(review): presumably the frames are ordered by rank, making these the
# top finishers -- confirm the sort order of get_analysis_dataframe.
fraser = _first_row_stats('men', 'games')
tct = _first_row_stats('women', 'games')
sara = _first_row_stats('women', 'open')

import xgboost as xgb


@memoize()
def get_fitted_model(data):