Exemple #1
0
def cross_validate(matrix,
                   identifiers,
                   features,
                   id2name,
                   model,
                   n_folds=3,
                   seed=None):
    """Run shuffled k-fold cross-validation of ``model`` and print the taus.

    For every fold a fresh imputer and scaler are fitted on the training
    rows only and then applied to both the training and the held-out rows.
    The model is fitted on the training rows and used to predict the
    held-out rows; held-out targets and predictions of all folds are
    pooled.  The pooled values are converted to ranking lists with
    ``position_ranking_lists`` and compared with ``compute_taus``, and
    every entry of that result is printed.

    Args:
        matrix: 2-D array with one row per entry of ``identifiers`` and one
            column per entry of ``features``.
        identifiers: sequence aligned with the rows of ``matrix``.
        features: list of ``(name, delta)`` pairs aligned with the columns
            of ``matrix``.  Columns whose delta is non-zero are the model
            inputs; the ``('fantasy_points', 0)`` column is the target.
        id2name: passed through unchanged to ``position_ranking_lists``.
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``; the
            same object is refitted for every fold.
        n_folds: number of folds handed to ``KFold``.
        seed: ``random_state`` handed to ``KFold``.

    Returns:
        None.  Results are only printed.

    Raises:
        ValueError: if ``('fantasy_points', 0)`` is not in ``features``.
    """
    feature_cols = [
        idx for idx, (_, delta) in enumerate(features) if delta != 0
    ]
    objective_index = features.index(('fantasy_points', 0))

    def get_features_objective(_matrix):
        """Split a preprocessed matrix into ``(inputs, target)``."""
        return _matrix[:, feature_cols], _matrix[:, objective_index]

    accum_test_identifiers = []
    accum_test_scores = []
    accum_test_preds = []
    folds = KFold(n=matrix.shape[0], n_folds=n_folds, shuffle=True,
                  random_state=seed)
    for train_index, test_index in folds:
        # Preprocessors are rebuilt per fold and fitted on training rows
        # only, so held-out rows never influence the fitted statistics.
        imputer = Imputer()
        scaler = StandardScaler()  # Need to standardize for eg SVR
        # NOTE(review): whole rows go through the imputer and scaler, so
        # the target column is transformed together with the inputs and
        # y_test / y_pred are in per-fold transformed units -- confirm this
        # is intended before comparing pooled values across folds.
        train_imputed = scaler.fit_transform(
            imputer.fit_transform(matrix[train_index, :]))
        test_imputed = scaler.transform(
            imputer.transform(matrix[test_index, :]))

        X_train, y_train = get_features_objective(train_imputed)
        model.fit(X_train, y_train)
        X_test, y_test = get_features_objective(test_imputed)
        y_pred = model.predict(X_test)

        accum_test_identifiers.extend(identifiers[idx] for idx in test_index)
        accum_test_scores.extend(y_test)
        accum_test_preds.extend(y_pred)

    pos_ranks_true = position_ranking_lists(accum_test_identifiers,
                                            accum_test_scores, id2name)
    pos_ranks_pred = position_ranking_lists(accum_test_identifiers,
                                            accum_test_preds, id2name)
    taus = compute_taus(pos_ranks_true, pos_ranks_pred)
    # Keys are ordered by their second element, then by their first.
    for deltapos in sorted(taus, key=lambda x: (x[1], x[0])):
        # Single pre-formatted argument: prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("%s %s" % (deltapos, taus[deltapos]))
Exemple #2
0
def main():
    """Load yearly stats, compare regressors, and dump ranked predictions.

    Steps:
      1. Load ``fant<year>.csv`` for the years 2008 through 2012 with
         ``load_files``.
      2. Build the feature matrix and a lookup from id to the
         ``(Name, Tm, FantasyFantPos)`` values of one of its years.
      3. Run 10-fold ``cross_validate`` for a fixed list of scikit-learn
         regressors, passing the same random seed to every call.
      4. Predict with a ``RandomForestRegressor`` via
         ``predict_current_year``, keep only ids that have an entry for
         ``BASE_YEAR - 1``, rank them with ``position_ranking_lists`` and
         hand the result to ``dump_predictions``.

    Raises:
        ValueError: if no prediction belongs to an id that has an entry
            for ``BASE_YEAR - 1``.
    """
    id2year2stats = load_files(
        {year: 'fant%d.csv' % year
         for year in range(2008, 2013)}, SPECIAL_CASE_TRADES)

    def id_to_useful_name(player_id):
        """Return (Name, Tm, FantasyFantPos) from one of the id's years."""
        year2stats = id2year2stats[player_id]
        # Whichever year comes first in the mapping's iteration order.
        any_year = year2stats[list(year2stats)[0]]
        return (any_year['Name'], any_year['Tm'], any_year['FantasyFantPos'])

    current_players = {
        player_id for player_id in id2year2stats
        if BASE_YEAR - 1 in id2year2stats[player_id]
    }

    matrix, identifiers, features = construct_feature_matrix(id2year2stats)
    id2name = {
        ident[ID]: id_to_useful_name(ident[ID])
        for ident in identifiers
    }

    from sklearn import linear_model
    from sklearn import ensemble
    from sklearn import svm

    # One seed for every model so all cross_validate calls get the same
    # random_state.
    seed = randint(0, 2**32 - 1)
    for model in [
            linear_model.LinearRegression(),
            linear_model.Ridge(),
            ensemble.RandomForestRegressor(),
            ensemble.ExtraTreesRegressor(),
            ensemble.AdaBoostRegressor(),
            ensemble.GradientBoostingRegressor(),
            svm.SVR(),
            svm.NuSVR(),
    ]:
        # Text before the first '(' of the estimator's string form.
        print(str(model).split('(')[0])
        cross_validate(matrix,
                       identifiers,
                       features,
                       id2name,
                       model,
                       n_folds=10,
                       seed=seed)
        print("")

    model = ensemble.RandomForestRegressor()
    current_predictions, current_ids = \
        predict_current_year(matrix, identifiers, features, id2name, model)

    kept = [(pred, ident)
            for pred, ident in zip(current_predictions, current_ids)
            if ident[ID] in current_players]
    if not kept:
        # zip(*[]) yields nothing, so the two-name unpacking below would
        # fail with an uninformative ValueError; raise a readable one.
        raise ValueError('no predictions for ids with an entry for %s'
                         % (BASE_YEAR - 1))
    current_predictions, current_ids = zip(*kept)

    current_predicted_ranks = position_ranking_lists(current_ids,
                                                     current_predictions,
                                                     id2name)

    dump_predictions(current_predicted_ranks)
def cross_validate(matrix, identifiers, features, id2name, model, n_folds=3, seed=None):
    """Cross-validate ``model`` with shuffled k-fold splits and print the taus.

    Each fold fits a new imputer and scaler on its training rows, applies
    them to the training and held-out rows, fits ``model`` on the training
    rows and predicts the held-out rows.  Held-out targets and predictions
    from all folds are pooled, turned into ranking lists by
    ``position_ranking_lists`` and compared by ``compute_taus``; each entry
    of that result is printed.

    Args:
        matrix: 2-D array; rows align with ``identifiers`` and columns
            align with ``features``.
        identifiers: sequence with one entry per row of ``matrix``.
        features: list of ``(name, delta)`` pairs, one per column.  Columns
            with a non-zero delta are used as inputs and the
            ``("fantasy_points", 0)`` column is the target.
        id2name: forwarded unchanged to ``position_ranking_lists``.
        model: estimator with ``fit(X, y)`` and ``predict(X)``; the same
            instance is refitted on every fold.
        n_folds: fold count passed to ``KFold``.
        seed: ``random_state`` passed to ``KFold``.

    Returns:
        None.  Output is printed only.

    Raises:
        ValueError: if ``("fantasy_points", 0)`` is missing from
            ``features``.
    """
    feature_cols = [idx for idx, (_, delta) in enumerate(features) if delta != 0]
    objective_index = features.index(("fantasy_points", 0))

    def get_features_objective(_matrix):
        """Return the input columns and the target column of ``_matrix``."""
        return _matrix[:, feature_cols], _matrix[:, objective_index]

    accum_test_identifiers = []
    accum_test_scores = []
    accum_test_preds = []
    splitter = KFold(n=matrix.shape[0], n_folds=n_folds, shuffle=True, random_state=seed)
    for train_index, test_index in splitter:
        # New preprocessors per fold, fitted on the training rows only so
        # the held-out rows do not contribute to the fitted statistics.
        imputer = Imputer()
        scaler = StandardScaler()  # Need to standardize for eg SVR
        # NOTE(review): complete rows are imputed and scaled, which also
        # transforms the target column; y_test and y_pred are therefore in
        # per-fold transformed units -- confirm that pooling them across
        # folds is intended.
        train_imputed = scaler.fit_transform(imputer.fit_transform(matrix[train_index, :]))
        test_imputed = scaler.transform(imputer.transform(matrix[test_index, :]))

        X_train, y_train = get_features_objective(train_imputed)
        model.fit(X_train, y_train)
        X_test, y_test = get_features_objective(test_imputed)
        y_pred = model.predict(X_test)

        accum_test_identifiers.extend(identifiers[idx] for idx in test_index)
        accum_test_scores.extend(y_test)
        accum_test_preds.extend(y_pred)

    pos_ranks_true = position_ranking_lists(accum_test_identifiers, accum_test_scores, id2name)
    pos_ranks_pred = position_ranking_lists(accum_test_identifiers, accum_test_preds, id2name)
    taus = compute_taus(pos_ranks_true, pos_ranks_pred)
    # Order keys by their second element first, then by their first element.
    for deltapos in sorted(taus, key=lambda x: (x[1], x[0])):
        # One pre-formatted argument gives identical output from the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (deltapos, taus[deltapos]))
def main():
    """Evaluate several regressors, then rank and dump current predictions.

    The function loads ``fant<year>.csv`` for 2008 through 2012 via
    ``load_files``, builds the feature matrix, and maps every id to the
    ``(Name, Tm, FantasyFantPos)`` values of one of its years.  A fixed
    list of scikit-learn regressors is then passed through 10-fold
    ``cross_validate`` with one shared random seed.  Finally a
    ``RandomForestRegressor`` is given to ``predict_current_year``; the
    predictions are restricted to ids that have an entry for
    ``BASE_YEAR - 1``, ranked with ``position_ranking_lists`` and written
    out with ``dump_predictions``.

    Raises:
        ValueError: if the restriction to ids with a ``BASE_YEAR - 1``
            entry leaves no predictions.
    """
    id2year2stats = load_files(
        {year: 'fant%d.csv' % year for year in range(2008, 2013)},
        SPECIAL_CASE_TRADES)

    def id_to_useful_name(player_id):
        """Return (Name, Tm, FantasyFantPos) taken from one stored year."""
        year2stats = id2year2stats[player_id]
        # First key in iteration order; any year's record is accepted.
        first_year = list(year2stats)[0]
        any_year = year2stats[first_year]
        return (any_year['Name'], any_year['Tm'],
                any_year['FantasyFantPos'])

    current_players = {player_id for player_id in id2year2stats
                       if BASE_YEAR - 1 in id2year2stats[player_id]}

    matrix, identifiers, features = construct_feature_matrix(id2year2stats)
    id2name = {ident[ID]: id_to_useful_name(ident[ID]) for ident in
               identifiers}

    from sklearn import linear_model
    from sklearn import ensemble
    from sklearn import svm

    # The same seed is handed to every cross_validate call.
    seed = randint(0, 2**32 - 1)
    for model in [linear_model.LinearRegression(),
                  linear_model.Ridge(),
                  ensemble.RandomForestRegressor(),
                  ensemble.ExtraTreesRegressor(),
                  ensemble.AdaBoostRegressor(),
                  ensemble.GradientBoostingRegressor(),
                  svm.SVR(),
                  svm.NuSVR(),
                  ]:
        # Heading: the part of the estimator's string form before '('.
        print(str(model).split('(')[0])
        cross_validate(matrix, identifiers, features, id2name, model,
                       n_folds=10, seed=seed)
        print("")

    model = ensemble.RandomForestRegressor()
    current_predictions, current_ids = \
        predict_current_year(matrix, identifiers, features, id2name, model)

    surviving = [(pred, ident) for pred, ident
                 in zip(current_predictions, current_ids)
                 if ident[ID] in current_players]
    if not surviving:
        # Unpacking zip(*[]) into two names raises a ValueError with no
        # context; fail with the same exception type and a clear message.
        raise ValueError('no predictions for ids with an entry for %s'
                         % (BASE_YEAR - 1))
    current_predictions, current_ids = zip(*surviving)

    current_predicted_ranks = position_ranking_lists(
        current_ids, current_predictions, id2name)

    dump_predictions(current_predicted_ranks)