def predict(self, cs, df, last_test_day):
    """Predict outcomes for the rows of ``df`` inside the test window.

    The window keeps rows whose ``'date'`` is strictly greater than
    ``self._last_graph_day`` and less than or equal to ``last_test_day``.
    The selected rows are passed through ``combine_same_matchups`` and then
    ``greater_than_minute`` before being handed to
    ``self.predict_over_dates``.

    Args:
        cs: Synergy graph object; forwarded unchanged to
            ``self.predict_over_dates``.
        df: Frame with a ``'date'`` column that supports comparison and
            boolean-mask indexing (presumably a pandas DataFrame -- confirm
            against callers).
        last_test_day: Inclusive upper bound of the test window.

    Returns:
        Whatever ``self.predict_over_dates`` returns for the cleaned window.
    """
    # Lower bound is exclusive, upper bound is inclusive.
    after_graph = df['date'] > self._last_graph_day
    within_horizon = df['date'] <= last_test_day
    window = df[after_graph & within_horizon]

    # Same cleaning order as the original: merge matchups first, then filter.
    cleaned = greater_than_minute(combine_same_matchups(window))
    return self.predict_over_dates(cs, cleaned)
def get_train_df(self, df):
    """Build the training frame from ``df``.

    Rows are selected with ``before_date_df(df, self._last_graph_day)``,
    then passed through ``combine_same_matchups`` followed by
    ``greater_than_minute``.

    Args:
        df: Full data set to subset.
            NOTE(review): whether the cutoff day itself is included depends
            on ``before_date_df`` -- confirm there.

    Returns:
        The result of the two cleaning helpers applied to the selected rows.
    """
    history = before_date_df(df, self._last_graph_day)
    # Same cleaning order as the original: merge matchups first, then filter.
    return greater_than_minute(combine_same_matchups(history))
# com_df['correct'] = com_df['i_margin'] * com_df['prediction'] > 0 # # com_df = com_df[com_df['prediction'].notnull()] return predict_df.reset_index(drop=True) if __name__ == '__main__': season = '2008' X = read_season('matchups_reordered', season) X = add_date(X) # X = X[:int(len(X) * 0.3) + 1] all_preds = pd.DataFrame() k_folds = 1 for k in xrange(k_folds): train_df, test_df = train_test_split(X, test_size=0.1) train_df = combine_same_matchups(train_df) # train_df = greater_than_minute(train_df) # Reset index on test set to make it easier to merge later test_df = test_df.reset_index(drop=True) # Compute and Predict for Unweighted Graph cs = ComputeSynergies(train_df) cs.initialize_random_graphs(10) # cs.simulated_annealing(200) preds = predict_all(cs, test_df, season) # Compute and Predict for Weighted Graph csw = ComputeWeightedSynergies(train_df) csw.genetic_algorithm(60, count=5) preds_w = predict_all(csw, test_df, '2008')