def main(args=None): r"""AlphaPy Main Program Notes ----- (1) Initialize logging. (2) Parse the command line arguments. (3) Get the model configuration. (4) Create the model object. (5) Call the main AlphaPy pipeline. """ # Logging logging.basicConfig(format="[%(asctime)s] %(levelname)s\t%(message)s", filename="alphapy.log", filemode='a', level=logging.DEBUG, datefmt='%m/%d/%y %H:%M:%S') formatter = logging.Formatter("[%(asctime)s] %(levelname)s\t%(message)s", datefmt='%m/%d/%y %H:%M:%S') console = logging.StreamHandler() console.setFormatter(formatter) console.setLevel(logging.INFO) logging.getLogger().addHandler(console) # Start the pipeline logger.info('*' * 80) logger.info("AlphaPy Start") logger.info('*' * 80) # Argument Parsing parser = argparse.ArgumentParser(description="AlphaPy Parser") parser.add_mutually_exclusive_group(required=False) parser.add_argument('--predict', dest='predict_mode', action='store_true') parser.add_argument('--train', dest='predict_mode', action='store_false') parser.set_defaults(predict_mode=False) args = parser.parse_args() # Read configuration file specs = get_model_config() specs['predict_mode'] = args.predict_mode # Create directories if necessary output_dirs = ['config', 'data', 'input', 'model', 'output', 'plots'] for od in output_dirs: output_dir = SSEP.join([specs['directory'], od]) if not os.path.exists(output_dir): logger.info("Creating directory %s", output_dir) os.makedirs(output_dir) # Create a model from the arguments logger.info("Creating Model") model = Model(specs) # Start the pipeline logger.info("Calling Pipeline") model = main_pipeline(model) # Complete the pipeline logger.info('*' * 80) logger.info("AlphaPy End") logger.info('*' * 80)
def main(args=None): r"""MarketFlow Main Program Notes ----- (1) Initialize logging. (2) Parse the command line arguments. (3) Get the market configuration. (4) Get the model configuration. (5) Create the model object. (6) Call the main MarketFlow pipeline. Raises ------ ValueError Training date must be before prediction date. """ # Suppress Warnings warnings.simplefilter(action='ignore', category=DeprecationWarning) warnings.simplefilter(action='ignore', category=FutureWarning) # Logging logging.basicConfig(format="[%(asctime)s] %(levelname)s\t%(message)s", filename="market_flow.log", filemode='a', level=logging.DEBUG, datefmt='%m/%d/%y %H:%M:%S') formatter = logging.Formatter("[%(asctime)s] %(levelname)s\t%(message)s", datefmt='%m/%d/%y %H:%M:%S') console = logging.StreamHandler() console.setFormatter(formatter) console.setLevel(logging.INFO) logging.getLogger().addHandler(console) # Start the pipeline logger.info('*' * 80) logger.info("MarketFlow Start") logger.info('*' * 80) # Argument Parsing parser = argparse.ArgumentParser(description="MarketFlow Parser") parser.add_argument('--pdate', dest='predict_date', help="prediction date is in the format: YYYY-MM-DD", required=False, type=valid_date) parser.add_argument('--tdate', dest='train_date', help="training date is in the format: YYYY-MM-DD", required=False, type=valid_date) parser.add_mutually_exclusive_group(required=False) parser.add_argument('--predict', dest='predict_mode', action='store_true') parser.add_argument('--train', dest='predict_mode', action='store_false') parser.set_defaults(predict_mode=False) args = parser.parse_args() # Set train and predict dates if args.train_date: train_date = args.train_date else: train_date = pd.datetime(1900, 1, 1).strftime("%Y-%m-%d") if args.predict_date: predict_date = args.predict_date else: predict_date = datetime.date.today().strftime("%Y-%m-%d") # Verify that the dates are in sequence. if train_date >= predict_date: raise ValueError("Training date must be before prediction date") else: logger.info("Training Date: %s", train_date) logger.info("Prediction Date: %s", predict_date) # Read stock configuration file market_specs = get_market_config() # Read model configuration file model_specs = get_model_config() model_specs['predict_mode'] = args.predict_mode model_specs['predict_date'] = predict_date model_specs['train_date'] = train_date # Create directories if necessary output_dirs = [ 'config', 'data', 'input', 'model', 'output', 'plots', 'systems' ] for od in output_dirs: output_dir = SSEP.join([model_specs['directory'], od]) if not os.path.exists(output_dir): logger.info("Creating directory %s", output_dir) os.makedirs(output_dir) # Create a model object from the specifications model = Model(model_specs) # Start the pipeline model = market_pipeline(model, market_specs) # Complete the pipeline logger.info('*' * 80) logger.info("MarketFlow End") logger.info('*' * 80)
def main(args=None): r"""The main program for SportFlow. Notes ----- (1) Initialize logging. (2) Parse the command line arguments. (3) Get the game configuration. (4) Get the model configuration. (5) Generate game frames for each season. (6) Create statistics for each team. (7) Merge the team frames into the final model frame. (8) Run the AlphaPy pipeline. Raises ------ ValueError Training date must be before prediction date. """ # Logging logging.basicConfig(format="[%(asctime)s] %(levelname)s\t%(message)s", filename="sport_flow.log", filemode='a', level=logging.DEBUG, datefmt='%m/%d/%y %H:%M:%S') formatter = logging.Formatter("[%(asctime)s] %(levelname)s\t%(message)s", datefmt='%m/%d/%y %H:%M:%S') console = logging.StreamHandler() console.setFormatter(formatter) console.setLevel(logging.INFO) logging.getLogger().addHandler(console) logger = logging.getLogger(__name__) # Start the pipeline logger.info('*'*80) logger.info("SportFlow Start") logger.info('*'*80) # Argument Parsing parser = argparse.ArgumentParser(description="SportFlow Parser") parser.add_argument('--pdate', dest='predict_date', help="prediction date is in the format: YYYY-MM-DD", required=False, type=valid_date) parser.add_argument('--tdate', dest='train_date', help="training date is in the format: YYYY-MM-DD", required=False, type=valid_date) parser.add_mutually_exclusive_group(required=False) parser.add_argument('--predict', dest='predict_mode', action='store_true') parser.add_argument('--train', dest='predict_mode', action='store_false') parser.set_defaults(predict_mode=False) args = parser.parse_args() # Set train and predict dates if args.train_date: train_date = args.train_date else: train_date = pd.datetime(1900, 1, 1).strftime("%Y-%m-%d") if args.predict_date: predict_date = args.predict_date else: predict_date = datetime.date.today().strftime("%Y-%m-%d") # Verify that the dates are in sequence. if train_date >= predict_date: raise ValueError("Training date must be before prediction date") else: logger.info("Training Date: %s", train_date) logger.info("Prediction Date: %s", predict_date) # Read game configuration file sport_specs = get_sport_config() # Section: game league = sport_specs['league'] points_max = sport_specs['points_max'] points_min = sport_specs['points_min'] random_scoring = sport_specs['random_scoring'] seasons = sport_specs['seasons'] window = sport_specs['rolling_window'] # Read model configuration file specs = get_model_config() # Add command line arguments to model specifications specs['predict_mode'] = args.predict_mode specs['predict_date'] = args.predict_date specs['train_date'] = args.train_date # Unpack model arguments directory = specs['directory'] target = specs['target'] # Create directories if necessary output_dirs = ['config', 'data', 'input', 'model', 'output', 'plots'] for od in output_dirs: output_dir = SSEP.join([directory, od]) if not os.path.exists(output_dir): logger.info("Creating directory %s", output_dir) os.makedirs(output_dir) # Create the game scores space space = Space('game', 'scores', '1g') # # Derived Variables # series = space.schema team1_prefix = 'home' team2_prefix = 'away' home_team = PSEP.join([team1_prefix, 'team']) away_team = PSEP.join([team2_prefix, 'team']) # # Read in the game frame. This is the feature generation phase. 
# logger.info("Reading Game Data") data_dir = SSEP.join([directory, 'data']) file_base = USEP.join([league, space.subject, space.schema, space.fractal]) df = read_frame(data_dir, file_base, specs['extension'], specs['separator']) logger.info("Total Game Records: %d", df.shape[0]) # # Locate any rows with null values # null_rows = df.isnull().any(axis=1) null_indices = [i for i, val in enumerate(null_rows.tolist()) if val == True] for i in null_indices: logger.info("Null Record: %d on Date: %s", i, df.date[i]) # # Run the game pipeline on a seasonal loop # if not seasons: # run model on all seasons seasons = df['season'].unique().tolist() # # Initialize the final frame # ff = pd.DataFrame() # # Iterate through each season of the game frame # for season in seasons: # Generate a frame for each season gf = df[df['season'] == season] gf = gf.reset_index() # Generate derived variables for the game frame total_games = gf.shape[0] if random_scoring: gf['home.score'] = np.random.randint(points_min, points_max, total_games) gf['away.score'] = np.random.randint(points_min, points_max, total_games) gf['total_points'] = gf['home.score'] + gf['away.score'] gf = add_features(gf, game_dict, gf.shape[0]) for index, row in gf.iterrows(): gf['point_margin_game'].at[index] = get_point_margin(row, 'home.score', 'away.score') gf['won_on_points'].at[index] = True if gf['point_margin_game'].at[index] > 0 else False gf['lost_on_points'].at[index] = True if gf['point_margin_game'].at[index] < 0 else False gf['cover_margin_game'].at[index] = gf['point_margin_game'].at[index] + row['line'] gf['won_on_spread'].at[index] = True if gf['cover_margin_game'].at[index] > 0 else False gf['lost_on_spread'].at[index] = True if gf['cover_margin_game'].at[index] <= 0 else False gf['overunder_margin'].at[index] = gf['total_points'].at[index] - row['over_under'] gf['over'].at[index] = True if gf['overunder_margin'].at[index] > 0 else False gf['under'].at[index] = True if gf['overunder_margin'].at[index] < 0 else False # Generate each team frame team_frames = {} teams = gf.groupby([home_team]) for team, data in teams: team_frame = USEP.join([league, team.lower(), series, str(season)]) logger.info("Generating team frame: %s", team_frame) tf = get_team_frame(gf, team, home_team, away_team) tf = tf.reset_index() tf = generate_team_frame(team, tf, home_team, away_team, window) team_frames[team_frame] = tf # Create the model frame, initializing the home and away frames mdict = {k:v for (k,v) in list(sports_dict.items()) if v != bool} team1_frame = pd.DataFrame() team1_frame = add_features(team1_frame, mdict, gf.shape[0], prefix=team1_prefix) team2_frame = pd.DataFrame() team2_frame = add_features(team2_frame, mdict, gf.shape[0], prefix=team2_prefix) frames = [gf, team1_frame, team2_frame] mf = pd.concat(frames, axis=1) # Loop through each team frame, inserting data into the model frame row # get index+1 [if valid] # determine if team is home or away to get prefix # try: np.where((gf[home_team] == 'PHI') & (gf['date'] == '09/07/14'))[0][0] # Assign team frame fields to respective model frame fields: set gf.at(pos, field) for team, data in teams: team_frame = USEP.join([league, team.lower(), series, str(season)]) logger.info("Merging team frame %s into model frame", team_frame) tf = team_frames[team_frame] for index in range(0, tf.shape[0]-1): gindex = index + 1 model_row = tf.iloc[gindex] key_date = model_row['date'] at_home = False if team == model_row[home_team]: at_home = True key_team = model_row[home_team] elif team == 
model_row[away_team]: key_team = model_row[away_team] else: raise KeyError("Team %s not found in Team Frame" % team) try: if at_home: mpos = np.where((mf[home_team] == key_team) & (mf['date'] == key_date))[0][0] else: mpos = np.where((mf[away_team] == key_team) & (mf['date'] == key_date))[0][0] except: raise IndexError("Team/Date Key not found in Model Frame") # print team, gindex, mpos # insert team data into model row mf = insert_model_data(mf, mpos, mdict, tf, index, team1_prefix if at_home else team2_prefix) # Compute delta data 'home' - 'away' mf = generate_delta_data(mf, mdict, team1_prefix, team2_prefix) # Append this to final frame frames = [ff, mf] ff = pd.concat(frames) # Write out dataframes input_dir = SSEP.join([directory, 'input']) if args.predict_mode: new_predict_frame = ff.loc[ff.date >= predict_date] if len(new_predict_frame) <= 1: raise ValueError("Prediction frame has length 1 or less") # rewrite with all the features to the train and test files logger.info("Saving prediction frame") write_frame(new_predict_frame, input_dir, datasets[Partition.predict], specs['extension'], specs['separator']) else: # split data into training and test data new_train_frame = ff.loc[(ff.date >= train_date) & (ff.date < predict_date)] if len(new_train_frame) <= 1: raise ValueError("Training frame has length 1 or less") new_test_frame = ff.loc[ff.date >= predict_date] if len(new_test_frame) <= 1: raise ValueError("Testing frame has length 1 or less") # rewrite with all the features to the train and test files logger.info("Saving training frame") write_frame(new_train_frame, input_dir, datasets[Partition.train], specs['extension'], specs['separator']) logger.info("Saving testing frame") write_frame(new_test_frame, input_dir, datasets[Partition.test], specs['extension'], specs['separator']) # Create the model from specs logger.info("Running Model") model = Model(specs) # Run the pipeline model = main_pipeline(model) # Complete the pipeline logger.info('*'*80) logger.info("SportFlow End") logger.info('*'*80)
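
# For reference, a plausible sketch of the get_point_margin helper called in
# the seasonal loop above. This is an assumption inferred from its usage,
# not necessarily the packaged implementation: home score minus away score,
# with a zero margin when either score is missing (e.g., unplayed games).

def get_point_margin(row, score, opponent_score):
    r"""Compute the point margin between two score columns (sketch)."""
    # Hypothetical convention: a missing score yields a zero margin.
    point_margin = 0
    if not (pd.isnull(row[score]) or pd.isnull(row[opponent_score])):
        point_margin = row[score] - row[opponent_score]
    return point_margin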