def __init__(self, year, large_dfs, processors):
    """Set up the per-season state for ``year``.

    Args:
        year: Season identifier; stored on the instance and passed to
            ``Slicer.slice_all_data`` and ``Loader.load_boxscores``.
        large_dfs: Multi-season data handed to ``Slicer.slice_all_data``
            to be narrowed to ``year``.
        processors: Iterable unpacked into exactly three items, in the
            order (boxscore processor, shots processor, game processor).

    Raises:
        ValueError: If ``processors`` does not unpack into three items.
    """
    self.year = year

    # Narrow every multi-season dataframe down to the current season,
    # then attach this season's boxscores to the sliced container.
    self.data = Slicer.slice_all_data(large_dfs, self.year)
    self.data.boxscores = Loader.load_boxscores(self.year)

    # Unique values of the 'Date' index level; kept for indexing/access.
    team_index = self.data.boxscores.teams.index
    self.dates = pd.unique(team_index.get_level_values('Date').values)

    shot_index = self.data.shots.globals.index
    self.shots_dates = pd.unique(shot_index.get_level_values('Date').values)

    # Processor objects used later for processing the data.
    boxscore_proc, shots_proc, game_proc = processors
    self.boxscore_processor = boxscore_proc
    self.shots_processor = shots_proc
    self.game_processor = game_proc
def __init__(self, year, large_dfs, processors):
    """Build the season-specific view of the data for ``year``.

    Args:
        year: Season identifier, stored as ``self.year``.
        large_dfs: Multi-season data; sliced to ``year`` through
            ``Slicer.slice_all_data``.
        processors: Three-item iterable giving the boxscore, shots and
            game processors, in that order.

    Raises:
        ValueError: If ``processors`` does not contain exactly three items.
    """

    def unique_dates(frame):
        # Distinct values found in the 'Date' level of the frame's index.
        return pd.unique(frame.index.get_level_values('Date').values)

    self.year = year

    # Keep only the current season of each dataframe and load the
    # matching boxscores onto the sliced data container.
    self.data = Slicer.slice_all_data(large_dfs, self.year)
    self.data.boxscores = Loader.load_boxscores(self.year)

    # Date lists, used for indexing and access.
    self.dates = unique_dates(self.data.boxscores.teams)
    self.shots_dates = unique_dates(self.data.shots.globals)

    # Processor classes to use for processing data.
    (self.boxscore_processor,
     self.shots_processor,
     self.game_processor) = processors
def process_jobs(self):
    """Run every configured job in order and report progress and timing.

    Jobs are described by two parallel lists on the instance:
    ``self.processor_list`` (the processor objects for each job) and
    ``self.param_list`` (one dict per job with the keys ``'min_year'``,
    ``'max_year'``, ``'path'`` and ``'output_format'``). The multi-season
    dataframes are loaded once via ``Loader.load_large_dfs`` and shared
    by all jobs; each job is delegated to ``self.perform_job``.

    Progress messages and the total elapsed wall-clock time are printed
    to standard output. All output goes through the single-argument
    ``print(...)`` form so the method behaves the same on Python 2 and
    Python 3.
    """
    # Total number of jobs, taken from the processor list.
    num_jobs = len(self.processor_list)
    print('Number of Jobs: ' + str(num_jobs))

    start_time = time.time()  # wall-clock timer for the whole run

    # Load dataframes that span multiple seasons once, up front.
    large_dfs = Loader.load_large_dfs()

    # Jobs are numbered from 1 for the progress output. zip() stops at
    # the shorter of the two lists, exactly as the job pairing did before.
    jobs = zip(self.processor_list, self.param_list)
    for num, (processors, params) in enumerate(jobs, 1):
        print('Starting Job #' + str(num) + ': ' + str(params['path']))

        # Perform the job with its own parameters and processor objects.
        self.perform_job(
            large_dfs,
            params['min_year'],
            params['max_year'],
            processors,
            params['path'],
            params['output_format'],
        )

    print("ALL JOBS FINISHED")
    print("Total time: --- %s seconds ---" % (time.time() - start_time))