def race_set_runner_trial():
    '''
    Run a RaceSetRunner trial over one of the local test data sets.

    Loads and formats the race data found in ``relevant_dir`` through
    DataGetter, replaces the separate 'Date' and 'Race_Time' text columns
    with one parsed 'Race_Date_Time' column, then builds a RaceSetRunner
    from the parameters below and calls its ``mainline()``.

    Returns nothing; whatever ``RaceSetRunner.mainline()`` returns is
    discarded.
    '''
    # Data set to run against.  Other directories that have been used here:
    # 'fulldata/2005', 'code_test_1_month', 'code_test_3_months',
    # 'code_test_many_events', 'code_test_single_event',
    # 'code_test_single_race'.
    relevant_dir = 'test_1'

    data_getter = DataGetter(relevant_dir)
    data_getter.format_data()
    df = data_getter.get_racesdataframe()

    # TODO: apply this date/time merge in DataGetter when the dataframe is
    # formed, so that every caller gets 'Race_Date_Time' for free.
    # The two text columns are joined column-wise (not row by row with
    # apply), which is faster and also behaves on an empty frame.
    race_date_time_text = df['Date'] + ' ' + df['Race_Time']
    df['Race_Date_Time'] = pd.to_datetime(race_date_time_text,
                                          format='%d/%m/%Y %H:%M')
    df = df.drop(['Date', 'Race_Time'], axis=1)

    race_runner_parameter_dict = {'min_num_race_results': 10000}

    # Example of a narrower trial that was previously defined here and then
    # immediately overwritten (kept for reference only):
    #   {'Track_list': ['Crayford', 'Coventry'],
    #    'Grade_list': ['HP', 'A8', 'A7', 'A6', 'A5', 'A4', 'A3', 'A2',
    #                   'A1', 'IM'],
    #    'Start_Date': datetime.datetime(2005, 1, 1),
    #    'End_Date': datetime.datetime(2005, 1, 3),
    #    'num_sims': 10}
    # NOTE(review): empty lists / None dates presumably mean "no filter" --
    # confirm against RaceSetRunner.
    trial_parameters = {
        'Track_list': [],
        'Grade_list': [],
        'Start_Date': None,
        'End_Date': None,
        'num_sims': 10000,
    }

    race_runner_object = None
    race_set_runner = RaceSetRunner(df, trial_parameters, race_runner_object,
                                    race_runner_parameter_dict)
    race_set_runner.mainline()
def mainline(self):
    '''
    Run every trial in the experiment design and save the combined results.

    Loads the race data for ``self.data_dir_path`` into ``self.data_df``.
    If the directory holds exactly one CSV file the data is loaded from it;
    otherwise the raw data is formatted and written out as a CSV first.
    Each trial definition in ``self.experiment_design`` is then run through
    TrialRunner, its result table is tagged with a 1-based 'trial' number
    (in iteration order of ``experiment_design``, not by its key), and all
    tables are stacked and written to ``self.results_write_file_path``.

    Returns the combined results table.

    Raises ValueError if ``self.experiment_design`` contains no trials.
    '''
    # Imported locally so this method does not depend on how the enclosing
    # module happens to import pandas.
    import pandas as pd

    # Get all the relevant data into a dataframe; if it does not yet exist
    # as a single CSV file, build it from the raw data and save it as one.
    data_file_list = ut.get_shallow_csv_file_list(self.data_dir_path)
    data_getter = DataGetter(self.data_dir_path)
    if len(data_file_list) == 1:
        data_getter.load_csv()
    else:
        data_getter.format_data()
        data_getter.write_csv()
    self.data_df = data_getter.get_racesdataframe()

    trial_results = []
    trial_definitions = self.experiment_design.values()
    for trial_number, trial_definition in enumerate(trial_definitions, 1):
        trial_runner = TrialRunner(self.data_df, trial_definition)
        trial_result_table = trial_runner.mainline()
        # Tag the rows so they can be told apart once the tables are stacked.
        trial_result_table['trial'] = trial_number
        trial_results.append(trial_result_table)

    if not trial_results:
        raise ValueError('experiment_design contains no trials to run')

    # One concat instead of repeated DataFrame.append: append copies the
    # accumulated table on every call and no longer exists in pandas 2.x.
    # NOTE(review): assumes TrialRunner.mainline() returns a pandas
    # DataFrame -- confirm.
    experiment_results = pd.concat(trial_results)

    experiment_results.to_csv(self.results_write_file_path, encoding='utf-8')
    print('Experiment Completed')
    return experiment_results
def trial_RaceRunner():
    '''
    Run RaceRunner on every race in a test data set and print the results.

    Loads and formats the race data found in ``relevant_dir`` through
    DataGetter, replaces the separate 'Date' and 'Race_Time' text columns
    with one parsed 'Race_Date_Time' column, and derives the list of
    distinct (track, race date/time) pairs ordered by race time.  For each
    pair a RaceRunner is built over the full dataframe and its
    ``main_line()`` result has its shape and first rows printed.

    Returns nothing.
    '''
    # Data set to run against.  Other directories that have been used here:
    # 'fulldata/2005', 'test_1', 'code_test_1_month',
    # 'code_test_many_events', 'code_test_single_event',
    # 'code_test_single_race'.
    relevant_dir = 'code_test_3_months'

    data_getter = DataGetter(relevant_dir)
    data_getter.format_data()
    df = data_getter.get_racesdataframe()

    # TODO: apply this date/time merge in DataGetter when the dataframe is
    # formed, so that every caller gets 'Race_Date_Time' for free.
    # The two text columns are joined column-wise (not row by row with
    # apply), which is faster and also behaves on an empty frame.
    race_date_time_text = df['Date'] + ' ' + df['Race_Time']
    df['Race_Date_Time'] = pd.to_datetime(race_date_time_text,
                                          format='%d/%m/%Y %H:%M')
    df = df.drop(['Date', 'Race_Time'], axis=1)

    # One row per distinct race, earliest first.  drop_duplicates on the two
    # key columns replaces the former pd.unique-on-tuples round trip, and
    # sort_values replaces the removed DataFrame.sort.
    races = df[['Track', 'Race_Date_Time']].drop_duplicates()
    races = races.sort_values('Race_Date_Time')

    num_sims = 10000
    for track, race_date_time in zip(races['Track'], races['Race_Date_Time']):
        race_reference_dict = {'Track': track,
                               'Race_Date_Time': race_date_time}
        # A fresh parameter dict per race, as before, so nothing RaceRunner
        # might store in it carries over to the next race.
        parameter_dict = {'min_num_race_results': 10}

        race_runner = RaceRunner(df, race_reference_dict, num_sims,
                                 parameter_dict)
        results = race_runner.main_line()

        # Single-argument print calls give the same output on Python 2 and 3.
        print('results_shape %s' % (results.shape,))
        print(results.head())